-rw-r--r-- | bridge/core/import.go            |  12
-rw-r--r-- | bridge/github/import.go          | 544
-rw-r--r-- | bridge/github/import_mediator.go | 436
-rw-r--r-- | bridge/github/import_query.go    | 252
-rw-r--r-- | bridge/github/iterator.go        | 423
-rw-r--r-- | bridge/gitlab/import.go          |  25
-rw-r--r-- | bridge/jira/import.go            |  52
7 files changed, 899 insertions, 845 deletions
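The first hunk below introduces a dedicated ImportEventRateLimiting event together with its NewImportRateLimiting constructor, so a bridge can report that it is paused on an API rate limit rather than silently stalled. A minimal consumer sketch, assuming only the exported ImportResult fields used in that hunk (Event, Reason, Err); the package and function names are illustrative:

package example

import (
	"fmt"

	"github.com/MichaelMure/git-bug/bridge/core"
)

// reportResults drains the channel returned by an importer's ImportAll and
// surfaces rate-limiting pauses separately from ordinary import events.
func reportResults(results <-chan core.ImportResult) {
	for result := range results {
		switch result.Event {
		case core.ImportEventRateLimiting:
			// the bridge is sleeping until the web API rate limit resets
			fmt.Println("rate limited:", result.Reason)
		case core.ImportEventError:
			fmt.Println("import error:", result.Err)
		default:
			fmt.Println(result.String())
		}
	}
}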
diff --git a/bridge/core/import.go b/bridge/core/import.go index 0b0b4c68..c1965d4e 100644 --- a/bridge/core/import.go +++ b/bridge/core/import.go @@ -34,6 +34,9 @@ const ( // but not severe enough to consider the import a failure. ImportEventWarning + // The import system (web API) has reached the rate limit + ImportEventRateLimiting + // Error happened during import ImportEventError ) @@ -87,6 +90,8 @@ func (er ImportResult) String() string { parts = append(parts, fmt.Sprintf("err: %s", er.Err)) } return strings.Join(parts, " ") + case ImportEventRateLimiting: + return fmt.Sprintf("rate limiting: %s", er.Reason) default: panic("unknown import result") @@ -165,3 +170,10 @@ func NewImportIdentity(id entity.Id) ImportResult { Event: ImportEventIdentity, } } + +func NewImportRateLimiting(msg string) ImportResult { + return ImportResult{ + Reason: msg, + Event: ImportEventRateLimiting, + } +} diff --git a/bridge/github/import.go b/bridge/github/import.go index af62746f..47be6374 100644 --- a/bridge/github/import.go +++ b/bridge/github/import.go @@ -15,84 +15,128 @@ import ( "github.com/MichaelMure/git-bug/util/text" ) +const EMPTY_TITLE_PLACEHOLDER = "<empty string>" + // githubImporter implement the Importer interface type githubImporter struct { conf core.Configuration - // default client - client *githubv4.Client - - // iterator - iterator *iterator + // mediator to access the Github API + mediator *importMediator // send only channel out chan<- core.ImportResult } -func (gi *githubImporter) Init(_ context.Context, repo *cache.RepoCache, conf core.Configuration) error { +func (gi *githubImporter) Init(_ context.Context, _ *cache.RepoCache, conf core.Configuration) error { gi.conf = conf + return nil +} +// ImportAll iterate over all the configured repository issues and ensure the creation of the +// missing issues / timeline items / edits / label events ... +func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) { creds, err := auth.List(repo, auth.WithTarget(target), auth.WithKind(auth.KindToken), - auth.WithMeta(auth.MetaKeyLogin, conf[confKeyDefaultLogin]), + auth.WithMeta(auth.MetaKeyLogin, gi.conf[confKeyDefaultLogin]), ) if err != nil { - return err + return nil, err } - - if len(creds) == 0 { - return ErrMissingIdentityToken + if len(creds) <= 0 { + return nil, ErrMissingIdentityToken } - - gi.client = buildClient(creds[0].(*auth.Token)) - - return nil -} - -// ImportAll iterate over all the configured repository issues and ensure the creation of the -// missing issues / timeline items / edits / label events ... 
-func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) { - gi.iterator = NewIterator(ctx, gi.client, 10, gi.conf[confKeyOwner], gi.conf[confKeyProject], since) + client := buildClient(creds[0].(*auth.Token)) + gi.mediator = NewImportMediator(ctx, client, gi.conf[confKeyOwner], gi.conf[confKeyProject], since) out := make(chan core.ImportResult) gi.out = out go func() { defer close(gi.out) - - // Loop over all matching issues - for gi.iterator.NextIssue() { - issue := gi.iterator.IssueValue() - // create issue - b, err := gi.ensureIssue(repo, issue) - if err != nil { - err := fmt.Errorf("issue creation: %v", err) - out <- core.NewImportError(err, "") - return + var currBug *cache.BugCache + var currEvent ImportEvent + var nextEvent ImportEvent + var err error + for { + // An IssueEvent contains the issue in its most recent state. If an issue + // has at least one issue edit, then the history of the issue edits is + // represented by IssueEditEvents. That is, the unedited (original) issue + // might be saved only in the IssueEditEvent following the IssueEvent. + // Since we replicate the edit history we need to either use the IssueEvent + // (if there are no edits) or the IssueEvent together with its first + // IssueEditEvent (if there are edits). + // Exactly the same is true for comments and comment edits. + // As a consequence we need to look at the current event and one look ahead + // event. + currEvent = nextEvent + if currEvent == nil { + currEvent = gi.getEventHandleMsgs() } - - // loop over timeline items - for gi.iterator.NextTimelineItem() { - item := gi.iterator.TimelineItemValue() - err := gi.ensureTimelineItem(repo, b, item) + if currEvent == nil { + break + } + nextEvent = gi.getEventHandleMsgs() + + switch event := currEvent.(type) { + case RateLimitingEvent: + out <- core.NewImportRateLimiting(event.msg) + case IssueEvent: + // first: commit what is being held in currBug + if err = gi.commit(currBug, out); err != nil { + out <- core.NewImportError(err, "") + return + } + // second: create new issue + switch next := nextEvent.(type) { + case IssueEditEvent: + // consuming and using next event + nextEvent = nil + currBug, err = gi.ensureIssue(ctx, repo, &event.issue, &next.userContentEdit) + default: + currBug, err = gi.ensureIssue(ctx, repo, &event.issue, nil) + } + if err != nil { + err := fmt.Errorf("issue creation: %v", err) + out <- core.NewImportError(err, "") + return + } + case IssueEditEvent: + err = gi.ensureIssueEdit(ctx, repo, currBug, event.issueId, &event.userContentEdit) + if err != nil { + err = fmt.Errorf("issue edit: %v", err) + out <- core.NewImportError(err, "") + return + } + case TimelineEvent: + if next, ok := nextEvent.(CommentEditEvent); ok && event.Typename == "IssueComment" { + // consuming and using next event + nextEvent = nil + err = gi.ensureComment(ctx, repo, currBug, &event.timelineItem.IssueComment, &next.userContentEdit) + } else { + err = gi.ensureTimelineItem(ctx, repo, currBug, &event.timelineItem) + } if err != nil { err = fmt.Errorf("timeline item creation: %v", err) out <- core.NewImportError(err, "") return } - } - - if !b.NeedCommit() { - out <- core.NewImportNothing(b.Id(), "no imported operation") - } else if err := b.Commit(); err != nil { - // commit bug state - err = fmt.Errorf("bug commit: %v", err) - out <- core.NewImportError(err, "") - return + case CommentEditEvent: + err = gi.ensureCommentEdit(ctx, repo, currBug, event.commentId, 
&event.userContentEdit) + if err != nil { + err = fmt.Errorf("comment edit: %v", err) + out <- core.NewImportError(err, "") + return + } + default: + panic("Unknown event type") } } - - if err := gi.iterator.Error(); err != nil { + // commit what is being held in currBug before returning + if err = gi.commit(currBug, out); err != nil { + out <- core.NewImportError(err, "") + } + if err = gi.mediator.Error(); err != nil { gi.out <- core.NewImportError(err, "") } }() @@ -100,9 +144,35 @@ func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, return out, nil } -func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issue) (*cache.BugCache, error) { - // ensure issue author - author, err := gi.ensurePerson(repo, issue.Author) +func (gi *githubImporter) getEventHandleMsgs() ImportEvent { + for { + // read event from import mediator + event := gi.mediator.NextImportEvent() + // consume (and use) all rate limiting events + if e, ok := event.(RateLimitingEvent); ok { + gi.out <- core.NewImportRateLimiting(e.msg) + continue + } + return event + } +} + +func (gi *githubImporter) commit(b *cache.BugCache, out chan<- core.ImportResult) error { + if b == nil { + return nil + } + if !b.NeedCommit() { + out <- core.NewImportNothing(b.Id(), "no imported operation") + return nil + } else if err := b.Commit(); err != nil { + // commit bug state + return fmt.Errorf("bug commit: %v", err) + } + return nil +} + +func (gi *githubImporter) ensureIssue(ctx context.Context, repo *cache.RepoCache, issue *issue, issueEdit *userContentEdit) (*cache.BugCache, error) { + author, err := gi.ensurePerson(ctx, repo, issue.Author) if err != nil { return nil, err } @@ -112,113 +182,66 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issue) (*cach return excerpt.CreateMetadata[core.MetaKeyOrigin] == target && excerpt.CreateMetadata[metaKeyGithubId] == parseId(issue.Id) }) - if err != nil && err != bug.ErrBugNotExist { + if err == nil { + return b, nil + } + if err != bug.ErrBugNotExist { return nil, err } - // get issue edits - var issueEdits []userContentEdit - for gi.iterator.NextIssueEdit() { - issueEdits = append(issueEdits, gi.iterator.IssueEditValue()) + // At Github there exist issues with seemingly empty titles. An example is + // https://github.com/NixOS/nixpkgs/issues/72730 . + // The title provided by the GraphQL API actually consists of a space followed by a + // zero width space (U+200B). This title would cause the NewBugRaw() function to + // return an error: empty title. 
+ title := string(issue.Title) + if title == " \u200b" { // U+200B == zero width space + title = EMPTY_TITLE_PLACEHOLDER } - // if issueEdits is empty - if len(issueEdits) == 0 { - if err == bug.ErrBugNotExist { - cleanText, err := text.Cleanup(string(issue.Body)) - if err != nil { - return nil, err - } - - // create bug - b, _, err = repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - issue.Title, - cleanText, - nil, - map[string]string{ - core.MetaKeyOrigin: target, - metaKeyGithubId: parseId(issue.Id), - metaKeyGithubUrl: issue.Url.String(), - }) - if err != nil { - return nil, err - } - - // importing a new bug - gi.out <- core.NewImportBug(b.Id()) - } + var textInput string + if issueEdit != nil { + // use the first issue edit: it represents the bug creation itself + textInput = string(*issueEdit.Diff) } else { - // create bug from given issueEdits - for i, edit := range issueEdits { - if i == 0 && b != nil { - // The first edit in the github result is the issue creation itself, we already have that - continue - } - - cleanText, err := text.Cleanup(string(*edit.Diff)) - if err != nil { - return nil, err - } - - // if the bug doesn't exist - if b == nil { - // we create the bug as soon as we have a legit first edition - b, _, err = repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - issue.Title, // TODO: this is the *current* title, not the original one - cleanText, - nil, - map[string]string{ - core.MetaKeyOrigin: target, - metaKeyGithubId: parseId(issue.Id), - metaKeyGithubUrl: issue.Url.String(), - }, - ) - - if err != nil { - return nil, err - } - // importing a new bug - gi.out <- core.NewImportBug(b.Id()) - continue - } - - // other edits will be added as CommentEdit operations - target, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(issue.Id)) - if err == cache.ErrNoMatchingOp { - // original comment is missing somehow, issuing a warning - gi.out <- core.NewImportWarning(fmt.Errorf("comment ID %s to edit is missing", parseId(issue.Id)), b.Id()) - continue - } - if err != nil { - return nil, err - } + // if there are no issue edits then the issue struct holds the bug creation + textInput = string(issue.Body) + } + cleanText, err := text.Cleanup(textInput) + if err != nil { + return nil, err + } - err = gi.ensureCommentEdit(repo, b, target, edit) - if err != nil { - return nil, err - } - } + // create bug + b, _, err = repo.NewBugRaw( + author, + issue.CreatedAt.Unix(), + title, // TODO: this is the *current* title, not the original one + cleanText, + nil, + map[string]string{ + core.MetaKeyOrigin: target, + metaKeyGithubId: parseId(issue.Id), + metaKeyGithubUrl: issue.Url.String(), + }) + if err != nil { + return nil, err } + // importing a new bug + gi.out <- core.NewImportBug(b.Id()) return b, nil } -func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, item timelineItem) error { +func (gi *githubImporter) ensureIssueEdit(ctx context.Context, repo *cache.RepoCache, bug *cache.BugCache, ghIssueId githubv4.ID, edit *userContentEdit) error { + return gi.ensureCommentEdit(ctx, repo, bug, ghIssueId, edit) +} + +func (gi *githubImporter) ensureTimelineItem(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, item *timelineItem) error { switch item.Typename { case "IssueComment": - // collect all comment edits - var commentEdits []userContentEdit - for gi.iterator.NextCommentEdit() { - commentEdits = append(commentEdits, gi.iterator.CommentEditValue()) - } - - // ensureTimelineComment send import events over out chanel - 
err := gi.ensureTimelineComment(repo, b, item.IssueComment, commentEdits) + err := gi.ensureComment(ctx, repo, b, &item.IssueComment, nil) if err != nil { return fmt.Errorf("timeline comment creation: %v", err) } @@ -234,7 +257,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug if err != cache.ErrNoMatchingOp { return err } - author, err := gi.ensurePerson(repo, item.LabeledEvent.Actor) + author, err := gi.ensurePerson(ctx, repo, item.LabeledEvent.Actor) if err != nil { return err } @@ -263,7 +286,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug if err != cache.ErrNoMatchingOp { return err } - author, err := gi.ensurePerson(repo, item.UnlabeledEvent.Actor) + author, err := gi.ensurePerson(ctx, repo, item.UnlabeledEvent.Actor) if err != nil { return err } @@ -293,7 +316,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug if err == nil { return nil } - author, err := gi.ensurePerson(repo, item.ClosedEvent.Actor) + author, err := gi.ensurePerson(ctx, repo, item.ClosedEvent.Actor) if err != nil { return err } @@ -319,7 +342,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug if err == nil { return nil } - author, err := gi.ensurePerson(repo, item.ReopenedEvent.Actor) + author, err := gi.ensurePerson(ctx, repo, item.ReopenedEvent.Actor) if err != nil { return err } @@ -345,14 +368,25 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug if err == nil { return nil } - author, err := gi.ensurePerson(repo, item.RenamedTitleEvent.Actor) + author, err := gi.ensurePerson(ctx, repo, item.RenamedTitleEvent.Actor) if err != nil { return err } + + // At Github there exist issues with seemingly empty titles. An example is + // https://github.com/NixOS/nixpkgs/issues/72730 . + // The title provided by the GraphQL API actually consists of a space followed + // by a zero width space (U+200B). This title would cause the NewBugRaw() + // function to return an error: empty title. 
+ title := string(item.RenamedTitleEvent.CurrentTitle) + if title == " \u200b" { // U+200B == zero width space + title = EMPTY_TITLE_PLACEHOLDER + } + op, err := b.SetTitleRaw( author, item.RenamedTitleEvent.CreatedAt.Unix(), - string(item.RenamedTitleEvent.CurrentTitle), + title, map[string]string{metaKeyGithubId: id}, ) if err != nil { @@ -366,97 +400,62 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug return nil } -func (gi *githubImporter) ensureTimelineComment(repo *cache.RepoCache, b *cache.BugCache, item issueComment, edits []userContentEdit) error { - // ensure person - author, err := gi.ensurePerson(repo, item.Author) +func (gi *githubImporter) ensureCommentEdit(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, ghTargetId githubv4.ID, edit *userContentEdit) error { + // find comment + target, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(ghTargetId)) if err != nil { return err } - - targetOpID, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(item.Id)) - if err != nil && err != cache.ErrNoMatchingOp { + _, err = b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(edit.Id)) + if err == nil { + return nil + } + if err != cache.ErrNoMatchingOp { // real error return err } - // if no edits are given we create the comment - if len(edits) == 0 { - if err == cache.ErrNoMatchingOp { - cleanText, err := text.Cleanup(string(item.Body)) - if err != nil { - return err - } - - // add comment operation - op, err := b.AddCommentRaw( - author, - item.CreatedAt.Unix(), - cleanText, - nil, - map[string]string{ - metaKeyGithubId: parseId(item.Id), - metaKeyGithubUrl: parseId(item.Url.String()), - }, - ) - if err != nil { - return err - } - - gi.out <- core.NewImportComment(op.Id()) - return nil - } - - } else { - for i, edit := range edits { - if i == 0 && targetOpID != "" { - // The first edit in the github result is the comment creation itself, we already have that - continue - } - - // ensure editor identity - editor, err := gi.ensurePerson(repo, edit.Editor) - if err != nil { - return err - } + editor, err := gi.ensurePerson(ctx, repo, edit.Editor) + if err != nil { + return err + } - // create comment when target is empty - if targetOpID == "" { - cleanText, err := text.Cleanup(string(*edit.Diff)) - if err != nil { - return err - } + if edit.DeletedAt != nil { + // comment deletion, not supported yet + return nil + } - op, err := b.AddCommentRaw( - editor, - edit.CreatedAt.Unix(), - cleanText, - nil, - map[string]string{ - metaKeyGithubId: parseId(item.Id), - metaKeyGithubUrl: item.Url.String(), - }, - ) - if err != nil { - return err - } - gi.out <- core.NewImportComment(op.Id()) + cleanText, err := text.Cleanup(string(*edit.Diff)) + if err != nil { + return err + } - // set target for the next edit now that the comment is created - targetOpID = op.Id() - continue - } + // comment edition + op, err := b.EditCommentRaw( + editor, + edit.CreatedAt.Unix(), + target, + cleanText, + map[string]string{ + metaKeyGithubId: parseId(edit.Id), + }, + ) - err = gi.ensureCommentEdit(repo, b, targetOpID, edit) - if err != nil { - return err - } - } + if err != nil { + return err } + + gi.out <- core.NewImportCommentEdition(op.Id()) return nil } -func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugCache, target entity.Id, edit userContentEdit) error { - _, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(edit.Id)) +func (gi *githubImporter) ensureComment(ctx context.Context, repo 
*cache.RepoCache, b *cache.BugCache, comment *issueComment, firstEdit *userContentEdit) error { + author, err := gi.ensurePerson(ctx, repo, comment.Author) + if err != nil { + return err + } + + _, err = b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(comment.Id)) if err == nil { return nil } @@ -465,50 +464,44 @@ func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugC return err } - editor, err := gi.ensurePerson(repo, edit.Editor) + var textInput string + if firstEdit != nil { + // use the first comment edit: it represents the comment creation itself + textInput = string(*firstEdit.Diff) + } else { + // if there are not comment edits, then the comment struct holds the comment creation + textInput = string(comment.Body) + } + cleanText, err := text.Cleanup(textInput) if err != nil { return err } - switch { - case edit.DeletedAt != nil: - // comment deletion, not supported yet - return nil - - case edit.DeletedAt == nil: - - cleanText, err := text.Cleanup(string(*edit.Diff)) - if err != nil { - return err - } - - // comment edition - op, err := b.EditCommentRaw( - editor, - edit.CreatedAt.Unix(), - target, - cleanText, - map[string]string{ - metaKeyGithubId: parseId(edit.Id), - }, - ) - - if err != nil { - return err - } - - gi.out <- core.NewImportCommentEdition(op.Id()) - return nil + // add comment operation + op, err := b.AddCommentRaw( + author, + comment.CreatedAt.Unix(), + cleanText, + nil, + map[string]string{ + metaKeyGithubId: parseId(comment.Id), + metaKeyGithubUrl: comment.Url.String(), + }, + ) + if err != nil { + return err } + + gi.out <- core.NewImportComment(op.Id()) return nil } // ensurePerson create a bug.Person from the Github data -func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*cache.IdentityCache, error) { +func (gi *githubImporter) ensurePerson(ctx context.Context, repo *cache.RepoCache, actor *actor) (*cache.IdentityCache, error) { // When a user has been deleted, Github return a null actor, while displaying a profile named "ghost" // in it's UI. So we need a special case to get it. 
if actor == nil { - return gi.getGhost(repo) + return gi.getGhost(ctx, repo) } // Look first in the cache @@ -521,7 +514,6 @@ func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*ca } // importing a new identity - var name string var email string @@ -565,48 +557,34 @@ func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*ca return i, nil } -func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*cache.IdentityCache, error) { +func (gi *githubImporter) getGhost(ctx context.Context, repo *cache.RepoCache) (*cache.IdentityCache, error) { + loginName := "ghost" // Look first in the cache - i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, "ghost") + i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, loginName) if err == nil { return i, nil } if entity.IsErrMultipleMatch(err) { return nil, err } - - var q ghostQuery - - variables := map[string]interface{}{ - "login": githubv4.String("ghost"), + user, err := gi.mediator.User(ctx, loginName) + userName := "" + if user.Name != nil { + userName = string(*user.Name) } - - ctx, cancel := context.WithTimeout(gi.iterator.ctx, defaultTimeout) - defer cancel() - - err = gi.client.Query(ctx, &q, variables) - if err != nil { - return nil, err - } - - var name string - if q.User.Name != nil { - name = string(*q.User.Name) - } - return repo.NewIdentityRaw( - name, + userName, "", - string(q.User.Login), - string(q.User.AvatarUrl), + string(user.Login), + string(user.AvatarUrl), nil, map[string]string{ - metaKeyGithubLogin: string(q.User.Login), + metaKeyGithubLogin: string(user.Login), }, ) } -// parseId convert the unusable githubv4.ID (an interface{}) into a string +// parseId converts the unusable githubv4.ID (an interface{}) into a string func parseId(id githubv4.ID) string { return fmt.Sprintf("%v", id) } diff --git a/bridge/github/import_mediator.go b/bridge/github/import_mediator.go new file mode 100644 index 00000000..825a0f98 --- /dev/null +++ b/bridge/github/import_mediator.go @@ -0,0 +1,436 @@ +package github + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/shurcooL/githubv4" +) + +const ( + // These values influence how fast the github graphql rate limit is exhausted. + NUM_ISSUES = 40 + NUM_ISSUE_EDITS = 100 + NUM_TIMELINE_ITEMS = 100 + NUM_COMMENT_EDITS = 100 + + CHAN_CAPACITY = 128 +) + +// importMediator provides a convenient interface to retrieve issues from the Github GraphQL API. +type importMediator struct { + // Github graphql client + gc *githubv4.Client + + // name of the repository owner on Github + owner string + + // name of the Github repository + project string + + // since specifies which issues to import. Issues that have been updated at or after the + // given date should be imported. + since time.Time + + // importEvents holds events representing issues, comments, edits, ... + // In this channel issues are immediately followed by their issue edits and comments are + // immediately followed by their comment edits. 
+ importEvents chan ImportEvent + + // Sticky error + err error +} + +type ImportEvent interface { + isImportEvent() +} + +type RateLimitingEvent struct { + msg string +} + +func (RateLimitingEvent) isImportEvent() {} + +type IssueEvent struct { + issue +} + +func (IssueEvent) isImportEvent() {} + +type IssueEditEvent struct { + issueId githubv4.ID + userContentEdit +} + +func (IssueEditEvent) isImportEvent() {} + +type TimelineEvent struct { + issueId githubv4.ID + timelineItem +} + +func (TimelineEvent) isImportEvent() {} + +type CommentEditEvent struct { + commentId githubv4.ID + userContentEdit +} + +func (CommentEditEvent) isImportEvent() {} + +func (mm *importMediator) NextImportEvent() ImportEvent { + return <-mm.importEvents +} + +func NewImportMediator(ctx context.Context, client *githubv4.Client, owner, project string, since time.Time) *importMediator { + mm := importMediator{ + gc: client, + owner: owner, + project: project, + since: since, + importEvents: make(chan ImportEvent, CHAN_CAPACITY), + err: nil, + } + go func() { + mm.fillImportEvents(ctx) + close(mm.importEvents) + }() + return &mm +} + +type varmap map[string]interface{} + +func newIssueVars(owner, project string, since time.Time) varmap { + return varmap{ + "owner": githubv4.String(owner), + "name": githubv4.String(project), + "issueSince": githubv4.DateTime{Time: since}, + "issueFirst": githubv4.Int(NUM_ISSUES), + "issueEditLast": githubv4.Int(NUM_ISSUE_EDITS), + "issueEditBefore": (*githubv4.String)(nil), + "timelineFirst": githubv4.Int(NUM_TIMELINE_ITEMS), + "timelineAfter": (*githubv4.String)(nil), + "commentEditLast": githubv4.Int(NUM_COMMENT_EDITS), + "commentEditBefore": (*githubv4.String)(nil), + } +} + +func newIssueEditVars() varmap { + return varmap{ + "issueEditLast": githubv4.Int(NUM_ISSUE_EDITS), + } +} + +func newTimelineVars() varmap { + return varmap{ + "timelineFirst": githubv4.Int(NUM_TIMELINE_ITEMS), + "commentEditLast": githubv4.Int(NUM_COMMENT_EDITS), + "commentEditBefore": (*githubv4.String)(nil), + } +} + +func newCommentEditVars() varmap { + return varmap{ + "commentEditLast": githubv4.Int(NUM_COMMENT_EDITS), + } +} + +func (mm *importMediator) Error() error { + return mm.err +} + +func (mm *importMediator) User(ctx context.Context, loginName string) (*user, error) { + query := userQuery{} + vars := varmap{"login": githubv4.String(loginName)} + if err := mm.mQuery(ctx, &query, vars); err != nil { + return nil, err + } + return &query.User, nil +} + +func (mm *importMediator) fillImportEvents(ctx context.Context) { + initialCursor := githubv4.String("") + issues, hasIssues := mm.queryIssue(ctx, initialCursor) + for hasIssues { + for _, node := range issues.Nodes { + select { + case <-ctx.Done(): + return + case mm.importEvents <- IssueEvent{node.issue}: + } + + // issue edit events follow the issue event + mm.fillIssueEditEvents(ctx, &node) + // last come the timeline events + mm.fillTimelineEvents(ctx, &node) + } + if !issues.PageInfo.HasNextPage { + break + } + issues, hasIssues = mm.queryIssue(ctx, issues.PageInfo.EndCursor) + } +} + +func (mm *importMediator) fillIssueEditEvents(ctx context.Context, issueNode *issueNode) { + edits := &issueNode.UserContentEdits + hasEdits := true + for hasEdits { + for edit := range reverse(edits.Nodes) { + if edit.Diff == nil || string(*edit.Diff) == "" { + // issueEdit.Diff == nil happen if the event is older than early + // 2018, Github doesn't have the data before that. Best we can do is + // to ignore the event. 
+ continue + } + select { + case <-ctx.Done(): + return + case mm.importEvents <- IssueEditEvent{issueId: issueNode.issue.Id, userContentEdit: edit}: + } + } + if !edits.PageInfo.HasPreviousPage { + break + } + edits, hasEdits = mm.queryIssueEdits(ctx, issueNode.issue.Id, edits.PageInfo.EndCursor) + } +} + +func (mm *importMediator) queryIssueEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) { + vars := newIssueEditVars() + vars["gqlNodeId"] = nid + if cursor == "" { + vars["issueEditBefore"] = (*githubv4.String)(nil) + } else { + vars["issueEditBefore"] = cursor + } + query := issueEditQuery{} + if err := mm.mQuery(ctx, &query, vars); err != nil { + mm.err = err + return nil, false + } + connection := &query.Node.Issue.UserContentEdits + if len(connection.Nodes) <= 0 { + return nil, false + } + return connection, true +} + +func (mm *importMediator) fillTimelineEvents(ctx context.Context, issueNode *issueNode) { + items := &issueNode.TimelineItems + hasItems := true + for hasItems { + for _, item := range items.Nodes { + select { + case <-ctx.Done(): + return + case mm.importEvents <- TimelineEvent{issueId: issueNode.issue.Id, timelineItem: item}: + } + if item.Typename == "IssueComment" { + // Issue comments are different than other timeline items in that + // they may have associated user content edits. + // Right after the comment we send the comment edits. + mm.fillCommentEdits(ctx, &item) + } + } + if !items.PageInfo.HasNextPage { + break + } + items, hasItems = mm.queryTimeline(ctx, issueNode.issue.Id, items.PageInfo.EndCursor) + } +} + +func (mm *importMediator) queryTimeline(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*timelineItemsConnection, bool) { + vars := newTimelineVars() + vars["gqlNodeId"] = nid + if cursor == "" { + vars["timelineAfter"] = (*githubv4.String)(nil) + } else { + vars["timelineAfter"] = cursor + } + query := timelineQuery{} + if err := mm.mQuery(ctx, &query, vars); err != nil { + mm.err = err + return nil, false + } + connection := &query.Node.Issue.TimelineItems + if len(connection.Nodes) <= 0 { + return nil, false + } + return connection, true +} + +func (mm *importMediator) fillCommentEdits(ctx context.Context, item *timelineItem) { + // Here we are only concerned with timeline items of type issueComment. + if item.Typename != "IssueComment" { + return + } + // First: setup message handling while submitting GraphQL queries. + comment := &item.IssueComment + edits := &comment.UserContentEdits + hasEdits := true + for hasEdits { + for edit := range reverse(edits.Nodes) { + if edit.Diff == nil || string(*edit.Diff) == "" { + // issueEdit.Diff == nil happen if the event is older than early + // 2018, Github doesn't have the data before that. Best we can do is + // to ignore the event. 
+ continue + } + select { + case <-ctx.Done(): + return + case mm.importEvents <- CommentEditEvent{commentId: comment.Id, userContentEdit: edit}: + } + } + if !edits.PageInfo.HasPreviousPage { + break + } + edits, hasEdits = mm.queryCommentEdits(ctx, comment.Id, edits.PageInfo.EndCursor) + } +} + +func (mm *importMediator) queryCommentEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) { + vars := newCommentEditVars() + vars["gqlNodeId"] = nid + if cursor == "" { + vars["commentEditBefore"] = (*githubv4.String)(nil) + } else { + vars["commentEditBefore"] = cursor + } + query := commentEditQuery{} + if err := mm.mQuery(ctx, &query, vars); err != nil { + mm.err = err + return nil, false + } + connection := &query.Node.IssueComment.UserContentEdits + if len(connection.Nodes) <= 0 { + return nil, false + } + return connection, true +} + +func (mm *importMediator) queryIssue(ctx context.Context, cursor githubv4.String) (*issueConnection, bool) { + vars := newIssueVars(mm.owner, mm.project, mm.since) + if cursor == "" { + vars["issueAfter"] = (*githubv4.String)(nil) + } else { + vars["issueAfter"] = githubv4.String(cursor) + } + query := issueQuery{} + if err := mm.mQuery(ctx, &query, vars); err != nil { + mm.err = err + return nil, false + } + connection := &query.Repository.Issues + if len(connection.Nodes) <= 0 { + return nil, false + } + return connection, true +} + +func reverse(eds []userContentEdit) chan userContentEdit { + ret := make(chan userContentEdit) + go func() { + for i := range eds { + ret <- eds[len(eds)-1-i] + } + close(ret) + }() + return ret +} + +// mQuery executes a single GraphQL query. The variable query is used to derive the GraphQL query +// and it is used to populate the response into it. It should be a pointer to a struct that +// corresponds to the Github graphql schema and it has to implement the rateLimiter interface. If +// there is a Github rate limiting error, then the function sleeps and retries after the rate limit +// is expired. If there is another error, then the method will retry before giving up. +func (mm *importMediator) mQuery(ctx context.Context, query rateLimiter, vars map[string]interface{}) error { + if err := mm.queryOnce(ctx, query, vars); err == nil { + // success: done + return nil + } + // failure: we will retry + // To retry is important for importing projects with a big number of issues, because + // there may be temporary network errors or momentary internal errors of the github servers. 
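+ // The retry loop below makes up to three further attempts, sleeping 8, 16 and
+ // then 24 seconds before each one and aborting early if ctx is cancelled; the
+ // error of the last attempt is returned if every retry fails.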
+ retries := 3 + var err error + for i := 0; i < retries; i++ { + // wait a few seconds before retry + sleepTime := time.Duration(8*(i+1)) * time.Second + timer := time.NewTimer(sleepTime) + select { + case <-ctx.Done(): + stop(timer) + return ctx.Err() + case <-timer.C: + } + err = mm.queryOnce(ctx, query, vars) + if err == nil { + // success: done + return nil + } + } + return err +} + +func (mm *importMediator) queryOnce(ctx context.Context, query rateLimiter, vars map[string]interface{}) error { + // first: just send the query to the graphql api + vars["dryRun"] = githubv4.Boolean(false) + qctx, cancel := context.WithTimeout(ctx, defaultTimeout) + defer cancel() + err := mm.gc.Query(qctx, query, vars) + if err == nil { + // no error: done + return nil + } + // matching the error string + if !strings.Contains(err.Error(), "API rate limit exceeded") { + // an error, but not the API rate limit error: done + return err + } + // a rate limit error + // ask the graphql api for rate limiting information + vars["dryRun"] = githubv4.Boolean(true) + qctx, cancel = context.WithTimeout(ctx, defaultTimeout) + defer cancel() + if err := mm.gc.Query(qctx, query, vars); err != nil { + return err + } + rateLimit := query.rateLimit() + if rateLimit.Cost > rateLimit.Remaining { + // sleep + resetTime := rateLimit.ResetAt.Time + // Add a few seconds (8) for good measure + resetTime = resetTime.Add(8 * time.Second) + msg := fmt.Sprintf("Github GraphQL API: import will sleep until %s", resetTime.String()) + select { + case <-ctx.Done(): + return ctx.Err() + case mm.importEvents <- RateLimitingEvent{msg}: + } + timer := time.NewTimer(time.Until(resetTime)) + select { + case <-ctx.Done(): + stop(timer) + return ctx.Err() + case <-timer.C: + } + } + // run the original query again + vars["dryRun"] = githubv4.Boolean(false) + qctx, cancel = context.WithTimeout(ctx, defaultTimeout) + defer cancel() + err = mm.gc.Query(qctx, query, vars) + return err // might be nil +} + +func stop(t *time.Timer) { + if !t.Stop() { + select { + case <-t.C: + default: + } + } +} diff --git a/bridge/github/import_query.go b/bridge/github/import_query.go index 228d204a..461daf94 100644 --- a/bridge/github/import_query.go +++ b/bridge/github/import_query.go @@ -2,37 +2,136 @@ package github import "github.com/shurcooL/githubv4" -type pageInfo struct { - EndCursor githubv4.String - HasNextPage bool - StartCursor githubv4.String - HasPreviousPage bool +type rateLimit struct { + Cost githubv4.Int + Limit githubv4.Int + NodeCount githubv4.Int + Remaining githubv4.Int + ResetAt githubv4.DateTime + Used githubv4.Int } -type actor struct { - Typename githubv4.String `graphql:"__typename"` +type rateLimiter interface { + rateLimit() rateLimit +} + +type userQuery struct { + RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"` + User user `graphql:"user(login: $login)"` +} + +func (q *userQuery) rateLimit() rateLimit { + return q.RateLimit +} + +type labelsQuery struct { + //RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"` + Repository struct { + Labels struct { + Nodes []struct { + ID string `graphql:"id"` + Name string `graphql:"name"` + Color string `graphql:"color"` + Description string `graphql:"description"` + } + PageInfo pageInfo + } `graphql:"labels(first: $first, after: $after)"` + } `graphql:"repository(owner: $owner, name: $name)"` +} + +type loginQuery struct { + //RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"` + Viewer struct { + Login string `graphql:"login"` + } `graphql:"viewer"` +} + +type 
issueQuery struct { + RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"` + Repository struct { + Issues issueConnection `graphql:"issues(first: $issueFirst, after: $issueAfter, orderBy: {field: CREATED_AT, direction: ASC}, filterBy: {since: $issueSince})"` + } `graphql:"repository(owner: $owner, name: $name)"` +} + +func (q *issueQuery) rateLimit() rateLimit { + return q.RateLimit +} + +type issueEditQuery struct { + RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"` + Node struct { + Typename githubv4.String `graphql:"__typename"` + Issue struct { + UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"` + } `graphql:"... on Issue"` + } `graphql:"node(id: $gqlNodeId)"` +} + +func (q *issueEditQuery) rateLimit() rateLimit { + return q.RateLimit +} + +type timelineQuery struct { + RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"` + Node struct { + Typename githubv4.String `graphql:"__typename"` + Issue struct { + TimelineItems timelineItemsConnection `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"` + } `graphql:"... on Issue"` + } `graphql:"node(id: $gqlNodeId)"` +} + +func (q *timelineQuery) rateLimit() rateLimit { + return q.RateLimit +} + +type commentEditQuery struct { + RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"` + Node struct { + Typename githubv4.String `graphql:"__typename"` + IssueComment struct { + UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"` + } `graphql:"... on IssueComment"` + } `graphql:"node(id: $gqlNodeId)"` +} + +func (q *commentEditQuery) rateLimit() rateLimit { + return q.RateLimit +} + +type user struct { Login githubv4.String AvatarUrl githubv4.String - User struct { - Name *githubv4.String - Email githubv4.String - } `graphql:"... on User"` - Organization struct { - Name *githubv4.String - Email *githubv4.String - } `graphql:"... on Organization"` + Name *githubv4.String } -type actorEvent struct { - Id githubv4.ID - CreatedAt githubv4.DateTime - Actor *actor +type issueConnection struct { + Nodes []issueNode + PageInfo pageInfo } -type authorEvent struct { - Id githubv4.ID - CreatedAt githubv4.DateTime - Author *actor +type issueNode struct { + issue + UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"` + TimelineItems timelineItemsConnection `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"` +} + +type issue struct { + authorEvent + Title githubv4.String + Number githubv4.Int + Body githubv4.String + Url githubv4.URI +} + +type timelineItemsConnection struct { + Nodes []timelineItem + PageInfo pageInfo +} + +type userContentEditConnection struct { + Nodes []userContentEdit + PageInfo pageInfo } type userContentEdit struct { @@ -46,12 +145,6 @@ type userContentEdit struct { Diff *githubv4.String } -type issueComment struct { - authorEvent // NOTE: contains Id - Body githubv4.String - Url githubv4.URI -} - type timelineItem struct { Typename githubv4.String `graphql:"__typename"` @@ -91,84 +184,43 @@ type timelineItem struct { } `graphql:"... 
on RenamedTitleEvent"` } -type ghostQuery struct { - User struct { - Login githubv4.String - AvatarUrl githubv4.String - Name *githubv4.String - } `graphql:"user(login: $login)"` -} - -type labelsQuery struct { - Repository struct { - Labels struct { - Nodes []struct { - ID string `graphql:"id"` - Name string `graphql:"name"` - Color string `graphql:"color"` - Description string `graphql:"description"` - } - PageInfo pageInfo - } `graphql:"labels(first: $first, after: $after)"` - } `graphql:"repository(owner: $owner, name: $name)"` -} - -type loginQuery struct { - Viewer struct { - Login string `graphql:"login"` - } `graphql:"viewer"` -} +type issueComment struct { + authorEvent // NOTE: contains Id + Body githubv4.String + Url githubv4.URI -type issueQuery struct { - Repository struct { - Issues struct { - Nodes []issue - PageInfo pageInfo - } `graphql:"issues(first: $issueFirst, after: $issueAfter, orderBy: {field: CREATED_AT, direction: ASC}, filterBy: {since: $issueSince})"` - } `graphql:"repository(owner: $owner, name: $name)"` + UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"` } -type issue struct { - authorEvent - Title string - Number githubv4.Int - Body githubv4.String - Url githubv4.URI +type actor struct { + Typename githubv4.String `graphql:"__typename"` + Login githubv4.String + AvatarUrl githubv4.String + User struct { + Name *githubv4.String + Email githubv4.String + } `graphql:"... on User"` + Organization struct { + Name *githubv4.String + Email *githubv4.String + } `graphql:"... on Organization"` } -type issueEditQuery struct { - Node struct { - Typename githubv4.String `graphql:"__typename"` - Issue struct { - UserContentEdits struct { - Nodes []userContentEdit - TotalCount githubv4.Int - PageInfo pageInfo - } `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"` - } `graphql:"... on Issue"` - } `graphql:"node(id: $gqlNodeId)"` +type actorEvent struct { + Id githubv4.ID + CreatedAt githubv4.DateTime + Actor *actor } -type timelineQuery struct { - Node struct { - Typename githubv4.String `graphql:"__typename"` - Issue struct { - TimelineItems struct { - Nodes []timelineItem - PageInfo pageInfo - } `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"` - } `graphql:"... on Issue"` - } `graphql:"node(id: $gqlNodeId)"` +type authorEvent struct { + Id githubv4.ID + CreatedAt githubv4.DateTime + Author *actor } -type commentEditQuery struct { - Node struct { - Typename githubv4.String `graphql:"__typename"` - IssueComment struct { - UserContentEdits struct { - Nodes []userContentEdit - PageInfo pageInfo - } `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"` - } `graphql:"... on IssueComment"` - } `graphql:"node(id: $gqlNodeId)"` +type pageInfo struct { + EndCursor githubv4.String + HasNextPage bool + StartCursor githubv4.String + HasPreviousPage bool } diff --git a/bridge/github/iterator.go b/bridge/github/iterator.go deleted file mode 100644 index d21faae8..00000000 --- a/bridge/github/iterator.go +++ /dev/null @@ -1,423 +0,0 @@ -package github - -import ( - "context" - "time" - - "github.com/pkg/errors" - "github.com/shurcooL/githubv4" -) - -type iterator struct { - // Github graphql client - gc *githubv4.Client - - // The iterator will only query issues updated or created after the date given in - // the variable since. - since time.Time - - // Shared context, which is used for all graphql queries. 
- ctx context.Context - - // Sticky error - err error - - // Issue iterator - issueIter issueIter -} - -type issueIter struct { - iterVars - query issueQuery - issueEditIter []issueEditIter - timelineIter []timelineIter -} - -type issueEditIter struct { - iterVars - query issueEditQuery -} - -type timelineIter struct { - iterVars - query timelineQuery - commentEditIter []commentEditIter -} - -type commentEditIter struct { - iterVars - query commentEditQuery -} - -type iterVars struct { - // Iterator index - index int - - // capacity is the number of elements (issues, issue edits, timeline items, or - // comment edits) to query at a time. More capacity = more used memory = - // less queries to make. - capacity int - - // Variable assignments for graphql query - variables varmap -} - -type varmap map[string]interface{} - -func newIterVars(capacity int) iterVars { - return iterVars{ - index: -1, - capacity: capacity, - variables: varmap{}, - } -} - -// NewIterator creates and initialize a new iterator. -func NewIterator(ctx context.Context, client *githubv4.Client, capacity int, owner, project string, since time.Time) *iterator { - i := &iterator{ - gc: client, - since: since, - ctx: ctx, - issueIter: issueIter{ - iterVars: newIterVars(capacity), - timelineIter: make([]timelineIter, capacity), - issueEditIter: make([]issueEditIter, capacity), - }, - } - i.issueIter.variables.setOwnerProject(owner, project) - for idx := range i.issueIter.issueEditIter { - ie := &i.issueIter.issueEditIter[idx] - ie.iterVars = newIterVars(capacity) - } - for i1 := range i.issueIter.timelineIter { - tli := &i.issueIter.timelineIter[i1] - tli.iterVars = newIterVars(capacity) - tli.commentEditIter = make([]commentEditIter, capacity) - for i2 := range tli.commentEditIter { - cei := &tli.commentEditIter[i2] - cei.iterVars = newIterVars(capacity) - } - } - i.resetIssueVars() - return i -} - -func (v *varmap) setOwnerProject(owner, project string) { - (*v)["owner"] = githubv4.String(owner) - (*v)["name"] = githubv4.String(project) -} - -func (i *iterator) resetIssueVars() { - vars := &i.issueIter.variables - (*vars)["issueFirst"] = githubv4.Int(i.issueIter.capacity) - (*vars)["issueAfter"] = (*githubv4.String)(nil) - (*vars)["issueSince"] = githubv4.DateTime{Time: i.since} - i.issueIter.query.Repository.Issues.PageInfo.HasNextPage = true - i.issueIter.query.Repository.Issues.PageInfo.EndCursor = "" -} - -func (i *iterator) resetIssueEditVars() { - for idx := range i.issueIter.issueEditIter { - ie := &i.issueIter.issueEditIter[idx] - ie.variables["issueEditLast"] = githubv4.Int(ie.capacity) - ie.variables["issueEditBefore"] = (*githubv4.String)(nil) - ie.query.Node.Issue.UserContentEdits.PageInfo.HasNextPage = true - ie.query.Node.Issue.UserContentEdits.PageInfo.EndCursor = "" - } -} - -func (i *iterator) resetTimelineVars() { - for idx := range i.issueIter.timelineIter { - ip := &i.issueIter.timelineIter[idx] - ip.variables["timelineFirst"] = githubv4.Int(ip.capacity) - ip.variables["timelineAfter"] = (*githubv4.String)(nil) - ip.query.Node.Issue.TimelineItems.PageInfo.HasNextPage = true - ip.query.Node.Issue.TimelineItems.PageInfo.EndCursor = "" - } -} - -func (i *iterator) resetCommentEditVars() { - for i1 := range i.issueIter.timelineIter { - for i2 := range i.issueIter.timelineIter[i1].commentEditIter { - ce := &i.issueIter.timelineIter[i1].commentEditIter[i2] - ce.variables["commentEditLast"] = githubv4.Int(ce.capacity) - ce.variables["commentEditBefore"] = (*githubv4.String)(nil) - 
ce.query.Node.IssueComment.UserContentEdits.PageInfo.HasNextPage = true - ce.query.Node.IssueComment.UserContentEdits.PageInfo.EndCursor = "" - } - } -} - -// Error return last encountered error -func (i *iterator) Error() error { - if i.err != nil { - return i.err - } - return i.ctx.Err() // might return nil -} - -func (i *iterator) HasError() bool { - return i.err != nil || i.ctx.Err() != nil -} - -func (i *iterator) currIssueItem() *issue { - return &i.issueIter.query.Repository.Issues.Nodes[i.issueIter.index] -} - -func (i *iterator) currIssueEditIter() *issueEditIter { - return &i.issueIter.issueEditIter[i.issueIter.index] -} - -func (i *iterator) currTimelineIter() *timelineIter { - return &i.issueIter.timelineIter[i.issueIter.index] -} - -func (i *iterator) currCommentEditIter() *commentEditIter { - timelineIter := i.currTimelineIter() - return &timelineIter.commentEditIter[timelineIter.index] -} - -func (i *iterator) currIssueGqlNodeId() githubv4.ID { - return i.currIssueItem().Id -} - -// NextIssue returns true if there exists a next issue and advances the iterator by one. -// It is used to iterate over all issues. Queries to github are made when necessary. -func (i *iterator) NextIssue() bool { - if i.HasError() { - return false - } - index := &i.issueIter.index - issues := &i.issueIter.query.Repository.Issues - issueItems := &issues.Nodes - if 0 <= *index && *index < len(*issueItems)-1 { - *index += 1 - return true - } - - if !issues.PageInfo.HasNextPage { - return false - } - nextIssue := i.queryIssue() - return nextIssue -} - -// IssueValue returns the actual issue value. -func (i *iterator) IssueValue() issue { - return *i.currIssueItem() -} - -func (i *iterator) queryIssue() bool { - ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout) - defer cancel() - if endCursor := i.issueIter.query.Repository.Issues.PageInfo.EndCursor; endCursor != "" { - i.issueIter.variables["issueAfter"] = endCursor - } - if err := i.gc.Query(ctx, &i.issueIter.query, i.issueIter.variables); err != nil { - i.err = err - return false - } - i.resetIssueEditVars() - i.resetTimelineVars() - issueItems := &i.issueIter.query.Repository.Issues.Nodes - if len(*issueItems) <= 0 { - i.issueIter.index = -1 - return false - } - i.issueIter.index = 0 - return true -} - -// NextIssueEdit returns true if there exists a next issue edit and advances the iterator -// by one. It is used to iterate over all the issue edits. Queries to github are made when -// necessary. -func (i *iterator) NextIssueEdit() bool { - if i.HasError() { - return false - } - ieIter := i.currIssueEditIter() - ieIdx := &ieIter.index - ieItems := ieIter.query.Node.Issue.UserContentEdits - if 0 <= *ieIdx && *ieIdx < len(ieItems.Nodes)-1 { - *ieIdx += 1 - return i.nextValidIssueEdit() - } - if !ieItems.PageInfo.HasNextPage { - return false - } - querySucc := i.queryIssueEdit() - if !querySucc { - return false - } - return i.nextValidIssueEdit() -} - -func (i *iterator) nextValidIssueEdit() bool { - // issueEdit.Diff == nil happen if the event is older than early 2018, Github doesn't have - // the data before that. Best we can do is to ignore the event. - if issueEdit := i.IssueEditValue(); issueEdit.Diff == nil || string(*issueEdit.Diff) == "" { - return i.NextIssueEdit() - } - return true -} - -// IssueEditValue returns the actual issue edit value. 
-func (i *iterator) IssueEditValue() userContentEdit { - iei := i.currIssueEditIter() - return iei.query.Node.Issue.UserContentEdits.Nodes[iei.index] -} - -func (i *iterator) queryIssueEdit() bool { - ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout) - defer cancel() - iei := i.currIssueEditIter() - if endCursor := iei.query.Node.Issue.UserContentEdits.PageInfo.EndCursor; endCursor != "" { - iei.variables["issueEditBefore"] = endCursor - } - iei.variables["gqlNodeId"] = i.currIssueGqlNodeId() - if err := i.gc.Query(ctx, &iei.query, iei.variables); err != nil { - i.err = err - return false - } - issueEditItems := iei.query.Node.Issue.UserContentEdits.Nodes - if len(issueEditItems) <= 0 { - iei.index = -1 - return false - } - // The UserContentEditConnection in the Github API serves its elements in reverse chronological - // order. For our purpose we have to reverse the edits. - reverseEdits(issueEditItems) - iei.index = 0 - return true -} - -// NextTimelineItem returns true if there exists a next timeline item and advances the iterator -// by one. It is used to iterate over all the timeline items. Queries to github are made when -// necessary. -func (i *iterator) NextTimelineItem() bool { - if i.HasError() { - return false - } - tlIter := &i.issueIter.timelineIter[i.issueIter.index] - tlIdx := &tlIter.index - tlItems := tlIter.query.Node.Issue.TimelineItems - if 0 <= *tlIdx && *tlIdx < len(tlItems.Nodes)-1 { - *tlIdx += 1 - return true - } - if !tlItems.PageInfo.HasNextPage { - return false - } - nextTlItem := i.queryTimeline() - return nextTlItem -} - -// TimelineItemValue returns the actual timeline item value. -func (i *iterator) TimelineItemValue() timelineItem { - tli := i.currTimelineIter() - return tli.query.Node.Issue.TimelineItems.Nodes[tli.index] -} - -func (i *iterator) queryTimeline() bool { - ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout) - defer cancel() - tli := i.currTimelineIter() - if endCursor := tli.query.Node.Issue.TimelineItems.PageInfo.EndCursor; endCursor != "" { - tli.variables["timelineAfter"] = endCursor - } - tli.variables["gqlNodeId"] = i.currIssueGqlNodeId() - if err := i.gc.Query(ctx, &tli.query, tli.variables); err != nil { - i.err = err - return false - } - i.resetCommentEditVars() - timelineItems := &tli.query.Node.Issue.TimelineItems - if len(timelineItems.Nodes) <= 0 { - tli.index = -1 - return false - } - tli.index = 0 - return true -} - -// NextCommentEdit returns true if there exists a next comment edit and advances the iterator -// by one. It is used to iterate over all issue edits. Queries to github are made when -// necessary. -func (i *iterator) NextCommentEdit() bool { - if i.HasError() { - return false - } - - tmlnVal := i.TimelineItemValue() - if tmlnVal.Typename != "IssueComment" { - // The timeline iterator does not point to a comment. 
- i.err = errors.New("Call to NextCommentEdit() while timeline item is not a comment") - return false - } - - cei := i.currCommentEditIter() - ceIdx := &cei.index - ceItems := &cei.query.Node.IssueComment.UserContentEdits - if 0 <= *ceIdx && *ceIdx < len(ceItems.Nodes)-1 { - *ceIdx += 1 - return i.nextValidCommentEdit() - } - if !ceItems.PageInfo.HasNextPage { - return false - } - querySucc := i.queryCommentEdit() - if !querySucc { - return false - } - return i.nextValidCommentEdit() -} - -func (i *iterator) nextValidCommentEdit() bool { - // if comment edit diff is a nil pointer or points to an empty string look for next value - if commentEdit := i.CommentEditValue(); commentEdit.Diff == nil || string(*commentEdit.Diff) == "" { - return i.NextCommentEdit() - } - return true -} - -// CommentEditValue returns the actual comment edit value. -func (i *iterator) CommentEditValue() userContentEdit { - cei := i.currCommentEditIter() - return cei.query.Node.IssueComment.UserContentEdits.Nodes[cei.index] -} - -func (i *iterator) queryCommentEdit() bool { - ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout) - defer cancel() - cei := i.currCommentEditIter() - - if endCursor := cei.query.Node.IssueComment.UserContentEdits.PageInfo.EndCursor; endCursor != "" { - cei.variables["commentEditBefore"] = endCursor - } - tmlnVal := i.TimelineItemValue() - if tmlnVal.Typename != "IssueComment" { - i.err = errors.New("Call to queryCommentEdit() while timeline item is not a comment") - return false - } - cei.variables["gqlNodeId"] = tmlnVal.IssueComment.Id - if err := i.gc.Query(ctx, &cei.query, cei.variables); err != nil { - i.err = err - return false - } - ceItems := cei.query.Node.IssueComment.UserContentEdits.Nodes - if len(ceItems) <= 0 { - cei.index = -1 - return false - } - // The UserContentEditConnection in the Github API serves its elements in reverse chronological - // order. For our purpose we have to reverse the edits. - reverseEdits(ceItems) - cei.index = 0 - return true -} - -func reverseEdits(edits []userContentEdit) { - for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 { - edits[i], edits[j] = edits[j], edits[i] - } -} diff --git a/bridge/gitlab/import.go b/bridge/gitlab/import.go index 7939f4e4..7dc99056 100644 --- a/bridge/gitlab/import.go +++ b/bridge/gitlab/import.go @@ -33,32 +33,29 @@ type gitlabImporter struct { func (gi *gitlabImporter) Init(_ context.Context, repo *cache.RepoCache, conf core.Configuration) error { gi.conf = conf + return nil +} +// ImportAll iterate over all the configured repository issues (notes) and ensure the creation +// of the missing issues / comments / label events / title changes ... 
+func (gi *gitlabImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) { creds, err := auth.List(repo, auth.WithTarget(target), auth.WithKind(auth.KindToken), - auth.WithMeta(auth.MetaKeyBaseURL, conf[confKeyGitlabBaseUrl]), - auth.WithMeta(auth.MetaKeyLogin, conf[confKeyDefaultLogin]), + auth.WithMeta(auth.MetaKeyBaseURL, gi.conf[confKeyGitlabBaseUrl]), + auth.WithMeta(auth.MetaKeyLogin, gi.conf[confKeyDefaultLogin]), ) if err != nil { - return err + return nil, err } - if len(creds) == 0 { - return ErrMissingIdentityToken + return nil, ErrMissingIdentityToken } - - gi.client, err = buildClient(conf[confKeyGitlabBaseUrl], creds[0].(*auth.Token)) + gi.client, err = buildClient(gi.conf[confKeyGitlabBaseUrl], creds[0].(*auth.Token)) if err != nil { - return err + return nil, err } - return nil -} - -// ImportAll iterate over all the configured repository issues (notes) and ensure the creation -// of the missing issues / comments / label events / title changes ... -func (gi *gitlabImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) { gi.iterator = iterator.NewIterator(ctx, gi.client, 10, gi.conf[confKeyProjectID], since) out := make(chan core.ImportResult) gi.out = out diff --git a/bridge/jira/import.go b/bridge/jira/import.go index 00148bb6..f9daadd8 100644 --- a/bridge/jira/import.go +++ b/bridge/jira/import.go @@ -34,6 +34,12 @@ type jiraImporter struct { // Init . func (ji *jiraImporter) Init(ctx context.Context, repo *cache.RepoCache, conf core.Configuration) error { ji.conf = conf + return nil +} + +// ImportAll iterate over all the configured repository issues and ensure the +// creation of the missing issues / timeline items / edits / label events ... 
+func (ji *jiraImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) { var cred auth.Credential @@ -41,44 +47,40 @@ func (ji *jiraImporter) Init(ctx context.Context, repo *cache.RepoCache, conf co creds, err := auth.List(repo, auth.WithTarget(target), auth.WithKind(auth.KindLoginPassword), - auth.WithMeta(auth.MetaKeyBaseURL, conf[confKeyBaseUrl]), - auth.WithMeta(auth.MetaKeyLogin, conf[confKeyDefaultLogin]), - ) - if err != nil { - return err - } - if len(creds) > 0 { - cred = creds[0] - goto end - } - - creds, err = auth.List(repo, - auth.WithTarget(target), - auth.WithKind(auth.KindLogin), - auth.WithMeta(auth.MetaKeyBaseURL, conf[confKeyBaseUrl]), - auth.WithMeta(auth.MetaKeyLogin, conf[confKeyDefaultLogin]), + auth.WithMeta(auth.MetaKeyBaseURL, ji.conf[confKeyBaseUrl]), + auth.WithMeta(auth.MetaKeyLogin, ji.conf[confKeyDefaultLogin]), ) if err != nil { - return err + return nil, err } if len(creds) > 0 { cred = creds[0] + } else { + creds, err = auth.List(repo, + auth.WithTarget(target), + auth.WithKind(auth.KindLogin), + auth.WithMeta(auth.MetaKeyBaseURL, ji.conf[confKeyBaseUrl]), + auth.WithMeta(auth.MetaKeyLogin, ji.conf[confKeyDefaultLogin]), + ) + if err != nil { + return nil, err + } + if len(creds) > 0 { + cred = creds[0] + } } -end: if cred == nil { - return fmt.Errorf("no credential for this bridge") + return nil, fmt.Errorf("no credential for this bridge") } // TODO(josh)[da52062]: Validate token and if it is expired then prompt for // credentials and generate a new one - ji.client, err = buildClient(ctx, conf[confKeyBaseUrl], conf[confKeyCredentialType], cred) - return err -} + ji.client, err = buildClient(ctx, ji.conf[confKeyBaseUrl], ji.conf[confKeyCredentialType], cred) + if err != nil { + return nil, err + } -// ImportAll iterate over all the configured repository issues and ensure the -// creation of the missing issues / timeline items / edits / label events ... -func (ji *jiraImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) { sinceStr := since.Format("2006-01-02 15:04") project := ji.conf[confKeyProject] |
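The GitHub changes above also make the mediator usable on its own, for instance from a package test. A sketch of such a loop in the bridge/github package, assuming a *githubv4.Client built the same way buildClient is used in import.go; dumpEvents and the printed strings are illustrative only:

package github

import (
	"context"
	"fmt"
	"time"

	"github.com/shurcooL/githubv4"
)

// dumpEvents walks every event the mediator produces, in the order described
// in import_mediator.go: each issue first, then its edits and timeline items.
func dumpEvents(ctx context.Context, client *githubv4.Client, owner, project string) error {
	mediator := NewImportMediator(ctx, client, owner, project, time.Time{})
	for {
		event := mediator.NextImportEvent()
		if event == nil {
			// the mediator closed its channel: nothing left to import
			break
		}
		switch e := event.(type) {
		case RateLimitingEvent:
			fmt.Println("rate limited:", e.msg)
		case IssueEvent:
			fmt.Println("issue:", e.Number, string(e.Title))
		case IssueEditEvent, TimelineEvent, CommentEditEvent:
			// edits and comments follow the issue or comment they belong to
		}
	}
	return mediator.Error()
}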