diff options
-rw-r--r-- | bridge/github/import.go | 95 | ||||
-rw-r--r-- | bridge/github/import_test.go | 2 | ||||
-rw-r--r-- | bridge/github/iterator.go | 75 |
3 files changed, 100 insertions, 72 deletions
diff --git a/bridge/github/import.go b/bridge/github/import.go index 0c5468d8..2b9e5561 100644 --- a/bridge/github/import.go +++ b/bridge/github/import.go @@ -23,6 +23,15 @@ const ( // githubImporter implement the Importer interface type githubImporter struct { conf core.Configuration + + // iterator + iterator *iterator + + // number of imported issues + importedIssues int + + // number of imported identities + importedIdentities int } func (gi *githubImporter) Init(conf core.Configuration) error { @@ -30,54 +39,26 @@ func (gi *githubImporter) Init(conf core.Configuration) error { return nil } -// ImportAll . +// ImportAll iterate over all the configured repository issues and ensure the creation of the +// missing issues / timeline items / edits / label events ... func (gi *githubImporter) ImportAll(repo *cache.RepoCache, since time.Time) error { - iterator := NewIterator(gi.conf[keyUser], gi.conf[keyProject], gi.conf[keyToken], since) + gi.iterator = NewIterator(gi.conf[keyUser], gi.conf[keyProject], gi.conf[keyToken], since) // Loop over all matching issues - for iterator.NextIssue() { - issue := iterator.IssueValue() - - fmt.Printf("importing issue: %v %v\n", iterator.importedIssues, issue.Title) - // get issue edits - issueEdits := []userContentEdit{} - for iterator.NextIssueEdit() { - // issueEdit.Diff == nil happen if the event is older than early 2018, Github doesn't have the data before that. - // Best we can do is to ignore the event. 
- if issueEdit := iterator.IssueEditValue(); issueEdit.Diff != nil && string(*issueEdit.Diff) != "" { - issueEdits = append(issueEdits, issueEdit) - } - } + for gi.iterator.NextIssue() { + issue := gi.iterator.IssueValue() + fmt.Printf("importing issue: %v\n", issue.Title) // create issue - b, err := gi.ensureIssue(repo, issue, issueEdits) + b, err := gi.ensureIssue(repo, issue) if err != nil { return fmt.Errorf("issue creation: %v", err) } // loop over timeline items - for iterator.NextTimeline() { - item := iterator.TimelineValue() - - // if item is comment - if item.Typename == "IssueComment" { - // collect all edits - commentEdits := []userContentEdit{} - for iterator.NextCommentEdit() { - if commentEdit := iterator.CommentEditValue(); commentEdit.Diff != nil && string(*commentEdit.Diff) != "" { - commentEdits = append(commentEdits, commentEdit) - } - } - - err := gi.ensureTimelineComment(repo, b, item.IssueComment, commentEdits) - if err != nil { - return fmt.Errorf("timeline comment creation: %v", err) - } - - } else { - if err := gi.ensureTimelineItem(repo, b, item); err != nil { - return fmt.Errorf("timeline event creation: %v", err) - } + for gi.iterator.NextTimelineItem() { + if err := gi.ensureTimelineItem(repo, b, gi.iterator.TimelineItemValue()); err != nil { + return fmt.Errorf("timeline item creation: %v", err) } } @@ -87,16 +68,16 @@ func (gi *githubImporter) ImportAll(repo *cache.RepoCache, since time.Time) erro } } - if err := iterator.Error(); err != nil { + if err := gi.iterator.Error(); err != nil { fmt.Printf("import error: %v\n", err) return err } - fmt.Printf("Successfully imported %v issues from Github\n", iterator.ImportedIssues()) + fmt.Printf("Successfully imported %d issues and %d identities from Github\n", gi.importedIssues, gi.importedIdentities) return nil } -func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline, issueEdits []userContentEdit) (*cache.BugCache, error) { +func (gi *githubImporter) 
ensureIssue(repo *cache.RepoCache, issue issueTimeline) (*cache.BugCache, error) { // ensure issue author author, err := gi.ensurePerson(repo, issue.Author) if err != nil { @@ -109,6 +90,12 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline return nil, err } + // get issue edits + issueEdits := []userContentEdit{} + for gi.iterator.NextIssueEdit() { + issueEdits = append(issueEdits, gi.iterator.IssueEditValue()) + } + // if issueEdits is empty if len(issueEdits) == 0 { if err == bug.ErrBugNotExist { @@ -131,6 +118,9 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline if err != nil { return nil, err } + + // importing a new bug + gi.importedIssues++ } } else { @@ -165,6 +155,9 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline return nil, err } + // importing a new bug + gi.importedIssues++ + continue } @@ -189,6 +182,16 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug switch item.Typename { case "IssueComment": + // collect all comment edits + commentEdits := []userContentEdit{} + for gi.iterator.NextCommentEdit() { + commentEdits = append(commentEdits, gi.iterator.CommentEditValue()) + } + + err := gi.ensureTimelineComment(repo, b, item.IssueComment, commentEdits) + if err != nil { + return fmt.Errorf("timeline comment creation: %v", err) + } case "LabeledEvent": id := parseId(item.LabeledEvent.Id) @@ -455,6 +458,9 @@ func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*ca return nil, err } + // importing a new identity + gi.importedIdentities++ + var name string var email string @@ -528,10 +534,3 @@ func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*cache.IdentityCache, func parseId(id githubv4.ID) string { return fmt.Sprintf("%v", id) } - -func reverseEdits(edits []userContentEdit) []userContentEdit { - for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 { - edits[i], edits[j] = edits[j], edits[i] 
- } - return edits -} diff --git a/bridge/github/import_test.go b/bridge/github/import_test.go index 48283b7a..7f83130c 100644 --- a/bridge/github/import_test.go +++ b/bridge/github/import_test.go @@ -152,7 +152,7 @@ func Test_Importer(t *testing.T) { fmt.Printf("test repository imported in %f seconds\n", time.Since(start).Seconds()) - require.Len(t, backend.AllBugsIds(), 9) + require.Len(t, backend.AllBugsIds(), len(tests)) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/bridge/github/iterator.go b/bridge/github/iterator.go index 48e98f17..5935276a 100644 --- a/bridge/github/iterator.go +++ b/bridge/github/iterator.go @@ -49,9 +49,6 @@ type iterator struct { // sticky error err error - // number of imported issues - importedIssues int - // timeline iterator timeline timelineIterator @@ -62,8 +59,9 @@ type iterator struct { commentEdit commentEditIterator } +// NewIterator creates and initializes a new iterator func NewIterator(user, project, token string, since time.Time) *iterator { - return &iterator{ + i := &iterator{ gc: buildClient(token), since: since, capacity: 10, @@ -91,6 +89,9 @@ func NewIterator(user, project, token string, since time.Time) *iterator { }, }, } + + i.initTimelineQueryVariables() + return i } // init issue timeline variables @@ -145,11 +146,6 @@ func (i *iterator) Error() error { return i.err } -// ImportedIssues return the number of issues we iterated over -func (i *iterator) ImportedIssues() int { - return i.importedIssues -} - func (i *iterator) queryIssue() bool { if err := i.gc.Query(context.TODO(), &i.timeline.query, i.timeline.variables); err != nil { i.err = err @@ -161,18 +157,18 @@ func (i *iterator) queryIssue() bool { } i.reverseTimelineEditNodes() - i.importedIssues++ return true } -// Next issue +// NextIssue tries to query the next issue and returns true on success. Only one issue is +// queried at each call.
func (i *iterator) NextIssue() bool { - // we make the first move - if i.importedIssues == 0 { - - // init variables and goto queryIssue block - i.initTimelineQueryVariables() - return i.queryIssue() + // if $issueAfter variable is nil we can directly make the first query + if i.timeline.variables["issueAfter"] == (*githubv4.String)(nil) { + nextIssue := i.queryIssue() + // prevent an infinite loop by setting a non nil cursor + i.timeline.variables["issueAfter"] = i.timeline.query.Repository.Issues.PageInfo.EndCursor + return nextIssue } if i.err != nil { @@ -195,11 +191,14 @@ func (i *iterator) NextIssue() bool { return i.queryIssue() } +// IssueValue returns the current issue value func (i *iterator) IssueValue() issueTimeline { return i.timeline.query.Repository.Issues.Nodes[0] } -func (i *iterator) NextTimeline() bool { +// NextTimelineItem returns true if there is a next timeline item and increments the index by one. +// It is used to iterate over all the timeline items. Extra queries are made if it is necessary. +func (i *iterator) NextTimelineItem() bool { if i.err != nil { return false } @@ -231,7 +230,8 @@ func (i *iterator) NextTimeline() bool { return true } -func (i *iterator) TimelineValue() timelineItem { +// TimelineItemValue returns the current timeline item value +func (i *iterator) TimelineItemValue() timelineItem { return i.timeline.query.Repository.Issues.Nodes[0].Timeline.Edges[i.timeline.index].Node } @@ -253,9 +253,20 @@ func (i *iterator) queryIssueEdit() bool { i.issueEdit.index = 0 i.timeline.issueEdit.index = -2 + return i.nextValidIssueEdit() +} + +func (i *iterator) nextValidIssueEdit() bool { + // issueEdit.Diff == nil happens if the event is older than early 2018, Github doesn't have the data before that. + // Best we can do is to ignore the event.
+ if issueEdit := i.IssueEditValue(); issueEdit.Diff == nil || string(*issueEdit.Diff) == "" { + return i.NextIssueEdit() + } return true } +// NextIssueEdit returns true if there is a next issue edit and increments the index by one. +// It is used to iterate over all the issue edits. Extra queries are made if it is necessary. func (i *iterator) NextIssueEdit() bool { if i.err != nil { return false @@ -266,7 +277,7 @@ func (i *iterator) NextIssueEdit() bool { if i.timeline.issueEdit.index == -2 { if i.issueEdit.index < min(i.capacity, len(i.issueEdit.query.Repository.Issues.Nodes[0].UserContentEdits.Nodes))-1 { i.issueEdit.index++ - return true + return i.nextValidIssueEdit() } if !i.issueEdit.query.Repository.Issues.Nodes[0].UserContentEdits.PageInfo.HasPreviousPage { @@ -297,7 +308,7 @@ func (i *iterator) NextIssueEdit() bool { // loop over them timeline comment edits if i.timeline.issueEdit.index < min(i.capacity, len(i.timeline.query.Repository.Issues.Nodes[0].UserContentEdits.Nodes))-1 { i.timeline.issueEdit.index++ - return true + return i.nextValidIssueEdit() } if !i.timeline.query.Repository.Issues.Nodes[0].UserContentEdits.PageInfo.HasPreviousPage { @@ -311,6 +322,7 @@ func (i *iterator) NextIssueEdit() bool { return i.queryIssueEdit() } +// IssueEditValue returns the current issue edit value func (i *iterator) IssueEditValue() userContentEdit { // if we are using issue edit query if i.timeline.issueEdit.index == -2 { @@ -337,9 +349,19 @@ func (i *iterator) queryCommentEdit() bool { i.commentEdit.index = 0 i.timeline.commentEdit.index = -2 + return i.nextValidCommentEdit() +} + +func (i *iterator) nextValidCommentEdit() bool { + // if comment edit diff is a nil pointer or points to an empty string look for next value + if commentEdit := i.CommentEditValue(); commentEdit.Diff == nil || string(*commentEdit.Diff) == "" { + return i.NextCommentEdit() + } return true } +// NextCommentEdit returns true if there is a next comment edit and increments the index by one.
+// It is used to iterate over all the comment edits. Extra queries are made if it is necessary. func (i *iterator) NextCommentEdit() bool { if i.err != nil { return false @@ -350,7 +372,7 @@ func (i *iterator) NextCommentEdit() bool { if i.commentEdit.index < min(i.capacity, len(i.commentEdit.query.Repository.Issues.Nodes[0].Timeline.Nodes[0].IssueComment.UserContentEdits.Nodes))-1 { i.commentEdit.index++ - return true + return i.nextValidCommentEdit() } if !i.commentEdit.query.Repository.Issues.Nodes[0].Timeline.Nodes[0].IssueComment.UserContentEdits.PageInfo.HasPreviousPage { @@ -372,7 +394,7 @@ func (i *iterator) NextCommentEdit() bool { // loop over them timeline comment edits if i.timeline.commentEdit.index < min(i.capacity, len(i.timeline.query.Repository.Issues.Nodes[0].Timeline.Edges[i.timeline.index].Node.IssueComment.UserContentEdits.Nodes))-1 { i.timeline.commentEdit.index++ - return true + return i.nextValidCommentEdit() } if !i.timeline.query.Repository.Issues.Nodes[0].Timeline.Edges[i.timeline.index].Node.IssueComment.UserContentEdits.PageInfo.HasPreviousPage { @@ -392,6 +414,7 @@ func (i *iterator) NextCommentEdit() bool { return i.queryCommentEdit() } +// CommentEditValue returns the current comment edit value func (i *iterator) CommentEditValue() userContentEdit { if i.timeline.commentEdit.index == -2 { return i.commentEdit.query.Repository.Issues.Nodes[0].Timeline.Nodes[0].IssueComment.UserContentEdits.Nodes[i.commentEdit.index] @@ -407,3 +430,9 @@ func min(a, b int) int { return a } + +func reverseEdits(edits []userContentEdit) { + for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 { + edits[i], edits[j] = edits[j], edits[i] + } +} |