aboutsummaryrefslogtreecommitdiffstats
path: root/bridge/github
diff options
context:
space:
mode:
Diffstat (limited to 'bridge/github')
-rw-r--r--bridge/github/config.go2
-rw-r--r--bridge/github/export.go4
-rw-r--r--bridge/github/export_test.go2
-rw-r--r--bridge/github/import.go517
-rw-r--r--bridge/github/import_mediator.go436
-rw-r--r--bridge/github/import_query.go252
-rw-r--r--bridge/github/iterator.go423
7 files changed, 843 insertions, 793 deletions
diff --git a/bridge/github/config.go b/bridge/github/config.go
index 2b5af7fb..1e23c8ee 100644
--- a/bridge/github/config.go
+++ b/bridge/github/config.go
@@ -251,7 +251,7 @@ func promptUserToGoToBrowser(url, userCode string) {
fmt.Println("Please visit the following Github URL in a browser and enter your user authentication code.")
fmt.Println()
fmt.Println(" URL:", url)
- fmt.Println(" user authentiation code:", userCode)
+ fmt.Println(" user authentication code:", userCode)
fmt.Println()
}
diff --git a/bridge/github/export.go b/bridge/github/export.go
index 1a59fbb3..264f2a23 100644
--- a/bridge/github/export.go
+++ b/bridge/github/export.go
@@ -504,7 +504,7 @@ func (ge *githubExporter) cacheGithubLabels(ctx context.Context, gc *githubv4.Cl
return nil
}
-func (ge *githubExporter) getLabelID(gc *githubv4.Client, label string) (string, error) {
+func (ge *githubExporter) getLabelID(label string) (string, error) {
label = strings.ToLower(label)
for cachedLabel, ID := range ge.cachedLabels {
if label == strings.ToLower(cachedLabel) {
@@ -598,7 +598,7 @@ func (ge *githubExporter) createGithubLabelV4(gc *githubv4.Client, label, labelC
func (ge *githubExporter) getOrCreateGithubLabelID(ctx context.Context, gc *githubv4.Client, repositoryID string, label bug.Label) (string, error) {
// try to get label id from cache
- labelID, err := ge.getLabelID(gc, string(label))
+ labelID, err := ge.getLabelID(string(label))
if err == nil {
return labelID, nil
}
diff --git a/bridge/github/export_test.go b/bridge/github/export_test.go
index b7a36bcf..78425e60 100644
--- a/bridge/github/export_test.go
+++ b/bridge/github/export_test.go
@@ -268,7 +268,7 @@ func TestGithubPushPull(t *testing.T) {
require.True(t, ok)
require.Equal(t, issueOrigin, target)
- //TODO: maybe more tests to ensure bug final state
+ // TODO: maybe more tests to ensure bug final state
})
}
}
diff --git a/bridge/github/import.go b/bridge/github/import.go
index af62746f..bf43a877 100644
--- a/bridge/github/import.go
+++ b/bridge/github/import.go
@@ -15,6 +15,8 @@ import (
"github.com/MichaelMure/git-bug/util/text"
)
+const EmptyTitlePlaceholder = "<empty string>"
+
// githubImporter implement the Importer interface
type githubImporter struct {
conf core.Configuration
@@ -22,8 +24,8 @@ type githubImporter struct {
// default client
client *githubv4.Client
- // iterator
- iterator *iterator
+ // mediator to access the Github API
+ mediator *importMediator
// send only channel
out chan<- core.ImportResult
@@ -31,7 +33,6 @@ type githubImporter struct {
func (gi *githubImporter) Init(_ context.Context, repo *cache.RepoCache, conf core.Configuration) error {
gi.conf = conf
-
creds, err := auth.List(repo,
auth.WithTarget(target),
auth.WithKind(auth.KindToken),
@@ -40,11 +41,9 @@ func (gi *githubImporter) Init(_ context.Context, repo *cache.RepoCache, conf co
if err != nil {
return err
}
-
- if len(creds) == 0 {
+ if len(creds) <= 0 {
return ErrMissingIdentityToken
}
-
gi.client = buildClient(creds[0].(*auth.Token))
return nil
@@ -53,46 +52,95 @@ func (gi *githubImporter) Init(_ context.Context, repo *cache.RepoCache, conf co
// ImportAll iterate over all the configured repository issues and ensure the creation of the
// missing issues / timeline items / edits / label events ...
func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache, since time.Time) (<-chan core.ImportResult, error) {
- gi.iterator = NewIterator(ctx, gi.client, 10, gi.conf[confKeyOwner], gi.conf[confKeyProject], since)
+ gi.mediator = NewImportMediator(ctx, gi.client, gi.conf[confKeyOwner], gi.conf[confKeyProject], since)
out := make(chan core.ImportResult)
gi.out = out
go func() {
defer close(gi.out)
-
- // Loop over all matching issues
- for gi.iterator.NextIssue() {
- issue := gi.iterator.IssueValue()
- // create issue
- b, err := gi.ensureIssue(repo, issue)
- if err != nil {
- err := fmt.Errorf("issue creation: %v", err)
- out <- core.NewImportError(err, "")
- return
+ var currBug *cache.BugCache
+ var currEvent ImportEvent
+ var nextEvent ImportEvent
+ var err error
+ for {
+ // An IssueEvent contains the issue in its most recent state. If an issue
+ // has at least one issue edit, then the history of the issue edits is
+ // represented by IssueEditEvents. That is, the unedited (original) issue
+ // might be saved only in the IssueEditEvent following the IssueEvent.
+ // Since we replicate the edit history we need to either use the IssueEvent
+ // (if there are no edits) or the IssueEvent together with its first
+ // IssueEditEvent (if there are edits).
+ // Exactly the same is true for comments and comment edits.
+ // As a consequence we need to look at the current event and one look ahead
+ // event.
+ currEvent = nextEvent
+ if currEvent == nil {
+ currEvent = gi.getEventHandleMsgs()
}
-
- // loop over timeline items
- for gi.iterator.NextTimelineItem() {
- item := gi.iterator.TimelineItemValue()
- err := gi.ensureTimelineItem(repo, b, item)
+ if currEvent == nil {
+ break
+ }
+ nextEvent = gi.getEventHandleMsgs()
+
+ switch event := currEvent.(type) {
+ case RateLimitingEvent:
+ out <- core.NewImportRateLimiting(event.msg)
+ case IssueEvent:
+ // first: commit what is being held in currBug
+ if err = gi.commit(currBug, out); err != nil {
+ out <- core.NewImportError(err, "")
+ return
+ }
+ // second: create new issue
+ switch next := nextEvent.(type) {
+ case IssueEditEvent:
+ // consuming and using next event
+ nextEvent = nil
+ currBug, err = gi.ensureIssue(ctx, repo, &event.issue, &next.userContentEdit)
+ default:
+ currBug, err = gi.ensureIssue(ctx, repo, &event.issue, nil)
+ }
+ if err != nil {
+ err := fmt.Errorf("issue creation: %v", err)
+ out <- core.NewImportError(err, "")
+ return
+ }
+ case IssueEditEvent:
+ err = gi.ensureIssueEdit(ctx, repo, currBug, event.issueId, &event.userContentEdit)
+ if err != nil {
+ err = fmt.Errorf("issue edit: %v", err)
+ out <- core.NewImportError(err, "")
+ return
+ }
+ case TimelineEvent:
+ if next, ok := nextEvent.(CommentEditEvent); ok && event.Typename == "IssueComment" {
+ // consuming and using next event
+ nextEvent = nil
+ err = gi.ensureComment(ctx, repo, currBug, &event.timelineItem.IssueComment, &next.userContentEdit)
+ } else {
+ err = gi.ensureTimelineItem(ctx, repo, currBug, &event.timelineItem)
+ }
if err != nil {
err = fmt.Errorf("timeline item creation: %v", err)
out <- core.NewImportError(err, "")
return
}
- }
-
- if !b.NeedCommit() {
- out <- core.NewImportNothing(b.Id(), "no imported operation")
- } else if err := b.Commit(); err != nil {
- // commit bug state
- err = fmt.Errorf("bug commit: %v", err)
- out <- core.NewImportError(err, "")
- return
+ case CommentEditEvent:
+ err = gi.ensureCommentEdit(ctx, repo, currBug, event.commentId, &event.userContentEdit)
+ if err != nil {
+ err = fmt.Errorf("comment edit: %v", err)
+ out <- core.NewImportError(err, "")
+ return
+ }
+ default:
+ panic("Unknown event type")
}
}
-
- if err := gi.iterator.Error(); err != nil {
+ // commit what is being held in currBug before returning
+ if err = gi.commit(currBug, out); err != nil {
+ out <- core.NewImportError(err, "")
+ }
+ if err = gi.mediator.Error(); err != nil {
gi.out <- core.NewImportError(err, "")
}
}()
@@ -100,9 +148,35 @@ func (gi *githubImporter) ImportAll(ctx context.Context, repo *cache.RepoCache,
return out, nil
}
-func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issue) (*cache.BugCache, error) {
- // ensure issue author
- author, err := gi.ensurePerson(repo, issue.Author)
+func (gi *githubImporter) getEventHandleMsgs() ImportEvent {
+ for {
+ // read event from import mediator
+ event := gi.mediator.NextImportEvent()
+ // consume (and use) all rate limiting events
+ if e, ok := event.(RateLimitingEvent); ok {
+ gi.out <- core.NewImportRateLimiting(e.msg)
+ continue
+ }
+ return event
+ }
+}
+
+func (gi *githubImporter) commit(b *cache.BugCache, out chan<- core.ImportResult) error {
+ if b == nil {
+ return nil
+ }
+ if !b.NeedCommit() {
+ out <- core.NewImportNothing(b.Id(), "no imported operation")
+ return nil
+ } else if err := b.Commit(); err != nil {
+ // commit bug state
+ return fmt.Errorf("bug commit: %v", err)
+ }
+ return nil
+}
+
+func (gi *githubImporter) ensureIssue(ctx context.Context, repo *cache.RepoCache, issue *issue, issueEdit *userContentEdit) (*cache.BugCache, error) {
+ author, err := gi.ensurePerson(ctx, repo, issue.Author)
if err != nil {
return nil, err
}
@@ -112,113 +186,66 @@ func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issue) (*cach
return excerpt.CreateMetadata[core.MetaKeyOrigin] == target &&
excerpt.CreateMetadata[metaKeyGithubId] == parseId(issue.Id)
})
- if err != nil && err != bug.ErrBugNotExist {
+ if err == nil {
+ return b, nil
+ }
+ if err != bug.ErrBugNotExist {
return nil, err
}
- // get issue edits
- var issueEdits []userContentEdit
- for gi.iterator.NextIssueEdit() {
- issueEdits = append(issueEdits, gi.iterator.IssueEditValue())
+ // At Github there exist issues with seemingly empty titles. An example is
+ // https://github.com/NixOS/nixpkgs/issues/72730 .
+ // The title provided by the GraphQL API actually consists of a space followed by a
+ // zero width space (U+200B). This title would cause the NewBugRaw() function to
+ // return an error: empty title.
+ title := string(issue.Title)
+ if title == " \u200b" { // U+200B == zero width space
+ title = EmptyTitlePlaceholder
}
- // if issueEdits is empty
- if len(issueEdits) == 0 {
- if err == bug.ErrBugNotExist {
- cleanText, err := text.Cleanup(string(issue.Body))
- if err != nil {
- return nil, err
- }
-
- // create bug
- b, _, err = repo.NewBugRaw(
- author,
- issue.CreatedAt.Unix(),
- issue.Title,
- cleanText,
- nil,
- map[string]string{
- core.MetaKeyOrigin: target,
- metaKeyGithubId: parseId(issue.Id),
- metaKeyGithubUrl: issue.Url.String(),
- })
- if err != nil {
- return nil, err
- }
-
- // importing a new bug
- gi.out <- core.NewImportBug(b.Id())
- }
+ var textInput string
+ if issueEdit != nil {
+ // use the first issue edit: it represents the bug creation itself
+ textInput = string(*issueEdit.Diff)
} else {
- // create bug from given issueEdits
- for i, edit := range issueEdits {
- if i == 0 && b != nil {
- // The first edit in the github result is the issue creation itself, we already have that
- continue
- }
-
- cleanText, err := text.Cleanup(string(*edit.Diff))
- if err != nil {
- return nil, err
- }
-
- // if the bug doesn't exist
- if b == nil {
- // we create the bug as soon as we have a legit first edition
- b, _, err = repo.NewBugRaw(
- author,
- issue.CreatedAt.Unix(),
- issue.Title, // TODO: this is the *current* title, not the original one
- cleanText,
- nil,
- map[string]string{
- core.MetaKeyOrigin: target,
- metaKeyGithubId: parseId(issue.Id),
- metaKeyGithubUrl: issue.Url.String(),
- },
- )
-
- if err != nil {
- return nil, err
- }
- // importing a new bug
- gi.out <- core.NewImportBug(b.Id())
- continue
- }
-
- // other edits will be added as CommentEdit operations
- target, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(issue.Id))
- if err == cache.ErrNoMatchingOp {
- // original comment is missing somehow, issuing a warning
- gi.out <- core.NewImportWarning(fmt.Errorf("comment ID %s to edit is missing", parseId(issue.Id)), b.Id())
- continue
- }
- if err != nil {
- return nil, err
- }
+ // if there are no issue edits then the issue struct holds the bug creation
+ textInput = string(issue.Body)
+ }
+ cleanText, err := text.Cleanup(textInput)
+ if err != nil {
+ return nil, err
+ }
- err = gi.ensureCommentEdit(repo, b, target, edit)
- if err != nil {
- return nil, err
- }
- }
+ // create bug
+ b, _, err = repo.NewBugRaw(
+ author,
+ issue.CreatedAt.Unix(),
+ title, // TODO: this is the *current* title, not the original one
+ cleanText,
+ nil,
+ map[string]string{
+ core.MetaKeyOrigin: target,
+ metaKeyGithubId: parseId(issue.Id),
+ metaKeyGithubUrl: issue.Url.String(),
+ })
+ if err != nil {
+ return nil, err
}
+ // importing a new bug
+ gi.out <- core.NewImportBug(b.Id())
return b, nil
}
-func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, item timelineItem) error {
+func (gi *githubImporter) ensureIssueEdit(ctx context.Context, repo *cache.RepoCache, bug *cache.BugCache, ghIssueId githubv4.ID, edit *userContentEdit) error {
+ return gi.ensureCommentEdit(ctx, repo, bug, ghIssueId, edit)
+}
+
+func (gi *githubImporter) ensureTimelineItem(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, item *timelineItem) error {
switch item.Typename {
case "IssueComment":
- // collect all comment edits
- var commentEdits []userContentEdit
- for gi.iterator.NextCommentEdit() {
- commentEdits = append(commentEdits, gi.iterator.CommentEditValue())
- }
-
- // ensureTimelineComment send import events over out chanel
- err := gi.ensureTimelineComment(repo, b, item.IssueComment, commentEdits)
+ err := gi.ensureComment(ctx, repo, b, &item.IssueComment, nil)
if err != nil {
return fmt.Errorf("timeline comment creation: %v", err)
}
@@ -234,7 +261,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
if err != cache.ErrNoMatchingOp {
return err
}
- author, err := gi.ensurePerson(repo, item.LabeledEvent.Actor)
+ author, err := gi.ensurePerson(ctx, repo, item.LabeledEvent.Actor)
if err != nil {
return err
}
@@ -263,7 +290,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
if err != cache.ErrNoMatchingOp {
return err
}
- author, err := gi.ensurePerson(repo, item.UnlabeledEvent.Actor)
+ author, err := gi.ensurePerson(ctx, repo, item.UnlabeledEvent.Actor)
if err != nil {
return err
}
@@ -293,7 +320,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
if err == nil {
return nil
}
- author, err := gi.ensurePerson(repo, item.ClosedEvent.Actor)
+ author, err := gi.ensurePerson(ctx, repo, item.ClosedEvent.Actor)
if err != nil {
return err
}
@@ -319,7 +346,7 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
if err == nil {
return nil
}
- author, err := gi.ensurePerson(repo, item.ReopenedEvent.Actor)
+ author, err := gi.ensurePerson(ctx, repo, item.ReopenedEvent.Actor)
if err != nil {
return err
}
@@ -345,14 +372,25 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
if err == nil {
return nil
}
- author, err := gi.ensurePerson(repo, item.RenamedTitleEvent.Actor)
+ author, err := gi.ensurePerson(ctx, repo, item.RenamedTitleEvent.Actor)
if err != nil {
return err
}
+
+ // At Github there exist issues with seemingly empty titles. An example is
+ // https://github.com/NixOS/nixpkgs/issues/72730 .
+ // The title provided by the GraphQL API actually consists of a space followed
+ // by a zero width space (U+200B). This title would cause the NewBugRaw()
+ // function to return an error: empty title.
+ title := string(item.RenamedTitleEvent.CurrentTitle)
+ if title == " \u200b" { // U+200B == zero width space
+ title = EmptyTitlePlaceholder
+ }
+
op, err := b.SetTitleRaw(
author,
item.RenamedTitleEvent.CreatedAt.Unix(),
- string(item.RenamedTitleEvent.CurrentTitle),
+ title,
map[string]string{metaKeyGithubId: id},
)
if err != nil {
@@ -366,97 +404,62 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
return nil
}
-func (gi *githubImporter) ensureTimelineComment(repo *cache.RepoCache, b *cache.BugCache, item issueComment, edits []userContentEdit) error {
- // ensure person
- author, err := gi.ensurePerson(repo, item.Author)
+func (gi *githubImporter) ensureCommentEdit(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, ghTargetId githubv4.ID, edit *userContentEdit) error {
+ // find comment
+ target, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(ghTargetId))
if err != nil {
return err
}
-
- targetOpID, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(item.Id))
- if err != nil && err != cache.ErrNoMatchingOp {
+ _, err = b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(edit.Id))
+ if err == nil {
+ return nil
+ }
+ if err != cache.ErrNoMatchingOp {
// real error
return err
}
- // if no edits are given we create the comment
- if len(edits) == 0 {
- if err == cache.ErrNoMatchingOp {
- cleanText, err := text.Cleanup(string(item.Body))
- if err != nil {
- return err
- }
-
- // add comment operation
- op, err := b.AddCommentRaw(
- author,
- item.CreatedAt.Unix(),
- cleanText,
- nil,
- map[string]string{
- metaKeyGithubId: parseId(item.Id),
- metaKeyGithubUrl: parseId(item.Url.String()),
- },
- )
- if err != nil {
- return err
- }
-
- gi.out <- core.NewImportComment(op.Id())
- return nil
- }
-
- } else {
- for i, edit := range edits {
- if i == 0 && targetOpID != "" {
- // The first edit in the github result is the comment creation itself, we already have that
- continue
- }
-
- // ensure editor identity
- editor, err := gi.ensurePerson(repo, edit.Editor)
- if err != nil {
- return err
- }
+ editor, err := gi.ensurePerson(ctx, repo, edit.Editor)
+ if err != nil {
+ return err
+ }
- // create comment when target is empty
- if targetOpID == "" {
- cleanText, err := text.Cleanup(string(*edit.Diff))
- if err != nil {
- return err
- }
+ if edit.DeletedAt != nil {
+ // comment deletion, not supported yet
+ return nil
+ }
- op, err := b.AddCommentRaw(
- editor,
- edit.CreatedAt.Unix(),
- cleanText,
- nil,
- map[string]string{
- metaKeyGithubId: parseId(item.Id),
- metaKeyGithubUrl: item.Url.String(),
- },
- )
- if err != nil {
- return err
- }
- gi.out <- core.NewImportComment(op.Id())
+ cleanText, err := text.Cleanup(string(*edit.Diff))
+ if err != nil {
+ return err
+ }
- // set target for the next edit now that the comment is created
- targetOpID = op.Id()
- continue
- }
+ // comment edition
+ op, err := b.EditCommentRaw(
+ editor,
+ edit.CreatedAt.Unix(),
+ target,
+ cleanText,
+ map[string]string{
+ metaKeyGithubId: parseId(edit.Id),
+ },
+ )
- err = gi.ensureCommentEdit(repo, b, targetOpID, edit)
- if err != nil {
- return err
- }
- }
+ if err != nil {
+ return err
}
+
+ gi.out <- core.NewImportCommentEdition(op.Id())
return nil
}
-func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugCache, target entity.Id, edit userContentEdit) error {
- _, err := b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(edit.Id))
+func (gi *githubImporter) ensureComment(ctx context.Context, repo *cache.RepoCache, b *cache.BugCache, comment *issueComment, firstEdit *userContentEdit) error {
+ author, err := gi.ensurePerson(ctx, repo, comment.Author)
+ if err != nil {
+ return err
+ }
+
+ _, err = b.ResolveOperationWithMetadata(metaKeyGithubId, parseId(comment.Id))
if err == nil {
return nil
}
@@ -465,50 +468,44 @@ func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugC
return err
}
- editor, err := gi.ensurePerson(repo, edit.Editor)
+ var textInput string
+ if firstEdit != nil {
+ // use the first comment edit: it represents the comment creation itself
+ textInput = string(*firstEdit.Diff)
+ } else {
+ // if there are not comment edits, then the comment struct holds the comment creation
+ textInput = string(comment.Body)
+ }
+ cleanText, err := text.Cleanup(textInput)
if err != nil {
return err
}
- switch {
- case edit.DeletedAt != nil:
- // comment deletion, not supported yet
- return nil
-
- case edit.DeletedAt == nil:
-
- cleanText, err := text.Cleanup(string(*edit.Diff))
- if err != nil {
- return err
- }
-
- // comment edition
- op, err := b.EditCommentRaw(
- editor,
- edit.CreatedAt.Unix(),
- target,
- cleanText,
- map[string]string{
- metaKeyGithubId: parseId(edit.Id),
- },
- )
-
- if err != nil {
- return err
- }
-
- gi.out <- core.NewImportCommentEdition(op.Id())
- return nil
+ // add comment operation
+ op, err := b.AddCommentRaw(
+ author,
+ comment.CreatedAt.Unix(),
+ cleanText,
+ nil,
+ map[string]string{
+ metaKeyGithubId: parseId(comment.Id),
+ metaKeyGithubUrl: comment.Url.String(),
+ },
+ )
+ if err != nil {
+ return err
}
+
+ gi.out <- core.NewImportComment(op.Id())
return nil
}
// ensurePerson create a bug.Person from the Github data
-func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*cache.IdentityCache, error) {
+func (gi *githubImporter) ensurePerson(ctx context.Context, repo *cache.RepoCache, actor *actor) (*cache.IdentityCache, error) {
// When a user has been deleted, Github return a null actor, while displaying a profile named "ghost"
// in it's UI. So we need a special case to get it.
if actor == nil {
- return gi.getGhost(repo)
+ return gi.getGhost(ctx, repo)
}
// Look first in the cache
@@ -521,7 +518,6 @@ func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*ca
}
// importing a new identity
-
var name string
var email string
@@ -565,48 +561,37 @@ func (gi *githubImporter) ensurePerson(repo *cache.RepoCache, actor *actor) (*ca
return i, nil
}
-func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*cache.IdentityCache, error) {
+func (gi *githubImporter) getGhost(ctx context.Context, repo *cache.RepoCache) (*cache.IdentityCache, error) {
+ loginName := "ghost"
// Look first in the cache
- i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, "ghost")
+ i, err := repo.ResolveIdentityImmutableMetadata(metaKeyGithubLogin, loginName)
if err == nil {
return i, nil
}
if entity.IsErrMultipleMatch(err) {
return nil, err
}
-
- var q ghostQuery
-
- variables := map[string]interface{}{
- "login": githubv4.String("ghost"),
- }
-
- ctx, cancel := context.WithTimeout(gi.iterator.ctx, defaultTimeout)
- defer cancel()
-
- err = gi.client.Query(ctx, &q, variables)
+ user, err := gi.mediator.User(ctx, loginName)
if err != nil {
return nil, err
}
-
- var name string
- if q.User.Name != nil {
- name = string(*q.User.Name)
+ userName := ""
+ if user.Name != nil {
+ userName = string(*user.Name)
}
-
return repo.NewIdentityRaw(
- name,
+ userName,
"",
- string(q.User.Login),
- string(q.User.AvatarUrl),
+ string(user.Login),
+ string(user.AvatarUrl),
nil,
map[string]string{
- metaKeyGithubLogin: string(q.User.Login),
+ metaKeyGithubLogin: string(user.Login),
},
)
}
-// parseId convert the unusable githubv4.ID (an interface{}) into a string
+// parseId converts the unusable githubv4.ID (an interface{}) into a string
func parseId(id githubv4.ID) string {
return fmt.Sprintf("%v", id)
}
diff --git a/bridge/github/import_mediator.go b/bridge/github/import_mediator.go
new file mode 100644
index 00000000..873d5f62
--- /dev/null
+++ b/bridge/github/import_mediator.go
@@ -0,0 +1,436 @@
+package github
+
+import (
+ "context"
+ "fmt"
+ "strings"
+ "time"
+
+ "github.com/shurcooL/githubv4"
+)
+
+const (
+ // These values influence how fast the github graphql rate limit is exhausted.
+ NumIssues = 40
+ NumIssueEdits = 100
+ NumTimelineItems = 100
+ NumCommentEdits = 100
+
+ ChanCapacity = 128
+)
+
+// importMediator provides a convenient interface to retrieve issues from the Github GraphQL API.
+type importMediator struct {
+ // Github graphql client
+ gc *githubv4.Client
+
+ // name of the repository owner on Github
+ owner string
+
+ // name of the Github repository
+ project string
+
+ // since specifies which issues to import. Issues that have been updated at or after the
+ // given date should be imported.
+ since time.Time
+
+ // importEvents holds events representing issues, comments, edits, ...
+ // In this channel issues are immediately followed by their issue edits and comments are
+ // immediately followed by their comment edits.
+ importEvents chan ImportEvent
+
+ // Sticky error
+ err error
+}
+
+type ImportEvent interface {
+ isImportEvent()
+}
+
+type RateLimitingEvent struct {
+ msg string
+}
+
+func (RateLimitingEvent) isImportEvent() {}
+
+type IssueEvent struct {
+ issue
+}
+
+func (IssueEvent) isImportEvent() {}
+
+type IssueEditEvent struct {
+ issueId githubv4.ID
+ userContentEdit
+}
+
+func (IssueEditEvent) isImportEvent() {}
+
+type TimelineEvent struct {
+ issueId githubv4.ID
+ timelineItem
+}
+
+func (TimelineEvent) isImportEvent() {}
+
+type CommentEditEvent struct {
+ commentId githubv4.ID
+ userContentEdit
+}
+
+func (CommentEditEvent) isImportEvent() {}
+
+func (mm *importMediator) NextImportEvent() ImportEvent {
+ return <-mm.importEvents
+}
+
+func NewImportMediator(ctx context.Context, client *githubv4.Client, owner, project string, since time.Time) *importMediator {
+ mm := importMediator{
+ gc: client,
+ owner: owner,
+ project: project,
+ since: since,
+ importEvents: make(chan ImportEvent, ChanCapacity),
+ err: nil,
+ }
+ go func() {
+ mm.fillImportEvents(ctx)
+ close(mm.importEvents)
+ }()
+ return &mm
+}
+
+type varmap map[string]interface{}
+
+func newIssueVars(owner, project string, since time.Time) varmap {
+ return varmap{
+ "owner": githubv4.String(owner),
+ "name": githubv4.String(project),
+ "issueSince": githubv4.DateTime{Time: since},
+ "issueFirst": githubv4.Int(NumIssues),
+ "issueEditLast": githubv4.Int(NumIssueEdits),
+ "issueEditBefore": (*githubv4.String)(nil),
+ "timelineFirst": githubv4.Int(NumTimelineItems),
+ "timelineAfter": (*githubv4.String)(nil),
+ "commentEditLast": githubv4.Int(NumCommentEdits),
+ "commentEditBefore": (*githubv4.String)(nil),
+ }
+}
+
+func newIssueEditVars() varmap {
+ return varmap{
+ "issueEditLast": githubv4.Int(NumIssueEdits),
+ }
+}
+
+func newTimelineVars() varmap {
+ return varmap{
+ "timelineFirst": githubv4.Int(NumTimelineItems),
+ "commentEditLast": githubv4.Int(NumCommentEdits),
+ "commentEditBefore": (*githubv4.String)(nil),
+ }
+}
+
+func newCommentEditVars() varmap {
+ return varmap{
+ "commentEditLast": githubv4.Int(NumCommentEdits),
+ }
+}
+
+func (mm *importMediator) Error() error {
+ return mm.err
+}
+
+func (mm *importMediator) User(ctx context.Context, loginName string) (*user, error) {
+ query := userQuery{}
+ vars := varmap{"login": githubv4.String(loginName)}
+ if err := mm.mQuery(ctx, &query, vars); err != nil {
+ return nil, err
+ }
+ return &query.User, nil
+}
+
+func (mm *importMediator) fillImportEvents(ctx context.Context) {
+ initialCursor := githubv4.String("")
+ issues, hasIssues := mm.queryIssue(ctx, initialCursor)
+ for hasIssues {
+ for _, node := range issues.Nodes {
+ select {
+ case <-ctx.Done():
+ return
+ case mm.importEvents <- IssueEvent{node.issue}:
+ }
+
+ // issue edit events follow the issue event
+ mm.fillIssueEditEvents(ctx, &node)
+ // last come the timeline events
+ mm.fillTimelineEvents(ctx, &node)
+ }
+ if !issues.PageInfo.HasNextPage {
+ break
+ }
+ issues, hasIssues = mm.queryIssue(ctx, issues.PageInfo.EndCursor)
+ }
+}
+
+func (mm *importMediator) fillIssueEditEvents(ctx context.Context, issueNode *issueNode) {
+ edits := &issueNode.UserContentEdits
+ hasEdits := true
+ for hasEdits {
+ for edit := range reverse(edits.Nodes) {
+ if edit.Diff == nil || string(*edit.Diff) == "" {
+ // issueEdit.Diff == nil happen if the event is older than early
+ // 2018, Github doesn't have the data before that. Best we can do is
+ // to ignore the event.
+ continue
+ }
+ select {
+ case <-ctx.Done():
+ return
+ case mm.importEvents <- IssueEditEvent{issueId: issueNode.issue.Id, userContentEdit: edit}:
+ }
+ }
+ if !edits.PageInfo.HasPreviousPage {
+ break
+ }
+ edits, hasEdits = mm.queryIssueEdits(ctx, issueNode.issue.Id, edits.PageInfo.EndCursor)
+ }
+}
+
+func (mm *importMediator) queryIssueEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
+ vars := newIssueEditVars()
+ vars["gqlNodeId"] = nid
+ if cursor == "" {
+ vars["issueEditBefore"] = (*githubv4.String)(nil)
+ } else {
+ vars["issueEditBefore"] = cursor
+ }
+ query := issueEditQuery{}
+ if err := mm.mQuery(ctx, &query, vars); err != nil {
+ mm.err = err
+ return nil, false
+ }
+ connection := &query.Node.Issue.UserContentEdits
+ if len(connection.Nodes) <= 0 {
+ return nil, false
+ }
+ return connection, true
+}
+
+func (mm *importMediator) fillTimelineEvents(ctx context.Context, issueNode *issueNode) {
+ items := &issueNode.TimelineItems
+ hasItems := true
+ for hasItems {
+ for _, item := range items.Nodes {
+ select {
+ case <-ctx.Done():
+ return
+ case mm.importEvents <- TimelineEvent{issueId: issueNode.issue.Id, timelineItem: item}:
+ }
+ if item.Typename == "IssueComment" {
+ // Issue comments are different than other timeline items in that
+ // they may have associated user content edits.
+ // Right after the comment we send the comment edits.
+ mm.fillCommentEdits(ctx, &item)
+ }
+ }
+ if !items.PageInfo.HasNextPage {
+ break
+ }
+ items, hasItems = mm.queryTimeline(ctx, issueNode.issue.Id, items.PageInfo.EndCursor)
+ }
+}
+
+func (mm *importMediator) queryTimeline(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*timelineItemsConnection, bool) {
+ vars := newTimelineVars()
+ vars["gqlNodeId"] = nid
+ if cursor == "" {
+ vars["timelineAfter"] = (*githubv4.String)(nil)
+ } else {
+ vars["timelineAfter"] = cursor
+ }
+ query := timelineQuery{}
+ if err := mm.mQuery(ctx, &query, vars); err != nil {
+ mm.err = err
+ return nil, false
+ }
+ connection := &query.Node.Issue.TimelineItems
+ if len(connection.Nodes) <= 0 {
+ return nil, false
+ }
+ return connection, true
+}
+
+func (mm *importMediator) fillCommentEdits(ctx context.Context, item *timelineItem) {
+ // Here we are only concerned with timeline items of type issueComment.
+ if item.Typename != "IssueComment" {
+ return
+ }
+ // First: setup message handling while submitting GraphQL queries.
+ comment := &item.IssueComment
+ edits := &comment.UserContentEdits
+ hasEdits := true
+ for hasEdits {
+ for edit := range reverse(edits.Nodes) {
+ if edit.Diff == nil || string(*edit.Diff) == "" {
+ // issueEdit.Diff == nil happen if the event is older than early
+ // 2018, Github doesn't have the data before that. Best we can do is
+ // to ignore the event.
+ continue
+ }
+ select {
+ case <-ctx.Done():
+ return
+ case mm.importEvents <- CommentEditEvent{commentId: comment.Id, userContentEdit: edit}:
+ }
+ }
+ if !edits.PageInfo.HasPreviousPage {
+ break
+ }
+ edits, hasEdits = mm.queryCommentEdits(ctx, comment.Id, edits.PageInfo.EndCursor)
+ }
+}
+
+func (mm *importMediator) queryCommentEdits(ctx context.Context, nid githubv4.ID, cursor githubv4.String) (*userContentEditConnection, bool) {
+ vars := newCommentEditVars()
+ vars["gqlNodeId"] = nid
+ if cursor == "" {
+ vars["commentEditBefore"] = (*githubv4.String)(nil)
+ } else {
+ vars["commentEditBefore"] = cursor
+ }
+ query := commentEditQuery{}
+ if err := mm.mQuery(ctx, &query, vars); err != nil {
+ mm.err = err
+ return nil, false
+ }
+ connection := &query.Node.IssueComment.UserContentEdits
+ if len(connection.Nodes) <= 0 {
+ return nil, false
+ }
+ return connection, true
+}
+
+func (mm *importMediator) queryIssue(ctx context.Context, cursor githubv4.String) (*issueConnection, bool) {
+ vars := newIssueVars(mm.owner, mm.project, mm.since)
+ if cursor == "" {
+ vars["issueAfter"] = (*githubv4.String)(nil)
+ } else {
+ vars["issueAfter"] = cursor
+ }
+ query := issueQuery{}
+ if err := mm.mQuery(ctx, &query, vars); err != nil {
+ mm.err = err
+ return nil, false
+ }
+ connection := &query.Repository.Issues
+ if len(connection.Nodes) <= 0 {
+ return nil, false
+ }
+ return connection, true
+}
+
+func reverse(eds []userContentEdit) chan userContentEdit {
+ ret := make(chan userContentEdit)
+ go func() {
+ for i := range eds {
+ ret <- eds[len(eds)-1-i]
+ }
+ close(ret)
+ }()
+ return ret
+}
+
+// mQuery executes a single GraphQL query. The variable query is used to derive the GraphQL query
+// and it is used to populate the response into it. It should be a pointer to a struct that
+// corresponds to the Github graphql schema and it has to implement the rateLimiter interface. If
+// there is a Github rate limiting error, then the function sleeps and retries after the rate limit
+// is expired. If there is another error, then the method will retry before giving up.
+func (mm *importMediator) mQuery(ctx context.Context, query rateLimiter, vars map[string]interface{}) error {
+ if err := mm.queryOnce(ctx, query, vars); err == nil {
+ // success: done
+ return nil
+ }
+ // failure: we will retry
+ // To retry is important for importing projects with a big number of issues, because
+ // there may be temporary network errors or momentary internal errors of the github servers.
+ retries := 3
+ var err error
+ for i := 0; i < retries; i++ {
+ // wait a few seconds before retry
+ sleepTime := time.Duration(8*(i+1)) * time.Second
+ timer := time.NewTimer(sleepTime)
+ select {
+ case <-ctx.Done():
+ stop(timer)
+ return ctx.Err()
+ case <-timer.C:
+ }
+ err = mm.queryOnce(ctx, query, vars)
+ if err == nil {
+ // success: done
+ return nil
+ }
+ }
+ return err
+}
+
+func (mm *importMediator) queryOnce(ctx context.Context, query rateLimiter, vars map[string]interface{}) error {
+ // first: just send the query to the graphql api
+ vars["dryRun"] = githubv4.Boolean(false)
+ qctx, cancel := context.WithTimeout(ctx, defaultTimeout)
+ defer cancel()
+ err := mm.gc.Query(qctx, query, vars)
+ if err == nil {
+ // no error: done
+ return nil
+ }
+ // matching the error string
+ if !strings.Contains(err.Error(), "API rate limit exceeded") {
+ // an error, but not the API rate limit error: done
+ return err
+ }
+ // a rate limit error
+ // ask the graphql api for rate limiting information
+ vars["dryRun"] = githubv4.Boolean(true)
+ qctx, cancel = context.WithTimeout(ctx, defaultTimeout)
+ defer cancel()
+ if err := mm.gc.Query(qctx, query, vars); err != nil {
+ return err
+ }
+ rateLimit := query.rateLimit()
+ if rateLimit.Cost > rateLimit.Remaining {
+ // sleep
+ resetTime := rateLimit.ResetAt.Time
+ // Add a few seconds (8) for good measure
+ resetTime = resetTime.Add(8 * time.Second)
+ msg := fmt.Sprintf("Github GraphQL API: import will sleep until %s", resetTime.String())
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case mm.importEvents <- RateLimitingEvent{msg}:
+ }
+ timer := time.NewTimer(time.Until(resetTime))
+ select {
+ case <-ctx.Done():
+ stop(timer)
+ return ctx.Err()
+ case <-timer.C:
+ }
+ }
+ // run the original query again
+ vars["dryRun"] = githubv4.Boolean(false)
+ qctx, cancel = context.WithTimeout(ctx, defaultTimeout)
+ defer cancel()
+ err = mm.gc.Query(qctx, query, vars)
+ return err // might be nil
+}
+
+func stop(t *time.Timer) {
+ if !t.Stop() {
+ select {
+ case <-t.C:
+ default:
+ }
+ }
+}
diff --git a/bridge/github/import_query.go b/bridge/github/import_query.go
index 228d204a..461daf94 100644
--- a/bridge/github/import_query.go
+++ b/bridge/github/import_query.go
@@ -2,37 +2,136 @@ package github
import "github.com/shurcooL/githubv4"
-type pageInfo struct {
- EndCursor githubv4.String
- HasNextPage bool
- StartCursor githubv4.String
- HasPreviousPage bool
+type rateLimit struct {
+ Cost githubv4.Int
+ Limit githubv4.Int
+ NodeCount githubv4.Int
+ Remaining githubv4.Int
+ ResetAt githubv4.DateTime
+ Used githubv4.Int
}
-type actor struct {
- Typename githubv4.String `graphql:"__typename"`
+type rateLimiter interface {
+ rateLimit() rateLimit
+}
+
+type userQuery struct {
+ RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+ User user `graphql:"user(login: $login)"`
+}
+
+func (q *userQuery) rateLimit() rateLimit {
+ return q.RateLimit
+}
+
+type labelsQuery struct {
+ //RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+ Repository struct {
+ Labels struct {
+ Nodes []struct {
+ ID string `graphql:"id"`
+ Name string `graphql:"name"`
+ Color string `graphql:"color"`
+ Description string `graphql:"description"`
+ }
+ PageInfo pageInfo
+ } `graphql:"labels(first: $first, after: $after)"`
+ } `graphql:"repository(owner: $owner, name: $name)"`
+}
+
+type loginQuery struct {
+ //RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+ Viewer struct {
+ Login string `graphql:"login"`
+ } `graphql:"viewer"`
+}
+
+type issueQuery struct {
+ RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+ Repository struct {
+ Issues issueConnection `graphql:"issues(first: $issueFirst, after: $issueAfter, orderBy: {field: CREATED_AT, direction: ASC}, filterBy: {since: $issueSince})"`
+ } `graphql:"repository(owner: $owner, name: $name)"`
+}
+
+func (q *issueQuery) rateLimit() rateLimit {
+ return q.RateLimit
+}
+
+type issueEditQuery struct {
+ RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+ Node struct {
+ Typename githubv4.String `graphql:"__typename"`
+ Issue struct {
+ UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"`
+ } `graphql:"... on Issue"`
+ } `graphql:"node(id: $gqlNodeId)"`
+}
+
+func (q *issueEditQuery) rateLimit() rateLimit {
+ return q.RateLimit
+}
+
+type timelineQuery struct {
+ RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+ Node struct {
+ Typename githubv4.String `graphql:"__typename"`
+ Issue struct {
+ TimelineItems timelineItemsConnection `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"`
+ } `graphql:"... on Issue"`
+ } `graphql:"node(id: $gqlNodeId)"`
+}
+
+func (q *timelineQuery) rateLimit() rateLimit {
+ return q.RateLimit
+}
+
+type commentEditQuery struct {
+ RateLimit rateLimit `graphql:"rateLimit(dryRun: $dryRun)"`
+ Node struct {
+ Typename githubv4.String `graphql:"__typename"`
+ IssueComment struct {
+ UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"`
+ } `graphql:"... on IssueComment"`
+ } `graphql:"node(id: $gqlNodeId)"`
+}
+
+func (q *commentEditQuery) rateLimit() rateLimit {
+ return q.RateLimit
+}
+
+type user struct {
Login githubv4.String
AvatarUrl githubv4.String
- User struct {
- Name *githubv4.String
- Email githubv4.String
- } `graphql:"... on User"`
- Organization struct {
- Name *githubv4.String
- Email *githubv4.String
- } `graphql:"... on Organization"`
+ Name *githubv4.String
}
-type actorEvent struct {
- Id githubv4.ID
- CreatedAt githubv4.DateTime
- Actor *actor
+type issueConnection struct {
+ Nodes []issueNode
+ PageInfo pageInfo
}
-type authorEvent struct {
- Id githubv4.ID
- CreatedAt githubv4.DateTime
- Author *actor
+type issueNode struct {
+ issue
+ UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"`
+ TimelineItems timelineItemsConnection `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"`
+}
+
+type issue struct {
+ authorEvent
+ Title githubv4.String
+ Number githubv4.Int
+ Body githubv4.String
+ Url githubv4.URI
+}
+
+type timelineItemsConnection struct {
+ Nodes []timelineItem
+ PageInfo pageInfo
+}
+
+type userContentEditConnection struct {
+ Nodes []userContentEdit
+ PageInfo pageInfo
}
type userContentEdit struct {
@@ -46,12 +145,6 @@ type userContentEdit struct {
Diff *githubv4.String
}
-type issueComment struct {
- authorEvent // NOTE: contains Id
- Body githubv4.String
- Url githubv4.URI
-}
-
type timelineItem struct {
Typename githubv4.String `graphql:"__typename"`
@@ -91,84 +184,43 @@ type timelineItem struct {
} `graphql:"... on RenamedTitleEvent"`
}
-type ghostQuery struct {
- User struct {
- Login githubv4.String
- AvatarUrl githubv4.String
- Name *githubv4.String
- } `graphql:"user(login: $login)"`
-}
-
-type labelsQuery struct {
- Repository struct {
- Labels struct {
- Nodes []struct {
- ID string `graphql:"id"`
- Name string `graphql:"name"`
- Color string `graphql:"color"`
- Description string `graphql:"description"`
- }
- PageInfo pageInfo
- } `graphql:"labels(first: $first, after: $after)"`
- } `graphql:"repository(owner: $owner, name: $name)"`
-}
-
-type loginQuery struct {
- Viewer struct {
- Login string `graphql:"login"`
- } `graphql:"viewer"`
-}
+type issueComment struct {
+ authorEvent // NOTE: contains Id
+ Body githubv4.String
+ Url githubv4.URI
-type issueQuery struct {
- Repository struct {
- Issues struct {
- Nodes []issue
- PageInfo pageInfo
- } `graphql:"issues(first: $issueFirst, after: $issueAfter, orderBy: {field: CREATED_AT, direction: ASC}, filterBy: {since: $issueSince})"`
- } `graphql:"repository(owner: $owner, name: $name)"`
+ UserContentEdits userContentEditConnection `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"`
}
-type issue struct {
- authorEvent
- Title string
- Number githubv4.Int
- Body githubv4.String
- Url githubv4.URI
+type actor struct {
+ Typename githubv4.String `graphql:"__typename"`
+ Login githubv4.String
+ AvatarUrl githubv4.String
+ User struct {
+ Name *githubv4.String
+ Email githubv4.String
+ } `graphql:"... on User"`
+ Organization struct {
+ Name *githubv4.String
+ Email *githubv4.String
+ } `graphql:"... on Organization"`
}
-type issueEditQuery struct {
- Node struct {
- Typename githubv4.String `graphql:"__typename"`
- Issue struct {
- UserContentEdits struct {
- Nodes []userContentEdit
- TotalCount githubv4.Int
- PageInfo pageInfo
- } `graphql:"userContentEdits(last: $issueEditLast, before: $issueEditBefore)"`
- } `graphql:"... on Issue"`
- } `graphql:"node(id: $gqlNodeId)"`
+type actorEvent struct {
+ Id githubv4.ID
+ CreatedAt githubv4.DateTime
+ Actor *actor
}
-type timelineQuery struct {
- Node struct {
- Typename githubv4.String `graphql:"__typename"`
- Issue struct {
- TimelineItems struct {
- Nodes []timelineItem
- PageInfo pageInfo
- } `graphql:"timelineItems(first: $timelineFirst, after: $timelineAfter)"`
- } `graphql:"... on Issue"`
- } `graphql:"node(id: $gqlNodeId)"`
+type authorEvent struct {
+ Id githubv4.ID
+ CreatedAt githubv4.DateTime
+ Author *actor
}
-type commentEditQuery struct {
- Node struct {
- Typename githubv4.String `graphql:"__typename"`
- IssueComment struct {
- UserContentEdits struct {
- Nodes []userContentEdit
- PageInfo pageInfo
- } `graphql:"userContentEdits(last: $commentEditLast, before: $commentEditBefore)"`
- } `graphql:"... on IssueComment"`
- } `graphql:"node(id: $gqlNodeId)"`
+type pageInfo struct {
+ EndCursor githubv4.String
+ HasNextPage bool
+ StartCursor githubv4.String
+ HasPreviousPage bool
}
diff --git a/bridge/github/iterator.go b/bridge/github/iterator.go
deleted file mode 100644
index d21faae8..00000000
--- a/bridge/github/iterator.go
+++ /dev/null
@@ -1,423 +0,0 @@
-package github
-
-import (
- "context"
- "time"
-
- "github.com/pkg/errors"
- "github.com/shurcooL/githubv4"
-)
-
-type iterator struct {
- // Github graphql client
- gc *githubv4.Client
-
- // The iterator will only query issues updated or created after the date given in
- // the variable since.
- since time.Time
-
- // Shared context, which is used for all graphql queries.
- ctx context.Context
-
- // Sticky error
- err error
-
- // Issue iterator
- issueIter issueIter
-}
-
-type issueIter struct {
- iterVars
- query issueQuery
- issueEditIter []issueEditIter
- timelineIter []timelineIter
-}
-
-type issueEditIter struct {
- iterVars
- query issueEditQuery
-}
-
-type timelineIter struct {
- iterVars
- query timelineQuery
- commentEditIter []commentEditIter
-}
-
-type commentEditIter struct {
- iterVars
- query commentEditQuery
-}
-
-type iterVars struct {
- // Iterator index
- index int
-
- // capacity is the number of elements (issues, issue edits, timeline items, or
- // comment edits) to query at a time. More capacity = more used memory =
- // less queries to make.
- capacity int
-
- // Variable assignments for graphql query
- variables varmap
-}
-
-type varmap map[string]interface{}
-
-func newIterVars(capacity int) iterVars {
- return iterVars{
- index: -1,
- capacity: capacity,
- variables: varmap{},
- }
-}
-
-// NewIterator creates and initialize a new iterator.
-func NewIterator(ctx context.Context, client *githubv4.Client, capacity int, owner, project string, since time.Time) *iterator {
- i := &iterator{
- gc: client,
- since: since,
- ctx: ctx,
- issueIter: issueIter{
- iterVars: newIterVars(capacity),
- timelineIter: make([]timelineIter, capacity),
- issueEditIter: make([]issueEditIter, capacity),
- },
- }
- i.issueIter.variables.setOwnerProject(owner, project)
- for idx := range i.issueIter.issueEditIter {
- ie := &i.issueIter.issueEditIter[idx]
- ie.iterVars = newIterVars(capacity)
- }
- for i1 := range i.issueIter.timelineIter {
- tli := &i.issueIter.timelineIter[i1]
- tli.iterVars = newIterVars(capacity)
- tli.commentEditIter = make([]commentEditIter, capacity)
- for i2 := range tli.commentEditIter {
- cei := &tli.commentEditIter[i2]
- cei.iterVars = newIterVars(capacity)
- }
- }
- i.resetIssueVars()
- return i
-}
-
-func (v *varmap) setOwnerProject(owner, project string) {
- (*v)["owner"] = githubv4.String(owner)
- (*v)["name"] = githubv4.String(project)
-}
-
-func (i *iterator) resetIssueVars() {
- vars := &i.issueIter.variables
- (*vars)["issueFirst"] = githubv4.Int(i.issueIter.capacity)
- (*vars)["issueAfter"] = (*githubv4.String)(nil)
- (*vars)["issueSince"] = githubv4.DateTime{Time: i.since}
- i.issueIter.query.Repository.Issues.PageInfo.HasNextPage = true
- i.issueIter.query.Repository.Issues.PageInfo.EndCursor = ""
-}
-
-func (i *iterator) resetIssueEditVars() {
- for idx := range i.issueIter.issueEditIter {
- ie := &i.issueIter.issueEditIter[idx]
- ie.variables["issueEditLast"] = githubv4.Int(ie.capacity)
- ie.variables["issueEditBefore"] = (*githubv4.String)(nil)
- ie.query.Node.Issue.UserContentEdits.PageInfo.HasNextPage = true
- ie.query.Node.Issue.UserContentEdits.PageInfo.EndCursor = ""
- }
-}
-
-func (i *iterator) resetTimelineVars() {
- for idx := range i.issueIter.timelineIter {
- ip := &i.issueIter.timelineIter[idx]
- ip.variables["timelineFirst"] = githubv4.Int(ip.capacity)
- ip.variables["timelineAfter"] = (*githubv4.String)(nil)
- ip.query.Node.Issue.TimelineItems.PageInfo.HasNextPage = true
- ip.query.Node.Issue.TimelineItems.PageInfo.EndCursor = ""
- }
-}
-
-func (i *iterator) resetCommentEditVars() {
- for i1 := range i.issueIter.timelineIter {
- for i2 := range i.issueIter.timelineIter[i1].commentEditIter {
- ce := &i.issueIter.timelineIter[i1].commentEditIter[i2]
- ce.variables["commentEditLast"] = githubv4.Int(ce.capacity)
- ce.variables["commentEditBefore"] = (*githubv4.String)(nil)
- ce.query.Node.IssueComment.UserContentEdits.PageInfo.HasNextPage = true
- ce.query.Node.IssueComment.UserContentEdits.PageInfo.EndCursor = ""
- }
- }
-}
-
-// Error return last encountered error
-func (i *iterator) Error() error {
- if i.err != nil {
- return i.err
- }
- return i.ctx.Err() // might return nil
-}
-
-func (i *iterator) HasError() bool {
- return i.err != nil || i.ctx.Err() != nil
-}
-
-func (i *iterator) currIssueItem() *issue {
- return &i.issueIter.query.Repository.Issues.Nodes[i.issueIter.index]
-}
-
-func (i *iterator) currIssueEditIter() *issueEditIter {
- return &i.issueIter.issueEditIter[i.issueIter.index]
-}
-
-func (i *iterator) currTimelineIter() *timelineIter {
- return &i.issueIter.timelineIter[i.issueIter.index]
-}
-
-func (i *iterator) currCommentEditIter() *commentEditIter {
- timelineIter := i.currTimelineIter()
- return &timelineIter.commentEditIter[timelineIter.index]
-}
-
-func (i *iterator) currIssueGqlNodeId() githubv4.ID {
- return i.currIssueItem().Id
-}
-
-// NextIssue returns true if there exists a next issue and advances the iterator by one.
-// It is used to iterate over all issues. Queries to github are made when necessary.
-func (i *iterator) NextIssue() bool {
- if i.HasError() {
- return false
- }
- index := &i.issueIter.index
- issues := &i.issueIter.query.Repository.Issues
- issueItems := &issues.Nodes
- if 0 <= *index && *index < len(*issueItems)-1 {
- *index += 1
- return true
- }
-
- if !issues.PageInfo.HasNextPage {
- return false
- }
- nextIssue := i.queryIssue()
- return nextIssue
-}
-
-// IssueValue returns the actual issue value.
-func (i *iterator) IssueValue() issue {
- return *i.currIssueItem()
-}
-
-func (i *iterator) queryIssue() bool {
- ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout)
- defer cancel()
- if endCursor := i.issueIter.query.Repository.Issues.PageInfo.EndCursor; endCursor != "" {
- i.issueIter.variables["issueAfter"] = endCursor
- }
- if err := i.gc.Query(ctx, &i.issueIter.query, i.issueIter.variables); err != nil {
- i.err = err
- return false
- }
- i.resetIssueEditVars()
- i.resetTimelineVars()
- issueItems := &i.issueIter.query.Repository.Issues.Nodes
- if len(*issueItems) <= 0 {
- i.issueIter.index = -1
- return false
- }
- i.issueIter.index = 0
- return true
-}
-
-// NextIssueEdit returns true if there exists a next issue edit and advances the iterator
-// by one. It is used to iterate over all the issue edits. Queries to github are made when
-// necessary.
-func (i *iterator) NextIssueEdit() bool {
- if i.HasError() {
- return false
- }
- ieIter := i.currIssueEditIter()
- ieIdx := &ieIter.index
- ieItems := ieIter.query.Node.Issue.UserContentEdits
- if 0 <= *ieIdx && *ieIdx < len(ieItems.Nodes)-1 {
- *ieIdx += 1
- return i.nextValidIssueEdit()
- }
- if !ieItems.PageInfo.HasNextPage {
- return false
- }
- querySucc := i.queryIssueEdit()
- if !querySucc {
- return false
- }
- return i.nextValidIssueEdit()
-}
-
-func (i *iterator) nextValidIssueEdit() bool {
- // issueEdit.Diff == nil happen if the event is older than early 2018, Github doesn't have
- // the data before that. Best we can do is to ignore the event.
- if issueEdit := i.IssueEditValue(); issueEdit.Diff == nil || string(*issueEdit.Diff) == "" {
- return i.NextIssueEdit()
- }
- return true
-}
-
-// IssueEditValue returns the actual issue edit value.
-func (i *iterator) IssueEditValue() userContentEdit {
- iei := i.currIssueEditIter()
- return iei.query.Node.Issue.UserContentEdits.Nodes[iei.index]
-}
-
-func (i *iterator) queryIssueEdit() bool {
- ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout)
- defer cancel()
- iei := i.currIssueEditIter()
- if endCursor := iei.query.Node.Issue.UserContentEdits.PageInfo.EndCursor; endCursor != "" {
- iei.variables["issueEditBefore"] = endCursor
- }
- iei.variables["gqlNodeId"] = i.currIssueGqlNodeId()
- if err := i.gc.Query(ctx, &iei.query, iei.variables); err != nil {
- i.err = err
- return false
- }
- issueEditItems := iei.query.Node.Issue.UserContentEdits.Nodes
- if len(issueEditItems) <= 0 {
- iei.index = -1
- return false
- }
- // The UserContentEditConnection in the Github API serves its elements in reverse chronological
- // order. For our purpose we have to reverse the edits.
- reverseEdits(issueEditItems)
- iei.index = 0
- return true
-}
-
-// NextTimelineItem returns true if there exists a next timeline item and advances the iterator
-// by one. It is used to iterate over all the timeline items. Queries to github are made when
-// necessary.
-func (i *iterator) NextTimelineItem() bool {
- if i.HasError() {
- return false
- }
- tlIter := &i.issueIter.timelineIter[i.issueIter.index]
- tlIdx := &tlIter.index
- tlItems := tlIter.query.Node.Issue.TimelineItems
- if 0 <= *tlIdx && *tlIdx < len(tlItems.Nodes)-1 {
- *tlIdx += 1
- return true
- }
- if !tlItems.PageInfo.HasNextPage {
- return false
- }
- nextTlItem := i.queryTimeline()
- return nextTlItem
-}
-
-// TimelineItemValue returns the actual timeline item value.
-func (i *iterator) TimelineItemValue() timelineItem {
- tli := i.currTimelineIter()
- return tli.query.Node.Issue.TimelineItems.Nodes[tli.index]
-}
-
-func (i *iterator) queryTimeline() bool {
- ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout)
- defer cancel()
- tli := i.currTimelineIter()
- if endCursor := tli.query.Node.Issue.TimelineItems.PageInfo.EndCursor; endCursor != "" {
- tli.variables["timelineAfter"] = endCursor
- }
- tli.variables["gqlNodeId"] = i.currIssueGqlNodeId()
- if err := i.gc.Query(ctx, &tli.query, tli.variables); err != nil {
- i.err = err
- return false
- }
- i.resetCommentEditVars()
- timelineItems := &tli.query.Node.Issue.TimelineItems
- if len(timelineItems.Nodes) <= 0 {
- tli.index = -1
- return false
- }
- tli.index = 0
- return true
-}
-
-// NextCommentEdit returns true if there exists a next comment edit and advances the iterator
-// by one. It is used to iterate over all issue edits. Queries to github are made when
-// necessary.
-func (i *iterator) NextCommentEdit() bool {
- if i.HasError() {
- return false
- }
-
- tmlnVal := i.TimelineItemValue()
- if tmlnVal.Typename != "IssueComment" {
- // The timeline iterator does not point to a comment.
- i.err = errors.New("Call to NextCommentEdit() while timeline item is not a comment")
- return false
- }
-
- cei := i.currCommentEditIter()
- ceIdx := &cei.index
- ceItems := &cei.query.Node.IssueComment.UserContentEdits
- if 0 <= *ceIdx && *ceIdx < len(ceItems.Nodes)-1 {
- *ceIdx += 1
- return i.nextValidCommentEdit()
- }
- if !ceItems.PageInfo.HasNextPage {
- return false
- }
- querySucc := i.queryCommentEdit()
- if !querySucc {
- return false
- }
- return i.nextValidCommentEdit()
-}
-
-func (i *iterator) nextValidCommentEdit() bool {
- // if comment edit diff is a nil pointer or points to an empty string look for next value
- if commentEdit := i.CommentEditValue(); commentEdit.Diff == nil || string(*commentEdit.Diff) == "" {
- return i.NextCommentEdit()
- }
- return true
-}
-
-// CommentEditValue returns the actual comment edit value.
-func (i *iterator) CommentEditValue() userContentEdit {
- cei := i.currCommentEditIter()
- return cei.query.Node.IssueComment.UserContentEdits.Nodes[cei.index]
-}
-
-func (i *iterator) queryCommentEdit() bool {
- ctx, cancel := context.WithTimeout(i.ctx, defaultTimeout)
- defer cancel()
- cei := i.currCommentEditIter()
-
- if endCursor := cei.query.Node.IssueComment.UserContentEdits.PageInfo.EndCursor; endCursor != "" {
- cei.variables["commentEditBefore"] = endCursor
- }
- tmlnVal := i.TimelineItemValue()
- if tmlnVal.Typename != "IssueComment" {
- i.err = errors.New("Call to queryCommentEdit() while timeline item is not a comment")
- return false
- }
- cei.variables["gqlNodeId"] = tmlnVal.IssueComment.Id
- if err := i.gc.Query(ctx, &cei.query, cei.variables); err != nil {
- i.err = err
- return false
- }
- ceItems := cei.query.Node.IssueComment.UserContentEdits.Nodes
- if len(ceItems) <= 0 {
- cei.index = -1
- return false
- }
- // The UserContentEditConnection in the Github API serves its elements in reverse chronological
- // order. For our purpose we have to reverse the edits.
- reverseEdits(ceItems)
- cei.index = 0
- return true
-}
-
-func reverseEdits(edits []userContentEdit) {
- for i, j := 0, len(edits)-1; i < j; i, j = i+1, j-1 {
- edits[i], edits[j] = edits[j], edits[i]
- }
-}