diff options
author | Amine Hilaly <hilalyamine@gmail.com> | 2019-04-27 00:15:02 +0100 |
---|---|---|
committer | Amine Hilaly <hilalyamine@gmail.com> | 2019-05-05 18:16:10 +0200 |
commit | 3bcaa35b5d25ca9e12389ab4bf78600ae5df8af8 (patch) | |
tree | 3c512c5324cce2fb1e9beefc810e2807936a0768 /bridge/github | |
parent | c8ad4dbfd9511f4cfa748fa85c01fbca2edb348a (diff) | |
download | git-bug-3bcaa35b5d25ca9e12389ab4bf78600ae5df8af8.tar.gz |
Integrate iterator with importer
Diffstat (limited to 'bridge/github')
-rw-r--r-- | bridge/github/config.go | 10 | ||||
-rw-r--r-- | bridge/github/import.go | 594 | ||||
-rw-r--r-- | bridge/github/iterator.go | 29 | ||||
-rw-r--r-- | bridge/github/iterator_test.go | 10 |
4 files changed, 222 insertions, 421 deletions
diff --git a/bridge/github/config.go b/bridge/github/config.go index b881c585..2a3119a6 100644 --- a/bridge/github/config.go +++ b/bridge/github/config.go @@ -20,10 +20,12 @@ import ( "golang.org/x/crypto/ssh/terminal" ) -const githubV3Url = "https://api.github.com" -const keyUser = "user" -const keyProject = "project" -const keyToken = "token" +const ( + githubV3Url = "https://api.github.com" + keyUser = "user" + keyProject = "project" + keyToken = "token" +) func (*Github) Configure(repo repository.RepoCommon) (core.Configuration, error) { conf := make(core.Configuration) diff --git a/bridge/github/import.go b/bridge/github/import.go index d641b192..74ccb776 100644 --- a/bridge/github/import.go +++ b/bridge/github/import.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "github.com/MichaelMure/git-bug/bridge/core" "github.com/MichaelMure/git-bug/bug" @@ -13,308 +14,250 @@ import ( "github.com/shurcooL/githubv4" ) -const keyGithubId = "github-id" -const keyGithubUrl = "github-url" -const keyGithubLogin = "github-login" +const ( + keyGithubId = "github-id" + keyGithubUrl = "github-url" + keyGithubLogin = "github-login" +) // githubImporter implement the Importer interface type githubImporter struct { - client *githubv4.Client - conf core.Configuration + iterator *iterator + conf core.Configuration } func (gi *githubImporter) Init(conf core.Configuration) error { - gi.conf = conf - gi.client = buildClient(conf) - - return nil -} + var since time.Time -func (gi *githubImporter) ImportAll(repo *cache.RepoCache) error { - q := &issueTimelineQuery{} - variables := map[string]interface{}{ - "owner": githubv4.String(gi.conf[keyUser]), - "name": githubv4.String(gi.conf[keyProject]), - "issueFirst": githubv4.Int(1), - "issueAfter": (*githubv4.String)(nil), - "timelineFirst": githubv4.Int(10), - "timelineAfter": (*githubv4.String)(nil), - - // Fun fact, github provide the comment edition in reverse chronological - // order, because haha. Look at me, I'm dying of laughter. - "issueEditLast": githubv4.Int(10), - "issueEditBefore": (*githubv4.String)(nil), - "commentEditLast": githubv4.Int(10), - "commentEditBefore": (*githubv4.String)(nil), - } - - var b *cache.BugCache - - for { - err := gi.client.Query(context.TODO(), &q, variables) + // parse since value from configuration + if value, ok := conf["since"]; ok && value != "" { + s, err := time.Parse(time.RFC3339, value) if err != nil { return err } - if len(q.Repository.Issues.Nodes) == 0 { - return nil - } - - issue := q.Repository.Issues.Nodes[0] - - if b == nil { - b, err = gi.ensureIssue(repo, issue, variables) - if err != nil { - return err - } - } - - for _, itemEdge := range q.Repository.Issues.Nodes[0].Timeline.Edges { - err = gi.ensureTimelineItem(repo, b, itemEdge.Cursor, itemEdge.Node, variables) - if err != nil { - return err - } - } - - if !issue.Timeline.PageInfo.HasNextPage { - err = b.CommitAsNeeded() - if err != nil { - return err - } - - b = nil - - if !q.Repository.Issues.PageInfo.HasNextPage { - break - } - - variables["issueAfter"] = githubv4.NewString(q.Repository.Issues.PageInfo.EndCursor) - variables["timelineAfter"] = (*githubv4.String)(nil) - continue - } - - variables["timelineAfter"] = githubv4.NewString(issue.Timeline.PageInfo.EndCursor) + since = s } + gi.iterator = newIterator(conf, since) return nil } -func (gi *githubImporter) Import(repo *cache.RepoCache, id string) error { - fmt.Println("IMPORT") - - return nil -} - -func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline, rootVariables map[string]interface{}) (*cache.BugCache, error) { - fmt.Printf("import issue: %s\n", issue.Title) - - author, err := gi.ensurePerson(repo, issue.Author) - if err != nil { - return nil, err - } - - b, err := repo.ResolveBugCreateMetadata(keyGithubId, parseId(issue.Id)) - if err != nil && err != bug.ErrBugNotExist { - return nil, err - } - - // if there is no edit, the UserContentEdits given by github is empty. That - // means that the original message is given by the issue message. - // - // if there is edits, the UserContentEdits given by github contains both the - // original message and the following edits. The issue message give the last - // version so we don't care about that. - // - // the tricky part: for an issue older than the UserContentEdits API, github - // doesn't have the previous message version anymore and give an edition - // with .Diff == nil. We have to filter them. - - if len(issue.UserContentEdits.Nodes) == 0 { - if err == bug.ErrBugNotExist { - b, err = repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure - issue.Title, - cleanupText(string(issue.Body)), - nil, - map[string]string{ - keyGithubId: parseId(issue.Id), - keyGithubUrl: issue.Url.String(), - }, - ) - if err != nil { - return nil, err - } - } - - return b, nil - } - - // reverse the order, because github - reverseEdits(issue.UserContentEdits.Nodes) - - for i, edit := range issue.UserContentEdits.Nodes { - if b != nil && i == 0 { - // The first edit in the github result is the creation itself, we already have that - continue - } - - if b == nil { - if edit.Diff == nil { - // not enough data given by github for old edit, ignore them - continue - } - - // we create the bug as soon as we have a legit first edition - b, err = repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure - issue.Title, - cleanupText(string(*edit.Diff)), - nil, - map[string]string{ - keyGithubId: parseId(issue.Id), - keyGithubUrl: issue.Url.String(), - }, - ) - if err != nil { - return nil, err - } - continue - } +func (gi *githubImporter) ImportAll(repo *cache.RepoCache) error { + // Loop over all available issues + for gi.iterator.NextIssue() { + issue := gi.iterator.IssueValue() + fmt.Printf("importing issue: %v\n", issue.Title) - target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id)) - if err != nil { - return nil, err - } + // In each iteration create a new bug + var b *cache.BugCache - err = gi.ensureCommentEdit(repo, b, target, edit) + // ensure issue author + author, err := gi.ensurePerson(repo, issue.Author) if err != nil { - return nil, err - } - } - - if !issue.UserContentEdits.PageInfo.HasNextPage { - // if we still didn't get a legit edit, create the bug from the issue data - if b == nil { - return repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure - issue.Title, - cleanupText(string(issue.Body)), - nil, - map[string]string{ - keyGithubId: parseId(issue.Id), - keyGithubUrl: issue.Url.String(), - }, - ) + return err } - return b, nil - } - // We have more edit, querying them - - q := &issueEditQuery{} - variables := map[string]interface{}{ - "owner": rootVariables["owner"], - "name": rootVariables["name"], - "issueFirst": rootVariables["issueFirst"], - "issueAfter": rootVariables["issueAfter"], - "issueEditLast": githubv4.Int(10), - "issueEditBefore": issue.UserContentEdits.PageInfo.StartCursor, - } - - for { - err := gi.client.Query(context.TODO(), &q, variables) - if err != nil { - return nil, err + // resolve bug + b, err = repo.ResolveBugCreateMetadata(keyGithubId, parseId(issue.Id)) + if err != nil && err != bug.ErrBugNotExist { + return err } - edits := q.Repository.Issues.Nodes[0].UserContentEdits - - if len(edits.Nodes) == 0 { - return b, nil + // get issue edits + issueEdits := []userContentEdit{} + for gi.iterator.NextIssueEdit() { + // append only edits with non empty diff + if issueEdit := gi.iterator.IssueEditValue(); issueEdit.Diff != nil { + issueEdits = append(issueEdits, issueEdit) + } } - for _, edit := range edits.Nodes { - if b == nil { - if edit.Diff == nil { - // not enough data given by github for old edit, ignore them - continue - } - - // we create the bug as soon as we have a legit first edition + // if issueEdits is empty + if len(issueEdits) == 0 { + if err == bug.ErrBugNotExist { + // create bug b, err = repo.NewBugRaw( author, issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure issue.Title, - cleanupText(string(*edit.Diff)), + cleanupText(string(issue.Body)), nil, map[string]string{ keyGithubId: parseId(issue.Id), keyGithubUrl: issue.Url.String(), - }, - ) + }) if err != nil { - return nil, err + return err } - continue } + } else { + // create bug from given issueEdits + for _, edit := range issueEdits { + // if the bug doesn't exist + if b == nil { + // we create the bug as soon as we have a legit first edition + b, err = repo.NewBugRaw( + author, + issue.CreatedAt.Unix(), + issue.Title, + cleanupText(string(*edit.Diff)), + nil, + map[string]string{ + keyGithubId: parseId(issue.Id), + keyGithubUrl: issue.Url.String(), + }, + ) + + if err != nil { + return err + } - target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id)) - if err != nil { - return nil, err + continue + } + + // other edits will be added as CommentEdit operations + + target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id)) + if err != nil { + return err + } + + err = gi.ensureCommentEdit(repo, b, target, edit) + if err != nil { + return err + } } + } + + // check timeline items + for gi.iterator.NextTimeline() { + item := gi.iterator.TimelineValue() + + // if item is not a comment (label, unlabel, rename, close, open ...) + if item.Typename != "IssueComment" { + if err := gi.ensureTimelineItem(repo, b, item); err != nil { + return err + } + } else { // if item is comment + + // ensure person + author, err := gi.ensurePerson(repo, item.IssueComment.Author) + if err != nil { + return err + } + + var target git.Hash + target, err = b.ResolveOperationWithMetadata(keyGithubId, parseId(item.IssueComment.Id)) + if err != nil && err != cache.ErrNoMatchingOp { + // real error + return err + } + + // collect all edits + commentEdits := []userContentEdit{} + for gi.iterator.NextCommentEdit() { + if commentEdit := gi.iterator.CommentEditValue(); commentEdit.Diff != nil { + commentEdits = append(commentEdits, commentEdit) + } + } + + // if no edits are given we create the comment + if len(commentEdits) == 0 { + + // if comment doesn't exist + if err == cache.ErrNoMatchingOp { + + // add comment operation + op, err := b.AddCommentRaw( + author, + item.IssueComment.CreatedAt.Unix(), + cleanupText(string(item.IssueComment.Body)), + nil, + map[string]string{ + keyGithubId: parseId(item.IssueComment.Id), + }, + ) + if err != nil { + return err + } + + // set hash + target, err = op.Hash() + if err != nil { + return err + } + } + } else { + // if we have some edits + for _, edit := range item.IssueComment.UserContentEdits.Nodes { + + // create comment when target is an empty string + if target == "" { + op, err := b.AddCommentRaw( + author, + item.IssueComment.CreatedAt.Unix(), + cleanupText(string(*edit.Diff)), + nil, + map[string]string{ + keyGithubId: parseId(item.IssueComment.Id), + keyGithubUrl: item.IssueComment.Url.String(), + }, + ) + if err != nil { + return err + } + + // set hash + target, err = op.Hash() + if err != nil { + return err + } + } + + err := gi.ensureCommentEdit(repo, b, target, edit) + if err != nil { + return err + } + + } + } - err = gi.ensureCommentEdit(repo, b, target, edit) - if err != nil { - return nil, err } + } - if !edits.PageInfo.HasNextPage { - break + if err := gi.iterator.Error(); err != nil { + fmt.Printf("error importing issue %v\n", issue.Id) + return err } - variables["issueEditBefore"] = edits.PageInfo.StartCursor + // commit bug state + err = b.CommitAsNeeded() + if err != nil { + return err + } } - // TODO: check + import files - - // if we still didn't get a legit edit, create the bug from the issue data - if b == nil { - return repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure - issue.Title, - cleanupText(string(issue.Body)), - nil, - map[string]string{ - keyGithubId: parseId(issue.Id), - keyGithubUrl: issue.Url.String(), - }, - ) + if err := gi.iterator.Error(); err != nil { + fmt.Printf("import error: %v\n", err) } - return b, nil + fmt.Printf("Successfully imported %v issues from Github\n", gi.iterator.Count()) + return nil } -func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, cursor githubv4.String, item timelineItem, rootVariables map[string]interface{}) error { - fmt.Printf("import %s\n", item.Typename) +func (gi *githubImporter) Import(repo *cache.RepoCache, id string) error { + fmt.Println("IMPORT") + return nil +} + +func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, item timelineItem) error { + fmt.Printf("import item: %s\n", item.Typename) switch item.Typename { case "IssueComment": - return gi.ensureComment(repo, b, cursor, item.IssueComment, rootVariables) + //return gi.ensureComment(repo, b, cursor, item.IssueComment, rootVariables) case "LabeledEvent": id := parseId(item.LabeledEvent.Id) @@ -411,162 +354,13 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug return err default: - fmt.Println("ignore event ", item.Typename) + fmt.Printf("ignore event: %v\n", item.Typename) } return nil } -func (gi *githubImporter) ensureComment(repo *cache.RepoCache, b *cache.BugCache, cursor githubv4.String, comment issueComment, rootVariables map[string]interface{}) error { - author, err := gi.ensurePerson(repo, comment.Author) - if err != nil { - return err - } - - var target git.Hash - target, err = b.ResolveOperationWithMetadata(keyGithubId, parseId(comment.Id)) - if err != nil && err != cache.ErrNoMatchingOp { - // real error - return err - } - - // if there is no edit, the UserContentEdits given by github is empty. That - // means that the original message is given by the comment message. - // - // if there is edits, the UserContentEdits given by github contains both the - // original message and the following edits. The comment message give the last - // version so we don't care about that. - // - // the tricky part: for a comment older than the UserContentEdits API, github - // doesn't have the previous message version anymore and give an edition - // with .Diff == nil. We have to filter them. - - if len(comment.UserContentEdits.Nodes) == 0 { - if err == cache.ErrNoMatchingOp { - op, err := b.AddCommentRaw( - author, - comment.CreatedAt.Unix(), - cleanupText(string(comment.Body)), - nil, - map[string]string{ - keyGithubId: parseId(comment.Id), - }, - ) - if err != nil { - return err - } - - target, err = op.Hash() - if err != nil { - return err - } - } - - return nil - } - - // reverse the order, because github - reverseEdits(comment.UserContentEdits.Nodes) - - for i, edit := range comment.UserContentEdits.Nodes { - if target != "" && i == 0 { - // The first edit in the github result is the comment creation itself, we already have that - continue - } - - if target == "" { - if edit.Diff == nil { - // not enough data given by github for old edit, ignore them - continue - } - - op, err := b.AddCommentRaw( - author, - comment.CreatedAt.Unix(), - cleanupText(string(*edit.Diff)), - nil, - map[string]string{ - keyGithubId: parseId(comment.Id), - keyGithubUrl: comment.Url.String(), - }, - ) - if err != nil { - return err - } - - target, err = op.Hash() - if err != nil { - return err - } - } - - err := gi.ensureCommentEdit(repo, b, target, edit) - if err != nil { - return err - } - } - - if !comment.UserContentEdits.PageInfo.HasNextPage { - return nil - } - - // We have more edit, querying them - - q := &commentEditQuery{} - variables := map[string]interface{}{ - "owner": rootVariables["owner"], - "name": rootVariables["name"], - "issueFirst": rootVariables["issueFirst"], - "issueAfter": rootVariables["issueAfter"], - "timelineFirst": githubv4.Int(1), - "timelineAfter": cursor, - "commentEditLast": githubv4.Int(10), - "commentEditBefore": comment.UserContentEdits.PageInfo.StartCursor, - } - - for { - err := gi.client.Query(context.TODO(), &q, variables) - if err != nil { - return err - } - - edits := q.Repository.Issues.Nodes[0].Timeline.Nodes[0].IssueComment.UserContentEdits - - if len(edits.Nodes) == 0 { - return nil - } - - for i, edit := range edits.Nodes { - if i == 0 { - // The first edit in the github result is the creation itself, we already have that - continue - } - - err := gi.ensureCommentEdit(repo, b, target, edit) - if err != nil { - return err - } - } - - if !edits.PageInfo.HasNextPage { - break - } - - variables["commentEditBefore"] = edits.PageInfo.StartCursor - } - - // TODO: check + import files - - return nil -} - func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugCache, target git.Hash, edit userContentEdit) error { - if edit.Diff == nil { - // this happen if the event is older than early 2018, Github doesn't have the data before that. - // Best we can do is to ignore the event. - return nil - } - _, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(edit.Id)) if err == nil { // already imported @@ -670,7 +464,7 @@ func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*cache.IdentityCache, "login": githubv4.String("ghost"), } - err = gi.client.Query(context.TODO(), &q, variables) + err = gi.iterator.gc.Query(context.TODO(), &q, variables) if err != nil { return nil, err } diff --git a/bridge/github/iterator.go b/bridge/github/iterator.go index cb7c9760..9e1ff30e 100644 --- a/bridge/github/iterator.go +++ b/bridge/github/iterator.go @@ -46,6 +46,8 @@ type timelineIterator struct { issueEdit indexer commentEdit indexer + + lastEndCursor githubv4.String // storing timeline end cursor for future use } type iterator struct { @@ -81,9 +83,8 @@ func newIterator(conf core.Configuration, since time.Time) *iterator { return &iterator{ since: since, gc: buildClient(conf), - capacity: 8, - count: -1, - + capacity: 10, + count: 0, timeline: timelineIterator{ index: -1, issueEdit: indexer{-1}, @@ -154,19 +155,20 @@ func (i *iterator) reverseTimelineEditNodes() { } } -// Error . +// Error return last encountered error func (i *iterator) Error() error { return i.err } -// Count . +// Count return number of issues we iterated over func (i *iterator) Count() int { return i.count } +// Next issue func (i *iterator) NextIssue() bool { // we make the first move - if i.count == -1 { + if i.count == 0 { // init variables and goto queryIssue block i.initTimelineQueryVariables() @@ -181,11 +183,14 @@ func (i *iterator) NextIssue() bool { return false } - // if we have more pages updates variables and query them + // if we have more issues, query them i.timeline.variables["timelineAfter"] = (*githubv4.String)(nil) i.timeline.variables["issueAfter"] = i.timeline.query.Repository.Issues.PageInfo.EndCursor i.timeline.index = -1 + // store cursor for future use + i.timeline.lastEndCursor = i.timeline.query.Repository.Issues.Nodes[0].Timeline.PageInfo.EndCursor + // query issue block queryIssue: if err := i.gc.Query(context.TODO(), &i.timeline.query, i.timeline.variables); err != nil { @@ -224,6 +229,8 @@ func (i *iterator) NextTimeline() bool { return false } + i.timeline.lastEndCursor = i.timeline.query.Repository.Issues.Nodes[0].Timeline.PageInfo.EndCursor + // more timelines, query them i.timeline.variables["timelineAfter"] = i.timeline.query.Repository.Issues.Nodes[0].Timeline.PageInfo.EndCursor if err := i.gc.Query(context.TODO(), &i.timeline.query, i.timeline.variables); err != nil { @@ -240,10 +247,6 @@ func (i *iterator) TimelineValue() timelineItem { return i.timeline.query.Repository.Issues.Nodes[0].Timeline.Edges[i.timeline.index].Node } -func (i *iterator) timelineCursor() string { - return "" -} - func (i *iterator) NextIssueEdit() bool { if i.err != nil { return false @@ -359,11 +362,9 @@ func (i *iterator) NextCommentEdit() bool { return false } - // if there is more comment edits, query them - i.initCommentEditQueryVariables() if i.timeline.index == 0 { - i.commentEdit.variables["timelineAfter"] = i.timeline.query.Repository.Issues.Nodes[0].Timeline.PageInfo.EndCursor + i.commentEdit.variables["timelineAfter"] = i.timeline.lastEndCursor } else { i.commentEdit.variables["timelineAfter"] = i.timeline.query.Repository.Issues.Nodes[0].Timeline.Edges[i.timeline.index-1].Cursor } diff --git a/bridge/github/iterator_test.go b/bridge/github/iterator_test.go index c5820973..c5fad349 100644 --- a/bridge/github/iterator_test.go +++ b/bridge/github/iterator_test.go @@ -16,11 +16,12 @@ func Test_Iterator(t *testing.T) { keyToken: token, "user": user, "project": project, - }, time.Now().Add(-14*24*time.Hour)) + }, time.Time{}) + //time.Now().Add(-14*24*time.Hour)) for i.NextIssue() { v := i.IssueValue() - fmt.Printf("issue = id:%v title:%v\n", v.Id, v.Title) + fmt.Printf(" issue = id:%v title:%v\n", v.Id, v.Title) for i.NextIssueEdit() { v := i.IssueEditValue() @@ -33,12 +34,15 @@ func Test_Iterator(t *testing.T) { if v.Typename == "IssueComment" { for i.NextCommentEdit() { + _ = i.CommentEditValue() - //fmt.Printf("comment edit: %v\n", *v.Diff) fmt.Printf("comment edit\n") } } } } + + fmt.Println(i.Error()) + fmt.Println(i.Count()) } |