From 3bcaa35b5d25ca9e12389ab4bf78600ae5df8af8 Mon Sep 17 00:00:00 2001 From: Amine Hilaly Date: Sat, 27 Apr 2019 00:15:02 +0100 Subject: Integrate iterator with importer --- bridge/github/import.go | 594 ++++++++++++++++-------------------------------- 1 file changed, 194 insertions(+), 400 deletions(-) (limited to 'bridge/github/import.go') diff --git a/bridge/github/import.go b/bridge/github/import.go index d641b192..74ccb776 100644 --- a/bridge/github/import.go +++ b/bridge/github/import.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "github.com/MichaelMure/git-bug/bridge/core" "github.com/MichaelMure/git-bug/bug" @@ -13,308 +14,250 @@ import ( "github.com/shurcooL/githubv4" ) -const keyGithubId = "github-id" -const keyGithubUrl = "github-url" -const keyGithubLogin = "github-login" +const ( + keyGithubId = "github-id" + keyGithubUrl = "github-url" + keyGithubLogin = "github-login" +) // githubImporter implement the Importer interface type githubImporter struct { - client *githubv4.Client - conf core.Configuration + iterator *iterator + conf core.Configuration } func (gi *githubImporter) Init(conf core.Configuration) error { - gi.conf = conf - gi.client = buildClient(conf) - - return nil -} + var since time.Time -func (gi *githubImporter) ImportAll(repo *cache.RepoCache) error { - q := &issueTimelineQuery{} - variables := map[string]interface{}{ - "owner": githubv4.String(gi.conf[keyUser]), - "name": githubv4.String(gi.conf[keyProject]), - "issueFirst": githubv4.Int(1), - "issueAfter": (*githubv4.String)(nil), - "timelineFirst": githubv4.Int(10), - "timelineAfter": (*githubv4.String)(nil), - - // Fun fact, github provide the comment edition in reverse chronological - // order, because haha. Look at me, I'm dying of laughter. - "issueEditLast": githubv4.Int(10), - "issueEditBefore": (*githubv4.String)(nil), - "commentEditLast": githubv4.Int(10), - "commentEditBefore": (*githubv4.String)(nil), - } - - var b *cache.BugCache - - for { - err := gi.client.Query(context.TODO(), &q, variables) + // parse since value from configuration + if value, ok := conf["since"]; ok && value != "" { + s, err := time.Parse(time.RFC3339, value) if err != nil { return err } - if len(q.Repository.Issues.Nodes) == 0 { - return nil - } - - issue := q.Repository.Issues.Nodes[0] - - if b == nil { - b, err = gi.ensureIssue(repo, issue, variables) - if err != nil { - return err - } - } - - for _, itemEdge := range q.Repository.Issues.Nodes[0].Timeline.Edges { - err = gi.ensureTimelineItem(repo, b, itemEdge.Cursor, itemEdge.Node, variables) - if err != nil { - return err - } - } - - if !issue.Timeline.PageInfo.HasNextPage { - err = b.CommitAsNeeded() - if err != nil { - return err - } - - b = nil - - if !q.Repository.Issues.PageInfo.HasNextPage { - break - } - - variables["issueAfter"] = githubv4.NewString(q.Repository.Issues.PageInfo.EndCursor) - variables["timelineAfter"] = (*githubv4.String)(nil) - continue - } - - variables["timelineAfter"] = githubv4.NewString(issue.Timeline.PageInfo.EndCursor) + since = s } + gi.iterator = newIterator(conf, since) return nil } -func (gi *githubImporter) Import(repo *cache.RepoCache, id string) error { - fmt.Println("IMPORT") - - return nil -} - -func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline, rootVariables map[string]interface{}) (*cache.BugCache, error) { - fmt.Printf("import issue: %s\n", issue.Title) - - author, err := gi.ensurePerson(repo, issue.Author) - if err != nil { - return nil, err - } - - b, err := repo.ResolveBugCreateMetadata(keyGithubId, parseId(issue.Id)) - if err != nil && err != bug.ErrBugNotExist { - return nil, err - } - - // if there is no edit, the UserContentEdits given by github is empty. That - // means that the original message is given by the issue message. - // - // if there is edits, the UserContentEdits given by github contains both the - // original message and the following edits. The issue message give the last - // version so we don't care about that. - // - // the tricky part: for an issue older than the UserContentEdits API, github - // doesn't have the previous message version anymore and give an edition - // with .Diff == nil. We have to filter them. - - if len(issue.UserContentEdits.Nodes) == 0 { - if err == bug.ErrBugNotExist { - b, err = repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure - issue.Title, - cleanupText(string(issue.Body)), - nil, - map[string]string{ - keyGithubId: parseId(issue.Id), - keyGithubUrl: issue.Url.String(), - }, - ) - if err != nil { - return nil, err - } - } - - return b, nil - } - - // reverse the order, because github - reverseEdits(issue.UserContentEdits.Nodes) - - for i, edit := range issue.UserContentEdits.Nodes { - if b != nil && i == 0 { - // The first edit in the github result is the creation itself, we already have that - continue - } - - if b == nil { - if edit.Diff == nil { - // not enough data given by github for old edit, ignore them - continue - } - - // we create the bug as soon as we have a legit first edition - b, err = repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure - issue.Title, - cleanupText(string(*edit.Diff)), - nil, - map[string]string{ - keyGithubId: parseId(issue.Id), - keyGithubUrl: issue.Url.String(), - }, - ) - if err != nil { - return nil, err - } - continue - } +func (gi *githubImporter) ImportAll(repo *cache.RepoCache) error { + // Loop over all available issues + for gi.iterator.NextIssue() { + issue := gi.iterator.IssueValue() + fmt.Printf("importing issue: %v\n", issue.Title) - target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id)) - if err != nil { - return nil, err - } + // In each iteration create a new bug + var b *cache.BugCache - err = gi.ensureCommentEdit(repo, b, target, edit) + // ensure issue author + author, err := gi.ensurePerson(repo, issue.Author) if err != nil { - return nil, err - } - } - - if !issue.UserContentEdits.PageInfo.HasNextPage { - // if we still didn't get a legit edit, create the bug from the issue data - if b == nil { - return repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure - issue.Title, - cleanupText(string(issue.Body)), - nil, - map[string]string{ - keyGithubId: parseId(issue.Id), - keyGithubUrl: issue.Url.String(), - }, - ) + return err } - return b, nil - } - // We have more edit, querying them - - q := &issueEditQuery{} - variables := map[string]interface{}{ - "owner": rootVariables["owner"], - "name": rootVariables["name"], - "issueFirst": rootVariables["issueFirst"], - "issueAfter": rootVariables["issueAfter"], - "issueEditLast": githubv4.Int(10), - "issueEditBefore": issue.UserContentEdits.PageInfo.StartCursor, - } - - for { - err := gi.client.Query(context.TODO(), &q, variables) - if err != nil { - return nil, err + // resolve bug + b, err = repo.ResolveBugCreateMetadata(keyGithubId, parseId(issue.Id)) + if err != nil && err != bug.ErrBugNotExist { + return err } - edits := q.Repository.Issues.Nodes[0].UserContentEdits - - if len(edits.Nodes) == 0 { - return b, nil + // get issue edits + issueEdits := []userContentEdit{} + for gi.iterator.NextIssueEdit() { + // append only edits with non empty diff + if issueEdit := gi.iterator.IssueEditValue(); issueEdit.Diff != nil { + issueEdits = append(issueEdits, issueEdit) + } } - for _, edit := range edits.Nodes { - if b == nil { - if edit.Diff == nil { - // not enough data given by github for old edit, ignore them - continue - } - - // we create the bug as soon as we have a legit first edition + // if issueEdits is empty + if len(issueEdits) == 0 { + if err == bug.ErrBugNotExist { + // create bug b, err = repo.NewBugRaw( author, issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure issue.Title, - cleanupText(string(*edit.Diff)), + cleanupText(string(issue.Body)), nil, map[string]string{ keyGithubId: parseId(issue.Id), keyGithubUrl: issue.Url.String(), - }, - ) + }) if err != nil { - return nil, err + return err } - continue } + } else { + // create bug from given issueEdits + for _, edit := range issueEdits { + // if the bug doesn't exist + if b == nil { + // we create the bug as soon as we have a legit first edition + b, err = repo.NewBugRaw( + author, + issue.CreatedAt.Unix(), + issue.Title, + cleanupText(string(*edit.Diff)), + nil, + map[string]string{ + keyGithubId: parseId(issue.Id), + keyGithubUrl: issue.Url.String(), + }, + ) + + if err != nil { + return err + } - target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id)) - if err != nil { - return nil, err + continue + } + + // other edits will be added as CommentEdit operations + + target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id)) + if err != nil { + return err + } + + err = gi.ensureCommentEdit(repo, b, target, edit) + if err != nil { + return err + } } + } + + // check timeline items + for gi.iterator.NextTimeline() { + item := gi.iterator.TimelineValue() + + // if item is not a comment (label, unlabel, rename, close, open ...) + if item.Typename != "IssueComment" { + if err := gi.ensureTimelineItem(repo, b, item); err != nil { + return err + } + } else { // if item is comment + + // ensure person + author, err := gi.ensurePerson(repo, item.IssueComment.Author) + if err != nil { + return err + } + + var target git.Hash + target, err = b.ResolveOperationWithMetadata(keyGithubId, parseId(item.IssueComment.Id)) + if err != nil && err != cache.ErrNoMatchingOp { + // real error + return err + } + + // collect all edits + commentEdits := []userContentEdit{} + for gi.iterator.NextCommentEdit() { + if commentEdit := gi.iterator.CommentEditValue(); commentEdit.Diff != nil { + commentEdits = append(commentEdits, commentEdit) + } + } + + // if no edits are given we create the comment + if len(commentEdits) == 0 { + + // if comment doesn't exist + if err == cache.ErrNoMatchingOp { + + // add comment operation + op, err := b.AddCommentRaw( + author, + item.IssueComment.CreatedAt.Unix(), + cleanupText(string(item.IssueComment.Body)), + nil, + map[string]string{ + keyGithubId: parseId(item.IssueComment.Id), + }, + ) + if err != nil { + return err + } + + // set hash + target, err = op.Hash() + if err != nil { + return err + } + } + } else { + // if we have some edits + for _, edit := range item.IssueComment.UserContentEdits.Nodes { + + // create comment when target is an empty string + if target == "" { + op, err := b.AddCommentRaw( + author, + item.IssueComment.CreatedAt.Unix(), + cleanupText(string(*edit.Diff)), + nil, + map[string]string{ + keyGithubId: parseId(item.IssueComment.Id), + keyGithubUrl: item.IssueComment.Url.String(), + }, + ) + if err != nil { + return err + } + + // set hash + target, err = op.Hash() + if err != nil { + return err + } + } + + err := gi.ensureCommentEdit(repo, b, target, edit) + if err != nil { + return err + } + + } + } - err = gi.ensureCommentEdit(repo, b, target, edit) - if err != nil { - return nil, err } + } - if !edits.PageInfo.HasNextPage { - break + if err := gi.iterator.Error(); err != nil { + fmt.Printf("error importing issue %v\n", issue.Id) + return err } - variables["issueEditBefore"] = edits.PageInfo.StartCursor + // commit bug state + err = b.CommitAsNeeded() + if err != nil { + return err + } } - // TODO: check + import files - - // if we still didn't get a legit edit, create the bug from the issue data - if b == nil { - return repo.NewBugRaw( - author, - issue.CreatedAt.Unix(), - // Todo: this might not be the initial title, we need to query the - // timeline to be sure - issue.Title, - cleanupText(string(issue.Body)), - nil, - map[string]string{ - keyGithubId: parseId(issue.Id), - keyGithubUrl: issue.Url.String(), - }, - ) + if err := gi.iterator.Error(); err != nil { + fmt.Printf("import error: %v\n", err) } - return b, nil + fmt.Printf("Successfully imported %v issues from Github\n", gi.iterator.Count()) + return nil } -func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, cursor githubv4.String, item timelineItem, rootVariables map[string]interface{}) error { - fmt.Printf("import %s\n", item.Typename) +func (gi *githubImporter) Import(repo *cache.RepoCache, id string) error { + fmt.Println("IMPORT") + return nil +} + +func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, item timelineItem) error { + fmt.Printf("import item: %s\n", item.Typename) switch item.Typename { case "IssueComment": - return gi.ensureComment(repo, b, cursor, item.IssueComment, rootVariables) + //return gi.ensureComment(repo, b, cursor, item.IssueComment, rootVariables) case "LabeledEvent": id := parseId(item.LabeledEvent.Id) @@ -411,162 +354,13 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug return err default: - fmt.Println("ignore event ", item.Typename) + fmt.Printf("ignore event: %v\n", item.Typename) } return nil } -func (gi *githubImporter) ensureComment(repo *cache.RepoCache, b *cache.BugCache, cursor githubv4.String, comment issueComment, rootVariables map[string]interface{}) error { - author, err := gi.ensurePerson(repo, comment.Author) - if err != nil { - return err - } - - var target git.Hash - target, err = b.ResolveOperationWithMetadata(keyGithubId, parseId(comment.Id)) - if err != nil && err != cache.ErrNoMatchingOp { - // real error - return err - } - - // if there is no edit, the UserContentEdits given by github is empty. That - // means that the original message is given by the comment message. - // - // if there is edits, the UserContentEdits given by github contains both the - // original message and the following edits. The comment message give the last - // version so we don't care about that. - // - // the tricky part: for a comment older than the UserContentEdits API, github - // doesn't have the previous message version anymore and give an edition - // with .Diff == nil. We have to filter them. - - if len(comment.UserContentEdits.Nodes) == 0 { - if err == cache.ErrNoMatchingOp { - op, err := b.AddCommentRaw( - author, - comment.CreatedAt.Unix(), - cleanupText(string(comment.Body)), - nil, - map[string]string{ - keyGithubId: parseId(comment.Id), - }, - ) - if err != nil { - return err - } - - target, err = op.Hash() - if err != nil { - return err - } - } - - return nil - } - - // reverse the order, because github - reverseEdits(comment.UserContentEdits.Nodes) - - for i, edit := range comment.UserContentEdits.Nodes { - if target != "" && i == 0 { - // The first edit in the github result is the comment creation itself, we already have that - continue - } - - if target == "" { - if edit.Diff == nil { - // not enough data given by github for old edit, ignore them - continue - } - - op, err := b.AddCommentRaw( - author, - comment.CreatedAt.Unix(), - cleanupText(string(*edit.Diff)), - nil, - map[string]string{ - keyGithubId: parseId(comment.Id), - keyGithubUrl: comment.Url.String(), - }, - ) - if err != nil { - return err - } - - target, err = op.Hash() - if err != nil { - return err - } - } - - err := gi.ensureCommentEdit(repo, b, target, edit) - if err != nil { - return err - } - } - - if !comment.UserContentEdits.PageInfo.HasNextPage { - return nil - } - - // We have more edit, querying them - - q := &commentEditQuery{} - variables := map[string]interface{}{ - "owner": rootVariables["owner"], - "name": rootVariables["name"], - "issueFirst": rootVariables["issueFirst"], - "issueAfter": rootVariables["issueAfter"], - "timelineFirst": githubv4.Int(1), - "timelineAfter": cursor, - "commentEditLast": githubv4.Int(10), - "commentEditBefore": comment.UserContentEdits.PageInfo.StartCursor, - } - - for { - err := gi.client.Query(context.TODO(), &q, variables) - if err != nil { - return err - } - - edits := q.Repository.Issues.Nodes[0].Timeline.Nodes[0].IssueComment.UserContentEdits - - if len(edits.Nodes) == 0 { - return nil - } - - for i, edit := range edits.Nodes { - if i == 0 { - // The first edit in the github result is the creation itself, we already have that - continue - } - - err := gi.ensureCommentEdit(repo, b, target, edit) - if err != nil { - return err - } - } - - if !edits.PageInfo.HasNextPage { - break - } - - variables["commentEditBefore"] = edits.PageInfo.StartCursor - } - - // TODO: check + import files - - return nil -} - func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugCache, target git.Hash, edit userContentEdit) error { - if edit.Diff == nil { - // this happen if the event is older than early 2018, Github doesn't have the data before that. - // Best we can do is to ignore the event. - return nil - } - _, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(edit.Id)) if err == nil { // already imported @@ -670,7 +464,7 @@ func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*cache.IdentityCache, "login": githubv4.String("ghost"), } - err = gi.client.Query(context.TODO(), &q, variables) + err = gi.iterator.gc.Query(context.TODO(), &q, variables) if err != nil { return nil, err } -- cgit