aboutsummaryrefslogtreecommitdiffstats
path: root/bridge/github/import.go
diff options
context:
space:
mode:
authorAmine Hilaly <hilalyamine@gmail.com>2019-04-27 00:15:02 +0100
committerAmine Hilaly <hilalyamine@gmail.com>2019-05-05 18:16:10 +0200
commit3bcaa35b5d25ca9e12389ab4bf78600ae5df8af8 (patch)
tree3c512c5324cce2fb1e9beefc810e2807936a0768 /bridge/github/import.go
parentc8ad4dbfd9511f4cfa748fa85c01fbca2edb348a (diff)
downloadgit-bug-3bcaa35b5d25ca9e12389ab4bf78600ae5df8af8.tar.gz
Integrate iterator with importer
Diffstat (limited to 'bridge/github/import.go')
-rw-r--r--bridge/github/import.go594
1 files changed, 194 insertions, 400 deletions
diff --git a/bridge/github/import.go b/bridge/github/import.go
index d641b192..74ccb776 100644
--- a/bridge/github/import.go
+++ b/bridge/github/import.go
@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"strings"
+ "time"
"github.com/MichaelMure/git-bug/bridge/core"
"github.com/MichaelMure/git-bug/bug"
@@ -13,308 +14,250 @@ import (
"github.com/shurcooL/githubv4"
)
-const keyGithubId = "github-id"
-const keyGithubUrl = "github-url"
-const keyGithubLogin = "github-login"
+const (
+ keyGithubId = "github-id"
+ keyGithubUrl = "github-url"
+ keyGithubLogin = "github-login"
+)
// githubImporter implements the Importer interface
type githubImporter struct {
- client *githubv4.Client
- conf core.Configuration
+ iterator *iterator
+ conf core.Configuration
}
func (gi *githubImporter) Init(conf core.Configuration) error {
- gi.conf = conf
- gi.client = buildClient(conf)
-
- return nil
-}
+ var since time.Time
-func (gi *githubImporter) ImportAll(repo *cache.RepoCache) error {
- q := &issueTimelineQuery{}
- variables := map[string]interface{}{
- "owner": githubv4.String(gi.conf[keyUser]),
- "name": githubv4.String(gi.conf[keyProject]),
- "issueFirst": githubv4.Int(1),
- "issueAfter": (*githubv4.String)(nil),
- "timelineFirst": githubv4.Int(10),
- "timelineAfter": (*githubv4.String)(nil),
-
- // Fun fact, github provide the comment edition in reverse chronological
- // order, because haha. Look at me, I'm dying of laughter.
- "issueEditLast": githubv4.Int(10),
- "issueEditBefore": (*githubv4.String)(nil),
- "commentEditLast": githubv4.Int(10),
- "commentEditBefore": (*githubv4.String)(nil),
- }
-
- var b *cache.BugCache
-
- for {
- err := gi.client.Query(context.TODO(), &q, variables)
+ // parse since value from configuration
+ if value, ok := conf["since"]; ok && value != "" {
+ s, err := time.Parse(time.RFC3339, value)
if err != nil {
return err
}
- if len(q.Repository.Issues.Nodes) == 0 {
- return nil
- }
-
- issue := q.Repository.Issues.Nodes[0]
-
- if b == nil {
- b, err = gi.ensureIssue(repo, issue, variables)
- if err != nil {
- return err
- }
- }
-
- for _, itemEdge := range q.Repository.Issues.Nodes[0].Timeline.Edges {
- err = gi.ensureTimelineItem(repo, b, itemEdge.Cursor, itemEdge.Node, variables)
- if err != nil {
- return err
- }
- }
-
- if !issue.Timeline.PageInfo.HasNextPage {
- err = b.CommitAsNeeded()
- if err != nil {
- return err
- }
-
- b = nil
-
- if !q.Repository.Issues.PageInfo.HasNextPage {
- break
- }
-
- variables["issueAfter"] = githubv4.NewString(q.Repository.Issues.PageInfo.EndCursor)
- variables["timelineAfter"] = (*githubv4.String)(nil)
- continue
- }
-
- variables["timelineAfter"] = githubv4.NewString(issue.Timeline.PageInfo.EndCursor)
+ since = s
}
+ gi.iterator = newIterator(conf, since)
return nil
}
-func (gi *githubImporter) Import(repo *cache.RepoCache, id string) error {
- fmt.Println("IMPORT")
-
- return nil
-}
-
-func (gi *githubImporter) ensureIssue(repo *cache.RepoCache, issue issueTimeline, rootVariables map[string]interface{}) (*cache.BugCache, error) {
- fmt.Printf("import issue: %s\n", issue.Title)
-
- author, err := gi.ensurePerson(repo, issue.Author)
- if err != nil {
- return nil, err
- }
-
- b, err := repo.ResolveBugCreateMetadata(keyGithubId, parseId(issue.Id))
- if err != nil && err != bug.ErrBugNotExist {
- return nil, err
- }
-
- // if there is no edit, the UserContentEdits given by github is empty. That
- // means that the original message is given by the issue message.
- //
- // if there is edits, the UserContentEdits given by github contains both the
- // original message and the following edits. The issue message give the last
- // version so we don't care about that.
- //
- // the tricky part: for an issue older than the UserContentEdits API, github
- // doesn't have the previous message version anymore and give an edition
- // with .Diff == nil. We have to filter them.
-
- if len(issue.UserContentEdits.Nodes) == 0 {
- if err == bug.ErrBugNotExist {
- b, err = repo.NewBugRaw(
- author,
- issue.CreatedAt.Unix(),
- // Todo: this might not be the initial title, we need to query the
- // timeline to be sure
- issue.Title,
- cleanupText(string(issue.Body)),
- nil,
- map[string]string{
- keyGithubId: parseId(issue.Id),
- keyGithubUrl: issue.Url.String(),
- },
- )
- if err != nil {
- return nil, err
- }
- }
-
- return b, nil
- }
-
- // reverse the order, because github
- reverseEdits(issue.UserContentEdits.Nodes)
-
- for i, edit := range issue.UserContentEdits.Nodes {
- if b != nil && i == 0 {
- // The first edit in the github result is the creation itself, we already have that
- continue
- }
-
- if b == nil {
- if edit.Diff == nil {
- // not enough data given by github for old edit, ignore them
- continue
- }
-
- // we create the bug as soon as we have a legit first edition
- b, err = repo.NewBugRaw(
- author,
- issue.CreatedAt.Unix(),
- // Todo: this might not be the initial title, we need to query the
- // timeline to be sure
- issue.Title,
- cleanupText(string(*edit.Diff)),
- nil,
- map[string]string{
- keyGithubId: parseId(issue.Id),
- keyGithubUrl: issue.Url.String(),
- },
- )
- if err != nil {
- return nil, err
- }
- continue
- }
+func (gi *githubImporter) ImportAll(repo *cache.RepoCache) error {
+ // Loop over all available issues
+ for gi.iterator.NextIssue() {
+ issue := gi.iterator.IssueValue()
+ fmt.Printf("importing issue: %v\n", issue.Title)
- target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id))
- if err != nil {
- return nil, err
- }
+ // In each iteration create a new bug
+ var b *cache.BugCache
- err = gi.ensureCommentEdit(repo, b, target, edit)
+ // ensure issue author
+ author, err := gi.ensurePerson(repo, issue.Author)
if err != nil {
- return nil, err
- }
- }
-
- if !issue.UserContentEdits.PageInfo.HasNextPage {
- // if we still didn't get a legit edit, create the bug from the issue data
- if b == nil {
- return repo.NewBugRaw(
- author,
- issue.CreatedAt.Unix(),
- // Todo: this might not be the initial title, we need to query the
- // timeline to be sure
- issue.Title,
- cleanupText(string(issue.Body)),
- nil,
- map[string]string{
- keyGithubId: parseId(issue.Id),
- keyGithubUrl: issue.Url.String(),
- },
- )
+ return err
}
- return b, nil
- }
- // We have more edit, querying them
-
- q := &issueEditQuery{}
- variables := map[string]interface{}{
- "owner": rootVariables["owner"],
- "name": rootVariables["name"],
- "issueFirst": rootVariables["issueFirst"],
- "issueAfter": rootVariables["issueAfter"],
- "issueEditLast": githubv4.Int(10),
- "issueEditBefore": issue.UserContentEdits.PageInfo.StartCursor,
- }
-
- for {
- err := gi.client.Query(context.TODO(), &q, variables)
- if err != nil {
- return nil, err
+ // resolve bug
+ b, err = repo.ResolveBugCreateMetadata(keyGithubId, parseId(issue.Id))
+ if err != nil && err != bug.ErrBugNotExist {
+ return err
}
- edits := q.Repository.Issues.Nodes[0].UserContentEdits
-
- if len(edits.Nodes) == 0 {
- return b, nil
+ // get issue edits
+ issueEdits := []userContentEdit{}
+ for gi.iterator.NextIssueEdit() {
+ // append only edits with non empty diff
+ if issueEdit := gi.iterator.IssueEditValue(); issueEdit.Diff != nil {
+ issueEdits = append(issueEdits, issueEdit)
+ }
}
- for _, edit := range edits.Nodes {
- if b == nil {
- if edit.Diff == nil {
- // not enough data given by github for old edit, ignore them
- continue
- }
-
- // we create the bug as soon as we have a legit first edition
+ // if issueEdits is empty
+ if len(issueEdits) == 0 {
+ if err == bug.ErrBugNotExist {
+ // create bug
b, err = repo.NewBugRaw(
author,
issue.CreatedAt.Unix(),
- // Todo: this might not be the initial title, we need to query the
- // timeline to be sure
issue.Title,
- cleanupText(string(*edit.Diff)),
+ cleanupText(string(issue.Body)),
nil,
map[string]string{
keyGithubId: parseId(issue.Id),
keyGithubUrl: issue.Url.String(),
- },
- )
+ })
if err != nil {
- return nil, err
+ return err
}
- continue
}
+ } else {
+ // create bug from given issueEdits
+ for _, edit := range issueEdits {
+ // if the bug doesn't exist
+ if b == nil {
+ // we create the bug as soon as we have a legitimate first edit
+ b, err = repo.NewBugRaw(
+ author,
+ issue.CreatedAt.Unix(),
+ issue.Title,
+ cleanupText(string(*edit.Diff)),
+ nil,
+ map[string]string{
+ keyGithubId: parseId(issue.Id),
+ keyGithubUrl: issue.Url.String(),
+ },
+ )
+
+ if err != nil {
+ return err
+ }
- target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id))
- if err != nil {
- return nil, err
+ continue
+ }
+
+ // other edits will be added as CommentEdit operations
+
+ target, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(issue.Id))
+ if err != nil {
+ return err
+ }
+
+ err = gi.ensureCommentEdit(repo, b, target, edit)
+ if err != nil {
+ return err
+ }
}
+ }
+
+ // check timeline items
+ for gi.iterator.NextTimeline() {
+ item := gi.iterator.TimelineValue()
+
+ // if item is not a comment (label, unlabel, rename, close, open ...)
+ if item.Typename != "IssueComment" {
+ if err := gi.ensureTimelineItem(repo, b, item); err != nil {
+ return err
+ }
+ } else { // if item is comment
+
+ // ensure person
+ author, err := gi.ensurePerson(repo, item.IssueComment.Author)
+ if err != nil {
+ return err
+ }
+
+ var target git.Hash
+ target, err = b.ResolveOperationWithMetadata(keyGithubId, parseId(item.IssueComment.Id))
+ if err != nil && err != cache.ErrNoMatchingOp {
+ // real error
+ return err
+ }
+
+ // collect all edits
+ commentEdits := []userContentEdit{}
+ for gi.iterator.NextCommentEdit() {
+ if commentEdit := gi.iterator.CommentEditValue(); commentEdit.Diff != nil {
+ commentEdits = append(commentEdits, commentEdit)
+ }
+ }
+
+ // if no edits are given we create the comment
+ if len(commentEdits) == 0 {
+
+ // if comment doesn't exist
+ if err == cache.ErrNoMatchingOp {
+
+ // add comment operation
+ op, err := b.AddCommentRaw(
+ author,
+ item.IssueComment.CreatedAt.Unix(),
+ cleanupText(string(item.IssueComment.Body)),
+ nil,
+ map[string]string{
+ keyGithubId: parseId(item.IssueComment.Id),
+ },
+ )
+ if err != nil {
+ return err
+ }
+
+ // set hash
+ target, err = op.Hash()
+ if err != nil {
+ return err
+ }
+ }
+ } else {
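+ // if we have some edits
+ // NOTE(review): the loop below ranges over the unfiltered UserContentEdits.Nodes rather than the filtered commentEdits, so edit.Diff may be nil when dereferenced — confirm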
+ for _, edit := range item.IssueComment.UserContentEdits.Nodes {
+
+ // create comment when target is an empty string
+ if target == "" {
+ op, err := b.AddCommentRaw(
+ author,
+ item.IssueComment.CreatedAt.Unix(),
+ cleanupText(string(*edit.Diff)),
+ nil,
+ map[string]string{
+ keyGithubId: parseId(item.IssueComment.Id),
+ keyGithubUrl: item.IssueComment.Url.String(),
+ },
+ )
+ if err != nil {
+ return err
+ }
+
+ // set hash
+ target, err = op.Hash()
+ if err != nil {
+ return err
+ }
+ }
+
+ err := gi.ensureCommentEdit(repo, b, target, edit)
+ if err != nil {
+ return err
+ }
+
+ }
+ }
- err = gi.ensureCommentEdit(repo, b, target, edit)
- if err != nil {
- return nil, err
}
+
}
- if !edits.PageInfo.HasNextPage {
- break
+ if err := gi.iterator.Error(); err != nil {
+ fmt.Printf("error importing issue %v\n", issue.Id)
+ return err
}
- variables["issueEditBefore"] = edits.PageInfo.StartCursor
+ // commit bug state
+ err = b.CommitAsNeeded()
+ if err != nil {
+ return err
+ }
}
- // TODO: check + import files
-
- // if we still didn't get a legit edit, create the bug from the issue data
- if b == nil {
- return repo.NewBugRaw(
- author,
- issue.CreatedAt.Unix(),
- // Todo: this might not be the initial title, we need to query the
- // timeline to be sure
- issue.Title,
- cleanupText(string(issue.Body)),
- nil,
- map[string]string{
- keyGithubId: parseId(issue.Id),
- keyGithubUrl: issue.Url.String(),
- },
- )
+ if err := gi.iterator.Error(); err != nil {
+ fmt.Printf("import error: %v\n", err)
}
- return b, nil
+ fmt.Printf("Successfully imported %v issues from Github\n", gi.iterator.Count())
+ return nil
}
-func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, cursor githubv4.String, item timelineItem, rootVariables map[string]interface{}) error {
- fmt.Printf("import %s\n", item.Typename)
+func (gi *githubImporter) Import(repo *cache.RepoCache, id string) error {
+ fmt.Println("IMPORT")
+ return nil
+}
+
+func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.BugCache, item timelineItem) error {
+ fmt.Printf("import item: %s\n", item.Typename)
switch item.Typename {
case "IssueComment":
- return gi.ensureComment(repo, b, cursor, item.IssueComment, rootVariables)
+ //return gi.ensureComment(repo, b, cursor, item.IssueComment, rootVariables)
case "LabeledEvent":
id := parseId(item.LabeledEvent.Id)
@@ -411,162 +354,13 @@ func (gi *githubImporter) ensureTimelineItem(repo *cache.RepoCache, b *cache.Bug
return err
default:
- fmt.Println("ignore event ", item.Typename)
+ fmt.Printf("ignore event: %v\n", item.Typename)
}
return nil
}
-func (gi *githubImporter) ensureComment(repo *cache.RepoCache, b *cache.BugCache, cursor githubv4.String, comment issueComment, rootVariables map[string]interface{}) error {
- author, err := gi.ensurePerson(repo, comment.Author)
- if err != nil {
- return err
- }
-
- var target git.Hash
- target, err = b.ResolveOperationWithMetadata(keyGithubId, parseId(comment.Id))
- if err != nil && err != cache.ErrNoMatchingOp {
- // real error
- return err
- }
-
- // if there is no edit, the UserContentEdits given by github is empty. That
- // means that the original message is given by the comment message.
- //
- // if there is edits, the UserContentEdits given by github contains both the
- // original message and the following edits. The comment message give the last
- // version so we don't care about that.
- //
- // the tricky part: for a comment older than the UserContentEdits API, github
- // doesn't have the previous message version anymore and give an edition
- // with .Diff == nil. We have to filter them.
-
- if len(comment.UserContentEdits.Nodes) == 0 {
- if err == cache.ErrNoMatchingOp {
- op, err := b.AddCommentRaw(
- author,
- comment.CreatedAt.Unix(),
- cleanupText(string(comment.Body)),
- nil,
- map[string]string{
- keyGithubId: parseId(comment.Id),
- },
- )
- if err != nil {
- return err
- }
-
- target, err = op.Hash()
- if err != nil {
- return err
- }
- }
-
- return nil
- }
-
- // reverse the order, because github
- reverseEdits(comment.UserContentEdits.Nodes)
-
- for i, edit := range comment.UserContentEdits.Nodes {
- if target != "" && i == 0 {
- // The first edit in the github result is the comment creation itself, we already have that
- continue
- }
-
- if target == "" {
- if edit.Diff == nil {
- // not enough data given by github for old edit, ignore them
- continue
- }
-
- op, err := b.AddCommentRaw(
- author,
- comment.CreatedAt.Unix(),
- cleanupText(string(*edit.Diff)),
- nil,
- map[string]string{
- keyGithubId: parseId(comment.Id),
- keyGithubUrl: comment.Url.String(),
- },
- )
- if err != nil {
- return err
- }
-
- target, err = op.Hash()
- if err != nil {
- return err
- }
- }
-
- err := gi.ensureCommentEdit(repo, b, target, edit)
- if err != nil {
- return err
- }
- }
-
- if !comment.UserContentEdits.PageInfo.HasNextPage {
- return nil
- }
-
- // We have more edit, querying them
-
- q := &commentEditQuery{}
- variables := map[string]interface{}{
- "owner": rootVariables["owner"],
- "name": rootVariables["name"],
- "issueFirst": rootVariables["issueFirst"],
- "issueAfter": rootVariables["issueAfter"],
- "timelineFirst": githubv4.Int(1),
- "timelineAfter": cursor,
- "commentEditLast": githubv4.Int(10),
- "commentEditBefore": comment.UserContentEdits.PageInfo.StartCursor,
- }
-
- for {
- err := gi.client.Query(context.TODO(), &q, variables)
- if err != nil {
- return err
- }
-
- edits := q.Repository.Issues.Nodes[0].Timeline.Nodes[0].IssueComment.UserContentEdits
-
- if len(edits.Nodes) == 0 {
- return nil
- }
-
- for i, edit := range edits.Nodes {
- if i == 0 {
- // The first edit in the github result is the creation itself, we already have that
- continue
- }
-
- err := gi.ensureCommentEdit(repo, b, target, edit)
- if err != nil {
- return err
- }
- }
-
- if !edits.PageInfo.HasNextPage {
- break
- }
-
- variables["commentEditBefore"] = edits.PageInfo.StartCursor
- }
-
- // TODO: check + import files
-
- return nil
-}
-
func (gi *githubImporter) ensureCommentEdit(repo *cache.RepoCache, b *cache.BugCache, target git.Hash, edit userContentEdit) error {
- if edit.Diff == nil {
- // this happen if the event is older than early 2018, Github doesn't have the data before that.
- // Best we can do is to ignore the event.
- return nil
- }
-
_, err := b.ResolveOperationWithMetadata(keyGithubId, parseId(edit.Id))
if err == nil {
// already imported
@@ -670,7 +464,7 @@ func (gi *githubImporter) getGhost(repo *cache.RepoCache) (*cache.IdentityCache,
"login": githubv4.String("ghost"),
}
- err = gi.client.Query(context.TODO(), &q, variables)
+ err = gi.iterator.gc.Query(context.TODO(), &q, variables)
if err != nil {
return nil, err
}