From ec65d90feaf3172a8bd1bf51bb85e7bdaaf28a54 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 19:11:43 +0200 Subject: plumbing: object, add APIs for traversing over commit graphs Signed-off-by: Filip Navara --- plumbing/object/commitnode.go | 310 +++++++++++++++++++++++++++++ plumbing/object/commitnode_test.go | 81 ++++++++ plumbing/object/commitnode_walker_ctime.go | 108 ++++++++++ 3 files changed, 499 insertions(+) create mode 100644 plumbing/object/commitnode.go create mode 100644 plumbing/object/commitnode_test.go create mode 100644 plumbing/object/commitnode_walker_ctime.go (limited to 'plumbing') diff --git a/plumbing/object/commitnode.go b/plumbing/object/commitnode.go new file mode 100644 index 0000000..a613eb4 --- /dev/null +++ b/plumbing/object/commitnode.go @@ -0,0 +1,310 @@ +package object + +import ( + "fmt" + "io" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// CommitNode is generic interface encapsulating either Commit object or +// graphCommitNode object +type CommitNode interface { + ID() plumbing.Hash + Tree() (*Tree, error) + CommitTime() time.Time +} + +// CommitNodeIndex is generic interface encapsulating an index of CommitNode objects +// and accessor methods for walking it as a directed graph +type CommitNodeIndex interface { + NumParents(node CommitNode) int + ParentNodes(node CommitNode) CommitNodeIter + ParentNode(node CommitNode, i int) (CommitNode, error) + ParentHashes(node CommitNode) []plumbing.Hash + + Get(hash plumbing.Hash) (CommitNode, error) + + // Commit returns the full commit object from the node + Commit(node CommitNode) (*Commit, error) +} + +// CommitNodeIter is a generic closable interface for iterating over commit nodes. +type CommitNodeIter interface { + Next() (CommitNode, error) + ForEach(func(CommitNode) error) error + Close() +} + +// graphCommitNode is a reduced representation of Commit as presented in the commit +// graph file (commitgraph.Node). It is merely useful as an optimization for walking +// the commit graphs. +// +// graphCommitNode implements the CommitNode interface. +type graphCommitNode struct { + // Hash for the Commit object + hash plumbing.Hash + // Index of the node in the commit graph file + index int + + node *commitgraph.Node + gci *graphCommitNodeIndex +} + +// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit +// graph files and the object store. +// +// graphCommitNodeIndex implements the CommitNodeIndex interface +type graphCommitNodeIndex struct { + commitGraph commitgraph.Index + s storer.EncodedObjectStorer +} + +// objectCommitNode is a representation of Commit as presented in the GIT object format. +// +// objectCommitNode implements the CommitNode interface. +type objectCommitNode struct { + commit *Commit +} + +// objectCommitNodeIndex is an index that can load CommitNode objects only from the +// object store. +// +// objectCommitNodeIndex implements the CommitNodeIndex interface +type objectCommitNodeIndex struct { + s storer.EncodedObjectStorer +} + +// ID returns the Commit object id referenced by the commit graph node. +func (c *graphCommitNode) ID() plumbing.Hash { + return c.hash +} + +// Tree returns the Tree referenced by the commit graph node. +func (c *graphCommitNode) Tree() (*Tree, error) { + return GetTree(c.gci.s, c.node.TreeHash) +} + +// CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. +func (c *graphCommitNode) CommitTime() time.Time { + return c.node.When +} + +func (c *graphCommitNode) String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(DateFormat), + ) +} + +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} +} + +// NumParents returns the number of parents in a commit. +func (gci *graphCommitNodeIndex) NumParents(node CommitNode) int { + if cgn, ok := node.(*graphCommitNode); ok { + return len(cgn.node.ParentIndexes) + } + co := node.(*objectCommitNode) + return co.commit.NumParents() +} + +// ParentNodes return a CommitNodeIter for parents of specified node. +func (gci *graphCommitNodeIndex) ParentNodes(node CommitNode) CommitNodeIter { + return newParentgraphCommitNodeIter(gci, node) +} + +// ParentNode returns the ith parent of a commit. +func (gci *graphCommitNodeIndex) ParentNode(node CommitNode, i int) (CommitNode, error) { + if cgn, ok := node.(*graphCommitNode); ok { + if len(cgn.node.ParentIndexes) == 0 || i >= len(cgn.node.ParentIndexes) { + return nil, ErrParentNotFound + } + + parent, err := gci.commitGraph.GetNodeByIndex(cgn.node.ParentIndexes[i]) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: cgn.node.ParentHashes[i], + index: cgn.node.ParentIndexes[i], + node: parent, + gci: gci, + }, nil + } + + co := node.(*objectCommitNode) + if len(co.commit.ParentHashes) == 0 || i >= len(co.commit.ParentHashes) { + return nil, ErrParentNotFound + } + + parentHash := co.commit.ParentHashes[i] + return gci.Get(parentHash) +} + +// ParentHashes returns hashes of the parent commits for a specified node +func (gci *graphCommitNodeIndex) ParentHashes(node CommitNode) []plumbing.Hash { + if cgn, ok := node.(*graphCommitNode); ok { + return cgn.node.ParentHashes + } + co := node.(*objectCommitNode) + return co.commit.ParentHashes +} + +// NodeFromHash looks up a commit node by it's object hash +func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + // Check the commit graph first + parentIndex, err := gci.commitGraph.GetIndexByHash(hash) + if err == nil { + parent, err := gci.commitGraph.GetNodeByIndex(parentIndex) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: hash, + index: parentIndex, + node: parent, + gci: gci, + }, nil + } + + // Fallback to loading full commit object + commit, err := GetCommit(gci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{commit: commit}, nil +} + +// Commit returns the full Commit object representing the commit graph node. +func (gci *graphCommitNodeIndex) Commit(node CommitNode) (*Commit, error) { + if cgn, ok := node.(*graphCommitNode); ok { + return GetCommit(gci.s, cgn.ID()) + } + co := node.(*objectCommitNode) + return co.commit, nil +} + +// CommitTime returns the time when the commit was performed. +// +// CommitTime is present to fulfill the CommitNode interface. +func (c *objectCommitNode) CommitTime() time.Time { + return c.commit.Committer.When +} + +// ID returns the Commit object id referenced by the node. +func (c *objectCommitNode) ID() plumbing.Hash { + return c.commit.ID() +} + +// Tree returns the Tree referenced by the node. +func (c *objectCommitNode) Tree() (*Tree, error) { + return c.commit.Tree() +} + +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} +} + +// NumParents returns the number of parents in a commit. +func (oci *objectCommitNodeIndex) NumParents(node CommitNode) int { + co := node.(*objectCommitNode) + return co.commit.NumParents() +} + +// ParentNodes return a CommitNodeIter for parents of specified node. +func (oci *objectCommitNodeIndex) ParentNodes(node CommitNode) CommitNodeIter { + return newParentgraphCommitNodeIter(oci, node) +} + +// ParentNode returns the ith parent of a commit. +func (oci *objectCommitNodeIndex) ParentNode(node CommitNode, i int) (CommitNode, error) { + co := node.(*objectCommitNode) + parent, err := co.commit.Parent(i) + if err != nil { + return nil, err + } + return &objectCommitNode{commit: parent}, nil +} + +// ParentHashes returns hashes of the parent commits for a specified node +func (oci *objectCommitNodeIndex) ParentHashes(node CommitNode) []plumbing.Hash { + co := node.(*objectCommitNode) + return co.commit.ParentHashes +} + +// NodeFromHash looks up a commit node by it's object hash +func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + commit, err := GetCommit(oci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{commit: commit}, nil +} + +// Commit returns the full Commit object representing the commit graph node. +func (oci *objectCommitNodeIndex) Commit(node CommitNode) (*Commit, error) { + co := node.(*objectCommitNode) + return co.commit, nil +} + +// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. +type parentCommitNodeIter struct { + gci CommitNodeIndex + node CommitNode + i int +} + +func newParentgraphCommitNodeIter(gci CommitNodeIndex, node CommitNode) CommitNodeIter { + return &parentCommitNodeIter{gci, node, 0} +} + +// Next moves the iterator to the next commit and returns a pointer to it. If +// there are no more commits, it returns io.EOF. +func (iter *parentCommitNodeIter) Next() (CommitNode, error) { + obj, err := iter.gci.ParentNode(iter.node, iter.i) + if err == ErrParentNotFound { + return nil, io.EOF + } + if err == nil { + iter.i++ + } + + return obj, err +} + +// ForEach call the cb function for each commit contained on this iter until +// an error appends or the end of the iter is reached. If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. +func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error { + for { + obj, err := iter.Next() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if err := cb(obj); err != nil { + if err == storer.ErrStop { + return nil + } + + return err + } + } +} + +func (iter *parentCommitNodeIter) Close() { +} diff --git a/plumbing/object/commitnode_test.go b/plumbing/object/commitnode_test.go new file mode 100644 index 0000000..8f59665 --- /dev/null +++ b/plumbing/object/commitnode_test.go @@ -0,0 +1,81 @@ +package object + +import ( + "path" + + "golang.org/x/exp/mmap" + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/storage/filesystem" +) + +type CommitNodeSuite struct { + fixtures.Suite +} + +var _ = Suite(&CommitNodeSuite{}) + +func testWalker(c *C, nodeIndex CommitNodeIndex) { + head, err := nodeIndex.Get(plumbing.NewHash("b9d69064b190e7aedccf84731ca1d917871f8a1c")) + c.Assert(err, IsNil) + + iter := NewCommitNodeIterCTime( + head, + nodeIndex, + nil, + nil, + ) + + var commits []CommitNode + iter.ForEach(func(c CommitNode) error { + commits = append(commits, c) + return nil + }) + + c.Assert(commits, HasLen, 9) + + expected := []string{ + "b9d69064b190e7aedccf84731ca1d917871f8a1c", + "6f6c5d2be7852c782be1dd13e36496dd7ad39560", + "a45273fe2d63300e1962a9e26a6b15c276cd7082", + "c0edf780dd0da6a65a7a49a86032fcf8a0c2d467", + "bb13916df33ed23004c3ce9ed3b8487528e655c1", + "03d2c021ff68954cf3ef0a36825e194a4b98f981", + "ce275064ad67d51e99f026084e20827901a8361c", + "e713b52d7e13807e87a002e812041f248db3f643", + "347c91919944a68e9413581a1bc15519550a3afe", + } + for i, commit := range commits { + c.Assert(commit.ID().String(), Equals, expected[i]) + } +} + +func (s *CommitNodeSuite) TestWalkObject(c *C) { + f := fixtures.ByTag("commit-graph").One() + storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) + p := f.Packfile() + defer p.Close() + err := packfile.UpdateObjectStorage(storer, p) + c.Assert(err, IsNil) + + nodeIndex := NewObjectCommitNodeIndex(storer) + testWalker(c, nodeIndex) +} + +func (s *CommitNodeSuite) TestWalkCommitGraph(c *C) { + f := fixtures.ByTag("commit-graph").One() + dotgit := f.DotGit() + storer := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()) + reader, err := mmap.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + + nodeIndex := NewGraphCommitNodeIndex(index, storer) + testWalker(c, nodeIndex) +} diff --git a/plumbing/object/commitnode_walker_ctime.go b/plumbing/object/commitnode_walker_ctime.go new file mode 100644 index 0000000..86b6c57 --- /dev/null +++ b/plumbing/object/commitnode_walker_ctime.go @@ -0,0 +1,108 @@ +package object + +import ( + "io" + + "github.com/emirpasic/gods/trees/binaryheap" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +type commitNodeIteratorByCTime struct { + heap *binaryheap.Heap + seenExternal map[plumbing.Hash]bool + seen map[plumbing.Hash]bool + nodeIndex CommitNodeIndex +} + +// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents while preserving Committer Time order. +// this appears to be the closest order to `git log` +// The given callback will be called for each visited commit. Each commit will +// be visited only once. If the callback returns an error, walking will stop +// and will return the error. Other errors might be returned if the history +// cannot be traversed (e.g. missing objects). Ignore allows to skip some +// commits from being iterated. +func NewCommitNodeIterCTime( + c CommitNode, + nodeIndex CommitNodeIndex, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]bool) + for _, h := range ignore { + seen[h] = true + } + + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { + return 1 + } + return -1 + }) + + heap.Push(c) + + return &commitNodeIteratorByCTime{ + heap: heap, + seenExternal: seenExternal, + seen: seen, + nodeIndex: nodeIndex, + } +} + +func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { + var c CommitNode + for { + cIn, ok := w.heap.Pop() + if !ok { + return nil, io.EOF + } + c = cIn.(CommitNode) + cID := c.ID() + + if w.seen[cID] || w.seenExternal[cID] { + continue + } + + w.seen[cID] = true + + for i, h := range w.nodeIndex.ParentHashes(c) { + if w.seen[h] || w.seenExternal[h] { + continue + } + pc, err := w.nodeIndex.ParentNode(c, i) + if err != nil { + return nil, err + } + w.heap.Push(pc) + } + + return c, nil + } +} + +func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { + for { + c, err := w.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + err = cb(c) + if err == storer.ErrStop { + break + } + if err != nil { + return err + } + } + + return nil +} + +func (w *commitNodeIteratorByCTime) Close() {} -- cgit