From 07b8edee398163db2e61430414957df73aec7e20 Mon Sep 17 00:00:00 2001 From: "Santiago M. Mola" Date: Fri, 4 Nov 2016 16:12:01 +0100 Subject: add Blobs, Trees and Objects iters. (#114) * Now every object type has an iterator in Repository. * old TreeIter is TreeWalker again, TreeIter now matches the same behaviour as other iterators. --- blobs.go | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++ blobs_test.go | 94 +++++++++++++++++++++++++++++++++++++++++ file.go | 4 +- objects.go | 129 +++++++++++++++++++++++++++++++------------------------- objects_test.go | 79 ++++++++++++++-------------------- repository.go | 31 ++++++++++++++ tree.go | 64 +++++++++++++++++++++++++--- tree_diff.go | 2 +- tree_test.go | 41 ++++++++++++++++-- 9 files changed, 440 insertions(+), 119 deletions(-) create mode 100644 blobs.go create mode 100644 blobs_test.go diff --git a/blobs.go b/blobs.go new file mode 100644 index 0000000..f720c86 --- /dev/null +++ b/blobs.go @@ -0,0 +1,115 @@ +package git + +import ( + "io" + + "gopkg.in/src-d/go-git.v4/core" +) + +// Blob is used to store file data - it is generally a file. +type Blob struct { + Hash core.Hash + Size int64 + + obj core.Object +} + +// ID returns the object ID of the blob. The returned value will always match +// the current value of Blob.Hash. +// +// ID is present to fulfill the Object interface. +func (b *Blob) ID() core.Hash { + return b.Hash +} + +// Type returns the type of object. It always returns core.BlobObject. +// +// Type is present to fulfill the Object interface. +func (b *Blob) Type() core.ObjectType { + return core.BlobObject +} + +// Decode transforms a core.Object into a Blob struct. +func (b *Blob) Decode(o core.Object) error { + if o.Type() != core.BlobObject { + return ErrUnsupportedObject + } + + b.Hash = o.Hash() + b.Size = o.Size() + b.obj = o + + return nil +} + +// Encode transforms a Blob into a core.Object. 
+func (b *Blob) Encode(o core.Object) error { + w, err := o.Writer() + if err != nil { + return err + } + defer checkClose(w, &err) + r, err := b.Reader() + if err != nil { + return err + } + defer checkClose(r, &err) + _, err = io.Copy(w, r) + o.SetType(core.BlobObject) + return err +} + +// Reader returns a reader that allows access to the content of the blob. +func (b *Blob) Reader() (core.ObjectReader, error) { + return b.obj.Reader() +} + +// BlobIter provides an iterator for a set of blobs. +type BlobIter struct { + core.ObjectIter + r *Repository +} + +// NewBlobIter returns a BlobIter for the given repository and underlying +// object iterator. +// +// The returned BlobIter will automatically skip over non-blob objects. +func NewBlobIter(r *Repository, iter core.ObjectIter) *BlobIter { + return &BlobIter{iter, r} +} + +// Next moves the iterator to the next blob and returns a pointer to it. If it +// has reached the end of the set it will return io.EOF. +func (iter *BlobIter) Next() (*Blob, error) { + for { + obj, err := iter.ObjectIter.Next() + if err != nil { + return nil, err + } + + if obj.Type() != core.BlobObject { + continue + } + + blob := &Blob{} + return blob, blob.Decode(obj) + } +} + +// ForEach calls the cb function for each blob contained in this iter until +// an error happens or the end of the iter is reached. If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. +func (iter *BlobIter) ForEach(cb func(*Blob) error) error { + return iter.ObjectIter.ForEach(func(obj core.Object) error { + if obj.Type() != core.BlobObject { + return nil + } + + blob := &Blob{} + if err := blob.Decode(obj); err != nil { + return err + } + + return cb(blob) + }) +} diff --git a/blobs_test.go b/blobs_test.go new file mode 100644 index 0000000..b667fa4 --- /dev/null +++ b/blobs_test.go @@ -0,0 +1,94 @@ +package git + +import ( + "io" + "io/ioutil" + + "gopkg.in/src-d/go-git.v4/core" + + . 
"gopkg.in/check.v1" +) + +type BlobsSuite struct { + BaseSuite +} + +var _ = Suite(&BlobsSuite{}) + +func (s *BlobsSuite) TestBlobHash(c *C) { + o := &core.MemoryObject{} + o.SetType(core.BlobObject) + o.SetSize(3) + + writer, err := o.Writer() + c.Assert(err, IsNil) + defer func() { c.Assert(writer.Close(), IsNil) }() + + writer.Write([]byte{'F', 'O', 'O'}) + + blob := &Blob{} + c.Assert(blob.Decode(o), IsNil) + + c.Assert(blob.Size, Equals, int64(3)) + c.Assert(blob.Hash.String(), Equals, "d96c7efbfec2814ae0301ad054dc8d9fc416c9b5") + + reader, err := blob.Reader() + c.Assert(err, IsNil) + defer func() { c.Assert(reader.Close(), IsNil) }() + + data, err := ioutil.ReadAll(reader) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, "FOO") +} + +func (s *BlobsSuite) TestBlobDecodeEncodeIdempotent(c *C) { + var objects []*core.MemoryObject + for _, str := range []string{"foo", "foo\n"} { + obj := &core.MemoryObject{} + obj.Write([]byte(str)) + obj.SetType(core.BlobObject) + obj.Hash() + objects = append(objects, obj) + } + for _, object := range objects { + blob := &Blob{} + err := blob.Decode(object) + c.Assert(err, IsNil) + newObject := &core.MemoryObject{} + err = blob.Encode(newObject) + c.Assert(err, IsNil) + newObject.Hash() // Ensure Hash is pre-computed before deep comparison + c.Assert(newObject, DeepEquals, object) + } +} + +func (s *BlobsSuite) TestBlobIter(c *C) { + iter, err := s.Repository.Blobs() + c.Assert(err, IsNil) + + blobs := []*Blob{} + iter.ForEach(func(b *Blob) error { + blobs = append(blobs, b) + return nil + }) + + c.Assert(len(blobs) > 0, Equals, true) + iter.Close() + + iter, err = s.Repository.Blobs() + c.Assert(err, IsNil) + + i := 0 + for { + b, err := iter.Next() + if err == io.EOF { + break + } + + c.Assert(err, IsNil) + c.Assert(b, DeepEquals, blobs[i]) + i += 1 + } + + iter.Close() +} diff --git a/file.go b/file.go index 6ba9d0b..3278a8c 100644 --- a/file.go +++ b/file.go @@ -55,11 +55,11 @@ func (f *File) Lines() ([]string, 
error) { type FileIter struct { r *Repository - w TreeIter + w TreeWalker } func NewFileIter(r *Repository, t *Tree) *FileIter { - return &FileIter{r: r, w: *NewTreeIter(r, t, true)} + return &FileIter{r: r, w: *NewTreeWalker(r, t, true)} } func (iter *FileIter) Next() (*File, error) { diff --git a/objects.go b/objects.go index fd79deb..9a0007d 100644 --- a/objects.go +++ b/objects.go @@ -43,64 +43,6 @@ type Object interface { Encode(core.Object) error } -// Blob is used to store file data - it is generally a file. -type Blob struct { - Hash core.Hash - Size int64 - - obj core.Object -} - -// ID returns the object ID of the blob. The returned value will always match -// the current value of Blob.Hash. -// -// ID is present to fulfill the Object interface. -func (b *Blob) ID() core.Hash { - return b.Hash -} - -// Type returns the type of object. It always returns core.BlobObject. -// -// Type is present to fulfill the Object interface. -func (b *Blob) Type() core.ObjectType { - return core.BlobObject -} - -// Decode transforms a core.Object into a Blob struct. -func (b *Blob) Decode(o core.Object) error { - if o.Type() != core.BlobObject { - return ErrUnsupportedObject - } - - b.Hash = o.Hash() - b.Size = o.Size() - b.obj = o - - return nil -} - -// Encode transforms a Blob into a core.Object. 
-func (b *Blob) Encode(o core.Object) error { - w, err := o.Writer() - if err != nil { - return err - } - defer checkClose(w, &err) - r, err := b.Reader() - if err != nil { - return err - } - defer checkClose(r, &err) - _, err = io.Copy(w, r) - o.SetType(core.BlobObject) - return err -} - -// Reader returns a reader allow the access to the content of the blob -func (b *Blob) Reader() (core.ObjectReader, error) { - return b.obj.Reader() -} - // Signature represents an action signed by a person type Signature struct { Name string @@ -171,3 +113,74 @@ func (s *Signature) encodeTimeAndTimeZone(w io.Writer) error { func (s *Signature) String() string { return fmt.Sprintf("%s <%s>", s.Name, s.Email) } + +// ObjectIter provides an iterator for a set of objects. +type ObjectIter struct { + core.ObjectIter + r *Repository +} + +// NewObjectIter returns an ObjectIter for the given repository and underlying +// object iterator. +func NewObjectIter(r *Repository, iter core.ObjectIter) *ObjectIter { + return &ObjectIter{iter, r} +} + +// Next moves the iterator to the next object and returns a pointer to it. If it +// has reached the end of the set it will return io.EOF. +func (iter *ObjectIter) Next() (Object, error) { + for { + obj, err := iter.ObjectIter.Next() + if err != nil { + return nil, err + } + + o, err := iter.toObject(obj) + if err == core.ErrInvalidType { + continue + } + + if err != nil { + return nil, err + } + + return o, nil + } +} + +// ForEach calls the cb function for each object contained in this iter until +// an error happens or the end of the iter is reached. If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. 
+func (iter *ObjectIter) ForEach(cb func(Object) error) error { + return iter.ObjectIter.ForEach(func(obj core.Object) error { + o, err := iter.toObject(obj) + if err == core.ErrInvalidType { + return nil + } + + if err != nil { + return err + } + + return cb(o) + }) +} + +func (iter *ObjectIter) toObject(obj core.Object) (Object, error) { + switch obj.Type() { + case core.BlobObject: + blob := &Blob{} + return blob, blob.Decode(obj) + case core.TreeObject: + tree := &Tree{r: iter.r} + return tree, tree.Decode(obj) + case core.CommitObject: + commit := &Commit{} + return commit, commit.Decode(obj) + case core.TagObject: + tag := &Tag{} + return tag, tag.Decode(obj) + default: + return nil, core.ErrInvalidType + } +} diff --git a/objects_test.go b/objects_test.go index ab4aa30..ee77869 100644 --- a/objects_test.go +++ b/objects_test.go @@ -7,6 +7,7 @@ import ( "gopkg.in/src-d/go-git.v4/core" . "gopkg.in/check.v1" + "io" ) type ObjectsSuite struct { @@ -73,53 +74,6 @@ func (s *ObjectsSuite) TestParseTree(c *C) { c.Assert(count, Equals, 9) } -func (s *ObjectsSuite) TestBlobHash(c *C) { - o := &core.MemoryObject{} - o.SetType(core.BlobObject) - o.SetSize(3) - - writer, err := o.Writer() - c.Assert(err, IsNil) - defer func() { c.Assert(writer.Close(), IsNil) }() - - writer.Write([]byte{'F', 'O', 'O'}) - - blob := &Blob{} - c.Assert(blob.Decode(o), IsNil) - - c.Assert(blob.Size, Equals, int64(3)) - c.Assert(blob.Hash.String(), Equals, "d96c7efbfec2814ae0301ad054dc8d9fc416c9b5") - - reader, err := blob.Reader() - c.Assert(err, IsNil) - defer func() { c.Assert(reader.Close(), IsNil) }() - - data, err := ioutil.ReadAll(reader) - c.Assert(err, IsNil) - c.Assert(string(data), Equals, "FOO") -} - -func (s *ObjectsSuite) TestBlobDecodeEncodeIdempotent(c *C) { - var objects []*core.MemoryObject - for _, str := range []string{"foo", "foo\n"} { - obj := &core.MemoryObject{} - obj.Write([]byte(str)) - obj.SetType(core.BlobObject) - obj.Hash() - objects = append(objects, obj) - } - 
for _, object := range objects { - blob := &Blob{} - err := blob.Decode(object) - c.Assert(err, IsNil) - newObject := &core.MemoryObject{} - err = blob.Encode(newObject) - c.Assert(err, IsNil) - newObject.Hash() // Ensure Hash is pre-computed before deep comparison - c.Assert(newObject, DeepEquals, object) - } -} - func (s *ObjectsSuite) TestParseSignature(c *C) { cases := map[string]Signature{ `Foo Bar 1257894000 +0100`: { @@ -169,6 +123,37 @@ func (s *ObjectsSuite) TestParseSignature(c *C) { } } +func (s *ObjectsSuite) TestObjectIter(c *C) { + iter, err := s.Repository.Objects() + c.Assert(err, IsNil) + + objects := []Object{} + iter.ForEach(func(o Object) error { + objects = append(objects, o) + return nil + }) + + c.Assert(len(objects) > 0, Equals, true) + iter.Close() + + iter, err = s.Repository.Objects() + c.Assert(err, IsNil) + + i := 0 + for { + o, err := iter.Next() + if err == io.EOF { + break + } + + c.Assert(err, IsNil) + c.Assert(o, DeepEquals, objects[i]) + i += 1 + } + + iter.Close() +} + func MustParseTime(value string) time.Time { t, _ := time.Parse("2006-01-02 15:04:05 -0700", value) return t diff --git a/repository.go b/repository.go index cd94159..553703a 100644 --- a/repository.go +++ b/repository.go @@ -259,6 +259,16 @@ func (r *Repository) Tree(h core.Hash) (*Tree, error) { return tree.(*Tree), nil } +// Trees decodes the objects into trees +func (r *Repository) Trees() (*TreeIter, error) { + iter, err := r.s.ObjectStorage().Iter(core.TreeObject) + if err != nil { + return nil, err + } + + return NewTreeIter(r, iter), nil +} + // Blob returns the blob with the given hash func (r *Repository) Blob(h core.Hash) (*Blob, error) { blob, err := r.Object(core.BlobObject, h) @@ -269,6 +279,16 @@ func (r *Repository) Blob(h core.Hash) (*Blob, error) { return blob.(*Blob), nil } +// Blobs decodes the objects into blobs +func (r *Repository) Blobs() (*BlobIter, error) { + iter, err := r.s.ObjectStorage().Iter(core.BlobObject) + if err != nil { + return 
nil, err + } + + return NewBlobIter(r, iter), nil +} + // Tag returns a tag with the given hash. func (r *Repository) Tag(h core.Hash) (*Tag, error) { tag, err := r.Object(core.TagObject, h) @@ -318,6 +338,17 @@ func (r *Repository) Object(t core.ObjectType, h core.Hash) (Object, error) { } } +// Objects returns an ObjectIter that can step through all of the objects +// in the repository. +func (r *Repository) Objects() (*ObjectIter, error) { + iter, err := r.s.ObjectStorage().Iter(core.AnyObject) + if err != nil { + return nil, err + } + + return NewObjectIter(r, iter), nil +} + // Head returns the reference where HEAD is pointing func (r *Repository) Head() (*core.Reference, error) { return core.ResolveReference(r.s.ReferenceStorage(), core.HEAD) diff --git a/tree.go b/tree.go index baec587..9c97872 100644 --- a/tree.go +++ b/tree.go @@ -257,7 +257,7 @@ func (iter *treeEntryIter) Next() (TreeEntry, error) { } // TreeWalker provides a means of walking through all of the entries in a Tree. -type TreeIter struct { +type TreeWalker struct { stack []treeEntryIter base string recursive bool @@ -266,15 +266,15 @@ type TreeIter struct { t *Tree } -// NewTreeIter returns a new TreeIter for the given repository and tree. +// NewTreeWalker returns a new TreeWalker for the given repository and tree. // // It is the caller's responsibility to call Close() when finished with the // tree walker. -func NewTreeIter(r *Repository, t *Tree, recursive bool) *TreeIter { +func NewTreeWalker(r *Repository, t *Tree, recursive bool) *TreeWalker { stack := make([]treeEntryIter, 0, startingStackSize) stack = append(stack, treeEntryIter{t, 0}) - return &TreeIter{ + return &TreeWalker{ stack: stack, recursive: recursive, @@ -290,7 +290,7 @@ func NewTreeIter(r *Repository, t *Tree, recursive bool) *TreeIter { // In the current implementation any objects which cannot be found in the // underlying repository will be skipped automatically. 
It is possible that this // may change in future versions. -func (w *TreeIter) Next() (name string, entry TreeEntry, err error) { +func (w *TreeWalker) Next() (name string, entry TreeEntry, err error) { var obj Object for { current := len(w.stack) - 1 @@ -351,7 +351,7 @@ func (w *TreeIter) Next() (name string, entry TreeEntry, err error) { } // Tree returns the tree that the tree walker most recently operated on. -func (w *TreeIter) Tree() *Tree { +func (w *TreeWalker) Tree() *Tree { current := len(w.stack) - 1 if w.stack[current].pos == 0 { current-- @@ -365,6 +365,56 @@ func (w *TreeIter) Tree() *Tree { } // Close releases any resources used by the TreeWalker. -func (w *TreeIter) Close() { +func (w *TreeWalker) Close() { w.stack = nil } + +// TreeIter provides an iterator for a set of trees. +type TreeIter struct { + core.ObjectIter + r *Repository +} + +// NewTreeIter returns a TreeIter for the given repository and underlying +// object iterator. +// +// The returned TreeIter will automatically skip over non-tree objects. +func NewTreeIter(r *Repository, iter core.ObjectIter) *TreeIter { + return &TreeIter{iter, r} +} + +// Next moves the iterator to the next tree and returns a pointer to it. If it +// has reached the end of the set it will return io.EOF. +func (iter *TreeIter) Next() (*Tree, error) { + for { + obj, err := iter.ObjectIter.Next() + if err != nil { + return nil, err + } + + if obj.Type() != core.TreeObject { + continue + } + + tree := &Tree{r: iter.r} + return tree, tree.Decode(obj) + } +} + +// ForEach calls the cb function for each tree contained in this iter until +// an error happens or the end of the iter is reached. If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. 
+func (iter *TreeIter) ForEach(cb func(*Tree) error) error { + return iter.ObjectIter.ForEach(func(obj core.Object) error { + if obj.Type() != core.TreeObject { + return nil + } + + tree := &Tree{r: iter.r} + if err := tree.Decode(obj); err != nil { + return err + } + + return cb(tree) + }) +} diff --git a/tree_diff.go b/tree_diff.go index e142bcd..1f5f076 100644 --- a/tree_diff.go +++ b/tree_diff.go @@ -140,7 +140,7 @@ func newWithEmpty(a, b *Tree) (Changes, error) { tree = a } - w := NewTreeIter(tree.r, tree, true) + w := NewTreeWalker(tree.r, tree, true) defer w.Close() for { diff --git a/tree_test.go b/tree_test.go index 18d6b14..86b4851 100644 --- a/tree_test.go +++ b/tree_test.go @@ -146,7 +146,40 @@ func (s *TreeSuite) TestTreeDecodeEncodeIdempotent(c *C) { } } -func (s *TreeSuite) TestTreeIterNext(c *C) { +func (s *TreeSuite) TestTreeIter(c *C) { + iter, err := s.Repository.Trees() + c.Assert(err, IsNil) + + trees := []*Tree{} + iter.ForEach(func(t *Tree) error { + t.r = nil + trees = append(trees, t) + return nil + }) + + c.Assert(len(trees) > 0, Equals, true) + iter.Close() + + iter, err = s.Repository.Trees() + c.Assert(err, IsNil) + + i := 0 + for { + t, err := iter.Next() + if err == io.EOF { + break + } + + t.r = nil + c.Assert(err, IsNil) + c.Assert(t, DeepEquals, trees[i]) + i += 1 + } + + iter.Close() +} + +func (s *TreeSuite) TestTreeWalkerNext(c *C) { r := s.Repository commit, err := r.Commit(core.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")) c.Assert(err, IsNil) @@ -154,7 +187,7 @@ func (s *TreeSuite) TestTreeIterNext(c *C) { tree, err := commit.Tree() c.Assert(err, IsNil) - walker := NewTreeIter(r, tree, true) + walker := NewTreeWalker(r, tree, true) for _, e := range treeWalkerExpects { name, entry, err := walker.Next() if err == io.EOF { @@ -171,7 +204,7 @@ func (s *TreeSuite) TestTreeIterNext(c *C) { } } -func (s *TreeSuite) TestTreeIterNextNonRecursive(c *C) { +func (s *TreeSuite) TestTreeWalkerNextNonRecursive(c *C) { r := 
s.Repository commit, err := r.Commit(core.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")) c.Assert(err, IsNil) @@ -180,7 +213,7 @@ func (s *TreeSuite) TestTreeIterNextNonRecursive(c *C) { c.Assert(err, IsNil) var count int - walker := NewTreeIter(r, tree, false) + walker := NewTreeWalker(r, tree, false) for { name, entry, err := walker.Next() if err == io.EOF { -- cgit