From 689e334b51565dda54fcd44b2bf14da99eed61bb Mon Sep 17 00:00:00 2001 From: David Symonds Date: Wed, 30 May 2018 10:34:28 +1000 Subject: idxfile: optimise allocations in readObjectNames This makes all the required Entry allocations in one go, instead of huge amounts of small individual allocations. Signed-off-by: David Symonds --- plumbing/format/idxfile/decoder.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go index f361213..45afb1e 100644 --- a/plumbing/format/idxfile/decoder.go +++ b/plumbing/format/idxfile/decoder.go @@ -6,7 +6,6 @@ import ( "errors" "io" - "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/utils/binary" ) @@ -98,13 +97,14 @@ func readFanout(idx *Idxfile, r io.Reader) error { func readObjectNames(idx *Idxfile, r io.Reader) error { c := int(idx.ObjectCount) + new := make([]Entry, c) for i := 0; i < c; i++ { - var ref plumbing.Hash - if _, err := io.ReadFull(r, ref[:]); err != nil { + e := &new[i] + if _, err := io.ReadFull(r, e.Hash[:]); err != nil { return err } - idx.Entries = append(idx.Entries, &Entry{Hash: ref}) + idx.Entries = append(idx.Entries, e) } return nil -- cgit From cf532f99e3e7632bc1d813245a4c79ae38b4d320 Mon Sep 17 00:00:00 2001 From: David Symonds Date: Wed, 30 May 2018 11:06:44 +1000 Subject: packfile: improve Index memory representation to be more compact Instead of using a map for offset indexing, use a sorted slice. Binary searching is fast, and a slice is much more compact. This has a negligible hit on speed, but has a significant impact on memory usage, especially for larger repos. benchmark old ns/op new ns/op delta BenchmarkIndexConstruction-12 15506506 14056098 -9.35% benchmark old allocs new allocs delta BenchmarkIndexConstruction-12 60764 60385 -0.62% benchmark old bytes new bytes delta BenchmarkIndexConstruction-12 4318145 3913169 -9.38% Signed-off-by: David Symonds --- plumbing/format/packfile/index.go | 53 +++++++++++++++++++++++++++------- plumbing/format/packfile/index_test.go | 37 +++++++++++++++--------- 2 files changed, 67 insertions(+), 23 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go index 2c5f98f..7d8f2ad 100644 --- a/plumbing/format/packfile/index.go +++ b/plumbing/format/packfile/index.go @@ -1,6 +1,8 @@ package packfile import ( + "sort" + "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" ) @@ -10,7 +12,7 @@ import ( // or to store them. type Index struct { byHash map[plumbing.Hash]*idxfile.Entry - byOffset map[uint64]*idxfile.Entry + byOffset []*idxfile.Entry // sorted by their offset } // NewIndex creates a new empty index with the given size. Size is a hint and @@ -19,7 +21,7 @@ type Index struct { func NewIndex(size int) *Index { return &Index{ byHash: make(map[plumbing.Hash]*idxfile.Entry, size), - byOffset: make(map[uint64]*idxfile.Entry, size), + byOffset: make([]*idxfile.Entry, 0, size), } } @@ -27,28 +29,54 @@ func NewIndex(size int) *Index { func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index { idx := &Index{ byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount), - byOffset: make(map[uint64]*idxfile.Entry, idxf.ObjectCount), + byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount), } for _, e := range idxf.Entries { - idx.add(e) + idx.addUnsorted(e) } + sort.Sort(orderByOffset(idx.byOffset)) return idx } +// orderByOffset is a sort.Interface adapter that arranges +// a slice of entries by their offset. +type orderByOffset []*idxfile.Entry + +func (o orderByOffset) Len() int { return len(o) } +func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset } +func (o orderByOffset) Swap(i, j int) { o[i], o[j] = o[j], o[i] } + // Add adds a new Entry with the given values to the index. func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) { - e := idxfile.Entry{ + e := &idxfile.Entry{ Hash: h, Offset: offset, CRC32: crc32, } - idx.add(&e) + idx.byHash[e.Hash] = e + + // Find the right position in byOffset. + // Look for the first position whose offset is *greater* than e.Offset. + i := sort.Search(len(idx.byOffset), func(i int) bool { + return idx.byOffset[i].Offset > offset + }) + if i == len(idx.byOffset) { + // Simple case: add it to the end. + idx.byOffset = append(idx.byOffset, e) + return + } + // Harder case: shift existing entries down by one to make room. + // Append a nil entry first so we can use existing capacity in case + // the index was carefully preallocated. + idx.byOffset = append(idx.byOffset, nil) + copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1]) + idx.byOffset[i] = e } -func (idx *Index) add(e *idxfile.Entry) { +func (idx *Index) addUnsorted(e *idxfile.Entry) { idx.byHash[e.Hash] = e - idx.byOffset[e.Offset] = e + idx.byOffset = append(idx.byOffset, e) } // LookupHash looks an entry up by its hash. An idxfile.Entry is returned and @@ -61,8 +89,13 @@ func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) { // LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry // is returned and a bool, which is true if it was found or false if it wasn't. func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) { - e, ok := idx.byOffset[offset] - return e, ok + i := sort.Search(len(idx.byOffset), func(i int) bool { + return idx.byOffset[i].Offset >= offset + }) + if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset { + return nil, false // not present + } + return idx.byOffset[i], true } // Size returns the number of entries in the index. diff --git a/plumbing/format/packfile/index_test.go b/plumbing/format/packfile/index_test.go index 6714704..8de886d 100644 --- a/plumbing/format/packfile/index_test.go +++ b/plumbing/format/packfile/index_test.go @@ -3,6 +3,7 @@ package packfile import ( "strconv" "strings" + "testing" "gopkg.in/src-d/go-git.v4/plumbing" @@ -26,12 +27,12 @@ func (s *IndexSuite) TestLookupOffset(c *C) { e, ok := idx.LookupOffset(uint64(o2)) c.Assert(ok, Equals, true) c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) + c.Assert(e.Hash, Equals, toHash(o2)) c.Assert(e.Offset, Equals, uint64(o2)) } } - h1 := s.toHash(o1) + h1 := toHash(o1) idx.Add(h1, uint64(o1), 0) for o2 := 0; o2 < 10000; o2 += 100 { @@ -43,7 +44,7 @@ func (s *IndexSuite) TestLookupOffset(c *C) { e, ok := idx.LookupOffset(uint64(o2)) c.Assert(ok, Equals, true) c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) + c.Assert(e.Hash, Equals, toHash(o2)) c.Assert(e.Offset, Equals, uint64(o2)) } } @@ -56,31 +57,31 @@ func (s *IndexSuite) TestLookupHash(c *C) { for o1 := 0; o1 < 10000; o1 += 100 { for o2 := 0; o2 < 10000; o2 += 100 { if o2 >= o1 { - e, ok := idx.LookupHash(s.toHash(o2)) + e, ok := idx.LookupHash(toHash(o2)) c.Assert(ok, Equals, false) c.Assert(e, IsNil) } else { - e, ok := idx.LookupHash(s.toHash(o2)) + e, ok := idx.LookupHash(toHash(o2)) c.Assert(ok, Equals, true) c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) + c.Assert(e.Hash, Equals, toHash(o2)) c.Assert(e.Offset, Equals, uint64(o2)) } } - h1 := s.toHash(o1) + h1 := toHash(o1) idx.Add(h1, uint64(o1), 0) for o2 := 0; o2 < 10000; o2 += 100 { if o2 > o1 { - e, ok := idx.LookupHash(s.toHash(o2)) + e, ok := idx.LookupHash(toHash(o2)) c.Assert(ok, Equals, false) c.Assert(e, IsNil) } else { - e, ok := idx.LookupHash(s.toHash(o2)) + e, ok := idx.LookupHash(toHash(o2)) c.Assert(ok, Equals, true) c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) + c.Assert(e.Hash, Equals, toHash(o2)) c.Assert(e.Offset, Equals, uint64(o2)) } } @@ -92,7 +93,7 @@ func (s *IndexSuite) TestSize(c *C) { for o1 := 0; o1 < 1000; o1++ { c.Assert(idx.Size(), Equals, o1) - h1 := s.toHash(o1) + h1 := toHash(o1) idx.Add(h1, uint64(o1), 0) } } @@ -107,7 +108,7 @@ func (s *IndexSuite) TestIdxFileEmpty(c *C) { func (s *IndexSuite) TestIdxFile(c *C) { idx := NewIndex(0) for o1 := 0; o1 < 1000; o1++ { - h1 := s.toHash(o1) + h1 := toHash(o1) idx.Add(h1, uint64(o1), 0) } @@ -115,8 +116,18 @@ func (s *IndexSuite) TestIdxFile(c *C) { c.Assert(idx, DeepEquals, idx2) } -func (s *IndexSuite) toHash(i int) plumbing.Hash { +func toHash(i int) plumbing.Hash { is := strconv.Itoa(i) padding := strings.Repeat("a", 40-len(is)) return plumbing.NewHash(padding + is) } + +func BenchmarkIndexConstruction(b *testing.B) { + b.ReportAllocs() + + idx := NewIndex(0) + for o := 0; o < 1e6*b.N; o += 100 { + h1 := toHash(o) + idx.Add(h1, uint64(o), 0) + } +} -- cgit From c39bd4d4a6ba0b0e75a9902c3bbb2064f27a3f6e Mon Sep 17 00:00:00 2001 From: Antonio Jesus Navarro Perez Date: Tue, 5 Jun 2018 18:45:15 +0200 Subject: Remove println Signed-off-by: Antonio Jesus Navarro Perez --- plumbing/transport/test/receive_pack.go | 1 - 1 file changed, 1 deletion(-) (limited to 'plumbing') diff --git a/plumbing/transport/test/receive_pack.go b/plumbing/transport/test/receive_pack.go index 6179850..57f602d 100644 --- a/plumbing/transport/test/receive_pack.go +++ b/plumbing/transport/test/receive_pack.go @@ -231,7 +231,6 @@ func (s *ReceivePackSuite) receivePackNoCheck(c *C, ep *transport.Endpoint, // fixtures are generated with read only permissions, this casuses // errors deleting or modifying files. rootPath := ep.Path - println("STAT", rootPath) stat, err := os.Stat(ep.Path) if rootPath != "" && err == nil && stat.IsDir() { -- cgit From f01958913fab6e1967c1317b7222d1160212371c Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Wed, 6 Jun 2018 15:18:57 +0200 Subject: plumbing: object, adds tree path cache to trees. Fixes #793 The cache is used in Tree.FindEntry for faster path search. Signed-off-by: Javi Fontan --- plumbing/object/tree.go | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'plumbing') diff --git a/plumbing/object/tree.go b/plumbing/object/tree.go index c2399f8..30bbcb0 100644 --- a/plumbing/object/tree.go +++ b/plumbing/object/tree.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "path" + "path/filepath" "strings" "gopkg.in/src-d/go-git.v4/plumbing" @@ -34,6 +35,7 @@ type Tree struct { s storer.EncodedObjectStorer m map[string]*TreeEntry + t map[string]*Tree // tree path cache } // GetTree gets a tree from an object storer and decodes it. @@ -111,14 +113,37 @@ func (t *Tree) TreeEntryFile(e *TreeEntry) (*File, error) { // FindEntry search a TreeEntry in this tree or any subtree. func (t *Tree) FindEntry(path string) (*TreeEntry, error) { + if t.t == nil { + t.t = make(map[string]*Tree) + } + pathParts := strings.Split(path, "/") + startingTree := t + pathCurrent := "" + + // search for the longest path in the tree path cache + for i := len(pathParts); i > 1; i-- { + path := filepath.Join(pathParts[:i]...) + + tree, ok := t.t[path] + if ok { + startingTree = tree + pathParts = pathParts[i:] + pathCurrent = path + + break + } + } var tree *Tree var err error - for tree = t; len(pathParts) > 1; pathParts = pathParts[1:] { + for tree = startingTree; len(pathParts) > 1; pathParts = pathParts[1:] { if tree, err = tree.dir(pathParts[0]); err != nil { return nil, err } + + pathCurrent = filepath.Join(pathCurrent, pathParts[0]) + t.t[pathCurrent] = tree } return tree.entry(pathParts[0]) -- cgit From 88f0dc3d89a0391f9f52d913207556d15a4c2a77 Mon Sep 17 00:00:00 2001 From: kuba-- Date: Thu, 7 Jun 2018 22:23:58 +0200 Subject: plumbing: packfile, Don't push empty objects. Fixes #840 Signed-off-by: kuba-- --- plumbing/format/packfile/delta_test.go | 21 ++++++++++++++++++--- plumbing/format/packfile/diff_delta.go | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/delta_test.go b/plumbing/format/packfile/delta_test.go index 42b777a..98f53f6 100644 --- a/plumbing/format/packfile/delta_test.go +++ b/plumbing/format/packfile/delta_test.go @@ -62,7 +62,7 @@ func (s *DeltaSuite) SetUpSuite(c *C) { target: []piece{{"1", 30}, {"2", 20}, {"7", 40}, {"4", 400}, {"5", 10}}, }, { - description: "A copy operation bigger tan 64kb", + description: "A copy operation bigger than 64kb", base: []piece{{bigRandStr, 1}, {"1", 200}}, target: []piece{{bigRandStr, 1}}, }} @@ -72,12 +72,16 @@ var bigRandStr = randStringBytes(100 * 1024) const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -func randStringBytes(n int) string { +func randBytes(n int) []byte { b := make([]byte, n) for i := range b { b[i] = letterBytes[rand.Intn(len(letterBytes))] } - return string(b) + return b +} + +func randStringBytes(n int) string { + return string(randBytes(n)) } func (s *DeltaSuite) TestAddDelta(c *C) { @@ -110,3 +114,14 @@ func (s *DeltaSuite) TestIncompleteDelta(c *C) { c.Assert(err, NotNil) c.Assert(result, IsNil) } + +func (s *DeltaSuite) TestMaxCopySizeDelta(c *C) { + baseBuf := randBytes(maxCopySize) + targetBuf := baseBuf[0:] + targetBuf = append(targetBuf, byte(1)) + + delta := DiffDelta(baseBuf, targetBuf) + result, err := PatchDelta(baseBuf, delta) + c.Assert(err, IsNil) + c.Assert(result, DeepEquals, targetBuf) +} diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index 4d56dc1..d35e78a 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -111,7 +111,7 @@ func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte { rl := l aOffset := offset - for { + for rl > 0 { if rl < maxCopySize { buf.Write(encodeCopyOperation(aOffset, rl)) break -- cgit From bf6190841e8b6cd3a216bc056e5b71c73e18c410 Mon Sep 17 00:00:00 2001 From: Eric Billingsley Date: Fri, 8 Jun 2018 15:19:12 -0700 Subject: plumbing/transport: http, Adds token authentication support [Fixes #858] Signed-off-by: Eric Billingsley --- plumbing/transport/http/common.go | 25 +++++++++++++++++++++++++ plumbing/transport/http/common_test.go | 13 +++++++++++++ 2 files changed, 38 insertions(+) (limited to 'plumbing') diff --git a/plumbing/transport/http/common.go b/plumbing/transport/http/common.go index 2c337b7..c034846 100644 --- a/plumbing/transport/http/common.go +++ b/plumbing/transport/http/common.go @@ -201,6 +201,31 @@ func (a *BasicAuth) String() string { return fmt.Sprintf("%s - %s:%s", a.Name(), a.Username, masked) } +// TokenAuth implements the go-git http.AuthMethod and transport.AuthMethod interfaces +type TokenAuth struct { + Token string +} + +func (a *TokenAuth) setAuth(r *http.Request) { + if a == nil { + return + } + r.Header.Add("Authorization", fmt.Sprintf("Bearer %s", a.Token)) +} + +// Name is name of the auth +func (a *TokenAuth) Name() string { + return "http-token-auth" +} + +func (a *TokenAuth) String() string { + masked := "*******" + if a.Token == "" { + masked = "" + } + return fmt.Sprintf("%s - %s", a.Name(), masked) +} + // Err is a dedicated error to return errors based on status code type Err struct { Response *http.Response diff --git a/plumbing/transport/http/common_test.go b/plumbing/transport/http/common_test.go index 8d57996..71eede4 100644 --- a/plumbing/transport/http/common_test.go +++ b/plumbing/transport/http/common_test.go @@ -54,6 +54,19 @@ func (s *ClientSuite) TestNewBasicAuth(c *C) { c.Assert(a.String(), Equals, "http-basic-auth - foo:*******") } +func (s *ClientSuite) TestNewTokenAuth(c *C) { + a := &TokenAuth{"OAUTH-TOKEN-TEXT"} + + c.Assert(a.Name(), Equals, "http-token-auth") + c.Assert(a.String(), Equals, "http-token-auth - *******") + + // Check header is set correctly + req, err := http.NewRequest("GET", "https://github.com/git-fixtures/basic", nil) + c.Assert(err, Equals, nil) + a.setAuth(req) + c.Assert(req.Header.Get("Authorization"), Equals, "Bearer OAUTH-TOKEN-TEXT") +} + func (s *ClientSuite) TestNewErrOK(c *C) { res := &http.Response{StatusCode: http.StatusOK} err := NewErr(res) -- cgit From 2d9816a5e7daea58a1419fef70bfc8d220ffd6a2 Mon Sep 17 00:00:00 2001 From: David Symonds Date: Thu, 21 Jun 2018 13:24:03 +1000 Subject: packfile: optimise NewIndexFromIdxFile for a very common case Loading from an on-disk idxfile will usually already have the idxfile entries in order, so check that before wasting time on sorting. Signed-off-by: David Symonds --- plumbing/format/packfile/index.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go index 7d8f2ad..021b2d1 100644 --- a/plumbing/format/packfile/index.go +++ b/plumbing/format/packfile/index.go @@ -31,10 +31,20 @@ func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index { byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount), byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount), } - for _, e := range idxf.Entries { + sorted := true + for i, e := range idxf.Entries { idx.addUnsorted(e) + if i > 0 && idx.byOffset[i-1].Offset >= e.Offset { + sorted = false + } + } + + // If the idxfile was loaded from a regular packfile index + // then it will already be in offset order, in which case we + // can avoid doing a relatively expensive idempotent sort. + if !sorted { + sort.Sort(orderByOffset(idx.byOffset)) } - sort.Sort(orderByOffset(idx.byOffset)) return idx } -- cgit From 9251ea764df3de13518f974635e76315b2b89e3e Mon Sep 17 00:00:00 2001 From: Marc Barussaud Date: Tue, 26 Jun 2018 15:23:19 +0200 Subject: plumbing: add context to allow cancel on diff/patch computing Signed-off-by: Marc Barussaud --- plumbing/object/change.go | 21 ++++++++++++-- plumbing/object/change_test.go | 61 ++++++++++++++++++++++++++++++++++++++ plumbing/object/commit.go | 11 +++++-- plumbing/object/commit_test.go | 66 ++++++++++++++++++++++++++++++++++++++++++ plumbing/object/difftree.go | 13 ++++++++- plumbing/object/patch.go | 32 ++++++++++++++++++-- plumbing/object/tree.go | 20 +++++++++++-- 7 files changed, 215 insertions(+), 9 deletions(-) (limited to 'plumbing') diff --git a/plumbing/object/change.go b/plumbing/object/change.go index 729ff5a..a1b4c27 100644 --- a/plumbing/object/change.go +++ b/plumbing/object/change.go @@ -2,6 +2,7 @@ package object import ( "bytes" + "context" "fmt" "strings" @@ -81,7 +82,15 @@ func (c *Change) String() string { // Patch returns a Patch with all the file changes in chunks. This // representation can be used to create several diff outputs. func (c *Change) Patch() (*Patch, error) { - return getPatch("", c) + return c.PatchContext(context.Background()) +} + +// Patch returns a Patch with all the file changes in chunks. This +// representation can be used to create several diff outputs. +// If context expires, an non-nil error will be returned +// Provided context must be non-nil +func (c *Change) PatchContext(ctx context.Context) (*Patch, error) { + return getPatchContext(ctx, "", c) } func (c *Change) name() string { @@ -136,5 +145,13 @@ func (c Changes) String() string { // Patch returns a Patch with all the changes in chunks. This // representation can be used to create several diff outputs. func (c Changes) Patch() (*Patch, error) { - return getPatch("", c...) + return c.PatchContext(context.Background()) +} + +// Patch returns a Patch with all the changes in chunks. This +// representation can be used to create several diff outputs. +// If context expires, an non-nil error will be returned +// Provided context must be non-nil +func (c Changes) PatchContext(ctx context.Context) (*Patch, error) { + return getPatchContext(ctx, "", c...) } diff --git a/plumbing/object/change_test.go b/plumbing/object/change_test.go index 7036fa3..b0e89c7 100644 --- a/plumbing/object/change_test.go +++ b/plumbing/object/change_test.go @@ -1,6 +1,7 @@ package object import ( + "context" "sort" "gopkg.in/src-d/go-git.v4/plumbing" @@ -82,6 +83,12 @@ func (s *ChangeSuite) TestInsert(c *C) { c.Assert(len(p.FilePatches()[0].Chunks()), Equals, 1) c.Assert(p.FilePatches()[0].Chunks()[0].Type(), Equals, diff.Add) + p, err = change.PatchContext(context.Background()) + c.Assert(err, IsNil) + c.Assert(len(p.FilePatches()), Equals, 1) + c.Assert(len(p.FilePatches()[0].Chunks()), Equals, 1) + c.Assert(p.FilePatches()[0].Chunks()[0].Type(), Equals, diff.Add) + str := change.String() c.Assert(str, Equals, "") } @@ -134,6 +141,12 @@ func (s *ChangeSuite) TestDelete(c *C) { c.Assert(len(p.FilePatches()[0].Chunks()), Equals, 1) c.Assert(p.FilePatches()[0].Chunks()[0].Type(), Equals, diff.Delete) + p, err = change.PatchContext(context.Background()) + c.Assert(err, IsNil) + c.Assert(len(p.FilePatches()), Equals, 1) + c.Assert(len(p.FilePatches()[0].Chunks()), Equals, 1) + c.Assert(p.FilePatches()[0].Chunks()[0].Type(), Equals, diff.Delete) + str := change.String() c.Assert(str, Equals, "") } @@ -206,6 +219,18 @@ func (s *ChangeSuite) TestModify(c *C) { c.Assert(p.FilePatches()[0].Chunks()[5].Type(), Equals, diff.Add) c.Assert(p.FilePatches()[0].Chunks()[6].Type(), Equals, diff.Equal) + p, err = change.PatchContext(context.Background()) + c.Assert(err, IsNil) + c.Assert(len(p.FilePatches()), Equals, 1) + c.Assert(len(p.FilePatches()[0].Chunks()), Equals, 7) + c.Assert(p.FilePatches()[0].Chunks()[0].Type(), Equals, diff.Equal) + c.Assert(p.FilePatches()[0].Chunks()[1].Type(), Equals, diff.Delete) + c.Assert(p.FilePatches()[0].Chunks()[2].Type(), Equals, diff.Add) + c.Assert(p.FilePatches()[0].Chunks()[3].Type(), Equals, diff.Equal) + c.Assert(p.FilePatches()[0].Chunks()[4].Type(), Equals, diff.Delete) + c.Assert(p.FilePatches()[0].Chunks()[5].Type(), Equals, diff.Add) + c.Assert(p.FilePatches()[0].Chunks()[6].Type(), Equals, diff.Equal) + str := change.String() c.Assert(str, Equals, "") } @@ -367,3 +392,39 @@ func (s *ChangeSuite) TestChangesSort(c *C) { sort.Sort(changes) c.Assert(changes.String(), Equals, expected) } + +func (s *ChangeSuite) TestCancel(c *C) { + // Commit a5078b19f08f63e7948abd0a5e2fb7d319d3a565 of the go-git + // fixture inserted "examples/clone/main.go". + // + // On that commit, the "examples/clone" tree is + // 6efca3ff41cab651332f9ebc0c96bb26be809615 + // + // and the "examples/colone/main.go" is + // f95dc8f7923add1a8b9f72ecb1e8db1402de601a + + path := "examples/clone/main.go" + name := "main.go" + mode := filemode.Regular + blob := plumbing.NewHash("f95dc8f7923add1a8b9f72ecb1e8db1402de601a") + tree := plumbing.NewHash("6efca3ff41cab651332f9ebc0c96bb26be809615") + + change := &Change{ + From: empty, + To: ChangeEntry{ + Name: path, + Tree: s.tree(c, tree), + TreeEntry: TreeEntry{ + Name: name, + Mode: mode, + Hash: blob, + }, + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + p, err := change.PatchContext(ctx) + c.Assert(p, IsNil) + c.Assert(err, ErrorMatches, "operation canceled") +} diff --git a/plumbing/object/commit.go b/plumbing/object/commit.go index c9a4c0e..3ed85ba 100644 --- a/plumbing/object/commit.go +++ b/plumbing/object/commit.go @@ -3,6 +3,7 @@ package object import ( "bufio" "bytes" + "context" "errors" "fmt" "io" @@ -75,7 +76,8 @@ func (c *Commit) Tree() (*Tree, error) { } // Patch returns the Patch between the actual commit and the provided one. -func (c *Commit) Patch(to *Commit) (*Patch, error) { +// Error will be return if context expires. Provided context must be non-nil +func (c *Commit) PatchContext(ctx context.Context, to *Commit) (*Patch, error) { fromTree, err := c.Tree() if err != nil { return nil, err @@ -86,7 +88,12 @@ func (c *Commit) Patch(to *Commit) (*Patch, error) { return nil, err } - return fromTree.Patch(toTree) + return fromTree.PatchContext(ctx, toTree) +} + +// Patch returns the Patch between the actual commit and the provided one. +func (c *Commit) Patch(to *Commit) (*Patch, error) { + return c.PatchContext(context.Background(), to) } // Parents return a CommitIter to the parent Commits. diff --git a/plumbing/object/commit_test.go b/plumbing/object/commit_test.go index 191b14d..996d481 100644 --- a/plumbing/object/commit_test.go +++ b/plumbing/object/commit_test.go @@ -2,6 +2,7 @@ package object import ( "bytes" + "context" "io" "strings" "time" @@ -132,6 +133,59 @@ Binary files /dev/null and b/binary.jpg differ c.Assert(buf.String(), Equals, patch.String()) } +func (s *SuiteCommit) TestPatchContext(c *C) { + from := s.commit(c, plumbing.NewHash("918c48b83bd081e863dbe1b80f8998f058cd8294")) + to := s.commit(c, plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")) + + patch, err := from.PatchContext(context.Background(), to) + c.Assert(err, IsNil) + + buf := bytes.NewBuffer(nil) + err = patch.Encode(buf) + c.Assert(err, IsNil) + + c.Assert(buf.String(), Equals, `diff --git a/vendor/foo.go b/vendor/foo.go +new file mode 100644 +index 0000000000000000000000000000000000000000..9dea2395f5403188298c1dabe8bdafe562c491e3 +--- /dev/null ++++ b/vendor/foo.go +@@ -0,0 +1,7 @@ ++package main ++ ++import "fmt" ++ ++func main() { ++ fmt.Println("Hello, playground") ++} +`) + c.Assert(buf.String(), Equals, patch.String()) + + from = s.commit(c, plumbing.NewHash("b8e471f58bcbca63b07bda20e428190409c2db47")) + to = s.commit(c, plumbing.NewHash("35e85108805c84807bc66a02d91535e1e24b38b9")) + + patch, err = from.PatchContext(context.Background(), to) + c.Assert(err, IsNil) + + buf.Reset() + err = patch.Encode(buf) + c.Assert(err, IsNil) + + c.Assert(buf.String(), Equals, `diff --git a/CHANGELOG b/CHANGELOG +deleted file mode 100644 +index d3ff53e0564a9f87d8e84b6e28e5060e517008aa..0000000000000000000000000000000000000000 +--- a/CHANGELOG ++++ /dev/null +@@ -1 +0,0 @@ +-Initial changelog +diff --git a/binary.jpg b/binary.jpg +new file mode 100644 +index 0000000000000000000000000000000000000000..d5c0f4ab811897cadf03aec358ae60d21f91c50d +Binary files /dev/null and b/binary.jpg differ +`) + + c.Assert(buf.String(), Equals, patch.String()) +} + func (s *SuiteCommit) TestCommitEncodeDecodeIdempotent(c *C) { ts, err := time.Parse(time.RFC3339, "2006-01-02T15:04:05-07:00") c.Assert(err, IsNil) @@ -363,3 +417,15 @@ sYyf9RfOnw/KUFAQbdtvLx3ikODQC+D3KBtuKI9ISHQfgw== _, ok := e.Identities["Sunny "] c.Assert(ok, Equals, true) } + +func (s *SuiteCommit) TestPatchCancel(c *C) { + from := s.commit(c, plumbing.NewHash("918c48b83bd081e863dbe1b80f8998f058cd8294")) + to := s.commit(c, plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + patch, err := from.PatchContext(ctx, to) + c.Assert(patch, IsNil) + c.Assert(err, ErrorMatches, "operation canceled") + +} diff --git a/plumbing/object/difftree.go b/plumbing/object/difftree.go index ac58c4d..a30a29e 100644 --- a/plumbing/object/difftree.go +++ b/plumbing/object/difftree.go @@ -2,6 +2,7 @@ package object import ( "bytes" + "context" "gopkg.in/src-d/go-git.v4/utils/merkletrie" "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" @@ -10,6 +11,13 @@ import ( // DiffTree compares the content and mode of the blobs found via two // tree objects. func DiffTree(a, b *Tree) (Changes, error) { + return DiffTreeContext(context.Background(), a, b) +} + +// DiffTree compares the content and mode of the blobs found via two +// tree objects. Provided context must be non-nil. +// An error will be return if context expires +func DiffTreeContext(ctx context.Context, a, b *Tree) (Changes, error) { from := NewTreeRootNode(a) to := NewTreeRootNode(b) @@ -17,8 +25,11 @@ func DiffTree(a, b *Tree) (Changes, error) { return bytes.Equal(a.Hash(), b.Hash()) } - merkletrieChanges, err := merkletrie.DiffTree(from, to, hashEqual) + merkletrieChanges, err := merkletrie.DiffTreeContext(ctx, from, to, hashEqual) if err != nil { + if err == merkletrie.ErrCanceled { + return nil, ErrCanceled + } return nil, err } diff --git a/plumbing/object/patch.go b/plumbing/object/patch.go index aa96a96..adeaccb 100644 --- a/plumbing/object/patch.go +++ b/plumbing/object/patch.go @@ -2,6 +2,8 @@ package object import ( "bytes" + "context" + "errors" "fmt" "io" "math" @@ -15,10 +17,25 @@ import ( dmp "github.com/sergi/go-diff/diffmatchpatch" ) +var ( + ErrCanceled = errors.New("operation canceled") +) + func getPatch(message string, changes ...*Change) (*Patch, error) { + ctx := context.Background() + return getPatchContext(ctx, message, changes...) +} + +func getPatchContext(ctx context.Context, message string, changes ...*Change) (*Patch, error) { var filePatches []fdiff.FilePatch for _, c := range changes { - fp, err := filePatch(c) + select { + case <-ctx.Done(): + return nil, ErrCanceled + default: + } + + fp, err := filePatchWithContext(ctx, c) if err != nil { return nil, err } @@ -29,7 +46,7 @@ func getPatch(message string, changes ...*Change) (*Patch, error) { return &Patch{message, filePatches}, nil } -func filePatch(c *Change) (fdiff.FilePatch, error) { +func filePatchWithContext(ctx context.Context, c *Change) (fdiff.FilePatch, error) { from, to, err := c.Files() if err != nil { return nil, err @@ -52,6 +69,12 @@ func filePatch(c *Change) (fdiff.FilePatch, error) { var chunks []fdiff.Chunk for _, d := range diffs { + select { + case <-ctx.Done(): + return nil, ErrCanceled + default: + } + var op fdiff.Operation switch d.Type { case dmp.DiffEqual: @@ -70,6 +93,11 @@ func filePatch(c *Change) (fdiff.FilePatch, error) { from: c.From, to: c.To, }, nil + +} + +func filePatch(c *Change) (fdiff.FilePatch, error) { + return filePatchWithContext(context.Background(), c) } func fileContent(f *File) (content string, isBinary bool, err error) { diff --git a/plumbing/object/tree.go b/plumbing/object/tree.go index 30bbcb0..86d19c0 100644 --- a/plumbing/object/tree.go +++ b/plumbing/object/tree.go @@ -2,6 +2,7 @@ package object import ( "bufio" + "context" "errors" "fmt" "io" @@ -295,15 +296,30 @@ func (from *Tree) Diff(to *Tree) (Changes, error) { return DiffTree(from, to) } +// Diff returns a list of changes between this tree and the provided one +// Error will be returned if context expires +// Provided context must be non nil +func (from *Tree) DiffContext(ctx context.Context, to *Tree) (Changes, error) { + return DiffTreeContext(ctx, from, to) +} + // Patch returns a slice of Patch objects with all the changes between trees // in chunks. This representation can be used to create several diff outputs. func (from *Tree) Patch(to *Tree) (*Patch, error) { - changes, err := DiffTree(from, to) + return from.PatchContext(context.Background(), to) +} + +// Patch returns a slice of Patch objects with all the changes between trees +// in chunks. This representation can be used to create several diff outputs. +// If context expires, an error will be returned +// Provided context must be non-nil +func (from *Tree) PatchContext(ctx context.Context, to *Tree) (*Patch, error) { + changes, err := DiffTreeContext(ctx, from, to) if err != nil { return nil, err } - return changes.Patch() + return changes.PatchContext(ctx) } // treeEntryIter facilitates iterating through the TreeEntry objects in a Tree. -- cgit From b304997a387a5106321fe87069a6f136d9fbd1f6 Mon Sep 17 00:00:00 2001 From: James Ravn Date: Thu, 5 Jul 2018 10:50:09 +0100 Subject: plumbing: object, expose ErrEntryNotFound in FindEntry. Fixes #883 FindEntry will return ErrDirNotFound if the directory doesn't exist. But it doesn't return a public error if the entry itself is missing. This exposes the internal error ErrEntryNotFound, so users can programmatically check for this condition. Signed-off-by: James Ravn --- plumbing/object/tree.go | 5 ++--- plumbing/object/tree_test.go | 6 ++++++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'plumbing') diff --git a/plumbing/object/tree.go b/plumbing/object/tree.go index 30bbcb0..7d9e90b 100644 --- a/plumbing/object/tree.go +++ b/plumbing/object/tree.go @@ -25,6 +25,7 @@ var ( ErrMaxTreeDepth = errors.New("maximum tree depth exceeded") ErrFileNotFound = errors.New("file not found") ErrDirectoryNotFound = errors.New("directory not found") + ErrEntryNotFound = errors.New("entry not found") ) // Tree is basically like a directory - it references a bunch of other trees @@ -166,8 +167,6 @@ func (t *Tree) dir(baseName string) (*Tree, error) { return tree, err } -var errEntryNotFound = errors.New("entry not found") - func (t *Tree) entry(baseName string) (*TreeEntry, error) { if t.m == nil { t.buildMap() @@ -175,7 +174,7 @@ func (t *Tree) entry(baseName string) (*TreeEntry, error) { entry, ok := t.m[baseName] if !ok { - return nil, errEntryNotFound + return nil, ErrEntryNotFound } return entry, nil diff --git a/plumbing/object/tree_test.go b/plumbing/object/tree_test.go index 3a687dd..59d5d21 100644 --- a/plumbing/object/tree_test.go +++ b/plumbing/object/tree_test.go @@ -114,6 +114,12 @@ func (s *TreeSuite) TestFindEntry(c *C) { c.Assert(e.Name, Equals, "foo.go") } +func (s *TreeSuite) TestFindEntryNotFound(c *C) { + e, err := s.Tree.FindEntry("not-found") + c.Assert(e, IsNil) + c.Assert(err, Equals, ErrEntryNotFound) +} + // Overrides returned plumbing.EncodedObject for given hash. // Otherwise, delegates to actual storer to get real object type fakeStorer struct { -- cgit From 54d8c38fd63feefe6c25c1ac2945a6fc0bc7f16a Mon Sep 17 00:00:00 2001 From: Jerome Doucet Date: Sat, 14 Jul 2018 15:19:56 +0200 Subject: plumbing/transport/internal: common, add support of Gogs for ErrRepositoryNotFound, avoiding to get an 'unknown error: '. Add some tests for existing supported services (github, gitlab, etc...) too. Signed-off-by: Jerome Doucet --- plumbing/transport/internal/common/common.go | 5 ++ plumbing/transport/internal/common/common_test.go | 78 +++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 plumbing/transport/internal/common/common_test.go (limited to 'plumbing') diff --git a/plumbing/transport/internal/common/common.go b/plumbing/transport/internal/common/common.go index 8ec1ea5..00497f3 100644 --- a/plumbing/transport/internal/common/common.go +++ b/plumbing/transport/internal/common/common.go @@ -382,6 +382,7 @@ var ( gitProtocolNotFoundErr = "ERR \n Repository not found." gitProtocolNoSuchErr = "ERR no such repository" gitProtocolAccessDeniedErr = "ERR access denied" + gogsAccessDeniedErr = "Gogs: Repository does not exist or you do not have access" ) func isRepoNotFoundError(s string) bool { @@ -409,6 +410,10 @@ func isRepoNotFoundError(s string) bool { return true } + if strings.HasPrefix(s, gogsAccessDeniedErr) { + return true + } + return false } diff --git a/plumbing/transport/internal/common/common_test.go b/plumbing/transport/internal/common/common_test.go new file mode 100644 index 0000000..b2f035d --- /dev/null +++ b/plumbing/transport/internal/common/common_test.go @@ -0,0 +1,78 @@ +package common + +import ( + "fmt" + "testing" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type CommonSuite struct{} + +var _ = Suite(&CommonSuite{}) + +func (s *CommonSuite) TestIsRepoNotFoundErrorForUnknowSource(c *C) { + msg := "unknown system is complaining of something very sad :(" + + isRepoNotFound := isRepoNotFoundError(msg) + + c.Assert(isRepoNotFound, Equals, false) +} + +func (s *CommonSuite) TestIsRepoNotFoundErrorForGithub(c *C) { + msg := fmt.Sprintf("%s : some error stuf", githubRepoNotFoundErr) + + isRepoNotFound := isRepoNotFoundError(msg) + + c.Assert(isRepoNotFound, Equals, true) +} + +func (s *CommonSuite) TestIsRepoNotFoundErrorForBitBucket(c *C) { + msg := fmt.Sprintf("%s : some error stuf", bitbucketRepoNotFoundErr) + + isRepoNotFound := isRepoNotFoundError(msg) + + c.Assert(isRepoNotFound, Equals, true) +} + +func (s *CommonSuite) TestIsRepoNotFoundErrorForLocal(c *C) { + msg := fmt.Sprintf("some error stuf : %s", localRepoNotFoundErr) + + isRepoNotFound := isRepoNotFoundError(msg) + + c.Assert(isRepoNotFound, Equals, true) +} + +func (s *CommonSuite) TestIsRepoNotFoundErrorForGitProtocolNotFound(c *C) { + msg := fmt.Sprintf("%s : some error stuf", gitProtocolNotFoundErr) + + isRepoNotFound := isRepoNotFoundError(msg) + + c.Assert(isRepoNotFound, Equals, true) +} + +func (s *CommonSuite) TestIsRepoNotFoundErrorForGitProtocolNoSuch(c *C) { + msg := fmt.Sprintf("%s : some error stuf", gitProtocolNoSuchErr) + + isRepoNotFound := isRepoNotFoundError(msg) + + c.Assert(isRepoNotFound, Equals, true) +} + +func (s *CommonSuite) TestIsRepoNotFoundErrorForGitProtocolAccessDenied(c *C) { + msg := fmt.Sprintf("%s : some error stuf", gitProtocolAccessDeniedErr) + + isRepoNotFound := isRepoNotFoundError(msg) + + c.Assert(isRepoNotFound, Equals, true) +} + +func (s *CommonSuite) TestIsRepoNotFoundErrorForGogsAccessDenied(c *C) { + msg := fmt.Sprintf("%s : some error stuf", gogsAccessDeniedErr) + + isRepoNotFound := isRepoNotFoundError(msg) + + c.Assert(isRepoNotFound, Equals, true) +} -- cgit From 8df413fe09e6cb2069a76b6df6715d0e610c8458 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Wed, 18 Jul 2018 11:14:49 +0200 Subject: plumbing/object: fix pgp signature encoder/decoder The way of reading pgp signatures was searching for pgp begin line in the header. This caused problems when this string appeared and was not part of the signature. For example if it appears in the message as an example or is part of the author name the decoder starts treating it as a signature. In this state the code was not able to notice then the header ended so it entered in an infinite loop searching for pgp end string. Now it uses the same method as original git. Searches for gpgsig section in header and starts getting all lines until the next part. In encoder the string used to add signatures was incorrect. It is now changed to the proper "gpgsig" string instead of "pgpsig". Signed-off-by: Javi Fontan --- plumbing/object/commit.go | 31 +++++++++++++------------------ plumbing/object/commit_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 18 deletions(-) (limited to 'plumbing') diff --git a/plumbing/object/commit.go b/plumbing/object/commit.go index 3ed85ba..b1c0e01 100644 --- a/plumbing/object/commit.go +++ b/plumbing/object/commit.go @@ -17,8 +17,9 @@ import ( ) const ( - beginpgp string = "-----BEGIN PGP SIGNATURE-----" - endpgp string = "-----END PGP SIGNATURE-----" + beginpgp string = "-----BEGIN PGP SIGNATURE-----" + endpgp string = "-----END PGP SIGNATURE-----" + headerpgp string = "gpgsig" ) // Hash represents the hash of an object @@ -181,23 +182,13 @@ func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { } if pgpsig { - // Check if it's the end of a PGP signature. - if bytes.Contains(line, []byte(endpgp)) { - c.PGPSignature += endpgp + "\n" - pgpsig = false - } else { - // Trim the left padding. + if len(line) > 0 && line[0] == ' ' { line = bytes.TrimLeft(line, " ") c.PGPSignature += string(line) + continue + } else { + pgpsig = false } - continue - } - - // Check if it's the beginning of a PGP signature. - if bytes.Contains(line, []byte(beginpgp)) { - c.PGPSignature += beginpgp + "\n" - pgpsig = true - continue } if !message { @@ -217,6 +208,9 @@ func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { c.Author.Decode(split[1]) case "committer": c.Committer.Decode(split[1]) + case headerpgp: + c.PGPSignature += string(split[1]) + "\n" + pgpsig = true } } else { c.Message += string(line) @@ -269,13 +263,14 @@ func (b *Commit) encode(o plumbing.EncodedObject, includeSig bool) (err error) { } if b.PGPSignature != "" && includeSig { - if _, err = fmt.Fprint(w, "pgpsig"); err != nil { + if _, err = fmt.Fprint(w, "\n"+headerpgp); err != nil { return err } // Split all the signature lines and write with a left padding and // newline at the end. - lines := strings.Split(b.PGPSignature, "\n") + signature := strings.TrimSuffix(b.PGPSignature, "\n") + lines := strings.Split(signature, "\n") for _, line := range lines { if _, err = fmt.Fprintf(w, " %s\n", line); err != nil { return err diff --git a/plumbing/object/commit_test.go b/plumbing/object/commit_test.go index 996d481..b5dfbe3 100644 --- a/plumbing/object/commit_test.go +++ b/plumbing/object/commit_test.go @@ -324,6 +324,38 @@ RUysgqjcpT8+iQM1PblGfHR4XAhuOqN5Fx06PSaFZhqvWFezJ28/CLyX5q+oIVk= err = decoded.Decode(encoded) c.Assert(err, IsNil) c.Assert(decoded.PGPSignature, Equals, pgpsignature) + + // signature in author name + + commit.PGPSignature = "" + commit.Author.Name = beginpgp + encoded = &plumbing.MemoryObject{} + decoded = &Commit{} + + err = commit.Encode(encoded) + c.Assert(err, IsNil) + + err = decoded.Decode(encoded) + c.Assert(err, IsNil) + c.Assert(decoded.PGPSignature, Equals, "") + c.Assert(decoded.Author.Name, Equals, beginpgp) + + // broken signature + + commit.PGPSignature = beginpgp + "\n" + + "some\n" + + "trash\n" + + endpgp + + "text\n" + encoded = &plumbing.MemoryObject{} + decoded = &Commit{} + + err = commit.Encode(encoded) + c.Assert(err, IsNil) + + err = decoded.Decode(encoded) + c.Assert(err, IsNil) + c.Assert(decoded.PGPSignature, Equals, commit.PGPSignature) } func (s *SuiteCommit) TestStat(c *C) { -- cgit From 009f1069a1248c1e9189a9e4c342f6d017156ec4 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 19 Jul 2018 15:20:10 +0200 Subject: plumbing/format/idxfile: add new Index and MemoryIndex Signed-off-by: Miguel Molina --- plumbing/format/idxfile/decoder.go | 109 +++++++++------- plumbing/format/idxfile/decoder_test.go | 106 +++++++-------- plumbing/format/idxfile/encoder.go | 101 +++++++------- plumbing/format/idxfile/encoder_test.go | 21 +-- plumbing/format/idxfile/idxfile.go | 224 +++++++++++++++++++++++++++----- plumbing/format/idxfile/idxfile_test.go | 109 ++++++++++++++++ plumbing/format/packfile/decoder.go | 27 ++-- plumbing/format/packfile/index.go | 125 ------------------ 8 files changed, 483 insertions(+), 339 deletions(-) create mode 100644 plumbing/format/idxfile/idxfile_test.go delete mode 100644 plumbing/format/packfile/index.go (limited to 'plumbing') diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go index 45afb1e..25ff88e 100644 --- a/plumbing/format/idxfile/decoder.go +++ b/plumbing/format/idxfile/decoder.go @@ -17,6 +17,11 @@ var ( ErrMalformedIdxFile = errors.New("Malformed IDX file") ) +const ( + fanout = 256 + objectIDLength = 20 +) + // Decoder reads and decodes idx files from an input stream. type Decoder struct { *bufio.Reader @@ -27,13 +32,13 @@ func NewDecoder(r io.Reader) *Decoder { return &Decoder{bufio.NewReader(r)} } -// Decode reads from the stream and decode the content into the Idxfile struct. -func (d *Decoder) Decode(idx *Idxfile) error { +// Decode reads from the stream and decode the content into the MemoryIndex struct. +func (d *Decoder) Decode(idx *MemoryIndex) error { if err := validateHeader(d); err != nil { return err } - flow := []func(*Idxfile, io.Reader) error{ + flow := []func(*MemoryIndex, io.Reader) error{ readVersion, readFanout, readObjectNames, @@ -48,10 +53,6 @@ func (d *Decoder) Decode(idx *Idxfile) error { } } - if !idx.isValid() { - return ErrMalformedIdxFile - } - return nil } @@ -68,7 +69,7 @@ func validateHeader(r io.Reader) error { return nil } -func readVersion(idx *Idxfile, r io.Reader) error { +func readVersion(idx *MemoryIndex, r io.Reader) error { v, err := binary.ReadUint32(r) if err != nil { return err @@ -82,74 +83,92 @@ func readVersion(idx *Idxfile, r io.Reader) error { return nil } -func readFanout(idx *Idxfile, r io.Reader) error { - var err error - for i := 0; i < 255; i++ { - idx.Fanout[i], err = binary.ReadUint32(r) +func readFanout(idx *MemoryIndex, r io.Reader) error { + for k := 0; k < fanout; k++ { + n, err := binary.ReadUint32(r) if err != nil { return err } + + idx.Fanout[k] = n + idx.FanoutMapping[k] = noMapping } - idx.ObjectCount, err = binary.ReadUint32(r) - return err + return nil } -func readObjectNames(idx *Idxfile, r io.Reader) error { - c := int(idx.ObjectCount) - new := make([]Entry, c) - for i := 0; i < c; i++ { - e := &new[i] - if _, err := io.ReadFull(r, e.Hash[:]); err != nil { +func readObjectNames(idx *MemoryIndex, r io.Reader) error { + for k := 0; k < fanout; k++ { + var buckets uint32 + if k == 0 { + buckets = idx.Fanout[k] + } else { + buckets = idx.Fanout[k] - idx.Fanout[k-1] + } + + if buckets == 0 { + continue + } + + if buckets < 0 { + return ErrMalformedIdxFile + } + + idx.FanoutMapping[k] = len(idx.Names) + + nameLen := int(buckets * objectIDLength) + bin := make([]byte, nameLen) + if _, err := io.ReadFull(r, bin); err != nil { return err } - idx.Entries = append(idx.Entries, e) + idx.Names = append(idx.Names, bin) + idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4)) + idx.Crc32 = append(idx.Crc32, make([]byte, buckets*4)) } return nil } -func readCRC32(idx *Idxfile, r io.Reader) error { - c := int(idx.ObjectCount) - for i := 0; i < c; i++ { - if err := binary.Read(r, &idx.Entries[i].CRC32); err != nil { - return err +func readCRC32(idx *MemoryIndex, r io.Reader) error { + for k := 0; k < fanout; k++ { + if pos := idx.FanoutMapping[k]; pos != noMapping { + if _, err := io.ReadFull(r, idx.Crc32[pos]); err != nil { + return err + } } } return nil } -func readOffsets(idx *Idxfile, r io.Reader) error { - c := int(idx.ObjectCount) - - for i := 0; i < c; i++ { - o, err := binary.ReadUint32(r) - if err != nil { - return err +func readOffsets(idx *MemoryIndex, r io.Reader) error { + var o64cnt int + for k := 0; k < fanout; k++ { + if pos := idx.FanoutMapping[k]; pos != noMapping { + if _, err := io.ReadFull(r, idx.Offset32[pos]); err != nil { + return err + } + + for p := 0; p < len(idx.Offset32[pos]); p += 4 { + if idx.Offset32[pos][p]&(byte(1)<<7) > 0 { + o64cnt++ + } + } } - - idx.Entries[i].Offset = uint64(o) } - for i := 0; i < c; i++ { - if idx.Entries[i].Offset <= offsetLimit { - continue - } - - o, err := binary.ReadUint64(r) - if err != nil { + if o64cnt > 0 { + idx.Offset64 = make([]byte, o64cnt*8) + if _, err := io.ReadFull(r, idx.Offset64); err != nil { return err } - - idx.Entries[i].Offset = o } return nil } -func readChecksums(idx *Idxfile, r io.Reader) error { +func readChecksums(idx *MemoryIndex, r io.Reader) error { if _, err := io.ReadFull(r, idx.PackfileChecksum[:]); err != nil { return err } diff --git a/plumbing/format/idxfile/decoder_test.go b/plumbing/format/idxfile/decoder_test.go index 20d6859..b43d7c5 100644 --- a/plumbing/format/idxfile/decoder_test.go +++ b/plumbing/format/idxfile/decoder_test.go @@ -4,11 +4,12 @@ import ( "bytes" "encoding/base64" "fmt" + "io" + "io/ioutil" "testing" + "gopkg.in/src-d/go-git.v4/plumbing" . "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" - "gopkg.in/src-d/go-git.v4/storage/memory" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" @@ -26,51 +27,34 @@ func (s *IdxfileSuite) TestDecode(c *C) { f := fixtures.Basic().One() d := NewDecoder(f.Idx()) - idx := &Idxfile{} + idx := new(MemoryIndex) err := d.Decode(idx) c.Assert(err, IsNil) - c.Assert(idx.Entries, HasLen, 31) - c.Assert(idx.Entries[0].Hash.String(), Equals, "1669dce138d9b841a518c64b10914d88f5e488ea") - c.Assert(idx.Entries[0].Offset, Equals, uint64(615)) - c.Assert(idx.Entries[0].CRC32, Equals, uint32(3645019190)) + count, _ := idx.Count() + c.Assert(count, Equals, int64(31)) - c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9") - c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String()) -} - -func (s *IdxfileSuite) TestDecodeCRCs(c *C) { - f := fixtures.Basic().ByTag("ofs-delta").One() - - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - pd, err := packfile.NewDecoder(scanner, storage) + hash := plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea") + ok, err := idx.Contains(hash) c.Assert(err, IsNil) - _, err = pd.Decode() - c.Assert(err, IsNil) - - i := pd.Index().ToIdxFile() - i.Version = VersionSupported + c.Assert(ok, Equals, true) - buf := bytes.NewBuffer(nil) - e := NewEncoder(buf) - _, err = e.Encode(i) + offset, err := idx.FindOffset(hash) c.Assert(err, IsNil) + c.Assert(offset, Equals, int64(615)) - idx := &Idxfile{} - - d := NewDecoder(buf) - err = d.Decode(idx) + crc32, err := idx.FindCRC32(hash) c.Assert(err, IsNil) + c.Assert(crc32, Equals, uint32(3645019190)) - c.Assert(idx.Entries, DeepEquals, i.Entries) + c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9") + c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String()) } func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) { f := bytes.NewBufferString(fixtureLarge4GB) - idx := &Idxfile{} + idx := new(MemoryIndex) d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f)) err := d.Decode(idx) @@ -88,29 +72,22 @@ func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) { "35858be9c6f5914cbe6768489c41eb6809a2bceb": 5924278919, } - for _, e := range idx.Entries { - c.Assert(expected[e.Hash.String()], Equals, e.Offset) - } -} - -func (s *IdxfileSuite) TestDecode64bitsOffsetsIdempotent(c *C) { - f := bytes.NewBufferString(fixtureLarge4GB) - - expected := &Idxfile{} - - d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f)) - err := d.Decode(expected) + iter, err := idx.Entries() c.Assert(err, IsNil) - buf := bytes.NewBuffer(nil) - _, err = NewEncoder(buf).Encode(expected) - c.Assert(err, IsNil) + var entries int + for { + e, err := iter.Next() + if err == io.EOF { + break + } + c.Assert(err, IsNil) + entries++ - idx := &Idxfile{} - err = NewDecoder(buf).Decode(idx) - c.Assert(err, IsNil) + c.Assert(expected[e.Hash.String()], Equals, e.Offset) + } - c.Assert(idx.Entries, DeepEquals, expected.Entries) + c.Assert(entries, Equals, len(expected)) } const fixtureLarge4GB = `/3RPYwAAAAIAAAAAAAAAAAAAAAAAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEA @@ -139,3 +116,30 @@ AAAAAAAMgAAAAQAAAI6AAAACgAAAA4AAAASAAAAFAAAAAV9Qam8AAAABYR1ShwAAAACdxfYxAAAA ANz1Di4AAAABPUnxJAAAAADNxzlGr6vCJpIFz4XaG/fi/f9C9zgQ8ptKSQpfQ1NMJBGTDTxxYGGp ch2xUA== ` + +func BenchmarkDecode(b *testing.B) { + if err := fixtures.Init(); err != nil { + b.Errorf("unexpected error initializing fixtures: %s", err) + } + + f := fixtures.Basic().One() + fixture, err := ioutil.ReadAll(f.Idx()) + if err != nil { + b.Errorf("unexpected error reading idx file: %s", err) + } + + defer func() { + if err := fixtures.Clean(); err != nil { + b.Errorf("unexpected error cleaning fixtures: %s", err) + } + }() + + for i := 0; i < b.N; i++ { + f := bytes.NewBuffer(fixture) + idx := new(MemoryIndex) + d := NewDecoder(f) + if err := d.Decode(idx); err != nil { + b.Errorf("unexpected error decoding: %s", err) + } + } +} diff --git a/plumbing/format/idxfile/encoder.go b/plumbing/format/idxfile/encoder.go index 40abfb8..55df466 100644 --- a/plumbing/format/idxfile/encoder.go +++ b/plumbing/format/idxfile/encoder.go @@ -4,12 +4,11 @@ import ( "crypto/sha1" "hash" "io" - "sort" "gopkg.in/src-d/go-git.v4/utils/binary" ) -// Encoder writes Idxfile structs to an output stream. +// Encoder writes MemoryIndex structs to an output stream. type Encoder struct { io.Writer hash hash.Hash @@ -22,11 +21,9 @@ func NewEncoder(w io.Writer) *Encoder { return &Encoder{mw, h} } -// Encode encodes an Idxfile to the encoder writer. -func (e *Encoder) Encode(idx *Idxfile) (int, error) { - idx.Entries.Sort() - - flow := []func(*Idxfile) (int, error){ +// Encode encodes an MemoryIndex to the encoder writer. +func (e *Encoder) Encode(idx *MemoryIndex) (int, error) { + flow := []func(*MemoryIndex) (int, error){ e.encodeHeader, e.encodeFanout, e.encodeHashes, @@ -48,7 +45,7 @@ func (e *Encoder) Encode(idx *Idxfile) (int, error) { return sz, nil } -func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) { +func (e *Encoder) encodeHeader(idx *MemoryIndex) (int, error) { c, err := e.Write(idxHeader) if err != nil { return c, err @@ -57,75 +54,81 @@ func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) { return c + 4, binary.WriteUint32(e, idx.Version) } -func (e *Encoder) encodeFanout(idx *Idxfile) (int, error) { - fanout := idx.calculateFanout() - for _, c := range fanout { +func (e *Encoder) encodeFanout(idx *MemoryIndex) (int, error) { + for _, c := range idx.Fanout { if err := binary.WriteUint32(e, c); err != nil { return 0, err } } - return 1024, nil + return fanout * 4, nil } -func (e *Encoder) encodeHashes(idx *Idxfile) (int, error) { - sz := 0 - for _, ent := range idx.Entries { - i, err := e.Write(ent.Hash[:]) - sz += i +func (e *Encoder) encodeHashes(idx *MemoryIndex) (int, error) { + var size int + for k := 0; k < fanout; k++ { + pos := idx.FanoutMapping[k] + if pos == noMapping { + continue + } + n, err := e.Write(idx.Names[pos]) if err != nil { - return sz, err + return size, err } + size += n } - - return sz, nil + return size, nil } -func (e *Encoder) encodeCRC32(idx *Idxfile) (int, error) { - sz := 0 - for _, ent := range idx.Entries { - err := binary.Write(e, ent.CRC32) - sz += 4 +func (e *Encoder) encodeCRC32(idx *MemoryIndex) (int, error) { + var size int + for k := 0; k < fanout; k++ { + pos := idx.FanoutMapping[k] + if pos == noMapping { + continue + } + n, err := e.Write(idx.Crc32[pos]) if err != nil { - return sz, err + return size, err } + + size += n } - return sz, nil + return size, nil } -func (e *Encoder) encodeOffsets(idx *Idxfile) (int, error) { - sz := 0 - - var o64bits []uint64 - for _, ent := range idx.Entries { - o := ent.Offset - if o > offsetLimit { - o64bits = append(o64bits, o) - o = offsetLimit + uint64(len(o64bits)) +func (e *Encoder) encodeOffsets(idx *MemoryIndex) (int, error) { + var size int + for k := 0; k < fanout; k++ { + pos := idx.FanoutMapping[k] + if pos == noMapping { + continue } - if err := binary.WriteUint32(e, uint32(o)); err != nil { - return sz, err + n, err := e.Write(idx.Offset32[pos]) + if err != nil { + return size, err } - sz += 4 + size += n } - for _, o := range o64bits { - if err := binary.WriteUint64(e, o); err != nil { - return sz, err + if len(idx.Offset64) > 0 { + n, err := e.Write(idx.Offset64) + if err != nil { + return size, err } - sz += 8 + size += n } - return sz, nil + return size, nil } -func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) { +func (e *Encoder) encodeChecksums(idx *MemoryIndex) (int, error) { if _, err := e.Write(idx.PackfileChecksum[:]); err != nil { return 0, err } @@ -137,11 +140,3 @@ func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) { return 40, nil } - -// EntryList implements sort.Interface allowing sorting in increasing order. -type EntryList []*Entry - -func (p EntryList) Len() int { return len(p) } -func (p EntryList) Less(i, j int) bool { return p[i].Hash.String() < p[j].Hash.String() } -func (p EntryList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -func (p EntryList) Sort() { sort.Sort(p) } diff --git a/plumbing/format/idxfile/encoder_test.go b/plumbing/format/idxfile/encoder_test.go index e5b96b7..e8deeea 100644 --- a/plumbing/format/idxfile/encoder_test.go +++ b/plumbing/format/idxfile/encoder_test.go @@ -4,37 +4,18 @@ import ( "bytes" "io/ioutil" - "gopkg.in/src-d/go-git.v4/plumbing" . "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" ) -func (s *IdxfileSuite) TestEncode(c *C) { - expected := &Idxfile{} - expected.Add(plumbing.NewHash("4bfc730165c370df4a012afbb45ba3f9c332c0d4"), 82, 82) - expected.Add(plumbing.NewHash("8fa2238efdae08d83c12ee176fae65ff7c99af46"), 42, 42) - - buf := bytes.NewBuffer(nil) - e := NewEncoder(buf) - _, err := e.Encode(expected) - c.Assert(err, IsNil) - - idx := &Idxfile{} - d := NewDecoder(buf) - err = d.Decode(idx) - c.Assert(err, IsNil) - - c.Assert(idx.Entries, DeepEquals, expected.Entries) -} - func (s *IdxfileSuite) TestDecodeEncode(c *C) { fixtures.ByTag("packfile").Test(c, func(f *fixtures.Fixture) { expected, err := ioutil.ReadAll(f.Idx()) c.Assert(err, IsNil) - idx := &Idxfile{} + idx := new(MemoryIndex) d := NewDecoder(bytes.NewBuffer(expected)) err = d.Decode(idx) c.Assert(err, IsNil) diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index 6b05eaa..b196608 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -1,68 +1,222 @@ package idxfile -import "gopkg.in/src-d/go-git.v4/plumbing" +import ( + "bytes" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) const ( // VersionSupported is the only idx version supported. VersionSupported = 2 - offsetLimit = 0x7fffffff + noMapping = -1 ) var ( idxHeader = []byte{255, 't', 'O', 'c'} ) -// Idxfile is the in memory representation of an idx file. -type Idxfile struct { - Version uint32 - Fanout [255]uint32 - ObjectCount uint32 - Entries EntryList +// Index represents an index of a packfile. +type Index interface { + // Contains checks whether the given hash is in the index. + Contains(h plumbing.Hash) (bool, error) + // FindOffset finds the offset in the packfile for the object with + // the given hash. + FindOffset(h plumbing.Hash) (int64, error) + // FindCRC32 finds the CRC32 of the object with the given hash. + FindCRC32(h plumbing.Hash) (uint32, error) + // Count returns the number of entries in the index. + Count() (int64, error) + // Entries returns an iterator to retrieve all index entries. + Entries() (EntryIter, error) +} + +// MemoryIndex is the in memory representation of an idx file. +type MemoryIndex struct { + Version uint32 + Fanout [256]uint32 + // FanoutMapping maps the position in the fanout table to the position + // in the Names, Offset32 and Crc32 slices. This improves the memory + // usage by not needing an array with unnecessary empty slots. + FanoutMapping [256]int + Names [][]byte + Offset32 [][]byte + Crc32 [][]byte + Offset64 []byte PackfileChecksum [20]byte IdxChecksum [20]byte } -func NewIdxfile() *Idxfile { - return &Idxfile{} +var _ Index = (*MemoryIndex)(nil) + +// NewMemoryIndex returns an instance of a new MemoryIndex. +func NewMemoryIndex() *MemoryIndex { + return &MemoryIndex{} } -// Entry is the in memory representation of an object entry in the idx file. -type Entry struct { - Hash plumbing.Hash - CRC32 uint32 - Offset uint64 +func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { + k := idx.FanoutMapping[h[0]] + if k == noMapping { + return -1 + } + + data := idx.Names[k] + high := uint64(len(idx.Offset32[k])) >> 2 + if high == 0 { + return -1 + } + + low := uint64(0) + for { + mid := (low + high) >> 1 + offset := mid + (mid << 2) + + cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return int(mid) + } else { + low = mid + 1 + } + + if low < high { + break + } + } + + return -1 } -// Add adds a new Entry with the given values to the Idxfile. -func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) { - idx.Entries = append(idx.Entries, &Entry{ - Hash: h, - Offset: offset, - CRC32: crc32, - }) +// Contains implements the Index interface. +func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) { + i := idx.findHashIndex(h) + return i >= 0, nil } -func (idx *Idxfile) isValid() bool { - fanout := idx.calculateFanout() - for k, c := range idx.Fanout { - if fanout[k] != c { - return false +// FindOffset implements the Index interface. +func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) { + k := idx.FanoutMapping[h[0]] + i := idx.findHashIndex(h) + if i < 0 { + return 0, plumbing.ErrObjectNotFound + } + + return idx.getOffset(k, i) +} + +const isO64Mask = uint64(1) << 31 + +func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) { + offset := secondLevel << 2 + buf := bytes.NewBuffer(idx.Offset32[firstLevel][offset : offset+4]) + ofs, err := binary.ReadUint32(buf) + if err != nil { + return -1, err + } + + if (uint64(ofs) & isO64Mask) != 0 { + offset := 8 * (uint64(ofs) & ^isO64Mask) + buf := bytes.NewBuffer(idx.Offset64[offset : offset+8]) + n, err := binary.ReadUint64(buf) + if err != nil { + return -1, err } + + return int64(n), nil } - return true + return int64(ofs), nil } -func (idx *Idxfile) calculateFanout() [256]uint32 { - fanout := [256]uint32{} - for _, e := range idx.Entries { - fanout[e.Hash[0]]++ +// FindCRC32 implements the Index interface. +func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) { + k := idx.FanoutMapping[h[0]] + i := idx.findHashIndex(h) + if i < 0 { + return 0, plumbing.ErrObjectNotFound } - for i := 1; i < 256; i++ { - fanout[i] += fanout[i-1] + return idx.getCrc32(k, i) +} + +func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) { + offset := secondLevel << 2 + buf := bytes.NewBuffer(idx.Crc32[firstLevel][offset : offset+4]) + return binary.ReadUint32(buf) +} + +// Count implements the Index interface. +func (idx *MemoryIndex) Count() (int64, error) { + return int64(idx.Fanout[fanout-1]), nil +} + +// Entries implements the Index interface. +func (idx *MemoryIndex) Entries() (EntryIter, error) { + return &idxfileEntryIter{idx, 0, 0, 0}, nil +} + +// EntryIter is an iterator that will return the entries in a packfile index. +type EntryIter interface { + // Next returns the next entry in the packfile index. + Next() (*Entry, error) + // Close closes the iterator. + Close() error +} + +type idxfileEntryIter struct { + idx *MemoryIndex + total int + firstLevel, secondLevel int +} + +func (i *idxfileEntryIter) Next() (*Entry, error) { + for { + if i.firstLevel >= fanout { + return nil, io.EOF + } + + if i.total >= int(i.idx.Fanout[i.firstLevel]) { + i.firstLevel++ + i.secondLevel = 0 + continue + } + + entry := new(Entry) + ofs := i.secondLevel * objectIDLength + copy(entry.Hash[:], i.idx.Names[i.idx.FanoutMapping[i.firstLevel]][ofs:]) + + pos := i.idx.FanoutMapping[entry.Hash[0]] + + offset, err := i.idx.getOffset(pos, i.secondLevel) + if err != nil { + return nil, err + } + entry.Offset = uint64(offset) + + entry.CRC32, err = i.idx.getCrc32(pos, i.secondLevel) + if err != nil { + return nil, err + } + + i.secondLevel++ + i.total++ + + return entry, nil } +} - return fanout +func (i *idxfileEntryIter) Close() error { + i.firstLevel = fanout + return nil +} + +// Entry is the in memory representation of an object entry in the idx file. +type Entry struct { + Hash plumbing.Hash + CRC32 uint32 + Offset uint64 } diff --git a/plumbing/format/idxfile/idxfile_test.go b/plumbing/format/idxfile/idxfile_test.go new file mode 100644 index 0000000..f42a419 --- /dev/null +++ b/plumbing/format/idxfile/idxfile_test.go @@ -0,0 +1,109 @@ +package idxfile_test + +import ( + "bytes" + "encoding/base64" + "io" + "testing" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" +) + +func BenchmarkFindOffset(b *testing.B) { + idx := fixtureIndex(b) + + for i := 0; i < b.N; i++ { + for _, h := range fixtureHashes { + _, err := idx.FindOffset(h) + if err != nil { + b.Fatalf("error getting offset: %s", err) + } + } + } +} + +func BenchmarkFindCRC32(b *testing.B) { + idx := fixtureIndex(b) + + for i := 0; i < b.N; i++ { + for _, h := range fixtureHashes { + _, err := idx.FindCRC32(h) + if err != nil { + b.Fatalf("error getting crc32: %s", err) + } + } + } +} + +func BenchmarkContains(b *testing.B) { + idx := fixtureIndex(b) + + for i := 0; i < b.N; i++ { + for _, h := range fixtureHashes { + ok, err := idx.Contains(h) + if err != nil { + b.Fatalf("error checking if hash is in index: %s", err) + } + + if !ok { + b.Error("expected hash to be in index") + } + } + } +} + +func BenchmarkEntries(b *testing.B) { + idx := fixtureIndex(b) + + for i := 0; i < b.N; i++ { + iter, err := idx.Entries() + if err != nil { + b.Fatalf("unexpected error getting entries: %s", err) + } + + var entries int + for { + _, err := iter.Next() + if err != nil { + if err == io.EOF { + break + } + + b.Errorf("unexpected error getting entry: %s", err) + } + + entries++ + } + + if entries != len(fixtureHashes) { + b.Errorf("expecting entries to be %d, got %d", len(fixtureHashes), entries) + } + } +} + +var fixtureHashes = []plumbing.Hash{ + plumbing.NewHash("303953e5aa461c203a324821bc1717f9b4fff895"), + plumbing.NewHash("5296768e3d9f661387ccbff18c4dea6c997fd78c"), + plumbing.NewHash("03fc8d58d44267274edef4585eaeeb445879d33f"), + plumbing.NewHash("8f3ceb4ea4cb9e4a0f751795eb41c9a4f07be772"), + plumbing.NewHash("e0d1d625010087f79c9e01ad9d8f95e1628dda02"), + plumbing.NewHash("90eba326cdc4d1d61c5ad25224ccbf08731dd041"), + plumbing.NewHash("bab53055add7bc35882758a922c54a874d6b1272"), + plumbing.NewHash("1b8995f51987d8a449ca5ea4356595102dc2fbd4"), + plumbing.NewHash("35858be9c6f5914cbe6768489c41eb6809a2bceb"), +} + +func fixtureIndex(t testing.TB) *idxfile.MemoryIndex { + f := bytes.NewBufferString(fixtureLarge4GB) + + idx := new(idxfile.MemoryIndex) + + d := idxfile.NewDecoder(base64.NewDecoder(base64.StdEncoding, f)) + err := d.Decode(idx) + if err != nil { + t.Fatalf("unexpected error decoding index: %s", err) + } + + return idx +} diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index f706e5d..765401f 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -5,6 +5,7 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) @@ -63,7 +64,7 @@ type Decoder struct { // hasBuiltIndex indicates if the index is fully built or not. If it is not, // will be built incrementally while decoding. hasBuiltIndex bool - idx *Index + idx idxfile.Index offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType @@ -117,7 +118,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, o: o, deltaBaseCache: cacheObject, - idx: NewIndex(0), + idx: idxfile.NewMemoryIndex(), offsetToType: make(map[int64]plumbing.ObjectType), decoderType: t, }, nil @@ -150,7 +151,8 @@ func (d *Decoder) doDecode() error { } if !d.hasBuiltIndex { - d.idx = NewIndex(int(count)) + // TODO: MemoryIndex is not writable, change to something else + d.idx = idxfile.NewMemoryIndex() } defer func() { d.hasBuiltIndex = true }() @@ -284,12 +286,12 @@ func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { } func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { - e, ok := d.idx.LookupHash(ref) - if !ok { + offset, err := d.idx.FindOffset(ref) + if err != nil { return plumbing.InvalidObject, plumbing.ErrObjectNotFound } - return d.ofsDeltaType(int64(e.Offset)) + return d.ofsDeltaType(offset) } func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { @@ -314,9 +316,14 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error return obj, err } + // TODO: remove this + _ = crc + + /* Add is no longer available if !d.hasBuiltIndex { d.idx.Add(obj.Hash(), uint64(h.Offset), crc) } + */ return obj, nil } @@ -448,8 +455,8 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { if d.s.IsSeekable { - if e, ok := d.idx.LookupHash(h); ok { - return d.DecodeObjectAt(int64(e.Offset)) + if offset, err := d.idx.FindOffset(h); err != nil { + return d.DecodeObjectAt(offset) } } @@ -475,7 +482,7 @@ func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.Encoded // SetIndex sets an index for the packfile. It is recommended to set this. // The index might be read from a file or reused from a previous Decoder usage // (see Index function). -func (d *Decoder) SetIndex(idx *Index) { +func (d *Decoder) SetIndex(idx idxfile.Index) { d.hasBuiltIndex = true d.idx = idx } @@ -484,7 +491,7 @@ func (d *Decoder) SetIndex(idx *Index) { // Index will return it. Otherwise, it will return an index that is built while // decoding. If neither SetIndex was called with a full index or Decode called // for the whole packfile, then the returned index will be incomplete. -func (d *Decoder) Index() *Index { +func (d *Decoder) Index() idxfile.Index { return d.idx } diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go deleted file mode 100644 index 021b2d1..0000000 --- a/plumbing/format/packfile/index.go +++ /dev/null @@ -1,125 +0,0 @@ -package packfile - -import ( - "sort" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" -) - -// Index is an in-memory representation of a packfile index. -// This uses idxfile.Idxfile under the hood to obtain indexes from .idx files -// or to store them. -type Index struct { - byHash map[plumbing.Hash]*idxfile.Entry - byOffset []*idxfile.Entry // sorted by their offset -} - -// NewIndex creates a new empty index with the given size. Size is a hint and -// can be 0. It is recommended to set it to the number of objects to be indexed -// if it is known beforehand (e.g. reading from a packfile). -func NewIndex(size int) *Index { - return &Index{ - byHash: make(map[plumbing.Hash]*idxfile.Entry, size), - byOffset: make([]*idxfile.Entry, 0, size), - } -} - -// NewIndexFromIdxFile creates a new Index from an idxfile.IdxFile. -func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index { - idx := &Index{ - byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount), - byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount), - } - sorted := true - for i, e := range idxf.Entries { - idx.addUnsorted(e) - if i > 0 && idx.byOffset[i-1].Offset >= e.Offset { - sorted = false - } - } - - // If the idxfile was loaded from a regular packfile index - // then it will already be in offset order, in which case we - // can avoid doing a relatively expensive idempotent sort. - if !sorted { - sort.Sort(orderByOffset(idx.byOffset)) - } - - return idx -} - -// orderByOffset is a sort.Interface adapter that arranges -// a slice of entries by their offset. -type orderByOffset []*idxfile.Entry - -func (o orderByOffset) Len() int { return len(o) } -func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset } -func (o orderByOffset) Swap(i, j int) { o[i], o[j] = o[j], o[i] } - -// Add adds a new Entry with the given values to the index. -func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) { - e := &idxfile.Entry{ - Hash: h, - Offset: offset, - CRC32: crc32, - } - idx.byHash[e.Hash] = e - - // Find the right position in byOffset. - // Look for the first position whose offset is *greater* than e.Offset. - i := sort.Search(len(idx.byOffset), func(i int) bool { - return idx.byOffset[i].Offset > offset - }) - if i == len(idx.byOffset) { - // Simple case: add it to the end. - idx.byOffset = append(idx.byOffset, e) - return - } - // Harder case: shift existing entries down by one to make room. - // Append a nil entry first so we can use existing capacity in case - // the index was carefully preallocated. - idx.byOffset = append(idx.byOffset, nil) - copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1]) - idx.byOffset[i] = e -} - -func (idx *Index) addUnsorted(e *idxfile.Entry) { - idx.byHash[e.Hash] = e - idx.byOffset = append(idx.byOffset, e) -} - -// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and -// a bool, which is true if it was found or false if it wasn't. -func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) { - e, ok := idx.byHash[h] - return e, ok -} - -// LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry -// is returned and a bool, which is true if it was found or false if it wasn't. -func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) { - i := sort.Search(len(idx.byOffset), func(i int) bool { - return idx.byOffset[i].Offset >= offset - }) - if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset { - return nil, false // not present - } - return idx.byOffset[i], true -} - -// Size returns the number of entries in the index. -func (idx *Index) Size() int { - return len(idx.byHash) -} - -// ToIdxFile converts the index to an idxfile.Idxfile, which can then be used -// to serialize. -func (idx *Index) ToIdxFile() *idxfile.Idxfile { - idxf := idxfile.NewIdxfile() - for _, e := range idx.byHash { - idxf.Entries = append(idxf.Entries, e) - } - - return idxf -} -- cgit From da5677f5ba3970d585d5955b15a6a1c3c262c07b Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 17:05:45 +0200 Subject: plumbing/packfile: add new packfile parser Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 359 ++++++++++++++++++++++++++++++++ plumbing/format/packfile/parser_test.go | 139 +++++++++++++ 2 files changed, 498 insertions(+) create mode 100644 plumbing/format/packfile/parser.go create mode 100644 plumbing/format/packfile/parser_test.go (limited to 'plumbing') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go new file mode 100644 index 0000000..460fc3f --- /dev/null +++ b/plumbing/format/packfile/parser.go @@ -0,0 +1,359 @@ +package packfile + +import ( + "bytes" + "errors" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" +) + +// Observer interface is implemented by index encoders. +type Observer interface { + // OnHeader is called when a new packfile is opened. + OnHeader(count uint32) error + // OnInflatedObjectHeader is called for each object header read. + OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error + // OnInflatedObjectContent is called for each decoded object. + OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error + // OnFooter is called when decoding is done. + OnFooter(h plumbing.Hash) error +} + +// Parser decodes a packfile and calls any observer associated to it. Is used +// to generate indexes. +type Parser struct { + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + checksum plumbing.Hash + + cache *cache.ObjectLRU + + ob []Observer +} + +// NewParser creates a new Parser struct. +func NewParser(scanner *Scanner, ob ...Observer) *Parser { + return &Parser{ + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + } +} + +// Parse start decoding phase of the packfile. +func (p *Parser) Parse() (plumbing.Hash, error) { + err := p.init() + if err != nil { + return plumbing.ZeroHash, err + } + + err = p.firstPass() + if err != nil { + return plumbing.ZeroHash, err + } + + err = p.resolveDeltas() + if err != nil { + return plumbing.ZeroHash, err + } + + for _, o := range p.ob { + err := o.OnFooter(p.checksum) + if err != nil { + return plumbing.ZeroHash, err + } + } + + return p.checksum, nil +} + +func (p *Parser) init() error { + _, c, err := p.scanner.Header() + if err != nil { + return err + } + + for _, o := range p.ob { + err := o.OnHeader(c) + if err != nil { + return err + } + } + + p.count = c + p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count) + p.oiByOffset = make(map[int64]*objectInfo, p.count) + p.oi = make([]*objectInfo, p.count) + + return nil +} + +func (p *Parser) firstPass() error { + buf := new(bytes.Buffer) + + for i := uint32(0); i < p.count; i++ { + buf.Truncate(0) + + oh, err := p.scanner.NextObjectHeader() + if err != nil { + return err + } + + delta := false + var ota *objectInfo + switch t := oh.Type; t { + case plumbing.OFSDeltaObject, plumbing.REFDeltaObject: + delta = true + + var parent *objectInfo + var ok bool + + if t == plumbing.OFSDeltaObject { + parent, ok = p.oiByOffset[oh.OffsetReference] + } else { + parent, ok = p.oiByHash[oh.Reference] + } + + if !ok { + // TODO improve error + return errors.New("Reference delta not found") + } + + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + + parent.Children = append(parent.Children, ota) + default: + ota = newBaseObject(oh.Offset, oh.Length, t) + } + + size, crc, err := p.scanner.NextObject(buf) + if err != nil { + return err + } + + ota.Crc32 = crc + ota.PackSize = size + ota.Length = oh.Length + + if !delta { + ota.Write(buf.Bytes()) + ota.SHA1 = ota.Sum() + } + + p.oiByOffset[oh.Offset] = ota + p.oiByHash[oh.Reference] = ota + + p.oi[i] = ota + } + + checksum, err := p.scanner.Checksum() + p.checksum = checksum + + if err == io.EOF { + return nil + } + + return err +} + +func (p *Parser) resolveDeltas() error { + for _, obj := range p.oi { + for _, o := range p.ob { + err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) + if err != nil { + return err + } + + err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32) + if err != nil { + return err + } + } + + if !obj.IsDelta() && len(obj.Children) > 0 { + var err error + base, err := p.get(obj) + if err != nil { + return err + } + + for _, child := range obj.Children { + _, err = p.resolveObject(child, base) + if err != nil { + return err + } + } + } + } + + return nil +} + +func (p *Parser) get(o *objectInfo) ([]byte, error) { + e, ok := p.cache.Get(o.SHA1) + if ok { + r, err := e.Reader() + if err != nil { + return nil, err + } + + buf := make([]byte, e.Size()) + _, err = r.Read(buf) + if err != nil { + return nil, err + } + + return buf, nil + } + + // Read from disk + if o.DiskType.IsDelta() { + base, err := p.get(o.Parent) + if err != nil { + return nil, err + } + + data, err := p.resolveObject(o, base) + if err != nil { + return nil, err + } + + if len(o.Children) > 0 { + m := &plumbing.MemoryObject{} + m.Write(data) + m.SetType(o.Type) + m.SetSize(o.Size()) + p.cache.Put(m) + } + + return data, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + if len(o.Children) > 0 { + m := &plumbing.MemoryObject{} + m.Write(data) + m.SetType(o.Type) + m.SetSize(o.Size()) + p.cache.Put(m) + } + + return data, nil +} + +func (p *Parser) resolveObject( + o *objectInfo, + base []byte) ([]byte, error) { + + if !o.DiskType.IsDelta() { + return nil, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + data, err = applyPatchBase(o, data, base) + if err != nil { + return nil, err + } + + return data, nil +} + +func (p *Parser) readData(o *objectInfo) ([]byte, error) { + buf := new(bytes.Buffer) + + // TODO: skip header. Header size can be calculated with the offset of the + // next offset in the first pass. + p.scanner.SeekFromStart(o.Offset) + _, err := p.scanner.NextObjectHeader() + if err != nil { + return nil, err + } + + buf.Truncate(0) + + _, _, err = p.scanner.NextObject(buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { + patched, err := PatchDelta(base, data) + if err != nil { + return nil, err + } + + ota.Type = ota.Parent.Type + hash := plumbing.ComputeHash(ota.Type, patched) + + ota.SHA1 = hash + + return patched, nil +} + +type objectInfo struct { + plumbing.Hasher + + Offset int64 + Length int64 + PackSize int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType + + Crc32 uint32 + + Parent *objectInfo + Children []*objectInfo + SHA1 plumbing.Hash +} + +func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { + return newDeltaObject(offset, length, t, nil) +} + +func newDeltaObject( + offset, length int64, + t plumbing.ObjectType, + parent *objectInfo, +) *objectInfo { + children := make([]*objectInfo, 0) + + obj := &objectInfo{ + Hasher: plumbing.NewHasher(t, length), + Offset: offset, + Length: length, + PackSize: 0, + Type: t, + DiskType: t, + Crc32: 0, + Parent: parent, + Children: children, + } + + return obj +} + +func (o *objectInfo) IsDelta() bool { + return o.Type.IsDelta() +} + +func (o *objectInfo) Size() int64 { + return o.Length +} diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go new file mode 100644 index 0000000..87a8804 --- /dev/null +++ b/plumbing/format/packfile/parser_test.go @@ -0,0 +1,139 @@ +package packfile_test + +import ( + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type ParserSuite struct { + fixtures.Suite +} + +var _ = Suite(&ParserSuite{}) + +func (s *ParserSuite) TestParserHashes(c *C) { + f := fixtures.Basic().One() + scanner := packfile.NewScanner(f.Packfile()) + + obs := new(testObserver) + parser := packfile.NewParser(scanner, obs) + + ch, err := parser.Parse() + c.Assert(err, IsNil) + + checksum := "a3fed42da1e8189a077c0e6846c040dcf73fc9dd" + c.Assert(ch.String(), Equals, checksum) + + c.Assert(obs.checksum, Equals, checksum) + c.Assert(int(obs.count), Equals, int(31)) + + commit := plumbing.CommitObject + blob := plumbing.BlobObject + tree := plumbing.TreeObject + + objs := []observerObject{ + {"e8d3ffab552895c19b9fcf7aa264d277cde33881", commit, 254, 12, 0xaa07ba4b}, + {"6ecf0ef2c2dffb796033e5a02219af86ec6584e5", commit, 93, 186, 0xf706df58}, + {"918c48b83bd081e863dbe1b80f8998f058cd8294", commit, 242, 286, 0x12438846}, + {"af2d6a6954d532f8ffb47615169c8fdf9d383a1a", commit, 242, 449, 0x2905a38c}, + {"1669dce138d9b841a518c64b10914d88f5e488ea", commit, 333, 615, 0xd9429436}, + {"a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", commit, 332, 838, 0xbecfde4e}, + {"35e85108805c84807bc66a02d91535e1e24b38b9", commit, 244, 1063, 0x780e4b3e}, + {"b8e471f58bcbca63b07bda20e428190409c2db47", commit, 243, 1230, 0xdc18344f}, + {"b029517f6300c2da0f4b651b8642506cd6aaf45d", commit, 187, 1392, 0xcf4e4280}, + {"32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", blob, 189, 1524, 0x1f08118a}, + {"d3ff53e0564a9f87d8e84b6e28e5060e517008aa", blob, 18, 1685, 0xafded7b8}, + {"c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", blob, 1072, 1713, 0xcc1428ed}, + {"d5c0f4ab811897cadf03aec358ae60d21f91c50d", blob, 76110, 2351, 0x1631d22f}, + {"880cd14280f4b9b6ed3986d6671f907d7cc2a198", blob, 2780, 78050, 0xbfff5850}, + {"49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", blob, 217848, 78882, 0xd108e1d8}, + {"c8f1d8c61f9da76f4cb49fd86322b6e685dba956", blob, 706, 80725, 0x8e97ba25}, + {"9a48f23120e880dfbe41f7c9b7b708e9ee62a492", blob, 11488, 80998, 0x7316ff70}, + {"9dea2395f5403188298c1dabe8bdafe562c491e3", blob, 78, 84032, 0xdb4fce56}, + {"dbd3641b371024f44d0e469a9c8f5457b0660de1", tree, 272, 84115, 0x901cce2c}, + {"a8d315b2b1c615d43042c3a62402b8a54288cf5c", tree, 43, 84375, 0xec4552b0}, + {"a39771a7651f97faf5c72e08224d857fc35133db", tree, 38, 84430, 0x847905bf}, + {"5a877e6a906a2743ad6e45d99c1793642aaf8eda", tree, 75, 84479, 0x3689459a}, + {"586af567d0bb5e771e49bdd9434f5e0fb76d25fa", tree, 38, 84559, 0xe67af94a}, + {"cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", tree, 34, 84608, 0xc2314a2e}, + {"7e59600739c96546163833214c36459e324bad0a", blob, 9, 84653, 0xcd987848}, + {"fb72698cab7617ac416264415f13224dfd7a165e", tree, 6, 84671, 0x8a853a6d}, + {"4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", tree, 9, 84688, 0x70c6518}, + {"eba74343e2f15d62adedfd8c883ee0262b5c8021", tree, 6, 84708, 0x4f4108e2}, + {"c2d30fa8ef288618f65f6eed6e168e0d514886f4", tree, 5, 84725, 0xd6fe09e9}, + {"8dcef98b1d52143e1e2dbc458ffe38f925786bf2", tree, 8, 84741, 0xf07a2804}, + {"aa9b383c260e1d05fbbf6b30a02914555e20c725", tree, 4, 84760, 0x1d75d6be}, + } + + c.Assert(obs.objects, DeepEquals, objs) +} + +type observerObject struct { + hash string + otype plumbing.ObjectType + size int64 + offset int64 + crc uint32 +} + +type testObserver struct { + count uint32 + checksum string + objects []observerObject + pos map[int64]int +} + +func (t *testObserver) OnHeader(count uint32) error { + t.count = count + t.pos = make(map[int64]int, count) + return nil +} + +func (t *testObserver) OnInflatedObjectHeader(otype plumbing.ObjectType, objSize int64, pos int64) error { + o := t.get(pos) + o.otype = otype + o.size = objSize + o.offset = pos + + t.put(pos, o) + + return nil +} + +func (t *testObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { + o := t.get(pos) + o.hash = h.String() + o.crc = crc + + t.put(pos, o) + + return nil +} + +func (t *testObserver) OnFooter(h plumbing.Hash) error { + t.checksum = h.String() + return nil +} + +func (t *testObserver) get(pos int64) observerObject { + i, ok := t.pos[pos] + if ok { + return t.objects[i] + } + + return observerObject{} +} + +func (t *testObserver) put(pos int64, o observerObject) { + i, ok := t.pos[pos] + if ok { + t.objects[i] = o + return + } + + t.pos[pos] = len(t.objects) + t.objects = append(t.objects, o) +} -- cgit From ce91d71f96097ede2bb77d2af444aee6fff73183 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 23:25:14 +0200 Subject: plumbing/packfile: disable lookup by offset In one case it disables the cache and the other disables lookup when the scanner is not seekable. Could be added back later. Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 765401f..9bfd69b 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -403,12 +403,13 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i return 0, err } - e, ok := d.idx.LookupOffset(uint64(offset)) - var base plumbing.EncodedObject - if ok { - base, ok = d.cacheGet(e.Hash) - } + // e, ok := d.idx.LookupOffset(uint64(offset)) + // if ok { + // base, ok = d.cacheGet(e.Hash) + // } + var base plumbing.EncodedObject + ok := false if !ok { base, err = d.recallByOffset(offset) if err != nil { @@ -446,9 +447,9 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { return d.DecodeObjectAt(o) } - if e, ok := d.idx.LookupOffset(uint64(o)); ok { - return d.recallByHashNonSeekable(e.Hash) - } + // if e, ok := d.idx.LookupOffset(uint64(o)); ok { + // return d.recallByHashNonSeekable(e.Hash) + // } return nil, plumbing.ErrObjectNotFound } -- cgit From 355cfc3df3a64d1bd438e0e17e1c4ba21350badf Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 23:27:16 +0200 Subject: plumbing: idxfile, add idxfile.Writer with Observer interface It's still not complete: * 64 bit offsets * IdxChecksum Signed-off-by: Javi Fontan --- plumbing/format/idxfile/writer.go | 132 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 plumbing/format/idxfile/writer.go (limited to 'plumbing') diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go new file mode 100644 index 0000000..aac68b5 --- /dev/null +++ b/plumbing/format/idxfile/writer.go @@ -0,0 +1,132 @@ +package idxfile + +import ( + "bytes" + "math" + "sort" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +type object struct { + hash plumbing.Hash + offset int64 + crc uint32 +} + +type objects []object + +// Writer implements a packfile Observer interface and is used to generate +// indexes. +type Writer struct { + count uint32 + checksum plumbing.Hash + objects objects +} + +// Create index returns a filled MemoryIndex with the information filled by +// the observer callbacks. +func (w *Writer) CreateIndex() (*MemoryIndex, error) { + idx := new(MemoryIndex) + sort.Sort(w.objects) + + // unmap all fans by default + for i := range idx.FanoutMapping { + idx.FanoutMapping[i] = noMapping + } + + buf := new(bytes.Buffer) + + last := -1 + bucket := -1 + for i, o := range w.objects { + fan := o.hash[0] + + // fill the gaps between fans + for j := last + 1; j < int(fan); j++ { + idx.Fanout[j] = uint32(i) + } + + // update the number of objects for this position + idx.Fanout[fan] = uint32(i + 1) + + // we move from one bucket to another, update counters and allocate + // memory + if last != int(fan) { + bucket++ + idx.FanoutMapping[fan] = bucket + last = int(fan) + + idx.Names = append(idx.Names, make([]byte, 0)) + idx.Offset32 = append(idx.Offset32, make([]byte, 0)) + idx.Crc32 = append(idx.Crc32, make([]byte, 0)) + } + + idx.Names[bucket] = append(idx.Names[bucket], o.hash[:]...) + + // TODO: implement 64 bit offsets + if o.offset > math.MaxInt32 { + panic("64 bit offsets not implemented") + } + + buf.Truncate(0) + binary.WriteUint32(buf, uint32(o.offset)) + idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...) + + buf.Truncate(0) + binary.WriteUint32(buf, uint32(o.crc)) + idx.Crc32[bucket] = append(idx.Crc32[bucket], buf.Bytes()...) + } + + for j := last + 1; j < 256; j++ { + idx.Fanout[j] = uint32(len(w.objects)) + } + + idx.PackfileChecksum = w.checksum + // TODO: fill IdxChecksum + + return idx, nil +} + +// Add appends new object data. +func (w *Writer) Add(h plumbing.Hash, pos int64, crc uint32) { + w.objects = append(w.objects, object{h, pos, crc}) +} + +// OnHeader implements packfile.Observer interface. +func (w *Writer) OnHeader(count uint32) error { + w.count = count + w.objects = make(objects, 0, count) + return nil +} + +// OnInflatedObjectHeader implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error { + return nil +} + +// OnInflatedObjectContent implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { + w.Add(h, pos, crc) + return nil +} + +// OnFooter implements packfile.Observer interface. +func (w *Writer) OnFooter(h plumbing.Hash) error { + w.checksum = h + return nil +} + +func (o objects) Len() int { + return len(o) +} + +func (o objects) Less(i int, j int) bool { + cmp := bytes.Compare(o[i].hash[:], o[j].hash[:]) + return cmp < 0 +} + +func (o objects) Swap(i int, j int) { + o[i], o[j] = o[j], o[i] +} -- cgit From 4e3765aef344eae2fbcd977fefd66b6571638d59 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Fri, 20 Jul 2018 12:22:55 +0200 Subject: plumbing/idxfile: use Entry to hold object data Signed-off-by: Javi Fontan --- plumbing/format/idxfile/writer.go | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index aac68b5..3c5a00e 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -9,13 +9,8 @@ import ( "gopkg.in/src-d/go-git.v4/utils/binary" ) -type object struct { - hash plumbing.Hash - offset int64 - crc uint32 -} - -type objects []object +// objects implements sort.Interface and uses hash as sorting key. +type objects []Entry // Writer implements a packfile Observer interface and is used to generate // indexes. @@ -41,7 +36,7 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { last := -1 bucket := -1 for i, o := range w.objects { - fan := o.hash[0] + fan := o.Hash[0] // fill the gaps between fans for j := last + 1; j < int(fan); j++ { @@ -63,19 +58,19 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { idx.Crc32 = append(idx.Crc32, make([]byte, 0)) } - idx.Names[bucket] = append(idx.Names[bucket], o.hash[:]...) + idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...) // TODO: implement 64 bit offsets - if o.offset > math.MaxInt32 { + if o.Offset > math.MaxInt32 { panic("64 bit offsets not implemented") } buf.Truncate(0) - binary.WriteUint32(buf, uint32(o.offset)) + binary.WriteUint32(buf, uint32(o.Offset)) idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...) buf.Truncate(0) - binary.WriteUint32(buf, uint32(o.crc)) + binary.WriteUint32(buf, uint32(o.CRC32)) idx.Crc32[bucket] = append(idx.Crc32[bucket], buf.Bytes()...) } @@ -90,8 +85,8 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { } // Add appends new object data. -func (w *Writer) Add(h plumbing.Hash, pos int64, crc uint32) { - w.objects = append(w.objects, object{h, pos, crc}) +func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { + w.objects = append(w.objects, Entry{h, crc, pos}) } // OnHeader implements packfile.Observer interface. @@ -108,7 +103,7 @@ func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, po // OnInflatedObjectContent implements packfile.Observer interface. func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { - w.Add(h, pos, crc) + w.Add(h, uint64(pos), crc) return nil } @@ -123,7 +118,7 @@ func (o objects) Len() int { } func (o objects) Less(i int, j int) bool { - cmp := bytes.Compare(o[i].hash[:], o[j].hash[:]) + cmp := bytes.Compare(o[i].Hash[:], o[j].Hash[:]) return cmp < 0 } -- cgit From 65e8359db00ae79838d19e19f69594f6a262c3d4 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Fri, 20 Jul 2018 13:01:27 +0200 Subject: plumbing/idxfile: support offset64 generating indexes Signed-off-by: Javi Fontan --- plumbing/format/idxfile/writer.go | 25 +++++++++++++++---- plumbing/format/idxfile/writer_test.go | 45 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 plumbing/format/idxfile/writer_test.go (limited to 'plumbing') diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index 3c5a00e..ea54081 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -18,12 +18,16 @@ type Writer struct { count uint32 checksum plumbing.Hash objects objects + offset64 uint32 + idx *MemoryIndex } // Create index returns a filled MemoryIndex with the information filled by // the observer callbacks. func (w *Writer) CreateIndex() (*MemoryIndex, error) { idx := new(MemoryIndex) + w.idx = idx + sort.Sort(w.objects) // unmap all fans by default @@ -60,13 +64,13 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...) - // TODO: implement 64 bit offsets - if o.Offset > math.MaxInt32 { - panic("64 bit offsets not implemented") + offset := o.Offset + if offset > math.MaxInt32 { + offset = w.addOffset64(offset) } buf.Truncate(0) - binary.WriteUint32(buf, uint32(o.Offset)) + binary.WriteUint32(buf, uint32(offset)) idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...) buf.Truncate(0) @@ -78,12 +82,23 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { idx.Fanout[j] = uint32(len(w.objects)) } + idx.Version = VersionSupported idx.PackfileChecksum = w.checksum - // TODO: fill IdxChecksum return idx, nil } +func (w *Writer) addOffset64(pos uint64) uint64 { + buf := new(bytes.Buffer) + binary.WriteUint64(buf, pos) + w.idx.Offset64 = append(w.idx.Offset64, buf.Bytes()...) + + index := uint64(w.offset64 | (1 << 31)) + w.offset64++ + + return index +} + // Add appends new object data. func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { w.objects = append(w.objects, Entry{h, crc, pos}) diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go new file mode 100644 index 0000000..92d2046 --- /dev/null +++ b/plumbing/format/idxfile/writer_test.go @@ -0,0 +1,45 @@ +package idxfile_test + +import ( + "bytes" + "io/ioutil" + + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type IndexSuite struct { + fixtures.Suite +} + +var _ = Suite(&IndexSuite{}) + +func (s *IndexSuite) TestIndexWriter(c *C) { + f := fixtures.Basic().One() + scanner := packfile.NewScanner(f.Packfile()) + + obs := new(idxfile.Writer) + parser := packfile.NewParser(scanner, obs) + + _, err := parser.Parse() + c.Assert(err, IsNil) + + idx, err := obs.CreateIndex() + c.Assert(err, IsNil) + + idxFile := f.Idx() + expected, err := ioutil.ReadAll(idxFile) + c.Assert(err, IsNil) + idxFile.Close() + + buf := new(bytes.Buffer) + encoder := idxfile.NewEncoder(buf) + n, err := encoder.Encode(idx) + c.Assert(err, IsNil) + c.Assert(n, Equals, len(expected)) + + c.Assert(buf.Bytes(), DeepEquals, expected) +} -- cgit From a716126aa7f9b77030d2e697db24d206d944f05d Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 24 Jul 2018 17:36:21 +0200 Subject: plumbing/packfile: preallocate memory in PatchDelta Signed-off-by: Javi Fontan --- plumbing/format/packfile/patch_delta.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index c604851..a972f1c 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -63,8 +63,8 @@ func PatchDelta(src, delta []byte) ([]byte, error) { targetSz, delta := decodeLEB128(delta) remainingTargetSz := targetSz - var dest []byte var cmd byte + dest := make([]byte, 0, targetSz) for { if len(delta) == 0 { return nil, ErrInvalidDelta -- cgit From 7418b411660aaa3d8d54eb602fda8accaed2833f Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 12:24:26 +0200 Subject: plumbing/idxfile: fix bug searching in MemoryIndex Signed-off-by: Javi Fontan --- plumbing/format/idxfile/idxfile.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index b196608..adeba44 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -72,7 +72,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { low := uint64(0) for { mid := (low + high) >> 1 - offset := mid + (mid << 2) + offset := mid * objectIDLength cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength]) if cmp < 0 { @@ -83,7 +83,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { low = mid + 1 } - if low < high { + if low > high { break } } -- cgit From 4ddd6783cf9707f8b72ebb00e5bb4705f5fd436a Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 12:27:53 +0200 Subject: plumbing/idxfile: add offset/hash mapping to index This functionality may be moved elsewhere in the future but is needed now to fit filesystem.ObjectStorage and the new index. Signed-off-by: Javi Fontan --- plumbing/format/idxfile/idxfile.go | 51 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index adeba44..f8debb1 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -28,6 +28,8 @@ type Index interface { FindOffset(h plumbing.Hash) (int64, error) // FindCRC32 finds the CRC32 of the object with the given hash. FindCRC32(h plumbing.Hash) (uint32, error) + // FindHash finds the hash for the object with the given offset. + FindHash(o int64) (plumbing.Hash, error) // Count returns the number of entries in the index. Count() (int64, error) // Entries returns an iterator to retrieve all index entries. @@ -48,6 +50,8 @@ type MemoryIndex struct { Offset64 []byte PackfileChecksum [20]byte IdxChecksum [20]byte + + offsetHash map[int64]plumbing.Hash } var _ Index = (*MemoryIndex)(nil) @@ -149,6 +153,53 @@ func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) { return binary.ReadUint32(buf) } +// FindHash implements the Index interface. +func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) { + // Lazily generate the reverse offset/hash map if required. + if idx.offsetHash == nil { + err := idx.genOffsetHash() + if err != nil { + return plumbing.ZeroHash, nil + } + } + + hash, ok := idx.offsetHash[o] + if !ok { + return plumbing.ZeroHash, plumbing.ErrObjectNotFound + } + + return hash, nil +} + +// genOffsetHash generates the offset/hash mapping for reverse search. +func (idx *MemoryIndex) genOffsetHash() error { + count, err := idx.Count() + if err != nil { + return err + } + + idx.offsetHash = make(map[int64]plumbing.Hash, count) + + iter, err := idx.Entries() + if err != nil { + return err + } + + var entry *Entry + for err != nil { + entry, err = iter.Next() + if err == nil { + idx.offsetHash[int64(entry.Offset)] = entry.Hash + } + } + + if err == io.EOF { + return nil + } + + return err +} + // Count implements the Index interface. func (idx *MemoryIndex) Count() (int64, error) { return int64(idx.Fanout[fanout-1]), nil -- cgit From 74f56f388bbe8072bfcd976add2373f9a7e20341 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 13:14:02 +0200 Subject: plumbing/idxfile: index is created only once and retrieved with Index Index is also automatically generated when OnFooter is called. Signed-off-by: Javi Fontan --- plumbing/format/idxfile/writer.go | 103 ++++++++++++++++++++++----------- plumbing/format/idxfile/writer_test.go | 2 +- 2 files changed, 70 insertions(+), 35 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index ea54081..efcdcc6 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -2,8 +2,10 @@ package idxfile import ( "bytes" + "fmt" "math" "sort" + "sync" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/utils/binary" @@ -15,18 +17,80 @@ type objects []Entry // Writer implements a packfile Observer interface and is used to generate // indexes. type Writer struct { + m sync.Mutex + count uint32 checksum plumbing.Hash objects objects offset64 uint32 - idx *MemoryIndex + finished bool + index *MemoryIndex +} + +// Index returns a previously created MemoryIndex or creates a new one if +// needed. +func (w *Writer) Index() (*MemoryIndex, error) { + w.m.Lock() + defer w.m.Unlock() + + if w.index == nil { + return w.createIndex() + } + + return w.index, nil +} + +// Add appends new object data. +func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { + w.m.Lock() + defer w.m.Unlock() + + w.objects = append(w.objects, Entry{h, crc, pos}) +} + +func (w *Writer) Finished() bool { + return w.finished +} + +// OnHeader implements packfile.Observer interface. +func (w *Writer) OnHeader(count uint32) error { + w.count = count + w.objects = make(objects, 0, count) + return nil +} + +// OnInflatedObjectHeader implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error { + return nil +} + +// OnInflatedObjectContent implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { + w.Add(h, uint64(pos), crc) + return nil } -// Create index returns a filled MemoryIndex with the information filled by +// OnFooter implements packfile.Observer interface. +func (w *Writer) OnFooter(h plumbing.Hash) error { + w.checksum = h + w.finished = true + _, err := w.createIndex() + if err != nil { + return err + } + + return nil +} + +// creatIndex returns a filled MemoryIndex with the information filled by // the observer callbacks. -func (w *Writer) CreateIndex() (*MemoryIndex, error) { +func (w *Writer) createIndex() (*MemoryIndex, error) { + if !w.finished { + return nil, fmt.Errorf("the index still hasn't finished building") + } + idx := new(MemoryIndex) - w.idx = idx + w.index = idx sort.Sort(w.objects) @@ -91,7 +155,7 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { func (w *Writer) addOffset64(pos uint64) uint64 { buf := new(bytes.Buffer) binary.WriteUint64(buf, pos) - w.idx.Offset64 = append(w.idx.Offset64, buf.Bytes()...) + w.index.Offset64 = append(w.index.Offset64, buf.Bytes()...) index := uint64(w.offset64 | (1 << 31)) w.offset64++ @@ -99,35 +163,6 @@ func (w *Writer) addOffset64(pos uint64) uint64 { return index } -// Add appends new object data. -func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { - w.objects = append(w.objects, Entry{h, crc, pos}) -} - -// OnHeader implements packfile.Observer interface. -func (w *Writer) OnHeader(count uint32) error { - w.count = count - w.objects = make(objects, 0, count) - return nil -} - -// OnInflatedObjectHeader implements packfile.Observer interface. -func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error { - return nil -} - -// OnInflatedObjectContent implements packfile.Observer interface. -func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { - w.Add(h, uint64(pos), crc) - return nil -} - -// OnFooter implements packfile.Observer interface. -func (w *Writer) OnFooter(h plumbing.Hash) error { - w.checksum = h - return nil -} - func (o objects) Len() int { return len(o) } diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go index 92d2046..51273a3 100644 --- a/plumbing/format/idxfile/writer_test.go +++ b/plumbing/format/idxfile/writer_test.go @@ -27,7 +27,7 @@ func (s *IndexSuite) TestIndexWriter(c *C) { _, err := parser.Parse() c.Assert(err, IsNil) - idx, err := obs.CreateIndex() + idx, err := obs.Index() c.Assert(err, IsNil) idxFile := f.Idx() -- cgit From 79f249465b24104b73c9dc220d9098cecdab4d77 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 13:42:51 +0200 Subject: plumbing, storage: integrate new index Now dotgit.PackWriter uses the new packfile.Parser and index. Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 9 +++++---- plumbing/format/packfile/parser.go | 11 ++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 9bfd69b..69aef2d 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -447,11 +447,12 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { return d.DecodeObjectAt(o) } - // if e, ok := d.idx.LookupOffset(uint64(o)); ok { - // return d.recallByHashNonSeekable(e.Hash) - // } + hash, err := d.idx.FindHash(o) + if err != nil { + return nil, err + } - return nil, plumbing.ErrObjectNotFound + return d.recallByHashNonSeekable(hash) } func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 460fc3f..696f5ba 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -311,11 +311,12 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { type objectInfo struct { plumbing.Hasher - Offset int64 - Length int64 - PackSize int64 - Type plumbing.ObjectType - DiskType plumbing.ObjectType + Offset int64 + Length int64 + HeaderLength int64 + PackSize int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType Crc32 uint32 -- cgit From ffdfb7dbabb78090b27ca29b762b803969c89fd7 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Fri, 20 Jul 2018 15:51:15 +0200 Subject: plumbing: packfile, new Packfile representation Signed-off-by: Miguel Molina --- plumbing/format/packfile/decoder.go | 57 ++++--- plumbing/format/packfile/decoder_test.go | 12 +- plumbing/format/packfile/index_test.go | 133 ---------------- plumbing/format/packfile/packfile.go | 249 ++++++++++++++++++++++++++++++ plumbing/format/packfile/packfile_test.go | 121 +++++++++++++++ plumbing/memory.go | 8 +- 6 files changed, 422 insertions(+), 158 deletions(-) delete mode 100644 plumbing/format/packfile/index_test.go create mode 100644 plumbing/format/packfile/packfile.go create mode 100644 plumbing/format/packfile/packfile_test.go (limited to 'plumbing') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 69aef2d..b1a0a26 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -2,6 +2,7 @@ package packfile import ( "bytes" + "io" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" @@ -68,6 +69,7 @@ type Decoder struct { offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType + offsetToHash map[int64]plumbing.Hash } // NewDecoder returns a new Decoder that decodes a Packfile using the given @@ -120,6 +122,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, idx: idxfile.NewMemoryIndex(), offsetToType: make(map[int64]plumbing.ObjectType), + offsetToHash: make(map[int64]plumbing.Hash), decoderType: t, }, nil } @@ -144,6 +147,27 @@ func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { return d.s.Checksum() } +func (d *Decoder) fillOffsetsToHashes() error { + entries, err := d.idx.Entries() + if err != nil { + return err + } + + for { + e, err := entries.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + + d.offsetToHash[int64(e.Offset)] = e.Hash + } + + return entries.Close() +} + func (d *Decoder) doDecode() error { _, count, err := d.s.Header() if err != nil { @@ -156,6 +180,12 @@ func (d *Decoder) doDecode() error { } defer func() { d.hasBuiltIndex = true }() + if d.hasBuiltIndex && !d.s.IsSeekable { + if err := d.fillOffsetsToHashes(); err != nil { + return err + } + } + _, isTxStorer := d.o.(storer.Transactioner) switch { case d.o == nil: @@ -299,15 +329,14 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error obj.SetSize(h.Length) obj.SetType(h.Type) - var crc uint32 var err error switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - crc, err = d.fillRegularObjectContent(obj) + _, err = d.fillRegularObjectContent(obj) case plumbing.REFDeltaObject: - crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) + _, err = d.fillREFDeltaObjectContent(obj, h.Reference) case plumbing.OFSDeltaObject: - crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) + _, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) default: err = ErrInvalidObject.AddDetails("type %q", h.Type) } @@ -316,14 +345,7 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error return obj, err } - // TODO: remove this - _ = crc - - /* Add is no longer available - if !d.hasBuiltIndex { - d.idx.Add(obj.Hash(), uint64(h.Offset), crc) - } - */ + d.offsetToHash[h.Offset] = obj.Hash() return obj, nil } @@ -403,13 +425,12 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i return 0, err } - // e, ok := d.idx.LookupOffset(uint64(offset)) - // if ok { - // base, ok = d.cacheGet(e.Hash) - // } - + h, ok := d.offsetToHash[offset] var base plumbing.EncodedObject - ok := false + if ok { + base, ok = d.cacheGet(h) + } + if !ok { base, err = d.recallByOffset(offset) if err != nil { diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go index b5bc7b7..4fe9b5e 100644 --- a/plumbing/format/packfile/decoder_test.go +++ b/plumbing/format/packfile/decoder_test.go @@ -5,7 +5,6 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -47,6 +46,7 @@ func (s *ReaderSuite) TestDecode(c *C) { }) } +/* func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) { f := fixtures.Basic().ByTag("ref-delta").One() @@ -101,7 +101,9 @@ func (s *ReaderSuite) TestDecodeByTypeRefDeltaError(c *C) { }) } +*/ +/* func (s *ReaderSuite) TestDecodeByType(c *C) { ts := []plumbing.ObjectType{ plumbing.CommitObject, @@ -140,6 +142,8 @@ func (s *ReaderSuite) TestDecodeByType(c *C) { } }) } +*/ + func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) { f := fixtures.Basic().ByTag("packfile").One() storage := memory.NewStorage() @@ -280,6 +284,7 @@ var expectedHashes = []string{ "7e59600739c96546163833214c36459e324bad0a", } +/* func (s *ReaderSuite) TestDecodeCRCs(c *C) { f := fixtures.Basic().ByTag("ofs-delta").One() @@ -366,7 +371,7 @@ func (s *ReaderSuite) TestSetIndex(c *C) { idxf := d.Index().ToIdxFile() c.Assert(idxf.Entries, HasLen, 1) c.Assert(idxf.Entries[0].Offset, Equals, uint64(42)) -} +}*/ func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { @@ -385,6 +390,7 @@ func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { } } +/* func getIndexFromIdxFile(r io.Reader) *packfile.Index { idxf := idxfile.NewIdxfile() d := idxfile.NewDecoder(r) @@ -393,4 +399,4 @@ func getIndexFromIdxFile(r io.Reader) *packfile.Index { } return packfile.NewIndexFromIdxFile(idxf) -} +}*/ diff --git a/plumbing/format/packfile/index_test.go b/plumbing/format/packfile/index_test.go deleted file mode 100644 index 8de886d..0000000 --- a/plumbing/format/packfile/index_test.go +++ /dev/null @@ -1,133 +0,0 @@ -package packfile - -import ( - "strconv" - "strings" - "testing" - - "gopkg.in/src-d/go-git.v4/plumbing" - - . "gopkg.in/check.v1" -) - -type IndexSuite struct{} - -var _ = Suite(&IndexSuite{}) - -func (s *IndexSuite) TestLookupOffset(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 10000; o1 += 100 { - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 >= o1 { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - - h1 := toHash(o1) - idx.Add(h1, uint64(o1), 0) - - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 > o1 { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - } -} - -func (s *IndexSuite) TestLookupHash(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 10000; o1 += 100 { - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 >= o1 { - e, ok := idx.LookupHash(toHash(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupHash(toHash(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - - h1 := toHash(o1) - idx.Add(h1, uint64(o1), 0) - - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 > o1 { - e, ok := idx.LookupHash(toHash(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupHash(toHash(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - } -} - -func (s *IndexSuite) TestSize(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 1000; o1++ { - c.Assert(idx.Size(), Equals, o1) - h1 := toHash(o1) - idx.Add(h1, uint64(o1), 0) - } -} - -func (s *IndexSuite) TestIdxFileEmpty(c *C) { - idx := NewIndex(0) - idxf := idx.ToIdxFile() - idx2 := NewIndexFromIdxFile(idxf) - c.Assert(idx, DeepEquals, idx2) -} - -func (s *IndexSuite) TestIdxFile(c *C) { - idx := NewIndex(0) - for o1 := 0; o1 < 1000; o1++ { - h1 := toHash(o1) - idx.Add(h1, uint64(o1), 0) - } - - idx2 := NewIndexFromIdxFile(idx.ToIdxFile()) - c.Assert(idx, DeepEquals, idx2) -} - -func toHash(i int) plumbing.Hash { - is := strconv.Itoa(i) - padding := strings.Repeat("a", 40-len(is)) - return plumbing.NewHash(padding + is) -} - -func BenchmarkIndexConstruction(b *testing.B) { - b.ReportAllocs() - - idx := NewIndex(0) - for o := 0; o < 1e6*b.N; o += 100 { - h1 := toHash(o) - idx.Add(h1, uint64(o), 0) - } -} diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go new file mode 100644 index 0000000..cee6031 --- /dev/null +++ b/plumbing/format/packfile/packfile.go @@ -0,0 +1,249 @@ +package packfile + +import ( + "bytes" + "io" + + billy "gopkg.in/src-d/go-billy.v4" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// Packfile allows retrieving information from inside a packfile. +type Packfile struct { + idxfile.Index + billy.File + s *Scanner + deltaBaseCache cache.Object + offsetToHash map[int64]plumbing.Hash +} + +// NewPackfile returns a packfile representation for the given packfile file +// and packfile idx. +func NewPackfile(index idxfile.Index, file billy.File) *Packfile { + s := NewScanner(file) + + return &Packfile{ + index, + file, + s, + cache.NewObjectLRUDefault(), + make(map[int64]plumbing.Hash), + } +} + +// Get retrieves the encoded object in the packfile with the given hash. +func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { + offset, err := p.FindOffset(h) + if err != nil { + return nil, err + } + + return p.GetByOffset(offset) +} + +// GetByOffset retrieves the encoded object from the packfile with the given +// offset. +func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { + if h, ok := p.offsetToHash[o]; ok { + if obj, ok := p.deltaBaseCache.Get(h); ok { + return obj, nil + } + } + + if _, err := p.s.SeekFromStart(o); err != nil { + return nil, err + } + + return p.nextObject() +} + +func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { + h, err := p.s.NextObjectHeader() + if err != nil { + return nil, err + } + + obj := new(plumbing.MemoryObject) + obj.SetSize(h.Length) + obj.SetType(h.Type) + + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + err = p.fillRegularObjectContent(obj) + case plumbing.REFDeltaObject: + err = p.fillREFDeltaObjectContent(obj, h.Reference) + case plumbing.OFSDeltaObject: + err = p.fillOFSDeltaObjectContent(obj, h.OffsetReference) + default: + err = ErrInvalidObject.AddDetails("type %q", h.Type) + } + + if err != nil { + return obj, err + } + + p.offsetToHash[h.Offset] = obj.Hash() + + return obj, nil +} + +func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error { + w, err := obj.Writer() + if err != nil { + return err + } + + _, _, err = p.s.NextObject(w) + return err +} + +func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) error { + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() + _, _, err := p.s.NextObject(buf) + if err != nil { + return err + } + + base, ok := p.cacheGet(ref) + if !ok { + base, err = p.Get(ref) + if err != nil { + return err + } + } + + obj.SetType(base.Type()) + err = ApplyDelta(obj, base, buf.Bytes()) + p.cachePut(obj) + bufPool.Put(buf) + + return err +} + +func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error { + buf := bytes.NewBuffer(nil) + _, _, err := p.s.NextObject(buf) + if err != nil { + return err + } + + var base plumbing.EncodedObject + h, ok := p.offsetToHash[offset] + if ok { + base, ok = p.cacheGet(h) + } + + if !ok { + base, err = p.GetByOffset(offset) + if err != nil { + return err + } + + p.cachePut(base) + } + + obj.SetType(base.Type()) + err = ApplyDelta(obj, base, buf.Bytes()) + p.cachePut(obj) + + return err +} + +func (p *Packfile) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { + if p.deltaBaseCache == nil { + return nil, false + } + + return p.deltaBaseCache.Get(h) +} + +func (p *Packfile) cachePut(obj plumbing.EncodedObject) { + if p.deltaBaseCache == nil { + return + } + + p.deltaBaseCache.Put(obj) +} + +// GetAll returns an iterator with all encoded objects in the packfile. +// The iterator returned is not thread-safe, it should be used in the same +// thread as the Packfile instance. +func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { + s := NewScanner(p.File) + + _, count, err := s.Header() + if err != nil { + return nil, err + } + + return &objectIter{ + // Easiest way to provide an object decoder is just to pass a Packfile + // instance. To not mess with the seeks, it's a new instance with a + // different scanner but the same cache and offset to hash map for + // reusing as much cache as possible. + d: &Packfile{p.Index, nil, s, p.deltaBaseCache, p.offsetToHash}, + count: int(count), + }, nil +} + +// ID returns the ID of the packfile, which is the checksum at the end of it. +func (p *Packfile) ID() (plumbing.Hash, error) { + if _, err := p.File.Seek(-20, io.SeekEnd); err != nil { + return plumbing.ZeroHash, err + } + + var hash plumbing.Hash + if _, err := io.ReadFull(p.File, hash[:]); err != nil { + return plumbing.ZeroHash, err + } + + return hash, nil +} + +// Close the packfile and its resources. +func (p *Packfile) Close() error { + return p.File.Close() +} + +type objectDecoder interface { + nextObject() (plumbing.EncodedObject, error) +} + +type objectIter struct { + d objectDecoder + count int + pos int +} + +func (i *objectIter) Next() (plumbing.EncodedObject, error) { + if i.pos >= i.count { + return nil, io.EOF + } + + i.pos++ + return i.d.nextObject() +} + +func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { + for { + o, err := i.Next() + if err != nil { + if err == io.EOF { + return nil + } + return err + } + + if err := f(o); err != nil { + return err + } + } +} + +func (i *objectIter) Close() { + i.pos = i.count +} diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go new file mode 100644 index 0000000..10e4080 --- /dev/null +++ b/plumbing/format/packfile/packfile_test.go @@ -0,0 +1,121 @@ +package packfile + +import ( + "io" + "math" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-billy.v4/osfs" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" +) + +type PackfileSuite struct { + fixtures.Suite + p *Packfile + idx *idxfile.MemoryIndex + f *fixtures.Fixture +} + +var _ = Suite(&PackfileSuite{}) + +func (s *PackfileSuite) TestGet(c *C) { + for h := range expectedEntries { + obj, err := s.p.Get(h) + c.Assert(err, IsNil) + c.Assert(obj, Not(IsNil)) + c.Assert(obj.Hash(), Equals, h) + } + + _, err := s.p.Get(plumbing.ZeroHash) + c.Assert(err, Equals, plumbing.ErrObjectNotFound) +} + +func (s *PackfileSuite) TestGetByOffset(c *C) { + for h, o := range expectedEntries { + obj, err := s.p.GetByOffset(o) + c.Assert(err, IsNil) + c.Assert(obj, Not(IsNil)) + c.Assert(obj.Hash(), Equals, h) + } + + _, err := s.p.GetByOffset(math.MaxInt64) + c.Assert(err, Equals, io.EOF) +} + +func (s *PackfileSuite) TestID(c *C) { + id, err := s.p.ID() + c.Assert(err, IsNil) + c.Assert(id, Equals, s.f.PackfileHash) +} + +func (s *PackfileSuite) TestGetAll(c *C) { + iter, err := s.p.GetAll() + c.Assert(err, IsNil) + + var objects int + for { + o, err := iter.Next() + if err == io.EOF { + break + } + c.Assert(err, IsNil) + + objects++ + _, ok := expectedEntries[o.Hash()] + c.Assert(ok, Equals, true) + } + + c.Assert(objects, Equals, len(expectedEntries)) +} + +var expectedEntries = map[plumbing.Hash]int64{ + plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea"): 615, + plumbing.NewHash("32858aad3c383ed1ff0a0f9bdf231d54a00c9e88"): 1524, + plumbing.NewHash("35e85108805c84807bc66a02d91535e1e24b38b9"): 1063, + plumbing.NewHash("49c6bb89b17060d7b4deacb7b338fcc6ea2352a9"): 78882, + plumbing.NewHash("4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd"): 84688, + plumbing.NewHash("586af567d0bb5e771e49bdd9434f5e0fb76d25fa"): 84559, + plumbing.NewHash("5a877e6a906a2743ad6e45d99c1793642aaf8eda"): 84479, + plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5"): 186, + plumbing.NewHash("7e59600739c96546163833214c36459e324bad0a"): 84653, + plumbing.NewHash("880cd14280f4b9b6ed3986d6671f907d7cc2a198"): 78050, + plumbing.NewHash("8dcef98b1d52143e1e2dbc458ffe38f925786bf2"): 84741, + plumbing.NewHash("918c48b83bd081e863dbe1b80f8998f058cd8294"): 286, + plumbing.NewHash("9a48f23120e880dfbe41f7c9b7b708e9ee62a492"): 80998, + plumbing.NewHash("9dea2395f5403188298c1dabe8bdafe562c491e3"): 84032, + plumbing.NewHash("a39771a7651f97faf5c72e08224d857fc35133db"): 84430, + plumbing.NewHash("a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69"): 838, + plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c"): 84375, + plumbing.NewHash("aa9b383c260e1d05fbbf6b30a02914555e20c725"): 84760, + plumbing.NewHash("af2d6a6954d532f8ffb47615169c8fdf9d383a1a"): 449, + plumbing.NewHash("b029517f6300c2da0f4b651b8642506cd6aaf45d"): 1392, + plumbing.NewHash("b8e471f58bcbca63b07bda20e428190409c2db47"): 1230, + plumbing.NewHash("c192bd6a24ea1ab01d78686e417c8bdc7c3d197f"): 1713, + plumbing.NewHash("c2d30fa8ef288618f65f6eed6e168e0d514886f4"): 84725, + plumbing.NewHash("c8f1d8c61f9da76f4cb49fd86322b6e685dba956"): 80725, + plumbing.NewHash("cf4aa3b38974fb7d81f367c0830f7d78d65ab86b"): 84608, + plumbing.NewHash("d3ff53e0564a9f87d8e84b6e28e5060e517008aa"): 1685, + plumbing.NewHash("d5c0f4ab811897cadf03aec358ae60d21f91c50d"): 2351, + plumbing.NewHash("dbd3641b371024f44d0e469a9c8f5457b0660de1"): 84115, + plumbing.NewHash("e8d3ffab552895c19b9fcf7aa264d277cde33881"): 12, + plumbing.NewHash("eba74343e2f15d62adedfd8c883ee0262b5c8021"): 84708, + plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 84671, +} + +func (s *PackfileSuite) SetUpTest(c *C) { + s.f = fixtures.Basic().One() + + f, err := osfs.New("/").Open(s.f.Packfile().Name()) + c.Assert(err, IsNil) + + s.idx = idxfile.NewMemoryIndex() + c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) + + s.p = NewPackfile(s.idx, f) +} + +func (s *PackfileSuite) TearDownTest(c *C) { + c.Assert(s.p.Close(), IsNil) +} diff --git a/plumbing/memory.go b/plumbing/memory.go index 51cbb54..b8e1e1b 100644 --- a/plumbing/memory.go +++ b/plumbing/memory.go @@ -14,10 +14,10 @@ type MemoryObject struct { sz int64 } -// Hash return the object Hash, the hash is calculated on-the-fly the first -// time is called, the subsequent calls the same Hash is returned even if the -// type or the content has changed. The Hash is only generated if the size of -// the content is exactly the Object.Size +// Hash returns the object Hash, the hash is calculated on-the-fly the first +// time it's called, in all subsequent calls the same Hash is returned even +// if the type or the content have changed. The Hash is only generated if the +// size of the content is exactly the object size. func (o *MemoryObject) Hash() Hash { if o.h == ZeroHash && int64(len(o.cont)) == o.sz { o.h = ComputeHash(o.t, o.cont) -- cgit From ccd0fa0bc17f0680038529b00f5c5a44f8e77b41 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Fri, 27 Jul 2018 15:07:25 +0200 Subject: plumbing: packfile, lazy object reads with DiskObjects Signed-off-by: Miguel Molina --- plumbing/format/idxfile/idxfile.go | 25 ++-- plumbing/format/packfile/decoder.go | 2 +- plumbing/format/packfile/disk_object.go | 64 +++++++++ plumbing/format/packfile/packfile.go | 208 ++++++++++++++++++++++++++---- plumbing/format/packfile/packfile_test.go | 46 +++++++ 5 files changed, 304 insertions(+), 41 deletions(-) create mode 100644 plumbing/format/packfile/disk_object.go (limited to 'plumbing') diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index f8debb1..d4a9365 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -87,7 +87,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { low = mid + 1 } - if low > high { + if low >= high { break } } @@ -157,9 +157,8 @@ func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) { func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) { // Lazily generate the reverse offset/hash map if required. if idx.offsetHash == nil { - err := idx.genOffsetHash() - if err != nil { - return plumbing.ZeroHash, nil + if err := idx.genOffsetHash(); err != nil { + return plumbing.ZeroHash, err } } @@ -185,19 +184,17 @@ func (idx *MemoryIndex) genOffsetHash() error { return err } - var entry *Entry - for err != nil { - entry, err = iter.Next() - if err == nil { - idx.offsetHash[int64(entry.Offset)] = entry.Hash + for { + entry, err := iter.Next() + if err != nil { + if err == io.EOF { + return nil + } + return err } - } - if err == io.EOF { - return nil + idx.offsetHash[int64(entry.Offset)] = entry.Hash } - - return err } // Count implements the Index interface. diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index b1a0a26..edf386b 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -478,7 +478,7 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { if d.s.IsSeekable { - if offset, err := d.idx.FindOffset(h); err != nil { + if offset, err := d.idx.FindOffset(h); err == nil { return d.DecodeObjectAt(offset) } } diff --git a/plumbing/format/packfile/disk_object.go b/plumbing/format/packfile/disk_object.go new file mode 100644 index 0000000..d3e8520 --- /dev/null +++ b/plumbing/format/packfile/disk_object.go @@ -0,0 +1,64 @@ +package packfile + +import ( + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// DiskObject is an object from the packfile on disk. +type DiskObject struct { + hash plumbing.Hash + h *ObjectHeader + offset int64 + size int64 + typ plumbing.ObjectType + packfile *Packfile +} + +// NewDiskObject creates a new disk object. +func NewDiskObject( + hash plumbing.Hash, + finalType plumbing.ObjectType, + offset int64, + contentSize int64, + packfile *Packfile, +) *DiskObject { + return &DiskObject{ + hash: hash, + offset: offset, + size: contentSize, + typ: finalType, + packfile: packfile, + } +} + +// Reader implements the plumbing.EncodedObject interface. +func (o *DiskObject) Reader() (io.ReadCloser, error) { + return o.packfile.getObjectContent(o.offset) +} + +// SetSize implements the plumbing.EncodedObject interface. This method +// is a noop. +func (o *DiskObject) SetSize(int64) {} + +// SetType implements the plumbing.EncodedObject interface. This method is +// a noop. +func (o *DiskObject) SetType(plumbing.ObjectType) {} + +// Hash implements the plumbing.EncodedObject interface. +func (o *DiskObject) Hash() plumbing.Hash { return o.hash } + +// Size implements the plumbing.EncodedObject interface. +func (o *DiskObject) Size() int64 { return o.size } + +// Type implements the plumbing.EncodedObject interface. +func (o *DiskObject) Type() plumbing.ObjectType { + return o.typ +} + +// Writer implements the plumbing.EncodedObject interface. This method always +// returns a nil writer. +func (o *DiskObject) Writer() (io.WriteCloser, error) { + return nil, nil +} diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index cee6031..00014f6 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -17,7 +17,7 @@ type Packfile struct { billy.File s *Scanner deltaBaseCache cache.Object - offsetToHash map[int64]plumbing.Hash + offsetToType map[int64]plumbing.ObjectType } // NewPackfile returns a packfile representation for the given packfile file @@ -30,7 +30,7 @@ func NewPackfile(index idxfile.Index, file billy.File) *Packfile { file, s, cache.NewObjectLRUDefault(), - make(map[int64]plumbing.Hash), + make(map[int64]plumbing.ObjectType), } } @@ -47,8 +47,9 @@ func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { // GetByOffset retrieves the encoded object from the packfile with the given // offset. func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { - if h, ok := p.offsetToHash[o]; ok { - if obj, ok := p.deltaBaseCache.Get(h); ok { + hash, err := p.FindHash(o) + if err == nil { + if obj, ok := p.deltaBaseCache.Get(hash); ok { return obj, nil } } @@ -60,13 +61,166 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { return p.nextObject() } -func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { +func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) { h, err := p.s.NextObjectHeader() + p.s.pendingObject = nil + return h, err +} + +func (p *Packfile) getObjectData( + h *ObjectHeader, +) (typ plumbing.ObjectType, size int64, err error) { + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + typ = h.Type + size = h.Length + case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() + defer bufPool.Put(buf) + + _, _, err = p.s.NextObject(buf) + if err != nil { + return + } + + delta := buf.Bytes() + _, delta = decodeLEB128(delta) // skip src size + sz, _ := decodeLEB128(delta) + size = int64(sz) + + var offset int64 + if h.Type == plumbing.REFDeltaObject { + offset, err = p.FindOffset(h.Reference) + if err != nil { + return + } + } else { + offset = h.OffsetReference + } + + if baseType, ok := p.offsetToType[offset]; ok { + typ = baseType + } else { + if _, err = p.s.SeekFromStart(offset); err != nil { + return + } + + h, err = p.nextObjectHeader() + if err != nil { + return + } + + typ, _, err = p.getObjectData(h) + if err != nil { + return + } + } + default: + err = ErrInvalidObject.AddDetails("type %q", h.Type) + } + + return +} + +func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) { + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + return h.Length, nil + case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() + defer bufPool.Put(buf) + + if _, _, err := p.s.NextObject(buf); err != nil { + return 0, err + } + + delta := buf.Bytes() + _, delta = decodeLEB128(delta) // skip src size + sz, _ := decodeLEB128(delta) + return int64(sz), nil + default: + return 0, ErrInvalidObject.AddDetails("type %q", h.Type) + } +} + +func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err error) { + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + return h.Type, nil + case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: + var offset int64 + if h.Type == plumbing.REFDeltaObject { + offset, err = p.FindOffset(h.Reference) + if err != nil { + return + } + } else { + offset = h.OffsetReference + } + + if baseType, ok := p.offsetToType[offset]; ok { + typ = baseType + } else { + if _, err = p.s.SeekFromStart(offset); err != nil { + return + } + + h, err = p.nextObjectHeader() + if err != nil { + return + } + + typ, err = p.getObjectType(h) + if err != nil { + return + } + } + default: + err = ErrInvalidObject.AddDetails("type %q", h.Type) + } + + return +} + +func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { + h, err := p.nextObjectHeader() + if err != nil { + return nil, err + } + + hash, err := p.FindHash(h.Offset) + if err != nil { + return nil, err + } + + size, err := p.getObjectSize(h) if err != nil { return nil, err } - obj := new(plumbing.MemoryObject) + typ, err := p.getObjectType(h) + if err != nil { + return nil, err + } + + p.offsetToType[h.Offset] = typ + + return NewDiskObject(hash, typ, h.Offset, size, p), nil +} + +func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { + if _, err := p.s.SeekFromStart(offset); err != nil { + return nil, err + } + + h, err := p.nextObjectHeader() + if err != nil { + return nil, err + } + + var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) obj.SetType(h.Type) @@ -82,12 +236,10 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { } if err != nil { - return obj, err + return nil, err } - p.offsetToHash[h.Offset] = obj.Hash() - - return obj, nil + return obj.Reader() } func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error { @@ -132,9 +284,10 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset } var base plumbing.EncodedObject - h, ok := p.offsetToHash[offset] - if ok { - base, ok = p.cacheGet(h) + var ok bool + hash, err := p.FindHash(offset) + if err == nil { + base, ok = p.cacheGet(hash) } if !ok { @@ -173,9 +326,7 @@ func (p *Packfile) cachePut(obj plumbing.EncodedObject) { // The iterator returned is not thread-safe, it should be used in the same // thread as the Packfile instance. func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { - s := NewScanner(p.File) - - _, count, err := s.Header() + entries, err := p.Entries() if err != nil { return nil, err } @@ -185,8 +336,14 @@ func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { // instance. To not mess with the seeks, it's a new instance with a // different scanner but the same cache and offset to hash map for // reusing as much cache as possible. - d: &Packfile{p.Index, nil, s, p.deltaBaseCache, p.offsetToHash}, - count: int(count), + p: &Packfile{ + p.Index, + p.File, + NewScanner(p.File), + p.deltaBaseCache, + p.offsetToType, + }, + iter: entries, }, nil } @@ -214,18 +371,17 @@ type objectDecoder interface { } type objectIter struct { - d objectDecoder - count int - pos int + p *Packfile + iter idxfile.EntryIter } func (i *objectIter) Next() (plumbing.EncodedObject, error) { - if i.pos >= i.count { - return nil, io.EOF + e, err := i.iter.Next() + if err != nil { + return nil, err } - i.pos++ - return i.d.nextObject() + return i.p.GetByOffset(int64(e.Offset)) } func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { @@ -245,5 +401,5 @@ func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { } func (i *objectIter) Close() { - i.pos = i.count + i.iter.Close() } diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go index 10e4080..0d7a806 100644 --- a/plumbing/format/packfile/packfile_test.go +++ b/plumbing/format/packfile/packfile_test.go @@ -1,14 +1,18 @@ package packfile import ( + "bytes" "io" "math" + "io/ioutil" + . "gopkg.in/check.v1" "gopkg.in/src-d/go-billy.v4/osfs" fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + "gopkg.in/src-d/go-git.v4/storage/memory" ) type PackfileSuite struct { @@ -104,6 +108,48 @@ var expectedEntries = map[plumbing.Hash]int64{ plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 84671, } +func (s *PackfileSuite) TestContent(c *C) { + storer := memory.NewObjectStorage() + decoder, err := NewDecoder(NewScanner(s.f.Packfile()), storer) + c.Assert(err, IsNil) + + _, err = decoder.Decode() + c.Assert(err, IsNil) + + iter, err := s.p.GetAll() + c.Assert(err, IsNil) + + for { + o, err := iter.Next() + if err == io.EOF { + break + } + c.Assert(err, IsNil) + + o2, err := storer.EncodedObject(plumbing.AnyObject, o.Hash()) + c.Assert(err, IsNil) + + c.Assert(o.Type(), Equals, o2.Type()) + c.Assert(o.Size(), Equals, o2.Size()) + + r, err := o.Reader() + c.Assert(err, IsNil) + + c1, err := ioutil.ReadAll(r) + c.Assert(err, IsNil) + c.Assert(r.Close(), IsNil) + + r, err = o2.Reader() + c.Assert(err, IsNil) + + c2, err := ioutil.ReadAll(r) + c.Assert(err, IsNil) + c.Assert(r.Close(), IsNil) + + c.Assert(bytes.Compare(c1, c2), Equals, 0) + } +} + func (s *PackfileSuite) SetUpTest(c *C) { s.f = fixtures.Basic().One() -- cgit From 823abfeb3d677a74e5bb50b20cbe8cc0306e9075 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Fri, 27 Jul 2018 18:08:55 +0200 Subject: plumbing/idxfile: test FindHash and writer with 64 bit offsets Signed-off-by: Javi Fontan --- plumbing/format/idxfile/idxfile_test.go | 59 +++++++++++++++++++++++++++++---- plumbing/format/idxfile/writer_test.go | 58 ++++++++++++++++++++++++++++++-- 2 files changed, 107 insertions(+), 10 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/idxfile_test.go b/plumbing/format/idxfile/idxfile_test.go index f42a419..d15accf 100644 --- a/plumbing/format/idxfile/idxfile_test.go +++ b/plumbing/format/idxfile/idxfile_test.go @@ -3,15 +3,22 @@ package idxfile_test import ( "bytes" "encoding/base64" + "fmt" "io" "testing" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" ) func BenchmarkFindOffset(b *testing.B) { - idx := fixtureIndex(b) + idx, err := fixtureIndex() + if err != nil { + b.Fatalf(err.Error()) + } for i := 0; i < b.N; i++ { for _, h := range fixtureHashes { @@ -24,7 +31,10 @@ func BenchmarkFindOffset(b *testing.B) { } func BenchmarkFindCRC32(b *testing.B) { - idx := fixtureIndex(b) + idx, err := fixtureIndex() + if err != nil { + b.Fatalf(err.Error()) + } for i := 0; i < b.N; i++ { for _, h := range fixtureHashes { @@ -37,7 +47,10 @@ func BenchmarkFindCRC32(b *testing.B) { } func BenchmarkContains(b *testing.B) { - idx := fixtureIndex(b) + idx, err := fixtureIndex() + if err != nil { + b.Fatalf(err.Error()) + } for i := 0; i < b.N; i++ { for _, h := range fixtureHashes { @@ -54,7 +67,10 @@ func BenchmarkContains(b *testing.B) { } func BenchmarkEntries(b *testing.B) { - idx := fixtureIndex(b) + idx, err := fixtureIndex() + if err != nil { + b.Fatalf(err.Error()) + } for i := 0; i < b.N; i++ { iter, err := idx.Entries() @@ -82,6 +98,23 @@ func BenchmarkEntries(b *testing.B) { } } +type IndexSuite struct { + fixtures.Suite +} + +var _ = Suite(&IndexSuite{}) + +func (s *IndexSuite) TestFindHash(c *C) { + idx, err := fixtureIndex() + c.Assert(err, IsNil) + + for i, pos := range fixtureOffsets { + hash, err := idx.FindHash(pos) + c.Assert(err, IsNil) + c.Assert(hash, Equals, fixtureHashes[i]) + } +} + var fixtureHashes = []plumbing.Hash{ plumbing.NewHash("303953e5aa461c203a324821bc1717f9b4fff895"), plumbing.NewHash("5296768e3d9f661387ccbff18c4dea6c997fd78c"), @@ -94,7 +127,19 @@ var fixtureHashes = []plumbing.Hash{ plumbing.NewHash("35858be9c6f5914cbe6768489c41eb6809a2bceb"), } -func fixtureIndex(t testing.TB) *idxfile.MemoryIndex { +var fixtureOffsets = []int64{ + 12, + 142, + 1601322837, + 2646996529, + 3452385606, + 3707047470, + 5323223332, + 5894072943, + 5924278919, +} + +func fixtureIndex() (*idxfile.MemoryIndex, error) { f := bytes.NewBufferString(fixtureLarge4GB) idx := new(idxfile.MemoryIndex) @@ -102,8 +147,8 @@ func fixtureIndex(t testing.TB) *idxfile.MemoryIndex { d := idxfile.NewDecoder(base64.NewDecoder(base64.StdEncoding, f)) err := d.Decode(idx) if err != nil { - t.Fatalf("unexpected error decoding index: %s", err) + return nil, fmt.Errorf("unexpected error decoding index: %s", err) } - return idx + return idx, nil } diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go index 51273a3..780acd9 100644 --- a/plumbing/format/idxfile/writer_test.go +++ b/plumbing/format/idxfile/writer_test.go @@ -2,8 +2,10 @@ package idxfile_test import ( "bytes" + "encoding/base64" "io/ioutil" + "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" @@ -11,13 +13,13 @@ import ( "gopkg.in/src-d/go-git-fixtures.v3" ) -type IndexSuite struct { +type WriterSuite struct { fixtures.Suite } -var _ = Suite(&IndexSuite{}) +var _ = Suite(&WriterSuite{}) -func (s *IndexSuite) TestIndexWriter(c *C) { +func (s *WriterSuite) TestWriter(c *C) { f := fixtures.Basic().One() scanner := packfile.NewScanner(f.Packfile()) @@ -43,3 +45,53 @@ func (s *IndexSuite) TestIndexWriter(c *C) { c.Assert(buf.Bytes(), DeepEquals, expected) } + +func (s *WriterSuite) TestWriterLarge(c *C) { + writer := new(idxfile.Writer) + err := writer.OnHeader(uint32(len(fixture4GbEntries))) + c.Assert(err, IsNil) + + for _, o := range fixture4GbEntries { + err = writer.OnInflatedObjectContent(plumbing.NewHash(o.hash), o.offset, o.crc) + c.Assert(err, IsNil) + } + + err = writer.OnFooter(fixture4GbChecksum) + c.Assert(err, IsNil) + + idx, err := writer.Index() + c.Assert(err, IsNil) + + // load fixture index + f := bytes.NewBufferString(fixtureLarge4GB) + expected, err := ioutil.ReadAll(base64.NewDecoder(base64.StdEncoding, f)) + c.Assert(err, IsNil) + + buf := new(bytes.Buffer) + encoder := idxfile.NewEncoder(buf) + n, err := encoder.Encode(idx) + c.Assert(err, IsNil) + c.Assert(n, Equals, len(expected)) + + c.Assert(buf.Bytes(), DeepEquals, expected) +} + +var ( + fixture4GbChecksum = plumbing.NewHash("afabc2269205cf85da1bf7e2fdff42f73810f29b") + + fixture4GbEntries = []struct { + offset int64 + hash string + crc uint32 + }{ + {12, "303953e5aa461c203a324821bc1717f9b4fff895", 0xbc347c4c}, + {142, "5296768e3d9f661387ccbff18c4dea6c997fd78c", 0xcdc22842}, + {1601322837, "03fc8d58d44267274edef4585eaeeb445879d33f", 0x929dfaaa}, + {2646996529, "8f3ceb4ea4cb9e4a0f751795eb41c9a4f07be772", 0xa61def8a}, + {3452385606, "e0d1d625010087f79c9e01ad9d8f95e1628dda02", 0x06bea180}, + {3707047470, "90eba326cdc4d1d61c5ad25224ccbf08731dd041", 0x7193f3ba}, + {5323223332, "bab53055add7bc35882758a922c54a874d6b1272", 0xac269b8e}, + {5894072943, "1b8995f51987d8a449ca5ea4356595102dc2fbd4", 0x2187c056}, + {5924278919, "35858be9c6f5914cbe6768489c41eb6809a2bceb", 0x9c89d9d2}, + } +) -- cgit From b4cd0899e24e0e8c7910bcdc33c96dc463dcb1e4 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Fri, 27 Jul 2018 18:31:40 +0200 Subject: plumbing/packfile: add index generation to decoder Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index edf386b..d6bc0ef 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -66,6 +66,7 @@ type Decoder struct { // will be built incrementally while decoding. hasBuiltIndex bool idx idxfile.Index + writer *idxfile.Writer offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType @@ -144,7 +145,17 @@ func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { return plumbing.ZeroHash, err } - return d.s.Checksum() + checksum, err = d.s.Checksum() + if err != nil { + return plumbing.ZeroHash, err + } + + if !d.hasBuiltIndex { + d.writer.OnFooter(checksum) + d.idx = d.Index() + } + + return checksum, err } func (d *Decoder) fillOffsetsToHashes() error { @@ -177,6 +188,8 @@ func (d *Decoder) doDecode() error { if !d.hasBuiltIndex { // TODO: MemoryIndex is not writable, change to something else d.idx = idxfile.NewMemoryIndex() + d.writer = new(idxfile.Writer) + d.writer.OnHeader(count) } defer func() { d.hasBuiltIndex = true }() @@ -329,14 +342,15 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error obj.SetSize(h.Length) obj.SetType(h.Type) + var crc uint32 var err error switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - _, err = d.fillRegularObjectContent(obj) + crc, err = d.fillRegularObjectContent(obj) case plumbing.REFDeltaObject: - _, err = d.fillREFDeltaObjectContent(obj, h.Reference) + crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) case plumbing.OFSDeltaObject: - _, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) + crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) default: err = ErrInvalidObject.AddDetails("type %q", h.Type) } @@ -345,6 +359,10 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error return obj, err } + if !d.hasBuiltIndex { + d.writer.Add(obj.Hash(), uint64(h.Offset), crc) + } + d.offsetToHash[h.Offset] = obj.Hash() return obj, nil @@ -468,9 +486,9 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { return d.DecodeObjectAt(o) } - hash, err := d.idx.FindHash(o) - if err != nil { - return nil, err + hash, ok := d.offsetToHash[o] + if !ok { + return nil, plumbing.ErrObjectNotFound } return d.recallByHashNonSeekable(hash) -- cgit From 6f7fc05543861ee074aa17f75e1d1b5c1b948d48 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Mon, 30 Jul 2018 17:11:01 +0200 Subject: plumbing: packfile, fix package tests Signed-off-by: Miguel Molina --- plumbing/format/idxfile/idxfile.go | 8 ++++ plumbing/format/idxfile/writer.go | 11 ++++- plumbing/format/packfile/decoder.go | 28 ++++++++---- plumbing/format/packfile/decoder_test.go | 72 +++++++++++++++++++++---------- plumbing/format/packfile/packfile_test.go | 2 +- 5 files changed, 88 insertions(+), 33 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index d4a9365..71c7630 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -67,6 +67,10 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { return -1 } + if len(idx.Names) <= k { + return -1 + } + data := idx.Names[k] high := uint64(len(idx.Offset32[k])) >> 2 if high == 0 { @@ -103,6 +107,10 @@ func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) { // FindOffset implements the Index interface. func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) { + if len(idx.FanoutMapping) <= int(h[0]) { + return 0, plumbing.ErrObjectNotFound + } + k := idx.FanoutMapping[h[0]] i := idx.findHashIndex(h) if i < 0 { diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index efcdcc6..a22cf16 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -25,6 +25,7 @@ type Writer struct { offset64 uint32 finished bool index *MemoryIndex + added map[plumbing.Hash]struct{} } // Index returns a previously created MemoryIndex or creates a new one if @@ -45,7 +46,15 @@ func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { w.m.Lock() defer w.m.Unlock() - w.objects = append(w.objects, Entry{h, crc, pos}) + if w.added == nil { + w.added = make(map[plumbing.Hash]struct{}) + } + + if _, ok := w.added[h]; !ok { + w.added[h] = struct{}{} + w.objects = append(w.objects, Entry{h, crc, pos}) + } + } func (w *Writer) Finished() bool { diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index d6bc0ef..6bb0677 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -122,6 +122,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, deltaBaseCache: cacheObject, idx: idxfile.NewMemoryIndex(), + writer: new(idxfile.Writer), offsetToType: make(map[int64]plumbing.ObjectType), offsetToHash: make(map[int64]plumbing.Hash), decoderType: t, @@ -152,7 +153,12 @@ func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { if !d.hasBuiltIndex { d.writer.OnFooter(checksum) - d.idx = d.Index() + + idx, err := d.writer.Index() + if err != nil { + return plumbing.ZeroHash, err + } + d.SetIndex(idx) } return checksum, err @@ -186,12 +192,8 @@ func (d *Decoder) doDecode() error { } if !d.hasBuiltIndex { - // TODO: MemoryIndex is not writable, change to something else - d.idx = idxfile.NewMemoryIndex() - d.writer = new(idxfile.Writer) d.writer.OnHeader(count) } - defer func() { d.hasBuiltIndex = true }() if d.hasBuiltIndex && !d.s.IsSeekable { if err := d.fillOffsetsToHashes(); err != nil { @@ -202,12 +204,18 @@ func (d *Decoder) doDecode() error { _, isTxStorer := d.o.(storer.Transactioner) switch { case d.o == nil: - return d.decodeObjects(int(count)) + err = d.decodeObjects(int(count)) case isTxStorer: - return d.decodeObjectsWithObjectStorerTx(int(count)) + err = d.decodeObjectsWithObjectStorerTx(int(count)) default: - return d.decodeObjectsWithObjectStorer(int(count)) + err = d.decodeObjectsWithObjectStorer(int(count)) + } + + if err != nil { + return err } + + return nil } func (d *Decoder) decodeObjects(count int) error { @@ -509,8 +517,10 @@ func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.EncodedObject, err error) { if d.tx != nil { obj, err = d.tx.EncodedObject(plumbing.AnyObject, h) - } else { + } else if d.o != nil { obj, err = d.o.EncodedObject(plumbing.AnyObject, h) + } else { + return nil, plumbing.ErrObjectNotFound } if err != plumbing.ErrObjectNotFound { diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go index 4fe9b5e..d4f7145 100644 --- a/plumbing/format/packfile/decoder_test.go +++ b/plumbing/format/packfile/decoder_test.go @@ -5,6 +5,7 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -46,7 +47,6 @@ func (s *ReaderSuite) TestDecode(c *C) { }) } -/* func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) { f := fixtures.Basic().ByTag("ref-delta").One() @@ -101,9 +101,7 @@ func (s *ReaderSuite) TestDecodeByTypeRefDeltaError(c *C) { }) } -*/ -/* func (s *ReaderSuite) TestDecodeByType(c *C) { ts := []plumbing.ObjectType{ plumbing.CommitObject, @@ -142,7 +140,6 @@ func (s *ReaderSuite) TestDecodeByType(c *C) { } }) } -*/ func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) { f := fixtures.Basic().ByTag("packfile").One() @@ -184,7 +181,7 @@ func (s *ReaderSuite) TestDecodeMultipleTimes(c *C) { func (s *ReaderSuite) TestDecodeInMemory(c *C) { fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) + d, err := packfile.NewDecoder(scanner, memory.NewStorage()) c.Assert(err, IsNil) ch, err := d.Decode() @@ -284,7 +281,6 @@ var expectedHashes = []string{ "7e59600739c96546163833214c36459e324bad0a", } -/* func (s *ReaderSuite) TestDecodeCRCs(c *C) { f := fixtures.Basic().ByTag("ofs-delta").One() @@ -297,8 +293,16 @@ func (s *ReaderSuite) TestDecodeCRCs(c *C) { c.Assert(err, IsNil) var sum uint64 - idx := d.Index().ToIdxFile() - for _, e := range idx.Entries { + iter, err := d.Index().Entries() + c.Assert(err, IsNil) + + for { + e, err := iter.Next() + if err == io.EOF { + break + } + + c.Assert(err, IsNil) sum += uint64(e.CRC32) } @@ -349,12 +353,30 @@ func (s *ReaderSuite) TestIndex(c *C) { d, err := packfile.NewDecoder(scanner, nil) c.Assert(err, IsNil) - c.Assert(d.Index().ToIdxFile().Entries, HasLen, 0) + c.Assert(indexEntries(c, d), Equals, 0) _, err = d.Decode() c.Assert(err, IsNil) - c.Assert(len(d.Index().ToIdxFile().Entries), Equals, 31) + c.Assert(indexEntries(c, d), Equals, 31) +} + +func indexEntries(c *C, d *packfile.Decoder) int { + var count int + entries, err := d.Index().Entries() + c.Assert(err, IsNil) + + for { + _, err := entries.Next() + if err == io.EOF { + break + } + + c.Assert(err, IsNil) + count++ + } + + return count } func (s *ReaderSuite) TestSetIndex(c *C) { @@ -363,18 +385,25 @@ func (s *ReaderSuite) TestSetIndex(c *C) { d, err := packfile.NewDecoder(scanner, nil) c.Assert(err, IsNil) - idx := packfile.NewIndex(1) + w := new(idxfile.Writer) h := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5") - idx.Add(h, uint64(42), 0) + w.Add(h, uint64(42), 0) + w.OnFooter(plumbing.ZeroHash) + + var idx idxfile.Index + idx, err = w.Index() + c.Assert(err, IsNil) d.SetIndex(idx) - idxf := d.Index().ToIdxFile() - c.Assert(idxf.Entries, HasLen, 1) - c.Assert(idxf.Entries[0].Offset, Equals, uint64(42)) -}*/ + idx = d.Index() + c.Assert(indexEntries(c, d), Equals, 1) -func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { + offset, err := idx.FindOffset(h) + c.Assert(err, IsNil) + c.Assert(offset, Equals, int64(42)) +} +func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { i, err := s.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) @@ -390,13 +419,12 @@ func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { } } -/* -func getIndexFromIdxFile(r io.Reader) *packfile.Index { - idxf := idxfile.NewIdxfile() +func getIndexFromIdxFile(r io.Reader) idxfile.Index { + idxf := idxfile.NewMemoryIndex() d := idxfile.NewDecoder(r) if err := d.Decode(idxf); err != nil { panic(err) } - return packfile.NewIndexFromIdxFile(idxf) -}*/ + return idxf +} diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go index 0d7a806..a17a483 100644 --- a/plumbing/format/packfile/packfile_test.go +++ b/plumbing/format/packfile/packfile_test.go @@ -109,7 +109,7 @@ var expectedEntries = map[plumbing.Hash]int64{ } func (s *PackfileSuite) TestContent(c *C) { - storer := memory.NewObjectStorage() + storer := memory.NewStorage() decoder, err := NewDecoder(NewScanner(s.f.Packfile()), storer) c.Assert(err, IsNil) -- cgit From 6a24b4c1f0cb9e5daf30fa7979f2643a967af1ad Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Tue, 7 Aug 2018 18:41:19 +0200 Subject: *: use parser to populate non writable storages and bug fixes Signed-off-by: Miguel Molina --- plumbing/format/idxfile/writer.go | 2 +- plumbing/format/idxfile/writer_test.go | 2 +- plumbing/format/packfile/common.go | 79 +++- plumbing/format/packfile/decoder.go | 553 ---------------------- plumbing/format/packfile/decoder_test.go | 430 ----------------- plumbing/format/packfile/encoder_advanced_test.go | 37 +- plumbing/format/packfile/encoder_test.go | 110 +++-- plumbing/format/packfile/packfile.go | 135 ++++-- plumbing/format/packfile/packfile_test.go | 169 +++++-- plumbing/format/packfile/parser.go | 130 +++-- plumbing/format/packfile/parser_test.go | 2 +- plumbing/object/blob_test.go | 23 +- plumbing/object/difftree_test.go | 16 +- plumbing/object/object_test.go | 5 +- plumbing/transport/test/receive_pack.go | 8 +- plumbing/transport/test/upload_pack.go | 5 +- 16 files changed, 525 insertions(+), 1181 deletions(-) delete mode 100644 plumbing/format/packfile/decoder.go delete mode 100644 plumbing/format/packfile/decoder_test.go (limited to 'plumbing') diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index a22cf16..89b79cd 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -74,7 +74,7 @@ func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, po } // OnInflatedObjectContent implements packfile.Observer interface. -func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { +func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, _ []byte) error { w.Add(h, uint64(pos), crc) return nil } diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go index 780acd9..7c3cceb 100644 --- a/plumbing/format/idxfile/writer_test.go +++ b/plumbing/format/idxfile/writer_test.go @@ -52,7 +52,7 @@ func (s *WriterSuite) TestWriterLarge(c *C) { c.Assert(err, IsNil) for _, o := range fixture4GbEntries { - err = writer.OnInflatedObjectContent(plumbing.NewHash(o.hash), o.offset, o.crc) + err = writer.OnInflatedObjectContent(plumbing.NewHash(o.hash), o.offset, o.crc, nil) c.Assert(err, IsNil) } diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go index beb015d..76254f0 100644 --- a/plumbing/format/packfile/common.go +++ b/plumbing/format/packfile/common.go @@ -2,9 +2,11 @@ package packfile import ( "bytes" + "errors" "io" "sync" + "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/utils/ioutil" ) @@ -23,24 +25,24 @@ const ( maskType = uint8(112) // 0111 0000 ) -// UpdateObjectStorage updates the given storer.EncodedObjectStorer with the contents of the +// UpdateObjectStorage updates the storer with the objects in the given // packfile. -func UpdateObjectStorage(s storer.EncodedObjectStorer, packfile io.Reader) error { - if sw, ok := s.(storer.PackfileWriter); ok { - return writePackfileToObjectStorage(sw, packfile) +func UpdateObjectStorage(s storer.Storer, packfile io.Reader) error { + if pw, ok := s.(storer.PackfileWriter); ok { + return WritePackfileToObjectStorage(pw, packfile) } - stream := NewScanner(packfile) - d, err := NewDecoder(stream, s) - if err != nil { - return err - } - - _, err = d.Decode() + updater := newPackfileStorageUpdater(s) + _, err := NewParser(NewScanner(packfile), updater).Parse() return err } -func writePackfileToObjectStorage(sw storer.PackfileWriter, packfile io.Reader) (err error) { +// WritePackfileToObjectStorage writes all the packfile objects into the given +// object storage. +func WritePackfileToObjectStorage( + sw storer.PackfileWriter, + packfile io.Reader, +) (err error) { w, err := sw.PackfileWriter() if err != nil { return err @@ -56,3 +58,56 @@ var bufPool = sync.Pool{ return bytes.NewBuffer(nil) }, } + +var errMissingObjectContent = errors.New("missing object content") + +type packfileStorageUpdater struct { + storer.Storer + lastSize int64 + lastType plumbing.ObjectType +} + +func newPackfileStorageUpdater(s storer.Storer) *packfileStorageUpdater { + return &packfileStorageUpdater{Storer: s} +} + +func (p *packfileStorageUpdater) OnHeader(count uint32) error { + return nil +} + +func (p *packfileStorageUpdater) OnInflatedObjectHeader( + t plumbing.ObjectType, + objSize int64, + pos int64, +) error { + if p.lastSize > 0 || p.lastType != plumbing.InvalidObject { + return errMissingObjectContent + } + + p.lastType = t + p.lastSize = objSize + return nil +} + +func (p *packfileStorageUpdater) OnInflatedObjectContent( + h plumbing.Hash, + pos int64, + crc uint32, + content []byte, +) error { + obj := new(plumbing.MemoryObject) + obj.SetSize(p.lastSize) + obj.SetType(p.lastType) + if _, err := obj.Write(content); err != nil { + return err + } + + _, err := p.SetEncodedObject(obj) + p.lastSize = 0 + p.lastType = plumbing.InvalidObject + return err +} + +func (p *packfileStorageUpdater) OnFooter(h plumbing.Hash) error { + return nil +} diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go deleted file mode 100644 index 6bb0677..0000000 --- a/plumbing/format/packfile/decoder.go +++ /dev/null @@ -1,553 +0,0 @@ -package packfile - -import ( - "bytes" - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/plumbing/storer" -) - -// Format specifies if the packfile uses ref-deltas or ofs-deltas. -type Format int - -// Possible values of the Format type. -const ( - UnknownFormat Format = iota - OFSDeltaFormat - REFDeltaFormat -) - -var ( - // ErrMaxObjectsLimitReached is returned by Decode when the number - // of objects in the packfile is higher than - // Decoder.MaxObjectsLimit. - ErrMaxObjectsLimitReached = NewError("max. objects limit reached") - // ErrInvalidObject is returned by Decode when an invalid object is - // found in the packfile. - ErrInvalidObject = NewError("invalid git object") - // ErrPackEntryNotFound is returned by Decode when a reference in - // the packfile references and unknown object. - ErrPackEntryNotFound = NewError("can't find a pack entry") - // ErrZLib is returned by Decode when there was an error unzipping - // the packfile contents. - ErrZLib = NewError("zlib reading error") - // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object - // to recall cannot be returned. - ErrCannotRecall = NewError("cannot recall object") - // ErrResolveDeltasNotSupported is returned if a NewDecoder is used with a - // non-seekable scanner and without a plumbing.ObjectStorage - ErrResolveDeltasNotSupported = NewError("resolve delta is not supported") - // ErrNonSeekable is returned if a ReadObjectAt method is called without a - // seekable scanner - ErrNonSeekable = NewError("non-seekable scanner") - // ErrRollback error making Rollback over a transaction after an error - ErrRollback = NewError("rollback error, during set error") - // ErrAlreadyDecoded is returned if NewDecoder is called for a second time - ErrAlreadyDecoded = NewError("packfile was already decoded") -) - -// Decoder reads and decodes packfiles from an input Scanner, if an ObjectStorer -// was provided the decoded objects are store there. If not the decode object -// is destroyed. The Offsets and CRCs are calculated whether an -// ObjectStorer was provided or not. -type Decoder struct { - deltaBaseCache cache.Object - - s *Scanner - o storer.EncodedObjectStorer - tx storer.Transaction - - isDecoded bool - - // hasBuiltIndex indicates if the index is fully built or not. If it is not, - // will be built incrementally while decoding. - hasBuiltIndex bool - idx idxfile.Index - writer *idxfile.Writer - - offsetToType map[int64]plumbing.ObjectType - decoderType plumbing.ObjectType - offsetToHash map[int64]plumbing.Hash -} - -// NewDecoder returns a new Decoder that decodes a Packfile using the given -// Scanner and stores the objects in the provided EncodedObjectStorer. ObjectStorer can be nil, in this -// If the passed EncodedObjectStorer is nil, objects are not stored, but -// offsets on the Packfile and CRCs are calculated. -// -// If EncodedObjectStorer is nil and the Scanner is not Seekable, ErrNonSeekable is -// returned. -// -// If the ObjectStorer implements storer.Transactioner, a transaction is created -// during the Decode execution. If anything fails, Rollback is called -func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) { - return NewDecoderForType(s, o, plumbing.AnyObject, - cache.NewObjectLRUDefault()) -} - -// NewDecoderWithCache is a version of NewDecoder where cache can be specified. -func NewDecoderWithCache(s *Scanner, o storer.EncodedObjectStorer, - cacheObject cache.Object) (*Decoder, error) { - - return NewDecoderForType(s, o, plumbing.AnyObject, cacheObject) -} - -// NewDecoderForType returns a new Decoder but in this case for a specific object type. -// When an object is read using this Decoder instance and it is not of the same type of -// the specified one, nil will be returned. This is intended to avoid the content -// deserialization of all the objects. -// -// cacheObject is a cache.Object implementation that is used to speed up the -// process. If cache is not needed you can pass nil. To create an LRU cache -// object with the default size you can use the helper cache.ObjectLRUDefault(). -func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, - t plumbing.ObjectType, cacheObject cache.Object) (*Decoder, error) { - - if t == plumbing.OFSDeltaObject || - t == plumbing.REFDeltaObject || - t == plumbing.InvalidObject { - return nil, plumbing.ErrInvalidType - } - - if !canResolveDeltas(s, o) { - return nil, ErrResolveDeltasNotSupported - } - - return &Decoder{ - s: s, - o: o, - deltaBaseCache: cacheObject, - - idx: idxfile.NewMemoryIndex(), - writer: new(idxfile.Writer), - offsetToType: make(map[int64]plumbing.ObjectType), - offsetToHash: make(map[int64]plumbing.Hash), - decoderType: t, - }, nil -} - -func canResolveDeltas(s *Scanner, o storer.EncodedObjectStorer) bool { - return s.IsSeekable || o != nil -} - -// Decode reads a packfile and stores it in the value pointed to by s. The -// offsets and the CRCs are calculated by this method -func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { - defer func() { d.isDecoded = true }() - - if d.isDecoded { - return plumbing.ZeroHash, ErrAlreadyDecoded - } - - if err := d.doDecode(); err != nil { - return plumbing.ZeroHash, err - } - - checksum, err = d.s.Checksum() - if err != nil { - return plumbing.ZeroHash, err - } - - if !d.hasBuiltIndex { - d.writer.OnFooter(checksum) - - idx, err := d.writer.Index() - if err != nil { - return plumbing.ZeroHash, err - } - d.SetIndex(idx) - } - - return checksum, err -} - -func (d *Decoder) fillOffsetsToHashes() error { - entries, err := d.idx.Entries() - if err != nil { - return err - } - - for { - e, err := entries.Next() - if err != nil { - if err == io.EOF { - break - } - return err - } - - d.offsetToHash[int64(e.Offset)] = e.Hash - } - - return entries.Close() -} - -func (d *Decoder) doDecode() error { - _, count, err := d.s.Header() - if err != nil { - return err - } - - if !d.hasBuiltIndex { - d.writer.OnHeader(count) - } - - if d.hasBuiltIndex && !d.s.IsSeekable { - if err := d.fillOffsetsToHashes(); err != nil { - return err - } - } - - _, isTxStorer := d.o.(storer.Transactioner) - switch { - case d.o == nil: - err = d.decodeObjects(int(count)) - case isTxStorer: - err = d.decodeObjectsWithObjectStorerTx(int(count)) - default: - err = d.decodeObjectsWithObjectStorer(int(count)) - } - - if err != nil { - return err - } - - return nil -} - -func (d *Decoder) decodeObjects(count int) error { - for i := 0; i < count; i++ { - if _, err := d.DecodeObject(); err != nil { - return err - } - } - - return nil -} - -func (d *Decoder) decodeObjectsWithObjectStorer(count int) error { - for i := 0; i < count; i++ { - obj, err := d.DecodeObject() - if err != nil { - return err - } - - if _, err := d.o.SetEncodedObject(obj); err != nil { - return err - } - } - - return nil -} - -func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error { - d.tx = d.o.(storer.Transactioner).Begin() - - for i := 0; i < count; i++ { - obj, err := d.DecodeObject() - if err != nil { - return err - } - - if _, err := d.tx.SetEncodedObject(obj); err != nil { - if rerr := d.tx.Rollback(); rerr != nil { - return ErrRollback.AddDetails( - "error: %s, during tx.Set error: %s", rerr, err, - ) - } - - return err - } - - } - - return d.tx.Commit() -} - -// DecodeObject reads the next object from the scanner and returns it. This -// method can be used in replacement of the Decode method, to work in a -// interactive way. If you created a new decoder instance using NewDecoderForType -// constructor, if the object decoded is not equals to the specified one, nil will -// be returned -func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) { - return d.doDecodeObject(d.decoderType) -} - -func (d *Decoder) doDecodeObject(t plumbing.ObjectType) (plumbing.EncodedObject, error) { - h, err := d.s.NextObjectHeader() - if err != nil { - return nil, err - } - - if t == plumbing.AnyObject { - return d.decodeByHeader(h) - } - - return d.decodeIfSpecificType(h) -} - -func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) { - var ( - obj plumbing.EncodedObject - realType plumbing.ObjectType - err error - ) - switch h.Type { - case plumbing.OFSDeltaObject: - realType, err = d.ofsDeltaType(h.OffsetReference) - case plumbing.REFDeltaObject: - realType, err = d.refDeltaType(h.Reference) - if err == plumbing.ErrObjectNotFound { - obj, err = d.decodeByHeader(h) - if err != nil { - realType = obj.Type() - } - } - default: - realType = h.Type - } - - if err != nil { - return nil, err - } - - d.offsetToType[h.Offset] = realType - - if d.decoderType == realType { - if obj != nil { - return obj, nil - } - - return d.decodeByHeader(h) - } - - return nil, nil -} - -func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { - t, ok := d.offsetToType[offset] - if !ok { - return plumbing.InvalidObject, plumbing.ErrObjectNotFound - } - - return t, nil -} - -func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { - offset, err := d.idx.FindOffset(ref) - if err != nil { - return plumbing.InvalidObject, plumbing.ErrObjectNotFound - } - - return d.ofsDeltaType(offset) -} - -func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { - obj := d.newObject() - obj.SetSize(h.Length) - obj.SetType(h.Type) - - var crc uint32 - var err error - switch h.Type { - case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - crc, err = d.fillRegularObjectContent(obj) - case plumbing.REFDeltaObject: - crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) - case plumbing.OFSDeltaObject: - crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) - default: - err = ErrInvalidObject.AddDetails("type %q", h.Type) - } - - if err != nil { - return obj, err - } - - if !d.hasBuiltIndex { - d.writer.Add(obj.Hash(), uint64(h.Offset), crc) - } - - d.offsetToHash[h.Offset] = obj.Hash() - - return obj, nil -} - -func (d *Decoder) newObject() plumbing.EncodedObject { - if d.o == nil { - return &plumbing.MemoryObject{} - } - - return d.o.NewEncodedObject() -} - -// DecodeObjectAt reads an object at the given location. Every EncodedObject -// returned is added into a internal index. This is intended to be able to regenerate -// objects from deltas (offset deltas or reference deltas) without an package index -// (.idx file). If Decode wasn't called previously objects offset should provided -// using the SetOffsets method. It decodes the object regardless of the Decoder -// type. -func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) { - if !d.s.IsSeekable { - return nil, ErrNonSeekable - } - - beforeJump, err := d.s.SeekFromStart(offset) - if err != nil { - return nil, err - } - - defer func() { - _, seekErr := d.s.SeekFromStart(beforeJump) - if err == nil { - err = seekErr - } - }() - - return d.doDecodeObject(plumbing.AnyObject) -} - -func (d *Decoder) fillRegularObjectContent(obj plumbing.EncodedObject) (uint32, error) { - w, err := obj.Writer() - if err != nil { - return 0, err - } - - _, crc, err := d.s.NextObject(w) - return crc, err -} - -func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) (uint32, error) { - buf := bufPool.Get().(*bytes.Buffer) - buf.Reset() - _, crc, err := d.s.NextObject(buf) - if err != nil { - return 0, err - } - - base, ok := d.cacheGet(ref) - if !ok { - base, err = d.recallByHash(ref) - if err != nil { - return 0, err - } - } - - obj.SetType(base.Type()) - err = ApplyDelta(obj, base, buf.Bytes()) - d.cachePut(obj) - bufPool.Put(buf) - - return crc, err -} - -func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) { - buf := bytes.NewBuffer(nil) - _, crc, err := d.s.NextObject(buf) - if err != nil { - return 0, err - } - - h, ok := d.offsetToHash[offset] - var base plumbing.EncodedObject - if ok { - base, ok = d.cacheGet(h) - } - - if !ok { - base, err = d.recallByOffset(offset) - if err != nil { - return 0, err - } - - d.cachePut(base) - } - - obj.SetType(base.Type()) - err = ApplyDelta(obj, base, buf.Bytes()) - d.cachePut(obj) - - return crc, err -} - -func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { - if d.deltaBaseCache == nil { - return nil, false - } - - return d.deltaBaseCache.Get(h) -} - -func (d *Decoder) cachePut(obj plumbing.EncodedObject) { - if d.deltaBaseCache == nil { - return - } - - d.deltaBaseCache.Put(obj) -} - -func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { - if d.s.IsSeekable { - return d.DecodeObjectAt(o) - } - - hash, ok := d.offsetToHash[o] - if !ok { - return nil, plumbing.ErrObjectNotFound - } - - return d.recallByHashNonSeekable(hash) -} - -func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { - if d.s.IsSeekable { - if offset, err := d.idx.FindOffset(h); err == nil { - return d.DecodeObjectAt(offset) - } - } - - return d.recallByHashNonSeekable(h) -} - -// recallByHashNonSeekable if we are in a transaction the objects are read from -// the transaction, if not are directly read from the ObjectStorer -func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.EncodedObject, err error) { - if d.tx != nil { - obj, err = d.tx.EncodedObject(plumbing.AnyObject, h) - } else if d.o != nil { - obj, err = d.o.EncodedObject(plumbing.AnyObject, h) - } else { - return nil, plumbing.ErrObjectNotFound - } - - if err != plumbing.ErrObjectNotFound { - return obj, err - } - - return nil, plumbing.ErrObjectNotFound -} - -// SetIndex sets an index for the packfile. It is recommended to set this. -// The index might be read from a file or reused from a previous Decoder usage -// (see Index function). -func (d *Decoder) SetIndex(idx idxfile.Index) { - d.hasBuiltIndex = true - d.idx = idx -} - -// Index returns the index for the packfile. If index was set with SetIndex, -// Index will return it. Otherwise, it will return an index that is built while -// decoding. If neither SetIndex was called with a full index or Decode called -// for the whole packfile, then the returned index will be incomplete. -func (d *Decoder) Index() idxfile.Index { - return d.idx -} - -// Close closes the Scanner. usually this mean that the whole reader is read and -// discarded -func (d *Decoder) Close() error { - return d.s.Close() -} diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go deleted file mode 100644 index d4f7145..0000000 --- a/plumbing/format/packfile/decoder_test.go +++ /dev/null @@ -1,430 +0,0 @@ -package packfile_test - -import ( - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" - "gopkg.in/src-d/go-git.v4/plumbing/storer" - "gopkg.in/src-d/go-git.v4/storage/filesystem" - "gopkg.in/src-d/go-git.v4/storage/memory" - - . "gopkg.in/check.v1" - "gopkg.in/src-d/go-billy.v4/memfs" - "gopkg.in/src-d/go-git-fixtures.v3" -) - -type ReaderSuite struct { - fixtures.Suite -} - -var _ = Suite(&ReaderSuite{}) - -func (s *ReaderSuite) TestNewDecodeNonSeekable(c *C) { - scanner := packfile.NewScanner(nil) - d, err := packfile.NewDecoder(scanner, nil) - - c.Assert(d, IsNil) - c.Assert(err, NotNil) -} - -func (s *ReaderSuite) TestDecode(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) { - f := fixtures.Basic().ByTag("ref-delta").One() - - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, plumbing.CommitObject, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // Index required to decode by ref-delta. - d.SetIndex(getIndexFromIdxFile(f.Idx())) - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - var i uint32 - for i = 0; i < count; i++ { - obj, err := d.DecodeObject() - c.Assert(err, IsNil) - - if obj != nil { - c.Assert(obj.Type(), Equals, plumbing.CommitObject) - } - } -} - -func (s *ReaderSuite) TestDecodeByTypeRefDeltaError(c *C) { - fixtures.Basic().ByTag("ref-delta").Test(c, func(f *fixtures.Fixture) { - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, - plumbing.CommitObject, cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - isError := false - var i uint32 - for i = 0; i < count; i++ { - _, err := d.DecodeObject() - if err != nil { - isError = true - break - } - } - c.Assert(isError, Equals, true) - }) - -} - -func (s *ReaderSuite) TestDecodeByType(c *C) { - ts := []plumbing.ObjectType{ - plumbing.CommitObject, - plumbing.TagObject, - plumbing.TreeObject, - plumbing.BlobObject, - } - - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - for _, t := range ts { - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, t, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - } - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - var i uint32 - for i = 0; i < count; i++ { - obj, err := d.DecodeObject() - c.Assert(err, IsNil) - - if obj != nil { - c.Assert(obj.Type(), Equals, t) - } - } - } - }) -} - -func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) { - f := fixtures.Basic().ByTag("packfile").One() - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - - _, err := packfile.NewDecoderForType(scanner, storage, - plumbing.OFSDeltaObject, cache.NewObjectLRUDefault()) - c.Assert(err, Equals, plumbing.ErrInvalidType) - - _, err = packfile.NewDecoderForType(scanner, storage, - plumbing.REFDeltaObject, cache.NewObjectLRUDefault()) - - c.Assert(err, Equals, plumbing.ErrInvalidType) - - _, err = packfile.NewDecoderForType(scanner, storage, plumbing.InvalidObject, - cache.NewObjectLRUDefault()) - c.Assert(err, Equals, plumbing.ErrInvalidType) -} - -func (s *ReaderSuite) TestDecodeMultipleTimes(c *C) { - f := fixtures.Basic().ByTag("packfile").One() - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - ch, err = d.Decode() - c.Assert(err, Equals, packfile.ErrAlreadyDecoded) - c.Assert(ch, Equals, plumbing.ZeroHash) -} - -func (s *ReaderSuite) TestDecodeInMemory(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, memory.NewStorage()) - c.Assert(err, IsNil) - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - }) -} - -type nonSeekableReader struct { - r io.Reader -} - -func (nsr nonSeekableReader) Read(b []byte) (int, error) { - return nsr.r.Read(b) -} - -func (s *ReaderSuite) TestDecodeNoSeekableWithTxStorer(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - reader := nonSeekableReader{ - r: f.Packfile(), - } - - scanner := packfile.NewScanner(reader) - - var storage storer.EncodedObjectStorer = memory.NewStorage() - _, isTxStorer := storage.(storer.Transactioner) - c.Assert(isTxStorer, Equals, true) - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -func (s *ReaderSuite) TestDecodeNoSeekableWithoutTxStorer(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - reader := nonSeekableReader{ - r: f.Packfile(), - } - - scanner := packfile.NewScanner(reader) - - var storage storer.EncodedObjectStorer - storage, _ = filesystem.NewStorage(memfs.New()) - _, isTxStorer := storage.(storer.Transactioner) - c.Assert(isTxStorer, Equals, false) - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -var expectedHashes = []string{ - "918c48b83bd081e863dbe1b80f8998f058cd8294", - "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", - "1669dce138d9b841a518c64b10914d88f5e488ea", - "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", - "b8e471f58bcbca63b07bda20e428190409c2db47", - "35e85108805c84807bc66a02d91535e1e24b38b9", - "b029517f6300c2da0f4b651b8642506cd6aaf45d", - "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", - "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", - "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", - "d5c0f4ab811897cadf03aec358ae60d21f91c50d", - "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", - "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", - "9dea2395f5403188298c1dabe8bdafe562c491e3", - "586af567d0bb5e771e49bdd9434f5e0fb76d25fa", - "9a48f23120e880dfbe41f7c9b7b708e9ee62a492", - "5a877e6a906a2743ad6e45d99c1793642aaf8eda", - "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", - "a8d315b2b1c615d43042c3a62402b8a54288cf5c", - "a39771a7651f97faf5c72e08224d857fc35133db", - "880cd14280f4b9b6ed3986d6671f907d7cc2a198", - "fb72698cab7617ac416264415f13224dfd7a165e", - "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", - "eba74343e2f15d62adedfd8c883ee0262b5c8021", - "c2d30fa8ef288618f65f6eed6e168e0d514886f4", - "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", - "aa9b383c260e1d05fbbf6b30a02914555e20c725", - "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", - "dbd3641b371024f44d0e469a9c8f5457b0660de1", - "e8d3ffab552895c19b9fcf7aa264d277cde33881", - "7e59600739c96546163833214c36459e324bad0a", -} - -func (s *ReaderSuite) TestDecodeCRCs(c *C) { - f := fixtures.Basic().ByTag("ofs-delta").One() - - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - _, err = d.Decode() - c.Assert(err, IsNil) - - var sum uint64 - iter, err := d.Index().Entries() - c.Assert(err, IsNil) - - for { - e, err := iter.Next() - if err == io.EOF { - break - } - - c.Assert(err, IsNil) - sum += uint64(e.CRC32) - } - - c.Assert(int(sum), Equals, 78022211966) -} - -func (s *ReaderSuite) TestDecodeObjectAt(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - } - - // the objects at reference 186, is a delta, so should be recall, - // without being read before. - obj, err := d.DecodeObjectAt(186) - c.Assert(err, IsNil) - c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") -} - -func (s *ReaderSuite) TestDecodeObjectAtForType(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, nil, plumbing.TreeObject, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - } - - // the objects at reference 186, is a delta, so should be recall, - // without being read before. - obj, err := d.DecodeObjectAt(186) - c.Assert(err, IsNil) - c.Assert(obj.Type(), Equals, plumbing.CommitObject) - c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") -} - -func (s *ReaderSuite) TestIndex(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - c.Assert(indexEntries(c, d), Equals, 0) - - _, err = d.Decode() - c.Assert(err, IsNil) - - c.Assert(indexEntries(c, d), Equals, 31) -} - -func indexEntries(c *C, d *packfile.Decoder) int { - var count int - entries, err := d.Index().Entries() - c.Assert(err, IsNil) - - for { - _, err := entries.Next() - if err == io.EOF { - break - } - - c.Assert(err, IsNil) - count++ - } - - return count -} - -func (s *ReaderSuite) TestSetIndex(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - w := new(idxfile.Writer) - h := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5") - w.Add(h, uint64(42), 0) - w.OnFooter(plumbing.ZeroHash) - - var idx idxfile.Index - idx, err = w.Index() - c.Assert(err, IsNil) - d.SetIndex(idx) - - idx = d.Index() - c.Assert(indexEntries(c, d), Equals, 1) - - offset, err := idx.FindOffset(h) - c.Assert(err, IsNil) - c.Assert(offset, Equals, int64(42)) -} - -func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { - i, err := s.IterEncodedObjects(plumbing.AnyObject) - c.Assert(err, IsNil) - - var count int - err = i.ForEach(func(plumbing.EncodedObject) error { count++; return nil }) - c.Assert(err, IsNil) - c.Assert(count, Equals, len(expects)) - - for _, exp := range expects { - obt, err := s.EncodedObject(plumbing.AnyObject, plumbing.NewHash(exp)) - c.Assert(err, IsNil) - c.Assert(obt.Hash().String(), Equals, exp) - } -} - -func getIndexFromIdxFile(r io.Reader) idxfile.Index { - idxf := idxfile.NewMemoryIndex() - d := idxfile.NewDecoder(r) - if err := d.Decode(idxf); err != nil { - panic(err) - } - - return idxf -} diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index 8cc7180..6ffebc2 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -2,14 +2,16 @@ package packfile_test import ( "bytes" + "io" "math/rand" "testing" + "gopkg.in/src-d/go-billy.v3/memfs" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" - "gopkg.in/src-d/go-git.v4/storage/memory" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" @@ -34,7 +36,6 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) { c.Assert(err, IsNil) s.testEncodeDecode(c, storage, 10) }) - } func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) { @@ -52,8 +53,11 @@ func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) { }) } -func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, packWindow uint) { - +func (s *EncoderAdvancedSuite) testEncodeDecode( + c *C, + storage storer.Storer, + packWindow uint, +) { objIter, err := storage.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) @@ -80,16 +84,31 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, pac encodeHash, err := enc.Encode(hashes, packWindow) c.Assert(err, IsNil) - scanner := NewScanner(buf) - storage = memory.NewStorage() - d, err := NewDecoder(scanner, storage) + f, err := memfs.New().Create("packfile") + c.Assert(err, IsNil) + + _, err = f.Write(buf.Bytes()) + c.Assert(err, IsNil) + + _, err = f.Seek(0, io.SeekStart) c.Assert(err, IsNil) - decodeHash, err := d.Decode() + + w := new(idxfile.Writer) + _, err = NewParser(NewScanner(f), w).Parse() + c.Assert(err, IsNil) + index, err := w.Index() + c.Assert(err, IsNil) + + _, err = f.Seek(0, io.SeekStart) c.Assert(err, IsNil) + p := NewPackfile(index, f) + + decodeHash, err := p.ID() + c.Assert(err, IsNil) c.Assert(encodeHash, Equals, decodeHash) - objIter, err = storage.IterEncodedObjects(plumbing.AnyObject) + objIter, err = p.GetAll() c.Assert(err, IsNil) obtainedObjects := map[plumbing.Hash]bool{} err = objIter.ForEach(func(o plumbing.EncodedObject) error { diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 84d03fb..7b6dde2 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -2,8 +2,12 @@ package packfile import ( "bytes" + "io" + stdioutil "io/ioutil" + "gopkg.in/src-d/go-billy.v3/memfs" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/storage/memory" . "gopkg.in/check.v1" @@ -130,24 +134,20 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - - decHash, err := d.Decode() + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) c.Assert(encHash, Equals, decHash) - decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash()) + decSrc, err := p.Get(srcObject.Hash()) c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, srcObject) + objectsEqual(c, decSrc, srcObject) - decTarget, err := storage.EncodedObject(targetObject.Type(), targetObject.Hash()) + decTarget, err := p.Get(targetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, targetObject) + objectsEqual(c, decTarget, targetObject) } func (s *EncoderSuite) deltaOverDeltaTest(c *C) { @@ -173,27 +173,24 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - - decHash, err := d.Decode() + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) c.Assert(encHash, Equals, decHash) - decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash()) + decSrc, err := p.Get(srcObject.Hash()) c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, srcObject) + objectsEqual(c, decSrc, srcObject) - decTarget, err := storage.EncodedObject(targetObject.Type(), targetObject.Hash()) + decTarget, err := p.Get(targetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, targetObject) + objectsEqual(c, decTarget, targetObject) - decOtherTarget, err := storage.EncodedObject(otherTargetObject.Type(), otherTargetObject.Hash()) + decOtherTarget, err := p.Get(otherTargetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decOtherTarget, DeepEquals, otherTargetObject) + objectsEqual(c, decOtherTarget, otherTargetObject) } func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { @@ -248,29 +245,70 @@ func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) - decHash, err := d.Decode() + c.Assert(encHash, Equals, decHash) + + decSrc, err := p.Get(o1.Hash()) c.Assert(err, IsNil) + objectsEqual(c, decSrc, o1) - c.Assert(encHash, Equals, decHash) + decTarget, err := p.Get(o2.Hash()) + c.Assert(err, IsNil) + objectsEqual(c, decTarget, o2) + + decOtherTarget, err := p.Get(o3.Hash()) + c.Assert(err, IsNil) + objectsEqual(c, decOtherTarget, o3) + + decAnotherTarget, err := p.Get(o4.Hash()) + c.Assert(err, IsNil) + objectsEqual(c, decAnotherTarget, o4) +} + +func objectsEqual(c *C, o1, o2 plumbing.EncodedObject) { + c.Assert(o1.Type(), Equals, o2.Type()) + c.Assert(o1.Hash(), Equals, o2.Hash()) + c.Assert(o1.Size(), Equals, o2.Size()) - decSrc, err := storage.EncodedObject(o1.Type(), o1.Hash()) + r1, err := o1.Reader() c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, o1) - decTarget, err := storage.EncodedObject(o2.Type(), o2.Hash()) + b1, err := stdioutil.ReadAll(r1) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, o2) - decOtherTarget, err := storage.EncodedObject(o3.Type(), o3.Hash()) + r2, err := o2.Reader() c.Assert(err, IsNil) - c.Assert(decOtherTarget, DeepEquals, o3) - decAnotherTarget, err := storage.EncodedObject(o4.Type(), o4.Hash()) + b2, err := stdioutil.ReadAll(r2) c.Assert(err, IsNil) - c.Assert(decAnotherTarget, DeepEquals, o4) + + c.Assert(bytes.Compare(b1, b2), Equals, 0) +} + +func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { + file, err := memfs.New().Create("packfile") + c.Assert(err, IsNil) + + _, err = file.Write(buf.Bytes()) + c.Assert(err, IsNil) + + _, err = file.Seek(0, io.SeekStart) + c.Assert(err, IsNil) + + scanner := NewScanner(file) + + w := new(idxfile.Writer) + _, err = NewParser(scanner, w).Parse() + c.Assert(err, IsNil) + + index, err := w.Index() + c.Assert(err, IsNil) + + return NewPackfile(index, file), func() { + c.Assert(file.Close(), IsNil) + } } diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 2e831f2..37743ba 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -3,38 +3,55 @@ package packfile import ( "bytes" "io" + stdioutil "io/ioutil" "os" - billy "gopkg.in/src-d/go-billy.v4" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) +var ( + // ErrInvalidObject is returned by Decode when an invalid object is + // found in the packfile. + ErrInvalidObject = NewError("invalid git object") + // ErrZLib is returned by Decode when there was an error unzipping + // the packfile contents. + ErrZLib = NewError("zlib reading error") +) + // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index - billy.File + file io.ReadSeeker s *Scanner deltaBaseCache cache.Object offsetToType map[int64]plumbing.ObjectType } -// NewPackfile returns a packfile representation for the given packfile file -// and packfile idx. -func NewPackfile(index idxfile.Index, file billy.File) *Packfile { +// NewPackfileWithCache creates a new Packfile with the given object cache. +func NewPackfileWithCache( + index idxfile.Index, + file io.ReadSeeker, + cache cache.Object, +) *Packfile { s := NewScanner(file) - return &Packfile{ index, file, s, - cache.NewObjectLRUDefault(), + cache, make(map[int64]plumbing.ObjectType), } } +// NewPackfile returns a packfile representation for the given packfile file +// and packfile idx. +func NewPackfile(index idxfile.Index, file io.ReadSeeker) *Packfile { + return NewPackfileWithCache(index, file, cache.NewObjectLRUDefault()) +} + // Get retrieves the encoded object in the packfile with the given hash. func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { offset, err := p.FindOffset(h) @@ -334,35 +351,49 @@ func (p *Packfile) cachePut(obj plumbing.EncodedObject) { // The iterator returned is not thread-safe, it should be used in the same // thread as the Packfile instance. func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { - entries, err := p.Entries() - if err != nil { - return nil, err - } + return p.GetByType(plumbing.AnyObject) +} - return &objectIter{ - // Easiest way to provide an object decoder is just to pass a Packfile - // instance. To not mess with the seeks, it's a new instance with a - // different scanner but the same cache and offset to hash map for - // reusing as much cache as possible. - p: &Packfile{ - p.Index, - p.File, - NewScanner(p.File), - p.deltaBaseCache, - p.offsetToType, - }, - iter: entries, - }, nil +// GetByType returns all the objects of the given type. +func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, error) { + switch typ { + case plumbing.AnyObject, + plumbing.BlobObject, + plumbing.TreeObject, + plumbing.CommitObject, + plumbing.TagObject: + entries, err := p.Entries() + if err != nil { + return nil, err + } + + return &objectIter{ + // Easiest way to provide an object decoder is just to pass a Packfile + // instance. To not mess with the seeks, it's a new instance with a + // different scanner but the same cache and offset to hash map for + // reusing as much cache as possible. + p: p, + iter: entries, + typ: typ, + }, nil + default: + return nil, plumbing.ErrInvalidType + } } // ID returns the ID of the packfile, which is the checksum at the end of it. func (p *Packfile) ID() (plumbing.Hash, error) { - if _, err := p.File.Seek(-20, io.SeekEnd); err != nil { + prev, err := p.file.Seek(-20, io.SeekEnd) + if err != nil { return plumbing.ZeroHash, err } var hash plumbing.Hash - if _, err := io.ReadFull(p.File, hash[:]); err != nil { + if _, err := io.ReadFull(p.file, hash[:]); err != nil { + return plumbing.ZeroHash, err + } + + if _, err := p.file.Seek(prev, io.SeekStart); err != nil { return plumbing.ZeroHash, err } @@ -371,25 +402,59 @@ func (p *Packfile) ID() (plumbing.Hash, error) { // Close the packfile and its resources. func (p *Packfile) Close() error { - return p.File.Close() + closer, ok := p.file.(io.Closer) + if !ok { + return nil + } + + return closer.Close() } -type objectDecoder interface { - nextObject() (plumbing.EncodedObject, error) +// MemoryObjectFromDisk converts a DiskObject to a MemoryObject. +func MemoryObjectFromDisk(obj plumbing.EncodedObject) (plumbing.EncodedObject, error) { + o2 := new(plumbing.MemoryObject) + o2.SetType(obj.Type()) + o2.SetSize(obj.Size()) + + r, err := obj.Reader() + if err != nil { + return nil, err + } + + data, err := stdioutil.ReadAll(r) + if err != nil { + return nil, err + } + + if _, err := o2.Write(data); err != nil { + return nil, err + } + + return o2, nil } type objectIter struct { p *Packfile + typ plumbing.ObjectType iter idxfile.EntryIter } func (i *objectIter) Next() (plumbing.EncodedObject, error) { - e, err := i.iter.Next() - if err != nil { - return nil, err - } + for { + e, err := i.iter.Next() + if err != nil { + return nil, err + } - return i.p.GetByOffset(int64(e.Offset)) + obj, err := i.p.GetByOffset(int64(e.Offset)) + if err != nil { + return nil, err + } + + if i.typ == plumbing.AnyObject || obj.Type() == i.typ { + return obj, nil + } + } } func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go index e234794..3193bed 100644 --- a/plumbing/format/packfile/packfile_test.go +++ b/plumbing/format/packfile/packfile_test.go @@ -1,23 +1,21 @@ -package packfile +package packfile_test import ( - "bytes" "io" "math" - "io/ioutil" - . "gopkg.in/check.v1" "gopkg.in/src-d/go-billy.v4/osfs" fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/storage/memory" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/plumbing/storer" ) type PackfileSuite struct { fixtures.Suite - p *Packfile + p *packfile.Packfile idx *idxfile.MemoryIndex f *fixtures.Fixture } @@ -108,60 +106,157 @@ var expectedEntries = map[plumbing.Hash]int64{ plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 84671, } -func (s *PackfileSuite) TestContent(c *C) { - storer := memory.NewStorage() - decoder, err := NewDecoder(NewScanner(s.f.Packfile()), storer) - c.Assert(err, IsNil) +func (s *PackfileSuite) SetUpTest(c *C) { + s.f = fixtures.Basic().One() - _, err = decoder.Decode() + f, err := osfs.New("").Open(s.f.Packfile().Name()) c.Assert(err, IsNil) - iter, err := s.p.GetAll() + s.idx = idxfile.NewMemoryIndex() + c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) + + s.p = packfile.NewPackfile(s.idx, f) +} + +func (s *PackfileSuite) TearDownTest(c *C) { + c.Assert(s.p.Close(), IsNil) +} + +func (s *PackfileSuite) TestDecode(c *C) { + fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + index := getIndexFromIdxFile(f.Idx()) + p := packfile.NewPackfile(index, f.Packfile()) + defer p.Close() + + for _, h := range expectedHashes { + obj, err := p.Get(plumbing.NewHash(h)) + c.Assert(err, IsNil) + c.Assert(obj.Hash().String(), Equals, h) + } + }) +} + +func (s *PackfileSuite) TestDecodeByTypeRefDelta(c *C) { + f := fixtures.Basic().ByTag("ref-delta").One() + + index := getIndexFromIdxFile(f.Idx()) + packfile := packfile.NewPackfile(index, f.Packfile()) + defer packfile.Close() + + iter, err := packfile.GetByType(plumbing.CommitObject) c.Assert(err, IsNil) + var count int for { - o, err := iter.Next() + obj, err := iter.Next() if err == io.EOF { break } + count++ c.Assert(err, IsNil) + c.Assert(obj.Type(), Equals, plumbing.CommitObject) + } - o2, err := storer.EncodedObject(plumbing.AnyObject, o.Hash()) - c.Assert(err, IsNil) + c.Assert(count > 0, Equals, true) +} - c.Assert(o.Type(), Equals, o2.Type()) - c.Assert(o.Size(), Equals, o2.Size()) +func (s *PackfileSuite) TestDecodeByType(c *C) { + ts := []plumbing.ObjectType{ + plumbing.CommitObject, + plumbing.TagObject, + plumbing.TreeObject, + plumbing.BlobObject, + } - r, err := o.Reader() - c.Assert(err, IsNil) + fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + for _, t := range ts { + index := getIndexFromIdxFile(f.Idx()) + packfile := packfile.NewPackfile(index, f.Packfile()) + defer packfile.Close() - c1, err := ioutil.ReadAll(r) - c.Assert(err, IsNil) - c.Assert(r.Close(), IsNil) + iter, err := packfile.GetByType(t) + c.Assert(err, IsNil) - r, err = o2.Reader() - c.Assert(err, IsNil) + c.Assert(iter.ForEach(func(obj plumbing.EncodedObject) error { + c.Assert(obj.Type(), Equals, t) + return nil + }), IsNil) + } + }) +} - c2, err := ioutil.ReadAll(r) - c.Assert(err, IsNil) - c.Assert(r.Close(), IsNil) +func (s *PackfileSuite) TestDecodeByTypeConstructor(c *C) { + f := fixtures.Basic().ByTag("packfile").One() + index := getIndexFromIdxFile(f.Idx()) + packfile := packfile.NewPackfile(index, f.Packfile()) + defer packfile.Close() - c.Assert(bytes.Compare(c1, c2), Equals, 0) - } + _, err := packfile.GetByType(plumbing.OFSDeltaObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) + + _, err = packfile.GetByType(plumbing.REFDeltaObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) + + _, err = packfile.GetByType(plumbing.InvalidObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) } -func (s *PackfileSuite) SetUpTest(c *C) { - s.f = fixtures.Basic().One() +var expectedHashes = []string{ + "918c48b83bd081e863dbe1b80f8998f058cd8294", + "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", + "1669dce138d9b841a518c64b10914d88f5e488ea", + "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", + "b8e471f58bcbca63b07bda20e428190409c2db47", + "35e85108805c84807bc66a02d91535e1e24b38b9", + "b029517f6300c2da0f4b651b8642506cd6aaf45d", + "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", + "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", + "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", + "d5c0f4ab811897cadf03aec358ae60d21f91c50d", + "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", + "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", + "9dea2395f5403188298c1dabe8bdafe562c491e3", + "586af567d0bb5e771e49bdd9434f5e0fb76d25fa", + "9a48f23120e880dfbe41f7c9b7b708e9ee62a492", + "5a877e6a906a2743ad6e45d99c1793642aaf8eda", + "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", + "a8d315b2b1c615d43042c3a62402b8a54288cf5c", + "a39771a7651f97faf5c72e08224d857fc35133db", + "880cd14280f4b9b6ed3986d6671f907d7cc2a198", + "fb72698cab7617ac416264415f13224dfd7a165e", + "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", + "eba74343e2f15d62adedfd8c883ee0262b5c8021", + "c2d30fa8ef288618f65f6eed6e168e0d514886f4", + "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", + "aa9b383c260e1d05fbbf6b30a02914555e20c725", + "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", + "dbd3641b371024f44d0e469a9c8f5457b0660de1", + "e8d3ffab552895c19b9fcf7aa264d277cde33881", + "7e59600739c96546163833214c36459e324bad0a", +} - f, err := osfs.New("").Open(s.f.Packfile().Name()) +func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { + i, err := s.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) - s.idx = idxfile.NewMemoryIndex() - c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) + var count int + err = i.ForEach(func(plumbing.EncodedObject) error { count++; return nil }) + c.Assert(err, IsNil) + c.Assert(count, Equals, len(expects)) - s.p = NewPackfile(s.idx, f) + for _, exp := range expects { + obt, err := s.EncodedObject(plumbing.AnyObject, plumbing.NewHash(exp)) + c.Assert(err, IsNil) + c.Assert(obt.Hash().String(), Equals, exp) + } } -func (s *PackfileSuite) TearDownTest(c *C) { - c.Assert(s.p.Close(), IsNil) +func getIndexFromIdxFile(r io.Reader) idxfile.Index { + idxf := idxfile.NewMemoryIndex() + d := idxfile.NewDecoder(r) + if err := d.Decode(idxf); err != nil { + panic(err) + } + + return idxf } diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 696f5ba..f0a7674 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -9,6 +9,16 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing/cache" ) +var ( + // ErrObjectContentAlreadyRead is returned when the content of the object + // was already read, since the content can only be read once. + ErrObjectContentAlreadyRead = errors.New("object content was already read") + + // ErrReferenceDeltaNotFound is returned when the reference delta is not + // found. + ErrReferenceDeltaNotFound = errors.New("reference delta not found") +) + // Observer interface is implemented by index encoders. type Observer interface { // OnHeader is called when a new packfile is opened. @@ -16,7 +26,7 @@ type Observer interface { // OnInflatedObjectHeader is called for each object header read. OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error // OnInflatedObjectContent is called for each decoded object. - OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error + OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error // OnFooter is called when decoding is done. OnFooter(h plumbing.Hash) error } @@ -32,41 +42,44 @@ type Parser struct { hashOffset map[plumbing.Hash]int64 checksum plumbing.Hash - cache *cache.ObjectLRU + cache *cache.ObjectLRU + contentCache map[int64][]byte ob []Observer } // NewParser creates a new Parser struct. func NewParser(scanner *Scanner, ob ...Observer) *Parser { + var contentCache map[int64][]byte + if !scanner.IsSeekable { + contentCache = make(map[int64][]byte) + } + return &Parser{ - scanner: scanner, - ob: ob, - count: 0, - cache: cache.NewObjectLRUDefault(), + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + contentCache: contentCache, } } // Parse start decoding phase of the packfile. func (p *Parser) Parse() (plumbing.Hash, error) { - err := p.init() - if err != nil { + if err := p.init(); err != nil { return plumbing.ZeroHash, err } - err = p.firstPass() - if err != nil { + if err := p.firstPass(); err != nil { return plumbing.ZeroHash, err } - err = p.resolveDeltas() - if err != nil { + if err := p.resolveDeltas(); err != nil { return plumbing.ZeroHash, err } for _, o := range p.ob { - err := o.OnFooter(p.checksum) - if err != nil { + if err := o.OnFooter(p.checksum); err != nil { return plumbing.ZeroHash, err } } @@ -81,8 +94,7 @@ func (p *Parser) init() error { } for _, o := range p.ob { - err := o.OnHeader(c) - if err != nil { + if err := o.OnHeader(c); err != nil { return err } } @@ -99,7 +111,7 @@ func (p *Parser) firstPass() error { buf := new(bytes.Buffer) for i := uint32(0); i < p.count; i++ { - buf.Truncate(0) + buf.Reset() oh, err := p.scanner.NextObjectHeader() if err != nil { @@ -122,8 +134,7 @@ func (p *Parser) firstPass() error { } if !ok { - // TODO improve error - return errors.New("Reference delta not found") + return ErrReferenceDeltaNotFound } ota = newDeltaObject(oh.Offset, oh.Length, t, parent) @@ -143,35 +154,41 @@ func (p *Parser) firstPass() error { ota.Length = oh.Length if !delta { - ota.Write(buf.Bytes()) + if _, err := ota.Write(buf.Bytes()); err != nil { + return err + } ota.SHA1 = ota.Sum() + p.oiByHash[ota.SHA1] = ota } p.oiByOffset[oh.Offset] = ota - p.oiByHash[oh.Reference] = ota p.oi[i] = ota } - checksum, err := p.scanner.Checksum() - p.checksum = checksum - - if err == io.EOF { - return nil + var err error + p.checksum, err = p.scanner.Checksum() + if err != nil && err != io.EOF { + return err } - return err + return nil } func (p *Parser) resolveDeltas() error { for _, obj := range p.oi { + content, err := obj.Content() + if err != nil { + return err + } + for _, o := range p.ob { err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) if err != nil { return err } - err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32) + err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content) if err != nil { return err } @@ -185,8 +202,7 @@ func (p *Parser) resolveDeltas() error { } for _, child := range obj.Children { - _, err = p.resolveObject(child, base) - if err != nil { + if _, err := p.resolveObject(child, base); err != nil { return err } } @@ -205,8 +221,7 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { } buf := make([]byte, e.Size()) - _, err = r.Read(buf) - if err != nil { + if _, err = r.Read(buf); err != nil { return nil, err } @@ -254,8 +269,8 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { func (p *Parser) resolveObject( o *objectInfo, - base []byte) ([]byte, error) { - + base []byte, +) ([]byte, error) { if !o.DiskType.IsDelta() { return nil, nil } @@ -278,16 +293,17 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { // TODO: skip header. Header size can be calculated with the offset of the // next offset in the first pass. - p.scanner.SeekFromStart(o.Offset) - _, err := p.scanner.NextObjectHeader() - if err != nil { + if _, err := p.scanner.SeekFromStart(o.Offset); err != nil { return nil, err } - buf.Truncate(0) + if _, err := p.scanner.NextObjectHeader(); err != nil { + return nil, err + } - _, _, err = p.scanner.NextObject(buf) - if err != nil { + buf.Reset() + + if _, _, err := p.scanner.NextObject(buf); err != nil { return nil, err } @@ -301,9 +317,11 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { } ota.Type = ota.Parent.Type - hash := plumbing.ComputeHash(ota.Type, patched) - - ota.SHA1 = hash + ota.Hasher = plumbing.NewHasher(ota.Type, int64(len(patched))) + if _, err := ota.Write(patched); err != nil { + return nil, err + } + ota.SHA1 = ota.Sum() return patched, nil } @@ -323,6 +341,8 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash + + content *bytes.Buffer } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -351,6 +371,30 @@ func newDeltaObject( return obj } +func (o *objectInfo) Write(bs []byte) (int, error) { + n, err := o.Hasher.Write(bs) + if err != nil { + return 0, err + } + + o.content = bytes.NewBuffer(nil) + + _, _ = o.content.Write(bs) + return n, nil +} + +// Content returns the content of the object. This operation can only be done +// once. +func (o *objectInfo) Content() ([]byte, error) { + if o.content == nil { + return nil, ErrObjectContentAlreadyRead + } + + r := o.content + o.content = nil + return r.Bytes(), nil +} + func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go index 87a8804..b18f20f 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -103,7 +103,7 @@ func (t *testObserver) OnInflatedObjectHeader(otype plumbing.ObjectType, objSize return nil } -func (t *testObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { +func (t *testObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, _ []byte) error { o := t.get(pos) o.hash = h.String() o.crc = crc diff --git a/plumbing/object/blob_test.go b/plumbing/object/blob_test.go index 5ed9de0..181436d 100644 --- a/plumbing/object/blob_test.go +++ b/plumbing/object/blob_test.go @@ -1,6 +1,7 @@ package object import ( + "bytes" "io" "io/ioutil" @@ -88,8 +89,26 @@ func (s *BlobsSuite) TestBlobIter(c *C) { } c.Assert(err, IsNil) - c.Assert(b, DeepEquals, blobs[i]) - i += 1 + c.Assert(b.ID(), Equals, blobs[i].ID()) + c.Assert(b.Size, Equals, blobs[i].Size) + c.Assert(b.Type(), Equals, blobs[i].Type()) + + r1, err := b.Reader() + c.Assert(err, IsNil) + + b1, err := ioutil.ReadAll(r1) + c.Assert(err, IsNil) + c.Assert(r1.Close(), IsNil) + + r2, err := blobs[i].Reader() + c.Assert(err, IsNil) + + b2, err := ioutil.ReadAll(r2) + c.Assert(err, IsNil) + c.Assert(r2.Close(), IsNil) + + c.Assert(bytes.Compare(b1, b2), Equals, 0) + i++ } iter.Close() diff --git a/plumbing/object/difftree_test.go b/plumbing/object/difftree_test.go index 40af8f2..ff9ecbc 100644 --- a/plumbing/object/difftree_test.go +++ b/plumbing/object/difftree_test.go @@ -45,25 +45,17 @@ func (s *DiffTreeSuite) storageFromPackfile(f *fixtures.Fixture) storer.EncodedO return sto } - sto = memory.NewStorage() + storer := memory.NewStorage() pf := f.Packfile() - defer pf.Close() - n := packfile.NewScanner(pf) - d, err := packfile.NewDecoder(n, sto) - if err != nil { - panic(err) - } - - _, err = d.Decode() - if err != nil { + if err := packfile.UpdateObjectStorage(storer, pf); err != nil { panic(err) } - s.cache[f.URL] = sto - return sto + s.cache[f.URL] = storer + return storer } var _ = Suite(&DiffTreeSuite{}) diff --git a/plumbing/object/object_test.go b/plumbing/object/object_test.go index 4f0fcb3..68aa1a1 100644 --- a/plumbing/object/object_test.go +++ b/plumbing/object/object_test.go @@ -197,8 +197,9 @@ func (s *ObjectsSuite) TestObjectIter(c *C) { } c.Assert(err, IsNil) - c.Assert(o, DeepEquals, objects[i]) - i += 1 + c.Assert(o.ID(), Equals, objects[i].ID()) + c.Assert(o.Type(), Equals, objects[i].Type()) + i++ } iter.Close() diff --git a/plumbing/transport/test/receive_pack.go b/plumbing/transport/test/receive_pack.go index 57f602d..5aea1c0 100644 --- a/plumbing/transport/test/receive_pack.go +++ b/plumbing/transport/test/receive_pack.go @@ -262,13 +262,16 @@ func (s *ReceivePackSuite) receivePackNoCheck(c *C, ep *transport.Endpoint, req.Packfile = s.emptyPackfile() } - return r.ReceivePack(context.Background(), req) + if s, err := r.ReceivePack(context.Background(), req); err != nil { + return s, err + } else { + return s, err + } } func (s *ReceivePackSuite) receivePack(c *C, ep *transport.Endpoint, req *packp.ReferenceUpdateRequest, fixture *fixtures.Fixture, callAdvertisedReferences bool) { - url := "" if fixture != nil { url = fixture.URL @@ -279,7 +282,6 @@ func (s *ReceivePackSuite) receivePack(c *C, ep *transport.Endpoint, ep.String(), url, callAdvertisedReferences, ) report, err := s.receivePackNoCheck(c, ep, req, fixture, callAdvertisedReferences) - c.Assert(err, IsNil, comment) if req.Capabilities.Supports(capability.ReportStatus) { c.Assert(report, NotNil, comment) diff --git a/plumbing/transport/test/upload_pack.go b/plumbing/transport/test/upload_pack.go index 70e4e56..8709ac2 100644 --- a/plumbing/transport/test/upload_pack.go +++ b/plumbing/transport/test/upload_pack.go @@ -258,11 +258,8 @@ func (s *UploadPackSuite) checkObjectNumber(c *C, r io.Reader, n int) { b, err := ioutil.ReadAll(r) c.Assert(err, IsNil) buf := bytes.NewBuffer(b) - scanner := packfile.NewScanner(buf) storage := memory.NewStorage() - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - _, err = d.Decode() + err = packfile.UpdateObjectStorage(storage, buf) c.Assert(err, IsNil) c.Assert(len(storage.Objects), Equals, n) } -- cgit From 5889a3b669f0f515ff445aa040afc1e7eeb2bbd1 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Wed, 8 Aug 2018 16:56:20 +0200 Subject: plumbing: packfile, allow non-seekable sources on Parser Signed-off-by: Miguel Molina --- plumbing/format/idxfile/writer_test.go | 5 +- plumbing/format/packfile/common.go | 63 +---- plumbing/format/packfile/encoder_advanced_test.go | 5 +- plumbing/format/packfile/encoder_test.go | 5 +- plumbing/format/packfile/parser.go | 311 ++++++++++++++-------- plumbing/format/packfile/parser_test.go | 19 +- 6 files changed, 229 insertions(+), 179 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go index 7c3cceb..912211d 100644 --- a/plumbing/format/idxfile/writer_test.go +++ b/plumbing/format/idxfile/writer_test.go @@ -24,9 +24,10 @@ func (s *WriterSuite) TestWriter(c *C) { scanner := packfile.NewScanner(f.Packfile()) obs := new(idxfile.Writer) - parser := packfile.NewParser(scanner, obs) + parser, err := packfile.NewParser(scanner, obs) + c.Assert(err, IsNil) - _, err := parser.Parse() + _, err = parser.Parse() c.Assert(err, IsNil) idx, err := obs.Index() diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go index 76254f0..2b4aceb 100644 --- a/plumbing/format/packfile/common.go +++ b/plumbing/format/packfile/common.go @@ -2,11 +2,9 @@ package packfile import ( "bytes" - "errors" "io" "sync" - "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/utils/ioutil" ) @@ -32,8 +30,12 @@ func UpdateObjectStorage(s storer.Storer, packfile io.Reader) error { return WritePackfileToObjectStorage(pw, packfile) } - updater := newPackfileStorageUpdater(s) - _, err := NewParser(NewScanner(packfile), updater).Parse() + p, err := NewParserWithStorage(NewScanner(packfile), s) + if err != nil { + return err + } + + _, err = p.Parse() return err } @@ -58,56 +60,3 @@ var bufPool = sync.Pool{ return bytes.NewBuffer(nil) }, } - -var errMissingObjectContent = errors.New("missing object content") - -type packfileStorageUpdater struct { - storer.Storer - lastSize int64 - lastType plumbing.ObjectType -} - -func newPackfileStorageUpdater(s storer.Storer) *packfileStorageUpdater { - return &packfileStorageUpdater{Storer: s} -} - -func (p *packfileStorageUpdater) OnHeader(count uint32) error { - return nil -} - -func (p *packfileStorageUpdater) OnInflatedObjectHeader( - t plumbing.ObjectType, - objSize int64, - pos int64, -) error { - if p.lastSize > 0 || p.lastType != plumbing.InvalidObject { - return errMissingObjectContent - } - - p.lastType = t - p.lastSize = objSize - return nil -} - -func (p *packfileStorageUpdater) OnInflatedObjectContent( - h plumbing.Hash, - pos int64, - crc uint32, - content []byte, -) error { - obj := new(plumbing.MemoryObject) - obj.SetSize(p.lastSize) - obj.SetType(p.lastType) - if _, err := obj.Write(content); err != nil { - return err - } - - _, err := p.SetEncodedObject(obj) - p.lastSize = 0 - p.lastType = plumbing.InvalidObject - return err -} - -func (p *packfileStorageUpdater) OnFooter(h plumbing.Hash) error { - return nil -} diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index 6ffebc2..78ddc45 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -94,7 +94,10 @@ func (s *EncoderAdvancedSuite) testEncodeDecode( c.Assert(err, IsNil) w := new(idxfile.Writer) - _, err = NewParser(NewScanner(f), w).Parse() + parser, err := NewParser(NewScanner(f), w) + c.Assert(err, IsNil) + + _, err = parser.Parse() c.Assert(err, IsNil) index, err := w.Index() c.Assert(err, IsNil) diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 7b6dde2..24e2082 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -302,7 +302,10 @@ func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { scanner := NewScanner(file) w := new(idxfile.Writer) - _, err = NewParser(scanner, w).Parse() + p, err := NewParser(scanner, w) + c.Assert(err, IsNil) + + _, err = p.Parse() c.Assert(err, IsNil) index, err := w.Index() diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index f0a7674..beb3e27 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -7,16 +7,20 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/storer" ) var ( - // ErrObjectContentAlreadyRead is returned when the content of the object - // was already read, since the content can only be read once. - ErrObjectContentAlreadyRead = errors.New("object content was already read") - // ErrReferenceDeltaNotFound is returned when the reference delta is not // found. ErrReferenceDeltaNotFound = errors.New("reference delta not found") + + // ErrNotSeekableSource is returned when the source for the parser is not + // seekable and a storage was not provided, so it can't be parsed. + ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided") + + // ErrDeltaNotCached is returned when the delta could not be found in cache. + ErrDeltaNotCached = errors.New("delta could not be found in cache") ) // Observer interface is implemented by index encoders. @@ -34,34 +38,96 @@ type Observer interface { // Parser decodes a packfile and calls any observer associated to it. Is used // to generate indexes. type Parser struct { - scanner *Scanner - count uint32 - oi []*objectInfo - oiByHash map[plumbing.Hash]*objectInfo - oiByOffset map[int64]*objectInfo - hashOffset map[plumbing.Hash]int64 - checksum plumbing.Hash - - cache *cache.ObjectLRU - contentCache map[int64][]byte + storage storer.EncodedObjectStorer + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + pendingRefDeltas map[plumbing.Hash][]*objectInfo + checksum plumbing.Hash + + cache *cache.ObjectLRU + // delta content by offset, only used if source is not seekable + deltas map[int64][]byte ob []Observer } -// NewParser creates a new Parser struct. -func NewParser(scanner *Scanner, ob ...Observer) *Parser { - var contentCache map[int64][]byte +// NewParser creates a new Parser. The Scanner source must be seekable. +// If it's not, NewParserWithStorage should be used instead. +func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) { + return NewParserWithStorage(scanner, nil, ob...) +} + +// NewParserWithStorage creates a new Parser. The scanner source must either +// be seekable or a storage must be provided. +func NewParserWithStorage( + scanner *Scanner, + storage storer.EncodedObjectStorer, + ob ...Observer, +) (*Parser, error) { + if !scanner.IsSeekable && storage == nil { + return nil, ErrNotSeekableSource + } + + var deltas map[int64][]byte if !scanner.IsSeekable { - contentCache = make(map[int64][]byte) + deltas = make(map[int64][]byte) } return &Parser{ - scanner: scanner, - ob: ob, - count: 0, - cache: cache.NewObjectLRUDefault(), - contentCache: contentCache, + storage: storage, + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo), + deltas: deltas, + }, nil +} + +func (p *Parser) forEachObserver(f func(o Observer) error) error { + for _, o := range p.ob { + if err := f(o); err != nil { + return err + } } + return nil +} + +func (p *Parser) onHeader(count uint32) error { + return p.forEachObserver(func(o Observer) error { + return o.OnHeader(count) + }) +} + +func (p *Parser) onInflatedObjectHeader( + t plumbing.ObjectType, + objSize int64, + pos int64, +) error { + return p.forEachObserver(func(o Observer) error { + return o.OnInflatedObjectHeader(t, objSize, pos) + }) +} + +func (p *Parser) onInflatedObjectContent( + h plumbing.Hash, + pos int64, + crc uint32, + content []byte, +) error { + return p.forEachObserver(func(o Observer) error { + return o.OnInflatedObjectContent(h, pos, crc, content) + }) +} + +func (p *Parser) onFooter(h plumbing.Hash) error { + return p.forEachObserver(func(o Observer) error { + return o.OnFooter(h) + }) } // Parse start decoding phase of the packfile. @@ -70,7 +136,13 @@ func (p *Parser) Parse() (plumbing.Hash, error) { return plumbing.ZeroHash, err } - if err := p.firstPass(); err != nil { + if err := p.indexObjects(); err != nil { + return plumbing.ZeroHash, err + } + + var err error + p.checksum, err = p.scanner.Checksum() + if err != nil && err != io.EOF { return plumbing.ZeroHash, err } @@ -78,10 +150,12 @@ func (p *Parser) Parse() (plumbing.Hash, error) { return plumbing.ZeroHash, err } - for _, o := range p.ob { - if err := o.OnFooter(p.checksum); err != nil { - return plumbing.ZeroHash, err - } + if len(p.pendingRefDeltas) > 0 { + return plumbing.ZeroHash, ErrReferenceDeltaNotFound + } + + if err := p.onFooter(p.checksum); err != nil { + return plumbing.ZeroHash, err } return p.checksum, nil @@ -93,10 +167,8 @@ func (p *Parser) init() error { return err } - for _, o := range p.ob { - if err := o.OnHeader(c); err != nil { - return err - } + if err := p.onHeader(c); err != nil { + return err } p.count = c @@ -107,7 +179,7 @@ func (p *Parser) init() error { return nil } -func (p *Parser) firstPass() error { +func (p *Parser) indexObjects() error { buf := new(bytes.Buffer) for i := uint32(0); i < p.count; i++ { @@ -121,25 +193,30 @@ func (p *Parser) firstPass() error { delta := false var ota *objectInfo switch t := oh.Type; t { - case plumbing.OFSDeltaObject, plumbing.REFDeltaObject: + case plumbing.OFSDeltaObject: delta = true - var parent *objectInfo - var ok bool - - if t == plumbing.OFSDeltaObject { - parent, ok = p.oiByOffset[oh.OffsetReference] - } else { - parent, ok = p.oiByHash[oh.Reference] - } - + parent, ok := p.oiByOffset[oh.OffsetReference] if !ok { - return ErrReferenceDeltaNotFound + return plumbing.ErrObjectNotFound } ota = newDeltaObject(oh.Offset, oh.Length, t, parent) - parent.Children = append(parent.Children, ota) + case plumbing.REFDeltaObject: + delta = true + + parent, ok := p.oiByHash[oh.Reference] + if ok { + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + parent.Children = append(parent.Children, ota) + } else { + ota = newBaseObject(oh.Offset, oh.Length, t) + p.pendingRefDeltas[oh.Reference] = append( + p.pendingRefDeltas[oh.Reference], + ota, + ) + } default: ota = newBaseObject(oh.Offset, oh.Length, t) } @@ -153,23 +230,35 @@ func (p *Parser) firstPass() error { ota.PackSize = size ota.Length = oh.Length + data := buf.Bytes() if !delta { - if _, err := ota.Write(buf.Bytes()); err != nil { + if _, err := ota.Write(data); err != nil { return err } ota.SHA1 = ota.Sum() p.oiByHash[ota.SHA1] = ota } - p.oiByOffset[oh.Offset] = ota + if p.storage != nil && !delta { + obj := new(plumbing.MemoryObject) + obj.SetSize(oh.Length) + obj.SetType(oh.Type) + if _, err := obj.Write(data); err != nil { + return err + } - p.oi[i] = ota - } + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return err + } + } - var err error - p.checksum, err = p.scanner.Checksum() - if err != nil && err != io.EOF { - return err + if delta && !p.scanner.IsSeekable { + p.deltas[oh.Offset] = make([]byte, len(data)) + copy(p.deltas[oh.Offset], data) + } + + p.oiByOffset[oh.Offset] = ota + p.oi[i] = ota } return nil @@ -177,21 +266,17 @@ func (p *Parser) firstPass() error { func (p *Parser) resolveDeltas() error { for _, obj := range p.oi { - content, err := obj.Content() + content, err := p.get(obj) if err != nil { return err } - for _, o := range p.ob { - err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) - if err != nil { - return err - } + if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil { + return err + } - err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content) - if err != nil { - return err - } + if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil { + return err } if !obj.IsDelta() && len(obj.Children) > 0 { @@ -206,6 +291,11 @@ func (p *Parser) resolveDeltas() error { return err } } + + // Remove the delta from the cache. + if obj.DiskType.IsDelta() && !p.scanner.IsSeekable { + delete(p.deltas, obj.Offset) + } } } @@ -214,7 +304,17 @@ func (p *Parser) resolveDeltas() error { func (p *Parser) get(o *objectInfo) ([]byte, error) { e, ok := p.cache.Get(o.SHA1) - if ok { + // If it's not on the cache and is not a delta we can try to find it in the + // storage, if there's one. + if !ok && p.storage != nil && !o.Type.IsDelta() { + var err error + e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) + if err != nil { + return nil, err + } + } + + if e != nil { r, err := e.Reader() if err != nil { return nil, err @@ -228,32 +328,23 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { return buf, nil } - // Read from disk + var data []byte if o.DiskType.IsDelta() { base, err := p.get(o.Parent) if err != nil { return nil, err } - data, err := p.resolveObject(o, base) + data, err = p.resolveObject(o, base) if err != nil { return nil, err } - - if len(o.Children) > 0 { - m := &plumbing.MemoryObject{} - m.Write(data) - m.SetType(o.Type) - m.SetSize(o.Size()) - p.cache.Put(m) + } else { + var err error + data, err = p.readData(o) + if err != nil { + return nil, err } - - return data, nil - } - - data, err := p.readData(o) - if err != nil { - return nil, err } if len(o.Children) > 0 { @@ -285,11 +376,39 @@ func (p *Parser) resolveObject( return nil, err } + if pending, ok := p.pendingRefDeltas[o.SHA1]; ok { + for _, po := range pending { + po.Parent = o + o.Children = append(o.Children, po) + } + delete(p.pendingRefDeltas, o.SHA1) + } + + if p.storage != nil { + obj := new(plumbing.MemoryObject) + obj.SetSize(o.Size()) + obj.SetType(o.Type) + if _, err := obj.Write(data); err != nil { + return nil, err + } + + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return nil, err + } + } + return data, nil } func (p *Parser) readData(o *objectInfo) ([]byte, error) { - buf := new(bytes.Buffer) + if !p.scanner.IsSeekable && o.DiskType.IsDelta() { + data, ok := p.deltas[o.Offset] + if !ok { + return nil, ErrDeltaNotCached + } + + return data, nil + } // TODO: skip header. Header size can be calculated with the offset of the // next offset in the first pass. @@ -301,8 +420,7 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { return nil, err } - buf.Reset() - + buf := new(bytes.Buffer) if _, _, err := p.scanner.NextObject(buf); err != nil { return nil, err } @@ -322,6 +440,7 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { return nil, err } ota.SHA1 = ota.Sum() + ota.Length = int64(len(patched)) return patched, nil } @@ -341,8 +460,6 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash - - content *bytes.Buffer } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -371,30 +488,6 @@ func newDeltaObject( return obj } -func (o *objectInfo) Write(bs []byte) (int, error) { - n, err := o.Hasher.Write(bs) - if err != nil { - return 0, err - } - - o.content = bytes.NewBuffer(nil) - - _, _ = o.content.Write(bs) - return n, nil -} - -// Content returns the content of the object. This operation can only be done -// once. -func (o *objectInfo) Content() ([]byte, error) { - if o.content == nil { - return nil, ErrObjectContentAlreadyRead - } - - r := o.content - o.content = nil - return r.Bytes(), nil -} - func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go index b18f20f..7bce737 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -19,7 +19,8 @@ func (s *ParserSuite) TestParserHashes(c *C) { scanner := packfile.NewScanner(f.Packfile()) obs := new(testObserver) - parser := packfile.NewParser(scanner, obs) + parser, err := packfile.NewParser(scanner, obs) + c.Assert(err, IsNil) ch, err := parser.Parse() c.Assert(err, IsNil) @@ -36,7 +37,7 @@ func (s *ParserSuite) TestParserHashes(c *C) { objs := []observerObject{ {"e8d3ffab552895c19b9fcf7aa264d277cde33881", commit, 254, 12, 0xaa07ba4b}, - {"6ecf0ef2c2dffb796033e5a02219af86ec6584e5", commit, 93, 186, 0xf706df58}, + {"6ecf0ef2c2dffb796033e5a02219af86ec6584e5", commit, 245, 186, 0xf706df58}, {"918c48b83bd081e863dbe1b80f8998f058cd8294", commit, 242, 286, 0x12438846}, {"af2d6a6954d532f8ffb47615169c8fdf9d383a1a", commit, 242, 449, 0x2905a38c}, {"1669dce138d9b841a518c64b10914d88f5e488ea", commit, 333, 615, 0xd9429436}, @@ -54,18 +55,18 @@ func (s *ParserSuite) TestParserHashes(c *C) { {"9a48f23120e880dfbe41f7c9b7b708e9ee62a492", blob, 11488, 80998, 0x7316ff70}, {"9dea2395f5403188298c1dabe8bdafe562c491e3", blob, 78, 84032, 0xdb4fce56}, {"dbd3641b371024f44d0e469a9c8f5457b0660de1", tree, 272, 84115, 0x901cce2c}, - {"a8d315b2b1c615d43042c3a62402b8a54288cf5c", tree, 43, 84375, 0xec4552b0}, + {"a8d315b2b1c615d43042c3a62402b8a54288cf5c", tree, 271, 84375, 0xec4552b0}, {"a39771a7651f97faf5c72e08224d857fc35133db", tree, 38, 84430, 0x847905bf}, {"5a877e6a906a2743ad6e45d99c1793642aaf8eda", tree, 75, 84479, 0x3689459a}, {"586af567d0bb5e771e49bdd9434f5e0fb76d25fa", tree, 38, 84559, 0xe67af94a}, {"cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", tree, 34, 84608, 0xc2314a2e}, {"7e59600739c96546163833214c36459e324bad0a", blob, 9, 84653, 0xcd987848}, - {"fb72698cab7617ac416264415f13224dfd7a165e", tree, 6, 84671, 0x8a853a6d}, - {"4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", tree, 9, 84688, 0x70c6518}, - {"eba74343e2f15d62adedfd8c883ee0262b5c8021", tree, 6, 84708, 0x4f4108e2}, - {"c2d30fa8ef288618f65f6eed6e168e0d514886f4", tree, 5, 84725, 0xd6fe09e9}, - {"8dcef98b1d52143e1e2dbc458ffe38f925786bf2", tree, 8, 84741, 0xf07a2804}, - {"aa9b383c260e1d05fbbf6b30a02914555e20c725", tree, 4, 84760, 0x1d75d6be}, + {"fb72698cab7617ac416264415f13224dfd7a165e", tree, 238, 84671, 0x8a853a6d}, + {"4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", tree, 179, 84688, 0x70c6518}, + {"eba74343e2f15d62adedfd8c883ee0262b5c8021", tree, 148, 84708, 0x4f4108e2}, + {"c2d30fa8ef288618f65f6eed6e168e0d514886f4", tree, 110, 84725, 0xd6fe09e9}, + {"8dcef98b1d52143e1e2dbc458ffe38f925786bf2", tree, 111, 84741, 0xf07a2804}, + {"aa9b383c260e1d05fbbf6b30a02914555e20c725", tree, 73, 84760, 0x1d75d6be}, } c.Assert(obs.objects, DeepEquals, objs) -- cgit From b3d995f5ca6b544ed8a48fced85ffa94600af302 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 09:23:44 +0200 Subject: plumbing: packfile, add Parse benchmark Signed-off-by: Miguel Molina --- plumbing/format/packfile/parser_test.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'plumbing') diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go index 7bce737..b5d482e 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -1,6 +1,8 @@ package packfile_test import ( + "testing" + "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" @@ -138,3 +140,31 @@ func (t *testObserver) put(pos int64, o observerObject) { t.pos[pos] = len(t.objects) t.objects = append(t.objects, o) } + +func BenchmarkParse(b *testing.B) { + if err := fixtures.Init(); err != nil { + b.Fatal(err) + } + + defer func() { + if err := fixtures.Clean(); err != nil { + b.Fatal(err) + } + }() + + for _, f := range fixtures.ByTag("packfile") { + b.Run(f.URL, func(b *testing.B) { + for i := 0; i < b.N; i++ { + parser, err := packfile.NewParser(packfile.NewScanner(f.Packfile())) + if err != nil { + b.Fatal(err) + } + + _, err = parser.Parse() + if err != nil { + b.Fatal(err) + } + } + }) + } +} -- cgit From 71a3c9161d4d8d2baf16440a86a02e8f5678aef2 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 10:55:51 +0200 Subject: plumbing: packfile, read object content only once Signed-off-by: Miguel Molina --- plumbing/format/packfile/parser.go | 22 +++++++++++++++------- plumbing/format/packfile/parser_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index beb3e27..581c334 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -280,14 +280,8 @@ func (p *Parser) resolveDeltas() error { } if !obj.IsDelta() && len(obj.Children) > 0 { - var err error - base, err := p.get(obj) - if err != nil { - return err - } - for _, child := range obj.Children { - if _, err := p.resolveObject(child, base); err != nil { + if _, err := p.resolveObject(child, content); err != nil { return err } } @@ -297,12 +291,18 @@ func (p *Parser) resolveDeltas() error { delete(p.deltas, obj.Offset) } } + + obj.Content = nil } return nil } func (p *Parser) get(o *objectInfo) ([]byte, error) { + if len(o.Content) > 0 { + return o.Content, nil + } + e, ok := p.cache.Get(o.SHA1) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. @@ -460,6 +460,8 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash + + Content []byte } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -488,6 +490,12 @@ func newDeltaObject( return obj } +func (o *objectInfo) Write(b []byte) (int, error) { + o.Content = make([]byte, len(b)) + copy(o.Content, b) + return o.Hasher.Write(b) +} + func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go index b5d482e..012a140 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -168,3 +168,28 @@ func BenchmarkParse(b *testing.B) { }) } } + +func BenchmarkParseBasic(b *testing.B) { + if err := fixtures.Init(); err != nil { + b.Fatal(err) + } + + defer func() { + if err := fixtures.Clean(); err != nil { + b.Fatal(err) + } + }() + + f := fixtures.Basic().One() + for i := 0; i < b.N; i++ { + parser, err := packfile.NewParser(packfile.NewScanner(f.Packfile())) + if err != nil { + b.Fatal(err) + } + + _, err = parser.Parse() + if err != nil { + b.Fatal(err) + } + } +} -- cgit From 65dc4f9f192cc013e4765fb1162ce6ebda16573d Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 12:18:49 +0200 Subject: plumbing: packfile, rename DiskObject to FSObject Signed-off-by: Miguel Molina --- plumbing/format/packfile/disk_object.go | 64 --------------------------------- plumbing/format/packfile/fsobject.go | 64 +++++++++++++++++++++++++++++++++ plumbing/format/packfile/packfile.go | 4 +-- 3 files changed, 66 insertions(+), 66 deletions(-) delete mode 100644 plumbing/format/packfile/disk_object.go create mode 100644 plumbing/format/packfile/fsobject.go (limited to 'plumbing') diff --git a/plumbing/format/packfile/disk_object.go b/plumbing/format/packfile/disk_object.go deleted file mode 100644 index d3e8520..0000000 --- a/plumbing/format/packfile/disk_object.go +++ /dev/null @@ -1,64 +0,0 @@ -package packfile - -import ( - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" -) - -// DiskObject is an object from the packfile on disk. -type DiskObject struct { - hash plumbing.Hash - h *ObjectHeader - offset int64 - size int64 - typ plumbing.ObjectType - packfile *Packfile -} - -// NewDiskObject creates a new disk object. -func NewDiskObject( - hash plumbing.Hash, - finalType plumbing.ObjectType, - offset int64, - contentSize int64, - packfile *Packfile, -) *DiskObject { - return &DiskObject{ - hash: hash, - offset: offset, - size: contentSize, - typ: finalType, - packfile: packfile, - } -} - -// Reader implements the plumbing.EncodedObject interface. -func (o *DiskObject) Reader() (io.ReadCloser, error) { - return o.packfile.getObjectContent(o.offset) -} - -// SetSize implements the plumbing.EncodedObject interface. This method -// is a noop. -func (o *DiskObject) SetSize(int64) {} - -// SetType implements the plumbing.EncodedObject interface. This method is -// a noop. -func (o *DiskObject) SetType(plumbing.ObjectType) {} - -// Hash implements the plumbing.EncodedObject interface. -func (o *DiskObject) Hash() plumbing.Hash { return o.hash } - -// Size implements the plumbing.EncodedObject interface. -func (o *DiskObject) Size() int64 { return o.size } - -// Type implements the plumbing.EncodedObject interface. -func (o *DiskObject) Type() plumbing.ObjectType { - return o.typ -} - -// Writer implements the plumbing.EncodedObject interface. This method always -// returns a nil writer. -func (o *DiskObject) Writer() (io.WriteCloser, error) { - return nil, nil -} diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go new file mode 100644 index 0000000..d63127e --- /dev/null +++ b/plumbing/format/packfile/fsobject.go @@ -0,0 +1,64 @@ +package packfile + +import ( + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// FSObject is an object from the packfile on the filesystem. +type FSObject struct { + hash plumbing.Hash + h *ObjectHeader + offset int64 + size int64 + typ plumbing.ObjectType + packfile *Packfile +} + +// NewFSObject creates a new filesystem object. +func NewFSObject( + hash plumbing.Hash, + finalType plumbing.ObjectType, + offset int64, + contentSize int64, + packfile *Packfile, +) *FSObject { + return &FSObject{ + hash: hash, + offset: offset, + size: contentSize, + typ: finalType, + packfile: packfile, + } +} + +// Reader implements the plumbing.EncodedObject interface. +func (o *FSObject) Reader() (io.ReadCloser, error) { + return o.packfile.getObjectContent(o.offset) +} + +// SetSize implements the plumbing.EncodedObject interface. This method +// is a noop. +func (o *FSObject) SetSize(int64) {} + +// SetType implements the plumbing.EncodedObject interface. This method is +// a noop. +func (o *FSObject) SetType(plumbing.ObjectType) {} + +// Hash implements the plumbing.EncodedObject interface. +func (o *FSObject) Hash() plumbing.Hash { return o.hash } + +// Size implements the plumbing.EncodedObject interface. +func (o *FSObject) Size() int64 { return o.size } + +// Type implements the plumbing.EncodedObject interface. +func (o *FSObject) Type() plumbing.ObjectType { + return o.typ +} + +// Writer implements the plumbing.EncodedObject interface. This method always +// returns a nil writer. +func (o *FSObject) Writer() (io.WriteCloser, error) { + return nil, nil +} diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 37743ba..df8a3d4 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -232,7 +232,7 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { p.offsetToType[h.Offset] = typ - return NewDiskObject(hash, typ, h.Offset, size, p), nil + return NewFSObject(hash, typ, h.Offset, size, p), nil } func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { @@ -410,7 +410,7 @@ func (p *Packfile) Close() error { return closer.Close() } -// MemoryObjectFromDisk converts a DiskObject to a MemoryObject. +// MemoryObjectFromDisk converts a FSObject to a MemoryObject. func MemoryObjectFromDisk(obj plumbing.EncodedObject) (plumbing.EncodedObject, error) { o2 := new(plumbing.MemoryObject) o2.SetType(obj.Type()) -- cgit From 038cf238e6250094c7aeb387fd7ea92438719699 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 12:36:37 +0200 Subject: storage: filesystem, close Packfile after iterating objects Signed-off-by: Miguel Molina --- plumbing/object/blob_test.go | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'plumbing') diff --git a/plumbing/object/blob_test.go b/plumbing/object/blob_test.go index 181436d..e08ff25 100644 --- a/plumbing/object/blob_test.go +++ b/plumbing/object/blob_test.go @@ -6,6 +6,7 @@ import ( "io/ioutil" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" . "gopkg.in/check.v1" ) @@ -70,6 +71,12 @@ func (s *BlobsSuite) TestBlobIter(c *C) { blobs := []*Blob{} iter.ForEach(func(b *Blob) error { + var err error + b.obj, err = packfile.MemoryObjectFromDisk(b.obj) + if err != nil { + return err + } + blobs = append(blobs, b) return nil }) -- cgit From 56c5e91b158bc4569b38bfd5d27d4b4be5e06a27 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 16:53:00 +0200 Subject: plumbing: packfile, open and close packfile on FSObject reads Signed-off-by: Miguel Molina --- plumbing/format/packfile/encoder_advanced_test.go | 7 ++- plumbing/format/packfile/encoder_test.go | 7 ++- plumbing/format/packfile/fsobject.go | 68 +++++++++++++++++----- plumbing/format/packfile/packfile.go | 69 +++++++++++++---------- plumbing/format/packfile/packfile_test.go | 31 +++++++--- plumbing/object/blob_test.go | 7 --- 6 files changed, 126 insertions(+), 63 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index 78ddc45..fc1419e 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -6,7 +6,7 @@ import ( "math/rand" "testing" - "gopkg.in/src-d/go-billy.v3/memfs" + "gopkg.in/src-d/go-billy.v4/memfs" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" @@ -84,7 +84,8 @@ func (s *EncoderAdvancedSuite) testEncodeDecode( encodeHash, err := enc.Encode(hashes, packWindow) c.Assert(err, IsNil) - f, err := memfs.New().Create("packfile") + fs := memfs.New() + f, err := fs.Create("packfile") c.Assert(err, IsNil) _, err = f.Write(buf.Bytes()) @@ -105,7 +106,7 @@ func (s *EncoderAdvancedSuite) testEncodeDecode( _, err = f.Seek(0, io.SeekStart) c.Assert(err, IsNil) - p := NewPackfile(index, f) + p := NewPackfile(index, fs, f) decodeHash, err := p.ID() c.Assert(err, IsNil) diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 24e2082..80b916d 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -5,7 +5,7 @@ import ( "io" stdioutil "io/ioutil" - "gopkg.in/src-d/go-billy.v3/memfs" + "gopkg.in/src-d/go-billy.v4/memfs" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/storage/memory" @@ -290,7 +290,8 @@ func objectsEqual(c *C, o1, o2 plumbing.EncodedObject) { } func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { - file, err := memfs.New().Create("packfile") + fs := memfs.New() + file, err := fs.Create("packfile") c.Assert(err, IsNil) _, err = file.Write(buf.Bytes()) @@ -311,7 +312,7 @@ func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { index, err := w.Index() c.Assert(err, IsNil) - return NewPackfile(index, file), func() { + return NewPackfile(index, fs, file), func() { c.Assert(file.Close(), IsNil) } } diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go index d63127e..6fd3ca5 100644 --- a/plumbing/format/packfile/fsobject.go +++ b/plumbing/format/packfile/fsobject.go @@ -3,17 +3,23 @@ package packfile import ( "io" + billy "gopkg.in/src-d/go-billy.v4" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" ) // FSObject is an object from the packfile on the filesystem. type FSObject struct { - hash plumbing.Hash - h *ObjectHeader - offset int64 - size int64 - typ plumbing.ObjectType - packfile *Packfile + hash plumbing.Hash + h *ObjectHeader + offset int64 + size int64 + typ plumbing.ObjectType + index idxfile.Index + fs billy.Filesystem + path string + cache cache.Object } // NewFSObject creates a new filesystem object. @@ -22,20 +28,42 @@ func NewFSObject( finalType plumbing.ObjectType, offset int64, contentSize int64, - packfile *Packfile, + index idxfile.Index, + fs billy.Filesystem, + path string, + cache cache.Object, ) *FSObject { return &FSObject{ - hash: hash, - offset: offset, - size: contentSize, - typ: finalType, - packfile: packfile, + hash: hash, + offset: offset, + size: contentSize, + typ: finalType, + index: index, + fs: fs, + path: path, + cache: cache, } } // Reader implements the plumbing.EncodedObject interface. func (o *FSObject) Reader() (io.ReadCloser, error) { - return o.packfile.getObjectContent(o.offset) + f, err := o.fs.Open(o.path) + if err != nil { + return nil, err + } + + p := NewPackfileWithCache(o.index, nil, f, o.cache) + r, err := p.getObjectContent(o.offset) + if err != nil { + _ = f.Close() + return nil, err + } + + if err := f.Close(); err != nil { + return nil, err + } + + return r, nil } // SetSize implements the plumbing.EncodedObject interface. This method @@ -62,3 +90,17 @@ func (o *FSObject) Type() plumbing.ObjectType { func (o *FSObject) Writer() (io.WriteCloser, error) { return nil, nil } + +type objectReader struct { + io.ReadCloser + f billy.File +} + +func (r *objectReader) Close() error { + if err := r.ReadCloser.Close(); err != nil { + _ = r.f.Close() + return err + } + + return r.f.Close() +} diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index df8a3d4..5feb781 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -3,9 +3,9 @@ package packfile import ( "bytes" "io" - stdioutil "io/ioutil" "os" + billy "gopkg.in/src-d/go-billy.v4" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" @@ -24,21 +24,26 @@ var ( // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index - file io.ReadSeeker + fs billy.Filesystem + file billy.File s *Scanner deltaBaseCache cache.Object offsetToType map[int64]plumbing.ObjectType } // NewPackfileWithCache creates a new Packfile with the given object cache. +// If the filesystem is provided, the packfile will return FSObjects, otherwise +// it will return MemoryObjects. func NewPackfileWithCache( index idxfile.Index, - file io.ReadSeeker, + fs billy.Filesystem, + file billy.File, cache cache.Object, ) *Packfile { s := NewScanner(file) return &Packfile{ index, + fs, file, s, cache, @@ -48,8 +53,10 @@ func NewPackfileWithCache( // NewPackfile returns a packfile representation for the given packfile file // and packfile idx. -func NewPackfile(index idxfile.Index, file io.ReadSeeker) *Packfile { - return NewPackfileWithCache(index, file, cache.NewObjectLRUDefault()) +// If the filesystem is provided, the packfile will return FSObjects, otherwise +// it will return MemoryObjects. +func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File) *Packfile { + return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault()) } // Get retrieves the encoded object in the packfile with the given hash. @@ -215,6 +222,12 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { return nil, err } + // If we have no filesystem, we will return a MemoryObject instead + // of an FSObject. + if p.fs == nil { + return p.getNextObject(h) + } + hash, err := p.FindHash(h.Offset) if err != nil { return nil, err @@ -232,7 +245,16 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { p.offsetToType[h.Offset] = typ - return NewFSObject(hash, typ, h.Offset, size, p), nil + return NewFSObject( + hash, + typ, + h.Offset, + size, + p.Index, + p.fs, + p.file.Name(), + p.deltaBaseCache, + ), nil } func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { @@ -245,10 +267,20 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return nil, err } + obj, err := p.getNextObject(h) + if err != nil { + return nil, err + } + + return obj.Reader() +} + +func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) { var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) obj.SetType(h.Type) + var err error switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: err = p.fillRegularObjectContent(obj) @@ -264,7 +296,7 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return nil, err } - return obj.Reader() + return obj, nil } func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error { @@ -410,29 +442,6 @@ func (p *Packfile) Close() error { return closer.Close() } -// MemoryObjectFromDisk converts a FSObject to a MemoryObject. -func MemoryObjectFromDisk(obj plumbing.EncodedObject) (plumbing.EncodedObject, error) { - o2 := new(plumbing.MemoryObject) - o2.SetType(obj.Type()) - o2.SetSize(obj.Size()) - - r, err := obj.Reader() - if err != nil { - return nil, err - } - - data, err := stdioutil.ReadAll(r) - if err != nil { - return nil, err - } - - if _, err := o2.Write(data); err != nil { - return nil, err - } - - return o2, nil -} - type objectIter struct { p *Packfile typ plumbing.ObjectType diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go index 3193bed..05dc8a7 100644 --- a/plumbing/format/packfile/packfile_test.go +++ b/plumbing/format/packfile/packfile_test.go @@ -109,13 +109,14 @@ var expectedEntries = map[plumbing.Hash]int64{ func (s *PackfileSuite) SetUpTest(c *C) { s.f = fixtures.Basic().One() - f, err := osfs.New("").Open(s.f.Packfile().Name()) + fs := osfs.New("") + f, err := fs.Open(s.f.Packfile().Name()) c.Assert(err, IsNil) s.idx = idxfile.NewMemoryIndex() c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) - s.p = packfile.NewPackfile(s.idx, f) + s.p = packfile.NewPackfile(s.idx, fs, f) } func (s *PackfileSuite) TearDownTest(c *C) { @@ -125,7 +126,11 @@ func (s *PackfileSuite) TearDownTest(c *C) { func (s *PackfileSuite) TestDecode(c *C) { fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { index := getIndexFromIdxFile(f.Idx()) - p := packfile.NewPackfile(index, f.Packfile()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + p := packfile.NewPackfile(index, fs, pf) defer p.Close() for _, h := range expectedHashes { @@ -140,7 +145,11 @@ func (s *PackfileSuite) TestDecodeByTypeRefDelta(c *C) { f := fixtures.Basic().ByTag("ref-delta").One() index := getIndexFromIdxFile(f.Idx()) - packfile := packfile.NewPackfile(index, f.Packfile()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) defer packfile.Close() iter, err := packfile.GetByType(plumbing.CommitObject) @@ -171,7 +180,11 @@ func (s *PackfileSuite) TestDecodeByType(c *C) { fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { for _, t := range ts { index := getIndexFromIdxFile(f.Idx()) - packfile := packfile.NewPackfile(index, f.Packfile()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) defer packfile.Close() iter, err := packfile.GetByType(t) @@ -188,10 +201,14 @@ func (s *PackfileSuite) TestDecodeByType(c *C) { func (s *PackfileSuite) TestDecodeByTypeConstructor(c *C) { f := fixtures.Basic().ByTag("packfile").One() index := getIndexFromIdxFile(f.Idx()) - packfile := packfile.NewPackfile(index, f.Packfile()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) defer packfile.Close() - _, err := packfile.GetByType(plumbing.OFSDeltaObject) + _, err = packfile.GetByType(plumbing.OFSDeltaObject) c.Assert(err, Equals, plumbing.ErrInvalidType) _, err = packfile.GetByType(plumbing.REFDeltaObject) diff --git a/plumbing/object/blob_test.go b/plumbing/object/blob_test.go index e08ff25..181436d 100644 --- a/plumbing/object/blob_test.go +++ b/plumbing/object/blob_test.go @@ -6,7 +6,6 @@ import ( "io/ioutil" "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" . "gopkg.in/check.v1" ) @@ -71,12 +70,6 @@ func (s *BlobsSuite) TestBlobIter(c *C) { blobs := []*Blob{} iter.ForEach(func(b *Blob) error { - var err error - b.obj, err = packfile.MemoryObjectFromDisk(b.obj) - if err != nil { - return err - } - blobs = append(blobs, b) return nil }) -- cgit From 8d75d239e93474e4287870e4e5143da14e2c360d Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Fri, 10 Aug 2018 12:33:56 +0200 Subject: plumbing: idxfile, Crc32 to CRC32 and return ok from findHashIndex Signed-off-by: Miguel Molina --- plumbing/format/idxfile/decoder.go | 4 ++-- plumbing/format/idxfile/encoder.go | 2 +- plumbing/format/idxfile/idxfile.go | 36 ++++++++++++++++++------------------ plumbing/format/idxfile/writer.go | 4 ++-- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go index 25ff88e..5b92782 100644 --- a/plumbing/format/idxfile/decoder.go +++ b/plumbing/format/idxfile/decoder.go @@ -124,7 +124,7 @@ func readObjectNames(idx *MemoryIndex, r io.Reader) error { idx.Names = append(idx.Names, bin) idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4)) - idx.Crc32 = append(idx.Crc32, make([]byte, buckets*4)) + idx.CRC32 = append(idx.CRC32, make([]byte, buckets*4)) } return nil @@ -133,7 +133,7 @@ func readObjectNames(idx *MemoryIndex, r io.Reader) error { func readCRC32(idx *MemoryIndex, r io.Reader) error { for k := 0; k < fanout; k++ { if pos := idx.FanoutMapping[k]; pos != noMapping { - if _, err := io.ReadFull(r, idx.Crc32[pos]); err != nil { + if _, err := io.ReadFull(r, idx.CRC32[pos]); err != nil { return err } } diff --git a/plumbing/format/idxfile/encoder.go b/plumbing/format/idxfile/encoder.go index 55df466..e479511 100644 --- a/plumbing/format/idxfile/encoder.go +++ b/plumbing/format/idxfile/encoder.go @@ -89,7 +89,7 @@ func (e *Encoder) encodeCRC32(idx *MemoryIndex) (int, error) { continue } - n, err := e.Write(idx.Crc32[pos]) + n, err := e.Write(idx.CRC32[pos]) if err != nil { return size, err } diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index 71c7630..c977bee 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -41,12 +41,12 @@ type MemoryIndex struct { Version uint32 Fanout [256]uint32 // FanoutMapping maps the position in the fanout table to the position - // in the Names, Offset32 and Crc32 slices. This improves the memory + // in the Names, Offset32 and CRC32 slices. This improves the memory // usage by not needing an array with unnecessary empty slots. FanoutMapping [256]int Names [][]byte Offset32 [][]byte - Crc32 [][]byte + CRC32 [][]byte Offset64 []byte PackfileChecksum [20]byte IdxChecksum [20]byte @@ -61,20 +61,20 @@ func NewMemoryIndex() *MemoryIndex { return &MemoryIndex{} } -func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { +func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) (int, bool) { k := idx.FanoutMapping[h[0]] if k == noMapping { - return -1 + return 0, false } if len(idx.Names) <= k { - return -1 + return 0, false } data := idx.Names[k] high := uint64(len(idx.Offset32[k])) >> 2 if high == 0 { - return -1 + return 0, false } low := uint64(0) @@ -86,7 +86,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { if cmp < 0 { high = mid } else if cmp == 0 { - return int(mid) + return int(mid), true } else { low = mid + 1 } @@ -96,13 +96,13 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { } } - return -1 + return 0, false } // Contains implements the Index interface. func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) { - i := idx.findHashIndex(h) - return i >= 0, nil + _, ok := idx.findHashIndex(h) + return ok, nil } // FindOffset implements the Index interface. @@ -112,8 +112,8 @@ func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) { } k := idx.FanoutMapping[h[0]] - i := idx.findHashIndex(h) - if i < 0 { + i, ok := idx.findHashIndex(h) + if !ok { return 0, plumbing.ErrObjectNotFound } @@ -147,17 +147,17 @@ func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) { // FindCRC32 implements the Index interface. func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) { k := idx.FanoutMapping[h[0]] - i := idx.findHashIndex(h) - if i < 0 { + i, ok := idx.findHashIndex(h) + if !ok { return 0, plumbing.ErrObjectNotFound } - return idx.getCrc32(k, i) + return idx.getCRC32(k, i) } -func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) { +func (idx *MemoryIndex) getCRC32(firstLevel, secondLevel int) (uint32, error) { offset := secondLevel << 2 - buf := bytes.NewBuffer(idx.Crc32[firstLevel][offset : offset+4]) + buf := bytes.NewBuffer(idx.CRC32[firstLevel][offset : offset+4]) return binary.ReadUint32(buf) } @@ -253,7 +253,7 @@ func (i *idxfileEntryIter) Next() (*Entry, error) { } entry.Offset = uint64(offset) - entry.CRC32, err = i.idx.getCrc32(pos, i.secondLevel) + entry.CRC32, err = i.idx.getCRC32(pos, i.secondLevel) if err != nil { return nil, err } diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index 89b79cd..aa919e7 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -132,7 +132,7 @@ func (w *Writer) createIndex() (*MemoryIndex, error) { idx.Names = append(idx.Names, make([]byte, 0)) idx.Offset32 = append(idx.Offset32, make([]byte, 0)) - idx.Crc32 = append(idx.Crc32, make([]byte, 0)) + idx.CRC32 = append(idx.CRC32, make([]byte, 0)) } idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...) @@ -148,7 +148,7 @@ func (w *Writer) createIndex() (*MemoryIndex, error) { buf.Truncate(0) binary.WriteUint32(buf, uint32(o.CRC32)) - idx.Crc32[bucket] = append(idx.Crc32[bucket], buf.Bytes()...) + idx.CRC32[bucket] = append(idx.CRC32[bucket], buf.Bytes()...) } for j := last + 1; j < 256; j++ { -- cgit From a8c4426d204f42e683e902dcb277494004d5e59d Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 11:59:11 +0200 Subject: plumbing: add buffer cache and use it in packfile parser It uses less memory and is faster as slices don't have to be converted from/to MemoryObject and they are indexed by offset. Signed-off-by: Javi Fontan --- plumbing/cache/buffer_lru.go | 98 ++++++++++++++++++++++++++++ plumbing/cache/buffer_test.go | 128 +++++++++++++++++++++++++++++++++++++ plumbing/cache/common.go | 13 ++++ plumbing/format/packfile/parser.go | 24 +++---- 4 files changed, 249 insertions(+), 14 deletions(-) create mode 100644 plumbing/cache/buffer_lru.go create mode 100644 plumbing/cache/buffer_test.go (limited to 'plumbing') diff --git a/plumbing/cache/buffer_lru.go b/plumbing/cache/buffer_lru.go new file mode 100644 index 0000000..f2c0f90 --- /dev/null +++ b/plumbing/cache/buffer_lru.go @@ -0,0 +1,98 @@ +package cache + +import ( + "container/list" + "sync" +) + +// BufferLRU implements an object cache with an LRU eviction policy and a +// maximum size (measured in object size). +type BufferLRU struct { + MaxSize FileSize + + actualSize FileSize + ll *list.List + cache map[int64]*list.Element + mut sync.Mutex +} + +// NewBufferLRU creates a new BufferLRU with the given maximum size. The maximum +// size will never be exceeded. +func NewBufferLRU(maxSize FileSize) *BufferLRU { + return &BufferLRU{MaxSize: maxSize} +} + +// NewBufferLRUDefault creates a new BufferLRU with the default cache size. +func NewBufferLRUDefault() *BufferLRU { + return &BufferLRU{MaxSize: DefaultMaxSize} +} + +type buffer struct { + Key int64 + Slice []byte +} + +// Put puts a buffer into the cache. If the buffer is already in the cache, it +// will be marked as used. Otherwise, it will be inserted. A buffers might +// be evicted to make room for the new one. +func (c *BufferLRU) Put(key int64, slice []byte) { + c.mut.Lock() + defer c.mut.Unlock() + + if c.cache == nil { + c.actualSize = 0 + c.cache = make(map[int64]*list.Element, 1000) + c.ll = list.New() + } + + if ee, ok := c.cache[key]; ok { + c.ll.MoveToFront(ee) + ee.Value = buffer{key, slice} + return + } + + objSize := FileSize(len(slice)) + + if objSize > c.MaxSize { + return + } + + for c.actualSize+objSize > c.MaxSize { + last := c.ll.Back() + lastObj := last.Value.(buffer) + lastSize := FileSize(len(lastObj.Slice)) + + c.ll.Remove(last) + delete(c.cache, lastObj.Key) + c.actualSize -= lastSize + } + + ee := c.ll.PushFront(buffer{key, slice}) + c.cache[key] = ee + c.actualSize += objSize +} + +// Get returns a buffer by its key. It marks the buffer as used. If the buffer +// is not in the cache, (nil, false) will be returned. +func (c *BufferLRU) Get(key int64) ([]byte, bool) { + c.mut.Lock() + defer c.mut.Unlock() + + ee, ok := c.cache[key] + if !ok { + return nil, false + } + + c.ll.MoveToFront(ee) + return ee.Value.(buffer).Slice, true +} + +// Clear the content of this buffer cache. +func (c *BufferLRU) Clear() { + c.mut.Lock() + defer c.mut.Unlock() + + c.ll = nil + c.cache = nil + c.actualSize = 0 +} diff --git a/plumbing/cache/buffer_test.go b/plumbing/cache/buffer_test.go new file mode 100644 index 0000000..262138a --- /dev/null +++ b/plumbing/cache/buffer_test.go @@ -0,0 +1,128 @@ +package cache + +import ( + "sync" + + . "gopkg.in/check.v1" +) + +type BufferSuite struct { + c map[string]Buffer + aBuffer []byte + bBuffer []byte + cBuffer []byte + dBuffer []byte + eBuffer []byte +} + +var _ = Suite(&BufferSuite{}) + +func (s *BufferSuite) SetUpTest(c *C) { + s.aBuffer = []byte("a") + s.bBuffer = []byte("bbb") + s.cBuffer = []byte("c") + s.dBuffer = []byte("d") + s.eBuffer = []byte("ee") + + s.c = make(map[string]Buffer) + s.c["two_bytes"] = NewBufferLRU(2 * Byte) + s.c["default_lru"] = NewBufferLRUDefault() +} + +func (s *BufferSuite) TestPutSameBuffer(c *C) { + for _, o := range s.c { + o.Put(1, s.aBuffer) + o.Put(1, s.aBuffer) + _, ok := o.Get(1) + c.Assert(ok, Equals, true) + } +} + +func (s *BufferSuite) TestPutBigBuffer(c *C) { + for _, o := range s.c { + o.Put(1, s.bBuffer) + _, ok := o.Get(2) + c.Assert(ok, Equals, false) + } +} + +func (s *BufferSuite) TestPutCacheOverflow(c *C) { + // this test only works with an specific size + o := s.c["two_bytes"] + + o.Put(1, s.aBuffer) + o.Put(2, s.cBuffer) + o.Put(3, s.dBuffer) + + obj, ok := o.Get(1) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) + obj, ok = o.Get(2) + c.Assert(ok, Equals, true) + c.Assert(obj, NotNil) + obj, ok = o.Get(3) + c.Assert(ok, Equals, true) + c.Assert(obj, NotNil) +} + +func (s *BufferSuite) TestEvictMultipleBuffers(c *C) { + o := s.c["two_bytes"] + + o.Put(1, s.cBuffer) + o.Put(2, s.dBuffer) // now cache is full with two objects + o.Put(3, s.eBuffer) // this put should evict all previous objects + + obj, ok := o.Get(1) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) + obj, ok = o.Get(2) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) + obj, ok = o.Get(3) + c.Assert(ok, Equals, true) + c.Assert(obj, NotNil) +} + +func (s *BufferSuite) TestClear(c *C) { + for _, o := range s.c { + o.Put(1, s.aBuffer) + o.Clear() + obj, ok := o.Get(1) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) + } +} + +func (s *BufferSuite) TestConcurrentAccess(c *C) { + for _, o := range s.c { + var wg sync.WaitGroup + + for i := 0; i < 1000; i++ { + wg.Add(3) + go func(i int) { + o.Put(int64(i), []byte{00}) + wg.Done() + }(i) + + go func(i int) { + if i%30 == 0 { + o.Clear() + } + wg.Done() + }(i) + + go func(i int) { + o.Get(int64(i)) + wg.Done() + }(i) + } + + wg.Wait() + } +} + +func (s *BufferSuite) TestDefaultLRU(c *C) { + defaultLRU := s.c["default_lru"].(*BufferLRU) + + c.Assert(defaultLRU.MaxSize, Equals, DefaultMaxSize) +} diff --git a/plumbing/cache/common.go b/plumbing/cache/common.go index e77baf0..2b7f36a 100644 --- a/plumbing/cache/common.go +++ b/plumbing/cache/common.go @@ -24,3 +24,16 @@ type Object interface { // Clear clears every object from the cache. Clear() } + +// Buffer is an interface to a buffer cache. +type Buffer interface { + // Put puts a buffer into the cache. If the buffer is already in the cache, + // it will be marked as used. Otherwise, it will be inserted. Buffer might + // be evicted to make room for the new one. + Put(key int64, slice []byte) + // Get returns a buffer by its key. It marks the buffer as used. If the + // buffer is not in the cache, (nil, false) will be returned. + Get(key int64) ([]byte, bool) + // Clear clears every object from the cache. + Clear() +} diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 581c334..88f33dc 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -48,7 +48,7 @@ type Parser struct { pendingRefDeltas map[plumbing.Hash][]*objectInfo checksum plumbing.Hash - cache *cache.ObjectLRU + cache *cache.BufferLRU // delta content by offset, only used if source is not seekable deltas map[int64][]byte @@ -82,7 +82,7 @@ func NewParserWithStorage( scanner: scanner, ob: ob, count: 0, - cache: cache.NewObjectLRUDefault(), + cache: cache.NewBufferLRUDefault(), pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo), deltas: deltas, }, nil @@ -303,29 +303,29 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { return o.Content, nil } - e, ok := p.cache.Get(o.SHA1) + b, ok := p.cache.Get(o.Offset) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. if !ok && p.storage != nil && !o.Type.IsDelta() { var err error - e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) + e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) if err != nil { return nil, err } - } - if e != nil { r, err := e.Reader() if err != nil { return nil, err } - buf := make([]byte, e.Size()) - if _, err = r.Read(buf); err != nil { + b = make([]byte, e.Size()) + if _, err = r.Read(b); err != nil { return nil, err } + } - return buf, nil + if b != nil { + return b, nil } var data []byte @@ -348,11 +348,7 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { } if len(o.Children) > 0 { - m := &plumbing.MemoryObject{} - m.Write(data) - m.SetType(o.Type) - m.SetSize(o.Size()) - p.cache.Put(m) + p.cache.Put(o.Offset, data) } return data, nil -- cgit From 555a6ca02e88279cef421df88a108c2955fcde77 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 12:21:12 +0200 Subject: plumbing/pacfile: tidy up objectInfo struct * a new hasher is created when needed * delete unused fields * base content is no longer kept in memory Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 58 +++++++++++++++----------------------- 1 file changed, 22 insertions(+), 36 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 88f33dc..3a9c4d7 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -221,21 +221,22 @@ func (p *Parser) indexObjects() error { ota = newBaseObject(oh.Offset, oh.Length, t) } - size, crc, err := p.scanner.NextObject(buf) + _, crc, err := p.scanner.NextObject(buf) if err != nil { return err } ota.Crc32 = crc - ota.PackSize = size ota.Length = oh.Length data := buf.Bytes() if !delta { - if _, err := ota.Write(data); err != nil { + sha1, err := getSHA1(ota.Type, data) + if err != nil { return err } - ota.SHA1 = ota.Sum() + + ota.SHA1 = sha1 p.oiByHash[ota.SHA1] = ota } @@ -291,18 +292,12 @@ func (p *Parser) resolveDeltas() error { delete(p.deltas, obj.Offset) } } - - obj.Content = nil } return nil } func (p *Parser) get(o *objectInfo) ([]byte, error) { - if len(o.Content) > 0 { - return o.Content, nil - } - b, ok := p.cache.Get(o.Offset) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. @@ -406,8 +401,6 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { return data, nil } - // TODO: skip header. Header size can be calculated with the offset of the - // next offset in the first pass. if _, err := p.scanner.SeekFromStart(o.Offset); err != nil { return nil, err } @@ -431,33 +424,37 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { } ota.Type = ota.Parent.Type - ota.Hasher = plumbing.NewHasher(ota.Type, int64(len(patched))) - if _, err := ota.Write(patched); err != nil { + sha1, err := getSHA1(ota.Type, patched) + if err != nil { return nil, err } - ota.SHA1 = ota.Sum() + + ota.SHA1 = sha1 ota.Length = int64(len(patched)) return patched, nil } -type objectInfo struct { - plumbing.Hasher +func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) { + hasher := plumbing.NewHasher(t, int64(len(data))) + if _, err := hasher.Write(data); err != nil { + return plumbing.ZeroHash, err + } + + return hasher.Sum(), nil +} - Offset int64 - Length int64 - HeaderLength int64 - PackSize int64 - Type plumbing.ObjectType - DiskType plumbing.ObjectType +type objectInfo struct { + Offset int64 + Length int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType Crc32 uint32 Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash - - Content []byte } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -469,29 +466,18 @@ func newDeltaObject( t plumbing.ObjectType, parent *objectInfo, ) *objectInfo { - children := make([]*objectInfo, 0) - obj := &objectInfo{ - Hasher: plumbing.NewHasher(t, length), Offset: offset, Length: length, - PackSize: 0, Type: t, DiskType: t, Crc32: 0, Parent: parent, - Children: children, } return obj } -func (o *objectInfo) Write(b []byte) (int, error) { - o.Content = make([]byte, len(b)) - copy(o.Content, b) - return o.Hasher.Write(b) -} - func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } -- cgit From eb2aa9b2c3bf7af93fd261228be1b96e61c52bcf Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 16:56:29 +0200 Subject: plumbing/packfile: do not compute sha1 for already undeltified objects Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 3a9c4d7..28582b5 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -423,14 +423,16 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { return nil, err } - ota.Type = ota.Parent.Type - sha1, err := getSHA1(ota.Type, patched) - if err != nil { - return nil, err - } + if ota.SHA1 == plumbing.ZeroHash { + ota.Type = ota.Parent.Type + sha1, err := getSHA1(ota.Type, patched) + if err != nil { + return nil, err + } - ota.SHA1 = sha1 - ota.Length = int64(len(patched)) + ota.SHA1 = sha1 + ota.Length = int64(len(patched)) + } return patched, nil } -- cgit From ec3d2a817d7cf43696a42d8460c7a8957a12a57b Mon Sep 17 00:00:00 2001 From: Chris Marchesi Date: Thu, 16 Aug 2018 17:41:03 -0700 Subject: plumbing: object, Don't add new line at end of commit signature The way that commit signatures were being written out was causing an extra newline to be written at the end of the commit when the message encoding was already taking care of this. Ultimately, this results in a corrupt object, rendering the object unverifiable with the signature in the commit. Signed-off-by: Chris Marchesi --- plumbing/object/commit.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'plumbing') diff --git a/plumbing/object/commit.go b/plumbing/object/commit.go index b1c0e01..00ae3f1 100644 --- a/plumbing/object/commit.go +++ b/plumbing/object/commit.go @@ -263,18 +263,18 @@ func (b *Commit) encode(o plumbing.EncodedObject, includeSig bool) (err error) { } if b.PGPSignature != "" && includeSig { - if _, err = fmt.Fprint(w, "\n"+headerpgp); err != nil { + if _, err = fmt.Fprint(w, "\n"+headerpgp+" "); err != nil { return err } - // Split all the signature lines and write with a left padding and - // newline at the end. + // Split all the signature lines and re-write with a left padding and + // newline. Use join for this so it's clear that a newline should not be + // added after this section, as it will be added when the message is + // printed. signature := strings.TrimSuffix(b.PGPSignature, "\n") lines := strings.Split(signature, "\n") - for _, line := range lines { - if _, err = fmt.Fprintf(w, " %s\n", line); err != nil { - return err - } + if _, err = fmt.Fprint(w, strings.Join(lines, "\n ")); err != nil { + return err } } -- cgit From 166623633e285e17b0582443c9d03b842b6370fa Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Fri, 17 Aug 2018 18:52:18 +0200 Subject: object: fix panic when reading object header When the first line of the pgp signature is an empty line or some header is malformed it crashes as there's no data for the header element. For example, if author name is "\n". Signed-off-by: Javi Fontan --- plumbing/object/commit.go | 16 +++++++++++----- plumbing/object/commit_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 5 deletions(-) (limited to 'plumbing') diff --git a/plumbing/object/commit.go b/plumbing/object/commit.go index 00ae3f1..e254342 100644 --- a/plumbing/object/commit.go +++ b/plumbing/object/commit.go @@ -199,17 +199,23 @@ func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { } split := bytes.SplitN(line, []byte{' '}, 2) + + var data []byte + if len(split) == 2 { + data = split[1] + } + switch string(split[0]) { case "tree": - c.TreeHash = plumbing.NewHash(string(split[1])) + c.TreeHash = plumbing.NewHash(string(data)) case "parent": - c.ParentHashes = append(c.ParentHashes, plumbing.NewHash(string(split[1]))) + c.ParentHashes = append(c.ParentHashes, plumbing.NewHash(string(data))) case "author": - c.Author.Decode(split[1]) + c.Author.Decode(data) case "committer": - c.Committer.Decode(split[1]) + c.Committer.Decode(data) case headerpgp: - c.PGPSignature += string(split[1]) + "\n" + c.PGPSignature += string(data) + "\n" pgpsig = true } } else { diff --git a/plumbing/object/commit_test.go b/plumbing/object/commit_test.go index b5dfbe3..e72b703 100644 --- a/plumbing/object/commit_test.go +++ b/plumbing/object/commit_test.go @@ -325,6 +325,22 @@ RUysgqjcpT8+iQM1PblGfHR4XAhuOqN5Fx06PSaFZhqvWFezJ28/CLyX5q+oIVk= c.Assert(err, IsNil) c.Assert(decoded.PGPSignature, Equals, pgpsignature) + // signature with extra empty line, it caused "index out of range" when + // parsing it + + pgpsignature2 := "\n" + pgpsignature + + commit.PGPSignature = pgpsignature2 + encoded = &plumbing.MemoryObject{} + decoded = &Commit{} + + err = commit.Encode(encoded) + c.Assert(err, IsNil) + + err = decoded.Decode(encoded) + c.Assert(err, IsNil) + c.Assert(decoded.PGPSignature, Equals, pgpsignature2) + // signature in author name commit.PGPSignature = "" @@ -461,3 +477,21 @@ func (s *SuiteCommit) TestPatchCancel(c *C) { c.Assert(err, ErrorMatches, "operation canceled") } + +func (s *SuiteCommit) TestMalformedHeader(c *C) { + encoded := &plumbing.MemoryObject{} + decoded := &Commit{} + commit := *s.Commit + + commit.PGPSignature = "\n" + commit.Author.Name = "\n" + commit.Author.Email = "\n" + commit.Committer.Name = "\n" + commit.Committer.Email = "\n" + + err := commit.Encode(encoded) + c.Assert(err, IsNil) + + err = decoded.Decode(encoded) + c.Assert(err, IsNil) +} -- cgit From 7b4a8379327653167e87e9e46a2d397f8fd9cfc8 Mon Sep 17 00:00:00 2001 From: Fedor Korotkov Date: Sun, 19 Aug 2018 11:51:21 -0400 Subject: Fixed an edge case for .gitignore Fixes #923 Signed-off-by: Fedor Korotkov --- plumbing/format/gitignore/pattern.go | 3 +++ plumbing/format/gitignore/pattern_test.go | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'plumbing') diff --git a/plumbing/format/gitignore/pattern.go b/plumbing/format/gitignore/pattern.go index 2603352..098cb50 100644 --- a/plumbing/format/gitignore/pattern.go +++ b/plumbing/format/gitignore/pattern.go @@ -133,6 +133,9 @@ func (p *pattern) globMatch(path []string, isDir bool) bool { } else if match { matched = true break + } else if len(path) == 0 { + // if nothing left then fail + matched = false } } } else { diff --git a/plumbing/format/gitignore/pattern_test.go b/plumbing/format/gitignore/pattern_test.go index f94cef3..c410442 100644 --- a/plumbing/format/gitignore/pattern_test.go +++ b/plumbing/format/gitignore/pattern_test.go @@ -281,3 +281,9 @@ func (s *PatternSuite) TestGlobMatch_wrongPattern_onTraversal_mismatch(c *C) { r := p.Match([]string{"value", "head", "vol["}, false) c.Assert(r, Equals, NoMatch) } + +func (s *PatternSuite) TestGlobMatch_issue_923(c *C) { + p := ParsePattern("**/android/**/GeneratedPluginRegistrant.java", nil) + r := p.Match([]string{"packages", "flutter_tools", "lib", "src", "android", "gradle.dart"}, false) + c.Assert(r, Equals, NoMatch) +} -- cgit From f84c6b194f2eced0c068b9cdc9264d30e6d2021b Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 21 Aug 2018 17:35:52 +0200 Subject: plumbing/idxfile: object iterators returns entries in offset order In the latest change the order was changed from offset order in packfiles to hash order. This makes reading all the objects not as efficient as before. It also created problems when the previous order was expected. Also added EntriesByOffset to indexes. Signed-off-by: Javi Fontan --- plumbing/format/idxfile/idxfile.go | 69 +++++++++++++++++++++++++++++++++ plumbing/format/idxfile/idxfile_test.go | 15 +++++++ plumbing/format/packfile/packfile.go | 2 +- 3 files changed, 85 insertions(+), 1 deletion(-) (limited to 'plumbing') diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index c977bee..5fed278 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -3,6 +3,7 @@ package idxfile import ( "bytes" "io" + "sort" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/utils/binary" @@ -34,6 +35,9 @@ type Index interface { Count() (int64, error) // Entries returns an iterator to retrieve all index entries. Entries() (EntryIter, error) + // EntriesByOffset returns an iterator to retrieve all index entries ordered + // by offset. + EntriesByOffset() (EntryIter, error) } // MemoryIndex is the in memory representation of an idx file. @@ -215,6 +219,36 @@ func (idx *MemoryIndex) Entries() (EntryIter, error) { return &idxfileEntryIter{idx, 0, 0, 0}, nil } +// EntriesByOffset implements the Index interface. +func (idx *MemoryIndex) EntriesByOffset() (EntryIter, error) { + count, err := idx.Count() + if err != nil { + return nil, err + } + + iter := &idxfileEntryOffsetIter{ + entries: make(entriesByOffset, count), + } + + entries, err := idx.Entries() + if err != nil { + return nil, err + } + + for pos := 0; int64(pos) < count; pos++ { + entry, err := entries.Next() + if err != nil { + return nil, err + } + + iter.entries[pos] = entry + } + + sort.Sort(iter.entries) + + return iter, nil +} + // EntryIter is an iterator that will return the entries in a packfile index. type EntryIter interface { // Next returns the next entry in the packfile index. @@ -276,3 +310,38 @@ type Entry struct { CRC32 uint32 Offset uint64 } + +type idxfileEntryOffsetIter struct { + entries entriesByOffset + pos int +} + +func (i *idxfileEntryOffsetIter) Next() (*Entry, error) { + if i.pos >= len(i.entries) { + return nil, io.EOF + } + + entry := i.entries[i.pos] + i.pos++ + + return entry, nil +} + +func (i *idxfileEntryOffsetIter) Close() error { + i.pos = len(i.entries) + 1 + return nil +} + +type entriesByOffset []*Entry + +func (o entriesByOffset) Len() int { + return len(o) +} + +func (o entriesByOffset) Less(i int, j int) bool { + return o[i].Offset < o[j].Offset +} + +func (o entriesByOffset) Swap(i int, j int) { + o[i], o[j] = o[j], o[i] +} diff --git a/plumbing/format/idxfile/idxfile_test.go b/plumbing/format/idxfile/idxfile_test.go index d15accf..0e0ca2a 100644 --- a/plumbing/format/idxfile/idxfile_test.go +++ b/plumbing/format/idxfile/idxfile_test.go @@ -115,6 +115,21 @@ func (s *IndexSuite) TestFindHash(c *C) { } } +func (s *IndexSuite) TestEntriesByOffset(c *C) { + idx, err := fixtureIndex() + c.Assert(err, IsNil) + + entries, err := idx.EntriesByOffset() + c.Assert(err, IsNil) + + for _, pos := range fixtureOffsets { + e, err := entries.Next() + c.Assert(err, IsNil) + + c.Assert(e.Offset, Equals, uint64(pos)) + } +} + var fixtureHashes = []plumbing.Hash{ plumbing.NewHash("303953e5aa461c203a324821bc1717f9b4fff895"), plumbing.NewHash("5296768e3d9f661387ccbff18c4dea6c997fd78c"), diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 5feb781..18fcca7 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -394,7 +394,7 @@ func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, plumbing.TreeObject, plumbing.CommitObject, plumbing.TagObject: - entries, err := p.Entries() + entries, err := p.EntriesByOffset() if err != nil { return nil, err } -- cgit From 9b73a3ead6559576cb017b09c41c23c251b5af1c Mon Sep 17 00:00:00 2001 From: Chris Marchesi Date: Tue, 21 Aug 2018 17:38:09 -0700 Subject: plumbing: object, correct tag PGP encoding As with the update in ec3d2a8, tag encoding needed to be corrected to ensure extra newlines were not being added in during tag object encoding, so that it did not corrupt the object for verification. Signed-off-by: Chris Marchesi --- plumbing/object/tag.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'plumbing') diff --git a/plumbing/object/tag.go b/plumbing/object/tag.go index 905206b..6354973 100644 --- a/plumbing/object/tag.go +++ b/plumbing/object/tag.go @@ -195,13 +195,9 @@ func (t *Tag) encode(o plumbing.EncodedObject, includeSig bool) (err error) { return err } - if t.PGPSignature != "" && includeSig { - // Split all the signature lines and write with a newline at the end. - lines := strings.Split(t.PGPSignature, "\n") - for _, line := range lines { - if _, err = fmt.Fprintf(w, "%s\n", line); err != nil { - return err - } + if includeSig { + if _, err = fmt.Fprint(w, "\n"+t.PGPSignature); err != nil { + return err } } -- cgit From 8c3c8b30b3394677d8eb16b159bfe9b4f61726a8 Mon Sep 17 00:00:00 2001 From: Chris Marchesi Date: Tue, 21 Aug 2018 22:20:09 -0700 Subject: plumbing: object, don't add extra newline on PGP signature Tag encoding/decoding seems to be a lot more sensitive to requiring the exact expected format in the object, which generally includes messages canonicalized so that they have a newline on the end (even if they didn't before). As such, the message should be written with the newline (no need for an extra), and the PGP signature right after that, which will be newline split already, so there's no need to split it again. All of this means it's very important for the caller to send the message in the correct format - which I'm correcting in the next commit. Signed-off-by: Chris Marchesi --- plumbing/object/tag.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'plumbing') diff --git a/plumbing/object/tag.go b/plumbing/object/tag.go index 6354973..03749f9 100644 --- a/plumbing/object/tag.go +++ b/plumbing/object/tag.go @@ -195,8 +195,13 @@ func (t *Tag) encode(o plumbing.EncodedObject, includeSig bool) (err error) { return err } + // Note that this is highly sensitive to what it sent along in the message. + // Message *always* needs to end with a newline, or else the message and the + // signature will be concatenated into a corrupt object. Since this is a + // lower-level method, we assume you know what you are doing and have already + // done the needful on the message in the caller. if includeSig { - if _, err = fmt.Fprint(w, "\n"+t.PGPSignature); err != nil { + if _, err = fmt.Fprint(w, t.PGPSignature); err != nil { return err } } -- cgit From 790191ef92ec6382ce65cc30286c901863b3b7a3 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Wed, 22 Aug 2018 16:46:50 +0200 Subject: plumbing, storage: add bases to the common cache After clone only resolved deltas were added to the cache. This caused slowdowns in small repositories where most objects can be held in cache. It also makes packfiles reuse delta cache from the store. Previously it created a new delta cache each time a packfile object was created. This also slowed down a bit accessing objects and had an impact on memory consumption when bases are added to the cache. Signed-off-by: Javi Fontan --- plumbing/format/packfile/fsobject.go | 10 ++++++++++ plumbing/format/packfile/packfile.go | 15 +++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'plumbing') diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go index 6fd3ca5..330cb73 100644 --- a/plumbing/format/packfile/fsobject.go +++ b/plumbing/format/packfile/fsobject.go @@ -47,6 +47,16 @@ func NewFSObject( // Reader implements the plumbing.EncodedObject interface. func (o *FSObject) Reader() (io.ReadCloser, error) { + obj, ok := o.cache.Get(o.hash) + if ok { + reader, err := obj.Reader() + if err != nil { + return nil, err + } + + return reader, nil + } + f, err := o.fs.Open(o.path) if err != nil { return nil, err diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 18fcca7..852a834 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -258,6 +258,19 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { } func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { + ref, err := p.FindHash(offset) + if err == nil { + obj, ok := p.cacheGet(ref) + if ok { + reader, err := obj.Reader() + if err != nil { + return nil, err + } + + return reader, nil + } + } + if _, err := p.s.SeekFromStart(offset); err != nil { return nil, err } @@ -306,6 +319,8 @@ func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error { } _, _, err = p.s.NextObject(w) + p.cachePut(obj) + return err } -- cgit From ba3ee05efbdeb11364d585ec4dfa84fe07e64430 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Wed, 29 Aug 2018 12:43:23 +0000 Subject: plumbing: object: Clamp object timestamps before unix epoch to unix epoch Signed-off-by: Taru Karttunen --- plumbing/object/object.go | 6 +++++- plumbing/object/tag_test.go | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'plumbing') diff --git a/plumbing/object/object.go b/plumbing/object/object.go index 4b59aba..e960e50 100644 --- a/plumbing/object/object.go +++ b/plumbing/object/object.go @@ -152,7 +152,11 @@ func (s *Signature) decodeTimeAndTimeZone(b []byte) { } func (s *Signature) encodeTimeAndTimeZone(w io.Writer) error { - _, err := fmt.Fprintf(w, "%d %s", s.When.Unix(), s.When.Format("-0700")) + u := s.When.Unix() + if u < 0 { + u = 0 + } + _, err := fmt.Fprintf(w, "%d %s", u, s.When.Format("-0700")) return err } diff --git a/plumbing/object/tag_test.go b/plumbing/object/tag_test.go index 9900093..e7dd06e 100644 --- a/plumbing/object/tag_test.go +++ b/plumbing/object/tag_test.go @@ -265,7 +265,7 @@ func (s *TagSuite) TestStringNonCommit(c *C) { c.Assert(tag.String(), Equals, "tag TAG TWO\n"+ "Tagger: <>\n"+ - "Date: Mon Jan 01 00:00:00 0001 +0000\n"+ + "Date: Thu Jan 01 00:00:00 1970 +0000\n"+ "\n"+ "tag two\n") } -- cgit From a4b12e4161738af6f724776c0c8c55f90542f06f Mon Sep 17 00:00:00 2001 From: Kuba Podgórski Date: Fri, 7 Sep 2018 10:25:23 +0200 Subject: plumbing/transport: ssh check if list of known_hosts files is empty Signed-off-by: kuba-- --- plumbing/transport/ssh/auth_method.go | 14 ++++--- plumbing/transport/ssh/auth_method_test.go | 62 +++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 7 deletions(-) (limited to 'plumbing') diff --git a/plumbing/transport/ssh/auth_method.go b/plumbing/transport/ssh/auth_method.go index 84cfab2..dbb47c5 100644 --- a/plumbing/transport/ssh/auth_method.go +++ b/plumbing/transport/ssh/auth_method.go @@ -236,7 +236,7 @@ func (a *PublicKeysCallback) ClientConfig() (*ssh.ClientConfig, error) { // NewKnownHostsCallback returns ssh.HostKeyCallback based on a file based on a // known_hosts file. http://man.openbsd.org/sshd#SSH_KNOWN_HOSTS_FILE_FORMAT // -// If files is empty, the list of files will be read from the SSH_KNOWN_HOSTS +// If list of files is empty, then it will be read from the SSH_KNOWN_HOSTS // environment variable, example: // /home/foo/custom_known_hosts_file:/etc/custom_known/hosts_file // @@ -244,13 +244,15 @@ func (a *PublicKeysCallback) ClientConfig() (*ssh.ClientConfig, error) { // ~/.ssh/known_hosts // /etc/ssh/ssh_known_hosts func NewKnownHostsCallback(files ...string) (ssh.HostKeyCallback, error) { - files, err := getDefaultKnownHostsFiles() - if err != nil { - return nil, err + var err error + + if len(files) == 0 { + if files, err = getDefaultKnownHostsFiles(); err != nil { + return nil, err + } } - files, err = filterKnownHostsFiles(files...) - if err != nil { + if files, err = filterKnownHostsFiles(files...); err != nil { return nil, err } diff --git a/plumbing/transport/ssh/auth_method_test.go b/plumbing/transport/ssh/auth_method_test.go index 0025669..0cde61e 100644 --- a/plumbing/transport/ssh/auth_method_test.go +++ b/plumbing/transport/ssh/auth_method_test.go @@ -1,16 +1,30 @@ package ssh import ( + "bufio" "fmt" "io/ioutil" "os" + "strings" + "golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh/testdata" . "gopkg.in/check.v1" ) -type SuiteCommon struct{} +type ( + SuiteCommon struct{} + + mockKnownHosts struct{} +) + +func (mockKnownHosts) host() string { return "github.com" } +func (mockKnownHosts) knownHosts() []byte { + return []byte(`github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==`) +} +func (mockKnownHosts) Network() string { return "tcp" } +func (mockKnownHosts) String() string { return "github.com:22" } var _ = Suite(&SuiteCommon{}) @@ -149,3 +163,49 @@ func (*SuiteCommon) TestNewPublicKeysWithInvalidPEM(c *C) { c.Assert(err, NotNil) c.Assert(auth, IsNil) } + +func (*SuiteCommon) TestNewKnownHostsCallback(c *C) { + var mock = mockKnownHosts{} + + f, err := ioutil.TempFile("", "known-hosts") + c.Assert(err, IsNil) + + _, err = f.Write(mock.knownHosts()) + c.Assert(err, IsNil) + + err = f.Close() + c.Assert(err, IsNil) + + defer os.RemoveAll(f.Name()) + + f, err = os.Open(f.Name()) + c.Assert(err, IsNil) + + defer f.Close() + + var hostKey ssh.PublicKey + scanner := bufio.NewScanner(f) + for scanner.Scan() { + fields := strings.Split(scanner.Text(), " ") + if len(fields) != 3 { + continue + } + if strings.Contains(fields[0], mock.host()) { + var err error + hostKey, _, _, _, err = ssh.ParseAuthorizedKey(scanner.Bytes()) + if err != nil { + c.Fatalf("error parsing %q: %v", fields[2], err) + } + break + } + } + if hostKey == nil { + c.Fatalf("no hostkey for %s", mock.host()) + } + + clb, err := NewKnownHostsCallback(f.Name()) + c.Assert(err, IsNil) + + err = clb(mock.String(), mock, hostKey) + c.Assert(err, IsNil) +} -- cgit From 80170bd73d5d6298ea6d40c66987fcde8148f1e8 Mon Sep 17 00:00:00 2001 From: Antonio Jesus Navarro Perez Date: Fri, 7 Sep 2018 10:50:31 +0200 Subject: Fix fatal corrupt patch in unified diff format Signed-off-by: Antonio Jesus Navarro Perez --- plumbing/format/diff/unified_encoder.go | 8 ++++-- plumbing/format/diff/unified_encoder_test.go | 37 ++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/diff/unified_encoder.go b/plumbing/format/diff/unified_encoder.go index 58edd95..8bd6d8a 100644 --- a/plumbing/format/diff/unified_encoder.go +++ b/plumbing/format/diff/unified_encoder.go @@ -237,9 +237,13 @@ func (c *hunksGenerator) addLineNumbers(la, lb int, linesBefore int, i int, op O // we need to search for a reference for the next diff switch { case linesBefore != 0 && c.ctxLines != 0: - clb = lb - c.ctxLines + 1 + if lb > c.ctxLines { + clb = lb - c.ctxLines + 1 + } else { + clb = 1 + } case c.ctxLines == 0: - clb = lb - c.ctxLines + clb = lb case i != len(c.chunks)-1: next := c.chunks[i+1] if next.Type() == op || next.Type() == Equal { diff --git a/plumbing/format/diff/unified_encoder_test.go b/plumbing/format/diff/unified_encoder_test.go index 0e419ca..7736af1 100644 --- a/plumbing/format/diff/unified_encoder_test.go +++ b/plumbing/format/diff/unified_encoder_test.go @@ -150,6 +150,43 @@ var oneChunkPatchInverted Patch = testPatch{ } var fixtures []*fixture = []*fixture{{ + patch: testPatch{ + message: "", + filePatches: []testFilePatch{{ + from: &testFile{ + mode: filemode.Regular, + path: "README.md", + seed: "hello\nworld\n", + }, + to: &testFile{ + mode: filemode.Regular, + path: "README.md", + seed: "hello\nbug\n", + }, + chunks: []testChunk{{ + content: "hello", + op: Equal, + }, { + content: "world", + op: Delete, + }, { + content: "bug", + op: Add, + }}, + }}, + }, + desc: "positive negative number", + context: 2, + diff: `diff --git a/README.md b/README.md +index 94954abda49de8615a048f8d2e64b5de848e27a1..f3dad9514629b9ff9136283ae331ad1fc95748a8 100644 +--- a/README.md ++++ b/README.md +@@ -1,2 +1,2 @@ + hello +-world ++bug +`, +}, { patch: testPatch{ message: "", filePatches: []testFilePatch{{ -- cgit From 8f6b3127c1ff7661113fff2662416c328971a285 Mon Sep 17 00:00:00 2001 From: kuba-- Date: Fri, 7 Sep 2018 09:27:35 +0200 Subject: Expose Storage cache. Signed-off-by: kuba-- --- plumbing/format/packfile/encoder_advanced_test.go | 7 +++---- plumbing/object/change_adaptor_test.go | 4 ++-- plumbing/object/change_test.go | 7 +++---- plumbing/object/commit_test.go | 4 ++-- plumbing/object/difftree_test.go | 4 ++-- plumbing/object/file_test.go | 17 ++++++----------- plumbing/object/object_test.go | 4 ++-- plumbing/object/patch_test.go | 5 +++-- plumbing/object/tag_test.go | 5 ++--- plumbing/object/tree_test.go | 4 ++-- plumbing/revlist/revlist_test.go | 12 +++++------- plumbing/transport/server/loader.go | 3 ++- plumbing/transport/server/server_test.go | 4 ++-- 13 files changed, 36 insertions(+), 44 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index fc1419e..e15126e 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -8,6 +8,7 @@ import ( "gopkg.in/src-d/go-billy.v4/memfs" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" @@ -32,8 +33,7 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) { fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git"). ByTag("packfile").ByTag(".git").One()) fixs.Test(c, func(f *fixtures.Fixture) { - storage, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + storage := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) s.testEncodeDecode(c, storage, 10) }) } @@ -47,8 +47,7 @@ func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) { fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git"). ByTag("packfile").ByTag(".git").One()) fixs.Test(c, func(f *fixtures.Fixture) { - storage, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + storage := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) s.testEncodeDecode(c, storage, 0) }) } diff --git a/plumbing/object/change_adaptor_test.go b/plumbing/object/change_adaptor_test.go index 803c3b8..c7c003b 100644 --- a/plumbing/object/change_adaptor_test.go +++ b/plumbing/object/change_adaptor_test.go @@ -4,6 +4,7 @@ import ( "sort" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -23,8 +24,7 @@ type ChangeAdaptorSuite struct { func (s *ChangeAdaptorSuite) SetUpSuite(c *C) { s.Suite.SetUpSuite(c) s.Fixture = fixtures.Basic().One() - sto, err := filesystem.NewStorage(s.Fixture.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(s.Fixture.DotGit(), cache.NewObjectLRUDefault()) s.Storer = sto } diff --git a/plumbing/object/change_test.go b/plumbing/object/change_test.go index b0e89c7..e2f0a23 100644 --- a/plumbing/object/change_test.go +++ b/plumbing/object/change_test.go @@ -5,6 +5,7 @@ import ( "sort" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/format/diff" "gopkg.in/src-d/go-git.v4/plumbing/storer" @@ -25,8 +26,7 @@ func (s *ChangeSuite) SetUpSuite(c *C) { s.Suite.SetUpSuite(c) s.Fixture = fixtures.ByURL("https://github.com/src-d/go-git.git"). ByTag(".git").One() - sto, err := filesystem.NewStorage(s.Fixture.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(s.Fixture.DotGit(), cache.NewObjectLRUDefault()) s.Storer = sto } @@ -253,8 +253,7 @@ func (s *ChangeSuite) TestNoFileFilemodes(c *C) { s.Suite.SetUpSuite(c) f := fixtures.ByURL("https://github.com/git-fixtures/submodule.git").One() - sto, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) iter, err := sto.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) diff --git a/plumbing/object/commit_test.go b/plumbing/object/commit_test.go index e72b703..c9acf42 100644 --- a/plumbing/object/commit_test.go +++ b/plumbing/object/commit_test.go @@ -8,6 +8,7 @@ import ( "time" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" @@ -247,8 +248,7 @@ func (s *SuiteCommit) TestStringMultiLine(c *C) { hash := plumbing.NewHash("e7d896db87294e33ca3202e536d4d9bb16023db3") f := fixtures.ByURL("https://github.com/src-d/go-git.git").One() - sto, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) o, err := sto.EncodedObject(plumbing.CommitObject, hash) c.Assert(err, IsNil) diff --git a/plumbing/object/difftree_test.go b/plumbing/object/difftree_test.go index ff9ecbc..4af8684 100644 --- a/plumbing/object/difftree_test.go +++ b/plumbing/object/difftree_test.go @@ -4,6 +4,7 @@ import ( "sort" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" @@ -25,8 +26,7 @@ type DiffTreeSuite struct { func (s *DiffTreeSuite) SetUpSuite(c *C) { s.Suite.SetUpSuite(c) s.Fixture = fixtures.Basic().One() - sto, err := filesystem.NewStorage(s.Fixture.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(s.Fixture.DotGit(), cache.NewObjectLRUDefault()) s.Storer = sto s.cache = make(map[string]storer.EncodedObjectStorer) } diff --git a/plumbing/object/file_test.go b/plumbing/object/file_test.go index edb82d0..4b92749 100644 --- a/plumbing/object/file_test.go +++ b/plumbing/object/file_test.go @@ -4,6 +4,7 @@ import ( "io" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -44,8 +45,7 @@ var fileIterTests = []struct { func (s *FileSuite) TestIter(c *C) { for i, t := range fileIterTests { f := fixtures.ByURL(t.repo).One() - sto, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) h := plumbing.NewHash(t.commit) commit, err := GetCommit(sto, h) @@ -106,8 +106,7 @@ hs_err_pid* func (s *FileSuite) TestContents(c *C) { for i, t := range contentsTests { f := fixtures.ByURL(t.repo).One() - sto, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) h := plumbing.NewHash(t.commit) commit, err := GetCommit(sto, h) @@ -160,8 +159,7 @@ var linesTests = []struct { func (s *FileSuite) TestLines(c *C) { for i, t := range linesTests { f := fixtures.ByURL(t.repo).One() - sto, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) h := plumbing.NewHash(t.commit) commit, err := GetCommit(sto, h) @@ -195,8 +193,7 @@ var ignoreEmptyDirEntriesTests = []struct { func (s *FileSuite) TestIgnoreEmptyDirEntries(c *C) { for i, t := range ignoreEmptyDirEntriesTests { f := fixtures.ByURL(t.repo).One() - sto, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) h := plumbing.NewHash(t.commit) commit, err := GetCommit(sto, h) @@ -251,9 +248,7 @@ func (s *FileSuite) TestFileIter(c *C) { func (s *FileSuite) TestFileIterSubmodule(c *C) { dotgit := fixtures.ByURL("https://github.com/git-fixtures/submodule.git").One().DotGit() - st, err := filesystem.NewStorage(dotgit) - - c.Assert(err, IsNil) + st := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()) hash := plumbing.NewHash("b685400c1f9316f350965a5993d350bc746b0bf4") commit, err := GetCommit(st, hash) diff --git a/plumbing/object/object_test.go b/plumbing/object/object_test.go index 68aa1a1..8f0eede 100644 --- a/plumbing/object/object_test.go +++ b/plumbing/object/object_test.go @@ -7,6 +7,7 @@ import ( "time" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -26,8 +27,7 @@ type BaseObjectsSuite struct { func (s *BaseObjectsSuite) SetUpSuite(c *C) { s.Suite.SetUpSuite(c) s.Fixture = fixtures.Basic().One() - storer, err := filesystem.NewStorage(s.Fixture.DotGit()) - c.Assert(err, IsNil) + storer := filesystem.NewStorage(s.Fixture.DotGit(), cache.NewObjectLRUDefault()) s.Storer = storer } diff --git a/plumbing/object/patch_test.go b/plumbing/object/patch_test.go index 8eb65ec..47057fb 100644 --- a/plumbing/object/patch_test.go +++ b/plumbing/object/patch_test.go @@ -4,6 +4,7 @@ import ( . "gopkg.in/check.v1" fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/storage/filesystem" ) @@ -14,8 +15,8 @@ type PatchSuite struct { var _ = Suite(&PatchSuite{}) func (s *PatchSuite) TestStatsWithSubmodules(c *C) { - storer, err := filesystem.NewStorage( - fixtures.ByURL("https://github.com/git-fixtures/submodule.git").One().DotGit()) + storer := filesystem.NewStorage( + fixtures.ByURL("https://github.com/git-fixtures/submodule.git").One().DotGit(), cache.NewObjectLRUDefault()) commit, err := GetCommit(storer, plumbing.NewHash("b685400c1f9316f350965a5993d350bc746b0bf4")) diff --git a/plumbing/object/tag_test.go b/plumbing/object/tag_test.go index e7dd06e..59c28b0 100644 --- a/plumbing/object/tag_test.go +++ b/plumbing/object/tag_test.go @@ -7,6 +7,7 @@ import ( "time" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/storage/filesystem" "gopkg.in/src-d/go-git.v4/storage/memory" @@ -22,9 +23,7 @@ var _ = Suite(&TagSuite{}) func (s *TagSuite) SetUpSuite(c *C) { s.BaseObjectsSuite.SetUpSuite(c) - storer, err := filesystem.NewStorage( - fixtures.ByURL("https://github.com/git-fixtures/tags.git").One().DotGit()) - c.Assert(err, IsNil) + storer := filesystem.NewStorage(fixtures.ByURL("https://github.com/git-fixtures/tags.git").One().DotGit(), cache.NewObjectLRUDefault()) s.Storer = storer } diff --git a/plumbing/object/tree_test.go b/plumbing/object/tree_test.go index 59d5d21..7366421 100644 --- a/plumbing/object/tree_test.go +++ b/plumbing/object/tree_test.go @@ -5,6 +5,7 @@ import ( "io" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -341,8 +342,7 @@ func (s *TreeSuite) TestTreeWalkerNextNonRecursive(c *C) { func (s *TreeSuite) TestTreeWalkerNextSubmodule(c *C) { dotgit := fixtures.ByURL("https://github.com/git-fixtures/submodule.git").One().DotGit() - st, err := filesystem.NewStorage(dotgit) - c.Assert(err, IsNil) + st := filesystem.NewStorage(dotgit, cache.NewObjectLRUDefault()) hash := plumbing.NewHash("b685400c1f9316f350965a5993d350bc746b0bf4") commit, err := GetCommit(st, hash) diff --git a/plumbing/revlist/revlist_test.go b/plumbing/revlist/revlist_test.go index 55d9bca..dea1c73 100644 --- a/plumbing/revlist/revlist_test.go +++ b/plumbing/revlist/revlist_test.go @@ -4,6 +4,7 @@ import ( "testing" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -51,8 +52,7 @@ const ( func (s *RevListSuite) SetUpTest(c *C) { s.Suite.SetUpSuite(c) - sto, err := filesystem.NewStorage(fixtures.Basic().One().DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(fixtures.Basic().One().DotGit(), cache.NewObjectLRUDefault()) s.Storer = sto } @@ -67,8 +67,7 @@ func (s *RevListSuite) TestRevListObjects_Submodules(c *C) { "6ecf0ef2c2dffb796033e5a02219af86ec6584e5": true, } - sto, err := filesystem.NewStorage(fixtures.ByTag("submodule").One().DotGit()) - c.Assert(err, IsNil) + sto := filesystem.NewStorage(fixtures.ByTag("submodule").One().DotGit(), cache.NewObjectLRUDefault()) ref, err := storer.ResolveReference(sto, plumbing.HEAD) c.Assert(err, IsNil) @@ -109,10 +108,9 @@ func (s *RevListSuite) TestRevListObjects(c *C) { } func (s *RevListSuite) TestRevListObjectsTagObject(c *C) { - sto, err := filesystem.NewStorage( + sto := filesystem.NewStorage( fixtures.ByTag("tags"). - ByURL("https://github.com/git-fixtures/tags.git").One().DotGit()) - c.Assert(err, IsNil) + ByURL("https://github.com/git-fixtures/tags.git").One().DotGit(), cache.NewObjectLRUDefault()) expected := map[string]bool{ "70846e9a10ef7b41064b40f07713d5b8b9a8fc73": true, diff --git a/plumbing/transport/server/loader.go b/plumbing/transport/server/loader.go index c83752c..13b3526 100644 --- a/plumbing/transport/server/loader.go +++ b/plumbing/transport/server/loader.go @@ -1,6 +1,7 @@ package server import ( + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/plumbing/transport" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -43,7 +44,7 @@ func (l *fsLoader) Load(ep *transport.Endpoint) (storer.Storer, error) { return nil, transport.ErrRepositoryNotFound } - return filesystem.NewStorage(fs) + return filesystem.NewStorage(fs, cache.NewObjectLRUDefault()), nil } // MapLoader is a Loader that uses a lookup map of storer.Storer by diff --git a/plumbing/transport/server/server_test.go b/plumbing/transport/server/server_test.go index 33d74d1..302ff48 100644 --- a/plumbing/transport/server/server_test.go +++ b/plumbing/transport/server/server_test.go @@ -3,6 +3,7 @@ package server_test import ( "testing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/transport" "gopkg.in/src-d/go-git.v4/plumbing/transport/client" "gopkg.in/src-d/go-git.v4/plumbing/transport/server" @@ -53,8 +54,7 @@ func (s *BaseSuite) prepareRepositories(c *C) { fs := fixtures.Basic().One().DotGit() s.Endpoint, err = transport.NewEndpoint(fs.Root()) c.Assert(err, IsNil) - s.loader[s.Endpoint.String()], err = filesystem.NewStorage(fs) - c.Assert(err, IsNil) + s.loader[s.Endpoint.String()] = filesystem.NewStorage(fs, cache.NewObjectLRUDefault()) s.EmptyEndpoint, err = transport.NewEndpoint("/empty.git") c.Assert(err, IsNil) -- cgit From 4896974b4daf86f53d782c868d408f830f84c294 Mon Sep 17 00:00:00 2001 From: kuba-- Date: Mon, 17 Sep 2018 22:20:50 +0200 Subject: Fix potential LRU cache size issue. Signed-off-by: kuba-- --- plumbing/cache/buffer_lru.go | 24 +++++++++++++----------- plumbing/cache/buffer_test.go | 23 +++++++++++++++++++++++ plumbing/cache/object_lru.go | 24 +++++++++++++----------- plumbing/cache/object_test.go | 19 +++++++++++++++++++ 4 files changed, 68 insertions(+), 22 deletions(-) (limited to 'plumbing') diff --git a/plumbing/cache/buffer_lru.go b/plumbing/cache/buffer_lru.go index f2c0f90..e86ccb2 100644 --- a/plumbing/cache/buffer_lru.go +++ b/plumbing/cache/buffer_lru.go @@ -45,19 +45,25 @@ func (c *BufferLRU) Put(key int64, slice []byte) { c.ll = list.New() } + bufSize := FileSize(len(slice)) if ee, ok := c.cache[key]; ok { + oldBuf := ee.Value.(buffer) + // in this case bufSize is a delta: new size - old size + bufSize -= FileSize(len(oldBuf.Slice)) + c.ll.MoveToFront(ee) ee.Value = buffer{key, slice} - return - } + } else { + if bufSize > c.MaxSize { + return + } - objSize := FileSize(len(slice)) - - if objSize > c.MaxSize { - return + ee := c.ll.PushFront(buffer{key, slice}) + c.cache[key] = ee } - for c.actualSize+objSize > c.MaxSize { + c.actualSize += bufSize + for c.actualSize > c.MaxSize { last := c.ll.Back() lastObj := last.Value.(buffer) lastSize := FileSize(len(lastObj.Slice)) @@ -66,10 +72,6 @@ func (c *BufferLRU) Put(key int64, slice []byte) { delete(c.cache, lastObj.Key) c.actualSize -= lastSize } - - ee := c.ll.PushFront(buffer{key, slice}) - c.cache[key] = ee - c.actualSize += objSize } // Get returns a buffer by its key. It marks the buffer as used. If the buffer diff --git a/plumbing/cache/buffer_test.go b/plumbing/cache/buffer_test.go index 262138a..3e3adc2 100644 --- a/plumbing/cache/buffer_test.go +++ b/plumbing/cache/buffer_test.go @@ -1,6 +1,7 @@ package cache import ( + "bytes" "sync" . "gopkg.in/check.v1" @@ -38,6 +39,28 @@ func (s *BufferSuite) TestPutSameBuffer(c *C) { } } +func (s *ObjectSuite) TestPutSameBufferWithDifferentSize(c *C) { + aBuffer := []byte("a") + bBuffer := []byte("bbb") + cBuffer := []byte("ccccc") + dBuffer := []byte("ddddddd") + + cache := NewBufferLRU(7 * Byte) + cache.Put(1, aBuffer) + cache.Put(1, bBuffer) + cache.Put(1, cBuffer) + cache.Put(1, dBuffer) + + c.Assert(cache.MaxSize, Equals, 7*Byte) + c.Assert(cache.actualSize, Equals, 7*Byte) + c.Assert(cache.ll.Len(), Equals, 1) + + buf, ok := cache.Get(1) + c.Assert(bytes.Equal(buf, dBuffer), Equals, true) + c.Assert(FileSize(len(buf)), Equals, 7*Byte) + c.Assert(ok, Equals, true) +} + func (s *BufferSuite) TestPutBigBuffer(c *C) { for _, o := range s.c { o.Put(1, s.bBuffer) diff --git a/plumbing/cache/object_lru.go b/plumbing/cache/object_lru.go index 0494539..31c0202 100644 --- a/plumbing/cache/object_lru.go +++ b/plumbing/cache/object_lru.go @@ -42,20 +42,26 @@ func (c *ObjectLRU) Put(obj plumbing.EncodedObject) { c.ll = list.New() } + objSize := FileSize(obj.Size()) key := obj.Hash() if ee, ok := c.cache[key]; ok { + oldObj := ee.Value.(plumbing.EncodedObject) + // in this case objSize is a delta: new size - old size + objSize -= FileSize(oldObj.Size()) + c.ll.MoveToFront(ee) ee.Value = obj - return - } - - objSize := FileSize(obj.Size()) + } else { + if objSize > c.MaxSize { + return + } - if objSize > c.MaxSize { - return + ee := c.ll.PushFront(obj) + c.cache[key] = ee } - for c.actualSize+objSize > c.MaxSize { + c.actualSize += objSize + for c.actualSize > c.MaxSize { last := c.ll.Back() lastObj := last.Value.(plumbing.EncodedObject) lastSize := FileSize(lastObj.Size()) @@ -64,10 +70,6 @@ func (c *ObjectLRU) Put(obj plumbing.EncodedObject) { delete(c.cache, lastObj.Hash()) c.actualSize -= lastSize } - - ee := c.ll.PushFront(obj) - c.cache[key] = ee - c.actualSize += objSize } // Get returns an object by its hash. It marks the object as used. If the object diff --git a/plumbing/cache/object_test.go b/plumbing/cache/object_test.go index ac3f0a3..b3e5f79 100644 --- a/plumbing/cache/object_test.go +++ b/plumbing/cache/object_test.go @@ -45,6 +45,25 @@ func (s *ObjectSuite) TestPutSameObject(c *C) { } } +func (s *ObjectSuite) TestPutSameObjectWithDifferentSize(c *C) { + const hash = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + + cache := NewObjectLRU(7 * Byte) + cache.Put(newObject(hash, 1*Byte)) + cache.Put(newObject(hash, 3*Byte)) + cache.Put(newObject(hash, 5*Byte)) + cache.Put(newObject(hash, 7*Byte)) + + c.Assert(cache.MaxSize, Equals, 7*Byte) + c.Assert(cache.actualSize, Equals, 7*Byte) + c.Assert(cache.ll.Len(), Equals, 1) + + obj, ok := cache.Get(plumbing.NewHash(hash)) + c.Assert(obj.Hash(), Equals, plumbing.NewHash(hash)) + c.Assert(FileSize(obj.Size()), Equals, 7*Byte) + c.Assert(ok, Equals, true) +} + func (s *ObjectSuite) TestPutBigObject(c *C) { for _, o := range s.c { o.Put(s.bObject) -- cgit From edfc16e3ea6b0ce2533bacb5f370d042042b4784 Mon Sep 17 00:00:00 2001 From: kuba-- Date: Mon, 17 Sep 2018 23:26:45 +0200 Subject: Remove empty space to trigger windows build. Signed-off-by: kuba-- --- plumbing/cache/buffer_lru.go | 2 -- plumbing/cache/object_lru.go | 2 -- 2 files changed, 4 deletions(-) (limited to 'plumbing') diff --git a/plumbing/cache/buffer_lru.go b/plumbing/cache/buffer_lru.go index e86ccb2..acaf195 100644 --- a/plumbing/cache/buffer_lru.go +++ b/plumbing/cache/buffer_lru.go @@ -50,14 +50,12 @@ func (c *BufferLRU) Put(key int64, slice []byte) { oldBuf := ee.Value.(buffer) // in this case bufSize is a delta: new size - old size bufSize -= FileSize(len(oldBuf.Slice)) - c.ll.MoveToFront(ee) ee.Value = buffer{key, slice} } else { if bufSize > c.MaxSize { return } - ee := c.ll.PushFront(buffer{key, slice}) c.cache[key] = ee } diff --git a/plumbing/cache/object_lru.go b/plumbing/cache/object_lru.go index 31c0202..53d8b02 100644 --- a/plumbing/cache/object_lru.go +++ b/plumbing/cache/object_lru.go @@ -48,14 +48,12 @@ func (c *ObjectLRU) Put(obj plumbing.EncodedObject) { oldObj := ee.Value.(plumbing.EncodedObject) // in this case objSize is a delta: new size - old size objSize -= FileSize(oldObj.Size()) - c.ll.MoveToFront(ee) ee.Value = obj } else { if objSize > c.MaxSize { return } - ee := c.ll.PushFront(obj) c.cache[key] = ee } -- cgit From 156d632a533263091491e9b4a3d9770245fa4af9 Mon Sep 17 00:00:00 2001 From: Jongmin Kim Date: Wed, 26 Sep 2018 23:59:23 +0900 Subject: all: remove extra 's' in "mismatch" Signed-off-by: Jongmin Kim --- plumbing/format/index/decoder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'plumbing') diff --git a/plumbing/format/index/decoder.go b/plumbing/format/index/decoder.go index 1a58128..df25530 100644 --- a/plumbing/format/index/decoder.go +++ b/plumbing/format/index/decoder.go @@ -21,7 +21,7 @@ var ( // ErrMalformedSignature is returned by Decode when the index header file is // malformed ErrMalformedSignature = errors.New("malformed index signature file") - // ErrInvalidChecksum is returned by Decode if the SHA1 hash missmatch with + // ErrInvalidChecksum is returned by Decode if the SHA1 hash mismatch with // the read content ErrInvalidChecksum = errors.New("invalid checksum") -- cgit From 37f80c63cc19d8f224d7c5eb0338594b9cef3838 Mon Sep 17 00:00:00 2001 From: "Santiago M. Mola" Date: Thu, 27 Sep 2018 09:27:48 +0200 Subject: test: improve test for urlencoded user:pass Signed-off-by: Santiago M. Mola --- plumbing/transport/common_test.go | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'plumbing') diff --git a/plumbing/transport/common_test.go b/plumbing/transport/common_test.go index 17f62a6..65ed5b9 100644 --- a/plumbing/transport/common_test.go +++ b/plumbing/transport/common_test.go @@ -1,6 +1,7 @@ package transport import ( + "fmt" "net/url" "testing" @@ -155,12 +156,21 @@ func (s *SuiteCommon) TestNewEndpointFileURL(c *C) { } func (s *SuiteCommon) TestValidEndpoint(c *C) { - e, err := NewEndpoint("http://github.com/user/repository.git") - e.User = "person@mail.com" - e.Password = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" - url, err := url.Parse(e.String()) + user := "person@mail.com" + pass := " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + e, err := NewEndpoint(fmt.Sprintf( + "http://%s:%s@github.com/user/repository.git", + url.PathEscape(user), + url.PathEscape(pass), + )) c.Assert(err, IsNil) - c.Assert(url, NotNil) + c.Assert(e, NotNil) + c.Assert(e.User, Equals, user) + c.Assert(e.Password, Equals, pass) + c.Assert(e.Host, Equals, "github.com") + c.Assert(e.Path, Equals, "/user/repository.git") + + c.Assert(e.String(), Equals, "http://person@mail.com:%20%21%22%23$%25&%27%28%29%2A+%2C-.%2F:%3B%3C=%3E%3F@%5B%5C%5D%5E_%60%7B%7C%7D~@github.com/user/repository.git") } func (s *SuiteCommon) TestNewEndpointInvalidURL(c *C) { -- cgit From 0bfe038a16551ede1d22bfb54f52c31b646a9e1a Mon Sep 17 00:00:00 2001 From: Nithin Gangadharan Date: Thu, 11 Oct 2018 14:17:08 +0530 Subject: Plumbing: object, Add support for Log with filenames. Fixes #826 (#979) plumbing: object, Add support for Log with filenames. Fixes #826 --- plumbing/object/commit_walker_file.go | 115 ++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 plumbing/object/commit_walker_file.go (limited to 'plumbing') diff --git a/plumbing/object/commit_walker_file.go b/plumbing/object/commit_walker_file.go new file mode 100644 index 0000000..84e738a --- /dev/null +++ b/plumbing/object/commit_walker_file.go @@ -0,0 +1,115 @@ +package object + +import ( + "gopkg.in/src-d/go-git.v4/plumbing/storer" + "io" +) + +type commitFileIter struct { + fileName string + sourceIter CommitIter + currentCommit *Commit +} + +// NewCommitFileIterFromIter returns a commit iterator which performs diffTree between +// successive trees returned from the commit iterator from the argument. The purpose of this is +// to find the commits that explain how the files that match the path came to be. +func NewCommitFileIterFromIter(fileName string, commitIter CommitIter) CommitIter { + iterator := new(commitFileIter) + iterator.sourceIter = commitIter + iterator.fileName = fileName + return iterator +} + +func (c *commitFileIter) Next() (*Commit, error) { + if c.currentCommit == nil { + var err error + c.currentCommit, err = c.sourceIter.Next() + if err != nil { + return nil, err + } + } + commit, commitErr := c.getNextFileCommit() + + // Setting current-commit to nil to prevent unwanted states when errors are raised + if commitErr != nil { + c.currentCommit = nil + } + return commit, commitErr +} + +func (c *commitFileIter) getNextFileCommit() (*Commit, error) { + for { + // Parent-commit can be nil if the current-commit is the initial commit + parentCommit, parentCommitErr := c.sourceIter.Next() + if parentCommitErr != nil { + // If the parent-commit is beyond the initial commit, keep it nil + if parentCommitErr != io.EOF { + return nil, parentCommitErr + } + parentCommit = nil + } + + // Fetch the trees of the current and parent commits + currentTree, currTreeErr := c.currentCommit.Tree() + if currTreeErr != nil { + return nil, currTreeErr + } + + var parentTree *Tree + if parentCommit != nil { + var parentTreeErr error + parentTree, parentTreeErr = parentCommit.Tree() + if parentTreeErr != nil { + return nil, parentTreeErr + } + } + + // Find diff between current and parent trees + changes, diffErr := DiffTree(currentTree, parentTree) + if diffErr != nil { + return nil, diffErr + } + + foundChangeForFile := false + for _, change := range changes { + if change.name() == c.fileName { + foundChangeForFile = true + break + } + } + + // Storing the current-commit in-case a change is found, and + // Updating the current-commit for the next-iteration + prevCommit := c.currentCommit + c.currentCommit = parentCommit + + if foundChangeForFile == true { + return prevCommit, nil + } + + // If not matches found and if parent-commit is beyond the initial commit, then return with EOF + if parentCommit == nil { + return nil, io.EOF + } + } +} + +func (c *commitFileIter) ForEach(cb func(*Commit) error) error { + for { + commit, nextErr := c.Next() + if nextErr != nil { + return nextErr + } + err := cb(commit) + if err == storer.ErrStop { + return nil + } else if err != nil { + return err + } + } +} + +func (c *commitFileIter) Close() { + c.sourceIter.Close() +} -- cgit