From f5dfba3742d551411ed0d6279c18f867b6496368 Mon Sep 17 00:00:00 2001 From: Máximo Cuadros Date: Sun, 25 Oct 2015 02:11:04 +0100 Subject: formats/packfile: new reader API (wip) --- formats/packfile/objects.go | 32 ++++++++-- formats/packfile/objects_test.go | 6 +- formats/packfile/reader.go | 132 ++++++++++++++------------------------ formats/packfile/reader_test.go | 134 ++++++++++++++------------------------- 4 files changed, 128 insertions(+), 176 deletions(-) diff --git a/formats/packfile/objects.go b/formats/packfile/objects.go index e46d8af..bd76896 100644 --- a/formats/packfile/objects.go +++ b/formats/packfile/objects.go @@ -9,14 +9,36 @@ import ( "time" ) -type ObjectType string +type ObjectType int8 const ( - CommitObject ObjectType = "commit" - TreeObject ObjectType = "tree" - BlobObject ObjectType = "blob" + CommitObject ObjectType = 1 + TreeObject ObjectType = 2 + BlobObject ObjectType = 3 + TagObject ObjectType = 4 + OFSDeltaObject ObjectType = 6 + REFDeltaObject ObjectType = 7 ) +func (t ObjectType) String() string { + switch t { + case CommitObject: + return "commit" + case TreeObject: + return "tree" + case BlobObject: + return "blob" + default: + return "-" + } +} + +type RAWObject struct { + Hash Hash + Type ObjectType + Bytes []byte +} + // Object generic object interface type Object interface { Type() ObjectType @@ -28,7 +50,7 @@ type Hash [20]byte // ComputeHash compute the hash for a given objType and content func ComputeHash(t ObjectType, content []byte) Hash { - h := []byte(t) + h := []byte(t.String()) h = append(h, ' ') h = strconv.AppendInt(h, int64(len(content)), 10) h = append(h, 0) diff --git a/formats/packfile/objects_test.go b/formats/packfile/objects_test.go index 3d9675f..0760653 100644 --- a/formats/packfile/objects_test.go +++ b/formats/packfile/objects_test.go @@ -12,15 +12,15 @@ type ObjectsSuite struct{} var _ = Suite(&ObjectsSuite{}) func (s *ObjectsSuite) TestComputeHash(c *C) { - hash := ComputeHash("blob", []byte("")) + hash := ComputeHash(BlobObject, []byte("")) c.Assert(hash.String(), Equals, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - hash = ComputeHash("blob", []byte("Hello, World!\n")) + hash = ComputeHash(BlobObject, []byte("Hello, World!\n")) c.Assert(hash.String(), Equals, "8ab686eafeb1f44702738c8b0f24f2567c36da6d") } func (s *ObjectsSuite) TestNewHash(c *C) { - hash := ComputeHash("blob", []byte("Hello, World!\n")) + hash := ComputeHash(BlobObject, []byte("Hello, World!\n")) c.Assert(hash, Equals, NewHash(hash.String())) } diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go index d5f40b9..f79f2ab 100644 --- a/formats/packfile/reader.go +++ b/formats/packfile/reader.go @@ -13,12 +13,6 @@ const ( DefaultMaxObjectsLimit = 1 << 20 DefaultMaxObjectSize = 1 << 32 // 4GB - rawCommit = 1 - rawTree = 2 - rawBlob = 3 - rawTag = 4 - rawOFSDelta = 6 - rawREFDelta = 7 ) type PackfileReader struct { @@ -34,7 +28,7 @@ type PackfileReader struct { MaxObjectSize int r *trackingReader - objects map[Hash]packfileObject + objects map[Hash]*RAWObject offsets map[int]Hash deltas []packfileDelta contentCallback ContentCallback @@ -42,7 +36,7 @@ type PackfileReader struct { type packfileObject struct { bytes []byte - typ int8 + typ ObjectType } type packfileDelta struct { @@ -55,20 +49,21 @@ func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) MaxObjectsLimit: DefaultMaxObjectsLimit, MaxObjectSize: DefaultMaxObjectSize, r: &trackingReader{r: r}, - objects: make(map[Hash]packfileObject, 0), + objects: make(map[Hash]*RAWObject, 0), offsets: make(map[int]Hash, 0), contentCallback: fn, }, nil } -func (pr *PackfileReader) Read() (*Packfile, error) { +func (pr *PackfileReader) Read() (chan *RAWObject, error) { packfile := NewPackfile() if err := pr.validateHeader(); err != nil { if err == io.EOF { // This is an empty repo. It's OK. - return packfile, nil + return nil, nil } + return nil, err } @@ -90,13 +85,13 @@ func (pr *PackfileReader) Read() (*Packfile, error) { packfile.ObjectCount, pr.MaxObjectsLimit) } - if err := pr.readObjects(packfile); err != nil { - return nil, err - } + ch := make(chan *RAWObject, 1) + + go pr.readObjects(ch, count) packfile.Size = int64(pr.r.Pos()) - return packfile, nil + return ch, nil } func (pr *PackfileReader) validateHeader() error { @@ -121,20 +116,26 @@ func (pr *PackfileReader) readInt32() (uint32, error) { return value, nil } -func (pr *PackfileReader) readObjects(packfile *Packfile) error { +func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error { // This code has 50-80 µs of overhead per object not counting zlib inflation. // Together with zlib inflation, it's 400-410 µs for small objects. // That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB, // of which 12-20 % is _not_ zlib inflation (ie. is our code). + defer func() { + close(ch) + }() - for i := 0; i < packfile.ObjectCount; i++ { + for i := 0; i < int(count); i++ { var pos = pr.Pos() - obj, err := pr.readObject(packfile) + obj, err := pr.readObject() if err != nil && err != io.EOF { + fmt.Println(err) return err } - pr.offsets[pos] = obj.hash + pr.offsets[pos] = obj.Hash + pr.objects[obj.Hash] = obj + ch <- obj if err == io.EOF { break @@ -144,19 +145,22 @@ func (pr *PackfileReader) readObjects(packfile *Packfile) error { return nil } -func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error) { - o, err := newObjectReader(pr, packfile, pr.MaxObjectSize) +func (pr *PackfileReader) readObject() (*RAWObject, error) { + + o, err := newObjectReader(pr, pr.MaxObjectSize) if err != nil { return nil, err } + raw := &RAWObject{Type: o.typ} + switch o.typ { - case rawREFDelta: - err = o.readREFDelta() - case rawOFSDelta: - err = o.readOFSDelta() - case rawCommit, rawTree, rawBlob, rawTag: - err = o.readObject() + case REFDeltaObject: + err = o.readREFDelta(raw) + case OFSDeltaObject: + err = o.readOFSDelta(raw) + case CommitObject, TreeObject, BlobObject, TagObject: + err = o.readObject(raw) default: err = NewError("Invalid git object tag %q", o.typ) } @@ -165,7 +169,7 @@ func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error) return nil, err } - return o, err + return raw, err } func (pr *PackfileReader) Pos() int { return pr.r.Pos() } @@ -177,19 +181,19 @@ type objectReader struct { hash Hash steps int - typ int8 + typ ObjectType size uint64 } -func newObjectReader(pr *PackfileReader, pf *Packfile, maxSize int) (*objectReader, error) { - o := &objectReader{pr: pr, pf: pf, maxSize: uint64(maxSize)} +func newObjectReader(pr *PackfileReader, maxSize int) (*objectReader, error) { + o := &objectReader{pr: pr, maxSize: uint64(maxSize)} var buf [1]byte if _, err := o.Read(buf[:]); err != nil { return nil, err } - o.typ = int8((buf[0] >> 4) & 7) + o.typ = ObjectType((buf[0] >> 4) & 7) o.size = uint64(buf[0] & 15) o.steps++ // byte we just read to get `o.typ` and `o.size` @@ -207,7 +211,7 @@ func newObjectReader(pr *PackfileReader, pf *Packfile, maxSize int) (*objectRead return o, nil } -func (o *objectReader) readREFDelta() error { +func (o *objectReader) readREFDelta(raw *RAWObject) error { var ref Hash if _, err := o.Read(ref[:]); err != nil { return err @@ -222,16 +226,14 @@ func (o *objectReader) readREFDelta() error { if !ok { o.pr.deltas = append(o.pr.deltas, packfileDelta{hash: ref, delta: buf[:]}) } else { - patched := PatchDelta(referenced.bytes, buf[:]) + patched := PatchDelta(referenced.Bytes, buf[:]) if patched == nil { return NewError("error while patching %x", ref) } - o.typ = referenced.typ - err = o.addObject(patched) - if err != nil { - return err - } + raw.Type = referenced.Type + raw.Bytes = patched + raw.Hash = ComputeHash(raw.Type, raw.Bytes) } return nil @@ -258,7 +260,7 @@ func decodeOffset(src io.ByteReader, steps int) (int, error) { return -offset, nil } -func (o *objectReader) readOFSDelta() error { +func (o *objectReader) readOFSDelta(raw *RAWObject) error { var pos = o.pr.Pos() // read negative offset @@ -278,62 +280,26 @@ func (o *objectReader) readOFSDelta() error { return NewError("can't find a pack entry at %d", pos+offset) } - patched := PatchDelta(referenced.bytes, buf) + patched := PatchDelta(referenced.Bytes, buf) if patched == nil { return NewError("error while patching %q", ref) } - o.typ = referenced.typ - err = o.addObject(patched) - if err != nil { - return err - } + raw.Type = referenced.Type + raw.Bytes = patched + raw.Hash = ComputeHash(raw.Type, raw.Bytes) return nil } -func (o *objectReader) readObject() error { +func (o *objectReader) readObject(raw *RAWObject) error { buf, err := o.inflate() if err != nil { return err } - return o.addObject(buf) -} - -func (o *objectReader) addObject(bytes []byte) error { - var hash Hash - - switch o.typ { - case rawCommit: - c, err := ParseCommit(bytes) - if err != nil { - return err - } - o.pf.Commits[c.Hash()] = c - hash = c.Hash() - case rawTree: - c, err := ParseTree(bytes) - if err != nil { - return err - } - o.pf.Trees[c.Hash()] = c - hash = c.Hash() - case rawBlob: - c, err := ParseBlob(bytes) - if err != nil { - return err - } - o.pf.Blobs[c.Hash()] = c - hash = c.Hash() - - if o.pr.contentCallback != nil { - o.pr.contentCallback(hash, bytes) - } - } - - o.pr.objects[hash] = packfileObject{bytes: bytes, typ: o.typ} - o.hash = hash + raw.Bytes = buf + raw.Hash = ComputeHash(raw.Type, raw.Bytes) return nil } diff --git a/formats/packfile/reader_test.go b/formats/packfile/reader_test.go index e1def62..e49a976 100644 --- a/formats/packfile/reader_test.go +++ b/formats/packfile/reader_test.go @@ -21,117 +21,81 @@ func (s *ReaderSuite) TestReadPackfile(c *C) { r, err := NewPackfileReader(d, nil) c.Assert(err, IsNil) - p, err := r.Read() + ch, err := r.Read() c.Assert(err, IsNil) - c.Assert(p.ObjectCount, Equals, 11) - c.Assert(p.Commits, HasLen, 4) - c.Assert(p.Commits[NewHash("db4002e880a08bf6cc7217512ad937f1ac8824a2")], NotNil) - c.Assert(p.Commits[NewHash("551fe11a9ef992763b7e0be4500cf7169f2f8575")], NotNil) - c.Assert(p.Commits[NewHash("3d8d2705c6b936ceff0020989eca90db7a372609")], NotNil) - c.Assert(p.Commits[NewHash("778c85ff95b5514fea0ba4c7b6a029d32e2c3b96")], NotNil) - - c.Assert(p.Trees, HasLen, 4) - c.Assert(p.Trees[NewHash("af01d4cac3441bba4bdd4574938e1d231ee5d45e")], NotNil) - c.Assert(p.Trees[NewHash("a028c5b32117ed11bd310a61d50ca10827d853f1")], NotNil) - c.Assert(p.Trees[NewHash("c6b65deb8be57436ceaf920b82d51a3fc59830bd")], NotNil) - c.Assert(p.Trees[NewHash("496d6428b9cf92981dc9495211e6e1120fb6f2ba")], NotNil) - - c.Assert(p.Blobs, HasLen, 3) - c.Assert(p.Blobs[NewHash("85553e8dc42a79b8a483904dcfcdb048fc004055")], NotNil) - c.Assert(p.Blobs[NewHash("90b451628d8449f4c47e627eb1392672e5ccec98")], NotNil) - c.Assert(p.Blobs[NewHash("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")], NotNil) + AssertObjects(c, ch, []string{ + "778c85ff95b5514fea0ba4c7b6a029d32e2c3b96", + "db4002e880a08bf6cc7217512ad937f1ac8824a2", + "551fe11a9ef992763b7e0be4500cf7169f2f8575", + "3d8d2705c6b936ceff0020989eca90db7a372609", + "af01d4cac3441bba4bdd4574938e1d231ee5d45e", + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", + "85553e8dc42a79b8a483904dcfcdb048fc004055", + "a028c5b32117ed11bd310a61d50ca10827d853f1", + "c6b65deb8be57436ceaf920b82d51a3fc59830bd", + "90b451628d8449f4c47e627eb1392672e5ccec98", + "496d6428b9cf92981dc9495211e6e1120fb6f2ba", + }) } -func (s *ReaderSuite) TestReadPackfileInvalid(c *C) { - r, err := NewPackfileReader(bytes.NewReader([]byte("dasdsadasas")), nil) - c.Assert(err, IsNil) +func (s *ReaderSuite) TestReadPackfileOFSDelta(c *C) { + s.testReadPackfileGitFixture(c, "fixtures/git-fixture.ofs-delta") - _, err = r.Read() - _, ok := err.(*ReaderError) - c.Assert(ok, Equals, true) } - -func (s *ReaderSuite) TestReadPackfileRefDelta(c *C) { - d, err := os.Open("fixtures/git-fixture.ref-delta") - c.Assert(err, IsNil) - - r, err := NewPackfileReader(d, nil) - c.Assert(err, IsNil) - - p, err := r.Read() - c.Assert(err, IsNil) - - s.AssertGitFixture(c, p) +func (s *ReaderSuite) TestReadPackfileREFDelta(c *C) { + s.testReadPackfileGitFixture(c, "fixtures/git-fixture.ref-delta") } -func (s *ReaderSuite) TestReadPackfileOfsDelta(c *C) { - d, err := os.Open("fixtures/git-fixture.ofs-delta") +func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string) { + d, err := os.Open(file) c.Assert(err, IsNil) r, err := NewPackfileReader(d, nil) c.Assert(err, IsNil) - p, err := r.Read() + ch, err := r.Read() c.Assert(err, IsNil) - s.AssertGitFixture(c, p) -} - -func (s *ReaderSuite) AssertGitFixture(c *C, p *Packfile) { - - c.Assert(p.ObjectCount, Equals, 28) - - c.Assert(p.Commits, HasLen, 8) - c.Assert(p.Trees, HasLen, 11) - c.Assert(p.Blobs, HasLen, 9) - - commits := []string{ + AssertObjects(c, ch, []string{ + "918c48b83bd081e863dbe1b80f8998f058cd8294", + "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", + "1669dce138d9b841a518c64b10914d88f5e488ea", "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", "b8e471f58bcbca63b07bda20e428190409c2db47", "35e85108805c84807bc66a02d91535e1e24b38b9", "b029517f6300c2da0f4b651b8642506cd6aaf45d", - "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", - "918c48b83bd081e863dbe1b80f8998f058cd8294", - "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", - "1669dce138d9b841a518c64b10914d88f5e488ea", - } - - for _, hash := range commits { - c.Assert(p.Commits[NewHash(hash)], NotNil) - } - - trees := []string{ - "aa9b383c260e1d05fbbf6b30a02914555e20c725", + "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", + "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", + "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", + "d5c0f4ab811897cadf03aec358ae60d21f91c50d", + "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", + "9dea2395f5403188298c1dabe8bdafe562c491e3", "586af567d0bb5e771e49bdd9434f5e0fb76d25fa", - "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", - "eba74343e2f15d62adedfd8c883ee0262b5c8021", - "c2d30fa8ef288618f65f6eed6e168e0d514886f4", - "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", + "9a48f23120e880dfbe41f7c9b7b708e9ee62a492", "5a877e6a906a2743ad6e45d99c1793642aaf8eda", + "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", "a8d315b2b1c615d43042c3a62402b8a54288cf5c", "a39771a7651f97faf5c72e08224d857fc35133db", + "880cd14280f4b9b6ed3986d6671f907d7cc2a198", "fb72698cab7617ac416264415f13224dfd7a165e", - } - - for _, hash := range trees { - c.Assert(p.Trees[NewHash(hash)], NotNil) - } + "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", + "eba74343e2f15d62adedfd8c883ee0262b5c8021", + "c2d30fa8ef288618f65f6eed6e168e0d514886f4", + "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", + "aa9b383c260e1d05fbbf6b30a02914555e20c725", + "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", + }) +} - blobs := []string{ - "d5c0f4ab811897cadf03aec358ae60d21f91c50d", - "9a48f23120e880dfbe41f7c9b7b708e9ee62a492", - "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", - "880cd14280f4b9b6ed3986d6671f907d7cc2a198", - "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", - "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", - "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", - "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", - "9dea2395f5403188298c1dabe8bdafe562c491e3", - } +func AssertObjects(c *C, ch chan *RAWObject, expected []string) { + i := 0 + for obtained := range ch { + c.Assert(obtained.Hash.String(), Equals, expected[i]) + computed := ComputeHash(obtained.Type, obtained.Bytes) + c.Assert(computed.String(), Equals, expected[i]) - for _, hash := range blobs { - c.Assert(p.Blobs[NewHash(hash)], NotNil) + i++ } } -- cgit