about summary refs log tree commit diff stats
path: root/formats/packfile
diff options
context:
space:
mode:
author Máximo Cuadros <mcuadros@gmail.com> 2015-10-25 02:11:04 +0100
committer Máximo Cuadros <mcuadros@gmail.com> 2015-10-25 02:11:04 +0100
commit f5dfba3742d551411ed0d6279c18f867b6496368 (patch)
tree a2907d4f5d4814aa3fd8ea21af5bf5e9ead54a00 /formats/packfile
parent ec6f456c0e8c7058a29611429965aa05c190b54b (diff)
download go-git-f5dfba3742d551411ed0d6279c18f867b6496368.tar.gz
formats/packfile: new reader API (wip)
Diffstat (limited to 'formats/packfile')
-rw-r--r-- formats/packfile/objects.go 32
-rw-r--r-- formats/packfile/objects_test.go 6
-rw-r--r-- formats/packfile/reader.go 132
-rw-r--r-- formats/packfile/reader_test.go 134
4 files changed, 128 insertions, 176 deletions
diff --git a/formats/packfile/objects.go b/formats/packfile/objects.go
index e46d8af..bd76896 100644
--- a/formats/packfile/objects.go
+++ b/formats/packfile/objects.go
@@ -9,14 +9,36 @@ import (
"time"
)
-type ObjectType string
+type ObjectType int8
const (
- CommitObject ObjectType = "commit"
- TreeObject ObjectType = "tree"
- BlobObject ObjectType = "blob"
+ CommitObject ObjectType = 1
+ TreeObject ObjectType = 2
+ BlobObject ObjectType = 3
+ TagObject ObjectType = 4
+ OFSDeltaObject ObjectType = 6
+ REFDeltaObject ObjectType = 7
)
+func (t ObjectType) String() string {
+ switch t {
+ case CommitObject:
+ return "commit"
+ case TreeObject:
+ return "tree"
+ case BlobObject:
+ return "blob"
+ default:
+ return "-"
+ }
+}
+
+type RAWObject struct {
+ Hash Hash
+ Type ObjectType
+ Bytes []byte
+}
+
// Object generic object interface
type Object interface {
Type() ObjectType
@@ -28,7 +50,7 @@ type Hash [20]byte
// ComputeHash compute the hash for a given objType and content
func ComputeHash(t ObjectType, content []byte) Hash {
- h := []byte(t)
+ h := []byte(t.String())
h = append(h, ' ')
h = strconv.AppendInt(h, int64(len(content)), 10)
h = append(h, 0)
diff --git a/formats/packfile/objects_test.go b/formats/packfile/objects_test.go
index 3d9675f..0760653 100644
--- a/formats/packfile/objects_test.go
+++ b/formats/packfile/objects_test.go
@@ -12,15 +12,15 @@ type ObjectsSuite struct{}
var _ = Suite(&ObjectsSuite{})
func (s *ObjectsSuite) TestComputeHash(c *C) {
- hash := ComputeHash("blob", []byte(""))
+ hash := ComputeHash(BlobObject, []byte(""))
c.Assert(hash.String(), Equals, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
- hash = ComputeHash("blob", []byte("Hello, World!\n"))
+ hash = ComputeHash(BlobObject, []byte("Hello, World!\n"))
c.Assert(hash.String(), Equals, "8ab686eafeb1f44702738c8b0f24f2567c36da6d")
}
func (s *ObjectsSuite) TestNewHash(c *C) {
- hash := ComputeHash("blob", []byte("Hello, World!\n"))
+ hash := ComputeHash(BlobObject, []byte("Hello, World!\n"))
c.Assert(hash, Equals, NewHash(hash.String()))
}
diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go
index d5f40b9..f79f2ab 100644
--- a/formats/packfile/reader.go
+++ b/formats/packfile/reader.go
@@ -13,12 +13,6 @@ const (
DefaultMaxObjectsLimit = 1 << 20
DefaultMaxObjectSize = 1 << 32 // 4GB
- rawCommit = 1
- rawTree = 2
- rawBlob = 3
- rawTag = 4
- rawOFSDelta = 6
- rawREFDelta = 7
)
type PackfileReader struct {
@@ -34,7 +28,7 @@ type PackfileReader struct {
MaxObjectSize int
r *trackingReader
- objects map[Hash]packfileObject
+ objects map[Hash]*RAWObject
offsets map[int]Hash
deltas []packfileDelta
contentCallback ContentCallback
@@ -42,7 +36,7 @@ type PackfileReader struct {
type packfileObject struct {
bytes []byte
- typ int8
+ typ ObjectType
}
type packfileDelta struct {
@@ -55,20 +49,21 @@ func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error)
MaxObjectsLimit: DefaultMaxObjectsLimit,
MaxObjectSize: DefaultMaxObjectSize,
r: &trackingReader{r: r},
- objects: make(map[Hash]packfileObject, 0),
+ objects: make(map[Hash]*RAWObject, 0),
offsets: make(map[int]Hash, 0),
contentCallback: fn,
}, nil
}
-func (pr *PackfileReader) Read() (*Packfile, error) {
+func (pr *PackfileReader) Read() (chan *RAWObject, error) {
packfile := NewPackfile()
if err := pr.validateHeader(); err != nil {
if err == io.EOF {
// This is an empty repo. It's OK.
- return packfile, nil
+ return nil, nil
}
+
return nil, err
}
@@ -90,13 +85,13 @@ func (pr *PackfileReader) Read() (*Packfile, error) {
packfile.ObjectCount, pr.MaxObjectsLimit)
}
- if err := pr.readObjects(packfile); err != nil {
- return nil, err
- }
+ ch := make(chan *RAWObject, 1)
+
+ go pr.readObjects(ch, count)
packfile.Size = int64(pr.r.Pos())
- return packfile, nil
+ return ch, nil
}
func (pr *PackfileReader) validateHeader() error {
@@ -121,20 +116,26 @@ func (pr *PackfileReader) readInt32() (uint32, error) {
return value, nil
}
-func (pr *PackfileReader) readObjects(packfile *Packfile) error {
+func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error {
// This code has 50-80 µs of overhead per object not counting zlib inflation.
// Together with zlib inflation, it's 400-410 µs for small objects.
// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
// of which 12-20 % is _not_ zlib inflation (ie. is our code).
+ defer func() {
+ close(ch)
+ }()
- for i := 0; i < packfile.ObjectCount; i++ {
+ for i := 0; i < int(count); i++ {
var pos = pr.Pos()
- obj, err := pr.readObject(packfile)
+ obj, err := pr.readObject()
if err != nil && err != io.EOF {
+ fmt.Println(err)
return err
}
- pr.offsets[pos] = obj.hash
+ pr.offsets[pos] = obj.Hash
+ pr.objects[obj.Hash] = obj
+ ch <- obj
if err == io.EOF {
break
@@ -144,19 +145,22 @@ func (pr *PackfileReader) readObjects(packfile *Packfile) error {
return nil
}
-func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error) {
- o, err := newObjectReader(pr, packfile, pr.MaxObjectSize)
+func (pr *PackfileReader) readObject() (*RAWObject, error) {
+
+ o, err := newObjectReader(pr, pr.MaxObjectSize)
if err != nil {
return nil, err
}
+ raw := &RAWObject{Type: o.typ}
+
switch o.typ {
- case rawREFDelta:
- err = o.readREFDelta()
- case rawOFSDelta:
- err = o.readOFSDelta()
- case rawCommit, rawTree, rawBlob, rawTag:
- err = o.readObject()
+ case REFDeltaObject:
+ err = o.readREFDelta(raw)
+ case OFSDeltaObject:
+ err = o.readOFSDelta(raw)
+ case CommitObject, TreeObject, BlobObject, TagObject:
+ err = o.readObject(raw)
default:
err = NewError("Invalid git object tag %q", o.typ)
}
@@ -165,7 +169,7 @@ func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error)
return nil, err
}
- return o, err
+ return raw, err
}
func (pr *PackfileReader) Pos() int { return pr.r.Pos() }
@@ -177,19 +181,19 @@ type objectReader struct {
hash Hash
steps int
- typ int8
+ typ ObjectType
size uint64
}
-func newObjectReader(pr *PackfileReader, pf *Packfile, maxSize int) (*objectReader, error) {
- o := &objectReader{pr: pr, pf: pf, maxSize: uint64(maxSize)}
+func newObjectReader(pr *PackfileReader, maxSize int) (*objectReader, error) {
+ o := &objectReader{pr: pr, maxSize: uint64(maxSize)}
var buf [1]byte
if _, err := o.Read(buf[:]); err != nil {
return nil, err
}
- o.typ = int8((buf[0] >> 4) & 7)
+ o.typ = ObjectType((buf[0] >> 4) & 7)
o.size = uint64(buf[0] & 15)
o.steps++ // byte we just read to get `o.typ` and `o.size`
@@ -207,7 +211,7 @@ func newObjectReader(pr *PackfileReader, pf *Packfile, maxSize int) (*objectRead
return o, nil
}
-func (o *objectReader) readREFDelta() error {
+func (o *objectReader) readREFDelta(raw *RAWObject) error {
var ref Hash
if _, err := o.Read(ref[:]); err != nil {
return err
@@ -222,16 +226,14 @@ func (o *objectReader) readREFDelta() error {
if !ok {
o.pr.deltas = append(o.pr.deltas, packfileDelta{hash: ref, delta: buf[:]})
} else {
- patched := PatchDelta(referenced.bytes, buf[:])
+ patched := PatchDelta(referenced.Bytes, buf[:])
if patched == nil {
return NewError("error while patching %x", ref)
}
- o.typ = referenced.typ
- err = o.addObject(patched)
- if err != nil {
- return err
- }
+ raw.Type = referenced.Type
+ raw.Bytes = patched
+ raw.Hash = ComputeHash(raw.Type, raw.Bytes)
}
return nil
@@ -258,7 +260,7 @@ func decodeOffset(src io.ByteReader, steps int) (int, error) {
return -offset, nil
}
-func (o *objectReader) readOFSDelta() error {
+func (o *objectReader) readOFSDelta(raw *RAWObject) error {
var pos = o.pr.Pos()
// read negative offset
@@ -278,62 +280,26 @@ func (o *objectReader) readOFSDelta() error {
return NewError("can't find a pack entry at %d", pos+offset)
}
- patched := PatchDelta(referenced.bytes, buf)
+ patched := PatchDelta(referenced.Bytes, buf)
if patched == nil {
return NewError("error while patching %q", ref)
}
- o.typ = referenced.typ
- err = o.addObject(patched)
- if err != nil {
- return err
- }
+ raw.Type = referenced.Type
+ raw.Bytes = patched
+ raw.Hash = ComputeHash(raw.Type, raw.Bytes)
return nil
}
-func (o *objectReader) readObject() error {
+func (o *objectReader) readObject(raw *RAWObject) error {
buf, err := o.inflate()
if err != nil {
return err
}
- return o.addObject(buf)
-}
-
-func (o *objectReader) addObject(bytes []byte) error {
- var hash Hash
-
- switch o.typ {
- case rawCommit:
- c, err := ParseCommit(bytes)
- if err != nil {
- return err
- }
- o.pf.Commits[c.Hash()] = c
- hash = c.Hash()
- case rawTree:
- c, err := ParseTree(bytes)
- if err != nil {
- return err
- }
- o.pf.Trees[c.Hash()] = c
- hash = c.Hash()
- case rawBlob:
- c, err := ParseBlob(bytes)
- if err != nil {
- return err
- }
- o.pf.Blobs[c.Hash()] = c
- hash = c.Hash()
-
- if o.pr.contentCallback != nil {
- o.pr.contentCallback(hash, bytes)
- }
- }
-
- o.pr.objects[hash] = packfileObject{bytes: bytes, typ: o.typ}
- o.hash = hash
+ raw.Bytes = buf
+ raw.Hash = ComputeHash(raw.Type, raw.Bytes)
return nil
}
diff --git a/formats/packfile/reader_test.go b/formats/packfile/reader_test.go
index e1def62..e49a976 100644
--- a/formats/packfile/reader_test.go
+++ b/formats/packfile/reader_test.go
@@ -21,117 +21,81 @@ func (s *ReaderSuite) TestReadPackfile(c *C) {
r, err := NewPackfileReader(d, nil)
c.Assert(err, IsNil)
- p, err := r.Read()
+ ch, err := r.Read()
c.Assert(err, IsNil)
- c.Assert(p.ObjectCount, Equals, 11)
- c.Assert(p.Commits, HasLen, 4)
- c.Assert(p.Commits[NewHash("db4002e880a08bf6cc7217512ad937f1ac8824a2")], NotNil)
- c.Assert(p.Commits[NewHash("551fe11a9ef992763b7e0be4500cf7169f2f8575")], NotNil)
- c.Assert(p.Commits[NewHash("3d8d2705c6b936ceff0020989eca90db7a372609")], NotNil)
- c.Assert(p.Commits[NewHash("778c85ff95b5514fea0ba4c7b6a029d32e2c3b96")], NotNil)
-
- c.Assert(p.Trees, HasLen, 4)
- c.Assert(p.Trees[NewHash("af01d4cac3441bba4bdd4574938e1d231ee5d45e")], NotNil)
- c.Assert(p.Trees[NewHash("a028c5b32117ed11bd310a61d50ca10827d853f1")], NotNil)
- c.Assert(p.Trees[NewHash("c6b65deb8be57436ceaf920b82d51a3fc59830bd")], NotNil)
- c.Assert(p.Trees[NewHash("496d6428b9cf92981dc9495211e6e1120fb6f2ba")], NotNil)
-
- c.Assert(p.Blobs, HasLen, 3)
- c.Assert(p.Blobs[NewHash("85553e8dc42a79b8a483904dcfcdb048fc004055")], NotNil)
- c.Assert(p.Blobs[NewHash("90b451628d8449f4c47e627eb1392672e5ccec98")], NotNil)
- c.Assert(p.Blobs[NewHash("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")], NotNil)
+ AssertObjects(c, ch, []string{
+ "778c85ff95b5514fea0ba4c7b6a029d32e2c3b96",
+ "db4002e880a08bf6cc7217512ad937f1ac8824a2",
+ "551fe11a9ef992763b7e0be4500cf7169f2f8575",
+ "3d8d2705c6b936ceff0020989eca90db7a372609",
+ "af01d4cac3441bba4bdd4574938e1d231ee5d45e",
+ "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391",
+ "85553e8dc42a79b8a483904dcfcdb048fc004055",
+ "a028c5b32117ed11bd310a61d50ca10827d853f1",
+ "c6b65deb8be57436ceaf920b82d51a3fc59830bd",
+ "90b451628d8449f4c47e627eb1392672e5ccec98",
+ "496d6428b9cf92981dc9495211e6e1120fb6f2ba",
+ })
}
-func (s *ReaderSuite) TestReadPackfileInvalid(c *C) {
- r, err := NewPackfileReader(bytes.NewReader([]byte("dasdsadasas")), nil)
- c.Assert(err, IsNil)
+func (s *ReaderSuite) TestReadPackfileOFSDelta(c *C) {
+ s.testReadPackfileGitFixture(c, "fixtures/git-fixture.ofs-delta")
- _, err = r.Read()
- _, ok := err.(*ReaderError)
- c.Assert(ok, Equals, true)
}
-
-func (s *ReaderSuite) TestReadPackfileRefDelta(c *C) {
- d, err := os.Open("fixtures/git-fixture.ref-delta")
- c.Assert(err, IsNil)
-
- r, err := NewPackfileReader(d, nil)
- c.Assert(err, IsNil)
-
- p, err := r.Read()
- c.Assert(err, IsNil)
-
- s.AssertGitFixture(c, p)
+func (s *ReaderSuite) TestReadPackfileREFDelta(c *C) {
+ s.testReadPackfileGitFixture(c, "fixtures/git-fixture.ref-delta")
}
-func (s *ReaderSuite) TestReadPackfileOfsDelta(c *C) {
- d, err := os.Open("fixtures/git-fixture.ofs-delta")
+func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string) {
+ d, err := os.Open(file)
c.Assert(err, IsNil)
r, err := NewPackfileReader(d, nil)
c.Assert(err, IsNil)
- p, err := r.Read()
+ ch, err := r.Read()
c.Assert(err, IsNil)
- s.AssertGitFixture(c, p)
-}
-
-func (s *ReaderSuite) AssertGitFixture(c *C, p *Packfile) {
-
- c.Assert(p.ObjectCount, Equals, 28)
-
- c.Assert(p.Commits, HasLen, 8)
- c.Assert(p.Trees, HasLen, 11)
- c.Assert(p.Blobs, HasLen, 9)
-
- commits := []string{
+ AssertObjects(c, ch, []string{
+ "918c48b83bd081e863dbe1b80f8998f058cd8294",
+ "af2d6a6954d532f8ffb47615169c8fdf9d383a1a",
+ "1669dce138d9b841a518c64b10914d88f5e488ea",
"a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69",
"b8e471f58bcbca63b07bda20e428190409c2db47",
"35e85108805c84807bc66a02d91535e1e24b38b9",
"b029517f6300c2da0f4b651b8642506cd6aaf45d",
- "6ecf0ef2c2dffb796033e5a02219af86ec6584e5",
- "918c48b83bd081e863dbe1b80f8998f058cd8294",
- "af2d6a6954d532f8ffb47615169c8fdf9d383a1a",
- "1669dce138d9b841a518c64b10914d88f5e488ea",
- }
-
- for _, hash := range commits {
- c.Assert(p.Commits[NewHash(hash)], NotNil)
- }
-
- trees := []string{
- "aa9b383c260e1d05fbbf6b30a02914555e20c725",
+ "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88",
+ "d3ff53e0564a9f87d8e84b6e28e5060e517008aa",
+ "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f",
+ "d5c0f4ab811897cadf03aec358ae60d21f91c50d",
+ "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9",
"cf4aa3b38974fb7d81f367c0830f7d78d65ab86b",
+ "9dea2395f5403188298c1dabe8bdafe562c491e3",
"586af567d0bb5e771e49bdd9434f5e0fb76d25fa",
- "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd",
- "eba74343e2f15d62adedfd8c883ee0262b5c8021",
- "c2d30fa8ef288618f65f6eed6e168e0d514886f4",
- "8dcef98b1d52143e1e2dbc458ffe38f925786bf2",
+ "9a48f23120e880dfbe41f7c9b7b708e9ee62a492",
"5a877e6a906a2743ad6e45d99c1793642aaf8eda",
+ "c8f1d8c61f9da76f4cb49fd86322b6e685dba956",
"a8d315b2b1c615d43042c3a62402b8a54288cf5c",
"a39771a7651f97faf5c72e08224d857fc35133db",
+ "880cd14280f4b9b6ed3986d6671f907d7cc2a198",
"fb72698cab7617ac416264415f13224dfd7a165e",
- }
-
- for _, hash := range trees {
- c.Assert(p.Trees[NewHash(hash)], NotNil)
- }
+ "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd",
+ "eba74343e2f15d62adedfd8c883ee0262b5c8021",
+ "c2d30fa8ef288618f65f6eed6e168e0d514886f4",
+ "8dcef98b1d52143e1e2dbc458ffe38f925786bf2",
+ "aa9b383c260e1d05fbbf6b30a02914555e20c725",
+ "6ecf0ef2c2dffb796033e5a02219af86ec6584e5",
+ })
+}
- blobs := []string{
- "d5c0f4ab811897cadf03aec358ae60d21f91c50d",
- "9a48f23120e880dfbe41f7c9b7b708e9ee62a492",
- "c8f1d8c61f9da76f4cb49fd86322b6e685dba956",
- "880cd14280f4b9b6ed3986d6671f907d7cc2a198",
- "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88",
- "d3ff53e0564a9f87d8e84b6e28e5060e517008aa",
- "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f",
- "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9",
- "9dea2395f5403188298c1dabe8bdafe562c491e3",
- }
+func AssertObjects(c *C, ch chan *RAWObject, expected []string) {
+ i := 0
+ for obtained := range ch {
+ c.Assert(obtained.Hash.String(), Equals, expected[i])
+ computed := ComputeHash(obtained.Type, obtained.Bytes)
+ c.Assert(computed.String(), Equals, expected[i])
- for _, hash := range blobs {
- c.Assert(p.Blobs[NewHash(hash)], NotNil)
+ i++
}
}