author    Máximo Cuadros <mcuadros@gmail.com>  2015-10-23 16:27:08 +0200
committer Máximo Cuadros <mcuadros@gmail.com>  2015-10-23 16:28:13 +0200
commit    27aa8cdd2431068606741a589383c02c149ea625 (patch)
tree      d423447ee374fbfa802f7ff354651fd34afe0fb2 /formats/packfile
parent    fa058d42fa3bc53f39108a56dad67157169b2191 (diff)
download  go-git-27aa8cdd2431068606741a589383c02c149ea625.tar.gz
formats/packfile: cleanup
Diffstat (limited to 'formats/packfile')
-rw-r--r--   formats/packfile/common.go         39
-rw-r--r--   formats/packfile/common_test.go     9
-rw-r--r--   formats/packfile/objects.go        25
-rw-r--r--   formats/packfile/objects_test.go   28
-rw-r--r--   formats/packfile/reader.go        160
-rw-r--r--   formats/packfile/reader_test.go     4
6 files changed, 166 insertions, 99 deletions
diff --git a/formats/packfile/common.go b/formats/packfile/common.go
new file mode 100644
index 0000000..4a97dc7
--- /dev/null
+++ b/formats/packfile/common.go
@@ -0,0 +1,39 @@
+package packfile
+
+import (
+ "fmt"
+ "io"
+)
+
+type trackingReader struct {
+ r io.Reader
+ n int
+}
+
+func (t *trackingReader) Pos() int { return t.n }
+
+func (t *trackingReader) Read(p []byte) (n int, err error) {
+ n, err = t.r.Read(p)
+ if err != nil {
+ return 0, err
+ }
+
+ t.n += n
+
+ return n, err
+}
+
+func (t *trackingReader) ReadByte() (c byte, err error) {
+ var p [1]byte
+ n, err := t.r.Read(p[:])
+ if err != nil {
+ return 0, err
+ }
+
+ if n > 1 {
+ return 0, fmt.Errorf("read %d bytes, should have read just 1", n)
+ }
+
+ t.n += n // n is 1
+ return p[0], nil
+}
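
For context, the pattern below is what trackingReader gives the parser: a
reader that knows how many bytes it has consumed, which the offset
bookkeeping for OFS deltas relies on via Pos(). Since the type is
unexported, this is a standalone sketch with hypothetical names, not the
package's API:

    package main

    import (
        "bytes"
        "fmt"
        "io"
    )

    // posReader mirrors trackingReader: it wraps an io.Reader and
    // counts the bytes consumed so far.
    type posReader struct {
        r io.Reader
        n int
    }

    func (t *posReader) Read(p []byte) (int, error) {
        n, err := t.r.Read(p)
        t.n += n // count whatever was actually read
        return n, err
    }

    func main() {
        pr := &posReader{r: bytes.NewReader([]byte("PACK\x00\x00\x00\x02"))}
        buf := make([]byte, 4)
        if _, err := io.ReadFull(pr, buf); err == nil {
            fmt.Printf("read %q, position is now %d\n", buf, pr.n)
        }
    }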
diff --git a/formats/packfile/common_test.go b/formats/packfile/common_test.go
new file mode 100644
index 0000000..104a5d2
--- /dev/null
+++ b/formats/packfile/common_test.go
@@ -0,0 +1,9 @@
+package packfile
+
+import (
+ "testing"
+
+ . "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { TestingT(t) }
diff --git a/formats/packfile/objects.go b/formats/packfile/objects.go
index 4c7ee75..1077b5f 100644
--- a/formats/packfile/objects.go
+++ b/formats/packfile/objects.go
@@ -57,9 +57,6 @@ type Commit struct {
// ParseCommit transforms a byte slice into a Commit struct
func ParseCommit(b []byte) (*Commit, error) {
- // b64 := base64.StdEncoding.EncodeToString(b)
- //fmt.Printf("%q\n", b64)
-
o := &Commit{hash: ComputeHash(CommitObject, b)}
lines := bytes.Split(b, []byte{'\n'})
@@ -106,25 +103,29 @@ func (o *Commit) Hash() Hash {
return o.hash
}
+// Tree is basically like a directory: it references a bunch of other trees
+// and/or blobs (i.e. sub-directories and files)
type Tree struct {
Entries []TreeEntry
hash Hash
}
+// TreeEntry represents a file
type TreeEntry struct {
Name string
Hash Hash
}
-func NewTree(body []byte) (*Tree, error) {
- o := &Tree{hash: ComputeHash(TreeObject, body)}
+// ParseTree transforms a byte slice into a Tree struct
+func ParseTree(b []byte) (*Tree, error) {
+ o := &Tree{hash: ComputeHash(TreeObject, b)}
- if len(body) == 0 {
+ if len(b) == 0 {
return o, nil
}
for {
- split := bytes.SplitN(body, []byte{0}, 2)
+ split := bytes.SplitN(b, []byte{0}, 2)
split1 := bytes.SplitN(split[0], []byte{' '}, 2)
entry := TreeEntry{}
@@ -133,7 +134,7 @@ func NewTree(body []byte) (*Tree, error) {
o.Entries = append(o.Entries, entry)
- body = split[1][20:]
+ b = split[1][20:]
if len(split[1]) == 20 {
break
}
@@ -142,30 +143,36 @@ func NewTree(body []byte) (*Tree, error) {
return o, nil
}
+// Type returns the object type
func (o *Tree) Type() ObjectType {
return TreeObject
}
+// Hash returns the computed hash of the tree
func (o *Tree) Hash() Hash {
return o.hash
}
+// Blob is used to store file data - it is generally a file.
type Blob struct {
Len int
hash Hash
}
-func NewBlob(b []byte) (*Blob, error) {
+// ParseBlob transforms a byte slice into a Blob struct
+func ParseBlob(b []byte) (*Blob, error) {
return &Blob{
Len: len(b),
hash: ComputeHash(BlobObject, b),
}, nil
}
+// Type returns the object type
func (o *Blob) Type() ObjectType {
return BlobObject
}
+// Hash returns the computed hash of the blob
func (o *Blob) Hash() Hash {
return o.hash
}
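
The split-on-NUL and fixed 20-byte slicing in ParseTree follow git's
canonical tree entry layout: "<octal mode> <name>\x00" followed by the raw
20-byte SHA-1, repeated with no separators. A hypothetical helper (not part
of the package) that serialises one such entry makes the format explicit:

    // buildTreeEntry emits one entry in the layout ParseTree consumes.
    func buildTreeEntry(mode, name string, sha [20]byte) []byte {
        entry := []byte(mode + " " + name)
        entry = append(entry, 0) // NUL separates the header from the hash
        return append(entry, sha[:]...)
    }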
diff --git a/formats/packfile/objects_test.go b/formats/packfile/objects_test.go
index 70f4ae6..5952432 100644
--- a/formats/packfile/objects_test.go
+++ b/formats/packfile/objects_test.go
@@ -45,6 +45,34 @@ func (s *ObjectsSuite) TestCommitHash(c *C) {
c.Assert(commit.Hash().String(), Equals, "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69")
}
+var TreeFixture = "MTAwNjQ0IC5naXRpZ25vcmUAMoWKrTw4PtH/Cg+b3yMdVKAMnogxMDA2NDQgQ0hBTkdFTE9HANP/U+BWSp+H2OhLbijlBg5RcAiqMTAwNjQ0IExJQ0VOU0UAwZK9aiTqGrAdeGhuQXyL3Hw9GX8xMDA2NDQgYmluYXJ5LmpwZwDVwPSrgRiXyt8DrsNYrmDSH5HFDTQwMDAwIGdvAKOXcadlH5f69ccuCCJNhX/DUTPbNDAwMDAganNvbgBah35qkGonQ61uRdmcF5NkKq+O2jQwMDAwIHBocABYavVn0Ltedx5JvdlDT14Pt20l+jQwMDAwIHZlbmRvcgDPSqOziXT7fYHzZ8CDD3141lq4aw=="
+
+func (s *ObjectsSuite) TestParseTree(c *C) {
+ data, _ := base64.StdEncoding.DecodeString(TreeFixture)
+ tree, err := ParseTree(data)
+ c.Assert(err, IsNil)
+
+ c.Assert(tree.Entries, HasLen, 8)
+ c.Assert(tree.Entries[0].Name, Equals, ".gitignore")
+ c.Assert(tree.Entries[0].Hash.String(), Equals, "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88")
+}
+
+func (s *ObjectsSuite) TestTreeHash(c *C) {
+ data, _ := base64.StdEncoding.DecodeString(TreeFixture)
+ tree, err := ParseTree(data)
+
+ c.Assert(err, IsNil)
+ c.Assert(tree.Hash().String(), Equals, "a8d315b2b1c615d43042c3a62402b8a54288cf5c")
+}
+
+func (s *ObjectsSuite) TestBlobHash(c *C) {
+ blob, err := ParseBlob([]byte{'F', 'O', 'O'})
+ c.Assert(err, IsNil)
+
+ c.Assert(blob.Len, Equals, 3)
+ c.Assert(blob.Hash().String(), Equals, "d96c7efbfec2814ae0301ad054dc8d9fc416c9b5")
+}
+
func (s *ObjectsSuite) TestParseSignature(c *C) {
cases := map[string]Signature{
`Foo Bar <foo@bar.com> 1257894000 +0100`: {
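
As an interactive counterpart to these fixtures, the renamed Parse*
constructors can be exercised with a Go Example inside the package (a
sketch; it assumes Hash satisfies fmt.Stringer, which the String() calls
above suggest):

    package packfile

    import "fmt"

    // ExampleParseBlob: Parse* computes the hash eagerly, so the later
    // Hash() call is just a field read.
    func ExampleParseBlob() {
        blob, _ := ParseBlob([]byte{'F', 'O', 'O'})
        fmt.Println(blob.Len, blob.Hash())
        // Output: 3 d96c7efbfec2814ae0301ad054dc8d9fc416c9b5
    }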
diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go
index ccf4822..d5f40b9 100644
--- a/formats/packfile/reader.go
+++ b/formats/packfile/reader.go
@@ -9,49 +9,34 @@ import (
"github.com/klauspost/compress/zlib"
)
-const MaxObjectsLimit = 1000000
-
-var ErrMaxSize = fmt.Errorf("Max size exceeded for in-memory client")
-
-type TrackingByteReader struct {
- r io.Reader
- n, l int
-}
-
-func (t *TrackingByteReader) Pos() int { return t.n }
-
-func (t *TrackingByteReader) Read(p []byte) (n int, err error) {
- n, err = t.r.Read(p)
- if err != nil {
- return 0, err
- }
- t.n += n
- if t.n >= t.l {
- return n, ErrMaxSize
- }
- return n, err
-}
-
-func (t *TrackingByteReader) ReadByte() (c byte, err error) {
- var p [1]byte
- n, err := t.r.Read(p[:])
- if err != nil {
- return 0, err
- }
- if n > 1 {
- return 0, fmt.Errorf("read %d bytes, should have read just 1", n)
- }
- t.n += n // n is 1
- return p[0], nil
-}
+const (
+ DefaultMaxObjectsLimit = 1 << 20
+ DefaultMaxObjectSize = 1 << 32 // 4GB
+
+ rawCommit = 1
+ rawTree = 2
+ rawBlob = 3
+ rawTag = 4
+ rawOFSDelta = 6
+ rawREFDelta = 7
+)
type PackfileReader struct {
- r *TrackingByteReader
-
- objects map[Hash]packfileObject
- offsets map[int]Hash
- deltas []packfileDelta
-
+ // MaxObjectsLimit is the limit on the number of objects in the packfile;
+ // if a packfile exceeds this number, an error is returned. The default,
+ // DefaultMaxObjectsLimit, is usually more than enough for any repository;
+ // with extremely big repositories, where the object count is higher,
+ // memory can be exhausted.
+ MaxObjectsLimit int
+
+ // MaxObjectSize is the maximum object size in bytes; reading a bigger
+ // object causes an error. The default value is defined by DefaultMaxObjectSize
+ MaxObjectSize int
+
+ r *trackingReader
+ objects map[Hash]packfileObject
+ offsets map[int]Hash
+ deltas []packfileDelta
contentCallback ContentCallback
}
@@ -65,17 +50,17 @@ type packfileDelta struct {
delta []byte
}
-func NewPackfileReader(r io.Reader, l int, fn ContentCallback) (*PackfileReader, error) {
+func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) {
return &PackfileReader{
- r: &TrackingByteReader{r: r, n: 0, l: l},
+ MaxObjectsLimit: DefaultMaxObjectsLimit,
+ MaxObjectSize: DefaultMaxObjectSize,
+ r: &trackingReader{r: r},
objects: make(map[Hash]packfileObject, 0),
offsets: make(map[int]Hash, 0),
contentCallback: fn,
}, nil
}
-func (pr *PackfileReader) Pos() int { return pr.r.Pos() }
-
func (pr *PackfileReader) Read() (*Packfile, error) {
packfile := NewPackfile()
@@ -100,8 +85,9 @@ func (pr *PackfileReader) Read() (*Packfile, error) {
packfile.Version = uint32(ver)
packfile.ObjectCount = int(count)
- if packfile.ObjectCount > MaxObjectsLimit {
- return nil, NewError("too many objects (%d)", packfile.ObjectCount)
+ if packfile.ObjectCount > pr.MaxObjectsLimit {
+ return nil, NewError("too many objects %d, limit is %d",
+ packfile.ObjectCount, pr.MaxObjectsLimit)
}
if err := pr.readObjects(packfile); err != nil {
@@ -159,17 +145,17 @@ func (pr *PackfileReader) readObjects(packfile *Packfile) error {
}
func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error) {
- o, err := newObjectReader(pr, packfile)
+ o, err := newObjectReader(pr, packfile, pr.MaxObjectSize)
if err != nil {
return nil, err
}
switch o.typ {
- case OBJ_REF_DELTA:
+ case rawREFDelta:
err = o.readREFDelta()
- case OBJ_OFS_DELTA:
+ case rawOFSDelta:
err = o.readOFSDelta()
- case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG:
+ case rawCommit, rawTree, rawBlob, rawTag:
err = o.readObject()
default:
err = NewError("Invalid git object tag %q", o.typ)
@@ -182,29 +168,21 @@ func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error)
return o, err
}
-const (
- OBJ_COMMIT = 1
- OBJ_TREE = 2
- OBJ_BLOB = 3
- OBJ_TAG = 4
- OBJ_OFS_DELTA = 6
- OBJ_REF_DELTA = 7
-)
-
-const SIZE_LIMIT uint64 = 1 << 32 // 4GB
+func (pr *PackfileReader) Pos() int { return pr.r.Pos() }
type objectReader struct {
- pr *PackfileReader
- pf *Packfile
+ pr *PackfileReader
+ pf *Packfile
+ maxSize uint64
+
hash Hash
steps int
-
- typ int8
- size uint64
+ typ int8
+ size uint64
}
-func newObjectReader(pr *PackfileReader, pf *Packfile) (*objectReader, error) {
- o := &objectReader{pr: pr, pf: pf}
+func newObjectReader(pr *PackfileReader, pf *Packfile, maxSize int) (*objectReader, error) {
+ o := &objectReader{pr: pr, pf: pf, maxSize: uint64(maxSize)}
var buf [1]byte
if _, err := o.Read(buf[:]); err != nil {
@@ -248,6 +226,7 @@ func (o *objectReader) readREFDelta() error {
if patched == nil {
return NewError("error while patching %x", ref)
}
+
o.typ = referenced.typ
err = o.addObject(patched)
if err != nil {
@@ -265,13 +244,15 @@ func decodeOffset(src io.ByteReader, steps int) (int, error) {
}
var offset = int(b & 0x7f)
for (b & 0x80) != 0 {
- offset += 1 // WHY?
+ offset++ // WHY?
b, err = src.ReadByte()
if err != nil {
return 0, err
}
+
offset = (offset << 7) + int(b&0x7f)
}
+
// offset needs to be aware of the bytes we read for `o.typ` and `o.size`
offset += steps
return -offset, nil
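
The "WHY?" above is the bias in git's OFS-delta varint: adding one before
each shift makes the encoding bijective, so the two-byte range starts at
128 exactly where one-byte values (0..127) end, and no offset has two
encodings. A standalone sketch of the same decode (function name is mine):

    // decode reads the biased base-128 offset used by OFS deltas.
    // E.g. for 0x91 0x2e: 0x11 = 17, bias to 18, 18<<7 + 0x2e = 2350.
    func decode(bs []byte) int {
        offset := int(bs[0] & 0x7f)
        for i := 1; bs[i-1]&0x80 != 0; i++ {
            offset = ((offset + 1) << 7) + int(bs[i]&0x7f)
        }
        return offset
    }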
@@ -295,16 +276,17 @@ func (o *objectReader) readOFSDelta() error {
referenced, ok := o.pr.objects[ref]
if !ok {
return NewError("can't find a pack entry at %d", pos+offset)
- } else {
- patched := PatchDelta(referenced.bytes, buf)
- if patched == nil {
- return NewError("error while patching %q", ref)
- }
- o.typ = referenced.typ
- err = o.addObject(patched)
- if err != nil {
- return err
- }
+ }
+
+ patched := PatchDelta(referenced.bytes, buf)
+ if patched == nil {
+ return NewError("error while patching %q", ref)
+ }
+
+ o.typ = referenced.typ
+ err = o.addObject(patched)
+ if err != nil {
+ return err
}
return nil
@@ -323,22 +305,22 @@ func (o *objectReader) addObject(bytes []byte) error {
var hash Hash
switch o.typ {
- case OBJ_COMMIT:
+ case rawCommit:
c, err := ParseCommit(bytes)
if err != nil {
return err
}
o.pf.Commits[c.Hash()] = c
hash = c.Hash()
- case OBJ_TREE:
- c, err := NewTree(bytes)
+ case rawTree:
+ c, err := ParseTree(bytes)
if err != nil {
return err
}
o.pf.Trees[c.Hash()] = c
hash = c.Hash()
- case OBJ_BLOB:
- c, err := NewBlob(bytes)
+ case rawBlob:
+ c, err := ParseBlob(bytes)
if err != nil {
return err
}
@@ -361,14 +343,16 @@ func (o *objectReader) inflate() ([]byte, error) {
if err != nil {
if err == zlib.ErrHeader {
return nil, zlib.ErrHeader
- } else {
- return nil, NewError("error opening packfile's object zlib: %v", err)
}
+
+ return nil, NewError("error opening packfile's object zlib: %v", err)
}
+
defer zr.Close()
- if o.size > SIZE_LIMIT {
- return nil, NewError("the object size exceeed the allowed limit: %d", o.size)
+ if o.size > o.maxSize {
+ return nil, NewError("the object size %q exceeed the allowed limit: %q",
+ o.size, o.maxSize)
}
var buf bytes.Buffer
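
The net effect of this file's changes is that both limits become knobs on
PackfileReader instead of package constants. A minimal in-package sketch of
the new tuning flow (the function name is mine, not the package's); note
the semantic shift as well: the removed constructor argument capped the
total bytes read from the stream (ErrMaxSize), while MaxObjectSize caps
each inflated object individually:

    package packfile

    import "io"

    // readCapped parses a packfile with limits stricter than the defaults.
    func readCapped(r io.Reader) (*Packfile, error) {
        pr, err := NewPackfileReader(r, nil) // nil: no ContentCallback
        if err != nil {
            return nil, err
        }

        pr.MaxObjectsLimit = 10000 // below DefaultMaxObjectsLimit (1 << 20)
        pr.MaxObjectSize = 1 << 20 // refuse any object over 1 MiB inflated
        return pr.Read()
    }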
diff --git a/formats/packfile/reader_test.go b/formats/packfile/reader_test.go
index 04f2948..e52cbc3 100644
--- a/formats/packfile/reader_test.go
+++ b/formats/packfile/reader_test.go
@@ -14,7 +14,7 @@ func TestReadPackfile(t *testing.T) {
data, _ := base64.StdEncoding.DecodeString(packFileWithEmptyObjects)
d := bytes.NewReader(data)
- r, err := NewPackfileReader(d, 8<<20, nil)
+ r, err := NewPackfileReader(d, nil)
assert.Nil(t, err)
p, err := r.Read()
@@ -26,7 +26,7 @@ func TestReadPackfile(t *testing.T) {
}
func TestReadPackfileInvalid(t *testing.T) {
- r, err := NewPackfileReader(bytes.NewReader([]byte("dasdsadasas")), 8<<20, nil)
+ r, err := NewPackfileReader(bytes.NewReader([]byte("dasdsadasas")), nil)
assert.Nil(t, err)
_, err = r.Read()