author | Máximo Cuadros <mcuadros@gmail.com> | 2015-10-23 16:27:08 +0200
committer | Máximo Cuadros <mcuadros@gmail.com> | 2015-10-23 16:28:13 +0200
commit | 27aa8cdd2431068606741a589383c02c149ea625 (patch)
tree | d423447ee374fbfa802f7ff354651fd34afe0fb2
parent | fa058d42fa3bc53f39108a56dad67157169b2191 (diff)
download | go-git-27aa8cdd2431068606741a589383c02c149ea625.tar.gz
formats/packfile: cleanup
-rw-r--r-- | .travis.yml | 2
-rw-r--r-- | formats/packfile/common.go | 39
-rw-r--r-- | formats/packfile/common_test.go | 9
-rw-r--r-- | formats/packfile/objects.go | 25
-rw-r--r-- | formats/packfile/objects_test.go | 28
-rw-r--r-- | formats/packfile/reader.go | 160
-rw-r--r-- | formats/packfile/reader_test.go | 4
-rw-r--r-- | remote_test.go | 2
8 files changed, 168 insertions, 101 deletions
diff --git a/.travis.yml b/.travis.yml
index 235ad5d..8bd2ecb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ script:
   - tail -n +2 tmp.out >> coverage.out
   - go test -v gopkg.in/src-d/go-git.v2/clients/http -covermode=count -coverprofile=tmp.out
   - tail -n +2 tmp.out >> coverage.out
-  - go test -v gopkg.in/src-d/go-git.v2/packfile -covermode=count -coverprofile=tmp.out
+  - go test -v gopkg.in/src-d/go-git.v2/formats/packfile -covermode=count -coverprofile=tmp.out
   - tail -n +2 tmp.out >> coverage.out
   - go test -v gopkg.in/src-d/go-git.v2/formats/pktline -covermode=count -coverprofile=tmp.out
   - tail -n +2 tmp.out >> coverage.out
diff --git a/formats/packfile/common.go b/formats/packfile/common.go
new file mode 100644
index 0000000..4a97dc7
--- /dev/null
+++ b/formats/packfile/common.go
@@ -0,0 +1,39 @@
+package packfile
+
+import (
+    "fmt"
+    "io"
+)
+
+type trackingReader struct {
+    r io.Reader
+    n int
+}
+
+func (t *trackingReader) Pos() int { return t.n }
+
+func (t *trackingReader) Read(p []byte) (n int, err error) {
+    n, err = t.r.Read(p)
+    if err != nil {
+        return 0, err
+    }
+
+    t.n += n
+
+    return n, err
+}
+
+func (t *trackingReader) ReadByte() (c byte, err error) {
+    var p [1]byte
+    n, err := t.r.Read(p[:])
+    if err != nil {
+        return 0, err
+    }
+
+    if n > 1 {
+        return 0, fmt.Errorf("read %d bytes, should have read just 1", n)
+    }
+
+    t.n += n // n is 1
+    return p[0], nil
+}
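trackingReader is unexported, so it cannot be used from outside the package. The following standalone sketch (the countingReader type is hypothetical, not part of go-git) illustrates the same position-tracking pattern the packfile reader relies on:

package main

import (
    "bytes"
    "fmt"
    "io"
)

// countingReader mirrors the trackingReader pattern above: it wraps an
// io.Reader and records how many bytes have been consumed so far.
type countingReader struct {
    r io.Reader
    n int
}

func (c *countingReader) Read(p []byte) (int, error) {
    n, err := c.r.Read(p)
    c.n += n // count whatever was read, even on a short read
    return n, err
}

func main() {
    cr := &countingReader{r: bytes.NewReader([]byte("PACK0002"))}
    buf := make([]byte, 4)
    if _, err := io.ReadFull(cr, buf); err != nil {
        fmt.Println("read error:", err)
        return
    }
    fmt.Printf("read %q, position is now %d\n", buf, cr.n) // position is now 4
}

One difference worth noting: trackingReader.Read returns 0 on error, discarding any bytes read in the same call, while the sketch counts them anyway. The tracked position is what later lets the reader resolve OFS-delta offsets.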
diff --git a/formats/packfile/common_test.go b/formats/packfile/common_test.go
new file mode 100644
index 0000000..104a5d2
--- /dev/null
+++ b/formats/packfile/common_test.go
@@ -0,0 +1,9 @@
+package packfile
+
+import (
+    "testing"
+
+    . "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { TestingT(t) }
diff --git a/formats/packfile/objects.go b/formats/packfile/objects.go
index 4c7ee75..1077b5f 100644
--- a/formats/packfile/objects.go
+++ b/formats/packfile/objects.go
@@ -57,9 +57,6 @@ type Commit struct {
 
 // ParseCommit transform a byte slice into a Commit struct
 func ParseCommit(b []byte) (*Commit, error) {
-    // b64 := base64.StdEncoding.EncodeToString(b)
-    //fmt.Printf("%q\n", b64)
-
     o := &Commit{hash: ComputeHash(CommitObject, b)}
 
     lines := bytes.Split(b, []byte{'\n'})
@@ -106,25 +103,29 @@ func (o *Commit) Hash() Hash { return o.hash }
 
+// Tree is basically like a directory - it references a bunch of other trees
+// and/or blobs (i.e. files and sub-directories)
 type Tree struct {
     Entries []TreeEntry
     hash    Hash
 }
 
+// TreeEntry represents a file
 type TreeEntry struct {
     Name string
     Hash Hash
 }
 
-func NewTree(body []byte) (*Tree, error) {
-    o := &Tree{hash: ComputeHash(TreeObject, body)}
+// ParseTree transforms a byte slice into a Tree struct
+func ParseTree(b []byte) (*Tree, error) {
+    o := &Tree{hash: ComputeHash(TreeObject, b)}
 
-    if len(body) == 0 {
+    if len(b) == 0 {
         return o, nil
     }
 
     for {
-        split := bytes.SplitN(body, []byte{0}, 2)
+        split := bytes.SplitN(b, []byte{0}, 2)
         split1 := bytes.SplitN(split[0], []byte{' '}, 2)
 
         entry := TreeEntry{}
@@ -133,7 +134,7 @@ func NewTree(body []byte) (*Tree, error) {
 
         o.Entries = append(o.Entries, entry)
 
-        body = split[1][20:]
+        b = split[1][20:]
         if len(split[1]) == 20 {
             break
         }
@@ -142,30 +143,36 @@ func NewTree(body []byte) (*Tree, error) {
     return o, nil
 }
 
+// Type returns the object type
 func (o *Tree) Type() ObjectType { return TreeObject }
+
+// Hash returns the computed hash of the tree
 func (o *Tree) Hash() Hash { return o.hash }
 
+// Blob is used to store file data - it is generally a file.
 type Blob struct {
     Len  int
     hash Hash
 }
 
-func NewBlob(b []byte) (*Blob, error) {
+// ParseBlob transforms a byte slice into a Blob struct
+func ParseBlob(b []byte) (*Blob, error) {
     return &Blob{
         Len:  len(b),
         hash: ComputeHash(BlobObject, b),
     }, nil
}
 
+// Type returns the object type
 func (o *Blob) Type() ObjectType { return BlobObject }
+
+// Hash returns the computed hash of the blob
 func (o *Blob) Hash() Hash { return o.hash }
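A raw tree object is a sequence of entries of the form "<mode> <name>\x00" followed by a raw 20-byte hash, which is exactly what the ParseTree loop splits on. A usage sketch, assuming the gopkg.in/src-d/go-git.v2/formats/packfile import path and that the entry-parsing lines elided from the hunk above fill in Name and Hash:

package main

import (
    "fmt"

    "gopkg.in/src-d/go-git.v2/formats/packfile"
)

func main() {
    // One entry: "<mode> <name>\x00" plus a raw 20-byte hash (all zeros
    // here, just to satisfy the format).
    raw := append([]byte("100644 README\x00"), make([]byte, 20)...)

    tree, err := packfile.ParseTree(raw)
    if err != nil {
        fmt.Println("parse error:", err)
        return
    }

    fmt.Println(tree.Entries[0].Name) // README
    fmt.Println(tree.Hash().String()) // hash computed from the tree contents
}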
diff --git a/formats/packfile/objects_test.go b/formats/packfile/objects_test.go
index 70f4ae6..5952432 100644
--- a/formats/packfile/objects_test.go
+++ b/formats/packfile/objects_test.go
@@ -45,6 +45,34 @@ func (s *ObjectsSuite) TestCommitHash(c *C) {
     c.Assert(commit.Hash().String(), Equals, "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69")
 }
 
+var TreeFixture = "MTAwNjQ0IC5naXRpZ25vcmUAMoWKrTw4PtH/Cg+b3yMdVKAMnogxMDA2NDQgQ0hBTkdFTE9HANP/U+BWSp+H2OhLbijlBg5RcAiqMTAwNjQ0IExJQ0VOU0UAwZK9aiTqGrAdeGhuQXyL3Hw9GX8xMDA2NDQgYmluYXJ5LmpwZwDVwPSrgRiXyt8DrsNYrmDSH5HFDTQwMDAwIGdvAKOXcadlH5f69ccuCCJNhX/DUTPbNDAwMDAganNvbgBah35qkGonQ61uRdmcF5NkKq+O2jQwMDAwIHBocABYavVn0Ltedx5JvdlDT14Pt20l+jQwMDAwIHZlbmRvcgDPSqOziXT7fYHzZ8CDD3141lq4aw=="
+
+func (s *ObjectsSuite) TestParseTree(c *C) {
+    data, _ := base64.StdEncoding.DecodeString(TreeFixture)
+    tree, err := ParseTree(data)
+    c.Assert(err, IsNil)
+
+    c.Assert(tree.Entries, HasLen, 8)
+    c.Assert(tree.Entries[0].Name, Equals, ".gitignore")
+    c.Assert(tree.Entries[0].Hash.String(), Equals, "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88")
+}
+
+func (s *ObjectsSuite) TestTreeHash(c *C) {
+    data, _ := base64.StdEncoding.DecodeString(TreeFixture)
+    tree, err := ParseTree(data)
+
+    c.Assert(err, IsNil)
+    c.Assert(tree.Hash().String(), Equals, "a8d315b2b1c615d43042c3a62402b8a54288cf5c")
+}
+
+func (s *ObjectsSuite) TestBlobHash(c *C) {
+    blob, err := ParseBlob([]byte{'F', 'O', 'O'})
+    c.Assert(err, IsNil)
+
+    c.Assert(blob.Len, Equals, 3)
+    c.Assert(blob.Hash().String(), Equals, "d96c7efbfec2814ae0301ad054dc8d9fc416c9b5")
+}
+
 func (s *ObjectsSuite) TestParseSignature(c *C) {
     cases := map[string]Signature{
         `Foo Bar <foo@bar.com> 1257894000 +0100`: {
diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go
index ccf4822..d5f40b9 100644
--- a/formats/packfile/reader.go
+++ b/formats/packfile/reader.go
@@ -9,49 +9,34 @@ import (
     "github.com/klauspost/compress/zlib"
 )
 
-const MaxObjectsLimit = 1000000
-
-var ErrMaxSize = fmt.Errorf("Max size exceeded for in-memory client")
-
-type TrackingByteReader struct {
-    r    io.Reader
-    n, l int
-}
-
-func (t *TrackingByteReader) Pos() int { return t.n }
-
-func (t *TrackingByteReader) Read(p []byte) (n int, err error) {
-    n, err = t.r.Read(p)
-    if err != nil {
-        return 0, err
-    }
-    t.n += n
-    if t.n >= t.l {
-        return n, ErrMaxSize
-    }
-    return n, err
-}
-
-func (t *TrackingByteReader) ReadByte() (c byte, err error) {
-    var p [1]byte
-    n, err := t.r.Read(p[:])
-    if err != nil {
-        return 0, err
-    }
-    if n > 1 {
-        return 0, fmt.Errorf("read %d bytes, should have read just 1", n)
-    }
-    t.n += n // n is 1
-    return p[0], nil
-}
+const (
+    DefaultMaxObjectsLimit = 1 << 20
+    DefaultMaxObjectSize   = 1 << 32 // 4GB
+
+    rawCommit   = 1
+    rawTree     = 2
+    rawBlob     = 3
+    rawTag      = 4
+    rawOFSDelta = 6
+    rawREFDelta = 7
+)
 
 type PackfileReader struct {
-    r *TrackingByteReader
-
-    objects map[Hash]packfileObject
-    offsets map[int]Hash
-    deltas  []packfileDelta
-
+    // MaxObjectsLimit is the limit of objects that can be loaded from the
+    // packfile; if a packfile exceeds this number an error is thrown. The
+    // default value, DefaultMaxObjectsLimit, is usually more than enough to
+    // work with any repository; with extremely big repositories, where the
+    // number of objects is larger, memory can be exhausted.
+    MaxObjectsLimit int
+
+    // MaxObjectSize is the maximum object size in bytes; reading an object
+    // bigger than this causes an error. The default value is defined by
+    // DefaultMaxObjectSize.
+    MaxObjectSize int
+
+    r               *trackingReader
+    objects         map[Hash]packfileObject
+    offsets         map[int]Hash
+    deltas          []packfileDelta
     contentCallback ContentCallback
 }
@@ -65,17 +50,17 @@ type packfileDelta struct {
     delta []byte
 }
 
-func NewPackfileReader(r io.Reader, l int, fn ContentCallback) (*PackfileReader, error) {
+func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) {
     return &PackfileReader{
-        r:               &TrackingByteReader{r: r, n: 0, l: l},
+        MaxObjectsLimit: DefaultMaxObjectsLimit,
+        MaxObjectSize:   DefaultMaxObjectSize,
+        r:               &trackingReader{r: r},
         objects:         make(map[Hash]packfileObject, 0),
         offsets:         make(map[int]Hash, 0),
         contentCallback: fn,
     }, nil
 }
 
-func (pr *PackfileReader) Pos() int { return pr.r.Pos() }
-
 func (pr *PackfileReader) Read() (*Packfile, error) {
     packfile := NewPackfile()
@@ -100,8 +85,9 @@ func (pr *PackfileReader) Read() (*Packfile, error) {
     packfile.Version = uint32(ver)
     packfile.ObjectCount = int(count)
 
-    if packfile.ObjectCount > MaxObjectsLimit {
-        return nil, NewError("too many objects (%d)", packfile.ObjectCount)
+    if packfile.ObjectCount > pr.MaxObjectsLimit {
+        return nil, NewError("too many objects %d, limit is %d",
+            packfile.ObjectCount, pr.MaxObjectsLimit)
     }
 
     if err := pr.readObjects(packfile); err != nil {
@@ -159,17 +145,17 @@ func (pr *PackfileReader) readObjects(packfile *Packfile) error {
 }
 
 func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error) {
-    o, err := newObjectReader(pr, packfile)
+    o, err := newObjectReader(pr, packfile, pr.MaxObjectSize)
     if err != nil {
         return nil, err
     }
 
     switch o.typ {
-    case OBJ_REF_DELTA:
+    case rawREFDelta:
         err = o.readREFDelta()
-    case OBJ_OFS_DELTA:
+    case rawOFSDelta:
         err = o.readOFSDelta()
-    case OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG:
+    case rawCommit, rawTree, rawBlob, rawTag:
         err = o.readObject()
     default:
         err = NewError("Invalid git object tag %q", o.typ)
@@ -182,29 +168,21 @@ func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error)
     return o, err
 }
 
-const (
-    OBJ_COMMIT    = 1
-    OBJ_TREE      = 2
-    OBJ_BLOB      = 3
-    OBJ_TAG       = 4
-    OBJ_OFS_DELTA = 6
-    OBJ_REF_DELTA = 7
-)
-
-const SIZE_LIMIT uint64 = 1 << 32 // 4GB
+func (pr *PackfileReader) Pos() int { return pr.r.Pos() }
 
 type objectReader struct {
-    pr *PackfileReader
-    pf *Packfile
+    pr      *PackfileReader
+    pf      *Packfile
+    maxSize uint64
+
     hash  Hash
     steps int
-
-    typ  int8
-    size uint64
+    typ   int8
+    size  uint64
 }
 
-func newObjectReader(pr *PackfileReader, pf *Packfile) (*objectReader, error) {
-    o := &objectReader{pr: pr, pf: pf}
+func newObjectReader(pr *PackfileReader, pf *Packfile, maxSize int) (*objectReader, error) {
+    o := &objectReader{pr: pr, pf: pf, maxSize: uint64(maxSize)}
 
     var buf [1]byte
     if _, err := o.Read(buf[:]); err != nil {
@@ -248,6 +226,7 @@ func (o *objectReader) readREFDelta() error {
     if patched == nil {
         return NewError("error while patching %x", ref)
     }
+
     o.typ = referenced.typ
     err = o.addObject(patched)
     if err != nil {
@@ -265,13 +244,15 @@ func decodeOffset(src io.ByteReader, steps int) (int, error) {
     }
     var offset = int(b & 0x7f)
     for (b & 0x80) != 0 {
-        offset += 1 // WHY?
+        offset++ // WHY?
         b, err = src.ReadByte()
         if err != nil {
             return 0, err
         }
+
         offset = (offset << 7) + int(b&0x7f)
     }
+
     // offset needs to be aware of the bytes we read for `o.typ` and `o.size`
     offset += steps
     return -offset, nil
@@ -295,16 +276,17 @@ func (o *objectReader) readOFSDelta() error {
     referenced, ok := o.pr.objects[ref]
     if !ok {
         return NewError("can't find a pack entry at %d", pos+offset)
-    } else {
-        patched := PatchDelta(referenced.bytes, buf)
-        if patched == nil {
-            return NewError("error while patching %q", ref)
-        }
-        o.typ = referenced.typ
-        err = o.addObject(patched)
-        if err != nil {
-            return err
-        }
     }
+
+    patched := PatchDelta(referenced.bytes, buf)
+    if patched == nil {
+        return NewError("error while patching %q", ref)
+    }
+
+    o.typ = referenced.typ
+    err = o.addObject(patched)
+    if err != nil {
+        return err
+    }
 
     return nil
@@ -323,22 +305,22 @@ func (o *objectReader) addObject(bytes []byte) error {
     var hash Hash
     switch o.typ {
-    case OBJ_COMMIT:
+    case rawCommit:
         c, err := ParseCommit(bytes)
         if err != nil {
             return err
         }
         o.pf.Commits[c.Hash()] = c
         hash = c.Hash()
-    case OBJ_TREE:
-        c, err := NewTree(bytes)
+    case rawTree:
+        c, err := ParseTree(bytes)
         if err != nil {
             return err
         }
         o.pf.Trees[c.Hash()] = c
         hash = c.Hash()
-    case OBJ_BLOB:
-        c, err := NewBlob(bytes)
+    case rawBlob:
+        c, err := ParseBlob(bytes)
         if err != nil {
             return err
         }
@@ -361,14 +343,16 @@ func (o *objectReader) inflate() ([]byte, error) {
     if err != nil {
         if err == zlib.ErrHeader {
             return nil, zlib.ErrHeader
-        } else {
-            return nil, NewError("error opening packfile's object zlib: %v", err)
         }
+
+        return nil, NewError("error opening packfile's object zlib: %v", err)
     }
+
     defer zr.Close()
 
-    if o.size > SIZE_LIMIT {
-        return nil, NewError("the object size exceeed the allowed limit: %d", o.size)
+    if o.size > o.maxSize {
+        return nil, NewError("the object size %q exceeds the allowed limit: %q",
+            o.size, o.maxSize)
     }
 
     var buf bytes.Buffer
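The offset++ marked "WHY?" is the bias in git's OFS-delta offset encoding: adding 1 before each continuation byte makes every n-byte sequence start one past the largest value an (n-1)-byte sequence can encode, so no value has two encodings. A self-contained sketch of the same loop, without the steps correction and the final negation applied above:

package main

import (
    "bytes"
    "fmt"
    "io"
)

// decodeOFSOffset mirrors the loop in decodeOffset. The offset++ is the
// bias: it skips the values that shorter encodings already cover.
func decodeOFSOffset(src io.ByteReader) (int, error) {
    b, err := src.ReadByte()
    if err != nil {
        return 0, err
    }
    offset := int(b & 0x7f)
    for (b & 0x80) != 0 {
        offset++ // bias before consuming the next 7 bits
        if b, err = src.ReadByte(); err != nil {
            return 0, err
        }
        offset = (offset << 7) + int(b&0x7f)
    }
    return offset, nil
}

func main() {
    for _, enc := range [][]byte{{0x7f}, {0x80, 0x00}, {0x81, 0x00}} {
        v, _ := decodeOFSOffset(bytes.NewReader(enc))
        fmt.Printf("% x -> %d\n", enc, v)
    }
    // 7f    -> 127 (largest one-byte value)
    // 80 00 -> 128 (smallest two-byte value, one past 127)
    // 81 00 -> 256
}

Without the bias, 80 00 would also decode to 0 and most offsets would have several valid encodings.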
diff --git a/formats/packfile/reader_test.go b/formats/packfile/reader_test.go
index 04f2948..e52cbc3 100644
--- a/formats/packfile/reader_test.go
+++ b/formats/packfile/reader_test.go
@@ -14,7 +14,7 @@ func TestReadPackfile(t *testing.T) {
     data, _ := base64.StdEncoding.DecodeString(packFileWithEmptyObjects)
     d := bytes.NewReader(data)
 
-    r, err := NewPackfileReader(d, 8<<20, nil)
+    r, err := NewPackfileReader(d, nil)
     assert.Nil(t, err)
 
     p, err := r.Read()
@@ -26,7 +26,7 @@ func TestReadPackfile(t *testing.T) {
 }
 
 func TestReadPackfileInvalid(t *testing.T) {
-    r, err := NewPackfileReader(bytes.NewReader([]byte("dasdsadasas")), 8<<20, nil)
+    r, err := NewPackfileReader(bytes.NewReader([]byte("dasdsadasas")), nil)
     assert.Nil(t, err)
 
     _, err = r.Read()
diff --git a/remote_test.go b/remote_test.go
index a3c602d..881f629 100644
--- a/remote_test.go
+++ b/remote_test.go
@@ -40,7 +40,7 @@ func (s *SuiteRemote) TestFetchDefaultBranch(c *C) {
     reader, err := r.FetchDefaultBranch()
     c.Assert(err, IsNil)
 
-    pr, err := packfile.NewPackfileReader(reader, 8<<20, nil)
+    pr, err := packfile.NewPackfileReader(reader, nil)
     c.Assert(err, IsNil)
 
     pf, err := pr.Read()
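The 8<<20 size argument is gone from every call site: limits now live on the PackfileReader itself and default to DefaultMaxObjectsLimit and DefaultMaxObjectSize. A usage sketch of the reworked API, assuming the gopkg.in/src-d/go-git.v2/formats/packfile import path and a hypothetical local objects.pack file:

package main

import (
    "fmt"
    "os"

    "gopkg.in/src-d/go-git.v2/formats/packfile"
)

func main() {
    f, err := os.Open("objects.pack") // hypothetical packfile on disk
    if err != nil {
        fmt.Println(err)
        return
    }
    defer f.Close()

    // No size argument anymore; nil means no ContentCallback.
    r, err := packfile.NewPackfileReader(f, nil)
    if err != nil {
        fmt.Println(err)
        return
    }

    // Both limits are exported fields and can be tightened before Read.
    r.MaxObjectsLimit = 1 << 16

    pf, err := r.Read()
    if err != nil {
        fmt.Println(err)
        return
    }

    fmt.Println("objects in packfile:", pf.ObjectCount)
}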