diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2015-10-25 20:30:36 +0100 |
---|---|---|
committer | Máximo Cuadros <mcuadros@gmail.com> | 2015-10-25 20:30:36 +0100 |
commit | 9a44cd8ccff143a112436c38bfe5581e74b68f07 (patch) | |
tree | f4d2f38cc61647bf159a7c870913e6f6b60828b2 | |
parent | be69a505926451bf10450ac68d40265a6f43e150 (diff) | |
download | go-git-9a44cd8ccff143a112436c38bfe5581e74b68f07.tar.gz |
formats/packfile: new reader API
-rw-r--r-- | common/hash.go | 35 | ||||
-rw-r--r-- | common/hash_test.go | 27 | ||||
-rw-r--r-- | common/object.go | 95 | ||||
-rw-r--r-- | formats/packfile/common.go | 11 | ||||
-rw-r--r-- | formats/packfile/common_test.go | 9 | ||||
-rw-r--r-- | formats/packfile/delta.go | 13 | ||||
-rw-r--r-- | formats/packfile/doc.go | 3 | ||||
-rw-r--r-- | formats/packfile/packfile.go | 82 | ||||
-rw-r--r-- | formats/packfile/reader.go | 256 | ||||
-rw-r--r-- | formats/packfile/reader_test.go | 58 | ||||
-rw-r--r-- | objects.go (renamed from formats/packfile/objects.go) | 105 | ||||
-rw-r--r-- | objects_test.go (renamed from formats/packfile/objects_test.go) | 16 | ||||
-rw-r--r-- | remote_test.go | 9 |
13 files changed, 347 insertions, 372 deletions
diff --git a/common/hash.go b/common/hash.go new file mode 100644 index 0000000..83844c7 --- /dev/null +++ b/common/hash.go @@ -0,0 +1,35 @@ +package common + +import ( + "crypto/sha1" + "encoding/hex" + "strconv" +) + +// Hash SHA1 hased content +type Hash [20]byte + +// ComputeHash compute the hash for a given ObjectType and content +func ComputeHash(t ObjectType, content []byte) Hash { + h := t.Bytes() + h = append(h, ' ') + h = strconv.AppendInt(h, int64(len(content)), 10) + h = append(h, 0) + h = append(h, content...) + + return Hash(sha1.Sum(h)) +} + +// NewHash return a new Hash from a hexadecimal hash representation +func NewHash(s string) Hash { + b, _ := hex.DecodeString(s) + + var h Hash + copy(h[:], b) + + return h +} + +func (h Hash) String() string { + return hex.EncodeToString(h[:]) +} diff --git a/common/hash_test.go b/common/hash_test.go new file mode 100644 index 0000000..cee0c0f --- /dev/null +++ b/common/hash_test.go @@ -0,0 +1,27 @@ +package common + +import ( + "testing" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type HashSuite struct{} + +var _ = Suite(&HashSuite{}) + +func (s *HashSuite) TestComputeHash(c *C) { + hash := ComputeHash(BlobObject, []byte("")) + c.Assert(hash.String(), Equals, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") + + hash = ComputeHash(BlobObject, []byte("Hello, World!\n")) + c.Assert(hash.String(), Equals, "8ab686eafeb1f44702738c8b0f24f2567c36da6d") +} + +func (s *HashSuite) TestNewHash(c *C) { + hash := ComputeHash(BlobObject, []byte("Hello, World!\n")) + + c.Assert(hash, Equals, NewHash(hash.String())) +} diff --git a/common/object.go b/common/object.go new file mode 100644 index 0000000..60c44da --- /dev/null +++ b/common/object.go @@ -0,0 +1,95 @@ +package common + +import ( + "bytes" + "io" +) + +// Object is a generic representation of any git object +type Object interface { + Type() ObjectType + SetType(ObjectType) + Size() int64 + SetSize(int64) + Hash() Hash + Reader() io.Reader + Writer() io.Writer +} + +// ObjectStorage generic storage of objects +type ObjectStorage interface { + New() Object + Set(Object) Hash + Get(Hash) (Object, bool) +} + +// ObjectType internal object type's +type ObjectType int8 + +const ( + CommitObject ObjectType = 1 + TreeObject ObjectType = 2 + BlobObject ObjectType = 3 + TagObject ObjectType = 4 + OFSDeltaObject ObjectType = 6 + REFDeltaObject ObjectType = 7 +) + +func (t ObjectType) String() string { + switch t { + case CommitObject: + return "commit" + case TreeObject: + return "tree" + case BlobObject: + return "blob" + default: + return "-" + } +} + +func (t ObjectType) Bytes() []byte { + return []byte(t.String()) +} + +type RAWObject struct { + b []byte + t ObjectType + s int64 +} + +func (o *RAWObject) Type() ObjectType { return o.t } +func (o *RAWObject) SetType(t ObjectType) { o.t = t } +func (o *RAWObject) Size() int64 { return o.s } +func (o *RAWObject) SetSize(s int64) { o.s = s } +func (o *RAWObject) Reader() io.Reader { return bytes.NewBuffer(o.b) } +func (o *RAWObject) Hash() Hash { return ComputeHash(o.t, o.b) } +func (o *RAWObject) Writer() io.Writer { return o } +func (o *RAWObject) Write(p []byte) (n int, err error) { + o.b = append(o.b, p...) + return len(p), nil +} + +type RAWObjectStorage struct { + Objects map[Hash]*RAWObject +} + +func NewRAWObjectStorage() *RAWObjectStorage { + return &RAWObjectStorage{make(map[Hash]*RAWObject, 0)} +} + +func (o *RAWObjectStorage) New() Object { + return &RAWObject{} +} + +func (o *RAWObjectStorage) Set(obj Object) Hash { + h := obj.Hash() + o.Objects[h] = obj.(*RAWObject) + + return h +} + +func (o *RAWObjectStorage) Get(h Hash) (Object, bool) { + obj, ok := o.Objects[h] + return obj, ok +} diff --git a/formats/packfile/common.go b/formats/packfile/common.go index 4a97dc7..06c63d4 100644 --- a/formats/packfile/common.go +++ b/formats/packfile/common.go @@ -6,20 +6,17 @@ import ( ) type trackingReader struct { - r io.Reader - n int + r io.Reader + position int64 } -func (t *trackingReader) Pos() int { return t.n } - func (t *trackingReader) Read(p []byte) (n int, err error) { n, err = t.r.Read(p) if err != nil { return 0, err } - t.n += n - + t.position += int64(n) return n, err } @@ -34,6 +31,6 @@ func (t *trackingReader) ReadByte() (c byte, err error) { return 0, fmt.Errorf("read %d bytes, should have read just 1", n) } - t.n += n // n is 1 + t.position++ return p[0], nil } diff --git a/formats/packfile/common_test.go b/formats/packfile/common_test.go deleted file mode 100644 index 104a5d2..0000000 --- a/formats/packfile/common_test.go +++ /dev/null @@ -1,9 +0,0 @@ -package packfile - -import ( - "testing" - - . "gopkg.in/check.v1" -) - -func Test(t *testing.T) { TestingT(t) } diff --git a/formats/packfile/delta.go b/formats/packfile/delta.go index 30703eb..571ccf8 100644 --- a/formats/packfile/delta.go +++ b/formats/packfile/delta.go @@ -2,7 +2,7 @@ package packfile import "io" -const delta_size_min = 4 +const deltaSizeMin = 4 func deltaHeaderSize(b []byte) (uint, []byte) { var size, j uint @@ -18,8 +18,8 @@ func deltaHeaderSize(b []byte) (uint, []byte) { return size, b[j:] } -func PatchDelta(src, delta []byte) []byte { - if len(delta) < delta_size_min { +func patchDelta(src, delta []byte) []byte { + if len(delta) < deltaSizeMin { return nil } size, delta := deltaHeaderSize(delta) @@ -94,12 +94,13 @@ func PatchDelta(src, delta []byte) []byte { return dest } -func decodeOffset(src io.ByteReader, steps int) (int, error) { +func decodeOffset(src io.ByteReader, steps int64) (int64, error) { b, err := src.ReadByte() if err != nil { return 0, err } - var offset = int(b & 0x7f) + + var offset = int64(b & 0x7f) for (b & 0x80) != 0 { offset++ // WHY? b, err = src.ReadByte() @@ -107,7 +108,7 @@ func decodeOffset(src io.ByteReader, steps int) (int, error) { return 0, err } - offset = (offset << 7) + int(b&0x7f) + offset = (offset << 7) + int64(b&0x7f) } // offset needs to be aware of the bytes we read for `o.typ` and `o.size` diff --git a/formats/packfile/doc.go b/formats/packfile/doc.go index 1fc28da..cb3f542 100644 --- a/formats/packfile/doc.go +++ b/formats/packfile/doc.go @@ -1,8 +1,5 @@ package packfile -// Code from: -// https://github.com/gitchain/gitchain/tree/master/git @ 4c2fabdf9 -// // GIT pack format // =============== // diff --git a/formats/packfile/packfile.go b/formats/packfile/packfile.go deleted file mode 100644 index d70f396..0000000 --- a/formats/packfile/packfile.go +++ /dev/null @@ -1,82 +0,0 @@ -package packfile - -import "fmt" - -type Packfile struct { - Version uint32 - Size int64 - ObjectCount int - Checksum []byte - Commits map[Hash]*Commit - Trees map[Hash]*Tree - Blobs map[Hash]*Blob -} - -func NewPackfile() *Packfile { - return &Packfile{ - Commits: make(map[Hash]*Commit, 0), - Trees: make(map[Hash]*Tree, 0), - Blobs: make(map[Hash]*Blob, 0), - } -} - -type BlobEntry struct { - path string - *Blob -} - -type SubtreeEntry struct { - path string - *Tree - TreeCh -} - -type treeEntry interface { - isTreeEntry() - Path() string -} - -func (b BlobEntry) isTreeEntry() {} -func (b BlobEntry) Path() string { return b.path } -func (b SubtreeEntry) isTreeEntry() {} -func (b SubtreeEntry) Path() string { return b.path } - -type TreeCh <-chan treeEntry - -func (p *Packfile) WalkCommit(commitHash Hash) (TreeCh, error) { - commit, ok := p.Commits[commitHash] - if !ok { - return nil, fmt.Errorf("Unable to find %q commit", commitHash) - } - - return p.WalkTree(p.Trees[commit.Tree]), nil -} - -func (p *Packfile) WalkTree(tree *Tree) TreeCh { - return p.walkTree(tree, "") -} - -func (p *Packfile) walkTree(tree *Tree, pathPrefix string) TreeCh { - ch := make(chan treeEntry) - - if tree == nil { - close(ch) - return ch - } - - go func() { - defer func() { - close(ch) - }() - for _, e := range tree.Entries { - path := pathPrefix + e.Name - if blob, ok := p.Blobs[e.Hash]; ok { - ch <- BlobEntry{path, blob} - } else if subtree, ok := p.Trees[e.Hash]; ok { - ch <- SubtreeEntry{path, subtree, p.walkTree(subtree, path+"/")} - } - } - }() - - return ch -} diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go index c355e12..6ccf384 100644 --- a/formats/packfile/reader.go +++ b/formats/packfile/reader.go @@ -5,15 +5,29 @@ import ( "encoding/binary" "fmt" "io" + "io/ioutil" + + "gopkg.in/src-d/go-git.v2/common" "github.com/klauspost/compress/zlib" ) type Format int +var ( + EmptyRepositoryErr = newError("empty repository") + UnsupportedVersionErr = newError("unsupported packfile version") + MaxObjectsLimitReachedErr = newError("max. objects limit reached") + MalformedPackfileErr = newError("malformed pack file, does not start with 'PACK'") + InvalidObjectErr = newError("invalid git object") + PatchingErr = newError("patching error") + PackEntryNotFoundErr = newError("can't find a pack entry") + ObjectNotFoundErr = newError("can't find a object") + ZLibErr = newError("zlib reading error") +) + const ( DefaultMaxObjectsLimit = 1 << 20 - DefaultMaxObjectSize = 1 << 32 // 4GB VersionSupported = 2 UnknownFormat Format = 0 @@ -21,7 +35,8 @@ const ( REFDeltaFormat Format = 2 ) -type PackfileReader struct { +// Reader reads a packfile from a binary string splitting it on objects +type Reader struct { // MaxObjectsLimit is the limit of objects to be load in the packfile, if // a packfile excess this number an error is throw, the default value // is defined by DefaultMaxObjectsLimit, usually the default limit is more @@ -29,116 +44,98 @@ type PackfileReader struct { // where the number of object is bigger the memory can be exhausted. MaxObjectsLimit uint32 - // MaxObjectSize is the maximum size in bytes, reading objects with a bigger - // size cause a error. The default value is defined by DefaultMaxObjectSize - MaxObjectSize uint64 - // Format specifies if we are using ref-delta's or ofs-delta's, choosing the // correct format the memory usage is optimized // https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/Documentation/technical/protocol-capabilities.txt#L154 Format Format r *trackingReader - objects map[Hash]*RAWObject - offsets map[int]*RAWObject + s common.ObjectStorage + offsets map[int64]common.Hash } -func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) { - return &PackfileReader{ +// NewReader returns a new Reader that reads from a io.Reader +func NewReader(r io.Reader) *Reader { + return &Reader{ MaxObjectsLimit: DefaultMaxObjectsLimit, - MaxObjectSize: DefaultMaxObjectSize, r: &trackingReader{r: r}, - objects: make(map[Hash]*RAWObject, 0), - offsets: make(map[int]*RAWObject, 0), - }, nil + offsets: make(map[int64]common.Hash, 0), + } } -func (pr *PackfileReader) Read() (chan *RAWObject, error) { - if err := pr.validateHeader(); err != nil { +// Read reads the objects and stores it at the ObjectStorage +func (r *Reader) Read(s common.ObjectStorage) (int64, error) { + r.s = s + if err := r.validateHeader(); err != nil { if err == io.EOF { - // This is an empty repo. It's OK. - return nil, nil + return -1, EmptyRepositoryErr } - return nil, err + return -1, err } - version, err := pr.readInt32() + version, err := r.readInt32() if err != nil { - return nil, err + return -1, err } if version > VersionSupported { - return nil, NewError("unsupported packfile version %d", version) + return -1, UnsupportedVersionErr } - count, err := pr.readInt32() + count, err := r.readInt32() if err != nil { - return nil, err + return -1, err } - if count > pr.MaxObjectsLimit { - return nil, NewError("too many objects %d, limit is %d", count, pr.MaxObjectsLimit) + if count > r.MaxObjectsLimit { + return -1, MaxObjectsLimitReachedErr } - ch := make(chan *RAWObject, 1) - go pr.readObjects(ch, count) - - // packfile.Size = int64(pr.r.Pos()) - - return ch, nil + return r.r.position, r.readObjects(count) } -func (pr *PackfileReader) validateHeader() error { +func (r *Reader) validateHeader() error { var header = make([]byte, 4) - if _, err := pr.r.Read(header); err != nil { + if _, err := r.r.Read(header); err != nil { return err } if !bytes.Equal(header, []byte{'P', 'A', 'C', 'K'}) { - return NewError("Pack file does not start with 'PACK'") + return MalformedPackfileErr } return nil } -func (pr *PackfileReader) readInt32() (uint32, error) { +func (r *Reader) readInt32() (uint32, error) { var value uint32 - if err := binary.Read(pr.r, binary.BigEndian, &value); err != nil { + if err := binary.Read(r.r, binary.BigEndian, &value); err != nil { return 0, err } return value, nil } -func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error { +func (r *Reader) readObjects(count uint32) error { // This code has 50-80 µs of overhead per object not counting zlib inflation. // Together with zlib inflation, it's 400-410 µs for small objects. // That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB, // of which 12-20 % is _not_ zlib inflation (ie. is our code). - defer func() { - close(ch) - }() - for i := 0; i < int(count); i++ { - var pos = pr.Pos() - obj, err := pr.newRAWObject() + start := r.r.position + obj, err := r.newRAWObject() if err != nil && err != io.EOF { fmt.Println(err) return err } - if pr.Format == UnknownFormat || pr.Format == OFSDeltaFormat { - pr.offsets[pos] = obj - } - - if pr.Format == UnknownFormat || pr.Format == REFDeltaFormat { - pr.objects[obj.Hash] = obj + if r.Format == UnknownFormat || r.Format == OFSDeltaFormat { + r.offsets[start] = obj.Hash() } - ch <- obj - + r.s.Set(obj) if err == io.EOF { break } @@ -147,154 +144,147 @@ func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error { return nil } -func (pr *PackfileReader) Pos() int { return pr.r.Pos() } - -func (pr *PackfileReader) newRAWObject() (*RAWObject, error) { - raw := &RAWObject{} - steps := 0 +func (r *Reader) newRAWObject() (common.Object, error) { + raw := r.s.New() + var steps int64 var buf [1]byte - if _, err := pr.r.Read(buf[:]); err != nil { + if _, err := r.r.Read(buf[:]); err != nil { return nil, err } - raw.Type = ObjectType((buf[0] >> 4) & 7) - raw.Size = uint64(buf[0] & 15) + typ := common.ObjectType((buf[0] >> 4) & 7) + size := int64(buf[0] & 15) steps++ // byte we just read to get `o.typ` and `o.size` var shift uint = 4 for buf[0]&0x80 == 0x80 { - if _, err := pr.r.Read(buf[:]); err != nil { + if _, err := r.r.Read(buf[:]); err != nil { return nil, err } - raw.Size += uint64(buf[0]&0x7f) << shift + size += int64(buf[0]&0x7f) << shift steps++ // byte we just read to update `o.size` shift += 7 } + raw.SetType(typ) + raw.SetSize(size) + var err error - switch raw.Type { - case REFDeltaObject: - err = pr.readREFDelta(raw) - case OFSDeltaObject: - err = pr.readOFSDelta(raw, steps) - case CommitObject, TreeObject, BlobObject, TagObject: - err = pr.readObject(raw) + switch raw.Type() { + case common.REFDeltaObject: + err = r.readREFDelta(raw) + case common.OFSDeltaObject: + err = r.readOFSDelta(raw, steps) + case common.CommitObject, common.TreeObject, common.BlobObject, common.TagObject: + err = r.readObject(raw) default: - err = NewError("Invalid git object tag %q", raw.Type) + err = InvalidObjectErr.n("tag %q", raw.Type) } return raw, err } -func (pr *PackfileReader) readREFDelta(raw *RAWObject) error { - var ref Hash - if _, err := pr.r.Read(ref[:]); err != nil { +func (r *Reader) readREFDelta(raw common.Object) error { + var ref common.Hash + if _, err := r.r.Read(ref[:]); err != nil { return err } - buf, err := pr.inflate(raw.Size) - if err != nil { + buf := bytes.NewBuffer(nil) + if err := r.inflate(buf); err != nil { return err } - referenced, ok := pr.objects[ref] + referenced, ok := r.s.Get(ref) if !ok { - fmt.Println("not found", ref) - } else { - patched := PatchDelta(referenced.Bytes, buf[:]) - if patched == nil { - return NewError("error while patching %x", ref) - } + return ObjectNotFoundErr.n("%s", ref) + } - raw.Type = referenced.Type - raw.Bytes = patched - raw.Size = uint64(len(patched)) - raw.Hash = ComputeHash(raw.Type, raw.Bytes) + d, _ := ioutil.ReadAll(referenced.Reader()) + patched := patchDelta(d, buf.Bytes()) + if patched == nil { + return PatchingErr.n("hash %q", ref) } + raw.SetType(referenced.Type()) + raw.SetSize(int64(len(patched))) + raw.Writer().Write(patched) + return nil } -func (pr *PackfileReader) readOFSDelta(raw *RAWObject, steps int) error { - var pos = pr.Pos() - - // read negative offset - offset, err := decodeOffset(pr.r, steps) +func (r *Reader) readOFSDelta(raw common.Object, steps int64) error { + start := r.r.position + offset, err := decodeOffset(r.r, steps) if err != nil { return err } - buf, err := pr.inflate(raw.Size) - if err != nil { + buf := bytes.NewBuffer(nil) + if err := r.inflate(buf); err != nil { return err } - ref, ok := pr.offsets[pos+offset] + ref, ok := r.offsets[start+offset] if !ok { - return NewError("can't find a pack entry at %d", pos+offset) + return PackEntryNotFoundErr.n("offset %d", start+offset) } - patched := PatchDelta(ref.Bytes, buf) + referenced, _ := r.s.Get(ref) + d, _ := ioutil.ReadAll(referenced.Reader()) + patched := patchDelta(d, buf.Bytes()) if patched == nil { - return NewError("error while patching %q", ref) + return PatchingErr.n("hash %q", ref) } - raw.Type = ref.Type - raw.Bytes = patched - raw.Size = uint64(len(patched)) - raw.Hash = ComputeHash(raw.Type, raw.Bytes) + raw.SetType(referenced.Type()) + raw.SetSize(int64(len(patched))) + raw.Writer().Write(patched) return nil } -func (pr *PackfileReader) readObject(raw *RAWObject) error { - buf, err := pr.inflate(raw.Size) - if err != nil { - return err - } - - raw.Bytes = buf - raw.Hash = ComputeHash(raw.Type, raw.Bytes) - - return nil +func (r *Reader) readObject(raw common.Object) error { + return r.inflate(raw.Writer()) } -func (pr *PackfileReader) inflate(size uint64) ([]byte, error) { - zr, err := zlib.NewReader(pr.r) +func (r *Reader) inflate(w io.Writer) error { + zr, err := zlib.NewReader(r.r) if err != nil { if err == zlib.ErrHeader { - return nil, zlib.ErrHeader + return zlib.ErrHeader } - return nil, NewError("error opening packfile's object zlib: %v", err) + return ZLibErr.n("%s", err) } defer zr.Close() - if size > pr.MaxObjectSize { - return nil, NewError("the object size %q exceeed the allowed limit: %q", - size, pr.MaxObjectSize) - } - - var buf bytes.Buffer - io.Copy(&buf, zr) // also: io.CopyN(&buf, zr, int64(o.size)) - - if buf.Len() != int(size) { - return nil, NewError( - "inflated size mismatch, expected %d, got %d", size, buf.Len()) - } - - return buf.Bytes(), nil + _, err = io.Copy(w, zr) + return err } type ReaderError struct { - Msg string // description of error + reason, additional string +} + +func newError(reason string) *ReaderError { + return &ReaderError{reason: reason} } -func NewError(format string, args ...interface{}) error { - return &ReaderError{Msg: fmt.Sprintf(format, args...)} +func (e *ReaderError) Error() string { + if e.additional == "" { + return e.reason + } + + return fmt.Sprintf("%s: %s", e.reason, e.additional) } -func (e *ReaderError) Error() string { return e.Msg } +func (e *ReaderError) n(format string, args ...interface{}) *ReaderError { + return &ReaderError{ + reason: e.reason, + additional: fmt.Sprintf(format, args...), + } +} diff --git a/formats/packfile/reader_test.go b/formats/packfile/reader_test.go index 917eee1..14c092e 100644 --- a/formats/packfile/reader_test.go +++ b/formats/packfile/reader_test.go @@ -6,13 +6,17 @@ import ( "fmt" "os" "runtime" + "testing" "time" - "github.com/dustin/go-humanize" + "gopkg.in/src-d/go-git.v2/common" + "github.com/dustin/go-humanize" . "gopkg.in/check.v1" ) +func Test(t *testing.T) { TestingT(t) } + type ReaderSuite struct{} var _ = Suite(&ReaderSuite{}) @@ -23,13 +27,13 @@ func (s *ReaderSuite) TestReadPackfile(c *C) { data, _ := base64.StdEncoding.DecodeString(packFileWithEmptyObjects) d := bytes.NewReader(data) - r, err := NewPackfileReader(d, nil) - c.Assert(err, IsNil) + r := NewReader(d) - ch, err := r.Read() + storage := common.NewRAWObjectStorage() + _, err := r.Read(storage) c.Assert(err, IsNil) - AssertObjects(c, ch, []string{ + AssertObjects(c, storage, []string{ "778c85ff95b5514fea0ba4c7b6a029d32e2c3b96", "db4002e880a08bf6cc7217512ad937f1ac8824a2", "551fe11a9ef992763b7e0be4500cf7169f2f8575", @@ -56,14 +60,14 @@ func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string, f Format) { d, err := os.Open(file) c.Assert(err, IsNil) - r, err := NewPackfileReader(d, nil) - c.Assert(err, IsNil) - + r := NewReader(d) r.Format = f - ch, err := r.Read() + + storage := common.NewRAWObjectStorage() + _, err = r.Read(storage) c.Assert(err, IsNil) - AssertObjects(c, ch, []string{ + AssertObjects(c, storage, []string{ "918c48b83bd081e863dbe1b80f8998f058cd8294", "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", "1669dce138d9b841a518c64b10914d88f5e488ea", @@ -95,14 +99,12 @@ func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string, f Format) { }) } -func AssertObjects(c *C, ch chan *RAWObject, expects []string) { +func AssertObjects(c *C, s *common.RAWObjectStorage, expects []string) { + c.Assert(len(expects), Equals, len(s.Objects)) for _, expected := range expects { - obtained := <-ch - c.Assert(obtained.Hash.String(), Equals, expected) - - computed := ComputeHash(obtained.Type, obtained.Bytes) - c.Assert(computed.String(), Equals, expected) - c.Assert(obtained.Bytes, HasLen, int(obtained.Size)) + obtained, ok := s.Get(common.NewHash(expected)) + c.Assert(ok, Equals, true) + c.Assert(obtained.Hash().String(), Equals, expected) } } @@ -150,7 +152,7 @@ func (s *ReaderSuite) _TestMemoryOFS(c *C) { fmt.Println("HeapAlloc", a.HeapAlloc-b.HeapAlloc, humanize.Bytes(a.HeapAlloc-b.HeapAlloc)) fmt.Println("HeapSys", a.HeapSys, humanize.Bytes(a.HeapSys-b.HeapSys)) - fmt.Println("objects", len(p)) + fmt.Println("objects", len(p.Objects)) fmt.Println("time", time.Since(start)) } @@ -168,26 +170,20 @@ func (s *ReaderSuite) _TestMemoryREF(c *C) { fmt.Println("HeapAlloc", a.HeapAlloc-b.HeapAlloc, humanize.Bytes(a.HeapAlloc-b.HeapAlloc)) fmt.Println("HeapSys", a.HeapSys, humanize.Bytes(a.HeapSys-b.HeapSys)) - fmt.Println("objects", len(p)) + fmt.Println("objects", len(p.Objects)) fmt.Println("time", time.Since(start)) } -func readFromFile(c *C, file string, f Format) []*RAWObject { +func readFromFile(c *C, file string, f Format) *common.RAWObjectStorage { d, err := os.Open(file) c.Assert(err, IsNil) - r, err := NewPackfileReader(d, nil) - c.Assert(err, IsNil) - + r := NewReader(d) r.Format = f - ch, err := r.Read() - c.Assert(err, IsNil) - c.Assert(ch, NotNil) - var objs []*RAWObject - for o := range ch { - objs = append(objs, o) - } + storage := common.NewRAWObjectStorage() + _, err = r.Read(storage) + c.Assert(err, IsNil) - return objs + return storage } diff --git a/formats/packfile/objects.go b/objects.go index 9286090..77bdc2a 100644 --- a/formats/packfile/objects.go +++ b/objects.go @@ -1,76 +1,19 @@ -package packfile +package git import ( "bytes" - "crypto/sha1" "encoding/hex" "fmt" "strconv" "time" -) - -type ObjectType int8 -const ( - CommitObject ObjectType = 1 - TreeObject ObjectType = 2 - BlobObject ObjectType = 3 - TagObject ObjectType = 4 - OFSDeltaObject ObjectType = 6 - REFDeltaObject ObjectType = 7 + "gopkg.in/src-d/go-git.v2/common" ) -func (t ObjectType) String() string { - switch t { - case CommitObject: - return "commit" - case TreeObject: - return "tree" - case BlobObject: - return "blob" - default: - return "-" - } -} - -type RAWObject struct { - Hash Hash - Type ObjectType - Size uint64 - Bytes []byte -} - // Object generic object interface type Object interface { - Type() ObjectType - Hash() Hash -} - -// Hash SHA1 hased content -type Hash [20]byte - -// ComputeHash compute the hash for a given objType and content -func ComputeHash(t ObjectType, content []byte) Hash { - h := []byte(t.String()) - h = append(h, ' ') - h = strconv.AppendInt(h, int64(len(content)), 10) - h = append(h, 0) - h = append(h, content...) - - return Hash(sha1.Sum(h)) -} - -func NewHash(s string) Hash { - b, _ := hex.DecodeString(s) - - var h Hash - copy(h[:], b) - - return h -} - -func (h Hash) String() string { - return hex.EncodeToString(h[:]) + Type() common.ObjectType + Hash() common.Hash } // Commit points to a single tree, marking it as what the project looked like @@ -79,17 +22,17 @@ func (h Hash) String() string { // commit, a pointer to the previous commit(s), etc. // http://schacon.github.io/gitbook/1_the_git_object_model.html type Commit struct { - Tree Hash - Parents []Hash + Tree common.Hash + Parents []common.Hash Author Signature Committer Signature Message string - hash Hash + hash common.Hash } // ParseCommit transform a byte slice into a Commit struct func ParseCommit(b []byte) (*Commit, error) { - o := &Commit{hash: ComputeHash(CommitObject, b)} + o := &Commit{hash: common.ComputeHash(common.CommitObject, b)} lines := bytes.Split(b, []byte{'\n'}) for i := range lines { @@ -101,7 +44,7 @@ func ParseCommit(b []byte) (*Commit, error) { case "tree": _, err = hex.Decode(o.Tree[:], split[1]) case "parent": - var h Hash + var h common.Hash _, err = hex.Decode(h[:], split[1]) if err == nil { o.Parents = append(o.Parents, h) @@ -126,12 +69,12 @@ func ParseCommit(b []byte) (*Commit, error) { } // Type returns the object type -func (o *Commit) Type() ObjectType { - return CommitObject +func (o *Commit) Type() common.ObjectType { + return common.CommitObject } // Hash returns the computed hash of the commit -func (o *Commit) Hash() Hash { +func (o *Commit) Hash() common.Hash { return o.hash } @@ -139,18 +82,18 @@ func (o *Commit) Hash() Hash { // and/or blobs (i.e. files and sub-directories) type Tree struct { Entries []TreeEntry - hash Hash + hash common.Hash } // TreeEntry represents a file type TreeEntry struct { Name string - Hash Hash + Hash common.Hash } // ParseTree transform a byte slice into a Tree struct func ParseTree(b []byte) (*Tree, error) { - o := &Tree{hash: ComputeHash(TreeObject, b)} + o := &Tree{hash: common.ComputeHash(common.TreeObject, b)} if len(b) == 0 { return o, nil @@ -176,41 +119,39 @@ func ParseTree(b []byte) (*Tree, error) { } // Type returns the object type -func (o *Tree) Type() ObjectType { - return TreeObject +func (o *Tree) Type() common.ObjectType { + return common.TreeObject } // Hash returns the computed hash of the tree -func (o *Tree) Hash() Hash { +func (o *Tree) Hash() common.Hash { return o.hash } // Blob is used to store file data - it is generally a file. type Blob struct { Len int - hash Hash + hash common.Hash } // ParseBlob transform a byte slice into a Blob struct func ParseBlob(b []byte) (*Blob, error) { return &Blob{ Len: len(b), - hash: ComputeHash(BlobObject, b), + hash: common.ComputeHash(common.BlobObject, b), }, nil } // Type returns the object type -func (o *Blob) Type() ObjectType { - return BlobObject +func (o *Blob) Type() common.ObjectType { + return common.BlobObject } // Hash returns the computed hash of the blob -func (o *Blob) Hash() Hash { +func (o *Blob) Hash() common.Hash { return o.hash } -type ContentCallback func(hash Hash, content []byte) - // Signature represents an action signed by a person type Signature struct { Name string diff --git a/formats/packfile/objects_test.go b/objects_test.go index 0760653..53da5f0 100644 --- a/formats/packfile/objects_test.go +++ b/objects_test.go @@ -1,4 +1,4 @@ -package packfile +package git import ( "encoding/base64" @@ -11,20 +11,6 @@ type ObjectsSuite struct{} var _ = Suite(&ObjectsSuite{}) -func (s *ObjectsSuite) TestComputeHash(c *C) { - hash := ComputeHash(BlobObject, []byte("")) - c.Assert(hash.String(), Equals, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") - - hash = ComputeHash(BlobObject, []byte("Hello, World!\n")) - c.Assert(hash.String(), Equals, "8ab686eafeb1f44702738c8b0f24f2567c36da6d") -} - -func (s *ObjectsSuite) TestNewHash(c *C) { - hash := ComputeHash(BlobObject, []byte("Hello, World!\n")) - - c.Assert(hash, Equals, NewHash(hash.String())) -} - var CommitFixture = "dHJlZSBjMmQzMGZhOGVmMjg4NjE4ZjY1ZjZlZWQ2ZTE2OGUwZDUxNDg4NmY0CnBhcmVudCBiMDI5NTE3ZjYzMDBjMmRhMGY0YjY1MWI4NjQyNTA2Y2Q2YWFmNDVkCnBhcmVudCBiOGU0NzFmNThiY2JjYTYzYjA3YmRhMjBlNDI4MTkwNDA5YzJkYjQ3CmF1dGhvciBNw6F4aW1vIEN1YWRyb3MgPG1jdWFkcm9zQGdtYWlsLmNvbT4gMTQyNzgwMjQzNCArMDIwMApjb21taXR0ZXIgTcOheGltbyBDdWFkcm9zIDxtY3VhZHJvc0BnbWFpbC5jb20+IDE0Mjc4MDI0MzQgKzAyMDAKCk1lcmdlIHB1bGwgcmVxdWVzdCAjMSBmcm9tIGRyaXBvbGxlcy9mZWF0dXJlCgpDcmVhdGluZyBjaGFuZ2Vsb2c=" func (s *ObjectsSuite) TestParseCommit(c *C) { diff --git a/remote_test.go b/remote_test.go index 6cdbce0..5674407 100644 --- a/remote_test.go +++ b/remote_test.go @@ -1,6 +1,7 @@ package git import ( + "gopkg.in/src-d/go-git.v2/common" "gopkg.in/src-d/go-git.v2/formats/packfile" . "gopkg.in/check.v1" @@ -40,10 +41,10 @@ func (s *SuiteRemote) TestFetchDefaultBranch(c *C) { reader, err := r.FetchDefaultBranch() c.Assert(err, IsNil) - pr, err := packfile.NewPackfileReader(reader, nil) - c.Assert(err, IsNil) + pr := packfile.NewReader(reader) - ch, err := pr.Read() + storage := common.NewRAWObjectStorage() + _, err = pr.Read(storage) c.Assert(err, IsNil) - c.Assert(ch, NotNil) + c.Assert(storage.Objects, HasLen, 28) } |