aboutsummaryrefslogtreecommitdiffstats
path: root/formats
diff options
context:
space:
mode:
author: Máximo Cuadros <mcuadros@gmail.com> 2015-10-25 12:34:25 +0100
committer: Máximo Cuadros <mcuadros@gmail.com> 2015-10-25 12:34:25 +0100
commit: be69a505926451bf10450ac68d40265a6f43e150 (patch)
tree: c0c4a19d75a5cb9158d1d35419918d806b251dfd /formats
parent: f5dfba3742d551411ed0d6279c18f867b6496368 (diff)
download: go-git-be69a505926451bf10450ac68d40265a6f43e150.tar.gz
formats/packfile: new reader API (wip)
Diffstat (limited to 'formats')
-rw-r--r-- formats/packfile/delta.go 23
-rw-r--r-- formats/packfile/objects.go 1
-rw-r--r-- formats/packfile/reader.go 215
-rw-r--r-- formats/packfile/reader_test.go 110
4 files changed, 207 insertions, 142 deletions
diff --git a/formats/packfile/delta.go b/formats/packfile/delta.go
index 86b556f..30703eb 100644
--- a/formats/packfile/delta.go
+++ b/formats/packfile/delta.go
@@ -1,5 +1,7 @@
package packfile
+import "io"
+
const delta_size_min = 4
func deltaHeaderSize(b []byte) (uint, []byte) {
@@ -91,3 +93,24 @@ func PatchDelta(src, delta []byte) []byte {
}
return dest
}
+
+func decodeOffset(src io.ByteReader, steps int) (int, error) {
+ b, err := src.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ var offset = int(b & 0x7f)
+ for (b & 0x80) != 0 {
+ offset++ // git's ofs-delta encoding adds 1 per continuation byte, so multi-byte offsets never overlap shorter ones
+ b, err = src.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+
+ offset = (offset << 7) + int(b&0x7f)
+ }
+
+ // steps is the number of entry-header bytes already consumed; add it so the distance is measured from the header start
+ offset += steps
+ return -offset, nil
+}
diff --git a/formats/packfile/objects.go b/formats/packfile/objects.go
index bd76896..9286090 100644
--- a/formats/packfile/objects.go
+++ b/formats/packfile/objects.go
@@ -36,6 +36,7 @@ func (t ObjectType) String() string {
type RAWObject struct {
Hash Hash
Type ObjectType
+ Size uint64
Bytes []byte
}
diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go
index f79f2ab..c355e12 100644
--- a/formats/packfile/reader.go
+++ b/formats/packfile/reader.go
@@ -9,10 +9,16 @@ import (
"github.com/klauspost/compress/zlib"
)
+type Format int
+
const (
DefaultMaxObjectsLimit = 1 << 20
DefaultMaxObjectSize = 1 << 32 // 4GB
+ VersionSupported = 2
+ UnknownFormat Format = 0
+ OFSDeltaFormat Format = 1
+ REFDeltaFormat Format = 2
)
type PackfileReader struct {
@@ -21,43 +27,34 @@ type PackfileReader struct {
// is defined by DefaultMaxObjectsLimit. Usually the default limit is more
// than enough to work with any repository; with extremely big repositories,
// where the number of objects is larger, the memory can be exhausted.
- MaxObjectsLimit int
+ MaxObjectsLimit uint32
// MaxObjectSize is the maximum size in bytes; reading objects with a bigger
// size causes an error. The default value is defined by DefaultMaxObjectSize.
- MaxObjectSize int
-
- r *trackingReader
- objects map[Hash]*RAWObject
- offsets map[int]Hash
- deltas []packfileDelta
- contentCallback ContentCallback
-}
+ MaxObjectSize uint64
-type packfileObject struct {
- bytes []byte
- typ ObjectType
-}
+ // Format specifies whether we are using ref-deltas or ofs-deltas; choosing
+ // the correct format optimizes memory usage.
+ // https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/Documentation/technical/protocol-capabilities.txt#L154
+ Format Format
-type packfileDelta struct {
- hash Hash
- delta []byte
+ r *trackingReader
+ objects map[Hash]*RAWObject
+ offsets map[int]*RAWObject
}
func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) {
return &PackfileReader{
MaxObjectsLimit: DefaultMaxObjectsLimit,
MaxObjectSize: DefaultMaxObjectSize,
- r: &trackingReader{r: r},
- objects: make(map[Hash]*RAWObject, 0),
- offsets: make(map[int]Hash, 0),
- contentCallback: fn,
+
+ r: &trackingReader{r: r},
+ objects: make(map[Hash]*RAWObject, 0),
+ offsets: make(map[int]*RAWObject, 0),
}, nil
}
func (pr *PackfileReader) Read() (chan *RAWObject, error) {
- packfile := NewPackfile()
-
if err := pr.validateHeader(); err != nil {
if err == io.EOF {
// This is an empty repo. It's OK.
@@ -67,29 +64,28 @@ func (pr *PackfileReader) Read() (chan *RAWObject, error) {
return nil, err
}
- ver, err := pr.readInt32()
+ version, err := pr.readInt32()
if err != nil {
return nil, err
}
+ if version > VersionSupported {
+ return nil, NewError("unsupported packfile version %d", version)
+ }
+
count, err := pr.readInt32()
if err != nil {
return nil, err
}
- packfile.Version = uint32(ver)
- packfile.ObjectCount = int(count)
-
- if packfile.ObjectCount > pr.MaxObjectsLimit {
- return nil, NewError("too many objects %d, limit is %d",
- packfile.ObjectCount, pr.MaxObjectsLimit)
+ if count > pr.MaxObjectsLimit {
+ return nil, NewError("too many objects %d, limit is %d", count, pr.MaxObjectsLimit)
}
ch := make(chan *RAWObject, 1)
-
go pr.readObjects(ch, count)
- packfile.Size = int64(pr.r.Pos())
+ // packfile.Size = int64(pr.r.Pos())
return ch, nil
}
@@ -127,14 +123,20 @@ func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error {
for i := 0; i < int(count); i++ {
var pos = pr.Pos()
- obj, err := pr.readObject()
+ obj, err := pr.newRAWObject()
if err != nil && err != io.EOF {
fmt.Println(err)
return err
}
- pr.offsets[pos] = obj.Hash
- pr.objects[obj.Hash] = obj
+ if pr.Format == UnknownFormat || pr.Format == OFSDeltaFormat {
+ pr.offsets[pos] = obj
+ }
+
+ if pr.Format == UnknownFormat || pr.Format == REFDeltaFormat {
+ pr.objects[obj.Hash] = obj
+ }
+
ch <- obj
if err == io.EOF {
@@ -145,86 +147,61 @@ func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error {
return nil
}
-func (pr *PackfileReader) readObject() (*RAWObject, error) {
-
- o, err := newObjectReader(pr, pr.MaxObjectSize)
- if err != nil {
- return nil, err
- }
-
- raw := &RAWObject{Type: o.typ}
-
- switch o.typ {
- case REFDeltaObject:
- err = o.readREFDelta(raw)
- case OFSDeltaObject:
- err = o.readOFSDelta(raw)
- case CommitObject, TreeObject, BlobObject, TagObject:
- err = o.readObject(raw)
- default:
- err = NewError("Invalid git object tag %q", o.typ)
- }
-
- if err != nil {
- return nil, err
- }
-
- return raw, err
-}
-
func (pr *PackfileReader) Pos() int { return pr.r.Pos() }
-type objectReader struct {
- pr *PackfileReader
- pf *Packfile
- maxSize uint64
-
- hash Hash
- steps int
- typ ObjectType
- size uint64
-}
-
-func newObjectReader(pr *PackfileReader, maxSize int) (*objectReader, error) {
- o := &objectReader{pr: pr, maxSize: uint64(maxSize)}
+func (pr *PackfileReader) newRAWObject() (*RAWObject, error) {
+ raw := &RAWObject{}
+ steps := 0
var buf [1]byte
- if _, err := o.Read(buf[:]); err != nil {
+ if _, err := pr.r.Read(buf[:]); err != nil {
return nil, err
}
- o.typ = ObjectType((buf[0] >> 4) & 7)
- o.size = uint64(buf[0] & 15)
- o.steps++ // byte we just read to get `o.typ` and `o.size`
+ raw.Type = ObjectType((buf[0] >> 4) & 7)
+ raw.Size = uint64(buf[0] & 15)
+ steps++ // byte we just read to get `raw.Type` and `raw.Size`
var shift uint = 4
for buf[0]&0x80 == 0x80 {
- if _, err := o.Read(buf[:]); err != nil {
+ if _, err := pr.r.Read(buf[:]); err != nil {
return nil, err
}
- o.size += uint64(buf[0]&0x7f) << shift
- o.steps++ // byte we just read to update `o.size`
+ raw.Size += uint64(buf[0]&0x7f) << shift
+ steps++ // byte we just read to update `raw.Size`
shift += 7
}
- return o, nil
+ var err error
+ switch raw.Type {
+ case REFDeltaObject:
+ err = pr.readREFDelta(raw)
+ case OFSDeltaObject:
+ err = pr.readOFSDelta(raw, steps)
+ case CommitObject, TreeObject, BlobObject, TagObject:
+ err = pr.readObject(raw)
+ default:
+ err = NewError("Invalid git object tag %q", raw.Type)
+ }
+
+ return raw, err
}
-func (o *objectReader) readREFDelta(raw *RAWObject) error {
+func (pr *PackfileReader) readREFDelta(raw *RAWObject) error {
var ref Hash
- if _, err := o.Read(ref[:]); err != nil {
+ if _, err := pr.r.Read(ref[:]); err != nil {
return err
}
- buf, err := o.inflate()
+ buf, err := pr.inflate(raw.Size)
if err != nil {
return err
}
- referenced, ok := o.pr.objects[ref]
+ referenced, ok := pr.objects[ref]
if !ok {
- o.pr.deltas = append(o.pr.deltas, packfileDelta{hash: ref, delta: buf[:]})
+ fmt.Println("not found", ref)
} else {
patched := PatchDelta(referenced.Bytes, buf[:])
if patched == nil {
@@ -233,67 +210,47 @@ func (o *objectReader) readREFDelta(raw *RAWObject) error {
raw.Type = referenced.Type
raw.Bytes = patched
+ raw.Size = uint64(len(patched))
raw.Hash = ComputeHash(raw.Type, raw.Bytes)
}
return nil
}
-func decodeOffset(src io.ByteReader, steps int) (int, error) {
- b, err := src.ReadByte()
- if err != nil {
- return 0, err
- }
- var offset = int(b & 0x7f)
- for (b & 0x80) != 0 {
- offset++ // WHY?
- b, err = src.ReadByte()
- if err != nil {
- return 0, err
- }
-
- offset = (offset << 7) + int(b&0x7f)
- }
-
- // offset needs to be aware of the bytes we read for `o.typ` and `o.size`
- offset += steps
- return -offset, nil
-}
-
-func (o *objectReader) readOFSDelta(raw *RAWObject) error {
- var pos = o.pr.Pos()
+func (pr *PackfileReader) readOFSDelta(raw *RAWObject, steps int) error {
+ var pos = pr.Pos()
// read negative offset
- offset, err := decodeOffset(o.pr.r, o.steps)
+ offset, err := decodeOffset(pr.r, steps)
if err != nil {
return err
}
- buf, err := o.inflate()
+ buf, err := pr.inflate(raw.Size)
if err != nil {
return err
}
- ref := o.pr.offsets[pos+offset]
- referenced, ok := o.pr.objects[ref]
+ ref, ok := pr.offsets[pos+offset]
if !ok {
return NewError("can't find a pack entry at %d", pos+offset)
}
- patched := PatchDelta(referenced.Bytes, buf)
+ patched := PatchDelta(ref.Bytes, buf)
if patched == nil {
return NewError("error while patching %q", ref)
}
- raw.Type = referenced.Type
+ raw.Type = ref.Type
raw.Bytes = patched
+ raw.Size = uint64(len(patched))
raw.Hash = ComputeHash(raw.Type, raw.Bytes)
return nil
}
-func (o *objectReader) readObject(raw *RAWObject) error {
- buf, err := o.inflate()
+func (pr *PackfileReader) readObject(raw *RAWObject) error {
+ buf, err := pr.inflate(raw.Size)
if err != nil {
return err
}
@@ -304,8 +261,8 @@ func (o *objectReader) readObject(raw *RAWObject) error {
return nil
}
-func (o *objectReader) inflate() ([]byte, error) {
- zr, err := zlib.NewReader(o.pr.r)
+func (pr *PackfileReader) inflate(size uint64) ([]byte, error) {
+ zr, err := zlib.NewReader(pr.r)
if err != nil {
if err == zlib.ErrHeader {
return nil, zlib.ErrHeader
@@ -316,30 +273,22 @@ func (o *objectReader) inflate() ([]byte, error) {
defer zr.Close()
- if o.size > o.maxSize {
+ if size > pr.MaxObjectSize {
return nil, NewError("the object size %q exceeed the allowed limit: %q",
- o.size, o.maxSize)
+ size, pr.MaxObjectSize)
}
var buf bytes.Buffer
io.Copy(&buf, zr) // also: io.CopyN(&buf, zr, int64(o.size))
- var bufLen = buf.Len()
- if bufLen != int(o.size) {
- return nil, NewError("inflated size mismatch, expected %d, got %d", o.size, bufLen)
+ if buf.Len() != int(size) {
+ return nil, NewError(
+ "inflated size mismatch, expected %d, got %d", size, buf.Len())
}
return buf.Bytes(), nil
}
-func (o *objectReader) Read(p []byte) (int, error) {
- return o.pr.r.Read(p)
-}
-
-func (o *objectReader) ReadByte() (byte, error) {
- return o.pr.r.ReadByte()
-}
-
type ReaderError struct {
Msg string // description of error
}
diff --git a/formats/packfile/reader_test.go b/formats/packfile/reader_test.go
index e49a976..917eee1 100644
--- a/formats/packfile/reader_test.go
+++ b/formats/packfile/reader_test.go
@@ -3,7 +3,12 @@ package packfile
import (
"bytes"
"encoding/base64"
+ "fmt"
"os"
+ "runtime"
+ "time"
+
+ "github.com/dustin/go-humanize"
. "gopkg.in/check.v1"
)
@@ -40,20 +45,21 @@ func (s *ReaderSuite) TestReadPackfile(c *C) {
}
func (s *ReaderSuite) TestReadPackfileOFSDelta(c *C) {
- s.testReadPackfileGitFixture(c, "fixtures/git-fixture.ofs-delta")
+ s.testReadPackfileGitFixture(c, "fixtures/git-fixture.ofs-delta", OFSDeltaFormat)
}
func (s *ReaderSuite) TestReadPackfileREFDelta(c *C) {
- s.testReadPackfileGitFixture(c, "fixtures/git-fixture.ref-delta")
+ s.testReadPackfileGitFixture(c, "fixtures/git-fixture.ref-delta", REFDeltaFormat)
}
-func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string) {
+func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string, f Format) {
d, err := os.Open(file)
c.Assert(err, IsNil)
r, err := NewPackfileReader(d, nil)
c.Assert(err, IsNil)
+ r.Format = f
ch, err := r.Read()
c.Assert(err, IsNil)
@@ -89,13 +95,99 @@ func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string) {
})
}
-func AssertObjects(c *C, ch chan *RAWObject, expected []string) {
- i := 0
- for obtained := range ch {
- c.Assert(obtained.Hash.String(), Equals, expected[i])
+func AssertObjects(c *C, ch chan *RAWObject, expects []string) {
+ for _, expected := range expects {
+ obtained := <-ch
+ c.Assert(obtained.Hash.String(), Equals, expected)
+
computed := ComputeHash(obtained.Type, obtained.Bytes)
- c.Assert(computed.String(), Equals, expected[i])
+ c.Assert(computed.String(), Equals, expected)
+ c.Assert(obtained.Bytes, HasLen, int(obtained.Size))
+ }
+}
+
+func (s *ReaderSuite) BenchmarkFixtureRef(c *C) {
+ for i := 0; i < c.N; i++ {
+ readFromFile(c, "fixtures/git-fixture.ref-delta", REFDeltaFormat)
+ }
+}
+
+func (s *ReaderSuite) BenchmarkFixtureOfs(c *C) {
+ for i := 0; i < c.N; i++ {
+ readFromFile(c, "fixtures/git-fixture.ofs-delta", OFSDeltaFormat)
+ }
+}
- i++
+func (s *ReaderSuite) BenchmarkCandyJS(c *C) {
+ for i := 0; i < c.N; i++ {
+ readFromFile(c, "/tmp/go-candyjs", REFDeltaFormat)
}
}
+
+func (s *ReaderSuite) BenchmarkSymfony(c *C) {
+ for i := 0; i < c.N; i++ {
+ readFromFile(c, "/tmp/symonfy", REFDeltaFormat)
+ }
+}
+
+func (s *ReaderSuite) BenchmarkGit(c *C) {
+ for i := 0; i < c.N; i++ {
+ readFromFile(c, "/tmp/git", REFDeltaFormat)
+ }
+}
+
+func (s *ReaderSuite) _TestMemoryOFS(c *C) {
+ var b, a runtime.MemStats
+
+ start := time.Now()
+ runtime.ReadMemStats(&b)
+ p := readFromFile(c, "/tmp/symfony.ofs-delta", OFSDeltaFormat)
+ runtime.ReadMemStats(&a)
+
+ fmt.Println("OFS--->")
+ fmt.Println("Alloc", a.Alloc-b.Alloc, humanize.Bytes(a.Alloc-b.Alloc))
+ fmt.Println("TotalAlloc", a.TotalAlloc-b.TotalAlloc, humanize.Bytes(a.TotalAlloc-b.TotalAlloc))
+ fmt.Println("HeapAlloc", a.HeapAlloc-b.HeapAlloc, humanize.Bytes(a.HeapAlloc-b.HeapAlloc))
+ fmt.Println("HeapSys", a.HeapSys, humanize.Bytes(a.HeapSys-b.HeapSys))
+
+ fmt.Println("objects", len(p))
+ fmt.Println("time", time.Since(start))
+}
+
+func (s *ReaderSuite) _TestMemoryREF(c *C) {
+ var b, a runtime.MemStats
+
+ start := time.Now()
+ runtime.ReadMemStats(&b)
+ p := readFromFile(c, "/tmp/symonfy", REFDeltaFormat)
+ runtime.ReadMemStats(&a)
+
+ fmt.Println("REF--->")
+ fmt.Println("Alloc", a.Alloc-b.Alloc, humanize.Bytes(a.Alloc-b.Alloc))
+ fmt.Println("TotalAlloc", a.TotalAlloc-b.TotalAlloc, humanize.Bytes(a.TotalAlloc-b.TotalAlloc))
+ fmt.Println("HeapAlloc", a.HeapAlloc-b.HeapAlloc, humanize.Bytes(a.HeapAlloc-b.HeapAlloc))
+ fmt.Println("HeapSys", a.HeapSys, humanize.Bytes(a.HeapSys-b.HeapSys))
+
+ fmt.Println("objects", len(p))
+ fmt.Println("time", time.Since(start))
+}
+
+func readFromFile(c *C, file string, f Format) []*RAWObject {
+ d, err := os.Open(file)
+ c.Assert(err, IsNil)
+
+ r, err := NewPackfileReader(d, nil)
+ c.Assert(err, IsNil)
+
+ r.Format = f
+ ch, err := r.Read()
+ c.Assert(err, IsNil)
+ c.Assert(ch, NotNil)
+
+ var objs []*RAWObject
+ for o := range ch {
+ objs = append(objs, o)
+ }
+
+ return objs
+}