about summary refs log tree commit diff stats
path: root/formats
diff options
context:
space:
mode:
author Máximo Cuadros <mcuadros@gmail.com> 2015-10-25 20:30:36 +0100
committer Máximo Cuadros <mcuadros@gmail.com> 2015-10-25 20:30:36 +0100
commit 9a44cd8ccff143a112436c38bfe5581e74b68f07 (patch)
tree f4d2f38cc61647bf159a7c870913e6f6b60828b2 /formats
parent be69a505926451bf10450ac68d40265a6f43e150 (diff)
download go-git-9a44cd8ccff143a112436c38bfe5581e74b68f07.tar.gz
formats/packfile: new reader API
Diffstat (limited to 'formats')
-rw-r--r-- formats/packfile/common.go 11
-rw-r--r-- formats/packfile/common_test.go 9
-rw-r--r-- formats/packfile/delta.go 13
-rw-r--r-- formats/packfile/doc.go 3
-rw-r--r-- formats/packfile/objects.go 276
-rw-r--r-- formats/packfile/objects_test.go 122
-rw-r--r-- formats/packfile/packfile.go 82
-rw-r--r-- formats/packfile/reader.go 256
-rw-r--r-- formats/packfile/reader_test.go 58
9 files changed, 161 insertions, 669 deletions
diff --git a/formats/packfile/common.go b/formats/packfile/common.go
index 4a97dc7..06c63d4 100644
--- a/formats/packfile/common.go
+++ b/formats/packfile/common.go
@@ -6,20 +6,17 @@ import (
)
type trackingReader struct {
- r io.Reader
- n int
+ r io.Reader
+ position int64
}
-func (t *trackingReader) Pos() int { return t.n }
-
func (t *trackingReader) Read(p []byte) (n int, err error) {
n, err = t.r.Read(p)
if err != nil {
return 0, err
}
- t.n += n
-
+ t.position += int64(n)
return n, err
}
@@ -34,6 +31,6 @@ func (t *trackingReader) ReadByte() (c byte, err error) {
return 0, fmt.Errorf("read %d bytes, should have read just 1", n)
}
- t.n += n // n is 1
+ t.position++
return p[0], nil
}
diff --git a/formats/packfile/common_test.go b/formats/packfile/common_test.go
deleted file mode 100644
index 104a5d2..0000000
--- a/formats/packfile/common_test.go
+++ /dev/null
@@ -1,9 +0,0 @@
-package packfile
-
-import (
- "testing"
-
- . "gopkg.in/check.v1"
-)
-
-func Test(t *testing.T) { TestingT(t) }
diff --git a/formats/packfile/delta.go b/formats/packfile/delta.go
index 30703eb..571ccf8 100644
--- a/formats/packfile/delta.go
+++ b/formats/packfile/delta.go
@@ -2,7 +2,7 @@ package packfile
import "io"
-const delta_size_min = 4
+const deltaSizeMin = 4
func deltaHeaderSize(b []byte) (uint, []byte) {
var size, j uint
@@ -18,8 +18,8 @@ func deltaHeaderSize(b []byte) (uint, []byte) {
return size, b[j:]
}
-func PatchDelta(src, delta []byte) []byte {
- if len(delta) < delta_size_min {
+func patchDelta(src, delta []byte) []byte {
+ if len(delta) < deltaSizeMin {
return nil
}
size, delta := deltaHeaderSize(delta)
@@ -94,12 +94,13 @@ func PatchDelta(src, delta []byte) []byte {
return dest
}
-func decodeOffset(src io.ByteReader, steps int) (int, error) {
+func decodeOffset(src io.ByteReader, steps int64) (int64, error) {
b, err := src.ReadByte()
if err != nil {
return 0, err
}
- var offset = int(b & 0x7f)
+
+ var offset = int64(b & 0x7f)
for (b & 0x80) != 0 {
offset++ // WHY?
b, err = src.ReadByte()
@@ -107,7 +108,7 @@ func decodeOffset(src io.ByteReader, steps int) (int, error) {
return 0, err
}
- offset = (offset << 7) + int(b&0x7f)
+ offset = (offset << 7) + int64(b&0x7f)
}
// offset needs to be aware of the bytes we read for `o.typ` and `o.size`
diff --git a/formats/packfile/doc.go b/formats/packfile/doc.go
index 1fc28da..cb3f542 100644
--- a/formats/packfile/doc.go
+++ b/formats/packfile/doc.go
@@ -1,8 +1,5 @@
package packfile
-// Code from:
-// https://github.com/gitchain/gitchain/tree/master/git @ 4c2fabdf9
-//
// GIT pack format
// ===============
//
diff --git a/formats/packfile/objects.go b/formats/packfile/objects.go
deleted file mode 100644
index 9286090..0000000
--- a/formats/packfile/objects.go
+++ /dev/null
@@ -1,276 +0,0 @@
-package packfile
-
-import (
- "bytes"
- "crypto/sha1"
- "encoding/hex"
- "fmt"
- "strconv"
- "time"
-)
-
-type ObjectType int8
-
-const (
- CommitObject ObjectType = 1
- TreeObject ObjectType = 2
- BlobObject ObjectType = 3
- TagObject ObjectType = 4
- OFSDeltaObject ObjectType = 6
- REFDeltaObject ObjectType = 7
-)
-
-func (t ObjectType) String() string {
- switch t {
- case CommitObject:
- return "commit"
- case TreeObject:
- return "tree"
- case BlobObject:
- return "blob"
- default:
- return "-"
- }
-}
-
-type RAWObject struct {
- Hash Hash
- Type ObjectType
- Size uint64
- Bytes []byte
-}
-
-// Object generic object interface
-type Object interface {
- Type() ObjectType
- Hash() Hash
-}
-
-// Hash SHA1 hased content
-type Hash [20]byte
-
-// ComputeHash compute the hash for a given objType and content
-func ComputeHash(t ObjectType, content []byte) Hash {
- h := []byte(t.String())
- h = append(h, ' ')
- h = strconv.AppendInt(h, int64(len(content)), 10)
- h = append(h, 0)
- h = append(h, content...)
-
- return Hash(sha1.Sum(h))
-}
-
-func NewHash(s string) Hash {
- b, _ := hex.DecodeString(s)
-
- var h Hash
- copy(h[:], b)
-
- return h
-}
-
-func (h Hash) String() string {
- return hex.EncodeToString(h[:])
-}
-
-// Commit points to a single tree, marking it as what the project looked like
-// at a certain point in time. It contains meta-information about that point
-// in time, such as a timestamp, the author of the changes since the last
-// commit, a pointer to the previous commit(s), etc.
-// http://schacon.github.io/gitbook/1_the_git_object_model.html
-type Commit struct {
- Tree Hash
- Parents []Hash
- Author Signature
- Committer Signature
- Message string
- hash Hash
-}
-
-// ParseCommit transform a byte slice into a Commit struct
-func ParseCommit(b []byte) (*Commit, error) {
- o := &Commit{hash: ComputeHash(CommitObject, b)}
-
- lines := bytes.Split(b, []byte{'\n'})
- for i := range lines {
- if len(lines[i]) > 0 {
- var err error
-
- split := bytes.SplitN(lines[i], []byte{' '}, 2)
- switch string(split[0]) {
- case "tree":
- _, err = hex.Decode(o.Tree[:], split[1])
- case "parent":
- var h Hash
- _, err = hex.Decode(h[:], split[1])
- if err == nil {
- o.Parents = append(o.Parents, h)
- }
- case "author":
- o.Author = ParseSignature(split[1])
- case "committer":
- o.Committer = ParseSignature(split[1])
- }
-
- if err != nil {
- return nil, err
- }
- } else {
- o.Message = string(bytes.Join(append(lines[i+1:]), []byte{'\n'}))
- break
- }
-
- }
-
- return o, nil
-}
-
-// Type returns the object type
-func (o *Commit) Type() ObjectType {
- return CommitObject
-}
-
-// Hash returns the computed hash of the commit
-func (o *Commit) Hash() Hash {
- return o.hash
-}
-
-// Tree is basically like a directory - it references a bunch of other trees
-// and/or blobs (i.e. files and sub-directories)
-type Tree struct {
- Entries []TreeEntry
- hash Hash
-}
-
-// TreeEntry represents a file
-type TreeEntry struct {
- Name string
- Hash Hash
-}
-
-// ParseTree transform a byte slice into a Tree struct
-func ParseTree(b []byte) (*Tree, error) {
- o := &Tree{hash: ComputeHash(TreeObject, b)}
-
- if len(b) == 0 {
- return o, nil
- }
-
- for {
- split := bytes.SplitN(b, []byte{0}, 2)
- split1 := bytes.SplitN(split[0], []byte{' '}, 2)
-
- entry := TreeEntry{}
- entry.Name = string(split1[1])
- copy(entry.Hash[:], split[1][0:20])
-
- o.Entries = append(o.Entries, entry)
-
- b = split[1][20:]
- if len(split[1]) == 20 {
- break
- }
- }
-
- return o, nil
-}
-
-// Type returns the object type
-func (o *Tree) Type() ObjectType {
- return TreeObject
-}
-
-// Hash returns the computed hash of the tree
-func (o *Tree) Hash() Hash {
- return o.hash
-}
-
-// Blob is used to store file data - it is generally a file.
-type Blob struct {
- Len int
- hash Hash
-}
-
-// ParseBlob transform a byte slice into a Blob struct
-func ParseBlob(b []byte) (*Blob, error) {
- return &Blob{
- Len: len(b),
- hash: ComputeHash(BlobObject, b),
- }, nil
-}
-
-// Type returns the object type
-func (o *Blob) Type() ObjectType {
- return BlobObject
-}
-
-// Hash returns the computed hash of the blob
-func (o *Blob) Hash() Hash {
- return o.hash
-}
-
-type ContentCallback func(hash Hash, content []byte)
-
-// Signature represents an action signed by a person
-type Signature struct {
- Name string
- Email string
- When time.Time
-}
-
-// ParseSignature parse a byte slice returning a new action signature.
-func ParseSignature(signature []byte) Signature {
- ret := Signature{}
- if len(signature) == 0 {
- return ret
- }
-
- from := 0
- state := 'n' // n: name, e: email, t: timestamp, z: timezone
- for i := 0; ; i++ {
- var c byte
- var end bool
- if i < len(signature) {
- c = signature[i]
- } else {
- end = true
- }
-
- switch state {
- case 'n':
- if c == '<' || end {
- if i == 0 {
- break
- }
- ret.Name = string(signature[from : i-1])
- state = 'e'
- from = i + 1
- }
- case 'e':
- if c == '>' || end {
- ret.Email = string(signature[from:i])
- i++
- state = 't'
- from = i + 1
- }
- case 't':
- if c == ' ' || end {
- t, err := strconv.ParseInt(string(signature[from:i]), 10, 64)
- if err == nil {
- ret.When = time.Unix(t, 0)
- }
- end = true
- }
- }
-
- if end {
- break
- }
- }
-
- return ret
-}
-
-func (s *Signature) String() string {
- return fmt.Sprintf("%q <%s> @ %s", s.Name, s.Email, s.When)
-}
diff --git a/formats/packfile/objects_test.go b/formats/packfile/objects_test.go
deleted file mode 100644
index 0760653..0000000
--- a/formats/packfile/objects_test.go
+++ /dev/null
@@ -1,122 +0,0 @@
-package packfile
-
-import (
- "encoding/base64"
- "time"
-
- . "gopkg.in/check.v1"
-)
-
-type ObjectsSuite struct{}
-
-var _ = Suite(&ObjectsSuite{})
-
-func (s *ObjectsSuite) TestComputeHash(c *C) {
- hash := ComputeHash(BlobObject, []byte(""))
- c.Assert(hash.String(), Equals, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")
-
- hash = ComputeHash(BlobObject, []byte("Hello, World!\n"))
- c.Assert(hash.String(), Equals, "8ab686eafeb1f44702738c8b0f24f2567c36da6d")
-}
-
-func (s *ObjectsSuite) TestNewHash(c *C) {
- hash := ComputeHash(BlobObject, []byte("Hello, World!\n"))
-
- c.Assert(hash, Equals, NewHash(hash.String()))
-}
-
-var CommitFixture = "dHJlZSBjMmQzMGZhOGVmMjg4NjE4ZjY1ZjZlZWQ2ZTE2OGUwZDUxNDg4NmY0CnBhcmVudCBiMDI5NTE3ZjYzMDBjMmRhMGY0YjY1MWI4NjQyNTA2Y2Q2YWFmNDVkCnBhcmVudCBiOGU0NzFmNThiY2JjYTYzYjA3YmRhMjBlNDI4MTkwNDA5YzJkYjQ3CmF1dGhvciBNw6F4aW1vIEN1YWRyb3MgPG1jdWFkcm9zQGdtYWlsLmNvbT4gMTQyNzgwMjQzNCArMDIwMApjb21taXR0ZXIgTcOheGltbyBDdWFkcm9zIDxtY3VhZHJvc0BnbWFpbC5jb20+IDE0Mjc4MDI0MzQgKzAyMDAKCk1lcmdlIHB1bGwgcmVxdWVzdCAjMSBmcm9tIGRyaXBvbGxlcy9mZWF0dXJlCgpDcmVhdGluZyBjaGFuZ2Vsb2c="
-
-func (s *ObjectsSuite) TestParseCommit(c *C) {
- data, _ := base64.StdEncoding.DecodeString(CommitFixture)
- commit, err := ParseCommit(data)
- c.Assert(err, IsNil)
-
- c.Assert(commit.Tree.String(), Equals, "c2d30fa8ef288618f65f6eed6e168e0d514886f4")
- c.Assert(commit.Parents, HasLen, 2)
- c.Assert(commit.Parents[0].String(), Equals, "b029517f6300c2da0f4b651b8642506cd6aaf45d")
- c.Assert(commit.Parents[1].String(), Equals, "b8e471f58bcbca63b07bda20e428190409c2db47")
- c.Assert(commit.Author.Email, Equals, "mcuadros@gmail.com")
- c.Assert(commit.Author.Name, Equals, "Máximo Cuadros")
- c.Assert(commit.Author.When.Unix(), Equals, int64(1427802434))
- c.Assert(commit.Committer.Email, Equals, "mcuadros@gmail.com")
- c.Assert(commit.Message, Equals, "Merge pull request #1 from dripolles/feature\n\nCreating changelog")
-}
-
-func (s *ObjectsSuite) TestCommitHash(c *C) {
- data, _ := base64.StdEncoding.DecodeString(CommitFixture)
- commit, err := ParseCommit(data)
-
- c.Assert(err, IsNil)
- c.Assert(commit.Hash().String(), Equals, "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69")
-}
-
-var TreeFixture = "MTAwNjQ0IC5naXRpZ25vcmUAMoWKrTw4PtH/Cg+b3yMdVKAMnogxMDA2NDQgQ0hBTkdFTE9HANP/U+BWSp+H2OhLbijlBg5RcAiqMTAwNjQ0IExJQ0VOU0UAwZK9aiTqGrAdeGhuQXyL3Hw9GX8xMDA2NDQgYmluYXJ5LmpwZwDVwPSrgRiXyt8DrsNYrmDSH5HFDTQwMDAwIGdvAKOXcadlH5f69ccuCCJNhX/DUTPbNDAwMDAganNvbgBah35qkGonQ61uRdmcF5NkKq+O2jQwMDAwIHBocABYavVn0Ltedx5JvdlDT14Pt20l+jQwMDAwIHZlbmRvcgDPSqOziXT7fYHzZ8CDD3141lq4aw=="
-
-func (s *ObjectsSuite) TestParseTree(c *C) {
- data, _ := base64.StdEncoding.DecodeString(TreeFixture)
- tree, err := ParseTree(data)
- c.Assert(err, IsNil)
-
- c.Assert(tree.Entries, HasLen, 8)
- c.Assert(tree.Entries[0].Name, Equals, ".gitignore")
- c.Assert(tree.Entries[0].Hash.String(), Equals, "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88")
-}
-
-func (s *ObjectsSuite) TestTreeHash(c *C) {
- data, _ := base64.StdEncoding.DecodeString(TreeFixture)
- tree, err := ParseTree(data)
-
- c.Assert(err, IsNil)
- c.Assert(tree.Hash().String(), Equals, "a8d315b2b1c615d43042c3a62402b8a54288cf5c")
-}
-
-func (s *ObjectsSuite) TestBlobHash(c *C) {
- blob, err := ParseBlob([]byte{'F', 'O', 'O'})
- c.Assert(err, IsNil)
-
- c.Assert(blob.Len, Equals, 3)
- c.Assert(blob.Hash().String(), Equals, "d96c7efbfec2814ae0301ad054dc8d9fc416c9b5")
-}
-
-func (s *ObjectsSuite) TestParseSignature(c *C) {
- cases := map[string]Signature{
- `Foo Bar <foo@bar.com> 1257894000 +0100`: {
- Name: "Foo Bar",
- Email: "foo@bar.com",
- When: time.Unix(1257894000, 0),
- },
- `Foo Bar <> 1257894000 +0100`: {
- Name: "Foo Bar",
- Email: "",
- When: time.Unix(1257894000, 0),
- },
- ` <> 1257894000`: {
- Name: "",
- Email: "",
- When: time.Unix(1257894000, 0),
- },
- `Foo Bar <foo@bar.com>`: {
- Name: "Foo Bar",
- Email: "foo@bar.com",
- When: time.Time{},
- },
- ``: {
- Name: "",
- Email: "",
- When: time.Time{},
- },
- `<`: {
- Name: "",
- Email: "",
- When: time.Time{},
- },
- }
-
- for raw, exp := range cases {
- got := ParseSignature([]byte(raw))
- c.Assert(got.Name, Equals, exp.Name)
- c.Assert(got.Email, Equals, exp.Email)
- c.Assert(got.When.Unix(), Equals, exp.When.Unix())
- }
-}
diff --git a/formats/packfile/packfile.go b/formats/packfile/packfile.go
deleted file mode 100644
index d70f396..0000000
--- a/formats/packfile/packfile.go
+++ /dev/null
@@ -1,82 +0,0 @@
-package packfile
-
-import "fmt"
-
-type Packfile struct {
- Version uint32
- Size int64
- ObjectCount int
- Checksum []byte
- Commits map[Hash]*Commit
- Trees map[Hash]*Tree
- Blobs map[Hash]*Blob
-}
-
-func NewPackfile() *Packfile {
- return &Packfile{
- Commits: make(map[Hash]*Commit, 0),
- Trees: make(map[Hash]*Tree, 0),
- Blobs: make(map[Hash]*Blob, 0),
- }
-}
-
-type BlobEntry struct {
- path string
- *Blob
-}
-
-type SubtreeEntry struct {
- path string
- *Tree
- TreeCh
-}
-
-type treeEntry interface {
- isTreeEntry()
- Path() string
-}
-
-func (b BlobEntry) isTreeEntry() {}
-func (b BlobEntry) Path() string { return b.path }
-func (b SubtreeEntry) isTreeEntry() {}
-func (b SubtreeEntry) Path() string { return b.path }
-
-type TreeCh <-chan treeEntry
-
-func (p *Packfile) WalkCommit(commitHash Hash) (TreeCh, error) {
- commit, ok := p.Commits[commitHash]
- if !ok {
- return nil, fmt.Errorf("Unable to find %q commit", commitHash)
- }
-
- return p.WalkTree(p.Trees[commit.Tree]), nil
-}
-
-func (p *Packfile) WalkTree(tree *Tree) TreeCh {
- return p.walkTree(tree, "")
-}
-
-func (p *Packfile) walkTree(tree *Tree, pathPrefix string) TreeCh {
- ch := make(chan treeEntry)
-
- if tree == nil {
- close(ch)
- return ch
- }
-
- go func() {
- defer func() {
- close(ch)
- }()
- for _, e := range tree.Entries {
- path := pathPrefix + e.Name
- if blob, ok := p.Blobs[e.Hash]; ok {
- ch <- BlobEntry{path, blob}
- } else if subtree, ok := p.Trees[e.Hash]; ok {
- ch <- SubtreeEntry{path, subtree, p.walkTree(subtree, path+"/")}
- }
- }
- }()
-
- return ch
-}
diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go
index c355e12..6ccf384 100644
--- a/formats/packfile/reader.go
+++ b/formats/packfile/reader.go
@@ -5,15 +5,29 @@ import (
"encoding/binary"
"fmt"
"io"
+ "io/ioutil"
+
+ "gopkg.in/src-d/go-git.v2/common"
"github.com/klauspost/compress/zlib"
)
type Format int
+var (
+ EmptyRepositoryErr = newError("empty repository")
+ UnsupportedVersionErr = newError("unsupported packfile version")
+ MaxObjectsLimitReachedErr = newError("max. objects limit reached")
+ MalformedPackfileErr = newError("malformed pack file, does not start with 'PACK'")
+ InvalidObjectErr = newError("invalid git object")
+ PatchingErr = newError("patching error")
+ PackEntryNotFoundErr = newError("can't find a pack entry")
+ ObjectNotFoundErr = newError("can't find a object")
+ ZLibErr = newError("zlib reading error")
+)
+
const (
DefaultMaxObjectsLimit = 1 << 20
- DefaultMaxObjectSize = 1 << 32 // 4GB
VersionSupported = 2
UnknownFormat Format = 0
@@ -21,7 +35,8 @@ const (
REFDeltaFormat Format = 2
)
-type PackfileReader struct {
+// Reader reads a packfile from a binary string splitting it on objects
+type Reader struct {
// MaxObjectsLimit is the limit of objects to be load in the packfile, if
// a packfile excess this number an error is throw, the default value
// is defined by DefaultMaxObjectsLimit, usually the default limit is more
@@ -29,116 +44,98 @@ type PackfileReader struct {
// where the number of object is bigger the memory can be exhausted.
MaxObjectsLimit uint32
- // MaxObjectSize is the maximum size in bytes, reading objects with a bigger
- // size cause a error. The default value is defined by DefaultMaxObjectSize
- MaxObjectSize uint64
-
// Format specifies if we are using ref-delta's or ofs-delta's, choosing the
// correct format the memory usage is optimized
// https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/Documentation/technical/protocol-capabilities.txt#L154
Format Format
r *trackingReader
- objects map[Hash]*RAWObject
- offsets map[int]*RAWObject
+ s common.ObjectStorage
+ offsets map[int64]common.Hash
}
-func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) {
- return &PackfileReader{
+// NewReader returns a new Reader that reads from a io.Reader
+func NewReader(r io.Reader) *Reader {
+ return &Reader{
MaxObjectsLimit: DefaultMaxObjectsLimit,
- MaxObjectSize: DefaultMaxObjectSize,
r: &trackingReader{r: r},
- objects: make(map[Hash]*RAWObject, 0),
- offsets: make(map[int]*RAWObject, 0),
- }, nil
+ offsets: make(map[int64]common.Hash, 0),
+ }
}
-func (pr *PackfileReader) Read() (chan *RAWObject, error) {
- if err := pr.validateHeader(); err != nil {
+// Read reads the objects and stores it at the ObjectStorage
+func (r *Reader) Read(s common.ObjectStorage) (int64, error) {
+ r.s = s
+ if err := r.validateHeader(); err != nil {
if err == io.EOF {
- // This is an empty repo. It's OK.
- return nil, nil
+ return -1, EmptyRepositoryErr
}
- return nil, err
+ return -1, err
}
- version, err := pr.readInt32()
+ version, err := r.readInt32()
if err != nil {
- return nil, err
+ return -1, err
}
if version > VersionSupported {
- return nil, NewError("unsupported packfile version %d", version)
+ return -1, UnsupportedVersionErr
}
- count, err := pr.readInt32()
+ count, err := r.readInt32()
if err != nil {
- return nil, err
+ return -1, err
}
- if count > pr.MaxObjectsLimit {
- return nil, NewError("too many objects %d, limit is %d", count, pr.MaxObjectsLimit)
+ if count > r.MaxObjectsLimit {
+ return -1, MaxObjectsLimitReachedErr
}
- ch := make(chan *RAWObject, 1)
- go pr.readObjects(ch, count)
-
- // packfile.Size = int64(pr.r.Pos())
-
- return ch, nil
+ return r.r.position, r.readObjects(count)
}
-func (pr *PackfileReader) validateHeader() error {
+func (r *Reader) validateHeader() error {
var header = make([]byte, 4)
- if _, err := pr.r.Read(header); err != nil {
+ if _, err := r.r.Read(header); err != nil {
return err
}
if !bytes.Equal(header, []byte{'P', 'A', 'C', 'K'}) {
- return NewError("Pack file does not start with 'PACK'")
+ return MalformedPackfileErr
}
return nil
}
-func (pr *PackfileReader) readInt32() (uint32, error) {
+func (r *Reader) readInt32() (uint32, error) {
var value uint32
- if err := binary.Read(pr.r, binary.BigEndian, &value); err != nil {
+ if err := binary.Read(r.r, binary.BigEndian, &value); err != nil {
return 0, err
}
return value, nil
}
-func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error {
+func (r *Reader) readObjects(count uint32) error {
// This code has 50-80 µs of overhead per object not counting zlib inflation.
// Together with zlib inflation, it's 400-410 µs for small objects.
// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
// of which 12-20 % is _not_ zlib inflation (ie. is our code).
- defer func() {
- close(ch)
- }()
-
for i := 0; i < int(count); i++ {
- var pos = pr.Pos()
- obj, err := pr.newRAWObject()
+ start := r.r.position
+ obj, err := r.newRAWObject()
if err != nil && err != io.EOF {
fmt.Println(err)
return err
}
- if pr.Format == UnknownFormat || pr.Format == OFSDeltaFormat {
- pr.offsets[pos] = obj
- }
-
- if pr.Format == UnknownFormat || pr.Format == REFDeltaFormat {
- pr.objects[obj.Hash] = obj
+ if r.Format == UnknownFormat || r.Format == OFSDeltaFormat {
+ r.offsets[start] = obj.Hash()
}
- ch <- obj
-
+ r.s.Set(obj)
if err == io.EOF {
break
}
@@ -147,154 +144,147 @@ func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error {
return nil
}
-func (pr *PackfileReader) Pos() int { return pr.r.Pos() }
-
-func (pr *PackfileReader) newRAWObject() (*RAWObject, error) {
- raw := &RAWObject{}
- steps := 0
+func (r *Reader) newRAWObject() (common.Object, error) {
+ raw := r.s.New()
+ var steps int64
var buf [1]byte
- if _, err := pr.r.Read(buf[:]); err != nil {
+ if _, err := r.r.Read(buf[:]); err != nil {
return nil, err
}
- raw.Type = ObjectType((buf[0] >> 4) & 7)
- raw.Size = uint64(buf[0] & 15)
+ typ := common.ObjectType((buf[0] >> 4) & 7)
+ size := int64(buf[0] & 15)
steps++ // byte we just read to get `o.typ` and `o.size`
var shift uint = 4
for buf[0]&0x80 == 0x80 {
- if _, err := pr.r.Read(buf[:]); err != nil {
+ if _, err := r.r.Read(buf[:]); err != nil {
return nil, err
}
- raw.Size += uint64(buf[0]&0x7f) << shift
+ size += int64(buf[0]&0x7f) << shift
steps++ // byte we just read to update `o.size`
shift += 7
}
+ raw.SetType(typ)
+ raw.SetSize(size)
+
var err error
- switch raw.Type {
- case REFDeltaObject:
- err = pr.readREFDelta(raw)
- case OFSDeltaObject:
- err = pr.readOFSDelta(raw, steps)
- case CommitObject, TreeObject, BlobObject, TagObject:
- err = pr.readObject(raw)
+ switch raw.Type() {
+ case common.REFDeltaObject:
+ err = r.readREFDelta(raw)
+ case common.OFSDeltaObject:
+ err = r.readOFSDelta(raw, steps)
+ case common.CommitObject, common.TreeObject, common.BlobObject, common.TagObject:
+ err = r.readObject(raw)
default:
- err = NewError("Invalid git object tag %q", raw.Type)
+ err = InvalidObjectErr.n("tag %q", raw.Type)
}
return raw, err
}
-func (pr *PackfileReader) readREFDelta(raw *RAWObject) error {
- var ref Hash
- if _, err := pr.r.Read(ref[:]); err != nil {
+func (r *Reader) readREFDelta(raw common.Object) error {
+ var ref common.Hash
+ if _, err := r.r.Read(ref[:]); err != nil {
return err
}
- buf, err := pr.inflate(raw.Size)
- if err != nil {
+ buf := bytes.NewBuffer(nil)
+ if err := r.inflate(buf); err != nil {
return err
}
- referenced, ok := pr.objects[ref]
+ referenced, ok := r.s.Get(ref)
if !ok {
- fmt.Println("not found", ref)
- } else {
- patched := PatchDelta(referenced.Bytes, buf[:])
- if patched == nil {
- return NewError("error while patching %x", ref)
- }
+ return ObjectNotFoundErr.n("%s", ref)
+ }
- raw.Type = referenced.Type
- raw.Bytes = patched
- raw.Size = uint64(len(patched))
- raw.Hash = ComputeHash(raw.Type, raw.Bytes)
+ d, _ := ioutil.ReadAll(referenced.Reader())
+ patched := patchDelta(d, buf.Bytes())
+ if patched == nil {
+ return PatchingErr.n("hash %q", ref)
}
+ raw.SetType(referenced.Type())
+ raw.SetSize(int64(len(patched)))
+ raw.Writer().Write(patched)
+
return nil
}
-func (pr *PackfileReader) readOFSDelta(raw *RAWObject, steps int) error {
- var pos = pr.Pos()
-
- // read negative offset
- offset, err := decodeOffset(pr.r, steps)
+func (r *Reader) readOFSDelta(raw common.Object, steps int64) error {
+ start := r.r.position
+ offset, err := decodeOffset(r.r, steps)
if err != nil {
return err
}
- buf, err := pr.inflate(raw.Size)
- if err != nil {
+ buf := bytes.NewBuffer(nil)
+ if err := r.inflate(buf); err != nil {
return err
}
- ref, ok := pr.offsets[pos+offset]
+ ref, ok := r.offsets[start+offset]
if !ok {
- return NewError("can't find a pack entry at %d", pos+offset)
+ return PackEntryNotFoundErr.n("offset %d", start+offset)
}
- patched := PatchDelta(ref.Bytes, buf)
+ referenced, _ := r.s.Get(ref)
+ d, _ := ioutil.ReadAll(referenced.Reader())
+ patched := patchDelta(d, buf.Bytes())
if patched == nil {
- return NewError("error while patching %q", ref)
+ return PatchingErr.n("hash %q", ref)
}
- raw.Type = ref.Type
- raw.Bytes = patched
- raw.Size = uint64(len(patched))
- raw.Hash = ComputeHash(raw.Type, raw.Bytes)
+ raw.SetType(referenced.Type())
+ raw.SetSize(int64(len(patched)))
+ raw.Writer().Write(patched)
return nil
}
-func (pr *PackfileReader) readObject(raw *RAWObject) error {
- buf, err := pr.inflate(raw.Size)
- if err != nil {
- return err
- }
-
- raw.Bytes = buf
- raw.Hash = ComputeHash(raw.Type, raw.Bytes)
-
- return nil
+func (r *Reader) readObject(raw common.Object) error {
+ return r.inflate(raw.Writer())
}
-func (pr *PackfileReader) inflate(size uint64) ([]byte, error) {
- zr, err := zlib.NewReader(pr.r)
+func (r *Reader) inflate(w io.Writer) error {
+ zr, err := zlib.NewReader(r.r)
if err != nil {
if err == zlib.ErrHeader {
- return nil, zlib.ErrHeader
+ return zlib.ErrHeader
}
- return nil, NewError("error opening packfile's object zlib: %v", err)
+ return ZLibErr.n("%s", err)
}
defer zr.Close()
- if size > pr.MaxObjectSize {
- return nil, NewError("the object size %q exceeed the allowed limit: %q",
- size, pr.MaxObjectSize)
- }
-
- var buf bytes.Buffer
- io.Copy(&buf, zr) // also: io.CopyN(&buf, zr, int64(o.size))
-
- if buf.Len() != int(size) {
- return nil, NewError(
- "inflated size mismatch, expected %d, got %d", size, buf.Len())
- }
-
- return buf.Bytes(), nil
+ _, err = io.Copy(w, zr)
+ return err
}
type ReaderError struct {
- Msg string // description of error
+ reason, additional string
+}
+
+func newError(reason string) *ReaderError {
+ return &ReaderError{reason: reason}
}
-func NewError(format string, args ...interface{}) error {
- return &ReaderError{Msg: fmt.Sprintf(format, args...)}
+func (e *ReaderError) Error() string {
+ if e.additional == "" {
+ return e.reason
+ }
+
+ return fmt.Sprintf("%s: %s", e.reason, e.additional)
}
-func (e *ReaderError) Error() string { return e.Msg }
+func (e *ReaderError) n(format string, args ...interface{}) *ReaderError {
+ return &ReaderError{
+ reason: e.reason,
+ additional: fmt.Sprintf(format, args...),
+ }
+}
diff --git a/formats/packfile/reader_test.go b/formats/packfile/reader_test.go
index 917eee1..14c092e 100644
--- a/formats/packfile/reader_test.go
+++ b/formats/packfile/reader_test.go
@@ -6,13 +6,17 @@ import (
"fmt"
"os"
"runtime"
+ "testing"
"time"
- "github.com/dustin/go-humanize"
+ "gopkg.in/src-d/go-git.v2/common"
+ "github.com/dustin/go-humanize"
. "gopkg.in/check.v1"
)
+func Test(t *testing.T) { TestingT(t) }
+
type ReaderSuite struct{}
var _ = Suite(&ReaderSuite{})
@@ -23,13 +27,13 @@ func (s *ReaderSuite) TestReadPackfile(c *C) {
data, _ := base64.StdEncoding.DecodeString(packFileWithEmptyObjects)
d := bytes.NewReader(data)
- r, err := NewPackfileReader(d, nil)
- c.Assert(err, IsNil)
+ r := NewReader(d)
- ch, err := r.Read()
+ storage := common.NewRAWObjectStorage()
+ _, err := r.Read(storage)
c.Assert(err, IsNil)
- AssertObjects(c, ch, []string{
+ AssertObjects(c, storage, []string{
"778c85ff95b5514fea0ba4c7b6a029d32e2c3b96",
"db4002e880a08bf6cc7217512ad937f1ac8824a2",
"551fe11a9ef992763b7e0be4500cf7169f2f8575",
@@ -56,14 +60,14 @@ func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string, f Format) {
d, err := os.Open(file)
c.Assert(err, IsNil)
- r, err := NewPackfileReader(d, nil)
- c.Assert(err, IsNil)
-
+ r := NewReader(d)
r.Format = f
- ch, err := r.Read()
+
+ storage := common.NewRAWObjectStorage()
+ _, err = r.Read(storage)
c.Assert(err, IsNil)
- AssertObjects(c, ch, []string{
+ AssertObjects(c, storage, []string{
"918c48b83bd081e863dbe1b80f8998f058cd8294",
"af2d6a6954d532f8ffb47615169c8fdf9d383a1a",
"1669dce138d9b841a518c64b10914d88f5e488ea",
@@ -95,14 +99,12 @@ func (s *ReaderSuite) testReadPackfileGitFixture(c *C, file string, f Format) {
})
}
-func AssertObjects(c *C, ch chan *RAWObject, expects []string) {
+func AssertObjects(c *C, s *common.RAWObjectStorage, expects []string) {
+ c.Assert(len(expects), Equals, len(s.Objects))
for _, expected := range expects {
- obtained := <-ch
- c.Assert(obtained.Hash.String(), Equals, expected)
-
- computed := ComputeHash(obtained.Type, obtained.Bytes)
- c.Assert(computed.String(), Equals, expected)
- c.Assert(obtained.Bytes, HasLen, int(obtained.Size))
+ obtained, ok := s.Get(common.NewHash(expected))
+ c.Assert(ok, Equals, true)
+ c.Assert(obtained.Hash().String(), Equals, expected)
}
}
@@ -150,7 +152,7 @@ func (s *ReaderSuite) _TestMemoryOFS(c *C) {
fmt.Println("HeapAlloc", a.HeapAlloc-b.HeapAlloc, humanize.Bytes(a.HeapAlloc-b.HeapAlloc))
fmt.Println("HeapSys", a.HeapSys, humanize.Bytes(a.HeapSys-b.HeapSys))
- fmt.Println("objects", len(p))
+ fmt.Println("objects", len(p.Objects))
fmt.Println("time", time.Since(start))
}
@@ -168,26 +170,20 @@ func (s *ReaderSuite) _TestMemoryREF(c *C) {
fmt.Println("HeapAlloc", a.HeapAlloc-b.HeapAlloc, humanize.Bytes(a.HeapAlloc-b.HeapAlloc))
fmt.Println("HeapSys", a.HeapSys, humanize.Bytes(a.HeapSys-b.HeapSys))
- fmt.Println("objects", len(p))
+ fmt.Println("objects", len(p.Objects))
fmt.Println("time", time.Since(start))
}
-func readFromFile(c *C, file string, f Format) []*RAWObject {
+func readFromFile(c *C, file string, f Format) *common.RAWObjectStorage {
d, err := os.Open(file)
c.Assert(err, IsNil)
- r, err := NewPackfileReader(d, nil)
- c.Assert(err, IsNil)
-
+ r := NewReader(d)
r.Format = f
- ch, err := r.Read()
- c.Assert(err, IsNil)
- c.Assert(ch, NotNil)
- var objs []*RAWObject
- for o := range ch {
- objs = append(objs, o)
- }
+ storage := common.NewRAWObjectStorage()
+ _, err = r.Read(storage)
+ c.Assert(err, IsNil)
- return objs
+ return storage
}