Diffstat (limited to 'plumbing/format/packfile')
 plumbing/format/packfile/common.go        |   8
 plumbing/format/packfile/common_test.go   |  14
 plumbing/format/packfile/fsobject.go      |   2
 plumbing/format/packfile/packfile.go      | 107
 plumbing/format/packfile/parser.go        |  98
 plumbing/format/packfile/parser_test.go   |  50
 plumbing/format/packfile/scanner.go       |  46
 plumbing/format/packfile/scanner_test.go  |  17
 8 files changed, 205 insertions(+), 137 deletions(-)
diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go
index 2b4aceb..0d9ed54 100644
--- a/plumbing/format/packfile/common.go
+++ b/plumbing/format/packfile/common.go
@@ -51,7 +51,13 @@ func WritePackfileToObjectStorage(
}
defer ioutil.CheckClose(w, &err)
- _, err = io.Copy(w, packfile)
+
+ var n int64
+ n, err = io.Copy(w, packfile)
+ if err == nil && n == 0 {
+ return ErrEmptyPackfile
+ }
+
return err
}
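
For context on the guard above: io.Copy reports the number of bytes copied, so a nil error together with n == 0 means the source reader was empty. A minimal, self-contained sketch of the same pattern (errEmptyInput and copyNonEmpty are illustrative stand-ins, not go-git API):

    package main

    import (
        "bytes"
        "errors"
        "fmt"
        "io"
        "strings"
    )

    // errEmptyInput stands in for ErrEmptyPackfile in this sketch.
    var errEmptyInput = errors.New("empty input")

    // copyNonEmpty copies r into w and fails if r yielded no bytes,
    // mirroring the check added to WritePackfileToObjectStorage.
    func copyNonEmpty(w io.Writer, r io.Reader) error {
        n, err := io.Copy(w, r)
        if err == nil && n == 0 {
            return errEmptyInput
        }
        return err
    }

    func main() {
        var buf bytes.Buffer
        fmt.Println(copyNonEmpty(&buf, strings.NewReader("")))  // empty input
        fmt.Println(copyNonEmpty(&buf, strings.NewReader("x"))) // <nil>
    }
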
diff --git a/plumbing/format/packfile/common_test.go b/plumbing/format/packfile/common_test.go
index 387c0d1..eafc617 100644
--- a/plumbing/format/packfile/common_test.go
+++ b/plumbing/format/packfile/common_test.go
@@ -1,15 +1,29 @@
package packfile
import (
+ "bytes"
"testing"
"gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/storage/memory"
. "gopkg.in/check.v1"
)
func Test(t *testing.T) { TestingT(t) }
+type CommonSuite struct{}
+
+var _ = Suite(&CommonSuite{})
+
+func (s *CommonSuite) TestEmptyUpdateObjectStorage(c *C) {
+ var buf bytes.Buffer
+ sto := memory.NewStorage()
+
+ err := UpdateObjectStorage(sto, &buf)
+ c.Assert(err, Equals, ErrEmptyPackfile)
+}
+
func newObject(t plumbing.ObjectType, cont []byte) plumbing.EncodedObject {
o := plumbing.MemoryObject{}
o.SetType(t)
diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go
index 330cb73..a268bce 100644
--- a/plumbing/format/packfile/fsobject.go
+++ b/plumbing/format/packfile/fsobject.go
@@ -48,7 +48,7 @@ func NewFSObject(
// Reader implements the plumbing.EncodedObject interface.
func (o *FSObject) Reader() (io.ReadCloser, error) {
obj, ok := o.cache.Get(o.hash)
- if ok {
+ if ok && obj != o {
reader, err := obj.Reader()
if err != nil {
return nil, err
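
The obj != o check matters because the object can end up in the cache under its own hash; delegating to the cached entry in that case would make Reader call itself without end. A minimal sketch of the guard with a toy cache (lazyObject and its map cache are illustrative types, not go-git API):

    package main

    import (
        "fmt"
        "io"
        "strings"
    )

    type object interface {
        Reader() (io.Reader, error)
    }

    type lazyObject struct {
        key   string
        cache map[string]object // may hold this very object
    }

    func (o *lazyObject) Reader() (io.Reader, error) {
        // Only delegate to the cached entry when it is a *different*
        // object; delegating to ourselves would recurse forever.
        if cached, ok := o.cache[o.key]; ok && cached != o {
            return cached.Reader()
        }
        return strings.NewReader("read from disk"), nil
    }

    func main() {
        cache := map[string]object{}
        obj := &lazyObject{key: "abc", cache: cache}
        cache["abc"] = obj // the cache holds the object itself

        r, _ := obj.Reader()
        b, _ := io.ReadAll(r)
        fmt.Println(string(b)) // read from disk, no infinite recursion
    }
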
diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go
index 0d13066..1e7ef26 100644
--- a/plumbing/format/packfile/packfile.go
+++ b/plumbing/format/packfile/packfile.go
@@ -21,6 +21,16 @@ var (
ErrZLib = NewError("zlib reading error")
)
+// When reading small objects from a packfile it is beneficial to do so at
+// once to exploit the buffered I/O. In many cases the objects are so small
+// that they were already loaded into memory when the object header was
+// loaded from the packfile. Wrapping in FSObject would cause this buffered
+// data to be thrown away and then re-read later, with the additional
+// seeking causing reloads from disk. Objects smaller than this threshold
+// are now always read into memory and stored in the cache instead of being
+// wrapped in FSObject.
+const smallObjectThreshold = 16 * 1024
+
// Packfile allows retrieving information from inside a packfile.
type Packfile struct {
idxfile.Index
@@ -79,15 +89,7 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) {
}
}
- if _, err := p.s.SeekFromStart(o); err != nil {
- if err == io.EOF || isInvalid(err) {
- return nil, plumbing.ErrObjectNotFound
- }
-
- return nil, err
- }
-
- return p.nextObject()
+ return p.objectAtOffset(o)
}
// GetSizeByOffset retrieves the size of the encoded object from the
@@ -108,66 +110,16 @@ func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) {
return h.Length, nil
}
-func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
- h, err := p.s.NextObjectHeader()
+func (p *Packfile) objectHeaderAtOffset(offset int64) (*ObjectHeader, error) {
+ h, err := p.s.SeekObjectHeader(offset)
p.s.pendingObject = nil
return h, err
}
-func (p *Packfile) getObjectData(
- h *ObjectHeader,
-) (typ plumbing.ObjectType, size int64, err error) {
- switch h.Type {
- case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
- typ = h.Type
- size = h.Length
- case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
- buf := bufPool.Get().(*bytes.Buffer)
- buf.Reset()
- defer bufPool.Put(buf)
-
- _, _, err = p.s.NextObject(buf)
- if err != nil {
- return
- }
-
- delta := buf.Bytes()
- _, delta = decodeLEB128(delta) // skip src size
- sz, _ := decodeLEB128(delta)
- size = int64(sz)
-
- var offset int64
- if h.Type == plumbing.REFDeltaObject {
- offset, err = p.FindOffset(h.Reference)
- if err != nil {
- return
- }
- } else {
- offset = h.OffsetReference
- }
-
- if baseType, ok := p.offsetToType[offset]; ok {
- typ = baseType
- } else {
- if _, err = p.s.SeekFromStart(offset); err != nil {
- return
- }
-
- h, err = p.nextObjectHeader()
- if err != nil {
- return
- }
-
- typ, _, err = p.getObjectData(h)
- if err != nil {
- return
- }
- }
- default:
- err = ErrInvalidObject.AddDetails("type %q", h.Type)
- }
-
- return
+func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
+ h, err := p.s.NextObjectHeader()
+ p.s.pendingObject = nil
+ return h, err
}
func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
@@ -210,11 +162,7 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err
if baseType, ok := p.offsetToType[offset]; ok {
typ = baseType
} else {
- if _, err = p.s.SeekFromStart(offset); err != nil {
- return
- }
-
- h, err = p.nextObjectHeader()
+ h, err = p.objectHeaderAtOffset(offset)
if err != nil {
return
}
@@ -231,8 +179,8 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err
return
}
-func (p *Packfile) nextObject() (plumbing.EncodedObject, error) {
- h, err := p.nextObjectHeader()
+func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) {
+ h, err := p.objectHeaderAtOffset(offset)
if err != nil {
if err == io.EOF || isInvalid(err) {
return nil, plumbing.ErrObjectNotFound
@@ -246,6 +194,13 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) {
return p.getNextObject(h)
}
+ // If the object is not a delta and it's small enough then read it
+ // completely into memory now, since it has already been read from disk
+ // into a buffer anyway.
+ if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject {
+ return p.getNextObject(h)
+ }
+
hash, err := p.FindHash(h.Offset)
if err != nil {
return nil, err
@@ -289,11 +244,7 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
}
}
- if _, err := p.s.SeekFromStart(offset); err != nil {
- return nil, err
- }
-
- h, err := p.nextObjectHeader()
+ h, err := p.objectHeaderAtOffset(offset)
if err != nil {
return nil, err
}
@@ -385,8 +336,6 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset
if err != nil {
return err
}
-
- p.cachePut(base)
}
obj.SetType(base.Type())
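
A small sketch of the resulting decision for the 16 KiB threshold introduced above: non-delta objects at or below the threshold are decoded eagerly and cached, while deltas and larger objects keep the lazy, file-backed path. The helper and type names below are illustrative, not go-git API:

    package main

    import "fmt"

    const smallObjectThreshold = 16 * 1024

    type objectType int

    const (
        blobObject objectType = iota
        ofsDeltaObject
        refDeltaObject
    )

    // loadEagerly reports whether an object should be fully decoded into
    // memory (and cached) rather than wrapped in a lazy, file-backed object.
    func loadEagerly(typ objectType, length int64) bool {
        isDelta := typ == ofsDeltaObject || typ == refDeltaObject
        return !isDelta && length <= smallObjectThreshold
    }

    func main() {
        fmt.Println(loadEagerly(blobObject, 512))     // true: small, already buffered
        fmt.Println(loadEagerly(blobObject, 64*1024)) // false: large, keep it lazy
        fmt.Println(loadEagerly(ofsDeltaObject, 512)) // false: deltas take the delta path
    }
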
diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go
index 28582b5..71cbba9 100644
--- a/plumbing/format/packfile/parser.go
+++ b/plumbing/format/packfile/parser.go
@@ -38,15 +38,14 @@ type Observer interface {
// Parser decodes a packfile and calls any observer associated to it. Is used
// to generate indexes.
type Parser struct {
- storage storer.EncodedObjectStorer
- scanner *Scanner
- count uint32
- oi []*objectInfo
- oiByHash map[plumbing.Hash]*objectInfo
- oiByOffset map[int64]*objectInfo
- hashOffset map[plumbing.Hash]int64
- pendingRefDeltas map[plumbing.Hash][]*objectInfo
- checksum plumbing.Hash
+ storage storer.EncodedObjectStorer
+ scanner *Scanner
+ count uint32
+ oi []*objectInfo
+ oiByHash map[plumbing.Hash]*objectInfo
+ oiByOffset map[int64]*objectInfo
+ hashOffset map[plumbing.Hash]int64
+ checksum plumbing.Hash
cache *cache.BufferLRU
// delta content by offset, only used if source is not seekable
@@ -78,13 +77,12 @@ func NewParserWithStorage(
}
return &Parser{
- storage: storage,
- scanner: scanner,
- ob: ob,
- count: 0,
- cache: cache.NewBufferLRUDefault(),
- pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo),
- deltas: deltas,
+ storage: storage,
+ scanner: scanner,
+ ob: ob,
+ count: 0,
+ cache: cache.NewBufferLRUDefault(),
+ deltas: deltas,
}, nil
}
@@ -150,10 +148,6 @@ func (p *Parser) Parse() (plumbing.Hash, error) {
return plumbing.ZeroHash, err
}
- if len(p.pendingRefDeltas) > 0 {
- return plumbing.ZeroHash, ErrReferenceDeltaNotFound
- }
-
if err := p.onFooter(p.checksum); err != nil {
return plumbing.ZeroHash, err
}
@@ -205,18 +199,21 @@ func (p *Parser) indexObjects() error {
parent.Children = append(parent.Children, ota)
case plumbing.REFDeltaObject:
delta = true
-
parent, ok := p.oiByHash[oh.Reference]
- if ok {
- ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
- parent.Children = append(parent.Children, ota)
- } else {
- ota = newBaseObject(oh.Offset, oh.Length, t)
- p.pendingRefDeltas[oh.Reference] = append(
- p.pendingRefDeltas[oh.Reference],
- ota,
- )
+ if !ok {
+ // can't find the referenced object in this packfile;
+ // this must be a "thin" pack.
+ parent = &objectInfo{ // placeholder parent
+ SHA1: oh.Reference,
+ ExternalRef: true, // mark as an external reference that must be resolved
+ Type: plumbing.AnyObject,
+ DiskType: plumbing.AnyObject,
+ }
+ p.oiByHash[oh.Reference] = parent
}
+ ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
+ parent.Children = append(parent.Children, ota)
+
default:
ota = newBaseObject(oh.Offset, oh.Length, t)
}
@@ -297,16 +294,20 @@ func (p *Parser) resolveDeltas() error {
return nil
}
-func (p *Parser) get(o *objectInfo) ([]byte, error) {
- b, ok := p.cache.Get(o.Offset)
+func (p *Parser) get(o *objectInfo) (b []byte, err error) {
+ var ok bool
+ if !o.ExternalRef { // skip cache check for placeholder parents
+ b, ok = p.cache.Get(o.Offset)
+ }
+
// If it's not on the cache and is not a delta we can try to find it in the
- // storage, if there's one.
+ // storage, if there's one. External refs must enter here.
if !ok && p.storage != nil && !o.Type.IsDelta() {
- var err error
e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1)
if err != nil {
return nil, err
}
+ o.Type = e.Type()
r, err := e.Reader()
if err != nil {
@@ -323,6 +324,11 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) {
return b, nil
}
+ if o.ExternalRef {
+ // we were not able to resolve a ref in a thin pack
+ return nil, ErrReferenceDeltaNotFound
+ }
+
var data []byte
if o.DiskType.IsDelta() {
base, err := p.get(o.Parent)
@@ -335,7 +341,6 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) {
return nil, err
}
} else {
- var err error
data, err = p.readData(o)
if err != nil {
return nil, err
@@ -367,14 +372,6 @@ func (p *Parser) resolveObject(
return nil, err
}
- if pending, ok := p.pendingRefDeltas[o.SHA1]; ok {
- for _, po := range pending {
- po.Parent = o
- o.Children = append(o.Children, po)
- }
- delete(p.pendingRefDeltas, o.SHA1)
- }
-
if p.storage != nil {
obj := new(plumbing.MemoryObject)
obj.SetSize(o.Size())
@@ -401,11 +398,7 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) {
return data, nil
}
- if _, err := p.scanner.SeekFromStart(o.Offset); err != nil {
- return nil, err
- }
-
- if _, err := p.scanner.NextObjectHeader(); err != nil {
+ if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil {
return nil, err
}
@@ -447,10 +440,11 @@ func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {
}
type objectInfo struct {
- Offset int64
- Length int64
- Type plumbing.ObjectType
- DiskType plumbing.ObjectType
+ Offset int64
+ Length int64
+ Type plumbing.ObjectType
+ DiskType plumbing.ObjectType
+ ExternalRef bool // indicates this is an external reference in a thin pack file
Crc32 uint32
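
A sketch of the resolution order this change establishes for delta bases: in-pack data is tried first, external (thin-pack) references skip the cache and go straight to storage, and an external reference that still cannot be resolved surfaces as a reference-delta-not-found error. All names below are illustrative stand-ins for the parser's internals, not go-git API:

    package main

    import (
        "errors"
        "fmt"
    )

    var errReferenceDeltaNotFound = errors.New("reference delta not found")

    type objInfo struct {
        sha1        string
        externalRef bool // base is not in this pack; must come from storage
    }

    // baseData resolves the raw bytes of o, trying in-pack data first and
    // falling back to external storage for thin-pack bases.
    func baseData(o *objInfo, inPack, storage map[string][]byte) ([]byte, error) {
        if !o.externalRef { // placeholder parents are never in the pack
            if b, ok := inPack[o.sha1]; ok {
                return b, nil
            }
        }
        if b, ok := storage[o.sha1]; ok {
            return b, nil
        }
        if o.externalRef {
            // A thin pack whose base is missing everywhere is an error.
            return nil, errReferenceDeltaNotFound
        }
        return nil, fmt.Errorf("object %s not found", o.sha1)
    }

    func main() {
        inPack := map[string][]byte{}
        storage := map[string][]byte{"base": []byte("base content")}

        b, err := baseData(&objInfo{sha1: "base", externalRef: true}, inPack, storage)
        fmt.Println(string(b), err) // base content <nil>

        _, err = baseData(&objInfo{sha1: "missing", externalRef: true}, inPack, storage)
        fmt.Println(err) // reference delta not found
    }
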
diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go
index 012a140..6e7c84b 100644
--- a/plumbing/format/packfile/parser_test.go
+++ b/plumbing/format/packfile/parser_test.go
@@ -1,10 +1,13 @@
package packfile_test
import (
+ "io"
"testing"
+ git "gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
+ "gopkg.in/src-d/go-git.v4/plumbing/storer"
. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
@@ -74,6 +77,53 @@ func (s *ParserSuite) TestParserHashes(c *C) {
c.Assert(obs.objects, DeepEquals, objs)
}
+func (s *ParserSuite) TestThinPack(c *C) {
+
+ // Initialize an empty repository
+ fs, err := git.PlainInit(c.MkDir(), true)
+ c.Assert(err, IsNil)
+
+ // Try to parse a thin pack without having the required objects in the repo to
+ // see if the correct errors are returned
+ thinpack := fixtures.ByTag("thinpack").One()
+ scanner := packfile.NewScanner(thinpack.Packfile())
+ parser, err := packfile.NewParserWithStorage(scanner, fs.Storer) // NewParserWithStorage writes all parsed objects to the storer
+ c.Assert(err, IsNil)
+
+ _, err = parser.Parse()
+ c.Assert(err, Equals, plumbing.ErrObjectNotFound)
+
+ // start over with a clean repo
+ fs, err = git.PlainInit(c.MkDir(), true)
+ c.Assert(err, IsNil)
+
+ // Now unpack a base packfile into our empty repo:
+ f := fixtures.ByURL("https://github.com/spinnaker/spinnaker.git").One()
+ w, err := fs.Storer.(storer.PackfileWriter).PackfileWriter()
+ c.Assert(err, IsNil)
+ _, err = io.Copy(w, f.Packfile())
+ c.Assert(err, IsNil)
+ w.Close()
+
+ // Check that the test object that will come with our thin pack is *not* in the repo
+ _, err = fs.Storer.EncodedObject(plumbing.CommitObject, thinpack.Head)
+ c.Assert(err, Equals, plumbing.ErrObjectNotFound)
+
+ // Now unpack the thin pack:
+ scanner = packfile.NewScanner(thinpack.Packfile())
+ parser, err = packfile.NewParserWithStorage(scanner, fs.Storer) // NewParserWithStorage writes all parsed objects to the storer
+ c.Assert(err, IsNil)
+
+ h, err := parser.Parse()
+ c.Assert(err, IsNil)
+ c.Assert(h, Equals, plumbing.NewHash("1288734cbe0b95892e663221d94b95de1f5d7be8"))
+
+ // Check that our test object is now accessible
+ _, err = fs.Storer.EncodedObject(plumbing.CommitObject, thinpack.Head)
+ c.Assert(err, IsNil)
+
+}
+
type observerObject struct {
hash string
otype plumbing.ObjectType
diff --git a/plumbing/format/packfile/scanner.go b/plumbing/format/packfile/scanner.go
index 6fc183b..614b0d1 100644
--- a/plumbing/format/packfile/scanner.go
+++ b/plumbing/format/packfile/scanner.go
@@ -138,14 +138,52 @@ func (s *Scanner) readCount() (uint32, error) {
return binary.ReadUint32(s.r)
}
+// SeekObjectHeader seeks to the specified offset and returns the ObjectHeader
+// for the next object in the reader.
+func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) {
+ // if seeking we assume that you are not interested in the header
+ if s.version == 0 {
+ s.version = VersionSupported
+ }
+
+ if _, err := s.r.Seek(offset, io.SeekStart); err != nil {
+ return nil, err
+ }
+
+ h, err := s.nextObjectHeader()
+ if err != nil {
+ return nil, err
+ }
+
+ h.Offset = offset
+ return h, nil
+}
+
// NextObjectHeader returns the ObjectHeader for the next object in the reader
func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
- defer s.Flush()
-
if err := s.doPending(); err != nil {
return nil, err
}
+ offset, err := s.r.Seek(0, io.SeekCurrent)
+ if err != nil {
+ return nil, err
+ }
+
+ h, err := s.nextObjectHeader()
+ if err != nil {
+ return nil, err
+ }
+
+ h.Offset = offset
+ return h, nil
+}
+
+// nextObjectHeader returns the ObjectHeader for the next object in the reader
+// without the Offset field
+func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) {
+ defer s.Flush()
+
s.crc.Reset()
h := &ObjectHeader{}
@@ -308,7 +346,7 @@ var byteSlicePool = sync.Pool{
// SeekFromStart sets a new offset from start, returns the old position before
// the change.
func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) {
- // if seeking we assume that you are not interested on the header
+ // if seeking we assume that you are not interested in the header
if s.version == 0 {
s.version = VersionSupported
}
@@ -385,7 +423,7 @@ type bufferedSeeker struct {
}
func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) {
- if whence == io.SeekCurrent {
+ if whence == io.SeekCurrent && offset == 0 {
current, err := r.r.Seek(offset, whence)
if err != nil {
return current, err
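
A sketch of the offset bookkeeping used above: a zero-length relative seek is a pure position query, which is how NextObjectHeader can stamp h.Offset before decoding, while an absolute seek is what SeekObjectHeader uses to jump to a known offset. The reader and byte contents here are only illustrative:

    package main

    import (
        "bytes"
        "fmt"
        "io"
    )

    func main() {
        r := bytes.NewReader([]byte("PACK....object data follows"))

        // Consume a few bytes, as the scanner does while reading a header.
        header := make([]byte, 4)
        if _, err := io.ReadFull(r, header); err != nil {
            panic(err)
        }

        // A zero-length relative seek reports the current position
        // without moving it.
        offset, err := r.Seek(0, io.SeekCurrent)
        if err != nil {
            panic(err)
        }
        fmt.Println(offset) // 4: the offset where decoding would continue

        // An absolute seek jumps to a known offset, as SeekObjectHeader does.
        if _, err := r.Seek(0, io.SeekStart); err != nil {
            panic(err)
        }
    }
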
diff --git a/plumbing/format/packfile/scanner_test.go b/plumbing/format/packfile/scanner_test.go
index 644d0eb..091b457 100644
--- a/plumbing/format/packfile/scanner_test.go
+++ b/plumbing/format/packfile/scanner_test.go
@@ -118,6 +118,23 @@ func (s *ScannerSuite) TestNextObjectHeaderWithOutReadObjectNonSeekable(c *C) {
c.Assert(n, Equals, f.PackfileHash)
}
+func (s *ScannerSuite) TestSeekObjectHeader(c *C) {
+ r := fixtures.Basic().One().Packfile()
+ p := NewScanner(r)
+
+ h, err := p.SeekObjectHeader(expectedHeadersOFS[4].Offset)
+ c.Assert(err, IsNil)
+ c.Assert(h, DeepEquals, &expectedHeadersOFS[4])
+}
+
+func (s *ScannerSuite) TestSeekObjectHeaderNonSeekable(c *C) {
+ r := io.MultiReader(fixtures.Basic().One().Packfile())
+ p := NewScanner(r)
+
+ _, err := p.SeekObjectHeader(expectedHeadersOFS[4].Offset)
+ c.Assert(err, Equals, ErrSeekNotSupported)
+}
+
var expectedHeadersOFS = []ObjectHeader{
{Type: plumbing.CommitObject, Offset: 12, Length: 254},
{Type: plumbing.OFSDeltaObject, Offset: 186, Length: 93, OffsetReference: 12},
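
The io.MultiReader wrapper in the non-seekable test only exposes io.Reader, so a type assertion for a seeker fails; that is, roughly, how the scanner can tell that seeking is unsupported and return ErrSeekNotSupported. A minimal sketch of that property (seekable is an illustrative helper, not go-git API):

    package main

    import (
        "fmt"
        "io"
        "strings"
    )

    // seekable reports whether r also supports seeking.
    func seekable(r io.Reader) bool {
        _, ok := r.(io.Seeker)
        return ok
    }

    func main() {
        base := strings.NewReader("packfile bytes")

        fmt.Println(seekable(base))                 // true: *strings.Reader has Seek
        fmt.Println(seekable(io.MultiReader(base))) // false: the wrapper hides Seek
    }
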