diff options
Diffstat (limited to 'plumbing/format/packfile')
-rw-r--r-- | plumbing/format/packfile/common.go | 8 | ||||
-rw-r--r-- | plumbing/format/packfile/common_test.go | 14 | ||||
-rw-r--r-- | plumbing/format/packfile/fsobject.go | 2 | ||||
-rw-r--r-- | plumbing/format/packfile/packfile.go | 107 | ||||
-rw-r--r-- | plumbing/format/packfile/parser.go | 98 | ||||
-rw-r--r-- | plumbing/format/packfile/parser_test.go | 50 | ||||
-rw-r--r-- | plumbing/format/packfile/scanner.go | 46 | ||||
-rw-r--r-- | plumbing/format/packfile/scanner_test.go | 17 |
8 files changed, 205 insertions, 137 deletions
diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go index 2b4aceb..0d9ed54 100644 --- a/plumbing/format/packfile/common.go +++ b/plumbing/format/packfile/common.go @@ -51,7 +51,13 @@ func WritePackfileToObjectStorage( } defer ioutil.CheckClose(w, &err) - _, err = io.Copy(w, packfile) + + var n int64 + n, err = io.Copy(w, packfile) + if err == nil && n == 0 { + return ErrEmptyPackfile + } + return err } diff --git a/plumbing/format/packfile/common_test.go b/plumbing/format/packfile/common_test.go index 387c0d1..eafc617 100644 --- a/plumbing/format/packfile/common_test.go +++ b/plumbing/format/packfile/common_test.go @@ -1,15 +1,29 @@ package packfile import ( + "bytes" "testing" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/storage/memory" . "gopkg.in/check.v1" ) func Test(t *testing.T) { TestingT(t) } +type CommonSuite struct{} + +var _ = Suite(&CommonSuite{}) + +func (s *CommonSuite) TestEmptyUpdateObjectStorage(c *C) { + var buf bytes.Buffer + sto := memory.NewStorage() + + err := UpdateObjectStorage(sto, &buf) + c.Assert(err, Equals, ErrEmptyPackfile) +} + func newObject(t plumbing.ObjectType, cont []byte) plumbing.EncodedObject { o := plumbing.MemoryObject{} o.SetType(t) diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go index 330cb73..a268bce 100644 --- a/plumbing/format/packfile/fsobject.go +++ b/plumbing/format/packfile/fsobject.go @@ -48,7 +48,7 @@ func NewFSObject( // Reader implements the plumbing.EncodedObject interface. func (o *FSObject) Reader() (io.ReadCloser, error) { obj, ok := o.cache.Get(o.hash) - if ok { + if ok && obj != o { reader, err := obj.Reader() if err != nil { return nil, err diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 0d13066..1e7ef26 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -21,6 +21,16 @@ var ( ErrZLib = NewError("zlib reading error") ) +// When reading small objects from packfile it is beneficial to do so at +// once to exploit the buffered I/O. In many cases the objects are so small +// that they were already loaded to memory when the object header was +// loaded from the packfile. Wrapping in FSObject would cause this buffered +// data to be thrown away and then re-read later, with the additional +// seeking causing reloads from disk. Objects smaller than this threshold +// are now always read into memory and stored in cache instead of being +// wrapped in FSObject. +const smallObjectThreshold = 16 * 1024 + // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index @@ -79,15 +89,7 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { } } - if _, err := p.s.SeekFromStart(o); err != nil { - if err == io.EOF || isInvalid(err) { - return nil, plumbing.ErrObjectNotFound - } - - return nil, err - } - - return p.nextObject() + return p.objectAtOffset(o) } // GetSizeByOffset retrieves the size of the encoded object from the @@ -108,66 +110,16 @@ func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) { return h.Length, nil } -func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) { - h, err := p.s.NextObjectHeader() +func (p *Packfile) objectHeaderAtOffset(offset int64) (*ObjectHeader, error) { + h, err := p.s.SeekObjectHeader(offset) p.s.pendingObject = nil return h, err } -func (p *Packfile) getObjectData( - h *ObjectHeader, -) (typ plumbing.ObjectType, size int64, err error) { - switch h.Type { - case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - typ = h.Type - size = h.Length - case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: - buf := bufPool.Get().(*bytes.Buffer) - buf.Reset() - defer bufPool.Put(buf) - - _, _, err = p.s.NextObject(buf) - if err != nil { - return - } - - delta := buf.Bytes() - _, delta = decodeLEB128(delta) // skip src size - sz, _ := decodeLEB128(delta) - size = int64(sz) - - var offset int64 - if h.Type == plumbing.REFDeltaObject { - offset, err = p.FindOffset(h.Reference) - if err != nil { - return - } - } else { - offset = h.OffsetReference - } - - if baseType, ok := p.offsetToType[offset]; ok { - typ = baseType - } else { - if _, err = p.s.SeekFromStart(offset); err != nil { - return - } - - h, err = p.nextObjectHeader() - if err != nil { - return - } - - typ, _, err = p.getObjectData(h) - if err != nil { - return - } - } - default: - err = ErrInvalidObject.AddDetails("type %q", h.Type) - } - - return +func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) { + h, err := p.s.NextObjectHeader() + p.s.pendingObject = nil + return h, err } func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) { @@ -210,11 +162,7 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err if baseType, ok := p.offsetToType[offset]; ok { typ = baseType } else { - if _, err = p.s.SeekFromStart(offset); err != nil { - return - } - - h, err = p.nextObjectHeader() + h, err = p.objectHeaderAtOffset(offset) if err != nil { return } @@ -231,8 +179,8 @@ func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err return } -func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { - h, err := p.nextObjectHeader() +func (p *Packfile) objectAtOffset(offset int64) (plumbing.EncodedObject, error) { + h, err := p.objectHeaderAtOffset(offset) if err != nil { if err == io.EOF || isInvalid(err) { return nil, plumbing.ErrObjectNotFound @@ -246,6 +194,13 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { return p.getNextObject(h) } + // If the object is not a delta and it's small enough then read it + // completely into memory now since it is already read from disk + // into buffer anyway. + if h.Length <= smallObjectThreshold && h.Type != plumbing.OFSDeltaObject && h.Type != plumbing.REFDeltaObject { + return p.getNextObject(h) + } + hash, err := p.FindHash(h.Offset) if err != nil { return nil, err @@ -289,11 +244,7 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { } } - if _, err := p.s.SeekFromStart(offset); err != nil { - return nil, err - } - - h, err := p.nextObjectHeader() + h, err := p.objectHeaderAtOffset(offset) if err != nil { return nil, err } @@ -385,8 +336,6 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset if err != nil { return err } - - p.cachePut(base) } obj.SetType(base.Type()) diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 28582b5..71cbba9 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -38,15 +38,14 @@ type Observer interface { // Parser decodes a packfile and calls any observer associated to it. Is used // to generate indexes. type Parser struct { - storage storer.EncodedObjectStorer - scanner *Scanner - count uint32 - oi []*objectInfo - oiByHash map[plumbing.Hash]*objectInfo - oiByOffset map[int64]*objectInfo - hashOffset map[plumbing.Hash]int64 - pendingRefDeltas map[plumbing.Hash][]*objectInfo - checksum plumbing.Hash + storage storer.EncodedObjectStorer + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + checksum plumbing.Hash cache *cache.BufferLRU // delta content by offset, only used if source is not seekable @@ -78,13 +77,12 @@ func NewParserWithStorage( } return &Parser{ - storage: storage, - scanner: scanner, - ob: ob, - count: 0, - cache: cache.NewBufferLRUDefault(), - pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo), - deltas: deltas, + storage: storage, + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewBufferLRUDefault(), + deltas: deltas, }, nil } @@ -150,10 +148,6 @@ func (p *Parser) Parse() (plumbing.Hash, error) { return plumbing.ZeroHash, err } - if len(p.pendingRefDeltas) > 0 { - return plumbing.ZeroHash, ErrReferenceDeltaNotFound - } - if err := p.onFooter(p.checksum); err != nil { return plumbing.ZeroHash, err } @@ -205,18 +199,21 @@ func (p *Parser) indexObjects() error { parent.Children = append(parent.Children, ota) case plumbing.REFDeltaObject: delta = true - parent, ok := p.oiByHash[oh.Reference] - if ok { - ota = newDeltaObject(oh.Offset, oh.Length, t, parent) - parent.Children = append(parent.Children, ota) - } else { - ota = newBaseObject(oh.Offset, oh.Length, t) - p.pendingRefDeltas[oh.Reference] = append( - p.pendingRefDeltas[oh.Reference], - ota, - ) + if !ok { + // can't find referenced object in this pack file + // this must be a "thin" pack. + parent = &objectInfo{ //Placeholder parent + SHA1: oh.Reference, + ExternalRef: true, // mark as an external reference that must be resolved + Type: plumbing.AnyObject, + DiskType: plumbing.AnyObject, + } + p.oiByHash[oh.Reference] = parent } + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + parent.Children = append(parent.Children, ota) + default: ota = newBaseObject(oh.Offset, oh.Length, t) } @@ -297,16 +294,20 @@ func (p *Parser) resolveDeltas() error { return nil } -func (p *Parser) get(o *objectInfo) ([]byte, error) { - b, ok := p.cache.Get(o.Offset) +func (p *Parser) get(o *objectInfo) (b []byte, err error) { + var ok bool + if !o.ExternalRef { // skip cache check for placeholder parents + b, ok = p.cache.Get(o.Offset) + } + // If it's not on the cache and is not a delta we can try to find it in the - // storage, if there's one. + // storage, if there's one. External refs must enter here. if !ok && p.storage != nil && !o.Type.IsDelta() { - var err error e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) if err != nil { return nil, err } + o.Type = e.Type() r, err := e.Reader() if err != nil { @@ -323,6 +324,11 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { return b, nil } + if o.ExternalRef { + // we were not able to resolve a ref in a thin pack + return nil, ErrReferenceDeltaNotFound + } + var data []byte if o.DiskType.IsDelta() { base, err := p.get(o.Parent) @@ -335,7 +341,6 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { return nil, err } } else { - var err error data, err = p.readData(o) if err != nil { return nil, err @@ -367,14 +372,6 @@ func (p *Parser) resolveObject( return nil, err } - if pending, ok := p.pendingRefDeltas[o.SHA1]; ok { - for _, po := range pending { - po.Parent = o - o.Children = append(o.Children, po) - } - delete(p.pendingRefDeltas, o.SHA1) - } - if p.storage != nil { obj := new(plumbing.MemoryObject) obj.SetSize(o.Size()) @@ -401,11 +398,7 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { return data, nil } - if _, err := p.scanner.SeekFromStart(o.Offset); err != nil { - return nil, err - } - - if _, err := p.scanner.NextObjectHeader(); err != nil { + if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil { return nil, err } @@ -447,10 +440,11 @@ func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) { } type objectInfo struct { - Offset int64 - Length int64 - Type plumbing.ObjectType - DiskType plumbing.ObjectType + Offset int64 + Length int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType + ExternalRef bool // indicates this is an external reference in a thin pack file Crc32 uint32 diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go index 012a140..6e7c84b 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -1,10 +1,13 @@ package packfile_test import ( + "io" "testing" + git "gopkg.in/src-d/go-git.v4" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/plumbing/storer" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" @@ -74,6 +77,53 @@ func (s *ParserSuite) TestParserHashes(c *C) { c.Assert(obs.objects, DeepEquals, objs) } +func (s *ParserSuite) TestThinPack(c *C) { + + // Initialize an empty repository + fs, err := git.PlainInit(c.MkDir(), true) + c.Assert(err, IsNil) + + // Try to parse a thin pack without having the required objects in the repo to + // see if the correct errors are returned + thinpack := fixtures.ByTag("thinpack").One() + scanner := packfile.NewScanner(thinpack.Packfile()) + parser, err := packfile.NewParserWithStorage(scanner, fs.Storer) // ParserWithStorage writes to the storer all parsed objects! + c.Assert(err, IsNil) + + _, err = parser.Parse() + c.Assert(err, Equals, plumbing.ErrObjectNotFound) + + // start over with a clean repo + fs, err = git.PlainInit(c.MkDir(), true) + c.Assert(err, IsNil) + + // Now unpack a base packfile into our empty repo: + f := fixtures.ByURL("https://github.com/spinnaker/spinnaker.git").One() + w, err := fs.Storer.(storer.PackfileWriter).PackfileWriter() + c.Assert(err, IsNil) + _, err = io.Copy(w, f.Packfile()) + c.Assert(err, IsNil) + w.Close() + + // Check that the test object that will come with our thin pack is *not* in the repo + _, err = fs.Storer.EncodedObject(plumbing.CommitObject, thinpack.Head) + c.Assert(err, Equals, plumbing.ErrObjectNotFound) + + // Now unpack the thin pack: + scanner = packfile.NewScanner(thinpack.Packfile()) + parser, err = packfile.NewParserWithStorage(scanner, fs.Storer) // ParserWithStorage writes to the storer all parsed objects! + c.Assert(err, IsNil) + + h, err := parser.Parse() + c.Assert(err, IsNil) + c.Assert(h, Equals, plumbing.NewHash("1288734cbe0b95892e663221d94b95de1f5d7be8")) + + // Check that our test object is now accessible + _, err = fs.Storer.EncodedObject(plumbing.CommitObject, thinpack.Head) + c.Assert(err, IsNil) + +} + type observerObject struct { hash string otype plumbing.ObjectType diff --git a/plumbing/format/packfile/scanner.go b/plumbing/format/packfile/scanner.go index 6fc183b..614b0d1 100644 --- a/plumbing/format/packfile/scanner.go +++ b/plumbing/format/packfile/scanner.go @@ -138,14 +138,52 @@ func (s *Scanner) readCount() (uint32, error) { return binary.ReadUint32(s.r) } +// SeekObjectHeader seeks to specified offset and returns the ObjectHeader +// for the next object in the reader +func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) { + // if seeking we assume that you are not interested in the header + if s.version == 0 { + s.version = VersionSupported + } + + if _, err := s.r.Seek(offset, io.SeekStart); err != nil { + return nil, err + } + + h, err := s.nextObjectHeader() + if err != nil { + return nil, err + } + + h.Offset = offset + return h, nil +} + // NextObjectHeader returns the ObjectHeader for the next object in the reader func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) { - defer s.Flush() - if err := s.doPending(); err != nil { return nil, err } + offset, err := s.r.Seek(0, io.SeekCurrent) + if err != nil { + return nil, err + } + + h, err := s.nextObjectHeader() + if err != nil { + return nil, err + } + + h.Offset = offset + return h, nil +} + +// nextObjectHeader returns the ObjectHeader for the next object in the reader +// without the Offset field +func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) { + defer s.Flush() + s.crc.Reset() h := &ObjectHeader{} @@ -308,7 +346,7 @@ var byteSlicePool = sync.Pool{ // SeekFromStart sets a new offset from start, returns the old position before // the change. func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) { - // if seeking we assume that you are not interested on the header + // if seeking we assume that you are not interested in the header if s.version == 0 { s.version = VersionSupported } @@ -385,7 +423,7 @@ type bufferedSeeker struct { } func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) { - if whence == io.SeekCurrent { + if whence == io.SeekCurrent && offset == 0 { current, err := r.r.Seek(offset, whence) if err != nil { return current, err diff --git a/plumbing/format/packfile/scanner_test.go b/plumbing/format/packfile/scanner_test.go index 644d0eb..091b457 100644 --- a/plumbing/format/packfile/scanner_test.go +++ b/plumbing/format/packfile/scanner_test.go @@ -118,6 +118,23 @@ func (s *ScannerSuite) TestNextObjectHeaderWithOutReadObjectNonSeekable(c *C) { c.Assert(n, Equals, f.PackfileHash) } +func (s *ScannerSuite) TestSeekObjectHeader(c *C) { + r := fixtures.Basic().One().Packfile() + p := NewScanner(r) + + h, err := p.SeekObjectHeader(expectedHeadersOFS[4].Offset) + c.Assert(err, IsNil) + c.Assert(h, DeepEquals, &expectedHeadersOFS[4]) +} + +func (s *ScannerSuite) TestSeekObjectHeaderNonSeekable(c *C) { + r := io.MultiReader(fixtures.Basic().One().Packfile()) + p := NewScanner(r) + + _, err := p.SeekObjectHeader(expectedHeadersOFS[4].Offset) + c.Assert(err, Equals, ErrSeekNotSupported) +} + var expectedHeadersOFS = []ObjectHeader{ {Type: plumbing.CommitObject, Offset: 12, Length: 254}, {Type: plumbing.OFSDeltaObject, Offset: 186, Length: 93, OffsetReference: 12}, |