about summary refs log tree commit diff stats
path: root/formats/packfile/decoder.go
diff options
context:
space:
mode:
Diffstat (limited to 'formats/packfile/decoder.go')
-rw-r--r-- formats/packfile/decoder.go 116
1 files changed, 116 insertions, 0 deletions
diff --git a/formats/packfile/decoder.go b/formats/packfile/decoder.go
new file mode 100644
index 0000000..e8c5c6a
--- /dev/null
+++ b/formats/packfile/decoder.go
@@ -0,0 +1,116 @@
+package packfile
+
+import (
+ "io"
+
+ "gopkg.in/src-d/go-git.v3/core"
+)
+
+// Format specifies if the packfile uses ref-deltas or ofs-deltas.
+type Format int
+
+// Possible values of the Format type.
+const (
+ // UnknownFormat is the zero value of Format.
+ UnknownFormat Format = iota
+ // OFSDeltaFormat presumably marks a packfile that uses ofs-deltas
+ // (inferred from the name; confirm against the code that sets it).
+ OFSDeltaFormat
+ // REFDeltaFormat presumably marks a packfile that uses ref-deltas
+ // (inferred from the name; confirm against the code that sets it).
+ REFDeltaFormat
+)
+
+var (
+ // ErrMaxObjectsLimitReached is returned by Decode when the number
+ // of objects in the packfile is higher than
+ // Decoder.MaxObjectsLimit.
+ ErrMaxObjectsLimitReached = NewError("max. objects limit reached")
+
+ // ErrInvalidObject is returned by Decode when an invalid object is
+ // found in the packfile.
+ ErrInvalidObject = NewError("invalid git object")
+
+ // ErrPackEntryNotFound is returned by Decode when a reference in
+ // the packfile references an unknown object.
+ ErrPackEntryNotFound = NewError("can't find a pack entry")
+
+ // ErrZLib is returned by Decode when there was an error unzipping
+ // the packfile contents.
+ ErrZLib = NewError("zlib reading error")
+)
+
+const (
+ // DefaultMaxObjectsLimit is the value NewDecoder assigns to
+ // Decoder.MaxObjectsLimit: the highest object count a packfile
+ // header may declare before Decode returns
+ // ErrMaxObjectsLimitReached.
+ DefaultMaxObjectsLimit = 1 << 20
+)
+
+// Decoder reads and decodes packfiles from an input stream.
+type Decoder struct {
+ // MaxObjectsLimit is the maximum number of objects a packfile may
+ // declare. If a packfile exceeds this number, Decode returns
+ // ErrMaxObjectsLimitReached. NewDecoder sets it to
+ // DefaultMaxObjectsLimit, which is usually more than enough to work
+ // with any repository; with higher values and huge repositories you
+ // can run out of memory.
+ MaxObjectsLimit uint32
+
+ // p parses the input stream; NewDecoder creates it with NewParser.
+ p *Parser
+ // s is the storage handed to the most recent Decode call.
+ s core.ObjectStorage
+}
+
+// NewDecoder builds a Decoder whose parser reads from r. The object
+// limit starts at DefaultMaxObjectsLimit; callers may change
+// MaxObjectsLimit on the returned value before calling Decode.
+func NewDecoder(r ReadRecaller) *Decoder {
+	dec := new(Decoder)
+	dec.MaxObjectsLimit = DefaultMaxObjectsLimit
+	dec.p = NewParser(r)
+
+	return dec
+}
+
+// Decode reads the packfile header and then up to the announced number
+// of objects, storing each of them in s.
+//
+// It returns ErrMaxObjectsLimitReached, with the announced count as
+// detail, when the header declares more objects than d.MaxObjectsLimit.
+// Any error from reading the header or the objects is returned as is.
+func (d *Decoder) Decode(s core.ObjectStorage) error {
+	d.s = s
+
+	total, err := d.p.ReadHeader()
+	switch {
+	case err != nil:
+		return err
+	case total > d.MaxObjectsLimit:
+		return ErrMaxObjectsLimitReached.AddDetails("%d", total)
+	}
+
+	return d.readObjects(total)
+}
+
+// readObjects reads up to count objects from the parser, remembers each
+// one under the offset it started at, and stores it in d.s.
+//
+// Reading stops early, without error, when the parser or the storage
+// reports io.EOF. Any other error, including one returned by the
+// storage, aborts the decoding and is returned to the caller.
+func (d *Decoder) readObjects(count uint32) error {
+	// This code has 50-80 µs of overhead per object not counting zlib inflation.
+	// Together with zlib inflation, it's 400-410 µs for small objects.
+	// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
+	// of which 12-20 % is _not_ zlib inflation (ie. is our code).
+	//
+	// The counter is a uint32, like count, so that large limits cannot
+	// overflow int on 32-bit platforms.
+	for i := uint32(0); i < count; i++ {
+		start, err := d.p.Offset()
+		if err != nil {
+			return err
+		}
+
+		obj, err := d.p.ReadObject()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+
+		if err := d.p.Remember(start, obj); err != nil {
+			return err
+		}
+
+		// io.EOF from the storage keeps its historical meaning of "stop
+		// quietly"; every other storage failure must reach the caller
+		// instead of being dropped.
+		_, err = d.s.Set(obj)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}