aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Sjoding <joshua.sjoding@scjalliance.com>2016-02-27 00:30:35 -0800
committerJoshua Sjoding <joshua.sjoding@scjalliance.com>2016-02-27 00:30:35 -0800
commite3cb5921c8f3b730a8bbd21877176197c20b8fc7 (patch)
treeb56c8d3d3ec2aa5a90d175da7010fe11ee2cdc3e
parentff809118743100300c38d0c626ffe8c840fb1275 (diff)
downloadgo-git-e3cb5921c8f3b730a8bbd21877176197c20b8fc7.tar.gz
Added objfile format used for loose git objects
-rw-r--r--formats/objfile/common.go73
-rw-r--r--formats/objfile/common_test.go69
-rw-r--r--formats/objfile/reader.go123
-rw-r--r--formats/objfile/reader_test.go68
-rw-r--r--formats/objfile/writer.go133
-rw-r--r--formats/objfile/writer_test.go53
6 files changed, 519 insertions, 0 deletions
diff --git a/formats/objfile/common.go b/formats/objfile/common.go
new file mode 100644
index 0000000..7389086
--- /dev/null
+++ b/formats/objfile/common.go
@@ -0,0 +1,73 @@
+package objfile
+
+import (
+ "errors"
+ "io"
+ "strconv"
+
+ "gopkg.in/src-d/go-git.v3/core"
+)
+
+var (
+ // ErrClosed is returned when the objfile Reader or Writer is already closed.
+ ErrClosed = errors.New("objfile: already closed")
+ // ErrHeader is returned when the objfile has an invalid header.
+ ErrHeader = errors.New("objfile: invalid header")
+)
+
+type header struct {
+ t core.ObjectType
+ size int64
+}
+
+func (h *header) Read(r io.Reader) error {
+ t, err := h.readSlice(r, ' ')
+ if err != nil {
+ return err
+ }
+
+ h.t, err = core.ParseObjectType(string(t))
+ if err != nil {
+ return err
+ }
+
+ size, err := h.readSlice(r, 0)
+ if err != nil {
+ return err
+ }
+
+ h.size, err = strconv.ParseInt(string(size), 10, 64)
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (h *header) Write(w io.Writer) error {
+ b := h.t.Bytes()
+ b = append(b, ' ')
+ b = append(b, []byte(strconv.FormatInt(h.size, 10))...)
+ b = append(b, 0)
+ _, err := w.Write(b)
+ return err
+}
+
+// readSlice reads one byte at a time from r until it encounters delim or an
+// error.
+func (h *header) readSlice(r io.Reader, delim byte) ([]byte, error) {
+ var buf [1]byte
+ value := make([]byte, 0, 16)
+ for {
+ if n, err := r.Read(buf[:]); err != nil && (err != io.EOF || n == 0) {
+ if err == io.EOF {
+ return nil, ErrHeader
+ }
+ return nil, err
+ }
+ if buf[0] == delim {
+ return value, nil
+ }
+ value = append(value, buf[0])
+ }
+}
diff --git a/formats/objfile/common_test.go b/formats/objfile/common_test.go
new file mode 100644
index 0000000..4727685
--- /dev/null
+++ b/formats/objfile/common_test.go
@@ -0,0 +1,69 @@
+package objfile
+
+import (
+ "encoding/base64"
+ "testing"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git.v3/core"
+)
+
+type objfileFixture struct {
+ hash string // hash of data
+ t core.ObjectType // object type
+ content string // base64-encoded content
+ data string // base64-encoded objfile data
+}
+
+var objfileFixtures = []objfileFixture{
+ {
+ "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391",
+ core.BlobObject,
+ base64.StdEncoding.EncodeToString([]byte("")),
+ "eAFLyslPUjBgAAAJsAHw",
+ },
+ {
+ "a8a940627d132695a9769df883f85992f0ff4a43",
+ core.BlobObject,
+ base64.StdEncoding.EncodeToString([]byte("this is a test")),
+ "eAFLyslPUjA0YSjJyCxWAKJEhZLU4hIAUDYHOg==",
+ },
+ {
+ "4dc2174801ac4a3d36886210fd086fbe134cf7b2",
+ core.BlobObject,
+ base64.StdEncoding.EncodeToString([]byte("this\nis\n\n\na\nmultiline\n\ntest.\n")),
+ "eAFLyslPUjCyZCjJyCzmAiIurkSu3NKcksyczLxULq6S1OISPS4A1I8LMQ==",
+ },
+ {
+ "13e6f47dd57798bfdc728d91f5c6d7f40c5bb5fc",
+ core.BlobObject,
+ base64.StdEncoding.EncodeToString([]byte("this tests\r\nCRLF\r\nencoded files.\r\n")),
+ "eAFLyslPUjA2YSjJyCxWKEktLinm5XIO8nHj5UrNS85PSU1RSMvMSS3W4+UCABp3DNE=",
+ },
+ {
+ "72a7bc4667ab068e954172437b993d9fbaa137cb",
+ core.BlobObject,
+ base64.StdEncoding.EncodeToString([]byte("test@example.com")),
+ "eAFLyslPUjA0YyhJLS5xSK1IzC3ISdVLzs8FAGVtCIA=",
+ },
+ {
+ "bb2b40e85ec0455d1de72daff71583f0dd72a33f",
+ core.BlobObject,
+ base64.StdEncoding.EncodeToString([]byte("package main\r\n\r\nimport (\r\n\t\"fmt\"\r\n\t\"io\"\r\n\t\"os\"\r\n\r\n\t\"gopkg.in/src-d/go-git.v3\"\r\n)\r\n\r\nfunc main() {\r\n\tfmt.Printf(\"Retrieving %q ...\\n\", os.Args[2])\r\n\tr, err := git.NewRepository(os.Args[2], nil)\r\n\tif err != nil {\r\n\t\tpanic(err)\r\n\t}\r\n\r\n\tif err := r.Pull(\"origin\", \"refs/heads/master\"); err != nil {\r\n\t\tpanic(err)\r\n\t}\r\n\r\n\tdumpCommits(r)\r\n}\r\n\r\nfunc dumpCommits(r *git.Repository) {\r\n\titer := r.Commits()\r\n\tdefer iter.Close()\r\n\r\n\tfor {\r\n\t\tcommit, err := iter.Next()\r\n\t\tif err != nil {\r\n\t\t\tif err == io.EOF {\r\n\t\t\t\tbreak\r\n\t\t\t}\r\n\r\n\t\t\tpanic(err)\r\n\t\t}\r\n\r\n\t\tfmt.Println(commit)\r\n\t}\r\n}\r\n")),
+ "eAGNUU1LAzEU9JpC/0NcEFJps2ARQdmDFD3W0qt6SHez8dHdZH1JqyL+d/Oy/aDgQVh47LzJTGayatyKX99MzzpVrpXRvFVgh4PhANrOYeBiOGBZ3YaMJrg0nI+D/o3r1kaCzT2Wkyo3bmIgyO00rkfEqDe2TIJixL/jgagjFwg21CJb6oCgt2ANv3jnUsoXm4258/IejX++eo0CDMdcI/LbgpPuXH8sdec8BIdf4sgccwsN0aFO9POCgGTIOmWhFFGE9j/p1jtWFEW52DSNyByCAXLPUNc+f9Oq8nmrfNCYje7+o1lt2m7m2haCF2SVnFL6kw2/pBzHEH0rEH0oI8q9BF220nWEaSdnjfNaRDDCtcM+WZnsDgUl4lx/BuKxv6rYY0XBwcmHp8deh7EVarWmQ7uC2Glre/TweI0VvTk5xaTx+wWX66Gs",
+ },
+ {
+ "e94db0f9ffca44dc7bade6a3591f544183395a7c",
+ core.TreeObject,
+ "MTAwNjQ0IFRlc3QgMS50eHQAqKlAYn0TJpWpdp34g/hZkvD/SkMxMDA2NDQgVGVzdCAyLnR4dABNwhdIAaxKPTaIYhD9CG++E0z3sjEwMDY0NCBUZXN0IDMudHh0ABPm9H3Vd5i/3HKNkfXG1/QMW7X8MTAwNjQ0IFRlc3QgNC50eHQAcqe8RmerBo6VQXJDe5k9n7qhN8sxMDA2NDQgVGVzdCA1LnR4dAC7K0DoXsBFXR3nLa/3FYPw3XKjPw==",
+ "eAErKUpNVTC0NGAwNDAwMzFRCEktLlEw1CupKGFYsdIhqVZYberKsrk/mn9ETvrw38sZWZURWJXvIXEPxjVetmYdSQJ/OfL3Cft834SsyhisSvjZl9qr5TP23ynqnfj12PUvPNFb/yCrMgGrKlq+xy19NVvfVMci5+qZtvN3LTQ/jazKFKxqt7bDi7gDrrGyz3XXfxdt/nC3aLE9AA2STmk=",
+ },
+ {
+ "9d7f8a56eaf92469dee8a856e716a03387ddb076",
+ core.CommitObject,
+ "dHJlZSBlOTRkYjBmOWZmY2E0NGRjN2JhZGU2YTM1OTFmNTQ0MTgzMzk1YTdjCmF1dGhvciBKb3NodWEgU2pvZGluZyA8am9zaHVhLnNqb2RpbmdAc2NqYWxsaWFuY2UuY29tPiAxNDU2NTMxNTgzIC0wODAwCmNvbW1pdHRlciBKb3NodWEgU2pvZGluZyA8am9zaHVhLnNqb2RpbmdAc2NqYWxsaWFuY2UuY29tPiAxNDU2NTMxNTgzIC0wODAwCgpUZXN0IENvbW1pdAo=",
+ "eAGtjksOgjAUAF33FO8CktZ+aBNjTNy51Qs8Xl8FAjSh5f4SvILLmcVkKM/zUOEi3amuzMDBxE6mkBKhMZHaDiM71DaoZI1RXutgsSWBW+3zCs9c+g3hNeY4LB+4jgc35cf3QiNO04ALcUN5voEy1lmtrNdwll5Ksdt9oPIfUuLNpcLjCIov3ApFmQ==",
+ },
+}
+
+func Test(t *testing.T) { TestingT(t) }
diff --git a/formats/objfile/reader.go b/formats/objfile/reader.go
new file mode 100644
index 0000000..b3c2e5c
--- /dev/null
+++ b/formats/objfile/reader.go
@@ -0,0 +1,123 @@
+package objfile
+
+import (
+ "errors"
+ "io"
+
+ "gopkg.in/src-d/go-git.v3/core"
+
+ "github.com/klauspost/compress/zlib"
+)
+
+var (
+ // ErrZLib is returned when the objfile contains invalid zlib data.
+ ErrZLib = errors.New("objfile: invalid zlib data")
+)
+
+// Reader reads and decodes compressed objfile data from a provided io.Reader.
+//
+// Reader implements io.ReadCloser. Close should be called when finished with
+// the Reader. Close will not close the underlying io.Reader.
+type Reader struct {
+ header header
+ hash core.Hash // final computed hash stored after Close
+
+ r io.Reader // provided reader wrapped in decompressor and tee
+ decompressor io.ReadCloser // provided reader wrapped in decompressor, retained for calling Close
+ h core.Hasher // streaming SHA1 hash of decoded data
+}
+
+// NewReader returns a new Reader reading from r.
+//
+// Calling NewReader causes it to immediately read in header data from r
+// containing size and type information. Any errors encountered in that
+// process will be returned in err.
+//
+// The returned Reader implements io.ReadCloser. Close should be called when
+// finished with the Reader. Close will not close the underlying io.Reader.
+func NewReader(r io.Reader) (*Reader, error) {
+ reader := &Reader{}
+ return reader, reader.init(r)
+}
+
+// init prepares the zlib decompressor for the given input as well as a hasher
+// for computing its hash.
+//
+// init immediately reads header data from the input and stores it. This leaves
+// the Reader in a state that is ready to read content.
+func (r *Reader) init(input io.Reader) (err error) {
+ r.decompressor, err = zlib.NewReader(input)
+ if err != nil {
+ // TODO: Make this error match the ZLibErr in formats/packfile/reader.go?
+ return ErrZLib
+ }
+
+ err = r.header.Read(r.decompressor)
+ if err != nil {
+ r.decompressor.Close()
+ return
+ }
+
+ r.h = core.NewHasher(r.header.t, r.header.size)
+ r.r = io.TeeReader(r.decompressor, r.h) // All reads from the decompressor also write to the hash
+
+ return
+}
+
+// Read reads len(p) bytes into p from the object data stream. It returns
+// the number of bytes read (0 <= n <= len(p)) and any error encountered. Even
+// if Read returns n < len(p), it may use all of p as scratch space during the
+// call.
+//
+// If Read encounters the end of the data stream it will return err == io.EOF,
+// either in the current call if n > 0 or in a subsequent call.
+func (r *Reader) Read(p []byte) (n int, err error) {
+ if r.r == nil {
+ return 0, ErrClosed
+ }
+
+ return r.r.Read(p)
+}
+
+// Type returns the type of the object.
+func (r *Reader) Type() core.ObjectType {
+ return r.header.t
+}
+
+// Size returns the uncompressed size of the object in bytes.
+func (r *Reader) Size() int64 {
+ return r.header.size
+}
+
+// Hash returns the hash of the object data stream that has been read so far.
+// It can be called before or after Close.
+func (r *Reader) Hash() core.Hash {
+ if r.r != nil {
+ return r.h.Sum() // Not yet closed, return hash of data read so far
+ }
+ return r.hash
+}
+
+// Close releases any resources consumed by the Reader.
+//
+// Calling Close does not close the wrapped io.Reader originally passed to
+// NewReader.
+func (r *Reader) Close() (err error) {
+ if r.r == nil {
+ // TODO: Consider returning ErrClosed here?
+ return nil // Already closed
+ }
+
+ // Release the decompressor's resources
+ err = r.decompressor.Close()
+
+ // Save the hash because we're about to throw away the hasher
+ r.hash = r.h.Sum()
+
+ // Release references
+ r.r = nil // Indicates closed state
+ r.decompressor = nil
+ r.h.Hash = nil
+
+ return
+}
diff --git a/formats/objfile/reader_test.go b/formats/objfile/reader_test.go
new file mode 100644
index 0000000..871eefe
--- /dev/null
+++ b/formats/objfile/reader_test.go
@@ -0,0 +1,68 @@
+package objfile
+
+import (
+ "bytes"
+ "encoding/base64"
+ "fmt"
+ "io"
+ "io/ioutil"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git.v3/core"
+
+ "github.com/klauspost/compress/zlib"
+)
+
+type SuiteReader struct{}
+
+var _ = Suite(&SuiteReader{})
+
+func (s *SuiteReader) TestReadObjfile(c *C) {
+ for k, fixture := range objfileFixtures {
+ comment := fmt.Sprintf("test %d: ", k)
+ hash := core.NewHash(fixture.hash)
+ content, _ := base64.StdEncoding.DecodeString(fixture.content)
+ data, _ := base64.StdEncoding.DecodeString(fixture.data)
+
+ testReader(c, bytes.NewReader(data), hash, fixture.t, content, comment)
+ }
+}
+
+func testReader(c *C, source io.Reader, hash core.Hash, typ core.ObjectType, content []byte, comment string) {
+ r, err := NewReader(source)
+ c.Assert(err, IsNil)
+ c.Assert(r.Type(), Equals, typ)
+ rc, err := ioutil.ReadAll(r)
+ c.Assert(err, IsNil)
+ c.Assert(rc, DeepEquals, content, Commentf("%scontent=%s, expected=%s", base64.StdEncoding.EncodeToString(rc), base64.StdEncoding.EncodeToString(content)))
+ c.Assert(r.Hash(), Equals, hash) // Test Hash() before close
+ c.Assert(r.Close(), IsNil)
+ c.Assert(r.Hash(), Equals, hash) // Test Hash() after close
+}
+
+func (s *SuiteReader) TestReadEmptyObjfile(c *C) {
+ source := bytes.NewReader([]byte{})
+ _, err := NewReader(source)
+ c.Assert(err, Equals, ErrZLib)
+}
+
+func (s *SuiteReader) TestReadEmptyContent(c *C) {
+ b := new(bytes.Buffer)
+ w := zlib.NewWriter(b)
+ c.Assert(w.Close(), IsNil)
+ _, err := NewReader(b)
+ c.Assert(err, Equals, ErrHeader)
+}
+
+func (s *SuiteReader) TestReadGarbage(c *C) {
+ source := bytes.NewReader([]byte("!@#$RO!@NROSADfinq@o#irn@oirfn"))
+ _, err := NewReader(source)
+ c.Assert(err, Equals, ErrZLib)
+}
+
+func (s *SuiteReader) TestReadCorruptZLib(c *C) {
+ data, _ := base64.StdEncoding.DecodeString("eAFLysaalPUjBgAAAJsAHw")
+ source := bytes.NewReader(data)
+ _, err := NewReader(source)
+ c.Assert(err, NotNil)
+}
diff --git a/formats/objfile/writer.go b/formats/objfile/writer.go
new file mode 100644
index 0000000..d80256c
--- /dev/null
+++ b/formats/objfile/writer.go
@@ -0,0 +1,133 @@
+package objfile
+
+import (
+ "errors"
+ "io"
+
+ "gopkg.in/src-d/go-git.v3/core"
+
+ "github.com/klauspost/compress/zlib"
+)
+
+var (
+ // ErrOverflow is returned when an attempt is made to write more data than
+ // was declared in NewWriter.
+ ErrOverflow = errors.New("objfile: declared data length exceeded (overflow)")
+)
+
+// Writer writes and encodes data in compressed objfile format to a provided
+// io.Writer.
+//
+// Writer implements io.WriteCloser. Close should be called when finished with
+// the Writer. Close will not close the underlying io.Writer.
+type Writer struct {
+ header header
+ hash core.Hash // final computed hash stored after Close
+
+ w io.Writer // provided writer wrapped in compressor and tee
+ compressor io.WriteCloser // provided writer wrapped in compressor, retained for calling Close
+ h core.Hasher // streaming SHA1 hash of encoded data
+ written int64 // Number of bytes written
+}
+
+// NewWriter returns a new Writer writing to w.
+//
+// The provided t is the type of object being written. The provided size is the
+// number of uncompressed bytes being written.
+//
+// Calling NewWriter causes it to immediately write header data containing
+// size and type information. Any errors encountered in that process will be
+// returned in err.
+//
+// The returned Writer implements io.WriteCloser. Close should be called when
+// finished with the Writer. Close will not close the underlying io.Writer.
+func NewWriter(w io.Writer, t core.ObjectType, size int64) (*Writer, error) {
+ writer := &Writer{
+ header: header{t: t, size: size},
+ }
+ return writer, writer.init(w)
+}
+
+// init prepares the zlib compressor for the given output as well as a hasher
+// for computing its hash.
+//
+// init immediately writes header data to the output. This leaves the writer in
+// a state that is ready to write content.
+func (w *Writer) init(output io.Writer) (err error) {
+ w.compressor = zlib.NewWriter(output)
+
+ err = w.header.Write(w.compressor)
+ if err != nil {
+ defer w.compressor.Close()
+ return
+ }
+
+ w.h = core.NewHasher(w.header.t, w.header.size)
+ w.w = io.MultiWriter(w.compressor, w.h) // All writes to the compressor also write to the hash
+
+ return
+}
+
+// Write reads len(p) from p to the object data stream. It returns the number of
+// bytes written from p (0 <= n <= len(p)) and any error encountered that caused
+// the write to stop early. The slice data contained in p will not be modified.
+//
+// If writing len(p) bytes would exceed the size provided in NewWriter,
+// ErrOverflow is returned without writing any data.
+func (w *Writer) Write(p []byte) (n int, err error) {
+ if w.w == nil {
+ return 0, ErrClosed
+ }
+
+ if w.written+int64(len(p)) > w.header.size {
+ return 0, ErrOverflow
+ }
+
+ n, err = w.w.Write(p)
+ w.written += int64(n)
+
+ return
+}
+
+// Type returns the type of the object.
+func (w *Writer) Type() core.ObjectType {
+ return w.header.t
+}
+
+// Size returns the uncompressed size of the object in bytes.
+func (w *Writer) Size() int64 {
+ return w.header.size
+}
+
+// Hash returns the hash of the object data stream that has been written so far.
+// It can be called before or after Close.
+func (w *Writer) Hash() core.Hash {
+ if w.w != nil {
+ return w.h.Sum() // Not yet closed, return hash of data written so far
+ }
+ return w.hash
+}
+
+// Close releases any resources consumed by the Writer.
+//
+// Calling Close does not close the wrapped io.Writer originally passed to
+// NewWriter.
+func (w *Writer) Close() (err error) {
+ if w.w == nil {
+ // TODO: Consider returning ErrClosed here?
+ return nil // Already closed
+ }
+
+ // Release the compressor's resources
+ err = w.compressor.Close()
+
+ // Save the hash because we're about to throw away the hasher
+ w.hash = w.h.Sum()
+
+ // Release references
+ w.w = nil // Indicates closed state
+ w.compressor = nil
+ w.h.Hash = nil
+
+ return
+}
diff --git a/formats/objfile/writer_test.go b/formats/objfile/writer_test.go
new file mode 100644
index 0000000..03b8370
--- /dev/null
+++ b/formats/objfile/writer_test.go
@@ -0,0 +1,53 @@
+package objfile
+
+import (
+ "bytes"
+ "encoding/base64"
+ "fmt"
+ "io"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git.v3/core"
+)
+
+type SuiteWriter struct{}
+
+var _ = Suite(&SuiteWriter{})
+
+func (s *SuiteWriter) TestWriteObjfile(c *C) {
+ for k, fixture := range objfileFixtures {
+ comment := fmt.Sprintf("test %d: ", k)
+ hash := core.NewHash(fixture.hash)
+ content, _ := base64.StdEncoding.DecodeString(fixture.content)
+ buffer := new(bytes.Buffer)
+
+ // Write the data out to the buffer
+ testWriter(c, buffer, hash, fixture.t, content, comment)
+
+ // Read the data back in from the buffer to be sure it matches
+ testReader(c, buffer, hash, fixture.t, content, comment)
+ }
+}
+
+func testWriter(c *C, dest io.Writer, hash core.Hash, typ core.ObjectType, content []byte, comment string) {
+ length := int64(len(content))
+ w, err := NewWriter(dest, typ, length)
+ c.Assert(err, IsNil)
+ c.Assert(w.Type(), Equals, typ)
+ c.Assert(w.Size(), Equals, length)
+ written, err := io.Copy(w, bytes.NewReader(content))
+ c.Assert(err, IsNil)
+ c.Assert(written, Equals, length)
+ c.Assert(w.Hash(), Equals, hash) // Test Hash() before close
+ c.Assert(w.Close(), IsNil)
+ c.Assert(w.Hash(), Equals, hash) // Test Hash() after close
+}
+
+func (s *SuiteWriter) TestWriteOverflow(c *C) {
+ w, err := NewWriter(new(bytes.Buffer), core.BlobObject, 8)
+ c.Assert(err, IsNil)
+ _, err = w.Write([]byte("1234"))
+ c.Assert(err, IsNil)
+ _, err = w.Write([]byte("56789"))
+ c.Assert(err, Equals, ErrOverflow)
+}