diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2017-02-07 15:23:57 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-07 15:23:57 +0100 |
commit | 431af32445562b389397f3ee7af90bf61455fff1 (patch) | |
tree | 11ad4dfafd66e7f499307afb99a267d5d8763140 /plumbing/format | |
parent | d0cf20797464ab12b41ccb5c603f67884a6e8e17 (diff) | |
download | go-git-431af32445562b389397f3ee7af90bf61455fff1.tar.gz |
package plumbing documentation improvements (#248)
Diffstat (limited to 'plumbing/format')
-rw-r--r-- | plumbing/format/config/common.go | 40 | ||||
-rw-r--r-- | plumbing/format/config/doc.go | 319 | ||||
-rw-r--r-- | plumbing/format/config/option.go | 1 | ||||
-rw-r--r-- | plumbing/format/idxfile/decoder.go | 7 | ||||
-rw-r--r-- | plumbing/format/idxfile/doc.go | 258 | ||||
-rw-r--r-- | plumbing/format/idxfile/encoder.go | 7 | ||||
-rw-r--r-- | plumbing/format/idxfile/idxfile.go | 6 | ||||
-rw-r--r-- | plumbing/format/index/doc.go | 601 | ||||
-rw-r--r-- | plumbing/format/index/index.go | 6 | ||||
-rw-r--r-- | plumbing/format/objfile/doc.go | 2 | ||||
-rw-r--r-- | plumbing/format/packfile/doc.go | 205 | ||||
-rw-r--r-- | plumbing/format/pktline/encoder.go | 3 |
12 files changed, 625 insertions, 830 deletions
diff --git a/plumbing/format/config/common.go b/plumbing/format/config/common.go index cc1e81a..8f98ad1 100644 --- a/plumbing/format/config/common.go +++ b/plumbing/format/config/common.go @@ -5,26 +5,32 @@ func New() *Config { return &Config{} } +// Config contains all the sections, comments and includes from a config file. type Config struct { Comment *Comment Sections Sections Includes Includes } +// Includes is a list of Includes in a config file. type Includes []*Include -// A reference to an included configuration. +// Include is a reference to an included config file. type Include struct { Path string Config *Config } +// Comment string without the prefix '#' or ';'. type Comment string const ( + // NoSubsection token is passed to Config.Section and Config.SetSection to + // represent the absence of a subsection. NoSubsection = "" ) +// Section returns an existing section with the given name or creates a new one. func (c *Config) Section(name string) *Section { for i := len(c.Sections) - 1; i >= 0; i-- { s := c.Sections[i] @@ -38,36 +44,31 @@ func (c *Config) Section(name string) *Section { return s } -// AddOption is a convenience method to add an option to a given -// section and subsection. -// -// Use the NoSubsection constant for the subsection argument -// if no subsection is wanted. -func (s *Config) AddOption(section string, subsection string, key string, value string) *Config { +// AddOption adds an option to a given section and subsection. Use the +// NoSubsection constant for the subsection argument if no subsection is wanted. 
+func (c *Config) AddOption(section string, subsection string, key string, value string) *Config { if subsection == "" { - s.Section(section).AddOption(key, value) + c.Section(section).AddOption(key, value) } else { - s.Section(section).Subsection(subsection).AddOption(key, value) + c.Section(section).Subsection(subsection).AddOption(key, value) } - return s + return c } -// SetOption is a convenience method to set an option to a given -// section and subsection. -// -// Use the NoSubsection constant for the subsection argument -// if no subsection is wanted. -func (s *Config) SetOption(section string, subsection string, key string, value string) *Config { +// SetOption sets an option to a given section and subsection. Use the +// NoSubsection constant for the subsection argument if no subsection is wanted. +func (c *Config) SetOption(section string, subsection string, key string, value string) *Config { if subsection == "" { - s.Section(section).SetOption(key, value) + c.Section(section).SetOption(key, value) } else { - s.Section(section).Subsection(subsection).SetOption(key, value) + c.Section(section).Subsection(subsection).SetOption(key, value) } - return s + return c } +// RemoveSection removes a section from a config file. func (c *Config) RemoveSection(name string) *Config { result := Sections{} for _, s := range c.Sections { @@ -80,6 +81,7 @@ func (c *Config) RemoveSection(name string) *Config { return c } +// RemoveSubsection removes a subsection from a config file. func (c *Config) RemoveSubsection(section string, subsection string) *Config { for _, s := range c.Sections { if s.IsName(section) { diff --git a/plumbing/format/config/doc.go b/plumbing/format/config/doc.go index dd77fbc..3986c83 100644 --- a/plumbing/format/config/doc.go +++ b/plumbing/format/config/doc.go @@ -1,199 +1,122 @@ -// Package config implements decoding/encoding of git config files. +// Package config implements encoding and decoding of git config files. 
+// +// Configuration File +// ------------------ +// +// The Git configuration file contains a number of variables that affect +// the Git commands' behavior. The `.git/config` file in each repository +// is used to store the configuration for that repository, and +// `$HOME/.gitconfig` is used to store a per-user configuration as +// fallback values for the `.git/config` file. The file `/etc/gitconfig` +// can be used to store a system-wide default configuration. +// +// The configuration variables are used by both the Git plumbing +// and the porcelains. The variables are divided into sections, wherein +// the fully qualified variable name of the variable itself is the last +// dot-separated segment and the section name is everything before the last +// dot. The variable names are case-insensitive, allow only alphanumeric +// characters and `-`, and must start with an alphabetic character. Some +// variables may appear multiple times; we say then that the variable is +// multivalued. +// +// Syntax +// ~~~~~~ +// +// The syntax is fairly flexible and permissive; whitespaces are mostly +// ignored. The '#' and ';' characters begin comments to the end of line, +// blank lines are ignored. +// +// The file consists of sections and variables. A section begins with +// the name of the section in square brackets and continues until the next +// section begins. Section names are case-insensitive. Only alphanumeric +// characters, `-` and `.` are allowed in section names. Each variable +// must belong to some section, which means that there must be a section +// header before the first setting of a variable. +// +// Sections can be further divided into subsections. 
To begin a subsection +// put its name in double quotes, separated by space from the section name, +// in the section header, like in the example below: +// +// -------- +// [section "subsection"] +// +// -------- +// +// Subsection names are case sensitive and can contain any characters except +// newline (doublequote `"` and backslash can be included by escaping them +// as `\"` and `\\`, respectively). Section headers cannot span multiple +// lines. Variables may belong directly to a section or to a given subsection. +// You can have `[section]` if you have `[section "subsection"]`, but you +// don't need to. +// +// There is also a deprecated `[section.subsection]` syntax. With this +// syntax, the subsection name is converted to lower-case and is also +// compared case sensitively. These subsection names follow the same +// restrictions as section names. +// +// All the other lines (and the remainder of the line after the section +// header) are recognized as setting variables, in the form +// 'name = value' (or just 'name', which is a short-hand to say that +// the variable is the boolean "true"). +// The variable names are case-insensitive, allow only alphanumeric characters +// and `-`, and must start with an alphabetic character. +// +// A line that defines a value can be continued to the next line by +// ending it with a `\`; the backslash and the end-of-line are +// stripped. Leading whitespaces after 'name =', the remainder of the +// line after the first comment character '#' or ';', and trailing +// whitespaces of the line are discarded unless they are enclosed in +// double quotes. Internal whitespaces within the value are retained +// verbatim. +// +// Inside double quotes, double quote `"` and backslash `\` characters +// must be escaped: use `\"` for `"` and `\\` for `\`. 
+// +// The following escape sequences (beside `\"` and `\\`) are recognized: +// `\n` for newline character (NL), `\t` for horizontal tabulation (HT, TAB) +// and `\b` for backspace (BS). Other char escape sequences (including octal +// escape sequences) are invalid. +// +// Includes +// ~~~~~~~~ +// +// You can include one config file from another by setting the special +// `include.path` variable to the name of the file to be included. The +// variable takes a pathname as its value, and is subject to tilde +// expansion. +// +// The included file is expanded immediately, as if its contents had been +// found at the location of the include directive. If the value of the +// `include.path` variable is a relative path, the path is considered to be +// relative to the configuration file in which the include directive was +// found. See below for examples. +// +// +// Example +// ~~~~~~~ +// +// # Core variables +// [core] +// ; Don't trust file modes +// filemode = false +// +// # Our diff algorithm +// [diff] +// external = /usr/local/bin/diff-wrapper +// renames = true +// +// [branch "devel"] +// remote = origin +// merge = refs/heads/devel +// +// # Proxy settings +// [core] +// gitProxy="ssh" for "kernel.org" +// gitProxy=default-proxy ; for the rest +// +// [include] +// path = /path/to/foo.inc ; include by absolute path +// path = foo ; expand "foo" relative to the current file +// path = ~/foo ; expand "foo" in your `$HOME` directory +// package config - -/* - -CONFIGURATION FILE ------------------- - -The Git configuration file contains a number of variables that affect -the Git commands' behavior. The `.git/config` file in each repository -is used to store the configuration for that repository, and -`$HOME/.gitconfig` is used to store a per-user configuration as -fallback values for the `.git/config` file. The file `/etc/gitconfig` -can be used to store a system-wide default configuration. 
- -The configuration variables are used by both the Git plumbing -and the porcelains. The variables are divided into sections, wherein -the fully qualified variable name of the variable itself is the last -dot-separated segment and the section name is everything before the last -dot. The variable names are case-insensitive, allow only alphanumeric -characters and `-`, and must start with an alphabetic character. Some -variables may appear multiple times; we say then that the variable is -multivalued. - -Syntax -~~~~~~ - -The syntax is fairly flexible and permissive; whitespaces are mostly -ignored. The '#' and ';' characters begin comments to the end of line, -blank lines are ignored. - -The file consists of sections and variables. A section begins with -the name of the section in square brackets and continues until the next -section begins. Section names are case-insensitive. Only alphanumeric -characters, `-` and `.` are allowed in section names. Each variable -must belong to some section, which means that there must be a section -header before the first setting of a variable. - -Sections can be further divided into subsections. To begin a subsection -put its name in double quotes, separated by space from the section name, -in the section header, like in the example below: - --------- - [section "subsection"] - --------- - -Subsection names are case sensitive and can contain any characters except -newline (doublequote `"` and backslash can be included by escaping them -as `\"` and `\\`, respectively). Section headers cannot span multiple -lines. Variables may belong directly to a section or to a given subsection. -You can have `[section]` if you have `[section "subsection"]`, but you -don't need to. - -There is also a deprecated `[section.subsection]` syntax. With this -syntax, the subsection name is converted to lower-case and is also -compared case sensitively. These subsection names follow the same -restrictions as section names. 
- -All the other lines (and the remainder of the line after the section -header) are recognized as setting variables, in the form -'name = value' (or just 'name', which is a short-hand to say that -the variable is the boolean "true"). -The variable names are case-insensitive, allow only alphanumeric characters -and `-`, and must start with an alphabetic character. - -A line that defines a value can be continued to the next line by -ending it with a `\`; the backquote and the end-of-line are -stripped. Leading whitespaces after 'name =', the remainder of the -line after the first comment character '#' or ';', and trailing -whitespaces of the line are discarded unless they are enclosed in -double quotes. Internal whitespaces within the value are retained -verbatim. - -Inside double quotes, double quote `"` and backslash `\` characters -must be escaped: use `\"` for `"` and `\\` for `\`. - -The following escape sequences (beside `\"` and `\\`) are recognized: -`\n` for newline character (NL), `\t` for horizontal tabulation (HT, TAB) -and `\b` for backspace (BS). Other char escape sequences (including octal -escape sequences) are invalid. - - -Includes -~~~~~~~~ - -You can include one config file from another by setting the special -`include.path` variable to the name of the file to be included. The -variable takes a pathname as its value, and is subject to tilde -expansion. - -The -included file is expanded immediately, as if its contents had been -found at the location of the include directive. If the value of the -`include.path` variable is a relative path, the path is considered to be -relative to the configuration file in which the include directive was -found. See below for examples. 
- - -Example -~~~~~~~ - - # Core variables - [core] - ; Don't trust file modes - filemode = false - - # Our diff algorithm - [diff] - external = /usr/local/bin/diff-wrapper - renames = true - - [branch "devel"] - remote = origin - merge = refs/heads/devel - - # Proxy settings - [core] - gitProxy="ssh" for "kernel.org" - gitProxy=default-proxy ; for the rest - - [include] - path = /path/to/foo.inc ; include by absolute path - path = foo ; expand "foo" relative to the current file - path = ~/foo ; expand "foo" in your `$HOME` directory - - -Values -~~~~~~ - -Values of many variables are treated as a simple string, but there -are variables that take values of specific types and there are rules -as to how to spell them. - -boolean:: - - When a variable is said to take a boolean value, many - synonyms are accepted for 'true' and 'false'; these are all - case-insensitive. - - true;; Boolean true can be spelled as `yes`, `on`, `true`, - or `1`. Also, a variable defined without `= <value>` - is taken as true. - - false;; Boolean false can be spelled as `no`, `off`, - `false`, or `0`. -+ -When converting value to the canonical form using `--bool` type -specifier; 'git config' will ensure that the output is "true" or -"false" (spelled in lowercase). - -integer:: - The value for many variables that specify various sizes can - be suffixed with `k`, `M`,... to mean "scale the number by - 1024", "by 1024x1024", etc. - -color:: - The value for a variable that takes a color is a list of - colors (at most two, one for foreground and one for background) - and attributes (as many as you want), separated by spaces. -+ -The basic colors accepted are `normal`, `black`, `red`, `green`, `yellow`, -`blue`, `magenta`, `cyan` and `white`. The first color given is the -foreground; the second is the background. -+ -Colors may also be given as numbers between 0 and 255; these use ANSI -256-color mode (but note that not all terminals may support this). 
If -your terminal supports it, you may also specify 24-bit RGB values as -hex, like `#ff0ab3`. -+ - -From: https://git-scm.com/docs/git-config -The accepted attributes are `bold`, `dim`, `ul`, `blink`, `reverse`, -`italic`, and `strike` (for crossed-out or "strikethrough" letters). -The position of any attributes with respect to the colors -(before, after, or in between), doesn't matter. Specific attributes may -be turned off by prefixing them with `no` or `no-` (e.g., `noreverse`, -`no-ul`, etc). -+ -For git's pre-defined color slots, the attributes are meant to be reset -at the beginning of each item in the colored output. So setting -`color.decorate.branch` to `black` will paint that branch name in a -plain `black`, even if the previous thing on the same output line (e.g. -opening parenthesis before the list of branch names in `log --decorate` -output) is set to be painted with `bold` or some other attribute. -However, custom log formats may do more complicated and layered -coloring, and the negated forms may be useful there. - -pathname:: - A variable that takes a pathname value can be given a - string that begins with "`~/`" or "`~user/`", and the usual - tilde expansion happens to such a string: `~/` - is expanded to the value of `$HOME`, and `~user/` to the - specified user's home directory. - -From: -https://raw.githubusercontent.com/git/git/659889482ac63411daea38b2c3d127842ea04e4d/Documentation/config.txt - -*/ diff --git a/plumbing/format/config/option.go b/plumbing/format/config/option.go index cae83e5..3c391c6 100644 --- a/plumbing/format/config/option.go +++ b/plumbing/format/config/option.go @@ -4,6 +4,7 @@ import ( "strings" ) +// Option defines a key/value entity in a config file. type Option struct { // Key preserving original caseness. // Use IsKey instead to compare key regardless of caseness. 
diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go index 835978b..020c997 100644 --- a/plumbing/format/idxfile/decoder.go +++ b/plumbing/format/idxfile/decoder.go @@ -17,18 +17,17 @@ var ( ErrMalformedIdxFile = errors.New("Malformed IDX file") ) -// A Decoder reads and decodes idx files from an input stream. +// Decoder reads and decodes idx files from an input stream. type Decoder struct { io.Reader } -// NewDecoder returns a new decoder that reads from r. +// NewDecoder builds a new idx stream decoder, that reads from r. func NewDecoder(r io.Reader) *Decoder { return &Decoder{r} } -// Decode reads the whole idx object from its input and stores it in the -// value pointed to by idx. +// Decode reads from the stream and decodes the content into the Idxfile struct. func (d *Decoder) Decode(idx *Idxfile) error { if err := validateHeader(d); err != nil { return err diff --git a/plumbing/format/idxfile/doc.go b/plumbing/format/idxfile/doc.go index fb70b7d..1e628ab 100644 --- a/plumbing/format/idxfile/doc.go +++ b/plumbing/format/idxfile/doc.go @@ -1,132 +1,128 @@ -// Package idxfile implements an encoder and a decoder of idx files +// Package idxfile implements encoding and decoding of packfile idx files. +// +// == Original (version 1) pack-*.idx files have the following format: +// +// - The header consists of 256 4-byte network byte order +// integers. N-th entry of this table records the number of +// objects in the corresponding pack, the first byte of whose +// object name is less than or equal to N. This is called the +// 'first-level fan-out' table. +// +// - The header is followed by sorted 24-byte entries, one entry +// per object in the pack. Each entry is: +// +// 4-byte network byte order integer, recording where the +// object is stored in the packfile as the offset from the +// beginning. +// +// 20-byte object name. 
+// +// - The file is concluded with a trailer: +// +// A copy of the 20-byte SHA1 checksum at the end of +// corresponding packfile. +// +// 20-byte SHA1-checksum of all of the above. +// +// Pack Idx file: +// +// -- +--------------------------------+ +// fanout | fanout[0] = 2 (for example) |-. +// table +--------------------------------+ | +// | fanout[1] | | +// +--------------------------------+ | +// | fanout[2] | | +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | +// | fanout[255] = total objects |---. +// -- +--------------------------------+ | | +// main | offset | | | +// index | object name 00XXXXXXXXXXXXXXXX | | | +// tab +--------------------------------+ | | +// | offset | | | +// | object name 00XXXXXXXXXXXXXXXX | | | +// +--------------------------------+<+ | +// .-| offset | | +// | | object name 01XXXXXXXXXXXXXXXX | | +// | +--------------------------------+ | +// | | offset | | +// | | object name 01XXXXXXXXXXXXXXXX | | +// | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | +// | | offset | | +// | | object name FFXXXXXXXXXXXXXXXX | | +// --| +--------------------------------+<--+ +// trailer | | packfile checksum | +// | +--------------------------------+ +// | | idxfile checksum | +// | +--------------------------------+ +// .---------. +// | +// Pack file entry: <+ +// +// packed object header: +// 1-byte size extension bit (MSB) +// type (next 3 bit) +// size0 (lower 4-bit) +// n-byte sizeN (as long as MSB is set, each 7-bit) +// size0..sizeN form 4+7+7+..+7 bit integer, size0 +// is the least significant part, and sizeN is the +// most significant part. +// packed object data: +// If it is not DELTA, then deflated bytes (the size above +// is the size before compression). +// If it is REF_DELTA, then +// 20-byte base object name SHA1 (the size above is the +// size of the delta data that follows). +// delta data, deflated. 
+// If it is OFS_DELTA, then +// n-byte offset (see below) interpreted as a negative +// offset from the type-byte of the header of the +// ofs-delta entry (the size above is the size of +// the delta data that follows). +// delta data, deflated. +// +// offset encoding: +// n bytes with MSB set in all but the last one. +// The offset is then the number constructed by +// concatenating the lower 7 bit of each byte, and +// for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) +// to the result. +// +// == Version 2 pack-*.idx files support packs larger than 4 GiB, and +// have some other reorganizations. They have the format: +// +// - A 4-byte magic number '\377tOc' which is an unreasonable +// fanout[0] value. +// +// - A 4-byte version number (= 2) +// +// - A 256-entry fan-out table just like v1. +// +// - A table of sorted 20-byte SHA1 object names. These are +// packed together without offset values to reduce the cache +// footprint of the binary search for a specific object name. +// +// - A table of 4-byte CRC32 values of the packed object data. +// This is new in v2 so compressed data can be copied directly +// from pack to pack during repacking without undetected +// data corruption. +// +// - A table of 4-byte offset values (in network byte order). +// These are usually 31-bit pack file offsets, but large +// offsets are encoded as an index into the next table with +// the msbit set. +// +// - A table of 8-byte offset entries (empty for pack files less +// than 2 GiB). Pack files are organized with heavily used +// objects toward the front, so most object references should +// not need to refer to this table. +// +// - The same trailer as a v1 pack file: +// +// A copy of the 20-byte SHA1 checksum at the end of +// corresponding packfile. +// +// 20-byte SHA1-checksum of all of the above. 
+// +// Source: +// https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-format.txt package idxfile - -/* -== Original (version 1) pack-*.idx files have the following format: - - - The header consists of 256 4-byte network byte order - integers. N-th entry of this table records the number of - objects in the corresponding pack, the first byte of whose - object name is less than or equal to N. This is called the - 'first-level fan-out' table. - - - The header is followed by sorted 24-byte entries, one entry - per object in the pack. Each entry is: - - 4-byte network byte order integer, recording where the - object is stored in the packfile as the offset from the - beginning. - - 20-byte object name. - - - The file is concluded with a trailer: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -Pack Idx file: - - -- +--------------------------------+ -fanout | fanout[0] = 2 (for example) |-. -table +--------------------------------+ | - | fanout[1] | | - +--------------------------------+ | - | fanout[2] | | - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | fanout[255] = total objects |---. - -- +--------------------------------+ | | -main | offset | | | -index | object name 00XXXXXXXXXXXXXXXX | | | -table +--------------------------------+ | | - | offset | | | - | object name 00XXXXXXXXXXXXXXXX | | | - +--------------------------------+<+ | - .-| offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | +--------------------------------+ | - | | offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | | offset | | - | | object name FFXXXXXXXXXXXXXXXX | | - --| +--------------------------------+<--+ -trailer | | packfile checksum | - | +--------------------------------+ - | | idxfile checksum | - | +--------------------------------+ - .-------. 
- | -Pack file entry: <+ - - packed object header: - 1-byte size extension bit (MSB) - type (next 3 bit) - size0 (lower 4-bit) - n-byte sizeN (as long as MSB is set, each 7-bit) - size0..sizeN form 4+7+7+..+7 bit integer, size0 - is the least significant part, and sizeN is the - most significant part. - packed object data: - If it is not DELTA, then deflated bytes (the size above - is the size before compression). - If it is REF_DELTA, then - 20-byte base object name SHA1 (the size above is the - size of the delta data that follows). - delta data, deflated. - If it is OFS_DELTA, then - n-byte offset (see below) interpreted as a negative - offset from the type-byte of the header of the - ofs-delta entry (the size above is the size of - the delta data that follows). - delta data, deflated. - - offset encoding: - n bytes with MSB set in all but the last one. - The offset is then the number constructed by - concatenating the lower 7 bit of each byte, and - for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) - to the result. - - - -== Version 2 pack-*.idx files support packs larger than 4 GiB, and - have some other reorganizations. They have the format: - - - A 4-byte magic number '\377tOc' which is an unreasonable - fanout[0] value. - - - A 4-byte version number (= 2) - - - A 256-entry fan-out table just like v1. - - - A table of sorted 20-byte SHA1 object names. These are - packed together without offset values to reduce the cache - footprint of the binary search for a specific object name. - - - A table of 4-byte CRC32 values of the packed object data. - This is new in v2 so compressed data can be copied directly - from pack to pack during repacking without undetected - data corruption. - - - A table of 4-byte offset values (in network byte order). - These are usually 31-bit pack file offsets, but large - offsets are encoded as an index into the next table with - the msbit set. - - - A table of 8-byte offset entries (empty for pack files less - than 2 GiB). 
Pack files are organized with heavily used - objects toward the front, so most object references should - not need to refer to this table. - - - The same trailer as a v1 pack file: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -From: -https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt -*/ diff --git a/plumbing/format/idxfile/encoder.go b/plumbing/format/idxfile/encoder.go index 2b0ef89..374d053 100644 --- a/plumbing/format/idxfile/encoder.go +++ b/plumbing/format/idxfile/encoder.go @@ -9,20 +9,20 @@ import ( "srcd.works/go-git.v4/utils/binary" ) -// An Encoder writes idx files to an output stream. +// Encoder writes Idxfile structs to an output stream. type Encoder struct { io.Writer hash hash.Hash } -// NewEncoder returns a new encoder that writes to w. +// NewEncoder returns a new stream encoder that writes to w. func NewEncoder(w io.Writer) *Encoder { h := sha1.New() mw := io.MultiWriter(w, h) return &Encoder{mw, h} } -// Encode writes the idx in an idx file format to the stream of the encoder. +// Encode encodes an Idxfile to the encoder writer. func (e *Encoder) Encode(idx *Idxfile) (int, error) { idx.Entries.Sort() @@ -123,6 +123,7 @@ func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) { return 40, nil } +// EntryList implements sort.Interface allowing sorting in increasing order. type EntryList []Entry func (p EntryList) Len() int { return len(p) } diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index ee014e5..b39295a 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -11,7 +11,7 @@ var ( idxHeader = []byte{255, 't', 'O', 'c'} ) -// An Idxfile represents an idx file in memory. +// Idxfile is the in memory representation of an idx file. 
type Idxfile struct { Version uint32 Fanout [255]uint32 @@ -21,14 +21,14 @@ type Idxfile struct { IdxChecksum [20]byte } -// An Entry represents data about an object in the packfile: its hash, -// offset and CRC32 checksum. +// Entry is the in memory representation of an object entry in the idx file. type Entry struct { Hash plumbing.Hash CRC32 uint32 Offset uint64 } +// Add adds a new Entry with the given values to the Idxfile. func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) { idx.Entries = append(idx.Entries, Entry{ Hash: h, diff --git a/plumbing/format/index/doc.go b/plumbing/format/index/doc.go index 7000944..d1e7b33 100644 --- a/plumbing/format/index/doc.go +++ b/plumbing/format/index/doc.go @@ -1,302 +1,301 @@ -// Package index implements an encoder and a decoder of index format files +// Package index implements encoding and decoding of index format files. +// +// Git index format +// ================ +// +// == The Git index file has the following format +// +// All binary numbers are in network byte order. Version 2 is described +// here unless stated otherwise. +// +// - A 12-byte header consisting of +// +// 4-byte signature: +// The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache") +// +// 4-byte version number: +// The current supported versions are 2, 3 and 4. +// +// 32-bit number of index entries. +// +// - A number of sorted index entries (see below). +// +// - Extensions +// +// Extensions are identified by signature. Optional extensions can +// be ignored if Git does not understand them. +// +// Git currently supports cached tree and resolve undo extensions. +// +// 4-byte extension signature. If the first byte is 'A'..'Z' the +// extension is optional and can be ignored. +// +// 32-bit size of the extension +// +// Extension data +// +// - 160-bit SHA-1 over the content of the index file before this +// checksum. 
+// +// == Index entry +// +// Index entries are sorted in ascending order on the name field, +// interpreted as a string of unsigned bytes (i.e. memcmp() order, no +// localization, no special casing of directory separator '/'). Entries +// with the same name are sorted by their stage field. +// +// 32-bit ctime seconds, the last time a file's metadata changed +// this is stat(2) data +// +// 32-bit ctime nanosecond fractions +// this is stat(2) data +// +// 32-bit mtime seconds, the last time a file's data changed +// this is stat(2) data +// +// 32-bit mtime nanosecond fractions +// this is stat(2) data +// +// 32-bit dev +// this is stat(2) data +// +// 32-bit ino +// this is stat(2) data +// +// 32-bit mode, split into (high to low bits) +// +// 4-bit object type +// valid values in binary are 1000 (regular file), 1010 (symbolic link) +// and 1110 (gitlink) +// +// 3-bit unused +// +// 9-bit unix permission. Only 0755 and 0644 are valid for regular files. +// Symbolic links and gitlinks have value 0 in this field. +// +// 32-bit uid +// this is stat(2) data +// +// 32-bit gid +// this is stat(2) data +// +// 32-bit file size +// This is the on-disk size from stat(2), truncated to 32-bit. +// +// 160-bit SHA-1 for the represented object +// +// A 16-bit 'flags' field split into (high to low bits) +// +// 1-bit assume-valid flag +// +// 1-bit extended flag (must be zero in version 2) +// +// 2-bit stage (during merge) +// +// 12-bit name length if the length is less than 0xFFF; otherwise 0xFFF +// is stored in this field. +// +// (Version 3 or later) A 16-bit field, only applicable if the +// "extended flag" above is 1, split into (high to low bits). +// +// 1-bit reserved for future +// +// 1-bit skip-worktree flag (used by sparse checkout) +// +// 1-bit intent-to-add flag (used by "git add -N") +// +// 13-bit unused, must be zero +// +// Entry path name (variable length) relative to top level directory +// (without leading slash). 
'/' is used as path separator. The special +// path components ".", ".." and ".git" (without quotes) are disallowed. +// Trailing slash is also disallowed. +// +// The exact encoding is undefined, but the '.' and '/' characters +// are encoded in 7-bit ASCII and the encoding cannot contain a NUL +// byte (iow, this is a UNIX pathname). +// +// (Version 4) In version 4, the entry path name is prefix-compressed +// relative to the path name for the previous entry (the very first +// entry is encoded as if the path name for the previous entry is an +// empty string). At the beginning of an entry, an integer N in the +// variable width encoding (the same encoding as the offset is encoded +// for OFS_DELTA pack entries; see pack-format.txt) is stored, followed +// by a NUL-terminated string S. Removing N bytes from the end of the +// path name for the previous entry, and replacing it with the string S +// yields the path name for this entry. +// +// 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes +// while keeping the name NUL-terminated. +// +// (Version 4) In version 4, the padding after the pathname does not +// exist. +// +// Interpretation of index entries in split index mode is completely +// different. See below for details. +// +// == Extensions +// +// === Cached tree +// +// Cached tree extension contains pre-computed hashes for trees that can +// be derived from the index. It helps speed up tree object generation +// from index for a new commit. +// +// When a path is updated in index, the path must be invalidated and +// removed from tree cache. +// +// The signature for this extension is { 'T', 'R', 'E', 'E' }. 
+// +// A series of entries fill the entire extension; each of which +// consists of: +// +// - NUL-terminated path component (relative to its parent directory); +// +// - ASCII decimal number of entries in the index that is covered by the +// tree this entry represents (entry_count); +// +// - A space (ASCII 32); +// +// - ASCII decimal number that represents the number of subtrees this +// tree has; +// +// - A newline (ASCII 10); and +// +// - 160-bit object name for the object that would result from writing +// this span of index as a tree. +// +// An entry can be in an invalidated state and is represented by having +// a negative number in the entry_count field. In this case, there is no +// object name and the next entry starts immediately after the newline. +// When writing an invalid entry, -1 should always be used as entry_count. +// +// The entries are written out in the top-down, depth-first order. The +// first entry represents the root level of the repository, followed by the +// first subtree--let's call this A--of the root level (with its name +// relative to the root level), followed by the first subtree of A (with +// its name relative to A), ... +// +// === Resolve undo +// +// A conflict is represented in the index as a set of higher stage entries. +// When a conflict is resolved (e.g. with "git add path"), these higher +// stage entries will be removed and a stage-0 entry with proper resolution +// is added. +// +// When these higher stage entries are removed, they are saved in the +// resolve undo extension, so that conflicts can be recreated (e.g. with +// "git checkout -m"), in case users want to redo a conflict resolution +// from scratch. +// +// The signature for this extension is { 'R', 'E', 'U', 'C' }. +// +// A series of entries fill the entire extension; each of which +// consists of: +// +// - NUL-terminated pathname the entry describes (relative to the root of +// the repository, i.e. 
full pathname); +// +// - Three NUL-terminated ASCII octal numbers, entry mode of entries in +// stage 1 to 3 (a missing stage is represented by "0" in this field); +// and +// +// - At most three 160-bit object names of the entry in stages from 1 to 3 +// (nothing is written for a missing stage). +// +// === Split index +// +// In split index mode, the majority of index entries could be stored +// in a separate file. This extension records the changes to be made on +// top of that to produce the final index. +// +// The signature for this extension is { 'l', 'i', 'n', 'k' }. +// +// The extension consists of: +// +// - 160-bit SHA-1 of the shared index file. The shared index file path +// is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the +// index does not require a shared index file. +// +// - An ewah-encoded delete bitmap, each bit represents an entry in the +// shared index. If a bit is set, its corresponding entry in the +// shared index will be removed from the final index. Note, because +// a delete operation changes index entry positions, but we do need +// original positions in replace phase, it's best to just mark +// entries for removal, then do a mass deletion after replacement. +// +// - An ewah-encoded replace bitmap, each bit represents an entry in +// the shared index. If a bit is set, its corresponding entry in the +// shared index will be replaced with an entry in this index +// file. All replaced entries are stored in sorted order in this +// index. The first "1" bit in the replace bitmap corresponds to the +// first index entry, the second "1" bit to the second entry and so +// on. Replaced entries may have empty path names to save space. +// +// The remaining index entries after replaced ones will be added to the +// final index. These added entries are also sorted by entry name then +// stage. +// +// == Untracked cache +// +// Untracked cache saves the untracked file list and necessary data to +// verify the cache. 
The signature for this extension is { 'U', 'N', +// 'T', 'R' }. +// +// The extension starts with +// +// - A sequence of NUL-terminated strings, preceded by the size of the +// sequence in variable width encoding. Each string describes the +// environment where the cache can be used. +// +// - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from +// ctime field until "file size". +// +// - Stat data of core.excludesfile +// +// - 32-bit dir_flags (see struct dir_struct) +// +// - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file +// does not exist. +// +// - 160-bit SHA-1 of core.excludesfile. Null SHA-1 means the file does +// not exist. +// +// - NUL-terminated string of per-dir exclude file name. This usually +// is ".gitignore". +// +// - The number of following directory blocks, variable width +// encoding. If this number is zero, the extension ends here with a +// following NUL. +// +// - A number of directory blocks in depth-first-search order, each +// consists of +// +// - The number of untracked entries, variable width encoding. +// +// - The number of sub-directory blocks, variable width encoding. +// +// - The directory name terminated by NUL. +// +// - A number of untracked file/dir names terminated by NUL. +// +// The remaining data of each directory block is grouped by type: +// +// - An ewah bitmap, the n-th bit marks whether the n-th directory has +// valid untracked cache entries. +// +// - An ewah bitmap, the n-th bit records "check-only" bit of +// read_directory_recursive() for the n-th directory. +// +// - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data +// is valid for the n-th directory and exists in the next data. +// +// - An array of stat data. The n-th data corresponds with the n-th +// "one" bit in the previous ewah bitmap. +// +// - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit +// in the previous ewah bitmap. +// +// - One NUL.
+// Source https://www.kernel.org/pub/software/scm/git/docs/technical/index-format.txt package index - -/* -Git index format -================ - -== The Git index file has the following format - - All binary numbers are in network byte order. Version 2 is described - here unless stated otherwise. - - - A 12-byte header consisting of - - 4-byte signature: - The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache") - - 4-byte version number: - The current supported versions are 2, 3 and 4. - - 32-bit number of index entries. - - - A number of sorted index entries (see below). - - - Extensions - - Extensions are identified by signature. Optional extensions can - be ignored if Git does not understand them. - - Git currently supports cached tree and resolve undo extensions. - - 4-byte extension signature. If the first byte is 'A'..'Z' the - extension is optional and can be ignored. - - 32-bit size of the extension - - Extension data - - - 160-bit SHA-1 over the content of the index file before this - checksum. - -== Index entry - - Index entries are sorted in ascending order on the name field, - interpreted as a string of unsigned bytes (i.e. memcmp() order, no - localization, no special casing of directory separator '/'). Entries - with the same name are sorted by their stage field. - - 32-bit ctime seconds, the last time a file's metadata changed - this is stat(2) data - - 32-bit ctime nanosecond fractions - this is stat(2) data - - 32-bit mtime seconds, the last time a file's data changed - this is stat(2) data - - 32-bit mtime nanosecond fractions - this is stat(2) data - - 32-bit dev - this is stat(2) data - - 32-bit ino - this is stat(2) data - - 32-bit mode, split into (high to low bits) - - 4-bit object type - valid values in binary are 1000 (regular file), 1010 (symbolic link) - and 1110 (gitlink) - - 3-bit unused - - 9-bit unix permission. Only 0755 and 0644 are valid for regular files. - Symbolic links and gitlinks have value 0 in this field. 
- - 32-bit uid - this is stat(2) data - - 32-bit gid - this is stat(2) data - - 32-bit file size - This is the on-disk size from stat(2), truncated to 32-bit. - - 160-bit SHA-1 for the represented object - - A 16-bit 'flags' field split into (high to low bits) - - 1-bit assume-valid flag - - 1-bit extended flag (must be zero in version 2) - - 2-bit stage (during merge) - - 12-bit name length if the length is less than 0xFFF; otherwise 0xFFF - is stored in this field. - - (Version 3 or later) A 16-bit field, only applicable if the - "extended flag" above is 1, split into (high to low bits). - - 1-bit reserved for future - - 1-bit skip-worktree flag (used by sparse checkout) - - 1-bit intent-to-add flag (used by "git add -N") - - 13-bit unused, must be zero - - Entry path name (variable length) relative to top level directory - (without leading slash). '/' is used as path separator. The special - path components ".", ".." and ".git" (without quotes) are disallowed. - Trailing slash is also disallowed. - - The exact encoding is undefined, but the '.' and '/' characters - are encoded in 7-bit ASCII and the encoding cannot contain a NUL - byte (iow, this is a UNIX pathname). - - (Version 4) In version 4, the entry path name is prefix-compressed - relative to the path name for the previous entry (the very first - entry is encoded as if the path name for the previous entry is an - empty string). At the beginning of an entry, an integer N in the - variable width encoding (the same encoding as the offset is encoded - for OFS_DELTA pack entries; see pack-format.txt) is stored, followed - by a NUL-terminated string S. Removing N bytes from the end of the - path name for the previous entry, and replacing it with the string S - yields the path name for this entry. - - 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes - while keeping the name NUL-terminated. - - (Version 4) In version 4, the padding after the pathname does not - exist. 
- - Interpretation of index entries in split index mode is completely - different. See below for details. - -== Extensions - -=== Cached tree - - Cached tree extension contains pre-computed hashes for trees that can - be derived from the index. It helps speed up tree object generation - from index for a new commit. - - When a path is updated in index, the path must be invalidated and - removed from tree cache. - - The signature for this extension is { 'T', 'R', 'E', 'E' }. - - A series of entries fill the entire extension; each of which - consists of: - - - NUL-terminated path component (relative to its parent directory); - - - ASCII decimal number of entries in the index that is covered by the - tree this entry represents (entry_count); - - - A space (ASCII 32); - - - ASCII decimal number that represents the number of subtrees this - tree has; - - - A newline (ASCII 10); and - - - 160-bit object name for the object that would result from writing - this span of index as a tree. - - An entry can be in an invalidated state and is represented by having - a negative number in the entry_count field. In this case, there is no - object name and the next entry starts immediately after the newline. - When writing an invalid entry, -1 should always be used as entry_count. - - The entries are written out in the top-down, depth-first order. The - first entry represents the root level of the repository, followed by the - first subtree--let's call this A--of the root level (with its name - relative to the root level), followed by the first subtree of A (with - its name relative to A), ... - -=== Resolve undo - - A conflict is represented in the index as a set of higher stage entries. - When a conflict is resolved (e.g. with "git add path"), these higher - stage entries will be removed and a stage-0 entry with proper resolution - is added. - - When these higher stage entries are removed, they are saved in the - resolve undo extension, so that conflicts can be recreated (e.g. 
with - "git checkout -m"), in case users want to redo a conflict resolution - from scratch. - - The signature for this extension is { 'R', 'E', 'U', 'C' }. - - A series of entries fill the entire extension; each of which - consists of: - - - NUL-terminated pathname the entry describes (relative to the root of - the repository, i.e. full pathname); - - - Three NUL-terminated ASCII octal numbers, entry mode of entries in - stage 1 to 3 (a missing stage is represented by "0" in this field); - and - - - At most three 160-bit object names of the entry in stages from 1 to 3 - (nothing is written for a missing stage). - -=== Split index - - In split index mode, the majority of index entries could be stored - in a separate file. This extension records the changes to be made on - top of that to produce the final index. - - The signature for this extension is { 'l', 'i', 'n', 'k' }. - - The extension consists of: - - - 160-bit SHA-1 of the shared index file. The shared index file path - is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the - index does not require a shared index file. - - - An ewah-encoded delete bitmap, each bit represents an entry in the - shared index. If a bit is set, its corresponding entry in the - shared index will be removed from the final index. Note, because - a delete operation changes index entry positions, but we do need - original positions in replace phase, it's best to just mark - entries for removal, then do a mass deletion after replacement. - - - An ewah-encoded replace bitmap, each bit represents an entry in - the shared index. If a bit is set, its corresponding entry in the - shared index will be replaced with an entry in this index - file. All replaced entries are stored in sorted order in this - index. The first "1" bit in the replace bitmap corresponds to the - first index entry, the second "1" bit to the second entry and so - on. Replaced entries may have empty path names to save space. 
- - The remaining index entries after replaced ones will be added to the - final index. These added entries are also sorted by entry name then - stage. - -== Untracked cache - - Untracked cache saves the untracked file list and necessary data to - verify the cache. The signature for this extension is { 'U', 'N', - 'T', 'R' }. - - The extension starts with - - - A sequence of NUL-terminated strings, preceded by the size of the - sequence in variable width encoding. Each string describes the - environment where the cache can be used. - - - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from - ctime field until "file size". - - - Stat data of plumbing.excludesfile - - - 32-bit dir_flags (see struct dir_struct) - - - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file - does not exist. - - - 160-bit SHA-1 of plumbing.excludesfile. Null SHA-1 means the file does - not exist. - - - NUL-terminated string of per-dir exclude file name. This usually - is ".gitignore". - - - The number of following directory blocks, variable width - encoding. If this number is zero, the extension ends here with a - following NUL. - - - A number of directory blocks in depth-first-search order, each - consists of - - - The number of untracked entries, variable width encoding. - - - The number of sub-directory blocks, variable width encoding. - - - The directory name terminated by NUL. - - - A number of untracked file/dir names terminated by NUL. - -The remaining data of each directory block is grouped by type: - - - An ewah bitmap, the n-th bit marks whether the n-th directory has - valid untracked cache entries. - - - An ewah bitmap, the n-th bit records "check-only" bit of - read_directory_recursive() for the n-th directory. - - - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data - is valid for the n-th directory and exists in the next data. - - - An array of stat data. 
The n-th data corresponds with the n-th - "one" bit in the previous ewah bitmap. - - - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit - in the previous ewah bitmap. - - - One NUL. -*/ diff --git a/plumbing/format/index/index.go b/plumbing/format/index/index.go index 0e9132f..e5dc178 100644 --- a/plumbing/format/index/index.go +++ b/plumbing/format/index/index.go @@ -93,10 +93,10 @@ type TreeEntry struct { Hash plumbing.Hash } -// ResolveUndo when a conflict is resolved (e.g. with "git add path"), these -// higher stage entries will be removed and a stage-0 entry with proper +// ResolveUndo is used when a conflict is resolved (e.g. with "git add path"), +// these higher stage entries are removed and a stage-0 entry with proper // resolution is added. When these higher stage entries are removed, they are -// saved in the resolve undo extension +// saved in the resolve undo extension. type ResolveUndo struct { Entries []ResolveUndoEntry } diff --git a/plumbing/format/objfile/doc.go b/plumbing/format/objfile/doc.go new file mode 100644 index 0000000..a714516 --- /dev/null +++ b/plumbing/format/objfile/doc.go @@ -0,0 +1,2 @@ +// Package objfile implements encoding and decoding of object files. +package objfile diff --git a/plumbing/format/packfile/doc.go b/plumbing/format/packfile/doc.go index 0b173ca..2882a7f 100644 --- a/plumbing/format/packfile/doc.go +++ b/plumbing/format/packfile/doc.go @@ -1,168 +1,39 @@ -// Package packfile implements a encoder/decoder of packfile format +// Package packfile implements encoding and decoding of packfile format. +// +// == pack-*.pack files have the following format: +// +// - A header appears at the beginning and consists of the following: +// +// 4-byte signature: +// The signature is: {'P', 'A', 'C', 'K'} +// +// 4-byte version number (network byte order): +// GIT currently accepts version number 2 or 3 but +// generates version 2 only. 
+// +// 4-byte number of objects contained in the pack (network byte order) +// +// Observation: we cannot have more than 4G versions ;-) and +// more than 4G objects in a pack. +// +// - The header is followed by number of object entries, each of +// which looks like this: +// +// (undeltified representation) +// n-byte type and length (3-bit type, (n-1)*7+4-bit length) +// compressed data +// +// (deltified representation) +// n-byte type and length (3-bit type, (n-1)*7+4-bit length) +// 20-byte base object name +// compressed delta data +// +// Observation: length of each object is encoded in a variable +// length format and is not constrained to 32-bit or anything. +// +// - The trailer records 20-byte SHA1 checksum of all of the above. +// +// +// Source: +// https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt package packfile - -/* -GIT pack format -=============== - -== pack-*.pack files have the following format: - - - A header appears at the beginning and consists of the following: - - 4-byte signature: - The signature is: {'P', 'A', 'C', 'K'} - - 4-byte version number (network byte order): - GIT currently accepts version number 2 or 3 but - generates version 2 only. - - 4-byte number of objects contained in the pack (network byte order) - - Observation: we cannot have more than 4G versions ;-) and - more than 4G objects in a pack. - - - The header is followed by number of object entries, each of - which looks like this: - - (undeltified representation) - n-byte type and length (3-bit type, (n-1)*7+4-bit length) - compressed data - - (deltified representation) - n-byte type and length (3-bit type, (n-1)*7+4-bit length) - 20-byte base object name - compressed delta data - - Observation: length of each object is encoded in a variable - length format and is not constrained to 32-bit or anything. - - - The trailer records 20-byte SHA1 checksum of all of the above. 
- -== Original (version 1) pack-*.idx files have the following format: - - - The header consists of 256 4-byte network byte order - integers. N-th entry of this table records the number of - objects in the corresponding pack, the first byte of whose - object name is less than or equal to N. This is called the - 'first-level fan-out' table. - - - The header is followed by sorted 24-byte entries, one entry - per object in the pack. Each entry is: - - 4-byte network byte order integer, recording where the - object is stored in the packfile as the offset from the - beginning. - - 20-byte object name. - - - The file is concluded with a trailer: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -Pack Idx file: - - -- +--------------------------------+ -fanout | fanout[0] = 2 (for example) |-. -table +--------------------------------+ | - | fanout[1] | | - +--------------------------------+ | - | fanout[2] | | - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | fanout[255] = total objects |---. - -- +--------------------------------+ | | -main | offset | | | -index | object name 00XXXXXXXXXXXXXXXX | | | -table +--------------------------------+ | | - | offset | | | - | object name 00XXXXXXXXXXXXXXXX | | | - +--------------------------------+<+ | - .-| offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | +--------------------------------+ | - | | offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | | offset | | - | | object name FFXXXXXXXXXXXXXXXX | | - --| +--------------------------------+<--+ -trailer | | packfile checksum | - | +--------------------------------+ - | | idxfile checksum | - | +--------------------------------+ - .-------. 
- | -Pack file entry: <+ - - packed object header: - 1-byte size extension bit (MSB) - type (next 3 bit) - size0 (lower 4-bit) - n-byte sizeN (as long as MSB is set, each 7-bit) - size0..sizeN form 4+7+7+..+7 bit integer, size0 - is the least significant part, and sizeN is the - most significant part. - packed object data: - If it is not DELTA, then deflated bytes (the size above - is the size before compression). - If it is REF_DELTA, then - 20-byte base object name SHA1 (the size above is the - size of the delta data that follows). - delta data, deflated. - If it is OFS_DELTA, then - n-byte offset (see below) interpreted as a negative - offset from the type-byte of the header of the - ofs-delta entry (the size above is the size of - the delta data that follows). - delta data, deflated. - - offset encoding: - n bytes with MSB set in all but the last one. - The offset is then the number constructed by - concatenating the lower 7 bit of each byte, and - for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) - to the result. - - - -== Version 2 pack-*.idx files support packs larger than 4 GiB, and - have some other reorganizations. They have the format: - - - A 4-byte magic number '\377tOc' which is an unreasonable - fanout[0] value. - - - A 4-byte version number (= 2) - - - A 256-entry fan-out table just like v1. - - - A table of sorted 20-byte SHA1 object names. These are - packed together without offset values to reduce the cache - footprint of the binary search for a specific object name. - - - A table of 4-byte CRC32 values of the packed object data. - This is new in v2 so compressed data can be copied directly - from pack to pack during repacking without undetected - data corruption. - - - A table of 4-byte offset values (in network byte order). - These are usually 31-bit pack file offsets, but large - offsets are encoded as an index into the next table with - the msbit set. - - - A table of 8-byte offset entries (empty for pack files less - than 2 GiB). 
Pack files are organized with heavily used - objects toward the front, so most object references should - not need to refer to this table. - - - The same trailer as a v1 pack file: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -From: -https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt -*/ diff --git a/plumbing/format/pktline/encoder.go b/plumbing/format/pktline/encoder.go index 0a88a9b..753b225 100644 --- a/plumbing/format/pktline/encoder.go +++ b/plumbing/format/pktline/encoder.go @@ -1,4 +1,5 @@ -// Package pktline implements reading payloads form pkt-lines and encoding pkt-lines from payloads. +// Package pktline implements reading payloads from pkt-lines and encoding +// pkt-lines from payloads. package pktline import ( |