32 files changed, 3056 insertions, 834 deletions
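The centerpiece of this change is the new internal/revision package, which tokenizes an extended revision string (the gitrevisions(7) syntax) into typed chunks. Below is a minimal usage sketch, not part of the diff itself: the import path is assumed from the srcd.works/go-git.v4 paths used elsewhere in this change, and since the package lives under internal/ it only compiles from within the go-git module.

package main

import (
	"fmt"

	// Assumed module path; internal packages are only importable
	// from inside the go-git module itself.
	"srcd.works/go-git.v4/internal/revision"
)

func main() {
	// Per the tests in this diff, this should parse into three chunks:
	// Ref("master"), TildePath{3} and a CaretReg matching "fix nasty bug".
	parser := revision.NewParserFromString("master~3^{/fix nasty bug}")

	revs, err := parser.Parse()
	if err != nil {
		fmt.Println("invalid revision:", err)
		return
	}

	for _, rev := range revs {
		fmt.Printf("%T %+v\n", rev, rev)
	}
}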
diff --git a/internal/revision/parser.go b/internal/revision/parser.go new file mode 100644 index 0000000..b45a6d8 --- /dev/null +++ b/internal/revision/parser.go @@ -0,0 +1,622 @@ +// Package revision extracts a git revision from a string +// More information about revisions: https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html +package revision + +import ( + "bytes" + "fmt" + "io" + "regexp" + "strconv" + "time" +) + +// ErrInvalidRevision is emitted if the string doesn't match a valid revision +type ErrInvalidRevision struct { + s string +} + +func (e *ErrInvalidRevision) Error() string { + return "Revision invalid : " + e.s +} + +// Revisioner represents a revision component. +// A revision is made of multiple revision components +// obtained after parsing a revision string, +// for instance the revision "master~" will be converted into +// two revision components, Ref and TildePath +type Revisioner interface { +} + +// Ref represents a reference name: HEAD, master +type Ref string + +// TildePath represents ~, ~{n} +type TildePath struct { + Depth int +} + +// CaretPath represents ^, ^{n} +type CaretPath struct { + Depth int +} + +// CaretReg represents ^{/foo bar} +type CaretReg struct { + Regexp *regexp.Regexp + Negate bool +} + +// CaretType represents ^{commit} +type CaretType struct { + ObjectType string +} + +// AtReflog represents @{n} +type AtReflog struct { + Depth int +} + +// AtCheckout represents @{-n} +type AtCheckout struct { + Depth int +} + +// AtUpstream represents @{upstream}, @{u} +type AtUpstream struct { + BranchName string +} + +// AtPush represents @{push} +type AtPush struct { + BranchName string +} + +// AtDate represents @{"2006-01-02T15:04:05Z"} +type AtDate struct { + Date time.Time +} + +// ColonReg represents :/foo bar +type ColonReg struct { + Regexp *regexp.Regexp + Negate bool +} + +// ColonPath represents :./<path> :<path> +type ColonPath struct { + Path string +} + +// ColonStagePath represents :<n>:<path> +type ColonStagePath struct { + Path string + Stage int +} + +// Parser represents a parser +// used to tokenize a given string and transform it +// into revisioner chunks +type Parser struct { + s *scanner + currentParsedChar struct { + tok token + lit string + } + unreadLastChar bool +} + +// NewParserFromString returns a new instance of parser from a string. +func NewParserFromString(s string) *Parser { + return NewParser(bytes.NewBufferString(s)) +} + +// NewParser returns a new instance of parser. +func NewParser(r io.Reader) *Parser { + return &Parser{s: newScanner(r)} +} + +// scan returns the next token from the underlying scanner +// or the last scanned token if an unscan was requested +func (p *Parser) scan() (token, string, error) { + if p.unreadLastChar { + p.unreadLastChar = false + return p.currentParsedChar.tok, p.currentParsedChar.lit, nil + } + + tok, lit, err := p.s.scan() + + p.currentParsedChar.tok, p.currentParsedChar.lit = tok, lit + + return tok, lit, err +} + +// unscan pushes the previously read token back onto the buffer.
+func (p *Parser) unscan() { p.unreadLastChar = true } + +// Parse explodes a revision string into revisioner chunks +func (p *Parser) Parse() ([]Revisioner, error) { + var rev Revisioner + var revs []Revisioner + var tok token + var err error + + for { + tok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case at: + rev, err = p.parseAt() + case tilde: + rev, err = p.parseTilde() + case caret: + rev, err = p.parseCaret() + case colon: + rev, err = p.parseColon() + case eof: + err = p.validateFullRevision(&revs) + + if err != nil { + return []Revisioner{}, err + } + + return revs, nil + default: + p.unscan() + rev, err = p.parseRef() + } + + if err != nil { + return []Revisioner{}, err + } + + revs = append(revs, rev) + } +} + +// validateFullRevision ensures all revisioner chunks make a valid revision +func (p *Parser) validateFullRevision(chunks *[]Revisioner) error { + var hasReference bool + + for i, chunk := range *chunks { + switch chunk.(type) { + case Ref: + if i == 0 { + hasReference = true + } else { + return &ErrInvalidRevision{`reference must be defined once at the beginning`} + } + case AtDate: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<ISO-8601 date>}, @{<ISO-8601 date>}`} + case AtReflog: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<n>}, @{<n>}`} + case AtCheckout: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : @{-<n>}`} + case AtUpstream: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{upstream}, @{upstream}, <refname>@{u}, @{u}`} + case AtPush: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{push}, @{push}`} + case TildePath, CaretPath, CaretReg: + if !hasReference { + return &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`} + } + case ColonReg: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : :/<regexp>`} + case ColonPath: + if i == len(*chunks)-1 && hasReference || len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : <revision>:<path>`} + case ColonStagePath: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : :<n>:<path>`} + } + } + + return nil +} + +// parseAt extracts @ statements +func (p *Parser) parseAt() (Revisioner, error) { + var tok, nextTok token + var lit, nextLit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + if tok != obrace { + p.unscan() + + return Ref("HEAD"), nil + } + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, nextLit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == word && (lit == "u" || lit == "upstream") && nextTok == cbrace: + return AtUpstream{}, nil + case tok == word && lit == "push" && nextTok == cbrace: + return AtPush{}, nil + case tok == number && nextTok == cbrace: + n, _ := strconv.Atoi(lit) + + return AtReflog{n}, nil + case tok == minus && nextTok
== number: + n, _ := strconv.Atoi(nextLit) + + t, _, err := p.scan() + + if err != nil { + return nil, err + } + + if t != cbrace { + return nil, &ErrInvalidRevision{`missing "}" in @{-n} structure`} + } + + return AtCheckout{n}, nil + default: + p.unscan() + + date := lit + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == cbrace: + t, err := time.Parse("2006-01-02T15:04:05Z", date) + + if err != nil { + return nil, &ErrInvalidRevision{fmt.Sprintf(`wrong date "%s" must fit ISO-8601 format : 2006-01-02T15:04:05Z`, date)} + } + + return AtDate{t}, nil + default: + date += lit + } + } + } +} + +// parseTilde extracts ~ statements +func (p *Parser) parseTilde() (Revisioner, error) { + var tok token + var lit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == number: + n, _ := strconv.Atoi(lit) + + return TildePath{n}, nil + default: + p.unscan() + return TildePath{1}, nil + } +} + +// parseCaret extracts ^ statements +func (p *Parser) parseCaret() (Revisioner, error) { + var tok token + var lit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == obrace: + r, err := p.parseCaretBraces() + + if err != nil { + return nil, err + } + + return r, nil + case tok == number: + n, _ := strconv.Atoi(lit) + + if n > 2 { + return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" found must be 0, 1 or 2 after "^"`, lit)} + } + + return CaretPath{n}, nil + default: + p.unscan() + return CaretPath{1}, nil + } +} + +// parseCaretBraces extracts ^{<data>} statements +func (p *Parser) parseCaretBraces() (Revisioner, error) { + var tok, nextTok token + var lit string + start := true + var re string + var negate bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == word && nextTok == cbrace && (lit == "commit" || lit == "tree" || lit == "blob" || lit == "tag" || lit == "object"): + return CaretType{lit}, nil + case re == "" && tok == cbrace: + return CaretType{"tag"}, nil + case re == "" && tok == emark && nextTok == emark: + re += lit + case re == "" && tok == emark && nextTok == minus: + negate = true + case re == "" && tok == emark: + return nil, &ErrInvalidRevision{`revision suffix brace component sequences starting with "/!"
others than those defined are reserved`} + case re == "" && tok == slash: + p.unscan() + case tok != slash && start: + return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" is not a valid revision suffix brace component`, lit)} + case tok != cbrace: + p.unscan() + re += lit + case tok == cbrace: + p.unscan() + + reg, err := regexp.Compile(re) + + if err != nil { + return CaretReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())} + } + + return CaretReg{reg, negate}, nil + } + + start = false + } +} + +// parseColon extracts : statements +func (p *Parser) parseColon() (Revisioner, error) { + var tok token + var err error + + tok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case slash: + return p.parseColonSlash() + default: + p.unscan() + return p.parseColonDefault() + } +} + +// parseColonSlash extracts :/<data> statements +func (p *Parser) parseColonSlash() (Revisioner, error) { + var tok, nextTok token + var lit string + var re string + var negate bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == emark && nextTok == emark: + re += lit + case re == "" && tok == emark && nextTok == minus: + negate = true + case re == "" && tok == emark: + return nil, &ErrInvalidRevision{`revision suffix brace component sequences starting with "/!" others than those defined are reserved`} + case tok == eof: + p.unscan() + reg, err := regexp.Compile(re) + + if err != nil { + return ColonReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())} + } + + return ColonReg{reg, negate}, nil + default: + p.unscan() + re += lit + } + } +} + +// parseColonDefault extracts :<data> statements +func (p *Parser) parseColonDefault() (Revisioner, error) { + var tok token + var lit string + var path string + var stage int + var err error + var n = -1 + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err := p.scan() + + if err != nil { + return nil, err + } + + if tok == number && nextTok == colon { + n, _ = strconv.Atoi(lit) + } + + switch n { + case 0, 1, 2, 3: + stage = n + default: + path += lit + p.unscan() + } + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == eof && n == -1: + return ColonPath{path}, nil + case tok == eof: + return ColonStagePath{path, stage}, nil + default: + path += lit + } + } +} + +// parseRef extracts the reference name +func (p *Parser) parseRef() (Revisioner, error) { + var tok, prevTok token + var lit, buf string + var endOfRef bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case eof, at, colon, tilde, caret: + endOfRef = true + } + + err := p.checkRefFormat(tok, lit, prevTok, buf, endOfRef) + + if err != nil { + return "", err + } + + if endOfRef { + p.unscan() + return Ref(buf), nil + } + + buf += lit + prevTok = tok + } +} + +// checkRefFormat ensures the reference name follows the rules defined here: +// https://git-scm.com/docs/git-check-ref-format +func (p *Parser) checkRefFormat(token token, literal string, previousToken token, buffer string, endOfRef bool) error { + switch token { + case aslash, space, control, qmark, asterisk, obracket: + return &ErrInvalidRevision{fmt.Sprintf(`must not contains "%s"`, literal)} + } + + switch { + case (token == dot || token == slash) && buffer
== "": + return &ErrInvalidRevision{fmt.Sprintf(`must not start with "%s"`, literal)} + case previousToken == slash && endOfRef: + return &ErrInvalidRevision{`must not end with "/"`} + case previousToken == dot && endOfRef: + return &ErrInvalidRevision{`must not end with "."`} + case token == dot && previousToken == slash: + return &ErrInvalidRevision{`must not contains "/."`} + case previousToken == dot && token == dot: + return &ErrInvalidRevision{`must not contains ".."`} + case previousToken == slash && token == slash: + return &ErrInvalidRevision{`must not contains consecutively "/"`} + case (token == slash || endOfRef) && len(buffer) > 4 && buffer[len(buffer)-5:] == ".lock": + return &ErrInvalidRevision{"cannot end with .lock"} + } + + return nil +} diff --git a/internal/revision/parser_test.go b/internal/revision/parser_test.go new file mode 100644 index 0000000..a58f0ad --- /dev/null +++ b/internal/revision/parser_test.go @@ -0,0 +1,395 @@ +package revision + +import ( + "bytes" + "regexp" + "time" + + . "gopkg.in/check.v1" +) + +type ParserSuite struct{} + +var _ = Suite(&ParserSuite{}) + +func (s *ParserSuite) TestErrInvalidRevision(c *C) { + e := ErrInvalidRevision{"test"} + + c.Assert(e.Error(), Equals, "Revision invalid : test") +} + +func (s *ParserSuite) TestNewParserFromString(c *C) { + p := NewParserFromString("test") + + c.Assert(p, FitsTypeOf, &Parser{}) +} + +func (s *ParserSuite) TestScan(c *C) { + parser := NewParser(bytes.NewBufferString("Hello world !")) + + expected := []struct { + t token + s string + }{ + { + word, + "Hello", + }, + { + space, + " ", + }, + { + word, + "world", + }, + { + space, + " ", + }, + { + emark, + "!", + }, + } + + for i := 0; ; { + tok, str, err := parser.scan() + + if tok == eof { + return + } + + c.Assert(err, Equals, nil) + c.Assert(str, Equals, expected[i].s) + c.Assert(tok, Equals, expected[i].t) + + i++ + } +} + +func (s *ParserSuite) TestUnscan(c *C) { + parser := NewParser(bytes.NewBufferString("Hello world !")) + + tok, str, err := parser.scan() + + c.Assert(err, Equals, nil) + c.Assert(str, Equals, "Hello") + c.Assert(tok, Equals, word) + + parser.unscan() + + tok, str, err = parser.scan() + + c.Assert(err, Equals, nil) + c.Assert(str, Equals, "Hello") + c.Assert(tok, Equals, word) +} + +func (s *ParserSuite) TestParseWithValidExpression(c *C) { + tim, _ := time.Parse("2006-01-02T15:04:05Z", "2016-12-16T21:42:47Z") + + datas := map[string]Revisioner{ + "@": []Revisioner{Ref("HEAD")}, + "@~3": []Revisioner{ + Ref("HEAD"), + TildePath{3}, + }, + "@{2016-12-16T21:42:47Z}": []Revisioner{AtDate{tim}}, + "@{1}": []Revisioner{AtReflog{1}}, + "@{-1}": []Revisioner{AtCheckout{1}}, + "master@{upstream}": []Revisioner{ + Ref("master"), + AtUpstream{}, + }, + "@{upstream}": []Revisioner{ + AtUpstream{}, + }, + "@{u}": []Revisioner{ + AtUpstream{}, + }, + "master@{push}": []Revisioner{ + Ref("master"), + AtPush{}, + }, + "master@{2016-12-16T21:42:47Z}": []Revisioner{ + Ref("master"), + AtDate{tim}, + }, + "HEAD^": []Revisioner{ + Ref("HEAD"), + CaretPath{1}, + }, + "master~3": []Revisioner{ + Ref("master"), + TildePath{3}, + }, + "v0.99.8^{commit}": []Revisioner{ + Ref("v0.99.8"), + CaretType{"commit"}, + }, + "v0.99.8^{}": []Revisioner{ + Ref("v0.99.8"), + CaretType{"tag"}, + }, + "HEAD^{/fix nasty bug}": []Revisioner{ + Ref("HEAD"), + CaretReg{regexp.MustCompile("fix nasty bug"), false}, + }, + ":/fix nasty bug": []Revisioner{ + ColonReg{regexp.MustCompile("fix nasty bug"), false}, + }, + "HEAD:README": []Revisioner{ + Ref("HEAD"), + 
ColonPath{"README"}, + }, + ":README": []Revisioner{ + ColonPath{"README"}, + }, + "master:./README": []Revisioner{ + Ref("master"), + ColonPath{"./README"}, + }, + "master^1~:./README": []Revisioner{ + Ref("master"), + CaretPath{1}, + TildePath{1}, + ColonPath{"./README"}, + }, + ":0:README": []Revisioner{ + ColonStagePath{"README", 0}, + }, + ":3:README": []Revisioner{ + ColonStagePath{"README", 3}, + }, + "master~1^{/update}~5~^^1": []Revisioner{ + Ref("master"), + TildePath{1}, + CaretReg{regexp.MustCompile("update"), false}, + TildePath{5}, + TildePath{1}, + CaretPath{1}, + CaretPath{1}, + }, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.Parse() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseWithUnValidExpression(c *C) { + datas := map[string]error{ + "..": &ErrInvalidRevision{`must not start with "."`}, + "master^1master": &ErrInvalidRevision{`reference must be defined once at the beginning`}, + "master^1@{2016-12-16T21:42:47Z}": &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<ISO-8601 date>}, @{<ISO-8601 date>}`}, + "master^1@{1}": &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<n>}, @{<n>}`}, + "master@{-1}": &ErrInvalidRevision{`"@" statement is not valid, could be : @{-<n>}`}, + "master^1@{upstream}": &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{upstream}, @{upstream}, <refname>@{u}, @{u}`}, + "master^1@{u}": &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{upstream}, @{upstream}, <refname>@{u}, @{u}`}, + "master^1@{push}": &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{push}, @{push}`}, + "^1": &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}, + "^{/test}": &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}, + "~1": &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}, + "master:/test": &ErrInvalidRevision{`":" statement is not valid, could be : :/<regexp>`}, + "master:0:README": &ErrInvalidRevision{`":" statement is not valid, could be : :<n>:<path>`}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + _, err := parser.Parse() + c.Assert(err, DeepEquals, e) + } +} + +func (s *ParserSuite) TestParseAtWithValidExpression(c *C) { + tim, _ := time.Parse("2006-01-02T15:04:05Z", "2016-12-16T21:42:47Z") + + datas := map[string]Revisioner{ + "": Ref("HEAD"), + "{1}": AtReflog{1}, + "{-1}": AtCheckout{1}, + "{push}": AtPush{}, + "{upstream}": AtUpstream{}, + "{u}": AtUpstream{}, + "{2016-12-16T21:42:47Z}": AtDate{tim}, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseAt() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseAtWithUnValidExpression(c *C) { + datas := map[string]error{ + "{test}": &ErrInvalidRevision{`wrong date "test" must fit ISO-8601 format : 2006-01-02T15:04:05Z`}, + "{-1": &ErrInvalidRevision{`missing "}" in @{-n} structure`}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + + _, err := parser.parseAt() + + c.Assert(err, DeepEquals, e) + } +} + +func (s *ParserSuite) TestParseCaretWithValidExpression(c *C) { + datas := map[string]Revisioner{ + "": CaretPath{1}, + "2": CaretPath{2}, + "{}": 
CaretType{"tag"}, + "{commit}": CaretType{"commit"}, + "{tree}": CaretType{"tree"}, + "{blob}": CaretType{"blob"}, + "{tag}": CaretType{"tag"}, + "{object}": CaretType{"object"}, + "{/hello world !}": CaretReg{regexp.MustCompile("hello world !"), false}, + "{/!-hello world !}": CaretReg{regexp.MustCompile("hello world !"), true}, + "{/!! hello world !}": CaretReg{regexp.MustCompile("! hello world !"), false}, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseCaret() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseCaretWithUnValidExpression(c *C) { + datas := map[string]error{ + "3": &ErrInvalidRevision{`"3" found must be 0, 1 or 2 after "^"`}, + "{test}": &ErrInvalidRevision{`"test" is not a valid revision suffix brace component`}, + "{/!test}": &ErrInvalidRevision{`revision suffix brace component sequences starting with "/!" others than those defined are reserved`}, + "{/test**}": &ErrInvalidRevision{"revision suffix brace component, error parsing regexp: invalid nested repetition operator: `**`"}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + + _, err := parser.parseCaret() + + c.Assert(err, DeepEquals, e) + } +} + +func (s *ParserSuite) TestParseTildeWithValidExpression(c *C) { + datas := map[string]Revisioner{ + "3": TildePath{3}, + "1": TildePath{1}, + "": TildePath{1}, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseTilde() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseColonWithValidExpression(c *C) { + datas := map[string]Revisioner{ + "/hello world !": ColonReg{regexp.MustCompile("hello world !"), false}, + "/!-hello world !": ColonReg{regexp.MustCompile("hello world !"), true}, + "/!! hello world !": ColonReg{regexp.MustCompile("! hello world !"), false}, + "../parser.go": ColonPath{"../parser.go"}, + "./parser.go": ColonPath{"./parser.go"}, + "parser.go": ColonPath{"parser.go"}, + "0:parser.go": ColonStagePath{"parser.go", 0}, + "1:parser.go": ColonStagePath{"parser.go", 1}, + "2:parser.go": ColonStagePath{"parser.go", 2}, + "3:parser.go": ColonStagePath{"parser.go", 3}, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseColon() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseColonWithUnValidExpression(c *C) { + datas := map[string]error{ + "/!test": &ErrInvalidRevision{`revision suffix brace component sequences starting with "/!" 
others than those defined are reserved`}, + "/*": &ErrInvalidRevision{"revision suffix brace component, error parsing regexp: missing argument to repetition operator: `*`"}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + + _, err := parser.parseColon() + + c.Assert(err, DeepEquals, e) + } +} + +func (s *ParserSuite) TestParseRefWithValidName(c *C) { + datas := []string{ + "lock", + "master", + "v1.0.0", + "refs/stash", + "refs/tags/v1.0.0", + "refs/heads/master", + "refs/remotes/test", + "refs/remotes/origin/HEAD", + "refs/remotes/origin/master", + } + + for _, d := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseRef() + + c.Assert(err, Equals, nil) + c.Assert(result, Equals, Ref(d)) + } +} + +func (s *ParserSuite) TestParseRefWithUnvalidName(c *C) { + datas := map[string]error{ + ".master": &ErrInvalidRevision{`must not start with "."`}, + "/master": &ErrInvalidRevision{`must not start with "/"`}, + "master/": &ErrInvalidRevision{`must not end with "/"`}, + "master.": &ErrInvalidRevision{`must not end with "."`}, + "refs/remotes/.origin/HEAD": &ErrInvalidRevision{`must not contains "/."`}, + "test..test": &ErrInvalidRevision{`must not contains ".."`}, + "test..": &ErrInvalidRevision{`must not contains ".."`}, + "test test": &ErrInvalidRevision{`must not contains " "`}, + "test*test": &ErrInvalidRevision{`must not contains "*"`}, + "test?test": &ErrInvalidRevision{`must not contains "?"`}, + "test\\test": &ErrInvalidRevision{`must not contains "\"`}, + "test[test": &ErrInvalidRevision{`must not contains "["`}, + "te//st": &ErrInvalidRevision{`must not contains consecutively "/"`}, + "refs/remotes/test.lock/HEAD": &ErrInvalidRevision{`cannot end with .lock`}, + "test.lock": &ErrInvalidRevision{`cannot end with .lock`}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + + _, err := parser.parseRef() + + c.Assert(err, DeepEquals, e) + } +} diff --git a/internal/revision/scanner.go b/internal/revision/scanner.go new file mode 100644 index 0000000..fb5f333 --- /dev/null +++ b/internal/revision/scanner.go @@ -0,0 +1,117 @@ +package revision + +import ( + "bufio" + "io" + "unicode" +) + +// runeCategoryValidator takes a rune as input and +// validates it belongs to a rune category +type runeCategoryValidator func(r rune) bool + +// tokenizeExpression aggregates a series of runes matching the check predicate into a single +// string and returns the given tokenType as the token type +func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) { + var data []rune + data = append(data, ch) + + for { + c, _, err := r.ReadRune() + + if c == zeroRune { + break + } + + if err != nil { + return tokenError, "", err + } + + if check(c) { + data = append(data, c) + } else { + err := r.UnreadRune() + + if err != nil { + return tokenError, "", err + } + + return tokenType, string(data), nil + } + } + + return tokenType, string(data), nil +} + +var zeroRune = rune(0) + +// scanner represents a lexical scanner. +type scanner struct { + r *bufio.Reader +} + +// newScanner returns a new instance of scanner.
+func newScanner(r io.Reader) *scanner { + return &scanner{r: bufio.NewReader(r)} +} + +// scan extracts tokens and their string counterparts +// from the reader +func (s *scanner) scan() (token, string, error) { + ch, _, err := s.r.ReadRune() + + if err != nil && err != io.EOF { + return tokenError, "", err + } + + switch ch { + case zeroRune: + return eof, "", nil + case ':': + return colon, string(ch), nil + case '~': + return tilde, string(ch), nil + case '^': + return caret, string(ch), nil + case '.': + return dot, string(ch), nil + case '/': + return slash, string(ch), nil + case '{': + return obrace, string(ch), nil + case '}': + return cbrace, string(ch), nil + case '-': + return minus, string(ch), nil + case '@': + return at, string(ch), nil + case '\\': + return aslash, string(ch), nil + case '?': + return qmark, string(ch), nil + case '*': + return asterisk, string(ch), nil + case '[': + return obracket, string(ch), nil + case '!': + return emark, string(ch), nil + } + + if unicode.IsSpace(ch) { + return space, string(ch), nil + } + + if unicode.IsControl(ch) { + return control, string(ch), nil + } + + if unicode.IsLetter(ch) { + return tokenizeExpression(ch, word, unicode.IsLetter, s.r) + } + + if unicode.IsNumber(ch) { + return tokenizeExpression(ch, number, unicode.IsNumber, s.r) + } + + return tokenError, string(ch), nil +} diff --git a/internal/revision/scanner_test.go b/internal/revision/scanner_test.go new file mode 100644 index 0000000..d27ccb1 --- /dev/null +++ b/internal/revision/scanner_test.go @@ -0,0 +1,194 @@ +package revision + +import ( + "bytes" + "testing" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type ScannerSuite struct{} + +var _ = Suite(&ScannerSuite{}) + +func (s *ScannerSuite) TestReadColon(c *C) { + scanner := newScanner(bytes.NewBufferString(":")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, ":") + c.Assert(tok, Equals, colon) +} + +func (s *ScannerSuite) TestReadTilde(c *C) { + scanner := newScanner(bytes.NewBufferString("~")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "~") + c.Assert(tok, Equals, tilde) +} + +func (s *ScannerSuite) TestReadCaret(c *C) { + scanner := newScanner(bytes.NewBufferString("^")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "^") + c.Assert(tok, Equals, caret) +} + +func (s *ScannerSuite) TestReadDot(c *C) { + scanner := newScanner(bytes.NewBufferString(".")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, ".") + c.Assert(tok, Equals, dot) +} + +func (s *ScannerSuite) TestReadSlash(c *C) { + scanner := newScanner(bytes.NewBufferString("/")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "/") + c.Assert(tok, Equals, slash) +} + +func (s *ScannerSuite) TestReadEOF(c *C) { + scanner := newScanner(bytes.NewBufferString(string(rune(0)))) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "") + c.Assert(tok, Equals, eof) +} + +func (s *ScannerSuite) TestReadNumber(c *C) { + scanner := newScanner(bytes.NewBufferString("1234")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "1234") + c.Assert(tok, Equals, number) +} + +func (s *ScannerSuite) TestReadSpace(c *C) { + scanner := newScanner(bytes.NewBufferString(" ")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data,
Equals, " ") + c.Assert(tok, Equals, space) +} + +func (s *ScannerSuite) TestReadControl(c *C) { + scanner := newScanner(bytes.NewBufferString("")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "\x01") + c.Assert(tok, Equals, control) +} + +func (s *ScannerSuite) TestReadOpenBrace(c *C) { + scanner := newScanner(bytes.NewBufferString("{")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "{") + c.Assert(tok, Equals, obrace) +} + +func (s *ScannerSuite) TestReadCloseBrace(c *C) { + scanner := newScanner(bytes.NewBufferString("}")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "}") + c.Assert(tok, Equals, cbrace) +} + +func (s *ScannerSuite) TestReadMinus(c *C) { + scanner := newScanner(bytes.NewBufferString("-")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "-") + c.Assert(tok, Equals, minus) +} + +func (s *ScannerSuite) TestReadAt(c *C) { + scanner := newScanner(bytes.NewBufferString("@")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "@") + c.Assert(tok, Equals, at) +} + +func (s *ScannerSuite) TestReadAntislash(c *C) { + scanner := newScanner(bytes.NewBufferString("\\")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "\\") + c.Assert(tok, Equals, aslash) +} + +func (s *ScannerSuite) TestReadQuestionMark(c *C) { + scanner := newScanner(bytes.NewBufferString("?")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "?") + c.Assert(tok, Equals, qmark) +} + +func (s *ScannerSuite) TestReadAsterisk(c *C) { + scanner := newScanner(bytes.NewBufferString("*")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "*") + c.Assert(tok, Equals, asterisk) +} + +func (s *ScannerSuite) TestReadOpenBracket(c *C) { + scanner := newScanner(bytes.NewBufferString("[")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "[") + c.Assert(tok, Equals, obracket) +} + +func (s *ScannerSuite) TestReadExclamationMark(c *C) { + scanner := newScanner(bytes.NewBufferString("!")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "!") + c.Assert(tok, Equals, emark) +} + +func (s *ScannerSuite) TestReadWord(c *C) { + scanner := newScanner(bytes.NewBufferString("abcde")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "abcde") + c.Assert(tok, Equals, word) +} + +func (s *ScannerSuite) TestReadTokenError(c *C) { + scanner := newScanner(bytes.NewBufferString("`")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "`") + c.Assert(tok, Equals, tokenError) +} diff --git a/internal/revision/token.go b/internal/revision/token.go new file mode 100644 index 0000000..abc4048 --- /dev/null +++ b/internal/revision/token.go @@ -0,0 +1,28 @@ +package revision + +// token represents a entity extracted from string parsing +type token int + +const ( + eof token = iota + + aslash + asterisk + at + caret + cbrace + colon + control + dot + emark + minus + number + obrace + obracket + qmark + slash + space + tilde + tokenError + word +) diff --git a/plumbing/format/config/common.go b/plumbing/format/config/common.go index cc1e81a..8f98ad1 100644 --- a/plumbing/format/config/common.go +++ b/plumbing/format/config/common.go @@ -5,26 +5,32 @@ 
func New() *Config { return &Config{} } +// Config contains all the sections, comments and includes from a config file. type Config struct { Comment *Comment Sections Sections Includes Includes } +// Includes is a list of Includes in a config file. type Includes []*Include -// A reference to an included configuration. +// Include is a reference to an included config file. type Include struct { Path string Config *Config } +// Comment is a comment string without the prefix '#' or ';'. type Comment string const ( + // NoSubsection token is passed to Config.AddOption and Config.SetOption to + // represent the absence of a subsection. NoSubsection = "" ) +// Section returns an existing section with the given name or creates a new one. func (c *Config) Section(name string) *Section { for i := len(c.Sections) - 1; i >= 0; i-- { s := c.Sections[i] @@ -38,36 +44,31 @@ func (c *Config) Section(name string) *Section { return s } -// AddOption is a convenience method to add an option to a given -// section and subsection. -// -// Use the NoSubsection constant for the subsection argument -// if no subsection is wanted. -func (s *Config) AddOption(section string, subsection string, key string, value string) *Config { +// AddOption adds an option to a given section and subsection. Use the +// NoSubsection constant for the subsection argument if no subsection is wanted. +func (c *Config) AddOption(section string, subsection string, key string, value string) *Config { if subsection == "" { - s.Section(section).AddOption(key, value) + c.Section(section).AddOption(key, value) } else { - s.Section(section).Subsection(subsection).AddOption(key, value) + c.Section(section).Subsection(subsection).AddOption(key, value) } - return s + return c } -// SetOption is a convenience method to set an option to a given -// section and subsection. -// -// Use the NoSubsection constant for the subsection argument -// if no subsection is wanted. -func (s *Config) SetOption(section string, subsection string, key string, value string) *Config { +// SetOption sets an option to a given section and subsection. Use the +// NoSubsection constant for the subsection argument if no subsection is wanted. +func (c *Config) SetOption(section string, subsection string, key string, value string) *Config { if subsection == "" { - s.Section(section).SetOption(key, value) + c.Section(section).SetOption(key, value) } else { - s.Section(section).Subsection(subsection).SetOption(key, value) + c.Section(section).Subsection(subsection).SetOption(key, value) } - return s + return c } +// RemoveSection removes a section from a config file. func (c *Config) RemoveSection(name string) *Config { result := Sections{} for _, s := range c.Sections { @@ -80,6 +81,7 @@ func (c *Config) RemoveSection(name string) *Config { return c } +// RemoveSubsection removes a subsection from a config file. func (c *Config) RemoveSubsection(section string, subsection string) *Config { for _, s := range c.Sections { if s.IsName(section) { diff --git a/plumbing/format/config/doc.go b/plumbing/format/config/doc.go index dd77fbc..3986c83 100644 --- a/plumbing/format/config/doc.go +++ b/plumbing/format/config/doc.go @@ -1,199 +1,122 @@ -// Package config implements decoding/encoding of git config files. +// Package config implements encoding and decoding of git config files. +// +// Configuration File +// ------------------ +// +// The Git configuration file contains a number of variables that affect +// the Git commands' behavior.
The `.git/config` file in each repository +// is used to store the configuration for that repository, and +// `$HOME/.gitconfig` is used to store a per-user configuration as +// fallback values for the `.git/config` file. The file `/etc/gitconfig` +// can be used to store a system-wide default configuration. +// +// The configuration variables are used by both the Git plumbing +// and the porcelains. The variables are divided into sections, wherein +// the fully qualified variable name of the variable itself is the last +// dot-separated segment and the section name is everything before the last +// dot. The variable names are case-insensitive, allow only alphanumeric +// characters and `-`, and must start with an alphabetic character. Some +// variables may appear multiple times; we say then that the variable is +// multivalued. +// +// Syntax +// ~~~~~~ +// +// The syntax is fairly flexible and permissive; whitespaces are mostly +// ignored. The '#' and ';' characters begin comments to the end of line, +// blank lines are ignored. +// +// The file consists of sections and variables. A section begins with +// the name of the section in square brackets and continues until the next +// section begins. Section names are case-insensitive. Only alphanumeric +// characters, `-` and `.` are allowed in section names. Each variable +// must belong to some section, which means that there must be a section +// header before the first setting of a variable. +// +// Sections can be further divided into subsections. To begin a subsection +// put its name in double quotes, separated by space from the section name, +// in the section header, like in the example below: +// +// -------- +// [section "subsection"] +// +// -------- +// +// Subsection names are case sensitive and can contain any characters except +// newline (doublequote `"` and backslash can be included by escaping them +// as `\"` and `\\`, respectively). Section headers cannot span multiple +// lines. Variables may belong directly to a section or to a given subsection. +// You can have `[section]` if you have `[section "subsection"]`, but you +// don't need to. +// +// There is also a deprecated `[section.subsection]` syntax. With this +// syntax, the subsection name is converted to lower-case and is also +// compared case sensitively. These subsection names follow the same +// restrictions as section names. +// +// All the other lines (and the remainder of the line after the section +// header) are recognized as setting variables, in the form +// 'name = value' (or just 'name', which is a short-hand to say that +// the variable is the boolean "true"). +// The variable names are case-insensitive, allow only alphanumeric characters +// and `-`, and must start with an alphabetic character. +// +// A line that defines a value can be continued to the next line by +// ending it with a `\`; the backquote and the end-of-line are +// stripped. Leading whitespaces after 'name =', the remainder of the +// line after the first comment character '#' or ';', and trailing +// whitespaces of the line are discarded unless they are enclosed in +// double quotes. Internal whitespaces within the value are retained +// verbatim. +// +// Inside double quotes, double quote `"` and backslash `\` characters +// must be escaped: use `\"` for `"` and `\\` for `\`. +// +// The following escape sequences (beside `\"` and `\\`) are recognized: +// `\n` for newline character (NL), `\t` for horizontal tabulation (HT, TAB) +// and `\b` for backspace (BS). 
Other char escape sequences (including octal +// escape sequences) are invalid. +// +// Includes +// ~~~~~~~~ +// +// You can include one config file from another by setting the special +// `include.path` variable to the name of the file to be included. The +// variable takes a pathname as its value, and is subject to tilde +// expansion. +// +// The included file is expanded immediately, as if its contents had been +// found at the location of the include directive. If the value of the +// `include.path` variable is a relative path, the path is considered to be +// relative to the configuration file in which the include directive was +// found. See below for examples. +// +// +// Example +// ~~~~~~~ +// +// # Core variables +// [core] +// ; Don't trust file modes +// filemode = false +// +// # Our diff algorithm +// [diff] +// external = /usr/local/bin/diff-wrapper +// renames = true +// +// [branch "devel"] +// remote = origin +// merge = refs/heads/devel +// +// # Proxy settings +// [core] +// gitProxy="ssh" for "kernel.org" +// gitProxy=default-proxy ; for the rest +// +// [include] +// path = /path/to/foo.inc ; include by absolute path +// path = foo ; expand "foo" relative to the current file +// path = ~/foo ; expand "foo" in your `$HOME` directory +// package config - -/* - -CONFIGURATION FILE ------------------- - -The Git configuration file contains a number of variables that affect -the Git commands' behavior. The `.git/config` file in each repository -is used to store the configuration for that repository, and -`$HOME/.gitconfig` is used to store a per-user configuration as -fallback values for the `.git/config` file. The file `/etc/gitconfig` -can be used to store a system-wide default configuration. - -The configuration variables are used by both the Git plumbing -and the porcelains. The variables are divided into sections, wherein -the fully qualified variable name of the variable itself is the last -dot-separated segment and the section name is everything before the last -dot. The variable names are case-insensitive, allow only alphanumeric -characters and `-`, and must start with an alphabetic character. Some -variables may appear multiple times; we say then that the variable is -multivalued. - -Syntax -~~~~~~ - -The syntax is fairly flexible and permissive; whitespaces are mostly -ignored. The '#' and ';' characters begin comments to the end of line, -blank lines are ignored. - -The file consists of sections and variables. A section begins with -the name of the section in square brackets and continues until the next -section begins. Section names are case-insensitive. Only alphanumeric -characters, `-` and `.` are allowed in section names. Each variable -must belong to some section, which means that there must be a section -header before the first setting of a variable. - -Sections can be further divided into subsections. To begin a subsection -put its name in double quotes, separated by space from the section name, -in the section header, like in the example below: - --------- - [section "subsection"] - --------- - -Subsection names are case sensitive and can contain any characters except -newline (doublequote `"` and backslash can be included by escaping them -as `\"` and `\\`, respectively). Section headers cannot span multiple -lines. Variables may belong directly to a section or to a given subsection. -You can have `[section]` if you have `[section "subsection"]`, but you -don't need to. - -There is also a deprecated `[section.subsection]` syntax. 
With this -syntax, the subsection name is converted to lower-case and is also -compared case sensitively. These subsection names follow the same -restrictions as section names. - -All the other lines (and the remainder of the line after the section -header) are recognized as setting variables, in the form -'name = value' (or just 'name', which is a short-hand to say that -the variable is the boolean "true"). -The variable names are case-insensitive, allow only alphanumeric characters -and `-`, and must start with an alphabetic character. - -A line that defines a value can be continued to the next line by -ending it with a `\`; the backquote and the end-of-line are -stripped. Leading whitespaces after 'name =', the remainder of the -line after the first comment character '#' or ';', and trailing -whitespaces of the line are discarded unless they are enclosed in -double quotes. Internal whitespaces within the value are retained -verbatim. - -Inside double quotes, double quote `"` and backslash `\` characters -must be escaped: use `\"` for `"` and `\\` for `\`. - -The following escape sequences (beside `\"` and `\\`) are recognized: -`\n` for newline character (NL), `\t` for horizontal tabulation (HT, TAB) -and `\b` for backspace (BS). Other char escape sequences (including octal -escape sequences) are invalid. - - -Includes -~~~~~~~~ - -You can include one config file from another by setting the special -`include.path` variable to the name of the file to be included. The -variable takes a pathname as its value, and is subject to tilde -expansion. - -The -included file is expanded immediately, as if its contents had been -found at the location of the include directive. If the value of the -`include.path` variable is a relative path, the path is considered to be -relative to the configuration file in which the include directive was -found. See below for examples. - - -Example -~~~~~~~ - - # Core variables - [core] - ; Don't trust file modes - filemode = false - - # Our diff algorithm - [diff] - external = /usr/local/bin/diff-wrapper - renames = true - - [branch "devel"] - remote = origin - merge = refs/heads/devel - - # Proxy settings - [core] - gitProxy="ssh" for "kernel.org" - gitProxy=default-proxy ; for the rest - - [include] - path = /path/to/foo.inc ; include by absolute path - path = foo ; expand "foo" relative to the current file - path = ~/foo ; expand "foo" in your `$HOME` directory - - -Values -~~~~~~ - -Values of many variables are treated as a simple string, but there -are variables that take values of specific types and there are rules -as to how to spell them. - -boolean:: - - When a variable is said to take a boolean value, many - synonyms are accepted for 'true' and 'false'; these are all - case-insensitive. - - true;; Boolean true can be spelled as `yes`, `on`, `true`, - or `1`. Also, a variable defined without `= <value>` - is taken as true. - - false;; Boolean false can be spelled as `no`, `off`, - `false`, or `0`. -+ -When converting value to the canonical form using `--bool` type -specifier; 'git config' will ensure that the output is "true" or -"false" (spelled in lowercase). - -integer:: - The value for many variables that specify various sizes can - be suffixed with `k`, `M`,... to mean "scale the number by - 1024", "by 1024x1024", etc. - -color:: - The value for a variable that takes a color is a list of - colors (at most two, one for foreground and one for background) - and attributes (as many as you want), separated by spaces. 
-+ -The basic colors accepted are `normal`, `black`, `red`, `green`, `yellow`, -`blue`, `magenta`, `cyan` and `white`. The first color given is the -foreground; the second is the background. -+ -Colors may also be given as numbers between 0 and 255; these use ANSI -256-color mode (but note that not all terminals may support this). If -your terminal supports it, you may also specify 24-bit RGB values as -hex, like `#ff0ab3`. -+ - -From: https://git-scm.com/docs/git-config -The accepted attributes are `bold`, `dim`, `ul`, `blink`, `reverse`, -`italic`, and `strike` (for crossed-out or "strikethrough" letters). -The position of any attributes with respect to the colors -(before, after, or in between), doesn't matter. Specific attributes may -be turned off by prefixing them with `no` or `no-` (e.g., `noreverse`, -`no-ul`, etc). -+ -For git's pre-defined color slots, the attributes are meant to be reset -at the beginning of each item in the colored output. So setting -`color.decorate.branch` to `black` will paint that branch name in a -plain `black`, even if the previous thing on the same output line (e.g. -opening parenthesis before the list of branch names in `log --decorate` -output) is set to be painted with `bold` or some other attribute. -However, custom log formats may do more complicated and layered -coloring, and the negated forms may be useful there. - -pathname:: - A variable that takes a pathname value can be given a - string that begins with "`~/`" or "`~user/`", and the usual - tilde expansion happens to such a string: `~/` - is expanded to the value of `$HOME`, and `~user/` to the - specified user's home directory. - -From: -https://raw.githubusercontent.com/git/git/659889482ac63411daea38b2c3d127842ea04e4d/Documentation/config.txt - -*/ diff --git a/plumbing/format/config/option.go b/plumbing/format/config/option.go index cae83e5..3c391c6 100644 --- a/plumbing/format/config/option.go +++ b/plumbing/format/config/option.go @@ -4,6 +4,7 @@ import ( "strings" ) +// Option defines a key/value entity in a config file. type Option struct { // Key preserving original caseness. // Use IsKey instead to compare key regardless of caseness. diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go index 835978b..020c997 100--- wait
+// +// == Original (version 1) pack-*.idx files have the following format: +// +// - The header consists of 256 4-byte network byte order +// integers. N-th entry of this table records the number of +// objects in the corresponding pack, the first byte of whose +// object name is less than or equal to N. This is called the +// 'first-level fan-out' table. +// +// - The header is followed by sorted 24-byte entries, one entry +// per object in the pack. Each entry is: +// +// 4-byte network byte order integer, recording where the +// object is stored in the packfile as the offset from the +// beginning. +// +// 20-byte object name. +// +// - The file is concluded with a trailer: +// +// A copy of the 20-byte SHA1 checksum at the end of +// corresponding packfile. +// +// 20-byte SHA1-checksum of all of the above. +// +// Pack Idx file: +// +// -- +--------------------------------+ +// fanout | fanout[0] = 2 (for example) |-. +// table +--------------------------------+ | +// | fanout[1] | | +// +--------------------------------+ | +// | fanout[2] | | +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | +// | fanout[255] = total objects |---. +// -- +--------------------------------+ | | +// main | offset | | | +// index | object name 00XXXXXXXXXXXXXXXX | | | +// tab +--------------------------------+ | | +// | offset | | | +// | object name 00XXXXXXXXXXXXXXXX | | | +// +--------------------------------+<+ | +// .-| offset | | +// | | object name 01XXXXXXXXXXXXXXXX | | +// | +--------------------------------+ | +// | | offset | | +// | | object name 01XXXXXXXXXXXXXXXX | | +// | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | +// | | offset | | +// | | object name FFXXXXXXXXXXXXXXXX | | +// --| +--------------------------------+<--+ +// trailer | | packfile checksum | +// | +--------------------------------+ +// | | idxfile checksum | +// | +--------------------------------+ +// .---------. +// | +// Pack file entry: <+ +// +// packed object header: +// 1-byte size extension bit (MSB) +// type (next 3 bit) +// size0 (lower 4-bit) +// n-byte sizeN (as long as MSB is set, each 7-bit) +// size0..sizeN form 4+7+7+..+7 bit integer, size0 +// is the least significant part, and sizeN is the +// most significant part. +// packed object data: +// If it is not DELTA, then deflated bytes (the size above +// is the size before compression). +// If it is REF_DELTA, then +// 20-byte base object name SHA1 (the size above is the +// size of the delta data that follows). +// delta data, deflated. +// If it is OFS_DELTA, then +// n-byte offset (see below) interpreted as a negative +// offset from the type-byte of the header of the +// ofs-delta entry (the size above is the size of +// the delta data that follows). +// delta data, deflated. +// +// offset encoding: +// n bytes with MSB set in all but the last one. +// The offset is then the number constructed by +// concatenating the lower 7 bit of each byte, and +// for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) +// to the result. +// +// == Version 2 pack-*.idx files support packs larger than 4 GiB, and +// have some other reorganizations. They have the format: +// +// - A 4-byte magic number '\377tOc' which is an unreasonable +// fanout[0] value. +// +// - A 4-byte version number (= 2) +// +// - A 256-entry fan-out table just like v1. +// +// - A table of sorted 20-byte SHA1 object names. These are +// packed together without offset values to reduce the cache +// footprint of the binary search for a specific object name. 
+// +// - A table of 4-byte CRC32 values of the packed object data. +// This is new in v2 so compressed data can be copied directly +// from pack to pack during repacking without undetected +// data corruption. +// +// - A table of 4-byte offset values (in network byte order). +// These are usually 31-bit pack file offsets, but large +// offsets are encoded as an index into the next table with +// the msbit set. +// +// - A table of 8-byte offset entries (empty for pack files less +// than 2 GiB). Pack files are organized with heavily used +// objects toward the front, so most object references should +// not need to refer to this table. +// +// - The same trailer as a v1 pack file: +// +// A copy of the 20-byte SHA1 checksum at the end of +// corresponding packfile. +// +// 20-byte SHA1-checksum of all of the above. +// +// Source: +// https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-format.txt package idxfile - -/* -== Original (version 1) pack-*.idx files have the following format: - - - The header consists of 256 4-byte network byte order - integers. N-th entry of this table records the number of - objects in the corresponding pack, the first byte of whose - object name is less than or equal to N. This is called the - 'first-level fan-out' table. - - - The header is followed by sorted 24-byte entries, one entry - per object in the pack. Each entry is: - - 4-byte network byte order integer, recording where the - object is stored in the packfile as the offset from the - beginning. - - 20-byte object name. - - - The file is concluded with a trailer: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -Pack Idx file: - - -- +--------------------------------+ -fanout | fanout[0] = 2 (for example) |-. -table +--------------------------------+ | - | fanout[1] | | - +--------------------------------+ | - | fanout[2] | | - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | fanout[255] = total objects |---. - -- +--------------------------------+ | | -main | offset | | | -index | object name 00XXXXXXXXXXXXXXXX | | | -table +--------------------------------+ | | - | offset | | | - | object name 00XXXXXXXXXXXXXXXX | | | - +--------------------------------+<+ | - .-| offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | +--------------------------------+ | - | | offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | | offset | | - | | object name FFXXXXXXXXXXXXXXXX | | - --| +--------------------------------+<--+ -trailer | | packfile checksum | - | +--------------------------------+ - | | idxfile checksum | - | +--------------------------------+ - .-------. - | -Pack file entry: <+ - - packed object header: - 1-byte size extension bit (MSB) - type (next 3 bit) - size0 (lower 4-bit) - n-byte sizeN (as long as MSB is set, each 7-bit) - size0..sizeN form 4+7+7+..+7 bit integer, size0 - is the least significant part, and sizeN is the - most significant part. - packed object data: - If it is not DELTA, then deflated bytes (the size above - is the size before compression). - If it is REF_DELTA, then - 20-byte base object name SHA1 (the size above is the - size of the delta data that follows). - delta data, deflated. - If it is OFS_DELTA, then - n-byte offset (see below) interpreted as a negative - offset from the type-byte of the header of the - ofs-delta entry (the size above is the size of - the delta data that follows). - delta data, deflated. 
- - offset encoding: - n bytes with MSB set in all but the last one. - The offset is then the number constructed by - concatenating the lower 7 bit of each byte, and - for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) - to the result. - - - -== Version 2 pack-*.idx files support packs larger than 4 GiB, and - have some other reorganizations. They have the format: - - - A 4-byte magic number '\377tOc' which is an unreasonable - fanout[0] value. - - - A 4-byte version number (= 2) - - - A 256-entry fan-out table just like v1. - - - A table of sorted 20-byte SHA1 object names. These are - packed together without offset values to reduce the cache - footprint of the binary search for a specific object name. - - - A table of 4-byte CRC32 values of the packed object data. - This is new in v2 so compressed data can be copied directly - from pack to pack during repacking without undetected - data corruption. - - - A table of 4-byte offset values (in network byte order). - These are usually 31-bit pack file offsets, but large - offsets are encoded as an index into the next table with - the msbit set. - - - A table of 8-byte offset entries (empty for pack files less - than 2 GiB). Pack files are organized with heavily used - objects toward the front, so most object references should - not need to refer to this table. - - - The same trailer as a v1 pack file: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -From: -https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt -*/ diff --git a/plumbing/format/idxfile/encoder.go b/plumbing/format/idxfile/encoder.go index 2b0ef89..374d053 100644 --- a/plumbing/format/idxfile/encoder.go +++ b/plumbing/format/idxfile/encoder.go @@ -9,20 +9,20 @@ import ( "srcd.works/go-git.v4/utils/binary" ) -// An Encoder writes idx files to an output stream. +// Encoder writes Idxfile structs to an output stream. type Encoder struct { io.Writer hash hash.Hash } -// NewEncoder returns a new encoder that writes to w. +// NewEncoder returns a new stream encoder that writes to w. func NewEncoder(w io.Writer) *Encoder { h := sha1.New() mw := io.MultiWriter(w, h) return &Encoder{mw, h} } -// Encode writes the idx in an idx file format to the stream of the encoder. +// Encode encodes an Idxfile to the encoder writer. func (e *Encoder) Encode(idx *Idxfile) (int, error) { idx.Entries.Sort() @@ -123,6 +123,7 @@ func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) { return 40, nil } +// EntryList implements sort.Interface allowing sorting in increasing order. type EntryList []Entry func (p EntryList) Len() int { return len(p) } diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index ee014e5..b39295a 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -11,7 +11,7 @@ var ( idxHeader = []byte{255, 't', 'O', 'c'} ) -// An Idxfile represents an idx file in memory. +// Idxfile is the in memory representation of an idx file. type Idxfile struct { Version uint32 Fanout [255]uint32 @@ -21,14 +21,14 @@ type Idxfile struct { IdxChecksum [20]byte } -// An Entry represents data about an object in the packfile: its hash, -// offset and CRC32 checksum. +// Entry is the in memory representation of an object entry in the idx file. type Entry struct { Hash plumbing.Hash CRC32 uint32 Offset uint64 } +// Add adds a new Entry with the given values to the Idxfile. 
func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) { idx.Entries = append(idx.Entries, Entry{ Hash: h, diff --git a/plumbing/format/index/doc.go b/plumbing/format/index/doc.go index 7000944..d1e7b33 100644 --- a/plumbing/format/index/doc.go +++ b/plumbing/format/index/doc.go @@ -1,302 +1,301 @@ -// Package index implements an encoder and a decoder of index format files +// Package index implements encoding and decoding of index format files. +// +// Git index format +// ================ +// +// == The Git index file has the following format +// +// All binary numbers are in network byte order. Version 2 is described +// here unless stated otherwise. +// +// - A 12-byte header consisting of +// +// 4-byte signature: +// The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache") +// +// 4-byte version number: +// The current supported versions are 2, 3 and 4. +// +// 32-bit number of index entries. +// +// - A number of sorted index entries (see below). +// +// - Extensions +// +// Extensions are identified by signature. Optional extensions can +// be ignored if Git does not understand them. +// +// Git currently supports cached tree and resolve undo extensions. +// +// 4-byte extension signature. If the first byte is 'A'..'Z' the +// extension is optional and can be ignored. +// +// 32-bit size of the extension +// +// Extension data +// +// - 160-bit SHA-1 over the content of the index file before this +// checksum. +// +// == Index entry +// +// Index entries are sorted in ascending order on the name field, +// interpreted as a string of unsigned bytes (i.e. memcmp() order, no +// localization, no special casing of directory separator '/'). Entries +// with the same name are sorted by their stage field. +// +// 32-bit ctime seconds, the last time a file's metadata changed +// this is stat(2) data +// +// 32-bit ctime nanosecond fractions +// this is stat(2) data +// +// 32-bit mtime seconds, the last time a file's data changed +// this is stat(2) data +// +// 32-bit mtime nanosecond fractions +// this is stat(2) data +// +// 32-bit dev +// this is stat(2) data +// +// 32-bit ino +// this is stat(2) data +// +// 32-bit mode, split into (high to low bits) +// +// 4-bit object type +// valid values in binary are 1000 (regular file), 1010 (symbolic link) +// and 1110 (gitlink) +// +// 3-bit unused +// +// 9-bit unix permission. Only 0755 and 0644 are valid for regular files. +// Symbolic links and gitlinks have value 0 in this field. +// +// 32-bit uid +// this is stat(2) data +// +// 32-bit gid +// this is stat(2) data +// +// 32-bit file size +// This is the on-disk size from stat(2), truncated to 32-bit. +// +// 160-bit SHA-1 for the represented object +// +// A 16-bit 'flags' field split into (high to low bits) +// +// 1-bit assume-valid flag +// +// 1-bit extended flag (must be zero in version 2) +// +// 2-bit stage (during merge) +// +// 12-bit name length if the length is less than 0xFFF; otherwise 0xFFF +// is stored in this field. +// +// (Version 3 or later) A 16-bit field, only applicable if the +// "extended flag" above is 1, split into (high to low bits). +// +// 1-bit reserved for future +// +// 1-bit skip-worktree flag (used by sparse checkout) +// +// 1-bit intent-to-add flag (used by "git add -N") +// +// 13-bit unused, must be zero +// +// Entry path name (variable length) relative to top level directory +// (without leading slash). '/' is used as path separator. The special +// path components ".", ".." and ".git" (without quotes) are disallowed. 
+// Trailing slash is also disallowed. +// +// The exact encoding is undefined, but the '.' and '/' characters +// are encoded in 7-bit ASCII and the encoding cannot contain a NUL +// byte (iow, this is a UNIX pathname). +// +// (Version 4) In version 4, the entry path name is prefix-compressed +// relative to the path name for the previous entry (the very first +// entry is encoded as if the path name for the previous entry is an +// empty string). At the beginning of an entry, an integer N in the +// variable width encoding (the same encoding as the offset is encoded +// for OFS_DELTA pack entries; see pack-format.txt) is stored, followed +// by a NUL-terminated string S. Removing N bytes from the end of the +// path name for the previous entry, and replacing it with the string S +// yields the path name for this entry. +// +// 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes +// while keeping the name NUL-terminated. +// +// (Version 4) In version 4, the padding after the pathname does not +// exist. +// +// Interpretation of index entries in split index mode is completely +// different. See below for details. +// +// == Extensions +// +// === Cached tree +// +// Cached tree extension contains pre-computed hashes for trees that can +// be derived from the index. It helps speed up tree object generation +// from index for a new commit. +// +// When a path is updated in index, the path must be invalidated and +// removed from tree cache. +// +// The signature for this extension is { 'T', 'R', 'E', 'E' }. +// +// A series of entries fill the entire extension; each of which +// consists of: +// +// - NUL-terminated path component (relative to its parent directory); +// +// - ASCII decimal number of entries in the index that is covered by the +// tree this entry represents (entry_count); +// +// - A space (ASCII 32); +// +// - ASCII decimal number that represents the number of subtrees this +// tree has; +// +// - A newline (ASCII 10); and +// +// - 160-bit object name for the object that would result from writing +// this span of index as a tree. +// +// An entry can be in an invalidated state and is represented by having +// a negative number in the entry_count field. In this case, there is no +// object name and the next entry starts immediately after the newline. +// When writing an invalid entry, -1 should always be used as entry_count. +// +// The entries are written out in the top-down, depth-first order. The +// first entry represents the root level of the repository, followed by the +// first subtree--let's call this A--of the root level (with its name +// relative to the root level), followed by the first subtree of A (with +// its name relative to A), ... +// +// === Resolve undo +// +// A conflict is represented in the index as a set of higher stage entries. +// When a conflict is resolved (e.g. with "git add path"), these higher +// stage entries will be removed and a stage-0 entry with proper resolution +// is added. +// +// When these higher stage entries are removed, they are saved in the +// resolve undo extension, so that conflicts can be recreated (e.g. with +// "git checkout -m"), in case users want to redo a conflict resolution +// from scratch. +// +// The signature for this extension is { 'R', 'E', 'U', 'C' }. +// +// A series of entries fill the entire extension; each of which +// consists of: +// +// - NUL-terminated pathname the entry describes (relative to the root of +// the repository, i.e. 
full pathname); +// +// - Three NUL-terminated ASCII octal numbers, entry mode of entries in +// stage 1 to 3 (a missing stage is represented by "0" in this field); +// and +// +// - At most three 160-bit object names of the entry in stages from 1 to 3 +// (nothing is written for a missing stage). +// +// === Split index +// +// In split index mode, the majority of index entries could be stored +// in a separate file. This extension records the changes to be made on +// top of that to produce the final index. +// +// The signature for this extension is { 'l', 'i', 'n', 'k' }. +// +// The extension consists of: +// +// - 160-bit SHA-1 of the shared index file. The shared index file path +// is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the +// index does not require a shared index file. +// +// - An ewah-encoded delete bitmap, each bit represents an entry in the +// shared index. If a bit is set, its corresponding entry in the +// shared index will be removed from the final index. Note, because +// a delete operation changes index entry positions, but we do need +// original positions in replace phase, it's best to just mark +// entries for removal, then do a mass deletion after replacement. +// +// - An ewah-encoded replace bitmap, each bit represents an entry in +// the shared index. If a bit is set, its corresponding entry in the +// shared index will be replaced with an entry in this index +// file. All replaced entries are stored in sorted order in this +// index. The first "1" bit in the replace bitmap corresponds to the +// first index entry, the second "1" bit to the second entry and so +// on. Replaced entries may have empty path names to save space. +// +// The remaining index entries after replaced ones will be added to the +// final index. These added entries are also sorted by entry name then +// stage. +// +// == Untracked cache +// +// Untracked cache saves the untracked file list and necessary data to +// verify the cache. The signature for this extension is { 'U', 'N', +// 'T', 'R' }. +// +// The extension starts with +// +// - A sequence of NUL-terminated strings, preceded by the size of the +// sequence in variable width encoding. Each string describes the +// environment where the cache can be used. +// +// - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from +// ctime field until "file size". +// +// - Stat data of plumbing.excludesfile +// +// - 32-bit dir_flags (see struct dir_struct) +// +// - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file +// does not exist. +// +// - 160-bit SHA-1 of plumbing.excludesfile. Null SHA-1 means the file does +// not exist. +// +// - NUL-terminated string of per-dir exclude file name. This usually +// is ".gitignore". +// +// - The number of following directory blocks, variable width +// encoding. If this number is zero, the extension ends here with a +// following NUL. +// +// - A number of directory blocks in depth-first-search order, each +// consists of +// +// - The number of untracked entries, variable width encoding. +// +// - The number of sub-directory blocks, variable width encoding. +// +// - The directory name terminated by NUL. +// +// - A number of untracked file/dir names terminated by NUL. +// +// The remaining data of each directory block is grouped by type: +// +// - An ewah bitmap, the n-th bit marks whether the n-th directory has +// valid untracked cache entries. 
+// +// - An ewah bitmap, the n-th bit records "check-only" bit of +// read_directory_recursive() for the n-th directory. +// +// - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data +// is valid for the n-th directory and exists in the next data. +// +// - An array of stat data. The n-th data corresponds with the n-th +// "one" bit in the previous ewah bitmap. +// +// - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit +// in the previous ewah bitmap. +// +// - One NUL. +// Source https://www.kernel.org/pub/software/scm/git/docs/technical/index-format.txt package index - -/* -Git index format -================ - -== The Git index file has the following format - - All binary numbers are in network byte order. Version 2 is described - here unless stated otherwise. - - - A 12-byte header consisting of - - 4-byte signature: - The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache") - - 4-byte version number: - The current supported versions are 2, 3 and 4. - - 32-bit number of index entries. - - - A number of sorted index entries (see below). - - - Extensions - - Extensions are identified by signature. Optional extensions can - be ignored if Git does not understand them. - - Git currently supports cached tree and resolve undo extensions. - - 4-byte extension signature. If the first byte is 'A'..'Z' the - extension is optional and can be ignored. - - 32-bit size of the extension - - Extension data - - - 160-bit SHA-1 over the content of the index file before this - checksum. - -== Index entry - - Index entries are sorted in ascending order on the name field, - interpreted as a string of unsigned bytes (i.e. memcmp() order, no - localization, no special casing of directory separator '/'). Entries - with the same name are sorted by their stage field. - - 32-bit ctime seconds, the last time a file's metadata changed - this is stat(2) data - - 32-bit ctime nanosecond fractions - this is stat(2) data - - 32-bit mtime seconds, the last time a file's data changed - this is stat(2) data - - 32-bit mtime nanosecond fractions - this is stat(2) data - - 32-bit dev - this is stat(2) data - - 32-bit ino - this is stat(2) data - - 32-bit mode, split into (high to low bits) - - 4-bit object type - valid values in binary are 1000 (regular file), 1010 (symbolic link) - and 1110 (gitlink) - - 3-bit unused - - 9-bit unix permission. Only 0755 and 0644 are valid for regular files. - Symbolic links and gitlinks have value 0 in this field. - - 32-bit uid - this is stat(2) data - - 32-bit gid - this is stat(2) data - - 32-bit file size - This is the on-disk size from stat(2), truncated to 32-bit. - - 160-bit SHA-1 for the represented object - - A 16-bit 'flags' field split into (high to low bits) - - 1-bit assume-valid flag - - 1-bit extended flag (must be zero in version 2) - - 2-bit stage (during merge) - - 12-bit name length if the length is less than 0xFFF; otherwise 0xFFF - is stored in this field. - - (Version 3 or later) A 16-bit field, only applicable if the - "extended flag" above is 1, split into (high to low bits). - - 1-bit reserved for future - - 1-bit skip-worktree flag (used by sparse checkout) - - 1-bit intent-to-add flag (used by "git add -N") - - 13-bit unused, must be zero - - Entry path name (variable length) relative to top level directory - (without leading slash). '/' is used as path separator. The special - path components ".", ".." and ".git" (without quotes) are disallowed. - Trailing slash is also disallowed. 
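(Editorial aside: the 12-byte header described at the top of this comment decodes mechanically; a hedged sketch, with parseIndexHeader as an illustrative name, assuming the standard encoding/binary, fmt and io packages:)

	// parseIndexHeader reads the "DIRC" signature, the 4-byte version
	// (2, 3 or 4) and the 32-bit entry count, all network byte order.
	func parseIndexHeader(r io.Reader) (version, count uint32, err error) {
		var sig [4]byte
		if _, err = io.ReadFull(r, sig[:]); err != nil {
			return
		}
		if sig != [4]byte{'D', 'I', 'R', 'C'} {
			return 0, 0, fmt.Errorf("not an index file: %q", sig[:])
		}
		if err = binary.Read(r, binary.BigEndian, &version); err != nil {
			return
		}
		err = binary.Read(r, binary.BigEndian, &count)
		return
	}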
- - The exact encoding is undefined, but the '.' and '/' characters - are encoded in 7-bit ASCII and the encoding cannot contain a NUL - byte (iow, this is a UNIX pathname). - - (Version 4) In version 4, the entry path name is prefix-compressed - relative to the path name for the previous entry (the very first - entry is encoded as if the path name for the previous entry is an - empty string). At the beginning of an entry, an integer N in the - variable width encoding (the same encoding as the offset is encoded - for OFS_DELTA pack entries; see pack-format.txt) is stored, followed - by a NUL-terminated string S. Removing N bytes from the end of the - path name for the previous entry, and replacing it with the string S - yields the path name for this entry. - - 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes - while keeping the name NUL-terminated. - - (Version 4) In version 4, the padding after the pathname does not - exist. - - Interpretation of index entries in split index mode is completely - different. See below for details. - -== Extensions - -=== Cached tree - - Cached tree extension contains pre-computed hashes for trees that can - be derived from the index. It helps speed up tree object generation - from index for a new commit. - - When a path is updated in index, the path must be invalidated and - removed from tree cache. - - The signature for this extension is { 'T', 'R', 'E', 'E' }. - - A series of entries fill the entire extension; each of which - consists of: - - - NUL-terminated path component (relative to its parent directory); - - - ASCII decimal number of entries in the index that is covered by the - tree this entry represents (entry_count); - - - A space (ASCII 32); - - - ASCII decimal number that represents the number of subtrees this - tree has; - - - A newline (ASCII 10); and - - - 160-bit object name for the object that would result from writing - this span of index as a tree. - - An entry can be in an invalidated state and is represented by having - a negative number in the entry_count field. In this case, there is no - object name and the next entry starts immediately after the newline. - When writing an invalid entry, -1 should always be used as entry_count. - - The entries are written out in the top-down, depth-first order. The - first entry represents the root level of the repository, followed by the - first subtree--let's call this A--of the root level (with its name - relative to the root level), followed by the first subtree of A (with - its name relative to A), ... - -=== Resolve undo - - A conflict is represented in the index as a set of higher stage entries. - When a conflict is resolved (e.g. with "git add path"), these higher - stage entries will be removed and a stage-0 entry with proper resolution - is added. - - When these higher stage entries are removed, they are saved in the - resolve undo extension, so that conflicts can be recreated (e.g. with - "git checkout -m"), in case users want to redo a conflict resolution - from scratch. - - The signature for this extension is { 'R', 'E', 'U', 'C' }. - - A series of entries fill the entire extension; each of which - consists of: - - - NUL-terminated pathname the entry describes (relative to the root of - the repository, i.e. 
full pathname); - - - Three NUL-terminated ASCII octal numbers, entry mode of entries in - stage 1 to 3 (a missing stage is represented by "0" in this field); - and - - - At most three 160-bit object names of the entry in stages from 1 to 3 - (nothing is written for a missing stage). - -=== Split index - - In split index mode, the majority of index entries could be stored - in a separate file. This extension records the changes to be made on - top of that to produce the final index. - - The signature for this extension is { 'l', 'i', 'n', 'k' }. - - The extension consists of: - - - 160-bit SHA-1 of the shared index file. The shared index file path - is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the - index does not require a shared index file. - - - An ewah-encoded delete bitmap, each bit represents an entry in the - shared index. If a bit is set, its corresponding entry in the - shared index will be removed from the final index. Note, because - a delete operation changes index entry positions, but we do need - original positions in replace phase, it's best to just mark - entries for removal, then do a mass deletion after replacement. - - - An ewah-encoded replace bitmap, each bit represents an entry in - the shared index. If a bit is set, its corresponding entry in the - shared index will be replaced with an entry in this index - file. All replaced entries are stored in sorted order in this - index. The first "1" bit in the replace bitmap corresponds to the - first index entry, the second "1" bit to the second entry and so - on. Replaced entries may have empty path names to save space. - - The remaining index entries after replaced ones will be added to the - final index. These added entries are also sorted by entry name then - stage. - -== Untracked cache - - Untracked cache saves the untracked file list and necessary data to - verify the cache. The signature for this extension is { 'U', 'N', - 'T', 'R' }. - - The extension starts with - - - A sequence of NUL-terminated strings, preceded by the size of the - sequence in variable width encoding. Each string describes the - environment where the cache can be used. - - - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from - ctime field until "file size". - - - Stat data of plumbing.excludesfile - - - 32-bit dir_flags (see struct dir_struct) - - - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file - does not exist. - - - 160-bit SHA-1 of plumbing.excludesfile. Null SHA-1 means the file does - not exist. - - - NUL-terminated string of per-dir exclude file name. This usually - is ".gitignore". - - - The number of following directory blocks, variable width - encoding. If this number is zero, the extension ends here with a - following NUL. - - - A number of directory blocks in depth-first-search order, each - consists of - - - The number of untracked entries, variable width encoding. - - - The number of sub-directory blocks, variable width encoding. - - - The directory name terminated by NUL. - - - A number of untracked file/dir names terminated by NUL. - -The remaining data of each directory block is grouped by type: - - - An ewah bitmap, the n-th bit marks whether the n-th directory has - valid untracked cache entries. - - - An ewah bitmap, the n-th bit records "check-only" bit of - read_directory_recursive() for the n-th directory. - - - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data - is valid for the n-th directory and exists in the next data. - - - An array of stat data. 
The n-th data corresponds with the n-th - "one" bit in the previous ewah bitmap. - - - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit - in the previous ewah bitmap. - - - One NUL. -*/ diff --git a/plumbing/format/index/index.go b/plumbing/format/index/index.go index 0e9132f..e5dc178 100644 --- a/plumbing/format/index/index.go +++ b/plumbing/format/index/index.go @@ -93,10 +93,10 @@ type TreeEntry struct { Hash plumbing.Hash } -// ResolveUndo when a conflict is resolved (e.g. with "git add path"), these -// higher stage entries will be removed and a stage-0 entry with proper +// ResolveUndo is used when a conflict is resolved (e.g. with "git add path"), +// these higher stage entries are removed and a stage-0 entry with proper // resolution is added. When these higher stage entries are removed, they are -// saved in the resolve undo extension +// saved in the resolve undo extension. type ResolveUndo struct { Entries []ResolveUndoEntry } diff --git a/plumbing/format/objfile/doc.go b/plumbing/format/objfile/doc.go new file mode 100644 index 0000000..a714516 --- /dev/null +++ b/plumbing/format/objfile/doc.go @@ -0,0 +1,2 @@ +// Package objfile implements encoding and decoding of object files. +package objfile diff --git a/plumbing/format/packfile/doc.go b/plumbing/format/packfile/doc.go index 0b173ca..2882a7f 100644 --- a/plumbing/format/packfile/doc.go +++ b/plumbing/format/packfile/doc.go @@ -1,168 +1,39 @@ -// Package packfile implements a encoder/decoder of packfile format +// Package packfile implements encoding and decoding of packfile format. +// +// == pack-*.pack files have the following format: +// +// - A header appears at the beginning and consists of the following: +// +// 4-byte signature: +// The signature is: {'P', 'A', 'C', 'K'} +// +// 4-byte version number (network byte order): +// GIT currently accepts version number 2 or 3 but +// generates version 2 only. +// +// 4-byte number of objects contained in the pack (network byte order) +// +// Observation: we cannot have more than 4G versions ;-) and +// more than 4G objects in a pack. +// +// - The header is followed by number of object entries, each of +// which looks like this: +// +// (undeltified representation) +// n-byte type and length (3-bit type, (n-1)*7+4-bit length) +// compressed data +// +// (deltified representation) +// n-byte type and length (3-bit type, (n-1)*7+4-bit length) +// 20-byte base object name +// compressed delta data +// +// Observation: length of each object is encoded in a variable +// length format and is not constrained to 32-bit or anything. +// +// - The trailer records 20-byte SHA1 checksum of all of the above. +// +// +// Source: +// https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt package packfile - -/* -GIT pack format -=============== - -== pack-*.pack files have the following format: - - - A header appears at the beginning and consists of the following: - - 4-byte signature: - The signature is: {'P', 'A', 'C', 'K'} - - 4-byte version number (network byte order): - GIT currently accepts version number 2 or 3 but - generates version 2 only. - - 4-byte number of objects contained in the pack (network byte order) - - Observation: we cannot have more than 4G versions ;-) and - more than 4G objects in a pack. 
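(Editorial aside: the pack header just described decodes the same way as the index header sketched earlier; parsePackHeader is an illustrative name, assuming encoding/binary, fmt and io:)

	// parsePackHeader reads the "PACK" signature, the version (2 or 3)
	// and the object count, all in network byte order.
	func parsePackHeader(r io.Reader) (version, objects uint32, err error) {
		var sig [4]byte
		if _, err = io.ReadFull(r, sig[:]); err != nil {
			return
		}
		if sig != [4]byte{'P', 'A', 'C', 'K'} {
			return 0, 0, fmt.Errorf("not a packfile: %q", sig[:])
		}
		if err = binary.Read(r, binary.BigEndian, &version); err != nil {
			return
		}
		err = binary.Read(r, binary.BigEndian, &objects)
		return
	}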
- - - The header is followed by number of object entries, each of - which looks like this: - - (undeltified representation) - n-byte type and length (3-bit type, (n-1)*7+4-bit length) - compressed data - - (deltified representation) - n-byte type and length (3-bit type, (n-1)*7+4-bit length) - 20-byte base object name - compressed delta data - - Observation: length of each object is encoded in a variable - length format and is not constrained to 32-bit or anything. - - - The trailer records 20-byte SHA1 checksum of all of the above. - -== Original (version 1) pack-*.idx files have the following format: - - - The header consists of 256 4-byte network byte order - integers. N-th entry of this table records the number of - objects in the corresponding pack, the first byte of whose - object name is less than or equal to N. This is called the - 'first-level fan-out' table. - - - The header is followed by sorted 24-byte entries, one entry - per object in the pack. Each entry is: - - 4-byte network byte order integer, recording where the - object is stored in the packfile as the offset from the - beginning. - - 20-byte object name. - - - The file is concluded with a trailer: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -Pack Idx file: - - -- +--------------------------------+ -fanout | fanout[0] = 2 (for example) |-. -table +--------------------------------+ | - | fanout[1] | | - +--------------------------------+ | - | fanout[2] | | - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | fanout[255] = total objects |---. - -- +--------------------------------+ | | -main | offset | | | -index | object name 00XXXXXXXXXXXXXXXX | | | -table +--------------------------------+ | | - | offset | | | - | object name 00XXXXXXXXXXXXXXXX | | | - +--------------------------------+<+ | - .-| offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | +--------------------------------+ | - | | offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | | offset | | - | | object name FFXXXXXXXXXXXXXXXX | | - --| +--------------------------------+<--+ -trailer | | packfile checksum | - | +--------------------------------+ - | | idxfile checksum | - | +--------------------------------+ - .-------. - | -Pack file entry: <+ - - packed object header: - 1-byte size extension bit (MSB) - type (next 3 bit) - size0 (lower 4-bit) - n-byte sizeN (as long as MSB is set, each 7-bit) - size0..sizeN form 4+7+7+..+7 bit integer, size0 - is the least significant part, and sizeN is the - most significant part. - packed object data: - If it is not DELTA, then deflated bytes (the size above - is the size before compression). - If it is REF_DELTA, then - 20-byte base object name SHA1 (the size above is the - size of the delta data that follows). - delta data, deflated. - If it is OFS_DELTA, then - n-byte offset (see below) interpreted as a negative - offset from the type-byte of the header of the - ofs-delta entry (the size above is the size of - the delta data that follows). - delta data, deflated. - - offset encoding: - n bytes with MSB set in all but the last one. - The offset is then the number constructed by - concatenating the lower 7 bit of each byte, and - for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) - to the result. - - - -== Version 2 pack-*.idx files support packs larger than 4 GiB, and - have some other reorganizations. 
They have the format:
-
-  - A 4-byte magic number '\377tOc' which is an unreasonable
-    fanout[0] value.
-
-  - A 4-byte version number (= 2)
-
-  - A 256-entry fan-out table just like v1.
-
-  - A table of sorted 20-byte SHA1 object names.  These are
-    packed together without offset values to reduce the cache
-    footprint of the binary search for a specific object name.
-
-  - A table of 4-byte CRC32 values of the packed object data.
-    This is new in v2 so compressed data can be copied directly
-    from pack to pack during repacking without undetected
-    data corruption.
-
-  - A table of 4-byte offset values (in network byte order).
-    These are usually 31-bit pack file offsets, but large
-    offsets are encoded as an index into the next table with
-    the msbit set.
-
-  - A table of 8-byte offset entries (empty for pack files less
-    than 2 GiB).  Pack files are organized with heavily used
-    objects toward the front, so most object references should
-    not need to refer to this table.
-
-  - The same trailer as a v1 pack file:
-
-    A copy of the 20-byte SHA1 checksum at the end of
-    corresponding packfile.
-
-    20-byte SHA1-checksum of all of the above.
-
-From:
-https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt
-*/
diff --git a/plumbing/format/pktline/encoder.go b/plumbing/format/pktline/encoder.go
index 0a88a9b..753b225 100644
--- a/plumbing/format/pktline/encoder.go
+++ b/plumbing/format/pktline/encoder.go
@@ -1,4 +1,5 @@
-// Package pktline implements reading payloads form pkt-lines and encoding pkt-lines from payloads.
+// Package pktline implements reading payloads from pkt-lines and encoding
+// pkt-lines from payloads.
 package pktline
 
 import (
diff --git a/plumbing/protocol/packp/capability/capability.go b/plumbing/protocol/packp/capability/capability.go
index 351ba0b..96d93f6 100644
--- a/plumbing/protocol/packp/capability/capability.go
+++ b/plumbing/protocol/packp/capability/capability.go
@@ -1,6 +1,7 @@
+// Package capability defines the server and client capabilities.
 package capability
 
-// Capability describes a server or client capability
+// Capability describes a server or client capability.
 type Capability string
 
 func (n Capability) String() string {
diff --git a/plumbing/revision.go b/plumbing/revision.go
new file mode 100644
index 0000000..5f053b2
--- /dev/null
+++ b/plumbing/revision.go
@@ -0,0 +1,11 @@
+package plumbing
+
+// Revision represents a git revision.
+// To get more details about git revisions,
+// please check the git manual page:
+// https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html
+type Revision string
+
+func (r Revision) String() string {
+	return string(r)
+}
diff --git a/plumbing/revlist/revlist.go b/plumbing/revlist/revlist.go
index 57ae6d7..f1d66b4 100644
--- a/plumbing/revlist/revlist.go
+++ b/plumbing/revlist/revlist.go
@@ -1,5 +1,5 @@
-// Package revlist implements functions to walk the objects referenced by a
-// commit history. Roughly equivalent to git-rev-list command.
+// Package revlist provides support to access the ancestors of commits, in a
+// similar way to the git-rev-list command.
 package revlist
 
 import (
diff --git a/plumbing/storer/doc.go b/plumbing/storer/doc.go
new file mode 100644
index 0000000..4d4f179
--- /dev/null
+++ b/plumbing/storer/doc.go
@@ -0,0 +1,2 @@
+// Package storer defines the interfaces to store objects, references, etc.
+package storer diff --git a/plumbing/transport/client/client.go b/plumbing/transport/client/client.go index 5ac1be2..f749875 100644 --- a/plumbing/transport/client/client.go +++ b/plumbing/transport/client/client.go @@ -1,3 +1,5 @@ +// Package client contains helper function to deal with the different client +// protocols. package client import ( diff --git a/plumbing/transport/file/client.go b/plumbing/transport/file/client.go index 7468426..47a6adc 100644 --- a/plumbing/transport/file/client.go +++ b/plumbing/transport/file/client.go @@ -1,3 +1,4 @@ +// Package file implements the file transport protocol. package file import ( diff --git a/plumbing/transport/git/common.go b/plumbing/transport/git/common.go index dd4bac4..fee3bf3 100644 --- a/plumbing/transport/git/common.go +++ b/plumbing/transport/git/common.go @@ -1,3 +1,4 @@ +// Package git implements the git transport protocol. package git import ( diff --git a/plumbing/transport/http/common.go b/plumbing/transport/http/common.go index 8ca5cc1..3cc2bd4 100644 --- a/plumbing/transport/http/common.go +++ b/plumbing/transport/http/common.go @@ -1,4 +1,4 @@ -// Package http implements a HTTP client for go-git. +// Package http implements the HTTP transport protocol. package http import ( diff --git a/plumbing/transport/ssh/common.go b/plumbing/transport/ssh/common.go index c65fff9..5c50933 100644 --- a/plumbing/transport/ssh/common.go +++ b/plumbing/transport/ssh/common.go @@ -1,3 +1,4 @@ +// Package ssh implements the SSH transport protocol. package ssh import ( diff --git a/repository.go b/repository.go index 4a65f46..b11e8b6 100644 --- a/repository.go +++ b/repository.go @@ -6,6 +6,7 @@ import ( "os" "srcd.works/go-git.v4/config" + "srcd.works/go-git.v4/internal/revision" "srcd.works/go-git.v4/plumbing" "srcd.works/go-git.v4/plumbing/object" "srcd.works/go-git.v4/plumbing/storer" @@ -632,3 +633,129 @@ func (r *Repository) Worktree() (*Worktree, error) { return &Worktree{r: r, fs: r.wt}, nil } + +// ResolveRevision resolves revision to corresponding hash +func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, error) { + p := revision.NewParserFromString(string(rev)) + + items, err := p.Parse() + + if err != nil { + return nil, err + } + + var commit *object.Commit + + for _, item := range items { + switch item.(type) { + case revision.Ref: + ref, err := storer.ResolveReference(r.s, plumbing.ReferenceName(item.(revision.Ref))) + + if err != nil { + return &plumbing.ZeroHash, err + } + + h := ref.Hash() + + commit, err = r.Commit(h) + + if err != nil { + return &plumbing.ZeroHash, err + } + case revision.CaretPath: + depth := item.(revision.CaretPath).Depth + + if depth == 0 { + break + } + + iter := commit.Parents() + + c, err := iter.Next() + + if err != nil { + return &plumbing.ZeroHash, err + } + + if depth == 1 { + commit = c + + break + } + + c, err = iter.Next() + + if err != nil { + return &plumbing.ZeroHash, err + } + + commit = c + case revision.TildePath: + for i := 0; i < item.(revision.TildePath).Depth; i++ { + c, err := commit.Parents().Next() + + if err != nil { + return &plumbing.ZeroHash, err + } + + commit = c + } + case revision.CaretReg: + history, err := commit.History() + + if err != nil { + return &plumbing.ZeroHash, err + } + + re := item.(revision.CaretReg).Regexp + negate := item.(revision.CaretReg).Negate + + var c *object.Commit + + for i := 0; i < len(history); i++ { + if !negate && re.MatchString(history[i].Message) { + c = history[i] + + break + } + + if negate && 
!re.MatchString(history[i].Message) { + c = history[i] + + break + } + } + + if c == nil { + return &plumbing.ZeroHash, fmt.Errorf(`No commit message match regexp : "%s"`, re.String()) + } + + commit = c + case revision.AtDate: + history, err := commit.History() + + if err != nil { + return &plumbing.ZeroHash, err + } + + date := item.(revision.AtDate).Date + var c *object.Commit + + for i := 0; i < len(history); i++ { + if date.Equal(history[i].Committer.When.UTC()) || history[i].Committer.When.UTC().Before(date) { + c = history[i] + + break + } + } + + if c == nil { + return &plumbing.ZeroHash, fmt.Errorf(`No commit exists prior to date "%s"`, date.String()) + } + + commit = c + } + } + + return &commit.Hash, nil +} diff --git a/repository_test.go b/repository_test.go index 76ba9f3..2d29cd3 100644 --- a/repository_test.go +++ b/repository_test.go @@ -780,6 +780,58 @@ func (s *RepositorySuite) TestWorktreeBare(c *C) { c.Assert(w, IsNil) } +func (s *RepositorySuite) TestResolveRevision(c *C) { + url := s.GetLocalRepositoryURL( + fixtures.ByURL("https://github.com/git-fixtures/basic.git").One(), + ) + + r, _ := Init(memory.NewStorage(), nil) + err := r.clone(&CloneOptions{URL: url}) + c.Assert(err, IsNil) + + datas := map[string]string{ + "HEAD": "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", + "refs/heads/master~2^^~": "b029517f6300c2da0f4b651b8642506cd6aaf45d", + "HEAD~2^^~": "b029517f6300c2da0f4b651b8642506cd6aaf45d", + "HEAD~3^2": "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", + "HEAD~3^2^0": "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", + "HEAD~2^{/binary file}": "35e85108805c84807bc66a02d91535e1e24b38b9", + "HEAD~^{!-some}": "1669dce138d9b841a518c64b10914d88f5e488ea", + "HEAD@{2015-03-31T11:56:18Z}": "918c48b83bd081e863dbe1b80f8998f058cd8294", + "HEAD@{2015-03-31T11:49:00Z}": "1669dce138d9b841a518c64b10914d88f5e488ea", + } + + for rev, hash := range datas { + h, err := r.ResolveRevision(plumbing.Revision(rev)) + + c.Assert(err, IsNil) + c.Assert(h.String(), Equals, hash) + } +} + +func (s *RepositorySuite) TestResolveRevisionWithErrors(c *C) { + url := s.GetLocalRepositoryURL( + fixtures.ByURL("https://github.com/git-fixtures/basic.git").One(), + ) + + r, _ := Init(memory.NewStorage(), nil) + err := r.clone(&CloneOptions{URL: url}) + c.Assert(err, IsNil) + + datas := map[string]string{ + "efs/heads/master~": "reference not found", + "HEAD^3": `Revision invalid : "3" found must be 0, 1 or 2 after "^"`, + "HEAD^{/whatever}": `No commit message match regexp : "whatever"`, + "HEAD@{2015-03-31T09:49:00Z}": `No commit exists prior to date "2015-03-31 09:49:00 +0000 UTC"`, + } + + for rev, rerr := range datas { + _, err := r.ResolveRevision(plumbing.Revision(rev)) + + c.Assert(err.Error(), Equals, rerr) + } +} + func ExecuteOnPath(c *C, path string, cmds ...string) error { for _, cmd := range cmds { err := executeOnPath(path, cmd) diff --git a/utils/merkletrie/internal/frame/frame.go b/utils/merkletrie/internal/frame/frame.go new file mode 100644 index 0000000..fd06307 --- /dev/null +++ b/utils/merkletrie/internal/frame/frame.go @@ -0,0 +1,91 @@ +package frame + +import ( + "bytes" + "fmt" + "sort" + "strings" + + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// A Frame is a collection of siblings in a trie, sorted alphabetically +// by name. 
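+//
+// For example (editorial note, derived from the code below): given
+// siblings "b", "a" and "c", the stack is stored as ["c", "b", "a"],
+// so First returns "a", and successive Drops discard "a", then "b",
+// then "c".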
+type Frame struct {
+	// siblings, sorted in reverse alphabetical order by name
+	stack []noder.Noder
+}
+
+type byName []noder.Noder
+
+func (a byName) Len() int      { return len(a) }
+func (a byName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a byName) Less(i, j int) bool {
+	return strings.Compare(a[i].Name(), a[j].Name()) < 0
+}
+
+// New returns a frame with the children of the provided node.
+func New(n noder.Noder) (*Frame, error) {
+	children, err := n.Children()
+	if err != nil {
+		return nil, err
+	}
+
+	sort.Sort(sort.Reverse(byName(children)))
+	return &Frame{
+		stack: children,
+	}, nil
+}
+
+// String returns the quoted names of the noders in the frame sorted in
+// alphabetical order by name, surrounded by square brackets and
+// separated by commas.
+//
+// Examples:
+//	[]
+//	["a", "b"]
+func (f *Frame) String() string {
+	var buf bytes.Buffer
+	_ = buf.WriteByte('[')
+
+	sep := ""
+	for i := f.Len() - 1; i >= 0; i-- {
+		_, _ = buf.WriteString(sep)
+		sep = ", "
+		_, _ = buf.WriteString(fmt.Sprintf("%q", f.stack[i].Name()))
+	}
+
+	_ = buf.WriteByte(']')
+
+	return buf.String()
+}
+
+// First returns, but does not extract, the noder with the
+// alphabetically smallest name in the frame, and true if the frame was
+// not empty. Otherwise it returns nil and false.
+func (f *Frame) First() (noder.Noder, bool) {
+	if f.Len() == 0 {
+		return nil, false
+	}
+
+	top := f.Len() - 1
+
+	return f.stack[top], true
+}
+
+// Drop extracts the noder with the alphabetically smallest name in the
+// frame or does nothing if the frame is empty.
+func (f *Frame) Drop() {
+	if f.Len() == 0 {
+		return
+	}
+
+	top := f.Len() - 1
+	f.stack[top] = nil
+	f.stack = f.stack[:top]
+}
+
+// Len returns the number of noders in the frame.
+func (f *Frame) Len() int {
+	return len(f.stack)
+}
diff --git a/utils/merkletrie/internal/frame/frame_test.go b/utils/merkletrie/internal/frame/frame_test.go
new file mode 100644
index 0000000..9cc0994
--- /dev/null
+++ b/utils/merkletrie/internal/frame/frame_test.go
@@ -0,0 +1,108 @@
+package frame
+
+import (
+	"fmt"
+	"testing"
+
+	"srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder"
+	"srcd.works/go-git.v4/utils/merkletrie/noder"
+
+	. "gopkg.in/check.v1"
"gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type FrameSuite struct{} + +var _ = Suite(&FrameSuite{}) + +func (s *FrameSuite) TestNewFrameFromEmptyDir(c *C) { + A, err := fsnoder.New("A()") + c.Assert(err, IsNil) + + frame, err := New(A) + c.Assert(err, IsNil) + + expectedString := `[]` + c.Assert(frame.String(), Equals, expectedString) + + first, ok := frame.First() + c.Assert(first, IsNil) + c.Assert(ok, Equals, false) + + first, ok = frame.First() + c.Assert(first, IsNil) + c.Assert(ok, Equals, false) + + l := frame.Len() + c.Assert(l, Equals, 0) +} + +func (s *FrameSuite) TestNewFrameFromNonEmpty(c *C) { + // _______A/________ + // | / \ | + // x y B/ C/ + // | + // z + root, err := fsnoder.New("A(x<> y<> B() C(z<>))") + c.Assert(err, IsNil) + frame, err := New(root) + c.Assert(err, IsNil) + + expectedString := `["B", "C", "x", "y"]` + c.Assert(frame.String(), Equals, expectedString) + + l := frame.Len() + c.Assert(l, Equals, 4) + + checkFirstAndDrop(c, frame, "B", true) + l = frame.Len() + c.Assert(l, Equals, 3) + + checkFirstAndDrop(c, frame, "C", true) + l = frame.Len() + c.Assert(l, Equals, 2) + + checkFirstAndDrop(c, frame, "x", true) + l = frame.Len() + c.Assert(l, Equals, 1) + + checkFirstAndDrop(c, frame, "y", true) + l = frame.Len() + c.Assert(l, Equals, 0) + + checkFirstAndDrop(c, frame, "", false) + l = frame.Len() + c.Assert(l, Equals, 0) + + checkFirstAndDrop(c, frame, "", false) +} + +func checkFirstAndDrop(c *C, f *Frame, expectedNodeName string, expectedOK bool) { + first, ok := f.First() + c.Assert(ok, Equals, expectedOK) + if expectedOK { + c.Assert(first.Name(), Equals, expectedNodeName) + } + + f.Drop() +} + +// a mock noder that returns error when Children() is called +type errorNoder struct{} + +func (e *errorNoder) Hash() []byte { return nil } +func (e *errorNoder) Name() string { return "" } +func (e *errorNoder) String() string { return "" } +func (e *errorNoder) IsDir() bool { return true } +func (e *errorNoder) Children() ([]noder.Noder, error) { + return nil, fmt.Errorf("mock error") +} +func (e *errorNoder) NumChildren() (int, error) { + return 0, fmt.Errorf("mock error") +} + +func (s *FrameSuite) TestNewFrameErrors(c *C) { + _, err := New(&errorNoder{}) + c.Assert(err, Not(IsNil)) +} diff --git a/utils/merkletrie/iter.go b/utils/merkletrie/iter.go new file mode 100644 index 0000000..44cba70 --- /dev/null +++ b/utils/merkletrie/iter.go @@ -0,0 +1,212 @@ +package merkletrie + +import ( + "fmt" + "io" + + "srcd.works/go-git.v4/utils/merkletrie/internal/frame" + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// Iter is an iterator for merkletries (only the trie part of the +// merkletrie is relevant here, it does not use the Hasher interface). +// +// The iteration is performed in depth-first pre-order. Entries at each +// depth are traversed in (case-sensitive) alphabetical order. +// +// This is the kind of traversal you will expect when listing ordinary +// files and directories recursively, for example: +// +// Trie Traversal order +// ---- --------------- +// . +// / | \ c +// / | \ d/ +// d c z ===> d/a +// / \ d/b +// b a z +// +// +// This iterator is somewhat especial as you can chose to skip whole +// "directories" when iterating: +// +// - The Step method will iterate normally. +// +// - the Next method will not descend deeper into the tree. +// +// For example, if the iterator is at `d/`, the Step method will return +// `d/a` while the Next would have returned `z` instead (skipping `d/` +// and its descendants). 
+// The names of these two methods are based on the well-known "next"
+// and "step" operations, quite common in debuggers, like gdb.
+//
+// The paths returned by the iterator will be relative, if the iterator
+// was created from a single node, or absolute, if the iterator was
+// created from the path to the node (the path will be prefixed to all
+// returned paths).
+type Iter struct {
+	// Tells if the iteration has started.
+	hasStarted bool
+	// The top of this stack has the current node and its siblings. The
+	// rest of the stack keeps the ancestors of the current node and
+	// their corresponding siblings. The current element is always the
+	// top element of the top frame.
+	//
+	// When "step"ping into a node, its children are pushed as a new
+	// frame.
+	//
+	// When "next"ing past a node, the current element is dropped by
+	// popping the top frame.
+	frameStack []*frame.Frame
+	// The base path used to turn the relative paths used internally by
+	// the iterator into absolute paths used by external applications.
+	// For relative iterators this will be nil.
+	base noder.Path
+}
+
+// NewIter returns a new relative iterator using the provided noder as
+// its unnamed root. When iterating, all returned paths will be
+// relative to the node.
+func NewIter(n noder.Noder) (*Iter, error) {
+	return newIter(n, nil)
+}
+
+// NewIterFromPath returns a new absolute iterator from the noder at the
+// end of the path p. When iterating, all returned paths will be
+// absolute, using the root of the path p as their root.
+func NewIterFromPath(p noder.Path) (*Iter, error) {
+	return newIter(p, p) // Path implements Noder
+}
+
+func newIter(root noder.Noder, base noder.Path) (*Iter, error) {
+	ret := &Iter{
+		base: base,
+	}
+
+	frame, err := frame.New(root)
+	if err != nil {
+		return nil, err
+	}
+	ret.push(frame)
+
+	return ret, nil
+}
+
+func (iter *Iter) top() (*frame.Frame, bool) {
+	if len(iter.frameStack) == 0 {
+		return nil, false
+	}
+	top := len(iter.frameStack) - 1
+
+	return iter.frameStack[top], true
+}
+
+func (iter *Iter) push(f *frame.Frame) {
+	iter.frameStack = append(iter.frameStack, f)
+}
+
+const (
+	doDescend   = true
+	dontDescend = false
+)
+
+// Next returns the path of the next node without descending deeper into
+// the trie, and a nil error. If there are no more entries in the trie
+// it returns nil and io.EOF. In case of error, it will return nil and
+// the error.
+func (iter *Iter) Next() (noder.Path, error) {
+	return iter.advance(dontDescend)
+}
+
+// Step returns the path to the next node in the trie, descending deeper
+// into it if needed, and a nil error. If there are no more nodes in the
+// trie, it returns nil and io.EOF. In case of error, it will return nil
+// and the error.
+func (iter *Iter) Step() (noder.Path, error) {
+	return iter.advance(doDescend)
+}
+
+// Advances the iterator in the desired direction: doDescend or
+// dontDescend.
+//
+// Returns the new current element and a nil error on success. If there
+// are no more elements in the trie below the base, it returns nil and
+// io.EOF. In case of error, it returns nil and the error.
+func (iter *Iter) advance(wantDescend bool) (noder.Path, error) {
+	current, err := iter.current()
+	if err != nil {
+		return nil, err
+	}
+
+	// The first time we just return the current node.
+	if !iter.hasStarted {
+		iter.hasStarted = true
+		return current, nil
+	}
+
+	// Advancing means getting the next current node: either its first
+	// child or its next sibling, depending on whether we must descend
+	// or not.
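+	// Getting the children count first lets us decide between the two
+	// moves: a node with children and wantDescend means pushing a new
+	// frame with those children (Step); anything else means dropping
+	// the current node (Next).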
+	numChildren, err := current.NumChildren()
+	if err != nil {
+		return nil, err
+	}
+
+	mustDescend := numChildren != 0 && wantDescend
+	if mustDescend {
+		// descend: add a new frame with the current's children.
+		frame, err := frame.New(current)
+		if err != nil {
+			return nil, err
+		}
+		iter.push(frame)
+	} else {
+		// don't descend: just drop the current node
+		iter.drop()
+	}
+
+	return iter.current()
+}
+
+// Returns the path to the current node, adding the base if there was
+// one, and a nil error. If there were no noders left, it returns nil
+// and io.EOF. If an error occurred, it returns nil and the error.
+func (iter *Iter) current() (noder.Path, error) {
+	if topFrame, ok := iter.top(); !ok {
+		return nil, io.EOF
+	} else if _, ok := topFrame.First(); !ok {
+		return nil, io.EOF
+	}
+
+	ret := make(noder.Path, 0, len(iter.base)+len(iter.frameStack))
+
+	// concat the base...
+	ret = append(ret, iter.base...)
+	// ... and the current node and all its ancestors
+	for i, f := range iter.frameStack {
+		t, ok := f.First()
+		if !ok {
+			panic(fmt.Sprintf("frame %d is empty", i))
+		}
+		ret = append(ret, t)
+	}
+
+	return ret, nil
+}
+
+// removes the current node if any, and all the frames that become empty
+// as a consequence of this action.
+func (iter *Iter) drop() {
+	frame, ok := iter.top()
+	if !ok {
+		return
+	}
+
+	frame.Drop()
+	// if the frame is empty, remove it and its parent, recursively
+	if frame.Len() == 0 {
+		top := len(iter.frameStack) - 1
+		iter.frameStack[top] = nil
+		iter.frameStack = iter.frameStack[:top]
+		iter.drop()
+	}
+}
diff --git a/utils/merkletrie/iter_test.go b/utils/merkletrie/iter_test.go
new file mode 100644
index 0000000..52d567a
--- /dev/null
+++ b/utils/merkletrie/iter_test.go
@@ -0,0 +1,462 @@
+package merkletrie_test
+
+import (
+	"fmt"
+	"io"
+	"strings"
+	"testing"
+
+	"srcd.works/go-git.v4/utils/merkletrie"
+	"srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder"
+	"srcd.works/go-git.v4/utils/merkletrie/noder"
+
+	. "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { TestingT(t) }
+
+type IterSuite struct{}
+
+var _ = Suite(&IterSuite{})
+
+// A test is a list of operations we want to perform on an iterator and
+// their expected results.
+//
+// The operations are expressed as a sequence of `n` and `s`,
+// representing the number of next and step operations we want to call
+// on the iterator and their order. For example, an operations value of
+// "nns" means: call a `n`ext, then another `n`ext and finish with a
+// `s`tep.
+//
+// The expected value is the full paths of the noders returned by the
+// operations, separated by spaces.
+//
+// For instance:
+//
+//	t := test{
+//		operations: "ns",
+//		expected:   "a a/b",
+//	}
+//
+// means:
+//
+// - the first iterator operation has to be Next, and it must return a
+// node called "a" with no ancestors.
+//
+// - the second operation has to be Step, and it must return a node
+// called "b" with a single ancestor called "a".
+type test struct {
+	operations string
+	expected   string
+}
+
+// Runs a test on the provided iterator, checking that the names of the
+// returned values are correct. If not, the treeDescription value is
+// printed along with information about the mismatch.
+func (t test) run(c *C, iter *merkletrie.Iter,
+	treeDescription string, testNumber int) {
+
+	expectedChunks := strings.Split(t.expected, " ")
+	if t.expected == "" {
+		expectedChunks = []string{}
+	}
+
+	if len(t.operations) < len(expectedChunks) {
+		c.Fatalf("malformed test %d: not enough operations", testNumber)
+		return
+	}
+
+	var obtained noder.Path
+	var err error
+	for i := range t.operations {
+		comment := Commentf("\ntree: %q\ntest #%d (%q)\noperation #%d (%q)",
+			treeDescription, testNumber, t.operations, i, t.operations[i])
+
+		switch t.operations[i] {
+		case 'n':
+			obtained, err = iter.Next()
+			if err != io.EOF {
+				c.Assert(err, IsNil)
+			}
+		case 's':
+			obtained, err = iter.Step()
+			if err != io.EOF {
+				c.Assert(err, IsNil)
+			}
+		default:
+			c.Fatalf("unknown operation at test %d, operation %d (%s)\n",
+				testNumber, i, t.operations[i])
+		}
+		if i >= len(expectedChunks) {
+			c.Assert(err, Equals, io.EOF, comment)
+			continue
+		}
+
+		c.Assert(err, IsNil, comment)
+		c.Assert(obtained.String(), Equals, expectedChunks[i], comment)
+	}
+}
+
+// A testsCollection value represents a tree and a collection of tests
+// we want to perform on iterators of that tree.
+//
+// Example:
+//
+//	     .
+//	     |
+//	 ---------
+//	 |   |   |
+//	 a   b   c
+//	     |
+//	     z
+//
+//	var foo testCollection = {
+//		tree: "(a<> b(z<>) c<>)"
+//		tests: []test{
+//			{operations: "nns", expected: "a b b/z"},
+//			{operations: "nnn", expected: "a b c"},
+//		},
+//	}
+//
+// A new iterator will be built for each test.
type testsCollection struct {
+	tree  string // a fsnoder description of a tree.
+	tests []test // the collection of tests we want to run
+}
+
+// Executes all the tests in a testsCollection.
+func (tc testsCollection) run(c *C) {
+	root, err := fsnoder.New(tc.tree)
+	c.Assert(err, IsNil)
+
+	for i, t := range tc.tests {
+		iter, err := merkletrie.NewIter(root)
+		c.Assert(err, IsNil)
+		t.run(c, iter, root.String(), i)
+	}
+}
+
+func (s *IterSuite) TestEmptyNamedDir(c *C) {
+	tc := testsCollection{
+		tree: "A()",
+		tests: []test{
+			{operations: "n", expected: ""},
+			{operations: "nn", expected: ""},
+			{operations: "nnn", expected: ""},
+			{operations: "nnns", expected: ""},
+			{operations: "nnnssnsnns", expected: ""},
+			{operations: "s", expected: ""},
+			{operations: "ss", expected: ""},
+			{operations: "sss", expected: ""},
+			{operations: "sssn", expected: ""},
+			{operations: "sssnnsnssn", expected: ""},
+		},
+	}
+	tc.run(c)
+}
+
+func (s *IterSuite) TestEmptyUnnamedDir(c *C) {
+	tc := testsCollection{
+		tree: "()",
+		tests: []test{
+			{operations: "n", expected: ""},
+			{operations: "nn", expected: ""},
+			{operations: "nnn", expected: ""},
+			{operations: "nnns", expected: ""},
+			{operations: "nnnssnsnns", expected: ""},
+			{operations: "s", expected: ""},
+			{operations: "ss", expected: ""},
+			{operations: "sss", expected: ""},
+			{operations: "sssn", expected: ""},
+			{operations: "sssnnsnssn", expected: ""},
+		},
+	}
+	tc.run(c)
+}
+
+func (s *IterSuite) TestOneFile(c *C) {
+	tc := testsCollection{
+		tree: "(a<>)",
+		tests: []test{
+			{operations: "n", expected: "a"},
+			{operations: "nn", expected: "a"},
+			{operations: "nnn", expected: "a"},
+			{operations: "nnns", expected: "a"},
+			{operations: "nnnssnsnns", expected: "a"},
+			{operations: "s", expected: "a"},
+			{operations: "ss", expected: "a"},
+			{operations: "sss", expected: "a"},
+			{operations: "sssn", expected: "a"},
+			{operations: "sssnnsnssn", expected: "a"},
+		},
+	}
+	tc.run(c)
+}
+
+// root
+//  / \
+// a   b
+func (s *IterSuite) TestTwoFiles(c *C) {
+	tc := 
+	tc := testsCollection{
+		tree: "(a<> b<>)",
+		tests: []test{
+			{operations: "nnn", expected: "a b"},
+			{operations: "nns", expected: "a b"},
+			{operations: "nsn", expected: "a b"},
+			{operations: "nss", expected: "a b"},
+			{operations: "snn", expected: "a b"},
+			{operations: "sns", expected: "a b"},
+			{operations: "ssn", expected: "a b"},
+			{operations: "sss", expected: "a b"},
+		},
+	}
+	tc.run(c)
+}
+
+// root
+//  |
+//  a
+//  |
+//  b
func (s *IterSuite) TestDirWithFile(c *C) {
+	tc := testsCollection{
+		tree: "(a(b<>))",
+		tests: []test{
+			{operations: "nnn", expected: "a"},
+			{operations: "nns", expected: "a"},
+			{operations: "nsn", expected: "a a/b"},
+			{operations: "nss", expected: "a a/b"},
+			{operations: "snn", expected: "a"},
+			{operations: "sns", expected: "a"},
+			{operations: "ssn", expected: "a a/b"},
+			{operations: "sss", expected: "a a/b"},
+		},
+	}
+	tc.run(c)
+}
+
+// root
+// /|\
+// c a b
+func (s *IterSuite) TestThreeSiblings(c *C) {
+	tc := testsCollection{
+		tree: "(c<> a<> b<>)",
+		tests: []test{
+			{operations: "nnnn", expected: "a b c"},
+			{operations: "nnns", expected: "a b c"},
+			{operations: "nnsn", expected: "a b c"},
+			{operations: "nnss", expected: "a b c"},
+			{operations: "nsnn", expected: "a b c"},
+			{operations: "nsns", expected: "a b c"},
+			{operations: "nssn", expected: "a b c"},
+			{operations: "nsss", expected: "a b c"},
+			{operations: "snnn", expected: "a b c"},
+			{operations: "snns", expected: "a b c"},
+			{operations: "snsn", expected: "a b c"},
+			{operations: "snss", expected: "a b c"},
+			{operations: "ssnn", expected: "a b c"},
+			{operations: "ssns", expected: "a b c"},
+			{operations: "sssn", expected: "a b c"},
+			{operations: "ssss", expected: "a b c"},
+		},
+	}
+	tc.run(c)
+}
+
+// root
+//  |
+//  b
+//  |
+//  c
+//  |
+//  a
+func (s *IterSuite) TestThreeVertical(c *C) {
+	tc := testsCollection{
+		tree: "(b(c(a())))",
+		tests: []test{
+			{operations: "nnnn", expected: "b"},
+			{operations: "nnns", expected: "b"},
+			{operations: "nnsn", expected: "b"},
+			{operations: "nnss", expected: "b"},
+			{operations: "nsnn", expected: "b b/c"},
+			{operations: "nsns", expected: "b b/c"},
+			{operations: "nssn", expected: "b b/c b/c/a"},
+			{operations: "nsss", expected: "b b/c b/c/a"},
+			{operations: "snnn", expected: "b"},
+			{operations: "snns", expected: "b"},
+			{operations: "snsn", expected: "b"},
+			{operations: "snss", expected: "b"},
+			{operations: "ssnn", expected: "b b/c"},
+			{operations: "ssns", expected: "b b/c"},
+			{operations: "sssn", expected: "b b/c b/c/a"},
+			{operations: "ssss", expected: "b b/c b/c/a"},
+		},
+	}
+	tc.run(c)
+}
+
+// root
+// / \
+// c   a
+// |
+// b
+func (s *IterSuite) TestThreeMix1(c *C) {
+	tc := testsCollection{
+		tree: "(c(b<>) a<>)",
+		tests: []test{
+			{operations: "nnnn", expected: "a c"},
+			{operations: "nnns", expected: "a c"},
+			{operations: "nnsn", expected: "a c c/b"},
+			{operations: "nnss", expected: "a c c/b"},
+			{operations: "nsnn", expected: "a c"},
+			{operations: "nsns", expected: "a c"},
+			{operations: "nssn", expected: "a c c/b"},
+			{operations: "nsss", expected: "a c c/b"},
+			{operations: "snnn", expected: "a c"},
+			{operations: "snns", expected: "a c"},
+			{operations: "snsn", expected: "a c c/b"},
+			{operations: "snss", expected: "a c c/b"},
+			{operations: "ssnn", expected: "a c"},
+			{operations: "ssns", expected: "a c"},
+			{operations: "sssn", expected: "a c c/b"},
+			{operations: "ssss", expected: "a c c/b"},
+		},
+	}
+	tc.run(c)
+}
+
+// root
+// / \
+// b   a
+//     |
+//     c
+func (s *IterSuite) TestThreeMix2(c *C) {
+	tc := testsCollection{
+		tree: "(b() a(c<>))",
+		tests: []test{
+			{operations: "nnnn", expected: "a b"},
+			{operations: "nnns", expected: "a b"},
+			{operations: "nnsn", expected: "a b"},
+			{operations: "nnss", expected: "a b"},
+			{operations: "nsnn", expected: "a a/c b"},
+			{operations: "nsns", expected: "a a/c b"},
+			{operations: "nssn", expected: "a a/c b"},
+			{operations: "nsss", expected: "a a/c b"},
+			{operations: "snnn", expected: "a b"},
+			{operations: "snns", expected: "a b"},
+			{operations: "snsn", expected: "a b"},
+			{operations: "snss", expected: "a b"},
+			{operations: "ssnn", expected: "a a/c b"},
+			{operations: "ssns", expected: "a a/c b"},
+			{operations: "sssn", expected: "a a/c b"},
+			{operations: "ssss", expected: "a a/c b"},
+		},
+	}
+	tc.run(c)
+}
+
+//      root
+//      / | \
+//     /  |  \
+//    f   d   h
+//   / \     / | \
+//  e   a   j  b/ g
+//  |  / \  |
+//  l  n  k icm
+//     |
+//     o
+//     |
+//     p/
+func (s *IterSuite) TestCrazy(c *C) {
+	tc := testsCollection{
+		tree: "(f(e(l<>) a(n(o(p())) k<>)) d<> h(j(i<> c<> m<>) b() g<>))",
+		tests: []test{
+			{operations: "nnnnn", expected: "d f h"},
+			{operations: "nnnns", expected: "d f h"},
+			{operations: "nnnsn", expected: "d f h h/b h/g"},
+			{operations: "nnnss", expected: "d f h h/b h/g"},
+			{operations: "nnsnn", expected: "d f f/a f/e h"},
+			{operations: "nnsns", expected: "d f f/a f/e f/e/l"},
+			{operations: "nnssn", expected: "d f f/a f/a/k f/a/n"},
+			{operations: "nnsss", expected: "d f f/a f/a/k f/a/n"},
+			{operations: "nsnnn", expected: "d f h"},
+			{operations: "nsnns", expected: "d f h"},
+			{operations: "nsnsn", expected: "d f h h/b h/g"},
+			{operations: "nsnss", expected: "d f h h/b h/g"},
+			{operations: "nssnn", expected: "d f f/a f/e h"},
+		},
+	}
+	tc.run(c)
+}
+
+//     .
+//     |
+//     a
+//     |
+//     b
+//    / \
+//   z   h
+//  / \
+// d   e
+//     |
+//     f
+func (s *IterSuite) TestNewIterFromPath(c *C) {
+	tree, err := fsnoder.New("(a(b(z(d<> e(f<>)) h<>)))")
+	c.Assert(err, IsNil)
+
+	z := find(c, tree, "z")
+
+	iter, err := merkletrie.NewIterFromPath(z)
+	c.Assert(err, IsNil)
+
+	n, err := iter.Next()
+	c.Assert(err, IsNil)
+	c.Assert(n.String(), Equals, "a/b/z/d")
+
+	n, err = iter.Next()
+	c.Assert(err, IsNil)
+	c.Assert(n.String(), Equals, "a/b/z/e")
+
+	n, err = iter.Step()
+	c.Assert(err, IsNil)
+	c.Assert(n.String(), Equals, "a/b/z/e/f")
+
+	_, err = iter.Step()
+	c.Assert(err, Equals, io.EOF)
+}
+
+// find returns the path of the first noder with the given name found
+// in a Step-wise traversal of the tree; it fails the test if the name
+// cannot be found.
+func find(c *C, tree noder.Noder, name string) noder.Path {
+	iter, err := merkletrie.NewIter(tree)
+	c.Assert(err, IsNil)
+
+	for {
+		current, err := iter.Step()
+		if err == io.EOF {
+			c.Fatalf("node %s not found in tree %s", name, tree)
+		}
+		c.Assert(err, IsNil)
+
+		if current.Name() == name {
+			return current
+		}
+	}
+}
+
+// errorNoder implements noder.Noder with child accessors that always
+// fail; it is used to check that NewIter propagates those errors.
+type errorNoder struct{}
+
+func (e *errorNoder) Name() string   { return "" }
+func (e *errorNoder) String() string { return "" }
+func (e *errorNoder) Hash() []byte   { return nil }
+func (e *errorNoder) IsDir() bool    { return true }
+func (e *errorNoder) Children() ([]noder.Noder, error) {
+	return nil, fmt.Errorf("mock error")
+}
+func (e *errorNoder) NumChildren() (int, error) {
+	return 0, fmt.Errorf("mock error")
+}
+
+func (s *IterSuite) TestNewIterFailsOnChildrenErrors(c *C) {
+	_, err := merkletrie.NewIter(&errorNoder{})
+	c.Assert(err, NotNil)
+}
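The errorNoder mock above also documents, by example, the full noder.Noder method set that these iterators rely on. For contrast, here is a minimal well-behaved implementation; the staticNoder type and its fields are hypothetical, not part of this change, and the nil Hash is a shortcut this sketch takes because the iterator itself never hashes:

package main

import (
	"fmt"

	"srcd.works/go-git.v4/utils/merkletrie"
	"srcd.works/go-git.v4/utils/merkletrie/noder"
)

// staticNoder is a hypothetical in-memory noder.Noder: it mirrors the
// method set implemented by errorNoder, but never fails.
type staticNoder struct {
	name     string
	dir      bool
	children []noder.Noder
}

func (n staticNoder) Name() string   { return n.name }
func (n staticNoder) String() string { return n.name }
func (n staticNoder) Hash() []byte   { return nil } // no real hashing in this sketch
func (n staticNoder) IsDir() bool    { return n.dir }

func (n staticNoder) Children() ([]noder.Noder, error) { return n.children, nil }
func (n staticNoder) NumChildren() (int, error)        { return len(n.children), nil }

func main() {
	// root/
	//   a
	//   sub/
	//     b
	root := staticNoder{name: "root", dir: true, children: []noder.Noder{
		staticNoder{name: "a"},
		staticNoder{name: "sub", dir: true, children: []noder.Noder{
			staticNoder{name: "b"},
		}},
	}}

	iter, err := merkletrie.NewIter(root)
	if err != nil {
		panic(err)
	}
	for {
		p, err := iter.Step()
		if err != nil {
			break // io.EOF once the traversal is done
		}
		fmt.Println(p.String()) // a, sub, sub/b
	}
}

Any tree assembled this way can be handed to merkletrie.NewIter or NewIterFromPath exactly like the fsnoder trees used throughout the tests.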