aboutsummaryrefslogtreecommitdiffstats
path: root/internal/revision/parser.go
diff options
context:
space:
mode:
authorAnthony HAMON <antham@users.noreply.github.com>2017-02-06 17:53:11 +0100
committerMáximo Cuadros <mcuadros@gmail.com>2017-02-06 17:53:11 +0100
commitd0cf20797464ab12b41ccb5c603f67884a6e8e17 (patch)
tree4faacd3a2f7c816c36adde83bfb22576a01e2fe8 /internal/revision/parser.go
parent41e5a1ff6f16ec13fa1cebe6e1d872bfe0951cee (diff)
downloadgo-git-d0cf20797464ab12b41ccb5c603f67884a6e8e17.tar.gz
Add revision implementation (#139)
Diffstat (limited to 'internal/revision/parser.go')
-rw-r--r--internal/revision/parser.go622
1 files changed, 622 insertions, 0 deletions
diff --git a/internal/revision/parser.go b/internal/revision/parser.go
new file mode 100644
index 0000000..b45a6d8
--- /dev/null
+++ b/internal/revision/parser.go
@@ -0,0 +1,622 @@
+// Package revision extracts git revision from string
+// More informations about revision : https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html
+package revision
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "regexp"
+ "strconv"
+ "time"
+)
+
+// ErrInvalidRevision is emitted if string doesn't match valid revision
+type ErrInvalidRevision struct {
+ s string
+}
+
+func (e *ErrInvalidRevision) Error() string {
+ return "Revision invalid : " + e.s
+}
+
+// Revisioner represents a revision component.
+// A revision is made of multiple revision components
+// obtained after parsing a revision string,
+// for instance revision "master~" will be converted in
+// two revision components Ref and TildePath
+type Revisioner interface {
+}
+
+// Ref represents a reference name : HEAD, master
+type Ref string
+
+// TildePath represents ~, ~{n}
+type TildePath struct {
+ Depth int
+}
+
+// CaretPath represents ^, ^{n}
+type CaretPath struct {
+ Depth int
+}
+
+// CaretReg represents ^{/foo bar}
+type CaretReg struct {
+ Regexp *regexp.Regexp
+ Negate bool
+}
+
+// CaretType represents ^{commit}
+type CaretType struct {
+ ObjectType string
+}
+
+// AtReflog represents @{n}
+type AtReflog struct {
+ Depth int
+}
+
+// AtCheckout represents @{-n}
+type AtCheckout struct {
+ Depth int
+}
+
+// AtUpstream represents @{upstream}, @{u}
+type AtUpstream struct {
+ BranchName string
+}
+
+// AtPush represents @{push}
+type AtPush struct {
+ BranchName string
+}
+
+// AtDate represents @{"2006-01-02T15:04:05Z"}
+type AtDate struct {
+ Date time.Time
+}
+
+// ColonReg represents :/foo bar
+type ColonReg struct {
+ Regexp *regexp.Regexp
+ Negate bool
+}
+
+// ColonPath represents :./<path> :<path>
+type ColonPath struct {
+ Path string
+}
+
+// ColonStagePath represents :<n>:/<path>
+type ColonStagePath struct {
+ Path string
+ Stage int
+}
+
+// Parser represents a parser
+// use to tokenize and transform to revisioner chunks
+// a given string
+type Parser struct {
+ s *scanner
+ currentParsedChar struct {
+ tok token
+ lit string
+ }
+ unreadLastChar bool
+}
+
+// NewParserFromString returns a new instance of parser from a string.
+func NewParserFromString(s string) *Parser {
+ return NewParser(bytes.NewBufferString(s))
+}
+
+// NewParser returns a new instance of parser.
+func NewParser(r io.Reader) *Parser {
+ return &Parser{s: newScanner(r)}
+}
+
+// scan returns the next token from the underlying scanner
+// or the last scanned token if an unscan was requested
+func (p *Parser) scan() (token, string, error) {
+ if p.unreadLastChar {
+ p.unreadLastChar = false
+ return p.currentParsedChar.tok, p.currentParsedChar.lit, nil
+ }
+
+ tok, lit, err := p.s.scan()
+
+ p.currentParsedChar.tok, p.currentParsedChar.lit = tok, lit
+
+ return tok, lit, err
+}
+
+// unscan pushes the previously read token back onto the buffer.
+func (p *Parser) unscan() { p.unreadLastChar = true }
+
+// Parse explode a revision string into revisioner chunks
+func (p *Parser) Parse() ([]Revisioner, error) {
+ var rev Revisioner
+ var revs []Revisioner
+ var tok token
+ var err error
+
+ for {
+ tok, _, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch tok {
+ case at:
+ rev, err = p.parseAt()
+ case tilde:
+ rev, err = p.parseTilde()
+ case caret:
+ rev, err = p.parseCaret()
+ case colon:
+ rev, err = p.parseColon()
+ case eof:
+ err = p.validateFullRevision(&revs)
+
+ if err != nil {
+ return []Revisioner{}, err
+ }
+
+ return revs, nil
+ default:
+ p.unscan()
+ rev, err = p.parseRef()
+ }
+
+ if err != nil {
+ return []Revisioner{}, err
+ }
+
+ revs = append(revs, rev)
+ }
+}
+
+// validateFullRevision ensures all revisioner chunks make a valid revision
+func (p *Parser) validateFullRevision(chunks *[]Revisioner) error {
+ var hasReference bool
+
+ for i, chunk := range *chunks {
+ switch chunk.(type) {
+ case Ref:
+ if i == 0 {
+ hasReference = true
+ } else {
+ return &ErrInvalidRevision{`reference must be defined once at the beginning`}
+ }
+ case AtDate:
+ if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
+ return nil
+ }
+
+ return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<ISO-8601 date>}, @{<ISO-8601 date>}`}
+ case AtReflog:
+ if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
+ return nil
+ }
+
+ return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<n>}, @{<n>}`}
+ case AtCheckout:
+ if len(*chunks) == 1 {
+ return nil
+ }
+
+ return &ErrInvalidRevision{`"@" statement is not valid, could be : @{-<n>}`}
+ case AtUpstream:
+ if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
+ return nil
+ }
+
+ return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{upstream}, @{upstream}, <refname>@{u}, @{u}`}
+ case AtPush:
+ if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
+ return nil
+ }
+
+ return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{push}, @{push}`}
+ case TildePath, CaretPath, CaretReg:
+ if !hasReference {
+ return &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}
+ }
+ case ColonReg:
+ if len(*chunks) == 1 {
+ return nil
+ }
+
+ return &ErrInvalidRevision{`":" statement is not valid, could be : :/<regexp>`}
+ case ColonPath:
+ if i == len(*chunks)-1 && hasReference || len(*chunks) == 1 {
+ return nil
+ }
+
+ return &ErrInvalidRevision{`":" statement is not valid, could be : <revision>:<path>`}
+ case ColonStagePath:
+ if len(*chunks) == 1 {
+ return nil
+ }
+
+ return &ErrInvalidRevision{`":" statement is not valid, could be : :<n>:<path>`}
+ }
+ }
+
+ return nil
+}
+
+// parseAt extract @ statements
+func (p *Parser) parseAt() (Revisioner, error) {
+ var tok, nextTok token
+ var lit, nextLit string
+ var err error
+
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ if tok != obrace {
+ p.unscan()
+
+ return Ref("HEAD"), nil
+ }
+
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ nextTok, nextLit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch {
+ case tok == word && (lit == "u" || lit == "upstream") && nextTok == cbrace:
+ return AtUpstream{}, nil
+ case tok == word && lit == "push" && nextTok == cbrace:
+ return AtPush{}, nil
+ case tok == number && nextTok == cbrace:
+ n, _ := strconv.Atoi(lit)
+
+ return AtReflog{n}, nil
+ case tok == minus && nextTok == number:
+ n, _ := strconv.Atoi(nextLit)
+
+ t, _, err := p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ if t != cbrace {
+ return nil, &ErrInvalidRevision{fmt.Sprintf(`missing "}" in @{-n} structure`)}
+ }
+
+ return AtCheckout{n}, nil
+ default:
+ p.unscan()
+
+ date := lit
+
+ for {
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch {
+ case tok == cbrace:
+ t, err := time.Parse("2006-01-02T15:04:05Z", date)
+
+ if err != nil {
+ return nil, &ErrInvalidRevision{fmt.Sprintf(`wrong date "%s" must fit ISO-8601 format : 2006-01-02T15:04:05Z`, date)}
+ }
+
+ return AtDate{t}, nil
+ default:
+ date += lit
+ }
+ }
+ }
+}
+
+// parseTilde extract ~ statements
+func (p *Parser) parseTilde() (Revisioner, error) {
+ var tok token
+ var lit string
+ var err error
+
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch {
+ case tok == number:
+ n, _ := strconv.Atoi(lit)
+
+ return TildePath{n}, nil
+ default:
+ p.unscan()
+ return TildePath{1}, nil
+ }
+}
+
+// parseCaret extract ^ statements
+func (p *Parser) parseCaret() (Revisioner, error) {
+ var tok token
+ var lit string
+ var err error
+
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch {
+ case tok == obrace:
+ r, err := p.parseCaretBraces()
+
+ if err != nil {
+ return nil, err
+ }
+
+ return r, nil
+ case tok == number:
+ n, _ := strconv.Atoi(lit)
+
+ if n > 2 {
+ return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" found must be 0, 1 or 2 after "^"`, lit)}
+ }
+
+ return CaretPath{n}, nil
+ default:
+ p.unscan()
+ return CaretPath{1}, nil
+ }
+}
+
+// parseCaretBraces extract ^{<data>} statements
+func (p *Parser) parseCaretBraces() (Revisioner, error) {
+ var tok, nextTok token
+ var lit, _ string
+ start := true
+ var re string
+ var negate bool
+ var err error
+
+ for {
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ nextTok, _, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch {
+ case tok == word && nextTok == cbrace && (lit == "commit" || lit == "tree" || lit == "blob" || lit == "tag" || lit == "object"):
+ return CaretType{lit}, nil
+ case re == "" && tok == cbrace:
+ return CaretType{"tag"}, nil
+ case re == "" && tok == emark && nextTok == emark:
+ re += lit
+ case re == "" && tok == emark && nextTok == minus:
+ negate = true
+ case re == "" && tok == emark:
+ return nil, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component sequences starting with "/!" others than those defined are reserved`)}
+ case re == "" && tok == slash:
+ p.unscan()
+ case tok != slash && start:
+ return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" is not a valid revision suffix brace component`, lit)}
+ case tok != cbrace:
+ p.unscan()
+ re += lit
+ case tok == cbrace:
+ p.unscan()
+
+ reg, err := regexp.Compile(re)
+
+ if err != nil {
+ return CaretReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())}
+ }
+
+ return CaretReg{reg, negate}, nil
+ }
+
+ start = false
+ }
+}
+
+// parseColon extract : statements
+func (p *Parser) parseColon() (Revisioner, error) {
+ var tok token
+ var err error
+
+ tok, _, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch tok {
+ case slash:
+ return p.parseColonSlash()
+ default:
+ p.unscan()
+ return p.parseColonDefault()
+ }
+}
+
+// parseColonSlash extract :/<data> statements
+func (p *Parser) parseColonSlash() (Revisioner, error) {
+ var tok, nextTok token
+ var lit string
+ var re string
+ var negate bool
+ var err error
+
+ for {
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ nextTok, _, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch {
+ case tok == emark && nextTok == emark:
+ re += lit
+ case re == "" && tok == emark && nextTok == minus:
+ negate = true
+ case re == "" && tok == emark:
+ return nil, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component sequences starting with "/!" others than those defined are reserved`)}
+ case tok == eof:
+ p.unscan()
+ reg, err := regexp.Compile(re)
+
+ if err != nil {
+ return ColonReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())}
+ }
+
+ return ColonReg{reg, negate}, nil
+ default:
+ p.unscan()
+ re += lit
+ }
+ }
+}
+
+// parseColonDefault extract :<data> statements
+func (p *Parser) parseColonDefault() (Revisioner, error) {
+ var tok token
+ var lit string
+ var path string
+ var stage int
+ var err error
+ var n = -1
+
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ nextTok, _, err := p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ if tok == number && nextTok == colon {
+ n, _ = strconv.Atoi(lit)
+ }
+
+ switch n {
+ case 0, 1, 2, 3:
+ stage = n
+ default:
+ path += lit
+ p.unscan()
+ }
+
+ for {
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch {
+ case tok == eof && n == -1:
+ return ColonPath{path}, nil
+ case tok == eof:
+ return ColonStagePath{path, stage}, nil
+ default:
+ path += lit
+ }
+ }
+}
+
+// parseRef extract reference name
+func (p *Parser) parseRef() (Revisioner, error) {
+ var tok, prevTok token
+ var lit, buf string
+ var endOfRef bool
+ var err error
+
+ for {
+ tok, lit, err = p.scan()
+
+ if err != nil {
+ return nil, err
+ }
+
+ switch tok {
+ case eof, at, colon, tilde, caret:
+ endOfRef = true
+ }
+
+ err := p.checkRefFormat(tok, lit, prevTok, buf, endOfRef)
+
+ if err != nil {
+ return "", err
+ }
+
+ if endOfRef {
+ p.unscan()
+ return Ref(buf), nil
+ }
+
+ buf += lit
+ prevTok = tok
+ }
+}
+
+// checkRefFormat ensure reference name follow rules defined here :
+// https://git-scm.com/docs/git-check-ref-format
+func (p *Parser) checkRefFormat(token token, literal string, previousToken token, buffer string, endOfRef bool) error {
+ switch token {
+ case aslash, space, control, qmark, asterisk, obracket:
+ return &ErrInvalidRevision{fmt.Sprintf(`must not contains "%s"`, literal)}
+ }
+
+ switch {
+ case (token == dot || token == slash) && buffer == "":
+ return &ErrInvalidRevision{fmt.Sprintf(`must not start with "%s"`, literal)}
+ case previousToken == slash && endOfRef:
+ return &ErrInvalidRevision{`must not end with "/"`}
+ case previousToken == dot && endOfRef:
+ return &ErrInvalidRevision{`must not end with "."`}
+ case token == dot && previousToken == slash:
+ return &ErrInvalidRevision{`must not contains "/."`}
+ case previousToken == dot && token == dot:
+ return &ErrInvalidRevision{`must not contains ".."`}
+ case previousToken == slash && token == slash:
+ return &ErrInvalidRevision{`must not contains consecutively "/"`}
+ case (token == slash || endOfRef) && len(buffer) > 4 && buffer[len(buffer)-5:] == ".lock":
+ return &ErrInvalidRevision{"cannot end with .lock"}
+ }
+
+ return nil
+}