diff options
Diffstat (limited to 'internal/revision/parser.go')
-rw-r--r-- | internal/revision/parser.go | 622 |
1 files changed, 622 insertions, 0 deletions
diff --git a/internal/revision/parser.go b/internal/revision/parser.go new file mode 100644 index 0000000..b45a6d8 --- /dev/null +++ b/internal/revision/parser.go @@ -0,0 +1,622 @@ +// Package revision extracts git revision from string +// More informations about revision : https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html +package revision + +import ( + "bytes" + "fmt" + "io" + "regexp" + "strconv" + "time" +) + +// ErrInvalidRevision is emitted if string doesn't match valid revision +type ErrInvalidRevision struct { + s string +} + +func (e *ErrInvalidRevision) Error() string { + return "Revision invalid : " + e.s +} + +// Revisioner represents a revision component. +// A revision is made of multiple revision components +// obtained after parsing a revision string, +// for instance revision "master~" will be converted in +// two revision components Ref and TildePath +type Revisioner interface { +} + +// Ref represents a reference name : HEAD, master +type Ref string + +// TildePath represents ~, ~{n} +type TildePath struct { + Depth int +} + +// CaretPath represents ^, ^{n} +type CaretPath struct { + Depth int +} + +// CaretReg represents ^{/foo bar} +type CaretReg struct { + Regexp *regexp.Regexp + Negate bool +} + +// CaretType represents ^{commit} +type CaretType struct { + ObjectType string +} + +// AtReflog represents @{n} +type AtReflog struct { + Depth int +} + +// AtCheckout represents @{-n} +type AtCheckout struct { + Depth int +} + +// AtUpstream represents @{upstream}, @{u} +type AtUpstream struct { + BranchName string +} + +// AtPush represents @{push} +type AtPush struct { + BranchName string +} + +// AtDate represents @{"2006-01-02T15:04:05Z"} +type AtDate struct { + Date time.Time +} + +// ColonReg represents :/foo bar +type ColonReg struct { + Regexp *regexp.Regexp + Negate bool +} + +// ColonPath represents :./<path> :<path> +type ColonPath struct { + Path string +} + +// ColonStagePath represents :<n>:/<path> +type ColonStagePath struct { + Path string + Stage int +} + +// Parser represents a parser +// use to tokenize and transform to revisioner chunks +// a given string +type Parser struct { + s *scanner + currentParsedChar struct { + tok token + lit string + } + unreadLastChar bool +} + +// NewParserFromString returns a new instance of parser from a string. +func NewParserFromString(s string) *Parser { + return NewParser(bytes.NewBufferString(s)) +} + +// NewParser returns a new instance of parser. +func NewParser(r io.Reader) *Parser { + return &Parser{s: newScanner(r)} +} + +// scan returns the next token from the underlying scanner +// or the last scanned token if an unscan was requested +func (p *Parser) scan() (token, string, error) { + if p.unreadLastChar { + p.unreadLastChar = false + return p.currentParsedChar.tok, p.currentParsedChar.lit, nil + } + + tok, lit, err := p.s.scan() + + p.currentParsedChar.tok, p.currentParsedChar.lit = tok, lit + + return tok, lit, err +} + +// unscan pushes the previously read token back onto the buffer. +func (p *Parser) unscan() { p.unreadLastChar = true } + +// Parse explode a revision string into revisioner chunks +func (p *Parser) Parse() ([]Revisioner, error) { + var rev Revisioner + var revs []Revisioner + var tok token + var err error + + for { + tok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case at: + rev, err = p.parseAt() + case tilde: + rev, err = p.parseTilde() + case caret: + rev, err = p.parseCaret() + case colon: + rev, err = p.parseColon() + case eof: + err = p.validateFullRevision(&revs) + + if err != nil { + return []Revisioner{}, err + } + + return revs, nil + default: + p.unscan() + rev, err = p.parseRef() + } + + if err != nil { + return []Revisioner{}, err + } + + revs = append(revs, rev) + } +} + +// validateFullRevision ensures all revisioner chunks make a valid revision +func (p *Parser) validateFullRevision(chunks *[]Revisioner) error { + var hasReference bool + + for i, chunk := range *chunks { + switch chunk.(type) { + case Ref: + if i == 0 { + hasReference = true + } else { + return &ErrInvalidRevision{`reference must be defined once at the beginning`} + } + case AtDate: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<ISO-8601 date>}, @{<ISO-8601 date>}`} + case AtReflog: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<n>}, @{<n>}`} + case AtCheckout: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : @{-<n>}`} + case AtUpstream: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{upstream}, @{upstream}, <refname>@{u}, @{u}`} + case AtPush: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{push}, @{push}`} + case TildePath, CaretPath, CaretReg: + if !hasReference { + return &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`} + } + case ColonReg: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : :/<regexp>`} + case ColonPath: + if i == len(*chunks)-1 && hasReference || len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : <revision>:<path>`} + case ColonStagePath: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : :<n>:<path>`} + } + } + + return nil +} + +// parseAt extract @ statements +func (p *Parser) parseAt() (Revisioner, error) { + var tok, nextTok token + var lit, nextLit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + if tok != obrace { + p.unscan() + + return Ref("HEAD"), nil + } + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, nextLit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == word && (lit == "u" || lit == "upstream") && nextTok == cbrace: + return AtUpstream{}, nil + case tok == word && lit == "push" && nextTok == cbrace: + return AtPush{}, nil + case tok == number && nextTok == cbrace: + n, _ := strconv.Atoi(lit) + + return AtReflog{n}, nil + case tok == minus && nextTok == number: + n, _ := strconv.Atoi(nextLit) + + t, _, err := p.scan() + + if err != nil { + return nil, err + } + + if t != cbrace { + return nil, &ErrInvalidRevision{fmt.Sprintf(`missing "}" in @{-n} structure`)} + } + + return AtCheckout{n}, nil + default: + p.unscan() + + date := lit + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == cbrace: + t, err := time.Parse("2006-01-02T15:04:05Z", date) + + if err != nil { + return nil, &ErrInvalidRevision{fmt.Sprintf(`wrong date "%s" must fit ISO-8601 format : 2006-01-02T15:04:05Z`, date)} + } + + return AtDate{t}, nil + default: + date += lit + } + } + } +} + +// parseTilde extract ~ statements +func (p *Parser) parseTilde() (Revisioner, error) { + var tok token + var lit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == number: + n, _ := strconv.Atoi(lit) + + return TildePath{n}, nil + default: + p.unscan() + return TildePath{1}, nil + } +} + +// parseCaret extract ^ statements +func (p *Parser) parseCaret() (Revisioner, error) { + var tok token + var lit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == obrace: + r, err := p.parseCaretBraces() + + if err != nil { + return nil, err + } + + return r, nil + case tok == number: + n, _ := strconv.Atoi(lit) + + if n > 2 { + return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" found must be 0, 1 or 2 after "^"`, lit)} + } + + return CaretPath{n}, nil + default: + p.unscan() + return CaretPath{1}, nil + } +} + +// parseCaretBraces extract ^{<data>} statements +func (p *Parser) parseCaretBraces() (Revisioner, error) { + var tok, nextTok token + var lit, _ string + start := true + var re string + var negate bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == word && nextTok == cbrace && (lit == "commit" || lit == "tree" || lit == "blob" || lit == "tag" || lit == "object"): + return CaretType{lit}, nil + case re == "" && tok == cbrace: + return CaretType{"tag"}, nil + case re == "" && tok == emark && nextTok == emark: + re += lit + case re == "" && tok == emark && nextTok == minus: + negate = true + case re == "" && tok == emark: + return nil, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component sequences starting with "/!" others than those defined are reserved`)} + case re == "" && tok == slash: + p.unscan() + case tok != slash && start: + return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" is not a valid revision suffix brace component`, lit)} + case tok != cbrace: + p.unscan() + re += lit + case tok == cbrace: + p.unscan() + + reg, err := regexp.Compile(re) + + if err != nil { + return CaretReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())} + } + + return CaretReg{reg, negate}, nil + } + + start = false + } +} + +// parseColon extract : statements +func (p *Parser) parseColon() (Revisioner, error) { + var tok token + var err error + + tok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case slash: + return p.parseColonSlash() + default: + p.unscan() + return p.parseColonDefault() + } +} + +// parseColonSlash extract :/<data> statements +func (p *Parser) parseColonSlash() (Revisioner, error) { + var tok, nextTok token + var lit string + var re string + var negate bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == emark && nextTok == emark: + re += lit + case re == "" && tok == emark && nextTok == minus: + negate = true + case re == "" && tok == emark: + return nil, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component sequences starting with "/!" others than those defined are reserved`)} + case tok == eof: + p.unscan() + reg, err := regexp.Compile(re) + + if err != nil { + return ColonReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())} + } + + return ColonReg{reg, negate}, nil + default: + p.unscan() + re += lit + } + } +} + +// parseColonDefault extract :<data> statements +func (p *Parser) parseColonDefault() (Revisioner, error) { + var tok token + var lit string + var path string + var stage int + var err error + var n = -1 + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err := p.scan() + + if err != nil { + return nil, err + } + + if tok == number && nextTok == colon { + n, _ = strconv.Atoi(lit) + } + + switch n { + case 0, 1, 2, 3: + stage = n + default: + path += lit + p.unscan() + } + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == eof && n == -1: + return ColonPath{path}, nil + case tok == eof: + return ColonStagePath{path, stage}, nil + default: + path += lit + } + } +} + +// parseRef extract reference name +func (p *Parser) parseRef() (Revisioner, error) { + var tok, prevTok token + var lit, buf string + var endOfRef bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case eof, at, colon, tilde, caret: + endOfRef = true + } + + err := p.checkRefFormat(tok, lit, prevTok, buf, endOfRef) + + if err != nil { + return "", err + } + + if endOfRef { + p.unscan() + return Ref(buf), nil + } + + buf += lit + prevTok = tok + } +} + +// checkRefFormat ensure reference name follow rules defined here : +// https://git-scm.com/docs/git-check-ref-format +func (p *Parser) checkRefFormat(token token, literal string, previousToken token, buffer string, endOfRef bool) error { + switch token { + case aslash, space, control, qmark, asterisk, obracket: + return &ErrInvalidRevision{fmt.Sprintf(`must not contains "%s"`, literal)} + } + + switch { + case (token == dot || token == slash) && buffer == "": + return &ErrInvalidRevision{fmt.Sprintf(`must not start with "%s"`, literal)} + case previousToken == slash && endOfRef: + return &ErrInvalidRevision{`must not end with "/"`} + case previousToken == dot && endOfRef: + return &ErrInvalidRevision{`must not end with "."`} + case token == dot && previousToken == slash: + return &ErrInvalidRevision{`must not contains "/."`} + case previousToken == dot && token == dot: + return &ErrInvalidRevision{`must not contains ".."`} + case previousToken == slash && token == slash: + return &ErrInvalidRevision{`must not contains consecutively "/"`} + case (token == slash || endOfRef) && len(buffer) > 4 && buffer[len(buffer)-5:] == ".lock": + return &ErrInvalidRevision{"cannot end with .lock"} + } + + return nil +} |