From d0cf20797464ab12b41ccb5c603f67884a6e8e17 Mon Sep 17 00:00:00 2001 From: Anthony HAMON Date: Mon, 6 Feb 2017 17:53:11 +0100 Subject: Add revision implementation (#139) --- internal/revision/parser.go | 622 ++++++++++++++++++++++++++++++++++++++ internal/revision/parser_test.go | 395 ++++++++++++++++++++++++ internal/revision/scanner.go | 117 +++++++ internal/revision/scanner_test.go | 194 ++++++++++++ internal/revision/token.go | 28 ++ 5 files changed, 1356 insertions(+) create mode 100644 internal/revision/parser.go create mode 100644 internal/revision/parser_test.go create mode 100644 internal/revision/scanner.go create mode 100644 internal/revision/scanner_test.go create mode 100644 internal/revision/token.go (limited to 'internal') diff --git a/internal/revision/parser.go b/internal/revision/parser.go new file mode 100644 index 0000000..b45a6d8 --- /dev/null +++ b/internal/revision/parser.go @@ -0,0 +1,622 @@ +// Package revision extracts git revision from string +// More informations about revision : https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html +package revision + +import ( + "bytes" + "fmt" + "io" + "regexp" + "strconv" + "time" +) + +// ErrInvalidRevision is emitted if string doesn't match valid revision +type ErrInvalidRevision struct { + s string +} + +func (e *ErrInvalidRevision) Error() string { + return "Revision invalid : " + e.s +} + +// Revisioner represents a revision component. +// A revision is made of multiple revision components +// obtained after parsing a revision string, +// for instance revision "master~" will be converted in +// two revision components Ref and TildePath +type Revisioner interface { +} + +// Ref represents a reference name : HEAD, master +type Ref string + +// TildePath represents ~, ~{n} +type TildePath struct { + Depth int +} + +// CaretPath represents ^, ^{n} +type CaretPath struct { + Depth int +} + +// CaretReg represents ^{/foo bar} +type CaretReg struct { + Regexp *regexp.Regexp + Negate bool +} + +// CaretType represents ^{commit} +type CaretType struct { + ObjectType string +} + +// AtReflog represents @{n} +type AtReflog struct { + Depth int +} + +// AtCheckout represents @{-n} +type AtCheckout struct { + Depth int +} + +// AtUpstream represents @{upstream}, @{u} +type AtUpstream struct { + BranchName string +} + +// AtPush represents @{push} +type AtPush struct { + BranchName string +} + +// AtDate represents @{"2006-01-02T15:04:05Z"} +type AtDate struct { + Date time.Time +} + +// ColonReg represents :/foo bar +type ColonReg struct { + Regexp *regexp.Regexp + Negate bool +} + +// ColonPath represents :./ : +type ColonPath struct { + Path string +} + +// ColonStagePath represents ::/ +type ColonStagePath struct { + Path string + Stage int +} + +// Parser represents a parser +// use to tokenize and transform to revisioner chunks +// a given string +type Parser struct { + s *scanner + currentParsedChar struct { + tok token + lit string + } + unreadLastChar bool +} + +// NewParserFromString returns a new instance of parser from a string. +func NewParserFromString(s string) *Parser { + return NewParser(bytes.NewBufferString(s)) +} + +// NewParser returns a new instance of parser. +func NewParser(r io.Reader) *Parser { + return &Parser{s: newScanner(r)} +} + +// scan returns the next token from the underlying scanner +// or the last scanned token if an unscan was requested +func (p *Parser) scan() (token, string, error) { + if p.unreadLastChar { + p.unreadLastChar = false + return p.currentParsedChar.tok, p.currentParsedChar.lit, nil + } + + tok, lit, err := p.s.scan() + + p.currentParsedChar.tok, p.currentParsedChar.lit = tok, lit + + return tok, lit, err +} + +// unscan pushes the previously read token back onto the buffer. +func (p *Parser) unscan() { p.unreadLastChar = true } + +// Parse explode a revision string into revisioner chunks +func (p *Parser) Parse() ([]Revisioner, error) { + var rev Revisioner + var revs []Revisioner + var tok token + var err error + + for { + tok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case at: + rev, err = p.parseAt() + case tilde: + rev, err = p.parseTilde() + case caret: + rev, err = p.parseCaret() + case colon: + rev, err = p.parseColon() + case eof: + err = p.validateFullRevision(&revs) + + if err != nil { + return []Revisioner{}, err + } + + return revs, nil + default: + p.unscan() + rev, err = p.parseRef() + } + + if err != nil { + return []Revisioner{}, err + } + + revs = append(revs, rev) + } +} + +// validateFullRevision ensures all revisioner chunks make a valid revision +func (p *Parser) validateFullRevision(chunks *[]Revisioner) error { + var hasReference bool + + for i, chunk := range *chunks { + switch chunk.(type) { + case Ref: + if i == 0 { + hasReference = true + } else { + return &ErrInvalidRevision{`reference must be defined once at the beginning`} + } + case AtDate: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : @{}, @{}`} + case AtReflog: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : @{}, @{}`} + case AtCheckout: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : @{-}`} + case AtUpstream: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : @{upstream}, @{upstream}, @{u}, @{u}`} + case AtPush: + if len(*chunks) == 1 || hasReference && len(*chunks) == 2 { + return nil + } + + return &ErrInvalidRevision{`"@" statement is not valid, could be : @{push}, @{push}`} + case TildePath, CaretPath, CaretReg: + if !hasReference { + return &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`} + } + case ColonReg: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : :/`} + case ColonPath: + if i == len(*chunks)-1 && hasReference || len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : :`} + case ColonStagePath: + if len(*chunks) == 1 { + return nil + } + + return &ErrInvalidRevision{`":" statement is not valid, could be : ::`} + } + } + + return nil +} + +// parseAt extract @ statements +func (p *Parser) parseAt() (Revisioner, error) { + var tok, nextTok token + var lit, nextLit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + if tok != obrace { + p.unscan() + + return Ref("HEAD"), nil + } + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, nextLit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == word && (lit == "u" || lit == "upstream") && nextTok == cbrace: + return AtUpstream{}, nil + case tok == word && lit == "push" && nextTok == cbrace: + return AtPush{}, nil + case tok == number && nextTok == cbrace: + n, _ := strconv.Atoi(lit) + + return AtReflog{n}, nil + case tok == minus && nextTok == number: + n, _ := strconv.Atoi(nextLit) + + t, _, err := p.scan() + + if err != nil { + return nil, err + } + + if t != cbrace { + return nil, &ErrInvalidRevision{fmt.Sprintf(`missing "}" in @{-n} structure`)} + } + + return AtCheckout{n}, nil + default: + p.unscan() + + date := lit + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == cbrace: + t, err := time.Parse("2006-01-02T15:04:05Z", date) + + if err != nil { + return nil, &ErrInvalidRevision{fmt.Sprintf(`wrong date "%s" must fit ISO-8601 format : 2006-01-02T15:04:05Z`, date)} + } + + return AtDate{t}, nil + default: + date += lit + } + } + } +} + +// parseTilde extract ~ statements +func (p *Parser) parseTilde() (Revisioner, error) { + var tok token + var lit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == number: + n, _ := strconv.Atoi(lit) + + return TildePath{n}, nil + default: + p.unscan() + return TildePath{1}, nil + } +} + +// parseCaret extract ^ statements +func (p *Parser) parseCaret() (Revisioner, error) { + var tok token + var lit string + var err error + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == obrace: + r, err := p.parseCaretBraces() + + if err != nil { + return nil, err + } + + return r, nil + case tok == number: + n, _ := strconv.Atoi(lit) + + if n > 2 { + return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" found must be 0, 1 or 2 after "^"`, lit)} + } + + return CaretPath{n}, nil + default: + p.unscan() + return CaretPath{1}, nil + } +} + +// parseCaretBraces extract ^{} statements +func (p *Parser) parseCaretBraces() (Revisioner, error) { + var tok, nextTok token + var lit, _ string + start := true + var re string + var negate bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == word && nextTok == cbrace && (lit == "commit" || lit == "tree" || lit == "blob" || lit == "tag" || lit == "object"): + return CaretType{lit}, nil + case re == "" && tok == cbrace: + return CaretType{"tag"}, nil + case re == "" && tok == emark && nextTok == emark: + re += lit + case re == "" && tok == emark && nextTok == minus: + negate = true + case re == "" && tok == emark: + return nil, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component sequences starting with "/!" others than those defined are reserved`)} + case re == "" && tok == slash: + p.unscan() + case tok != slash && start: + return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" is not a valid revision suffix brace component`, lit)} + case tok != cbrace: + p.unscan() + re += lit + case tok == cbrace: + p.unscan() + + reg, err := regexp.Compile(re) + + if err != nil { + return CaretReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())} + } + + return CaretReg{reg, negate}, nil + } + + start = false + } +} + +// parseColon extract : statements +func (p *Parser) parseColon() (Revisioner, error) { + var tok token + var err error + + tok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case slash: + return p.parseColonSlash() + default: + p.unscan() + return p.parseColonDefault() + } +} + +// parseColonSlash extract :/ statements +func (p *Parser) parseColonSlash() (Revisioner, error) { + var tok, nextTok token + var lit string + var re string + var negate bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == emark && nextTok == emark: + re += lit + case re == "" && tok == emark && nextTok == minus: + negate = true + case re == "" && tok == emark: + return nil, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component sequences starting with "/!" others than those defined are reserved`)} + case tok == eof: + p.unscan() + reg, err := regexp.Compile(re) + + if err != nil { + return ColonReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())} + } + + return ColonReg{reg, negate}, nil + default: + p.unscan() + re += lit + } + } +} + +// parseColonDefault extract : statements +func (p *Parser) parseColonDefault() (Revisioner, error) { + var tok token + var lit string + var path string + var stage int + var err error + var n = -1 + + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + nextTok, _, err := p.scan() + + if err != nil { + return nil, err + } + + if tok == number && nextTok == colon { + n, _ = strconv.Atoi(lit) + } + + switch n { + case 0, 1, 2, 3: + stage = n + default: + path += lit + p.unscan() + } + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch { + case tok == eof && n == -1: + return ColonPath{path}, nil + case tok == eof: + return ColonStagePath{path, stage}, nil + default: + path += lit + } + } +} + +// parseRef extract reference name +func (p *Parser) parseRef() (Revisioner, error) { + var tok, prevTok token + var lit, buf string + var endOfRef bool + var err error + + for { + tok, lit, err = p.scan() + + if err != nil { + return nil, err + } + + switch tok { + case eof, at, colon, tilde, caret: + endOfRef = true + } + + err := p.checkRefFormat(tok, lit, prevTok, buf, endOfRef) + + if err != nil { + return "", err + } + + if endOfRef { + p.unscan() + return Ref(buf), nil + } + + buf += lit + prevTok = tok + } +} + +// checkRefFormat ensure reference name follow rules defined here : +// https://git-scm.com/docs/git-check-ref-format +func (p *Parser) checkRefFormat(token token, literal string, previousToken token, buffer string, endOfRef bool) error { + switch token { + case aslash, space, control, qmark, asterisk, obracket: + return &ErrInvalidRevision{fmt.Sprintf(`must not contains "%s"`, literal)} + } + + switch { + case (token == dot || token == slash) && buffer == "": + return &ErrInvalidRevision{fmt.Sprintf(`must not start with "%s"`, literal)} + case previousToken == slash && endOfRef: + return &ErrInvalidRevision{`must not end with "/"`} + case previousToken == dot && endOfRef: + return &ErrInvalidRevision{`must not end with "."`} + case token == dot && previousToken == slash: + return &ErrInvalidRevision{`must not contains "/."`} + case previousToken == dot && token == dot: + return &ErrInvalidRevision{`must not contains ".."`} + case previousToken == slash && token == slash: + return &ErrInvalidRevision{`must not contains consecutively "/"`} + case (token == slash || endOfRef) && len(buffer) > 4 && buffer[len(buffer)-5:] == ".lock": + return &ErrInvalidRevision{"cannot end with .lock"} + } + + return nil +} diff --git a/internal/revision/parser_test.go b/internal/revision/parser_test.go new file mode 100644 index 0000000..a58f0ad --- /dev/null +++ b/internal/revision/parser_test.go @@ -0,0 +1,395 @@ +package revision + +import ( + "bytes" + "regexp" + "time" + + . "gopkg.in/check.v1" +) + +type ParserSuite struct{} + +var _ = Suite(&ParserSuite{}) + +func (s *ParserSuite) TestErrInvalidRevision(c *C) { + e := ErrInvalidRevision{"test"} + + c.Assert(e.Error(), Equals, "Revision invalid : test") +} + +func (s *ParserSuite) TestNewParserFromString(c *C) { + p := NewParserFromString("test") + + c.Assert(p, FitsTypeOf, &Parser{}) +} + +func (s *ParserSuite) TestScan(c *C) { + parser := NewParser(bytes.NewBufferString("Hello world !")) + + expected := []struct { + t token + s string + }{ + { + word, + "Hello", + }, + { + space, + " ", + }, + { + word, + "world", + }, + { + space, + " ", + }, + { + emark, + "!", + }, + } + + for i := 0; ; { + tok, str, err := parser.scan() + + if tok == eof { + return + } + + c.Assert(err, Equals, nil) + c.Assert(str, Equals, expected[i].s) + c.Assert(tok, Equals, expected[i].t) + + i++ + } +} + +func (s *ParserSuite) TestUnscan(c *C) { + parser := NewParser(bytes.NewBufferString("Hello world !")) + + tok, str, err := parser.scan() + + c.Assert(err, Equals, nil) + c.Assert(str, Equals, "Hello") + c.Assert(tok, Equals, word) + + parser.unscan() + + tok, str, err = parser.scan() + + c.Assert(err, Equals, nil) + c.Assert(str, Equals, "Hello") + c.Assert(tok, Equals, word) +} + +func (s *ParserSuite) TestParseWithValidExpression(c *C) { + tim, _ := time.Parse("2006-01-02T15:04:05Z", "2016-12-16T21:42:47Z") + + datas := map[string]Revisioner{ + "@": []Revisioner{Ref("HEAD")}, + "@~3": []Revisioner{ + Ref("HEAD"), + TildePath{3}, + }, + "@{2016-12-16T21:42:47Z}": []Revisioner{AtDate{tim}}, + "@{1}": []Revisioner{AtReflog{1}}, + "@{-1}": []Revisioner{AtCheckout{1}}, + "master@{upstream}": []Revisioner{ + Ref("master"), + AtUpstream{}, + }, + "@{upstream}": []Revisioner{ + AtUpstream{}, + }, + "@{u}": []Revisioner{ + AtUpstream{}, + }, + "master@{push}": []Revisioner{ + Ref("master"), + AtPush{}, + }, + "master@{2016-12-16T21:42:47Z}": []Revisioner{ + Ref("master"), + AtDate{tim}, + }, + "HEAD^": []Revisioner{ + Ref("HEAD"), + CaretPath{1}, + }, + "master~3": []Revisioner{ + Ref("master"), + TildePath{3}, + }, + "v0.99.8^{commit}": []Revisioner{ + Ref("v0.99.8"), + CaretType{"commit"}, + }, + "v0.99.8^{}": []Revisioner{ + Ref("v0.99.8"), + CaretType{"tag"}, + }, + "HEAD^{/fix nasty bug}": []Revisioner{ + Ref("HEAD"), + CaretReg{regexp.MustCompile("fix nasty bug"), false}, + }, + ":/fix nasty bug": []Revisioner{ + ColonReg{regexp.MustCompile("fix nasty bug"), false}, + }, + "HEAD:README": []Revisioner{ + Ref("HEAD"), + ColonPath{"README"}, + }, + ":README": []Revisioner{ + ColonPath{"README"}, + }, + "master:./README": []Revisioner{ + Ref("master"), + ColonPath{"./README"}, + }, + "master^1~:./README": []Revisioner{ + Ref("master"), + CaretPath{1}, + TildePath{1}, + ColonPath{"./README"}, + }, + ":0:README": []Revisioner{ + ColonStagePath{"README", 0}, + }, + ":3:README": []Revisioner{ + ColonStagePath{"README", 3}, + }, + "master~1^{/update}~5~^^1": []Revisioner{ + Ref("master"), + TildePath{1}, + CaretReg{regexp.MustCompile("update"), false}, + TildePath{5}, + TildePath{1}, + CaretPath{1}, + CaretPath{1}, + }, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.Parse() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseWithUnValidExpression(c *C) { + datas := map[string]error{ + "..": &ErrInvalidRevision{`must not start with "."`}, + "master^1master": &ErrInvalidRevision{`reference must be defined once at the beginning`}, + "master^1@{2016-12-16T21:42:47Z}": &ErrInvalidRevision{`"@" statement is not valid, could be : @{}, @{}`}, + "master^1@{1}": &ErrInvalidRevision{`"@" statement is not valid, could be : @{}, @{}`}, + "master@{-1}": &ErrInvalidRevision{`"@" statement is not valid, could be : @{-}`}, + "master^1@{upstream}": &ErrInvalidRevision{`"@" statement is not valid, could be : @{upstream}, @{upstream}, @{u}, @{u}`}, + "master^1@{u}": &ErrInvalidRevision{`"@" statement is not valid, could be : @{upstream}, @{upstream}, @{u}, @{u}`}, + "master^1@{push}": &ErrInvalidRevision{`"@" statement is not valid, could be : @{push}, @{push}`}, + "^1": &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}, + "^{/test}": &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}, + "~1": &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}, + "master:/test": &ErrInvalidRevision{`":" statement is not valid, could be : :/`}, + "master:0:README": &ErrInvalidRevision{`":" statement is not valid, could be : ::`}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + _, err := parser.Parse() + c.Assert(err, DeepEquals, e) + } +} + +func (s *ParserSuite) TestParseAtWithValidExpression(c *C) { + tim, _ := time.Parse("2006-01-02T15:04:05Z", "2016-12-16T21:42:47Z") + + datas := map[string]Revisioner{ + "": Ref("HEAD"), + "{1}": AtReflog{1}, + "{-1}": AtCheckout{1}, + "{push}": AtPush{}, + "{upstream}": AtUpstream{}, + "{u}": AtUpstream{}, + "{2016-12-16T21:42:47Z}": AtDate{tim}, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseAt() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseAtWithUnValidExpression(c *C) { + datas := map[string]error{ + "{test}": &ErrInvalidRevision{`wrong date "test" must fit ISO-8601 format : 2006-01-02T15:04:05Z`}, + "{-1": &ErrInvalidRevision{`missing "}" in @{-n} structure`}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + + _, err := parser.parseAt() + + c.Assert(err, DeepEquals, e) + } +} + +func (s *ParserSuite) TestParseCaretWithValidExpression(c *C) { + datas := map[string]Revisioner{ + "": CaretPath{1}, + "2": CaretPath{2}, + "{}": CaretType{"tag"}, + "{commit}": CaretType{"commit"}, + "{tree}": CaretType{"tree"}, + "{blob}": CaretType{"blob"}, + "{tag}": CaretType{"tag"}, + "{object}": CaretType{"object"}, + "{/hello world !}": CaretReg{regexp.MustCompile("hello world !"), false}, + "{/!-hello world !}": CaretReg{regexp.MustCompile("hello world !"), true}, + "{/!! hello world !}": CaretReg{regexp.MustCompile("! hello world !"), false}, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseCaret() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseCaretWithUnValidExpression(c *C) { + datas := map[string]error{ + "3": &ErrInvalidRevision{`"3" found must be 0, 1 or 2 after "^"`}, + "{test}": &ErrInvalidRevision{`"test" is not a valid revision suffix brace component`}, + "{/!test}": &ErrInvalidRevision{`revision suffix brace component sequences starting with "/!" others than those defined are reserved`}, + "{/test**}": &ErrInvalidRevision{"revision suffix brace component, error parsing regexp: invalid nested repetition operator: `**`"}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + + _, err := parser.parseCaret() + + c.Assert(err, DeepEquals, e) + } +} + +func (s *ParserSuite) TestParseTildeWithValidExpression(c *C) { + datas := map[string]Revisioner{ + "3": TildePath{3}, + "1": TildePath{1}, + "": TildePath{1}, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseTilde() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseColonWithValidExpression(c *C) { + datas := map[string]Revisioner{ + "/hello world !": ColonReg{regexp.MustCompile("hello world !"), false}, + "/!-hello world !": ColonReg{regexp.MustCompile("hello world !"), true}, + "/!! hello world !": ColonReg{regexp.MustCompile("! hello world !"), false}, + "../parser.go": ColonPath{"../parser.go"}, + "./parser.go": ColonPath{"./parser.go"}, + "parser.go": ColonPath{"parser.go"}, + "0:parser.go": ColonStagePath{"parser.go", 0}, + "1:parser.go": ColonStagePath{"parser.go", 1}, + "2:parser.go": ColonStagePath{"parser.go", 2}, + "3:parser.go": ColonStagePath{"parser.go", 3}, + } + + for d, expected := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseColon() + + c.Assert(err, Equals, nil) + c.Assert(result, DeepEquals, expected) + } +} + +func (s *ParserSuite) TestParseColonWithUnValidExpression(c *C) { + datas := map[string]error{ + "/!test": &ErrInvalidRevision{`revision suffix brace component sequences starting with "/!" others than those defined are reserved`}, + "/*": &ErrInvalidRevision{"revision suffix brace component, error parsing regexp: missing argument to repetition operator: `*`"}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + + _, err := parser.parseColon() + + c.Assert(err, DeepEquals, e) + } +} + +func (s *ParserSuite) TestParseRefWithValidName(c *C) { + datas := []string{ + "lock", + "master", + "v1.0.0", + "refs/stash", + "refs/tags/v1.0.0", + "refs/heads/master", + "refs/remotes/test", + "refs/remotes/origin/HEAD", + "refs/remotes/origin/master", + } + + for _, d := range datas { + parser := NewParser(bytes.NewBufferString(d)) + + result, err := parser.parseRef() + + c.Assert(err, Equals, nil) + c.Assert(result, Equals, Ref(d)) + } +} + +func (s *ParserSuite) TestParseRefWithUnvalidName(c *C) { + datas := map[string]error{ + ".master": &ErrInvalidRevision{`must not start with "."`}, + "/master": &ErrInvalidRevision{`must not start with "/"`}, + "master/": &ErrInvalidRevision{`must not end with "/"`}, + "master.": &ErrInvalidRevision{`must not end with "."`}, + "refs/remotes/.origin/HEAD": &ErrInvalidRevision{`must not contains "/."`}, + "test..test": &ErrInvalidRevision{`must not contains ".."`}, + "test..": &ErrInvalidRevision{`must not contains ".."`}, + "test test": &ErrInvalidRevision{`must not contains " "`}, + "test*test": &ErrInvalidRevision{`must not contains "*"`}, + "test?test": &ErrInvalidRevision{`must not contains "?"`}, + "test\\test": &ErrInvalidRevision{`must not contains "\"`}, + "test[test": &ErrInvalidRevision{`must not contains "["`}, + "te//st": &ErrInvalidRevision{`must not contains consecutively "/"`}, + "refs/remotes/test.lock/HEAD": &ErrInvalidRevision{`cannot end with .lock`}, + "test.lock": &ErrInvalidRevision{`cannot end with .lock`}, + } + + for s, e := range datas { + parser := NewParser(bytes.NewBufferString(s)) + + _, err := parser.parseRef() + + c.Assert(err, DeepEquals, e) + } +} diff --git a/internal/revision/scanner.go b/internal/revision/scanner.go new file mode 100644 index 0000000..fb5f333 --- /dev/null +++ b/internal/revision/scanner.go @@ -0,0 +1,117 @@ +package revision + +import ( + "bufio" + "io" + "unicode" +) + +// runeCategoryValidator takes a rune as input and +// validates it belongs to a rune category +type runeCategoryValidator func(r rune) bool + +// tokenizeExpression aggegates a series of runes matching check predicate into a single +// string and provides given tokenType as token type +func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) { + var data []rune + data = append(data, ch) + + for { + c, _, err := r.ReadRune() + + if c == zeroRune { + break + } + + if err != nil { + return tokenError, "", err + } + + if check(c) { + data = append(data, c) + } else { + err := r.UnreadRune() + + if err != nil { + return tokenError, "", err + } + + return tokenType, string(data), nil + } + } + + return tokenType, string(data), nil +} + +var zeroRune = rune(0) + +// scanner represents a lexical scanner. +type scanner struct { + r *bufio.Reader +} + +// newScanner returns a new instance of scanner. +func newScanner(r io.Reader) *scanner { + return &scanner{r: bufio.NewReader(r)} +} + +// Scan extracts tokens and their strings counterpart +// from the reader +func (s *scanner) scan() (token, string, error) { + ch, _, err := s.r.ReadRune() + + if err != nil && err != io.EOF { + return tokenError, "", err + } + + switch ch { + case zeroRune: + return eof, "", nil + case ':': + return colon, string(ch), nil + case '~': + return tilde, string(ch), nil + case '^': + return caret, string(ch), nil + case '.': + return dot, string(ch), nil + case '/': + return slash, string(ch), nil + case '{': + return obrace, string(ch), nil + case '}': + return cbrace, string(ch), nil + case '-': + return minus, string(ch), nil + case '@': + return at, string(ch), nil + case '\\': + return aslash, string(ch), nil + case '?': + return qmark, string(ch), nil + case '*': + return asterisk, string(ch), nil + case '[': + return obracket, string(ch), nil + case '!': + return emark, string(ch), nil + } + + if unicode.IsSpace(ch) { + return space, string(ch), nil + } + + if unicode.IsControl(ch) { + return control, string(ch), nil + } + + if unicode.IsLetter(ch) { + return tokenizeExpression(ch, word, unicode.IsLetter, s.r) + } + + if unicode.IsNumber(ch) { + return tokenizeExpression(ch, number, unicode.IsNumber, s.r) + } + + return tokenError, string(ch), nil +} diff --git a/internal/revision/scanner_test.go b/internal/revision/scanner_test.go new file mode 100644 index 0000000..d27ccb1 --- /dev/null +++ b/internal/revision/scanner_test.go @@ -0,0 +1,194 @@ +package revision + +import ( + "bytes" + "testing" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type ScannerSuite struct{} + +var _ = Suite(&ScannerSuite{}) + +func (s *ScannerSuite) TestReadColon(c *C) { + scanner := newScanner(bytes.NewBufferString(":")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, ":") + c.Assert(tok, Equals, colon) +} + +func (s *ScannerSuite) TestReadTilde(c *C) { + scanner := newScanner(bytes.NewBufferString("~")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "~") + c.Assert(tok, Equals, tilde) +} + +func (s *ScannerSuite) TestReadCaret(c *C) { + scanner := newScanner(bytes.NewBufferString("^")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "^") + c.Assert(tok, Equals, caret) +} + +func (s *ScannerSuite) TestReadDot(c *C) { + scanner := newScanner(bytes.NewBufferString(".")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, ".") + c.Assert(tok, Equals, dot) +} + +func (s *ScannerSuite) TestReadSlash(c *C) { + scanner := newScanner(bytes.NewBufferString("/")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "/") + c.Assert(tok, Equals, slash) +} + +func (s *ScannerSuite) TestReadEOF(c *C) { + scanner := newScanner(bytes.NewBufferString(string(rune(0)))) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "") + c.Assert(tok, Equals, eof) +} + +func (s *ScannerSuite) TestReadNumber(c *C) { + scanner := newScanner(bytes.NewBufferString("1234")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "1234") + c.Assert(tok, Equals, number) +} + +func (s *ScannerSuite) TestReadSpace(c *C) { + scanner := newScanner(bytes.NewBufferString(" ")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, " ") + c.Assert(tok, Equals, space) +} + +func (s *ScannerSuite) TestReadControl(c *C) { + scanner := newScanner(bytes.NewBufferString("")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "\x01") + c.Assert(tok, Equals, control) +} + +func (s *ScannerSuite) TestReadOpenBrace(c *C) { + scanner := newScanner(bytes.NewBufferString("{")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "{") + c.Assert(tok, Equals, obrace) +} + +func (s *ScannerSuite) TestReadCloseBrace(c *C) { + scanner := newScanner(bytes.NewBufferString("}")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "}") + c.Assert(tok, Equals, cbrace) +} + +func (s *ScannerSuite) TestReadMinus(c *C) { + scanner := newScanner(bytes.NewBufferString("-")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "-") + c.Assert(tok, Equals, minus) +} + +func (s *ScannerSuite) TestReadAt(c *C) { + scanner := newScanner(bytes.NewBufferString("@")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "@") + c.Assert(tok, Equals, at) +} + +func (s *ScannerSuite) TestReadAntislash(c *C) { + scanner := newScanner(bytes.NewBufferString("\\")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "\\") + c.Assert(tok, Equals, aslash) +} + +func (s *ScannerSuite) TestReadQuestionMark(c *C) { + scanner := newScanner(bytes.NewBufferString("?")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "?") + c.Assert(tok, Equals, qmark) +} + +func (s *ScannerSuite) TestReadAsterisk(c *C) { + scanner := newScanner(bytes.NewBufferString("*")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "*") + c.Assert(tok, Equals, asterisk) +} + +func (s *ScannerSuite) TestReadOpenBracket(c *C) { + scanner := newScanner(bytes.NewBufferString("[")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "[") + c.Assert(tok, Equals, obracket) +} + +func (s *ScannerSuite) TestReadExclamationMark(c *C) { + scanner := newScanner(bytes.NewBufferString("!")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "!") + c.Assert(tok, Equals, emark) +} + +func (s *ScannerSuite) TestReadWord(c *C) { + scanner := newScanner(bytes.NewBufferString("abcde")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "abcde") + c.Assert(tok, Equals, word) +} + +func (s *ScannerSuite) TestReadTokenError(c *C) { + scanner := newScanner(bytes.NewBufferString("`")) + tok, data, err := scanner.scan() + + c.Assert(err, Equals, nil) + c.Assert(data, Equals, "`") + c.Assert(tok, Equals, tokenError) +} diff --git a/internal/revision/token.go b/internal/revision/token.go new file mode 100644 index 0000000..abc4048 --- /dev/null +++ b/internal/revision/token.go @@ -0,0 +1,28 @@ +package revision + +// token represents a entity extracted from string parsing +type token int + +const ( + eof token = iota + + aslash + asterisk + at + caret + cbrace + colon + control + dot + emark + minus + number + obrace + obracket + qmark + slash + space + tilde + tokenError + word +) -- cgit