From d0cf20797464ab12b41ccb5c603f67884a6e8e17 Mon Sep 17 00:00:00 2001 From: Anthony HAMON Date: Mon, 6 Feb 2017 17:53:11 +0100 Subject: Add revision implementation (#139) --- internal/revision/scanner.go | 117 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 internal/revision/scanner.go (limited to 'internal/revision/scanner.go') diff --git a/internal/revision/scanner.go b/internal/revision/scanner.go new file mode 100644 index 0000000..fb5f333 --- /dev/null +++ b/internal/revision/scanner.go @@ -0,0 +1,117 @@ +package revision + +import ( + "bufio" + "io" + "unicode" +) + +// runeCategoryValidator takes a rune as input and +// validates it belongs to a rune category +type runeCategoryValidator func(r rune) bool + +// tokenizeExpression aggegates a series of runes matching check predicate into a single +// string and provides given tokenType as token type +func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) { + var data []rune + data = append(data, ch) + + for { + c, _, err := r.ReadRune() + + if c == zeroRune { + break + } + + if err != nil { + return tokenError, "", err + } + + if check(c) { + data = append(data, c) + } else { + err := r.UnreadRune() + + if err != nil { + return tokenError, "", err + } + + return tokenType, string(data), nil + } + } + + return tokenType, string(data), nil +} + +var zeroRune = rune(0) + +// scanner represents a lexical scanner. +type scanner struct { + r *bufio.Reader +} + +// newScanner returns a new instance of scanner. +func newScanner(r io.Reader) *scanner { + return &scanner{r: bufio.NewReader(r)} +} + +// Scan extracts tokens and their strings counterpart +// from the reader +func (s *scanner) scan() (token, string, error) { + ch, _, err := s.r.ReadRune() + + if err != nil && err != io.EOF { + return tokenError, "", err + } + + switch ch { + case zeroRune: + return eof, "", nil + case ':': + return colon, string(ch), nil + case '~': + return tilde, string(ch), nil + case '^': + return caret, string(ch), nil + case '.': + return dot, string(ch), nil + case '/': + return slash, string(ch), nil + case '{': + return obrace, string(ch), nil + case '}': + return cbrace, string(ch), nil + case '-': + return minus, string(ch), nil + case '@': + return at, string(ch), nil + case '\\': + return aslash, string(ch), nil + case '?': + return qmark, string(ch), nil + case '*': + return asterisk, string(ch), nil + case '[': + return obracket, string(ch), nil + case '!': + return emark, string(ch), nil + } + + if unicode.IsSpace(ch) { + return space, string(ch), nil + } + + if unicode.IsControl(ch) { + return control, string(ch), nil + } + + if unicode.IsLetter(ch) { + return tokenizeExpression(ch, word, unicode.IsLetter, s.r) + } + + if unicode.IsNumber(ch) { + return tokenizeExpression(ch, number, unicode.IsNumber, s.r) + } + + return tokenError, string(ch), nil +} -- cgit