aboutsummaryrefslogtreecommitdiffstats
path: root/internal/revision/scanner.go
blob: c46c21b7959cdcd1c343f8539f7e126ae58cce82 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package revision

import (
	"bufio"
	"io"
	"unicode"
)

// runeCategoryValidator is a predicate over a single rune: it reports
// whether r belongs to a given rune category. tokenizeExpression takes
// one to decide which runes extend the token being built.
type runeCategoryValidator func(r rune) bool

// tokenizeExpression reads runes from r for as long as they satisfy check and
// joins them, prefixed by the already-consumed rune ch, into a single string
// that is returned together with tokenType.
//
// Reading stops at end of input (io.EOF) or at the first rune that fails
// check; that rune is pushed back onto r so the next read sees it. Any other
// read error, or a failure to push the rune back, is returned with tokenError
// and an empty string.
func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) {
	data := []rune{ch}

	for {
		c, _, err := r.ReadRune()

		// Inspect the error before the rune: a failed ReadRune yields a
		// zero rune, so testing the rune first would hide every read error.
		if err == io.EOF {
			break
		}

		if err != nil {
			return tokenError, "", err
		}

		if !check(c) {
			// c is not part of this token; hand it back to the reader.
			if err := r.UnreadRune(); err != nil {
				return tokenError, "", err
			}

			break
		}

		data = append(data, c)
	}

	return tokenType, string(data), nil
}

// zeroRune is the zero-valued rune (U+0000). scan maps it to the eof token.
var zeroRune = rune(0)

// scanner represents a lexical scanner. It reads runes from a buffered
// reader and turns them into tokens (see scan).
type scanner struct {
	// r is the buffered input the tokens are read from.
	r *bufio.Reader
}

// newScanner wraps r in a buffered reader and returns a scanner that
// reads its input from it.
func newScanner(r io.Reader) *scanner {
	s := new(scanner)
	s.r = bufio.NewReader(r)

	return s
}

// scan reads the next token from the underlying reader and returns it along
// with the text it was built from.
//
// A zero rune (what the reader yields at end of input) produces eof with an
// empty string. Punctuation, whitespace and control characters each produce a
// one-rune token; a run of letters is merged into a word and a run of numeric
// runes into a number. Any other rune is returned as tokenError with a nil
// error. A read error other than io.EOF is returned with tokenError and an
// empty string.
func (s *scanner) scan() (token, string, error) {
	ch, _, err := s.r.ReadRune()
	if err != nil && err != io.EOF {
		return tokenError, "", err
	}

	if ch == zeroRune {
		return eof, "", nil
	}

	// Single-character punctuation tokens.
	var (
		tok     token
		matched = true
	)

	switch ch {
	case ':':
		tok = colon
	case '~':
		tok = tilde
	case '^':
		tok = caret
	case '.':
		tok = dot
	case '/':
		tok = slash
	case '{':
		tok = obrace
	case '}':
		tok = cbrace
	case '-':
		tok = minus
	case '@':
		tok = at
	case '\\':
		tok = aslash
	case '?':
		tok = qmark
	case '*':
		tok = asterisk
	case '[':
		tok = obracket
	case '!':
		tok = emark
	default:
		matched = false
	}

	if matched {
		return tok, string(ch), nil
	}

	// Everything else is classified by Unicode category; the order of the
	// cases matters because the categories are tested first-match-wins.
	switch {
	case unicode.IsSpace(ch):
		return space, string(ch), nil
	case unicode.IsControl(ch):
		return control, string(ch), nil
	case unicode.IsLetter(ch):
		return tokenizeExpression(ch, word, unicode.IsLetter, s.r)
	case unicode.IsNumber(ch):
		return tokenizeExpression(ch, number, unicode.IsNumber, s.r)
	}

	return tokenError, string(ch), nil
}