aboutsummaryrefslogblamecommitdiffstats
path: root/internal/revision/scanner.go
blob: fb5f333f7f8a9144cbcbfa6e8342ed5dc412626b (plain) (tree)




















































































































                                                                                                                        
package revision

import (
	"bufio"
	"io"
	"unicode"
)

// runeCategoryValidator is a predicate over a single rune: it reports
// whether that rune belongs to the category the validator stands for.
type runeCategoryValidator func(rune) bool

// tokenizeExpression aggregates a run of runes into a single token string.
//
// ch is the rune the caller has already consumed; it always becomes the
// first rune of the result. Further runes are read from r for as long as
// check accepts them. The first rejected rune is pushed back onto r so the
// next read sees it again. Reading also stops at end of input or when a NUL
// rune is read.
//
// On success it returns tokenType, the collected string and a nil error. If
// reading from r fails with anything other than io.EOF, or the rejected rune
// cannot be unread, it returns tokenError, an empty string and the error.
func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) {
	data := []rune{ch}

	for {
		c, _, err := r.ReadRune()

		// bufio.Reader.ReadRune yields rune 0 alongside every error, so the
		// error must be inspected before the zero-rune test; otherwise a
		// failing reader is mistaken for a clean end of input.
		if err != nil && err != io.EOF {
			return tokenError, "", err
		}

		if c == zeroRune {
			break
		}

		if !check(c) {
			// Give the rune back: it belongs to the next token.
			if err := r.UnreadRune(); err != nil {
				return tokenError, "", err
			}

			break
		}

		data = append(data, c)
	}

	return tokenType, string(data), nil
}

// zeroRune is the NUL rune (U+0000). The scanner compares the runes it
// reads against it to recognise the end of the input.
var zeroRune rune

// scanner represents a lexical scanner. It pulls runes from a buffered
// reader and groups them into tokens.
type scanner struct {
	// r is the buffered rune source that tokens are read from.
	r *bufio.Reader
}

// newScanner builds a scanner that reads its input from r. The reader is
// wrapped in a bufio.Reader so that runes can be read and pushed back.
func newScanner(r io.Reader) *scanner {
	s := new(scanner)
	s.r = bufio.NewReader(r)

	return s
}

// scan reads the next token from the underlying reader and returns it
// together with the text it was built from.
//
// Punctuation runes, whitespace and control characters each yield a
// one-rune token; a run of letters yields a word and a run of numbers yields
// a number. End of input (and a NUL rune) is reported as eof with an empty
// string. A rune that fits no category comes back as tokenError carrying that
// rune and a nil error, while a read failure other than io.EOF comes back as
// tokenError with an empty string and the error.
func (s *scanner) scan() (token, string, error) {
	ch, _, err := s.r.ReadRune()
	if err != nil && err != io.EOF {
		return tokenError, "", err
	}

	// ReadRune hands back the zero rune once the input is exhausted.
	if ch == zeroRune {
		return eof, "", nil
	}

	// Runes that form a complete token on their own.
	var punct token
	matched := true

	switch ch {
	case ':':
		punct = colon
	case '~':
		punct = tilde
	case '^':
		punct = caret
	case '.':
		punct = dot
	case '/':
		punct = slash
	case '{':
		punct = obrace
	case '}':
		punct = cbrace
	case '-':
		punct = minus
	case '@':
		punct = at
	case '\\':
		punct = aslash
	case '?':
		punct = qmark
	case '*':
		punct = asterisk
	case '[':
		punct = obracket
	case '!':
		punct = emark
	default:
		matched = false
	}

	if matched {
		return punct, string(ch), nil
	}

	// The order is significant: runes such as tab or newline are both
	// whitespace and control characters, and must be reported as space.
	switch {
	case unicode.IsSpace(ch):
		return space, string(ch), nil
	case unicode.IsControl(ch):
		return control, string(ch), nil
	case unicode.IsLetter(ch):
		return tokenizeExpression(ch, word, unicode.IsLetter, s.r)
	case unicode.IsNumber(ch):
		return tokenizeExpression(ch, number, unicode.IsNumber, s.r)
	}

	return tokenError, string(ch), nil
}