// Copyright ©2020 The go-latex Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
package latex
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"strings"
|
|
"text/scanner"
|
|
"unicode"
|
|
|
|
"github.com/go-latex/latex/token"
|
|
)
|
|
|
|
type texScanner struct {
|
|
sc scanner.Scanner
|
|
|
|
r rune
|
|
tok token.Token
|
|
}
|
|
|
|
func newScanner(r io.Reader) *texScanner {
|
|
sc := &texScanner{}
|
|
sc.sc.Init(r)
|
|
sc.sc.Mode = (scanner.ScanIdents | scanner.ScanInts | scanner.ScanFloats)
|
|
sc.sc.Mode |= scanner.ScanStrings
|
|
//scanner.ScanRawStrings)
|
|
// sc.sc.Error = func(s *scanner.Scanner, msg string) {}
|
|
sc.sc.IsIdentRune = func(ch rune, i int) bool {
|
|
return unicode.IsLetter(ch) //|| unicode.IsDigit(ch) && i > 0
|
|
}
|
|
sc.sc.Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r'
|
|
return sc
|
|
}
|
|
|
|
// Token returns the most recently parsed token
|
|
func (s *texScanner) Token() token.Token {
|
|
return s.tok
|
|
}
|
|
|
|
// Next iterates over all tokens.
|
|
// Next retrieves the most recent token with Token().
|
|
// It returns false once it reaches token.EOF.
|
|
func (s *texScanner) Next() bool {
|
|
s.tok = s.scan()
|
|
return s.tok.Kind != token.EOF
|
|
}
|
|
|
|
func (s *texScanner) scan() token.Token {
|
|
s.next()
|
|
pos := s.pos()
|
|
switch s.r {
|
|
case scanner.Ident:
|
|
return token.Token{
|
|
Kind: token.Word,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case '\\':
|
|
nxt := s.sc.Peek()
|
|
switch nxt {
|
|
case ' ':
|
|
s.next()
|
|
return token.Token{
|
|
Kind: token.Space,
|
|
Pos: pos,
|
|
Text: `\ `,
|
|
}
|
|
default:
|
|
return s.scanMacro()
|
|
}
|
|
case ' ':
|
|
return token.Token{
|
|
Kind: token.Space,
|
|
Pos: pos,
|
|
Text: ` `,
|
|
}
|
|
|
|
case '%':
|
|
line := s.scanComment()
|
|
return token.Token{
|
|
Kind: token.Comment,
|
|
Pos: pos,
|
|
Text: line,
|
|
}
|
|
|
|
case '$', '_', '=', '<', '>', '^', '/', '*', '-', '+',
|
|
'!', '?', '\'', ':', ',', ';', '.':
|
|
return token.Token{
|
|
Kind: token.Symbol,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
|
|
case '[':
|
|
return token.Token{
|
|
Kind: token.Lbrack,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case ']':
|
|
return token.Token{
|
|
Kind: token.Rbrack,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case '{':
|
|
return token.Token{
|
|
Kind: token.Lbrace,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case '}':
|
|
return token.Token{
|
|
Kind: token.Rbrace,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case '(':
|
|
return token.Token{
|
|
Kind: token.Lparen,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case ')':
|
|
return token.Token{
|
|
Kind: token.Rparen,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case scanner.Int, scanner.Float:
|
|
return token.Token{
|
|
Kind: token.Number,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case scanner.String, scanner.Char:
|
|
return token.Token{
|
|
Kind: token.Other,
|
|
Pos: pos,
|
|
Text: s.sc.TokenText(),
|
|
}
|
|
case scanner.EOF:
|
|
return token.Token{
|
|
Kind: token.EOF,
|
|
Pos: pos,
|
|
}
|
|
default:
|
|
panic(fmt.Errorf("unhandled token: %v %v", scanner.TokenString(s.r), s.r))
|
|
}
|
|
}
|
|
|
|
func (s *texScanner) next() {
|
|
s.r = s.sc.Scan()
|
|
}
|
|
|
|
func (s *texScanner) scanMacro() token.Token {
|
|
var (
|
|
macro = new(strings.Builder)
|
|
pos = s.pos()
|
|
)
|
|
s.next()
|
|
macro.WriteString(`\` + s.sc.TokenText())
|
|
|
|
return token.Token{
|
|
Kind: token.Macro,
|
|
Pos: pos,
|
|
Text: macro.String(),
|
|
}
|
|
}
|
|
|
|
func (s *texScanner) scanComment() string {
|
|
comment := new(strings.Builder)
|
|
comment.WriteString("%")
|
|
wsp := s.sc.Whitespace
|
|
defer func() {
|
|
s.sc.Whitespace = wsp
|
|
}()
|
|
s.sc.Whitespace = 0
|
|
|
|
for {
|
|
s.next()
|
|
if s.r == '\r' {
|
|
continue
|
|
}
|
|
if s.r == '\n' || s.r == scanner.EOF {
|
|
break
|
|
}
|
|
comment.WriteString(s.sc.TokenText())
|
|
}
|
|
return comment.String()
|
|
}
|
|
|
|
// func (s *texScanner) expect(want rune) {
|
|
// s.next()
|
|
// if s.r != want {
|
|
// panic(fmt.Errorf("invalid rune: got=%q, want=%q", s.r, want))
|
|
// }
|
|
// }
|
|
|
|
func (s *texScanner) pos() token.Pos {
|
|
return token.Pos(s.sc.Position.Offset)
|
|
}
|