-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.ts
76 lines (67 loc) · 2.32 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import { MatchToken, matchToToken, Token } from 'match-to-token'
import { joinRegExps } from './util'
/**
 * Build a {@link TokenizerFactory} out of one or more RegExps.
 *
 * Every RegExp that should produce a token must capture through a
 * [named group](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Groups_and_Ranges#using_named_groups).
 *
 * ```ts
 * const tokenize = createTokenizer(
 *   /(?<ident>[a-z]+)/, // named groups determine token `group`
 *   /(?<number>[0-9]+)/
 * )
 * ```
 *
 * The RegExps are combined once, up front, through `joinRegExps`; each call of
 * the returned factory then starts a fresh `matchAll` scan over its input.
 * Calling the tokenizer and iterating it consume the same underlying match
 * cursor, so tokens already taken by one are not seen again by the other.
 *
 * @param regexps RegExps to match.
 * @returns A factory that turns an input string into a callable, iterable tokenizer.
 */
export const createTokenizer = (...regexps: RegExp[]) => {
  // NOTE(review): `String.prototype.matchAll` throws on a non-global RegExp, so
  // `joinRegExps` presumably yields a global pattern — confirm in './util'.
  const pattern = joinRegExps(regexps)

  const factory = (input: string) => {
    // Lazy cursor over every match of the combined pattern in `input`.
    const cursor = input.matchAll(pattern)

    // One step: pull the next raw match and hand it to `matchToToken`.
    const advance = (): MatchToken | null => {
      const step = cursor.next()
      return matchToToken(step.value)
    }

    // Make the step function usable in `for…of` / spread: keep stepping
    // until `advance` reports a falsy result.
    Object.defineProperty(advance, Symbol.iterator, {
      value: function* () {
        for (let token = advance(); token; token = advance()) {
          yield token
        }
      },
    })

    return advance as TokenizerCallableIterable
  }

  return factory as TokenizerFactory
}
/**
 * Create a {@link TokenizerCallableIterable} for given input string.
 *
 * This is the type of the function returned by `createTokenizer`; in the
 * examples below `tokenize` is such a function.
 *
 * ```ts
 * // using next()
 * const next = tokenize('hello 123')
 * console.log(next()) // => {group: 'ident', value: 'hello', index: 0}
 * console.log(next()) // => {group: 'number', value: '123', index: 6}
 * console.log(next()) // => undefined
 *
 * // using for of
 * for (const token of tokenize('hello 123')) {
 *   console.log(token)
 *   // => {group: 'ident', value: 'hello', index: 0}
 *   // => {group: 'number', value: '123', index: 6}
 * }
 *
 * // using spread
 * const tokens = [...tokenize('hello 123')]
 * console.log(tokens)
 * // => [
 * //   {group: 'ident', value: 'hello', index: 0},
 * //   {group: 'number', value: '123', index: 6}
 * // ]
 * ```
 *
 * NOTE(review): the example shows `undefined` once the input is exhausted,
 * while `createTokenizer` annotates its step function as returning
 * `MatchToken | null` — confirm which value `matchToToken` actually produces.
 *
 * @param input The string to tokenize.
 */
export type TokenizerFactory = (input: string) => TokenizerCallableIterable
/**
 * Can be called to return next [Token](https://github.com/stagas/match-to-token#token) or can be used as an
 * [Iterable](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols#the_iterable_protocol)
 * on **for-of** and **spread** operations.
 *
 * NOTE(review): the call signature is declared as always returning a `Token`,
 * but `createTokenizer` types its step function as `MatchToken | null` and
 * asserts it to this type — callers should presumably expect a non-token
 * result when no matches remain; confirm before relying on it under
 * `strictNullChecks`.
 */
export type TokenizerCallableIterable = (() => Token) & Iterable<Token>
// `createTokenizer` is also exposed as the module's default export.
export default createTokenizer