-- Specs for the basic behavior of howl.util.lpeg_lexer: the module is
-- callable as a constructor, and it re-exports lpeg / lpeg.locale
-- definitions as module-local names.
describe 'howl.util.lpeg_lexer', ->
  it 'the module can be called directly to create a lexer (same as new())', ->
    assert.not_has_error -> l -> true

  it 'the resulting lexer can be called directly', ->
    lexer = l -> P'x' * Cp!
    assert.same { 2 }, lexer 'x'

  it 'imports lpeg definitions locally into the module', ->
    for op in *{'Cp', 'Ct', 'S', 'P'}
      assert.is_not_nil l[op]

  it 'imports lpeg.locale definitions locally into the module', ->
    for ldef in *{'digit', 'upper', 'print', 'lower'}
      assert.is_not_nil l[ldef]
-- new() accepts a lexer definition function; capture() wraps a pattern so
-- that a successful match yields <start-pos>, <style-name>, <end-pos>.
describe 'new(definition)', ->
  it 'accepts a function', ->
    assert.not_has_error -> l.new -> true

describe 'capture(style, pattern)', ->
  it 'returns a LPeg pattern', ->
    assert.equal 'pattern', lpeg.type l.capture('foo', P(1))

  it 'the returned pattern produces the three captures <start-pos>, <style-name> and <end-pos> if <pattern> matches', ->
    p = l.capture 'foo', P'fo'
    assert.same { 1, 'foo', 3 }, { p\match 'foobar' }
-- Predefined helper patterns exposed on the module: line endings, numeric
-- literal forms, and the zero-width line_start marker.
describe 'predefined helper patterns', ->
  describe '.eol', ->
    it 'matches and consumes new lines', ->
      assert.is_not_nil l.eol\match '\n'
      assert.is_not_nil l.eol\match '\r'
      assert.equals 2, (l.eol * Cp!)\match '\n'
      -- '\r\n' is consumed as a single two-character line ending
      assert.equals 3, (l.eol * Cp!)\match '\r\n'
      assert.is_nil l.eol\match 'a'
      assert.is_nil l.eol\match '2'

  describe '.float', ->
    it 'matches and consumes various float representations', ->
      for repr in *{ '34.5', '3.45e2', '1.234E1', '3.45e-2', '.32' }
        assert.is_not_nil l.float\match repr

  describe '.hexadecimal', ->
    it 'matches and consumes various hexadecimal representations', ->
      for repr in *{ '0xfeab', '0XDEADBEEF' }
        assert.is_not_nil l.hexadecimal\match repr

    it 'does not match illegal hexadecimal representations', ->
      assert.is_nil l.hexadecimal\match '0xCDEFG'

  describe '.hexadecimal_float', ->
    it 'matches and consumes various hexadecimal float representations', ->
      for repr in *{ '0xfep2', '0XAP-3' }
        assert.is_not_nil l.hexadecimal_float\match repr

    it 'does not match illegal hexadecimal representations', ->
      assert.is_nil l.hexadecimal_float\match '0xFGp3'

  describe '.octal', ->
    it 'matches and consumes octal representations', ->
      assert.is_not_nil l.octal\match '0123'

    it 'does not match illegal octal representations', ->
      -- '8' is not a valid octal digit
      assert.is_nil l.octal\match '0128'

  describe '.line_start', ->
    it 'matches after newline or at start of text', ->
      assert.is_not_nil l.line_start\match 'x'
      assert.is_not_nil (l.eol * l.line_start * P'x')\match '\nx'

    it 'does not consume anything', ->
      -- Cp! is still 2: line_start is zero-width
      assert.equals 2, (l.eol * l.line_start * Cp!)\match '\nx'
-- any() builds an ordered-choice pattern; sequence() chains its members.
-- Both accept either a single table or vararg parameters.
describe 'any(list)', ->
  it 'the resulting pattern is an ordered match of any member of <list>', ->
    p = l.any { 'one', 'two' }
    assert.is_not_nil p\match 'one'
    assert.is_not_nil p\match 'two'
    assert.is_nil p\match 'three'

  it '<list> can be vararg arguments', ->
    p = l.any 'one', 'two'
    assert.is_not_nil p\match 'two'

describe 'sequence(list)', ->
  it 'the resulting pattern is a chained match of all members of <list>', ->
    p = l.sequence { 'one', 'two' }
    assert.is_nil p\match 'one'
    assert.is_nil p\match 'two'
    assert.is_not_nil p\match 'onetwo'
    -- the sequence is anchored: a leading stray character fails the match
    assert.is_nil p\match 'Xonetwo'

  it '<list> can be vararg arguments', ->
    p = l.sequence 'one', 'two'
    assert.is_not_nil p\match 'onetwo'
-- word() matches whole-word occurrences of any listed word; separate()
-- wraps a pattern so it only matches when not embedded inside a word.
describe 'word(list)', ->
  -- grammar scans the whole subject for a standalone word match
  grammar = P {
    V'word' + P(1) * V(1)
    word: l.word { 'one', 'two2' }
  }

  it 'returns a pattern who matches any word in <list>', ->
    assert.is_not_nil grammar\match 'one'
    assert.is_not_nil grammar\match 'so one match'
    assert.is_not_nil grammar\match '!one'
    assert.is_not_nil grammar\match 'one()'
    assert.is_not_nil grammar\match 'then two2,'
    assert.is_nil grammar\match 'three'

  it 'only matches standalone words, not substring occurences', ->
    assert.is_nil grammar\match 'fone'
    assert.is_nil grammar\match 'one2'
    assert.is_nil grammar\match 'two2fold'
    assert.is_nil grammar\match 'two2_fold'

  it 'accepts var arg parameters', ->
    assert.is_not_nil l.word('one', 'two')\match 'two'

describe 'separate(p)', ->
  it 'returns a pattern that only matches if not part of a word', ->
    p = l.separate(P'foo')
    assert.is_not_nil p\match 'foo'
    assert.is_not_nil (l.blank * p)\match ' foo'
    assert.is_not_nil (p)\match 'foo '
    assert.is_not_nil (p)\match 'foo*'
    -- preceded or followed by a word character (or '_') => no match
    assert.is_nil (l.alpha * p)\match 'xfoo '
    assert.is_nil (P(1) * p)\match '_foo '
    assert.is_nil p\match 'foox '
    assert.is_nil p\match 'foo_ '
-- span() consumes from a start to a stop delimiter (or EOF), with optional
-- escaping; paired() does the same for a single self-delimiting pattern and
-- can optionally emit styled captures for the delimiters and content.
describe 'span(start_p, stop_p [, escape_p])', ->
  p = l.span('{', '}') * Cp!

  it 'matches and consumes from <start_p> up to and including <stop_p>', ->
    assert.equals 3, p\match '{}'
    assert.equals 5, p\match '{xx}'

  it 'always considers <EOF> as an alternate stop marker', ->
    assert.equals 3, p\match '{x'

  it 'allows escaping <stop_p> with <escape_p>', ->
    p = l.span('{', '}', '\\') * Cp!
    assert.equals 5, p\match '{\\}}'

describe 'paired(p, escape [, pair_style, content_style])', ->
  p = l.paired(1) * Cp!

  it 'matches and consumes from <p> up to and including the matching <p>', ->
    assert.equals 3, p\match '||x'
    assert.equals 5, p\match '|xx|x'

  it 'always considers <EOF> as an alternate stop marker', ->
    assert.equals 3, p\match '|x'

  it 'allows escaping the end delimiter with <escape>', ->
    p = l.paired(1, '\\') * Cp!
    assert.equals 5, p\match '|\\|| foo\\'

  context '(when pair_style and content_style are specified)', ->
    it 'captures the components in the specified styles', ->
      p = l.paired(1, nil, 'keyword', 'string')
      expected = {
        1, 'keyword', 2,
        2, 'string', 5,
        5, 'keyword', 6,
      }
      assert.same expected, { p\match '|foo|' }

    it 'still handles escapes properly', ->
      p = l.paired(1, '%', 'keyword', 'string')
      expected = {
        1, 'keyword', 2,
        2, 'string', 6,
        6, 'keyword', 7,
      }
      assert.same expected, { p\match '|f%|o|' }
-- Back-reference helpers: back_was() asserts the value of a previous named
-- capture, last_token_matches() inspects the last non-blank token, and
-- match_back() re-matches the text of a named capture.
describe 'back_was(name, value)', ->
  p = Cg(l.alpha^1, 'group') * ' ' * l.back_was('group', 'foo')

  it 'matches if the named capture <named> previously matched <value>', ->
    assert.is_not_nil p\match 'foo '

  it 'does not match if the named capture <named> did not match <value>', ->
    assert.is_nil p\match 'bar '

  it 'produces no captures', ->
    -- only the match position comes back => table length 1
    assert.equals 1, #{ p\match 'foo ' }

describe 'last_token_matches(pattern)', ->
  it 'matches if the last non-blank token matches pattern', ->
    p = l.blank^0 * l.digit^1 * l.blank^0 * l.last_token_matches(l.digit)
    assert.is_not_nil p\match '123 '
    assert.is_not_nil p\match '123 \t '
    assert.is_not_nil p\match ' 123 '
    assert.is_not_nil p\match ' 1 '
    assert.is_not_nil p\match '1 '
    assert.is_not_nil p\match '1 '
    assert.is_not_nil p\match ' 1'

describe 'match_back(name)', ->
  p = Cg(P'x', 'start') * 'y' * l.match_back('start')

  it 'matches the named capture given by <name>', ->
    assert.equals 4, p\match 'xyxzx'

  it 'produces no captures', ->
    assert.equals 1, #{ p\match 'xyxzx' }
-- Scanning helpers: scan_until() stops before the stop pattern, scan_to()
-- consumes it, and scan_through_indented() consumes all following lines
-- that are indented deeper than the current one.
describe 'scan_until(stop_p [, escape_p])', ->
  it 'matches until the specified pattern or <EOF>', ->
    assert.equals 3, (l.scan_until('x') * Cp!)\match '12x'
    assert.equals 4, (l.scan_until('x') * Cp!)\match '123'

  it 'allows escaping <stop_p> with <escape_p>', ->
    p = l.scan_until('}', '\\') * Cp!
    assert.equals 4, p\match '{\\}}'

describe 'scan_to(stop_p [, escape_p])', ->
  it 'matches until the specified pattern or <EOF>', ->
    -- unlike scan_until, the stop pattern itself is consumed
    assert.equals 4, (l.scan_to('x') * Cp!)\match '12x'
    assert.equals 4, (l.scan_to('x') * Cp!)\match '123'

  it 'allows escaping <stop_p> with <escape_p>', ->
    p = l.scan_to('}', '\\') * Cp!
    assert.equals 5, p\match '{\\}}'

describe 'scan_through_indented', ->
  p = P' ' * l.scan_through_indented! * Cp!

  it 'matches until the indentation is smaller or equal to the current line', ->
    assert.equals 4, p\match ' x\n y'
    -- NOTE(review): inner-string indentation reconstructed ('  y' deeper
    -- than ' x'); single spaces cannot produce the asserted position 8
    assert.equals 8, p\match ' x\n  y\n z'

  it 'matches until eol if it can not find any line with smaller or equal indentation', ->
    assert.equals 7, p\match ' x\n  y'

  it 'uses the indentation of the line containing eol if positioned right at it', ->
    p = l.eol * l.scan_through_indented! * Cp!
    -- match is started at init position 3, i.e. right at the '\n'
    assert.equals 8, p\match ' x\n  y\n z', 3
-- scan_until_capture() scans up to the text of a previous named capture
-- (with optional extra literal halt strings); match_until() repeats a
-- pattern up to a stop pattern; complement() negates a pattern.
describe 'scan_until_capture(name, escape, [, halt_at, halt_at_N, ..])', ->
  it 'matches until the named capture', ->
    p = Cg('x', 'start') * l.scan_until_capture('start')
    assert.equals 4, p\match 'xyzx'

  it 'stops matching at any optional halt_at parameters', ->
    p = Cg('x', 'start') * l.scan_until_capture('start', nil, 'z')
    assert.equals 3, p\match 'xyzx'

  it 'treats all stop parameters as strings and not patterns', ->
    -- '%w' is taken literally, not as a Lua character class
    p = Cg('x', 'start') * l.scan_until_capture('start', nil, '%w')
    assert.equals 4, p\match 'xyz%w'

  it 'does not halt on escaped matches', ->
    p = Cg('x', 'start') * l.scan_until_capture('start', '\\', 'z')
    assert.equals 7, p\match 'xy\\x\\zx'

  it 'matches until eof if no match is found', ->
    p = Cg('x', 'start') * l.scan_until_capture('start')
    assert.equals 4, p\match 'xyz'

describe 'match_until(stop_p, p)', ->
  p = l.match_until('\n', C(l.alpha)) * Cp!

  it 'matches p until stop_p matches', ->
    assert.same { 'x', 'y', 'z', 4 }, { p\match 'xyz\nx' }

  it 'matches until eof if stop_p is not found', ->
    assert.same { 'x', 'y', 3 }, { p\match 'xy' }

describe 'complement(p)', ->
  it 'matches if <p> does not match', ->
    assert.is_not_nil l.complement('a')\match 'b'
    assert.is_nil l.complement('a')\match 'a'
    assert.equals 3, (l.complement('a')^1 * Cp!)\match 'bca'
-- sub_lex_by_pattern() styles a dynamically-matched mode name and then
-- sub-lexes the following text either with that mode's lexer (emitting
-- rebasing instructions) or as a plain 'embedded' capture.
describe 'sub_lex_by_pattern(mode_p, mode_style, stop_p)', ->
  it 'lexes any leading space followed by eol as extended whitespace', ->
    p = l.sub_lex_by_pattern(l.alpha^1, 'keyword', '>')
    res = { p\match 'xx \n123>' }
    assert.same {
      1, 'keyword', 3,
      3, 'default:whitespace', 5,
      5, 'embedded', 8
    }, res

  context '(when no mode is found for the <mode_p> capture)', ->
    it 'emits mode match styling and an embedded capture for the sub text', ->
      p = l.sub_lex_by_pattern(l.alpha^1, 'keyword', '>')
      res = { p\match 'xx123>' }
      assert.same {
        1, 'keyword', 3,
        3, 'embedded', 6
      }, res

  context '(when a mode matching the <mode_p> capture exists)', ->
    local p

    before_each ->
      -- register a throw-away mode whose lexer captures digit runs
      sub_mode = lexer: l -> capture('number', digit^1)
      mode.register name: 'dynsub', create: -> sub_mode
      p = l.P'<' * l.sub_lex_by_pattern(l.alpha^1, 'keyword', '>')

    after_each ->
      mode.unregister 'dynsub'

    it 'emits mode match styling and rebasing instructions to the styler', ->
      assert.same {
        2, 'keyword', 8,
        8, {}, 'dynsub|embedded'
      }, { p\match '<dynsub>' }

    it "lexes the content using that mode's lexer until <stop_p>", ->
      assert.same {
        2, 'keyword', 8,
        8, { 1, 'number', 4 }, 'dynsub|embedded'
      }, { p\match '<dynsub123>' }
-- sub_lex() sub-lexes following text using a statically named mode,
-- falling back to a plain 'embedded' capture when the mode is unknown.
describe 'sub_lex(mode_name, stop_p)', ->
  it 'lexes any leading space followed by eol as extended whitespace', ->
    p = l.sub_lex('unknown', '>')
    res = { p\match ' \n123>' }
    assert.same {
      1, 'default:whitespace', 3,
      3, 'embedded', 6
    }, res

  context '(when no mode is found matching <mode_name>)', ->
    it 'captures using the embedded style until stop_p', ->
      p = l.sub_lex('unknown', '>')
      res = { p\match 'xx>' }
      assert.same {1, 'embedded', 3}, res

  context '(when a mode matching <mode_name> exists)', ->
    local p

    before_each ->
      -- register a throw-away mode whose lexer captures digit runs
      sub_mode = lexer: l -> capture('number', digit^1)
      mode.register name: 'sub', create: -> sub_mode
      p = l.sub_lex('sub', '>')

    after_each ->
      mode.unregister 'sub'

    -- helper: extract the nested sub-lexing capture table from a match
    sub_captures_for = (text) ->
      res = { p\match text }
      res[2]

    it 'emits rebasing instructions to the styler', ->
      assert.same { 1, {}, 'sub|embedded' }, { p\match '' }

    it "lexes the content using that mode's lexer until <stop_p>", ->
      assert.same {1, 'number', 3}, sub_captures_for '12>'

    it 'lexes until EOF if <stop_p> is not found', ->
      assert.same {1, 'number', 3}, sub_captures_for '12'
-- sub_lex_by_lexer() delegates sub-lexing to an explicitly provided lexer;
-- sub_lex_by_inline() sub-lexes a matched region with a provided pattern,
-- emitting 'inline|<base_style>' rebasing instructions.
describe 'sub_lex_by_lexer(name, base_style, lexer)', ->
  it 'sub lexes using the provided lexer', ->
    sub_lexer = l -> capture('number', digit^1)
    lexer = l -> sequence {
      capture('keyword', 'x'),
      sub_lex_by_lexer('string', l.eol, sub_lexer)
    }
    assert.same {
      1, 'keyword', 2,
      2, { 1, 'number', 2 }, 'inline|string'
    }, lexer('x2')

describe 'sub_lex_by_inline(base_style, match_p, pattern)', ->
  it 'sub lexes the matched text using the provided pattern', ->
    lexer = l ->
      sub_lexer = capture('number', digit^1)
      sequence {
        capture('keyword', 'x'),
        sub_lex_by_inline('string', l.scan_until(l.eol), sub_lexer)
      }
    assert.same {
      1, 'keyword', 2,
      2, { 1, 'number', 2 }, 'inline|string'
    }, lexer('x2')

  it 'adds a zero width styling instruction at the end if needed', ->
    lexer = l ->
      sub_lexer = capture('number', digit^1)
      alpha * sub_lex_by_inline('string', l.scan_until(l.eol), sub_lexer)
    assert.same {
      2, {
        1, 'number', 2,
        -- zero-width 'whitespace' instruction closing the sub-lexed region
        3, 'whitespace', 3
      }, 'inline|string'
    }, lexer('x2x')
-- compose() conjoins an override pattern with a base mode's lexer pattern;
-- the built-in lexing support handles whitespace and unrecognized tokens.
describe 'compose(base_mode, pattern)', ->
  it 'returns a conjunction pattern with <pattern> and the mode pattern', ->
    base_mode = lexer: l -> capture('number', digit^1)
    mode.register name: 'base_mode', create: -> base_mode
    p = l.compose('base_mode', l.capture('override', l.alpha))^0
    assert.same {
      1, 'override', 2,
      2, 'number', 3
    }, { p\match 'a2' }

describe 'built-in lexing support', ->
  it 'automatically lexes whitespace', ->
    lexer = l -> P'peace-and-quiet'
    assert.same { 1, 'whitespace', 3 }, lexer ' \n'

  it 'automatically skips non-recognized tokens', ->
    -- the leading '|' is skipped; styling starts at the recognized token
    lexer = l -> capture 'foo', P'foo'
    assert.same { 2, 'foo', 5 }, lexer '|foo'