howl.ustrings

.ulen holds the number of characters in the string

assert.equal 3, ('foo').ulen
assert.equal 3, ('åäö').ulen

.ulower is a lower cased version of the string

assert.equal 'abcåäö', ('aBCåÄÖ').ulower

.uupper is an upper cased version of the string

assert.equal 'ABCÅÄÖ', ('abcåäö').uupper

.ureverse is a reversed version of the string

assert.equal 'abcåäö', ('öäåcba').ureverse

.multibyte is true if the string contains multibyte characters

assert.is_false ('foo').multibyte
assert.is_true ('åäö').multibyte

.is_empty is true for the empty string

assert.is_true ('').is_empty
assert.is_false (' ').is_empty

.is_blank is true for a string that is empty or only contains whitespace

assert.is_true ('\t\r\n').is_blank
assert.is_false ('x').is_blank

.stripped contains the string without leading or trailing whitespace

assert.equal 'foo', ('  \tfoo').stripped
assert.equal 'foo', ('foo ').stripped
assert.equal 'foo', ('  \tfoo ').stripped
assert.equal '', ('  \t').stripped
assert.equal '', ('').stripped

.is_valid_utf8 is true for valid utf8 strings only

assert.is_true ('abc\194\128').is_valid_utf8
assert.is_true ('\127').is_valid_utf8
assert.is_false ('\128').is_valid_utf8
assert.is_false ('abc\194').is_valid_utf8

ucompare(s1, s2) returns negative, 0 or positive if s1 is smaller, equal or greater than s2

assert.is_true 'a'\ucompare('b') < 0
assert.is_true 'a'\ucompare('ä') < 0
assert.equal 0, 'a'\ucompare('a')
assert.is_true 'ö'\ucompare('ä') > 0

starts_with(s) returns true if the string starts with the specified string

assert.is_true 'foobar'\starts_with 'foo'
assert.is_true 'foobar'\starts_with 'foobar'
assert.is_false 'foobar'\starts_with 'foobarx'
assert.is_false 'foobar'\starts_with '.oo'

ends_with(s) returns true if the string ends with the specified string

assert.is_true 'foobar'\ends_with 'bar'
assert.is_true 'foobar'\ends_with 'foobar'
assert.is_false 'foobar'\ends_with 'barx'
assert.is_false 'foobar'\ends_with '.ar'

contains(s) returns true if the string contains the specified string

assert.is_true 'foobar'\contains 'foobar'
assert.is_true 'foobar'\contains 'bar'
assert.is_true 'foobar'\contains 'foo'
assert.is_true 'foobar'\contains 'oba'
assert.is_false 'foobar'\contains 'arx'
assert.is_false 'foobar'\contains 'xfo'
assert.is_false 'foobar'\contains '.'

.is_likely_binary

is true for binary strings

assert.is_true ('\000\003xksj').is_likely_binary

is false for ordinary ASCII

assert.is_false ('abcDEFGHZ!"#¤%&//()"').is_likely_binary

is false for ordinary UTF-8

assert.is_false ('abc\194\128').is_likely_binary
assert.is_false ('åäöÅÄÖ').is_likely_binary

usub(i, [j])

s = 'aåäöx'

operates on characters instead of bytes

assert.equal 'aåä', s\usub 1, 3
assert.equal 'aåä', s\usub(1, 3)

adjusts the indexes similarly to string.sub

assert.equal 'äöx', s\usub 3 -- j defaults to -1
assert.equal 'öx', s\usub -2 -- i counts from back
assert.equal 'aåäöx', s\usub -7 -- is corrected to 1
assert.equal 'aåäöx', s\usub 1, 123 -- j is corrected to last character
assert.equal '', s\usub 3, 2 -- empty string when i > j

character access using indexing notation

single character strings can be accessed using indexing notation

s = 'aåäöx'
assert.equal 'a', s[1]
assert.equal 'ä', s[3]

access using invalid indexes returns an empty string

s = 'abc'
assert.equal '', s[0]
assert.equal '', s[4]

the index can be negative similarly to sub()

s = 'aåäöx'
assert.equal 'ä', s[-3]

umatch(pattern [, init])

init specifies a character offset

assert.same { 'ö', 4 }, { 'äåö'\umatch '(%S+)()', 3 }

if init is greater than the length nil is returned

assert.is_nil '1'\umatch '1', 2

accepts regex patterns

assert.same {'ö'}, { '/ö'\umatch r'\\p{L}'}

ugmatch(pattern)

returns character offsets instead of byte offsets

s = 'föo bãr'
gen = s\ugmatch '(%S+)()'
rets = {}
-- Call the generator by hand (rather than in a for loop) so that every value
-- returned for a match - the captured word and the position capture - is
-- collected into its own table.
while true
  vals = { gen! }
  break if #vals == 0 -- no values returned: treated as end of iteration
  append rets, vals

-- The position captures are expected as character offsets: 'ö' and 'ã' are
-- multibyte, so the corresponding byte offsets would be larger.
assert.same { { 'föo', 4 }, { 'bãr', 8 } }, rets

accepts regex patterns

s = 'well hello there'
matches = [m for m in s\ugmatch r'\\w+']
assert.same { 'well', 'hello', 'there' }, matches

ufind(pattern [, init [, plain]])

returns character offsets instead of byte offsets

assert.same { 2, 4, 5 }, { 'ä öx'\ufind '%s.+x()' }

adjusts middle-of-sequence position returns to character start

assert.same { 1, 1 }, { 'äöx'\ufind '%S' }

init specifies a character offset

assert.same { 3, 3, 'ö' }, { 'äåö'\ufind '(%S+)', 3 }

if init is greater than the length nil is returned

assert.is_nil '1'\ufind '1', 2

accepts regexes

assert.same { 2, 2 }, { '!ä öx'\ufind r'\\pL' }

returns empty match at init for empty string

assert.same { 2, 1 }, { 'abc'\ufind '', 2 }

converts position matches correctly

assert.same { 1, 3, 1, 4 }, { 'åäö'\ufind '()%S+()' }

rfind(pattern [, init])

searches backward from end using byte offsets

assert.same { 5, 6 }, { 'äöxx'\rfind 'xx' }

searches backward from init, when provided

assert.same { 5, 5 }, { 'äöxxx'\rfind 'x', 5 }

urfind(text [, init])

searches backwards from end using char offsets

assert.same { 4, 5 }, { 'äöxäöx'\urfind 'äö' }
assert.same { 3, 6 }, { 'äöxböx'\urfind 'xböx' }
assert.same { 1, 3 }, { 'äöxböx'\urfind 'äöx' }

returns nothing for no matches

assert.same {}, { 'hello'\urfind 'x' }

searches backwards from init, when provided

assert.same { 1, 2 }, { 'äöxäöx'\urfind 'äö', 4 }
assert.same { 1, 2 }, { 'äöxäöx'\urfind 'äö', -3 }
assert.same { 4, 5 }, { 'äöxäöx'\urfind 'äö', 5 }
assert.same { 4, 5 }, { 'äöxäöx'\urfind 'äö', -2 }

matches text entirely before init

assert.same {1, 2}, { 'abcabc'\urfind 'ab', 4 }

returns empty match before init for empty string

assert.same { 2, 1 }, { 'abc'\urfind '', 2 }

count(s, pattern = false)

returns the number of occurrences of s within the string

assert.equal 1, 'foobar'\count 'foo'
assert.equal 2, 'foobar'\count 'o'
assert.equal 0, 'foobar'\count 'x'

s is evaluated as a pattern if <pattern> is true

assert.equal 3, 'foo'\count('%w', true)
assert.equal 2, 'foobar'\count(r'[ab]', true)

s is evaluated as a pattern if it is a regex, regardless of <pattern>

assert.equal 2, 'foobar'\count(r'[ab]')

byte_offset(...)

returns byte offsets for all character offsets passed as parameters

assert.same {1, 3, 5, 7}, { 'äåö'\byte_offset 1, 2, 3, 4 }

accepts non-increasing offsets

assert.same {1, 1}, { 'ab'\byte_offset 1, 1 }

raises an error for decreasing offsets

assert.raises 'Decreasing offset', -> 'äåö'\byte_offset 2, 1

raises error for out-of-bounds offsets

assert.raises 'out of bounds', -> 'äåö'\byte_offset 5
assert.raises 'offset', -> 'äåö'\byte_offset 0
assert.raises 'offset', -> 'a'\byte_offset -1

when the parameter is a table, it returns a table for all offsets within that table

assert.same {1, 3, 5}, 'äåö'\byte_offset { 1, 2, 3 }

char_offset(...)

returns character offsets for all byte offsets passed as parameters

assert.same {1, 2, 3, 4}, { 'äåö'\char_offset 1, 3, 5, 7 }

accepts non-increasing offsets

assert.same {2, 2}, { 'ab'\char_offset 2, 2 }

raises an error for decreasing offsets

assert.raises 'Decreasing offset', -> 'äåö'\char_offset 3, 1

raises error for out-of-bounds offsets

assert.raises 'out of bounds', -> 'ab'\char_offset 4
assert.raises 'offset', -> 'äåö'\char_offset 0
assert.raises 'offset', -> 'a'\char_offset -1

when the parameter is a table, it returns a table for all offsets within that table

assert.same {1, 2, 3, 4}, 'äåö'\char_offset { 1, 3, 5, 7 }

truncate(len, opts = {})

truncates long strings to at most <len> chars

s = 'åäöñÅÄÖåäö'
assert.equal 'åäöñ..', s\truncate(6)
assert.equal 'åäöñ[..]', s\truncate(8, omission_suffix: '[..]')
assert.equal 'åäö<Ə>', s\truncate(6, omission_suffix: '<Ə>')

assert.equal '..ÅÄÖåäö', s\truncate(8, omission_prefix: '..')
assert.equal '[..]Öåäö', s\truncate(8, omission_prefix: '[..]')
assert.equal '<Ə>ÄÖåäö', s\truncate(8, omission_prefix: '<Ə>')

does not truncate unless needed

s = 'åäöåäö'
assert.equal 'åäöåäö', s\truncate(7)
assert.equal 'åäöåäö', s\truncate(6)
assert.equal 'åäöåäö', s\truncate(7, omission_prefix: '..')
assert.equal 'åäöåäö', s\truncate(6, omission_prefix: '..')

skips the omission if the result would go beyond <len>

s = 'åäö'
assert.equal 'åä', s\truncate(2, omission_suffix: '[..]')
assert.equal '..', s\truncate(2, omission_suffix: '..')
assert.equal 'å', s\truncate(1, omission_suffix: '..')

assert.equal 'äö', s\truncate(2, omission_prefix: '[..]')
assert.equal '..', s\truncate(2, omission_prefix: '..')
assert.equal 'ö', s\truncate(1, omission_prefix: '..')

split(pattern)

splits the string by <pattern>

assert.same { '1' }, ('1')\split(',')
assert.same { '1', '2', '3' }, ('1,2,3')\split(',')
assert.same { '1', '2', '' }, ('1,2,')\split(',')
assert.same { '', '' }, (',')\split(',')

treats <pattern> as a lua pattern

assert.same { 'x', 'y', 'z' }, ('x.y,z')\split('[.,]')