org-vim/src/testdir/test_regexp_utf8.vim

" Tests for regexp in utf8 encoding

func s:equivalence_test()
  let str = "A������������ゾ���廎�廕�滯廕戶漲廕兕漯廕矜漁廕唸熔廕氮熄 B��廎��廎� C�������廎�� D���廎𢈈�廎墰�廎� E�������������廎竢�廎嘣�廎𦸇爾廕筵獐廕擔�廙��廙� F�廎嵴� G���罌�エオジ廎𥻗� H臚藻�廎Ｓ舅廎艮落廎芬惕 I����蘑蘆蘇蠔襤����廎矜葬廙�� J譬� K譯�ガ廎唸葡廎氯惟�� L贏躉躅醴��廎嗤虜廎筵蜈漹� M廎擔�廜� N����ゼ廜��廜���� O������������ギグヂ������廜𣉖�廜靟�廙𣉖�廙靟�廙竢�廙嘣�廙𦸇�廙�誥 P④廜竢�漹� Q� R������廜嘣�廜𦸇�漹曰𩵼 S�����廜�飾廜戶髡廜兩掛�� T籠聾臟⑵⑷��廜花僧廜桑僭 U����襯讀贗躓酈⑸��������廜聊僑廜嗤兢廜筵誘廙艮豪廙花賑廙桑趕  V⑻廜澄嗾 W霽廕�廕��廕�� X廕𢈈� Y�韃顫⑼��廕墰輔廙氮辣廙� Z饕驍鬚ⅰ廕靟�廕婙惚 a�獺璽瓊瓣疇�����ゥタ��廑𥕥�廕𠹳滿廕€漸廕扭漫廕後滬廕脊滷廕喔熊廕猾悼 b��廘矜�廎��廎� c癟������廎㕓��� d���廘凍�廑𤒹�廎溆�廎𤒹� e癡矇礙禱���������廑𠯆�廎䤾�廎𧘲�廕嫗獄廕賦瑪廙��廙�� f�廘桑�廎�� g��纂耀ォカス�廑�腹�� h艦藹�廎€艇廎扭萱廎後�漹刷� i穫穩簾簿藺蘋蘊蠕���禸廑𥕝葉廎脊�廙� j警ゴ� k譟�キ廑�萵廎喔葭漹虎� l贍躁躂���廎猿號廎頗蜇漹� m廘脊蛾廜�� n簽����ソ廘唸�廜��廜刅��� o簷籀繫繭繹繪���①�クケッ������伂廜溆�廜𤒹�廙溆�廙𤒹�廙鐧�廙䟭�廙鍅�廙﹥誨 p⑤廘晨善廑��廜� q�� r������刓廘聊絨廑刅�廜鍅��� s���禳��廘氮�廜﹥馳廜丟鳩廜抱恢 t籟聽襲⑴⑶��廘菲鼠廜凍僖廜晨�漹� u羅繳羶羹觼贖躑轡鑄鑒��⑹������廘擔�廜喔僱廜猿凳廜頗誑廙扭貍廙後賒廙脊跼 v�廑𣉖厭廜� w霾廕��廕��廕刅� x廕𦘒� y羸藩韁⑽��廕𥕥�廙喔輓廙猿遜 z驕髒鱉ⅱ廘嗤�廕𤒹�廕𨰝惇"
  let groups = split(str)
  for group1 in groups
      for c in split(group1, '\zs')
	" next statement confirms that equivalence class matches every
	" character in group
        call assert_match('^[[=' .. c .. '=]]*$', group1)
        for group2 in groups
          if group2 != group1
	    " next statement confirms that equivalence class doesn't match
	    " character in any other group
            call assert_equal(-1, match(group2, '[[=' .. c .. '=]]'), c)
          endif
        endfor
      endfor
  endfor
endfunc

" Run the equivalence class checks with 'regexpengine' set to 1.
func Test_equivalence_re1()
  set re=1
  try
    call s:equivalence_test()
  finally
    " Always go back to automatic engine selection, even when the helper
    " aborts, so the engine choice cannot leak into later tests.
    set re=0
  endtry
endfunc

" Run the equivalence class checks with 'regexpengine' set to 2.
func Test_equivalence_re2()
  set re=2
  try
    call s:equivalence_test()
  finally
    " Always go back to automatic engine selection, even when the helper
    " aborts, so the engine choice cannot leak into later tests.
    set re=0
  endtry
endfunc

func s:classes_test()
  if has('win32')
    set iskeyword=@,48-57,_,192-255
  endif
  set isprint=@,161-255
  call assert_equal('Mot繹rhead', matchstr('Mot繹rhead', '[[:print:]]\+'))

  let alnumchars = ''
  let alphachars = ''
  let backspacechar = ''
  let blankchars = ''
  let cntrlchars = ''
  let digitchars = ''
  let escapechar = ''
  let graphchars = ''
  let lowerchars = ''
  let printchars = ''
  let punctchars = ''
  let returnchar = ''
  let spacechars = ''
  let tabchar = ''
  let upperchars = ''
  let xdigitchars = ''
  let identchars = ''
  let identchars1 = ''
  let kwordchars = ''
  let kwordchars1 = ''
  let fnamechars = ''
  let fnamechars1 = ''
  let i = 1
  while i <= 255
    let c = nr2char(i)
    if c =~ '[[:alpha:]]'
      let alphachars .= c
    endif
    if c =~ '[[:alnum:]]'
      let alnumchars .= c
    endif
    if c =~ '[[:backspace:]]'
      let backspacechar .= c
    endif
    if c =~ '[[:blank:]]'
      let blankchars .= c
    endif
    if c =~ '[[:cntrl:]]'
      let cntrlchars .= c
    endif
    if c =~ '[[:digit:]]'
      let digitchars .= c
    endif
    if c =~ '[[:escape:]]'
      let escapechar .= c
    endif
    if c =~ '[[:graph:]]'
      let graphchars .= c
    endif
    if c =~ '[[:lower:]]'
      let lowerchars .= c
    endif
    if c =~ '[[:print:]]'
      let printchars .= c
    endif
    if c =~ '[[:punct:]]'
      let punctchars .= c
    endif
    if c =~ '[[:return:]]'
      let returnchar .= c
    endif
    if c =~ '[[:space:]]'
      let spacechars .= c
    endif
    if c =~ '[[:tab:]]'
      let tabchar .= c
    endif
    if c =~ '[[:upper:]]'
      let upperchars .= c
    endif
    if c =~ '[[:xdigit:]]'
      let xdigitchars .= c
    endif
    if c =~ '[[:ident:]]'
      let identchars .= c
    endif
    if c =~ '\i'
      let identchars1 .= c
    endif
    if c =~ '[[:keyword:]]'
      let kwordchars .= c
    endif
    if c =~ '\k'
      let kwordchars1 .= c
    endif
    if c =~ '[[:fname:]]'
      let fnamechars .= c
    endif
    if c =~ '\f'
      let fnamechars1 .= c
    endif
    let i += 1
  endwhile

  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
  call assert_equal("\b", backspacechar)
  call assert_equal("\t ", blankchars)
  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
  call assert_equal("0123456789", digitchars)
  call assert_equal("\<Esc>", escapechar)
  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
  call assert_equal('abcdefghijklmnopqrstuvwxyz繕��獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繪羅繳羶羹羸臘藩', lowerchars)
  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~�癒瞽瞿瞻瞼礎禮穡穢穠竄竅簫簧簪簞簣簡糧織繕繞繚繡繒繙罈翹翻職聶���������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩', printchars)
  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZ������������������������������', upperchars)
  call assert_equal("\r", returnchar)
  call assert_equal("\t\n\x0b\f\r ", spacechars)
  call assert_equal("\t", tabchar)
  call assert_equal('0123456789ABCDEFabcdef', xdigitchars)

  if has('win32')
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz���������������������������������癒瞽瞿瞻瞼礎禮繕��������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繪羅繳羶羹羸臘藩'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz繕���������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩'
  else
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz繕���������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz繕���������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩'
  endif

  if has('win32')
    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~�癒瞽瞿瞻瞼礎禮穡穢穠竄竅簫簧簪簞簣簡糧織繕繞繚繡繒繙罈翹翻職聶���������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩'
  elseif has('amiga')
    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~�癒瞽瞿瞻瞼礎禮穡穢穠竄竅簫簧簪簞簣簡糧織繕繞繚繡繒繙罈翹翻職聶���������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩'
  elseif has('vms')
    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~�癒瞽瞿瞻瞼礎禮穡穢穠竄竅簫簧簪簞簣簡糧織繕繞繚繡繒繙罈翹翻職聶���������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩'
  else
    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~�癒瞽瞿瞻瞼礎禮穡穢穠竄竅簫簧簪簞簣簡糧織繕繞繚繡繒繙罈翹翻職聶���������������������������������獺璽瓊瓣疇疆癟癡矇礙禱穫穩簾簿簸簽簷籀繫繭繹繩繪羅繳羶羹羸臘藩'
  endif

  call assert_equal(identchars_ok, identchars)
  call assert_equal(kwordchars_ok, kwordchars)
  call assert_equal(fnamechars_ok, fnamechars)

  call assert_equal(identchars1, identchars)
  call assert_equal(kwordchars1, kwordchars)
  call assert_equal(fnamechars1, fnamechars)
endfunc

" Run the character class checks with 'regexpengine' set to 1.
func Test_classes_re1()
  set re=1
  try
    call s:classes_test()
  finally
    " Always go back to automatic engine selection, even when the helper
    " aborts, so the engine choice cannot leak into later tests.
    set re=0
  endtry
endfunc

" Run the character class checks with 'regexpengine' set to 2.
func Test_classes_re2()
  set re=2
  try
    call s:classes_test()
  finally
    " Always go back to automatic engine selection, even when the helper
    " aborts, so the engine choice cannot leak into later tests.
    set re=0
  endtry
endfunc

" A collection range written back to front ("[c-a]") must fail with E944
" for every value of 'regexpengine'.
func Test_reversed_range()
  let cmd = 'call match("abc def", "[c-a]")'
  for engine in [0, 1, 2]
    let &regexpengine = engine
    " The engine number is passed as the message reported on failure.
    call assert_fails(cmd, 'E944:', engine)
  endfor
  let &regexpengine = 0
endfunc

" A very large character range in a collection: engine 1 is expected to
" fail with E945, engine 2 is expected to accept it and match normally.
func Test_large_class()
  let range_pat = '[\u3000-\u4000]'

  " Engine 1: using the range must raise E945.
  let &regexpengine = 1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  " Engine 2: plain ASCII text is outside the range, U+3042 is inside it.
  let &regexpengine = 2
  call assert_equal(0, 'abc def' =~# range_pat)
  call assert_equal(1, "\u3042" =~# range_pat)

  let &regexpengine = 0
endfunc

" An over-long \%[] optional sequence must fail with E339 under engine 1.
func Test_optmatch_toolong()
  let &regexpengine = 1
  " Can only handle about 8000 characters, so use 9000 of them.
  " The backslash is doubled because the pattern is placed inside a
  " double-quoted string in the command that assert_fails() executes.
  let cmd = 'call match("abc def", "\\%[' .. repeat('x', 9000) .. ']")'
  call assert_fails(cmd, 'E339:')
  let &regexpengine = 0
endfunc

" Test for regexp patterns with multi-byte support, using utf-8.
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aia瓊璽aiuvna ', 'aia瓊璽aiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'dda瓊璽bcd', 'a瓊璽'])								" equivalence classes
  call add(tl, [2, '[^鉊� ]\+', '鉊﹤腹 oijasoifjos ifjoisj f osij j 鉊﹤腹鉊﹤腹鉊� abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start 鉊｛bcd鉊� ', ' 鉊｛bcd鉊�'])
  call add(tl, [2, '[鉊︴:alpha:][=a=]]\+', '879 aia瓊鉊￣Ｒ腹aiuvna ', 'aia瓊鉊￣Ｒ腹aiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', '穫a', '穫a'])
  call add(tl, [2, '\p*', 'a��', 'a��'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*穡xx ', 'xx'])
  call add(tl, [2, '\f\+', '&*�fname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.鉊�', 'x鉊﹤�x y鉊『', 'y鉊�'])
  call add(tl, [2, '.鉊﹤�', 'x鉊﹤�x y鉊『', 'x鉊﹤�'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, '繳\Z', 'x'])
  call add(tl, [2, '����\Z', '����', '����'])
  call add(tl, [2, '�眥��硌�\Z', '����', '����'])
  call add(tl, [2, '����\Z', '�眥��硌�', '�眥��硌�'])
  call add(tl, [2, '�眥��硌�\Z', '�眥��硌�', '�眥��硌�'])
  call add(tl, [2, '�眥\Z', '�眥�硊', '�硊'])
  call add(tl, [2, "蚹\u200d\u05b9x\\Z", "x蚹\u200d\u05b9xy", "蚹\u200d\u05b9x"])
  call add(tl, [2, "蚹\u200d\u05b9x\\Z", "x蚹\u200dxy", "蚹\u200dx"])
  call add(tl, [2, "蚹\u200dx\\Z", "x蚹\u200d\u05b9xy", "蚹\u200d\u05b9x"])
  call add(tl, [2, "蚹\u200dx\\Z", "x蚹\u200dxy", "蚹\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'dda瓊璽bcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    let matchidx = 3
    for engine in [0, 1, 2]
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      try
        let l = matchlist(text, pat)
      catch
        call assert_report('Error ' . engine . ': pat: \"' . pat .
		    \ '\", text: \"' . text .
		    \ '\", caused an exception: \"' . v:exception . '\"')
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
		    \ '\", text: \"' . text .
		    \ '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
		    \ '\", text: \"' . text . '\", match: \"' . l[0] .
		    \ '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report('Error ' . engine . ': pat: \"' . pat .
		    \ '\", text: \"' . text . '\", match: \"' . l[0] .
		    \ '\", expected: \"' . t[matchidx] . '\"')
      else
        " Test passed
      endif
      if len(l) > 0
        " check all the nine submatches
        for i in range(1, 9)
          if len(t) <= matchidx + i
            let e = ''
          else
            let e = t[matchidx + i]
          endif
          if l[i] != e
            call assert_report('Error ' . engine . ': pat: \"' . pat .
                  \ '\", text: \"' . text . '\", submatch ' . i .
                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor
  set regexpengine&
endfunc

" check that 'ambiwidth' does not change the meaning of \p
func Test_regexp_ambiwidth()
  " U+00EC must match \p at index 0 for every combination of engine 1/2
  " and 'ambiwidth' single/double.
  for engine in [1, 2]
    for width in ['single', 'double']
      exe 'set regexpengine=' .. engine .. ' ambiwidth=' .. width
      call assert_equal(0, match("\u00EC", '\p'))
    endfor
  endfor
  " Put both options back to their defaults.
  set regexpengine& ambiwidth&
endfunc

" Checks case-insensitive matching around a non-ASCII variant of "i".
" Runs with whatever 'regexpengine' the caller has selected.
" NOTE(review): the non-ASCII character in the literals below is presumably
" U+0130 (the \%u0130 and [\u0130] patterns target it); it appears
" mis-encoded in this copy of the file -- confirm against the original.
func Run_regexp_ignore_case()
  " Without \c, replacing each captured character by itself is a no-op.
  call assert_equal('iI襤', substitute('iI襤', '\([iI襤]\)', '\1', 'g'))

  " With \c, a collection holding only the special character must replace
  " just that character, not "i" or "I".
  call assert_equal('iIx', substitute('iI襤', '\c\([襤]\)', 'x', 'g'))
  " With \c, a literal "i" must replace "i" and "I" but not the special
  " character.
  call assert_equal('xx襤', substitute('iI襤', '\(i\c\)', 'x', 'g'))
  " Same as the collection case, with the character written literally.
  call assert_equal('iIx', substitute('iI襤', '\(襤\c\)', 'x', 'g'))
  " Same again, with the character given as a \%u code point ...
  call assert_equal('iIx', substitute('iI襤', '\c\(\%u0130\)', 'x', 'g'))
  " ... as a \u escape inside a collection ...
  call assert_equal('iIx', substitute('iI襤', '\c\([\u0130]\)', 'x', 'g'))
  " ... and as part of the range U+012F..U+0131 inside a collection.
  call assert_equal('iIx', substitute('iI襤', '\c\([\u012f-\u0131]\)', 'x', 'g'))
endfunc

" Run the ignore-case checks once with 'regexpengine' 1 and once with 2.
func Test_regexp_ignore_case()
  try
    for engine in [1, 2]
      let &regexpengine = engine
      call Run_regexp_ignore_case()
    endfor
  finally
    " Reset the engine to its default even when a run aborts, so the
    " setting cannot leak into later tests.
    set regexpengine&
  endtry
endfunc

" Tests for regexp with multi-byte encoding and various magic settings
func Run_regexp_multibyte_magic()
  let text =<< trim END
    1 a aa abb abbccc
    2 d dd dee deefff
    3 g gg ghh ghhiii
    4 j jj jkk jkklll
    5 m mm mnn mnnooo
    6 x ^aa$ x
    7 (a)(b) abbaa
    8 axx [ab]xx
    9 鉊徇腹鉆ū 鉊冢腹x
    a 鉊冢腹x 鉊徇腹鉆ū
    b �～��具�
    c x 竅�洪
    d 憭拐蝙x
    e ��烵y
    f ���z
    g a�搓b
    j 0123�巳
    k combinations
    l 瓣繹 羹帢���
  END

  new
  call setline(1, text)
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /鉊�' .. "\<CR>x"
  call assert_equal('9 鉊徇腹鉆ū 鉊軍', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /鉊﹤�' .. "\<CR>x"
  call assert_equal('a 鉊冢腹x 鉊咦', getline('.'))

  " find word by change of word class
  exe 'normal /�﹏<�怒�\>��' .. "\<CR>x"
  call assert_equal('b �怒���', getline('.'))

  " Test \%u, [\u] and friends
  " c
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x 竅x', getline('.'))
  " d
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 雿篛', getline('.'))
  " e
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012��', getline('.'))
  " k
  let @w=':%s#comb[i]nations#��廜␌�怚�帢���#g'
  @w
  call assert_equal('k ��廜␌�怚�帢���', getline(18))

  close!
endfunc

" Run the multi-byte / magic checks once with 'regexpengine' 1 and once
" with 2.
func Test_regexp_multibyte_magic()
  try
    for engine in [1, 2]
      let &regexpengine = engine
      call Run_regexp_multibyte_magic()
    endfor
  finally
    " Reset the engine even when a run aborts.  The helper temporarily
    " sets 'nomagic', so restore 'magic' as well in case it was
    " interrupted before switching it back on.
    set regexpengine& magic&
  endtry
endfunc

" Test for 7.3.192
" command ":s/ \?/ /g" splits multi-byte characters into bytes
func Test_split_multibyte_to_bytes()
  new
  call setline(1, 'l 瓣繹 羹帢���')
  s/ \?/ /g
  call assert_equal(' l 瓣 繹 羹 帢���', getline(1))
  close!
endfunc

" Test for matchstr() with multibyte characters
func Test_matchstr_multibyte()
  new
  call assert_equal('�', matchstr("����", ".", 0, 2))
  call assert_equal('��', matchstr("����", "..", 0, 2))
  call assert_equal('�', matchstr("����", ".", 0, 0))
  call assert_equal('�', matchstr("����", ".", 4, -1))
  close!
endfunc

" Test for 7.4.636
" A search with end offset gets stuck at end of file.
func Test_search_with_end_offset()
  new
  let lines = ['', 'dog(a', 'cat(']
  call setline(1, lines)
  " Search for "(" using an end offset.
  exe "normal /(/e+\<CR>"
  " Repeat the search, then yank up to the following match into register a.
  exe 'normal n"ayn'
  call assert_equal("a\ncat(", getreg('a'))
  close!
endfunc

" Check that "^" matches even when the line starts with a combining char
func Test_match_start_of_line_combining()
  new
  " Line 2 consists solely of the combining character U+05AE.
  let lines = ['', "\u05ae", '']
  call setline(1, lines)
  " From the first line, search for start-of-line; the cursor must land on
  " line 2.
  normal gg
  exe "normal /^\<CR>"
  call assert_equal(2, line('.'))
  bwipe!
endfunc

" Check that [[:upper:]] matches for automatic engine
func Test_match_char_class_upper()
  new

  " Test 1: [[:upper:]]\{2,\}
  set regexpengine=0
  call setline(1, ['05. ��苤�觓 � ����觓苭 邾�郱. �. �訄赲邽迡迮郇郕郋, �. �郋赲訄郅� 邽 �. 虼迮��迮�訄 ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1,1)
  let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 1')
  set regexpengine=1
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 1')
  set regexpengine=2
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 1')

  " Test 2: [[:upper:]].\+
  let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
  set regexpengine=0
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 2')
  set regexpengine=1
  exe search_cmd
  call assert_equal(1, searchcount().total, 'TEST 2')
  set regexpengine=2
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 2')

  " Test 3: [[:lower:]]\+
  let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
  set regexpengine=0
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 3 lower')
  set regexpengine=1
  exe search_cmd
  call assert_equal(2, searchcount().total, 'TEST 3 lower')
  set regexpengine=2
  exe search_cmd
  call assert_equal(4, searchcount().total, 'TEST 3 lower')

  " clean up
  set regexpengine=0
  bwipe!
endfunc

" Source a script whose text contains the invalid byte 0xAA.  There are no
" assertions; the test only needs the commands to run to completion.
func Test_match_invalid_byte()
  " The blob spells these lines: "c", "v]0<0xAA>", ".", "y", "@0".
  let script = 0z630a.765d30aa0a.2e0a.790a.4030
  " 'D' lets Vim delete the file when this function ends.
  call writefile(script, 'Xinvalid', 'D')
  new
  source Xinvalid
  bwipe!
endfunc

" Feed commands containing illegal bytes to a separate Vim process.  There
" are no assertions; the test only needs the child run to return.
func Test_match_illegal_byte()
  " Text has illegal bytes which need to be set explicitly
  let script = []
  call add(script, "norm :set no\x01\<CR>")
  call add(script, "silent n\xff")
  call add(script, "silent norm :b\xff\<CR>")
  " 'D' lets Vim delete the file when this function ends.
  call writefile(script, 'Xregexp', 'D')

  let vim_cmd = GetVimCommand() .. ' -X -Z -e -s -S Xregexp -c qa!'
  call system(vim_cmd)
endfunc

" With engine 1, give :buf a pattern made of repeated \zs* items while a
" buffer with a non-ASCII byte name is open.  Errors from :buf are
" suppressed; there are no assertions.
func Test_match_too_complicated()
  let &regexpengine = 1
  let fname = "\xeb\xdb\x99"
  exe 'noswapfile vsplit ' .. fname
  silent! buf \&\zs*\zs*0
  bwipe!
  let &regexpengine = 0
endfunc

func Test_combining_chars_in_collection()
  new
  for i in range(0,2)
    exe "set re=".i
    put =['��', '�',  '�  a�', 'abcd']
    :%s/[��]//
    call assert_equal(['', '', '�', '�  a�', 'abcd'], getline(1,'$'))
    %d
  endfor
  bw!
endfunc

" With ignorecase (\c) the code point U+017F must also match ASCII "s";
" with \C it must match only itself.  Checked for a single character, a
" doubled character, a \+ repetition and a collection, under every value
" of 'regexpengine'.
" NOTE(review): the non-ASCII character in the literals below is presumably
" U+017F (the patterns use \%u17f); it appears mis-encoded in this copy of
" the file -- confirm against the original.
func Test_search_multibyte_match_ascii()
  new
  " Match single '鰱' and 's'
  call setline(1,  'das abc heraus abc 鰱ich abc 鰱ind')
  for i in range(0, 2)
    exe "set re="..i
    " Collect only the matched text of every match in the buffer.
    let ic_match = matchbufline('%', '\c\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    let noic_match = matchbufline('%', '\C\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    call assert_equal(['s', 's', '鰱','鰱'], ic_match, "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['鰱','鰱'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
  endfor
  " Match several '鰱鰱' and 'ss'
  call setline(1,  'das abc herauss abc 鰱鰱ich abc 鰱ind')
  for i in range(0, 2)
    exe "set re="..i
    let ic_match = matchbufline('%', '\c\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
    let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
    let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
    let ic_match3 = matchbufline('%', '\c[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
    let noic_match3 = matchbufline('%', '\C[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})

    call assert_equal(['ss', '鰱鰱'], ic_match, "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['鰱鰱'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
    call assert_equal(['s', 'ss', '鰱鰱', '鰱'], ic_match2, "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['鰱鰱','鰱'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
    call assert_equal(['s', 'ss', '鰱鰱', '鰱'], ic_match3, "Ignorecase Collection Regex-engine: " .. &re)
    call assert_equal(['鰱鰱','鰱'], noic_match3, "No-Ignorecase Collection Regex-engine: " .. &re)
  endfor
  " The loops leave 're' at 2; reset it so later tests get the default
  " engine selection again.
  set re&
  bw!
endfunc

" vim: shiftwidth=2 sts=2 expandtab