1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-04-20 18:56:39 +02:00

expr: add multibyte support

Discussed in https://bugs.gnu.org/26779 .

* NEWS: Mention the improvement.
* bootstrap.conf: Add gnulib modules mbslen,mbschr.
* src/expr.c (mbs_logical_substr): New function to return a substring
based on logical character positions (instead of bytes).
(mbs_logical_cspn): Similar to strcspn/mbscspn, but returns number of
logical characters instead of byte offset.
(mbs_offset_to_chars): New function to return number of logical
characters fitting in a given byte offset.
(docolon): Report matched logical characters instead of bytes.
(eval6): For length/substr/index operations, use logical characters
instead of bytes by calling the above new functions.
* tests/misc/expr.pl: Repeat all tests with non-C locale to detect any
regressions.
* tests/misc/expr-multibyte.pl: New tests with multibyte input.
* tests/local.mk: Add new test file.
This commit is contained in:
Assaf Gordon
2017-06-28 01:23:52 +00:00
parent e13fe20049
commit a9f2be5bfe
6 changed files with 410 additions and 18 deletions

View File

@@ -24,6 +24,10 @@ my $prog = 'expr';
# Turn off localization of executable's output.
$ENV{$_} = 'C' foreach qw(LANGUAGE LANG LC_ALL);

# Locale name taken from LOCALE_FR_UTF8; fall back to 'C' when that
# variable is unset or holds the placeholder value 'none'.
my $mb_locale = $ENV{LOCALE_FR_UTF8};
$mb_locale = 'C'
  if !defined $mb_locale || $mb_locale eq 'none';

# 29-digit operands, well beyond the 64-bit integer range:
# $big_p1 is $big + 1, and $big_sum is $big + $big_p1.
my $big = '98782897298723498732987928734';
my $big_p1 = '98782897298723498732987928735';
my $big_sum = '197565794597446997465975857469';
@@ -191,6 +195,22 @@ foreach $t (@Tests)
}
}
# When $mb_locale is something other than 'C', run every test a second
# time under that locale.  Each copy has "-mb" appended to its name and
# an extra trailing {ENV => "LC_ALL=$mb_locale"} element, so that the
# multi-byte code paths are covered as well.
if ($mb_locale ne 'C')
  {
    my $mb_env = "LC_ALL=$mb_locale";
    my @mb_tests = map {
      # The first element is the test name; the rest is copied unchanged.
      my ($name, @rest) = @$_;
      ["$name-mb", @rest, {ENV => $mb_env}]
    } @Tests;
    push @Tests, @mb_tests;
  }
# Settings taken from the SAVE_TEMPS and VERBOSE environment variables;
# each is undef when the corresponding variable is not set.
my $save_temps = $ENV{SAVE_TEMPS};
my $verbose = $ENV{VERBOSE};