mirror of
git://git.sv.gnu.org/coreutils.git
synced 2026-04-19 10:15:48 +02:00
fmt: fix invalid multi-byte splitting on macOS
On macOS, isspace(0x85) returns true, which results in splitting within multi-byte characters. * src/fmt.c (get_line): s/isspace/c_isspace/. * tests/fmt/non-space.sh: Add a new test. * tests/local.mk: Reference new test. * NEWS: Mention the fix. Addresses https://bugs.gnu.org/54124
This commit is contained in:
4
NEWS
4
NEWS
@@ -21,6 +21,10 @@ GNU coreutils NEWS -*- outline -*-
|
||||
and B is in some other file system.
|
||||
[bug introduced in coreutils-9.0]
|
||||
|
||||
On macOS, fmt no longer corrupts multi-byte characters
|
||||
by misdetecting their component bytes as spaces.
|
||||
[This bug was present in "the beginning".]
|
||||
|
||||
'id xyz' now uses the name 'xyz' to determine groups, instead of xyz's uid.
|
||||
[bug introduced in coreutils-8.22]
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
it to be a type get syntax errors for the variable declaration below. */
|
||||
#define word unused_word_type
|
||||
|
||||
#include "c-ctype.h"
|
||||
#include "system.h"
|
||||
#include "error.h"
|
||||
#include "die.h"
|
||||
@@ -702,7 +703,7 @@ get_line (FILE *f, int c)
|
||||
*wptr++ = c;
|
||||
c = getc (f);
|
||||
}
|
||||
while (c != EOF && !isspace (c));
|
||||
while (c != EOF && !c_isspace (c));
|
||||
in_column += word_limit->length = wptr - word_limit->text;
|
||||
check_punctuation (word_limit);
|
||||
|
||||
|
||||
49
tests/fmt/non-space.sh
Executable file
49
tests/fmt/non-space.sh
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/bin/sh
|
||||
# Test fmt space handling
|
||||
|
||||
# Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
|
||||
print_ver_ fmt printf
|
||||
|
||||
# Before coreutils 9.1, fmt on macOS treated bytes like 0x85
|
||||
# as space characters in multi-byte locales (including UTF-8)
|
||||
|
||||
check_non_space() {
|
||||
char="$1"
|
||||
test "$(env printf "=$char=" | fmt -s -w1 | wc -l)" = 1 || fail=1
|
||||
}
|
||||
|
||||
export LC_ALL=en_US.iso8859-1 # only lowercase form works on macOS 10.15.7
|
||||
if test "$(locale charmap 2>/dev/null | sed 's/iso/ISO-/')" = ISO-8859-1; then
|
||||
check_non_space '\xA0'
|
||||
fi
|
||||
|
||||
export LC_ALL=en_US.UTF-8
|
||||
if test "$(locale charmap 2>/dev/null)" = UTF-8; then
|
||||
check_non_space '\u00A0' # No break space
|
||||
check_non_space '\u2007' # TODO: should probably split on figure space
|
||||
check_non_space '\u202F' # Narrow no break space
|
||||
check_non_space '\u2060' # Word joiner (zero-width no-break space)
|
||||
check_non_space '\u0445' # Cyrillic kha, for which macOS isspace(0x85)==true
|
||||
fi
|
||||
|
||||
export LC_ALL=ru_RU.KOI8-R
|
||||
if test "$(locale charmap 2>/dev/null)" = KOI8-R; then
|
||||
check_non_space '\x9A'
|
||||
fi
|
||||
|
||||
Exit $fail
|
||||
@@ -237,8 +237,9 @@ all_tests = \
|
||||
tests/chgrp/posix-H.sh \
|
||||
tests/chgrp/recurse.sh \
|
||||
tests/fmt/base.pl \
|
||||
tests/fmt/long-line.sh \
|
||||
tests/fmt/goal-option.sh \
|
||||
tests/fmt/long-line.sh \
|
||||
tests/fmt/non-space.sh \
|
||||
tests/misc/echo.sh \
|
||||
tests/misc/env.sh \
|
||||
tests/misc/env-signal-handler.sh \
|
||||
|
||||
Reference in New Issue
Block a user