1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-04-19 10:15:48 +02:00

fmt: fix invalid multi-byte splitting on macOS

On macOS, isspace(0x85) returns true,
which results in splitting within multi-byte characters.

* src/fmt.c (get_line): s/isspace/c_isspace/.
* tests/fmt/non-space.sh: Add a new test.
* tests/local.mk: Reference new test.
* NEWS: Mention the fix.
Addresses https://bugs.gnu.org/54124
This commit is contained in:
Pádraig Brady
2022-02-23 17:50:46 +00:00
parent 677fb3e4ab
commit 6dc702928e
4 changed files with 57 additions and 2 deletions

4
NEWS
View File

@@ -21,6 +21,10 @@ GNU coreutils NEWS -*- outline -*-
and B is in some other file system.
[bug introduced in coreutils-9.0]
On macOS, fmt no longer corrupts multi-byte characters
by misdetecting their component bytes as spaces.
[This bug was present in "the beginning".]
'id xyz' now uses the name 'xyz' to determine groups, instead of xyz's uid.
[bug introduced in coreutils-8.22]

View File

@@ -26,6 +26,7 @@
it to be a type get syntax errors for the variable declaration below. */
#define word unused_word_type
#include "c-ctype.h"
#include "system.h"
#include "error.h"
#include "die.h"
@@ -702,7 +703,7 @@ get_line (FILE *f, int c)
*wptr++ = c;
c = getc (f);
}
while (c != EOF && !isspace (c));
while (c != EOF && !c_isspace (c));
in_column += word_limit->length = wptr - word_limit->text;
check_punctuation (word_limit);

49
tests/fmt/non-space.sh Executable file
View File

@@ -0,0 +1,49 @@
#!/bin/sh
# Test fmt space handling
# Copyright (C) 2022 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ fmt printf
# Before coreutils 9.1, fmt on macOS treated bytes like 0x85 as space
# characters in multi-byte locales (including UTF-8), and so could split
# a line in the middle of a multi-byte character.
#
# check_non_space CHAR
#   Assert that fmt does not treat CHAR as a word separator.
#   CHAR is embedded in the printf format string, so escapes such as
#   '\xA0' or '\u00A0' are expanded by printf itself.  'env printf' runs
#   the printf program found in PATH rather than the shell builtin.
#   The text "=CHAR=" is formatted with 'fmt -s -w1' (split only, width 1),
#   so any separator fmt recognizes yields more than one output line.
#   Sets the global 'fail' to 1 unless exactly one line is produced.
check_non_space() {
  char=$1
  if test "$(env printf "=$char=" | fmt -s -w1 | wc -l)" != 1; then
    fail=1
  fi
}
# Each section below selects a locale via LC_ALL and runs its checks only
# when 'locale charmap' reports the expected charset; otherwise the section
# is silently skipped.  Diagnostics from 'locale' are discarded.

# Single-byte Latin-1 locale: byte 0xA0 (no-break space in ISO-8859-1).
export LC_ALL=en_US.iso8859-1 # only lowercase form works on macOS 10.15.7
# The sed rewrites a lowercase "iso" to "ISO-" so that a charmap reported
# as "iso8859-1" also compares equal to ISO-8859-1.
# NOTE(review): presumably some systems report the lowercase form - confirm.
if test "$(locale charmap 2>/dev/null | sed 's/iso/ISO-/')" = ISO-8859-1; then
check_non_space '\xA0'
fi
# UTF-8 locale: multi-byte characters that fmt must keep on one line.
export LC_ALL=en_US.UTF-8
if test "$(locale charmap 2>/dev/null)" = UTF-8; then
check_non_space '\u00A0' # No-break space
check_non_space '\u2007' # TODO: should probably split on figure space
check_non_space '\u202F' # Narrow no-break space
check_non_space '\u2060' # Word joiner (zero-width, non-breaking)
check_non_space '\u0445' # Cyrillic kha, for which macOS isspace(0x85)==true
fi
# Single-byte Cyrillic locale: byte 0x9A (no-break space in KOI8-R).
export LC_ALL=ru_RU.KOI8-R
if test "$(locale charmap 2>/dev/null)" = KOI8-R; then
check_non_space '\x9A'
fi
# Finish with the status accumulated in 'fail' by check_non_space.
# NOTE(review): 'Exit' is not defined in this file; presumably it comes
# from the sourced tests/init.sh - confirm.
Exit $fail

View File

@@ -237,8 +237,9 @@ all_tests = \
tests/chgrp/posix-H.sh \
tests/chgrp/recurse.sh \
tests/fmt/base.pl \
tests/fmt/long-line.sh \
tests/fmt/goal-option.sh \
tests/fmt/long-line.sh \
tests/fmt/non-space.sh \
tests/misc/echo.sh \
tests/misc/env.sh \
tests/misc/env-signal-handler.sh \