mirror of
git://git.sv.gnu.org/coreutils.git
synced 2026-04-16 08:38:48 +02:00
all: use more consistent blank character determination
* src/system.h (c32issep): A new function that is essentially
iswblank() on GLIBC platforms, and iswspace() with exceptions elsewhere.
* src/expand.c: Use it instead of c32isblank().
* src/fold.c: Likewise.
* src/join.c: Likewise.
* src/numfmt.c: Likewise.
* src/unexpand.c: Likewise.
* src/uniq.c: Likewise.
* NEWS: Mention the improvement.
This commit is contained in:
4
NEWS
4
NEWS
@@ -42,6 +42,10 @@ GNU coreutils NEWS -*- outline -*-
|
||||
'install' now allows the combination of the --compare and
|
||||
--preserve-timestamps options.
|
||||
|
||||
'fold', 'join', 'numfmt', 'uniq' now use more consistent blank character
|
||||
determination on non-GLIBC platforms. For example \u3000 (ideographic space)
|
||||
will be considered a blank character on all platforms.
|
||||
|
||||
'nl' now supports multi-byte --section-delimiter characters.
|
||||
|
||||
'shuf -i' now operates up to two times faster on systems with unlocked stdio
|
||||
|
||||
@@ -140,8 +140,7 @@ expand (void)
|
||||
|
||||
if (convert)
|
||||
{
|
||||
convert &= convert_entire_line
|
||||
|| !! (c32isblank (g.ch) && ! c32isnbspace (g.ch));
|
||||
convert &= convert_entire_line || c32issep (g.ch);
|
||||
|
||||
if (g.ch == '\t')
|
||||
{
|
||||
|
||||
@@ -210,7 +210,7 @@ fold_file (char const *filename, size_t width)
|
||||
for (mcel_t g2; logical_p < logical_lim; logical_p += g2.len)
|
||||
{
|
||||
g2 = mcel_scan (logical_p, logical_lim);
|
||||
if (c32isblank (g2.ch) && ! c32isnbspace (g2.ch))
|
||||
if (c32issep (g2.ch))
|
||||
{
|
||||
space_length = g2.len;
|
||||
logical_end = logical_p - line_out;
|
||||
|
||||
@@ -308,7 +308,7 @@ eq_tab (mcel_t g)
|
||||
static bool
|
||||
newline_or_blank (mcel_t g)
|
||||
{
|
||||
return g.ch == '\n' || c32isblank (g.ch);
|
||||
return g.ch == '\n' || c32issep (g.ch);
|
||||
}
|
||||
|
||||
/* Fill in the 'fields' structure in LINE. */
|
||||
@@ -918,7 +918,7 @@ decode_field_spec (char const *s, int *file_index, idx_t *field_index)
|
||||
static bool
|
||||
comma_or_blank (mcel_t g)
|
||||
{
|
||||
return g.ch == ',' || c32isblank (g.ch);
|
||||
return g.ch == ',' || c32issep (g.ch);
|
||||
}
|
||||
|
||||
/* Add the comma or blank separated field spec(s) in STR to 'outlist'. */
|
||||
|
||||
@@ -215,8 +215,7 @@ static bool dev_debug = false;
|
||||
static bool
|
||||
newline_or_blank (mcel_t g)
|
||||
{
|
||||
return g.ch == '\n'
|
||||
|| (c32isblank (g.ch) && ! c32isnbspace (g.ch));
|
||||
return g.ch == '\n' || c32issep (g.ch);
|
||||
}
|
||||
|
||||
static inline int
|
||||
@@ -673,7 +672,7 @@ simple_strtod_human (char const *input_str,
|
||||
if (!matched_unit_sep)
|
||||
{
|
||||
mcel_t g = mcel_scanz (*endptr);
|
||||
if (c32isblank (g.ch) || c32isnbspace (g.ch))
|
||||
if (c32issep (g.ch) || c32isnbspace (g.ch))
|
||||
(*endptr) += g.len;
|
||||
}
|
||||
|
||||
|
||||
23
src/system.h
23
src/system.h
@@ -160,6 +160,29 @@ c32isnbspace (char32_t wc)
|
||||
return wc == 0x00A0 || wc == 0x2007 || wc == 0x202F || wc == 0x2060;
|
||||
}
|
||||
|
||||
ATTRIBUTE_PURE
|
||||
static inline int
|
||||
c32isvertspace (char32_t wc)
|
||||
{
|
||||
return wc == 0x000A || wc == 0x000B || wc == 0x000C || wc == 0x000D
|
||||
|| wc == 0x2028 || wc == 0x2029;
|
||||
}
|
||||
|
||||
|
||||
/* c32isblank() is too variable on non GLIBC platforms.
|
||||
E.g., does not include \u3000 ideographic space on musl.
|
||||
E.g., does include non-breaking space on Solaris and NetBSD.
|
||||
This equivalent is more consistent across systems. */
|
||||
ATTRIBUTE_PURE
|
||||
static inline bool
|
||||
c32issep (char32_t wc)
|
||||
{
|
||||
#if defined __GLIBC__
|
||||
return !! c32isblank (wc);
|
||||
#endif
|
||||
return !! (c32isspace (wc) && ! c32isvertspace (wc) && ! c32isnbspace (wc));
|
||||
}
|
||||
|
||||
#include <locale.h>
|
||||
|
||||
/* Take care of NLS matters. */
|
||||
|
||||
@@ -176,7 +176,7 @@ unexpand (void)
|
||||
|
||||
if (convert)
|
||||
{
|
||||
bool blank = !! (c32isblank (g.ch) && ! c32isnbspace (g.ch));
|
||||
bool blank = c32issep (g.ch);
|
||||
|
||||
if (blank)
|
||||
{
|
||||
|
||||
@@ -254,7 +254,7 @@ size_opt (char const *opt, char const *msgid)
|
||||
static bool
|
||||
newline_or_blank (mcel_t g)
|
||||
{
|
||||
return g.ch == '\n' || c32isblank (g.ch);
|
||||
return g.ch == '\n' || c32issep (g.ch);
|
||||
}
|
||||
|
||||
/* Given a linebuffer LINE,
|
||||
|
||||
Reference in New Issue
Block a user