1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-04-20 18:56:39 +02:00

df: avoid multibyte character corruption on macOS

This improves on the earlier fix for the problem reported by
Chih-Hsuan Yen (Bug#32236), by also looking for other control
characters and for encoding errors.
* src/df.c: Include wchar.h and wctype.h instead of c-ctype.h.
(hide_problematic_chars): Process the string as multibyte.
Use iswcntrl, not c_iscntrl.
This commit is contained in:
Paul Eggert
2018-07-22 09:50:20 -07:00
parent 437555061c
commit 1ecf7abe4a

View File

@@ -23,7 +23,8 @@
#include <sys/types.h>
#include <getopt.h>
#include <assert.h>
#include <c-ctype.h>
#include <wchar.h>
#include <wctype.h>
#include "system.h"
#include "canonicalize.h"
@@ -272,21 +273,41 @@ static struct option const long_options[] =
{NULL, 0, NULL, 0}
};
/* Replace problematic chars with '?'.
Since only control characters are currently considered,
this should work in all encodings. */
/* Replace problematic chars with '?'.
CELL is a NUL-terminated string that is rewritten in place.
In the multibyte implementation below, a character is problematic
if it cannot be decoded by mbrtowc or if iswcntrl reports it as a
control character.  Each problematic character becomes a single
'?' byte, so the result is never longer than the input.

NOTE(review): this hunk appears to show removed and added lines
together with the +/- markers stripped (e.g. both 'static char*'
and 'static void' are present); confirm against the real diff.  */
static char*
static void
hide_problematic_chars (char *cell)
{
/* Byte-at-a-time scan: P walks CELL until the terminating NUL.  */
char *p = cell;
while (*p)
/* Multibyte scan: SRC reads from CELL up to SRCEND while DST writes the
filtered result back into the same buffer.  DST never gets ahead of
SRC because every step writes at most as many bytes as it consumes.  */
char *srcend = cell + strlen (cell);
char *dst = cell;
/* Zero-initialized conversion state for mbrtowc.  */
mbstate_t mbstate = { 0, };
/* Number of source bytes consumed by the current iteration.  */
size_t n;
for (char *src = cell; src != srcend; src += n)
{
/* Byte-wise check: overwrite a C-locale control byte with '?'.  */
if (c_iscntrl (to_uchar (*p)))
*p = '?';
p++;
wchar_t wc;
size_t srcbytes = srcend - src;
/* Decode the next character from the bytes that remain.  */
n = mbrtowc (&wc, src, srcbytes, &mbstate);
/* Accept only a nonzero length that fits in the remaining bytes;
mbrtowc's error returns, (size_t) -1 and (size_t) -2, are larger
than SRCBYTES and so fail this test.  */
bool ok = 0 < n && n <= srcbytes;
if (ok)
/* Decoded successfully, but control characters are still hidden.
N keeps the full character length, so the whole sequence is
consumed and replaced by one '?'.  */
ok = !iswcntrl (wc);
else
/* Undecodable input: consume a single byte and retry after it.  */
n = 1;
if (ok)
{
/* Source and destination may overlap, hence memmove.  */
memmove (dst, src, n);
dst += n;
}
else
{
*dst++ = '?';
/* Zero the conversion state again so the next character is decoded
independently of the sequence that was just replaced.  */
memset (&mbstate, 0, sizeof mbstate);
}
}
return cell;
/* Terminate the possibly shortened result.  */
*dst = '\0';
}
/* Dynamically allocate a row of pointers in TABLE, which