mirror of
git://git.sv.gnu.org/coreutils.git
synced 2026-04-20 18:56:39 +02:00
join: with --check-order print offending file name, line number and data
* src/join.c (g_names): New global (was main's "names").
(main): Update all uses of "names".
(line_no[2]): New globals.
(get_line): Increment after reading each line.
(check_order): Print the standard "file name:line_no: " prefix as well as the offending line when reporting disorder.
Here is a sample old/new comparison:
  -join: file 1 is not in sorted order
  +join: in:4: is not sorted: contents-of-line-4
* tests/misc/join: Change the two affected tests to expect the new diagnostic. Add new tests for more coverage: mismatch in file 2, two diagnostics, zero-length out-of-order line.
* NEWS (Improvements): Mention it.
Suggested by David Gast in http://debbugs.gnu.org/9236
This commit is contained in:
3
NEWS
3
NEWS
@@ -66,6 +66,9 @@ GNU coreutils NEWS -*- outline -*-
|
||||
df now supports disk partitions larger than 4 TiB on MacOS X 10.5
|
||||
or newer and on AIX 5.2 or newer.
|
||||
|
||||
join --check-order now prints "join: FILE:LINE_NUMBER: bad_line" for an
|
||||
unsorted input, rather than e.g., "join: file 1 is not in sorted order".
|
||||
|
||||
shuf outputs small subsets of large permutations much more efficiently.
|
||||
For example `shuf -i1-$((2**32-1)) -n2` no longer exhausts memory.
|
||||
|
||||
|
||||
@@ -139,6 +139,7 @@ David Alan Gilbert gilbertd@treblig.org
|
||||
David Dyck dcd@tc.fluke.COM
|
||||
David Eisner cradle@umd.edu
|
||||
David Flynn dav@chess.plus.com
|
||||
David Gast dgast@csulb.edu
|
||||
David Godfrey dave@delta.demon.co.uk
|
||||
David Luyer david_luyer@pacific.net.au
|
||||
David Madore david.madore@ens.fr
|
||||
|
||||
43
src/join.c
43
src/join.c
@@ -86,9 +86,15 @@ struct seq
|
||||
struct line **lines;
|
||||
};
|
||||
|
||||
/* The previous line read from each file. */
|
||||
/* The previous line read from each file. */
|
||||
static struct line *prevline[2] = {NULL, NULL};
|
||||
|
||||
/* The number of lines read from each file. */
|
||||
static uintmax_t line_no[2] = {0, 0};
|
||||
|
||||
/* The input file names. */
|
||||
static char *g_names[2];
|
||||
|
||||
/* This provides an extra line buffer for each file. We need these if we
|
||||
try to read two consecutive lines into the same buffer, since we don't
|
||||
want to overwrite the previous buffer before we check order. */
|
||||
@@ -384,12 +390,23 @@ check_order (const struct line *prev,
|
||||
size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
|
||||
if (keycmp (prev, current, join_field, join_field) > 0)
|
||||
{
|
||||
/* Exclude any trailing newline. */
|
||||
size_t len = current->buf.length;
|
||||
if (0 < len && current->buf.buffer[len - 1] == '\n')
|
||||
--len;
|
||||
|
||||
/* If the offending line is longer than INT_MAX, output
|
||||
only the first INT_MAX bytes in this diagnostic. */
|
||||
len = MIN (INT_MAX, len);
|
||||
|
||||
error ((check_input_order == CHECK_ORDER_ENABLED
|
||||
? EXIT_FAILURE : 0),
|
||||
0, _("file %d is not in sorted order"), whatfile);
|
||||
0, _("%s:%ju: is not sorted: %.*s"),
|
||||
g_names[whatfile - 1], line_no[whatfile - 1],
|
||||
(int) len, current->buf.buffer);
|
||||
|
||||
/* If we get to here, the message was just a warning, but we
|
||||
want only to issue it once. */
|
||||
/* If we get to here, the message was merely a warning.
|
||||
Arrange to issue it only once per file. */
|
||||
issued_disorder_warning[whatfile-1] = true;
|
||||
}
|
||||
}
|
||||
@@ -436,6 +453,7 @@ get_line (FILE *fp, struct line **linep, int which)
|
||||
freeline (line);
|
||||
return false;
|
||||
}
|
||||
++line_no[which - 1];
|
||||
|
||||
xfields (line);
|
||||
|
||||
@@ -980,7 +998,6 @@ main (int argc, char **argv)
|
||||
int prev_optc_status = MUST_BE_OPERAND;
|
||||
int operand_status[2];
|
||||
int joption_count[2] = { 0, 0 };
|
||||
char *names[2];
|
||||
FILE *fp1, *fp2;
|
||||
int optc;
|
||||
int nfiles = 0;
|
||||
@@ -1100,7 +1117,7 @@ main (int argc, char **argv)
|
||||
break;
|
||||
|
||||
case 1: /* Non-option argument. */
|
||||
add_file_name (optarg, names, operand_status, joption_count,
|
||||
add_file_name (optarg, g_names, operand_status, joption_count,
|
||||
&nfiles, &prev_optc_status, &optc_status);
|
||||
break;
|
||||
|
||||
@@ -1122,7 +1139,7 @@ main (int argc, char **argv)
|
||||
/* Process any operands after "--". */
|
||||
prev_optc_status = MUST_BE_OPERAND;
|
||||
while (optind < argc)
|
||||
add_file_name (argv[optind++], names, operand_status, joption_count,
|
||||
add_file_name (argv[optind++], g_names, operand_status, joption_count,
|
||||
&nfiles, &prev_optc_status, &optc_status);
|
||||
|
||||
if (nfiles != 2)
|
||||
@@ -1148,20 +1165,20 @@ main (int argc, char **argv)
|
||||
if (join_field_2 == SIZE_MAX)
|
||||
join_field_2 = 0;
|
||||
|
||||
fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r");
|
||||
fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
|
||||
if (!fp1)
|
||||
error (EXIT_FAILURE, errno, "%s", names[0]);
|
||||
fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r");
|
||||
error (EXIT_FAILURE, errno, "%s", g_names[0]);
|
||||
fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
|
||||
if (!fp2)
|
||||
error (EXIT_FAILURE, errno, "%s", names[1]);
|
||||
error (EXIT_FAILURE, errno, "%s", g_names[1]);
|
||||
if (fp1 == fp2)
|
||||
error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
|
||||
join (fp1, fp2);
|
||||
|
||||
if (fclose (fp1) != 0)
|
||||
error (EXIT_FAILURE, errno, "%s", names[0]);
|
||||
error (EXIT_FAILURE, errno, "%s", g_names[0]);
|
||||
if (fclose (fp2) != 0)
|
||||
error (EXIT_FAILURE, errno, "%s", names[1]);
|
||||
error (EXIT_FAILURE, errno, "%s", g_names[1]);
|
||||
|
||||
if (issued_disorder_warning[0] || issued_disorder_warning[1])
|
||||
exit (EXIT_FAILURE);
|
||||
|
||||
@@ -196,7 +196,29 @@ my @tv = (
|
||||
# With check, both inputs out of order (in fact, in reverse order)
|
||||
['chkodr-5', '--check-order',
|
||||
[" b 1\n a 2\n", " b Y\n a Z\n"], "", 1,
|
||||
"$prog: file 1 is not in sorted order\n"],
|
||||
"$prog: chkodr-5.1:2: is not sorted: a 2\n"],
|
||||
|
||||
# Similar, but with only file 2 not sorted.
|
||||
['chkodr-5b', '--check-order',
|
||||
[" a 2\n b 1\n", " b Y\n a Z\n"], "", 1,
|
||||
"$prog: chkodr-5b.2:2: is not sorted: a Z\n"],
|
||||
|
||||
# Similar, but with the offending line having length 0 (excluding newline).
|
||||
['chkodr-5c', '--check-order',
|
||||
[" a 2\n b 1\n", " b Y\n\n"], "", 1,
|
||||
"$prog: chkodr-5c.2:2: is not sorted: \n"],
|
||||
|
||||
# Similar, but elicit a warning for each input file (without --check-order).
|
||||
['chkodr-5d', '',
|
||||
["a\nx\n\n", "b\ny\n\n"], "", 1,
|
||||
"$prog: chkodr-5d.1:3: is not sorted: \n" .
|
||||
"$prog: chkodr-5d.2:3: is not sorted: \n"],
|
||||
|
||||
# Similar, but make it so each offending line has no newline.
|
||||
['chkodr-5e', '',
|
||||
["a\nx\no", "b\ny\np"], "", 1,
|
||||
"$prog: chkodr-5e.1:3: is not sorted: o\n" .
|
||||
"$prog: chkodr-5e.2:3: is not sorted: p\n"],
|
||||
|
||||
# Without order check, both inputs out of order and some lines
|
||||
# unpairable. This is NOT supported by the GNU extension. All that
|
||||
@@ -229,7 +251,7 @@ my @tv = (
|
||||
# actual data out-of-order. This join should fail.
|
||||
['header-3', '--header --check-order',
|
||||
["ID Name\n2 B\n1 A\n", "ID Color\n2 blue\n"], "ID Name Color\n", 1,
|
||||
"$prog: file 1 is not in sorted order\n"],
|
||||
"$prog: header-3.1:3: is not sorted: 1 A\n"],
|
||||
|
||||
# '--header' with specific output format '-o'.
|
||||
# output header line should respect the requested format
|
||||
|
||||
Reference in New Issue
Block a user