1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-04-21 03:12:48 +02:00

comm: add --total option

* src/comm.c (total_option): Add bool variable for the new option.
(TOTAL_OPTION): Add enum value.
(long_options): Add array element for the new option.
(usage): Document the new option here.
(compare_files): Count the lines in total[3], and output the summary at
the end.
(main): Accept the new option.
* doc/coreutils.texi (comm invocation): Document it.
* tests/misc/comm.pl: Test it.  While at it, improve the test data
to have 1 unique line in the first file, 2 unique lines in the second
file, and 3 common lines.
* NEWS (New Features): Mention the new option.

Fixes http://bugs.gnu.org/24929
This commit is contained in:
Bernhard Voelker
2016-11-22 22:03:47 +01:00
parent 812877bfcb
commit b50a151346
4 changed files with 115 additions and 20 deletions

2
NEWS
View File

@@ -114,6 +114,8 @@ GNU coreutils NEWS -*- outline -*-
** New Features
comm now accepts the --total option to output a summary at the end.
date now accepts the --debug option, to annotate the parsed date string,
display timezone information, and warn about potential misuse.

View File

@@ -5174,6 +5174,37 @@ rather than the default of a single TAB character.
The delimiter @var{str} may not be empty.
@item --total
Output a summary at the end.
Similar to the regular output,
column one contains the total number of lines unique to @var{file1},
column two contains the total number of lines unique to @var{file2}, and
column three contains the total number of lines common to both files,
followed by the word @samp{total} in the additional column four.
In the following example, @command{comm} omits the regular output
(@option{-123}), thus just printing the summary:
@example
$ printf '%s\n' a b c d e > file1
$ printf '%s\n' b c d e f g > file2
$ comm --total -123 file1 file2
1 2 4 total
@end example
This option is a GNU extension. Portable scripts should use @command{wc} to
get the totals, e.g. for the above example files:
@example
$ comm -23 file1 file2 | wc -l # number of lines only in file1
1
$ comm -13 file1 file2 | wc -l # number of lines only in file2
2
$ comm -12 file1 file2 | wc -l # number of lines common to both files
4
@end example
@optZeroTerminated
@end table

View File

@@ -63,6 +63,9 @@ static bool issued_disorder_warning[2];
/* line delimiter. */
static unsigned char delim = '\n';
/* If true, print a summary. */
static bool total_option;
/* If nonzero, check that the input is correctly ordered. */
static enum
{
@@ -82,7 +85,8 @@ enum
{
CHECK_ORDER_OPTION = CHAR_MAX + 1,
NOCHECK_ORDER_OPTION,
OUTPUT_DELIMITER_OPTION
OUTPUT_DELIMITER_OPTION,
TOTAL_OPTION
};
static struct option const long_options[] =
@@ -90,6 +94,7 @@ static struct option const long_options[] =
{"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
{"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
{"total", no_argument, NULL, TOTAL_OPTION},
{"zero-terminated", no_argument, NULL, 'z'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
@@ -135,6 +140,9 @@ and column three contains lines common to both files.\n\
"), stdout);
fputs (_("\
--output-delimiter=STR separate columns with STR\n\
"), stdout);
fputs (_("\
--total output a summary\n\
"), stdout);
fputs (_("\
-z, --zero-terminated line delimiter is NUL, not newline\n\
@@ -263,6 +271,9 @@ compare_files (char **infiles)
/* streams[i] holds the input stream for file i. */
FILE *streams[2];
/* Counters for the summary. */
uintmax_t total[] = {0, 0, 0};
int i, j;
/* Initialize the storage. */
@@ -317,14 +328,26 @@ compare_files (char **infiles)
/* Output the line that is lesser. */
if (order == 0)
writeline (thisline[1], stdout, 3);
{
/* Line is seen in both files. */
total[2]++;
writeline (thisline[1], stdout, 3);
}
else
{
seen_unpairable = true;
if (order <= 0)
writeline (thisline[0], stdout, 1);
{
/* Line is seen in file 1 only. */
total[0]++;
writeline (thisline[0], stdout, 1);
}
else
writeline (thisline[1], stdout, 2);
{
/* Line is seen in file 2 only. */
total[1]++;
writeline (thisline[1], stdout, 2);
}
}
/* Step the file the line came from.
@@ -365,6 +388,19 @@ compare_files (char **infiles)
for (i = 0; i < 2; i++)
if (fclose (streams[i]) != 0)
die (EXIT_FAILURE, errno, "%s", quotef (infiles[i]));
if (total_option)
{
/* Print the summary, minding the column and line delimiters. */
char buf1[INT_BUFSIZE_BOUND (uintmax_t)];
char buf2[INT_BUFSIZE_BOUND (uintmax_t)];
char buf3[INT_BUFSIZE_BOUND (uintmax_t)];
printf ("%s%s%s%s%s%s%s%c",
umaxtostr (total[0], buf1), col_sep,
umaxtostr (total[1], buf2), col_sep,
umaxtostr (total[2], buf3), col_sep,
_("total"), delim);
}
}
int
@@ -388,6 +424,7 @@ main (int argc, char **argv)
seen_unpairable = false;
issued_disorder_warning[0] = issued_disorder_warning[1] = false;
check_input_order = CHECK_ORDER_DEFAULT;
total_option = false;
while ((c = getopt_long (argc, argv, "123z", long_options, NULL)) != -1)
switch (c)
@@ -423,6 +460,10 @@ main (int argc, char **argv)
col_sep_len = *optarg ? strlen (optarg) : 1;
break;
case TOTAL_OPTION:
total_option = true;
break;
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

View File

@@ -27,37 +27,50 @@ my $prog = 'comm';
# Turn off localization of executable's output.
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
my @inputs = ({IN=>{a=>"1\n3"}}, {IN=>{b=>"2\n3"}});
my @zinputs = ({IN=>{za=>"1\0003"}}, {IN=>{zb=>"2\0003"}});
my @inputs = ({IN=>{a=>"1\n3\n3\n3"}}, {IN=>{b=>"2\n2\n3\n3\n3"}});
my @zinputs = ({IN=>{za=>"1\0003\0003\0003"}},
{IN=>{zb=>"2\0002\0003\0003\0003"}});
my @Tests =
(
# basic operation
['basic', @inputs, {OUT=>"1\n\t2\n\t\t3\n"} ],
['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t\t3\0"} ],
['basic', @inputs, {OUT=>"1\n\t2\n\t2\n\t\t3\n\t\t3\n\t\t3\n"} ],
['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t2\0\t\t3\0\t\t3\0\t\t3\0"} ],
# suppress lines unique to file 1
['opt-1', '-1', @inputs, {OUT=>"2\n\t3\n"} ],
['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0\t3\0"} ],
['opt-1', '-1', @inputs, {OUT=>"2\n2\n\t3\n\t3\n\t3\n"} ],
['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0002\000\t3\000\t3\000\t3\000"} ],
# suppress lines unique to file 2
['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n"} ],
['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n\t3\n\t3\n"} ],
['zopt-2', '-z', '-2', @zinputs, {OUT=>"1\000\t3\000\t3\000\t3\000"} ],
# suppress lines that appear in both files
['opt-3', '-3', @inputs, {OUT=>"1\n\t2\n"} ],
['opt-3', '-3', @inputs, {OUT=>"1\n\t2\n\t2\n"} ],
['zopt-3', '-z', '-3', @zinputs, {OUT=>"1\000\t2\000\t2\000"} ],
# suppress lines unique to file 1 and lines unique to file 2
['opt-12', '-1', '-2', @inputs, {OUT=>"3\n"} ],
['opt-12', '-1', '-2', @inputs, {OUT=>"3\n3\n3\n"} ],
['zopt-12', '-12z', @zinputs, {OUT=>"3\0003\0003\000"} ],
# suppress lines unique to file 1 and those that appear in both files
['opt-13', '-1', '-3', @inputs, {OUT=>"2\n"} ],
['opt-13', '-1', '-3', @inputs, {OUT=>"2\n2\n"} ],
['zopt-13', '-13z', @zinputs, {OUT=>"2\0002\000"} ],
# suppress lines unique to file 2 and those that appear in both files
['opt-23', '-2', '-3', @inputs, {OUT=>"1\n"} ],
['zopt-23', '-23z', @zinputs, {OUT=>"1\000"} ],
# suppress all output (really?)
# suppress all output
['opt-123', '-1', '-2', '-3', @inputs, {OUT=>""} ],
# show summary: 1 only in file1, 2 only in file2, 3 in both files
['total-all', '--total', @inputs, {OUT=>"1\n\t2\n\t2\n\t\t3\n\t\t3\n\t\t3\n"
. "1\t2\t3\ttotal\n"} ],
# show summary only, suppressing regular output
['total-123', '--total', '-123', @inputs, {OUT=>"1\t2\t3\ttotal\n"} ],
# invalid missing command line argument (1)
['missing-arg1', $inputs[0], {EXIT=>1},
{ERR => "$prog: missing operand after 'a'\n"
@@ -128,17 +141,17 @@ my @Tests =
# alternate delimiter: ','
['delim-comma', '--output-delimiter=,', @inputs,
{OUT=>"1\n,2\n,,3\n"} ],
{OUT=>"1\n,2\n,2\n,,3\n,,3\n,,3\n"} ],
# two-character alternate delimiter: '++'
['delim-2char', '--output-delimiter=++', @inputs,
{OUT=>"1\n++2\n++++3\n"} ],
{OUT=>"1\n++2\n++2\n++++3\n++++3\n++++3\n"} ],
# NUL delimiter
['delim-empty', '--output-delimiter=', @inputs,
{OUT=>"1\n\0002\n\000\0003\n"} ],
{OUT=>"1\n\0002\n\0002\n\000\0003\n\000\0003\n\000\0003\n"} ],
['zdelim-empty', '-z', '-z --output-delimiter=', @zinputs,
{OUT=>"1\000\0002\000\000\0003\000"} ],
{OUT=>"1\000\0002\000\0002\000\000\0003\000\000\0003\000\000\0003\000"} ],
# invalid dual delimiter
['delim-dual', '--output-delimiter=,', '--output-delimiter=+', @inputs,
@@ -146,8 +159,16 @@ my @Tests =
# valid dual delimiter specification
['delim-dual2', '--output-delimiter=,', '--output-delimiter=,', @inputs,
{OUT=>"1\n,2\n,,3\n"} ],
{OUT=>"1\n,2\n,2\n,,3\n,,3\n,,3\n"} ],
# show summary, zero-terminated
['totalz-all', '--total', '-z', @zinputs,
{OUT=>"1\000\t2\000\t2\000\t\t3\000\t\t3\000\t\t3\000"
. "1\t2\t3\ttotal\000"} ],
# show summary only (-123), zero-terminated and with ',' as delimiter
['totalz-123', '--total', '-z123', '--output-delimiter=,', @zinputs,
{OUT=>"1,2,3,total\000"} ],
);
my $save_temps = $ENV{DEBUG};