mirror of
git://git.sv.gnu.org/coreutils.git
synced 2026-04-21 03:12:48 +02:00
comm: add --total option
* src/comm.c (total_option): Add bool variable for the new option. (TOTAL_OPTION): Add enum value. (long_options): Add array element for the new option. (usage): Document the new option here. (compare_files): Count the lines in total[3], and output the summary at the end. (main): Accept the new option. * doc/coreutils.texi (comm invocation): Document it. * tests/misc/comm.pl: Test it. While at it, improve the test data to have 1 unique line in the first file, 2 unique lines in the second file, and 3 common lines. * NEWS (New Features): Mention the new option. Fixes http://bugs.gnu.org/24929
This commit is contained in:
2
NEWS
2
NEWS
@@ -114,6 +114,8 @@ GNU coreutils NEWS -*- outline -*-
|
||||
|
||||
** New Features
|
||||
|
||||
comm now accepts the --total option to output a summary at the end.
|
||||
|
||||
date now accepts the --debug option, to annotate the parsed date string,
|
||||
display timezone information, and warn about potential misuse.
|
||||
|
||||
|
||||
@@ -5174,6 +5174,37 @@ rather than the default of a single TAB character.
|
||||
|
||||
The delimiter @var{str} may not be empty.
|
||||
|
||||
@item --total
|
||||
Output a summary at the end.
|
||||
|
||||
Similar to the regular output,
|
||||
column one contains the total number of lines unique to @var{file1},
|
||||
column two contains the total number of lines unique to @var{file2}, and
|
||||
column three contains the total number of lines common to both files,
|
||||
followed by the word @samp{total} in the additional column four.
|
||||
|
||||
In the following example, @command{comm} omits the regular output
|
||||
(@option{-123}), thus just printing the summary:
|
||||
|
||||
@example
|
||||
$ printf '%s\n' a b c d e > file1
|
||||
$ printf '%s\n' b c d e f g > file2
|
||||
$ comm --total -123 file1 file2
|
||||
1 2 4 total
|
||||
@end example
|
||||
|
||||
This option is a GNU extension. Portable scripts should use @command{wc} to
|
||||
get the totals, e.g., for the above example files:
|
||||
|
||||
@example
|
||||
$ comm -23 file1 file2 | wc -l # number of lines only in file1
|
||||
1
|
||||
$ comm -13 file1 file2 | wc -l # number of lines only in file2
|
||||
2
|
||||
$ comm -12 file1 file2 | wc -l # number of lines common to both files
|
||||
4
|
||||
@end example
|
||||
|
||||
@optZeroTerminated
|
||||
|
||||
@end table
|
||||
|
||||
49
src/comm.c
49
src/comm.c
@@ -63,6 +63,9 @@ static bool issued_disorder_warning[2];
|
||||
/* line delimiter. */
|
||||
static unsigned char delim = '\n';
|
||||
|
||||
/* If true, print a summary. */
|
||||
static bool total_option;
|
||||
|
||||
/* If nonzero, check that the input is correctly ordered. */
|
||||
static enum
|
||||
{
|
||||
@@ -82,7 +85,8 @@ enum
|
||||
{
|
||||
CHECK_ORDER_OPTION = CHAR_MAX + 1,
|
||||
NOCHECK_ORDER_OPTION,
|
||||
OUTPUT_DELIMITER_OPTION
|
||||
OUTPUT_DELIMITER_OPTION,
|
||||
TOTAL_OPTION
|
||||
};
|
||||
|
||||
static struct option const long_options[] =
|
||||
@@ -90,6 +94,7 @@ static struct option const long_options[] =
|
||||
{"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
|
||||
{"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
|
||||
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
|
||||
{"total", no_argument, NULL, TOTAL_OPTION},
|
||||
{"zero-terminated", no_argument, NULL, 'z'},
|
||||
{GETOPT_HELP_OPTION_DECL},
|
||||
{GETOPT_VERSION_OPTION_DECL},
|
||||
@@ -135,6 +140,9 @@ and column three contains lines common to both files.\n\
|
||||
"), stdout);
|
||||
fputs (_("\
|
||||
--output-delimiter=STR separate columns with STR\n\
|
||||
"), stdout);
|
||||
fputs (_("\
|
||||
--total output a summary\n\
|
||||
"), stdout);
|
||||
fputs (_("\
|
||||
-z, --zero-terminated line delimiter is NUL, not newline\n\
|
||||
@@ -263,6 +271,9 @@ compare_files (char **infiles)
|
||||
/* streams[i] holds the input stream for file i. */
|
||||
FILE *streams[2];
|
||||
|
||||
/* Counters for the summary. */
|
||||
uintmax_t total[] = {0, 0, 0};
|
||||
|
||||
int i, j;
|
||||
|
||||
/* Initialize the storage. */
|
||||
@@ -317,14 +328,26 @@ compare_files (char **infiles)
|
||||
|
||||
/* Output the line that is lesser. */
|
||||
if (order == 0)
|
||||
writeline (thisline[1], stdout, 3);
|
||||
{
|
||||
/* Line is seen in both files. */
|
||||
total[2]++;
|
||||
writeline (thisline[1], stdout, 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
seen_unpairable = true;
|
||||
if (order <= 0)
|
||||
writeline (thisline[0], stdout, 1);
|
||||
{
|
||||
/* Line is seen in file 1 only. */
|
||||
total[0]++;
|
||||
writeline (thisline[0], stdout, 1);
|
||||
}
|
||||
else
|
||||
writeline (thisline[1], stdout, 2);
|
||||
{
|
||||
/* Line is seen in file 2 only. */
|
||||
total[1]++;
|
||||
writeline (thisline[1], stdout, 2);
|
||||
}
|
||||
}
|
||||
|
||||
/* Step the file the line came from.
|
||||
@@ -365,6 +388,19 @@ compare_files (char **infiles)
|
||||
for (i = 0; i < 2; i++)
|
||||
if (fclose (streams[i]) != 0)
|
||||
die (EXIT_FAILURE, errno, "%s", quotef (infiles[i]));
|
||||
|
||||
if (total_option)
|
||||
{
|
||||
/* Print the summary, minding the column and line delimiters. */
|
||||
char buf1[INT_BUFSIZE_BOUND (uintmax_t)];
|
||||
char buf2[INT_BUFSIZE_BOUND (uintmax_t)];
|
||||
char buf3[INT_BUFSIZE_BOUND (uintmax_t)];
|
||||
printf ("%s%s%s%s%s%s%s%c",
|
||||
umaxtostr (total[0], buf1), col_sep,
|
||||
umaxtostr (total[1], buf2), col_sep,
|
||||
umaxtostr (total[2], buf3), col_sep,
|
||||
_("total"), delim);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
@@ -388,6 +424,7 @@ main (int argc, char **argv)
|
||||
seen_unpairable = false;
|
||||
issued_disorder_warning[0] = issued_disorder_warning[1] = false;
|
||||
check_input_order = CHECK_ORDER_DEFAULT;
|
||||
total_option = false;
|
||||
|
||||
while ((c = getopt_long (argc, argv, "123z", long_options, NULL)) != -1)
|
||||
switch (c)
|
||||
@@ -423,6 +460,10 @@ main (int argc, char **argv)
|
||||
col_sep_len = *optarg ? strlen (optarg) : 1;
|
||||
break;
|
||||
|
||||
case TOTAL_OPTION:
|
||||
total_option = true;
|
||||
break;
|
||||
|
||||
case_GETOPT_HELP_CHAR;
|
||||
|
||||
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
|
||||
|
||||
@@ -27,37 +27,50 @@ my $prog = 'comm';
|
||||
# Turn off localization of executable's output.
|
||||
@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
|
||||
|
||||
my @inputs = ({IN=>{a=>"1\n3"}}, {IN=>{b=>"2\n3"}});
|
||||
my @zinputs = ({IN=>{za=>"1\0003"}}, {IN=>{zb=>"2\0003"}});
|
||||
my @inputs = ({IN=>{a=>"1\n3\n3\n3"}}, {IN=>{b=>"2\n2\n3\n3\n3"}});
|
||||
my @zinputs = ({IN=>{za=>"1\0003\0003\0003"}},
|
||||
{IN=>{zb=>"2\0002\0003\0003\0003"}});
|
||||
|
||||
my @Tests =
|
||||
(
|
||||
# basic operation
|
||||
['basic', @inputs, {OUT=>"1\n\t2\n\t\t3\n"} ],
|
||||
['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t\t3\0"} ],
|
||||
['basic', @inputs, {OUT=>"1\n\t2\n\t2\n\t\t3\n\t\t3\n\t\t3\n"} ],
|
||||
['zbasic', '-z', @zinputs, {OUT=>"1\0\t2\0\t2\0\t\t3\0\t\t3\0\t\t3\0"} ],
|
||||
|
||||
# suppress lines unique to file 1
|
||||
['opt-1', '-1', @inputs, {OUT=>"2\n\t3\n"} ],
|
||||
['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0\t3\0"} ],
|
||||
['opt-1', '-1', @inputs, {OUT=>"2\n2\n\t3\n\t3\n\t3\n"} ],
|
||||
['zopt-1', '-z', '-1', @zinputs, {OUT=>"2\0002\000\t3\000\t3\000\t3\000"} ],
|
||||
|
||||
# suppress lines unique to file 2
|
||||
['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n"} ],
|
||||
['opt-2', '-2', @inputs, {OUT=>"1\n\t3\n\t3\n\t3\n"} ],
|
||||
['zopt-2', '-z', '-2', @zinputs, {OUT=>"1\000\t3\000\t3\000\t3\000"} ],
|
||||
|
||||
# suppress lines that appear in both files
|
||||
['opt-3', '-3', @inputs, {OUT=>"1\n\t2\n"} ],
|
||||
['opt-3', '-3', @inputs, {OUT=>"1\n\t2\n\t2\n"} ],
|
||||
['zopt-3', '-z', '-3', @zinputs, {OUT=>"1\000\t2\000\t2\000"} ],
|
||||
|
||||
# suppress lines unique to file 1 and lines unique to file 2
|
||||
['opt-12', '-1', '-2', @inputs, {OUT=>"3\n"} ],
|
||||
['opt-12', '-1', '-2', @inputs, {OUT=>"3\n3\n3\n"} ],
|
||||
['zopt-12', '-12z', @zinputs, {OUT=>"3\0003\0003\000"} ],
|
||||
|
||||
# suppress lines unique to file 1 and those that appear in both files
|
||||
['opt-13', '-1', '-3', @inputs, {OUT=>"2\n"} ],
|
||||
['opt-13', '-1', '-3', @inputs, {OUT=>"2\n2\n"} ],
|
||||
['zopt-13', '-13z', @zinputs, {OUT=>"2\0002\000"} ],
|
||||
|
||||
# suppress lines unique to file 2 and those that appear in both files
|
||||
['opt-23', '-2', '-3', @inputs, {OUT=>"1\n"} ],
|
||||
['zopt-23', '-23z', @zinputs, {OUT=>"1\000"} ],
|
||||
|
||||
# suppress all output (really?)
|
||||
# suppress all output
|
||||
['opt-123', '-1', '-2', '-3', @inputs, {OUT=>""} ],
|
||||
|
||||
# show summary: 1 only in file1, 2 only in file2, 3 in both files
|
||||
['total-all', '--total', @inputs, {OUT=>"1\n\t2\n\t2\n\t\t3\n\t\t3\n\t\t3\n"
|
||||
. "1\t2\t3\ttotal\n"} ],
|
||||
|
||||
# show summary only, suppressing regular output
|
||||
['total-123', '--total', '-123', @inputs, {OUT=>"1\t2\t3\ttotal\n"} ],
|
||||
|
||||
# invalid missing command line argument (1)
|
||||
['missing-arg1', $inputs[0], {EXIT=>1},
|
||||
{ERR => "$prog: missing operand after 'a'\n"
|
||||
@@ -128,17 +141,17 @@ my @Tests =
|
||||
|
||||
# alternate delimiter: ','
|
||||
['delim-comma', '--output-delimiter=,', @inputs,
|
||||
{OUT=>"1\n,2\n,,3\n"} ],
|
||||
{OUT=>"1\n,2\n,2\n,,3\n,,3\n,,3\n"} ],
|
||||
|
||||
# two-character alternate delimiter: '++'
|
||||
['delim-2char', '--output-delimiter=++', @inputs,
|
||||
{OUT=>"1\n++2\n++++3\n"} ],
|
||||
{OUT=>"1\n++2\n++2\n++++3\n++++3\n++++3\n"} ],
|
||||
|
||||
# NUL delimiter
|
||||
['delim-empty', '--output-delimiter=', @inputs,
|
||||
{OUT=>"1\n\0002\n\000\0003\n"} ],
|
||||
{OUT=>"1\n\0002\n\0002\n\000\0003\n\000\0003\n\000\0003\n"} ],
|
||||
['zdelim-empty', '-z', '-z --output-delimiter=', @zinputs,
|
||||
{OUT=>"1\000\0002\000\000\0003\000"} ],
|
||||
{OUT=>"1\000\0002\000\0002\000\000\0003\000\000\0003\000\000\0003\000"} ],
|
||||
|
||||
# invalid dual delimiter
|
||||
['delim-dual', '--output-delimiter=,', '--output-delimiter=+', @inputs,
|
||||
@@ -146,8 +159,16 @@ my @Tests =
|
||||
|
||||
# valid dual delimiter specification
|
||||
['delim-dual2', '--output-delimiter=,', '--output-delimiter=,', @inputs,
|
||||
{OUT=>"1\n,2\n,,3\n"} ],
|
||||
{OUT=>"1\n,2\n,2\n,,3\n,,3\n,,3\n"} ],
|
||||
|
||||
# show summary, zero-terminated
|
||||
['totalz-all', '--total', '-z', @zinputs,
|
||||
{OUT=>"1\000\t2\000\t2\000\t\t3\000\t\t3\000\t\t3\000"
|
||||
. "1\t2\t3\ttotal\000"} ],
|
||||
|
||||
# show summary only (-123), zero-terminated and with ',' as delimiter
|
||||
['totalz-123', '--total', '-z123', '--output-delimiter=,', @zinputs,
|
||||
{OUT=>"1,2,3,total\000"} ],
|
||||
);
|
||||
|
||||
my $save_temps = $ENV{DEBUG};
|
||||
|
||||
Reference in New Issue
Block a user