mirror of
git://git.sv.gnu.org/coreutils.git
synced 2026-04-11 14:44:18 +02:00
sort: new --human-numeric-sort option to sort KiB MB etc.
* NEWS: Document the new option. * doc/coreutils.texi (sort invocation): Likewise. * src/sort.c (main): Handle the new --human-numeric-sort option (-h). (human_numcompare): A new function to compare SI and IEC suffixes before falling back to the standard --numeric comparison. (find_unit_order): A new helper function to find the order of magnitude of a number string as determined by its suffix. (check_mixed_SI_IEC): A new helper function to exit with an error if both SI and IEC suffixes are present. * tests/misc/sort: Add 8 tests to test the new functionality. * THANKS: Update.
This commit is contained in:
committed by
Pádraig Brady
parent
c45c51fe97
commit
159faba137
3
NEWS
3
NEWS
@@ -11,6 +11,9 @@ GNU coreutils NEWS -*- outline -*-
|
||||
|
||||
chroot now accepts the options --userspec and --groups.
|
||||
|
||||
sort accepts a new option, --human-numeric-sort (-h): sort numbers
|
||||
while honoring human readable suffixes like KiB and MB etc.
|
||||
|
||||
|
||||
* Noteworthy changes in release 7.4 (2009-05-07) [stable]
|
||||
|
||||
|
||||
1
THANKS
1
THANKS
@@ -396,6 +396,7 @@ Michael J. Croghan mcroghan@usatoday.com
|
||||
Michael McFarland sidlon@yahoo.com
|
||||
Michael McLagan mmclagan@invlogic.com
|
||||
Michael Piefel piefel@informatik.hu-berlin.de
|
||||
Michael Speer knomenet@gmail.com
|
||||
Michael Steffens michael.steffens@s.netic.de
|
||||
Michael Stone mstone@debian.org
|
||||
Michael Stutz stutz@dsl.org
|
||||
|
||||
@@ -3785,6 +3785,21 @@ Use this option only if there is no alternative; it is much slower than
|
||||
@option{--numeric-sort} (@option{-n}) and it can lose information when
|
||||
converting to floating point.
|
||||
|
||||
@item -h
|
||||
@itemx --human-numeric-sort
|
||||
@itemx --sort=human-numeric
|
||||
@opindex -h
|
||||
@opindex --human-numeric-sort
|
||||
@opindex --sort
|
||||
@cindex human numeric sort
|
||||
@vindex LC_NUMERIC
|
||||
Sort numerically, as per the @option{--numeric-sort} option below, and in
|
||||
addition handle IEC or SI suffixes like MiB, MB etc (@ref{Block size}).
|
||||
Note that a mixture of IEC and SI suffixes is not supported and will
|
||||
be flagged as an error. Also the numbers must be abbreviated uniformly.
|
||||
I.e., values with different precisions like 6000K and 5M will be sorted
|
||||
incorrectly.
|
||||
|
||||
@item -i
|
||||
@itemx --ignore-nonprinting
|
||||
@opindex -i
|
||||
|
||||
115
src/sort.c
115
src/sort.c
@@ -176,6 +176,8 @@ struct keyfield
|
||||
bool random; /* Sort by random hash of key. */
|
||||
bool general_numeric; /* Flag for general, numeric comparison.
|
||||
Handle numbers in exponential notation. */
|
||||
bool human_numeric; /* Flag for sorting by human readable
|
||||
units with either SI xor IEC prefixes. */
|
||||
bool month; /* Flag for comparison by month name. */
|
||||
bool reverse; /* Reverse the sense of comparison. */
|
||||
bool version; /* sort by version number */
|
||||
@@ -335,6 +337,9 @@ Ordering options:\n\
|
||||
-g, --general-numeric-sort compare according to general numerical value\n\
|
||||
-i, --ignore-nonprinting consider only printable characters\n\
|
||||
-M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
|
||||
"), stdout);
|
||||
fputs (_("\
|
||||
-h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\
|
||||
"), stdout);
|
||||
fputs (_("\
|
||||
-n, --numeric-sort compare according to string numerical value\n\
|
||||
@@ -344,8 +349,8 @@ Ordering options:\n\
|
||||
"), stdout);
|
||||
fputs (_("\
|
||||
--sort=WORD sort according to WORD:\n\
|
||||
general-numeric -g, month -M, numeric -n,\n\
|
||||
random -R, version -V\n\
|
||||
general-numeric -g, human-numeric -h, month -M,\n\
|
||||
numeric -n, random -R, version -V\n\
|
||||
-V, --version-sort natural sort of (version) numbers within text\n\
|
||||
\n\
|
||||
"), stdout);
|
||||
@@ -426,7 +431,7 @@ enum
|
||||
SORT_OPTION
|
||||
};
|
||||
|
||||
static char const short_options[] = "-bcCdfgik:mMno:rRsS:t:T:uVy:z";
|
||||
static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z";
|
||||
|
||||
static struct option const long_options[] =
|
||||
{
|
||||
@@ -442,6 +447,7 @@ static struct option const long_options[] =
|
||||
{"merge", no_argument, NULL, 'm'},
|
||||
{"month-sort", no_argument, NULL, 'M'},
|
||||
{"numeric-sort", no_argument, NULL, 'n'},
|
||||
{"human-numeric-sort", no_argument, NULL, 'h'},
|
||||
{"version-sort", no_argument, NULL, 'V'},
|
||||
{"random-sort", no_argument, NULL, 'R'},
|
||||
{"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
|
||||
@@ -480,6 +486,7 @@ static char const check_types[] =
|
||||
|
||||
#define SORT_TABLE \
|
||||
_st_("general-numeric", 'g') \
|
||||
_st_("human-numeric", 'h') \
|
||||
_st_("month", 'M') \
|
||||
_st_("numeric", 'n') \
|
||||
_st_("random", 'R') \
|
||||
@@ -1673,6 +1680,87 @@ numcompare (const char *a, const char *b)
|
||||
return strnumcmp (a, b, decimal_point, thousands_sep);
|
||||
}
|
||||
|
||||
/* Exit with an error if a mixture of SI and IEC units detected. */
|
||||
|
||||
static void
|
||||
check_mixed_SI_IEC (char prefix)
|
||||
{
|
||||
static int seen_si = -1;
|
||||
bool si_present = prefix == 'i';
|
||||
if (seen_si != -1 && seen_si != si_present)
|
||||
error (SORT_FAILURE, 0, _("both SI and IEC prefixes present on units"));
|
||||
seen_si = si_present;
|
||||
}
|
||||
|
||||
/* Return an integer which represents the order of magnitude of
|
||||
the unit following the number. NUMBER can contain thousands separators
|
||||
or a decimal point, but not have preceeding blanks.
|
||||
Negative numbers return a negative unit order. */
|
||||
|
||||
static int
|
||||
find_unit_order (const char *number)
|
||||
{
|
||||
static const char orders [UCHAR_LIM] = {
|
||||
['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8,
|
||||
['k']=1,
|
||||
};
|
||||
|
||||
const unsigned char *p = number;
|
||||
|
||||
int sign = 1;
|
||||
|
||||
if (*p == '-')
|
||||
{
|
||||
sign = -1;
|
||||
p++;
|
||||
}
|
||||
|
||||
/* Scan to end of number.
|
||||
Decimals or separators not followed by digits stop the scan.
|
||||
Numbers ending in decimals or separators are thus considered
|
||||
to be lacking in units.
|
||||
FIXME: add support for multibyte thousands_sep and decimal_point. */
|
||||
|
||||
while (ISDIGIT (*p))
|
||||
{
|
||||
p++;
|
||||
|
||||
if (*p == decimal_point && ISDIGIT (*(p + 1)))
|
||||
p += 2;
|
||||
else if (*p == thousands_sep && ISDIGIT (*(p + 1)))
|
||||
p += 2;
|
||||
}
|
||||
|
||||
int order = orders[*p];
|
||||
|
||||
/* For valid units check for MiB vs MB etc. */
|
||||
if (order)
|
||||
check_mixed_SI_IEC (*(p + 1));
|
||||
|
||||
return sign * order;
|
||||
}
|
||||
|
||||
/* Compare numbers ending in units with SI xor IEC prefixes
|
||||
<none/unknown> < K/k < M < G < T < P < E < Z < Y
|
||||
Assume that numbers are properly abbreviated.
|
||||
i.e. input will never have both 6000K and 5M. */
|
||||
|
||||
static int
|
||||
human_numcompare (const char *a, const char *b)
|
||||
{
|
||||
while (blanks[to_uchar (*a)])
|
||||
a++;
|
||||
while (blanks[to_uchar (*b)])
|
||||
b++;
|
||||
|
||||
int order_a = find_unit_order (a);
|
||||
int order_b = find_unit_order (b);
|
||||
|
||||
return (order_a > order_b ? 1
|
||||
: order_a < order_b ? -1
|
||||
: strnumcmp (a, b, decimal_point, thousands_sep));
|
||||
}
|
||||
|
||||
static int
|
||||
general_numcompare (const char *sa, const char *sb)
|
||||
{
|
||||
@@ -1917,13 +2005,14 @@ keycompare (const struct line *a, const struct line *b)
|
||||
|
||||
if (key->random)
|
||||
diff = compare_random (texta, lena, textb, lenb);
|
||||
else if (key->numeric | key->general_numeric)
|
||||
else if (key->numeric | key->general_numeric | key->human_numeric)
|
||||
{
|
||||
char savea = *lima, saveb = *limb;
|
||||
|
||||
*lima = *limb = '\0';
|
||||
diff = ((key->numeric ? numcompare : general_numcompare)
|
||||
(texta, textb));
|
||||
diff = ((key->numeric ? numcompare
|
||||
: key->general_numeric ? general_numcompare
|
||||
: human_numcompare) (texta, textb));
|
||||
*lima = savea, *limb = saveb;
|
||||
}
|
||||
else if (key->version)
|
||||
@@ -2889,7 +2978,7 @@ check_ordering_compatibility (void)
|
||||
|
||||
for (key = keylist; key; key = key->next)
|
||||
if ((1 < (key->random + key->numeric + key->general_numeric + key->month
|
||||
+ key->version + !!key->ignore))
|
||||
+ key->version + !!key->ignore + key->human_numeric))
|
||||
|| (key->random && key->translate))
|
||||
{
|
||||
/* The following is too big, but guaranteed to be "big enough". */
|
||||
@@ -2901,6 +2990,8 @@ check_ordering_compatibility (void)
|
||||
*p++ = 'f';
|
||||
if (key->general_numeric)
|
||||
*p++ = 'g';
|
||||
if (key->human_numeric)
|
||||
*p++ = 'h';
|
||||
if (key->ignore == nonprinting)
|
||||
*p++ = 'i';
|
||||
if (key->month)
|
||||
@@ -2992,6 +3083,9 @@ set_ordering (const char *s, struct keyfield *key, enum blanktype blanktype)
|
||||
case 'g':
|
||||
key->general_numeric = true;
|
||||
break;
|
||||
case 'h':
|
||||
key->human_numeric = true;
|
||||
break;
|
||||
case 'i':
|
||||
/* Option order should not matter, so don't let -i override
|
||||
-d. -d implies -i, but -i does not imply -d. */
|
||||
@@ -3140,7 +3234,8 @@ main (int argc, char **argv)
|
||||
gkey.sword = gkey.eword = SIZE_MAX;
|
||||
gkey.ignore = NULL;
|
||||
gkey.translate = NULL;
|
||||
gkey.numeric = gkey.general_numeric = gkey.random = gkey.version = false;
|
||||
gkey.numeric = gkey.general_numeric = gkey.human_numeric = false;
|
||||
gkey.random = gkey.version = false;
|
||||
gkey.month = gkey.reverse = false;
|
||||
gkey.skipsblanks = gkey.skipeblanks = false;
|
||||
|
||||
@@ -3219,6 +3314,7 @@ main (int argc, char **argv)
|
||||
case 'd':
|
||||
case 'f':
|
||||
case 'g':
|
||||
case 'h':
|
||||
case 'i':
|
||||
case 'M':
|
||||
case 'n':
|
||||
@@ -3471,6 +3567,7 @@ main (int argc, char **argv)
|
||||
| key->numeric
|
||||
| key->version
|
||||
| key->general_numeric
|
||||
| key->human_numeric
|
||||
| key->random)))
|
||||
{
|
||||
key->ignore = gkey.ignore;
|
||||
@@ -3480,6 +3577,7 @@ main (int argc, char **argv)
|
||||
key->month = gkey.month;
|
||||
key->numeric = gkey.numeric;
|
||||
key->general_numeric = gkey.general_numeric;
|
||||
key->human_numeric = gkey.human_numeric;
|
||||
key->random = gkey.random;
|
||||
key->reverse = gkey.reverse;
|
||||
key->version = gkey.version;
|
||||
@@ -3495,6 +3593,7 @@ main (int argc, char **argv)
|
||||
| gkey.month
|
||||
| gkey.numeric
|
||||
| gkey.general_numeric
|
||||
| gkey.human_numeric
|
||||
| gkey.random
|
||||
| gkey.version)))
|
||||
{
|
||||
|
||||
@@ -54,6 +54,24 @@ my @Tests =
|
||||
["n11a", '-s -n -k1,1', {IN=>".01a\n.010\n"}, {OUT=>".01a\n.010\n"}],
|
||||
["n11b", '-s -n -k1,1', {IN=>".010\n.01a\n"}, {OUT=>".010\n.01a\n"}],
|
||||
|
||||
# human readable suffixes
|
||||
["h1", '-h', {IN=>"Y\nZ\nE\nP\nT\nG\nM\nK\n"},
|
||||
{OUT=>"K\nM\nG\nT\nP\nE\nZ\nY\n"}],
|
||||
["h2", '-h', {IN=>"1M\n-2G\n-3K"}, {OUT=>"-2G\n-3K\n1M\n"}],
|
||||
["h3", '-h', {IN=>"1Mi\n1M\n"}, {OUT=>""}, {EXIT=>2},
|
||||
{ERR=>"$prog: both SI and IEC prefixes present on units\n"}],
|
||||
# decimal at end => ignore suffix
|
||||
["h4", '-h', {IN=>"1.E\n2.M\n"}, {OUT=>"1.E\n2.M\n"}],
|
||||
# double decimal => ignore suffix
|
||||
["h5", '-h', {IN=>"1..2E\n2..2M\n"}, {OUT=>"1..2E\n2..2M\n"}],
|
||||
# illustrate misordering of ambiguous abbreviations
|
||||
["h6", '-h', {IN=>"1GiB\n1030MiB\n"}, {OUT=>"1030MiB\n1GiB\n"}],
|
||||
# check option incompatibility
|
||||
["h7", '-hn', {IN=>""}, {OUT=>""}, {EXIT=>2},
|
||||
{ERR=>"$prog: options `-hn' are incompatible\n"}],
|
||||
# check key processing
|
||||
["h8", '-n -k2,2h', {IN=>"1 1E\n2 2M\n"}, {OUT=>"2 2M\n1 1E\n"}],
|
||||
|
||||
["01a", '', {IN=>"A\nB\nC\n"}, {OUT=>"A\nB\nC\n"}],
|
||||
#
|
||||
["02a", '-c', {IN=>"A\nB\nC\n"}, {OUT=>''}],
|
||||
|
||||
Reference in New Issue
Block a user