sort: fix parsing of end field in obsolescent key formats

This regression was introduced in commit 224a69b5, 2009-02-24, "sort: Fix two bugs with determining the end of field". Specifically, one field too many was included in the key when an end field was specified using the obsolescent key syntax (+POS -POS). * src/sort.c (struct keyfield): Clarify the description of the eword member, as suggested by Alan Curry. (main): When processing obsolescent format key specifications, normalize eword to a zero-based count when no specific end char is given for an end field. This matches what's done when keys are specified with -k. * tests/misc/sort: Add a few more tests for the obsolescent key formats, with test 07i being the particular failure addressed by this change. * THANKS: Add Alan Curry, who precisely identified the issue. * NEWS: Mention the fix. Reported by Santiago Rodríguez.
2026-05-31 14:56:58 +02:00 · 2010-04-20 16:25:55 +01:00
parent 1777d0dfe3
commit 8fc12909f6
4 changed files with 22 additions and 1 deletions
@@ -14,6 +14,10 @@ GNU coreutils NEWS                                    -*- outline -*-
  handled correctly, including multi byte locales with the caveat
  that multi byte characters are matched case sensitively.

+  sort again handles obsolescent key formats (+POS -POS) correctly.
+  Previously, if -POS was specified, one field too many was used in the sort.
+  [bug introduced in coreutils-7.2]
+
 ** New features

  join now accepts the --header option, to treat the first line of each
@@ -17,6 +17,7 @@ Adrian Bunk                         bunk@stusta.de
 AIDA Shinra                         shinra@j10n.org
 Akim Demaille                       demaille@inf.enst.fr
 Alain Magloire                      alain@qnx.com
+Alan Curry                          pacman@kosh.dhis.org
 Alan Iwi                            iwi@atm.ox.ac.uk
 Albert Chin-A-Young                 china@thewrittenword.com
 Albert Hopkins                      ahopkins@dynacare.com
@@ -167,7 +167,7 @@ struct keyfield
 {
  size_t sword;			/* Zero-origin 'word' to start at. */
  size_t schar;			/* Additional characters to skip. */
-  size_t eword;			/* Zero-origin first word after field. */
+  size_t eword;			/* Zero-origin last 'word' of key. */
  size_t echar;			/* Additional characters in field. */
  bool const *ignore;		/* Boolean array of characters to ignore. */
  char const *translate;	/* Translation applied to characters. */
@@ -3389,6 +3389,16 @@ main (int argc, char **argv)
                          if (*s == '.')
                            s = parse_field_count (s + 1, &key->echar,
                                               N_("invalid number after `.'"));
+                          if (!key->echar && key->eword)
+                            {
+                              /* obsolescent syntax +A.x -B.y is equivalent to:
+                                   -k A+1.x+1,B.y   (when y = 0)
+                                   -k A+1.x+1,B+1.y (when y > 0)
+                                 So eword is decremented as in the -k case
+                                 only when the end field (B) is specified and
+                                 echar (y) is 0.  */
+                              key->eword--;
+                            }
                          if (*set_ordering (s, key, bl_end))
                            badfieldspec (optarg1,
                                      N_("stray character in field spec"));
@@ -142,6 +142,12 @@ my @Tests =
 ["07f", '-n -k1.3,1.1', {IN=>"a 2\nb 1\n"}, {OUT=>"a 2\nb 1\n"}],
 ["07g", '-n -k2.2,1.2', {IN=>"aa 2\nbb 1\n"}, {OUT=>"aa 2\nbb 1\n"}],
 ["07h", '-k1.3nb,1.3', {IN=>"  a 2\n  b 1\n"}, {OUT=>"  a 2\n  b 1\n"}],
+# ensure obsolescent key limits are handled correctly
+["07i", '-s +0 -1', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
+["07j", '-s +0 -1.0', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
+["07k", '-s +0 -1.1', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
+["07l", '-s +0 -1.2', {IN=>"a c\na b\n"}, {OUT=>"a b\na c\n"}],
+["07m", '-s +0 -1.1b', {IN=>"a c\na b\n"}, {OUT=>"a b\na c\n"}],
 #
 # report an error for `.' without following char spec
 ["08a", '-k 2.,3', {EXIT=>2},