sort: use long doubles for general numeric mode

* src/sort.c (general_numcompare): Use long doubles unconditionally, and strtold when available, to convert numbers with greater range and precision. Performance was seen to be on par with standard doubles.
* doc/coreutils.texi (sort invocation): Amend the -g description to mention long double rather than double, and strtold rather than strtod.
* src/getlimits.c (main): Output floating point limits for use in tests.
* tests/misc/sort-float: A new test to ensure sort is using long doubles when possible, and that locale specific floats are handled.
* tests/Makefile.am: Reference the new test.
* tests/test-lib.sh (getlimits_): Normalize indenting.
* NEWS: Mention the new behaviour.
Reported by Nelson Beebe.
2026-04-20 10:51:48 +02:00 · 2010-04-28 23:54:33 +01:00
parent 7905d6d34d
commit 8b5087d4e6
7 changed files with 86 additions and 9 deletions
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU coreutils NEWS                                    -*- outline -*-

 * Noteworthy changes in release ?.? (????-??-??) [?]

+** Changes in behavior
+
+  sort -g now uses long doubles for greater range and precision.
+

 * Noteworthy changes in release 8.5 (2010-04-23) [stable]

--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3767,8 +3767,8 @@ the final result, after the throwing away.))
@opindex --sort
@cindex general numeric sort
@vindex LC_NUMERIC
-Sort numerically, using the standard C function @code{strtod} to convert
-a prefix of each line to a double-precision floating point number.
+Sort numerically, using the standard C function @code{strtold} to convert
+a prefix of each line to a long double-precision floating point number.
 This allows floating point numbers to be specified in scientific notation,
 like @code{1.0e-34} and @code{10e100}.
 The @env{LC_NUMERIC} locale determines the decimal-point character.
--- a/src/getlimits.c
+++ b/src/getlimits.c
@@ -19,6 +19,7 @@
 #include <config.h>             /* sets _FILE_OFFSET_BITS=64 etc. */
 #include <stdio.h>
 #include <sys/types.h>
+#include <float.h>

 #include "system.h"
 #include "c-ctype.h"
@@ -123,7 +124,7 @@ decimal_ascii_add (const char *str1, const char *str2)
 int
 main (int argc, char **argv)
 {
-  char limit[64];               /* big enough for 128 bit at least */
+  char limit[64];               /* big enough for 128 bit integers at least */
  char *oflow;

  initialize_main (&argc, &argv);
@@ -139,20 +140,24 @@ main (int argc, char **argv)
                      usage, AUTHORS, (char const *) NULL);

 #define print_int(TYPE)                                                  \
-  snprintf (limit, sizeof limit, "%"PRIuMAX, (uintmax_t)TYPE##_MAX);    \
+  snprintf (limit, sizeof limit, "%"PRIuMAX, (uintmax_t)TYPE##_MAX);     \
  printf (#TYPE"_MAX=%s\n", limit);                                      \
  oflow = decimal_ascii_add (limit, "1");                                \
  printf (#TYPE"_OFLOW=%s\n", oflow);                                    \
  free (oflow);                                                          \
  if (TYPE##_MIN)                                                        \
    {                                                                    \
-      snprintf (limit, sizeof limit, "%"PRIdMAX, (intmax_t)TYPE##_MIN); \
+      snprintf (limit, sizeof limit, "%"PRIdMAX, (intmax_t)TYPE##_MIN);  \
      printf (#TYPE"_MIN=%s\n", limit);                                  \
      oflow = decimal_ascii_add (limit, "-1");                           \
      printf (#TYPE"_UFLOW=%s\n", oflow);                                \
      free (oflow);                                                      \
    }

+#define print_float(TYPE)                                                \
+  printf (#TYPE"_MIN=%Le\n", (long double)TYPE##_MIN);                   \
+  printf (#TYPE"_MAX=%Le\n", (long double)TYPE##_MAX);
+
  /* Variable sized ints */
  print_int (CHAR);
  print_int (SCHAR);
@@ -171,4 +176,9 @@ main (int argc, char **argv)
  print_int (OFF_T);
  print_int (INTMAX);
  print_int (UINTMAX);
+
+  /* Variable sized floats */
+  print_float (FLT);
+  print_float (DBL);
+  print_float (LDBL);
 }
--- a/src/sort.c
+++ b/src/sort.c
@@ -1855,10 +1855,16 @@ general_numcompare (const char *sa, const char *sb)
  /* FIXME: maybe add option to try expensive FP conversion
     only if A and B can't be compared more cheaply/accurately.  */

+#if HAVE_C99_STRTOLD /* provided by c-strtold module.  */
+# define STRTOD strtold
+#else
+# define STRTOD strtod
+#endif
+
  char *ea;
  char *eb;
-  double a = strtod (sa, &ea);
-  double b = strtod (sb, &eb);
+  long double a = STRTOD (sa, &ea);
+  long double b = STRTOD (sb, &eb);

  /* Put conversion errors at the start of the collating sequence.  */
  if (sa == ea)
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -224,6 +224,7 @@ TESTS =						\
  misc/sort-compress				\
  misc/sort-continue				\
  misc/sort-files0-from				\
+  misc/sort-float				\
  misc/sort-merge				\
  misc/sort-merge-fdlimit			\
  misc/sort-month				\
--- a/tests/misc/sort-float
+++ b/tests/misc/sort-float
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Ensure sort -g sorts floating point limits correctly
+
+# Copyright (C) 2010 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+if test "$VERBOSE" = yes; then
+  set -x
+  sort --version
+fi
+
+. $srcdir/test-lib.sh
+
+for LOC in C $LOCALE_FR; do
+
+  LC_ALL=$LOC getlimits_
+
+  # See if sort should be using long doubles
+  grep '^#define HAVE_C99_STRTOLD 1' $CONFIG_HEADER > /dev/null ||
+    { LDBL_MAX="$DBL_MAX"; LDBL_MIN="$DBL_MIN"; }
+
+  printf -- "\
+-$LDBL_MAX
+-$DBL_MAX
+-$FLT_MAX
+-$FLT_MIN
+-$DBL_MIN
+-$LDBL_MIN
+0
+$LDBL_MIN
+$DBL_MIN
+$FLT_MIN
+$FLT_MAX
+$DBL_MAX
+$LDBL_MAX
+" |
+  grep '^[0-9.,e+-]*$' > exp # restrict to numeric just in case
+
+  tac exp | LC_ALL=$LOC sort -sg > out || fail=1
+
+  compare out exp || fail=1
+done
+
+Exit $fail
--- a/tests/test-lib.sh
+++ b/tests/test-lib.sh
@@ -57,8 +57,8 @@ skip_test_()

 getlimits_()
 {
-    eval $(getlimits)
-    test "$INT_MAX" ||
+  eval $(getlimits)
+  test "$INT_MAX" ||
    error_ "Error running getlimits"
 }