mirror of
git://git.sv.gnu.org/coreutils.git
synced 2026-02-17 21:02:25 +02:00
This patch is by Gene Auyeung, Chris Dickens, Chen Guo, and Mike Nichols, based off of a patch by Paul Eggert, Glen Lenker, et al., with a basic heap implementation based off of the GDSL heap, originally by Nicolas Darnis. The number of sorts done in parallel is limited to the number of available processors by default, or can be further restricted with the --parallel option. On a dual-die, 8-core Intel Xeon, results show sorting with 8 threads is almost 4 times faster than using a single thread. Timings when sorting a 96MB file (threads: time in seconds): 1: 5.10; 2: 2.87; 4: 1.75; 8: 1.31. Single-threaded sorting has also been improved, especially for cheaper comparison operations (command: before (s) -> after (s)): sort: 8.822 -> 8.716; sort -g: 10.336 -> 10.222; sort -n: 3.077 -> 2.961; LANG=C sort: 2.169 -> 2.066. * bootstrap.conf: Add heap, pthread. * coreutils.texi (sort): Describe the new --parallel option. * gl/lib/heap.c: New file. Very basic heap implementation. * gl/lib/heap.h: New file. * gl/modules/heap: New file. * src/Makefile.am: Add LIB_PTHREAD. * src/sort.c: Include heap.h, nproc.h, pthread.h. (MAX_MERGE): New macro. (SUBTHREAD_LINES_HEURISTIC, PARALLEL_OPTION): New constants. (MERGE_END, MERGE_ROOT): New constants. (struct merge_node): New struct. (struct merge_node_queue): New struct. (sortlines_temp): Remove declaration. (usage, long_options, main): New option, --parallel. (specify_nthreads): New function. (mergelines): New signature, to emphasize the fact that the HI area must be part of the destination. All callers changed. (sequential_sort): New function, renamed from sortlines. Merge in the functionality of sortlines_temp. (compare_nodes): New function. (lock_node, unlock_node): New functions. (queue_destroy): New function. (queue_init): New function. (queue_insert): New function. (queue_pop): New function. (write_unique): New function. (mergelines_node): New function. (check_insert): New function. (update_parent): New function. (merge_loop): New function. 
(sortlines): Rewrite to support and use parallelism, with a new signature. All callers changed. (struct thread_args): New struct. (sortlines_thread): New function. (sortlines_temp): Remove. (sort): New argument NTHREADS. All uses changed. Output moved to mergelines_node. (main): disable threading if we are sorting at random. * tests/Makefile.am (TESTS): Add misc/sort-benchmark-random. * tests/misc/sort-benchmark-random: New file. Signed-off-by: Pádraig Brady <P@draigBrady.com>
355 lines
6.3 KiB
Plaintext
355 lines
6.3 KiB
Plaintext
# Bootstrap configuration.
|
|
|
|
# Copyright (C) 2006-2010 Free Software Foundation, Inc.
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
# We don't need these modules; each one is listed as an --avoid= option.
|
|
avoided_gnulib_modules='
|
|
--avoid=canonicalize-lgpl
|
|
--avoid=dummy
|
|
'
|
|
|
|
# These modules are obsolete and can probably be removed soon,
|
|
# but leave them in for now to minimize changes.
|
|
obsolete_gnulib_modules='
|
|
atexit
|
|
memcmp
|
|
memcpy
|
|
memmove
|
|
memset
|
|
strcspn
|
|
strtod
|
|
strtol
|
|
'
|
|
|
|
# gnulib modules used by this package.  The list begins with the
# --avoid options and the obsolete modules defined above.
|
|
gnulib_modules="
|
|
$avoided_gnulib_modules
|
|
$obsolete_gnulib_modules
|
|
acl
|
|
alignof
|
|
alloca
|
|
announce-gen
|
|
areadlink-with-size
|
|
argmatch
|
|
argv-iter
|
|
assert
|
|
autobuild
|
|
backupfile
|
|
base64
|
|
c-strcase
|
|
c-strtod
|
|
c-strtold
|
|
calloc
|
|
canon-host
|
|
canonicalize
|
|
chown
|
|
cloexec
|
|
closein
|
|
closeout
|
|
config-h
|
|
configmake
|
|
crypto/md5
|
|
crypto/sha1
|
|
crypto/sha256
|
|
crypto/sha512
|
|
cycle-check
|
|
d-ino
|
|
d-type
|
|
di-set
|
|
diacrit
|
|
dirfd
|
|
dirname
|
|
do-release-commit-and-tag
|
|
dup2
|
|
environ
|
|
error
|
|
euidaccess
|
|
exclude
|
|
exitfail
|
|
faccessat
|
|
fchdir
|
|
fcntl
|
|
fcntl-safer
|
|
fdl
|
|
file-type
|
|
fileblocks
|
|
filemode
|
|
filenamecat
|
|
filevercmp
|
|
fnmatch-gnu
|
|
fopen-safer
|
|
fprintftime
|
|
freopen
|
|
freopen-safer
|
|
fseeko
|
|
fsusage
|
|
fsync
|
|
ftello
|
|
fts
|
|
getdate
|
|
getgroups
|
|
gethrxtime
|
|
getline
|
|
getloadavg
|
|
getndelim2
|
|
getopt-gnu
|
|
getpagesize
|
|
getpass-gnu
|
|
gettext
|
|
gettime
|
|
gettimeofday
|
|
getugroups
|
|
getusershell
|
|
git-version-gen
|
|
gitlog-to-changelog
|
|
gnu-make
|
|
gnu-web-doc-update
|
|
gnumakefile
|
|
gnupload
|
|
group-member
|
|
hard-locale
|
|
hash
|
|
hash-pjw
|
|
heap
|
|
host-os
|
|
human
|
|
idcache
|
|
ignore-value
|
|
inttostr
|
|
inttypes
|
|
isapipe
|
|
isblank
|
|
lchmod
|
|
lchown
|
|
lib-ignore
|
|
linebuffer
|
|
link
|
|
link-follow
|
|
linkat
|
|
long-options
|
|
lstat
|
|
maintainer-makefile
|
|
malloc
|
|
manywarnings
|
|
mbrtowc
|
|
mbsalign
|
|
mbswidth
|
|
memcasecmp
|
|
memchr
|
|
memcmp2
|
|
mempcpy
|
|
memrchr
|
|
mgetgroups
|
|
mkancesdirs
|
|
mkdir
|
|
mkdir-p
|
|
mkstemp
|
|
mktime
|
|
modechange
|
|
mountlist
|
|
mpsort
|
|
netinet_in
|
|
nproc
|
|
obstack
|
|
pathmax
|
|
perl
|
|
physmem
|
|
posix-shell
|
|
posixtm
|
|
posixver
|
|
priv-set
|
|
progname
|
|
propername
|
|
pthread
|
|
putenv
|
|
quote
|
|
quotearg
|
|
raise
|
|
randint
|
|
randperm
|
|
readlink
|
|
readtokens
|
|
readtokens0
|
|
readutmp
|
|
realloc
|
|
regex
|
|
remove
|
|
rename
|
|
rmdir
|
|
root-dev-ino
|
|
rpmatch
|
|
safe-read
|
|
same
|
|
save-cwd
|
|
savedir
|
|
savewd
|
|
selinux-at
|
|
settime
|
|
sig2str
|
|
sigaction
|
|
ssize_t
|
|
stat-macros
|
|
stat-time
|
|
stdbool
|
|
stdlib-safer
|
|
stpcpy
|
|
stpncpy
|
|
strdup-posix
|
|
strftime
|
|
strpbrk
|
|
strsignal
|
|
strtoimax
|
|
strtoumax
|
|
symlink
|
|
sys_ioctl
|
|
sys_stat
|
|
sys_wait
|
|
timespec
|
|
tzset
|
|
uname
|
|
unicodeio
|
|
unistd-safer
|
|
unlink-busy
|
|
unlocked-io
|
|
unsetenv
|
|
update-copyright
|
|
uptime
|
|
useless-if-before-free
|
|
userspec
|
|
utimecmp
|
|
utimens
|
|
vasprintf-posix
|
|
vc-list-files
|
|
verify
|
|
verror
|
|
version-etc-fsf
|
|
wcwidth
|
|
winsz-ioctl
|
|
winsz-termios
|
|
write-any-file
|
|
xalloc
|
|
xfreopen
|
|
xgetcwd
|
|
xgethostname
|
|
xmemcoll
|
|
xnanosleep
|
|
xprintf
|
|
xprintf-posix
|
|
xreadlink
|
|
xstrtod
|
|
xstrtoimax
|
|
xstrtol
|
|
xstrtold
|
|
xstrtoumax
|
|
yesno
|
|
"
|
|
|
|
# Other locale categories that need message catalogs.
|
|
EXTRA_LOCALE_CATEGORIES=LC_TIME
|
|
|
|
# Additional xgettext options to use. Use "\\\newline" to break lines.
|
|
XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
|
|
--flag=asnprintf:3:c-format\\\
|
|
--flag=asprintf:2:c-format\\\
|
|
--flag=error:3:c-format\\\
|
|
--flag=error_at_line:5:c-format\\\
|
|
--flag=vasnprintf:3:c-format\\\
|
|
--flag=vasprintf:2:c-format\\\
|
|
--flag=verror:3:c-format\\\
|
|
--flag=verror_at_line:5:c-format\\\
|
|
--flag=wrapf:1:c-format\\\
|
|
--flag=xasprintf:1:c-format\\\
|
|
--flag=xfprintf:2:c-format\\\
|
|
--flag=xprintf:1:c-format\\\
|
|
--from-code=UTF-8\\\
|
|
'
|
|
|
|
# Append these, since we use the propername module.
|
|
see_manual='"This is a proper name. See the gettext manual, section Names."'
|
|
see_manual=\'"$see_manual"\'
|
|
XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
|
|
--keyword=proper_name:1,'"$see_manual"'\\\
|
|
--keyword=proper_name_utf8:1,'"$see_manual"'\\\
|
|
'
|
|
|
|
# If "AM_GNU_GETTEXT(external" or "AM_GNU_GETTEXT([external]"
|
|
# appears in configure.ac, exclude some unnecessary files.
|
|
# Without grep's -E option (not portable enough, pre-configure),
|
|
# the following test is ugly. Also, this depends on the existence
|
|
# of configure.ac, not the obsolescent-named configure.in. But if
|
|
# you're using this infrastructure, you should care about such things.
|
|
|
|
# Detect whether configure.ac invokes AM_GNU_GETTEXT with "external".
# Start from "not external"; either grep below flips the flag to 1.
gettext_external=0
|
|
# Unquoted form: AM_GNU_GETTEXT(external ...), at the start of a line
# (optionally preceded by the characters in the bracket expression).
grep '^[ ]*AM_GNU_GETTEXT(external\>' configure.ac > /dev/null &&
|
|
gettext_external=1
|
|
# Bracket-quoted form: AM_GNU_GETTEXT([external] ...).
grep '^[ ]*AM_GNU_GETTEXT(\[external\]' configure.ac > /dev/null &&
|
|
gettext_external=1
|
|
|
|
# excluded_files is assigned only in the "external" case; when the flag
# is still 0 this fragment leaves the variable untouched.
if test $gettext_external = 1; then
|
|
# Gettext supplies these files, but we don't need them since
|
|
# we don't have an intl subdirectory.
|
|
excluded_files='
|
|
m4/glibc2.m4
|
|
m4/intdiv0.m4
|
|
m4/lcmessage.m4
|
|
m4/uintmax_t.m4
|
|
m4/ulonglong.m4
|
|
m4/visibility.m4
|
|
'
|
|
fi
|
|
|
|
gnulib_tool_option_extras="--tests-base=$bt/gnulib-tests --with-tests"
|
|
|
|
# Build prerequisites
|
|
buildreq="\
|
|
autoconf 2.62
|
|
automake 1.11.1
|
|
autopoint -
|
|
bison -
|
|
gettext 0.18
|
|
git 1.4.4
|
|
gperf -
|
|
gzip -
|
|
makeinfo -
|
|
perl 5.5
|
|
rsync -
|
|
tar -
|
|
xz -
|
|
"
|
|
|
|
# Automake requires that ChangeLog exist.
# Create it when missing (on an existing file touch only updates the
# timestamps); if touch itself fails, stop with exit status 1.
|
|
touch ChangeLog || exit 1
|
|
|
|
bootstrap_epilogue()
|
|
{
|
|
# Change paths in gnulib-tests/gnulib.mk from "../.." to "..".
|
|
m=gnulib-tests/gnulib.mk
|
|
sed 's,\.\./\.\.,..,g' $m > $m-t
|
|
mv -f $m-t $m
|
|
|
|
# Since this is a "GNU" package, replace this line
|
|
# if LC_ALL=C grep 'GNU @PACKAGE@' $(top_srcdir)/* 2>/dev/null \
|
|
# | grep -v 'libtool:' >/dev/null; then
|
|
# with this:
|
|
# if true; then
|
|
# Why? That pipeline searches all files in $(top_srcdir), and if you
|
|
# happen to have large files (or apparently large sparse files), the
|
|
# first grep may well run out of memory.
|
|
perl -pi -e 's/if LC_ALL=C grep .GNU .PACKAGE.*; then/if true; then/' \
|
|
po/Makefile.in.in
|
|
}
|