1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-02-17 21:02:25 +02:00
Files
coreutils/bootstrap.conf
Chen Guo 9face836f3 sort: parallelize internal sort
This patch is by Gene Auyeung, Chris Dickens, Chen Guo, and Mike
Nichols, based off of a patch by Paul Eggert, Glen Lenker, et al.,
with a basic heap implementation based off of the GDSL heap,
originally by Nicolas Darnis.

The number of sorts done in parallel is limited to the number
of available processors by default, or can be further restricted
with the --parallel option.

On a dual-die, 8 core Intel Xeon, results show sorting with
8 threads is almost 4 times faster than using a single thread.
Timings when sorting a 96MB file:
THREADS     TIME (s)
1            5.10
2            2.87
4            1.75
8            1.31

Single threaded sorting has also been improved,
especially for cheaper comparison operations:
COMMAND             BEFORE (s)  AFTER (s)
sort                 8.822       8.716
sort -g             10.336      10.222
sort -n              3.077       2.961
LANG=C sort          2.169       2.066

* bootstrap.conf: Add heap, pthread.
* coreutils.texi (sort): Describe the new --parallel option.
* gl/lib/heap.c: New file. Very basic heap implementation.
* gl/lib/heap.h: New file.
* gl/modules/heap: New file.
* src/Makefile.am: Add LIB_PTHREAD.
* src/sort.c: Include heap.h, nproc.h, pthread.h.
(MAX_MERGE): New macro.
(SUBTHREAD_LINES_HEURISTIC, PARALLEL_OPTION): New constants.
(MERGE_END, MERGE_ROOT): New constants.
(struct merge_node): New struct.
(struct merge_node_queue): New struct.
(sortlines temp): Remove declaration.
(usage, long_options, main): New option, --parallel.
(specify_nthreads): New function.
(mergelines): New signature, to emphasize the fact that the HI area
must be part of the destination.  All callers changed.
(sequential_sort): New function, renamed from sortlines. Merge in
the functionality of sortlines_temp.
(compare_nodes): New function.
(lock_node, unlock_node): New functions.
(queue_destroy): New function.
(queue_init): New function.
(queue_insert): New function.
(queue_pop): New function.
(write_unique): New function.
(mergelines_node): New function.
(check_insert): New function.
(update_parent): New function.
(merge_loop): New function.
(sortlines): Rewrite to support and use parallelism, with a new
signature. All callers changed.
(struct thread_args): New struct.
(sortlines_thread): New function.
(sortlines_temp): Remove.
(sort): New argument NTHREADS. All uses changed. Output moved to
mergelines_node.
(main): disable threading if we are sorting at random.
* tests/Makefile.am (TESTS): Add misc/sort-benchmark-random.
* tests/misc/sort-benchmark-random: New file.

Signed-off-by: Pádraig Brady <P@draigBrady.com>
2010-07-13 01:44:46 +01:00

355 lines
6.3 KiB
Plaintext

# Bootstrap configuration.
# Copyright (C) 2006-2010 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# We don't need these modules.
# Each entry is a literal "--avoid=MODULE" option rather than a module name;
# the whole string is interpolated at the top of gnulib_modules below.
# NOTE(review): presumably these end up on the gnulib-tool command line via
# the bootstrap script that sources this file -- confirm against that script.
avoided_gnulib_modules='
--avoid=canonicalize-lgpl
--avoid=dummy
'
# These modules are obsolete and can probably be removed soon,
# but leave them in for now to minimize changes.
# The list is kept in its own variable and interpolated into gnulib_modules
# below, so dropping these modules later only requires editing this list.
obsolete_gnulib_modules='
atexit
memcmp
memcpy
memmove
memset
strcspn
strtod
strtol
'
# gnulib modules used by this package.
# The value starts with the --avoid options and the obsolete modules defined
# above, followed by one module name per line.  The names appear to be kept
# in alphabetical order; keep new entries sorted the same way.
# No comments can be placed inside the list, because it is a single
# double-quoted string.
gnulib_modules="
$avoided_gnulib_modules
$obsolete_gnulib_modules
acl
alignof
alloca
announce-gen
areadlink-with-size
argmatch
argv-iter
assert
autobuild
backupfile
base64
c-strcase
c-strtod
c-strtold
calloc
canon-host
canonicalize
chown
cloexec
closein
closeout
config-h
configmake
crypto/md5
crypto/sha1
crypto/sha256
crypto/sha512
cycle-check
d-ino
d-type
di-set
diacrit
dirfd
dirname
do-release-commit-and-tag
dup2
environ
error
euidaccess
exclude
exitfail
faccessat
fchdir
fcntl
fcntl-safer
fdl
file-type
fileblocks
filemode
filenamecat
filevercmp
fnmatch-gnu
fopen-safer
fprintftime
freopen
freopen-safer
fseeko
fsusage
fsync
ftello
fts
getdate
getgroups
gethrxtime
getline
getloadavg
getndelim2
getopt-gnu
getpagesize
getpass-gnu
gettext
gettime
gettimeofday
getugroups
getusershell
git-version-gen
gitlog-to-changelog
gnu-make
gnu-web-doc-update
gnumakefile
gnupload
group-member
hard-locale
hash
hash-pjw
heap
host-os
human
idcache
ignore-value
inttostr
inttypes
isapipe
isblank
lchmod
lchown
lib-ignore
linebuffer
link
link-follow
linkat
long-options
lstat
maintainer-makefile
malloc
manywarnings
mbrtowc
mbsalign
mbswidth
memcasecmp
memchr
memcmp2
mempcpy
memrchr
mgetgroups
mkancesdirs
mkdir
mkdir-p
mkstemp
mktime
modechange
mountlist
mpsort
netinet_in
nproc
obstack
pathmax
perl
physmem
posix-shell
posixtm
posixver
priv-set
progname
propername
pthread
putenv
quote
quotearg
raise
randint
randperm
readlink
readtokens
readtokens0
readutmp
realloc
regex
remove
rename
rmdir
root-dev-ino
rpmatch
safe-read
same
save-cwd
savedir
savewd
selinux-at
settime
sig2str
sigaction
ssize_t
stat-macros
stat-time
stdbool
stdlib-safer
stpcpy
stpncpy
strdup-posix
strftime
strpbrk
strsignal
strtoimax
strtoumax
symlink
sys_ioctl
sys_stat
sys_wait
timespec
tzset
uname
unicodeio
unistd-safer
unlink-busy
unlocked-io
unsetenv
update-copyright
uptime
useless-if-before-free
userspec
utimecmp
utimens
vasprintf-posix
vc-list-files
verify
verror
version-etc-fsf
wcwidth
winsz-ioctl
winsz-termios
write-any-file
xalloc
xfreopen
xgetcwd
xgethostname
xmemcoll
xnanosleep
xprintf
xprintf-posix
xreadlink
xstrtod
xstrtoimax
xstrtol
xstrtold
xstrtoumax
yesno
"
# Other locale categories that need message catalogs.
EXTRA_LOCALE_CATEGORIES=LC_TIME
# Additional xgettext options to use. Use "\\\newline" to break lines.
# The options are appended to whatever XGETTEXT_OPTIONS already holds.
# Everything between the single quotes is literal, so each line of the value
# ends in three backslashes followed by a newline.
# Each --flag=FUNCTION:N:c-format entry tells xgettext that argument N of
# FUNCTION is a C format string.
XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
--flag=asnprintf:3:c-format\\\
--flag=asprintf:2:c-format\\\
--flag=error:3:c-format\\\
--flag=error_at_line:5:c-format\\\
--flag=vasnprintf:3:c-format\\\
--flag=vasprintf:2:c-format\\\
--flag=verror:3:c-format\\\
--flag=verror_at_line:5:c-format\\\
--flag=wrapf:1:c-format\\\
--flag=xasprintf:1:c-format\\\
--flag=xfprintf:2:c-format\\\
--flag=xprintf:1:c-format\\\
--from-code=UTF-8\\\
'
# Append these, since we use the propername module.
# see_manual is first set to the note text wrapped in literal double quotes.
see_manual='"This is a proper name. See the gettext manual, section Names."'
# Then wrap that value in literal single quotes as well, so the whole note
# is embedded, quotes included, in each --keyword option below.
see_manual=\'"$see_manual"\'
# Register proper_name and proper_name_utf8 as keywords whose first argument
# is the translatable string, with the note above attached after the comma.
XGETTEXT_OPTIONS=$XGETTEXT_OPTIONS'\\\
--keyword=proper_name:1,'"$see_manual"'\\\
--keyword=proper_name_utf8:1,'"$see_manual"'\\\
'
# Work out whether configure.ac asks for an external gettext, i.e. whether
# it contains "AM_GNU_GETTEXT(external" or "AM_GNU_GETTEXT([external]".
# grep's -E option is deliberately avoided (not portable enough this early,
# before configure has run), so each spelling gets its own plain grep.
# Only configure.ac is examined, never the obsolescent-named configure.in;
# anyone using this infrastructure is expected to care about such things.
if grep '^[ ]*AM_GNU_GETTEXT(external\>' configure.ac > /dev/null; then
  gettext_external=1
elif grep '^[ ]*AM_GNU_GETTEXT(\[external\]' configure.ac > /dev/null; then
  gettext_external=1
else
  gettext_external=0
fi
case $gettext_external in
  1)
    # Gettext supplies these files, but they are not needed because this
    # package has no intl subdirectory.
    excluded_files='
m4/glibc2.m4
m4/intdiv0.m4
m4/lcmessage.m4
m4/uintmax_t.m4
m4/ulonglong.m4
m4/visibility.m4
'
    ;;
esac
# Extra gnulib-tool options: include the gnulib tests and place them under
# $bt/gnulib-tests.
# NOTE(review): $bt is not set in this file; presumably the bootstrap script
# that sources this file defines it -- confirm.
gnulib_tool_option_extras="--tests-base=$bt/gnulib-tests --with-tests"
# Build prerequisites
# One tool per line: program name followed by a version.
# NOTE(review): the version looks like the minimum required one, with "-"
# meaning no particular version is required -- confirm against the bootstrap
# script that parses this table.
buildreq="\
autoconf 2.62
automake 1.11.1
autopoint -
bison -
gettext 0.18
git 1.4.4
gperf -
gzip -
makeinfo -
perl 5.5
rsync -
tar -
xz -
"
# Automake requires that ChangeLog exist.
# Abort right away if the file cannot be created.
touch ChangeLog || exit 1
# Patch two generated files: gnulib-tests/gnulib.mk and po/Makefile.in.in.
# Takes no arguments; the exit status is that of the final perl command.
# NOTE(review): presumably called by the bootstrap script that sources this
# file, after gnulib has been imported -- the caller is not visible here.
bootstrap_epilogue()
{
  # Change paths in gnulib-tests/gnulib.mk from "../.." to "..".
  # Install the rewritten copy only if sed succeeded, so that a failed
  # rewrite can never replace gnulib.mk with an empty or truncated file;
  # on failure just discard the temporary copy.
  m=gnulib-tests/gnulib.mk
  if sed 's,\.\./\.\.,..,g' "$m" > "$m-t"; then
    mv -f "$m-t" "$m"
  else
    rm -f "$m-t"
  fi

  # Since this is a "GNU" package, replace this line
  #   if LC_ALL=C grep 'GNU @PACKAGE@' $(top_srcdir)/* 2>/dev/null \
  #      | grep -v 'libtool:' >/dev/null; then
  # with this:
  #   if true; then
  # Why? That pipeline searches all files in $(top_srcdir), and if you
  # happen to have large files (or apparently large sparse files), the
  # first grep may well run out of memory.
  perl -pi -e 's/if LC_ALL=C grep .GNU .PACKAGE.*; then/if true; then/' \
    po/Makefile.in.in
}