1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-04-10 06:03:31 +02:00

cat,cp,mv,install,split: Set the minimum IO block size used to 32KiB

This is following on from this change:
[02c3dc9d 2008-03-06 cat: use larger buffer sizes ...]
which increased the IO block size used by cat by 8 times,
but also capped it at 32KiB.
* NEWS: Mention the change in behavior.
* src/system.h: Add a new io_blksize() function that
returns the max of ST_BLKSIZE and 32KiB, as this was
seen as a good value for a minimum block size to use
to get good performance while minimizing system call overhead.
* src/cat.c: Use it.
* src/copy.c: ditto
* src/split.c: ditto
This commit is contained in:
Pádraig Brady
2009-03-06 22:30:55 +00:00
parent 93f6771e82
commit 55efc5f3ee
5 changed files with 58 additions and 22 deletions

5
NEWS
View File

@@ -24,6 +24,11 @@ GNU coreutils NEWS -*- outline -*-
Previously -k1,1b would have caused leading space from field 2 to be
included in the sort while -k2,3.0 would have not included field 3.
** Changes in behavior
cp,mv,install,cat,split: now read and write a minimum of 32KiB
at a time. This was seen to increase throughput, by up to 2 times
when reading cached files on Linux, for example.
* Noteworthy changes in release 7.1 (2009-02-21) [stable]

View File

@@ -78,12 +78,6 @@ static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;
/* Preserves the `cat' function's local `newlines' between invocations. */
static int newlines2 = 0;
/* Return the I/O buffer size to use for the file described by ST:
   8 times ST_BLKSIZE (st), capped at 32 KiB.  */
static inline size_t
compute_buffer_size (struct stat st)
{
return MIN (8 * ST_BLKSIZE (st), 32 * 1024);
}
void
usage (int status)
{
@@ -642,7 +636,7 @@ main (int argc, char **argv)
if (fstat (STDOUT_FILENO, &stat_buf) < 0)
error (EXIT_FAILURE, errno, _("standard output"));
outsize = compute_buffer_size (stat_buf);
outsize = io_blksize (stat_buf);
/* Input file can be output file for non-regular files.
fstat on pipes returns S_IFSOCK on some systems, S_IFIFO
on others, so the checking should not be done for those types,
@@ -706,7 +700,7 @@ main (int argc, char **argv)
ok = false;
goto contin;
}
insize = compute_buffer_size (stat_buf);
insize = io_blksize (stat_buf);
/* Compare the device and i-node numbers of this input file with
the corresponding values of the (output file associated with)

View File

@@ -568,7 +568,7 @@ copy_reg (char const *src_name, char const *dst_name,
/* Choose a suitable buffer size; it may be adjusted later. */
size_t buf_alignment = lcm (getpagesize (), sizeof (word));
size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1;
size_t buf_size = ST_BLKSIZE (sb);
size_t buf_size = io_blksize (sb);
/* Deal with sparse files. */
bool last_write_made_hole = false;
@@ -596,21 +596,12 @@ copy_reg (char const *src_name, char const *dst_name,
buffer size. */
if (! make_holes)
{
/* These days there's no point ever messing with buffers smaller
than 8 KiB. It would be nice to configure SMALL_BUF_SIZE
dynamically for this host and pair of files, but there doesn't
seem to be a good way to get readahead info portably. */
enum { SMALL_BUF_SIZE = 8 * 1024 };
/* Compute the least common multiple of the input and output
buffer sizes, adjusting for outlandish values. */
size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment_slop;
size_t blcm = buffer_lcm (ST_BLKSIZE (src_open_sb), buf_size,
size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size,
blcm_max);
/* Do not use a block size that is too small. */
buf_size = MAX (SMALL_BUF_SIZE, blcm);
/* Do not bother with a buffer larger than the input file, plus one
byte to make sure the file has not grown while reading it. */
if (S_ISREG (src_open_sb.st_mode) && src_open_sb.st_size < buf_size)

View File

@@ -554,7 +554,7 @@ main (int argc, char **argv)
if (fstat (STDIN_FILENO, &stat_buf) != 0)
error (EXIT_FAILURE, errno, "%s", infile);
in_blk_size = ST_BLKSIZE (stat_buf);
in_blk_size = io_blksize (stat_buf);
buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);

View File

@@ -147,6 +147,9 @@ enum
# define D_INO(dp) NOT_AN_INODE_NUMBER
#endif
/* include here for SIZE_MAX. */
#include <inttypes.h>
/* Get or fake the disk device blocksize.
Usually defined by sys/param.h (if at all). */
#if !defined DEV_BSIZE && defined BSIZE
@@ -218,6 +221,51 @@ enum
# endif
#endif
/* As of Mar 2009, 32KiB is determined to be the minimum
blksize to best minimize system call overhead.
This can be tested with this script with the results
shown for a 1.7GHz pentium-m with 2GB of 400MHz DDR2 RAM:
for i in $(seq 0 10); do
size=$((8*1024**3)) #ensure this is big enough
bs=$((1024*2**$i))
printf "%7s=" $bs
dd bs=$bs if=/dev/zero of=/dev/null count=$(($size/$bs)) 2>&1 |
sed -n 's/.* \([0-9.]* [GM]B\/s\)/\1/p'
done
1024=734 MB/s
2048=1.3 GB/s
4096=2.4 GB/s
8192=3.5 GB/s
16384=3.9 GB/s
32768=5.2 GB/s
65536=5.3 GB/s
131072=5.5 GB/s
262144=5.7 GB/s
524288=5.7 GB/s
1048576=5.8 GB/s
Note that this is to minimize system call overhead.
Other values may be appropriate to minimize file system
or disk overhead. For example, on my current Linux system
the readahead setting is 128KiB which was read using:
file="."
device=$(df -P --local "$file" | tail -n1 | cut -d' ' -f1)
echo $(( $(blockdev --getra $device) * 512 ))
However there isn't a portable way to get the above.
In the future we could use the above method if available
and default to io_blksize() if not.
*/
/* Minimum I/O block size, in bytes, returned by io_blksize.  */
enum { IO_BUFSIZE = 32*1024 };
/* Return the I/O block size to use for the file described by SB:
   the larger of IO_BUFSIZE and ST_BLKSIZE (sb).  */
static inline size_t
io_blksize (struct stat sb)
{
return MAX (IO_BUFSIZE, ST_BLKSIZE (sb));
}
/* Redirection and wildcarding when done by the utility itself.
Generally a noop, but used in particular for native VMS. */
#ifndef initialize_main
@@ -228,8 +276,6 @@ enum
#include "timespec.h"
#include <inttypes.h>
#include <ctype.h>
#if ! (defined isblank || HAVE_DECL_ISBLANK)