mirror of
git://git.sv.gnu.org/coreutils.git
synced 2026-04-19 18:26:32 +02:00
split: support split -n on larger pipe input
* bootstrap.conf (gnulib_modules): Add free-posix, tmpfile. * src/split.c (copy_to_tmpfile): New function. (input_file_size): Use it to split larger files when sizes cannot easily be determined via fstat or lseek. See Bug#61386#235. * tests/split/l-chunk.sh: Mark tests of /dev/zero as very expensive since they exhaust /tmp.
This commit is contained in:
3
NEWS
3
NEWS
@@ -145,6 +145,9 @@ GNU coreutils NEWS -*- outline -*-
|
||||
split now accepts options like '-n SIZE' that exceed machine integer
|
||||
range, when they can be implemented as if they were infinity.
|
||||
|
||||
split -n now accepts piped input even when not in round-robin mode,
|
||||
by first copying input to a temporary file to determine its size.
|
||||
|
||||
wc now accepts the --total={auto,never,always,only} option
|
||||
to give explicit control over when the total is output.
|
||||
|
||||
|
||||
@@ -103,6 +103,7 @@ gnulib_modules="
|
||||
fnmatch-gnu
|
||||
fopen-safer
|
||||
fprintftime
|
||||
free-posix
|
||||
freopen
|
||||
freopen-safer
|
||||
fseeko
|
||||
@@ -270,6 +271,7 @@ gnulib_modules="
|
||||
time_rz
|
||||
timer-time
|
||||
timespec
|
||||
tmpfile
|
||||
tzset
|
||||
uname
|
||||
unicodeio
|
||||
|
||||
@@ -3409,8 +3409,10 @@ are not split even if they overlap a partition, the files written
|
||||
can be larger or smaller than the partition size, and even empty
|
||||
if a line/record is so long as to completely overlap the partition.
|
||||
|
||||
For @samp{r} mode, the size of @var{input} is irrelevant,
|
||||
and so can be a pipe for example.
|
||||
When the input is a pipe or some other special file where the size
|
||||
cannot easily be determined, there is no trouble for @samp{r} mode
|
||||
because the size of the input is irrelevant. For other modes, such an
|
||||
input is first copied to a temporary file to determine its size.
|
||||
|
||||
@item -a @var{length}
|
||||
@itemx --suffix-length=@var{length}
|
||||
|
||||
89
src/split.c
89
src/split.c
@@ -275,6 +275,39 @@ CHUNKS may be:\n\
|
||||
exit (status);
|
||||
}
|
||||
|
||||
/* Copy the data in FD to a temporary file, then make that file FD.
|
||||
Use BUF, of size BUFSIZE, to copy. Return the number of
|
||||
bytes copied, or -1 (setting errno) on error. */
|
||||
static off_t
|
||||
copy_to_tmpfile (int fd, char *buf, idx_t bufsize)
|
||||
{
|
||||
FILE *tmp = tmpfile ();
|
||||
if (!tmp)
|
||||
return -1;
|
||||
off_t copied = 0;
|
||||
off_t r;
|
||||
|
||||
while (0 < (r = read (fd, buf, bufsize)))
|
||||
{
|
||||
if (fwrite (buf, 1, r, tmp) != r)
|
||||
return -1;
|
||||
if (INT_ADD_WRAPV (copied, r, &copied))
|
||||
{
|
||||
errno = EOVERFLOW;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (r < 0)
|
||||
return r;
|
||||
r = dup2 (fileno (tmp), fd);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (fclose (tmp) < 0)
|
||||
return -1;
|
||||
return copied;
|
||||
}
|
||||
|
||||
/* Return the number of bytes that can be read from FD with status ST.
|
||||
Store up to the first BUFSIZE bytes of the file's data into BUF,
|
||||
and advance the file position by the number of bytes read. On
|
||||
@@ -293,49 +326,35 @@ input_file_size (int fd, struct stat const *st, char *buf, idx_t bufsize)
|
||||
}
|
||||
while (size < bufsize);
|
||||
|
||||
off_t cur = lseek (fd, 0, SEEK_CUR);
|
||||
if (cur < 0)
|
||||
off_t cur, end;
|
||||
if ((usable_st_size (st) && st->st_size < size)
|
||||
|| (cur = lseek (fd, 0, SEEK_CUR)) < 0
|
||||
|| cur < size /* E.g., /dev/zero on GNU/Linux. */
|
||||
|| (end = lseek (fd, 0, SEEK_END)) < 0)
|
||||
{
|
||||
if (errno == ESPIPE)
|
||||
errno = 0; /* Suppress confusing seek error. */
|
||||
return cur;
|
||||
}
|
||||
|
||||
off_t end;
|
||||
if (usable_st_size (st))
|
||||
end = st->st_size;
|
||||
else
|
||||
{
|
||||
end = lseek (fd, 0, SEEK_END);
|
||||
char *tmpbuf = xmalloc (bufsize);
|
||||
end = copy_to_tmpfile (fd, tmpbuf, bufsize);
|
||||
free (tmpbuf);
|
||||
if (end < 0)
|
||||
return end;
|
||||
if (end == OFF_T_MAX)
|
||||
goto overflow; /* E.g., /dev/zero on GNU/Hurd. */
|
||||
if (cur < end)
|
||||
{
|
||||
off_t cur1 = lseek (fd, cur, SEEK_SET);
|
||||
if (cur1 < 0)
|
||||
return cur1;
|
||||
}
|
||||
cur = 0;
|
||||
}
|
||||
|
||||
/* Report overflow if we filled the buffer from a file with more
|
||||
bytes than stat or lseek reports. This can happen with mutating
|
||||
(e.g., /proc) files that are larger than the input block size.
|
||||
FIXME: Handle this properly, e.g., by copying the growing file's
|
||||
data into the first output file, and then splitting that output
|
||||
file (which should not grow) into the other output files. */
|
||||
if (end < size)
|
||||
goto overflow;
|
||||
if (end == OFF_T_MAX /* E.g., /dev/zero on GNU/Hurd. */
|
||||
|| (cur < end && INT_ADD_WRAPV (size, end - cur, &size)))
|
||||
{
|
||||
errno = EOVERFLOW;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (cur < end && INT_ADD_WRAPV (size, end - cur, &size))
|
||||
goto overflow;
|
||||
if (cur < end)
|
||||
{
|
||||
off_t r = lseek (fd, cur, SEEK_SET);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
return size;
|
||||
|
||||
overflow:
|
||||
errno = EOVERFLOW;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Compute the next sequential output file name and store it into the
|
||||
|
||||
@@ -37,12 +37,15 @@ rm x??
|
||||
|
||||
# 'split' should reject any attempt to create an infinitely
|
||||
# long output file.
|
||||
returns_ 1 split -n l/2 /dev/zero || fail=1
|
||||
rm x??
|
||||
# This test is very expensive as it runs out of /tmp space.
|
||||
if test "${RUN_VERY_EXPENSIVE_TESTS+set}" = set; then
|
||||
returns_ 1 split -n l/2 /dev/zero || fail=1
|
||||
rm x??
|
||||
|
||||
# Repeat the above, but with 1/2, not l/2:
|
||||
returns_ 1 split -n 1/2 /dev/zero || fail=1
|
||||
rm x??
|
||||
# Repeat the above, but with 1/2, not l/2:
|
||||
returns_ 1 split -n 1/2 /dev/zero || fail=1
|
||||
rm x??
|
||||
fi
|
||||
|
||||
# Ensure --elide-empty-files is honored
|
||||
split -e -n l/10 /dev/null || fail=1
|
||||
|
||||
Reference in New Issue
Block a user