1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-04-10 06:03:31 +02:00

cut: optimize -b by avoiding per byte iteration

Always search for line_delim with memchr(), which is fast and allows:

- skipping whole segments when the next selected byte is beyond them
- skipping unselected prefixes in bulk
- writing contiguous selected spans in bulk

This wins for lines >= 4 characters,
but is slower for lines <= 3 characters, especially if selecting bytes 1-3.
That is unusual though.
This commit is contained in:
Pádraig Brady
2026-03-22 12:20:04 +00:00
parent ea6a7ba547
commit a16d56d60c

View File

@@ -658,6 +658,13 @@ field_selection_exhausted (uintmax_t field_idx)
return !print_kth (field_idx) && current_rp->lo == UINTMAX_MAX;
}
/* Bring current_rp up to date with BYTE_IDX: step it forward past each
   range whose hi endpoint is less than or equal to BYTE_IDX, stopping at
   the first range whose hi is greater than BYTE_IDX.
   NOTE(review): there is no explicit end-of-list check here; presumably
   the range list ends with a sentinel entry whose hi can never be
   <= BYTE_IDX -- confirm against where the list is built.  */
static inline void
sync_byte_selection (uintmax_t byte_idx)
{
  for (; current_rp->hi <= byte_idx; current_rp++)
    continue;
}
static inline void
reset_field_line (uintmax_t *field_idx, bool *found_any_selected_field,
bool *have_pending_line, struct mbfield_parser *parser)
@@ -675,35 +682,58 @@ reset_field_line (uintmax_t *field_idx, bool *found_any_selected_field,
static void
cut_bytes (FILE *stream)
{
uintmax_t byte_idx; /* Number of bytes in the line so far. */
/* Whether to begin printing delimiters between ranges for the current line.
Set after we've begun printing data corresponding to the first range. */
bool print_delimiter;
uintmax_t byte_idx = 0;
bool print_delimiter = false;
static char line_in[IO_BUFSIZE];
byte_idx = 0;
print_delimiter = false;
current_rp = frp;
while (true)
{
int c; /* Each character from the file. */
c = getc (stream);
if (c == line_delim)
reset_item_line (&byte_idx, &print_delimiter);
else if (c == EOF)
idx_t available = fread (line_in, sizeof *line_in, sizeof line_in,
stream);
if (available == 0)
{
write_pending_line_delim (byte_idx);
break;
}
else
idx_t processed = 0;
while (processed < available)
{
next_item (&byte_idx);
if (print_kth (byte_idx))
char *line = line_in + processed;
char *line_end = memchr ((void *) line, line_delim,
available - processed);
char *end = line + (line_end ? line_end - line : available - processed);
char *p = line;
while (p < end)
{
char ch = c;
write_selected_item (&print_delimiter,
is_range_start_index (byte_idx), &ch, 1);
sync_byte_selection (byte_idx);
if (byte_idx + 1 < current_rp->lo)
{
idx_t skip = MIN (end - p, current_rp->lo - (byte_idx + 1));
p += skip;
byte_idx += skip;
}
else
{
idx_t n = MIN (end - p, current_rp->hi - byte_idx);
write_selected_item (&print_delimiter,
is_range_start_index (byte_idx + 1),
p, n);
p += n;
byte_idx += n;
}
}
processed += end - line;
if (line_end)
{
processed++;
reset_item_line (&byte_idx, &print_delimiter);
}
}
}