1
0
mirror of git://git.sv.gnu.org/coreutils.git synced 2026-04-20 18:56:39 +02:00

Update from glibc.

This commit is contained in:
Jim Meyering
2000-05-04 06:41:26 +00:00
parent 0605601f99
commit 3c13ac6c36
2 changed files with 532 additions and 135 deletions

View File

@@ -6,4 +6,5 @@ getopt1.c
md5.h
obstack.h
regex.h
regex.c
getpagesize.h

View File

@@ -2,7 +2,7 @@
version 0.12.
(Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc.
Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -78,7 +78,12 @@
__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
#define btowc __btowc
# define btowc __btowc
/* We are also using some library internals. */
# include <locale/localeinfo.h>
# include <locale/elem-hash.h>
# include <langinfo.h>
#endif
/* This is for other GNU distributions with internationalized messages. */
@@ -164,47 +169,15 @@ char *realloc ();
# define SWITCH_ENUM_CAST(x) (x)
# endif
/* How many characters in the character set. */
# define CHAR_SET_SIZE 256
# ifdef SYNTAX_TABLE
extern char *re_syntax_table;
# else /* not SYNTAX_TABLE */
static char re_syntax_table[CHAR_SET_SIZE];
/* Fill in re_syntax_table the first time this is called; later calls
   do nothing.  The entries for the letters `a'-`z' and `A'-`Z', the
   digits `0'-`9' and the underscore are set to Sword; every other
   entry is left zeroed.  */
static void
init_syntax_once ()
{
  static int initialized = 0;
  register int ch;

  if (!initialized)
    {
      bzero (re_syntax_table, sizeof re_syntax_table);

      ch = 'a';
      while (ch <= 'z')
        re_syntax_table[ch++] = Sword;

      ch = 'A';
      while (ch <= 'Z')
        re_syntax_table[ch++] = Sword;

      ch = '0';
      while (ch <= '9')
        re_syntax_table[ch++] = Sword;

      re_syntax_table['_'] = Sword;

      /* Remember that the table is ready so the work is not repeated.  */
      initialized = 1;
    }
}
# endif /* not SYNTAX_TABLE */
# define SYNTAX(c) re_syntax_table[c]
#endif /* not emacs */
#if defined _LIBC || HAVE_LIMITS_H
# include <limits.h>
#endif
#ifndef MB_LEN_MAX
# define MB_LEN_MAX 1
#endif
/* Get the interface, including the syntax bits. */
#include <regex.h>
@@ -276,6 +249,43 @@ init_syntax_once ()
# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif
#ifndef emacs
/* How many characters in the character set. */
# define CHAR_SET_SIZE 256
# ifdef SYNTAX_TABLE
extern char *re_syntax_table;
# else /* not SYNTAX_TABLE */
static char re_syntax_table[CHAR_SET_SIZE];
/* Fill in re_syntax_table the first time this is called; later calls
   do nothing.  Every character code below CHAR_SET_SIZE for which
   ISALNUM holds, plus the underscore, gets the value Sword; every
   other entry is left zeroed.  */
static void
init_syntax_once ()
{
  static int initialized = 0;
  register int ch;

  if (!initialized)
    {
      bzero (re_syntax_table, sizeof re_syntax_table);

      ch = 0;
      while (ch < CHAR_SET_SIZE)
        {
          if (ISALNUM (ch))
            re_syntax_table[ch] = Sword;
          ++ch;
        }

      re_syntax_table['_'] = Sword;

      /* Remember that the table is ready so the work is not repeated.  */
      initialized = 1;
    }
}
# endif /* not SYNTAX_TABLE */
# define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
#endif /* emacs */
/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
use `alloca' instead of `malloc'. This is because using malloc in
re_search* or re_match* could cause memory leaks when C-g is used in
@@ -615,7 +625,7 @@ extract_number_and_incr (destination, source)
/* It is useful to test things that ``must'' be true when debugging. */
# include <assert.h>
static int debug = 0;
static int debug;
# define DEBUG_STATEMENT(e) e
# define DEBUG_PRINT1(x) if (debug) printf (x)
@@ -681,7 +691,11 @@ print_partial_compiled_pattern (start, end)
/* Loop over pattern commands. */
while (p < pend)
{
printf ("%d:\t", p - start);
#ifdef _LIBC
printf ("%t:\t", p - start);
#else
printf ("%ld:\t", (long int) (p - start));
#endif
switch ((re_opcode_t) *p++)
{
@@ -771,17 +785,30 @@ print_partial_compiled_pattern (start, end)
case on_failure_jump:
extract_number_and_incr (&mcnt, &p);
printf ("/on_failure_jump to %d", p + mcnt - start);
#ifdef _LIBC
printf ("/on_failure_jump to %t", p + mcnt - start);
#else
printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
#endif
break;
case on_failure_keep_string_jump:
extract_number_and_incr (&mcnt, &p);
printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
#ifdef _LIBC
printf ("/on_failure_keep_string_jump to %t", p + mcnt - start);
#else
printf ("/on_failure_keep_string_jump to %ld",
(long int) (p + mcnt - start));
#endif
break;
case dummy_failure_jump:
extract_number_and_incr (&mcnt, &p);
printf ("/dummy_failure_jump to %d", p + mcnt - start);
#ifdef _LIBC
printf ("/dummy_failure_jump to %t", p + mcnt - start);
#else
printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
#endif
break;
case push_dummy_failure:
@@ -790,29 +817,50 @@ print_partial_compiled_pattern (start, end)
case maybe_pop_jump:
extract_number_and_incr (&mcnt, &p);
printf ("/maybe_pop_jump to %d", p + mcnt - start);
#ifdef _LIBC
printf ("/maybe_pop_jump to %t", p + mcnt - start);
#else
printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
#endif
break;
case pop_failure_jump:
extract_number_and_incr (&mcnt, &p);
printf ("/pop_failure_jump to %d", p + mcnt - start);
#ifdef _LIBC
printf ("/pop_failure_jump to %t", p + mcnt - start);
#else
printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
#endif
break;
case jump_past_alt:
extract_number_and_incr (&mcnt, &p);
printf ("/jump_past_alt to %d", p + mcnt - start);
#ifdef _LIBC
printf ("/jump_past_alt to %t", p + mcnt - start);
#else
printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
#endif
break;
case jump:
extract_number_and_incr (&mcnt, &p);
printf ("/jump to %d", p + mcnt - start);
#ifdef _LIBC
printf ("/jump to %t", p + mcnt - start);
#else
printf ("/jump to %ld", (long int) (p + mcnt - start));
#endif
break;
case succeed_n:
extract_number_and_incr (&mcnt, &p);
p1 = p + mcnt;
extract_number_and_incr (&mcnt2, &p);
printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
#ifdef _LIBC
printf ("/succeed_n to %t, %d times", p1 - start, mcnt2);
#else
printf ("/succeed_n to %ld, %d times",
(long int) (p1 - start), mcnt2);
#endif
break;
case jump_n:
@@ -826,7 +874,12 @@ print_partial_compiled_pattern (start, end)
extract_number_and_incr (&mcnt, &p);
p1 = p + mcnt;
extract_number_and_incr (&mcnt2, &p);
printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
#ifdef _LIBC
printf ("/set_number_at location %t to %d", p1 - start, mcnt2);
#else
printf ("/set_number_at location %ld to %d",
(long int) (p1 - start), mcnt2);
#endif
break;
case wordbound:
@@ -893,7 +946,11 @@ print_partial_compiled_pattern (start, end)
putchar ('\n');
}
printf ("%d:\tend of pattern.\n", p - start);
#ifdef _LIBC
printf ("%t:\tend of pattern.\n", p - start);
#else
printf ("%ld:\tend of pattern.\n", (long int) (p - start));
#endif
}
@@ -913,7 +970,11 @@ print_compiled_pattern (bufp)
print_fastmap (bufp->fastmap);
}
printf ("re_nsub: %d\t", bufp->re_nsub);
#ifdef _LIBC
printf ("re_nsub: %Zd\t", bufp->re_nsub);
#else
printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
#endif
printf ("regs_alloc: %d\t", bufp->regs_allocated);
printf ("can_be_null: %d\t", bufp->can_be_null);
printf ("newline_anchor: %d\n", bufp->newline_anchor);
@@ -1013,25 +1074,79 @@ weak_alias (__re_set_syntax, re_set_syntax)
POSIX doesn't require that we do anything for REG_NOERROR,
but why not be nice? */
static const char *re_error_msgid[] =
static const char re_error_msgid[] =
{
gettext_noop ("Success"), /* REG_NOERROR */
gettext_noop ("No match"), /* REG_NOMATCH */
gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
gettext_noop ("Invalid range end"), /* REG_ERANGE */
gettext_noop ("Memory exhausted"), /* REG_ESPACE */
gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
gettext_noop ("Premature end of regular expression"), /* REG_EEND */
gettext_noop ("Regular expression too big"), /* REG_ESIZE */
gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
#define REG_NOERROR_IDX 0
gettext_noop ("Success") /* REG_NOERROR */
"\0"
#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
gettext_noop ("No match") /* REG_NOMATCH */
"\0"
#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
gettext_noop ("Invalid regular expression") /* REG_BADPAT */
"\0"
#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
"\0"
#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
gettext_noop ("Invalid character class name") /* REG_ECTYPE */
"\0"
#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
gettext_noop ("Trailing backslash") /* REG_EESCAPE */
"\0"
#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
gettext_noop ("Invalid back reference") /* REG_ESUBREG */
"\0"
#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
"\0"
#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
"\0"
#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
gettext_noop ("Unmatched \\{") /* REG_EBRACE */
"\0"
#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
"\0"
#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
gettext_noop ("Invalid range end") /* REG_ERANGE */
"\0"
#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
gettext_noop ("Memory exhausted") /* REG_ESPACE */
"\0"
#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
"\0"
#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
gettext_noop ("Premature end of regular expression") /* REG_EEND */
"\0"
#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
gettext_noop ("Regular expression too big") /* REG_ESIZE */
"\0"
#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
};
/* Offset of each error message inside the single `re_error_msgid'
   string.  Entries are listed in the same order as the REG_*_IDX
   macros are defined, and the table is indexed by the error code, so
   the message for code N starts at
   re_error_msgid + re_error_msgid_idx[N].  */
static const size_t re_error_msgid_idx[] =
{
REG_NOERROR_IDX,
REG_NOMATCH_IDX,
REG_BADPAT_IDX,
REG_ECOLLATE_IDX,
REG_ECTYPE_IDX,
REG_EESCAPE_IDX,
REG_ESUBREG_IDX,
REG_EBRACK_IDX,
REG_EPAREN_IDX,
REG_EBRACE_IDX,
REG_BADBR_IDX,
REG_ERANGE_IDX,
REG_ESPACE_IDX,
REG_BADRPT_IDX,
REG_EEND_IDX,
REG_ESIZE_IDX,
REG_ERPAREN_IDX
};
/* Avoiding alloca during matching, to placate r_alloc. */
@@ -1514,7 +1629,8 @@ static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
reg_syntax_t syntax));
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
reg_syntax_t syntax));
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
const char **p_ptr,
const char *pend,
char *translate,
reg_syntax_t syntax,
@@ -1712,7 +1828,7 @@ typedef struct
{ if (p != pend) \
{ \
PATFETCH (c); \
while (ISDIGIT (c)) \
while ('0' <= c && c <= '9') \
{ \
if (num < 0) \
num = 0; \
@@ -2118,6 +2234,7 @@ regex_compile (pattern, size, syntax, bufp)
case '[':
{
boolean had_char_class = false;
unsigned int range_start = 0xffffffff;
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@@ -2161,6 +2278,7 @@ regex_compile (pattern, size, syntax, bufp)
PATFETCH (c1);
SET_LIST_BIT (c1);
range_start = c1;
continue;
}
@@ -2185,8 +2303,10 @@ regex_compile (pattern, size, syntax, bufp)
&& *p != ']')
{
reg_errcode_t ret
= compile_range (&p, pend, translate, syntax, b);
= compile_range (range_start, &p, pend, translate,
syntax, b);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
range_start = 0xffffffff;
}
else if (p[0] == '-' && p[1] != ']')
@@ -2196,8 +2316,9 @@ regex_compile (pattern, size, syntax, bufp)
/* Move past the `-'. */
PATFETCH (c1);
ret = compile_range (&p, pend, translate, syntax, b);
ret = compile_range (c, &p, pend, translate, syntax, b);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
range_start = 0xffffffff;
}
/* See if we're at the beginning of a possible character
@@ -2320,13 +2441,289 @@ regex_compile (pattern, size, syntax, bufp)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT (':');
range_start = ':';
had_char_class = false;
}
}
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
{
unsigned char str[MB_LEN_MAX + 1];
#ifdef _LIBC
uint32_t nrules =
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
#endif
PATFETCH (c);
c1 = 0;
/* If pattern is `[[='. */
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
for (;;)
{
PATFETCH (c);
if ((c == '=' && *p == ']') || p == pend)
break;
if (c1 < MB_LEN_MAX)
str[c1++] = c;
else
/* This is in any case an invalid class name. */
str[0] = '\0';
}
str[c1] = '\0';
if (c == '=' && *p == ']' && str[0] != '\0')
{
/* If we have no collation data we use the default
collation in which each character is in a class
by itself. It also means that ASCII is the
character set and therefore we cannot have character
with more than one byte in the multibyte
representation. */
#ifdef _LIBC
if (nrules == 0)
#endif
{
if (c1 != 1)
FREE_STACK_RETURN (REG_ECOLLATE);
/* Throw away the ] at the end of the equivalence
class. */
PATFETCH (c);
/* Set the bit for the character. */
SET_LIST_BIT (str[0]);
}
#ifdef _LIBC
else
{
/* Try to match the byte sequence in `str' against
those known to the collate implementation.
First find out whether the bytes in `str' are
actually from exactly one character. */
const int32_t *table;
const unsigned char *weights;
const unsigned char *extra;
const int32_t *indirect;
int32_t idx;
const unsigned char *cp = str;
int ch;
/* This #include defines a local function! */
# include <locale/weight.h>
table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
weights = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
extra = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
idx = findidx (&cp);
if (idx == 0 || cp < str + c1)
/* This is no valid character. */
FREE_STACK_RETURN (REG_ECOLLATE);
/* Throw away the ] at the end of the equivalence
class. */
PATFETCH (c);
/* Now we have to go through the whole table
and find all characters which have the same
first level weight.
XXX Note that this is not entirely correct.
we would have to match multibyte sequences
but this is not possible with the current
implementation. */
for (ch = 1; ch < 256; ++ch)
/* XXX This test would have to be changed if we
would allow matching multibyte sequences. */
if (table[ch] > 0)
{
int32_t idx2 = table[ch];
size_t len = weights[idx2];
/* Test whether the lengths match. */
if (weights[idx] == len)
{
/* They do. Now compare the bytes of
the weight. */
size_t cnt = 0;
while (cnt < len
&& (weights[idx + 1 + cnt]
== weights[idx2 + 1 + cnt]))
++len;
if (cnt == len)
/* They match. Mark the character as
acceptable. */
SET_LIST_BIT (ch);
}
}
}
#endif
had_char_class = true;
}
else
{
c1++;
while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT ('=');
range_start = '=';
had_char_class = false;
}
}
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
{
unsigned char str[128]; /* Should be large enough. */
#ifdef _LIBC
uint32_t nrules =
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
#endif
PATFETCH (c);
c1 = 0;
/* If pattern is `[[.'. */
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
for (;;)
{
PATFETCH (c);
if ((c == '.' && *p == ']') || p == pend)
break;
if (c1 < sizeof (str))
str[c1++] = c;
else
/* This is in any case an invalid class name. */
str[0] = '\0';
}
str[c1] = '\0';
if (c == '.' && *p == ']' && str[0] != '\0')
{
/* If we have no collation data we use the default
collation in which each character is the name
for its own class which contains only the one
character. It also means that ASCII is the
character set and therefore we cannot have character
with more than one byte in the multibyte
representation. */
#ifdef _LIBC
if (nrules == 0)
#endif
{
if (c1 != 1)
FREE_STACK_RETURN (REG_ECOLLATE);
/* Throw away the ] at the end of the collating
symbol. */
PATFETCH (c);
/* Set the bit for the character. */
SET_LIST_BIT (str[0]);
range_start = ((const unsigned char *) str)[0];
}
#ifdef _LIBC
else
{
/* Try to match the byte sequence in `str' against
those known to the collate implementation.
First find out whether the bytes in `str' are
actually from exactly one character. */
int32_t table_size;
const int32_t *symb_table;
const unsigned char *extra;
int32_t idx;
int32_t elem;
int32_t second;
int32_t hash;
table_size =
_NL_CURRENT_WORD (LC_COLLATE,
_NL_COLLATE_SYMB_HASH_SIZEMB);
symb_table = (const int32_t *)
_NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_TABLEMB);
extra = (const unsigned char *)
_NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
/* Locate the character in the hashing table. */
hash = elem_hash (str, c1);
idx = 0;
elem = hash % table_size;
second = hash % (table_size - 2);
while (symb_table[2 * elem] != 0)
{
/* First compare the hashing value. */
if (symb_table[2 * elem] == hash
&& c1 == extra[symb_table[2 * elem + 1]]
&& memcmp (str,
&extra[symb_table[2 * elem + 1]
+ 1],
c1) == 0)
{
/* Yep, this is the entry. */
idx = symb_table[2 * elem + 1];
idx += 1 + extra[idx];
break;
}
/* Next entry. */
elem += second;
}
if (symb_table[2 * elem] == 0)
/* This is no valid character. */
FREE_STACK_RETURN (REG_ECOLLATE);
/* Throw away the ] at the end of the collating
symbol. */
PATFETCH (c);
/* Now add the multibyte character(s) we found
to the accepted list.
XXX Note that this is not entirely correct.
we would have to match multibyte sequences
but this is not possible with the current
implementation. Also, we have to match
collating symbols, which expand to more than
one byte, as a whole and not allow the
individual bytes. */
c1 = extra[idx++];
if (c1 == 1)
range_start = extra[idx];
while (c1-- > 0)
SET_LIST_BIT (extra[idx++]);
}
#endif
had_char_class = false;
}
else
{
c1++;
while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT ('.');
range_start = '.';
had_char_class = false;
}
}
else
{
had_char_class = false;
SET_LIST_BIT (c);
range_start = c;
}
}
@@ -2553,8 +2950,7 @@ regex_compile (pattern, size, syntax, bufp)
if (!(syntax & RE_INTERVALS)
/* If we're at `\{' and it's not the open-interval
operator. */
|| ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
|| (p - 2 == pattern && p == pend))
|| (syntax & RE_NO_BK_BRACES))
goto normal_backslash;
handle_interval:
@@ -2568,7 +2964,7 @@ regex_compile (pattern, size, syntax, bufp)
if (p == pend)
{
if (syntax & RE_NO_BK_BRACES)
if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
goto unfetch_interval;
else
FREE_STACK_RETURN (REG_EBRACE);
@@ -2579,7 +2975,12 @@ regex_compile (pattern, size, syntax, bufp)
if (c == ',')
{
GET_UNSIGNED_NUMBER (upper_bound);
if (upper_bound < 0) upper_bound = RE_DUP_MAX;
if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
|| ((syntax & RE_NO_BK_BRACES) && c != '}'))
FREE_STACK_RETURN (REG_BADBR);
if (upper_bound < 0)
upper_bound = RE_DUP_MAX;
}
else
/* Interval such as `{1}' => match exactly once. */
@@ -2588,7 +2989,7 @@ regex_compile (pattern, size, syntax, bufp)
if (lower_bound < 0 || upper_bound > RE_DUP_MAX
|| lower_bound > upper_bound)
{
if (syntax & RE_NO_BK_BRACES)
if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
goto unfetch_interval;
else
FREE_STACK_RETURN (REG_BADBR);
@@ -2603,7 +3004,7 @@ regex_compile (pattern, size, syntax, bufp)
if (c != '}')
{
if (syntax & RE_NO_BK_BRACES)
if (!(syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
goto unfetch_interval;
else
FREE_STACK_RETURN (REG_BADBR);
@@ -3076,49 +3477,53 @@ group_in_compile_stack (compile_stack, regnum)
`regex_compile' itself. */
static reg_errcode_t
compile_range (p_ptr, pend, translate, syntax, b)
const char **p_ptr, *pend;
RE_TRANSLATE_TYPE translate;
reg_syntax_t syntax;
unsigned char *b;
compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
unsigned int range_start_char;
const char **p_ptr, *pend;
RE_TRANSLATE_TYPE translate;
reg_syntax_t syntax;
unsigned char *b;
{
unsigned this_char;
const char *p = *p_ptr;
unsigned int range_start, range_end;
reg_errcode_t ret;
char range_start[2];
char range_end[2];
char ch[2];
if (p == pend)
return REG_ERANGE;
/* Even though the pattern is a signed `char *', we need to fetch
with unsigned char *'s; if the high bit of the pattern character
is set, the range endpoints will be negative if we fetch using a
signed char *.
We also want to fetch the endpoints without translating them; the
/* Fetch the endpoints without translating them; the
appropriate translation is done in the bit-setting loop below. */
/* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
range_start = ((const unsigned char *) p)[-2];
range_end = ((const unsigned char *) p)[0];
range_start[0] = range_start_char;
range_start[1] = '\0';
range_end[0] = p[0];
range_end[1] = '\0';
/* Have to increment the pointer into the pattern string, so the
caller isn't still at the ending character. */
(*p_ptr)++;
/* If the start is after the end, the range is empty. */
if (range_start > range_end)
return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
/* Report an error if the range is empty and the syntax prohibits this. */
ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
/* Here we see why `this_char' has to be larger than an `unsigned
char' -- the range is inclusive, so if `range_end' == 0xff
(assuming 8-bit characters), we would otherwise go into an infinite
loop, since all characters <= 0xff. */
for (this_char = range_start; this_char <= range_end; this_char++)
char' -- we would otherwise go into an infinite loop, since all
characters <= 0xff. */
ch[1] = '\0';
for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
{
SET_LIST_BIT (TRANSLATE (this_char));
ch[0] = this_char;
if (strcoll (range_start, ch) <= 0 && strcoll (ch, range_end) <= 0)
{
SET_LIST_BIT (TRANSLATE (this_char));
ret = REG_NOERROR;
}
}
return REG_NOERROR;
return ret;
}
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
@@ -3848,7 +4253,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
fail_stack_type fail_stack;
#endif
#ifdef DEBUG
static unsigned failure_id = 0;
static unsigned failure_id;
unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
@@ -4779,26 +5184,15 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
}
else if ((re_opcode_t) *p2 == charset)
{
#ifdef DEBUG
register unsigned char c
= *p2 == (unsigned char) endline ? '\n' : p2[2];
#endif
#if 0
/* We win if the first character of the loop is not part
of the charset. */
if ((re_opcode_t) p1[3] == exactn
&& ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
&& (p2[2 + p1[5] / BYTEWIDTH]
& (1 << (p1[5] % BYTEWIDTH)))))
#else
if ((re_opcode_t) p1[3] == exactn
&& ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
&& (p2[2 + p1[4] / BYTEWIDTH]
& (1 << (p1[4] % BYTEWIDTH)))))
#endif
{
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
c, p1[5]);
&& ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
&& (p2[2 + p1[5] / BYTEWIDTH]
& (1 << (p1[5] % BYTEWIDTH)))))
{
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
}
else if ((re_opcode_t) p1[3] == charset_not)
@@ -5489,7 +5883,7 @@ re_compile_pattern (pattern, length, bufp)
if (!ret)
return NULL;
return gettext (re_error_msgid[(int) ret]);
return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
}
#ifdef _LIBC
weak_alias (__re_compile_pattern, re_compile_pattern)
@@ -5526,12 +5920,14 @@ re_comp (s)
{
re_comp_buf.buffer = (unsigned char *) malloc (200);
if (re_comp_buf.buffer == NULL)
return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
return (char *) gettext (re_error_msgid
+ re_error_msgid_idx[(int) REG_ESPACE]);
re_comp_buf.allocated = 200;
re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
if (re_comp_buf.fastmap == NULL)
return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
return (char *) gettext (re_error_msgid
+ re_error_msgid_idx[(int) REG_ESPACE]);
}
/* Since `re_exec' always passes NULL for the `regs' argument, we
@@ -5546,7 +5942,7 @@ re_comp (s)
return NULL;
/* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
return (char *) gettext (re_error_msgid[(int) ret]);
return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
}
@@ -5666,7 +6062,7 @@ regcomp (preg, pattern, cflags)
buffer. */
if (re_compile_fastmap (preg) == -2)
{
/* Some error occured while computing the fastmap, just forget
/* Some error occurred while computing the fastmap, just forget
about it. */
free (preg->fastmap);
preg->fastmap = NULL;
@@ -5772,15 +6168,15 @@ regerror (errcode, preg, errbuf, errbuf_size)
size_t msg_size;
if (errcode < 0
|| errcode >= (int) (sizeof (re_error_msgid)
/ sizeof (re_error_msgid[0])))
|| errcode >= (int) (sizeof (re_error_msgid_idx)
/ sizeof (re_error_msgid_idx[0])))
/* Only error codes returned by the rest of the code should be passed
to this routine. If we are given anything else, or if other regex
code generates an invalid error code, then the program has a bug.
Dump core so we can fix it. */
abort ();
msg = gettext (re_error_msgid[errcode]);
msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
msg_size = strlen (msg) + 1; /* Includes the null. */