Commit 2ee90b9b 2ee90b9b7bfa8631f752a1b8cf56f7b189087df7 by Sergey Poznyakoff

Added to the repository by gnulib-sync

1 parent 1a2c62bf
/* intprops.h -- properties of integer types
Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Written by Paul Eggert. */
#include <limits.h>
/* The extra casts in the following macros work around compiler bugs,
e.g., in Cray C 5.0.3.0. */
/* True if the arithmetic type T is an integer type. bool counts as
an integer. */
#define TYPE_IS_INTEGER(t) ((t) 1.5 == 1)
/* True if negative values of the signed integer type T use two's
complement, ones' complement, or signed magnitude representation,
respectively. Much GNU code assumes two's complement, but some
people like to be portable to all possible C hosts. */
#define TYPE_TWOS_COMPLEMENT(t) ((t) ~ (t) 0 == (t) -1)
#define TYPE_ONES_COMPLEMENT(t) ((t) ~ (t) 0 == 0)
#define TYPE_SIGNED_MAGNITUDE(t) ((t) ~ (t) 0 < (t) -1)
/* True if the arithmetic type T is signed. */
#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
/* The maximum and minimum values for the integer type T. These
macros have undefined behavior if T is signed and has padding bits.
If this is a problem for you, please let us know how to fix it for
your host. */
#define TYPE_MINIMUM(t) \
((t) (! TYPE_SIGNED (t) \
? (t) 0 \
: TYPE_SIGNED_MAGNITUDE (t) \
? ~ (t) 0 \
: ~ (t) 0 << (sizeof (t) * CHAR_BIT - 1)))
#define TYPE_MAXIMUM(t) \
((t) (! TYPE_SIGNED (t) \
? (t) -1 \
: ~ (~ (t) 0 << (sizeof (t) * CHAR_BIT - 1))))
/* Bound on length of the string representing an integer type or expression T.
Subtract 1 for the sign bit if t is signed; log10 (2.0) < 146/485;
add 1 for integer division truncation; add 1 more for a minus sign
if needed. */
#define INT_STRLEN_BOUND(t) \
((sizeof (t) * CHAR_BIT - 1) * 146 / 485 + 2)
/* Bound on buffer size needed to represent an integer type or expression T,
including the terminating null. */
#define INT_BUFSIZE_BOUND(t) (INT_STRLEN_BOUND (t) + 1)
/* Report a memory allocation failure and exit.
Copyright (C) 1997, 1998, 1999, 2000, 2002, 2003, 2004 Free
Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#if HAVE_CONFIG_H
# include <config.h>
#endif
#include "xalloc.h"
#include <stdlib.h>
#include "error.h"
#include "exitfail.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
void
xalloc_die (void)
{
error (exit_failure, 0, "%s", _("memory exhausted"));
/* The `noreturn' cannot be given to error, since it may return if
its first argument is 0. To help compilers understand the
xalloc_die does not return, call abort. Also, the abort is a
safety feature if exit_failure is 0 (which shouldn't happen). */
abort ();
}
# getdelim.m4 serial 1
dnl Copyright (C) 2005 Free Software dnl Foundation, Inc.
dnl
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_PREREQ(2.52)
AC_DEFUN([gl_FUNC_GETDELIM],
[
MU_LIBSOURCES([getdelim.c, getdelim.h])
dnl Persuade glibc <stdio.h> to declare getdelim().
AC_REQUIRE([AC_GNU_SOURCE])
MU_REPLACE_FUNCS(getdelim)
AC_CHECK_DECLS_ONCE(getdelim)
if test $ac_cv_func_getdelim = no; then
gl_PREREQ_GETDELIM
fi
])
# Prerequisites of lib/getdelim.c.
AC_DEFUN([gl_PREREQ_GETDELIM],
[
AC_CHECK_FUNCS([flockfile funlockfile])
])
# mbchar.m4 serial 1
dnl Copyright (C) 2005 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl autoconf tests required for use of mbchar.m4
dnl From Bruno Haible.
AC_DEFUN([gl_MBCHAR],
[
AC_REQUIRE([AC_GNU_SOURCE])
:
])
# memchr.m4 serial 4
dnl Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_FUNC_MEMCHR],
[
MU_REPLACE_FUNCS(memchr)
if test $ac_cv_func_memchr = no; then
gl_PREREQ_MEMCHR
fi
])
# Prerequisites of lib/memchr.c.
AC_DEFUN([gl_PREREQ_MEMCHR], [
AC_CHECK_HEADERS(bp-sym.h)
])
# minmax.m4 serial 1
dnl Copyright (C) 2005 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_MINMAX],
[
AC_REQUIRE([gl_PREREQ_MINMAX])
])
# Prerequisites of lib/minmax.h.
AC_DEFUN([gl_PREREQ_MINMAX],
[
gl_MINMAX_IN_HEADER([limits.h])
gl_MINMAX_IN_HEADER([sys/param.h])
])
dnl gl_MINMAX_IN_HEADER(HEADER)
AC_DEFUN([gl_MINMAX_IN_HEADER],
[
define([header],[translit([$1],[./-],
[___])])
define([HEADER],[translit([$1],[abcdefghijklmnopqrstuvwxyz./-],
[ABCDEFGHIJKLMNOPQRSTUVWXYZ___])])
AC_CACHE_CHECK([whether <$1> defines MIN and MAX],
[gl_cv_minmax_in_]header,
[AC_TRY_COMPILE([#include <$1>
int x = MIN (42, 17);], [],
[gl_cv_minmax_in_]header[=yes],
[gl_cv_minmax_in_]header[=no])])
if test $gl_cv_minmax_in_[]header = yes; then
AC_DEFINE([HAVE_MINMAX_IN_]HEADER, 1,
[Define to 1 if <$1> defines the MIN and MAX macros.])
fi
undefine([HEADER])
undefine([header])
])
/* getdelim.c --- Implementation of replacement getdelim function.
Copyright (C) 1994, 1996, 1997, 1998, 2001, 2003, 2005 Free
Software Foundation, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
/* Ported from glibc by Simon Josefsson. */
#if HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdlib.h>
#include <errno.h>
#include "getdelim.h"
#if !HAVE_FLOCKFILE
# undef flockfile
# define flockfile(x) ((void) 0)
#endif
#if !HAVE_FUNLOCKFILE
# undef funlockfile
# define funlockfile(x) ((void) 0)
#endif
/* Read up to (and including) a DELIMITER from FP into *LINEPTR (and
NUL-terminate it). *LINEPTR is a pointer returned from malloc (or
NULL), pointing to *N characters of space. It is realloc'ed as
necessary. Returns the number of characters read (not including
the null terminator), or -1 on error or EOF. */
ssize_t
getdelim (char **lineptr, size_t *n, int delimiter, FILE *fp)
{
int result;
ssize_t cur_len = 0;
ssize_t len;
if (lineptr == NULL || n == NULL || fp == NULL)
{
errno = EINVAL;
return -1;
}
flockfile (fp);
if (*lineptr == NULL || *n == 0)
{
*n = 120;
*lineptr = (char *) malloc (*n);
if (*lineptr == NULL)
{
result = -1;
goto unlock_return;
}
}
for (;;)
{
char *t;
int i;
i = getc (fp);
if (i == EOF)
break;
/* Make enough space for len+1 (for final NUL) bytes. */
if (cur_len + 1 >= *n)
{
size_t needed = 2 * (cur_len + 1) + 1; /* Be generous. */
char *new_lineptr;
if (needed < cur_len)
{
result = -1;
goto unlock_return;
}
new_lineptr = (char *) realloc (*lineptr, needed);
if (new_lineptr == NULL)
{
result = -1;
goto unlock_return;
}
*lineptr = new_lineptr;
*n = needed;
}
(*lineptr)[cur_len] = i;
cur_len++;
if (i == delimiter)
break;
}
(*lineptr)[cur_len] = '\0';
result = cur_len;
unlock_return:
funlockfile (fp);
return result;
}
/* getdelim.h --- Prototype for replacement getdelim function.
Copyright (C) 2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
/* Written by Simon Josefsson. */
/* Get size_t, FILE, ssize_t. And getdelim, if available. */
# include <stddef.h>
# include <stdio.h>
# include <sys/types.h>
#if !HAVE_DECL_GETDELIM
ssize_t getdelim (char **lineptr, size_t *n, int delimiter, FILE *stream);
#endif /* !HAVE_GETDELIM */
/* Copyright (C) 2001 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <limits.h>
#include "mbchar.h"
#if IS_BASIC_ASCII
/* Bit table of characters in the ISO C "basic character set". */
unsigned int is_basic_table [UCHAR_MAX / 32 + 1] =
{
0x00001a00, /* '\t' '\v' '\f' */
0xffffffef, /* ' '...'#' '%'...'?' */
0xfffffffe, /* 'A'...'Z' '[' '\\' ']' '^' '_' */
0x7ffffffe /* 'a'...'z' '{' '|' '}' '~' */
/* The remaining bits are 0. */
};
#endif /* IS_BASIC_ASCII */
/* Multibyte character data type.
Copyright (C) 2001, 2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Written by Bruno Haible <bruno@clisp.org>. */
/* A multibyte character is a short subsequence of a char* string,
representing a single wide character.
We use multibyte characters instead of wide characters because of
the following goals:
1) correct multibyte handling, i.e. operate according to the LC_CTYPE
locale,
2) ease of maintenance, i.e. the maintainer needs not know all details
of the ISO C 99 standard,
3) don't fail grossly if the input is not in the encoding set by the
locale, because often different encodings are in use in the same
countries (ISO-8859-1/UTF-8, EUC-JP/Shift_JIS, ...),
4) fast in the case of ASCII characters,
5) portability, i.e. don't make unportable assumptions about wchar_t.
Multibyte characters are only accessed through the mb* macros.
mb_ptr (mbc)
return a pointer to the beginning of the multibyte sequence.
mb_len (mbc)
returns the number of bytes occupied by the multibyte sequence.
Always > 0.
mb_iseq (mbc, sc)
returns true if mbc is the standard ASCII character sc.
mb_isnul (mbc)
returns true if mbc is the nul character.
mb_cmp (mbc1, mbc2)
returns a positive, zero, or negative value depending on whether mbc1
sorts after, same or before mbc2.
mb_casecmp (mbc1, mbc2)
returns a positive, zero, or negative value depending on whether mbc1
sorts after, same or before mbc2, modulo upper/lowercase conversion.
mb_equal (mbc1, mbc2)
returns true if mbc1 and mbc2 are equal.
mb_caseequal (mbc1, mbc2)
returns true if mbc1 and mbc2 are equal modulo upper/lowercase conversion.
mb_isalnum (mbc)
returns true if mbc is alphanumeric.
mb_isalpha (mbc)
returns true if mbc is alphabetic.
mb_isascii(mbc)
returns true if mbc is plain ASCII.
mb_isblank (mbc)
returns true if mbc is a blank.
mb_iscntrl (mbc)
returns true if mbc is a control character.
mb_isdigit (mbc)
returns true if mbc is a decimal digit.
mb_isgraph (mbc)
returns true if mbc is a graphic character.
mb_islower (mbc)
returns true if mbc is lowercase.
mb_isprint (mbc)
returns true if mbc is a printable character.
mb_ispunct (mbc)
returns true if mbc is a punctuation character.
mb_isspace (mbc)
returns true if mbc is a space character.
mb_isupper (mbc)
returns true if mbc is uppercase.
mb_isxdigit (mbc)
returns true if mbc is a hexadecimal digit.
mb_width (mbc)
returns the number of columns on the output device occupied by mbc.
Always >= 0.
mb_putc (mbc, stream)
outputs mbc on stream, a byte oriented FILE stream opened for output.
mb_setascii (&mbc, sc)
assigns the standard ASCII character sc to mbc.
mb_copy (&destmbc, &srcmbc)
copies srcmbc to destmbc.
Here are the function prototypes of the macros.
extern const char * mb_ptr (const mbchar_t mbc);
extern size_t mb_len (const mbchar_t mbc);
extern bool mb_iseq (const mbchar_t mbc, char sc);
extern bool mb_isnul (const mbchar_t mbc);
extern int mb_cmp (const mbchar_t mbc1, const mbchar_t mbc2);
extern int mb_casecmp (const mbchar_t mbc1, const mbchar_t mbc2);
extern bool mb_equal (const mbchar_t mbc1, const mbchar_t mbc2);
extern bool mb_caseequal (const mbchar_t mbc1, const mbchar_t mbc2);
extern bool mb_isalnum (const mbchar_t mbc);
extern bool mb_isalpha (const mbchar_t mbc);
extern bool mb_isascii (const mbchar_t mbc);
extern bool mb_isblank (const mbchar_t mbc);
extern bool mb_iscntrl (const mbchar_t mbc);
extern bool mb_isdigit (const mbchar_t mbc);
extern bool mb_isgraph (const mbchar_t mbc);
extern bool mb_islower (const mbchar_t mbc);
extern bool mb_isprint (const mbchar_t mbc);
extern bool mb_ispunct (const mbchar_t mbc);
extern bool mb_isspace (const mbchar_t mbc);
extern bool mb_isupper (const mbchar_t mbc);
extern bool mb_isxdigit (const mbchar_t mbc);
extern int mb_width (const mbchar_t mbc);
extern void mb_putc (const mbchar_t mbc, FILE *stream);
extern void mb_setascii (mbchar_t *new, char sc);
extern void mb_copy (mbchar_t *new, const mbchar_t *old);
*/
#ifndef _MBCHAR_H
#define _MBCHAR_H 1
#include <stdbool.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
<wchar.h>. */
#include <stdio.h>
#include <time.h>
#include <wchar.h>
#include <wctype.h>
#define MBCHAR_BUF_SIZE 24
struct mbchar
{
const char *ptr; /* pointer to current character */
size_t bytes; /* number of bytes of current character, > 0 */
bool wc_valid; /* true if wc is a valid wide character */
wchar_t wc; /* if wc_valid: the current character */
char buf[MBCHAR_BUF_SIZE]; /* room for the bytes, used for file input only */
};
/* EOF (not a real character) is represented with bytes = 0 and
wc_valid = false. */
typedef struct mbchar mbchar_t;
/* Access the current character. */
#define mb_ptr(mbc) ((mbc).ptr)
#define mb_len(mbc) ((mbc).bytes)
/* Comparison of characters. */
#define mb_iseq(mbc, sc) ((mbc).wc_valid && (mbc).wc == (sc))
#define mb_isnul(mbc) ((mbc).wc_valid && (mbc).wc == 0)
#define mb_cmp(mbc1, mbc2) \
((mbc1).wc_valid && (mbc2).wc_valid \
? (int) (mbc1).wc - (int) (mbc2).wc \
: (mbc1).bytes == (mbc2).bytes \
? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) \
: (mbc1).bytes < (mbc2).bytes \
? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \
: (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1))
#define mb_casecmp(mbc1, mbc2) \
((mbc1).wc_valid && (mbc2).wc_valid \
? (int) towlower ((mbc1).wc) - (int) towlower ((mbc2).wc) \
: (mbc1).bytes == (mbc2).bytes \
? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) \
: (mbc1).bytes < (mbc2).bytes \
? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \
: (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1))
#define mb_equal(mbc1, mbc2) \
((mbc1).wc_valid && (mbc2).wc_valid \
? (mbc1).wc == (mbc2).wc \
: (mbc1).bytes == (mbc2).bytes \
&& memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0)
#define mb_caseequal(mbc1, mbc2) \
((mbc1).wc_valid && (mbc2).wc_valid \
? towlower ((mbc1).wc) == towlower ((mbc2).wc) \
: (mbc1).bytes == (mbc2).bytes \
&& memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0)
/* <ctype.h>, <wctype.h> classification. */
#define mb_isascii(mbc) \
((mbc).wc_valid && (mbc).wc >= 0 && (mbc).wc <= 127)
#define mb_isalnum(mbc) ((mbc).wc_valid && iswalnum ((mbc).wc))
#define mb_isalpha(mbc) ((mbc).wc_valid && iswalpha ((mbc).wc))
#define mb_isblank(mbc) ((mbc).wc_valid && iswblank ((mbc).wc))
#define mb_iscntrl(mbc) ((mbc).wc_valid && iswcntrl ((mbc).wc))
#define mb_isdigit(mbc) ((mbc).wc_valid && iswdigit ((mbc).wc))
#define mb_isgraph(mbc) ((mbc).wc_valid && iswgraph ((mbc).wc))
#define mb_islower(mbc) ((mbc).wc_valid && iswlower ((mbc).wc))
#define mb_isprint(mbc) ((mbc).wc_valid && iswprint ((mbc).wc))
#define mb_ispunct(mbc) ((mbc).wc_valid && iswpunct ((mbc).wc))
#define mb_isspace(mbc) ((mbc).wc_valid && iswspace ((mbc).wc))
#define mb_isupper(mbc) ((mbc).wc_valid && iswupper ((mbc).wc))
#define mb_isxdigit(mbc) ((mbc).wc_valid && iswxdigit ((mbc).wc))
/* Extra <wchar.h> function. */
/* Unprintable characters appear as a small box of width 1. */
#define MB_UNPRINTABLE_WIDTH 1
static inline int
mb_width_aux (wint_t wc)
{
int w = wcwidth (wc);
/* For unprintable characters, arbitrarily return 0 for control characters
and MB_UNPRINTABLE_WIDTH otherwise. */
return (w >= 0 ? w : iswcntrl (wc) ? 0 : MB_UNPRINTABLE_WIDTH);
}
#define mb_width(mbc) \
((mbc).wc_valid ? mb_width_aux ((mbc).wc) : MB_UNPRINTABLE_WIDTH)
/* Output. */
#define mb_putc(mbc, stream) fwrite ((mbc).ptr, 1, (mbc).bytes, (stream))
/* Assignment. */
#define mb_setascii(mbc, sc) \
((mbc)->ptr = (mbc)->buf, (mbc)->bytes = 1, (mbc)->wc_valid = 1, \
(mbc)->wc = (mbc)->buf[0] = (sc))
/* Copying a character. */
static inline void
mb_copy (mbchar_t *new, const mbchar_t *old)
{
if (old->ptr == &old->buf[0])
{
memcpy (&new->buf[0], &old->buf[0], old->bytes);
new->ptr = &new->buf[0];
}
else
new->ptr = old->ptr;
new->bytes = old->bytes;
if ((new->wc_valid = old->wc_valid))
new->wc = old->wc;
}
/* is_basic(c) tests whether the single-byte character c is in the
ISO C "basic character set".
This is a convenience function, and is in this file only to share code
between mbiter_multi.h and mbfile_multi.h. */
#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)
/* The character set is ISO-646, not EBCDIC. */
# define IS_BASIC_ASCII 1
extern unsigned int is_basic_table[];
static inline bool
is_basic (char c)
{
return (is_basic_table [(unsigned char) c >> 5] >> ((unsigned char) c & 31))
& 1;
}
#else
static inline bool
is_basic (char c)
{
switch (c)
{
case '\t': case '\v': case '\f':
case ' ': case '!': case '"': case '#': case '%':
case '&': case '\'': case '(': case ')': case '*':
case '+': case ',': case '-': case '.': case '/':
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case ':': case ';': case '<': case '=': case '>':
case '?':
case 'A': case 'B': case 'C': case 'D': case 'E':
case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O':
case 'P': case 'Q': case 'R': case 'S': case 'T':
case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z':
case '[': case '\\': case ']': case '^': case '_':
case 'a': case 'b': case 'c': case 'd': case 'e':
case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'm': case 'n': case 'o':
case 'p': case 'q': case 'r': case 's': case 't':
case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z': case '{': case '|': case '}': case '~':
return 1;
default:
return 0;
}
}
#endif
#endif /* _MBCHAR_H */
/* Copyright (C) 1991, 1993, 1996, 1997, 1999, 2000, 2003, 2004 Free
Software Foundation, Inc.
Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
with help from Dan Sahlin (dan@sics.se) and
commentary by Jim Blandy (jimb@ai.mit.edu);
adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
and implemented by Roland McGrath (roland@ai.mit.edu).
NOTE: The canonical source of this file is maintained with the GNU C Library.
Bugs can be reported to bug-glibc@prep.ai.mit.edu.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <string.h>
#include <stddef.h>
#if defined _LIBC
# include <memcopy.h>
#else
# define reg_char char
#endif
#include <limits.h>
#if HAVE_BP_SYM_H || defined _LIBC
# include <bp-sym.h>
#else
# define BP_SYM(sym) sym
#endif
#undef memchr
#undef __memchr
/* Search no more than N bytes of S for C. */
void *
__memchr (void const *s, int c_in, size_t n)
{
const unsigned char *char_ptr;
const unsigned long int *longword_ptr;
unsigned long int longword, magic_bits, charmask;
unsigned reg_char c;
int i;
c = (unsigned char) c_in;
/* Handle the first few characters by reading one character at a time.
Do this until CHAR_PTR is aligned on a longword boundary. */
for (char_ptr = (const unsigned char *) s;
n > 0 && (size_t) char_ptr % sizeof longword != 0;
--n, ++char_ptr)
if (*char_ptr == c)
return (void *) char_ptr;
/* All these elucidatory comments refer to 4-byte longwords,
but the theory applies equally well to any size longwords. */
longword_ptr = (const unsigned long int *) char_ptr;
/* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
the "holes." Note that there is a hole just to the left of
each byte, with an extra at the end:
bits: 01111110 11111110 11111110 11111111
bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
The 1-bits make sure that carries propagate to the next 0-bit.
The 0-bits provide holes for carries to fall into. */
/* Set MAGIC_BITS to be this pattern of 1 and 0 bits.
Set CHARMASK to be a longword, each of whose bytes is C. */
magic_bits = 0xfefefefe;
charmask = c | (c << 8);
charmask |= charmask << 16;
#if 0xffffffffU < ULONG_MAX
magic_bits |= magic_bits << 32;
charmask |= charmask << 32;
if (8 < sizeof longword)
for (i = 64; i < sizeof longword * 8; i *= 2)
{
magic_bits |= magic_bits << i;
charmask |= charmask << i;
}
#endif
magic_bits = (ULONG_MAX >> 1) & (magic_bits | 1);
/* Instead of the traditional loop which tests each character,
we will test a longword at a time. The tricky part is testing
if *any of the four* bytes in the longword in question are zero. */
while (n >= sizeof longword)
{
/* We tentatively exit the loop if adding MAGIC_BITS to
LONGWORD fails to change any of the hole bits of LONGWORD.
1) Is this safe? Will it catch all the zero bytes?
Suppose there is a byte with all zeros. Any carry bits
propagating from its left will fall into the hole at its
least significant bit and stop. Since there will be no
carry from its most significant bit, the LSB of the
byte to the left will be unchanged, and the zero will be
detected.
2) Is this worthwhile? Will it ignore everything except
zero bytes? Suppose every byte of LONGWORD has a bit set
somewhere. There will be a carry into bit 8. If bit 8
is set, this will carry into bit 16. If bit 8 is clear,
one of bits 9-15 must be set, so there will be a carry
into bit 16. Similarly, there will be a carry into bit
24. If one of bits 24-30 is set, there will be a carry
into bit 31, so all of the hole bits will be changed.
The one misfire occurs when bits 24-30 are clear and bit
31 is set; in this case, the hole at bit 31 is not
changed. If we had access to the processor carry flag,
we could close this loophole by putting the fourth hole
at bit 32!
So it ignores everything except 128's, when they're aligned
properly.
3) But wait! Aren't we looking for C, not zero?
Good point. So what we do is XOR LONGWORD with a longword,
each of whose bytes is C. This turns each byte that is C
into a zero. */
longword = *longword_ptr++ ^ charmask;
/* Add MAGIC_BITS to LONGWORD. */
if ((((longword + magic_bits)
/* Set those bits that were unchanged by the addition. */
^ ~longword)
/* Look at only the hole bits. If any of the hole bits
are unchanged, most likely one of the bytes was a
zero. */
& ~magic_bits) != 0)
{
/* Which of the bytes was C? If none of them were, it was
a misfire; continue the search. */
const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
if (cp[0] == c)
return (void *) cp;
if (cp[1] == c)
return (void *) &cp[1];
if (cp[2] == c)
return (void *) &cp[2];
if (cp[3] == c)
return (void *) &cp[3];
if (4 < sizeof longword && cp[4] == c)
return (void *) &cp[4];
if (5 < sizeof longword && cp[5] == c)
return (void *) &cp[5];
if (6 < sizeof longword && cp[6] == c)
return (void *) &cp[6];
if (7 < sizeof longword && cp[7] == c)
return (void *) &cp[7];
if (8 < sizeof longword)
for (i = 8; i < sizeof longword; i++)
if (cp[i] == c)
return (void *) &cp[i];
}
n -= sizeof longword;
}
char_ptr = (const unsigned char *) longword_ptr;
while (n-- > 0)
{
if (*char_ptr == c)
return (void *) char_ptr;
else
++char_ptr;
}
return 0;
}
#ifdef weak_alias
weak_alias (__memchr, BP_SYM (memchr))
#endif
This diff could not be displayed because it is too large.
/* Extended regular expression matching and search library.
Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
static void re_string_construct_common (const char *str, int len,
re_string_t *pstr,
RE_TRANSLATE_TYPE trans, int icase,
const re_dfa_t *dfa) internal_function;
#ifdef RE_ENABLE_I18N
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
wint_t *last_wc) internal_function;
#endif /* RE_ENABLE_I18N */
static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
unsigned int hash) internal_function;
static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash) internal_function;
static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int context,
unsigned int hash) internal_function;
static unsigned int inline calc_state_hash (const re_node_set *nodes,
unsigned int context) internal_function;
/* Functions for string operation. */
/* This function allocate the buffers. It is necessary to call
re_string_reconstruct before using the object. */
static reg_errcode_t
re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
re_string_t *pstr;
const char *str;
int len, init_len, icase;
RE_TRANSLATE_TYPE trans;
const re_dfa_t *dfa;
{
reg_errcode_t ret;
int init_buf_len;
/* Ensure at least one character fits into the buffers. */
if (init_len < dfa->mb_cur_max)
init_len = dfa->mb_cur_max;
init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
re_string_construct_common (str, len, pstr, trans, icase, dfa);
ret = re_string_realloc_buffers (pstr, init_buf_len);
if (BE (ret != REG_NOERROR, 0))
return ret;
pstr->word_char = dfa->word_char;
pstr->word_ops_used = dfa->word_ops_used;
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
pstr->valid_raw_len = pstr->valid_len;
return REG_NOERROR;
}
/* This function allocate the buffers, and initialize them. */
static reg_errcode_t
re_string_construct (pstr, str, len, trans, icase, dfa)
re_string_t *pstr;
const char *str;
int len, icase;
RE_TRANSLATE_TYPE trans;
const re_dfa_t *dfa;
{
reg_errcode_t ret;
memset (pstr, '\0', sizeof (re_string_t));
re_string_construct_common (str, len, pstr, trans, icase, dfa);
if (len > 0)
{
ret = re_string_realloc_buffers (pstr, len + 1);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
if (icase)
{
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
while (1)
{
ret = build_wcs_upper_buffer (pstr);
if (BE (ret != REG_NOERROR, 0))
return ret;
if (pstr->valid_raw_len >= len)
break;
if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
break;
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
}
else
#endif /* RE_ENABLE_I18N */
build_upper_buffer (pstr);
}
else
{
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
build_wcs_buffer (pstr);
else
#endif /* RE_ENABLE_I18N */
{
if (trans != NULL)
re_string_translate_buffer (pstr);
else
{
pstr->valid_len = pstr->bufs_len;
pstr->valid_raw_len = pstr->bufs_len;
}
}
}
return REG_NOERROR;
}
/* Helper functions for re_string_allocate, and re_string_construct. */
static reg_errcode_t
re_string_realloc_buffers (pstr, new_buf_len)
re_string_t *pstr;
int new_buf_len;
{
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len);
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->wcs = new_array;
if (pstr->offsets != NULL)
{
int *new_array = re_realloc (pstr->offsets, int, new_buf_len);
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->offsets = new_array;
}
}
#endif /* RE_ENABLE_I18N */
if (pstr->mbs_allocated)
{
unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
new_buf_len);
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->mbs = new_array;
}
pstr->bufs_len = new_buf_len;
return REG_NOERROR;
}
static void
re_string_construct_common (str, len, pstr, trans, icase, dfa)
const char *str;
int len;
re_string_t *pstr;
RE_TRANSLATE_TYPE trans;
int icase;
const re_dfa_t *dfa;
{
pstr->raw_mbs = (const unsigned char *) str;
pstr->len = len;
pstr->raw_len = len;
pstr->trans = (unsigned RE_TRANSLATE_TYPE) trans;
pstr->icase = icase ? 1 : 0;
pstr->mbs_allocated = (trans != NULL || icase);
pstr->mb_cur_max = dfa->mb_cur_max;
pstr->is_utf8 = dfa->is_utf8;
pstr->map_notascii = dfa->map_notascii;
pstr->stop = pstr->len;
pstr->raw_stop = pstr->stop;
}
#ifdef RE_ENABLE_I18N
/* Build wide character buffer PSTR->WCS.
If the byte sequence of the string are:
<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
Then wide character buffer will be:
<wc1> , WEOF , <wc2> , WEOF , <wc3>
We use WEOF for padding, they indicate that the position isn't
a first byte of a multibyte character.
Note that this function assumes PSTR->VALID_LEN elements are already
built and starts from PSTR->VALID_LEN. */
static void
build_wcs_buffer (pstr)
re_string_t *pstr;
{
#ifdef _LIBC
unsigned char buf[MB_LEN_MAX];
assert (MB_LEN_MAX >= pstr->mb_cur_max);
#else
unsigned char buf[64];
#endif
mbstate_t prev_st;
int byte_idx, end_idx, remain_len;
size_t mbclen;
/* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
{
wchar_t wc;
const char *p;
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
/* Apply the translation if we need. */
if (BE (pstr->trans != NULL, 0))
{
int i, ch;
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
{
ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
}
p = (const char *) buf;
}
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
pstr->cur_state = prev_st;
break;
}
else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
{
/* We treat these cases as a singlebyte character. */
mbclen = 1;
wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
if (BE (pstr->trans != NULL, 0))
wc = pstr->trans[wc];
pstr->cur_state = prev_st;
}
/* Write wide character and padding. */
pstr->wcs[byte_idx++] = wc;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = byte_idx;
}
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
but for REG_ICASE. */
static int
build_wcs_upper_buffer (pstr)
re_string_t *pstr;
{
mbstate_t prev_st;
int src_idx, byte_idx, end_idx, remain_len;
size_t mbclen;
#ifdef _LIBC
char buf[MB_LEN_MAX];
assert (MB_LEN_MAX >= pstr->mb_cur_max);
#else
char buf[64];
#endif
byte_idx = pstr->valid_len;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
/* The following optimization assumes that ASCII characters can be
mapped to wide characters with a simple cast. */
if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
{
while (byte_idx < end_idx)
{
wchar_t wc;
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
&& mbsinit (&pstr->cur_state))
{
/* In case of a singlebyte character. */
pstr->mbs[byte_idx]
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
/* The next step uses the assumption that wchar_t is encoded
ASCII-safe: all ASCII values can be converted like this. */
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
++byte_idx;
continue;
}
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen + 2 > 2, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
{
size_t mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
else
{
src_idx = byte_idx;
goto offsets_needed;
}
}
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[byte_idx++] = wcu;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
{
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
pstr->mbs[byte_idx] = ch;
/* And also cast it to wide char. */
pstr->wcs[byte_idx++] = (wchar_t) ch;
if (BE (mbclen == (size_t) -1, 0))
pstr->cur_state = prev_st;
}
else
{
/* The buffer doesn't have enough space, finish to build. */
pstr->cur_state = prev_st;
break;
}
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = byte_idx;
return REG_NOERROR;
}
else
for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
{
wchar_t wc;
const char *p;
offsets_needed:
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
if (BE (pstr->trans != NULL, 0))
{
int i, ch;
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
{
ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
buf[i] = pstr->trans[ch];
}
p = (const char *) buf;
}
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen + 2 > 2, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
{
size_t mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
else if (mbcdlen != (size_t) -1)
{
size_t i;
if (byte_idx + mbcdlen > pstr->bufs_len)
{
pstr->cur_state = prev_st;
break;
}
if (pstr->offsets == NULL)
{
pstr->offsets = re_malloc (int, pstr->bufs_len);
if (pstr->offsets == NULL)
return REG_ESPACE;
}
if (!pstr->offsets_needed)
{
for (i = 0; i < (size_t) byte_idx; ++i)
pstr->offsets[i] = i;
pstr->offsets_needed = 1;
}
memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
pstr->wcs[byte_idx] = wcu;
pstr->offsets[byte_idx] = src_idx;
for (i = 1; i < mbcdlen; ++i)
{
pstr->offsets[byte_idx + i]
= src_idx + (i < mbclen ? i : mbclen - 1);
pstr->wcs[byte_idx + i] = WEOF;
}
pstr->len += mbcdlen - mbclen;
if (pstr->raw_stop > src_idx)
pstr->stop += mbcdlen - mbclen;
end_idx = (pstr->bufs_len > pstr->len)
? pstr->len : pstr->bufs_len;
byte_idx += mbcdlen;
src_idx += mbclen;
continue;
}
else
memcpy (pstr->mbs + byte_idx, p, mbclen);
}
else
memcpy (pstr->mbs + byte_idx, p, mbclen);
if (BE (pstr->offsets_needed != 0, 0))
{
size_t i;
for (i = 0; i < mbclen; ++i)
pstr->offsets[byte_idx + i] = src_idx + i;
}
src_idx += mbclen;
pstr->wcs[byte_idx++] = wcu;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
{
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans [ch];
pstr->mbs[byte_idx] = ch;
if (BE (pstr->offsets_needed != 0, 0))
pstr->offsets[byte_idx] = src_idx;
++src_idx;
/* And also cast it to wide char. */
pstr->wcs[byte_idx++] = (wchar_t) ch;
if (BE (mbclen == (size_t) -1, 0))
pstr->cur_state = prev_st;
}
else
{
/* The buffer doesn't have enough space, finish to build. */
pstr->cur_state = prev_st;
break;
}
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = src_idx;
return REG_NOERROR;
}
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
Return the index. */
static int
re_string_skip_chars (pstr, new_raw_idx, last_wc)
re_string_t *pstr;
int new_raw_idx;
wint_t *last_wc;
{
mbstate_t prev_st;
int rawbuf_idx;
size_t mbclen;
wchar_t wc = 0;
/* Skip the characters which are not necessary to check. */
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
rawbuf_idx < new_raw_idx;)
{
int remain_len;
remain_len = pstr->len - rawbuf_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
{
/* We treat these cases as a singlebyte character. */
mbclen = 1;
pstr->cur_state = prev_st;
}
/* Then proceed the next character. */
rawbuf_idx += mbclen;
}
*last_wc = (wint_t) wc;
return rawbuf_idx;
}
#endif /* RE_ENABLE_I18N */
/* Build the buffer PSTR->MBS, and apply the translation if we need.
This function is used in case of REG_ICASE. */
static void
build_upper_buffer (pstr)
re_string_t *pstr;
{
int char_idx, end_idx;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
{
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans[ch];
if (islower (ch))
pstr->mbs[char_idx] = toupper (ch);
else
pstr->mbs[char_idx] = ch;
}
pstr->valid_len = char_idx;
pstr->valid_raw_len = char_idx;
}
/* Apply TRANS to the buffer in PSTR. */
static void
re_string_translate_buffer (pstr)
re_string_t *pstr;
{
int buf_idx, end_idx;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
{
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
pstr->mbs[buf_idx] = pstr->trans[ch];
}
pstr->valid_len = buf_idx;
pstr->valid_raw_len = buf_idx;
}
/* This function re-construct the buffers.
Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
convert to upper case in case of REG_ICASE, apply translation. */
static reg_errcode_t
re_string_reconstruct (pstr, idx, eflags)
re_string_t *pstr;
int idx, eflags;
{
int offset = idx - pstr->raw_mbs_idx;
if (BE (offset < 0, 0))
{
/* Reset buffer. */
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
#endif /* RE_ENABLE_I18N */
pstr->len = pstr->raw_len;
pstr->stop = pstr->raw_stop;
pstr->valid_len = 0;
pstr->raw_mbs_idx = 0;
pstr->valid_raw_len = 0;
pstr->offsets_needed = 0;
pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
: CONTEXT_NEWLINE | CONTEXT_BEGBUF);
if (!pstr->mbs_allocated)
pstr->mbs = (unsigned char *) pstr->raw_mbs;
offset = idx;
}
if (BE (offset != 0, 1))
{
/* Are the characters which are already checked remain? */
if (BE (offset < pstr->valid_raw_len, 1)
#ifdef RE_ENABLE_I18N
/* Handling this would enlarge the code too much.
Accept a slowdown in that case. */
&& pstr->offsets_needed == 0
#endif
)
{
/* Yes, move them to the front of the buffer. */
pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
memmove (pstr->wcs, pstr->wcs + offset,
(pstr->valid_len - offset) * sizeof (wint_t));
#endif /* RE_ENABLE_I18N */
if (BE (pstr->mbs_allocated, 0))
memmove (pstr->mbs, pstr->mbs + offset,
pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
#if DEBUG
assert (pstr->valid_len > 0);
#endif
}
else
{
/* No, skip all characters until IDX. */
#ifdef RE_ENABLE_I18N
if (BE (pstr->offsets_needed, 0))
{
pstr->len = pstr->raw_len - idx + offset;
pstr->stop = pstr->raw_stop - idx + offset;
pstr->offsets_needed = 0;
}
#endif
pstr->valid_len = 0;
pstr->valid_raw_len = 0;
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
int wcs_idx;
wint_t wc = WEOF;
if (pstr->is_utf8)
{
const unsigned char *raw, *p, *q, *end;
/* Special case UTF-8. Multi-byte chars start with any
byte other than 0x80 - 0xbf. */
raw = pstr->raw_mbs + pstr->raw_mbs_idx;
end = raw + (offset - pstr->mb_cur_max);
for (p = raw + offset - 1; p >= end; --p)
if ((*p & 0xc0) != 0x80)
{
mbstate_t cur_state;
wchar_t wc2;
int mlen = raw + pstr->len - p;
unsigned char buf[6];
q = p;
if (BE (pstr->trans != NULL, 0))
{
int i = mlen < 6 ? mlen : 6;
while (--i >= 0)
buf[i] = pstr->trans[p[i]];
q = buf;
}
/* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state));
mlen = (mbrtowc (&wc2, (const char *) p, mlen,
&cur_state)
- (raw + offset - p));
if (mlen >= 0)
{
memset (&pstr->cur_state, '\0',
sizeof (mbstate_t));
pstr->valid_len = mlen;
wc = wc2;
}
break;
}
}
if (wc == WEOF)
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
if (BE (pstr->valid_len, 0))
{
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
pstr->wcs[wcs_idx] = WEOF;
if (pstr->mbs_allocated)
memset (pstr->mbs, 255, pstr->valid_len);
}
pstr->valid_raw_len = pstr->valid_len;
pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
&& IS_WIDE_WORD_CHAR (wc))
? CONTEXT_WORD
: ((IS_WIDE_NEWLINE (wc)
&& pstr->newline_anchor)
? CONTEXT_NEWLINE : 0));
}
else
#endif /* RE_ENABLE_I18N */
{
int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
if (pstr->trans)
c = pstr->trans[c];
pstr->tip_context = (bitset_contain (pstr->word_char, c)
? CONTEXT_WORD
: ((IS_NEWLINE (c) && pstr->newline_anchor)
? CONTEXT_NEWLINE : 0));
}
}
if (!BE (pstr->mbs_allocated, 0))
pstr->mbs += offset;
}
pstr->raw_mbs_idx = idx;
pstr->len -= offset;
pstr->stop -= offset;
/* Then build the buffers. */
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
if (pstr->icase)
{
int ret = build_wcs_upper_buffer (pstr);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
else
build_wcs_buffer (pstr);
}
else
#endif /* RE_ENABLE_I18N */
if (BE (pstr->mbs_allocated, 0))
{
if (pstr->icase)
build_upper_buffer (pstr);
else if (pstr->trans != NULL)
re_string_translate_buffer (pstr);
}
else
pstr->valid_len = pstr->len;
pstr->cur_idx = 0;
return REG_NOERROR;
}
static unsigned char
re_string_peek_byte_case (pstr, idx)
const re_string_t *pstr;
int idx;
{
int ch, off;
/* Handle the common (easiest) cases first. */
if (BE (!pstr->mbs_allocated, 1))
return re_string_peek_byte (pstr, idx);
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1
&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
return re_string_peek_byte (pstr, idx);
#endif
off = pstr->cur_idx + idx;
#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
off = pstr->offsets[off];
#endif
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
#ifdef RE_ENABLE_I18N
/* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
this function returns CAPITAL LETTER I instead of first byte of
DOTLESS SMALL LETTER I. The latter would confuse the parser,
since peek_byte_case doesn't advance cur_idx in any way. */
if (pstr->offsets_needed && !isascii (ch))
return re_string_peek_byte (pstr, idx);
#endif
return ch;
}
static unsigned char
re_string_fetch_byte_case (pstr)
re_string_t *pstr;
{
if (BE (!pstr->mbs_allocated, 1))
return re_string_fetch_byte (pstr);
#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
{
int off, ch;
/* For tr_TR.UTF-8 [[:islower:]] there is
[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
in that case the whole multi-byte character and return
the original letter. On the other side, with
[[: DOTLESS SMALL LETTER I return [[:I, as doing
anything else would complicate things too much. */
if (!re_string_first_byte (pstr, pstr->cur_idx))
return re_string_fetch_byte (pstr);
off = pstr->offsets[pstr->cur_idx];
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
if (! isascii (ch))
return re_string_fetch_byte (pstr);
re_string_skip_bytes (pstr,
re_string_char_size_at (pstr, pstr->cur_idx));
return ch;
}
#endif
return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
}
static void
re_string_destruct (pstr)
re_string_t *pstr;
{
#ifdef RE_ENABLE_I18N
re_free (pstr->wcs);
re_free (pstr->offsets);
#endif /* RE_ENABLE_I18N */
if (pstr->mbs_allocated)
re_free (pstr->mbs);
}
/* Return the context at IDX in INPUT. */
static unsigned int
re_string_context_at (input, idx, eflags)
const re_string_t *input;
int idx, eflags;
{
int c;
if (BE (idx < 0, 0))
/* In this case, we use the value stored in input->tip_context,
since we can't know the character in input->mbs[-1] here. */
return input->tip_context;
if (BE (idx == input->len, 0))
return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1)
{
wint_t wc;
int wc_idx = idx;
while(input->wcs[wc_idx] == WEOF)
{
#ifdef DEBUG
/* It must not happen. */
assert (wc_idx >= 0);
#endif
--wc_idx;
if (wc_idx < 0)
return input->tip_context;
}
wc = input->wcs[wc_idx];
if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
return CONTEXT_WORD;
return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
? CONTEXT_NEWLINE : 0);
}
else
#endif
{
c = re_string_byte_at (input, idx);
if (bitset_contain (input->word_char, c))
return CONTEXT_WORD;
return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
}
}
/* Functions for set operation. */
static reg_errcode_t
re_node_set_alloc (set, size)
re_node_set *set;
int size;
{
set->alloc = size;
set->nelem = 0;
set->elems = re_malloc (int, size);
if (BE (set->elems == NULL, 0))
return REG_ESPACE;
return REG_NOERROR;
}
static reg_errcode_t
re_node_set_init_1 (set, elem)
re_node_set *set;
int elem;
{
set->alloc = 1;
set->nelem = 1;
set->elems = re_malloc (int, 1);
if (BE (set->elems == NULL, 0))
{
set->alloc = set->nelem = 0;
return REG_ESPACE;
}
set->elems[0] = elem;
return REG_NOERROR;
}
static reg_errcode_t
re_node_set_init_2 (set, elem1, elem2)
re_node_set *set;
int elem1, elem2;
{
set->alloc = 2;
set->elems = re_malloc (int, 2);
if (BE (set->elems == NULL, 0))
return REG_ESPACE;
if (elem1 == elem2)
{
set->nelem = 1;
set->elems[0] = elem1;
}
else
{
set->nelem = 2;
if (elem1 < elem2)
{
set->elems[0] = elem1;
set->elems[1] = elem2;
}
else
{
set->elems[0] = elem2;
set->elems[1] = elem1;
}
}
return REG_NOERROR;
}
static reg_errcode_t
re_node_set_init_copy (dest, src)
re_node_set *dest;
const re_node_set *src;
{
dest->nelem = src->nelem;
if (src->nelem > 0)
{
dest->alloc = dest->nelem;
dest->elems = re_malloc (int, dest->alloc);
if (BE (dest->elems == NULL, 0))
{
dest->alloc = dest->nelem = 0;
return REG_ESPACE;
}
memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
}
else
re_node_set_init_empty (dest);
return REG_NOERROR;
}
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
DEST. Return value indicate the error code or REG_NOERROR if succeeded.
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
static reg_errcode_t
re_node_set_add_intersect (dest, src1, src2)
re_node_set *dest;
const re_node_set *src1, *src2;
{
int i1, i2, is, id, delta, sbase;
if (src1->nelem == 0 || src2->nelem == 0)
return REG_NOERROR;
/* We need dest->nelem + 2 * elems_in_intersection; this is a
conservative estimate. */
if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
{
int new_alloc = src1->nelem + src2->nelem + dest->alloc;
int *new_elems = re_realloc (dest->elems, int, new_alloc);
if (BE (new_elems == NULL, 0))
return REG_ESPACE;
dest->elems = new_elems;
dest->alloc = new_alloc;
}
/* Find the items in the intersection of SRC1 and SRC2, and copy
into the top of DEST those that are not already in DEST itself. */
sbase = dest->nelem + src1->nelem + src2->nelem;
i1 = src1->nelem - 1;
i2 = src2->nelem - 1;
id = dest->nelem - 1;
for (;;)
{
if (src1->elems[i1] == src2->elems[i2])
{
/* Try to find the item in DEST. Maybe we could binary search? */
while (id >= 0 && dest->elems[id] > src1->elems[i1])
--id;
if (id < 0 || dest->elems[id] != src1->elems[i1])
dest->elems[--sbase] = src1->elems[i1];
if (--i1 < 0 || --i2 < 0)
break;
}
/* Lower the highest of the two items. */
else if (src1->elems[i1] < src2->elems[i2])
{
if (--i2 < 0)
break;
}
else
{
if (--i1 < 0)
break;
}
}
id = dest->nelem - 1;
is = dest->nelem + src1->nelem + src2->nelem - 1;
delta = is - sbase + 1;
/* Now copy. When DELTA becomes zero, the remaining
DEST elements are already in place; this is more or
less the same loop that is in re_node_set_merge. */
dest->nelem += delta;
if (delta > 0 && id >= 0)
for (;;)
{
if (dest->elems[is] > dest->elems[id])
{
/* Copy from the top. */
dest->elems[id + delta--] = dest->elems[is--];
if (delta == 0)
break;
}
else
{
/* Slide from the bottom. */
dest->elems[id + delta] = dest->elems[id];
if (--id < 0)
break;
}
}
/* Copy remaining SRC elements. */
memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
return REG_NOERROR;
}
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
static reg_errcode_t
re_node_set_init_union (dest, src1, src2)
re_node_set *dest;
const re_node_set *src1, *src2;
{
int i1, i2, id;
if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
{
dest->alloc = src1->nelem + src2->nelem;
dest->elems = re_malloc (int, dest->alloc);
if (BE (dest->elems == NULL, 0))
return REG_ESPACE;
}
else
{
if (src1 != NULL && src1->nelem > 0)
return re_node_set_init_copy (dest, src1);
else if (src2 != NULL && src2->nelem > 0)
return re_node_set_init_copy (dest, src2);
else
re_node_set_init_empty (dest);
return REG_NOERROR;
}
for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
{
if (src1->elems[i1] > src2->elems[i2])
{
dest->elems[id++] = src2->elems[i2++];
continue;
}
if (src1->elems[i1] == src2->elems[i2])
++i2;
dest->elems[id++] = src1->elems[i1++];
}
if (i1 < src1->nelem)
{
memcpy (dest->elems + id, src1->elems + i1,
(src1->nelem - i1) * sizeof (int));
id += src1->nelem - i1;
}
else if (i2 < src2->nelem)
{
memcpy (dest->elems + id, src2->elems + i2,
(src2->nelem - i2) * sizeof (int));
id += src2->nelem - i2;
}
dest->nelem = id;
return REG_NOERROR;
}
/* Calculate the union set of the sets DEST and SRC. And store it to
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
static reg_errcode_t
re_node_set_merge (dest, src)
re_node_set *dest;
const re_node_set *src;
{
int is, id, sbase, delta;
if (src == NULL || src->nelem == 0)
return REG_NOERROR;
if (dest->alloc < 2 * src->nelem + dest->nelem)
{
int new_alloc = 2 * (src->nelem + dest->alloc);
int *new_buffer = re_realloc (dest->elems, int, new_alloc);
if (BE (new_buffer == NULL, 0))
return REG_ESPACE;
dest->elems = new_buffer;
dest->alloc = new_alloc;
}
if (BE (dest->nelem == 0, 0))
{
dest->nelem = src->nelem;
memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
return REG_NOERROR;
}
/* Copy into the top of DEST the items of SRC that are not
found in DEST. Maybe we could binary search in DEST? */
for (sbase = dest->nelem + 2 * src->nelem,
is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
{
if (dest->elems[id] == src->elems[is])
is--, id--;
else if (dest->elems[id] < src->elems[is])
dest->elems[--sbase] = src->elems[is--];
else /* if (dest->elems[id] > src->elems[is]) */
--id;
}
if (is >= 0)
{
/* If DEST is exhausted, the remaining items of SRC must be unique. */
sbase -= is + 1;
memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
}
id = dest->nelem - 1;
is = dest->nelem + 2 * src->nelem - 1;
delta = is - sbase + 1;
if (delta == 0)
return REG_NOERROR;
/* Now copy. When DELTA becomes zero, the remaining
DEST elements are already in place. */
dest->nelem += delta;
for (;;)
{
if (dest->elems[is] > dest->elems[id])
{
/* Copy from the top. */
dest->elems[id + delta--] = dest->elems[is--];
if (delta == 0)
break;
}
else
{
/* Slide from the bottom. */
dest->elems[id + delta] = dest->elems[id];
if (--id < 0)
{
/* Copy remaining SRC elements. */
memcpy (dest->elems, dest->elems + sbase,
delta * sizeof (int));
break;
}
}
}
return REG_NOERROR;
}
/* Insert the new element ELEM to the re_node_set* SET.
SET should not already have ELEM.
return -1 if an error is occured, return 1 otherwise. */
static int
re_node_set_insert (set, elem)
re_node_set *set;
int elem;
{
int idx;
/* In case the set is empty. */
if (set->alloc == 0)
{
if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
return 1;
else
return -1;
}
if (BE (set->nelem, 0) == 0)
{
/* We already guaranteed above that set->alloc != 0. */
set->elems[0] = elem;
++set->nelem;
return 1;
}
/* Realloc if we need. */
if (set->alloc == set->nelem)
{
int *new_array;
set->alloc = set->alloc * 2;
new_array = re_realloc (set->elems, int, set->alloc);
if (BE (new_array == NULL, 0))
return -1;
set->elems = new_array;
}
/* Move the elements which follows the new element. Test the
first element separately to skip a check in the inner loop. */
if (elem < set->elems[0])
{
idx = 0;
for (idx = set->nelem; idx > 0; idx--)
set->elems[idx] = set->elems[idx - 1];
}
else
{
for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
set->elems[idx] = set->elems[idx - 1];
}
/* Insert the new element. */
set->elems[idx] = elem;
++set->nelem;
return 1;
}
/* Insert the new element ELEM to the re_node_set* SET.
SET should not already have any element greater than or equal to ELEM.
Return -1 if an error is occured, return 1 otherwise. */
static int
re_node_set_insert_last (set, elem)
re_node_set *set;
int elem;
{
/* Realloc if we need. */
if (set->alloc == set->nelem)
{
int *new_array;
set->alloc = (set->alloc + 1) * 2;
new_array = re_realloc (set->elems, int, set->alloc);
if (BE (new_array == NULL, 0))
return -1;
set->elems = new_array;
}
/* Insert the new element. */
set->elems[set->nelem++] = elem;
return 1;
}
/* Compare two node sets SET1 and SET2.
return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */
static int
re_node_set_compare (set1, set2)
const re_node_set *set1, *set2;
{
int i;
if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
return 0;
for (i = set1->nelem ; --i >= 0 ; )
if (set1->elems[i] != set2->elems[i])
return 0;
return 1;
}
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
static int
re_node_set_contains (set, elem)
const re_node_set *set;
int elem;
{
unsigned int idx, right, mid;
if (set->nelem <= 0)
return 0;
/* Binary search the element. */
idx = 0;
right = set->nelem - 1;
while (idx < right)
{
mid = (idx + right) / 2;
if (set->elems[mid] < elem)
idx = mid + 1;
else
right = mid;
}
return set->elems[idx] == elem ? idx + 1 : 0;
}
static void
re_node_set_remove_at (set, idx)
re_node_set *set;
int idx;
{
if (idx < 0 || idx >= set->nelem)
return;
--set->nelem;
for (; idx < set->nelem; idx++)
set->elems[idx] = set->elems[idx + 1];
}
/* Add the token TOKEN to dfa->nodes, and return the index of the token.
Or return -1, if an error will be occured. */
static int
re_dfa_add_node (dfa, token)
re_dfa_t *dfa;
re_token_t token;
{
int type = token.type;
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
{
int new_nodes_alloc = dfa->nodes_alloc * 2;
int *new_nexts, *new_indices;
re_node_set *new_edests, *new_eclosures;
re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
new_nodes_alloc);
if (BE (new_array == NULL, 0))
return -1;
dfa->nodes = new_array;
new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
if (BE (new_nexts == NULL || new_indices == NULL
|| new_edests == NULL || new_eclosures == NULL, 0))
return -1;
dfa->nexts = new_nexts;
dfa->org_indices = new_indices;
dfa->edests = new_edests;
dfa->eclosures = new_eclosures;
dfa->nodes_alloc = new_nodes_alloc;
}
dfa->nodes[dfa->nodes_len] = token;
dfa->nodes[dfa->nodes_len].constraint = 0;
#ifdef RE_ENABLE_I18N
dfa->nodes[dfa->nodes_len].accept_mb =
(type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
#endif
dfa->nexts[dfa->nodes_len] = -1;
re_node_set_init_empty (dfa->edests + dfa->nodes_len);
re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
return dfa->nodes_len++;
}
static unsigned int inline
calc_state_hash (nodes, context)
const re_node_set *nodes;
unsigned int context;
{
unsigned int hash = nodes->nelem + context;
int i;
for (i = 0 ; i < nodes->nelem ; i++)
hash += nodes->elems[i];
return hash;
}
/* Search for the state whose node_set is equivalent to NODES.
Return the pointer to the state, if we found it in the DFA.
Otherwise create the new one and return it. In case of an error
return NULL and set the error code in ERR.
Note: - We assume NULL as the invalid state, then it is possible that
return value is NULL and ERR is REG_NOERROR.
- We never return non-NULL value in case of any errors, it is for
optimization. */
static re_dfastate_t*
re_acquire_state (err, dfa, nodes)
reg_errcode_t *err;
re_dfa_t *dfa;
const re_node_set *nodes;
{
unsigned int hash;
re_dfastate_t *new_state;
struct re_state_table_entry *spot;
int i;
if (BE (nodes->nelem == 0, 0))
{
*err = REG_NOERROR;
return NULL;
}
hash = calc_state_hash (nodes, 0);
spot = dfa->state_table + (hash & dfa->state_hash_mask);
for (i = 0 ; i < spot->num ; i++)
{
re_dfastate_t *state = spot->array[i];
if (hash != state->hash)
continue;
if (re_node_set_compare (&state->nodes, nodes))
return state;
}
/* There are no appropriate state in the dfa, create the new one. */
new_state = create_ci_newstate (dfa, nodes, hash);
if (BE (new_state != NULL, 1))
return new_state;
else
{
*err = REG_ESPACE;
return NULL;
}
}
/* Search for the state whose node_set is equivalent to NODES and
whose context is equivalent to CONTEXT.
Return the pointer to the state, if we found it in the DFA.
Otherwise create the new one and return it. In case of an error
return NULL and set the error code in ERR.
Note: - We assume NULL as the invalid state, then it is possible that
return value is NULL and ERR is REG_NOERROR.
- We never return non-NULL value in case of any errors, it is for
optimization. */
static re_dfastate_t*
re_acquire_state_context (err, dfa, nodes, context)
reg_errcode_t *err;
re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int context;
{
unsigned int hash;
re_dfastate_t *new_state;
struct re_state_table_entry *spot;
int i;
if (nodes->nelem == 0)
{
*err = REG_NOERROR;
return NULL;
}
hash = calc_state_hash (nodes, context);
spot = dfa->state_table + (hash & dfa->state_hash_mask);
for (i = 0 ; i < spot->num ; i++)
{
re_dfastate_t *state = spot->array[i];
if (state->hash == hash
&& state->context == context
&& re_node_set_compare (state->entrance_nodes, nodes))
return state;
}
/* There are no appropriate state in `dfa', create the new one. */
new_state = create_cd_newstate (dfa, nodes, context, hash);
if (BE (new_state != NULL, 1))
return new_state;
else
{
*err = REG_ESPACE;
return NULL;
}
}
/* Finish initialization of the new state NEWSTATE, and using its hash value
HASH put in the appropriate bucket of DFA's state table. Return value
indicates the error code if failed. */
static reg_errcode_t
register_state (dfa, newstate, hash)
re_dfa_t *dfa;
re_dfastate_t *newstate;
unsigned int hash;
{
struct re_state_table_entry *spot;
reg_errcode_t err;
int i;
newstate->hash = hash;
err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
if (BE (err != REG_NOERROR, 0))
return REG_ESPACE;
for (i = 0; i < newstate->nodes.nelem; i++)
{
int elem = newstate->nodes.elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
re_node_set_insert_last (&newstate->non_eps_nodes, elem);
}
spot = dfa->state_table + (hash & dfa->state_hash_mask);
if (BE (spot->alloc <= spot->num, 0))
{
int new_alloc = 2 * spot->num + 2;
re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
new_alloc);
if (BE (new_array == NULL, 0))
return REG_ESPACE;
spot->array = new_array;
spot->alloc = new_alloc;
}
spot->array[spot->num++] = newstate;
return REG_NOERROR;
}
/* Create the new state which is independ of contexts.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t *
create_ci_newstate (dfa, nodes, hash)
re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int hash;
{
int i;
reg_errcode_t err;
re_dfastate_t *newstate;
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
if (BE (newstate == NULL, 0))
return NULL;
err = re_node_set_init_copy (&newstate->nodes, nodes);
if (BE (err != REG_NOERROR, 0))
{
re_free (newstate);
return NULL;
}
newstate->entrance_nodes = &newstate->nodes;
for (i = 0 ; i < nodes->nelem ; i++)
{
re_token_t *node = dfa->nodes + nodes->elems[i];
re_token_type_t type = node->type;
if (type == CHARACTER && !node->constraint)
continue;
#ifdef RE_ENABLE_I18N
newstate->accept_mb |= node->accept_mb;
#endif /* RE_ENABLE_I18N */
/* If the state has the halt node, the state is a halt state. */
if (type == END_OF_RE)
newstate->halt = 1;
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
else if (type == ANCHOR || node->constraint)
newstate->has_constraint = 1;
}
err = register_state (dfa, newstate, hash);
if (BE (err != REG_NOERROR, 0))
{
free_state (newstate);
newstate = NULL;
}
return newstate;
}
/* Create the new state which is depend on the context CONTEXT.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t *
create_cd_newstate (dfa, nodes, context, hash)
re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int context, hash;
{
int i, nctx_nodes = 0;
reg_errcode_t err;
re_dfastate_t *newstate;
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
if (BE (newstate == NULL, 0))
return NULL;
err = re_node_set_init_copy (&newstate->nodes, nodes);
if (BE (err != REG_NOERROR, 0))
{
re_free (newstate);
return NULL;
}
newstate->context = context;
newstate->entrance_nodes = &newstate->nodes;
for (i = 0 ; i < nodes->nelem ; i++)
{
unsigned int constraint = 0;
re_token_t *node = dfa->nodes + nodes->elems[i];
re_token_type_t type = node->type;
if (node->constraint)
constraint = node->constraint;
if (type == CHARACTER && !constraint)
continue;
#ifdef RE_ENABLE_I18N
newstate->accept_mb |= node->accept_mb;
#endif /* RE_ENABLE_I18N */
/* If the state has the halt node, the state is a halt state. */
if (type == END_OF_RE)
newstate->halt = 1;
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
else if (type == ANCHOR)
constraint = node->opr.ctx_type;
if (constraint)
{
if (newstate->entrance_nodes == &newstate->nodes)
{
newstate->entrance_nodes = re_malloc (re_node_set, 1);
if (BE (newstate->entrance_nodes == NULL, 0))
{
free_state (newstate);
return NULL;
}
re_node_set_init_copy (newstate->entrance_nodes, nodes);
nctx_nodes = 0;
newstate->has_constraint = 1;
}
if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
{
re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
++nctx_nodes;
}
}
}
err = register_state (dfa, newstate, hash);
if (BE (err != REG_NOERROR, 0))
{
free_state (newstate);
newstate = NULL;
}
return newstate;
}
static void
free_state (state)
re_dfastate_t *state;
{
re_node_set_free (&state->non_eps_nodes);
re_node_set_free (&state->inveclosure);
if (state->entrance_nodes != &state->nodes)
{
re_node_set_free (state->entrance_nodes);
re_free (state->entrance_nodes);
}
re_node_set_free (&state->nodes);
re_free (state->word_trtable);
re_free (state->trtable);
re_free (state);
}
/* Extended regular expression matching and search library.
Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef _REGEX_INTERNAL_H
#define _REGEX_INTERNAL_H 1
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
# include <langinfo.h>
#endif
#if defined HAVE_LOCALE_H || defined _LIBC
# include <locale.h>
#endif
#if defined HAVE_WCHAR_H || defined _LIBC
# include <wchar.h>
#endif /* HAVE_WCHAR_H || _LIBC */
#if defined HAVE_WCTYPE_H || defined _LIBC
# include <wctype.h>
#endif /* HAVE_WCTYPE_H || _LIBC */
#if defined _LIBC
# include <bits/libc-lock.h>
#else
# define __libc_lock_define(CLASS,NAME)
# define __libc_lock_init(NAME) do { } while (0)
# define __libc_lock_lock(NAME) do { } while (0)
# define __libc_lock_unlock(NAME) do { } while (0)
#endif
/* In case that the system doesn't have isblank(). */
#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
#endif
#ifdef _LIBC
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
# define _RE_DEFINE_LOCALE_FUNCTIONS 1
# include <locale/localeinfo.h>
# include <locale/elem-hash.h>
# include <locale/coll-lookup.h>
# endif
#endif
/* This is for other GNU distributions with internationalized messages. */
#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
# include <libintl.h>
# ifdef _LIBC
# undef gettext
# define gettext(msgid) \
INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
# endif
#else
# define gettext(msgid) (msgid)
#endif
#ifndef gettext_noop
/* This define is so xgettext can find the internationalizable
strings. */
# define gettext_noop(String) String
#endif
#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
# define RE_ENABLE_I18N
#endif
#if __GNUC__ >= 3
# define BE(expr, val) __builtin_expect (expr, val)
#else
# define BE(expr, val) (expr)
# define inline
#endif
/* Number of bits in a byte. */
#define BYTE_BITS 8
/* Number of single byte character. */
#define SBC_MAX 256
#define COLL_ELEM_LEN_MAX 8
/* The character which represents newline. */
#define NEWLINE_CHAR '\n'
#define WIDE_NEWLINE_CHAR L'\n'
/* Rename to standard API for using out of glibc. */
#ifndef _LIBC
# define __wctype wctype
# define __iswctype iswctype
# define __btowc btowc
# ifndef __mempcpy
# define __mempcpy mempcpy
# endif
# define __wcrtomb wcrtomb
# define __regfree regfree
# define attribute_hidden
#endif /* not _LIBC */
#ifdef __GNUC__
# define __attribute(arg) __attribute__ (arg)
#else
# define __attribute(arg)
#endif
extern const char __re_error_msgid[] attribute_hidden;
extern const size_t __re_error_msgid_idx[] attribute_hidden;
/* Number of bits in an unsinged int. */
#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
/* Number of unsigned int in an bit_set. */
#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
typedef unsigned int bitset[BITSET_UINTS];
typedef unsigned int *re_bitset_ptr_t;
typedef const unsigned int *re_const_bitset_ptr_t;
#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS)
#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS))
#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS))
#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
#define bitset_set_all(set) \
memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
#define bitset_copy(dest,src) \
memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
static inline void bitset_not (bitset set);
static inline void bitset_merge (bitset dest, const bitset src);
static inline void bitset_not_merge (bitset dest, const bitset src);
static inline void bitset_mask (bitset dest, const bitset src);
#define PREV_WORD_CONSTRAINT 0x0001
#define PREV_NOTWORD_CONSTRAINT 0x0002
#define NEXT_WORD_CONSTRAINT 0x0004
#define NEXT_NOTWORD_CONSTRAINT 0x0008
#define PREV_NEWLINE_CONSTRAINT 0x0010
#define NEXT_NEWLINE_CONSTRAINT 0x0020
#define PREV_BEGBUF_CONSTRAINT 0x0040
#define NEXT_ENDBUF_CONSTRAINT 0x0080
#define WORD_DELIM_CONSTRAINT 0x0100
#define NOT_WORD_DELIM_CONSTRAINT 0x0200
typedef enum
{
INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
WORD_DELIM = WORD_DELIM_CONSTRAINT,
NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
} re_context_type;
typedef struct
{
int alloc;
int nelem;
int *elems;
} re_node_set;
typedef enum
{
NON_TYPE = 0,
/* Node type, These are used by token, node, tree. */
CHARACTER = 1,
END_OF_RE = 2,
SIMPLE_BRACKET = 3,
OP_BACK_REF = 4,
OP_PERIOD = 5,
#ifdef RE_ENABLE_I18N
COMPLEX_BRACKET = 6,
OP_UTF8_PERIOD = 7,
#endif /* RE_ENABLE_I18N */
/* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
when the debugger shows values of this enum type. */
#define EPSILON_BIT 8
OP_OPEN_SUBEXP = EPSILON_BIT | 0,
OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
OP_ALT = EPSILON_BIT | 2,
OP_DUP_ASTERISK = EPSILON_BIT | 3,
ANCHOR = EPSILON_BIT | 4,
/* Tree type, these are used only by tree. */
CONCAT = 16,
SUBEXP = 17,
/* Token type, these are used only by token. */
OP_DUP_PLUS = 18,
OP_DUP_QUESTION,
OP_OPEN_BRACKET,
OP_CLOSE_BRACKET,
OP_CHARSET_RANGE,
OP_OPEN_DUP_NUM,
OP_CLOSE_DUP_NUM,
OP_NON_MATCH_LIST,
OP_OPEN_COLL_ELEM,
OP_CLOSE_COLL_ELEM,
OP_OPEN_EQUIV_CLASS,
OP_CLOSE_EQUIV_CLASS,
OP_OPEN_CHAR_CLASS,
OP_CLOSE_CHAR_CLASS,
OP_WORD,
OP_NOTWORD,
OP_SPACE,
OP_NOTSPACE,
BACK_SLASH
} re_token_type_t;
#ifdef RE_ENABLE_I18N
typedef struct
{
/* Multibyte characters. */
wchar_t *mbchars;
/* Collating symbols. */
# ifdef _LIBC
int32_t *coll_syms;
# endif
/* Equivalence classes. */
# ifdef _LIBC
int32_t *equiv_classes;
# endif
/* Range expressions. */
# ifdef _LIBC
uint32_t *range_starts;
uint32_t *range_ends;
# else /* not _LIBC */
wchar_t *range_starts;
wchar_t *range_ends;
# endif /* not _LIBC */
/* Character classes. */
wctype_t *char_classes;
/* If this character set is the non-matching list. */
unsigned int non_match : 1;
/* # of multibyte characters. */
int nmbchars;
/* # of collating symbols. */
int ncoll_syms;
/* # of equivalence classes. */
int nequiv_classes;
/* # of range expressions. */
int nranges;
/* # of character classes. */
int nchar_classes;
} re_charset_t;
#endif /* RE_ENABLE_I18N */
typedef struct
{
union
{
unsigned char c; /* for CHARACTER */
re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
#ifdef RE_ENABLE_I18N
re_charset_t *mbcset; /* for COMPLEX_BRACKET */
#endif /* RE_ENABLE_I18N */
int idx; /* for BACK_REF */
re_context_type ctx_type; /* for ANCHOR */
} opr;
#if __GNUC__ >= 2
re_token_type_t type : 8;
#else
re_token_type_t type;
#endif
unsigned int constraint : 10; /* context constraint */
unsigned int duplicated : 1;
unsigned int opt_subexp : 1;
#ifdef RE_ENABLE_I18N
unsigned int accept_mb : 1;
/* These 2 bits can be moved into the union if needed (e.g. if running out
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
unsigned int mb_partial : 1;
#endif
unsigned int word_char : 1;
} re_token_t;
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
struct re_string_t
{
/* Indicate the raw buffer which is the original string passed as an
argument of regexec(), re_search(), etc.. */
const unsigned char *raw_mbs;
/* Store the multibyte string. In case of "case insensitive mode" like
REG_ICASE, upper cases of the string are stored, otherwise MBS points
the same address that RAW_MBS points. */
unsigned char *mbs;
#ifdef RE_ENABLE_I18N
/* Store the wide character string which is corresponding to MBS. */
wint_t *wcs;
int *offsets;
mbstate_t cur_state;
#endif
/* Index in RAW_MBS. Each character mbs[i] corresponds to
raw_mbs[raw_mbs_idx + i]. */
int raw_mbs_idx;
/* The length of the valid characters in the buffers. */
int valid_len;
/* The corresponding number of bytes in raw_mbs array. */
int valid_raw_len;
/* The length of the buffers MBS and WCS. */
int bufs_len;
/* The index in MBS, which is updated by re_string_fetch_byte. */
int cur_idx;
/* length of RAW_MBS array. */
int raw_len;
/* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
int len;
/* End of the buffer may be shorter than its length in the cases such
as re_match_2, re_search_2. Then, we use STOP for end of the buffer
instead of LEN. */
int raw_stop;
/* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
int stop;
/* The context of mbs[0]. We store the context independently, since
the context of mbs[0] may be different from raw_mbs[0], which is
the beginning of the input string. */
unsigned int tip_context;
/* The translation passed as a part of an argument of re_compile_pattern. */
unsigned RE_TRANSLATE_TYPE trans;
/* Copy of re_dfa_t's word_char. */
re_const_bitset_ptr_t word_char;
/* 1 if REG_ICASE. */
unsigned char icase;
unsigned char is_utf8;
unsigned char map_notascii;
unsigned char mbs_allocated;
unsigned char offsets_needed;
unsigned char newline_anchor;
unsigned char word_ops_used;
int mb_cur_max;
};
typedef struct re_string_t re_string_t;
struct re_dfa_t;
typedef struct re_dfa_t re_dfa_t;
#ifndef _LIBC
# ifdef __i386__
# define internal_function __attribute ((regparm (3), stdcall))
# else
# define internal_function
# endif
#endif
#ifndef RE_NO_INTERNAL_PROTOTYPES
static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
int len, int init_len,
RE_TRANSLATE_TYPE trans, int icase,
const re_dfa_t *dfa)
internal_function;
static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
int len, RE_TRANSLATE_TYPE trans,
int icase, const re_dfa_t *dfa)
internal_function;
static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
int eflags) internal_function;
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
int new_buf_len)
internal_function;
# ifdef RE_ENABLE_I18N
static void build_wcs_buffer (re_string_t *pstr) internal_function;
static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
# endif /* RE_ENABLE_I18N */
static void build_upper_buffer (re_string_t *pstr) internal_function;
static void re_string_translate_buffer (re_string_t *pstr) internal_function;
static void re_string_destruct (re_string_t *pstr) internal_function;
# ifdef RE_ENABLE_I18N
static int re_string_elem_size_at (const re_string_t *pstr, int idx)
internal_function __attribute ((pure));
static inline int re_string_char_size_at (const re_string_t *pstr, int idx)
internal_function __attribute ((pure));
static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx)
internal_function __attribute ((pure));
# endif /* RE_ENABLE_I18N */
static unsigned int re_string_context_at (const re_string_t *input, int idx,
int eflags)
internal_function __attribute ((pure));
static unsigned char re_string_peek_byte_case (const re_string_t *pstr,
int idx)
internal_function __attribute ((pure));
static unsigned char re_string_fetch_byte_case (re_string_t *pstr)
internal_function __attribute ((pure));
#endif
#define re_string_peek_byte(pstr, offset) \
((pstr)->mbs[(pstr)->cur_idx + offset])
#define re_string_fetch_byte(pstr) \
((pstr)->mbs[(pstr)->cur_idx++])
#define re_string_first_byte(pstr, idx) \
((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
#define re_string_is_single_byte_char(pstr, idx) \
((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
|| (pstr)->wcs[(idx) + 1] != WEOF))
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len)
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
#define re_free(p) free (p)
struct bin_tree_t
{
struct bin_tree_t *parent;
struct bin_tree_t *left;
struct bin_tree_t *right;
struct bin_tree_t *first;
struct bin_tree_t *next;
re_token_t token;
/* `node_idx' is the index in dfa->nodes, if `type' == 0.
Otherwise `type' indicate the type of this node. */
int node_idx;
};
typedef struct bin_tree_t bin_tree_t;
#define BIN_TREE_STORAGE_SIZE \
((1024 - sizeof (void *)) / sizeof (bin_tree_t))
struct bin_tree_storage_t
{
struct bin_tree_storage_t *next;
bin_tree_t data[BIN_TREE_STORAGE_SIZE];
};
typedef struct bin_tree_storage_t bin_tree_storage_t;
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
|| ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
|| ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
|| ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
|| (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
|| (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
|| (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
struct re_dfastate_t
{
unsigned int hash;
re_node_set nodes;
re_node_set non_eps_nodes;
re_node_set inveclosure;
re_node_set *entrance_nodes;
struct re_dfastate_t **trtable, **word_trtable;
unsigned int context : 4;
unsigned int halt : 1;
/* If this state can accept `multi byte'.
Note that we refer to multibyte characters, and multi character
collating elements as `multi byte'. */
unsigned int accept_mb : 1;
/* If this state has backreference node(s). */
unsigned int has_backref : 1;
unsigned int has_constraint : 1;
};
typedef struct re_dfastate_t re_dfastate_t;
struct re_state_table_entry
{
int num;
int alloc;
re_dfastate_t **array;
};
/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
typedef struct
{
int next_idx;
int alloc;
re_dfastate_t **array;
} state_array_t;
/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
typedef struct
{
int node;
int str_idx; /* The position NODE match at. */
state_array_t path;
} re_sub_match_last_t;
/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
And information about the node, whose type is OP_CLOSE_SUBEXP,
corresponding to NODE is stored in LASTS. */
typedef struct
{
int str_idx;
int node;
int next_last_offset;
state_array_t *path;
int alasts; /* Allocation size of LASTS. */
int nlasts; /* The number of LASTS. */
re_sub_match_last_t **lasts;
} re_sub_match_top_t;
struct re_backref_cache_entry
{
int node;
int str_idx;
int subexp_from;
int subexp_to;
char more;
char unused;
unsigned short int eps_reachable_subexps_map;
};
typedef struct
{
/* The string object corresponding to the input string. */
re_string_t input;
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
re_dfa_t *const dfa;
#else
re_dfa_t *dfa;
#endif
/* EFLAGS of the argument of regexec. */
int eflags;
/* Where the matching ends. */
int match_last;
int last_node;
/* The state log used by the matcher. */
re_dfastate_t **state_log;
int state_log_top;
/* Back reference cache. */
int nbkref_ents;
int abkref_ents;
struct re_backref_cache_entry *bkref_ents;
int max_mb_elem_len;
int nsub_tops;
int asub_tops;
re_sub_match_top_t **sub_tops;
} re_match_context_t;
typedef struct
{
re_dfastate_t **sifted_states;
re_dfastate_t **limited_states;
int last_node;
int last_str_idx;
re_node_set limits;
} re_sift_context_t;
struct re_fail_stack_ent_t
{
int idx;
int node;
regmatch_t *regs;
re_node_set eps_via_nodes;
};
struct re_fail_stack_t
{
int num;
int alloc;
struct re_fail_stack_ent_t *stack;
};
struct re_dfa_t
{
re_token_t *nodes;
int nodes_alloc;
int nodes_len;
int *nexts;
int *org_indices;
re_node_set *edests;
re_node_set *eclosures;
re_node_set *inveclosures;
struct re_state_table_entry *state_table;
re_dfastate_t *init_state;
re_dfastate_t *init_state_word;
re_dfastate_t *init_state_nl;
re_dfastate_t *init_state_begbuf;
bin_tree_t *str_tree;
bin_tree_storage_t *str_tree_storage;
re_bitset_ptr_t sb_char;
int str_tree_storage_idx;
/* number of subexpressions `re_nsub' is in regex_t. */
unsigned int state_hash_mask;
int states_alloc;
int init_node;
int nbackref; /* The number of backreference in this dfa. */
/* Bitmap expressing which backreference is used. */
unsigned int used_bkref_map;
unsigned int completed_bkref_map;
unsigned int has_plural_match : 1;
/* If this dfa has "multibyte node", which is a backreference or
a node which can accept multibyte character or multi character
collating element. */
unsigned int has_mb_node : 1;
unsigned int is_utf8 : 1;
unsigned int map_notascii : 1;
unsigned int word_ops_used : 1;
int mb_cur_max;
bitset word_char;
reg_syntax_t syntax;
int *subexp_map;
#ifdef DEBUG
char* re_str;
#endif
__libc_lock_define (, lock)
};
#ifndef RE_NO_INTERNAL_PROTOTYPES
static reg_errcode_t re_node_set_alloc (re_node_set *set, int size) internal_function;
static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem) internal_function;
static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
int elem2) internal_function;
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
const re_node_set *src) internal_function;
static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
const re_node_set *src1,
const re_node_set *src2) internal_function;
static reg_errcode_t re_node_set_init_union (re_node_set *dest,
const re_node_set *src1,
const re_node_set *src2) internal_function;
static reg_errcode_t re_node_set_merge (re_node_set *dest,
const re_node_set *src) internal_function;
static int re_node_set_insert (re_node_set *set, int elem) internal_function;
static int re_node_set_insert_last (re_node_set *set,
int elem) internal_function;
static int re_node_set_compare (const re_node_set *set1,
const re_node_set *set2)
internal_function __attribute ((pure));
static int re_node_set_contains (const re_node_set *set, int elem)
internal_function __attribute ((pure));
static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
#define re_node_set_remove(set,id) \
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function;
static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
const re_node_set *nodes) internal_function;
static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int context) internal_function;
static void free_state (re_dfastate_t *state) internal_function;
#endif
typedef enum
{
SB_CHAR,
MB_CHAR,
EQUIV_CLASS,
COLL_SYM,
CHAR_CLASS
} bracket_elem_type;
typedef struct
{
bracket_elem_type type;
union
{
unsigned char ch;
unsigned char *name;
wchar_t wch;
} opr;
} bracket_elem_t;
/* Inline functions for bitset operation. */
static inline void
bitset_not (bitset set)
{
int bitset_i;
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
set[bitset_i] = ~set[bitset_i];
}
static inline void
bitset_merge (bitset dest, const bitset src)
{
int bitset_i;
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
dest[bitset_i] |= src[bitset_i];
}
static inline void
bitset_not_merge (bitset dest, const bitset src)
{
int i;
for (i = 0; i < BITSET_UINTS; ++i)
dest[i] |= ~src[i];
}
static inline void
bitset_mask (bitset dest, const bitset src)
{
int bitset_i;
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
dest[bitset_i] &= src[bitset_i];
}
#if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES
/* Inline functions for re_string. */
static inline int
internal_function
re_string_char_size_at (const re_string_t *pstr, int idx)
{
int byte_idx;
if (pstr->mb_cur_max == 1)
return 1;
for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
if (pstr->wcs[idx + byte_idx] != WEOF)
break;
return byte_idx;
}
static inline wint_t
internal_function
re_string_wchar_at (const re_string_t *pstr, int idx)
{
if (pstr->mb_cur_max == 1)
return (wint_t) pstr->mbs[idx];
return (wint_t) pstr->wcs[idx];
}
static int
internal_function
re_string_elem_size_at (const re_string_t *pstr, int idx)
{
#ifdef _LIBC
const unsigned char *p, *extra;
const int32_t *table, *indirect;
int32_t tmp;
# include <locale/weight.h>
uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0)
{
table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
extra = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_INDIRECTMB);
p = pstr->mbs + idx;
tmp = findidx (&p);
return p - pstr->mbs - idx;
}
else
#endif /* _LIBC */
return 1;
}
#endif /* RE_ENABLE_I18N */
#endif /* _REGEX_INTERNAL_H */
This diff could not be displayed because it is too large.
/* size_max.h -- declare SIZE_MAX through system headers
Copyright (C) 2005 Free Software Foundation, Inc.
Written by Simon Josefsson.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef GNULIB_SIZE_MAX_H
#define GNULIB_SIZE_MAX_H
# include <limits.h>
# if HAVE_STDINT_H
# include <stdint.h>
# endif
#endif /* GNULIB_SIZE_MAX_H */
/* Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
Written by Bruno Haible <haible@clisp.cons.org>, 2001.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef _STDBOOL_H
#define _STDBOOL_H
/* ISO C 99 <stdbool.h> for platforms that lack it. */
/* Usage suggestions:
Programs that use <stdbool.h> should be aware of some limitations
and standards compliance issues.
Standards compliance:
- <stdbool.h> must be #included before 'bool', 'false', 'true'
can be used.
- You cannot assume that sizeof (bool) == 1.
- Programs should not undefine the macros bool, true, and false,
as C99 lists that as an "obsolescent feature".
Limitations of this substitute, when used in a C89 environment:
- <stdbool.h> must be #included before the '_Bool' type can be used.
- You cannot assume that _Bool is a typedef; it might be a macro.
- In C99, casts and automatic conversions to '_Bool' or 'bool' are
performed in such a way that every nonzero value gets converted
to 'true', and zero gets converted to 'false'. This doesn't work
with this substitute. With this substitute, only the values 0 and 1
give the expected result when converted to _Bool' or 'bool'.
Also, it is suggested that programs use 'bool' rather than '_Bool';
this isn't required, but 'bool' is more common. */
/* 7.16. Boolean type and values */
/* BeOS <sys/socket.h> already #defines false 0, true 1. We use the same
definitions below, but temporarily we have to #undef them. */
#ifdef __BEOS__
# include <OS.h> /* defines bool but not _Bool */
# undef false
# undef true
#endif
/* For the sake of symbolic names in gdb, we define true and false as
enum constants, not only as macros.
It is tempting to write
typedef enum { false = 0, true = 1 } _Bool;
so that gdb prints values of type 'bool' symbolically. But if we do
this, values of type '_Bool' may promote to 'int' or 'unsigned int'
(see ISO C 99 6.7.2.2.(4)); however, '_Bool' must promote to 'int'
(see ISO C 99 6.3.1.1.(2)). So we add a negative value to the
enum; this ensures that '_Bool' promotes to 'int'. */
#if !(defined __cplusplus || defined __BEOS__)
# if !@HAVE__BOOL@
# if defined __SUNPRO_C && (__SUNPRO_C < 0x550 || __STDC__ == 1)
/* Avoid stupid "warning: _Bool is a keyword in ISO C99". */
# define _Bool signed char
enum { false = 0, true = 1 };
# else
typedef enum { _Bool_must_promote_to_int = -1, false = 0, true = 1 } _Bool;
# endif
# endif
#else
typedef bool _Bool;
#endif
#define bool _Bool
/* The other macros must be usable in preprocessor directives. */
#define false 0
#define true 1
#define __bool_true_false_are_defined 1
#endif /* _STDBOOL_H */
/* Find the length of STRING, but scan at most MAXLEN characters.
Copyright (C) 2005 Free Software Foundation, Inc.
Written by Simon Josefsson.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef STRNLEN_H
#define STRNLEN_H
/* Get strnlen declaration, if available. */
#include <string.h>
#if defined HAVE_DECL_STRNLEN && !HAVE_DECL_STRNLEN
/* Find the length (number of bytes) of STRING, but scan at most
MAXLEN bytes. If no '\0' terminator is found in that many bytes,
return MAXLEN. */
extern size_t strnlen(const char *string, size_t maxlen);
#endif
#endif /* STRNLEN_H */
/* Find the length of STRING + 1, but scan at most MAXLEN bytes.
Copyright (C) 2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Library General Public License as published
by the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA. */
#if HAVE_CONFIG_H
# include <config.h>
#endif
/* Specification. */
#include "strnlen1.h"
#include <string.h>
/* Find the length of STRING + 1, but scan at most MAXLEN bytes.
If no '\0' terminator is found in that many characters, return MAXLEN. */
/* This is the same as strnlen (string, maxlen - 1) + 1. */
size_t
strnlen1 (const char *string, size_t maxlen)
{
const char *end = memchr (string, '\0', maxlen);
if (end != NULL)
return end - string + 1;
else
return maxlen;
}
/* Find the length of STRING + 1, but scan at most MAXLEN bytes.
Copyright (C) 2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Library General Public License as published
by the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA. */
#ifndef _STRNLEN1_H
#define _STRNLEN1_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Find the length of STRING + 1, but scan at most MAXLEN bytes.
If no '\0' terminator is found in that many characters, return MAXLEN. */
/* This is the same as strnlen (string, maxlen - 1) + 1. */
extern size_t strnlen1 (const char *string, size_t maxlen);
#ifdef __cplusplus
}
#endif
#endif /* _STRNLEN1_H */