www.delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp-workers/2001/12/18/12:02:35

X-Authentication-Warning: delorie.com: mailnull set sender to djgpp-workers-bounces using -f
Date: Tue, 18 Dec 2001 12:01:01 -0500
From: AAganichev AT netscape DOT net (Alexander Aganichev)
To: djgpp-workers AT delorie DOT com
Subject: Re: regcomp NLS fix
Message-ID: <6F32F7D2.12300D8B.09ACFA57@netscape.net>
X-Mailer: Atlas Mailer 1.0
Reply-To: djgpp-workers AT delorie DOT com

---------6f340302123118bb6f340302123118bb
Content-Type: text/plain; charset=iso-8859-1
Content-Transfer-Encoding: 8bit
Content-Disposition: inline

Hans-Bernhard Broeker <broeker AT physik DOT rwth-aachen DOT de> wrote:
>> regcomp() sometimes crashes when NLS characters are in use. This patch
>> seems to fix this (though I'm not sure whether NLS characters may be a
>> problem in other places):
>I'm quite sure there are.  The real problem is that lots of calls of
><ctype.h> functions inside regcomp are incorrect because they pass char
>values into them without casting to unsigned char first.  This would only
>be correct if our libc assumed char == unsigned char, but unless I'm
>totally misremembering things, the opposite is true.

I have redone the patch (regex.nls+warning.diff) by casting the arguments of all ctype.h functions to unsigned char. I have also fixed the warnings caused by missing braces in initializers and by values that were computed but never used. There are 4 more warnings in pedantic mode about possible use of uninitialized variables, but they seem to be safe to ignore.

The second fix (regex.nls2.diff) is intended to make the character classes work properly in an NLS-capable environment.
-- 
alexander aganichev
url: http://aaganichev.narod.ru



__________________________________________________________________
Your favorite stores, helpful shopping tools and great gift ideas. Experience the convenience of buying online with Shop AT Netscape! http://shopnow.netscape.com/

Get your own FREE, personal Netscape Mail account today at http://webmail.netscape.com/

---------6f340302123118bb6f340302123118bb
Content-Type: text/plain; charset=iso-8859-1; name="regex.nls+warning.diff"
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="regex.nls+warning.diff"
Content-Description: regex.nls+warning.diff

diff -ru regex.orig/cclass.h regex/cclass.h
--- regex.orig/cclass.h Thu Jul 16 21:20:30 1998
+++ regex/cclass.h  Tue Dec 18 19:03:20 2001
@@ -5,28 +5,28 @@
    char *chars;
    char *multis;
 } cclasses[] = {
-   "alnum",    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789",               "",
-   "alpha",    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
-                   "",
-   "blank",    " \t",      "",
-   "cntrl",    "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177",    "",
-   "digit",    "0123456789",   "",
-   "graph",    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+   { "alnum",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789",               "" },
+   { "alpha",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+                   "" },
+   { "blank",  " \t",      "" },
+   { "cntrl",  "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177",    "" },
+   { "digit",  "0123456789",   "" },
+   { "graph",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-                   "",
-   "lower",    "abcdefghijklmnopqrstuvwxyz",
-                   "",
-   "print",    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+                   "" },
+   { "lower",  "abcdefghijklmnopqrstuvwxyz",
+                   "" },
+   { "print",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
-                   "",
-   "punct",    "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-                   "",
-   "space",    "\t\n\v\f\r ",  "",
-   "upper",    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
-                   "",
-   "xdigit",   "0123456789ABCDEFabcdef",
-                   "",
-   NULL,       0,      ""
+                   "" },
+   { "punct",  "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+                   "" },
+   { "space",  "\t\n\v\f\r ",  "" },
+   { "upper",  "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+                   "" },
+   { "xdigit", "0123456789ABCDEFabcdef",
+                   "" },
+   { NULL,     0,      "" }
 };
diff -ru regex.orig/cname.h regex/cname.h
--- regex.orig/cname.h  Thu Jul 16 21:20:30 1998
+++ regex/cname.h   Tue Dec 18 18:09:58 2001
@@ -4,100 +4,101 @@
    char *name;
    char code;
 } cnames[] = {
-   "NUL",  '\0',
-   "SOH",  '\001',
-   "STX",  '\002',
-   "ETX",  '\003',
-   "EOT",  '\004',
-   "ENQ",  '\005',
-   "ACK",  '\006',
-   "BEL",  '\007',
-   "alert",    '\007',
-   "BS",       '\010',
-   "backspace",    '\b',
-   "HT",       '\011',
-   "tab",      '\t',
-   "LF",       '\012',
-   "newline",  '\n',
-   "VT",       '\013',
-   "vertical-tab", '\v',
-   "FF",       '\014',
-   "form-feed",    '\f',
-   "CR",       '\015',
-   "carriage-return",  '\r',
-   "SO",   '\016',
-   "SI",   '\017',
-   "DLE",  '\020',
-   "DC1",  '\021',
-   "DC2",  '\022',
-   "DC3",  '\023',
-   "DC4",  '\024',
-   "NAK",  '\025',
-   "SYN",  '\026',
-   "ETB",  '\027',
-   "CAN",  '\030',
-   "EM",   '\031',
-   "SUB",  '\032',
-   "ESC",  '\033',
-   "IS4",  '\034',
-   "FS",   '\034',
-   "IS3",  '\035',
-   "GS",   '\035',
-   "IS2",  '\036',
-   "RS",   '\036',
-   "IS1",  '\037',
-   "US",   '\037',
-   "space",        ' ',
-   "exclamation-mark", '!',
-   "quotation-mark",   '"',
-   "number-sign",      '#',
-   "dollar-sign",      '$',
-   "percent-sign",     '%',
-   "ampersand",        '&',
-   "apostrophe",       '\'',
-   "left-parenthesis", '(',
-   "right-parenthesis",    ')',
-   "asterisk", '*',
-   "plus-sign",    '+',
-   "comma",    ',',
-   "hyphen",   '-',
-   "hyphen-minus", '-',
-   "period",   '.',
-   "full-stop",    '.',
-   "slash",    '/',
-   "solidus",  '/',
-   "zero",     '0',
-   "one",      '1',
-   "two",      '2',
-   "three",    '3',
-   "four",     '4',
-   "five",     '5',
-   "six",      '6',
-   "seven",    '7',
-   "eight",    '8',
-   "nine",     '9',
-   "colon",    ':',
-   "semicolon",    ';',
-   "less-than-sign",   '<',
-   "equals-sign",      '=',
-   "greater-than-sign",    '>',
-   "question-mark",    '?',
-   "commercial-at",    '@',
-   "left-square-bracket",  '[',
-   "backslash",        '\\',
-   "reverse-solidus",  '\\',
-   "right-square-bracket", ']',
-   "circumflex",       '^',
-   "circumflex-accent",    '^',
-   "underscore",       '_',
-   "low-line",     '_',
-   "grave-accent",     '`',
-   "left-brace",       '{',
-   "left-curly-bracket",   '{',
-   "vertical-line",    '|',
-   "right-brace",      '}',
-   "right-curly-bracket",  '}',
-   "tilde",        '~',
-   "DEL",  '\177',
-   NULL,   0,
+   { "NUL",    '\0' },
+   { "SOH",    '\001' },
+   { "STX",    '\002' },
+   { "ETX",    '\003' },
+   { "EOT",    '\004' },
+   { "ENQ",    '\005' },
+   { "ACK",    '\006' },
+   { "BEL",    '\007' },
+   { "alert",  '\007' },
+   { "BS",     '\010' },
+   { "backspace",  '\b' },
+   { "HT",     '\011' },
+   { "tab",        '\t' },
+   { "LF",     '\012' },
+   { "newline",    '\n' },
+   { "VT",     '\013' },
+   { "vertical-tab",   '\v' },
+   { "FF",     '\014' },
+   { "form-feed",  '\f' },
+   { "CR",     '\015' },
+   { "carriage-return",    '\r' },
+   { "SO", '\016' },
+   { "SI", '\017' },
+   { "DLE",    '\020' },
+   { "DC1",    '\021' },
+   { "DC2",    '\022' },
+   { "DC3",    '\023' },
+   { "DC4",    '\024' },
+   { "NAK",    '\025' },
+   { "SYN",    '\026' },
+   { "ETB",    '\027' },
+   { "CAN",    '\030' },
+   { "EM", '\031' },
+   { "SUB",    '\032' },
+   { "ESC",    '\033' },
+   { "IS4",    '\034' },
+   { "FS", '\034' },
+   { "IS3",    '\035' },
+   { "GS", '\035' },
+   { "IS2",    '\036' },
+   { "RS", '\036' },
+   { "IS1",    '\037' },
+   { "US", '\037' },
+   { "space",      ' ' },
+   { "exclamation-mark",   '!' },
+   { "quotation-mark", '\"' },
+   { "number-sign",        '#' },
+   { "dollar-sign",        '$' },
+   { "percent-sign",       '%' },
+   { "ampersand",      '&' },
+   { "apostrophe",     '\'' },
+   { "left-parenthesis",   '(' },
+   { "right-parenthesis",  ')' },
+   { "asterisk",   '*' },
+   { "plus-sign",  '+' },
+   { "comma",  ',' },
+   { "hyphen", '-' },
+   { "hyphen-minus",   '-' },
+   { "period", '.' },
+   { "full-stop",  '.' },
+   { "slash",  '/' },
+   { "solidus",    '/' },
+   { "zero",       '0' },
+   { "one",        '1' },
+   { "two",        '2' },
+   { "three",  '3' },
+   { "four",       '4' },
+   { "five",       '5' },
+   { "six",        '6' },
+   { "seven",  '7' },
+   { "eight",  '8' },
+   { "nine",       '9' },
+   { "colon",  ':' },
+   { "semicolon",  ';' },
+   { "less-than-sign", '<' },
+   { "equals-sign",        '=' },
+   { "greater-than-sign",  '>' },
+   { "question-mark",  '?' },
+   { "commercial-at",  '@' },
+   { "left-square-bracket",    '[' },
+   { "backslash",      '\\' },
+   { "reverse-solidus",    '\\' },
+   { "right-square-bracket",   ']' },
+   { "circumflex",     '^' },
+   { "circumflex-accent",  '^' },
+   { "underscore",     '_' },
+   { "low-line",       '_' },
+   { "grave-accent",       '`' },
+   { "left-brace",     '{' },
+   { "left-curly-bracket", '{' },
+   { "vertical-line",  '|' },
+   { "right-brace",        '}' },
+   { "right-curly-bracket",    '}' },
+   { "tilde",      '~' },
+   { "DEL",    '\177' },
+   { NULL, 0, }
 };
+
diff -ru regex.orig/debug.c regex/debug.c
--- regex.orig/debug.c  Thu Jul 16 21:20:30 1998
+++ regex/debug.c   Tue Dec 18 18:44:44 2001
@@ -235,7 +235,7 @@
 {
    static char buf[10];
 
-   if (isprint(ch) || ch == ' ')
+   if (isprint((unsigned char)ch) || ch == ' ')
        sprintf(buf, "%c", ch);
    else
        sprintf(buf, "\\%o", ch);
diff -ru regex.orig/engine.c regex/engine.c
--- regex.orig/engine.c Thu Jul 16 21:20:30 1998
+++ regex/engine.c  Tue Dec 18 18:44:48 2001
@@ -1000,7 +1000,7 @@
 {
    static char pbuf[10];
 
-   if (isprint(ch) || ch == ' ')
+   if (isprint((unsigned char)ch) || ch == ' ')
        sprintf(pbuf, "%c", ch);
    else
        sprintf(pbuf, "\\%o", ch);
diff -ru regex.orig/regcomp.c regex/regcomp.c
--- regex.orig/regcomp.c    Sat Jun  9 23:50:56 2001
+++ regex/regcomp.c Tue Dec 18 18:51:12 2001
@@ -53,10 +53,10 @@
 #define    NEXTn(n)    (p->next += (n))
 #define    GETNEXT()   (*p->next++)
 #define    SETERROR(e) seterr(p, (e))
-#define    REQUIRE(co, e)  ((co) || SETERROR(e))
-#define    MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
-#define    MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
-#define    MUSTNOTSEE(c, e)    (REQUIRE(!MORE() || PEEK() != (c), e))
+#define    REQUIRE(co, e)  { if(!(co)) SETERROR(e); }
+#define    MUSTSEE(c, e)   REQUIRE(MORE() && PEEK() == (c), e)
+#define    MUSTEAT(c, e)   REQUIRE(MORE() && GETNEXT() == (c), e)
+#define    MUSTNOTSEE(c, e)    REQUIRE(!MORE() || PEEK() != (c), e)
 #define    EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
 #define    INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
 #define    AHEAD(pos)      dofwd(p, pos, HERE()-(pos))
@@ -312,7 +312,7 @@
        ordinary(p, c);
        break;
    case '{':       /* okay as ordinary except if digit follows */
-       REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
+       REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT);
        /* FALLTHROUGH */
    default:
        ordinary(p, c);
@@ -324,7 +324,7 @@
    c = PEEK();
    /* we call { a repetition if followed by a digit */
    if (!( c == '*' || c == '+' || c == '?' ||
-               (c == '{' && MORE2() && isdigit(PEEK2())) ))
+       (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ))
        return;     /* no repetition, we're done */
    NEXT();
 
@@ -353,7 +353,7 @@
    case '{':
        count = p_count(p);
        if (EAT(',')) {
-           if (isdigit(PEEK())) {
+           if (isdigit((unsigned char)PEEK())) {
                count2 = p_count(p);
                REQUIRE(count <= count2, REG_BADBR);
            } else      /* single number with comma */
@@ -374,7 +374,7 @@
        return;
    c = PEEK();
    if (!( c == '*' || c == '+' || c == '?' ||
-               (c == '{' && MORE2() && isdigit(PEEK2())) ) )
+       (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ) )
        return;
    SETERROR(REG_BADRPT);
 }
@@ -531,7 +531,7 @@
    } else if (EATTWO('\\', '{')) {
        count = p_count(p);
        if (EAT(',')) {
-           if (MORE() && isdigit(PEEK())) {
+           if (MORE() && isdigit((unsigned char)PEEK())) {
                count2 = p_count(p);
                REQUIRE(count <= count2, REG_BADBR);
            } else      /* single number with comma */
@@ -562,7 +562,7 @@
    register int count = 0;
    register int ndigits = 0;
 
-   while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
+   while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) {
        count = count*10 + (GETNEXT() - '0');
        ndigits++;
    }
@@ -617,7 +617,7 @@
        register int ci;
 
        for (i = p->g->csetsize - 1; i >= 0; i--)
-           if (CHIN(cs, i) && isalpha(i)) {
+           if (CHIN(cs, i) && isalpha((unsigned char)i)) {
                ci = othercase(i);
                if (ci != i)
                    CHadd(cs, ci);
@@ -729,7 +729,7 @@
    register char *u;
    register char c;
 
-   while (MORE() && isalpha(PEEK()))
+   while (MORE() && isalpha((unsigned char)PEEK()))
        NEXT();
    len = p->next - sp;
    for (cp = cclasses; cp->name != NULL; cp++)
@@ -822,11 +822,11 @@
 othercase(ch)
 int ch;
 {
-   assert(isalpha(ch));
-   if (isupper(ch))
-       return(tolower(ch));
-   else if (islower(ch))
-       return(toupper(ch));
+   assert(isalpha((unsigned char)ch));
+   if (isupper((unsigned char)ch))
+       return(tolower((unsigned char)ch));
+   else if (islower((unsigned char)ch))
+       return(toupper((unsigned char)ch));
    else            /* peculiar, but could happen */
        return(ch);
 }
@@ -865,11 +865,11 @@
 static void
 ordinary(p, ch)
 register struct parse *p;
-register int ch;
 {
    register cat_t *cap = p->g->categories;
 
-   if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
+   if ((p->g->cflags&REG_ICASE) && isalpha((unsigned char)ch) &&
+                           othercase(ch) != ch)
        bothcases(p, ch);
    else {
        EMIT(OCHAR, (unsigned char)ch);
@@ -1169,64 +1169,6 @@
 
    (void) strcpy(cs->multis + oldend - 1, cp);
    cs->multis[cs->smultis - 1] = '\0';
-}
-
-/*
- - mcsub - subtract a collating element from a cset
- == static void mcsub(register cset *cs, register char *cp);
- */
-static void
-mcsub(cs, cp)
-register cset *cs;
-register char *cp;
-{
-   register char *fp = mcfind(cs, cp);
-   register size_t len = strlen(fp);
-
-   assert(fp != NULL);
-   (void) memmove(fp, fp + len + 1,
-               cs->smultis - (fp + len + 1 - cs->multis));
-   cs->smultis -= len;
-
-   if (cs->smultis == 0) {
-       free(cs->multis);
-       cs->multis = NULL;
-       return;
-   }
-
-   cs->multis = realloc(cs->multis, cs->smultis);
-   assert(cs->multis != NULL);
-}
-
-/*
- - mcin - is a collating element in a cset?
- == static int mcin(register cset *cs, register char *cp);
- */
-static int
-mcin(cs, cp)
-register cset *cs;
-register char *cp;
-{
-   return(mcfind(cs, cp) != NULL);
-}
-
-/*
- - mcfind - find a collating element in a cset
- == static char *mcfind(register cset *cs, register char *cp);
- */
-static char *
-mcfind(cs, cp)
-register cset *cs;
-register char *cp;
-{
-   register char *p;
-
-   if (cs->multis == NULL)
-       return(NULL);
-   for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
-       if (strcmp(cp, p) == 0)
-           return(p);
-   return(NULL);
 }
 
 /*
diff -ru regex.orig/regcomp.ih regex/regcomp.ih
--- regex.orig/regcomp.ih   Fri Jul 17 23:32:04 1998
+++ regex/regcomp.ih    Tue Dec 18 18:00:12 2001
@@ -28,9 +28,6 @@
 static int firstch(register struct parse *p, register cset *cs);
 static int nch(register struct parse *p, register cset *cs);
 static void mcadd(register struct parse *p, register cset *cs, register char *cp);
-static void mcsub(register cset *cs, register char *cp);
-static int mcin(register cset *cs, register char *cp);
-static char *mcfind(register cset *cs, register char *cp);
 static void mcinvert(register struct parse *p, register cset *cs);
 static void mccase(register struct parse *p, register cset *cs);
 static int isinsets(register struct re_guts *g, int c);
diff -ru regex.orig/regerror.c regex/regerror.c
--- regex.orig/regerror.c   Thu Jul 16 21:20:30 1998
+++ regex/regerror.c    Tue Dec 18 17:52:34 2001
@@ -36,24 +36,24 @@
    char *name;
    char *explain;
 } rerrs[] = {
-   REG_OKAY,   "REG_OKAY", "no errors detected",
-   REG_NOMATCH,    "REG_NOMATCH",  "regexec() failed to match",
-   REG_BADPAT, "REG_BADPAT",   "invalid regular expression",
-   REG_ECOLLATE,   "REG_ECOLLATE", "invalid collating element",
-   REG_ECTYPE, "REG_ECTYPE",   "invalid character class",
-   REG_EESCAPE,    "REG_EESCAPE",  "trailing backslash (\\)",
-   REG_ESUBREG,    "REG_ESUBREG",  "invalid backreference number",
-   REG_EBRACK, "REG_EBRACK",   "brackets ([ ]) not balanced",
-   REG_EPAREN, "REG_EPAREN",   "parentheses not balanced",
-   REG_EBRACE, "REG_EBRACE",   "braces not balanced",
-   REG_BADBR,  "REG_BADBR",    "invalid repetition count(s)",
-   REG_ERANGE, "REG_ERANGE",   "invalid character range",
-   REG_ESPACE, "REG_ESPACE",   "out of memory",
-   REG_BADRPT, "REG_BADRPT",   "repetition-operator operand invalid",
-   REG_EMPTY,  "REG_EMPTY",    "empty (sub)expression",
-   REG_ASSERT, "REG_ASSERT",   "\"can't happen\" -- you found a bug",
-   REG_INVARG, "REG_INVARG",   "invalid argument to regex routine",
-   -1,     "",     "*** unknown regexp error code ***",
+   { REG_OKAY, "REG_OKAY", "no errors detected" },
+   { REG_NOMATCH,  "REG_NOMATCH",  "regexec() failed to match" },
+   { REG_BADPAT,   "REG_BADPAT",   "invalid regular expression" },
+   { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+   { REG_ECTYPE,   "REG_ECTYPE",   "invalid character class" },
+   { REG_EESCAPE,  "REG_EESCAPE",  "trailing backslash (\\)" },
+   { REG_ESUBREG,  "REG_ESUBREG",  "invalid backreference number" },
+   { REG_EBRACK,   "REG_EBRACK",   "brackets ([ ]) not balanced" },
+   { REG_EPAREN,   "REG_EPAREN",   "parentheses not balanced" },
+   { REG_EBRACE,   "REG_EBRACE",   "braces not balanced" },
+   { REG_BADBR,    "REG_BADBR",    "invalid repetition count(s)" },
+   { REG_ERANGE,   "REG_ERANGE",   "invalid character range" },
+   { REG_ESPACE,   "REG_ESPACE",   "out of memory" },
+   { REG_BADRPT,   "REG_BADRPT",   "repetition-operator operand invalid" },
+   { REG_EMPTY,    "REG_EMPTY",    "empty (sub)expression" },
+   { REG_ASSERT,   "REG_ASSERT",   "\"can't happen\" -- you found a bug" },
+   { REG_INVARG,   "REG_INVARG",   "invalid argument to regex routine" },
+   { -1,       "",     "*** unknown regexp error code ***" }
 };
 
 /*
diff -ru regex.orig/regex2.h regex/regex2.h
--- regex.orig/regex2.h Thu Jul 16 21:20:30 1998
+++ regex/regex2.h  Tue Dec 18 18:42:56 2001
@@ -132,4 +132,4 @@
 
 /* misc utilities */
 #define    OUT (CHAR_MAX+1)    /* a non-character value */
-#define    ISWORD(c)   (isalnum(c) || (c) == '_')
+#define    ISWORD(c)   (isalnum((unsigned char)c) || (c) == '_')
diff -ru regex.orig/regexec.c regex/regexec.c
--- regex.orig/regexec.c    Thu Jul 16 21:20:30 1998
+++ regex/regexec.c Tue Dec 18 17:50:12 2001
@@ -17,7 +17,9 @@
 #include "utils.h"
 #include "regex2.h"
 
+#ifndef NDEBUG
 static int nope = 0;       /* for use in asserts; shuts lint up */
+#endif
 
 /* macros for manipulating states, small version */
 #define    states  unsigned

---------6f340302123118bb6f340302123118bb
Content-Type: text/plain; charset=iso-8859-1; name="regex.nls2.diff"
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="regex.nls2.diff"
Content-Description: regex.nls2.diff

--- regex/cclass.orig   Tue Dec 18 19:03:20 2001
+++ regex/cclass.h  Tue Dec 18 19:07:00 2001
@@ -1,32 +1,26 @@
 /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
 /* character-class table */
+typedef enum {
+  CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, CC_LOWER,
+  CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
+} cclasstype;
+
 static struct cclass {
    char *name;
-   char *chars;
+   cclasstype type;
    char *multis;
 } cclasses[] = {
-   { "alnum",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789",               "" },
-   { "alpha",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
-                   "" },
-   { "blank",  " \t",      "" },
-   { "cntrl",  "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177",    "" },
-   { "digit",  "0123456789",   "" },
-   { "graph",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-                   "" },
-   { "lower",  "abcdefghijklmnopqrstuvwxyz",
-                   "" },
-   { "print",  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
-                   "" },
-   { "punct",  "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
-                   "" },
-   { "space",  "\t\n\v\f\r ",  "" },
-   { "upper",  "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
-                   "" },
-   { "xdigit", "0123456789ABCDEFabcdef",
-                   "" },
+   { "alnum",  CC_ALNUM,   "" },
+   { "alpha",  CC_ALPHA,   "" },
+   { "blank",  CC_BLANK,   "" },
+   { "cntrl",  CC_CNTRL,   "" },
+   { "digit",  CC_DIGIT,   "" },
+   { "graph",  CC_GRAPH,   "" },
+   { "lower",  CC_LOWER,   "" },
+   { "print",  CC_PRINT,   "" },
+   { "punct",  CC_PUNCT,   "" },
+   { "space",  CC_SPACE,   "" },
+   { "upper",  CC_UPPER,   "" },
+   { "xdigit", CC_XDIGIT,  "" },
    { NULL,     0,      "" }
 };
--- regex/regcomp.orig  Tue Dec 18 18:51:12 2001
+++ regex/regcomp.c Tue Dec 18 19:22:26 2001
@@ -727,7 +727,7 @@
    register struct cclass *cp;
    register size_t len;
    register char *u;
-   register char c;
+   register int c;
 
    while (MORE() && isalpha((unsigned char)PEEK()))
        NEXT();
@@ -741,9 +741,73 @@
        return;
    }
 
-   u = cp->chars;
-   while ((c = *u++) != '\0')
-       CHadd(cs, c);
+   switch(cp->type) {
+   case CC_ALNUM:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isalnum((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_ALPHA:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isalpha((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_BLANK:
+       /*
+        * According to the documentation "blank" is the same as
+        * "space", but original code defines only space and tab
+        * characters. Who is right? 
+        */
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isspace((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_CNTRL:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(iscntrl((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_DIGIT:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isdigit((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_GRAPH:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isgraph((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_LOWER:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(islower((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_PRINT:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isprint((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_PUNCT:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(ispunct((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_SPACE:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isspace((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_UPPER:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isupper((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   case CC_XDIGIT:
+       for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+           if(isxdigit((unsigned char)c))
+               CHadd(cs, c);
+       break;
+   }
    for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
        MCadd(p, cs, u);
 }

---------6f340302123118bb6f340302123118bb--

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019