From ilya Thu Jun 26 14:25:18 1997 Subject: Jumbo patch for RE wrt 5.004_01 To: perl5-porters@perl.com (Mailing list Perl5) Date: Thu, 26 Jun 1997 14:25:18 -0400 (EDT) X-Mailer: ELM [version 2.5 PL0b1] Content-Length: 128525 Status: OR This patch provides only an additional bug fix (for a bug introduced by new optimizations) and an additional bug test if you compare it to the previous patches, but it should apply clean to 5.004_01. Description is still the same: a) Deep optimizations for constant substrings; b) Extraneous copying of matched string removed; c) PMf_ALL reanabled again (no, contrary to what I sayd before, /blah/ *was not* optimized to "index * blah", the code was there, but nobody set the flag); d) study should improve things; e) regexps redone to use structs; f) A lot of info moved from PM to REGEXP (possible binary non-compatibility); g) Signature of regexec changed (source uncompatibility with REGEXP modules around). h) data for $ is now allocated atomically, so (a\1?)* works, and $2 is never of negative length). i) Slightly better -Dr output. j) Optimization of s/longer/short/ (without //g) even if copying the string. k) Constant-length lookbehind added. l) BRANCH removed if not needed. m) Fixed: (a)+ used to produce both fixed and floating const string "a". n) (?:a)+ optimized to the same form as a+. o) New opcode CURLYM for (A)* which does not need backtracking and storage of additional info. p) NOTHINGs are optimized in tails of interators to enable next-char optimization. q) New opcode CURLYN for (simple)*; r) CURLYM is modified to accept (moderate)*; s) (?i) and friends localized in the next surrounding block. t) Peephole optimizer for NOTHING nodes added. u) Peephole optimizer for EXACT* nodes added. v) \1 matches as an empty string if the first group did not match yet. w) Chip's patch to case-fold \1. ============================================ a) one extra field removed from PMOP (was left due to an oversight); b) SvTAIL handling improved (bugs/enhancements); c) new test file check what happens without $& $` $'; d) study in regexp.t was studying a wrong variable; e) `noscan' handling detects more zero-length codes (bugs); f) Warnings if max quantifier value (32767) reached; g) Very primitive (?{...}) - works only inside eval "", only the simplest cases tested in the test suite; h) Sniffer for constant strings had bugs when a fixed-number {}-modifier was following another *+{} (bugs). g) Yet newer test cases. Apply with touch t/op/regexp_noamp.t chmod a+x t/op/regexp_noamp.t patch -p1 < ... Enjoy, Ilya touch t/op/regexp_noamp.t chmod +x t/op/regexp_noamp.t exit 0 diff -pru perl5.004_01.os2/dump.c perl5.004_01.my/dump.c --- perl5.004_01.os2/dump.c Tue May 13 11:25:54 1997 +++ perl5.004_01.my/dump.c Sat Jun 14 21:03:50 1997 @@ -363,19 +363,12 @@ register PMOP *pm; dump("PMf_REPL = "); dump_op(pm->op_pmreplroot); } - if (pm->op_pmshort) { - dump("PMf_SHORT = %s\n",SvPEEK(pm->op_pmshort)); - } if (pm->op_pmflags) { SV *tmpsv = newSVpv("", 0); if (pm->op_pmflags & PMf_USED) sv_catpv(tmpsv, ",USED"); if (pm->op_pmflags & PMf_ONCE) sv_catpv(tmpsv, ",ONCE"); - if (pm->op_pmflags & PMf_SCANFIRST) - sv_catpv(tmpsv, ",SCANFIRST"); - if (pm->op_pmflags & PMf_ALL) - sv_catpv(tmpsv, ",ALL"); if (pm->op_pmflags & PMf_SKIPWHITE) sv_catpv(tmpsv, ",SKIPWHITE"); if (pm->op_pmflags & PMf_CONST) diff -pru perl5.004_01.os2/global.sym perl5.004_01.my/global.sym --- perl5.004_01.os2/global.sym Mon Jun 9 16:52:04 1997 +++ perl5.004_01.my/global.sym Sat Jun 14 21:11:52 1997 @@ -50,6 +50,7 @@ dec_amg di div_amg div_ass_amg +doeval do_undump ds egid diff -pru perl5.004_01.os2/MANIFEST perl5.004_01.my/MANIFEST --- perl5.004_01.os2/MANIFEST Thu Jun 12 12:32:44 1997 +++ perl5.004_01.my/MANIFEST Sat Jun 14 21:11:52 1997 @@ -738,6 +738,7 @@ t/op/readdir.t See if readdir() works t/op/recurse.t See if deep recursion works t/op/ref.t See if refs and objects work t/op/regexp.t See if regular expressions work +t/op/regexp_noamp.t See if regular expressions work with optimizations t/op/repeat.t See if x operator works t/op/runlevel.t See if die() works from perl_call_*() t/op/sleep.t See if sleep works diff -pru perl5.004_01.os2/op.c perl5.004_01.my/op.c --- perl5.004_01.os2/op.c Fri Jun 6 18:24:12 1997 +++ perl5.004_01.my/op.c Sat Jun 14 21:03:50 1997 @@ -521,7 +521,6 @@ OP *op; case OP_PUSHRE: case OP_MATCH: pregfree(cPMOP->op_pmregexp); - SvREFCNT_dec(cPMOP->op_pmshort); break; } @@ -2024,7 +2023,6 @@ OP *repl; pm->op_pmregexp = pregcomp(p, p + plen, pm); if (strEQ("\\s+", pm->op_pmregexp->precomp)) pm->op_pmflags |= PMf_WHITE; - hoistmust(pm); op_free(expr); } else { @@ -4459,10 +4457,6 @@ OP *op; kid->op_sibling = sibl; } pm = (PMOP*)kid; - if (pm->op_pmshort && !(pm->op_pmflags & PMf_ALL)) { - SvREFCNT_dec(pm->op_pmshort); /* can't use substring to optimize */ - pm->op_pmshort = 0; - } kid->op_type = OP_PUSHRE; kid->op_ppaddr = ppaddr[OP_PUSHRE]; diff -pru perl5.004_01.os2/op.h perl5.004_01.my/op.h --- perl5.004_01.os2/op.h Tue May 13 10:59:40 1997 +++ perl5.004_01.my/op.h Sat Jun 14 21:11:52 1997 @@ -173,16 +173,14 @@ struct pmop { OP * op_pmreplstart; PMOP * op_pmnext; /* list of all scanpats */ REGEXP * op_pmregexp; /* compiled expression */ - SV * op_pmshort; /* for a fast bypass of execute() */ U16 op_pmflags; U16 op_pmpermflags; - char op_pmslen; }; #define PMf_USED 0x0001 /* pm has been used once already */ #define PMf_ONCE 0x0002 /* use pattern only once per reset */ -#define PMf_SCANFIRST 0x0004 /* initial constant not anchored */ -#define PMf_ALL 0x0008 /* initial constant is whole pat */ +/* #define PMf_SCANFIRST 0x0004 */ /* initial constant not anchored */ +/* #define PMf_ALL 0x0008 */ /* initial constant is whole pat */ #define PMf_SKIPWHITE 0x0010 /* skip leading whitespace for split */ #define PMf_FOLD 0x0020 /* case insensitivity */ #define PMf_CONST 0x0040 /* subst replacement is constant */ diff -pru perl5.004_01.os2/perl.h perl5.004_01.my/perl.h --- perl5.004_01.os2/perl.h Mon Jun 9 16:48:58 1997 +++ perl5.004_01.my/perl.h Sat Jun 14 21:03:50 1997 @@ -1668,7 +1668,7 @@ EXT char * regprecomp; /* uncompiled str EXT char * regparse; /* Input-scan pointer. */ EXT char * regxend; /* End of input for compile */ EXT I32 regnpar; /* () count. */ -EXT char * regcode; /* Code-emit pointer; ®dummy = don't. */ +EXT regnode * regcode; /* Code-emit pointer; ®dummy = don't. */ EXT I32 regsize; /* Code size. */ EXT I32 regnaughty; /* How bad is this pattern? */ EXT I32 regsawback; /* Did we see \1, ...? */ diff -pru perl5.004_01.os2/pod/perlre.pod perl5.004_01.my/pod/perlre.pod --- perl5.004_01.os2/pod/perlre.pod Thu May 8 09:26:24 1997 +++ perl5.004_01.my/pod/perlre.pod Sat Jun 14 21:18:10 1997 @@ -288,6 +288,24 @@ easier just to say: if (/foo/ && $` =~ /bar$/) +For lookbehind see below. + +=item (?<=regexp) + +A zero-width positive lookbehind assertion. For example, C +matches a word following a tab, without including the tab in C<$&>. +Works only for fixed-width lookbehind. + +=item (? +matches any occurrence of "foo" that isn't following "bar". +Works only for fixed-width lookbehind. + +=item (?{ code }) + +Experimental "execute any Perl code" zero-length assertion. Always +succeeds. Currently works only for regexps defined inside C. =item (?imsx) diff -pru perl5.004_01.os2/pp.c perl5.004_01.my/pp.c --- perl5.004_01.os2/pp.c Thu Jun 12 13:11:14 1997 +++ perl5.004_01.my/pp.c Sat Jun 14 21:11:54 1997 @@ -3978,10 +3978,12 @@ PP(pp_split) s = m; } } - else if (pm->op_pmshort && !rx->nparens) { - i = SvCUR(pm->op_pmshort); - if (i == 1) { - i = *SvPVX(pm->op_pmshort); + else if (rx->check_substr && !rx->nparens + && (rx->reganch & ROPT_CHECK_ALL) + && !(rx->reganch & ROPT_ANCH)) { + i = SvCUR(rx->check_substr); + if (i == 1 && !SvTAIL(rx->check_substr)) { + i = *SvPVX(rx->check_substr); while (--limit) { /*SUPPRESS 530*/ for (m = s; m < strend && *m != i; m++) ; @@ -3999,7 +4001,7 @@ PP(pp_split) #ifndef lint while (s < strend && --limit && (m=fbm_instr((unsigned char*)s, (unsigned char*)strend, - pm->op_pmshort)) ) + rx->check_substr)) ) #endif { dstr = NEWSV(31, m-s); @@ -4014,7 +4016,7 @@ PP(pp_split) else { maxiters += (strend - s) * rx->nparens; while (s < strend && --limit && - pregexec(rx, s, strend, orig, 1, Nullsv, TRUE)) + pregexec(rx, s, strend, orig, 1, Nullsv, NULL, 0)) { TAINT_IF(rx->exec_tainted); if (rx->subbase diff -pru perl5.004_01.os2/pp_ctl.c perl5.004_01.my/pp_ctl.c --- perl5.004_01.os2/pp_ctl.c Tue Jun 10 09:18:04 1997 +++ perl5.004_01.my/pp_ctl.c Sat Jun 14 21:11:54 1997 @@ -26,7 +26,6 @@ #define DOCATCH(o) ((CATCH_GET == TRUE) ? docatch(o) : (o)) static OP *docatch _((OP *o)); -static OP *doeval _((int gimme)); static OP *dofindlabel _((OP *op, char *label, OP **opstack, OP **oplimit)); static void doparseform _((SV *sv)); static I32 dopoptoeval _((I32 startingblock)); @@ -95,7 +94,6 @@ PP(pp_regcomp) { if (pm->op_pmflags & PMf_KEEP) { pm->op_private &= ~OPpRUNTIME; /* no point compiling again */ - hoistmust(pm); cLOGOP->op_first->op_next = op->op_next; } RETURN; @@ -124,7 +122,8 @@ PP(pp_substcont) /* Are we done */ if (cx->sb_once || !pregexec(rx, s, cx->sb_strend, orig, - s == m, Nullsv, cx->sb_safebase)) + s == m, Nullsv, NULL, + cx->sb_safebase ? 0 : REXEC_COPY)) { SV *targ = cx->sb_targ; sv_catpvn(dstr, s, cx->sb_strend - s); @@ -2094,9 +2093,10 @@ OP *o; return Nullop; } -static OP * -doeval(gimme) +OP * +doeval(gimme, startp) int gimme; +OP**startp; { dSP; OP *saveop = op; @@ -2138,10 +2138,11 @@ int gimme; av_store(comppadlist, 1, (SV*)comppad); CvPADLIST(compcv) = comppadlist; - if (saveop->op_type != OP_REQUIRE) + if (!saveop || saveop->op_type != OP_REQUIRE) CvOUTSIDE(compcv) = (CV*)SvREFCNT_inc(caller); - SAVEFREESV(compcv); + if (!startp) + SAVEFREESV(compcv); /* make sure we compile in the right package */ @@ -2162,7 +2163,7 @@ int gimme; curcop->cop_arybase = 0; SvREFCNT_dec(rs); rs = newSVpv("\n", 1); - if (saveop->op_flags & OPf_SPECIAL) + if (saveop && saveop->op_flags & OPf_SPECIAL) in_eval |= 4; else sv_setpv(GvSV(errgv),""); @@ -2178,14 +2179,19 @@ int gimme; eval_root = Nullop; } SP = stack_base + POPMARK; /* pop original mark */ - POPBLOCK(cx,curpm); - POPEVAL(cx); - pop_return(); + if (!startp) { + POPBLOCK(cx,curpm); + POPEVAL(cx); + pop_return(); + } lex_end(); LEAVE; if (optype == OP_REQUIRE) { char* msg = SvPVx(GvSV(errgv), na); DIE("%s", *msg ? msg : "Compilation failed in require"); + } else if (startp) { + char* msg = SvPVx(GvSV(errgv), na); + croak("Compilation failed in regexp: %s", (*msg ? msg : "???")); } SvREFCNT_dec(rs); rs = SvREFCNT_inc(nrs); @@ -2194,7 +2200,10 @@ int gimme; SvREFCNT_dec(rs); rs = SvREFCNT_inc(nrs); compiling.cop_line = 0; - SAVEFREEOP(eval_root); + if (startp) + *startp = eval_root; + else + SAVEFREEOP(eval_root); if (gimme & G_VOID) scalarvoid(eval_root); else if (gimme & G_ARRAY) @@ -2205,7 +2214,7 @@ int gimme; DEBUG_x(dump_eval()); /* Register with debugger: */ - if (perldb && saveop->op_type == OP_REQUIRE) { + if (perldb && saveop && saveop->op_type == OP_REQUIRE) { CV *cv = perl_get_cv("DB::postponed", FALSE); if (cv) { dSP; @@ -2355,7 +2364,7 @@ PP(pp_require) compiling.cop_line = 0; PUTBACK; - return DOCATCH(doeval(G_SCALAR)); + return DOCATCH(doeval(G_SCALAR, NULL)); } PP(pp_dofile) @@ -2407,7 +2416,7 @@ PP(pp_entereval) if (perldb && curstash != debstash) save_lines(GvAV(compiling.cop_filegv), linestr); PUTBACK; - ret = doeval(gimme); + ret = doeval(gimme, NULL); if (perldb && was != sub_generation) { /* Some subs defined here. */ strcpy(safestr, "_<(eval )"); /* Anything fake and short. */ } diff -pru perl5.004_01.os2/pp_hot.c perl5.004_01.my/pp_hot.c --- perl5.004_01.os2/pp_hot.c Fri Jun 6 13:45:32 1997 +++ perl5.004_01.my/pp_hot.c Sat Jun 14 21:11:54 1997 @@ -804,6 +804,7 @@ PP(pp_match) TAINT_NOT; if (pm->op_pmflags & PMf_USED) { + failure: if (gimme == G_ARRAY) RETURN; RETPUSHNO; @@ -813,6 +814,8 @@ PP(pp_match) pm = curpm; rx = pm->op_pmregexp; } + if (rx->minlen > len) goto failure; + truebase = t = s; if (global = pm->op_pmflags & PMf_GLOBAL) { rx->startp[0] = 0; @@ -829,6 +832,7 @@ PP(pp_match) gimme = G_SCALAR; /* accidental array context? */ safebase = (((gimme == G_ARRAY) || global || !rx->nparens) && !sawampersand); + safebase = safebase ? 0 : REXEC_COPY ; if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) { SAVEINT(multiline); multiline = pm->op_pmflags & PMf_MULTILINE; @@ -842,43 +846,50 @@ play_it_again: if (update_minmatch++) minmatch = (s == rx->startp[0]); } - if (pm->op_pmshort) { - if (pm->op_pmflags & PMf_SCANFIRST) { + if (rx->check_substr) { + if (!(rx->reganch & ROPT_CHECK_AT_START)) { if (SvSCREAM(TARG)) { - if (screamfirst[BmRARE(pm->op_pmshort)] < 0) + I32 p = -1; + + if (screamfirst[BmRARE(rx->check_substr)] < 0) goto nope; - else if (!(s = screaminstr(TARG, pm->op_pmshort))) + else if (!(s = screaminstr(TARG, rx->check_substr, + rx->check_offset_min, 0, &p, 0))) goto nope; - else if (pm->op_pmflags & PMf_ALL) + else if ((rx->reganch & ROPT_CHECK_ALL) + && !sawampersand && !SvTAIL(rx->check_substr)) goto yup; } - else if (!(s = fbm_instr((unsigned char*)s, - (unsigned char*)strend, pm->op_pmshort))) + else if (!(s = fbm_instr((unsigned char*)s + rx->check_offset_min, + (unsigned char*)strend, + rx->check_substr))) goto nope; - else if (pm->op_pmflags & PMf_ALL) + else if ((rx->reganch & ROPT_CHECK_ALL) && !sawampersand) goto yup; - if (s && rx->regback >= 0) { - ++BmUSEFUL(pm->op_pmshort); - s -= rx->regback; - if (s < t) - s = t; + if (s && rx->check_offset_max < t - s) { + ++BmUSEFUL(rx->check_substr); + s -= rx->check_offset_max; } else s = t; } - else if (!multiline) { - if (*SvPVX(pm->op_pmshort) != *s - || (pm->op_pmslen > 1 - && memNE(SvPVX(pm->op_pmshort), s, pm->op_pmslen))) + else if (!multiline) { /* Anchored near beginning of string. */ + I32 slen; + if (*SvPVX(rx->check_substr) != s[rx->check_offset_min] + || ((slen = SvCUR(rx->check_substr)) > 1 + && memNE(SvPVX(rx->check_substr), + s + rx->check_offset_min, slen))) goto nope; } - if (!rx->naughty && --BmUSEFUL(pm->op_pmshort) < 0) { - SvREFCNT_dec(pm->op_pmshort); - pm->op_pmshort = Nullsv; /* opt is being useless */ + if (!rx->naughty && --BmUSEFUL(rx->check_substr) < 0 + && rx->check_substr == rx->float_substr) { + SvREFCNT_dec(rx->check_substr); + rx->check_substr = Nullsv; /* opt is being useless */ + rx->float_substr = Nullsv; } } if (pregexec(rx, s, strend, truebase, minmatch, - SvSCREAM(TARG) ? TARG : Nullsv, safebase)) + SvSCREAM(TARG) ? TARG : Nullsv, NULL, safebase)) { curpm = pm; if (pm->op_pmflags & PMf_ONCE) @@ -940,9 +951,9 @@ play_it_again: RETPUSHYES; } -yup: +yup: /* Confirmed by check_substr */ TAINT_IF(rx->exec_tainted); - ++BmUSEFUL(pm->op_pmshort); + ++BmUSEFUL(rx->check_substr); curpm = pm; if (pm->op_pmflags & PMf_ONCE) pm->op_pmflags |= PMf_USED; @@ -952,7 +963,7 @@ yup: rx->subbeg = truebase; rx->subend = strend; rx->startp[0] = s; - rx->endp[0] = s + SvCUR(pm->op_pmshort); + rx->endp[0] = s + SvCUR(rx->check_substr); goto gotcha; } if (sawampersand) { @@ -962,14 +973,14 @@ yup: rx->subbeg = tmps; rx->subend = tmps + (strend-t); tmps = rx->startp[0] = tmps + (s - t); - rx->endp[0] = tmps + SvCUR(pm->op_pmshort); + rx->endp[0] = tmps + SvCUR(rx->check_substr); } LEAVE_SCOPE(oldsave); RETPUSHYES; nope: - if (pm->op_pmshort) - ++BmUSEFUL(pm->op_pmshort); + if (rx->check_substr) + ++BmUSEFUL(rx->check_substr); ret_no: if (global && !(pm->op_pmflags & PMf_CONTINUE)) { @@ -1431,6 +1442,7 @@ PP(pp_subst) STRLEN len; int force_on_match = 0; I32 oldsave = savestack_ix; + I32 update_minmatch = 1; /* known replacement string? */ dstr = (pm->op_pmflags & PMf_CONST) ? POPs : Nullsv; @@ -1460,41 +1472,46 @@ PP(pp_subst) pm = curpm; rx = pm->op_pmregexp; } - safebase = (!rx->nparens && !sawampersand); + safebase = (!rx->nparens && !sawampersand) ? 0 : REXEC_COPY; if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) { SAVEINT(multiline); multiline = pm->op_pmflags & PMf_MULTILINE; } orig = m = s; - if (pm->op_pmshort) { - if (pm->op_pmflags & PMf_SCANFIRST) { + if (rx->check_substr) { + if (!(rx->reganch & ROPT_CHECK_AT_START)) { if (SvSCREAM(TARG)) { - if (screamfirst[BmRARE(pm->op_pmshort)] < 0) + I32 p = -1; + + if (screamfirst[BmRARE(rx->check_substr)] < 0) goto nope; - else if (!(s = screaminstr(TARG, pm->op_pmshort))) + else if (!(s = screaminstr(TARG, rx->check_substr, rx->check_offset_min, 0, &p, 0))) goto nope; } - else if (!(s = fbm_instr((unsigned char*)s, (unsigned char*)strend, - pm->op_pmshort))) + else if (!(s = fbm_instr((unsigned char*)s + rx->check_offset_min, + (unsigned char*)strend, + rx->check_substr))) goto nope; - if (s && rx->regback >= 0) { - ++BmUSEFUL(pm->op_pmshort); - s -= rx->regback; - if (s < m) - s = m; + if (s && rx->check_offset_max < s - m) { + ++BmUSEFUL(rx->check_substr); + s -= rx->check_offset_max; } else s = m; } - else if (!multiline) { - if (*SvPVX(pm->op_pmshort) != *s - || (pm->op_pmslen > 1 - && memNE(SvPVX(pm->op_pmshort), s, pm->op_pmslen))) + else if (!multiline) { /* Anchored at beginning of string. */ + I32 slen; + if (*SvPVX(rx->check_substr) != s[rx->check_offset_min] + || ((slen = SvCUR(rx->check_substr)) > 1 + && memNE(SvPVX(rx->check_substr), + s + rx->check_offset_min, slen))) goto nope; } - if (!rx->naughty && --BmUSEFUL(pm->op_pmshort) < 0) { - SvREFCNT_dec(pm->op_pmshort); - pm->op_pmshort = Nullsv; /* opt is being useless */ + if (!rx->naughty && --BmUSEFUL(rx->check_substr) < 0 + && rx->check_substr == rx->float_substr) { + SvREFCNT_dec(rx->check_substr); + rx->check_substr = Nullsv; /* opt is being useless */ + rx->float_substr = Nullsv; } } @@ -1505,9 +1522,9 @@ PP(pp_subst) c = dstr ? SvPV(dstr, clen) : Nullch; /* can do inplace substitution? */ - if (c && clen <= rx->minlen && safebase) { + if (c && clen <= rx->minlen && (once || !(safebase & REXEC_COPY))) { if (! pregexec(rx, s, strend, orig, 0, - SvSCREAM(TARG) ? TARG : Nullsv, safebase)) { + SvSCREAM(TARG) ? TARG : Nullsv, NULL, safebase)) { PUSHs(&sv_no); LEAVE_SCOPE(oldsave); RETURN; @@ -1522,8 +1539,13 @@ PP(pp_subst) SvSCREAM_off(TARG); /* disable possible screamer */ if (once) { rxtainted = rx->exec_tainted; - m = rx->startp[0]; - d = rx->endp[0]; + if (rx->subbase) { + m = orig + (rx->startp[0] - rx->subbase); + d = orig + (rx->endp[0] - rx->subbase); + } else { + m = rx->startp[0]; + d = rx->endp[0]; + } s = orig; if (m - s > strend - d) { /* faster to shorten from end */ if (clen) { @@ -1579,7 +1601,7 @@ PP(pp_subst) } s = rx->endp[0]; } while (pregexec(rx, s, strend, orig, s == m, - Nullsv, TRUE)); /* don't match same null twice */ + Nullsv, NULL, 0)); /* don't match same null twice */ if (s != d) { i = strend - s; SvCUR_set(TARG, d - SvPVX(TARG) + i); @@ -1596,7 +1618,7 @@ PP(pp_subst) } if (pregexec(rx, s, strend, orig, 0, - SvSCREAM(TARG) ? TARG : Nullsv, safebase)) { + SvSCREAM(TARG) ? TARG : Nullsv, NULL, safebase)) { if (force_on_match) { force_on_match = 0; s = SvPV_force(TARG, len); @@ -1629,7 +1651,7 @@ PP(pp_subst) sv_catpvn(dstr, c, clen); if (once) break; - } while (pregexec(rx, s, strend, orig, s == m, Nullsv, safebase)); + } while (pregexec(rx, s, strend, orig, s == m, Nullsv, NULL, safebase)); sv_catpvn(dstr, s, strend - s); TAINT_IF(rxtainted); @@ -1652,7 +1674,7 @@ PP(pp_subst) goto ret_no; nope: - ++BmUSEFUL(pm->op_pmshort); + ++BmUSEFUL(rx->check_substr); ret_no: PUSHs(&sv_no); diff -pru perl5.004_01.os2/proto.h perl5.004_01.my/proto.h --- perl5.004_01.os2/proto.h Tue Jun 10 08:40:06 1997 +++ perl5.004_01.my/proto.h Sat Jun 14 21:11:54 1997 @@ -68,6 +68,7 @@ char* delimcpy _((char* to, char* toend, void deprecate _((char* s)); OP* die _((const char* pat,...)); OP* die_where _((char* message)); +OP* doeval _((int gimme, OP** startp)); void dounwind _((I32 cxix)); bool do_aexec _((SV* really, SV** mark, SV** sp)); void do_chop _((SV* asv, SV* sv)); @@ -144,7 +145,6 @@ void gv_init _((GV* gv, HV* stash, char* HV* gv_stashpv _((char* name, I32 create)); HV* gv_stashpvn _((char* name, U32 namelen, I32 create)); HV* gv_stashsv _((SV* sv, I32 create)); -void hoistmust _((PMOP* pm)); void hv_clear _((HV* tb)); void hv_delayfree_ent _((HV* hv, HE* entry)); SV* hv_delete _((HV* tb, char* key, U32 klen, I32 flags)); @@ -371,10 +371,10 @@ regexp* pregcomp _((char* exp, char* xen OP* ref _((OP* op, I32 type)); OP* refkids _((OP* op, I32 type)); void regdump _((regexp* r)); -I32 pregexec _((regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, I32 safebase)); +I32 pregexec _((regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags)); void pregfree _((struct regexp* r)); -char* regnext _((char* p)); -void regprop _((SV* sv, char* op)); +regnode*regnext _((regnode* p)); +void regprop _((SV* sv, regnode* op)); void repeatcpy _((char* to, char* from, I32 len, I32 count)); char* rninstr _((char* big, char* bigend, char* little, char* lend)); Sighandler_t rsignal _((int, Sighandler_t)); @@ -425,7 +425,7 @@ UV scan_hex _((char* start, I32 len, I32 char* scan_num _((char* s)); UV scan_oct _((char* start, I32 len, I32* retlen)); OP* scope _((OP* o)); -char* screaminstr _((SV* bigsv, SV* littlesv)); +char* screaminstr _((SV* bigsv, SV* littlesv, I32 start_shift, I32 end_shift, I32 *state, I32 last)); #ifndef VMS I32 setenv_getix _((char* nam)); #endif --- ../perl5.004_01.os2/regcomp.c Fri Jun 6 16:42:22 1997 +++ ./regcomp.c Mon Jun 23 01:13:30 1997 @@ -96,22 +96,497 @@ * Forward declarations for pregcomp()'s friends. */ -static char *reg _((I32, I32 *)); -static char *reganode _((char, unsigned short)); -static char *regatom _((I32 *)); -static char *regbranch _((I32 *)); -static void regc _((char)); -static char *regclass _((void)); +static regnode *reg _((I32, I32 *)); +static regnode *reganode _((U8, U32)); +static regnode *regatom _((I32 *)); +static regnode *regbranch _((I32 *, I32)); +static void regc _((U8, char *)); +static regnode *regclass _((void)); STATIC I32 regcurly _((char *)); -static char *regnode _((char)); -static char *regpiece _((I32 *)); -static void reginsert _((char, char *)); -static void regoptail _((char *, char *)); +static regnode *reg_node _((U8)); +static regnode *regpiece _((I32 *)); +static void reginsert _((U8, regnode *)); +static void regoptail _((regnode *, regnode *)); static void regset _((char *, I32)); -static void regtail _((char *, char *)); +static void regtail _((regnode *, regnode *)); static char* regwhite _((char *, char *)); static char* nextchar _((void)); +static seen_gpos; +static seen_zerolen; +static regexp *rx; + +/* Length of a variant. */ + +typedef struct { + I32 len_min; + I32 len_delta; + I32 pos_min; + I32 pos_delta; + SV *last_found; + I32 last_end; /* min value, <0 unless valid. */ + I32 last_start_min; + I32 last_start_max; + SV **longest; /* Either &l_fixed, or &l_float. */ + SV *longest_fixed; + I32 offset_fixed; + SV *longest_float; + I32 offset_float_min; + I32 offset_float_max; + I32 flags; +} scan_data_t; + +#define SF_BEFORE_EOL 1 +#define SF_FIX_BEFORE_EOL 2 +#define SF_FL_BEFORE_EOL 4 +#define SF_IS_INF 8 +#define SF_HAS_PAR 16 +#define SF_IN_PAR 32 + +static void +scan_commit(data) + scan_data_t *data; +{ + if (SvCUR(data->last_found) > SvCUR(*data->longest)) { + sv_setsv(*data->longest, data->last_found); + if (*data->longest == data->longest_fixed) { + data->offset_fixed = data->last_start_min; + if (data->flags & SF_BEFORE_EOL) + data->flags |= SF_FIX_BEFORE_EOL; + else + data->flags &= ~SF_FIX_BEFORE_EOL; + } else { + data->offset_float_min = data->last_start_min; + data->offset_float_max = data->last_start_max; + if (data->flags & SF_BEFORE_EOL) + data->flags |= SF_FL_BEFORE_EOL; + else + data->flags &= ~SF_FL_BEFORE_EOL; + } + } + SvCUR_set(data->last_found, 0); + data->last_end = -1; + data->flags &= ~SF_BEFORE_EOL; +} + +#define SCF_DO_SUBSTR 1 + +/* Stops at toplevel WHILEM as well as at `last'. At end *scanp is set + to the position after last scanned or to NULL. */ + +static I32 +study_chunk(scanp, deltap, last, data, flags) + regnode **scanp; /* Start here (read-write). */ + I32 *deltap; /* Write maxlen-minlen here. */ + regnode *last; /* Stop before this one. */ + scan_data_t *data; + U32 flags; +{ + I32 min = 0, pars = 0; + regnode *scan = *scanp; + I32 delta = 0, is_inf = 0, is_par = OP(scan) == OPEN ? ARG(scan) : 0; + scan_data_t data_fake; + + while (scan && OP(scan) != END && scan < last) { + /* Peephole optimizer: */ + + if (regkind[(U8)OP(scan)] == EXACT) { + regnode *nxt = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2; + regnode *n = scan; + U32 stringok = 1; +#ifdef DEBUGGING + regnode *last = scan; +#endif + + while ((n = regnext(n)) + && ( regkind[(U8)OP(n)] == NOTHING || + (stringok && (OP(n) == OP(scan)))) + && NEXT_OFF(n) + && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) { + NEXT_OFF(scan) += NEXT_OFF(n); + if (OP(n) == TAIL || n > nxt) + stringok = 0; + if (regkind[(U8)OP(n)] == NOTHING) { + nxt = n + NODE_STEP_REGNODE; + if (stringok) + last = n; + } else { + if (*OPERAND(scan) + *OPERAND(n) > U8_MAX) + break; + Move(OPERAND(n) + 1, + OPERAND(scan) + *OPERAND(scan) + 1, + *OPERAND(n) + 1, char); + *OPERAND(scan) += *OPERAND(n); + nxt = n + (*OPERAND(n) + 2 - 1)/sizeof(regnode) + 2; + if (stringok) + last = nxt - 1; + } +#ifdef DEBUGGING +#endif + } +#ifdef DEBUGGING + /* Allow dumping */ + n = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2; + while (n <= last) { + if (regkind[(U8)OP(n)] != NOTHING) { + OP(n) = NOTHING; + NEXT_OFF(n) = 0; + } + n++; + } +#endif + + } else if (OP(scan) != CURLYX) { + regnode *n = scan; + + while ((n = regnext(n)) + && (regkind[(U8)OP(n)] == NOTHING) && NEXT_OFF(n) + && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) { + NEXT_OFF(scan) += NEXT_OFF(n); + } + } + if (OP(scan) == BRANCH) { + if (OP(regnext(scan)) == BRANCH) { + regnode *next; + I32 max1 = 0, min1 = I32_MAX; + + if (flags && SCF_DO_SUBSTR) + scan_commit(data); + while (OP(scan) == BRANCH) { + I32 deltanext, minnext; + + data_fake.flags = 0; + next = regnext(scan); + scan = NEXTOPER(scan); + minnext = study_chunk(&scan, &deltanext, next, &data_fake, 0); + if (min1 > minnext) min1 = minnext; + if (max1 < minnext + deltanext) max1 = minnext + deltanext; + if (deltanext == I32_MAX) + is_inf = 1; + scan = next; + if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) + pars++; + } + if (flags && SCF_DO_SUBSTR) { + data->pos_min += min1; + data->pos_delta += max1 - min1; + if (is_inf) + data->flags |= SF_IS_INF; + else + data->flags &= ~SF_IS_INF; + if (max1 != min1 || is_inf) + data->longest = &(data->longest_float); + } + min += min1; + delta += max1 - min1; + } + else /* single branch is optimized. */ + scan = NEXTOPER(scan); + continue; + } else if (OP(scan) == EXACT) { + min += *OPERAND(scan); + if (flags && SCF_DO_SUBSTR) { /* Update longest substr. */ + I32 l = *OPERAND(scan); + + /* The code below prefers earlier match for fixed + offset, later match for variable offset. */ + if (data->last_end == -1) { /* Update the start info. */ + data->last_start_min = data->pos_min; + data->last_start_max = data->flags & SF_IS_INF + ? I32_MAX : data->pos_min + data->pos_delta; + } + sv_catpvn(data->last_found, OPERAND(scan)+1, l); + data->last_end = data->pos_min + l; + data->pos_min += l; /* As in the first entry. */ + data->flags &= ~SF_BEFORE_EOL; + } + } else if (regkind[(U8)OP(scan)] == EXACT) { + if (flags && SCF_DO_SUBSTR) + scan_commit(data); + min += *OPERAND(scan); + if (data) + data->pos_min += *OPERAND(scan); + } else if (strchr(varies,OP(scan))) { + int mincount, maxcount, minnext, deltanext, pos_before, fl; + regnode *next, *oscan = scan; + + switch (regkind[(U8)OP(scan)]) { + case WHILEM: + scan = NEXTOPER(scan); + goto finish; + case PLUS: + if (flags && SCF_DO_SUBSTR) { + next = NEXTOPER(scan); + if (OP(next) == EXACT) { + mincount = 1; + maxcount = REG_INFTY; + next = regnext(scan); + scan = NEXTOPER(scan); + goto do_curly; + } + } + if (flags && SCF_DO_SUBSTR) + data->pos_min++; + min++; + /* Fall through. */ + case STAR: + is_inf = 1; + scan = regnext(scan); + if (flags && SCF_DO_SUBSTR) { + scan_commit(data); + data->flags |= SF_IS_INF; + data->longest = &(data->longest_float); + } + goto optimize_curly_tail; + case CURLY: + mincount = ARG1(scan); + maxcount = ARG2(scan); + next = regnext(scan); + scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS; + do_curly: + if (flags && SCF_DO_SUBSTR) { + if (mincount == 0) scan_commit(data); + pos_before = data->pos_min; + } + if (data) { + fl = data->flags; + data->flags &= ~(SF_HAS_PAR|SF_IN_PAR); + } + /* This will finish on WHILEM, setting scan, or on NULL: */ + minnext = study_chunk(&scan, &deltanext, next, data, + mincount == 0 + ? (flags & ~SCF_DO_SUBSTR) : flags); + if (!scan) /* It was not CURLYX, but CURLY. */ + scan = next; + min += minnext * mincount; + is_inf = (maxcount == REG_INFTY && (minnext + deltanext) > 0 + || deltanext == I32_MAX); + delta += (minnext + deltanext) * maxcount - minnext * mincount; + + /* Try powerful optimization CURLYX => CURLYN. */ +#ifdef REGALIGN_STRUCT + if ( OP(oscan) == CURLYX && data + && data->flags & SF_IN_PAR + && !deltanext && minnext == 1 ) { + /* Try to optimize to CURLY. */ + regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; + regnode *nxt1 = nxt, *nxt2; + + /* Skip open. */ + nxt = regnext(nxt); + if (!strchr(simple,OP(nxt)) + && !(regkind[(U8)OP(nxt)] == EXACT + && *OPERAND(nxt) == 1)) + goto nogo; + nxt2 = nxt; + nxt = regnext(nxt); + if (OP(nxt) != CLOSE) + goto nogo; + /* Now we know that nxt2 is the only contents: */ + oscan->flags = ARG(nxt); + OP(oscan) = CURLYN; + OP(nxt1) = NOTHING; /* was OPEN. */ +#ifdef DEBUGGING + OP(nxt1 + 1) = NOTHING; /* was count. */ + NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */ + NEXT_OFF(nxt2) = 0; /* just for consistancy with CURLY. */ + OP(nxt) = NOTHING; /* was CLOSE. */ + OP(nxt + 1) = NOTHING; /* was count. */ + NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */ +#endif + } +#endif + nogo: + + /* Try optimization CURLYX => CURLYM. */ + if ( OP(oscan) == CURLYX && data +#ifdef REGALIGN_STRUCT + && !(data->flags & SF_HAS_PAR) +#else + && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) +#endif + && !deltanext ) { + /* XXXX How to optimize if data == 0? */ + /* Optimize to a simpler form. */ + regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS, *nxt2; /* OPEN */ + + OP(oscan) = CURLYM; + while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/ + && (OP(nxt2) != WHILEM)) + nxt = nxt2; + OP(nxt2) = SUCCEED; /* Whas WHILEM */ +#ifdef REGALIGN_STRUCT + /* Need to optimize away parenths. */ + if (data->flags & SF_IN_PAR) { + /* Set the parenth number. */ + regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/ + + if (OP(nxt) != CLOSE) + croak("panic opt close"); + oscan->flags = ARG(nxt); + OP(nxt1) = NOTHING; /* was OPEN. */ + OP(nxt) = NOTHING; /* was CLOSE. */ +#ifdef DEBUGGING + OP(nxt1 + 1) = NOTHING; /* was count. */ + OP(nxt + 1) = NOTHING; /* was count. */ + NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */ + NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */ +#endif + } else + oscan->flags = 0; +#endif + } + if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) + pars++; + if (flags && SCF_DO_SUBSTR) { + SV *last_str = Nullsv; + int counted = mincount != 0; + + if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */ + I32 b = pos_before >= data->last_start_min + ? pos_before : data->last_start_min; + I32 l = data->last_end - pos_before; + + /* Get the added string: */ + last_str = newSVpv(SvPV(data->last_found, na) + + b - data->last_start_min, l); + if (deltanext == 0 && pos_before == b) { + /* What was added is a constant string */ + if (mincount > 1) { + SvGROW(last_str, (mincount * l) + 1); + repeatcpy(SvPVX(last_str) + l, + SvPVX(last_str), l, mincount - 1); + SvCUR(last_str) *= mincount; + /* Add additional parts. */ + SvCUR_set(data->last_found, + SvCUR(data->last_found) - l); + sv_catsv(data->last_found, last_str); + data->last_end += l * (mincount - 1); + } + } + } + /* It is counted once already... */ + data->pos_min += minnext * (mincount - counted); + data->pos_delta += - counted * deltanext + + (minnext + deltanext) * maxcount - minnext * mincount; + /* Cannot switch off being infinite... */ + if (is_inf) + data->flags |= SF_IS_INF; + if (mincount != maxcount) { + scan_commit(data); + if (mincount && last_str) { + sv_setsv(data->last_found, last_str); + data->last_end = data->pos_min; + data->last_start_min = + data->pos_min - SvCUR(last_str); + data->last_start_max = is_inf + ? I32_MAX + : data->pos_min + data->pos_delta + - SvCUR(last_str); + } + data->longest = &(data->longest_float); + } + } + optimize_curly_tail: +#ifdef REGALIGN + if (OP(oscan) != CURLYX) { + while (regkind[(U8)OP(next = regnext(oscan))] == NOTHING + && NEXT_OFF(next)) + NEXT_OFF(oscan) += NEXT_OFF(next); + } +#endif + continue; + default: /* REF only? */ + if (flags && SCF_DO_SUBSTR) { + scan_commit(data); + data->longest = &(data->longest_float); + if (is_inf) + data->flags |= SF_IS_INF; + else + data->flags &= ~SF_IS_INF; + } + is_inf = 1; + break; + } + } else if (strchr(simple,OP(scan))) { + if (flags && SCF_DO_SUBSTR) { + scan_commit(data); + data->pos_min++; + } + min++; + } else if (regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) { + data->flags |= SF_BEFORE_EOL; + } else if (regkind[(U8)OP(scan)] == BRANCH + && (scan->flags || data) + && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) { + I32 deltanext, minnext; + regnode *nscan, *next; + + data_fake.flags = 0; + next = regnext(scan); + nscan = NEXTOPER(scan); + minnext = study_chunk(&nscan, &deltanext, next, &data_fake, 0); + if (scan->flags) { + if (deltanext) { + croak("variable length lookbehind not implemented"); + } else if (minnext > U8_MAX) { + croak("lookbehind longer than %d not implemented", U8_MAX); + } + scan->flags = minnext; + } + if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) + pars++; + } else if (OP(scan) == OPEN) { + pars++; + } else if (OP(scan) == CLOSE && ARG(scan) == is_par) { +#ifdef REGALIGN_STRUCT + regnode *next = regnext(scan); + + if ( next && is_par <= U8_MAX + && (next >= last + || (OP(next) == WHILEM && (next + 1) == last))) { + } else +#endif + is_par = 0; /* Disable optimization */ + } + /* Else: zero-length, ignore. */ + scan = regnext(scan); + } + + finish: + *scanp = scan; + *deltap = is_inf ? I32_MAX : delta; + if (is_par && pars==1 && data) { + data->flags |= SF_IN_PAR; + data->flags &= ~SF_HAS_PAR; + } else if (pars && data) { + data->flags |= SF_HAS_PAR; + data->flags &= ~SF_IN_PAR; + } + return min; +} + +static I32 +add_data(n, s) +I32 n; +char *s; +{ + if (rx->data) { + Renewc(rx->data, + sizeof(rx->data) + sizeof(void*) * (rx->data->count + n - 1), + char, struct reg_data); + Renew(rx->data->what, rx->data->count + n, U8); + rx->data->count += n; + } else { + Newc(1207, rx->data, sizeof(*rx->data) + sizeof(void*) * (n - 1), + char, struct reg_data); + New(1208, rx->data->what, n, U8); + rx->data->count = n; + } + Copy(s, rx->data->what + rx->data->count - n, n, U8); + return rx->data->count - n; +} + /* - pregcomp - compile a regular expression into internal code * @@ -134,21 +609,24 @@ char* xend; PMOP* pm; { register regexp *r; - register char *scan; - register SV *longish; - SV *longest; - register I32 len; - register char *first; + regnode *scan; + register regnode *scan1; + register SV *current_longest; + SV **longest; + SV *longest_fixed; + SV *longest_float; + regnode *first; I32 flags; I32 backish; - I32 backest; - I32 curback; + I32 stored_c_offset_min; + I32 c_offset_min; + I32 c_offset_delta; I32 minlen = 0; I32 sawplus = 0; I32 sawopen = 0; #define MAX_REPEAT_DEPTH 12 struct { - char *opcode; + regnode *opcode; I32 count; } repeat_stack[MAX_REPEAT_DEPTH]; I32 repeat_depth = 0; @@ -161,6 +639,9 @@ PMOP* pm; regflags = pm->op_pmflags; regsawback = 0; + seen_gpos = 0; + seen_zerolen = *exp == '^' ? -1 : 0; + /* First pass: determine size, legality. */ regparse = exp; regxend = xend; @@ -168,24 +649,27 @@ PMOP* pm; regnpar = 1; regsize = 0L; regcode = ®dummy; - regc((char)MAGIC); + regc((U8)MAGIC, (char*)regcode); if (reg(0, &flags) == NULL) { Safefree(regprecomp); regprecomp = Nullch; return(NULL); } + DEBUG_r(PerlIO_printf(Perl_debug_log, "size %d ", regsize)); /* Small enough for pointer-storage convention? */ if (regsize >= 32767L) /* Probably could be 65535L. */ FAIL("regexp too big"); /* Allocate space and initialize. */ - Newc(1001, r, sizeof(regexp) + (unsigned)regsize, char, regexp); + Newc(1001, r, sizeof(regexp) + (unsigned)regsize * sizeof(regnode), + char, regexp); if (r == NULL) FAIL("regexp out of space"); r->prelen = xend - exp; r->precomp = regprecomp; r->subbeg = r->subbase = NULL; + rx = r; /* Second pass: emit code. */ regparse = exp; @@ -193,23 +677,26 @@ PMOP* pm; regnaughty = 0; regnpar = 1; regcode = r->program; - regc((char)MAGIC); + regc((U8)MAGIC, (char*) regcode++); + r->data = 0; if (reg(0, &flags) == NULL) return(NULL); /* Dig out information for optimizations. */ pm->op_pmflags = regflags; - r->regstart = Nullsv; /* Worst-case defaults. */ r->reganch = 0; - r->regmust = Nullsv; - r->regback = -1; - r->regstclass = Nullch; + r->regstclass = NULL; r->naughty = regnaughty >= 10; /* Probably an expensive pattern. */ - scan = r->program+1; /* First BRANCH. */ - if (OP(regnext(scan)) == END) {/* Only one top-level choice. */ - scan = NEXTOPER(scan); + scan = r->program + 1; /* First BRANCH. */ + if (OP(scan) != BRANCH) { /* Only one top-level choice. */ + I32 prev_c_offset_min; + I32 prev_c_offset_delta; + I32 prev_end; + scan_data_t data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + I32 fake; first = scan; + /* Skip introductions and multiplicators >= 1. */ while ((OP(first) == OPEN && (sawopen = 1)) || (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) || (OP(first) == PLUS) || @@ -224,12 +711,7 @@ PMOP* pm; /* Starting-point info. */ again: - if (OP(first) == EXACT) { - r->regstart = newSVpv(OPERAND(first)+1,*OPERAND(first)); - if (SvCUR(r->regstart) > !sawstudy) - fbm_compile(r->regstart); - (void)SvUPGRADE(r->regstart, SVt_PVBM); - } + if (OP(first) == EXACT); /* Empty, get anchored substr later. */ else if (strchr(simple+2,OP(first))) r->regstclass = first; else if (regkind[(U8)OP(first)] == BOUND || @@ -257,8 +739,9 @@ PMOP* pm; if (sawplus && (!sawopen || !regsawback)) r->reganch |= ROPT_SKIP; /* x+ must match 1st of run */ - DEBUG_r(PerlIO_printf(Perl_debug_log, "first %d next %d offset %d\n", - OP(first), OP(NEXTOPER(first)), first - scan)); + /* Scan is after the zeroth branch, first is atomic matcher. */ + DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %d\n", + first - scan + 1)); /* * If there's something expensive in the r.e., find the * longest literal string that must appear and make it the @@ -267,155 +750,78 @@ PMOP* pm; * and avoiding duplication strengthens checking. Not a * strong reason, but sufficient in the absence of others. * [Now we resolve ties in favor of the earlier string if - * it happens that curback has been invalidated, since the + * it happens that c_offset_min has been invalidated, since the * earlier string may buy us something the later one won't.] */ - longish = newSVpv("",0); - longest = newSVpv("",0); - len = 0; minlen = 0; - curback = 0; - backish = 0; - backest = 0; - while (OP(scan) != END) { - if (OP(scan) == BRANCH) { - if (OP(regnext(scan)) == BRANCH) { - curback = -30000; - while (OP(scan) == BRANCH) - scan = regnext(scan); - } - else /* single branch is ok */ - scan = NEXTOPER(scan); - continue; - } - if (OP(scan) == UNLESSM) { - curback = -30000; - scan = regnext(scan); - continue; - } - if (OP(scan) == EXACT) { - char *t; - first = scan; - while ((t = regnext(scan)) && OP(t) == CLOSE) - scan = t; - minlen += *OPERAND(first) * repeat_count; - if (curback - backish == len) { - sv_catpvn(longish, OPERAND(first)+1, - *OPERAND(first)); - len += *OPERAND(first); - curback += *OPERAND(first); - first = regnext(scan); - } - else if (*OPERAND(first) >= len + (curback >= 0)) { - len = *OPERAND(first); - sv_setpvn(longish, OPERAND(first)+1,len); - backish = curback; - curback += len; - first = regnext(scan); - } - else - curback += *OPERAND(first); - } - else if (strchr(varies,OP(scan))) { - int tcount; - char *next; - - if (repeat_depth < MAX_REPEAT_DEPTH - && ((OP(scan) == PLUS - && (tcount = 1) - && (next = NEXTOPER(scan))) - || (regkind[(U8)OP(scan)] == CURLY - && (tcount = ARG1(scan)) - && (next = NEXTOPER(scan)+4)))) - { - /* We treat (abc)+ as (abc)(abc)*. */ - - /* Mark the place to return back. */ - repeat_stack[repeat_depth].opcode = regnext(scan); - repeat_stack[repeat_depth].count = repeat_count; - repeat_depth++; - repeat_count *= tcount; - - /* Go deeper: */ - scan = next; - continue; - } - else { - curback = -30000; - len = 0; - if (SvCUR(longish) > SvCUR(longest)) { - sv_setsv(longest,longish); - backest = backish; - } - sv_setpvn(longish,"",0); - } - } - else if (strchr(simple,OP(scan))) { - curback++; - minlen += repeat_count; - len = 0; - if (SvCUR(longish) > SvCUR(longest)) { - sv_setsv(longest,longish); - backest = backish; - } - sv_setpvn(longish,"",0); - } - scan = regnext(scan); - if (!scan) { /* Go up PLUS or CURLY. */ - if (!repeat_depth--) - croak("panic: re scan"); - scan = repeat_stack[repeat_depth].opcode; - repeat_count = repeat_stack[repeat_depth].count; - /* Need to submit the longest string found: */ - curback = -30000; - len = 0; - if (SvCUR(longish) > SvCUR(longest)) { - sv_setsv(longest,longish); - backest = backish; - } - sv_setpvn(longish,"",0); - } - } - - /* Prefer earlier on tie, unless we can tail match latter */ - - if (SvCUR(longish) + (first && regkind[(U8)OP(first)] == EOL) - > SvCUR(longest)) - { - sv_setsv(longest,longish); - backest = backish; - } - else - sv_setpvn(longish,"",0); - if (SvCUR(longest) - && (!r->regstart - || !fbm_instr((unsigned char*) SvPVX(r->regstart), - (unsigned char *) (SvPVX(r->regstart) - + SvCUR(r->regstart)), - longest))) - { - r->regmust = longest; - if (backest < 0) - backest = -1; - r->regback = backest; - if (SvCUR(longest) > !(sawstudy || - (first && regkind[(U8)OP(first)] == EOL))) - fbm_compile(r->regmust); - (void)SvUPGRADE(r->regmust, SVt_PVBM); - BmUSEFUL(r->regmust) = 100; - if (first && regkind[(U8)OP(first)] == EOL && SvCUR(longish)) - SvTAIL_on(r->regmust); - } - else { - SvREFCNT_dec(longest); - longest = Nullsv; - } - SvREFCNT_dec(longish); + data.longest_fixed = newSVpv("",0); + data.longest_float = newSVpv("",0); + data.last_found = newSVpv("",0); + data.longest = &(data.longest_fixed); + first = scan; + + minlen = study_chunk(&first, &fake, scan + regsize, /* To end */ + &data, SCF_DO_SUBSTR); + if ( regnpar == 1 && data.longest == &(data.longest_fixed) + && data.last_start_min == 0 && data.last_end > 0 + && !seen_zerolen + && (!seen_gpos || (r->reganch & ROPT_ANCH_GPOS))) + r->reganch |= ROPT_CHECK_ALL; + scan_commit(&data); + SvREFCNT_dec(data.last_found); + + if (SvCUR(data.longest_float)) { + if (SvCUR(data.longest_fixed) + && data.offset_fixed == data.offset_float_min) + goto remove; /* Like in (a)+. */ + + r->float_substr = data.longest_float; + r->float_min_offset = data.offset_float_min; + r->float_max_offset = data.offset_float_max; + fbm_compile(r->float_substr); + BmUSEFUL(r->float_substr) = 100; + if (data.flags & SF_FL_BEFORE_EOL) SvTAIL_on(r->float_substr); + } else { + remove: + r->float_substr = Nullsv; + SvREFCNT_dec(data.longest_float); + } + + if (SvCUR(data.longest_fixed)) { + r->anchored_substr = data.longest_fixed; + r->anchored_offset = data.offset_fixed; + fbm_compile(r->anchored_substr); + BmUSEFUL(r->anchored_substr) = 100; + if (data.flags & SF_FIX_BEFORE_EOL) SvTAIL_on(r->anchored_substr); + } else { + r->anchored_substr = Nullsv; + SvREFCNT_dec(data.longest_fixed); + } + + /* A temporary algorithm prefers floated substr to fixed one to dig more info. */ + if (SvCUR(data.longest_fixed) > SvCUR(data.longest_float)) { + r->check_substr = r->anchored_substr; + r->check_offset_min = r->check_offset_max = r->anchored_offset; + if (r->reganch & ROPT_ANCH) r->reganch |= ROPT_CHECK_AT_START; + } else { + r->check_substr = r->float_substr; + r->check_offset_min = data.offset_float_min; + r->check_offset_max = data.offset_float_max; + } + } else { + /* Several toplevels. Best we can is to set minlen. */ + I32 fake; + + DEBUG_r(PerlIO_printf(Perl_debug_log, "\n")); + scan = r->program + 1; + minlen = study_chunk(&scan, &fake, scan + regsize, NULL, 0); + r->check_substr = r->anchored_substr = r->float_substr = Nullsv; } r->nparens = regnpar - 1; r->minlen = minlen; + if (seen_gpos) r->reganch |= ROPT_GPOS_SEEN; Newz(1002, r->startp, regnpar, char*); Newz(1002, r->endp, regnpar, char*); DEBUG_r(regdump(r)); @@ -431,16 +837,16 @@ PMOP* pm; * is a trifle forced, but the need to tie the tails of the branches to what * follows makes it hard to avoid. */ -static char * +static regnode * reg(paren, flagp) I32 paren; /* Parenthesized? */ I32 *flagp; { - register char *ret; - register char *br; - register char *ender = 0; + register regnode *ret; + register regnode *br; + register regnode *ender = 0; register I32 parno = 0; - I32 flags; + I32 flags, oregflags = regflags, have_branch = 0; *flagp = HASWIDTH; /* Tentatively. */ @@ -451,9 +857,19 @@ I32 *flagp; paren = *regparse++; ret = NULL; switch (paren) { - case ':': + case '<': +#ifndef REGALIGN_STRUCT + croak("lookbehind non-implemented without REGALIGN_STRUCT"); +#endif + if (*regparse == '!') + paren = ','; + if (*regparse != '=' && *regparse != '!') + goto unknown; + regparse++; case '=': case '!': + seen_zerolen++; + case ':': break; case '$': case '@': @@ -467,10 +883,72 @@ I32 *flagp; nextchar(); *flagp = TRYAGAIN; return NULL; + case '{': + { + I32 count = 1, n; + char c; + char *s = regparse; + SV *sv; + OP *sop, *rop; + char tmpbuf[TYPE_DIGITS(long) + 12 + 3]; + char *safestr; + + seen_zerolen++; + while (count && (c = *regparse)) { + if (c == '\\' && regparse[1]) + regparse++; + else if (c == '{') + count++; + else if (c == '}') + count--; + regparse++; + } + if (*regparse != ')') + croak("Sequence (?{...}) not terminated or not {}-balanced"); + if (!SIZE_ONLY) { + if (regparse - 1 - s) + sv = newSVpv(s, regparse - 1 - s); + else + sv = newSVpv("", 0); + ENTER; + lex_start(sv); + SAVETMPS; + /* switch to eval mode */ + + SAVESPTR(compiling.cop_filegv); + sprintf(tmpbuf, "_<(re_eval %lu)", (unsigned long)++evalseq); + compiling.cop_filegv = gv_fetchfile(tmpbuf+2); + compiling.cop_line = 1; + /* XXX For Cs within BEGIN {} blocks, this ends up + deleting the eval's FILEGV from the stash before gv_check() runs + (i.e. before run-time proper). To work around the coredump that + ensues, we always turn GvMULTI_on for any globals that were + introduced within evals. See force_ident(). GSAR 96-10-12 */ + safestr = savepv(tmpbuf); + SAVEDELETE(defstash, safestr, strlen(safestr)); + SAVEI32(hints); + hints = 0; + + rop = doeval(G_VOID, &sop); + n = add_data(4, "nnso"); + rx->data->data[n] = (void*)rop; + rx->data->data[n+1] = (void*)curpad; + rx->data->data[n+2] = (void*)compcv; + rx->data->data[n+3] = (void*)sop; + sop->op_type = OP_NULL; + sop->op_ppaddr = ppaddr[OP_NULL]; + lex_end(); + LEAVE; + } + + nextchar(); + return reganode(EVAL, n); + } default: --regparse; while (*regparse && strchr("iogcmsx", *regparse)) pmflag(®flags, *regparse++); + unknown: if (*regparse != ')') croak("Sequence (?%c...) not recognized", *regparse); nextchar(); @@ -487,9 +965,15 @@ I32 *flagp; ret = NULL; /* Pick up the branches, linking them together. */ - br = regbranch(&flags); + br = regbranch(&flags, 0); if (br == NULL) return(NULL); + if (*regparse == '|') { + reginsert(BRANCH, br); + have_branch = 1; + } else if (paren == ':') { + *flagp |= flags&SIMPLE; + } if (ret != NULL) regtail(ret, br); /* OPEN -> first. */ else @@ -499,7 +983,7 @@ I32 *flagp; *flagp |= flags&SPSTART; while (*regparse == '|') { nextchar(); - br = regbranch(&flags); + br = regbranch(&flags, 1); if (br == NULL) return(NULL); regtail(ret, br); /* BRANCH -> BRANCH. */ @@ -508,37 +992,57 @@ I32 *flagp; *flagp |= flags&SPSTART; } - /* Make a closing node, and hook it on the end. */ - switch (paren) { - case ':': - ender = regnode(NOTHING); - break; - case 1: - ender = reganode(CLOSE, parno); - break; - case '=': - case '!': - ender = regnode(SUCCEED); - *flagp &= ~HASWIDTH; - break; - case 0: - ender = regnode(END); - break; - } - regtail(ret, ender); + if (have_branch || paren != ':') { + /* Make a closing node, and hook it on the end. */ + switch (paren) { + case ':': + ender = reg_node(TAIL); + break; + case 1: + ender = reganode(CLOSE, parno); + break; + case '<': + case ',': + case '=': + case '!': + ender = reg_node(SUCCEED); + *flagp &= ~HASWIDTH; + break; + case 0: + ender = reg_node(END); + break; + } + regtail(ret, ender); - /* Hook the tails of the branches to the closing node. */ - for (br = ret; br != NULL; br = regnext(br)) - regoptail(br, ender); + if (have_branch) { + /* Hook the tails of the branches to the closing node. */ + for (br = ret; br != NULL; br = regnext(br)) + regoptail(br, ender); + } + } if (paren == '=') { reginsert(IFMATCH,ret); - regtail(ret, regnode(NOTHING)); - } - else if (paren == '!') { +#ifdef REGALIGN_STRUCT + ret->flags = 0; +#endif + regtail(ret, reg_node(TAIL)); + } else if (paren == '!') { reginsert(UNLESSM,ret); - regtail(ret, regnode(NOTHING)); +#ifdef REGALIGN_STRUCT + ret->flags = 0; +#endif + regtail(ret, reg_node(TAIL)); + } else if (paren == ',') { + reginsert(UNLESSM,ret); + ret->flags = 1; + regtail(ret, reg_node(TAIL)); + } else if (paren == '<') { + reginsert(IFMATCH,ret); + ret->flags = 1; + regtail(ret, reg_node(TAIL)); } + /* Check for proper termination. */ if (paren && (regparse >= regxend || *nextchar() != ')')) { @@ -550,6 +1054,9 @@ I32 *flagp; FAIL("junk on end of regexp"); /* "Can't happen". */ /* NOTREACHED */ } + if (paren != 0) { + regflags = oregflags; + } return(ret); } @@ -559,19 +1066,18 @@ I32 *flagp; * * Implements the concatenation operator. */ -static char * -regbranch(flagp) +static regnode * +regbranch(flagp, second) I32 *flagp; +I32 second; { - register char *ret; - register char *chain; - register char *latest; - I32 flags = 0; + register regnode *ret = second ? reg_node(BRANCH) : NULL; + register regnode *chain = NULL; + register regnode *latest; + I32 flags = 0, c = 0; *flagp = WORST; /* Tentatively. */ - ret = regnode(BRANCH); - chain = NULL; regparse--; nextchar(); while (regparse < regxend && *regparse != '|' && *regparse != ')') { @@ -581,18 +1087,26 @@ I32 *flagp; if (flags & TRYAGAIN) continue; return(NULL); - } + } else if (ret == NULL) + ret = latest; *flagp |= flags&HASWIDTH; - if (chain == NULL) /* First piece. */ + if (chain == NULL) /* First piece. */ *flagp |= flags&SPSTART; else { regnaughty++; regtail(chain, latest); } chain = latest; + c++; + } + if (chain == NULL) { /* Loop ran zero times. */ + chain = reg_node(NOTHING); + if (ret == NULL) + ret = chain; + } + if (c == 1) { + *flagp |= flags&SIMPLE; } - if (chain == NULL) /* Loop ran zero times. */ - (void) regnode(NOTHING); return(ret); } @@ -606,18 +1120,18 @@ I32 *flagp; * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ -static char * +static regnode * regpiece(flagp) I32 *flagp; { - register char *ret; + register regnode *ret; register char op; register char *next; I32 flags; char *origparse = regparse; char *maxpos; I32 min; - I32 max = 32767; + I32 max = REG_INFTY; ret = regatom(&flags); if (ret == NULL) { @@ -659,7 +1173,9 @@ I32 *flagp; maxpos = regparse; max = atoi(maxpos); if (!max && *maxpos != '0') - max = 32767; /* meaning "infinity" */ + max = REG_INFTY; /* meaning "infinity" */ + else if (max >= REG_INFTY) + croak("Quantifier in {,} bigger than %d", REG_INFTY - 1); regparse = next; nextchar(); @@ -670,23 +1186,21 @@ I32 *flagp; } else { regnaughty += 4 + regnaughty; /* compound interest */ - regtail(ret, regnode(WHILEM)); + regtail(ret, reg_node(WHILEM)); reginsert(CURLYX,ret); - regtail(ret, regnode(NOTHING)); + regtail(ret, reg_node(NOTHING)); } +#ifdef REGALIGN_STRUCT + ret->flags = 0; +#endif if (min > 0) *flagp = (WORST|HASWIDTH); if (max && max < min) croak("Can't do {n,m} with n > m"); - if (regcode != ®dummy) { -#ifdef REGALIGN - *(unsigned short *)(ret+3) = min; - *(unsigned short *)(ret+5) = max; -#else - ret[3] = min >> 8; ret[4] = min & 0377; - ret[5] = max >> 8; ret[6] = max & 0377; -#endif + if (!SIZE_ONLY) { + ARG1_SET(ret, min); + ARG2_SET(ret, max); } goto nest_check; @@ -707,6 +1221,9 @@ I32 *flagp; if (op == '*' && (flags&SIMPLE)) { reginsert(STAR, ret); +#ifdef REGALIGN_STRUCT + ret->flags = 0; +#endif regnaughty += 4; } else if (op == '*') { @@ -714,6 +1231,9 @@ I32 *flagp; goto do_curly; } else if (op == '+' && (flags&SIMPLE)) { reginsert(PLUS, ret); +#ifdef REGALIGN_STRUCT + ret->flags = 0; +#endif regnaughty += 3; } else if (op == '+') { @@ -724,7 +1244,7 @@ I32 *flagp; goto do_curly; } nest_check: - if (dowarn && regcode != ®dummy && !(flags&HASWIDTH) && max > 10000) { + if (dowarn && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) { warn("%.*s matches null string many times", regparse - origparse, origparse); } @@ -733,7 +1253,7 @@ I32 *flagp; nextchar(); reginsert(MINMOD, ret); #ifdef REGALIGN - regtail(ret, ret + 4); + regtail(ret, ret + NODE_STEP_REGNODE); #else regtail(ret, ret + 3); #endif @@ -754,11 +1274,11 @@ I32 *flagp; * * [Yes, it is worth fixing, some scripts can run twice the speed.] */ -static char * +static regnode * regatom(flagp) I32 *flagp; { - register char *ret = 0; + register regnode *ret = 0; I32 flags; *flagp = WORST; /* Tentatively. */ @@ -766,29 +1286,32 @@ I32 *flagp; tryagain: switch (*regparse) { case '^': + seen_zerolen++; nextchar(); if (regflags & PMf_MULTILINE) - ret = regnode(MBOL); + ret = reg_node(MBOL); else if (regflags & PMf_SINGLELINE) - ret = regnode(SBOL); + ret = reg_node(SBOL); else - ret = regnode(BOL); + ret = reg_node(BOL); break; case '$': + if (regparse[1]) + seen_zerolen++; nextchar(); if (regflags & PMf_MULTILINE) - ret = regnode(MEOL); + ret = reg_node(MEOL); else if (regflags & PMf_SINGLELINE) - ret = regnode(SEOL); + ret = reg_node(SEOL); else - ret = regnode(EOL); + ret = reg_node(EOL); break; case '.': nextchar(); if (regflags & PMf_SINGLELINE) - ret = regnode(SANY); + ret = reg_node(SANY); else - ret = regnode(ANY); + ret = reg_node(ANY); regnaughty++; *flagp |= HASWIDTH|SIMPLE; break; @@ -805,7 +1328,7 @@ tryagain: goto tryagain; return(NULL); } - *flagp |= flags&(HASWIDTH|SPSTART); + *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE); break; case '|': case ')': @@ -830,57 +1353,61 @@ tryagain: case '\\': switch (*++regparse) { case 'A': - ret = regnode(SBOL); + seen_zerolen++; + ret = reg_node(SBOL); *flagp |= SIMPLE; nextchar(); break; case 'G': - ret = regnode(GPOS); + ret = reg_node(GPOS); + seen_gpos = 1; *flagp |= SIMPLE; nextchar(); break; case 'Z': - ret = regnode(SEOL); + ret = reg_node(SEOL); *flagp |= SIMPLE; nextchar(); break; case 'w': - ret = regnode((regflags & PMf_LOCALE) ? ALNUML : ALNUM); + ret = reg_node((regflags & PMf_LOCALE) ? ALNUML : ALNUM); *flagp |= HASWIDTH|SIMPLE; nextchar(); break; case 'W': - ret = regnode((regflags & PMf_LOCALE) ? NALNUML : NALNUM); + ret = reg_node((regflags & PMf_LOCALE) ? NALNUML : NALNUM); *flagp |= HASWIDTH|SIMPLE; nextchar(); break; case 'b': - ret = regnode((regflags & PMf_LOCALE) ? BOUNDL : BOUND); + seen_zerolen++; + ret = reg_node((regflags & PMf_LOCALE) ? BOUNDL : BOUND); *flagp |= SIMPLE; nextchar(); break; case 'B': - ret = regnode((regflags & PMf_LOCALE) ? NBOUNDL : NBOUND); + seen_zerolen++; + ret = reg_node((regflags & PMf_LOCALE) ? NBOUNDL : NBOUND); *flagp |= SIMPLE; nextchar(); break; case 's': - ret = regnode((regflags & PMf_LOCALE) ? SPACEL : SPACE); + ret = reg_node((regflags & PMf_LOCALE) ? SPACEL : SPACE); *flagp |= HASWIDTH|SIMPLE; nextchar(); break; case 'S': - ret = regnode((regflags & PMf_LOCALE) ? NSPACEL : NSPACE); + ret = reg_node((regflags & PMf_LOCALE) ? NSPACEL : NSPACE); *flagp |= HASWIDTH|SIMPLE; nextchar(); break; case 'd': - ret = regnode(DIGIT); + ret = reg_node(DIGIT); *flagp |= HASWIDTH|SIMPLE; nextchar(); break; case 'D': - ret = regnode(NDIGIT); + ret = reg_node(NDIGIT); *flagp |= HASWIDTH|SIMPLE; nextchar(); break; @@ -933,18 +1460,19 @@ tryagain: default: { register I32 len; - register char ender; + register U8 ender; register char *p; - char *oldp; + char *oldp, *s; I32 numlen; regparse++; defchar: - ret = regnode((regflags & PMf_FOLD) + ret = reg_node((regflags & PMf_FOLD) ? ((regflags & PMf_LOCALE) ? EXACTFL : EXACTF) : EXACT); - regc(0); /* save spot for len */ + s = OPERAND(ret); + regc(0, s++); /* save spot for len */ for (len = 0, p = regparse - 1; len < 127 && p < regxend; len++) @@ -1042,11 +1570,11 @@ tryagain: p = oldp; else { len++; - regc(ender); + regc(ender, s++); } break; } - regc(ender); + regc(ender, s++); } loopdone: regparse = p - 1; @@ -1057,9 +1585,16 @@ tryagain: *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; - if (regcode != ®dummy) + if (!SIZE_ONLY) *OPERAND(ret) = len; - regc('\0'); + regc('\0', s++); + if (SIZE_ONLY) { +#ifdef REGALIGN_STRUCT + regsize += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode); +#endif + } else { + regcode += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode); + } } break; } @@ -1091,38 +1626,41 @@ regset(opnd, c) char *opnd; register I32 c; { - if (opnd == ®dummy) + if (SIZE_ONLY) return; c &= 0xFF; opnd[1 + (c >> 3)] |= (1 << (c & 7)); } -static char * +static regnode * regclass() { - register char *opnd; + register char *opnd, *s; register I32 class; register I32 lastclass = 1234; register I32 range = 0; - register char *ret; + register regnode *ret; register I32 def; I32 numlen; - ret = regnode(ANYOF); - opnd = regcode; + s = opnd = OPERAND(regcode); + ret = reg_node(ANYOF); for (class = 0; class < 33; class++) - regc(0); + regc(0, s++); if (*regparse == '^') { /* Complement of range. */ regnaughty++; regparse++; - if (opnd != ®dummy) + if (!SIZE_ONLY) *opnd |= ANYOF_INVERT; } - if (opnd != ®dummy) { + if (!SIZE_ONLY) { + regcode += ANY_SKIP; if (regflags & PMf_FOLD) *opnd |= ANYOF_FOLD; if (regflags & PMf_LOCALE) *opnd |= ANYOF_LOCALE; + } else { + regsize += ANY_SKIP; } if (*regparse == ']' || *regparse == '-') goto skipcond; /* allow 1st char to be ] or - */ @@ -1134,7 +1672,7 @@ regclass() switch (class) { case 'w': if (regflags & PMf_LOCALE) { - if (opnd != ®dummy) + if (SIZE_ONLY) *opnd |= ANYOF_ALNUML; } else { @@ -1146,7 +1684,7 @@ regclass() continue; case 'W': if (regflags & PMf_LOCALE) { - if (opnd != ®dummy) + if (!SIZE_ONLY) *opnd |= ANYOF_NALNUML; } else { @@ -1158,7 +1696,7 @@ regclass() continue; case 's': if (regflags & PMf_LOCALE) { - if (opnd != ®dummy) + if (!SIZE_ONLY) *opnd |= ANYOF_SPACEL; } else { @@ -1170,7 +1708,7 @@ regclass() continue; case 'S': if (regflags & PMf_LOCALE) { - if (opnd != ®dummy) + if (!SIZE_ONLY) *opnd |= ANYOF_NSPACEL; } else { @@ -1282,40 +1820,33 @@ nextchar() } /* -- regnode - emit a node +- reg_node - emit a node */ +static regnode * /* Location. */ #ifdef CAN_PROTOTYPE -static char * /* Location. */ -regnode(char op) +reg_node(U8 op) #else -static char * /* Location. */ -regnode(op) -char op; +reg_node(op) +U8 op; #endif { - register char *ret; - register char *ptr; + register regnode *ret; + register regnode *ptr; ret = regcode; - if (ret == ®dummy) { -#ifdef REGALIGN - if (!(regsize & 1)) - regsize++; -#endif + if (SIZE_ONLY) { + SIZE_ALIGN(regsize); +#ifdef REGALIGN_STRUCT + regsize += 1; +#else regsize += 3; +#endif return(ret); } -#ifdef REGALIGN -#ifndef lint - if (!((long)ret & 1)) - *ret++ = 127; -#endif -#endif + NODE_ALIGN_FILL(ret); ptr = ret; - *ptr++ = op; - *ptr++ = '\0'; /* Null "next" pointer. */ - *ptr++ = '\0'; + FILL_ADVANCE_NODE(ptr, op); regcode = ptr; return(ret); @@ -1324,45 +1855,32 @@ char op; /* - reganode - emit a node with an argument */ +static regnode * /* Location. */ #ifdef CAN_PROTOTYPE -static char * /* Location. */ -reganode(char op, unsigned short arg) +reganode(U8 op, U32 arg) #else -static char * /* Location. */ reganode(op, arg) -char op; -unsigned short arg; +U8 op; +U32 arg; #endif { - register char *ret; - register char *ptr; + register regnode *ret; + register regnode *ptr; ret = regcode; - if (ret == ®dummy) { + if (SIZE_ONLY) { + SIZE_ALIGN(regsize); #ifdef REGALIGN - if (!(regsize & 1)) - regsize++; -#endif + regsize += 2; +#else regsize += 5; +#endif return(ret); } -#ifdef REGALIGN -#ifndef lint - if (!((long)ret & 1)) - *ret++ = 127; -#endif -#endif + NODE_ALIGN_FILL(ret); ptr = ret; - *ptr++ = op; - *ptr++ = '\0'; /* Null "next" pointer. */ - *ptr++ = '\0'; -#ifdef REGALIGN - *(unsigned short *)(ret+3) = arg; -#else - ret[3] = arg >> 8; ret[4] = arg & 0377; -#endif - ptr += 2; + FILL_ADVANCE_NODE_ARG(ptr, op, arg); regcode = ptr; return(ret); @@ -1373,17 +1891,16 @@ unsigned short arg; */ #ifdef CAN_PROTOTYPE static void -regc(char b) +regc(U8 b, char* s) #else static void -regc(b) -char b; +regc(b, s) +U8 b; +char *s #endif { - if (regcode != ®dummy) - *regcode++ = b; - else - regsize++; + if (!SIZE_ONLY) + *s = b; } /* @@ -1393,46 +1910,38 @@ char b; */ #ifdef CAN_PROTOTYPE static void -reginsert(char op, char *opnd) +reginsert(U8 op, regnode *opnd) #else static void reginsert(op, opnd) -char op; -char *opnd; +U8 op; +regnode *opnd; #endif { - register char *src; - register char *dst; - register char *place; - register int offset = (regkind[(U8)op] == CURLY ? 4 : 0); + register regnode *src; + register regnode *dst; + register regnode *place; + char *s; + register int offset = (regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); - if (regcode == ®dummy) { -#ifdef REGALIGN - regsize += 4 + offset; -#else - regsize += 3 + offset; -#endif + if (SIZE_ONLY) { + regsize += NODE_STEP_REGNODE + offset; return; } src = regcode; -#ifdef REGALIGN - regcode += 4 + offset; -#else - regcode += 3 + offset; -#endif + regcode += NODE_STEP_REGNODE + offset; dst = regcode; while (src > opnd) - *--dst = *--src; + StructCopy(--src, --dst, regnode); place = opnd; /* Op node, where operand used to be. */ - *place++ = op; - *place++ = '\0'; - *place++ = '\0'; + s = OPERAND(place); + FILL_ADVANCE_NODE(place, op); while (offset-- > 0) - *place++ = '\0'; -#ifdef REGALIGN - *place++ = '\177'; + *s++ = '\0'; +#if defined(REGALIGN) && !defined(REGALIGN_STRUCT) + *s++ = '\177'; #endif } @@ -1441,14 +1950,14 @@ char *opnd; */ static void regtail(p, val) -char *p; -char *val; +regnode *p; +regnode *val; { - register char *scan; - register char *temp; + register regnode *scan; + register regnode *temp; register I32 offset; - if (p == ®dummy) + if (SIZE_ONLY) return; /* Find last node. */ @@ -1461,12 +1970,14 @@ char *val; } #ifdef REGALIGN +# ifdef REGALIGN_STRUCT + NEXT_OFF(scan) = val - scan; +# else offset = val - scan; -#ifndef lint +# ifndef lint *(short*)(scan+1) = offset; -#else - offset = offset; -#endif +# endif +#endif #else if (OP(scan) == BACK) offset = scan - val; @@ -1482,11 +1993,11 @@ char *val; */ static void regoptail(p, val) -char *p; -char *val; +regnode *p; +regnode *val; { /* "Operandless" and "op != BRANCH" are synonymous in practice. */ - if (p == NULL || p == ®dummy || regkind[(U8)OP(p)] != BRANCH) + if (p == NULL || SIZE_ONLY || regkind[(U8)OP(p)] != BRANCH) return; regtail(NEXTOPER(p), val); } @@ -1515,6 +2026,59 @@ register char *s; #ifdef DEBUGGING +static regnode * +dumpuntil(start, node, last, sv, l) +regnode *start; +regnode *node; +regnode *last; +SV* sv; +I32 l; +{ + register char op = EXACT; /* Arbitrary non-END op. */ + register regnode *next, *onode; + + while (op != END && (!last || node < last)) { + /* While that wasn't END last time... */ + + NODE_ALIGN(node); + op = OP(node); + if (op == CLOSE) + l--; + /* Where, what. */ + regprop(sv, node); + PerlIO_printf(Perl_debug_log, "%2d%*s%s", node - start, + 2*l, "", SvPVX(sv)); + next = regnext(node); + if (next == NULL) /* Next ptr. */ + PerlIO_printf(Perl_debug_log, "(0)"); + else + PerlIO_printf(Perl_debug_log, "(%d)", next - start); + (void)PerlIO_putc(Perl_debug_log, '\n'); + if (regkind[(U8)op] == BRANCH) { + node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1); + } else if (regkind[(U8)op] == CURLY && op != CURLYX) { + node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS, next, sv, l + 1); + } else if ( op == PLUS || op == STAR ) { + node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1); + } else if (op == ANYOF) { + node = NEXTOPER(node); + node += ANY_SKIP; + } else if (regkind[(U8)op] == EXACT) { + /* Literal string, where present. */ + node += ((*OPERAND(node)) + 2 + sizeof(regnode) - 1) / sizeof(regnode); + node = NEXTOPER(node); + } else { + node = NEXTOPER(node); + node += regarglen[(U8)op]; + } + if (op == CURLYX || op == OPEN) + l++; + else if (op == WHILEM) + l--; + } + return node; +} + /* - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form */ @@ -1522,49 +2086,32 @@ void regdump(r) regexp *r; { - register char *s; - register char op = EXACT; /* Arbitrary non-END op. */ - register char *next; SV *sv = sv_newmortal(); - s = r->program + 1; - while (op != END) { /* While that wasn't END last time... */ -#ifdef REGALIGN - if (!((long)s & 1)) - s++; -#endif - op = OP(s); - /* where, what */ - regprop(sv, s); - PerlIO_printf(Perl_debug_log, "%2d%s", s - r->program, SvPVX(sv)); - next = regnext(s); - s += regarglen[(U8)op]; - if (next == NULL) /* Next ptr. */ - PerlIO_printf(Perl_debug_log, "(0)"); - else - PerlIO_printf(Perl_debug_log, "(%d)", (s-r->program)+(next-s)); - s += 3; - if (op == ANYOF) { - s += 33; - } - if (regkind[(U8)op] == EXACT) { - /* Literal string, where present. */ - s++; - (void)PerlIO_putc(Perl_debug_log, ' '); - (void)PerlIO_putc(Perl_debug_log, '<'); - while (*s != '\0') { - (void)PerlIO_putc(Perl_debug_log,*s); - s++; - } - (void)PerlIO_putc(Perl_debug_log, '>'); - s++; - } - (void)PerlIO_putc(Perl_debug_log, '\n'); - } + (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0); /* Header fields of interest. */ - if (r->regstart) - PerlIO_printf(Perl_debug_log, "start `%s' ", SvPVX(r->regstart)); + if (r->anchored_substr) + PerlIO_printf(Perl_debug_log, "anchored `%s'%s at %d ", + SvPVX(r->anchored_substr), + SvTAIL(r->anchored_substr) ? "$" : "", + r->anchored_offset); + if (r->float_substr) + PerlIO_printf(Perl_debug_log, "floating `%s'%s at %d..%u ", + SvPVX(r->float_substr), + SvTAIL(r->float_substr) ? "$" : "", + r->float_min_offset, r->float_max_offset); + if (r->check_substr) + PerlIO_printf(Perl_debug_log, + r->check_substr == r->float_substr + ? "(checking floating" : "(checking anchored"); + if (r->reganch & ROPT_CHECK_AT_START) + PerlIO_printf(Perl_debug_log, " noscan"); + if (r->reganch & ROPT_CHECK_ALL) + PerlIO_printf(Perl_debug_log, " isall"); + if (r->check_substr) + PerlIO_printf(Perl_debug_log, ") "); + if (r->regstclass) { regprop(sv, r->regstclass); PerlIO_printf(Perl_debug_log, "stclass `%s' ", SvPVX(sv)); @@ -1577,13 +2124,12 @@ regexp *r; PerlIO_printf(Perl_debug_log, "(GPOS)"); PerlIO_putc(Perl_debug_log, ' '); } + if (r->reganch & ROPT_GPOS_SEEN) + PerlIO_printf(Perl_debug_log, "GPOS "); if (r->reganch & ROPT_SKIP) PerlIO_printf(Perl_debug_log, "plus "); if (r->reganch & ROPT_IMPLICIT) PerlIO_printf(Perl_debug_log, "implicit "); - if (r->regmust != NULL) - PerlIO_printf(Perl_debug_log, "must have \"%s\" back %ld ", SvPVX(r->regmust), - (long) r->regback); PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen); PerlIO_printf(Perl_debug_log, "\n"); } @@ -1594,7 +2140,7 @@ regexp *r; void regprop(sv, op) SV *sv; -char *op; +regnode *op; { register char *p = 0; @@ -1631,17 +2177,20 @@ char *op; p = "BRANCH"; break; case EXACT: - p = "EXACT"; + sv_catpvf(sv, "EXACT <%s>", OPERAND(op) + 1); break; case EXACTF: - p = "EXACTF"; + sv_catpvf(sv, "EXACTF <%s>", OPERAND(op) + 1); break; case EXACTFL: - p = "EXACTFL"; + sv_catpvf(sv, "EXACTFL <%s>", OPERAND(op) + 1); break; case NOTHING: p = "NOTHING"; break; + case TAIL: + p = "TAIL"; + break; case BACK: p = "BACK"; break; @@ -1663,23 +2212,37 @@ char *op; case CURLY: sv_catpvf(sv, "CURLY {%d,%d}", ARG1(op), ARG2(op)); break; + case CURLYN: +#ifdef REGALIGN_STRUCT + sv_catpvf(sv, "CURLYN[%d] {%d,%d}", op->flags, ARG1(op), ARG2(op)); +#else + sv_catpvf(sv, "CURLYN {%d,%d}", ARG1(op), ARG2(op)); +#endif + break; case CURLYX: sv_catpvf(sv, "CURLYX {%d,%d}", ARG1(op), ARG2(op)); break; + case CURLYM: +#ifdef REGALIGN_STRUCT + sv_catpvf(sv, "CURLYM[%d] {%d,%d}", op->flags, ARG1(op), ARG2(op)); +#else + sv_catpvf(sv, "CURLYM {%d,%d}", ARG1(op), ARG2(op)); +#endif + break; case REF: - sv_catpvf(sv, "REF%d", ARG1(op)); + sv_catpvf(sv, "REF%d", ARG(op)); break; case REFF: - sv_catpvf(sv, "REFF%d", ARG1(op)); + sv_catpvf(sv, "REFF%d", ARG(op)); break; case REFFL: - sv_catpvf(sv, "REFFL%d", ARG1(op)); - break; + sv_catpvf(sv, "REFFL%d", ARG(op)); + break; case OPEN: - sv_catpvf(sv, "OPEN%d", ARG1(op)); + sv_catpvf(sv, "OPEN%d", ARG(op)); break; case CLOSE: - sv_catpvf(sv, "CLOSE%d", ARG1(op)); + sv_catpvf(sv, "CLOSE%d", ARG(op)); p = NULL; break; case STAR: @@ -1695,10 +2258,18 @@ char *op; p = "GPOS"; break; case UNLESSM: +#ifdef REGALIGN_STRUCT + sv_catpvf(sv, "UNLESSM[-%d]", op->flags); +#else p = "UNLESSM"; +#endif break; case IFMATCH: +#ifdef REGALIGN_STRUCT + sv_catpvf(sv, "IFMATCH[-%d]", op->flags); +#else p = "IFMATCH"; +#endif break; case SUCCEED: p = "SUCCEED"; @@ -1736,6 +2307,9 @@ char *op; case NSPACEL: p = "NSPACEL"; break; + case EVAL: + p = "EVAL"; + break; default: FAIL("corrupted regexp opcode"); } @@ -1750,21 +2324,30 @@ struct regexp *r; { if (!r) return; - if (r->precomp) { + if (r->precomp) Safefree(r->precomp); - r->precomp = Nullch; - } - if (r->subbase) { + if (r->subbase) Safefree(r->subbase); - r->subbase = Nullch; - } - if (r->regmust) { - SvREFCNT_dec(r->regmust); - r->regmust = Nullsv; - } - if (r->regstart) { - SvREFCNT_dec(r->regstart); - r->regstart = Nullsv; + if (r->anchored_substr) + SvREFCNT_dec(r->anchored_substr); + if (r->float_substr) + SvREFCNT_dec(r->float_substr); + if (r->data) { + int n = r->data->count; + while (--n >= 0) { + switch (r->data->what[n]) { + case 's': + SvREFCNT_dec((SV*)r->data->data[n]); + break; + case 'o': + op_free((OP*)r->data->data[n]); + break; + case 'n': + break; + default: + croak("panic: regfree data code '%c'", r->data->what[n]); + } + } } Safefree(r->startp); Safefree(r->endp); diff -pru perl5.004_01.os2/regcomp.h perl5.004_01.my/regcomp.h --- perl5.004_01.os2/regcomp.h Fri Jun 6 16:42:20 1997 +++ perl5.004_01.my/regcomp.h Sat Jun 14 21:18:10 1997 @@ -31,6 +31,18 @@ * For instance, /[a-z].foo/ has a regmust of 'foo' and a regback of 2.] */ +/* #ifndef gould */ +/* #ifndef cray */ +/* #ifndef eta10 */ +#define REGALIGN +/* #endif */ +/* #endif */ +/* #endif */ + +#ifdef REGALIGN +# define REGALIGN_STRUCT +#endif + /* * Structure for regexp "program". This is essentially a linear encoding * of a nondeterministic finite-state machine (aka syntax charts or @@ -72,27 +84,32 @@ #define BOUNDL 21 /* no Match "" at any word boundary */ #define NBOUND 22 /* no Match "" at any word non-boundary */ #define NBOUNDL 23 /* no Match "" at any word non-boundary */ -#define REF 24 /* num Match already matched string */ -#define REFF 25 /* num Match already matched string, folded */ -#define REFFL 26 /* num Match already matched string, folded in loc. */ -#define OPEN 27 /* num Mark this point in input as start of #n. */ -#define CLOSE 28 /* num Analogous to OPEN. */ -#define MINMOD 29 /* no Next operator is not greedy. */ -#define GPOS 30 /* no Matches where last m//g left off. */ -#define IFMATCH 31 /* no Succeeds if the following matches. */ -#define UNLESSM 32 /* no Fails if the following matches. */ -#define SUCCEED 33 /* no Return from a subroutine, basically. */ -#define WHILEM 34 /* no Do curly processing and see if rest matches. */ -#define ALNUM 35 /* no Match any alphanumeric character */ -#define ALNUML 36 /* no Match any alphanumeric char in locale */ -#define NALNUM 37 /* no Match any non-alphanumeric character */ -#define NALNUML 38 /* no Match any non-alphanumeric char in locale */ -#define SPACE 39 /* no Match any whitespace character */ -#define SPACEL 40 /* no Match any whitespace char in locale */ -#define NSPACE 41 /* no Match any non-whitespace character */ -#define NSPACEL 42 /* no Match any non-whitespace char in locale */ -#define DIGIT 43 /* no Match any numeric character */ -#define NDIGIT 44 /* no Match any non-numeric character */ +#define REF 24 /* num Match some already matched string */ +#define OPEN 25 /* num Mark this point in input as start of #n. */ +#define CLOSE 26 /* num Analogous to OPEN. */ +#define MINMOD 27 /* no Next operator is not greedy. */ +#define GPOS 28 /* no Matches where last m//g left off. */ +#define IFMATCH 29 /* no Succeeds if the following matches. */ +#define UNLESSM 30 /* no Fails if the following matches. */ +#define SUCCEED 31 /* no Return from a subroutine, basically. */ +#define WHILEM 32 /* no Do curly processing and see if rest matches. */ +#define ALNUM 33 /* no Match any alphanumeric character */ +#define ALNUML 34 /* no Match any alphanumeric char in locale */ +#define NALNUM 35 /* no Match any non-alphanumeric character */ +#define NALNUML 36 /* no Match any non-alphanumeric char in locale */ +#define SPACE 37 /* no Match any whitespace character */ +#define SPACEL 38 /* no Match any whitespace char in locale */ +#define NSPACE 39 /* no Match any non-whitespace character */ +#define NSPACEL 40 /* no Match any non-whitespace char in locale */ +#define DIGIT 41 /* no Match any numeric character */ +#define NDIGIT 42 /* no Match any non-numeric character */ +#define CURLYM 43 /* no Match this medium-complex thing {n,m} times. */ +#define CURLYN 44 /* no Match next-after-this simple thing + {n,m} times, set parenths. */ +#define TAIL 45 /* no Match empty string. Can jump here from outside. */ +#define REFF 46 /* num Match already matched string, folded */ +#define REFFL 47 /* num Match already matched string, folded in loc. */ +#define EVAL 48 /* evl Execute some Perl code. */ /* * Opcode notes: @@ -117,21 +134,9 @@ */ #ifndef DOINIT -EXT char regarglen[]; -#else -EXT char regarglen[] = { - 0,0,0,0,0,0,0,0,0,0, - /*CURLY*/ 4, /*CURLYX*/ 4, - 0,0,0,0,0,0,0,0,0,0,0,0, - /*REF*/ 2, 2, 2, /*OPEN*/ 2, /*CLOSE*/ 2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; -#endif - -#ifndef DOINIT -EXT char regkind[]; +EXT U8 regkind[]; #else -EXT char regkind[] = { +EXT U8 regkind[] = { END, BOL, BOL, @@ -157,8 +162,6 @@ EXT char regkind[] = { NBOUND, NBOUND, REF, - REF, - REF, OPEN, CLOSE, MINMOD, @@ -177,19 +180,26 @@ EXT char regkind[] = { NSPACE, DIGIT, NDIGIT, + CURLY, + CURLY, + NOTHING, + REF, + REF, + EVAL, }; #endif -/* The following have no fixed length. */ +/* The following have no fixed length. char* since we do strchr on it. */ #ifndef DOINIT EXT char varies[]; #else EXT char varies[] = { - BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL, WHILEM, 0 + BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL, + WHILEM, CURLYM, CURLYN, 0 }; #endif -/* The following always have a length of 1. */ +/* The following always have a length of 1. char* since we do strchr on it. */ #ifndef DOINIT EXT char simple[]; #else @@ -201,8 +211,6 @@ EXT char simple[] = { }; #endif -EXT char regdummy; - /* * A node is one char of opcode followed by two chars of "next" pointer. * "Next" pointers are stored as two 8-bit pieces, high order first. The @@ -219,42 +227,118 @@ EXT char regdummy; * stored negative.] */ -#ifndef gould -#ifndef cray -#ifndef eta10 -#define REGALIGN -#endif -#endif -#endif +#ifdef REGALIGN_STRUCT -#define OP(p) (*(p)) +struct regnode_string { + U8 flags; + U8 type; + U16 next_off; + char string[1]; +}; + +struct regnode_1 { + U8 flags; + U8 type; + U16 next_off; + U32 arg1; +}; + +struct regnode_2 { + U8 flags; + U8 type; + U16 next_off; + U16 arg1; + U16 arg2; +}; + +#endif + +#define REG_INFTY I16_MAX -#ifndef lint #ifdef REGALIGN -#define NEXT(p) (*(short*)(p+1)) -#define ARG1(p) (*(unsigned short*)(p+3)) -#define ARG2(p) (*(unsigned short*)(p+5)) -#else -#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) -#define ARG1(p) (((*((p)+3)&0377)<<8) + (*((p)+4)&0377)) -#define ARG2(p) (((*((p)+5)&0377)<<8) + (*((p)+6)&0377)) +#define ARG_VALUE(arg) (arg) +#define ARG__SET(arg,val) ((arg) = (val)) +#else +#define ARG_VALUE(arg) (((*((char*)&arg)&0377)<<8) + (*(((char*)&arg)+1)&0377)) +#define ARG__SET(arg,val) (((char*)&arg)[0] = (val) >> 8; ((char*)&arg)[1] = (val) & 0377;) #endif + +#define ARG(p) ARG_VALUE(ARG_LOC(p)) +#define ARG1(p) ARG_VALUE(ARG1_LOC(p)) +#define ARG2(p) ARG_VALUE(ARG2_LOC(p)) +#define ARG_SET(p, val) ARG__SET(ARG_LOC(p), (val)) +#define ARG1_SET(p, val) ARG__SET(ARG1_LOC(p), (val)) +#define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val)) + +#ifndef lint +# ifdef REGALIGN +# ifdef REGALIGN_STRUCT +# define NEXT_OFF(p) ((p)->next_off) +# define NODE_ALIGN(node) +# define NODE_ALIGN_FILL(node) ((node)->flags = 0xde) /* deadbeef */ +# else +# define NEXT_OFF(p) (*(short*)(p+1)) +# define NODE_ALIGN(node) ((!((long)node & 1)) ? node++ : 0) +# define NODE_ALIGN_FILL(node) ((!((long)node & 1)) ? *node++ = 127 : 0) +# endif +# else +# define NEXT_OFF(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) +# define NODE_ALIGN(node) +# define NODE_ALIGN_FILL(node) +# endif #else /* lint */ -#define NEXT(p) 0 +# define NEXT_OFF(p) 0 +# define NODE_ALIGN(node) +# define NODE_ALIGN_FILL(node) #endif /* lint */ -#define OPERAND(p) ((p) + 3) +#define SIZE_ALIGN NODE_ALIGN + +#ifdef REGALIGN_STRUCT +# define OP(p) ((p)->type) +# define OPERAND(p) (((struct regnode_string *)p)->string) +# define NODE_ALIGN(node) +# define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) +# define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1) +# define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2) +# define NODE_STEP_REGNODE 1 /* sizeof(regnode)/sizeof(regnode) */ +# define EXTRA_STEP_2ARGS EXTRA_SIZE(struct regnode_2) +#else +# define OP(p) (*(p)) +# define OPERAND(p) ((p) + 3) +# define ARG_LOC(p) (*(unsigned short*)(p+3)) +# define ARG1_LOC(p) (*(unsigned short*)(p+3)) +# define ARG2_LOC(p) (*(unsigned short*)(p+5)) +typedef char* regnode; +# define NODE_STEP_REGNODE NODE_STEP_B +# define EXTRA_STEP_2ARGS 4 +#endif #ifdef REGALIGN -#define NEXTOPER(p) ((p) + 4) -#define PREVOPER(p) ((p) - 4) +# define NODE_STEP_B 4 #else -#define NEXTOPER(p) ((p) + 3) -#define PREVOPER(p) ((p) - 3) +# define NODE_STEP_B 3 +#endif + +#define NEXTOPER(p) ((p) + NODE_STEP_REGNODE) +#define PREVOPER(p) ((p) - NODE_STEP_REGNODE) + +#ifdef REGALIGN_STRUCT +# define FILL_ADVANCE_NODE(ptr, op) STMT_START { \ + (ptr)->type = op; (ptr)->next_off = 0; (ptr)++; } STMT_END +# define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \ + ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 1; } STMT_END +#else +# define FILL_ADVANCE_NODE(ptr, op) STMT_START { \ + *(ptr)++ = op; *(ptr)++ = '\0'; *(ptr)++ = '\0'; } STMT_END +# define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \ + ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 2; } STMT_END #endif #define MAGIC 0234 +#define SIZE_ONLY (regcode == ®dummy) + /* Flags for first parameter byte of ANYOF */ #define ANYOF_INVERT 0x40 #define ANYOF_FOLD 0x20 @@ -265,6 +349,13 @@ EXT char regdummy; #define ANYOF_SPACEL 0x02 #define ANYOF_NSPACEL 0x01 +#ifdef REGALIGN_STRUCT +#define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1) +#else +#define ANY_SKIP 32 /* overwrite the first byte of + * the next guy. */ +#endif + /* * Utility definitions. */ @@ -279,3 +370,46 @@ EXT char regdummy; #endif /* lint */ #define FAIL(m) croak("/%.127s/: %s",regprecomp,m) + +EXT regnode regdummy; + +#define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode)) + +#ifndef DOINIT +EXT U8 regarglen[]; +#else +# ifdef REGALIGN_STRUCT +EXT U8 regarglen[] = { + 0,0,0,0,0,0,0,0,0,0, + /*CURLY*/ EXTRA_SIZE(struct regnode_2), + /*CURLYX*/ EXTRA_SIZE(struct regnode_2), + 0,0,0,0,0,0,0,0,0,0,0,0, + /*REF*/ EXTRA_SIZE(struct regnode_1), + /*OPEN*/ EXTRA_SIZE(struct regnode_1), + /*CLOSE*/ EXTRA_SIZE(struct regnode_1), + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + /*CURLYM*/ EXTRA_SIZE(struct regnode_2), + /*CURLYN*/ EXTRA_SIZE(struct regnode_2), + 0, + /*REFF*/ EXTRA_SIZE(struct regnode_1), + /*REFFL*/ EXTRA_SIZE(struct regnode_1), + /*EVAL*/ EXTRA_SIZE(struct regnode_1), +}; +# else +EXT U8 regarglen[] = { + 0,0,0,0,0,0,0,0,0,0, + /*CURLY*/ 4, /*CURLYX*/ 4, + 0,0,0,0,0,0,0,0,0,0,0,0, + /*REF*/ 2, /*OPEN*/ 2, /*CLOSE*/ 2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,/*CURLYM*/ 4,/*CURLYN*/ 4, + 0, /*REFF*/ 2, /*REFFL*/ 2, /*EVAL*/2 +}; +# endif +#endif + +struct reg_data { + U32 count; + U8 *what; + void* data[1]; +}; + --- ../perl5.004_01.os2/regexec.c Fri Jun 6 16:42:22 1997 +++ ./regexec.c Mon Jun 23 01:44:10 1997 @@ -60,8 +60,8 @@ #endif #ifdef DEBUGGING -static I32 regnarrate = 0; -static char* regprogram = 0; +static I32 regnarrate = 0; +static regnode* regprogram = 0; #endif /* Current curly descriptor */ @@ -72,13 +72,16 @@ struct curcur { int min; /* the minimal number of scans to match */ int max; /* the maximal number of scans to match */ int minmod; /* whether to work our way up or down */ - char * scan; /* the thing to match */ - char * next; /* what has to match after it */ + regnode * scan; /* the thing to match */ + regnode * next; /* what has to match after it */ char * lastloc; /* where we started matching this scan */ CURCUR * oldcc; /* current curly before we started this one */ }; static CURCUR* regcc; +static char ** reg_start_tmp; +static U32 reg_start_tmpl; +static struct reg_data *data; typedef I32 CHECKPOINT; @@ -90,13 +93,14 @@ regcppush(parenfloor) I32 parenfloor; { int retval = savestack_ix; - int i = (regsize - parenfloor) * 3; + int i = (regsize - parenfloor) * 4; int p; SSCHECK(i + 5); for (p = regsize; p > parenfloor; p--) { SSPUSHPTR(regendp[p]); SSPUSHPTR(regstartp[p]); + SSPUSHINT(reg_start_tmp[p]); SSPUSHINT(p); } SSPUSHINT(regsize); @@ -119,13 +123,29 @@ regcppop() input = (char *) SSPOPPTR; *reglastparen = SSPOPINT; regsize = SSPOPINT; - for (i -= 3; i > 0; i -= 3) { + for (i -= 3; i > 0; i -= 4) { paren = (U32)SSPOPINT; + reg_start_tmp[paren] = (char *) SSPOPPTR; regstartp[paren] = (char *) SSPOPPTR; tmps = (char*)SSPOPPTR; if (paren <= *reglastparen) regendp[paren] = tmps; +#ifdef DEBUGGING + if (debug & 512) { + PerlIO_printf(Perl_debug_log, " restoring \\%d to %d(%d)..%d%s\n", + paren, regstartp[paren] - regbol, + reg_start_tmp[paren] - regbol, + regendp[paren] - regbol, + (paren > *reglastparen ? "(no)" : "")); + } +#endif } +#ifdef DEBUGGING + if (debug & 512 && *reglastparen + 1 <= regnpar) { + PerlIO_printf(Perl_debug_log, " restoring \\%d..\\%d to undef\n", + *reglastparen + 1, regnpar); + } +#endif for (paren = *reglastparen + 1; paren <= regnpar; paren++) { if (paren > regsize) regstartp[paren] = Nullch; @@ -144,37 +164,46 @@ regcppop() * Forwards. */ -static I32 regmatch _((char *prog)); -static I32 regrepeat _((char *p, I32 max)); +static I32 regmatch _((regnode *prog)); +static I32 regrepeat _((regnode *p, I32 max)); +static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp)); static I32 regtry _((regexp *prog, char *startpos)); static bool reginclass _((char *p, I32 c)); static bool regtainted; /* tainted information used? */ +static bool reg_warned; /* warned about big count? */ /* - pregexec - match a regexp against a string */ I32 -pregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase) +pregexec(prog, stringarg, strend, strbeg, minend, screamer, data, flags) register regexp *prog; char *stringarg; register char *strend; /* pointer to null at end of string */ -char *strbeg; /* real beginning of string */ -I32 minend; /* end of match must be at least minend after stringarg */ +char *strbeg; /* real beginning of string */ +I32 minend; /* end of match must be >=minend after stringarg. */ SV *screamer; -I32 safebase; /* no need to remember string in subbase */ +void *data; /* May be used for some additional optimizations. */ +U32 flags; /* For optimizations. */ { register char *s; - register char *c; + register regnode *c; register char *startpos = stringarg; register I32 tmp; - I32 minlen = 0; /* must match at least this many chars */ + I32 minlen; /* must match at least this many chars */ I32 dontbother = 0; /* how many characters not to try at end */ CURCUR cc; + I32 start_shift = 0; /* Offset of the start to find + constant substr. */ + I32 end_shift = 0; /* Same for the end. */ + I32 scream_pos = -1; /* Internal iterator of scream. */ + char *scream_olds; cc.cur = 0; cc.oldcc = 0; regcc = &cc; + reg_warned = 0; #ifdef DEBUGGING regnarrate = debug & 512; @@ -187,6 +216,9 @@ I32 safebase; /* no need to remember str return 0; } + minlen = prog->minlen; + if (strend - startpos < minlen) goto phooey; + if (startpos == strbeg) /* is ^ valid at stringarg? */ regprev = '\n'; else { @@ -206,39 +238,42 @@ I32 safebase; /* no need to remember str /* If there is a "must appear" string, look for it. */ s = startpos; - if (prog->regmust != Nullsv && + if (!(flags & REXEC_CHECKED) + && prog->check_substr != Nullsv && !(prog->reganch & ROPT_ANCH_GPOS) && (!(prog->reganch & ROPT_ANCH_BOL) - || (multiline && prog->regback >= 0)) ) + || (multiline && prog->check_substr == prog->anchored_substr)) ) { - if (stringarg == strbeg && screamer) { - if (screamfirst[BmRARE(prog->regmust)] >= 0) - s = screaminstr(screamer,prog->regmust); + start_shift = prog->check_offset_min; + /* Should be nonnegative! */ + end_shift = minlen - start_shift - SvCUR(prog->check_substr); + if (screamer) { + if (screamfirst[BmRARE(prog->check_substr)] >= 0) + s = screaminstr(screamer, prog->check_substr, + start_shift + (stringarg - strbeg), + end_shift, &scream_pos, 0); else s = Nullch; + scream_olds = s; } else - s = fbm_instr((unsigned char*)s, (unsigned char*)strend, - prog->regmust); + s = fbm_instr((unsigned char*)s + start_shift, + (unsigned char*)strend - end_shift, + prog->check_substr); if (!s) { - ++BmUSEFUL(prog->regmust); /* hooray */ + ++BmUSEFUL(prog->check_substr); /* hooray */ goto phooey; /* not present */ - } - else if (prog->regback >= 0) { - s -= prog->regback; - if (s < startpos) - s = startpos; - minlen = prog->regback + SvCUR(prog->regmust); - } - else if (!prog->naughty && --BmUSEFUL(prog->regmust) < 0) { /* boo */ - SvREFCNT_dec(prog->regmust); - prog->regmust = Nullsv; /* disable regmust */ - s = startpos; - } - else { + } else if ((s - stringarg) > prog->check_offset_max) { + ++BmUSEFUL(prog->check_substr); /* hooray/2 */ + s -= prog->check_offset_max; + } else if (!prog->naughty + && --BmUSEFUL(prog->check_substr) < 0 + && prog->check_substr == prog->float_substr) { /* boo */ + SvREFCNT_dec(prog->check_substr); + prog->check_substr = Nullsv; /* disable */ + prog->float_substr = Nullsv; /* clear */ s = startpos; - minlen = SvCUR(prog->regmust); - } + } else s = startpos; } /* Mark beginning of line for ^ . */ @@ -275,45 +310,69 @@ I32 safebase; /* no need to remember str } /* Messy cases: unanchored match. */ - if (prog->regstart) { - if (prog->reganch & ROPT_SKIP) { /* we have /x+whatever/ */ - /* it must be a one character string */ - char ch = SvPVX(prog->regstart)[0]; - while (s < strend) { - if (*s == ch) { - if (regtry(prog, s)) - goto got_it; - s++; - while (s < strend && *s == ch) - s++; - } + if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { + /* we have /x+whatever/ */ + /* it must be a one character string */ + char ch = SvPVX(prog->anchored_substr)[0]; + while (s < strend) { + if (*s == ch) { + if (regtry(prog, s)) goto got_it; s++; + while (s < strend && *s == ch) + s++; } + s++; } - else if (SvTYPE(prog->regstart) == SVt_PVBM) { - /* We know what string it must start with. */ - while ((s = fbm_instr((unsigned char*)s, - (unsigned char*)strend, prog->regstart)) != NULL) - { - if (regtry(prog, s)) - goto got_it; - s++; + } + /*SUPPRESS 560*/ + else if (prog->anchored_substr != Nullsv + || (prog->float_substr != Nullsv + && prog->float_max_offset < strend - s)) { + SV *must = prog->anchored_substr + ? prog->anchored_substr : prog->float_substr; + I32 back_max = + prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset; + I32 back_min = + prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset; + I32 delta = back_max - back_min; + char *last, *last1; + + /* XXXX check_substr already used to find `s', can optimize if + check_substr==must. */ +/* if (regtry(prog, s)) + goto got_it; + s++; */ + scream_pos = -1; + last1 = s - 1 - delta; + /* s += back_min; */ + last = strend - SvCUR(must) - back_min; + dontbother = end_shift; + strend -= dontbother; + while ( (s <= last) && + (screamer + ? (s = screaminstr(screamer, must, s + back_min - strbeg, + end_shift, &scream_pos, 0)) + : (s = fbm_instr((unsigned char*)s + back_min, + (unsigned char*)strend, must))) ) { + if (s - back_max > last1) { + last1 = s - back_min; + s = s - back_max; + } else { + char *t = last1 + 1; + + last1 = s - back_min; + s = t; } - } - else { /* Optimized fbm_instr: */ - c = SvPVX(prog->regstart); - while ((s = ninstr(s, strend, c, c + SvCUR(prog->regstart))) != NULL) - { + while (s <= last1) { if (regtry(prog, s)) goto got_it; s++; } } goto phooey; - } - /*SUPPRESS 560*/ - if (c = prog->regstclass) { + } else if (c = prog->regstclass) { I32 doevery = (prog->reganch & ROPT_SKIP) == 0; + char *class; if (minlen) dontbother = minlen - 1; @@ -322,9 +381,9 @@ I32 safebase; /* no need to remember str /* We know what class it must start with. */ switch (OP(c)) { case ANYOF: - c = OPERAND(c); + class = OPERAND(c); while (s < strend) { - if (reginclass(c, *s)) { + if (reginclass(class, *s)) { if (tmp && regtry(prog, s)) goto got_it; else @@ -509,7 +568,26 @@ I32 safebase; /* no need to remember str } } else { - if (minlen) + dontbother = 0; + if (prog->float_substr != Nullsv) { /* Trim the end. */ + char *last; + I32 oldpos = scream_pos; + + if (screamer) { + last = screaminstr(screamer, prog->float_substr, s - strbeg, + end_shift, &scream_pos, 1); /* last one */ + if (!last) { + last = scream_olds; /* Only one occurence. */ + } + } else { + STRLEN len; + char *little = SvPV(prog->float_substr, len); + last = rninstr(s, strend, little, little + len); + } + if (last == NULL) goto phooey; /* Should not happen! */ + dontbother = strend - last - 1; + } + if (minlen && (dontbother < minlen)) dontbother = minlen - 1; strend -= dontbother; /* We don't know much -- general case. */ @@ -529,8 +607,8 @@ got_it: prog->exec_tainted = regtainted; /* make sure $`, $&, $', and $digit will work later */ - if (strbeg != prog->subbase) { - if (safebase) { + if (strbeg != prog->subbase) { /* second+ //g match. */ + if (!(flags & REXEC_COPY)) { if (prog->subbase) { Safefree(prog->subbase); prog->subbase = Nullch; @@ -576,9 +654,17 @@ char *startpos; reglastparen = &prog->lastparen; prog->lastparen = 0; regsize = 0; + if (reg_start_tmpl <= prog->nparens) { + reg_start_tmpl = prog->nparens*3/2 + 3; + if(reg_start_tmp) + Renew(reg_start_tmp, reg_start_tmpl, char*); + else + New(22,reg_start_tmp, reg_start_tmpl, char*); + } sp = prog->startp; ep = prog->endp; + data = prog->data; if (prog->nparens) { for (i = prog->nparens; i >= 0; i--) { *sp++ = NULL; @@ -610,16 +696,16 @@ char *startpos; */ static I32 /* 0 failure, 1 success */ regmatch(prog) -char *prog; +regnode *prog; { - register char *scan; /* Current node. */ - char *next; /* Next node. */ + register regnode *scan; /* Current node. */ + regnode *next; /* Next node. */ register I32 nextchar; register I32 n; /* no or next */ register I32 ln; /* len or last */ register char *s; /* operand or save */ register char *locinput = reginput; - register I32 c1, c2; /* case fold search */ + register I32 c1, c2, paren; /* case fold search, parenth */ int minmod = 0; #ifdef DEBUGGING static int regindent = 0; @@ -636,9 +722,9 @@ char *prog; if (regnarrate) { SV *prop = sv_newmortal(); regprop(prop, scan); - PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n", + PerlIO_printf(Perl_debug_log, "%*s%2d%-20.20s\t<%.10s>\t %4i\n", regindent*2, "", scan - regprogram, - SvPVX(prop), locinput); + SvPVX(prop), locinput, locinput - regbol); } #else #define sayYES return 1 @@ -647,7 +733,7 @@ char *prog; #endif #ifdef REGALIGN - next = scan + NEXT(scan); + next = scan + NEXT_OFF(scan); if (next == scan) next = NULL; #else @@ -828,12 +914,12 @@ char *prog; case REFFL: regtainted = TRUE; /* FALL THROUGH */ - case REF: + case REF: case REFF: - n = ARG1(scan); /* which paren pair */ + n = ARG(scan); /* which paren pair */ s = regstartp[n]; if (!s) - sayNO; + break; /* Zero length always matches */ if (!regendp[n]) sayNO; if (s == regendp[n]) @@ -842,7 +928,7 @@ char *prog; if (UCHARAT(s) != nextchar && (OP(scan) == REF || (UCHARAT(s) != ((OP(scan) == REFF - ? fold : fold_locale)[nextchar])))) + ? fold : fold_locale)[nextchar])))) sayNO; ln = regendp[n] - s; if (locinput + ln > regeol) @@ -858,17 +944,33 @@ char *prog; break; case NOTHING: + case TAIL: break; case BACK: break; + case EVAL: + { + OP *oop = op; + SV **ocurpad = curpad; + + n = ARG(scan); + op = (OP*)data->data[n]; + curpad = (SV**)data->data[n + 1]; + + runops(); + op = oop; + curpad = ocurpad; + break; + } case OPEN: - n = ARG1(scan); /* which paren pair */ - regstartp[n] = locinput; + n = ARG(scan); /* which paren pair */ + reg_start_tmp[n] = locinput; if (n > regsize) regsize = n; break; case CLOSE: - n = ARG1(scan); /* which paren pair */ + n = ARG(scan); /* which paren pair */ + regstartp[n] = reg_start_tmp[n]; regendp[n] = locinput; if (n > *reglastparen) *reglastparen = n; @@ -882,7 +984,7 @@ char *prog; cc.cur = -1; cc.min = ARG1(scan); cc.max = ARG2(scan); - cc.scan = NEXTOPER(scan) + 4; + cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS; cc.next = next; cc.minmod = minmod; cc.lastloc = 0; @@ -910,8 +1012,8 @@ char *prog; #ifdef DEBUGGING if (regnarrate) - PerlIO_printf(Perl_debug_log, "%*s %ld %lx\n", regindent*2, "", - (long)n, (long)cc); + PerlIO_printf(Perl_debug_log, "%*s %ld out of %ld..%ld cc=%lx\n", regindent*2, "", + (long)n, (long)cc->min, (long)cc->max, (long)cc); #endif /* If degenerate scan matches "", assume scan done. */ @@ -919,6 +1021,10 @@ char *prog; if (locinput == cc->lastloc && n >= cc->min) { regcc = cc->oldcc; ln = regcc->cur; +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s empty match detected, bailing out\n", regindent*2, ""); +#endif if (regmatch(cc->next)) sayYES; regcc->cur = ln; @@ -934,6 +1040,10 @@ char *prog; if (regmatch(cc->scan)) sayYES; cc->cur = n - 1; +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s failed...\n", regindent*2, ""); +#endif sayNO; } @@ -951,9 +1061,16 @@ char *prog; regcc->cur = ln; regcc = cc; - if (n >= cc->max) /* Maximum greed exceeded? */ + if (n >= cc->max) { /* Maximum greed exceeded? */ + if (dowarn && n >= REG_INFTY && !reg_warned++) + warn("count exceeded %d", REG_INFTY - 1); sayNO; + } +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s trying longer...\n", regindent*2, ""); +#endif /* Try scanning more and see if it helps. */ reginput = locinput; cc->cur = n; @@ -963,6 +1080,10 @@ char *prog; regcpblow(cp); sayYES; } +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s failed...\n", regindent*2, ""); +#endif regcppop(); cc->cur = n - 1; sayNO; @@ -980,13 +1101,23 @@ char *prog; } regcppop(); /* Restore some previous $s? */ reginput = locinput; +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s failed, try continuation...\n", regindent*2, ""); +#endif } + if (dowarn && n >= REG_INFTY && !reg_warned++) + warn("count exceeded %d", REG_INFTY - 1); /* Failed deeper matches of scan, so see if this one works. */ regcc = cc->oldcc; ln = regcc->cur; if (regmatch(cc->next)) sayYES; +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s failed...\n", regindent*2, ""); +#endif regcc->cur = ln; regcc = cc; cc->cur = n - 1; @@ -1008,7 +1139,7 @@ char *prog; #ifdef REGALIGN /*SUPPRESS 560*/ - if (n = NEXT(scan)) + if (n = NEXT_OFF(scan)) scan += n; else scan = NULL; @@ -1024,25 +1155,152 @@ char *prog; case MINMOD: minmod = 1; break; + case CURLYM: + { + I32 l; + + /* We suppose that the next guy does not need + backtracking: in particular, it is of constant length, + and has no parenths to influence future backrefs. */ + ln = ARG1(scan); /* min to match */ + n = ARG2(scan); /* max to match */ +#ifdef REGALIGN_STRUCT + paren = scan->flags; + if (paren) { + if (paren > regsize) + regsize = paren; + if (paren > *reglastparen) + *reglastparen = paren; + } +#endif + scan = NEXTOPER(scan) + 4/sizeof(regnode); + if (paren) + scan = regnext(scan); /* Skip former OPEN. */ + reginput = locinput; + if (minmod) { + minmod = 0; + if (ln && regrepeat_hard(scan, ln, &l) < ln) + sayNO; + if (l == 0 && n >= ln) + n = ln; + locinput = reginput; + if (regkind[(U8)OP(next)] == EXACT) { + c1 = UCHARAT(OPERAND(next) + 1); + if (OP(next) == EXACTF) + c2 = fold[c1]; + else if (OP(next) == EXACTFL) + c2 = fold_locale[c1]; + else + c2 = c1; + } else + c1 = c2 = -1000; + while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */ + /* If it could work, try it. */ + if (c1 == -1000 || + UCHARAT(reginput) == c1 || + UCHARAT(reginput) == c2) + { + if (paren) { + if (n) { + regstartp[paren] = reginput - l; + regendp[paren] = reginput; + } else + regendp[paren] = NULL; + } + if (regmatch(next)) + sayYES; + } + /* Couldn't or didn't -- move forward. */ + reginput = locinput; + if (regrepeat_hard(scan, 1, &l)) { + ln++; + locinput = reginput; + } + else + sayNO; + } + } else { + n = regrepeat_hard(scan, n, &l); + if (n != 0 && l == 0) + n = ln; + locinput = reginput; +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s matched %ld times, len=%ld...\n", regindent*2, "", n, l); +#endif + if (n >= ln) { + if (regkind[(U8)OP(next)] == EXACT) { + c1 = UCHARAT(OPERAND(next) + 1); + if (OP(next) == EXACTF) + c2 = fold[c1]; + else if (OP(next) == EXACTFL) + c2 = fold_locale[c1]; + else + c2 = c1; + } else + c1 = c2 = -1000; + } + while (n >= ln) { + /* If it could work, try it. */ + if (c1 == -1000 || + UCHARAT(reginput) == c1 || + UCHARAT(reginput) == c2) + { +#ifdef DEBUGGING + if (regnarrate) + PerlIO_printf(Perl_debug_log, "%*s trying tail with n=%ld...\n", regindent*2, "", n); +#endif + if (paren) { + if (n) { + regstartp[paren] = reginput - l; + regendp[paren] = reginput; + } else + regendp[paren] = NULL; + } + if (regmatch(next)) + sayYES; + } + /* Couldn't or didn't -- back up. */ + n--; + locinput -= l; + reginput = locinput; + } + } + sayNO; + break; + } + case CURLYN: + paren = scan->flags; /* Which paren to set */ + if (paren > regsize) + regsize = paren; + if (paren > *reglastparen) + *reglastparen = paren; + ln = ARG1(scan); /* min to match */ + n = ARG2(scan); /* max to match */ + scan = regnext(NEXTOPER(scan) + 4/sizeof(regnode)); + goto repeat; case CURLY: + paren = 0; ln = ARG1(scan); /* min to match */ n = ARG2(scan); /* max to match */ - scan = NEXTOPER(scan) + 4; + scan = NEXTOPER(scan) + 4/sizeof(regnode); goto repeat; case STAR: ln = 0; - n = 32767; + n = REG_INFTY; scan = NEXTOPER(scan); + paren = 0; goto repeat; case PLUS: + ln = 1; + n = REG_INFTY; + scan = NEXTOPER(scan); + paren = 0; + repeat: /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ - ln = 1; - n = 32767; - scan = NEXTOPER(scan); - repeat: if (regkind[(U8)OP(next)] == EXACT) { c1 = UCHARAT(OPERAND(next) + 1); if (OP(next) == EXACTF) @@ -1059,22 +1317,28 @@ char *prog; minmod = 0; if (ln && regrepeat(scan, ln) < ln) sayNO; - while (n >= ln || (n == 32767 && ln > 0)) { /* ln overflow ? */ + while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */ /* If it could work, try it. */ if (c1 == -1000 || UCHARAT(reginput) == c1 || UCHARAT(reginput) == c2) { + if (paren) { + if (n) { + regstartp[paren] = reginput - 1; + regendp[paren] = reginput; + } else + regendp[paren] = NULL; + } if (regmatch(next)) sayYES; } - /* Couldn't or didn't -- back up. */ + /* Couldn't or didn't -- move forward. */ reginput = locinput + ln; if (regrepeat(scan, 1)) { ln++; reginput = locinput + ln; - } - else + } else sayNO; } } @@ -1083,33 +1347,61 @@ char *prog; if (ln < n && regkind[(U8)OP(next)] == EOL && (!multiline || OP(next) == SEOL)) ln = n; /* why back off? */ - while (n >= ln) { - /* If it could work, try it. */ - if (c1 == -1000 || - UCHARAT(reginput) == c1 || - UCHARAT(reginput) == c2) - { - if (regmatch(next)) - sayYES; + if (paren) { + while (n >= ln) { + /* If it could work, try it. */ + if (c1 == -1000 || + UCHARAT(reginput) == c1 || + UCHARAT(reginput) == c2) + { + if (paren && n) { + if (n) { + regstartp[paren] = reginput - 1; + regendp[paren] = reginput; + } else + regendp[paren] = NULL; + } + if (regmatch(next)) + sayYES; + } + /* Couldn't or didn't -- back up. */ + n--; + reginput = locinput + n; + } + } else { + while (n >= ln) { + /* If it could work, try it. */ + if (c1 == -1000 || + UCHARAT(reginput) == c1 || + UCHARAT(reginput) == c2) + { + if (regmatch(next)) + sayYES; + } + /* Couldn't or didn't -- back up. */ + n--; + reginput = locinput + n; } - /* Couldn't or didn't -- back up. */ - n--; - reginput = locinput + n; } } sayNO; + break; case SUCCEED: case END: reginput = locinput; /* put where regtry can find it */ sayYES; /* Success! */ case IFMATCH: - reginput = locinput; + if (locinput < regbol + scan->flags) + sayNO; + reginput = locinput - scan->flags; scan = NEXTOPER(scan); if (!regmatch(scan)) sayNO; break; case UNLESSM: - reginput = locinput; + if (locinput < regbol + scan->flags) + break; + reginput = locinput - scan->flags; scan = NEXTOPER(scan); if (regmatch(scan)) sayNO; @@ -1153,7 +1445,7 @@ no: */ static I32 regrepeat(p, max) -char *p; +regnode *p; I32 max; { register char *scan; @@ -1162,7 +1454,7 @@ I32 max; register char *loceol = regeol; scan = reginput; - if (max != 32767 && max < loceol - scan) + if (max != REG_INFTY && max < loceol - scan) loceol = scan + max; opnd = OPERAND(p); switch (OP(p)) { @@ -1250,6 +1542,42 @@ I32 max; } /* + - regrepeat_hard - repeatedly match something, report total lenth and length + * + * The repeater is supposed to have constant length. + */ + +static I32 +regrepeat_hard(p, max, lp) +regnode *p; +I32 max; +I32 *lp; +{ + register char *scan; + register char *start; + register char *loceol = regeol; + I32 l = -1; + + start = reginput; + while (reginput < loceol && (scan = reginput, regmatch(p))) { + if (l == -1) { + *lp = l = reginput - start; + if (max != REG_INFTY && l*max < loceol - scan) + loceol = scan + l*max; + if (l == 0) { + return max; + } + } + } + if (reginput < loceol) + reginput = scan; + else + scan = reginput; + + return (scan - start)/l; +} + +/* - regclass - determine if a character falls into a character class */ @@ -1297,16 +1625,16 @@ register I32 c; * [Note, when REGALIGN is defined there are two places in regmatch() * that bypass this code for speed.] */ -char * +regnode * regnext(p) -register char *p; +register regnode *p; { register I32 offset; if (p == ®dummy) return(NULL); - offset = NEXT(p); + offset = NEXT_OFF(p); if (offset == 0) return(NULL); diff -pru perl5.004_01.os2/regexp.h perl5.004_01.my/regexp.h --- perl5.004_01.os2/regexp.h Fri Jan 17 10:05:20 1997 +++ perl5.004_01.my/regexp.h Sat Jun 14 21:11:54 1997 @@ -9,13 +9,18 @@ */ +struct regnode { + U8 flags; + U8 type; + U16 next_off; +}; + +typedef struct regnode regnode; + typedef struct regexp { char **startp; char **endp; - SV *regstart; /* Internal use only. */ - char *regstclass; - SV *regmust; /* Internal use only. */ - I32 regback; /* Can regmust locate first try? */ + regnode *regstclass; I32 minlen; /* mininum possible length of $& */ I32 prelen; /* length of precomp */ U32 nparens; /* number of parentheses */ @@ -27,11 +32,26 @@ typedef struct regexp { U16 naughty; /* how exponential is this pattern? */ char reganch; /* Internal use only. */ char exec_tainted; /* Tainted information used by regexec? */ - char program[1]; /* Unwarranted chumminess with compiler. */ + SV *anchored_substr; /* Substring at fixed position wrt start. */ + I32 anchored_offset; /* Position of it. */ + SV *float_substr; /* Substring at variable position wrt start. */ + I32 float_min_offset; /* Minimal position of it. */ + I32 float_max_offset; /* Maximal position of it. */ + SV *check_substr; /* Substring to check before matching. */ + I32 check_offset_min; /* Offset of the above. */ + I32 check_offset_max; /* Offset of the above. */ + struct reg_data *data; /* Additional data. */ + regnode program[1]; /* Unwarranted chumminess with compiler. */ } regexp; -#define ROPT_ANCH 3 -#define ROPT_ANCH_BOL 1 -#define ROPT_ANCH_GPOS 2 -#define ROPT_SKIP 4 -#define ROPT_IMPLICIT 8 +#define ROPT_ANCH (ROPT_ANCH_BOL|ROPT_ANCH_GPOS) +#define ROPT_ANCH_BOL 1 +#define ROPT_ANCH_GPOS 2 +#define ROPT_SKIP 4 +#define ROPT_IMPLICIT 8 +#define ROPT_CHECK_AT_START 16 +#define ROPT_GPOS_SEEN 32 +#define ROPT_CHECK_ALL 64 + +#define REXEC_COPY 1 /* Need to copy the string. */ +#define REXEC_CHECKED 2 /* check_substr already checked. */ diff -pru perl5.004_01.os2/sv.c perl5.004_01.my/sv.c --- perl5.004_01.os2/sv.c Fri Jun 6 17:32:44 1997 +++ perl5.004_01.my/sv.c Sat Jun 14 21:11:56 1997 @@ -4731,6 +4731,10 @@ SV* sv; sv_catpv(d, " ),"); } } + case SVt_PVBM: + if (SvTAIL(sv)) sv_catpv(d, "TAIL,"); + if (SvCOMPILED(sv)) sv_catpv(d, "COMPILED,"); + break; } if (*(SvEND(d) - 1) == ',') diff -pru perl5.004_01.os2/t/op/re_tests perl5.004_01.my/t/op/re_tests --- perl5.004_01.os2/t/op/re_tests Mon Jun 9 05:07:22 1997 +++ perl5.004_01.my/t/op/re_tests Sat Jun 14 21:18:10 1997 @@ -8,6 +8,8 @@ ab*c abc y $& abc ab*bc abc y $& abc ab*bc abbc y $& abbc ab*bc abbbbc y $& abbbbc +.{1} abbbbc y $& a +.{3,4} abbbbc y $& abbb ab{0,}bc abbbbc y $& abbbbc ab+bc abbc y $& abbc ab+bc abc n - - @@ -29,6 +31,7 @@ ab{0,1}c abc y $& abc ^abc abcc y $& abc ^abc$ aabc n - - abc$ aabc y $& abc +abc$ aabcd n - - ^ abc y $& $ abc y $& a.c abc y $& abc @@ -299,8 +302,64 @@ a(?=c|d). abad y $& ad a(?:b|c|d)(.) ace y $1 e a(?:b|c|d)*(.) ace y $1 e a(?:b|c|d)+?(.) ace y $1 e +a(?:b|c|d)+?(.) acdbcdbe y $1 d +a(?:b|c|d)+(.) acdbcdbe y $1 e +a(?:b|c|d){2}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}?(.) acdbcdbe y $1 d +a(?:b|c|d){6,7}(.) acdbcdbe y $1 e +a(?:b|c|d){6,7}?(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}?(.) acdbcdbe y $1 b +a(?:b|c|d){5,7}(.) acdbcdbe y $1 e +a(?:b|c|d){5,7}?(.) acdbcdbe y $1 b a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce ^(.+)?B AB y $1 A -'([a-z]+)\s\1'i Aa aa y $&-$1 Aa aa-Aa -'([a-z]+)\s\1'i Ab ab y $&-$1 Ab ab-Ab +^([^a-z])|(\^)$ . y $1 . +^[<>]& <&OUT y $& <& +^(a\1?){4}$ aaaaaaaaaa y $1 aaaa +^(a\1?){4}$ aaaaaaaaa n - - +^(a\1?){4}$ aaaaaaaaaaa n - - +^(a\1){4}$ aaaaaaaaaa y $1 aaaa +^(a\1){4}$ aaaaaaaaa n - - +^(a\1){4}$ aaaaaaaaaaa n - - +(?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 f:o:o:b:a:r +(?<=a)b ab y $& b +(?<=a)b cb n - - +(?<=a)b b n - - +(?) { ($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_); $input = join(':',$pat,$subject,$result,$repl,$expect); - $pat = "'$pat'" unless $pat =~ /^'/; - for $study ("", "study \$subject") { - eval "$study; \$match = (\$subject =~ m$pat); \$got = \"$repl\";"; + $pat = "'$pat'" unless $pat =~ /^\'/; + for $study ("", 'study $subject') { + $c = $iters; + eval "$study; \$match = (\$subject =~ m$pat) while \$c--; \$got = \"$repl\";"; if ($result eq 'c') { if ($@ eq '') { print "not ok $.\n"; next TEST } last; # no need to study a syntax error } + elsif ($@) { + print "not ok $. $input => error `$@'\n"; next TEST; + } elsif ($result eq 'n') { - if ($match) { print "not ok $. $input => $got\n"; next TEST } + if ($match) { print "not ok $. ($study) $input => false positive\n"; next TEST } } else { if (!$match || $got ne $expect) { - print "not ok $. $input => $got\n"; + print "not ok $. ($study) $input => `$got', match=$match\n"; next TEST; } } diff -pru perl5.004_01.os2/t/op/regexp_noamp.t perl5.004_01.my/t/op/regexp_noamp.t --- perl5.004_01.os2/t/op/regexp_noamp.t Sun Jun 15 21:43:00 1997 +++ perl5.004_01.my/t/op/regexp_noamp.t Sat Jun 14 21:11:56 1997 @@ -0,0 +1,44 @@ +#!./perl + +$iters = shift || 1; # Poor man performance suite, 10000 is OK. + +open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') + || die "Can't open re_tests"; + +while () { } +$numtests = $.; +seek(TESTS,0,0); +$. = 0; + +$| = 1; +print "1..$numtests\n# $iters iterations\n"; +TEST: +while () { + ($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_); + $input = join(':',$pat,$subject,$result,$repl,$expect); + $pat = "'$pat'" unless $pat =~ /^\'/; + print("ok $.\n"), next TEST if $input =~ /\$[&\`\']/; + for $study ("", 'study $subject') { + $c = $iters; + eval "$study; \$match = (\$subject =~ m$pat) while \$c--; \$got = \"$repl\";"; + if ($result eq 'c') { + if ($@ eq '') { print "not ok $.\n"; next TEST } + last; # no need to study a syntax error + } + elsif ($@) { + print "not ok $. $input => error `$@'\n"; next TEST; + } + elsif ($result eq 'n') { + if ($match) { print "not ok $. ($study) $input => false positive\n"; next TEST } + } + else { + if (!$match || $got ne $expect) { + print "not ok $. ($study) $input => `$got', match=$match\n"; + next TEST; + } + } + } + print "ok $.\n"; +} + +close(TESTS); diff -pru perl5.004_01.os2/t/op/split.t perl5.004_01.my/t/op/split.t --- perl5.004_01.os2/t/op/split.t Fri Jun 6 15:01:40 1997 +++ perl5.004_01.my/t/op/split.t Sat Jun 14 21:17:10 1997 @@ -2,7 +2,7 @@ # $RCSfile: split.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:26 $ -print "1..16\n"; +print "1..21\n"; $FS = ':'; @@ -76,3 +76,24 @@ print "$a|$b" eq "2|4" ? "ok 15\n" : "no local(undef, $a, undef, $b) = qw(1 2 3 4); print "$a|$b" eq "2|4" ? "ok 16\n" : "not ok 16\n"; } + +# Are /^/m patterns scanned? +$_ = join '|', split(/^a/m, "a b a\na d a", 20); +print $_ eq "| b a\n| d a" ? "ok 17\n" : "not ok 17\n# `$_'\n"; + +# Are /$/m patterns scanned? +$_ = join '|', split(/a$/m, "a b a\na d a", 20); +print $_ eq "a b |\na d |" ? "ok 18\n" : "not ok 18\n# `$_'\n"; + +# Are /^/m patterns scanned? +$_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20); +print $_ eq "| b aa\n| d aa" ? "ok 19\n" : "not ok 19\n# `$_'\n"; + +# Are /$/m patterns scanned? +$_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20); +print $_ eq "aa b |\naa d |" ? "ok 20\n" : "not ok 20\n# `$_'\n"; + +# Greedyness: +$_ = "a : b :c: d"; +@ary = split(/\s*:\s*/); +if (($res = join(".",@ary)) eq "a.b.c.d") {print "ok 21\n";} else {print "not ok 21\n# res=`$res' != `a.b.c.d'\n";} diff -pru perl5.004_01.os2/t/TEST perl5.004_01.my/t/TEST --- perl5.004_01.os2/t/TEST Thu Mar 6 12:51:40 1997 +++ perl5.004_01.my/t/TEST Sun Jun 15 21:39:30 1997 @@ -51,6 +51,7 @@ while ($test = shift) { chop($te); print "$te" . '.' x (18 - length($te)); if ($sharpbang) { + -x $test || (print "isn't executable.\n"); open(RESULTS,"./$test |") || (print "can't run.\n"); } else { open(SCRIPT,"$test") || die "Can't run $test.\n"; diff -pru perl5.004_01.os2/toke.c perl5.004_01.my/toke.c --- perl5.004_01.os2/toke.c Tue May 13 11:07:02 1997 +++ perl5.004_01.my/toke.c Sat Jun 14 21:03:52 1997 @@ -4587,46 +4587,6 @@ char *start; return s; } -void -hoistmust(pm) -register PMOP *pm; -{ - if (!pm->op_pmshort && pm->op_pmregexp->regstart && - (!pm->op_pmregexp->regmust || pm->op_pmregexp->reganch & ROPT_ANCH) - ) { - if (!(pm->op_pmregexp->reganch & ROPT_ANCH)) - pm->op_pmflags |= PMf_SCANFIRST; - pm->op_pmshort = SvREFCNT_inc(pm->op_pmregexp->regstart); - pm->op_pmslen = SvCUR(pm->op_pmshort); - } - else if (pm->op_pmregexp->regmust) {/* is there a better short-circuit? */ - if (pm->op_pmshort && - sv_eq(pm->op_pmshort,pm->op_pmregexp->regmust)) - { - if (pm->op_pmflags & PMf_SCANFIRST) { - SvREFCNT_dec(pm->op_pmshort); - pm->op_pmshort = Nullsv; - } - else { - SvREFCNT_dec(pm->op_pmregexp->regmust); - pm->op_pmregexp->regmust = Nullsv; - return; - } - } - /* promote the better string */ - if ((!pm->op_pmshort && - !(pm->op_pmregexp->reganch & ROPT_ANCH_GPOS)) || - ((pm->op_pmflags & PMf_SCANFIRST) && - (SvCUR(pm->op_pmshort) < SvCUR(pm->op_pmregexp->regmust)))) { - SvREFCNT_dec(pm->op_pmshort); /* ok if null */ - pm->op_pmshort = pm->op_pmregexp->regmust; - pm->op_pmslen = SvCUR(pm->op_pmshort); - pm->op_pmregexp->regmust = Nullsv; - pm->op_pmflags |= PMf_SCANFIRST; - } - } -} - static char * scan_trans(start) char *start; diff -pru perl5.004_01.os2/util.c perl5.004_01.my/util.c --- perl5.004_01.os2/util.c Sat Jun 14 02:15:20 1997 +++ perl5.004_01.my/util.c Sat Jun 14 21:11:56 1997 @@ -946,22 +946,39 @@ SV *littlestr; while (tmp--) { if (*--s == *--little) continue; + differ: s = olds + 1; /* here we pay the price for failure */ little = oldlittle; if (s < bigend) /* fake up continue to outer loop */ goto top2; return Nullch; } + if (SvTAIL(littlestr) /* automatically multiline */ + && olds + 1 != bigend + && olds[1] != '\n') + goto differ; return (char *)s; } } return Nullch; } +/* start_shift, end_shift are positive quantities which give offsets + of ends of some substring of bigstr. + If `last' we want the last occurence. + old_posp is the way of communication between consequent calls if + the next call needs to find the . + The initial *old_posp should be -1. + Note that we do not take into account SvTAIL, so it may give wrong + positives if _ALL flag is set. + */ + char * -screaminstr(bigstr, littlestr) +screaminstr(bigstr, littlestr, start_shift, end_shift, old_posp, last) SV *bigstr; SV *littlestr; +I32 start_shift, end_shift, last; +I32 *old_posp; { register unsigned char *s, *x; register unsigned char *big; @@ -969,54 +986,65 @@ SV *littlestr; register I32 previous; register I32 first; register unsigned char *little; - register unsigned char *bigend; + register I32 stop_pos; register unsigned char *littleend; + I32 found = 0; - if ((pos = screamfirst[BmRARE(littlestr)]) < 0) + if (*old_posp == -1 + ? (pos = screamfirst[BmRARE(littlestr)]) < 0 + : (((pos = *old_posp), pos += screamnext[pos]) == 0)) return Nullch; little = (unsigned char *)(SvPVX(littlestr)); littleend = little + SvCUR(littlestr); first = *little++; + /* The value of pos we can start at: */ previous = BmPREVIOUS(littlestr); big = (unsigned char *)(SvPVX(bigstr)); - bigend = big + SvCUR(bigstr); - while (pos < previous) { + /* The value of pos we can stop at: */ + stop_pos = SvCUR(bigstr) - end_shift - (SvCUR(littlestr) - 1 - previous); + if (previous + start_shift > stop_pos) return Nullch; + while (pos < previous + start_shift) { if (!(pos += screamnext[pos])) return Nullch; } #ifdef POINTERRIGOR do { + if (pos >= stop_pos) return Nullch; if (big[pos-previous] != first) continue; for (x=big+pos+1-previous,s=little; s < littleend; /**/ ) { - if (x >= bigend) - return Nullch; if (*s++ != *x++) { s--; break; } } - if (s == littleend) - return (char *)(big+pos-previous); + if (s == littleend) { + *old_posp = pos; + if (!last) return (char *)(big+pos-previous); + found = 1; + } } while ( pos += screamnext[pos] ); + return (last && found) ? (char *)(big+(*old_posp)-previous) : Nullch; #else /* !POINTERRIGOR */ big -= previous; do { + if (pos >= stop_pos) return Nullch; if (big[pos] != first) continue; for (x=big+pos+1,s=little; s < littleend; /**/ ) { - if (x >= bigend) - return Nullch; if (*s++ != *x++) { s--; break; } } - if (s == littleend) - return (char *)(big+pos); + if (s == littleend) { + *old_posp = pos; + if (!last) return (char *)(big+pos); + found = 1; + } } while ( pos += screamnext[pos] ); + return (last && found) ? (char *)(big+(*old_posp)) : Nullch; #endif /* POINTERRIGOR */ - return Nullch; } I32