From ilya Thu Jun 26 14:25:18 1997
Subject: Jumbo patch for RE wrt 5.004_01
To: perl5-porters@perl.com (Mailing list Perl5)
Date: Thu, 26 Jun 1997 14:25:18 -0400 (EDT)
X-Mailer: ELM [version 2.5 PL0b1]
Content-Length: 128525    
Status: OR

This patch provides only an additional bug fix (for a bug introduced
by new optimizations) and an additional bug test if you compare it to
the previous patches, but it should apply clean to 5.004_01.

Description is still the same:

a) Deep optimizations for constant substrings;
b) Extraneous copying of matched string removed;
c) PMf_ALL reanabled again (no, contrary to what I sayd before, /blah/
  *was not* optimized to "index * blah", the code was there, but nobody
  set the flag);
d) study should improve things;
e) regexps redone to use structs;
f) A lot of info moved from PM to REGEXP (possible binary non-compatibility);
g) Signature of regexec changed (source uncompatibility with REGEXP
  modules around).
h) data for $<digit> is now allocated atomically, so (a\1?)* works, 
  and $2 is never of negative length).
i) Slightly better -Dr output.
j) Optimization of s/longer/short/ (without //g) even if copying the string.
k) Constant-length lookbehind added.
l) BRANCH removed if not needed.
m) Fixed: (a)+ used to produce both fixed and floating const string "a".
n) (?:a)+ optimized to the same form as a+. 
o) New opcode CURLYM for (A)* which does not need backtracking and
   storage of additional info.
p) NOTHINGs are optimized in tails of interators to enable next-char
   optimization. 
q) New opcode CURLYN for (simple)*;
r) CURLYM is modified to accept (moderate)*;
s) (?i) and friends localized in the next surrounding block.
t) Peephole optimizer for NOTHING nodes added.
u) Peephole optimizer for EXACT* nodes added.
v) \1 matches as an empty string if the first group did not match yet.
w) Chip's patch to case-fold \1.

============================================

a) one extra field removed from PMOP (was left due to an oversight);
b) SvTAIL handling improved (bugs/enhancements);
c) new test file check what happens without $& $` $';
d) study in regexp.t was studying a wrong variable;
e) `noscan' handling detects more zero-length codes (bugs);
f) Warnings if max quantifier value (32767) reached;
g) Very primitive (?{...}) - works only inside eval "", only the
   simplest cases tested in the test suite;
h) Sniffer for constant strings had bugs when a fixed-number
   {}-modifier was following another *+{} (bugs).
g) Yet newer test cases.



Apply with 
	touch t/op/regexp_noamp.t
	chmod a+x t/op/regexp_noamp.t
	patch -p1 < ...

Enjoy,
Ilya

touch t/op/regexp_noamp.t
chmod +x t/op/regexp_noamp.t
exit 0




diff -pru perl5.004_01.os2/dump.c perl5.004_01.my/dump.c
--- perl5.004_01.os2/dump.c	Tue May 13 11:25:54 1997
+++ perl5.004_01.my/dump.c	Sat Jun 14 21:03:50 1997
@@ -363,19 +363,12 @@ register PMOP *pm;
 	dump("PMf_REPL = ");
 	dump_op(pm->op_pmreplroot);
     }
-    if (pm->op_pmshort) {
-	dump("PMf_SHORT = %s\n",SvPEEK(pm->op_pmshort));
-    }
     if (pm->op_pmflags) {
 	SV *tmpsv = newSVpv("", 0);
 	if (pm->op_pmflags & PMf_USED)
 	    sv_catpv(tmpsv, ",USED");
 	if (pm->op_pmflags & PMf_ONCE)
 	    sv_catpv(tmpsv, ",ONCE");
-	if (pm->op_pmflags & PMf_SCANFIRST)
-	    sv_catpv(tmpsv, ",SCANFIRST");
-	if (pm->op_pmflags & PMf_ALL)
-	    sv_catpv(tmpsv, ",ALL");
 	if (pm->op_pmflags & PMf_SKIPWHITE)
 	    sv_catpv(tmpsv, ",SKIPWHITE");
 	if (pm->op_pmflags & PMf_CONST)
diff -pru perl5.004_01.os2/global.sym perl5.004_01.my/global.sym
--- perl5.004_01.os2/global.sym	Mon Jun  9 16:52:04 1997
+++ perl5.004_01.my/global.sym	Sat Jun 14 21:11:52 1997
@@ -50,6 +50,7 @@ dec_amg
 di
 div_amg
 div_ass_amg
+doeval
 do_undump
 ds
 egid
diff -pru perl5.004_01.os2/MANIFEST perl5.004_01.my/MANIFEST
--- perl5.004_01.os2/MANIFEST	Thu Jun 12 12:32:44 1997
+++ perl5.004_01.my/MANIFEST	Sat Jun 14 21:11:52 1997
@@ -738,6 +738,7 @@ t/op/readdir.t		See if readdir() works
 t/op/recurse.t		See if deep recursion works
 t/op/ref.t		See if refs and objects work
 t/op/regexp.t		See if regular expressions work
+t/op/regexp_noamp.t	See if regular expressions work with optimizations
 t/op/repeat.t		See if x operator works
 t/op/runlevel.t		See if die() works from perl_call_*()
 t/op/sleep.t		See if sleep works
diff -pru perl5.004_01.os2/op.c perl5.004_01.my/op.c
--- perl5.004_01.os2/op.c	Fri Jun  6 18:24:12 1997
+++ perl5.004_01.my/op.c	Sat Jun 14 21:03:50 1997
@@ -521,7 +521,6 @@ OP *op;
     case OP_PUSHRE:
     case OP_MATCH:
 	pregfree(cPMOP->op_pmregexp);
-	SvREFCNT_dec(cPMOP->op_pmshort);
 	break;
     }
 
@@ -2024,7 +2023,6 @@ OP *repl;
 	pm->op_pmregexp = pregcomp(p, p + plen, pm);
 	if (strEQ("\\s+", pm->op_pmregexp->precomp)) 
 	    pm->op_pmflags |= PMf_WHITE;
-	hoistmust(pm);
 	op_free(expr);
     }
     else {
@@ -4459,10 +4457,6 @@ OP *op;
 	kid->op_sibling = sibl;
     }
     pm = (PMOP*)kid;
-    if (pm->op_pmshort && !(pm->op_pmflags & PMf_ALL)) {
-	SvREFCNT_dec(pm->op_pmshort);	/* can't use substring to optimize */
-	pm->op_pmshort = 0;
-    }
 
     kid->op_type = OP_PUSHRE;
     kid->op_ppaddr = ppaddr[OP_PUSHRE];
diff -pru perl5.004_01.os2/op.h perl5.004_01.my/op.h
--- perl5.004_01.os2/op.h	Tue May 13 10:59:40 1997
+++ perl5.004_01.my/op.h	Sat Jun 14 21:11:52 1997
@@ -173,16 +173,14 @@ struct pmop {
     OP *	op_pmreplstart;
     PMOP *	op_pmnext;		/* list of all scanpats */
     REGEXP *	op_pmregexp;		/* compiled expression */
-    SV *	op_pmshort;		/* for a fast bypass of execute() */
     U16		op_pmflags;
     U16		op_pmpermflags;
-    char	op_pmslen;
 };
 
 #define PMf_USED	0x0001		/* pm has been used once already */
 #define PMf_ONCE	0x0002		/* use pattern only once per reset */
-#define PMf_SCANFIRST	0x0004		/* initial constant not anchored */
-#define PMf_ALL		0x0008		/* initial constant is whole pat */
+/* #define PMf_SCANFIRST	0x0004 */		/* initial constant not anchored */
+/* #define PMf_ALL		0x0008 */		/* initial constant is whole pat */
 #define PMf_SKIPWHITE	0x0010		/* skip leading whitespace for split */
 #define PMf_FOLD	0x0020		/* case insensitivity */
 #define PMf_CONST	0x0040		/* subst replacement is constant */
diff -pru perl5.004_01.os2/perl.h perl5.004_01.my/perl.h
--- perl5.004_01.os2/perl.h	Mon Jun  9 16:48:58 1997
+++ perl5.004_01.my/perl.h	Sat Jun 14 21:03:50 1997
@@ -1668,7 +1668,7 @@ EXT char *	regprecomp;	/* uncompiled str
 EXT char *	regparse;	/* Input-scan pointer. */
 EXT char *	regxend;	/* End of input for compile */
 EXT I32		regnpar;	/* () count. */
-EXT char *	regcode;	/* Code-emit pointer; &regdummy = don't. */
+EXT regnode *	regcode;	/* Code-emit pointer; &regdummy = don't. */
 EXT I32		regsize;	/* Code size. */
 EXT I32		regnaughty;	/* How bad is this pattern? */
 EXT I32		regsawback;	/* Did we see \1, ...? */
diff -pru perl5.004_01.os2/pod/perlre.pod perl5.004_01.my/pod/perlre.pod
--- perl5.004_01.os2/pod/perlre.pod	Thu May  8 09:26:24 1997
+++ perl5.004_01.my/pod/perlre.pod	Sat Jun 14 21:18:10 1997
@@ -288,6 +288,24 @@ easier just to say:
 
     if (/foo/ && $` =~ /bar$/)
 
+For lookbehind see below.
+
+=item (?<=regexp)
+
+A zero-width positive lookbehind assertion.  For example, C</(?=\t)\w+/>
+matches a word following a tab, without including the tab in C<$&>.
+Works only for fixed-width lookbehind.
+
+=item (?<!regexp)
+
+A zero-width negative lookbehind assertion.  For example C</(?<!bar)foo/>
+matches any occurrence of "foo" that isn't following "bar".  
+Works only for fixed-width lookbehind.
+
+=item (?{ code })
+
+Experimental "execute any Perl code" zero-length assertion.  Always
+succeeds.  Currently works only for regexps defined inside C<eval "">.
 
 =item (?imsx)
 
diff -pru perl5.004_01.os2/pp.c perl5.004_01.my/pp.c
--- perl5.004_01.os2/pp.c	Thu Jun 12 13:11:14 1997
+++ perl5.004_01.my/pp.c	Sat Jun 14 21:11:54 1997
@@ -3978,10 +3978,12 @@ PP(pp_split)
 	    s = m;
 	}
     }
-    else if (pm->op_pmshort && !rx->nparens) {
-	i = SvCUR(pm->op_pmshort);
-	if (i == 1) {
-	    i = *SvPVX(pm->op_pmshort);
+    else if (rx->check_substr && !rx->nparens 
+	     && (rx->reganch & ROPT_CHECK_ALL)
+	     && !(rx->reganch & ROPT_ANCH)) {
+	i = SvCUR(rx->check_substr);
+	if (i == 1 && !SvTAIL(rx->check_substr)) {
+	    i = *SvPVX(rx->check_substr);
 	    while (--limit) {
 		/*SUPPRESS 530*/
 		for (m = s; m < strend && *m != i; m++) ;
@@ -3999,7 +4001,7 @@ PP(pp_split)
 #ifndef lint
 	    while (s < strend && --limit &&
 	      (m=fbm_instr((unsigned char*)s, (unsigned char*)strend,
-		    pm->op_pmshort)) )
+		    rx->check_substr)) )
 #endif
 	    {
 		dstr = NEWSV(31, m-s);
@@ -4014,7 +4016,7 @@ PP(pp_split)
     else {
 	maxiters += (strend - s) * rx->nparens;
 	while (s < strend && --limit &&
-	       pregexec(rx, s, strend, orig, 1, Nullsv, TRUE))
+	       pregexec(rx, s, strend, orig, 1, Nullsv, NULL, 0))
 	{
 	    TAINT_IF(rx->exec_tainted);
 	    if (rx->subbase
diff -pru perl5.004_01.os2/pp_ctl.c perl5.004_01.my/pp_ctl.c
--- perl5.004_01.os2/pp_ctl.c	Tue Jun 10 09:18:04 1997
+++ perl5.004_01.my/pp_ctl.c	Sat Jun 14 21:11:54 1997
@@ -26,7 +26,6 @@
 #define DOCATCH(o) ((CATCH_GET == TRUE) ? docatch(o) : (o))
 
 static OP *docatch _((OP *o));
-static OP *doeval _((int gimme));
 static OP *dofindlabel _((OP *op, char *label, OP **opstack, OP **oplimit));
 static void doparseform _((SV *sv));
 static I32 dopoptoeval _((I32 startingblock));
@@ -95,7 +94,6 @@ PP(pp_regcomp) {
 
     if (pm->op_pmflags & PMf_KEEP) {
 	pm->op_private &= ~OPpRUNTIME;	/* no point compiling again */
-	hoistmust(pm);
 	cLOGOP->op_first->op_next = op->op_next;
     }
     RETURN;
@@ -124,7 +122,8 @@ PP(pp_substcont)
 
 	/* Are we done */
 	if (cx->sb_once || !pregexec(rx, s, cx->sb_strend, orig,
-				s == m, Nullsv, cx->sb_safebase))
+				     s == m, Nullsv, NULL,
+				     cx->sb_safebase ? 0 : REXEC_COPY))
 	{
 	    SV *targ = cx->sb_targ;
 	    sv_catpvn(dstr, s, cx->sb_strend - s);
@@ -2094,9 +2093,10 @@ OP *o;
     return Nullop;
 }
 
-static OP *
-doeval(gimme)
+OP *
+doeval(gimme, startp)
 int gimme;
+OP**startp;
 {
     dSP;
     OP *saveop = op;
@@ -2138,10 +2138,11 @@ int gimme;
     av_store(comppadlist, 1, (SV*)comppad);
     CvPADLIST(compcv) = comppadlist;
 
-    if (saveop->op_type != OP_REQUIRE)
+    if (!saveop || saveop->op_type != OP_REQUIRE)
 	CvOUTSIDE(compcv) = (CV*)SvREFCNT_inc(caller);
 
-    SAVEFREESV(compcv);
+    if (!startp)
+	SAVEFREESV(compcv);
 
     /* make sure we compile in the right package */
 
@@ -2162,7 +2163,7 @@ int gimme;
     curcop->cop_arybase = 0;
     SvREFCNT_dec(rs);
     rs = newSVpv("\n", 1);
-    if (saveop->op_flags & OPf_SPECIAL)
+    if (saveop && saveop->op_flags & OPf_SPECIAL)
 	in_eval |= 4;
     else
 	sv_setpv(GvSV(errgv),"");
@@ -2178,14 +2179,19 @@ int gimme;
 	    eval_root = Nullop;
 	}
 	SP = stack_base + POPMARK;		/* pop original mark */
-	POPBLOCK(cx,curpm);
-	POPEVAL(cx);
-	pop_return();
+	if (!startp) {
+	    POPBLOCK(cx,curpm);
+	    POPEVAL(cx);
+	    pop_return();
+	}
 	lex_end();
 	LEAVE;
 	if (optype == OP_REQUIRE) {
 	    char* msg = SvPVx(GvSV(errgv), na);
 	    DIE("%s", *msg ? msg : "Compilation failed in require");
+	} else if (startp) {
+	    char* msg = SvPVx(GvSV(errgv), na);
+	    croak("Compilation failed in regexp: %s", (*msg ? msg : "???"));
 	}
 	SvREFCNT_dec(rs);
 	rs = SvREFCNT_inc(nrs);
@@ -2194,7 +2200,10 @@ int gimme;
     SvREFCNT_dec(rs);
     rs = SvREFCNT_inc(nrs);
     compiling.cop_line = 0;
-    SAVEFREEOP(eval_root);
+    if (startp) 
+	*startp = eval_root;
+    else
+	SAVEFREEOP(eval_root);
     if (gimme & G_VOID)
 	scalarvoid(eval_root);
     else if (gimme & G_ARRAY)
@@ -2205,7 +2214,7 @@ int gimme;
     DEBUG_x(dump_eval());
 
     /* Register with debugger: */
-    if (perldb && saveop->op_type == OP_REQUIRE) {
+    if (perldb && saveop && saveop->op_type == OP_REQUIRE) {
 	CV *cv = perl_get_cv("DB::postponed", FALSE);
 	if (cv) {
 	    dSP;
@@ -2355,7 +2364,7 @@ PP(pp_require)
     compiling.cop_line = 0;
 
     PUTBACK;
-    return DOCATCH(doeval(G_SCALAR));
+    return DOCATCH(doeval(G_SCALAR, NULL));
 }
 
 PP(pp_dofile)
@@ -2407,7 +2416,7 @@ PP(pp_entereval)
     if (perldb && curstash != debstash)
 	save_lines(GvAV(compiling.cop_filegv), linestr);
     PUTBACK;
-    ret = doeval(gimme);
+    ret = doeval(gimme, NULL);
     if (perldb && was != sub_generation) { /* Some subs defined here. */
 	strcpy(safestr, "_<(eval )");	/* Anything fake and short. */
     }
diff -pru perl5.004_01.os2/pp_hot.c perl5.004_01.my/pp_hot.c
--- perl5.004_01.os2/pp_hot.c	Fri Jun  6 13:45:32 1997
+++ perl5.004_01.my/pp_hot.c	Sat Jun 14 21:11:54 1997
@@ -804,6 +804,7 @@ PP(pp_match)
     TAINT_NOT;
 
     if (pm->op_pmflags & PMf_USED) {
+      failure:
 	if (gimme == G_ARRAY)
 	    RETURN;
 	RETPUSHNO;
@@ -813,6 +814,8 @@ PP(pp_match)
 	pm = curpm;
 	rx = pm->op_pmregexp;
     }
+    if (rx->minlen > len) goto failure;
+
     truebase = t = s;
     if (global = pm->op_pmflags & PMf_GLOBAL) {
 	rx->startp[0] = 0;
@@ -829,6 +832,7 @@ PP(pp_match)
 	gimme = G_SCALAR;			/* accidental array context? */
     safebase = (((gimme == G_ARRAY) || global || !rx->nparens)
 		&& !sawampersand);
+    safebase = safebase ? 0  : REXEC_COPY ;
     if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) {
 	SAVEINT(multiline);
 	multiline = pm->op_pmflags & PMf_MULTILINE;
@@ -842,43 +846,50 @@ play_it_again:
 	if (update_minmatch++)
 	    minmatch = (s == rx->startp[0]);
     }
-    if (pm->op_pmshort) {
-	if (pm->op_pmflags & PMf_SCANFIRST) {
+    if (rx->check_substr) {
+	if (!(rx->reganch & ROPT_CHECK_AT_START)) {
 	    if (SvSCREAM(TARG)) {
-		if (screamfirst[BmRARE(pm->op_pmshort)] < 0)
+		I32 p = -1;
+		
+		if (screamfirst[BmRARE(rx->check_substr)] < 0)
 		    goto nope;
-		else if (!(s = screaminstr(TARG, pm->op_pmshort)))
+		else if (!(s = screaminstr(TARG, rx->check_substr, 
+					   rx->check_offset_min, 0, &p, 0)))
 		    goto nope;
-		else if (pm->op_pmflags & PMf_ALL)
+		else if ((rx->reganch & ROPT_CHECK_ALL)
+			 && !sawampersand && !SvTAIL(rx->check_substr))
 		    goto yup;
 	    }
-	    else if (!(s = fbm_instr((unsigned char*)s,
-	      (unsigned char*)strend, pm->op_pmshort)))
+	    else if (!(s = fbm_instr((unsigned char*)s + rx->check_offset_min,
+				     (unsigned char*)strend, 
+				     rx->check_substr)))
 		goto nope;
-	    else if (pm->op_pmflags & PMf_ALL)
+	    else if ((rx->reganch & ROPT_CHECK_ALL) && !sawampersand)
 		goto yup;
-	    if (s && rx->regback >= 0) {
-		++BmUSEFUL(pm->op_pmshort);
-		s -= rx->regback;
-		if (s < t)
-		    s = t;
+	    if (s && rx->check_offset_max < t - s) {
+		++BmUSEFUL(rx->check_substr);
+		s -= rx->check_offset_max;
 	    }
 	    else
 		s = t;
 	}
-	else if (!multiline) {
-	    if (*SvPVX(pm->op_pmshort) != *s
-		|| (pm->op_pmslen > 1
-		    && memNE(SvPVX(pm->op_pmshort), s, pm->op_pmslen)))
+	else if (!multiline) {		/* Anchored near beginning of string. */
+	    I32 slen;
+	    if (*SvPVX(rx->check_substr) != s[rx->check_offset_min]
+		|| ((slen = SvCUR(rx->check_substr)) > 1
+		    && memNE(SvPVX(rx->check_substr), 
+			     s + rx->check_offset_min, slen)))
 		goto nope;
 	}
-	if (!rx->naughty && --BmUSEFUL(pm->op_pmshort) < 0) {
-	    SvREFCNT_dec(pm->op_pmshort);
-	    pm->op_pmshort = Nullsv;	/* opt is being useless */
+	if (!rx->naughty && --BmUSEFUL(rx->check_substr) < 0
+	    && rx->check_substr == rx->float_substr) {
+	    SvREFCNT_dec(rx->check_substr);
+	    rx->check_substr = Nullsv;	/* opt is being useless */
+	    rx->float_substr = Nullsv;
 	}
     }
     if (pregexec(rx, s, strend, truebase, minmatch,
-		 SvSCREAM(TARG) ? TARG : Nullsv, safebase))
+		 SvSCREAM(TARG) ? TARG : Nullsv, NULL, safebase))
     {
 	curpm = pm;
 	if (pm->op_pmflags & PMf_ONCE)
@@ -940,9 +951,9 @@ play_it_again:
 	RETPUSHYES;
     }
 
-yup:
+yup:					/* Confirmed by check_substr */
     TAINT_IF(rx->exec_tainted);
-    ++BmUSEFUL(pm->op_pmshort);
+    ++BmUSEFUL(rx->check_substr);
     curpm = pm;
     if (pm->op_pmflags & PMf_ONCE)
 	pm->op_pmflags |= PMf_USED;
@@ -952,7 +963,7 @@ yup:
 	rx->subbeg = truebase;
 	rx->subend = strend;
 	rx->startp[0] = s;
-	rx->endp[0] = s + SvCUR(pm->op_pmshort);
+	rx->endp[0] = s + SvCUR(rx->check_substr);
 	goto gotcha;
     }
     if (sawampersand) {
@@ -962,14 +973,14 @@ yup:
 	rx->subbeg = tmps;
 	rx->subend = tmps + (strend-t);
 	tmps = rx->startp[0] = tmps + (s - t);
-	rx->endp[0] = tmps + SvCUR(pm->op_pmshort);
+	rx->endp[0] = tmps + SvCUR(rx->check_substr);
     }
     LEAVE_SCOPE(oldsave);
     RETPUSHYES;
 
 nope:
-    if (pm->op_pmshort)
-	++BmUSEFUL(pm->op_pmshort);
+    if (rx->check_substr)
+	++BmUSEFUL(rx->check_substr);
 
 ret_no:
     if (global && !(pm->op_pmflags & PMf_CONTINUE)) {
@@ -1431,6 +1442,7 @@ PP(pp_subst)
     STRLEN len;
     int force_on_match = 0;
     I32 oldsave = savestack_ix;
+    I32 update_minmatch = 1;
 
     /* known replacement string? */
     dstr = (pm->op_pmflags & PMf_CONST) ? POPs : Nullsv;
@@ -1460,41 +1472,46 @@ PP(pp_subst)
 	pm = curpm;
 	rx = pm->op_pmregexp;
     }
-    safebase = (!rx->nparens && !sawampersand);
+    safebase = (!rx->nparens && !sawampersand) ? 0 : REXEC_COPY;
     if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) {
 	SAVEINT(multiline);
 	multiline = pm->op_pmflags & PMf_MULTILINE;
     }
     orig = m = s;
-    if (pm->op_pmshort) {
-	if (pm->op_pmflags & PMf_SCANFIRST) {
+    if (rx->check_substr) {
+	if (!(rx->reganch & ROPT_CHECK_AT_START)) {
 	    if (SvSCREAM(TARG)) {
-		if (screamfirst[BmRARE(pm->op_pmshort)] < 0)
+		I32 p = -1;
+		
+		if (screamfirst[BmRARE(rx->check_substr)] < 0)
 		    goto nope;
-		else if (!(s = screaminstr(TARG, pm->op_pmshort)))
+		else if (!(s = screaminstr(TARG, rx->check_substr, rx->check_offset_min, 0, &p, 0)))
 		    goto nope;
 	    }
-	    else if (!(s = fbm_instr((unsigned char*)s, (unsigned char*)strend,
-	      pm->op_pmshort)))
+	    else if (!(s = fbm_instr((unsigned char*)s + rx->check_offset_min, 
+				     (unsigned char*)strend,
+				     rx->check_substr)))
 		goto nope;
-	    if (s && rx->regback >= 0) {
-		++BmUSEFUL(pm->op_pmshort);
-		s -= rx->regback;
-		if (s < m)
-		    s = m;
+	    if (s && rx->check_offset_max < s - m) {
+		++BmUSEFUL(rx->check_substr);
+		s -= rx->check_offset_max;
 	    }
 	    else
 		s = m;
 	}
-	else if (!multiline) {
-	    if (*SvPVX(pm->op_pmshort) != *s
-		|| (pm->op_pmslen > 1
-		    && memNE(SvPVX(pm->op_pmshort), s, pm->op_pmslen)))
+	else if (!multiline) {		/* Anchored at beginning of string. */
+	    I32 slen;
+	    if (*SvPVX(rx->check_substr) != s[rx->check_offset_min]
+		|| ((slen = SvCUR(rx->check_substr)) > 1
+		    && memNE(SvPVX(rx->check_substr), 
+			     s + rx->check_offset_min, slen)))
 		goto nope;
 	}
-	if (!rx->naughty && --BmUSEFUL(pm->op_pmshort) < 0) {
-	    SvREFCNT_dec(pm->op_pmshort);
-	    pm->op_pmshort = Nullsv;	/* opt is being useless */
+	if (!rx->naughty && --BmUSEFUL(rx->check_substr) < 0
+	    && rx->check_substr == rx->float_substr) {
+	    SvREFCNT_dec(rx->check_substr);
+	    rx->check_substr = Nullsv;	/* opt is being useless */
+	    rx->float_substr = Nullsv;
 	}
     }
 
@@ -1505,9 +1522,9 @@ PP(pp_subst)
     c = dstr ? SvPV(dstr, clen) : Nullch;
 
     /* can do inplace substitution? */
-    if (c && clen <= rx->minlen && safebase) {
+    if (c && clen <= rx->minlen && (once || !(safebase & REXEC_COPY))) {
 	if (! pregexec(rx, s, strend, orig, 0,
-		       SvSCREAM(TARG) ? TARG : Nullsv, safebase)) {
+		       SvSCREAM(TARG) ? TARG : Nullsv, NULL, safebase)) {
 	    PUSHs(&sv_no);
 	    LEAVE_SCOPE(oldsave);
 	    RETURN;
@@ -1522,8 +1539,13 @@ PP(pp_subst)
 	SvSCREAM_off(TARG);	/* disable possible screamer */
 	if (once) {
 	    rxtainted = rx->exec_tainted;
-	    m = rx->startp[0];
-	    d = rx->endp[0];
+	    if (rx->subbase) {
+		m = orig + (rx->startp[0] - rx->subbase);
+		d = orig + (rx->endp[0] - rx->subbase);
+	    } else {
+		m = rx->startp[0];
+		d = rx->endp[0];
+	    }
 	    s = orig;
 	    if (m - s > strend - d) {  /* faster to shorten from end */
 		if (clen) {
@@ -1579,7 +1601,7 @@ PP(pp_subst)
 		}
 		s = rx->endp[0];
 	    } while (pregexec(rx, s, strend, orig, s == m,
-			      Nullsv, TRUE)); /* don't match same null twice */
+			      Nullsv, NULL, 0)); /* don't match same null twice */
 	    if (s != d) {
 		i = strend - s;
 		SvCUR_set(TARG, d - SvPVX(TARG) + i);
@@ -1596,7 +1618,7 @@ PP(pp_subst)
     }
 
     if (pregexec(rx, s, strend, orig, 0,
-		 SvSCREAM(TARG) ? TARG : Nullsv, safebase)) {
+		 SvSCREAM(TARG) ? TARG : Nullsv, NULL, safebase)) {
 	if (force_on_match) {
 	    force_on_match = 0;
 	    s = SvPV_force(TARG, len);
@@ -1629,7 +1651,7 @@ PP(pp_subst)
 		sv_catpvn(dstr, c, clen);
 	    if (once)
 		break;
-	} while (pregexec(rx, s, strend, orig, s == m, Nullsv, safebase));
+	} while (pregexec(rx, s, strend, orig, s == m, Nullsv, NULL, safebase));
 	sv_catpvn(dstr, s, strend - s);
 
 	TAINT_IF(rxtainted);
@@ -1652,7 +1674,7 @@ PP(pp_subst)
     goto ret_no;
 
 nope:
-    ++BmUSEFUL(pm->op_pmshort);
+    ++BmUSEFUL(rx->check_substr);
 
 ret_no:
     PUSHs(&sv_no);
diff -pru perl5.004_01.os2/proto.h perl5.004_01.my/proto.h
--- perl5.004_01.os2/proto.h	Tue Jun 10 08:40:06 1997
+++ perl5.004_01.my/proto.h	Sat Jun 14 21:11:54 1997
@@ -68,6 +68,7 @@ char*	delimcpy _((char* to, char* toend,
 void	deprecate _((char* s));
 OP*	die _((const char* pat,...));
 OP*	die_where _((char* message));
+OP*	doeval _((int gimme, OP** startp));
 void	dounwind _((I32 cxix));
 bool	do_aexec _((SV* really, SV** mark, SV** sp));
 void    do_chop _((SV* asv, SV* sv));
@@ -144,7 +145,6 @@ void	gv_init _((GV* gv, HV* stash, char*
 HV*	gv_stashpv _((char* name, I32 create));
 HV*	gv_stashpvn _((char* name, U32 namelen, I32 create));
 HV*	gv_stashsv _((SV* sv, I32 create));
-void	hoistmust _((PMOP* pm));
 void	hv_clear _((HV* tb));
 void	hv_delayfree_ent _((HV* hv, HE* entry));
 SV*	hv_delete _((HV* tb, char* key, U32 klen, I32 flags));
@@ -371,10 +371,10 @@ regexp*	pregcomp _((char* exp, char* xen
 OP*	ref _((OP* op, I32 type));
 OP*	refkids _((OP* op, I32 type));
 void	regdump _((regexp* r));
-I32	pregexec _((regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, I32 safebase));
+I32	pregexec _((regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags));
 void	pregfree _((struct regexp* r));
-char*	regnext _((char* p));
-void	regprop _((SV* sv, char* op));
+regnode*regnext _((regnode* p));
+void	regprop _((SV* sv, regnode* op));
 void	repeatcpy _((char* to, char* from, I32 len, I32 count));
 char*	rninstr _((char* big, char* bigend, char* little, char* lend));
 Sighandler_t rsignal _((int, Sighandler_t));
@@ -425,7 +425,7 @@ UV	scan_hex _((char* start, I32 len, I32
 char*	scan_num _((char* s));
 UV	scan_oct _((char* start, I32 len, I32* retlen));
 OP*	scope _((OP* o));
-char*	screaminstr _((SV* bigsv, SV* littlesv));
+char*	screaminstr _((SV* bigsv, SV* littlesv, I32 start_shift, I32 end_shift, I32 *state, I32 last));
 #ifndef VMS
 I32	setenv_getix _((char* nam));
 #endif
--- ../perl5.004_01.os2/regcomp.c	Fri Jun  6 16:42:22 1997
+++ ./regcomp.c	Mon Jun 23 01:13:30 1997
@@ -96,22 +96,497 @@
  * Forward declarations for pregcomp()'s friends.
  */
 
-static char *reg _((I32, I32 *));
-static char *reganode _((char, unsigned short));
-static char *regatom _((I32 *));
-static char *regbranch _((I32 *));
-static void regc _((char));
-static char *regclass _((void));
+static regnode *reg _((I32, I32 *));
+static regnode *reganode _((U8, U32));
+static regnode *regatom _((I32 *));
+static regnode *regbranch _((I32 *, I32));
+static void regc _((U8, char *));
+static regnode *regclass _((void));
 STATIC I32 regcurly _((char *));
-static char *regnode _((char));
-static char *regpiece _((I32 *));
-static void reginsert _((char, char *));
-static void regoptail _((char *, char *));
+static regnode *reg_node _((U8));
+static regnode *regpiece _((I32 *));
+static void reginsert _((U8, regnode *));
+static void regoptail _((regnode *, regnode *));
 static void regset _((char *, I32));
-static void regtail _((char *, char *));
+static void regtail _((regnode *, regnode *));
 static char* regwhite _((char *, char *));
 static char* nextchar _((void));
 
+static seen_gpos;
+static seen_zerolen;
+static regexp *rx;
+
+/* Length of a variant. */
+
+typedef struct {
+    I32 len_min;
+    I32 len_delta;
+    I32 pos_min;
+    I32 pos_delta;
+    SV *last_found;
+    I32 last_end;			/* min value, <0 unless valid. */
+    I32 last_start_min;
+    I32 last_start_max;
+    SV **longest;			/* Either &l_fixed, or &l_float. */
+    SV *longest_fixed;
+    I32 offset_fixed;
+    SV *longest_float;
+    I32 offset_float_min;
+    I32 offset_float_max;
+    I32 flags;
+} scan_data_t;
+
+#define SF_BEFORE_EOL		1
+#define SF_FIX_BEFORE_EOL	2
+#define SF_FL_BEFORE_EOL	4
+#define SF_IS_INF		8
+#define SF_HAS_PAR		16
+#define SF_IN_PAR		32
+
+static void
+scan_commit(data)
+    scan_data_t *data;
+{
+    if (SvCUR(data->last_found) > SvCUR(*data->longest)) {
+	sv_setsv(*data->longest, data->last_found);
+	if (*data->longest == data->longest_fixed) {
+	    data->offset_fixed = data->last_start_min;
+	    if (data->flags & SF_BEFORE_EOL)
+		data->flags |= SF_FIX_BEFORE_EOL;
+	    else
+		data->flags &= ~SF_FIX_BEFORE_EOL;
+	} else {
+	    data->offset_float_min = data->last_start_min;
+	    data->offset_float_max = data->last_start_max;
+	    if (data->flags & SF_BEFORE_EOL)
+		data->flags |= SF_FL_BEFORE_EOL;
+	    else
+		data->flags &= ~SF_FL_BEFORE_EOL;
+	}
+    }
+    SvCUR_set(data->last_found, 0);
+    data->last_end = -1;
+    data->flags &= ~SF_BEFORE_EOL;
+}
+
+#define SCF_DO_SUBSTR 1
+
+/* Stops at toplevel WHILEM as well as at `last'. At end *scanp is set
+   to the position after last scanned or to NULL. */
+
+static I32
+study_chunk(scanp, deltap, last, data, flags)
+    regnode **scanp;			/* Start here (read-write). */
+    I32 *deltap;			/* Write maxlen-minlen here. */
+    regnode *last;			/* Stop before this one. */
+    scan_data_t *data;
+    U32 flags;
+{
+    I32 min = 0, pars = 0;
+    regnode *scan = *scanp;
+    I32 delta = 0, is_inf = 0, is_par = OP(scan) == OPEN ? ARG(scan) : 0;
+    scan_data_t data_fake;
+    
+    while (scan && OP(scan) != END && scan < last) {
+	/* Peephole optimizer: */
+
+	if (regkind[(U8)OP(scan)] == EXACT) {
+	    regnode *nxt = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2;
+	    regnode *n = scan;
+	    U32 stringok = 1;
+#ifdef DEBUGGING
+	    regnode *last = scan;
+#endif 
+	    
+	    while ((n = regnext(n))
+		   && ( regkind[(U8)OP(n)] == NOTHING || 
+			(stringok && (OP(n) == OP(scan))))
+		   && NEXT_OFF(n)
+		   && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
+		NEXT_OFF(scan) += NEXT_OFF(n);
+		if (OP(n) == TAIL || n > nxt)
+		    stringok = 0;
+		if (regkind[(U8)OP(n)] == NOTHING) {
+		    nxt = n + NODE_STEP_REGNODE;
+		    if (stringok)
+			last = n;
+		} else {
+		    if (*OPERAND(scan) + *OPERAND(n) > U8_MAX) 
+			break;
+		    Move(OPERAND(n) + 1, 
+			 OPERAND(scan) + *OPERAND(scan) + 1,
+			 *OPERAND(n) + 1, char);
+		    *OPERAND(scan) += *OPERAND(n);
+		    nxt = n + (*OPERAND(n) + 2 - 1)/sizeof(regnode) + 2;
+		    if (stringok)
+			last = nxt - 1;
+		}
+#ifdef DEBUGGING
+#endif 
+	    }
+#ifdef DEBUGGING
+	    /* Allow dumping */
+	    n = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2;
+	    while (n <= last) {
+		if (regkind[(U8)OP(n)] != NOTHING) {
+		    OP(n) = NOTHING;
+		    NEXT_OFF(n) = 0;
+		}
+		n++;
+	    }
+#endif 
+
+	} else if (OP(scan) != CURLYX) {
+	    regnode *n = scan;
+
+	    while ((n = regnext(n))
+		   && (regkind[(U8)OP(n)] == NOTHING) && NEXT_OFF(n)
+		   && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
+		NEXT_OFF(scan) += NEXT_OFF(n);
+	    }
+	}
+	if (OP(scan) == BRANCH) {
+	    if (OP(regnext(scan)) == BRANCH) {
+		regnode *next;
+		I32 max1 = 0, min1 = I32_MAX;
+		
+		if (flags && SCF_DO_SUBSTR)
+		    scan_commit(data);
+		while (OP(scan) == BRANCH) {
+		    I32 deltanext, minnext;
+
+		    data_fake.flags = 0;
+		    next = regnext(scan);
+		    scan = NEXTOPER(scan);
+		    minnext = study_chunk(&scan, &deltanext, next, &data_fake, 0);
+		    if (min1 > minnext) min1 = minnext;
+		    if (max1 < minnext + deltanext) max1 = minnext + deltanext;
+		    if (deltanext == I32_MAX)
+			is_inf = 1;
+		    scan = next;
+		    if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
+			pars++;
+		}
+		if (flags && SCF_DO_SUBSTR) {
+		    data->pos_min += min1;
+		    data->pos_delta += max1 - min1;
+		    if (is_inf) 
+			data->flags |= SF_IS_INF;
+		    else
+			data->flags &= ~SF_IS_INF;
+		    if (max1 != min1 || is_inf)
+			data->longest = &(data->longest_float);
+		}
+		min += min1;
+		delta += max1 - min1;
+	    }
+	    else	/* single branch is optimized. */
+		scan = NEXTOPER(scan);
+	    continue;
+	} else if (OP(scan) == EXACT) {
+	    min += *OPERAND(scan);
+	    if (flags && SCF_DO_SUBSTR) { /* Update longest substr. */
+		I32 l = *OPERAND(scan);
+
+		/* The code below prefers earlier match for fixed
+		   offset, later match for variable offset.  */
+		if (data->last_end == -1) { /* Update the start info. */
+		    data->last_start_min = data->pos_min;
+ 		    data->last_start_max = data->flags & SF_IS_INF
+ 			? I32_MAX : data->pos_min + data->pos_delta; 
+		}
+		sv_catpvn(data->last_found, OPERAND(scan)+1, l);
+		data->last_end = data->pos_min + l;
+		data->pos_min += l; /* As in the first entry. */
+		data->flags &= ~SF_BEFORE_EOL;
+	    }
+	} else if (regkind[(U8)OP(scan)] == EXACT) {
+	    if (flags && SCF_DO_SUBSTR) 
+		scan_commit(data);
+	    min += *OPERAND(scan);
+	    if (data)
+		data->pos_min += *OPERAND(scan);
+	} else if (strchr(varies,OP(scan))) {
+	    int mincount, maxcount, minnext, deltanext, pos_before, fl;
+	    regnode *next, *oscan = scan;
+	    
+	    switch (regkind[(U8)OP(scan)]) {
+	    case WHILEM:
+		scan = NEXTOPER(scan);
+		goto finish;
+	    case PLUS:
+		if (flags && SCF_DO_SUBSTR) {
+		    next = NEXTOPER(scan);
+		    if (OP(next) == EXACT) {
+			mincount = 1; 
+			maxcount = REG_INFTY; 
+			next = regnext(scan);
+			scan = NEXTOPER(scan);
+			goto do_curly;
+		    }
+		}
+		if (flags && SCF_DO_SUBSTR)
+		    data->pos_min++;
+		min++;
+		/* Fall through. */
+	    case STAR:
+		is_inf = 1; 
+		scan = regnext(scan);
+		if (flags && SCF_DO_SUBSTR) {
+		    scan_commit(data);
+		    data->flags |= SF_IS_INF;
+		    data->longest = &(data->longest_float);
+		}
+		goto optimize_curly_tail;
+	    case CURLY:
+		mincount = ARG1(scan); 
+		maxcount = ARG2(scan);
+		next = regnext(scan);
+		scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
+	      do_curly:
+		if (flags && SCF_DO_SUBSTR) {
+		    if (mincount == 0) scan_commit(data);
+		    pos_before = data->pos_min;
+		}
+		if (data) {
+		    fl = data->flags;
+		    data->flags &= ~(SF_HAS_PAR|SF_IN_PAR);
+		}
+		/* This will finish on WHILEM, setting scan, or on NULL: */
+		minnext = study_chunk(&scan, &deltanext, next, data, 
+				      mincount == 0 
+					? (flags & ~SCF_DO_SUBSTR) : flags);
+		if (!scan) 		/* It was not CURLYX, but CURLY. */
+		    scan = next;
+		min += minnext * mincount;
+		is_inf = (maxcount == REG_INFTY && (minnext + deltanext) > 0
+			  || deltanext == I32_MAX);
+		delta += (minnext + deltanext) * maxcount - minnext * mincount;
+
+		/* Try powerful optimization CURLYX => CURLYN. */
+#ifdef REGALIGN_STRUCT
+		if (  OP(oscan) == CURLYX && data 
+		      && data->flags & SF_IN_PAR
+		      && !deltanext && minnext == 1 ) {
+		    /* Try to optimize to CURLY.  */
+		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
+		    regnode *nxt1 = nxt, *nxt2;
+
+		    /* Skip open. */
+		    nxt = regnext(nxt);
+		    if (!strchr(simple,OP(nxt))
+			&& !(regkind[(U8)OP(nxt)] == EXACT
+			     && *OPERAND(nxt) == 1)) 
+			goto nogo;
+		    nxt2 = nxt;
+		    nxt = regnext(nxt);
+		    if (OP(nxt) != CLOSE) 
+			goto nogo;
+		    /* Now we know that nxt2 is the only contents: */
+		    oscan->flags = ARG(nxt);
+		    OP(oscan) = CURLYN;
+		    OP(nxt1) = NOTHING;	/* was OPEN. */
+#ifdef DEBUGGING
+		    OP(nxt1 + 1) = NOTHING; /* was count. */
+		    NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
+		    NEXT_OFF(nxt2) = 0;	/* just for consistancy with CURLY. */
+		    OP(nxt) = NOTHING;	/* was CLOSE. */
+		    OP(nxt + 1) = NOTHING; /* was count. */
+		    NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
+#endif 
+		}
+#endif 
+	      nogo:
+
+		/* Try optimization CURLYX => CURLYM. */
+		if (  OP(oscan) == CURLYX && data 
+#ifdef REGALIGN_STRUCT
+		      && !(data->flags & SF_HAS_PAR)
+#else
+		      && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
+#endif 
+		      && !deltanext  ) {
+		    /* XXXX How to optimize if data == 0? */
+		    /* Optimize to a simpler form.  */
+		    regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS, *nxt2; /* OPEN */
+
+		    OP(oscan) = CURLYM;
+		    while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
+			    && (OP(nxt2) != WHILEM)) 
+			nxt = nxt2;
+		    OP(nxt2)  = SUCCEED; /* Whas WHILEM */
+#ifdef REGALIGN_STRUCT
+		    /* Need to optimize away parenths. */
+		    if (data->flags & SF_IN_PAR) {
+			/* Set the parenth number.  */
+			regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
+
+			if (OP(nxt) != CLOSE) 
+			    croak("panic opt close");
+			oscan->flags = ARG(nxt);
+			OP(nxt1) = NOTHING;	/* was OPEN. */
+			OP(nxt) = NOTHING;	/* was CLOSE. */
+#ifdef DEBUGGING
+			OP(nxt1 + 1) = NOTHING; /* was count. */
+			OP(nxt + 1) = NOTHING; /* was count. */
+			NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
+			NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
+#endif 
+		    } else
+			oscan->flags = 0;
+#endif 
+		}
+		if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) 
+		    pars++;
+		if (flags && SCF_DO_SUBSTR) {
+		    SV *last_str = Nullsv;
+		    int counted = mincount != 0;
+
+		    if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
+			I32 b = pos_before >= data->last_start_min 
+			    ? pos_before : data->last_start_min;
+			I32 l = data->last_end - pos_before;
+			
+			/* Get the added string: */
+			last_str = newSVpv(SvPV(data->last_found, na) 
+					   + b - data->last_start_min, l);
+			if (deltanext == 0 && pos_before == b) {
+			    /* What was added is a constant string */
+			    if (mincount > 1) {
+				SvGROW(last_str, (mincount * l) + 1);
+				repeatcpy(SvPVX(last_str) + l, 
+					  SvPVX(last_str), l, mincount - 1);
+				SvCUR(last_str) *= mincount;
+				/* Add additional parts. */
+				SvCUR_set(data->last_found, 
+					  SvCUR(data->last_found) - l);
+				sv_catsv(data->last_found, last_str);
+				data->last_end += l * (mincount - 1);
+			    }
+			}
+		    }
+		    /* It is counted once already... */
+		    data->pos_min += minnext * (mincount - counted);
+		    data->pos_delta += - counted * deltanext +
+			(minnext + deltanext) * maxcount - minnext * mincount;
+		    /* Cannot switch off being infinite... */
+		    if (is_inf) 
+			data->flags |= SF_IS_INF;
+		    if (mincount != maxcount) {
+			scan_commit(data);
+			if (mincount && last_str) {
+			    sv_setsv(data->last_found, last_str);
+			    data->last_end = data->pos_min;
+			    data->last_start_min = 
+				data->pos_min - SvCUR(last_str);
+			    data->last_start_max = is_inf 
+				? I32_MAX 
+				: data->pos_min + data->pos_delta
+				- SvCUR(last_str);
+			}
+			data->longest = &(data->longest_float);
+		    }
+		}
+	      optimize_curly_tail:
+#ifdef REGALIGN
+		if (OP(oscan) != CURLYX) {
+		    while (regkind[(U8)OP(next = regnext(oscan))] == NOTHING
+			   && NEXT_OFF(next))
+			NEXT_OFF(oscan) += NEXT_OFF(next);
+		}
+#endif
+		continue;
+	    default:			/* REF only? */
+		if (flags && SCF_DO_SUBSTR) {
+		    scan_commit(data);
+		    data->longest = &(data->longest_float);
+		    if (is_inf) 
+			data->flags |= SF_IS_INF;
+		    else
+			data->flags &= ~SF_IS_INF;
+		}
+		is_inf = 1;
+		break;
+	    }
+	} else if (strchr(simple,OP(scan))) {
+	    if (flags && SCF_DO_SUBSTR) {
+		scan_commit(data);
+		data->pos_min++;
+	    }
+	    min++;
+	} else if (regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
+	    data->flags |= SF_BEFORE_EOL;
+	} else if (regkind[(U8)OP(scan)] == BRANCH 
+		   && (scan->flags || data)
+		   && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
+	    I32 deltanext, minnext;
+	    regnode *nscan, *next;
+
+	    data_fake.flags = 0;
+	    next = regnext(scan);
+	    nscan = NEXTOPER(scan);
+	    minnext = study_chunk(&nscan, &deltanext, next, &data_fake, 0);
+	    if (scan->flags) {
+		if (deltanext) {
+		    croak("variable length lookbehind not implemented");
+		} else if (minnext > U8_MAX) {
+		    croak("lookbehind longer than %d not implemented", U8_MAX);
+		}
+		scan->flags = minnext;
+	    }
+	    if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
+		pars++;
+	} else if (OP(scan) == OPEN) {
+	    pars++;
+	} else if (OP(scan) == CLOSE && ARG(scan) == is_par) {
+#ifdef REGALIGN_STRUCT
+	    regnode *next = regnext(scan);
+
+	    if ( next && is_par <= U8_MAX
+		 && (next >= last 
+		     || (OP(next) == WHILEM && (next + 1) == last))) {
+	    } else
+#endif 
+		is_par = 0;		/* Disable optimization */
+	}
+	/* Else: zero-length, ignore. */
+	scan = regnext(scan);
+    }
+
+  finish:
+    *scanp = scan;
+    *deltap = is_inf ? I32_MAX : delta;
+    if (is_par && pars==1 && data) {
+	data->flags |= SF_IN_PAR;
+	data->flags &= ~SF_HAS_PAR;
+    } else if (pars && data) {
+	data->flags |= SF_HAS_PAR;
+	data->flags &= ~SF_IN_PAR;
+    }
+    return min;
+}
+
+static I32
+add_data(n, s)
+I32 n;
+char *s;
+{
+    if (rx->data) {
+	Renewc(rx->data, 
+	       sizeof(rx->data) + sizeof(void*) * (rx->data->count + n - 1), 
+	       char, struct reg_data);
+	Renew(rx->data->what, rx->data->count + n, U8);
+	rx->data->count += n;
+    } else {
+	Newc(1207, rx->data, sizeof(*rx->data) + sizeof(void*) * (n - 1),
+	     char, struct reg_data);
+	New(1208, rx->data->what, n, U8);
+	rx->data->count = n;
+    }
+    Copy(s, rx->data->what + rx->data->count - n, n, U8);
+    return rx->data->count - n;
+}
+
 /*
  - pregcomp - compile a regular expression into internal code
  *
@@ -134,21 +609,24 @@ char* xend;
 PMOP* pm;
 {
     register regexp *r;
-    register char *scan;
-    register SV *longish;
-    SV *longest;
-    register I32 len;
-    register char *first;
+    regnode *scan;
+    register regnode *scan1;
+    register SV *current_longest;
+    SV **longest;
+    SV *longest_fixed;
+    SV *longest_float;
+    regnode *first;
     I32 flags;
     I32 backish;
-    I32 backest;
-    I32 curback;
+    I32 stored_c_offset_min;
+    I32 c_offset_min;
+    I32 c_offset_delta;
     I32 minlen = 0;
     I32 sawplus = 0;
     I32 sawopen = 0;
 #define MAX_REPEAT_DEPTH 12
     struct {
-	char *opcode;
+	regnode *opcode;
 	I32 count;
     } repeat_stack[MAX_REPEAT_DEPTH];
     I32 repeat_depth = 0;
@@ -161,6 +639,9 @@ PMOP* pm;
     regflags = pm->op_pmflags;
     regsawback = 0;
 
+    seen_gpos = 0;
+    seen_zerolen = *exp == '^' ? -1 : 0;
+
     /* First pass: determine size, legality. */
     regparse = exp;
     regxend = xend;
@@ -168,24 +649,27 @@ PMOP* pm;
     regnpar = 1;
     regsize = 0L;
     regcode = &regdummy;
-    regc((char)MAGIC);
+    regc((U8)MAGIC, (char*)regcode);
     if (reg(0, &flags) == NULL) {
 	Safefree(regprecomp);
 	regprecomp = Nullch;
 	return(NULL);
     }
+    DEBUG_r(PerlIO_printf(Perl_debug_log, "size %d ", regsize));
 
     /* Small enough for pointer-storage convention? */
     if (regsize >= 32767L)		/* Probably could be 65535L. */
 	FAIL("regexp too big");
 
     /* Allocate space and initialize. */
-    Newc(1001, r, sizeof(regexp) + (unsigned)regsize, char, regexp);
+    Newc(1001, r, sizeof(regexp) + (unsigned)regsize * sizeof(regnode),
+	 char, regexp);
     if (r == NULL)
 	FAIL("regexp out of space");
     r->prelen = xend - exp;
     r->precomp = regprecomp;
     r->subbeg = r->subbase = NULL;
+    rx = r;
 
     /* Second pass: emit code. */
     regparse = exp;
@@ -193,23 +677,26 @@ PMOP* pm;
     regnaughty = 0;
     regnpar = 1;
     regcode = r->program;
-    regc((char)MAGIC);
+    regc((U8)MAGIC, (char*) regcode++);
+    r->data = 0;
     if (reg(0, &flags) == NULL)
 	return(NULL);
 
     /* Dig out information for optimizations. */
     pm->op_pmflags = regflags;
-    r->regstart = Nullsv;	/* Worst-case defaults. */
     r->reganch = 0;
-    r->regmust = Nullsv;
-    r->regback = -1;
-    r->regstclass = Nullch;
+    r->regstclass = NULL;
     r->naughty = regnaughty >= 10;	/* Probably an expensive pattern. */
-    scan = r->program+1;			/* First BRANCH. */
-    if (OP(regnext(scan)) == END) {/* Only one top-level choice. */
-	scan = NEXTOPER(scan);
+    scan = r->program + 1;		/* First BRANCH. */
+    if (OP(scan) != BRANCH) {	/* Only one top-level choice. */
+	I32 prev_c_offset_min;
+	I32 prev_c_offset_delta;
+	I32 prev_end;
+	scan_data_t data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+	I32 fake;
 
 	first = scan;
+	/* Skip introductions and multiplicators >= 1. */
 	while ((OP(first) == OPEN && (sawopen = 1)) ||
 	    (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
 	    (OP(first) == PLUS) ||
@@ -224,12 +711,7 @@ PMOP* pm;
 
 	/* Starting-point info. */
       again:
-	if (OP(first) == EXACT) {
-	    r->regstart = newSVpv(OPERAND(first)+1,*OPERAND(first));
-	    if (SvCUR(r->regstart) > !sawstudy)
-		fbm_compile(r->regstart);
-	    (void)SvUPGRADE(r->regstart, SVt_PVBM);
-	}
+	if (OP(first) == EXACT);	/* Empty, get anchored substr later. */
 	else if (strchr(simple+2,OP(first)))
 	    r->regstclass = first;
 	else if (regkind[(U8)OP(first)] == BOUND ||
@@ -257,8 +739,9 @@ PMOP* pm;
 	if (sawplus && (!sawopen || !regsawback))
 	    r->reganch |= ROPT_SKIP;	/* x+ must match 1st of run */
 
-	DEBUG_r(PerlIO_printf(Perl_debug_log, "first %d next %d offset %d\n",
-	   OP(first), OP(NEXTOPER(first)), first - scan));
+	/* Scan is after the zeroth branch, first is atomic matcher. */
+	DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %d\n", 
+			      first - scan + 1));
 	/*
 	* If there's something expensive in the r.e., find the
 	* longest literal string that must appear and make it the
@@ -267,155 +750,78 @@ PMOP* pm;
 	* and avoiding duplication strengthens checking.  Not a
 	* strong reason, but sufficient in the absence of others.
 	* [Now we resolve ties in favor of the earlier string if
-	* it happens that curback has been invalidated, since the
+	* it happens that c_offset_min has been invalidated, since the
 	* earlier string may buy us something the later one won't.]
 	*/
-	longish = newSVpv("",0);
-	longest = newSVpv("",0);
-	len = 0;
 	minlen = 0;
-	curback = 0;
-	backish = 0;
-	backest = 0;
-	while (OP(scan) != END) {
-	    if (OP(scan) == BRANCH) {
-		if (OP(regnext(scan)) == BRANCH) {
-		    curback = -30000;
-		    while (OP(scan) == BRANCH)
-			scan = regnext(scan);
-		}
-		else	/* single branch is ok */
-		    scan = NEXTOPER(scan);
-		continue;
-	    }
-	    if (OP(scan) == UNLESSM) {
-		curback = -30000;
-		scan = regnext(scan);
-		continue;
-	    }
-	    if (OP(scan) == EXACT) {
-		char *t;
 
-		first = scan;
-		while ((t = regnext(scan)) && OP(t) == CLOSE)
-		    scan = t;
-		minlen += *OPERAND(first) * repeat_count;
-		if (curback - backish == len) {
-		    sv_catpvn(longish, OPERAND(first)+1,
-			*OPERAND(first));
-		    len += *OPERAND(first);
-		    curback += *OPERAND(first);
-		    first = regnext(scan);
-		}
-		else if (*OPERAND(first) >= len + (curback >= 0)) {
-		    len = *OPERAND(first);
-		    sv_setpvn(longish, OPERAND(first)+1,len);
-		    backish = curback;
-		    curback += len;
-		    first = regnext(scan);
-		}
-		else
-		    curback += *OPERAND(first);
-	    }
-	    else if (strchr(varies,OP(scan))) {
-		int tcount;
-		char *next;
-
-		if (repeat_depth < MAX_REPEAT_DEPTH
-		    && ((OP(scan) == PLUS
-			 && (tcount = 1)
-			 && (next = NEXTOPER(scan)))
-			|| (regkind[(U8)OP(scan)] == CURLY
-			    && (tcount = ARG1(scan))
-			    && (next = NEXTOPER(scan)+4))))
-		{
-		    /* We treat (abc)+ as (abc)(abc)*. */
-
-		    /* Mark the place to return back. */
-		    repeat_stack[repeat_depth].opcode = regnext(scan);
-		    repeat_stack[repeat_depth].count = repeat_count;
-		    repeat_depth++;
-		    repeat_count *= tcount;
-
-		    /* Go deeper: */
-		    scan = next;
-		    continue;
-		}
-		else {
-		    curback = -30000;
-		    len = 0;
-		    if (SvCUR(longish) > SvCUR(longest)) {
-			sv_setsv(longest,longish);
-			backest = backish;
-		    }
-		    sv_setpvn(longish,"",0);
-		}
-	    }
-	    else if (strchr(simple,OP(scan))) {
-		curback++;
-		minlen += repeat_count;
-		len = 0;
-		if (SvCUR(longish) > SvCUR(longest)) {
-		    sv_setsv(longest,longish);
-		    backest = backish;
-		}
-		sv_setpvn(longish,"",0);
-	    }
-	    scan = regnext(scan);
-	    if (!scan) {		/* Go up PLUS or CURLY. */
-		if (!repeat_depth--)
-		    croak("panic: re scan");
-		scan = repeat_stack[repeat_depth].opcode;
-		repeat_count = repeat_stack[repeat_depth].count;
-		/* Need to submit the longest string found: */
-		curback = -30000;
-		len = 0;
-		if (SvCUR(longish) > SvCUR(longest)) {
-		    sv_setsv(longest,longish);
-		    backest = backish;
-		}
-		sv_setpvn(longish,"",0);
-	    }
-	}
-
-	/* Prefer earlier on tie, unless we can tail match latter */
-
-	if (SvCUR(longish) + (first && regkind[(U8)OP(first)] == EOL)
-		> SvCUR(longest))
-	{
-	    sv_setsv(longest,longish);
-	    backest = backish;
-	}
-	else
-	    sv_setpvn(longish,"",0);
-	if (SvCUR(longest)
-	    && (!r->regstart
-		|| !fbm_instr((unsigned char*) SvPVX(r->regstart),
-			      (unsigned char *) (SvPVX(r->regstart)
-						 + SvCUR(r->regstart)),
-			      longest)))
-	{
-	    r->regmust = longest;
-	    if (backest < 0)
-		backest = -1;
-	    r->regback = backest;
-	    if (SvCUR(longest) > !(sawstudy || 
-				   (first && regkind[(U8)OP(first)] == EOL)))
-		fbm_compile(r->regmust);
-	    (void)SvUPGRADE(r->regmust, SVt_PVBM);
-	    BmUSEFUL(r->regmust) = 100;
-	    if (first && regkind[(U8)OP(first)] == EOL && SvCUR(longish))
-		SvTAIL_on(r->regmust);
-	}
-	else {
-	    SvREFCNT_dec(longest);
-	    longest = Nullsv;
-	}
-	SvREFCNT_dec(longish);
+	data.longest_fixed = newSVpv("",0);
+	data.longest_float = newSVpv("",0);
+	data.last_found = newSVpv("",0);
+	data.longest = &(data.longest_fixed);
+	first = scan;
+	
+	minlen = study_chunk(&first, &fake, scan + regsize, /* To end */
+			     &data, SCF_DO_SUBSTR);
+	if ( regnpar == 1 && data.longest == &(data.longest_fixed)
+	     && data.last_start_min == 0 && data.last_end > 0 
+	     && !seen_zerolen 
+	     && (!seen_gpos || (r->reganch & ROPT_ANCH_GPOS)))
+	    r->reganch |= ROPT_CHECK_ALL;
+	scan_commit(&data);
+	SvREFCNT_dec(data.last_found);
+
+	if (SvCUR(data.longest_float)) {
+	    if (SvCUR(data.longest_fixed) 
+		&& data.offset_fixed == data.offset_float_min)
+		goto remove;		/* Like in (a)+. */
+	    
+	    r->float_substr = data.longest_float;
+	    r->float_min_offset = data.offset_float_min;
+	    r->float_max_offset = data.offset_float_max;
+	    fbm_compile(r->float_substr);
+	    BmUSEFUL(r->float_substr) = 100;
+	    if (data.flags & SF_FL_BEFORE_EOL) SvTAIL_on(r->float_substr);
+	} else {
+	  remove:
+	    r->float_substr = Nullsv;
+	    SvREFCNT_dec(data.longest_float);
+	}
+
+	if (SvCUR(data.longest_fixed)) {
+	    r->anchored_substr = data.longest_fixed;
+	    r->anchored_offset = data.offset_fixed;
+	    fbm_compile(r->anchored_substr);
+	    BmUSEFUL(r->anchored_substr) = 100;
+	    if (data.flags & SF_FIX_BEFORE_EOL) SvTAIL_on(r->anchored_substr);
+	} else {
+	    r->anchored_substr = Nullsv;
+	    SvREFCNT_dec(data.longest_fixed);
+	}
+
+	/* A temporary algorithm prefers floated substr to fixed one to dig more info. */
+	if (SvCUR(data.longest_fixed) > SvCUR(data.longest_float)) {
+	    r->check_substr = r->anchored_substr;
+	    r->check_offset_min = r->check_offset_max = r->anchored_offset;
+	    if (r->reganch & ROPT_ANCH)	r->reganch |= ROPT_CHECK_AT_START;
+	} else {
+	    r->check_substr = r->float_substr;
+	    r->check_offset_min = data.offset_float_min;
+	    r->check_offset_max = data.offset_float_max;
+	}
+    } else {
+	/* Several toplevels. Best we can is to set minlen. */
+	I32 fake;
+	
+	DEBUG_r(PerlIO_printf(Perl_debug_log, "\n"));
+	scan = r->program + 1;
+	minlen = study_chunk(&scan, &fake, scan + regsize, NULL, 0);
+	r->check_substr = r->anchored_substr = r->float_substr = Nullsv;
     }
 
     r->nparens = regnpar - 1;
     r->minlen = minlen;
+    if (seen_gpos) r->reganch |= ROPT_GPOS_SEEN;
     Newz(1002, r->startp, regnpar, char*);
     Newz(1002, r->endp, regnpar, char*);
     DEBUG_r(regdump(r));
@@ -431,16 +837,16 @@ PMOP* pm;
  * is a trifle forced, but the need to tie the tails of the branches to what
  * follows makes it hard to avoid.
  */
-static char *
+static regnode *
 reg(paren, flagp)
 I32 paren;			/* Parenthesized? */
 I32 *flagp;
 {
-    register char *ret;
-    register char *br;
-    register char *ender = 0;
+    register regnode *ret;
+    register regnode *br;
+    register regnode *ender = 0;
     register I32 parno = 0;
-    I32 flags;
+    I32 flags, oregflags = regflags, have_branch = 0;
 
     *flagp = HASWIDTH;	/* Tentatively. */
 
@@ -451,9 +857,19 @@ I32 *flagp;
 	    paren = *regparse++;
 	    ret = NULL;
 	    switch (paren) {
-	    case ':':
+	    case '<':
+#ifndef REGALIGN_STRUCT
+		croak("lookbehind non-implemented without REGALIGN_STRUCT");
+#endif 
+		if (*regparse == '!') 
+		    paren = ',';
+		if (*regparse != '=' && *regparse != '!') 
+		    goto unknown;
+		regparse++;
 	    case '=':
 	    case '!':
+		seen_zerolen++;
+	    case ':':
 		break;
 	    case '$':
 	    case '@':
@@ -467,10 +883,72 @@ I32 *flagp;
 		nextchar();
 		*flagp = TRYAGAIN;
 		return NULL;
+	    case '{':
+	    {
+		I32 count = 1, n;
+		char c;
+		char *s = regparse;
+		SV *sv;
+		OP *sop, *rop;
+		char tmpbuf[TYPE_DIGITS(long) + 12 + 3];
+		char *safestr;
+
+		seen_zerolen++;
+		while (count && (c = *regparse)) {
+		    if (c == '\\' && regparse[1])
+			regparse++;
+		    else if (c == '{') 
+			count++;
+		    else if (c == '}') 
+			count--;
+		    regparse++;
+		}
+		if (*regparse != ')')
+		    croak("Sequence (?{...}) not terminated or not {}-balanced");
+		if (!SIZE_ONLY) {
+		    if (regparse - 1 - s) 
+			sv = newSVpv(s, regparse - 1 - s);
+		    else
+			sv = newSVpv("", 0);
+		    ENTER;
+		    lex_start(sv);
+		    SAVETMPS;
+		    /* switch to eval mode */
+
+		    SAVESPTR(compiling.cop_filegv);
+		    sprintf(tmpbuf, "_<(re_eval %lu)", (unsigned long)++evalseq);
+		    compiling.cop_filegv = gv_fetchfile(tmpbuf+2);
+		    compiling.cop_line = 1;
+		    /* XXX For C<eval "...">s within BEGIN {} blocks, this ends up
+		       deleting the eval's FILEGV from the stash before gv_check() runs
+		       (i.e. before run-time proper). To work around the coredump that
+		       ensues, we always turn GvMULTI_on for any globals that were
+		       introduced within evals. See force_ident(). GSAR 96-10-12 */
+		    safestr = savepv(tmpbuf);
+		    SAVEDELETE(defstash, safestr, strlen(safestr));
+		    SAVEI32(hints);
+		    hints = 0;
+
+		    rop = doeval(G_VOID, &sop);
+		    n = add_data(4, "nnso");
+		    rx->data->data[n] = (void*)rop;
+		    rx->data->data[n+1] = (void*)curpad;
+		    rx->data->data[n+2] = (void*)compcv;
+		    rx->data->data[n+3] = (void*)sop;
+		    sop->op_type = OP_NULL;
+		    sop->op_ppaddr = ppaddr[OP_NULL];
+		    lex_end();
+		    LEAVE;
+		}
+		
+		nextchar();
+		return reganode(EVAL, n);
+	    }
 	    default:
 		--regparse;
 		while (*regparse && strchr("iogcmsx", *regparse))
 		    pmflag(&regflags, *regparse++);
+	      unknown:
 		if (*regparse != ')')
 		    croak("Sequence (?%c...) not recognized", *regparse);
 		nextchar();
@@ -487,9 +965,15 @@ I32 *flagp;
 	ret = NULL;
 
     /* Pick up the branches, linking them together. */
-    br = regbranch(&flags);
+    br = regbranch(&flags, 0);
     if (br == NULL)
 	return(NULL);
+    if (*regparse == '|') {
+	reginsert(BRANCH, br);
+	have_branch = 1;
+    } else if (paren == ':') {
+	*flagp |= flags&SIMPLE;
+    }
     if (ret != NULL)
 	regtail(ret, br);	/* OPEN -> first. */
     else
@@ -499,7 +983,7 @@ I32 *flagp;
     *flagp |= flags&SPSTART;
     while (*regparse == '|') {
 	nextchar();
-	br = regbranch(&flags);
+	br = regbranch(&flags, 1);
 	if (br == NULL)
 	    return(NULL);
 	regtail(ret, br);	/* BRANCH -> BRANCH. */
@@ -508,37 +992,57 @@ I32 *flagp;
 	*flagp |= flags&SPSTART;
     }
 
-    /* Make a closing node, and hook it on the end. */
-    switch (paren) {
-    case ':':
-	ender = regnode(NOTHING);
-	break;
-    case 1:
-	ender = reganode(CLOSE, parno);
-	break;
-    case '=':
-    case '!':
-	ender = regnode(SUCCEED);
-	*flagp &= ~HASWIDTH;
-	break;
-    case 0:
-	ender = regnode(END);
-	break;
-    }
-    regtail(ret, ender);
+    if (have_branch || paren != ':') {
+	/* Make a closing node, and hook it on the end. */
+	switch (paren) {
+	case ':':
+	    ender = reg_node(TAIL);
+	    break;
+	case 1:
+	    ender = reganode(CLOSE, parno);
+	    break;
+	case '<':
+	case ',':
+	case '=':
+	case '!':
+	    ender = reg_node(SUCCEED);
+	    *flagp &= ~HASWIDTH;
+	    break;
+	case 0:
+	    ender = reg_node(END);
+	    break;
+	}
+	regtail(ret, ender);
 
-    /* Hook the tails of the branches to the closing node. */
-    for (br = ret; br != NULL; br = regnext(br))
-	regoptail(br, ender);
+	if (have_branch) {
+	    /* Hook the tails of the branches to the closing node. */
+	    for (br = ret; br != NULL; br = regnext(br))
+		regoptail(br, ender);
+	}
+    }
 
     if (paren == '=') {
 	reginsert(IFMATCH,ret);
-	regtail(ret, regnode(NOTHING));
-    }
-    else if (paren == '!') {
+#ifdef REGALIGN_STRUCT
+	ret->flags = 0;
+#endif 
+	regtail(ret, reg_node(TAIL));
+    } else if (paren == '!') {
 	reginsert(UNLESSM,ret);
-	regtail(ret, regnode(NOTHING));
+#ifdef REGALIGN_STRUCT
+	ret->flags = 0;
+#endif 
+	regtail(ret, reg_node(TAIL));
+    } else if (paren == ',') {
+	reginsert(UNLESSM,ret);
+	ret->flags = 1;
+	regtail(ret, reg_node(TAIL));
+    } else if (paren == '<') {
+	reginsert(IFMATCH,ret);
+	ret->flags = 1;
+	regtail(ret, reg_node(TAIL));
     }
+    
 
     /* Check for proper termination. */
     if (paren && (regparse >= regxend || *nextchar() != ')')) {
@@ -550,6 +1054,9 @@ I32 *flagp;
 	    FAIL("junk on end of regexp");	/* "Can't happen". */
 	/* NOTREACHED */
     }
+    if (paren != 0) {
+	regflags = oregflags;
+    }
 
     return(ret);
 }
@@ -559,19 +1066,18 @@ I32 *flagp;
  *
  * Implements the concatenation operator.
  */
-static char *
-regbranch(flagp)
+static regnode *
+regbranch(flagp, second)
 I32 *flagp;
+I32 second;
 {
-    register char *ret;
-    register char *chain;
-    register char *latest;
-    I32 flags = 0;
+    register regnode *ret = second ? reg_node(BRANCH) : NULL;
+    register regnode *chain = NULL;
+    register regnode *latest;
+    I32 flags = 0, c = 0;
 
     *flagp = WORST;		/* Tentatively. */
 
-    ret = regnode(BRANCH);
-    chain = NULL;
     regparse--;
     nextchar();
     while (regparse < regxend && *regparse != '|' && *regparse != ')') {
@@ -581,18 +1087,26 @@ I32 *flagp;
 	    if (flags & TRYAGAIN)
 		continue;
 	    return(NULL);
-	}
+	} else if (ret == NULL)
+	    ret = latest;
 	*flagp |= flags&HASWIDTH;
-	if (chain == NULL)	/* First piece. */
+	if (chain == NULL) 	/* First piece. */
 	    *flagp |= flags&SPSTART;
 	else {
 	    regnaughty++;
 	    regtail(chain, latest);
 	}
 	chain = latest;
+	c++;
+    }
+    if (chain == NULL) {	/* Loop ran zero times. */
+	chain = reg_node(NOTHING);
+	if (ret == NULL)
+	    ret = chain;
+    }
+    if (c == 1) {
+	*flagp |= flags&SIMPLE;
     }
-    if (chain == NULL)	/* Loop ran zero times. */
-	(void) regnode(NOTHING);
 
     return(ret);
 }
@@ -606,18 +1120,18 @@ I32 *flagp;
  * It might seem that this node could be dispensed with entirely, but the
  * endmarker role is not redundant.
  */
-static char *
+static regnode *
 regpiece(flagp)
 I32 *flagp;
 {
-    register char *ret;
+    register regnode *ret;
     register char op;
     register char *next;
     I32 flags;
     char *origparse = regparse;
     char *maxpos;
     I32 min;
-    I32 max = 32767;
+    I32 max = REG_INFTY;
 
     ret = regatom(&flags);
     if (ret == NULL) {
@@ -659,7 +1173,9 @@ I32 *flagp;
 		maxpos = regparse;
 	    max = atoi(maxpos);
 	    if (!max && *maxpos != '0')
-		max = 32767;		/* meaning "infinity" */
+		max = REG_INFTY;		/* meaning "infinity" */
+	    else if (max >= REG_INFTY)
+		croak("Quantifier in {,} bigger than %d", REG_INFTY - 1);
 	    regparse = next;
 	    nextchar();
 
@@ -670,23 +1186,21 @@ I32 *flagp;
 	    }
 	    else {
 		regnaughty += 4 + regnaughty;	/* compound interest */
-		regtail(ret, regnode(WHILEM));
+		regtail(ret, reg_node(WHILEM));
 		reginsert(CURLYX,ret);
-		regtail(ret, regnode(NOTHING));
+		regtail(ret, reg_node(NOTHING));
 	    }
+#ifdef REGALIGN_STRUCT
+	    ret->flags = 0;
+#endif 
 
 	    if (min > 0)
 		*flagp = (WORST|HASWIDTH);
 	    if (max && max < min)
 		croak("Can't do {n,m} with n > m");
-	    if (regcode != &regdummy) {
-#ifdef REGALIGN
-		*(unsigned short *)(ret+3) = min;
-		*(unsigned short *)(ret+5) = max;
-#else
-		ret[3] = min >> 8; ret[4] = min & 0377;
-		ret[5] = max  >> 8; ret[6] = max  & 0377;
-#endif
+	    if (!SIZE_ONLY) {
+		ARG1_SET(ret, min);
+		ARG2_SET(ret, max);
 	    }
 
 	    goto nest_check;
@@ -707,6 +1221,9 @@ I32 *flagp;
 
     if (op == '*' && (flags&SIMPLE)) {
 	reginsert(STAR, ret);
+#ifdef REGALIGN_STRUCT
+	ret->flags = 0;
+#endif 
 	regnaughty += 4;
     }
     else if (op == '*') {
@@ -714,6 +1231,9 @@ I32 *flagp;
 	goto do_curly;
     } else if (op == '+' && (flags&SIMPLE)) {
 	reginsert(PLUS, ret);
+#ifdef REGALIGN_STRUCT
+	ret->flags = 0;
+#endif 
 	regnaughty += 3;
     }
     else if (op == '+') {
@@ -724,7 +1244,7 @@ I32 *flagp;
 	goto do_curly;
     }
   nest_check:
-    if (dowarn && regcode != &regdummy && !(flags&HASWIDTH) && max > 10000) {
+    if (dowarn && !SIZE_ONLY && !(flags&HASWIDTH) && max > 10000) {
 	warn("%.*s matches null string many times",
 	    regparse - origparse, origparse);
     }
@@ -733,7 +1253,7 @@ I32 *flagp;
 	nextchar();
 	reginsert(MINMOD, ret);
 #ifdef REGALIGN
-	regtail(ret, ret + 4);
+	regtail(ret, ret + NODE_STEP_REGNODE);
 #else
 	regtail(ret, ret + 3);
 #endif
@@ -754,11 +1274,11 @@ I32 *flagp;
  *
  * [Yes, it is worth fixing, some scripts can run twice the speed.]
  */
-static char *
+static regnode *
 regatom(flagp)
 I32 *flagp;
 {
-    register char *ret = 0;
+    register regnode *ret = 0;
     I32 flags;
 
     *flagp = WORST;		/* Tentatively. */
@@ -766,29 +1286,32 @@ I32 *flagp;
 tryagain:
     switch (*regparse) {
     case '^':
+	seen_zerolen++;
 	nextchar();
 	if (regflags & PMf_MULTILINE)
-	    ret = regnode(MBOL);
+	    ret = reg_node(MBOL);
 	else if (regflags & PMf_SINGLELINE)
-	    ret = regnode(SBOL);
+	    ret = reg_node(SBOL);
 	else
-	    ret = regnode(BOL);
+	    ret = reg_node(BOL);
 	break;
     case '$':
+	if (regparse[1]) 
+	    seen_zerolen++;
 	nextchar();
 	if (regflags & PMf_MULTILINE)
-	    ret = regnode(MEOL);
+	    ret = reg_node(MEOL);
 	else if (regflags & PMf_SINGLELINE)
-	    ret = regnode(SEOL);
+	    ret = reg_node(SEOL);
 	else
-	    ret = regnode(EOL);
+	    ret = reg_node(EOL);
 	break;
     case '.':
 	nextchar();
 	if (regflags & PMf_SINGLELINE)
-	    ret = regnode(SANY);
+	    ret = reg_node(SANY);
 	else
-	    ret = regnode(ANY);
+	    ret = reg_node(ANY);
 	regnaughty++;
 	*flagp |= HASWIDTH|SIMPLE;
 	break;
@@ -805,7 +1328,7 @@ tryagain:
 		    goto tryagain;
 		return(NULL);
 	}
-	*flagp |= flags&(HASWIDTH|SPSTART);
+	*flagp |= flags&(HASWIDTH|SPSTART|SIMPLE);
 	break;
     case '|':
     case ')':
@@ -830,57 +1353,61 @@ tryagain:
     case '\\':
 	switch (*++regparse) {
 	case 'A':
-	    ret = regnode(SBOL);
+	    seen_zerolen++;
+	    ret = reg_node(SBOL);
 	    *flagp |= SIMPLE;
 	    nextchar();
 	    break;
 	case 'G':
-	    ret = regnode(GPOS);
+	    ret = reg_node(GPOS);
+	    seen_gpos = 1;
 	    *flagp |= SIMPLE;
 	    nextchar();
 	    break;
 	case 'Z':
-	    ret = regnode(SEOL);
+	    ret = reg_node(SEOL);
 	    *flagp |= SIMPLE;
 	    nextchar();
 	    break;
 	case 'w':
-	    ret = regnode((regflags & PMf_LOCALE) ? ALNUML : ALNUM);
+	    ret = reg_node((regflags & PMf_LOCALE) ? ALNUML : ALNUM);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    break;
 	case 'W':
-	    ret = regnode((regflags & PMf_LOCALE) ? NALNUML : NALNUM);
+	    ret = reg_node((regflags & PMf_LOCALE) ? NALNUML : NALNUM);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    break;
 	case 'b':
-	    ret = regnode((regflags & PMf_LOCALE) ? BOUNDL : BOUND);
+	    seen_zerolen++;
+	    ret = reg_node((regflags & PMf_LOCALE) ? BOUNDL : BOUND);
 	    *flagp |= SIMPLE;
 	    nextchar();
 	    break;
 	case 'B':
-	    ret = regnode((regflags & PMf_LOCALE) ? NBOUNDL : NBOUND);
+	    seen_zerolen++;
+	    ret = reg_node((regflags & PMf_LOCALE) ? NBOUNDL : NBOUND);
 	    *flagp |= SIMPLE;
 	    nextchar();
 	    break;
 	case 's':
-	    ret = regnode((regflags & PMf_LOCALE) ? SPACEL : SPACE);
+	    ret = reg_node((regflags & PMf_LOCALE) ? SPACEL : SPACE);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    break;
 	case 'S':
-	    ret = regnode((regflags & PMf_LOCALE) ? NSPACEL : NSPACE);
+	    ret = reg_node((regflags & PMf_LOCALE) ? NSPACEL : NSPACE);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    break;
 	case 'd':
-	    ret = regnode(DIGIT);
+	    ret = reg_node(DIGIT);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    break;
 	case 'D':
-	    ret = regnode(NDIGIT);
+	    ret = reg_node(NDIGIT);
 	    *flagp |= HASWIDTH|SIMPLE;
 	    nextchar();
 	    break;
@@ -933,18 +1460,19 @@ tryagain:
 
     default: {
 	    register I32 len;
-	    register char ender;
+	    register U8 ender;
 	    register char *p;
-	    char *oldp;
+	    char *oldp, *s;
 	    I32 numlen;
 
 	    regparse++;
 
 	defchar:
-	    ret = regnode((regflags & PMf_FOLD)
+	    ret = reg_node((regflags & PMf_FOLD)
 			  ? ((regflags & PMf_LOCALE) ? EXACTFL : EXACTF)
 			  : EXACT);
-	    regc(0);		/* save spot for len */
+	    s = OPERAND(ret);
+	    regc(0, s++);		/* save spot for len */
 	    for (len = 0, p = regparse - 1;
 	      len < 127 && p < regxend;
 	      len++)
@@ -1042,11 +1570,11 @@ tryagain:
 			p = oldp;
 		    else {
 			len++;
-			regc(ender);
+			regc(ender, s++);
 		    }
 		    break;
 		}
-		regc(ender);
+		regc(ender, s++);
 	    }
 	loopdone:
 	    regparse = p - 1;
@@ -1057,9 +1585,16 @@ tryagain:
 		*flagp |= HASWIDTH;
 	    if (len == 1)
 		*flagp |= SIMPLE;
-	    if (regcode != &regdummy)
+	    if (!SIZE_ONLY)
 		*OPERAND(ret) = len;
-	    regc('\0');
+	    regc('\0', s++);
+	    if (SIZE_ONLY) {
+#ifdef REGALIGN_STRUCT
+		regsize += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode);
+#endif 
+	    } else {
+		regcode += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode);
+	    }
 	}
 	break;
     }
@@ -1091,38 +1626,41 @@ regset(opnd, c)
 char *opnd;
 register I32 c;
 {
-    if (opnd == &regdummy)
+    if (SIZE_ONLY)
 	return;
     c &= 0xFF;
     opnd[1 + (c >> 3)] |= (1 << (c & 7));
 }
 
-static char *
+static regnode *
 regclass()
 {
-    register char *opnd;
+    register char *opnd, *s;
     register I32 class;
     register I32 lastclass = 1234;
     register I32 range = 0;
-    register char *ret;
+    register regnode *ret;
     register I32 def;
     I32 numlen;
 
-    ret = regnode(ANYOF);
-    opnd = regcode;
+    s = opnd = OPERAND(regcode);
+    ret = reg_node(ANYOF);
     for (class = 0; class < 33; class++)
-	regc(0);
+	regc(0, s++);
     if (*regparse == '^') {	/* Complement of range. */
 	regnaughty++;
 	regparse++;
-	if (opnd != &regdummy)
+	if (!SIZE_ONLY)
 	    *opnd |= ANYOF_INVERT;
     }
-    if (opnd != &regdummy) {
+    if (!SIZE_ONLY) {
+	regcode += ANY_SKIP;
 	if (regflags & PMf_FOLD)
 	    *opnd |= ANYOF_FOLD;
 	if (regflags & PMf_LOCALE)
 	    *opnd |= ANYOF_LOCALE;
+    } else {
+	regsize += ANY_SKIP;
     }
     if (*regparse == ']' || *regparse == '-')
 	goto skipcond;		/* allow 1st char to be ] or - */
@@ -1134,7 +1672,7 @@ regclass()
 	    switch (class) {
 	    case 'w':
 		if (regflags & PMf_LOCALE) {
-		    if (opnd != &regdummy)
+		    if (SIZE_ONLY)
 			*opnd |= ANYOF_ALNUML;
 		}
 		else {
@@ -1146,7 +1684,7 @@ regclass()
 		continue;
 	    case 'W':
 		if (regflags & PMf_LOCALE) {
-		    if (opnd != &regdummy)
+		    if (!SIZE_ONLY)
 			*opnd |= ANYOF_NALNUML;
 		}
 		else {
@@ -1158,7 +1696,7 @@ regclass()
 		continue;
 	    case 's':
 		if (regflags & PMf_LOCALE) {
-		    if (opnd != &regdummy)
+		    if (!SIZE_ONLY)
 			*opnd |= ANYOF_SPACEL;
 		}
 		else {
@@ -1170,7 +1708,7 @@ regclass()
 		continue;
 	    case 'S':
 		if (regflags & PMf_LOCALE) {
-		    if (opnd != &regdummy)
+		    if (!SIZE_ONLY)
 			*opnd |= ANYOF_NSPACEL;
 		}
 		else {
@@ -1282,40 +1820,33 @@ nextchar()
 }
 
 /*
-- regnode - emit a node
+- reg_node - emit a node
 */
+static regnode *			/* Location. */
 #ifdef CAN_PROTOTYPE
-static char *			/* Location. */
-regnode(char op)
+reg_node(U8 op)
 #else
-static char *			/* Location. */
-regnode(op)
-char op;
+reg_node(op)
+U8 op;
 #endif
 {
-    register char *ret;
-    register char *ptr;
+    register regnode *ret;
+    register regnode *ptr;
 
     ret = regcode;
-    if (ret == &regdummy) {
-#ifdef REGALIGN
-	if (!(regsize & 1))
-	    regsize++;
-#endif
+    if (SIZE_ONLY) {
+	SIZE_ALIGN(regsize);
+#ifdef REGALIGN_STRUCT
+	regsize += 1;
+#else
 	regsize += 3;
+#endif 
 	return(ret);
     }
 
-#ifdef REGALIGN
-#ifndef lint
-    if (!((long)ret & 1))
-      *ret++ = 127;
-#endif
-#endif
+    NODE_ALIGN_FILL(ret);
     ptr = ret;
-    *ptr++ = op;
-    *ptr++ = '\0';		/* Null "next" pointer. */
-    *ptr++ = '\0';
+    FILL_ADVANCE_NODE(ptr, op);
     regcode = ptr;
 
     return(ret);
@@ -1324,45 +1855,32 @@ char op;
 /*
 - reganode - emit a node with an argument
 */
+static regnode *			/* Location. */
 #ifdef CAN_PROTOTYPE
-static char *			/* Location. */
-reganode(char op, unsigned short arg)
+reganode(U8 op, U32 arg)
 #else
-static char *			/* Location. */
 reganode(op, arg)
-char op;
-unsigned short arg;
+U8 op;
+U32 arg;
 #endif
 {
-    register char *ret;
-    register char *ptr;
+    register regnode *ret;
+    register regnode *ptr;
 
     ret = regcode;
-    if (ret == &regdummy) {
+    if (SIZE_ONLY) {
+	SIZE_ALIGN(regsize);
 #ifdef REGALIGN
-	if (!(regsize & 1))
-	    regsize++;
-#endif
+	regsize += 2;
+#else
 	regsize += 5;
+#endif 
 	return(ret);
     }
 
-#ifdef REGALIGN
-#ifndef lint
-    if (!((long)ret & 1))
-      *ret++ = 127;
-#endif
-#endif
+    NODE_ALIGN_FILL(ret);
     ptr = ret;
-    *ptr++ = op;
-    *ptr++ = '\0';		/* Null "next" pointer. */
-    *ptr++ = '\0';
-#ifdef REGALIGN
-    *(unsigned short *)(ret+3) = arg;
-#else
-    ret[3] = arg >> 8; ret[4] = arg & 0377;
-#endif
-    ptr += 2;
+    FILL_ADVANCE_NODE_ARG(ptr, op, arg);
     regcode = ptr;
 
     return(ret);
@@ -1373,17 +1891,16 @@ unsigned short arg;
 */
 #ifdef CAN_PROTOTYPE
 static void
-regc(char b)
+regc(U8 b, char* s)
 #else
 static void
-regc(b)
-char b;
+regc(b, s)
+U8 b;
+char *s
 #endif
 {
-    if (regcode != &regdummy)
-	*regcode++ = b;
-    else
-	regsize++;
+    if (!SIZE_ONLY)
+	*s = b;
 }
 
 /*
@@ -1393,46 +1910,38 @@ char b;
 */
 #ifdef CAN_PROTOTYPE
 static void
-reginsert(char op, char *opnd)
+reginsert(U8 op, regnode *opnd)
 #else
 static void
 reginsert(op, opnd)
-char op;
-char *opnd;
+U8 op;
+regnode *opnd;
 #endif
 {
-    register char *src;
-    register char *dst;
-    register char *place;
-    register int offset = (regkind[(U8)op] == CURLY ? 4 : 0);
+    register regnode *src;
+    register regnode *dst;
+    register regnode *place;
+    char *s;
+    register int offset = (regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0);
 
-    if (regcode == &regdummy) {
-#ifdef REGALIGN
-	regsize += 4 + offset;
-#else
-	regsize += 3 + offset;
-#endif
+    if (SIZE_ONLY) {
+	regsize += NODE_STEP_REGNODE + offset;
 	return;
     }
 
     src = regcode;
-#ifdef REGALIGN
-    regcode += 4 + offset;
-#else
-    regcode += 3 + offset;
-#endif
+    regcode += NODE_STEP_REGNODE + offset;
     dst = regcode;
     while (src > opnd)
-	*--dst = *--src;
+	StructCopy(--src, --dst, regnode);
 
     place = opnd;		/* Op node, where operand used to be. */
-    *place++ = op;
-    *place++ = '\0';
-    *place++ = '\0';
+    s = OPERAND(place);
+    FILL_ADVANCE_NODE(place, op);
     while (offset-- > 0)
-	*place++ = '\0';
-#ifdef REGALIGN
-    *place++ = '\177';
+	*s++ = '\0';
+#if defined(REGALIGN) && !defined(REGALIGN_STRUCT)
+    *s++ = '\177';
 #endif
 }
 
@@ -1441,14 +1950,14 @@ char *opnd;
 */
 static void
 regtail(p, val)
-char *p;
-char *val;
+regnode *p;
+regnode *val;
 {
-    register char *scan;
-    register char *temp;
+    register regnode *scan;
+    register regnode *temp;
     register I32 offset;
 
-    if (p == &regdummy)
+    if (SIZE_ONLY)
 	return;
 
     /* Find last node. */
@@ -1461,12 +1970,14 @@ char *val;
     }
 
 #ifdef REGALIGN
+#  ifdef REGALIGN_STRUCT
+    NEXT_OFF(scan) = val - scan;
+#  else
     offset = val - scan;
-#ifndef lint
+#    ifndef lint
     *(short*)(scan+1) = offset;
-#else
-    offset = offset;
-#endif
+#    endif
+#endif 
 #else
     if (OP(scan) == BACK)
 	offset = scan - val;
@@ -1482,11 +1993,11 @@ char *val;
 */
 static void
 regoptail(p, val)
-char *p;
-char *val;
+regnode *p;
+regnode *val;
 {
     /* "Operandless" and "op != BRANCH" are synonymous in practice. */
-    if (p == NULL || p == &regdummy || regkind[(U8)OP(p)] != BRANCH)
+    if (p == NULL || SIZE_ONLY || regkind[(U8)OP(p)] != BRANCH)
 	return;
     regtail(NEXTOPER(p), val);
 }
@@ -1515,6 +2026,59 @@ register char *s;
 
 #ifdef DEBUGGING
 
+static regnode *
+dumpuntil(start, node, last, sv, l)
+regnode *start;
+regnode *node;
+regnode *last;
+SV* sv;
+I32 l;
+{
+    register char op = EXACT;	/* Arbitrary non-END op. */
+    register regnode *next, *onode;
+
+    while (op != END && (!last || node < last)) {
+	/* While that wasn't END last time... */
+
+	NODE_ALIGN(node);
+	op = OP(node);
+	if (op == CLOSE)
+	    l--;	
+	/* Where, what. */
+	regprop(sv, node);
+	PerlIO_printf(Perl_debug_log, "%2d%*s%s", node - start, 
+		      2*l, "", SvPVX(sv));
+	next = regnext(node);
+	if (next == NULL)		/* Next ptr. */
+	    PerlIO_printf(Perl_debug_log, "(0)");
+	else 
+	    PerlIO_printf(Perl_debug_log, "(%d)", next - start);
+	(void)PerlIO_putc(Perl_debug_log, '\n');
+	if (regkind[(U8)op] == BRANCH) {
+	    node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
+	} else if (regkind[(U8)op] == CURLY && op != CURLYX) {
+	    node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS, next, sv, l + 1);
+	} else if ( op == PLUS || op == STAR ) {
+	    node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
+	} else if (op == ANYOF) {
+	    node = NEXTOPER(node);
+	    node += ANY_SKIP;
+	} else if (regkind[(U8)op] == EXACT) {
+            /* Literal string, where present. */
+	    node += ((*OPERAND(node)) + 2 + sizeof(regnode) - 1) / sizeof(regnode);
+	    node = NEXTOPER(node);
+	} else {
+	    node = NEXTOPER(node);
+	    node += regarglen[(U8)op];
+	}
+	if (op == CURLYX || op == OPEN)
+	    l++;
+	else if (op == WHILEM)
+	    l--;
+    }
+    return node;
+}
+
 /*
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
@@ -1522,49 +2086,32 @@ void
 regdump(r)
 regexp *r;
 {
-    register char *s;
-    register char op = EXACT;	/* Arbitrary non-END op. */
-    register char *next;
     SV *sv = sv_newmortal();
 
-    s = r->program + 1;
-    while (op != END) {	/* While that wasn't END last time... */
-#ifdef REGALIGN
-	if (!((long)s & 1))
-	    s++;
-#endif
-	op = OP(s);
-	/* where, what */
-	regprop(sv, s);
-	PerlIO_printf(Perl_debug_log, "%2d%s", s - r->program, SvPVX(sv));
-	next = regnext(s);
-	s += regarglen[(U8)op];
-	if (next == NULL)		/* Next ptr. */
-	    PerlIO_printf(Perl_debug_log, "(0)");
-	else 
-	    PerlIO_printf(Perl_debug_log, "(%d)", (s-r->program)+(next-s));
-	s += 3;
-	if (op == ANYOF) {
-	    s += 33;
-	}
-	if (regkind[(U8)op] == EXACT) {
-	    /* Literal string, where present. */
-	    s++;
-	    (void)PerlIO_putc(Perl_debug_log, ' ');
-	    (void)PerlIO_putc(Perl_debug_log, '<');
-	    while (*s != '\0') {
-		(void)PerlIO_putc(Perl_debug_log,*s);
-		s++;
-	    }
-	    (void)PerlIO_putc(Perl_debug_log, '>');
-	    s++;
-	}
-	(void)PerlIO_putc(Perl_debug_log, '\n');
-    }
+    (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
 
     /* Header fields of interest. */
-    if (r->regstart)
-	PerlIO_printf(Perl_debug_log, "start `%s' ", SvPVX(r->regstart));
+    if (r->anchored_substr)
+	PerlIO_printf(Perl_debug_log, "anchored `%s'%s at %d ", 
+		      SvPVX(r->anchored_substr), 
+		      SvTAIL(r->anchored_substr) ? "$" : "",
+		      r->anchored_offset);
+    if (r->float_substr)
+	PerlIO_printf(Perl_debug_log, "floating `%s'%s at %d..%u ", 
+		      SvPVX(r->float_substr), 
+		      SvTAIL(r->float_substr) ? "$" : "",
+		      r->float_min_offset, r->float_max_offset);
+    if (r->check_substr)
+	PerlIO_printf(Perl_debug_log, 
+		      r->check_substr == r->float_substr 
+		      ? "(checking floating" : "(checking anchored");
+    if (r->reganch & ROPT_CHECK_AT_START)
+	PerlIO_printf(Perl_debug_log, " noscan");
+    if (r->reganch & ROPT_CHECK_ALL)
+	PerlIO_printf(Perl_debug_log, " isall");
+    if (r->check_substr)
+	PerlIO_printf(Perl_debug_log, ") ");
+
     if (r->regstclass) {
 	regprop(sv, r->regstclass);
 	PerlIO_printf(Perl_debug_log, "stclass `%s' ", SvPVX(sv));
@@ -1577,13 +2124,12 @@ regexp *r;
 	    PerlIO_printf(Perl_debug_log, "(GPOS)");
 	PerlIO_putc(Perl_debug_log, ' ');
     }
+    if (r->reganch & ROPT_GPOS_SEEN)
+	PerlIO_printf(Perl_debug_log, "GPOS ");
     if (r->reganch & ROPT_SKIP)
 	PerlIO_printf(Perl_debug_log, "plus ");
     if (r->reganch & ROPT_IMPLICIT)
 	PerlIO_printf(Perl_debug_log, "implicit ");
-    if (r->regmust != NULL)
-	PerlIO_printf(Perl_debug_log, "must have \"%s\" back %ld ", SvPVX(r->regmust),
-	 (long) r->regback);
     PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
     PerlIO_printf(Perl_debug_log, "\n");
 }
@@ -1594,7 +2140,7 @@ regexp *r;
 void
 regprop(sv, op)
 SV *sv;
-char *op;
+regnode *op;
 {
     register char *p = 0;
 
@@ -1631,17 +2177,20 @@ char *op;
 	p = "BRANCH";
 	break;
     case EXACT:
-	p = "EXACT";
+	sv_catpvf(sv, "EXACT <%s>", OPERAND(op) + 1);
 	break;
     case EXACTF:
-	p = "EXACTF";
+	sv_catpvf(sv, "EXACTF <%s>", OPERAND(op) + 1);
 	break;
     case EXACTFL:
-	p = "EXACTFL";
+	sv_catpvf(sv, "EXACTFL <%s>", OPERAND(op) + 1);
 	break;
     case NOTHING:
 	p = "NOTHING";
 	break;
+    case TAIL:
+	p = "TAIL";
+	break;
     case BACK:
 	p = "BACK";
 	break;
@@ -1663,23 +2212,37 @@ char *op;
     case CURLY:
 	sv_catpvf(sv, "CURLY {%d,%d}", ARG1(op), ARG2(op));
 	break;
+    case CURLYN:
+#ifdef REGALIGN_STRUCT
+	sv_catpvf(sv, "CURLYN[%d] {%d,%d}", op->flags, ARG1(op), ARG2(op));
+#else
+	sv_catpvf(sv, "CURLYN {%d,%d}", ARG1(op), ARG2(op));
+#endif 
+	break;
     case CURLYX:
 	sv_catpvf(sv, "CURLYX {%d,%d}", ARG1(op), ARG2(op));
 	break;
+    case CURLYM:
+#ifdef REGALIGN_STRUCT
+	sv_catpvf(sv, "CURLYM[%d] {%d,%d}", op->flags, ARG1(op), ARG2(op));
+#else
+	sv_catpvf(sv, "CURLYM {%d,%d}", ARG1(op), ARG2(op));
+#endif 
+	break;
     case REF:
-	sv_catpvf(sv, "REF%d", ARG1(op));
+	sv_catpvf(sv, "REF%d", ARG(op));
 	break;
     case REFF:
-	sv_catpvf(sv, "REFF%d", ARG1(op));
+	sv_catpvf(sv, "REFF%d", ARG(op));
 	break;
     case REFFL:
-	sv_catpvf(sv, "REFFL%d", ARG1(op));
-	break;
+	sv_catpvf(sv, "REFFL%d", ARG(op));
+        break;
     case OPEN:
-	sv_catpvf(sv, "OPEN%d", ARG1(op));
+	sv_catpvf(sv, "OPEN%d", ARG(op));
 	break;
     case CLOSE:
-	sv_catpvf(sv, "CLOSE%d", ARG1(op));
+	sv_catpvf(sv, "CLOSE%d", ARG(op));
 	p = NULL;
 	break;
     case STAR:
@@ -1695,10 +2258,18 @@ char *op;
 	p = "GPOS";
 	break;
     case UNLESSM:
+#ifdef REGALIGN_STRUCT
+	sv_catpvf(sv, "UNLESSM[-%d]", op->flags);
+#else
 	p = "UNLESSM";
+#endif 
 	break;
     case IFMATCH:
+#ifdef REGALIGN_STRUCT
+	sv_catpvf(sv, "IFMATCH[-%d]", op->flags);
+#else
 	p = "IFMATCH";
+#endif 
 	break;
     case SUCCEED:
 	p = "SUCCEED";
@@ -1736,6 +2307,9 @@ char *op;
     case NSPACEL:
 	p = "NSPACEL";
 	break;
+    case EVAL:
+	p = "EVAL";
+	break;
     default:
 	FAIL("corrupted regexp opcode");
     }
@@ -1750,21 +2324,30 @@ struct regexp *r;
 {
     if (!r)
 	return;
-    if (r->precomp) {
+    if (r->precomp)
 	Safefree(r->precomp);
-	r->precomp = Nullch;
-    }
-    if (r->subbase) {
+    if (r->subbase)
 	Safefree(r->subbase);
-	r->subbase = Nullch;
-    }
-    if (r->regmust) {
-	SvREFCNT_dec(r->regmust);
-	r->regmust = Nullsv;
-    }
-    if (r->regstart) {
-	SvREFCNT_dec(r->regstart);
-	r->regstart = Nullsv;
+    if (r->anchored_substr)
+	SvREFCNT_dec(r->anchored_substr);
+    if (r->float_substr)
+	SvREFCNT_dec(r->float_substr);
+    if (r->data) {
+	int n = r->data->count;
+	while (--n >= 0) {
+	    switch (r->data->what[n]) {
+	    case 's':
+		SvREFCNT_dec((SV*)r->data->data[n]);
+		break;
+	    case 'o':
+		op_free((OP*)r->data->data[n]);
+		break;
+	    case 'n':
+		break;
+	    default:
+		croak("panic: regfree data code '%c'", r->data->what[n]);
+	    }
+	}
     }
     Safefree(r->startp);
     Safefree(r->endp);
diff -pru perl5.004_01.os2/regcomp.h perl5.004_01.my/regcomp.h
--- perl5.004_01.os2/regcomp.h	Fri Jun  6 16:42:20 1997
+++ perl5.004_01.my/regcomp.h	Sat Jun 14 21:18:10 1997
@@ -31,6 +31,18 @@
  * For instance, /[a-z].foo/ has a regmust of 'foo' and a regback of 2.]
  */
 
+/* #ifndef gould */
+/* #ifndef cray */
+/* #ifndef eta10 */
+#define REGALIGN
+/* #endif */
+/* #endif */
+/* #endif */
+
+#ifdef REGALIGN
+#  define REGALIGN_STRUCT
+#endif 
+
 /*
  * Structure for regexp "program".  This is essentially a linear encoding
  * of a nondeterministic finite-state machine (aka syntax charts or
@@ -72,27 +84,32 @@
 #define BOUNDL	21	/* no	Match "" at any word boundary */
 #define NBOUND	22	/* no	Match "" at any word non-boundary */
 #define NBOUNDL	23	/* no	Match "" at any word non-boundary */
-#define REF	24	/* num	Match already matched string */
-#define REFF	25	/* num	Match already matched string, folded */
-#define REFFL	26	/* num	Match already matched string, folded in loc. */
-#define	OPEN	27	/* num	Mark this point in input as start of #n. */
-#define	CLOSE	28	/* num	Analogous to OPEN. */
-#define MINMOD	29	/* no	Next operator is not greedy. */
-#define GPOS	30	/* no	Matches where last m//g left off. */
-#define IFMATCH	31	/* no	Succeeds if the following matches. */
-#define UNLESSM	32	/* no	Fails if the following matches. */
-#define SUCCEED	33	/* no	Return from a subroutine, basically. */
-#define WHILEM	34	/* no	Do curly processing and see if rest matches. */
-#define ALNUM	35	/* no	Match any alphanumeric character */
-#define ALNUML	36 	/* no	Match any alphanumeric char in locale */
-#define NALNUM	37	/* no	Match any non-alphanumeric character */
-#define NALNUML	38	/* no	Match any non-alphanumeric char in locale */
-#define SPACE	39	/* no	Match any whitespace character */
-#define SPACEL	40	/* no	Match any whitespace char in locale */
-#define NSPACE	41	/* no	Match any non-whitespace character */
-#define NSPACEL	42	/* no	Match any non-whitespace char in locale */
-#define DIGIT	43	/* no	Match any numeric character */
-#define NDIGIT	44	/* no	Match any non-numeric character */
+#define REF	24	/* num	Match some already matched string */
+#define	OPEN	25	/* num	Mark this point in input as start of #n. */
+#define	CLOSE	26	/* num	Analogous to OPEN. */
+#define MINMOD	27	/* no	Next operator is not greedy. */
+#define GPOS	28	/* no	Matches where last m//g left off. */
+#define IFMATCH	29	/* no	Succeeds if the following matches. */
+#define UNLESSM	30	/* no	Fails if the following matches. */
+#define SUCCEED	31	/* no	Return from a subroutine, basically. */
+#define WHILEM	32	/* no	Do curly processing and see if rest matches. */
+#define ALNUM	33	/* no	Match any alphanumeric character */
+#define ALNUML	34 	/* no	Match any alphanumeric char in locale */
+#define NALNUM	35	/* no	Match any non-alphanumeric character */
+#define NALNUML	36	/* no	Match any non-alphanumeric char in locale */
+#define SPACE	37	/* no	Match any whitespace character */
+#define SPACEL	38	/* no	Match any whitespace char in locale */
+#define NSPACE	39	/* no	Match any non-whitespace character */
+#define NSPACEL	40	/* no	Match any non-whitespace char in locale */
+#define DIGIT	41	/* no	Match any numeric character */
+#define NDIGIT	42	/* no	Match any non-numeric character */
+#define CURLYM	43	/* no	Match this medium-complex thing {n,m} times. */
+#define CURLYN	44	/* no	Match next-after-this simple thing
+			   {n,m} times, set parenths. */
+#define	TAIL	45	/* no	Match empty string. Can jump here from outside. */
+#define REFF	46      /* num  Match already matched string, folded */
+#define REFFL	47      /* num  Match already matched string, folded in loc. */
+#define EVAL	48      /* evl  Execute some Perl code. */
 
 /*
  * Opcode notes:
@@ -117,21 +134,9 @@
  */
 
 #ifndef DOINIT
-EXT char regarglen[];
-#else
-EXT char regarglen[] = {
-    0,0,0,0,0,0,0,0,0,0,
-    /*CURLY*/ 4, /*CURLYX*/ 4,
-    0,0,0,0,0,0,0,0,0,0,0,0,
-    /*REF*/ 2, 2, 2, /*OPEN*/ 2, /*CLOSE*/ 2,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-};
-#endif
-
-#ifndef DOINIT
-EXT char regkind[];
+EXT U8 regkind[];
 #else
-EXT char regkind[] = {
+EXT U8 regkind[] = {
 	END,
 	BOL,
 	BOL,
@@ -157,8 +162,6 @@ EXT char regkind[] = {
 	NBOUND,
 	NBOUND,
 	REF,
-	REF,
-	REF,
 	OPEN,
 	CLOSE,
 	MINMOD,
@@ -177,19 +180,26 @@ EXT char regkind[] = {
 	NSPACE,
 	DIGIT,
 	NDIGIT,
+	CURLY,
+	CURLY,
+	NOTHING,
+	REF,
+	REF,
+	EVAL,
 };
 #endif
 
-/* The following have no fixed length. */
+/* The following have no fixed length. char* since we do strchr on it. */
 #ifndef DOINIT
 EXT char varies[];
 #else
 EXT char varies[] = {
-    BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL, WHILEM, 0
+    BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL, 
+    WHILEM, CURLYM, CURLYN, 0
 };
 #endif
 
-/* The following always have a length of 1. */
+/* The following always have a length of 1. char* since we do strchr on it. */
 #ifndef DOINIT
 EXT char simple[];
 #else
@@ -201,8 +211,6 @@ EXT char simple[] = {
 };
 #endif
 
-EXT char regdummy;
-
 /*
  * A node is one char of opcode followed by two chars of "next" pointer.
  * "Next" pointers are stored as two 8-bit pieces, high order first.  The
@@ -219,42 +227,118 @@ EXT char regdummy;
  * stored negative.]
  */
 
-#ifndef gould
-#ifndef cray
-#ifndef eta10
-#define REGALIGN
-#endif
-#endif
-#endif
+#ifdef REGALIGN_STRUCT
 
-#define	OP(p)	(*(p))
+struct regnode_string {
+    U8	flags;
+    U8  type;
+    U16 next_off;
+    char string[1];
+};
+
+struct regnode_1 {
+    U8	flags;
+    U8  type;
+    U16 next_off;
+    U32 arg1;
+};
+
+struct regnode_2 {
+    U8	flags;
+    U8  type;
+    U16 next_off;
+    U16 arg1;
+    U16 arg2;
+};
+
+#endif 
+
+#define REG_INFTY I16_MAX
 
-#ifndef lint
 #ifdef REGALIGN
-#define NEXT(p) (*(short*)(p+1))
-#define ARG1(p) (*(unsigned short*)(p+3))
-#define ARG2(p) (*(unsigned short*)(p+5))
-#else
-#define	NEXT(p)	(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
-#define	ARG1(p)	(((*((p)+3)&0377)<<8) + (*((p)+4)&0377))
-#define	ARG2(p)	(((*((p)+5)&0377)<<8) + (*((p)+6)&0377))
+#define ARG_VALUE(arg) (arg)
+#define ARG__SET(arg,val) ((arg) = (val))
+#else
+#define ARG_VALUE(arg) (((*((char*)&arg)&0377)<<8) + (*(((char*)&arg)+1)&0377))
+#define ARG__SET(arg,val) (((char*)&arg)[0] = (val) >> 8; ((char*)&arg)[1] = (val) & 0377;)
 #endif
+
+#define ARG(p) ARG_VALUE(ARG_LOC(p))
+#define ARG1(p) ARG_VALUE(ARG1_LOC(p))
+#define ARG2(p) ARG_VALUE(ARG2_LOC(p))
+#define ARG_SET(p, val) ARG__SET(ARG_LOC(p), (val))
+#define ARG1_SET(p, val) ARG__SET(ARG1_LOC(p), (val))
+#define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val))
+
+#ifndef lint
+#  ifdef REGALIGN
+#    ifdef REGALIGN_STRUCT
+#      define NEXT_OFF(p) ((p)->next_off)
+#      define	NODE_ALIGN(node)
+#      define	NODE_ALIGN_FILL(node) ((node)->flags = 0xde) /* deadbeef */
+#    else
+#      define NEXT_OFF(p) (*(short*)(p+1))
+#      define	NODE_ALIGN(node)	((!((long)node & 1)) ? node++ : 0)
+#      define	NODE_ALIGN_FILL(node)	((!((long)node & 1)) ? *node++ = 127 : 0)
+#    endif 
+#  else
+#    define	NEXT_OFF(p)	(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
+#    define	NODE_ALIGN(node)
+#    define	NODE_ALIGN_FILL(node)
+#  endif
 #else /* lint */
-#define NEXT(p) 0
+#  define NEXT_OFF(p) 0
+#  define	NODE_ALIGN(node)
+#  define	NODE_ALIGN_FILL(node)
 #endif /* lint */
 
-#define	OPERAND(p)	((p) + 3)
+#define SIZE_ALIGN NODE_ALIGN
+
+#ifdef REGALIGN_STRUCT
+#  define	OP(p)	((p)->type)
+#  define	OPERAND(p)	(((struct regnode_string *)p)->string)
+#  define	NODE_ALIGN(node)
+#  define	ARG_LOC(p) (((struct regnode_1 *)p)->arg1)
+#  define	ARG1_LOC(p) (((struct regnode_2 *)p)->arg1)
+#  define	ARG2_LOC(p) (((struct regnode_2 *)p)->arg2)
+#  define NODE_STEP_REGNODE	1	/* sizeof(regnode)/sizeof(regnode) */
+#  define EXTRA_STEP_2ARGS	EXTRA_SIZE(struct regnode_2)
+#else
+#  define	OP(p)	(*(p))
+#  define	OPERAND(p)	((p) + 3)
+#  define	ARG_LOC(p) (*(unsigned short*)(p+3))
+#  define	ARG1_LOC(p) (*(unsigned short*)(p+3))
+#  define	ARG2_LOC(p) (*(unsigned short*)(p+5))
+typedef char* regnode;
+#  define NODE_STEP_REGNODE	NODE_STEP_B
+#  define EXTRA_STEP_2ARGS	4
+#endif 
 
 #ifdef REGALIGN
-#define	NEXTOPER(p)	((p) + 4)
-#define	PREVOPER(p)	((p) - 4)
+#  define NODE_STEP_B	4
 #else
-#define	NEXTOPER(p)	((p) + 3)
-#define	PREVOPER(p)	((p) - 3)
+#  define NODE_STEP_B	3
+#endif
+
+#define	NEXTOPER(p)	((p) + NODE_STEP_REGNODE)
+#define	PREVOPER(p)	((p) - NODE_STEP_REGNODE)
+
+#ifdef REGALIGN_STRUCT
+#  define FILL_ADVANCE_NODE(ptr, op) STMT_START { \
+    (ptr)->type = op;    (ptr)->next_off = 0;   (ptr)++; } STMT_END
+#  define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \
+    ARG_SET(ptr, arg);  FILL_ADVANCE_NODE(ptr, op); (ptr) += 1; } STMT_END
+#else
+#  define FILL_ADVANCE_NODE(ptr, op) STMT_START { \
+    *(ptr)++ = op;    *(ptr)++ = '\0';    *(ptr)++ = '\0'; } STMT_END
+#  define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \
+    ARG_SET(ptr, arg);  FILL_ADVANCE_NODE(ptr, op); (ptr) += 2; } STMT_END
 #endif
 
 #define MAGIC 0234
 
+#define SIZE_ONLY (regcode == &regdummy)
+
 /* Flags for first parameter byte of ANYOF */
 #define ANYOF_INVERT	0x40
 #define ANYOF_FOLD	0x20
@@ -265,6 +349,13 @@ EXT char regdummy;
 #define ANYOF_SPACEL	 0x02
 #define ANYOF_NSPACEL	 0x01
 
+#ifdef REGALIGN_STRUCT
+#define ANY_SKIP ((33 - 1)/sizeof(regnode) + 1)
+#else
+#define ANY_SKIP 32			/* overwrite the first byte of
+					 * the next guy.  */
+#endif 
+
 /*
  * Utility definitions.
  */
@@ -279,3 +370,46 @@ EXT char regdummy;
 #endif /* lint */
 
 #define	FAIL(m)	croak("/%.127s/: %s",regprecomp,m)
+
+EXT regnode regdummy;
+
+#define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode))
+
+#ifndef DOINIT
+EXT U8 regarglen[];
+#else
+#  ifdef REGALIGN_STRUCT
+EXT U8 regarglen[] = {
+    0,0,0,0,0,0,0,0,0,0,
+    /*CURLY*/ EXTRA_SIZE(struct regnode_2), 
+    /*CURLYX*/ EXTRA_SIZE(struct regnode_2),
+    0,0,0,0,0,0,0,0,0,0,0,0,
+    /*REF*/ EXTRA_SIZE(struct regnode_1), 
+    /*OPEN*/ EXTRA_SIZE(struct regnode_1),
+    /*CLOSE*/ EXTRA_SIZE(struct regnode_1),
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    /*CURLYM*/ EXTRA_SIZE(struct regnode_2),
+    /*CURLYN*/ EXTRA_SIZE(struct regnode_2),
+    0,
+    /*REFF*/ EXTRA_SIZE(struct regnode_1),
+    /*REFFL*/ EXTRA_SIZE(struct regnode_1),
+    /*EVAL*/ EXTRA_SIZE(struct regnode_1),
+};
+#  else
+EXT U8 regarglen[] = {
+    0,0,0,0,0,0,0,0,0,0,
+    /*CURLY*/ 4, /*CURLYX*/ 4,
+    0,0,0,0,0,0,0,0,0,0,0,0,
+    /*REF*/ 2, /*OPEN*/ 2, /*CLOSE*/ 2,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,/*CURLYM*/ 4,/*CURLYN*/ 4,
+    0, /*REFF*/ 2, /*REFFL*/ 2, /*EVAL*/2
+};
+#  endif 
+#endif
+
+struct reg_data {
+    U32 count;
+    U8 *what;
+    void* data[1];
+};
+
--- ../perl5.004_01.os2/regexec.c	Fri Jun  6 16:42:22 1997
+++ ./regexec.c	Mon Jun 23 01:44:10 1997
@@ -60,8 +60,8 @@
 #endif
 
 #ifdef DEBUGGING
-static I32 regnarrate = 0;
-static char* regprogram = 0;
+static I32	regnarrate = 0;
+static regnode* regprogram = 0;
 #endif
 
 /* Current curly descriptor */
@@ -72,13 +72,16 @@ struct curcur {
     int		min;		/* the minimal number of scans to match */
     int		max;		/* the maximal number of scans to match */
     int		minmod;		/* whether to work our way up or down */
-    char *	scan;		/* the thing to match */
-    char *	next;		/* what has to match after it */
+    regnode *	scan;		/* the thing to match */
+    regnode *	next;		/* what has to match after it */
     char *	lastloc;	/* where we started matching this scan */
     CURCUR *	oldcc;		/* current curly before we started this one */
 };
 
 static CURCUR* regcc;
+static char ** reg_start_tmp;
+static U32 reg_start_tmpl;
+static struct reg_data *data;
 
 typedef I32 CHECKPOINT;
 
@@ -90,13 +93,14 @@ regcppush(parenfloor)
 I32 parenfloor;
 {
     int retval = savestack_ix;
-    int i = (regsize - parenfloor) * 3;
+    int i = (regsize - parenfloor) * 4;
     int p;
 
     SSCHECK(i + 5);
     for (p = regsize; p > parenfloor; p--) {
 	SSPUSHPTR(regendp[p]);
 	SSPUSHPTR(regstartp[p]);
+	SSPUSHINT(reg_start_tmp[p]);
 	SSPUSHINT(p);
     }
     SSPUSHINT(regsize);
@@ -119,13 +123,29 @@ regcppop()
     input = (char *) SSPOPPTR;
     *reglastparen = SSPOPINT;
     regsize = SSPOPINT;
-    for (i -= 3; i > 0; i -= 3) {
+    for (i -= 3; i > 0; i -= 4) {
 	paren = (U32)SSPOPINT;
+	reg_start_tmp[paren] = (char *) SSPOPPTR;
 	regstartp[paren] = (char *) SSPOPPTR;
 	tmps = (char*)SSPOPPTR;
 	if (paren <= *reglastparen)
 	    regendp[paren] = tmps;
+#ifdef DEBUGGING
+	if (debug & 512) {
+	    PerlIO_printf(Perl_debug_log, "  restoring \\%d to %d(%d)..%d%s\n",
+			  paren, regstartp[paren] - regbol, 
+			  reg_start_tmp[paren] - regbol,
+			  regendp[paren] - regbol, 
+			  (paren > *reglastparen ? "(no)" : ""));
+	}
+#endif 
     }
+#ifdef DEBUGGING
+	if (debug & 512 && *reglastparen + 1 <= regnpar) {
+	    PerlIO_printf(Perl_debug_log, "  restoring \\%d..\\%d to undef\n",
+			  *reglastparen + 1, regnpar);
+	}
+#endif 
     for (paren = *reglastparen + 1; paren <= regnpar; paren++) {
 	if (paren > regsize)
 	    regstartp[paren] = Nullch;
@@ -144,37 +164,46 @@ regcppop()
  * Forwards.
  */
 
-static I32 regmatch _((char *prog));
-static I32 regrepeat _((char *p, I32 max));
+static I32 regmatch _((regnode *prog));
+static I32 regrepeat _((regnode *p, I32 max));
+static I32 regrepeat_hard _((regnode *p, I32 max, I32 *lp));
 static I32 regtry _((regexp *prog, char *startpos));
 static bool reginclass _((char *p, I32 c));
 
 static bool regtainted;		/* tainted information used? */
+static bool reg_warned;		/* warned about big count? */
 
 /*
  - pregexec - match a regexp against a string
  */
 I32
-pregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
+pregexec(prog, stringarg, strend, strbeg, minend, screamer, data, flags)
 register regexp *prog;
 char *stringarg;
 register char *strend;	/* pointer to null at end of string */
-char *strbeg;	/* real beginning of string */
-I32 minend;	/* end of match must be at least minend after stringarg */
+char *strbeg;		/* real beginning of string */
+I32 minend;		/* end of match must be >=minend after stringarg. */
 SV *screamer;
-I32 safebase;	/* no need to remember string in subbase */
+void *data;		/* May be used for some additional optimizations. */
+U32 flags;		/* For optimizations. */
 {
     register char *s;
-    register char *c;
+    register regnode *c;
     register char *startpos = stringarg;
     register I32 tmp;
-    I32 minlen = 0;		/* must match at least this many chars */
+    I32 minlen;			/* must match at least this many chars */
     I32 dontbother = 0;	/* how many characters not to try at end */
     CURCUR cc;
+    I32 start_shift = 0;		/* Offset of the start to find
+					 constant substr. */
+    I32 end_shift = 0;			/* Same for the end. */
+    I32 scream_pos = -1;		/* Internal iterator of scream. */
+    char *scream_olds;
 
     cc.cur = 0;
     cc.oldcc = 0;
     regcc = &cc;
+    reg_warned = 0;
 
 #ifdef DEBUGGING
     regnarrate = debug & 512;
@@ -187,6 +216,9 @@ I32 safebase;	/* no need to remember str
 	return 0;
     }
 
+    minlen = prog->minlen;
+    if (strend - startpos < minlen) goto phooey;
+
     if (startpos == strbeg)	/* is ^ valid at stringarg? */
 	regprev = '\n';
     else {
@@ -206,39 +238,42 @@ I32 safebase;	/* no need to remember str
 
     /* If there is a "must appear" string, look for it. */
     s = startpos;
-    if (prog->regmust != Nullsv &&
+    if (!(flags & REXEC_CHECKED) 
+	&& prog->check_substr != Nullsv &&
 	!(prog->reganch & ROPT_ANCH_GPOS) &&
 	(!(prog->reganch & ROPT_ANCH_BOL)
-	 || (multiline && prog->regback >= 0)) )
+	 || (multiline && prog->check_substr == prog->anchored_substr)) )
     {
-	if (stringarg == strbeg && screamer) {
-	    if (screamfirst[BmRARE(prog->regmust)] >= 0)
-		    s = screaminstr(screamer,prog->regmust);
+	start_shift = prog->check_offset_min;
+	/* Should be nonnegative! */
+	end_shift = minlen - start_shift - SvCUR(prog->check_substr);
+	if (screamer) {
+	    if (screamfirst[BmRARE(prog->check_substr)] >= 0)
+		    s = screaminstr(screamer, prog->check_substr, 
+				    start_shift + (stringarg - strbeg),
+				    end_shift, &scream_pos, 0);
 	    else
 		    s = Nullch;
+	    scream_olds = s;
 	}
 	else
-	    s = fbm_instr((unsigned char*)s, (unsigned char*)strend,
-		prog->regmust);
+	    s = fbm_instr((unsigned char*)s + start_shift,
+			  (unsigned char*)strend - end_shift,
+		prog->check_substr);
 	if (!s) {
-	    ++BmUSEFUL(prog->regmust);	/* hooray */
+	    ++BmUSEFUL(prog->check_substr);	/* hooray */
 	    goto phooey;	/* not present */
-	}
-	else if (prog->regback >= 0) {
-	    s -= prog->regback;
-	    if (s < startpos)
-		s = startpos;
-	    minlen = prog->regback + SvCUR(prog->regmust);
-	}
-	else if (!prog->naughty && --BmUSEFUL(prog->regmust) < 0) { /* boo */
-	    SvREFCNT_dec(prog->regmust);
-	    prog->regmust = Nullsv;	/* disable regmust */
-	    s = startpos;
-	}
-	else {
+	} else if ((s - stringarg) > prog->check_offset_max) {
+	    ++BmUSEFUL(prog->check_substr);	/* hooray/2 */
+	    s -= prog->check_offset_max;
+	} else if (!prog->naughty 
+		   && --BmUSEFUL(prog->check_substr) < 0
+		   && prog->check_substr == prog->float_substr) { /* boo */
+	    SvREFCNT_dec(prog->check_substr);
+	    prog->check_substr = Nullsv;	/* disable */
+	    prog->float_substr = Nullsv;	/* clear */
 	    s = startpos;
-	    minlen = SvCUR(prog->regmust);
-	}
+	} else s = startpos;
     }
 
     /* Mark beginning of line for ^ . */
@@ -275,45 +310,69 @@ I32 safebase;	/* no need to remember str
     }
 
     /* Messy cases:  unanchored match. */
-    if (prog->regstart) {
-	if (prog->reganch & ROPT_SKIP) {  /* we have /x+whatever/ */
-	    /* it must be a one character string */
-	    char ch = SvPVX(prog->regstart)[0];
-	    while (s < strend) {
-		if (*s == ch) {
-		    if (regtry(prog, s))
-			goto got_it;
-		    s++;
-		    while (s < strend && *s == ch)
-			s++;
-		}
+    if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { 
+	/* we have /x+whatever/ */
+	/* it must be a one character string */
+	char ch = SvPVX(prog->anchored_substr)[0];
+	while (s < strend) {
+	    if (*s == ch) {
+		if (regtry(prog, s)) goto got_it;
 		s++;
+		while (s < strend && *s == ch)
+		    s++;
 	    }
+	    s++;
 	}
-	else if (SvTYPE(prog->regstart) == SVt_PVBM) {
-	    /* We know what string it must start with. */
-	    while ((s = fbm_instr((unsigned char*)s,
-	      (unsigned char*)strend, prog->regstart)) != NULL)
-	    {
-		if (regtry(prog, s))
-		    goto got_it;
-		s++;
+    }
+    /*SUPPRESS 560*/
+    else if (prog->anchored_substr != Nullsv
+	     || (prog->float_substr != Nullsv 
+		 && prog->float_max_offset < strend - s)) {
+	SV *must = prog->anchored_substr 
+	    ? prog->anchored_substr : prog->float_substr;
+	I32 back_max = 
+	    prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
+	I32 back_min = 
+	    prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
+	I32 delta = back_max - back_min;
+	char *last, *last1;
+
+	/* XXXX check_substr already used to find `s', can optimize if
+	   check_substr==must. */
+/*	if (regtry(prog, s))
+	    goto got_it;
+	s++; */
+	scream_pos = -1;
+	last1 = s - 1 - delta;
+	/* s += back_min; */
+	last = strend - SvCUR(must) - back_min;	
+	dontbother = end_shift;
+	strend -= dontbother;
+	while ( (s <= last) &&
+		(screamer 
+		 ? (s = screaminstr(screamer, must, s + back_min - strbeg,
+				    end_shift, &scream_pos, 0))
+		 : (s = fbm_instr((unsigned char*)s + back_min,
+				  (unsigned char*)strend, must))) ) {
+	    if (s - back_max > last1) {
+		last1 = s - back_min;
+		s = s - back_max;
+	    } else {
+		char *t = last1 + 1;		
+
+		last1 = s - back_min;
+		s = t;		
 	    }
-	}
-	else {				/* Optimized fbm_instr: */
-	    c = SvPVX(prog->regstart);
-	    while ((s = ninstr(s, strend, c, c + SvCUR(prog->regstart))) != NULL)
-	    {
+	    while (s <= last1) {
 		if (regtry(prog, s))
 		    goto got_it;
 		s++;
 	    }
 	}
 	goto phooey;
-    }
-    /*SUPPRESS 560*/
-    if (c = prog->regstclass) {
+    } else if (c = prog->regstclass) {
 	I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
+	char *class;
 
 	if (minlen)
 	    dontbother = minlen - 1;
@@ -322,9 +381,9 @@ I32 safebase;	/* no need to remember str
 	/* We know what class it must start with. */
 	switch (OP(c)) {
 	case ANYOF:
-	    c = OPERAND(c);
+	    class = OPERAND(c);
 	    while (s < strend) {
-		if (reginclass(c, *s)) {
+		if (reginclass(class, *s)) {
 		    if (tmp && regtry(prog, s))
 			goto got_it;
 		    else
@@ -509,7 +568,26 @@ I32 safebase;	/* no need to remember str
 	}
     }
     else {
-	if (minlen)
+	dontbother = 0;
+	if (prog->float_substr != Nullsv) {	/* Trim the end. */
+	    char *last;
+	    I32 oldpos = scream_pos;
+
+	    if (screamer) {
+		last = screaminstr(screamer, prog->float_substr, s - strbeg,
+				   end_shift, &scream_pos, 1); /* last one */
+		if (!last) {
+		    last = scream_olds; /* Only one occurence. */
+		}
+	    } else {
+		STRLEN len;
+		char *little = SvPV(prog->float_substr, len);
+		last = rninstr(s, strend, little, little + len);
+	    }
+	    if (last == NULL) goto phooey; /* Should not happen! */
+	    dontbother = strend - last - 1;
+	}
+	if (minlen && (dontbother < minlen))
 	    dontbother = minlen - 1;
 	strend -= dontbother;
 	/* We don't know much -- general case. */
@@ -529,8 +607,8 @@ got_it:
     prog->exec_tainted = regtainted;
 
     /* make sure $`, $&, $', and $digit will work later */
-    if (strbeg != prog->subbase) {
-	if (safebase) {
+    if (strbeg != prog->subbase) {	/* second+ //g match.  */
+	if (!(flags & REXEC_COPY)) {
 	    if (prog->subbase) {
 		Safefree(prog->subbase);
 		prog->subbase = Nullch;
@@ -576,9 +654,17 @@ char *startpos;
     reglastparen = &prog->lastparen;
     prog->lastparen = 0;
     regsize = 0;
+    if (reg_start_tmpl <= prog->nparens) {
+	reg_start_tmpl = prog->nparens*3/2 + 3;
+        if(reg_start_tmp)
+            Renew(reg_start_tmp, reg_start_tmpl, char*);
+        else
+            New(22,reg_start_tmp, reg_start_tmpl, char*);
+    }
 
     sp = prog->startp;
     ep = prog->endp;
+    data = prog->data;
     if (prog->nparens) {
 	for (i = prog->nparens; i >= 0; i--) {
 	    *sp++ = NULL;
@@ -610,16 +696,16 @@ char *startpos;
  */
 static I32			/* 0 failure, 1 success */
 regmatch(prog)
-char *prog;
+regnode *prog;
 {
-    register char *scan;	/* Current node. */
-    char *next;			/* Next node. */
+    register regnode *scan;	/* Current node. */
+    regnode *next;		/* Next node. */
     register I32 nextchar;
     register I32 n;		/* no or next */
     register I32 ln;		/* len or last */
     register char *s;		/* operand or save */
     register char *locinput = reginput;
-    register I32 c1, c2;	/* case fold search */
+    register I32 c1, c2, paren;	/* case fold search, parenth */
     int minmod = 0;
 #ifdef DEBUGGING
     static int regindent = 0;
@@ -636,9 +722,9 @@ char *prog;
 	if (regnarrate) {
 	    SV *prop = sv_newmortal();
 	    regprop(prop, scan);
-	    PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n",
+	    PerlIO_printf(Perl_debug_log, "%*s%2d%-20.20s\t<%.10s>\t %4i\n",
 			  regindent*2, "", scan - regprogram,
-			  SvPVX(prop), locinput);
+			  SvPVX(prop), locinput, locinput - regbol);
 	}
 #else
 #define sayYES return 1
@@ -647,7 +733,7 @@ char *prog;
 #endif
 
 #ifdef REGALIGN
-	next = scan + NEXT(scan);
+	next = scan + NEXT_OFF(scan);
 	if (next == scan)
 	    next = NULL;
 #else
@@ -828,12 +914,12 @@ char *prog;
 	case REFFL:
 	    regtainted = TRUE;
 	    /* FALL THROUGH */
-	case REF:
+        case REF:
 	case REFF:
-	    n = ARG1(scan);  /* which paren pair */
+	    n = ARG(scan);  /* which paren pair */
 	    s = regstartp[n];
 	    if (!s)
-		sayNO;
+		break;			/* Zero length always matches */
 	    if (!regendp[n])
 		sayNO;
 	    if (s == regendp[n])
@@ -842,7 +928,7 @@ char *prog;
 	    if (UCHARAT(s) != nextchar &&
 		(OP(scan) == REF ||
 		 (UCHARAT(s) != ((OP(scan) == REFF
-				 ? fold : fold_locale)[nextchar]))))
+				  ? fold : fold_locale)[nextchar]))))
 		sayNO;
 	    ln = regendp[n] - s;
 	    if (locinput + ln > regeol)
@@ -858,17 +944,33 @@ char *prog;
 	    break;
 
 	case NOTHING:
+	case TAIL:
 	    break;
 	case BACK:
 	    break;
+	case EVAL:
+	{
+	    OP *oop = op;
+	    SV **ocurpad = curpad;
+	    
+	    n = ARG(scan);
+	    op = (OP*)data->data[n];
+	    curpad = (SV**)data->data[n + 1];
+
+	    runops();
+	    op = oop;
+	    curpad = ocurpad;
+	    break;
+	}
 	case OPEN:
-	    n = ARG1(scan);  /* which paren pair */
-	    regstartp[n] = locinput;
+	    n = ARG(scan);  /* which paren pair */
+	    reg_start_tmp[n] = locinput;
 	    if (n > regsize)
 		regsize = n;
 	    break;
 	case CLOSE:
-	    n = ARG1(scan);  /* which paren pair */
+	    n = ARG(scan);  /* which paren pair */
+	    regstartp[n] = reg_start_tmp[n];
 	    regendp[n] = locinput;
 	    if (n > *reglastparen)
 		*reglastparen = n;
@@ -882,7 +984,7 @@ char *prog;
 		cc.cur = -1;
 		cc.min = ARG1(scan);
 		cc.max  = ARG2(scan);
-		cc.scan = NEXTOPER(scan) + 4;
+		cc.scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
 		cc.next = next;
 		cc.minmod = minmod;
 		cc.lastloc = 0;
@@ -910,8 +1012,8 @@ char *prog;
 
 #ifdef DEBUGGING
 		if (regnarrate)
-		    PerlIO_printf(Perl_debug_log, "%*s  %ld  %lx\n", regindent*2, "",
-			(long)n, (long)cc);
+		    PerlIO_printf(Perl_debug_log, "%*s  %ld out of %ld..%ld  cc=%lx\n", regindent*2, "",
+			(long)n, (long)cc->min, (long)cc->max, (long)cc);
 #endif
 
 		/* If degenerate scan matches "", assume scan done. */
@@ -919,6 +1021,10 @@ char *prog;
 		if (locinput == cc->lastloc && n >= cc->min) {
 		    regcc = cc->oldcc;
 		    ln = regcc->cur;
+#ifdef DEBUGGING
+		    if (regnarrate)
+			PerlIO_printf(Perl_debug_log, "%*s  empty match detected, bailing out\n", regindent*2, "");
+#endif
 		    if (regmatch(cc->next))
 			sayYES;
 		    regcc->cur = ln;
@@ -934,6 +1040,10 @@ char *prog;
 		    if (regmatch(cc->scan))
 			sayYES;
 		    cc->cur = n - 1;
+#ifdef DEBUGGING
+		    if (regnarrate)
+			PerlIO_printf(Perl_debug_log, "%*s  failed...\n", regindent*2, "");
+#endif
 		    sayNO;
 		}
 
@@ -951,9 +1061,16 @@ char *prog;
 		    regcc->cur = ln;
 		    regcc = cc;
 
-		    if (n >= cc->max)	/* Maximum greed exceeded? */
+		    if (n >= cc->max) {	/* Maximum greed exceeded? */
+			if (dowarn && n >= REG_INFTY && !reg_warned++) 
+			    warn("count exceeded %d", REG_INFTY - 1);
 			sayNO;
+		    }
 
+#ifdef DEBUGGING
+		    if (regnarrate)
+			PerlIO_printf(Perl_debug_log, "%*s  trying longer...\n", regindent*2, "");
+#endif
 		    /* Try scanning more and see if it helps. */
 		    reginput = locinput;
 		    cc->cur = n;
@@ -963,6 +1080,10 @@ char *prog;
 			regcpblow(cp);
 			sayYES;
 		    }
+#ifdef DEBUGGING
+		    if (regnarrate)
+			PerlIO_printf(Perl_debug_log, "%*s  failed...\n", regindent*2, "");
+#endif
 		    regcppop();
 		    cc->cur = n - 1;
 		    sayNO;
@@ -980,13 +1101,23 @@ char *prog;
 		    }
 		    regcppop();		/* Restore some previous $<digit>s? */
 		    reginput = locinput;
+#ifdef DEBUGGING
+		    if (regnarrate)
+			PerlIO_printf(Perl_debug_log, "%*s  failed, try continuation...\n", regindent*2, "");
+#endif
 		}
+		if (dowarn && n >= REG_INFTY && !reg_warned++) 
+		    warn("count exceeded %d", REG_INFTY - 1);
 
 		/* Failed deeper matches of scan, so see if this one works. */
 		regcc = cc->oldcc;
 		ln = regcc->cur;
 		if (regmatch(cc->next))
 		    sayYES;
+#ifdef DEBUGGING
+		if (regnarrate)
+		    PerlIO_printf(Perl_debug_log, "%*s  failed...\n", regindent*2, "");
+#endif
 		regcc->cur = ln;
 		regcc = cc;
 		cc->cur = n - 1;
@@ -1008,7 +1139,7 @@ char *prog;
 			    
 #ifdef REGALIGN
 			/*SUPPRESS 560*/
-			if (n = NEXT(scan))
+			if (n = NEXT_OFF(scan))
 			    scan += n;
 			else
 			    scan = NULL;
@@ -1024,25 +1155,152 @@ char *prog;
 	case MINMOD:
 	    minmod = 1;
 	    break;
+	case CURLYM:
+	{
+	    I32 l;
+	    
+	    /* We suppose that the next guy does not need
+	       backtracking: in particular, it is of constant length,
+	       and has no parenths to influence future backrefs. */
+	    ln = ARG1(scan);  /* min to match */
+	    n  = ARG2(scan);  /* max to match */
+#ifdef REGALIGN_STRUCT
+	    paren = scan->flags;
+	    if (paren) {
+		if (paren > regsize)
+		    regsize = paren;
+		if (paren > *reglastparen)
+		    *reglastparen = paren;
+	    }
+#endif 
+	    scan = NEXTOPER(scan) + 4/sizeof(regnode);
+	    if (paren)
+		scan = regnext(scan);	/* Skip former OPEN. */
+	    reginput = locinput;
+	    if (minmod) {
+		minmod = 0;
+		if (ln && regrepeat_hard(scan, ln, &l) < ln)
+		    sayNO;
+		if (l == 0 && n >= ln)
+		    n = ln;
+		locinput = reginput;
+		if (regkind[(U8)OP(next)] == EXACT) {
+		    c1 = UCHARAT(OPERAND(next) + 1);
+		    if (OP(next) == EXACTF)
+			c2 = fold[c1];
+		    else if (OP(next) == EXACTFL)
+			c2 = fold_locale[c1];
+		    else
+			c2 = c1;
+		} else
+		    c1 = c2 = -1000;
+		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
+		    /* If it could work, try it. */
+		    if (c1 == -1000 ||
+			UCHARAT(reginput) == c1 ||
+			UCHARAT(reginput) == c2)
+		    {
+			if (paren) {
+			    if (n) {
+				regstartp[paren] = reginput - l;
+				regendp[paren] = reginput;
+			    } else
+				regendp[paren] = NULL;
+			}
+			if (regmatch(next))
+			    sayYES;
+		    }
+		    /* Couldn't or didn't -- move forward. */
+		    reginput = locinput;
+		    if (regrepeat_hard(scan, 1, &l)) {
+			ln++;
+			locinput = reginput;
+		    }
+		    else
+			sayNO;
+		}
+	    } else {
+		n = regrepeat_hard(scan, n, &l);
+		if (n != 0 && l == 0) 
+		    n = ln;
+		locinput = reginput;
+#ifdef DEBUGGING
+		if (regnarrate)
+		    PerlIO_printf(Perl_debug_log, "%*s  matched %ld times, len=%ld...\n", regindent*2, "", n, l);
+#endif
+		if (n >= ln) {
+		    if (regkind[(U8)OP(next)] == EXACT) {
+			c1 = UCHARAT(OPERAND(next) + 1);
+			if (OP(next) == EXACTF)
+			    c2 = fold[c1];
+			else if (OP(next) == EXACTFL)
+			    c2 = fold_locale[c1];
+			else
+			    c2 = c1;
+		    } else
+			c1 = c2 = -1000;
+		}
+		while (n >= ln) {
+		    /* If it could work, try it. */
+		    if (c1 == -1000 ||
+			UCHARAT(reginput) == c1 ||
+			UCHARAT(reginput) == c2)
+			{
+#ifdef DEBUGGING
+			    if (regnarrate)
+				PerlIO_printf(Perl_debug_log, "%*s  trying tail with n=%ld...\n", regindent*2, "", n);
+#endif
+			    if (paren) {
+				if (n) {
+				    regstartp[paren] = reginput - l;
+				    regendp[paren] = reginput;
+				} else
+				    regendp[paren] = NULL;
+			    }
+			    if (regmatch(next))
+				sayYES;
+			}
+		    /* Couldn't or didn't -- back up. */
+		    n--;
+		    locinput -= l;
+		    reginput = locinput;
+		}
+	    }
+	    sayNO;
+	    break;
+	}
+	case CURLYN:
+	    paren = scan->flags;	/* Which paren to set */
+	    if (paren > regsize)
+		regsize = paren;
+	    if (paren > *reglastparen)
+		*reglastparen = paren;
+	    ln = ARG1(scan);  /* min to match */
+	    n  = ARG2(scan);  /* max to match */
+            scan = regnext(NEXTOPER(scan) + 4/sizeof(regnode));
+	    goto repeat;
 	case CURLY:
+	    paren = 0;
 	    ln = ARG1(scan);  /* min to match */
 	    n  = ARG2(scan);  /* max to match */
-	    scan = NEXTOPER(scan) + 4;
+	    scan = NEXTOPER(scan) + 4/sizeof(regnode);
 	    goto repeat;
 	case STAR:
 	    ln = 0;
-	    n = 32767;
+	    n = REG_INFTY;
 	    scan = NEXTOPER(scan);
+	    paren = 0;
 	    goto repeat;
 	case PLUS:
+	    ln = 1;
+	    n = REG_INFTY;
+	    scan = NEXTOPER(scan);
+	    paren = 0;
+	  repeat:
 	    /*
 	    * Lookahead to avoid useless match attempts
 	    * when we know what character comes next.
 	    */
-	    ln = 1;
-	    n = 32767;
-	    scan = NEXTOPER(scan);
-	  repeat:
 	    if (regkind[(U8)OP(next)] == EXACT) {
 		c1 = UCHARAT(OPERAND(next) + 1);
 		if (OP(next) == EXACTF)
@@ -1059,22 +1317,28 @@ char *prog;
 		minmod = 0;
 		if (ln && regrepeat(scan, ln) < ln)
 		    sayNO;
-		while (n >= ln || (n == 32767 && ln > 0)) { /* ln overflow ? */
+		while (n >= ln || (n == REG_INFTY && ln > 0)) { /* ln overflow ? */
 		    /* If it could work, try it. */
 		    if (c1 == -1000 ||
 			UCHARAT(reginput) == c1 ||
 			UCHARAT(reginput) == c2)
 		    {
+			if (paren) {
+			    if (n) {
+				regstartp[paren] = reginput - 1;
+				regendp[paren] = reginput;
+			    } else
+				regendp[paren] = NULL;
+			}
 			if (regmatch(next))
 			    sayYES;
 		    }
-		    /* Couldn't or didn't -- back up. */
+		    /* Couldn't or didn't -- move forward. */
 		    reginput = locinput + ln;
 		    if (regrepeat(scan, 1)) {
 			ln++;
 			reginput = locinput + ln;
-		    }
-		    else
+		    } else
 			sayNO;
 		}
 	    }
@@ -1083,33 +1347,61 @@ char *prog;
 		if (ln < n && regkind[(U8)OP(next)] == EOL &&
 		    (!multiline || OP(next) == SEOL))
 		    ln = n;			/* why back off? */
-		while (n >= ln) {
-		    /* If it could work, try it. */
-		    if (c1 == -1000 ||
-			UCHARAT(reginput) == c1 ||
-			UCHARAT(reginput) == c2)
-		    {
-			if (regmatch(next))
-			    sayYES;
+		if (paren) {
+		    while (n >= ln) {
+			/* If it could work, try it. */
+			if (c1 == -1000 ||
+			    UCHARAT(reginput) == c1 ||
+			    UCHARAT(reginput) == c2)
+			    {
+				if (paren && n) {
+				    if (n) {
+					regstartp[paren] = reginput - 1;
+					regendp[paren] = reginput;
+				    } else
+					regendp[paren] = NULL;
+				}
+				if (regmatch(next))
+				    sayYES;
+			    }
+			/* Couldn't or didn't -- back up. */
+			n--;
+			reginput = locinput + n;
+		    }
+		} else {
+		    while (n >= ln) {
+			/* If it could work, try it. */
+			if (c1 == -1000 ||
+			    UCHARAT(reginput) == c1 ||
+			    UCHARAT(reginput) == c2)
+			    {
+				if (regmatch(next))
+				    sayYES;
+			    }
+			/* Couldn't or didn't -- back up. */
+			n--;
+			reginput = locinput + n;
 		    }
-		    /* Couldn't or didn't -- back up. */
-		    n--;
-		    reginput = locinput + n;
 		}
 	    }
 	    sayNO;
+	    break;
 	case SUCCEED:
 	case END:
 	    reginput = locinput;	/* put where regtry can find it */
 	    sayYES;			/* Success! */
 	case IFMATCH:
-	    reginput = locinput;
+	    if (locinput < regbol + scan->flags) 
+		sayNO;
+	    reginput = locinput - scan->flags;
 	    scan = NEXTOPER(scan);
 	    if (!regmatch(scan))
 		sayNO;
 	    break;
 	case UNLESSM:
-	    reginput = locinput;
+	    if (locinput < regbol + scan->flags) 
+		break;
+	    reginput = locinput - scan->flags;
 	    scan = NEXTOPER(scan);
 	    if (regmatch(scan))
 		sayNO;
@@ -1153,7 +1445,7 @@ no:
  */
 static I32
 regrepeat(p, max)
-char *p;
+regnode *p;
 I32 max;
 {
     register char *scan;
@@ -1162,7 +1454,7 @@ I32 max;
     register char *loceol = regeol;
 
     scan = reginput;
-    if (max != 32767 && max < loceol - scan)
+    if (max != REG_INFTY && max < loceol - scan)
       loceol = scan + max;
     opnd = OPERAND(p);
     switch (OP(p)) {
@@ -1250,6 +1542,42 @@ I32 max;
 }
 
 /*
+ - regrepeat_hard - repeatedly match something, report total lenth and length
+ * 
+ * The repeater is supposed to have constant length.
+ */
+
+static I32
+regrepeat_hard(p, max, lp)
+regnode *p;
+I32 max;
+I32 *lp;
+{
+    register char *scan;
+    register char *start;
+    register char *loceol = regeol;
+    I32 l = -1;
+
+    start = reginput;
+    while (reginput < loceol && (scan = reginput, regmatch(p))) {
+	if (l == -1) {
+	    *lp = l = reginput - start;
+	    if (max != REG_INFTY && l*max < loceol - scan)
+		loceol = scan + l*max;
+	    if (l == 0) {
+		return max;
+	    }
+	}
+    }
+    if (reginput < loceol)
+	reginput = scan;
+    else
+	scan = reginput;
+    
+    return (scan - start)/l;
+}
+
+/*
  - regclass - determine if a character falls into a character class
  */
 
@@ -1297,16 +1625,16 @@ register I32 c;
  * [Note, when REGALIGN is defined there are two places in regmatch()
  * that bypass this code for speed.]
  */
-char *
+regnode *
 regnext(p)
-register char *p;
+register regnode *p;
 {
     register I32 offset;
 
     if (p == &regdummy)
 	return(NULL);
 
-    offset = NEXT(p);
+    offset = NEXT_OFF(p);
     if (offset == 0)
 	return(NULL);
 
diff -pru perl5.004_01.os2/regexp.h perl5.004_01.my/regexp.h
--- perl5.004_01.os2/regexp.h	Fri Jan 17 10:05:20 1997
+++ perl5.004_01.my/regexp.h	Sat Jun 14 21:11:54 1997
@@ -9,13 +9,18 @@
  */
 
 
+struct regnode {
+    U8	flags;
+    U8  type;
+    U16 next_off;
+};
+
+typedef struct regnode regnode;
+
 typedef struct regexp {
 	char **startp;
 	char **endp;
-	SV *regstart;		/* Internal use only. */
-	char *regstclass;
-	SV *regmust;		/* Internal use only. */
-	I32 regback;		/* Can regmust locate first try? */
+	regnode *regstclass;
 	I32 minlen;		/* mininum possible length of $& */
 	I32 prelen;		/* length of precomp */
 	U32 nparens;		/* number of parentheses */
@@ -27,11 +32,26 @@ typedef struct regexp {
 	U16 naughty;		/* how exponential is this pattern? */
 	char reganch;		/* Internal use only. */
 	char exec_tainted;	/* Tainted information used by regexec? */
-	char program[1];	/* Unwarranted chumminess with compiler. */
+        SV *anchored_substr;	/* Substring at fixed position wrt start. */
+	I32 anchored_offset;	/* Position of it. */
+        SV *float_substr;	/* Substring at variable position wrt start. */
+	I32 float_min_offset;	/* Minimal position of it. */
+	I32 float_max_offset;	/* Maximal position of it. */
+        SV *check_substr;	/* Substring to check before matching. */
+        I32 check_offset_min;	/* Offset of the above. */
+        I32 check_offset_max;	/* Offset of the above. */
+        struct reg_data *data;	/* Additional data. */
+	regnode program[1];	/* Unwarranted chumminess with compiler. */
 } regexp;
 
-#define ROPT_ANCH	3
-#define  ROPT_ANCH_BOL	 1
-#define  ROPT_ANCH_GPOS	 2
-#define ROPT_SKIP	4
-#define ROPT_IMPLICIT	8
+#define ROPT_ANCH		(ROPT_ANCH_BOL|ROPT_ANCH_GPOS)
+#define ROPT_ANCH_BOL	 	1
+#define ROPT_ANCH_GPOS	 	2
+#define ROPT_SKIP		4
+#define ROPT_IMPLICIT		8
+#define ROPT_CHECK_AT_START	16
+#define ROPT_GPOS_SEEN		32
+#define ROPT_CHECK_ALL		64
+
+#define REXEC_COPY	1		/* Need to copy the string. */
+#define REXEC_CHECKED	2		/* check_substr already checked. */
diff -pru perl5.004_01.os2/sv.c perl5.004_01.my/sv.c
--- perl5.004_01.os2/sv.c	Fri Jun  6 17:32:44 1997
+++ perl5.004_01.my/sv.c	Sat Jun 14 21:11:56 1997
@@ -4731,6 +4731,10 @@ SV* sv;
 		sv_catpv(d, " ),");
 	    }
 	}
+    case SVt_PVBM:
+	if (SvTAIL(sv))	sv_catpv(d, "TAIL,");
+	if (SvCOMPILED(sv))	sv_catpv(d, "COMPILED,");
+	break;
     }
 
     if (*(SvEND(d) - 1) == ',')
diff -pru perl5.004_01.os2/t/op/re_tests perl5.004_01.my/t/op/re_tests
--- perl5.004_01.os2/t/op/re_tests	Mon Jun  9 05:07:22 1997
+++ perl5.004_01.my/t/op/re_tests	Sat Jun 14 21:18:10 1997
@@ -8,6 +8,8 @@ ab*c	abc	y	$&	abc
 ab*bc	abc	y	$&	abc
 ab*bc	abbc	y	$&	abbc
 ab*bc	abbbbc	y	$&	abbbbc
+.{1}	abbbbc	y	$&	a
+.{3,4}	abbbbc	y	$&	abbb
 ab{0,}bc	abbbbc	y	$&	abbbbc
 ab+bc	abbc	y	$&	abbc
 ab+bc	abc	n	-	-
@@ -29,6 +31,7 @@ ab{0,1}c	abc	y	$&	abc
 ^abc	abcc	y	$&	abc
 ^abc$	aabc	n	-	-
 abc$	aabc	y	$&	abc
+abc$	aabcd	n	-	-
 ^	abc	y	$&	
 $	abc	y	$&	
 a.c	abc	y	$&	abc
@@ -299,8 +302,64 @@ a(?=c|d).	abad	y	$&	ad
 a(?:b|c|d)(.)	ace	y	$1	e
 a(?:b|c|d)*(.)	ace	y	$1	e
 a(?:b|c|d)+?(.)	ace	y	$1	e
+a(?:b|c|d)+?(.)	acdbcdbe	y	$1	d
+a(?:b|c|d)+(.)	acdbcdbe	y	$1	e
+a(?:b|c|d){2}(.)	acdbcdbe	y	$1	b
+a(?:b|c|d){4,5}(.)	acdbcdbe	y	$1	b
+a(?:b|c|d){4,5}?(.)	acdbcdbe	y	$1	d
+a(?:b|c|d){6,7}(.)	acdbcdbe	y	$1	e
+a(?:b|c|d){6,7}?(.)	acdbcdbe	y	$1	e
+a(?:b|c|d){5,6}(.)	acdbcdbe	y	$1	e
+a(?:b|c|d){5,6}?(.)	acdbcdbe	y	$1	b
+a(?:b|c|d){5,7}(.)	acdbcdbe	y	$1	e
+a(?:b|c|d){5,7}?(.)	acdbcdbe	y	$1	b
 a(?:b|(c|e){1,2}?|d)+?(.)	ace	y	$1$2	ce
 ^(.+)?B	AB	y	$1	A
-'([a-z]+)\s\1'i	Aa aa 	y	$&-$1	Aa aa-Aa
-'([a-z]+)\s\1'i	Ab ab 	y	$&-$1	Ab ab-Ab
+^([^a-z])|(\^)$	.	y	$1	.
+^[<>]&	<&OUT	y	$&	<&
+^(a\1?){4}$	aaaaaaaaaa	y	$1	aaaa
+^(a\1?){4}$	aaaaaaaaa	n	-	-
+^(a\1?){4}$	aaaaaaaaaaa	n	-	-
+^(a\1){4}$	aaaaaaaaaa	y	$1	aaaa
+^(a\1){4}$	aaaaaaaaa	n	-	-
+^(a\1){4}$	aaaaaaaaaaa	n	-	-
+(?:(f)(o)(o)|(b)(a)(r))*	foobar	y	$1:$2:$3:$4:$5:$6	f:o:o:b:a:r
+(?<=a)b	ab	y	$&	b
+(?<=a)b	cb	n	-	-
+(?<=a)b	b	n	-	-
+(?<!c)b	ab	y	$&	b
+(?<!c)b	cb	n	-	-
+(?<!c)b	b	y	-	-
+(?<%)b	-	c	-	-
+(?:..)*a	aba	y	$&	aba
+(?:..)*?a	aba	y	$&	a
+^(?:b|a(?=(.)))*\1	abc	y	$&	ab
+^(){3,5}	abc	y	a$1	a
+^(a+)*ax	aax	y	$1	a
+^((a|b)+)*ax	aax	y	$1	a
+^((a|bc)+)*ax	aax	y	$1	a
+(a|x)*ab	cab	y	y$1	y
+(a)*ab	cab	y	y$1	y
+(?:(?i)a)b	ab	y	$&	ab
+((?i)a)b	ab	y	$&:$1	ab:a
+(?:(?i)a)b	Ab	y	$&	Ab
+((?i)a)b	Ab	y	$&:$1	Ab:A
+(?:(?i)a)b	aB	n	-	-
+((?i)a)b	aB	n	-	-
+(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b)))	cabbbb	y	$&	cabbbb
+'(ab)\d\1'i	Ab4ab	y	$1	Ab
+'(ab)\d\1'i	ab4Ab	y	$1	ab
 foo\w*\d{4}baz	foobar1234baz	y	$&	foobar1234baz
+a{1,32766}	aaa	y	$&	aaa
+a{1,32767}	-	c	-	-
+a{1,32768}	-	c	-	-
+a(?{})b	cabd	y	$&	ab
+a(?{)b	-	c	-	-
+a(?{)b	-	c	-	-
+a(?{{})b	-	c	-	-
+a(?{}})b	-	c	-	-
+a(?{"{"})b	-	c	-	-
+a(?{"\{"})b	cabd	y	$&	ab
+a(?{"{"}})b	-	c	-	-
+a(?{$bl="\{"}).b	caxbd	y	$bl	{
+x(~~)*(?:(?:F)?)?	x~~	y	-	-
diff -pru perl5.004_01.os2/t/op/regexp.t perl5.004_01.my/t/op/regexp.t
--- perl5.004_01.os2/t/op/regexp.t	Mon Jun  9 05:10:14 1997
+++ perl5.004_01.my/t/op/regexp.t	Sat Jun 14 21:14:26 1997
@@ -21,6 +21,8 @@
 # Column 5 contains the expected result of double-quote
 # interpolating that string after the match.
 
+$iters = shift || 1;		# Poor man performance suite, 10000 is OK.
+
 open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests')
     || die "Can't open re_tests";
 
@@ -30,24 +32,28 @@ seek(TESTS,0,0);
 $. = 0;
 
 $| = 1;
-print "1..$numtests\n";
+print "1..$numtests\n# $iters iterations\n";
 TEST:
 while (<TESTS>) {
     ($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_);
     $input = join(':',$pat,$subject,$result,$repl,$expect);
-    $pat = "'$pat'" unless $pat =~ /^'/;
-    for $study ("", "study \$subject") {
-	eval "$study; \$match = (\$subject =~ m$pat); \$got = \"$repl\";";
+    $pat = "'$pat'" unless $pat =~ /^\'/;
+    for $study ("", 'study $subject') {
+ 	$c = $iters;
+ 	eval "$study; \$match = (\$subject =~ m$pat) while \$c--; \$got = \"$repl\";";
 	if ($result eq 'c') {
 	    if ($@ eq '') { print "not ok $.\n"; next TEST }
 	    last;  # no need to study a syntax error
 	}
+	elsif ($@) {
+	    print "not ok $. $input => error `$@'\n"; next TEST;
+	}
 	elsif ($result eq 'n') {
-	    if ($match) { print "not ok $. $input => $got\n"; next TEST }
+	    if ($match) { print "not ok $. ($study) $input => false positive\n"; next TEST }
 	}
 	else {
 	    if (!$match || $got ne $expect) {
-		print "not ok $. $input => $got\n";
+ 		print "not ok $. ($study) $input => `$got', match=$match\n";
 		next TEST;
 	    }
 	}
diff -pru perl5.004_01.os2/t/op/regexp_noamp.t perl5.004_01.my/t/op/regexp_noamp.t
--- perl5.004_01.os2/t/op/regexp_noamp.t	Sun Jun 15 21:43:00 1997
+++ perl5.004_01.my/t/op/regexp_noamp.t	Sat Jun 14 21:11:56 1997
@@ -0,0 +1,44 @@
+#!./perl
+
+$iters = shift || 1;		# Poor man performance suite, 10000 is OK.
+
+open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests')
+    || die "Can't open re_tests";
+
+while (<TESTS>) { }
+$numtests = $.;
+seek(TESTS,0,0);
+$. = 0;
+
+$| = 1;
+print "1..$numtests\n# $iters iterations\n";
+TEST:
+while (<TESTS>) {
+    ($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_);
+    $input = join(':',$pat,$subject,$result,$repl,$expect);
+    $pat = "'$pat'" unless $pat =~ /^\'/;
+    print("ok $.\n"), next TEST if $input =~ /\$[&\`\']/;
+    for $study ("", 'study $subject') {
+	$c = $iters;
+	eval "$study; \$match = (\$subject =~ m$pat) while \$c--; \$got = \"$repl\";";
+	if ($result eq 'c') {
+	    if ($@ eq '') { print "not ok $.\n"; next TEST }
+	    last;  # no need to study a syntax error
+	}
+	elsif ($@) {
+	    print "not ok $. $input => error `$@'\n"; next TEST;
+	}
+	elsif ($result eq 'n') {
+	    if ($match) { print "not ok $. ($study) $input => false positive\n"; next TEST }
+	}
+	else {
+	    if (!$match || $got ne $expect) {
+		print "not ok $. ($study) $input => `$got', match=$match\n";
+		next TEST;
+	    }
+	}
+    }
+    print "ok $.\n";
+}
+
+close(TESTS);
diff -pru perl5.004_01.os2/t/op/split.t perl5.004_01.my/t/op/split.t
--- perl5.004_01.os2/t/op/split.t	Fri Jun  6 15:01:40 1997
+++ perl5.004_01.my/t/op/split.t	Sat Jun 14 21:17:10 1997
@@ -2,7 +2,7 @@
 
 # $RCSfile: split.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:26 $
 
-print "1..16\n";
+print "1..21\n";
 
 $FS = ':';
 
@@ -76,3 +76,24 @@ print "$a|$b" eq "2|4" ? "ok 15\n" : "no
   local(undef, $a, undef, $b) = qw(1 2 3 4);
   print "$a|$b" eq "2|4" ? "ok 16\n" : "not ok 16\n";
 }
+
+# Are /^/m patterns scanned?
+$_ = join '|', split(/^a/m, "a b a\na d a", 20);
+print $_ eq "| b a\n| d a" ? "ok 17\n" : "not ok 17\n# `$_'\n";
+
+# Are /$/m patterns scanned?
+$_ = join '|', split(/a$/m, "a b a\na d a", 20);
+print $_ eq "a b |\na d |" ? "ok 18\n" : "not ok 18\n# `$_'\n";
+
+# Are /^/m patterns scanned?
+$_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20);
+print $_ eq "| b aa\n| d aa" ? "ok 19\n" : "not ok 19\n# `$_'\n";
+
+# Are /$/m patterns scanned?
+$_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20);
+print $_ eq "aa b |\naa d |" ? "ok 20\n" : "not ok 20\n# `$_'\n";
+
+# Greedyness:
+$_ = "a : b :c: d";
+@ary = split(/\s*:\s*/);
+if (($res = join(".",@ary)) eq "a.b.c.d") {print "ok 21\n";} else {print "not ok 21\n# res=`$res' != `a.b.c.d'\n";}
diff -pru perl5.004_01.os2/t/TEST perl5.004_01.my/t/TEST
--- perl5.004_01.os2/t/TEST	Thu Mar  6 12:51:40 1997
+++ perl5.004_01.my/t/TEST	Sun Jun 15 21:39:30 1997
@@ -51,6 +51,7 @@ while ($test = shift) {
     chop($te);
     print "$te" . '.' x (18 - length($te));
     if ($sharpbang) {
+	-x $test || (print "isn't executable.\n");
 	open(RESULTS,"./$test |") || (print "can't run.\n");
     } else {
 	open(SCRIPT,"$test") || die "Can't run $test.\n";
diff -pru perl5.004_01.os2/toke.c perl5.004_01.my/toke.c
--- perl5.004_01.os2/toke.c	Tue May 13 11:07:02 1997
+++ perl5.004_01.my/toke.c	Sat Jun 14 21:03:52 1997
@@ -4587,46 +4587,6 @@ char *start;
     return s;
 }
 
-void
-hoistmust(pm)
-register PMOP *pm;
-{
-    if (!pm->op_pmshort && pm->op_pmregexp->regstart &&
-	(!pm->op_pmregexp->regmust || pm->op_pmregexp->reganch & ROPT_ANCH)
-       ) {
-	if (!(pm->op_pmregexp->reganch & ROPT_ANCH))
-	    pm->op_pmflags |= PMf_SCANFIRST;
-	pm->op_pmshort = SvREFCNT_inc(pm->op_pmregexp->regstart);
-	pm->op_pmslen = SvCUR(pm->op_pmshort);
-    }
-    else if (pm->op_pmregexp->regmust) {/* is there a better short-circuit? */
-	if (pm->op_pmshort &&
-	  sv_eq(pm->op_pmshort,pm->op_pmregexp->regmust))
-	{
-	    if (pm->op_pmflags & PMf_SCANFIRST) {
-		SvREFCNT_dec(pm->op_pmshort);
-		pm->op_pmshort = Nullsv;
-	    }
-	    else {
-		SvREFCNT_dec(pm->op_pmregexp->regmust);
-		pm->op_pmregexp->regmust = Nullsv;
-		return;
-	    }
-	}
-	/* promote the better string */
-	if ((!pm->op_pmshort &&
-	     !(pm->op_pmregexp->reganch & ROPT_ANCH_GPOS)) ||
-	    ((pm->op_pmflags & PMf_SCANFIRST) &&
-	     (SvCUR(pm->op_pmshort) < SvCUR(pm->op_pmregexp->regmust)))) {
-	    SvREFCNT_dec(pm->op_pmshort);		/* ok if null */
-	    pm->op_pmshort = pm->op_pmregexp->regmust;
-	    pm->op_pmslen = SvCUR(pm->op_pmshort);
-	    pm->op_pmregexp->regmust = Nullsv;
-	    pm->op_pmflags |= PMf_SCANFIRST;
-	}
-    }
-}
-
 static char *
 scan_trans(start)
 char *start;
diff -pru perl5.004_01.os2/util.c perl5.004_01.my/util.c
--- perl5.004_01.os2/util.c	Sat Jun 14 02:15:20 1997
+++ perl5.004_01.my/util.c	Sat Jun 14 21:11:56 1997
@@ -946,22 +946,39 @@ SV *littlestr;
 	    while (tmp--) {
 		if (*--s == *--little)
 		    continue;
+	      differ:
 		s = olds + 1;	/* here we pay the price for failure */
 		little = oldlittle;
 		if (s < bigend)	/* fake up continue to outer loop */
 		    goto top2;
 		return Nullch;
 	    }
+	    if (SvTAIL(littlestr)	/* automatically multiline */
+		&& olds + 1 != bigend
+		&& olds[1] != '\n') 
+		goto differ;
 	    return (char *)s;
 	}
     }
     return Nullch;
 }
 
+/* start_shift, end_shift are positive quantities which give offsets
+   of ends of some substring of bigstr.
+   If `last' we want the last occurence.
+   old_posp is the way of communication between consequent calls if
+   the next call needs to find the . 
+   The initial *old_posp should be -1.
+   Note that we do not take into account SvTAIL, so it may give wrong
+   positives if _ALL flag is set.
+ */
+
 char *
-screaminstr(bigstr, littlestr)
+screaminstr(bigstr, littlestr, start_shift, end_shift, old_posp, last)
 SV *bigstr;
 SV *littlestr;
+I32 start_shift, end_shift, last;
+I32 *old_posp;
 {
     register unsigned char *s, *x;
     register unsigned char *big;
@@ -969,54 +986,65 @@ SV *littlestr;
     register I32 previous;
     register I32 first;
     register unsigned char *little;
-    register unsigned char *bigend;
+    register I32 stop_pos;
     register unsigned char *littleend;
+    I32 found = 0;
 
-    if ((pos = screamfirst[BmRARE(littlestr)]) < 0) 
+    if (*old_posp == -1
+	? (pos = screamfirst[BmRARE(littlestr)]) < 0
+	: (((pos = *old_posp), pos += screamnext[pos]) == 0))
 	return Nullch;
     little = (unsigned char *)(SvPVX(littlestr));
     littleend = little + SvCUR(littlestr);
     first = *little++;
+    /* The value of pos we can start at: */
     previous = BmPREVIOUS(littlestr);
     big = (unsigned char *)(SvPVX(bigstr));
-    bigend = big + SvCUR(bigstr);
-    while (pos < previous) {
+    /* The value of pos we can stop at: */
+    stop_pos = SvCUR(bigstr) - end_shift - (SvCUR(littlestr) - 1 - previous);
+    if (previous + start_shift > stop_pos) return Nullch;
+    while (pos < previous + start_shift) {
 	if (!(pos += screamnext[pos]))
 	    return Nullch;
     }
 #ifdef POINTERRIGOR
     do {
+	if (pos >= stop_pos) return Nullch;
 	if (big[pos-previous] != first)
 	    continue;
 	for (x=big+pos+1-previous,s=little; s < littleend; /**/ ) {
-	    if (x >= bigend)
-		return Nullch;
 	    if (*s++ != *x++) {
 		s--;
 		break;
 	    }
 	}
-	if (s == littleend)
-	    return (char *)(big+pos-previous);
+	if (s == littleend) {
+	    *old_posp = pos;
+	    if (!last) return (char *)(big+pos-previous);
+	    found = 1;
+	}
     } while ( pos += screamnext[pos] );
+    return (last && found) ? (char *)(big+(*old_posp)-previous) : Nullch;
 #else /* !POINTERRIGOR */
     big -= previous;
     do {
+	if (pos >= stop_pos) return Nullch;
 	if (big[pos] != first)
 	    continue;
 	for (x=big+pos+1,s=little; s < littleend; /**/ ) {
-	    if (x >= bigend)
-		return Nullch;
 	    if (*s++ != *x++) {
 		s--;
 		break;
 	    }
 	}
-	if (s == littleend)
-	    return (char *)(big+pos);
+	if (s == littleend) {
+	    *old_posp = pos;
+	    if (!last) return (char *)(big+pos);
+	    found = 1;
+	}
     } while ( pos += screamnext[pos] );
+    return (last && found) ? (char *)(big+(*old_posp)) : Nullch;
 #endif /* POINTERRIGOR */
-    return Nullch;
 }
 
 I32