| Store | Cart

Scalar::Util patch, add regex() and reftype_name()

From: demerphq <deme...@gmail.com>
Wed, 19 Apr 2006 23:54:18 +0200
Attached is a first go at a patch to add some new routines to
Scalar::Util.  Pod is as follows:

=item reftype_name EXPR

Equivelent to C<reftype()> except return false instead of undefined if EXPR
does not evaluate to reference. More or less equivelent to

   (reftype($foo)||'')

This means you can say

   if ( reftype_name($item) eq 'ARRAY' ) {

and not worry about warnings.

=item regex EXPR

In scalar context returns the equivelent of stringification of a string, but
bypassing overloaded stringification. The pattern will be of the form

  (?ix-sm:PATTERN)

In list context returns a two element list containing the PATTERN and the
options seperately.

   my $pat=regex($foo);
   my ($pat,$opts)=regex($foo);

If EXPR doesn't evaluate to a qr// object then returns false or the empty list.

=cut


Im not sure about reftype_name(). Personally id like it if reftype()
just started returning sv_no but that wouldnt be backwards compatible.
Ive considered making it version dependent so that if you

  use Scalar::Util 1.19 qw(reftype);

you get the "nice" version, but for now its just a different sub.

regex() is useful as a test is something is a qr// object, a task that
is currently basically impossible. It also make it easier to join
regexes together as you can extract the pattern without the (?:)
wrapper or easily check if two patterns were compiled with the same
modifier. Which should be useful for the regex pattern mungers out
there. Anyway, it borrows a chunk of code from sv.c, which Nicholas
refactored pretty seriously in blead. It would be nice if blead was
further refactored so that the code for regex() would basically
disappear. Ive discussed a bit on irc how this would be done but i
havent got that far yet.

Cheers,
Yves



--
perl -Mre=debug -e "/just|another|perl|hacker/"

diff -wurd Scalar-List-Utils-1.18/Changes Scalar-List-Utils-1.18_01/Changes
--- Scalar-List-Utils-1.18/Changes	2005-11-25 16:34:32.000000000 +0100
+++ Scalar-List-Utils-1.18_01/Changes	2006-04-19 23:37:15.930740400 +0200
@@ -1,3 +1,16 @@
+1.18_01 -- 2006-04-19 23:33
+
+Bug Fixes
+  * Remove PRNG seeding logic from XS code by making the perl code call rand()
+    before the XS gets loaded, hopefully making the need to the seed logic
+    redundant. This is necessary the Perl_seed wasn't always exported when it
+    should have been.
+
+Enhancements
+  * Add reftype_name() a 'nice' version of reftype() that doesn't return undef.
+  * Add regex() to be able to tell if something is a regex reliably, and what
+    its pattern is and modifiers are without having to extract it from a string.
+
 1.18 -- Fri Nov 25 09:30:29 CST 2005
 
 Bug Fixes
Only in Scalar-List-Utils-1.18: Makefile.old
diff -wurd Scalar-List-Utils-1.18/Util.xs Scalar-List-Utils-1.18_01/Util.xs
--- Scalar-List-Utils-1.18/Util.xs	2005-11-25 16:34:32.000000000 +0100
+++ Scalar-List-Utils-1.18_01/Util.xs	2006-04-19 23:19:34.000000000 +0200
@@ -59,6 +59,15 @@
 #    define Drand01()		((rand() & 0x7FFF) / (double) ((unsigned long)1 << 15))
 #endif
 
+#if PERL_VERSION < 8
+#   define PERL_MAGIC_qr    'r' /* precompiled qr// regex */
+#   define SU_Svs_SMG_OR_RMG SVs_RMG
+#elif PERL_SUBVERSION>=1
+#   define SU_Svs_SMG_OR_RMG SVs_SMG
+#else
+#   define SU_Svs_SMG_OR_RMG SVs_RMG
+#endif
+
 #if PERL_VERSION < 5
 #  ifndef gv_stashpvn
 #    define gv_stashpvn(n,l,c) gv_stashpv(n,c)
@@ -312,29 +321,6 @@
 {
     dVAR;
     int index;
-#if (PERL_VERSION < 8) || (PERL_VERSION == 8 && PERL_SUBVERSION <1)
-    struct op dmy_op;
-    struct op *old_op = PL_op;
-
-    /* We call pp_rand here so that Drand01 get initialized if rand()
-       or srand() has not already been called
-    */
-    memzero((char*)(&dmy_op), sizeof(struct op));
-    /* we let pp_rand() borrow the TARG allocated for this XS sub */
-    dmy_op.op_targ = PL_op->op_targ;
-    PL_op = &dmy_op;
-    (void)*(PL_ppaddr[OP_RAND])(aTHX);
-    PL_op = old_op;
-#else
-    /* Initialize Drand01 if rand() or srand() has
-       not already been called
-    */
-    if (!PL_srand_called) {
-        (void)seedDrand01((Rand_seed_t)Perl_seed(aTHX));
-        PL_srand_called = TRUE;
-    }
-#endif
-
     for (index = items ; index > 1 ; ) {
 	int swap = (int)(Drand01() * (double)(index--));
 	SV *tmp = ST(swap);
@@ -411,6 +397,23 @@
 OUTPUT:
     RETVAL
 
+char *
+reftype_name(sv)
+    SV * sv
+PROTOTYPE: $
+CODE:
+{
+    if (SvMAGICAL(sv))
+	mg_get(sv);
+    if(!SvROK(sv)) {
+	XSRETURN_NO;
+    } else {
+        RETVAL = sv_reftype(SvRV(sv),FALSE);
+    }
+}
+OUTPUT:
+    RETVAL
+
 UV
 refaddr(sv)
     SV * sv
@@ -528,6 +531,176 @@
     XSRETURN(1);
 }
 
+
+void
+regex(sv)
+    SV * sv
+PROTOTYPE: $
+PREINIT:
+    STRLEN patlen;
+    char reflags[6];
+    int left;
+PPCODE:
+{
+    /*
+       Checks if a reference is a regex or not. If the parameter is
+       not a ref, or is not the result of a qr// then returns undef.
+       Otherwise in list context it returns the pattern and the
+       modifiers, in scalar context it returns the pattern just as it
+       would if the qr// was blessed into the package Regexp and
+       stringified normally.
+    */
+
+    if (SvMAGICAL(sv)) { /* is this if needed??? Why?*/
+        mg_get(sv);
+    }
+    if(!SvROK(sv)) {     /* bail if we dont have a ref. */
+        I32 gimme = GIMME_V;
+        if ( gimme == G_ARRAY ) {
+            XSRETURN(0);
+        } else {
+            XSRETURN_NO;
+        }
+    }
+    patlen=0;
+    left=0;
+    if (SvTHINKFIRST(sv))
+    {
+        sv = (SV*)SvRV(sv);
+        if (sv)
+        {
+            MAGIC *mg;
+            if (SvTYPE(sv)==SVt_PVMG)
+            {
+                if ( ((SvFLAGS(sv) &
+                       (SVs_OBJECT|SVf_OK|SVs_GMG|SVs_SMG|SVs_RMG))
+                      == (SVs_OBJECT|SU_Svs_SMG_OR_RMG))
+                     && (mg = mg_find(sv, PERL_MAGIC_qr)))
+                {
+                    /* Housten, we have a regex! */
+                    SV *pattern;
+                    regexp *re = (regexp *)mg->mg_obj;
+                    I32 gimme = GIMME_V;
+
+                    if ( gimme == G_ARRAY ) {
+                        /*
+                           we are in list/array context so stringify
+                           the modifiers that apply. We ignore "negative
+                           modifiers" in this scenario. Also we dont cache
+                           the modifiers. AFAICT there isnt anywhere for
+                           them to go.  :-(
+                        */
+
+                        char *fptr = "msix";
+                        char ch;
+                        U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12);
+
+                        while((ch = *fptr++)) {
+                            if(reganch & 1) {
+                                reflags[left++] = ch;
+                            }
+                            reganch >>= 1;
+                        }
+
+                        pattern = sv_2mortal(newSVpvn(re->precomp,re->prelen));
+                        if (re->reganch & ROPT_UTF8) SvUTF8_on(pattern);
+
+                        /* return the pattern and the modifiers */
+                        XPUSHs(pattern);
+                        XPUSHs(sv_2mortal(newSVpvn(reflags,left)));
+                        XSRETURN(2);
+                    } else {
+                            /*
+                               Non array/list context. So we build up the
+                               stringified form just as PL_sv_2pv does,
+                               and like it we also cache the result. The
+                               entire content of the if() is directly taken
+                               from PL_sv_2pv
+                            */
+
+                            if (!mg->mg_ptr )
+                            {
+                                char *fptr = "msix";
+                                char ch;
+                                int right = 4;
+                                char need_newline = 0;
+                                U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12);
+
+                                while((ch = *fptr++)) {
+                                    if(reganch & 1) {
+                                        reflags[left++] = ch;
+                                    }
+                                    else {
+                                        reflags[right--] = ch;
+                                    }
+                                    reganch >>= 1;
+                                }
+                                if(left != 4) {
+                                    reflags[left] = '-';
+                                    left = 5;
+                                }
+                                mg->mg_len = re->prelen + 4 + left;
+                                /*
+                                 * If /x was used, we have to worry about a regex
+                                 * ending with a comment later being embedded
+                                 * within another regex. If so, we don't want this
+                                 * regex's "commentization" to leak out to the
+                                 * right part of the enclosing regex, we must cap
+                                 * it with a newline.
+                                 *
+                                 * So, if /x was used, we scan backwards from the
+                                 * end of the regex. If we find a '#' before we
+                                 * find a newline, we need to add a newline
+                                 * ourself. If we find a '\n' first (or if we
+                                 * don't find '#' or '\n'), we don't need to add
+                                 * anything.  -jfriedl
+                                 */
+                                if (PMf_EXTENDED & re->reganch)
+                                {
+                                    char *endptr = re->precomp + re->prelen;
+                                    while (endptr >= re->precomp)
+                                    {
+                                        char c = *(endptr--);
+                                        if (c == '\n')
+                                            break; /* don't need another */
+                                        if (c == '#') {
+                                            /* we end while in a comment, so we
+                                               need a newline */
+                                            mg->mg_len++; /* save space for it */
+                                            need_newline = 1; /* note to add it */
+    					break;
+                                        }
+                                    }
+                                }
+                                /**/
+                                New(616, mg->mg_ptr, mg->mg_len + 1 + left, char);
+                                Copy("(?", mg->mg_ptr, 2, char);
+                                Copy(reflags, mg->mg_ptr+2, left, char);
+                                Copy(":", mg->mg_ptr+left+2, 1, char);
+                                Copy(re->precomp, mg->mg_ptr+3+left, re->prelen, char);
+                                if (need_newline)
+                                    mg->mg_ptr[mg->mg_len - 2] = '\n';
+                                mg->mg_ptr[mg->mg_len - 1] = ')';
+                                mg->mg_ptr[mg->mg_len] = 0;
+
+                            }
+                            /* return the pattern in (?msix:..) format */
+                            pattern = sv_2mortal(newSVpvn(mg->mg_ptr,mg->mg_len));
+                            if (re->reganch & ROPT_UTF8) SvUTF8_on(pattern);
+                            XPUSHs(pattern);
+                            XSRETURN(1);
+                    }
+                }
+            }
+        }
+    }
+    /* 'twould appear it aint a regex, so return undef/empty list */
+    XSRETURN_NO;
+}
+
+
+
+
 BOOT:
 {
     HV *lu_stash = gv_stashpvn("List::Util", 10, TRUE);
diff -wurd Scalar-List-Utils-1.18/lib/List/Util.pm Scalar-List-Utils-1.18_01/lib/List/Util.pm
--- Scalar-List-Utils-1.18/lib/List/Util.pm	2005-11-25 16:34:32.000000000 +0100
+++ Scalar-List-Utils-1.18_01/lib/List/Util.pm	2006-04-19 22:44:54.000000000 +0200
@@ -5,14 +5,14 @@
 # modify it under the same terms as Perl itself.
 
 package List::Util;
-
+BEGIN{ my $x=rand(1); } # Ensure the PRNG seed has been set -- for shuffle
 use strict;
 use vars qw(@ISA @EXPORT_OK $VERSION $XS_VERSION $TESTING_PERL_ONLY);
 require Exporter;
 
 @ISA        = qw(Exporter);
 @EXPORT_OK  = qw(first min max minstr maxstr reduce sum shuffle);
-$VERSION    = "1.18";
+$VERSION    = "1.18_01";
 $XS_VERSION = $VERSION;
 $VERSION    = eval $VERSION;
 
diff -wurd Scalar-List-Utils-1.18/lib/Scalar/Util.pm Scalar-List-Utils-1.18_01/lib/Scalar/Util.pm
--- Scalar-List-Utils-1.18/lib/Scalar/Util.pm	2005-11-25 16:34:32.000000000 +0100
+++ Scalar-List-Utils-1.18_01/lib/Scalar/Util.pm	2006-04-19 23:03:48.000000000 +0200
@@ -5,15 +5,18 @@
 # modify it under the same terms as Perl itself.
 
 package Scalar::Util;
-
+BEGIN{ my $x=rand(1); } # Ensure the PRNG seed has been set -- for shuffle
 use strict;
 use vars qw(@ISA @EXPORT_OK $VERSION);
 require Exporter;
 require List::Util; # List::Util loads the XS
 
 @ISA       = qw(Exporter);
-...@EXPORT_OK = qw(blessed dualvar reftype weaken isweak tainted readonly openhandle refaddr isvstring looks_like_number set_prototype);
-$VERSION    = "1.18";
+...@EXPORT_OK = qw(
+    blessed dualvar reftype reftype_name regex weaken isweak tainted
+    readonly openhandle refaddr isvstring looks_like_number set_prototype
+);
+$VERSION    = "1.18_01";
 $VERSION   = eval $VERSION;
 
 sub export_fail {
@@ -54,7 +57,7 @@
 eval <<'ESQ' unless defined &dualvar;
 
 use vars qw(@EXPORT_FAIL);
-push @EXPORT_FAIL, qw(weaken isweak dualvar isvstring set_prototype);
+push @EXPORT_FAIL, qw(weaken isweak dualvar isvstring set_prototype regex);
 
 # The code beyond here is only used if the XS is not installed
 
@@ -111,6 +114,10 @@
     : $t
 }
 
+sub reftype_name($) {
+    return reftype(shift(@_)) || ""
+}
+
 sub tainted {
   local($@, $SIG{__DIE__}, $SIG{__WARN__});
   local $^W = 0;
@@ -259,6 +266,34 @@
     $obj  = bless {}, "Foo";
     $type = reftype $obj;               # HASH
 
+=item reftype_name EXPR
+
+Equivelent to C<reftype()> except return false instead of undefined if EXPR
+does not evaluate to reference. More or less equivelent to
+
+   (reftype($foo)||'')
+
+This means you can say
+
+   if ( reftype($item) eq 'ARRAY' ) {
+
+and not worry about warnings.
+
+=item regex EXPR
+
+In scalar context returns the equivelent of stringification of a string, but
+bypassing overloaded stringification. The pattern will be of the form
+
+  (?ix-sm:PATTERN)
+
+In list context returns a two element list containing the PATTERN and the
+options seperately.
+
+   my $pat=regex($foo);
+   my ($pat,$opts)=regex($foo);
+
+If EXPR doesn't evaluate to a qr// object then returns false or the empty list.
+
 =item set_prototype CODEREF, PROTOTYPE
 
 Sets the prototype of the given function, or deletes it if PROTOTYPE is








Recent Messages in this Thread
demerphq Apr 19, 2006 09:54 pm
Graham Barr Apr 19, 2006 11:55 pm
demerphq Apr 20, 2006 06:16 am
Rafael Garcia-Suarez Apr 20, 2006 09:55 am
demerphq Apr 20, 2006 10:28 am
demerphq Apr 20, 2006 10:39 am
Rafael Garcia-Suarez Apr 20, 2006 10:50 am
demerphq Apr 20, 2006 10:54 am
Fergal Daly Apr 20, 2006 11:00 am
demerphq Apr 20, 2006 11:16 am
Marvin Humphrey Apr 20, 2006 01:16 pm
Graham Barr Apr 20, 2006 11:24 am
demerphq Apr 20, 2006 11:35 am
Rafael Garcia-Suarez Apr 20, 2006 12:20 pm
Graham Barr Apr 21, 2006 02:19 am
demerphq Apr 21, 2006 05:10 am
Rafael Garcia-Suarez Apr 21, 2006 07:22 am
Joshua ben Jore Apr 20, 2006 01:09 pm
Rafael Garcia-Suarez Apr 20, 2006 01:15 pm
Graham Barr Apr 20, 2006 11:20 am
demerphq Apr 20, 2006 11:34 am
Graham Barr Apr 21, 2006 02:18 am
Adam Kennedy Apr 22, 2006 02:22 pm
demerphq Apr 25, 2006 06:28 am
Fergal Daly Apr 25, 2006 10:18 am
demerphq Apr 25, 2006 12:54 pm
Dave Mitchell Apr 25, 2006 11:13 am
Ricardo SIGNES Apr 25, 2006 12:19 pm
demerphq Apr 25, 2006 01:02 pm
demerphq Apr 20, 2006 06:38 am
Yuval Kogman Apr 20, 2006 12:01 am
chromatic Apr 20, 2006 12:27 am
Yuval Kogman Apr 20, 2006 12:36 am
David Landgren Apr 20, 2006 06:11 am
demerphq Apr 20, 2006 06:19 am
Messages in this thread