Welcome, guest | Sign In | My Account | Store | Cart
# A regular expression that matches Python string literals.
# Triple-quoted strings and the u/r/b/f prefixes (either case, in the
# one- and two-letter combinations Python accepts) are supported.
# A backslash always consumes the character after it, so an escaped
# quote never terminates the literal, and a single-quoted literal may
# only cross a line break through a backslash-newline continuation.
# This regular expression must be compiled with the re.VERBOSE flag.
#
# The pattern is assembled from two Python literals so that each quote
# style can be written inside a literal delimited by the other style.
PY_STRING_LITERAL_RE = (r"""
(?:[uU][rR]? | [rR][bBfF]? | [bBfF][rR]?)?   # Optional string prefix
  (?:              # Single-quote (') strings
  '''(?:                 # Triple-quoted can contain...
      [^'\\]             | # a non-quote, non-backslash character
      \\[\s\S]           | # a backslash plus whatever follows it
      '{1,2}(?!')          # one or two quotes
    )*''' |
  '(?:                   # Non-triple-quoted can contain...
     [^'\\\n]            | # a non-quote, non-backslash, non-newline
     \\[\s\S]              # a backslash plus whatever follows it
   )*' | """+
r'''               # Double-quote (") strings
  """(?:                 # Triple-quoted can contain...
      [^"\\]             | # a non-quote, non-backslash character
      \\[\s\S]           | # a backslash plus whatever follows it
      "{1,2}(?!")          # one or two quotes
    )*""" |
  "(?:                   # Non-triple-quoted can contain...
     [^"\\\n]            | # a non-quote, non-backslash, non-newline
     \\[\s\S]              # a backslash plus whatever follows it
   )*"
)''')

# Example use case:
def replace_identifier(s, old, new):
    """
    Replace every occurrence of the Python identifier `old` with `new`
    in the given string `s` -- but do *not* modify occurrences of `old`
    that sit inside string literals or comments.  This could be used,
    e.g., for variable renaming.

    s   -- the source text to rewrite.
    old -- the identifier to look for; it is regex-escaped and matched
           only between word boundaries.
    new -- the replacement text; it is inserted as-is.

    Returns the rewritten string.
    """
    # Imported here so the function does not depend on a module-level
    # `import re` being present in the enclosing file.
    import re

    # A regexp that matches comments, strings, and `old` -- in that
    # order of preference at any given position.  `#` is escaped
    # because the pattern is compiled in verbose mode (the leading
    # `(?x)`), where a bare `#` would start a regexp comment.
    comment_re = r'\#.*'
    regexp = re.compile(r'(?x)%s|%s|(?P<old>\b%s\b)' %
                        (comment_re, PY_STRING_LITERAL_RE, re.escape(old)))

    # A callback used to find the replacement value for each match.
    def repl(match):
        if match.group('old'):
            # We matched `old`:
            return new
        # We matched a comment or string literal; put it back unchanged.
        return match.group()

    # Find all regexp matches, and use `repl()` to find the replacement
    # value for each.  Since re.sub only replaces leftmost
    # non-overlapping occurrences, occurrences of `old` inside strings
    # or comments will be matched as part of that string or comment,
    # and so won't be changed.
    return regexp.sub(repl, s)

History