Case insensitive replace
Question:
What’s the easiest way to do a case-insensitive string replacement in Python?
Answers:
The string
type doesn’t support this. You’re probably best off using the regular expression sub method with the re.IGNORECASE option.
>>> import re
>>> insensitive_hippo = re.compile(re.escape('hippo'), re.IGNORECASE)
>>> insensitive_hippo.sub('giraffe', 'I want a hIPpo for my birthday')
'I want a giraffe for my birthday'
import re
pattern = re.compile("hello", re.IGNORECASE)
pattern.sub("bye", "hello HeLLo HELLO")
# 'bye bye bye'
This doesn’t require regular expressions:
def ireplace(old: str, new: str, text: str) -> str:
    """
    Replace the first case-insensitive occurrence of ``old`` in ``text``.

    The match is located by comparing lower-cased copies of both strings;
    everything outside the matched span keeps its original case.

    Note: the offset is computed on ``text.lower()``, so the result is only
    reliable when lower-casing does not change the string length (a few
    characters such as U+0130 lower-case to two code points).

    Args:
        old: Substring to look for (compared case-insensitively).
        new: Replacement text, inserted verbatim.
        text: String to search.

    Returns:
        ``text`` with the first occurrence of ``old`` replaced by ``new``.

    Raises:
        ValueError: If ``old`` does not occur in ``text``.
    """
    # str.index (unlike str.find) raises ValueError when nothing matches.
    index_l = text.lower().index(old.lower())
    return text[:index_l] + new + text[index_l + len(old):]
Continuing on bFloch’s answer, this function will change not one, but all occurrences of old with new – in a case insensitive fashion.
def ireplace(old: str, new: str, text: str) -> str:
    """
    Replace every case-insensitive occurrence of ``old`` in ``text``.

    Matches are found left to right and do not overlap. Replacement text is
    never rescanned, so ``new`` may itself contain ``old``.

    Note: offsets are computed on ``text.lower()``, so the result is only
    reliable when lower-casing does not change the string length.

    Args:
        old: Substring to look for (compared case-insensitively).
        new: Replacement text, inserted verbatim.
        text: String to search.

    Returns:
        ``text`` with all occurrences replaced; ``text`` unchanged when
        ``old`` is empty or does not occur.
    """
    # An empty needle matches at every position and would never terminate.
    if not old:
        return text

    # Lower-case once up front instead of on every loop iteration.
    haystack = text.lower()
    needle = old.lower()

    pieces = []
    pos = 0
    while True:
        hit = haystack.find(needle, pos)
        if hit == -1:
            break
        pieces.append(text[pos:hit])
        pieces.append(new)
        pos = hit + len(old)
    pieces.append(text[pos:])
    return "".join(pieces)
# Case-insensitive replace without regular expressions: split a lower-cased
# copy of the text on the key, then recover each piece from the original
# string (by offset) so the untouched parts keep their case.
i = 'I want a hIPpo for my birthday'
key = 'hippo'
swp = 'giraffe'
# Lower-case the key too; otherwise a key containing capitals never matches.
o = i.lower().split(key.lower())
p = 0
for c, w in enumerate(o):
    # Swap the lower-cased piece for the same span of the original text.
    o[c] = i[p:p + len(w)]
    # Skip past this piece and the key occurrence that follows it.
    p += len(w) + len(key)
print(swp.join(o))
In a single line:
import re
re.sub("(?i)hello","bye", "hello HeLLo HELLO") #'bye bye bye'
re.sub("(?i)he.llo","bye", "he.llo He.LLo HE.LLO") #'bye bye bye'
Or, use the optional “flags” argument:
import re
re.sub("hello", "bye", "hello HeLLo HELLO", flags=re.I) #'bye bye bye'
re.sub("he.llo", "bye", "he.llo He.LLo HE.LLO", flags=re.I) #'bye bye bye'
Like Blair Conrad says string.replace doesn’t support this.
Use the regex re.sub
, but remember to escape the replacement string first. Note that there’s no flags-option in 2.6 for re.sub
, so you’ll have to use the embedded modifier '(?i)'
(or a RE-object, see Blair Conrad’s answer). Also, another pitfall is that sub will process backslash escapes in the replacement text, if a string is given. To avoid this one can instead pass in a lambda.
Here’s a function:
import re
def ireplace(old, repl, text):
    """
    Replace every case-insensitive occurrence of ``old`` in ``text``.

    Args:
        old: Literal text to look for; it is passed through ``re.escape`` so
            regex metacharacters have no special meaning.
        repl: Replacement text, inserted verbatim.
        text: String to search.

    Returns:
        ``text`` with all occurrences of ``old`` replaced by ``repl``.
    """
    # A function replacement is used so re.sub does not interpret backslash
    # escapes (``\n``, ``\1``, ...) in ``repl``.
    return re.sub('(?i)' + re.escape(old), lambda m: repl, text)
>>> ireplace('hippo?', 'giraffe!?', 'You want a hiPPO?')
'You want a giraffe!?'
>>> ireplace(r'[binfolder]', r'C:\Temp\bin', r'[BinFolder]\test.exe')
'C:\Temp\bin\test.exe'
I was having \t converted to an escape sequence (scroll down a bit), and I noticed that re.sub converts backslash-escaped characters in the replacement string into escape sequences.
To prevent that I wrote the following:
Replace case insensitive.
import re
def ireplace(findtxt, replacetxt, data):
    """
    Replace every case-insensitive match of ``findtxt`` in ``data``.

    Args:
        findtxt: Regular-expression pattern to search for. It is compiled
            as-is, so pass literal text through ``re.escape`` first.
        replacetxt: Replacement text, inserted verbatim (backslashes are not
            interpreted).
        data: String to search.

    Returns:
        ``data`` with each match replaced by ``replacetxt``.
    """
    # A function replacement keeps ``replacetxt`` literal, and unlike
    # split()+join() it does not leak capture-group text into the result.
    return re.compile(findtxt, flags=re.I).sub(lambda match: replacetxt, data)
Also, if you do want the special-meaning backslash sequences converted to escape characters, as happens in the other answers here, just decode your find and/or replace string. In Python 3, you might have to do something like .decode("unicode_escape") # python3
findtxt = findtxt.decode('string_escape') # python2
replacetxt = replacetxt.decode('string_escape') # python2
data = ireplace(findtxt, replacetxt, data)
Tested in Python 2.7.8
This function uses both the str.replace()
and re.findall()
functions.
It will replace all occurrences of pattern
in string
with repl
in a case-insensitive way.
def replace_all(pattern, repl, string) -> str:
    """
    Replace every case-insensitive match of ``pattern`` in ``string``.

    Args:
        pattern: Regular-expression pattern to search for.
        repl: Replacement text, inserted verbatim (backslashes are not
            interpreted).
        string: String to search.

    Returns:
        ``string`` with each match replaced by ``repl``.
    """
    # One re.sub pass replaces each match exactly where it was found, so
    # replacement text is never rescanned, regex context (e.g. ``\b``) is
    # honoured, and capture groups in ``pattern`` are harmless.
    return re.sub(pattern, lambda match: repl, string, flags=re.IGNORECASE)
An interesting observation about syntax details and options:
# Python 3.7.2 (tags/v3.7.2:9a3ffc0492, Dec 23 2018, 23:09:28) [MSC v.1916 64 bit (AMD64)] on win32
>>> import re
>>> old = "TREEROOT treeroot TREerOot"
>>> re.sub(r'(?i)treeroot', 'grassroot', old)
'grassroot grassroot grassroot'
>>> re.sub(r'treeroot', 'grassroot', old)
'TREEROOT grassroot TREerOot'
>>> re.sub(r'treeroot', 'grassroot', old, flags=re.I)
'grassroot grassroot grassroot'
>>> re.sub(r'treeroot', 'grassroot', old, re.I)
'TREEROOT grassroot TREerOot'
Using the (?i)
prefix in the match expression or adding flags=re.I
as a fourth argument will result in a case-insensitive match – however using just re.I
as the fourth argument does not result in case-insensitive match.
For comparison:
>>> re.findall(r'treeroot', old, re.I)
['TREEROOT', 'treeroot', 'TREerOot']
>>> re.findall(r'treeroot', old)
['treeroot']
What’s the easiest way to do a case-insensitive string replacement in Python?
The string
type doesn’t support this. You’re probably best off using the regular expression sub method with the re.IGNORECASE option.
>>> import re
>>> insensitive_hippo = re.compile(re.escape('hippo'), re.IGNORECASE)
>>> insensitive_hippo.sub('giraffe', 'I want a hIPpo for my birthday')
'I want a giraffe for my birthday'
import re
pattern = re.compile("hello", re.IGNORECASE)
pattern.sub("bye", "hello HeLLo HELLO")
# 'bye bye bye'
This doesn’t require regular expressions:
def ireplace(old: str, new: str, text: str) -> str:
    """
    Replace the first case-insensitive occurrence of ``old`` in ``text``.

    The match is located by comparing lower-cased copies of both strings;
    everything outside the matched span keeps its original case.

    Note: the offset is computed on ``text.lower()``, so the result is only
    reliable when lower-casing does not change the string length (a few
    characters such as U+0130 lower-case to two code points).

    Args:
        old: Substring to look for (compared case-insensitively).
        new: Replacement text, inserted verbatim.
        text: String to search.

    Returns:
        ``text`` with the first occurrence of ``old`` replaced by ``new``.

    Raises:
        ValueError: If ``old`` does not occur in ``text``.
    """
    # str.index (unlike str.find) raises ValueError when nothing matches.
    index_l = text.lower().index(old.lower())
    return text[:index_l] + new + text[index_l + len(old):]
Continuing on bFloch’s answer, this function will change not one, but all occurrences of old with new – in a case insensitive fashion.
def ireplace(old: str, new: str, text: str) -> str:
    """
    Replace every case-insensitive occurrence of ``old`` in ``text``.

    Matches are found left to right and do not overlap. Replacement text is
    never rescanned, so ``new`` may itself contain ``old``.

    Note: offsets are computed on ``text.lower()``, so the result is only
    reliable when lower-casing does not change the string length.

    Args:
        old: Substring to look for (compared case-insensitively).
        new: Replacement text, inserted verbatim.
        text: String to search.

    Returns:
        ``text`` with all occurrences replaced; ``text`` unchanged when
        ``old`` is empty or does not occur.
    """
    # An empty needle matches at every position and would never terminate.
    if not old:
        return text

    # Lower-case once up front instead of on every loop iteration.
    haystack = text.lower()
    needle = old.lower()

    pieces = []
    pos = 0
    while True:
        hit = haystack.find(needle, pos)
        if hit == -1:
            break
        pieces.append(text[pos:hit])
        pieces.append(new)
        pos = hit + len(old)
    pieces.append(text[pos:])
    return "".join(pieces)
# Case-insensitive replace without regular expressions: split a lower-cased
# copy of the text on the key, then recover each piece from the original
# string (by offset) so the untouched parts keep their case.
i = 'I want a hIPpo for my birthday'
key = 'hippo'
swp = 'giraffe'
# Lower-case the key too; otherwise a key containing capitals never matches.
o = i.lower().split(key.lower())
p = 0
for c, w in enumerate(o):
    # Swap the lower-cased piece for the same span of the original text.
    o[c] = i[p:p + len(w)]
    # Skip past this piece and the key occurrence that follows it.
    p += len(w) + len(key)
print(swp.join(o))
In a single line:
import re
re.sub("(?i)hello","bye", "hello HeLLo HELLO") #'bye bye bye'
re.sub("(?i)he.llo","bye", "he.llo He.LLo HE.LLO") #'bye bye bye'
Or, use the optional “flags” argument:
import re
re.sub("hello", "bye", "hello HeLLo HELLO", flags=re.I) #'bye bye bye'
re.sub("he.llo", "bye", "he.llo He.LLo HE.LLO", flags=re.I) #'bye bye bye'
Like Blair Conrad says string.replace doesn’t support this.
Use the regex re.sub
, but remember to escape the replacement string first. Note that there’s no flags-option in 2.6 for re.sub
, so you’ll have to use the embedded modifier '(?i)'
(or a RE-object, see Blair Conrad’s answer). Also, another pitfall is that sub will process backslash escapes in the replacement text, if a string is given. To avoid this one can instead pass in a lambda.
Here’s a function:
import re
def ireplace(old, repl, text):
    """
    Replace every case-insensitive occurrence of ``old`` in ``text``.

    Args:
        old: Literal text to look for; it is passed through ``re.escape`` so
            regex metacharacters have no special meaning.
        repl: Replacement text, inserted verbatim.
        text: String to search.

    Returns:
        ``text`` with all occurrences of ``old`` replaced by ``repl``.
    """
    # A function replacement is used so re.sub does not interpret backslash
    # escapes (``\n``, ``\1``, ...) in ``repl``.
    return re.sub('(?i)' + re.escape(old), lambda m: repl, text)
>>> ireplace('hippo?', 'giraffe!?', 'You want a hiPPO?')
'You want a giraffe!?'
>>> ireplace(r'[binfolder]', r'C:\Temp\bin', r'[BinFolder]\test.exe')
'C:\Temp\bin\test.exe'
I was having \t converted to an escape sequence (scroll down a bit), and I noticed that re.sub converts backslash-escaped characters in the replacement string into escape sequences.
To prevent that I wrote the following:
Replace case insensitive.
import re
def ireplace(findtxt, replacetxt, data):
    """
    Replace every case-insensitive match of ``findtxt`` in ``data``.

    Args:
        findtxt: Regular-expression pattern to search for. It is compiled
            as-is, so pass literal text through ``re.escape`` first.
        replacetxt: Replacement text, inserted verbatim (backslashes are not
            interpreted).
        data: String to search.

    Returns:
        ``data`` with each match replaced by ``replacetxt``.
    """
    # A function replacement keeps ``replacetxt`` literal, and unlike
    # split()+join() it does not leak capture-group text into the result.
    return re.compile(findtxt, flags=re.I).sub(lambda match: replacetxt, data)
Also, if you do want the special-meaning backslash sequences converted to escape characters, as happens in the other answers here, just decode your find and/or replace string. In Python 3, you might have to do something like .decode("unicode_escape") # python3
findtxt = findtxt.decode('string_escape') # python2
replacetxt = replacetxt.decode('string_escape') # python2
data = ireplace(findtxt, replacetxt, data)
Tested in Python 2.7.8
This function uses both the str.replace()
and re.findall()
functions.
It will replace all occurrences of pattern
in string
with repl
in a case-insensitive way.
def replace_all(pattern, repl, string) -> str:
    """
    Replace every case-insensitive match of ``pattern`` in ``string``.

    Args:
        pattern: Regular-expression pattern to search for.
        repl: Replacement text, inserted verbatim (backslashes are not
            interpreted).
        string: String to search.

    Returns:
        ``string`` with each match replaced by ``repl``.
    """
    # One re.sub pass replaces each match exactly where it was found, so
    # replacement text is never rescanned, regex context (e.g. ``\b``) is
    # honoured, and capture groups in ``pattern`` are harmless.
    return re.sub(pattern, lambda match: repl, string, flags=re.IGNORECASE)
An interesting observation about syntax details and options:
# Python 3.7.2 (tags/v3.7.2:9a3ffc0492, Dec 23 2018, 23:09:28) [MSC v.1916 64 bit (AMD64)] on win32
>>> import re
>>> old = "TREEROOT treeroot TREerOot"
>>> re.sub(r'(?i)treeroot', 'grassroot', old)
'grassroot grassroot grassroot'
>>> re.sub(r'treeroot', 'grassroot', old)
'TREEROOT grassroot TREerOot'
>>> re.sub(r'treeroot', 'grassroot', old, flags=re.I)
'grassroot grassroot grassroot'
>>> re.sub(r'treeroot', 'grassroot', old, re.I)
'TREEROOT grassroot TREerOot'
Using the (?i)
prefix in the match expression or adding flags=re.I
as a fourth argument will result in a case-insensitive match – however using just re.I
as the fourth argument does not result in case-insensitive match.
For comparison:
>>> re.findall(r'treeroot', old, re.I)
['TREEROOT', 'treeroot', 'TREerOot']
>>> re.findall(r'treeroot', old)
['treeroot']