Best pythonic way to merge consecutive upper case characters in a string python
Question:
I would need assistance finding the best Pythonic way to merge consecutive upper-case characters in a Python string.
Example:
Input: You can pay N O W or Pay me Back MY Money later
Output: You can pay NOW or Pay me Back MY Money later
I am going with a very quick & dirty approach temporarily
s = 'lets P A Y N O W'

# Each entry is [start, end, merged_text]: s[start:end] is one run of single
# capital letters separated by single spaces, and merged_text is that run with
# the spaces removed.
replace_maps = []
replace_str = ''
run_start = None  # offset of the run being collected; None when outside a run
run_end = 0       # offset just past the last capital of the current run
offset = 0        # offset of the current word inside s
for word in s.split(' '):
    if len(word) == 1 and word.isupper():
        if run_start is None:
            run_start = offset
            replace_str = ''
        replace_str += word
        run_end = offset + 1
    elif run_start is not None:
        # A normal word (or an empty token from a double space) ends the run.
        replace_maps.append([run_start, run_end, replace_str])
        run_start = None
    offset += len(word) + 1  # +1 for the separating space
if run_start is not None:
    # The string ended while still inside a run.
    replace_maps.append([run_start, run_end, replace_str])

# Apply the replacements right to left so the earlier offsets stay valid.
new_s = s
for start, end, merged_text in reversed(replace_maps):
    new_s = new_s[:start] + merged_text + new_s[end:]
new_s
# Output: lets PAYNOW
Answers:
I don’t know what the most Pythonic way would be; I can only show you what I came up with.
import re
def merge_cons_up(string):
    """Merge runs of space-separated single capital letters into one word.

    A run is two or more upper-case ASCII letters, each separated from the
    next by exactly one space, e.g. ``"C O V I D"`` -> ``"COVID"``.  The first
    letter of a run must start the string or follow a space, and no letter of
    the run may be followed directly by a letter, digit, ``_``, ``.`` or
    ``-``, so ordinary capitalised words such as "Pay" or "MY" are untouched.

    Every run is merged on its own, and a lone capital letter is left alone.

    Args:
        string: The text to clean up.

    Returns:
        A copy of ``string`` with the inner spaces of every run removed.
    """
    # One stand-alone capital: not glued to a following word character.
    letter = r"[A-Z](?![a-zA-Z0-9_.-])"
    # (?<![^ ]) means "at the start of the string or right after a space".
    pattern = re.compile(r"(?<![^ ])" + letter + r"(?: " + letter + r")+")
    # Replace each run in a single pass so separate runs never get mixed.
    return pattern.sub(lambda match: match.group().replace(" ", ""), string)


print(merge_cons_up("I could not find a C O V I D patient in the hospital."))
Output:
I could not find a COVID patient in the hospital.
The best idea is to use a look-ahead (?=...)
and a look-behind (?<=...)
to check for upper-case letters on both sides of the whitespace.
For more info on regex, see the documentation of Python's re module.
This regex should do the job:
import re

data = "I could not find a C O V I D patient in the hospital."
# Remove any whitespace character (\s) that sits directly between two capital
# letters; the look-behind/look-ahead check the neighbours without consuming them.
merged = re.sub(r"(?<=[A-Z])\s(?=[A-Z])", r'', data)
merged
'I could not find a COVID patient in the hospital.'
EDIT
Regarding your new input after question modification
data = "You can pay N O W or Pay me Back MY Money later"
# A whitespace character is dropped only when it sits between two stand-alone
# capitals: the \b on each side keeps "MY Money" and "MY N" apart, while still
# matching a capital that ends the string or is followed by punctuation.
spaced_caps = re.compile(r"(?<=\b[A-Z])\s(?=[A-Z]\b)")
merged = spaced_caps.sub(r'', data)
merged
output
'You can pay NOW or Pay me Back MY Money later'
without regex:
# Sample input; replace with the text you want to clean up.
mystring = "You can pay N O W or Pay me Back MY Money later"

words = mystring.split(" ")
pieces = [words[0]]
for previous, current in zip(words, words[1:]):
    # Keep the separating space unless both neighbouring words are entirely
    # upper case (str.isupper), in which case they are glued together.
    if not (previous.isupper() and current.isupper()):
        pieces.append(" ")
    pieces.append(current)
res = "".join(pieces)
I would need assistance finding the best Pythonic way to merge consecutive upper-case characters in a Python string.
Example:
Input: You can pay N O W or Pay me Back MY Money later
Output: You can pay NOW or Pay me Back MY Money later
I am going with a very quick & dirty approach temporarily
s = 'lets P A Y N O W'

# Each entry is [start, end, merged_text]: s[start:end] is one run of single
# capital letters separated by single spaces, and merged_text is that run with
# the spaces removed.
replace_maps = []
replace_str = ''
run_start = None  # offset of the run being collected; None when outside a run
run_end = 0       # offset just past the last capital of the current run
offset = 0        # offset of the current word inside s
for word in s.split(' '):
    if len(word) == 1 and word.isupper():
        if run_start is None:
            run_start = offset
            replace_str = ''
        replace_str += word
        run_end = offset + 1
    elif run_start is not None:
        # A normal word (or an empty token from a double space) ends the run.
        replace_maps.append([run_start, run_end, replace_str])
        run_start = None
    offset += len(word) + 1  # +1 for the separating space
if run_start is not None:
    # The string ended while still inside a run.
    replace_maps.append([run_start, run_end, replace_str])

# Apply the replacements right to left so the earlier offsets stay valid.
new_s = s
for start, end, merged_text in reversed(replace_maps):
    new_s = new_s[:start] + merged_text + new_s[end:]
new_s
# Output: lets PAYNOW
I don’t know what the most Pythonic way would be; I can only show you what I came up with.
import re
def merge_cons_up(string):
    """Merge runs of space-separated single capital letters into one word.

    A run is two or more upper-case ASCII letters, each separated from the
    next by exactly one space, e.g. ``"C O V I D"`` -> ``"COVID"``.  The first
    letter of a run must start the string or follow a space, and no letter of
    the run may be followed directly by a letter, digit, ``_``, ``.`` or
    ``-``, so ordinary capitalised words such as "Pay" or "MY" are untouched.

    Every run is merged on its own, and a lone capital letter is left alone.

    Args:
        string: The text to clean up.

    Returns:
        A copy of ``string`` with the inner spaces of every run removed.
    """
    # One stand-alone capital: not glued to a following word character.
    letter = r"[A-Z](?![a-zA-Z0-9_.-])"
    # (?<![^ ]) means "at the start of the string or right after a space".
    pattern = re.compile(r"(?<![^ ])" + letter + r"(?: " + letter + r")+")
    # Replace each run in a single pass so separate runs never get mixed.
    return pattern.sub(lambda match: match.group().replace(" ", ""), string)


print(merge_cons_up("I could not find a C O V I D patient in the hospital."))
Output:
I could not find a COVID patient in the hospital.
The best idea is to use a look-ahead (?=...)
and a look-behind (?<=...)
to check for upper-case letters on both sides of the whitespace.
For more info on regex, see the documentation of Python's re module.
This regex should do the job:
import re

data = "I could not find a C O V I D patient in the hospital."
# Remove any whitespace character (\s) that sits directly between two capital
# letters; the look-behind/look-ahead check the neighbours without consuming them.
merged = re.sub(r"(?<=[A-Z])\s(?=[A-Z])", r'', data)
merged
'I could not find a COVID patient in the hospital.'
EDIT
Regarding your new input after question modification
data = "You can pay N O W or Pay me Back MY Money later"
# A whitespace character is dropped only when it sits between two stand-alone
# capitals: the \b on each side keeps "MY Money" and "MY N" apart, while still
# matching a capital that ends the string or is followed by punctuation.
spaced_caps = re.compile(r"(?<=\b[A-Z])\s(?=[A-Z]\b)")
merged = spaced_caps.sub(r'', data)
merged
output
'You can pay NOW or Pay me Back MY Money later'
without regex:
# Sample input; replace with the text you want to clean up.
mystring = "You can pay N O W or Pay me Back MY Money later"

words = mystring.split(" ")
pieces = [words[0]]
for previous, current in zip(words, words[1:]):
    # Keep the separating space unless both neighbouring words are entirely
    # upper case (str.isupper), in which case they are glued together.
    if not (previous.isupper() and current.isupper()):
        pieces.append(" ")
    pieces.append(current)
res = "".join(pieces)