Extracting info from customer requests using python regex
Question:
I have customer request log data as below (showing one request as an example):
# Sample request record (one element per customer request).
# NOTE(review): the field separators are assumed to be the two-character
# sequence backslash + "n" (hence "\\n"), not real newlines — confirm
# against the real log data.
req = [
    '"Software not available on Software Center, when tried to raise AHO '
    'for required software it opens Software Center with error as '
    "'This Software is not available for you' , Need to install following "
    'software for client demo urgently - nodejs, intellij, , angular, '
    'mongo db, compass , Java, Open3DK \\n (from 10.61.107.166) \\n '
    'Logged through #OneClickAHD# \\n '
    'Contact:Ashutosh Suresh Mitkari, STGPWnCTZPSAPR \\n '
    'Email: [email protected]"'
]
I need to extract:
- extract all text before
(from
and if not encountered return an empty list.
- extract ip address after
from
. Also strip any blank space. If pattern not found return empty list
- extract text between
# #
. If pattern not found return empty list
- extract the name after
contact:
till ,
. If pattern not found return empty list
- extract the unit that follows the contact name; in the example, the text after
Contact:Ashutosh Suresh Mitkari,
. expected answer=’STGPWnCTZPSAPR’. If pattern not found return empty list
- extract email after
Email:
. If pattern not found return empty list
save them in separate list as below:
initialize empty lists for each piece of information
# One independent, initially empty result list per extracted field.
(
    request_list,
    ip_address_list,
    text_between_hashes_list,
    contact_name_list,
    unit_list,
    email_list,
) = ([] for _ in range(6))
My try:
import re
# Print each field found in every request string.
# NOTE(review): ahd_req is not defined in this snippet; presumably it is the
# list of request strings shown in the question — confirm.
# NOTE(review): "\n" in the patterns below matches a real newline character.
# If the log text holds a literal backslash followed by "n" instead, these
# patterns will not match and need "\\n".
for req in ahd_req:
    # extract till first \n
    match = re.search(r'^(.*?)\n', req)
    if match:
        print(match.group(1))
    # extract IP address after 'from'
    match = re.search(r'from\s+([\d.]+)', req)
    if match:
        print(match.group(1))
    # extract text between # #
    match = re.search(r'#(.*?)#', req)
    if match:
        print(match.group(1))
    # extract name after 'contact:' till ,
    match = re.search(r'Contact:([^,]*),', req)
    if match:
        print(match.group(1))
    # extract unit after `Contact:Ramesh Najukrao Sangle,` till \n\n
    match = re.search(r'Contact:.*?,\s*(.*?)\n\n', req)
    if match:
        print(match.group(1))
I am not getting the required result and need help.
Answers:
I’m not entirely sure I understood you correctly, but I gave it a crack. Here is the code:
import re
# Sample request record (one element per customer request).
# NOTE(review): the field separators are assumed to be the two-character
# sequence backslash + "n" (hence "\\n"), not real newlines — confirm
# against the real log data.
req = [
    '"Software not available on Software Center, when tried to raise AHO '
    'for required software it opens Software Center with error as '
    "'This Software is not available for you' , Need to install following "
    'software for client demo urgently - nodejs, intellij, , angular, '
    'mongo db, compass , Java, Open3DK \\n (from 10.61.107.166) \\n '
    'Logged through #OneClickAHD# \\n '
    'Contact:Ashutosh Suresh Mitkari, STGPWnCTZPSAPR \\n '
    'Email: [email protected]"'
]

# One result list per field. Every request appends exactly one entry to each
# list ("" when the field is missing), so the lists stay index-aligned.
request_list = []
ip_address_list = []
text_between_hashes_list = []
contact_name_list = []
unit_list = []
email_list = []

for r in req:
    # Whole request with the surrounding double quotes removed.
    m = re.search(r'^"(.*)"$', r)
    request_list.append(m.group(1) if m else "")

    # Dotted-quad IP address following the word "from".
    m = re.search(r'from\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', r)
    ip_address_list.append(m.group(1).strip() if m else "")

    # Text between the first pair of '#'; lazy so it stops at the next '#'.
    m = re.search(r'#(.*?)#', r)
    text_between_hashes_list.append(m.group(1) if m else "")

    # Contact name: everything after "Contact:" up to the first comma.
    m = re.search(r'Contact:([^,]*),', r)
    contact_name_list.append(m.group(1).strip() if m else "")

    # Unit: text after the contact name's comma, up to the literal "\n" marker.
    m = re.search(r'Contact:[^,]*,\s*(.*?)\s*\\n', r)
    unit_list.append(m.group(1).strip() if m else "")

    # Email: remainder of the record after "Email:", minus the closing quote.
    m = re.search(r'Email:\s+(.*)"$', r)
    email_list.append(m.group(1).strip() if m else "")

print(f"request_list: {request_list}")
print(f"ip_address_list: {ip_address_list}")
print(f"text_between_hashes_list: {text_between_hashes_list}")
print(f"contact_name_list: {contact_name_list}")
print(f"unit_list: {unit_list}")
print(f"email_list: {email_list}")
import re
# Sample request record (one element per customer request).
# NOTE(review): the field separators are assumed to be the two-character
# sequence backslash + "n" (hence "\\n"), not real newlines — confirm
# against the real log data.
req = [
    '"Software not available on Software Center, when tried to raise AHO '
    'for required software it opens Software Center with error as '
    "'This Software is not available for you' , Need to install following "
    'software for client demo urgently - nodejs, intellij, , angular, '
    'mongo db, compass , Java, Open3DK \\n (from 10.61.107.166) \\n '
    'Logged through #OneClickAHD# \\n '
    'Contact:Ashutosh Suresh Mitkari, STGPWnCTZPSAPR \\n '
    'Email: [email protected]"'
]

# One result list per field. Every request appends exactly one entry to each
# list ("" when the field is missing), so the lists stay index-aligned.
request_list = []
ip_address_list = []
text_between_hashes_list = []
contact_name_list = []
unit_list = []
email_list = []

for r in req:
    # Request text: everything before the first "(from"; the parenthesis must
    # be escaped, otherwise it opens an unbalanced regex group.
    m = re.search(r'^(.*?)\s*\(from', r)
    request_list.append(m.group(1) if m else "")

    # Dotted-quad IP address following the word "from".
    m = re.search(r'from\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', r)
    ip_address_list.append(m.group(1).strip() if m else "")

    # Text between the first pair of '#'; lazy so it stops at the next '#'.
    m = re.search(r'#(.*?)#', r)
    text_between_hashes_list.append(m.group(1) if m else "")

    # Contact name: everything after "Contact:" up to the first comma.
    m = re.search(r'Contact:([^,]*),', r)
    contact_name_list.append(m.group(1).strip() if m else "")

    # Unit: text after the contact name's comma, up to the literal "\n" marker.
    m = re.search(r'Contact:[^,]*,\s*(.*?)\s*\\n', r)
    unit_list.append(m.group(1).strip() if m else "")

    # Email: remainder of the record after "Email:", minus the closing quote.
    m = re.search(r'Email:\s+(.*)"$', r)
    email_list.append(m.group(1).strip() if m else "")

print(f"request_list: {request_list}")
print(f"ip_address_list: {ip_address_list}")
print(f"text_between_hashes_list: {text_between_hashes_list}")
print(f"contact_name_list: {contact_name_list}")
print(f"unit_list: {unit_list}")
print(f"email_list: {email_list}")
I have customer request log data as below (showing one request as an example):
# Sample request record (one element per customer request).
# NOTE(review): the field separators are assumed to be the two-character
# sequence backslash + "n" (hence "\\n"), not real newlines — confirm
# against the real log data.
req = [
    '"Software not available on Software Center, when tried to raise AHO '
    'for required software it opens Software Center with error as '
    "'This Software is not available for you' , Need to install following "
    'software for client demo urgently - nodejs, intellij, , angular, '
    'mongo db, compass , Java, Open3DK \\n (from 10.61.107.166) \\n '
    'Logged through #OneClickAHD# \\n '
    'Contact:Ashutosh Suresh Mitkari, STGPWnCTZPSAPR \\n '
    'Email: [email protected]"'
]
I need to extract:
- extract all text before
(from
and if not encountered return an empty list. - extract ip address after
from
. Also strip any blank space. If pattern not found return empty list - extract text between
# #
. If pattern not found return empty list - extract the name after
contact:
till ,
. If pattern not found return empty list - extract unit after say from example
Contact:Ashutosh Suresh Mitkari,
. expected answer=’STGPWnCTZPSAPR’. If pattern not found return empty list - extract email after
Email:
. If pattern not found return empty list
save them in separate list as below:
initialize empty lists for each piece of information
# One independent, initially empty result list per extracted field.
(
    request_list,
    ip_address_list,
    text_between_hashes_list,
    contact_name_list,
    unit_list,
    email_list,
) = ([] for _ in range(6))
My try:
import re
# Print each field found in every request string.
# NOTE(review): ahd_req is not defined in this snippet; presumably it is the
# list of request strings shown in the question — confirm.
# NOTE(review): "\n" in the patterns below matches a real newline character.
# If the log text holds a literal backslash followed by "n" instead, these
# patterns will not match and need "\\n".
for req in ahd_req:
    # extract till first \n
    match = re.search(r'^(.*?)\n', req)
    if match:
        print(match.group(1))
    # extract IP address after 'from'
    match = re.search(r'from\s+([\d.]+)', req)
    if match:
        print(match.group(1))
    # extract text between # #
    match = re.search(r'#(.*?)#', req)
    if match:
        print(match.group(1))
    # extract name after 'contact:' till ,
    match = re.search(r'Contact:([^,]*),', req)
    if match:
        print(match.group(1))
    # extract unit after `Contact:Ramesh Najukrao Sangle,` till \n\n
    match = re.search(r'Contact:.*?,\s*(.*?)\n\n', req)
    if match:
        print(match.group(1))
I am not getting the required result and need help.
I’m not entirely sure I understood you correctly, but I gave it a crack. Here is the code:
import re
# Sample request record (one element per customer request).
# NOTE(review): the field separators are assumed to be the two-character
# sequence backslash + "n" (hence "\\n"), not real newlines — confirm
# against the real log data.
req = [
    '"Software not available on Software Center, when tried to raise AHO '
    'for required software it opens Software Center with error as '
    "'This Software is not available for you' , Need to install following "
    'software for client demo urgently - nodejs, intellij, , angular, '
    'mongo db, compass , Java, Open3DK \\n (from 10.61.107.166) \\n '
    'Logged through #OneClickAHD# \\n '
    'Contact:Ashutosh Suresh Mitkari, STGPWnCTZPSAPR \\n '
    'Email: [email protected]"'
]

# One result list per field. Every request appends exactly one entry to each
# list ("" when the field is missing), so the lists stay index-aligned.
request_list = []
ip_address_list = []
text_between_hashes_list = []
contact_name_list = []
unit_list = []
email_list = []

for r in req:
    # Whole request with the surrounding double quotes removed.
    m = re.search(r'^"(.*)"$', r)
    request_list.append(m.group(1) if m else "")

    # Dotted-quad IP address following the word "from".
    m = re.search(r'from\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', r)
    ip_address_list.append(m.group(1).strip() if m else "")

    # Text between the first pair of '#'; lazy so it stops at the next '#'.
    m = re.search(r'#(.*?)#', r)
    text_between_hashes_list.append(m.group(1) if m else "")

    # Contact name: everything after "Contact:" up to the first comma.
    m = re.search(r'Contact:([^,]*),', r)
    contact_name_list.append(m.group(1).strip() if m else "")

    # Unit: text after the contact name's comma, up to the literal "\n" marker.
    m = re.search(r'Contact:[^,]*,\s*(.*?)\s*\\n', r)
    unit_list.append(m.group(1).strip() if m else "")

    # Email: remainder of the record after "Email:", minus the closing quote.
    m = re.search(r'Email:\s+(.*)"$', r)
    email_list.append(m.group(1).strip() if m else "")

print(f"request_list: {request_list}")
print(f"ip_address_list: {ip_address_list}")
print(f"text_between_hashes_list: {text_between_hashes_list}")
print(f"contact_name_list: {contact_name_list}")
print(f"unit_list: {unit_list}")
print(f"email_list: {email_list}")
import re
# Sample request record (one element per customer request).
# NOTE(review): the field separators are assumed to be the two-character
# sequence backslash + "n" (hence "\\n"), not real newlines — confirm
# against the real log data.
req = [
    '"Software not available on Software Center, when tried to raise AHO '
    'for required software it opens Software Center with error as '
    "'This Software is not available for you' , Need to install following "
    'software for client demo urgently - nodejs, intellij, , angular, '
    'mongo db, compass , Java, Open3DK \\n (from 10.61.107.166) \\n '
    'Logged through #OneClickAHD# \\n '
    'Contact:Ashutosh Suresh Mitkari, STGPWnCTZPSAPR \\n '
    'Email: [email protected]"'
]

# One result list per field. Every request appends exactly one entry to each
# list ("" when the field is missing), so the lists stay index-aligned.
request_list = []
ip_address_list = []
text_between_hashes_list = []
contact_name_list = []
unit_list = []
email_list = []

for r in req:
    # Request text: everything before the first "(from"; the parenthesis must
    # be escaped, otherwise it opens an unbalanced regex group.
    m = re.search(r'^(.*?)\s*\(from', r)
    request_list.append(m.group(1) if m else "")

    # Dotted-quad IP address following the word "from".
    m = re.search(r'from\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', r)
    ip_address_list.append(m.group(1).strip() if m else "")

    # Text between the first pair of '#'; lazy so it stops at the next '#'.
    m = re.search(r'#(.*?)#', r)
    text_between_hashes_list.append(m.group(1) if m else "")

    # Contact name: everything after "Contact:" up to the first comma.
    m = re.search(r'Contact:([^,]*),', r)
    contact_name_list.append(m.group(1).strip() if m else "")

    # Unit: text after the contact name's comma, up to the literal "\n" marker.
    m = re.search(r'Contact:[^,]*,\s*(.*?)\s*\\n', r)
    unit_list.append(m.group(1).strip() if m else "")

    # Email: remainder of the record after "Email:", minus the closing quote.
    m = re.search(r'Email:\s+(.*)"$', r)
    email_list.append(m.group(1).strip() if m else "")

print(f"request_list: {request_list}")
print(f"ip_address_list: {ip_address_list}")
print(f"text_between_hashes_list: {text_between_hashes_list}")
print(f"contact_name_list: {contact_name_list}")
print(f"unit_list: {unit_list}")
print(f"email_list: {email_list}")