Extracting info from customer requests using python regex

Question:

I have customer request log data as below (showing one request as an example):

# One sample request. The inner single quotes are escaped so the literal is
# valid Python; "\\n" is a literal backslash followed by "n" in the logged
# text (presumably how the log stores line breaks -- confirm against real data).
req = ['"Software   not   available   on  Software   Center,   when   tried   to   raise  AHO   for   required   software   it opens Software   Center   with   error   as  \'This   Software   is not   available   for  you\'    ,   Need   to   install   following software   for client   demo   urgently   -  nodejs,   intellij,   ,   angular,      mongo   db,  compass   ,   Java,  Open3DK   \\n (from   10.61.107.166)   \\n   Logged   through   #OneClickAHD#   \\n   Contact:Ashutosh   Suresh   Mitkari,   STGPWnCTZPSAPR   \\n   Email:   [email protected]"']

I need to extract:

  1. Extract all text before `(from`. If it is not found, return an empty list.
  2. Extract the IP address after `from`, stripping any surrounding blank space. If the pattern is not found, return an empty list.
  3. Extract the text between `#` and `#`. If the pattern is not found, return an empty list.
  4. Extract the name after `Contact:` up to the next `,`. If the pattern is not found, return an empty list.
  5. Extract the unit that follows the contact name, e.g. after `Contact:Ashutosh Suresh Mitkari,` the expected answer is 'STGPWnCTZPSAPR'. If the pattern is not found, return an empty list.
  6. Extract the email after `Email:`. If the pattern is not found, return an empty list.

Save them in separate lists as below:

# Initialize empty lists for each piece of information

request_list = []              # text of the request before "(from"
ip_address_list = []           # IP address following "from"
text_between_hashes_list = []  # text enclosed in "# ... #"
contact_name_list = []         # name after "Contact:" up to the comma
unit_list = []                 # unit following the contact name
email_list = []                # address after "Email:"

My try:

import re

# Print each field found in every request of `ahd_req`.
# The regex escapes (\n, \s, \d) are written out explicitly; without the
# backslash they would match the literal letters "n", "s" and "d".
for req in ahd_req:
    # extract till first \n
    match = re.search(r'^(.*?)\n', req)
    if match:
        print(match.group(1))
    
    # extract IP address after 'from'
    match = re.search(r'from\s+([\d.]+)', req)
    if match:
        print(match.group(1))
        
    # extract text between # #
    match = re.search(r'#(.*?)#', req)
    if match:
        print(match.group(1))
    
    # extract name after 'Contact:' till ,
    match = re.search(r'Contact:([^,]*),', req)
    if match:
        print(match.group(1))
    
    # extract unit after `Contact:<name>,` till \n\n
    match = re.search(r'Contact:.*?,\s*(.*?)\n\n', req)
    if match:
        print(match.group(1))

I am not getting the required result and need help.

Asked By: Nishant

||

Answers:

I’m not entirely sure if I understood you correctly, but I gave it a crack. Here is the code:

import re

# Sample input: one request string. Inner single quotes are escaped and "\\n"
# is a literal backslash followed by "n" inside the logged text.
req = [
    '"Software   not   available   on  Software   Center,   when   tried   to   raise  AHO   for   required   software   it opens Software   Center   with   error   as  \'This   Software   is not   available   for  you\'    ,   Need   to   install   following software   for client   demo   urgently   -  nodejs,   intellij,   ,   angular,      mongo   db,  compass   ,   Java,  Open3DK   \\n (from   10.61.107.166)   \\n   Logged   through   #OneClickAHD#   \\n   Contact:Ashutosh   Suresh   Mitkari,   STGPWnCTZPSAPR   \\n   Email:   [email protected]"']

# One result list per extracted field; each request contributes exactly one
# entry to every list ("" when the field is not found).
request_list = []
ip_address_list = []
text_between_hashes_list = []
contact_name_list = []
unit_list = []
email_list = []

for r in req:
    # Extract request: everything between the enclosing double quotes
    request_match = re.search(r'^"(.*)"$', r)
    if request_match:
        request = request_match.group(1)
        request_list.append(request)
    else:
        request_list.append("")

    # Extract IP address: dotted quad following "from"
    ip_match = re.search(r'from\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', r)
    if ip_match:
        ip_address = ip_match.group(1).strip()
        ip_address_list.append(ip_address)
    else:
        ip_address_list.append("")

    # Extract text between the first pair of '#' (lazy, so a later '#' in the
    # request does not widen the capture)
    text_between_hashes_match = re.search(r'#(.*?)#', r)
    if text_between_hashes_match:
        text_between_hashes = text_between_hashes_match.group(1)
        text_between_hashes_list.append(text_between_hashes)
    else:
        text_between_hashes_list.append("")

    # Extract contact name: from "Contact:" up to the first comma
    contact_name_match = re.search(r'Contact:([^,]*),', r)
    if contact_name_match:
        contact_name = contact_name_match.group(1).strip()
        contact_name_list.append(contact_name)
    else:
        contact_name_list.append("")

    # Extract unit: text after the contact's comma up to the next literal "\n"
    unit_match = re.search(r'Contact:[^,]*,\s*(.*?)\s*\\n', r)
    if unit_match:
        unit = unit_match.group(1).strip()
        unit_list.append(unit)
    else:
        unit_list.append("")

    # Extract email: text after "Email:" up to the closing double quote
    email_match = re.search(r'Email:\s+(.*)"$', r)
    if email_match:
        email = email_match.group(1).strip()
        email_list.append(email)
    else:
        email_list.append("")

print(f"request_list: {request_list}")
print(f"ip_address_list: {ip_address_list}")
print(f"text_between_hashes_list: {text_between_hashes_list}")
print(f"contact_name_list: {contact_name_list}")
print(f"unit_list: {unit_list}")
print(f"email_list: {email_list}")
Answered By: DaneOH-89
import re

# Sample input: one request string. Inner single quotes are escaped and "\\n"
# is a literal backslash followed by "n" inside the logged text.
req = [
    '"Software   not   available   on  Software   Center,   when   tried   to   raise  AHO   for   required   software   it opens Software   Center   with   error   as  \'This   Software   is not   available   for  you\'    ,   Need   to   install   following software   for client   demo   urgently   -  nodejs,   intellij,   ,   angular,      mongo   db,  compass   ,   Java,  Open3DK   \\n (from   10.61.107.166)   \\n   Logged   through   #OneClickAHD#   \\n   Contact:Ashutosh   Suresh   Mitkari,   STGPWnCTZPSAPR   \\n   Email:   [email protected]"']

# One result list per extracted field; each request contributes exactly one
# entry to every list ("" when the field is not found).
request_list = []
ip_address_list = []
text_between_hashes_list = []
contact_name_list = []
unit_list = []
email_list = []

for r in req:
    # Extract request: all text before the whitespace preceding "(from".
    # The parenthesis is escaped so it is matched literally instead of
    # opening a (never closed) group.
    request_match = re.search(r'^(.*)\s+\(from', r)
    if request_match:
        request = request_match.group(1)
        request_list.append(request)
    else:
        request_list.append("")

    # Extract IP address: dotted quad following "from"
    ip_match = re.search(r'from\s+(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', r)
    if ip_match:
        ip_address = ip_match.group(1).strip()
        ip_address_list.append(ip_address)
    else:
        ip_address_list.append("")

    # Extract text between the first pair of '#' (lazy, so a later '#' in the
    # request does not widen the capture)
    text_between_hashes_match = re.search(r'#(.*?)#', r)
    if text_between_hashes_match:
        text_between_hashes = text_between_hashes_match.group(1)
        text_between_hashes_list.append(text_between_hashes)
    else:
        text_between_hashes_list.append("")

    # Extract contact name: from "Contact:" up to the first comma
    contact_name_match = re.search(r'Contact:([^,]*),', r)
    if contact_name_match:
        contact_name = contact_name_match.group(1).strip()
        contact_name_list.append(contact_name)
    else:
        contact_name_list.append("")

    # Extract unit: text after the contact's comma up to the next literal "\n"
    unit_match = re.search(r'Contact:[^,]*,\s*(.*?)\s*\\n', r)
    if unit_match:
        unit = unit_match.group(1).strip()
        unit_list.append(unit)
    else:
        unit_list.append("")

    # Extract email: text after "Email:" up to the closing double quote
    email_match = re.search(r'Email:\s+(.*)"$', r)
    if email_match:
        email = email_match.group(1).strip()
        email_list.append(email)
    else:
        email_list.append("")

print(f"request_list: {request_list}")
print(f"ip_address_list: {ip_address_list}")
print(f"text_between_hashes_list: {text_between_hashes_list}")
print(f"contact_name_list: {contact_name_list}")
print(f"unit_list: {unit_list}")
print(f"email_list: {email_list}")
Answered By: Deepak Gautam
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.