Convert Logfile to CSV
Question:
I would like to request assistance with a Python script to convert a logfile into a csv file. I am looking for the Python code to do this. My input information looks as follows:
Exception: Cannot open file "C:\ProgramData\Presence\Log\pco_nhp01_CT_1800.log". Access is denied
Original message: [27/01/2023 12:37:44:675] TID:[14588]
ENTER FUNCTION
SetActive :: =
{
Value: True,
}
[27/01/2023 12:37:44:675] TID:[14588]
VERBOSE
GetServerIP : 10.10.10.155
[27/01/2023 12:37:44:691] TID:[14588]
LEAVE FUNCTION
SetActive :: =
{
Active: True,
}
[27/01/2023 12:37:44:694] TID:[14588]
ENTER FUNCTION
SetActive :: =
{
Value: True,
}
[27/01/2023 12:37:44:694] TID:[14588]
VERBOSE
GetServerIP : 10.10.10.155
[27/01/2023 12:37:44:703] TID:[14588]
LEAVE FUNCTION
SetActive :: =
{
Active: True,
}
[27/01/2023 12:37:44:703] TID:[14588]
ENTER FUNCTION
MonitorDevice :: =
{
Device: 201122,
}
[27/01/2023 12:37:44:707] TID:[7060]
ENTER FUNCTION
TEventsManager.AddEvent :: =
{
ACSTAEvent: CSTACONFIRMATION CSTAR_MONITORS_CON,
CTIRequestID: 2,
}
[27/01/2023 12:37:53:711] TID:[7060]
LEAVE FUNCTION
TEventsManager.AddEvent
Here is the code I wrote to try to solve this; however, I am missing something: the messages are not returned correctly, and I have not been able to work out how to extract the actions. Here is my attempt:
import csv

# Read the whole log into memory as a list of lines.
with open('pco_nhp01_CT_1800.log', 'r') as log_file:
    log_data = log_file.readlines()

with open('logfile.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['Datetime', 'TID', 'Message'])

    for line in log_data:
        # Only lines that begin with '[' (the "[timestamp] TID:[n]" headers)
        # are inspected; the lines following each header are never read.
        if line.startswith('['):
            parts = line.split(']')
            # Text between the leading '[' and the first ']'.
            datetime = parts[0][1:]
            # Skip the 6 characters of " TID:[" to leave the thread id.
            tid = parts[1][6:]
            # Whatever follows the second ']' on this same line, minus its
            # first character.
            message = parts[2][1:]
            writer.writerow([datetime, tid, message])
Answers:
I’d split the original by the timestamp regexp, then work from there:
import re

# Sample log text; each record starts with a "[timestamp] TID:[n]" header.
data = r"""
[27/01/2023 12:37:44:675] TID:[14588]
ENTER FUNCTION
SetActive :: =
{
Value: True,
}
[27/01/2023 12:37:44:675] TID:[14588]
... etc... cut for brevity
"""

# The brackets are escaped so they match literally, and the whole pattern is
# a capturing group so that re.split() keeps each timestamp in its result.
timestamp_re = re.compile(
    r"(^\[\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}:\d{3}\])", re.MULTILINE
)

# split[0] is the text before the first timestamp; after that the list
# alternates timestamp, following text, timestamp, following text, ...
split = timestamp_re.split(data.strip())
timestamps, data = split[1::2], split[2::2]

for timestamp, datum in zip(timestamps, data):
    timestamp = timestamp.strip("[]")
    datum_lines = datum.strip().splitlines()
    # First remaining line is the rest of the header, e.g. "TID:[14588]".
    tid_line = datum_lines.pop(0)
    tid = tid_line[5:-1]  # drop the "TID:[" prefix and the trailing "]"
    # Second line is the action; tolerate a record that has only a header.
    func = datum_lines.pop(0) if datum_lines else ""
    # Everything left is the message, flattened onto one line.
    rest = " ".join(datum_lines)
    print([timestamp, tid, func, rest])
This outputs
['27/01/2023 12:37:44:675', '14588', 'ENTER FUNCTION', 'SetActive :: = { Value: True, }']
['27/01/2023 12:37:44:675', '14588', 'VERBOSE', 'GetServerIP : 10.10.10.155']
['27/01/2023 12:37:44:691', '14588', 'LEAVE FUNCTION', 'SetActive :: = { Active: True, }']
['27/01/2023 12:37:44:694', '14588', 'ENTER FUNCTION', 'SetActive :: = { Value: True, }']
['27/01/2023 12:37:44:694', '14588', 'VERBOSE', 'GetServerIP : 10.10.10.155']
['27/01/2023 12:37:44:703', '14588', 'LEAVE FUNCTION', 'SetActive :: = { Active: True, }']
['27/01/2023 12:37:44:703', '14588', 'ENTER FUNCTION', 'MonitorDevice :: = { Device: 201122, }']
['27/01/2023 12:37:44:707', '7060', 'ENTER FUNCTION', 'TEventsManager.AddEvent :: = { ACSTAEvent: CSTACONFIRMATION CSTAR_MONITORS_CON, CTIRequestID: 2, }']
['27/01/2023 12:37:53:711', '7060', 'LEAVE FUNCTION', 'TEventsManager.AddEvent']
which is something you can easily pass to a csv writer's writerow().
The code below processes the log line by line, in case the logs are huge:
import re
import csv

# Matches a record header such as "[27/01/2023 12:37:44:675] TID:[14588]",
# capturing the timestamp and the thread id. Compiled once, outside the loop.
header_re = re.compile(r'\[(.*?)\] TID:\[(\d+)\]')

# NOTE: message collection stops at a blank line or end-of-file, so records
# are expected to be separated by a blank line in the input.
with open('input.log') as fin, open('output.csv', 'w', newline='') as fout:
    writer = csv.writer(fout)
    writer.writerow('Datetime TID Action Message'.split())
    for line in fin:
        # read until a timestamp/TID line is found
        m = header_re.search(line)
        if m:
            dt, tid = m.groups()
            # next line is action; the '' default avoids StopIteration when
            # the header is the last line of the file
            action = next(fin, '').strip()
            message = []  # collect message lines, stop on blank line or end-of-file
            while line := next(fin, '').strip():
                message.append(line)
            writer.writerow([dt, tid, action, ''.join(message)])
output.csv
Datetime,TID,Action,Message
27/01/2023 12:37:44:675,14588,ENTER FUNCTION,"SetActive :: ={Value: True,}"
27/01/2023 12:37:44:675,14588,VERBOSE,GetServerIP : 10.10.10.155
27/01/2023 12:37:44:691,14588,LEAVE FUNCTION,"SetActive :: ={Active: True,}"
27/01/2023 12:37:44:694,14588,ENTER FUNCTION,"SetActive :: ={Value: True,}"
27/01/2023 12:37:44:694,14588,VERBOSE,GetServerIP : 10.10.10.155
27/01/2023 12:37:44:703,14588,LEAVE FUNCTION,"SetActive :: ={Active: True,}"
27/01/2023 12:37:44:703,14588,ENTER FUNCTION,"MonitorDevice :: ={Device: 201122,}"
27/01/2023 12:37:44:707,7060,ENTER FUNCTION,"TEventsManager.AddEvent :: ={ACSTAEvent: CSTACONFIRMATION CSTAR_MONITORS_CON,CTIRequestID: 2,}"
27/01/2023 12:37:53:711,7060,LEAVE FUNCTION,TEventsManager.AddEvent
I would like to request assistance with a Python script to convert a logfile into a csv file. I am looking for the Python code to do this. My input information looks as follows:
Exception: Cannot open file "C:\ProgramData\Presence\Log\pco_nhp01_CT_1800.log". Access is denied
Original message: [27/01/2023 12:37:44:675] TID:[14588]
ENTER FUNCTION
SetActive :: =
{
Value: True,
}
[27/01/2023 12:37:44:675] TID:[14588]
VERBOSE
GetServerIP : 10.10.10.155
[27/01/2023 12:37:44:691] TID:[14588]
LEAVE FUNCTION
SetActive :: =
{
Active: True,
}
[27/01/2023 12:37:44:694] TID:[14588]
ENTER FUNCTION
SetActive :: =
{
Value: True,
}
[27/01/2023 12:37:44:694] TID:[14588]
VERBOSE
GetServerIP : 10.10.10.155
[27/01/2023 12:37:44:703] TID:[14588]
LEAVE FUNCTION
SetActive :: =
{
Active: True,
}
[27/01/2023 12:37:44:703] TID:[14588]
ENTER FUNCTION
MonitorDevice :: =
{
Device: 201122,
}
[27/01/2023 12:37:44:707] TID:[7060]
ENTER FUNCTION
TEventsManager.AddEvent :: =
{
ACSTAEvent: CSTACONFIRMATION CSTAR_MONITORS_CON,
CTIRequestID: 2,
}
[27/01/2023 12:37:53:711] TID:[7060]
LEAVE FUNCTION
TEventsManager.AddEvent
Here is the code I wrote to try to solve this; however, I am missing something: the messages are not returned correctly, and I have not been able to work out how to extract the actions. Here is my attempt:
import csv

# Read the whole log into memory as a list of lines.
with open('pco_nhp01_CT_1800.log', 'r') as log_file:
    log_data = log_file.readlines()

with open('logfile.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['Datetime', 'TID', 'Message'])

    for line in log_data:
        # Only lines that begin with '[' (the "[timestamp] TID:[n]" headers)
        # are inspected; the lines following each header are never read.
        if line.startswith('['):
            parts = line.split(']')
            # Text between the leading '[' and the first ']'.
            datetime = parts[0][1:]
            # Skip the 6 characters of " TID:[" to leave the thread id.
            tid = parts[1][6:]
            # Whatever follows the second ']' on this same line, minus its
            # first character.
            message = parts[2][1:]
            writer.writerow([datetime, tid, message])
I’d split the original by the timestamp regexp, then work from there:
import re

# Sample log text; each record starts with a "[timestamp] TID:[n]" header.
data = r"""
[27/01/2023 12:37:44:675] TID:[14588]
ENTER FUNCTION
SetActive :: =
{
Value: True,
}
[27/01/2023 12:37:44:675] TID:[14588]
... etc... cut for brevity
"""

# The brackets are escaped so they match literally, and the whole pattern is
# a capturing group so that re.split() keeps each timestamp in its result.
timestamp_re = re.compile(
    r"(^\[\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}:\d{3}\])", re.MULTILINE
)

# split[0] is the text before the first timestamp; after that the list
# alternates timestamp, following text, timestamp, following text, ...
split = timestamp_re.split(data.strip())
timestamps, data = split[1::2], split[2::2]

for timestamp, datum in zip(timestamps, data):
    timestamp = timestamp.strip("[]")
    datum_lines = datum.strip().splitlines()
    # First remaining line is the rest of the header, e.g. "TID:[14588]".
    tid_line = datum_lines.pop(0)
    tid = tid_line[5:-1]  # drop the "TID:[" prefix and the trailing "]"
    # Second line is the action; tolerate a record that has only a header.
    func = datum_lines.pop(0) if datum_lines else ""
    # Everything left is the message, flattened onto one line.
    rest = " ".join(datum_lines)
    print([timestamp, tid, func, rest])
This outputs
['27/01/2023 12:37:44:675', '14588', 'ENTER FUNCTION', 'SetActive :: = { Value: True, }']
['27/01/2023 12:37:44:675', '14588', 'VERBOSE', 'GetServerIP : 10.10.10.155']
['27/01/2023 12:37:44:691', '14588', 'LEAVE FUNCTION', 'SetActive :: = { Active: True, }']
['27/01/2023 12:37:44:694', '14588', 'ENTER FUNCTION', 'SetActive :: = { Value: True, }']
['27/01/2023 12:37:44:694', '14588', 'VERBOSE', 'GetServerIP : 10.10.10.155']
['27/01/2023 12:37:44:703', '14588', 'LEAVE FUNCTION', 'SetActive :: = { Active: True, }']
['27/01/2023 12:37:44:703', '14588', 'ENTER FUNCTION', 'MonitorDevice :: = { Device: 201122, }']
['27/01/2023 12:37:44:707', '7060', 'ENTER FUNCTION', 'TEventsManager.AddEvent :: = { ACSTAEvent: CSTACONFIRMATION CSTAR_MONITORS_CON, CTIRequestID: 2, }']
['27/01/2023 12:37:53:711', '7060', 'LEAVE FUNCTION', 'TEventsManager.AddEvent']
which is something you can easily pass to a csv writer's writerow().
The code below processes the log line by line, in case the logs are huge:
import re
import csv

# Matches a record header such as "[27/01/2023 12:37:44:675] TID:[14588]",
# capturing the timestamp and the thread id. Compiled once, outside the loop.
header_re = re.compile(r'\[(.*?)\] TID:\[(\d+)\]')

# NOTE: message collection stops at a blank line or end-of-file, so records
# are expected to be separated by a blank line in the input.
with open('input.log') as fin, open('output.csv', 'w', newline='') as fout:
    writer = csv.writer(fout)
    writer.writerow('Datetime TID Action Message'.split())
    for line in fin:
        # read until a timestamp/TID line is found
        m = header_re.search(line)
        if m:
            dt, tid = m.groups()
            # next line is action; the '' default avoids StopIteration when
            # the header is the last line of the file
            action = next(fin, '').strip()
            message = []  # collect message lines, stop on blank line or end-of-file
            while line := next(fin, '').strip():
                message.append(line)
            writer.writerow([dt, tid, action, ''.join(message)])
output.csv
Datetime,TID,Action,Message
27/01/2023 12:37:44:675,14588,ENTER FUNCTION,"SetActive :: ={Value: True,}"
27/01/2023 12:37:44:675,14588,VERBOSE,GetServerIP : 10.10.10.155
27/01/2023 12:37:44:691,14588,LEAVE FUNCTION,"SetActive :: ={Active: True,}"
27/01/2023 12:37:44:694,14588,ENTER FUNCTION,"SetActive :: ={Value: True,}"
27/01/2023 12:37:44:694,14588,VERBOSE,GetServerIP : 10.10.10.155
27/01/2023 12:37:44:703,14588,LEAVE FUNCTION,"SetActive :: ={Active: True,}"
27/01/2023 12:37:44:703,14588,ENTER FUNCTION,"MonitorDevice :: ={Device: 201122,}"
27/01/2023 12:37:44:707,7060,ENTER FUNCTION,"TEventsManager.AddEvent :: ={ACSTAEvent: CSTACONFIRMATION CSTAR_MONITORS_CON,CTIRequestID: 2,}"
27/01/2023 12:37:53:711,7060,LEAVE FUNCTION,TEventsManager.AddEvent