How to parse this particular console output and make csv?
Question:
I need to process console output which looks like this and make a csv from it:
ID,FLAG,ADDRESS,MAC-ADDRESS,HOST-NAME,SERVER,STATUS,LAST-SEEN
0 10.0.0.11 00:1D:72:29:F2:4F lan waiting never
;;; test comment
1 10.0.0.19 00:13:21:15:D4:00 lan waiting never
2 10.0.0.10 00:60:6E:05:0C:E0 lan waiting never
3 D 10.0.1.199 24:E9:B3:20:FA:C7 home server1 bound 4h54m52s
4 D 100.64.1.197 E6:17:AE:21:EA:00 Suzana-s-A51 dhcp1 bound 2h16m45s
I have managed to split lines but regex is not working for tabs and spaces. Can someone point me in the right direction?
The code I am using is this:
import csv
import re

# Asker's original attempt, with the indentation and the backslash escapes
# ('\n', '\t') that were lost in extraction restored. The logic itself is
# intentionally unchanged: it is the behaviour the question asks about.

# Open the input file in read-only mode
with open('output.txt', 'r') as input_file:
    # Open the output file in write-only mode
    with open('output.csv', 'w') as output_file:
        # Create a CSV writer that will write to the output file
        writer = csv.writer(output_file)

        # Read the first line of the input file (the header)
        # and write it to the output file as a single value
        # (i.e. do not split it on commas)
        header = input_file.readline()
        writer.writerow([header.strip()])

        # Iterate over the remaining lines of the input file
        for line in input_file:
            # Ignore lines that start with ";;;" (these are comments)
            if line.startswith(';;;'):
                continue

            # Split the line on newlines
            values = line.split('\n')
            # Collapse each run of tabs/spaces into a single comma
            line = re.sub(r'[\t ]+', ',', line)

            # Iterate over the resulting values
            for i, value in enumerate(values):
                # If the value contains a comma, split it on commas
                # and assign the resulting values to the `values` list
                if ',' in value:
                    values[i:i+1] = value.split(',')

            # Write the values to the output file
            writer.writerow(values)
Answers:
A regular expression is handy here: build a mask that describes one record, then pull each value out of every line you read.
You can paste the regex into an online regex tester to get a visual breakdown of what each group matches.
So, for each line, apply a regex such as reg_format=r"(\d*?)(?:\s+)(.*?)(?:\s)(?:\s*?)(\w*\.\w*\.\w*\.\w*)(?:\s*)(\w*?:\w*?:\w*?:\w*?:\w*?:\w*)(?:\s*)(\w*)(?:\s*)(\w*)(?:\s*)(\w*)"
Please note that writer.writerow expects a list when it writes a row to the CSV.
The following should work for you, and you can tweak it as needed.
I tweaked your code and added comments.
Update:
Added masking for records
import csv
import re

# Both masks share the same leading and trailing columns; they differ only in
# how much whitespace may separate the MAC address from the next word.
_LEADING_COLUMNS = (
    r"(\d*?)(?:\s+)"                     # ID
    r"(.*?)(?:\s)(?:\s*?)"               # FLAG (empty when the row has none)
    r"(\w*\.\w*\.\w*\.\w*)(?:\s*)"       # ADDRESS (dotted quad)
    r"(\w*?:\w*?:\w*?:\w*?:\w*?:\w*)"    # MAC-ADDRESS
)
_TRAILING_COLUMNS = (
    r"([\w-]{1,14})(?:\s*?)"             # first word after the MAC address
    r"(\w+)(?:\s*)"
    r"(\w+)(?:\s*)"
    r"(\w*)(?:\s*)"
    r"(\w*)"                             # spare group, trimmed before writing
)

# Row that has a HOST-NAME: at most two whitespace characters separate the
# MAC address from the host name.
all_fields = _LEADING_COLUMNS + r"(?:\s{1,2})" + _TRAILING_COLUMNS
# Row whose HOST-NAME column is blank: any amount of whitespace may follow the
# MAC address, and the first word after it is the SERVER.
all_fields_minus_host = _LEADING_COLUMNS + r"(?:\s{1,})" + _TRAILING_COLUMNS

# Compiled once so the per-line loop does not repeat the pattern lookup.
_ALL_FIELDS_RE = re.compile(all_fields)
_MINUS_HOST_RE = re.compile(all_fields_minus_host)


def parse_line(line):
    """Split one record of the console output into its eight columns.

    The columns are returned in header order: ID, FLAG, ADDRESS, MAC-ADDRESS,
    HOST-NAME, SERVER, STATUS, LAST-SEEN. FLAG and HOST-NAME are empty
    strings when the record does not have them.

    The masks tell "host present" from "host missing" purely by the width of
    the gap after the MAC address, so the input must be column-aligned.

    Args:
        line: One line of console output; surrounding whitespace is ignored.

    Returns:
        A list of eight strings, or None when the line matches neither mask.
    """
    # Leading whitespace would otherwise push the ID into the FLAG group.
    text = line.strip()

    match = _ALL_FIELDS_RE.search(text)
    if match:
        # Nine groups are captured; the last one is a spare.
        return list(match.groups())[:-1]

    match = _MINUS_HOST_RE.search(text)
    if match is None:
        return None

    fields = list(match.groups())
    # The mask captured SERVER where HOST-NAME would be, so put an empty
    # HOST-NAME in front of it and drop the two spare trailing groups.
    fields.insert(4, '')
    return fields[:-2]


def convert(input_path='testreg.txt', output_path='output.csv'):
    """Convert the console dump at *input_path* to a CSV at *output_path*.

    The first input line is taken as a comma-separated header. Lines that
    start with ";;;" are comments and are skipped, as are lines that match
    neither record mask.
    """
    # newline='' lets the csv module control line endings itself.
    with open(input_path, 'r') as input_file, \
            open(output_path, 'w', newline='') as output_file:
        # QUOTE_ALL wraps every value in double quotes. Adding the quotes by
        # hand would make the writer escape them and emit """value""".
        # Drop the `quoting` argument if unquoted output is wanted.
        writer = csv.writer(output_file, quoting=csv.QUOTE_ALL)

        # Strip first so the last header cell does not keep the newline;
        # writerow needs a list, hence the split.
        header = input_file.readline()
        writer.writerow(header.strip().split(','))

        for line in input_file:
            # Ignore lines that start with ";;;" (these are comments)
            if line.lstrip().startswith(';;;'):
                continue
            fields = parse_line(line)
            if fields is not None:
                writer.writerow(fields)


if __name__ == '__main__':
    convert()
I need to process console output which looks like this and make a csv from it:
ID,FLAG,ADDRESS,MAC-ADDRESS,HOST-NAME,SERVER,STATUS,LAST-SEEN
0 10.0.0.11 00:1D:72:29:F2:4F lan waiting never
;;; test comment
1 10.0.0.19 00:13:21:15:D4:00 lan waiting never
2 10.0.0.10 00:60:6E:05:0C:E0 lan waiting never
3 D 10.0.1.199 24:E9:B3:20:FA:C7 home server1 bound 4h54m52s
4 D 100.64.1.197 E6:17:AE:21:EA:00 Suzana-s-A51 dhcp1 bound 2h16m45s
I have managed to split lines but regex is not working for tabs and spaces. Can someone point me in the right direction?
The code I am using is this:
import csv
import re

# Asker's original attempt, with the indentation and the backslash escapes
# ('\n', '\t') that were lost in extraction restored. The logic itself is
# intentionally unchanged: it is the behaviour the question asks about.

# Open the input file in read-only mode
with open('output.txt', 'r') as input_file:
    # Open the output file in write-only mode
    with open('output.csv', 'w') as output_file:
        # Create a CSV writer that will write to the output file
        writer = csv.writer(output_file)

        # Read the first line of the input file (the header)
        # and write it to the output file as a single value
        # (i.e. do not split it on commas)
        header = input_file.readline()
        writer.writerow([header.strip()])

        # Iterate over the remaining lines of the input file
        for line in input_file:
            # Ignore lines that start with ";;;" (these are comments)
            if line.startswith(';;;'):
                continue

            # Split the line on newlines
            values = line.split('\n')
            # Collapse each run of tabs/spaces into a single comma
            line = re.sub(r'[\t ]+', ',', line)

            # Iterate over the resulting values
            for i, value in enumerate(values):
                # If the value contains a comma, split it on commas
                # and assign the resulting values to the `values` list
                if ',' in value:
                    values[i:i+1] = value.split(',')

            # Write the values to the output file
            writer.writerow(values)
A regular expression is handy here: build a mask that describes one record, then pull each value out of every line you read.
You can paste the regex into an online regex tester to get a visual breakdown of what each group matches.
So, for each line, apply a regex such as reg_format=r"(\d*?)(?:\s+)(.*?)(?:\s)(?:\s*?)(\w*\.\w*\.\w*\.\w*)(?:\s*)(\w*?:\w*?:\w*?:\w*?:\w*?:\w*)(?:\s*)(\w*)(?:\s*)(\w*)(?:\s*)(\w*)"
Please note that writer.writerow expects a list when it writes a row to the CSV.
The following should work for you, and you can tweak it as needed.
I tweaked your code and added comments.
Update:
Added masking for records
import csv
import re

# Both masks share the same leading and trailing columns; they differ only in
# how much whitespace may separate the MAC address from the next word.
_LEADING_COLUMNS = (
    r"(\d*?)(?:\s+)"                     # ID
    r"(.*?)(?:\s)(?:\s*?)"               # FLAG (empty when the row has none)
    r"(\w*\.\w*\.\w*\.\w*)(?:\s*)"       # ADDRESS (dotted quad)
    r"(\w*?:\w*?:\w*?:\w*?:\w*?:\w*)"    # MAC-ADDRESS
)
_TRAILING_COLUMNS = (
    r"([\w-]{1,14})(?:\s*?)"             # first word after the MAC address
    r"(\w+)(?:\s*)"
    r"(\w+)(?:\s*)"
    r"(\w*)(?:\s*)"
    r"(\w*)"                             # spare group, trimmed before writing
)

# Row that has a HOST-NAME: at most two whitespace characters separate the
# MAC address from the host name.
all_fields = _LEADING_COLUMNS + r"(?:\s{1,2})" + _TRAILING_COLUMNS
# Row whose HOST-NAME column is blank: any amount of whitespace may follow the
# MAC address, and the first word after it is the SERVER.
all_fields_minus_host = _LEADING_COLUMNS + r"(?:\s{1,})" + _TRAILING_COLUMNS

# Compiled once so the per-line loop does not repeat the pattern lookup.
_ALL_FIELDS_RE = re.compile(all_fields)
_MINUS_HOST_RE = re.compile(all_fields_minus_host)


def parse_line(line):
    """Split one record of the console output into its eight columns.

    The columns are returned in header order: ID, FLAG, ADDRESS, MAC-ADDRESS,
    HOST-NAME, SERVER, STATUS, LAST-SEEN. FLAG and HOST-NAME are empty
    strings when the record does not have them.

    The masks tell "host present" from "host missing" purely by the width of
    the gap after the MAC address, so the input must be column-aligned.

    Args:
        line: One line of console output; surrounding whitespace is ignored.

    Returns:
        A list of eight strings, or None when the line matches neither mask.
    """
    # Leading whitespace would otherwise push the ID into the FLAG group.
    text = line.strip()

    match = _ALL_FIELDS_RE.search(text)
    if match:
        # Nine groups are captured; the last one is a spare.
        return list(match.groups())[:-1]

    match = _MINUS_HOST_RE.search(text)
    if match is None:
        return None

    fields = list(match.groups())
    # The mask captured SERVER where HOST-NAME would be, so put an empty
    # HOST-NAME in front of it and drop the two spare trailing groups.
    fields.insert(4, '')
    return fields[:-2]


def convert(input_path='testreg.txt', output_path='output.csv'):
    """Convert the console dump at *input_path* to a CSV at *output_path*.

    The first input line is taken as a comma-separated header. Lines that
    start with ";;;" are comments and are skipped, as are lines that match
    neither record mask.
    """
    # newline='' lets the csv module control line endings itself.
    with open(input_path, 'r') as input_file, \
            open(output_path, 'w', newline='') as output_file:
        # QUOTE_ALL wraps every value in double quotes. Adding the quotes by
        # hand would make the writer escape them and emit """value""".
        # Drop the `quoting` argument if unquoted output is wanted.
        writer = csv.writer(output_file, quoting=csv.QUOTE_ALL)

        # Strip first so the last header cell does not keep the newline;
        # writerow needs a list, hence the split.
        header = input_file.readline()
        writer.writerow(header.strip().split(','))

        for line in input_file:
            # Ignore lines that start with ";;;" (these are comments)
            if line.lstrip().startswith(';;;'):
                continue
            fields = parse_line(line)
            if fields is not None:
                writer.writerow(fields)


if __name__ == '__main__':
    convert()