How can I use parentheses inside a (?P<name>…) named group in a regular expression?
Question:
I want to specify something from the expression after the (?P<name>...)
like:
(?P<name>\d.+([a-z]))
but it is not allowing me. Is there a way to use the (?P<name>...)
expression with parenthesis?
Sample text:
197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
100.32.205.59 - ortiz8891 [21/Jun/2019:15:45:28 -0700] "PATCH /architectures HTTP/1.0" 204 6048
168.95.156.240 - stark2413 [21/Jun/2019:15:45:31 -0700] "GET /engage HTTP/2.0" 201 9645
71.172.239.195 - dooley1853 [21/Jun/2019:15:45:32 -0700] "PUT /cutting-edge HTTP/2.0" 406 24498
180.95.121.94 - mohr6893 [21/Jun/2019:15:45:34 -0700] "PATCH /extensible/reinvent HTTP/1.1" 201 27330
144.23.247.108 - auer7552 [21/Jun/2019:15:45:35 -0700] "POST /extensible/infrastructures/one-to-one/enterprise HTTP/1.1" 100
Let’s say I only want the date part
# The escaped brackets sit inside the named group, so every captured
# date still includes the surrounding "[" and "]".
z = re.finditer(r'(?P<date>\[\d+.+\])', logdata)
for match in z:
    print(match.groupdict())
This gets me the date, but with the square brackets included. How can I specify that I don't want the square brackets without using the brackets?
If I use the brackets, an error pops up:
# The inner parentheses form an unnamed group. groupdict() reports only
# named groups, so "date" still spans the literal square brackets.
z = re.finditer(r'(?P<date>\[(\d+.+)\])', logdata)
for match in z:
    print(match.groupdict())
it just returns the date with square brackets and ignores my parenthesis here
import re
def logs(path="assets/logdata.txt"):
    """Return the last value found in a log file for each known field.

    The file is read in full and searched once per field. When a field
    matches several times, the last match wins. Fields that never match
    are left out, so an empty file yields an empty dictionary.

    Args:
        path: Location of the log file to read.

    Returns:
        A dictionary with up to four keys: "user_name", "request",
        "time" and "host". "request" keeps its surrounding double quotes
        and "user_name" keeps its leading whitespace character, because
        those delimiters are inside the named groups.
    """
    with open(path, "r") as file:
        logdata = file.read()

    # Raw strings keep the regex escapes intact. The dots in the host
    # pattern are escaped so that only dotted quads match, not arbitrary
    # digit runs such as "203 26554".
    patterns = (
        r'(?P<user_name>\s[a-z]+\d+)',
        r'(?P<request>".+")',
        r'(?P<time>\d+/.+\s-\d+)',
        r'(?P<host>\d+\.\d+\.\d+\.\d+)',
    )

    result = {}
    for pattern in patterns:
        last_match = None
        for last_match in re.finditer(pattern, logdata):
            pass  # Exhaust the iterator; only the final match is kept.
        if last_match is not None:
            result.update(last_match.groupdict())
    return result
that’s the code for the entire assignment.
Answers:
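Use the pattern \[(.*?)\] to capture anything inside square brackets.
The escaped \[ and \] match the literal brackets, and the lazy group (.*?) captures only the text between them.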
import re
# Escape the square brackets so they match literally. The lazy group
# (.*?) then captures only the text between them.
regex = r'\[(.*?)\]'
text = '''197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
100.32.205.59 - ortiz8891 [21/Jun/2019:15:45:28 -0700] "PATCH /architectures HTTP/1.0" 204 6048
168.95.156.240 - stark2413 [21/Jun/2019:15:45:31 -0700] "GET /engage HTTP/2.0" 201 9645
71.172.239.195 - dooley1853 [21/Jun/2019:15:45:32 -0700] "PUT /cutting-edge HTTP/2.0" 406 24498
180.95.121.94 - mohr6893 [21/Jun/2019:15:45:34 -0700] "PATCH /extensible/reinvent HTTP/1.1" 201 27330
144.23.247.108 - auer7552 [21/Jun/2019:15:45:35 -0700] "POST /extensible/infrastructures/one-to-one/enterprise HTTP/1.1" 100'''
# re.findall(regex, text) would return the same list directly.
data = re.finditer(regex, text)
test = [match.group(1) for match in data]
print(test)
>>> ['21/Jun/2019:15:45:25 -0700', '21/Jun/2019:15:45:27 -0700', '21/Jun/2019:15:45:28 -0700', '21/Jun/2019:15:45:31 -0700', '21/Jun/2019:15:45:32 -0700', '21/Jun/2019:15:45:34 -0700', '21/Jun/2019:15:45:35 -0700']
this will return all updated dict keys and values:
def logs(logdata=None):
    """Parse access-log text into one dictionary per log line.

    Each line is matched on its own against a single pattern, so the
    values in a dictionary always come from the same line. Lines that do
    not match the pattern (for example blank lines) are skipped.

    Args:
        logdata: Log text to parse, one entry per line. When omitted, a
            built-in two-line sample is parsed instead.

    Returns:
        A list of dictionaries with the keys "host", "user_name", "time"
        and "request". "time" excludes the surrounding square brackets
        and "request" excludes the surrounding double quotes.
    """
    if logdata is None:
        logdata = '''197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
'''

    # The literal delimiters (brackets, quotes, spaces) stay outside the
    # named groups so they are matched but not captured.
    line_pattern = re.compile(
        r'(?P<host>\S+) \S+ (?P<user_name>\S+) '
        r'\[(?P<time>[^\]]+)\] '
        r'"(?P<request>[^"]*)"'
    )

    dict_lst = []
    for log in logdata.splitlines():
        match = line_pattern.match(log)
        if match is not None:
            dict_lst.append(match.groupdict())
    return dict_lst
I want to specify something from the expression after the (?P<name>...)
like:
(?P<name>\d.+([a-z]))
but it is not allowing me. Is there a way to use the (?P<name>...)
expression with parenthesis?
Sample text:
197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
100.32.205.59 - ortiz8891 [21/Jun/2019:15:45:28 -0700] "PATCH /architectures HTTP/1.0" 204 6048
168.95.156.240 - stark2413 [21/Jun/2019:15:45:31 -0700] "GET /engage HTTP/2.0" 201 9645
71.172.239.195 - dooley1853 [21/Jun/2019:15:45:32 -0700] "PUT /cutting-edge HTTP/2.0" 406 24498
180.95.121.94 - mohr6893 [21/Jun/2019:15:45:34 -0700] "PATCH /extensible/reinvent HTTP/1.1" 201 27330
144.23.247.108 - auer7552 [21/Jun/2019:15:45:35 -0700] "POST /extensible/infrastructures/one-to-one/enterprise HTTP/1.1" 100
Let’s say I only want the date part
# The escaped brackets sit inside the named group, so every captured
# date still includes the surrounding "[" and "]".
z = re.finditer(r'(?P<date>\[\d+.+\])', logdata)
for match in z:
    print(match.groupdict())
This gets me the date, but with the square brackets included. How can I specify that I don't want the square brackets without using the brackets?
If I use the brackets, an error pops up:
# The inner parentheses form an unnamed group. groupdict() reports only
# named groups, so "date" still spans the literal square brackets.
z = re.finditer(r'(?P<date>\[(\d+.+)\])', logdata)
for match in z:
    print(match.groupdict())
it just returns the date with square brackets and ignores my parenthesis here
import re
def logs(path="assets/logdata.txt"):
    """Return the last value found in a log file for each known field.

    The file is read in full and searched once per field. When a field
    matches several times, the last match wins. Fields that never match
    are left out, so an empty file yields an empty dictionary.

    Args:
        path: Location of the log file to read.

    Returns:
        A dictionary with up to four keys: "user_name", "request",
        "time" and "host". "request" keeps its surrounding double quotes
        and "user_name" keeps its leading whitespace character, because
        those delimiters are inside the named groups.
    """
    with open(path, "r") as file:
        logdata = file.read()

    # Raw strings keep the regex escapes intact. The dots in the host
    # pattern are escaped so that only dotted quads match, not arbitrary
    # digit runs such as "203 26554".
    patterns = (
        r'(?P<user_name>\s[a-z]+\d+)',
        r'(?P<request>".+")',
        r'(?P<time>\d+/.+\s-\d+)',
        r'(?P<host>\d+\.\d+\.\d+\.\d+)',
    )

    result = {}
    for pattern in patterns:
        last_match = None
        for last_match in re.finditer(pattern, logdata):
            pass  # Exhaust the iterator; only the final match is kept.
        if last_match is not None:
            result.update(last_match.groupdict())
    return result
that’s the code for the entire assignment.
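Use the pattern \[(.*?)\] to capture anything inside square brackets.
The escaped \[ and \] match the literal brackets, and the lazy group (.*?) captures only the text between them.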
import re
# Escape the square brackets so they match literally. The lazy group
# (.*?) then captures only the text between them.
regex = r'\[(.*?)\]'
text = '''197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
100.32.205.59 - ortiz8891 [21/Jun/2019:15:45:28 -0700] "PATCH /architectures HTTP/1.0" 204 6048
168.95.156.240 - stark2413 [21/Jun/2019:15:45:31 -0700] "GET /engage HTTP/2.0" 201 9645
71.172.239.195 - dooley1853 [21/Jun/2019:15:45:32 -0700] "PUT /cutting-edge HTTP/2.0" 406 24498
180.95.121.94 - mohr6893 [21/Jun/2019:15:45:34 -0700] "PATCH /extensible/reinvent HTTP/1.1" 201 27330
144.23.247.108 - auer7552 [21/Jun/2019:15:45:35 -0700] "POST /extensible/infrastructures/one-to-one/enterprise HTTP/1.1" 100'''
# re.findall(regex, text) would return the same list directly.
data = re.finditer(regex, text)
test = [match.group(1) for match in data]
print(test)
>>> ['21/Jun/2019:15:45:25 -0700', '21/Jun/2019:15:45:27 -0700', '21/Jun/2019:15:45:28 -0700', '21/Jun/2019:15:45:31 -0700', '21/Jun/2019:15:45:32 -0700', '21/Jun/2019:15:45:34 -0700', '21/Jun/2019:15:45:35 -0700']
this will return all updated dict keys and values:
def logs(logdata=None):
    """Parse access-log text into one dictionary per log line.

    Each line is matched on its own against a single pattern, so the
    values in a dictionary always come from the same line. Lines that do
    not match the pattern (for example blank lines) are skipped.

    Args:
        logdata: Log text to parse, one entry per line. When omitted, a
            built-in two-line sample is parsed instead.

    Returns:
        A list of dictionaries with the keys "host", "user_name", "time"
        and "request". "time" excludes the surrounding square brackets
        and "request" excludes the surrounding double quotes.
    """
    if logdata is None:
        logdata = '''197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
'''

    # The literal delimiters (brackets, quotes, spaces) stay outside the
    # named groups so they are matched but not captured.
    line_pattern = re.compile(
        r'(?P<host>\S+) \S+ (?P<user_name>\S+) '
        r'\[(?P<time>[^\]]+)\] '
        r'"(?P<request>[^"]*)"'
    )

    dict_lst = []
    for log in logdata.splitlines():
        match = line_pattern.match(log)
        if match is not None:
            dict_lst.append(match.groupdict())
    return dict_lst