How can I use parentheses inside a (?P<name>…) named group in a regular expression?

Question:

I want to capture only part of what a (?P<name>...) named group matches, using something like:

(?P<name>\d.+([a-z]))

but it does not work. Is there a way to use parentheses inside a (?P<name>...) group?
Sample text:

197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
100.32.205.59 - ortiz8891 [21/Jun/2019:15:45:28 -0700] "PATCH /architectures HTTP/1.0" 204 6048
168.95.156.240 - stark2413 [21/Jun/2019:15:45:31 -0700] "GET /engage HTTP/2.0" 201 9645
71.172.239.195 - dooley1853 [21/Jun/2019:15:45:32 -0700] "PUT /cutting-edge HTTP/2.0" 406 24498
180.95.121.94 - mohr6893 [21/Jun/2019:15:45:34 -0700] "PATCH /extensible/reinvent HTTP/1.1" 201 27330
144.23.247.108 - auer7552 [21/Jun/2019:15:45:35 -0700] "POST /extensible/infrastructures/one-to-one/enterprise HTTP/1.1" 100 

Let’s say I only want the date part:

# The named group "date" spans the literal brackets (\[ ... \]), so every
# reported value includes them.
z = re.finditer(r'(?P<date>\[\d+.+\])', logdata)
for l in z:
    print(l.groupdict())

This allows me to get the date, but with the square brackets. How can I specify that I don’t want the square brackets without using the brackets?
Because if I use the brackets, an error pops out:

# The inner (...) is an unnamed group nested inside "date"; groupdict() only
# reports named groups, so "date" still includes the surrounding brackets.
z = re.finditer(r'(?P<date>\[(\d+.+)\])', logdata)
for l in z:
    print(l.groupdict())

It just returns the date with the square brackets and ignores my parentheses.

import re
def logs():
    """Return the last match of each log field found in ``assets/logdata.txt``.

    The whole file is searched for four named groups: ``user_name``,
    ``request``, ``time`` and ``host``.  Every pattern is scanned to the end
    of the file and only its final match is kept.

    Returns:
        dict: Maps each group name to the text of its last match.  A field
        whose pattern never matches is left out of the dict.

    Raises:
        OSError: If ``assets/logdata.txt`` cannot be opened.
    """
    with open("assets/logdata.txt", "r") as file:
        logdata = file.read()

    # Raw strings keep the regex escapes (\d, \s, \.) intact.  The host dots
    # are escaped so that only dotted quads match, not runs such as
    # "203 26554".  The user_name group still includes the leading whitespace
    # matched by \s.
    patterns = (
        r'(?P<user_name>\s[a-z]+\d+)',
        r'(?P<request>".+")',
        r'(?P<time>\d+/.+\s-\d+)',
        r'(?P<host>\d+\.\d+\.\d+\.\d+)',
    )

    d = {}
    for pattern in patterns:
        last = None
        # Exhaust the iterator; ``last`` ends up bound to the final match.
        for last in re.finditer(pattern, logdata):
            pass
        if last is not None:
            d.update(last.groupdict())
    return d

That’s the code for the entire assignment.

Asked By: Mamdouh Dabjan

||

Answers:

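Use the pattern \[(.*?)\] to match anything inside square brackets. The escaped \[ and \] match the literal brackets, and the inner (.*?) group captures only the text between them, so group(1) returns the date without the brackets.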

import re

# Escaped \[ and \] match the literal square brackets; the lazy group (.*?)
# captures only the text between them.
regex = r'\[(.*?)\]'

text = '''197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
100.32.205.59 - ortiz8891 [21/Jun/2019:15:45:28 -0700] "PATCH /architectures HTTP/1.0" 204 6048
168.95.156.240 - stark2413 [21/Jun/2019:15:45:31 -0700] "GET /engage HTTP/2.0" 201 9645
71.172.239.195 - dooley1853 [21/Jun/2019:15:45:32 -0700] "PUT /cutting-edge HTTP/2.0" 406 24498
180.95.121.94 - mohr6893 [21/Jun/2019:15:45:34 -0700] "PATCH /extensible/reinvent HTTP/1.1" 201 27330
144.23.247.108 - auer7552 [21/Jun/2019:15:45:35 -0700] "POST /extensible/infrastructures/one-to-one/enterprise HTTP/1.1" 100'''

# Note: re.findall(regex, text) would return the same list directly.

# group(1) is the bracket-free capture, so the delimiters never reach the output.
data = re.finditer(regex, text)
test = [match.group(1) for match in data]
print(test)

>>> ['21/Jun/2019:15:45:25 -0700', '21/Jun/2019:15:45:27 -0700', '21/Jun/2019:15:45:28 -0700', '21/Jun/2019:15:45:31 -0700', '21/Jun/2019:15:45:32 -0700', '21/Jun/2019:15:45:34 -0700', '21/Jun/2019:15:45:35 -0700']

This version returns a list of dicts, collecting the named-group matches for each log line:

def logs(logdata=None):
    """Parse log lines into one dict of named-group matches per line.

    Args:
        logdata: Log text with one entry per line.  When omitted, a built-in
            two-line sample is parsed.

    Returns:
        list[dict]: One dict per non-blank line, holding whichever of the
        keys ``request``, ``time``, ``host`` and ``user_name`` matched on
        that line.  If a pattern matches more than once on a line, the last
        match wins.
    """
    if logdata is None:
        logdata = '''197.109.77.178 - kertzmann3129 [21/Jun/2019:15:45:25 -0700] "DELETE /virtual/solutions/target/web+services HTTP/2.0" 203 26554
156.127.178.177 - okuneva5222 [21/Jun/2019:15:45:27 -0700] "DELETE /interactive/transparent/niches/revolutionize HTTP/1.1" 416 14701
'''

    # Raw strings keep the regex escapes intact.  The host dots are escaped
    # so that "203 26554" is not mistaken for an address.  The user_name
    # group still includes the leading whitespace matched by \s.
    patterns = (
        r'(?P<request>".+")',
        r'(?P<time>\d+/.+\s-\d+)',
        r'(?P<host>\d+\.\d+\.\d+\.\d+)',
        r'(?P<user_name>\s[a-z]+\d+)',
    )

    dict_lst = []
    for log in logdata.splitlines():
        # A trailing newline or blank line would otherwise add an empty dict.
        if not log.strip():
            continue
        d = {}
        for pattern in patterns:
            # Search the current line only, so fields never leak between entries.
            for match in re.finditer(pattern, log):
                d.update(match.groupdict())
        dict_lst.append(d)
    return dict_lst
Answered By: Ramesh
Categories: questions Tags: ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.