I have written a function that uses a regex to extract specific fields from each line of a text file. The code works, but I would like to get the results back as a dictionary, and that dictionary should contain 979 entries:
import re
def logs(path="C:/Users/ASUS/Desktop/logdata.txt"):
    """Print the fields parsed from every matching access-log line in *path*.

    A matching line looks like::

        146.204.224.152 - feest6811 [21/Jun/2019:15:45:24 -0700] "POST /incentivize HTTP/1.1" 302 4622

    For each match a dict with the keys ``host``, ``user_name``, ``time``
    and ``request`` is printed. Nothing is returned.

    Args:
        path: Location of the log file. Defaults to the path that was
            previously hard-coded, so existing ``logs()`` calls still work.
    """
    # Raw string: the regex escapes (\d, \s, ...) must reach the regex
    # engine untouched. The pattern is used with re.VERBOSE, so layout
    # whitespace and the trailing "#" comments are ignored.
    pattern = r'''
        (?P<host>\d{1,}\.\d{1,}\.\d{1,}\.\d{1,})    # host name
        \s+\S+\s+                                   # second field, not captured
        (?P<user_name>(?<=-\s)(\w+|-)(?=\s))\s+\[   # user_name (a word or a lone dash)
        (?P<time>([^\[]+))\]\s+"                    # time
        (?P<request>[^"]+)"                         # request
    '''
    with open(path, "r") as file:
        logdata = file.read()
    for item in re.finditer(pattern, logdata, re.VERBOSE):
        print(item.groupdict())
This function is supposed to turn a line of text like this:
146.204.224.152 - feest6811 [21/Jun/2019:15:45:24 -0700] "POST /incentivize HTTP/1.1" 302 4622
into this, capturing host, user_name, etc.:
{"host":"146.204.224.152",
"user_name":"feest6811",
"time":"21/Jun/2019:15:45:24 -0700",
"request":"POST /incentivize HTTP/1.1"}
How can I do this?
CodePudding user response:
I managed to fix this issue by making the following modifications:
def logs(path="C:/Users/ASUS/Desktop/logdata.txt"):
    """Parse the access log at *path* and return the fields of every match.

    A matching line looks like::

        146.204.224.152 - feest6811 [21/Jun/2019:15:45:24 -0700] "POST /incentivize HTTP/1.1" 302 4622

    Args:
        path: Location of the log file. Defaults to the path that was
            previously hard-coded, so existing ``logs()`` calls still work.

    Returns:
        A dict with one entry per matching line, in file order. Each key
        is the ``re.Match`` object for that line and each value is a dict
        with the keys ``host``, ``user_name``, ``time`` and ``request``.
    """
    # Raw string: the regex escapes (\d, \s, ...) must reach the regex
    # engine untouched. The pattern is used with re.VERBOSE, so layout
    # whitespace and the trailing "#" comments are ignored.
    pattern = r'''
        (?P<host>\d{1,}\.\d{1,}\.\d{1,}\.\d{1,})    # host name
        \s+\S+\s+                                   # second field, not captured
        (?P<user_name>(?<=-\s)(\w+|-)(?=\s))\s+\[   # user_name (a word or a lone dash)
        (?P<time>([^\[]+))\]\s+"                    # time
        (?P<request>[^"]+)"                         # request
    '''
    with open(path, "r") as file:
        logdata = file.read()
    # Named "entries" rather than "dict" so the builtin is not shadowed.
    entries = {}
    for item in re.finditer(pattern, logdata, re.VERBOSE):
        entries[item] = item.groupdict()
    return entries
type(logs())
<class 'dict'>
CodePudding user response:
Is this the result you wanted?
import re
def logs(path="C:/Users/ASUS/Desktop/logdata.txt"):
    """Print the fields parsed from the start of the access log at *path*.

    Only the beginning of the file is examined (``re.match``), so at most
    one entry is reported. A matching line looks like::

        146.204.224.152 - feest6811 [21/Jun/2019:15:45:24 -0700] "POST /incentivize HTTP/1.1" 302 4622

    The printed dict has the keys ``host``, ``user_name``, ``time`` and
    ``request``. If the file does not start with a matching entry, an
    empty dict is printed.

    Args:
        path: Location of the log file. Defaults to the path that was
            previously hard-coded, so existing ``logs()`` calls still work.
    """
    # Raw string: the regex escapes (\d, \s, ...) must reach the regex
    # engine untouched.
    pattern = r'''
        (?P<host>\d{1,}\.\d{1,}\.\d{1,}\.\d{1,})    # host name
        \s+\S+\s+                                   # second field, not captured
        (?P<user_name>(?<=-\s)(\w+|-)(?=\s))\s+\[   # user_name (a word or a lone dash)
        (?P<time>([^\[]+))\]\s+"                    # time
        (?P<request>[^"]+)"                         # request
    '''
    with open(path, "r") as file:
        logdata = file.read()
    # re.VERBOSE is required: without it the layout whitespace and the
    # "#" comments in the pattern are matched literally and nothing matches.
    match_object = re.match(pattern, logdata, re.VERBOSE)
    # re.match returns None when there is no match; report that as an
    # empty dict instead of failing on None.groupdict().
    details = match_object.groupdict() if match_object is not None else {}
    print(details)
