IP URL Mapping in JSON log file

Question:

I have a JSON log file and want to print and count the number of times a URL (requestUrl) has been hit by an IP in the same log file. The output should be like the below:

IP(remoteIp): URL1-(Count), URL2-(Count), URL3...
127.0.0.1: http://www.google.com - 12, www.bing.com/servlet-server.jsp - 2, etc..

The Sample of the Logfile is like below

"insertId": "kdkddkdmdkd",
"jsonPayload": {
  "@type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
  "enforcedSecurityPolicy": {
    "configuredAction": "DENY",
    "outcome": "DENY",
    "preconfiguredExprIds": [
      "owasp-crs-v030001-id942220-sqli"
    ],
    "name": "shbdbbddjdjdjd",
    "priority": 2000
  },
  "statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
  "requestMethod": "POST",
  "requestUrl": "https://dknnkkdkddkd/token",
  "requestSize": "3004",
  "status": 403,
  "responseSize": "274",
  "userAgent": "okhttp/3.12.2",
  "remoteIp": "127.0.0.1",
  "serverIp": "123.123.33.31",
  "latency": "0.018728s"
}

The solution that I am using is below. I am able to get the total hits per IP or how many total times a URL has been hit etc.

import json
from collections import Counter

unique_ip = {}  # remoteIp value -> number of log entries carrying it
request_url = {}  # requestUrl value -> number of log entries carrying it

def getAndSaveValueSafely(freqTable, searchDict, key):
    """Tally one occurrence of searchDict['httpRequest'][key] in freqTable.

    If the entry has no 'httpRequest' section, or that section lacks the
    requested key, the hit is counted under the 'not_present' bucket instead
    of being dropped.

    Args:
        freqTable: dict mapping observed field values to occurrence counts;
            mutated in place.
        searchDict: one parsed log entry (a dict expected to contain an
            'httpRequest' mapping).
        key: field name to read from the 'httpRequest' mapping.
    """
    # Keep the try body to the single lookup that can raise (EAFP).
    try:
        value = searchDict['httpRequest'][key]
    except KeyError:
        value = 'not_present'
    # dict.get collapses the "first time vs. seen before" branches.
    freqTable[value] = freqTable.get(value, 0) + 1
# Load the whole log (a JSON array of entries) and tally per-field counts.
with open("threat_intel_1.json") as file:
    data = json.load(file)

for entry in data:
    getAndSaveValueSafely(unique_ip, entry, 'remoteIp')
    getAndSaveValueSafely(request_url, entry, 'requestUrl')

# Rebuild each table ordered most-frequent-first (dicts keep insertion order).
mc_unique_ip = dict(Counter(unique_ip).most_common())
mc_request_url = dict(Counter(request_url).most_common())
def printing():
    """Write a summary of the analysis to output.txt.

    Reads module-level globals: unique_ip, request_url, mc_unique_ip,
    mc_request_url, and minTs/maxTs (log time bounds -- NOTE(review):
    minTs/maxTs are not defined anywhere in this snippet; confirm they are
    computed elsewhere before calling this).
    """
    ip_count = str(len(unique_ip))
    url_count = str(len(request_url))
    with open("output.txt", "w") as f1:
        # The original had an unterminated f-string and literal 'nn' runs
        # where newlines were clearly intended; both are fixed here.
        print(
            f'Start Time of log = {minTs}\n\n'
            f'End Time of log = {maxTs}\n\n\n'
            f'{ip_count} Unique IP List = {mc_unique_ip}\n\n\n'
            f'{url_count} Unique URL = {mc_request_url}',
            file=f1,
        )
Asked By: Max_sh

||

Answers:

I don't think you need to use Counter here, and you are unlikely to see any benefit from it.

# The snippet previously relied on the question's earlier `import json`;
# it is added here so the example runs on its own.
import json
from collections import defaultdict

result = {}  # requestUrl -> {remoteIp: hit count}

with open("threat_intel_1.json") as file:
    data = json.load(file)

for d2 in data:
    # Skip entries that have no httpRequest section at all.
    req = d2.get('httpRequest')
    if not req:
        continue
    url = req['requestUrl']
    ip = req['remoteIp']
    # First time a URL is seen, seed it with an int-defaulting dict so the
    # per-IP count can be incremented unconditionally.
    result.setdefault(url, defaultdict(int))[ip] += 1

print(result)
# {"/endpoint.html": {"127.2.3.4": 15, "222.11.31.22": 2}}

If instead you want it keyed the other way around, that's easy as well:

for d2 in data:
    # Skip entries that have no httpRequest section at all.
    req = d2.get('httpRequest',None)
    if not req:
       continue
    url = req['requestUrl']
    ip = req['remoteIp']
    # Same tally as above, but keyed remoteIp -> {requestUrl: count}.
    result.setdefault(ip,defaultdict(int))[url] += 1

# {"127.1.2.3": {"/endpoint1.html": 15, "/endpoint2.php": 1}, "33.44.55.66": {"/endpoint1.html": 5}, ...}

instead of using defaultdict you could add a line

   # result.setdefault(ip,defaultdict(int))[url] += 1
   # Plain-dict equivalent: name the per-IP bucket once, then bump the count.
   bucket = result.setdefault(ip, {})
   bucket[url] = bucket.get(url, 0) + 1

which arguably is more readable anyway…

Answered By: Joran Beasley