Home > Back-end >  How to extract specific nested JSON value doing loop? (Python / Flask)
How to extract specific nested JSON value doing loop? (Python / Flask)

Time:01-13

{
   "127.0.0.1":{
      "addresses":{
         "ipv4":"127.0.0.1"
      },
      "hostnames":[
         {
            "name":"localhost",
            "type":"PTR"
         }
      ],
      "status":{
         "reason":"conn-refused",
         "state":"up"
      },
      "tcp":{
         "5000":{
            "conf":"10",
            "cpe":"cpe:/a:python:python:3.9.2",
            "extrainfo":"Python 3.9.2",
            "name":"http",
            "product":"Werkzeug httpd",
            "reason":"syn-ack",
            "script":{
               "vulners":"\n  cpe:/a:python:python:3.9.2: \n    \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n    \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n    \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n    \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
            },
            "state":"open",
            "version":"1.0.1"
         },
         "6000":{
            "conf":"10",
            "cpe":"cpe:/a:python:python:3.9.2",
            "extrainfo":"Python 3.9.2",
            "name":"http",
            "product":"Werkzeug httpd",
            "reason":"syn-ack",
            "script":{
               "vulners":"\n  cpe:/a:python:python:3.9.2: \n    \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n    \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n    \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n    \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
            },
            "state":"open",
            "version":"1.0.1"
         }
      },
      "vendor":{
         
      }
   }
}

I want to extract the "vulners" value here. I tried this:

    # NOTE(review): the indentation below is reproduced exactly as posted and is
    # inconsistent; presumably this first line belongs at top level and the
    # `for ip in ips:` loop is meant to be nested inside `for result in results:`.
    results = []
# Fetch only the "scan" field of every document and round-trip it through
# json_util.dumps / json.loads before storing it.
for x in collection.find({},{"scan": 1, "_id": 0 }):
    results.append(json.loads(json_util.dumps(x)))

# These lists are created once, outside every loop, so the values appended
# below accumulate across all ports of all IPs.
portnumber = []
datay = []
datapro = []


for result in results:
    ips = result['scan']


for ip in ips:

        ports = result['scan'][ip]['tcp']
        ipdomain = result['scan'][ip]['hostnames']

        # ip3 is overwritten on each pass, so only the last hostname's name is kept.
        for ip2 in ipdomain:
            ip3 = ip2['name']

        for port in ports:
            portnumber.append(port)
            # datax is the whole "script" dict of the port, not its "vulners" entry.
            datax = ports[port]['script']
            datay.append(datax)
            datapro2 = ports[port]['product']
            datapro.append(datapro2)
            date = datetime.datetime.now()
            date_now = date.strftime("%x, %X")

        pass_json_var = {'domain': ip3, 'ports': portnumber, 'product': datapro, 'vulnerabilities': datay, "date": date_now}

        # pass_json_var is built as a dict just above, so this isinstance check is
        # never true and the insert_one branch is the one that runs.
        if isinstance(pass_json_var, list):
            domaindata.insert_many(pass_json_var)
        else:
            domaindata.insert_one(pass_json_var)

With this code, if the "results" output contains only one "vulners" value it works fine, but when there are multiple ports with "vulners" values it doesn't work!

How can I access the 'vulners' value? I'm hoping someone can guide me a bit. Please try to give a solution that is dynamic.

Thanks a lot!

CodePudding user response:

Model based approach

This approach is based on a model of the data you want to parse. From my point of view, it is more work in the beginning, with the advantage that you get clean error messages and can control the behaviour by adapting your data model.

  1. make a model of the data you want to parse
from typing import Any, Optional
from pydantic import BaseModel, Field

class ExScript(BaseModel):
    """Model of the "script" object found under a port entry."""

    # Text of the "vulners" key; defaults to an empty string.
    vulners: str = ""

class Ex30000(BaseModel):
    """Model of a single port entry (the value stored under a port key)."""

    # Build an empty ExScript when "script" is missing. The earlier
    # default=Any stored the typing.Any object itself as the field value, so
    # reading .script.vulners raised AttributeError for ports without
    # script output.
    script: ExScript = Field(default_factory=ExScript)
        
class ExTcp(BaseModel):
    """Model of the "tcp" object; only the port key "30000" is mapped."""

    # default_factory yields a real Ex30000 instance when the port key is
    # absent; default=Any would store the typing.Any object as the value and
    # break attribute access further down the path.
    root: Ex30000 = Field(default_factory=Ex30000, alias="30000")
    
class ExRoot(BaseModel):
    """Model of the object stored under one address key of the scan result."""

    # Declared without a default: "tcp" is a required field.
    tcp: ExTcp = Field()
    
class Base(BaseModel):
    """Top-level scan document; only the address key "35.0.0.0.0" is mapped."""

    # Marked required with Ellipsis. The earlier default=Any stored the
    # typing.Any object as the value when the key was missing, which only
    # surfaced later as an AttributeError on .tcp; a required field reports
    # the missing key at parse time instead. ExRoot cannot serve as a default
    # because its own "tcp" field is required.
    root: ExRoot = Field(..., alias="35.0.0.0.0")

  2. Change your input data to a raw string; otherwise you will have to escape \n and \t.
# Sample scan document with a "tcp" -> "30000" -> "script" -> "vulners" path.
# The literal is a raw string, so the \n and \t sequences stay as backslash
# escapes in the JSON text rather than being converted by Python first.
input_will_work = r"""{
  "35.0.0.0.0": {
    "hostnames": [
      {
        "name": "domain.com",
        "type": "PTR"
      }
    ],
    "addresses": {
      "ipv4": "35.0.0.0"
    },
    "vendor": {},
    "status": {
      "state": "up",
      "reason": "syn-ack"
    },
    "tcp": {
      "30000": {
        "state": "open",
        "reason": "syn-ack",
        "name": "http",
        "product": "nginx",
        "version": "1.20.0",
        "extrainfo": "",
        "conf": "10",
        "cpe": "cpe:/a:igor_sysoev:nginx:1.20.0",
        "script": {
          "http-server-header": "nginx/1.20.0",
          "vulners": "\n  cpe:/a:igor_sysoev:nginx:1.20.0: \n    \tNGINX:CVE-2021-23017\t6.8\thttps://vulners.com/nginx/NGINX:CVE-2021-23017\n    \t9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t6.8\thttps://vulners.com/githubexploit/9A14990B-D52A-56B6-966C-6F35C8B8EB9D\t*EXPLOIT*\n    \t1337DAY-ID-36300\t6.8\thttps://vulners.com/zdt/1337DAY-ID-36300\t*EXPLOIT*\n    \tPACKETSTORM:162830\t0.0\thttps://vulners.com/packetstorm/PACKETSTORM:162830\t*EXPLOIT*"
        }
      }
    }
  }
}
"""

# Document whose address entry is an empty object, so it has no "tcp" key.
input_will_fail = r"""{
  "35.0.0.0.0": {}
}
"""

3.1 this should give you the expected result

# Parse the JSON text into the model tree, then follow the attribute path
# down to the vulners string and print it.
obj1 = Base.parse_raw(input_will_work)
print(obj1.root.tcp.root.script.vulners)

3.2 this should throw an exception

# The address entry in this input has no "tcp" key, which ExRoot marks as required.
obj2 = Base.parse_raw(input_will_fail)

Search data with jsonpath

This should return the value of every key named vulners, at any depth.

from jsonpath_ng import jsonpath, parse
import json

# Decode the sample JSON text into plain Python dicts and lists.
obj = json.loads(input_will_work)
# '$..vulners' starts at the root ($) and uses recursive descent (..), so it
# matches "vulners" keys at any depth.
p = parse('$..vulners')
      
# Print the value of each match found in the decoded document.
for match in p.find(obj):
    print(match.value)

CodePudding user response:

Update:


def extract_data(ip_address_data):
    """Collect the hostnames and per-port details for one scanned host.

    Args:
        ip_address_data: The dict stored under a single IP address in the
            scan result. The keys read here are "hostnames" and "tcp".

    Returns:
        A dict with two keys: "domains", the host's "hostnames" list (empty
        when the key is absent), and "ports_data", a list holding one
        ``{"port", "product", "vulners"}`` dict per entry in "tcp".
        "product" and "vulners" are None for ports that do not carry them.
    """
    ports_data = []
    # Each port can have a different product and vulners text, so the values
    # are grouped per port in a dictionary instead of parallel lists.
    for port, port_data in ip_address_data.get("tcp", {}).items():
        ports_data.append({
            "port": port,
            "product": port_data.get("product"),
            # A port may have no "script" entry, or a script entry without
            # "vulners"; report None instead of raising KeyError.
            "vulners": port_data.get("script", {}).get("vulners"),
        })

    return {
        "domains": ip_address_data.get("hostnames", []),
        "ports_data": ports_data,
    }

# `result` stands in for the "scan" data of one stored MongoDB document:
# a dict keyed by IP address.
# NOTE(review): presumably loaded with something like
# collection.find_one({})["scan"]; find() is iterated as a sequence of
# documents in the question's code, so it would not be indexed by "scan"
# directly - confirm against the real caller.
result = {
    "127.0.0.1": {
        "addresses": {
            "ipv4": "127.0.0.1"
        },
        "hostnames": [
            {
                "name": "localhost",
                "type": "PTR"
            }
        ],
        "status": {
            "reason": "conn-refused",
            "state": "up"
        },
        "tcp": {
            "5000": {
                "conf": "10",
                "cpe": "cpe:/a:python:python:3.9.2",
                "extrainfo": "Python 3.9.2",
                "name": "http",
                "product": "Werkzeug httpd",
                "reason": "syn-ack",
                "script": {
                    "vulners": "\n  cpe:/a:python:python:3.9.2: \n    \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n    \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n    \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n    \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
                },
                "state": "open",
                "version": "1.0.1"
            },
            "6000": {
                "conf": "10",
                "cpe": "cpe:/a:python:python:3.9.2",
                "extrainfo": "Python 3.9.2",
                "name": "http",
                "product": "Werkzeug httpd",
                "reason": "syn-ack",
                "script": {
                    "vulners": "\n  cpe:/a:python:python:3.9.2: \n    \tCVE-2021-29921\t7.5\thttps://vulners.com/cve/CVE-2021-29921\n    \tCVE-2021-23336\t4.0\thttps://vulners.com/cve/CVE-2021-23336\n    \tMSF:ILITIES/DEBIAN-CVE-2021-3426/\t2.7\thttps://vulners.com/metasploit/MSF:ILITIES/DEBIAN-CVE-2021-3426/\t*EXPLOIT*\n    \tCVE-2021-3426\t2.7\thttps://vulners.com/cve/CVE-2021-3426"
                },
                "state": "open",
                "version": "1.0.1"
            }
        },
        "vendor": {

        }
    }
}


def scandata():
    """Print every IP address in ``result`` next to its extracted summary."""
    for ip_address, host_record in result.items():
        print(ip_address, extract_data(host_record))


# Run the demo against the sample `result` dict.
scandata()
  •  Tags:  
  • Related