Getting a "not subscriptable" error when running a web scraping script - CodePudding

I am practicing web scraping and am using this code. I am trying the for loop.

import requests
from bs4 import BeautifulSoup

# Parallel result lists: entry k of each list belongs to the same exhibitor.
name=[]
link=[]
address=[]
# Walk listing pages 1 through 10.
for i in range (1,11):
  i=str(i)  # the page number must be a str for the "+" concatenation below
  url = "https://forum.iktva.sa/exhibitors-list?&page=" + i + "&searchgroup=37D5A2A4-exhibitors"
  soup = BeautifulSoup(requests.get(url).content, "html.parser")

  for a in soup.select(".m-exhibitors-list__items__item__header__title__link"):
      # The href is split on single quotes; the second piece is used as the
      # path of the exhibitor's detail page.
      company_url = "https://forum.iktva.sa/" + a["href"].split("'")[1]

      soup2 = BeautifulSoup(requests.get(company_url).content, "html.parser")
      n=soup2.select_one(".m-exhibitor-entry__item__header__title").text

      # NOTE: this is the line that raises the reported TypeError, because
      # select_one() returns None when no element matches "h4 a".
      l=soup2.select_one("h4 a")["href"]
      a=soup2.select_one(".m-exhibitor-entry__item__body__contacts__address").text
      name.append(n)
      link.append(l)
      address.append(a)

When I am running the program I am getting this error:

  l=soup2.select_one("h4 a")["href"]
TypeError: 'NoneType' object is not subscriptable

I am not sure how to solve the problem.

CodePudding user response：

You just need to replace that line with the following code to handle None:

# Look the anchor up once; select_one() gives back None when nothing matches.
website_anchor = soup2.select_one("h4 a")
# Use the anchor's href when it exists, otherwise a placeholder string.
l = website_anchor["href"] if website_anchor else "Website not available"

As you can see, this happens because no website is available for some exhibitors, for example: https://forum.iktva.sa/exhibitors/sanad

Or you can handle all such errors like this:

import requests
from bs4 import BeautifulSoup


def get_object(obj, attr=None):
    """Return ``obj[attr]`` or ``obj.text``, with a placeholder on failure.

    Args:
        obj: Object to read from. May be ``None``, e.g. the result of a
            lookup that matched nothing.
        attr: Key used to subscript ``obj``. When falsy, ``obj.text`` is
            returned instead.

    Returns:
        The looked-up value, or the string ``"Not available"`` when the
        lookup fails (``obj`` is ``None``, the key is missing, or ``obj``
        has no ``text`` attribute).
    """
    try:
        return obj[attr] if attr else obj.text
    except (AttributeError, LookupError, TypeError):
        # Only lookup failures count as "missing data". Anything else
        # (KeyboardInterrupt, SystemExit, ...) propagates to the caller
        # instead of being silently swallowed.
        return "Not available"


# Parallel result lists: entry k of each list describes the same exhibitor.
name = []
link = []
address = []

# Walk listing pages 1 through 10; the f-string formats the int directly.
for i in range(1, 11):
    url = f"https://forum.iktva.sa/exhibitors-list?&page={i}&searchgroup=37D5A2A4-exhibitors"
    soup = BeautifulSoup(requests.get(url).text, features="lxml")

    for a in soup.select(".m-exhibitors-list__items__item__header__title__link"):
        # The href is split on single quotes; the second piece is used as the
        # path of the exhibitor's detail page.
        company_url = "https://forum.iktva.sa/" + a["href"].split("'")[1]
        soup2 = BeautifulSoup(requests.get(company_url).content, "html.parser")

        # Hand each tag (which may be None) straight to get_object. Calling
        # .text first would crash on a missing element and would make
        # get_object look for .text on a plain string.
        company_name = get_object(
            soup2.select_one(".m-exhibitor-entry__item__header__title")
        )
        website = get_object(soup2.select_one("h4 a"), "href")
        company_address = get_object(
            soup2.select_one(".m-exhibitor-entry__item__body__contacts__address")
        )

        name.append(company_name)
        link.append(website)
        address.append(company_address)