Home > Back-end >  Scrapy doesn't go to the next url
Scrapy doesn't go to the next url

Time:01-13

The `next_page` variable holds the correct link when I test the selector in the Scrapy shell, and even when I print it to the console, but Scrapy still keeps scraping the same (first) page.

code below:

class QuotesSpider(scrapy.Spider):
    """Spider that requests a Flipkart listing page and follows its pagination.

    ``start_requests`` seeds the crawl with the listing URL and ``parse``
    schedules the next page found in each response.
    """

    name = "Bider"

    def start_requests(self):
        """Yield one request per seed URL, handled by :meth:`parse`."""
        urls = [
            "https://www.flipkart.com/clothing-and-accessories/bottomwear/pr?sid=clo,vua&p[]=facets.ideal_for%5B%5D=Men&p[]=facets.ideal_for%5B%5D=men&otracker=categorytree&fm=neo/merchandising&iid=M_1064313a-7a8d-48f3-8199-daaf60d62ef6_2_372UD5BXDFYS_MC.8HARX8UX7IX5&otracker=hp_rich_navigation_2_2.navigationCard.RICH_NAVIGATION_Fashion~Men%27s+Bottom+Wear_8HARX8UX7IX5&otracker1=hp_rich_navigation_PINNED_neo/merchandising_NA_NAV_EXPANDABLE_navigationCard_cc_2_L1_view-all&cid=8HARX8UX7IX5"
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    # ``parse`` must be indented as a method of the spider; defined at module
    # level it is not reachable as ``self.parse``.
    def parse(self, response):
        """Print the next-page link of ``response`` and schedule a request for it.

        Yields nothing when the response contains no pagination link.
        """
        # A fresh Chrome instance is started for every response, so it has to
        # be shut down again or one browser process leaks per crawled page.
        # NOTE(review): the selector below runs on Scrapy's own response, not
        # on the page loaded in this browser - confirm the browser is needed.
        browser = webdriver.Chrome()
        try:
            browser.get(response.request.url)
        finally:
            browser.quit()

        pagination_links = response.css("a._1LKTO3::attr(href)").getall()
        if not pagination_links:
            # The response body never changes, so sleeping and re-running the
            # same selector cannot find a link that was missing the first time.
            return

        # NOTE(review): assumes the last matching anchor is the "Next" link;
        # verify against the live markup.
        next_page = pagination_links[-1]
        print("\n\n\n NEXT PAGE\n\n\n")
        print(f"\n{next_page}\n")

        next_page = response.urljoin(next_page)
        print(next_page)

        yield scrapy.Request(next_page, callback=self.parse)

CodePudding user response:

Your code works for me, so I'm not sure why it doesn't work for you. Anyway, the pagination below also works, and it's cleaner.

import scrapy
from selenium import webdriver


class QuotesSpider(scrapy.Spider):
    """Spider that requests a Flipkart listing page and follows its "Next" link.

    ``start_requests`` seeds the crawl with the listing URL and ``parse``
    schedules the following page for as long as a "Next" link is present.
    """

    name = "Bider"

    def start_requests(self):
        """Yield one request per seed URL, handled by :meth:`parse`."""
        urls = [
            "https://www.flipkart.com/clothing-and-accessories/bottomwear/pr?sid=clo,vua&p[]=facets.ideal_for%5B%5D=Men&p[]=facets.ideal_for%5B%5D=men&otracker=categorytree&fm=neo/merchandising&iid=M_1064313a-7a8d-48f3-8199-daaf60d62ef6_2_372UD5BXDFYS_MC.8HARX8UX7IX5&otracker=hp_rich_navigation_2_2.navigationCard.RICH_NAVIGATION_Fashion~Men%27s+Bottom+Wear_8HARX8UX7IX5&otracker1=hp_rich_navigation_PINNED_neo/merchandising_NA_NAV_EXPANDABLE_navigationCard_cc_2_L1_view-all&cid=8HARX8UX7IX5"
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Print the "Next" link of ``response`` and schedule a request for it.

        Yields nothing when the response has no anchor wrapping a ``Next`` span.
        """
        # A fresh Chrome instance is started for every response, so it has to
        # be shut down again or one browser process leaks per crawled page.
        # NOTE(review): the XPath below runs on Scrapy's own response, not on
        # the page loaded in this browser - confirm the browser is needed.
        browser = webdriver.Chrome()
        try:
            browser.get(response.request.url)
        finally:
            browser.quit()

        next_page = response.xpath('//a[span[text()="Next"]]/@href').get()
        if not next_page:
            return

        print("\n\n\n NEXT PAGE\n\n\n")
        print(f"\n{next_page}\n")
        next_page = response.urljoin(next_page)
        print(next_page)

        yield scrapy.Request(next_page, callback=self.parse)

pagination proof

  •  Tags:  
  • Related