I am trying to scrape a website, but it seems my slice of products it's empty.
scraper.go:
package scraper
import (
"fmt"
"strings"
"github.com/gocolly/colly"
"github.com/gocolly/colly/extensions"
)
type Product struct {
name string
fullPrice string
url string
}
func Scraper(site string) []Product {
products := []Product{}
c := colly.NewCollector()
replacer := strings.NewReplacer("R$", "", ",", ".")
c.OnHTML("div#column-main-content", func(e *colly.HTMLElement) {
fullPrice := e.ChildText("span.m7nrfa-0.eJCbzj.sc-ifAKCX.ANnoQ")
product := Product{
name: e.ChildText("h2"),
fullPrice: replacer.Replace(fullPrice),
url: e.ChildAttr("a.sc-1fcmfeb-2.iezWpY", "href"),
}
fmt.Println(product)
products = append(products, product)
})
fmt.Println(products)
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL)
})
c.OnError(func(r *colly.Response, err error) {
fmt.Println("Request URL:", r.Request.URL, "failed with response:", r.Request, "\nError:", err)
})
// Uses a random User-Agent in each request
extensions.RandomUserAgent(c)
c.Visit(site)
return products
}
main.go:
package main
import "github.com/Antonio-Costa00/Go-Price-Monitor/scraper"
func main() {
scraper.Scraper("https://sp.olx.com.br/?q=iphone'")
}
product variable has an output, but the slice is empty.
slice output:
[]
I don't know if I am doing something wrong when appending the result to products slice.
Can someone help me to check if I am doing something wrong to return an empty slice?
CodePudding user response:
The Colly library does the scarping Asynchronously, so when you print the products it is empty, but it will be filled in another goroutine. By using the OnScraped handler and printing the products there you should see it is filled.
package scraper
import (
"fmt"
"strings"
"github.com/gocolly/colly"
"github.com/gocolly/colly/extensions"
)
type Product struct {
name string
fullPrice string
url string
}
func Scraper(site string) []Product {
products := []Product{}
c := colly.NewCollector()
replacer := strings.NewReplacer("R$", "", ",", ".")
c.OnHTML("div#column-main-content", func(e *colly.HTMLElement) {
fullPrice := e.ChildText("span.m7nrfa-0.eJCbzj.sc-ifAKCX.ANnoQ")
product := Product{
name: e.ChildText("h2"),
fullPrice: replacer.Replace(fullPrice),
url: e.ChildAttr("a.sc-1fcmfeb-2.iezWpY", "href"),
}
fmt.Println(product)
products = append(products, product)
})
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL)
})
c.OnError(func(r *colly.Response, err error) {
fmt.Println("Request URL:", r.Request.URL, "failed with response:", r.Request, "\nError:", err)
})
c.OnScraped(func(r *colly.Response) {
fmt.Println(products)
})
// Uses a random User-Agent in each request
extensions.RandomUserAgent(c)
c.Visit(site)
return products
}
