Advertisement
rcod3r

Untitled

Apr 29th, 2024
571
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.86 KB | Source Code | 0 0
  1. import scrapy
  2. import json
  3.  
  4. class OlxHouses(scrapy.Spider):
  5.     name = 'olx'
  6.  
  7.     custom_settings = {
  8.         'USER_AGENT' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
  9.         'AUTOTHROTTLE_ENABLED': True,
  10.     }
  11.  
  12.     def start_requests(self):
  13.         for page in range(1,101):
  14.             yield scrapy.Request(f'https://www.olx.com.br/eletronicos-e-celulares/estado-sp?o={page}')
  15.  
  16.     def parse(self, response, **kwargs):
  17.         html = json.loads(response.xpath('//script[@id="__NEXT_DATA__"]/text()').get())
  18.         houses = html.get('props').get('pageProps').get('ads')
  19.         for house in houses:
  20.             yield{
  21.                 'title' : house.get('title'),
  22.                 'price' : house.get('price'),
  23.                 'locations' : house.get('location')
  24.             }
  25.  
  26.  
  27.  
Tags: python
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement