Hi there,
I need to scrape Best Buy, and I am currently using Scrapy. I was able to get most of the data I need, but I ran into problems trying to get the data from the Specifications section, where the UPC is. I was able to get the features, but I am not able to grab the data from that part.
I would really appreciate your help. This is my code:
from scrapy import Spider
from bestbuy_spider.items import BestbuyProductItem
from scrapy import Request
import re
import json
class Bestbuy2Spider(Spider):
    """Crawl a Best Buy search listing and yield one item per product page.

    Flow:
      1. ``parse`` reads the result counter on the start page and schedules
         the paginated listing pages.
      2. ``parse_product_list`` follows every product link on a listing page.
      3. ``parse_product`` builds a ``BestbuyProductItem`` from a product page.
    """

    name = 'bestbuy2'
    # allowed_domains = ['https://www.bestbuy.com']
    allowed_domains = ['bestbuy.com']
    # Alternative listings that were tried previously:
    # https://www.bestbuy.com/site/searchpage.jsp?cp=1&searchType=search&browsedCategory=pcmcat209400050001&ks=960&sp=-bestsellingsort%20skuidsaas&sc=Global&list=y&usc=All%20Categories&type=page&id=pcat17071&iht=n&nrp=15&seeAll=&st=categoryid%24pcmcat209400050001&qp=carrier_facet%3DCarrier~Verizon
    # start_urls = ['https://www.bestbuy.com/site/laptop-computers/all-laptops/pcmcat138500050001.c?id=pcmcat138500050001']
    start_urls = ['https://www.bestbuy.com/site/searchpage.jsp?id=pcat17071&qp=storepickupstores_facet%3DStore%20Availability%20-%20In%20Store%20Pickup~237&st=%2A']

    # Listing URL; ``{}`` is replaced by the 1-based page number (``cp``).
    page_url_template = 'https://www.bestbuy.com/site/searchpage.jsp?cp={}&id=pcat17071&qp=storepickupstores_facet%3DStore%20Availability%20-%20In%20Store%20Pickup~237&st=%2A'

    # Maximum number of listing pages to request. The default of 1 keeps the
    # previous behaviour (only the first page was crawled); set to None to
    # crawl every page, or override with ``-a max_pages=N``.
    max_pages = 1

    @staticmethod
    def _count_pages(counter_text):
        """Return the number of listing pages implied by the result counter.

        The counter is expected to contain exactly three integers, read as
        (first item, items on this page, total items) -- presumably something
        like "1-24 of 1,234 items"; confirm against the live page.

        Returns None when the text is missing or does not have that shape,
        so the caller can decide on a fallback instead of crashing.
        """
        if not counter_text:
            return None
        # Allow thousands separators so "1,234" is one number, not two.
        numbers = [
            int(match.replace(',', ''))
            for match in re.findall(r'\d[\d,]*', counter_text)
        ]
        if len(numbers) != 3:
            return None
        _, per_page, total = numbers
        if per_page <= 0:
            return None
        # Ceiling division: a trailing partial page is still a page.
        return (total + per_page - 1) // per_page

    def parse(self, response):
        """Schedule the listing pages, capped by ``max_pages``.

        Yields one ``Request`` per listing page, handled by
        ``parse_product_list``. If the result counter cannot be read, only
        the first listing page is requested and a warning is logged.
        """
        counter_text = response.xpath('//div[@class="left-side"]/span/text()').extract_first()
        page_count = self._count_pages(counter_text)
        if page_count is None:
            self.logger.warning(
                'Could not read the result counter on %s (text=%r); '
                'requesting the first listing page only',
                response.url, counter_text,
            )
            page_count = 1

        if self.max_pages is not None:
            # int() because ``-a max_pages=N`` arrives as a string.
            page_count = min(page_count, int(self.max_pages))

        for page_number in range(1, page_count + 1):
            # product list page
            yield Request(
                url=self.page_url_template.format(page_number),
                callback=self.parse_product_list,
            )

    def parse_product_list(self, response):
        """Yield a ``Request`` for every product linked from a listing page."""
        for row in response.xpath('//ol[@class="sku-item-list"]/li'):
            href = row.xpath('.//div[@class="sku-title"]/h4/a/@href').extract_first()
            if not href:
                # Row without a product link: skip it rather than request
                # a bogus ".../None" URL.
                continue
            self.logger.debug('Product URL: %s', href)
            # urljoin handles both relative and absolute hrefs.
            yield Request(url=response.urljoin(href), callback=self.parse_product)

    def parse_product(self, response):
        """Build and yield a ``BestbuyProductItem`` from a product page.

        Only ``product`` and ``Location`` are populated; the commented-out
        fields below are selectors that are currently disabled.
        """
        item = BestbuyProductItem(
            product=response.xpath('//div[@class="sku-title"]/h1/text()').extract_first(),
            #color = response.xpath('li[@class="image selected"]/div/a/@title').extract_first(),
            #skuId = response.xpath('//div[@class="sku product-data"]/span[2]/text()').extract_first(),
            #price = response.xpath('//div[@class="priceView-hero-price priceView-customer-price"]/span[1]/text()').extract_first(),
            #model = response.xpath('//div[@class="model product-data"]/span[2]/text()').extract_first(),
            #main_image = response.xpath('//img[@class="primary-image"]/@src').extract_first(),
            #images = response.xpath('//*[@class="thumbnail-list"]//img/@src').extract(),
            #description = response.xpath('//div[@class="long-description-container body-copy "]//div/text()').extract(),
            #features = response.xpath('//div[@class="list-row"]/p/text()').extract(),
            #regular_price = response.xpath('//div[@class="pricing-price__regular-price"]/text()').extract_first(),
            Location=response.xpath('//div[@class="fulfillment-fulfillment-summary"]//div/p[1]/span/text()').extract(),
        )
        yield item