I am trying to build a web scraper for Trustpilot, but the code keeps returning an empty DataFrame and I can't figure out why. Could someone please help me? Thank you so much. This is the output I get (shown below). I am a beginner in Python, so any help is much appreciated.
Empty DataFrame Columns: [Title, Body, Rating, Date] Index: []
code:
import requests
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json
import time
from time import sleep
from random import randint
# Record the wall-clock start time (optional; handy for measuring the run).
then = time.time()
# One accumulator list per output column; they are filled in lockstep, so
# index i of every list describes the same review.
reviews, headings, stars, dates = [], [], [], []
# Page numbers to request. np.arange excludes the stop value, so this yields
# pages 1 through 9; raise the stop value to cover more result pages.
pages = np.arange(1, 10)
# Walk the result pages one at a time and collect every review found on each.
# For each review, exactly one entry is appended to each of the four lists
# (reviews, headings, stars, dates) so they always stay the same length;
# a field that cannot be found is recorded as '-'.
for page in pages:
    # Keep the page number and the HTTP response in separate names.
    # The timeout stops one stalled request from hanging the whole run.
    response = requests.get(
        "https://www.trustpilot.com/review/www.dugood.org" + "?page=" + str(page),
        timeout=30,
    )
    # An error status has no reviews to parse; report it instead of silently
    # contributing nothing to the final DataFrame.
    if not response.ok:
        print("Page", page, "returned HTTP status", response.status_code, "- skipped")
        continue
    soup = BeautifulSoup(response.text, "html.parser")
    # Each review is expected inside a <div class="review-content"> parent tag.
    review_div = soup.find_all('div', class_="review-content")
    if not review_div:
        # NOTE(review): when this fires for every page the DataFrame ends up
        # empty. Presumably the site's HTML no longer uses these class names;
        # confirm by inspecting response.text and update the selectors.
        print("Page", page, "contained no 'review-content' blocks")
    for container in review_div:
        # Body of the review: read the text of the matched <p> itself, so a
        # different <p> earlier in the container is never picked up by mistake.
        body_tag = container.find('p', attrs={'class': 'review-content__text'})
        review = body_tag.get_text(strip=True) if body_tag is not None else '-'
        reviews.append(review)
        # Title of the review, taken from the matched <h2>.
        title_tag = container.find('h2', attrs={'class': 'review-content__title'})
        heading = title_tag.get_text(strip=True) if title_tag is not None else '-'
        headings.append(heading)
        # Star rating: the alt text of the <img> inside the rating <div>.
        # Each lookup is checked so a missing tag does not raise AttributeError.
        rating_div = container.find("div", {"class": "star-rating star-rating--medium"})
        rating_img = rating_div.find('img') if rating_div is not None else None
        star = rating_img.get('alt', '-') if rating_img is not None else '-'
        stars.append(star)
        # Date: read from the JSON held in the review's first <script> tag.
        # A missing tag (AttributeError), malformed JSON (ValueError), a
        # missing key (KeyError) or a non-dict payload (TypeError) all fall
        # back to '-'.
        script_tag = container.find('script')
        try:
            date = json.loads(script_tag.text)['publishedDate']
        except (AttributeError, ValueError, KeyError, TypeError):
            date = '-'
        dates.append(date)
# Assemble the four parallel lists into one table (one row per review,
# columns in the order Title, Body, Rating, Date) and display it.
TrustPilot = pd.DataFrame(
    {
        'Title': headings,
        'Body': reviews,
        'Rating': stars,
        'Date': dates,
    }
)
print(TrustPilot)
