I'm trying to scrape https://arxiv.org/search/?query=healthcare&searchtype=all with Selenium and Python. The for loop below takes too long to execute. I also tried headless browsers and PhantomJS, but they don't scrape the abstract field (I need the abstract expanded, i.e. the "▽ More" link clicked).
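For reference, the headless attempt was set up roughly like this (a minimal sketch of what I tried; the Options import and the headless flag are the standard Selenium Firefox ones, though the exact flag may differ between Selenium versions):

from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options

opts = Options()
opts.headless = True  # or opts.add_argument('-headless') on older Selenium/geckodriver combos
browser = Firefox(options=opts)

With that driver the loop still misses the expanded abstracts. The full (non-headless) script: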
import pandas as pd
import selenium
import re
import time
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Firefox
browser = Firefox()
url_healthcare = 'https://arxiv.org/search/?query=healthcare&searchtype=all'
browser.get(url_healthcare)
dfs = []
for i in range(1, 39):  # 38 result pages for this query at the time of writing
    articles = browser.find_elements_by_css_selector('li[class="arxiv-result"]')
    for article in articles:
        title = article.find_element_by_css_selector('p[class="title is-5 mathjax"]').text
        arxiv_id = article.find_element_by_tag_name('a').text.replace('arXiv:', '')
        # first link is the abstract page, second link is the PDF
        arxiv_link = article.find_elements_by_tag_name('a')[0].get_attribute('href')
        pdf_link = article.find_elements_by_tag_name('a')[1].get_attribute('href')
        authors = article.find_element_by_css_selector('p[class="authors"]').text.replace('Authors:', '')
        # expand the truncated abstract by clicking this article's "▽ More" link
        try:
            article.find_element_by_link_text('▽ More').click()
        except NoSuchElementException:
            time.sleep(0.1)
        abstract = article.find_element_by_css_selector('p[class="abstract mathjax"]').text
        date = article.find_element_by_css_selector('p[class="is-size-7"]').text
        date = re.split(r"Submitted|;", date)[1]  # keep only the submission date
        tag = article.find_element_by_css_selector('div[class="tags is-inline-block"]').text.replace('\n', ',')
        
        # the DOI tag is not present for every article
        try:
            doi = article.find_element_by_css_selector('div[class="tags has-addons"]').text
            doi = re.split(r'\s', doi)[1]
        except NoSuchElementException:
            doi = 'None'
        all_combined = [title, arxiv_id, arxiv_link, pdf_link, authors, abstract, date, tag, doi]
        dfs.append(all_combined)
    print('Finished Extracting Page:', i)
    # move to the next results page; stop once there is no "Next" button left
    try:
        browser.find_element_by_class_name('pagination-next').click()
    except NoSuchElementException:
        browser.close()
        break

    time.sleep(0.1)  # brief pause so the next page can start loading
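At the end I turn the rows into a DataFrame with columns matching all_combined, roughly:

df = pd.DataFrame(dfs, columns=['title', 'arxiv_id', 'arxiv_link', 'pdf_link',
                                'authors', 'abstract', 'date', 'tag', 'doi'])

From a quick look at the page source it also seems the full abstract may already be present in a hidden span (class abstract-full) next to the truncated one, so something like the sketch below might avoid clicking "▽ More" at all, but I have not verified the class name or whether the hidden text is always complete:

abstract = article.find_element_by_css_selector(
    'span.abstract-full').get_attribute('textContent').strip()
# textContent also returns text of hidden elements; the result may still
# include the trailing "△ Less" link text, which would need stripping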