import time

from bs4 import BeautifulSoup
from bs4.element import Tag
import pip._internal.distributions
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Years whose rows should be reported.
n = ['2020','2019','2018']
base = 'https://www.bseindia.com'

# Start Chrome using the chromedriver binary at this local path.
browser = webdriver.Chrome('/home/anuj/PycharmProjects/firstfrog/chromedriver')
wait = WebDriverWait(browser, 10)
browser.get('https://www.bseindia.com/stock-share-price/financials/annualreports/500104/')

# Every <td> that is a direct child of an element whose class is exactly "ng-scope".
alert_name = browser.find_elements_by_xpath('//*[@class="ng-scope"]/td')
print(alert_name)

for value in alert_name:
    # Only act on cells whose text is one of the wanted years.
    if value.text in n:
        # NOTE(review): this XPath requires a <td> nested directly inside
        # another <td>, and it searches the whole page rather than the
        # current row, so it presumably matches nothing -- confirm against
        # the live DOM.
        url_d = browser.find_elements_by_xpath('//*[@class="ng-scope"]/td/td/a')
        print(url_d)
        print(value.text)
## Heading ##
            Asked
            
        
        
            Active
            
        
            Viewed 74 times
        
    1
            
            
        
        Dev
        
- 2,739
 - 2
 - 21
 - 34
 
        Anuj Dwivedi
        
- 7
 - 7
 
- 
                    Welcome to SO. Please read [mcve] and edit your post accordingly. – Dev Nov 02 '20 at 09:07
 
1 Answer
0
            
            
        Try this:
from io import StringIO
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Page whose last HTML table is read and whose download links are collected.
url = 'https://www.bseindia.com/stock-share-price/financials/annualreports/500104/'

driver = webdriver.Chrome()
try:
    driver.get(url)
    # Fixed pause before reading the DOM; presumably gives the page's
    # scripts time to render the table -- it is not a guarantee.
    time.sleep(2)

    # Collect the href of the first <a> inside each cell with class "tdcolumn".
    url_lst = []
    for td in driver.find_elements(By.CLASS_NAME, 'tdcolumn'):
        try:
            link = td.find_element(By.XPATH, './/a')
        except NoSuchElementException:
            # Cells without a link are skipped, as before.
            continue
        url_lst.append(link.get_attribute('href'))

    # Parse the rendered page; the last table on it is the one of interest.
    # StringIO avoids passing literal HTML to read_html, which newer pandas
    # versions deprecate.
    df = pd.read_html(StringIO(driver.page_source))[-1]
finally:
    # quit() ends the whole WebDriver session, and runs even if scraping fails.
    driver.quit()

# Column assignment needs one link per table row; fail with a clear message
# instead of pandas' generic length error.
if len(url_lst) != len(df):
    raise ValueError(
        f'found {len(url_lst)} download links for {len(df)} table rows'
    )
df['Download'] = url_lst
print(df)
Output:
    Year                                           Download
0   2020  https://www.bseindia.com/bseplus/AnnualReport/...
1   2019  https://www.bseindia.com/bseplus/AnnualReport/...
2   2018  https://www.bseindia.com/bseplus/AnnualReport/...
3   2017  https://www.bseindia.com/bseplus/AnnualReport/...
4   2016  https://www.bseindia.com/bseplus/AnnualReport/...
5   2015  https://www.bseindia.com/bseplus/AnnualReport/...
6   2014  https://www.bseindia.com/bseplus/AnnualReport/...
7   2013  https://www.bseindia.com/bseplus/AnnualReport/...
8   2012  https://www.bseindia.com/bseplus/AnnualReport/...
9   2011  https://www.bseindia.com/bseplus/AnnualReport/...
10  2010  https://www.bseindia.com/bseplus/AnnualReport/...
        Sushil
        
- 5,440
 - 1
 - 8
 - 26
 
- 
                    
 - 
                    In a single URL? What do you mean by that? Do you want it as a list or something? – Sushil Nov 02 '20 at 11:23
 - 
                    Can I also print the year along with the URL (in the order: year, URL, year, URL)? – Anuj Dwivedi Nov 11 '20 at 09:06
 - 
                    
 - 
                    1
 - 
                    from selenium import webdriver import time url = 'https://www.bseindia.com/stock-share-price/financials/annualreports/500104/' driver = webdriver.Chrome() driver.get(url) time.sleep(2) td_tags = driver.find_elements_by_class_name('tdcolumn') for td in td_tags: try: print(td.find_element_by_xpath('.//a').get_attribute('href')) year = driver.find_element_by_xpath("//tr[@class='ng-scope']") print(year.text) except: pass driver.close() – Anuj Dwivedi Nov 11 '20 at 11:11
 - 
                    Getting output like this , year is not coming in decreasing order " https://www.bseindia.com/bseplus/AnnualReport/500002/5000021219.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021218.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021217.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021216.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021215.pdf 2019 https://www.bseindia.com/bseplus/AnnualReport/500002/5000021214.pdf 2019 " – Anuj Dwivedi Nov 11 '20 at 11:15
 - 
                    
 - 
                    
 - 
                    Please help me, @Sushil. I am trying to get the year along with the URL, in this format: "bseindia.com/bseplus/AnnualReport/500002/5000021219.pdf 2019 bseindia.com/bseplus/AnnualReport/500002/5000021218.pdf 2018" – Anuj Dwivedi Nov 13 '20 at 16:30
 - 
                    It is not generating the whole PDF link. When I click on it, it shows the error "The Page you are looking for has been moved" (BSEINDIA). – Anuj Dwivedi Nov 16 '20 at 11:44
 - 
                    That is how a pandas DataFrame displays data. It does not display the full URL because it is very long. You can export this DataFrame to a CSV file, from which you can copy and paste the link. – Sushil Nov 16 '20 at 12:20
 - 
                    I am trying to export the data to MongoDB; I will take the input from MongoDB and save the result back into MongoDB. If there is any other way to do this, please let me know. – Anuj Dwivedi Nov 16 '20 at 16:42
 - 
                    This thread could help you: https://stackoverflow.com/questions/20167194/insert-a-pandas-dataframe-into-mongodb-using-pymongo – Sushil Nov 17 '20 at 02:46