I have thousands of HTML files stored in a remote directory. All of these files share the same HTML structure. Right now I am scraping each file manually with the following script:
import urllib2
import csv
from bs4 import BeautifulSoup as Soup
html = urllib2.urlopen("http://hostname/coimbatore/3BHK_flats_inCoimbatore.html_%94201308110608%94.html").read()
soup = Soup(html)
for li in soup.findAll('li', attrs={'class':'g'}):
    sLink = li.find('a')
    print sLink['href']
    sSpan = li.find('span', attrs={'class':'st'})
    print sSpan
The above script handles one URL. I want to run the same scrape over every HTML file under that directory, regardless of the file names. I could not find an existing question that covers this.
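For context, here is roughly what I am after. This is only a sketch and rests on an assumption: that the server exposes an auto-generated directory index at http://hostname/coimbatore/ whose links I can parse. BASE and list_html_files are names made up for illustration.

import urllib2
from urlparse import urljoin
from bs4 import BeautifulSoup as Soup

BASE = 'http://hostname/coimbatore/'  # assumed directory-index URL

def list_html_files(base):
    # Parse the directory index page and yield the absolute URL of
    # every .html file it links to (assumes such an index exists).
    index = Soup(urllib2.urlopen(base).read(), 'html.parser')
    for a in index.findAll('a', href=True):
        if a['href'].endswith('.html'):
            yield urljoin(base, a['href'])

def scrape(url):
    # Same per-file extraction as the script above.
    soup = Soup(urllib2.urlopen(url).read(), 'html.parser')
    for li in soup.findAll('li', attrs={'class': 'g'}):
        print li.find('a')['href']
        print li.find('span', attrs={'class': 'st'})

for url in list_html_files(BASE):
    scrape(url)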
Update: Code
import urllib2
from bs4 import BeautifulSoup

def getPageText(url):
    # fetch the raw page content for a given URL
    data = urllib2.urlopen(url).read()
    # parse it as an HTML document (bs4 decodes entities automatically)
    bs = BeautifulSoup(data)
    # pull the link and snippet out of every result item
    for li in bs.findAll('li', attrs={'class': 'g'}):
        sLink = li.find('a')
        print sLink['href']
        sSpan = li.find('span', attrs={'class': 'st'})
        print sSpan
def main():
    urls = [
        'http://192.168.1.200/coimbatore/3BHK_flats_inCoimbatore.html_%94201308110608%94.html',
        'http://192.168.1.200/coimbatore/3BHK_flats_inCoimbatore.html_%94201308110608%94.html.html'
    ]
    for url in urls:
        getPageText(url)

if __name__ == "__main__":
    main()
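Since csv is already imported in the first script, here is a sketch of the direction I want main() to take once the per-file scraping works: getPageRows is a hypothetical variant of getPageText that returns (href, snippet) pairs instead of printing them, so everything can be written to a single CSV. The URL list and the output filename are placeholders.

import csv
import urllib2
from bs4 import BeautifulSoup

def getPageRows(url):
    # Hypothetical variant of getPageText: collect (href, snippet)
    # pairs instead of printing them.
    bs = BeautifulSoup(urllib2.urlopen(url).read(), 'html.parser')
    rows = []
    for li in bs.findAll('li', attrs={'class': 'g'}):
        link = li.find('a')
        if link is None:
            continue
        span = li.find('span', attrs={'class': 'st'})
        snippet = span.get_text().encode('utf-8') if span else ''
        rows.append([link['href'], snippet])
    return rows

def main():
    urls = [
        'http://192.168.1.200/coimbatore/3BHK_flats_inCoimbatore.html_%94201308110608%94.html',
    ]
    with open('results.csv', 'wb') as f:  # 'wb' for Python 2's csv module
        writer = csv.writer(f)
        writer.writerow(['href', 'snippet'])
        for url in urls:
            writer.writerows(getPageRows(url))

if __name__ == "__main__":
    main()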