I'm building a web crawler. Some of the data I write to the datastore gets saved, but some of it doesn't, and I have no idea what the problem is.
Here is my crawler class, followed by a rough sketch of how I invoke it:
import datetime
import re
import urllib2

# Listings, Graph and add_page_to_index come from elsewhere in the project (the models are shown below)

class Crawler(object):
    def get_page(self, url):
        try:
            # without a User-Agent header the site rejects the request, so send one
            req = urllib2.Request(url, headers={'User-Agent': "Magic Browser"})
            response = urllib2.urlopen(req)
            return response.read()
        except urllib2.HTTPError as e:
            return e.reason
    def get_all_links(self, page):
        # pull every http/https URL out of the raw page text
        return re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', page)
    def union(self, lyst1, lyst2):
        # append the elements of lyst2 that are not already in lyst1
        for elmt in lyst2:
            if elmt not in lyst1:
                lyst1.append(elmt)
        return lyst1
    # crawl the web for links starting from the seed url;
    # builds the Index and Graph entities and updates the Listings crawl state
    def crawl_web(self, seed="http://tonaton.com/"):
        query = Listings.query()  # there is at most one Listings entity holding the crawl state
        objListing = query.get()
        if not objListing:
            objListing = Listings()
            objListing.toCrawl = [seed]
            objListing.Crawled = []
        start_time = datetime.datetime.now()
        # stop after 5 seconds, since a full crawl could run forever
        while datetime.datetime.now() - start_time < datetime.timedelta(0, 5):
            try:
                page = objListing.toCrawl.pop()
                if page not in objListing.Crawled:
                    content = self.get_page(page)
                    add_page_to_index(page, content)  # builds/updates the Index entities (defined elsewhere)
                    outlinks = self.get_all_links(content)
                    graph = Graph()  # one Graph entity per crawled url
                    graph.url = page
                    graph.links = outlinks  # save all outlinks as the value part of the graph url
                    graph.put()
                    self.union(objListing.toCrawl, outlinks)
                    objListing.Crawled.append(page)
            except:
                return False
        objListing.put()  # save the crawl state to the datastore
        return True  # return True if it works
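And this is roughly how I invoke it; the handler name and route here are just placeholders, not my exact code:

import webapp2

class CrawlPage(webapp2.RequestHandler):  # placeholder name; the real handler does a bit more
    def get(self):
        crawler = Crawler()
        result = crawler.crawl_web("http://tonaton.com/")
        self.response.write('crawl finished: %s' % result)

app = webapp2.WSGIApplication([('/crawl', CrawlPage)], debug=True)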
The classes that define the various ndb models are in this Python module:
import os
import urllib
from google.appengine.ext import ndb
import webapp2
class Listings(ndb.Model):
    toCrawl = ndb.StringProperty(repeated=True)
    Crawled = ndb.StringProperty(repeated=True)
# the Index entity maps a keyword to the urls it appears on
class Index(ndb.Model):
    keyword = ndb.StringProperty()          # keyword part of the index
    url = ndb.StringProperty(repeated=True) # value part of the index

# old link storage, replaced because of the size limit described below:
#class Links(ndb.Model):
#    links = ndb.JsonProperty(indexed=True)
class Graph(ndb.Model):
    url = ndb.StringProperty()
    links = ndb.StringProperty(repeated=True)
It used to work fine when I had JsonProperty in place of StringProperty(repeated=True), but JsonProperty is limited to 1500 bytes when indexed, so at some point I got an error.
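To make that concrete, the change on the Graph model was roughly this (the old line is left as a comment; individual URLs are short, so each repeated value stays well under the cap):

class Graph(ndb.Model):
    url = ndb.StringProperty()
    # old: one JSON blob per entity; indexed values are capped at 1500 bytes
    # links = ndb.JsonProperty(indexed=True)
    # new: each link is stored as its own short indexed string value
    links = ndb.StringProperty(repeated=True)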
Now, when I run the crawl_web member function, it actually crawls, but when I check the datastore only the Index entities are created: no Graph, no Listings.
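To be clear about what I mean by "checking the datastore", this is the kind of quick check I run after a crawl (the logging lines are only illustrative):

import logging

# Graph, Listings and Index are the models shown above
crawler = Crawler()
result = crawler.crawl_web()
logging.info('crawl_web returned: %s', result)                     # False would mean the bare except fired
logging.info('Graph entities: %d', Graph.query().count())          # stays at 0
logging.info('Listings entities: %d', Listings.query().count())    # stays at 0
logging.info('Index entities: %d', Index.query().count())          # these do get saved

Please help, thanks.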