def post(self):
    """Create a StopLocation from the request params and re-point every
    RouteListing that shares the stopID at the new entity.

    Request params: stopID, lat, lon, intersection.
    Redirects to the labs display page when done.
    """
    stopID = self.request.get('stopID')
    lat = self.request.get('lat')
    lon = self.request.get('lon')

    stop = StopLocation()
    stop.stopID = stopID
    stop.routeID = '00'  # placeholder: this location entry is not route specific
    stop.intersection = self.request.get('intersection').upper()
    stop.location = GeoPt(lat, lon)
    stop.update_location()
    stop.direction = '00'
    logging.debug('created new stoplocation for %s' % stopID)
    stop.put()

    # Point every route listing for this stop at the new location entity.
    routeQ = db.GqlQuery("SELECT * FROM RouteListing WHERE stopID = :1", stopID)
    routes = routeQ.fetch(100)
    for r in routes:
        logging.debug('updating route %s with new location' % r.route)
        r.stopLocation = stop
    # Batch put instead of one datastore RPC per route; a no-op for an
    # empty list, so the old "len(routes) > 0" guard is unnecessary.
    if routes:
        db.put(routes)

    self.redirect('http://smsmybus.com/labs/displaystops')
def post(self):
    """Create a new StopLocation entity from the request parameters.

    Request params: stopID, name, description, lat, lon, direction.
    Responds with HTTP 200 once the entity has been stored.
    """
    stop_list = []

    # Stop IDs are stored zero-padded to four characters; zfill(4)
    # replaces the old chain of length-1/2/3 checks (identical result).
    stopID = self.request.get('stopID').zfill(4)

    name = self.request.get('name')
    description = self.request.get('description')
    lat = self.request.get('lat')
    lon = self.request.get('lon')
    direction = self.request.get('direction')

    # Build the new stop; the intersection is the name with any trailing
    # parenthesized qualifier stripped off.
    s = StopLocation()
    s.stopID = stopID
    s.intersection = name.split('(')[0].rstrip()
    s.direction = direction
    s.description = description
    s.location = GeoPt(lat, lon)
    s.update_location()
    stop_list.append(s)

    # put the new stop in the datastore
    db.put(stop_list)
    logging.info('done updating stop locations for stopID %s' % stopID)

    self.response.set_status(200)
def post(self):
    """Store a StopLocation built from the posted form fields.

    Request params: stopID, name, description, lat, lon, direction.
    Sets HTTP status 200 on completion.
    """
    stop_list = []

    # Normalize the stop ID to four characters with leading zeros;
    # equivalent to the previous sequence of len()==1/2/3 padding branches.
    stopID = self.request.get('stopID').zfill(4)

    name = self.request.get('name')
    description = self.request.get('description')
    lat = self.request.get('lat')
    lon = self.request.get('lon')
    direction = self.request.get('direction')

    # Create the stop entity (intersection = name minus any "(...)" suffix).
    s = StopLocation()
    s.stopID = stopID
    s.intersection = name.split('(')[0].rstrip()
    s.direction = direction
    s.description = description
    s.location = GeoPt(lat, lon)
    s.update_location()
    stop_list.append(s)

    # put the new stop in the datastore
    db.put(stop_list)
    logging.info('done updating stop locations for stopID %s' % stopID)

    self.response.set_status(200)
def post(self):
    """Persist a crawled stop location and attach it to its RouteListing.

    Request params: intersection, latitude, longitude, direction, routeID,
    stopID, crawlLine. Invalid input (stopID '00' or missing coordinates)
    is queued to the error-task handler instead of being stored.
    """
    intersection = self.request.get('intersection')
    latitude = self.request.get('latitude')
    longitude = self.request.get('longitude')
    direction = self.request.get('direction')
    routeID = self.request.get('routeID')
    stopID = self.request.get('stopID')
    logging.info("storing route %s intersection %s at lat/lon %s,%s toward %s"
                 % (routeID, intersection, latitude, longitude, direction))

    # Cap the intersection at 400 characters. BUGFIX: the old code used
    # ljust(400), which pads short strings and leaves long ones untouched,
    # so the guard never actually shortened anything.
    if len(intersection) > 400:
        intersection = intersection[:400]

    # NOTE(review): request.get() normally returns '' (not None) for a
    # missing param, so the "is None" checks may never fire -- TODO confirm.
    if stopID == '00' or latitude is None or longitude is None:
        # create a task event to process the error
        task = Task(url='/crawl/errortask',
                    params={'intersection': intersection,
                            'location': (latitude + "," + longitude),
                            'direction': direction,
                            'metaStringOne': self.request.get('crawlLine'),
                            'metaStringTwo': 'from geotask crawler',
                            'routeID': routeID,
                            'stopID': stopID,
                            })
        task.add('crawlerrors')
    else:
        # ignore this stop if we've already stored it (keyed on stopID + routeID)
        stop = db.GqlQuery("SELECT * FROM StopLocation WHERE stopID = :1 and routeID = :2",
                           stopID, routeID).get()
        if stop is None:
            stop = StopLocation()
            stop.stopID = stopID
            stop.routeID = routeID
            stop.intersection = intersection.upper()
            stop.direction = direction.upper()
            stop.location = GeoPt(latitude, longitude)
            stop.update_location()
            stop.put()

        # update the route table to include a reference to the new geo data
        # (the stopID != '00' check is defensive; this branch already
        # guarantees it)
        if stopID != '00':
            route = db.GqlQuery("SELECT * FROM RouteListing WHERE stopID = :1 and route = :2",
                                stopID, routeID).get()
            if route is None:
                logging.error("IMPOSSIBLE... no stop on record?!? stop %s, route %s" % (stopID, routeID))
                # create a task event to process the error
                task = Task(url='/crawl/errortask',
                            params={'intersection': intersection,
                                    'location': (latitude + "," + longitude),
                                    'direction': direction,
                                    'metaStringOne': self.request.get('crawlLine'),
                                    'metaStringTwo': 'routelisting update',
                                    'routeID': routeID,
                                    'stopID': stopID,
                                    })
                task.add('crawlerrors')
            else:
                route.stopLocation = stop
                route.put()

    return
def post(self):
    """Create or refresh the StopLocation(s) for one stopID, then re-point
    every RouteListing with that stopID at the stop entity.

    Request params: stopID, name, description, lat, lon, direction.
    """
    stop_list = []
    route_list = []

    # Stop IDs are stored zero-padded to four characters; zfill(4) is
    # equivalent to the old chain of length-1/2/3 padding branches.
    stopID = self.request.get('stopID').zfill(4)

    name = self.request.get('name')
    description = self.request.get('description')
    lat = self.request.get('lat')
    lon = self.request.get('lon')
    direction = self.request.get('direction')

    # check to see if the stop exists already
    stops = db.GqlQuery("select * from StopLocation where stopID = :1", stopID).fetch(50)
    if stops:
        # it does -- refresh the description on every matching entity
        for s in stops:
            stop_template = s
            s.description = description
            stop_list.append(s)
    else:
        # if it doesn't, create a new one
        s = StopLocation()
        stop_template = s
        s.stopID = stopID
        s.intersection = name.split('(')[0].rstrip()
        s.direction = direction
        s.description = description
        s.location = GeoPt(lat, lon)
        # NOTE(review): sibling handlers call s.update_location() right
        # after setting location; this one does not -- TODO confirm intent.
        stop_list.append(s)

    # put the new/updated stop(s) in the datastore
    db.put(stop_list)
    logging.info('done updating stop locations for stopID %s' % stopID)

    # find all of the RouteListings with this stopID and update their
    # StopLocation references (to the last stop entity seen above)
    routes = db.GqlQuery("select * from RouteListing where stopID = :1", stopID).fetch(50)
    for r in routes:
        r.stopLocation = stop_template
        route_list.append(r)

    # save the route updates
    db.put(route_list)
    logging.info('done updating %s route listings for stopID %s' % (str(len(routes)), stopID))

    self.response.set_status(200)
def post(self):
    """Build a StopLocation from the request and link it to all
    RouteListing entities that share the stopID.

    Request params: stopID, lat, lon, intersection.
    Redirects to the labs display page when done.
    """
    stopID = self.request.get('stopID')
    lat = self.request.get('lat')
    lon = self.request.get('lon')

    stop = StopLocation()
    stop.stopID = stopID
    stop.routeID = '00'  # '00' marks a route-agnostic location record
    stop.intersection = self.request.get('intersection').upper()
    stop.location = GeoPt(lat, lon)
    stop.update_location()
    stop.direction = '00'
    logging.debug('created new stoplocation for %s' % stopID)
    stop.put()

    # Attach the new location to every route listing for this stop.
    routeQ = db.GqlQuery("SELECT * FROM RouteListing WHERE stopID = :1", stopID)
    routes = routeQ.fetch(100)
    for r in routes:
        logging.debug('updating route %s with new location' % r.route)
        r.stopLocation = stop
    # Single batch put replaces the old per-entity r.put() loop.
    if routes:
        db.put(routes)

    self.redirect('http://smsmybus.com/labs/displaystops')
def post(self):
    """Crawl one route-listing page.

    Links whose title contains "#" describe individual stops and are
    persisted as StopLocation / RouteListing entities; links whose href
    contains "?r=" spawn follow-up crawl tasks for that route.

    Request params: crawl (URL to scrape), direction, routeID.
    """
    try:
        scrapeURL = self.request.get('crawl')
        direction = self.request.get('direction')
        routeID = self.request.get('routeID')
        logging.debug("task scraping for %s, direction %s, route %s" % (scrapeURL, direction, routeID))

        # Fetch the page, retrying up to three times on DownloadError.
        loop = 0
        done = False
        result = None
        start = quota.get_request_cpu_usage()
        while not done and loop < 3:
            try:
                result = urlfetch.fetch(scrapeURL)
                done = True
            except urlfetch.DownloadError:
                logging.info("Error loading page (%s)... sleeping" % loop)
                if result:
                    logging.debug("Error status: %s" % result.status_code)
                    logging.debug("Error header: %s" % result.headers)
                    logging.debug("Error content: %s" % result.content)
                time.sleep(4)
                loop = loop + 1
        end = quota.get_request_cpu_usage()

        # BUGFIX: if every fetch attempt failed, result is still None and
        # the old code crashed on result.content below. Bail out instead.
        if result is None:
            logging.error("unable to fetch %s after %s attempts" % (scrapeURL, loop))
            return

        # start to interrogate the results
        soup = BeautifulSoup(result.content)
        stopUpdates = []
        for slot in soup.html.body.findAll("a", "ada"):
            logging.info("pulling out data from page... %s" % slot)
            if slot.has_key('href'):
                href = slot['href']
                title = slot['title']
                logging.info("FOUND A TITLE ----> %s" % title)

                # route crawler looks for titles with an ID# string
                if title.find("#") > 0:
                    # we finally got down to the page we're looking for:
                    # pull the stopID and intersection out of the title
                    stopID = title.split("#")[1].split("]")[0]
                    intersection = title.split("[")[0].strip()
                    logging.info("found stop %s, %s" % (stopID, intersection))

                    # check for conflicts...
                    stop = db.GqlQuery("SELECT * FROM StopLocation WHERE stopID = :1", stopID).get()
                    if stop is None:
                        # add the new stop (batched put at the end)
                        stop = StopLocation()
                        stop.stopID = stopID
                        stop.routeID = routeID
                        stop.intersection = intersection.upper()
                        stop.direction = direction.upper()
                        stopUpdates.append(stop)
                        logging.info("ADDED StopLocation (%s) - MINUS geo location" % stopID)
                    else:
                        # typo fix: log message said "StopLoation"
                        logging.info("StopLocation entity already exists for %s..." % stopID)
                        stop.routeID = routeID
                        stopUpdates.append(stop)

                    # pull the route and direction data from the URL
                    routeData = scrapeURL.split('?')[1]
                    logging.info("FOUND THE PAGE ---> arguments: %s stopID: %s" % (routeData, stopID))
                    routeArgs = routeData.split('&')
                    routeID = routeArgs[0].split('=')[1]
                    directionID = routeArgs[1].split('=')[1]
                    timeEstimatesURL = CRAWL_URLBASE + href

                    # check for conflicts...
                    r = db.GqlQuery("SELECT * FROM RouteListing WHERE route = :1 AND direction = :2 AND stopID = :3",
                                    routeID, directionID, stopID).get()
                    if r is None:
                        # add the new route to the DB
                        route = RouteListing()
                        route.route = routeID
                        route.direction = directionID
                        route.stopID = stopID
                        route.scheduleURL = timeEstimatesURL
                        route.put()
                        logging.info("added new route listing entry to the database!")
                    else:
                        logging.error("we found a duplicate entry!?! %s", r.scheduleURL)
                elif href.find("?r=") > -1:
                    # create a new crawl task with this link
                    crawlURL = CRAWL_URLBASE + href
                    if routeID == '00':
                        # BUGFIX: strip any trailing "&..." params; the old
                        # split kept them when the href had multiple args
                        routeID = href.split('r=')[1].split('&')[0]
                    elif href.find("&") > -1:
                        routeID = href.split('&')[0].split('r=')[1]
                    task = Task(url='/routelist/crawlingtask',
                                params={'crawl':crawlURL,'direction':title,'routeID':routeID})
                    task.add('crawler')
                    logging.info("Added new task for %s, direction %s, route %s" % (title.split(",")[0], title, routeID))

        # push the vehicle updates to the datastore
        db.put(stopUpdates)
    except apiproxy_errors.DeadlineExceededError:
        logging.error("DeadlineExceededError exception!?")
        return
    return
def post(self):
    """Crawl one route page.

    Stop links (titles containing "[ID#...]") are persisted as
    StopLocation entities; route links (hrefs containing "?r=") spawn
    follow-up crawl tasks.

    Request params: crawl (URL to scrape), direction, routeID.
    """
    try:
        scrapeURL = self.request.get('crawl')
        direction = self.request.get('direction')
        routeID = self.request.get('routeID')
        logging.debug("task scraping for %s, direction %s, route %s" % (scrapeURL, direction, routeID))

        # fetch the URL content
        content = fetchURL(scrapeURL)

        # start to interrogate the results
        soup = BeautifulSoup(content)
        stopUpdates = []
        for slot in soup.html.body.findAll("a", "ada"):
            if slot.has_key('href'):
                href = slot['href']
                title = slot['title']
                logging.info("FOUND A TITLE ----> %s" % title)

                # route crawler looks for titles with an ID# string
                if title.find("[ID#") > 0:
                    # a reference to a specific stop: stopID plus
                    # intersection embedded in the link title
                    stopID = title.split("ID#")[1].split("]")[0]
                    intersection = title.split("[")[0].strip()
                    logging.info("found stop %s, %s" % (stopID, intersection))

                    # check to see if we've already found this stop...
                    q = db.GqlQuery("SELECT * FROM StopLocation WHERE stopID = :1 and direction = :2 and routeID = :3",
                                    stopID, direction.upper(), routeID)
                    stopQuery = q.fetch(1)
                    if len(stopQuery) == 0:
                        # add the new stop
                        stop = StopLocation()
                        stop.stopID = stopID
                        stop.routeID = routeID
                        stop.intersection = intersection.upper()
                        stop.direction = direction.upper()
                        stopUpdates.append(stop)  # we'll do a batch put at the end
                        logging.info("added new stop listing MINUS geo location")
                    else:
                        logging.info("already have this stop in the table...")
                        stopQuery[0].routeID = routeID
                        stopUpdates.append(stopQuery[0])
                elif href.find("?r=") > -1:
                    # spawn a new task to scrape the live route data for
                    # this specific route
                    crawlURL = CRAWL_URLBASE + href
                    if routeID == '00':
                        # BUGFIX: strip any trailing "&..." params; the old
                        # split kept them when the href had multiple args
                        routeID = href.split('r=')[1].split('&')[0]
                    elif href.find("&") > -1:
                        routeID = href.split('&')[0].split('r=')[1]
                    task = Task(url='/crawl/crawlingtask',
                                params={'crawl':crawlURL,'direction':title,'routeID':routeID})
                    task.add('crawler')
                    logging.info("Added new task for %s, direction %s, route %s" % (title.split(",")[0], title, routeID))

        # push the StopLocation updates to the datastore
        db.put(stopUpdates)
    except apiproxy_errors.DeadlineExceededError:
        logging.error("DeadlineExceededError exception!?")
        return
    return