def main(url=None):
    """Fetch one page, extract its named entities and print both.

    Args:
        url: Address of the page to process. When omitted, the Guardian
            article this script was originally hard-coded to use is fetched.
    """
    extractor = EntityExtractor()
    fetcher = Fetcher()
    if url is None:
        url = "http://www.guardian.co.uk/world/2013/jun/23/edward-snowden-gchq"

    text = fetcher.fetch_text_from_url(url)
    entities = extractor.extract_named_entities(text)

    # Single-argument print(...) produces the same output whether it is parsed
    # as a Python 2 print statement or as a Python 3 function call.
    print(text)
    print(entities)
class FetchHandler(webapp2.RequestHandler):
    """Handle POST requests that fetch a URL's text and save it on a document.

    The handler reads the ``url`` request parameter, fetches its text with a
    ``Fetcher``, stores the text on the ``DocumentModel`` entity whose ``url``
    property matches, and then enqueues a task for ``/extract``.
    """

    def __init__(self, request=None, response=None):
        """Initialise the base handler and create the ``Fetcher``.

        Args:
            request: Request object passed in by the webapp2 dispatcher.
            response: Response object passed in by the webapp2 dispatcher.
        """
        # webapp2 builds handlers as handler_class(request, response); the base
        # initialiser is what binds self.request / self.response, so it must
        # be called with those arguments.
        super(FetchHandler, self).__init__(request, response)
        self.fetcher = Fetcher()

    def post(self):
        """Fetch the posted URL, persist its text and queue entity extraction.

        Responds with HTTP 404 (and enqueues nothing) when no ``DocumentModel``
        entity matches the URL.
        """
        # NOTE(review): cgi.escape HTML-escapes the URL (e.g. '&' -> '&amp;')
        # before it is fetched and used as the lookup value. Kept as-is because
        # other handlers may store URLs in the same escaped form -- confirm.
        url = cgi.escape(self.request.get('url'))
        logging.info('Fetching %s', url)
        text = self.fetcher.fetch_text_from_url(url)

        # filter() narrows the query; get() returns the first matching entity
        # or None. (run() would return an iterator, not an entity.)
        query = db.Query(DocumentModel)
        query.filter('url =', url)
        doc = query.get()
        if doc is None:
            logging.error('No document found for %s', url)
            self.error(404)
            return

        doc.text = text
        doc.put()
        taskqueue.add(url='/extract', params={'url': url})
def __init__(self):
    """Create the ``Fetcher`` instance and keep it on ``self.fetcher``."""
    # NOTE(review): the enclosing class is not visible from this fragment, so
    # whether a base-class initialiser should also run here is unconfirmed.
    self.fetcher = Fetcher()