示例#1
0
def main():
    """Fetch a sample article and print its text and extracted entities.

    Downloads the text of a hard-coded Guardian URL through ``Fetcher``,
    passes that text to ``EntityExtractor.extract_named_entities``, and
    prints the fetched text followed by the extraction result.
    """
    extractor = EntityExtractor()
    fetcher = Fetcher()
    url = "http://www.guardian.co.uk/world/2013/jun/23/edward-snowden-gchq"
    text = fetcher.fetch_text_from_url(url)
    entities = extractor.extract_named_entities(text)
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print() function, so this form parses
    # and behaves the same on both interpreters.
    print(text)
    print(entities)
示例#2
0
class FetchHandler(webapp2.RequestHandler):
    """Handle POST requests that fetch a URL's text and store it on a document.

    The handler reads the ``url`` request parameter, fetches its text with
    ``Fetcher``, saves the text on the ``DocumentModel`` entity whose ``url``
    property matches, and enqueues an ``/extract`` task for the same URL.
    """

    def __init__(self, request=None, response=None):
        """Initialise the base handler and create the fetcher.

        Args:
            request: Request object supplied by the framework (optional).
            response: Response object supplied by the framework (optional).
        """
        # The framework instantiates handlers as ``handler(request, response)``;
        # forwarding both to the base class is what populates self.request and
        # self.response. Both default to None so ``FetchHandler()`` still works.
        super(FetchHandler, self).__init__(request, response)
        self.fetcher = Fetcher()

    def post(self):
        """Fetch the posted URL, persist its text, and queue entity extraction.

        Responds with HTTP 404 (and does not enqueue a task) when no stored
        document matches the URL.
        """
        # NOTE(review): cgi.escape HTML-escapes the value (e.g. '&' becomes
        # '&amp;'), which alters URLs containing query strings before they
        # are fetched. Kept as-is because stored document URLs may have been
        # escaped the same way -- confirm before removing.
        url = cgi.escape(self.request.get('url'))
        logging.info('Fetching %s', url)
        text = self.fetcher.fetch_text_from_url(url)

        # filter() applies the property condition; get() returns the first
        # matching entity or None. (run() would return an iterator, which
        # cannot be assigned to or saved.)
        doc = db.Query(DocumentModel).filter('url =', url).get()
        if doc is None:
            logging.error('No document found for url %s', url)
            self.error(404)
            return

        doc.text = text
        doc.put()
        taskqueue.add(url='/extract', params={'url': url})
示例#3
0
 def __init__(self):
     """Create a ``Fetcher`` and keep it on the instance as ``fetcher``."""
     new_fetcher = Fetcher()
     self.fetcher = new_fetcher