def __scrape_rss(self, source, full=False):
    """ Scrape an RSS source and return the list of saved articles. """
    result = []
    document = feedparser.parse(source['target'])
    for article in document['entries']:
        try:
            if full:
                # Build the article directly from the feed entry.
                entry = Article(title=article['title'],
                                date=datetime.fromtimestamp(
                                    mktime(article['published_parsed'])),
                                content=article['content'][0]['value'],
                                summary=article['summary'],
                                link=article['link'],
                                thematic=source['thematic'],
                                type='common_rss')
                entry.save()
                result.append(entry)
            else:
                # Delegate the full-page retrieval to a background worker.
                worker.run('retrieve_page', article['link'],
                           thematic=source['thematic'],
                           source='common_rss')
        except KeyError as e:
            # TODO: log the offending entry instead of skipping it silently
            # print e, article
            pass
    return result
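# A minimal usage sketch for __scrape_rss, assuming a source mapping with
# 'target' (the feed URL) and 'thematic' (a category label); the feed URL
# below is hypothetical:
#
#   self.__scrape_rss(
#       {'target': 'http://example.com/feed.xml', 'thematic': 'news'},
#       full=True)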
def get_content_from_duration(duration, thematics=None, user_id=None):
    # Convert the requested reading time into a word budget.
    words = utils.how_many_words(int(duration))
    thematics = thematics.split(',') if thematics else None
    # Three batches of decreasing length, so the client can assemble a mix of
    # articles filling the requested duration (integer division keeps the
    # budgets whole numbers on Python 3 as well).
    articles = {
        "one": Article.get_closest(count_words=words, limit=5,
                                   thematics=thematics, user=user_id),  # FIXME
        "two": Article.get_closest(count_words=words // 2, limit=2,
                                   thematics=thematics, user=user_id),
        "three": Article.get_closest(count_words=words // 3, limit=3,
                                     thematics=thematics, user=user_id),
    }
    return dumps({'articles': articles, 'delta': duration})
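# utils.how_many_words is defined elsewhere; a plausible sketch of what it
# computes, assuming the duration is in seconds and an average silent reading
# speed of ~200 words per minute (both figures are assumptions):
def how_many_words_sketch(duration_seconds):
    WORDS_PER_MINUTE = 200  # assumed average reading speed
    return int(duration_seconds / 60.0 * WORDS_PER_MINUTE)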
def get_content_from_itineraire(src, tgt, thematics=None, user_id=None):
    # Derive the reading budget from the trip duration between src and tgt.
    itineraire = utils.get_itineraire(src, tgt)
    duration = itineraire['delta']
    words = utils.how_many_words(duration)
    thematics = thematics.split(',') if thematics else None
    articles = {
        "one": Article.get_closest(count_words=words, limit=5,
                                   thematics=thematics, user=user_id),  # FIXME
        "two": Article.get_closest(count_words=words // 2, limit=2,
                                   thematics=thematics, user=user_id),
        "three": Article.get_closest(count_words=words // 3, limit=3,
                                     thematics=thematics, user=user_id),
    }
    itineraire["articles"] = articles
    return dumps(itineraire)
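# utils.get_itineraire is defined elsewhere; from its use above it must return
# a mapping with at least a 'delta' key holding the trip duration. A
# hypothetical stub, for illustration only:
def get_itineraire_stub(src, tgt):
    return {'src': src, 'tgt': tgt, 'delta': 1800}  # 'delta' unit assumed to be seconds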
def run(self, url, thematic=None, user_id=None, source=None):
    # Default to plain HTTP when the URL comes without a scheme.
    if not (url.startswith("http://") or url.startswith("https://")):
        url = "http://%s" % url
    # Parse the web page through the Readability content API; passing the
    # parameters as a dict lets requests URL-encode the target URL properly.
    res = requests.get(
        "http://www.readability.com/api/content/v1/parser",
        params={'url': url,
                'token': app.config['READABILITY_PARSER_TOKEN']})
    parsed = res.json()
    # Save the extracted article.
    article = Article()
    article.title = parsed['title']
    article.date = parsed['date_published']
    article.content = parsed['content']
    article.summary = parsed['excerpt']
    article.link = parsed['url']
    article.domain = parsed['domain']
    article.count_words = parsed['word_count']
    article.user = user_id
    article.thematic = thematic
    article.type = source
    article.save()

# EOF
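# This task is the 'retrieve_page' job queued by __scrape_rss above; a minimal
# invocation sketch (the task class name and URL are hypothetical):
#
#   RetrievePage().run('example.com/some-article',
#                      thematic='news', source='common_rss')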
def run(self):
    # Fetch activities from the "Que faire à Paris ?" API. Note that
    # verify=False disables TLS certificate verification for this call.
    response = requests.get(
        "https://api.paris.fr:3000/data/1.1/QueFaire/get_activities/"
        "?token={token}&created={created}&offset={offset}&limit={limit}"
        .format(token=app.config['API_QUEFAIREAPARIS_TOKEN'],
                created="0", offset="0", limit="100"),
        verify=False)
    results = response.json()
    for result in results['data']:
        article = Article()
        article.title = result['nom']
        article.date = datetime.datetime.strptime(result['created'],
                                                  '%Y-%m-%dT%H:%M:%S.%fZ')
        article.content = result['description']
        article.summary = result['small_description']
        article.thematic = "quefaireaparis"  # FIXME
        article.type = "quefaireaparis"
        # Fields specific to this source.
        article.occurences = result['occurences']
        article.thematics = [_['rubrique'] for _ in result['rubriques']]
        article.location = dict(lat=result['lat'], lon=result['lon'])
        article.save()

# EOF
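# Only the first 100 activities are fetched (offset=0, limit=100). If the API
# pages through results the way those parameters suggest, a loop along these
# lines would walk the full set; this is an assumption, not documented
# behaviour (fetch_page is a hypothetical helper wrapping the request above):
#
#   offset = 0
#   while True:
#       page = fetch_page(offset=offset, limit=100)
#       if not page['data']:
#           break
#       for result in page['data']:
#           save_activity(result)  # hypothetical: the per-result body above
#       offset += 100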
def api_content(id):
    article = Article.get(id=id)
    if article:
        return article['content']
    return "false"
def reset_content():
    # Drop every stored article, then queue a full re-scrape of the
    # configured content sources.
    articles_collection = Article.get_collection()
    articles_collection.remove()
    worker.run('retrieve_common_articles', app.config['SOURCE_CONTENT'])
    return "ok"
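# Note: remove() with no filter drops every document in the collection before
# the re-scrape is queued. On pymongo 3+ remove() is deprecated; assuming the
# same collection object, the equivalent call would be:
#
#   articles_collection.delete_many({})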