def _thumbnail_url_from_entry(entry):
    """Return the first thumbnail URL a feed entry advertises, or ``None``.

    ``entry['media_thumbnail']`` is consulted first, then the image-typed
    items of ``entry['links']``. Missing keys, empty lists and empty URLs are
    skipped instead of raising.
    """
    if 'media_thumbnail' in entry:
        for thumbnail in entry['media_thumbnail'] or ():
            if 'url' in thumbnail and thumbnail['url']:
                return thumbnail['url']

    if 'links' in entry:
        for link in entry['links'] or ():
            # A link may omit its type or carry None; neither is an image.
            link_type = link['type'] if 'type' in link else None
            if not link_type or not link_type.startswith('image'):
                continue
            if 'href' in link and link['href']:
                return link['href']

    return None


def add_article(source_id, entry):
    """Store a feed entry as a ``FeedArticle`` unless it is already stored.

    An entry counts as already stored when a ``FeedArticle`` with the same
    ``link`` and ``source_id`` exists; in that case nothing is written.

    The article's ``summary`` is the text extracted from ``entry.summary``
    with BeautifulSoup, while ``html`` keeps ``entry.summary`` unchanged.
    The thumbnail is taken from the entry's ``media_thumbnail`` items, then
    from its image-typed ``links``, and finally from
    ``get_thumbnail_url_from_summary`` when the text summary is non-empty.

    Args:
        source_id: Identifier of the feed source the entry belongs to.
        entry: Parsed feed entry exposing ``link``, ``title`` and ``summary``
            attributes and supporting ``in`` / subscript access.

    Raises:
        Exception: Whatever ``cdb.session.commit()`` raises is re-raised
            after the session has been rolled back.

    NOTE(review): this file also defines ``add_article(article)`` further
    down; if both live in the same module, that later definition replaces
    this one -- confirm which is intended.
    """
    already_stored = FeedArticle.query.filter_by(
        link=entry.link, source_id=source_id).first()
    if already_stored:
        return

    article = FeedArticle(
        link=entry.link,
        title=entry.title,
        summary=BeautifulSoup(entry.summary, 'lxml').get_text(),
        source_id=source_id,
        html=entry.summary,
    )

    thumbnail_url = _thumbnail_url_from_entry(entry)
    if thumbnail_url:
        article.thumbnail_url = thumbnail_url
    elif article.summary:
        # Last resort: look for an image inside the summary markup.
        article.thumbnail_url = get_thumbnail_url_from_summary(article.html)

    cdb.session.add(article)
    try:
        cdb.session.commit()
    except Exception:
        # Assumes cdb.session is a SQLAlchemy-style session: after a failed
        # commit it must be rolled back before it can be used again.
        cdb.session.rollback()
        raise
def add_article(article):
    """Persist ``article``, filling in its thumbnail first when it has none.

    When the article has a non-empty ``summary`` but no ``thumbnail_url``,
    the thumbnail is set from ``get_thumbnail_url_from_summary(article.html)``.
    The article is then added to ``cdb.session`` and committed.

    Args:
        article: Object exposing ``summary``, ``thumbnail_url`` and ``html``
            attributes that ``cdb.session`` can persist.

    Raises:
        Exception: Whatever ``cdb.session.commit()`` raises is re-raised
            after the session has been rolled back.

    NOTE(review): this file also defines ``add_article(source_id, entry)``
    earlier; if both live in the same module, this definition replaces that
    one -- confirm which is intended.
    """
    if article.summary and not article.thumbnail_url:
        article.thumbnail_url = get_thumbnail_url_from_summary(article.html)

    # TODO(review): a second fallback via
    # get_thumbnail_url_from_html(html_readable) was left commented out here;
    # html_readable is not defined in this function, so it stays disabled.

    cdb.session.add(article)
    try:
        cdb.session.commit()
    except Exception:
        # Assumes cdb.session is a SQLAlchemy-style session: after a failed
        # commit it must be rolled back before it can be used again.
        cdb.session.rollback()
        raise