def generate_relation_dict(self, news_sources, news_targets):
    '''
    Count, for every source, how often its articles cite each target.

    news_sources -- mapping of source name to the homepage url used to
                    look up the source's Website document
    news_targets -- mapping whose keys are target names (the values are
                    not read here)

    Returns a dictionary in the format {source : target_count}, ie.
        {s1 : [tc1, tc2, ... tcn], ... sn : [tc1, tc2, ... tcn]}
    where sn is a source name and tcn is the number of citations of the
    n-th target, in the iteration order of news_targets. Names are
    compared case-insensitively.
    '''
    # Lower-case the target names once and remember every position each
    # lowered name occupies, so a citation is resolved with one lookup.
    # A list of positions is kept because two keys may differ only by case.
    target_positions = {}
    for position, target_name in enumerate(news_targets):
        target_positions.setdefault(target_name.lower(), []).append(position)

    relation_dict = {}
    # items() (rather than iteritems()) works on both Python 2 and 3.
    for source_name, source_url in news_sources.items():
        # One counter per target, in the same order as news_targets.
        target_count = [0] * len(news_targets)

        website = Website.objects(
            homepage_url=source_url).only('homepage_url').first()
        # An unknown source keeps all-zero counts; querying with
        # website=None would instead match articles that have no website.
        if website is not None:
            articles = Article.objects(
                Q(website=website) &
                Q(citations__exists=True)).only('citations')
            for article in articles:
                for citation in article.citations:
                    # Plain ints in the citation list carry no target
                    # name and are skipped.
                    if isinstance(citation, int):
                        continue
                    cited_name = citation.target_name
                    if cited_name is None:
                        continue
                    for position in target_positions.get(
                            cited_name.lower(), ()):
                        target_count[position] += 1

        relation_dict[source_name] = target_count
    return relation_dict
def get_articles(self, number=None):
    '''
    Render get_articles.html with the articles of the current user's
    news sources followed by those of the news targets.

    The user is looked up by the module-level name `username`.

    number -- optional maximum number of articles to render; any falsy
              value (None, 0, "") means "all of them". It is passed
              through int(), so a numeric string is accepted.

    Returns the rendered template.
    '''
    show_article_template = Template(filename='get_articles.html')

    # Fetch the user document once instead of once per attribute.
    user = User.objects(name=username).first()

    articles = []
    if user is not None:
        for site_names in (user.news_sources, user.news_targets):
            for site_name in site_names or ():
                website = Website.objects(name=site_name).first()
                # Skip names without a Website document; querying with
                # website=None would match articles that have no website.
                if website is None:
                    continue
                articles += Article.objects(
                    website=website).only('title', 'url').all()

    limit = int(number) if number else len(articles)
    return show_article_template.render(articles=articles[:limit])
def generate_relation_dict_beta(self, news_sources, news_targets):
    '''
    Count, for every source, how often its articles cite each target.

    news_sources -- iterable of source names; each name is used to look
                    up the source's Website document
    news_targets -- sequence of target names

    Returns a dictionary in the format {source : target_count} where
    target_count[i] is the number of citations of news_targets[i] found
    in the source's articles. Names are compared case-insensitively.
    '''
    # Lower-case the target names once and remember every index each
    # lowered name occupies, so a citation is resolved with one lookup.
    # A list of indexes is kept because the same name may appear twice.
    target_positions = {}
    for position, target_name in enumerate(news_targets):
        target_positions.setdefault(target_name.lower(), []).append(position)

    relation_dict = {}
    for source_name in news_sources:
        # One counter per target, in the same order as news_targets.
        target_count = [0] * len(news_targets)

        website = Website.objects(name=source_name).only('name').first()
        # An unknown source keeps all-zero counts; querying with
        # website=None would instead match articles that have no website.
        if website is not None:
            articles = Article.objects(
                Q(website=website) &
                Q(citations__exists=True)).only('citations')
            for article in articles:
                for citation in article.citations:
                    # Plain ints in the citation list carry no target
                    # name and are skipped.
                    if isinstance(citation, int):
                        continue
                    cited_name = citation.target_name
                    if cited_name is None:
                        continue
                    for position in target_positions.get(
                            cited_name.lower(), ()):
                        target_count[position] += 1

        relation_dict[source_name] = target_count
    return relation_dict
def add_article(self, article_meta, website):
    '''
    Store the article described by article_meta unless it already exists.

    article_meta -- mapping read through .get() for the keys "title",
                    "author", "last_modified_date", "html" and "url"
    website      -- value stored in the article's website field

    Returns the already stored article when one matches title, url,
    last_modified_date and website; otherwise the newly saved article.
    Returns None when saving raises NotUniqueError or ValidationError,
    or when save() returns a falsy value.
    '''
    title = article_meta.get("title")
    url = article_meta.get('url')
    modified = article_meta.get('last_modified_date')

    # Look for an identical article first so it is not stored twice.
    existing = Article.objects(
        title=title,
        url=url,
        last_modified_date=modified,
        website=website
    ).first()
    if existing:
        return existing

    # Nothing matched: build the document that will be saved.
    art = Article(
        title=title,
        author=article_meta.get("author"),
        last_modified_date=modified,
        html=article_meta.get("html"),
        url=url,
        website=website
    )

    try:
        status = art.save()
    except NotUniqueError:
        self.logger.warn(
            'Article is not unique, url: {0}'.format(art.url))
        return None
    except ValidationError:
        self.logger.warn(
            'Article Save/Validation Failed, url: {0}'.format(
                article_meta.get("url")))
        return None

    return art if status else None
def show_article(self, url=None):
    '''
    Return the stored html of the article whose url equals `url`.

    Returns an empty string when no url is given or when no article
    with that url exists.
    '''
    if not url:
        return ""

    art = Article.objects(url=url).first()
    # first() yields None when nothing matches; reading .html on it
    # would raise AttributeError.
    if art is None:
        return ""
    return art.html