Exemplo n.º 1
0
    def generate_relation_dict(self, news_sources, news_targets):
        '''
        Build a dictionary of string/list(int) in the format
        {source : target_count}
        ie. {s1 : [tc1, tc2, ... tcn],
        s2 : [tc1, tc2, ... tcn], ...
        sn : [tc1, tc2, ... tcn]}
        where sn is the source name and tcn is how many citations found in
        that source's articles name the n-th target.

        news_sources -- mapping of source name to the source homepage url
        news_targets -- mapping keyed by target name; the position of each
                        count follows the iteration order of this mapping

        Target names are compared case-insensitively. A source whose homepage
        url matches no Website keeps an all-zero list.
        '''
        # Lower-case the target names once, instead of rebuilding the key list
        # and re-lowering a key for every single comparison.
        target_names = [name.lower() for name in news_targets]

        relation_dict = {}
        # items() (rather than iteritems()) works on both Python 2 and 3.
        for source_name, source_url in news_sources.items():
            # One counter per target, in the same order as target_names.
            target_count = [0] * len(target_names)
            relation_dict[source_name] = target_count

            website = Website.objects(
                homepage_url=source_url).only('homepage_url').first()
            if website is None:
                # first() gives None when nothing matches; querying articles
                # with website=None would select articles lacking a website
                # instead of this source's articles.
                continue

            # Only articles of this website that have a citations field.
            articles = Article.objects(
                Q(website=website) &
                Q(citations__exists=True)).only('citations')
            for article in articles:
                for citation in article.citations:
                    # Integer entries carry no target_name and are skipped.
                    if isinstance(citation, int):
                        continue
                    cited_name = citation.target_name.lower()
                    for index, target_name in enumerate(target_names):
                        if cited_name == target_name:
                            target_count[index] += 1
        return relation_dict
Exemplo n.º 2
0
    def get_articles(self, number=None):
        '''
        Render the 'get_articles.html' template with the articles of every
        website listed in the current user's news sources and news targets.

        number -- optional cap on how many articles are passed to the
                  template; when it is falsy (None, 0, '') all collected
                  articles are passed. It is converted with int(), so a
                  numeric string is accepted.

        Returns the rendered template. If no user matches the global
        username, the template is rendered with an empty article list.
        '''
        # username is read from module scope; it is not assigned here.
        global username

        show_article_template = Template(filename='get_articles.html')

        # Fetch the user document once: sources and targets both live on it.
        user = User.objects(name=username).first()
        site_names = []
        if user is not None:
            # Sources first, then targets, matching the original ordering.
            site_names = list(user.news_sources) + list(user.news_targets)

        articles = []
        for site_name in site_names:
            website = Website.objects(name=site_name).first()
            if website is None:
                # No such website stored: skip it rather than query articles
                # with website=None, which would pull in unrelated articles.
                continue
            articles.extend(
                Article.objects(website=website).only('title', 'url').all())

        if not number:
            number = len(articles)

        return show_article_template.render(articles=articles[:int(number)])
Exemplo n.º 3
0
 def generate_relation_dict_beta(self, news_sources, news_targets):
     '''
     Build a dictionary {source_name : target_count} where target_count is a
     list holding, for each entry of news_targets (same order), how many
     citations found in that source's articles name that target.

     news_sources -- iterable of source website names
     news_targets -- sequence of target names

     Target names are compared case-insensitively. A source name matching no
     Website keeps an all-zero list.
     '''
     # Lower-case the target names once instead of on every comparison.
     target_names = [name.lower() for name in news_targets]

     relation_dict = {}
     for source_name in news_sources:
         # One counter per target, in the same order as news_targets.
         target_count = [0] * len(target_names)
         relation_dict[source_name] = target_count

         website = Website.objects(name=source_name).only('name').first()
         if website is None:
             # first() gives None when nothing matches; querying articles
             # with website=None would select articles lacking a website
             # instead of this source's articles.
             continue

         # Only articles of this website that have a citations field.
         articles = Article.objects(
             Q(website=website) &
             Q(citations__exists=True)).only('citations')
         for article in articles:
             for citation in article.citations:
                 # Integer entries carry no target_name and are skipped.
                 if isinstance(citation, int):
                     continue
                 cited_name = citation.target_name.lower()
                 for index, target_name in enumerate(target_names):
                     if cited_name == target_name:
                         target_count[index] += 1
     return relation_dict
Exemplo n.º 4
0
    def add_article(self, article_meta, website):
        '''
        Return the stored article described by article_meta, saving a new
        one when no match exists yet.

        article_meta -- mapping read with .get() for the keys "title",
                        "author", "last_modified_date", "html" and "url"
        website      -- website the article is attached to

        Returns the existing or newly saved Article, or None when saving
        fails (uniqueness or validation error, or a falsy save result).
        '''
        # An article counts as already stored when title, url, modification
        # date and website all match.
        lookup = dict(
            title=article_meta.get("title"),
            url=article_meta.get('url'),
            last_modified_date=article_meta.get('last_modified_date'),
            website=website
        )
        existing = Article.objects(**lookup).first()
        if existing:
            return existing

        # Nothing stored yet: build the full document and try to persist it.
        candidate = Article(
            title=article_meta.get("title"),
            author=article_meta.get("author"),
            last_modified_date=article_meta.get("last_modified_date"),
            html=article_meta.get("html"),
            url=article_meta.get("url"),
            website=website
        )
        try:
            saved = candidate.save()
        except NotUniqueError:
            message = 'Article is not unique, url: {0}'.format(candidate.url)
            self.logger.warn(message)
            return None
        except ValidationError:
            message = 'Article Save/Validation Failed, url: {0}'.format(
                article_meta.get("url"))
            self.logger.warn(message)
            return None

        # A falsy save result is reported to the caller as a failure.
        return candidate if saved else None
Exemplo n.º 5
0
 def show_article(self, url=None):
     '''
     Return the stored html of the article with the given url.

     url -- url of the article to look up

     Returns an empty string when url is missing/empty or when no article
     is stored under that url.
     '''
     if not url:
         return ""
     art = Article.objects(url=url).first()
     if art is None:
         # first() gives None when nothing matches; answer like the
         # missing-url case instead of failing on art.html.
         return ""
     return art.html