Exemplo n.º 1
0
def home(request):
    """Serve the home page, or resolve a submitted PubMed ID.

    For any non-POST request the ``home.html`` template is rendered with an
    unbound ``ArticleForm`` and the first five articles ordered by the
    descending ``Count('article')`` annotation.

    For a POST request carrying ``pubmed_id``, the matching ``Article`` is
    looked up; if it does not exist yet it is fetched with ``find_articles``,
    saved together with its authors, and its references are added to a depth
    of 2.  The client is then redirected to ``view_article``.  When the POST
    has no ``pubmed_id``, or the lookup does not yield exactly one article,
    the client is redirected back to ``home``.
    """
    if request.method != 'POST':
        top_articles = Article.objects.annotate(Count('article')).order_by('-article__count')[0:5]
        template_context = {
            'form': ArticleForm(),
            'most_referenced_articles': top_articles,
        }
        return render_to_response(
            'home.html',
            template_context,
            context_instance=RequestContext(request)
        )

    # The bound form is instantiated but never validated on this path; the
    # raw POST value is used directly below.
    form = ArticleForm(request.POST)

    if 'pubmed_id' not in request.POST:
        return HttpResponseRedirect(reverse('home'))

    requested_id = request.POST['pubmed_id']
    try:
        article = Article.objects.get(pubmed_id=requested_id)
    except ObjectDoesNotExist:
        matches = find_articles([requested_id])
        if len(matches) != 1:
            # Nothing (or something ambiguous) came back: no article to show.
            return HttpResponseRedirect(reverse('home'))

        summary = matches[0]
        article = Article(pubmed_id=summary['pubmed_id'], title=summary['title'])
        article.save()

        # Link the authors, creating any that are not stored yet.
        for author_name in summary['authors']:
            author, _created = Author.objects.get_or_create(name=author_name)
            article.authors.add(author)

        # Pull in the references two levels deep.
        article.add_references(depth=2)

    return HttpResponseRedirect(reverse('view_article', args=(article.pubmed_id,)))
Exemplo n.º 2
0
    def add_references(self, depth):
        """Fetch this article's references and store them with their links.

        Starting from ``self.pubmed_id``, ``find_article_references`` is
        called once per level for up to ``depth`` levels, each level expanding
        the IDs returned by the previous one.  Referenced articles that are
        not yet in the ``Article`` table are fetched with ``find_articles``
        and inserted with ``bulk_create``.  Afterwards the
        ``referenced_articles`` relation is filled in for every article that
        was found to have references -- including the case where all of the
        referenced articles were already stored.

        Progress is printed to stdout with a timestamp at each stage.

        :param depth: number of reference levels to follow.
        :returns: None.
        """
        # TODO: some articles genuinely have no references in PubMed, maybe
        # flag whether articles have been checked???

        # NOTE: every print below takes a single parenthesised argument so the
        # line behaves the same as a Python 2 statement and a Python 3 call.
        print("%s - %s" % (datetime.now().ctime(), "START"))

        frontier = [self.pubmed_id]
        all_referenced_ids = []
        # find_article_references is used here as a mapping of
        # citing PubMed ID -> list of cited PubMed IDs.  The IDs are assumed
        # to be strings (see the str() conversions below) -- TODO confirm.
        references_by_article = {}

        for level in range(depth):
            if not frontier:
                # Nothing left to expand; avoid querying with an empty ID list.
                break

            found = find_article_references(frontier)

            frontier = []
            for citing_id, cited_ids in found.items():
                frontier.extend(cited_ids)
                all_referenced_ids.extend(cited_ids)
                references_by_article[citing_id] = cited_ids

            print("%s - Depth: %d" % (datetime.now().ctime(), level))

        print("%s - %s" % (datetime.now().ctime(), "FIND ARTICLES"))

        if all_referenced_ids:
            # get a list of pubmed IDs already in the Article model
            existing_ids = Article.objects.filter(
                pubmed_id__in=all_referenced_ids
            ).values_list('pubmed_id', flat=True)

            # only find articles we haven't already got
            # this avoids using get_or_create, which takes too long in bulk operations
            # IMPORTANT: existing_ids contains integers, not strings,
            # so we map to str here
            new_ids = list(set(all_referenced_ids) - set(map(str, existing_ids)))

            # Creating articles is skipped when there is nothing new, but the
            # relationships below must still be written: the referenced
            # articles may all exist already without being linked yet.
            if new_ids:
                # find all the new articles' summaries (and authors)
                # this could take a while
                articles_to_save = find_articles(new_ids)

                print("%s - %s" % (datetime.now().ctime(), "ARTICLES FOUND, ARTICLES TO DB"))

                # De-duplicate on pubmed_id explicitly.  Putting unsaved model
                # instances in a set relies on Django's primary-key based
                # equality/hash, which is not dependable before saving.
                new_articles = []
                seen_ids = set()
                for summary in articles_to_save:
                    new_id = summary['pubmed_id']
                    if new_id in seen_ids:
                        continue
                    seen_ids.add(new_id)
                    new_articles.append(Article(pubmed_id=new_id, title=summary['title']))

                # TODO: the authors in each summary are not linked to the
                # bulk-created articles yet (get_or_create each Author and
                # add it to the article's authors).

                # save the new articles in bulk
                # TODO: maybe catch any exceptions here...look into Exceptions thrown by bulk_create
                Article.objects.bulk_create(new_articles)

                print("%s - %s" % (datetime.now().ctime(), "ARTICLES SAVED"))

            print("%s - %s" % (datetime.now().ctime(), "ARTICLE RELATIONSHIPS TO DB"))

            # only articles with a non-empty reference list need linking
            citing_ids = [
                citing_id
                for citing_id, cited_ids in references_by_article.items()
                if len(cited_ids) > 0
            ]

            for article in Article.objects.filter(pubmed_id__in=citing_ids):
                # the mapping is looked up by the string form of the ID
                references = Article.objects.filter(
                    pubmed_id__in=references_by_article[str(article.pubmed_id)]
                )
                article.referenced_articles.add(*references)

        print("%s - %s" % (datetime.now().ctime(), "END"))