def create_article(art_dict, max_comments):
    """
    Store one article and a random sample of its eligible comments.

    The article is built from the whitelisted keys of ``art_dict`` plus a
    parsed creation date and the ``description`` entry (stored as
    ``metadata``), and is saved immediately. At most ``max_comments`` of the
    comments returned by ``get_elligible_comments`` are then attached to the
    article and written with a single ``bulk_create`` call.
    """
    article_keys = ("title", "body", "text", "slug", "user", "tweet_id", "url")
    comment_keys = ("text", "user_id", "tweet_id")

    article_fields = {}
    for key, value in art_dict.items():
        if key in article_keys:
            article_fields[key] = value
    article_fields["created_at"] = parse_date(art_dict)
    article_fields["metadata"] = art_dict["description"]

    art = Article(**article_fields)
    art.save()

    candidates = get_elligible_comments(art_dict["comments"])
    # Never ask random.sample for more items than are available.
    sample_size = min(max_comments, len(candidates))

    new_comments = []
    for raw_comment in random.sample(candidates, sample_size):
        comment_fields = {}
        for key, value in raw_comment.items():
            if key in comment_keys:
                comment_fields[key] = value
        comment_fields["created_at"] = parse_date(raw_comment)
        # The stored text is the preprocessed tweet, not the raw one.
        comment_fields["text"] = preprocess_tweet(raw_comment["text"])

        comment = Comment(**comment_fields)
        comment.article = art
        new_comments.append(comment)

    Comment.objects.bulk_create(new_comments)
Пример #2
0
 def post(self, request, *args, **kwargs):
     """
     Create or update an article from the submitted form fields.

     Reads ``id``, ``content``, ``tags``, ``slug``, ``classification``,
     ``publish`` and ``title`` from ``request.POST``. When ``id`` matches an
     existing article, that article is updated and its tags are cleared
     before the submitted ones are attached. Otherwise a new article is
     created and ``publish`` is applied to it.

     The slug is lower-cased and its spaces are replaced with dashes.

     Raises whatever ``Classification.objects.get`` raises when no
     classification matches the submitted id.
     """
     aid = request.POST.get('id')
     content = request.POST.get('content')
     tags = request.POST.get('tags')
     slug = request.POST.get('slug', '')
     cid = request.POST.get('classification')
     publish = request.POST.get('publish', False)
     title = request.POST.get('title')
     slug = unicode(slug).replace(' ', '-').lower()
     # A missing or non-numeric id falls back to 0 for the lookup below.
     if not unicode(aid).isdigit():
         aid = 0
     article = Article.objects.filter(id=aid)
     if article.exists():
         article = article[0]
         article.tags.clear()
     else:
         article = Article()
         article.publish = publish
     article.content = content
     classification = Classification.objects.get(id=cid)
     article.classification = classification
     article.title = title
     article.slug = slug
     # The article is saved before its tags relation is modified.
     article.save()
     # `tags` is None when the field is absent; iterating None would raise
     # TypeError after the article has already been written.
     # NOTE(review): POST.get returns a single value, so this loop walks the
     # characters of that string -- confirm whether getlist('tags') was meant.
     for itm in tags or ():
         if not itm:
             continue
         tag = Tag.objects.filter(id=itm)
         if tag.exists():
             article.tags.add(tag[0])
     article.save()
     return self.render_to_response(dict())
Пример #3
0
 def post(self, request, *args, **kwargs):
     """
     Create or update an article from the submitted form fields.

     Reads ``id``, ``content``, ``tags``, ``classification``, ``publish``
     and ``title`` from ``request.POST``. When ``id`` matches an existing
     article, that article is updated and its tags are cleared before the
     submitted ones are attached. Otherwise a new article is created and
     ``publish`` is applied to it.

     Raises whatever ``Classification.objects.get`` raises when no
     classification matches the submitted id.
     """
     aid = request.POST.get('id')
     content = request.POST.get('content')
     tags = request.POST.get('tags')
     cid = request.POST.get('classification')
     publish = request.POST.get('publish', False)
     title = request.POST.get('title')
     # A missing or non-numeric id falls back to 0 for the lookup below.
     if not unicode(aid).isdigit():
         aid = 0
     article = Article.objects.filter(id=aid)
     if article.exists():
         article = article[0]
         article.tags.clear()
     else:
         article = Article()
         article.publish = publish
     article.content = content
     classification = Classification.objects.get(id=cid)
     article.classification = classification
     article.title = title
     # The article is saved before its tags relation is modified.
     article.save()
     # `tags` is None when the field is absent; iterating None would raise
     # TypeError after the article has already been written.
     # NOTE(review): POST.get returns a single value, so this loop walks the
     # characters of that string -- confirm whether getlist('tags') was meant.
     for itm in tags or ():
         if not itm:
             continue
         tag = Tag.objects.filter(id=itm)
         if tag.exists():
             article.tags.add(tag[0])
     article.save()
     return self.render_to_response(dict())
Пример #4
0
 def test_article_creation(self):
     """
     Saving a new article in a category raises the article count by one
     """
     count_before = Article.objects.count()

     article = Article(
         url="http://www.dummy.com",
         title="Dummy title",
         category=self.category,
         owner=self.user,
     )
     article.save()

     count_after = Article.objects.count()
     self.assertEqual(count_after, count_before + 1)
     self.assertTrue(isinstance(article, Article))
Пример #5
0
    def setUp(self):
        """
        Build the fixtures shared by the tests: a user, an API client
        authenticated as that user, a saved category, an unsaved article,
        and the response of posting article data to the category endpoint.
        """
        self.user = User.objects.create(username="******")

        api_client = APIClient()
        api_client.force_authenticate(user=self.user)
        self.client = api_client

        category = Category(
            name="Test",
            description="This is a test category",
            owner=self.user,
        )
        category.save()
        self.category = category

        # This instance is only constructed here; save() is not called on it.
        self.article = Article(
            url="http://www.dummy.com",
            title="Dummy title",
            category=self.category,
            owner=self.user,
        )

        self.article_data = dict(
            url="http://www.dummy.com",
            title="Dummy Title",
            description="A sample description",
            read_status=False,
            owner=self.user.id,
        )

        endpoint = '/api/v2/categories/{}/articles/'.format(self.category.id)
        self.response = self.client.post(
            endpoint, self.article_data, format="json")
Пример #6
0
def save_articles(article_list, source):
    """
    Save every entry of ``article_list['articles']`` as an Article tagged
    with ``source``.

    Entries whose author, description or image URL is None are skipped.
    """
    required_fields = ('author', 'content', 'image_url')

    for entry in article_list[u'articles']:
        fields = dict(
            author=entry[r'author'],
            title=entry[r'title'],
            content=entry[u'description'],
            url=entry[u'url'],
            image_url=entry[u'urlToImage'],
            published_at=entry[u'publishedAt'],
            source=source,
        )

        # Incomplete entries are dropped rather than stored.
        if any(fields[name] is None for name in required_fields):
            continue

        Article(**fields).save()
Пример #7
0
def test_deserialize_article_to_article_instance(client, database):
    """
    Loading a dict through ``article_schema`` gives an object whose title,
    slug, content and creation date equal those of an Article constructed
    directly from the same values.
    """
    now: datetime = datetime.now()
    payload: dict = {
        "title": "Test Title",
        "content": "Post body",
        # The schema receives the timestamp in ISO-8601 text form.
        "date_created": now.isoformat(),
    }

    actual: Article = article_schema.load(payload).data
    expected: Article = Article(
        title="Test Title",
        content="Post body",
        date_created=now,
    )

    for field in ("title", "slug", "content", "date_created"):
        assert getattr(actual, field) == getattr(expected, field)
Пример #8
0
def test_serialize_article_to_dict():
    """
    GIVEN an Article built from keyword values
    WHEN it is dumped through ``article_schema``
    THEN the resulting dict carries those values, the slug expected for
    the title, and the creation date in ISO-8601 form
    """
    created: datetime = datetime.now(timezone.utc)
    article: Article = Article(
        title="Test Title",
        content="Post body",
        date_created=created,
    )

    actual: dict = article_schema.dump(article).data
    expected: dict = {
        "title": "Test Title",
        "slug": "test-title",
        "content": "Post body",
        "date_created": created.isoformat(),
    }

    # Only the expected keys are compared; extra keys in `actual` are ignored.
    for key, value in expected.items():
        assert actual[key] == value
Пример #9
0
    def create(self, validated_data):
        """
        Create an article authored by the requesting user and link its tags.

        ``tags`` is removed from ``validated_data`` before the article is
        built. Each tag entry is matched to an existing ``Tag`` by name, or
        created with ``create_tag`` when no match exists, and then linked
        to the article through an ``ArticleTag`` row.

        Returns the saved article.
        """
        # `tags` may be absent; treat that as "no tags" instead of failing
        # on iteration after the article has already been saved.
        article_tags = validated_data.pop('tags', None) or []

        article = Article(**validated_data)
        article.author = self.context['request'].user
        article.save()

        for request_tag in article_tags:
            tag_name = request_tag.get('name')
            matches = Tag.objects.filter(name=tag_name)
            tag = matches[0] if matches.exists() else create_tag(request_tag)
            # objects.create() already persists the row; no extra save needed.
            ArticleTag.objects.create(article=article, tag=tag)

        article.save()
        return article
Пример #10
0
def test_create_article():
    """
    GIVEN the model for an article
    WHEN article data is passed in
    THEN return a database model for that article.
    """

    # Create the timestamp here so that actual and expected share it; a
    # timestamp generated inside the model would differ.
    time: datetime = datetime.now(timezone.utc)

    actual: Article = Article(title="Test Title",
                              content="Post body",
                              date_created=time)
    expected: dict = {
        "title": "Test Title",
        "slug": "test-title",
        "content": "Post body",
        "date_created": time,
    }

    assert actual.title == expected["title"]
    assert actual.slug == expected["slug"]
    assert actual.content == expected["content"]
    # The shared timestamp was prepared for this comparison but was never
    # actually checked.
    assert actual.date_created == expected["date_created"]
Пример #11
0
 def handle(self, *args, **options):
     """
     Import blog posts from the remote migrate endpoint as articles.

     The response JSON is expected to hold the posts under
     ``body`` -> ``blog``. A post is skipped when an article with the same
     title (the post's ``caption``) already exists. Each imported article
     is published and linked to the classification named by the post,
     which is created when it does not exist yet.
     """
     res = requests.get('http://www.rapospectre.com/migrate')
     json_data = json.loads(res.content)
     blog_list = json_data.get('body').get('blog')
     for itm in blog_list:
         if Article.objects.filter(title=itm.get('caption')).exists():
             continue
         c_name = itm.get('classification').get('c_name')
         matches = Classification.objects.filter(title=c_name)
         if matches.exists():
             classifi = matches[0]
         else:
             # save() returns None, so keep a reference to the instance
             # itself; otherwise the new classification is lost and the
             # article ends up under an unrelated one.
             classifi = Classification(title=c_name)
             classifi.save()
         create_time = datetime.datetime.strptime(itm.get('create_time'),
                                                  '%Y-%m-%d %H:%M:%S')
         # NOTE(review): replace(tzinfo=...) attaches the zone without any
         # conversion; confirm this is correct for the configured timezone.
         create_time = create_time.replace(tzinfo=get_current_timezone())
         Article(title=itm.get('caption'),
                 content=itm.get('content'),
                 publish=True,
                 create_time=create_time,
                 classification=classifi,
                 views=int(itm.get('read_count'))).save()
Пример #12
0
def get_aiticle(request):
    """Construct an Article with no arguments and reply with "success".

    The constructed instance is neither saved nor used in the response.
    """
    Article()
    return HttpResponse("success")
Пример #13
0
 def create(self, validated_data):
     """Build an Article from ``validated_data``, save it and return it."""
     instance = Article(**validated_data)
     instance.save()
     return instance
Пример #14
0
def get_article_from_authors(author_name, token):
    """
    Fetch open-access catalog entries for an author from the Mendeley
    search API and store the ones that are not in the database yet.

    Args:
        author_name: Author name to search for. "Anonymous" is never
            searched.
        token: Bearer token sent in the Authorization header.

    Returns:
        A list with the primary keys of the articles that were found or
        stored for the result entries carrying ``identifiers``. Returns
        None when ``author_name`` is "Anonymous" or the matching
        ``Authors`` row is already marked ``done``.

    Raises:
        Whatever ``Authors.objects.get`` raises when no single author row
        matches ``author_name``.
    """
    if author_name == "Anonymous":
        return

    # Imports stay local to the function, as in the original.
    import time

    import django
    import requests
    from django.core.exceptions import ObjectDoesNotExist

    from api.models import Article, Authors

    if Authors.objects.get(name=author_name).done:
        return

    headers = {"Authorization": 'Bearer ' + token}
    url = f'https://api.mendeley.com/search/catalog?author={author_name}&view=all&open_access=True&limit=100'
    try:
        response = requests.get(url, headers=headers).json()
    except requests.exceptions.ConnectionError:
        # One retry after a short pause; a second failure propagates.
        print("okay_bitxh")
        time.sleep(5)
        response = requests.get(url, headers=headers).json()

    ids = []
    for index, res in enumerate(response):
        print(index)
        if 'identifiers' not in res:
            continue

        authors = []
        if 'authors' in res:
            for entry in res['authors']:
                first = entry.get('first_name') or ''
                last = entry.get('last_name') or ''
                # Entries without any name part are filed under "Anonymous".
                if first or last:
                    name = first + " " + last
                else:
                    name = 'Anonymous'

                try:
                    author = Authors.objects.get(name=name)
                except ObjectDoesNotExist:
                    author = Authors(name=name, done=False)

                author.save()
                authors.append(author)
        else:
            author, _created = Authors.objects.get_or_create(name='Anonymous')
            author.save()
            authors.append(author)

        try:
            article = Article.objects.get(id=res['id'])
        except ObjectDoesNotExist:
            identifiers = res['identifiers']
            article = Article(
                title=res['title'],
                type=res['type'],
                id=res['id'],
                year=res.get('year', 2000),
                source=res.get('source', "Not specified"),
                publisher=res.get('publisher', 'Anonymous'),
                identifiers=identifiers,
                link=res['link'],
                pdf=res.get('pdf'),
                abstract=get_abstract(identifiers['doi'])
                if 'doi' in identifiers else "",
            )
            try:
                article.save()
            except django.db.utils.DataError:
                # The row was not stored. Its id is set explicitly above, so
                # without this skip the id of a missing row would be
                # returned to the caller.
                continue
            try:
                for linked_author in authors:
                    article.authors.add(linked_author)
            except django.db.utils.DataError:
                # Best effort, as before: the article itself is stored.
                pass

        ids.append(article.pk)
    return ids
Пример #15
0
def get_data_by_query(token, query, limit=100):
    """
    Run a free-text search against the Mendeley catalog and store the
    open-access results that are not in the database yet.

    Args:
        token: Bearer token sent in the Authorization header.
        query: Search string placed into the request URL as-is.
        limit: Maximum number of results requested from the API.

    Returns:
        A list with the primary keys of the articles found or created for
        the result entries carrying ``identifiers``.
    """
    # Imports stay local to the function, as in the original.
    import time

    import requests
    from django.db.models import ObjectDoesNotExist

    from api.models import Article, Authors

    time.sleep(0.1)

    headers = {"Authorization": 'Bearer ' + token}
    url = f'https://api.mendeley.com/search/catalog?query={query}&open_access=True&limit={limit}'
    try:
        response = requests.get(url, headers=headers).json()
    except requests.exceptions.ConnectionError:
        # One retry after a short pause; a second failure propagates.
        time.sleep(5)
        response = requests.get(url, headers=headers).json()

    ids = []
    for index, res in enumerate(response):
        print(index, "ok")
        if 'identifiers' not in res:
            continue

        authors = []
        if 'authors' in res:
            for entry in res['authors']:
                first = entry.get('first_name') or ''
                last = entry.get('last_name') or ''
                # Entries without any name part are filed under "Anonymous".
                if first or last:
                    name = first + " " + last
                else:
                    name = 'Anonymous'

                # Look the author up by name only. `done` belongs in
                # `defaults`; as a lookup key it would miss authors already
                # marked done and attempt to create a duplicate.
                author, _created = Authors.objects.get_or_create(
                    name=name, defaults={'done': False})
                author.save()
                authors.append(author)
        else:
            author, _created = Authors.objects.get_or_create(
                name='Anonymous', defaults={'done': False})
            author.save()
            authors.append(author)

        try:
            article = Article.objects.get(id=res['id'])
        except ObjectDoesNotExist:
            identifiers = res['identifiers']
            abstract = get_abstract(
                identifiers['doi']) if 'doi' in identifiers else ""

            article = Article(
                title=res['title'],
                type=res['type'],
                id=res['id'],
                year=res.get('year', 2000),
                source=res.get('source', "Not specified"),
                publisher=res.get('publisher', 'Anonymous'),
                identifiers=identifiers,
                link=res['link'],
                pdf=res.get('pdf'),
                # Only the first six keywords are kept.
                keywords=res["keywords"][:6] if "keywords" in res else None,
                abstract=abstract,
            )
            article.save()

            for linked_author in authors:
                article.authors.add(linked_author)

        ids.append(article.pk)
    return ids
Пример #16
0
def get_data(token):
    """
    Search the Mendeley catalog once per configured subdiscipline and
    store the open-access results that are not in the database yet.

    The search terms come from the values of ``settings.SUBDISCIPLINES``;
    each term is lower-cased and its spaces are replaced with ``+`` before
    it is placed into the request URL.

    Args:
        token: Bearer token sent in the Authorization header.
    """
    # Imports stay local to the function, as in the original.
    import time

    import requests
    from django.conf import settings
    from django.db.models import ObjectDoesNotExist

    from api.models import Article, Authors

    time.sleep(0.1)

    headers = {"Authorization": 'Bearer ' + token}
    limit = 100
    for subdisciplines in settings.SUBDISCIPLINES.values():
        for sub in subdisciplines:
            query = sub.replace(" ", "+").lower()
            print(query)
            url = f'https://api.mendeley.com/search/catalog?query={query}&view=all&open_access=True&limit={limit}'
            try:
                response = requests.get(url, headers=headers).json()
            except requests.exceptions.ConnectionError:
                # One retry after a short pause; a second failure propagates.
                print("okay_bitxh")
                time.sleep(5)
                response = requests.get(url, headers=headers).json()

            for index, res in enumerate(response):
                print(index)

                if 'identifiers' not in res:
                    continue

                authors = []
                if 'authors' in res:
                    for entry in res['authors']:
                        first = entry.get('first_name') or ''
                        last = entry.get('last_name') or ''
                        # Entries without any name part are filed under
                        # "Anonymous".
                        if first or last:
                            name = first + " " + last
                        else:
                            name = 'Anonymous'

                        # Look the author up by name only. `done` belongs in
                        # `defaults`; as a lookup key it would miss authors
                        # already marked done and attempt a duplicate create.
                        author, _created = Authors.objects.get_or_create(
                            name=name, defaults={'done': False})
                        author.save()
                        authors.append(author)
                else:
                    author, _created = Authors.objects.get_or_create(
                        name='Anonymous', defaults={'done': False})
                    author.save()
                    authors.append(author)

                try:
                    article = Article.objects.get(id=res['id'])
                except ObjectDoesNotExist:
                    identifiers = res['identifiers']
                    abstract = get_abstract(
                        identifiers['doi']) if 'doi' in identifiers else ""

                    article = Article(
                        title=res['title'],
                        type=res['type'],
                        id=res['id'],
                        year=res.get('year', 2000),
                        source=res.get('source', "Not specified"),
                        publisher=res.get('publisher', 'Anonymous'),
                        identifiers=identifiers,
                        link=res['link'],
                        pdf=res.get('pdf'),
                        # Only the first six keywords are kept.
                        keywords=res["keywords"][:6]
                        if "keywords" in res else None,
                        abstract=abstract)
                    article.save()

                    for linked_author in authors:
                        article.authors.add(linked_author)
Пример #17
0
 def create(self, validated_data, author_id):
     """Build an Article from ``validated_data``, set its ``author_id``,
     save it and return it."""
     instance = Article(**validated_data)
     instance.author_id = author_id
     instance.save()
     return instance
Пример #18
0
                'url':
                url,
                'body_text':
                str(body_text.text).strip().replace('\n', '')
            })
    else:
        errors.append({
            'error':
            f'Не верный статускод {response.status_code} возможно изменился url'
        })


if __name__ == '__main__':
    """
    Получаем все статьи, если статьи получены, то удаляем старые и сохраняем новые
    Поскольку время на реализацию ограничено, то сделано именно так.
    Если есть ошибки, то сохраняем их в базу.
    """
    # English rendering of the note above: fetch all articles; if any were
    # fetched, delete the old ones and save the new ones. It is done this
    # way because implementation time was limited. If there are errors,
    # save them to the database.
    parser()

    if articles:
        # Existing articles are removed before the fresh ones are saved.
        Article.objects.all().delete()
        for article_item in articles:
            Article(**article_item).save()

    if errors:
        for error_item in errors:
            ParserError(**error_item).save()