Example #1
    def test_delete_fragment(self):
        "a fragment of article data can be deleted"
        self.assertEqual(models.ArticleFragment.objects.count(), 1)
        fragment = {'title': 'pants. party'}
        logic.add(self.msid, 'foo', fragment)
        self.assertEqual(models.ArticleFragment.objects.count(), 2)
        logic.rm(self.msid, 'foo')
        self.assertEqual(models.ArticleFragment.objects.count(), 1)
Example #2
    def test_update_fragment(self):
        "a fragment of article data can be updated by adding it again with different content"

        # ensure we have something that resembles the ingest data
        self.assertEqual(models.ArticleFragment.objects.count(), 1)
        frag = logic.get(self.av, 'xml->json').fragment
        self.assertTrue('title' in frag)

        # now update it with some garbage
        data = {'title': 'pants-party'}
        logic.add(self.av, 'xml->json', data, pos=0, update=True)

        # ensure we've just destroyed our very important data
        frag = logic.get(self.av, 'xml->json').fragment
        self.assertEqual(frag, data)
Example #3
    def test_valid_merge_updates_article_version_fields(self):
        "when a fragment is added, if the merge results in valid article-json, the results of the merge are stored"
        # setUp inserts article snippet that should be valid
        av = self.freshen(self.av)

        # TODO: remove this once xml validates
        # layer in enough to make it validate
        placeholders = {
            'statusDate': '2001-01-01T00:00:00Z',
        }
        logic.add(self.msid, 'foo', placeholders)

        self.assertTrue(logic.set_article_json(self.av, quiet=True))
        av = self.freshen(self.av)
        self.assertTrue(av.article_json_v1)
        self.assertTrue(av.article_json_v1_snippet)
Example #4
    def test_add_fragment(self):
        "a fragment of article data can be recorded against an Article"
        # `setUp` creates a fragment by ingesting article
        self.assertEqual(models.ArticleFragment.objects.count(), 1)

        fragment = {'title': 'pants. party'}
        fragobj, created, updated = logic.add(self.msid, 'foo', fragment)
        self.assertEqual(models.ArticleFragment.objects.count(), 2)
        self.assertEqual(fragment, fragobj.fragment)
Example #5
    def setUp(self):
        # unauthenticated
        self.c = Client()
        # authenticated
        self.ac = Client(**{
            mware.CGROUPS: 'admin',
        })

        self.msid = 16695
        self.ajson_fixture_v1 = join(self.fixture_dir, 'ajson',
                                     'elife-16695-v1.xml.json')  # poa
        self.av = ajson_ingestor.ingest_publish(
            json.load(open(self.ajson_fixture_v1, 'r')))

        self.key = 'test-frag'
        fragment = {'title': 'Electrostatic selection'}
        fragments.add(
            self.av.article, self.key,
            fragment)  # add it to the *article* not the article *version*
Example #6
    def test_merge_fragments(self):
        logic.add(self.av, 'xml->json', {'title': 'foo'}, update=True)
        logic.add(self.msid, 'frag1', {'body': 'bar'})
        logic.add(self.msid, 'frag2', {'foot': 'baz'})

        expected = {'title': 'foo', 'body': 'bar', 'foot': 'baz'}
        self.assertEqual(expected, logic.merge(self.av))
Example #7
    def test_delete_fragment_sends_aws_event(self):
        "sucessfully deleting a fragment sends an aws event"
        self.key = 'test-frag'
        fragment = {'title': 'Electrostatic selection'}
        fragments.add(
            self.av.article, self.key,
            fragment)  # add it to the *article* not the article *version*

        mock = Mock()
        with patch('publisher.aws_events.event_bus_conn', return_value=mock):
            url = reverse('v2:article-fragment',
                          kwargs={
                              'art_id': self.msid,
                              'fragment_id': self.key
                          })

            resp = self.ac.delete(url, json.dumps(fragment))
            self.assertEqual(resp.status_code, 200)  # successfully deleted

            # https://docs.djangoproject.com/en/1.10/topics/db/transactions/#use-in-tests
            expected_event = json.dumps({"type": "article", "id": self.msid})
            mock.publish.assert_called_once_with(Message=expected_event)
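The mock above pins down the contract with the event bus: aws_events.event_bus_conn() must return an object exposing publish(Message=...), and the payload is a JSON document naming the article type and id. A minimal, hedged sketch of the publishing side, with conn_factory standing in for that connection factory (a hypothetical parameter, not the project's API):

import json

def notify_article_changed(msid, conn_factory):
    # conn_factory plays the role of aws_events.event_bus_conn: it returns an
    # object with a publish(Message=...) method (for example, an SNS topic).
    payload = json.dumps({"type": "article", "id": msid})
    conn_factory().publish(Message=payload)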
Example #8
    def test_merge_overwrite_fragments(self):
        logic.add(self.av, 'xml->json', {'title': 'foo'},
                  update=True)  # destroys original article json
        logic.add(self.msid, 'frag1', {'title': 'bar'})
        logic.add(self.msid, 'frag2', {'title': 'baz'})

        expected = {'title': 'baz'}
        self.assertEqual(expected, logic.merge(self.av))
Example #9
    def test_fragment_ordering(self):
        logic.add(self.av, 'xml->json', {'title': 'foo'}, update=True)
        logic.add(self.msid, 'frag1', {'body': 'bar'})
        logic.add(self.msid, 'frag2', {'foot': 'baz'})

        # order of insertion is preserved
        expected_order = ['xml->json', 'frag1', 'frag2']
        for given, expected in zip(models.ArticleFragment.objects.all(),
                                   expected_order):
            self.assertEqual(given.type, expected)
Example #10
    def test_fragment_ordering_explicit(self):
        logic.add(self.av, 'xml->json', {'title': 'foo'},
                  update=True)  # implicit pos=1
        logic.add(self.msid, 'frag1', {'title': 'bar'},
                  pos=2)  # explicit pos=2
        logic.add(self.msid, 'frag2', {'title': 'baz'},
                  pos=1)  # explicit pos=1

        # order of insertion is preserved + explicit ordering
        expected_order = ['xml->json', 'frag2', 'frag1']
        for given, expected in zip(models.ArticleFragment.objects.all(),
                                   expected_order):
            self.assertEqual(given.type, expected)

        expected = {'title': 'bar'}
        self.assertEqual(expected, logic.merge(self.av))
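The ordering and overwrite tests above are consistent with a merge that folds fragment data into one dict in position order, later fragments overwriting the keys of earlier ones. A minimal sketch under that assumption (hypothetical, not the project's logic.merge):

def merge_fragment_data(fragment_dicts):
    # fragment_dicts: dicts already sorted by (position, insertion order)
    result = {}
    for frag in fragment_dicts:
        result.update(frag)  # later fragments overwrite earlier keys
    return result

# the explicit-ordering case above: xml->json, then frag2 (pos=1), then frag1 (pos=2)
# merge_fragment_data([{'title': 'foo'}, {'title': 'baz'}, {'title': 'bar'}]) == {'title': 'bar'}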
Example #11
def _ingest(data, force=False):
    """ingests article-json. returns a triple of (journal obj, article obj, article version obj)
    unpublished article-version data can be ingested multiple times UNLESS that article version has been published.
    published article-version data can be ingested only if force=True"""

    data = copy.deepcopy(data) # we don't want to modify the given data

    create = update = True
    log_context = {}

    try:
        # this *could* be scraped from the provided data, but we have no time to
        # normalize journal names so we sometimes get duplicate journals in the db.
        # safer to disable until needed.
        journal = logic.journal()

        try:
            article_struct = render.render_item(ARTICLE, data['article'])
            article, created, updated = \
                create_or_update(models.Article, article_struct, ['manuscript_id', 'journal'], create, update, journal=journal)

            assert isinstance(article, models.Article)
            log_context['article'] = article

            previous_article_versions = None
            if updated:
                previous_article_versions = list(article.articleversion_set.all().order_by('version')) # earliest -> latest

            av_struct = render.render_item(ARTICLE_VERSION, data['article'])
            # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
            del av_struct['datetime_published']

            av, created, updated = \
                create_or_update(models.ArticleVersion, av_struct, ['article', 'version'],
                                 create, update, commit=False, article=article)
        except KeyError as err:
            raise ValueError("failed to scrape article data, couldn't find key %s" % err)

        assert isinstance(av, models.ArticleVersion)
        log_context['article-version'] = av

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        merge_result = fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)
        fragments.merge_if_valid(av)
        invalid_ajson = not merge_result
        if invalid_ajson:
            LOG.warn("this article failed to merge it's fragments into a valid result and cannot be PUBLISHed in it's current state.", extra=log_context)

        # enforce business rules

        if created:
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

                if not last_version.version + 1 == av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

            # no other versions of article exist
            else:
                if not av.version == 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(events.notify, article))

        return journal, article, av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError("failed to scrape given article data: %s" % err)

    except StateError:
        raise

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json", extra=log_context)
        raise
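The business-rules block above boils down to a small sequencing check; a hypothetical helper (not part of the codebase) that captures it:

def next_ingestable_version(previous_article_versions):
    # no prior versions: the first ingest must be a v1
    if not previous_article_versions:
        return 1
    last_version = previous_article_versions[-1]
    if not last_version.published():
        raise StateError("previous article version is still unpublished")
    return last_version.version + 1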
Example #12
def _ingest(data, force=False) -> models.ArticleVersion:
    """ingests article-json. returns a triple of (journal obj, article obj, article version obj)
    unpublished article-version data can be ingested multiple times UNLESS that article version has been published.
    published article-version data can be ingested only if force=True"""

    create = update = True
    log_context = {}

    try:
        av, created, updated, previous_article_versions = _ingest_objects(
            data, create, update, force, log_context)

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)
        # validation of article-json occurs here
        fragments.set_article_json(
            av, quiet=False if settings.VALIDATE_FAILS_FORCE else force)

        # update the relationships
        relationships.remove_relationships(av)
        relationships.relate_using_msid_list(
            av, data['article'].get('-related-articles-internal', []), quiet=force)
        relationships.relate_using_citation_list(
            av, data['article'].get('-related-articles-external', []))

        # enforce business rules
        if created:
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(codes.PREVIOUS_VERSION_UNPUBLISHED, msg)

                if not last_version.version + 1 == av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1
                    })
                    LOG.error(msg, extra=log_context)
                    raise StateError(codes.PREVIOUS_VERSION_DNE, msg)

            # no other versions of article exist
            else:
                if not av.version == 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1
                    })
                    LOG.error(msg, extra=log_context)
                    raise StateError(codes.PREVIOUS_VERSION_DNE, msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(codes.ALREADY_PUBLISHED, msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(aws_events.notify_all, av))

        return av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError(codes.PARSE_ERROR,
                         "failed to scrape given article data: %r" % err)

    except StateError:
        raise

    except ValidationError as err:
        raise StateError(codes.INVALID, "validation error: %s" % err.message,
                         err)

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json",
                      extra=log_context)
        raise
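For orientation, a hypothetical usage sketch following the fixture-loading pattern from the setUp example above; per the docstring, unpublished versions may be re-ingested freely while published ones require force=True:

import json

with open('elife-16695-v1.xml.json') as fh:  # fixture path is illustrative
    data = json.load(fh)

av = _ingest(data)   # creates the Article and ArticleVersion v1
av = _ingest(data)   # allowed again while v1 remains unpublished
# once v1 has been published, only _ingest(data, force=True) will be accepted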