Пример #1
0
    def test_article_import_update_of_many_versions(self):
        """Three versions of one article ingest into one Article with three ArticleVersions.

        Each version keeps its own publication date, and the shared Article
        carries the publication date of the first version.
        """
        fixture_root = join(self.fixture_dir, "ppp-09066")
        fixture_names = (
            "elife-09066-v1.json",
            "elife-09066-v2.json",
            "elife-09066-v3.json",
        )
        # ingest in version order: v1, v2, v3
        for name in fixture_names:
            ingestor.import_article_from_json_path(self.journal, join(fixture_root, name))

        self.assertEqual(models.Article.objects.count(), 1)
        self.assertEqual(models.ArticleVersion.objects.count(), 3)

        expected_pubdates = [
            (1, "2015-12-19T00:00:00Z"),  # POA
            (2, "2015-12-23T00:00:00Z"),  # POA
            (3, "2016-02-04T00:00:00Z"),  # VOR
        ]
        # fetch every version up front, then compare each against its expected date
        avobjs = [models.ArticleVersion.objects.get(version=num) for num, _ in expected_pubdates]
        for avobj, (_, dtstr) in zip(avobjs, expected_pubdates):
            self.assertEqual(avobj.datetime_published, utils.todt(dtstr))

        # every version points at an article whose pubdate equals the v1 pubdate
        first_pubdate = avobjs[0].datetime_published
        for avobj in avobjs:
            self.assertEqual(first_pubdate, avobj.article.datetime_published)
Пример #2
0
    def add_or_update_article(self, **adata):
        """Create article + article-version stubs for testing and return the article version.

        `adata` is first handed to `renkeys` with the pairs
        ('pub-date', 'published') and ('update', 'versionDate'), then laid over
        a set of default article values. The article id is derived from
        `adata['doi']` when present, otherwise `adata['manuscript_id']` is used.
        """
        renkeys(adata, [
            ('pub-date', 'published'),
            ('update', 'versionDate'),
        ])

        if 'doi' in adata:
            msid = utils.doi2msid(adata['doi'])
        else:
            msid = adata['manuscript_id']

        # defaults; anything supplied by the caller wins
        struct = {
            'id': msid,
            'volume': 1,
            'type': 'research-article',
            'title': '[default]',
            'version': 1,
            'status': models.VOR,
            'published': '2012-01-01T00:00:00Z',
        }
        struct.update(adata)
        # 'journal' can't be serialized and isn't utilised anyway
        delall(struct, ['journal'])

        # with VALIDATE_FAILS_FORCE off, bad ajson won't fail the forced ingest
        with self.settings(VALIDATE_FAILS_FORCE=False):
            av = ajson_ingestor.ingest_publish({'article': struct}, force=True)
            # pin the published date to the value in the struct
            av.datetime_published = utils.todt(struct['published'])
            av.save()
            return av
Пример #3
0
    def test_article_publish_v1(self):
        """An ingested but unpublished v1 article can be published.

        Publishing must not create extra objects, and the resulting pubdate
        matches the 'published' value in the article-json.
        """
        def assert_one_of_each():
            # exactly one journal, one article and one article version exist
            for model in (models.Journal, models.Article, models.ArticleVersion):
                self.assertEqual(model.objects.count(), 1)

        av = ajson_ingestor.ingest(self.ajson)
        assert_one_of_each()
        self.assertFalse(av.published())

        # publish
        av = ajson_ingestor.publish(self.msid, self.version)

        # publishing must not have changed the number of objects
        assert_one_of_each()

        self.assertTrue(av.published())
        self.assertTrue(isinstance(av.datetime_published, datetime))

        # the pubdate of a freshly published v1 comes from the given json
        av = self.freshen(av)
        json_pubdate = utils.todt(self.ajson['article']['published'])
        expected_pubdate = utils.ymd(json_pubdate)
        self.assertEqual(expected_pubdate, utils.ymd(av.datetime_published))
Пример #4
0
    def test_article_publish_succeeds_for_published_article_if_forced(self):
        """An already published article can be re-published, but only when forced.

        A forced re-publish alone leaves the pubdate untouched; a forced
        ingest+publish of article-json with a new 'published' value changes it.
        """
        def published_ymd(article_version):
            # reload the article version and return its pubdate via utils.ymd
            return utils.ymd(self.freshen(article_version).datetime_published)

        _, _, av = ajson_ingestor.ingest(self.ajson)
        av = ajson_ingestor.publish(self.msid, self.version)
        json_pubdate = utils.todt(self.ajson['article']['published'])
        expected_pubdate = utils.ymd(json_pubdate)
        self.assertEqual(expected_pubdate, published_ymd(av))

        # publish again: the pubdate must stay the same
        av = ajson_ingestor.publish(self.msid, self.version, force=True)
        self.assertEqual(expected_pubdate, published_ymd(av))

        # ingest a new pubdate and force publication
        new_pubdate = utils.todt('2016-01-01')
        self.ajson['article']['published'] = new_pubdate
        ajson_ingestor.ingest_publish(self.ajson, force=True)
        self.assertEqual(utils.ymd(new_pubdate), published_ymd(av))
Пример #5
0
 def test_article_version_data(self):
     "the ArticleVersion created by an import carries the expected article, pubdate, status and version"
     art, _ = ingestor.import_article_from_json_path(self.journal, self.json_fixture)
     avobj = models.ArticleVersion.objects.get(article=art, version=1)
     # (attribute name, expected value) pairs checked against the stored version
     expectations = [
         ('article', art),
         ('datetime_published', utils.todt('2012-12-10')),
         ('status', 'poa'),
         ('version', 1),
     ]
     for name, wanted in expectations:
         self.assertEqual(getattr(avobj, name), wanted)
Пример #6
0
    def test_article_updated(self):
        """An article is updated in place when imported again with update=True.

        The first import creates the article; the second import reuses it and
        changes the title, status and pubdate on the returned version.
        """
        def assert_attrs(obj, pairs):
            # each (attribute name, expected value) pair must match on obj
            for name, wanted in pairs:
                self.assertEqual(getattr(obj, name), wanted)

        self.assertEqual(0, models.Article.objects.count())

        art, ver = ingestor.import_article_from_json_path(self.journal, self.json_fixture)
        assert_attrs(ver, [
            ('title', "A meh life"),
            ('status', "poa"),
            ('version', 1),
            ("datetime_published", utils.todt("2012-12-10")),
        ])
        self.assertEqual(1, models.Article.objects.count())

        # attempt the update
        art, ver = ingestor.import_article_from_json_path(
            self.journal, self.update_fixture, update=True)
        assert_attrs(ver, [
            ('title', "A good life"),
            ('status', "vor"),
            ("datetime_published", utils.todt("2012-12-13")),
        ])
        # still only the one article
        self.assertEqual(1, models.Article.objects.count())
Пример #7
0
    def test_todt(self):
        "utils.todt turns date/time strings into UTC datetimes"
        # a naive date string is treated as UTC
        naive_case = (
            "2001-01-01",
            datetime(year=2001, month=1, day=1, tzinfo=pytz.utc),
        )
        # an aware, non-UTC string is converted to UTC (23:30:30+09:30 is 14:00:30 UTC)
        aware_case = (
            "2001-01-01T23:30:30+09:30",
            datetime(year=2001, month=1, day=1, hour=14, minute=0, second=30, tzinfo=pytz.utc),
        )
        for dtstr, wanted in (naive_case, aware_case):
            self.assertEqual(utils.todt(dtstr), wanted)
Пример #8
0
    def test_article_publish_succeeds_for_published_article_if_forced(self):
        """Re-publishing an already published article only works when forced.

        Forcing a publish keeps the existing pubdate; forcing an ingest+publish
        with a different 'published' value in the article-json replaces it.
        """
        av = ajson_ingestor.ingest(self.ajson)
        av = self.freshen(ajson_ingestor.publish(self.msid, self.version))
        original_pubdate = utils.todt(self.ajson['article']['published'])
        expected_pubdate = utils.ymd(original_pubdate)
        self.assertEqual(expected_pubdate, utils.ymd(av.datetime_published))

        # second, forced, publish: pubdate is expected to be unchanged
        av = self.freshen(ajson_ingestor.publish(self.msid, self.version, force=True))
        self.assertEqual(expected_pubdate, utils.ymd(av.datetime_published))

        # swap in a new pubdate, then force an ingest+publish
        new_pubdate = utils.todt('2016-01-01')
        self.ajson['article']['published'] = new_pubdate
        ajson_ingestor.ingest_publish(self.ajson, force=True)
        av = self.freshen(av)
        self.assertEqual(utils.ymd(new_pubdate), utils.ymd(av.datetime_published))
Пример #9
0
 def test_ingest_from_cli(self):
     """ingest script requires the --ingest flag and a source of data

     Runs the command with --ingest, --id, --version and a fixture path, then
     checks the exit code, the number of stored article versions and the
     JSON message written to stdout.
     """
     args = [self.nom, '--ingest', '--id', self.msid, '--version', self.version, self.ajson_fixture1]
     errcode, stdout = self.call_command(*args)
     self.assertEqual(errcode, 0)
     # article has been ingested
     self.assertEqual(models.ArticleVersion.objects.count(), 1)
     # message returned is json encoded with all the right keys and values
     result = json.loads(stdout.getvalue())
     self.assertTrue(utils.has_all_keys(result, ['status', 'id', 'datetime']))
     self.assertEqual(result['status'], 'ingested')
     # the date and time is roughly the same as right now, ignoring microseconds
     expected_datetime = utils.utcnow()
     actual_datetime = utils.todt(result['datetime'])
     delta = expected_datetime - actual_datetime
     threshold = 2 # seconds
     # `timedelta.seconds` is only the seconds *component* (whole days are dropped),
     # so a timestamp that is a day or more out would still pass. compare the
     # absolute total difference instead.
     self.assertLessEqual(abs(delta.total_seconds()), threshold)
Пример #10
0
    def setUp(self):
        """Ingest and publish a fixed set of 'ppp2' article fixtures.

        Also pins the pubdate of two later VOR versions and records the
        version and article counts the tests expect for POA and VOR.
        """
        self.journal = publogic.journal()

        # "<msid>.<version>" for every fixture to import
        fixture_ids = [
            '00353.1',  # discussion, VOR
            '00385.1',  # commentary, VOR
            '01328.1',  # correction, VOR
            '02619.1',  # editorial, VOR
            '03401.1',  # research, POA
            '03401.2',  # POA
            '03401.3',  # VOR
            '03665.1',  # research, VOR
            '06250.1',  # research, POA
            '06250.2',  # POA
            '06250.3',  # VOR
            '07301.1',  # research, VOR
            '08025.1',  # research, POA
            '08025.2',  # VOR
            '09571.1',  # research, POA
        ]
        for fixture_id in fixture_ids:
            # e.g. '03401.2' -> 'elife-03401-v2.xml.json'
            fname = "elife-%s.xml.json" % fixture_id.replace('.', '-v')
            fixture_path = join(self.fixture_dir, 'ppp2', fname)
            ajson = self.load_ajson(fixture_path)  # strip relations
            ajson_ingestor.ingest_publish(ajson)

        # we need to coerce the data of the non-v1 articles a little
        # as we removed the eif ingestor that bypassed business logic
        pubdate_overrides = [
            # vor
            (3401, 3, "2014-08-01"),
            (8025, 2, "2015-06-16"),
        ]
        for msid, version, dtstr in pubdate_overrides:
            av = models.ArticleVersion.objects.get(
                article__manuscript_id=msid, version=version)
            av.datetime_published = utils.todt(dtstr)
            av.save()

        # number of article *versions* per status in the fixtures above
        self.vor_version_count = 9
        self.poa_version_count = 6

        # number of *articles* per status
        self.poa_art_count = 1
        self.vor_art_count = 9
Пример #11
0
 def test_ingest_from_cli(self):
     """ingest script requires the --ingest flag and a source of data

     Runs the command with --ingest, --id, --version and a fixture path, then
     checks the exit code, the number of stored article versions and the
     JSON message returned as stdout.
     """
     args = [
         self.nom, '--ingest', '--id', self.msid, '--version', self.version,
         self.ajson_fixture1
     ]
     errcode, stdout = self.call_command(*args)
     self.assertEqual(errcode, 0)
     # article has been ingested
     self.assertEqual(models.ArticleVersion.objects.count(), 1)
     # message returned is json encoded with all the right keys and values
     result = json.loads(stdout)
     self.assertTrue(
         utils.has_all_keys(result, ['status', 'id', 'datetime']))
     self.assertEqual(result['status'], 'ingested')
     # the date and time is roughly the same as right now, ignoring microseconds
     expected_datetime = utils.utcnow()
     actual_datetime = utils.todt(result['datetime'])
     delta = expected_datetime - actual_datetime
     threshold = 2  # seconds
     # `timedelta.seconds` is only the seconds *component* (whole days are dropped),
     # so a timestamp that is a day or more out would still pass. compare the
     # absolute total difference instead.
     self.assertLessEqual(abs(delta.total_seconds()), threshold)
Пример #12
0
    def test_article_publish_v1(self):
        """A v1 article that was ingested but not yet published can be published.

        Object counts stay the same across the publish, and the pubdate ends
        up equal to the 'published' value of the article-json.
        """
        counted_models = (models.Journal, models.Article, models.ArticleVersion)

        _, _, av = ajson_ingestor.ingest(self.ajson)
        for model in counted_models:
            self.assertEqual(model.objects.count(), 1)
        self.assertFalse(av.published())

        # publish
        av = ajson_ingestor.publish(self.msid, self.version)

        # the publish must not have added or removed any objects
        for model in counted_models:
            self.assertEqual(model.objects.count(), 1)

        self.assertTrue(av.published())
        self.assertTrue(isinstance(av.datetime_published, datetime))

        # for a v1 the pubdate is taken from the given json
        av = self.freshen(av)
        json_pubdate = utils.todt(self.ajson['article']['published'])
        expected_pubdate = utils.ymd(json_pubdate)
        self.assertEqual(expected_pubdate, utils.ymd(av.datetime_published))
Пример #13
0
def _publish(msid, version, force=False):
    """Set `datetime_published` on an article version, merge its fragments and return it.

    The publication date is chosen as follows:

    * version 1: the 'published' value of the stored xml->json fragment.
    * later version, already published, `force` given: the existing date is kept.
    * later version, not yet published: `utils.utcnow()`.

    Raises `StateError` when the article version is already published and
    `force` is false, when the v1 'published' value is null or unparsable,
    when a `ValidationError` is raised, when the xml->json fragment is
    missing, or when the article version doesn't exist.
    """
    try:
        av = models.ArticleVersion.objects.get(article__manuscript_id=msid, version=version)
        if av.published() and not force:
            raise StateError("refusing to publish an already published article version")

        # NOTE: the xml->json fragment is the only fragment consulted for the publication date.
        raw_data = fragments.get(av, XML2JSON)

        if version == 1:
            # a v1 *will always* have a published date in its (stored, unpublished)
            # article-json. that date becomes the pub-date of the ArticleVersion.
            pubdate = utils.todt(raw_data.get('published'))
            if not pubdate:
                raise StateError("found 'published' value in article-json, but it's either null or unparsable as a datetime")

        elif av.published() and force:
            # a version > 1 that is already published, with a forced publish requested
            if False and 'versionDate' in raw_data: # fail this case for now.
                # FUTURE CASE: use 'versionDate' when it is present in the article-json.
                # as of 2016-10-21 version history IS NOT captured in the xml, so the
                # bot-lax-adaptor won't parse it and it won't find its way here.
                pubdate = utils.todt(raw_data['versionDate'])
                if not pubdate:
                    raise StateError("found 'versionDate' value in article-json, but it's either null or unparseable as a datetime")
            else:
                # CURRENT CASE
                # keep the pubdate lax already set and ignore anything in the ajson.
                # a changed pubdate has to come from the xml (see the case above).
                pubdate = av.datetime_published

        else:
            # CURRENT CASE
            # a version > 1 that hasn't been published yet: its pubdate is RIGHT NOW.
            pubdate = utils.utcnow()

        av.datetime_published = pubdate
        av.save()

        # merge the available fragments so they can be served.
        # errors are tolerated when the publish is being forced.
        fragments.merge_if_valid(av, quiet=force)

        # tell the event bus about the article change once the transaction commits
        transaction.on_commit(partial(events.notify, av.article))

        return av

    except ValidationError:
        raise StateError("refusing to publish an article '%sv%s' with invalid article-json" % (msid, version))

    except models.ArticleFragment.DoesNotExist:
        raise StateError("no 'xml->json' fragment found. being strict and failing this publish. please INGEST!")

    except models.ArticleVersion.DoesNotExist:
        # the article version being published doesn't exist ...
        raise StateError("refusing to publish an article '%sv%s' that doesn't exist" % (msid, version))
Пример #14
0
def _publish(msid, version, force=False) -> models.ArticleVersion:
    """Set `datetime_published` on an article version, set its article-json and return it.

    The publication date is chosen as follows:

    * version 1: the 'published' value of the stored xml->json fragment.
    * later version, already published, `force` given: the existing date is kept.
    * later version, not yet published: `utils.utcnow()`.

    Raises `StateError` (with a value from `codes`) when the article version
    is already published and `force` is false, when the v1 'published' value
    is null or unparsable, when a `ValidationError` is raised, when the
    xml->json fragment is missing, or when the article version doesn't exist.
    """
    try:
        av = models.ArticleVersion.objects.get(
            article__manuscript_id=msid, version=version)
        if av.published() and not force:
            raise StateError(
                codes.ALREADY_PUBLISHED,
                "refusing to publish an already published article version")

        # NOTE: the xml->json fragment is the only fragment consulted for the publication date.
        raw_data = fragments.get(av, XML2JSON).fragment

        if version == 1:
            # a v1 *will always* have a published date in its (stored, unpublished)
            # article-json. that date becomes the pub-date of the ArticleVersion.
            pubdate = utils.todt(raw_data.get('published'))
            if not pubdate:
                raise StateError(
                    codes.PARSE_ERROR,
                    "found 'published' value in article-json, but it's either null or unparsable as a date+time"
                )

        elif av.published() and force:
            # a version > 1 that is already published, with a forced publish requested
            if False and 'versionDate' in raw_data:  # fail this case for now.
                # FUTURE CASE: use 'versionDate' when it is present in the article-json.
                # as of 2016-10-21 version history IS NOT captured in the xml, so the
                # bot-lax-adaptor won't parse it and it won't find its way here.
                pubdate = utils.todt(raw_data['versionDate'])
                if not pubdate:
                    raise StateError(
                        codes.PARSE_ERROR,
                        "found 'versionDate' value in article-json, but it's either null or unparseable as a datetime"
                    )
            else:
                # CURRENT CASE
                # keep the pubdate lax already set and ignore anything in the ajson.
                # a changed pubdate has to come from the xml (see the case above).
                pubdate = av.datetime_published

        else:
            # CURRENT CASE
            # a version > 1 that hasn't been published yet: its pubdate is RIGHT NOW.
            pubdate = utils.utcnow()

        av.datetime_published = pubdate
        av.save()

        events.ajson_publish_events(av, force)

        # set the article-json so it can be served. errors are tolerated only when the
        # publish is forced *and* settings.VALIDATE_FAILS_FORCE is off.
        if settings.VALIDATE_FAILS_FORCE:
            quiet = False
        else:
            quiet = force
        fragments.set_article_json(av, quiet=quiet)

        # tell the event bus about the article change once the transaction commits
        transaction.on_commit(partial(aws_events.notify_all, av))

        return av

    except ValidationError as err:
        # the real problem is that invalid ajson was allowed into the system at all
        message = "refusing to publish an article '%sv%s' with invalid article-json: %s" % (
            msid, version, err)
        raise StateError(codes.INVALID, message, err)

    except models.ArticleFragment.DoesNotExist:
        raise StateError(
            codes.NO_RECORD,
            "no 'xml->json' fragment found. being strict and failing this publish. please INGEST!"
        )

    except models.ArticleVersion.DoesNotExist:
        # the article version being published doesn't exist ...
        message = "refusing to publish an article '%sv%s' that doesn't exist" % (msid, version)
        raise StateError(codes.NO_RECORD, message)