def test_delete_fragment(self):
    "removing a fragment brings the ArticleFragment count back down"
    # `setUp` has already created one fragment via ingest
    self.assertEqual(models.ArticleFragment.objects.count(), 1)
    extra_fragment = {'title': 'pants. party'}
    logic.add(self.msid, 'foo', extra_fragment)
    self.assertEqual(models.ArticleFragment.objects.count(), 2)
    # now remove what we just added
    logic.rm(self.msid, 'foo')
    self.assertEqual(models.ArticleFragment.objects.count(), 1)
def test_update_fragment(self):
    "a fragment of article data can be updated by adding it again with different content"
    # ensure we have something that resembles the ingest data
    self.assertEqual(models.ArticleFragment.objects.count(), 1)
    original = logic.get(self.av, 'xml->json').fragment
    self.assertTrue('title' in original)
    # now update it with some garbage
    replacement = {'title': 'pants-party'}
    logic.add(self.av, 'xml->json', replacement, pos=0, update=True)
    # ensure we've just destroyed our very important data
    stored = logic.get(self.av, 'xml->json').fragment
    self.assertEqual(stored, replacement)
def test_valid_merge_updates_article_version_fields(self):
    "when a fragment is added, if the merge results in valid article-json, the results of the merge are stored"
    # setUp inserts article snippet that should be valid
    av = self.freshen(self.av)
    # TODO: remove this once xml validates
    # layer in enough to make it validate
    placeholders = {'statusDate': '2001-01-01T00:00:00Z'}
    logic.add(self.msid, 'foo', placeholders)
    self.assertTrue(logic.set_article_json(self.av, quiet=True))
    # re-fetch and check the merged article-json landed on the version row
    av = self.freshen(self.av)
    self.assertTrue(av.article_json_v1)
    self.assertTrue(av.article_json_v1_snippet)
def test_add_fragment(self):
    "a fragment of article data can be recorded against an Article"
    # `setUp` creates a fragment by ingesting article
    self.assertEqual(models.ArticleFragment.objects.count(), 1)
    new_fragment = {'title': 'pants. party'}
    fragobj, created, updated = logic.add(self.msid, 'foo', new_fragment)
    # one more row, and the stored content matches what we passed in
    self.assertEqual(models.ArticleFragment.objects.count(), 2)
    self.assertEqual(new_fragment, fragobj.fragment)
def setUp(self):
    """ingest+publish a POA fixture and attach a fragment to its Article.

    creates: `self.c` (unauthenticated client), `self.ac` (authenticated client),
    `self.msid`, `self.ajson_fixture_v1`, `self.av` (ArticleVersion), `self.key`."""
    # unauthenticated
    self.c = Client()
    # authenticated
    self.ac = Client(**{
        mware.CGROUPS: 'admin',
    })
    self.msid = 16695
    self.ajson_fixture_v1 = join(self.fixture_dir, 'ajson', 'elife-16695-v1.xml.json')  # poa
    # use a context manager so the fixture file handle is closed promptly
    # (the original `json.load(open(...))` leaked the handle)
    with open(self.ajson_fixture_v1, 'r') as fh:
        self.av = ajson_ingestor.ingest_publish(json.load(fh))
    self.key = 'test-frag'
    fragment = {'title': 'Electrostatic selection'}
    fragments.add(
        self.av.article, self.key, fragment)  # add it to the *article* not the article *version*
def test_merge_fragments(self):
    "fragments stored under different keys are merged into one result"
    logic.add(self.av, 'xml->json', {'title': 'foo'}, update=True)
    logic.add(self.msid, 'frag1', {'body': 'bar'})
    logic.add(self.msid, 'frag2', {'foot': 'baz'})
    combined = {'title': 'foo', 'body': 'bar', 'foot': 'baz'}
    self.assertEqual(combined, logic.merge(self.av))
def test_delete_fragment_sends_aws_event(self):
    "successfully deleting a fragment sends an aws event"
    self.key = 'test-frag'
    fragment = {'title': 'Electrostatic selection'}
    fragments.add(
        self.av.article, self.key, fragment)  # add it to the *article* not the article *version*
    fake_conn = Mock()
    with patch('publisher.aws_events.event_bus_conn', return_value=fake_conn):
        url = reverse('v2:article-fragment', kwargs={
            'art_id': self.msid, 'fragment_id': self.key})
        resp = self.ac.delete(url, json.dumps(fragment))
        # successfully deleted
        self.assertEqual(resp.status_code, 200)
        # https://docs.djangoproject.com/en/1.10/topics/db/transactions/#use-in-tests
        expected_event = json.dumps({"type": "article", "id": self.msid})
        fake_conn.publish.assert_called_once_with(Message=expected_event)
def test_merge_overwrite_fragments(self):
    "later fragments overwrite keys set by earlier ones"
    logic.add(self.av, 'xml->json', {'title': 'foo'}, update=True)  # destroys original article json
    logic.add(self.msid, 'frag1', {'title': 'bar'})
    logic.add(self.msid, 'frag2', {'title': 'baz'})
    # the last fragment added wins
    self.assertEqual({'title': 'baz'}, logic.merge(self.av))
def test_fragment_ordering(self):
    "fragments come back in the order they were inserted"
    logic.add(self.av, 'xml->json', {'title': 'foo'}, update=True)
    logic.add(self.msid, 'frag1', {'body': 'bar'})
    logic.add(self.msid, 'frag2', {'foot': 'baz'})
    # order of insertion is preserved
    insertion_order = ['xml->json', 'frag1', 'frag2']
    for fragment, expected_type in zip(models.ArticleFragment.objects.all(), insertion_order):
        self.assertEqual(fragment.type, expected_type)
def test_fragment_ordering_explicit(self):
    "an explicit `pos` overrides insertion order, and merge respects it"
    logic.add(self.av, 'xml->json', {'title': 'foo'}, update=True)  # implicit pos=1
    logic.add(self.msid, 'frag1', {'title': 'bar'}, pos=2)  # explicit pos=2
    logic.add(self.msid, 'frag2', {'title': 'baz'}, pos=1)  # explicit pos=1
    # order of insertion is preserved + explicit ordering
    wanted_order = ['xml->json', 'frag2', 'frag1']
    for fragment, expected_type in zip(models.ArticleFragment.objects.all(), wanted_order):
        self.assertEqual(fragment.type, expected_type)
    # frag1 sits last in merge order, so its title wins
    self.assertEqual({'title': 'bar'}, logic.merge(self.av))
def _ingest(data, force=False):
    """ingests article-json. returns a triple of (journal obj, article obj, article version obj)

    unpublished article-version data can be ingested multiple times UNLESS that
    article version has been published.
    published article-version data can be ingested only if force=True

    raises StateError on business-rule violations or scrape failures; any other
    exception is logged and re-raised."""
    data = copy.deepcopy(data)  # we don't want to modify the given data
    create = update = True
    log_context = {}
    try:
        # this *could* be scraped from the provided data, but we have no time to
        # normalize journal names so we sometimes get duplicate journals in the db.
        # safer to disable until needed.
        journal = logic.journal()
        try:
            article_struct = render.render_item(ARTICLE, data['article'])
            article, created, updated = \
                create_or_update(models.Article, article_struct, ['manuscript_id', 'journal'],
                                 create, update, journal=journal)
            assert isinstance(article, models.Article)
            log_context['article'] = article

            previous_article_versions = None
            if updated:
                # earliest -> latest
                previous_article_versions = list(article.articleversion_set.all().order_by('version'))

            av_struct = render.render_item(ARTICLE_VERSION, data['article'])
            # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
            del av_struct['datetime_published']
            av, created, updated = \
                create_or_update(models.ArticleVersion, av_struct, ['article', 'version'],
                                 create, update, commit=False, article=article)
        except KeyError as err:
            raise ValueError("failed to scrape article data, couldn't find key %s" % err)

        assert isinstance(av, models.ArticleVersion)
        log_context['article-version'] = av

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        merge_result = fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)
        fragments.merge_if_valid(av)
        invalid_ajson = not merge_result
        if invalid_ajson:
            # NOTE: was `LOG.warn`, a deprecated alias of `LOG.warning`
            LOG.warning("this article failed to merge it's fragments into a valid result and cannot be PUBLISHed in it's current state.", extra=log_context)

        # enforce business rules
        if created:
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

                if not last_version.version + 1 == av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

            # no other versions of article exist
            else:
                if not av.version == 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(events.notify, article))

        return journal, article, av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError("failed to scrape given article data: %s" % err)

    except StateError:
        raise

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json", extra=log_context)
        raise
def _ingest(data, force=False) -> models.ArticleVersion:
    """ingests article-json. returns the saved ArticleVersion object.

    unpublished article-version data can be ingested multiple times UNLESS that
    article version has been published.
    published article-version data can be ingested only if force=True

    raises StateError (with a `codes.*` error code) on business-rule violations,
    scrape failures or validation failures; any other exception is logged and
    re-raised."""
    create = update = True
    log_context = {}
    try:
        # create/update the Article and (uncommitted) ArticleVersion rows
        av, created, updated, previous_article_versions = _ingest_objects(
            data, create, update, force, log_context)

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)

        # validation of article-json occurs here
        fragments.set_article_json(
            av, quiet=False if settings.VALIDATE_FAILS_FORCE else force)

        # update the relationships
        relationships.remove_relationships(av)
        relationships.relate_using_msid_list(av, data['article'].get(
            '-related-articles-internal', []), quiet=force)
        relationships.relate_using_citation_list(
            av, data['article'].get('-related-articles-external', []))

        # enforce business rules
        if created:
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(codes.PREVIOUS_VERSION_UNPUBLISHED, msg)

                if not last_version.version + 1 == av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1
                    })
                    LOG.error(msg, extra=log_context)
                    raise StateError(codes.PREVIOUS_VERSION_DNE, msg)

            # no other versions of article exist
            else:
                if not av.version == 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1
                    })
                    LOG.error(msg, extra=log_context)
                    raise StateError(codes.PREVIOUS_VERSION_DNE, msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(codes.ALREADY_PUBLISHED, msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(aws_events.notify_all, av))

        return av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError(codes.PARSE_ERROR, "failed to scrape given article data: %r" % err)

    except StateError:
        raise

    except ValidationError as err:
        raise StateError(codes.INVALID, "validation error: %s" % err.message, err)

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json", extra=log_context)
        raise