def setUp(self):
    """Ingest and publish the dummy 20105 v1 fixture, then cache the
    resulting article version and a test client on the test case."""
    path = join(self.fixture_dir, 'ajson', "dummyelife-20105-v1.xml.json")
    # read the fixture inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector
    with open(path, 'r') as fixture:
        ajson = json.load(fixture)
    ajson_ingestor.ingest_publish(ajson)

    self.msid = 20105
    self.version = 1

    self.av = models.ArticleVersion.objects.filter(article__manuscript_id=self.msid)[0]
    # sanity checks: the fixture must be published and must merge cleanly
    self.assertTrue(self.av.published())
    self.assertTrue(fragments.merge_if_valid(self.av))

    self.c = Client()
def setUp(self):
    """Ingest and publish the 20105 v1 fixture, then cache the resulting
    article version plus an anonymous and an 'admin' test client."""
    path = join(self.fixture_dir, 'ajson', "elife-20105-v1.xml.json")
    # read the fixture inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector
    with open(path, 'r') as fixture:
        ajson = json.load(fixture)
    ajson_ingestor.ingest_publish(ajson)

    self.msid = 20105
    self.version = 1

    self.av = models.ArticleVersion.objects.filter(
        article__manuscript_id=self.msid)[0]
    # sanity checks: the fixture must be published and must merge cleanly
    self.assertTrue(self.av.published())
    self.assertTrue(fragments.merge_if_valid(self.av))

    # plain client
    self.c = Client()
    # client whose requests carry the mware.CGROUPS value 'admin'
    self.ac = Client(**{
        mware.CGROUPS: 'admin',
    })
def test_invalid_merge_deletes_article_json(self):
    """when the fragments no longer merge into valid article-json, setting
    the article-json clears the previously stored (now invalid) content"""
    frag = models.ArticleFragment.objects.all()[0]

    # simulate a value that used to validate but doesn't any more
    frag.fragment['title'] = ''
    frag.save()

    # the merge must now be rejected
    self.assertFalse(logic.merge_if_valid(self.av))

    # the stale article-json is still present on the article version :(
    self.assertTrue(self.av.article_json_v1)

    # the delete goes through without complaint
    self.assertFalse(logic.set_article_json(self.av, quiet=True))

    # after a refresh, the invalid content is gone :)
    refreshed = self.freshen(self.av)
    self.assertFalse(refreshed.article_json_v1)
def test_delete_fragment_fails_if_result_is_invalid(self):
    "a fragment is not deleted when removing it would leave invalid article-json behind"
    # set 'title' to None (invalid) in the XML2JSON fragment.
    # the test fragment {'title': 'whatever'} keeps the merged result valid,
    # so deleting the test fragment has to be refused.
    xml_fragment = models.ArticleFragment.objects.get(type=models.XML2JSON)
    xml_fragment.fragment['title'] = None
    xml_fragment.save()

    # merge_if_valid returns None if invalid
    merged = fragments.merge_if_valid(self.av)
    self.assertTrue(merged)

    url_kwargs = {'art_id': self.msid, 'fragment_id': self.key}
    url = reverse('v2:article-fragment', kwargs=url_kwargs)

    resp = self.ac.delete(url)
    self.assertEqual(resp.status_code, 400)

    # nothing was removed: XML2JSON + 'test-frag'
    expected_fragments = 2
    actual_fragments = models.ArticleFragment.objects.count()
    self.assertEqual(actual_fragments, expected_fragments)
def _ingest(data, force=False):
    """ingests article-json. returns a triple of (journal obj, article obj, article version obj)

    unpublished article-version data can be ingested multiple times UNLESS that article version has been published.
    published article-version data can be ingested only if force=True

    `data` is deep-copied before use so the caller's structure is never modified.

    raises:
      StateError - a business rule was violated (previous version unpublished,
        version out of sequence, re-ingest of a published version without `force`)
        or a KeyError escaped outside of the scraping step.
      ValueError - a key was missing while scraping the article / article version
        structs. it is logged by the catch-all handler and re-raised.
      any other exception is logged with the current log context and re-raised."""
    data = copy.deepcopy(data) # we don't want to modify the given data

    create = update = True
    log_context = {}

    try:
        # this *could* be scraped from the provided data, but we have no time to
        # normalize journal names so we sometimes get duplicate journals in the db.
        # safer to disable until needed.
        journal = logic.journal()

        try:
            article_struct = render.render_item(ARTICLE, data['article'])
            article, created, updated = \
                create_or_update(models.Article, article_struct, ['manuscript_id', 'journal'], create, update, journal=journal)

            assert isinstance(article, models.Article)
            log_context['article'] = article

            # previous versions are only looked up when the article was updated
            # rather than created by the call above
            previous_article_versions = None
            if updated:
                previous_article_versions = list(article.articleversion_set.all().order_by('version')) # earliest -> latest

            av_struct = render.render_item(ARTICLE_VERSION, data['article'])
            # this is an INGEST event and *not* a PUBLISH event. we don't touch the date published.
            del av_struct['datetime_published']

            # NOTE: `created` and `updated` are rebound here and from now on describe
            # the article *version*, not the article. commit=False defers the save
            # until the business rules below have passed.
            av, created, updated = \
                create_or_update(models.ArticleVersion, av_struct, ['article', 'version'],
                                 create, update, commit=False, article=article)
        except KeyError as err:
            raise ValueError("failed to scrape article data, couldn't find key %s" % err)

        assert isinstance(av, models.ArticleVersion)
        log_context['article-version'] = av

        # only update the fragment if this article version has *not* been published *or* if force=True
        update_fragment = not av.published() or force
        merge_result = fragments.add(av, XML2JSON, data['article'], pos=0, update=update_fragment)
        # NOTE(review): the result of this merge is discarded and the warning below
        # keys off the return value of `fragments.add` instead - confirm this is intended.
        fragments.merge_if_valid(av)
        invalid_ajson = not merge_result
        if invalid_ajson:
            # `Logger.warn` is a deprecated alias; `warning` is the supported spelling
            LOG.warning("this article failed to merge it's fragments into a valid result and cannot be PUBLISHed in it's current state.", extra=log_context)

        # enforce business rules

        if created:
            if previous_article_versions:
                last_version = previous_article_versions[-1]
                log_context['previous-version'] = last_version

                if not last_version.published():
                    # uhoh. we're attempting to create an article version before previous version of that article has been published.
                    msg = "refusing to ingest new article version when previous article version is still unpublished."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

                if last_version.version + 1 != av.version:
                    # uhoh. we're attempting to create an article version out of sequence
                    msg = "refusing to ingest new article version out of sequence."
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': last_version.version + 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

            # no other versions of article exist
            else:
                if av.version != 1:
                    # uhoh. we're attempting to create our first article version and it isn't a version 1
                    msg = "refusing to ingest new article version out of sequence. no other article versions exist so I expect a v1"
                    log_context.update({
                        'given-version': av.version,
                        'expected-version': 1})
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        elif updated:
            # this version of the article already exists
            # this is only a problem if the article version has already been published
            if av.published():
                # uhoh. we've received an INGEST event for a previously published article version
                if not force:
                    # unless our arm is being twisted, die.
                    msg = "refusing to ingest new article data on an already published article version."
                    LOG.error(msg, extra=log_context)
                    raise StateError(msg)

        # passed all checks, save
        av.save()

        # notify event bus that article change has occurred
        transaction.on_commit(partial(events.notify, article))

        return journal, article, av

    except KeyError as err:
        # *probably* an error while scraping ...
        raise StateError("failed to scrape given article data: %s" % err)

    except StateError:
        # already logged where it was raised, pass it straight through
        raise

    except Exception:
        LOG.exception("unhandled exception attempting to ingest article-json", extra=log_context)
        raise
def _publish(msid, version, force=False):
    """sets the `datetime_published` value on the article version identified by `msid` and `version`
    and returns that article version.

    for a version 1 the date is read from the 'published' value of the xml->json fragment.
    for later versions, an unpublished version gets a date of RIGHT NOW and a
    force-published version keeps the date it already has.

    an already published article version can only be published again if force==True.

    raises a StateError if the article version doesn't exist, if it is already published
    and `force` is False, if it has no xml->json fragment, if the v1 'published' value
    is null or unparsable, or if a ValidationError is raised along the way."""
    try:
        av = models.ArticleVersion.objects.get(article__manuscript_id=msid, version=version)

        # guard: re-publishing needs an explicit force
        if av.published() and not force:
            raise StateError("refusing to publish an already published article version")

        # NOTE: the xml->json fragment is the *only* fragment consulted
        # when determining the publication date.
        raw_data = fragments.get(av, XML2JSON)

        if version == 1:
            # a v1 *always* carries its published date in the json.
            # take it from the stored (but unpublished) article-json.
            datetime_published = utils.todt(raw_data.get('published'))
            if not datetime_published:
                raise StateError("found 'published' value in article-json, but it's either null or unparsable as a datetime")

        elif av.published() and force:
            # > v1, already published, and a force publish request has been sent
            if False and 'versionDate' in raw_data: # fail this case for now.
                # FUTURE CASE: use the 'versionDate' value when the article-json has one.
                # as of 2016-10-21 version history IS NOT captured in the xml,
                # so the bot-lax-adaptor won't parse it and it won't turn up here.
                datetime_published = utils.todt(raw_data['versionDate'])
                if not datetime_published:
                    raise StateError("found 'versionDate' value in article-json, but it's either null or unparseable as a datetime")
            else:
                # CURRENT CASE
                # keep the pubdate lax already set and ignore anything given in the ajson.
                # a changed pubdate has to arrive via the xml (see above case)
                datetime_published = av.datetime_published

        else:
            # CURRENT CASE
            # > v1 and not published yet: the published date is RIGHT NOW.
            datetime_published = utils.utcnow()

        av.datetime_published = datetime_published
        av.save()

        # merge the available fragments so they can be served.
        # errors are allowed when the publish operation is being forced
        fragments.merge_if_valid(av, quiet=force)

        # notify event bus that article change has occurred
        transaction.on_commit(partial(events.notify, av.article))

        return av

    except ValidationError:
        raise StateError("refusing to publish an article '%sv%s' with invalid article-json" % (msid, version))

    except models.ArticleFragment.DoesNotExist:
        raise StateError("no 'xml->json' fragment found. being strict and failing this publish. please INGEST!")

    except models.ArticleVersion.DoesNotExist:
        # there is no such article version to publish ...
        raise StateError("refusing to publish an article '%sv%s' that doesn't exist" % (msid, version))