def import_article(journal, article_data, create=True, update=False):
    """Create or update an Article and its ArticleVersion from `article_data`.

    `article_data` must be a non-empty dict. The keys 'doi', 'volume',
    'path', 'article-type' and 'manuscript_id' are picked out of it; when
    only one of 'doi' / 'manuscript_id' is present the other is derived
    with `doi2msid` / `msid2doi`. 'article-type' is stored as 'type' and
    'path' is discarded before the values are applied to the model.

    `create` and `update` are passed straight through to
    `import_article_version`; `create` additionally controls whether a
    missing Article may be created here.

    Returns a pair `(article, article_version)`.

    Raises ValueError if `article_data` is empty or not a dict, or if a
    KeyError occurs while preparing the values. Re-raises
    `models.Article.DoesNotExist` when the article is not found and
    `create` is false.
    """
    if not (article_data and isinstance(article_data, dict)):
        raise ValueError("given data to import is empty/invalid")

    expected_keys = ['doi', 'volume', 'path', 'article-type', 'manuscript_id']

    # data wrangling: any KeyError in here is reported as invalid input
    try:
        fields = subdict(article_data, expected_keys)

        # JATS XML doesn't contain the manuscript ID, so whichever of the
        # two identifiers is absent is derived from the one that is present
        has_doi = 'doi' in fields
        has_msid = 'manuscript_id' in fields
        if has_doi and not has_msid:
            fields['manuscript_id'] = doi2msid(fields['doi'])
        elif has_msid and not has_doi:
            fields['doi'] = msid2doi(fields['manuscript_id'])

        log_ctx = {'article': fields['doi']}
        LOG.info("importing Article", extra=log_ctx)

        # post process data
        fields['journal'] = journal
        fields['volume'] = int(fields['volume'])
        fields['type'] = fields['article-type']
        delall(fields, ['path', 'article-type'])
    except KeyError:
        raise ValueError("expected keys invalid/not present: %s" % ", ".join(expected_keys))

    # attempt to update an existing article, falling back to an insert
    lookup = subdict(fields, ['doi', 'version'])
    try:
        article = models.Article.objects.get(**lookup)
        version_obj = import_article_version(article, article_data, create, update)
        LOG.info("Article exists, updating", extra=log_ctx)
        for attr, value in fields.items():
            setattr(article, attr, value)
        article.save()
    except models.Article.DoesNotExist:
        # we've been told not to create new articles,
        # so the lookup failure is now a legitimate exception
        if not create:
            raise
        article = models.Article(**fields)
        article.save()
        version_obj = import_article_version(article, article_data, create, update)
        LOG.info("created new Article %s" % article)

    return article, version_obj
def import_article_version(article, article_data, create=True, update=False):
    """Create or update the ArticleVersion of `article` described by `article_data`.

    The keys 'title', 'version', 'update', 'pub-date' and 'status' are
    picked out of `article_data`; 'doi' is read from it directly and is
    only used as logging context.

    The stored `datetime_published` is derived as follows:
      * version 1 always uses 'pub-date' (an 'update' date that disagrees
        with it is only logged);
      * later versions use 'update'; if that is missing, either ValueError
        is raised (when `settings.FAIL_ON_NO_UPDATE_DATE` is set) or None
        is used.

    :param article: the Article the version belongs to
    :param article_data: mapping of raw article values
    :param create: when false, a missing ArticleVersion is an error
    :param update: when false, an existing ArticleVersion is an error
    :returns: the saved ArticleVersion
    :raises KeyError: a required key ('doi', 'version', 'pub-date',
        'status') is missing
    :raises ValueError: no 'update' date for a version > 1 and
        `settings.FAIL_ON_NO_UPDATE_DATE` is set
    :raises AssertionError: the version exists and `update` is false
    :raises models.ArticleVersion.DoesNotExist: the version does not exist
        and `create` is false
    """
    expected_keys = ['title', 'version', 'update', 'pub-date', 'status']
    kwargs = subdict(article_data, expected_keys)

    try:
        doi = article_data['doi']
        version = int(kwargs['version'])
        version_date = kwargs.get('update')
        datetime_published = kwargs['pub-date']

        context = {'article': doi, 'version': version}

        LOG.info("importing ArticleVersion", extra=context)

        if version_date and version == 1:
            # a v1 carrying an 'update' date is so common it is not worth
            # logging on its own; only report it when the dates disagree.
            # NOTE(review): striptz presumably drops timezone info so the two
            # values compare cleanly -- confirm against its definition
            d1, d2 = striptz(version_date), striptz(datetime_published)
            if d1 != d2:
                c = dict(context)
                c.update({'pub-date': datetime_published, 'update': version_date})
                LOG.warning("double inconsistency: not only do we have an 'update' date for a v1, it doesn't match the date published", extra=c)

                # 'update' date occurred before publish date ...
                if d1 < d2:
                    LOG.warning("triple inconsistency: not only do we have an 'update' date for a v1 that doesn't match the date published, it was actually updated *before* it was published", extra=c)

        # a first version is always dated by its publication date
        if version == 1:
            version_date = datetime_published

        if not version_date and version > 1:
            LOG.warning("inconsistency: a version > 1 does not have an 'update' date", extra=context)
            if settings.FAIL_ON_NO_UPDATE_DATE:
                msg = "no 'update' date found for ArticleVersion"
                raise ValueError(msg)
            msg = "no 'update' date found for ArticleVersion, using None instead"
            LOG.warning(msg, extra=context)
            # normalise any falsy value (e.g. an empty string) to None
            version_date = None

        # post process data
        kwargs.update({
            'article': article,
            'version': version,
            'datetime_published': todt(version_date),
            'status': kwargs['status'].lower(),
        })
        delall(kwargs, ['pub-date', 'update'])
    except KeyError:
        LOG.error("expected keys invalid/not present", extra={'expected_keys': expected_keys})
        raise

    # only the lookup is guarded: a DoesNotExist raised later, while
    # updating or saving, must not be mistaken for "version not found"
    try:
        avobj = models.ArticleVersion.objects.get(article=article, version=kwargs['version'])
    except models.ArticleVersion.DoesNotExist:
        if not create:
            msg = "ArticleVersion with version does not exist and create == False"
            LOG.warning(msg, extra=context)
            raise
        LOG.debug("ArticleVersion NOT found, creating", extra=context)
        avobj = models.ArticleVersion(**kwargs)
        avobj.save()
        LOG.info("created new ArticleVersion", extra=context)
        return avobj

    if not update:
        msg = "Article with version does exists but update == False"
        LOG.warning(msg, extra=context)
        raise AssertionError(msg)

    LOG.debug("ArticleVersion found, updating", extra=context)
    for key, val in kwargs.items():
        setattr(avobj, key, val)
    avobj.save()
    LOG.info("updated existing ArticleVersion", extra=context)
    return avobj