def _validate_record(obj, eng):
    """Validate ``obj.data`` against ``schema`` and record failures.

    ``schema`` is neither a parameter nor a local; it is resolved from the
    surrounding scope. ``eng`` is accepted but not used here.

    When validation raises ``ValidationError`` the error details and a
    callback URL are stored in ``obj.extra_data``, the object is saved, the
    database session is committed, and the original exception is re-raised.
    """
    try:
        validate(obj.data, schema)
    except ValidationError:
        errors = get_validation_errors(obj.data, schema)
        obj.extra_data['validation_errors'] = errors

        callback_url = get_resolve_validation_callback_url()
        obj.extra_data['callback_url'] = callback_url

        # Persist the diagnostics before propagating the failure.
        obj.save()
        db.session.commit()

        reraise(*sys.exc_info())
def test_results_from_jats():
    """Get and validate results from mocking a JATS response."""
    from scrapy.http import XmlResponse

    spider = aps_spider.APSSpider()
    fake_response = fake_response_from_file(
        'aps/PhysRevD.96.095036.xml',
        response_type=XmlResponse,
    )

    record = spider._parse_jats(fake_response).record

    # Compare against the ``None`` singleton by identity, not equality, so a
    # return value with a permissive ``__eq__`` cannot make the test pass.
    assert validate(record, 'hep') is None
def validate_record(obj, eng):
    """Check ``obj.data`` against the JSON schema it points to.

    The schema is downloaded from the URL stored under ``'$schema'`` and
    decoded as JSON. A missing ``'$schema'`` key, a ``ValidationError`` or a
    ``SchemaError`` is reported through ``__halt_and_notify`` together with
    ``eng``.
    """
    if '$schema' not in obj.data:
        __halt_and_notify('No schema found!', eng)
        return

    response = requests_retry_session().get(obj.data['$schema'])
    schema_data = json.loads(response.content)

    try:
        validate(obj.data, schema_data)
    except ValidationError as validation_error:
        __halt_and_notify('Invalid record: %s' % validation_error, eng)
    except SchemaError as schema_error:
        __halt_and_notify(
            'SchemaError during record validation! %s' % schema_error,
            eng,
        )
def test_convert_new_publication_info_to_old_handles_renamed_journals():
    """'Nucl.Phys.B Proc.Suppl.' must convert to 'Nucl.Phys.Proc.Suppl.'."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [
        {
            'artid': '525',
            'journal_title': 'Nucl.Phys.B Proc.Suppl.',
            'journal_volume': '118',
            'page_start': '525',
        },
    ]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = utils.convert_new_publication_info_to_old(new_style)

    assert utils.validate(old_style, pubinfo_schema) is None
    assert old_style == [
        {
            'artid': '525',
            'journal_title': 'Nucl.Phys.Proc.Suppl.',
            'journal_volume': '118',
            'page_start': '525',
        },
    ]
def test_convert_new_publication_info_to_old():
    """The trailing title letter moves to the front of the volume."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [
        {'journal_title': 'Phys.Rev.C', 'journal_volume': '48'},
    ]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = utils.convert_new_publication_info_to_old(new_style)

    assert utils.validate(old_style, pubinfo_schema) is None
    assert old_style == [
        {'journal_title': 'Phys.Rev.', 'journal_volume': 'C48'},
    ]
def test_convert_new_publication_info_to_old_handles_journals_with_already_a_letter():
    """'Kumamoto J.Sci.Ser.A' with volume '13' must come back unchanged."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [
        {'journal_title': 'Kumamoto J.Sci.Ser.A', 'journal_volume': '13'},
    ]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = utils.convert_new_publication_info_to_old(new_style)

    assert utils.validate(old_style, pubinfo_schema) is None
    assert old_style == [
        {'journal_title': 'Kumamoto J.Sci.Ser.A', 'journal_volume': '13'},
    ]
def test_convert_old_publication_info_to_new_does_not_double_letters():
    """A title already ending in 'A' keeps one letter; the volume loses it."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    old_style = [
        {'journal_title': 'Proc.Roy.Soc.Lond.A', 'journal_volume': 'A120'},
    ]
    assert utils.validate(old_style, pubinfo_schema) is None

    new_style = utils.convert_old_publication_info_to_new(old_style)

    assert utils.validate(new_style, pubinfo_schema) is None
    assert new_style == [
        {'journal_title': 'Proc.Roy.Soc.Lond.A', 'journal_volume': '120'},
    ]
def test_add_license_doesnt_overwrite_name_if_no_url():
    """A license added by name only is stored under the ``license`` key."""
    license_schema = load_schema('hep')['properties']['license']

    builder = LiteratureBuilder()
    builder.add_license(license='foo')
    licenses = builder.record['license']

    assert validate(licenses, license_schema) is None
    assert licenses == [{'license': 'foo'}]
def test_add_doi_normalizes_doi():
    """The 'doi.org/' prefix must be stripped from the stored DOI value."""
    dois_schema = load_schema('hep')['properties']['dois']

    builder = LiteratureBuilder()
    builder.add_doi('doi.org/10.1234/foo')
    dois = builder.record['dois']

    assert validate(dois, dois_schema) is None
    assert dois == [{'value': '10.1234/foo'}]
def test_convert_new_publication_info_to_old_handles_volumes_with_letters_in_the_middle():
    """Volume '28S1' of 'Eur.Phys.J.A' must become 'A28S1' of 'Eur.Phys.J.'."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [
        {'journal_title': 'Eur.Phys.J.A', 'journal_volume': '28S1'},
    ]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = utils.convert_new_publication_info_to_old(new_style)

    assert utils.validate(old_style, pubinfo_schema) is None
    assert old_style == [
        {'journal_title': 'Eur.Phys.J.', 'journal_volume': 'A28S1'},
    ]
def test_convert_old_publication_info_to_new_handles_hidden_without_volume_variations():
    """Visible and hidden entries both get the volume letter moved to the title.

    The expected output carries no ``journal_record`` for the first entry.
    """
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    old_style = [
        {
            'artid': 'R10587',
            'journal_record': {
                '$ref': 'http://localhost:5000/api/journals/1214516',
            },
            'journal_title': 'Phys.Rev.',
            'journal_volume': 'B61',
        },
        {
            'artid': '10587',
            'hidden': True,
            'journal_title': 'Phys.Rev.',
            'journal_volume': 'B61',
        },
    ]
    assert utils.validate(old_style, pubinfo_schema) is None

    new_style = utils.convert_old_publication_info_to_new(old_style)

    assert utils.validate(new_style, pubinfo_schema) is None
    assert new_style == [
        {
            'artid': 'R10587',
            'journal_title': 'Phys.Rev.B',
            'journal_volume': '61',
        },
        {
            'artid': '10587',
            'hidden': True,
            'journal_title': 'Phys.Rev.B',
            'journal_volume': '61',
        },
    ]
def test_convert_new_publication_info_to_old_handles_the_letter_in_proc_roy_soc_lond():
    """'Proc.Roy.Soc.Lond.A' volume '110' must become '...Lond.' volume 'A110'."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    new_style = [
        {'journal_title': 'Proc.Roy.Soc.Lond.A', 'journal_volume': '110'},
    ]
    assert utils.validate(new_style, pubinfo_schema) is None

    old_style = utils.convert_new_publication_info_to_old(new_style)

    assert utils.validate(old_style, pubinfo_schema) is None
    assert old_style == [
        {'journal_title': 'Proc.Roy.Soc.Lond.', 'journal_volume': 'A110'},
    ]
def test_convert_old_publication_info_to_new_handles_year_added_to_volumes_when_no_journal_title():
    """An entry without ``journal_title`` must pass through unchanged."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    old_style = [
        {
            'artid': '137',
            'journal_volume': '1709',
            'year': 2017,
            'page_start': '137',
        },
    ]
    assert utils.validate(old_style, pubinfo_schema) is None

    new_style = utils.convert_old_publication_info_to_new(old_style)

    assert utils.validate(new_style, pubinfo_schema) is None
    assert new_style == [
        {
            'artid': '137',
            'journal_volume': '1709',
            'year': 2017,
            'page_start': '137',
        },
    ]
def test_publication_info_public_note():
    """A title-only publication info yields a 'Submitted to' public note.

    No ``publication_info`` key may appear in the record.
    """
    notes_schema = load_schema('hep')['properties']['public_notes']

    builder = LiteratureBuilder(source="APS")
    builder.add_publication_info(journal_title="Phys. Rev. B")
    public_notes = builder.record['public_notes']

    assert validate(public_notes, notes_schema) is None
    assert public_notes == [
        {
            'source': 'APS',
            'value': 'Submitted to Phys. Rev. B',
        },
    ]
    assert 'publication_info' not in builder.record
def test_make_author_handles_none_in_id_schema():
    """An id whose schema is ``None`` must be left out of the author."""
    authors_schema = load_schema('hep')['properties']['authors']

    builder = LiteratureBuilder()
    author = builder.make_author('Smith, John', ids=[(None, 'J.Smith.1')])

    assert validate([author], authors_schema) is None
    assert author == {'full_name': 'Smith, John'}
def test_add_keyword():
    """A keyword is stored with its schema and the builder's source."""
    keywords_schema = load_schema('hep')['properties']['keywords']

    builder = LiteratureBuilder(source='Publisher')
    builder.add_keyword('29.27.Fh', schema='PACS')
    keywords = builder.record['keywords']

    assert validate(keywords, keywords_schema) is None
    assert keywords == [
        {
            'value': '29.27.Fh',
            'schema': 'PACS',
            'source': 'Publisher',
        },
    ]
def test_curate():
    """``curate`` must set ``curated_relation`` to ``True``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.curate()
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [{'curated_relation': True}]
def test_convert_old_publication_info_to_new_does_not_double_letters_when_letter_with_volume():
    """'Nucl.Phys.Proc.Suppl.' 'B120' must become 'Nucl.Phys.B Proc.Suppl.' '120'."""
    hep_schema = utils.load_schema('hep')
    pubinfo_schema = hep_schema['properties']['publication_info']

    old_style = [
        {'journal_title': 'Nucl.Phys.Proc.Suppl.', 'journal_volume': 'B120'},
    ]
    assert utils.validate(old_style, pubinfo_schema) is None

    new_style = utils.convert_old_publication_info_to_new(old_style)

    assert utils.validate(new_style, pubinfo_schema) is None
    assert new_style == [
        {'journal_title': 'Nucl.Phys.B Proc.Suppl.', 'journal_volume': '120'},
    ]
def test_add_external_system_identifier_kwargs():
    """Keyword arguments ``schema``/``extid`` map to ``schema``/``value``."""
    hep_schema = load_schema('hep')
    ids_schema = hep_schema['properties']['external_system_identifiers']

    builder = LiteratureBuilder()
    builder.add_external_system_identifier(schema='osti', extid='12345')
    identifiers = builder.record['external_system_identifiers']

    assert validate(identifiers, ids_schema) is None
    assert identifiers == [{'value': '12345', 'schema': 'osti'}]
def test_make_author_sets_record():
    """The ``record`` reference passed in must be stored on the author."""
    authors_schema = load_schema('hep')['properties']['authors']

    builder = LiteratureBuilder()
    author_record = {'$ref': 'http://url/api/authors/1234'}
    author = builder.make_author('Smith, John', record=author_record)

    assert validate([author], authors_schema) is None
    assert author == {
        'full_name': 'Smith, John',
        'record': author_record,
    }
def test_add_url_adds_uid():
    """A DOI handed to ``add_url`` must end up in the reference's ``dois``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.add_url('10.1109/NSSMIC.2005.1596597')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'dois': ['10.1109/NSSMIC.2005.1596597']}},
    ]
def test_set_label():
    """``set_label`` must store the label under ``reference.label``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.set_label('Abe et al, 2008')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'label': 'Abe et al, 2008'}},
    ]
def test_add_uid_falls_back_to_isbn():
    """ISBN '1449344852' must be stored as '9781449344856'."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.add_uid('1449344852')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'isbn': '9781449344856'}},
    ]
def test_set_pubnote_puts_incomplete_pubnote_in_misc():
    """The pubnote 'Phys.Rev.,D43,' must land verbatim in ``misc``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.set_pubnote('Phys.Rev.,D43,')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'misc': ['Phys.Rev.,D43,']}},
    ]
def test_add_uid_handles_arxiv_ids():
    """An old-style arXiv identifier must be stored as ``arxiv_eprint``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.add_uid('hep-th/0603001')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'arxiv_eprint': 'hep-th/0603001'}},
    ]
def test_set_texkey():
    """``set_texkey`` must store the key under ``reference.texkey``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.set_texkey('Aaij:2016qlz')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'texkey': 'Aaij:2016qlz'}},
    ]
def test_set_pubnote_falls_back_to_misc():
    """The string 'not-a-valid-pubnote' must land verbatim in ``misc``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.set_pubnote('not-a-valid-pubnote')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'misc': ['not-a-valid-pubnote']}},
    ]
def test_pop_additional_pubnotes_several_pubnotes():
    """Each 'Additional pubnote' misc entry is popped as its own reference.

    The text after ' / ' in the first entry must remain in the builder's
    own ``misc``.
    """
    refs_schema = load_schema('hep')['properties']['references']
    split_marker = 'Additional pubnote split from previous reference'

    builder = ReferenceBuilder()
    builder.add_misc(
        "Additional pubnote: J.Improbable Testing,453,42-47 / some other stuff"
    )
    builder.add_misc("Additional pubnote: J.Testing,42,R477")

    first_popped = {
        'reference': {
            'publication_info': {
                'journal_title': 'J.Improbable Testing',
                'journal_volume': '453',
                'page_start': '42',
                'page_end': '47',
            },
            'misc': [split_marker],
        },
    }
    second_popped = {
        'reference': {
            'publication_info': {
                'journal_title': 'J.Testing',
                'journal_volume': '42',
                'page_start': 'R477',
                'artid': 'R477',
            },
            'misc': [split_marker],
        },
    }

    popped = list(builder.pop_additional_pubnotes())

    assert validate(popped, refs_schema) is None
    assert popped == [first_popped, second_popped]
    assert builder.obj['reference']['misc'] == ['some other stuff']
def test_make_author():
    """``make_author`` called with every supported argument at once.

    Raw affiliations must carry the given ``source``, and id tuples must
    turn into ``schema``/``value`` dicts.
    """
    authors_schema = load_schema('hep')['properties']['authors']

    builder = LiteratureBuilder()
    author = builder.make_author(
        'Smith, John',
        affiliations=['CERN', 'SLAC'],
        source='submitter',
        raw_affiliations=['CERN, 1211 Geneva', 'SLAC, Stanford'],
        emails=['*****@*****.**'],
        ids=[('INSPIRE BAI', 'J.Smith.1')],
        alternative_names=['Johnny Smith'],
    )

    assert validate([author], authors_schema) is None
    assert author == {
        'full_name': 'Smith, John',
        'affiliations': [
            {'value': 'CERN'},
            {'value': 'SLAC'},
        ],
        'raw_affiliations': [
            {'value': 'CERN, 1211 Geneva', 'source': 'submitter'},
            {'value': 'SLAC, Stanford', 'source': 'submitter'},
        ],
        'emails': ['*****@*****.**'],
        'ids': [
            {'schema': 'INSPIRE BAI', 'value': 'J.Smith.1'},
        ],
        'alternative_names': ['Johnny Smith'],
    }
def test_add_collaboration():
    """``add_collaboration`` must append to ``reference.collaborations``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.add_collaboration('ALICE')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'collaborations': ['ALICE']}},
    ]
def test_add_uid_rejects_invalid_isbns():
    """The uid '123456789' must be stored in ``misc``, not as an ISBN."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.add_uid('123456789')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'misc': ['123456789']}},
    ]
def test_add_uid_handles_cnums():
    """A conference number must be stored in ``publication_info.cnum``."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.add_uid('C87-11-11')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'publication_info': {'cnum': 'C87-11-11'}}},
    ]
def test_add_uid_handles_dois():
    """A 'http://dx.doi.org/' URL must be stored as a bare DOI."""
    refs_schema = load_schema('hep')['properties']['references']

    builder = ReferenceBuilder()
    builder.add_uid('http://dx.doi.org/10.3972/water973.0145.db')
    references = [builder.obj]

    assert validate(references, refs_schema) is None
    assert references == [
        {'reference': {'dois': ['10.3972/water973.0145.db']}},
    ]
def _validate_record(obj, eng):
    """Validate ``obj.data`` against ``schema``.

    ``schema`` is resolved from the surrounding scope; ``eng`` is accepted
    but not used. Anything ``validate`` raises propagates to the caller.
    """
    record = obj.data
    validate(record, schema)
def formdata_to_model(obj, formdata):
    """Manipulate form data to match authors data model.

    Args:
        obj: workflow object; its ``data``, ``id`` and ``id_user``
            attributes are read.
        formdata: submitted form data. It is deep-copied before any
            filtering, so the caller's object is not filtered in place.

    Returns:
        dict: the record built from the form, after it has been passed to
        ``validate(data, 'authors')``.

    Raises:
        Whatever ``validate`` raises when the resulting record is rejected.
    """
    form_fields = copy.deepcopy(formdata)

    filter_empty_elements(
        form_fields,
        ['institution_history', 'advisors', 'websites', 'experiments']
    )

    data = updateform.do(form_fields)

    # ======
    # Schema
    # ======
    if '$schema' not in data and '$schema' in obj.data:
        data['$schema'] = obj.data.get('$schema')

    # A schema given by bare name is expanded to its full URL.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for(
            'invenio_jsonschemas.get_schema',
            schema_path="records/{0}".format(data['$schema'])
        )

    author_name = ''

    if 'family_name' in form_fields and form_fields['family_name']:
        author_name = form_fields['family_name'].strip() + ', '
    if 'given_names' in form_fields and form_fields['given_names']:
        author_name += form_fields['given_names']

    if author_name:
        # ``setdefault`` (rather than ``get``) so the name is kept even when
        # ``data`` has no ``name`` entry yet; with ``get`` the value was
        # written into a temporary dict and silently discarded.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if 'extra_comments' in form_fields and form_fields['extra_comments']:
        data.setdefault('_private_notes', []).append({
            'source': 'submitter',
            'value': form_fields['extra_comments']
        })

    data['stub'] = False

    # ==========
    # Submitter Info
    # ==========
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # No matching user: ``query.get`` returned an object without
        # ``email`` (presumably ``None``).
        user_email = ''
    try:
        orcid = UserIdentity.query.filter_by(
            id_user=obj.id_user,
            method='orcid'
        ).one().id
    except NoResultFound:
        orcid = ''

    data['acquisition_source'] = dict(
        email=user_email,
        datetime=datetime.datetime.utcnow().isoformat(),
        method="submitter",
        orcid=orcid,
        submission_number=str(obj.id),
        internal_uid=int(obj.id_user),
    )

    # Drop empty values (e.g. the '' placeholders above) before validating.
    strip_empty_values(data)
    validate(data, 'authors')

    return data
def map_old_record(record, dry_run):
    """
    Maps the given record if needed to comply with the new schema.

    Following fields will be mapped:
      - page_nr will be a list of integers instead of list of strings
      - arxiv id will be put to the arxiv_eprints field
      - arxiv categories will be added if not yet present
      - "arxiv:" prefix will be removed from arxiv id
      - record_creation_date will be converted to iso format

    Following fields will be deleted at the end of the process:
      - _collections
      - report_numbers
      - files
      - local_files
      - free_keywords
      - additional_files
      - file_urls
      - earliest_date

    The result won't be saved and None will be returned in the following cases:
      - the record doesn't contain a json
      - a record fails the validation after mapping
      - both report_numbers and arxiv_eprints fields are present (shouldn't
        happen in the existing records)
      - there is more than one value in report_numbers field (shouldn't
        happen in the existing records)
      - report_numbers field is present, but there is no source subfield
      - report_numbers field is present, but it holds no arxiv id
      - no record_creation_date is present

    Note that ``record.json`` is modified in place even when ``dry_run`` is
    true; ``dry_run`` only controls whether the change is flagged via
    ``flag_modified``.

    :param record: object whose ``json`` attribute holds the record dict.
    :param dry_run: when true, the ``json`` attribute is not flagged as
        modified.
    :return: the record on success, otherwise None.
    """
    # if there is no json, the record is considered deleted
    if not record.json:
        rerror('no json', record)
        return

    # page_nr to list of integers
    if 'page_nr' in record.json:
        record.json['page_nr'] = [int(x) for x in record.json['page_nr']]

    # extract arxiv from report_numbers if present
    if "report_numbers" in record.json and "arxiv_eprints" in record.json:
        rerror('both report_numbers and arxiv_eprints are present. Skip record.', record)
        return

    if "report_numbers" in record.json:
        if len(record.json["report_numbers"]) > 1:
            rerror('report_numbers has more then one element. Skip record.', record)
            return

        arxiv_id = None
        for element in record.json.get("report_numbers", ()):
            source = element.get('source')
            if not source:
                rerror('report_numbers present, but no source. Skip record.', record)
                return

            if source.lower() == 'arxiv':
                arxiv_id = element.get('value')
                break

        if arxiv_id:
            arxiv_id = arxiv_id.lower().replace('arxiv:', '')
            record.json['arxiv_eprints'] = [{'value': arxiv_id}]
            rinfo('report_numbers -> arxiv_eprints', record)
        else:
            rerror('report_numbers present, but no arxiv id? Skip record.', record)
            return

    # add arxiv category if not yet present
    # (``get`` with an empty default makes a separate membership test
    # unnecessary)
    for element in record.json.get("arxiv_eprints", ()):
        if 'value' not in element:
            rerror('arxiv_eprints value missing', record)
            continue

        arxiv_id = element['value']

        # remove arxiv prefix if present
        if arxiv_id.lower().startswith('arxiv:'):
            rinfo('removing "arxiv:" prefix', record)
            arxiv_id = arxiv_id[len('arxiv:'):]
            # Write the stripped id back; previously only the local variable
            # changed and the stored value kept its prefix.
            element['value'] = arxiv_id

        if 'categories' not in element:
            categories = get_arxiv_categories(arxiv_id)
            element['categories'] = categories

    # record_creation_date to isoformat
    record_creation_date = record.json.get('record_creation_date')
    if record_creation_date is None:
        rerror('no record creation date. Skip record.', record)
        return

    new_date = parse_date(record_creation_date).isoformat()
    if new_date != record_creation_date:
        rinfo('update record_creation_date: %s -> %s' % (record_creation_date, new_date), record)
        record.json['record_creation_date'] = new_date

    # delete unwanted fields
    unwanted_fields = (
        '_collections',
        'report_numbers',
        'files',
        'local_files',
        'free_keywords',
        'additional_files',
        'file_urls',
        'earliest_date',
    )
    for key in unwanted_fields:
        if record.json.pop(key, None) is not None:
            rinfo('deleted %s field' % key, record)

    # validate record
    valid = False
    schema = record.json.get('$schema')
    if schema is not None:
        # The schema is fetched from the URL stored in the record itself.
        schema_data = requests_retry_session().get(schema).content
        schema_data = json.loads(schema_data)

        try:
            validate(record.json, schema_data)
            valid = True
        except ValidationError as err:
            rerror('Invalid record: %s' % err, record)
        except SchemaError as err:
            rerror('SchemaError during record validation! %s' % err, record)
    else:
        rerror('No schema found!', record)

    if not valid:
        return

    # mark changes if not dry_run
    if not dry_run:
        flag_modified(record, 'json')

    return record