def test_file_download(app, db, es, event_queues, record_with_files_creation):
    """Test file download views."""
    recid, record, _ = record_with_files_creation
    record['conceptdoi'] = '10.1234/foo.concept'
    record['conceptrecid'] = 'foo.concept'
    record.commit()
    db.session.commit()

    with app.test_client() as client:
        file_url = url_for(
            'invenio_records_ui.recid_files',
            pid_value=recid.pid_value,
            filename='Test.pdf',
        )
        assert client.get(file_url).status_code == 200

    process_events(['file-download'])
    current_search.flush_and_refresh(index='events-stats-file-download')

    search = Search(using=es, index='events-stats-file-download')
    assert search.count() == 1
    doc = search.execute()[0]
    assert doc['doi'] == '10.1234/foo.bar'
    assert doc['conceptdoi'] == '10.1234/foo.concept'
    assert doc['recid'] == '12345'
    assert doc['conceptrecid'] == 'foo.concept'
    assert doc['resource_type'] == {'type': 'publication', 'subtype': 'book'}
    assert doc['access_right'] == 'open'
    assert doc['communities'] == ['zenodo']
    assert doc['owners'] == [1]
def test_dashboard_view_returns_shared_with_user(action, app, users,
                                                 create_deposit, json_headers,
                                                 auth_headers_for_user):
    user, other_user = users['lhcb_user'], users['lhcb_user2']
    my_deposit = create_deposit(user, 'lhcb-v1.0.0', experiment='LHCb')
    user_published_deposit = create_deposit(user, 'lhcb-v1.0.0', publish=True)
    other_user_deposit = create_deposit(other_user, 'lhcb-v1.0.0')
    permissions = [{
        'email': user.email,
        'type': 'user',
        'op': 'add',
        'action': action
    }]

    with app.test_client() as client:
        client.post(
            '/deposits/{}/actions/permissions'.format(
                other_user_deposit['_deposit']['id']),
            headers=auth_headers_for_user(other_user) + json_headers,
            data=json.dumps(permissions))

        current_search.flush_and_refresh('deposits')

        resp = client.get('/dashboard', headers=auth_headers_for_user(user))
        shared_with_user = [x['id'] for x in resp.json['shared_with_user']]

        assert len(shared_with_user) == 1
        assert my_deposit['_deposit']['id'] not in shared_with_user
        assert user_published_deposit['control_number'] not in shared_with_user
        assert other_user_deposit['_deposit']['id'] in shared_with_user
def test_record_page(app, db, es, event_queues, full_record):
    """Test record page views."""
    full_record['conceptdoi'] = '10.1234/foo.concept'
    full_record['conceptrecid'] = 'foo.concept'
    r = Record.create(full_record)
    PersistentIdentifier.create(
        'recid', '12345', object_type='rec', object_uuid=r.id,
        status=PIDStatus.REGISTERED)
    db.session.commit()

    with app.test_client() as client:
        record_url = url_for('invenio_records_ui.recid', pid_value='12345')
        assert client.get(record_url).status_code == 200

    process_events(['record-view'])
    current_search.flush_and_refresh(index='events-stats-record-view')

    search = Search(using=es, index='events-stats-record-view')
    assert search.count() == 1
    doc = search.execute()[0]
    assert doc['doi'] == '10.1234/foo.bar'
    assert doc['conceptdoi'] == '10.1234/foo.concept'
    assert doc['recid'] == '12345'
    assert doc['conceptrecid'] == 'foo.concept'
    assert doc['resource_type'] == {'type': 'publication', 'subtype': 'book'}
    assert doc['access_right'] == 'open'
    assert doc['communities'] == ['zenodo']
    assert doc['owners'] == [1]
def create_deposit(client, headers, auth_headers, deposit_url, get_json, data):
    """Create a deposit via the API."""
    test_data = dict(
        metadata=dict(
            upload_type='software',
            title='Test title',
            creators=[
                dict(name='Doe, John', affiliation='Atlantis'),
            ],
            description='Test',
        )
    )
    test_data['metadata'].update(data)

    # Create deposit
    res = client.post(
        deposit_url, data=json.dumps(test_data), headers=headers)
    data = get_json(res, code=201)

    # Get identifier and links
    current_search.flush_and_refresh(index='deposits')
    links = data['links']

    # Upload file
    res = client.post(
        links['files'],
        data=dict(file=(BytesIO(b'ctx'), 'test.txt'), name='test.txt'),
        headers=auth_headers,
    )
    assert res.status_code == 201

    return links, data
def _create_deposit(user, schema_name, metadata=None, experiment=None,
                    publish=False):
    """Create a new deposit for a given user and schema name.

    e.g. cms-analysis-v0.0.1, with minimal metadata defined for this
    schema type.
    """
    with app.test_request_context():
        # create schema for record
        create_schema('records/{}'.format(schema_name), is_deposit=False,
                      experiment='CMS')
        # create schema for deposit
        schema = create_schema('deposits/records/{}'.format(schema_name),
                               experiment=experiment)
        metadata = metadata or minimal_metadata(
            jsonschemas_host, 'deposits/records/{}'.format(schema_name))
        login_user(user)
        id_ = uuid4()
        deposit_minter(id_, metadata)
        deposit = Deposit.create(metadata, id_=id_)

        if publish:
            deposit.publish()
            _, record = deposit.fetch_published()
            RecordIndexer().index(record)
            current_search.flush_and_refresh('records')

        current_search.flush_and_refresh(schema.index_name)

        return Deposit.get_record(deposit.id)
def test_synchronize_cadi_entries_when_entry_doesnt_exist_creates_a_new_one(
        mock_parse_cadi_entry, mock_get_all_from_cadi, app, es, location,
        create_schema):
    schema = create_schema('deposits/records/cms-analysis-v0.0.1',
                           experiment='CMS')
    role = assign_egroup_to_experiment('*****@*****.**', 'CMS')

    # deposit with this cadi id doesn't exist
    with raises(DepositDoesNotExist):
        get_deposit_by_cadi_id('ANA-00-001')

    synchronize_cadi_entries()

    current_search.flush_and_refresh('deposits-records')

    # deposit with this cadi id was created
    deposit = get_deposit_by_cadi_id('ANA-00-001')

    # sets cadi info correctly
    assert deposit['cadi_info'] == {'status': 'Free'}
    # sets cadi id
    assert deposit['basic_info']['cadi_id'] == 'ANA-00-001'
    assert deposit['general_title'] == 'ANA-00-001'

    # members of experiment got read access
    assert deposit['_access']['deposit-read'] == {'users': [],
                                                  'roles': [role.id]}
    assert deposit['_access']['deposit-update'] == {'users': [], 'roles': []}
    assert deposit['_access']['deposit-admin'] == {'users': [], 'roles': []}

    # deposit doesn't have an owner
    assert deposit['_deposit']['owners'] == []
def test_publish_empty(api_client, db, es, location, json_auth_headers,
                       deposit_url, get_json, auth_headers, json_headers,
                       license_record, communities, resolver):
    """Test if it is possible to circumvent metadata validation."""
    headers = json_auth_headers
    client = api_client

    # Create deposit
    response = client.post(deposit_url, data='{}', headers=headers)
    data = get_json(response, code=201)

    # Get identifier and links
    current_search.flush_and_refresh(index='deposits')
    links = data['links']

    # Upload file
    res = client.post(
        links['files'],
        data=dict(file=(BytesIO(b'ctx'), 'test.txt'), name='test.txt'),
        headers=auth_headers,
    )
    assert res.status_code == 201

    # Publish deposition - not possible
    response = client.post(links['publish'], headers=auth_headers)
    data = get_json(response, code=400)
def test_citation_formatter_citeproc_get(api, api_client, es, db, full_record,
                                         users):
    """Test records REST citeproc get."""
    r = Record.create(full_record)
    pid = PersistentIdentifier.create(
        'recid', '12345', object_type='rec', object_uuid=r.id,
        status=PIDStatus.REGISTERED)
    db.session.commit()
    db.session.refresh(pid)
    RecordIndexer().index_by_id(r.id)
    current_search.flush_and_refresh(index='records')
    login_user_via_session(api_client, email=users[2]['email'])

    with api.test_request_context():
        records_url = url_for('invenio_records_rest.recid_item',
                              pid_value=pid.pid_value)

    res = api_client.get(records_url, query_string={'style': 'apa'},
                         headers={'Accept': 'text/x-bibliography'})
    assert res.status_code == 200
    assert 'Doe, J.' in res.get_data(as_text=True)
    assert 'Test title (Version 1.2.5).' in res.get_data(as_text=True)
    assert '(2014).' in res.get_data(as_text=True)
def test_get_deposits_returns_deposits_that_users_egroups_have_read_or_admin_access_to(
        action, app, db, users, auth_headers_for_user, json_headers,
        create_deposit):
    user, other_user = users['cms_user'], users['lhcb_user']
    add_role_to_user(users['lhcb_user'], '*****@*****.**')

    with app.test_client() as client:
        deposit = create_deposit(user, 'cms-v0.0.1')

        # other user can't see the deposit
        resp = client.get('/deposits/',
                          headers=auth_headers_for_user(other_user))
        hits = resp.json['hits']['hits']
        assert len(hits) == 0

        permissions = [{
            'email': '*****@*****.**',
            'type': 'egroup',
            'op': 'add',
            'action': action
        }]
        resp = client.post(
            '/deposits/{}/actions/permissions'.format(
                deposit['_deposit']['id']),
            headers=auth_headers_for_user(user) + json_headers,
            data=json.dumps(permissions))

        # sometimes ES needs a refresh
        current_search.flush_and_refresh('deposits')

        resp = client.get('/deposits/',
                          headers=auth_headers_for_user(other_user))
        hits = resp.json['hits']['hits']
        assert len(hits) == 1
def test_deposit_deletion(api_client, deposit, json_auth_headers, deposit_url,
                          get_json, license_record, auth_headers):
    """Test file accessibility after deposit deletion."""
    client = api_client
    headers = json_auth_headers
    auth = auth_headers

    # Create
    res = client.post(
        deposit_url, data=json.dumps(get_data()), headers=headers)
    links = get_json(res, code=201)['links']
    current_search.flush_and_refresh(index='deposits')

    # Upload file
    res = client.post(
        links['files'],
        data=dict(file=(BytesIO(b'test'), 'test.txt'), name='test.txt'),
        headers=auth
    )
    assert res.status_code == 201

    # Get deposit links
    res = client.get(links['self'], headers=headers)
    data = get_json(res, code=200)
    file_link = data['files'][0]['links']['self']
    download_link = data['files'][0]['links']['download']

    # Get file
    res = client.get(file_link, headers=headers)
    assert res.status_code == 200
    res = client.get(download_link, headers=auth)
    assert res.status_code == 200

    # Get file - unauthenticated
    res = client.get(file_link)
    assert res.status_code == 401  # Any request requires auth.
    res = client.get(download_link)
    assert res.status_code == 404

    #
    # Delete upload
    #
    res = client.delete(links['self'], headers=auth)
    assert res.status_code == 204

    # Try to get deposit.
    res = client.get(links['self'], headers=auth)
    assert res.status_code == 410

    # Try to get file
    res = client.get(file_link, headers=headers)
    assert res.status_code == 410
    res = client.get(download_link, headers=auth)
    assert res.status_code == 404

    # Try to get file - unauthenticated
    res = client.get(file_link)
    assert res.status_code == 410
    res = client.get(download_link)
    assert res.status_code == 404
def test_deposit_index(db, es):
    """Test that the deposit index is populated from the published record."""
    deposit_index_name = 'deposits-records-record-v1.0.0'
    rec1 = Record.create({
        'title': 'One',
        '_deposit': {
            'status': 'published',
            'pid': {
                'type': 'recid',
                'value': '1'
            }
        }
    })
    PersistentIdentifier.create(pid_type='recid', pid_value='1',
                                status=PIDStatus.REGISTERED,
                                object_uuid=rec1.id, object_type='rec')
    Deposit.create({
        '_deposit': {
            'status': 'published',
            'pid': {
                'type': 'recid',
                'value': '1'
            }
        }
    })
    db.session.commit()
    current_search.flush_and_refresh(deposit_index_name)
    res = current_search.client.search(index=deposit_index_name)
    # Make sure the 'title' was indexed from the record
    assert res['hits']['hits'][0]['_source']['title'] == 'One'
def test_update_deposit_when_user_is_member_of_egroup_that_has_only_read_access_returns_403(
        app, db, users, create_deposit, json_headers, auth_headers_for_user):
    owner, other_user = users['lhcb_user'], users['cms_user']
    add_role_to_user(other_user, '*****@*****.**')
    deposit = create_deposit(owner, 'lhcb-v0.0.1')

    with app.test_client() as client:
        permissions = [{
            'email': '*****@*****.**',
            'type': 'egroup',
            'op': 'add',
            'action': 'deposit-read'
        }]
        resp = client.post(
            '/deposits/{}/actions/permissions'.format(
                deposit['_deposit']['id']),
            headers=auth_headers_for_user(owner) + json_headers,
            data=json.dumps(permissions))

        # sometimes ES needs refresh
        current_search.flush_and_refresh('deposits')

        resp = client.put(
            '/deposits/{}'.format(deposit['_deposit']['id']),
            headers=auth_headers_for_user(other_user) + json_headers,
            data=json.dumps({}))

        assert resp.status_code == 403
def oaiset_update_records(minimal_record, db, es):
    """Fixture with records for query-based OAISet updating tests."""
    rec_ok = {
        'title': 'extra',
        '_oai': {
            'id': '12345',
            'sets': ['extra', 'user-foobar'],
            'updated': datetime(1970, 1, 1).isoformat(),
        }
    }
    # Record which needs removal of 'extra' from oai sets
    rec_remove = deepcopy(rec_ok)
    rec_remove['title'] = 'other'

    # Record which needs addition of 'extra' to oai sets
    rec_add = deepcopy(rec_ok)
    rec_add['_oai']['sets'] = ['user-foobar', ]

    records = [rec_ok, rec_remove, rec_add, ]

    rec_uuids = []
    for record_meta in records:
        rec = RecordMetadata()
        rec.json = deepcopy(record_meta)
        db.session.add(rec)
        db.session.commit()
        RecordIndexer().index_by_id(rec.id)
        rec_uuids.append(rec.id)
    current_search.flush_and_refresh('records')
    return rec_uuids
def load_records(app, filename, schema, tries=5):
    """Try to index records."""
    indexer = RecordIndexer()
    records = []
    with app.app_context():
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            data_filename = pkg_resources.resource_filename(
                'invenio_records', filename)
            records_data = load(data_filename)
            with db.session.begin_nested():
                for item in records_data:
                    record_id = uuid.uuid4()
                    item_dict = dict(marc21.do(item))
                    item_dict['$schema'] = schema
                    recid_minter(record_id, item_dict)
                    oaiid_minter(record_id, item_dict)
                    record = Record.create(item_dict, id_=record_id)
                    indexer.index(record)
                    records.append(record.id)
            db.session.commit()

        # Wait for indexer to finish
        for i in range(tries):
            response = current_search_client.search()
            if response['hits']['total'] >= len(records):
                break
            current_search.flush_and_refresh('_all')

    return records
def test_basic_search(app, db, es):
    """Test basic search functionality."""
    # The index should be empty
    assert len(ItemSearch().execute()) == 0

    # Create item1, search for everything
    item1 = Item.create({})
    item1.commit()
    record_indexer = RecordIndexer()
    record_indexer.index(item1)
    current_search.flush_and_refresh('_all')
    assert len(ItemSearch().execute()) == 1

    # Create item2, search for everything again
    item2 = Item.create({'foo': 'bar'})
    item2.commit()
    record_indexer.index(item2)
    current_search.flush_and_refresh('_all')
    assert len(ItemSearch().execute()) == 2

    # Search for item2
    assert len(ItemSearch().query('match', foo='bar').execute()) == 1

    # Search for nonsense
    assert len(ItemSearch().query('match', foo='banana').execute()) == 0
def test_delete_draft(api, api_client, db, es, location, json_auth_headers,
                      auth_headers, deposit_url, get_json, license_record):
    """Test deleting of Deposit draft using REST API."""
    # Setting var this way doesn't work
    headers = json_auth_headers
    client = api_client

    links, data = create_deposit(
        client, headers, auth_headers, deposit_url, get_json, {})

    # Two 'recid' PIDs - Concept PID and Version PID
    assert PersistentIdentifier.query.filter_by(pid_type='recid').count() == 2
    recid = PersistentIdentifier.get('recid', str(data['record_id']))
    depid = PersistentIdentifier.query.filter_by(pid_type='depid').one()
    assert recid.status == PIDStatus.RESERVED
    assert depid.status == PIDStatus.REGISTERED

    # Get deposition
    current_search.flush_and_refresh(index='deposits')
    response = client.get(links['self'], headers=auth_headers)
    assert response.status_code == 200

    # Delete deposition
    current_search.flush_and_refresh(index='deposits')
    response = client.delete(links['self'], headers=auth_headers)
    assert response.status_code == 204

    # 'recid' PID should be removed, while 'depid' should have status deleted.
    # No 'doi' PIDs should be created without publishing
    assert PersistentIdentifier.query.filter_by(pid_type='recid').count() == 0
    depid = PersistentIdentifier.query.filter_by(pid_type='depid').one()
    assert PersistentIdentifier.query.filter_by(pid_type='doi').count() == 0
    assert depid.status == PIDStatus.DELETED
def das_datasets_index(es):
    source = [
        {'name': 'dataset1'},
        {'name': 'dataset2'},
        {'name': 'another_dataset'}
    ]

    cache_das_datasets_in_es_from_file(source)
    current_search.flush_and_refresh(DAS_DATASETS_INDEX['alias'])
def test_exporter(app, db, es, exporter_bucket, record_with_files_creation):
    """Test record exporter."""
    pid, record, record_url = record_with_files_creation
    RecordIndexer().index_by_id(record.id)
    current_search.flush_and_refresh('records')
    with app.app_context():
        assert ObjectVersion.get_by_bucket(exporter_bucket).count() == 0
        export_job(job_id='records')
        assert ObjectVersion.get_by_bucket(exporter_bucket).count() == 1
def closed_access_record(db, es, record_with_files_creation):
    """Creation of a full record with closed access right."""
    _, record, record_url = record_with_files_creation
    record['access_right'] = AccessRight.CLOSED
    record.commit()
    db.session.commit()
    indexer = RecordIndexer()
    indexer.index(record)
    current_search.flush_and_refresh(index='records')
    return record
def deposit(app, es, users, locations, deposit_metadata, sip_metadata_types):
    """New deposit with files."""
    with app.test_request_context():
        datastore = app.extensions['security'].datastore
        login_user(datastore.get_user(users[0]['email']))
        id_ = uuid4()
        zenodo_deposit_minter(id_, deposit_metadata)
        deposit = Deposit.create(deposit_metadata, id_=id_)
        db_.session.commit()
    current_search.flush_and_refresh(index='deposits')
    return deposit
def indexed_loans(es, test_loans):
    """Fixture that indexes the test loans and cleans them up afterwards."""
    indexer = RecordIndexer()
    for pid, loan in test_loans:
        indexer.index(loan)
    current_search.flush_and_refresh(index="loans")

    yield test_loans

    for pid, loan in test_loans:
        indexer.delete_by_id(loan.id)
    current_search.flush_and_refresh(index="loans")
def test_bucket_create_delete(api_client, deposit, json_auth_headers,
                              deposit_url, get_json, license_record,
                              auth_headers, minimal_deposit):
    """Test bucket creation/deletion with each deposit."""
    client = api_client
    headers = json_auth_headers
    auth = auth_headers

    # Create deposit
    res = client.post(
        deposit_url, data=json.dumps(minimal_deposit), headers=headers)
    links = get_json(res, code=201)['links']
    current_search.flush_and_refresh(index='deposits')

    # Assert bucket was created and is accessible
    assert 'bucket' in links
    res = client.get(links['bucket'], headers=auth)
    assert res.status_code == 200
    res = client.get(links['bucket'])
    assert res.status_code == 404

    # Upload object via files-rest.
    object_url = links['bucket'] + '/viafilesrest'
    res = client.put(
        object_url,
        input_stream=BytesIO(b'viafilesrest'),
        headers=auth,
    )
    assert res.status_code == 200

    # Get object via files-rest
    res = client.get(object_url, headers=auth)
    assert res.status_code == 200

    # List files in deposit.
    res = client.get(links['self'], headers=headers)
    data = get_json(res, code=200)
    assert len(data['files']) == 1

    # Get file via deposit.
    res = client.get(data['files'][0]['links']['self'], headers=headers)
    data = get_json(res, code=200)

    # Delete deposit
    res = client.delete(links['self'], headers=auth)
    assert res.status_code == 204

    # Assert bucket no longer exists
    res = client.get(links['bucket'], headers=auth)
    assert res.status_code == 404
    res = client.get(object_url, headers=auth)
    assert res.status_code == 404
def cms_triggers_index(es):
    source = [
        {'dataset': 'Dataset1', 'trigger': 'Trigger1'},
        {'dataset': 'Dataset1', 'trigger': 'Trigger_2'},
        {'dataset': 'Dataset1', 'trigger': 'Another_Trigger'},
        {'dataset': 'Dataset2', 'trigger': 'Trigger1'},
        {'dataset': 'Dataset2', 'trigger': 'Trigger2'},
        {'dataset': 'Dataset2', 'trigger': 'Another_One'}
    ]

    cache_cms_triggers_in_es_from_file(source)
    current_search.flush_and_refresh(CMS_TRIGGERS_INDEX['alias'])
def test_custom_search(es, api, json_headers, record_with_bucket,
                       custom_metadata, query, result):
    """Test custom metadata search."""
    pid, record = record_with_bucket
    record['custom'] = custom_metadata
    RecordIndexer().index(record)
    current_search.flush_and_refresh(index='records')
    with api.test_request_context():
        with api.test_client() as client:
            res = client.get(
                url_for('invenio_records_rest.recid_list', custom=query),
                headers=json_headers)
            assert len(res.json) == result
def test_missing_files(api_client, json_auth_headers, deposit_url, location,
                       es, get_json, license_record):
    """Test data validation - no files added."""
    client = api_client
    headers = json_auth_headers

    # Create
    res = client.post(
        deposit_url, data=json.dumps(get_data()), headers=headers)
    links = get_json(res, code=201)['links']
    current_search.flush_and_refresh(index='deposits')

    # Publish - not possible (file is missing)
    res = client.post(links['publish'], headers=headers)
    data = get_json(res, code=400)
    assert len(data['errors']) == 1
def test_simple_delete(api_client, db, es, location, json_auth_headers,
                       auth_headers, deposit_url):
    """Test deposit deletion via the REST API."""
    client = api_client
    headers = json_auth_headers

    # Create
    links = get_json(client.post(deposit_url, data=json.dumps({}),
                                 headers=headers), code=201)["links"]
    current_search.flush_and_refresh(index="deposits")

    # Check list
    assert 1 == len(get_json(client.get(deposit_url, headers=headers),
                             code=200))

    # Delete
    assert client.delete(links["self"],
                         headers=auth_headers).status_code == 204
    current_search.flush_and_refresh(index="deposits")

    # Check list
    assert 0 == len(get_json(client.get(deposit_url, headers=headers),
                             code=200))

    # Delete again
    assert client.delete(links["self"],
                         headers=auth_headers).status_code == 410
def test_geographical_search(es, api, json_headers, record_with_bucket):
    """Test geographical search."""
    pid, record = record_with_bucket
    record['locations'] = [
        {'lat': 46.204391, 'lon': 6.143158, 'place': 'Geneva'},
        {'place': 'New York'}
    ]
    RecordIndexer().index(record)
    current_search.flush_and_refresh(index='records')
    with api.test_request_context():
        with api.test_client() as client:
            res = client.get(
                url_for('invenio_records_rest.recid_list',
                        bounds='6.059634,46.167928,6.230161,46.244911'),
                headers=json_headers
            )
            assert len(res.json) == 1
def test_publish_merge_conflict(app, db, es, users, location, deposit,
                                json_headers, fake_schemas):
    """Test publish with merge conflicts."""
    with app.test_request_context():
        with app.test_client() as client:
            user_info = dict(email=users[0].email, password='******')
            # login
            res = client.post(url_for_security('login'), data=user_info)

            # create a deposit
            deposit = Deposit.create({"metadata": {
                "title": "title-1",
            }})
            deposit.commit()
            db.session.commit()
            # publish
            deposit.publish()
            db.session.commit()
            # edit
            deposit = deposit.edit()
            db.session.commit()
            # simulate an external modification
            rid, record = deposit.fetch_published()
            rev_id = record.revision_id
            record.update({'metadata': {
                "title": "title-2.1",
            }})
            record.commit()
            db.session.commit()
            assert rev_id != record.revision_id
            # edit again and check the merging
            deposit.update({"metadata": {
                "title": "title-2.2",
            }})
            deposit.commit()
            current_search.flush_and_refresh('_all')

            deposit_id = deposit.pid.pid_value
            res = client.post(
                url_for('invenio_deposit_rest.depid_actions',
                        pid_value=deposit_id, action='publish'),
            )
            assert res.status_code == 409
def test_simple_flow(client):
    """Test simple flow using REST API."""
    headers = [('Content-Type', 'application/json')]
    data = {
        'title': 'The title of the record ',
        'contributors': [
            {'name': 'Ellis Jonathan'},
        ]
    }
    url = 'https://localhost:5000/records/'

    # create a record
    response = client.post(url, data=json.dumps(data), headers=headers)
    assert response.status_code == 201

    current_search.flush_and_refresh('records')

    # retrieve record
    res = client.get('https://localhost:5000/records/1')
    assert res.status_code == 200
def deposit(app, es, users, location):
    """New deposit with files."""
    record = dict(
        title='Test title',
        creators=[
            dict(name='Doe, John', affiliation='Atlantis'),
            dict(name='Smith, Jane', affiliation='Atlantis')
        ],
        description='Test Description',
        publication_date='2013-05-08',
        access_right='open'
    )
    with app.test_request_context():
        datastore = app.extensions['security'].datastore
        login_user(datastore.get_user(users[0]['email']))
        deposit = Deposit.create(record)
        deposit.commit()
        db_.session.commit()
    current_search.flush_and_refresh(index='deposits')
    return deposit
def test_synchronize_cadi_entries_when_entry_doesnt_exist_creates_a_new_one_and_assigns_all_the_permissions_correctly(
        base_app, db, es, location, create_schema):
    create_schema('cms-analysis', experiment='CMS', version='0.0.1')
    owner = create_test_user('*****@*****.**')
    cms_members_group_with_r_access = assign_egroup_to_experiment(
        '*****@*****.**', 'CMS')
    cms_admin_groups_with_admin_access = [
        _datastore.find_or_create_role('*****@*****.**'),
        _datastore.find_or_create_role('*****@*****.**'),
        _datastore.find_or_create_role('*****@*****.**'),
    ]
    db.session.commit()

    # deposit with this cadi id doesn't exist
    with raises(DepositDoesNotExist):
        get_deposit_by_cadi_id('EXO-00-000')

    synchronize_cadi_entries()

    current_search.flush_and_refresh('deposits-records')

    # deposit with this cadi id was created
    deposit = get_deposit_by_cadi_id('EXO-00-000')

    assert deposit == {
        'cadi_info': {
            'description': 'Projections for 2HDM Higgs studies (H->ZZ and A->Zh) in 3000 fb-1',
            'name': '2HDM Higgs studies (H->ZZ and A->Zh)',
            'contact': '*****@*****.**',
            'creator': '*****@*****.**',
            'updater': '*****@*****.**',
            'created': '2014-02-05',
            'updated': '2014-07-26',
            'twiki': 'https://twiki.cern.ch/twikiurl',
            'paper': 'http://cms.cern.ch:80/paper.pdf',
            'paper_tar': 'http://cms.cern.ch:80/paper.tgz',
            'pas': '******',
            'awg': 'HIG',
            'publication_status': 'Free',
            'status': 'PUB',
            'conference': '',
            'hepData': '',
            'relatedNotes': [{
                'id': 'AN-2014/000',
                'url': 'http://cms.cern.ch/noteInfo.jsp?cmsnoteid=CMS+AN-2014%2F000'
            }, {
                'id': 'AN-2013/000',
                'url': 'http://cms.cern.ch/noteInfo.jsp?cmsnoteid=CMS+AN-2013%2F000'
            }]
        },
        'general_title': '2HDM Higgs studies (H->ZZ and A->Zh)',
        '_fetched_from': 'cadi',
        '_user_edited': False,
        'basic_info': {
            'cadi_id': 'EXO-00-000'
        },
        '$schema': 'https://analysispreservation.cern.ch/schemas/deposits/records/cms-analysis-v0.0.1.json',
        '_deposit': {
            'id': deposit['_deposit']['id'],
            'status': 'draft',
            'owners': []
        },
        '_experiment': 'CMS',
        '_access': {
            'deposit-read': {
                'users': [owner.id],
                'roles': [cms_members_group_with_r_access.id] +
                         [x.id for x in cms_admin_groups_with_admin_access]
            },
            'deposit-update': {
                'users': [owner.id],
                'roles': [x.id for x in cms_admin_groups_with_admin_access]
            },
            'deposit-admin': {
                'users': [owner.id],
                'roles': [x.id for x in cms_admin_groups_with_admin_access]
            }
        },
        '_files': []
    }
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
        mocked_indexing_task, mocked_permission_check, app):
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited = InspireRecord.create(data=json_data, skip_files=True)
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = 'lit', cited['control_number'], 1
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{
            "reference": {
                'authors': [{'full_name': 'Smith, J.'}]
            }
        }]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = 'lit', record['control_number'], 1
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    references = {
        'references': [{
            "curated_relation": False,
            "record": {
                "$ref": "http://localhost:5000/api/literature/9999"
            },
            "reference": {
                'authors': [{'full_name': 'Smith, J.'}],
            }
        }]
    }

    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = 'lit', record['control_number'], 2
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 1
    assert LiteratureSearch.citations(es_rec).total == 1

    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
        mocked_indexing_task, mocked_permission_check, app):
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited = InspireRecord.create(data=json_data, skip_files=True)
    cited.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{
            'record': {
                '$ref': 'http://localhost:5000/api/literature/9999'
            },
            'reference': {
                'authors': [{'full_name': 'Smith, J.'}],
            }
        }]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', record['control_number'])

    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*expected_args)

    _delete_record('lit', cited['control_number'])
def run_events_and_compare(events):
    current_search.flush_and_refresh('relationships')
    for ev in events:
        event = generate_payload(ev)
        EventAPI.handle_event(event)
    assert_es_equals_db()
def test_reindex(app, script_info):
    """Test reindex."""
    # load records
    with app.test_request_context():
        runner = CliRunner()

        id1 = uuid.uuid4()
        id2 = uuid.uuid4()
        record1 = Record.create(dict(title='Test 1', recid=1), id_=id1)
        record2 = Record.create(dict(title='Test 2', recid=2), id_=id2)
        PersistentIdentifier.create(
            pid_type='recid',
            pid_value=1,
            object_type='rec',
            object_uuid=id1,
            status=PIDStatus.REGISTERED,
        )
        PersistentIdentifier.create(
            pid_type='recid',
            pid_value=2,
            object_type='rec',
            object_uuid=id2,
            status=PIDStatus.REGISTERED,
        )
        db.session.commit()
        indexer = RecordIndexer()
        index, doc_type = indexer.record_to_index(record1)

        # Make sure the index doesn't exist at the beginning (it was not
        # preserved by accident from some other tests)
        assert current_search_client.indices.exists(index) is False

        # Initialize queue
        res = runner.invoke(cli.queue, ['init', 'purge'], obj=script_info)
        assert 0 == res.exit_code

        res = runner.invoke(cli.reindex, ['--yes-i-know', '-t', 'recid'],
                            obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code
        current_search.flush_and_refresh(index)

        # Both records should be indexed
        res = current_search_client.search(index=index)
        assert res['hits']['total'] == 2

        # Delete one of the records
        record2 = Record.get_record(id2)
        record2.delete()
        db.session.commit()
        # Destroy the index and reindex
        list(current_search.delete(ignore=[404]))
        res = runner.invoke(cli.reindex, ['--yes-i-know', '-t', 'recid'],
                            obj=script_info)
        assert 0 == res.exit_code
        res = runner.invoke(cli.run, [], obj=script_info)
        assert 0 == res.exit_code
        current_search.flush_and_refresh(index)

        # Check that the deleted record is not indexed
        res = current_search_client.search(index=index)
        assert res['hits']['total'] == 1
        assert res['hits']['hits'][0]['_source']['title'] == 'Test 1'

        # Destroy queue and the index
        res = runner.invoke(cli.queue, ['delete'], obj=script_info)
        assert 0 == res.exit_code
        list(current_search.delete(ignore=[404]))
def indexed_records(search_class, indexer, test_records):
    """Fixture that indexes the test records and waits for the flush."""
    for pid, record in test_records:
        indexer.index_by_id(record.id)
    current_search.flush_and_refresh(index=search_class.Meta.index)

    yield test_records
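# A minimal sketch (hypothetical, not part of the original suite) of how a
# test might consume the `indexed_records` fixture above: by the time the
# test body runs, the records are indexed and the index is flushed, so
# search assertions need no extra waiting. `search_class` is assumed to be
# the same elasticsearch-dsl Search subclass fixture used by the fixture.
def test_indexed_records_are_searchable(indexed_records, search_class):
    """Hypothetical usage of the `indexed_records` fixture."""
    assert search_class().count() == len(indexed_records)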
def test_simple_rest_flow(mocker, api, api_client, db, es, locations, users,
                          write_token, license_record):
    """Test simple flow using REST API."""
    mocker.patch('invenio_pidstore.providers.datacite.DataCiteMDSClient')
    # Stash the configuration and enable writing
    orig = api.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    api.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    archive_task_mock = mocker.patch(
        'zenodo.modules.deposit.receivers.archive_sip')
    # Setting var this way doesn't work
    client = api_client
    test_data = dict(metadata=dict(
        upload_type='presentation',
        title='Test title',
        creators=[
            dict(name='Doe, John', affiliation='Atlantis'),
            dict(name='Smith, Jane', affiliation='Atlantis')
        ],
        description='Test Description',
        publication_date='2013-05-08',
        access_right='open',
    ))

    # Prepare headers
    auth = write_token['auth_header']
    headers = [('Content-Type', 'application/json'),
               ('Accept', 'application/json')]
    auth_headers = headers + auth

    # Get deposit URL
    with api.test_request_context():
        deposit_url = url_for('invenio_deposit_rest.depid_list')

    # Try to create deposit as anonymous user (failing)
    response = client.post(deposit_url, data=json.dumps(test_data),
                           headers=headers)
    assert response.status_code == 401

    # Create deposit
    response = client.post(deposit_url, data=json.dumps(test_data),
                           headers=auth_headers)
    data = get_json(response, code=201)
    deposit_id = data['id']
    links = data['links']

    # Get deposition
    current_search.flush_and_refresh(index='deposits')
    response = client.get(links['self'], headers=auth)
    assert response.status_code == 200

    # Upload 3 files
    for i in range(3):
        response = client.post(
            links['files'],
            data={
                'file': make_file_fixture('test{0}.txt'.format(i)),
                'name': 'test-{0}.txt'.format(i),
            },
            headers=auth,
        )
        assert response.status_code == 201, i

    assert not archive_task_mock.delay.called

    # Publish deposition
    # Enable datacite minting
    response = client.post(links['publish'], headers=auth_headers)
    record_id = get_json(response, code=202)['record_id']
    recid_pid = PersistentIdentifier.get('recid', str(record_id))

    # Pass doi to record
    test_data['metadata']['doi'] = get_json(response, code=202)['doi']

    # Check that same id is being used for both deposit and record.
    assert deposit_id == record_id

    # Does record exist?
    current_search.flush_and_refresh(index='records')
    response = client.get(
        url_for('invenio_records_rest.recid_item', pid_value=record_id))

    # Was SIP writing task executed?
    sip = RecordSIP.query.filter_by(pid_id=recid_pid.id).one().sip
    archive_task_mock.delay.assert_called_with(str(sip.id))

    # Second request will return forbidden since it's already published
    response = client.post(links['publish'], headers=auth_headers)
    assert response.status_code == 403  # FIXME should be 400

    # Not allowed to edit drafts
    response = client.put(links['self'], data=json.dumps(test_data),
                          headers=auth_headers)
    assert response.status_code == 403

    # Not allowed to delete
    response = client.delete(links['self'], headers=auth)
    assert response.status_code == 403

    # Not allowed to sort files
    response = client.get(links['files'], headers=auth_headers)
    data = get_json(response, code=200)

    files_list = list(map(lambda x: {'id': x['id']}, data))
    files_list.reverse()
    response = client.put(links['files'], data=json.dumps(files_list),
                          headers=auth)
    assert response.status_code == 403

    # Not allowed to add files
    i = 5
    response = client.post(
        links['files'],
        data={
            'file': make_file_fixture('test{0}.txt'.format(i)),
            'name': 'test-{0}.txt'.format(i),
        },
        headers=auth,
    )
    assert response.status_code == 403

    # Not allowed to delete file
    file_url = '{0}/{1}'.format(links['files'], files_list[0]['id'])
    response = client.delete(file_url, headers=auth)
    assert response.status_code == 403

    # Not allowed to rename file
    response = client.put(
        file_url,
        data=json.dumps(dict(filename='another_test.pdf')),
        headers=auth_headers,
    )
    assert response.status_code == 403

    # Change the config back
    api.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig
def data(n_docs, n_items, n_eitems, n_loans, n_intlocs, n_series,
         n_document_requests, n_vendors, n_orders, n_libraries,
         n_borrowing_requests):
    """Insert demo data."""
    click.secho("Generating demo data", fg="yellow")

    indexer = RecordIndexer()

    vocabulary_dir = os.path.join(os.path.realpath("."), "invenio_app_ils",
                                  "vocabularies", "data")
    with open(os.path.join(vocabulary_dir, "tags.json")) as f:
        tags = json.loads(f.read())
    with open(os.path.join(vocabulary_dir, "languages.json")) as f:
        languages = json.loads(f.read())

    holder = Holder(
        patrons_pids=["1", "2", "5", "6"],
        languages=languages,
        librarian_pid="4",
        tags=tags,
        total_intloc=n_intlocs,
        total_items=n_items,
        total_eitems=n_eitems,
        total_documents=n_docs,
        total_loans=n_loans,
        total_series=n_series,
        total_document_requests=n_document_requests,
        total_vendors=n_vendors,
        total_orders=n_orders,
        total_borrowing_requests=n_borrowing_requests,
        total_libraries=n_libraries,
    )

    click.echo("Creating locations...")
    loc_generator = LocationGenerator(holder, minter)
    loc_generator.generate()
    rec = loc_generator.persist()
    indexer.index(rec)

    # InternalLocations
    intlocs_generator = InternalLocationGenerator(holder, minter)
    intlocs_generator.generate()
    rec_intlocs = intlocs_generator.persist()

    # Series
    click.echo("Creating series...")
    series_generator = SeriesGenerator(holder, minter)
    series_generator.generate()
    rec_series = series_generator.persist()

    # Documents
    click.echo("Creating documents...")
    documents_generator = DocumentGenerator(holder, minter)
    documents_generator.generate()
    rec_docs = documents_generator.persist()

    # Items
    click.echo("Creating items...")
    items_generator = ItemGenerator(holder, minter)
    items_generator.generate()
    rec_items = items_generator.persist()

    # EItems
    click.echo("Creating eitems...")
    eitems_generator = EItemGenerator(holder, minter)
    eitems_generator.generate()
    rec_eitems = eitems_generator.persist()

    # Loans
    click.echo("Creating loans...")
    loans_generator = LoanGenerator(holder, minter)
    loans_generator.generate()
    rec_loans = loans_generator.persist()

    # Related records
    click.echo("Creating related records...")
    related_generator = RecordRelationsGenerator(holder, minter)
    related_generator.generate(rec_docs, rec_series)
    related_generator.persist()

    # Document requests
    click.echo("Creating document requests...")
    document_requests_generator = DocumentRequestGenerator(holder, minter)
    document_requests_generator.generate()
    rec_requests = document_requests_generator.persist()

    # Vendors
    click.echo("Creating vendors...")
    vendor_generator = VendorGenerator(holder, minter)
    vendor_generator.generate()
    rec_vendors = vendor_generator.persist()

    # Orders
    click.echo("Creating orders...")
    order_generator = OrderGenerator(holder, minter)
    order_generator.generate()
    rec_orders = order_generator.persist()

    # Libraries
    click.echo("Creating libraries...")
    library_generator = LibraryGenerator(holder, minter)
    library_generator.generate()
    rec_libraries = library_generator.persist()

    # Borrowing requests
    click.echo("Creating borrowing requests...")
    borrowing_requests_generator = BorrowingRequestGenerator(holder, minter)
    borrowing_requests_generator.generate()
    rec_borrowing_requests = borrowing_requests_generator.persist()

    # index locations
    indexer.bulk_index([str(r.id) for r in rec_intlocs])
    click.echo("Sent to the indexing queue {0} locations".format(
        len(rec_intlocs)))

    # index series
    indexer.bulk_index([str(r.id) for r in rec_series])
    click.echo("Sent to the indexing queue {0} series".format(
        len(rec_series)))

    # index loans
    indexer.bulk_index([str(r.id) for r in rec_loans])
    click.echo("Sent to the indexing queue {0} loans".format(len(rec_loans)))

    click.secho("Now indexing...", fg="green")
    # process queue so items can resolve circulation status correctly
    indexer.process_bulk_queue()

    # index eitems
    indexer.bulk_index([str(r.id) for r in rec_eitems])
    click.echo("Sent to the indexing queue {0} eitems".format(
        len(rec_eitems)))

    # index items
    indexer.bulk_index([str(r.id) for r in rec_items])
    click.echo("Sent to the indexing queue {0} items".format(len(rec_items)))

    click.secho("Now indexing...", fg="green")
    # process queue so documents can resolve circulation correctly
    indexer.process_bulk_queue()

    # index libraries
    indexer.bulk_index([str(r.id) for r in rec_libraries])
    click.echo("Sent to the indexing queue {0} libraries".format(
        len(rec_libraries)))

    # index borrowing requests
    indexer.bulk_index([str(r.id) for r in rec_borrowing_requests])
    click.echo("Sent to the indexing queue {0} borrowing requests".format(
        len(rec_borrowing_requests)))

    click.secho("Now indexing...", fg="green")
    indexer.process_bulk_queue()

    # flush all indices after indexing, otherwise ES won't be ready for tests
    current_search.flush_and_refresh(index="*")

    # index documents
    indexer.bulk_index([str(r.id) for r in rec_docs])
    click.echo("Sent to the indexing queue {0} documents".format(
        len(rec_docs)))

    # index document requests
    indexer.bulk_index([str(r.id) for r in rec_requests])
    click.echo("Sent to the indexing queue {0} document requests".format(
        len(rec_requests)))

    # index loans again
    indexer.bulk_index([str(r.id) for r in rec_loans])
    click.echo("Sent to the indexing queue {0} loans".format(len(rec_loans)))

    # index items again
    indexer.bulk_index([str(r.id) for r in rec_items])
    click.echo("Sent to the indexing queue {0} items".format(len(rec_items)))

    # index vendors
    indexer.bulk_index([str(r.id) for r in rec_vendors])
    click.echo("Sent to the indexing queue {0} vendors".format(
        len(rec_vendors)))

    # index orders
    indexer.bulk_index([str(r.id) for r in rec_orders])
    click.echo("Sent to the indexing queue {0} orders".format(
        len(rec_orders)))

    click.secho("Now indexing...", fg="green")
    indexer.process_bulk_queue()
def flush_index(name):
    """Flush and refresh the given index."""
    return current_search.flush_and_refresh(name)
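# A minimal usage sketch for the `flush_index` helper above. The index name
# 'records' and the surrounding helper are illustrative assumptions, not
# taken from the original code: the point is that a flush-and-refresh makes
# freshly indexed documents visible to queries within the same test.
def example_index_and_flush(record):
    """Hypothetical helper: index a record, then make it searchable."""
    RecordIndexer().index(record)
    flush_index('records')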
def testdata(app, db, es_clear, users):
    """Create, index and return test data."""
    data = load_json_from_datadir("locations.json")
    locations = _create_records(db, data, Location, LOCATION_PID_TYPE)

    data = load_json_from_datadir("internal_locations.json")
    int_locs = _create_records(db, data, InternalLocation,
                               INTERNAL_LOCATION_PID_TYPE)

    data = load_json_from_datadir("series.json")
    series = _create_records(db, data, Series, SERIES_PID_TYPE)

    data = load_json_from_datadir("documents.json")
    documents = _create_records(db, data, Document, DOCUMENT_PID_TYPE)

    data = load_json_from_datadir("items.json")
    items = _create_records(db, data, Item, ITEM_PID_TYPE)

    data = load_json_from_datadir("eitems.json")
    eitems = _create_records(db, data, EItem, EITEM_PID_TYPE)

    data = load_json_from_datadir("loans.json")
    loans = _create_records(db, data, Loan, CIRCULATION_LOAN_PID_TYPE)

    data = load_json_from_datadir("document_requests.json")
    doc_reqs = _create_records(db, data, DocumentRequest,
                               DOCUMENT_REQUEST_PID_TYPE)

    data = load_json_from_datadir("acq_vendors.json")
    acq_vendors = _create_records(db, data, Vendor, VENDOR_PID_TYPE)

    data = load_json_from_datadir("acq_orders.json")
    acq_orders = _create_records(db, data, Order, ORDER_PID_TYPE)

    data = load_json_from_datadir("ill_libraries.json")
    ill_libraries = _create_records(db, data, Library, LIBRARY_PID_TYPE)

    data = load_json_from_datadir("ill_borrowing_requests.json")
    ill_brw_reqs = _create_records(db, data, BorrowingRequest,
                                   BORROWING_REQUEST_PID_TYPE)

    # index
    ri = RecordIndexer()
    for rec in (locations + int_locs + series + documents + items + eitems +
                loans + doc_reqs + acq_vendors + acq_orders + ill_libraries +
                ill_brw_reqs):
        ri.index(rec)

    current_search.flush_and_refresh(index="*")

    return {
        "document_requests": doc_reqs,
        "documents": documents,
        "internal_locations": int_locs,
        "items": items,
        "eitems": eitems,
        "loans": loans,
        "locations": locations,
        "series": series,
        "acq_vendors": acq_vendors,
        "acq_orders": acq_orders,
        "ill_libraries": ill_libraries,
        "ill_brw_reqs": ill_brw_reqs,
    }
def flush(cls):
    """Flush indexes."""
    from rero_ils.modules.holdings.api import HoldingsSearch
    current_search.flush_and_refresh(DocumentsSearch.Meta.index)
    current_search.flush_and_refresh(HoldingsSearch.Meta.index)
    current_search.flush_and_refresh(cls.Meta.index)
def test_update_users(app, db, testdata, mocker):
    """Test update users with LDAP."""
    ldap_users = [
        {
            "displayName": [b"New user"],
            "department": [b"A department"],
            "uidNumber": [b"111"],
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00111"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"A new name"],
            "department": [b"A new department"],
            "uidNumber": [b"222"],
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00222"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"Nothing changed"],
            "department": [b"Same department"],
            "uidNumber": [b"333"],
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00333"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"Name 1"],
            "department": [b"Department 1"],
            "uidNumber": [b"555"],
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00555"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"Name 2"],
            "department": [b"Department 2"],
            "uidNumber": [b"666"],
            "mail": [b"*****@*****.**"],  # same email as 555
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00666"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"Name"],
            "department": [b"Department"],
            "uidNumber": [b"777"],
            # missing email, should be skipped
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00777"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"Name"],
            "department": [b"Department"],
            "uidNumber": [b"999"],
            # custom emails allowed
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00999"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"Nothing changed"],
            "department": [b"Same department"],
            "uidNumber": [b"333"],
            # same email as 333, different employee ID, should be skipped
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"9152364"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"Name"],
            "department": [b"Department"],
            "uidNumber": [b"444"],
            # empty email should be skipped
            "mail": [b""],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00444"],
            "postOfficeBox": [b"M12345"]
        },
    ]

    def _prepare():
        """Prepare data."""
        importer = LdapUserImporter()
        # Prepare users in DB. Use `LdapUserImporter` to make it easy
        # create old users
        WILL_BE_UPDATED = deepcopy(ldap_users[1])
        WILL_BE_UPDATED["displayName"] = [b"Previous name"]
        WILL_BE_UPDATED["department"] = [b"Old department"]
        ldap_user = serialize_ldap_user(WILL_BE_UPDATED)
        importer.import_user(ldap_user)

        WILL_NOT_CHANGE = deepcopy(ldap_users[2])
        ldap_user = serialize_ldap_user(WILL_NOT_CHANGE)
        importer.import_user(ldap_user)

        # create a user that does not exist anymore in LDAP, but will not
        # be deleted for safety
        COULD_BE_DELETED = {
            "displayName": [b"old user left CERN"],
            "department": [b"Department"],
            "uidNumber": [b"444"],
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00444"],
            "postOfficeBox": [b"M12345"]
        }
        ldap_user = serialize_ldap_user(COULD_BE_DELETED)
        importer.import_user(ldap_user)
        db.session.commit()
        current_app_ils.patron_indexer.reindex_patrons()

    def _prepare_duplicate():
        duplicated = {
            "displayName": [b"Name 2"],
            "department": [b"Department 2"],
            # same id as one of the previous, different emails
            # should be skipped
            "uidNumber": [b"555"],
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00555"],
            "postOfficeBox": [b"M12345"]
        }
        importer = LdapUserImporter()
        ldap_user = serialize_ldap_user(duplicated)
        importer.import_user(ldap_user)
        db.session.commit()

    _prepare()

    # mock LDAP response
    mocker.patch(
        "cds_ils.ldap.api.LdapClient.get_primary_accounts",
        return_value=ldap_users,
    )

    n_ldap, n_updated, n_added = update_users()
    current_search.flush_and_refresh(index="*")

    assert n_ldap == 9
    assert n_updated == 1  # 00222
    assert n_added == 3  # 00111, 00555, 00999

    invenio_users = User.query.all()
    # 2 are already in test data
    # 4 in the prepared data
    # 2 newly added from LDAP
    assert len(invenio_users) == 8

    patrons_search = PatronsSearch()

    def check_existence(expected_email, expected_name, expected_department,
                        expected_person_id, expected_mailbox):
        """Assert exist in DB and ES."""
        # check if saved in DB
        user = User.query.filter_by(email=expected_email).one()
        up = UserProfile.query.filter_by(user_id=user.id).one()
        assert up.full_name == expected_name
        ra = RemoteAccount.query.filter_by(user_id=user.id).one()
        assert ra.extra_data["department"] == expected_department
        assert ra.extra_data["person_id"] == expected_person_id

        # check if indexed correctly
        results = patrons_search.filter("term", id=user.id).execute()
        assert len(results.hits) == 1
        patron_hit = [r for r in results][0]
        assert patron_hit["email"] == expected_email
        assert patron_hit["department"] == expected_department
        assert patron_hit["person_id"] == expected_person_id
        assert patron_hit["mailbox"] == expected_mailbox

    check_existence("*****@*****.**", "New user", "A department",
                    "00111", "M12345")
    check_existence("*****@*****.**", "A new name", "A new department",
                    "00222", "M12345")
    check_existence("*****@*****.**", "Nothing changed", "Same department",
                    "00333", "M12345")
    check_existence("*****@*****.**", "old user left CERN", "Department",
                    "00444", "M12345")
    check_existence("*****@*****.**", "Name 1", "Department 1",
                    "00555", "M12345")

    # try to import duplicated user UID
    with pytest.raises(IntegrityError):
        _prepare_duplicate()
def update_indexes(cls):
    """Update indexes."""
    try:
        current_search.flush_and_refresh(index='mef')
    except Exception as err:
        current_app.logger.error(f'ERROR flush and refresh: {err}')
def flush_and_wait(self):
    """Flush index and wait until operation is fully done."""
    current_search.flush_and_refresh(self.search_class.Meta.index)
def test_versioning_rest_flow(mocker, api, api_client, db, es, locations,
                              users, write_token, license_record):
    """Test versioning flow using REST API."""
    mocker.patch('invenio_pidstore.providers.datacite.DataCiteMDSClient')
    # Stash the configuration and enable SIP writing to disk
    orig = api.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    api.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    archive_task_mock = mocker.patch(
        'zenodo.modules.deposit.receivers.archive_sip')

    client = api_client
    test_data = dict(
        metadata=dict(
            upload_type='presentation',
            title='Test title',
            creators=[
                dict(name='Doe, John', affiliation='Atlantis'),
                dict(name='Smith, Jane', affiliation='Atlantis')
            ],
            description='Test Description',
            publication_date='2013-05-08',
            access_right='open'))

    # Prepare headers
    auth = write_token['auth_header']
    headers = [('Content-Type', 'application/json'),
               ('Accept', 'application/json')]
    auth_headers = headers + auth

    # Get deposit URL
    with api.test_request_context():
        deposit_url = url_for('invenio_deposit_rest.depid_list')

    # Create deposit
    response = client.post(deposit_url, data=json.dumps(test_data),
                           headers=auth_headers)
    data = get_json(response, code=201)
    links = data['links']

    # Get deposition
    current_search.flush_and_refresh(index='deposits')
    response = client.get(links['self'], headers=auth)
    data = get_json(response, code=200)
    links = data['links']

    # Upload a file
    response = client.post(
        links['files'],
        data={
            'file': make_file_fixture('test-1.txt'),
            'name': 'test-1.txt',
        },
        headers=auth,
    )
    assert response.status_code == 201

    # Cannot create new version for unpublished record
    response = client.post(links['newversion'], headers=auth_headers)
    assert response.status_code == 403
    assert not archive_task_mock.delay.called

    # Publish deposition
    response = client.post(links['publish'], headers=auth_headers)
    assert response.status_code == 202
    data = get_json(response, code=202)
    record_id = data['record_id']
    recid_pid = PersistentIdentifier.get('recid', str(record_id))

    # Was SIP writing task executed?
    sip = RecordSIP.query.filter_by(pid_id=recid_pid.id).one().sip
    archive_task_mock.delay.assert_called_with(str(sip.id))

    # New version possible for published deposit
    response = client.post(links['newversion'], headers=auth_headers)
    assert response.status_code == 201

    # Calling new version again is a no-op
    response = client.post(links['newversion'], headers=auth_headers)
    links = get_json(response, code=201)['links']
    assert 'latest_draft' in links

    # Get the new version deposit
    current_search.flush_and_refresh(index='deposits')
    response = client.get(links['latest_draft'], headers=auth)
    data = get_json(response, code=200)
    links = data['links']

    # Deleting files allowed for new version
    response = client.get(links['files'], headers=auth_headers)
    data = get_json(response, code=200)
    files_list = list(map(lambda x: {'id': x['id']}, data))
    file_url = '{0}/{1}'.format(links['files'], files_list[0]['id'])
    response = client.delete(file_url, headers=auth)
    assert response.status_code == 204

    # Adding files allowed for new version
    response = client.post(
        links['files'],
        data={
            'file': make_file_fixture('test-2.txt'),
            'name': 'test-2.txt',
        },
        headers=auth,
    )
    assert response.status_code == 201

    # Publish new version
    response = client.post(links['publish'], headers=auth_headers)
    assert response.status_code == 202
    data = get_json(response, code=202)
    record_id = data['record_id']
    recid_pid = PersistentIdentifier.get('recid', str(record_id))

    # Was SIP writing task executed?
    sip = RecordSIP.query.filter_by(pid_id=recid_pid.id).one().sip
    archive_task_mock.delay.assert_called_with(str(sip.id))

    # Change the config back
    api.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig
def test_unicode(api_client, es, location, json_auth_headers, deposit_url,
                 get_json, license_record, grant_record, auth_headers,
                 communities):
    """Rough validation of input against output data."""
    client = api_client
    headers = json_auth_headers

    test_data = dict(metadata=dict(
        access_right='open',
        access_conditions='Αυτή είναι μια δοκιμή',
        communities=[{'identifier': 'c1'}],
        conference_acronym='Αυτή είναι μια δοκιμή',
        conference_dates='هذا هو اختبار',
        conference_place='Սա փորձություն',
        conference_title='Гэта тэст',
        conference_url='http://someurl.com',
        conference_session='5',
        conference_session_part='a',
        creators=[
            dict(name="Doe, John", affiliation="Това е тест"),
            dict(name="Smith, Jane", affiliation="Tio ĉi estas testo")
        ],
        description="这是一个测试",
        doi="10.1234/foo.bar",
        embargo_date="2010-12-09",
        grants=[
            dict(id="282896"),
        ],
        imprint_isbn="Some isbn",
        imprint_place="這是一個測試",
        imprint_publisher="ეს არის გამოცდა",
        journal_issue="આ એક કસોટી છે",
        journal_pages="זהו מבחן",
        journal_title="यह एक परीक्षण है",
        journal_volume="Þetta er prófun",
        keywords=["これはテストです", "ಇದು ಪರೀಕ್ಷೆ"],
        subjects=[
            dict(scheme="gnd", identifier="1234567899", term="これはです"),
            dict(scheme="gnd", identifier="1234567898", term="ಇ"),
        ],
        license="CC0-1.0",
        notes="이것은 테스트입니다",
        partof_pages="ນີ້ແມ່ນການທົດສອບ",
        partof_title="ही चाचणी आहे",
        prereserve_doi=True,
        publication_date="2013-09-12",
        publication_type="book",
        related_identifiers=[
            dict(identifier='2011ApJS..192...18K',
                 relation='isAlternativeIdentifier'),
            dict(identifier='10.1234/foo.bar2', relation='isCitedBy'),
            dict(identifier='10.1234/foo.bar3', relation='cites'),
        ],
        thesis_supervisors=[
            dict(name="Doe Sr., این یک تست است", affiliation="Atlantis"),
            dict(name="Это Sr., Jane", affiliation="Atlantis")
        ],
        thesis_university="இந்த ஒரு சோதனை",
        contributors=[
            dict(name="Doe Sr., ن یک تست", affiliation="Atlantis",
                 type="Other"),
            dict(name="SmЭтith Sr., Marco", affiliation="Atlantis",
                 type="DataCurator")
        ],
        title="Đây là một thử nghiệm",
        upload_type="publication",
    ))

    # Create
    res = client.post(
        deposit_url, data=json.dumps(test_data), headers=headers)
    links = get_json(res, code=201)['links']
    current_search.flush_and_refresh(index='deposits')

    # Upload file
    assert client.post(
        links['files'],
        data=dict(file=(BytesIO(b'test'), 'test.txt'), name='test.txt'),
        headers=auth_headers,
    ).status_code == 201

    # Publish deposition
    response = client.post(links['publish'], headers=auth_headers)
    record_id = get_json(response, code=202)['record_id']

    # Get record.
    current_search.flush_and_refresh(index='records')
    response = client.get(
        url_for('invenio_records_rest.recid_item', pid_value=record_id))
def test_edit_flow(datacite_mock, api_client, db, es, location,
                   json_auth_headers, deposit_url, get_json, auth_headers,
                   json_headers, license_record, communities, resolver):
    """Test simple flow using REST API."""
    headers = json_auth_headers
    client = api_client
    test_data = dict(metadata=dict(
        upload_type='presentation',
        title='Test title',
        creators=[
            dict(name='Doe, John', affiliation='Atlantis'),
            dict(name='Smith, Jane', affiliation='Atlantis')
        ],
        description='Test Description',
        publication_date='2013-05-08',
        access_right='open',
        license='CC0-1.0',
        communities=[{'identifier': 'c1'}, {'identifier': 'c3'}],
    ))

    # Create deposit
    response = client.post(
        deposit_url, data=json.dumps(test_data), headers=headers)
    data = get_json(response, code=201)

    # Get identifier and links
    current_search.flush_and_refresh(index='deposits')
    links = data['links']

    # Upload 3 files
    for i in range(3):
        f = 'test{0}.txt'.format(i)
        response = client.post(
            links['files'],
            data=dict(file=(BytesIO(b'ctx'), f), name=f),
            headers=auth_headers,
        )
        assert response.status_code == 201, i

    # Update metadata
    newdata = dict(metadata=data['metadata'])
    newdata['metadata']['title'] = 'Updated title'
    resdata = get_json(
        client.put(links['self'], data=json.dumps(newdata), headers=headers),
        code=200)

    # Publish deposition
    response = client.post(links['publish'], headers=auth_headers)
    data = get_json(response, code=202)
    record_id = data['record_id']

    assert PersistentIdentifier.query.filter_by(
        pid_type='depid').count() == 1
    # There should be two 'recid' PIDs - the concept PID and the version PID
    assert PersistentIdentifier.query.filter_by(
        pid_type='recid').count() == 2

    recid_pid = PersistentIdentifier.get('recid', str(record_id))
    doi_pid = PersistentIdentifier.get(
        pid_type='doi', pid_value='10.5072/zenodo.1')
    assert doi_pid.status == PIDStatus.RESERVED

    # This task (datacite_register) would normally be executed asynchronously
    datacite_register(recid_pid.pid_value, recid_pid.object_uuid)
    assert doi_pid.status == PIDStatus.REGISTERED

    # Make sure it was registered properly in DataCite.
    # It should be called twice - for the concept DOI and the version DOI.
    assert datacite_mock().metadata_post.call_count == 2
    # Concept DOI call
    datacite_mock().doi_post.assert_any_call(
        '10.5072/zenodo.1', 'https://zenodo.org/record/1')
    # Record DOI call
    datacite_mock().doi_post.assert_any_call(
        '10.5072/zenodo.2', 'https://zenodo.org/record/2')

    # Does the record exist?
    current_search.flush_and_refresh(index='records')
    preedit_data = get_json(client.get(
        url_for('invenio_records_rest.recid_item', pid_value=record_id),
        headers=json_headers,
    ), code=200)
    expected_doi = '10.5072/zenodo.{0}'.format(record_id)
    assert preedit_data['doi'] == expected_doi
    # - community c3 got auto-accepted (owned by the deposit user)
    assert preedit_data['metadata']['communities'] == [{'identifier': 'c3'}]

    # Are files downloadable by everyone (open access)?
    assert len(preedit_data['files']) == 3
    download_url = preedit_data['files'][0]['links']['download']
    assert client.get(download_url).status_code == 200

    # Edit record - can now be done immediately after publishing.
    response = client.post(links['edit'], headers=auth_headers)
    assert response.status_code == 201

    # Editing a second time is invalid.
    response = client.post(links['edit'], headers=auth_headers)
    assert response.status_code == 403  # FIXME 400

    # Get data
    data = get_json(
        client.get(links['self'], headers=auth_headers), code=200)

    # Not allowed to delete
    assert client.delete(
        links['self'], headers=auth_headers).status_code == 403

    # Update metadata
    data = dict(metadata=data['metadata'])
    data['metadata'].update(dict(
        title='New title',
        access_right='closed',
        creators=[
            dict(name="Smith, Jane", affiliation="Atlantis"),
            dict(name="Doe, John", affiliation="Atlantis"),
        ],
        communities=[{'identifier': 'c1'}],
    ))
    resdata = get_json(
        client.put(links['self'], data=json.dumps(data), headers=headers),
        code=200)
    assert resdata['title'] == 'New title'
    assert resdata['metadata']['title'] == 'New title'

    # Trying to change the DOI is invalid
    data['metadata']['doi'] = '10.1234/foo'
    data = get_json(
        client.put(links['self'], data=json.dumps(data), headers=headers),
        code=400)

    # Approve community
    c = Community.get('c1')
    _, record = resolver.resolve(str(record_id))
    c.accept_record(record)
    record.commit()
    db.session.commit()

    # Get the record to confirm that both communities are visible now
    assert get_json(client.get(
        url_for('invenio_records_rest.recid_item', pid_value=record_id),
        headers=json_headers,
    ), code=200)['metadata']['communities'] == [
        {'identifier': 'c1'},
        {'identifier': 'c3'},
    ]

    # Publish
    response = client.post(links['publish'], headers=auth_headers)
    data = get_json(response, code=202)
    current_search.flush_and_refresh(index='records')

    # - is the record still accessible?
    postedit_data = get_json(client.get(
        url_for('invenio_records_rest.recid_item', pid_value=record_id),
        headers=json_headers,
    ), code=200)
    # - sanity checks
    assert postedit_data['doi'] == expected_doi
    assert postedit_data['record_id'] == record_id

    # - files should no longer be downloadable (closed access)
    # - download_url worked before the edit, so make sure it doesn't now
    assert 'files' not in postedit_data
    assert client.get(download_url).status_code == 404

    # - c3 was removed, so only c1 should be visible now
    assert postedit_data['metadata']['communities'] == [
        {'identifier': 'c1'},
    ]

    # Edit
    data = get_json(
        client.post(links['edit'], headers=auth_headers), code=201)

    # Update
    data = dict(metadata=data['metadata'])
    data['metadata'].update(dict(title='Will be discarded'))
    resdata = get_json(
        client.put(links['self'], data=json.dumps(data), headers=headers),
        code=200)

    # Discard
    data = get_json(
        client.post(links['discard'], headers=auth_headers), code=201)

    # Get and assert metadata
    data = get_json(
        client.get(links['self'], headers=auth_headers), code=200)
    assert data['title'] == postedit_data['title']
def test_input_output(api_client, es, json_auth_headers, deposit_url,
                      get_json, license_record, grant_record, location):
    """Rough validation of input against output data."""
    client = api_client
    headers = json_auth_headers
    test_data = dict(metadata=dict(
        access_right='embargoed',
        communities=[{'identifier': 'cfa'}],
        conference_acronym='Some acronym',
        conference_dates='Some dates',
        conference_place='Some place',
        conference_title='Some title',
        conference_url='http://someurl.com',
        conference_session='VI',
        conference_session_part='1',
        creators=[
            dict(name="Doe, John", affiliation="Atlantis",
                 orcid="0000-0002-1825-0097", gnd="170118215"),
            dict(name="Smith, Jane", affiliation="Atlantis")
        ],
        description="Some description",
        doi="10.1234/foo.bar",
        embargo_date=(
            datetime.utcnow().date() + timedelta(days=1)).isoformat(),
        grants=[dict(id="282896")],
        imprint_isbn="Some isbn",
        imprint_place="Some place",
        imprint_publisher="Some publisher",
        journal_issue="Some issue",
        journal_pages="Some pages",
        journal_title="Some journal name",
        journal_volume="Some volume",
        keywords=["Keyword 1", "keyword 2"],
        subjects=[
            dict(scheme="gnd", identifier="gnd:1234567899",
                 term="Astronaut"),
            dict(scheme="gnd", identifier="gnd:1234567898", term="Amish"),
        ],
        license="CC0-1.0",
        notes="Some notes",
        partof_pages="SOme part of",
        partof_title="Some part of title",
        prereserve_doi=True,
        publication_date="2013-09-12",
        publication_type="book",
        references=[
            "Reference 1",
            "Reference 2",
        ],
        related_identifiers=[
            dict(identifier='10.1234/foo.bar2', relation='isCitedBy',
                 scheme='doi'),
            dict(identifier='10.1234/foo.bar3', relation='cites',
                 scheme='doi'),
            dict(identifier='2011ApJS..192...18K',
                 relation='isAlternateIdentifier', scheme='ads'),
        ],
        thesis_supervisors=[
            dict(name="Doe Sr., John", affiliation="Atlantis"),
            dict(name="Smith Sr., Jane", affiliation="Atlantis",
                 orcid="0000-0002-1825-0097", gnd="170118215")
        ],
        thesis_university="Some thesis_university",
        contributors=[
            dict(name="Doe Sr., Jochen", affiliation="Atlantis",
                 type="Other"),
            dict(name="Smith Sr., Marco", affiliation="Atlantis",
                 orcid="0000-0002-1825-0097", gnd="170118215",
                 type="DataCurator")
        ],
        title="Test title",
        upload_type="publication",
    ))

    # Create
    res = client.post(
        deposit_url, data=json.dumps(test_data), headers=headers)
    links = get_json(res, code=201)['links']
    current_search.flush_and_refresh(index='deposits')

    # Get serialization.
    data = get_json(client.get(links['self'], headers=headers), code=200)
    # - fix known differences.
    test_data['metadata'].update({
        'prereserve_doi': {'doi': '10.5072/zenodo.1', 'recid': 1}
    })
    assert data['metadata'] == test_data['metadata']
def create_fake_record():
    """Create records for demo purposes."""
    fake = Faker()
    data_to_use = {
        "_access": {
            "metadata_restricted": False,
            "files_restricted": False
        },
        "_created_by": 2,
        "_default_preview": "previewer one",
        "_internal_notes": [{
            "user": "******",
            "note": "RDM record",
            "timestamp": fake.iso8601(tzinfo=None, end_datetime=None),
        }],
        "_owners": [1],
        "access_right": "open",
        "embargo_date": fake.iso8601(tzinfo=None, end_datetime=None),
        "contact": "*****@*****.**",
        "resource_type": fake_resource_type(),
        "identifiers": {
            "DOI": "10.9999/rdm.9999999",
            "arXiv": "9999.99999",
        },
        "creators": [{
            "name": fake.name(),
            "type": "Personal",
            "identifiers": {
                "Orcid": "0000-0002-1825-0097",
            },
            "affiliations": [{
                "name": fake.company(),
                "identifiers": {
                    "ror": "03yrm5c26"
                }
            }]
        }],
        "titles": [{
            "title": fake.company() + "'s gallery",
            "type": "Other",
            "lang": "eng"
        }],
        "publication_date": fake_edtf_level_0(),
        "subjects": [{
            "subject": "Romans",
            "identifier": "subj-1",
            "scheme": "no-scheme"
        }],
        "contributors": [{
            "name": fake.name(),
            "type": "Personal",
            "identifiers": {
                "Orcid": "9999-9999-9999-9998",
            },
            "affiliations": [{
                "name": fake.company(),
                "identifiers": {
                    "ror": "03yrm5c26"
                }
            }],
            "role": "RightsHolder"
        }],
        "dates": [{
            # No end date to avoid computations based on start
            "start": fake.iso8601(tzinfo=None, end_datetime=None),
            "description": "Random test date",
            "type": "Other"
        }],
        "language": "eng",
        "related_identifiers": [{
            "identifier": "10.9999/rdm.9999988",
            "scheme": "DOI",
            "relation_type": "Requires",
            "resource_type": fake_resource_type()
        }],
        "version": "v0.0.1",
        "licenses": [{
            "license": "Berkeley Software Distribution 3",
            "uri": "https://opensource.org/licenses/BSD-3-Clause",
            "identifier": "BSD-3",
            "scheme": "BSD-3",
        }],
        "descriptions": [{
            "description": fake.text(max_nb_chars=3000),
            "type": "Abstract",
            "lang": "eng"
        }],
        "locations": [{
            "point": {
                "lat": str(fake.latitude()),
                "lon": str(fake.longitude())
            },
            "place": fake.location_on_land()[2],
            "description": "Random place on land for random coordinates..."
        }],
        "references": [{
            "reference_string": "Reference to something et al.",
            "identifier": "9999.99988",
            "scheme": "GRID"
        }]
    }

    # Create and index record
    rec_uuid = uuid.uuid4()
    current_pidstore.minters['recid_v2'](rec_uuid, data_to_use)
    record = Record.create(data_to_use, id_=rec_uuid)
    RecordIndexer().index(record)

    # Flush to index and database
    current_search.flush_and_refresh(index='records')
    db.session.commit()

    return record
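# `fake_resource_type` and `fake_edtf_level_0` are used above but not defined
# in this listing. Minimal sketches of what such demo-data helpers could look
# like (assumptions, not the actual invenio-rdm-records fixtures):
import random

from faker import Faker

def fake_resource_type():
    """Hypothetical helper: return a random resource type dict."""
    return random.choice([
        {"type": "publication", "subtype": "publication-article"},
        {"type": "image", "subtype": "image-photo"},
    ])

def fake_edtf_level_0():
    """Hypothetical helper: return an EDTF level-0 date string."""
    return Faker().date(pattern='%Y-%m-%d')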
def test_literature_citations_api_with_parameter_page_2(api_client):
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(record_json)
    record.commit()

    record_json_ref_1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_1 = InspireRecord.create(record_json_ref_1)
    record_ref_1.commit()

    record_json_ref_2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 333,
        'titles': [{'title': 'Luke Cage'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_2 = InspireRecord.create(record_json_ref_2)
    record_ref_2.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get('/literature/111/citations?size=1&page=2',
                              headers={'Accept': 'application/json'})
    result = json.loads(response.get_data(as_text=True))

    # With page size 1, page 2 contains exactly one of the two citing
    # records, so either alternative is acceptable.
    expected_metadata = [{
        "citation_count": 2,
        "citations": [
            {"control_number": 222,
             "titles": [{"title": "Frank Castle"}]},
        ]
    }, {
        "citation_count": 2,
        "citations": [
            {"control_number": 333,
             "titles": [{"title": "Luke Cage"}]},
        ]
    }]

    assert response.status_code == 200
    assert result['metadata'] in expected_metadata

    _delete_record('lit', 111)
    _delete_record('lit', 222)
    _delete_record('lit', 333)
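# `_delete_record` is called throughout these INSPIRE tests but not defined
# in this listing. A minimal cleanup sketch built only on plain
# invenio-pidstore/invenio-records APIs (an assumption; the real INSPIRE
# helper may differ):
from invenio_pidstore.models import PersistentIdentifier

def _delete_record(pid_type, pid_value):
    """Hypothetical cleanup: hard-delete a record and its PID after a test."""
    pid = PersistentIdentifier.get(pid_type, str(pid_value))
    record = InspireRecord.get_record(pid.object_uuid)
    record.delete(force=True)  # hard-delete the record row
    pid.delete()               # drop the persistent identifier
    db.session.commit()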
def flush(cls):
    """Flush indexes."""
    current_search.flush_and_refresh(cls.Meta.index)
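# The `flush` method above takes `cls` and reads `cls.Meta.index`, so it is
# presumably a classmethod on a class carrying an elasticsearch-dsl-style
# `Meta.index` attribute. A minimal sketch of such a host class (names are
# illustrative, not from the source):
class RecordsSearchProxy(object):
    """Hypothetical search proxy bound to a single index."""

    class Meta:
        index = 'records'

    @classmethod
    def flush(cls):
        """Flush indexes."""
        current_search.flush_and_refresh(cls.Meta.index)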
def test_literature_citations_api_with_not_existing_pid_value(api_client):
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(record_json)
    record.commit()

    record_json_ref_1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_1 = InspireRecord.create(record_json_ref_1)
    record_ref_1.commit()

    record_json_ref_2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 333,
        'titles': [{'title': 'Luke Cage'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_2 = InspireRecord.create(record_json_ref_2)
    record_ref_2.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get('/literature/444/citations',
                              headers={'Accept': 'application/json'})

    assert response.status_code == 404

    _delete_record('lit', 111)
    _delete_record('lit', 222)
    _delete_record('lit', 333)
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    # This test doesn't use the isolated_app fixture because it needs to
    # commit to the DB in order to create record versions.
    json1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited1 = InspireRecord.create(data=json1, skip_files=True)
    cited1.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', cited1['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    json2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This also is the record being cited'}],
        'control_number': 9998,
        '_collections': ['Literature'],
    }
    cited2 = InspireRecord.create(data=json2, skip_files=True)
    cited2.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', cited2['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec1).total == 0
    assert LiteratureSearch.citations(es_rec2).total == 0

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{
            'reference': {
                'authors': [{'full_name': 'Smith, J.'}],
            }
        }],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # The references carry no resolved record links yet, so the citation
    # counts stay at zero.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec1).total == 0
    assert LiteratureSearch.citations(es_rec2).total == 0

    references = {
        'references': [
            {'record': {
                '$ref': 'http://localhost:5000/api/literature/9998'}},
            {'record': {
                '$ref': 'http://localhost:5000/api/literature/9999'}},
        ]
    }
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Both cited records are now resolvable, so both get counted.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 1
    assert es_rec2['citation_count'] == 1
    assert LiteratureSearch.citations(es_rec1).total == 1
    assert LiteratureSearch.citations(es_rec2).total == 1

    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited1['control_number'])
    _delete_record('lit', cited2['control_number'])
def test_literature_citations_api_with_full_citing_record(api_client):
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(record_json)
    record.commit()

    record_json_ref_1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'authors': [{
            "full_name": "Urhan, Ahmet",
        }],
        'publication_info': [{
            "artid": "HAL Id : hal-01735421, "
                     "https://hal.archives-ouvertes.fr/hal-01735421",
            "page_start": "1",
        }],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_1 = InspireRecord.create(record_json_ref_1)
    record_ref_1.commit()

    record_json_ref_2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 333,
        'titles': [{'title': 'Luke Cage'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    record_ref_2 = InspireRecord.create(record_json_ref_2)
    record_ref_2.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get('/literature/111/citations',
                              headers={'Accept': 'application/json'})
    result = json.loads(response.get_data(as_text=True))
    result['metadata']['citations'].sort()

    expected_metadata = {
        "citation_count": 2,
        "citations": [{
            'authors': [{
                "full_name": "Urhan, Ahmet",
                "first_name": "Ahmet",
                "last_name": "Urhan",
                "signature_block": "URANa",
                "uuid": result['metadata']['citations'][1]['authors'][0][
                    'uuid'],
            }],
            'control_number': 222,
            'titles': [{'title': 'Frank Castle'}],
        }, {
            "control_number": 333,
            "titles": [{"title": "Luke Cage"}],
        }]
    }

    assert response.status_code == 200
    expected_metadata['citations'].sort()
    assert expected_metadata == result['metadata']

    _delete_record('lit', 111)
    _delete_record('lit', 222)
    _delete_record('lit', 333)
def test_index_after_commit_indexes_raises_if_cited_records_are_not_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    # This test doesn't use the isolated_app fixture because it needs to
    # commit to the DB in order to create record versions.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{
            "reference": {
                'authors': [{'full_name': 'Smith, J.'}],
            }
        }],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = 'lit', record['control_number'], 2
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Reference a record that was never created in the DB.
    references = {
        'references': [{
            "curated_relation": False,
            "record": {
                "$ref": "http://localhost:5000/api/literature/9999"
            },
            "reference": {
                'authors': [{'full_name': 'Smith, J.'}],
            }
        }]
    }
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    with pytest.raises(MissingCitedRecordError):
        index_modified_citations_from_record(*expected_args)

    _delete_record('lit', 8888)
def test_literature_citations_api_with_superseded_records(app, api_client):
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
    }
    record = InspireRecord.create(record_json)
    record.commit()

    # A citing record that has a successor (i.e. is superseded) should not
    # be counted among the citations.
    citing_superseded_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'related_records': [{
            'record': {'$ref': 'https://link-to-successor'},
            'relation': 'successor'
        }],
        'document_type': ['article'],
        'control_number': 222,
        'titles': [{'title': 'Frank Castle'}],
        'references': [{'record': {'$ref': record._get_ref()}}],
        '_collections': ['Literature'],
    }
    citing_superseded_record = InspireRecord.create(citing_superseded_json)
    citing_superseded_record.commit()

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    response = api_client.get('/literature/111/citations',
                              headers={'Accept': 'application/json'})
    result = json.loads(response.get_data(as_text=True))

    expected_metadata = {"citation_count": 0, "citations": []}
    expected_metadata['citations'].sort()
    result['metadata']['citations'].sort()

    assert response.status_code == 200
    assert expected_metadata == result['metadata']

    _delete_record('lit', 111)
    _delete_record('lit', 222)
def test_listidentifiers(app):
    """Test verb ListIdentifiers."""
    from invenio_oaiserver.models import OAISet

    with app.app_context():
        current_oaiserver.unregister_signals_oaiset()
        # create new OAI Set
        with db.session.begin_nested():
            oaiset = OAISet(
                spec='test0',
                name='Test0',
                description='test desc 0',
                search_pattern='title_statement.title:Test0',
            )
            db.session.add(oaiset)
        db.session.commit()

    run_after_insert_oai_set()

    with app.test_request_context():
        indexer = RecordIndexer()

        # create a new record (inside the OAI Set)
        with db.session.begin_nested():
            record_id = uuid.uuid4()
            data = {'title_statement': {'title': 'Test0'}}
            recid_minter(record_id, data)
            pid = oaiid_minter(record_id, data)
            record = Record.create(data, id_=record_id)

        db.session.commit()

        indexer.index_by_id(record_id)
        current_search.flush_and_refresh('_all')

        pid_value = pid.pid_value

        # get the list of identifiers
        with app.test_client() as c:
            result = c.get(
                '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc')

        tree = etree.fromstring(result.data)
        assert len(tree.xpath('/x:OAI-PMH', namespaces=NAMESPACES)) == 1
        assert len(tree.xpath('/x:OAI-PMH/x:ListIdentifiers',
                              namespaces=NAMESPACES)) == 1
        assert len(tree.xpath('/x:OAI-PMH/x:ListIdentifiers/x:header',
                              namespaces=NAMESPACES)) == 1
        identifier = tree.xpath(
            '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier',
            namespaces=NAMESPACES)
        assert len(identifier) == 1
        assert identifier[0].text == str(pid_value)
        datestamp = tree.xpath(
            '/x:OAI-PMH/x:ListIdentifiers/x:header/x:datestamp',
            namespaces=NAMESPACES)
        assert len(datestamp) == 1
        assert datestamp[0].text == datetime_to_datestamp(record.updated)

        # Check from:until range
        with app.test_client() as c:
            # Check date and datetime timestamps.
            for granularity in (False, True):
                result = c.get(
                    '/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc'
                    '&from={0}&until={1}&set=test0'.format(
                        datetime_to_datestamp(
                            record.updated - timedelta(1),
                            day_granularity=granularity),
                        datetime_to_datestamp(
                            record.updated + timedelta(1),
                            day_granularity=granularity),
                    ))
                assert result.status_code == 200

                tree = etree.fromstring(result.data)
                identifier = tree.xpath(
                    '/x:OAI-PMH/x:ListIdentifiers/x:header/x:identifier',
                    namespaces=NAMESPACES)
                assert len(identifier) == 1
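# `NAMESPACES` is used in the XPath queries above but not defined in this
# listing. Given the `x:` prefixes, it plausibly maps onto the standard
# OAI-PMH 2.0 namespace (an assumption based on the xpath expressions):
NAMESPACES = {'x': 'http://www.openarchives.org/OAI/2.0/'}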
def test_file_ops(api_client, deposit, json_auth_headers, auth_headers,
                  deposit_url, get_json):
    """Test file operations."""
    client = api_client
    headers = json_auth_headers
    auth = auth_headers

    # Create empty deposit
    res = client.post(deposit_url, data=json.dumps({}), headers=headers)
    links = get_json(res, code=201)['links']
    current_search.flush_and_refresh(index='deposits')

    # Upload the same file twice - the first is ok, the second is not
    for code in [201, 400]:
        f = dict(file=(BytesIO(b'test'), 'test1.txt'), name='test1.txt')
        res = client.post(links['files'], data=f, headers=auth)
        assert res.status_code == code

    # Upload another file
    client.post(
        links['files'],
        data=dict(file=(BytesIO(b'test'), 'test2.txt'), name='test2.txt'),
        headers=auth)

    # List files
    data = get_json(client.get(links['files'], headers=headers), code=200)
    assert len(data) == 2
    file_id = data[0]['id']
    file_url = '{0}/{1}'.format(links['files'], file_id)

    # Get file
    assert client.get(file_url, headers=headers).status_code == 200

    # File does not exist
    assert client.get('{0}/invalid'.format(links['files']),
                      headers=headers).status_code == 404

    data = get_json(client.get(links['files'], headers=headers), code=200)
    invalid_files_list = [dict(filename=x['filename']) for x in data]
    ok_files_list = list(reversed([dict(id=x['id']) for x in data]))

    # Sort - invalid
    assert client.put(links['files'], data=json.dumps(invalid_files_list),
                      headers=headers).status_code == 400
    # Sort - valid
    assert client.put(links['files'], data=json.dumps(ok_files_list),
                      headers=headers).status_code == 200

    # Delete
    assert client.delete(file_url, headers=headers).status_code == 204
    assert client.get(file_url, headers=headers).status_code == 404
    data = get_json(client.get(links['files'], headers=headers), code=200)
    assert len(data) == 1

    file_id = data[0]['id']
    file_url = '{0}/{1}'.format(links['files'], file_id)

    # Rename
    assert client.put(file_url,
                      data=json.dumps(dict(filename='rename.txt')),
                      headers=headers).status_code == 200
    # Bad renaming
    for data in [dict(name='test.txt'), dict(filename='../../etc/passwd')]:
        assert client.put(file_url, data=json.dumps(data),
                          headers=headers).status_code == 400

    data = get_json(client.get(file_url, headers=headers), code=200)
    assert data['filename'] == 'rename.txt'
def test_delete_user_with_counter(app, db, testdata, mocker):
    """Test deleting users no longer in LDAP, after <n> checks."""
    ldap_users = [{
        "displayName": [b"New user"],
        "department": [b"A department"],
        "uidNumber": [b"111"],
        "mail": [b"*****@*****.**"],
        "cernAccountType": [b"Primary"],
        "employeeID": [b"00111"],
        "postOfficeBox": [b"M12345"]
    }, {
        "displayName": [b"A new name"],
        "department": [b"A new department"],
        "uidNumber": [b"222"],
        "mail": [b"*****@*****.**"],
        "cernAccountType": [b"Primary"],
        "employeeID": [b"00222"],
        "postOfficeBox": [b"M12345"]
    }, {
        "displayName": [b"old user left CERN"],
        "department": [b"Department"],
        "uidNumber": [b"444"],
        "mail": [b"*****@*****.**"],
        "cernAccountType": [b"Primary"],
        "employeeID": [b"00444"],
        "postOfficeBox": [b"M12345"]
    }]

    new_ldap_response = [{
        "displayName": [b"New user"],
        "department": [b"A department"],
        "uidNumber": [b"111"],
        "mail": [b"*****@*****.**"],
        "cernAccountType": [b"Primary"],
        "employeeID": [b"00111"],
        "postOfficeBox": [b"M12345"]
    }]

    def _prepare():
        """Prepare data."""
        importer = LdapUserImporter()
        # Prepare users in the DB. Use `LdapUserImporter` to make it easy
        # to create old users.
        existing_user = deepcopy(ldap_users[0])
        ldap_user = serialize_ldap_user(existing_user)
        importer.import_user(ldap_user)

        user_to_delete1 = deepcopy(ldap_users[1])
        ldap_user = serialize_ldap_user(user_to_delete1)
        user_to_delete_id1 = importer.import_user(ldap_user)

        user_to_delete2 = deepcopy(ldap_users[2])
        ldap_user = serialize_ldap_user(user_to_delete2)
        user_to_delete_id2 = importer.import_user(ldap_user)

        db.session.commit()
        current_app_ils.patron_indexer.reindex_patrons()
        return user_to_delete_id1, user_to_delete_id2

    user_to_delete_id1, user_to_delete_id2 = _prepare()

    # mock LDAP response
    mocker.patch(
        "cds_ils.ldap.api.LdapClient.get_primary_accounts",
        return_value=new_ldap_response,
    )

    ldap_users_count, deleted_accounts = delete_users(dry_run=False)
    assert ldap_users_count == 1
    assert deleted_accounts == 0

    ra1 = RemoteAccount.query.filter(
        RemoteAccount.user_id == user_to_delete_id1).one()
    ra2 = RemoteAccount.query.filter(
        RemoteAccount.user_id == user_to_delete_id2).one()
    assert ra1.extra_data["deletion_countdown"] == 1
    assert ra2.extra_data["deletion_countdown"] == 1

    config_checks_before_deletion = \
        current_app.config["CDS_ILS_PATRON_DELETION_CHECKS"]
    # mark the second account for deletion on the next check
    ra2.extra_data["deletion_countdown"] = config_checks_before_deletion

    # put the first user back into the LDAP response to unmark it
    fixed_ldap_response = [
        {
            "displayName": [b"New user"],
            "department": [b"A department"],
            "uidNumber": [b"111"],
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00111"],
            "postOfficeBox": [b"M12345"]
        },
        {
            "displayName": [b"A new name"],
            "department": [b"A new department"],
            "uidNumber": [b"222"],
            "mail": [b"*****@*****.**"],
            "cernAccountType": [b"Primary"],
            "employeeID": [b"00222"],
            "postOfficeBox": [b"M12345"]
        },
    ]
    mocker.patch(
        "cds_ils.ldap.api.LdapClient.get_primary_accounts",
        return_value=fixed_ldap_response,
    )

    ldap_users_count, deleted_accounts = delete_users(dry_run=False)
    assert ldap_users_count == 2
    # only the marked account should be deleted (not the account from
    # testdata and not the unmarked one)
    assert deleted_accounts == 1
    current_search.flush_and_refresh(index="*")

    ra1 = RemoteAccount.query.filter(
        RemoteAccount.user_id == user_to_delete_id1).one()
    # make sure the first account was unmarked for deletion
    assert ra1.extra_data["deletion_countdown"] == 0

    # make sure the second account was deleted
    with pytest.raises(NoResultFound):
        RemoteAccount.query.filter(
            RemoteAccount.user_id == user_to_delete_id2).one()
def test_versioning_indexing(db, es, deposit, deposit_file):
    """Test the indexing of 'version' relations."""
    deposit_index_name = 'deposits-records-record-v1.0.0'
    records_index_name = 'records-record-v1.0.0'

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    RecordIndexer().index_by_id(str(record_v1.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 1
    assert len(s_rec) == 1
    assert 'relations' in s_dep[0]['_source']
    assert 'relations' in s_rec[0]['_source']

    expected = {
        "version": [{
            "draft_child_deposit": None,
            "index": 0,
            "is_last": True,
            "last_child": {"pid_type": "recid", "pid_value": "2"},
            "count": 1,
            "parent": {"pid_type": "recid", "pid_value": "1"},
        }]
    }
    assert s_dep[0]['_source']['relations'] == expected
    assert s_rec[0]['_source']['relations'] == expected

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 2  # Two deposits should be indexed
    assert len(s_rec) == 1  # One, since the record does not exist yet

    s_dep1 = current_search.client.get(
        index=deposit_index_name, id=deposit_v1.id)
    s_dep2 = current_search.client.get(
        index=deposit_index_name, id=deposit_v2.id)

    expected_d1 = {
        "version": [{
            "draft_child_deposit": {
                "pid_type": "depid",
                "pid_value": "3"
            },
            "index": 0,
            "is_last": False,
            "last_child": {"pid_type": "recid", "pid_value": "2"},
            "parent": {"pid_type": "recid", "pid_value": "1"},
            "count": 2  # For deposits, draft children are also counted
        }]
    }
    expected_d2 = {
        "version": [{
            "draft_child_deposit": {
                "pid_type": "depid",
                "pid_value": "3"
            },
            "index": 1,
            "is_last": True,
            "last_child": {"pid_type": "recid", "pid_value": "2"},
            "count": 2,  # For deposits, draft children are also counted
            "parent": {"pid_type": "recid", "pid_value": "1"},
        }]
    }
    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2

    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    RecordIndexer().index_by_id(str(record_v2.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 2
    assert len(s_rec) == 2

    s_dep1 = current_search.client.get(
        index=deposit_index_name, id=deposit_v1.id)
    s_dep2 = current_search.client.get(
        index=deposit_index_name, id=deposit_v2.id)
    s_rec1 = current_search.client.get(
        index=records_index_name, id=record_v1.id)
    s_rec2 = current_search.client.get(
        index=records_index_name, id=record_v2.id)

    expected_d1 = {
        "version": [{
            "draft_child_deposit": None,
            "index": 0,
            "is_last": False,
            "last_child": {"pid_type": "recid", "pid_value": "3"},
            "parent": {"pid_type": "recid", "pid_value": "1"},
            "count": 2
        }]
    }
    expected_d2 = {
        "version": [{
            "draft_child_deposit": None,
            "index": 1,
            "is_last": True,
            "last_child": {"pid_type": "recid", "pid_value": "3"},
            "count": 2,
            "parent": {"pid_type": "recid", "pid_value": "1"},
        }]
    }
    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2

    expected_r1 = {
        "version": [{
            "draft_child_deposit": None,
            "index": 0,
            "is_last": False,
            "last_child": {"pid_type": "recid", "pid_value": "3"},
            "parent": {"pid_type": "recid", "pid_value": "1"},
            "count": 2
        }]
    }
    expected_r2 = {
        "version": [{
            "draft_child_deposit": None,
            "index": 1,
            "is_last": True,
            "last_child": {"pid_type": "recid", "pid_value": "3"},
            "count": 2,
            "parent": {"pid_type": "recid", "pid_value": "1"},
        }]
    }
    assert s_rec1['_source']['relations'] == expected_r1
    assert s_rec2['_source']['relations'] == expected_r2
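# `publish_and_expunge` is used above but not defined in this listing. A
# plausible sketch of such a helper (an assumption, not necessarily the
# actual Zenodo utility): publish the deposit, commit, and expunge the
# SQLAlchemy identity map so later fetches re-read from the database.
def publish_and_expunge(db, deposit):
    """Hypothetical helper: publish a deposit and reset the session cache."""
    deposit.publish()
    dep_uuid = deposit.id
    db.session.commit()
    db.session.expunge_all()  # drop cached instances from the session
    return ZenodoDeposit.get_record(dep_uuid)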