def test_identifier_schemes(app, db, es, locations, license_record,
                            sample_identifiers):
    """Test supported identifier schemes."""
    s = legacyjson.LegacyMetadataSchemaV1(strict=True)
    result = s.load(d(related_identifiers=[
        {'identifier': _id, 'scheme': scheme, 'relation': 'references'}
        for scheme, (_id, _) in sample_identifiers.items()
    ]))
    ZenodoDeposit.create(result.data).validate()
def test_autoadd_explicit_newversion(
        app, db, users, communities, deposit, deposit_file,
        communities_autoadd_enabled):
    """Explicitly add the auto-added communities in a new version."""
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2['communities'] = ['ecfunded', 'grants_comm', 'zenodo']
    deposit_v2['grants'] = [{'title': 'SomeGrant'}, ]
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    assert record_v1.get('communities', []) == ['grants_comm', ]
    assert deposit_v1.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
    assert record_v2.get('communities', []) == ['grants_comm', ]
    assert deposit_v2.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
def test_propagation_with_newversion_open(
        app, db, users, communities, deposit, deposit_file):
    """Adding old versions to a community should propagate to all drafts."""
    # deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    deposit_v1 = deposit_v1.edit()
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    # New version in 'deposit_v2' has not been published yet
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    # depid_v1_value = deposit_v1['_deposit']['id']
    # depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1['communities'] = ['c1', 'c2', ]
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    c1_api = ZenodoCommunity('c1')
    c1_api.accept_record(record_v1, pid=recid_v1)

    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    assert deposit_v2['communities'] == ['c1', 'c2']
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    assert record_v2['communities'] == ['c1', ]
def test_basic_api(app, db, communities, deposit, deposit_file):
    """Test basic workflow using Deposit and Communities API."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c2' through deposit v2
    deposit_v2['communities'] = ['c1', 'c2', ]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')
    # Inclusion requests should be visible for both records
    assert c1_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c1_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert c2_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c2_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (version)
    c1_api.accept_record(record_v2, pid=recid_v2)
    c2_api.accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting an individual record to a community should propagate
    # the changes to all versions
    assert record_v1['communities'] == record_v2['communities'] == \
        ['c1', 'c2', ]

    # Removing the communities from deposit_v1 should remove them from
    # both published records
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []
def test_record_delete_v1(dc_mock, app, db, users, deposit, deposit_file):
    """Delete a record with multiple versions."""
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1.newversion()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    # Stash a copy of v1 for later
    rec1 = deepcopy(record_v1)
    rec1_id = str(record_v1.id)
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    # Stash a copy of v2 for later
    rec2 = deepcopy(record_v2)

    assert dc_mock().metadata_delete.call_count == 0
    # Remove the first version
    delete_record(rec1_id, 'spam', users[0]['id'])

    # Make sure all PIDs are deleted
    assert PID.get('doi', rec1['doi']).status == PIDStatus.DELETED
    assert PID.get('doi', rec1['conceptdoi']).status == PIDStatus.REGISTERED
    assert PID.get('recid', rec1['recid']).status == PIDStatus.DELETED
    # Make sure conceptrecid is redirecting to v2 (as before)
    crecid = PID.get('recid', rec1['conceptrecid'])
    assert crecid.get_redirect() == PID.get('recid', rec2['recid'])
    assert crecid.status == PIDStatus.REDIRECTED
    assert PID.get('depid', rec1['_deposit']['id']).status == \
        PIDStatus.DELETED

    # Make sure the v2 PIDs are kept intact
    assert PID.get('oai', rec2['_oai']['id']).status == PIDStatus.REGISTERED
    assert PID.get('doi', rec2['doi']).status == PIDStatus.REGISTERED
    assert PID.get('recid', rec2['recid']).status == PIDStatus.REGISTERED
    assert PID.get('depid', rec2['_deposit']['id']).status == \
        PIDStatus.REGISTERED

    # Only the v1 DOI should be deleted
    assert dc_mock().doi_post.call_count == 2
    assert dc_mock().doi_post.has_any_call('10.5072/zenodo.3')
    assert dc_mock().doi_post.has_any_call('10.5072/zenodo.1')
    assert dc_mock().metadata_delete.call_count == 1
    dc_mock().metadata_delete.assert_any_call('10.5072/zenodo.2')
    record = Record.get_record(rec1_id)
    assert record['removed_by'] == users[0]['id']
    assert record['removal_reason'] == 'Spam record, removed by Zenodo staff.'
def _publish_and_expunge(db, deposit):
    """Publish the deposit and expunge the session.

    Use this if you want to be sure that the session is synced with the DB
    after publishing the deposit.
    """
    deposit.publish()
    dep_uuid = deposit.id
    db.session.commit()
    db.session.expunge_all()
    deposit = Deposit.get_record(dep_uuid)
    return deposit
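# A minimal usage sketch for the helper above (fixtures and the
# 'fetch_published' API are the ones used throughout the tests in this
# file; the asserted invariant is illustrative only). After publishing,
# the deposit is re-read from the DB, so later assertions never see
# stale in-session state.
def _example_publish_roundtrip(db, deposit):
    deposit = _publish_and_expunge(db, deposit)
    recid, record = deposit.fetch_published()
    assert record['title'] == deposit['title']
    return recid, record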
def deposit(app, es, users, locations, deposit_metadata, sip_metadata_types):
    """New deposit with files."""
    with app.test_request_context():
        datastore = app.extensions['security'].datastore
        login_user(datastore.get_user(users[0]['email']))
        id_ = uuid4()
        zenodo_deposit_minter(id_, deposit_metadata)
        deposit = Deposit.create(deposit_metadata, id_=id_)
        db_.session.commit()
    current_search.flush_and_refresh(index='deposits')
    return deposit
def versioning_new_deposit(uuid):
    """Migrate a yet-unpublished deposit to a versioning scheme."""
    deposit = ZenodoDeposit.get_record(uuid)
    if 'conceptrecid' in deposit:
        return
    # ASSERT ZENODO DOI ONLY!
    assert 'conceptrecid' not in deposit, 'Concept RECID already in record.'
    conceptrecid = zenodo_concept_recid_minter(uuid, deposit)
    recid = PersistentIdentifier.get('recid', str(deposit['recid']))
    depid = PersistentIdentifier.get('depid', str(deposit['_deposit']['id']))
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_draft_child(recid)
    RecordDraft.link(recid, depid)
    deposit.commit()
    db.session.commit()
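# Invocation sketch ('uuids' is a hypothetical iterable of deposit UUIDs to
# migrate). Because of the 'conceptrecid' early return above, re-running the
# migration over deposits that were already migrated is a no-op.
def _example_migrate_new_deposits(uuids):
    for uuid in uuids:
        versioning_new_deposit(uuid)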
def loaddemorecords(records, owner):
    """Load demo records."""
    with current_app.test_request_context():
        login_user(owner)
        for record in records:
            deposit_data = legacyjson_v1(record)
            deposit_id = uuid4()
            zenodo_deposit_minter(deposit_id, deposit_data)
            deposit = ZenodoDeposit.create(deposit_data, id_=deposit_id)
            db.session.commit()
            filename = record['files'][0]
            # BytesIO expects bytes, so encode the filename that is used
            # as the demo file's content.
            deposit.files[filename] = BytesIO(filename.encode('utf-8'))
            db.session.commit()
            deposit.publish()
            db.session.commit()
def test_communities_newversion_while_ir_pending_bug(app, db, users,
                                                     communities, deposit,
                                                     deposit_file):
    """Make sure that pending IRs remain after a new version (bug)."""
    deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    # Two inclusion requests are pending
    assert InclusionRequest.query.count() == 2
    # Accept one community
    c1_api = ZenodoCommunity('c1')
    c1_api.accept_record(record_v1, pid=recid_v1)

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    # Make sure there is still an IR for community 'c2' after newversion
    assert InclusionRequest.query.count() == 1
    assert InclusionRequest.query.one().id_community == 'c2'
    assert record_v1.get('communities', []) == ['c1', ]
    assert deposit_v1.get('communities', []) == ['c1', 'c2', ]
    assert record_v2.get('communities', []) == ['c1', ]
    assert deposit_v2.get('communities', []) == ['c1', 'c2', ]
def test_communities_newversion_addition(app, db, users, communities,
                                         deposit, deposit_file):
    """Make sure that a new version of a record synchronizes communities."""
    deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')
    c1_api.accept_record(record_v1, pid=recid_v1)
    c2_api.accept_record(record_v1, pid=recid_v1)

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    # Remove 'c2' and request 'c5'. Make sure that communities from the
    # previous record version are preserved/removed properly
    deposit_v2['communities'] = ['c1', 'c5']
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    assert record_v1.get('communities', []) == ['c1', ]
    assert deposit_v1.get('communities', []) == ['c1', 'c5', ]
    assert record_v2.get('communities', []) == ['c1', ]
    assert deposit_v2.get('communities', []) == ['c1', 'c5', ]
def deposit(app, es, users, location):
    """New deposit with files."""
    record = dict(
        title='Test title',
        creators=[
            dict(name='Doe, John', affiliation='Atlantis'),
            dict(name='Smith, Jane', affiliation='Atlantis')
        ],
        description='Test Description',
        publication_date='2013-05-08',
        access_right='open'
    )
    with app.test_request_context():
        datastore = app.extensions['security'].datastore
        login_user(datastore.get_user(users[0]['email']))
        deposit = Deposit.create(record)
        deposit.commit()
        db_.session.commit()
    current_search.flush_and_refresh(index='deposits')
    return deposit
def get_github_repository(pid):
    """Get the GitHub repository from a depid."""
    depid = fetch_depid(pid)

    # First check if the passed depid is a GitHub release
    release = (Release.query.filter_by(record_id=depid.object_uuid)
               .one_or_none())
    if release:
        return release.repository

    deposit = ZenodoDeposit.get_record(depid.object_uuid)
    conceptrecid = deposit.get('conceptrecid')
    if not conceptrecid:
        return None
    parent = PersistentIdentifier.get(
        pid_type='recid', pid_value=conceptrecid)
    pv = PIDVersioning(parent=parent)
    if pv.exists:
        record_ids = [r.object_uuid for r in pv.children]
        deposit_ids = (rec.depid.object_uuid
                       for rec in ZenodoRecord.get_records(record_ids))
        release = (Release.query
                   .filter(Release.record_id.in_(deposit_ids))
                   .first())
        return release.repository if release else None
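# Usage sketch for the lookup above ('depid_value' is a hypothetical deposit
# PID value; 'deposit_resolver' is the same resolver used by the tests in
# this file). Any version's depid resolves to the same repository, since the
# lookup goes through the shared 'conceptrecid'.
def _example_repository_lookup(depid_value):
    depid, _deposit = deposit_resolver.resolve(depid_value)
    repository = get_github_repository(depid)
    return repository.name if repository else None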
def test_relations_serialization(app, db, deposit, deposit_file):
    """Serialize PID relations."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "2"},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": 1
            }
        ]
    }
    assert serialize_relations(recid_v1) == expected

    deposit_v1.newversion()
    # Should contain "draft_child_deposit" information
    expected = {
        "version": [
            {
                "draft_child_deposit": {
                    "pid_type": "depid",
                    "pid_value": "3"
                },
                "index": 0,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "2"},
                "count": 1,
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert serialize_relations(recid_v1) == expected

    # Publish the new version
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    # Should no longer contain "draft_child_deposit" info after publishing
    # and no longer be the last child
    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": False,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": 2
            }
        ]
    }
    assert serialize_relations(recid_v1) == expected

    # New version should be the last child now
    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 1,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "count": 2,
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert serialize_relations(recid_v2) == expected
def test_versioning_indexing(db, es, deposit, deposit_file):
    """Test the indexing of 'version' relations."""
    deposit_index_name = 'deposits-records-record-v1.0.0'
    records_index_name = 'records-record-v1.0.0'

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    RecordIndexer().index_by_id(str(record_v1.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 1
    assert len(s_rec) == 1
    assert 'relations' in s_dep[0]['_source']
    assert 'relations' in s_rec[0]['_source']
    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "2"},
                "count": 1,
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert s_dep[0]['_source']['relations'] == expected
    assert s_rec[0]['_source']['relations'] == expected

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 2  # Two deposits should be indexed
    assert len(s_rec) == 1  # One, since the record does not exist yet

    s_dep1 = current_search.client.get(
        index=deposit_index_name, id=deposit_v1.id)
    s_dep2 = current_search.client.get(
        index=deposit_index_name, id=deposit_v2.id)
    expected_d1 = {
        "version": [
            {
                "draft_child_deposit": {
                    "pid_type": "depid",
                    "pid_value": "3"
                },
                "index": 0,
                "is_last": False,
                "last_child": {"pid_type": "recid", "pid_value": "2"},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": 2  # For deposits, draft children are also counted
            }
        ]
    }
    expected_d2 = {
        "version": [
            {
                "draft_child_deposit": {
                    "pid_type": "depid",
                    "pid_value": "3"
                },
                "index": 1,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "2"},
                "count": 2,  # For deposits, draft children are also counted
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2

    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    RecordIndexer().index_by_id(str(record_v2.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 2
    assert len(s_rec) == 2

    s_dep1 = current_search.client.get(
        index=deposit_index_name, id=deposit_v1.id)
    s_dep2 = current_search.client.get(
        index=deposit_index_name, id=deposit_v2.id)
    s_rec1 = current_search.client.get(
        index=records_index_name, id=record_v1.id)
    s_rec2 = current_search.client.get(
        index=records_index_name, id=record_v2.id)
    expected_d1 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": False,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": 2
            }
        ]
    }
    expected_d2 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 1,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "count": 2,
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2
    expected_r1 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": False,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": 2
            }
        ]
    }
    expected_r2 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 1,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "count": 2,
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert s_rec1['_source']['relations'] == expected_r1
    assert s_rec2['_source']['relations'] == expected_r2
def test_related_identifiers_serialization(app, db, deposit, deposit_file):
    """Serialize PID relations to related identifiers."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    rids = serialize_related_identifiers(recid_v1)
    expected_v1 = [
        {
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.1',
            'relation': 'isPartOf'
        }
        # TODO: serialization of new version relations is disabled
        # {
        #     'scheme': 'doi',
        #     'identifier': '10.5072/zenodo.3',
        #     'relation': 'isPreviousVersionOf'
        # }
    ]
    assert rids == expected_v1
    rids = serialize_related_identifiers(recid_v2)
    expected_v2 = [
        {
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.1',
            'relation': 'isPartOf'
        }
        # TODO: serialization of new version relations is disabled
        # {
        #     'scheme': 'doi',
        #     'identifier': '10.5072/zenodo.2',
        #     'relation': 'isNewVersionOf'
        # }
    ]
    assert rids == expected_v2
    parent_pid = PersistentIdentifier.get('recid', '1')
    rids = serialize_related_identifiers(parent_pid)
    expected_parent = [
        {
            'relation': 'hasPart',
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.2'
        },
        {
            'relation': 'hasPart',
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.3'
        }
    ]
    assert rids == expected_parent
def test_autoadd(app, db, users, communities, deposit, deposit_file,
                 communities_autoadd_enabled):
    """Test the community auto-add workflow using the Deposit API."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c3' (owned by user) through deposit v2
    deposit_v2['communities'] = ['c1', 'c2', 'c3', ]
    deposit_v2['grants'] = [{'title': 'SomeGrant'}, ]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    assert record_v2['grants'] == [{'title': 'SomeGrant'}, ]
    recid_v2_value = recid_v2.pid_value

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    assert record_v1.get('communities', []) == ['c3', 'grants_comm']
    assert record_v2.get('communities', []) == ['c3', 'grants_comm']
    assert deposit_v1.get('communities', []) == ['c1', 'c2', 'c3',
                                                 'ecfunded', 'grants_comm',
                                                 'zenodo']
    assert deposit_v2.get('communities', []) == ['c1', 'c2', 'c3',
                                                 'ecfunded', 'grants_comm',
                                                 'zenodo']

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')
    c3_api = ZenodoCommunity('c3')
    grants_comm_api = ZenodoCommunity('grants_comm')
    ecfunded_api = ZenodoCommunity('ecfunded')
    zenodo_api = ZenodoCommunity('zenodo')
    # Inclusion requests should be visible for both records
    assert c1_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c1_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert c2_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c2_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert c3_api.get_comm_irs(record_v1, pid=recid_v1).count() == 0
    assert c3_api.get_comm_irs(record_v2, pid=recid_v2).count() == 0
    assert grants_comm_api.get_comm_irs(record_v1, pid=recid_v1).count() == 0
    assert grants_comm_api.get_comm_irs(record_v2, pid=recid_v2).count() == 0
    assert ecfunded_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert ecfunded_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert zenodo_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert zenodo_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (resolved through version)
    c1_api.accept_record(record_v2, pid=recid_v2)
    c2_api.accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting an individual record to a community should propagate
    # the changes to all versions
    assert record_v1.get('communities', []) == ['c1', 'c2', 'c3',
                                                'grants_comm']
    assert record_v2.get('communities', []) == ['c1', 'c2', 'c3',
                                                'grants_comm']
    assert deposit_v1.get('communities', []) == ['c1', 'c2', 'c3',
                                                 'ecfunded', 'grants_comm',
                                                 'zenodo']
    assert deposit_v2.get('communities', []) == ['c1', 'c2', 'c3',
                                                 'ecfunded', 'grants_comm',
                                                 'zenodo']

    # Removing 'c1'-'c3' from deposit_v1 should remove them from both
    # published records and the other deposits as well
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == ['grants_comm', ]
    assert record_v2.get('communities', []) == ['grants_comm', ]
    assert deposit_v1.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
    assert deposit_v2.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
def versioning_github_repository(uuid):
    """Migrate the GitHub repositories.

    :param uuid: UUID of the repository (invenio_github.models.Repository)
    """
    from invenio_github.models import Repository, ReleaseStatus
    from zenodo.modules.deposit.minters import zenodo_concept_recid_minter
    from zenodo.modules.records.minters import zenodo_concept_doi_minter
    from invenio_pidrelations.contrib.records import index_siblings

    repository = Repository.query.get(uuid)
    published_releases = repository.releases.filter_by(
        status=ReleaseStatus.PUBLISHED).all()

    # Nothing to migrate if no successful release was ever made
    if not published_releases:
        return

    deposits = [ZenodoDeposit.get_record(r.record_id)
                for r in published_releases
                if r.recordmetadata.json is not None]
    deposits = [dep for dep in deposits if 'removed_by' not in dep]
    deposits = sorted(deposits, key=lambda dep: int(dep['recid']))

    recids = [PersistentIdentifier.get('recid', dep['recid'])
              for dep in deposits]
    records = [ZenodoRecord.get_record(p.object_uuid) for p in recids]

    # There were successful releases, but deposits/records were removed since
    if not records:
        return

    assert not any('conceptrecid' in rec for rec in records), \
        "One or more of the release records have been already migrated"
    assert not any('conceptrecid' in dep for dep in deposits), \
        "One or more of the release deposits have been already migrated"

    conceptrecid = zenodo_concept_recid_minter(record_uuid=records[0].id,
                                               data=records[0])
    conceptrecid.register()

    # Mint the Concept DOI if we are migrating (linking) more than one record
    if len(records) > 1:
        conceptdoi = zenodo_concept_doi_minter(records[0].id, records[0])
    else:
        conceptdoi = None

    rec_comms = sorted(set(sum([rec.get('communities', [])
                                for rec in records], [])))
    dep_comms = sorted(set(sum([dep.get('communities', [])
                                for dep in deposits], [])))

    for rec in records:
        rec['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            rec['conceptdoi'] = conceptdoi.pid_value
        if rec_comms:
            rec['communities'] = rec_comms
        rec.commit()

    for dep in deposits:
        dep['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            dep['conceptdoi'] = conceptdoi.pid_value
        if dep_comms:
            dep['communities'] = dep_comms
        dep.commit()

    pv = PIDVersioning(parent=conceptrecid)
    for recid in recids:
        pv.insert_child(recid)
    pv.update_redirect()
    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(recids[-1].pid_value, str(records[-1].id))
    db.session.commit()

    # Reindex all siblings
    index_siblings(pv.last_child, with_deposits=True)
def test_archiving(app, db, deposit, deposit_file, locations, archive_fs):
    """Test ZenodoSIP archiving."""
    # Stash the configuration and enable writing
    orig = app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    deposit.files['test2.txt'] = BytesIO(b'test-two')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_id = recid_v1.id
    # Record files after publishing: 'test.txt', 'test2.txt'
    sip1 = SIP(SIPModel.query.one())
    sip1_id = sip1.id

    # Edit the metadata
    deposit_v1 = deposit_v1.edit()
    deposit_v1['title'] = "New title"
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    # Record files after publishing: 'test.txt', 'test2.txt'
    sip2_id = SIPModel.query.order_by(SIPModel.created.desc()).first().id

    # Create a new version
    deposit_v1.newversion()
    recid_v1 = PersistentIdentifier.query.get(recid_v1_id)
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    del deposit_v2.files['test.txt']
    deposit_v2.files['test3.txt'] = BytesIO(b('test-three'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    # Record files after publishing: 'test2.txt', 'test3.txt'

    sip1 = SIP(SIPModel.query.get(sip1_id))
    sip2 = SIP(SIPModel.query.get(sip2_id))
    sip3 = SIP(SIPModel.query.order_by(SIPModel.created.desc()).first())

    # Because we are using secure_filename when writing SIPFiles we need to
    # generate the correct names: <SIPFile.id>-<secure_filename>
    s1_file1_fn = '{0}-test.txt'.format(fetch_suff(sip1, 'test.txt').file_id)
    s1_file1_fp = 'data/files/{0}'.format(s1_file1_fn)
    s1_file2_fn = '{0}-test2.txt'.format(
        fetch_suff(sip1, 'test2.txt').file_id)
    s1_file2_fp = 'data/files/{0}'.format(s1_file2_fn)

    s3_file2_fn = '{0}-test2.txt'.format(
        fetch_suff(sip3, 'test2.txt').file_id)
    s3_file2_fp = 'data/files/{0}'.format(s3_file2_fn)
    s3_file3_fn = '{0}-test3.txt'.format(
        fetch_suff(sip3, 'test3.txt').file_id)
    s3_file3_fp = 'data/files/{0}'.format(s3_file3_fn)

    sip1_bagmeta = json.loads(next(
        m.content for m in sip1.metadata
        if m.type.name == 'bagit'))['files']
    sip2_bagmeta = json.loads(next(
        m.content for m in sip2.metadata
        if m.type.name == 'bagit'))['files']
    sip3_bagmeta = json.loads(next(
        m.content for m in sip3.metadata
        if m.type.name == 'bagit'))['files']

    # Check if the BagIt metadata contains the correct file-fetching info
    assert set([f['filepath'] for f in sip1_bagmeta]) == \
        set([s1_file1_fp, s1_file2_fp, 'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt'])
    assert not BagItArchiver._is_fetched(
        get_m_item(sip1_bagmeta, s1_file1_fp))
    assert not BagItArchiver._is_fetched(
        get_m_item(sip1_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip2_bagmeta]) == \
        set([s1_file1_fp, s1_file2_fp, 'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])
    # Both files should be fetched since it's a metadata-only edit submission
    assert BagItArchiver._is_fetched(
        get_m_item(sip2_bagmeta, s1_file1_fp))
    assert BagItArchiver._is_fetched(
        get_m_item(sip2_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip3_bagmeta]) == \
        set([s3_file2_fp, s3_file3_fp, 'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])
    # The first file should be fetched from the previous version and the
    # new file should be archived in this bag.
    assert BagItArchiver._is_fetched(
        get_m_item(sip3_bagmeta, s3_file2_fp))
    assert not BagItArchiver._is_fetched(
        get_m_item(sip3_bagmeta, s3_file3_fp))

    archiver1 = BagItArchiver(sip1)
    archiver2 = BagItArchiver(sip2)
    archiver3 = BagItArchiver(sip3)

    # Each archiver subpath follows: '<recid>/r/<ISO-8601-SIP-timestamp>'
    sip1_ts = arrow.get(sip1.model.created).isoformat()
    sip2_ts = arrow.get(sip2.model.created).isoformat()
    sip3_ts = arrow.get(sip3.model.created).isoformat()
    assert archiver1.get_archive_subpath() == '2/r/{0}'.format(sip1_ts)
    assert archiver2.get_archive_subpath() == '2/r/{0}'.format(sip2_ts)
    assert archiver3.get_archive_subpath() == '3/r/{0}'.format(sip3_ts)

    # As a test, write the SIPs in reverse chronological order
    assert not sip1.archived
    assert not sip2.archived
    assert not sip3.archived
    archive_sip.delay(sip3.id)
    archive_sip.delay(sip2.id)
    archive_sip.delay(sip1.id)
    assert sip1.archived
    assert sip2.archived
    assert sip3.archived

    fs1 = archive_fs.opendir(archiver1.get_archive_subpath())
    assert set(fs1.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data'])
    assert set(fs1.listdir('data')) == set(['metadata', 'files',
                                            'filenames.txt'])
    assert fs1.listdir('data/metadata') == ['record-json.json', ]
    assert set(fs1.listdir('data/files')) == set([s1_file1_fn, s1_file2_fn])

    fs2 = archive_fs.opendir(archiver2.get_archive_subpath())
    assert set(fs2.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data', 'fetch.txt'])
    # The second SIP has written only the metadata, so there should be no
    # 'files/', but 'filenames.txt' should still be there because of the
    # fetch.txt
    assert set(fs2.listdir('data')) == set(['metadata', 'filenames.txt'])
    assert fs2.listdir('data/metadata') == ['record-json.json', ]

    with fs2.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Fetched files should correctly fetch the files from the first archive
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 4 {fn}'.format(fn=s1_file1_fp, base=base_uri,
                                               s1ts=sip1_ts),
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s1_file2_fp, base=base_uri,
                                               s1ts=sip1_ts),
    ])

    fs3 = archive_fs.opendir(archiver3.get_archive_subpath())
    assert set(fs3.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data', 'fetch.txt'])
    # The third SIP should write only the extra 'test3.txt' file
    assert set(fs3.listdir('data')) == set(['metadata', 'files',
                                            'filenames.txt'])
    assert fs3.listdir('data/metadata') == ['record-json.json', ]
    assert fs3.listdir('data/files') == [s3_file3_fn, ]

    with fs3.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Since 'test.txt' was removed in the third SIP, we should only fetch
    # 'test2.txt', also from the first archive, since that is where this
    # file resides physically.
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s3_file2_fp, base=base_uri,
                                               s1ts=sip1_ts),
    ])
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig
def test_legacyjson_to_record_translation(app, db, es, grant_records,
                                          license_record, locations,
                                          communities):
    """Test the translator legacy_zenodo and zenodo_legacy."""
    test_data = dict(
        metadata=dict(
            access_right='embargoed',
            communities=[{'identifier': 'c1'}],
            conference_acronym='Some acronym',
            conference_dates='Some dates',
            conference_place='Some place',
            conference_title='Some title',
            conference_url='http://someurl.com',
            conference_session='VI',
            conference_session_part='1',
            creators=[
                dict(name="Doe, John", affiliation="Atlantis",
                     orcid="0000-0002-1825-0097", gnd="170118215"),
                dict(name="Smith, Jane", affiliation="Atlantis")
            ],
            description="Some description",
            doi="10.1234/foo.bar",
            embargo_date=(
                datetime.utcnow().date() + timedelta(days=2)).isoformat(),
            grants=[dict(id="282896"), ],
            imprint_isbn="Some isbn",
            imprint_place="Some place",
            imprint_publisher="Some publisher",
            journal_issue="Some issue",
            journal_pages="Some pages",
            journal_title="Some journal name",
            journal_volume="Some volume",
            keywords=["Keyword 1", "keyword 2"],
            subjects=[
                dict(scheme="gnd", identifier="1234567899",
                     term="Astronaut"),
                dict(scheme="gnd", identifier="1234567898", term="Amish"),
            ],
            license="CC0-1.0",
            notes="Some notes",
            partof_pages="Some part of",
            partof_title="Some part of title",
            prereserve_doi=True,
            publication_date="2013-09-12",
            publication_type="book",
            references=[
                "Reference 1",
                "Reference 2",
            ],
            related_identifiers=[
                dict(identifier='10.1234/foo.bar2', relation='isCitedBy'),
                dict(identifier='10.1234/foo.bar3', relation='cites'),
                dict(identifier='2011ApJS..192...18K',
                     relation='isAlternativeIdentifier'),
            ],
            thesis_supervisors=[
                dict(name="Doe Sr., John", affiliation="Atlantis"),
                dict(name="Smith Sr., Jane", affiliation="Atlantis",
                     orcid="http://orcid.org/0000-0002-1825-0097",
                     gnd="http://d-nb.info/gnd/170118215")
            ],
            thesis_university="Some thesis_university",
            contributors=[
                dict(name="Doe Sr., Jochen", affiliation="Atlantis",
                     type="Other"),
                dict(name="Smith Sr., Marco", affiliation="Atlantis",
                     orcid="http://orcid.org/0000-0002-1825-0097",
                     gnd="http://d-nb.info/gnd/170118215",
                     type="DataCurator")
            ],
            title="Test title",
            upload_type="publication",
        )
    )
    ZenodoDeposit.create(
        legacyjson.LegacyRecordSchemaV1(strict=True).load(test_data).data
    ).validate()
def test_versioning_indexing(db, es, deposit, deposit_file):
    """Test the indexing of 'version' relations."""
    deposit_index_name = 'deposits-records-record-v1.0.0'
    records_index_name = 'records-record-v1.0.0'

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    RecordIndexer().index_by_id(str(record_v1.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 1
    assert len(s_rec) == 1
    assert 'relations' in s_dep[0]['_source']
    assert 'relations' in s_rec[0]['_source']
    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "2"},
                "count": 1,
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert s_dep[0]['_source']['relations'] == expected
    assert s_rec[0]['_source']['relations'] == expected

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 2  # Two deposits should be indexed
    assert len(s_rec) == 1  # One, since record does not exist yet

    s_dep1 = current_search.client.get(
        index=deposit_index_name, id=deposit_v1.id)
    s_dep2 = current_search.client.get(
        index=deposit_index_name, id=deposit_v2.id)
    expected_d1 = {
        "version": [
            {
                "draft_child_deposit": {
                    "pid_type": "depid",
                    "pid_value": "3"
                },
                "index": 0,
                "is_last": False,
                "last_child": {"pid_type": "recid", "pid_value": "2"},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": 2  # For deposit, draft children are also counted
            }
        ]
    }
    expected_d2 = {
        "version": [
            {
                "draft_child_deposit": {
                    "pid_type": "depid",
                    "pid_value": "3"
                },
                "index": 1,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "2"},
                "count": 2,  # For deposit, draft children are also counted
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2

    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    RecordIndexer().index_by_id(str(record_v2.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 2
    assert len(s_rec) == 2

    s_dep1 = current_search.client.get(
        index=deposit_index_name, id=deposit_v1.id)
    s_dep2 = current_search.client.get(
        index=deposit_index_name, id=deposit_v2.id)
    s_rec1 = current_search.client.get(
        index=records_index_name, id=record_v1.id)
    s_rec2 = current_search.client.get(
        index=records_index_name, id=record_v2.id)
    expected_d1 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": False,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": 2
            }
        ]
    }
    expected_d2 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 1,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "count": 2,
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2
    expected_r1 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": False,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": 2
            }
        ]
    }
    expected_r2 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 1,
                "is_last": True,
                "last_child": {"pid_type": "recid", "pid_value": "3"},
                "count": 2,
                "parent": {"pid_type": "recid", "pid_value": "1"},
            }
        ]
    }
    assert s_rec1['_source']['relations'] == expected_r1
    assert s_rec2['_source']['relations'] == expected_r2
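# The reindex/flush/get boilerplate above repeats for every step of the
# test. A small helper along these lines (hypothetical, not part of the
# suite; it reuses only calls already made above) could keep the assertions
# focused on the 'relations' payloads:
def _indexed_relations(index_name, doc_id):
    """Flush ``index_name`` and return the 'relations' of one document."""
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=index_name)
    doc = current_search.client.get(index=index_name, id=doc_id)
    return doc['_source']['relations']

# Usage, equivalent to one of the assertions above:
#     assert _indexed_relations(deposit_index_name, deposit_v1.id) == \
#         expected_d1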
def test_related_identifiers_serialization(app, db, deposit, deposit_file):
    """Serialize PID Relations to related identifiers."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    rids = serialize_related_identifiers(recid_v1)
    expected_v1 = [
        {
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.1',
            'relation': 'isPartOf'
        }
        # TODO: serialization of new version relations is disabled
        # {
        #     'scheme': 'doi',
        #     'identifier': '10.5072/zenodo.3',
        #     'relation': 'isPreviousVersionOf'
        # }
    ]
    assert rids == expected_v1
    rids = serialize_related_identifiers(recid_v2)
    expected_v2 = [
        {
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.1',
            'relation': 'isPartOf'
        }
        # TODO: serialization of new version relations is disabled
        # {
        #     'scheme': 'doi',
        #     'identifier': '10.5072/zenodo.2',
        #     'relation': 'isNewVersionOf'
        # }
    ]
    assert rids == expected_v2
    parent_pid = PersistentIdentifier.get('recid', '1')
    rids = serialize_related_identifiers(parent_pid)
    expected_parent = [
        {
            'relation': 'hasPart',
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.2'
        },
        {
            'relation': 'hasPart',
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.3'
        }
    ]
    assert rids == expected_parent
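# For illustration only: ``serialize_related_identifiers`` returns a plain
# list of dicts, so a consumer (e.g. a DataCite-style serializer) can group
# identifiers by relation with stdlib tools. This helper is hypothetical,
# not part of the module under test.
def _group_by_relation(related_identifiers):
    """Group serialized related identifiers by their 'relation' key."""
    from collections import defaultdict
    grouped = defaultdict(list)
    for rid in related_identifiers:
        grouped[rid['relation']].append(rid['identifier'])
    return dict(grouped)

# e.g. _group_by_relation(expected_parent) would yield:
#     {'hasPart': ['10.5072/zenodo.2', '10.5072/zenodo.3']}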
def publish(self):
    """Publish GitHub release as record."""
    id_ = uuid.uuid4()
    deposit_metadata = dict(self.metadata)
    deposit = None
    try:
        db.session.begin_nested()
        # TODO: Add filter on Published releases
        previous_releases = self.model.repository.releases.filter_by(
            status=ReleaseStatus.PUBLISHED)
        versioning = None
        stashed_draft_child = None
        if previous_releases.count():
            last_release = previous_releases.order_by(
                Release.created.desc()).first()
            last_recid = PersistentIdentifier.get(
                'recid', last_release.record['recid'])
            versioning = PIDVersioning(child=last_recid)
            last_record = ZenodoRecord.get_record(
                versioning.last_child.object_uuid)
            deposit_metadata['conceptrecid'] = last_record['conceptrecid']
            if 'conceptdoi' not in last_record:
                last_depid = PersistentIdentifier.get(
                    'depid', last_record['_deposit']['id'])
                last_deposit = ZenodoDeposit.get_record(
                    last_depid.object_uuid)
                last_deposit = last_deposit.registerconceptdoi()
                last_recid, last_record = last_deposit.fetch_published()
            deposit_metadata['conceptdoi'] = last_record['conceptdoi']
            if versioning.draft_child:
                stashed_draft_child = versioning.draft_child
                versioning.remove_draft_child()

        deposit = self.deposit_class.create(deposit_metadata, id_=id_)
        deposit['_deposit']['created_by'] = self.event.user_id
        deposit['_deposit']['owners'] = [self.event.user_id]

        # Fetch the deposit files
        for key, url in self.files:
            # Make a HEAD request to get GitHub to compute the
            # Content-Length.
            res = self.gh.api.session.head(url, allow_redirects=True)
            # Now, download the file
            res = self.gh.api.session.get(url, stream=True,
                                          allow_redirects=True)
            if res.status_code != 200:
                raise Exception(
                    "Could not retrieve archive from GitHub: {url}"
                    .format(url=url)
                )

            size = int(res.headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                stream=res.raw,
                size=size or None,
                mimetype=res.headers.get('Content-Type'),
            )

        # GitHub-specific SIP store agent
        sip_agent = {
            '$schema': current_jsonschemas.path_to_url(
                current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
            'user_id': self.event.user_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
        recid_pid, record = deposit.fetch_published()
        self.model.recordmetadata = record.model
        if versioning and stashed_draft_child:
            versioning.insert_draft_child(stashed_draft_child)
        record_id = str(record.id)
        db.session.commit()

        # Send Datacite DOI registration task
        datacite_register.delay(recid_pid.pid_value, record_id)

        # Index the record
        RecordIndexer().index_by_id(record_id)
    except Exception:
        db.session.rollback()
        # Remove deposit from index since it was not committed.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                current_app.logger.exception(
                    "Failed to remove uncommitted deposit from index.")
        raise
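# The file-fetching loop in ``publish()`` depends on an initial HEAD request
# so that GitHub computes a Content-Length before the streamed GET. A
# standalone sketch of that pattern (hypothetical helper; ``session`` is any
# requests-compatible session such as ``self.gh.api.session`` above):
def _stream_github_asset(session, url):
    """Return (raw_stream, size, mimetype) for a GitHub release asset."""
    # HEAD first, so GitHub computes the Content-Length for the asset.
    session.head(url, allow_redirects=True)
    res = session.get(url, stream=True, allow_redirects=True)
    if res.status_code != 200:
        raise Exception(
            "Could not retrieve archive from GitHub: {url}".format(url=url))
    size = int(res.headers.get('Content-Length', 0))
    # Fall back to None so the object store can determine the size itself.
    return res.raw, size or None, res.headers.get('Content-Type')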
def test_archiving(app, db, deposit, deposit_file, locations, archive_fs):
    """Test ZenodoSIP archiving."""
    # Stash the configuration and enable writing
    orig = app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    deposit.files['test2.txt'] = BytesIO(b'test-two')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_id = recid_v1.id
    # Record files after publishing: 'test.txt', 'test2.txt'
    sip1 = SIP(SIPModel.query.one())
    sip1_id = sip1.id

    # Edit the metadata
    deposit_v1 = deposit_v1.edit()
    deposit_v1['title'] = "New title"
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    # Record files after publishing: 'test.txt', 'test2.txt'
    sip2_id = SIPModel.query.order_by(SIPModel.created.desc()).first().id

    # Create a new version
    deposit_v1.newversion()
    recid_v1 = PersistentIdentifier.query.get(recid_v1_id)
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    del deposit_v2.files['test.txt']
    deposit_v2.files['test3.txt'] = BytesIO(b('test-three'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    # Record files after publishing: 'test2.txt', 'test3.txt'

    sip1 = SIP(SIPModel.query.get(sip1_id))
    sip2 = SIP(SIPModel.query.get(sip2_id))
    sip3 = SIP(SIPModel.query.order_by(SIPModel.created.desc()).first())

    # Because we use secure_filename when writing SIPFiles, we need to
    # generate the correct names: '<SIPFile.id>-<secure_filename>'
    s1_file1_fn = '{0}-test.txt'.format(
        fetch_suff(sip1, 'test.txt').file_id)
    s1_file1_fp = 'data/files/{0}'.format(s1_file1_fn)
    s1_file2_fn = '{0}-test2.txt'.format(
        fetch_suff(sip1, 'test2.txt').file_id)
    s1_file2_fp = 'data/files/{0}'.format(s1_file2_fn)

    s3_file2_fn = '{0}-test2.txt'.format(
        fetch_suff(sip3, 'test2.txt').file_id)
    s3_file2_fp = 'data/files/{0}'.format(s3_file2_fn)
    s3_file3_fn = '{0}-test3.txt'.format(
        fetch_suff(sip3, 'test3.txt').file_id)
    s3_file3_fp = 'data/files/{0}'.format(s3_file3_fn)

    sip1_bagmeta = json.loads(next(
        m.content for m in sip1.metadata
        if m.type.name == 'bagit'))['files']
    sip2_bagmeta = json.loads(next(
        m.content for m in sip2.metadata
        if m.type.name == 'bagit'))['files']
    sip3_bagmeta = json.loads(next(
        m.content for m in sip3.metadata
        if m.type.name == 'bagit'))['files']

    # Check if BagIt metadata contains the correct file-fetching information
    assert set([f['filepath'] for f in sip1_bagmeta]) == \
        set([s1_file1_fp, s1_file2_fp, 'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt'])
    assert not BagItArchiver._is_fetched(
        get_m_item(sip1_bagmeta, s1_file1_fp))
    assert not BagItArchiver._is_fetched(
        get_m_item(sip1_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip2_bagmeta]) == \
        set([s1_file1_fp, s1_file2_fp, 'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])
    # Both files should be fetched, since this SIP is a metadata-only edit
    assert BagItArchiver._is_fetched(
        get_m_item(sip2_bagmeta, s1_file1_fp))
    assert BagItArchiver._is_fetched(
        get_m_item(sip2_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip3_bagmeta]) == \
        set([s3_file2_fp, s3_file3_fp, 'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])
    # The first file should be fetched from the previous version and the
    # new file should be archived in this bag.
    assert BagItArchiver._is_fetched(
        get_m_item(sip3_bagmeta, s3_file2_fp))
    assert not BagItArchiver._is_fetched(
        get_m_item(sip3_bagmeta, s3_file3_fp))

    archiver1 = BagItArchiver(sip1)
    archiver2 = BagItArchiver(sip2)
    archiver3 = BagItArchiver(sip3)

    # Each archiver subpath follows: '<recid>/r/<ISO-8601-SIP-timestamp>'
    sip1_ts = arrow.get(sip1.model.created).isoformat()
    sip2_ts = arrow.get(sip2.model.created).isoformat()
    sip3_ts = arrow.get(sip3.model.created).isoformat()
    assert archiver1.get_archive_subpath() == '2/r/{0}'.format(sip1_ts)
    assert archiver2.get_archive_subpath() == '2/r/{0}'.format(sip2_ts)
    assert archiver3.get_archive_subpath() == '3/r/{0}'.format(sip3_ts)

    # As a test, write the SIPs in reverse chronological order
    assert not sip1.archived
    assert not sip2.archived
    assert not sip3.archived
    archive_sip.delay(sip3.id)
    archive_sip.delay(sip2.id)
    archive_sip.delay(sip1.id)
    assert sip1.archived
    assert sip2.archived
    assert sip3.archived

    fs1 = archive_fs.opendir(archiver1.get_archive_subpath())
    assert set(fs1.listdir()) == set([
        'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt',
        'bag-info.txt', 'data'
    ])
    assert set(fs1.listdir('data')) == set(
        ['metadata', 'files', 'filenames.txt'])
    assert fs1.listdir('data/metadata') == ['record-json.json', ]
    assert set(fs1.listdir('data/files')) == set([s1_file1_fn, s1_file2_fn])

    fs2 = archive_fs.opendir(archiver2.get_archive_subpath())
    assert set(fs2.listdir()) == set([
        'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt',
        'bag-info.txt', 'data', 'fetch.txt'
    ])
    # The second SIP has written only the metadata, so there should be no
    # 'files/' directory, but 'filenames.txt' should still be there because
    # of fetch.txt
    assert set(fs2.listdir('data')) == set(['metadata', 'filenames.txt'])
    assert fs2.listdir('data/metadata') == ['record-json.json', ]

    with fs2.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # fetch.txt entries should point at the files in the first archive
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 4 {fn}'.format(fn=s1_file1_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s1_file2_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
    ])

    fs3 = archive_fs.opendir(archiver3.get_archive_subpath())
    assert set(fs3.listdir()) == set([
        'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt',
        'bag-info.txt', 'data', 'fetch.txt'
    ])
    # The third SIP should write only the extra 'test3.txt' file
    assert set(fs3.listdir('data')) == set(
        ['metadata', 'files', 'filenames.txt'])
    assert fs3.listdir('data/metadata') == ['record-json.json', ]
    assert fs3.listdir('data/files') == [s3_file3_fn, ]

    with fs3.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Since 'test.txt' was removed in the third SIP, we should only fetch
    # 'test2.txt', again from the first archive, since that is where the
    # file physically resides.
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s3_file2_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
    ])
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig
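# fetch.txt lines follow the BagIt convention '<url> <length> <filepath>',
# which is what the assertions above rebuild with format strings. A tiny
# parser sketch (hypothetical; assumes URLs without spaces, as produced by
# the archiver and secure_filename above):
def _parse_fetch_line(line):
    """Split one fetch.txt line into (url, length, filepath)."""
    url, length, filepath = line.split(' ', 2)
    return url, int(length), filepath

# e.g. for the single entry checked in the third SIP above:
#     _parse_fetch_line(cnt[0])[1] == 8  # size of b'test-two'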