def test_identifier_schemes(app, db, es, locations, license_record,
                            sample_identifiers):
    """Check that every sample identifier scheme loads and validates."""
    schema = legacyjson.LegacyMetadataSchemaV1(strict=True)
    # One 'references' relation per scheme; only the first element of each
    # sample tuple (the identifier value) is needed here.
    related = [
        dict(identifier=value, scheme=scheme_name, relation='references')
        for scheme_name, (value, _unused) in sample_identifiers.items()
    ]
    loaded = schema.load(d(related_identifiers=related))
    ZenodoDeposit.create(loaded.data).validate()
示例#2
0
def test_autoadd_explicit_newversion(
        app, db, users, communities, deposit, deposit_file,
        communities_autoadd_enabled):
    """Explicitly request the auto-added communities on a new version."""
    # Publish the first version and keep its identifier values.
    first_deposit = publish_and_expunge(db, deposit)
    first_recid, first_record = first_deposit.fetch_published()
    first_depid_value = first_deposit['_deposit']['id']
    first_recid_value = first_recid.pid_value

    # Open a draft for the second version and load it.
    first_deposit = first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_depid_value = second_depid.pid_value
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())

    # Request the communities explicitly and attach a grant.
    second_deposit['communities'] = ['ecfunded', 'grants_comm', 'zenodo']
    second_deposit['grants'] = [{'title': 'SomeGrant'}]
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()

    # Resolve everything again before checking the outcome.
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    second_depid, second_deposit = deposit_resolver.resolve(
        second_depid_value)
    first_recid, first_record = record_resolver.resolve(first_recid_value)

    on_records = ['grants_comm']
    on_deposits = ['ecfunded', 'grants_comm', 'zenodo']
    assert first_record.get('communities', []) == on_records
    assert first_deposit.get('communities', []) == on_deposits
    assert second_record.get('communities', []) == on_records
    assert second_deposit.get('communities', []) == on_deposits
示例#3
0
def test_propagation_with_newversion_open(
        app, db, users, communities, deposit, deposit_file):
    """Community changes on an old version should reach an open draft."""
    # Publish the first version and immediately reopen it for editing.
    first_deposit = publish_and_expunge(db, deposit)
    first_deposit = first_deposit.edit()

    first_recid, first_record = first_deposit.fetch_published()
    first_recid_value = first_recid.pid_value

    # Start a second version; its draft is not published yet.
    first_deposit = first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_depid_value = second_depid.pid_value
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())

    # Request both communities through the first version and publish it.
    first_deposit['communities'] = ['c1', 'c2']
    first_deposit = publish_and_expunge(db, first_deposit)

    # Accept the first version into 'c1' only.
    first_recid, first_record = record_resolver.resolve(first_recid_value)
    c1 = ZenodoCommunity('c1')
    c1.accept_record(first_record, pid=first_recid)

    # The open draft is expected to list both requested communities.
    second_depid, second_deposit = deposit_resolver.resolve(
        second_depid_value)
    assert second_deposit['communities'] == ['c1', 'c2']
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()

    # Only the accepted community is expected on the published record.
    assert second_record['communities'] == ['c1']
示例#4
0
def test_basic_api(app, db, communities, deposit, deposit_file):
    """Run the basic Deposit/Communities workflow over two versions."""
    first_deposit = publish_and_expunge(db, deposit)
    first_depid_value = first_deposit['_deposit']['id']

    first_recid, first_record = first_deposit.fetch_published()
    first_recid_value = first_recid.pid_value

    # Create and publish a second version carrying one file.
    first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_deposit = ZenodoDeposit.get_record(second_depid.object_uuid)
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)

    # Request 'c1' and 'c2' by editing the second version.
    second_deposit = second_deposit.edit()
    second_deposit['communities'] = ['c1', 'c2']
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()
    second_recid_value = second_recid.pid_value
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    first_recid, first_record = first_deposit.fetch_published()

    # Nothing was accepted yet, so neither record lists a community.
    assert first_record.get('communities', []) == []
    assert second_record.get('communities', []) == []

    c1 = ZenodoCommunity('c1')
    c2 = ZenodoCommunity('c2')

    # Each community should see one inclusion request from either version.
    for community in (c1, c2):
        for record, recid in ((first_record, first_recid),
                              (second_record, second_recid)):
            assert community.get_comm_irs(record, pid=recid).count() == 1

    # Accept into 'c1' via the second version (where it was requested) and
    # into 'c2' via the first version.
    c1.accept_record(second_record, pid=second_recid)
    c2.accept_record(first_record, pid=first_recid)
    first_recid, first_record = record_resolver.resolve(first_recid_value)
    second_recid, second_record = record_resolver.resolve(second_recid_value)

    # Accepting a single version should propagate to all versions.
    assert first_record['communities'] == ['c1', 'c2']
    assert second_record['communities'] == ['c1', 'c2']

    # Clearing the communities on the first deposit should remove them from
    # both published records.
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    first_deposit = first_deposit.edit()
    first_deposit['communities'] = []
    first_deposit = publish_and_expunge(db, first_deposit)
    first_recid, first_record = record_resolver.resolve(first_recid_value)
    second_recid, second_record = record_resolver.resolve(second_recid_value)
    assert first_record.get('communities', []) == []
    assert second_record.get('communities', []) == []
示例#5
0
def test_basic_api(app, db, communities, deposit, deposit_file):
    """Run the basic Deposit/Communities workflow over two versions."""
    first_deposit = publish_and_expunge(db, deposit)
    first_depid_value = first_deposit['_deposit']['id']

    first_recid, first_record = first_deposit.fetch_published()
    first_recid_value = first_recid.pid_value

    # Create and publish a second version carrying one file.
    first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)

    # Request 'c1' and 'c2' by editing the second version.
    second_deposit = second_deposit.edit()
    second_deposit['communities'] = ['c1', 'c2']
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()
    second_recid_value = second_recid.pid_value
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    first_recid, first_record = first_deposit.fetch_published()

    # Nothing was accepted yet, so neither record lists a community.
    assert first_record.get('communities', []) == []
    assert second_record.get('communities', []) == []

    c1 = ZenodoCommunity('c1')
    c2 = ZenodoCommunity('c2')

    # Each community should see one inclusion request from either version.
    for community in (c1, c2):
        for record, recid in ((first_record, first_recid),
                              (second_record, second_recid)):
            assert community.get_comm_irs(record, pid=recid).count() == 1

    # Accept into 'c1' via the second version (where it was requested) and
    # into 'c2' via the first version.
    c1.accept_record(second_record, pid=second_recid)
    c2.accept_record(first_record, pid=first_recid)
    first_recid, first_record = record_resolver.resolve(first_recid_value)
    second_recid, second_record = record_resolver.resolve(second_recid_value)

    # Accepting a single version should propagate to all versions.
    assert first_record['communities'] == ['c1', 'c2']
    assert second_record['communities'] == ['c1', 'c2']

    # Clearing the communities on the first deposit should remove them from
    # both published records.
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    first_deposit = first_deposit.edit()
    first_deposit['communities'] = []
    first_deposit = publish_and_expunge(db, first_deposit)
    first_recid, first_record = record_resolver.resolve(first_recid_value)
    second_recid, second_record = record_resolver.resolve(second_recid_value)
    assert first_record.get('communities', []) == []
    assert second_record.get('communities', []) == []
示例#6
0
def test_record_delete_v1(dc_mock, app, db, users, deposit, deposit_file):
    """Delete the first version of a record that has two versions.

    Checks the PID statuses of both versions after the deletion, the calls
    made on the DataCite mock (``dc_mock``) and the removal metadata stored
    on the deleted record.
    """
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit.fetch_published()
    recid_v1_value = recid_v1.pid_value
    deposit_v1.newversion()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)

    # Stash a copy of v1 for later
    rec1 = deepcopy(record_v1)
    rec1_id = str(record_v1.id)

    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    # Stash a copy of v2 for later
    rec2 = deepcopy(record_v2)

    assert dc_mock().metadata_delete.call_count == 0

    # Remove the first version
    delete_record(rec1_id, 'spam', users[0]['id'])

    # The v1 PIDs are deleted, while the concept DOI stays registered
    assert PID.get('doi', rec1['doi']).status == PIDStatus.DELETED
    assert PID.get('doi', rec1['conceptdoi']).status == PIDStatus.REGISTERED
    assert PID.get('recid', rec1['recid']).status == PIDStatus.DELETED

    # Make sure conceptrecid is redirecting to v2 (as before)
    crecid = PID.get('recid', rec1['conceptrecid'])
    assert crecid.get_redirect() == PID.get('recid', rec2['recid'])
    assert crecid.status == PIDStatus.REDIRECTED
    assert PID.get('depid', rec1['_deposit']['id']).status == PIDStatus.DELETED

    # Make sure the v2 PIDs are kept intact
    assert PID.get('oai', rec2['_oai']['id']).status == PIDStatus.REGISTERED
    assert PID.get('doi', rec2['doi']).status == PIDStatus.REGISTERED
    assert PID.get('recid', rec2['recid']).status == PIDStatus.REGISTERED
    assert PID.get('depid', rec2['_deposit']['id']).status == \
        PIDStatus.REGISTERED

    # Only the v1 DOI should be deleted
    doi_post = dc_mock().doi_post
    assert doi_post.call_count == 2
    # NOTE: Mock has no ``has_any_call`` method; accessing it just creates a
    # truthy child mock, so asserting on it can never fail. Inspect the
    # recorded calls instead, accepting the DOI as a positional or a keyword
    # argument (doi_post may receive further arguments besides the DOI).
    posted_args = [
        tuple(recorded[0]) + tuple(recorded[1].values())
        for recorded in doi_post.call_args_list
    ]
    assert any('10.5072/zenodo.3' in args for args in posted_args)
    assert any('10.5072/zenodo.1' in args for args in posted_args)
    assert dc_mock().metadata_delete.call_count == 1
    dc_mock().metadata_delete.assert_any_call('10.5072/zenodo.2')
    record = Record.get_record(rec1_id)
    assert record['removed_by'] == users[0]['id']
    assert record['removal_reason'] == 'Spam record, removed by Zenodo staff.'
示例#7
0
def _publish_and_expunge(db, deposit):
    """Publish a deposit, then fetch it again from an emptied session.

    After publishing, the session is committed and all objects are expunged
    from it, and the deposit is fetched again by its id. Use this when the
    returned deposit must be in sync with the DB after publishing.
    """
    deposit.publish()
    deposit_uuid = deposit.id
    session = db.session
    session.commit()
    session.expunge_all()
    return Deposit.get_record(deposit_uuid)
def _publish_and_expunge(db, deposit):
    """Publish a deposit, then fetch it again from an emptied session.

    After publishing, the session is committed and all objects are expunged
    from it, and the deposit is fetched again by its id. Use this when the
    returned deposit must be in sync with the DB after publishing.
    """
    deposit.publish()
    deposit_uuid = deposit.id
    session = db.session
    session.commit()
    session.expunge_all()
    return Deposit.get_record(deposit_uuid)
示例#9
0
文件: conftest.py 项目: zenodo/zenodo
def deposit(app, es, users, locations, deposit_metadata, sip_metadata_types):
    """Create and commit a new deposit built from ``deposit_metadata``.

    The deposit is created inside a test request context while logged in as
    the first user of ``users``; the 'deposits' index is refreshed afterwards.
    """
    with app.test_request_context():
        user_datastore = app.extensions['security'].datastore
        owner = user_datastore.get_user(users[0]['email'])
        login_user(owner)
        deposit_uuid = uuid4()
        zenodo_deposit_minter(deposit_uuid, deposit_metadata)
        new_deposit = Deposit.create(deposit_metadata, id_=deposit_uuid)
        db_.session.commit()
    current_search.flush_and_refresh(index='deposits')
    return new_deposit
示例#10
0
def deposit(app, es, users, locations, deposit_metadata, sip_metadata_types):
    """Create and commit a new deposit built from ``deposit_metadata``.

    The deposit is created inside a test request context while logged in as
    the first user of ``users``; the 'deposits' index is refreshed afterwards.
    """
    with app.test_request_context():
        user_datastore = app.extensions['security'].datastore
        owner = user_datastore.get_user(users[0]['email'])
        login_user(owner)
        deposit_uuid = uuid4()
        zenodo_deposit_minter(deposit_uuid, deposit_metadata)
        new_deposit = Deposit.create(deposit_metadata, id_=deposit_uuid)
        db_.session.commit()
    current_search.flush_and_refresh(index='deposits')
    return new_deposit
示例#11
0
def versioning_new_deposit(uuid):
    """Migrate a yet-unpublished deposit to a versioning scheme.

    Does nothing when the deposit already has a ``conceptrecid``.

    :param uuid: Identifier passed to ``ZenodoDeposit.get_record``.
    """
    deposit = ZenodoDeposit.get_record(uuid)
    # Already migrated: nothing to do. (The assertion that used to follow
    # this guard checked the same condition and could never fire.)
    if 'conceptrecid' in deposit:
        return
    conceptrecid = zenodo_concept_recid_minter(uuid, deposit)
    recid = PersistentIdentifier.get('recid', str(deposit['recid']))
    depid = PersistentIdentifier.get('depid', str(deposit['_deposit']['id']))
    # Register the recid as the draft child of the new concept recid and
    # link it to the deposit PID.
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_draft_child(recid)
    RecordDraft.link(recid, depid)
    deposit.commit()
    db.session.commit()
示例#12
0
def loaddemorecords(records, owner):
    """Load demo records.

    For each record a deposit is created from its legacy JSON, the first
    entry of ``record['files']`` is attached as a file whose content is the
    file name itself, and the deposit is published.

    :param records: Iterable of legacy JSON records, each with a ``files``
        list of file names.
    :param owner: User logged in while the deposits are created.
    """
    with current_app.test_request_context():
        login_user(owner)
        for record in records:
            deposit_data = legacyjson_v1(record)
            deposit_id = uuid4()
            zenodo_deposit_minter(deposit_id, deposit_data)
            deposit = ZenodoDeposit.create(deposit_data, id_=deposit_id)
            db.session.commit()
            filename = record['files'][0]
            # BytesIO only accepts bytes; a text file name (e.g. coming from
            # JSON) must be encoded first or it raises TypeError.
            if isinstance(filename, bytes):
                content = filename
            else:
                content = filename.encode('utf-8')
            deposit.files[filename] = BytesIO(content)
            db.session.commit()
            deposit.publish()
            db.session.commit()
示例#13
0
def test_communities_newversion_while_ir_pending_bug(app, db, users,
                                                     communities, deposit,
                                                     deposit_file):
    """Pending inclusion requests must survive creating a new version."""
    deposit['communities'] = ['c1', 'c2']
    first_deposit = publish_and_expunge(db, deposit)
    first_recid, first_record = first_deposit.fetch_published()
    first_depid_value = first_deposit['_deposit']['id']
    first_recid_value = first_recid.pid_value

    # Both requested communities start out as inclusion requests.
    assert InclusionRequest.query.count() == 2

    # Accept the record into 'c1' only.
    c1 = ZenodoCommunity('c1')
    c1.accept_record(first_record, pid=first_recid)

    # Create, fill and publish the second version.
    first_deposit = first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_depid_value = second_depid.pid_value
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()

    # Resolve everything again before checking the outcome.
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    second_depid, second_deposit = deposit_resolver.resolve(
        second_depid_value)
    first_recid, first_record = record_resolver.resolve(first_recid_value)

    # The request for 'c2' must still be there after the new version.
    assert InclusionRequest.query.count() == 1
    assert InclusionRequest.query.one().id_community == 'c2'

    on_records = ['c1']
    on_deposits = ['c1', 'c2']
    assert first_record.get('communities', []) == on_records
    assert first_deposit.get('communities', []) == on_deposits
    assert second_record.get('communities', []) == on_records
    assert second_deposit.get('communities', []) == on_deposits
示例#14
0
def test_communities_newversion_addition(app, db, users, communities, deposit,
                                         deposit_file):
    """A new version should synchronize communities across versions."""
    deposit['communities'] = ['c1', 'c2']
    first_deposit = publish_and_expunge(db, deposit)
    first_recid, first_record = first_deposit.fetch_published()
    first_depid_value = first_deposit['_deposit']['id']
    first_recid_value = first_recid.pid_value

    # Accept the first version into both requested communities.
    c1 = ZenodoCommunity('c1')
    c2 = ZenodoCommunity('c2')
    c1.accept_record(first_record, pid=first_recid)
    c2.accept_record(first_record, pid=first_recid)

    first_deposit = first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_depid_value = second_depid.pid_value
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())

    # In the new version drop 'c2' and request 'c5' instead; communities
    # from the previous version must be kept or removed accordingly.
    second_deposit['communities'] = ['c1', 'c5']
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()

    # Resolve everything again before checking the outcome.
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    second_depid, second_deposit = deposit_resolver.resolve(
        second_depid_value)
    first_recid, first_record = record_resolver.resolve(first_recid_value)

    on_records = ['c1']
    on_deposits = ['c1', 'c5']
    assert first_record.get('communities', []) == on_records
    assert first_deposit.get('communities', []) == on_deposits
    assert second_record.get('communities', []) == on_records
    assert second_deposit.get('communities', []) == on_deposits
示例#15
0
def deposit(app, es, users, location):
    """Create and commit a minimal open-access deposit.

    The deposit is created inside a test request context while logged in as
    the first user of ``users``; the 'deposits' index is refreshed afterwards.
    """
    creators = [
        {'name': 'Doe, John', 'affiliation': 'Atlantis'},
        {'name': 'Smith, Jane', 'affiliation': 'Atlantis'},
    ]
    metadata = {
        'title': 'Test title',
        'creators': creators,
        'description': 'Test Description',
        'publication_date': '2013-05-08',
        'access_right': 'open',
    }
    with app.test_request_context():
        user_datastore = app.extensions['security'].datastore
        owner = user_datastore.get_user(users[0]['email'])
        login_user(owner)
        new_deposit = Deposit.create(metadata)
        new_deposit.commit()
        db_.session.commit()
    current_search.flush_and_refresh(index='deposits')
    return new_deposit
示例#16
0
def test_autoadd_explicit_newversion(app, db, users, communities, deposit,
                                     deposit_file,
                                     communities_autoadd_enabled):
    """Explicitly request the auto-added communities on a new version."""
    # Publish the first version and keep its identifier values.
    first_deposit = publish_and_expunge(db, deposit)
    first_recid, first_record = first_deposit.fetch_published()
    first_depid_value = first_deposit['_deposit']['id']
    first_recid_value = first_recid.pid_value

    # Open a draft for the second version and load it.
    first_deposit = first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_depid_value = second_depid.pid_value
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())

    # Request the communities explicitly and attach a grant.
    second_deposit['communities'] = ['ecfunded', 'grants_comm', 'zenodo']
    second_deposit['grants'] = [{'title': 'SomeGrant'}]
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()

    # Resolve everything again before checking the outcome.
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    second_depid, second_deposit = deposit_resolver.resolve(
        second_depid_value)
    first_recid, first_record = record_resolver.resolve(first_recid_value)

    on_records = ['grants_comm']
    on_deposits = ['ecfunded', 'grants_comm', 'zenodo']
    assert first_record.get('communities', []) == on_records
    assert first_deposit.get('communities', []) == on_deposits
    assert second_record.get('communities', []) == on_records
    assert second_deposit.get('communities', []) == on_deposits
示例#17
0
def test_propagation_with_newversion_open(app, db, users, communities, deposit,
                                          deposit_file):
    """Community changes on an old version should reach an open draft."""
    # Publish the first version and immediately reopen it for editing.
    first_deposit = publish_and_expunge(db, deposit)
    first_deposit = first_deposit.edit()

    first_recid, first_record = first_deposit.fetch_published()
    first_recid_value = first_recid.pid_value

    # Start a second version; its draft is not published yet.
    first_deposit = first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_depid_value = second_depid.pid_value
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())

    # Request both communities through the first version and publish it.
    first_deposit['communities'] = ['c1', 'c2']
    first_deposit = publish_and_expunge(db, first_deposit)

    # Accept the first version into 'c1' only.
    first_recid, first_record = record_resolver.resolve(first_recid_value)
    c1 = ZenodoCommunity('c1')
    c1.accept_record(first_record, pid=first_recid)

    # The open draft is expected to list both requested communities.
    second_depid, second_deposit = deposit_resolver.resolve(
        second_depid_value)
    assert second_deposit['communities'] == ['c1', 'c2']
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()

    # Only the accepted community is expected on the published record.
    assert second_record['communities'] == ['c1']
示例#18
0
def test_communities_newversion_while_ir_pending_bug(
        app, db, users, communities, deposit, deposit_file):
    """Pending inclusion requests must survive creating a new version."""
    deposit['communities'] = ['c1', 'c2']
    first_deposit = publish_and_expunge(db, deposit)
    first_recid, first_record = first_deposit.fetch_published()
    first_depid_value = first_deposit['_deposit']['id']
    first_recid_value = first_recid.pid_value

    # Both requested communities start out as inclusion requests.
    assert InclusionRequest.query.count() == 2

    # Accept the record into 'c1' only.
    c1 = ZenodoCommunity('c1')
    c1.accept_record(first_record, pid=first_recid)

    # Create, fill and publish the second version.
    first_deposit = first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_depid_value = second_depid.pid_value
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()

    # Resolve everything again before checking the outcome.
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    second_depid, second_deposit = deposit_resolver.resolve(
        second_depid_value)
    first_recid, first_record = record_resolver.resolve(first_recid_value)

    # The request for 'c2' must still be there after the new version.
    assert InclusionRequest.query.count() == 1
    assert InclusionRequest.query.one().id_community == 'c2'

    on_records = ['c1']
    on_deposits = ['c1', 'c2']
    assert first_record.get('communities', []) == on_records
    assert first_deposit.get('communities', []) == on_deposits
    assert second_record.get('communities', []) == on_records
    assert second_deposit.get('communities', []) == on_deposits
示例#19
0
def test_communities_newversion_addition(
        app, db, users, communities, deposit, deposit_file):
    """A new version should synchronize communities across versions."""
    deposit['communities'] = ['c1', 'c2']
    first_deposit = publish_and_expunge(db, deposit)
    first_recid, first_record = first_deposit.fetch_published()
    first_depid_value = first_deposit['_deposit']['id']
    first_recid_value = first_recid.pid_value

    # Accept the first version into both requested communities.
    c1 = ZenodoCommunity('c1')
    c2 = ZenodoCommunity('c2')
    c1.accept_record(first_record, pid=first_recid)
    c2.accept_record(first_record, pid=first_recid)

    first_deposit = first_deposit.newversion()
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_depid_value = second_depid.pid_value
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())

    # In the new version drop 'c2' and request 'c5' instead; communities
    # from the previous version must be kept or removed accordingly.
    second_deposit['communities'] = ['c1', 'c5']
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()

    # Resolve everything again before checking the outcome.
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    second_depid, second_deposit = deposit_resolver.resolve(
        second_depid_value)
    first_recid, first_record = record_resolver.resolve(first_recid_value)

    on_records = ['c1']
    on_deposits = ['c1', 'c5']
    assert first_record.get('communities', []) == on_records
    assert first_deposit.get('communities', []) == on_deposits
    assert second_record.get('communities', []) == on_records
    assert second_deposit.get('communities', []) == on_deposits
示例#20
0
文件: utils.py 项目: lnielsen/zenodo
def get_github_repository(pid):
    """Get GitHub repository from depid.

    The release attached to the deposit itself is preferred. Otherwise the
    deposits of all versions under the same concept recid are searched for
    a release.

    :param pid: Identifier passed to ``fetch_depid`` to obtain the depid.
    :returns: The ``repository`` of the release found, or ``None`` when no
        release is found, the deposit has no ``conceptrecid`` or the
        versioning relation does not exist.
    """
    depid = fetch_depid(pid)
    # First check if the passed depid is a GitHub release
    release = (Release.query.filter_by(record_id=depid.object_uuid)
               .one_or_none())
    if release:
        return release.repository

    deposit = ZenodoDeposit.get_record(depid.object_uuid)
    conceptrecid = deposit.get('conceptrecid')
    if not conceptrecid:
        return None
    parent = PersistentIdentifier.get(
        pid_type='recid', pid_value=conceptrecid)
    pv = PIDVersioning(parent=parent)
    if not pv.exists:
        return None

    record_ids = [r.object_uuid for r in pv.children]
    # Materialize the ids: ``in_()`` is documented to take a list of values,
    # not an arbitrary iterator.
    deposit_ids = [rec.depid.object_uuid
                   for rec in ZenodoRecord.get_records(record_ids)]
    release = (Release.query
               .filter(Release.record_id.in_(deposit_ids))
               .first())
    return release.repository if release else None
示例#21
0
def test_relations_serialization(app, db, deposit, deposit_file):
    """Check the serialized 'version' relation at each versioning step."""
    def version_relations(index, is_last, last_recid, count, draft=None):
        """Build the expected serialization of one 'version' relation."""
        return {
            "version": [{
                "draft_child_deposit": draft,
                "index": index,
                "is_last": is_last,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": last_recid
                },
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
                "count": count
            }]
        }

    first_deposit = publish_and_expunge(db, deposit)
    first_depid_value = first_deposit['_deposit']['id']
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)

    # Single published version: no draft, it is its own last child.
    first_recid, first_record = first_deposit.fetch_published()
    assert serialize_relations(first_recid) == \
        version_relations(0, True, "2", 1)

    # With a new version opened, the draft deposit must be reported.
    first_deposit.newversion()
    draft_info = {"pid_type": "depid", "pid_value": "3"}
    assert serialize_relations(first_recid) == \
        version_relations(0, True, "2", 1, draft=draft_info)

    # Publish the new version.
    versioning = PIDVersioning(child=first_recid)
    second_depid = versioning.draft_child_deposit
    second_deposit = ZenodoDeposit.get_record(
        second_depid.get_assigned_object())
    second_deposit.files['file.txt'] = BytesIO(b('file1'))
    second_deposit = publish_and_expunge(db, second_deposit)
    second_recid, second_record = second_deposit.fetch_published()
    first_depid, first_deposit = deposit_resolver.resolve(first_depid_value)
    first_recid, first_record = first_deposit.fetch_published()

    # After publishing, the first version has no draft information and is
    # no longer the last child.
    assert serialize_relations(first_recid) == \
        version_relations(0, False, "3", 2)

    # The new version is the last child now.
    assert serialize_relations(second_recid) == \
        version_relations(1, True, "3", 2)
def test_legacyjson_to_record_translation(app, db, es, grant_record,
                                          license_record, locations):
    """Load a fully populated legacy JSON payload and validate the deposit."""
    orcid_url = "http://orcid.org/0000-0002-1825-0097"
    gnd_url = "http://d-nb.info/gnd/170118215"
    # The embargo date is two days after the current UTC date.
    embargo_date = (datetime.utcnow().date() + timedelta(days=2)).isoformat()

    creators = [
        {
            "name": "Doe, John",
            "affiliation": "Atlantis",
            "orcid": "0000-0002-1825-0097",
            "gnd": "170118215",
        },
        {"name": "Smith, Jane", "affiliation": "Atlantis"},
    ]
    subjects = [
        {"scheme": "gnd", "identifier": "1234567899", "term": "Astronaut"},
        {"scheme": "gnd", "identifier": "1234567898", "term": "Amish"},
    ]
    related_identifiers = [
        {"identifier": '10.1234/foo.bar2', "relation": 'isCitedBy'},
        {"identifier": '10.1234/foo.bar3', "relation": 'cites'},
        {
            "identifier": '2011ApJS..192...18K',
            "relation": 'isAlternativeIdentifier',
        },
    ]
    thesis_supervisors = [
        {"name": "Doe Sr., John", "affiliation": "Atlantis"},
        {
            "name": "Smith Sr., Jane",
            "affiliation": "Atlantis",
            "orcid": orcid_url,
            "gnd": gnd_url,
        },
    ]
    contributors = [
        {"name": "Doe Sr., Jochen", "affiliation": "Atlantis",
         "type": "Other"},
        {
            "name": "Smith Sr., Marco",
            "affiliation": "Atlantis",
            "orcid": orcid_url,
            "gnd": gnd_url,
            "type": "DataCurator",
        },
    ]

    metadata = {
        "access_right": 'embargoed',
        "communities": [{'identifier': 'cfa'}],
        "conference_acronym": 'Some acronym',
        "conference_dates": 'Some dates',
        "conference_place": 'Some place',
        "conference_title": 'Some title',
        "conference_url": 'http://someurl.com',
        "conference_session": 'VI',
        "conference_session_part": '1',
        "creators": creators,
        "description": "Some description",
        "doi": "10.1234/foo.bar",
        "embargo_date": embargo_date,
        "grants": [{"id": "282896"}],
        "imprint_isbn": "Some isbn",
        "imprint_place": "Some place",
        "imprint_publisher": "Some publisher",
        "journal_issue": "Some issue",
        "journal_pages": "Some pages",
        "journal_title": "Some journal name",
        "journal_volume": "Some volume",
        "keywords": ["Keyword 1", "keyword 2"],
        "subjects": subjects,
        "license": "CC0-1.0",
        "notes": "Some notes",
        "partof_pages": "SOme part of",
        "partof_title": "Some part of title",
        "prereserve_doi": True,
        "publication_date": "2013-09-12",
        "publication_type": "book",
        "references": ["Reference 1", "Reference 2"],
        "related_identifiers": related_identifiers,
        "thesis_supervisors": thesis_supervisors,
        "thesis_university": "Some thesis_university",
        "contributors": contributors,
        "title": "Test title",
        "upload_type": "publication",
    }

    schema = legacyjson.LegacyRecordSchemaV1(strict=True)
    loaded = schema.load({"metadata": metadata})
    ZenodoDeposit.create(loaded.data).validate()
示例#23
0
def test_versioning_indexing(db, es, deposit, deposit_file):
    """Verify the indexed 'version' relations over a two-version cycle.

    The relations stored in the deposit and record indices are checked
    at three points: after publishing the first version, after opening
    a draft for a new version, and after publishing that new version.
    """
    deposit_index_name = 'deposits-records-record-v1.0.0'
    records_index_name = 'records-record-v1.0.0'

    def refresh_indices():
        """Flush and refresh the deposit index, then the record index."""
        for name in (deposit_index_name, records_index_name):
            current_search.flush_and_refresh(index=name)

    def search_hits(name):
        """Return the hit list of an unfiltered search on index ``name``."""
        return current_search.client.search(index=name)['hits']['hits']

    def fetch_doc(name, obj):
        """Fetch the document indexed under ``obj.id`` from ``name``."""
        return current_search.client.get(index=name, id=obj.id)

    def version_relation(index, is_last, last_child, count, draft=None):
        """Build the expected serialized 'version' relation."""
        draft_child = None
        if draft is not None:
            draft_child = {"pid_type": "depid", "pid_value": draft}
        return {
            "version": [{
                "draft_child_deposit": draft_child,
                "index": index,
                "is_last": is_last,
                "last_child": {"pid_type": "recid", "pid_value": last_child},
                "parent": {"pid_type": "recid", "pid_value": "1"},
                "count": count,
            }]
        }

    # --- Stage 1: only the first version is published ---
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    RecordIndexer().index_by_id(str(record_v1.id))
    RecordIndexer().process_bulk_queue()
    refresh_indices()
    deposit_hits = search_hits(deposit_index_name)
    record_hits = search_hits(records_index_name)
    assert len(deposit_hits) == 1
    assert len(record_hits) == 1
    assert 'relations' in deposit_hits[0]['_source']
    assert 'relations' in record_hits[0]['_source']

    single_version = version_relation(0, True, "2", 1)
    assert deposit_hits[0]['_source']['relations'] == single_version
    assert record_hits[0]['_source']['relations'] == single_version

    # --- Stage 2: a draft of the second version exists ---
    deposit_v1.newversion()
    depid_v2 = PIDVersioning(child=recid_v1).draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    RecordIndexer().process_bulk_queue()
    refresh_indices()
    deposit_hits = search_hits(deposit_index_name)
    record_hits = search_hits(records_index_name)

    # Both deposits are indexed, but the second record does not exist yet
    assert len(deposit_hits) == 2
    assert len(record_hits) == 1

    doc_dep1 = fetch_doc(deposit_index_name, deposit_v1)
    doc_dep2 = fetch_doc(deposit_index_name, deposit_v2)

    # For deposits, draft children are included in "count"
    assert doc_dep1['_source']['relations'] == version_relation(
        0, False, "2", 2, draft="3")
    assert doc_dep2['_source']['relations'] == version_relation(
        1, True, "2", 2, draft="3")

    # --- Stage 3: the second version is published ---
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    RecordIndexer().index_by_id(str(record_v2.id))
    RecordIndexer().process_bulk_queue()
    refresh_indices()

    deposit_hits = search_hits(deposit_index_name)
    record_hits = search_hits(records_index_name)
    assert len(deposit_hits) == 2
    assert len(record_hits) == 2

    doc_dep1 = fetch_doc(deposit_index_name, deposit_v1)
    doc_dep2 = fetch_doc(deposit_index_name, deposit_v2)

    doc_rec1 = fetch_doc(records_index_name, record_v1)
    doc_rec2 = fetch_doc(records_index_name, record_v2)

    # Deposits and records now expose the same relations per version
    first_version = version_relation(0, False, "3", 2)
    second_version = version_relation(1, True, "3", 2)
    assert doc_dep1['_source']['relations'] == first_version
    assert doc_dep2['_source']['relations'] == second_version

    assert doc_rec1['_source']['relations'] == first_version
    assert doc_rec2['_source']['relations'] == second_version
def test_relations_serialization(app, db, deposit, deposit_file):
    """Check ``serialize_relations`` output while a new version is made."""

    def version_relation(index, is_last, last_child, count, draft=None):
        """Build the expected serialized 'version' relation."""
        draft_child = None
        if draft is not None:
            draft_child = {"pid_type": "depid", "pid_value": draft}
        return {
            "version": [
                {
                    "draft_child_deposit": draft_child,
                    "index": index,
                    "is_last": is_last,
                    "last_child": {
                        "pid_type": "recid",
                        "pid_value": last_child,
                    },
                    "parent": {"pid_type": "recid", "pid_value": "1"},
                    "count": count,
                }
            ]
        }

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    recid_v1, record_v1 = deposit_v1.fetch_published()
    # A single published version: it is both first and last child
    assert serialize_relations(recid_v1) == version_relation(
        0, True, "2", 1)

    deposit_v1.newversion()
    # The open draft must now be reported as "draft_child_deposit"
    assert serialize_relations(recid_v1) == version_relation(
        0, True, "2", 1, draft="3")

    # Publish the new version
    depid_v2 = PIDVersioning(child=recid_v1).draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    # Once published, the draft information disappears and the first
    # version is no longer the last child
    assert serialize_relations(recid_v1) == version_relation(
        0, False, "3", 2)

    # The new version takes over as the last child
    assert serialize_relations(recid_v2) == version_relation(
        1, True, "3", 2)
示例#25
0
def test_related_identifiers_serialization(app, db, deposit, deposit_file):
    """Check PID relations rendered as related identifiers."""
    # Every version is expected to point at the parent (concept) DOI.
    # TODO: serialization of new version relations is disabled, so no
    # 'isPreviousVersionOf' / 'isNewVersionOf' entries are expected yet.
    part_of_parent = {
        'scheme': 'doi',
        'identifier': '10.5072/zenodo.1',
        'relation': 'isPartOf'
    }

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, _ = deposit_v1.fetch_published()

    # Create and publish a second version
    deposit_v1.newversion()
    depid_v2 = PIDVersioning(child=recid_v1).draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)

    # Re-open the second version and publish it again without changes
    deposit_v2 = deposit_v2.edit()
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, _ = deposit_v2.fetch_published()

    # Reload the first version from the database
    _, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, _ = deposit_v1.fetch_published()

    deposit_resolver.resolve(depid_v1_value)

    assert serialize_related_identifiers(recid_v1) == [part_of_parent]
    assert serialize_related_identifiers(recid_v2) == [part_of_parent]

    # The parent lists each published version as a part
    parent_pid = PersistentIdentifier.get('recid', '1')
    expected_parent = [
        {
            'relation': 'hasPart',
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.2'
        },
        {
            'relation': 'hasPart',
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.3'
        },
    ]
    assert serialize_related_identifiers(parent_pid) == expected_parent
示例#26
0
def test_autoadd(app, db, users, communities, deposit, deposit_file,
                 communities_autoadd_enabled):
    """Exercise community auto-adding across two versions of a record."""
    # Expected community lists at the different stages of the workflow
    all_requested = ['c1', 'c2', 'c3', 'ecfunded', 'grants_comm', 'zenodo']
    only_autoadded = ['ecfunded', 'grants_comm', 'zenodo']
    # (community id, number of pending inclusion requests per record)
    pending_requests = (
        ('c1', 1),
        ('c2', 1),
        ('c3', 0),
        ('grants_comm', 0),
        ('ecfunded', 1),
        ('zenodo', 1),
    )

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    # Open, fill and publish a second version
    deposit_v1 = deposit_v1.newversion()
    depid_v2 = PIDVersioning(child=recid_v1).draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)

    # Request 'c1', 'c2' and 'c3' plus a grant through the second version
    deposit_v2 = deposit_v2.edit()
    deposit_v2['communities'] = ['c1', 'c2', 'c3']
    deposit_v2['grants'] = [{'title': 'SomeGrant'}]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    assert record_v2['grants'] == [{'title': 'SomeGrant'}]
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    for record in (record_v1, record_v2):
        assert record.get('communities', []) == ['c3', 'grants_comm']
    for dep in (deposit_v1, deposit_v2):
        assert dep.get('communities', []) == all_requested

    community_api = dict(
        (name, ZenodoCommunity(name)) for name, _ in pending_requests)

    # Inclusion requests should be visible for both records
    for name, pending in pending_requests:
        api = community_api[name]
        assert api.get_comm_irs(record_v1, pid=recid_v1).count() == pending
        assert api.get_comm_irs(record_v2, pid=recid_v2).count() == pending

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (resolved through version)
    community_api['c1'].accept_record(record_v2, pid=recid_v2)
    community_api['c2'].accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting a single record must propagate to every version
    accepted = ['c1', 'c2', 'c3', 'grants_comm']
    for record in (record_v1, record_v2):
        assert record.get('communities', []) == accepted
    for dep in (deposit_v1, deposit_v2):
        assert dep.get('communities', []) == all_requested

    # Dropping 'c1'-'c3' from deposit_v1 should remove them from both
    # published records and from the other deposit as well
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    for record in (record_v1, record_v2):
        assert record.get('communities', []) == ['grants_comm']
    for dep in (deposit_v1, deposit_v2):
        assert dep.get('communities', []) == only_autoadded
示例#27
0
def versioning_github_repository(uuid):
    """Link the published releases of a GitHub repository as versions.

    Mints and registers a concept recid for the repository's published
    releases, mints a concept DOI when more than one record is linked,
    stamps records and deposits with those values and their merged
    communities, inserts each recid as a version child, then commits
    and reindexes the siblings. Returns early when there is nothing to
    migrate.

    :param uuid: UUID of the repository (invenio_github.models.Repository)
    """
    from invenio_github.models import Repository, ReleaseStatus
    from zenodo.modules.deposit.minters import zenodo_concept_recid_minter
    from zenodo.modules.records.minters import zenodo_concept_doi_minter
    from invenio_pidrelations.contrib.records import index_siblings

    def merged_communities(items):
        """Return the sorted union of the 'communities' of ``items``."""
        return sorted(set(
            comm for item in items for comm in item.get('communities', [])
        ))

    repo = Repository.query.get(uuid)
    releases = repo.releases.filter_by(status=ReleaseStatus.PUBLISHED).all()

    # Nothing to migrate if no successful release was ever made
    if not releases:
        return

    # Load the deposit of every release that still has record metadata
    loaded = []
    for release in releases:
        if release.recordmetadata.json is None:
            continue
        loaded.append(ZenodoDeposit.get_record(release.record_id))
    # Keep only the deposits which were not removed, ordered by recid
    deposits = [dep for dep in loaded if 'removed_by' not in dep]
    deposits.sort(key=lambda dep: int(dep['recid']))

    recids = [
        PersistentIdentifier.get('recid', dep['recid']) for dep in deposits
    ]
    records = [ZenodoRecord.get_record(pid.object_uuid) for pid in recids]

    # There were successful releases, but deposits/records were removed since
    if not records:
        return

    assert not any('conceptrecid' in rec for rec in records), \
        "One or more of the release records have been already migrated"
    assert not any('conceptrecid' in dep for dep in deposits), \
        "One or more of the release deposits have been already migrated"

    first_record = records[0]
    concept_pid = zenodo_concept_recid_minter(record_uuid=first_record.id,
                                              data=first_record)
    concept_pid.register()

    # Mint the Concept DOI only when more than one record gets linked
    concept_doi = (
        zenodo_concept_doi_minter(first_record.id, first_record)
        if len(records) > 1 else None
    )

    record_communities = merged_communities(records)
    deposit_communities = merged_communities(deposits)

    def stamp(items, communities):
        """Write the concept identifiers and communities, then commit."""
        for item in items:
            item['conceptrecid'] = concept_pid.pid_value
            if concept_doi:
                item['conceptdoi'] = concept_doi.pid_value
            if communities:
                item['communities'] = communities
            item.commit()

    stamp(records, record_communities)
    stamp(deposits, deposit_communities)

    versioning = PIDVersioning(parent=concept_pid)
    for pid in recids:
        versioning.insert_child(pid)
    versioning.update_redirect()

    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(recids[-1].pid_value, str(records[-1].id))
    db.session.commit()

    # Reindex all siblings
    index_siblings(versioning.last_child, with_deposits=True)
示例#28
0
def test_autoadd(app, db, users, communities, deposit, deposit_file,
                 communities_autoadd_enabled):
    """Check auto-added communities through the deposit/community APIs."""
    # Community lists the deposits and records are compared against
    requested_and_auto = [
        'c1', 'c2', 'c3', 'ecfunded', 'grants_comm', 'zenodo'
    ]
    auto_only = ['ecfunded', 'grants_comm', 'zenodo']
    # Pending inclusion requests expected for each community, per record
    expected_irs = [
        ('c1', 1),
        ('c2', 1),
        ('c3', 0),
        ('grants_comm', 0),
        ('ecfunded', 1),
        ('zenodo', 1),
    ]

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    # Create the second version and publish it with one file
    deposit_v1 = deposit_v1.newversion()
    versioning = PIDVersioning(child=recid_v1)
    depid_v2 = versioning.draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)

    # Ask for 'c1', 'c2', 'c3' and a grant by editing the second version
    deposit_v2 = deposit_v2.edit()
    deposit_v2['communities'] = ['c1', 'c2', 'c3']
    deposit_v2['grants'] = [{'title': 'SomeGrant'}]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    assert record_v2['grants'] == [{'title': 'SomeGrant'}]
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    assert record_v1.get('communities', []) == ['c3', 'grants_comm']
    assert record_v2.get('communities', []) == ['c3', 'grants_comm']
    assert deposit_v1.get('communities', []) == requested_and_auto
    assert deposit_v2.get('communities', []) == requested_and_auto

    apis = dict((name, ZenodoCommunity(name)) for name, _ in expected_irs)

    # Inclusion requests should be visible for both records
    for name, count in expected_irs:
        comm_api = apis[name]
        assert comm_api.get_comm_irs(
            record_v1, pid=recid_v1).count() == count
        assert comm_api.get_comm_irs(
            record_v2, pid=recid_v2).count() == count

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (resolved through version)
    apis['c1'].accept_record(record_v2, pid=recid_v2)
    apis['c2'].accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # An acceptance made through one record applies to all versions
    accepted = ['c1', 'c2', 'c3', 'grants_comm']
    assert record_v1.get('communities', []) == accepted
    assert record_v2.get('communities', []) == accepted
    assert deposit_v1.get('communities', []) == requested_and_auto
    assert deposit_v2.get('communities', []) == requested_and_auto

    # Clearing the communities on deposit_v1 should remove 'c1'-'c3' from
    # the two published records and from the other deposit too
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == ['grants_comm']
    assert record_v2.get('communities', []) == ['grants_comm']
    assert deposit_v1.get('communities', []) == auto_only
    assert deposit_v2.get('communities', []) == auto_only
示例#29
0
def test_archiving(app, db, deposit, deposit_file, locations, archive_fs):
    """Test ZenodoSIP archiving.

    Archive writing is enabled only for the duration of the checks. The
    previous ``SIPSTORE_ARCHIVER_WRITING_ENABLED`` value is restored in a
    ``finally`` clause, so a failing assertion does not leave the setting
    enabled on the application object.
    """
    # Stash the configuration and enable writing
    config_key = 'SIPSTORE_ARCHIVER_WRITING_ENABLED'
    orig = app.config[config_key]
    app.config[config_key] = True
    try:
        _check_archiving(db, deposit, archive_fs)
    finally:
        app.config[config_key] = orig


def _check_archiving(db, deposit, archive_fs):
    """Publish three SIPs and verify their BagIt metadata and archives.

    The SIPs come from publishing the deposit, a metadata-only edit, and
    a new version that removes 'test.txt' and adds 'test3.txt'.

    :param db: database fixture handed to ``publish_and_expunge``.
    :param deposit: deposit fixture; 'test2.txt' is added to its files.
    :param archive_fs: filesystem fixture the archives are read from.
    """
    def bagit_files(sip):
        """Return the 'files' entry of the SIP's 'bagit' metadata."""
        return json.loads(next(
            m.content for m in sip.metadata if m.type.name == 'bagit'
        ))['files']

    deposit.files['test2.txt'] = BytesIO(b'test-two')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_id = recid_v1.id
    # Record files after publishing: 'test.txt', 'test2.txt'

    sip1 = SIP(SIPModel.query.one())
    sip1_id = sip1.id

    # Edit the metadata
    deposit_v1 = deposit_v1.edit()
    deposit_v1['title'] = "New title"
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    # Record files after publishing: 'test.txt', 'test2.txt'
    sip2_id = SIPModel.query.order_by(SIPModel.created.desc()).first().id

    # Create a new version
    deposit_v1.newversion()
    recid_v1 = PersistentIdentifier.query.get(recid_v1_id)
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    del deposit_v2.files['test.txt']
    deposit_v2.files['test3.txt'] = BytesIO(b('test-three'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    # Record files after publishing: 'test2.txt', 'test3.txt'

    # Reload the SIPs: the first two by id, the third as the newest one
    sip1 = SIP(SIPModel.query.get(sip1_id))
    sip2 = SIP(SIPModel.query.get(sip2_id))
    sip3 = SIP(SIPModel.query.order_by(SIPModel.created.desc()).first())

    # Because we are using secure_filename when writing SIPFiles we need to
    # generate the correct names: <SIPFile.id>-<secure_filename>
    s1_file1_fn = '{0}-test.txt'.format(fetch_suff(sip1, 'test.txt').file_id)
    s1_file1_fp = 'data/files/{0}'.format(s1_file1_fn)

    s1_file2_fn = '{0}-test2.txt'.format(fetch_suff(sip1, 'test2.txt').file_id)
    s1_file2_fp = 'data/files/{0}'.format(s1_file2_fn)

    s3_file2_fn = '{0}-test2.txt'.format(fetch_suff(sip3, 'test2.txt').file_id)
    s3_file2_fp = 'data/files/{0}'.format(s3_file2_fn)

    s3_file3_fn = '{0}-test3.txt'.format(fetch_suff(sip3, 'test3.txt').file_id)
    s3_file3_fp = 'data/files/{0}'.format(s3_file3_fn)

    sip1_bagmeta = bagit_files(sip1)
    sip2_bagmeta = bagit_files(sip2)
    sip3_bagmeta = bagit_files(sip3)

    # Check if Bagit metadata contains the correct file-fetching information
    assert set([f['filepath'] for f in sip1_bagmeta]) == \
        set([s1_file1_fp,
             s1_file2_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt'])
    assert not BagItArchiver._is_fetched(
        get_m_item(sip1_bagmeta, s1_file1_fp))
    assert not BagItArchiver._is_fetched(
        get_m_item(sip1_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip2_bagmeta]) == \
        set([s1_file1_fp,
             s1_file2_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])
    # Both files should be fetched since it's only metadata-edit submission
    assert BagItArchiver._is_fetched(
        get_m_item(sip2_bagmeta, s1_file1_fp))
    assert BagItArchiver._is_fetched(
        get_m_item(sip2_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip3_bagmeta]) == \
        set([s3_file2_fp,
             s3_file3_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])

    # First file should be fetched from previous version and new file should
    # be archived in this bag.
    assert BagItArchiver._is_fetched(
        get_m_item(sip3_bagmeta, s3_file2_fp))
    assert not BagItArchiver._is_fetched(
        get_m_item(sip3_bagmeta, s3_file3_fp))
    archiver1 = BagItArchiver(sip1)
    archiver2 = BagItArchiver(sip2)
    archiver3 = BagItArchiver(sip3)

    # Each archiver subpath follows: '<recid>/r/<ISO-8601-SIP-timestamp>'
    sip1_ts = arrow.get(sip1.model.created).isoformat()
    sip2_ts = arrow.get(sip2.model.created).isoformat()
    sip3_ts = arrow.get(sip3.model.created).isoformat()
    assert archiver1.get_archive_subpath() == '2/r/{0}'.format(sip1_ts)
    assert archiver2.get_archive_subpath() == '2/r/{0}'.format(sip2_ts)
    assert archiver3.get_archive_subpath() == '3/r/{0}'.format(sip3_ts)

    # As a test, write the SIPs in reverse chronological order
    assert not sip1.archived
    assert not sip2.archived
    assert not sip3.archived
    archive_sip.delay(sip3.id)
    archive_sip.delay(sip2.id)
    archive_sip.delay(sip1.id)
    assert sip1.archived
    assert sip2.archived
    assert sip3.archived

    fs1 = archive_fs.opendir(archiver1.get_archive_subpath())
    assert set(fs1.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data'])
    assert set(fs1.listdir('data')) == set(['metadata', 'files',
                                            'filenames.txt'])
    assert fs1.listdir('data/metadata') == ['record-json.json', ]
    assert set(fs1.listdir('data/files')) == set([s1_file1_fn, s1_file2_fn])

    fs2 = archive_fs.opendir(archiver2.get_archive_subpath())
    assert set(fs2.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data', 'fetch.txt'])
    # Second SIP has written only the metadata, so there should be no
    # 'files/' directory, but 'filenames.txt' should still be there
    # because of the fetch.txt
    assert set(fs2.listdir('data')) == set(['metadata', 'filenames.txt'])
    assert fs2.listdir('data/metadata') == ['record-json.json', ]

    with fs2.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Fetched files should correctly fetch the files from the first archive
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 4 {fn}'.format(fn=s1_file1_fp, base=base_uri,
                                               s1ts=sip1_ts),
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s1_file2_fp, base=base_uri,
                                               s1ts=sip1_ts),
    ])

    fs3 = archive_fs.opendir(archiver3.get_archive_subpath())
    assert set(fs3.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data', 'fetch.txt'])
    # Third SIP should write only the extra 'test3.txt' file
    assert set(fs3.listdir('data')) == set(['metadata', 'files',
                                            'filenames.txt'])
    assert fs3.listdir('data/metadata') == ['record-json.json', ]
    assert fs3.listdir('data/files') == [s3_file3_fn, ]
    with fs3.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Since 'test.txt' was removed in third SIP, we should only fetch the
    # 'test2.txt', also from the first archive, since that's where this
    # file resides physically.
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s3_file2_fp, base=base_uri,
                                               s1ts=sip1_ts),
    ])
def test_legacyjson_to_record_translation(app, db, es, grant_records,
                                          license_record, locations,
                                          communities):
    """Load a fully populated legacy JSON payload and validate the deposit.

    The payload below is loaded through the strict ``LegacyRecordSchemaV1``
    and the loaded data is used to create a ``ZenodoDeposit``, which must
    pass validation.
    """
    # The embargoed access right is paired with a date two days from today.
    embargo_date = (datetime.utcnow().date() + timedelta(days=2)).isoformat()

    metadata = {
        'access_right': 'embargoed',
        'communities': [{'identifier': 'c1'}],
        'conference_acronym': 'Some acronym',
        'conference_dates': 'Some dates',
        'conference_place': 'Some place',
        'conference_title': 'Some title',
        'conference_url': 'http://someurl.com',
        'conference_session': 'VI',
        'conference_session_part': '1',
        'creators': [
            {
                'name': "Doe, John",
                'affiliation': "Atlantis",
                'orcid': "0000-0002-1825-0097",
                'gnd': "170118215",
            },
            {'name': "Smith, Jane", 'affiliation': "Atlantis"},
        ],
        'description': "Some description",
        'doi': "10.1234/foo.bar",
        'embargo_date': embargo_date,
        'grants': [{'id': "282896"}],
        'imprint_isbn': "Some isbn",
        'imprint_place': "Some place",
        'imprint_publisher': "Some publisher",
        'journal_issue': "Some issue",
        'journal_pages': "Some pages",
        'journal_title': "Some journal name",
        'journal_volume': "Some volume",
        'keywords': ["Keyword 1", "keyword 2"],
        'subjects': [
            {
                'scheme': "gnd",
                'identifier': "1234567899",
                'term': "Astronaut",
            },
            {
                'scheme': "gnd",
                'identifier': "1234567898",
                'term': "Amish",
            },
        ],
        'license': "CC0-1.0",
        'notes': "Some notes",
        'partof_pages': "SOme part of",
        'partof_title': "Some part of title",
        'prereserve_doi': True,
        'publication_date': "2013-09-12",
        'publication_type': "book",
        'references': ["Reference 1", "Reference 2"],
        'related_identifiers': [
            {'identifier': '10.1234/foo.bar2', 'relation': 'isCitedBy'},
            {'identifier': '10.1234/foo.bar3', 'relation': 'cites'},
            {
                'identifier': '2011ApJS..192...18K',
                'relation': 'isAlternativeIdentifier',
            },
        ],
        'thesis_supervisors': [
            {'name': "Doe Sr., John", 'affiliation': "Atlantis"},
            {
                'name': "Smith Sr., Jane",
                'affiliation': "Atlantis",
                'orcid': "http://orcid.org/0000-0002-1825-0097",
                'gnd': "http://d-nb.info/gnd/170118215",
            },
        ],
        'thesis_university': "Some thesis_university",
        'contributors': [
            {
                'name': "Doe Sr., Jochen",
                'affiliation': "Atlantis",
                'type': "Other",
            },
            {
                'name': "Smith Sr., Marco",
                'affiliation': "Atlantis",
                'orcid': "http://orcid.org/0000-0002-1825-0097",
                'gnd': "http://d-nb.info/gnd/170118215",
                'type': "DataCurator",
            },
        ],
        'title': "Test title",
        'upload_type': "publication",
    }

    schema = legacyjson.LegacyRecordSchemaV1(strict=True)
    loaded = schema.load({'metadata': metadata})
    ZenodoDeposit.create(loaded.data).validate()
示例#31
0
def test_versioning_indexing(db, es, deposit, deposit_file):
    """Verify the 'version' relations stored in the search indices.

    The relations of the indexed deposits and records are compared at three
    points: after publishing the first version, after opening a draft for a
    new version, and after publishing that second version.
    """
    deposit_index_name = 'deposits-records-record-v1.0.0'
    records_index_name = 'records-record-v1.0.0'
    index_names = (deposit_index_name, records_index_name)

    def pid_ref(pid_type, pid_value):
        """Build the serialized form of a PID reference."""
        return {"pid_type": pid_type, "pid_value": pid_value}

    def version_relations(index, is_last, last_child, count, draft=None):
        """Build the expected 'relations' value of one indexed document."""
        return {
            "version": [
                {
                    "draft_child_deposit": draft,
                    "index": index,
                    "is_last": is_last,
                    "last_child": pid_ref("recid", last_child),
                    "count": count,
                    "parent": pid_ref("recid", "1"),
                }
            ]
        }

    def refreshed_hits():
        """Refresh both indices, then return (deposit hits, record hits)."""
        for name in index_names:
            current_search.flush_and_refresh(index=name)
        return tuple(
            current_search.client.search(index=name)['hits']['hits']
            for name in index_names)

    def indexed_relations(index_name, obj):
        """Fetch the indexed document of ``obj`` and return its relations."""
        doc = current_search.client.get(index=index_name, id=obj.id)
        return doc['_source']['relations']

    # --- First version published -----------------------------------------
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    RecordIndexer().index_by_id(str(record_v1.id))
    RecordIndexer().process_bulk_queue()
    dep_hits, rec_hits = refreshed_hits()
    assert len(dep_hits) == 1
    assert len(rec_hits) == 1
    assert 'relations' in dep_hits[0]['_source']
    assert 'relations' in rec_hits[0]['_source']

    only_version = version_relations(
        index=0, is_last=True, last_child="2", count=1)
    assert dep_hits[0]['_source']['relations'] == only_version
    assert rec_hits[0]['_source']['relations'] == only_version

    # --- Draft of the second version opened ------------------------------
    deposit_v1.newversion()
    draft_pid = PIDVersioning(child=recid_v1).draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(draft_pid.object_uuid)
    deposit_v1 = deposit_resolver.resolve(depid_v1_value)[1]

    RecordIndexer().process_bulk_queue()
    dep_hits, rec_hits = refreshed_hits()

    assert len(dep_hits) == 2  # Two deposits should be indexed
    assert len(rec_hits) == 1  # One, since record does not exist yet

    relations_d1 = indexed_relations(deposit_index_name, deposit_v1)
    relations_d2 = indexed_relations(deposit_index_name, deposit_v2)

    # For deposits, draft children are also counted (hence count == 2).
    draft_ref = pid_ref("depid", "3")
    assert relations_d1 == version_relations(
        index=0, is_last=False, last_child="2", count=2, draft=draft_ref)
    assert relations_d2 == version_relations(
        index=1, is_last=True, last_child="2", count=2, draft=draft_ref)

    # --- Second version published ----------------------------------------
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    record_v2 = deposit_v2.fetch_published()[1]
    record_v1 = record_resolver.resolve(recid_v1_value)[1]
    deposit_v1 = deposit_resolver.resolve(depid_v1_value)[1]

    RecordIndexer().index_by_id(str(record_v2.id))
    RecordIndexer().process_bulk_queue()
    dep_hits, rec_hits = refreshed_hits()
    assert len(dep_hits) == 2
    assert len(rec_hits) == 2

    relations_d1 = indexed_relations(deposit_index_name, deposit_v1)
    relations_d2 = indexed_relations(deposit_index_name, deposit_v2)
    relations_r1 = indexed_relations(records_index_name, record_v1)
    relations_r2 = indexed_relations(records_index_name, record_v2)

    # Deposits and records are expected to carry the same relations now.
    first_version = version_relations(
        index=0, is_last=False, last_child="3", count=2)
    second_version = version_relations(
        index=1, is_last=True, last_child="3", count=2)
    assert relations_d1 == first_version
    assert relations_d2 == second_version
    assert relations_r1 == first_version
    assert relations_r2 == second_version
def test_related_identifiers_serialization(app, db, deposit, deposit_file):
    """Check the related identifiers derived from PID relations.

    Two versions are published; each version is expected to serialize an
    'isPartOf' link to DOI '10.5072/zenodo.1', and the PID 'recid' '1' is
    expected to serialize a 'hasPart' link for each of the two versions.
    """
    def doi_link(identifier, relation):
        """Build one expected related-identifier entry with scheme 'doi'."""
        return {
            'scheme': 'doi',
            'identifier': identifier,
            'relation': relation,
        }

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()

    # Open a new version, give it a file and publish it.
    deposit_v1.newversion()
    draft_pid = PIDVersioning(child=recid_v1).draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(draft_pid.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    # Re-open the published second version and publish it once more.
    deposit_v2 = deposit_v2.edit()
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    # TODO: serialization of new version relations is disabled; once it is
    # enabled, v1 should also list 'isPreviousVersionOf' 10.5072/zenodo.3
    # and v2 should also list 'isNewVersionOf' 10.5072/zenodo.2.
    part_of_concept = [doi_link('10.5072/zenodo.1', 'isPartOf')]

    rids = serialize_related_identifiers(recid_v1)
    assert rids == part_of_concept

    rids = serialize_related_identifiers(recid_v2)
    assert rids == part_of_concept

    parent_pid = PersistentIdentifier.get('recid', '1')
    rids = serialize_related_identifiers(parent_pid)
    assert rids == [
        doi_link('10.5072/zenodo.2', 'hasPart'),
        doi_link('10.5072/zenodo.3', 'hasPart'),
    ]
示例#33
0
    def publish(self):
        """Publish GitHub release as record.

        Steps, in order:

        1. If the repository already has releases with status ``PUBLISHED``,
           copy ``conceptrecid`` and ``conceptdoi`` from the last child
           record of the latest such release's version chain into the new
           deposit metadata (registering a concept DOI first when that
           record has none), and temporarily remove any draft child.
        2. Create the deposit and set the event's user as creator and owner.
        3. Download every ``(key, url)`` pair of ``self.files`` into the
           deposit's bucket.
        4. Publish the deposit with a GitHub-specific SIP agent, attach the
           published record model to ``self.model``, re-insert the stashed
           draft child (if any) and commit the session.
        5. After the commit, queue the DataCite registration task and index
           the record.

        :raises Exception: when a file download does not answer with HTTP
            200. Any exception raised inside the ``try`` block triggers a
            session rollback and a best-effort removal of the deposit from
            the index, and is then re-raised.
        """
        id_ = uuid.uuid4()
        # Work on a copy: the keys added below must not end up in the
        # object returned by ``self.metadata``.
        deposit_metadata = dict(self.metadata)
        # Stays ``None`` until the deposit is created, so the error handler
        # can tell whether there is anything to remove from the index.
        deposit = None
        try:
            db.session.begin_nested()
            # TODO: Add filter on Published releases
            # NOTE(review): the query below already filters on
            # ``ReleaseStatus.PUBLISHED``; the TODO above may be stale.
            previous_releases = self.model.repository.releases.filter_by(
                status=ReleaseStatus.PUBLISHED)
            versioning = None
            stashed_draft_child = None
            if previous_releases.count():
                # Most recently created published release of the repository.
                last_release = previous_releases.order_by(
                        Release.created.desc()).first()
                last_recid = PersistentIdentifier.get(
                    'recid', last_release.record['recid'])
                versioning = PIDVersioning(child=last_recid)
                # Use the last child of the version chain, which is not
                # necessarily the record of ``last_release`` itself.
                last_record = ZenodoRecord.get_record(
                    versioning.last_child.object_uuid)
                deposit_metadata['conceptrecid'] = last_record['conceptrecid']
                if 'conceptdoi' not in last_record:
                    # Register the missing concept DOI through the deposit
                    # of the last record, then re-fetch the published record
                    # (presumably so that it now carries 'conceptdoi').
                    last_depid = PersistentIdentifier.get(
                        'depid', last_record['_deposit']['id'])
                    last_deposit = ZenodoDeposit.get_record(
                        last_depid.object_uuid)
                    last_deposit = last_deposit.registerconceptdoi()
                    last_recid, last_record = last_deposit.fetch_published()
                deposit_metadata['conceptdoi'] = last_record['conceptdoi']
                if versioning.draft_child:
                    # Detach an existing draft child for the duration of the
                    # publication; it is re-inserted after publishing below.
                    stashed_draft_child = versioning.draft_child
                    versioning.remove_draft_child()

            deposit = self.deposit_class.create(deposit_metadata, id_=id_)

            deposit['_deposit']['created_by'] = self.event.user_id
            deposit['_deposit']['owners'] = [self.event.user_id]

            # Fetch the deposit files
            for key, url in self.files:
                # Make a HEAD request to get GitHub to compute the
                # Content-Length.
                res = self.gh.api.session.head(url, allow_redirects=True)
                # Now, download the file
                res = self.gh.api.session.get(url, stream=True,
                                              allow_redirects=True)
                if res.status_code != 200:
                    raise Exception(
                        "Could not retrieve archive from GitHub: {url}"
                        .format(url=url)
                    )

                # A missing (or zero) Content-Length is passed on as
                # ``size=None`` rather than as 0.
                size = int(res.headers.get('Content-Length', 0))
                ObjectVersion.create(
                    bucket=deposit.files.bucket,
                    key=key,
                    stream=res.raw,
                    size=size or None,
                    mimetype=res.headers.get('Content-Type'),
                )

            # GitHub-specific SIP store agent
            sip_agent = {
                '$schema': current_jsonschemas.path_to_url(
                    current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
                'user_id': self.event.user_id,
                'github_id': self.release['author']['id'],
                'email': self.gh.account.user.email,
            }
            deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
            recid_pid, record = deposit.fetch_published()
            self.model.recordmetadata = record.model
            # Put back the draft child that was detached before publishing.
            if versioning and stashed_draft_child:
                versioning.insert_draft_child(stashed_draft_child)
            # Read the id before committing; it is used after the commit.
            record_id = str(record.id)
            db.session.commit()

            # Send Datacite DOI registration task
            datacite_register.delay(recid_pid.pid_value, record_id)

            # Index the record
            RecordIndexer().index_by_id(record_id)
        except Exception:
            db.session.rollback()
            # Remove deposit from index since it was not committed.
            if deposit and deposit.id:
                try:
                    RecordIndexer().delete(deposit)
                except Exception:
                    # Best effort only: log the failure and still re-raise
                    # the original error below.
                    current_app.logger.exception(
                        "Failed to remove uncommited deposit from index.")
            raise
示例#34
0
def test_archiving(app, db, deposit, deposit_file, locations, archive_fs):
    """Test ZenodoSIP archiving.

    Publishes a record, edits its metadata and creates a new version, then
    checks the BagIt metadata of the three resulting SIPs and the files
    written to the archive for each of them.

    Archive writing is enabled only for the duration of the test. The
    original ``SIPSTORE_ARCHIVER_WRITING_ENABLED`` value is restored in a
    ``finally`` clause, so a failing assertion cannot leave the flag
    switched on in ``app.config``.
    """
    # Stash the configuration and enable writing
    orig = app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    try:
        _check_archiving(db, deposit, archive_fs)
    finally:
        app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig


def _check_archiving(db, deposit, archive_fs):
    """Create three SIPs and verify their BagIt metadata and archives.

    Helper of ``test_archiving``; the caller is responsible for enabling
    archive writing before calling it and for restoring the configuration
    afterwards.
    """
    def bagit_files(sip):
        """Return the 'files' list of the SIP's 'bagit' metadata."""
        return json.loads(
            next(m.content for m in sip.metadata
                 if m.type.name == 'bagit'))['files']

    def filepaths(bagmeta):
        """Return the set of file paths listed in the BagIt metadata."""
        return set(f['filepath'] for f in bagmeta)

    deposit.files['test2.txt'] = BytesIO(b'test-two')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_id = recid_v1.id
    # Record files after publishing: 'test.txt', 'test2.txt'

    sip1 = SIP(SIPModel.query.one())
    sip1_id = sip1.id

    # Edit the metadata
    deposit_v1 = deposit_v1.edit()
    deposit_v1['title'] = "New title"
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    # Record files after publishing: 'test.txt', 'test2.txt'
    sip2_id = SIPModel.query.order_by(SIPModel.created.desc()).first().id

    # Create a new version
    deposit_v1.newversion()
    recid_v1 = PersistentIdentifier.query.get(recid_v1_id)
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    del deposit_v2.files['test.txt']
    deposit_v2.files['test3.txt'] = BytesIO(b('test-three'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    # Record files after publishing: 'test2.txt', 'test3.txt'

    sip1 = SIP(SIPModel.query.get(sip1_id))
    sip2 = SIP(SIPModel.query.get(sip2_id))
    sip3 = SIP(SIPModel.query.order_by(SIPModel.created.desc()).first())

    # Because we are using secure_filename when writing SIPFiles we need to
    # generate the correct names: <SIPFile.id>-<secure_filename>
    s1_file1_fn = '{0}-test.txt'.format(fetch_suff(sip1, 'test.txt').file_id)
    s1_file1_fp = 'data/files/{0}'.format(s1_file1_fn)

    s1_file2_fn = '{0}-test2.txt'.format(fetch_suff(sip1, 'test2.txt').file_id)
    s1_file2_fp = 'data/files/{0}'.format(s1_file2_fn)

    s3_file2_fn = '{0}-test2.txt'.format(fetch_suff(sip3, 'test2.txt').file_id)
    s3_file2_fp = 'data/files/{0}'.format(s3_file2_fn)

    s3_file3_fn = '{0}-test3.txt'.format(fetch_suff(sip3, 'test3.txt').file_id)
    s3_file3_fp = 'data/files/{0}'.format(s3_file3_fn)

    sip1_bagmeta = bagit_files(sip1)
    sip2_bagmeta = bagit_files(sip2)
    sip3_bagmeta = bagit_files(sip3)

    # Tag files expected at the top level of every bag, and the paths every
    # SIP's BagIt metadata is expected to list besides the record files.
    tag_files = set(['tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt',
                     'bag-info.txt'])
    common_paths = tag_files | set(['data/filenames.txt',
                                    'data/metadata/record-json.json'])

    # Check if Bagit metadata contains the correct file-fetching information
    assert filepaths(sip1_bagmeta) == \
        common_paths | set([s1_file1_fp, s1_file2_fp])
    assert not BagItArchiver._is_fetched(get_m_item(sip1_bagmeta, s1_file1_fp))
    assert not BagItArchiver._is_fetched(get_m_item(sip1_bagmeta, s1_file2_fp))

    assert filepaths(sip2_bagmeta) == \
        common_paths | set([s1_file1_fp, s1_file2_fp, 'fetch.txt'])
    # Both files should be fetched since it's only metadata-edit submission
    assert BagItArchiver._is_fetched(get_m_item(sip2_bagmeta, s1_file1_fp))
    assert BagItArchiver._is_fetched(get_m_item(sip2_bagmeta, s1_file2_fp))

    assert filepaths(sip3_bagmeta) == \
        common_paths | set([s3_file2_fp, s3_file3_fp, 'fetch.txt'])

    # First file should be fetched from previous version and new file should
    # be archived in this bag.
    assert BagItArchiver._is_fetched(get_m_item(sip3_bagmeta, s3_file2_fp))
    assert not BagItArchiver._is_fetched(get_m_item(sip3_bagmeta, s3_file3_fp))
    archiver1 = BagItArchiver(sip1)
    archiver2 = BagItArchiver(sip2)
    archiver3 = BagItArchiver(sip3)

    # Each archiver subpath follows: '<recid>/r/<ISO-8601-SIP-timestamp>'
    sip1_ts = arrow.get(sip1.model.created).isoformat()
    sip2_ts = arrow.get(sip2.model.created).isoformat()
    sip3_ts = arrow.get(sip3.model.created).isoformat()
    assert archiver1.get_archive_subpath() == '2/r/{0}'.format(sip1_ts)
    assert archiver2.get_archive_subpath() == '2/r/{0}'.format(sip2_ts)
    assert archiver3.get_archive_subpath() == '3/r/{0}'.format(sip3_ts)

    # As a test, write the SIPs in reverse chronological order
    assert not sip1.archived
    assert not sip2.archived
    assert not sip3.archived
    archive_sip.delay(sip3.id)
    archive_sip.delay(sip2.id)
    archive_sip.delay(sip1.id)
    assert sip1.archived
    assert sip2.archived
    assert sip3.archived

    fs1 = archive_fs.opendir(archiver1.get_archive_subpath())
    assert set(fs1.listdir()) == tag_files | set(['data'])
    assert set(fs1.listdir('data')) == set(
        ['metadata', 'files', 'filenames.txt'])
    assert fs1.listdir('data/metadata') == [
        'record-json.json',
    ]
    assert set(fs1.listdir('data/files')) == set([s1_file1_fn, s1_file2_fn])

    fs2 = archive_fs.opendir(archiver2.get_archive_subpath())
    assert set(fs2.listdir()) == tag_files | set(['data', 'fetch.txt'])
    # Second SIP has written only the metadata, because of that there
    # should be no 'files/', but 'filenames.txt' should still be there
    # because of the fetch.txt
    assert set(fs2.listdir('data')) == set(['metadata', 'filenames.txt'])
    assert fs2.listdir('data/metadata') == [
        'record-json.json',
    ]

    with fs2.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Fetched files should correctly fetch the files from the first archive
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 4 {fn}'.format(fn=s1_file1_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s1_file2_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
    ])

    fs3 = archive_fs.opendir(archiver3.get_archive_subpath())
    assert set(fs3.listdir()) == tag_files | set(['data', 'fetch.txt'])
    # Third SIP should write only the extra 'test3.txt' file
    assert set(fs3.listdir('data')) == set(
        ['metadata', 'files', 'filenames.txt'])
    assert fs3.listdir('data/metadata') == [
        'record-json.json',
    ]
    assert fs3.listdir('data/files') == [
        s3_file3_fn,
    ]
    with fs3.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Since 'test.txt' was removed in third SIP, we should only fetch the
    # 'test2.txt', also from the first archive, since that's where this
    # file resides physically.
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s3_file2_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
    ])