def test_delete(client, db, bucket, objects, permissions, user, expected):
    """Test deleting an object."""
    login_user(client, permissions[user])
    for obj in objects:
        # Valid object
        resp = client.delete(
            url_for(
                'invenio_files_rest.object_api',
                bucket_id=bucket.id,
                key=obj.key,
            ))
        assert resp.status_code == expected
        if resp.status_code == 204:
            assert not ObjectVersion.get(bucket.id, obj.key)
            resp = client.get(
                url_for(
                    'invenio_files_rest.object_api',
                    bucket_id=bucket.id,
                    key=obj.key,
                ))
            assert resp.status_code == 404
        else:
            assert ObjectVersion.get(bucket.id, obj.key)

        # Invalid object
        assert client.delete(
            url_for(
                'invenio_files_rest.object_api',
                bucket_id=bucket.id,
                key='invalid',
            )).status_code == 404
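
For orientation, the url_for('invenio_files_rest.object_api', ...) calls above resolve to a plain REST path. A minimal sketch of the equivalent raw request, assuming invenio-files-rest's default '/files' URL prefix (an application may mount the blueprint elsewhere):

# Raw equivalent of the url_for() call in the test above ('/files' prefix assumed).
resp = client.delete('/files/{0}/{1}'.format(bucket.id, obj.key))
assert resp.status_code == expected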
Example #2
def test_object_delete(app, db, dummy_location):
    """Test object creation."""
    # Create three versions, with latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        ObjectVersion.create(b1, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(b1, "test").set_location(
            "b1test2", 1, "achecksum")
        obj_deleted = ObjectVersion.delete(b1, "test")

    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    obj = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id)
    assert obj.is_deleted
    assert obj.file_id is None

    ObjectVersion.create(b1, "test").set_location(
        "b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
Example #3
def rename_file(recid, key, new_key):
    """Remove a file from a publishd record."""
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket

    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key), fg='red'))
        return

    new_obj = ObjectVersion.get(bucket, new_key)
    if new_obj is not None:
        click.echo(click.style(u'File with key "{key}" already exists.'.format(
            key=new_key), fg='red'))
        return

    if click.confirm(u'Rename "{key}" to "{new_key}" on bucket {bucket}.'
                     u' Continue?'.format(
                        key=obj.key, new_key=new_key, bucket=bucket.id)):
        record.files.bucket.locked = False

        file_id = obj.file.id
        ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, new_key, _file_id=file_id)
        record.files.bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File renamed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file rename.', fg='green'))
Example #4
def test_object_delete(app, db, dummy_location):
    """Test object creation."""
    # Create three versions, with latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        ObjectVersion.create(b1, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(b1, "test").set_location(
            "b1test2", 1, "achecksum")
        obj_deleted = ObjectVersion.delete(b1, "test")

    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    obj = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id)
    assert obj.deleted
    assert obj.file_id is None

    ObjectVersion.create(b1, "test").set_location(
        "b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
Example #5
def test_object_create(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        b = Bucket.create()
        # Create one object version
        obj1 = ObjectVersion.create(b, "test")
        assert obj1.bucket_id == b.id
        assert obj1.key == 'test'
        assert obj1.version_id
        assert obj1.file_id is None
        assert obj1.is_head is True
        assert obj1.bucket == b

        # Set fake location.
        obj1.set_location("file:///tmp/obj1", 1, "checksum")

        # Create one object version for same object key
        obj2 = ObjectVersion.create(b, "test")
        assert obj2.bucket_id == b.id
        assert obj2.key == 'test'
        assert obj2.version_id != obj1.version_id
        assert obj2.file_id is None
        assert obj2.is_head is True
        assert obj2.bucket == b

        # Set fake location
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

        # Create a new object version for a different object with no location.
        # I.e. it is considered a delete marker.
        obj3 = ObjectVersion.create(b, "deleted_obj")

    # Object __repr__
    assert str(obj1) == \
        "{0}:{1}:{2}".format(obj1.bucket_id, obj1.version_id, obj1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Assert that obj2 is the head version
    obj = ObjectVersion.get(b.id, "test", version_id=obj1.version_id)
    assert obj.version_id == obj1.version_id
    assert obj.is_head is False
    obj = ObjectVersion.get(b.id, "test", version_id=obj2.version_id)
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True
    # Assert that getting latest version gets obj2
    obj = ObjectVersion.get(b.id, "test")
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that obj3 is not retrievable (without specifying version id).
    assert ObjectVersion.get(b.id, "deleted_obj") is None
    # Assert that obj3 *is* retrievable (when specifying version id).
    assert \
        ObjectVersion.get(b.id, "deleted_obj", version_id=obj3.version_id) == \
        obj3
Example #6
def test_object_create(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        b = Bucket.create()
        # Create one object version
        obj1 = ObjectVersion.create(b, "test")
        assert obj1.bucket_id == b.id
        assert obj1.key == 'test'
        assert obj1.version_id
        assert obj1.file_id is None
        assert obj1.is_head is True
        assert obj1.bucket == b

        # Set fake location.
        obj1.set_location("file:///tmp/obj1", 1, "checksum")

        # Create one object version for same object key
        obj2 = ObjectVersion.create(b, "test")
        assert obj2.bucket_id == b.id
        assert obj2.key == 'test'
        assert obj2.version_id != obj1.version_id
        assert obj2.file_id is None
        assert obj2.is_head is True
        assert obj2.bucket == b

        # Set fake location
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

        # Create a new object version for a different object with no location.
        # I.e. it is considered a delete marker.
        obj3 = ObjectVersion.create(b, "deleted_obj")

    # Object __repr__
    assert str(obj1) == \
        "{0}:{1}:{2}".format(obj1.bucket_id, obj1.version_id, obj1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Assert that obj2 is the head version
    obj = ObjectVersion.get(b.id, "test", version_id=obj1.version_id)
    assert obj.version_id == obj1.version_id
    assert obj.is_head is False
    obj = ObjectVersion.get(b.id, "test", version_id=obj2.version_id)
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True
    # Assert that getting latest version gets obj2
    obj = ObjectVersion.get(b.id, "test")
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that obj3 is not retrievable (without specifying version id).
    assert ObjectVersion.get(b.id, "deleted_obj") is None
    # Assert that obj3 *is* retrievable (when specifying version id).
    assert \
        ObjectVersion.get(b.id, "deleted_obj", version_id=obj3.version_id) == \
        obj3
Example #7
def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file, i.e.:
    either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    assert ((file_id or (pid_type1 and pid_value1 and key1))
            and (pid_type2 and pid_value2 and key2))

    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in ('recid', 'depid', ), msg
    assert pid_type2 in ('recid', 'depid', ), msg

    if not file_id:
        resolver = record_resolver if pid_type1 == 'recid' \
            else deposit_resolver
        pid1, record1 = resolver.resolve(pid_value1)
        bucket1 = record1.files.bucket

        obj1 = ObjectVersion.get(bucket1, key1)
        if obj1 is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = obj1.file.id

    resolver = record_resolver if pid_type2 == 'recid' else deposit_resolver
    pid2, record2 = resolver.resolve(pid_value2)
    bucket2 = record2.files.bucket

    obj2 = ObjectVersion.get(bucket2, key2)
    if obj2 is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=bucket2.id), fg='red'))
        return

    if click.confirm(u'Attaching file "{file_id}" to bucket {bucket2}'
                     u' as "{key2}". Continue?'.format(
                         file_id=file_id, key2=key2,
                         bucket2=bucket2.id)):
        record2.files.bucket.locked = False

        ObjectVersion.create(bucket2, key2, _file_id=file_id)
        if pid_type2 == 'recid':
            record2.files.bucket.locked = True
        record2.files.flush()
        record2.commit()
        db.session.commit()
        click.echo(click.style(u'File attached successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file attaching.', fg='green'))
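
The docstring above allows two ways to identify the source file. A minimal sketch of both call patterns, using made-up PID values and keys and calling the function directly as shown here (in the original project it is registered as a click command):

# Identify the source file directly by its FileInstance id (illustrative UUID):
attach_file(file_id='11111111-2222-3333-4444-555555555555',
            pid_type1=None, pid_value1=None, key1=None,
            pid_type2='recid', pid_value2='11782', key2='dataset.csv')

# ...or locate it via (pid_type1, pid_value1, key1) on a source record/deposit:
attach_file(file_id=None,
            pid_type1='recid', pid_value1='11782', key1='dataset.csv',
            pid_type2='depid', pid_value2='123', key2='dataset.csv')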
Example #8
File: cli.py Project: hachreak/zenodo
def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file, i.e.:
    either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    assert ((file_id or (pid_type1 and pid_value1 and key1))
            and (pid_type2 and pid_value2 and key2))

    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in ('recid', 'depid', ), msg
    assert pid_type2 in ('recid', 'depid', ), msg

    if not file_id:
        resolver = record_resolver if pid_type1 == 'recid' \
            else deposit_resolver
        pid1, record1 = resolver.resolve(pid_value1)
        bucket1 = record1.files.bucket

        obj1 = ObjectVersion.get(bucket1, key1)
        if obj1 is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = obj1.file.id

    resolver = record_resolver if pid_type2 == 'recid' else deposit_resolver
    pid2, record2 = resolver.resolve(pid_value2)
    bucket2 = record2.files.bucket

    obj2 = ObjectVersion.get(bucket2, key2)
    if obj2 is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=bucket2.id), fg='red'))
        return

    if click.confirm(u'Attaching file "{file_id}" to bucket {bucket2}'
                     u' as "{key2}". Continue?'.format(
                         file_id=file_id, key2=key2,
                         bucket2=bucket2.id)):
        record2.files.bucket.locked = False

        ObjectVersion.create(bucket2, key2, _file_id=file_id)
        if pid_type2 == 'recid':
            record2.files.bucket.locked = True
        record2.files.flush()
        record2.commit()
        db.session.commit()
        click.echo(click.style(u'File attached successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file attaching.', fg='green'))
Example #9
def test_download_repo_when_failed_creates_empty_file_object_with_failed_tag(
        deposit, git_repo_tar):
    responses.add(
        responses.GET,
        'https://codeload.github.com/owner/repository/legacy.tar.gz/mybranchsha',  # noqa
        body=git_repo_tar,
        content_type='application/x-gzip',
        headers={
            'Transfer-Encoding': 'chunked',
            'Content-Length': '287'
        },
        stream=True,
        status=400)

    download_repo(
        deposit.id,
        'repositories/github.com/owner/repository/mybranch.tar.gz',
        'https://codeload.github.com/owner/repository/legacy.tar.gz/mybranchsha',  # noqa
        {'Authorization': 'token mysecretsecret'},
    )

    # file object was created
    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/github.com/owner/repository/mybranch.tar.gz')

    # but tagged as failed
    tag = obj.tags[0]
    assert (tag.key, tag.value) == ('status', 'failed')
Example #10
def test_download_repo(deposit, git_repo_tar):
    responses.add(
        responses.GET,
        'https://codeload.github.com/owner/repository/legacy.tar.gz/mybranchsha',
        body=git_repo_tar,
        content_type='application/x-gzip',
        headers={
            'Transfer-Encoding': 'chunked',
            'Content-Length': '287'
        },
        stream=True,
        status=200)

    download_repo(
        deposit.id,
        'repositories/github.com/owner/repository/mybranch.tar.gz',
        'https://codeload.github.com/owner/repository/legacy.tar.gz/mybranchsha',  # noqa
        {'Authorization': 'token mysecretsecret'},
    )

    assert responses.calls[0].request.headers[
        'Authorization'] == 'token mysecretsecret'

    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/github.com/owner/repository/mybranch.tar.gz')
    tar_obj = tarfile.open(obj.file.uri)
    repo_file_name = tar_obj.getmembers()[1]
    repo_content = tar_obj.extractfile(repo_file_name).read()

    assert repo_content == b'test repo for cap\n'
Example #11
def test_download_repo_file_when_failed_creates_empty_file_object_with_failed_tag(
        deposit, file_tar):
    responses.add(
        responses.GET,
        'https://raw.githubusercontent.com/owner/repository/mybranchsha/README.md',  # noqa
        body=file_tar,
        content_type='text/plain',
        headers={
            'Content-Length': '18',
            'Content-Encoding': 'gzip',
            'Content-Type': 'text/plain; charset=utf-8'
        },
        stream=True,
        status=400)

    download_repo_file(
        deposit.id,
        'repositories/github.com/owner/repository/mybranch/README.md',
        'https://raw.githubusercontent.com/owner/repository/mybranchsha/README.md',  # noqa
        18,
        {'Authorization': 'token mysecretsecret'},
    )

    assert responses.calls[0].request.headers[
        'Authorization'] == 'token mysecretsecret'

    # file object was created
    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/github.com/owner/repository/mybranch/README.md')

    # but tagged as failed
    tag = obj.tags[0]
    assert (tag.key, tag.value) == ('status', 'failed')
Example #12
def test_update_record(app, db, dummy_location, record_dump, record_db,
                       resolver, record_file):
    """Test update of a record."""
    # Smoke test
    record_db['files'] = [record_file]
    record_db.commit()
    db.session.commit()

    pytest.raises(IntegrityError, RecordIdentifier.insert, 11782)
    # Update the record instead of creating a new one
    RecordDumpLoader.create(record_dump)
    pid, record = resolver.resolve('11782')
    # Basic sanity test that the record exists
    assert record['title']
    assert record.created == datetime(2014, 10, 13, 8, 27, 47)
    # Test that old revisions are kept
    assert len(record.revisions) == 4
    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11782')

    assert Bucket.query.count() == 1
    assert ObjectVersion.query.filter_by(is_head=True).count() == 1
    assert FileInstance.query.count() == 2

    assert len(record['files']) == 1
    f = record['files'][0]
    obj = ObjectVersion.get(f['bucket'], f['filename'])
    assert obj.file.checksum != record_file['checksum']
    assert obj.file.size != record_file['size']
Example #13
def test_import_record(app, db, dummy_location, record_dump, records_json,
                       resolver):
    """Test import record celery task."""
    assert RecordMetadata.query.count() == 0
    import_record(records_json[0], source_type='json')
    assert RecordMetadata.query.count() == 1
    pid, record = resolver.resolve('11782')
    assert record['_collections'] == []
    assert len(record['_files']) == 1
    assert ObjectVersion.get(
        record['_files'][0]['bucket'], record['_files'][0]['key'])

    import_record(records_json[1], source_type='marcxml')
    assert RecordMetadata.query.count() == 2
    pid, record = resolver.resolve('10')
    assert record['_collections'] == [
        "ALEPH Papers",
        "Articles & Preprints",
        "Experimental Physics (EP)",
        "CERN Divisions",
        "Atlantis Institute of Fictive Science",
        "CERN Experiments",
        "Preprints",
        "ALEPH",
    ]
    assert len(record['_files']) == 2
Example #14
def test_import_record(app, db, dummy_location, record_dump, records_json,
                       resolver):
    """Test import record celery task."""
    assert RecordMetadata.query.count() == 0
    import_record(records_json[0], source_type='json')
    assert RecordMetadata.query.count() == 1
    pid, record = resolver.resolve('11782')
    assert record['_collections'] == []
    assert len(record['_files']) == 1
    assert ObjectVersion.get(record['_files'][0]['bucket'],
                             record['_files'][0]['key'])

    import_record(records_json[1], source_type='marcxml')
    assert RecordMetadata.query.count() == 2
    pid, record = resolver.resolve('10')
    assert record['_collections'] == [
        "ALEPH Papers",
        "Articles & Preprints",
        "Experimental Physics (EP)",
        "CERN Divisions",
        "Atlantis Institute of Fictive Science",
        "CERN Experiments",
        "Preprints",
        "ALEPH",
    ]
    assert len(record['_files']) == 2
Example #15
def file_version_update():
    """Bulk delete items and index trees."""
    # Only allow authorised users to update object version
    from invenio_files_rest.permissions import has_update_version_role
    if has_update_version_role(current_user):

        bucket_id = request.values.get('bucket_id')
        key = request.values.get('key')
        version_id = request.values.get('version_id')
        is_show = request.values.get('is_show')
        if bucket_id is not None and key is not None and version_id is not None:
            from invenio_files_rest.models import ObjectVersion
            object_version = ObjectVersion.get(bucket=bucket_id,
                                               key=key,
                                               version_id=version_id)
            if object_version is not None:
                # Update the display flag on the object version
                object_version.is_show = True if is_show == '1' else False
                db.session.commit()

                return jsonify({'status': 1})
            else:
                return jsonify({'status': 0, 'msg': 'Version not found'})
        else:
            return jsonify({'status': 0, 'msg': 'Invalid data'})
    else:
        return jsonify({'status': 0, 'msg': 'Insufficient permission'})
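
A minimal sketch of how a client might call this view; the '/files/version/update' route is hypothetical (the actual URL rule is not shown here), `client` is a Flask test client, and the form values are illustrative:

# Hypothetical route; form fields mirror the request.values lookups above.
resp = client.post('/files/version/update', data={
    'bucket_id': str(bucket_id),    # bucket holding the object
    'key': 'data.csv',              # object key
    'version_id': str(version_id),  # specific version to show or hide
    'is_show': '1',                 # '1' -> visible, anything else -> hidden
})
assert resp.get_json() == {'status': 1}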
Example #16
 def commit_file(self, id, file_key, identity, record):
     """Commit file handler."""
     # TODO: Add other checks here (e.g. verify checksum, S3 upload)
     file_obj = ObjectVersion.get(record.bucket.id, file_key)
     if not file_obj:
         raise Exception(f'File with key {file_key} not uploaded yet.')
     record.files[file_key] = file_obj
Example #17
def test_download_file_branch(client, db, get_git_attributes, json_headers,
                              git_url, git, git_record):
    owner, deposit, pid, bucket, headers = get_git_attributes
    data = {
        'url': git_url,
        'type': 'url',
        'for_download': True,
        'for_connection': False
    }

    if get_access_token(git) is None:
        pytest.skip("No access token found for Git integration. Skipping.")

    resp = client.post('/deposits/{}/actions/upload'.format(pid),
                       headers=headers + json_headers,
                       data=json.dumps(data))
    assert resp.status_code == 201

    resp = client.get('/deposits/{}/files'.format(pid), headers=headers)
    assert resp.status_code == 200

    obj = ObjectVersion.get(bucket.id, git_record)
    open_file = open(obj.file.uri)
    repo_content = open_file.read()
    assert repo_content == 'test repo for cap - branch\n'
Example #18
def test_download_filename_should_not_be_renamed(location):
    """Test files not renamed when the file to download is not a slave."""
    bucket = Bucket.create(location)
    _fill_bucket_with_files(bucket)

    obj = ObjectVersion.get(bucket, _MASTER_FILENAME)
    on_download_rename_file(None, obj)
    assert obj.key == _MASTER_FILENAME

    obj = ObjectVersion.get(bucket, _SUBTITLE_FILENAME)
    on_download_rename_file(None, obj)
    assert obj.key == _SUBTITLE_FILENAME

    obj = ObjectVersion.get(bucket, _EXTRA_FILENAME)
    on_download_rename_file(None, obj)
    assert obj.key == _EXTRA_FILENAME
Example #19
    def download_record(cls, record, bucket, key, version_id, usr='******'):
        """Download a record.

        :param record: the record object from invenio_records_files
        :param bucket: the record's bucket.
        :param key: the record's key.
        :param version_id: the record's version id.
        :param usr: a string that identifies the current user.
        """
        obj_version = ObjectVersion.get(bucket, key, version_id)
        pid = PersistentIdentifier.get('recid', record['id'])

        current_app.logger.info("Download file= " + record['title'] +
                                ", requested by user= " + usr)

        # Send file
        return ObjectResource.send_object(
            bucket,
            obj_version,
            expected_chksum=obj_version.file.checksum,
            logger_data={
                'bucket_id': bucket,
                'pid_type': pid.pid_type,
                'pid_value': pid.pid_value,
            },
            as_attachment=True)
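
A sketch of how this helper might be invoked from a download view; `RecordFilesResource` is a made-up name for the class that owns the method, and the record, bucket and key are illustrative:

# Hypothetical owning class; mirrors the signature above.
resp = RecordFilesResource.download_record(
    record, record.files.bucket, 'article.pdf',
    version_id=None, usr=str(current_user.get_id()))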
Example #20
def test_download_gitlab_archive_private(client, db, get_git_attributes,
                                         json_headers):
    owner, deposit, pid, bucket, headers = get_git_attributes
    data = {
        'url': 'https://gitlab.cern.ch/analysispreservation/test-private-repo',
        'type': 'repo',
        'for_download': True,
        'for_connection': False
    }

    if get_access_token('GITLAB') is None:
        pytest.skip("No access token found for Git integration. Skipping.")

    resp = client.post('/deposits/{}/actions/upload'.format(pid),
                       headers=headers + json_headers,
                       data=json.dumps(data))
    assert resp.status_code == 201

    resp = client.get('/deposits/{}/files'.format(pid), headers=headers)
    assert resp.status_code == 200

    obj = ObjectVersion.get(
        bucket.id, 'analysispreservation_test-private-repo_master.tar.gz')
    tar_obj = tarfile.open(obj.file.uri)
    repo_file_name = tar_obj.getmembers()[1]
    repo_content = tar_obj.extractfile(repo_file_name).read()

    assert repo_content == 'test repo for cap'
Example #21
def test_download_repo_file(deposit, file_tar):
    responses.add(
        responses.GET,
        'https://raw.githubusercontent.com/owner/repository/mybranchsha/README.md',  # noqa
        body=file_tar,
        content_type='text/plain',
        headers={
            'Content-Length': '18',
            'Content-Encoding': 'gzip',
            'Content-Type': 'text/plain; charset=utf-8'
        },
        stream=True,
        status=200)

    download_repo_file(
        deposit.id,
        'repositories/github.com/owner/repository/mybranch/README.md',
        'https://raw.githubusercontent.com/owner/repository/mybranchsha/README.md',  # noqa
        18,
        {'Authorization': 'token mysecretsecret'},
    )

    assert responses.calls[0].request.headers[
        'Authorization'] == 'token mysecretsecret'

    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/github.com/owner/repository/mybranch/README.md')
    open_file = open(obj.file.uri)
    repo_content = open_file.read()
    assert repo_content == 'test repo for cap\n'
Example #22
def test_migrate_file(app, db, dummy_location, extra_location, bucket,
                      objects):
    """Test file migration."""
    obj = objects[0]

    # Test pre-condition
    old_uri = obj.file.uri
    assert exists(old_uri)
    assert old_uri == join(dummy_location.uri, str(obj.file.id)[0:2],
                           str(obj.file.id)[2:4], str(obj.file.id)[4:], 'data')
    assert FileInstance.query.count() == 4

    # Migrate file
    with patch('invenio_files_rest.tasks.verify_checksum') as verify_checksum:
        migrate_file(
            obj.file_id, location_name=extra_location.name,
            post_fixity_check=True)
        assert verify_checksum.delay.called

    # Get object again
    obj = ObjectVersion.get(bucket, obj.key)
    new_uri = obj.file.uri
    assert exists(old_uri)
    assert exists(new_uri)
    assert new_uri != old_uri
    assert FileInstance.query.count() == 5
Example #23
def index_attachments(sender, json=None, record=None,
                      index=None, doc_type=None):
    """Load and index attached files for given record.

    It iterates over ``_files`` field in ``record`` and checks if
    ``_attachment`` subfiled has been configured with following values:

    * ``True``/``False`` simply enables/disables automatic fulltext indexing
      for given file instance;
    * Alternativelly, one can provide a ``dict`` instance with all
      configuration options as defined in Elasticsearch guide on
      https://www.elastic.co/guide/en/elasticsearch/ search for
      mapper-attachment.

    .. note::
       Make sure that ``mapper-attachment`` plugin is installed and running
       in Elasticsearch when using this signal handler.
    """
    for index, data in enumerate(record['_files']):
        attachment = json['_files'][index].pop('_attachment', None)
        if attachment:
            obj = ObjectVersion.get(data['bucket'], data['key'],
                                    version_id=data.get('version_id'))
            attachment = attachment if isinstance(attachment, dict) else {}
            attachment.setdefault('_content', base64.b64encode(
                obj.file.storage().open().read()
            ).decode('utf-8'))
            json['_files'][index]['_attachment'] = attachment
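
As a concrete illustration of the docstring above, a record's ``_files`` entry might opt into fulltext indexing like this (the bucket id, key and ``indexed_chars`` option are illustrative values, not taken from a real record):

record = {
    '_files': [{
        'bucket': '11111111-1111-1111-1111-111111111111',  # illustrative bucket id
        'key': 'article.pdf',
        # True/False toggles indexing with defaults; a dict is passed through
        # as mapper-attachment options (here: index the full text, no limit).
        '_attachment': {'indexed_chars': -1},
    }]
}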
Example #24
def test_download_archive_branch(client, db, get_git_attributes, json_headers,
                                 git_url, git, git_record):
    """Given a git url, check if the link correctly identifies the repo, downloads its data,
       and then CAP is able to retrieve them from a bucket.
    """
    owner, deposit, pid, bucket, headers = get_git_attributes
    data = {
        'url': git_url,
        'type': 'repo',
        'for_download': True,
        'for_connection': False
    }

    if get_access_token(git) is None:
        pytest.skip("No access token found for Git integration. Skipping.")

    resp = client.post('/deposits/{}/actions/upload'.format(pid),
                       headers=headers + json_headers,
                       data=json.dumps(data))
    assert resp.status_code == 201

    resp = client.get('/deposits/{}/files'.format(pid), headers=headers)
    assert resp.status_code == 200

    obj = ObjectVersion.get(bucket.id, git_record)
    tar_obj = tarfile.open(obj.file.uri)
    repo_file_name = tar_obj.getmembers()[1]
    repo_content = tar_obj.extractfile(repo_file_name).read()

    assert repo_content == 'test repo for cap - branch\n'
Example #25
def test_new_record(app, db, dummy_location, record_dumps, resolver):
    """Test creation of new record."""
    RecordDumpLoader.create(record_dumps)
    pid, record = resolver.resolve('11783')
    created = datetime(2011, 10, 13, 8, 27, 47)
    # Basic sanity test that the record exists
    assert record['title']
    assert record.created == created
    # Test that this is a completely new record
    assert len(record.revisions) == 3

    # check revisions
    assert record.revisions[2].created == created
    assert record.revisions[2].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[1].created == created
    assert record.revisions[1].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[0].created == created
    assert record.revisions[0].updated == datetime(2011, 10, 13, 8, 27, 47)

    pytest.raises(IntegrityError, RecordIdentifier.insert, 11783)
    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11783')

    assert len(record['_files']) == 1
    f = record['_files'][0]
    obj = ObjectVersion.get(f['bucket'], f['key'])
    assert obj.file.checksum == f['checksum']
    assert obj.file.size == f['size']

    assert BucketTag.get_value(f['bucket'], 'record') == str(record.id)
Example #26
def test_delete_versions(client, db, bucket, versions, permissions, user,
                         expected):
    """Test deleting an object."""
    login_user(client, permissions[user])
    for obj in versions:
        # Valid delete
        resp = client.delete(
            url_for(
                'invenio_files_rest.object_api',
                bucket_id=bucket.id,
                key=obj.key,
                versionId=obj.version_id,
            ))
        assert resp.status_code == expected
        if resp.status_code == 204:
            assert not ObjectVersion.get(
                bucket.id, obj.key, version_id=obj.version_id)

        # Invalid object
        assert client.delete(
            url_for('invenio_files_rest.object_api',
                    bucket_id=bucket.id,
                    key=obj.key,
                    versionId='deadbeef-65bd-4d9b-93e2-ec88cc59aec5')
        ).status_code == 404
Example #27
File: cli.py Project: xbee/zenodo
def add_file(recid, fp, replace_existing):
    """Add a new file to a published record."""
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    key = os.path.basename(fp.name)

    obj = ObjectVersion.get(bucket, key)
    if obj is not None and not replace_existing:
        click.echo(
            click.style(
                u'File with key "{key}" already exists.'
                u' Use `--replace-existing/-f` to overwrite it.'.format(
                    key=key, recid=recid),
                fg='red'))
        return

    fp.seek(SEEK_SET, SEEK_END)
    size = fp.tell()
    fp.seek(SEEK_SET)

    click.echo(u'Will add the following file:\n')
    click.echo(
        click.style(u'  key: "{key}"\n'
                    u'  bucket: {bucket}\n'
                    u'  size: {size}\n'
                    u''.format(key=key, bucket=bucket.id, size=size),
                    fg='green'))
    click.echo(u'to record:\n')
    click.echo(
        click.style(u'  Title: "{title}"\n'
                    u'  RECID: {recid}\n'
                    u'  UUID: {uuid}\n'
                    u''.format(recid=record['recid'],
                               title=record['title'],
                               uuid=record.id),
                    fg='green'))
    if replace_existing and obj is not None:
        click.echo(u'and remove the file:\n')
        click.echo(
            click.style(u'  key: "{key}"\n'
                        u'  bucket: {bucket}\n'
                        u'  size: {size}\n'
                        u''.format(key=obj.key,
                                   bucket=obj.bucket,
                                   size=obj.file.size),
                        fg='green'))

    if click.confirm(u'Continue?'):
        bucket.locked = False
        if obj is not None and replace_existing:
            ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, key, stream=fp, size=size)
        bucket.locked = True

        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File added successfully.', fg='green'))
    else:
        click.echo(click.style(u'File addition aborted.', fg='green'))
Example #28
    def post(self, pid, record, **kwargs):
        """Send a signal to count record view for the record stats."""
        factory = RecordPermission(record, "read")
        if not factory.is_public() and not backoffice_permission().can():
            if not current_user.is_authenticated:
                abort(401)
            abort(403)

        data = request.get_json()
        event_name = data.get("event")
        if event_name == "record-view":
            record_viewed.send(
                current_app._get_current_object(),
                pid=pid,
                record=record,
            )
            return self.make_response(pid, record, 202)
        elif event_name == "file-download":
            if "key" not in data:
                abort(406, "File key is required")
            if "bucket_id" not in record:
                abort(406, "Record has no bucket")
            obj = ObjectVersion.get(record["bucket_id"], data["key"])
            file_downloaded.send(current_app._get_current_object(),
                                 obj=obj,
                                 record=record)
            return self.make_response(pid, record, 202)
        return StatsError(
            description="Invalid stats event request: {}".format(event_name))
Example #29
def test_bucket_writer(writer):
    """Test bucket writer."""
    writer.open()
    assert writer.obj.file_id is None
    writer.write(BytesIO(b'this is a test'))
    writer.close()
    assert ObjectVersion.get(writer.bucket_id, writer.key).file_id is not None
Example #30
def test_new_record(app, db, dummy_location, record_dumps, resolver):
    """Test creation of new record."""
    RecordDumpLoader.create(record_dumps)
    pid, record = resolver.resolve('11783')
    created = datetime(2011, 10, 13, 8, 27, 47)
    # Basic sanity test that the record exists
    assert record['title']
    assert record.created == created
    # Test that this is a completely new record
    assert len(record.revisions) == 3

    # check revisions
    assert record.revisions[2].created == created
    assert record.revisions[2].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[1].created == created
    assert record.revisions[1].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[0].created == created
    assert record.revisions[0].updated == datetime(2011, 10, 13, 8, 27, 47)

    pytest.raises(IntegrityError, RecordIdentifier.insert, 11783)
    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11783')

    assert len(record['_files']) == 1
    f = record['_files'][0]
    obj = ObjectVersion.get(f['bucket'], f['key'])
    assert obj.file.checksum == f['checksum']
    assert obj.file.size == f['size']

    assert BucketTag.get_value(f['bucket'], 'record') == str(record.id)
Example #31
def index_attachments(sender,
                      json=None,
                      record=None,
                      index=None,
                      doc_type=None):
    """Load and index attached files for given record.

    It iterates over ``_files`` field in ``record`` and checks if
    ``_attachment`` subfiled has been configured with following values:

    * ``True``/``False`` simply enables/disables automatic fulltext indexing
      for given file instance;
    * Alternativelly, one can provide a ``dict`` instance with all
      configuration options as defined in Elasticsearch guide on
      https://www.elastic.co/guide/en/elasticsearch/ search for
      mapper-attachment.

    .. note::
       Make sure that ``mapper-attachment`` plugin is installed and running
       in Elasticsearch when using this signal handler.
    """
    for index, data in enumerate(record['_files']):
        attachment = json['_files'][index].pop('_attachment', None)
        if attachment:
            obj = ObjectVersion.get(data['bucket'],
                                    data['key'],
                                    version_id=data.get('version_id'))
            attachment = attachment if isinstance(attachment, dict) else {}
            attachment.setdefault(
                '_content',
                base64.b64encode(
                    obj.file.storage().open().read()).decode('utf-8'))
            json['_files'][index]['_attachment'] = attachment
Example #32
def test_migrate_file(app, db, dummy_location, extra_location, bucket,
                      objects):
    """Test file migration."""
    obj = objects[0]

    # Test pre-condition
    old_uri = obj.file.uri
    assert exists(old_uri)
    assert old_uri == join(dummy_location.uri,
                           str(obj.file.id)[0:2],
                           str(obj.file.id)[2:4],
                           str(obj.file.id)[4:], 'data')
    assert FileInstance.query.count() == 4

    # Migrate file
    with patch('invenio_files_rest.tasks.verify_checksum') as verify_checksum:
        migrate_file(obj.file_id,
                     location_name=extra_location.name,
                     post_fixity_check=True)
        assert verify_checksum.delay.called

    # Get object again
    obj = ObjectVersion.get(bucket, obj.key)
    new_uri = obj.file.uri
    assert exists(old_uri)
    assert exists(new_uri)
    assert new_uri != old_uri
    assert FileInstance.query.count() == 5
Example #33
    def delete(self, pid, record, files, file_rec, multipart_config, key,
               upload_id):
        if multipart_config['upload_id'] != upload_id:
            abort(404)

        before_upload_abort.send(file_rec,
                                 record=record,
                                 file=file_rec,
                                 multipart_config=multipart_config)

        res = current_s3.client.abort_multipart_upload(
            bucket=multipart_config['bucket'],
            key=multipart_config['key'],
            upload_id=upload_id)

        with db.session.begin_nested():
            delete_file_object_version(file_rec.bucket, file_rec.obj)
            head = ObjectVersion.get(file_rec.bucket, key)
            if not head:
                del files.filesmap[key]

            files.flush()
            record.commit()

        db.session.commit()

        after_upload_abort.send(file_rec, record=record, file=file_rec)

        return jsonify({})
Example #34
def test_download_file(mock_git_api, client, db, get_git_attributes,
                       json_headers):
    owner, deposit, pid, bucket, headers = get_git_attributes
    data = {'url': FILE, 'type': 'file', 'download': True, 'webhook': False}

    responses.add(responses.GET,
                  'https://gitlab.cern.ch/file',
                  body=FILE_BODY,
                  content_type='text/plain',
                  headers={
                      'Content-Length': '12',
                      'Content-Type': 'text/plain; charset=utf-8',
                  },
                  stream=True,
                  status=200)

    resp = client.post('/deposits/{}/actions/upload'.format(pid),
                       headers=headers + json_headers,
                       data=json.dumps(data))
    assert resp.status_code == 201

    resp = client.get('/deposits/{}/files'.format(pid), headers=headers)
    assert resp.status_code == 200

    obj = ObjectVersion.get(
        bucket.id,
        'repositories/gitlab.cern.ch/pfokiano/test-repo/master/README.md')
    open_file = open(obj.file.uri)
    repo_content = open_file.read()
    assert repo_content == FILE_BODY
Example #35
def test_bucket_sync_same_object(app, db, dummy_location):
    """Test that an exiting file in src and dest is not changed."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename").set_location("b1v1", 1, "achecksum")
    b1.sync(b2)
    db.session.commit()

    b1_version_id = ObjectVersion.get(b1, "filename").version_id
    b2_version_id = ObjectVersion.get(b2, "filename").version_id

    b1.sync(b2)

    assert ObjectVersion.get_by_bucket(b1).count() == 1
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get(b1, "filename").version_id == b1_version_id
    assert ObjectVersion.get(b2, "filename").version_id == b2_version_id
Example #36
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing."""
    b1 = Bucket.create()
    b2 = Bucket.create()

    # First version of object
    obj1_first = ObjectVersion.create(b1, "test")
    obj1_first.set_location("b1test1", 1, "achecksum")
    # Intermediate version of the same object.
    obj1_intermediate = ObjectVersion.create(b1, "test")
    obj1_intermediate.set_location("b1test2", 1, "achecksum")
    # Latest version of object
    obj1_latest = ObjectVersion.create(b1, "test")
    obj1_latest.set_location("b1test3", 1, "achecksum")
    # Create objects in/not in same bucket using different key.
    ObjectVersion.create(b1, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(b2, "test").set_location("b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(b1, "test")
    assert ObjectVersion.get(b1, "another")
    assert ObjectVersion.get(b2, "test")

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b2, versions=True).count() == 1

    # Assert order of returned objects (alphabetical)
    objs = ObjectVersion.get_by_bucket(b1.id).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"

    # Assert order of returned object versions (creation date descending)
    objs = ObjectVersion.get_by_bucket(b1.id, versions=True).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"
    assert objs[1].version_id == obj1_latest.version_id
    assert objs[2].key == "test"
    assert objs[2].version_id == obj1_intermediate.version_id
    assert objs[3].key == "test"
    assert objs[3].version_id == obj1_first.version_id
Example #37
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing."""
    b1 = Bucket.create()
    b2 = Bucket.create()

    # First version of object
    obj1_first = ObjectVersion.create(b1, "test")
    obj1_first.set_location("b1test1", 1, "achecksum")
    # Intermediate version of the same object.
    obj1_intermediate = ObjectVersion.create(b1, "test")
    obj1_intermediate.set_location("b1test2", 1, "achecksum")
    # Latest version of object
    obj1_latest = ObjectVersion.create(b1, "test")
    obj1_latest.set_location("b1test3", 1, "achecksum")
    # Create objects in/not in same bucket using different key.
    ObjectVersion.create(b1, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(b2, "test").set_location("b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(b1, "test")
    assert ObjectVersion.get(b1, "another")
    assert ObjectVersion.get(b2, "test")

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b2, versions=True).count() == 1

    # Assert order of returned objects (alphabetical)
    objs = ObjectVersion.get_by_bucket(b1.id).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"

    # Assert order of returned object versions (creation date descending)
    objs = ObjectVersion.get_by_bucket(b1.id, versions=True).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"
    assert objs[1].version_id == obj1_latest.version_id
    assert objs[2].key == "test"
    assert objs[2].version_id == obj1_intermediate.version_id
    assert objs[3].key == "test"
    assert objs[3].version_id == obj1_first.version_id
Example #38
def test_get_webhook_event_view_when_release_event(m_gitlab, deposit, client,
                                                   gitlab_release_webhook_sub,
                                                   git_repo_tar):
    class MockBranchManager:
        def get(self, name):
            m = Mock(commit=dict(id='mybranchsha'))
            m.name = 'mybranch'
            return m

    class MockProjectManager:
        def get(self, name, lazy):
            return Mock(branches=MockBranchManager(), id='12345')

    m_gitlab.return_value = Mock(projects=MockProjectManager())
    responses.add(responses.GET, (
        'https://gitlab.cern.ch/api/v4/projects/12345/repository/archive?sha=mybranchsha'
    ),
                  body=git_repo_tar,
                  content_type='application/octet_stream',
                  headers={
                      'Transfer-Encoding': 'binary',
                      'Content-Length': '287'
                  },
                  stream=True,
                  status=200)

    resp = client.post('/repos/event',
                       headers=tag_push_headers,
                       data=json.dumps(tag_push_payload_shortened))

    assert resp.status_code == 200
    assert resp.json == {'message': 'Snapshot of repository was saved.'}
    assert responses.calls[0].request.headers['Private-Token'] == 'some-token'

    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/gitlab.cern.ch/owner_name/myrepository/v3.0.0.tar.gz')
    tar_obj = tarfile.open(obj.file.uri)
    repo_file_name = tar_obj.getmembers()[1]
    repo_content = tar_obj.extractfile(repo_file_name).read()

    assert repo_content == b'test repo for cap\n'

    snapshot = gitlab_release_webhook_sub.snapshots[0]
    assert obj.snapshot_id == snapshot.id
    assert GitSnapshot.query.count() == 1
    assert snapshot.payload == {
        'event_type': 'release',
        'author': {
            'name': 'owner_name',
            'id': 1
        },
        'link': 'https://gitlab.com/owner_name/myrepository/tags/v3.0.0',
        'release': {
            'tag': 'v3.0.0',
            'name': 'My release'
        }
    }
Example #39
    def get_version(self, version_id=None):
        """Return specific version ``ObjectVersion`` instance or HEAD.

        :param version_id: Version ID of the object.
        :returns: :class:`~invenio_files_rest.models.ObjectVersion` instance or
            HEAD of the stored object.
        """
        return ObjectVersion.get(bucket=self.obj.bucket, key=self.obj.key,
                                 version_id=version_id)
Example #40
def test_object_remove_marker(app, db, bucket, objects):
    """Test object remove."""
    obj = objects[0]
    assert ObjectVersion.query.count() == 4
    obj = ObjectVersion.delete(obj.bucket, obj.key)
    db.session.commit()
    assert ObjectVersion.query.count() == 5
    obj = ObjectVersion.get(obj.bucket, obj.key, version_id=obj.version_id)
    obj.remove()
    assert ObjectVersion.query.count() == 4
Example #41
def test_import_record(app, db, dummy_location, record_dump, records_json,
                       resolver):
    """Test import record celery task."""
    assert RecordMetadata.query.count() == 0
    import_record(records_json[0], source_type='json')
    assert RecordMetadata.query.count() == 1
    pid, record = resolver.resolve('11782')
    assert len(record['files']) == 1
    assert ObjectVersion.get(
        record['files'][0]['bucket'], record['files'][0]['filename'])
Example #42
def test_object_multibucket(app, db, dummy_location):
    """Test object creation in multiple buckets."""
    with db.session.begin_nested():
        # Create two buckets each with an object using the same key
        b1 = Bucket.create()
        b2 = Bucket.create()
        obj1 = ObjectVersion.create(b1, "test")
        obj1.set_location("file:///tmp/obj1", 1, "checksum")
        obj2 = ObjectVersion.create(b2, "test")
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

    # Sanity check
    assert ObjectVersion.query.count() == 2

    # Assert object versions are correctly created in each bucket.
    obj = ObjectVersion.get(b1.id, "test")
    assert obj.is_head is True
    assert obj.version_id == obj1.version_id
    obj = ObjectVersion.get(b2.id, "test")
    assert obj.is_head is True
    assert obj.version_id == obj2.version_id
Example #43
def test_delete(client, db, bucket, objects, permissions, user, expected):
    """Test deleting an object."""
    login_user(client, permissions[user])
    for obj in objects:
        # Valid object
        resp = client.delete(url_for(
            'invenio_files_rest.object_api',
            bucket_id=bucket.id,
            key=obj.key,
        ))
        assert resp.status_code == expected
        if resp.status_code == 204:
            assert not ObjectVersion.get(bucket.id, obj.key)
        else:
            assert ObjectVersion.get(bucket.id, obj.key)

        # Invalid object
        assert client.delete(url_for(
            'invenio_files_rest.object_api',
            bucket_id=bucket.id,
            key='invalid',
        )).status_code == 404
Example #44
def test_object_mimetype(app, db, dummy_location):
    """Test object set file."""
    b = Bucket.create()
    db.session.commit()
    obj1 = ObjectVersion.create(b, "test.pdf", stream=BytesIO(b'pdfdata'))
    obj2 = ObjectVersion.create(b, "README", stream=BytesIO(b'pdfdata'))

    assert obj1.mimetype == "application/pdf"
    assert obj2.mimetype == "application/octet-stream"

    # Override computed MIME type.
    obj2.mimetype = "text/plain"
    db.session.commit()
    assert ObjectVersion.get(b, "README").mimetype == "text/plain"
Example #45
def __extract_article_text(record):
    # fixme extraction shouldn't happen in article_upload?

    extracted_text = {}

    for file in record.get('_files', ()):
        filetype = file['filetype']
        if filetype in ('pdf', 'pdf/a'):
            path = ObjectVersion.get(file['bucket'], file['key']).file.uri
            try:
                extracted_text[filetype] = extract_text_from_pdf(path).decode('utf-8')
            except PDFSyntaxError as e:
                current_app.logger.error('Error while extracting text from pdf with uri %s: %s' % (path, e))

    return extracted_text
Example #46
def test_cascade_action_record_delete(app, db, location, record_with_bucket,
                                      generic_file, force,
                                      num_of_recordbuckets):
    """Test cascade action on record delete, with force false."""
    record = record_with_bucket
    record_id = record.id
    bucket_id = record.files.bucket.id

    # check before
    assert len(RecordsBuckets.query.all()) == 1
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)

    record.delete(force=force)

    # check after
    db.session.expunge(record.model)
    with pytest.raises(NoResultFound):
        record = Record.get_record(record_id)
    assert len(RecordsBuckets.query.all()) == num_of_recordbuckets
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)
Example #47
def create_b2safe_file(external_pids, bucket):
    """Create a FileInstance which contains a PID in its uri."""
    validate_schema(external_pids, {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'ePIC_PID': {'type': 'string'},
                'key': {'type': 'string'}
            },
            'additionalProperties': False,
            'required': ['ePIC_PID', 'key']
        }
    })

    keys_list = [e['key'] for e in external_pids]
    keys_set = set(keys_list)
    if len(keys_list) != len(keys_set):
        raise InvalidDepositError([FieldError('external_pids',
            'Field external_pids contains duplicate keys.')])
    for external_pid in external_pids:
        if not external_pid['ePIC_PID'].startswith('http://hdl.handle.net/'):
            external_pid['ePIC_PID'] = 'http://hdl.handle.net/' + \
                external_pid['ePIC_PID']
        if external_pid['key'].startswith('/'):
            raise InvalidDepositError(
                [FieldError('external_pids',
                            'File key cannot start with a "/".')])
        try:
            # Create the file instance if it does not already exist
            file_instance = FileInstance.get_by_uri(external_pid['ePIC_PID'])
            if file_instance is None:
                file_instance = FileInstance.create()
                file_instance.set_uri(
                    external_pid['ePIC_PID'], 1, 0, storage_class='B')
            assert file_instance.storage_class == 'B'
            # Add the file to the bucket if it is not already in it
            current_version = ObjectVersion.get(bucket, external_pid['key'])
            if not current_version or \
                    current_version.file_id != file_instance.id:
                ObjectVersion.create(bucket, external_pid['key'],
                                     file_instance.id)
        except IntegrityError as e:
            raise InvalidDepositError(
                [FieldError('external_pids', 'File URI already exists.')])
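
A minimal payload that satisfies the schema validated above; the handle and key are made-up values and `bucket` is assumed to be an existing Bucket instance:

external_pids = [
    {'ePIC_PID': '11304/0d3bc6bb-1234-5678-9abc-def012345678',  # illustrative handle
     'key': 'measurements.csv'},
]
create_b2safe_file(external_pids, bucket)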
Example #48
def test_new_record(app, db, dummy_location, record_dump, resolver):
    """Test creation of new record."""
    RecordDumpLoader.create(record_dump)
    pid, record = resolver.resolve("11782")
    # Basic sanity test that the record exists
    assert record["title"]
    assert record.created == datetime(2014, 10, 13, 8, 27, 47)
    # Test that this is a completely new record
    assert len(record.revisions) == 2
    pytest.raises(IntegrityError, RecordIdentifier.insert, 11782)
    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get("doi", "10.5281/zenodo.11782")

    assert len(record["_files"]) == 1
    f = record["_files"][0]
    obj = ObjectVersion.get(f["bucket"], f["key"])
    assert obj.file.checksum == f["checksum"]
    assert obj.file.size == f["size"]

    assert BucketTag.get_value(f["bucket"], "record") == str(record.id)
Example #49
File: cli.py Project: lnielsen/zenodo
def remove_file(recid, key=None, index=None):
    """Remove a file from a publishd record."""
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key, recid=recid), fg='red'))
        return

    click.echo(u'Will remove the following file:\n')
    click.echo(click.style(
        u'  key: "{key}"\n'
        u'  {checksum}\n'
        u'  bucket: {bucket}\n'
        u''.format(
            key=key,
            checksum=obj.file.checksum,
            bucket=bucket.id),
        fg='green'))
    click.echo('from record:\n')
    click.echo(click.style(
        u'  Title: "{title}"\n'
        u'  RECID: {recid}\n'
        u'  UUID: {uuid}\n'
        u''.format(
            recid=record['recid'],
            title=record['title'],
            uuid=record.id),
        fg='green'))

    if click.confirm(u'Continue?'):
        bucket.locked = False
        ObjectVersion.delete(bucket, obj.key)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File removed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file removal.', fg='green'))
def upload_to_zenodo(bucket_id, filename):
    """Upload code to zenodo."""
    zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL')
    params = {"access_token": current_app.config.get(
        'ZENODO_ACCESS_TOKEN')}
    filename = filename + '.tar.gz'

    # Create a new, empty deposition; its response carries the bucket URL
    # that the file will be uploaded to.
    r = requests.post(zenodo_server_url, params=params, json={})

    # Look up the archive in local storage via its object version.
    file_obj = ObjectVersion.get(bucket_id, filename)
    file = FileInstance.get(file_obj.file_id)

    bucket_url = r.json()['links']['bucket']
    # Stream the file contents to the deposition's file bucket.
    with open(file.uri, 'rb') as fp:
        response = requests.put(
            bucket_url + '/{}'.format(filename),
            data=fp,
            params=params,
        )

    return jsonify({"status": response.status_code})
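A minimal sketch of exercising this helper (the bucket id and filename below are hypothetical); it assumes a Flask application configured with ZENODO_SERVER_URL and ZENODO_ACCESS_TOKEN, and a request context is needed because the function returns a jsonify response:

# Hypothetical invocation; 'app' is the configured Flask application and the
# bucket is assumed to already contain an object named 'analysis.tar.gz'.
with app.test_request_context():
    result = upload_to_zenodo('00000000-0000-0000-0000-000000000000',
                              'analysis')  # '.tar.gz' is appended internally
    print(result.get_json())  # e.g. {'status': 200}
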
def test_delete_versions(client, db, bucket, versions, permissions, user,
                         expected):
    """Test deleting an object."""
    login_user(client, permissions[user])
    for obj in versions:
        # Valid delete
        resp = client.delete(url_for(
            'invenio_files_rest.object_api',
            bucket_id=bucket.id,
            key=obj.key,
            versionId=obj.version_id,
        ))
        assert resp.status_code == expected
        if resp.status_code == 204:
            assert not ObjectVersion.get(
                bucket.id, obj.key, version_id=obj.version_id)

        # Invalid object
        assert client.delete(url_for(
            'invenio_files_rest.object_api',
            bucket_id=bucket.id,
            key=obj.key,
            versionId='deadbeef-65bd-4d9b-93e2-ec88cc59aec5'
        )).status_code == 404
def test_part_creation(app, db, bucket, get_md5):
    """Test part creation."""
    assert bucket.size == 0
    mp = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    assert bucket.size == 5

    Part.create(mp, 2, stream=BytesIO(b'p'))
    Part.create(mp, 0, stream=BytesIO(b'p1'))
    Part.create(mp, 1, stream=BytesIO(b'p2'))
    db.session.commit()
    assert bucket.size == 5

    mp.complete()
    db.session.commit()
    assert bucket.size == 5

    # Assert checksum of part.
    m = hashlib.md5()
    m.update(b'p2')
    assert "md5:{0}".format(m.hexdigest()) == Part.get_or_none(mp, 1).checksum

    obj = mp.merge_parts()
    db.session.commit()
    assert bucket.size == 5

    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0

    assert obj.file.size == 5
    assert obj.file.checksum == get_md5(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True

    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id
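The layout of the parts above follows from the declared object size (5) and chunk size (2): two full 2-byte chunks plus a final 1-byte part. A small sketch of that arithmetic (not the library's internal code):

# Expected part sizes for `size` bytes split into `chunk_size` chunks;
# only the last part may be smaller (here: [2, 2, 1]).
size, chunk_size = 5, 2
full, remainder = divmod(size, chunk_size)
part_sizes = [chunk_size] * full + ([remainder] if remainder else [])
assert part_sizes == [2, 2, 1]
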
def __getitem__(self, key):
    """Get a specific file."""
    obj = ObjectVersion.get(self.bucket, key)
    if obj:
        return self.file_cls(obj, self.filesmap.get(obj.key, {}))
    raise KeyError(key)
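A hedged usage sketch of this mapping-style access (the 'files' object and key are hypothetical): a present key returns a file wrapper, a missing key raises KeyError.

# Hypothetical usage of the __getitem__ defined above.
try:
    f = files['data.csv']
except KeyError:
    f = None  # key not present in the bucket
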
Example #54
def __getitem__(self, key):
    """Get a specific file."""
    obj = ObjectVersion.get(self.bucket, key)
    if obj:
        return FileObject(self.bucket, obj)
    raise KeyError(key)
Example #55
def get_version(self, version_id=None):
    """Return the ``ObjectVersion`` for ``version_id``, or HEAD if not given."""
    return ObjectVersion.get(bucket=self.bucket, key=self.obj.key,
                             version_id=version_id)
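A hedged usage sketch (the 'fobj' instance is hypothetical): calling get_version() with no argument returns the HEAD version, while passing a version_id retrieves that exact version, or None if it does not exist.

# Hypothetical usage; 'fobj' is an instance of the class defining get_version().
head = fobj.get_version()                            # latest (HEAD) version
same = fobj.get_version(version_id=head.version_id)  # that exact version
assert same.version_id == head.version_id
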
Example #56
    def proc(record):
        rinfo('start...', record)

        if '_files' not in record.json:
            rerror('Skipping. No _files', record)
            return

        xml = [f for f in record.json['_files'] if f['filetype'] == 'xml']
        if not xml:
            rerror('Skipping. No xml in _files', record)
            return

        obj = ObjectVersion.get(xml[0]['bucket'], xml[0]['key'])
        uri = obj.file.uri
        xml = parse(open(uri, 'rt'))
        x_author_groups = xml.getElementsByTagName('ce:author-group')

        if not x_author_groups:
            rerror('Skipping. No author groups.', record)
            return

        if len(x_author_groups) > 1:
            rinfo('Reparse all authors.', record)
            authors = []

            for x_author_group in x_author_groups:
                # skip if not deepest author-group
                if x_author_group.getElementsByTagName('ce:author-group'):
                    continue

                # extract affiliations
                x_affiliations = x_author_group.getElementsByTagName('ce:affiliation')
                affs = []
                for a in x_affiliations:
                    value = a.getElementsByTagName('ce:textfn')[0].childNodes[0].nodeValue
                    affs.append({
                        u'country': find_country(value),
                        u'value': value
                    })

                # extract authors, add affiliations
                x_authors = x_author_group.getElementsByTagName('ce:author')
                for x_author in x_authors:
                    given_name = x_author.getElementsByTagName('ce:given-name')[0].childNodes[0].nodeValue
                    surname = x_author.getElementsByTagName('ce:surname')[0].childNodes[0].nodeValue
                    full_name = '%s, %s' % (surname, given_name)

                    author_affs = []
                    for ref in x_author.getElementsByTagName('ce:cross-ref'):
                        affid = ref.attributes.get('refid').value
                        if 'aff' in affid:
                            aff_value = get_aff_by_id(x_author_group, affid)
                            aff_country = find_country(aff_value)
                            author_affs.append({
                                u'country': aff_country,
                                u'value': aff_value
                            })

                    if not (author_affs or affs):
                        rerror('no affs for author: %s. Skip this record.' % surname, record)
                        return

                    authors.append({
                        'full_name': full_name,
                        'given_name': given_name,
                        'surname': surname,
                        'affiliations': author_affs or affs
                    })

            if authors:
                record.json['authors'] = authors
                flag_modified(record, 'json')
                rinfo('updated', record)
            else:
                rerror('No authors found', record)

        else:
            for x_author_group in x_author_groups:
                x_collaborations = x_author_group.getElementsByTagName('ce:collaboration')
                x_affiliations = x_author_group.getElementsByTagName('ce:affiliation')
                # Needed to support multiple author groups with author matching,
                # but author matching is not really possible.
                # authors_in_group = [
                #     (c.getElementsByTagName('ce:given-name')[0].childNodes[0].nodeValue.replace('-', '').title(),
                #      c.getElementsByTagName('ce:surname')[0].childNodes[0].nodeValue.replace('-', '').title())
                #     for c in x_author_group.getElementsByTagName('ce:author')
                # ]

                if 'authors' not in record.json:
                    # Types 1 and 3: no authors at all. Fix: add collaborations if there are affiliations in the XML.
                    rerror('No authors... SKIPPING', record)
                    return

                    # extract collaborations, find countries later
                    # FIXME we should always extract collaborations, but that would cause a lot more problems now.
                    authors = [{'full_name': c.getElementsByTagName('ce:text')[0].childNodes[0].nodeValue} for c in
                               x_collaborations]
                    if authors:
                        rinfo('Collaborations found: %s' % authors, record)
                        record.json['authors'] = authors
                    else:
                        rerror('No collaborations. Not fixable.', record)

                # possibly we added authors in the previous step.
                if 'authors' in record.json:
                    # Types 2 and 4: authors present, but no affiliations.
                    authors = record.json['authors']
                    aff_count = sum(map(lambda x: 'affiliations' in x, authors))
                    if aff_count == 0:
                        # Type 4: No affiliations in data.
                        new_affs = [
                            {u'country': find_country(a.getElementsByTagName('ce:textfn')[0].childNodes[0].nodeValue),
                             u'value': a.getElementsByTagName('ce:textfn')[0].childNodes[0].nodeValue
                             }
                            for a in x_affiliations]
                        if new_affs:
                            rinfo('New affiliations: %s' % new_affs, record)
                            # FIXME modify this, if multiple author groups should be supported
                            # FIXME (not all authors should be updated)!!!
                            # update_authors(record, authors_in_group, new_affs)

                            for i, a in enumerate(record.json.get('authors')):
                                record.json['authors'][i]['affiliations'] = new_affs
                            flag_modified(record, 'json')
                        else:
                            rerror('No affiliations at all. Not fixable.', record)

                    elif aff_count == len(authors):
                        empty_aff_count = sum(map(lambda x: len(x['affiliations']) == 0, authors))
                        if empty_aff_count == len(authors):
                            # Type 2: Only empty affiliations.
                            rinfo('Type 2. Not fixable.', record)
                        else:
                            rerror('Only SOME authors have EMPTY affiliations. What now?', record)
                    else:
                        rerror('Only SOME authors have affiliations. What now?', record)

        rinfo('OK', record)
Example #57
File: cli.py Project: lnielsen/zenodo
def add_file(recid, fp, replace_existing):
    """Add a new file to a publishd record."""
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    key = os.path.basename(fp.name)

    obj = ObjectVersion.get(bucket, key)
    if obj is not None and not replace_existing:
        click.echo(click.style(u'File with key "{key}" already exists.'
                   u' Use `--replace-existing/-f` to overwrite it.'.format(
                        key=key), fg='red'))
        return

    # Determine the upload size by seeking to the end of the file, then rewind.
    fp.seek(0, SEEK_END)
    size = fp.tell()
    fp.seek(0, SEEK_SET)

    click.echo(u'Will add the following file:\n')
    click.echo(click.style(
        u'  key: "{key}"\n'
        u'  bucket: {bucket}\n'
        u'  size: {size}\n'
        u''.format(
            key=key,
            bucket=bucket.id,
            size=size),
        fg='green'))
    click.echo(u'to record:\n')
    click.echo(click.style(
        u'  Title: "{title}"\n'
        u'  RECID: {recid}\n'
        u'  UUID: {uuid}\n'
        u''.format(
            recid=record['recid'],
            title=record['title'],
            uuid=record.id),
        fg='green'))
    if replace_existing and obj is not None:
        click.echo(u'and remove the file:\n')
        click.echo(click.style(
            u'  key: "{key}"\n'
            u'  bucket: {bucket}\n'
            u'  size: {size}\n'
            u''.format(
                key=obj.key,
                bucket=obj.bucket,
                size=obj.file.size),
            fg='green'))

    if click.confirm(u'Continue?'):
        bucket.locked = False
        if obj is not None and replace_existing:
            ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, key, stream=fp, size=size)
        bucket.locked = True

        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File added successfully.', fg='green'))
    else:
        click.echo(click.style(u'File addition aborted.', fg='green'))