Example #1
0
def initiate_multipart(context, data_dict):
    """Initiate new Multipart Upload.

    Cleans up any stale uploads and leftover cloud objects for the same
    resource before registering a fresh ``MultipartUpload`` record.

    :param context: CKAN action context
    :param data_dict: dict with required keys:
        id: resource's id
        name: filename
        size: filesize

    :returns: MultipartUpload info
    :rtype: dict

    """

    # NOTE(review): unlike other actions in this file, this calls
    # h.check_access without `context` — confirm the helper's signature.
    h.check_access('cloudstorage_initiate_multipart', data_dict)
    id, name, size = toolkit.get_or_bust(data_dict, ['id', 'name', 'size'])
    user_id = None
    if context['auth_user_obj']:
        user_id = context['auth_user_obj'].id

    # Mark the resource so other code can tell an upload is underway.
    res_dict = toolkit.get_action('resource_show')(context.copy(), {
        'id': data_dict.get('id')
    })
    res_dict['upload_in_progress'] = True
    toolkit.get_action('resource_patch')(context.copy(), res_dict)

    uploader = ResourceCloudStorage({'multipart_name': name})
    res_name = uploader.path_from_filename(id, name)

    upload_object = MultipartUpload.by_name(res_name)

    # An existing upload under the same object name is stale: drop it
    # and start over.
    if upload_object is not None:
        _delete_multipart(upload_object, uploader)
        upload_object = None

    if upload_object is None:
        # Also remove every other unfinished upload recorded for this
        # resource id.
        for old_upload in model.Session.query(MultipartUpload).filter_by(
                resource_id=id):
            _delete_multipart(old_upload, uploader)

        _rindex = res_name.rfind('/')
        # `~(-1) == 0` is falsy, so this branch runs only when a '/' was
        # actually found in the object name.
        if ~_rindex:
            try:
                # Best-effort removal of leftover cloud objects that share
                # this resource's key prefix; failures are logged, not raised.
                name_prefix = res_name[:_rindex]
                for cloud_object in uploader.container.iterate_objects():
                    if cloud_object.name.startswith(name_prefix):
                        log.info('Removing cloud object: %s' % cloud_object)
                        cloud_object.delete()
            except Exception as e:
                log.exception('[delete from cloud] %s' % e)

        # Start the multipart upload on the storage backend and persist its
        # id locally so uploaded parts can be attached later.
        upload_object = MultipartUpload(
            uploader.driver._initiate_multipart(container=uploader.container,
                                                object_name=res_name), id,
            res_name, size, name, user_id)
        upload_object.save()
    return upload_object.as_dict()
Example #2
0
    def setup_template_variables(self, context, data_dict):
        """Build the template variables for this resource view.

        :returns: dict with ``resource_url`` — the percent-encoded cloud
            storage URL of the resource's uploaded file.
        """
        from ckanext.cloudstorage.storage import ResourceCloudStorage
        rcs = ResourceCloudStorage(data_dict["resource"])
        # NOTE(review): Python 2 only — `urllib.quote` and the top-level
        # `urlparse` module do not exist on Python 3.
        import urllib
        import urlparse
        # The stored filename is the last path segment of the resource URL.
        resource_url = rcs.get_url_from_filename(
            data_dict["resource"]["id"],
            urlparse.urlsplit(
                data_dict["resource"]["url"]).path.split('/')[-1])
        encoded_url = urllib.quote(resource_url)

        return {"resource_url": encoded_url}
Example #3
0
def clean_multipart(context, data_dict):
    """Clean old multipart uploads.

    :param context:
    :param data_dict:
    :returns: dict with:
        removed - amount of removed uploads.
        total - total amount of expired uploads.
        errors - list of errors raised during deletion. Appears when
        `total` and `removed` are different.
    :rtype: dict

    """

    toolkit.check_access('cloudstorage_clean_multipart', context, data_dict)

    uploader = ResourceCloudStorage({})
    # Everything initiated before this point in time is considered expired.
    expiration_point = datetime.datetime.utcnow() - _get_max_multipart_lifetime()

    expired = model.Session.query(MultipartUpload).filter(
        MultipartUpload.initiated < expiration_point)

    report = {'removed': 0, 'total': expired.count(), 'errors': []}

    for stale_upload in expired:
        try:
            _delete_multipart(stale_upload, uploader)
        except toolkit.ValidationError as err:
            report['errors'].append(err.error_summary)
        else:
            report['removed'] += 1

    return report
Example #4
0
def upload_multipart(context, data_dict):
    """Upload one part of an in-progress multipart upload.

    :param context: CKAN action context
    :param data_dict: dict with required keys ``uploadId``, ``partNumber``
        and ``upload`` (the part's file payload)
    :returns: dict with ``partNumber`` and the storage-assigned ``ETag``
    :raises toolkit.ValidationError: if the backend does not answer 200
    """
    # NOTE(review): h.check_access is called without `context`, unlike
    # toolkit.check_access elsewhere in this file — confirm the helper's
    # signature.
    h.check_access('cloudstorage_upload_multipart', data_dict)
    upload_id, part_number, part_content = toolkit.get_or_bust(
        data_dict, ['uploadId', 'partNumber', 'upload'])

    uploader = ResourceCloudStorage({})
    upload = model.Session.query(MultipartUpload).get(upload_id)
    # Read the whole part into memory to use as the PUT body.
    data = bytearray(_get_underlying_file(part_content).read())

    resp = uploader.driver.connection.request(
        _get_object_url(uploader, upload.name),
        params={
            'uploadId': upload_id,
            'partNumber': part_number
        },
        method='PUT',
        data=data,
        # NOTE(review): the header value is an int; some HTTP layers expect
        # header values to be strings — confirm the driver accepts this.
        headers={'Content-Length': len(data)})

    if resp.status != 200:
        raise toolkit.ValidationError('Upload failed: part %s' % part_number)

    # Persist the part's ETag; it is required to finish the upload later.
    _save_part_info(part_number, resp.headers['etag'], upload)

    return {'partNumber': part_number, 'ETag': resp.headers['etag']}
Example #5
0
def _migrate(args):
    """Upload every locally-stored resource file to cloud storage.

    :param args: docopt-style dict; ``<path_to_storage>`` is the root of
        CKAN's local file storage.
    """
    path = args['<path_to_storage>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}

    # The resource folder is structured like so on disk:
    # - storage/
    #   - ...
    # - resources/
    #   - <3 letter prefix>
    #     - <3 letter prefix>
    #       - <remaining resource_id as filename>
    #       ...
    #     ...
    #   ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any files. We
        # don't care at all about the overall structure.
        if not files:
            continue

        # Re-assemble the resource id from the two 3-letter prefix dirs
        # plus the remaining-id filename.
        split_root = root.split('/')
        resource_id = split_root[-2] + split_root[-1]

        for file_ in files:
            # Store the full path so no later re-joining is needed.
            resources[resource_id + file_] = os.path.join(root, file_)

    for i, resource in enumerate(resources.iteritems(), 1):
        resource_id, file_path = resource
        print('[{i}/{count}] Working on {id}'.format(i=i,
                                                     count=len(resources),
                                                     id=resource_id))

        resource = lc.action.resource_show(id=resource_id)
        if resource['url_type'] != 'upload':
            continue

        # BUG FIX: `file_path` is already a full path built during the walk;
        # joining it with the stale `root` left over from that loop produced
        # wrong paths for every file outside the last directory visited.
        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin, resource['url'].split('/')[-1])

            uploader = ResourceCloudStorage(resource)
            uploader.upload(resource['id'])
Example #6
0
def finish_multipart(context, data_dict):
    """Called after all parts had been uploaded.

    Triggers call to `_commit_multipart` which will convert separate uploaded
    parts into single file

    :param context:
    :param data_dict: dict with required key `uploadId` - id of Multipart
        Upload that should be finished; optional keys `save_action` and
        `id` (resource id, used for the "go-metadata" flow)
    :returns: dict with a `commited` flag
    :rtype: dict

    """

    toolkit.check_access('cloudstorage_finish_multipart', context, data_dict)
    upload_id = toolkit.get_or_bust(data_dict, 'uploadId')
    save_action = data_dict.get('save_action', False)
    upload = model.Session.query(MultipartUpload).get(upload_id)
    # Collect (part number, etag) pairs in part order — required by the
    # backend to assemble the final object.
    chunks = [(part.n, part.etag)
              for part in model.Session.query(MultipartPart).filter_by(
                  upload_id=upload_id).order_by(MultipartPart.n)]
    uploader = ResourceCloudStorage({})
    # Best effort: remove any existing object with the same name so the
    # committed upload replaces it cleanly.
    try:
        obj = uploader.container.get_object(upload.name)
        obj.delete()
    except Exception:
        pass
    uploader.driver._commit_multipart(_get_object_url(uploader, upload.name),
                                      upload_id, chunks)
    # The upload is complete; drop its local bookkeeping record.
    upload.delete()
    upload.commit()

    # When the UI finished via "go-metadata", promote a draft dataset to
    # active; any failure here is logged but does not fail the action.
    if save_action and save_action == "go-metadata":
        try:
            res_dict = toolkit.get_action('resource_show')(
                context.copy(), {
                    'id': data_dict.get('id')
                })
            pkg_dict = toolkit.get_action('package_show')(
                context.copy(), {
                    'id': res_dict['package_id']
                })
            if pkg_dict['state'] == 'draft':
                toolkit.get_action('package_patch')(dict(
                    context.copy(), allow_state_change=True),
                                                    dict(id=pkg_dict['id'],
                                                         state='active'))
        except Exception as e:
            log.error(e)
    return {'commited': True}
Example #7
0
def abort_multipart(context, data_dict):
    """Abort every in-progress multipart upload for a resource.

    :param data_dict: dict with required key `id` - resource's id
    :returns: list of ids of the aborted uploads
    """
    toolkit.check_access('cloudstorage_abort_multipart', context, data_dict)
    resource_id = toolkit.get_or_bust(data_dict, ['id'])
    uploader = ResourceCloudStorage({})

    aborted = []
    for pending in MultipartUpload.resource_uploads(resource_id):
        _delete_multipart(pending, uploader)
        aborted.append(pending.id)

    model.Session.commit()

    return aborted
Example #8
0
def upload_multipart(context, data_dict):
    """Send one part of a multipart upload to the backend via a raw PUT.

    :returns: dict with ``partNumber`` and the storage-assigned ``ETag``
    :raises toolkit.ValidationError: if the backend does not answer 200
    """
    h.check_access('cloudstorage_upload_multipart', data_dict)
    upload_id, part_number, part_content = toolkit.get_or_bust(
        data_dict, ['uploadId', 'partNumber', 'upload'])

    uploader = ResourceCloudStorage({})
    upload = model.Session.query(MultipartUpload).get(upload_id)

    # Address the specific part of the specific upload via query params.
    part_url = _get_object_url(uploader, upload.name) + \
        '?partNumber={0}&uploadId={1}'.format(part_number, upload_id)
    payload = bytearray(part_content.file.read())
    resp = uploader.driver.connection.request(
        part_url, method='PUT', data=payload)
    if resp.status != 200:
        raise toolkit.ValidationError('Upload failed: part %s' % part_number)

    # The part's ETag must be recorded to finish the upload later.
    _save_part_info(part_number, resp.headers['etag'], upload)
    return {'partNumber': part_number, 'ETag': resp.headers['etag']}
Example #9
0
def initiate_multipart(context, data_dict):
    """Initiate new Multipart Upload.

    Cleans up stale uploads and leftover cloud objects for the resource,
    then starts a new multipart upload via a raw POST request.

    :param context:
    :param data_dict: dict with required keys:
        id: resource's id
        name: filename
        size: filesize

    :returns: MultipartUpload info
    :rtype: dict

    """

    toolkit.check_access('cloudstorage_initiate_multipart', context, data_dict)
    id, name, size = toolkit.get_or_bust(data_dict, ['id', 'name', 'size'])
    user_id = None
    if context['auth_user_obj']:
        user_id = context['auth_user_obj'].id

    uploader = ResourceCloudStorage({'multipart_name': name})
    res_name = uploader.path_from_filename(id, name)

    upload_object = MultipartUpload.by_name(res_name)

    # An existing upload under the same object name is stale: drop it.
    if upload_object is not None:
        _delete_multipart(upload_object, uploader)
        upload_object = None

    if upload_object is None:
        # Remove every other unfinished upload recorded for this resource.
        for old_upload in model.Session.query(MultipartUpload).filter_by(
                resource_id=id):
            _delete_multipart(old_upload, uploader)

        _rindex = res_name.rfind('/')
        # `~(-1) == 0` is falsy, so this runs only when a '/' was found.
        if ~_rindex:
            try:
                # Best-effort removal of leftover cloud objects sharing this
                # resource's key prefix; failures are logged, not raised.
                name_prefix = res_name[:_rindex]
                for cloud_object in uploader.container.iterate_objects():
                    if cloud_object.name.startswith(name_prefix):
                        log.info('Removing cloud object: %s' % cloud_object)
                        cloud_object.delete()
            except Exception as e:
                log.exception('[delete from cloud] %s' % e)

        # `?uploads` is the S3-style "initiate multipart upload" request.
        resp = uploader.driver.connection.request(
            _get_object_url(uploader, res_name) + '?uploads', method='POST')
        if not resp.success():
            raise toolkit.ValidationError(resp.error)
        try:
            # Parse the UploadId element out of the XML response using the
            # default namespace.
            upload_id = resp.object.find('{%s}UploadId' %
                                         resp.object.nsmap[None]).text
        except AttributeError:
            # Fallback when no default namespace is mapped: match by tag
            # suffix. NOTE(review): subscripting `filter(...)` is Python 2
            # only — on Python 3 `filter` returns an iterator.
            upload_id_list = filter(lambda e: e.tag.endswith('UploadId'),
                                    resp.object.getchildren())
            upload_id = upload_id_list[0].text
        upload_object = MultipartUpload(upload_id, id, res_name, size, name,
                                        user_id)

        upload_object.save()
    return upload_object.as_dict()
Example #10
0
def _migrate(args):
    """Upload locally-stored resource files to cloud storage.

    :param args: docopt-style dict with ``<path_to_storage>`` (root of
        CKAN's local file storage) and ``<resource_id>`` to optionally
        restrict the migration to a single resource.
    """
    path = args['<path_to_storage>']
    single_id = args['<resource_id>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}
    failed = []

    # The resource folder is structured like so on disk:
    # - storage/
    #   - ...
    # - resources/
    #   - <3 letter prefix>
    #     - <3 letter prefix>
    #       - <remaining resource_id as filename>
    #       ...
    #     ...
    #   ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any files. We
        # don't care at all about the overall structure.
        if not files:
            continue

        # Re-assemble the resource id from the two 3-letter prefix dirs.
        split_root = root.split('/')
        resource_id = split_root[-2] + split_root[-1]

        for file_ in files:
            ckan_res_id = resource_id + file_
            if single_id and ckan_res_id != single_id:
                continue

            resources[ckan_res_id] = os.path.join(root, file_)

    for i, resource in enumerate(resources.iteritems(), 1):
        resource_id, file_path = resource
        print('[{i}/{count}] Working on {id}'.format(i=i,
                                                     count=len(resources),
                                                     id=resource_id))

        try:
            resource = lc.action.resource_show(id=resource_id)
        except NotFound:
            print(u'\tResource not found')
            continue

        if resource['url_type'] != 'upload':
            print(u'\t`url_type` is not `upload`. Skip')
            continue

        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin, resource['url'].split('/')[-1])
            try:
                uploader = ResourceCloudStorage(resource)
                uploader.upload(resource['id'])
            except Exception as e:
                failed.append(resource_id)
                print(u'\tError of type {0} during upload: {1}'.format(
                    type(e), e))

    if failed:
        log_file = tempfile.NamedTemporaryFile(delete=False)
        # BUG FIX: writelines() adds no separators; without the trailing
        # newline every failed id ran together on a single line.
        log_file.file.writelines(id_ + '\n' for id_ in failed)
        print(u'ID of all failed uploads are saved to `{0}`'.format(
            log_file.name))
Example #11
0
def _migrate(args):
    """Upload locally-stored resource files to cloud storage.

    :param args: docopt-style dict with ``<path_to_storage>`` (root of
        CKAN's local file storage) and ``<resource_id>`` to optionally
        restrict the migration to a single resource.
    """
    path = args['<path_to_storage>']
    single_id = args['<resource_id>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}
    failed = []

    # The resource folder is structured like so on disk:
    # - storage/
    #   - ...
    # - resources/
    #   - <3 letter prefix>
    #     - <3 letter prefix>
    #       - <remaining resource_id as filename>
    #       ...
    #     ...
    #   ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any files. We
        # don't care at all about the overall structure.
        if not files:
            continue

        # Re-assemble the resource id from the two 3-letter prefix dirs.
        split_root = root.split('/')
        resource_id = split_root[-2] + split_root[-1]

        for file_ in files:
            ckan_res_id = resource_id + file_
            if single_id and ckan_res_id != single_id:
                continue

            resources[ckan_res_id] = os.path.join(
                root,
                file_
            )

    for i, resource in enumerate(resources.iteritems(), 1):
        resource_id, file_path = resource
        print('[{i}/{count}] Working on {id}'.format(
            i=i,
            count=len(resources),
            id=resource_id
        ))

        try:
            resource = lc.action.resource_show(id=resource_id)
        except NotFound:
            print(u'\tResource not found')
            continue
        if resource['url_type'] != 'upload':
            print(u'\t`url_type` is not `upload`. Skip')
            continue

        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin,
                resource['url'].split('/')[-1]
            )
            try:
                uploader = ResourceCloudStorage(resource)
                uploader.upload(resource['id'])
            except Exception as e:
                failed.append(resource_id)
                print(u'\tError of type {0} during upload: {1}'.format(type(e), e))

    if failed:
        log_file = tempfile.NamedTemporaryFile(delete=False)
        # BUG FIX: writelines() adds no separators; without the trailing
        # newline every failed id ran together on a single line.
        log_file.file.writelines(id_ + '\n' for id_ in failed)
        print(u'ID of all failed uploads are saved to `{0}`'.format(log_file.name))