def initiate_multipart(context, data_dict):
    """Initiate new Multipart Upload.

    Marks the resource as having an upload in progress, removes any
    stale multipart uploads (DB records and, best-effort, partially
    uploaded cloud objects under the same folder prefix), then starts
    a fresh multipart upload through the storage driver.

    :param context: CKAN action context; `auth_user_obj`, when set,
        is recorded as the upload's owner.
    :param data_dict: dict with required keys:
        id: resource's id
        name: filename
        size: filesize

    :returns: MultipartUpload info
    :rtype: dict
    """
    h.check_access('cloudstorage_initiate_multipart', data_dict)
    # `res_id` rather than `id` to avoid shadowing the builtin.
    res_id, name, size = toolkit.get_or_bust(
        data_dict, ['id', 'name', 'size'])
    user_id = None
    if context['auth_user_obj']:
        user_id = context['auth_user_obj'].id

    # Flag the resource so other code can tell an upload is underway.
    res_dict = toolkit.get_action('resource_show')(
        context.copy(), {'id': data_dict.get('id')})
    res_dict['upload_in_progress'] = True
    toolkit.get_action('resource_patch')(context.copy(), res_dict)

    uploader = ResourceCloudStorage({'multipart_name': name})
    res_name = uploader.path_from_filename(res_id, name)

    # Drop any previous upload that targeted the same object name.
    upload_object = MultipartUpload.by_name(res_name)
    if upload_object is not None:
        _delete_multipart(upload_object, uploader)
        upload_object = None

    if upload_object is None:
        # Remove every stale upload recorded for this resource.
        for old_upload in model.Session.query(MultipartUpload).filter_by(
                resource_id=res_id):
            _delete_multipart(old_upload, uploader)

        # Best-effort cleanup of partially uploaded cloud objects in the
        # resource's folder. rfind returns -1 when there is no '/'
        # (was `if ~_rindex:` — same truth table, stated explicitly).
        _rindex = res_name.rfind('/')
        if _rindex != -1:
            try:
                name_prefix = res_name[:_rindex]
                for cloud_object in uploader.container.iterate_objects():
                    if cloud_object.name.startswith(name_prefix):
                        log.info('Removing cloud object: %s' % cloud_object)
                        cloud_object.delete()
            except Exception as e:
                log.exception('[delete from cloud] %s' % e)

        upload_object = MultipartUpload(
            uploader.driver._initiate_multipart(
                container=uploader.container, object_name=res_name),
            res_id, res_name, size, name, user_id)
        upload_object.save()
    return upload_object.as_dict()
def setup_template_variables(self, context, data_dict):
    """Expose a URL-quoted cloud-storage link for the resource to templates."""
    from ckanext.cloudstorage.storage import ResourceCloudStorage
    import urllib
    import urlparse

    resource = data_dict["resource"]
    storage = ResourceCloudStorage(resource)
    # The stored filename is the last path segment of the resource URL.
    filename = urlparse.urlsplit(resource["url"]).path.split('/')[-1]
    signed_url = storage.get_url_from_filename(resource["id"], filename)
    return {"resource_url": urllib.quote(signed_url)}
def clean_multipart(context, data_dict):
    """Clean old multipart uploads.

    :param context:
    :param data_dict:
    :returns: dict with:
        removed - amount of removed uploads.
        total - total amount of expired uploads.
        errors - list of errors raised during deletion. Appears
        when `total` and `removed` are different.
    :rtype: dict
    """
    toolkit.check_access('cloudstorage_clean_multipart', context, data_dict)
    uploader = ResourceCloudStorage({})

    # Anything initiated before this cut-off is considered expired.
    lifetime = _get_max_multipart_lifetime()
    cutoff = datetime.datetime.utcnow() - lifetime

    expired = model.Session.query(MultipartUpload).filter(
        MultipartUpload.initiated < cutoff)

    report = {'removed': 0, 'total': expired.count(), 'errors': []}
    for upload in expired:
        try:
            _delete_multipart(upload, uploader)
        except toolkit.ValidationError as e:
            report['errors'].append(e.error_summary)
        else:
            report['removed'] += 1
    return report
def upload_multipart(context, data_dict):
    """PUT one part of a multipart upload and record its ETag."""
    h.check_access('cloudstorage_upload_multipart', data_dict)
    upload_id, part_number, part_content = toolkit.get_or_bust(
        data_dict, ['uploadId', 'partNumber', 'upload'])

    uploader = ResourceCloudStorage({})
    upload = model.Session.query(MultipartUpload).get(upload_id)

    payload = bytearray(_get_underlying_file(part_content).read())
    resp = uploader.driver.connection.request(
        _get_object_url(uploader, upload.name),
        params={
            'uploadId': upload_id,
            'partNumber': part_number
        },
        method='PUT',
        data=payload,
        headers={'Content-Length': len(payload)})
    if resp.status != 200:
        raise toolkit.ValidationError('Upload failed: part %s' % part_number)

    _save_part_info(part_number, resp.headers['etag'], upload)
    return {'partNumber': part_number, 'ETag': resp.headers['etag']}
def _migrate(args):
    """Migrate resources from local disk storage into cloud storage.

    :param args: docopt-style dict with `<path_to_storage>` pointing
        at CKAN's local storage directory.
    """
    path = args['<path_to_storage>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}

    # The resource folder is stuctured like so on disk:
    # - storage/
    #   - ...
    # - resources/
    #   - <3 letter prefix>
    #     - <3 letter prefix>
    #       - <remaining resource_id as filename>
    #       ...
    #     ...
    #   ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any files. We
        # don't care at all about the overall structure.
        if not files:
            continue

        split_root = root.split('/')
        resource_id = split_root[-2] + split_root[-1]

        for file_ in files:
            resources[resource_id + file_] = os.path.join(root, file_)

    for i, resource in enumerate(resources.iteritems(), 1):
        resource_id, file_path = resource
        print('[{i}/{count}] Working on {id}'.format(
            i=i, count=len(resources), id=resource_id))

        resource = lc.action.resource_show(id=resource_id)
        if resource['url_type'] != 'upload':
            continue

        # BUG FIX: `file_path` is already a full path built with
        # os.path.join(root, file_) above; re-joining it with the
        # stale post-walk `root` produced a wrong, duplicated path.
        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin, resource['url'].split('/')[-1])
            uploader = ResourceCloudStorage(resource)
            uploader.upload(resource['id'])
def finish_multipart(context, data_dict):
    """Called after all parts had been uploaded.

    Triggers call to `_commit_multipart` which will convert separate
    uploaded parts into single file

    :param context:
    :param data_dict: dict with required key `uploadId` - id of
        Multipart Upload that should be finished
    :returns: None
    :rtype: NoneType
    """
    toolkit.check_access('cloudstorage_finish_multipart', context, data_dict)
    upload_id = toolkit.get_or_bust(data_dict, 'uploadId')
    save_action = data_dict.get('save_action', False)

    upload = model.Session.query(MultipartUpload).get(upload_id)
    part_query = model.Session.query(MultipartPart).filter_by(
        upload_id=upload_id).order_by(MultipartPart.n)
    chunks = [(part.n, part.etag) for part in part_query]

    uploader = ResourceCloudStorage({})
    # Best effort: drop any existing object with the same name before
    # committing the assembled upload in its place.
    try:
        uploader.container.get_object(upload.name).delete()
    except Exception:
        pass

    uploader.driver._commit_multipart(
        _get_object_url(uploader, upload.name), upload_id, chunks)
    upload.delete()
    upload.commit()

    if save_action and save_action == "go-metadata":
        try:
            res_dict = toolkit.get_action('resource_show')(
                context.copy(), {'id': data_dict.get('id')})
            pkg_dict = toolkit.get_action('package_show')(
                context.copy(), {'id': res_dict['package_id']})
            # Draft datasets become active once their upload completes.
            if pkg_dict['state'] == 'draft':
                toolkit.get_action('package_patch')(
                    dict(context.copy(), allow_state_change=True),
                    dict(id=pkg_dict['id'], state='active'))
        except Exception as e:
            log.error(e)
    return {'commited': True}
def abort_multipart(context, data_dict):
    """Abort and remove every multipart upload recorded for a resource.

    :returns: list of aborted upload ids
    """
    toolkit.check_access('cloudstorage_abort_multipart', context, data_dict)
    res_id = toolkit.get_or_bust(data_dict, ['id'])
    uploader = ResourceCloudStorage({})

    aborted = []
    for upload in MultipartUpload.resource_uploads(res_id):
        _delete_multipart(upload, uploader)
        aborted.append(upload.id)

    model.Session.commit()
    return aborted
def upload_multipart(context, data_dict):
    """PUT one part of a multipart upload (query-string URL variant)."""
    h.check_access('cloudstorage_upload_multipart', data_dict)
    upload_id, part_number, part_content = toolkit.get_or_bust(
        data_dict, ['uploadId', 'partNumber', 'upload'])

    uploader = ResourceCloudStorage({})
    upload = model.Session.query(MultipartUpload).get(upload_id)

    # Part number and upload id are passed in the query string here.
    target_url = _get_object_url(uploader, upload.name) \
        + '?partNumber={0}&uploadId={1}'.format(part_number, upload_id)
    resp = uploader.driver.connection.request(
        target_url,
        method='PUT',
        data=bytearray(part_content.file.read()))
    if resp.status != 200:
        raise toolkit.ValidationError('Upload failed: part %s' % part_number)

    _save_part_info(part_number, resp.headers['etag'], upload)
    return {'partNumber': part_number, 'ETag': resp.headers['etag']}
def initiate_multipart(context, data_dict):
    """Initiate new Multipart Upload.

    Removes stale multipart uploads for the resource (DB records and,
    best-effort, partially uploaded cloud objects), then POSTs the
    `?uploads` request directly and parses the UploadId from the XML
    response.

    :param context:
    :param data_dict: dict with required keys:
        id: resource's id
        name: filename
        size: filesize

    :returns: MultipartUpload info
    :rtype: dict
    """
    toolkit.check_access('cloudstorage_initiate_multipart', context,
                         data_dict)
    # `res_id` rather than `id` to avoid shadowing the builtin.
    res_id, name, size = toolkit.get_or_bust(
        data_dict, ['id', 'name', 'size'])
    user_id = None
    if context['auth_user_obj']:
        user_id = context['auth_user_obj'].id

    uploader = ResourceCloudStorage({'multipart_name': name})
    res_name = uploader.path_from_filename(res_id, name)

    # Drop any previous upload that targeted the same object name.
    upload_object = MultipartUpload.by_name(res_name)
    if upload_object is not None:
        _delete_multipart(upload_object, uploader)
        upload_object = None

    if upload_object is None:
        # Remove every stale upload recorded for this resource.
        for old_upload in model.Session.query(MultipartUpload).filter_by(
                resource_id=res_id):
            _delete_multipart(old_upload, uploader)

        # Best-effort cleanup of partial cloud objects under the
        # resource's folder. rfind returns -1 when there is no '/'
        # (was `if ~_rindex:` — same truth table, stated explicitly).
        _rindex = res_name.rfind('/')
        if _rindex != -1:
            try:
                name_prefix = res_name[:_rindex]
                for cloud_object in uploader.container.iterate_objects():
                    if cloud_object.name.startswith(name_prefix):
                        log.info('Removing cloud object: %s' % cloud_object)
                        cloud_object.delete()
            except Exception as e:
                log.exception('[delete from cloud] %s' % e)

        resp = uploader.driver.connection.request(
            _get_object_url(uploader, res_name) + '?uploads',
            method='POST')
        if not resp.success():
            raise toolkit.ValidationError(resp.error)
        try:
            upload_id = resp.object.find(
                '{%s}UploadId' % resp.object.nsmap[None]).text
        except AttributeError:
            # No default namespace: fall back to tag-suffix matching.
            # list() keeps the [0] index valid on Python 3, where
            # filter() returns a lazy iterator (no-op on Python 2).
            upload_id_list = list(filter(
                lambda e: e.tag.endswith('UploadId'),
                resp.object.getchildren()))
            upload_id = upload_id_list[0].text
        upload_object = MultipartUpload(
            upload_id, res_id, res_name, size, name, user_id)
        upload_object.save()
    return upload_object.as_dict()
def _migrate(args):
    """Migrate resources from local disk storage into cloud storage.

    :param args: docopt-style dict with `<path_to_storage>` pointing
        at CKAN's local storage directory and an optional
        `<resource_id>` to migrate a single resource only.
    """
    path = args['<path_to_storage>']
    single_id = args['<resource_id>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}
    failed = []

    # The resource folder is stuctured like so on disk:
    # - storage/
    #   - ...
    # - resources/
    #   - <3 letter prefix>
    #     - <3 letter prefix>
    #       - <remaining resource_id as filename>
    #       ...
    #     ...
    #   ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any files. We
        # don't care at all about the overall structure.
        if not files:
            continue

        split_root = root.split('/')
        resource_id = split_root[-2] + split_root[-1]

        for file_ in files:
            ckan_res_id = resource_id + file_
            if single_id and ckan_res_id != single_id:
                continue
            resources[ckan_res_id] = os.path.join(root, file_)

    for i, resource in enumerate(resources.iteritems(), 1):
        resource_id, file_path = resource
        print('[{i}/{count}] Working on {id}'.format(
            i=i, count=len(resources), id=resource_id))

        try:
            resource = lc.action.resource_show(id=resource_id)
        except NotFound:
            print(u'\tResource not found')
            continue
        if resource['url_type'] != 'upload':
            print(u'\t`url_type` is not `upload`. Skip')
            continue

        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin, resource['url'].split('/')[-1])
            try:
                uploader = ResourceCloudStorage(resource)
                uploader.upload(resource['id'])
            except Exception as e:
                failed.append(resource_id)
                print(u'\tError of type {0} during upload: {1}'.format(
                    type(e), e))

    if failed:
        # BUG FIX: writelines() adds no separators, so the ids ran
        # together on a single line; terminate each with a newline.
        # Explicit text mode and close() ensure the data is flushed.
        log_file = tempfile.NamedTemporaryFile(mode='w', delete=False)
        log_file.writelines(rid + '\n' for rid in failed)
        log_file.close()
        print(u'ID of all failed uploads are saved to `{0}`'.format(
            log_file.name))
def _migrate(args):
    """Migrate resources from local disk storage into cloud storage.

    :param args: docopt-style dict with `<path_to_storage>` pointing
        at CKAN's local storage directory and an optional
        `<resource_id>` to migrate a single resource only.
    """
    path = args['<path_to_storage>']
    single_id = args['<resource_id>']
    if not os.path.isdir(path):
        print('The storage directory cannot be found.')
        return

    lc = LocalCKAN()
    resources = {}
    failed = []

    # The resource folder is stuctured like so on disk:
    # - storage/
    #   - ...
    # - resources/
    #   - <3 letter prefix>
    #     - <3 letter prefix>
    #       - <remaining resource_id as filename>
    #       ...
    #     ...
    #   ...
    for root, dirs, files in os.walk(path):
        # Only the bottom level of the tree actually contains any files. We
        # don't care at all about the overall structure.
        if not files:
            continue

        split_root = root.split('/')
        resource_id = split_root[-2] + split_root[-1]

        for file_ in files:
            ckan_res_id = resource_id + file_
            if single_id and ckan_res_id != single_id:
                continue
            resources[ckan_res_id] = os.path.join(root, file_)

    for i, resource in enumerate(resources.iteritems(), 1):
        resource_id, file_path = resource
        print('[{i}/{count}] Working on {id}'.format(
            i=i, count=len(resources), id=resource_id))

        try:
            resource = lc.action.resource_show(id=resource_id)
        except NotFound:
            print(u'\tResource not found')
            continue
        if resource['url_type'] != 'upload':
            print(u'\t`url_type` is not `upload`. Skip')
            continue

        with open(file_path, 'rb') as fin:
            resource['upload'] = FakeFileStorage(
                fin, resource['url'].split('/')[-1])
            try:
                uploader = ResourceCloudStorage(resource)
                uploader.upload(resource['id'])
            except Exception as e:
                failed.append(resource_id)
                print(u'\tError of type {0} during upload: {1}'.format(
                    type(e), e))

    if failed:
        # BUG FIX: writelines() adds no separators, so the ids ran
        # together on a single line; terminate each with a newline.
        # Explicit text mode and close() ensure the data is flushed.
        log_file = tempfile.NamedTemporaryFile(mode='w', delete=False)
        log_file.writelines(rid + '\n' for rid in failed)
        log_file.close()
        print(u'ID of all failed uploads are saved to `{0}`'.format(
            log_file.name))