def _migrate(args): path = args['<path_to_storage>'] if not os.path.isdir(path): print('The storage directory cannot be found.') return lc = LocalCKAN() resources = {} # The resource folder is stuctured like so on disk: # - storage/ # - ... # - resources/ # - <3 letter prefix> # - <3 letter prefix> # - <remaining resource_id as filename> # ... # ... # ... for root, dirs, files in os.walk(path): # Only the bottom level of the tree actually contains any files. We # don't care at all about the overall structure. if not files: continue split_root = root.split('/') resource_id = split_root[-2] + split_root[-1] for file_ in files: resources[resource_id + file_] = os.path.join(root, file_) for i, resource in enumerate(resources.iteritems(), 1): resource_id, file_path = resource print('[{i}/{count}] Working on {id}'.format(i=i, count=len(resources), id=resource_id)) resource = lc.action.resource_show(id=resource_id) if resource['url_type'] != 'upload': continue with open(os.path.join(root, file_path), 'rb') as fin: resource['upload'] = FakeFileStorage( fin, resource['url'].split('/')[-1]) uploader = ResourceCloudStorage(resource) uploader.upload(resource['id'])
def _migrate(args): path = args['<path_to_storage>'] single_id = args['<resource_id>'] if not os.path.isdir(path): print('The storage directory cannot be found.') return lc = LocalCKAN() resources = {} failed = [] # The resource folder is stuctured like so on disk: # - storage/ # - ... # - resources/ # - <3 letter prefix> # - <3 letter prefix> # - <remaining resource_id as filename> # ... # ... # ... for root, dirs, files in os.walk(path): # Only the bottom level of the tree actually contains any files. We # don't care at all about the overall structure. if not files: continue split_root = root.split('/') resource_id = split_root[-2] + split_root[-1] for file_ in files: ckan_res_id = resource_id + file_ if single_id and ckan_res_id != single_id: continue resources[ckan_res_id] = os.path.join(root, file_) for i, resource in enumerate(resources.iteritems(), 1): resource_id, file_path = resource print('[{i}/{count}] Working on {id}'.format(i=i, count=len(resources), id=resource_id)) try: resource = lc.action.resource_show(id=resource_id) except NotFound: print(u'\tResource not found') continue if resource['url_type'] != 'upload': print(u'\t`url_type` is not `upload`. Skip') continue with open(file_path, 'rb') as fin: resource['upload'] = FakeFileStorage( fin, resource['url'].split('/')[-1]) try: uploader = ResourceCloudStorage(resource) uploader.upload(resource['id']) except Exception as e: failed.append(resource_id) print(u'\tError of type {0} during upload: {1}'.format( type(e), e)) if failed: log_file = tempfile.NamedTemporaryFile(delete=False) log_file.file.writelines(failed) print(u'ID of all failed uploads are saved to `{0}`'.format( log_file.name))
def _migrate(args): path = args['<path_to_storage>'] single_id = args['<resource_id>'] if not os.path.isdir(path): print('The storage directory cannot be found.') return lc = LocalCKAN() resources = {} failed = [] # The resource folder is stuctured like so on disk: # - storage/ # - ... # - resources/ # - <3 letter prefix> # - <3 letter prefix> # - <remaining resource_id as filename> # ... # ... # ... for root, dirs, files in os.walk(path): # Only the bottom level of the tree actually contains any files. We # don't care at all about the overall structure. if not files: continue split_root = root.split('/') resource_id = split_root[-2] + split_root[-1] for file_ in files: ckan_res_id = resource_id + file_ if single_id and ckan_res_id != single_id: continue resources[ckan_res_id] = os.path.join( root, file_ ) for i, resource in enumerate(resources.iteritems(), 1): resource_id, file_path = resource print('[{i}/{count}] Working on {id}'.format( i=i, count=len(resources), id=resource_id )) try: resource = lc.action.resource_show(id=resource_id) except NotFound: print(u'\tResource not found') continue if resource['url_type'] != 'upload': print(u'\t`url_type` is not `upload`. Skip') continue with open(file_path, 'rb') as fin: resource['upload'] = FakeFileStorage( fin, resource['url'].split('/')[-1] ) try: uploader = ResourceCloudStorage(resource) uploader.upload(resource['id']) except Exception as e: failed.append(resource_id) print(u'\tError of type {0} during upload: {1}'.format(type(e), e)) if failed: log_file = tempfile.NamedTemporaryFile(delete=False) log_file.file.writelines(failed) print(u'ID of all failed uploads are saved to `{0}`'.format(log_file.name))