def prepare(self):
    # Create the archive root path.
    try:
        util.make_path(self._root)
    except EnvironmentError as _error:
        raise Error("unable to create archive root path '%s' [%s]" % (self._root, _error))
def move(self, product, archive_path, paths=None):
    # Ignore if product already there
    if product.core.archive_path == archive_path:
        return paths

    # Make target archive path
    abs_archive_path = os.path.realpath(os.path.join(self._root, archive_path))
    util.make_path(abs_archive_path)

    # Move files there
    product_path = self.product_path(product)
    os.rename(product_path, os.path.join(abs_archive_path, product.core.physical_name))

    # Optionally rewrite (local) paths
    if paths is not None:
        paths = [os.path.join(self._root, archive_path,
                              os.path.relpath(path, os.path.join(self._root, product.core.archive_path)))
                 for path in paths]

    return paths
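# A minimal, self-contained sketch of the path rewriting performed by move() above,
# using hypothetical POSIX-style values (an archive rooted at /archive, a product
# moved from 2019/01 to 2019/02). Each path is re-based by taking its position
# relative to the old archive_path and joining it onto the new one.
import os

_root = '/archive'
old_archive_path = '2019/01'
new_archive_path = '2019/02'
paths = ['/archive/2019/01/product/file.dat']

rewritten = [os.path.join(_root, new_archive_path,
                          os.path.relpath(path, os.path.join(_root, old_archive_path)))
             for path in paths]
assert rewritten == ['/archive/2019/02/product/file.dat']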
def get(self, product, product_path, target_path, use_enclosing_directory, use_symlinks=None):
    if use_symlinks:
        raise Error("Swift storage backend does not support symlinks")

    archive_path = product.core.archive_path

    keys = self._object_keys(product_path)
    if not keys:
        raise Error("no data for product '%s' (%s)" % (product.core.product_name, product.core.uuid))

    for key in keys:
        rel_path = os.path.relpath(key, archive_path)
        if use_enclosing_directory:
            rel_path = '/'.join(rel_path.split('/')[1:])
        target = os.path.normpath(os.path.join(target_path, rel_path))

        if key.endswith('/'):
            util.make_path(target)
        else:
            util.make_path(os.path.dirname(target))
            binary = self._conn.get_object(self.container, key)[1]
            with open(target, 'wb') as f:
                f.write(binary)
def pull(self, archive, product):
    if getattr(product.core, "archive_path", None) is None:
        raise Error("cannot pull files that do not have archive_path set")

    # Determine the (absolute) path in the archive that will contain the product and create it if required.
    abs_archive_path = os.path.realpath(os.path.join(archive._root, product.core.archive_path))
    abs_product_path = os.path.join(abs_archive_path, product.core.physical_name)

    # Create destination location for product
    try:
        util.make_path(abs_archive_path)
    except EnvironmentError as _error:
        raise Error("cannot create parent destination path '%s' [%s]" % (abs_archive_path, _error))

    plugin = archive.product_type_plugin(product.core.product_type)

    # Create a temporary directory and download the product there, then move the product to its
    # destination within the archive.
    try:
        with util.TemporaryDirectory(prefix=".pull-", suffix="-%s" % product.core.uuid.hex,
                                     dir=abs_archive_path) as tmp_path:

            # Create enclosing directory if required.
            if plugin.use_enclosing_directory:
                tmp_path = os.path.join(tmp_path, product.core.physical_name)
                util.make_path(tmp_path)

            # Define a temporary location and download the file
            tmp_file = os.path.join(tmp_path, product.core.physical_name)
            downloader = util.Downloader(product.core.remote_url, archive.auth_file())
            downloader.save(tmp_file)

            # TODO: implement extraction of downloaded archives
            # - for ftp and file, check if the url ends with 'core.physical_name + <archive ext>'
            # - for http/https, check the header for the line:
            #     Content-Disposition: attachment; filename="**********"
            #   and then use this "**********" filename to match against core.physical_name + <archive ext>

            # Move the transferred product into its destination within the archive.
            if plugin.use_enclosing_directory:
                os.rename(tmp_path, abs_product_path)
            else:
                os.rename(tmp_file, abs_product_path)

    except EnvironmentError as _error:
        raise Error("unable to transfer product to destination path '%s' [%s]" % (abs_product_path, _error))
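# Stdlib-only sketch of the stage-then-rename pattern used by pull() above. Staging
# inside the destination directory keeps the final os.rename() on one filesystem, so
# the product appears at its destination in a single (effectively atomic) step and a
# failed download never leaves a partial product behind. All names are hypothetical.
import os
import tempfile

dest_dir = tempfile.mkdtemp()
with tempfile.TemporaryDirectory(prefix='.pull-', dir=dest_dir) as tmp_path:
    staged = os.path.join(tmp_path, 'product.dat')
    with open(staged, 'w') as f:
        f.write('payload')                                    # stand-in for the download
    os.rename(staged, os.path.join(dest_dir, 'product.dat'))  # same-filesystem move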
def __init__(self, bucket, host, port, access_key, secret_access_key, prefix='', tmp_root=None,
             download_args=None, upload_args=None, copy_args=None, transfer_config=None):
    super(S3StorageBackend, self).__init__()

    self.bucket = bucket
    if prefix and not prefix.endswith('/'):
        prefix += '/'
    self._prefix = prefix

    if port == 80:
        export_port = ''
    else:
        export_port = ':%d' % port
    self.global_prefix = os.path.join('http://%s%s/%s' % (host, export_port, bucket), prefix)

    self._root = bucket

    if tmp_root:
        tmp_root = os.path.realpath(tmp_root)
        util.make_path(tmp_root)
    self._tmp_root = tmp_root

    self._resource = boto3.resource(
        service_name='s3',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_access_key,
        endpoint_url='http://%s:%s' % (host, port),
    )

    self._download_args = None
    if download_args:
        self._download_args = json.loads(download_args)

    self._upload_args = None
    if upload_args:
        self._upload_args = json.loads(upload_args)

    self._copy_args = None
    if copy_args:
        self._copy_args = json.loads(copy_args)

    if transfer_config:
        self._transfer_config = boto3.s3.transfer.TransferConfig(**json.loads(transfer_config))
    else:
        self._transfer_config = boto3.s3.transfer.TransferConfig()
def __init__(self, container, user, key, authurl, tmp_root=None):
    super(SwiftStorageBackend, self).__init__()

    self.container = container
    self._root = container

    if tmp_root:
        tmp_root = os.path.realpath(tmp_root)
        util.make_path(tmp_root)
    self._tmp_root = tmp_root

    self._conn = swiftclient.Connection(user=user, key=key, authurl=authurl)
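# Hypothetical usage sketch of the Swift backend above; the container name,
# credentials, and auth URL are made up. swiftclient.Connection stores the
# parameters here and only authenticates on first use.
backend = SwiftStorageBackend(
    container='my-archive',
    user='tenant:user',
    key='secret',
    authurl='http://swift.example.com/auth/v1.0',
    tmp_root='/tmp/muninn-swift',
)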
def move(self, product, archive_path):
    # Ignore if product already there
    if product.core.archive_path == archive_path:
        return

    # Make target archive path
    abs_archive_path = os.path.realpath(os.path.join(self._root, archive_path))
    util.make_path(abs_archive_path)

    # Move files there
    product_path = self.product_path(product)
    os.rename(product_path, os.path.join(abs_archive_path, product.core.physical_name))
def __init__(self, bucket, host, access_key, secret_access_key, port=None, region=None, prefix='',
             tmp_root=None, download_args=None, upload_args=None, copy_args=None, transfer_config=None):
    super(S3StorageBackend, self).__init__()

    self.bucket = bucket
    if prefix and not prefix.endswith('/'):
        prefix += '/'
    self._prefix = prefix

    endpoint_url = host
    if ':' not in host:
        if port == 443:
            endpoint_url = 'https://' + endpoint_url
        else:
            endpoint_url = 'http://' + endpoint_url
            if port is not None and port != 80:
                endpoint_url += ':%d' % port
    elif port is not None:
        endpoint_url += ':%d' % port

    self.global_prefix = os.path.join(endpoint_url, bucket, prefix)

    self._root = bucket

    if tmp_root:
        tmp_root = os.path.realpath(tmp_root)
        util.make_path(tmp_root)
    self._tmp_root = tmp_root

    self._resource = boto3.resource(
        service_name='s3',
        region_name=region,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_access_key,
        endpoint_url=endpoint_url,
    )

    self._download_args = None
    if download_args:
        self._download_args = json.loads(download_args)

    self._upload_args = None
    if upload_args:
        self._upload_args = json.loads(upload_args)

    self._copy_args = None
    if copy_args:
        self._copy_args = json.loads(copy_args)

    if transfer_config:
        self._transfer_config = boto3.s3.transfer.TransferConfig(**json.loads(transfer_config))
    else:
        self._transfer_config = boto3.s3.transfer.TransferConfig()
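# Hypothetical usage sketch of the S3 backend above; bucket, host, and credentials
# are made up. A bare host defaults to http unless port is 443; transfer_config is
# passed as a JSON string and forwarded to boto3.s3.transfer.TransferConfig.
backend = S3StorageBackend(
    bucket='my-archive',
    host='s3.example.com',
    access_key='AKIAEXAMPLE',
    secret_access_key='examplesecret',
    port=443,
    region='eu-west-1',
    transfer_config='{"max_concurrency": 4}',
)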
def get(self, product, product_path, target_path, use_enclosing_directory, use_symlinks=None):
    if use_symlinks:
        raise Error("S3 storage backend does not support symlinks")

    archive_path = product.core.archive_path
    prefix = self._prefix + product_path

    objs = list(self._resource.Bucket(self.bucket).objects.filter(Prefix=prefix))
    if not objs:
        raise Error("no data for product '%s' (%s)" % (product.core.product_name, product.core.uuid))

    for obj in objs:
        rel_path = os.path.relpath(obj.key, self._prefix + archive_path)
        if use_enclosing_directory:
            rel_path = '/'.join(rel_path.split('/')[1:])
        target = os.path.normpath(os.path.join(target_path, rel_path))

        if obj.key.endswith('/'):
            util.make_path(target)
        else:
            util.make_path(os.path.dirname(target))
            self._resource.Object(self.bucket, obj.key).download_file(target, ExtraArgs=self._download_args,
                                                                      Config=self._transfer_config)
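# Self-contained sketch of the key-to-target mapping in get() above, using
# hypothetical POSIX-style values. With an enclosing directory, the first component
# of the relative path (the product's own directory) is stripped, so the product
# contents land directly under target_path.
import os

prefix = 'archive/'                                     # stand-in for self._prefix
archive_path = '2019/01'
key = prefix + '2019/01/product/data/file.dat'

rel_path = os.path.relpath(key, prefix + archive_path)  # 'product/data/file.dat'
rel_path = '/'.join(rel_path.split('/')[1:])            # 'data/file.dat'
target = os.path.normpath(os.path.join('/tmp/out', rel_path))
assert target == '/tmp/out/data/file.dat'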
def get_tmp_root(self, product):
    tmp_root = os.path.join(self._root, product.core.archive_path)
    util.make_path(tmp_root)
    return tmp_root
def put(self, paths, properties, use_enclosing_directory, use_symlinks=None, retrieve_files=None):
    if use_symlinks is None:
        use_symlinks = self._use_symlinks

    physical_name = properties.core.physical_name
    archive_path = properties.core.archive_path
    uuid = properties.core.uuid

    abs_archive_path = os.path.realpath(os.path.join(self._root, archive_path))
    abs_product_path = os.path.join(abs_archive_path, physical_name)

    # TODO separate this out like 'current_archive_path'
    if paths is not None and util.is_sub_path(os.path.realpath(paths[0]), abs_product_path, allow_equal=True):
        # Product should already be in the target location
        for path in paths:
            if not os.path.exists(path):
                raise Error("product source path does not exist '%s'" % (path,))
            if not util.is_sub_path(os.path.realpath(path), abs_product_path, allow_equal=True):
                raise Error("cannot ingest product where only part of the files are already at the "
                            "destination location")
    else:
        # Create destination location for product
        try:
            util.make_path(abs_archive_path)
        except EnvironmentError as _error:
            raise Error("cannot create parent destination path '%s' [%s]" % (abs_archive_path, _error))

        # Create a temporary directory and transfer the product there, then move the product to its
        # destination within the archive.
        try:
            tmp_root = self.get_tmp_root(properties)
            with util.TemporaryDirectory(prefix=".put-", suffix="-%s" % uuid.hex, dir=tmp_root) as tmp_path:

                # Create enclosing directory if required.
                if use_enclosing_directory:
                    tmp_path = os.path.join(tmp_path, physical_name)
                    util.make_path(tmp_path)

                # Transfer the product (parts).
                if use_symlinks:
                    if use_enclosing_directory:
                        abs_path = abs_product_path
                    else:
                        abs_path = abs_archive_path

                    # Create symbolic link(s) for the product (parts).
                    for path in paths:
                        if util.is_sub_path(path, self._root):
                            # Create a relative symbolic link when the target is part of the archive
                            # (i.e. when creating an intra-archive symbolic link). This ensures the
                            # archive can be relocated without breaking intra-archive symbolic links.
                            # (The link is created inside tmp_path but computed relative to its final
                            # location, so it only resolves once the rename below moves it into place.)
                            os.symlink(os.path.relpath(path, abs_path),
                                       os.path.join(tmp_path, os.path.basename(path)))
                        else:
                            os.symlink(path, os.path.join(tmp_path, os.path.basename(path)))
                else:
                    # Copy/retrieve product (parts).
                    if retrieve_files:
                        paths = retrieve_files(tmp_path)
                    else:
                        for path in paths:
                            util.copy_path(path, tmp_path, resolve_root=True)

                # Move the transferred product into its destination within the archive.
                if use_enclosing_directory:
                    os.rename(tmp_path, abs_product_path)
                else:
                    assert len(paths) == 1 and os.path.basename(paths[0]) == physical_name
                    tmp_product_path = os.path.join(tmp_path, physical_name)
                    os.rename(tmp_product_path, abs_product_path)

        except EnvironmentError as _error:
            raise Error("unable to transfer product to destination path '%s' [%s]" % (abs_product_path, _error))
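# Runnable sketch (POSIX only) of the intra-archive symlink rule in put() above:
# links whose target lies inside the archive root are created relative, so the whole
# archive tree can be relocated without breaking them. A throwaway temporary
# directory stands in for the archive root.
import os
import tempfile

root = tempfile.mkdtemp()
target = os.path.join(root, 'products', 'a', 'file.dat')
os.makedirs(os.path.dirname(target))
open(target, 'w').close()

link_dir = os.path.join(root, 'products', 'b')
os.makedirs(link_dir)
link = os.path.join(link_dir, 'file.dat')
os.symlink(os.path.relpath(target, link_dir), link)  # stores '../a/file.dat'

assert os.readlink(link) == '../a/file.dat'
assert os.path.isfile(link)  # still resolves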
def pull(self, archive, product):
    from ecmwfapi import ECMWFDataServer, ECMWFService
    dataserver = ECMWFDataServer(log=logging.info)
    marsservice = ECMWFService("mars", log=logging.info)

    if getattr(product.core, "archive_path", None) is None:
        raise Error("cannot pull files that do not have archive_path set")

    # Determine the (absolute) path in the archive that will contain the product and create it if required.
    abs_archive_path = os.path.realpath(os.path.join(archive._root, product.core.archive_path))
    abs_product_path = os.path.join(abs_archive_path, product.core.physical_name)

    # Create destination location for product
    try:
        util.make_path(abs_archive_path)
    except EnvironmentError as _error:
        raise Error("cannot create parent destination path '%s' [%s]" % (abs_archive_path, _error))

    requests = []
    for order in product.core.remote_url.split('?')[1].split('&concatenate&'):
        request = {}
        for param in order.split('&'):
            key, value = param.split('=')
            request[key] = value
        requests.append(request)

    plugin = archive.product_type_plugin(product.core.product_type)

    # Create a temporary directory and download the product there, then move the product to its
    # destination within the archive.
    try:
        with util.TemporaryDirectory(prefix=".pull-", suffix="-%s" % product.core.uuid.hex,
                                     dir=abs_archive_path) as tmp_path:

            # Create enclosing directory if required.
            if plugin.use_enclosing_directory:
                tmp_path = os.path.join(tmp_path, product.core.physical_name)
                util.make_path(tmp_path)

            # Download product. GRIB output is binary, so concatenate in binary mode.
            tmp_file_combined = os.path.join(tmp_path, product.core.physical_name)
            tmp_file_download = os.path.join(tmp_path, "request.grib")
            with open(tmp_file_combined, "wb") as combined_file:
                for request in requests:
                    if 'dataset' in request:
                        request['target'] = tmp_file_download
                        dataserver.retrieve(request)
                    else:
                        marsservice.execute(request, tmp_file_download)
                    with open(tmp_file_download, "rb") as result_file:
                        combined_file.write(result_file.read())
                    os.remove(tmp_file_download)

            # Move the retrieved product into its destination within the archive.
            if plugin.use_enclosing_directory:
                os.rename(tmp_path, abs_product_path)
            else:
                os.rename(tmp_file_combined, abs_product_path)

    except EnvironmentError as _error:
        raise Error("unable to transfer product to destination path '%s' [%s]" % (abs_product_path, _error))
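# Self-contained sketch of the remote_url parsing in pull() above, with a
# hypothetical URL. Every '&concatenate&'-separated segment of the query string
# becomes one request dict; requests with a 'dataset' key go to ECMWFDataServer,
# the rest to the MARS service, and the resulting GRIB files are concatenated.
remote_url = ('ecmwfapi:?dataset=interim&date=20140101&param=167.128'
              '&concatenate&date=20140102&param=167.128&type=an')

requests = []
for order in remote_url.split('?')[1].split('&concatenate&'):
    requests.append(dict(param.split('=') for param in order.split('&')))

assert requests[0] == {'dataset': 'interim', 'date': '20140101', 'param': '167.128'}
assert 'dataset' not in requests[1]  # would be executed via the MARS service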