def list_dataset_uris(cls, base_uri, config_path):
    """Return list containing URIs with base URI.

    :param base_uri: ECS base URI whose netloc names the bucket
    :param config_path: path to dtool config file used when constructing
        per-dataset storage brokers
    :returns: list of dataset URIs that have admin metadata
    """
    uri_list = []

    # Parse once; the netloc is both the bucket name and the key used to
    # look up per-bucket endpoint/credentials.  (Previously the URI was
    # parsed twice and the name 'bucket' was reused for the netloc string
    # and then shadowed by the boto3 Bucket object.)
    parse_result = generous_parse_uri(base_uri)
    bucket_name = parse_result.netloc

    ecs_endpoint = get_config_value(
        "DTOOL_ECS_ENDPOINT_{}".format(bucket_name))
    ecs_access_key_id = get_config_value(
        "DTOOL_ECS_ACCESS_KEY_ID_{}".format(bucket_name))
    ecs_secret_access_key = get_config_value(
        "DTOOL_ECS_SECRET_ACCESS_KEY_{}".format(bucket_name))

    session = Session(
        aws_access_key_id=ecs_access_key_id,
        aws_secret_access_key=ecs_secret_access_key)
    resource = session.resource(
        's3', endpoint_url=ecs_endpoint, config=BOTO3_CONFIG)

    bucket = resource.Bucket(bucket_name)

    # Dataset registration keys have the form 'dtool-<uuid>'.
    for obj in bucket.objects.filter(Prefix='dtool').all():
        uuid = obj.key.split('-', 1)[1]
        uri = cls.generate_uri(None, uuid, base_uri)
        storage_broker = cls(uri, config_path)
        if storage_broker.has_admin_metadata():
            uri_list.append(uri)

    return uri_list
def __init__(self, uri, config_path=None):
    """Initialise ECS storage broker for the dataset at ``uri``.

    :param uri: dataset URI of the form ecs://<bucket>/<uuid>
    :param config_path: optional path to a dtool config file
    :raises RuntimeError: if the per-bucket endpoint, access key id or
        secret access key is missing from the configuration
    """
    parse_result = generous_parse_uri(uri)

    self.bucket = parse_result.netloc
    uuid = parse_result.path[1:]
    self.uuid = uuid

    # Endpoint and credentials are configured per bucket.
    ecs_endpoint = get_config_value("DTOOL_ECS_ENDPOINT_{}".format(
        self.bucket))
    ecs_access_key_id = get_config_value(
        "DTOOL_ECS_ACCESS_KEY_ID_{}".format(self.bucket))
    ecs_secret_access_key = get_config_value(
        "DTOOL_ECS_SECRET_ACCESS_KEY_{}".format(self.bucket))

    if not ecs_endpoint:
        raise RuntimeError(
            "No ECS endpoint specified for bucket '{bucket}', "
            "please set DTOOL_ECS_ENDPOINT_{bucket}.".format(
                bucket=self.bucket))
    if not ecs_access_key_id:
        raise RuntimeError(
            "No ECS access key id specified for bucket '{bucket}', "
            "please set DTOOL_ECS_ACCESS_KEY_ID_{bucket}.".format(
                bucket=self.bucket))
    if not ecs_secret_access_key:
        raise RuntimeError(
            "No ECS secret access key specified for bucket '{bucket}', "
            "please set DTOOL_ECS_SECRET_ACCESS_KEY_{bucket}.".format(
                bucket=self.bucket))

    session = Session(
        aws_access_key_id=ecs_access_key_id,
        aws_secret_access_key=ecs_secret_access_key)

    self.s3resource = session.resource(
        's3', endpoint_url=ecs_endpoint, config=BOTO3_CONFIG)
    self.s3client = session.client(
        's3', endpoint_url=ecs_endpoint, config=BOTO3_CONFIG)

    # Copy the module-level template: the original code aliased the
    # shared dict and then mutated it below, so every new broker
    # instance clobbered the 'dataset_registration_key' of all
    # previously created instances.
    self._structure_parameters = dict(_ECS_STRUCTURE_PARAMETERS)

    self.dataset_registration_key = 'dtool-{}'.format(self.uuid)
    self._structure_parameters[
        "dataset_registration_key"] = self.dataset_registration_key  # NOQA

    self.data_key_prefix = self._generate_key_prefix("data_key_infix")
    self.fragments_key_prefix = self._generate_key_prefix(
        "fragment_key_infix")
    self.overlays_key_prefix = self._generate_key_prefix(
        "overlays_key_infix")
    self.annotations_key_prefix = self._generate_key_prefix(
        "annotations_key_infix")

    self.http_manifest_key = self._generate_key("http_manifest_key")

    self._s3_cache_abspath = get_config_value(
        "DTOOL_CACHE_DIRECTORY",
        config_path=config_path,
        default=DEFAULT_CACHE_PATH)
def __init__(self, uri, config_path=None):
    """Initialise S3 storage broker for the dataset at ``uri``.

    :param uri: dataset URI of the form s3://<bucket>/<uuid>
    :param config_path: optional path to a dtool config file
    """
    parse_result = generous_parse_uri(uri)

    self.bucket = parse_result.netloc
    uuid = parse_result.path[1:]

    # Optional global key prefix under which datasets are stored.
    self.dataset_prefix = get_config_value("DTOOL_S3_DATASET_PREFIX")

    self.uuid = uuid

    self.s3resource, self.s3client = \
        self._get_resource_and_client(self.bucket)

    # Copy the module-level template: the original code aliased the
    # shared dict and then mutated it below, so every new broker
    # instance clobbered the 'dataset_registration_key' of all
    # previously created instances.
    self._structure_parameters = dict(_STRUCTURE_PARAMETERS)

    self.dataset_registration_key = 'dtool-{}'.format(self.uuid)
    self._structure_parameters[
        "dataset_registration_key"] = self.dataset_registration_key  # NOQA

    self.data_key_prefix = self._generate_key_prefix("data_key_infix")
    self.fragments_key_prefix = self._generate_key_prefix(
        "fragment_key_infix")
    self.overlays_key_prefix = self._generate_key_prefix(
        "overlays_key_infix")
    self.annotations_key_prefix = self._generate_key_prefix(
        "annotations_key_infix")
    self.tags_key_prefix = self._generate_key_prefix("tags_key_infix")

    self.http_manifest_key = self._generate_key("http_manifest_key")

    self._s3_cache_abspath = get_config_value(
        "DTOOL_CACHE_DIRECTORY",
        config_path=config_path,
        default=DEFAULT_CACHE_PATH)
def list_dataset_uris(cls, base_uri, config_path):
    """Return list containing URIs in location given by base_uri."""
    parsed_uri = generous_parse_uri(base_uri)

    path = parsed_uri.path
    if IS_WINDOWS:
        path = unix_to_windows_path(parsed_uri.path)

    uri_list = []
    for entry in os.listdir(path):
        candidate = os.path.join(path, entry)
        # Only directories can be datasets.
        if not os.path.isdir(candidate):
            continue
        broker = cls(candidate, config_path)
        # Skip directories that are not registered datasets.
        if not broker.has_admin_metadata():
            continue
        uri_list.append(
            broker.generate_uri(name=entry, uuid=None, base_uri=base_uri)
        )
    return uri_list
def _sanitise_base_uri(tmp_dir):
    """Return tmp_dir as a base URI, normalising path separators on Windows."""
    if not IS_WINDOWS:
        return tmp_dir
    parsed = generous_parse_uri(tmp_dir)
    return "file://{}".format(windows_to_unix_path(parsed.path))
def __init__(self, uri, config_path=None):
    """Initialise Azure storage broker for the dataset at ``uri``.

    The URI netloc names the storage account; the path holds the UUID.
    """
    parsed = generous_parse_uri(uri)
    self.storage_account_name = parsed.netloc
    self.uuid = parsed.path[1:]

    # Key prefixes for the metadata namespaces.
    self.fragments_key_prefix = self._generate_key('fragments_key_prefix')
    self.overlays_key_prefix = self._generate_key('overlays_key_prefix')
    self.annotations_key_prefix = self._generate_key(
        'annotations_key_prefix')
    self.tags_key_prefix = self._generate_key('tags_key_prefix')
    self.http_manifest_key = self._generate_key("http_manifest_key")

    self._azure_cache_abspath = get_config_value(
        "DTOOL_CACHE_DIRECTORY",
        config_path=config_path,
        default=DEFAULT_CACHE_PATH)

    self._blobservice = get_blob_service(
        self.storage_account_name, config_path)
def generate_uri(cls, name, uuid, base_uri):
    """Return SMB dataset URI for ``uuid`` under ``base_uri``.

    ``name`` is accepted for interface compatibility with the other
    storage brokers but is not used; the path component of the URI is
    always the dataset UUID.
    """
    scheme, netloc, path, _, _, _ = generous_parse_uri(base_uri)
    assert scheme == 'smb'
    # Force path (third component of tuple) to be the dataset UUID.
    # Pass empty strings for params/query/fragment explicitly: reusing
    # the throwaway '_' here (as the original did) would propagate the
    # base URI's fragment into all three trailing components.
    uri = urlunparse((scheme, netloc, uuid, '', '', ''))
    return uri
def _get_abspath_from_uri(uri):
    """Return absolute filesystem path extracted from ``uri``."""
    logger.debug("In _get_abspath_from_uri")
    logger.debug("_get_abspath_from_uri.input_uri: {}".format(uri))

    path = generous_parse_uri(uri).path
    if IS_WINDOWS:
        # URI paths use forward slashes; convert for the local filesystem.
        path = unix_to_windows_path(path)
    abspath = os.path.abspath(path)

    logger.debug("_get_abspath_from_uri.return: {}".format(abspath))
    return abspath
def test_generous_parse_uri():
    """Check generous_parse_uri handles full, lazy and scheme-odd URIs."""
    from dtoolcore.utils import generous_parse_uri

    # A fully-specified S3 URI parses like a standard URL.
    s3_uri = "s3://my-bucket/path/to/files"
    parse_result = generous_parse_uri(s3_uri)
    assert parse_result.scheme == 's3'
    assert parse_result.netloc == 'my-bucket'
    assert parse_result.path == '/path/to/files'

    # A bare relative path gets the 'file' scheme added.
    lazy_file_uri = ".my_dataset"
    parse_result = generous_parse_uri(lazy_file_uri)
    assert parse_result.scheme == 'file'

    full_file_uri = "file://localhost/path/to/files"
    parse_result = generous_parse_uri(full_file_uri)
    assert parse_result.scheme == 'file'
    assert parse_result.netloc == 'localhost'
    assert parse_result.path == '/path/to/files'

    # irods URIs with two or three slashes both yield an empty netloc.
    irods_uri = "irods:///jic_raw_data/rg-someone/my_dataset"
    parse_result = generous_parse_uri(irods_uri)
    assert parse_result.scheme == 'irods'
    assert parse_result.netloc == ''
    assert parse_result.path == '/jic_raw_data/rg-someone/my_dataset'

    irods_uri = "irods:/jic_raw_data/rg-someone/my_dataset"
    parse_result = generous_parse_uri(irods_uri)
    assert parse_result.scheme == 'irods'
    assert parse_result.netloc == ''
    assert parse_result.path == '/jic_raw_data/rg-someone/my_dataset'
def generate_uri(cls, name, uuid, base_uri):
    """Return URI for the dataset directory ``name`` below ``base_uri``.

    ``uuid`` is accepted for interface compatibility with the other
    storage brokers but is not used here.
    """
    logger.debug("In DiskStorageBroker.generate_uri...")
    parsed_uri = generous_parse_uri(base_uri)
    base_dir_path = parsed_uri.path
    if IS_WINDOWS:
        base_dir_path = unix_to_windows_path(base_dir_path)
    dataset_path = os.path.join(base_dir_path, name)
    dataset_abspath = os.path.abspath(dataset_path)
    if IS_WINDOWS:
        # URIs always use forward slashes; convert back before embedding.
        dataset_abspath = windows_to_unix_path(dataset_abspath)
        # On Windows the URI carries an empty authority ('scheme:///...').
        return "{}:///{}".format(cls.key, dataset_abspath)
    else:
        # On POSIX the local hostname is embedded as the URI authority.
        return "{}://{}{}".format(
            cls.key,
            socket.gethostname(),
            dataset_abspath
        )
def list_dataset_uris(cls, base_uri, config_path):
    """Return list containing URIs with base URI."""
    bucket_name = generous_parse_uri(base_uri).netloc

    resource, _ = cls._get_resource_and_client(bucket_name)
    bucket = resource.Bucket(bucket_name)

    uri_list = []
    # Dataset registration keys have the form 'dtool-<uuid>'.
    for obj in bucket.objects.filter(Prefix='dtool').all():
        dataset_uuid = obj.key.split('-', 1)[1]
        candidate_uri = cls.generate_uri(None, dataset_uuid, base_uri)
        if cls(candidate_uri, config_path).has_admin_metadata():
            uri_list.append(candidate_uri)
    return uri_list
def __init__(self, uri, config_path=None):
    """Initialise SMB storage broker for the dataset at ``uri``.

    The URI netloc names the configuration entry used to locate the
    SMB server; the path holds the dataset UUID.
    """
    parse_result = generous_parse_uri(uri)
    self.config_name = parse_result.netloc
    self.uuid = parse_result.path[1:]

    # Establish the SMB connection up front.
    self.conn, self.service_name, self.path = \
        SMBStorageBroker._connect(uri, config_path)

    # Paths of the dataset sub-structure on the share.
    self._data_path = self._generate_path("data_directory")
    self._overlays_path = self._generate_path("overlays_directory")
    self._annotations_path = self._generate_path("annotations_directory")
    self._tags_path = self._generate_path("tags_directory")
    self._metadata_fragments_path = self._generate_path(
        "metadata_fragments_directory")

    # Directories that must exist for a well-formed dataset.
    self._essential_subdirectories = [
        self._generate_path("dtool_directory"),
        self._data_path,
        self._overlays_path,
        self._annotations_path,
        self._tags_path,
    ]

    # Cache for file hashes computed on upload
    self._hash_cache = {}

    self._smb_cache_abspath = get_config_value(
        "DTOOL_CACHE_DIRECTORY",
        config_path=config_path,
        default=DEFAULT_CACHE_PATH
    )
def list_dataset_uris(cls, base_uri, config_path):
    """Return list containing URIs with base URI."""
    storage_account_name = generous_parse_uri(base_uri).netloc
    blobservice = get_blob_service(storage_account_name, config_path)

    containers = blobservice.list_containers(include_metadata=True)

    # Containers without metadata are not dtool datasets and are skipped.
    return [
        cls.generate_uri(
            c.metadata['name'],
            c.metadata['uuid'],
            base_uri
        )
        for c in containers
        if len(c.metadata) > 0
    ]
def _connect(cls, uri, config_path):
    """Open an SMB connection for ``uri``.

    Reads username, server name/port, domain, service name (share) and
    path from per-service config values keyed on the URI netloc, and
    raises RuntimeError for any that are missing.  The password is read
    from config, or prompted for interactively on first call and cached
    on the class for subsequent calls.

    :returns: tuple of (SMBConnection, service_name, path)
    :raises RuntimeError: if a required config value is missing
    """
    parse_result = generous_parse_uri(uri)

    config_name = parse_result.netloc

    username = get_config_value(
        "DTOOL_SMB_USERNAME_{}".format(config_name),
        config_path=config_path
    )
    server_name = get_config_value(
        "DTOOL_SMB_SERVER_NAME_{}".format(config_name),
        config_path=config_path
    )
    server_port = get_config_value(
        "DTOOL_SMB_SERVER_PORT_{}".format(config_name),
        config_path=config_path
    )
    domain = get_config_value(
        "DTOOL_SMB_DOMAIN_{}".format(config_name),
        config_path=config_path
    )
    service_name = get_config_value(
        "DTOOL_SMB_SERVICE_NAME_{}".format(config_name),
        config_path=config_path
    )
    path = get_config_value(
        "DTOOL_SMB_PATH_{}".format(config_name),
        config_path=config_path
    )

    if not username:
        raise RuntimeError("No username specified for service '{name}', "
                           "please set DTOOL_SMB_USERNAME_{name}."
                           .format(name=config_name))
    if not server_name:
        raise RuntimeError("No server name specified for service '{name}', "
                           "please set DTOOL_SMB_SERVER_NAME_{name}."
                           .format(name=config_name))
    if not server_port:
        raise RuntimeError("No server port specified for service '{name}', "
                           "please set DTOOL_SMB_SERVER_PORT_{name}."
                           .format(name=config_name))
    if not domain:
        raise RuntimeError("No domain specified for service '{name}', "
                           "please set DTOOL_SMB_DOMAIN_{name}."
                           .format(name=config_name))
    if not service_name:
        raise RuntimeError("No service name specified for service '{name}', "
                           "please set DTOOL_SMB_SERVICE_NAME_{name}. "
                           "(The service name is the name of the 'share'.)"
                           .format(name=config_name))
    if not path:
        raise RuntimeError("No path specified for service '{name}', "
                           "please set DTOOL_SMB_PATH_{name}."
                           .format(name=config_name))

    # server_port might be string, i.e. if specified via env vars
    if not isinstance(server_port, int):
        server_port = int(server_port)

    server_ip = socket.gethostbyname(server_name)
    host_name = socket.gethostname()

    password = get_config_value(
        "DTOOL_SMB_PASSWORD_{}".format(config_name),
        config_path=config_path
    )
    if password is None:
        # NOTE(review): 'num_calls' is presumably maintained by a
        # call-counting decorator on _connect (not visible here) —
        # confirm.  On the first call we prompt and cache the password
        # on the class; later calls reuse the cached value.
        if cls._connect.num_calls == 1:
            password = getpass.getpass()
            cls.password = password
        else:
            password = cls.password

    conn = SMBConnection(username, password, host_name, server_name,
                         domain=domain, use_ntlm_v2=True,
                         is_direct_tcp=True)

    logger.info(
        (
            "Connecting from '{host:s}' to "
            "'smb://{user:s}@{ip:s}({server:s}):{port:d}', "
            "DOMAIN '{domain:s}'").format(user=username,
                                          ip=server_ip,
                                          server=server_name,
                                          port=server_port,
                                          host=host_name,
                                          domain=domain)
    )

    # for testing, see types of arguments
    logger.debug(
        (
            "Types HOST '{host:s}', USER '{user:s}', IP '{ip:s}', "
            "SERVER '{server:s}', PORT '{port:s}', DOMAIN '{domain:s}'").format(
            user=type(username).__name__,
            ip=type(server_ip).__name__,
            server=type(server_name).__name__,
            port=type(server_port).__name__,
            host=type(host_name).__name__,
            domain=type(domain).__name__))

    conn.connect(server_ip, port=server_port)

    return conn, service_name, path
def uri_to_path(uri):
    """Return the filesystem path component of ``uri``."""
    path = generous_parse_uri(uri).path
    return unix_to_windows_path(path) if IS_WINDOWS else path