async def output_file(
    user_id: int, project_id: str, postgres_engine: Engine
) -> Iterable[FileMetaData]:
    node_id = "fd6f9737-1988-341b-b4ac-0614b646fa82"

    # pylint: disable=no-value-for-parameter
    file = FileMetaData()
    file.simcore_from_uuid(
        f"{project_id}/{node_id}/filename.txt", bucket_name="master-simcore"
    )
    file.entity_tag = "df9d868b94e53d18009066ca5cd90e9f"
    file.user_name = "test"
    file.user_id = str(user_id)

    async with postgres_engine.acquire() as conn:
        stmt = (
            file_meta_data.insert()
            .values(**attr.asdict(file))
            .returning(literal_column("*"))
        )
        result = await conn.execute(stmt)
        row = await result.fetchone()

        # hack around a defect: coerce these fields to str so the equality
        # check against the fetched row below passes
        file.user_id = str(user_id)
        file.location_id = str(file.location_id)
        assert file == FileMetaData(**dict(row))  # type: ignore

        yield file

        # cleanup: remove the row once the consuming test is done with it
        result = await conn.execute(
            file_meta_data.delete().where(file_meta_data.c.file_uuid == row.file_uuid)
        )
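# A hypothetical consumer of the output_file fixture above, shown only to illustrate how
# the inserted row can be checked; the test name, the sqlalchemy import alias and the
# assertions are assumptions for illustration, not part of the original suite.
import sqlalchemy as sa


async def test_output_file_row_exists(output_file: FileMetaData, postgres_engine: Engine):
    async with postgres_engine.acquire() as conn:
        result = await conn.execute(
            sa.select([file_meta_data]).where(
                file_meta_data.c.file_uuid == output_file.file_uuid
            )
        )
        row = await result.fetchone()

    # the fixture registered the file, so the row must be there and carry the same etag
    assert row is not None
    assert row.entity_tag == output_file.entity_tag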
def test_fmd_build():
    file_uuid = str(Path("1234") / Path("abcd") / Path("xx.dat"))
    fmd = FileMetaData()
    fmd.simcore_from_uuid(file_uuid, "test-bucket")

    assert fmd.node_id == "abcd"
    assert fmd.project_id == "1234"
    assert fmd.file_name == "xx.dat"
    assert fmd.object_name == "1234/abcd/xx.dat"
    assert fmd.file_uuid == file_uuid
    assert fmd.location == SIMCORE_S3_STR
    assert fmd.location_id == SIMCORE_S3_ID
    assert fmd.bucket_name == "test-bucket"
def _create_file_meta_for_s3(postgres_url, s3_client, tmp_file):
    utils.create_tables(url=postgres_url)
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # create file and upload
    filename = os.path.basename(tmp_file)
    project_id = "22"
    node_id = "1006"
    file_name = filename
    file_uuid = os.path.join(str(project_id), str(node_id), str(file_name))

    d = {
        'object_name': os.path.join(str(project_id), str(node_id), str(file_name)),
        'bucket_name': bucket_name,
        'file_name': filename,
        'user_id': "42",
        'user_name': "starbucks",
        'location': SIMCORE_S3_STR,
        'project_id': project_id,
        'project_name': "battlestar",
        'node_id': node_id,
        'node_name': "this is the name of the node",
        'file_uuid': file_uuid,
    }

    fmd = FileMetaData(**d)

    return fmd
def test_fmd_build():
    file_uuid = str(Path("api") / Path("abcd") / Path("xx.dat"))
    fmd = FileMetaData()
    fmd.simcore_from_uuid(file_uuid, "test-bucket")

    assert not fmd.node_id
    assert not fmd.project_id
    assert fmd.file_name == "xx.dat"
    assert fmd.object_name == "api/abcd/xx.dat"
    assert fmd.file_uuid == file_uuid
    assert fmd.location == SIMCORE_S3_STR
    assert fmd.location_id == SIMCORE_S3_ID
    assert fmd.bucket_name == "test-bucket"

    file_uuid = f"{uuid.uuid4()}/{uuid.uuid4()}/xx.dat"
    fmd.simcore_from_uuid(file_uuid, "test-bucket")

    assert fmd.node_id == file_uuid.split("/")[1]
    assert fmd.project_id == file_uuid.split("/")[0]
    assert fmd.file_name == "xx.dat"
    assert fmd.object_name == file_uuid
    assert fmd.file_uuid == file_uuid
    assert fmd.location == SIMCORE_S3_STR
    assert fmd.location_id == SIMCORE_S3_ID
    assert fmd.bucket_name == "test-bucket"
def _create_file_meta_for_s3(postgres_url, s3_client, tmp_file):
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # create file and upload
    filename = os.path.basename(tmp_file)
    project_id = "api"  # "357879cc-f65d-48b2-ad6c-074e2b9aa1c7"
    project_name = "battlestar"
    node_name = "galactica"
    node_id = "b423b654-686d-4157-b74b-08fa9d90b36e"
    file_name = filename
    file_uuid = os.path.join(str(project_id), str(node_id), str(file_name))
    display_name = os.path.join(str(project_name), str(node_name), str(file_name))
    created_at = str(datetime.datetime.now())
    file_size = 1234

    d = {
        "object_name": os.path.join(str(project_id), str(node_id), str(file_name)),
        "bucket_name": bucket_name,
        "file_name": filename,
        "user_id": USER_ID,
        "user_name": "starbucks",
        "location": SIMCORE_S3_STR,
        "location_id": SIMCORE_S3_ID,
        "project_id": project_id,
        "project_name": project_name,
        "node_id": node_id,
        "node_name": node_name,
        "file_uuid": file_uuid,
        "file_id": file_uuid,
        "raw_file_path": file_uuid,
        "display_file_path": display_name,
        "created_at": created_at,
        "last_modified": created_at,
        "file_size": file_size,
    }

    fmd = FileMetaData(**d)

    return fmd
def _create_file_meta_for_s3(postgres_url, s3_client, tmp_file):
    utils.create_tables(url=postgres_url)
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # create file and upload
    filename = os.path.basename(tmp_file)
    project_id = "22"
    project_name = "battlestar"
    node_name = "galactica"
    node_id = "1006"
    file_name = filename
    file_uuid = os.path.join(str(project_id), str(node_id), str(file_name))
    display_name = os.path.join(str(project_name), str(node_name), str(file_name))
    created_at = str(datetime.datetime.now())
    file_size = 1234

    d = {
        "object_name": os.path.join(str(project_id), str(node_id), str(file_name)),
        "bucket_name": bucket_name,
        "file_name": filename,
        "user_id": USER_ID,
        "user_name": "starbucks",
        "location": SIMCORE_S3_STR,
        "location_id": SIMCORE_S3_ID,
        "project_id": project_id,
        "project_name": project_name,
        "node_id": node_id,
        "node_name": node_name,
        "file_uuid": file_uuid,
        "file_id": file_uuid,
        "raw_file_path": file_uuid,
        "display_file_path": display_name,
        "created_at": created_at,
        "last_modified": created_at,
        "file_size": file_size,
    }

    fmd = FileMetaData(**d)

    return fmd
def list_dataset_files_recursively(
    self, files: List[FileMetaData], base: BaseCollection, current_root: Path
):
    for item in base:
        if isinstance(item, Collection):
            _current_root = current_root / Path(item.name)
            self.list_dataset_files_recursively(files, item, _current_root)
        else:
            parts = current_root.parts
            bucket_name = parts[0]
            file_name = item.name
            file_size = 0
            # let's assume the package holds only one file
            if item.files:
                file_name = Path(item.files[0].as_dict()["content"]["s3key"]).name
                file_size = item.files[0].as_dict()["content"]["size"]
            # in the root directory the object_name is the filename only;
            # otherwise prefix it with the collection path
            if len(parts) > 1:
                object_name = str(Path(*list(parts)[1:]) / Path(file_name))
            else:
                object_name = str(Path(file_name))

            file_uuid = str(Path(bucket_name) / Path(object_name))
            file_id = item.id
            created_at = item.created_at
            last_modified = item.updated_at
            fmd = FileMetaData(
                bucket_name=bucket_name,
                file_name=file_name,
                object_name=object_name,
                location=DATCORE_STR,
                location_id=DATCORE_ID,
                file_uuid=file_uuid,
                file_id=file_id,
                raw_file_path=file_uuid,
                display_file_path=file_uuid,
                created_at=created_at,
                last_modified=last_modified,
                file_size=file_size,
            )
            files.append(fmd)
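# Hypothetical call site for the recursive listing above; "datcore" stands for whatever
# object the method is bound to and the dataset id is a placeholder. Both are assumptions
# for illustration, not code from the original module; the only thing taken from the code
# above is that the walk starts at the dataset root with the dataset name as bucket name.
files: List[FileMetaData] = []
dataset = datcore._bf.get_dataset("N:dataset:0000")
datcore.list_dataset_files_recursively(files, dataset, Path(dataset.name))
for fmd in files:
    print(fmd.file_uuid, fmd.file_size)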
def dsm_mockup_db(
    postgres_service_url, s3_client, mock_files_factory
) -> Dict[str, FileMetaData]:
    # s3 client
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # TODO: use pip install Faker
    users = ["alice", "bob", "chuck", "dennis"]

    projects = [
        "astronomy",
        "biology",
        "chemistry",
        "dermatology",
        "economics",
        "futurology",
        "geology",
    ]
    location = SIMCORE_S3_STR

    nodes = ["alpha", "beta", "gamma", "delta"]

    N = 100
    files = mock_files_factory(count=N)
    counter = 0
    data = {}

    for _file in files:
        idx = randrange(len(users))
        user_name = users[idx]
        user_id = idx + 10

        idx = randrange(len(projects))
        project_name = projects[idx]
        project_id = idx + 100

        idx = randrange(len(nodes))
        node = nodes[idx]
        node_id = idx + 10000

        file_name = str(counter)
        object_name = Path(str(project_id), str(node_id), str(counter)).as_posix()
        file_uuid = Path(object_name).as_posix()
        raw_file_path = file_uuid
        display_file_path = str(Path(project_name) / Path(node) / Path(file_name))
        created_at = str(datetime.datetime.now())
        file_size = 1234

        assert s3_client.upload_file(bucket_name, object_name, _file)

        d = {
            "file_uuid": file_uuid,
            "location_id": "0",
            "location": location,
            "bucket_name": bucket_name,
            "object_name": object_name,
            "project_id": str(project_id),
            "project_name": project_name,
            "node_id": str(node_id),
            "node_name": node,
            "file_name": file_name,
            "user_id": str(user_id),
            "user_name": user_name,
            "file_id": str(uuid.uuid4()),
            "raw_file_path": file_uuid,
            "display_file_path": display_file_path,
            "created_at": created_at,
            "last_modified": created_at,
            "file_size": file_size,
        }

        counter = counter + 1

        data[object_name] = FileMetaData(**d)

        # pylint: disable=no-member
        tests.utils.insert_metadata(postgres_service_url, data[object_name])

    total_count = 0
    for _obj in s3_client.list_objects(bucket_name, recursive=True):
        total_count = total_count + 1

    assert total_count == N

    yield data

    # s3 client
    s3_client.remove_bucket(bucket_name, delete_contents=True)
def list_files_raw_dataset(self, dataset_id: str) -> List[FileMetaDataEx]:
    files = []  # raw packages
    _files = []  # fmds
    data = {}  # map to keep track of parents-child

    cursor = ""
    page_size = 1000

    api = self._bf._api.datasets
    dataset = self._bf.get_dataset(dataset_id)
    if dataset is not None:
        while True:
            resp = api._get(
                api._uri(
                    "/{id}/packages?cursor={cursor}&pageSize={pageSize}&includeSourceFiles={includeSourceFiles}",
                    id=dataset_id,
                    cursor=cursor,
                    pageSize=page_size,
                    includeSourceFiles=False,
                )
            )
            for package in resp.get("packages", list()):
                id = package["content"]["id"]
                data[id] = package
                files.append(package)
            cursor = resp.get("cursor")
            if cursor is None:
                break

        for f in files:
            if f["content"]["packageType"] != "Collection":
                filename = f["content"]["name"]
                file_path = ""
                file_id = f["content"]["nodeId"]
                _f = f
                while "parentId" in _f["content"].keys():
                    parentid = _f["content"]["parentId"]
                    _f = data[parentid]
                    file_path = _f["content"]["name"] + "/" + file_path

                bucket_name = dataset.name
                file_name = filename
                file_size = 0
                object_name = str(Path(file_path) / file_name)

                file_uuid = str(Path(bucket_name) / object_name)
                created_at = f["content"]["createdAt"]
                last_modified = f["content"]["updatedAt"]
                parent_id = dataset_id
                if "parentId" in f["content"]:
                    parentId = f["content"]["parentId"]
                    parent_id = data[parentId]["content"]["nodeId"]

                fmd = FileMetaData(
                    bucket_name=bucket_name,
                    file_name=file_name,
                    object_name=object_name,
                    location=DATCORE_STR,
                    location_id=DATCORE_ID,
                    file_uuid=file_uuid,
                    file_id=file_id,
                    raw_file_path=file_uuid,
                    display_file_path=file_uuid,
                    created_at=created_at,
                    last_modified=last_modified,
                    file_size=file_size,
                )
                fmdx = FileMetaDataEx(fmd=fmd, parent_id=parent_id)
                _files.append(fmdx)

    return _files
def test_file_entry_valid(
    file_size: Optional[int], entity_tag: Optional[str], expected_validity: bool
):
    file_meta_data = FileMetaData(file_size=file_size, entity_tag=entity_tag)
    assert is_file_entry_valid(file_meta_data) == expected_validity
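# test_file_entry_valid above relies on a pytest parametrization that is not included in
# this excerpt. A minimal sketch of such a decorator, assuming a file entry is valid only
# when it carries both a positive file_size and an entity_tag; the concrete cases below
# are illustrative, not the original ones.
import pytest


@pytest.mark.parametrize(
    "file_size, entity_tag, expected_validity",
    [
        (None, None, False),           # nothing uploaded yet
        (-1, "a-valid-etag", False),   # entity tag present but size unknown
        (1234, None, False),           # size present but upload not confirmed by an etag
        (1234, "a-valid-etag", True),  # both present -> a valid entry
    ],
)
def test_file_entry_valid(file_size, entity_tag, expected_validity):
    ...  # body as in the test above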
def dsm_mockup_db(postgres_service_url, s3_client, mock_files_factory):
    # db
    utils.create_tables(url=postgres_service_url)

    # s3 client
    bucket_name = BUCKET_NAME
    s3_client.create_bucket(bucket_name, delete_contents_if_exists=True)

    # TODO: use pip install Faker
    users = ['alice', 'bob', 'chuck', 'dennis']

    projects = [
        'astronomy', 'biology', 'chemistry', 'dermatology', 'economics',
        'futurology', 'geology'
    ]
    location = SIMCORE_S3_STR

    nodes = ['alpha', 'beta', 'gamma', 'delta']

    N = 100
    files = mock_files_factory(count=N)
    counter = 0
    data = {}

    for _file in files:
        idx = randrange(len(users))
        user_name = users[idx]
        user_id = idx + 10

        idx = randrange(len(projects))
        project_name = projects[idx]
        project_id = idx + 100

        idx = randrange(len(nodes))
        node = nodes[idx]
        node_id = idx + 10000

        file_name = str(counter)
        object_name = Path(str(project_id), str(node_id), str(counter)).as_posix()
        file_uuid = Path(object_name).as_posix()

        assert s3_client.upload_file(bucket_name, object_name, _file)

        d = {
            'file_uuid': file_uuid,
            'location_id': "0",
            'location': location,
            'bucket_name': bucket_name,
            'object_name': object_name,
            'project_id': str(project_id),
            'project_name': project_name,
            'node_id': str(node_id),
            'node_name': node,
            'file_name': file_name,
            'user_id': str(user_id),
            'user_name': user_name
        }

        counter = counter + 1

        data[object_name] = FileMetaData(**d)

        # pylint: disable=no-member
        utils.insert_metadata(postgres_service_url, data[object_name])

    total_count = 0
    for _obj in s3_client.list_objects_v2(bucket_name, recursive=True):
        total_count = total_count + 1

    assert total_count == N

    yield data

    # s3 client
    s3_client.remove_bucket(bucket_name, delete_contents=True)

    # db
    utils.drop_tables(url=postgres_service_url)