def download_dataset_tgz(dataset_id) -> str:
    """Download a dataset's files as an archive into the temp directory.

    Calls ``DatasetServiceApi.download_dataset_files`` (with generated code
    included) and writes the raw response body to
    ``<tempdir>/download/datasets/<filename>``. The file name is taken from
    the ``Content-Disposition`` response header and defaults to
    ``<dataset_id>.tgz`` when the header is missing or yields an empty name.

    Args:
        dataset_id: ID of the dataset to download.

    Returns:
        Path of the written archive, or the literal string
        ``"Download failed?"`` when the API call raises ``ApiException``.
    """
    api_client = get_swagger_client()
    api_instance = swagger_client.DatasetServiceApi(api_client=api_client)

    try:
        response: HTTPResponse = \
            api_instance.download_dataset_files(dataset_id,
                                                include_generated_code=True,
                                                _preload_content=False)

        default_filename = f"{dataset_id}.tgz"
        attachment_header = response.info().get(
            "Content-Disposition", f"attachment; filename={default_filename}")

        header_filename = re.sub("attachment; filename=", "", attachment_header)

        # The header value is controlled by the server: drop optional quotes
        # and any directory components so the file cannot be written outside
        # of download_dir.
        download_filename = os.path.basename(
            header_filename.strip().strip('"')) or default_filename

        download_dir = os.path.join(tempfile.gettempdir(), "download", "datasets")
        os.makedirs(download_dir, exist_ok=True)
        tarfile_path = os.path.join(download_dir, download_filename)

        with open(tarfile_path, 'wb') as f:
            f.write(response.read())

        print(tarfile_path)

        return tarfile_path

    except ApiException as e:
        print(
            "Exception when calling DatasetServiceApi -> download_dataset_files: %s\n" % e,
            file=stderr)

    return "Download failed?"
def get_template(template_id: str) -> str:
    """Fetch a dataset template via the API, print it and return it.

    Args:
        template_id: ID passed to ``DatasetServiceApi.get_dataset_template``.

    Returns:
        The ``template`` attribute of the API response, or ``None`` when the
        call raises ``ApiException`` (the error is printed to stderr).
    """
    client = get_swagger_client()
    dataset_api = swagger_client.DatasetServiceApi(api_client=client)

    try:
        response: ApiGetTemplateResponse = dataset_api.get_dataset_template(
            template_id)
    except ApiException as err:
        print(
            "Exception when calling DatasetServiceApi -> get_dataset_template: %s\n" % err,
            file=stderr)
        return None

    template_text = response.template
    print(template_text)

    # Disabled: writing the template to a local YAML file.
    # yaml_dict = yaml.load(template_response.template, Loader=yaml.FullLoader)
    # dataset_name = yaml_dict.get("name") or f"template_{template_id}"
    # template_file = os.path.join("files", dataset_name) + ".yaml"
    # with open(template_file, "w") as f:
    #     f.write(template_response.template)

    return template_text
def delete_assets(upload_assets_response: ApiCatalogUploadResponse = None):
    """Delete catalog assets of every supported type.

    For each of components, datasets, models, notebooks and pipelines:

    * if ``upload_assets_response`` is given (and truthy), the matching
      attribute of the response is read and the delete method is called with
      the ``id`` of every asset in it;
    * otherwise the delete method is called once with the id ``"*"``
      (presumably a server-side wildcard for "all" -- confirm against the
      API).

    The first ``ApiException`` is printed to stderr and stops the remaining
    deletions; nothing is returned.

    Args:
        upload_assets_response: Optional upload response whose per-type asset
            lists determine what gets deleted.
    """
    api_client = get_swagger_client()

    delete_methods = {
        "components": swagger_client.ComponentServiceApi(api_client).delete_component,
        "datasets": swagger_client.DatasetServiceApi(api_client).delete_dataset,
        "models": swagger_client.ModelServiceApi(api_client).delete_model,
        "notebooks": swagger_client.NotebookServiceApi(api_client).delete_notebook,
        "pipelines": swagger_client.PipelineServiceApi(api_client).delete_pipeline
    }

    try:
        for asset_type, delete_method in delete_methods.items():
            if upload_assets_response:
                # An asset type missing from the response may be None rather
                # than an empty list; treat it as "nothing to delete".
                asset_list = getattr(upload_assets_response, asset_type) or []
                for asset in asset_list:
                    delete_method(asset.id)
            else:
                delete_method("*")

    except ApiException as e:
        # delete_method is still bound to the method that raised.
        print(f"Exception when calling {delete_method}: {e}\n", file=stderr)
def delete_dataset(dataset_id: str):
    """Delete one dataset through ``DatasetServiceApi.delete_dataset``.

    An ``ApiException`` is printed to stderr and not re-raised; the function
    returns ``None`` in every case.

    Args:
        dataset_id: ID passed to the delete call.
    """
    client = get_swagger_client()
    dataset_api = swagger_client.DatasetServiceApi(api_client=client)

    try:
        dataset_api.delete_dataset(dataset_id)
    except ApiException as err:
        message = "Exception when calling DatasetServiceApi -> delete_dataset: %s\n" % err
        print(message, file=stderr)
def set_featured_datasets(dataset_ids: [str]):
    """Pass the given dataset IDs to ``DatasetServiceApi.set_featured_datasets``.

    The API response is not used. An ``ApiException`` is printed to stderr
    and not re-raised.

    Args:
        dataset_ids: IDs forwarded unchanged to the API call.

    Returns:
        Always ``None``.
    """
    client = get_swagger_client()
    dataset_api = swagger_client.DatasetServiceApi(api_client=client)

    try:
        dataset_api.set_featured_datasets(dataset_ids)
    except ApiException as err:
        message = "Exception when calling DatasetServiceApi -> set_featured_datasets: %s\n" % err
        print(message, file=stderr)

    return None
def get_dataset(dataset_id: str) -> ApiDataset:
    """Fetch a dataset's metadata, pretty-print it and return it.

    Args:
        dataset_id: ID passed to ``DatasetServiceApi.get_dataset``.

    Returns:
        The object returned by the API call, or ``None`` when the call raises
        ``ApiException`` (the error is printed to stderr).
    """
    client = get_swagger_client()
    dataset_api = swagger_client.DatasetServiceApi(api_client=client)

    try:
        metadata: ApiDataset = dataset_api.get_dataset(dataset_id)
    except ApiException as err:
        print("Exception when calling DatasetServiceApi -> get_dataset: %s\n" % err,
              file=stderr)
        return None

    pprint(metadata, indent=2)
    return metadata
def approve_datasets_for_publishing(dataset_ids: [str]):
    """Pass the given IDs to ``DatasetServiceApi.approve_datasets_for_publishing``.

    The API response is not used. An ``ApiException`` is printed to stderr
    and not re-raised.

    Args:
        dataset_ids: IDs forwarded unchanged to the API call.

    Returns:
        Always ``None``.
    """
    client = get_swagger_client()
    dataset_api = swagger_client.DatasetServiceApi(api_client=client)

    try:
        dataset_api.approve_datasets_for_publishing(dataset_ids)
    except ApiException as err:
        message = "Exception when calling DatasetServiceApi -> approve_datasets_for_publishing: %s\n" % err
        print(message, file=stderr)

    return None
def upload_dataset_file(dataset_id, file_path):
    """Upload a file to an existing dataset and print a confirmation.

    Args:
        dataset_id: Passed as ``id`` to ``DatasetServiceApi.upload_dataset_file``.
        file_path: Passed as ``uploadfile`` to the same call.

    Raises:
        ApiException: Re-raised after being printed to stderr.
    """
    client = get_swagger_client()
    dataset_api = swagger_client.DatasetServiceApi(api_client=client)

    try:
        dataset_api.upload_dataset_file(id=dataset_id, uploadfile=file_path)
    except ApiException as e:
        print(
            "Exception when calling DatasetServiceApi -> upload_dataset_file: %s\n" % e,
            file=stderr)
        raise e

    # Only reached when the upload call did not raise.
    print(f"Upload file '{file_path}' to dataset with ID '{dataset_id}'")
def upload_dataset_template(uploadfile_name, name=None) -> str:
    """Upload a dataset template and return the ID of the created dataset.

    Args:
        uploadfile_name: Passed as ``uploadfile`` to
            ``DatasetServiceApi.upload_dataset``.
        name: Optional ``name`` forwarded to the same call.

    Returns:
        The ``id`` of the dataset returned by the API, or ``None`` when the
        call raises ``ApiException`` (the error is printed to stderr and not
        re-raised).
    """
    client = get_swagger_client()
    dataset_api = swagger_client.DatasetServiceApi(api_client=client)

    try:
        dataset: ApiDataset = dataset_api.upload_dataset(
            uploadfile=uploadfile_name, name=name)
    except ApiException as err:
        print(
            "Exception when calling DatasetServiceApi -> upload_dataset: %s\n" % err,
            file=stderr)
        # Deliberately not re-raised; callers get None instead.
        return None

    print(f"Uploaded '{dataset.name}': {dataset.id}")
    return dataset.id
def generate_code(dataset_id: str) -> str:
    """Request generated code for a dataset, print it and return it.

    Args:
        dataset_id: ID passed to ``DatasetServiceApi.generate_dataset_code``.

    Returns:
        The ``script`` attribute of the API response, or ``None`` when the
        call raises ``ApiException`` (the error is printed to stderr).
    """
    client = get_swagger_client()
    dataset_api = swagger_client.DatasetServiceApi(api_client=client)

    try:
        response: ApiGenerateCodeResponse = dataset_api.generate_dataset_code(
            dataset_id)
    except ApiException as err:
        print(
            "Exception while calling DatasetServiceApi -> generate_code: %s\n" % err,
            file=stderr)
        return None

    script = response.script
    print(script)
    return script
def verify_dataset_download(dataset_id: str) -> bool:
    """Check that a dataset's downloaded archive matches its API template.

    Downloads the dataset archive (with generated code included), reads
    every file member into memory keyed by the text after the last ``"."``
    in the member name, and compares the ``yaml`` (or ``yml``) entry with the
    template text returned by ``get_dataset_template``.

    Args:
        dataset_id: ID of the dataset to verify.

    Returns:
        ``True`` when the texts match, ``False`` when an API call raises
        ``ApiException`` (the error is printed to stderr).

    Raises:
        AssertionError: When the template from the API differs from the
            template in the archive.
    """
    api_client = get_swagger_client()
    api_instance = swagger_client.DatasetServiceApi(api_client=api_client)

    try:
        response: HTTPResponse = \
            api_instance.download_dataset_files(dataset_id,
                                                include_generated_code=True,
                                                _preload_content=False)
        tgz_file = BytesIO(response.read())

        file_contents = {}
        with tarfile.open(fileobj=tgz_file) as tar:
            for member in tar.getmembers():
                extracted = tar.extractfile(member)
                if extracted is None:
                    # Directories and other non-file members have no content.
                    continue
                file_ext = member.name.split(".")[-1]
                file_contents[file_ext] = extracted.read().decode("utf-8")

        template_response: ApiGetTemplateResponse = api_instance.get_dataset_template(
            dataset_id)
        template_text_from_api = template_response.template
        template_text_from_archive = file_contents.get(
            "yaml", file_contents.get("yml"))

        # Raised explicitly instead of using `assert` so the check still runs
        # under `python -O`; the exception type stays the same for callers.
        if template_text_from_api != template_text_from_archive:
            raise AssertionError(
                f"template of dataset '{dataset_id}' does not match the downloaded archive")

        # TODO: verify generated code
        # generate_code_response: ApiGenerateCodeResponse = api_instance.generate_dataset_code(dataset_id)
        # run_script_from_api = generate_code_response.script
        #
        # regex = re.compile(r"name='[^']*'")  # controller adds random chars to name, replace those
        #
        # assert regex.sub("name='...'", run_script_from_api) == \
        #        regex.sub("name='...'", file_contents.get("py"))

        print("downloaded files match")

        return True

    except ApiException as e:
        print(
            "Exception when calling DatasetServiceApi -> download_dataset_files: %s\n" % e,
            file=stderr)

    return False
def list_datasets(filter_dict: dict = None, sort_by: str = None) -> [ApiDataset]:
    """List datasets, print one summary line per dataset and return them.

    Args:
        filter_dict: Optional filter; when non-empty it is JSON-encoded and
            sent as the ``filter`` argument, otherwise no filter is sent.
        sort_by: Optional value forwarded as ``sort_by`` to
            ``DatasetServiceApi.list_datasets``.

    Returns:
        The datasets from the API response (an empty list when the response
        carries none), or an empty list when the call raises
        ``ApiException`` (the error is printed to stderr).
    """
    api_client = get_swagger_client()
    api_instance = swagger_client.DatasetServiceApi(api_client=api_client)

    try:
        # None and {} are both falsy, so both mean "no filter".
        filter_str = json.dumps(filter_dict) if filter_dict else None

        api_response: ApiListDatasetsResponse = api_instance.list_datasets(
            filter=filter_str, sort_by=sort_by)

        # Guard against a response without a dataset list so callers always
        # get a list back, matching the error path below.
        datasets = api_response.datasets or []

        for c in datasets:
            print("%s  %s  %s" %
                  (c.id, c.created_at.strftime("%Y-%m-%d %H:%M:%S"), c.name))

        return datasets

    except ApiException as e:
        print(
            "Exception when calling DatasetServiceApi -> list_datasets: %s\n" % e,
            file=stderr)

    return []
def run_code(dataset_id: str, parameters: dict = None, run_name: str = None) -> str:
    """Start a run for a dataset, print the run URL and return it.

    Args:
        dataset_id: ID passed to ``DatasetServiceApi.run_dataset``.
        parameters: Optional mapping of parameter names to values; sent as a
            list of ``{"name": ..., "value": ...}`` dicts. ``None`` is treated
            as no parameters.
        run_name: Optional ``run_name`` forwarded to the API call.

    Returns:
        The ``run_url`` attribute of the API response, or ``None`` when the
        call raises ``ApiException`` (the error is printed to stderr).
    """
    api_client = get_swagger_client()
    api_instance = swagger_client.DatasetServiceApi(api_client=api_client)

    try:
        # A None default avoids a shared mutable default dict.
        param_array = [{"name": key, "value": value}
                       for key, value in (parameters or {}).items()]

        run_code_response: ApiRunCodeResponse = api_instance.run_dataset(
            dataset_id, run_name=run_name, parameters=param_array)

        print(run_code_response.run_url)

        return run_code_response.run_url

    except ApiException as e:
        print("Exception while calling DatasetServiceApi -> run_code: %s\n" % e,
              file=stderr)

    return None