def test_config_set(self):
    """
    The config setter commands should persist their values to the config file.
    """
    config_commands = terra_notebook_utils.cli.commands.config
    expected = dict(workspace=f"{uuid4()}",
                    workspace_namespace=f"{uuid4()}",
                    copy_progress_indicator_type="log")
    # The override points CLIConfig at the temporary file, with no workspace
    # or namespace preset.
    with NamedTemporaryFile() as tf, CLIConfigOverride(None, None, tf.name):
        CLIConfig.write()
        config_commands.set_config_workspace(
            argparse.Namespace(workspace=expected['workspace']))
        config_commands.set_config_workspace_namespace(
            argparse.Namespace(workspace_namespace=expected['workspace_namespace']))
        config_commands.set_indicator_type(
            argparse.Namespace(copy_progress_indicator_type=expected['copy_progress_indicator_type']))
        # Read back what the commands wrote and compare it as a whole.
        with open(tf.name) as fh:
            written = json.load(fh)
        self.assertEqual(written, expected)
def _test_cmd(self, cmd: Callable, **kwargs):
    """
    Run `cmd` against a temporary config file and return what it printed.

    `self.common_kwargs` and `kwargs` are combined into the argparse
    namespace passed to `cmd`. The captured stdout is returned with
    surrounding whitespace stripped.
    """
    with NamedTemporaryFile() as tf, CLIConfigOverride(WORKSPACE_NAME, WORKSPACE_NAMESPACE, tf.name):
        CLIConfig.write()
        # dict(**a, **b) raises TypeError on a duplicated key rather than
        # silently letting one side win.
        merged_kwargs = dict(**self.common_kwargs, **kwargs)
        namespace = argparse.Namespace(**merged_kwargs)
        captured = io.StringIO()
        with redirect_stdout(captured):
            cmd(namespace)
        return captured.getvalue().strip()
def set_indicator_type(args: argparse.Namespace):
    """
    Set the indicator type for DRS copy operations.

    When 'copy-progress-indicator-type' is set to 'auto', terra-notebook-utils
    chooses the most appropriate indicator type for copy operations.
    """
    indicator_type = args.copy_progress_indicator_type
    CLIConfig.info["copy_progress_indicator_type"] = indicator_type
    # Persist the updated configuration.
    CLIConfig.write()
def drs_access(args: argparse.Namespace):
    """
    Get a signed url for a drs:// URI
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    url = drs.access(args.drs_url, workspace, namespace)
    print(url.strip())
def list_rows(args: argparse.Namespace):
    """
    Get all rows
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    # One JSON document is printed per row; the row name is keyed as
    # "<table>_id" and the row attributes are spread in after it.
    for entry in tnu_table.list_rows(args.table, workspace, namespace):
        record = {f"{args.table}_id": entry.name, **entry.attributes}
        print(json.dumps(record))
def list_tables(args: argparse.Namespace):
    """
    List all tables in the workspace
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    for table_name in tnu_table.list_tables(workspace, namespace):
        print(table_name)
def estimate_submission_cost(args: argparse.Namespace):
    """
    Estimate costs for all workflows in a submission
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    workflows_metadata = workflows.get_all_workflows(args.submission_id, workspace, namespace)

    # (column title, number) pairs -- the number is presumably the column width.
    columns = [("workflow_id", 37),
               ("shard", 6),
               ("cpus", 5),
               ("memory (GB)", 12),
               ("duration (h)", 13),
               ("call cached", 12),
               ("cost", 5)]
    reporter = TXTReport(columns)
    reporter.print_headers()

    total = 0
    for workflow_id, workflow_metadata in workflows_metadata.items():
        estimates = workflows.estimate_workflow_cost(workflow_id, workflow_metadata)
        # Shard numbering restarts at 1 for every workflow.
        for shard, item in enumerate(estimates, start=1):
            cost = item['cost']
            reporter.print_line(workflow_id,
                                shard,
                                item['number_of_cpus'],
                                item['memory'],
                                item['duration'] / 3600,  # reported under "duration (h)"
                                item['call_cached'],
                                cost)
            total += cost

    # Final summary line: only the cost column is filled in.
    reporter.print_divider()
    reporter.print_line("", "", "", "", "", "", total)
def drs_extract_tar_gz(args: argparse.Namespace):
    """
    Extract a `tar.gz` archive resolved by DRS into a Google Storage bucket.

    example:
        tnu drs extract-tar-gz drs://my-tar-gz gs://my-dst-bucket/root
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    drs.extract_tar_gz(args.drs_url, args.dst, workspace, namespace)
def get_row(args: argparse.Namespace):
    """
    Get one row
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    found = tnu_table.get_row(args.table, args.row, workspace, namespace)
    if found is None:
        # Nothing is printed when the lookup yields None.
        return
    record = {f"{args.table}_id": found.name, **found.attributes}
    print(json.dumps(record))
def test_config_print(self):
    """
    `config_print` should emit the active configuration as JSON on stdout.
    """
    expected = dict(workspace=f"{uuid4()}",
                    workspace_namespace=f"{uuid4()}",
                    copy_progress_indicator_type="auto")
    with NamedTemporaryFile() as tf, \
            CLIConfigOverride(expected['workspace'], expected['workspace_namespace'], tf.name):
        CLIConfig.write()
        captured = io.StringIO()
        with redirect_stdout(captured):
            terra_notebook_utils.cli.commands.config.config_print(argparse.Namespace())
        printed = json.loads(captured.getvalue())
        self.assertEqual(printed, expected)
def get_submission(args: argparse.Namespace):
    """
    Get information about a submission, including member workflows
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    info = workflows.get_submission(args.submission_id, workspace, namespace)
    print(json.dumps(info, indent=2))
def get_workflow(args: argparse.Namespace):
    """
    Get information about a workflow
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    info = workflows.get_workflow(args.submission_id, args.workflow_id, workspace, namespace)
    print(json.dumps(info, indent=2))
def drs_copy(args: argparse.Namespace):
    """
    Copy drs:// object to local file or Google Storage bucket

    If 'dst' is suffixed with "/", the destination is assumed to be a folder and the file name is
    derived from the drs response and appended to 'dst'. Otherwise the destination is assumed to be
    absolute.

    examples:
        tnu drs copy drs://my-drs-id /tmp/doom # copy to /tmp/doom
        tnu drs copy drs://my-drs-id /tmp/doom/ # copy to /tmp/doom/{file-name-from-drs-resolution}
        tnu drs copy drs://my-drs-id gs://my-cool-bucket/my-cool-bucket-key
        tnu drs copy drs://my-drs-id gs://my-cool-bucket/my-cool-bucket-key/
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    copy_options: Dict[str, Any] = {
        "workspace_name": workspace,
        "workspace_namespace": namespace,
    }
    # 'indicator_type' is only passed along when the config yields a value,
    # otherwise drs.copy is called without that keyword.
    if CLIConfig.progress_indicator_type() is not None:
        copy_options['indicator_type'] = CLIConfig.progress_indicator_type()
    drs.copy(args.drs_url, args.dst, **copy_options)
def list_submissions(args: argparse.Namespace):
    """
    List workflow submissions in chronological order
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace

    # Collect every (id, date, status) triple before printing anything.
    listing = []
    for submission in workflows.list_submissions(workspace, namespace):
        listing.append((submission['submissionId'],
                        submission['submissionDate'],
                        submission['status']))

    # Order by the submission date field (second element of each triple).
    listing.sort(key=lambda entry: entry[1])
    for submission_id, date, status in listing:
        print(submission_id, date, status)
def test_resolve(self):
    """
    Exercise `CLIConfig.resolve` under the different configuration overrides.
    """
    with self.subTest("Should fall back to env vars if arguments are None and config file missing"), \
            CLIConfigOverride(None, None):
        resolved_workspace, resolved_namespace = CLIConfig.resolve(None, None)
        self.assertEqual(WORKSPACE_NAME, resolved_workspace)
        self.assertEqual(WORKSPACE_NAMESPACE, resolved_namespace)

    with self.subTest("Should fall back to config if arguments are None/False"), \
            CLIConfigOverride(str(uuid4()), str(uuid4())):
        resolved_workspace, resolved_namespace = CLIConfig.resolve(None, None)
        self.assertEqual(CLIConfig.info['workspace'], resolved_workspace)
        self.assertEqual(CLIConfig.info['workspace_namespace'], resolved_namespace)

    with self.subTest("Should attempt namespace resolve via fiss when workspace present, namespace empty"):
        wanted_namespace = str(uuid4())
        with mock.patch("terra_notebook_utils.workspace.get_workspace_namespace",
                        return_value=wanted_namespace), \
                CLIConfigOverride(WORKSPACE_NAME, None):
            # NOTE(review): this module attribute is assigned directly and is
            # not restored by anything visible in this test.
            terra_notebook_utils.cli.WORKSPACE_NAMESPACE = None
            resolved_workspace, resolved_namespace = CLIConfig.resolve(None, None)
            self.assertEqual(CLIConfig.info['workspace'], resolved_workspace)
            self.assertEqual(wanted_namespace, resolved_namespace)

    with self.subTest("Should use overrides for workspace and workspace_namespace"):
        wanted_workspace = str(uuid4())
        wanted_namespace = str(uuid4())
        with mock.patch("terra_notebook_utils.workspace.get_workspace_namespace",
                        return_value=wanted_namespace), \
                CLIConfigOverride(str(uuid4()), str(uuid4())):
            terra_notebook_utils.cli.WORKSPACE_NAMESPACE = None
            # Explicit arguments are expected to win over the overridden config.
            resolved_workspace, resolved_namespace = CLIConfig.resolve(wanted_workspace,
                                                                       wanted_namespace)
            self.assertEqual(wanted_workspace, resolved_workspace)
            self.assertEqual(wanted_namespace, resolved_namespace)
def drs_copy_batch(args: argparse.Namespace):
    """
    Copy several drs:// objects to local directory or Google Storage bucket

    examples:
        tnu drs copy-batch drs://my-drs-1 drs://my-drs-2 drs://my-drs-3 --dst /tmp/doom/
        tnu drs copy-batch drs://my-drs-1 drs://my-drs-2 drs://my-drs-3 --dst gs://my-cool-bucket/my-cool-folder
        tnu drs copy-batch --manifest manifest.json

    When not using a manifest, 'dst' is treated as a folder, and file names are derived from the
    drs response. Otherwise, in a manifest, 'dst' can either be a folder (if suffixed with "/"),
    or an absolute path, e.g. '/home/me/my-file-name.vcf.gz' or 'gs://bucket-name/pfx/my-file.vcf.gz'.

    example manifest.json:
    [
      {
        "drs_uri": "drs://my/cool/drs/uri",
        "dst": "/path/to/local/dir/"
      },
      {
        "drs_uri": "drs://my/cool/drs/uri",
        "dst": "gs://my-cook-bucket/my-cool-prefix"
      }
    ]
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    copy_options: Dict[str, Any] = {
        "workspace_name": workspace,
        "workspace_namespace": namespace,
    }
    # 'indicator_type' is only forwarded when the config yields a value.
    if CLIConfig.progress_indicator_type() is not None:
        copy_options['indicator_type'] = CLIConfig.progress_indicator_type()

    if args.drs_uris:
        # NOTE(review): these checks are `assert` statements, so they are
        # skipped entirely when Python runs with -O.
        assert args.manifest is None, "Cannot use 'drs_uris' with '--manifest'"
        assert args.dst is not None, "Must specify a destination with '--dst'"
        drs.copy_batch_urls(args.drs_uris, args.dst, **copy_options)
        return

    if not args.manifest:
        raise RuntimeError("Must supply either 'drs_uris' or '--manifest'")

    # Manifest mode: the JSON file content is handed to drs as parsed.
    with open(args.manifest) as fh:
        manifest = json.load(fh)
    drs.copy_batch_manifest(manifest, **copy_options)
def put_row(args: argparse.Namespace):
    """
    Put a row.

    Example:
        tnu table put-row \\
        --table abbrv_merge_input \\
        bucket=fc-9169fcd1-92ce-4d60-9d2d-d19fd326ff10 \\
        input_keys=test_vcfs/a.vcf.gz,test_vcfs/b.vcf.gz \\
        output_key=foo.vcf.gz

    Each entry of `args.data` must have the form `key=value`. Only the first
    "=" separates key from value, so values may themselves contain "=".

    Raises:
        ValueError: if an entry of `args.data` contains no "=".
    """
    args.workspace, args.workspace_namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    attributes = dict()
    for pair in args.data:
        # Split on the first "=" only: a plain split("=") fails to unpack
        # for values such as "expr=a=b".
        key, separator, val = pair.partition("=")
        if not separator:
            raise ValueError(f"Expected row data of the form 'key=value', got '{pair}'")
        attributes[key] = val
    # When no row name is given, a random uuid4 string is used as the name.
    row = tnu_table.Row(name=args.row or f"{uuid4()}", attributes=attributes)
    tnu_table.put_row(args.table, row, args.workspace, args.workspace_namespace)
    # Echo the stored row as JSON, keyed by "<table>_id".
    print(json.dumps({f"{args.table}_id": row.name, **row.attributes}))
def drs_head(args: argparse.Namespace):
    """
    Print the first bytes of a drs:// object.

    Example:
        tnu drs head drs://crouching-drs-hidden-access
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    head_bytes = drs.head(args.drs_url,
                          num_bytes=args.bytes,
                          workspace_name=workspace,
                          workspace_namespace=namespace)

    # sys.stdout.buffer is used outside of a python notebook since that's the standard non-lossy way
    # to write/display bytes; sys.stdout.buffer is not available inside of a python notebook
    # though, as sys.stdout is a ipykernel.iostream.OutStream object:
    # https://github.com/ipython/ipykernel/blob/master/ipykernel/iostream.py#L265
    # so we use bare sys.stdout and rely on the ipykernel method's lossy unicode stream
    try:
        sink = sys.stdout.buffer
    except AttributeError:
        sink = sys.stdout
    sink.write(head_bytes)
def fetch_drs_url(args: argparse.Namespace):
    """
    Fetch the DRS URL associated with `--file-name` in `--table`.
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    drs_url = tnu_table.fetch_drs_url(args.table, args.file_name, workspace, namespace)
    print(drs_url)
def set_config_workspace_namespace(args: argparse.Namespace):
    """
    Set workspace namespace for cli commands
    """
    namespace = args.workspace_namespace
    CLIConfig.info["workspace_namespace"] = namespace
    # Persist the updated configuration.
    CLIConfig.write()
def delete_table(args: argparse.Namespace):
    """
    Delete a table
    """
    # Fill in workspace / namespace via CLIConfig.resolve; the resolved values
    # are stored back on `args` before being passed on.
    args.workspace, args.workspace_namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    tnu_table.delete(args.table, args.workspace, args.workspace_namespace)
def delete_row(args: argparse.Namespace):
    """
    Delete a row
    """
    workspace, namespace = CLIConfig.resolve(args.workspace, args.workspace_namespace)
    # The resolved values are also stored back on `args`.
    args.workspace, args.workspace_namespace = workspace, namespace
    tnu_table.del_row(args.table, args.row, workspace, namespace)