def add_composite_algo(ctx, data):
    """Add composite algo.

    The path must point to a valid JSON file with the following schema:

    \b
    {
        "name": str,
        "description": path,
        "file": path,
        "permissions": {
            "public": bool,
            "authorized_ids": list[str],
        },
    }

    \b
    Where:
    - name: name of the algorithm
    - description: path to a markdown file describing the algo
    - file: path to tar.gz or zip archive containing the algorithm python
      script and its Dockerfile
    - permissions: define asset access permissions
    """
    # NOTE(review): the `\b` markers suggest this docstring is rendered as CLI
    # help text, so its wording and layout are kept as-is.
    # `data` is forwarded to the client untouched.
    created = get_client(ctx.obj).add_composite_algo(data)

    # Render the single asset returned by the client in the configured format.
    asset_printer = printers.get_asset_printer(
        assets.COMPOSITE_ALGO,
        ctx.obj.output_format,
    )
    asset_printer.print(created, is_list=False)
def add_testtuple(ctx, objective_key, dataset_key, traintuple_key,
                  data_samples, tag):
    """Add testtuple.

    The option --data-samples-path must point to a valid JSON file with the
    following schema:

    \b
    {
        "keys": list[str],
    }

    \b
    Where:
    - keys: list of data sample keys
    """
    # NOTE(review): the `\b` markers suggest this docstring is rendered as CLI
    # help text, so its wording and layout are kept as-is.
    client = get_client(ctx.obj)

    # Mandatory fields first; optional ones are only added when provided.
    payload = dict(
        objective_key=objective_key,
        data_manager_key=dataset_key,
        traintuple_key=traintuple_key,
    )
    if data_samples:
        payload.update(
            test_data_sample_keys=load_data_samples_keys(data_samples))
    if tag:
        payload.update(tag=tag)

    created = client.add_testtuple(payload)
    asset_printer = printers.get_asset_printer(
        assets.TESTTUPLE,
        ctx.obj.output_format,
    )
    asset_printer.print(created, is_list=False)
def add_traintuple(ctx, objective_key, algo_key, dataset_key, data_samples,
                   in_models_keys, tag):
    """Add traintuple.

    The option --data-samples-path must point to a valid JSON file with the
    following schema:

    \b
    {
        "keys": list[str],
    }

    \b
    Where:
    - keys: list of data sample keys
    """
    # NOTE(review): the `\b` markers suggest this docstring is rendered as CLI
    # help text, so its wording and layout are kept as-is.
    client = get_client(ctx.obj)

    payload = dict(
        algo_key=algo_key,
        objective_key=objective_key,
        data_manager_key=dataset_key,
    )
    if data_samples:
        # NOTE(review): sibling commands go through load_data_samples_keys();
        # here the 'keys' entry is read directly - confirm this is intended.
        payload.update(train_data_sample_keys=data_samples['keys'])
    if tag:
        payload.update(tag=tag)
    if in_models_keys:
        payload.update(in_models_keys=in_models_keys)

    created = client.add_traintuple(payload)
    asset_printer = printers.get_asset_printer(
        assets.TRAINTUPLE,
        ctx.obj.output_format,
    )
    asset_printer.print(created, is_list=False)
def add_dataset(ctx, data, objective_key):
    """Add dataset.

    The path must point to a valid JSON file with the following schema:

    \b
    {
        "name": str,
        "description": path,
        "type": str,
        "data_opener": path,
        "permissions": {
            "public": bool,
            "authorized_ids": list[str],
        },
    }

    \b
    Where:
    - name: name of the dataset
    - description: path to a markdown file describing the dataset
    - type: short description of the type of data that will be attached to this
      dataset (common values are 'Images', 'Tabular', 'Time series',
      'Spatial time series' and 'Hierarchical images')
    - data_opener: path to the opener python script
    - permissions: define asset access permissions
    """
    # NOTE(review): the `\b` markers suggest this docstring is rendered as CLI
    # help text, so its wording and layout are kept as-is.
    client = get_client(ctx.obj)

    # The helper is handed `data` itself, so any change it makes is what gets
    # sent to the client below.
    dict_append_to_optional_field(data, 'objective_keys', objective_key)

    created = client.add_dataset(data)
    asset_printer = printers.get_asset_printer(
        assets.DATASET,
        ctx.obj.output_format,
    )
    asset_printer.print(created, is_list=False)
def cancel_compute_plan(ctx, compute_plan_id):
    """Cancel execution of a compute plan."""
    # The SDK client is expected to expose `cancel_compute_plan`.
    cancelled = get_client(ctx.obj).cancel_compute_plan(compute_plan_id)

    # Print the client's response using the active profile.
    asset_printer = printers.get_asset_printer(
        assets.COMPUTE_PLAN,
        ctx.obj.output_format,
    )
    asset_printer.print(cancelled, profile=ctx.obj.profile)
def add_composite_traintuple(ctx, algo_key, dataset_key, data_samples,
                             head_model_key, trunk_model_key,
                             out_trunk_model_permissions, tag, metadata):
    """Add composite traintuple.

    The option --data-samples-path must point to a valid JSON file with the
    following schema:

    \b
    {
        "keys": list[str],
    }

    \b
    Where:
    - keys: list of data sample keys

    The option --out-trunk-model-permissions-path must point to a valid JSON
    file with the following schema:

    \b
    {
        "authorized_ids": list[str],
    }
    """
    # NOTE(review): the `\b` markers suggest this docstring is rendered as CLI
    # help text, so its wording and layout are kept as-is.

    # Head and trunk input models only make sense as a pair: reject the call
    # before contacting the backend when exactly one of them is given.
    head_given = bool(head_model_key)
    trunk_given = bool(trunk_model_key)
    if head_given and not trunk_given:
        raise click.BadOptionUsage(
            '--trunk-model-key',
            "The --trunk-model-key option is required when using "
            "--head-model-key.")
    if trunk_given and not head_given:
        raise click.BadOptionUsage(
            '--head-model-key',
            "The --head-model-key option is required when using "
            "--trunk-model-key.")

    client = get_client(ctx.obj)

    payload = {
        'algo_key': algo_key,
        'data_manager_key': dataset_key,
        'in_head_model_key': head_model_key,
        'in_trunk_model_key': trunk_model_key,
    }
    if data_samples:
        payload['train_data_sample_keys'] = \
            load_data_samples_keys(data_samples)

    # Remaining optional fields are copied verbatim, only when truthy.
    optional_fields = (
        ('out_trunk_model_permissions', out_trunk_model_permissions),
        ('tag', tag),
        ('metadata', metadata),
    )
    for field_name, field_value in optional_fields:
        if field_value:
            payload[field_name] = field_value

    created = client.add_composite_traintuple(payload)
    asset_printer = printers.get_asset_printer(
        assets.COMPOSITE_TRAINTUPLE,
        ctx.obj.output_format,
    )
    asset_printer.print(created, is_list=False)
def update_compute_plan(ctx, compute_plan_key, tuples, no_auto_batching,
                        batch_size):
    """Update compute plan.

    The tuples path must point to a valid JSON file with the following schema:

    \b
    {
        "traintuples": list[{
            "algo_key": str,
            "data_manager_key": str,
            "train_data_sample_keys": list[str],
            "traintuple_id": str,
            "in_models_ids": list[str],
            "tag": str,
            "metadata": dict,
        }],
        "composite_traintuples": list[{
            "composite_traintuple_id": str,
            "algo_key": str,
            "data_manager_key": str,
            "train_data_sample_keys": list[str],
            "in_head_model_id": str,
            "in_trunk_model_id": str,
            "out_trunk_model_permissions": {
                "authorized_ids": list[str],
            },
            "tag": str,
            "metadata": dict,
        }],
        "aggregatetuples": list[{
            "aggregatetuple_id": str,
            "algo_key": str,
            "worker": str,
            "in_models_ids": list[str],
            "tag": str,
            "metadata": dict,
        }],
        "testtuples": list[{
            "objective_key": str,
            "data_manager_key": str,
            "test_data_sample_keys": list[str],
            "traintuple_id": str,
            "tag": str,
            "metadata": dict,
        }]
    }

    Disable the auto batching to upload all the tuples of the
    compute plan at once.
    If the auto batching is enabled, change the `batch_size` to define the
    number of tuples uploaded in each batch (default 20).
    """
    # NOTE(review): the `\b` markers suggest this docstring is rendered as CLI
    # help text, so its wording and layout are kept as-is.

    # A batch size is meaningless once batching is switched off.
    if batch_size and no_auto_batching:
        raise click.BadOptionUsage(
            '--batch_size',
            "The --batch_size option cannot be used when using "
            "--no_auto_batching.")

    client = get_client(ctx.obj)

    # The client takes the positive form of the flag.
    auto_batching = not no_auto_batching
    updated = client.update_compute_plan(
        compute_plan_key, tuples, auto_batching, batch_size)

    asset_printer = printers.get_asset_printer(
        assets.COMPUTE_PLAN,
        ctx.obj.output_format,
    )
    asset_printer.print(updated, is_list=False)
def add_aggregatetuple(ctx, algo_key, in_models_keys, worker, rank, tag,
                       metadata):
    """Add aggregatetuple."""
    client = get_client(ctx.obj)

    payload = dict(algo_key=algo_key, worker=worker)
    if in_models_keys:
        payload.update(in_models_keys=in_models_keys)
    # The rank is compared against None so that a rank of 0 is still sent.
    if rank is not None:
        payload.update(rank=rank)
    if tag:
        payload.update(tag=tag)
    if metadata:
        payload.update(metadata=metadata)

    # Creation returns a key; the asset is then fetched through the context's
    # retry wrapper before being printed.
    created_key = client.add_aggregatetuple(payload)
    fetch_with_retry = ctx.obj.retry(client.get_aggregatetuple)
    created = fetch_with_retry(created_key)

    asset_printer = printers.get_asset_printer(
        assets.AGGREGATETUPLE,
        ctx.obj.output_format,
    )
    asset_printer.print(created, is_list=False)
def list_(ctx, asset_name, filters, filters_logical_clause, advanced_filters):
    """List assets."""
    # Parameters (kept out of the docstring, which may be shown as CLI help):
    # - ctx: context whose `obj` provides the client configuration and the
    #   output format.
    # - asset_name: asset type; lower-cased to pick the client's
    #   `list_<asset>` method and passed as-is to select the printer.
    # - filters: iterable of simple filters (may be empty/None).
    # - filters_logical_clause: when equal to 'or', the simple filters are
    #   separated by 'OR' entries; any other value leaves them untouched.
    # - advanced_filters: pre-built filter expression, forwarded unchanged.
    # Raises click.UsageError when both filter styles are supplied.
    client = get_client(ctx.obj)
    # method must exist in sdk
    method = getattr(client, f'list_{asset_name.lower()}')

    # handle filters
    if advanced_filters and filters:
        raise click.UsageError(
            'The --filter and --advanced-filters options are mutually exclusive'
        )
    elif filters:
        filters = list(filters)
        if filters_logical_clause == 'or':
            # Put exactly one 'OR' between each pair of consecutive filters.
            # A new list is built on purpose: inserting into the list at
            # index i + 1 while it grows stacks the separators together
            # ([a, 'OR', 'OR', b, c]) as soon as there are three filters.
            interleaved = []
            for position, clause in enumerate(filters):
                if position:
                    interleaved.append('OR')
                interleaved.append(clause)
            filters = interleaved
    elif advanced_filters:
        filters = advanced_filters

    res = method(filters)
    printer = printers.get_asset_printer(asset_name, ctx.obj.output_format)
    # Each result is converted to a plain dict before printing; None-valued
    # fields are kept and field aliases are used as keys.
    dict_res = [
        result.dict(exclude_none=False, by_alias=True)
        for result in res
    ]
    printer.print(dict_res, is_list=True)
def add_compute_plan(ctx, data):
    """Add compute plan.

    The path must point to a valid JSON file with the following schema:

    \b
    {
        "traintuples": list[{
            "algo_key": str,
            "data_manager_key": str,
            "train_data_sample_keys": list[str],
            "traintuple_id": str,
            "in_models_ids": list[str],
            "tag": str,
            "metadata": dict
        }],
        "composite_traintuples": list[{
            "composite_traintuple_id": str,
            "algo_key": str,
            "data_manager_key": str,
            "train_data_sample_keys": list[str],
            "in_head_model_id": str,
            "in_trunk_model_id": str,
            "out_trunk_model_permissions": {
                "authorized_ids": list[str],
            },
            "tag": str,
            "metadata": dict
        }],
        "aggregatetuples": list[{
            "aggregatetuple_id": str,
            "algo_key": str,
            "worker": str,
            "in_models_ids": list[str],
            "tag": str,
            "metadata": dict
        }],
        "testtuples": list[{
            "objective_key": str,
            "data_manager_key": str,
            "test_data_sample_keys": list[str],
            "traintuple_id": str,
            "tag": str,
            "metadata": dict
        }],
        "clean_models": bool,
        "tag": str,
        "metadata": dict
    }
    """
    # NOTE(review): the `\b` marker suggests this docstring is rendered as CLI
    # help text, so its wording and layout are kept as-is.
    # `data` is forwarded to the client untouched.
    created = get_client(ctx.obj).add_compute_plan(data)

    asset_printer = printers.get_asset_printer(
        assets.COMPUTE_PLAN,
        ctx.obj.output_format,
    )
    asset_printer.print(created, is_list=False)
def add_objective(ctx, data, dataset_key, data_samples):
    """Add objective.

    The path must point to a valid JSON file with the following schema:

    \b
    {
        "name": str,
        "description": path,
        "metrics_name": str,
        "metrics": path,
        "permissions": {
            "public": bool,
            "authorized_ids": list[str],
        },
        "metadata": dict
    }

    \b
    Where:
    - name: name of the objective
    - description: path to a markdown file describing the objective
    - metrics_name: name of the metrics
    - metrics: path to tar.gz or zip archive containing the metrics python
      script and its Dockerfile
    - permissions: define asset access permissions

    The option --data-samples-path must point to a valid JSON file with the
    following schema:

    \b
    {
        "keys": list[str],
    }

    \b
    Where:
    - keys: list of test only data sample keys
    """
    # NOTE(review): the `\b` markers suggest this docstring is rendered as CLI
    # help text, so its wording and layout are kept as-is.
    client = get_client(ctx.obj)

    # The optional test dataset information is written into `data` itself,
    # which is then sent as the objective definition.
    if dataset_key:
        data.update(test_data_manager_key=dataset_key)
    if data_samples:
        data.update(test_data_sample_keys=load_data_samples_keys(data_samples))

    # Creation returns a key; the asset is then fetched through the context's
    # retry wrapper before being printed.
    created_key = client.add_objective(data)
    fetch_with_retry = ctx.obj.retry(client.get_objective)
    created = fetch_with_retry(created_key)

    asset_printer = printers.get_asset_printer(
        assets.OBJECTIVE,
        ctx.obj.output_format,
    )
    asset_printer.print(created, is_list=False)
def get(ctx, expand, asset_name, asset_key):
    """Get asset definition."""
    # Asset types for which the --expand option is accepted. Kept as a tuple:
    # its repr is part of the error message below.
    expandable = (
        assets.DATASET,
        assets.TRAINTUPLE,
        assets.OBJECTIVE,
        assets.TESTTUPLE,
        assets.COMPOSITE_TRAINTUPLE,
        assets.AGGREGATETUPLE,
        assets.COMPUTE_PLAN,
    )

    # Fail fast, before any client is created.
    if expand:
        if asset_name not in expandable:
            raise click.UsageError(
                f'--expand option is available with assets {expandable}')

    client = get_client(ctx.obj)
    # The SDK client is expected to expose a `get_<asset>` method.
    getter = getattr(client, f'get_{asset_name.lower()}')
    asset = getter(asset_key)

    asset_printer = printers.get_asset_printer(
        asset_name,
        ctx.obj.output_format,
    )
    asset_printer.print(asset, profile=ctx.obj.profile, expand=expand)
def test_get_asset_printer(asset, output_format, printer_cls):
    """Check that the printer factory returns an instance of `printer_cls`."""
    # The three arguments are supplied from outside this function
    # (presumably by a parametrize decorator that is not visible here).
    selected_printer = printers.get_asset_printer(asset, output_format)
    assert isinstance(selected_printer, printer_cls)