def login(info):
    """Login with your username and password"""
    email = click.prompt('Please enter your email', type=str)
    secret = click.prompt('Please enter your password', type=str,
                          hide_input=True)

    # API instance pointed at the configured Core Engine host
    cfg = ce_api.Configuration()
    cfg.host = constants.API_HOST
    login_api = ce_api.LoginApi(ce_api.ApiClient(cfg))

    token_response = api_call(
        func=login_api.login_access_token_api_v1_login_access_token_post,
        username=email,
        password=secret)

    info[constants.ACTIVE_USER] = email
    declare('Login successful!')

    # Persist the access token under the user's entry, creating it if new.
    if email in info:
        info[email][constants.TOKEN] = token_response.access_token
    else:
        info[email] = {constants.TOKEN: token_response.access_token}
    info.save()
def create_datasource(info, name, ds_type, source, provider_id, args):
    """Create a datasource"""
    click.echo('Registering datasource {}...'.format(name))
    extra_args = parse_unknown_options(args)

    ds_api = ce_api.DatasourcesApi(api_client(info))
    providers_api = ce_api.ProvidersApi(api_client(info))

    # Resolve the (possibly partial) provider id against known providers.
    providers = api_call(providers_api.get_loggedin_provider_api_v1_providers_get)
    resolved_provider = find_closest_uuid(provider_id, providers)

    payload = DatasourceCreate(
        name=name,
        type=ds_type,
        source=source,
        provider_id=resolved_provider,
        args=extra_args,
    )
    created = api_call(ds_api.create_datasource_api_v1_datasources_post,
                       payload)

    declare('Datasource registered with ID: {}'.format(
        format_uuid(created.id)))
def pull_pipeline(info, pipeline_id, output_path, no_docs):
    """Copy the configuration of a registered pipeline"""
    pipelines_api = ce_api.PipelinesApi(utils.api_client(info))
    workspaces_api = ce_api.WorkspacesApi(utils.api_client(info))

    user = info[constants.ACTIVE_USER]
    workspace_id = info[user][constants.ACTIVE_WORKSPACE]

    # Resolve the (possibly partial) pipeline id within the workspace.
    candidates = utils.api_call(
        workspaces_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        workspace_id)
    resolved = utils.find_closest_uuid(pipeline_id, candidates)
    utils.declare('Pulling pipeline: {}'.format(utils.format_uuid(resolved)))

    pipeline = utils.api_call(
        pipelines_api.get_pipeline_api_v1_pipelines_pipeline_id_get,
        pipeline_id=resolved)

    # Short term fix for these getting into the exp_config
    config = pipeline.pipeline_config
    for key in ('bq_args', 'ai_platform_training_args'):
        config.pop(key, None)

    utils.save_config(config, output_path, no_docs)
def list_backends(info, backend_class):
    """Lists all created backends"""
    backends_api = ce_api.BackendsApi(api_client(info))
    backends = api_call(backends_api.get_loggedin_backend_api_v1_backends_get)

    # Optional filter on backend class.
    if backend_class:
        backends = [b for b in backends if b.backend_class == backend_class]

    declare('You have {count} different {class_}backend(s) so '
            'far. \n'.format(count=len(backends),
                             class_=backend_class + ' ' if backend_class
                             else ''))

    if backends:
        rows = [{
            'ID': format_uuid(b.id),
            'Name': b.name,
            'Backend Class': b.backend_class,
            'Backend Type': b.type,
            'Created At': b.created_at,
        } for b in sorted(backends, key=lambda b: b.backend_class)]
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
def list_pipelines(info, pipeline_id, ignore_empty):
    """List of registered pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds... \n')
    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]
    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    # All pipelines of the active workspace.
    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    # A user-supplied (possibly partial) id is resolved to a full uuid so
    # the equality filter below matches exactly one pipeline.
    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        # Print a pipeline when it passes the optional id filter and either
        # has runs or empty ones are not being ignored.
        write_check = (len(p.pipeline_runs) > 0 or not ignore_empty) and \
                      (pipeline_id is None or pipeline_id == p.id)
        if write_check:
            # THIS WHOLE THING IS HERE FOR A REASON!!!!!!
            title = 'PIPELINE NAME: {}  PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))
            if len(p.pipeline_runs) == 0:
                click.echo('No runs for this pipeline yet!')
            else:
                table = []
                for r in p.pipeline_runs:
                    # One extra API round-trip per run for the author; this
                    # is why the command can take a few seconds.
                    author = utils.api_call(
                        p_api.get_pipeline_run_user_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_user_get,
                        p.id,
                        r.id)

                    # Resolve datasource
                    ds_commit = utils.api_call(
                        d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                        r.datasource_commit_id)
                    ds = utils.api_call(
                        d_api.get_datasource_api_v1_datasources_ds_id_get,
                        ds_commit.datasource_id)

                    table.append({
                        'RUN ID': utils.format_uuid(r.id),
                        'TYPE': r.pipeline_run_type,
                        'CPUs PER WORKER': r.cpus_per_worker,
                        'WORKERS': r.workers,
                        'DATASOURCE': '{}_{}'.format(
                            ds.name,
                            utils.format_uuid(r.datasource_commit_id)),
                        'AUTHOR': author.email,
                        'CREATED AT': utils.format_date(r.start_time),
                    })
                click.echo(tabulate(table, headers='keys', tablefmt='plain'))
                click.echo('\n')
def set_provider(info, provider_id):
    # Resolve the (possibly partial) provider id against all providers
    # visible to the logged-in user, persist the selection, and confirm.
    active_user = info[constants.ACTIVE_USER]
    providers_api = ce_api.ProvidersApi(api_client(info))
    providers = api_call(providers_api.get_loggedin_provider_api_v1_providers_get)
    resolved = find_closest_uuid(provider_id, providers)
    info[active_user][constants.ACTIVE_PROVIDER] = resolved
    info.save()
    declare('Active provider set to id: {id}'.format(id=format_uuid(resolved)))
def commit_datasource(ctx, info, datasource_id, message, schema,
                      orchestration_backend, orchestration_args,
                      processing_backend, processing_args, force):
    """Creates a commit for a datasource"""
    api = ce_api.DatasourcesApi(api_client(info))

    if not force:
        confirmation('Committing will trigger a pipeline that will create a '
                     'snapshot of your datasources current state. '
                     'This might take a while. '
                     'Are you sure you wish to continue?', abort=True)

    # find closest, this a heavy call for now
    all_ds = api_call(api.get_datasources_api_v1_datasources_get)
    ds_uuid = find_closest_uuid(datasource_id, all_ds)

    # Optional user-provided schema for the commit; empty dict by default.
    schema_dict = dict()
    if schema:
        try:
            with open(schema, 'rt', encoding='utf8') as f:
                # safe_load: never execute arbitrary tags from user YAML
                # (yaml.load without an explicit Loader is unsafe and
                # deprecated in PyYAML >= 5).
                schema_dict = yaml.safe_load(f)
        except yaml.YAMLError:
            # Narrowed from a bare 'except:' so unrelated errors (e.g. a
            # missing file) surface instead of being mislabeled.
            error('Badly formatted YAML!')

    commit = api_call(
        api.create_datasource_commit_api_v1_datasources_ds_id_commits_post,
        DatasourceCommitCreate(
            message=message,
            used_schema=schema_dict,
            orchestration_backend=orchestration_backend,
            orchestration_args=orchestration_args,
            processing_backend=processing_backend,
            processing_args=processing_args,
        ),
        ds_id=ds_uuid,
    )
    declare('Commit successful: {}'.format(format_uuid(commit.id)))

    active_commit = '{datasource_id}:{commit_id}'.format(datasource_id=ds_uuid,
                                                         commit_id=commit.id)
    user = info[constants.ACTIVE_USER]
    info[user][constants.ACTIVE_DATASOURCE_COMMIT] = active_commit
    info.save()
    # BUG FIX: format each uuid separately (consistent with set_datasource);
    # previously the combined 'ds:commit' string was passed to format_uuid
    # as a single value.
    declare('Active datasource commit set to: {}'.format(
        ':'.join(format_uuid(x) for x in active_commit.split(':'))))
def reset_password(info):
    """Send reset password link to registered email address"""
    confirmation('Are you sure you want to reset your password? This will '
                 'trigger an email for resetting your password and '
                 'clear cookies.', abort=True)
    check_login_status(info)

    # Look up the logged-in user's email, then trigger the reset email.
    users_api = ce_api.UsersApi(api_client(info))
    me = api_call(users_api.get_loggedin_user_api_v1_users_me_get)

    login_api = ce_api.LoginApi(api_client(info))
    api_call(login_api.send_reset_pass_email_api_v1_login_email_resetpassword_post,
             AuthEmail(email=me.email))

    # Clear the active user locally.
    info[constants.ACTIVE_USER] = None
    info.save()
    declare("Reset password email sent to {}".format(me.email))
def set_workspace(info, workspace_id):
    """Set workspace to be active"""
    active_user = info[constants.ACTIVE_USER]
    ws_api = ce_api.WorkspacesApi(api_client(info))

    workspaces = api_call(ws_api.get_loggedin_workspaces_api_v1_workspaces_get)
    resolved = find_closest_uuid(workspace_id, workspaces)

    # Fetch the single workspace once before persisting the selection.
    api_call(ws_api.get_workspace_api_v1_workspaces_workspace_id_get, resolved)

    info[active_user][constants.ACTIVE_WORKSPACE] = resolved
    info.save()
    declare('Active workspace set to id: {id}'.format(
        id=format_uuid(resolved)))
def set_datasource(info, source_id):
    """Set datasource to be active"""
    # BUG FIX: the docstring previously sat AFTER the first statement, so it
    # was a discarded string expression, not a docstring (and click showed
    # no help text for this command).
    ds_id, c_id = resolve_datasource_commits(info, source_id)

    active_commit = '{datasource_id}:{commit_id}'.format(
        datasource_id=ds_id, commit_id=c_id)

    user = info[constants.ACTIVE_USER]
    info[user][constants.ACTIVE_DATASOURCE_COMMIT] = active_commit
    info.save()
    declare('Active datasource commit set to: {}'.format(
        ':'.join([format_uuid(x) for x in active_commit.split(':')])
    ))
def list_functions(info):
    """List the given custom functions"""
    functions_api = ce_api.FunctionsApi(api_client(info))
    functions = api_call(functions_api.get_functions_api_v1_functions_get)

    declare('You have declared {count} different '
            'function(s) so far. \n'.format(count=len(functions)))

    if functions:
        rows = [{'ID': format_uuid(f.id),
                 'Name': f.name,
                 'Type': f.function_type,
                 'Created At': f.created_at} for f in functions]
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
def create_workspace(ctx, info, provider_id, name):
    """Create a workspace and set it to be active."""
    click.echo('Registering the workspace "{}"...'.format(name))

    ws_api = ce_api.WorkspacesApi(api_client(info))
    providers_api = ce_api.ProvidersApi(api_client(info))

    # Resolve the (possibly partial) provider id first.
    providers = api_call(providers_api.get_loggedin_provider_api_v1_providers_get)
    provider_uuid = find_closest_uuid(provider_id, providers)

    workspace = api_call(ws_api.create_workspace_api_v1_workspaces_post,
                         WorkspaceCreate(name=name,
                                         provider_id=provider_uuid))
    declare('Workspace registered.')

    # Immediately make the freshly created workspace the active one.
    ctx.invoke(set_workspace, workspace_id=workspace.id)
def create_function(info, local_path, name, func_type, udf_name, message):
    """Register a custom function to use with the Core Engine"""
    click.echo('Registering the function {}.'.format(udf_name))

    # Ship the source file as base64-encoded text.
    with open(local_path, 'rb') as source_file:
        encoded_file = base64.b64encode(source_file.read()).decode()

    functions_api = ce_api.FunctionsApi(api_client(info))
    api_call(functions_api.create_function_api_v1_functions_post,
             FunctionCreate(name=name,
                            function_type=func_type,
                            udf_path=udf_name,
                            message=message,
                            file_contents=encoded_file))
    declare('Function registered.')
def list_datasources(info):
    """List of all the available datasources"""
    user = info[constants.ACTIVE_USER]

    # Commit half of the active "ds_id:commit_id" selection, if any.
    if constants.ACTIVE_DATASOURCE_COMMIT in info[user]:
        active_dc = info[user][constants.ACTIVE_DATASOURCE_COMMIT]
        active_dc = active_dc.split(':')[1]
    else:
        active_dc = None

    api = ce_api.DatasourcesApi(api_client(info))
    ds_list = api_call(api.get_datasources_api_v1_datasources_get)

    declare('You have created {count} different '
            'datasource(s).\n'.format(count=len(ds_list)))
    declare("Use 'cengine datasource commits DATASOURCE_ID' see commits of "
            "any datasource.\n")

    if ds_list:
        table = []
        for ds in ds_list:
            dcs = [x.id for x in ds.datasource_commits]
            status = 'No Commit'
            latest_created_at = 'No Commit'
            # BUG FIX: initialize before the branch — previously these were
            # only set inside the commit branch, so a datasource with zero
            # commits raised NameError (or reused stale values from the
            # previous loop iteration).
            latest_n_bytes = ''
            latest_n_datapoints = ''
            latest_n_features = ''
            if len(dcs) != 0:
                # BUG FIX: max(), not min() — the "Latest Commit" columns
                # must describe the newest commit, not the oldest.
                latest = max(ds.datasource_commits,
                             key=attrgetter('created_at'))
                latest_created_at = format_date(latest.created_at)
                latest_n_bytes = latest.n_bytes
                latest_n_datapoints = latest.n_datapoints
                latest_n_features = latest.n_features
            # NOTE(review): 'status' is never updated past 'No Commit' even
            # when commits exist — looks unfinished; kept as-is.
            table.append({'Selection': '*' if active_dc in dcs else '',
                          'ID': format_uuid(ds.id),
                          'Name': ds.name,
                          'Type': ds.type,
                          '# Commits': len(ds.datasource_commits),
                          'Latest Commit Status': status,
                          'Latest Commit Date': latest_created_at,
                          'Latest Commit Bytes': latest_n_bytes,
                          'Latest Commit # Datapoints': latest_n_datapoints,
                          'Latest Commit # Features': latest_n_features
                          })
        click.echo(tabulate(table, headers='keys', tablefmt='presto'))
        click.echo()
def peek_datasource(info, source_id, sample_size):
    """Randomly sample datasource and print to console."""
    ds_api = ce_api.DatasourcesApi(api_client(info))
    ds_id, commit_id = resolve_datasource_commits(info, source_id)

    declare('Randomly generating {} samples from datasource {}:{}'.format(
        sample_size,
        format_uuid(ds_id),
        format_uuid(commit_id)
    ))

    # Server-side sampling; the response rows are printed as-is.
    sample = api_call(
        ds_api.get_datasource_commit_data_sample_api_v1_datasources_ds_id_commits_commit_id_data_get,
        ds_id=ds_id,
        commit_id=commit_id,
        sample_size=sample_size)
    click.echo(tabulate(sample, headers='keys', tablefmt='plain'))
def list_datasource_commits(info, datasource_id):
    """List of all the available datasources"""
    ds_api = ce_api.DatasourcesApi(api_client(info))

    # find closest, this a heavy call for now
    all_ds = api_call(ds_api.get_datasources_api_v1_datasources_get)
    resolved_ds = find_closest_uuid(datasource_id, all_ds)

    ds = api_call(ds_api.get_datasource_api_v1_datasources_ds_id_get,
                  ds_id=resolved_ds)

    declare('There are {count} different commits for datasource {name}'
            '.\n'.format(count=len(ds.datasource_commits), name=ds.name))

    # The active commit (if any) gets a '*' in the Selection column.
    user = info[constants.ACTIVE_USER]
    if constants.ACTIVE_DATASOURCE_COMMIT in info[user]:
        _, active_commit = \
            info[user][constants.ACTIVE_DATASOURCE_COMMIT].split(':')
    else:
        active_commit = None

    if ds.datasource_commits:
        rows = []
        for commit in ds.datasource_commits:
            commit_status = api_call(
                ds_api.get_datasource_commit_status_api_v1_datasources_ds_id_commits_commit_id_status_get,
                ds.id,
                commit.id,
            )
            rows.append({
                'Selection': '*' if commit.id == active_commit else '',
                'ID': format_uuid(commit.id),
                'Created At': format_date(commit.created_at),
                'Status': commit_status,
                'Message': commit.message,
                'Bytes': commit.n_bytes,
                '# Datapoints': commit.n_datapoints,
                '# Features': commit.n_features
            })
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
def model_pipeline(info, pipeline_, output_path):
    """Download the trained model to a specified location"""
    # The output location must be an empty directory or not exist yet.
    if os.path.exists(output_path) and os.path.isdir(output_path):
        if not [f for f in os.listdir(output_path)
                if not f.startswith('.')] == []:
            utils.error("Output path must be an empty directory!")
    if os.path.exists(output_path) and not os.path.isdir(output_path):
        utils.error("Output path must be an empty directory!")
    if not os.path.exists(output_path):
        # BUG FIX: this message was previously a bare string expression
        # (a no-op) and the directory was never actually created.
        click.echo("Creating directory {}..".format(output_path))
        os.makedirs(output_path)

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, pipeline_)
    utils.notice('Downloading the trained model from pipeline run '
                 'ID {}. This might take some time if the model '
                 'resources are significantly large in size.\nYour patience '
                 'is much appreciated!'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Deployer.name)

    spin = utils.Spinner()
    spin.start()
    try:
        if len(artifact) == 1:
            utils.download_artifact(artifact_json=artifact[0].to_dict(),
                                    path=output_path)
        else:
            utils.error('Something unexpected happened! Please contact '
                        '[email protected] to get further information.')
    finally:
        # BUG FIX: stop the spinner on the error path too, not only on
        # success.
        spin.stop()

    utils.declare('Model downloaded to: {}'.format(output_path))
    # TODO: [LOW] Make the Tensorflow version more dynamic
    utils.declare('Please note that the model is saved as a SavedModel '
                  'Tensorflow artifact, trained on Tensoflow 2.1.0.')
def list_versions(info, function_id):
    """List of versions for a selected custom function"""
    api = ce_api.FunctionsApi(api_client(info))

    # Resolve a possibly-partial id to the closest known function uuid.
    f_list = api_call(api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, f_list)

    v_list = api_call(
        api.get_function_versions_api_v1_functions_function_id_versions_get,
        f_uuid)

    # BUG FIX: display the resolved uuid (f_uuid), not the raw user input,
    # so the id shown matches the function actually queried.
    declare('Function with {id} has {count} '
            'versions.\n'.format(id=format_uuid(f_uuid),
                                 count=len(v_list)))

    if v_list:
        table = [{'ID': format_uuid(v.id),
                  'Created At': v.created_at,
                  'Description': v.message} for v in v_list]
        click.echo(tabulate(table, headers='keys', tablefmt='presto'))
        click.echo()
def update_function(info, function_id, local_path, udf_name, message):
    """Add a new version to a function and update it"""
    click.echo('Updating the function {}.'.format(
        format_uuid(function_id)))

    functions_api = ce_api.FunctionsApi(api_client(info))
    known_functions = api_call(functions_api.get_functions_api_v1_functions_get)
    resolved = find_closest_uuid(function_id, known_functions)

    # Ship the new source file as base64-encoded text.
    with open(local_path, 'rb') as source_file:
        encoded_file = base64.b64encode(source_file.read()).decode()

    api_call(
        functions_api.create_function_version_api_v1_functions_function_id_versions_post,
        FunctionVersionCreate(udf_path=udf_name,
                              message=message,
                              file_contents=encoded_file),
        resolved)
    declare('Function updated!')
def list_providers(info):
    # Show every provider of the logged-in user; the active one (if any)
    # is flagged with '*' in the Selection column.
    providers_api = ce_api.ProvidersApi(api_client(info))
    providers = api_call(providers_api.get_loggedin_provider_api_v1_providers_get)

    declare('You have {count} different providers(s) so '
            'far. \n'.format(count=len(providers)))

    user = info[constants.ACTIVE_USER]
    if constants.ACTIVE_PROVIDER in info[user]:
        active_provider = info[user][constants.ACTIVE_PROVIDER]
    else:
        active_provider = None

    if providers:
        rows = [{'Selection': '*' if p.id == active_provider else '',
                 'ID': format_uuid(p.id),
                 'Name': p.name,
                 'Type': p.type,
                 'Created At': p.created_at} for p in providers]
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
def list_workspaces(info):
    """List of all workspaces available to the user"""
    user = info[constants.ACTIVE_USER]
    ws_api = ce_api.WorkspacesApi(api_client(info))
    workspaces = api_call(ws_api.get_loggedin_workspaces_api_v1_workspaces_get)

    # The active workspace (if any) gets a '*' in the Selection column.
    if constants.ACTIVE_WORKSPACE in info[user]:
        active_ws = info[user][constants.ACTIVE_WORKSPACE]
    else:
        active_ws = None

    declare('You have created {count} different '
            'workspace(s). \n'.format(count=len(workspaces)))

    if workspaces:
        rows = [{'Selection': '*' if w.id == active_ws else '',
                 'ID': format_uuid(w.id),
                 'Name': w.name,
                 'Provider': format_uuid(w.provider_id)} for w in workspaces]
        click.echo(tabulate(rows, headers='keys', tablefmt='presto'))
        click.echo()
def push_pipeline(info, config_path, pipeline_name):
    """Register a pipeline with the selected configuration"""
    active_user = info[constants.ACTIVE_USER]
    ws_id = info[active_user][constants.ACTIVE_WORKSPACE]

    try:
        with open(config_path, 'rt', encoding='utf8') as f:
            # safe_load: config files must not execute arbitrary YAML tags
            # (yaml.load without an explicit Loader is unsafe and
            # deprecated in PyYAML >= 5).
            config = yaml.safe_load(f)
    except yaml.YAMLError:
        # Narrowed from a bare 'except:' so unrelated errors (e.g. a
        # missing file) surface instead of being mislabeled.
        utils.error('Badly formatted YAML!')

    api = ce_api.PipelinesApi(utils.api_client(info))
    p = utils.api_call(api.create_pipeline_api_v1_pipelines_post,
                       PipelineCreate(name=pipeline_name,
                                      pipeline_config=config,
                                      workspace_id=ws_id))

    # BUG FIX: the success message had no '{id}' placeholder, so the
    # formatted pipeline id was silently dropped from the output.
    utils.declare('Pipeline pushed successfully! ID: {id}'.format(
        id=utils.format_uuid(p.id)))
    utils.declare(
        "Use `cengine pipeline train {} --datasource DS_COMMIT` "
        "to launch a training pipeline!".format(utils.format_uuid(p.id)))
def pull_function_version(info, function_id, version_id, output_path):
    """Download a version of a given custom function"""
    functions_api = ce_api.FunctionsApi(api_client(info))

    # Infer the function uuid and name
    f_list = api_call(functions_api.get_functions_api_v1_functions_get)
    f_uuid = find_closest_uuid(function_id, f_list)
    matching_names = [f.name for f in f_list if f.id == f_uuid]
    f_name = matching_names[0]

    # Infer the version uuid
    v_list = api_call(
        functions_api.get_function_versions_api_v1_functions_function_id_versions_get,
        f_uuid)
    v_uuid = find_closest_uuid(version_id, v_list)

    notice('Downloading the function with the following parameters: \n'
           'Name: {f_name}\n'
           'function_id: {f_id}\n'
           'version_id: {v_id}\n'.format(f_name=f_name,
                                         f_id=format_uuid(f_uuid),
                                         v_id=format_uuid(v_uuid)))

    # Fetch the base64-encoded file contents for this exact version.
    payload = api_call(
        functions_api.get_function_version_api_v1_functions_function_id_versions_version_id_get,
        f_uuid,
        v_uuid)

    # Default output file: <cwd>/<name>@<version>.py
    if output_path is None:
        output_path = os.path.join(os.getcwd(),
                                   '{}@{}.py'.format(f_name, v_uuid))
    with open(output_path, 'wb') as out_file:
        out_file.write(base64.b64decode(payload.file_contents))

    declare('File downloaded to {}'.format(output_path))
def get_pipeline_status(info, pipeline_id):
    """Get status of started pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds.. \n')
    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]
    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    # All pipelines of the active workspace.
    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    # A user-supplied (possibly partial) id is resolved to a full uuid so
    # the equality filter below matches exactly one pipeline.
    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        # Only pipelines with at least one run (and matching the optional
        # id filter) are printed.
        write_check = (len(p.pipeline_runs) > 0) and \
                      (pipeline_id is None or pipeline_id == p.id)
        if write_check:
            title = 'PIPELINE NAME: {}  PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))
            table = []
            for r in p.pipeline_runs:
                # Full run detail (status, timing) — one extra round-trip
                # per run, which is why this command can be slow.
                run = utils.api_call(
                    p_api.get_pipeline_run_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_get,
                    p.id,
                    r.id)

                # Resolve datasource
                ds_commit = utils.api_call(
                    d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                    r.datasource_commit_id)
                ds = utils.api_call(
                    d_api.get_datasource_api_v1_datasources_ds_id_get,
                    ds_commit.datasource_id)

                # Duration: completed runs use end-start; in-flight runs use
                # now-start. NOTE(review): assumes start_time is tz-aware
                # UTC — confirm against the API model.
                if run.end_time:
                    td = run.end_time - run.start_time
                else:
                    td = datetime.now(timezone.utc) - run.start_time

                # # Resolve component status
                # stage = utils.get_run_stage(run.pipeline_components)

                table.append({
                    'RUN ID': utils.format_uuid(run.id),
                    'TYPE': run.pipeline_run_type,
                    'STATUS': run.status,
                    # 'STAGE': stage,
                    'DATASOURCE': '{}_{}'.format(
                        ds.name,
                        utils.format_uuid(run.datasource_commit_id)),
                    'DATAPOINTS': '{}'.format(ds_commit.n_datapoints),
                    # 'RUNNING STAGE': stage,
                    'START TIME': utils.format_date(run.start_time),
                    'DURATION': utils.format_timedelta(td),
                })
            click.echo(tabulate(table, headers='keys', tablefmt='plain'))
            click.echo('\n')