def put(self, template_id):
    parser = reqparse.RequestParser()
    parser.add_argument('name', required=True, help='name is empty')
    parser.add_argument('comment')
    parser.add_argument('config', type=dict, required=True,
                        help='config is empty')
    data = parser.parse_args()
    name = data['name']
    comment = data['comment']
    config = data['config']

    tmp = WorkflowTemplate.query.filter_by(name=name).first()
    if tmp is not None and tmp.id != template_id:
        raise ResourceConflictException(
            'Workflow template {} already exists'.format(name))
    template = WorkflowTemplate.query.filter_by(id=template_id).first()
    if template is None:
        raise NotFoundException()
    template_proto = _check_config(config)
    template.set_config(template_proto)
    template.name = name
    template.comment = comment
    template.group_alias = template_proto.group_alias
    template.is_left = template_proto.is_left
    db.session.commit()
    return {'data': template.to_dict()}, HTTPStatus.OK

def get(self, workflow_uuid, participant_id, job_name):
    parser = reqparse.RequestParser()
    parser.add_argument('start_time', type=int, location='args',
                        required=False,
                        help='start_time must be a timestamp')
    parser.add_argument('max_lines', type=int, location='args',
                        required=True,
                        help='max_lines is required')
    data = parser.parse_args()
    start_time = data['start_time']
    max_lines = data['max_lines']

    workflow = Workflow.query.filter_by(uuid=workflow_uuid).first()
    if workflow is None:
        raise NotFoundException()
    if start_time is None:
        start_time = workflow.start_at

    project_config = workflow.project.get_config()
    party = project_config.participants[participant_id]
    client = RpcClient(project_config, party)
    resp = client.get_job_events(job_name=job_name,
                                 start_time=start_time,
                                 max_lines=max_lines)
    if resp.status.code != common_pb2.STATUS_SUCCESS:
        raise InternalException(resp.status.msg)
    peer_events = MessageToDict(resp.logs,
                                preserving_proto_field_name=True,
                                including_default_value_fields=True)
    return {'data': peer_events}

def delete(self, template_id):
    result = WorkflowTemplate.query.filter_by(id=template_id)
    if result.first() is None:
        raise NotFoundException()
    result.delete()
    db.session.commit()
    return {'data': {}}, HTTPStatus.OK

def patch(self, project_id):
    project = Project.query.filter_by(id=project_id).first()
    if project is None:
        raise NotFoundException()

    config = project.get_config()
    if request.json.get('token') is not None:
        new_token = request.json.get('token')
        config.token = new_token
        project.token = new_token

    if request.json.get('variables') is not None:
        del config.variables[:]
        config.variables.extend([
            ParseDict(variable, Variable())
            for variable in request.json.get('variables')
        ])

    custom_host = None
    egress_url = 'fedlearner-stack-ingress-nginx-controller.default' \
                 '.svc.cluster.local:80'
    for variable in config.variables:
        if variable.name == 'CUSTOM_HOST':
            custom_host = variable.value
        if variable.name == 'EGRESS_URL':
            egress_url = variable.value
    for participant in config.participants:
        participant.grpc_spec.egress_url = egress_url
    # TODO: update add-on's custom_host
    project.set_config(config)

    if request.json.get('comment') is not None:
        project.comment = request.json.get('comment')

    try:
        db.session.commit()
    except Exception as e:
        raise InvalidArgumentException(details=e)
    return {'data': project.to_dict()}

def delete_sparkapplication(self,
                            name: str,
                            namespace: str = SPARKOPERATOR_NAMESPACE
                            ) -> dict:
    """Delete a sparkapp.

    Args:
        name (str): sparkapp name
        namespace (str, optional): namespace to delete from.

    Raises:
        NotFoundException: if the sparkapp does not exist.
        InternalException: for any other k8s API error.

    Returns:
        dict: resp of k8s
    """
    try:
        return self.crds.delete_namespaced_custom_object(
            group=SPARKOPERATOR_CUSTOM_GROUP,
            version=SPARKOPERATOR_CUSTOM_VERSION,
            namespace=namespace,
            plural=CrdKind.SPARK_APPLICATION.value,
            name=name,
            body=client.V1DeleteOptions())
    except ApiException as err:
        if err.status == 404:
            raise NotFoundException()
        raise InternalException(details=err.body)

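# A minimal usage sketch for the method above. `SparkAppClient` is a
# hypothetical name for the enclosing class; only delete_sparkapplication
# and the two exception types come from the code above.
#
#     client = SparkAppClient()
#     try:
#         resp = client.delete_sparkapplication('my-sparkapp')
#     except NotFoundException:
#         resp = {}  # app already gone; deletion is effectively idempotent
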
def get(self, dataset_id):
    with db.session_scope() as session:
        dataset = session.query(Dataset).get(dataset_id)
        if dataset is None:
            raise NotFoundException(
                f'Failed to find dataset: {dataset_id}')
        return {'data': dataset.to_dict()}

def delete(self, model_id):
    with db_handler.session_scope() as session:
        model = ModelService(session).drop(model_id)
        if not model:
            raise NotFoundException(
                f'Failed to find model: {model_id}')
        return {'data': model.to_dict()}, HTTPStatus.OK

def post(self):
    parser = reqparse.RequestParser()
    parser.add_argument('username', required=True, help='username is empty')
    parser.add_argument('password', required=True, help='password is empty')
    data = parser.parse_args()
    username = data['username']
    password = base64decode(data['password'])

    user = User.query.filter_by(username=username).filter_by(
        state=State.ACTIVE).first()
    if user is None:
        raise NotFoundException(f'Failed to find user: {username}')
    if not user.verify_password(password):
        raise UnauthorizedException('Invalid password')

    token = create_access_token(identity=username)
    decoded_token = decode_token(token)
    session = Session(jti=decoded_token.get('jti'),
                      expired_at=datetime.datetime.fromtimestamp(
                          decoded_token.get('exp')))
    db.session.add(session)
    db.session.commit()
    return {
        'data': {
            'user': user.to_dict(),
            'access_token': token
        }
    }, HTTPStatus.OK

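# A client-side sketch for the sign-in handler above. The handler
# base64-decodes the password, so clients must send it encoded. The URL
# path here is hypothetical; it is not visible in this snippet.
#
#     import base64
#     import requests
#
#     payload = {
#         'username': 'ada',
#         'password': base64.b64encode(b'secret').decode('ascii'),
#     }
#     resp = requests.post('http://localhost:1990/api/v2/auth/signin',
#                          json=payload)
#     token = resp.json()['data']['access_token']
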
def plot_data_join_metrics(self, num_buckets=30):
    res = es.query_data_join_metrics(self._job.name, num_buckets)
    if not res['aggregations']['OVERALL']['buckets']:
        raise NotFoundException()

    metrics = []
    # plot pie chart for overall join rate
    overall = res['aggregations']['OVERALL']['buckets'][0]
    labels = ['joined', 'fake', 'unjoined']
    sizes = [
        overall['JOINED']['doc_count'], overall['FAKE']['doc_count'],
        overall['UNJOINED']['value']
    ]
    fig = Figure()
    ax = fig.add_subplot(111)
    ax.pie(sizes, labels=labels, autopct='%1.1f%%')
    metrics.append(mpld3.fig_to_dict(fig))

    # plot stackplot for event time
    by_et = res['aggregations']['EVENT_TIME']['buckets']
    et_index = [self._to_datetime(buck['key']) for buck in by_et]
    et_joined = [buck['JOINED']['doc_count'] for buck in by_et]
    et_faked = [buck['FAKE']['doc_count'] for buck in by_et]
    et_unjoined = [buck['UNJOINED']['value'] for buck in by_et]
    fig = Figure()
    ax = fig.add_subplot(111)
    ax.stackplot(et_index, et_joined, et_faked, et_unjoined, labels=labels)
    twin_ax = ax.twinx()
    twin_ax.patch.set_alpha(0.0)
    et_rate = [buck['JOIN_RATE']['value'] for buck in by_et]
    twin_ax.plot(et_index, et_rate, label='join rate', color='black')
    ax.xaxis_date()
    ax.legend()
    metrics.append(mpld3.fig_to_dict(fig))

    # plot processing time vs event time
    by_pt = res['aggregations']['PROCESS_TIME']['buckets']
    pt_index = [self._to_datetime(buck['key']) for buck in by_pt]
    pt_min = [
        self._to_datetime(buck['MIN_EVENT_TIME']['value'])
        for buck in by_pt
    ]
    pt_max = [
        self._to_datetime(buck['MAX_EVENT_TIME']['value'])
        for buck in by_pt
    ]
    fig = Figure()
    ax = fig.add_subplot(111)
    ax.plot(pt_index, pt_min, label='min event time')
    ax.plot(pt_index, pt_max, label='max event time')
    ax.xaxis_date()
    ax.yaxis_date()
    ax.legend()
    metrics.append(mpld3.fig_to_dict(fig))

    return metrics

def get(self, model_id):
    detail_level = request.args.get('detail_level', '')
    with db_handler.session_scope() as session:
        model_json = ModelService(session).query(model_id, detail_level)
        if not model_json:
            raise NotFoundException(
                f'Failed to find model: {model_id}')
        return {'data': model_json}, HTTPStatus.OK

def get(self, workflow_uuid, participant_id, job_name):
    parser = reqparse.RequestParser()
    parser.add_argument('type', type=str, location='args',
                        required=True,
                        choices=('Ratio', 'Numeric'),
                        help='Visualization type is required. '
                             'Choices: Ratio, Numeric.')
    parser.add_argument('interval', type=str, location='args',
                        default='',
                        help='Time bucket interval length, '
                             'defaults to be automated by Kibana.')
    parser.add_argument('x_axis_field', type=str, location='args',
                        default='tags.event_time',
                        help='Time field (X axis) is required.')
    parser.add_argument('query', type=str, location='args',
                        help='Additional query string to the graph.')
    parser.add_argument('start_time', type=int, location='args',
                        default=-1,
                        help='Earliest <x_axis_field> time of data. '
                             'Unix timestamp in secs.')
    parser.add_argument('end_time', type=int, location='args',
                        default=-1,
                        help='Latest <x_axis_field> time of data. '
                             'Unix timestamp in secs.')
    # Ratio visualization
    parser.add_argument('numerator', type=str, location='args',
                        help='Numerator is required in Ratio '
                             'visualization. '
                             'A query string similar to args::query.')
    parser.add_argument('denominator', type=str, location='args',
                        help='Denominator is required in Ratio '
                             'visualization. '
                             'A query string similar to args::query.')
    # Numeric visualization
    parser.add_argument('aggregator', type=str, location='args',
                        default='Average',
                        choices=('Average', 'Sum', 'Max', 'Min',
                                 'Variance', 'Std. Deviation',
                                 'Sum of Squares'),
                        help='Aggregator type is required in Numeric and '
                             'Timer visualization.')
    parser.add_argument('value_field', type=str, location='args',
                        help='The field to be aggregated on is required '
                             'in Numeric visualization.')
    args = parser.parse_args()

    workflow = Workflow.query.filter_by(uuid=workflow_uuid).first()
    if workflow is None:
        raise NotFoundException(
            f'Failed to find workflow: {workflow_uuid}')
    project_config = workflow.project.get_config()
    party = project_config.participants[participant_id]
    client = RpcClient(project_config, party)
    resp = client.get_job_kibana(job_name, json.dumps(args))
    if resp.status.code != common_pb2.STATUS_SUCCESS:
        raise InternalException(resp.status.msg)
    metrics = json.loads(resp.metrics)
    # metrics is a list of 2-element lists,
    # each 2-element list is a [x, y] pair.
    return {'data': metrics}

def get(self, dataset_id: int):
    if dataset_id <= 0:
        raise NotFoundException(f'Failed to find dataset: {dataset_id}')
    name = request.args.get('name', None)
    if not name:
        raise InvalidArgumentException('required parameter: name')
    with db.session_scope() as session:
        data = DatasetService(session).feature_metrics(name, dataset_id)
        return {'data': data}

def put(self, model_id):
    with db_handler.session_scope() as session:
        model = session.query(Model).filter_by(id=model_id).one_or_none()
        if not model:
            raise NotFoundException(
                f'Failed to find model: {model_id}')
        model.extra = request.args.get('extra', model.extra)
        session.commit()
        return {'data': model.to_dict()}, HTTPStatus.OK

def get(self, job_id, pod_name):
    job = Job.query.filter_by(id=job_id).first()
    if job is None:
        raise NotFoundException()
    k8s = get_client()
    base = k8s.get_base_url()
    container_id = k8s.get_webshell_session(job.project.get_namespace(),
                                            pod_name, 'tensorflow')
    return {'data': {'id': container_id, 'base': base}}

def get(self, job_id):
    job = Job.query.filter_by(id=job_id).first()
    if job is None:
        raise NotFoundException()
    metrics = JobMetricsBuilder(job).plot_metrics()
    # Metrics is a list of dicts. Each dict can be rendered by the
    # frontend with mpld3.draw_figure('figure1', json)
    return {'data': metrics}

def patch(self, group_id):
    group = ModelGroup.query.filter_by(id=group_id).one_or_none()
    if not group:
        raise NotFoundException(
            f'Failed to find group: {group_id}')
    group.name = request.args.get('name', group.name)
    group.extra = request.args.get('extra', group.extra)
    with db_handler.session_scope() as session:
        session.add(group)
        session.commit()
    return {'data': group.to_dict()}, HTTPStatus.OK

def post(self, project_id):
    project = Project.query.filter_by(id=project_id).first()
    if project is None:
        raise NotFoundException()

    success = True
    details = []
    # TODO: Concurrently check
    for participant in project.get_config().participants:
        result = self.check_connection(project.get_config(), participant)
        success = success and result.code == StatusCode.STATUS_SUCCESS
        if result.code != StatusCode.STATUS_SUCCESS:
            details.append(result.msg)
    return {'data': {'success': success, 'details': details}}

def post(self, dataset_id: int):
    parser = reqparse.RequestParser()
    parser.add_argument('event_time', type=int)
    parser.add_argument('files',
                        required=True,
                        type=list,
                        location='json',
                        help=_FORMAT_ERROR_MESSAGE.format('files'))
    parser.add_argument('move', type=bool)
    parser.add_argument('comment', type=str)
    body = parser.parse_args()
    event_time = body.get('event_time')
    files = body.get('files')
    move = body.get('move', False)
    comment = body.get('comment')

    dataset = Dataset.query.filter_by(id=dataset_id).first()
    if dataset is None:
        raise NotFoundException()
    if event_time is None and dataset.type == DatasetType.STREAMING:
        raise InvalidArgumentException(
            details='data_batch.event_time is empty')
    # TODO: PSI dataset should not allow multi batches

    # Create batch
    batch = DataBatch(
        dataset_id=dataset.id,
        # Use current timestamp to fill when type is PSI
        event_time=datetime.datetime.fromtimestamp(
            event_time or datetime.datetime.now().timestamp()),
        comment=comment,
        state=BatchState.NEW,
        move=move,
    )
    batch_details = dataset_pb2.DataBatch()
    root_dir = current_app.config.get('STORAGE_ROOT')
    batch_folder_name = batch.event_time.strftime('%Y%m%d%H%M%S')
    for file_path in files:
        file = batch_details.files.add()
        file.source_path = file_path
        file_name = file_path.split('/')[-1]
        file.destination_path = f'{root_dir}/dataset/{dataset.id}' \
                                f'/batch/{batch_folder_name}/{file_name}'
    batch.set_details(batch_details)
    db.session.add(batch)
    db.session.commit()
    db.session.refresh(batch)
    scheduler.wakeup(data_batch_ids=[batch.id])
    return {'data': batch.to_dict()}

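# Worked example of the destination layout built above (values are
# hypothetical): with STORAGE_ROOT '/data', dataset id 3 and event_time
# 2021-06-01 12:00:00, a source file '/upload/a.csv' is assigned the
# destination '/data/dataset/3/batch/20210601120000/a.csv'.
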
def _get_match_templates(self, workflow_id):
    """Find templates which match the peer's config."""
    workflow = Workflow.query.filter_by(id=workflow_id).first()
    if workflow is None:
        raise NotFoundException()
    templates = WorkflowTemplate.query.filter_by(
        group_alias=workflow.group_alias)
    result = []
    for template in templates:
        if check_group_match(template.get_config(),
                             workflow.get_peer_config()):
            result.append(template.to_dict())
    return {'data': result}, HTTPStatus.OK

def get(self, workflow_id, participant_id, job_name):
    workflow = Workflow.query.filter_by(id=workflow_id).first()
    if workflow is None:
        raise NotFoundException()
    project_config = workflow.project.get_config()
    party = project_config.participants[participant_id]
    client = RpcClient(project_config, party)
    resp = client.get_job_metrics(workflow.name, job_name)
    if resp.status.code != common_pb2.STATUS_SUCCESS:
        raise InternalException(resp.status.msg)
    metrics = json.loads(resp.metrics)
    # Metrics is a list of dicts. Each dict can be rendered by the
    # frontend with mpld3.draw_figure('figure1', json)
    return {'data': metrics}

def patch(self, project_id):
    project = Project.query.filter_by(id=project_id).first()
    if project is None:
        raise NotFoundException(f'Failed to find project: {project_id}')

    config = project.get_config()
    if request.json.get('token') is not None:
        new_token = request.json.get('token')
        config.token = new_token
        project.token = new_token

    if request.json.get('variables') is not None:
        del config.variables[:]
        config.variables.extend([
            ParseDict(variable, Variable())
            for variable in request.json.get('variables')
        ])

    # extract configuration from variables
    grpc_ssl_server_host = None
    egress_host = None
    for variable in config.variables:
        if variable.name == 'GRPC_SSL_SERVER_HOST':
            grpc_ssl_server_host = variable.value
        if variable.name == 'EGRESS_HOST':
            egress_host = variable.value

    if request.json.get('participant_name'):
        config.participants[0].name = request.json.get('participant_name')
    if request.json.get('comment'):
        project.comment = request.json.get('comment')

    for participant in config.participants:
        if participant.domain_name in \
                project.get_certificate().domain_name_to_cert.keys():
            _create_add_on(
                participant,
                project.get_certificate().domain_name_to_cert[
                    participant.domain_name],
                grpc_ssl_server_host)
        if egress_host:
            participant.grpc_spec.authority = egress_host
    project.set_config(config)

    try:
        db.session.commit()
    except Exception as e:
        raise InvalidArgumentException(details=e)
    return {'data': project.to_dict()}

def post(self):
    parser = reqparse.RequestParser()
    parser.add_argument('dataset_id', type=int, required=True,
                        help=_FORMAT_ERROR_MESSAGE.format('dataset_id'))
    parser.add_argument('event_time', type=int)
    parser.add_argument('files', required=True, type=str, action='append',
                        help=_FORMAT_ERROR_MESSAGE.format('files'))
    parser.add_argument('move', type=bool)
    parser.add_argument('comment', type=str)
    body = parser.parse_args()
    dataset_id = body.get('dataset_id')
    event_time = body.get('event_time')
    files = body.get('files')
    move = body.get('move', False)
    comment = body.get('comment')

    dataset = Dataset.query.filter_by(id=dataset_id).first()
    if dataset is None:
        raise NotFoundException()
    if dataset.external_storage_path:
        raise InvalidArgumentException(
            details='Cannot import into dataset for scanning')
    if event_time is None and dataset.type == DatasetType.STREAMING:
        raise InvalidArgumentException(
            details='data_batch.event_time is empty')

    # Create batch
    batch = DataBatch()
    # Use current timestamp to fill when type is PSI
    batch.event_time = datetime.datetime.fromtimestamp(
        event_time or datetime.datetime.now().timestamp())
    batch.dataset_id = dataset.id
    batch.comment = comment
    batch.state = BatchState.IMPORTING
    batch.num_file = len(files)
    batch.set_source(ParseDict({'files': files}, DatasetSource()))
    # TODO: Call scheduler to import
    db.session.add(batch)
    db.session.commit()
    return {'data': batch.to_dict()}

def get(self, template_id):
    download = request.args.get('download', 'false') == 'true'
    template = WorkflowTemplate.query.filter_by(id=template_id).first()
    if template is None:
        raise NotFoundException()
    result = template.to_dict()
    if download:
        in_memory_file = io.BytesIO()
        in_memory_file.write(json.dumps(result).encode('utf-8'))
        in_memory_file.seek(0)
        return send_file(in_memory_file,
                         as_attachment=True,
                         attachment_filename=f'{template.name}.json',
                         mimetype='application/json; charset=UTF-8',
                         cache_timeout=0)
    return {'data': result}, HTTPStatus.OK

def plot_nn_metrics(self, num_buckets=30):
    res = es.query_nn_metrics(self._job.name, num_buckets)
    if not res['aggregations']['PROCESS_TIME']['buckets']:
        raise NotFoundException()
    buckets = res['aggregations']['PROCESS_TIME']['buckets']
    time = [self._to_datetime(buck['key']) for buck in buckets]

    metrics = []
    # plot auc curve
    auc = [buck['AUC']['AUC']['value'] for buck in buckets]
    fig = Figure()
    ax = fig.add_subplot(111)
    ax.plot(time, auc, label='auc')
    ax.legend()
    metrics.append(mpld3.fig_to_dict(fig))
    return metrics

def test_not_found_exception(self):
    exception1 = NotFoundException('User A not found.')
    self.assertEqual(exception1.status_code, HTTPStatus.NOT_FOUND)
    self.assertEqual(exception1.to_dict(), {
        'code': 404,
        'message': 'User A not found.',
    })

    exception2 = NotFoundException()
    self.assertEqual(exception2.status_code, HTTPStatus.NOT_FOUND)
    self.assertEqual(exception2.to_dict(), {
        'code': 404,
        'message': 'Resource not found.',
    })

def get_dataset_preview(self, dataset_id: int = 0) -> dict:
    dataset = self._session.query(Dataset).filter(
        Dataset.id == dataset_id).first()
    if not dataset:
        raise NotFoundException(f'Failed to find dataset: {dataset_id}')
    dataset_path = dataset.path
    # meta is generated from sparkapp/pipeline/analyzer.py
    meta_path = dataset_meta_path(dataset_path)
    # data format:
    # {
    #     'dtypes': {
    #         'f01': 'bigint'
    #     },
    #     'samples': [
    #         [1],
    #         [0],
    #     ],
    #     'metrics': {
    #         'f01': {
    #             'count': '2',
    #             'mean': '0.0015716767309123998',
    #             'stddev': '0.03961485047808605',
    #             'min': '0',
    #             'max': '1',
    #             'missing_count': '0'
    #         }
    #     }
    # }
    val = {}
    try:
        val = json.loads(self._file_manager.read(meta_path))
    except Exception as e:  # pylint: disable=broad-except
        logging.info(
            f'failed to read meta file, path: {meta_path}, err: {e}')
        return {}
    # feature is generated from sparkapp/pipeline/analyzer.py
    feature_path = dataset_features_path(dataset_path)
    try:
        val['metrics'] = json.loads(self._file_manager.read(feature_path))
    except Exception as e:  # pylint: disable=broad-except
        logging.info(
            f'failed to read feature file, path: {feature_path}, err: {e}')
    return val

def post(self):
    parser = reqparse.RequestParser()
    parser.add_argument('username', required=True, help='username is empty')
    parser.add_argument('password', required=True, help='password is empty')
    data = parser.parse_args()
    username = data['username']
    password = data['password']
    user = User.query.filter_by(username=username).first()
    if user is None:
        raise NotFoundException()
    if not user.verify_password(password):
        raise UnauthorizedException('Invalid password')
    token = create_access_token(identity=username)
    return {'access_token': token}, HTTPStatus.OK

def feature_metrics(self, name: str, dataset_id: int = 0) -> dict:
    dataset = self._session.query(Dataset).filter(
        Dataset.id == dataset_id).first()
    if not dataset:
        raise NotFoundException(f'Failed to find dataset: {dataset_id}')
    dataset_path = dataset.path
    feature_path = dataset_features_path(dataset_path)
    # data format:
    # {
    #     'name': 'f01',
    #     'metrics': {
    #         'count': '2',
    #         'mean': '0.0015716767309123998',
    #         'stddev': '0.03961485047808605',
    #         'min': '0',
    #         'max': '1',
    #         'missing_count': '0'
    #     },
    #     'hist': {
    #         'x': [0.0, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5,
    #               0.6000000000000001, 0.7000000000000001, 0.8, 0.9, 1],
    #         'y': [12070, 0, 0, 0, 0, 0, 0, 0, 0, 19]
    #     }
    # }
    val = {}
    try:
        feature_data = json.loads(self._file_manager.read(feature_path))
        val['name'] = name
        val['metrics'] = feature_data.get(name, {})
    except Exception as e:  # pylint: disable=broad-except
        logging.info(
            f'failed to read feature file, path: {feature_path}, err: {e}')
    # hist is generated from sparkapp/pipeline/analyzer.py
    hist_path = dataset_hist_path(dataset_path)
    try:
        hist_data = json.loads(self._file_manager.read(hist_path))
        val['hist'] = hist_data.get(name, {})
    except Exception as e:  # pylint: disable=broad-except
        logging.info(
            f'failed to read hist file, path: {hist_path}, err: {e}')
    return val

def patch(self, project_id):
    project = Project.query.filter_by(id=project_id).first()
    if project is None:
        raise NotFoundException()

    config = project.get_config()
    if request.json.get('token') is not None:
        new_token = request.json.get('token')
        config.token = new_token
        project.token = new_token

    if request.json.get('variables') is not None:
        del config.variables[:]
        config.variables.extend([
            ParseDict(variable, Variable())
            for variable in request.json.get('variables')
        ])

    # extract configuration from variables
    custom_host = None
    for variable in config.variables:
        if variable.name == 'CUSTOM_HOST':
            custom_host = variable.value
    project.set_config(config)

    if request.json.get('comment'):
        project.comment = request.json.get('comment')

    for participant in project.get_config().participants:
        if participant.domain_name in \
                project.get_certificate().domain_name_to_cert.keys():
            _create_add_on(
                participant,
                project.get_certificate().domain_name_to_cert[
                    participant.domain_name],
                custom_host)

    try:
        db.session.commit()
    except Exception as e:
        raise InvalidArgumentException(details=e)
    return {'data': project.to_dict()}

def patch(self, dataset_id: int):
    parser = reqparse.RequestParser()
    parser.add_argument('name', type=str, required=False,
                        help='dataset name')
    parser.add_argument('comment', type=str, required=False,
                        help='dataset comment')
    data = parser.parse_args()
    with db.session_scope() as session:
        dataset = session.query(Dataset).filter_by(id=dataset_id).first()
        if not dataset:
            raise NotFoundException(
                f'Failed to find dataset: {dataset_id}')
        if data['name']:
            dataset.name = data['name']
        if data['comment']:
            dataset.comment = data['comment']
        session.commit()
        return {'data': dataset.to_dict()}, HTTPStatus.OK