def get_data_flow_results(info_str=None):
    """Ask a TA2 to GetDataflowResults via gRPC"""
    if info_str is None:
        err_msg = 'UI Str for PipelineReference is None'
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # Is this valid JSON?
    # --------------------------------
    try:
        raven_dict = json.loads(info_str, object_pairs_hook=OrderedDict)
    except json.decoder.JSONDecodeError as err_obj:
        err_msg = 'Failed to convert UI Str to JSON: %s' % (err_obj)
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # convert the JSON string to a gRPC request
    # --------------------------------
    try:
        req = Parse(info_str, dataflow_ext_pb2.PipelineReference())
    except ParseError as err_obj:
        err_msg = 'Failed to convert JSON to gRPC: %s' % (err_obj)
        return get_failed_precondition_response(err_msg)

    # In test mode, return a canned response
    #
    if settings.TA2_STATIC_TEST_MODE:
        info_dict = dict(pipelineId=raven_dict.get('pipelineId'))
        return get_grpc_test_json(
            'test_responses/get_dataflow_results_ok.json',
            info_dict)

    # --------------------------------
    # Get the connection, return an error if there are channel issues
    # --------------------------------
    dataflow_stub, err_msg = TA2Connection.get_grpc_dataflow_stub()
    if err_msg:
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # Send the gRPC request
    # --------------------------------
    try:
        reply = dataflow_stub.GetDataflowResults(req)
    except Exception as ex:
        return get_failed_precondition_response(str(ex))

    if reply and str(reply) == VAL_GRPC_STATE_CODE_NONE:
        err_msg = ('Unknown gRPC state.'
                   ' (Was a GetDataflowResults request sent?)')
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # Convert the reply (a stream of messages) to JSON and send it on
    # --------------------------------
    results = map(MessageToJson, reply)
    result_str = '[' + ', '.join(results) + ']'

    return result_str
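# A minimal usage sketch for get_data_flow_results. The session/pipeline ids
# are placeholder values, not identifiers from the source; the camelCase keys
# follow the protobuf JSON mapping that Parse() expects for PipelineReference.
# Assumes the standard protobuf JSON helpers are imported, e.g.
# `from google.protobuf.json_format import MessageToJson, Parse, ParseError`.
def demo_get_data_flow_results():
    """Hypothetical caller: build a PipelineReference payload and send it."""
    info_str = json.dumps({
        "context": {"sessionId": "session_01"},
        "pipelineId": "pipeline_1",
    })
    print(get_data_flow_results(info_str))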
def describe_data_flow():
    req = dataflow_ext_pb2.PipelineReference()
    req.context.session_id = 'session_01'
    req.pipeline_id = 'pipeline_1'

    content = MessageToJson(req)
    print('JSON:\n')
    print(content)
    print('-' * 40)
    json_parse(content, dataflow_ext_pb2.PipelineReference)
    print('-' * 40)

    resp = dataflow_ext_pb2.DataflowDescription()
    resp.response_info.status.code = core_pb2.OK
    resp.response_info.status.details = "(static test response)"
    resp.pipeline_id = 'pipeline_1'

    # Add two modules
    for idx in range(0, 2):
        resp.modules.add(id='module_id %d' % idx,
                         type='module_type %d' % idx,
                         label='module_label %d' % idx)

        # For each module, add 2 inputs and 2 outputs
        for idx2 in range(0, 2):
            resp.modules[idx].inputs.add()
            resp.modules[idx].inputs[idx2].name = 'name %d' % idx2
            resp.modules[idx].inputs[idx2].type = 'type %d' % idx2
            resp.modules[idx].inputs[idx2].value = 'value %d' % idx2

            resp.modules[idx].outputs.add()
            resp.modules[idx].outputs[idx2].name = 'name %d' % idx2
            resp.modules[idx].outputs[idx2].type = 'type %d' % idx2

    # Add two connections
    for idx in range(0, 2):
        resp.connections.add()
        resp.connections[idx].from_module_id = 'module %d' % idx
        resp.connections[idx].from_output_name = 'from_output_name %d' % idx
        resp.connections[idx].to_module_id = 'to_module_id %d' % idx
        resp.connections[idx].to_input_name = 'to_input_name %d' % idx

    content = MessageToJson(resp)
    print('JSON:\n')
    print(content)
    print('-' * 40)

    print('-' * 40)
    print('gRPC:\n')
    json_parse(content, dataflow_ext_pb2.DataflowDescription)
    print('-' * 40)
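# json_parse() is called above but not defined in this snippet. A minimal
# sketch of what it appears to do, assuming it simply round-trips the JSON
# string back into an instance of the given protobuf message class:
def json_parse(content, message_cls):
    """Parse a JSON string into a new message_cls instance and print it."""
    msg = Parse(content, message_cls())  # Parse from google.protobuf.json_format
    print(msg)
    return msg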
def describe_data_flow(raven_json_str=None):
    """Send a PipelineReference to the DescribeDataflow command"""
    if raven_json_str is None:
        err_msg = 'No data found for the PipelineReference'
        return get_failed_precondition_sess_response(err_msg)

    # --------------------------------
    # The UI has sent JSON in string format that contains the PipelineReference
    # Make sure it's valid JSON
    # --------------------------------
    try:
        raven_dict = json.loads(raven_json_str)
    except json.decoder.JSONDecodeError as err_obj:
        err_msg = 'Failed to convert UI Str to JSON: %s' % (err_obj)
        return get_failed_precondition_sess_response(err_msg)

    # --------------------------------
    # convert the JSON string to a gRPC request
    # --------------------------------
    try:
        req = Parse(raven_json_str, dataflow_ext_pb2.PipelineReference())
    except ParseError as err_obj:
        err_msg = 'Failed to convert JSON to gRPC: %s' % (err_obj)
        return get_failed_precondition_sess_response(err_msg)

    # In test mode, return a canned response
    #
    if settings.TA2_STATIC_TEST_MODE:
        info_dict = dict(pipelineId=raven_dict.get('pipelineId'))
        return get_grpc_test_json('test_responses/describe_data_flow_ok.json',
                                  info_dict)

    # --------------------------------
    # Get the connection, return an error if there are channel issues
    # --------------------------------
    dataflow_stub, err_msg = TA2Connection.get_grpc_dataflow_stub()
    if err_msg:
        return get_failed_precondition_sess_response(err_msg)

    # --------------------------------
    # Send the gRPC request
    # --------------------------------
    try:
        reply = dataflow_stub.DescribeDataflow(req)
    except Exception as ex:
        return get_failed_precondition_sess_response(str(ex))

    # --------------------------------
    # Convert the reply to JSON and send it back
    # --------------------------------
    return MessageToJson(reply)
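# get_failed_precondition_sess_response() is used throughout these handlers
# but not shown here. A plausible minimal sketch (an assumption, not the
# project's actual implementation), mirroring the responseInfo.status shape
# used by the static DataflowDescription test response above:
def get_failed_precondition_sess_response_sketch(err_msg):
    """Hypothetical: wrap an error message as a FAILED_PRECONDITION reply."""
    return json.dumps({
        "responseInfo": {
            "status": {
                "code": "FAILED_PRECONDITION",
                "details": err_msg,
            }
        }
    })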
def run():
    channel = grpc.insecure_channel('localhost:45042')
    stub = crpc.CoreStub(channel)
    dstub = drpc.DataExtStub(channel)
    dfstub = dfrpc.DataflowExtStub(channel)

    # Start a session
    session_response = stub.StartSession(
        core.SessionRequest(user_agent="xxx", version="1.0"))
    session_context = session_response.context
    print("Session started (%s)" % str(session_context.session_id))

    # Send a pipeline creation request
    dataset_uri = "file:///tmp/data/185_baseball/185_baseball_dataset/datasetDoc.json"

    some_features = [
        core.Feature(resource_id="0", feature_name="d3mIndex"),
        core.Feature(resource_id="0", feature_name="Games_played"),
        core.Feature(resource_id="0", feature_name="Runs"),
        core.Feature(resource_id="0", feature_name="Hits"),
        core.Feature(resource_id="0", feature_name="Home_runs")
    ]
    target_features = [
        core.Feature(resource_id="0", feature_name="Hall_of_Fame")
    ]

    task = core.TaskType.Value('CLASSIFICATION')
    task_subtype = core.TaskSubtype.Value('MULTICLASS')
    task_description = "Classify Hall of Fame"
    output = core.OutputType.Value('OUTPUT_TYPE_UNDEFINED')
    metrics = [
        core.PerformanceMetric.Value('F1_MICRO'),
        core.PerformanceMetric.Value('F1_MACRO')
    ]
    max_pipelines = 10
    pipeline_ids = []

    print("Training with some features")
    pc_request = core.PipelineCreateRequest(context=session_context,
                                            dataset_uri=dataset_uri,
                                            predict_features=some_features,
                                            task=task,
                                            task_subtype=task_subtype,
                                            task_description=task_description,
                                            output=output,
                                            metrics=metrics,
                                            target_features=target_features,
                                            max_pipelines=max_pipelines)

    '''
    # Iterate over results
    for pcr in stub.CreatePipelines(pc_request):
        print(str(pcr))
        if len(pcr.pipeline_info.scores) > 0:
            pipeline_ids.append(pcr.pipeline_id)

    print("Training with some features")
    pc_request = core.PipelineCreateRequest(
        context=session_context,
        train_features=some_features,
        task=task,
        task_subtype=task_subtype,
        task_description=task_description,
        output=output,
        metrics=metrics,
        target_features=target_features,
        max_pipelines=max_pipelines)
    '''

    result = stub.CreatePipelines(pc_request)

    # Iterate over results
    for pcr in result:
        print(str(pcr))
        '''
        for gdr in dfstub.GetDataflowResults(
                dfext.PipelineReference(context=session_context,
                                        pipeline_id=pcr.pipeline_id)):
            print(gdr)
        '''
        if len(pcr.pipeline_info.scores) > 0:
            pipeline_id = pcr.pipeline_id
            pipeline_ids.append(pipeline_id)
            dflow = dfstub.DescribeDataflow(
                dfext.PipelineReference(context=session_context,
                                        pipeline_id=pipeline_id))
            print(dflow)
            exres = stub.ExportPipeline(
                core.PipelineExportRequest(
                    context=session_context,
                    pipeline_id=pipeline_id,
                    pipeline_exec_uri="file:///tmp/{}".format(pipeline_id)))
            print(exres)
            '''
            if pcr.pipeline_info.predict_result_uri is not None:
                df = pandas.read_csv(pcr.pipeline_info.predict_result_uri,
                                     index_col="d3mIndex")
                print(df)
            '''

    print("************** Executing/Testing Pipelines")

    # Execute pipelines
    for pipeline_id in pipeline_ids:
        print("Executing Pipeline %s" % pipeline_id)
        ep_request = core.PipelineExecuteRequest(context=session_context,
                                                 pipeline_id=pipeline_id,
                                                 dataset_uri=dataset_uri)
        for ecr in stub.ExecutePipeline(ep_request):
            print(str(ecr))
            # proto3 string fields are never None; check for a non-empty URI
            if ecr.result_uri:
                df = pandas.read_csv(ecr.result_uri, index_col="d3mIndex")
                print(df)

    list_request = core.PipelineListRequest(context=session_context)
    lrr = stub.ListPipelines(list_request)
    print(lrr.pipeline_ids)

    print("************** Cached pipeline create results")
    pcrr = core.PipelineCreateResultsRequest(context=session_context,
                                             pipeline_ids=lrr.pipeline_ids)
    for gcpr in stub.GetCreatePipelineResults(pcrr):
        print(str(gcpr))

    print("************** Cached pipeline execute results")
    perr = core.PipelineExecuteResultsRequest(context=session_context,
                                              pipeline_ids=lrr.pipeline_ids)
    for gepr in stub.GetExecutePipelineResults(perr):
        print(str(gepr))

    print("*********** Updating Metric to Accuracy.. Create pipelines again")
    metric = core.PerformanceMetric.Value('ACCURACY')
    ups_request = core.SetProblemDocRequest(
        context=session_context,
        updates=[
            core.SetProblemDocRequest.ReplaceProblemDocField(metric=metric)
        ])
    print(stub.SetProblemDoc(ups_request))

    print("********** Re-running pipeline creation")
    for pcr in stub.CreatePipelines(
            core.PipelineCreateRequest(context=session_context)):
        print(str(pcr))

    stub.EndSession(session_context)
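# A typical entry point for this client script. The import aliases below are
# assumptions inferred from the names used in run() (core, crpc, drpc, dfrpc,
# and dfext refer to the generated TA2 API protobuf/gRPC modules):
#
#   import grpc
#   import pandas
#   import core_pb2 as core
#   import core_pb2_grpc as crpc
#   import data_ext_pb2_grpc as drpc
#   import dataflow_ext_pb2 as dfext
#   import dataflow_ext_pb2_grpc as dfrpc
if __name__ == '__main__':
    run()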