def execute_pipeline_parse():
    """Build a sample PipelineExecuteRequest, print it as JSON, and hand it to json_parse.

    The request uses a fixed session id, a fixed pipeline id and one
    predict feature per hard-coded feature name, each pointing at the
    '<<DATA_URI>>' placeholder. The JSON text is printed twice (the second
    print follows a key-renaming step that is disabled in this code), and
    is then passed to ``json_parse`` along with the message class.
    NOTE(review): presumably json_parse converts the JSON back into a
    gRPC message and prints it -- confirm against its definition.
    """
    divider = '-' * 40

    request = core_pb2.PipelineExecuteRequest()
    request.context.session_id = 'session_01'
    request.pipeline_id = 'pipeline_01'

    names = ('cylinders displacement horsepower'
             ' weight acceleration model class').split()
    for name in names:
        request.predict_features.add(feature_id=name,
                                     data_uri='<<DATA_URI>>')

    json_text = MessageToJson(request,
                              including_default_value_fields=True)

    print('JSON:\n')
    print(json_text)
    print(divider)

    # Second dump of the same text; no transformation is applied in between.
    print(json_text)
    print(divider)

    print('gRPC:\n')
    json_parse(json_text, core_pb2.PipelineExecuteRequest)
    print(divider)
def executePipeline(context=None, pipeline=None, data_uri=None):
    """Execute a pipeline through the gRPC stub and return the results as dicts.

    Args:
        context: session id stored in the request's SessionContext.
        pipeline: id of the pipeline to execute.
        data_uri: location of the dataset. If it does not already start
            with the ``file:`` scheme, ``file://`` is prepended.

    Returns:
        A list with one dict per message streamed back by
        ``stub.ExecutePipeline`` (each message converted with
        MessageToJson and json.loads). Every dict is also passed to
        ``copyToWebRoot`` before the list is returned.
    """
    stub = get_stub()

    # Add the file scheme if it is missing. Some systems might be
    # inconsistent, but file:// is the standard. Checking for 'file:'
    # (rather than the first four characters) avoids mistaking plain paths
    # such as 'files/data.csv' for URIs.
    if not data_uri.startswith('file:'):
        data_uri = 'file://%s' % (data_uri,)

    context_in = cpb.SessionContext(session_id=context)
    request_in = cpb.PipelineExecuteRequest(context=context_in,
                                            pipeline_id=pipeline,
                                            dataset_uri=data_uri)
    resp = stub.ExecutePipeline(request_in)

    # Materialise the stream into a real list: on Python 3 ``map`` is lazy,
    # so the result could only be consumed once and the copy step below
    # would never run.
    executedPipes = [json.loads(MessageToJson(message)) for message in resp]
    print(executedPipes)

    # Now loop through the returned pipelines and copy their data.
    for executed in executedPipes:
        copyToWebRoot(executed)

    return executedPipes
def executePipeline(port=None, session=None, pipeline=None, data=None, predictor=None):
    """Execute a pipeline for a set of predict features via the gRPC stub.

    Args:
        port: converted with ``int()`` and passed to ``get_stub``.
        session: passed to ``Parse`` to populate a ``cpb.SessionContext``.
        pipeline: id of the pipeline to execute.
        data: path that is prefixed with ``file://`` to form the data URI
            shared by every feature.
        predictor: JSON text decoded with ``json.loads``; each element
            becomes the ``feature_id`` of one ``cpb.Feature``.

    Returns:
        ``map`` applied over the response stream, converting each message
        to a dict via MessageToJson and json.loads (a list on Python 2,
        a lazy iterator on Python 3).
    """
    def _message_to_dict(message):
        return json.loads(MessageToJson(message))

    stub = get_stub(int(port))
    data_uri = 'file://%s' % (data)
    feature_ids = json.loads(predictor)

    session_context = Parse(session, cpb.SessionContext())
    features = []
    for feature_id in feature_ids:
        features.append(cpb.Feature(feature_id=feature_id, data_uri=data_uri))

    request = cpb.PipelineExecuteRequest(context=session_context,
                                         pipeline_id=pipeline,
                                         predict_features=features)
    responses = stub.ExecutePipeline(request)
    return map(_message_to_dict, responses)
def run():
    """Exercise a TA2 server end to end over gRPC at localhost:45042.

    Steps performed, in order:
      1. Start a session.
      2. Request pipeline creation for the hard-coded baseball dataset and
         print every streamed result; for each result that carries scores,
         describe its dataflow and export the pipeline to /tmp.
      3. Execute every scored pipeline and print the result file (read
         with pandas) whenever a result URI is present.
      4. List pipelines and print the cached create/execute results.
      5. Switch the problem metric to ACCURACY and re-run pipeline creation.
      6. End the session (also when an earlier step raises).
    """
    channel = grpc.insecure_channel('localhost:45042')
    stub = crpc.CoreStub(channel)
    dfstub = dfrpc.DataflowExtStub(channel)

    # Start Session
    session_response = stub.StartSession(
        core.SessionRequest(user_agent="xxx", version="1.0"))
    session_context = session_response.context
    print("Session started (%s)" % str(session_context.session_id))

    try:
        # Send pipeline creation request
        dataset_uri = "file:///tmp/data/185_baseball/185_baseball_dataset/datasetDoc.json"
        some_features = [
            core.Feature(resource_id="0", feature_name="d3mIndex"),
            core.Feature(resource_id="0", feature_name="Games_played"),
            core.Feature(resource_id="0", feature_name="Runs"),
            core.Feature(resource_id="0", feature_name="Hits"),
            core.Feature(resource_id="0", feature_name="Home_runs"),
        ]
        target_features = [
            core.Feature(resource_id="0", feature_name="Hall_of_Fame"),
        ]
        task = core.TaskType.Value('CLASSIFICATION')
        task_subtype = core.TaskSubtype.Value('MULTICLASS')
        task_description = "Classify Hall of Fame"
        output = core.OutputType.Value('OUTPUT_TYPE_UNDEFINED')
        metrics = [
            core.PerformanceMetric.Value('F1_MICRO'),
            core.PerformanceMetric.Value('F1_MACRO'),
        ]
        max_pipelines = 10
        pipeline_ids = []

        print("Training with some features")
        pc_request = core.PipelineCreateRequest(
            context=session_context,
            dataset_uri=dataset_uri,
            predict_features=some_features,
            task=task,
            task_subtype=task_subtype,
            task_description=task_description,
            output=output,
            metrics=metrics,
            target_features=target_features,
            max_pipelines=max_pipelines)

        # Disabled variant (kept for reference):
        # # Iterate over results
        # for pcr in stub.CreatePipelines(pc_request):
        #     print(str(pcr))
        #     if len(pcr.pipeline_info.scores) > 0:
        #         pipeline_ids.append(pcr.pipeline_id)
        #
        # print("Training with some features")
        # pc_request = core.PipelineCreateRequest(
        #     context = session_context,
        #     train_features = some_features,
        #     task = task,
        #     task_subtype = task_subtype,
        #     task_description = task_description,
        #     output = output,
        #     metrics = metrics,
        #     target_features = target_features,
        #     max_pipelines = max_pipelines
        # )

        result = stub.CreatePipelines(pc_request)

        # Iterate over results
        for pcr in result:
            print(str(pcr))

            # Disabled:
            # for gdr in dfstub.GetDataflowResults(dfext.PipelineReference(context = session_context, pipeline_id = pcr.pipeline_id)):
            #     print(gdr)

            # Only results that already carry scores are kept for execution.
            if len(pcr.pipeline_info.scores) > 0:
                pipeline_id = pcr.pipeline_id
                pipeline_ids.append(pipeline_id)

                dflow = dfstub.DescribeDataflow(
                    dfext.PipelineReference(context=session_context,
                                            pipeline_id=pipeline_id))
                print(dflow)

                exres = stub.ExportPipeline(
                    core.PipelineExportRequest(
                        context=session_context,
                        pipeline_id=pipeline_id,
                        pipeline_exec_uri="file:///tmp/{}".format(pipeline_id)))
                print(exres)

                # Disabled:
                # if pcr.pipeline_info.predict_result_uri is not None:
                #     df = pandas.read_csv(pcr.pipeline_info.predict_result_uri, index_col="d3mIndex")
                #     print(df)

        print("************** Executing/Testing Pipelines")

        # Execute pipelines
        for pipeline_id in pipeline_ids:
            print("Executing Pipeline %s" % pipeline_id)
            ep_request = core.PipelineExecuteRequest(context=session_context,
                                                     pipeline_id=pipeline_id,
                                                     dataset_uri=dataset_uri)
            for ecr in stub.ExecutePipeline(ep_request):
                print(str(ecr))
                # A protobuf string field is never None (unset is ""), so
                # test for a non-empty URI before trying to read it.
                if ecr.result_uri:
                    df = pandas.read_csv(ecr.result_uri, index_col="d3mIndex")
                    print(df)

        list_request = core.PipelineListRequest(context=session_context)
        lrr = stub.ListPipelines(list_request)
        print(lrr.pipeline_ids)

        print("************** Cached pipeline create results")
        pcrr = core.PipelineCreateResultsRequest(context=session_context,
                                                 pipeline_ids=lrr.pipeline_ids)
        for gcpr in stub.GetCreatePipelineResults(pcrr):
            print(str(gcpr))

        print("************** Cached pipeline execute results")
        perr = core.PipelineExecuteResultsRequest(context=session_context,
                                                  pipeline_ids=lrr.pipeline_ids)
        for gepr in stub.GetExecutePipelineResults(perr):
            print(str(gepr))

        print("*********** Updating Metric to Accuracy.. Create pipelines again")
        metric = core.PerformanceMetric.Value('ACCURACY')
        ups_request = core.SetProblemDocRequest(
            context=session_context,
            updates=[
                core.SetProblemDocRequest.ReplaceProblemDocField(metric=metric)
            ])
        print(stub.SetProblemDoc(ups_request))

        print("********** Re-running pipeline creation")
        for pcr in stub.CreatePipelines(
                core.PipelineCreateRequest(context=session_context)):
            print(str(pcr))
    finally:
        # Always end the session, even when one of the steps above fails,
        # so it is not left open on the server.
        stub.EndSession(session_context)
def execute_pipeline(info_str=None):
    """Ask a TA2 to ExecutePipeline via gRPC

    This call is a bit different b/c it writes part of the data
    to a file and places that file uri into the original request

    Args:
        info_str: JSON text of the request. It must contain the
            VAL_DATA_URI placeholder and a KEY_DATA entry. When None, the
            value of ``get_test_info_str()`` is used instead.

    Success:  (updated request str, grpc json response)
    Failure:  (None, error message)
    """
    if info_str is None:
        info_str = get_test_info_str()

    if info_str is None:
        err_msg = 'UI Str for PipelineExecuteRequest is None'
        return None, get_failed_precondition_response(err_msg)

    if VAL_DATA_URI not in info_str:
        err_msg = ('Expected to see place holder for file uri.'
                   ' Placeholder is "%s"') % VAL_DATA_URI
        return None, get_failed_precondition_response(err_msg)

    d3m_config = get_latest_d3m_config()
    if not d3m_config:
        err_msg = ('The D3M configuration is not available.'
                   ' Therefore, there is no "temp_storage_root" directory to'
                   ' write the data.')
        return None, get_failed_precondition_response(err_msg)

    # --------------------------------
    # Is this valid JSON?
    # --------------------------------
    try:
        info_dict = json.loads(info_str, object_pairs_hook=OrderedDict)
    except json.decoder.JSONDecodeError as err_obj:
        err_msg = 'Failed to convert UI Str to JSON: %s' % (err_obj)
        return None, get_failed_precondition_response(err_msg)

    if KEY_DATA not in info_dict:
        err_msg = ('The JSON request did not contain a "%s" key.') % KEY_DATA
        return None, get_failed_precondition_response(err_msg)

    file_uri, err_msg = write_data_for_execute_pipeline(
        d3m_config, info_dict[KEY_DATA])
    if err_msg is not None:
        return None, get_failed_precondition_response(err_msg)

    # Reformat the original content
    #
    # (1) remove the data key (its presence was verified above)
    del info_dict[KEY_DATA]

    # (2) convert it back to a JSON string
    info_str = json.dumps(info_dict)

    # (3) replace the VAL_DATA_URI with the file_uri
    info_str_formatted = info_str.replace(VAL_DATA_URI, file_uri)

    # --------------------------------
    # convert the JSON string to a gRPC request
    # --------------------------------
    try:
        req = Parse(info_str_formatted, core_pb2.PipelineExecuteRequest())
    except ParseError as err_obj:
        err_msg = 'Failed to convert JSON to gRPC: %s' % (err_obj)
        return None, get_failed_precondition_response(err_msg)

    if settings.TA2_STATIC_TEST_MODE:
        template_info = get_predict_file_info_dict()

        template_str = get_grpc_test_json(
            'test_responses/execute_results_1pipe_ok.json',
            template_info)

        # These next lines embed file uri content into the JSON
        embed_util = FileEmbedUtil(template_str)
        if embed_util.has_error:
            # Keep the (None, error) shape used by every other failure path.
            return None, get_failed_precondition_response(
                embed_util.error_message)

        test_note = ('Test.  An actual result would be the test JSON with'
                     ' the "data" section removed and DATA_URI replaced'
                     ' with a file path to where the "data" section was'
                     ' written.')

        return json.dumps(dict(note=test_note)), embed_util.get_final_results()

    # --------------------------------
    # Get the connection, return an error if there are channel issues
    # --------------------------------
    core_stub, err_msg = TA2Connection.get_grpc_stub()
    if err_msg:
        return None, get_failed_precondition_response(err_msg)

    # --------------------------------
    # Send the gRPC request - returns a stream
    # --------------------------------
    try:
        reply = core_stub.ExecutePipeline(req)
        # The stream is consumed inside the try block because a streaming
        # call can raise while it is being iterated, not only when it is
        # invoked.
        results = [MessageToJson(message) for message in reply]
    except Exception as ex:
        return None, get_failed_precondition_response(str(ex))

    # --------------------------------
    # Convert the reply to JSON and send it on
    # --------------------------------
    result_str = '[' + ', '.join(results) + ']'

    embed_util = FileEmbedUtil(result_str)
    if embed_util.has_error:
        # Keep the (None, error) shape used by every other failure path.
        return None, get_failed_precondition_response(
            embed_util.error_message)

    return info_str_formatted, embed_util.get_final_results()