def createPipeline(port=None, session=None, data=None, predictor=None,
                   response=None, task_type=None, task_subtype=None,
                   output_type=None, metric=None):
    stub = get_stub(int(port))
    data_uri = 'file://%s' % data

    # The predictor and response arguments arrive as JSON-encoded lists of
    # feature ids.
    predictor = json.loads(predictor)
    response = json.loads(response)

    resp = stub.CreatePipelines(
        cpb.PipelineCreateRequest(
            context=Parse(session, cpb.SessionContext()),
            train_features=[
                cpb.Feature(feature_id=pred, data_uri=data_uri)
                for pred in predictor
            ],
            target_features=[
                cpb.Feature(feature_id=targ, data_uri=data_uri)
                for targ in response
            ],
            task=cpb.TaskType.Value(task_type.upper()),
            task_subtype=cpb.TaskSubtype.Value(toConstCase(task_subtype)),
            output=cpb.OutputType.Value(toConstCase(output_type)),
            metrics=[cpb.Metric.Value(toConstCase(metric))],
            task_description='TA2 pipeline creation',
            max_pipelines=5))

    # Consume the response stream eagerly; returning a bare map() in
    # Python 3 would defer the gRPC reads until after the call returns.
    return [json.loads(MessageToJson(x)) for x in resp]
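# A minimal usage sketch for the variant above, assuming a TA2 reachable
# through get_stub() on the given port. The session JSON, data path, column
# names, and metric string are illustrative placeholders, not values from
# this repository.
def example_create_pipeline():
    return createPipeline(
        port=45042,
        session='{"session_id": "session_0"}',
        data='/data/o_196/trainData.csv',
        predictor='["cylinders", "displacement"]',
        response='["class"]',
        task_type='regression',
        task_subtype='univariate',
        output_type='real',
        metric='rootMeanSquaredError')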
def createPipeline(context=None, data_uri=None, task_type=None,
                   task_subtype=None, target_features=None,
                   predict_features=None, metrics=None, max_pipelines=10):
    stub = get_stub()
    problem_schema_path = os.environ.get('PROBLEM_ROOT')
    problem_supply = d3mds.D3MProblem(problem_schema_path)

    # Get the target features into the record format expected by the API.
    targets = problem_supply.get_targets()
    features = []
    for entry in targets:
        tf = core_pb2.Feature(resource_id=entry['resID'],
                              feature_name=entry['colName'])
        features.append(tf)

    # We are having trouble parsing the problem specs into valid API specs,
    # so just hardcode to certain problem types for now. We could fix this
    # with a more general lookup table that returns valid API codes.
    task = taskTypeLookup(task_type)
    tasksubtype = subTaskLookup(task_subtype)

    # The metrics in the problem files are imprecise text versions of the
    # enumerations, so just standardize on a fixed set; the metrics argument
    # is deliberately ignored. A lookup table would help here, too.
    metrics = [
        core_pb2.F1_MICRO, core_pb2.ROC_AUC,
        core_pb2.ROOT_MEAN_SQUARED_ERROR, core_pb2.F1, core_pb2.R_SQUARED
    ]

    context_in = cpb.SessionContext(session_id=context)
    request_in = cpb.PipelineCreateRequest(
        context=context_in,
        dataset_uri=data_uri,
        task=task,
        task_subtype=tasksubtype,
        metrics=metrics,
        task_description='Modsquad pipeline create request',
        target_features=features,
        predict_features=predict_features or [],
        max_pipelines=max_pipelines)
    resp = stub.CreatePipelines(request_in)
    return [json.loads(MessageToJson(x)) for x in resp]
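# A minimal sketch of driving the problem-schema variant above, assuming
# PROBLEM_ROOT points at a directory that d3mds.D3MProblem can read. The
# session id, problem path, and dataset URI are illustrative placeholders.
def example_create_from_problem_doc():
    os.environ['PROBLEM_ROOT'] = '/data/185_baseball/TRAIN/problem_TRAIN'
    return createPipeline(
        context='session_0',
        data_uri='file:///data/185_baseball/TRAIN/dataset_TRAIN/datasetDoc.json',
        task_type='classification',
        task_subtype='multiClass',
        max_pipelines=5)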
def test_pipeline(self):
    """Tries setting up a new pipeline"""
    channel = grpc.insecure_channel('localhost:45042')
    stub = core_pb2_grpc.CoreStub(channel)

    # Every pipeline request needs a session context, so start one first.
    msg = core_pb2.SessionRequest(user_agent="unittest", version="Foo")
    session = stub.StartSession(msg)
    self.assertEqual(session.response_info.status.code, core_pb2.OK)

    pipeline_request = core_pb2.PipelineCreateRequest(
        context=session.context,
        dataset_uri="file:///home/sheath/projects/D3M/cmu-ta3/test-data/185_baseball/TRAIN/dataset_TRAIN/datasetDoc.json",
        task=core_pb2.TASK_TYPE_UNDEFINED,
        task_subtype=core_pb2.TASK_SUBTYPE_UNDEFINED,
        task_description="",
        output=core_pb2.OUTPUT_TYPE_UNDEFINED,
        metrics=[],
        target_features=[],
        predict_features=[],
        max_pipelines=10)

    # CreatePipelines returns a stream of progress replies; each one should
    # report OK.
    p = stub.CreatePipelines(pipeline_request)
    for response in p:
        self.assertEqual(response.response_info.status.code, core_pb2.OK)
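# A companion teardown sketch, assuming the EndSession(SessionContext) ->
# Response shape this API version uses elsewhere in this file; 'session' is
# the StartSession reply from the test above. Ending the session keeps
# repeated test runs from leaking server-side state.
def end_test_session(stub, session):
    resp = stub.EndSession(session.context)
    assert resp.status.code == core_pb2.OK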
def pipeline_create_parse():
    # Build a request by hand, then round-trip it through serialization and
    # JSON to check that nothing is lost in translation.
    req = core_pb2.PipelineCreateRequest()
    req.context.session_id = 'session_0'
    req.train_features.add(
        feature_id='cylinders',
        data_uri='data/d3m/o_196seed/data/trainDatamerged.tsv')
    req.train_features.add(
        feature_id='cylinders',
        data_uri='data/d3m/o_196seed/data/trainDatamerged.tsv')
    req.task = core_pb2.REGRESSION
    req.task_subtype = core_pb2.UNIVARIATE
    req.output = core_pb2.REAL
    req.metrics.append(core_pb2.ROOT_MEAN_SQUARED_ERROR)
    req.target_features.add(
        feature_id='class',
        data_uri='data/d3m/o_196seed/data/trainDatamerged.tsv')
    req.max_pipelines = 10

    msg_and_back(req, core_pb2.PipelineCreateRequest)
    print('-' * 40)
    content = MessageToJson(req, including_default_value_fields=True)
    print(content)
    print('-' * 40)
    json_parse(content, core_pb2.PipelineCreateRequest)
    print('-' * 40)
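# Hedged sketches of the two round-trip helpers used above, which are not
# defined in this snippet; these assume msg_and_back checks binary
# serialization and json_parse checks JSON parsing, with Parse imported from
# google.protobuf.json_format as in the other snippets here.
def msg_and_back(msg, message_cls):
    # Serialize to bytes and parse into a fresh message of the same type.
    clone = message_cls()
    clone.ParseFromString(msg.SerializeToString())
    print(clone)
    return clone

def json_parse(content, message_cls):
    # Parse a JSON string produced by MessageToJson back into a message.
    return Parse(content, message_cls())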
def run():
    channel = grpc.insecure_channel('localhost:45042')
    stub = crpc.CoreStub(channel)
    dstub = drpc.DataExtStub(channel)
    dfstub = dfrpc.DataflowExtStub(channel)

    # Start session
    session_response = stub.StartSession(
        core.SessionRequest(user_agent="xxx", version="1.0"))
    session_context = session_response.context
    print("Session started (%s)" % str(session_context.session_id))

    # Send pipeline creation request
    dataset_uri = "file:///tmp/data/185_baseball/185_baseball_dataset/datasetDoc.json"
    some_features = [
        core.Feature(resource_id="0", feature_name="d3mIndex"),
        core.Feature(resource_id="0", feature_name="Games_played"),
        core.Feature(resource_id="0", feature_name="Runs"),
        core.Feature(resource_id="0", feature_name="Hits"),
        core.Feature(resource_id="0", feature_name="Home_runs")
    ]
    target_features = [
        core.Feature(resource_id="0", feature_name="Hall_of_Fame")
    ]
    task = core.TaskType.Value('CLASSIFICATION')
    task_subtype = core.TaskSubtype.Value('MULTICLASS')
    task_description = "Classify Hall of Fame"
    output = core.OutputType.Value('OUTPUT_TYPE_UNDEFINED')
    metrics = [
        core.PerformanceMetric.Value('F1_MICRO'),
        core.PerformanceMetric.Value('F1_MACRO')
    ]
    max_pipelines = 10
    pipeline_ids = []

    print("Training with some features")
    pc_request = core.PipelineCreateRequest(context=session_context,
                                            dataset_uri=dataset_uri,
                                            predict_features=some_features,
                                            task=task,
                                            task_subtype=task_subtype,
                                            task_description=task_description,
                                            output=output,
                                            metrics=metrics,
                                            target_features=target_features,
                                            max_pipelines=max_pipelines)
    '''
    # Iterate over results
    for pcr in stub.CreatePipelines(pc_request):
        print(str(pcr))
        if len(pcr.pipeline_info.scores) > 0:
            pipeline_ids.append(pcr.pipeline_id)

    print("Training with some features")
    pc_request = core.PipelineCreateRequest(
        context=session_context,
        train_features=some_features,
        task=task,
        task_subtype=task_subtype,
        task_description=task_description,
        output=output,
        metrics=metrics,
        target_features=target_features,
        max_pipelines=max_pipelines)
    '''
    result = stub.CreatePipelines(pc_request)

    # Iterate over results
    for pcr in result:
        print(str(pcr))
        '''
        for gdr in dfstub.GetDataflowResults(
                dfext.PipelineReference(context=session_context,
                                        pipeline_id=pcr.pipeline_id)):
            print(gdr)
        '''
        if len(pcr.pipeline_info.scores) > 0:
            pipeline_id = pcr.pipeline_id
            pipeline_ids.append(pipeline_id)
            dflow = dfstub.DescribeDataflow(
                dfext.PipelineReference(context=session_context,
                                        pipeline_id=pipeline_id))
            print(dflow)
            exres = stub.ExportPipeline(
                core.PipelineExportRequest(
                    context=session_context,
                    pipeline_id=pipeline_id,
                    pipeline_exec_uri="file:///tmp/{}".format(pipeline_id)))
            print(exres)
            '''
            if pcr.pipeline_info.predict_result_uri is not None:
                df = pandas.read_csv(pcr.pipeline_info.predict_result_uri,
                                     index_col="d3mIndex")
                print(df)
            '''

    print("************** Executing/Testing Pipelines")
    # Execute pipelines
    for pipeline_id in pipeline_ids:
        print("Executing Pipeline %s" % pipeline_id)
        ep_request = core.PipelineExecuteRequest(context=session_context,
                                                 pipeline_id=pipeline_id,
                                                 dataset_uri=dataset_uri)
        for ecr in stub.ExecutePipeline(ep_request):
            print(str(ecr))
            # Proto3 string fields default to "", never None, so test for a
            # non-empty URI.
            if ecr.result_uri:
                df = pandas.read_csv(ecr.result_uri, index_col="d3mIndex")
                print(df)

    list_request = core.PipelineListRequest(context=session_context)
    lrr = stub.ListPipelines(list_request)
    print(lrr.pipeline_ids)

    print("************** Cached pipeline create results")
    pcrr = core.PipelineCreateResultsRequest(context=session_context,
                                             pipeline_ids=lrr.pipeline_ids)
    for gcpr in stub.GetCreatePipelineResults(pcrr):
        print(str(gcpr))

    print("************** Cached pipeline execute results")
    perr = core.PipelineExecuteResultsRequest(context=session_context,
                                              pipeline_ids=lrr.pipeline_ids)
    for gepr in stub.GetExecutePipelineResults(perr):
        print(str(gepr))

    print("*********** Updating Metric to Accuracy.. Create pipelines again")
    metric = core.PerformanceMetric.Value('ACCURACY')
    ups_request = core.SetProblemDocRequest(
        context=session_context,
        updates=[
            core.SetProblemDocRequest.ReplaceProblemDocField(metric=metric)
        ])
    print(stub.SetProblemDoc(ups_request))

    print("********** Re-running pipeline creation")
    for pcr in stub.CreatePipelines(
            core.PipelineCreateRequest(context=session_context)):
        print(str(pcr))

    stub.EndSession(session_context)
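# Entry-point guard so the walkthrough above can be run directly against a
# TA2/TA3 server listening on localhost:45042.
if __name__ == '__main__':
    run()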
def pipeline_create(info_str=None):
    """Send the pipeline create request via gRPC"""
    if info_str is None:
        info_str = get_test_info_str()

    if info_str is None:
        err_msg = 'UI Str for %s is None' % PIPELINE_CREATE_REQUEST
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # Convert info string to dict
    # --------------------------------
    try:
        info_dict = json.loads(info_str, object_pairs_hook=OrderedDict)
    except json.decoder.JSONDecodeError as err_obj:
        err_msg = 'Failed to convert UI Str to JSON: %s' % (err_obj)
        return get_failed_precondition_response(err_msg)

    if KEY_CONTEXT_FROM_UI not in info_dict:
        return get_failed_precondition_response(ERR_NO_CONTEXT)

    if KEY_SESSION_ID_FROM_UI not in info_dict[KEY_CONTEXT_FROM_UI]:
        return get_failed_precondition_response(ERR_NO_SESSION_ID)

    # --------------------------------
    # Convert the JSON string to a gRPC request
    # --------------------------------
    try:
        req = Parse(info_str, core_pb2.PipelineCreateRequest())
    except ParseError as err_obj:
        err_msg = 'Failed to convert JSON to gRPC: %s' % (err_obj)
        return get_failed_precondition_response(err_msg)

    if settings.TA2_STATIC_TEST_MODE:
        template_info = get_predict_file_info_dict(info_dict.get('task'))
        template_str = get_grpc_test_json(
            'test_responses/createpipeline_ok.json', template_info)

        # These next lines embed file uri content into the JSON
        embed_util = FileEmbedUtil(template_str)
        if embed_util.has_error:
            return get_failed_precondition_response(embed_util.error_message)
        return embed_util.get_final_results()

    # --------------------------------
    # Get the connection; return an error if there are channel issues
    # --------------------------------
    core_stub, err_msg = TA2Connection.get_grpc_stub()
    if err_msg:
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # Send the gRPC request
    # --------------------------------
    messages = []
    try:
        for reply in core_stub.CreatePipelines(req):
            user_msg = MessageToJson(reply)
            print(user_msg)
            messages.append(user_msg)
    except Exception as ex:
        return get_reply_exception_response(str(ex))

    print('end of queue. make message list')
    result_str = '[' + ', '.join(messages) + ']'

    print('embed file contents')
    embed_util = FileEmbedUtil(result_str)
    if embed_util.has_error:
        print('file embed error')
        return get_failed_precondition_response(embed_util.error_message)

    print('return results')
    return embed_util.get_final_results()
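# A minimal invocation sketch, assuming KEY_CONTEXT_FROM_UI == 'context' and
# KEY_SESSION_ID_FROM_UI == 'session_id'; the session id, dataset URI, and
# task value are illustrative placeholders.
def example_pipeline_create():
    info = json.dumps({
        'context': {'session_id': 'session_0'},
        'dataset_uri': 'file:///data/185_baseball/datasetDoc.json',
        'task': 'CLASSIFICATION',
        'max_pipelines': 5,
    })
    return pipeline_create(info_str=info)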