def get_data_flow_results(info_str=None):
    """Ask a TA2 to GetDataflowResults via gRPC"""
    if info_str is None:
        err_msg = 'UI string for PipelineReference is None'
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # Is this valid JSON?
    # --------------------------------
    try:
        raven_dict = json.loads(info_str, object_pairs_hook=OrderedDict)
    except json.decoder.JSONDecodeError as err_obj:
        err_msg = 'Failed to convert UI string to JSON: %s' % (err_obj)
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # convert the JSON string to a gRPC request
    # --------------------------------
    try:
        req = Parse(info_str, dataflow_ext_pb2.PipelineReference())
    except ParseError as err_obj:
        err_msg = 'Failed to convert JSON to gRPC: %s' % (err_obj)
        return get_failed_precondition_response(err_msg)

    # In test mode, return canned response
    #
    if settings.TA2_STATIC_TEST_MODE:
        info_dict = dict(pipelineId=raven_dict.get('pipelineId'))
        return get_grpc_test_json(
            'test_responses/get_dataflow_results_ok.json', info_dict)

    # --------------------------------
    # Get the connection, return an error if there are channel issues
    # --------------------------------
    dataflow_stub, err_msg = TA2Connection.get_grpc_dataflow_stub()
    if err_msg:
        return get_failed_precondition_sess_response(err_msg)

    # --------------------------------
    # Send the gRPC request
    # --------------------------------
    try:
        reply = dataflow_stub.GetDataflowResults(req)
    except Exception as ex:
        return get_failed_precondition_response(str(ex))

    if reply and str(reply) == VAL_GRPC_STATE_CODE_NONE:
        err_msg = ('Unknown gRPC state.'
                   ' (Was a GetDataflowResults request sent?)')
        return get_failed_precondition_response(err_msg)

    # --------------------------------
    # Convert the reply to JSON and send it on
    # --------------------------------
    results = map(MessageToJson, reply)
    result_str = '[' + ', '.join(results) + ']'

    return result_str
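A quick usage sketch for get_data_flow_results, assuming the module-level imports the snippet relies on (json, OrderedDict, Parse, ParseError, MessageToJson, settings, TA2Connection, dataflow_ext_pb2) are in scope; the session and pipeline ids are illustrative placeholders, not values from the source:

import json

# Build the PipelineReference JSON string the UI would normally send.
ui_request = json.dumps({'context': {'session_id': 'session_01'},
                         'pipelineId': 'pipeline_1'})

# Returns either a JSON list of dataflow results or a
# failed-precondition error payload, both as strings.
print(get_data_flow_results(ui_request))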
Example #2
def describe_data_flow():

    req = dataflow_ext_pb2.PipelineReference()

    req.context.session_id = 'session_01'
    req.pipeline_id = 'pipeline_1'

    content = MessageToJson(req)
    print('JSON:\n')
    print(content)
    print('-' * 40)

    json_parse(content, dataflow_ext_pb2.PipelineReference)
    print('-' * 40)

    resp = dataflow_ext_pb2.DataflowDescription()

    resp.response_info.status.code = core_pb2.OK
    resp.response_info.status.details = "(static test response)"
    resp.pipeline_id = 'pipeline_1'

    # Add two modules
    for idx in range(0, 2):

        resp.modules.add(id='module_id %d' % idx,
                         type='module_type %d' % idx,
                         label='module_label %d' % idx)

        # For each module, add 2 inputs and 2 outputs
        for idx2 in range(0, 2):
            resp.modules[idx].inputs.add()
            resp.modules[idx].inputs[idx2].name = 'name %d' % idx2
            resp.modules[idx].inputs[idx2].type = 'type %d' % idx2
            resp.modules[idx].inputs[idx2].value = 'value %d' % idx2

            resp.modules[idx].outputs.add()
            resp.modules[idx].outputs[idx2].name = 'name %d' % idx2
            resp.modules[idx].outputs[idx2].type = 'type %d' % idx2

    # Add two connections
    for idx in range(0, 2):
        resp.connections.add()
        resp.connections[idx].from_module_id = 'module %d' % idx
        resp.connections[idx].from_output_name = 'from_output_name %d' % idx
        resp.connections[idx].to_module_id = 'to_module_id %d' % idx
        resp.connections[idx].to_input_name = 'to_input_name %d' % idx

    content = MessageToJson(resp)
    print('JSON:\n')
    print(content)
    print('-' * 40)

    print('-' * 40)
    print('gRPC:\n')
    json_parse(content, dataflow_ext_pb2.DataflowDescription)
    print('-' * 40)
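The demo above calls a json_parse helper that is not shown. Below is a minimal sketch consistent with how it is called (a message class, not an instance, is passed as the second argument); the body is a reconstruction, not the original helper:

from google.protobuf.json_format import Parse, ParseError

def json_parse(json_str, message_class):
    """Round-trip a JSON string into a fresh message_class instance."""
    try:
        msg = Parse(json_str, message_class())
    except ParseError as err_obj:
        print('Failed to convert JSON to gRPC: %s' % err_obj)
        return None
    print('gRPC:\n')
    print(msg)
    return msg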
Example #3
def describe_data_flow(raven_json_str=None):
    """
    Send a PipelineReference to the DescribeDataflow command
    """
    if raven_json_str is None:
        err_msg = 'No data found for the PipelineReference'
        return get_failed_precondition_sess_response(err_msg)

    # --------------------------------
    # The UI has sent JSON in string format that contains the PipelineReference
    # Make sure it's valid JSON
    # --------------------------------
    try:
        raven_dict = json.loads(raven_json_str)
    except json.decoder.JSONDecodeError as err_obj:
        err_msg = 'Failed to convert UI string to JSON: %s' % (err_obj)
        return get_failed_precondition_sess_response(err_msg)

    # --------------------------------
    # convert the JSON string to a gRPC request
    # --------------------------------
    try:
        req = Parse(raven_json_str, dataflow_ext_pb2.PipelineReference())
    except ParseError as err_obj:
        err_msg = 'Failed to convert JSON to gRPC: %s' % (err_obj)
        return get_failed_precondition_sess_response(err_msg)

    # In test mode, return canned response
    #
    if settings.TA2_STATIC_TEST_MODE:
        info_dict = dict(pipelineId=raven_dict.get('pipelineId'))
        return get_grpc_test_json('test_responses/describe_data_flow_ok.json',
                                  info_dict)

    # --------------------------------
    # Get the connection, return an error if there are channel issues
    # --------------------------------
    dataflow_stub, err_msg = TA2Connection.get_grpc_dataflow_stub()
    if err_msg:
        return get_failed_precondition_sess_response(err_msg)

    # --------------------------------
    # Send the gRPC request
    # --------------------------------
    try:
        # The RPC method is DescribeDataflow; DataflowDescription is the
        # response message type, not a callable on the stub.
        reply = dataflow_stub.DescribeDataflow(req)
    except Exception as ex:
        return get_failed_precondition_sess_response(str(ex))

    # --------------------------------
    # Convert the reply to JSON and send it back
    # --------------------------------
    return MessageToJson(reply)
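As with get_data_flow_results, callers hand describe_data_flow the raw JSON string received from the UI. A short sketch of both return paths (the ids are placeholders):

import json

request_str = json.dumps({'context': {'session_id': 'session_01'},
                          'pipelineId': 'pipeline_1'})

reply_json = describe_data_flow(request_str)  # DataflowDescription as JSON
err_json = describe_data_flow(None)           # failed-precondition payload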
Example #4
def run():
    channel = grpc.insecure_channel('localhost:45042')
    stub = crpc.CoreStub(channel)
    dstub = drpc.DataExtStub(channel)
    dfstub = dfrpc.DataflowExtStub(channel)

    # Start Session
    session_response = stub.StartSession(
        core.SessionRequest(user_agent="xxx", version="1.0"))
    session_context = session_response.context
    print("Session started (%s)" % str(session_context.session_id))

    # Send pipeline creation request
    dataset_uri = "file:///tmp/data/185_baseball/185_baseball_dataset/datasetDoc.json"
    some_features = [
        core.Feature(resource_id="0", feature_name="d3mIndex"),
        core.Feature(resource_id="0", feature_name="Games_played"),
        core.Feature(resource_id="0", feature_name="Runs"),
        core.Feature(resource_id="0", feature_name="Hits"),
        core.Feature(resource_id="0", feature_name="Home_runs")
    ]
    target_features = [
        core.Feature(resource_id="0", feature_name="Hall_of_Fame")
    ]
    task = core.TaskType.Value('CLASSIFICATION')
    task_subtype = core.TaskSubtype.Value('MULTICLASS')
    task_description = "Classify Hall of Fame"
    output = core.OutputType.Value('OUTPUT_TYPE_UNDEFINED')
    metrics = [
        core.PerformanceMetric.Value('F1_MICRO'),
        core.PerformanceMetric.Value('F1_MACRO')
    ]
    max_pipelines = 10

    pipeline_ids = []

    print("Training with some features")
    pc_request = core.PipelineCreateRequest(context=session_context,
                                            dataset_uri=dataset_uri,
                                            predict_features=some_features,
                                            task=task,
                                            task_subtype=task_subtype,
                                            task_description=task_description,
                                            output=output,
                                            metrics=metrics,
                                            target_features=target_features,
                                            max_pipelines=max_pipelines)
    '''
    # Iterate over results
    for pcr in stub.CreatePipelines(pc_request):
        print(str(pcr))
        if len(pcr.pipeline_info.scores) > 0:
            pipeline_ids.append(pcr.pipeline_id)

    print("Training with some features")
    pc_request = core.PipelineCreateRequest(
        context = session_context,
        train_features = some_features,
        task = task,
        task_subtype = task_subtype,
        task_description = task_description,
        output = output,
        metrics = metrics,
        target_features = target_features,
        max_pipelines = max_pipelines
    )
    '''

    result = stub.CreatePipelines(pc_request)

    # Iterate over results
    for pcr in result:
        print(str(pcr))
        '''
        for gdr in dfstub.GetDataflowResults(dfext.PipelineReference(context = session_context,
                pipeline_id = pcr.pipeline_id)):
            print(gdr)
        '''
        if len(pcr.pipeline_info.scores) > 0:
            pipeline_id = pcr.pipeline_id
            pipeline_ids.append(pipeline_id)
            dflow = dfstub.DescribeDataflow(
                dfext.PipelineReference(context=session_context,
                                        pipeline_id=pipeline_id))
            print(dflow)

            exres = stub.ExportPipeline(
                core.PipelineExportRequest(
                    context=session_context,
                    pipeline_id=pipeline_id,
                    pipeline_exec_uri="file:///tmp/{}".format(pipeline_id)))
            print(exres)
            '''
            if pcr.pipeline_info.predict_result_uri is not None:
                df = pandas.read_csv(pcr.pipeline_info.predict_result_uri, index_col="d3mIndex")
                print(df)
            '''

    print("************** Executing/Testing Pipelines")

    # Execute pipelines
    for pipeline_id in pipeline_ids:
        print("Executing Pipeline %s" % pipeline_id)
        ep_request = core.PipelineExecuteRequest(context=session_context,
                                                 pipeline_id=pipeline_id,
                                                 dataset_uri=dataset_uri)
        for ecr in stub.ExecutePipeline(ep_request):
            print(str(ecr))
            if ecr.result_uri:  # proto3 strings are never None; check non-empty
                df = pandas.read_csv(ecr.result_uri, index_col="d3mIndex")
                print(df)

    list_request = core.PipelineListRequest(context=session_context)
    lrr = stub.ListPipelines(list_request)
    print(lrr.pipeline_ids)

    print("************** Cached pipeline create results")
    pcrr = core.PipelineCreateResultsRequest(context=session_context,
                                             pipeline_ids=lrr.pipeline_ids)
    for gcpr in stub.GetCreatePipelineResults(pcrr):
        print(str(gcpr))

    print("************** Cached pipeline execute results")
    perr = core.PipelineExecuteResultsRequest(context=session_context,
                                              pipeline_ids=lrr.pipeline_ids)
    for gepr in stub.GetExecutePipelineResults(perr):
        print(str(gepr))

    print("*********** Updating Metric to Accuracy.. Create pipelines again")
    metric = core.PerformanceMetric.Value('ACCURACY')
    ups_request = core.SetProblemDocRequest(
        context=session_context,
        updates=[
            core.SetProblemDocRequest.ReplaceProblemDocField(metric=metric)
        ])

    print(stub.SetProblemDoc(ups_request))
    print("********** Re-running pipeline creation")
    for pcr in stub.CreatePipelines(
            core.PipelineCreateRequest(context=session_context)):
        print(str(pcr))

    stub.EndSession(session_context)
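run() relies on module aliases defined elsewhere. A plausible import preamble, assuming the generated D3M TA3TA2-API modules follow the usual *_pb2 / *_pb2_grpc naming (the exact module names are an assumption), plus a main guard:

import grpc
import pandas

import core_pb2 as core
import core_pb2_grpc as crpc
import data_ext_pb2_grpc as drpc
import dataflow_ext_pb2 as dfext
import dataflow_ext_pb2_grpc as dfrpc

if __name__ == '__main__':
    run()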