Пример #1
0
    def ProduceSolution(self, request, context):
        """Handle a gRPC ProduceSolution request.

        Looks up the fitted pipeline named by the request and schedules a
        produce job on the backend.

        Parameters
        ----------
        request:
            A ``core_pb2.ProduceSolutionRequest`` carrying the fitted
            solution id, input values, and outputs to expose.
        context:
            gRPC servicer context (unused here).

        Returns
        -------
        core_pb2.ProduceSolutionResponse
            Carries the backend request id, or is empty when the fitted
            solution id is unknown.
        """
        fitted_solution_id = request.fitted_solution_id
        logger.info('method=ProduceSolution, fitted_solution_id=%s',
                    fitted_solution_id)

        if not self.backend.fitted_pipeline_id_exists(fitted_solution_id):
            logger.info(
                'method=ProduceSolution, fitted_solution_id=%s, status=ERRORED info=No fitted_solution_id found',
                fitted_solution_id)
            # Empty response signals the caller that the id was not found.
            return core_pb2.ProduceSolutionResponse()

        input_data = [load_data(utils.decode_value(x)) for x in request.inputs]

        # Fix: the original copied the repeated field into a list only to
        # branch on it and assign True/False; a repeated protobuf field is
        # already truthy when non-empty, so a single bool() suffices.
        expose_outputs = bool(request.expose_outputs)

        request_id = self.backend.produce_pipeline_request(
            fitted_pipeline_id=fitted_solution_id,
            input_data=input_data,
            expose_outputs=expose_outputs)
        return core_pb2.ProduceSolutionResponse(request_id=request_id)
Пример #2
0
    def FitSolution(self, request, context):
        """Handle a gRPC FitSolution request.

        Resolves the pipeline and problem description for the given
        solution id and schedules a fit job on the backend.

        Parameters
        ----------
        request:
            A ``core_pb2.FitSolutionRequest`` carrying the solution id,
            input values, and outputs to expose.
        context:
            gRPC servicer context (unused here).

        Returns
        -------
        core_pb2.FitSolutionResponse
            Carries the backend request id, or is empty when the
            solution id is unknown.
        """
        solution_id = request.solution_id
        logger.info('method=FitSolution solution_id=%s', solution_id)

        pipeline, problem_description, _ = self.get_solution_problem(
            solution_id)
        if pipeline is None:
            logger.info(
                'method=FitSolution, solution_id=%s, status=ERRORED, error=Solution_id not found',
                solution_id)
            # Empty response signals the caller that the id was not found.
            return core_pb2.FitSolutionResponse()

        input_data = [load_data(utils.decode_value(x)) for x in request.inputs]

        # Fix: the original copied the repeated field into a list only to
        # branch on it and assign True/False; a repeated protobuf field is
        # already truthy when non-empty, so a single bool() suffices.
        expose_outputs = bool(request.expose_outputs)

        request_id = self.backend.fit_pipeline_request(
            problem_description=problem_description,
            pipeline=pipeline,
            input_data=input_data,
            expose_outputs=expose_outputs)

        return core_pb2.FitSolutionResponse(request_id=request_id)
Пример #3
0
    def ScoreSolution(self, request, context):
        """Handle a gRPC ScoreSolution request.

        Resolves the pipeline/problem for the solution id, builds the
        scoring and data-preparation pipelines, and schedules an
        evaluation job on the backend.

        Parameters
        ----------
        request:
            A ``core_pb2.ScoreSolutionRequest`` with the solution id,
            inputs, performance metrics, and scoring configuration.
        context:
            gRPC servicer context (unused here).

        Returns
        -------
        core_pb2.ScoreSolutionResponse
            Carries the backend request id, or is empty when the
            solution id is unknown.
        """
        solution_id = request.solution_id
        # Fix: log message previously said 'SocreSolution' (typo).
        logger.info('method=ScoreSolution, solution_id=%s', solution_id)

        pipeline, problem_description, _ = self.get_solution_problem(
            solution_id)
        if pipeline is None:
            # Fix: error log previously said 'method=FitSolution'
            # (copy-paste from FitSolution), which misattributed the error.
            logger.info(
                'method=ScoreSolution, solution_id=%s, status=ERRORED, error=Solution_id not found',
                solution_id)
            return core_pb2.ScoreSolutionResponse()

        input_data = [load_data(utils.decode_value(x)) for x in request.inputs]
        metrics = [
            utils.decode_performance_metric(metric)
            for metric in request.performance_metrics
        ]
        scoring_pipeline = schemas_utils.get_scoring_pipeline()
        data_preparation_params = decode_scoring_configuration(
            request.configuration)
        # The splitting pipeline is chosen by the configured method
        # (e.g. HOLDOUT, K_FOLD).
        data_preparation_pipeline = schemas_utils.get_splitting_pipeline(
            data_preparation_params['method'])

        request_id = self.backend.evaluate_pipeline_request(
            problem_description=problem_description,
            pipeline=pipeline,
            input_data=input_data,
            metrics=metrics,
            data_preparation_pipeline=data_preparation_pipeline,
            scoring_pipeline=scoring_pipeline,
            data_preparation_params=data_preparation_params)

        return core_pb2.ScoreSolutionResponse(request_id=request_id)
Пример #4
0
    def do_score(self, solution_id, dataset_path, problem_path, ta2_id):
        """Score a solution over a TA2 gRPC connection.

        Loads the problem, issues a ScoreSolution request with a HOLDOUT
        configuration, streams the results, and averages the scores for
        the problem's first performance metric.

        Parameters
        ----------
        solution_id:
            Id of the solution to score.
        dataset_path:
            Filesystem path to the dataset (converted to a file:// URI).
        problem_path:
            URI of the problem description to load.
        ta2_id:
            Identifier of the TA2 system (unused here).

        Returns
        -------
        dict or None
            ``{'score', 'normalized_score', 'metric'}`` on success;
            ``None`` when the problem cannot be parsed or no completed
            result with matching scores arrives.
        """
        # Fix: the original used a bare `except:` that only logged, then
        # fell through and crashed with UnboundLocalError on `problem`.
        # Narrow the exception and bail out explicitly instead.
        try:
            problem = Problem.load(problem_uri=problem_path)
        except Exception:
            logger.exception('Error parsing problem')
            return None

        # Encode each problem metric into its protobuf representation.
        metrics = [
            encode_performance_metric(metric)
            for metric in problem['problem']['performance_metrics']
        ]

        # Only the first metric is reported back to the caller.
        target_metric = problem['problem']['performance_metrics'][0]['metric']
        logger.info('target_metric %s !', target_metric)

        response = self.core.ScoreSolution(
            pb_core.ScoreSolutionRequest(
                solution_id=solution_id,
                inputs=[
                    pb_value.Value(dataset_uri='file://%s' % dataset_path, )
                ],
                performance_metrics=metrics,
                users=[],
                configuration=pb_core.ScoringConfiguration(
                    method='HOLDOUT',
                    train_test_ratio=0.75,
                    shuffle=True,
                    random_seed=0),
            ))
        logger.info('ScoreSolution response %s !', response)

        # Stream the results until a COMPLETED state carries usable scores.
        results = self.core.GetScoreSolutionResults(
            pb_core.GetScoreSolutionResultsRequest(
                request_id=response.request_id, ))
        for result in results:
            logger.info('result %s !', result)
            if result.progress.state == pb_core.COMPLETED:
                scores = []
                for metric_score in result.scores:
                    metric = decode_performance_metric(
                        metric_score.metric)['metric']
                    if metric == target_metric:
                        score = decode_value(metric_score.value)['value']
                        scores.append(score)
                if len(scores) > 0:
                    avg_score = round(sum(scores) / len(scores), 5)
                    normalized_score = PerformanceMetric[
                        target_metric.name].normalize(avg_score)

                    return {
                        'score': avg_score,
                        'normalized_score': normalized_score,
                        'metric': target_metric.name.lower()
                    }
Пример #5
0
    def SearchSolutions(self, request, context):
        """Handle a gRPC SearchSolutions request.

        Decodes the problem, template, and inputs from the request,
        spawns a remote search actor, and registers it under a fresh
        search id.

        Parameters
        ----------
        request:
            A ``core_pb2.SearchSolutionsRequest``.
        context:
            gRPC servicer context (unused here).

        Returns
        -------
        core_pb2.SearchSolutionsResponse
            Carries the id of the newly started search.
        """
        user_agent = request.user_agent
        # Fix: log previously said 'method=SearchSolution' (missing 's').
        logger.info('method=SearchSolutions, agent=%s', user_agent)

        # Checking version of protocol.
        if request.version != self.version:
            # Fix: message had a stray leading space and no separator
            # before the version value ('...version%s').
            logger.info('method=SearchSolutions, info=Different api version %s',
                        self.version)

        # Types allowed by client; fall back to the full set when the
        # client specifies none.
        # NOTE(review): this value is never used later in this method —
        # presumably it should be forwarded to the search; confirm.
        allowed_value_types = list(request.allowed_value_types)
        if not allowed_value_types:
            allowed_value_types = ALLOWED_VALUE_TYPES

        problem_description = utils.decode_problem_description(request.problem)

        # Parsing and storing Pipeline Template (store this to a file instead of passing it)
        with d3m_utils.silence():
            template = utils.decode_pipeline_description(
                pipeline_description=request.template,
                resolver=Resolver(
                    primitives_blocklist=PrimitivesList.BlockList))

        # Convert the search budget from minutes to seconds.
        time_bound_search = request.time_bound_search * 60

        input_data = [load_data(utils.decode_value(x)) for x in request.inputs]

        search = SearchWrappers.remote(
            search_class=DataDrivenSearch,
            problem_description=problem_description,
            backend=self.backend,
            primitives_blocklist=PrimitivesList.BlockList,
            ranking_function=dummy_ranking_function,
            n_workers=self.n_workers)

        search_id = ray.get(search.get_search_id.remote())

        # Register the running search so later RPCs can find it by id.
        self.searches[search_id] = search
        request_id = self.searches[search_id].search_request.remote(
            time_left=time_bound_search, input_data=input_data)

        self.request_mapping[search_id] = request_id
        self.solutions[search_id] = []
        self.problem_descriptions[search_id] = problem_description
        return core_pb2.SearchSolutionsResponse(search_id=search_id)
Пример #6
0
    def SplitData(self, request, context):
        """Handle a gRPC SplitData request (generator).

        Splits the input dataset into train/test/score folds using the
        splitting pipeline selected by the scoring configuration, saves
        each fold to disk, and yields one response per fold.

        Parameters
        ----------
        request:
            A ``core_pb2.SplitDataRequest`` with inputs, a scoring
            configuration, and a problem description.
        context:
            gRPC servicer context (unused here).

        Yields
        ------
        core_pb2.SplitDataResponse
            One response per fold with train/test/score dataset URIs,
            or a single empty response when preparation fails.
        """
        input_data = [load_data(utils.decode_value(x)) for x in request.inputs]
        scoring_configuration = decode_scoring_configuration(
            request.scoring_configuration)
        problem_description = utils.decode_problem_description(request.problem)
        data_pipeline = schemas_utils.get_splitting_pipeline(
            scoring_configuration['method'])

        data_random_seed = 0
        outputs, data_result = runtime_module.prepare_data(
            data_pipeline=data_pipeline,
            problem_description=problem_description,
            inputs=input_data,
            data_params=scoring_configuration,
            context=Context.TESTING,
            random_seed=data_random_seed,
            volumes_dir=EnvVars.D3MSTATICDIR,
            scratch_dir=Path.TEMP_STORAGE_ROOT,
            runtime_environment=None,
        )

        if data_result.has_error():
            # Fix: the original used '{}' (str.format style) with logging's
            # lazy %-style arguments, so the error was never interpolated.
            logger.info('method=SplitData, error=%s', data_result.error)
            # Empty response signals failure to the client.
            yield core_pb2.SplitDataResponse()
            return
        else:
            for i, (train_output, test_output,
                    score_output) in enumerate(zip(*outputs)):
                uri_list = []
                for output, tag in (
                    (train_output, 'train'),
                    (test_output, 'test'),
                    (score_output, 'score'),
                ):
                    # Save each fold under its own directory, e.g.
                    # <scratch>/train_output_0/datasetDoc.json.
                    path = os.path.join(Path.TEMP_STORAGE_ROOT,
                                        '{}_output_{}'.format(tag, i),
                                        'datasetDoc.json')
                    uri = get_uri(path)
                    output.save(uri)
                    uri_list.append(uri)
                # One response per fold triple.
                yield core_pb2.SplitDataResponse(
                    train_output=value_pb2.Value(dataset_uri=uri_list[0]),
                    test_output=value_pb2.Value(dataset_uri=uri_list[1]),
                    score_output=value_pb2.Value(dataset_uri=uri_list[2]),
                )
Пример #7
0
    def test_value(self):
        """Round-trip values through save/load, and through the full
        save -> encode -> decode -> load chain, for every allowed value type.
        """
        # Values should be strings because on loading a CSV values are not parsed.
        table = container.DataFrame({
            'a': ['1', '2', '3'],
            'b': ['4', '5', '6']},
            generate_metadata=True,
        )

        with tempfile.TemporaryDirectory() as scratch_dir:
            def validate_uri(uri):
                utils.validate_uri(uri, [scratch_dir])

            def dataframe_equal(a, b):
                return a.columns.tolist() == b.columns.tolist() and a.values.tolist() == b.values.tolist()

            # (value, value types to exercise, equality predicate)
            cases = [
                (
                    42,
                    [
                        utils.ValueType.RAW, utils.ValueType.LARGE_RAW,
                        utils.ValueType.PICKLE_BLOB, utils.ValueType.PICKLE_URI,
                        utils.ValueType.LARGE_RAW, utils.ValueType.LARGE_PICKLE_BLOB,
                    ],
                    operator.eq,
                ),
                (
                    PythonValue(42),
                    [
                        utils.ValueType.PICKLE_BLOB, utils.ValueType.PICKLE_URI,
                        utils.ValueType.LARGE_PICKLE_BLOB,
                    ],
                    operator.eq,
                ),
                (
                    table,
                    [
                        utils.ValueType.CSV_URI, utils.ValueType.PICKLE_BLOB,
                        utils.ValueType.PICKLE_URI, utils.ValueType.LARGE_PICKLE_BLOB,
                    ],
                    dataframe_equal,
                ),
            ]

            for value, value_types, equal in cases:
                for value_type in value_types:
                    # save -> load should reproduce the value.
                    saved = utils.save_value(
                        value,
                        [value_type],
                        scratch_dir,
                        raise_error=True,
                    )
                    loaded = utils.load_value(
                        saved,
                        validate_uri=validate_uri,
                        strict_digest=True,
                    )
                    self.assertTrue(equal(loaded, value), (value, value_type))

                    # save -> encode -> decode -> load should too.
                    saved = utils.save_value(
                        value,
                        [value_type],
                        scratch_dir,
                        raise_error=True,
                    )
                    encoded = utils.encode_value(
                        saved,
                        [value_type],
                        scratch_dir,
                        validate_uri=validate_uri,
                    )
                    decoded = utils.decode_value(
                        encoded,
                        validate_uri=validate_uri,
                        raise_error=True,
                    )
                    loaded = utils.load_value(
                        decoded,
                        validate_uri=validate_uri,
                        strict_digest=True,
                    )
                    self.assertTrue(equal(loaded, value), (value, value_type))