def ProduceSolution(self, request, context):
    """Handle a gRPC ProduceSolution request.

    Verifies the fitted solution id exists, decodes the input datasets, and
    queues a produce job on the backend.  Returns a ProduceSolutionResponse
    carrying the backend request id (empty response when the id is unknown).
    """
    fitted_solution_id = request.fitted_solution_id
    logger.info('method=ProduceSolution, fitted_solution_id=%s', fitted_solution_id)

    if not self.backend.fitted_pipeline_id_exists(fitted_solution_id):
        logger.info(
            'method=ProduceSolution, fitted_solution_id=%s, status=ERRORED info=No fitted_solution_id found',
            fitted_solution_id)
        # Unknown id: return an empty response instead of raising.
        return core_pb2.ProduceSolutionResponse()

    input_data = [load_data(utils.decode_value(x)) for x in request.inputs]
    # The backend only needs to know whether any outputs were requested, so
    # collapse the repeated field directly to a boolean (the original built a
    # throwaway list copy first).
    expose_outputs = bool(request.expose_outputs)

    request_id = self.backend.produce_pipeline_request(
        fitted_pipeline_id=fitted_solution_id,
        input_data=input_data,
        expose_outputs=expose_outputs)
    return core_pb2.ProduceSolutionResponse(request_id=request_id)
def FitSolution(self, request, context):
    """Handle a gRPC FitSolution request.

    Resolves the solution id to a pipeline + problem description, decodes the
    input datasets, and queues a fit job on the backend.  Returns a
    FitSolutionResponse with the backend request id (empty when the solution
    id is unknown).
    """
    solution_id = request.solution_id
    logger.info('method=FitSolution solution_id=%s', solution_id)

    pipeline, problem_description, _ = self.get_solution_problem(solution_id)
    if pipeline is None:
        logger.info(
            'method=FitSolution, solution_id=%s, status=ERRORED, error=Solution_id not found',
            solution_id)
        # Unknown id: return an empty response instead of raising.
        return core_pb2.FitSolutionResponse()

    input_data = [load_data(utils.decode_value(x)) for x in request.inputs]
    # Only the presence of requested outputs matters to the backend, so
    # collapse the repeated field directly to a boolean (the original built a
    # throwaway list copy first).
    expose_outputs = bool(request.expose_outputs)

    request_id = self.backend.fit_pipeline_request(
        problem_description=problem_description,
        pipeline=pipeline,
        input_data=input_data,
        expose_outputs=expose_outputs)
    return core_pb2.FitSolutionResponse(request_id=request_id)
def ScoreSolution(self, request, context):
    """Handle a gRPC ScoreSolution request.

    Resolves the solution id, decodes inputs / metrics / scoring
    configuration, and queues an evaluation job on the backend.  Returns a
    ScoreSolutionResponse with the backend request id (empty when the
    solution id is unknown).
    """
    solution_id = request.solution_id
    # Bug fix: log message previously read 'method=SocreSolution'.
    logger.info('method=ScoreSolution, solution_id=%s', solution_id)

    pipeline, problem_description, _ = self.get_solution_problem(solution_id)
    if pipeline is None:
        # Bug fix: this log line was copy-pasted as 'method=FitSolution'.
        logger.info(
            'method=ScoreSolution, solution_id=%s, status=ERRORED, error=Solution_id not found',
            solution_id)
        return core_pb2.ScoreSolutionResponse()

    input_data = [load_data(utils.decode_value(x)) for x in request.inputs]
    metrics = [
        utils.decode_performance_metric(metric)
        for metric in request.performance_metrics
    ]

    scoring_pipeline = schemas_utils.get_scoring_pipeline()
    data_preparation_params = decode_scoring_configuration(request.configuration)
    # Choose the splitting pipeline matching the requested method
    # (e.g. HOLDOUT, K_FOLD).
    data_preparation_pipeline = schemas_utils.get_splitting_pipeline(
        data_preparation_params['method'])

    request_id = self.backend.evaluate_pipeline_request(
        problem_description=problem_description,
        pipeline=pipeline,
        input_data=input_data,
        metrics=metrics,
        data_preparation_pipeline=data_preparation_pipeline,
        scoring_pipeline=scoring_pipeline,
        data_preparation_params=data_preparation_params)
    return core_pb2.ScoreSolutionResponse(request_id=request_id)
def do_score(self, solution_id, dataset_path, problem_path, ta2_id):
    """Score a solution via the TA2 ScoreSolution API and collect the result.

    Loads the problem to determine the target metric, requests a HOLDOUT
    scoring run for ``solution_id`` on the dataset, then streams
    GetScoreSolutionResults until completion.

    Returns a dict with keys 'score', 'normalized_score' and 'metric', or
    None when the problem cannot be parsed or no score for the target metric
    is produced.
    """
    try:
        problem = Problem.load(problem_uri=problem_path)
    except Exception:
        # Bug fix: the original bare `except:` logged and then fell through,
        # guaranteeing a NameError on `problem` below.  Bail out cleanly.
        logger.exception('Error parsing problem')
        return None

    # Encode metrics for the gRPC request.
    metrics = []
    for metric in problem['problem']['performance_metrics']:
        metrics.append(encode_performance_metric(metric))

    # Showing only the first metric
    target_metric = problem['problem']['performance_metrics'][0]['metric']
    logger.info('target_metric %s !', target_metric)

    response = self.core.ScoreSolution(
        pb_core.ScoreSolutionRequest(
            solution_id=solution_id,
            inputs=[
                pb_value.Value(dataset_uri='file://%s' % dataset_path, )
            ],
            performance_metrics=metrics,
            users=[],
            configuration=pb_core.ScoringConfiguration(
                method='HOLDOUT',
                train_test_ratio=0.75,
                shuffle=True,
                random_seed=0),
        ))
    logger.info('ScoreSolution response %s !', response)

    # Stream the scoring results until a COMPLETED state appears.
    results = self.core.GetScoreSolutionResults(
        pb_core.GetScoreSolutionResultsRequest(
            request_id=response.request_id, ))
    for result in results:
        logger.info('result %s !', result)
        if result.progress.state == pb_core.COMPLETED:
            scores = []
            for metric_score in result.scores:
                metric = decode_performance_metric(
                    metric_score.metric)['metric']
                # Keep only scores for the target metric (cross-validation
                # folds may yield several).
                if metric == target_metric:
                    score = decode_value(metric_score.value)['value']
                    scores.append(score)
            if scores:
                avg_score = round(sum(scores) / len(scores), 5)
                normalized_score = PerformanceMetric[
                    target_metric.name].normalize(avg_score)
                return {
                    'score': avg_score,
                    'normalized_score': normalized_score,
                    'metric': target_metric.name.lower()
                }
    # No completed result carried a score for the target metric.
    return None
def SearchSolutions(self, request, context):
    """Start an asynchronous pipeline search for the request's problem.

    Decodes the problem, template and inputs, spawns a remote search actor,
    kicks off the search, and registers the bookkeeping entries keyed by the
    new search id.  Returns a SearchSolutionsResponse carrying that id.
    """
    logger.info('method=SearchSolution, agent=%s', request.user_agent)

    # Warn (but do not fail) on a protocol version mismatch.
    if request.version != self.version:
        logger.info(' method=SearchSolution, info=Different api version%s',
                    self.version)

    # Value types the client accepts; fall back to the full default set.
    allowed_value_types = list(request.allowed_value_types)
    if not allowed_value_types:
        allowed_value_types = ALLOWED_VALUE_TYPES

    problem_description = utils.decode_problem_description(request.problem)

    # Decode the pipeline template while silencing d3m log noise.
    # (store this to a file instead of passing it)
    with d3m_utils.silence():
        template = utils.decode_pipeline_description(
            pipeline_description=request.template,
            resolver=Resolver(
                primitives_blocklist=PrimitivesList.BlockList))

    # The API expresses the bound in minutes; the search wants seconds.
    time_bound_search = request.time_bound_search * 60

    input_data = [load_data(utils.decode_value(v)) for v in request.inputs]

    search = SearchWrappers.remote(
        search_class=DataDrivenSearch,
        problem_description=problem_description,
        backend=self.backend,
        primitives_blocklist=PrimitivesList.BlockList,
        ranking_function=dummy_ranking_function,
        n_workers=self.n_workers)

    search_id = ray.get(search.get_search_id.remote())

    # Register the search and launch it asynchronously.
    self.searches[search_id] = search
    self.request_mapping[search_id] = search.search_request.remote(
        time_left=time_bound_search, input_data=input_data)
    self.solutions[search_id] = []
    self.problem_descriptions[search_id] = problem_description

    return core_pb2.SearchSolutionsResponse(search_id=search_id)
def SplitData(self, request, context):
    """Split the input datasets into train/test/score folds (streaming RPC).

    Runs the splitting pipeline selected by the scoring configuration, saves
    each fold to disk, and yields one SplitDataResponse per fold with the
    saved dataset URIs.  On preparation failure, yields a single empty
    response.
    """
    input_data = [load_data(utils.decode_value(x)) for x in request.inputs]
    scoring_configuration = decode_scoring_configuration(
        request.scoring_configuration)
    problem_description = utils.decode_problem_description(request.problem)
    data_pipeline = schemas_utils.get_splitting_pipeline(
        scoring_configuration['method'])
    data_random_seed = 0

    outputs, data_result = runtime_module.prepare_data(
        data_pipeline=data_pipeline,
        problem_description=problem_description,
        inputs=input_data,
        data_params=scoring_configuration,
        context=Context.TESTING,
        random_seed=data_random_seed,
        volumes_dir=EnvVars.D3MSTATICDIR,
        scratch_dir=Path.TEMP_STORAGE_ROOT,
        runtime_environment=None,
    )

    if data_result.has_error():
        # Bug fix: logging uses printf-style substitution, so the original
        # '{}' placeholder never showed the error; use '%s'.
        logger.info('method=SplitData, error=%s', data_result.error)
        yield core_pb2.SplitDataResponse()
        return

    # `outputs` is (train_folds, test_folds, score_folds); zip walks the
    # folds in lockstep.
    for i, (train_output, test_output, score_output) in enumerate(zip(*outputs)):
        uri_list = []
        for output, tag in (
            (train_output, 'train'),
            (test_output, 'test'),
            (score_output, 'score'),
        ):
            path = os.path.join(Path.TEMP_STORAGE_ROOT,
                                '{}_output_{}'.format(tag, i),
                                'datasetDoc.json')
            uri = get_uri(path)
            output.save(uri)
            uri_list.append(uri)

        yield core_pb2.SplitDataResponse(
            train_output=value_pb2.Value(dataset_uri=uri_list[0]),
            test_output=value_pb2.Value(dataset_uri=uri_list[1]),
            score_output=value_pb2.Value(dataset_uri=uri_list[2]),
        )
def test_value(self):
    """Round-trip values through save/load and save/encode/decode/load for
    every value type each value supports, asserting equality each time."""
    # Values should be strings because on loading a CSV values are not parsed.
    table = container.DataFrame(
        {'a': ['1', '2', '3'], 'b': ['4', '5', '6']},
        generate_metadata=True,
    )

    with tempfile.TemporaryDirectory() as scratch_dir:
        def validate_uri(uri):
            utils.validate_uri(uri, [scratch_dir])

        def dataframe_equal(a, b):
            same_columns = a.columns.tolist() == b.columns.tolist()
            same_values = a.values.tolist() == b.values.tolist()
            return same_columns and same_values

        # (value, value types to exercise, equality predicate)
        cases = [
            (
                42,
                [
                    utils.ValueType.RAW,
                    utils.ValueType.LARGE_RAW,
                    utils.ValueType.PICKLE_BLOB,
                    utils.ValueType.PICKLE_URI,
                    utils.ValueType.LARGE_RAW,
                    utils.ValueType.LARGE_PICKLE_BLOB,
                ],
                operator.eq,
            ),
            (
                PythonValue(42),
                [
                    utils.ValueType.PICKLE_BLOB,
                    utils.ValueType.PICKLE_URI,
                    utils.ValueType.LARGE_PICKLE_BLOB,
                ],
                operator.eq,
            ),
            (
                table,
                [
                    utils.ValueType.CSV_URI,
                    utils.ValueType.PICKLE_BLOB,
                    utils.ValueType.PICKLE_URI,
                    utils.ValueType.LARGE_PICKLE_BLOB,
                ],
                dataframe_equal,
            ),
        ]

        for value, value_types, equal in cases:
            for value_type in value_types:
                # Direct save -> load round trip.
                saved = utils.save_value(
                    value, [value_type], scratch_dir, raise_error=True)
                loaded = utils.load_value(
                    saved, validate_uri=validate_uri, strict_digest=True)
                self.assertTrue(equal(loaded, value), (value, value_type))

                # save -> encode -> decode -> load round trip.
                saved = utils.save_value(
                    value, [value_type], scratch_dir, raise_error=True)
                encoded = utils.encode_value(
                    saved, [value_type], scratch_dir,
                    validate_uri=validate_uri)
                decoded = utils.decode_value(
                    encoded, validate_uri=validate_uri, raise_error=True)
                reloaded = utils.load_value(
                    decoded, validate_uri=validate_uri, strict_digest=True)
                self.assertTrue(equal(reloaded, value), (value, value_type))