def load_pipeline(pipeline):
    """Load a pipeline description from a file path.

    The serialization format is chosen from the file extension: a path
    ending in ``.json`` is parsed with ``Pipeline.from_json``; anything
    else is assumed to be YAML.
    """
    parse_json = pipeline.endswith('.json')
    with open(pipeline) as _pipeline:
        if parse_json:
            return Pipeline.from_json(_pipeline)
        return Pipeline.from_yaml(_pipeline)
def generate_template(pipeline_file: str) -> dict:
    """Convert a D3M pipeline JSON file into a search-template dict.

    Parameters
    ----------
    pipeline_file:
        Path to a pipeline description in JSON format.  Every step must be
        a ``PrimitiveStep``.

    Returns
    -------
    dict
        An ``OrderedDict`` template with ``name``, ``taskType``,
        ``taskSubtype``, ``inputType``, ``output`` (the last step's name)
        and ``steps`` entries.

    Raises
    ------
    ValueError
        On non-primitive steps, unrecognized hyperparameter encodings, or
        arguments that cannot be mapped to template inputs.
    """
    with open(pipeline_file) as f:
        pipeline = Pipeline.from_json(f)

    steps = []
    step_name = None  # name of the last processed step; becomes template output
    for i, step in enumerate(pipeline.steps):
        if not isinstance(step, PrimitiveStep):
            raise ValueError('Can only handle PrimitiveSteps')
        step_name = f'steps.{i}'

        # Only plain VALUE hyperparameters are representable in the template.
        hyperparameters = {}
        for name, value in step.hyperparams.items():
            if value['type'] == ArgumentType.VALUE:
                hyperparameters[name] = value['data']
            else:
                raise ValueError(
                    f'Do not know how to parse hyperparam: {str(value)}')

        arguments = []
        argument_keys = set(step.arguments.keys())
        for argument_name in ['inputs', 'outputs', 'reference']:
            if argument_name not in argument_keys:
                continue
            argument_keys.remove(argument_name)
            argument = step.arguments[argument_name]
            if argument['type'] != ArgumentType.CONTAINER:
                # BUG FIX: original reported step.arguments['inputs']['type']
                # here regardless of which argument actually failed.
                raise ValueError(
                    f"Do not know how to parse argument type: {argument['type']}")
            data = argument['data']
            if data == 'inputs.0':
                # Reference to the pipeline's own input placeholder.
                arguments.append('template_input')
            elif data.startswith('steps.') and data.endswith('.produce'):
                # BUG FIX: original tested step.arguments['inputs']['data']
                # for the '.produce' suffix even when handling 'outputs' or
                # 'reference'.  Strip the '.produce' suffix to keep only the
                # producing step's name.
                arguments.append(data[:-len('.produce')])
            else:
                raise ValueError(f'Do not know how to parse argument: {data}')

        # Any argument not consumed above is unsupported; dump it for
        # debugging before failing.
        if argument_keys:
            for argument_name in argument_keys:
                print(argument_name, step.arguments[argument_name])
            raise ValueError(f"Unused arguments: {argument_keys}")

        primitive = OrderedDict()
        primitive['primitive'] = str(step.primitive)
        primitive['hyperparameters'] = hyperparameters

        # Renamed from `step` to avoid shadowing the loop variable.
        step_entry = OrderedDict()
        step_entry['name'] = step_name
        step_entry['primitives'] = [primitive]
        step_entry['inputs'] = arguments
        steps.append(step_entry)

    template = OrderedDict()
    template['name'] = pipeline.id if pipeline.name is None else pipeline.name
    template['taskType'] = {'TaskType'}
    template['taskSubtype'] = {'TaskSubtype'}
    template['inputType'] = {'table'}
    template['output'] = step_name
    template['steps'] = steps
    return template
def DescribeSolution(self, request, context):
    """gRPC handler: rebuild the stored pipeline for a solution id and
    return it as an encoded pipeline description."""
    stage_info = self.get_from_stage_outputs(
        "GetSearchSolutionsResults", request.solution_id)

    # Deserialize the pipeline saved during the search stage and encode it
    # for the TA3-TA2 API.
    pipeline = Pipeline.from_json(stage_info["pipeline_json"])
    description = ta3ta2utils.encode_pipeline_description(
        pipeline, stage_info["allowed_value_types"], "/tmp")

    return core_pb2.DescribeSolutionResponse(pipeline=description)
def generate_pipeline(pipeline_path: str, dataset_path: str,
                      problem_doc_path: str,
                      resolver: Resolver = None) -> Runtime:
    """Simplified interface that fits a pipeline on a dataset.

    Parameters
    ----------
    pipeline_path:
        Path to the pipeline description (``.json`` or YAML).
    dataset_path:
        Path to the datasetDoc.json.
    problem_doc_path:
        Path to the problemDoc.json.
    resolver : Resolver
        Resolver to use.

    Returns
    -------
    Runtime
        The runtime after fitting the pipeline on the dataset.
    """
    # Pipeline description.  BUG FIX: select the parser by extension
    # (endswith) instead of the original substring test ('.json' in path),
    # which misfired on any path merely containing '.json'; this also
    # matches the other loaders in this module.
    with open(pipeline_path) as pipeline_file:
        if pipeline_path.endswith('.json'):
            pipeline_description = Pipeline.from_json(
                string_or_file=pipeline_file, resolver=resolver)
        else:
            pipeline_description = Pipeline.from_yaml(
                string_or_file=pipeline_file, resolver=resolver)

    # Problem Doc
    problem_doc = load_problem_doc(problem_doc_path)

    # Dataset: the loader needs a URI, so convert a bare filesystem path.
    # BUG FIX: prefix test instead of substring test for the URI scheme.
    if not dataset_path.startswith('file:'):
        dataset_path = 'file://{dataset_path}'.format(
            dataset_path=os.path.abspath(dataset_path))
    dataset = D3MDatasetLoader().load(dataset_uri=dataset_path)

    # Adding Metadata to Dataset
    dataset = add_target_columns_metadata(dataset, problem_doc)

    # Fitting Pipeline
    pipeline_runtime = Runtime(pipeline_description)
    pipeline_runtime.fit(inputs=[dataset])

    return pipeline_runtime
def fitproduce(self, input_item):
    """Fit a pipeline on the train split, produce predictions on the test
    split, write them to the predictions path, and return that path as a
    file URI."""
    problem_doc, pipeline_json, dataset_train, dataset_test = input_item[1:]

    # Deserialize and run the pipeline: fit on train, produce on test.
    runtime = Runtime(Pipeline.from_json(pipeline_json),
                      context=Context.TESTING)
    runtime.fit(inputs=[dataset_train], return_values=['outputs.0'])
    produced = runtime.produce(inputs=[dataset_test],
                               return_values=['outputs.0'])
    predictions = produced.values['outputs.0']

    # Persist predictions and hand back their location as a URI.
    save_path = self.get_predictions_save_path()
    utils.utils.write_predictions_to_file(predictions, save_path,
                                          problem_doc)
    return "file://%s" % save_path
def score(self, input_item):
    """Fit a pipeline on the train split, predict on the test split, and
    return the score achieved under the requested metric."""
    problem_doc, metric, pipeline_json, dataset_train, dataset_test = \
        input_item[1:]

    # Deserialize and run the pipeline: fit on train, produce on test.
    runtime = Runtime(Pipeline.from_json(pipeline_json),
                      context=Context.TESTING)
    runtime.fit(inputs=[dataset_train], return_values=['outputs.0'])
    produced = runtime.produce(inputs=[dataset_test],
                               return_values=['outputs.0'])
    predictions = produced.values['outputs.0']

    # Evaluate the predictions against the test split.
    return utils.train_utils.score(predictions,
                                   dataset_test,
                                   problem_doc,
                                   override_metric_key=metric)
import utils.utils
import utils.train_utils

if __name__ == "__main__":
    # CLI: <path_to_pipeline_json> <input_dir>.
    # Prints the achieved score, or "N/A" when anything goes wrong.
    try:
        path_to_pipeline_json = sys.argv[1]
        inputdir = sys.argv[2]

        # Load datasets
        problem_doc, dataset = utils.utils.load_data_from_dir(inputdir)

        # Create and fit the pipeline
        with open(path_to_pipeline_json, "r") as f:
            pipeline = Pipeline.from_json(f.read())
        pipeline_runtime = Runtime(pipeline, context=Context.TESTING)
        pipeline_runtime.fit(inputs=[dataset], return_values=['outputs.0'])

        # Produce predictions on the score split
        problem_doc_score, dataset_score = utils.utils.load_data_from_dir(
            inputdir, mode="score")
        score_predictions = pipeline_runtime.produce(
            inputs=[dataset_score], return_values=['outputs.0'])
        score_predictions = score_predictions.values['outputs.0']

        # Evaluate scores on score dir
        achieved_score = utils.train_utils.score(score_predictions,
                                                 dataset_score,
                                                 problem_doc_score)
        print(achieved_score)
    # BUG FIX: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt.  Keep the deliberate best-effort "N/A" output, but
    # only for real errors.
    except Exception:
        print("N/A")
def load_pipeline(pipeline_json):
    """Deserialize a pipeline from its JSON representation.

    Thin wrapper around ``Pipeline.from_json``.
    """
    pipeline = Pipeline.from_json(pipeline_json)
    return pipeline
def load_pipeline(pipeline_path):
    """Load a pipeline from disk, picking the JSON or YAML parser based on
    the file extension (``.json`` means JSON, anything else YAML)."""
    if pipeline_path.endswith('.json'):
        parse = Pipeline.from_json
    else:
        parse = Pipeline.from_yaml
    with open(pipeline_path, 'r') as pipeline_file:
        return parse(pipeline_file)