def test_pipeline(self):
    with open(os.path.join(TEST_PIPELINES_DIR, 'random-sample.yml'), 'r') as pipeline_file:
        pipeline = pipeline_module.Pipeline.from_yaml(
            pipeline_file,
            resolver=pipeline_module.Resolver(),
            strict_digest=True,
        )

    with tempfile.TemporaryDirectory() as scratch_dir:
        def validate_uri(uri):
            utils.validate_uri(uri, [scratch_dir])

        pipeline_message = utils.encode_pipeline_description(
            pipeline,
            [
                utils.ValueType.RAW,
                utils.ValueType.CSV_URI,
                utils.ValueType.DATASET_URI,
                utils.ValueType.PICKLE_BLOB,
                utils.ValueType.PICKLE_URI,
            ],
            scratch_dir,
            validate_uri=validate_uri,
        )

        decoded_pipeline = utils.decode_pipeline_description(
            pipeline_message,
            pipeline_module.Resolver(),
            validate_uri=validate_uri,
            strict_digest=True,
        )

        self.assertEqual(
            pipeline.to_json_structure(nest_subpipelines=True),
            decoded_pipeline.to_json_structure(nest_subpipelines=True),
        )
def execute_pipeline(pipeline: pipeline_pb2.PipelineDescription,
                     dataset_filenames: List[str],
                     static_resource_path: Optional[str] = None) -> Any:
    """Execute a binary protobuf pipeline against the supplied D3M datasets using the d3m common runtime.

    Parameters
    ----------
    pipeline:
        Protobuf pipeline definition.
    dataset_filenames:
        Paths to folders containing the input D3M datasets.
    static_resource_path:
        Optional path to a directory of static files required by primitives.

    Returns
    -------
    The result of the pipeline execution.
    """
    # Transform the pipeline into the internal d3m representation.
    pipeline_d3m = utils.decode_pipeline_description(
        pipeline, metadata_pipeline.Resolver())

    # Load the data.
    inputs = _load_inputs(dataset_filenames)

    # Fit and produce.
    fitted_pipeline, _ = _fit(
        pipeline_d3m, inputs, volumes_dir=static_resource_path)
    result = _produce(fitted_pipeline, inputs)

    return result
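# A minimal usage sketch for execute_pipeline, assuming a serialized
# PipelineDescription protobuf already exists on disk; the file name and
# dataset path below are hypothetical placeholders.
def _example_execute_pipeline() -> Any:
    with open('pipeline.pb', 'rb') as proto_file:
        message = pipeline_pb2.PipelineDescription()
        message.ParseFromString(proto_file.read())
    # Run against a single D3M dataset folder.
    return execute_pipeline(message, ['/data/185_baseball_TRAIN'])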
def SearchSolutions(self, message):
    """Get potential pipelines and load into DB."""
    # Validate that the request is in the required format, and extract it if so.
    dataset_uri, _ = self.validator.validate_search_solutions_request(message)

    # Generate the search ID.
    search_id = self._generate_id()

    # Serialize the problem protobuf to JSON so it can be stored in the DB.
    prob = json_format.MessageToDict(message.problem)
    prob = json.dumps(prob)

    # Serialize the pipeline template to a string for storage in the DB,
    # if one was provided.
    search_template: Optional[str] = None
    if message.HasField('template'):
        search_template_obj = api_utils.decode_pipeline_description(
            message.template, pipeline.Resolver())
        search_template = search_template_obj.to_json()

    # Create the search row in the DB.
    search = models.Searches(id=search_id)
    self.session.add(search)
    self.session.commit()

    task = models.Tasks(problem=prob,
                        pipeline=search_template,
                        type="EXLINE",
                        dataset_uri=dataset_uri,
                        id=self._generate_id(),
                        search_id=search_id)
    self.session.add(task)

    # Commit all to the DB.
    self.session.commit()

    return search_id
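# A minimal sketch of reading back the task row created above, assuming a
# SQLAlchemy session like the one used in SearchSolutions:
#
#   task = session.query(models.Tasks).filter_by(search_id=search_id).first()
#   problem_dict = json.loads(task.problem)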
def convert_pipeline(input_file: str, output_file: str) -> pipeline_pb2.PipelineDescription:
    # Load the protobuf pipeline.
    with open(input_file, "rb") as infile:
        pipeline = pipeline_pb2.PipelineDescription()
        pipeline.ParseFromString(infile.read())

    # Convert to JSON and save.
    pipeline_d3m = ta3ta2.decode_pipeline_description(pipeline, pipeline_module.Resolver())
    with open(output_file, "w") as outfile:
        outfile.write(pipeline_d3m.to_json())

    return pipeline
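# A minimal sketch of calling convert_pipeline; both file names are
# hypothetical placeholders:
#
#   proto_pipeline = convert_pipeline('pipeline.pb', 'pipeline.json')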
def SearchSolutions(self, request, context):
    """Create a `Session` and start generating & scoring pipelines."""
    if len(request.inputs) > 1:
        raise error(context, grpc.StatusCode.UNIMPLEMENTED,
                    "Search with more than 1 input is not supported")

    expected_version = pb_core.DESCRIPTOR.GetOptions().Extensions[
        pb_core.protocol_version]
    if request.version != expected_version:
        logger.error("TA3 is using a different protocol version: %r (us: %r)",
                     request.version, expected_version)

    template = request.template
    if template is not None and len(template.steps) > 0:
        # The template is a pb_pipeline.PipelineDescription message.
        pipeline = decode_pipeline_description(template, pipeline_module.Resolver())
        if pipeline.has_placeholder():
            template = pipeline.to_json_structure()
        else:
            # The pipeline template is fully defined, so run it directly
            # instead of searching.
            problem = None
            if request.problem:
                problem = decode_problem_description(request.problem)
            search_id = self._ta2.new_session(problem)
            dataset = request.inputs[0].dataset_uri
            if not dataset.startswith('file://'):
                dataset = 'file://' + dataset
            self._ta2.build_fixed_pipeline(
                search_id, pipeline.to_json_structure(), dataset)
            return pb_core.SearchSolutionsResponse(search_id=str(search_id))
    else:
        template = None

    dataset = request.inputs[0].dataset_uri
    if not dataset.endswith('datasetDoc.json'):
        raise error(context, grpc.StatusCode.INVALID_ARGUMENT,
                    "Dataset is not in D3M format: %s", dataset)
    if not dataset.startswith('file://'):
        dataset = 'file://' + dataset

    problem = decode_problem_description(request.problem)

    timeout_search = request.time_bound_search
    timeout_run = request.time_bound_run
    report_rank = request.rank_solutions_limit > 0
    if timeout_search <= 0.0:
        timeout_search = None
    if timeout_run <= 0.0:
        timeout_run = None

    search_id = self._ta2.new_session(problem)
    session = self._ta2.sessions[search_id]
    task_keywords = session.problem['problem']['task_keywords']
    metrics = session.metrics
    self._ta2.build_pipelines(search_id, dataset, task_keywords, metrics,
                              timeout_search, timeout_run, template,
                              report_rank=report_rank)

    return pb_core.SearchSolutionsResponse(search_id=str(search_id))
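# A minimal client-side sketch of invoking SearchSolutions over gRPC. It
# assumes the standard TA3TA2 generated modules (pb_core_grpc with CoreStub,
# pb_value with the Value message) and a server address of localhost:45042;
# all of those names, plus problem_msg and dataset_uri, are assumptions or
# caller-supplied placeholders.
def _example_search_solutions_client(problem_msg, dataset_uri: str) -> str:
    channel = grpc.insecure_channel('localhost:45042')  # assumed server address
    stub = pb_core_grpc.CoreStub(channel)
    request = pb_core.SearchSolutionsRequest(
        version=pb_core.DESCRIPTOR.GetOptions().Extensions[pb_core.protocol_version],
        problem=problem_msg,
        inputs=[pb_value.Value(dataset_uri=dataset_uri)],
        time_bound_search=10.0,
    )
    response = stub.SearchSolutions(request)
    return response.search_id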