def GetCreatePipelineResults(self, request, context):
    """Mock GetCreatePipelineResults response"""
    sessioncontext = request.context
    if sessioncontext.session_id not in self.sessions:
        yield core_pb2.PipelineCreateResult(
            response_info=core_pb2.Response(
                status=core_pb2.Status(
                    code=core_pb2.FAILED_PRECONDITION,
                    details="Unknown session id: %s" % sessioncontext.session_id)))
        return

    results = [
        (core_pb2.COMPLETED, 'pipeline_1', True),
        (core_pb2.COMPLETED, 'pipeline_2', True),
    ]
    cnt = 0
    for progress, pipeline_id, send_pipeline in results:
        #print('sleep 1 second...')
        #time.sleep(1)
        if not context.is_active():
            logger.info("Client closed GetCreatePipelineResults stream")
            break  # stop streaming once the client disconnects

        msg = core_pb2.PipelineCreateResult(
            response_info=core_pb2.Response(
                status=core_pb2.Status(code=core_pb2.OK),
            ),
            progress_info=progress,
            pipeline_id=pipeline_id,
        )

        if send_pipeline:
            cnt += 1
            # try to create a legit file uri
            file_uri_dict = get_predict_file_info_dict('CLASSIFICATION')
            msg.pipeline_info.CopyFrom(
                core_pb2.Pipeline(
                    #predict_result_uris=['file:///out/predict1.csv'],
                    predict_result_uri=file_uri_dict.get(
                        TEST_KEY_FILE_URI, 'no file uri'),
                    output=core_pb2.OUTPUT_TYPE_UNDEFINED,
                    scores=[
                        core_pb2.Score(
                            metric=core_pb2.ACCURACY,
                            value=0.8,
                        ),
                        core_pb2.Score(
                            metric=core_pb2.ROC_AUC,
                            value=0.5,
                        ),
                    ],
                )
            )
        yield msg
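# The mock above relies on a test helper, get_predict_file_info_dict, and a
# TEST_KEY_FILE_URI constant, neither of which is defined in this section.
# A minimal sketch of what such a helper might look like -- the name of the
# key, the output layout, and the canned CSV contents are all assumptions,
# not the actual implementation:

import os
import tempfile

TEST_KEY_FILE_URI = 'file_uri'  # hypothetical dict key used by the mocks

def get_predict_file_info_dict(task_type):
    """Write a small canned prediction file and return a file:// URI for it.

    Hypothetical sketch: the real helper presumably returns a dict whose
    TEST_KEY_FILE_URI entry points at a prediction CSV appropriate for
    the given task type.
    """
    out_dir = tempfile.mkdtemp(prefix='predict_')
    file_path = os.path.join(out_dir, 'predict1.csv')
    with open(file_path, 'w') as f:
        if task_type == 'CLASSIFICATION':
            f.write('d3mIndex,class\n0,a\n1,b\n')
        else:
            f.write('d3mIndex,value\n0,0.1\n1,0.2\n')
    return {TEST_KEY_FILE_URI: 'file://' + file_path}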
def RunningPipeline(self, pipeline):
    response = self._create_response("Pipeline Running")
    progress = self._create_progress("RUNNING")
    self.session.update_pipeline(pipeline)
    result = core.PipelineCreateResult(
        response_info=response,
        progress_info=progress,
        pipeline_id=pipeline.id
    )
    self.session.cache_planner_result(pipeline, result)
    return result
def SubmittedPipeline(self, pipeline):
    response = self._create_response("Pipeline Submitted")
    progress = self._create_progress("SUBMITTED")
    self.session.add_pipeline(pipeline)
    result = core.PipelineCreateResult(
        response_info=response,
        progress_info=progress,
        pipeline_id=pipeline.id
    )
    self.session.cache_planner_result(pipeline, result)
    return result
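# RunningPipeline and SubmittedPipeline above lean on two small helpers,
# _create_response and _create_progress, that are not shown in this section.
# A minimal sketch of one plausible shape, assuming `core` is the generated
# core_pb2 module and that (as the mocks above confirm) enum values such as
# OK and RUNNING are exposed as module-level constants; the exact signatures
# are assumptions:

def _create_response(self, message, code="OK"):
    """Build a Response whose Status carries the given code and details."""
    return core.Response(
        status=core.Status(code=getattr(core, code), details=message))

def _create_progress(self, name):
    """Look up the module-level Progress enum value, e.g. core.RUNNING."""
    return getattr(core, name)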
def _response_session_invalid(self, session_id):
    """Returns a message that the given session does not exist"""
    pipeline = core_pb2.Pipeline(
        predict_result_uri="invalid",
        output=core_pb2.OUTPUT_TYPE_UNDEFINED,
        scores=[])
    msg = core_pb2.PipelineCreateResult(
        response_info=core_pb2.Response(
            status=core_pb2.Status(code=core_pb2.SESSION_UNKNOWN),
        ),
        progress_info=core_pb2.ERRORED,
        pipeline_id="invalid",
        pipeline_info=pipeline)
    return msg
def CompletedPipeline(self, pipeline, exec_pipeline):
    response = self._create_response("Pipeline Completed", "OK")
    progress = self._create_progress("COMPLETED")
    pipeline_info = None
    if exec_pipeline is None:
        response = self._create_response("Pipeline Failed", "INTERNAL")
    else:
        pipeline_info = self._create_pipeline_info(exec_pipeline)
        # Update session pipeline
        self.session.update_pipeline(exec_pipeline)
    result = core.PipelineCreateResult(
        response_info=response,
        progress_info=progress,
        pipeline_id=pipeline.id,
        pipeline_info=pipeline_info
    )
    self.session.cache_planner_result(pipeline, result)
    return result
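# CompletedPipeline assumes a _create_pipeline_info helper that converts an
# executed planner pipeline into the protobuf Pipeline message. A hedged
# sketch of one plausible shape -- the attribute names on exec_pipeline
# (scores, predict_result_uri) are assumptions, not the real interface:

def _create_pipeline_info(self, exec_pipeline):
    """Translate an executed pipeline into a core.Pipeline message."""
    scores = [
        core.Score(metric=getattr(core, metric_name), value=value)
        for metric_name, value in getattr(exec_pipeline, 'scores', [])
    ]
    return core.Pipeline(
        predict_result_uri=getattr(exec_pipeline, 'predict_result_uri', ''),
        output=core.OUTPUT_TYPE_UNDEFINED,
        scores=scores)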
def CreatePipelines(self, request, context):
    """Mock CreatePipelines response"""
    sessioncontext = request.context
    if sessioncontext.session_id not in self.sessions:
        yield core_pb2.PipelineCreateResult(
            response_info=core_pb2.Response(
                status=core_pb2.Status(
                    code=core_pb2.FAILED_PRECONDITION,
                    details="Unknown session id: %s" % sessioncontext.session_id)))
        return

    dataset_uri = request.dataset_uri
    task = request.task
    task_subtype = request.task_subtype
    task_description = request.task_description
    output = request.output
    metrics = request.metrics
    target_features = request.target_features
    predict_features = request.predict_features
    max_pipelines = request.max_pipelines

    logger.info("Got CreatePipelines request, session=%s",
                sessioncontext.session_id)

    results = [
        (core_pb2.SUBMITTED, 'pipeline_1', False),
        (core_pb2.SUBMITTED, 'pipeline_2', False),
        (core_pb2.RUNNING, 'pipeline_2', False),
        (core_pb2.RUNNING, 'pipeline_1', False),
        (core_pb2.COMPLETED, 'pipeline_1', True),
        (core_pb2.COMPLETED, 'pipeline_2', True),
    ]
    cnt = 0
    for progress, pipeline_id, send_pipeline in results:
        print('sleep 1 second...')
        time.sleep(1)
        if not context.is_active():
            logger.info("Client closed CreatePipelines stream")
            break  # stop streaming once the client disconnects

        msg = core_pb2.PipelineCreateResult(
            response_info=core_pb2.Response(
                status=core_pb2.Status(code=core_pb2.OK),
            ),
            progress_info=progress,
            pipeline_id=pipeline_id,
        )

        if send_pipeline:
            cnt += 1
            # try to create a legit file uri
            file_uri_dict = get_predict_file_info_dict('CLASSIFICATION')
            msg.pipeline_info.CopyFrom(
                core_pb2.Pipeline(
                    #predict_result_uris=['file:///out/predict1.csv'],
                    predict_result_uri=file_uri_dict.get(
                        TEST_KEY_FILE_URI, 'no file uri'),
                    output=output,
                    scores=[
                        core_pb2.Score(
                            metric=core_pb2.ACCURACY,
                            value=0.8,
                        ),
                        core_pb2.Score(
                            metric=core_pb2.ROC_AUC,
                            value=0.5,
                        ),
                    ],
                )
            )
        yield msg
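# Because CreatePipelines is a server-streaming RPC, a TA3 client consumes
# the progress updates by iterating the returned stream. A minimal usage
# sketch against the mock above -- the stub class name (CoreStub), the
# request/message field names, and the port are assumptions based on the
# generated core_pb2/core_pb2_grpc modules, not a verified client:

import grpc

def run_create_pipelines(session_id, dataset_uri):
    channel = grpc.insecure_channel('localhost:50051')
    stub = core_pb2_grpc.CoreStub(channel)
    request = core_pb2.PipelineCreateRequest(
        context=core_pb2.SessionContext(session_id=session_id),
        dataset_uri=dataset_uri,
        task=core_pb2.CLASSIFICATION,
        metrics=[core_pb2.ACCURACY, core_pb2.ROC_AUC])
    # Each yielded message is one progress update for one pipeline.
    for msg in stub.CreatePipelines(request):
        print(msg.pipeline_id, msg.progress_info)
        if msg.progress_info == core_pb2.COMPLETED:
            print('  predictions at:', msg.pipeline_info.predict_result_uri)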
def ProblemNotImplemented(self):
    response = self._create_response("Not Implemented", "UNIMPLEMENTED")
    result = core.PipelineCreateResult(
        response_info=response
    )
    return result
def CreatePipelines(self, request, context):
    logging.info("Message received: CreatePipelines: %s", request)
    session_id = request.context.session_id
    if session_id not in self._sessions:
        logging.warning(
            "Asked to create pipeline for session %s which does not exist",
            session_id)
        # This is a streaming RPC, so the error message must be yielded;
        # `return value` inside a generator would silently drop it.
        yield self._response_session_invalid(session_id)
        return
    session = self._sessions[session_id]

    # Setup pipeline specification
    dataset_uri = request.dataset_uri
    task_type = request.task
    # TODO: task_subtype is currently ignored.
    # TODO: task_description is currently ignored.
    metrics = request.metrics
    target_features = request.target_features
    predict_features = request.predict_features

    # We need to tell the TA1 where it can find output,
    # which will be in a created subdirectory of the dataset URI.
    # This assumes the URI is always file:// but changing that will
    # be hard, so it's an ok assumption for now.
    dataset_directory = dataset_uri_path(request.dataset_uri)
    output_directory = os.path.join(dataset_directory, "output")

    # We describe a set of related pipelines with a ProblemDescription.
    # This is basically all the parameters for the problem that the client
    # wants us to solve, with the idea that it will do whatever metalearning
    # stuff it wants and then produce a set of PipelineDescriptions,
    # where each Pipeline is a particular attempt at solving that problem.
    spec = problem.ProblemDescription(session_id, dataset_uri,
                                      output_directory, task_type,
                                      metrics, target_features,
                                      predict_features)

    logging.info("Starting new problem for session %s", session_id)
    problem_id = session.new_problem(spec)

    pipelines = spec.find_solutions(dataset_uri)
    for pipeline in pipelines:
        pipeline_id = self._new_pipeline_id()
        output_file = pipeline_id + ".csv"
        output_uri = "file://" + output_directory + "/" + output_file
        pb2_pipeline = core_pb2.Pipeline(
            predict_result_uri=output_uri,
            output=core_pb2.OUTPUT_TYPE_UNDEFINED,
            scores=[])
        msg = core_pb2.PipelineCreateResult(
            response_info=core_pb2.Response(
                status=core_pb2.Status(code=core_pb2.OK),
            ),
            progress_info=core_pb2.SUBMITTED,
            pipeline_id=pipeline_id,
            pipeline_info=pb2_pipeline)
        yield msg

        # Actually ask the problem description to start finding solutions.
        msg.progress_info = core_pb2.RUNNING
        pipeline.train(dataset_uri)
        yield msg

        pipeline.evaluate(dataset_uri,
                          os.path.join(output_directory, output_file),
                          target_features)
        msg.progress_info = core_pb2.COMPLETED
        yield msg
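# The handler above assumes a dataset_uri_path helper that strips the
# file:// scheme off the dataset URI. A minimal sketch, assuming the URI
# is always file:// as the comment in the handler notes (the helper's real
# name is taken from the call site; its body is an assumption):

from urllib.parse import urlparse

def dataset_uri_path(dataset_uri):
    """Return the local filesystem path for a file:// dataset URI."""
    parsed = urlparse(dataset_uri)
    assert parsed.scheme == 'file', "only file:// URIs are supported"
    return parsed.path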