Exemplo n.º 1
0
 def GetExecutePipelineResults(self, request, context):
     """Stream back the stored test results of the requested pipelines.

     The session is looked up from ``request.context.session_id``. For each
     id in ``request.pipeline_ids`` the matching entry of
     ``session.test_results`` is yielded; ids without a stored result are
     skipped. Nothing is yielded when ``Session.get`` returns ``None``.
     """
     session = Session.get(request.context.session_id)
     if session is None:
         return

     for requested_id in request.pipeline_ids:
         stored = session.test_results.get(requested_id, None)
         if stored is None:
             # No result recorded for this pipeline id
             continue
         yield stored
Exemplo n.º 2
0
    def ExecutePipeline(self, request, context):
        """Predict step: run a stored pipeline and stream its results.

        Yields a single ``None`` and stops when either the session or the
        requested pipeline cannot be found. Otherwise the session's
        controller is initialized with the 'TEST' view and every
        non-``None`` item produced by ``controller.test`` is yielded in order.
        """
        session = Session.get(request.context.session_id)

        # The pipeline can only be looked up when there is a session to ask;
        # a missing session and a missing pipeline are answered the same way.
        pipeline = None
        if session is not None:
            pipeline = session.get_pipeline(request.pipeline_id)
        if pipeline is None:
            # NOTE(review): yielding None on a gRPC stream looks suspicious;
            # confirm what the client is expected to receive here.
            yield None
            return

        uri = request.dataset_uri
        datafile = None if uri is None else self._create_path_from_uri(uri)

        # Notify the event handler that this pipeline is about to be executed
        event_handler = GRPC_PlannerEventHandler(session)
        event_handler.StartExecutingPipeline(pipeline)

        session.controller.initialize_from_features(
            datafile,
            session.train_features,
            session.target_features,
            session.outputdir,
            view='TEST')

        # TODO (carried over from the original author): saving should
        # happen within the handler rather than here.
        for outcome in session.controller.test(pipeline, event_handler):
            if outcome is None:
                continue
            yield outcome
Exemplo n.º 3
0
 def ListPipelines(self, request, context):
     """Return the ids of all pipelines held by the caller's session.

     When ``Session.get`` returns ``None`` the id list is empty; the
     response info is "Listing pipelines" in either case.
     """
     session = Session.get(request.context.session_id)
     if session is None:
         known_ids = []
     else:
         known_ids = list(session.pipelines.keys())

     return core.PipelineListResult(
         response_info=self._create_response("Listing pipelines"),
         pipeline_ids=known_ids)
Exemplo n.º 4
0
 def DeletePipelines(self, request, context):
     """Delete the requested pipelines from the caller's session.

     Every id in ``request.delete_pipeline_ids`` is passed to
     ``session.delete_pipeline`` and echoed back in the result. When
     ``Session.get`` returns ``None`` nothing is deleted and the returned
     id list is empty.
     """
     session = Session.get(request.context.session_id)
     # Without a session there is nothing to delete from
     requested_ids = [] if session is None else request.delete_pipeline_ids

     removed_ids = []
     for doomed_id in requested_ids:
         session.delete_pipeline(doomed_id)
         removed_ids.append(doomed_id)

     return core.PipelineListResult(
         response_info=self._create_response("Deleted pipelines"),
         pipeline_ids=removed_ids)
Exemplo n.º 5
0
 def StartSession(self, request, context):
     """Session management: open a new session and return its id.

     The reply carries a "Session started" response info, echoes the
     request's ``user_agent`` and ``version``, and wraps the id of the
     session created by ``Session.new()`` in a ``core.SessionContext``.
     """
     new_session = Session.new()
     session_context = core.SessionContext(session_id=new_session.id)
     return core.SessionResponse(
         response_info=self._create_response("Session started"),
         user_agent=request.user_agent,
         version=request.version,
         context=session_context)
Exemplo n.º 6
0
    def DescribeDataflow(self, request, context):
        """Describe a pipeline as a linear dataflow of modules and connections.

        Each primitive of the pipeline becomes one module with an
        ``input_data`` input and an ``output_data`` output; primitives whose
        ``task`` is "Modeling" additionally get an ``input_labels`` input.
        Every primitive after the first is connected to its predecessor
        (``output_data`` -> ``input_data``).

        Returns:
            A ``dfext.DataflowDescription`` for ``request.pipeline_id``. The
            response code is "SESSION_UNKNOWN" when the session cannot be
            found and "INTERNAL" when the pipeline cannot be found; in both
            cases ``modules`` and ``connections`` are empty.
        """
        pipeline = None
        session = Session.get(request.context.session_id)
        if session is None:
            # Stop using `session` here: the pipeline lookup used to run
            # unconditionally and raised AttributeError on None instead of
            # reporting the unknown session to the client.
            response = self._create_response("Dataflow description",
                                             code="SESSION_UNKNOWN")
        else:
            pipeline = session.get_pipeline(request.pipeline_id)
            if pipeline is None:
                response = self._create_response("Dataflow description",
                                                 code="INTERNAL")
            else:
                response = self._create_response("Dataflow description")

        modules = []
        connections = []
        if pipeline is not None:
            prev_primitive = None
            for primitive in pipeline.primitives:
                inputs = [
                    dfext.DataflowDescription.Input(name="input_data",
                                                    type="pandas.DataFrame")
                ]
                if primitive.task == "Modeling":
                    inputs.append(
                        dfext.DataflowDescription.Input(
                            name="input_labels", type="pandas.DataFrame"))
                outputs = [
                    dfext.DataflowDescription.Output(name="output_data",
                                                     type="pandas.DataFrame")
                ]

                modules.append(
                    dfext.DataflowDescription.Module(id=primitive.cls,
                                                     type=primitive.type,
                                                     label=primitive.name,
                                                     inputs=inputs,
                                                     outputs=outputs))

                # Chain this module to the one emitted just before it
                if prev_primitive is not None:
                    connections.append(
                        dfext.DataflowDescription.Connection(
                            from_module_id=prev_primitive.cls,
                            from_output_name="output_data",
                            to_module_id=primitive.cls,
                            to_input_name="input_data"))
                prev_primitive = primitive

        return dfext.DataflowDescription(pipeline_id=request.pipeline_id,
                                         response_info=response,
                                         modules=modules,
                                         connections=connections)
Exemplo n.º 7
0
 def SetProblemDoc(self, request, context):
     """Apply problem-definition updates to the session's controller.

     For every entry in ``request.updates`` the task type, task subtype
     and metric are copied onto ``controller.problem`` when the
     corresponding field of the update is set (truthy). A metric update
     replaces the problem's metrics with that single metric.

     Returns:
         The "Updated Problem Doc" response on success, or a
         "SESSION_UNKNOWN" response when the session cannot be found.
     """
     session = Session.get(request.context.session_id)
     if session is None:
         # This path used to fall off the end of the method and return
         # None, which is not a valid reply message. Report the unknown
         # session explicitly instead.
         return self._create_response("No such session",
                                      code="SESSION_UNKNOWN")

     c = session.controller
     for update in request.updates:
         if update.task_type:
             c.problem.task_type = TaskType[
                 core.TaskType.Name(update.task_type)]
         if update.task_subtype:
             c.problem.task_subtype = TaskSubType[
                 core.TaskSubtype.Name(update.task_subtype)]
         # NOTE: output_type updates are intentionally not applied here.
         if update.metric:
             metric = Metric[core.PerformanceMetric.Name(update.metric)]
             c.problem.set_metrics([metric])
     session.controller = c
     return self._create_response("Updated Problem Doc")
Exemplo n.º 8
0
    def ExportPipeline(self, request, context):
        """Export a pipeline as an executable file at the requested URI.

        The executable is created through the controller's execution helper,
        copied from ``session.execdir`` to the path derived from
        ``request.pipeline_exec_uri`` and given mode ``0o755``.

        Returns:
            A response built by ``_create_response``: "Pipeline Exported" on
            success, "SESSION_UNKNOWN" when the session is not found, and
            "INVALID_ARGUMENT" when the pipeline id is unknown or no export
            URI was supplied.
        """
        session = Session.get(request.context.session_id)
        if session is None:
            return self._create_response("No such session", code="SESSION_UNKNOWN")

        pipeline = session.get_pipeline(request.pipeline_id)
        if pipeline is None:
            return self._create_response("Invalid pipeline id", code="INVALID_ARGUMENT")

        # Test truthiness rather than `is not None`: an unset string field
        # reads back as "" (assuming `request` is a protobuf message), so
        # the old check let an empty URI through and never reached this error.
        # Checking before any work also avoids building an executable that
        # cannot be exported.
        if not request.pipeline_exec_uri:
            return self._create_response("No pipeline export uri", code="INVALID_ARGUMENT")

        ex = session.controller.execution_helper
        ex.create_pipeline_executable(pipeline, session.config)
        exefile = self._create_path_from_uri(request.pipeline_exec_uri)
        origfile = os.path.join(session.execdir, pipeline.id)
        shutil.copy(origfile, exefile)
        # Make the exported file executable
        os.chmod(exefile, 0o755)
        return self._create_response("Pipeline Exported")
Exemplo n.º 9
0
    def GetDataflowResults(self, request, context):
        """Stream module results of a pipeline until the pipeline has finished.

        If the pipeline is already finished, everything returned by
        ``_get_pipeline_results`` is yielded once. Otherwise the method
        blocks on ``pipeline.waitForChanges()`` and, after each wake-up,
        yields whatever ``_get_pipeline_results`` returns, repeating until
        ``pipeline.finished`` is true.

        On failure a single ``dfext.ModuleResult`` carrying only the error
        response is yielded: code "SESSION_UNKNOWN" when the session is not
        found, "INTERNAL" when the pipeline is not found.
        """
        session = Session.get(request.context.session_id)
        if session is None:
            # Return early: the pipeline lookup used to run even without a
            # session and raised AttributeError on None, so the client never
            # received the SESSION_UNKNOWN response.
            response = self._create_response("Dataflow results",
                                             code="SESSION_UNKNOWN")
            yield dfext.ModuleResult(response_info=response)
            return

        pipeline = session.get_pipeline(request.pipeline_id)
        if pipeline is None:
            response = self._create_response("Dataflow results",
                                             code="INTERNAL")
            yield dfext.ModuleResult(response_info=response)
            return

        if pipeline.finished:
            for result in self._get_pipeline_results(pipeline):
                yield result
            return

        while not pipeline.finished:
            pipeline.waitForChanges()
            for result in self._get_pipeline_results(pipeline):
                yield result
Exemplo n.º 10
0
    def CreatePipelines(self, request, context):
        """Plan and train pipelines, streaming each training result.

        Yields a single ``None`` and stops when the session cannot be found.
        Otherwise the request's feature references are stored on the
        session, a ``Controller`` is created and initialized with the
        'TRAIN' view if the session does not have one yet (an existing
        controller is reused as is), the problem's task type, subtype and
        metrics are updated from the request, and every non-``None`` item
        produced by ``controller.train`` is yielded.
        """
        session = Session.get(request.context.session_id)
        if session is None:
            # NOTE(review): yielding None on a gRPC stream looks suspicious;
            # confirm what the client is expected to receive here.
            yield None
            return

        # Translate the request's feature references into Feature objects
        train_features = [
            Feature(ref.resource_id, ref.feature_name)
            for ref in request.predict_features
        ]
        target_features = [
            Feature(ref.resource_id, ref.feature_name)
            for ref in request.target_features
        ]

        if request.dataset_uri is None:
            datafile = None
        else:
            datafile = self._create_path_from_uri(request.dataset_uri)

        session.train_features = train_features
        session.target_features = target_features

        # Reuse the session's planning controller, creating it on first use
        controller = session.controller
        if controller is None:
            controller = Controller(self.libdir)
            controller.initialize_from_features(
                datafile,
                train_features,
                target_features,
                session.outputdir,
                view='TRAIN')

        # Problem schema: only values greater than zero override the problem
        if request.task > 0:
            task_name = core.TaskType.Name(request.task)
            controller.problem.task_type = TaskType[task_name]
        if request.task_subtype > 0:
            subtype_name = core.TaskSubtype.Name(request.task_subtype)
            controller.problem.task_subtype = TaskSubType[subtype_name]
        # NOTE: the request's output type is not applied to the problem.

        # Metrics are replaced only when the request lists at least one
        metrics = [
            Metric[core.PerformanceMetric.Name(metric_code)]
            for metric_code in request.metrics
        ]
        if metrics:
            controller.problem.set_metrics(metrics)

        # Maximum number of pipelines, passed to train() as `cutoff`
        max_pipelines = request.max_pipelines
        cutoff = None if max_pipelines is None else max_pipelines

        # Start planning / training
        session.controller = controller
        if session.controller.l1_planner is None:
            session.controller.initialize_planners()

        for outcome in session.controller.train(
                GRPC_PlannerEventHandler(session), cutoff=cutoff):
            if outcome is None:
                continue
            yield outcome
Exemplo n.º 11
0
 def EndSession(self, request, context):
     """Delete the session named by the request and confirm with a response.

     The session id is read directly from ``request.session_id`` and passed
     to ``Session.delete``; the reply is a "Session ended" response.
     """
     ended_session_id = request.session_id
     Session.delete(ended_session_id)
     return self._create_response("Session ended")