def run_merge_job(
    vector,
    target,
    entity_rows=None,
    timestamp_column=None,
    run_config=None,
    drop_columns=None,
):
    """Start a feature-merge job for ``vector`` writing into ``target``.

    The job function is taken from ``run_config``; when ``run_config`` has no
    function set, one is derived from a copy of ``vector.spec.function`` (or a
    fresh ``job`` reference when that copy is empty), using the default merger
    handler code when the reference has no ``url``.

    Note that a caller-supplied ``run_config`` is used as-is (not copied), so
    the ``function`` assigned here is visible on the caller's object.

    :param vector: feature vector object; must have ``metadata.name`` set
    :param target: target object exposing ``to_dict()``
    :param entity_rows: passed to the task as the ``entity_rows`` input
    :param timestamp_column: passed to the task as a parameter
    :param run_config: optional run configuration; a new ``RunConfig`` is
        created when falsy
    :param drop_columns: passed to the task as a parameter
    :returns: ``RemoteVectorResponse`` wrapping the vector and the run
    :raises mlrun.errors.MLRunInvalidArgumentError: when the vector has no
        name, or the target is falsy or lacks ``to_dict``
    """
    vector_name = vector.metadata.name
    if not vector_name:
        raise mlrun.errors.MLRunInvalidArgumentError(
            "feature vector name must be specified"
        )
    if not (target and hasattr(target, "to_dict")):
        raise mlrun.errors.MLRunInvalidArgumentError(
            "target object must be specified"
        )

    job_name = f"{vector_name}_merger"
    if not run_config:
        run_config = RunConfig()

    if not run_config.function:
        # Work on a copy so defaults filled in below do not leak into the
        # vector's own spec.
        merger_ref = vector.spec.function.copy()
        if merger_ref.is_empty():
            merger_ref = FunctionReference(name=job_name, kind="job")
        if not merger_ref.url:
            merger_ref.code = _default_merger_handler
        run_config.function = merger_ref

    merge_fn = run_config.to_function(
        "job", mlrun.mlconf.feature_store.default_job_image
    )
    merge_fn.metadata.project = vector.metadata.project
    merge_fn.metadata.name = merge_fn.metadata.name or job_name

    job_params = {
        "vector_uri": vector.uri,
        "target": target.to_dict(),
        "timestamp_column": timestamp_column,
        "drop_columns": drop_columns,
    }
    task = new_task(
        name=job_name,
        params=job_params,
        inputs={"entity_rows": entity_rows},
    )
    task.spec.secret_sources = run_config.secret_sources
    labeled = task.set_label("job-type", "feature-merge")
    labeled.set_label("feature-vector", vector.uri)

    # The run uid is generated up front and stored on the vector before the
    # job is submitted.
    task.metadata.uid = uuid.uuid4().hex
    vector.status.run_uri = task.metadata.uid
    vector.save()

    handler = run_config.handler or "merge_handler"
    run = merge_fn.run(
        task,
        handler=handler,
        local=run_config.local,
        watch=run_config.watch,
    )
    logger.info(f"feature vector merge job started, run id = {run.uid()}")
    return RemoteVectorResponse(vector, run)
def run_merge_job(
    vector,
    target,
    entity_rows=None,
    timestamp_column=None,
    local=None,
    watch=None,
    drop_columns=None,
    function=None,
    secrets=None,
    auto_mount=None,
):
    """Start a feature-merge job for ``vector`` writing into ``target``.

    When ``function`` is not supplied, the job function is built from a copy
    of ``vector.spec.function`` (or a fresh ``job`` reference when that one
    serializes to an empty dict), defaulting the image to
    ``mlrun.mlconf.feature_store.default_job_image`` and the code to the
    default merger handler when the reference has no ``url``.

    :param vector: feature vector object; must have ``metadata.name`` set
    :param target: target object exposing ``to_dict()``
    :param entity_rows: passed to the task as the ``entity_rows`` input
    :param timestamp_column: passed to the task as a parameter
    :param local: forwarded to ``function.run``
    :param watch: forwarded to ``function.run``
    :param drop_columns: passed to the task as a parameter
    :param function: optional function object to run instead of one derived
        from the vector
    :param secrets: when truthy, attached to the task as ``inline`` secrets
    :param auto_mount: when truthy, ``mlrun.platforms.auto_mount()`` is applied
        to the function derived from the vector
    :returns: ``RemoteVectorResponse`` wrapping the vector and the run
    :raises mlrun.errors.MLRunInvalidArgumentError: when the vector has no
        name, or the target is falsy or lacks ``to_dict``
    """
    name = vector.metadata.name
    if not name:
        raise mlrun.errors.MLRunInvalidArgumentError(
            "feature vector name must be specified"
        )
    if not target or not hasattr(target, "to_dict"):
        raise mlrun.errors.MLRunInvalidArgumentError(
            "target object must be specified"
        )
    name = f"{name}_merger"

    if not function:
        # Copy the reference: the image/code defaults assigned below must not
        # be written into the vector's own spec, which vector.save() persists
        # further down.
        function_ref = vector.spec.function.copy()
        if not function_ref.to_dict():
            function_ref = FunctionReference(name=name, kind="job")
        function_ref.image = (
            function_ref.image or mlrun.mlconf.feature_store.default_job_image
        )
        if not function_ref.url:
            function_ref.code = _default_merger_handler
        function = function_ref.to_function()
        if auto_mount:
            function.apply(mlrun.platforms.auto_mount())

    function.metadata.project = vector.metadata.project
    task = new_task(
        name=name,
        params={
            "vector_uri": vector.uri,
            "target": target.to_dict(),
            "timestamp_column": timestamp_column,
            "drop_columns": drop_columns,
        },
        inputs={"entity_rows": entity_rows},
    )
    if secrets:
        task.with_secrets("inline", secrets)

    # The run uid is generated up front and stored on the vector before the
    # job is submitted.
    task.metadata.uid = uuid.uuid4().hex
    vector.status.run_uri = task.metadata.uid
    vector.save()

    run = function.run(task, handler="merge_handler", local=local, watch=watch)
    logger.info(f"feature vector merge job started, run id = {run.uid()}")
    return RemoteVectorResponse(vector, run)
def _generate_task(self):
    """Create a task from this object's name, project and artifact path."""
    task_kwargs = {
        "name": self.name,
        "project": self.project,
        "artifact_path": self.artifact_path,
    }
    return new_task(**task_kwargs)