Example #1
 def insert_summary_into_db(self, summary_data: dict):
     try:
         summary_model = self.get_dynamic_db_model(ComponentSummary,
                                                   self.job_id)
         DB.create_tables([summary_model])
         summary_obj = summary_model.get_or_none(
             summary_model.f_job_id == self.job_id,
             summary_model.f_component_name == self.component_name,
             summary_model.f_role == self.role,
             summary_model.f_party_id == self.party_id,
             summary_model.f_task_id == self.task_id,
             summary_model.f_task_version == self.task_version)
         if summary_obj:
             summary_obj.f_summary = serialize_b64(summary_data,
                                                   to_str=True)
             summary_obj.f_update_time = current_timestamp()
             summary_obj.save()
         else:
             self.get_dynamic_db_model(
                 ComponentSummary,
                 self.job_id).create(f_job_id=self.job_id,
                                     f_component_name=self.component_name,
                                     f_role=self.role,
                                     f_party_id=self.party_id,
                                     f_task_id=self.task_id,
                                     f_task_version=self.task_version,
                                     f_summary=serialize_b64(summary_data,
                                                             to_str=True),
                                     f_create_time=current_timestamp())
     except Exception as e:
         schedule_logger(self.job_id).exception(
             "An exception where querying summary job id: {} "
             "component name: {} to database:\n{}".format(
                 self.job_id, self.component_name, e))
Example #2
    def resource_for_job(cls, job_id, role, party_id, operation_type):
        operate_status = False
        engine_name, cores, memory = cls.calculate_job_resource(job_id=job_id, role=role, party_id=party_id)
        try:
            with DB.atomic():
                updates = {
                    Job.f_engine_type: EngineType.COMPUTING,
                    Job.f_engine_name: engine_name,
                    Job.f_cores: cores,
                    Job.f_memory: memory,
                }
                filters = [
                    Job.f_job_id == job_id,
                    Job.f_role == role,
                    Job.f_party_id == party_id,
                ]
                if operation_type == ResourceOperation.APPLY:
                    updates[Job.f_remaining_cores] = cores
                    updates[Job.f_remaining_memory] = memory
                    updates[Job.f_resource_in_use] = True
                    updates[Job.f_apply_resource_time] = base_utils.current_timestamp()
                    filters.append(Job.f_resource_in_use == False)
                elif operation_type == ResourceOperation.RETURN:
                    updates[Job.f_resource_in_use] = False
                    updates[Job.f_return_resource_time] = base_utils.current_timestamp()
                    filters.append(Job.f_resource_in_use == True)
                operate = Job.update(updates).where(*filters)
                record_status = operate.execute() > 0
                if not record_status:
                    raise RuntimeError(f"record job {job_id} resource {operation_type} failed on {role} {party_id}")

                filters, updates = cls.update_resource_sql(resource_model=EngineRegistry,
                                                           cores=cores,
                                                           memory=memory,
                                                           operation_type=operation_type,
                                                           )
                filters.append(EngineRegistry.f_engine_type == EngineType.COMPUTING)
                filters.append(EngineRegistry.f_engine_name == engine_name)
                operate = EngineRegistry.update(updates).where(*filters)
                apply_status = operate.execute() > 0
                if not apply_status:
                    raise RuntimeError(
                        f"{operation_type} resource on engine {engine_name} for job {job_id} failed on {role} {party_id}")
            operate_status = True
        except Exception as e:
            schedule_logger(job_id=job_id).warning(e)
            schedule_logger(job_id=job_id).warning(
                f"{operation_type} job {job_id} resource(cores {cores} memory {memory}) on {role} {party_id} failed")
            operate_status = False
        finally:
            remaining_cores, remaining_memory = cls.get_remaining_resource(EngineRegistry,
                                                                           [
                                                                               EngineRegistry.f_engine_type == EngineType.COMPUTING,
                                                                               EngineRegistry.f_engine_name == engine_name])
            operate_msg = "successfully" if operate_status else "failed"
            schedule_logger(job_id=job_id).info(
                f"{operation_type} job {job_id} resource(cores {cores} memory {memory}) on {role} {party_id} {operate_msg}, remaining cores: {remaining_cores} remaining memory: {remaining_memory}")
        return operate_status
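
The APPLY/RETURN filters above turn each UPDATE into a compare-and-set: the extra `f_resource_in_use` condition means rows are only changed when the resource is in the expected state, so concurrent apply and return calls cannot both succeed. A minimal sketch of the same pattern against an in-memory SQLite database (the table and field names here are illustrative, not FATE's):

from peewee import BooleanField, CharField, Model, SqliteDatabase

db = SqliteDatabase(":memory:")


class Resource(Model):
    f_key = CharField(unique=True)
    f_in_use = BooleanField(default=False)

    class Meta:
        database = db


def try_apply(key: str) -> bool:
    # the WHERE clause doubles as the state check:
    # rows-updated > 0 means this caller won the race
    query = Resource.update({Resource.f_in_use: True}).where(
        (Resource.f_key == key) & (Resource.f_in_use == False))  # noqa: E712
    return query.execute() > 0


db.create_tables([Resource])
Resource.create(f_key="job-1")
assert try_apply("job-1") is True   # first caller acquires the resource
assert try_apply("job-1") is False  # second caller sees no matching row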
Example #3
class Tag(DataBaseModel):
    f_id = BigAutoField(primary_key=True)
    f_name = CharField(max_length=100, index=True, unique=True)
    f_desc = TextField(null=True)
    # pass the callable itself so the default is evaluated per insert,
    # not once at import time
    f_create_time = BigIntegerField(default=current_timestamp)
    f_update_time = BigIntegerField(default=current_timestamp)

    class Meta:
        db_table = "t_tags"
Example #4
 def ready_signal(cls, job_id, set_or_reset: bool, ready_timeout_ttl=None):
     filters = [Job.f_job_id == job_id]
     if set_or_reset:
         update_fields = {Job.f_ready_signal: True, Job.f_ready_time: current_timestamp()}
         filters.append(Job.f_ready_signal == False)
     else:
         update_fields = {Job.f_ready_signal: False, Job.f_ready_time: None}
         filters.append(Job.f_ready_signal == True)
         if ready_timeout_ttl:
             filters.append(current_timestamp() - Job.f_ready_time > ready_timeout_ttl)
     update_status = Job.update(update_fields).where(*filters).execute() > 0
     return update_status
Example #5
class ModelOperationLog(DataBaseModel):
    f_operation_type = CharField(max_length=20, null=False, index=True)
    f_operation_status = CharField(max_length=20, null=True, index=True)
    f_initiator_role = CharField(max_length=50, index=True, null=True)
    f_initiator_party_id = CharField(max_length=10, index=True, null=True)
    f_request_ip = CharField(max_length=20, null=True)
    f_model_id = CharField(max_length=100, index=True)
    f_model_version = CharField(max_length=100, index=True)
    # pass the callable itself so the default is evaluated per insert,
    # not once at import time
    f_create_time = BigIntegerField(default=current_timestamp)
    f_update_time = BigIntegerField(default=current_timestamp)

    class Meta:
        db_table = "t_model_operation_log"
Example #6
 def start_job(cls, job_id, initiator_role, initiator_party_id):
     schedule_logger(job_id=job_id).info(
         "try to start job {} on initiator {} {}".format(
             job_id, initiator_role, initiator_party_id))
     job_info = {}
     job_info["job_id"] = job_id
     job_info["role"] = initiator_role
     job_info["party_id"] = initiator_party_id
     job_info["status"] = JobStatus.RUNNING
     job_info["party_status"] = JobStatus.RUNNING
     job_info["start_time"] = current_timestamp()
     job_info["tag"] = 'end_waiting'
     jobs = JobSaver.query_job(job_id=job_id,
                               role=initiator_role,
                               party_id=initiator_party_id)
     if jobs:
         job = jobs[0]
         FederatedScheduler.start_job(job=job)
         schedule_logger(job_id=job_id).info(
             "start job {} on initiator {} {}".format(
                 job_id, initiator_role, initiator_party_id))
     else:
         schedule_logger(job_id=job_id).error(
             "cannot find job {} on initiator {} {}".format(
                 job_id, initiator_role, initiator_party_id))
Example #7
 def create(self):
     table_meta = StorageTableMetaModel()
     table_meta.f_create_time = current_timestamp()
     table_meta.f_schema = {}
     table_meta.f_part_of_data = []
     for k, v in self.to_dict().items():
         attr_name = 'f_%s' % k
         if hasattr(StorageTableMetaModel, attr_name):
             setattr(
                 table_meta, attr_name,
                 v if not issubclass(type(v), AddressABC) else v.__dict__)
     try:
         rows = table_meta.save(force_insert=True)
         if rows != 1:
             raise Exception("create table meta failed")
     except peewee.IntegrityError as e:
         if e.args[0] == 1062:
             # MySQL error 1062: duplicate entry, the meta record already exists
             pass
         elif isinstance(e.args[0],
                         str) and "UNIQUE constraint failed" in e.args[0]:
             pass
         else:
             raise e
Example #8
 def insert_metrics_into_db(self,
                            metric_namespace: str,
                            metric_name: str,
                            data_type: int,
                            kv,
                            job_level=False):
     try:
         tracking_metric = self.get_dynamic_db_model(
             TrackingMetric, self.job_id)()
         tracking_metric.f_job_id = self.job_id
         tracking_metric.f_component_name = (
             self.component_name
             if not job_level else job_utils.job_virtual_component_name())
         tracking_metric.f_task_id = self.task_id
         tracking_metric.f_task_version = self.task_version
         tracking_metric.f_role = self.role
         tracking_metric.f_party_id = self.party_id
         tracking_metric.f_metric_namespace = metric_namespace
         tracking_metric.f_metric_name = metric_name
         tracking_metric.f_type = data_type
         default_db_source = tracking_metric.to_json()
         tracking_metric_data_source = []
         for k, v in kv:
             db_source = default_db_source.copy()
             db_source['f_key'] = serialize_b64(k)
             db_source['f_value'] = serialize_b64(v)
             db_source['f_create_time'] = current_timestamp()
             tracking_metric_data_source.append(db_source)
         self.bulk_insert_into_db(
             self.get_dynamic_db_model(TrackingMetric, self.job_id),
             tracking_metric_data_source)
     except Exception as e:
         schedule_logger(self.job_id).exception(
             "exception while inserting metric {} of metric namespace: {} into database:\n{}"
             .format(metric_name, metric_namespace, e))
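
`bulk_insert_into_db` is not shown in this example; a common way to implement that step is peewee's `insert_many`, batched so the statement stays under the backend's SQL variable limit. A hedged sketch of that shape (the model and batch size are illustrative):

from peewee import BigIntegerField, CharField, Model, SqliteDatabase, chunked

db = SqliteDatabase(":memory:")


class Metric(Model):
    f_key = CharField()
    f_value = CharField()
    f_create_time = BigIntegerField()

    class Meta:
        database = db


db.create_tables([Metric])
rows = [{"f_key": f"k{i}", "f_value": f"v{i}", "f_create_time": i} for i in range(1000)]
with db.atomic():
    # insert in batches to stay under SQL variable limits
    for batch in chunked(rows, 100):
        Metric.insert_many(batch).execute()
assert Metric.select().count() == 1000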
Example #9
 def update_entity_table(cls, entity_model, entity_info):
     query_filters = []
     primary_keys = entity_model.get_primary_keys_name()
     for p_k in primary_keys:
         query_filters.append(
             operator.attrgetter(p_k)(entity_model) == entity_info[
                 p_k.lstrip("f").lstrip("_")])
     objs = entity_model.select().where(*query_filters)
     if objs:
         obj = objs[0]
     else:
         raise Exception("can not found the {}".format(
             entity_model.__class__.__name__))
     update_filters = query_filters[:]
     update_info = {}
     update_info.update(entity_info)
     for _ in cls.STATUS_FIELDS:
         # not allow update status fields by this function
         update_info.pop(_, None)
     if update_info.get("tag") == "job_end" and hasattr(
             entity_model, "f_tag"):
         if obj.f_start_time:
             update_info["end_time"] = current_timestamp()
             update_info[
                 'elapsed'] = update_info['end_time'] - obj.f_start_time
     if update_info.get("progress") and hasattr(
             entity_model, "f_progress") and update_info["progress"] > 0:
         update_filters.append(
             operator.attrgetter("f_progress")
             (entity_model) <= update_info["progress"])
     return cls.execute_update(old_obj=obj,
                               model=entity_model,
                               update_info=update_info,
                               update_filters=update_filters)
Example #10
def save_model_info(model_info):
    model = MLModel()
    model.f_create_time = current_timestamp()
    for k, v in model_info.items():
        attr_name = 'f_%s' % k
        if hasattr(MLModel, attr_name):
            setattr(model, attr_name, v)
        elif hasattr(MLModel, k):
            setattr(model, k, v)

    try:
        rows = model.save(force_insert=True)
        if rows != 1:
            raise Exception("Save to database failed")
    except peewee.IntegrityError as e:
        if e.args[0] != 1062:
            raise Exception("Create {} failed:\n{}".format(MLModel, e))

        sql_logger(job_id=model_info.get("job_id", "fate_flow")).warning(e)
        return
    except Exception as e:
        raise Exception("Create {} failed:\n{}".format(MLModel, e))

    RuntimeConfig.SERVICE_DB.register_model(
        gen_party_model_id(role=model.f_role,
                           party_id=model.f_party_id,
                           model_id=model.f_model_id), model.f_model_version)

    return model
Example #11
 def detect_expired_session(cls):
     ttl = SESSION_VALID_PERIOD
     detect_logger().info(
         f'start detect expired session by ttl {ttl/1000} s')
     try:
         session_records = Session.query_sessions(
             create_time=[None, current_timestamp() - ttl])
         manager_session_id_list = []
         for session_record in session_records:
             manager_session_id = session_record.f_manager_session_id
             if manager_session_id in manager_session_id_list:
                 # this manager session was already handled in this pass
                 continue
             manager_session_id_list.append(manager_session_id)
             detect_logger().info(
                 f'start destroy session {manager_session_id}')
             try:
                 sess = Session(session_id=manager_session_id,
                                options={"logger": detect_logger()})
                 sess.destroy_all_sessions()
             except Exception as e:
                 detect_logger().error(
                     f'stop session {manager_session_id} error: {e}')
             finally:
                 detect_logger().info(
                     f'stop session {manager_session_id} finished')
     except Exception as e:
         detect_logger().error(f'detect expired session error: {e}')
     finally:
         detect_logger().info('finish detect expired session')
Example #12
    def save(self, *args, **kwargs):
        if hasattr(self, "f_update_date"):
            self.f_update_date = datetime.datetime.now()
        if hasattr(self, "f_update_time"):
            self.f_update_time = current_timestamp()

        return super(DataBaseModel, self).save(*args, **kwargs)
Example #13
    def save(self, *args, **kwargs):
        if self.f_create_time:
            self.f_create_date = timestamp_to_date(self.f_create_time)

        self.f_update_time = current_timestamp()
        self.f_update_date = timestamp_to_date(self.f_update_time)
        return super(BaseModel, self).save(*args, **kwargs)
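
This `save` override keeps the human-readable `*_date` columns derived from their millisecond `*_time` counterparts on every write. A minimal self-contained version of the pattern (`timestamp_to_date` is reimplemented here in the spirit of FATE's helper, which returns a formatted string):

import time

from peewee import BigIntegerField, CharField, Model, SqliteDatabase

db = SqliteDatabase(":memory:")


def current_timestamp() -> int:
    return int(time.time() * 1000)


def timestamp_to_date(timestamp_ms: int, fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
    return time.strftime(fmt, time.localtime(timestamp_ms / 1000))


class BaseModel(Model):
    class Meta:
        database = db

    def save(self, *args, **kwargs):
        # refresh the derived columns on every write
        self.f_update_time = current_timestamp()
        self.f_update_date = timestamp_to_date(self.f_update_time)
        return super().save(*args, **kwargs)


class Item(BaseModel):
    f_name = CharField()
    f_update_time = BigIntegerField(null=True)
    f_update_date = CharField(null=True)


db.create_tables([Item])
item = Item(f_name="demo")
item.save()
assert item.f_update_date == timestamp_to_date(item.f_update_time)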
Example #14
def save_model_info(model_info):
    model = MLModel()
    model.f_create_time = current_timestamp()
    for k, v in model_info.items():
        attr_name = 'f_%s' % k
        if hasattr(MLModel, attr_name):
            setattr(model, attr_name, v)
        elif hasattr(MLModel, k):
            setattr(model, k, v)
    try:
        rows = model.save(force_insert=True)
        if rows != 1:
            raise Exception("Create {} failed".format(MLModel))
        if RuntimeConfig.zk_client is not None:
            ServiceUtils.register(
                RuntimeConfig.zk_client,
                gen_party_model_id(role=model.f_role,
                                   party_id=model.f_party_id,
                                   model_id=model.f_model_id),
                model.f_model_version)
        return model
    except peewee.IntegrityError as e:
        if e.args[0] == 1062:
            sql_logger(job_id=model_info.get("job_id", "fate_flow")).warning(e)
        else:
            raise Exception("Create {} failed:\n{}".format(MLModel, e))
    except Exception as e:
        raise Exception("Create {} failed:\n{}".format(MLModel, e))
Example #15
    def register_engine(cls, engine_type, engine_name, engine_entrance,
                        engine_config):
        nodes = engine_config.get("nodes", 1)
        cores = engine_config.get(
            "cores_per_node",
            0) * nodes * JobDefaultConfig.total_cores_overweight_percent
        memory = engine_config.get(
            "memory_per_node",
            0) * nodes * JobDefaultConfig.total_memory_overweight_percent
        filters = [
            EngineRegistry.f_engine_type == engine_type,
            EngineRegistry.f_engine_name == engine_name
        ]
        resources = EngineRegistry.select().where(*filters)
        if resources:
            resource = resources[0]
            update_fields = {}
            update_fields[EngineRegistry.f_engine_config] = engine_config
            update_fields[EngineRegistry.f_cores] = cores
            update_fields[EngineRegistry.f_memory] = memory
            update_fields[
                EngineRegistry.
                f_remaining_cores] = EngineRegistry.f_remaining_cores + (
                    cores - resource.f_cores)
            update_fields[
                EngineRegistry.
                f_remaining_memory] = EngineRegistry.f_remaining_memory + (
                    memory - resource.f_memory)
            update_fields[EngineRegistry.f_nodes] = nodes
            operate = EngineRegistry.update(update_fields).where(*filters)
            update_status = operate.execute() > 0
            if update_status:
                stat_logger.info(
                    f"update {engine_type} engine {engine_name} {engine_entrance} registration information"
                )
            else:
                stat_logger.info(
                    f"update {engine_type} engine {engine_name} {engine_entrance} registration information takes no effect"
                )
        else:
            resource = EngineRegistry()
            resource.f_create_time = base_utils.current_timestamp()
            resource.f_engine_type = engine_type
            resource.f_engine_name = engine_name
            resource.f_engine_entrance = engine_entrance
            resource.f_engine_config = engine_config

            resource.f_cores = cores
            resource.f_memory = memory
            resource.f_remaining_cores = cores
            resource.f_remaining_memory = memory
            resource.f_nodes = nodes
            try:
                resource.save(force_insert=True)
                stat_logger.info(
                    f"create {engine_type} engine {engine_name} {engine_entrance} registration information"
                )
            except Exception as e:
                stat_logger.warning(e)
Example #16
 def run(self, **kwargs):
     result = {}
     code = 0
     message = ""
     start_time = current_timestamp()
     self.run_pid = os.getpid()
     try:
         self.args = self.get_args(**kwargs)
         RuntimeConfig.init_env()
         RuntimeConfig.set_process_role(ProcessRole(os.getenv("PROCESS_ROLE")))
         if RuntimeConfig.PROCESS_ROLE == ProcessRole.WORKER:
             LoggerFactory.LEVEL = logging.getLevelName(os.getenv("FATE_LOG_LEVEL", "INFO"))
             LoggerFactory.set_directory(directory=self.args.log_dir, parent_log_dir=self.args.parent_log_dir,
                                         append_to_parent_log=True, force=True)
             LOGGER.info(f"enter {self.__class__.__name__} worker in subprocess, pid: {self.run_pid}")
         else:
             LOGGER.info(f"enter {self.__class__.__name__} worker in driver process, pid: {self.run_pid}")
         LOGGER.info(f"log level: {logging.getLevelName(LoggerFactory.LEVEL)}")
         for env in {"VIRTUAL_ENV", "PYTHONPATH", "SPARK_HOME", "FATE_DEPLOY_BASE", "PROCESS_ROLE", "FATE_JOB_ID"}:
             LOGGER.info(f"{env}: {os.getenv(env)}")
         if self.args.job_server:
             RuntimeConfig.init_config(JOB_SERVER_HOST=self.args.job_server.split(':')[0],
                                       HTTP_PORT=self.args.job_server.split(':')[1])
         if not RuntimeConfig.LOAD_COMPONENT_REGISTRY:
             ComponentRegistry.load()
         if not RuntimeConfig.LOAD_CONFIG_MANAGER:
             ConfigManager.load()
         result = self._run()
     except Exception as e:
         LOGGER.exception(e)
         traceback.print_exc()
         try:
             self._handle_exception()
         except Exception as handle_err:
             # a distinct name matters: rebinding `e` here would unbind the
             # outer exception once this inner handler exits
             LOGGER.exception(handle_err)
         code = 1
         message = exception_to_trace_string(e)
     finally:
         if self.args and self.args.result:
             dump_json_conf(result, self.args.result)
         end_time = current_timestamp()
         LOGGER.info(f"worker {self.__class__.__name__}, process role: {RuntimeConfig.PROCESS_ROLE}, pid: {self.run_pid}, elapsed: {end_time - start_time} ms")
         if RuntimeConfig.PROCESS_ROLE == ProcessRole.WORKER:
             sys.exit(code)
         else:
             return code, message, result
Example #17
def check_job_is_timeout(job: Job):
    job_parameters = job.f_runtime_conf_on_party["job_parameters"]
    timeout = job_parameters.get("timeout", JOB_DEFAULT_TIMEOUT)
    now_time = current_timestamp()
    running_time = (now_time - job.f_create_time)/1000
    if running_time > timeout:
        schedule_logger(job_id=job.f_job_id).info('job {} run time {}s timeout'.format(job.f_job_id, running_time))
        return True
    else:
        return False
Example #18
def check_job_is_timeout(job: Job):
    job_parameters = job.f_runtime_conf_on_party["job_parameters"]
    timeout = job_parameters.get("timeout", JobDefaultConfig.job_timeout)
    now_time = current_timestamp()
    running_time = (now_time - job.f_create_time) / 1000
    if running_time > timeout:
        schedule_logger(job.f_job_id).info(f'run time {running_time}s timeout')
        return True
    else:
        return False
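
Both timeout variants rely on the same unit convention: `f_create_time` is a millisecond timestamp while `timeout` is configured in seconds, hence the division by 1000. A tiny self-contained check with hypothetical values:

import time


def current_timestamp() -> int:
    return int(time.time() * 1000)


def is_timeout(create_time_ms: int, timeout_s: float) -> bool:
    running_time_s = (current_timestamp() - create_time_ms) / 1000
    return running_time_s > timeout_s


# a job "created" 5 seconds ago, checked against 3 s and 10 s budgets
assert is_timeout(current_timestamp() - 5000, 3) is True
assert is_timeout(current_timestamp() - 5000, 10) is False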
Example #19
 def create_task(cls, role, party_id, run_on_this_party, task_info):
     task_info["role"] = role
     task_info["party_id"] = party_id
     task_info["status"] = TaskStatus.WAITING
     task_info["party_status"] = TaskStatus.WAITING
     task_info["create_time"] = base_utils.current_timestamp()
     task_info["run_on_this_party"] = run_on_this_party
     if "task_id" not in task_info:
         task_info["task_id"] = job_utils.generate_task_id(job_id=task_info["job_id"], component_name=task_info["component_name"])
     if "task_version" not in task_info:
         task_info["task_version"] = 0
     JobSaver.create_task(task_info=task_info)
Example #20
 def create_or_update(self):
     defaults = {
         "f_name": self.name,
         "f_engine": self.engine,
         "f_connector_info": self.connector_info,
         "f_create_time": current_timestamp(),
     }
     connector, status = StorageConnectorModel.get_or_create(
         f_name=self.name, defaults=defaults)
     if status is False:
         for key in defaults:
             setattr(connector, key, defaults[key])
         connector.save(force_insert=False)
Example #21
 def update(cls, __data=None, **update):
     if __data:
         if hasattr(cls, "f_update_time"):
             __data[operator.attrgetter("f_update_time")(cls)] = current_timestamp()
         fields = AUTO_DATE_TIMESTAMP_FIELD_PREFIX.copy()
         # create can not be updated
         fields.remove("create")
         for f_n in fields:
             if hasattr(cls, f"f_{f_n}_time") and hasattr(cls, f"f_{f_n}_date"):
                 k = operator.attrgetter(f"f_{f_n}_time")(cls)
                 if k in __data and __data[k]:
                     __data[operator.attrgetter(f"f_{f_n}_date")(cls)] = timestamp_to_date(__data[k])
     return super().update(__data, **update)
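
Because peewee's `Model.update(__data)` accepts a dict keyed by field objects, the override above can inject `f_update_time` without knowing the concrete model. A hedged, self-contained sketch of the same hook (only the timestamp injection is reproduced; the `*_date` mirroring is omitted):

import operator
import time

from peewee import BigIntegerField, CharField, Model, SqliteDatabase

db = SqliteDatabase(":memory:")


def current_timestamp() -> int:
    return int(time.time() * 1000)


class Stamped(Model):
    f_name = CharField()
    f_update_time = BigIntegerField(null=True)

    class Meta:
        database = db

    @classmethod
    def update(cls, __data=None, **update):
        # inject the update timestamp, mirroring the override above
        if __data is not None and hasattr(cls, "f_update_time"):
            __data[operator.attrgetter("f_update_time")(cls)] = current_timestamp()
        return super().update(__data, **update)


db.create_tables([Stamped])
Stamped.create(f_name="demo")
Stamped.update({Stamped.f_name: "renamed"}).execute()
row = Stamped.get()
assert row.f_name == "renamed" and row.f_update_time is not None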
Example #22
 def start_job(cls, job_id, role, party_id, extra_info=None):
     schedule_logger(job_id=job_id).info(f"try to start job {job_id} on {role} {party_id}")
     job_info = {
         "job_id": job_id,
         "role": role,
         "party_id": party_id,
         "status": JobStatus.RUNNING,
         "start_time": current_timestamp()
     }
     if extra_info:
         schedule_logger(job_id=job_id).info(f"extra info: {extra_info}")
         job_info.update(extra_info)
     cls.update_job_status(job_info=job_info)
     cls.update_job(job_info=job_info)
     schedule_logger(job_id=job_id).info(f"start job {job_id} on {role} {party_id} successfully")
Example #23
def check_job_is_timeout(job):
    job_dsl, job_runtime_conf, train_runtime_conf = get_job_configuration(
        job_id=job.f_job_id,
        role=job.f_initiator_role,
        party_id=job.f_initiator_party_id)
    job_parameters = job_runtime_conf.get('job_parameters', {})
    timeout = job_parameters.get("timeout", JOB_DEFAULT_TIMEOUT)
    now_time = current_timestamp()
    running_time = (now_time - job.f_create_time) / 1000
    if running_time > timeout:
        schedule_logger(job_id=job.f_job_id).info(
            'job {} run time {}s timeout'.format(job.f_job_id, running_time))
        return True
    else:
        return False
Example #24
 def federated_command(cls, job_id, src_role, src_party_id, dest_role,
                       dest_party_id, endpoint, body, federated_mode,
                       federated_response):
     st = base_utils.current_timestamp()
     log_msg = f"sending {endpoint} federated command"
     schedule_logger(job_id).info(start_log(msg=log_msg))
     try:
         response = federated_api(job_id=job_id,
                                  method='POST',
                                  endpoint=endpoint,
                                  src_role=src_role,
                                  src_party_id=src_party_id,
                                  dest_party_id=dest_party_id,
                                  json_body=body if body else {},
                                  federated_mode=federated_mode)
     except Exception as e:
         schedule_logger(job_id=job_id).exception(e)
         response = {
             "retcode": RetCode.FEDERATED_ERROR,
             "retmsg": "Federated schedule error, {}".format(e)
         }
     if response["retcode"] != RetCode.SUCCESS:
         if response["retcode"] in [RetCode.NOT_EFFECTIVE, RetCode.RUNNING]:
             schedule_logger(job_id).warning(
                 warning_log(msg=log_msg,
                             role=dest_role,
                             party_id=dest_party_id))
         else:
             schedule_logger(job_id).error(
                 failed_log(msg=log_msg,
                            role=dest_role,
                            party_id=dest_party_id,
                            detail=response["retmsg"]))
     federated_response[dest_role][dest_party_id] = response
     et = base_utils.current_timestamp()
     schedule_logger(job_id).info(f"{log_msg} use {et - st} ms")
Example #25
 def __enter__(self):
     with DB.connection_context():
         session_record = SessionRecord()
         session_record.f_session_id = self._session_id
         session_record.f_engine_name = self._engine_name
         session_record.f_engine_type = EngineType.STORAGE
         # TODO: engine address
         session_record.f_engine_address = {}
         session_record.f_create_time = current_timestamp()
         rows = session_record.save(force_insert=True)
         if rows != 1:
             raise Exception(
                 f"create session record {self._session_id} failed")
         LOGGER.debug(f"save session {self._session_id} record")
     self.create()
     return self
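
`__enter__` registers the storage session in the `SessionRecord` table; a matching `__exit__` (not shown in this example) would presumably remove or close the record. A toy sketch of that enter/exit bookkeeping shape, with an in-memory dict standing in for the table (all names here are illustrative):

import time


class SessionGuard:
    """Record a session id on entry and release it on exit (illustrative only)."""

    _records = {}  # stands in for the SessionRecord table

    def __init__(self, session_id: str):
        self._session_id = session_id

    def __enter__(self):
        # counterpart of: session_record.save(force_insert=True)
        self._records[self._session_id] = int(time.time() * 1000)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._records.pop(self._session_id, None)
        return False  # never swallow exceptions


with SessionGuard("sess-1"):
    assert "sess-1" in SessionGuard._records
assert "sess-1" not in SessionGuard._records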
Example #26
    def create_task(cls, role, party_id, run_on_this_party, task_info):
        task_info["role"] = role
        task_info["party_id"] = str(party_id)
        task_info["status"] = TaskStatus.WAITING
        task_info["party_status"] = TaskStatus.WAITING
        task_info["create_time"] = base_utils.current_timestamp()
        task_info["run_on_this_party"] = run_on_this_party
        if task_info.get("task_id") is None:
            task_info["task_id"] = job_utils.generate_task_id(
                job_id=task_info["job_id"],
                component_name=task_info["component_name"])
        if task_info.get("task_version") is None:
            task_info["task_version"] = 0

        task = JobSaver.create_task(task_info=task_info)
        if task and run_on_this_party:
            job_utils.save_task_using_job_conf(task)
Example #27
 def create_job_family_entity(cls, entity_model, entity_info):
     obj = entity_model()
     obj.f_create_time = current_timestamp()
     for k, v in entity_info.items():
         attr_name = 'f_%s' % k
         if hasattr(entity_model, attr_name):
             setattr(obj, attr_name, v)
     try:
         rows = obj.save(force_insert=True)
         if rows != 1:
             raise Exception("Create {} failed".format(entity_model))
     except peewee.IntegrityError as e:
         if e.args[0] == 1062:
             sql_logger(job_id=entity_info.get("job_id", "fate_flow")).warning(e)
         else:
             raise Exception("Create {} failed:\n{}".format(entity_model, e))
     except Exception as e:
         raise Exception("Create {} failed:\n{}".format(entity_model, e))
Example #28
 def save_worker_info(cls, task: Task, worker_name: WorkerName, worker_id,
                      **kwargs):
     worker = WorkerInfo()
     ignore_attr = auto_date_timestamp_db_field()
     for attr, value in task.to_dict().items():
         if hasattr(worker,
                    attr) and attr not in ignore_attr and value is not None:
             setattr(worker, attr, value)
     worker.f_create_time = current_timestamp()
     worker.f_worker_name = worker_name.value
     worker.f_worker_id = worker_id
     for k, v in kwargs.items():
         attr = f"f_{k}"
         if hasattr(worker, attr) and v is not None:
             setattr(worker, attr, v)
     rows = worker.save(force_insert=True)
     if rows != 1:
         raise Exception("save worker info failed")
Example #29
 def store(self, model_id: str, model_version: str, store_address: dict, force_update: bool = False):
     """
     Store the model from local cache to mysql
     :param model_id:
     :param model_version:
     :param store_address:
     :param force_update:
     :return:
     """
     try:
         self.get_connection(config=store_address)
         DB.create_tables([MachineLearningModel])
         model = PipelinedModel(model_id=model_id, model_version=model_version)
         LOGGER.info("start store model {} {}".format(model_id, model_version))
         with DB.connection_context():
             with open(model.packaging_model(), "rb") as fr:
                 slice_index = 0
                 while True:
                     content = fr.read(SLICE_MAX_SIZE)
                     if content:
                         model_in_table = MachineLearningModel()
                         model_in_table.f_create_time = current_timestamp()
                         model_in_table.f_model_id = model_id
                         model_in_table.f_model_version = model_version
                         model_in_table.f_content = serialize_b64(content, to_str=True)
                         model_in_table.f_size = sys.getsizeof(model_in_table.f_content)
                         model_in_table.f_slice_index = slice_index
                         if force_update:
                             model_in_table.save(only=[MachineLearningModel.f_content, MachineLearningModel.f_size,
                                                       MachineLearningModel.f_update_time, MachineLearningModel.f_slice_index])
                             LOGGER.info("update model {} {} slice index {} content".format(model_id, model_version, slice_index))
                         else:
                             model_in_table.save(force_insert=True)
                         LOGGER.info("insert model {} {} slice index {} content".format(model_id, model_version, slice_index))
                         slice_index += 1
                     else:
                         break
                 LOGGER.info("Store model {} {} to mysql successfully".format(model_id,  model_version))
         self.close_connection()
     except Exception as e:
         LOGGER.exception(e)
         raise Exception("Store model {} {} to mysql failed".format(model_id, model_version))
Example #30
    def save_machine_learning_model_info(self):
        try:
            record = MLModel.get_or_none(
                MLModel.f_model_version == self.job_id)
            if not record:
                job = Job.get_or_none(Job.f_job_id == self.job_id)
                if job:
                    job_data = job.to_json()
                    MLModel.create(
                        f_role=self.role,
                        f_party_id=self.party_id,
                        f_roles=job_data.get("f_roles"),
                        f_model_id=self.model_id,
                        f_model_version=self.model_version,
                        f_job_id=job_data.get("f_job_id"),
                        f_create_time=current_timestamp(),
                        f_initiator_role=job_data.get('f_initiator_role'),
                        f_initiator_party_id=job_data.get(
                            'f_initiator_party_id'),
                        f_runtime_conf=job_data.get('f_runtime_conf'),
                        f_work_mode=job_data.get('f_work_mode'),
                        f_dsl=job_data.get('f_dsl'),
                        f_train_runtime_conf=job_data.get(
                            'f_train_runtime_conf'),
                        f_size=self.get_model_size(),
                        f_job_status=job_data.get('f_status'))

                    schedule_logger(self.job_id).info(
                        'save {} model info done. model id: {}, model version: {}.'
                        .format(self.job_id, self.model_id,
                                self.model_version))
                else:
                    schedule_logger(self.job_id).info(
                        'save {} model info failed, no job found in db. '
                        'model id: {}, model version: {}.'.format(
                            self.job_id, self.model_id, self.model_version))
            else:
                schedule_logger(self.job_id).info(
                    'model {} info already exists in database.'.format(
                        self.job_id))
        except Exception as e:
            schedule_logger(self.job_id).exception(e)