Exemplo n.º 1
0
    def get_job(self):
        """
        Fetch queued job references, dispatch them for execution and mark
        them RUNNING.

        Loops until ``self._stop`` is set. Each iteration takes one
        ``(role, node, jid)`` tuple from ``self.queues["get_job"]`` while
        holding ``self.locks["get_job"]``, reads the job znode and decrypts
        the payload when ``env`` is ``"aes"``. Jobs whose status is not
        ``READY`` are skipped. READY jobs are put on the ``"sigle_run"``
        queue when the payload has a truthy ``nthread`` entry, otherwise on
        ``"mult_run"``; the znode is then rewritten with status ``RUNNING``.

        On any error during processing the traceback is logged and the job
        reference is put back on the ``"get_job"`` queue.
        """
        while 1:
            if self._stop:
                log.warn("get_job stopping")
                return
            if not self.locks["get_job"].acquire():
                continue
            # Release the lock even when the queue read raises; otherwise
            # the lock would stay held and block every later acquire.
            try:
                if self.queues["get_job"].empty():
                    job_ref = None
                else:
                    job_ref = self.queues["get_job"].get(timeout=5)
                    self.queues["get_job"].task_done()
            finally:
                self.locks["get_job"].release()
            if job_ref is None:
                # Nothing queued: back off briefly before polling again.
                time.sleep(0.5)
                continue
            dist_role, dist_node, jid = job_ref
            node_base_dir = self.zookeeper_conf.nodes
            jid_path = os.path.join(node_base_dir, dist_role, dist_node,
                                    "jobs", jid)
            try:
                job = self.zkconn.get(jid_path)[0]
                data = msgpack.loads(job)
                if data["env"] == "aes":
                    crypt = Crypt(self.main_conf.token)
                    data["payload"] = crypt.loads(data.get("payload"))
                if data["payload"]["status"] != "READY":
                    continue
                data["payload"]["role"] = dist_role
                data["payload"]["node_name"] = dist_node
                # Hand the job to the matching execution queue.
                if data["payload"].get("nthread"):
                    run_queue = self.queues["sigle_run"]
                else:
                    run_queue = self.queues["mult_run"]
                run_queue.put(msgpack.dumps(data), timeout=5)

                # Persist the new RUNNING status, re-encrypting the payload
                # when the job uses the "aes" envelope.
                data["payload"]["status"] = "RUNNING"
                if data["env"] == "aes":
                    crypt = Crypt(self.main_conf.token)
                    data["payload"] = crypt.dumps(data.get("payload"))
                self.zkconn.set(jid_path, msgpack.dumps(data))
            except Exception:
                log.error(traceback.format_exc())
                # Re-queue the reference so the job is attempted again.
                self.queues["get_job"].put((dist_role, dist_node, jid))
Exemplo n.º 2
0
 def __init__(self, config):
     """
     Build the instance's runtime state from the parsed configuration.

     @param config dict: must provide the "swall" and "fs" sections; the
         whole mapping is also passed to MQ
     """
     # Per-section configuration wrappers.
     self.main_conf = Conf(config["swall"])
     self.fs_conf = Conf(config["fs"])
     # Node identity as declared in the "swall" section.
     self.node = self.main_conf.node_name
     self.node_ip = self.main_conf.node_ip
     # Whatever load_module() returns; presumably the functions this node
     # can run -- confirm against load_module.
     self.node_funcs = self.load_module()
     self.mq = MQ(config)
     # Stop flag, initially cleared.
     self._stop = 0
     self.sys_envs = self.load_env()
     # Register this instance with a fresh JobSubject.
     self.job_sub = JobSubject()
     self.job_sub.register(self)
     # Crypt helper keyed with the configured token.
     self.crypt = Crypt(self.main_conf.token)
Exemplo n.º 3
0
 def get_job_info(self, node_name, jid):
     """
     Return the payload (status information) of a job.

     The payload is decrypted when the job envelope is marked ``"aes"``;
     for any other envelope the stored payload is returned as-is, so
     unencrypted jobs no longer come back as an empty dict.

     @param node_name string: node name
     @param jid string: job id
     @return dict: the job payload, or {} when the job has no stored result
     """
     data = self.mq.get_res(node_name, jid)
     if not data:
         return {}
     if data.get("env") == "aes":
         key_str = self.main_conf.token
         crypt = Crypt(key_str)
         return crypt.loads(data.get("payload"))
     return data.get("payload", {})
Exemplo n.º 4
0
    def crond_clear_job(self):
        """
        Periodically delete job znodes that are older than the retention time.

        Loops until ``self._stop`` is set, sweeping every node's ``jobs``
        directory and then sleeping 5 seconds. A job is deleted when the
        seconds elapsed since its znode ``mtime`` reach
        ``main_conf.keep_job_time`` (default 604800). Errors during a sweep
        are logged and the sweep is retried on the next cycle.
        """
        while 1:
            if self._stop:
                log.warn("crond_clear_job stopping")
                return
            try:
                keep_job_time = getattr(self.main_conf, "keep_job_time",
                                        604800)
                # Snapshot the node names before iterating.
                for node_name in list(self.nodes.keys()):
                    job_path = os.path.join(self.zookeeper_conf.nodes,
                                            self.nodes[node_name]["role"],
                                            node_name, "jobs")
                    for jid in self.zkconn.get_children(job_path):
                        jid_path = os.path.join(job_path, jid)
                        znode = self.zkconn.get(jid_path)
                        # The stored mtime is divided by 1000 to get seconds.
                        mtime = znode[1]["mtime"] / 1000
                        # time.time() is portable, unlike strftime('%s').
                        delay_sec = int(time.time()) - mtime
                        if delay_sec < keep_job_time:
                            continue

                        # Only expired jobs are decoded; the payload is
                        # needed solely for the log message below.
                        data = msgpack.loads(znode[0])
                        if data["env"] == "aes":
                            crypt = Crypt(self.main_conf.token)
                            data["payload"] = crypt.loads(data.get("payload"))

                        if self.zkconn.delete(jid_path):
                            log.info(
                                "delete the timeout %s %s job [%s] ok" %
                                (keep_job_time, data["payload"]["status"],
                                 jid))
                        else:
                            log.error(
                                "delete the timeout %s %s job [%s] fail" %
                                (keep_job_time, data["payload"]["status"],
                                 jid))
            except Exception:
                log.error(traceback.format_exc())
            time.sleep(5)
Exemplo n.º 5
0
 def get_job_info(self, role, node_name, jid):
     """
     Return the payload (status information) of a job stored in ZooKeeper.

     The payload is decrypted when the job envelope is marked ``"aes"``;
     for any other envelope the stored payload is returned as-is, so
     unencrypted jobs no longer come back as an empty dict.

     @param role string: role
     @param node_name string: node name
     @param jid string: job id
     @return dict: the job payload, or {} when the job znode does not exist
     """
     node_base_dir = self.zookeeper_conf.nodes
     jid_path = os.path.join(node_base_dir, role, node_name, "jobs", jid)
     if not self.zkconn.exists(jid_path):
         return {}
     data = msgpack.loads(self.zkconn.get(jid_path)[0])
     if data.get("env") == "aes":
         key_str = self.main_conf.token
         crypt = Crypt(key_str)
         return crypt.loads(data.get("payload"))
     return data.get("payload", {})
Exemplo n.º 6
0
 def _send_job(self, data, role, node_name):
     """
     Write a job into the target node's ZooKeeper job directory.

     When ``data["env"]`` is ``"aes"`` the payload is encrypted first; note
     that ``data["payload"]`` is replaced in place on the caller's dict.

     @param data dict: job envelope; ``data["payload"]["jid"]`` names the znode
     @param role string: role of the target node
     @param node_name string: name of the target node
     @return int:1 for success else 0
     """
     ret = 0
     try:
         job_path = os.path.join(self.zookeeper_conf.nodes, role, node_name,
                                 "jobs", data["payload"]["jid"])
         if data.get("env") == "aes":
             key_str = self.main_conf.token
             crypt = Crypt(key_str)
             data["payload"] = crypt.dumps(data.get("payload"))
         self.zkconn.create(job_path, msgpack.dumps(data))
         ret = 1
     except ZKClientError as e:
         log.error("send_job error:%s" % e.message)
     # The status was computed but never handed back to the caller.
     return ret
Exemplo n.º 7
0
    def send_ret(self):
        """
        Publish job results back to ZooKeeper.

        Loops until ``self._stop`` is set. Each iteration takes one packed
        result from ``self.queues["ret_job"]`` while holding
        ``self.locks["ret_job"]``, encrypts the payload when ``env`` is
        ``"aes"`` and writes the result to the job's znode. Errors while
        writing are logged; the result is not re-queued.
        """
        while 1:
            if self._stop:
                log.warn("send_ret stopping")
                return
            if not self.locks["ret_job"].acquire():
                continue
            # Release the lock even when the queue read raises; otherwise
            # the lock would stay held and block every later acquire.
            try:
                if self.queues["ret_job"].empty():
                    raw = None
                else:
                    raw = self.queues["ret_job"].get(timeout=5)
                    self.queues["ret_job"].task_done()
            finally:
                self.locks["ret_job"].release()
            if raw is None:
                # Nothing queued: back off briefly before polling again.
                time.sleep(0.5)
                continue
            data = msgpack.loads(raw)
            node_base_dir = self.zookeeper_conf.nodes
            jid_path = os.path.join(node_base_dir, data["payload"]["role"],
                                    data["payload"]["node_name"], "jobs",
                                    data["payload"]["jid"])
            log.info(
                "[%s %s] send the result of job [%s]" %
                (data["payload"]["role"], data["payload"]["node_name"],
                 data["payload"]["jid"]))
            try:
                if data["env"] == "aes":
                    key_str = self.main_conf.token
                    crypt = Crypt(key_str)
                    data["payload"] = crypt.dumps(data.get("payload"))
                packed = msgpack.dumps(data)

                # A set has been seen to report success without the znode
                # being updated, so the write is deliberately issued twice.
                self.zkconn.set(jid_path, packed)
                time.sleep(0.0001)
                set_ret = self.zkconn.set(jid_path, packed)

                if set_ret != 0:
                    log.error("send result error,retcode is [%s]" %
                              set_ret)
            except Exception:
                log.error(traceback.format_exc())
Exemplo n.º 8
0
 def _send_job(self, node_data):
     """
     Send a batch of jobs through the message queue.

     Payloads of entries whose ``env`` is ``"aes"`` are encrypted first;
     note that ``data["payload"]`` is replaced in place on each entry.

     @param node_data iterable: entries whose item 0 is the job dict and
         item 1 is the target node name
     @return int:1 for success else 0
     """
     ret = 0
     try:
         key_str = self.main_conf.token
         crypt = Crypt(key_str)
         jobs = []
         for node in node_data:
             data = node[0]
             node_name = node[1]
             if data.get("env") == "aes":
                 data["payload"] = crypt.dumps(data.get("payload"))
             jobs.append((node_name, data))
         if jobs:
             self.keeper.mq.mset_job(jobs)
         ret = 1
     except Exception:
         log.error("send_job error:%s" % traceback.format_exc())
     # The status was computed but never handed back to the caller.
     return ret
Exemplo n.º 9
0
    def get_job(self, job_data):
        """
        Fetch job records for several nodes and localize ``sys.get`` results.

        Each record returned by ``self.mq.mget_job(job_data)`` is decrypted
        when its ``env`` is ``"aes"``. For a finished ``sys.get`` job with a
        non-empty return value (a file id) that is not a ``help`` call, the
        file is copied from the local cache to the requested local path,
        downloading it into the cache first when it is missing, and
        ``payload["return"]`` is rewritten to the local path ("" when the
        copy fails).

        @param job_data: passed unchanged to ``self.mq.mget_job``
        @return dict: node -> job record; on error the traceback is logged and
            the records collected so far are returned
        """
        ret = {}
        key_str = self.main_conf.token
        crypt = Crypt(key_str)
        try:
            rets = self.mq.mget_job(job_data)
            for node, data in rets.items():
                if data:
                    if data.get("env") == "aes":
                        data["payload"] = crypt.loads(data.get("payload"))
                    payload = data["payload"]
                    if (payload["cmd"] == "sys.get"
                            and payload["status"] == "FINISH"
                            and payload["return"] != ""):
                        args = payload["args"]
                        # Empty args (paths given as keywords only) must not
                        # raise IndexError on the "help" check.
                        if not (args and args[0] == "help"):
                            fid = payload["return"]
                            kwargs = payload["kwargs"]
                            if ("local_path" in kwargs
                                    and "remote_path" in kwargs):
                                local_path = kwargs["local_path"]
                                remote_path = kwargs["remote_path"]
                            else:
                                local_path = args[1]
                                remote_path = args[0]
                            stat = kwargs.get("stat")
                            # A directory target keeps the remote file name.
                            if (local_path.endswith('/')
                                    or os.path.isdir(local_path)):
                                local_path = os.path.join(
                                    local_path, os.path.basename(remote_path))
                            if checksum(local_path) != fid:
                                cache_dir = app_abs_path(self.main_conf.cache)
                                cached_file = os.path.join(cache_dir, fid)
                                if not check_cache(cache_dir, fid):
                                    FsClient = load_fclient(
                                        app_abs_path(self.main_conf.fs_plugin),
                                        ftype=self.fs_conf.fs_type)
                                    fscli = FsClient(self.fs_conf)
                                    fscli.download(fid, cached_file)

                                # Re-check: the download may not have
                                # produced a valid cache entry.
                                if check_cache(cache_dir, fid):
                                    if not make_dirs(
                                            os.path.dirname(local_path)):
                                        log.error("创建目标目录:%s失败" % local_path)
                                    if cp(cached_file, local_path, stat):
                                        payload["return"] = local_path
                                    else:
                                        payload["return"] = ""
                            else:
                                # The local file already matches the file id.
                                payload["return"] = local_path
                ret[node] = data

        except Exception:
            log.error(traceback.format_exc())
        # The result was built but never handed back to the caller.
        return ret
Exemplo n.º 10
0
    def get_job(self, role, node_name, jid):
        """
        Fetch one job record from ZooKeeper and localize a ``sys.get`` result.

        The record is decrypted when its ``env`` is ``"aes"``. For a finished
        ``sys.get`` job with a non-empty return value (a file id) that is not
        a ``help`` call, the file is copied from the local cache to the
        requested local path, downloading it into the cache first when it is
        missing, and ``payload["return"]`` is rewritten to the local path
        ("" when the copy fails).

        @param role string: role
        @param node_name string: node name
        @param jid string: job id
        @return dict:a job info, or {} when a ZKClientError or
            KeyboardInterrupt was caught and logged
        """
        ret = {}
        try:
            node_path = os.path.join(self.zookeeper_conf.nodes, role,
                                     node_name, "jobs", jid)
            data = msgpack.loads(self.zkconn.get(node_path)[0])
            if data.get("env") == "aes":
                key_str = self.main_conf.token
                crypt = Crypt(key_str)
                data["payload"] = crypt.loads(data.get("payload"))
            payload = data["payload"]
            if (payload["cmd"] == "sys.get"
                    and payload["status"] == "FINISH"
                    and payload["return"] != ""):
                args = payload["args"]
                # Empty args (paths given as keywords only) must not raise
                # IndexError on the "help" check.
                if not (args and args[0] == "help"):
                    fid = payload["return"]
                    kwargs = payload["kwargs"]
                    if "local_path" in kwargs and "remote_path" in kwargs:
                        local_path = kwargs["local_path"]
                        remote_path = kwargs["remote_path"]
                    else:
                        local_path = args[1]
                        remote_path = args[0]
                    stat = kwargs.get("stat")
                    # A directory target keeps the remote file name.
                    if local_path.endswith('/') or os.path.isdir(local_path):
                        local_path = os.path.join(
                            local_path, os.path.basename(remote_path))
                    if checksum(local_path) != fid:
                        cache_dir = app_abs_path(self.main_conf.cache)
                        cached_file = os.path.join(cache_dir, fid)
                        if not check_cache(cache_dir, fid):
                            FsClient = load_fclient(
                                app_abs_path(self.main_conf.fs_plugin),
                                ftype=self.fs_conf.fs_type)
                            fscli = FsClient(self.fs_conf)
                            fscli.download(fid, cached_file)

                        # Re-check: the download may not have produced a
                        # valid cache entry.
                        if check_cache(cache_dir, fid):
                            if not make_dirs(os.path.dirname(local_path)):
                                log.error("创建目标目录:%s失败" % local_path)
                            if cp(cached_file, local_path, stat):
                                payload["return"] = local_path
                            else:
                                payload["return"] = ""
                    else:
                        # The local file already matches the file id.
                        payload["return"] = local_path
            ret = data

        except (ZKClientError, KeyboardInterrupt) as e:
            log.error(e.message)
        # The result was built but never handed back to the caller.
        return ret