def __init__(self): self.logger = Logger("executor").getlog() self.aps_log = Logger("apscheduler").getlog() self.config = ConfigUtil() self.dboption = DBOption() self.process_running = {} self.scheduler = BlockingScheduler() self.monitor = Monitor()
import json
import urllib2


class SMSUtil(object):
    def __init__(self):
        self.config = ConfigUtil()

    def send(self, phone, content):
        """phone: comma-separated numbers; content: the message body"""
        data = {
            "mobile": phone,
            "template": "super_template",
            "data": {
                "content": content
            }
        }
        host = self.config.get("sms.host")
        request = urllib2.Request(url="http://" + host + "/api/v1/sms/send/template",
                                  data=json.dumps(data))
        request.add_header('Content-Type', 'application/json')
        response = urllib2.urlopen(request)
        rep = response.read()
        return rep
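A minimal usage sketch, assuming "sms.host" is configured and the gateway above is reachable (the numbers and message are made up):

    sms = SMSUtil()
    rep = sms.send("13800000000,13900000000", "job DEMO_JOB failed on executor")
    print(rep)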
def export(self, outfilename, dbname=None):
    if dbname is None:
        dbname = self.datasource.get_db_name()
    outputdir = ConfigUtil.instance().outputdir
    outpath = os.path.join(os.getcwd(), outputdir)
    mkdir_p(outpath)
    outpath = os.path.join(outpath, outfilename)
    self.export_output(outpath, dbname)
def install_log():
    log_level = str(ConfigUtil.instance().log_level
                    if hasattr(ConfigUtil.instance(), "log_level") else "INFO")
    logger = logging.getLogger()  # root logger; must exist before handlers are attached
    # rotate the log
    if sys.hexversion > 0x20700f0:
        # Python 2.7+ can take the full logging config from the YAML file
        logging.config.dictConfig(
            yaml.load(open(ConfigUtil.instance().loggingyaml, 'r')))
    else:
        log_file = 'logging.%s-%d.log' % (time.strftime(
            "%Y-%m-%d--%H-%M-%S", time.localtime()), os.getpid())
        timelog = logging.handlers.TimedRotatingFileHandler(
            log_file, 'midnight', 1, 0)
        logger.addHandler(timelog)
    logger.setLevel(log_level)
    stdout_encoding()
def __init__(self, name):
    self.config = ConfigUtil()
    # create a logger
    self.logger = logging.getLogger(name)
    self.logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
    # write to a log file, rotated at midnight, keeping 10 backups
    log_path = self.config.get("shcedule.log.path")
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_file = log_path + "/" + name + ".log"
    handler = logging.handlers.TimedRotatingFileHandler(
        log_file, when="midnight", backupCount=10)
    # to log to the console instead:
    # handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    self.logger.addHandler(handler)
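Other snippets call Logger(name).getlog(), which the fragment above omits; a plausible definition (an assumption, not taken from the source) is simply:

    def getlog(self):
        # expose the logger configured in __init__
        return self.logger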
import os
import subprocess
import traceback


class RunCommand(object):
    def __init__(self):
        self.config = ConfigUtil()
        self.dboption = DBOption()

    # run command
    def run_command(self, job_name):
        try:
            etl_job = self.dboption.get_job_info(job_name)
            job_script = etl_job["job_script"]
            job_script = self.config.get("job.script.path") + "/script/" + job_script
            print("job to run: " + job_name + " script location: " + job_script)
            if not os.path.exists(job_script):
                raise Exception("can't find run script:" + job_script)
            extend = os.path.splitext(job_script)[1]
            if extend == ".py":
                child = subprocess.Popen(
                    [CommonUtil.python_bin(self.config), job_script, "-job", job_name],
                    stdout=None, stderr=subprocess.STDOUT, shell=False)
                if child is None:
                    raise Exception("failed to spawn child process for script: " + job_script)
                print("spawned child process: " + str(child.pid))
                code = child.wait()
                return code
            elif extend == ".sh":
                # with shell=True the command must be a single string; passing a
                # list here would hand "-job" to the shell instead of the script
                child = subprocess.Popen(job_script + " -job " + job_name,
                                         stdout=None, stderr=subprocess.STDOUT, shell=True)
                if child is None:
                    raise Exception("failed to spawn child process for script: " + job_script)
                print("spawned child process: " + str(child.pid))
                code = child.wait()
                return code
            elif extend == ".yml":
                return self.run_yaml(job_script)
            else:
                raise Exception("only python, shell and yaml scripts are supported")
        except Exception, e:
            print("job failed on executor: " + job_name)
            print(traceback.format_exc())
            return 1
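run_command returns the child's exit code (1 on any error), so a thin CLI wrapper could propagate it; a sketch, with a hypothetical job name and argument parsing omitted:

    if __name__ == "__main__":
        code = RunCommand().run_command("DEMO_JOB")  # hypothetical job name
        sys.exit(code)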
import time

import MySQLdb
from DBUtils.PooledDB import PooledDB


class DBUtil(object):
    def __init__(self):
        self.config = ConfigUtil()
        self.host = self.config.get("mysql.host")
        self.port = self.config.get("mysql.port")
        self.db = self.config.get("mysql.db")
        self.username = self.config.get("mysql.username")
        self.password = self.config.get("mysql.password")
        self.logger = Logger("db").getlog()
        self.pool = PooledDB(creator=MySQLdb, mincached=1, maxcached=20,
                             host=self.host, port=int(self.port),
                             user=self.username, passwd=self.password,
                             db=self.db, use_unicode=True, charset="utf8")

    def get_connection(self):
        try:
            mysql_con = self.pool.connection()
            return mysql_con
        except Exception, e:
            self.logger.error(e)
            # retry up to 3 times, pausing 5 seconds between attempts
            for i in range(3):
                try:
                    time.sleep(5)
                    mysql_con = self.pool.connection()
                    return mysql_con
                except Exception, e:
                    self.logger.error(e)
                    self.logger.error("database connection failed, retry attempt " + str(i + 1))
            return None
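A minimal usage sketch, assuming the t_etl_job table referenced elsewhere in this repo (the job name is made up):

    db_util = DBUtil()
    con = db_util.get_connection()
    if con is not None:
        cursor = con.cursor()
        cursor.execute("select job_status from t_etl_job where job_name = %s", ("DEMO_JOB",))
        row = cursor.fetchone()
        cursor.close()
        con.close()  # with PooledDB this returns the connection to the pool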
def __init__(self):
    self.config = ConfigUtil()
class ETLUtil(object):
    def __init__(self):
        self.dboption = DBOption()
        self.config = ConfigUtil()
        self.time_format = ("time trigger format: job name,time,trigger day (0 = every day),"
                            "trigger hour,trigger minute,interval (day|month),owner,script path")
        self.depdency_format = ("dependency trigger format: job name,dependency,"
                                "dependency jobs (space separated),stream job,owner,script path")

    def check_trigger_job(self, day, interval):
        if int(day) == 0 and interval == 'month':
            print("day = 0 means trigger every day, so the interval must be 'day'")
            return False
        if int(day) != 0 and interval == 'day':
            print("interval 'day' means trigger every day, so the trigger day must be 0")
            return False
        return True

    def remove_etl_job(self, job_name, should_check=True):
        job_info = self.dboption.get_job_info(job_name)
        if job_info is None:
            print(job_name + " does not exist, nothing to remove")
            return
        if should_check:
            # refuse to remove a job that other jobs still depend on
            dependened_jobs = self.dboption.get_depended_job(job_name)
            depended = False
            if dependened_jobs is not None:
                for dependened_job in dependened_jobs:
                    print(dependened_job["job_name"] + " depends on " + job_name)
                    depended = True
            if depended:
                print(job_name + " is depended on and cannot be removed")
                return
        stream_jobs = self.dboption.get_stream_job(job_name)
        stream_job_set = set()
        for stream_job in stream_jobs:
            stream_job_set.add(stream_job["job_name"])
        print("removing job_name:" + job_name + " will NOT remove the jobs it triggers: ["
              + ",".join(stream_job_set) + "]!!!")
        # remove the time trigger
        self.dboption.remove_etl_job_trigger(job_name)
        # remove the stream (triggered job) records
        self.dboption.remove_etl_stream_job(job_name)
        # remove the dependency records
        self.dboption.remove_etl_dependency_job(job_name)
        # remove the job configuration itself
        self.dboption.remove_etl_job(job_name)

    def parse_line(self, line):
        if line is None or len(line.strip()) == 0:
            # skip empty records
            pass
        else:
            line = line.strip()
            print(line)
            line_array = line.split(",")
            job_name = line_array[0].upper()
            job_info = self.dboption.get_job_info(job_name)
            job_trigger_info = self.dboption.get_etl_job_trigger(job_name)
            if job_info or job_trigger_info:
                print("Job:" + job_name + " already exists; removing it before re-creating!")
                self.remove_etl_job(job_name, should_check=False)
            trigger_type = line_array[1]
            man = line_array[len(line_array) - 2]
            script = line_array[len(line_array) - 1].strip()
            script_path = self.config.get("job.script.path") + "/script/" + script
            self.check_script_path(script_path)
            if trigger_type and trigger_type in ("time", "dependency"):
                if trigger_type == "time":
                    day = line_array[2]
                    hour = line_array[3]
                    minute = line_array[4]
                    interval = line_array[5]
                    valid = self.check_trigger_job(day, interval)
                    if not valid:
                        raise Exception("bad job configuration")
                    deps = line_array[6]
                    add_job_dep_sets = set()
                    # an optional seventh field lists extra dependencies
                    if deps != man:
                        print("time trigger also declares dependencies: " + deps)
                        for dep_job in deps.split(" "):
                            job = self.dboption.get_job_info(dep_job)
                            if job is None:
                                print("dependency job: " + str(dep_job) + " does not exist")
                                raise Exception("dependency job " + dep_job + " does not exist")
                            add_job_dep_sets.add(dep_job)
                        print("dependencies to configure: " + ",".join(add_job_dep_sets))
                    code = self.dboption.save_time_trigger_job(job_name, trigger_type, day, hour,
                                                               minute, interval, man, script,
                                                               add_job_dep_sets)
                    if code == 1:
                        print("time-trigger job added successfully")
                    else:
                        print("failed to add time-trigger job")
                elif trigger_type == "dependency":
                    dep_jobs = line_array[2].strip().upper()
                    stream = line_array[3].strip().upper()
                    # the dependency jobs must already exist
                    add_job_dep_sets = set()
                    for dep_job in dep_jobs.split(" "):
                        job = self.dboption.get_job_info(dep_job)
                        if job is None:
                            print("dependency job: " + str(dep_job) + " does not exist")
                            raise Exception("dependency job " + dep_job + " does not exist")
                        add_job_dep_sets.add(dep_job)
                    print("dependencies to configure: " + ",".join(add_job_dep_sets))
                    stream_job = self.dboption.get_job_info(stream)
                    if stream_job is None:
                        raise Exception("Job:" + stream + " does not exist")
                    code = self.dboption.save_depdency_trigger_job(job_name, trigger_type,
                                                                   add_job_dep_sets, stream,
                                                                   man, script)
                    if code == 1:
print("添加依赖触发Job 成功") else: print("添加依赖触发 Job 失败") else: raise Exception("配置 job 触发方式 :" + str(trigger_type)) print(self.time_format) print(self.depdency_format) raise Exception("配置格式错误") def check_script_path(self, path): exists = os.path.exists(path) if not exists: raise Exception("脚本:" + path + " 不存在") def query_etl_job(self, job_name): print("查询Job:" + job_name) etl_job = self.dboption.get_job_info(job_name) if etl_job is None: print("没有找Job:" + job_name) else: print("------job-----") print("job:" + str(etl_job)) trigger = etl_job["job_trigger"] if trigger == "time": etl_job_trigger = self.dboption.get_etl_job_trigger(job_name) print("-------trigger-----") print(str(etl_job_trigger)) dep_jobs = self.dboption.get_dependency_job(job_name) print("------deps-----") if dep_jobs is not None and len(dep_jobs) != 0: for dep_job in dep_jobs: print(str(dep_job['dependency_job'])) print("-----nexts-----") next_jobs = self.dboption.get_stream_job(job_name) if next_jobs is not None and len(next_jobs) != 0: for next_job in next_jobs: print(str(next_job['stream_job'])) print("-----before----") before = self.dboption.get_before_job(job_name) if before is not None: print(str(before['job_name'])) def read_file(self, path): file_handler = open(path, 'r') for line in file_handler.readlines(): if line is not None and len(line) > 0 and not line.startswith("#"): self.parse_line(line) def rename_job(self,etl_job_name): etl_job_array = etl_job_name.strip().split(" ") if etl_job_array is None or len(etl_job_array) != 3: raise Exception("修改job 名称格式: 原名称 新名称 脚本相对路径") from_job = etl_job_array[0].strip() to_job = etl_job_array[1].strip() print("修改的名称:" + from_job + " -> " + to_job) from_job_info = self.dboption.get_job_info(from_job) if not from_job_info: raise Exception("原 Job 名称 " + from_job + " 不存在") to_job_info = self.dboption.get_job_info(to_job) if to_job_info: raise Exception("新 Job 名称 " + from_job + " 已存在") yaml_file = etl_job_array[2].strip().lower() script_path = self.config.get("job.script.path") + "/script/" + yaml_file self.check_script_path(script_path) # trigger update_trigger = "update t_etl_job_trigger set job_name = %s where job_name = %s" self.dboption.execute_sql(update_trigger, (to_job, from_job)) # stream update_stream_1 = "update t_etl_job_stream set job_name = %s where job_name = %s" self.dboption.execute_sql(update_stream_1, (to_job, from_job)) update_stream_2 = "update t_etl_job_stream set stream_job = %s where stream_job = %s" self.dboption.execute_sql(update_stream_2, (to_job, from_job)) # dependency update_dependency_1 = "update t_etl_job_dependency set job_name = %s where job_name = %s" self.dboption.execute_sql(update_dependency_1, (to_job, from_job)) update_dependency_2 = "update t_etl_job_dependency set dependency_job = %s where dependency_job = %s" self.dboption.execute_sql(update_dependency_2, (to_job, from_job)) # job update_job = "update t_etl_job set job_name = %s , job_script=%s where job_name = %s" self.dboption.execute_sql(update_job, (to_job, yaml_file, from_job)) self.query_etl_job(to_job)
class Executor(object):
    def __init__(self):
        self.logger = Logger("executor").getlog()
        self.aps_log = Logger("apscheduler").getlog()
        self.config = ConfigUtil()
        self.dboption = DBOption()
        self.process_running = {}
        self.scheduler = BlockingScheduler()
        self.monitor = Monitor()

    def run_queue_job_pending(self):
        """Run jobs in t_etl_job_queue that are in Pending state."""
        self.logger.info("\n")
        self.logger.info("... interval run run_queue_job_pending ....")
        try:
            self.check_process_state()  # check the state of already spawned child processes
            logpath = self.config.get("job.log.path")
            if logpath is None or len(logpath.strip()) == 0:
                raise Exception("can't find slave job.log.path")
            if not os.path.exists(logpath):
                os.makedirs(logpath)
            today = DateUtil.get_today()
            today_log_dir = logpath + "/" + today
            if not os.path.exists(today_log_dir):
                os.makedirs(today_log_dir)
            queue_job = self.dboption.get_queue_job_pending()
            if queue_job is not None:
                job_name = queue_job["job_name"]
                etl_job = self.dboption.get_job_info(job_name)
                job_status = etl_job["job_status"]
                job_retry_count = etl_job["retry_count"]
                run_number = queue_job["run_number"]
                if not self.check_should_run(job_name, job_status, job_retry_count, run_number):
                    return
                logfile = today_log_dir + "/" + job_name + "_" + today + ".log." + str(run_number)
                bufsize = 0  # unbuffered, so the job log is written in real time
                logfile_handler = open(logfile, 'w', bufsize)
                python_bin = CommonUtil.python_bin(self.config)
                run_path = project_path + "/bin/" + "runcommand.py"
                child = subprocess.Popen(python_bin + [run_path, "-job", job_name],
                                         stdout=logfile_handler.fileno(),
                                         stderr=subprocess.STDOUT, shell=False)
                pid = child.pid
                if pid > 0:
                    self.logger.info("spawned child process: " + str(pid) + " to run job: " + str(job_name))
                    code = self.dboption.update_job_running(job_name)
                    if code != 1:
                        try:
                            self.logger.info("failed to mark job: " + job_name
                                             + " as Running, terminating the spawned process")
                            self.terminate_process(child, logfile_handler)
                        except Exception, e:
                            self.logger.error(e)
                            self.logger.error("terminating child process failed")
                        logfile_handler.flush()
                        logfile_handler.close()
                    else:
                        self.logger.info("marked job: " + job_name + " as Running")
                        code = self.dboption.update_job_queue_done(job_name)  # FixMe: transaction issue
                        self.logger.info("marked queue job: " + str(job_name)
                                         + " as Done, rows affected: " + str(code))
                        if code != 1:
                            self.logger.error("failed to mark queue job: " + job_name + " as Done")
                            self.terminate_process(child, logfile_handler)
                            self.logger.info("resetting job_name: " + job_name
                                             + " to Pending so it runs next time")
                            self.dboption.update_job_pending_from_running(job_name)
                        else:
                            self.process_running[child] = {
                                "logfile_handler": logfile_handler,
                                "job_name": job_name,
                                "pid": pid
                            }
                else:
                    self.logger.error("bad pid when spawning child process: " + str(pid))
                    logfile_handler.flush()
                    logfile_handler.close()
            else:
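run_queue_job_pending is designed to be polled; with the BlockingScheduler created in __init__, the wiring might look like this (the 10-second interval is illustrative):

    executor = Executor()
    executor.scheduler.add_job(executor.run_queue_job_pending, "interval", seconds=10)
    executor.scheduler.start()  # blocks and fires the job on each interval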
def __init__(self):
    self.config = ConfigUtil()
    self.dbUtil = DBUtil()
    self.dboption = DBOption()
    self.smsUtil = SMSUtil()
#!/usr/bin/python
# -*- coding:utf-8 -*-
import os
import sys

# make the project root importable before loading project modules
project_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(project_path)

import MySQLdb
from configutil import ConfigUtil
from smsutil import SMSUtil
from dateutil import DateUtil

configUtil = ConfigUtil()
smsUtil = SMSUtil()


def get_zeus_connection():
    host = configUtil.get("mysql.host")
    username = configUtil.get("mysql.username")
    password = configUtil.get("mysql.password")
    port = int(configUtil.get("mysql.port"))
    db = "db_zeus"
    connection = MySQLdb.connect(host, username, password, db, port,
                                 use_unicode=True, charset='utf8')
    return connection
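A minimal sketch of using the helper above (the query is illustrative; MySQLdb raises if db_zeus is unreachable):

    con = get_zeus_connection()
    cursor = con.cursor()
    cursor.execute("select 1")
    print(cursor.fetchone())
    cursor.close()
    con.close()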
import os

from reportlab.lib import fonts, colors
from reportlab.lib.pagesizes import letter, inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

from base_format import BaseFormat
from configutil import ConfigUtil

# register a CJK-capable TrueType font so Chinese text renders in the PDF
# ttf = os.path.join(os.getcwd(), 'templates', "simsun.ttf")
ttf = ConfigUtil.instance().pdfttf
ttfname = 'sim'
pdfmetrics.registerFont(TTFont(ttfname, ttf))
fonts.addMapping(ttfname, 0, 0, ttfname)
fonts.addMapping(ttfname, 0, 1, ttfname)


class PdfFormat(BaseFormat):
    def __init__(self, filename, headings):
        self.suffix = "pdf"  # output file extension
        super(PdfFormat, self).__init__(filename, headings)
        self.doc = SimpleDocTemplate(self.outname, pagesize=letter)
        self.elements = []
def __init__(self):
    self.scheduler = BlockingScheduler()
    self.logger = Logger("scheduler").getlog()
    self.aps_log = Logger("apscheduler").getlog()
    self.dboption = DBOption()
    self.config = ConfigUtil()
def __init__(self):
    self.config = ConfigUtil()
    self.dboption = DBOption()
def is_debug():
    return ConfigUtil.instance().log_level == "DEBUG"