def get_monitor():
    n = Custom_MySQL(using='center_app')
    return n.query('select public_ip as ip,22 as port,"playcrab" as user,pwd \
                    from assets \
                    where public_ip ="115.29.10.48"')
def get_assets():
    n = Custom_MySQL(using='center_app')
    return n.query('select public_ip as ip,22 as port,"playcrab" as user,pwd \
                    from assets \
                    where public_ip !="" and public_ip !="NULL" \
                    and public_ip not in("115.29.12.230","115.29.12.219","49.213.111.2","49.213.111.3","49.213.111.4","49.213.111.5","49.213.111.6") \
                    order by id')
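# Usage sketch (hypothetical wiring): the rows returned by get_assets() are
# dicts with "ip", "port", "user" and "pwd" keys, the same shape as the host
# dict consumed by the ssh batch worker further below. A caller might do:
#
#   for host in get_assets():
#       host['cmd'] = 'uptime'        # remote command to run
#       host['batch_id'] = batch_id   # assumed to come from a batch table
#       run_worker(host)              # run_worker is a placeholder name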
def main():
    tables = excel_table_byindex()
    db = Custom_MySQL(using='center_app')
    for row in tables:
        if row['pub'] != "":
            param = {}
            param['pwd'] = row['pwd']
            db.update('assets', 'public_ip="%s"' % row['pub'], **param)
def get_result(self):
    '''
    Fetch the execution results of the child processes
    '''
    db = Custom_MySQL(using='log')
    result = db.query('select ip,result as data,IF(flag=2,1,0) as flag \
                       from batch_detail \
                       where batch_id ="%s"' % self.batch_id)
    db.close()
    print '===========', len(result), '================'
    self.result = result
def assets2appinfo(self):
    db = Custom_MySQL(using='center_app')
    # Insert rows that exist in assets but are missing from app_info
    sql = '''
        INSERT into app_info (assets_id,name,public_ip,inner_ip,main_category_id)
        SELECT id,hostname as name,public_ip,inner_ip,main_category_id
        from assets
        where public_ip not in (select public_ip from app_info)
        '''
    db.execute(sql)
    db.commit()
def call_back(self, data=[]):
    db = Custom_MySQL(using='log')
    sql = "insert into last_ip (id,ip,login_ip) values(null,%s,%s)"
    for result in data:
        if int(result['flag']) == 1:
            ips = json.loads(result['data'])
            host = result['ip']
            set_param = []
            for ip in ips:
                set_param.append(tuple([host, ip]))
            db.executemany(sql, set_param)
            db.commit()
        else:
            print result
def call_back(self, data=[]):
    db = Custom_MySQL(using='center_app')
    for result in data:
        if result != [] and result['flag'] == '1':
            param = {}
            param['is_manage'] = 1
            db.update('assets', 'public_ip="%s"' % result['ip'], **param)
            db.commit()
        else:
            print result
            param = {}
            param['is_manage'] = 0
            db.update('assets', 'public_ip="%s"' % result['ip'], **param)
            db.commit()
def run(self):
    '''
    Business entry point
    '''
    api_data = self.get_api_data()
    if api_data == None:
        return
    db = Custom_MySQL(using='center_app')
    # Update the application types of the main category
    app_type = {'app_type': ','.join(api_data['type'])}
    db.update('main_category', ' prefix="%s" ' % self.game_code, **app_type)
    game = db.get('select id from main_category where prefix="%s"' % self.game_code)
    main_category_id = game['id']
    # Fetch the district group information
    for dist in api_data['dists']:
        print '========' + dist['name'] + '+' + dist['code']
        sql = 'select count(id) as count from sub_category where main_category_id =' + str(main_category_id) + ' and name="' + dist['name'] + '"'
        count = db.count(sql)
        if count == None:
            print 'SQL Error:%s' % sql
            return False
        # Fields to write for the district group
        dist_data = {'prefix': dist['code'],
                     'main_category_id': main_category_id,
                     'name': dist['name'],
                     'platform': self.platform}
        # Insert the district group if it does not exist yet, otherwise update it
        if count['count'] == 0:
            db.insert('sub_category', **dist_data)
        else:
            db.update('sub_category', ' main_category_id =' + str(main_category_id) + ' and name="' + dist['name'] + '"', **dist_data)
class AddPartition():
    def __init__(self):
        #self.mysql= {'report_ares': Custom_MySQL(using='report_ares'),'report_hebe': Custom_MySQL(using='report_hebe'),'report_crius': Custom_MySQL(using='report_crius')}
        self.mysql = Custom_MySQL(using='hadoops2')

    def execPartitons(self, games, tables, start_day, end_day):
        # one "report_<game>" database per game
        for game in games:
            db = "report_" + game
            for table in tables:
                self.mysql.begin()
                do_date = start_day
                i = 0
                exec_partions_sql = "ALTER TABLE " + db + "." + table + " ADD PARTITION ("
                partition_sql = ""
                while do_date <= end_day:
                    i = i + 1
                    partition_name = "p" + str(do_date).replace('-', '')
                    is_exist = self.find_partition(db, table, partition_name)
                    if not is_exist:
                        partition_sql = partition_sql + "PARTITION %s VALUES LESS THAN (to_days('%s'))," % (partition_name, do_date)
                        #print partition_sql
                    do_date = start_day + datetime.timedelta(days=i)
                if len(partition_sql) > 0:
                    replace_reg = re.compile(r',$')
                    print "add partition db:%s table:%s ,start_day:%s,end_day:%s" % (db, table, start_day, end_day)
                    sql = exec_partions_sql + replace_reg.sub('', partition_sql) + ");"
                    print sql
                    self.mysql.execute(sql)
                    self.mysql.commit()

    def add_months(self, sourcedate, months):
        month = sourcedate.month - 1 + months
        year = int(sourcedate.year + month / 12)
        month = month % 12 + 1
        day = min(sourcedate.day, calendar.monthrange(year, month)[1])
        return datetime.date(year, month, day)

    def find_partition(self, db, table_name, partition_name):
        exis_row = self.mysql.query("select partition_name,partition_expression, partition_description,table_rows from information_schema.partitions"
                                    " where table_schema='%s' and table_name='%s' and partition_name='%s';" % (db, table_name, partition_name))
        if len(exis_row) > 0:
            print "exis partitons db:%s,table:%s,p_name:%s" % (db, table_name, partition_name)
            return True
        return False
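# Usage sketch for AddPartition (date range and table names are illustrative):
#
#   import datetime
#   adder = AddPartition()
#   adder.execPartitons(games=['ares'],
#                       tables=['login_log'],
#                       start_day=datetime.date(2016, 5, 1),
#                       end_day=datetime.date(2016, 5, 31))
#
# This emits one "ALTER TABLE report_ares.login_log ADD PARTITION (...)"
# statement containing a pYYYYMMDD partition for every day not yet present.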
def call_back(self, data=[]):
    db = Custom_MySQL(using='log')
    last_insert_id = db.insert('cpu_batch', **{})
    db.commit()
    sql = "insert into cpu_detail (id,ip,cpu_used) values(null,%s,%s)"
    for result in data:
        if int(result['flag']) == 1:
            ips = json.loads(result['data'])
            host = result['ip']
            set_param = []
            for ip in ips:
                set_param.append(tuple([host, ip]))
            db.executemany(sql, set_param)
            db.commit()
        else:
            print result
def run(self):
    '''
    Business entry point
    '''
    try:
        # Assets stay plain assets rather than appearing as applications; ops
        # manages the asset servers, i.e. the public side of each application
        #self.assets2appinfo()
        api_data = self.get_api_data()
        if api_data == None:
            return
        db = Custom_MySQL(using='center_app')
        game = db.get('select id from main_category where prefix="%s"' % self.game_code)
        main_category_id = game['id']
        # Fetch the district group information
        for dist in api_data['dists']:
            print '========' + dist['name'] + '+' + dist['code']
            sql = 'select id from sub_category where main_category_id =' + str(main_category_id) + ' and name="' + dist['name'] + '"'
            sub_category_id = db.get(sql)['id']
            # Write the per-district (non-shared) info into the database
            for app in dist['ips']:
                self.insert_db(main_category_id, sub_category_id, self.platform, app)
        # Handle the shared info
        #for app in api_data['global']:
        # Refresh the asset ids
        db.execute('update app_info as a left join assets as b on a.public_ip = b.public_ip set a.assets_id = b.id where a.public_ip is not NULL')
        db.execute('update app_info as a left join assets as b on a.inner_ip = b.inner_ip set a.assets_id = b.id where a.inner_ip is not NULL')
    except Exception as e:
        print e
def save_idcs(self):
    if self.data == None:
        return False
    db = Custom_MySQL(using='center_app')
    for param in self.data:
        sql = 'select count(*) as count from idc where prefix = %s'
        p = (param['prefix'],)
        count = db.count(sql, *p)
        # Check whether the idc already exists
        if count['count'] == 0:
            db.insert('idc', **param)
        else:
            db.update('idc', 'prefix="%s"' % param['prefix'], **param)
def save_hosts(self):
    if self.data == None:
        return False
    db = Custom_MySQL(using='center_app')
    for param in self.data:
        sql = 'select count(*) as count from assets where wxsn= %s'
        p = (param['wxsn'],)
        count = db.count(sql, *p)
        if count['count'] == 0:
            db.insert('assets', **param)
        else:
            db.update('assets', 'wxsn="%s"' % param['wxsn'], **param)
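# save_idcs and save_hosts above repeat the same count-then-insert-or-update
# pattern. A generic helper is sketched below; it only assumes the Custom_MySQL
# calls already used in this file (count/insert/update), and the name "upsert"
# is ours, not part of the original code:
def upsert(db, table, key_col, param):
    # Insert the row if no row with this key exists yet, otherwise update it
    count = db.count('select count(*) as count from %s where %s = %%s' % (table, key_col), *(param[key_col],))
    if count['count'] == 0:
        db.insert(table, **param)
    else:
        db.update(table, '%s="%s"' % (key_col, param[key_col]), **param)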
def run(self):
    db = Custom_MySQL(using='log')
    status = {'flag': 1}
    db.update('batch_detail', 'batch_id="%s" and ip ="%s"' % (self.host['batch_id'], self.host['ip']), **status)
    db.commit()
    db.close()
    try:
        # Open the connection
        self.ssh = paramiko.SSHClient()
        # Fall back to the public key when no password is given
        if self.host.get('pwd', True) == True:
            privatekeyfile = os.path.expanduser('/root/.ssh/id_rsa')
            paramiko.RSAKey.from_private_key_file(privatekeyfile)
        # Handle a missing known_hosts entry
        known_host = "/root/.ssh/known_hosts"
        self.ssh.load_system_host_keys(known_host)
        self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        #os.system('/opt/local/junos/junos')
        # Connect to the remote machine
        self.ssh.connect(
            hostname=self.host['ip'],
            port=int(self.host['port']),
            username=self.host['user'],
            password=self.host['pwd'],
            compress=True,
            timeout=20
        )
        # Collect the result of the remote command
        stdin, stdout, stderr = self.ssh.exec_command(self.host['cmd'], bufsize=65535, timeout=10)
        temp = stdout.readlines()
        db = Custom_MySQL(using='log')
        status = {'flag': 2, 'result': ''.join(temp)}
        db.update('batch_detail', 'batch_id="%s" and ip ="%s"' % (self.host['batch_id'], self.host['ip']), **status)
        db.commit()
        db.close()
        if temp == []:
            self.grandchild.put({'flag': '1', 'ip': self.host['ip'], 'data': temp})
        else:
            self.grandchild.put({'flag': '0', 'ip': self.host['ip'], 'data': temp})
        # Done with this host
        self.ssh.close()
    except:
        #print trace_back()
        # In case paramiko itself misbehaves, retry once through a plain shell ssh;
        # only if that also fails do we treat it as a real failure
        cmd = "ssh -p %s -o StrictHostKeyChecking=no %s@%s %s" % (self.host['port'], self.host['user'], self.host['ip'], self.host['cmd'])
        (status, output) = commands.getstatusoutput(cmd)
        if status == 0:
            db = Custom_MySQL(using='log')
            status = {'flag': 2, 'result': output}
            db.update('batch_detail', 'batch_id="%s" and ip ="%s"' % (self.host['batch_id'], self.host['ip']), **status)
            db.commit()
            db.close()
            self.grandchild.put({'flag': '1', 'ip': self.host['ip'], 'data': output})
        else:
            db = Custom_MySQL(using='log')
            status = {'flag': -1, 'result': 'failed'}
            db.update('batch_detail', 'batch_id="%s" and ip ="%s"' % (self.host['batch_id'], self.host['ip']), **status)
            db.commit()
            db.close()
            self.grandchild.put({'flag': '0', 'ip': self.host['ip'], 'data': trace_back()})
def run_task(self, task_param):
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()
    datas = {'etl_status': 0}
    where = {'id': int(task_param['id'])}
    try:
        log_param = {
            'game': task_param['game'],
            'platform': task_param['platform'],
            'log_date': task_param['log_date'],
            'log_name': task_param['log_name'],
            'log_dir': task_param['log_dir'],
            'col_num': task_param['col_num']
        }
        log_name_param = {
            'log_name': task_param['log_name'],
            'source_ip': task_param['source_ip'],
            'log_time': task_param['log_time']
        }
        do_rate = task_param['do_rate']
        flag = task_param['flag']
        log_dir = '%(log_dir)s/%(game)s/%(platform)s/%(log_date)s/%(log_name)s' % log_param
        log_name = '%(log_name)s_%(source_ip)s_%(log_time)s' % log_name_param
        log_name_notime = '%(log_name)s_%(source_ip)s' % log_name_param
        col_num = task_param['col_num']
        project_path = os.getcwd()
        # Start only when the md5 file and the data file are both present
        if (do_rate == "1day" and flag == "log") or (
                os.path.exists('%s/%s.log.md5' % (log_dir, log_name)) is True
                and os.path.exists('%s/%s.log' % (log_dir, log_name)) is True):
            # Rule out a leftover file of the same name; this also makes repair runs easier
            if os.path.exists('%s/%s.txt' % (log_dir, log_name)):
                cmd_remove = 'rm -f %s/%s.txt*' % (log_dir, log_name)
                logger.info('remove history file: {0}'.format(cmd_remove))
                remove_result = cmd.run(cmd_remove)
                if remove_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
            '''
            Mark the task as started: 1
            '''
            datas['etl_status'] = 1
            mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
            mysql.commit()
            '''
            Validate the data
            '''
            # For "per day" log data, merge the files of one ip into a single
            # file and validate it in the same step
            if do_rate == "1day" and flag == "log":
                cmd_merge = '/bin/bash %s/etl_data/merge_data.sh %s %s %s' % (project_path, log_dir, log_name_notime, col_num)
                logger.info('check data: {0}'.format(cmd_merge))
                merge_result = cmd.run(cmd_merge)
            else:
                cmd_merge = '/bin/bash %s/etl_data/check_data.sh %s %s %s' % (project_path, log_dir, log_name, col_num)
                logger.info('check data: {0}'.format(cmd_merge))
                merge_result = cmd.run(cmd_merge)
            if merge_result['status'] != 0:
                logger.error('Error Code %s : %s Cmd: %s' % (merge_result['status'], merge_result['output'], cmd_merge))
                datas['etl_status'] = 0
            else:
                '''
                Read the total row count of the validated file
                '''
                row = open('%s/%s.txt.row' % (log_dir, log_name)).read()
                '''
                Write the row count to the database and mark validation as done: 2
                '''
                datas['etl_status'] = 2
                datas['row_num'] = int(row)
                #datas = {'etl_status': 2, 'row_num': int(row)}
                mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
                mysql.commit()
                '''
                Compress the data
                '''
                cmd_compress = '/bin/bash %s/etl_data/compress_data.sh %s %s' % (project_path, log_dir, log_name)
                logger.info('compress data: {0}'.format(cmd_compress))
                compress_result = cmd.run(cmd_compress)
                if compress_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (compress_result['status'], compress_result['output'], cmd_compress))
                    datas['etl_status'] = 0
                else:
                    '''
                    Mark compression as done: 4
                    '''
                    datas['etl_status'] = 4
                    datas.pop('row_num')
                    mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
                    mysql.commit()
                    '''
                    Generate the MD5 file
                    '''
                    cmd_md5 = '/bin/bash %s/etl_data/md5_data.sh %s %s' % (project_path, log_dir, log_name)
                    logger.info('md5 data: {0}'.format(cmd_md5))
                    md5_result = cmd.run(cmd_md5)
                    if md5_result['status'] != 0:
                        logger.error('Error Code %s : %s Cmd: %s' % (md5_result['status'], md5_result['output'], cmd_md5))
                        datas['etl_status'] = 0
                    else:
                        '''
                        Mark MD5 generation (i.e. validate, merge and compress) as done: 6
                        '''
                        datas['etl_status'] = 6
        '''
        Finished: simulate removing the task from the queue: 0
        '''
        datas['in_etl_queue'] = 0
        update_result = mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        # If the update fails, call it once more; if it still fails, wait for
        # the auto-repair mechanism, although that is very unlikely by now
        if update_result != 1:
            mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True
    except Exception as exc:
        logger.error('etl_data error: %s' % exc)
        mysql.rollback()
        datas = {'in_etl_queue': 0, 'etl_status': 0}
        mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        raise self.retry(exc=exc, countdown=30)
def run_task(self, task_param):
    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    redis = Custom_Redis(using='etl_task')
    mysql.begin()
    mysql_etl.begin()
    datas = {'status': 0}
    where = {'id': int(task_param['id'])}
    task_key_exc = ""
    try:
        hive = Custom_Hive(using='ares_dw')
        game = task_param['game']
        platform = task_param['platform']
        table_name = task_param['table_name']
        log_date = task_param['log_date']
        prefix_sql = task_param['prefix_sql']
        exec_sql = task_param['exec_sql']
        post_sql = task_param['post_sql']
        date_cycle = task_param['date_cycle']
        random_str = str(random.randint(0, 999999999))
        stimes = str(int(time.time()))
        task_date = task_param['task_date']
        task_name = task_param['task_name']
        do_rate = task_param['do_rate']
        #task_key_tmp = str(game)+str(platform)+str(task_name)+str(date_cycle)+str(do_rate)+str(log_date)+str(task_date)
        task_key_tmp = str(task_name) + str(date_cycle) + str(do_rate) + str(log_date) + str(task_date)
        task_key_md5 = hashlib.md5()
        task_key_md5.update(task_key_tmp)
        task_key_md5_result = task_key_md5.hexdigest()
        task_key = "dm2report_new_" + str(game) + "_" + str(platform) + "_" + str(task_key_md5_result)
        task_key_exc = task_key
        '''
        celery itself has a loophole: it can hand an already finished task to
        another worker, so the same task may run more than once. To prevent
        that, the task's unique key is written to redis as soon as execution
        starts.
        '''
        # task_key is None: the task has never run, execute normally
        # task_key == "0": the last run failed, re-execution is allowed
        if redis.get(task_key) == "0" or redis.get(task_key) is None:
            tmp_file_dir = "/tmp/tmp/%s/%s/%s" % (game, platform, log_date)
            # Create the local directory
            if not os.path.exists(tmp_file_dir):
                os.makedirs(tmp_file_dir)
            tmp_file = "%s/%s_%s_%s_%s.txt" % (tmp_file_dir, table_name, date_cycle, random_str, stimes)
            hql_conf = "SET hive.support.concurrency=false;" \
                       "SET hive.exec.compress.output=true;" \
                       "SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; "
            '''
            Mark the task as started: 1
            '''
            datas['status'] = 1
            datas['start_time'] = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
            mysql_etl.commit()
            # Mark the task as running in redis
            redis.set(task_key, 1)
            # Run the prefix sql, dumping the data locally for the later mysql load
            if prefix_sql is not None:
                result = mysql.delete_by_sql(prefix_sql)
                logger.info('exec prefix_sql: delete old data {0}'.format(result['output']))
                if result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], prefix_sql))
                    '''
                    Execution failed, mark the task as not executed: 0
                    '''
                    datas['status'] = 0
                    # Mark the task as not started in redis
                    redis.set(task_key, 0)
                else:
                    '''
                    Mark temp-file removal as done: 2
                    '''
                    datas['status'] = 2
                    datas.pop('start_time')
                    mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
                    mysql_etl.commit()
                    '''
                    Run the hive ql and dump the data locally
                    '''
                    result = hive.dump(hql_conf + exec_sql, tmp_file)
                    logger.info('exec exec_sql: dump data {0}'.format(result['output']))
                    if result['status'] != 0 or False == os.path.exists('%s' % tmp_file):
                        logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], exec_sql))
                        # Mark the task as not started in redis
                        redis.set(task_key, 0)
                    else:
                        '''
                        Mark the hive dump as done: 3
                        '''
                        datas['status'] = 3
                        datas['tmp_file_name'] = tmp_file
                        mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
                        mysql_etl.commit()
                        # Run the post sql
                        if post_sql is not None:
                            post_sql = post_sql.replace('{dir_path}', tmp_file)
                            post_sql = post_sql.replace('{table_name}', task_param['table_name'])
                            post_sql = post_sql.replace('{db_name}', task_param['db_name'])
                            result = mysql.load(post_sql)
                            logger.info('exec post_sql: load data to hdfs {0}'.format(result['output']))
                            if result['status'] != 0:
                                logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], post_sql))
                                # Mark the task as not started in redis
                                redis.set(task_key, 0)
                            else:
                                '''
                                Mark the mysql load as done: 4
                                '''
                                datas['status'] = 4
                                datas['end_time'] = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
                                datas.pop('tmp_file_name')
                                # Mark the task as successfully executed in redis
                                redis.set(task_key, 2)
            else:
                logger.error('prefix_sql is null')
                datas['status'] = 0
                # Mark the task as not started in redis
                redis.set(task_key, 0)
        # task_key == "2": the task already ran successfully
        elif redis.get(task_key) == "2":
            datas['status'] = 4
        # The task is still running
        else:
            return True
        '''
        Mark the task as (simulated) removed from the task queue
        '''
        datas['in_queue'] = 0
        update_result = mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
        # If the database update fails, call it once more; if it still fails,
        # wait for the auto-repair mechanism, although that should be unlikely
        if update_result != 1:
            mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()
        return True
    except Exception as exc:
        logger.error('dm2report error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()
        redis.set(task_key_exc, 0)
        datas = {'in_queue': 0, 'status': 0}
        mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        mysql_etl.close()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
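# The redis guard above is a reusable idempotency pattern: hash the fields
# that identify a task into a key, then gate execution on the key's state
# (None = never ran, "0" = failed and retryable, "1" = running, "2" = done).
# A minimal standalone sketch of that pattern; the function names below are
# ours, not part of the task code above.
import hashlib


def task_state_key(prefix, *fields):
    # Same construction as task_key above: md5 over the concatenated fields
    digest = hashlib.md5("".join(str(f) for f in fields)).hexdigest()
    return "%s_%s" % (prefix, digest)


def should_run(redis, task_key):
    # Run only if the task never ran (None) or its last run failed ("0")
    state = redis.get(task_key)
    return state is None or state == "0"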
def run_task(self, task_param):
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()
    where = {'id': int(task_param['id'])}
    try:
        dir_param = {'game': task_param['game'],
                     'platform': task_param['platform'],
                     'log_date': task_param['log_date'],
                     'log_name': task_param['log_name']}
        filename_dict = {'log_name': task_param['log_name'],
                         'log_time': task_param['log_time'],
                         'source_ip': task_param['source_ip']}
        index_dict = {'db_name': task_param['db_name'],
                      'table_name': task_param['table_name'],
                      'platform': task_param['platform'],
                      'log_date': datetime.datetime.strptime(task_param['log_date'], '%Y%m%d').strftime("%Y-%m-%d")}
        partition = {'platform': task_param['platform'],
                     'log_date': datetime.datetime.strptime(task_param['log_date'], '%Y%m%d').strftime("%Y-%m-%d")}
        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param
        lzo_file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt.lzo" % filename_dict
        index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
        partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition
        project_path = os.getcwd()
        local_log_dir = '/disk1/tmp_data' + log_dir
        logger.info('local_log_dir: {0}'.format(local_log_dir))
        # Run only if the file to be loaded into hive exists
        if os.path.exists('%s%s' % (local_log_dir, lzo_file_name)):
            '''
            Mark the task as started: 1
            '''
            datas = {'load_status': 1}
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
            # Before the load, remove any file of the same name so it cannot show up twice
            cmd_remove = '/bin/bash %s/file2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, lzo_file_name)
            logger.info('remove damaged files: {0}'.format(cmd_remove))
            remove_result = cmd.run(cmd_remove)
            if remove_result['status'] != 0:
                logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
            '''
            Load the file into hive
            '''
            hive = Custom_Hive(using='ares_dw')
            load_sql = task_param['load_sql']
            load_sql = load_sql.replace('{dir_path}', local_log_dir + lzo_file_name)
            load_sql = load_sql.replace('{table_name}', task_param['table_name'])
            load_sql = load_sql.replace('{partition_name}', '%s' % partition_name)
            load_sql = load_sql.replace('{db_name}', task_param['db_name'])
            logger.info('hive load SQL: {0}'.format(load_sql))
            result = hive.load(load_sql)
            logger.info('hive load result {0}'.format(result['output']))
            if result['status'] == 0:
                '''
                Mark the file load as done: 2
                '''
                datas = {'load_status': 2}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                '''
                Build the index; without it lzo does not support splitting
                '''
                #print index_dir_name
                cmd_index = '/bin/bash %s/file2dw/create_lzo_indexer.sh %s %s' % (project_path, index_dir_name, lzo_file_name)
                logger.info('create lzo index: {0}'.format(cmd_index))
                index_result = cmd.run(cmd_index)
                if index_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (index_result['status'], index_result['output'], cmd_index))
                else:
                    if "create index success" in index_result['output']:
                        '''
                        Mark the lzo index build as done: 3
                        '''
                        datas = {'load_status': 3}
                        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
            else:
                '''
                If the load failed, remove the half-written broken file to
                prevent hive query errors
                '''
                cmd_remove = '/bin/bash %s/file2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, lzo_file_name)
                logger.info('remove damaged files: {0}'.format(cmd_remove))
                remove_result = cmd.run(cmd_remove)
                if remove_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
                else:
                    '''
                    Mark the task as not started, to be re-executed: 0
                    '''
                    datas = {'load_status': 0}
                    mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        '''
        Mark the task as (simulated) removed from the task queue
        '''
        datas = {'in_queue': 0}
        update_result = mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        # If the update fails, call it once more; if it still fails, wait for
        # the auto-repair mechanism, although that is very unlikely by now
        if update_result != 1:
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True
    except Exception as exc:
        print (exc)
        mysql.rollback()
        datas = {'in_queue': 0}
        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
class CenterApp():
    def __init__(self):
        self.center_mysql = Custom_MySQL(using='center_app')

    # def get_server_list(self):
    #     '''
    #     Fetch the execution tasks
    #     '''
    #     try:
    #         ip_list = []
    #         if param['flag'] == 'basic':
    #             ip_list.append('120.26.1.250')
    #
    #         if param['flag'] == 'most':
    #             ip_list.append('120.26.13.150')
    #
    #         if param['flag'] == 'log':
    #             s_sql = "select t1.* from \
    #                 (select a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
    #                 on a.main_category_id = m.id \
    #                 where a.is_del = 0 and (a.hostname like '%%%%web_balance%%%%' or a.hostname like '%%%%web_admin%%%%') ) t1 \
    #                 left join platform t2 on t1.platform_id = t2.id \
    #                 where t1.source_ip is not null group by source_ip"
    #
    #         if param['game'] == 'kof':
    #             s_sql = "select t1.* from \
    #                 (select a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
    #                 on a.main_category_id = m.id \
    #                 where a.is_del = 0 and a.hostname like '%%%%gameserver%%%%' ) t1 \
    #                 left join platform t2 on t1.platform_id = t2.id \
    #                 where t1.source_ip is not null group by source_ip"
    #
    #         ip_list = self.exec_sql(s_sql)
    #
    #         if param['flag'] == 'snap':
    #             s_sql = "select s.prefix as source_ip from sub_category s inner join main_category m \
    #                 on s.main_category_id = m.id \
    #                 where s.platform = '%s' and m.prefix = '%s'" % (param['platform'],param['game'])
    #             ip_list = self.exec_sql(s_sql)
    #
    #         return ip_list
    #     except Exception as exc:
    #         print exc
    #         # Roll back on exception
    #         self.center_mysql.rollback()

    def get_log_ip(self):
        '''
        Fetch the IPs of all log sources
        '''
        ip_list = {}
        sql = "select t1.*,t2.prefix as platform from \
               (select m.prefix as gamename,a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
               on a.main_category_id = m.id \
               where a.is_del = 0 and (a.hostname like '%%%%web_balance%%%%' or a.hostname like '%%%%web_admin%%%%') ) t1 \
               left join platform t2 on t1.platform_id = t2.id \
               where t1.source_ip is not null group by source_ip \
               union all \
               select t1.*,t2.prefix as platform from \
               (select m.prefix as gamename,a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
               on a.main_category_id = m.id \
               where a.is_del = 0 and a.hostname like '%%%%gameserver%%%%' ) t1 \
               left join platform t2 on t1.platform_id = t2.id \
               where t1.source_ip is not null group by source_ip"
        result = self.center_mysql.query(sql)
        for info in result:
            if info['gamename'] is None or info['platform'] is None:
                continue
            if info['gamename'].encode('utf8') not in ip_list:
                ip_list[info['gamename'].encode('utf8')] = {}
            if info['platform'].encode('utf8') not in ip_list[info['gamename'].encode('utf8')]:
                ip_list[info['gamename'].encode('utf8')][info['platform'].encode('utf8')] = []
            ip_list[info['gamename'].encode('utf8')][info['platform'].encode('utf8')].append(info['source_ip'].encode('utf8'))
        return ip_list

    def get_snap_ip(self):
        '''
        Fetch the IPs of all snapshot sources
        '''
        ip_list = {}
        s_sql = "select f.prefix as platform,s.prefix as source_ip,m.prefix as gamename from sub_category s inner join main_category m \
                 on s.main_category_id = m.id \
                 left join platform f on f.id = s.platform_id"
        result = self.center_mysql.query(s_sql)
        for info in result:
            if info['gamename'] is None or info['platform'] is None:
                continue
            if info['gamename'].encode('utf8') not in ip_list:
                ip_list[info['gamename'].encode('utf8')] = {}
            if info['platform'].encode('utf8') not in ip_list[info['gamename'].encode('utf8')]:
                ip_list[info['gamename'].encode('utf8')][info['platform'].encode('utf8')] = []
            ip_list[info['gamename'].encode('utf8')][info['platform'].encode('utf8')].append(info['source_ip'].encode('utf8'))
        # print ip_list
        return ip_list

    def get_ip_list(self):
        '''
        Assemble the IP list
        '''
        try:
            ip_list = {}
            ip_list['basic'] = ['120.26.1.250']
            ip_list['mostsdk'] = ['120.26.13.150']
            ip_list['wanpay'] = ['112.124.116.44']
            log_list = self.get_log_ip()
            ip_list['log'] = log_list
            snap_list = self.get_snap_ip()
            ip_list['snap'] = snap_list
            return ip_list
        except Exception as e:
            print e
            print "exception"
            # Roll back on exception
            self.center_mysql.rollback()
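# Usage sketch: get_ip_list() returns a dict keyed by flag ('basic',
# 'mostsdk', 'wanpay', 'log', 'snap'); the 'log' and 'snap' entries are
# nested as {game: {platform: [ip, ...]}}. Hypothetical driver code:
#
#   app = CenterApp()
#   ip_list = app.get_ip_list()
#   for game, platforms in ip_list['log'].items():
#       for platform, ips in platforms.items():
#           print game, platform, ips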
class CopyConfig():
    def __init__(self):
        self.mysql = Custom_MySQL(using='etl_manage')
        self.source_game = 'ares'

    def get_all_task(self, task_name):
        condition = 'game = "%s" ' % self.source_game
        if task_name is not None:
            condition += 'and task_name="%s"' % task_name
        ##appstoremix is_delete = 0 and
        task_list = self.mysql.query("select * from dw2dm where platform='all' and %s" % (condition))
        return task_list

    def get_structure(self, id, game, plat_form):
        '''
        Fetch the structure row for the given game; if it does not exist, it is added.
        :param id:
        :param game:
        :param plat_form:
        :return:
        '''
        structure = self.mysql.get("select * from structure where is_delete=0 and id=%s", id)
        if structure != None:
            t_structure = [
                structure['type'],
                structure['flag'],
                structure['db_type'],
                game,
                plat_form,
                #structure['platform'],
                #'db_name':structure['db_name'],
                structure['table_name'],
                structure['column_name'],
                ##structure['partition_name'],
                ##structure['partition_rule'],
                ##structure['index_name'],
                structure['create_table_sql'],
                structure['user_id'],
                0,
                datetime.datetime.today().strftime("%Y-%m-%d")
            ]
            game_db = None
            if structure['type'] != None and str(structure['type']).__eq__('dw'):
                game_db = '%s_dw' % game
                t_structure.append(game_db)
            elif structure['type'] != None and str(structure['type']).__eq__('dm'):
                game_db = '%s_dm' % game
                t_structure.append(game_db)
            elif structure['type'] != None and str(structure['type']).__eq__('report'):
                game_db = 'report_%s' % game
                t_structure.append(game_db)
            exis_row = self.mysql.query(
                "select id from structure where platform='%s' and is_delete=0 and db_name='%s' and platform='all' and table_name='%s' and db_type='%s'"
                % (plat_form, game_db, str(structure['table_name']), str(structure['db_type'])))
            if len(exis_row) > 0:
                return int(exis_row[0]['id'])
            else:
                return self.save_newstructure(t_structure)

    def save_new_task(self, task):
        self.mysql.insert("dw2dm", **task)
        self.mysql.commit()

    def save_newstructure(self, structure):
        query = 'INSERT INTO structure(type,flag,db_type,game,platform,table_name,column_name,create_table_sql,user_id,is_delete,create_date,db_name) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        rowNum = self.mysql.execute(query, *tuple(structure))
        self.mysql.commit()
        return rowNum

    def run(self, game, task_name=None, plat_form="all"):
        print "start copy"
        task_list = self.get_all_task(task_name)
        for task in task_list:
            form_ids = ""
            for form_id_str in task['from_id'].split(","):
                if len(str(form_ids)) > 0:
                    form_ids = form_ids + "," + str(self.get_structure(int(form_id_str), game, plat_form))
                else:
                    form_ids = str(self.get_structure(int(form_id_str), game, plat_form))
            target_id = self.get_structure(int(task['target_id']), game, plat_form)
            t_task = {
                'game': game,
                ##'platform':task['platform'],
                'platform': plat_form,
                'log_name': task['log_name'],
                'do_rate': task['do_rate'],
                'priority': task['priority'],
                'prefix_sql': task['prefix_sql'],
                'exec_sql': task['exec_sql'].replace("%s_dw" % self.source_game, "%s_dw" % game).replace("%s_dm" % self.source_game, "%s_dm" % game),
                'post_sql': task['post_sql'],
                'from_id': form_ids,
                'target_id': target_id,
                'create_date': datetime.datetime.today().strftime("%Y-%m-%d"),
                'comment': task['comment'],
                'grouped': task['grouped'],
                'is_delete': task['is_delete'],
                'user_id': task['user_id']
            }
            self.save_new_task(t_task)
        self.mysql.close()
        print "over"

    def add_structure(self, game, plat_form):
        platforms_str = plat_form.split(",")
        structures = self.mysql.query(
            "select * from structure where platform='all' and is_delete=0 and flag='log' and game='ares' and type in ('report','dm')")
        for structure in structures:
            for platform in platforms_str:
                t_structure = [
                    structure['type'],
                    structure['flag'],
                    structure['db_type'],
                    game,
                    platform,
                    #structure['platform'],
                    #'db_name':structure['db_name'],
                    structure['table_name'],
                    structure['column_name'],
                    ##structure['partition_name'],
                    ##structure['partition_rule'],
                    ##structure['index_name'],
                    structure['create_table_sql'],
                    structure['user_id'],
                    0,
                    datetime.datetime.today().strftime("%Y-%m-%d")
                ]
                game_db = None
                if structure['type'] != None and str(structure['type']).__eq__('dw'):
                    game_db = '%s_dw' % game
                elif structure['type'] != None and str(structure['type']).__eq__('dm'):
                    game_db = '%s_dm' % game
                elif structure['type'] != None and str(structure['type']).__eq__('report'):
                    game_db = 'report_%s' % game
                t_structure.append(game_db)
                self.save_newstructure(t_structure)
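# Usage sketch (the target game name is illustrative): clone the 'ares'
# dw2dm task definitions, plus their structure rows, over to another game:
#
#   cc = CopyConfig()
#   cc.run('hebe')                           # copy tasks for game "hebe"
#   cc.add_structure('hebe', 'ios,android')  # clone per-platform structures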
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
create_time: 2016-03-11 15:20:00
used: run dm2report_new tasks
last_update: 2016-04-28 14:48:00
"""
import datetime
import sys

from dm2report_new.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# Whether to check the dependency on the upstream (dw2dm) tasks;
# onrely: check, offrely: do not check
is_rely = ""
try:
    is_rely = sys.argv[5]
except Exception as exc:
    is_rely = "onrely"

# Data date, formatted like 20151015
def run_task(self, task_param):
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()
    try:
        '''
        Business logic starts below
        '''
        dir_param = {'game': task_param['game'],
                     'platform': task_param['platform'],
                     'log_date': task_param['log_date'],
                     'log_name': task_param['log_name']}
        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time']}
        '''
        game / platform / date / log name / log or md5 file
        '''
        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param
        lzo_file_name = "%(log_name)s_%(log_time)s.txt" % filename_dict
        local_log_dir = '/tmp' + log_dir
        dump_sql = task_param['dump_sql']
        dump_sql = dump_sql.replace('{table_name}', task_param['table_name'])
        dump_sql = dump_sql.replace('{partition_name}', task_param['partition_name'])
        dump_sql = dump_sql.replace('{db_name}', task_param['db_name'])
        print(dump_sql)
        result = mysql.dump(dump_sql, local_log_dir + lzo_file_name)
        #print(result)
        '''
        Mark the file load as done: 2
        '''
        datas = {'load_status': 2}
        where = {}
        where['id'] = int(task_param['id'])
        mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True
    except Exception as exc:
        print (exc)
        mysql.rollback()
        raise self.retry(exc=exc, countdown=60)
def run_task(self, task_param):
    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    mysql.begin()
    mysql_etl.begin()
    where = {'id': int(task_param['id'])}
    try:
        hive = Custom_Hive(using='ares_dw')
        game = task_param['game']
        platform = task_param['platform']
        table_name = task_param['table_name']
        log_date = task_param['log_date']
        prefix_sql = task_param['prefix_sql']
        exec_sql = task_param['exec_sql']
        post_sql = task_param['post_sql']
        date_cycle = task_param['date_cycle']
        random_str = str(random.randint(0, 999999999))
        stimes = str(int(time.time()))
        tmp_file_dir = "/tmp/tmp/%s/%s/%s" % (game, platform, log_date)
        # Create the local directory
        if not os.path.exists(tmp_file_dir):
            os.makedirs(tmp_file_dir)
        tmp_file = "%s/%s_%s_%s_%s.txt" % (tmp_file_dir, table_name, date_cycle, random_str, stimes)
        hql_conf = "SET hive.support.concurrency=false;SET hive.exec.compress.output=true;" \
                   "SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; "
        '''
        Mark the task as started: 1
        '''
        datas = {'status': 1, 'start_time': str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}
        mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        # Run the prefix sql, dumping the data locally for the later mysql load
        if prefix_sql is not None:
            result = mysql.delete_by_sql(prefix_sql)
            logger.info('exec prefix_sql: delete old data {0}'.format(result['output']))
            if result['status'] == 0:
                '''
                Mark temp-file removal as done: 2
                '''
                datas = {'status': 2}
                mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                mysql_etl.commit()
                '''
                Run the hive ql and dump the data locally
                '''
                result = hive.dump(hql_conf + exec_sql, tmp_file)
                logger.info('exec exec_sql: dump data {0}'.format(result['output']))
                if result['status'] == 0 and True == os.path.exists('%s' % tmp_file):
                    '''
                    Mark the hive dump as done: 3
                    '''
                    datas = {'status': 3, 'tmp_file_name': tmp_file}
                    mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                    mysql_etl.commit()
                    # Run the post sql
                    if post_sql is not None:
                        post_sql = post_sql.replace('{dir_path}', tmp_file)
                        post_sql = post_sql.replace('{table_name}', task_param['table_name'])
                        post_sql = post_sql.replace('{db_name}', task_param['db_name'])
                        result = mysql.load(post_sql)
                        logger.info('exec post_sql: load data to hdfs {0}'.format(result['output']))
                        if result['status'] == 0:
                            '''
                            Mark the mysql load as done: 4
                            '''
                            datas = {'status': 4, 'end_time': str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}
                            mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                        else:
                            logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], post_sql))
                else:
                    logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], exec_sql))
            else:
                logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], prefix_sql))
                '''
                Execution failed, mark the task as not executed: 0
                '''
                datas = {'status': 0}
                mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        '''
        Mark the task as (simulated) removed from the task queue
        '''
        datas = {'in_queue': 0}
        update_result = mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        # If the database update fails, call it once more; if it still fails,
        # wait for the auto-repair mechanism, although that should be unlikely
        if update_result != 1:
            mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()
        return True
    except Exception as exc:
        logger.error('dm2report error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()
        datas = {'in_queue': 0, 'status': 0}
        mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        mysql_etl.close()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
def run_task(self, task_param):
    redis = Custom_Redis(using='etl_manage')
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()
    datas = {'download_status': 0}
    where = {'id': int(task_param['id'])}
    local_log_dir = ""
    lzo_file_name = ""
    try:
        dir_param = {
            'game': task_param['game'],
            'platform': task_param['platform'],
            'log_date': task_param['log_date'],
            'log_name': task_param['log_name']
        }
        filename_dict = {
            'log_name': task_param['log_name'],
            'log_time': task_param['log_time'],
            'source_ip': task_param['source_ip']
        }
        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param
        txt_file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt" % filename_dict
        lzo_file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt.lzo" % filename_dict
        md5_file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt.lzo.md5" % filename_dict
        lzo_download_url = task_param['download_url'].rstrip('/') + log_dir + lzo_file_name
        md5_download_url = task_param['download_url'].rstrip('/') + log_dir + md5_file_name
        # Look up in redis which disk this game's data is stored on
        if redis.get("disk_xml") is None:
            disk_tmp = open('/data/etl_manage/conf/disk_game.xml', 'r')
            redis.set("disk_xml", str(disk_tmp.read()))
        disk_list = str(redis.get("disk_xml"))
        root = ET.fromstring(disk_list)
        disk = ""
        for gameinfo in root.findall('game'):
            if gameinfo.get('name') == task_param['game']:
                disk = gameinfo.get('disk')
                continue
        #local_log_dir = '/disk1/tmp_data' + log_dir
        local_log_dir = '/' + disk + '/data' + log_dir
        # Create the local directory
        if not os.path.exists(local_log_dir):
            os.makedirs(local_log_dir)
        # Rule out a leftover file of the same name; this also makes repair runs easier
        if os.path.exists('%s%s' % (local_log_dir, txt_file_name)):
            cmd_remove = 'rm -f %s%s*' % (local_log_dir, txt_file_name)
            logger.info('remove history file: {0}'.format(cmd_remove))
            remove_result = cmd.run(cmd_remove)
            if remove_result['status'] != 0:
                logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
        '''
        Download the md5 file; if it does not exist, stop here without writing
        any marker to the database
        '''
        md5_line = ' wget -o /tmp/log/wget_log -O %s%s %s' % (local_log_dir, md5_file_name, md5_download_url)
        logger.info('md5 info: {0}'.format(md5_line))
        md5_result = cmd.run(md5_line)
        if md5_result['status'] != 0:
            logger.error('Error Code %s : %s Cmd: %s' % (md5_result['status'], md5_result['output'], md5_line))
        else:
            '''
            Mark the task as started: 1
            '''
            datas['download_status'] = 1
            mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
            mysql.commit()
            '''
            Download the data file
            '''
            lzo_line = ' wget -o /tmp/log/wget_log -O %s%s %s' % (local_log_dir, lzo_file_name, lzo_download_url)
            logger.info('file info: {0}'.format(lzo_line))
            lzo_result = cmd.run(lzo_line)
            if lzo_result['status'] != 0:
                logger.error('Error Code %s : %s Cmd: %s' % (lzo_result['status'], lzo_result['output'], lzo_line))
                datas['download_status'] = 0
            else:
                '''
                Mark the download as done: 2
                '''
                datas['download_status'] = 2
                mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
                mysql.commit()
                '''
                md5 check; stop here if it does not pass
                '''
                check_line = "cat %s%s |grep `md5sum %s%s|cut -d ' ' -f 1`" % (local_log_dir, md5_file_name, local_log_dir, lzo_file_name)
                logger.info('md5 or md5 info: {0}'.format(check_line))
                check_result = cmd.run(check_line)
                if check_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (check_result['status'], check_result['output'], check_line))
                    datas['download_status'] = 0
                else:
                    '''
                    Decompress with lzop
                    '''
                    cmd_line = ' lzop -dP %s%s' % (local_log_dir, lzo_file_name)
                    logger.info('file info: {0}'.format(cmd_line))
                    cmd_result = cmd.run(cmd_line)
                    if cmd_result['status'] != 0:
                        logger.error('Lzop Code %s : %s Cmd: %s' % (cmd_result['status'], cmd_result['output'], cmd_line))
                        datas['download_status'] = 0
                    else:
                        '''
                        md5 matched, mark the download task as complete: 3
                        '''
                        datas['download_status'] = 3
        '''
        Mark the task as (simulated) removed from the task queue
        '''
        datas['in_download_queue'] = 0
        update_result = mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        # If the update fails, call it once more; if it still fails, wait for
        # the auto-repair mechanism, although that is very unlikely by now
        if update_result != 1:
            mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True
    except Exception as exc:
        print(exc)
        logger.error('download error : %s' % exc)
        mysql.rollback()
        kill_proces = "kill -9 `ps -ef |grep wget |grep -v grep |grep '%s%s'|awk '{print $2}'`" % (local_log_dir, lzo_file_name)
        cmd.run(kill_proces)
        datas = {'in_download_queue': 0, 'download_status': 0}
        mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
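# The md5 check above shells out to "cat ... | grep `md5sum ...`". The same
# verification can be done with the stdlib alone; a minimal sketch, assuming
# the .md5 file contains the hex digest produced by md5sum (function name is
# ours, not part of the task code above):
import hashlib


def md5_matches(data_path, md5_path):
    # Stream the file in 1 MB chunks so large lzo files do not blow up memory
    md5 = hashlib.md5()
    with open(data_path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), ''):
            md5.update(chunk)
    return md5.hexdigest() in open(md5_path).read()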
def insert_db(self, main_category_id, sub_category_id, platform, app):
    temp_app = {}
    temp_app['name'] = app['type'] + '[' + app['memo'] + ']'
    temp_app['platform'] = platform
    temp_app['type'] = app['type']
    temp_app['port'] = app['port']
    temp_app['main_category_id'] = main_category_id
    temp_app['sub_category_id'] = sub_category_id
    if app.get('db_type', False):
        temp_app['db_type'] = app['db_type']
    sql = 'select count(*) as count from app_info where '
    # Same game, same district group
    where = 'type="%s" and port="%s" and main_category_id="%s" \
             and sub_category_id="%s"' % (app['type'], app['port'], main_category_id, sub_category_id)
    db = Custom_MySQL(using='center_app')
    # Handle intranet addresses
    if app['ip'].split('.')[0] in ['10', '172']:
        inner_ip = 'and inner_ip="%s"' % (app['ip'])
        count = db.count(sql + where + inner_ip)
        if count == None:
            print 'SQL Error:%s' % sql + where + inner_ip
            return False
        count = count['count']
        try:
            temp_app['public_ip'] = db.get('select public_ip from assets where inner_ip="%s"' % app['ip'])['public_ip']
        except:
            pass
        if count == 0:
            temp_app['inner_ip'] = app['ip']
            db.insert('app_info', **temp_app)
        else:
            db.update('app_info', where + inner_ip, **temp_app)
    else:
        import re
        if app['type'] == 'web':
            app['ip'] = app['ip'].replace('http://', '').split('/')[0]
        p = r'(?<![\.\d])(?:\d{1,3}\.){3}\d{1,3}(?![\.\d])'
        mo = re.search(p, app['ip'])
        if not mo:
            domain = app['ip'].replace('http://', '').split('/')[0]
            app['ip'] = db.get('select ip from domain where domain="%s"' % domain)['ip']
            temp_app['domain'] = domain
        public_ip = 'and (public_ip="%s") ' % (app['ip'])
        count = db.count(sql + where + public_ip)
        if count == None:
            print 'SQL Error:%s' % sql + where + public_ip
            return False
        count = count['count']
        if count == 0:
            temp_app['public_ip'] = app['ip']
            db.insert('app_info', **temp_app)
        else:
            db.update('app_info', where + public_ip, **temp_app)
#coding=utf-8
"""
auth: wuqichao, suguoxin
mail: [email protected], [email protected]
create_time: 2015-9-17 10:00:00
used: run the download tasks
last_update: 2016-04-28 15:15:00
"""
import datetime
import sys

from download.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# Execution rate, taken from the command line
game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# Data date, formatted like 20151015
log_date = now_time.strftime('%Y%m%d')

# Data time slot (every five minutes), formatted like 0005 or 2400
log_time = now_time.strftime("%H%M")

sql = 'select * from etl_data_log ' \
      'where game="%s" and platform="%s" and do_rate="%s" and etl_status=6 and download_status=0 and task_date="%s" ' \
last_update: 2016-05-04 11:10:20
"""
import datetime
import sys

from etl_data.tasks import run_task
from custom.db.mysql import Custom_MySQL
from custom.db.redis_tools import Custom_Redis
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

mysql = Custom_MySQL(using='etl_manage')
redis = Custom_Redis(using='etl_manage')

game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# Data date, formatted like 20151015
log_date = now_time.strftime('%Y%m%d')

# Data time slot (every five minutes), formatted like 0005 or 2400
log_time = now_time.strftime("%H%M")

# Public ip of the current machine; adjust per machine
machine = "120.26.1.224"
"""
auth: suguoxin
mail: [email protected]
create_time: 2016-03-08 16:00:00
used: start the dw2dm tasks
last_update: 2016-04-28 15:00:00
"""
import datetime
import sys

from dw2dm.tasks import run_task
from custom.db.mysql import Custom_MySQL
from custom.command import Custom_Command as cmd

mysql = Custom_MySQL(using='etl_manage')

game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# Whether to check the dependency on the upstream (mergefile2dw) tasks;
# onrely: check, offrely: do not check
is_rely = ""
try:
    is_rely = sys.argv[5]
except Exception as exc:
    is_rely = "onrely"

# Data date, formatted like 20151015
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
create_time: 2016-06-23 17:03:00
used: run the download tasks; split out for games with a large number of servers
last_update: 2016-06-23 17:03:00
"""
import datetime
import sys

from download_new.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# Execution rate, taken from the command line
game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# Data date, formatted like 20151015
log_date = now_time.strftime('%Y%m%d')

# Data time slot (every five minutes), formatted like 0005 or 2400
log_time = now_time.strftime("%H%M")


def task_5min():
#!/usr/bin/python
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
createtime: 2016-01-12 00:30:00
usage: automatically reset tasks that failed to execute
"""
import datetime

from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# Current time
now_time = datetime.datetime.now()
task_date = now_time.strftime('%Y%m%d')
log_time = now_time.strftime('%H%M')
if log_time < '0200':
    task_date = (now_time - datetime.timedelta(days=1)).strftime('%Y%m%d')
log_time_before_30min = (now_time - datetime.timedelta(seconds=1800)).strftime('%H%M')
log_time_before_1hour = (now_time - datetime.timedelta(seconds=3600)).strftime('%H%M')
now = now_time.strftime('%Y-%m-%d %H:%M:%S')

try:
    mysql.begin()
def run_task(self, task_param):
    mysql = Custom_MySQL(using='etl_manage')
    hive = Custom_Hive(using='ares_dw')
    redis = Custom_Redis(using='etl_manage')
    mysql.begin()
    where = {'id': int(task_param['id'])}
    try:
        log_date = datetime.datetime.strptime(task_param['log_date'], '%Y%m%d').strftime("%Y-%m-%d")
        log_date_1 = (datetime.datetime.strptime(task_param['log_date'], '%Y%m%d') - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        dir_param = {'game': task_param['game'],
                     'platform': task_param['platform'],
                     'log_date': task_param['log_date'],
                     'log_name': task_param['log_name']}
        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time']}
        index_dict = {'db_name': task_param['db_name'],
                      'table_name': task_param['table_name'],
                      'platform': task_param['platform'],
                      'log_date': log_date}
        partition = {'platform': task_param['platform'], 'log_date': log_date}
        index_dict_1 = {'db_name': task_param['db_name'],
                        'table_name': task_param['table_name'],
                        'platform': task_param['platform'],
                        'log_date': log_date_1}
        partition_1 = {'platform': task_param['platform'], 'log_date': log_date_1}
        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s" % dir_param
        flag = task_param['flag']
        file_name = "%(log_name)s" % filename_dict
        file_name_txt = "%(log_name)s_%(log_time)s_result.txt" % filename_dict
        file_name_lzo = "%(log_name)s_%(log_time)s_result.txt.lzo" % filename_dict
        file_name_row = "%(log_name)s_%(log_time)s_result.txt.row" % filename_dict
        index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
        partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition
        project_path = os.getcwd()
        log_time = task_param['log_time']
        do_rate = task_param['do_rate']
        #if flag == "snap" or (do_rate == "1day" and flag == "log")
        if flag == "snap":
            index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict_1
            partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition_1
        # Look up in redis which disk this game's data is stored on
        if redis.get("disk_xml") is None:
            disk_tmp = open('/data/etl_manage/conf/disk_game.xml', 'r')
            redis.set("disk_xml", str(disk_tmp.read()))
        disk_list = str(redis.get("disk_xml"))
        root = ET.fromstring(disk_list)
        disk = ""
        for gameinfo in root.findall('game'):
            if gameinfo.get('name') == task_param['game']:
                disk = gameinfo.get('disk')
                continue
        #local_log_dir = '/disk1/tmp_data'+log_dir
        local_log_dir = '/' + disk + '/data' + log_dir
        logger.info('local_log_dir: {0}'.format(local_log_dir))
        # Proceed only if the directory exists
        if os.path.exists('%s' % local_log_dir):
            # Rule out a leftover file of the same name; this also makes repair runs easier
            if os.path.exists('%s/%s' % (local_log_dir, file_name_txt)):
                cmd_remove = 'rm -f %s/%s*' % (local_log_dir, file_name_txt)
                logger.info('remove history file: {0}'.format(cmd_remove))
                remove_result = cmd.run(cmd_remove)
                if remove_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
            datas = {'load_status': 1}
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
            mysql.commit()
            cmd_merge = '/bin/bash %s/mergefile2dw/merge_data.sh %s %s %s %s %s' \
                        '' % (project_path, local_log_dir, local_log_dir, file_name, do_rate, log_time)
            logger.info('merge data: {0}'.format(cmd_merge))
            merge_result = cmd.run(cmd_merge)
            logger.info('merge data result {0}'.format(merge_result['output']))
            if merge_result['status'] == 0:
                # Read the total row count
                row = open('%s/%s' % (local_log_dir, file_name_row)).read()
                '''
                Merge finished: 2
                '''
                datas = {'load_status': 2, 'row_num': int(row)}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                mysql.commit()
                '''
                Compress the data
                '''
                cmd_compress = '/bin/bash %s/mergefile2dw/compress_data.sh %s %s' % (project_path, local_log_dir, file_name_txt)
                logger.info('compress data: {0}'.format(cmd_compress))
                compress_result = cmd.run(cmd_compress)
                if compress_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (compress_result['status'], compress_result['output'], cmd_compress))
                else:
                    '''
                    Compression finished: 3
                    '''
                    datas = {'load_status': 3}
                    mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                    mysql.commit()
                    # Before the load, remove any file of the same name so it cannot show up twice
                    cmd_remove = '/bin/bash %s/mergefile2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                    logger.info('remove damaged files: {0}'.format(cmd_remove))
                    remove_result = cmd.run(cmd_remove)
                    if remove_result['status'] != 0:
                        logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
                    '''
                    Load the file into hive
                    '''
                    load_sql = task_param['load_sql']
                    load_sql = load_sql.replace('{dir_path}', local_log_dir + "/" + file_name_lzo)
                    load_sql = load_sql.replace('{table_name}', task_param['table_name'])
                    load_sql = load_sql.replace('{partition_name}', '%s' % partition_name)
                    load_sql = load_sql.replace('{db_name}', task_param['db_name'])
                    logger.info('hive load SQL: {0}'.format(load_sql))
                    result = hive.load(load_sql)
                    logger.info('hive load result {0}'.format(result['output']))
                    if result['status'] == 0:
                        '''
                        Mark the file load as done: 4
                        '''
                        datas = {'load_status': 4}
                        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                        mysql.commit()
                        '''
                        Build the index; without it lzo does not support splitting
                        '''
                        #print index_dir_name
                        cmd_index = '/bin/bash %s/mergefile2dw/create_lzo_indexer.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                        logger.info('create lzo index: {0}'.format(cmd_index))
                        index_result = cmd.run(cmd_index)
                        if index_result['status'] != 0:
                            logger.error('Error Code %s : %s Cmd: %s' % (index_result['status'], index_result['output'], cmd_index))
                        else:
                            if "create index success" in index_result['output']:
                                '''
                                Mark the lzo index build as done: 5
                                '''
                                datas = {'load_status': 5}
                                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                    else:
                        '''
                        If the load failed, remove the half-written broken file
                        to prevent hive query errors
                        '''
                        cmd_remove = '/bin/bash %s/mergefile2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                        logger.info('remove damaged files: {0}'.format(cmd_remove))
                        remove_result = cmd.run(cmd_remove)
                        if remove_result['status'] != 0:
                            logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
                        else:
                            logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], load_sql))
            else:
                '''
                Merge failed
                '''
                datas = {'load_status': 0}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                logger.error('Error Code %s : %s Cmd: %s' % (merge_result['status'], merge_result['output'], cmd_merge))
        '''
        Finished: simulate removing the task from the queue: 0
        '''
        datas = {'in_queue': 0}
        update_result = mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        # If the update fails, call it once more; if it still fails, wait for
        # the auto-repair mechanism, although that is very unlikely by now
        if update_result != 1:
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True
    except Exception as exc:
        logger.error('mergefile2dw error: %s' % exc)
        mysql.rollback()
        datas = {'in_queue': 0, 'load_status': 0}
        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        raise self.retry(exc=exc, countdown=30)
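# For reference, the load_status values walked through by run_task above form
# a small state machine (0 doubles as "not started" and "failed, retry"):
LOAD_STATUS = {
    0: 'not started / failed, will be retried',
    1: 'merge started',
    2: 'merge finished (row_num recorded)',
    3: 'compression finished',
    4: 'hive load finished',
    5: 'lzo index built',
}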
class CenterApp():

    def __init__(self):
        self.center_mysql = Custom_MySQL(using='center_app')

    def get_log_ip(self):
        '''
        Fetch all log-source IPs, grouped by game and platform
        '''
        ip_list = {}
        sql = "select t1.*,t2.prefix as platform from \
            (select m.prefix as gamename,a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
            on a.main_category_id = m.id \
            where a.is_del = 0 and (a.hostname like '%%%%web_balance%%%%' or a.hostname like '%%%%web_admin%%%%') ) t1 \
            left join platform t2 on t1.platform_id = t2.id \
            where t1.source_ip is not null group by source_ip \
            union all \
            select t1.*,t2.prefix as platform from \
            (select m.prefix as gamename,a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
            on a.main_category_id = m.id \
            where a.is_del = 0 and a.hostname like '%%%%gameserver%%%%' ) t1 \
            left join platform t2 on t1.platform_id = t2.id \
            where t1.source_ip is not null group by source_ip"
        result = self.center_mysql.query(sql)
        for info in result:
            if info['gamename'] is None or info['platform'] is None:
                continue
            game = info['gamename'].encode('utf8')
            platform = info['platform'].encode('utf8')
            if game not in ip_list:
                ip_list[game] = {}
            if platform not in ip_list[game]:
                ip_list[game][platform] = []
            ip_list[game][platform].append(info['source_ip'].encode('utf8'))
        return ip_list

    def get_snap_ip(self):
        '''
        Fetch all snapshot-source prefixes, grouped by game and platform
        '''
        ip_list = {}
        s_sql = "select f.prefix as platform,s.prefix as source_ip,m.prefix as gamename from sub_category s inner join main_category m \
            on s.main_category_id = m.id \
            left join platform f on f.id = s.platform_id"
        result = self.center_mysql.query(s_sql)
        for info in result:
            if info['gamename'] is None or info['platform'] is None:
                continue
            game = info['gamename'].encode('utf8')
            platform = info['platform'].encode('utf8')
            if game not in ip_list:
                ip_list[game] = {}
            if platform not in ip_list[game]:
                ip_list[game][platform] = []
            ip_list[game][platform].append(info['source_ip'].encode('utf8'))
        # print ip_list
        return ip_list

    def get_ip_list(self):
        '''
        Assemble the full IP list
        '''
        try:
            ip_list = {}
            ip_list['basic'] = ['120.26.1.250']
            ip_list['mostsdk'] = ['120.26.13.150']
            ip_list['wanpay'] = ['112.124.116.44']
            ip_list['log'] = self.get_log_ip()
            ip_list['snap'] = self.get_snap_ip()
            return ip_list
        except Exception as e:
            print e
            print "exception"
            # roll back on error
            self.center_mysql.rollback()
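# Usage sketch (hypothetical, for illustration only): get_ip_list() returns a
# nested mapping of the form {'log': {game: {platform: [ip, ...]}}, 'snap': ...,
# 'basic': [ip, ...]}; a caller might walk it like this.
if __name__ == '__main__':
    app = CenterApp()
    ip_list = app.get_ip_list()
    for game, platforms in ip_list['log'].items():
        for platform, ips in platforms.items():
            print game, platform, ','.join(ips)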
def run(self):
    # mark the task as running: 1
    db = Custom_MySQL(using='log')
    status = {'flag': 1}
    db.update('batch_detail', 'batch_id="%s" and ip ="%s"' % (self.host['batch_id'], self.host['ip']), **status)
    db.commit()
    db.close()

    # open the connection
    self.ssh = paramiko.SSHClient()
    try:
        # no password given: fall back to public-key auth. The original code
        # loaded the key and then discarded it (and indexed self.host['pwd']
        # unconditionally); passing the key to connect() is presumably what
        # was intended.
        pkey = None
        if self.host.get('pwd', True) == True:
            privatekeyfile = os.path.expanduser('/root/.ssh/id_rsa')
            pkey = paramiko.RSAKey.from_private_key_file(privatekeyfile)

        # cope with a missing known_hosts entry
        known_host = "/root/.ssh/known_hosts"
        self.ssh.load_system_host_keys(known_host)
        self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        #os.system('/opt/local/junos/junos')

        # connect to the remote machine
        self.ssh.connect(hostname=self.host['ip'], port=int(self.host['port']),
                         username=self.host['user'], password=self.host.get('pwd'),
                         pkey=pkey, compress=True, timeout=60)

        # collect the remote command's output
        stdin, stdout, stderr = self.ssh.exec_command(self.host['cmd'], bufsize=65535, timeout=60)
        temp = stdout.readlines()

        # mark the task as done: 2, and store the result
        db = Custom_MySQL(using='log')
        status = {'flag': 2, 'result': json.dumps(temp)}
        db.update('batch_detail', 'batch_id="%s" and ip ="%s"' % (self.host['batch_id'], self.host['ip']), **status)
        db.commit()
        db.close()
        self.ssh.close()
    except:
        self.ssh.close()
        #print trace_back()
        # in case paramiko itself is at fault, retry once through plain ssh;
        # only if that also fails do we treat it as a real failure
        cmd = "ssh -p %s -o StrictHostKeyChecking=no %s@%s '%s'" % (
            self.host['port'], self.host['user'], self.host['ip'], self.host['cmd'])
        (status, output) = commands.getstatusoutput(cmd)
        db = Custom_MySQL(using='log')
        if status == 0:
            status = {'flag': 2, 'result': json.dumps(output)}
        else:
            status = {'flag': -1, 'result': 'failed'}
        db.update('batch_detail', 'batch_id="%s" and ip ="%s"' % (self.host['batch_id'], self.host['ip']), **status)
        db.commit()
        db.close()
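# Hypothetical driver sketch: run() reads everything from self.host, so the
# enclosing class (not shown in this excerpt) is presumably a threading.Thread
# subclass constructed with one dict per host. All names and values below are
# illustrative assumptions, not part of the original source.
hosts = [{'batch_id': 'b001', 'ip': '10.0.0.1', 'port': 22,
          'user': 'ops', 'pwd': '', 'cmd': 'uptime'}]
workers = [SSHWorker(host) for host in hosts]  # SSHWorker: hypothetical Thread subclass owning run()
for w in workers:
    w.start()
for w in workers:
    w.join()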
def run_task(self, task_param):
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()
    try:
        '''
        business logic starts here
        '''
        dir_param = {'game': task_param['game'], 'platform': task_param['platform'],
                     'log_date': task_param['log_date'], 'log_name': task_param['log_name']}
        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time']}

        '''
        game / platform / date / log name / log or md5 file
        '''
        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param
        lzo_file_name = "%(log_name)s_%(log_time)s.txt" % filename_dict
        local_log_dir = '/tmp' + log_dir

        dump_sql = task_param['dump_sql']
        dump_sql = dump_sql.replace('{table_name}', task_param['table_name'])
        dump_sql = dump_sql.replace('{partition_name}', task_param['partition_name'])
        dump_sql = dump_sql.replace('{db_name}', task_param['db_name'])
        print(dump_sql)
        # the original passed an undefined name `sql` here; `dump_sql` is clearly intended
        result = mysql.dump(dump_sql, local_log_dir + lzo_file_name)
        #print(result)

        '''
        mark the task as file-load complete: 2
        '''
        datas = {'load_status': 2}
        where = {}
        where['id'] = int(task_param['id'])
        mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True
    except Exception as exc:
        print(exc)
        mysql.rollback()
        raise self.retry(exc=exc, countdown=60)
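# A plausible shape for the dump_sql template consumed above (illustrative
# only; the real template lives in the etl_manage config tables, and
# Custom_MySQL.dump presumably writes the SELECT result to the given path):
dump_sql_template = "SELECT * FROM {db_name}.{table_name} PARTITION ({partition_name})"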
def main():
    tables = excel_table_byindex()
    db = Custom_MySQL(using='center_app')
    for row in tables:
        print row
        game = row['name'].split('_')[0]
        sql = 'select id from main_category where prefix= %s'
        p = (game,)
        result = db.get(sql, *p)
        if not result:
            continue
        print result['id']
        param = {}
        param['main_category_id'] = result['id']
        if row['inner'] != '' and row['pub'] != "":
            sql = 'select count(*) as count from assets where inner_ip="%s" or public_ip ="%s"' % (row['inner'], row['pub'])
            count = db.count(sql)['count']
            if count == 0:
                param['inner_ip'] = row['inner']
                param['public_ip'] = row['pub']
                param['hostname'] = row['name']
                param['wxsn'] = row['name']
                db.insert('assets', **param)
            else:
                param['hostname'] = row['name']
                db.update('assets', 'inner_ip="%s"' % row['inner'], **param)
        elif row['inner'] != '':
            sql = 'select count(*) as count from assets where inner_ip="%s"' % row['inner']
            count = db.count(sql)['count']
            if count == 0:
                param['inner_ip'] = row['inner']
                param['hostname'] = row['name']
                param['wxsn'] = row['name']
                db.insert('assets', **param)
            else:
                param['hostname'] = row['name']
                db.update('assets', 'inner_ip="%s"' % row['inner'], **param)
        elif row['pub'] != "":
            sql = 'select count(*) as count from assets where public_ip="%s"' % row['pub']
            count = db.count(sql)['count']
            if count == 0:
                param['public_ip'] = row['pub']
                param['hostname'] = row['name']
                param['wxsn'] = row['name']
                db.insert('assets', **param)
            else:
                param['hostname'] = row['name']
                db.update('assets', 'public_ip="%s"' % row['pub'], **param)
        else:
            print 'pub and inner are both empty'
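# excel_table_byindex() is called above but not defined in this excerpt. A
# minimal sketch of what it presumably does, assuming xlrd and a header row
# whose columns include 'name', 'inner' and 'pub' (the path, sheet index and
# column names are all assumptions):
import xlrd

def excel_table_byindex(path='assets.xls', sheet_index=0):
    book = xlrd.open_workbook(path)
    sheet = book.sheet_by_index(sheet_index)
    headers = [str(c.value) for c in sheet.row(0)]
    rows = []
    for i in range(1, sheet.nrows):
        rows.append(dict(zip(headers, [c.value for c in sheet.row(i)])))
    return rows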
def get_idcs(idc_name):
    db = Custom_MySQL(using='center_app')
    sql = "select id as idc_id,prefix from idc where name like %s and is_del = 0"
    p = (idc_name + '%',)
    return db.query(sql, *p)
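# Usage sketch (hypothetical IDC name): the LIKE pattern is built on the
# Python side, so the query itself stays fully parameterized.
for idc in get_idcs('hangzhou'):
    print idc['idc_id'], idc['prefix']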
def run_task(self, task_param):
    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    mysql.begin()
    mysql_etl.begin()
    where = {'id': int(task_param['id'])}
    try:
        hive = Custom_Hive(using='ares_dw')
        game = task_param['game']
        platform = task_param['platform']
        table_name = task_param['table_name']
        log_date = task_param['log_date']
        prefix_sql = task_param['prefix_sql']
        exec_sql = task_param['exec_sql']
        post_sql = task_param['post_sql']
        date_cycle = task_param['date_cycle']
        random_str = str(random.randint(0, 999999999))
        stimes = str(int(time.time()))
        tmp_file_dir = "/tmp/tmp/%s/%s/%s" % (game, platform, log_date)

        # create the local directory
        if not os.path.exists(tmp_file_dir):
            os.makedirs(tmp_file_dir)
        tmp_file = "%s/%s_%s_%s_%s.txt" % (tmp_file_dir, table_name, date_cycle, random_str, stimes)

        hql_conf = "SET hive.support.concurrency=false;SET hive.exec.compress.output=true;" \
                   "SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; "

        '''
        mark the task as started: 1
        '''
        datas = {'status': 1, 'start_time': str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}
        mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()

        # run the prefix sql, then dump the data to the local disk for the mysql load
        if prefix_sql is not None:
            result = mysql.delete_by_sql(prefix_sql)
            logger.info('exec prefix_sql: delete old data {0}'.format(result['output']))
            if result['status'] == 0:
                '''
                mark the old-data cleanup as done: 2
                '''
                datas = {'status': 2}
                mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                mysql_etl.commit()

                '''
                run the hive ql and dump the data to the local disk
                '''
                result = hive.dump(hql_conf + exec_sql, tmp_file)
                logger.info('exec exec_sql: dump data {0}'.format(result['output']))
                if result['status'] == 0 and True == os.path.exists('%s' % tmp_file):
                    '''
                    mark the hive dump as done: 3
                    '''
                    datas = {'status': 3, 'tmp_file_name': tmp_file}
                    mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                    mysql_etl.commit()

                    # run the post sql
                    if post_sql is not None:
                        post_sql = post_sql.replace('{dir_path}', tmp_file)
                        post_sql = post_sql.replace('{table_name}', task_param['table_name'])
                        post_sql = post_sql.replace('{db_name}', task_param['db_name'])
                        result = mysql.load(post_sql)
                        logger.info('exec post_sql: load data to mysql {0}'.format(result['output']))
                        if result['status'] == 0:
                            '''
                            mark the mysql load as done: 4
                            '''
                            datas = {'status': 4,
                                     'end_time': str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}
                            mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                        else:
                            logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], post_sql))
                else:
                    logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], exec_sql))
            else:
                logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], prefix_sql))
                '''
                execution failed; mark the task as not executed: 0
                '''
                datas = {'status': 0}
                mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)

        '''
        mark the task as (simulated) removed from the task queue
        '''
        datas = {'in_queue': 0}
        update_result = mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        # if the update fails, call it once more; if it still fails, leave it
        # to the auto-repair mechanism -- the odds of that are small by now
        if update_result != 1:
            mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()
        return True
    except Exception as exc:
        logger.error('dm2report error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()
        datas = {'in_queue': 0, 'status': 0}
        mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        mysql_etl.close()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
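# For reference, the dm2report_log.status lifecycle as implemented above
# (reconstructed from the updates in run_task):
#   0 - not executed / reset after a failure
#   1 - started
#   2 - old report data deleted (prefix_sql done)
#   3 - hive dump to the local file done
#   4 - loaded into the report mysql (post_sql done)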
def query_mysql(sql):
    mysql = Custom_MySQL(using='etl_manage')
    try:
        mysql.begin()
        result = mysql.query(sql)
        mysql.commit()
        mysql.close()
        return result
    except Exception as exc:
        # roll back on error
        mysql.rollback()
        print(exc)
        mysql.close()
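# Usage sketch (hypothetical query): returns a list of dict rows, or None if
# the query raised and was rolled back.
rows = query_mysql("select id, game, platform from dm2report_log where status = 0 limit 10")
if rows:
    for row in rows:
        print row['id'], row['game'], row['platform']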
def run_task(self, task_param):
    mysql = Custom_MySQL(using='etl_manage')
    hive = Custom_Hive(using='ares_dw')
    redis = Custom_Redis(using='etl_manage')
    mysql.begin()
    where = {'id': int(task_param['id'])}
    try:
        log_date = datetime.datetime.strptime(task_param['log_date'], '%Y%m%d').strftime("%Y-%m-%d")
        log_date_1 = (datetime.datetime.strptime(task_param['log_date'], '%Y%m%d')
                      - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        dir_param = {'game': task_param['game'], 'platform': task_param['platform'],
                     'log_date': task_param['log_date'], 'log_name': task_param['log_name']}
        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time']}
        index_dict = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'],
                      'platform': task_param['platform'], 'log_date': log_date}
        partition = {'platform': task_param['platform'], 'log_date': log_date}
        index_dict_1 = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'],
                        'platform': task_param['platform'], 'log_date': log_date_1}
        partition_1 = {'platform': task_param['platform'], 'log_date': log_date_1}

        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s" % dir_param
        flag = task_param['flag']
        file_name = "%(log_name)s" % filename_dict
        file_name_txt = "%(log_name)s_%(log_time)s_result.txt" % filename_dict
        file_name_lzo = "%(log_name)s_%(log_time)s_result.txt.lzo" % filename_dict
        file_name_row = "%(log_name)s_%(log_time)s_result.txt.row" % filename_dict
        index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
        partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition
        project_path = os.getcwd()
        log_time = task_param['log_time']
        do_rate = task_param['do_rate']

        #if flag == "snap" or (do_rate == "1day" and flag == "log")
        if flag == "snap":
            index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict_1
            partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition_1

        # look up in redis which disk this game's data is stored on
        if redis.get("disk_xml") is None:
            disk_tmp = open('/data/etl_manage/conf/disk_game.xml', 'r')
            redis.set("disk_xml", str(disk_tmp.read()))
        disk_list = str(redis.get("disk_xml"))
        root = ET.fromstring(disk_list)
        disk = ""
        for gameinfo in root.findall('game'):
            if gameinfo.get('name') == task_param['game']:
                disk = gameinfo.get('disk')
                continue

        #local_log_dir = '/disk1/tmp_data'+log_dir
        local_log_dir = '/' + disk + '/data' + log_dir
        logger.info('local_log_dir: {0}'.format(local_log_dir))

        # only proceed if the directory exists
        if os.path.exists('%s' % local_log_dir):
            # rule out a leftover file of the same name; this also makes re-runs easy
            if os.path.exists('%s/%s' % (local_log_dir, file_name_txt)):
                cmd_remove = 'rm -f %s/%s*' % (local_log_dir, file_name_txt)
                logger.info('remove history file: {0}'.format(cmd_remove))
                remove_result = cmd.run(cmd_remove)
                if remove_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))

            datas = {'load_status': 1}
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
            mysql.commit()

            cmd_merge = '/bin/bash %s/mergefile2dw/merge_data.sh %s %s %s %s %s' \
                        '' % (project_path, local_log_dir, local_log_dir, file_name, do_rate, log_time)
            logger.info('merge data: {0}'.format(cmd_merge))
            merge_result = cmd.run(cmd_merge)
            logger.info('merge data result {0}'.format(merge_result['output']))

            if merge_result['status'] == 0:
                # read back the total row count
                row = open('%s/%s' % (local_log_dir, file_name_row)).read()
                '''
                merge done: 2
                '''
                datas = {'load_status': 2, 'row_num': int(row)}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                mysql.commit()

                '''
                compress the data
                '''
                cmd_compress = '/bin/bash %s/mergefile2dw/compress_data.sh %s %s' % (project_path, local_log_dir, file_name_txt)
                logger.info('compress data: {0}'.format(cmd_compress))
                compress_result = cmd.run(cmd_compress)
                if compress_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (compress_result['status'], compress_result['output'], cmd_compress))
                else:
                    '''
                    compression done: 3
                    '''
                    datas = {'load_status': 3}
                    mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                    mysql.commit()

                    # before the load, remove any file of the same name so the
                    # same file cannot end up in the table twice
                    cmd_remove = '/bin/bash %s/mergefile2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                    logger.info('remove damaged files: {0}'.format(cmd_remove))
                    remove_result = cmd.run(cmd_remove)
                    if remove_result['status'] != 0:
                        logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))

                    '''
                    load the file into hive
                    '''
                    load_sql = task_param['load_sql']
                    load_sql = load_sql.replace('{dir_path}', local_log_dir + "/" + file_name_lzo)
                    load_sql = load_sql.replace('{table_name}', task_param['table_name'])
                    load_sql = load_sql.replace('{partition_name}', '%s' % partition_name)
                    load_sql = load_sql.replace('{db_name}', task_param['db_name'])
                    logger.info('hive load SQL: {0}'.format(load_sql))
                    result = hive.load(load_sql)
                    logger.info('hive load result {0}'.format(result['output']))
                    if result['status'] == 0:
                        '''
                        mark the file load as done: 4
                        '''
                        datas = {'load_status': 4}
                        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                        mysql.commit()

                        '''
                        build the index, otherwise the lzo file cannot be split
                        '''
                        #print index_dir_name
                        cmd_index = '/bin/bash %s/mergefile2dw/create_lzo_indexer.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                        logger.info('create lzo index: {0}'.format(cmd_index))
                        index_result = cmd.run(cmd_index)
                        if index_result['status'] != 0:
                            logger.error('Error Code %s : %s Cmd: %s' % (index_result['status'], index_result['output'], cmd_index))
                        else:
                            if "create index success" in index_result['output']:
                                '''
                                mark the lzo index build as done: 5
                                '''
                                datas = {'load_status': 5}
                                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                    else:
                        '''
                        if the load failed, remove the half-written file so
                        hive queries do not error out on it
                        '''
                        cmd_remove = '/bin/bash %s/mergefile2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                        logger.info('remove damaged files: {0}'.format(cmd_remove))
                        remove_result = cmd.run(cmd_remove)
                        if remove_result['status'] != 0:
                            logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
                        else:
                            logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], load_sql))
            else:
                '''
                merging the data failed
                '''
                datas = {'load_status': 0}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                logger.error('Error Code %s : %s Cmd: %s' % (merge_result['status'], merge_result['output'], merge_result))

        '''
        done; mark the task as (simulated) removed from the queue: 0
        '''
        datas = {'in_queue': 0}
        update_result = mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        # if the update fails, call it once more; if it still fails, leave it
        # to the auto-repair mechanism -- the odds of that are small by now
        if update_result != 1:
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True
    except Exception as exc:
        logger.error('mergefile2dw error: %s' % exc)
        mysql.rollback()
        datas = {'in_queue': 0, 'load_status': 0}
        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        raise self.retry(exc=exc, countdown=30)
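# A plausible shape for the load_sql template consumed above (illustrative
# only; the real template lives in the etl_manage config tables). After the
# placeholder substitution it would expand to a HiveQL statement such as:
#   LOAD DATA LOCAL INPATH '/disk1/data/.../x_0005_result.txt.lzo'
#   INTO TABLE ares_dw.x PARTITION (plat_form='ios',log_date='2016-01-01')
load_sql_template = ("LOAD DATA LOCAL INPATH '{dir_path}' INTO TABLE "
                     "{db_name}.{table_name} PARTITION ({partition_name})")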
def run_task(self, task_param):
    redis = Custom_Redis(using='etl_manage')
    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    mysql.begin()
    mysql_etl.begin()
    where = {'id': int(task_param['id'])}
    datas = {'load_status': 0}
    try:
        '''
        business logic starts here
        '''
        dir_param = {'game': task_param['game'], 'platform': task_param['platform'],
                     'log_date': task_param['log_date'], 'log_name': task_param['log_name']}
        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time'],
                         'source_ip': task_param['source_ip']}
        prefix_sql = task_param['prefix_sql']
        post_sql = task_param['post_sql']
        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param
        file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt" % filename_dict

        # look up in redis which disk this game's data is stored on
        if redis.get("disk_xml") is None:
            disk_tmp = open('/data/etl_manage/conf/disk_game.xml', 'r')
            redis.set("disk_xml", str(disk_tmp.read()))
        disk_list = str(redis.get("disk_xml"))
        root = ET.fromstring(disk_list)
        disk = ""
        for gameinfo in root.findall('game'):
            if gameinfo.get('name') == task_param['game']:
                disk = gameinfo.get('disk')
                continue
        #local_log_dir = '/disk1/tmp_data'+log_dir
        local_log_dir = '/' + disk + '/data' + log_dir

        # only run if the file to be loaded actually exists
        if os.path.exists('%s%s' % (local_log_dir, file_name)):
            '''
            mark the task as started: 1
            '''
            datas['load_status'] = 1
            mysql_etl.update('file2mysql_log', ' id = %(id)d' % where, **datas)
            mysql_etl.commit()
            logger.info('start load data')

            # run the prefix sql
            if prefix_sql is not None and prefix_sql != '':
                mysql.query(prefix_sql)

            '''
            load the data into mysql
            '''
            load_sql = task_param['load_sql']
            load_sql = load_sql.replace('{dir_path}', local_log_dir + file_name)
            load_sql = load_sql.replace('{table_name}', task_param['table_name'])
            load_sql = load_sql.replace('{db_name}', task_param['db_name'])
            result = mysql.load(load_sql)
            logger.info('load data to mysql: {0}'.format(result['output']))

            # did the mysql load succeed?
            if result['status'] == 0:
                # run the post sql
                if post_sql is not None and post_sql != '':
                    post_sql = post_sql.replace('{table_name}', task_param['table_name'])
                    post_sql = post_sql.replace('{db_name}', task_param['db_name'])
                    mysql.query(post_sql)
                '''
                mark the mysql load as done: 3
                '''
                datas['load_status'] = 3
            else:
                logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], load_sql))
                '''
                the mysql load failed; mark the task as not executed: 0
                '''
                datas['load_status'] = 0

        '''
        mark the task as (simulated) removed from the task queue
        '''
        datas['in_queue'] = 0
        update_result = mysql_etl.update('file2mysql_log', ' id = %(id)d' % where, **datas)
        # if the update fails, call it once more; if it still fails, leave it
        # to the auto-repair mechanism -- the odds of that are small by now
        if update_result != 1:
            mysql_etl.update('file2mysql_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()
        return True
    except Exception as exc:
        logger.error('file2mysql error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()
        datas = {'in_queue': 0, 'load_status': 0}
        mysql_etl.update('file2mysql_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        mysql_etl.close()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
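# A plausible shape for the load_sql template consumed above (illustrative
# only; the real template lives in the etl_manage config tables): a MySQL
# LOAD DATA statement with the same placeholders.
load_sql_template = ("LOAD DATA LOCAL INFILE '{dir_path}' INTO TABLE "
                     "{db_name}.{table_name} "
                     "FIELDS TERMINATED BY '\\t' LINES TERMINATED BY '\\n'")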
def run_task(self, task_param):
    redis = Custom_Redis(using='etl_task')
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()
    datas = {'status': 0}
    where = {'id': int(task_param['id'])}
    hive = Custom_Hive(using='hadoops2')
    task_key_exc = ""
    try:
        game = task_param['game']
        platform = task_param['platform']
        task_dict = {'log_name': task_param['log_name'], 'do_rate': task_param['do_rate'],
                     'log_date': task_param['log_date'], 'task_date': task_param['task_date']}
        task_key_tmp = "%(log_name)s%(do_rate)s%(log_date)s%(task_date)s" % task_dict
        task_key_md5 = hashlib.md5()
        task_key_md5.update(task_key_tmp)
        task_key_md5_result = task_key_md5.hexdigest()
        task_key = "dw2dm_" + str(game) + "_" + str(platform) + "_" + str(task_key_md5_result)
        task_key_exc = task_key

        '''
        celery itself has a loophole: it can hand an already finished task to
        another worker, so the same task runs more than once. To guard against
        that, write the task's unique key into redis as soon as it starts.
        '''
        # task_key missing: the task has never run; execute it normally
        # task_key == "0": the last run failed; re-running is allowed
        if redis.get(task_key) == "0" or redis.get(task_key) is None:
            exec_sql = task_param['exec_sql']
            log_date = (datetime.datetime.strptime(task_param['log_date'], '%Y%m%d')).strftime("%Y-%m-%d")
            #prefix_sql = task_param['prefix_sql']
            #post_sql = task_param['post_sql']
            index_dict = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'],
                          'platform': task_param['platform'], 'log_date': log_date}
            index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
            # used for dropping the index
            del_index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
            if platform == 'all':
                index_dict = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'],
                              'log_date': log_date}
                # for building the index (the indexer cannot take a * wildcard,
                # so point it at the table directory only)
                index_dir_name = "%(db_name)s.db/%(table_name)s/" % index_dict
                # for dropping the index: although it is built at table level,
                # only the current day's files are removed
                del_index_dir_name = "%(db_name)s.db/%(table_name)s/*/log_date=%(log_date)s/*" % index_dict

            hql_conf = "set hive.exec.dynamic.partition.mode=nonstrict;" \
                       "set hive.exec.compress.output=true;" \
                       "set mapred.output.compress=true;" \
                       "set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec;" \
                       "set io.compression.codecs=com.hadoop.compression.lzo.LzopCodec; "

            # project root, e.g. /data/etl_manage
            project_path = os.getcwd()

            '''
            mark the task as started: 1
            '''
            datas['status'] = 1
            datas['start_time'] = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)
            mysql.commit()

            # flag the task as running in redis
            redis.set(task_key, 1)

            # run the actual "select xx insert xx" statement
            result = hive.select_insert(hql_conf + exec_sql)
            logger.info('exec exec_sql: select xxx insert xxx {0}'.format(result['output']))
            if result['status'] != 0:
                logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], exec_sql))
                '''
                execution failed; mark the task as not executed: 0
                '''
                datas['status'] = 0
                # flag the task as not executed in redis
                redis.set(task_key, 0)
            else:
                '''
                mark the hive ql (select xxx insert xxx) as done: 2
                '''
                datas['status'] = 2
                datas.pop('start_time')
                mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)
                mysql.commit()

                '''
                build the index, otherwise the lzo files cannot be split
                '''
                cmd_index = '/bin/bash %s/dw2dm/create_lzo_indexer.sh %s' % (project_path, index_dir_name)
                logger.info('create lzo index: {0}'.format(cmd_index))
                index_result = cmd.run(cmd_index)
                if index_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (index_result['status'], index_result['output'], cmd_index))
                    # flag the task as not executed in redis
                    redis.set(task_key, 0)
                else:
                    if "create index success" in index_result['output']:
                        '''
                        mark the lzo index build as done: 3
                        '''
                        datas['status'] = 3
                        datas['end_time'] = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
                        # flag the task as finished successfully in redis
                        redis.set(task_key, 2)
                    else:
                        '''
                        if the load failed, remove the half-written files so
                        hive queries do not error out on them
                        '''
                        cmd_remove = '/bin/bash %s/dw2dm/remove_damaged_file.sh %s' % (project_path, del_index_dir_name)
                        logger.info('remove damaged files: {0}'.format(cmd_remove))
                        remove_result = cmd.run(cmd_remove)
                        if remove_result['status'] != 0:
                            logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
                        # flag the task as not executed in redis
                        redis.set(task_key, 0)
        # task_key == "2": the task already ran and succeeded
        elif redis.get(task_key) == "2":
            datas['status'] = 3
        # the task is still running
        else:
            return True

        '''
        done; mark the task as (simulated) removed from the queue: 0
        '''
        datas['in_queue'] = 0
        update_result = mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)
        # if the update fails, call it once more; if it still fails, leave it
        # to the auto-repair mechanism -- the odds of that are small by now
        if update_result != 1:
            mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True
    except Exception as exc:
        logger.error('dw2dm_log error: %s' % exc)
        redis.set(task_key_exc, 0)
        mysql.rollback()
        datas = {'in_queue': 0, 'status': 0}
        mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
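# task_key states used above (kept in redis to stop celery double-dispatch):
#   <missing> - never executed
#   "0"       - last run failed; safe to re-run
#   "1"       - currently running
#   "2"       - finished successfully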
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
create_time: 2016-01-01 11:20:00
used: runs dm2report tasks
last_update: 2016-04-28 14:48:00
"""
import datetime
import sys

from dm2report.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# data date, formatted like 20151015
log_date = now_time.strftime('%Y%m%d')

tasks = mysql.query(
    'select * from dm2report_log where do_rate="%s" and task_date="%s" and game="%s" '
    'and platform="%s" and exec_num<4 and in_queue=0 and status=0'
    '' % (do_rate, log_date, game, platform))
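# The dispatch loop itself is cut off in this excerpt. A hedged sketch of what
# presumably follows, based on the celery-style run_task above and the
# in_queue/exec_num bookkeeping that run_task expects (all details here are
# assumptions, not the original code):
for task in tasks:
    mysql.begin()
    mysql.update('dm2report_log', ' id = %d' % task['id'],
                 **{'in_queue': 1, 'exec_num': task['exec_num'] + 1})
    mysql.commit()
    run_task.delay(task)  # hand the task to a celery worker
mysql.close()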
class CopyConfig():

    def __init__(self):
        self.mysql = Custom_MySQL(using='etl_manage')
        self.source_game = 'ares'

    def get_all_task(self, task_name):
        condition = 'game = "%s" ' % self.source_game
        if task_name is not None:
            condition += 'and task_name="%s"' % task_name
        ##appstoremix is_delete = 0 and
        task_list = self.mysql.query("select * from dw2dm where platform='all' and %s" % (condition))
        return task_list

    def get_structure(self, id, game, plat_form):
        '''
        Fetch the structure row for the given game; if none exists yet, one is added
        :param id:
        :param game:
        :param plat_form:
        :return:
        '''
        structure = self.mysql.get("select * from structure where is_delete=0 and id=%s", id)
        if structure != None:
            t_structure = [
                structure['type'],
                structure['flag'],
                structure['db_type'],
                game,
                plat_form,
                #structure['platform'],
                #'db_name':structure['db_name'],
                structure['table_name'],
                structure['column_name'],
                ##structure['partition_name'],
                ##structure['partition_rule'],
                ##structure['index_name'],
                structure['create_table_sql'],
                structure['user_id'],
                0,
                datetime.datetime.today().strftime("%Y-%m-%d")
            ]
            game_db = None
            if structure['type'] != None and str(structure['type']).__eq__('dw'):
                game_db = '%s_dw' % game
                t_structure.append(game_db)
            elif structure['type'] != None and str(structure['type']).__eq__('dm'):
                game_db = '%s_dm' % game
                t_structure.append(game_db)
            elif structure['type'] != None and str(structure['type']).__eq__('report'):
                game_db = 'report_%s' % game
                t_structure.append(game_db)
            exis_row = self.mysql.query("select id from structure where platform='%s' and is_delete=0 and db_name='%s' and platform='all' and table_name='%s' and db_type='%s'" % (plat_form, game_db, str(structure['table_name']), str(structure['db_type'])))
            if len(exis_row) > 0:
                return int(exis_row[0]['id'])
            else:
                return self.save_newstructure(t_structure)

    def save_new_task(self, task):
        self.mysql.insert("dw2dm", **task)
        self.mysql.commit()

    def save_newstructure(self, structure):
        query = 'INSERT INTO structure(type,flag,db_type,game,platform,table_name,column_name,create_table_sql,user_id,is_delete,create_date,db_name) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        rowNum = self.mysql.execute(query, *tuple(structure))
        self.mysql.commit()
        return rowNum

    def run(self, game, task_name=None, plat_form="all"):
        print "start copy"
        task_list = self.get_all_task(task_name)
        for task in task_list:
            form_ids = ""
            for form_id_str in task['from_id'].split(","):
                if len(str(form_ids)) > 0:
                    form_ids = form_ids + "," + str(self.get_structure(int(form_id_str), game, plat_form))
                else:
                    form_ids = str(self.get_structure(int(form_id_str), game, plat_form))
            target_id = self.get_structure(int(task['target_id']), game, plat_form)
            t_task = {
                'game': game,
                ##'platform':task['platform'],
                'platform': plat_form,
                'log_name': task['log_name'],
                'do_rate': task['do_rate'],
                'priority': task['priority'],
                'prefix_sql': task['prefix_sql'],
                'exec_sql': task['exec_sql'].replace("%s_dw" % self.source_game, "%s_dw" % game).replace("%s_dm" % self.source_game, "%s_dm" % game),
                'post_sql': task['post_sql'],
                'from_id': form_ids,
                'target_id': target_id,
                'create_date': datetime.datetime.today().strftime("%Y-%m-%d"),
                'comment': task['comment'],
                'grouped': task['grouped'],
                'is_delete': task['is_delete'],
                'user_id': task['user_id']
            }
            self.save_new_task(t_task)
        self.mysql.close()
        print "over"

    def add_structure(self, game, plat_form):
        platforms_str = plat_form.split(",")
        structures = self.mysql.query("select * from structure where platform='all' and is_delete=0 and flag='log' and game='ares' and type in ('report','dm')")
        for structure in structures:
            for platform in platforms_str:
                t_structure = [
                    structure['type'],
                    structure['flag'],
                    structure['db_type'],
                    game,
                    platform,
                    #structure['platform'],
                    #'db_name':structure['db_name'],
                    structure['table_name'],
                    structure['column_name'],
                    ##structure['partition_name'],
                    ##structure['partition_rule'],
                    ##structure['index_name'],
                    structure['create_table_sql'],
                    structure['user_id'],
                    0,
                    datetime.datetime.today().strftime("%Y-%m-%d")
                ]
                game_db = None
                if structure['type'] != None and str(structure['type']).__eq__('dw'):
                    game_db = '%s_dw' % game
                elif structure['type'] != None and str(structure['type']).__eq__('dm'):
                    game_db = '%s_dm' % game
                elif structure['type'] != None and str(structure['type']).__eq__('report'):
                    game_db = 'report_%s' % game
                t_structure.append(game_db)
                self.save_newstructure(t_structure)
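# Usage sketch (hypothetical game code): copy the ares dw2dm task definitions
# to a newly launched game. 'newgame' and the platform list are placeholders.
if __name__ == '__main__':
    copier = CopyConfig()
    copier.add_structure('newgame', 'android,ios')
    copier.run('newgame', plat_form='android')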
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
create_time: 2015-11-16 15:28:00
used: runs file2mysql tasks
last_update: 2016-04-28 14:57:00
"""
import sys
import datetime

from file2mysql.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# run rate and friends come from the command line
game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# data date, formatted like 20151015
log_date = now_time.strftime('%Y%m%d')

# data time slot (every five minutes), formatted like 0005 or 2400
log_time = now_time.strftime("%H%M")

sql = "select distinct a.* from (select * from file2mysql_log where game='%s' and platform='%s' and load_status=0 " \
      "and do_rate='%s' and task_date='%s' and in_queue=0 and exec_num<4 and retry_num <6 and log_time<='%s') as a left outer join " \
class CopyConfig():

    def __init__(self):
        self.mysql = Custom_MySQL(using='etl_manage')
        self.source_game = 'ares'

    def get_all_task(self, task_name):
        condition = 'game = "%s" ' % self.source_game
        if task_name is not None:
            condition += 'and task_name="%s"' % task_name
        task_list = self.mysql.query("select * from dm2report where is_delete = 0 and %s" % condition)
        return task_list

    def get_structure(self, id, game):
        structure = self.mysql.get("select * from structure where is_delete=0 and id=%s", id)
        if structure != None:
            t_structure = [
                structure['type'],
                structure['flag'],
                structure['db_type'],
                game,
                structure['platform'],
                #'db_name':structure['db_name'],
                structure['table_name'],
                structure['column_name'],
                ##structure['partition_name'],
                ##structure['partition_rule'],
                ##structure['index_name'],
                structure['create_table_sql'],
                structure['user_id'],
                0,
                datetime.datetime.today().strftime("%Y-%m-%d")
            ]
            game_db = None
            if structure['db_type'] != None and str(structure['db_type']).__eq__('hive'):
                game_db = '%s_dw' % game
                t_structure.append(game_db)
            elif structure['db_type'] != None and str(structure['db_type']).__eq__('mysql'):
                game_db = 'report_%s' % game
                t_structure.append(game_db)
            exis_row = self.mysql.query("select id from structure where platform='all' and user_id='wxx' and is_delete=0 and db_name='%s' and table_name='%s' and db_type='%s'" % (game_db, str(structure['table_name']), str(structure['db_type'])))
            if len(exis_row) > 0:
                return int(exis_row[0]['id'])
            else:
                return self.save_newstructure(t_structure)

    def save_new_task(self, task):
        self.mysql.insert("dm2report", **task)
        self.mysql.commit()

    def save_newstructure(self, structure):
        query = 'INSERT INTO structure(type,flag,db_type,game,platform,table_name,column_name,create_table_sql,user_id,is_delete,create_date,db_name) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        rowNum = self.mysql.execute(query, *tuple(structure))
        self.mysql.commit()
        return rowNum

    def run(self, game, task_name=None):
        print "start copy"
        task_list = self.get_all_task(task_name)
        for task in task_list:
            form_id = self.get_structure(int(task['from_id']), game)
            target_id = self.get_structure(int(task['target_id']), game)
            t_task = {
                'game': game,
                'platform': task['platform'],
                'task_name': task['task_name'],
                'date_cycle': task['date_cycle'],
                'do_rate': task['do_rate'],
                'group': task['group'],
                'priority': task['priority'],
                'prefix_sql': task['prefix_sql'],
                'exec_sql': task['exec_sql'].replace("%s_dw" % self.source_game, "%s_dw" % game),
                'post_sql': task['post_sql'],
                'from_id': form_id,
                'target_id': target_id,
                'create_date': datetime.datetime.today().strftime("%Y-%m-%d"),
                'comment': task['comment']
            }
            self.save_new_task(t_task)
        self.mysql.close()
        print "over"
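# Usage sketch (hypothetical game code): clone the ares dm2report config for a
# newly launched game; 'newgame' and 'dau' are placeholders.
if __name__ == '__main__':
    CopyConfig().run('newgame')                      # copy every task
    # CopyConfig().run('newgame', task_name='dau')   # or a single task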