Пример #1
0
 def get_monitor():
     """Return connection info (ip, port, user, pwd) for the single monitor host."""
     db = Custom_MySQL(using='center_app')
     # Query text kept verbatim; the fixed IP identifies the monitor box.
     sql = 'select public_ip as ip,22 as port,"playcrab" as  user,pwd  \
                     from assets \
                     where \
                     public_ip ="115.29.10.48" \
                    '
     return db.query(sql)
Пример #2
0
 def get_assets():
     """Return ssh connection rows (ip, port, user, pwd) for all managed assets.

     Hosts with an empty/NULL public ip and a small blacklist of special
     machines are excluded; rows come back ordered by id.
     """
     db = Custom_MySQL(using='center_app')
     # Query text kept verbatim.
     sql = 'select public_ip as ip,22 as port,"playcrab" as  user,pwd  \
                     from assets \
                     where \
                     public_ip !="" and public_ip !="NULL" \
                     and public_ip not in("115.29.12.230","115.29.12.219","49.213.111.2","49.213.111.3","49.213.111.4","49.213.111.5","49.213.111.6") \
                     order by id'
     return db.query(sql)
Пример #3
0
def main():
    """Sync passwords from the spreadsheet into assets, keyed by public IP."""
    rows = excel_table_byindex()

    db = Custom_MySQL(using='center_app')

    for entry in rows:
        # Skip spreadsheet rows without a public ip.
        if entry['pub'] != "":
            db.update('assets', 'public_ip="%s"' % entry['pub'],
                      **{'pwd': entry['pwd']})
Пример #4
0
 def get_result(self):
     '''
     Collect the execution results produced by the child processes.

     Reads every row of this batch from log.batch_detail.  The stored flag
     is normalized: 1 only when flag equals 2 (finished), else 0.  The rows
     are cached on self.result.
     '''
     db = Custom_MySQL(using='log')
     # NOTE(review): batch_id is %-interpolated into the SQL — acceptable only
     # while batch ids are generated internally, never from user input.
     result = db.query('select ip,result as data,IF(flag=2,1,0) as flag \
                        from batch_detail \
                        where batch_id ="%s"'% self.batch_id )
     db.close()
     print '===========', len(result),'================'
     self.result = result
Пример #5
0
 def assets2appinfo(self):
     """Insert hosts that exist in assets but are still missing from app_info."""
     db = Custom_MySQL(using='center_app')
     # Copy every assets row whose public_ip does not yet appear in app_info.
     sql ='''
         INSERT into app_info (assets_id,name,public_ip,inner_ip,main_category_id)
         SELECT id,hostname as name,public_ip,inner_ip,main_category_id
         from 
         assets
         where public_ip 
         not in (select public_ip from app_info )
         '''
     db.execute(sql) 
     db.commit()
Пример #6
0
 def call_back(self, data=None):
     """Persist collected last-login IPs.

     data: list of worker result dicts, each with 'flag' (1 == success),
     'ip' (the probed host) and 'data' (a JSON-encoded list of login ips).
     Successful results are bulk-inserted into log.last_ip; failures are
     just printed.
     """
     # Bug fix: mutable default argument (data=[]) is shared across calls;
     # None sentinel preserves caller-visible behaviour.
     if data is None:
         data = []

     db = Custom_MySQL(using='log')

     sql = "insert into last_ip (id,ip,login_ip) values(null,%s,%s)"

     for result in data:
         if int(result['flag']) == 1:
             ips = json.loads(result['data'])
             host = result['ip']
             # One (host, login_ip) row per reported ip.
             set_param = [(host, ip) for ip in ips]
             db.executemany(sql, set_param)
             db.commit()
         else:
             print(result)
Пример #7
0
    def call_back(self, data=None):
        """Mark each probed host as managed (flag == '1') or unmanaged.

        data: list of result dicts with 'flag' (string) and 'ip'.  Sets
        assets.is_manage to 1 or 0 per host; unmanaged results are also
        printed for diagnostics.
        """
        # Bug fix: mutable default argument (data=[]) is shared across
        # calls; None sentinel preserves caller-visible behaviour.
        if data is None:
            data = []

        db = Custom_MySQL(using='center_app')

        for result in data:
            # Both branches of the original ran the same update; only the
            # is_manage value and the diagnostic print differed.
            managed = result != [] and result['flag'] == '1'
            if not managed:
                print(result)
            # NOTE(review): ip is %-interpolated into the WHERE clause —
            # safe only while ips come from our own probes, not user input.
            db.update('assets', 'public_ip="%s"' % result['ip'],
                      **{'is_manage': 1 if managed else 0})
            db.commit()
Пример #8
0
 def run(self):
     '''
     Business entry point.

     Pulls game metadata from the remote API and syncs it into
     main_category / sub_category: the joined app-type list is written onto
     the main category row, then every district ("dist") is inserted or
     updated under that category.  Returns False on a failed count query.
     '''
     api_data = self.get_api_data()
     if api_data == None:return 
     
     db = Custom_MySQL(using='center_app')
     
     # Update the application types stored on the main category.
     app_type ={}
     app_type ={'app_type':','.join(api_data['type'])}
     db.update('main_category',' prefix="%s" ' % self.game_code,**app_type)
     
     
     game = db.get('select id from main_category where prefix="%s"' % self.game_code)
     main_category_id = game['id'] 
                   
     # Walk the district list returned by the API.
     for dist in  api_data['dists']:
         
         print '========'+dist['name']+'+'+dist['code']
         
         # NOTE(review): name/code are concatenated into SQL unescaped —
         # fine only while the API is trusted.
         sql ='select count(id) as count from  sub_category where main_category_id ='+str(main_category_id)+ ' and name="'+dist['name']+'"'
         count = db.count(sql)
         if count==None:
             print 'SQL Error:%s'% sql 
             return False
         
         # Fields to write for this district.
         dist_data ={}
         dist_data ={'prefix':dist['code'],
                     'main_category_id':main_category_id,
                     'name':dist['name'],
                     'platform':self.platform}
         
         # Insert when the district is new, otherwise update in place.
         if count['count'] == 0:
              db.insert('sub_category',**dist_data)
         else:
              db.update('sub_category',' main_category_id ='+str(main_category_id)+ ' and name="'+dist['name']+'"',**dist_data)
Пример #9
0
class AddPartition():
    """Create daily MySQL table partitions for per-game report databases.

    Partitions are named p<YYYYMMDD> and bounded by TO_DAYS(<date>); a
    partition is only added when it does not already exist.
    """

    def __init__(self):
        # All partition DDL runs through the hadoops2 connection.
        self.mysql = Custom_MySQL(using='hadoops2')

    def execPartitons(self, games, tables, start_day, end_day):
        """Add one partition per day in [start_day, end_day] for every
        report_<game>.<table> combination, skipping existing partitions."""
        for game in games:
            db = "report_" + game
            for table in tables:
                self.mysql.begin()
                do_date = start_day
                day_offset = 0
                header = "ALTER TABLE " + db + "." + table + " ADD PARTITION ("
                clauses = []
                while do_date <= end_day:
                    day_offset += 1
                    partition_name = "p" + str(do_date).replace('-', '')
                    if not self.find_partition(db, table, partition_name):
                        clauses.append(
                            "PARTITION %s VALUES LESS THAN (to_days('%s')),"
                            % (partition_name, do_date))
                    do_date = start_day + datetime.timedelta(days=day_offset)
                if clauses:
                    print("add partition db:%s table:%s ,start_day:%s,end_day:%s" % (db, table, start_day, end_day))
                    # Drop the trailing comma left by the last clause.
                    sql = header + "".join(clauses)[:-1] + ");"
                    print(sql)
                    self.mysql.execute(sql)
                    self.mysql.commit()

    def add_months(self, sourcedate, months):
        """Return sourcedate shifted by *months*, clamping the day to the
        last valid day of the target month (Jan 31 + 1 month -> Feb 28/29)."""
        month = sourcedate.month - 1 + months
        # Floor division matches the old Python 2 int(year + month / 12).
        year = sourcedate.year + month // 12
        month = month % 12 + 1
        day = min(sourcedate.day, calendar.monthrange(year, month)[1])
        return datetime.date(year, month, day)

    def find_partition(self, db, table_name, partition_name):
        """Return True when the named partition already exists in
        information_schema.partitions for db.table_name."""
        rows = self.mysql.query(
            "select partition_name,partition_expression, partition_description,table_rows  from information_schema.partitions"
            " where table_schema='%s' and table_name='%s' and partition_name='%s';" % (db, table_name, partition_name))
        if len(rows) > 0:
            print("exis partitons db:%s,table:%s,p_name:%s" % (db, table_name, partition_name))
            return True
        return False
Пример #10
0
 def call_back(self, data=None):
     """Persist per-host cpu usage samples collected by the workers.

     A new cpu_batch row is created first; each successful result
     (flag == 1) carries a JSON list in 'data' whose items are bulk
     inserted into cpu_detail.  Failed results are only printed.
     """
     # Bug fix: mutable default argument (data=[]) is shared across calls;
     # None sentinel preserves caller-visible behaviour.
     if data is None:
         data = []

     db = Custom_MySQL(using='log')
     # NOTE(review): last_insert_id is never used below, so cpu_detail rows
     # are not linked to the new cpu_batch row — confirm against the
     # cpu_detail schema whether that linkage was intended.
     last_insert_id = db.insert('cpu_batch', **{})
     db.commit()

     sql = "insert into cpu_detail (id,ip,cpu_used) values(null,%s,%s)"

     for result in data:
         if int(result['flag']) == 1:
             ips = json.loads(result['data'])
             host = result['ip']
             # One (host, cpu_used) row per reported sample.
             set_param = [(host, ip) for ip in ips]
             db.executemany(sql, set_param)
             db.commit()
         else:
             print(result)
Пример #11
0
    def run(self):
        '''
        Business entry point.

        Fetches game metadata from the remote API, resolves the main
        category by game prefix, then for every district ("dist") looks up
        its sub_category id and inserts each of its apps via insert_db.
        Finally back-fills app_info.assets_id by joining assets on the
        public and inner ips.  Exceptions are caught and printed only.
        '''
        try:
            # Assets stay plain assets: ops manage the asset servers, i.e.
            # the application's public network — no longer modelled as apps.
            #self.assets2appinfo()
            
            api_data = self.get_api_data()
            if api_data == None:return 
            
            db = Custom_MySQL(using='center_app')    
       
            game = db.get('select id from main_category where prefix="%s"' % self.game_code)
            
            main_category_id = game['id']               
            # Walk the district list returned by the API.
            for dist in  api_data['dists']:
                
                print '========'+dist['name']+'+'+dist['code']
                
                sql ='select id from  sub_category where main_category_id ='+str(main_category_id)+ ' and name="'+dist['name']+'"'
                sub_category_id = db.get(sql)['id']
                
                
                # Store each district's non-shared apps in the database.
                for app in dist['ips']:
                    self.insert_db(main_category_id,sub_category_id,self.platform,app)
                            
                # Shared ("global") apps — currently not processed.
                #for app in api_data['global']:
                    
                    
 
            # Back-fill assets_id on app_info from the assets table.
            db.execute('update app_info as a left join assets as b on a.public_ip = b.public_ip set a.assets_id = b.id where a.public_ip is not NULL')                              
            db.execute('update app_info as a left join assets as b on a.inner_ip = b.inner_ip set a.assets_id = b.id where a.inner_ip is not NULL')                              
        
        except Exception as e:
            print e
Пример #12
0
 def save_idcs(self):
     """Upsert every fetched IDC record into the idc table, keyed by prefix.

     Returns False (and does nothing) when no data was fetched.
     """
     if self.data == None:
         return False

     db = Custom_MySQL(using='center_app')

     for idc in self.data:
         # Does a row with this prefix already exist?
         existing = db.count('select count(*) as count from idc where prefix = %s',
                             *(idc['prefix'],))
         if existing['count'] == 0:
             db.insert('idc', **idc)
         else:
             db.update('idc', 'prefix="%s"' % idc['prefix'], **idc)
Пример #13
0
    def save_hosts(self):
        """Upsert fetched host records into assets, keyed by the wxsn serial.

        Returns False (and does nothing) when no data was fetched.
        """
        if self.data == None:
            return False

        db = Custom_MySQL(using='center_app')

        for host in self.data:
            # Look the host up by its wxsn serial number.
            found = db.count('select count(*) as count from assets where wxsn= %s',
                             *(host['wxsn'],))
            if found['count'] == 0:
                db.insert('assets', **host)
            else:
                db.update('assets', 'wxsn="%s"' % host['wxsn'], **host)
Пример #14
0
 def run(self):
     """Execute self.host['cmd'] on a remote host over SSH and record progress.

     batch_detail.flag advances: 1 = started, 2 = command finished,
     -1 = both paramiko and the ssh-shell fallback failed.  The outcome is
     also pushed onto self.grandchild as {'flag', 'ip', 'data'}.
     """
     db = Custom_MySQL(using='log')
     status = {'flag':1}
     db.update('batch_detail',
               'batch_id="%s" and ip ="%s"'  % (self.host['batch_id'],self.host['ip']),
               **status)
     db.commit()
     db.close()
     try:
          
         # Open the SSH connection.
         self.ssh=paramiko.SSHClient()
         
         # Fall back to the public key when no password is configured.
         # NOTE(review): the loaded RSA key is never passed to connect(),
         # and .get('pwd', True) == True only matches when 'pwd' is missing
         # or literally True — confirm this is the intended key handling.
         if self.host.get('pwd',True) == True:
             privatekeyfile = os.path.expanduser('/root/.ssh/id_rsa')
             paramiko.RSAKey.from_private_key_file(privatekeyfile)
 
         # Handling for a missing known_hosts entry.
         known_host = "/root/.ssh/known_hosts"
         self.ssh.load_system_host_keys(known_host)
         self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
 
         #os.system('/opt/local/junos/junos')
         # Connect to the remote machine.
         self.ssh.connect(
                     hostname =self.host['ip'],
                     port     =int(self.host['port']),
                     username =self.host['user'],
                     password =self.host['pwd'],
                     compress =True,
                     timeout  =20
                     )
      
         # Capture the remote command's output.
         stdin, stdout, stderr = self.ssh.exec_command(self.host['cmd'],bufsize=65535, timeout=10)
         temp = stdout.readlines()
         
         db = Custom_MySQL(using='log')
         status = {'flag':2,'result':''.join(temp)}
         db.update('batch_detail',
                   'batch_id="%s" and ip ="%s"' % (self.host['batch_id'],self.host['ip']),**status)
         db.commit()
         db.close()
        
         
         # NOTE(review): empty output is reported as success ('1'), any
         # output as '0' — presumably commands are expected to be silent on
         # success; verify against the consumers of self.grandchild.
         if temp ==[]:
 
             self.grandchild.put({'flag':'1','ip':self.host['ip'],'data':temp})
         else:
             self.grandchild.put({'flag':'0','ip':self.host['ip'],'data':temp})
         # Done with this host.
         self.ssh.close()
         
     except  :
         #print trace_back()
         # In case paramiko itself misbehaves, retry once through the ssh
         # shell command; only if that also fails is it a real failure.
         cmd ="ssh -p %s -o StrictHostKeyChecking=no %s@%s  %s"%(self.host['port'],self.host['user'],self.host['ip'],self.host['cmd'])
         (status,output) = commands.getstatusoutput(cmd)
         if status == 0:
             db = Custom_MySQL(using='log')
             status = {'flag':2,'result':output}
             db.update('batch_detail',
                       'batch_id="%s" and ip ="%s"' % (self.host['batch_id'],self.host['ip']),
                       **status)
             db.commit()
             db.close()
         
             self.grandchild.put({'flag':'1','ip':self.host['ip'],'data':output})
         else:
             
             db = Custom_MySQL(using='log')
             status = {'flag':-1,'result':'faild'}
             db.update('batch_detail',
                       'batch_id="%s" and ip ="%s"' % (self.host['batch_id'],self.host['ip']),
                       **status)
             db.commit()
             db.close()
         
             self.grandchild.put({'flag':'0','ip':self.host['ip'],'data':trace_back()})
Пример #15
0
def run_task(self, task_param):
    """ETL worker for one raw log file: verify/merge, compress, then md5.

    task_param carries game/platform/log metadata plus the etl_data_log row
    id.  etl_status advances through the pipeline stages (1 started,
    2 verified with row count, 4 compressed, 6 md5 written; 0 = reset so
    the task is retried) and in_etl_queue is cleared at the end.  On any
    exception the transaction rolls back, the row is reset, and the celery
    task is re-queued after 30s.
    """
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()
    datas = {'etl_status': 0}
    where = {'id': int(task_param['id'])}

    try:
        log_param = {
            'game': task_param['game'],
            'platform': task_param['platform'],
            'log_date': task_param['log_date'],
            'log_name': task_param['log_name'],
            'log_dir': task_param['log_dir'],
            'col_num': task_param['col_num']
        }
        log_name_param = {
            'log_name': task_param['log_name'],
            'source_ip': task_param['source_ip'],
            'log_time': task_param['log_time']
        }

        do_rate = task_param['do_rate']
        flag = task_param['flag']

        log_dir = '%(log_dir)s/%(game)s/%(platform)s/%(log_date)s/%(log_name)s' % log_param
        log_name = '%(log_name)s_%(source_ip)s_%(log_time)s' % log_name_param
        log_name_notime = '%(log_name)s_%(source_ip)s' % log_name_param
        col_num = task_param['col_num']

        project_path = os.getcwd()

        # Start only when the md5 file and the data file both exist
        # (daily "log"-flag tasks skip that check).
        if (do_rate == "1day" and flag == "log") or (
                os.path.exists('%s/%s.log.md5' % (log_dir, log_name)) is True
                and os.path.exists('%s/%s.log' % (log_dir, log_name)) is True):

            # Remove possible same-named leftovers; this also makes
            # repair re-runs straightforward.
            if os.path.exists('%s/%s.txt' % (log_dir, log_name)):
                cmd_remove = 'rm -f %s/%s.txt*' % (log_dir, log_name)
                logger.info('remove history file: {0}'.format(cmd_remove))
                remove_result = cmd.run(cmd_remove)
                if remove_result['status'] != 0:
                    logger.error('Error Code %s : %s  Cmd: %s' %
                                 (remove_result['status'],
                                  remove_result['output'], cmd_remove))
            # Stage 1: mark the task as started.
            '''
            将任务标识为开始执行:1
            '''
            datas['etl_status'] = 1
            mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
            mysql.commit()
            # Verify the data.
            '''
            校验数据
            '''
            # "Day"-rate log data is special-cased: files from the same ip
            # are merged into one file while verifying.
            if do_rate == "1day" and flag == "log":
                cmd_merge = '/bin/bash %s/etl_data/merge_data.sh %s %s %s' % (
                    project_path, log_dir, log_name_notime, col_num)
                logger.info('check data: {0}'.format(cmd_merge))
                merge_result = cmd.run(cmd_merge)
            else:
                cmd_merge = '/bin/bash %s/etl_data/check_data.sh %s %s %s' % (
                    project_path, log_dir, log_name, col_num)
                logger.info('check data: {0}'.format(cmd_merge))
                merge_result = cmd.run(cmd_merge)

            if merge_result['status'] != 0:
                logger.error('Error Code %s : %s  Cmd: %s' %
                             (merge_result['status'], merge_result['output'],
                              cmd_merge))
                datas['etl_status'] = 0
            else:
                # Read the total row count of the format-verified file.
                '''
                读取校验格式后的文件总条数
                '''
                row = open('%s/%s.txt.row' % (log_dir, log_name)).read()
                # Stage 2: store the row count, mark verification done.
                '''
                将文件总条数写入数据库,并将任务标识为为校验已完成:2
                '''
                datas['etl_status'] = 2
                datas['row_num'] = int(row)
                #datas = {'etl_status': 2, 'row_num': int(row)}
                mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
                mysql.commit()
                # Compress the data.
                '''
                压缩数据
                '''
                cmd_compress = '/bin/bash %s/etl_data/compress_data.sh %s %s' % (
                    project_path, log_dir, log_name)
                logger.info('compress data: {0}'.format(cmd_compress))
                compress_result = cmd.run(cmd_compress)
                if compress_result['status'] != 0:
                    logger.error('Error Code %s : %s  Cmd: %s' %
                                 (compress_result['status'],
                                  compress_result['output'], cmd_compress))
                    datas['etl_status'] = 0
                else:
                    # Stage 4: mark compression done.
                    '''
                    将任务标识为压缩完成:4
                    '''
                    datas['etl_status'] = 4
                    datas.pop('row_num')
                    mysql.update('etl_data_log', ' id = %(id)d' % where,
                                 **datas)
                    mysql.commit()
                    # Generate the MD5 file.
                    '''
                    生成MD5文件
                    '''
                    cmd_md5 = '/bin/bash %s/etl_data/md5_data.sh %s %s' % (
                        project_path, log_dir, log_name)
                    logger.info('md5 data: {0}'.format(cmd_md5))
                    md5_result = cmd.run(cmd_md5)

                    if md5_result['status'] != 0:
                        logger.error('Error Code %s : %s  Cmd: %s' %
                                     (md5_result['status'],
                                      md5_result['output'], cmd_md5))
                        datas['etl_status'] = 0
                    else:
                        # Stage 6: md5 written — verify/merge/compress all done.
                        '''
                        将任务标识为生成MD5完成(即为校验、合并、压缩均已完成):6
                        '''
                        datas['etl_status'] = 6
        # Done: simulate removing the task from the queue.
        '''
        执行完毕,模拟从队列中清楚任务:0
        '''
        datas['in_etl_queue'] = 0
        update_result = mysql.update('etl_data_log', ' id = %(id)d' % where,
                                     **datas)
        # If the update failed, call it once more; if it still fails, wait
        # for the auto-repair mechanism (very unlikely at this point).
        if update_result != 1:
            mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)

        mysql.commit()
        mysql.close()
        return True

    except Exception as exc:
        logger.error('etl_data error: %s' % exc)
        mysql.rollback()

        datas = {'in_etl_queue': 0, 'etl_status': 0}
        mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()

        mysql.close()
        raise self.retry(exc=exc, countdown=30)
Пример #16
0
def run_task(self, task_param):
    """dm2report worker: dump a hive query result and load it into MySQL.

    A redis key derived from the task signature guards against celery
    double-delivery (None/"0" = runnable, "1" = running, "2" = done).
    dm2report_new_log.status advances: 1 started, 2 old rows deleted,
    3 hive dump finished, 4 loaded into mysql (0 = reset for retry);
    in_queue is cleared at the end.  On exception both connections roll
    back, the redis key is reset, and the task retries after 60s.
    """
    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    redis = Custom_Redis(using='etl_task')

    mysql.begin()
    mysql_etl.begin()

    datas = {'status': 0}
    where = {'id': int(task_param['id'])}

    task_key_exc = ""

    try:
        hive = Custom_Hive(using='ares_dw')

        game = task_param['game']
        platform = task_param['platform']
        table_name = task_param['table_name']
        log_date = task_param['log_date']
        prefix_sql = task_param['prefix_sql']
        exec_sql = task_param['exec_sql']
        post_sql = task_param['post_sql']
        date_cycle = task_param['date_cycle']
        random_str = str(random.randint(0, 999999999))
        stimes = str(int(time.time()))

        task_date = task_param['task_date']
        task_name = task_param['task_name']
        do_rate = task_param['do_rate']

        #task_key_tmp = str(game)+str(platform)+str(task_name)+str(date_cycle)+str(do_rate)+str(log_date)+str(task_date)
        task_key_tmp = str(task_name) + str(date_cycle) + str(do_rate) + str(
            log_date) + str(task_date)

        task_key_md5 = hashlib.md5()
        task_key_md5.update(task_key_tmp)
        task_key_md5_result = task_key_md5.hexdigest()

        task_key = "dm2report_new_" + str(game) + "_" + str(
            platform) + "_" + str(task_key_md5_result)
        task_key_exc = task_key
        '''
        celery 本身的机制存在漏洞,会将一个已经完成任务再次分配给其他的worker,致使同一个任务执行多次
        为防止此种现象,在任务开始执行时,将任务的“唯一标示”写入redis中,标注已执行
        '''
        # Celery can re-deliver an already-finished task to another worker;
        # the redis key written below marks execution state to prevent
        # duplicate runs.
        # If task_key is None, the task has never run — execute normally.
        # If task_key == "0", the previous run failed — re-execution allowed.
        if redis.get(task_key) == "0" or redis.get(task_key) is None:

            tmp_file_dir = "/tmp/tmp/%s/%s/%s" % (game, platform, log_date)
            # Create the local directory for the dump file.
            if not os.path.exists(tmp_file_dir):
                os.makedirs(tmp_file_dir)

            tmp_file = "%s/%s_%s_%s_%s.txt" % (tmp_file_dir, table_name,
                                               date_cycle, random_str, stimes)
            hql_conf = "SET hive.support.concurrency=false;" \
                       "SET hive.exec.compress.output=true;" \
                       "SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; "
            # Stage 1: mark the task as started.
            '''
            将任务标示为开始执行:1
            '''
            datas['status'] = 1
            datas['start_time'] = str(
                datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where,
                             **datas)
            mysql_etl.commit()
            # Mark in redis: task is running.
            redis.set(task_key, 1)

            # Run the prefix sql (delete old rows), then dump data locally
            # for the later mysql load.
            if prefix_sql is not None:
                result = mysql.delete_by_sql(prefix_sql)
                logger.info('exec prefix_sql: delete old data {0}'.format(
                    result['output']))

                if result['status'] != 0:
                    logger.error(
                        'Error Code %s : %s  Cmd: %s' %
                        (result['status'], result['output'], prefix_sql))
                    # Failed: mark the task as not executed (0).
                    '''
                    执行失败,将其状态标为未执行:0
                    '''
                    datas['status'] = 0
                    # Mark in redis: task has not started.
                    redis.set(task_key, 0)
                else:
                    # Stage 2: old rows deleted.
                    '''
                    将任务标示为删除临时文件完成:2
                    '''
                    datas['status'] = 2
                    datas.pop('start_time')
                    mysql_etl.update('dm2report_new_log',
                                     ' id = %(id)d' % where, **datas)
                    mysql_etl.commit()
                    # Run the hive ql and dump the result to the local file.
                    '''
                    开始执行hive ql,将数据dump到本地
                    '''
                    result = hive.dump(hql_conf + exec_sql, tmp_file)
                    logger.info('exec exec_sql: dump data {0}'.format(
                        result['output']))

                    if result['status'] != 0 or False == os.path.exists(
                            '%s' % tmp_file):
                        logger.error(
                            'Error Code %s : %s  Cmd: %s' %
                            (result['status'], result['output'], exec_sql))
                        # Mark in redis: task has not started.
                        redis.set(task_key, 0)
                    else:
                        # Stage 3: hive dump finished.
                        '''
                        将任务标示为dump hive数据完成:3
                        '''
                        datas['status'] = 3
                        datas['tmp_file_name'] = tmp_file
                        mysql_etl.update('dm2report_new_log',
                                         ' id = %(id)d' % where, **datas)
                        mysql_etl.commit()

                        # Run the post sql (load the dump into mysql).
                        if post_sql is not None:
                            post_sql = post_sql.replace('{dir_path}', tmp_file)
                            post_sql = post_sql.replace(
                                '{table_name}', task_param['table_name'])
                            post_sql = post_sql.replace(
                                '{db_name}', task_param['db_name'])

                            result = mysql.load(post_sql)
                            logger.info(
                                'exec post_sql: load data to hdfs {0}'.format(
                                    result['output']))

                            if result['status'] != 0:
                                logger.error('Error Code %s : %s  Cmd: %s' %
                                             (result['status'],
                                              result['output'], post_sql))
                                # Mark in redis: task has not started.
                                redis.set(task_key, 0)
                            else:
                                # Stage 4: load into mysql finished.
                                '''
                                将任务标识为录入mysql完成:4
                                '''
                                datas['status'] = 4
                                datas['end_time'] = str(
                                    datetime.datetime.now().strftime(
                                        '%Y-%m-%d %H:%M:%S'))
                                datas.pop('tmp_file_name')
                                # Mark in redis: task finished successfully.
                                redis.set(task_key, 2)
            else:
                logger.error('prefix_sql is null')
                datas['status'] = 0
                # Mark in redis: task has not started.
                redis.set(task_key, 0)

        # task_key == "2" means this task already ran successfully.
        elif redis.get(task_key) == "2":
            datas['status'] = 4
        # Otherwise the task is still running elsewhere.
        else:
            return True
        # Simulate removal of the task from the queue.
        '''
        将任务标示为:(模拟) 已从任务队列中移除
        '''
        datas['in_queue'] = 0
        update_result = mysql_etl.update('dm2report_new_log',
                                         ' id = %(id)d' % where, **datas)
        # If the db update failed, call it once more; if it still fails,
        # wait for the auto-repair mechanism (very unlikely at this point).
        if update_result != 1:
            mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where,
                             **datas)

        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()

        return True

    except Exception as exc:
        logger.error('dm2report error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()
        redis.set(task_key_exc, 0)

        datas = {'in_queue': 0, 'status': 0}
        mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()

        mysql_etl.close()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
Пример #17
0
def run_task(self, task_param):
    """file2dw worker: load one lzo log file into its hive partition.

    file2dw_log.load_status advances: 1 started, 2 file loaded, 3 lzo index
    built (0 = reset so the task is picked up again); in_queue is cleared
    when done.  Damaged/duplicate files are removed both before the load
    and after a failed index step.  On exception: rollback, clear
    in_queue, retry after 60s.
    """
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()

    where = {'id': int(task_param['id'])}

    try:
        dir_param = {'game': task_param['game'], 'platform': task_param['platform'],
                     'log_date': task_param['log_date'], 'log_name': task_param['log_name']}
        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time'], 'source_ip':task_param['source_ip']}
        index_dict = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'], 'platform': task_param['platform'],
                      'log_date': datetime.datetime.strptime(task_param['log_date'], '%Y%m%d').strftime("%Y-%m-%d")}
        partition = {'platform': task_param['platform'], 'log_date': datetime.datetime.strptime(task_param['log_date'], '%Y%m%d').strftime("%Y-%m-%d")}

        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param

        lzo_file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt.lzo" % filename_dict
        index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
        partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition
        project_path = os.getcwd()

        local_log_dir = '/disk1/tmp_data'+log_dir
        logger.info('local_log_dir: {0}'.format(local_log_dir))

        # Only run when the file to load into hive actually exists.
        if os.path.exists('%s%s' % (local_log_dir, lzo_file_name)):

            # Stage 1: mark the task as started.
            '''
            将任务标识为开始执行:1
            '''
            datas = {'load_status': 1}
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)

            # Before loading, remove same-named files so the same file
            # cannot end up in the partition twice.
            cmd_remove = '/bin/bash %s/file2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, lzo_file_name)
            logger.info('remove damaged files: {0}'.format(cmd_remove))
            remove_result = cmd.run(cmd_remove)

            if remove_result['status'] != 0:
                logger.error('Error Code %s : %s  Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))

            # Load the file into hive.
            '''
            文件加载到hive中
            '''
            hive = Custom_Hive(using='ares_dw')
        
            load_sql = task_param['load_sql']
            load_sql = load_sql.replace('{dir_path}', local_log_dir+lzo_file_name)
            load_sql = load_sql.replace('{table_name}', task_param['table_name'])
            load_sql = load_sql.replace('{partition_name}', '%s' % partition_name)
            load_sql = load_sql.replace('{db_name}', task_param['db_name'])

            logger.info('hive load SQL: {0}'.format(load_sql))
            result = hive.load(load_sql)
            logger.info('hive load result {0}'.format(result['output']))

            if result['status'] == 0:
                # Stage 2: file loaded.
                '''
                将任务标识为加载文件完成:2
                '''
                datas = {'load_status': 2}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)

                # Build the lzo index; without it lzo does not support split.
                '''
                建立索引,否则lzo将不支持split
                '''
                #print index_dir_name
                cmd_index = '/bin/bash %s/file2dw/create_lzo_indexer.sh %s %s' % (project_path, index_dir_name, lzo_file_name)
                logger.info('create lzo index: {0}'.format(cmd_index))
                index_result = cmd.run(cmd_index)

                if index_result['status'] != 0:
                    logger.error('Error Code %s : %s  Cmd: %s' % (index_result['status'], index_result['output'], cmd_index))
                else:
                    if "create index success" in index_result['output']:
                        # Stage 3: lzo index built.
                        '''
                        将任务标识为建立lzo索引完成:3
                        '''
                        datas = {'load_status': 3}
                        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                    else:
                        # Index failed: remove the half-written file so hive
                        # queries do not error out on it.
                        '''
                        如果load数据失败,则删除半途出现错误的文件,方式hive查询的时候报错
                        '''
                        cmd_remove = '/bin/bash %s/file2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, lzo_file_name)
                        logger.info('remove damaged files: {0}'.format(cmd_remove))
                        remove_result = cmd.run(cmd_remove)

                        if remove_result['status'] != 0:
                            logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))

            else:
                # Load failed: reset to 0 so the task is re-executed.
                '''
                将任务标识为未启动,重新执行:0
                '''
                datas = {'load_status': 0}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)

        # Simulate removal of the task from the queue.
        '''
        将任务标示为:(模拟) 已从任务队列中移除
        '''
        datas = {'in_queue': 0}
        update_result = mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        # If the update failed, call it once more; if it still fails, wait
        # for the auto-repair mechanism (very unlikely at this point).
        if update_result != 1:
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)

        mysql.commit()
        mysql.close()
        return True

    except Exception as exc:
        print (exc)
        mysql.rollback()
        
        datas = {'in_queue': 0}
        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()

        mysql.close()
        raise self.retry(exc=exc, countdown=60)
Пример #18
0
class CenterApp():
    '''
    Read-only helper around the center_app asset database.

    Builds nested lookup tables of server IPs keyed first by game name
    and then by platform, for use by the collection/monitoring scripts.
    '''

    def __init__(self):
        # One shared connection, reused by every query in this class.
        self.center_mysql = Custom_MySQL(using='center_app')

    def _group_by_game_platform(self, rows):
        '''
        Fold query rows into {gamename: {platform: [source_ip, ...]}}.

        Rows whose gamename or platform is NULL are skipped.  Keys and
        IPs are utf-8 encoded byte strings, preserving the original
        `.encode('utf8')` behaviour of both callers.
        '''
        ip_list = {}
        for info in rows:
            if info['gamename'] is None or info['platform'] is None:
                continue
            game = info['gamename'].encode('utf8')
            plat = info['platform'].encode('utf8')
            ip_list.setdefault(game, {}).setdefault(plat, []).append(
                info['source_ip'].encode('utf8'))
        return ip_list

    def get_log_ip(self):
        '''
        Fetch every log-source IP: web_balance/web_admin hosts plus
        gameserver hosts, grouped by game and platform.
        '''
        sql = "select t1.*,t2.prefix as platform  from \
                            (select m.prefix as gamename,a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
                            on a.main_category_id = m.id  \
                            where a.is_del = 0 and (a.hostname like '%%%%web_balance%%%%' or a.hostname like '%%%%web_admin%%%%') ) t1 \
                            left join platform t2 on t1.platform_id = t2.id \
                            where t1.source_ip is not null group by source_ip\
                union all \
                select t1.*,t2.prefix as platform from \
                            (select m.prefix as gamename,a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
                            on a.main_category_id = m.id  \
                            where a.is_del = 0 and a.hostname like '%%%%gameserver%%%%' ) t1 \
                            left join platform t2 on t1.platform_id = t2.id \
                            where t1.source_ip is not null group by source_ip"

        result = self.center_mysql.query(sql)
        return self._group_by_game_platform(result)

    def get_snap_ip(self):
        '''
        Fetch every snapshot source prefix, grouped by game and platform.
        '''
        s_sql = "select f.prefix as platform,s.prefix as source_ip,m.prefix as gamename from sub_category s inner join main_category m \
                        on s.main_category_id = m.id \
                left join platform f on f.id = s.platform_id"

        result = self.center_mysql.query(s_sql)
        return self._group_by_game_platform(result)

    def get_ip_list(self):
        '''
        Assemble the complete IP map: fixed entries for the basic,
        mostsdk and wanpay services plus the dynamic log and snapshot
        lookups.  Returns None after rollback if any query fails.
        '''
        try:
            ip_list = {}

            ip_list['basic'] = ['120.26.1.250']

            ip_list['mostsdk'] = ['120.26.13.150']

            ip_list['wanpay'] = ['112.124.116.44']

            ip_list['log'] = self.get_log_ip()

            ip_list['snap'] = self.get_snap_ip()

            return ip_list
        except Exception as e:
            print(e)
            print("异常")
            # Roll back so a failed read does not leave an open transaction.
            self.center_mysql.rollback()
Пример #19
0
class CopyConfig():
    '''
    Clone ETL task definitions from the template game ("ares") to
    another game/platform: copies matching `dw2dm` task rows and the
    `structure` rows they reference, all inside the etl_manage database.
    '''

    def __init__(self):
        # etl_manage holds both the dw2dm task table and the structure table.
        self.mysql = Custom_MySQL(using='etl_manage')
        # Template game whose tasks and structures are used as the source.
        self.source_game = 'ares'

    def get_all_task(self, task_name):
        # Template tasks live under platform='all' for the source game;
        # an explicit task_name narrows the copy to a single task.

        condition = 'game = "%s" ' % self.source_game
        if task_name is not None:
            condition += 'and task_name="%s"' % task_name
        ##appstoremix is_delete = 0 and
        task_list = self.mysql.query(
            "select * from dw2dm where  platform='all' and %s" % (condition))
        return task_list

    def get_structure(self, id, game, plat_form):
        '''
        Return the structure id for (game, plat_form) that mirrors the
        template structure `id`; the row is inserted first when no
        matching row exists yet.

        :param id: id of the template structure row (source game)
        :param game: target game name
        :param plat_form: target platform
        :return: int id of an existing row, or the result of
                 save_newstructure for a newly inserted one.
                 NOTE(review): save_newstructure returns whatever
                 mysql.execute returns (row count or last id?) — callers
                 embed this value in from_id/target_id, so verify
                 Custom_MySQL.execute yields the inserted id.
        '''
        structure = self.mysql.get(
            "select * from structure where is_delete=0 and id=%s", id)
        if structure != None:
            # Positional values; order must match the column list of the
            # INSERT statement in save_newstructure.
            t_structure = [
                structure['type'],
                structure['flag'],
                structure['db_type'],
                game,
                plat_form,
                #structure['platform'],
                #'db_name':structure['db_name'],
                structure['table_name'],
                structure['column_name'],
                ##structure['partition_name'],
                ##structure['partition_rule'],
                ##structure['index_name'],
                structure['create_table_sql'],
                structure['user_id'],
                0,
                datetime.datetime.today().strftime("%Y-%m-%d")
            ]
            game_db = None
            # Derive the physical database name from the structure type.
            if structure['type'] != None and str(
                    structure['type']).__eq__('dw'):
                game_db = '%s_dw' % game
                t_structure.append(game_db)
            elif structure['type'] != None and str(
                    structure['type']).__eq__('dm'):
                game_db = '%s_dm' % game
                t_structure.append(game_db)
            elif structure['type'] != None and str(
                    structure['type']).__eq__('report'):
                game_db = 'report_%s' % game
                t_structure.append(game_db)
            # NOTE(review): the WHERE clause tests both platform='%s' and
            # platform='all'; unless plat_form == 'all' it can never
            # match, so a new row is always inserted — confirm intent.
            exis_row = self.mysql.query(
                "select id from structure where platform='%s' and is_delete=0 and db_name='%s' and platform='all' and table_name='%s' and db_type='%s'"
                % (plat_form, game_db, str(
                    structure['table_name']), str(structure['db_type'])))
            if len(exis_row) > 0:
                return int(exis_row[0]['id'])
            else:
                return self.save_newstructure(t_structure)

    def save_new_task(self, task):
        # Insert the cloned dw2dm row and commit immediately.
        self.mysql.insert("dw2dm", **task)
        self.mysql.commit()

    def save_newstructure(self, structure):
        # Column order here must match the t_structure lists built in
        # get_structure / add_structure.
        query = 'INSERT INTO structure(type,flag,db_type,game,platform,table_name,column_name,create_table_sql,user_id,is_delete,create_date,db_name) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        rowNum = self.mysql.execute(query, *tuple(structure))
        self.mysql.commit()
        return rowNum

    def run(self, game, task_name=None, plat_form="all"):
        '''
        Copy every matching template task to `game`/`plat_form`,
        remapping from_id/target_id structure references and rewriting
        the <source>_dw / <source>_dm database names inside exec_sql.
        '''
        print "start copy"
        task_list = self.get_all_task(task_name)

        for task in task_list:
            # from_id is a comma-separated list of structure ids; rebuild
            # it with the ids of the copied structures.
            form_ids = ""
            for form_id_str in task['from_id'].split(","):
                if len(str(form_ids)) > 0:
                    form_ids = form_ids + "," + str(
                        self.get_structure(int(form_id_str), game, plat_form))
                else:
                    form_ids = str(
                        self.get_structure(int(form_id_str), game, plat_form))
            target_id = self.get_structure(int(task['target_id']), game,
                                           plat_form)
            t_task = {
                'game':
                game,
                ##'platform':task['platform'],
                'platform':
                plat_form,
                'log_name':
                task['log_name'],
                'do_rate':
                task['do_rate'],
                'priority':
                task['priority'],
                'prefix_sql':
                task['prefix_sql'],
                'exec_sql':
                task['exec_sql'].replace("%s_dw" % self.source_game,
                                         "%s_dw" % game).replace(
                                             "%s_dm" % self.source_game,
                                             "%s_dm" % game),
                'post_sql':
                task['post_sql'],
                'from_id':
                form_ids,
                'target_id':
                target_id,
                'create_date':
                datetime.datetime.today().strftime("%Y-%m-%d"),
                'comment':
                task['comment'],
                'grouped':
                task['grouped'],
                'is_delete':
                task['is_delete'],
                'user_id':
                task['user_id']
            }
            self.save_new_task(t_task)

        self.mysql.close()
        print "over"

    def add_structure(self, game, plat_form):
        '''
        Pre-create structure rows for every platform in the
        comma-separated `plat_form` list, copying the template game's
        report/dm log structures.
        '''
        platforms_str = plat_form.split(",")
        structures = self.mysql.query(
            "select * from structure where platform='all' and is_delete=0 and flag='log' and game='ares' and type in ('report','dm')"
        )
        for structure in structures:
            for platform in platforms_str:
                # Positional values; order must match save_newstructure's
                # INSERT column list.
                t_structure = [
                    structure['type'],
                    structure['flag'],
                    structure['db_type'],
                    game,
                    platform,
                    #structure['platform'],
                    #'db_name':structure['db_name'],
                    structure['table_name'],
                    structure['column_name'],
                    ##structure['partition_name'],
                    ##structure['partition_rule'],
                    ##structure['index_name'],
                    structure['create_table_sql'],
                    structure['user_id'],
                    0,
                    datetime.datetime.today().strftime("%Y-%m-%d")
                ]
                game_db = None
                # Derive the physical database name from the structure type.
                if structure['type'] != None and str(
                        structure['type']).__eq__('dw'):
                    game_db = '%s_dw' % game
                elif structure['type'] != None and str(
                        structure['type']).__eq__('dm'):
                    game_db = '%s_dm' % game
                elif structure['type'] != None and str(
                        structure['type']).__eq__('report'):
                    game_db = 'report_%s' % game
                t_structure.append(game_db)
                self.save_newstructure(t_structure)
Пример #20
0
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
create_time: 2016-03-11 15:20:00
used: runs the dm2report_new tasks

last_update:2016-04-28 14:48:00
"""

import datetime
import sys
from dm2report_new.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# Command-line arguments: game, platform, execution rate, and the
# reference time ('YYYY-mm-dd HH:MM:SS') this run is scheduled for.
game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# Optional 5th argument: whether to check the dependency on the parent
# (dw2dm) tasks.  onrely: check, offrely: skip the check.
try:
    is_rely = sys.argv[5]
except IndexError:
    # Argument omitted — default to checking the dependency.
    is_rely = "onrely"

# Data date, formatted like: 20151015
Пример #21
0
def run_task(self, task_param):
    '''
    Dump the rows selected by task_param['dump_sql'] into a local file
    and mark the task as loaded (load_status=2) in etl_data_log.

    :param task_param: dict row describing the task — uses keys game,
        platform, log_date, log_name, log_time, dump_sql, table_name,
        partition_name, db_name and id.
    :return: True on success.  On any exception the transaction is
        rolled back and the (Celery) task retries after 60 seconds.
    '''
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()

    try:
        # Directory layout: /<game>/<platform>/<log_date>/<log_name>/<file>
        dir_param = {'game': task_param['game'],
                     'platform': task_param['platform'],
                     'log_date': task_param['log_date'],
                     'log_name': task_param['log_name']}

        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time']}

        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param

        lzo_file_name = "%(log_name)s_%(log_time)s.txt" % filename_dict

        local_log_dir = '/tmp' + log_dir

        # Substitute the placeholders in the dump statement.
        dump_sql = task_param['dump_sql']
        dump_sql = dump_sql.replace('{table_name}', task_param['table_name'])
        dump_sql = dump_sql.replace('{partition_name}', task_param['partition_name'])
        dump_sql = dump_sql.replace('{db_name}', task_param['db_name'])
        print(dump_sql)

        # BUG FIX: the original passed the undefined name `sql` here,
        # which raised NameError at runtime; the prepared statement is
        # `dump_sql`.
        result = mysql.dump(dump_sql, local_log_dir + lzo_file_name)
        #print(result)

        # Mark the task as "file load finished": 2
        datas = {'load_status': 2}
        where = {}
        where['id'] = int(task_param['id'])

        mysql.update('etl_data_log',
                     ' id = %(id)d' % where,
                     **datas)
        mysql.commit()
        mysql.close()
        return True

    except Exception as exc:
        print(exc)
        mysql.rollback()
        raise self.retry(exc=exc, countdown=60)
Пример #22
0
def run_task(self, task_param):
    '''
    dm2report: run one Hive -> MySQL reporting task.

    Steps, tracked in dm2report_log.status: 1 started, 2 old MySQL rows
    deleted (prefix_sql), 3 Hive result dumped to a local temp file
    (exec_sql), 4 data loaded into MySQL (post_sql).  in_queue is reset
    to 0 at the end in every case; on exception both connections roll
    back and the (Celery) task retries after 60 seconds.
    '''
    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    mysql.begin()
    mysql_etl.begin()
    where = {'id': int(task_param['id'])}
    try:
        hive = Custom_Hive(using='ares_dw')

        game = task_param['game']
        platform = task_param['platform']
        table_name = task_param['table_name']
        log_date = task_param['log_date']
        prefix_sql = task_param['prefix_sql']
        exec_sql = task_param['exec_sql']
        post_sql = task_param['post_sql']
        date_cycle = task_param['date_cycle']
        # Random suffix + timestamp keep concurrent dumps from colliding.
        random_str = str(random.randint(0, 999999999))
        stimes = str(int(time.time()))

        tmp_file_dir = "/tmp/tmp/%s/%s/%s" % (game, platform, log_date)
        # Create the local dump directory if it does not exist yet.
        if not os.path.exists(tmp_file_dir):
            os.makedirs(tmp_file_dir)

        tmp_file = "%s/%s_%s_%s_%s.txt" % (tmp_file_dir, table_name, date_cycle, random_str, stimes)
        # Hive session settings: no concurrency, LZO-compressed output.
        hql_conf = "SET hive.support.concurrency=false;SET hive.exec.compress.output=true;" \
                   "SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; "
        '''
        将任务标示为开始执行:1
        '''
        datas = {'status': 1, 'start_time': str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}
        mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()

        # Run the prefix SQL first (delete old MySQL rows) so the later
        # load cannot duplicate data.
        if prefix_sql is not None:
            result = mysql.delete_by_sql(prefix_sql)
            logger.info('exec prefix_sql: delete old data {0}'.format(result['output']))

            if result['status'] == 0:

                '''
                将任务标示为删除临时文件完成:2
                '''
                datas = {'status': 2}
                mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                mysql_etl.commit()

                '''
                开始执行hive ql,将数据dump到本地
                '''
                result = hive.dump(hql_conf+exec_sql, tmp_file)
                logger.info('exec exec_sql: dump data {0}'.format(result['output']))

                # Only proceed when the dump succeeded AND the file exists.
                if result['status'] == 0 and True == os.path.exists('%s' % tmp_file):

                    '''
                    将任务标示为dump hive数据完成:3
                    '''
                    datas = {'status': 3, 'tmp_file_name': tmp_file}
                    mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                    mysql_etl.commit()

                    # Run the post SQL (load into MySQL) after substituting
                    # the dump file path and target table/database names.
                    if post_sql is not None:
                        post_sql = post_sql.replace('{dir_path}', tmp_file)
                        post_sql = post_sql.replace('{table_name}', task_param['table_name'])
                        post_sql = post_sql.replace('{db_name}', task_param['db_name'])

                        result = mysql.load(post_sql)
                        logger.info('exec post_sql: load data to hdfs {0}'.format(result['output']))

                        if result['status'] == 0:
                            '''
                            将任务标识为录入mysql完成:4
                            '''
                            datas = {'status': 4, 'end_time': str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))}
                            mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
                        else:
                            logger.error('Error Code %s : %s  Cmd: %s' % (result['status'], result['output'], post_sql))
                else:
                    logger.error('Error Code %s : %s  Cmd: %s' % (result['status'], result['output'], exec_sql))
            else:
                logger.error('Error Code %s : %s  Cmd: %s' % (result['status'], result['output'], prefix_sql))
                '''
                执行失败,将其状态标为未执行:0
                '''
                datas = {'status': 0}
                mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)

        '''
        将任务标示为:(模拟) 已从任务队列中移除
        '''
        datas = {'in_queue': 0}
        update_result = mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        # If the update failed, try once more; beyond that the periodic
        # auto-repair job is expected to fix the row (unlikely case).
        if update_result != 1:
            mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)

        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()

        return True

    except Exception as exc:
        logger.error('dm2report error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()

        # Reset the row so the scheduler can pick the task up again.
        datas = {'in_queue': 0, 'status': 0}
        mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()

        mysql_etl.close()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
Пример #23
0
def run_task(self, task_param):
    '''
    download: fetch one remote log file (md5 first, then the lzo data
    file), verify the checksum and decompress it locally.

    download_status in etl_data_log: 1 started, 2 lzo downloaded,
    3 md5 verified and decompressed; left/reset to 0 on failure so the
    task can restart.  in_download_queue is reset to 0 at the end.  On
    an exception any in-flight wget for this file is killed and the
    (Celery) task retries after 60 seconds.
    '''
    redis = Custom_Redis(using='etl_manage')
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()
    datas = {'download_status': 0}
    where = {'id': int(task_param['id'])}

    local_log_dir = ""
    lzo_file_name = ""

    try:
        dir_param = {
            'game': task_param['game'],
            'platform': task_param['platform'],
            'log_date': task_param['log_date'],
            'log_name': task_param['log_name']
        }
        filename_dict = {
            'log_name': task_param['log_name'],
            'log_time': task_param['log_time'],
            'source_ip': task_param['source_ip']
        }
        # Remote/local layout: /<game>/<platform>/<log_date>/<log_name>/
        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param

        txt_file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt" % filename_dict
        lzo_file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt.lzo" % filename_dict
        md5_file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt.lzo.md5" % filename_dict

        lzo_download_url = task_param['download_url'].rstrip(
            '/') + log_dir + lzo_file_name
        md5_download_url = task_param['download_url'].rstrip(
            '/') + log_dir + md5_file_name

        # Look up (via a redis-cached XML file) which disk this game's
        # data should be stored on.
        if redis.get("disk_xml") is None:
            disk_tmp = open('/data/etl_manage/conf/disk_game.xml', 'r')
            redis.set("disk_xml", str(disk_tmp.read()))

        disk_list = str(redis.get("disk_xml"))
        root = ET.fromstring(disk_list)
        disk = ""
        for gameinfo in root.findall('game'):
            if gameinfo.get('name') == task_param['game']:
                disk = gameinfo.get('disk')
                continue

        #local_log_dir = '/disk1/tmp_data' + log_dir
        local_log_dir = '/' + disk + '/data' + log_dir
        # Create the local download directory if it does not exist yet.
        if not os.path.exists(local_log_dir):
            os.makedirs(local_log_dir)

        # Remove leftover files with the same name so repeated (repair)
        # executions start from a clean slate.
        if os.path.exists('%s%s' % (local_log_dir, txt_file_name)):
            cmd_remove = 'rm -f %s%s*' % (local_log_dir, txt_file_name)
            logger.info('remove history file: {0}'.format(cmd_remove))
            remove_result = cmd.run(cmd_remove)
            if remove_result['status'] != 0:
                logger.error('Error Code %s : %s  Cmd: %s' %
                             (remove_result['status'], remove_result['output'],
                              cmd_remove))
        '''
        下载md5文件,如果md5文件不存在则退出,不再继续执行程序,同时不向数据库写入任何标示
        '''
        md5_line = ' wget -o /tmp/log/wget_log -O %s%s %s' % (
            local_log_dir, md5_file_name, md5_download_url)
        logger.info('md5 info: {0}'.format(md5_line))
        md5_result = cmd.run(md5_line)

        if md5_result['status'] != 0:
            logger.error(
                'Error Code %s : %s  Cmd: %s' %
                (md5_result['status'], md5_result['output'], md5_line))

        else:
            '''
            将任务标识为开始执行:1
            '''
            datas['download_status'] = 1
            mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
            mysql.commit()
            '''
            下载数据文件
            '''
            lzo_line = ' wget -o /tmp/log/wget_log -O %s%s %s' % (
                local_log_dir, lzo_file_name, lzo_download_url)
            logger.info('file info: {0}'.format(lzo_line))
            lzo_result = cmd.run(lzo_line)

            if lzo_result['status'] != 0:
                logger.error(
                    'Error Code %s : %s  Cmd: %s' %
                    (lzo_result['status'], lzo_result['output'], lzo_line))
                datas['download_status'] = 0
            else:
                '''
                将任务标识为下载完成:2
                '''
                datas['download_status'] = 2
                mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
                mysql.commit()
                '''
                md5校验,如果未通过则不再继续执行程序
                '''
                check_line = "cat %s%s |grep `md5sum %s%s|cut -d ' ' -f 1`" % (
                    local_log_dir, md5_file_name, local_log_dir, lzo_file_name)
                logger.info('md5 or md5 info: {0}'.format(check_line))
                check_result = cmd.run(check_line)

                if check_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' %
                                 (check_result['status'],
                                  check_result['output'], check_line))
                    datas['download_status'] = 0
                else:
                    '''
                    lzop解压缩
                    '''
                    cmd_line = ' lzop -dP %s%s' % (local_log_dir,
                                                   lzo_file_name)
                    logger.info('file info: {0}'.format(cmd_line))
                    cmd_result = cmd.run(cmd_line)
                    if cmd_result['status'] != 0:
                        logger.error('Lzop Code %s : %s Cmd: %s' %
                                     (cmd_result['status'],
                                      cmd_result['output'], cmd_line))
                        datas['download_status'] = 0
                    else:
                        '''
                        将任务标识md5一致,完成下载任务:3
                        '''
                        datas['download_status'] = 3
        '''
        将任务标示为:(模拟) 已从任务队列中移除
        '''
        datas['in_download_queue'] = 0
        update_result = mysql.update('etl_data_log', ' id = %(id)d' % where,
                                     **datas)
        # If the update failed, try once more; beyond that the periodic
        # auto-repair job is expected to fix the row (unlikely case).
        if update_result != 1:
            mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)

        mysql.commit()
        mysql.close()
        return True

    except Exception as exc:
        print(exc)
        logger.error('download error : %s' % exc)
        mysql.rollback()

        # Kill any wget still downloading this exact file before retrying.
        kill_proces = "kill -9 `ps -ef |grep wget |grep -v grep |grep '%s%s'|awk '{print $2}'`" % (
            local_log_dir, lzo_file_name)
        cmd.run(kill_proces)

        datas = {'in_download_queue': 0, 'download_status': 0}
        mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()

        mysql.close()
        raise self.retry(exc=exc, countdown=60)
Пример #24
0
 def __init__(self):
     # Single shared connection to the hadoops2 reporting database,
     # used by the other methods of this class via self.mysql.
     self.mysql = Custom_MySQL(using='hadoops2')
Пример #25
0
 def insert_db(self,main_category_id,sub_category_id,platform,app):
     '''
     Insert or update one app_info row for a discovered application.

     The row key is (type, port, main_category_id, sub_category_id)
     plus an address condition chosen by address class: IPs starting
     with 10/172 are matched on inner_ip, everything else on public_ip
     (web apps may carry a domain that is first resolved via the
     domain table).  Returns False when the COUNT query fails;
     otherwise inserts a new row (count==0) or updates the match.

     NOTE(review): SQL here is built by plain string interpolation from
     the scanned `app` dict — an injection risk if the input is ever
     untrusted.
     '''
     temp_app ={}
     temp_app['name']=app['type']+'['+app['memo']+']'
     temp_app['platform'] = platform
     temp_app['type']=app['type']
     temp_app['port']=app['port']
     temp_app['main_category_id'] = main_category_id
     temp_app['sub_category_id'] = sub_category_id
     if app.get('db_type',False):
         temp_app['db_type'] = app['db_type']
     
     sql ='select count(*) as count from app_info where '
     # Same game and same zone/group uniquely identify the app.
     where ='type="%s" and port="%s"  and main_category_id="%s" \
     and sub_category_id="%s"'% (app['type'],app['port'],main_category_id,sub_category_id) 
      
     db = Custom_MySQL(using='center_app')
     
     # Inner (private) address: 10.x / 172.x networks.
     if app['ip'].split('.')[0] in ['10','172']:
         inner_ip ='and inner_ip="%s"'%(app['ip'])
      
         count = db.count(sql+where+inner_ip)
         
         if count==None:
             print 'SQL Error:%s'% sql+where+inner_ip 
             return False
         count = count['count']
        
         # Best-effort lookup of the matching public IP from assets.
         # NOTE(review): bare except silently swallows any failure —
         # public_ip simply stays unset in that case.
         try:
             temp_app['public_ip'] = db.get('select public_ip from assets where inner_ip="%s"'% app['ip'])['public_ip']
         except:
             pass
         if count==0:
              temp_app['inner_ip'] = app['ip']
              db.insert('app_info',**temp_app)
         else:
             db.update('app_info',where+inner_ip,**temp_app)
             
     else:
         import re
         # Web apps may be registered by URL; strip the scheme/path and,
         # if no literal IPv4 address remains, resolve the domain via
         # the domain table.
         if app['type']=='web':
             app['ip'] = app['ip'].replace('http://','').split('/')[0]
             p=r'(?<![\.\d])(?:\d{1,3}\.){3}\d{1,3}(?![\.\d])'
             mo = re.search(p ,app['ip'])
             if not mo:
                 domain = app['ip'].replace('http://','').split('/')[0]
                 app['ip'] = db.get('select ip from domain where domain="%s"'% domain)['ip']
                 temp_app['domain'] = domain
         public_ip ='and (public_ip="%s") '%(app['ip'])
      
         count = db.count(sql+where+public_ip)
         if count==None:
             print 'SQL Error:%s'% sql+where+public_ip 
             return False
         count = count['count']
         
         if count==0:
              temp_app['public_ip'] = app['ip']
              db.insert('app_info',**temp_app)
         else:
             db.update('app_info',where+public_ip,**temp_app)
Пример #26
0
#coding=utf-8
"""
auth: wuqichao、suguoxin
mail: [email protected][email protected]
create_time: 2015-9-17 10:00:00
used: runs the download tasks

last_update: 2016-04-28 15:15:00
"""

import datetime
import sys
from download.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# Command-line arguments: game, platform, execution rate, and the
# reference time ('YYYY-mm-dd HH:MM:SS') this run is scheduled for.
game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# Data date, formatted like: 20151015
log_date = now_time.strftime('%Y%m%d')
# Time slot within the day (five-minute step), formatted like: 0005, 2400
log_time = now_time.strftime("%H%M")

sql = 'select * from etl_data_log ' \
      'where game="%s" and platform="%s" and do_rate="%s" and etl_status=6 and download_status=0 and task_date="%s" ' \
Пример #27
0
def run_task(self, task_param):
    """Celery task: run one DM -> report ETL step ("dm2report").

    Deletes old report rows (prefix_sql), dumps the result of exec_sql from
    Hive to a local temp file, then loads it into the report MySQL
    (post_sql), recording progress in the dm2report_new_log row identified
    by task_param['id'].

    status codes written to dm2report_new_log.status:
      0 = not run / failed, 1 = started, 2 = old data deleted,
      3 = hive dump finished, 4 = loaded into mysql.

    A per-task redis key guards against celery double-delivery:
      None = never ran, "0" = failed (re-run allowed),
      "1" = running, "2" = finished successfully.

    Returns True; on any exception rolls back both connections, resets the
    redis flag and retries via self.retry() after 60 seconds.
    """

    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    redis = Custom_Redis(using='etl_task')

    mysql.begin()
    mysql_etl.begin()

    datas = {'status': 0}
    where = {'id': int(task_param['id'])}

    # kept outside the try body so the except handler can reset the flag
    task_key_exc = ""

    try:
        hive = Custom_Hive(using='ares_dw')

        game = task_param['game']
        platform = task_param['platform']
        table_name = task_param['table_name']
        log_date = task_param['log_date']
        prefix_sql = task_param['prefix_sql']
        exec_sql = task_param['exec_sql']
        post_sql = task_param['post_sql']
        date_cycle = task_param['date_cycle']
        random_str = str(random.randint(0, 999999999))
        stimes = str(int(time.time()))

        task_date = task_param['task_date']
        task_name = task_param['task_name']
        do_rate = task_param['do_rate']

        #task_key_tmp = str(game)+str(platform)+str(task_name)+str(date_cycle)+str(do_rate)+str(log_date)+str(task_date)
        task_key_tmp = str(task_name)+str(date_cycle)+str(do_rate)+str(log_date)+str(task_date)

        # md5 keeps the redis key a fixed length regardless of task-name size
        task_key_md5 = hashlib.md5()
        task_key_md5.update(task_key_tmp)
        task_key_md5_result = task_key_md5.hexdigest()

        task_key = "dm2report_new_"+str(game)+"_"+str(platform)+"_"+str(task_key_md5_result)
        task_key_exc = task_key

        '''
        celery 本身的机制存在漏洞,会将一个已经完成任务再次分配给其他的worker,致使同一个任务执行多次
        为防止此种现象,在任务开始执行时,将任务的“唯一标示”写入redis中,标注已执行
        '''
        # If task_key is None the task has never run: execute normally.
        # If task_key == "0" the previous run failed: re-execution is allowed.
        if redis.get(task_key) == "0" or redis.get(task_key) is None:

            tmp_file_dir = "/tmp/tmp/%s/%s/%s" % (game, platform, log_date)
            # create the local dump directory if missing
            if not os.path.exists(tmp_file_dir):
                os.makedirs(tmp_file_dir)

            tmp_file = "%s/%s_%s_%s_%s.txt" % (tmp_file_dir, table_name, date_cycle, random_str, stimes)
            hql_conf = "SET hive.support.concurrency=false;" \
                       "SET hive.exec.compress.output=true;" \
                       "SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; "
            '''
            将任务标示为开始执行:1
            '''
            datas['status'] = 1
            datas['start_time'] = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
            mysql_etl.commit()
            # mark the task as running in redis
            redis.set(task_key, 1)

            # run the prefix sql (delete old report rows) before loading fresh data
            if prefix_sql is not None:
                result = mysql.delete_by_sql(prefix_sql)
                logger.info('exec prefix_sql: delete old data {0}'.format(result['output']))

                if result['status'] != 0:
                    logger.error('Error Code %s : %s  Cmd: %s' % (result['status'], result['output'], prefix_sql))
                    '''
                    执行失败,将其状态标为未执行:0
                    '''
                    datas['status'] = 0
                    # mark the task as not run in redis
                    redis.set(task_key, 0)
                else:
                    '''
                    将任务标示为删除临时文件完成:2
                    '''
                    datas['status'] = 2
                    datas.pop('start_time')
                    mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
                    mysql_etl.commit()

                    '''
                    开始执行hive ql,将数据dump到本地
                    '''
                    result = hive.dump(hql_conf+exec_sql, tmp_file)
                    logger.info('exec exec_sql: dump data {0}'.format(result['output']))

                    if result['status'] != 0 or False == os.path.exists('%s' % tmp_file):
                        logger.error('Error Code %s : %s  Cmd: %s' % (result['status'], result['output'], exec_sql))
                        # mark the task as not run in redis
                        redis.set(task_key, 0)
                    else:

                        '''
                        将任务标示为dump hive数据完成:3
                        '''
                        datas['status'] = 3
                        datas['tmp_file_name'] = tmp_file
                        mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
                        mysql_etl.commit()

                        # run the post sql: load the dumped file into mysql
                        if post_sql is not None:
                            post_sql = post_sql.replace('{dir_path}', tmp_file)
                            post_sql = post_sql.replace('{table_name}', task_param['table_name'])
                            post_sql = post_sql.replace('{db_name}', task_param['db_name'])

                            result = mysql.load(post_sql)
                            logger.info('exec post_sql: load data to hdfs {0}'.format(result['output']))

                            if result['status'] != 0:
                                logger.error('Error Code %s : %s  Cmd: %s' % (result['status'], result['output'], post_sql))
                                # mark the task as not run in redis
                                redis.set(task_key, 0)
                            else:
                                '''
                                将任务标识为录入mysql完成:4
                                '''
                                datas['status'] = 4
                                datas['end_time'] = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
                                datas.pop('tmp_file_name')
                                # mark the task as successfully finished in redis
                                redis.set(task_key, 2)
            else:
                logger.error('prefix_sql is null')
                datas['status'] = 0
                # mark the task as not run in redis
                redis.set(task_key, 0)

        # task_key == "2": this task already completed successfully
        elif redis.get(task_key) == "2":
            datas['status'] = 4
        # otherwise the task is still running in another worker; do nothing
        else:
            return True

        '''
        将任务标示为:(模拟) 已从任务队列中移除
        '''
        datas['in_queue'] = 0
        update_result = mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
        # If the update failed, try once more; beyond that the automatic
        # repair job fixes the row (very unlikely to be needed).
        if update_result != 1:
            mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)

        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()

        return True

    except Exception as exc:
        logger.error('dm2report error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()
        redis.set(task_key_exc, 0)

        datas = {'in_queue': 0, 'status': 0}
        mysql_etl.update('dm2report_new_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()

        mysql_etl.close()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
last_update: 2016-05-04 11:10:20
"""

import datetime
import sys
from etl_data.tasks import run_task
from custom.db.mysql import Custom_MySQL
from custom.db.redis_tools import Custom_Redis

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

mysql = Custom_MySQL(using='etl_manage')
redis = Custom_Redis(using='etl_manage')

game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

#数据日期,格式如:20151015
log_date = now_time.strftime('%Y%m%d')
#数据时间点(每五分钟),格式如:0005、2400
log_time = now_time.strftime("%H%M")

#当前机器的外网ip,根据不同机器修改即可
machine = "120.26.1.224"
Пример #29
0
 def __init__(self):
     """Open the etl_manage MySQL handle and pin the source game name."""
     db_handle = Custom_MySQL(using='etl_manage')
     self.mysql = db_handle
     self.source_game = 'ares'
Пример #30
0
"""
auth: suguoxin
mail: [email protected]
create_time: 2016-03-08 16:00:00
used: 启动dw2dm任务

last_update: 2016-04-28 15:00:00
"""

import datetime
import sys
from dw2dm.tasks import run_task
from custom.db.mysql import Custom_MySQL
from custom.command import Custom_Command as cmd

mysql = Custom_MySQL(using='etl_manage')

game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

#用于判断,是否需要校验对于上层任务(mergefile2dw任务)的依赖,onrely: 需要,offrely: 不需要
is_rely = ""
try:
    is_rely = sys.argv[5]
except Exception as exc:
    is_rely = "onrely"

#数据日期,格式如:20151015
Пример #31
0
 def __init__(self):
     """Open a MySQL connection to the center_app database."""
     connection = Custom_MySQL(using='center_app')
     self.center_mysql = connection
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
create_time: 2016-06-23 17:03:00
used: 执行下载任务,为处理服务器过多的有游戏所建立
last_update: 2016-06-23 17:03:00
"""

import datetime
import sys
from download_new.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# run frequency and target, taken from the command line
game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# data date, e.g. 20151015
log_date = now_time.strftime('%Y%m%d')
# data time slot (every five minutes), e.g. 0005 / 2400
log_time = now_time.strftime("%H%M")


def task_5min():
Пример #33
0
#!/usr/bin/python
#coding=utf-8

"""
auth:suguoxin
mail:[email protected]
createtime:2016-01-12 00:30:00
usege: 用于自动重置执行失败的任务

"""

import datetime
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# current time
now_time = datetime.datetime.now()
task_date = now_time.strftime('%Y%m%d')
log_time = now_time.strftime('%H%M')

# before 02:00 the tasks being repaired still belong to the previous day
if log_time < '0200':
    task_date = (now_time-datetime.timedelta(days=1)).strftime('%Y%m%d')

# HHMM strings as of 30 minutes / 1 hour ago, used to find stale failed tasks
log_time_before_30min = (now_time-datetime.timedelta(seconds=1800)).strftime('%H%M')
log_time_before_1hour = (now_time-datetime.timedelta(seconds=3600)).strftime('%H%M')

now = now_time.strftime('%Y-%m-%d %H:%M:%S')

try:
    mysql.begin()
Пример #34
0
def run_task(self, task_param):
    """Celery task: merge downloaded log files, lzo-compress them and load
    them into the Hive DW ("mergefile2dw"), tracking progress in the
    file2dw_log row identified by task_param['id'].

    load_status codes written to file2dw_log:
      0 = failed / not run, 1 = started, 2 = merged (row_num recorded),
      3 = compressed, 4 = loaded into hive, 5 = lzo index built.

    For flag == "snap" data the hive partition of the previous day
    (log_date - 1) is targeted instead of log_date.  The disk holding the
    local log files is resolved per game from disk_game.xml (cached in
    redis under "disk_xml").

    Returns True; on exception rolls back and retries after 30 seconds.
    """

    mysql = Custom_MySQL(using='etl_manage')
    hive = Custom_Hive(using='ares_dw')
    redis = Custom_Redis(using='etl_manage')
    mysql.begin()
    where = {'id': int(task_param['id'])}

    try:
        log_date = datetime.datetime.strptime(task_param['log_date'], '%Y%m%d').strftime("%Y-%m-%d")
        log_date_1 = (datetime.datetime.strptime(task_param['log_date'], '%Y%m%d')-datetime.timedelta(days=1)).strftime("%Y-%m-%d")

        dir_param = {'game': task_param['game'], 'platform': task_param['platform'],
                     'log_date': task_param['log_date'], 'log_name': task_param['log_name']}
        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time']}
        index_dict = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'],
                      'platform': task_param['platform'], 'log_date': log_date}
        partition = {'platform': task_param['platform'], 'log_date': log_date}

        # previous-day variants, used for "snap" data (see below)
        index_dict_1 = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'],
                        'platform': task_param['platform'], 'log_date': log_date_1}
        partition_1 = {'platform': task_param['platform'], 'log_date': log_date_1}

        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s" % dir_param

        flag = task_param['flag']

        file_name = "%(log_name)s" % filename_dict
        file_name_txt = "%(log_name)s_%(log_time)s_result.txt" % filename_dict
        file_name_lzo = "%(log_name)s_%(log_time)s_result.txt.lzo" % filename_dict
        file_name_row = "%(log_name)s_%(log_time)s_result.txt.row" % filename_dict
        index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
        partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition

        project_path = os.getcwd()

        log_time = task_param['log_time']
        do_rate = task_param['do_rate']

        #if flag == "snap" or (do_rate == "1day" and flag == "log")
        # snapshots are attributed to the previous day's partition
        if flag == "snap":
            index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict_1
            partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition_1

        # resolve from redis which disk holds this game's data
        if redis.get("disk_xml") is None:
            disk_tmp = open('/data/etl_manage/conf/disk_game.xml', 'r')
            redis.set("disk_xml", str(disk_tmp.read()))

        disk_list = str(redis.get("disk_xml"))
        root = ET.fromstring(disk_list)
        disk = ""
        for gameinfo in root.findall('game'):
            if gameinfo.get('name') == task_param['game']:
                disk = gameinfo.get('disk')
                continue

        #local_log_dir = '/disk1/tmp_data'+log_dir
        local_log_dir = '/'+disk+'/data'+log_dir
        logger.info('local_log_dir: {0}'.format(local_log_dir))

        # proceed only if the local log directory exists
        if os.path.exists('%s' % local_log_dir):

            # remove any leftover same-named file; also makes repair re-runs safe
            if os.path.exists('%s/%s' % (local_log_dir, file_name_txt)):
                cmd_remove = 'rm -f %s/%s*' % (local_log_dir, file_name_txt)
                logger.info('remove history file: {0}'.format(cmd_remove))
                remove_result = cmd.run(cmd_remove)
                if remove_result['status'] != 0:
                    logger.error('Error Code %s : %s  Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))

            datas = {'load_status': 1}
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
            mysql.commit()

            cmd_merge = '/bin/bash %s/mergefile2dw/merge_data.sh %s %s %s %s %s' \
                        '' % (project_path, local_log_dir, local_log_dir, file_name, do_rate, log_time)
            logger.info('merge data: {0}'.format(cmd_merge))
            merge_result = cmd.run(cmd_merge)
            logger.info('merge data result {0}'.format(merge_result['output']))

            if merge_result['status'] == 0:

                # total row count (written by merge_data.sh into the .row file)
                row = open('%s/%s' % (local_log_dir, file_name_row)).read()

                '''
                合并数据完成:2
                '''
                datas = {'load_status': 2, 'row_num': int(row)}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                mysql.commit()

                '''
                压缩数据
                '''
                cmd_compress = '/bin/bash %s/mergefile2dw/compress_data.sh %s %s' % (project_path, local_log_dir, file_name_txt)
                logger.info('compress data: {0}'.format(cmd_compress))
                compress_result = cmd.run(cmd_compress)
                if compress_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' % (compress_result['status'], compress_result['output'], cmd_compress))
                else:
                    '''
                    压缩数据完成:3
                    '''
                    datas = {'load_status': 3}
                    mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                    mysql.commit()

                    # before loading, delete any same-named file in hive so the
                    # data cannot appear twice
                    cmd_remove = '/bin/bash %s/mergefile2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                    logger.info('remove damaged files: {0}'.format(cmd_remove))
                    remove_result = cmd.run(cmd_remove)

                    if remove_result['status'] != 0:
                        logger.error('Error Code %s : %s  Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))

                    '''
                    文件加载到hive中
                    '''
                    load_sql = task_param['load_sql']
                    load_sql = load_sql.replace('{dir_path}', local_log_dir+"/"+file_name_lzo)
                    load_sql = load_sql.replace('{table_name}', task_param['table_name'])
                    load_sql = load_sql.replace('{partition_name}', '%s' % partition_name)
                    load_sql = load_sql.replace('{db_name}', task_param['db_name'])

                    logger.info('hive load SQL: {0}'.format(load_sql))
                    result = hive.load(load_sql)
                    logger.info('hive load result {0}'.format(result['output']))

                    if result['status'] == 0:
                        '''
                        将任务标识为加载文件完成:4
                        '''
                        datas = {'load_status': 4}
                        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                        mysql.commit()

                        '''
                        建立索引,否则lzo将不支持split
                        '''
                        #print index_dir_name
                        cmd_index = '/bin/bash %s/mergefile2dw/create_lzo_indexer.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                        logger.info('create lzo index: {0}'.format(cmd_index))
                        index_result = cmd.run(cmd_index)

                        if index_result['status'] != 0:
                            logger.error('Error Code %s : %s  Cmd: %s' % (index_result['status'], index_result['output'], cmd_index))
                        else:
                            if "create index success" in index_result['output']:
                                '''
                                将任务标识为建立lzo索引完成:5
                                '''
                                datas = {'load_status': 5}
                                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                            else:
                                '''
                                如果load数据失败,则删除半途出现错误的文件,防止hive查询的时候报错
                                '''
                                cmd_remove = '/bin/bash %s/mergefile2dw/remove_damaged_file.sh %s %s' % (project_path, index_dir_name, file_name_lzo)
                                logger.info('remove damaged files: {0}'.format(cmd_remove))
                                remove_result = cmd.run(cmd_remove)

                                if remove_result['status'] != 0:
                                    logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
                    else:
                        logger.error('Error Code %s : %s Cmd: %s' % (result['status'], result['output'], load_sql))
            else:
                '''
                合并数据失败
                '''
                datas = {'load_status': 0}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                logger.error('Error Code %s : %s Cmd: %s' % (merge_result['status'], merge_result['output'], merge_result))

        '''
        执行完毕,模拟从队列中清楚任务:0
        '''
        datas = {'in_queue': 0}
        update_result = mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        # If the update failed, call it once more; beyond that the automatic
        # repair job takes over (the probability of needing it is very small).
        if update_result != 1:
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)

        mysql.commit()
        mysql.close()
        return True

    except Exception as exc:
        logger.error('mergefile2dw error: %s' % exc)
        mysql.rollback()

        datas = {'in_queue': 0, 'load_status': 0}
        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()

        mysql.close()
        raise self.retry(exc=exc, countdown=30)
Пример #35
0
class CenterApp():
    """Read-only helpers over the center_app asset database.

    Builds nested dicts of the form {game: {platform: [ip_or_prefix, ...]}}
    for log servers and snapshot sources, plus a few fixed entry points
    (basic / mostsdk / wanpay) — see get_ip_list().
    """

    def __init__(self):
        # shared connection used by all query helpers below
        self.center_mysql = Custom_MySQL(using='center_app')

#     def get_server_list(self):
#         '''
#             fetch the servers a task should run on (superseded by get_ip_list)
#         '''
#         try:
#             ip_list = []
#             if param['flag'] == 'basic':
#                 ip_list.append('120.26.1.250')
#
#             if param['flag'] == 'most':
#                 ip_list.append('120.26.13.150')
#
#             if param['flag'] == 'log':
#                 s_sql = "select t1.*  from \
#                             (select a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
#                             on a.main_category_id = m.id  \
#                             where a.is_del = 0 and (a.hostname like '%%%%web_balance%%%%' or a.hostname like '%%%%web_admin%%%%') ) t1 \
#                             left join platform t2 on t1.platform_id = t2.id \
#                             where t1.source_ip is not null group by source_ip"
#
#                 if param['game'] == 'kof':
#                     s_sql = "select t1.*  from \
#                             (select a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
#                             on a.main_category_id = m.id  \
#                             where a.is_del = 0 and a.hostname like '%%%%gameserver%%%%' ) t1 \
#                             left join platform t2 on t1.platform_id = t2.id \
#                             where t1.source_ip is not null group by source_ip"
#
#                 ip_list = self.exec_sql(s_sql)
#
#             if param['flag'] == 'snap':
#                 s_sql = "select s.prefix as source_ip from sub_category s inner join main_category m \
#                         on s.main_category_id = m.id \
#                         where s.platform = '%s' and m.prefix = '%s'" % (param['platform'],param['game'])
#                 ip_list = self.exec_sql(s_sql)
#
#             return ip_list
#         except Exception as exc:
#             print exc
#             # roll back on error
#             self.center_mysql.rollback()

    def get_log_ip(self):
        '''
            Collect the public IPs of all log servers (web_balance /
            web_admin hosts plus gameserver hosts), grouped per game and
            platform as {game: {platform: [ip, ...]}}.
        '''

        ip_list = {}
        sql = "select t1.*,t2.prefix as platform  from \
                            (select m.prefix as gamename,a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
                            on a.main_category_id = m.id  \
                            where a.is_del = 0 and (a.hostname like '%%%%web_balance%%%%' or a.hostname like '%%%%web_admin%%%%') ) t1 \
                            left join platform t2 on t1.platform_id = t2.id \
                            where t1.source_ip is not null group by source_ip\
                union all \
                select t1.*,t2.prefix as platform from \
                            (select m.prefix as gamename,a.public_ip as source_ip,a.platform_id from assets a inner join main_category m \
                            on a.main_category_id = m.id  \
                            where a.is_del = 0 and a.hostname like '%%%%gameserver%%%%' ) t1 \
                            left join platform t2 on t1.platform_id = t2.id \
                            where t1.source_ip is not null group by source_ip"

        result = self.center_mysql.query(sql)

        for info in result:
            # rows without a resolvable game or platform are skipped
            if info['gamename'] is None or info['platform'] is None:
                continue

            if info['gamename'].encode('utf8') not in ip_list:
                ip_list[info['gamename'].encode('utf8')] = {}

            if info['platform'].encode('utf8') not in ip_list[
                    info['gamename'].encode('utf8')]:
                ip_list[info['gamename'].encode('utf8')][
                    info['platform'].encode('utf8')] = []

            ip_list[info['gamename'].encode('utf8')][info['platform'].encode(
                'utf8')].append(info['source_ip'].encode('utf8'))

        return ip_list

    def get_snap_ip(self):
        '''
            Collect the snapshot source prefixes from sub_category, grouped
            per game and platform as {game: {platform: [prefix, ...]}}.
        '''
        ip_list = {}
        s_sql = "select f.prefix as platform,s.prefix as source_ip,m.prefix as gamename from sub_category s inner join main_category m \
                        on s.main_category_id = m.id \
                left join platform f on f.id = s.platform_id"

        result = self.center_mysql.query(s_sql)

        for info in result:
            # rows without a resolvable game or platform are skipped
            if info['gamename'] is None or info['platform'] is None:
                continue

            if info['gamename'].encode('utf8') not in ip_list:
                ip_list[info['gamename'].encode('utf8')] = {}

            if info['platform'].encode('utf8') not in ip_list[
                    info['gamename'].encode('utf8')]:
                ip_list[info['gamename'].encode('utf8')][
                    info['platform'].encode('utf8')] = []

            ip_list[info['gamename'].encode('utf8')][info['platform'].encode(
                'utf8')].append(info['source_ip'].encode('utf8'))

#         print ip_list
        return ip_list

    def get_ip_list(self):
        '''
            Assemble the complete IP map: fixed entries for basic /
            mostsdk / wanpay plus the log and snap maps built above.
            Returns None (after rollback) if a query fails.
        '''
        try:
            ip_list = {}

            ip_list['basic'] = ['120.26.1.250']

            ip_list['mostsdk'] = ['120.26.13.150']

            ip_list['wanpay'] = ['112.124.116.44']

            log_list = self.get_log_ip()

            ip_list['log'] = log_list

            snap_list = self.get_snap_ip()
            ip_list['snap'] = snap_list

            return ip_list
        except Exception, e:
            print e
            print "异常"
            # roll back on error
            self.center_mysql.rollback()
Пример #36
0
    def run(self):
        """Execute self.host['cmd'] on the remote host over SSH and record
        the outcome in the log.batch_detail row for (batch_id, ip).

        flag values written: 1 = running, 2 = finished (result holds the
        command output as JSON), -1 = failed.  If paramiko itself fails,
        the command is retried once through the plain `ssh` binary before
        the host is marked failed.
        """

        # mark this host's batch row as running (flag = 1)
        db = Custom_MySQL(using='log')
        status = {'flag': 1}
        db.update(
            'batch_detail', 'batch_id="%s" and ip ="%s"' %
            (self.host['batch_id'], self.host['ip']), **status)
        db.commit()
        db.close()

        # open the connection
        self.ssh = paramiko.SSHClient()

        try:

            # no password given: fall back to the public-key file
            # NOTE(review): the loaded RSA key is not passed to connect() —
            # presumably the default agent/key lookup applies; confirm.
            if self.host.get('pwd', True) == True:
                privatekeyfile = os.path.expanduser('/root/.ssh/id_rsa')
                paramiko.RSAKey.from_private_key_file(privatekeyfile)

            # handling for hosts missing from known_hosts
            known_host = "/root/.ssh/known_hosts"
            self.ssh.load_system_host_keys(known_host)
            self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

            #os.system('/opt/local/junos/junos')
            # connect to the remote machine
            self.ssh.connect(hostname=self.host['ip'],
                             port=int(self.host['port']),
                             username=self.host['user'],
                             password=self.host['pwd'],
                             compress=True,
                             timeout=60)

            # run the remote command and collect its output
            stdin, stdout, stderr = self.ssh.exec_command(self.host['cmd'],
                                                          bufsize=65535,
                                                          timeout=60)
            temp = stdout.readlines()

            # store the output and mark the row finished (flag = 2)
            db = Custom_MySQL(using='log')
            status = {'flag': 2, 'result': json.dumps(temp)}
            db.update(
                'batch_detail', 'batch_id="%s" and ip ="%s"' %
                (self.host['batch_id'], self.host['ip']), **status)
            db.commit()
            db.close()

            # done with the connection
            self.ssh.close()

        except:
            self.ssh.close()

            #print trace_back()
            # In case paramiko itself is the problem, run the command once
            # more through a shell `ssh`; only then treat it as a real failure.
            cmd = "ssh -p %s -o StrictHostKeyChecking=no %s@%s  '%s'" % (
                self.host['port'], self.host['user'], self.host['ip'],
                self.host['cmd'])
            (status, output) = commands.getstatusoutput(cmd)
            if status == 0:
                db = Custom_MySQL(using='log')
                status = {'flag': 2, 'result': json.dumps(output)}
                db.update(
                    'batch_detail', 'batch_id="%s" and ip ="%s"' %
                    (self.host['batch_id'], self.host['ip']), **status)
                db.commit()
                db.close()

            else:

                db = Custom_MySQL(using='log')
                status = {'flag': -1, 'result': 'faild'}
                db.update(
                    'batch_detail', 'batch_id="%s" and ip ="%s"' %
                    (self.host['batch_id'], self.host['ip']), **status)
                db.commit()
                db.close()
Пример #37
0
def run_task(self, task_param):
    """Celery task: dump a MySQL table partition to a local text file.

    Builds the dump SQL from the task_param['dump_sql'] template (with
    {table_name} / {partition_name} / {db_name} placeholders), writes the
    result under /tmp/<game>/<platform>/<log_date>/<log_name>/, then marks
    the etl_data_log row (id = task_param['id']) as loaded (load_status=2).

    Returns True on success; on any error rolls back and retries via
    self.retry() after 60 seconds.
    """
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()

    try:
        # target directory: /<game>/<platform>/<log_date>/<log_name>/
        dir_param = {
            'game': task_param['game'],
            'platform': task_param['platform'],
            'log_date': task_param['log_date'],
            'log_name': task_param['log_name']
        }

        filename_dict = {
            'log_name': task_param['log_name'],
            'log_time': task_param['log_time']
        }

        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param

        lzo_file_name = "%(log_name)s_%(log_time)s.txt" % filename_dict

        local_log_dir = '/tmp' + log_dir

        # fill in the placeholders of the dump-SQL template
        dump_sql = task_param['dump_sql']
        dump_sql = dump_sql.replace('{table_name}', task_param['table_name'])
        dump_sql = dump_sql.replace('{partition_name}',
                                    task_param['partition_name'])
        dump_sql = dump_sql.replace('{db_name}', task_param['db_name'])
        print(dump_sql)

        # Bug fix: the original passed the undefined name `sql` here, which
        # raised NameError on every run and sent the task into retry.
        result = mysql.dump(dump_sql, local_log_dir + lzo_file_name)
        #print(result)

        # mark the etl_data_log row as "file loaded": 2
        datas = {'load_status': 2}
        where = {'id': int(task_param['id'])}

        mysql.update('etl_data_log', ' id = %(id)d' % where, **datas)
        mysql.commit()
        mysql.close()
        return True

    except Exception as exc:
        print(exc)
        mysql.rollback()
        raise self.retry(exc=exc, countdown=60)
Пример #38
0
 def __init__(self):
     """Connect to the center_app database for later queries."""
     self.center_mysql = Custom_MySQL(
         using='center_app')
Пример #39
0
def main():
   """Sync server rows from the spreadsheet (excel_table_byindex) into the
   assets table, keyed on inner_ip and/or public_ip.

   Rows whose game prefix is not found in main_category are skipped.
   Known IPs get their hostname/category updated; unknown IPs are inserted
   with hostname and wxsn taken from the sheet's name column.
   """
   tables = excel_table_byindex()

   db = Custom_MySQL(using='center_app')

   for row in tables:
       print row
       # the game prefix is the part of the name before the first '_'
       game = row['name'].split('_')[0]
       sql ='select id from main_category where prefix= %s'
       p =(game,)
       result= db.get(sql,*p)

       if result:
           print result['id']

           param={}
           param['main_category_id']= result['id']

           # both IPs known: match on either, update keyed by inner_ip
           if row['inner'] !='' and row['pub']!="":

               sql='select count(*) as count from assets where inner_ip="%s" or public_ip ="%s"'%(row['inner'],row['pub'])
               count = db.count(sql)['count']
               if count == 0:
                   param['inner_ip'] = row['inner']
                   param['public_ip'] = row['pub']
                   param['hostname'] = row['name']
                   param['wxsn'] = row['name']
                   db.insert('assets',**param)
               else:
                   param['hostname'] = row['name']
                   db.update('assets','inner_ip="%s"'%row['inner'],**param)

           # only the inner IP known
           elif row['inner'] !='':
               sql='select count(*) as count from assets where inner_ip="%s"'%row['inner']
               count = db.count(sql)['count']
               if count == 0:
                   param['inner_ip'] = row['inner']
                   param['hostname'] = row['name']
                   param['wxsn'] = row['name']
                   db.insert('assets',**param)
               else:
                   param['hostname'] = row['name']
                   db.update('assets','inner_ip="%s"'%row['inner'],**param)

           # only the public IP known
           elif row['pub']!="":
               sql='select count(*) as count from assets where public_ip="%s"'%row['pub']
               count = db.count(sql)['count']
               if count ==0:
                   param['public_ip'] = row['pub']
                   param['hostname'] = row['name']
                   param['wxsn'] = row['name']

                   db.insert('assets',**param)
               else:
                   param['hostname'] = row['name']
                   db.update('assets','public_ip="%s"'%row['pub'],**param)
           else:
                print 'pub and inner are both empty'
Пример #40
0
 def get_idcs(idc_name):
     """Return idc rows (idc_id, prefix) whose name starts with idc_name
     and which are not soft-deleted."""
     pattern = idc_name + '%'
     query_text = "select id as idc_id,prefix  from idc where name like  %s and is_del = 0"
     connection = Custom_MySQL(using='center_app')
     return connection.query(query_text, pattern)
Пример #41
0
def run_task(self, task_param):
    """Dump dm-layer data from Hive to a local file and load it into report MySQL.

    task_param keys read here: id, game, platform, table_name, db_name,
    log_date, date_cycle, prefix_sql, exec_sql, post_sql.

    Progress is tracked in etl_manage.dm2report_log via `status`:
    1 = started, 2 = prefix delete done, 3 = hive dump done,
    4 = mysql load done, 0 = failed / not executed.  `in_queue` is reset
    to 0 at the end to simulate removal from the task queue.
    On any exception the transactions are rolled back, the row is marked
    (in_queue=0, status=0) and the task is re-raised via self.retry.
    """
    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    mysql.begin()
    mysql_etl.begin()
    where = {'id': int(task_param['id'])}
    try:
        hive = Custom_Hive(using='ares_dw')

        game = task_param['game']
        platform = task_param['platform']
        table_name = task_param['table_name']
        log_date = task_param['log_date']
        prefix_sql = task_param['prefix_sql']
        exec_sql = task_param['exec_sql']
        post_sql = task_param['post_sql']
        date_cycle = task_param['date_cycle']
        # random suffix + epoch seconds make the dump file name unique per run
        random_str = str(random.randint(0, 999999999))
        stimes = str(int(time.time()))

        tmp_file_dir = "/tmp/tmp/%s/%s/%s" % (game, platform, log_date)
        # create the local dump directory if it does not exist yet
        if not os.path.exists(tmp_file_dir):
            os.makedirs(tmp_file_dir)

        tmp_file = "%s/%s_%s_%s_%s.txt" % (tmp_file_dir, table_name,
                                           date_cycle, random_str, stimes)
        # session settings prepended to every hive statement: no concurrency,
        # LZO-compressed output
        hql_conf = "SET hive.support.concurrency=false;SET hive.exec.compress.output=true;" \
                   "SET mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec; "
        '''
        将任务标示为开始执行:1
        '''
        datas = {
            'status':
            1,
            'start_time':
            str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        }
        mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()

        # run the prefix sql (deletes stale target rows) before dumping
        # fresh data for the mysql load
        if prefix_sql is not None:
            result = mysql.delete_by_sql(prefix_sql)
            logger.info('exec prefix_sql: delete old data {0}'.format(
                result['output']))

            if result['status'] == 0:
                '''
                将任务标示为删除临时文件完成:2
                '''
                datas = {'status': 2}
                mysql_etl.update('dm2report_log', ' id = %(id)d' % where,
                                 **datas)
                mysql_etl.commit()
                '''
                开始执行hive ql,将数据dump到本地
                '''
                result = hive.dump(hql_conf + exec_sql, tmp_file)
                logger.info('exec exec_sql: dump data {0}'.format(
                    result['output']))

                # only proceed when hive succeeded AND the dump file exists
                if result['status'] == 0 and True == os.path.exists(
                        '%s' % tmp_file):
                    '''
                    将任务标示为dump hive数据完成:3
                    '''
                    datas = {'status': 3, 'tmp_file_name': tmp_file}
                    mysql_etl.update('dm2report_log', ' id = %(id)d' % where,
                                     **datas)
                    mysql_etl.commit()

                    # run the post sql: LOAD the dumped file into mysql after
                    # substituting the template placeholders
                    if post_sql is not None:
                        post_sql = post_sql.replace('{dir_path}', tmp_file)
                        post_sql = post_sql.replace('{table_name}',
                                                    task_param['table_name'])
                        post_sql = post_sql.replace('{db_name}',
                                                    task_param['db_name'])

                        result = mysql.load(post_sql)
                        logger.info(
                            'exec post_sql: load data to hdfs {0}'.format(
                                result['output']))

                        if result['status'] == 0:
                            '''
                            将任务标识为录入mysql完成:4
                            '''
                            datas = {
                                'status':
                                4,
                                'end_time':
                                str(datetime.datetime.now().strftime(
                                    '%Y-%m-%d %H:%M:%S'))
                            }
                            mysql_etl.update('dm2report_log',
                                             ' id = %(id)d' % where, **datas)
                        else:
                            logger.error(
                                'Error Code %s : %s  Cmd: %s' %
                                (result['status'], result['output'], post_sql))
                else:
                    logger.error(
                        'Error Code %s : %s  Cmd: %s' %
                        (result['status'], result['output'], exec_sql))
            else:
                logger.error('Error Code %s : %s  Cmd: %s' %
                             (result['status'], result['output'], prefix_sql))
                '''
                执行失败,将其状态标为未执行:0
                '''
                datas = {'status': 0}
                mysql_etl.update('dm2report_log', ' id = %(id)d' % where,
                                 **datas)
        '''
        将任务标示为:(模拟) 已从任务队列中移除
        '''
        datas = {'in_queue': 0}
        update_result = mysql_etl.update('dm2report_log',
                                         ' id = %(id)d' % where, **datas)
        # if the update failed, retry once; beyond that rely on the external
        # auto-repair mechanism (two failures in a row are considered unlikely)
        if update_result != 1:
            mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)

        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()

        return True

    except Exception as exc:
        logger.error('dm2report error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()

        # mark the task as dequeued and not-executed, then let celery retry
        datas = {'in_queue': 0, 'status': 0}
        mysql_etl.update('dm2report_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()

        mysql_etl.close()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
Пример #42
0
def query_mysql(sql):
    """Run a read query against the etl_manage database.

    :param sql: SQL string to execute (caller is responsible for escaping).
    :return: the result rows from Custom_MySQL.query on success, or
             None if any step raised (the exception is printed, and the
             transaction is rolled back).
    """
    mysql = Custom_MySQL(using='etl_manage')

    try:
        mysql.begin()
        result = mysql.query(sql)
        mysql.commit()
        return result
    except Exception as exc:
        # roll back and report; callers receive None on failure
        mysql.rollback()
        print(exc)
        return None
    finally:
        # original closed the connection separately on each path and would
        # leak it if rollback itself raised; finally guarantees the close
        mysql.close()
Пример #43
0
def run_task(self, task_param):
    """Merge raw log files, LZO-compress them, load into Hive and build the index.

    task_param keys read here: id, game, platform, log_date, log_name,
    log_time, do_rate, flag, db_name, table_name, load_sql.

    Progress is tracked in etl_manage.file2dw_log via `load_status`:
    1 = started, 2 = merge done (row_num recorded), 3 = compress done,
    4 = hive load done, 5 = lzo index built, 0 = failed.
    `in_queue` is reset to 0 at the end to simulate queue removal.
    On any exception the transaction is rolled back, the row is marked
    (in_queue=0, load_status=0) and the task is re-raised via self.retry.
    """
    mysql = Custom_MySQL(using='etl_manage')
    hive = Custom_Hive(using='ares_dw')
    redis = Custom_Redis(using='etl_manage')
    mysql.begin()
    where = {'id': int(task_param['id'])}

    try:
        # log_date in YYYY-MM-DD form, and the previous day (used for "snap")
        log_date = datetime.datetime.strptime(task_param['log_date'],
                                              '%Y%m%d').strftime("%Y-%m-%d")
        log_date_1 = (
            datetime.datetime.strptime(task_param['log_date'], '%Y%m%d') -
            datetime.timedelta(days=1)).strftime("%Y-%m-%d")

        dir_param = {
            'game': task_param['game'],
            'platform': task_param['platform'],
            'log_date': task_param['log_date'],
            'log_name': task_param['log_name']
        }
        filename_dict = {
            'log_name': task_param['log_name'],
            'log_time': task_param['log_time']
        }
        index_dict = {
            'db_name': task_param['db_name'],
            'table_name': task_param['table_name'],
            'platform': task_param['platform'],
            'log_date': log_date
        }
        partition = {'platform': task_param['platform'], 'log_date': log_date}

        # variants keyed on the previous day, used when flag == "snap"
        index_dict_1 = {
            'db_name': task_param['db_name'],
            'table_name': task_param['table_name'],
            'platform': task_param['platform'],
            'log_date': log_date_1
        }
        partition_1 = {
            'platform': task_param['platform'],
            'log_date': log_date_1
        }

        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s" % dir_param

        flag = task_param['flag']

        file_name = "%(log_name)s" % filename_dict
        file_name_txt = "%(log_name)s_%(log_time)s_result.txt" % filename_dict
        file_name_lzo = "%(log_name)s_%(log_time)s_result.txt.lzo" % filename_dict
        # .row companion file holds the merged row count
        file_name_row = "%(log_name)s_%(log_time)s_result.txt.row" % filename_dict
        index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
        partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition

        project_path = os.getcwd()

        log_time = task_param['log_time']
        do_rate = task_param['do_rate']

        #if flag == "snap" or (do_rate == "1day" and flag == "log")
        # snapshots are written into the previous day's partition
        if flag == "snap":
            index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict_1
            partition_name = "plat_form='%(platform)s',log_date='%(log_date)s'" % partition_1

        # look up in redis which disk holds this game's data (cache the
        # xml config in redis on first use)
        if redis.get("disk_xml") is None:
            disk_tmp = open('/data/etl_manage/conf/disk_game.xml', 'r')
            redis.set("disk_xml", str(disk_tmp.read()))

        disk_list = str(redis.get("disk_xml"))
        root = ET.fromstring(disk_list)
        disk = ""
        for gameinfo in root.findall('game'):
            if gameinfo.get('name') == task_param['game']:
                disk = gameinfo.get('disk')
                continue

        #local_log_dir = '/disk1/tmp_data'+log_dir
        local_log_dir = '/' + disk + '/data' + log_dir
        logger.info('local_log_dir: {0}'.format(local_log_dir))

        # proceed only if the local log directory exists
        if os.path.exists('%s' % local_log_dir):

            # remove a possibly pre-existing result file of the same name,
            # which also makes repair/re-runs safe
            if os.path.exists('%s/%s' % (local_log_dir, file_name_txt)):
                cmd_remove = 'rm -f %s/%s*' % (local_log_dir, file_name_txt)
                logger.info('remove history file: {0}'.format(cmd_remove))
                remove_result = cmd.run(cmd_remove)
                if remove_result['status'] != 0:
                    logger.error('Error Code %s : %s  Cmd: %s' %
                                 (remove_result['status'],
                                  remove_result['output'], cmd_remove))

            datas = {'load_status': 1}
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
            mysql.commit()

            cmd_merge = '/bin/bash %s/mergefile2dw/merge_data.sh %s %s %s %s %s' \
                        '' % (project_path, local_log_dir, local_log_dir, file_name, do_rate, log_time)
            logger.info('merge data: {0}'.format(cmd_merge))
            merge_result = cmd.run(cmd_merge)
            logger.info('merge data result {0}'.format(merge_result['output']))

            if merge_result['status'] == 0:

                # read the total row count written by the merge script
                row = open('%s/%s' % (local_log_dir, file_name_row)).read()
                '''
                合并数据完成:2
                '''
                datas = {'load_status': 2, 'row_num': int(row)}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                mysql.commit()
                '''
                压缩数据
                '''
                cmd_compress = '/bin/bash %s/mergefile2dw/compress_data.sh %s %s' % (
                    project_path, local_log_dir, file_name_txt)
                logger.info('compress data: {0}'.format(cmd_compress))
                compress_result = cmd.run(cmd_compress)
                if compress_result['status'] != 0:
                    logger.error('Error Code %s : %s Cmd: %s' %
                                 (compress_result['status'],
                                  compress_result['output'], cmd_compress))
                else:
                    '''
                    压缩数据完成:3
                    '''
                    datas = {'load_status': 3}
                    mysql.update('file2dw_log', ' id = %(id)d' % where,
                                 **datas)
                    mysql.commit()

                    # before loading, delete any same-named file in hive so
                    # the same file cannot appear twice
                    cmd_remove = '/bin/bash %s/mergefile2dw/remove_damaged_file.sh %s %s' % (
                        project_path, index_dir_name, file_name_lzo)
                    logger.info('remove damaged files: {0}'.format(cmd_remove))
                    remove_result = cmd.run(cmd_remove)

                    if remove_result['status'] != 0:
                        logger.error('Error Code %s : %s  Cmd: %s' %
                                     (remove_result['status'],
                                      remove_result['output'], cmd_remove))
                    '''
                    文件加载到hive中
                    '''
                    load_sql = task_param['load_sql']
                    load_sql = load_sql.replace(
                        '{dir_path}', local_log_dir + "/" + file_name_lzo)
                    load_sql = load_sql.replace('{table_name}',
                                                task_param['table_name'])
                    load_sql = load_sql.replace('{partition_name}',
                                                '%s' % partition_name)
                    load_sql = load_sql.replace('{db_name}',
                                                task_param['db_name'])

                    logger.info('hive load SQL: {0}'.format(load_sql))
                    result = hive.load(load_sql)
                    logger.info('hive load result {0}'.format(
                        result['output']))

                    if result['status'] == 0:
                        '''
                        将任务标识为加载文件完成:4
                        '''
                        datas = {'load_status': 4}
                        mysql.update('file2dw_log', ' id = %(id)d' % where,
                                     **datas)
                        mysql.commit()
                        '''
                        建立索引,否则lzo将不支持split
                        '''
                        cmd_index = '/bin/bash %s/mergefile2dw/create_lzo_indexer.sh %s %s' % (
                            project_path, index_dir_name, file_name_lzo)
                        logger.info('create lzo index: {0}'.format(cmd_index))
                        index_result = cmd.run(cmd_index)

                        if index_result['status'] != 0:
                            logger.error('Error Code %s : %s  Cmd: %s' %
                                         (index_result['status'],
                                          index_result['output'], cmd_index))
                        else:
                            # the indexer script reports success in its output
                            if "create index success" in index_result[
                                    'output']:
                                '''
                                将任务标识为建立lzo索引完成:5
                                '''
                                datas = {'load_status': 5}
                                mysql.update('file2dw_log',
                                             ' id = %(id)d' % where, **datas)
                            else:
                                '''
                                如果load数据失败,则删除半途出现错误的文件,防止hive查询的时候报错
                                '''
                                cmd_remove = '/bin/bash %s/mergefile2dw/remove_damaged_file.sh %s %s' % (
                                    project_path, index_dir_name,
                                    file_name_lzo)
                                logger.info('remove damaged files: {0}'.format(
                                    cmd_remove))
                                remove_result = cmd.run(cmd_remove)

                                if remove_result['status'] != 0:
                                    logger.error(
                                        'Error Code %s : %s Cmd: %s' %
                                        (remove_result['status'],
                                         remove_result['output'], cmd_remove))
                    else:
                        logger.error(
                            'Error Code %s : %s Cmd: %s' %
                            (result['status'], result['output'], load_sql))
            else:
                '''
                合并数据失败
                '''
                datas = {'load_status': 0}
                mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
                logger.error('Error Code %s : %s Cmd: %s' %
                             (merge_result['status'], merge_result['output'],
                              merge_result))
        '''
        执行完毕,模拟从队列中清楚任务:0
        '''
        datas = {'in_queue': 0}
        update_result = mysql.update('file2dw_log', ' id = %(id)d' % where,
                                     **datas)
        # if the update failed, retry once; beyond that rely on the external
        # auto-repair mechanism (two failures in a row are unlikely)
        if update_result != 1:
            mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)

        mysql.commit()
        mysql.close()
        return True

    except Exception as exc:
        logger.error('mergefile2dw error: %s' % exc)
        mysql.rollback()

        # mark the task as dequeued and not-executed, then let celery retry
        datas = {'in_queue': 0, 'load_status': 0}
        mysql.update('file2dw_log', ' id = %(id)d' % where, **datas)
        mysql.commit()

        mysql.close()
        raise self.retry(exc=exc, countdown=30)
Пример #44
0
 def __init__(self):
     """Fix the template game name and bind the etl_manage MySQL handle."""
     self.source_game = 'ares'
     self.mysql = Custom_MySQL(using='etl_manage')
Пример #45
0
def run_task(self, task_param):
    """Load a merged local log file into MySQL (file2mysql step).

    task_param keys read here: id, game, platform, log_date, log_name,
    log_time, source_ip, prefix_sql, post_sql, load_sql, db_name, table_name.

    Progress is tracked in etl_manage.file2mysql_log via `load_status`:
    1 = started, 3 = mysql load done, 0 = failed / not executed.
    `in_queue` is reset to 0 at the end to simulate queue removal.
    On any exception the transactions are rolled back, the row is marked
    (in_queue=0, load_status=0) and the task is re-raised via self.retry.
    """
    redis = Custom_Redis(using='etl_manage')
    mysql = Custom_MySQL(using='hadoops2')
    mysql_etl = Custom_MySQL(using='etl_manage')
    mysql.begin()
    mysql_etl.begin()

    where = {'id': int(task_param['id'])}
    datas = {'load_status': 0}

    try:
        '''
        业务代码块放下方
        '''
        dir_param = {'game': task_param['game'], 'platform': task_param['platform'],
                     'log_date': task_param['log_date'], 'log_name': task_param['log_name']}

        filename_dict = {'log_name': task_param['log_name'], 'log_time': task_param['log_time'], 'source_ip': task_param['source_ip']}
        prefix_sql = task_param['prefix_sql']
        post_sql = task_param['post_sql']

        log_dir = "/%(game)s/%(platform)s/%(log_date)s/%(log_name)s/" % dir_param
        file_name = "%(log_name)s_%(source_ip)s_%(log_time)s.txt" % filename_dict

        # look up in redis which disk holds this game's data (cache the
        # xml config in redis on first use)
        if redis.get("disk_xml") is None:
            disk_tmp = open('/data/etl_manage/conf/disk_game.xml', 'r')
            redis.set("disk_xml", str(disk_tmp.read()))

        disk_list = str(redis.get("disk_xml"))
        root = ET.fromstring(disk_list)
        disk = ""
        for gameinfo in root.findall('game'):
            if gameinfo.get('name') == task_param['game']:
                disk = gameinfo.get('disk')
                continue

        #local_log_dir = '/disk1/tmp_data'+log_dir
        local_log_dir = '/'+disk+'/data'+log_dir

        # only run when the file to load actually exists
        if os.path.exists('%s%s' % (local_log_dir, file_name)):
            '''
            将任务标识为开始执行:1
            '''
            datas['load_status'] = 1
            mysql_etl.update('file2mysql_log', ' id = %(id)d' % where, **datas)
            mysql_etl.commit()
            logger.info('start load data')
            # run the prefix sql, if configured
            if prefix_sql is not None and prefix_sql != '':
                mysql.query(prefix_sql)

            '''
            执行load数据进mysql
            '''
            load_sql = task_param['load_sql']
            load_sql = load_sql.replace('{dir_path}', local_log_dir+file_name)
            load_sql = load_sql.replace('{table_name}', task_param['table_name'])
            load_sql = load_sql.replace('{db_name}', task_param['db_name'])
        
            result = mysql.load(load_sql)
            logger.info('load data to mysql: {0}'.format(result['output']))

            # check whether the mysql load succeeded
            if result['status'] == 0:
                # run the post sql, if configured
                if post_sql is not None and post_sql != '':
                    post_sql = post_sql.replace('{table_name}', task_param['table_name'])
                    post_sql = post_sql.replace('{db_name}', task_param['db_name'])
                    mysql.query(post_sql)

                '''
                将任务标识为录入mysql完成:3
                '''
                datas['load_status'] = 3

            else:

                logger.error('Error Code %s : %s  Cmd: %s' % (result['status'], result['output'], load_sql))
                '''
                录入mysql失败,将任务标示为未执行:0
                '''
                datas['load_status'] = 0

        '''
        将任务标示为:(模拟) 已从任务队列中移除
        '''
        datas['in_queue'] = 0
        update_result = mysql_etl.update('file2mysql_log', ' id = %(id)d' % where, **datas)
        # if the update failed, retry once; beyond that rely on the external
        # auto-repair mechanism (two failures in a row are unlikely)
        if update_result != 1:
            mysql_etl.update('file2mysql_log', ' id = %(id)d' % where, **datas)

        mysql_etl.commit()
        mysql.commit()
        mysql_etl.close()
        mysql.close()
        return True

    except Exception as exc:
        logger.error('file2mysql error: %s' % exc)
        mysql_etl.rollback()
        mysql.rollback()

        # mark the task as dequeued and not-executed, then let celery retry
        datas = {'in_queue': 0, 'load_status': 0}
        mysql_etl.update('file2mysql_log', ' id = %(id)d' % where, **datas)
        mysql_etl.commit()
        
        mysql_etl.close()
        mysql.close()

        raise self.retry(exc=exc, countdown=60)
Пример #46
0
def run_task(self, task_param):
    """Run a dw-to-dm hive transformation (select ... insert ...) and index its LZO output.

    task_param keys read here: id, game, platform, db_name, table_name,
    log_name, do_rate, log_date, task_date, exec_sql.

    A redis key derived from (log_name, do_rate, log_date, task_date)
    deduplicates celery redeliveries: None/"0" = not run / failed (may run),
    "1" = running, "2" = finished successfully.

    Progress is tracked in etl_manage.dw2dm_log via `status`:
    1 = started, 2 = hive select-insert done, 3 = lzo index built,
    0 = failed / not executed; `in_queue` is reset to 0 at the end.
    On any exception the redis key is reset to 0, the transaction rolled
    back, the row marked (in_queue=0, status=0) and self.retry raised.
    """
    redis = Custom_Redis(using='etl_task')
    mysql = Custom_MySQL(using='etl_manage')
    mysql.begin()

    datas = {'status': 0}
    where = {'id': int(task_param['id'])}
    hive = Custom_Hive(using='hadoops2')

    # remembered outside the try so the except block can reset the redis flag
    task_key_exc = ""

    try:

        game = task_param['game']
        platform = task_param['platform']

        task_dict = {'log_name': task_param['log_name'], 'do_rate': task_param['do_rate'],
                     'log_date': task_param['log_date'], 'task_date': task_param['task_date']}

        task_key_tmp = "%(log_name)s%(do_rate)s%(log_date)s%(task_date)s" % task_dict

        # md5 of the identifying fields keeps the redis key short and uniform
        task_key_md5 = hashlib.md5()
        task_key_md5.update(task_key_tmp)
        task_key_md5_result = task_key_md5.hexdigest()

        task_key = "dw2dm_"+str(game)+"_"+str(platform)+"_"+str(task_key_md5_result)
        task_key_exc = task_key

        '''
        celery 本身的机制存在漏洞,会将一个已经完成任务再次分配给其他的worker,致使同一个任务执行多次
        为防止此种现象,在任务开始执行时,将任务的“唯一标示”写入redis中,标注已执行
        '''
        # task_key is None  -> never executed, run normally
        # task_key == "0"   -> previous attempt failed, re-run allowed
        if redis.get(task_key) == "0" or redis.get(task_key) is None:

            exec_sql = task_param['exec_sql']
            log_date = (datetime.datetime.strptime(task_param['log_date'], '%Y%m%d')).strftime("%Y-%m-%d")

            #prefix_sql = task_param['prefix_sql']
            #post_sql = task_param['post_sql']

            index_dict = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'],
                          'platform': task_param['platform'], 'log_date': log_date}
            index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict
            # path used when removing a broken index/output
            del_index_dir_name = "%(db_name)s.db/%(table_name)s/plat_form=%(platform)s/log_date=%(log_date)s/" % index_dict

            if platform == 'all':
                index_dict = {'db_name': task_param['db_name'], 'table_name': task_param['table_name'],
                              'log_date': log_date}
                # for indexing: the indexer cannot take a * wildcard, so only
                # go down to the table directory
                index_dir_name = "%(db_name)s.db/%(table_name)s/" % index_dict
                # for removal: although indexing targets the table dir, only
                # the current day's files are deleted
                del_index_dir_name = "%(db_name)s.db/%(table_name)s/*/log_date=%(log_date)s/*" % index_dict

            # session settings prepended to the hive statement: dynamic
            # partitions + LZO-compressed output
            hql_conf = "set hive.exec.dynamic.partition.mode=nonstrict;" \
                       "set hive.exec.compress.output=true;" \
                       "set mapred.output.compress=true;" \
                       "set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec;" \
                       "set io.compression.codecs=com.hadoop.compression.lzo.LzopCodec; "

            # project root, e.g. /data/etl_manage
            project_path = os.getcwd()

            '''
            将任务标示为开始执行:1
            '''
            datas['status'] = 1
            datas['start_time'] = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
            mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)
            mysql.commit()
            # mark in redis: task is running
            redis.set(task_key, 1)

            # run the actual select ... insert ... statement
            result = hive.select_insert(hql_conf+exec_sql)
            logger.info('exec exec_sql: select xxx insert xxx {0}'.format(result['output']))

            if result['status'] != 0:
                logger.error('Error Code %s : %s  Cmd: %s' % (result['status'], result['output'], exec_sql))
                '''
                执行失败,将其状态标为未执行:0
                '''
                datas['status'] = 0
                # mark in redis: task did not execute
                redis.set(task_key, 0)
            else:

                '''
                将任务标示为执行hive ql, select xxx insert xxx完成:2
                '''
                datas['status'] = 2
                datas.pop('start_time')
                mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)
                mysql.commit()

                '''
                建立索引,否则lzo将不支持split
                '''
                cmd_index = '/bin/bash %s/dw2dm/create_lzo_indexer.sh %s' % (project_path, index_dir_name)
                logger.info('create lzo index: {0}'.format(cmd_index))
                index_result = cmd.run(cmd_index)

                if index_result['status'] != 0:
                    logger.error('Error Code %s : %s  Cmd: %s' % (index_result['status'], index_result['output'], cmd_index))
                    # mark in redis: task did not execute
                    redis.set(task_key, 0)
                else:
                    if "create index success" in index_result['output']:
                        '''
                        将任务标识为建立lzo索引完成:3
                        '''
                        datas['status'] = 3
                        datas['end_time'] = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
                        # mark in redis: task finished successfully
                        redis.set(task_key, 2)
                    else:
                        '''
                        如果load数据失败,则删除半途出现错误的文件,防止hive查询的时候报错
                        '''
                        cmd_remove = '/bin/bash %s/dw2dm/remove_damaged_file.sh %s' % (project_path, del_index_dir_name)
                        logger.info('remove damaged files: {0}'.format(cmd_remove))
                        remove_result = cmd.run(cmd_remove)

                        if remove_result['status'] != 0:
                            logger.error('Error Code %s : %s Cmd: %s' % (remove_result['status'], remove_result['output'], cmd_remove))
                            # mark in redis: task did not execute
                            redis.set(task_key, 0)

        # task_key == "2": this task already ran successfully
        elif redis.get(task_key) == "2":
            datas['status'] = 3
        # the task is currently running elsewhere
        else:
            return True

        '''
        执行完毕,模拟从队列中清除任务:0
        '''
        datas['in_queue'] = 0
        update_result = mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)
        # if the update failed, retry once; beyond that rely on the external
        # auto-repair mechanism (two failures in a row are unlikely)
        if update_result != 1:
            mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)

        mysql.commit()
        mysql.close()
        return True

    except Exception as exc:
        logger.error('dw2dm_log error: %s' % exc)
        # reset the redis flag so a retry is allowed to run
        redis.set(task_key_exc, 0)
        mysql.rollback()

        datas = {'in_queue': 0, 'status': 0}
        mysql.update('dw2dm_log', ' id = %(id)d' % where, **datas)

        mysql.commit()
        mysql.close()
        raise self.retry(exc=exc, countdown=60)
Пример #47
0
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
create_time: 2016-01-01 11:20:00
used: dm2report 任务执行

last_update:2016-04-28 14:48:00
"""

import datetime
import sys
from dm2report.tasks import run_task
from custom.db.mysql import Custom_MySQL

mysql = Custom_MySQL(using='etl_manage')

# command-line arguments: game, platform, execution rate, and the wall-clock
# time of this run ('%Y-%m-%d %H:%M:%S')
game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# data date, formatted like: 20151015
log_date = now_time.strftime('%Y%m%d')

# pick up pending dm2report tasks: not yet queued, not yet started, and
# fewer than 4 execution attempts
tasks = mysql.query(
    'select * from dm2report_log where do_rate="%s" and task_date="%s" and game="%s" '
    'and platform="%s" and exec_num<4 and in_queue=0 and status=0'
    '' % (do_rate, log_date, game, platform))
Пример #48
0
class CopyConfig():
    """Copy dw2dm task and structure configuration rows from the template
    game ('ares') to a new game/platform in the etl_manage database."""

    def __init__(self):
        # etl_manage holds the dw2dm and structure configuration tables
        self.mysql = Custom_MySQL(using='etl_manage')
        # the game whose configuration is used as the copy source
        self.source_game = 'ares'
    
    def get_all_task(self,task_name):
        """Return all dw2dm rows of the source game (platform='all'),
        optionally restricted to a single task_name."""
        condition = 'game = "%s" ' % self.source_game
        if task_name is not None:
            condition += 'and task_name="%s"' % task_name
        ##appstoremix is_delete = 0 and
        task_list = self.mysql.query("select * from dw2dm where  platform='all' and %s" % (condition))
        return task_list

    def get_structure(self,id,game,plat_form):
        '''
        Return the id of this game's copy of the given structure row,
        creating the copy if it does not already exist.
        :param id: id of the source structure row
        :param game: target game name
        :param plat_form: target platform
        :return: structure id for (game, plat_form); implicitly None when
                 the source row does not exist
        '''
        structure=self.mysql.get("select * from structure where is_delete=0 and id=%s",id)
        if structure!=None:
            # column values for the copied row, in the INSERT column order
            # used by save_newstructure (db_name is appended below)
            t_structure=[
                structure['type'],
                structure['flag'],
                structure['db_type'],
                game,
                plat_form,
                #structure['platform'],
                #'db_name':structure['db_name'],
                structure['table_name'],
                structure['column_name'],
                ##structure['partition_name'],
                ##structure['partition_rule'],
                ##structure['index_name'],
                structure['create_table_sql'],
                structure['user_id'],
                0,
                datetime.datetime.today().strftime("%Y-%m-%d")
            ]
            # derive the target database name from the structure type
            game_db=None
            if structure['type']!=None and str(structure['type']).__eq__('dw'):
                game_db='%s_dw' % game
                t_structure.append(game_db)
            elif structure['type']!=None and str(structure['type']).__eq__('dm'):
                game_db='%s_dm' % game
                t_structure.append(game_db)
            elif structure['type']!=None and str(structure['type']).__eq__('report'):
                game_db='report_%s' % game
                t_structure.append(game_db)
            # reuse an existing copy if one is already present
            exis_row=self.mysql.query("select id from structure where platform='%s' and is_delete=0 and db_name='%s' and platform='all' and table_name='%s' and db_type='%s'"%(plat_form,game_db,str(structure['table_name']),str(structure['db_type'])))
            if len(exis_row)>0:
                return  int(exis_row[0]['id'])
            else:
                return self.save_newstructure(t_structure)


    def save_new_task(self,task):
        """Insert one copied dw2dm task row and commit."""
        self.mysql.insert("dw2dm",**task)
        self.mysql.commit()
    def save_newstructure(self,structure):
        """Insert one copied structure row (positional values) and commit;
        returns the value reported by execute()."""
        query='INSERT INTO structure(type,flag,db_type,game,platform,table_name,column_name,create_table_sql,user_id,is_delete,create_date,db_name) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        rowNum=self.mysql.execute(query,*tuple(structure))
        self.mysql.commit()
        return rowNum
    def run(self,game,task_name=None,plat_form="all"):
        """Copy every matching source task (and the structure rows it
        references via from_id/target_id) to the given game/platform."""
        print "start copy"
        task_list = self.get_all_task(task_name)
        
        for task in task_list:
            # rebuild the comma-separated from_id list against the copied
            # structure rows of the target game
            form_ids=""
            for form_id_str in task['from_id'].split(","):
                if len(str(form_ids))>0:
                    form_ids=form_ids+","+str(self.get_structure(int(form_id_str),game,plat_form))
                else:
                    form_ids=str(self.get_structure(int(form_id_str),game,plat_form))
            target_id=self.get_structure(int(task['target_id']),game,plat_form)
            t_task = {
                'game':game,
                ##'platform':task['platform'],
                'platform':plat_form,
                'log_name':task['log_name'],
                'do_rate':task['do_rate'],
                'priority':task['priority'],
                'prefix_sql':task['prefix_sql'],
                # rewrite db names in the SQL from the source game to the target
                'exec_sql':task['exec_sql'].replace("%s_dw" % self.source_game,"%s_dw" % game).replace("%s_dm" % self.source_game,"%s_dm" % game),
                'post_sql':task['post_sql'],
                'from_id':form_ids,
                'target_id':target_id,
                'create_date':datetime.datetime.today().strftime("%Y-%m-%d"),
                'comment':task['comment'],
                'grouped':task['grouped'],
                'is_delete':task['is_delete'],
                'user_id':task['user_id']
            }
            self.save_new_task(t_task)
        
        self.mysql.close()
        print "over"

    def add_structure(self,game,plat_form):
        """Copy all log-flagged report/dm structure rows of the source game
        to the target game, once per platform in the comma-separated list."""
        platforms_str=plat_form.split(",")
        structures=self.mysql.query("select * from structure where platform='all' and is_delete=0 and flag='log' and game='ares' and type in ('report','dm')")
        for structure in structures:
            for platform in platforms_str:
                t_structure=[
                    structure['type'],
                    structure['flag'],
                    structure['db_type'],
                    game,
                    platform,
                    #structure['platform'],
                    #'db_name':structure['db_name'],
                    structure['table_name'],
                    structure['column_name'],
                    ##structure['partition_name'],
                    ##structure['partition_rule'],
                    ##structure['index_name'],
                    structure['create_table_sql'],
                    structure['user_id'],
                    0,
                    datetime.datetime.today().strftime("%Y-%m-%d")
                ]
                game_db=None
                if structure['type']!=None and str(structure['type']).__eq__('dw'):
                    game_db='%s_dw' % game
                elif structure['type']!=None and str(structure['type']).__eq__('dm'):
                    game_db='%s_dm' % game
                elif structure['type']!=None and str(structure['type']).__eq__('report'):
                    game_db='report_%s' % game
                t_structure.append(game_db)
                self.save_newstructure(t_structure)
#coding=utf-8
"""
auth: suguoxin
mail: [email protected]
create_time: 2015-11-16 15:28:00
used: 执行file2mysql任务

last_update: 2016-04-28 14:57:00
"""

import sys
import datetime
from file2mysql.tasks import run_task
from custom.db.mysql import Custom_MySQL

# Shared connection to the ETL management database.
mysql = Custom_MySQL(using='etl_manage')

# Execution context, taken from the command line:
#   argv[1]=game, argv[2]=platform, argv[3]=do_rate (execution frequency),
#   argv[4]=now_time ('%Y-%m-%d %H:%M:%S')
game = sys.argv[1]
platform = sys.argv[2]
do_rate = sys.argv[3]
now_time = sys.argv[4]
now_time = datetime.datetime.strptime(now_time, '%Y-%m-%d %H:%M:%S')

# Data date, formatted like: 20151015
log_date = now_time.strftime('%Y%m%d')
# Data time slot (five-minute buckets), formatted like: 0005, 2400
log_time = now_time.strftime("%H%M")

sql = "select distinct a.* from (select * from file2mysql_log where game='%s' and platform='%s' and load_status=0 " \
      "and do_rate='%s' and task_date='%s' and in_queue=0 and exec_num<4 and retry_num <6 and log_time<='%s') as a left outer join " \
Пример #50
0
class CopyConfig():
    def __init__(self):
        self.mysql = Custom_MySQL(using='etl_manage')
        self.source_game = 'ares'

    def get_all_task(self, task_name):

        condition = 'game = "%s" ' % self.source_game
        if task_name is not None:
            condition += 'and task_name="%s"' % task_name

        task_list = self.mysql.query(
            "select * from dm2report where is_delete = 0 and %s" % condition)
        return task_list

    def get_structure(self, id, game):
        structure = self.mysql.get(
            "select * from structure where is_delete=0 and id=%s", id)
        if structure != None:
            t_structure = [
                structure['type'],
                structure['flag'],
                structure['db_type'],
                game,
                structure['platform'],
                #'db_name':structure['db_name'],
                structure['table_name'],
                structure['column_name'],
                ##structure['partition_name'],
                ##structure['partition_rule'],
                ##structure['index_name'],
                structure['create_table_sql'],
                structure['user_id'],
                0,
                datetime.datetime.today().strftime("%Y-%m-%d")
            ]
            game_db = None
            if structure['db_type'] != None and str(
                    structure['db_type']).__eq__('hive'):
                game_db = '%s_dw' % game
                t_structure.append(game_db)
            elif structure['db_type'] != None and str(
                    structure['db_type']).__eq__('mysql'):
                game_db = 'report_%s' % game
                t_structure.append(game_db)
            exis_row = self.mysql.query(
                "select id from structure where platform='all' and user_id='wxx' and is_delete=0 and db_name='%s' and table_name='%s' and db_type='%s'"
                % (game_db, str(
                    structure['table_name']), str(structure['db_type'])))
            if len(exis_row) > 0:
                return int(exis_row[0]['id'])
            else:
                return self.save_newstructure(t_structure)

    def save_new_task(self, task):
        self.mysql.insert("dm2report", **task)
        self.mysql.commit()

    def save_newstructure(self, structure):
        query = 'INSERT INTO structure(type,flag,db_type,game,platform,table_name,column_name,create_table_sql,user_id,is_delete,create_date,db_name) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        rowNum = self.mysql.execute(query, *tuple(structure))
        self.mysql.commit()
        return rowNum

    def run(self, game, task_name=None):
        print "start copy"
        task_list = self.get_all_task(task_name)

        for task in task_list:
            form_id = self.get_structure(int(task['from_id']), game)
            target_id = self.get_structure(int(task['target_id']), game)
            t_task = {
                'game':
                game,
                'platform':
                task['platform'],
                'task_name':
                task['task_name'],
                'date_cycle':
                task['date_cycle'],
                'do_rate':
                task['do_rate'],
                'group':
                task['group'],
                'priority':
                task['priority'],
                'prefix_sql':
                task['prefix_sql'],
                'exec_sql':
                task['exec_sql'].replace("%s_dw" % self.source_game,
                                         "%s_dw" % game),
                'post_sql':
                task['post_sql'],
                'from_id':
                form_id,
                'target_id':
                target_id,
                'create_date':
                datetime.datetime.today().strftime("%Y-%m-%d"),
                'comment':
                task['comment']
            }

            self.save_new_task(t_task)

        self.mysql.close()
        print "over"