示例#1
0
def load_file(date_arg):  #导文件模块,主要考虑:要导的文件存在不存在?需要有删除计算当天的分区步骤,文件没导成功怎么办
    global WORKERS, FILE_HIVE_LIST, HIVE_DB, HIVE_TABLE  # 5.1 设置全局变量
    hive_status = check_hive_file()  # 5.2 hive_status=True、False 检查是否存在文件
    if not hive_status:
        log_str = 'click stream file parse fail: file count not equal WORKERS'
        print MyTime.get_local_time(), '-------------- ' + log_str
        log.error(log_str)
        #MyAlarm.send_mail_sms(log_str)                                                  # 5.3 假如文件不存在,生成告警日志,并发送文件
    else:
        hive = MyHiveBin.HiveBin()  # 调用hive模块 模块在 com/hive/bin
        dt = date_arg.replace('-', '')  # "2016-11-30" 改变为 "20161130"
        hive.drop_partition(
            HIVE_DB, HIVE_TABLE, 'dt', dt
        )  # 5.4 假如文件存在,删掉计算当天的分区       HIVE_DB= 'to8to_rawdata'   HIVE_TABLE='clickstream'

        partition_dict = {'dt': dt}

        for hive_file in FILE_HIVE_LIST:
            log_str = 'load file ' + hive_file + ' into hive begin'
            print MyTime.get_local_time(), '-------------- ' + log_str
            log.info(log_str)
            status = hive.load_file(
                HIVE_DB, HIVE_TABLE, hive_file,
                partition_dict)  # status 注意,导数据成功后将 状态 True、False 赋值给 status
            if status is False:
                log_str = 'Load file ' + hive_file + ' into hive status:fail; Click stream parse exit'
                log.error(log_str)
                #MyAlarm.send_mail_sms(log_str)                                         # 5.5 假如文件存在  但没导成功,将发送告警
                return False
示例#2
0
def load_file(date, today):
    global tar_src
    global tar_des
    global latest_time
    tar_src = tar_src.replace('xxxx-xx-xx', date)
    tar_des = tar_des.replace('xxxx-xx-xx', date)
    print tar_src
    latest_time_stamp = MyTime.datetime_timestamp(
        latest_time.replace('xxxx-xx-xx', today))
    if not os.path.exists(tar_des):
        shell = "mkdir -p " + tar_des
        os.system(shell)
    while 1:
        if os.path.exists(tar_src):
            if file_modify_stat(tar_src):
                if MyTool.tar_file(tar_src, tar_des):
                    for root, dirs, files in os.walk(tar_des):
                        for d in dirs:
                            print os.path.join(root, d)
                        for f in files:
                            file = os.path.join(root, f)
                            shell = '/usr/bin/dos2unix ' + file
                            os.popen(shell)
                            hive = MyHiveBin.HiveBin()
                            hive.load_file_single_overwrite(
                                'to8to_rawdata', file)
                    log.info('Mysql actual kefu yuyue to hive status:ok')
                    break
        else:
            log.info('Mysql actual tar file not exists')
            now_time_stamp = MyTime.datetime_timestamp(MyTime.get_local_time())
            if now_time_stamp > latest_time_stamp:
                log.critical('not find ' + tar_src)
                MyAlarm.send_mail_sms('Get Mysql actual tar file status:fail!')
                return False

        time.sleep(time_rate)
    return True
示例#3
0
 def __init__(self, date):
     date_before = MyTime.date_before(date)  #如果date是昨天的日期,那么 date_before=1
     self.dbName = 'to8to_rawdata'
     self.lastDate = datetime.date.today() - datetime.timedelta(
         days=date_before
     )  # self.lastDate = datetime.date(2016, 12, 19)  str(self.lastDate)='2016-12-19'
     print "load mysql all file begin--------------------", self.lastDate
     self.last2Date = datetime.date.today() - datetime.timedelta(
         days=date_before + 1)
     self.last7Date = datetime.date.today() - datetime.timedelta(
         days=date_before + 10)
     self.last7DateFormat = str(self.last7Date).replace('-', '')
     self.lastDateFormat = str(self.lastDate).replace('-', '')
     self.data_src = src_path + "/tmp/bi/Mysql/"  # /data1/bi/platform/rawdata/1011/
     self.data_des = src_path + "/tar/"
     self.txtPath = self.data_des + str(
         self.lastDate) + os.sep + 'Mysql' + os.sep
     self.doPath = self.txtPath
     self.delPath = self.data_des + str(
         self.last2Date) + os.sep + 'Mysql' + os.sep
     #定义要加密(替换为空)的txt文件的列
     self.cutCol = {
         "to8to_fcom": [9, 10, 11, 12, 13],
         "to8to_jj_smt_zb": [6, 8, 26],
         "to8to_yuyue_apply": [18, 42],
         "to8to_fcom_info": [6, 23, 24],
         "to8to_to8toyw_contact": [7, 8],
         "to8to_to8toyw_back": [4, 5, 6],
         "to8to_mcom": [13, 14, 15, 27],
         "to8to_tuori": [15, 16, 20, 21],
         #"to8to_huodong_yanfang" : [2,3],
         "to8to_huodong_apply": [2],
         "to8to_gongdi": [13]
     }
     #定义要解码url的字段
     self.unquote = {"to8to_project_src": [3]}
     #定义保留分区的表
     self.reserve = ('to8to_fcom', 'to8to_yuyue_apply_fp', 'to8to_zxb',
                     'to8to_yuyue_apply', 'to8to_dw_login_record',
                     'to8to_yuyue_apply_record', 'to8to_nps_record',
                     'to8to_item_condition', 'ts_record',
                     'to8to_jj_jianli_clock', 'to8to_yuyue_yyhh_shkf',
                     'to8to_apply_config_node', 'to8to_apply_config',
                     'to8to_credit_log', 'sem_360_diyu')
示例#4
0
    (status, output) = commands.getstatusoutput(shell_command)

    if status == 0:
        print time.strftime("%H:%M:%S", time.localtime(
        )), "==========================================>", log_str, "ok"
        log.info(log_str + "ok")
        return True
    else:
        print time.strftime("%H:%M:%S", time.localtime(
        )), "==========================================>", log_str, "fail"
        log.info(log_str + "fail")
        return False


if __name__ == '__main__':
    date_day = MyTime.get_date(1)
    if len(sys.argv) == 2:
        date_day = sys.argv[1]

    print time.strftime("%H:%M:%S", time.localtime(
    )), "==========================================>un tar begin"
    if un_tar(date_day):
        print time.strftime("%H:%M:%S", time.localtime(
        )), "==========================================>load to mongodb begin"
        if tar_to_mongodb(date_day):
            print time.strftime(
                "%H:%M:%S", time.localtime()
            ), "==========================================>load to mongodb end"
            if mongodb_to_mysql(date_day):
                time.sleep(10)
                print time.strftime(
示例#5
0
                    log.debug(logString)
                    #MyAlarm.send_mail_sms(logString)

                #删除分区
                try:
                    if table not in self.reserve:
                        dropData = 'use to8to_rawdata;alter table ' + table + ' drop partition (dt=' + self.last7DateFormat + ')'  # self.last7DateFormat = str(self.last7Date).replace('-', '') 删除7天前的分区
                        hive.execute(dropData)

                    #删除掉当天txt文件
                    #os.remove(file)
                except Exception, ex:
                    pass
        try:
            rmCmd = 'rm -rf ' + self.delPath  # 删除当天的分区 self.doPath = '/data1/bi/platform/tar/2016-12-21/Mysql/'
            os.system(rmCmd)
        except Exception, ex:
            pass


# Script entry point: unpack the day's tar, mask private columns, then load into Hive.
if __name__ == '__main__':
    date = MyTime.get_date(1)  # default: yesterday's date, taken automatically
    # A single command-line argument overrides the default date.
    if len(sys.argv) == 2:
        date = sys.argv[1]
    hive = toHive(date)
    if hive.tarFile():  # unpack the tar.gz file; continue only on a truthy result
        hive.cutTxt()  # replace private information with null
        hive.txtToHive()  # load the rewritten txt files into Hive
    # Follow-up scripts below are currently disabled.
    #os.system('python /data1/bi/platform/scripts/BI/BISub/bi_yewu_caiwu_argv.py ' + date)
    #os.system('python /data1/bi/platform/scripts/BI/BISub/bi_zxgs_yunyingjibie.py ' + date)
示例#6
0
                    file_path = os.path.join(root, file)
                    if 'sem_sm_keyword' in file:
                        hive.load_file('to8to_rawdata', 'sem_sm_keyword',
                                       file_path, partition_dict)
                    elif 'sem_sm_diyu' in file:
                        hive.load_file('to8to_rawdata', 'sem_sm_diyu',
                                       file_path, partition_dict)
                    else:
                        pass
            try:
                shutil.rmtree(data_path)
            except Exception, ex:
                print ex

            return True
        else:
            pass

        leave_time += 300
        if leave_time > wait_time:
            return False
        time.sleep(300)


# Script entry point: run load_all for one date and alert when it fails.
if __name__ == '__main__':
    # Default date comes from MyTime.get_date(1) (presumably yesterday - confirm);
    # a single command-line argument overrides it.
    date = MyTime.get_date(1)
    if len(sys.argv) == 2:
        date = sys.argv[1]
    # A falsy result from load_all triggers the mail/SMS alert.
    if not load_all(date):
        MyAlarm.send_mail_sms('load sem shenma keyword to hive status:fail')
示例#7
0
                log.error(log_str)
                #MyAlarm.send_mail_sms(log_str)                                         # 5.5 假如文件存在  但没导成功,将发送告警
                return False


def main(date_arg):
    global FILE_JSON_NAME, FILE_HIVE_PATH  ## 3.1、首先设置全局变量,FILE_JSON_NAME = None  FILE_HIVE_PATH = None
    set_file_path(date_arg)  ## 3.2、设置文件路径 ,生成文件名列表 FILE_HIVE_LIST
    try:
        os.mkdir(
            FILE_HIVE_PATH
        )  ## 3.3 创建目录  FILE_HIVE_PATH :/data1/bi/platform/tar/2016-11-16/ClickStream/
    except Exception, ex:
        print str(ex)
        pass
    print MyTime.get_local_time(
    ), '-------------- tar click stream file begin'  # from cube import MyTime
    log.info(
        'tar click stream file begin'
    )  # 写入日志  log = MyLog.MyLog(path='/data1/bi/platform/scripts/BI/ClickStream/log/', name='ClickStream', type='to8to', level='DEBUG')
    if check_file():  ## 3.4 检查json日志文件是否生成
        print MyTime.get_local_time(
        ), '-------------- tar click stream file success, then process work'
        log.info('tar click stream file success')
        log.info('click stream to8to process work')
        get_file_size(FILE_JSON_NAME)  ## 3.5 获得文件大小,这个有点多余
        click_stream()  ## 3.6 开始清洗
        print MyTime.get_local_time(), '-------------- process success'
        log.info('click stream to8to process work success')


if __name__ == '__main__':
示例#8
0
                        for d in dirs:
                            print os.path.join(root, d)
                        for f in files:
                            file = os.path.join(root, f)
                            shell = '/usr/bin/dos2unix ' + file
                            os.popen(shell)
                            hive = MyHiveBin.HiveBin()
                            hive.load_file_single_overwrite(
                                'to8to_rawdata', file)
                    log.info('Mysql actual kefu yuyue to hive status:ok')
                    break
        else:
            log.info('Mysql actual tar file not exists')
            now_time_stamp = MyTime.datetime_timestamp(MyTime.get_local_time())
            if now_time_stamp > latest_time_stamp:
                log.critical('not find ' + tar_src)
                MyAlarm.send_mail_sms('Get Mysql actual tar file status:fail!')
                return False

        time.sleep(time_rate)
    return True


# Script entry point: load the tar for one date, then start the next script on success.
if __name__ == '__main__':
    # Default date comes from MyTime.get_date(1) (presumably yesterday - confirm);
    # a single command-line argument overrides it.
    date = MyTime.get_date(1)
    if len(sys.argv) == 2:
        date = sys.argv[1]
    # `today` is used by load_file to build the deadline timestamp.
    today = MyTime.get_date(0)
    # The follow-up command runs only when load_file returns a truthy value.
    if load_file(date, today):
        os.system(next_script)