Пример #1
0
 def crt_tb_from_src_sys(self, src_tb_nm, src_db_cd, trgt_schm_nm='ods'):
     """
     Create or verify the target table structure from a source-system table.

     Reads the structure of ``src_tb_nm`` from the source database, builds
     the corresponding DDL for the target database and applies it through
     ``upd_tb_strct`` (with ``drop_direct=False``).

     :param src_tb_nm: source table name
     :param src_db_cd: source database code, passed to ``Conn``
     :param trgt_schm_nm: schema used when the target table name has to be
         generated automatically (only used when ``self.tb_nm`` is None)
     :return: whatever ``upd_tb_strct`` returns
     :raises Exception: if no structure is found for the source table
     """
     trgt_tb_nm = self.tb_nm
     trgt_db_cd = self.conn.db_cd

     # Always release the source connection, even if the metadata lookup fails.
     src_conn = Conn(src_db_cd)
     try:
         src_meta = src_conn.get_tb_strct(src_tb_nm)  # fetch table structure
     finally:
         src_conn.close()

     if not src_meta:
         raise Exception("源数据库目标表表%s不存在" % src_tb_nm)

     if trgt_tb_nm is None:
         # No target table name configured: generate one automatically.
         trgt_tb_nm = self.get_auto_el_tb_nm(schema=trgt_schm_nm)
     crt_tb_sql = crt_trgt_db_sql(src_meta, trgt_tb_nm, trgt_db_cd)

     # Always release the target connection, even if the DDL update fails.
     trgt_conn = Conn(trgt_db_cd)
     try:
         return trgt_conn.upd_tb_strct(crt_tb_sql,
                                       schm_tb_nm=trgt_tb_nm,
                                       drop_direct=False)
     finally:
         trgt_conn.close()
Пример #2
0
def multi_proc_el(df, batch_id, processes=5, trgt_schm_nm='ods'):
    """
    Process the rows of ``df`` in a process pool (data import or table creation).

    Each row is converted to a dict and passed as keyword arguments to
    ``_el_run``. After all workers finish, ``check_el_task`` is called for
    the batch with ``check_error_only=True``.

    :param df: dataframe whose columns supply the keyword arguments of
        ``_el_run``: el_type, src_tb_nm, src_db_cd, trgt_tb_nm, trgt_db_cd,
        read_where, pre_sql, parallel_num (``batch_id`` is filled in here)
    :param batch_id: batch identifier written into every row
    :param processes: number of worker processes
    :param trgt_schm_nm: target schema, used when a row has no target
        table name and one has to be generated
    :return: None
    """
    # 'records' is the documented orient value; the abbreviation 'record'
    # was deprecated and is rejected by pandas >= 2.0.
    rows = df.to_dict(orient='records')
    logger.info("批量导入 启动进程数: %s,开始批处理" % (processes, ))
    pool = multiprocessing.Pool(processes=processes)
    try:
        for row in rows:
            row['batch_id'] = batch_id
            if row['trgt_tb_nm'] is None:
                row['trgt_tb_nm'] = get_targt_tb_nm(row['src_tb_nm'],
                                                    row['src_db_cd'],
                                                    schema=trgt_schm_nm)
            # The async result is not collected, so a worker exception is
            # not re-raised here; failures are presumably surfaced by
            # check_el_task below -- TODO confirm.
            pool.apply_async(_el_run, kwds=row)
    finally:
        # Close and join even if scheduling failed part-way, so no worker
        # processes are left behind.
        pool.close()
        pool.join()
    logger.info("完成批处理")

    conn = Conn()
    try:
        check_el_task(batch_id, conn, batch_nm='T1', check_error_only=True)
    finally:
        conn.close()
Пример #3
0
def deal():
    """
    Processing entry point.

    Opens a connection to the default database, calls ``crt_el_log_tb``,
    applies the table structure via ``upd_tb_strct`` (``drop_direct=True``)
    and, if that returns a truthy value, runs ``etl_meta_el``. The
    connection is closed on every exit path.
    """
    conn = Conn(default_db_cd)
    try:
        # Inside the try block so that a failure here still closes the connection.
        crt_el_log_tb(conn)
        # Verify the table structure.
        if conn.upd_tb_strct(crt_tb_sql=ddl_sql, schm_tb_nm=tb_nm, drop_direct=True):
            etl_meta_el(conn)
    finally:
        conn.close()
Пример #4
0
def deal():
    """
    Processing entry point.

    Opens the 'ibm' connection, applies the table structure inside the
    connection's context manager and, when that reports a truthy result,
    runs the product-category step. The connection is closed afterwards.

    :return: None
    """
    conn = Conn('ibm')
    try:
        with conn:
            # Table structure verification.
            strct_ok = conn.upd_tb_strct(crt_tb_sql=ddl_sql,
                                         schm_tb_nm=tb_nm,
                                         drop_direct=proc_all_flag)
            if strct_ok:
                # Product category.
                dim_prod_cat(conn)
    finally:
        conn.close()
Пример #5
0
def src_tb_sync_ods(src_tb_nm,
                    src_db_cd,
                    trgt_tb_nm=None,
                    trgt_db_cd=default_db_cd,
                    trgt_schm_nm='ods',
                    if_el_data=True):
    """
    Create or verify the target table from a source table, optionally loading data.

    Reads the structure of ``src_tb_nm`` from the source database, builds
    the DDL for the target database and applies it with ``upd_tb_strct``
    (``drop_direct=False``). When ``if_el_data`` is true, ``datax`` is then
    called to import the data.

    :param src_tb_nm: source table name
    :param src_db_cd: source database code, passed to ``Conn``
    :param trgt_tb_nm: target table name; generated with
        ``get_targt_tb_nm`` when None
    :param trgt_db_cd: target database code
    :param trgt_schm_nm: schema used when generating the target table name
    :param if_el_data: whether to import the data after the structure step
    :return: whatever ``upd_tb_strct`` returns
    :raises Exception: if no structure is found for the source table
    """
    # Always release the source connection, even if the metadata lookup fails.
    src_conn = Conn(src_db_cd)
    try:
        src_meta = src_conn.get_tb_strct(src_tb_nm)  # fetch table structure
    finally:
        src_conn.close()

    if not src_meta:
        raise Exception("源数据库目标表表%s不存在" % src_tb_nm)

    if trgt_tb_nm is None:
        # No target table name given: generate one automatically.
        trgt_tb_nm = get_targt_tb_nm(src_meta['tb_nm'],
                                     src_meta['db_cd'],
                                     schema=trgt_schm_nm)
    crt_tb_sql = crt_trgt_db_sql(src_meta, trgt_tb_nm, trgt_db_cd)

    # Always release the target connection, even if the DDL update fails.
    trgt_conn = Conn(trgt_db_cd)
    try:
        rs = trgt_conn.upd_tb_strct(crt_tb_sql,
                                    schm_tb_nm=trgt_tb_nm,
                                    drop_direct=False)
    finally:
        trgt_conn.close()

    if if_el_data:
        # Structure was just handled above, so skip the check inside datax.
        datax(src_tb_nm,
              src_db_cd,
              trgt_tb_nm,
              write_conn=trgt_db_cd,
              check_tb_strct=False,
              logs_print=False)
    return rs
Пример #6
0
def run_el_with_batch(batch_id,
                      el_type,
                      read_tb,
                      read_conn,
                      write_tb,
                      write_conn='DPS',
                      read_where='',
                      pre_sql='truncate',
                      parallel_num=2,
                      check_tb_strct=True,
                      logs_print=True):
    """
    Synchronize one table's data as part of a batch, tracking job status.

    The sync is skipped when the table is currently being processed or
    when the recorded status for this batch is already 1 (success).
    Otherwise the job is marked as started, ``run_el`` is executed, and the
    status is updated to 1 on success or 2 on failure.

    :param batch_id: batch identifier
    :param el_type: 'datax' or 'pypd'
    :param read_tb: name of the table to read
    :param read_conn: code of the database to read from, e.g. DPS CRM PFUND
    :param write_tb: name of the table to write, e.g. dw.dim_prod; generated
        with ``get_targt_tb_nm`` when None
    :param write_conn: code of the database to write to, e.g. DPS CRM PFUND
    :param read_where: SQL where condition
    :param pre_sql: SQL run before the import; 'truncate' empties the
        table, other SQL is allowed
    :param parallel_num: number of concurrent channels
    :param check_tb_strct: whether to verify the table structure
    :param logs_print: whether to print logs to the terminal; logs are
        stored under datax/log regardless
    :return: None
    :raises Exception: re-raised with the error text when the sync fails
        or ``run_el`` returns a falsy result
    """
    dw_conn = Conn(write_conn)
    # A single outer finally guarantees the connection is closed on every
    # path, including failures in the status lookups below.
    try:
        if write_tb is None:
            write_tb = get_targt_tb_nm(read_tb, read_conn)
        stat = get_el_tb_job_stat(batch_id, write_tb, dw_conn)  # job status

        if is_runing(write_tb, dw_conn):
            # The job is already being processed: record that and skip.
            # Returning here is the fix; previously execution fell through
            # and the sync ran anyway.
            logger.info("el_type %s 处理表: %s 正在处理中不再处理" % (el_type, write_tb))
            # NOTE(review): batch_stat=1 is also the success code tested
            # below; confirm that marking a skipped run as 1 is intended.
            el_upd_stat(dw_conn,
                        batch_id,
                        write_tb,
                        batch_stat=1,
                        error_msg="正在处理中不再处理")
            return

        if stat == 1:
            # stat == 1 means this batch already succeeded; do not rerun.
            el_upd_stat(dw_conn,
                        batch_id,
                        write_tb,
                        batch_stat=1,
                        error_msg="多次执行,执行跳过")
            logger.warning("该批次下数据已经同步过,不再同步。 batch_id %s el_type %s 处理表: %s" %
                           (batch_id, el_type, write_tb))
            return

        el_start_stat(dw_conn, batch_id, write_tb)
        try:
            logger.info("开始同步 batch_id %s el_type %s 处理表: %s" %
                        (batch_id, el_type, write_tb))
            rs = run_el(el_type,
                        read_tb,
                        read_conn,
                        write_tb,
                        write_conn,
                        read_where,
                        pre_sql,
                        parallel_num,
                        check_tb_strct,
                        logs_print,
                        batch_dt=batch_id)
            if not rs:
                # A falsy result without an exception is treated as a failure.
                raise Exception("不知名错误")
            el_upd_stat(dw_conn, batch_id, write_tb, batch_stat=1)
            logger.info("同步成功 batch_id %s el_type %s 处理表: %s" %
                        (batch_id, el_type, write_tb))
        except Exception as e:
            err_msg = str(e)
            logger.error("同步错误 batch_id %s el_type %s 处理表: %s ERROR: %s" %
                         (batch_id, el_type, write_tb, err_msg))
            # Record the failure (status 2) and notify before re-raising.
            el_upd_stat(dw_conn,
                        batch_id,
                        write_tb,
                        batch_stat=2,
                        error_msg=err_msg)
            send_error_msg(err_msg, write_tb, if_to_wx=False)
            raise Exception(err_msg)
    finally:
        dw_conn.close()