def just_run_sql(cusid, tech, CAT, SQL, mod_name=__name__):
    dbconf = ETLDB.get_computed_config(cusid, tech, mod_name)[CAT]
    conn = DB.get_connection(dbconf[RESTRICT.HOST], dbconf[RESTRICT.PORT],
                             dbconf[RESTRICT.USER], dbconf[RESTRICT.PSWD],
                             dbconf[RESTRICT.DB], dbconf[RESTRICT.CHARSET],
                             mod_name)
    #logger(mod_name).debug(SQL)
    DB.run_sql(conn, SQL)
    conn.close()
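# Usage sketch (the statement is illustrative, not from this codebase): run a
# single fire-and-forget statement on a short-lived connection for the
# category's database.
#
#   just_run_sql(cusid, tech, CAT, 'truncate table `tmp_stage`;', __name__)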
def get_check_columns(tblname, cusid, tech, CAT, mod_name):
    if tblname is None:
        SQL = _get_SQL_check_columns(cusid, tech)
    elif isinstance(tblname, (str, list)):
        SQL = _get_SQL_check_columns(cusid, tech, tblname)
    else:
        # Guard: otherwise SQL would be unbound below.
        raise TypeError('tblname must be None, str or list; got {}'.format(type(tblname)))
    dbconf = ETLDB.get_computed_config(cusid, tech, mod_name)[CAT]
    task_name = 'get columns for checking {}'.format(tblname)
    return DB.get_data_set(dbconf, task_name, SQL, _get_proc_check_columns(),
                           mod_name)
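# Usage sketch (table names are illustrative): tblname selects the scope of
# the generated check query.
#
#   get_check_columns(None, cusid, tech, CAT, __name__)           # all tables
#   get_check_columns('cell_stats', cusid, tech, CAT, __name__)   # one table
#   get_check_columns(['cell_stats', 'node_stats'], cusid, tech, CAT, __name__)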
def _get_columns(cusid, tech):
    dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[_cat]
    tblname = dbconf[RESTRICT.TABLE]
    columns = ETLDB.get_columns(cusid, tech, _cat, tblname=tblname,
                                mod_name=__name__)
    # Drop bookkeeping columns that are not part of the data set proper;
    # pop(c, None) tolerates columns that are absent.
    for c in ['__id', '_type', '_additional1', '_additional2',
              'create_date', 'last_update']:
        columns.pop(c, None)
    for c in columns:
        logger(__name__).debug('{}: {}'.format(c, columns[c]))
    return tblname, columns
    # logger(__name__).debug(columns3)
    # for c in columns3:
    #     logger(__name__).debug(c == cols_order1[columns3[c]['order']])
    """
    return columns3, cols_order1


# ## function chkcol(...)
def chkcol(cusid, tech, date, CAT):
    if type(date) is datetime.date:
        logger(__name__).info('checking column {date} {case}'.format(date=date,
                                                                     case=CAT))
        dsconf = customer.get_default_DS_config_all(cusid, tech)
        dppath = ETL.get_computed_config(cusid, tech, __name__)[RESTRICT.DATA_PATH]
        dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[CAT]
        s = [(l, z, f) for l, z, f in Common.extract_info(
            cusid, tech, date, CAT, __name__)]
        LRCs = set(l for l, _, _ in s)
        zfs = set((z, f, _get_owner(CAT, z, dsconf[RESTRICT.ZIP_FLT][CAT]),
                   _get_table_name(f, dsconf[RESTRICT.CSV_FLT][CAT]))
                  for _, z, f in s)
        LRC_ow_ta_columns = DSColumn.get_all_columns(cusid, tech, date, CAT,
                                                     __name__)
        synthcols = _get_synthesized_columns(LRC_ow_ta_columns, zfs, LRCs)
        workpath = dppath.joinpath(
            '{cusid}/{tech}/{d:%Y%m%d}/tmp/{cat}/chkcol'.format(
                cusid=cusid, tech=tech, d=date, cat=CAT))
        Folder.create(workpath, __name__)
def chkcol(cusid, tech, date, load=False):
    if type(date) is datetime.date:
        logger(__name__).info('checking column {} FM'.format(str(date)))
        ymd = '{:%Y%m%d}'.format(date)
        dppath = ETL.get_computed_config(cusid, tech, __name__)[RESTRICT.DATA_PATH]
        database = DB.get_computed_config(cusid, tech,
                                          __name__)[RESTRICT.FM][RESTRICT.DB]
        advpath = dppath.joinpath('{}/{}/{}/columns/check/{}.sql'.format(
            cusid, tech, ymd, database))
        File.remove(advpath, __name__)
        owner_tables = Common.get_owner_tables(cusid, tech, date, _cat, __name__)
        cols = DSColumn.extract_columns(cusid, tech, date, _cat, owner_tables,
                                        __name__)
        for tblname1 in cols:
            tblname = '{}_{:%Y%m}'.format(tblname1, date)
            dbcols = DB.get_columns(cusid, tech, _cat, tblname, __name__)
            new = dict()
            add = dict()
            alter = dict()
            if not dbcols:
                # Table does not exist yet: everything goes into one CREATE.
                new[tblname] = cols[tblname1]
            else:
                for col in cols[tblname1]:
                    if col not in dbcols:
                        add[col] = cols[tblname1][col]
                    elif not DSColumn.type_equal(cols[tblname1][col], dbcols[col]):
                        #logger(__name__).debug(cols[tblname1][col])
                        #logger(__name__).debug(dbcols[col])
                        alter[col] = cols[tblname1][col]
            if new or add or alter:
                profile = list()
                if new:
                    profile.append('create')
                if add:
                    profile.append('add column')
                if alter:
                    profile.append('change column')
                logger(__name__).info('FM table {}: {}'.format(tblname, profile))
                # Append the advice SQL; open(..., 'a') creates the file, so no
                # separate touch() is needed, and the with-block closes it.
                with open(str(advpath), 'a') as fo:
                    fo.write('use {};\n'.format(database))
                    if new:
                        sql = DSColumn.to_sql(create=new)
                        if load:
                            Common.just_run_sql(cusid, tech, _cat, sql, __name__)
                        fo.write('{};\n'.format(sql))
                    if add:
                        fo.write('{};\n'.format(
                            DSColumn.to_sql(tblname=tblname, add_column=add)))
                    if alter:
                        fo.write('{};\n'.format(
                            DSColumn.to_sql(tblname=tblname, change_column=alter)))
        if advpath.exists():
            logger(__name__).info('advice: "{}"'.format(str(advpath)))
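# Usage sketch (customer id and technology are illustrative): by default chkcol
# only writes the advice .sql file; load=True additionally executes the CREATE
# statements via Common.just_run_sql (ADD/CHANGE COLUMN advice is written to
# the file either way).
#
#   chkcol('cust01', 'lte', datetime.date(2024, 1, 31))             # advise only
#   chkcol('cust01', 'lte', datetime.date(2024, 1, 31), load=True)  # also create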
        c2=c2, tym=tblname_ym, g=_build_group_by(agg_case, group_by),
        ym=_ym(tblname_ym))
    logger(__name__).debug(sql1)
    #logger(__name__).debug(sql2)
    return sql1, sql2


    if case in _cases:
        datapath = _init_datapath(cusid, tech, datapath)
        casepath = datapath.joinpath('{cusid}/{tech}/{d:%Y%m%d}/{case}'.format(
            cusid=cusid, tech=tech, d=date, case=case))
        logger(__name__).info('aiming path: {path}'.format(path=casepath))
        dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[case]
        database = dbconf[RESTRICT.DB]
        i = -1
        for i, folder in enumerate([x for x in casepath.glob('*')
                                    if x.is_dir()]):
            #logger(__name__).debug(folder)
            owner = folder.parent.name
            tblname = _tblname_without_ym(folder.name)
            tblname_ym = folder.name
            #logger(__name__).debug((cusid, tech, owner, tblname_ym, __name__))
            dbcols = set(ETLDB.get_columns(cusid, tech, owner, tblname_ym,
                                           __name__).keys())
            aggcols_spec = ETLDB.get_agg_rules(cusid, tech, owner, tblname,
                                               __name__)
def load(DDL_proc, SQL_gen_proc, date, cusid, tech, CAT, mod_name):
    """
    ## DDL_proc: lambda (owner, tblname) -> SQL
    ##   - tblname: table name in database
    ## SQL_gen_proc: lambda (sqlformat, lines) -> SQL
    ##   - sqlformat: 'insert into some_table ({columns}) values {values};'
    ##   - lines: a list of { column: value }
    """
    if Util.is_function(DDL_proc) and Util.get_arity(DDL_proc) == 2 and\
       Util.is_function(SQL_gen_proc) and Util.get_arity(SQL_gen_proc) == 4:
        cfgbase, dpbase = _get_config(cusid, tech, date, mod_name)
        dbconf_base = ETLDB.get_computed_config(cusid, tech, mod_name)
        dbconf1 = dbconf_base[CAT]
        # Per-owner overrides of the default category config.
        dbconfs = {owner: dbconf_base[owner]
                   for owner in (PCOFNSRAW, PCOFNGRAW, IMSCSFRAW, IMSHSSRAW,
                                 MADNHRRAW, MADODCRAW, IMSDRARAW, XMLNSSRAW,
                                 NOKOBWRAW, NOKOMWRAW, NOKIUMRAW)}
        hsformat = str(cfgbase.joinpath(
            'history/{cat}.{tblname}.{filestem}.upload'))
        fdrpath = dpbase.joinpath(CAT)
        for p in fdrpath.glob('*/*.txt'):
            owner = _get_owner(p.parent.parent.name)
            tblname = p.parent.name
            hspath = pathlib.Path(hsformat.format(cat=CAT, tblname=tblname,
                                                  filestem=p.stem))
            if _has_symbol(hspath):
                # History symbol present: this file was already uploaded.
                continue
            logger(mod_name).debug('symbol create: "{}"'.format(hspath))
            #_put_symbol(hspath)
            dbconf = dbconfs.get(owner, dbconf1)
            ddl = DDL_proc(owner, tblname)
            conn = DB.get_connection(dbconf[RESTRICT.HOST], dbconf[RESTRICT.PORT],
                                     dbconf[RESTRICT.USER], dbconf[RESTRICT.PSWD],
                                     dbconf[RESTRICT.DB], dbconf[RESTRICT.CHARSET],
                                     mod_name)
            block_size = DB.get_variable(conn, 'max_allowed_packet')
            DB.run_sql(conn, ddl)
            # Run all generated INSERTs inside a single explicit transaction.
            cursor = conn.cursor()
            try:
                cursor.execute('start transaction')
                for sql in _gen_SQL_list(SQL_gen_proc, owner, tblname, p,
                                         block_size):
                    sql1 = re.sub(r'(\r\n|\n)', ' ', sql).strip()
                    sql2 = sql1[:512]
                    logger(mod_name).debug('run: length {}; "{}{}"'.format(
                        len(sql1), sql2, '...' if len(sql1) > len(sql2) else ''))
                    cursor.execute(sql)
                cursor.connection.commit()
            except Exception:
                conn.rollback()
                traceback.print_exc()
            finally:
                cursor.close()
                conn.close()
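# Usage sketch. Note that the guard above requires SQL_gen_proc to have arity
# 4 even though the docstring names only (sqlformat, lines); arg3/arg4 below
# are placeholders for illustration, not the confirmed signature.
#
#   def ddl(owner, tblname):
#       return 'create table if not exists `{}` (`_ID` int);'.format(tblname)
#
#   def gen(sqlformat, lines, arg3, arg4):  # placeholders to satisfy arity 4
#       ...
#
#   load(ddl, gen, datetime.date(2024, 1, 31), cusid, tech, CAT, __name__)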
return "delete from `{database}`.`{table_to}`; insert into `{database}`.`{table_to}` select * from `{database}`.`{table_from}` where date(`TIME`) = '{d:%Y-%m-%d}'".format( database=dbname, table_to='{}_latest'.format(tblname), table_from='{}_{:%Y%m}'.format(tblname, date), d=date) # ## function chkcol(...) if type(date) is datetime.date: logger(__name__).info('aggregate {date} {case}'.format(date=date, case=CAT)) dsconf = customer.get_default_DS_config_all(cusid, tech) dppath = ETL.get_computed_config(cusid, tech, __name__)[RESTRICT.DATA_PATH] dbconf = ETLDB.get_computed_config(cusid, tech, __name__)[CAT] s = [(l, z, f) for l, z, f in Common.extract_info( cusid, tech, date, CAT, __name__)] #LRCs = set([ l for _, (l, _, _) in enumerate(s) ]) zfs = set([(z, f, _get_owner(CAT, z, dsconf[RESTRICT.ZIP_FLT][CAT]), _get_table_name(f, dsconf[RESTRICT.CSV_FLT][CAT])) for _, (_, z, f) in enumerate(s)]) #LRC_ow_ta_columns = DSColumn.get_all_columns(cusid, tech, date, CAT, __name__) #synthcols = _get_synthesized_columns(LRC_ow_ta_columns, zfs, LRCs) DB_creating = set() except_cols = set([ '_ID', 'LRC', 'TIME', 'PERIOD_START_TIME', 'LAST_MODIFIED', 'LAST_MODIFIER' ]) workpath = dppath.joinpath( '{cusid}/{tech}/{d:%Y%m%d}/tmp/{cat}/agg'.format(cusid=cusid,