def _get_output_table_name(cusid, tech, CAT, filename): if CAT == 'CO': return 'COMMON_OBJECT' elif CAT == 'FM': return 'FX_ALARM' elif CAT in set([PCOFNSRAW, PCOFNGRAW, IMSCSFRAW, IMSHSSRAW, MADNHRRAW, MADODCRAW, IMSDRARAW, XMLNSSRAW, NOKOBWRAW, NOKOMWRAW, NOKIUMRAW]): reCAT = DataSource.get_computed_config(cusid, tech, __name__)[RESTRICT.CSV_FLT][RESTRICT.PM] m = reCAT.match(filename) return m.group(1) else: reCAT = DataSource.get_computed_config(cusid, tech, __name__)[RESTRICT.CSV_FLT][CAT] m = reCAT.match(filename) return m.group(1)
def get_owner_tables(cusid, tech, date, CAT, mod_name):
    """Collect the set of (owner, TABLE) pairs for *date* and category *CAT*.

    Returns None when *date* is not a datetime.date.  Owner is fixed for CM
    ('CMDLTE') and FM ('FM'); for PM-style categories it is taken from the
    zip filter's first capture group.  Table names come from the CSV filter's
    first capture group and are upper-cased.
    """
    if type(date) is not datetime.date:
        return None
    conf = DataSource.get_computed_config(cusid, tech, mod_name)
    re_zip = conf[RESTRICT.ZIP_FLT][CAT]
    re_csv = conf[RESTRICT.CSV_FLT][CAT]
    # Materialize first, keeping only entries whose zip name passes the filter.
    matched = [(zippath, filename)
               for _, zippath, filename in extract_info(cusid, tech, date, CAT, mod_name)
               if re_zip.match(zippath.name)]
    result = set()
    for zippath, filename in matched:
        if CAT == RESTRICT.CM:
            owner = 'CMDLTE'
        elif CAT == RESTRICT.FM:
            owner = 'FM'
        elif CAT in {RESTRICT.PM, RESTRICT.PCOFNSRAW, RESTRICT.PCOFNGRAW,
                     RESTRICT.IMSCSFRAW, RESTRICT.IMSHSSRAW, RESTRICT.MADNHRRAW,
                     RESTRICT.MADODCRAW, RESTRICT.IMSDRARAW, RESTRICT.XMLNSSRAW,
                     RESTRICT.NOKOBWRAW, RESTRICT.NOKOMWRAW, RESTRICT.NOKIUMRAW}:
            owner = re_zip.match(zippath.name).group(1)
        # NOTE(review): an unrecognized CAT leaves `owner` unbound on the first
        # iteration (NameError) — same as the original; confirm CAT is always
        # one of the handled categories.
        result.add((owner, re_csv.match(filename).group(1).upper()))
    return result
def get_data_source_list(cusid, tech, date):
    """Return the FTP directory listing for *date*'s data-source path.

    Returns None when *date* is not a datetime.date.  The remote path is
    built from the configured base path plus the date-formatted category
    segment, and listed with the configured protocol/credentials.
    """
    if type(date) is not datetime.date:
        return None
    ds = DataSource.get_computed_config(cusid, tech, __name__)
    remote = pathlib.Path('{path}/{category}'.format(
        path=ds[RESTRICT.PATH],
        category=ds[RESTRICT.CTGR].format(date=date)))
    return FTP.nlst(ds[RESTRICT.PROTOCOL], ds[RESTRICT.HOST], ds[RESTRICT.PORT],
                    ds[RESTRICT.USER], ds[RESTRICT.PSWD], remote.as_posix(),
                    __name__)
def extract_info(cusid, tech, date, CAT, mod_name):
    """Yield (LRC, zippath, member_name) for every zip member matching the
    category's CSV filter.

    Downloads the zips for *CAT*, then scans each archive's member list and
    yields the members whose names match the configured CSV filter.  Yields
    nothing when *date* is not a datetime.date.
    """
    if type(date) is not datetime.date:
        return
    cfgbase, dpbase = _get_config(cusid, tech, date, mod_name)
    LRC_z_s = _download_zips(cusid, tech, CAT, dpbase, cfgbase, mod_name=mod_name)
    reCAT = DataSource.get_computed_config(cusid, tech, mod_name)[RESTRICT.CSV_FLT][CAT]
    for LRC, zippath in LRC_z_s:
        # BUG FIX: the archive was opened/closed manually, leaking the handle
        # if infolist() raised; the with-statement guarantees closure.
        with zipfile.ZipFile(str(zippath)) as zf:
            infolist = zf.infolist()
        for zi in infolist:
            if reCAT.match(zi.filename):
                yield (LRC, zippath, zi.filename)
def _extract_PM(cusid, tech, date, CAT=_cat):
    """Yield PM extraction tuples for every mapped (owner, table) pair.

    For each (LRC, zippath, filename) from Common.extract_info, the owner is
    the zip filter's first capture group and the table name the CSV filter's;
    entries with no PM mapping are skipped.  Yields nothing when *date* is
    not a datetime.date.  Note: the CAT default binds module-level _cat at
    definition time.
    """
    if type(date) is not datetime.date:
        return
    dsconfig = DataSource.get_computed_config(cusid, tech, __name__)
    pm_zflt = dsconfig[RESTRICT.ZIP_FLT][CAT]
    pm_cflt = dsconfig[RESTRICT.CSV_FLT][CAT]
    for LRC, zippath, filename in Common.extract_info(cusid, tech, date, CAT, __name__):
        owner = pm_zflt.match(zippath.name).group(1)
        tblname = pm_cflt.match(filename).group(1)
        mapping = _do_pm_mapping(LRC, owner, tblname)
        if mapping is None:
            continue
        prefix, id_col, agg_terms = mapping
        yield LRC, zippath, filename, prefix, owner, tblname, id_col, agg_terms
def _get_owner_table(cusid, tech, CAT, zippath, filename):
    """Resolve (owner, table) for one zip/member pair, or None.

    Returns None when either the zip name or the member filename fails its
    category filter, or when CAT is not one of CM/FM/PM.  Owner is fixed for
    CM ('CMDLTE') and FM ('FM'); for PM it is the zip filter's first capture
    group.  The table name is always the CSV filter's first capture group.
    """
    conf = DataSource.get_computed_config(cusid, tech, __name__)
    m_z = conf[RESTRICT.ZIP_FLT][CAT].match(str(zippath.name))
    if m_z is None:
        return None
    m_c = conf[RESTRICT.CSV_FLT][CAT].match(filename)
    if m_c is None:
        return None
    # BUG FIX: the original condition was `CAT == RESTRICT.CM or CAT == RESTRICT.CM`
    # — the second term was a duplicate (dead code).  TODO(review): confirm
    # whether it was meant to be another category (e.g. RESTRICT.CO).
    if CAT == RESTRICT.CM:
        return 'CMDLTE', m_c.group(1)
    if CAT == RESTRICT.FM:
        return 'FM', m_c.group(1)
    if CAT == RESTRICT.PM:
        return m_z.group(1), m_c.group(1)
    return None  # unrecognized category; original fell through implicitly
def extract_info(cusid, tech, date, mod_name):
    """Yield (LRC, zippath, member_name) for DB-category zip members matching
    the DB CSV filter.

    Downloads the DB zips via Common.download_zips (no limit), then scans
    each archive's member list.  Yields nothing when *date* is not a
    datetime.date.
    """
    if type(date) is not datetime.date:
        return
    cfgbase, dpbase = Common.get_config(cusid, tech, date, mod_name)
    LRC_z_s = Common.download_zips(cusid, tech, RESTRICT.DB, dpbase, cfgbase,
                                   limit=None, mod_name=mod_name)
    reCAT = DataSource.get_computed_config(
        cusid, tech, __name__)[RESTRICT.CSV_FLT][RESTRICT.DB]
    for LRC, zippath in LRC_z_s:
        # BUG FIX: the archive was opened/closed manually, leaking the handle
        # if infolist() raised; the with-statement guarantees closure.
        with zipfile.ZipFile(str(zippath)) as zf:
            infolist = zf.infolist()
        for zi in infolist:
            if reCAT.match(zi.filename):
                yield (LRC, zippath, zi.filename)
def initialize_working_space(cusid, tech, date=None):
    """Initialize the working space: base config/folders when *date* is None,
    otherwise the per-date folder tree.

    No-op when *date* is neither None nor a datetime.date.
    """
    base = wic.find_config_path()
    if date is None:
        _initialize_base_workspace(cusid, tech, base)
    elif type(date) is datetime.date:
        _initialize_dated_workspace(cusid, tech, base, date)


def _initialize_base_workspace(cusid, tech, base):
    # One-time setup: column folders, config files, and the data-path cache.
    logger(__name__).info('initializing "{}".....'.format(base))
    for folder in ['columns', 'columns_bak']:
        Folder.create(base.joinpath(folder), __name__)
    create_config_file(cusid, tech, __name__)
    DataSource.create_config_file(cusid, tech, __name__)
    DB.create_config_file(cusid, tech, __name__)
    dpbase1 = _find_data_path(cusid, tech)
    if dpbase1 is None:
        return
    for folder in ['cache']:
        Folder.create(dpbase1.joinpath(folder), __name__)
    # Seed JSON files that must exist; skip those already present.
    for p, c in [('COMMON_OBJECT.json',
                  lambda f: File.dump_JSON(f, dict(), __name__))]:
        f = dpbase1.joinpath(p)
        if f.exists():
            logger(__name__).info('found: "{}"'.format(str(f)))
        else:
            c(f)


def _initialize_dated_workspace(cusid, tech, base, date):
    # Per-date setup: record the remote file list once (files.txt acts as a
    # "done" marker) and create the per-category/LRC folder tree.
    ymd = '{:%Y%m%d}'.format(date)
    base = base.joinpath(ymd)
    fcheck = base.joinpath('files.txt')
    logger(__name__).info('initailizing "{}"...'.format(fcheck))
    if not fcheck.exists():
        Folder.create(base, __name__)
        filelist = get_data_source_list(cusid, tech, date)
        # BUG FIX: was `if filelist == []` — truthiness test also guards None.
        if not filelist:
            sys.exit()
        with open(str(fcheck), 'w') as fo:
            for ln in filelist:
                fo.write('{}\n'.format(ln))
        # (removed redundant fo.close(): the with-statement closes the file)
        flz = DataSource.get_computed_config(cusid, tech, __name__)[RESTRICT.ZIP_FLT]
        LRCs = set()
        for p in filelist:
            fn = pathlib.Path(p).name
            for r in flz:
                m = flz[r].match(fn)
                # Simplified from `type(m) is type(re.match('', ''))`:
                # match() returns a re.Match or None, so this is equivalent.
                if m is not None:
                    try:
                        # BUG FIX: was `r is 'PM'` — identity comparison with a
                        # string literal is unreliable; use equality.
                        LRCs.add((r, m.group(3) if r == 'PM' else m.group(1)))
                    except Exception as e:
                        # Best-effort: a filter without the expected group is
                        # logged and skipped, not fatal.
                        logger(__name__).debug('{}: {}'.format(m, e))
                    break
        for CAT, LRC in LRCs:
            Folder.create(base.joinpath(CAT).joinpath(LRC), __name__)
    # NOTE(review): the history/ymd folder creation below is assumed to run on
    # every call (not only when files.txt was missing) — confirm against the
    # original module's indentation.
    Folder.create(base.joinpath('history'), __name__)
    dpbase = _find_data_path(cusid, tech)
    ymdbase = dpbase.joinpath(ymd)
    for fdrpath in ['tmp', 'columns/check', 'cache/OC', RESTRICT.CO,
                    RESTRICT.CM, RESTRICT.OC, RESTRICT.DC, RESTRICT.FM,
                    RESTRICT.PM]:
        Folder.create(ymdbase.joinpath(fdrpath), __name__)