예제 #1
0
파일: common.py 프로젝트: YauHsien/some_ETL
def _get_output_table_name(cusid, tech, CAT, filename):
    if CAT == 'CO':
        return 'COMMON_OBJECT'
    elif CAT == 'FM':
        return 'FX_ALARM'
    elif CAT in set([PCOFNSRAW, PCOFNGRAW, IMSCSFRAW, IMSHSSRAW, MADNHRRAW, MADODCRAW,
                     IMSDRARAW, XMLNSSRAW, NOKOBWRAW, NOKOMWRAW, NOKIUMRAW]):
        reCAT = DataSource.get_computed_config(cusid, tech, __name__)[RESTRICT.CSV_FLT][RESTRICT.PM]
        m = reCAT.match(filename)
        return m.group(1)
    else:
        reCAT = DataSource.get_computed_config(cusid, tech, __name__)[RESTRICT.CSV_FLT][CAT]
        m = reCAT.match(filename)
        return m.group(1)
예제 #2
0
파일: common.py 프로젝트: YauHsien/some_ETL
def get_owner_tables(cusid, tech, date, CAT, mod_name):
    """Return the set of ``(owner, TABLE)`` pairs found for ``date``.

    Every ``(zippath, filename)`` reported by ``extract_info`` whose zip
    name matches the ZIP filter of ``CAT`` contributes one pair.  The table
    name is group 1 of the CSV filter applied to the file name, upper-cased.
    The owner is 'CMDLTE' for CM, 'FM' for FM, and group 1 of the ZIP filter
    for PM and the raw categories listed below.

    Returns None when ``date`` is not exactly a ``datetime.date``.
    """
    if type(date) is not datetime.date:
        return None

    filters = DataSource.get_computed_config(cusid, tech, mod_name)
    zip_pattern = filters[RESTRICT.ZIP_FLT][CAT]
    csv_pattern = filters[RESTRICT.CSV_FLT][CAT]

    # Drain extract_info completely before processing any entry.
    candidates = [
        (zippath, filename)
        for _, zippath, filename in extract_info(cusid, tech, date, CAT, mod_name)
        if zip_pattern.match(zippath.name)
    ]

    pairs = set()
    for archive, member in candidates:
        if CAT == RESTRICT.CM:
            owner = 'CMDLTE'
        elif CAT == RESTRICT.FM:
            owner = 'FM'
        elif CAT in {RESTRICT.PM, RESTRICT.PCOFNSRAW, RESTRICT.PCOFNGRAW,
                     RESTRICT.IMSCSFRAW, RESTRICT.IMSHSSRAW,
                     RESTRICT.MADNHRRAW, RESTRICT.MADODCRAW,
                     RESTRICT.IMSDRARAW, RESTRICT.XMLNSSRAW,
                     RESTRICT.NOKOBWRAW, RESTRICT.NOKOMWRAW,
                     RESTRICT.NOKIUMRAW}:
            owner = zip_pattern.match(archive.name).group(1)
        # NOTE(review): there is no fallback branch, so ``owner`` is never
        # assigned for any other category -- confirm callers never pass one.

        table = csv_pattern.match(member).group(1)
        pairs.add((owner, table.upper()))

    return pairs
예제 #3
0
def get_data_source_list(cusid, tech, date):
    """Return the ``FTP.nlst`` listing of the data-source folder for ``date``.

    The remote folder is ``<PATH>/<CTGR>`` from the computed data-source
    configuration, with ``date`` substituted into the category template.
    Connection details (protocol, host, port, user, password) come from the
    same configuration.

    Returns None when ``date`` is not exactly a ``datetime.date``.
    """
    if type(date) is not datetime.date:
        return None

    conf = DataSource.get_computed_config(cusid, tech, __name__)
    root = conf[RESTRICT.PATH]
    category = conf[RESTRICT.CTGR].format(date=date)
    remote_dir = pathlib.Path(
        '{path}/{category}'.format(path=root, category=category))

    return FTP.nlst(
        conf[RESTRICT.PROTOCOL],
        conf[RESTRICT.HOST],
        conf[RESTRICT.PORT],
        conf[RESTRICT.USER],
        conf[RESTRICT.PSWD],
        remote_dir.as_posix(),
        __name__,
    )
예제 #4
0
파일: common.py 프로젝트: YauHsien/some_ETL
def extract_info(cusid, tech, date, CAT, mod_name):
    """Yield ``(LRC, zippath, member_name)`` for matching zip members.

    The zip archives for ``CAT`` are obtained through ``_download_zips``.
    For each archive, every member whose file name matches the CSV filter
    of ``CAT`` is yielded.  Only the member listing is read; nothing is
    extracted here.

    Yields nothing when ``date`` is not exactly a ``datetime.date``.
    """
    if type(date) is not datetime.date:
        return

    cfgbase, dpbase = _get_config(cusid, tech, date, mod_name)
    archives = _download_zips(cusid, tech, CAT, dpbase, cfgbase,
                              mod_name=mod_name)
    csv_pattern = DataSource.get_computed_config(
        cusid, tech, mod_name)[RESTRICT.CSV_FLT][CAT]

    for LRC, zippath in archives:
        # The archive is closed again before any member is yielded.
        with zipfile.ZipFile(str(zippath)) as archive:
            members = archive.infolist()

        wanted = [info.filename for info in members
                  if csv_pattern.match(info.filename)]
        for member_name in wanted:
            yield (LRC, zippath, member_name)
예제 #5
0
def _extract_PM(cusid, tech, date, CAT= _cat):
    """Yield the PM files of ``date`` together with their mapping details.

    For every ``(LRC, zippath, filename)`` from ``Common.extract_info`` the
    owner is group 1 of the ZIP filter on the zip name and the table name is
    group 1 of the CSV filter on the file name.  Entries for which
    ``_do_pm_mapping`` returns None are skipped; for the rest the mapping's
    ``(prefix, id_col, agg_terms)`` is yielded alongside the file details.

    Yields nothing when ``date`` is not exactly a ``datetime.date``.
    """
    if type(date) is not datetime.date:
        return

    conf = DataSource.get_computed_config(cusid, tech, __name__)
    zip_pattern = conf[RESTRICT.ZIP_FLT][CAT]
    csv_pattern = conf[RESTRICT.CSV_FLT][CAT]

    entries = Common.extract_info(cusid, tech, date, CAT, __name__)
    for LRC, zippath, filename in entries:
        owner = zip_pattern.match(zippath.name).group(1)
        tblname = csv_pattern.match(filename).group(1)

        mapping = _do_pm_mapping(LRC, owner, tblname)
        if mapping is None:
            continue

        prefix, id_col, agg_terms = mapping
        yield LRC, zippath, filename, prefix, owner, tblname, id_col, agg_terms
예제 #6
0
파일: common.py 프로젝트: YauHsien/some_ETL
def _get_owner_table(cusid, tech, CAT, zippath, filename):
    """Return ``(owner, table)`` for one file inside one zip, or None.

    None is returned when the zip name does not match the ZIP filter of
    ``CAT``, when ``filename`` does not match the CSV filter of ``CAT``, or
    when ``CAT`` is none of CM, FM or PM.  The table is always group 1 of
    the CSV match; the owner is 'CMDLTE' for CM, 'FM' for FM and group 1 of
    the ZIP match for PM.
    """
    filters = DataSource.get_computed_config(cusid, tech, __name__)
    zip_pattern = filters[RESTRICT.ZIP_FLT][CAT]
    csv_pattern = filters[RESTRICT.CSV_FLT][CAT]

    zip_match = zip_pattern.match(str(zippath.name))
    if zip_match is None:
        return None

    csv_match = csv_pattern.match(filename)
    if csv_match is None:
        return None

    # NOTE(review): the original condition compared CAT with RESTRICT.CM
    # twice; the second operand may have been meant to be another category
    # -- confirm.
    if CAT == RESTRICT.CM:
        return 'CMDLTE', csv_match.group(1)
    if CAT == RESTRICT.FM:
        return 'FM', csv_match.group(1)
    if CAT == RESTRICT.PM:
        return zip_match.group(1), csv_match.group(1)
    return None
예제 #7
0
def extract_info(cusid, tech, date, mod_name):
    """Yield ``(LRC, zippath, member_name)`` for matching DB zip members.

    The DB zip archives are obtained through ``Common.download_zips`` with
    no limit.  For each archive, every member whose file name matches the DB
    CSV filter is yielded.  Only the member listing is read.

    Yields nothing when ``date`` is not exactly a ``datetime.date``.
    """
    if type(date) is not datetime.date:
        return

    cfgbase, dpbase = Common.get_config(cusid, tech, date, mod_name)
    archives = Common.download_zips(
        cusid, tech, RESTRICT.DB, dpbase, cfgbase,
        limit=None, mod_name=mod_name)

    # NOTE(review): the filter is looked up with __name__ while the two
    # calls above use mod_name -- confirm this is intentional.
    csv_filters = DataSource.get_computed_config(
        cusid, tech, __name__)[RESTRICT.CSV_FLT]
    db_pattern = csv_filters[RESTRICT.DB]

    for LRC, zippath in archives:
        # The archive is closed again before any member is yielded.
        with zipfile.ZipFile(str(zippath)) as archive:
            members = archive.infolist()

        wanted = [info.filename for info in members
                  if db_pattern.match(info.filename)]
        for member_name in wanted:
            yield (LRC, zippath, member_name)
예제 #8
0
def initialize_working_space(cusid, tech, date=None):
    """Create the folders and seed files the ETL run works in.

    Without ``date``:
        * creates ``columns`` and ``columns_bak`` under the config path,
        * creates the three configuration files (this module's,
          ``DataSource``'s and ``DB``'s),
        * when a data path is found, creates its ``cache`` folder and an
          empty ``COMMON_OBJECT.json`` there unless the file already exists.

    With a ``datetime.date``:
        * works in ``<config path>/<YYYYMMDD>``; if ``files.txt`` is missing
          there, the data-source listing is fetched and written to it, one
          ``<category>/<LRC>`` folder is created per listing entry that
          matches a ZIP filter, and a ``history`` folder is created,
        * always creates the per-date working folders under the data path.

    Any other ``date`` value only resolves the config path and returns.

    The process exits via ``sys.exit()`` when the listing is an empty list.
    """
    base = wic.find_config_path()

    if date is None:
        logger(__name__).info('initializing "{}".....'.format(base))
        for folder in ['columns', 'columns_bak']:
            Folder.create(base.joinpath(folder), __name__)

        create_config_file(cusid, tech, __name__)
        DataSource.create_config_file(cusid, tech, __name__)
        DB.create_config_file(cusid, tech, __name__)

        dpbase1 = _find_data_path(cusid, tech)
        if dpbase1 is not None:
            Folder.create(dpbase1.joinpath('cache'), __name__)

            # Seed the common-object store, never overwriting an existing one.
            seed = dpbase1.joinpath('COMMON_OBJECT.json')
            if seed.exists():
                logger(__name__).info('found: "{}"'.format(str(seed)))
            else:
                File.dump_JSON(seed, dict(), __name__)

    elif type(date) is datetime.date:
        ymd = '{:%Y%m%d}'.format(date)
        base = base.joinpath(ymd)
        fcheck = base.joinpath('files.txt')
        logger(__name__).info('initailizing "{}"...'.format(fcheck))

        # files.txt marks a date whose listing has already been fetched.
        if not fcheck.exists():
            Folder.create(base, __name__)
            filelist = get_data_source_list(cusid, tech, date)
            if filelist == []:
                sys.exit()

            # The with-block closes the file; no explicit close() is needed.
            with open(str(fcheck), 'w') as fo:
                for ln in filelist:
                    fo.write('{}\n'.format(ln))

            flz = DataSource.get_computed_config(cusid, tech,
                                                 __name__)[RESTRICT.ZIP_FLT]
            LRCs = set()
            for p in filelist:
                fn = pathlib.Path(p).name
                for r in flz:
                    m = flz[r].match(fn)
                    if m is None:
                        continue
                    try:
                        # Compare by value: ``is`` on a string literal tests
                        # identity and can be False for an equal string.
                        LRCs.add((r, m.group(3) if r == 'PM' else m.group(1)))
                    except Exception as e:
                        logger(__name__).debug('{}: {}'.format(m, e))
                    # Only the first matching filter is used for a file.
                    break
            for CAT, LRC in LRCs:
                Folder.create(base.joinpath(CAT).joinpath(LRC), __name__)

            Folder.create(base.joinpath('history'), __name__)

        # NOTE(review): the no-date branch treats a None data path as
        # possible, but it is not checked here -- confirm it cannot be None.
        dpbase = _find_data_path(cusid, tech)
        ymdbase = dpbase.joinpath(ymd)
        for fdrpath in [
                'tmp', 'columns/check', 'cache/OC', RESTRICT.CO, RESTRICT.CM,
                RESTRICT.OC, RESTRICT.DC, RESTRICT.FM, RESTRICT.PM
        ]:
            Folder.create(ymdbase.joinpath(fdrpath), __name__)