Exemplo n.º 1
0
    def exec_ddl(self, ddl):
        """Echo a DDL statement through psql() and execute it.

        Parameters
        ----------
        ddl : str
            DDL text to run on the shared cursor ``self.cur``.

        Returns whatever ``cursor.execute`` returns.  Database errors
        propagate unchanged to the caller: the original wrapped the call in a
        try/except that only re-raised (with an unused exception variable),
        which was a no-op and has been removed.
        """
        psql(ddl)
        return self.cur.execute(ddl)
Exemplo n.º 2
0
    def insert_trans_data(self, trans, target, source, stmt, skip_header=0):
        """Stream dict-like records from *source.pipe* into the target table.

        Every record read from the pipe is stamped with AccountingDate /
        AsOfDateTime, projected through the configured ``columnMap`` and
        bulk-inserted in chunks of 300 via ``executemany``.

        Parameters
        ----------
        trans : transaction wrapper (accepted for interface parity; unused)
        target : dict with ``targetSchema`` / ``targetTable`` keys
        source : object exposing a readable ``pipe`` of dict records
        stmt : query name resolved through ``self.get_query``
        skip_header : 0/1 -- when '1', the first pipe line is discarded
        """
        pipe = source.pipe
        # Fail fast: the original asserted the pipe only AFTER reading from it.
        assert pipe
        # skip_header defaults to 0, so one normalization suffices (the
        # original assigned ``skip`` twice).
        skip = str(skip_header).strip()
        assert skip in ('0', '1'), \
            'skip_header [%s] should be "0" or "1"' % skip
        if skip == '1':
            pipe.readline()  # throw away the header record

        start_time = time.time()
        xref = self.cli.tcfg["columnMap"]

        cols = list(xref)
        # NOTE(review): the original wrote to the bare global ``cli``; sibling
        # methods bind ``cli = self.cli``, so the two are assumed to be the
        # same object and ``self.cli`` is used here for consistency -- confirm.
        self.cli.to_cols = ',\n'.join(cols)
        self.cli.to_quotes = ','.join('?' * len(cols))
        assert self.cli.to_cols
        sql = self.get_query(target, stmt)

        cur = self.conn.cursor()
        rows = []
        line = pipe.readline()
        while line:
            # Stamp each record with the run's accounting date / as-of time.
            line.update(
                dict(AccountingDate=self.cli.pa[1],
                     AsOfDateTime=self.cli.asod))
            rows.append([
                str(line[xref[x][0]])
                if xref[x][1] in ['varchar'] else line[xref[x][0]]
                for x in xref
            ])
            line = pipe.readline()

        chunk = 300  # rows per executemany round-trip
        total = 0
        cid = 0
        psql(sql, 'Insert')
        while total < len(rows):
            cur.fast_executemany = True
            data = rows[total:total + chunk]  # single slice, same window
            cur.executemany(sql, data)
            ins = len(data)
            total += ins
            cid += 1
            log.info('[{}] [{}] {}: Running: {:,.0f}, Rows: {:,.0f}'.format(
                self.objtype, cid, self.cln, total, ins))

        log.info(
            '[{}]: {}: Inserted: {:,.0f}, To-Schema:{}, To-Table:{}, Skipped: {}, Elapsed: {}'
            .format(self.objtype, self.cln, len(rows), target['targetSchema'],
                    target["targetTable"], skip,
                    round((time.time() - start_time), 2)))
        pipe.close()
Exemplo n.º 3
0
    def get_col_types(self, schema, tbl):
        stmt = """
SELECT  column_name, data_type
FROM   v_catalog.columns 
WHERE  table_schema='%s' 
       AND table_name='%s' 
ORDER  BY ordinal_position""" % (schema, tbl)
        self.cur.execute(stmt)
        psql(stmt)
        out = []

        return self.cur.fetchall()
Exemplo n.º 4
0
    def get_query_cur(self,
                      chunk_size,
                      source,
                      qname,
                      out,
                      skip_header,
                      terminate_line=False):
        """Execute the *qname* query for *source* and return the live cursor.

        ``chunk_size`` is validated and clamped against the ``lame_duck`` test
        row limit (the clamp can trip the second assert when the limit is 0).
        *out*, *skip_header* and *terminate_line* are accepted only for
        interface parity with ``fetch_many`` and are not used here.

        (Removed unused locals ``tf``, ``current_ts``, ``id`` and ``cli``
        from the original.)
        """
        assert chunk_size
        # Honor the lame-duck row limit used for test runs.
        chunk_size = self.cli.lame_duck if self.cli.lame_duck and chunk_size > self.cli.lame_duck else chunk_size
        assert chunk_size
        stmt = self.get_query(source, qname)
        log.debug(stmt)
        if not hasattr(self, 'cur') or not self.cur:
            self.cur = self.conn.cursor()  # lazily create the shared cursor
        cur = self.cur
        psql(' \n'.join(stmt.replace(',', ', ').split()), 'Extractor cmd')

        cur.execute(stmt)
        return cur
Exemplo n.º 5
0
    def fetch_many(self,
                   chunk_size,
                   source,
                   qname,
                   out,
                   skip_header,
                   terminate_line=False):
        """Generator: execute the *qname* query and yield rows chunk by chunk.

        Each yielded value is *out* with ``out.data`` set to the next chunk
        (cell values normalized to bytes for downstream pipe writers) and
        ``out.chunk_id`` / ``out.current_ts`` / ``out.actor`` bookkeeping set.
        ``cli.lame_duck``, when truthy, caps the total rows read (test runs).
        *skip_header* is accepted for interface parity but not used here.
        """
        assert chunk_size
        cli = self.cli
        # Clamp chunk size to the lame-duck test limit when one is set.
        chunk_size = self.cli.lame_duck if self.cli.lame_duck and chunk_size > self.cli.lame_duck else chunk_size
        assert chunk_size
        tf = "%Y-%m-%d.%H_%M_%S"
        current_ts = time.strftime(tf)
        id = 0  # chunk counter (NOTE: shadows the ``id`` builtin)
        stmt = self.get_query(source, qname)
        log.debug(stmt)
        if not hasattr(self, 'cur') or not self.cur:
            self.cur = self.conn.cursor()  # lazily create the shared cursor
        cur = self.cur
        psql(' \n'.join(stmt.replace(',', ', ').split()), 'Extractor cmd')
        #e()

        cur.execute(stmt)
        cols = [c[0] for c in cur.description]
        total_read = 0
        if 1:
            # Appendix columns/values configured on the CLI are appended to
            # every row below.
            apx_cmap, apx_cols, apx = cli.get_appendix2()

        header = None
        first = True
        is_apx = []
        start_time = time.time()
        while True:
            print('Elapsed [%d] PRE fetch: %s' %
                  (id, time.time() - start_time))
            start_time = time.time()
            out.data = []
            # Stop once the lame-duck row budget is exhausted.
            if self.cli.lame_duck and self.cli.lame_duck <= total_read: break
            #decrease chunk size
            if self.cli.lame_duck and self.cli.lame_duck - total_read < chunk_size:
                chunk_size = self.cli.lame_duck - total_read

            fetch_time = time.time()
            rows = cur.fetchmany(chunk_size)
            print('Elapsed [%d] FMANY: %s' % (id, time.time() - fetch_time))
            print(len(rows))
            #e()
            data = []
            append_time = time.time()
            if rows:
                for row in rows:
                    d = []
                    # Normalize every cell: NULL -> b'', date/datetime -> ISO
                    # text bytes, numbers -> repr, strings -> encoded (py3).
                    for x in row:
                        if x == None:
                            d.append(b'')
                            continue
                        if isinstance(x, datetime.date) or isinstance(
                                x, datetime.datetime):
                            d.append(str(x).encode('utf-8'))
                            continue
                        if isinstance(x, int) or isinstance(x, float):
                            d.append(repr(x))
                            continue
                        if sys.version_info[0] < 3:
                            d.append(x)
                        else:
                            d.append(x.encode())

                    if apx:
                        #pp(d)
                        #print len(d), len(d+apx.split(cli.csep)), apx
                        #e()
                        # NOTE(review): ``cols`` grows by len(apx_cols) on
                        # EVERY row because it is re-extended inside the loop;
                        # presumably only the first extension is intended --
                        # confirm before relying on ``cols`` afterwards.
                        cols = cols + apx_cols
                        is_apx = ['N'] * len(d) + ['Y'] * len(apx_cols)
                        d = d + apx.split(cli.csep.decode())
                        # NOTE(review): by operator precedence this appends []
                        # (an empty row) whenever terminate_line is False --
                        # looks like a bug; confirm intent.
                        data.append(
                            d + [''] if terminate_line else
                            [])  #data.append('^'.join(str(v) for v in d+apx))
                    else:

                        if 1:
                            is_apx = ['N'] * len(d)
                            data.append(d)
                            #header = [col[:2] for ]
                        else:
                            assert 3 == 2
                            data.append('^'.join(str(v)
                                                 for v in d) + os.linesep)
                    if first:
                        # One-time diagnostic dump of the first row.
                        pfmtd([
                            dict(Col=col, Row=d[i], Appendix=is_apx[i])
                            for i, col in enumerate(cols)
                        ], 'First row')
                        first = False
                        #e()

            else:
                break
            out.data = data
            print('Elapsed [%d] APPEND: %s' % (id, time.time() - append_time))

            out.chunk_id, out.current_ts, out.actor = id, current_ts, self.cln
            if not data:
                break
            print('Elapsed [%d] POST fetch: %s' %
                  (id, time.time() - start_time))
            yield out
            id += 1
            total_read += len(data)
Exemplo n.º 6
0
Arquivo: iq.py Projeto: pie-crust/etl
    #e()  # disabled debug breakpoint
    # NOTE(review): fragment from iq.py -- the enclosing function's header is
    # not visible here; statements are documented as-is.  Runs a hard-coded
    # SAP-IQ query (earlier ``stmt``/``tbl`` assignments are overwritten and
    # effectively dead) and starts writing the result set to a CSV file.
    if 1:
        info('Start')
        cursor = conn.cursor()
        dbdata = []
        if 1:
            stmt = "SET TEMPORARY OPTION DATE_ORDER=MDY exec CIGActgH.spSOI_Cnsldtd_WRAPPER '223906','EOD','2019/1/30','DESK','*','DEFAULT','REGULAR','1','0','NONE','*','*','N','0','0','NONE','NONE','ALL','0','MONTH_END','N','ALL','*','0','*','N','*'"
            tbl = 'DY_FinancingPosition'
            stmt = "SET TEMPORARY OPTION DATE_ORDER=MDY exec CIGActgH.spTDPFinancialPosRptCnsldtd_WRAPPER '223907','2019/9/13','2019/9/13','EOD','EOD','*','*','*','NONE','*','NOW','0'"
            tbl = 'ME_FinancingPosition'
            stmt = "SET TEMPORARY OPTION DATE_ORDER=MDY exec CIGActgH.spTDPFinancialPosRptCnsldtd_WRAPPER '223906','2019/8/01','2019/8/31','EOD','EOD','*','*','*','NONE','*','NOW','0'"
            stmt = "select * from  Position.ME_FinancingPosition where AccountingDate='2019-08-31' and client=223906"
            tbl = 'DY_FinancingPosition'
            # Only this last stmt/tbl pair is actually executed.
            stmt = "select * from  Position.%s where AccountingDate='2019-09-13' and client=223906" % tbl
            psql(os.linesep.join(stmt.split()), tbl)
            cursor.execute(stmt)

            t = 0
            i = 1
            eol = '\n'
            colsep = '|'

            start_time = time.time()
            fn = '%s.csv' % (tbl)
            info(fn)
            # Write a pipe-delimited header row, then inspect the first row.
            with open(fn, 'w') as fh:
                fh.write(colsep.join([c[0] for c in cursor.description]) + eol)
                row = cursor.fetchone()
                if row:
                    info('First row: %s' % row)
Exemplo n.º 7
0
 def exec_query(self, stml, show=False):
     """Execute *stml* on a fresh cursor; echo it via psql() when *show*."""
     cursor = self.conn.cursor()
     if show:
         psql(stml)
     return cursor.execute(stml)
Exemplo n.º 8
0
    def insert_RC_data(self,
                       trans,
                       target,
                       source,
                       stmt,
                       insert_stats,
                       skip_header=0):
        """Read REST records from *source.pipe* and insert them serializably.

        NOTE(review): this method contains live debug scaffolding -- the
        ``print(123)`` / ``e()`` pair after the read loop halts execution,
        the row-building append is commented out (so ``rows`` stays empty and
        the "No data in REST pipe." branch would always fire), and the insert
        loop issues ROLLBACK after each executemany, discarding the inserts.
        It appears to be mid-debugging; confirm intent before use.
        """
        pipe = source.pipe
        skip = str(skip_header).strip()
        if skip_header is not None:
            skip = str(skip_header).strip()
        assert str(skip).strip() in [
            '0', '1'
        ], 'skip_header [%s] should be "0" or "1"' % str(skip).strip()
        if str(skip) == '1':
            pipe.readline()  # discard the header record
        assert pipe

        start_time = time.time()
        xref = self.cli.tcfg["columnMap"]

        cols = [v[0] for _, v in xref.items()]
        # NOTE(review): bare global ``cli`` used here while ``self.cli`` is
        # used above -- presumably the same object; confirm.
        cli.to_cols = ',\n'.join(cols)

        cli.to_quotes = ','.join([x for x in '?' * len(cols)])
        assert cli.to_cols
        sql = self.get_query(target, stmt)
        #cnxn = pyodbc.connect(conn_str, autocommit=True)
        # Run the whole load under SERIALIZABLE isolation, manual commit.
        trans.conn.set_attr(pyodbc.SQL_ATTR_TXN_ISOLATION,
                            pyodbc.SQL_TXN_SERIALIZABLE)
        trans.conn.autocommit = False
        cur = trans.conn.cursor()
        fline = line = pipe.readline()
        self.rows = rows = []
        #pp(xref)
        # Appendix defaults: columnMap entries with a third element supply a
        # parsed default value for that column.
        apx = {x[0]: x[2] for x in xref.values() if len(x) == 3}
        apx = {x: cli.get_parsed(ckey=x, cfg=apx) for x, v in apx.items()}

        #ppe(fline)
        # Columns configured but absent from the first REST record.
        ext_c = list(set(xref.keys()) - set(fline.keys()))
        if ext_c:
            log.warn('Config has extra columns missing in REST')
            #pfmtd([dict(Id=k, DB_ColName=v) for k, v  in enumerate(list(sorted(ext_c)))], 'Defaulting these to nulls')

        # Columns present in REST but not configured in the DB map.
        ext_l = list(set(fline.keys()) - set(xref.keys()))
        if ext_l:
            log.warn('REST has extra columns missing in DB')
            #pfmtd([dict(Id=k, RESR_Col=v) for k, v  in enumerate(ext_l)], 'Extra cols in REST')
            #pp(ext_l)


        # Known REST-only columns to ignore when reporting extras.
        ignore=[u'signOffVersion', u'signOffTime', u'RBDate', u'asofDate'] +[u'DataSource', u'GPOSMATTol']+[u'CCY',
        u'DEPolicy',
        u'Price',
        u'UnrealizedPnL',
        u'Fund',
        u'RawUnrealizedPnL',
        u'SwapType'] + [u'SettlementDate']+[u'BuySell',
        u'IndependentAmount',
        u'ConfirmStatus',
        u'RefEntityName',
        u'ReferenceOb',
        u'CounterpartyRefID',
        u'CDSType',
        u'TerminationDateUnadjusted',
        u'TerminationDateAdjusted',
        u'StandardRefObligation',
        u'FixedRate'] + [u'MaturityDate', u'StrikePrice', u'IsSpot'] + [u'Symbol', u'VolatilityStrike']+[u'Direction',
        u'MaturityDateUnadjusted',
        u'TradeCurrency',
        u'ProductType',
        u'UnderlyingSecurity']+[u'MaturityTenor', u'PaymentDate', u'CAP_FLOOR', u'MaturityDateAdjusted'] + \
       [u'IsElectronicallyConfirmed', u'Classification'] + [u'FloatingRateIndex']+[u'IsTodayResetDate']+ \
       [u'FloatRateIndexRec',
        u'IndexTenorRec',
        u'IsOldED',
        u'DayCountFractionPay',
        u'DayCountFractionRec',
        u'PaymentFrequencyPay',
        u'PaymentFrequencyRec',
        u'RollDate']+[u'CCP', u'CCPConfirmRefId'] +[u'IndexTenorPay', u'SpreadPay', u'FloatRateIndexPay']+ \
       [u'TerminationDate', u'FloatingIndex', u'StartFlow', u'CptyRefID'] +[u'Country']+ \
       [u'Barrier1Strike',
        u'Barrier1CCYPair',
        u'bdi',
        u'Barrier2Strike',
        u'Barrier2CCYPair'] + [u'PutCall', u'UnderlyingSymbol', u'OptionStyle']+[u'TerminationDateUnderlyingUnadjusted', u'CallPut', u'PayReceive']+\
       [u'TerminationDateUnderlyingAdjusted']+[u'ProceedsNotional'] +[u'ContractType', u'ExecutingAccount']+[u'SSGClientNote']+[u'Issuer']

        while line:
            line.update(apx)
            # Report (and then ignore) any unexpected new REST columns.
            ext_s = set(line.keys()) - set(xref.keys())
            #pp(ext_s)
            if ext_s - set(ignore):
                pfmtd([
                    dict(Id=k, REST_Col=v)
                    for k, v in enumerate(list(ext_s - set(ignore)))
                ], 'Extra cols in REST/IGNORE')
                pp(list(ext_s - set(ignore)))
                ignore = ignore + list(ext_s - set(ignore))

            #rows.append([str(line[x]) if xref[x][1] in ['varchar'] else float(line[x]) if xref[x][1] in ['varchar'] else line[x] for x in xref if x not in ext])
            line = pipe.readline()
        # NOTE(review): debug abort -- everything below is unreachable.
        print(123)
        e()
        chunk = 3
        total = 0
        cid = 0
        psql(sql, 'Insert')
        if not rows:
            raise Exception('No data in REST pipe.')
        else:

            ignore_cols = target["ignoreSourceColumns"]

            # Sanity check: REST record width must equal mapped row width
            # plus the explicitly ignored source columns.
            if not len(fline) == len(rows[0]) + len(ignore_cols):
                pp(fline)
                pp(rows[0])

                raise Exception(
                    'line %s <> row %s not in xref:%s, not in source:%s' %
                    (len(fline), len(rows[0]), set(fline.keys()) -
                     set(xref.keys()), set(xref.keys()) - set(fline.keys())))

            pfmtd([
                dict(Col=col, Row=rows[0][i])
                for i, col in enumerate([col for col in xref])
            ], 'First row')

        while total < len(rows):
            cur.fast_executemany = True
            data = rows[total:][:chunk]
            #ppe(data)
            cur.executemany(sql, data)
            # NOTE(review): rolls back immediately after inserting -- the
            # batch is discarded every iteration; debug leftover, confirm.
            cur.execute("ROLLBACK")
            trans.conn.rollback()
            ins = len(data)
            total += ins
            cid += 1
            log.info('[{}] [{}] {}: Running: {:,.0f}, Rows: {:,.0f}'.format(
                self.objtype, cid, self.cln, total, ins))

        log.info(
            '[{}]: {}: Inserted: {:,.0f}, To-Schema:{}, To-Table:{}, Skipped: {}, Elapsed: {}'
            .format(self.objtype, self.cln, len(rows), target['targetSchema'],
                    target["targetTable"], skip,
                    round((time.time() - start_time), 2)))
        pipe.close()
        insert_stats.inserted_cnt = total
Exemplo n.º 9
0
    def bulk_load_file(self, trans, file_names, qname, cfg, out, header=None):
        #cli=self.cli
        scfg, tcfg = cfg
        skip = scfg['writeHeader']
        assert skip in [0, 1]
        tbl = cli.get_parsed(ckey='targetTable', cfg=tcfg)
        sch = tcfg['targetSchema']
        assert tbl

        fnames = file_names.file_names
        start_time = time.time()
        total_ins = 0
        cur = self.conn.cursor()
        cur.execute('SELECT * FROM %s.%s WHERE 1=2' % (sch, tbl))
        from decimal import Decimal
        #for col in cur.description:
        #	print col[0], col[1],type(col[1]), col[1] == decimal.Decimal, decimal.Decimal
        #e()
        if header:
            clist = ',\n '.join([c for c in header])
        else:
            cols = [c[:2] for c in cur.description]
            clist = ',\n '.join([c[0] for c in cur.description])

        for path in file_names.file_names:
            #pp(fnamed)
            #_, fnd = fnamed
            #pp(fnd)
            #path = fnd['path']
            assert os.path.isfile(path)
            limit = 1000
            rowid = 0
            vals = []
            start_time = time.time()

            linesep = scfg['recordDelimiter']
            colsep = scfg['columnDelimiter']
            data = []
            #print 7777777777777, path
            if 0:
                stmt = """
LOAD TABLE %s.%s (%s)
FROM '%s'
quotes off
escapes off
format ascii

delimited by '%s'
skip %d
row delimited by '%s'
			""" % (sch, tbl, clist, path, colsep, skip, linesep)
            stmt = """
LOAD TABLE %s.%s (%s)
FROM '%s'
quotes off
escapes off
format ascii

delimited by '%s'
skip %d
			""" % (sch, tbl, clist, path, colsep, skip)

            psql(stmt)
            #e()
            try:
                cnt = cur.execute(stmt)

                total_ins += cur.rowcount
                out[path] = cur.rowcount
            except pyodbc.ProgrammingError as ex:
                log.debug(stmt)
                log.error(ex)
                self.conn.rollback()
                raise

            log.debug('Read: %d, Inserted: %d ' % (rowid, len(vals)))

            self.conn.commit()
            #out[path]=total_ins
            log.info('%s: Read:%d, Inserted: %d,  Elapsed: %s' %
                     (self.cln, rowid, total_ins,
                      round((time.time() - start_time), 2)))
Exemplo n.º 10
0
    def load_md5(self,
                 trans,
                 file_obj,
                 table_name,
                 qname,
                 fmt_cols,
                 cfg,
                 skip=0,
                 apx=None,
                 stats=None):

        scfg, tcfg = cfg
        file_name = file_obj.file_name

        assert os.path.isfile(file_name)
        if 1:
            colsep = scfg['columnDelimiter']
            assert colsep

            lcnt = file_obj.line_count(file_name)

            if 1:
                cols = ','.join([col[0] for col in file_obj.cols])

                trans.conn.autocommit = False

                copyfmt = ',\n'.join([
                    "%s FORMAT 'hex'" %
                    col[0] if col[0] in fmt_cols else "%s" % col[0]
                    for col in file_obj.cols
                ])
                assert os.path.isfile(file_obj.file_name)
                #print (table_name, apx)
                #e()
                apxq = ',\n'.join(
                    [''] + ["%s AS %s" % (k, v)
                            for k, v in apx.items()]) if apx else ''
                stmt = """
COPY %s (%s %s) 
FROM LOCAL '%s' 
DELIMITER '|' ESCAPE AS '^' NULL '' 
SKIP %d ABORT ON ERROR NO COMMIT """ % (table_name, copyfmt, apxq,
                                        file_obj.file_name, skip)
                try:
                    psql(stmt, 'Load')
                    trans.cur.execute(stmt)

                except:
                    trans.conn.rollback()
                    pfmt([[stmt]])

                    raise

                accepted, rejected = trans.cur.execute(
                    'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()'
                ).fetchall()[0]
                pfmt([[lcnt - skip, accepted, rejected]],
                     ['Line count', 'Accepted', 'Rejected'], 'Load stats')
                assert lcnt - skip == accepted

                out = OrderedDict()
                out['table_name'] = table_name
                out['accepted'] = accepted
                out['rejected'] = rejected
                out['linecount'] = lcnt
                out['skip'] = skip
                out['diff'] = lcnt - skip - accepted
                stats.append(out)
Exemplo n.º 11
0
    def load_file(self,
                  trans,
                  file_obj,
                  schema,
                  table_name,
                  qname,
                  fmt_cols,
                  cfg,
                  skip=0,
                  apx=None,
                  stats=None):
        """COPY *file_obj*'s delimited file into *schema*.*table_name*.

        Same contract as ``load_md5`` but schema-qualified, driven by
        ``file_obj.cols_alt``, and recording stats keyed by table name.
        *qname* and *apx* are accepted for interface parity but unused here.
        """
        scfg, tcfg = cfg
        file_name = file_obj.file_name

        assert os.path.isfile(file_name)
        if 1:
            colsep = scfg['columnDelimiter']
            assert colsep

            lcnt = file_obj.line_count(file_name)

            if 1:
                pp(file_obj.cols)  # debug dump of the parsed column list

                #cols = ','.join([col.decode() for col in file_obj.cols])
                #pp(cols)

                trans.conn.autocommit = False  # caller owns the transaction

                # NOTE(review): the else-branch formats the whole ``col``
                # ("%s" % col) while load_md5 uses col[0] -- correct only if
                # cols_alt elements are plain strings, not tuples; confirm.
                copyfmt = ',\n'.join([
                    "%s FORMAT 'hex'" %
                    col[0] if col[0] in fmt_cols else "%s" % col
                    for col in file_obj.cols_alt
                ])

                assert os.path.isfile(file_obj.file_name)

                stmt = """
COPY %s.%s (%s ) 
FROM LOCAL '%s' 
DELIMITER '|' ESCAPE AS '^' NULL '' 
SKIP %d ABORT ON ERROR NO COMMIT """ % (schema, table_name, copyfmt,
                                        file_obj.file_name, skip)
                try:
                    self.desc_table(schema, table_name)
                    psql(stmt, 'Load')
                    trans.cur.execute(stmt)

                except:
                    # broad on purpose: roll back, echo the stmt, re-raise
                    trans.conn.rollback()
                    psql(stmt)

                    raise

                accepted, rejected = trans.cur.execute(
                    'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()'
                ).fetchall()[0]
                pfmtd([
                    dict(Line_count=lcnt - skip,
                         Accepted=accepted,
                         Rejected=rejected)
                ], 'Load stats')
                assert lcnt - skip == accepted  # all data lines must load

                out = OrderedDict()
                out['table_name'] = table_name
                out['accepted'] = accepted
                out['rejected'] = rejected
                out['linecount'] = lcnt
                out['skip'] = skip
                out['diff'] = lcnt - skip - accepted
                stats[table_name] = out
0
def run():
    """Top-level driver: stage dumped *.out files into temp PK tables, then
    merge new/changed rows (detected via MD5) into the target tables.

    Phase 1 -- for every *.ok marker under the configured dump dir, create a
    ``tmp_PK_<table>`` temp table per *.out file and bulk-load the file,
    augmented with MartModifiedDate / AsOfFrom / AsOfTo / per-row MD5.
    Phase 2 -- per *.ok dir, insert rows from each temp table into the real
    table where the MD5 differs or the key is absent, then commit.

    Relies on module-level globals (``cli``, ``app_init``, ``psql``, ``pfmt``,
    ``pfmtd``, ``log``, ``InOut``, reader/writer factories).
    """
    skip = 2  # header lines per .out file: column-name row + type row

    do_not_load = []
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]

        DirReader = create_reader(aname=_src_class, app_init=app_init)

        cli.set_source(_source)
        dir_scfg = cli.get_dcfg(_src_class)
        path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
        ok_files = InOut(file_names=[])
        # *.ok markers flag dump folders that are complete and loadable.
        DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')

        loaded = {}

        for _trg_class, val in cli.cfg['target'][_source].items():

            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)

            do_not_delete = tcfg['doNotDeleteTables']
            do_not_load = tcfg['doNotLoadTables']
            to_conn = InOut()
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            toSchema = tcfg['targetSchema']
            stmt = 'set search_path to %s' % toSchema
            psql(stmt)
            to_conn.cur.execute(stmt)

            # ---- Phase 1: build and load tmp_PK_* tables per .ok folder ----
            pkstats = {}
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File',
                                       app_init=app_init,
                                       file_name=okfn,
                                       scfg=dir_scfg)
                okdir, okname = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)

                out_files = InOut(file_names=[])

                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')

                #e()
                if 1:  # Check if some there are files missing in config
                    # NOTE(review): ``ftlist`` built here is reused by the
                    # Phase-2 loop below, outside this loop's scope -- it
                    # holds the tables of the LAST .ok folder only; confirm.
                    ftlist = []

                    for out_fn in out_files.file_names:
                        print(out_fn)
                        # table name is the 2nd dot-separated token
                        ftlist.append(os.path.basename(out_fn).split('.')[1])

                    pfmt([[x] for x in ftlist], ['Files->Tables'])

                    ctables = cli.tcfg['targetTables'].keys()
                    extra_file_tables = list(set(ftlist) - set(ctables))
                    pfmt([[x] for x in extra_file_tables],
                         ['Tables not in config.'])
                    extra_config_tables = list(set(ctables) - set(ftlist))
                    pfmt([[x] for x in extra_config_tables],
                         ['Tables in config but not in file names.'])
                    assert not extra_file_tables, 'Tables %s are not listed in config["targetTables"].' % extra_file_tables

                for outfn in out_files.file_names:  # Master first

                    outFile = create_reader(aname='File',
                                            app_init=app_init,
                                            file_name=outfn,
                                            scfg=dir_scfg)

                    outbn = os.path.basename(outfn)
                    tbl = outbn.split('.')[1]
                    outTbl = 'tmp_PK_%s' % tbl
                    outCols = outFile.get_header_cols()
                    # Audit columns appended to every staged row.
                    apxCols = [('MartModifiedDate', 'timestamp'),
                               ('AsOfFrom', 'timestamp'),
                               ('AsOfTo', 'timestamp'), ('MD5', 'char(22)')]
                    outTblCols = toDB.get_create_col_list(outCols, apx=apxCols)

                    toCols = toDB.get_col_types(toSchema, tbl)
                    pp(toCols)

                    toDB.desc_tmp_table(outTbl, outCols + apxCols)

                    do_not_delete.append(outTbl)

                    # Drop a leftover temp table; tolerate "does not exist".
                    try:

                        stmt = 'drop table %s' % outTbl
                        to_conn.cur.execute(stmt)
                    except Exception as ex:
                        #raise
                        if not 'Table "%s" does not exist' % outTbl in str(ex):
                            raise
                    psql(outfn)
                    stmt = 'CREATE LOCAL TEMPORARY TABLE %s ( %s )\nON COMMIT PRESERVE ROWS' % (
                        outTbl, ', \n'.join(
                            ['%s %s' % tuple(col) for col in toCols]))
                    pfmt([[stmt]], ['Create master temp PK' + outTbl])
                    toDB.exec_ddl(stmt)
                    if 1:  #//Load data into PK table

                        fmt_cols = {}
                        # MartModifiedDate comes from cell (0,0) of the .ok file.
                        mmDt = okFile.get_value(coords=(0, 0), skip=skip)

                        # NOTE(review): md5val is computed from b'test' and
                        # never used -- debug leftover, confirm.
                        md5val = (base64.b64encode(
                            hashlib.md5(b'test').digest()))

                        apx = OrderedDict()
                        apx['MartModifiedDate'] = mmDt
                        apx['AsOfFrom'] = mmDt
                        apx['AsOfTo'] = "12/31/9999"
                        apx['MD5'] = ''  #//defined on row level

                        pk_outfn = '%s.pk' % outfn
                        colsep = dir_scfg['columnDelimiter']

                        # Rewrite <file>.out -> <file>.out.pk, appending the
                        # audit columns and a per-row MD5 (base64, '=' strip).
                        with open(pk_outfn, 'wb') as pkfh:
                            with open(outfn, 'rb') as outfh:
                                line = outfh.readline().strip()
                                pkfh.write(line +
                                           colsep.join(apx.keys()).encode() +
                                           os.linesep.encode())
                                line = outfh.readline().strip()
                                apxTypes = colsep.join(
                                    [col[1] for col in apxCols])
                                pkfh.write(line + apxTypes.encode() +
                                           os.linesep.encode())
                                line = outfh.readline().strip()
                                while line:
                                    # MD5 over the line with '|' removed.
                                    md5 = (base64.b64encode(
                                        hashlib.md5(line.replace(
                                            b'|', b'')).digest()))
                                    apx['MD5'] = md5.decode('ascii',
                                                            'ignore').strip(
                                                                '=')  #// REDO

                                    pkfh.write(
                                        line +
                                        colsep.join(apx.values()).encode() +
                                        os.linesep.encode())
                                    line = outfh.readline().strip()
                        outPkFile = create_reader(aname='File',
                                                  app_init=app_init,
                                                  file_name=pk_outfn,
                                                  scfg=dir_scfg)
                        outPkFile.set_alt_cols()

                        schema = tcfg['targetSchema']
                        toDB.load_grds_file(trans=to_conn,
                                            file_obj=outPkFile,
                                            schema=schema,
                                            table_name=outTbl,
                                            qname='insertStmt',
                                            fmt_cols=fmt_cols,
                                            cfg=(dir_scfg, tcfg),
                                            skip=skip,
                                            stats=pkstats)
                        loaded[outbn] = outTbl
                        #outPkFile.delete()

                #pfmtd([pkstats])
                #e()
            # ---- Phase 2: merge temp PK tables into the real tables ----
            stats = {}
            deleted = {}
            processed = []
            not_processed = []
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File',
                                       app_init=app_init,
                                       file_name=okfn,
                                       scfg=dir_scfg)
                okdir, _ = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)
                #e()
                assert os.path.isdir(okdir)
                snap_df = cli.get_dest_folder(okdir)
                # Skip folders already snapshotted to their destination.
                if os.path.isdir(snap_df):
                    log.warning('[%s]Destination folder exists: [%s]' %
                                (okdir, snap_df))
                    not_processed.append(okfn)
                    continue

                out_files = InOut(file_names=[])
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                apx = dict(
                    MartModifiedDate=okFile.get_value(coords=(0,
                                                              0), skip=skip))

                #e()
                if 0:
                    g = raw_input("Continue?")

                not_loaded = {}

                # The join key count differs per table family: 3/4 keys for
                # TxnLookupMap, 2 for the listed lookups, 1 otherwise.
                for table_name in ftlist:
                    tmpTbl = 'tmp_PK_%s' % table_name
                    toCols = toDB.get_tab_cols(tmpTbl)
                    #pp(toCols)
                    toDB.desc_table(None, tmpTbl)
                    toDB.desc_table(toSchema, table_name)
                    #e()
                    if table_name in ['TxnLookupMap']:

                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} ) 
select distinct {2} 
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.{5} = tmid.{5}
AND ta.{6} = tmid.{6}
AND ta.ValidFrom = tmid.ValidFrom and ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode(), toCols[1][0].decode(),
                        toCols[2][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    elif table_name in [
                            'G3Lookup', 'GCLookup', 'GISLookup', 'GPSLookup',
                            'GPXLookup', 'GPosLookup', 'GTxLookup',
                            'FundToBusinessUnitMap', 'TxEditReason'
                    ]:

                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} )
select distinct {2}
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.{5} = tmid.{5}
AND ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL 
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode(), toCols[1][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    else:
                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} )
select distinct {2}
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL ;
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])

                if 1:
                    # Commit the whole folder's merges, then report.
                    toDB.commit_transaction(trans=to_conn)

                    pfmt([[k] + list(v.values())[1:]
                          for k, v in pkstats.items()], [
                              'Table', 'Accepted', 'Rejected', 'Line count',
                              'Skip', 'Diff'
                          ], 'Load completed'.upper())
                    pfmt([(k, v) for k, v in loaded.items()],
                         ['Loaded Files', 'Loaded Tables'])
                    pfmt([(k, v) for k, v in not_loaded.items()],
                         ['Not loaded Files', 'Not loaded Tables'])
                    assert os.path.isdir(okdir)
                    if 0:
                        cli.MoveSnapFolder(okdir)
                    processed.append(dict(ProcessedFile=okfn))
                #break;

            if not ok_files.file_names:
                pfmtd([
                    dict(NoFiles='No OK files at working dir: [ %s ]' %
                         cli.pa[0])
                ])

            pfmtd(processed)
            pfmtd(not_processed)

    # Disabled: completion e-mail notification.
    if 0:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
        cli.done()