Example #1
    def desc_tmp_table(self, tbl, cols):

        d = {col[0]: col for col in cols}

        pfmtd([
            dict(Column=d[k][0], Data_type=d[k][1]) for k in sorted(d.keys())
        ], tbl)
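
A note on Example #1: `pfmtd` is this toolkit's table pretty-printer; from its call sites it appears to take a list of dicts (one per row, keys as column headers) and a title. A minimal stand-in under that assumption makes the snippet runnable on its own:

def pfmtd(rows, title=''):
    # Hypothetical stand-in for the toolkit's pfmtd(): one dict per row,
    # dict keys as column headers, title printed above the table.
    print(title)
    if not rows:
        print('<no rows>')
        return
    headers = list(rows[0].keys())
    print(' | '.join(headers))
    for row in rows:
        print(' | '.join(str(row[h]) for h in headers))

# Usage mirroring desc_tmp_table: cols is a list of (name, type, ...) tuples.
cols = [('id', 'int'), ('name', 'varchar(40)')]
d = {col[0]: col for col in cols}
pfmtd([dict(Column=d[k][0], Data_type=d[k][1]) for k in sorted(d)], 'tmp_orders')
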
Example #2
    def desc_cur(self, cur, colord=True):
        rows = {}
        for col in cur.description:
            rows[col[0]] = [col[0], str(col[1]).split("'")[1], col[3]]
        out = []
        for k in (sorted(rows.keys()) if colord else rows.keys()):
            row = rows[k]
            d = collections.OrderedDict()
            for x, y in zip(['Column', 'Type', 'Length'], row):
                d[x] = y
            out.append(d)

        pfmtd(out, 'Procedure')
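
The `str(col[1]).split("'")[1]` idiom in Example #2 pulls a readable type name out of a `cursor.description` entry whose type_code is a Python class, e.g. repr `<class 'str'>`. A quick check of the trick:

# For drivers whose description type_code is a Python class, its str() form is
# "<class 'str'>", so splitting on the quote character yields the bare name.
type_code = str  # stand-in for col[1] from cursor.description
print(str(type_code).split("'")[1])  # prints: str
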
Example #3
def load_day(a, mon, day=None):
    cli = get_cli(a)
    pars = params[a.table][0].format(**dict(CENTER=a.center,
                                            CLIENT=a.client,
                                            YEAR=a.year,
                                            MONTH=mon,
                                            DAY=day,
                                            EOM=a.day_to,
                                            BUNIT=a.bunit))

    pycli = cli.format(
        **dict(TABLE=a.table,
               PARAM_CNT=len([p for p in pars.strip().split(' ') if p]),
               DUMP='--dump' if a.dump else '--no-dump',
               LAME_DUCK='-ld %d ' % a.lame_duck))
    cmd = '%s %s' % (pycli, pars)

    if not a.dry:
        pfmtd([dict(Command=os.linesep.join(cmd.split()))], a.table)
        pipe = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True)

        line = pipe.stdout.readline()

        while line:
            print('OUTPUT:', line.strip())
            line = pipe.stdout.readline()

        line = pipe.stderr.readline()

        while line:
            print('ERROR:', line.strip())
            line = pipe.stderr.readline()

        while pipe.poll() is None:
            print('Waiting...')
            time.sleep(1)

        if pipe.returncode != 0:
            print('returncode = %d' % pipe.returncode)
            e(pipe.returncode)
    else:
        print(cmd)
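
Example #3 drains stdout to EOF before touching stderr; if the child writes enough to stderr first, both sides can deadlock on full pipe buffers. A safer sketch of the same pattern, letting communicate() drain both streams:

import subprocess

def run_cmd(cmd):
    # communicate() reads stdout and stderr concurrently, avoiding the
    # deadlock that sequential readline() loops risk on full pipe buffers.
    pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, shell=True)
    out, err = pipe.communicate()
    for line in out.splitlines():
        print('OUTPUT:', line.decode(errors='replace').strip())
    for line in err.splitlines():
        print('ERROR:', line.decode(errors='replace').strip())
    return pipe.returncode
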
Example #4
    def desc_table(self, schema, tbl, col_ord=None):
        stmt = """
SELECT  cname, coltype, nulls, colno, length, in_primary_key as in_pk FROM sys.syscolumns
WHERE  creator='%s' 
       AND tname='%s' 
ORDER  BY %s""" % (schema, tbl,
                   'cname' if not col_ord else '%s desc' % col_ord)
        #psql(stmt)
        self.cur.execute(stmt)
        out = []
        rows = {row[1 if col_ord else 0]: row for row in self.cur.fetchall()}

        for k in sorted(rows.keys()):
            row = rows[k]
            d = collections.OrderedDict()
            for x, y in zip([col[0] for col in self.cur.description], row):
                d[x] = y
            out.append(d)

        pfmtd(out, '%s.%s' % (schema, tbl))
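
Example #4 (and #5 below) builds SQL with %-interpolation, which is acceptable for trusted config values but injection-prone in general. Data values can go through driver placeholders; identifiers such as the ORDER BY column cannot, so whitelist those instead. A hedged sketch (placeholder style varies by driver):

ALLOWED_ORDER = {'cname', 'coltype', 'colno'}  # assumed safe sort keys

def desc_table_safe(cur, schema, tbl, col_ord=None):
    # Values are bound as parameters; the identifier is whitelisted instead.
    order = col_ord if col_ord in ALLOWED_ORDER else 'cname'
    stmt = ('SELECT cname, coltype FROM sys.syscolumns '
            'WHERE creator = ? AND tname = ? ORDER BY ' + order)
    cur.execute(stmt, (schema, tbl))  # '?' paramstyle; some drivers use %s
    return cur.fetchall()
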
Example #5
    def desc_table(self, schema, tbl, col_ord=None):
        stmt = """
SELECT  ordinal_position as id, column_name, data_type,
data_type_length as dt_len, is_nullable as nullable, column_default as default
FROM   v_catalog.columns 
WHERE  table_schema='%s' 
       AND table_name='%s' 
ORDER  BY %s""" % (schema, tbl,
                   'ordinal_position' if not col_ord else '%s desc' % col_ord)
        self.cur.execute(stmt)
        #psql(stmt)
        out = []
        rows = {row[0 if col_ord else 1]: row for row in self.cur.fetchall()}

        for k in sorted(rows.keys()):
            row = rows[k]
            d = OrderedDict()
            for x, y in zip([col[0] for col in self.cur.description], row):
                d[x] = y
            out.append(d)

        pfmtd(out, '%s.%s' % (schema, tbl))
Example #6
    def fetch_many(self,
                   chunk_size,
                   source,
                   qname,
                   out,
                   skip_header,
                   terminate_line=False):
        assert chunk_size
        cli = self.cli
        if self.cli.lame_duck and chunk_size > self.cli.lame_duck:
            chunk_size = self.cli.lame_duck
        assert chunk_size
        tf = "%Y-%m-%d.%H_%M_%S"
        current_ts = time.strftime(tf)
        id = 0
        stmt = self.get_query(source, qname)
        log.debug(stmt)
        if not hasattr(self, 'cur') or not self.cur:
            self.cur = self.conn.cursor()
        cur = self.cur
        psql(' \n'.join(stmt.replace(',', ', ').split()), 'Extractor cmd')

        cur.execute(stmt)
        cols = [c[0] for c in cur.description]
        total_read = 0
        apx_cmap, apx_cols, apx = cli.get_appendix2()

        header = None
        first = True
        is_apx = []
        start_time = time.time()
        while True:
            print('Elapsed [%d] PRE fetch: %s' %
                  (id, time.time() - start_time))
            start_time = time.time()
            out.data = []
            if self.cli.lame_duck and self.cli.lame_duck <= total_read: break
            #decrease chunk size
            if self.cli.lame_duck and self.cli.lame_duck - total_read < chunk_size:
                chunk_size = self.cli.lame_duck - total_read

            fetch_time = time.time()
            rows = cur.fetchmany(chunk_size)
            print('Elapsed [%d] FMANY: %s' % (id, time.time() - fetch_time))
            print(len(rows))
            data = []
            append_time = time.time()
            if rows:
                for row in rows:
                    d = []
                    for x in row:
                        if x is None:
                            d.append(b'')
                            continue
                        if isinstance(x, (datetime.date, datetime.datetime)):
                            d.append(str(x).encode('utf-8'))
                            continue
                        if isinstance(x, (int, float)):
                            d.append(repr(x))
                            continue
                        if sys.version_info[0] < 3:
                            d.append(x)
                        else:
                            d.append(x.encode())

                    if apx:
                        if first:  # extend the header once, not on every row
                            cols = cols + apx_cols
                        is_apx = ['N'] * len(d) + ['Y'] * len(apx_cols)
                        d = d + apx.split(cli.csep.decode())
                        # parenthesized so the optional terminator extends d
                        # instead of replacing the appended row with []
                        data.append(d + ([''] if terminate_line else []))
                    else:
                        is_apx = ['N'] * len(d)
                        data.append(d)
                    if first:
                        pfmtd([
                            dict(Col=col, Row=d[i], Appendix=is_apx[i])
                            for i, col in enumerate(cols)
                        ], 'First row')
                        first = False

            else:
                break
            out.data = data
            print('Elapsed [%d] APPEND: %s' % (id, time.time() - append_time))

            out.chunk_id, out.current_ts, out.actor = id, current_ts, self.cln
            if not data:
                break
            print('Elapsed [%d] POST fetch: %s' %
                  (id, time.time() - start_time))
            yield out
            id += 1
            total_read += len(data)
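
The inner loop of Example #6 (and the generator variant in Example #7 below) normalizes each database value for a delimited, byte-oriented dump. Factored out as a sketch (and, unlike the original, encoding numbers to bytes as well, so the result is uniformly bytes):

import datetime

def encode_value(x):
    # None -> empty field, dates stringified, numbers via repr(), text encoded.
    if x is None:
        return b''
    if isinstance(x, (datetime.date, datetime.datetime)):
        return str(x).encode('utf-8')
    if isinstance(x, (int, float)):
        return repr(x).encode('ascii')
    return x.encode('utf-8')

print([encode_value(v) for v in (None, 3.14, 'abc', datetime.date(2020, 1, 2))])
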
Example #7
    def fetch_row(self,
                  cur,
                  source,
                  qname,
                  out,
                  skip_header,
                  terminate_line=False):
        cols = [c[0] for c in cur.description]
        total_read = 0
        apx_cmap, apx_cols, apx = cli.get_appendix2()

        header = None
        first = True
        is_apx = []
        start_time = time.time()
        if 1:

            print('Elapsed PRE fetch: %s' % (time.time() - start_time))
            start_time = time.time()
            out.data = []

            fetch_time = time.time()

            row = cur.fetchone()
            rid = 0
            while row:

                d = []
                for x in row:
                    if x is None:
                        d.append(b'')
                        continue
                    if isinstance(x, (datetime.date, datetime.datetime)):
                        d.append(str(x).encode('utf-8'))
                        continue
                    if isinstance(x, (int, float)):
                        d.append(repr(x))
                        continue
                    if sys.version_info[0] < 3:
                        d.append(x)
                    else:
                        d.append(x.encode())

                if apx:
                    if first:  # extend the header once, not on every row
                        cols = cols + apx_cols
                    is_apx = ['N'] * len(d) + ['Y'] * len(apx_cols)
                    d = d + apx.split(cli.csep.decode())
                    # parenthesized so the optional terminator extends d
                    # instead of yielding an empty list
                    yield d + ([''] if terminate_line else [])
                else:
                    is_apx = ['N'] * len(d)
                    yield d
                if first:
                    pfmtd([
                        dict(Col=col, Row=d[i], Appendix=is_apx[i])
                        for i, col in enumerate(cols)
                    ], 'First row')
                    first = False
                row = cur.fetchone()
                rid += 1

            print('Elapsed POST fetch: %s' % (time.time() - start_time))
Example #8
    def describe(self):
        pfmtd([dict(Column=x) for x in self.get_header()],
              'File header: %s' % self.file_name)
Example #9
def run():
    stats = {}
    total_ins = 0
    term_line = True
    #//validate cols
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)
        for _trg_class, val in cli.cfg['target'][_source].items() or []:
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

            if tcfg.get('accountingDate', None):  #//set acct_year, acct_mon for new target table naming
                fmt = cli.get_parsed(ckey='accountingDateFmt', cfg=tcfg)
                cli.set_target_table(tcfg=tcfg, acct_date=cli.get_parsed(ckey='accountingDate', cfg=tcfg), fmt=fmt)

            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)

            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
            toDB.desc_table(schema=tcfg['targetSchema'], tbl=tcfg['targetTable'], col_ord=False)
            #// validate cols
            cfg_cols = [x[u'columnName'] for x in cli.scfg[u'columnMappings']]
            tcols = toDB.get_cols()
            t_vs_c = set(tcols) - set(cfg_cols)
            c_vs_t = set(cfg_cols) - set(tcols)
			

            if t_vs_c:
                pfmtd([dict(c_vs_t=c_vs_t)], 'Config has columns missing in target table.')
                raise Exception('Target table has columns missing in config: %s' % t_vs_c)

            if c_vs_t:
                pfmtd([dict(t_vs_c=t_vs_c)], 'Target table has columns missing in config.')
                raise Exception('Config has columns missing in target table: %s' % c_vs_t)
            toDB.commit_transaction(trans=to_conn)
    #// transfer
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)

        _dbname = cli.scfg["sourceDb"]
        #// in include/extractor
        fromDB = create_reader(aname=_src_class, app_init=app_init)

        fromDB.begin_transaction(env=cli.scfg['sourceDb'], out=from_conn)
        if 1:  #//Extract to File

            for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
                FileWriter = create_writer(aname=_dmp_class, app_init=app_init)
                fromDB.set_loader(FileWriter)
                cli.dcfg = cli.get_dcfg(_dmp_class)
                for _trg_class, val in cli.cfg['target'][_source].items() or []:

                    cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                    file_ins_cnt = 0
                    FileWriter.open_file(out=dump_file)
                    start_time = time.time()
                    # //if fetch_many is not in IQ - it's in include/extractor/common/Extractor.py
                    for iq_data in fromDB.fetch_many(chunk_size=file_size_rows, source=cli.scfg, qname='sourceStmt', out=InOut(), skip_header=0, terminate_line=term_line):
                        if not file_ins_cnt:
                            FileWriter.create_header(file=dump_file, header=fromDB.get_header(), cfg=cli.dcfg, terminate_line=term_line)
                        FileWriter.append_data(file=dump_file, data=iq_data, cfg=cli.dcfg)
                        file_ins_cnt += len(iq_data.data)
                        FileWriter.terminate(file=dump_file)
                        print(len(iq_data.data))
                        print('Elapsed read/write: %s' % (time.time() - start_time))
                        start_time = time.time()

                    if not file_ins_cnt:  #in case there's no data
                        FileWriter.create_header(file=dump_file, header=fromDB.get_header(), cfg=cli.dcfg, terminate_line=term_line)

                    FileWriter.close_file(file=dump_file)

                    total_ins += file_ins_cnt
        fromDB.desc_cur(cur=from_conn.cur, colord=False)

        fromDB.commit_transaction(trans=from_conn)
    log.info('Total records saved: %d' % total_ins)
    #// Load to IQ
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]

        DirReader = create_reader(aname=_src_class, app_init=app_init)

        if 1:  #//Get the file names
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)

            DirReader.glob_dir(path=path, out=data_files, ext='*.*')
			
        if 1:  #//Load to DB

            for _trg_class, val in cli.cfg['target'][_source].items() or []:

                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

                _dbname = tcfg["targetDb"]
                toDB = create_writer(aname=_trg_class, app_init=app_init)

                toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)

                table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
                toDB.desc_table(schema=tcfg['targetSchema'], tbl=tcfg['targetTable'], col_ord=False)

                #// validate cols
                cfg_cols = [x[u'columnName'] for x in cli.scfg[u'columnMappings']]

                acols = cli.get_alt_cols(scfg)
                tcols = toDB.get_cols()
                fcols_alt = []
                for data_file in data_files.file_names:
                    dataFile = create_reader(aname='File', app_init=app_init, file_name=data_file, scfg=dir_scfg)
                    dataFile.describe()
                    file_stats[data_file] = dataFile.line_count() - cli.header_size(dir_scfg)
                    fcols_alt = [acols.get(x.decode(), x.decode()) for x in dataFile.get_header(data_file, dir_scfg)]
                    f_vs_c = set(fcols_alt) - set(cfg_cols)
                    c_vs_f = set(cfg_cols) - set(fcols_alt)
                    f_vs_t = set(fcols_alt) - set(tcols)
                    t_vs_f = set(tcols) - set(fcols_alt)
                    if f_vs_c:
                        pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                        pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                        pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                        raise Exception('Dump file has columns missing in config: %s' % f_vs_c)

                    if c_vs_f:
                        pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                        pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                        pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                        raise Exception('Config has columns missing in dump file: %s' % c_vs_f)

                    if f_vs_t:
                        pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                        pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                        pfmtd([dict(t_vs_f=t_vs_f)], 'Target table has columns missing in dump file.')
                        raise Exception('Dump file has columns missing in target table: %s' % f_vs_t)
                    if t_vs_f:
                        pfmtd([dict(f_vs_c=f_vs_c)], 'Dump file has columns missing in config.')
                        pfmtd([dict(c_vs_f=c_vs_f)], 'Config has columns missing in dump file.')
                        pfmtd([dict(f_vs_t=f_vs_t)], 'Dump file has columns missing in target table.')
                        raise Exception('Target table has columns missing in dump file: %s' % t_vs_f)


                #toDB.truncate_table(table=table)
                toDB.bulk_load(trans=to_conn, file_names=data_files, qname='insertStmt', cfg=(dir_scfg, tcfg), out=insert_stats, header=fcols_alt)
                toDB.commit_transaction(trans=to_conn)

                for k in file_stats.keys():
                    assert file_stats[k], 'Dump file is empty'
                    assert insert_stats[k] not in [-1], 'Insert failed'
                    assert insert_stats[k] == file_stats[k], 'Insert vs file count diff: %s<>%s for file \n%s' % (insert_stats[k], file_stats[k], k)

                if 1:
                    stmt = cli.get_parsed(ckey='afterCountStmt', cfg=tcfg)

                    cur = toDB.exec_query(stmt)
                    after_cnt = cur.fetchall()[0][0]
                    print(after_cnt)

                stats['%s->%s' % (_source, _trg_class)] = st = OrderedDict()
                st['source_cnt'] = total_ins
                st['total_inserted'] = sum(insert_stats.values())
                st['after_count'] = after_cnt
                st['rollback'] = cli.get_parsed(ckey='rollbackStmt', cfg=tcfg)
                st['purge'] = cli.get_parsed(ckey='purgeStmt', cfg=tcfg)

                if 1:  #//validate
                    try:
                        assert st['source_cnt'] == st['total_inserted'], "source_cnt %s <> total_inserted %s" % (st['source_cnt'], st['total_inserted'])
                        assert st['source_cnt'] == st['after_count'], "source_cnt %s <> after_count %s" % (st['source_cnt'], st['after_count'])
                    except Exception:
                        del_cnt = toDB.exec_dml(dml=st['rollback'], trans=to_conn, commit=True)
                        log.info('Rolled back recs: %d' % del_cnt)
                        raise
                if 1:  #//purge
                    purge_cnt = toDB.exec_dml(dml=st['purge'], trans=to_conn, commit=True)
                    log.info('Purged old recs: %d' % purge_cnt)
                toDB.commit_transaction(trans=to_conn)

    if 0:
        Email.send_email(**email_args)
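
The column check that run() repeats for every source/target pair is a two-way set difference that fails loudly with the exact mismatch. As a standalone helper:

def validate_columns(cfg_cols, tcols):
    # Compare both directions so the error names exactly what is missing where.
    t_vs_c = set(tcols) - set(cfg_cols)   # in the table, missing from config
    c_vs_t = set(cfg_cols) - set(tcols)   # in config, missing from the table
    if t_vs_c:
        raise Exception('Target table has columns missing in config: %s' % t_vs_c)
    if c_vs_t:
        raise Exception('Config has columns missing in target table: %s' % c_vs_t)

validate_columns(['id', 'name'], ['name', 'id'])  # order-insensitive, passes
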
Example #10
    def insert_RC_data(self,
                       trans,
                       target,
                       source,
                       stmt,
                       insert_stats,
                       skip_header=0):
        pipe = source.pipe
        assert pipe
        skip = str(skip_header).strip()
        assert skip in ['0', '1'], 'skip_header [%s] should be "0" or "1"' % skip
        if skip == '1':
            pipe.readline()

        start_time = time.time()
        xref = self.cli.tcfg["columnMap"]

        cols = [v[0] for _, v in xref.items()]
        cli.to_cols = ',\n'.join(cols)

        cli.to_quotes = ','.join('?' * len(cols))
        assert cli.to_cols
        sql = self.get_query(target, stmt)
        #cnxn = pyodbc.connect(conn_str, autocommit=True)
        trans.conn.set_attr(pyodbc.SQL_ATTR_TXN_ISOLATION,
                            pyodbc.SQL_TXN_SERIALIZABLE)
        trans.conn.autocommit = False
        cur = trans.conn.cursor()
        fline = line = pipe.readline()
        self.rows = rows = []
        #pp(xref)
        apx = {x[0]: x[2] for x in xref.values() if len(x) == 3}
        apx = {x: cli.get_parsed(ckey=x, cfg=apx) for x, v in apx.items()}

        ext_c = list(set(xref.keys()) - set(fline.keys()))
        if ext_c:
            log.warning('Config has extra columns missing in REST')

        ext_l = list(set(fline.keys()) - set(xref.keys()))
        if ext_l:
            log.warning('REST has extra columns missing in DB')


        ignore = [
            u'signOffVersion', u'signOffTime', u'RBDate', u'asofDate',
            u'DataSource', u'GPOSMATTol', u'CCY', u'DEPolicy', u'Price',
            u'UnrealizedPnL', u'Fund', u'RawUnrealizedPnL', u'SwapType',
            u'SettlementDate', u'BuySell', u'IndependentAmount',
            u'ConfirmStatus', u'RefEntityName', u'ReferenceOb',
            u'CounterpartyRefID', u'CDSType', u'TerminationDateUnadjusted',
            u'TerminationDateAdjusted', u'StandardRefObligation', u'FixedRate',
            u'MaturityDate', u'StrikePrice', u'IsSpot', u'Symbol',
            u'VolatilityStrike', u'Direction', u'MaturityDateUnadjusted',
            u'TradeCurrency', u'ProductType', u'UnderlyingSecurity',
            u'MaturityTenor', u'PaymentDate', u'CAP_FLOOR',
            u'MaturityDateAdjusted', u'IsElectronicallyConfirmed',
            u'Classification', u'FloatingRateIndex', u'IsTodayResetDate',
            u'FloatRateIndexRec', u'IndexTenorRec', u'IsOldED',
            u'DayCountFractionPay', u'DayCountFractionRec',
            u'PaymentFrequencyPay', u'PaymentFrequencyRec', u'RollDate',
            u'CCP', u'CCPConfirmRefId', u'IndexTenorPay', u'SpreadPay',
            u'FloatRateIndexPay', u'TerminationDate', u'FloatingIndex',
            u'StartFlow', u'CptyRefID', u'Country', u'Barrier1Strike',
            u'Barrier1CCYPair', u'bdi', u'Barrier2Strike', u'Barrier2CCYPair',
            u'PutCall', u'UnderlyingSymbol', u'OptionStyle',
            u'TerminationDateUnderlyingUnadjusted', u'CallPut', u'PayReceive',
            u'TerminationDateUnderlyingAdjusted', u'ProceedsNotional',
            u'ContractType', u'ExecutingAccount', u'SSGClientNote', u'Issuer'
        ]

        while line:
            line.update(apx)
            ext_s = set(line.keys()) - set(xref.keys())
            if ext_s - set(ignore):
                pfmtd([
                    dict(Id=k, REST_Col=v)
                    for k, v in enumerate(list(ext_s - set(ignore)))
                ], 'Extra cols in REST/IGNORE')
                ignore = ignore + list(ext_s - set(ignore))

            # Collect the mapped values. The original row-collection line was
            # left commented out, which kept `rows` empty and made the insert
            # below unreachable; this is a minimal restoration of it.
            rows.append([line[x] for x in xref if x in line])
            line = pipe.readline()
        chunk = 3
        total = 0
        cid = 0
        psql(sql, 'Insert')
        if not rows:
            raise Exception('No data in REST pipe.')
        else:

            ignore_cols = target["ignoreSourceColumns"]

            if not len(fline) == len(rows[0]) + len(ignore_cols):
                pp(fline)
                pp(rows[0])

                raise Exception(
                    'line %s <> row %s not in xref:%s, not in source:%s' %
                    (len(fline), len(rows[0]), set(fline.keys()) -
                     set(xref.keys()), set(xref.keys()) - set(fline.keys())))

            pfmtd([
                dict(Col=col, Row=rows[0][i])
                for i, col in enumerate([col for col in xref])
            ], 'First row')

        while total < len(rows):
            cur.fast_executemany = True
            data = rows[total:][:chunk]
            #ppe(data)
            cur.executemany(sql, data)
            # NOTE: the original issued a ROLLBACK after every chunk, which
            # made the load a dry run; committing is left to the caller here.
            ins = len(data)
            total += ins
            cid += 1
            log.info('[{}] [{}] {}: Running: {:,.0f}, Rows: {:,.0f}'.format(
                self.objtype, cid, self.cln, total, ins))

        log.info(
            '[{}]: {}: Inserted: {:,.0f}, To-Schema:{}, To-Table:{}, Skipped: {}, Elapsed: {}'
            .format(self.objtype, self.cln, len(rows), target['targetSchema'],
                    target["targetTable"], skip,
                    round((time.time() - start_time), 2)))
        pipe.close()
        insert_stats.inserted_cnt = total
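
The core of Example #10 is a chunked pyodbc insert with fast_executemany. A minimal sketch of that pattern, minus the per-chunk ROLLBACK (which, as originally written, turned the load into a dry run); committing is left to the caller:

def insert_chunked(conn, sql, rows, chunk=1000):
    # Assumes a pyodbc connection and an INSERT with one '?' per column.
    cur = conn.cursor()
    cur.fast_executemany = True  # pyodbc's bulk parameter-binding fast path
    total = 0
    while total < len(rows):
        data = rows[total:total + chunk]
        cur.executemany(sql, data)
        total += len(data)
    return total
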
Example #11
    def load_file(self,
                  trans,
                  file_obj,
                  schema,
                  table_name,
                  qname,
                  fmt_cols,
                  cfg,
                  skip=0,
                  apx=None,
                  stats=None):

        scfg, tcfg = cfg
        file_name = file_obj.file_name

        assert os.path.isfile(file_name)
        if 1:
            colsep = scfg['columnDelimiter']
            assert colsep

            lcnt = file_obj.line_count(file_name)

            if 1:
                pp(file_obj.cols)

                #cols = ','.join([col.decode() for col in file_obj.cols])
                #pp(cols)

                trans.conn.autocommit = False

                # Columns listed in fmt_cols are loaded with FORMAT 'hex';
                # this assumes cols_alt holds plain column-name strings.
                copyfmt = ',\n'.join([
                    "%s FORMAT 'hex'" % col if col in fmt_cols else '%s' % col
                    for col in file_obj.cols_alt
                ])

                assert os.path.isfile(file_obj.file_name)

                stmt = """
COPY %s.%s (%s ) 
FROM LOCAL '%s' 
DELIMITER '|' ESCAPE AS '^' NULL '' 
SKIP %d ABORT ON ERROR NO COMMIT """ % (schema, table_name, copyfmt,
                                        file_obj.file_name, skip)
                try:
                    self.desc_table(schema, table_name)
                    psql(stmt, 'Load')
                    trans.cur.execute(stmt)

                except Exception:
                    trans.conn.rollback()
                    psql(stmt)

                    raise

                accepted, rejected = trans.cur.execute(
                    'SELECT GET_NUM_ACCEPTED_ROWS(),GET_NUM_REJECTED_ROWS()'
                ).fetchall()[0]
                pfmtd([
                    dict(Line_count=lcnt - skip,
                         Accepted=accepted,
                         Rejected=rejected)
                ], 'Load stats')
                assert lcnt - skip == accepted

                out = OrderedDict()
                out['table_name'] = table_name
                out['accepted'] = accepted
                out['rejected'] = rejected
                out['linecount'] = lcnt
                out['skip'] = skip
                out['diff'] = lcnt - skip - accepted
                stats[table_name] = out
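
Example #11 verifies the Vertica COPY with GET_NUM_ACCEPTED_ROWS()/GET_NUM_REJECTED_ROWS(); both report on the most recent COPY in the same session, so they must run on the same connection before commit. The check, isolated as a sketch:

def verify_copy(cur, expected):
    # Must run on the session that executed the COPY, before COMMIT.
    cur.execute('SELECT GET_NUM_ACCEPTED_ROWS(), GET_NUM_REJECTED_ROWS()')
    accepted, rejected = cur.fetchall()[0]
    assert accepted == expected, 'loaded %s of %s rows (%s rejected)' % (
        accepted, expected, rejected)
    return accepted, rejected
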
Example #12
def run():
    skip = 1
    total_ins = 0
    term_line = True
    #//validate cols
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)
        for _trg_class, val in cli.cfg['target'][_source].items() or []:
            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)

            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
            toDB.desc_table(schema=tcfg['targetSchema'],
                            tbl=tcfg['targetTable'],
                            col_ord=False)
            #// validate cols
            cfg_cols = [x[u'columnName'] for x in cli.scfg[u'columnMappings']]
            tcols = toDB.get_cols()
            t_vs_c = set(tcols) - set(cfg_cols)
            c_vs_t = set(cfg_cols) - set(tcols)
            if t_vs_c:
                pfmtd([dict(c_vs_t=c_vs_t)],
                      'Config has columns missing in target table.')

                raise Exception(
                    'Target table has columns missing in config: %s' % t_vs_c)

            if c_vs_t:
                pfmtd([dict(t_vs_c=t_vs_c)],
                      'Target table has columns missing in config.')
                raise Exception(
                    'Config has columns missing in target table: %s' % c_vs_t)
            toDB.commit_transaction(trans=to_conn)
    #// transfer
    for _source, val in cli.cfg['source'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]
        cli.scfg = scfg = cli.get_scfg(_src_class)

        _dbname = cli.scfg["sourceDb"]
        fromDB = create_reader(aname=_src_class, app_init=app_init)

        fromDB.begin_transaction(env=cli.scfg['sourceDb'], out=from_conn)
        if 1:  #//Extract to Dir

            for _dmp_class, val in cli.cfg['dump'][_source].items() or []:
                FileWriter = create_writer(aname=_dmp_class, app_init=app_init)
                fromDB.set_loader(FileWriter)
                cli.dcfg = cli.get_dcfg(_dmp_class)
                for _trg_class, val in cli.cfg['target'][_source].items(
                ) or []:

                    cli.tcfg = tcfg = cli.get_tcfg(_trg_class)
                    file_ins_cnt = 0
                    FileWriter.open_file(out=dump_file)

                    for iq_data in fromDB.fetch_many(chunk_size=file_size_rows,
                                                     source=cli.scfg,
                                                     qname='sourceStmt',
                                                     out=InOut(),
                                                     skip_header=0,
                                                     terminate_line=term_line):

                        if not file_ins_cnt:
                            FileWriter.create_header(
                                file=dump_file,
                                header=fromDB.get_header(),
                                cfg=cli.dcfg,
                                terminate_line=term_line)
                        FileWriter.append_data(file=dump_file,
                                               data=iq_data,
                                               cfg=cli.dcfg)
                        file_ins_cnt += len(iq_data.data)
                    if not file_ins_cnt:  #in case there's no data
                        FileWriter.create_header(file=dump_file,
                                                 header=fromDB.get_header(),
                                                 cfg=cli.dcfg,
                                                 terminate_line=term_line)
                    FileWriter.close_file(file=dump_file)
                    total_ins += file_ins_cnt
        fromDB.desc_cur(cur=from_conn.cur, colord=False)

        fromDB.commit_transaction(trans=from_conn)
    log.info('Total records saved: %d' % total_ins)
    #// Load to IQ
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]

        DirReader = create_reader(aname=_src_class, app_init=app_init)

        if 1:  #//Get the file names
            cli.set_source(_source)
            dir_scfg = cli.get_dcfg(_src_class)
            path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)

            DirReader.glob_dir(path=path, out=data_files, ext='*.*')

        if 1:  #//Load to DB

            for _trg_class, val in cli.cfg['target'][_source].items() or []:

                cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

                _dbname = tcfg["targetDb"]
                toDB = create_writer(aname=_trg_class, app_init=app_init)

                toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)

                table = '%s.%s' % (tcfg['targetSchema'], tcfg['targetTable'])
                toDB.desc_table(schema=tcfg['targetSchema'],
                                tbl=tcfg['targetTable'],
                                col_ord=None)

                #// validate cols
                cfg_cols = [
                    x[u'columnName'] for x in cli.scfg[u'columnMappings']
                ]

                acols = cli.get_alt_cols(scfg)
                tcols = toDB.get_cols()
                fcols_alt = []
                for data_file in data_files.file_names:
                    dataFile = create_reader(aname='File',
                                             app_init=app_init,
                                             file_name=data_file,
                                             scfg=dir_scfg)
                    dataFile.describe()
                    file_stats[data_file] = dataFile.line_count(
                    ) - cli.header_size(dir_scfg)
                    fcols_alt = [
                        acols.get(x.decode(), x.decode())
                        for x in dataFile.get_header(data_file, dir_scfg)
                    ]
                    f_vs_c = set(fcols_alt) - set(cfg_cols)
                    c_vs_f = set(cfg_cols) - set(fcols_alt)
                    f_vs_t = set(fcols_alt) - set(tcols)
                    t_vs_f = set(tcols) - set(fcols_alt)
                    if f_vs_c:
                        pfmtd([dict(c_vs_f=c_vs_f)],
                              'Config has columns missing in dump file.')
                        pfmtd([dict(f_vs_t=f_vs_t)],
                              'Dump file has columns missing in target table.')
                        pfmtd([dict(t_vs_f=t_vs_f)],
                              'Target table has columns missing in dump file.')
                        raise Exception(
                            'Dump file has columns missing in config: %s' %
                            f_vs_c)

                    if c_vs_f:
                        pfmtd([dict(f_vs_c=f_vs_c)],
                              'Dump file has columns missing in config.')
                        pfmtd([dict(f_vs_t=f_vs_t)],
                              'Dump file has columns missing in target table.')
                        pfmtd([dict(t_vs_f=t_vs_f)],
                              'Target table has columns missing in dump file.')
                        raise Exception(
                            'Config has columns missing in dump file: %s' %
                            c_vs_f)

                    if f_vs_t:
                        pfmtd([dict(f_vs_c=f_vs_c)],
                              'Dump file has columns missing in config.')
                        pfmtd([dict(c_vs_f=c_vs_f)],
                              'Config has columns missing in dump file.')
                        pfmtd([dict(t_vs_f=t_vs_f)],
                              'Target table has columns missing in dump file.')
                        raise Exception(
                            'Dump file has columns missing in target table: %s'
                            % f_vs_t)
                    if t_vs_f:
                        pfmtd([dict(f_vs_c=f_vs_c)],
                              'Dump file has columns missing in config.')
                        pfmtd([dict(c_vs_f=c_vs_f)],
                              'Config has columns missing in dump file.')
                        pfmtd([dict(f_vs_t=f_vs_t)],
                              'Dump file has columns missing in target table.')
                        raise Exception(
                            'Target table has columns missing in dump file: %s'
                            % t_vs_f)

                if 1:
                    for data_fn in [x for x in data_files.file_names]:
                        dataFile = create_reader(aname="File",
                                                 app_init=app_init,
                                                 file_name=data_fn,
                                                 scfg=dir_scfg)
                        dataFile.describe()

                        fileCols = [
                            col.decode() for col in dataFile.get_header_cols()
                        ]
                        tbl = tcfg["targetTable"]
                        assert tbl
                        if 1:

                            if 0 and tbl not in do_not_delete:
                                stmt = 'DELETE FROM %s WHERE %s in (SELECT t.%s FROM %s t)' % (
                                    tbl, masterTblCol, masterTblCol, masterTbl)
                                deleted[tbl] = toDB.exec_dml(stmt,
                                                             trans=to_conn,
                                                             commit=False)
                                pfmt([[deleted[tbl]]],
                                     ['Deleted from %s' % tbl])
                            else:
                                deleted[tbl] = -1
                            if 0:
                                acols = cli.get_alt_cols(scfg)
                                dataFile.cols_alt = [
                                    acols.get(x.decode(), x.decode())
                                    for x in dataFile.cols
                                ]
                            else:
                                dataFile.set_alt_cols()

                            missing_cols = list(
                                set(dataFile.cols_alt) - set(tcols))
                            pfmt([(tbl, x) for x in missing_cols],
                                 ['Table', 'Missing columns'])
                            schema = tcfg["targetSchema"]
                            if missing_cols:
                                pfmt([[x] for x in missing_cols],
                                     ['Columns in Source, but not Target'])
                                to_conn.conn.rollback()
                                toDB.desc_table(schema, tbl)
                                raise Exception(
                                    'File column %s missing in table "%s".' %
                                    (missing_cols, tbl))

                            if 1:
                                apx = {}
                                fmt_cols = []

                                toDB.load_file(trans=to_conn,
                                               file_obj=dataFile,
                                               schema=schema,
                                               table_name=tbl,
                                               qname='insertStmt',
                                               fmt_cols=fmt_cols,
                                               cfg=(dir_scfg, tcfg),
                                               skip=skip,
                                               apx=apx,
                                               stats=stats)
                                loaded[data_fn] = tbl
                        else:
                            not_loaded[data_fn] = tbl

                    else:
                        if 1:
                            toDB.commit_transaction(trans=to_conn)
                            pfmt(
                                [[k] + [deleted[k]] + list(v)[1:]
                                 for k, v in stats.items() if deleted[k] >= 0],
                                [
                                    'Table', 'Deleted', 'Accepted', 'Rejected',
                                    'Line count', 'Skip', 'Diff'
                                ], 'Load completed (deleted)'.upper())
                            pfmt([(k, v) for k, v in loaded.items()],
                                 ['Loaded Files', 'Loaded Tables'])
                            pfmt([(k, v) for k, v in not_loaded.items()],
                                 ['Not loaded Files', 'Not loaded Tables'])

                e()  # early exit: the disabled bulk_load block below never runs
                if 0:
                    #toDB.truncate_table		( table = table )
                    toDB.bulk_load(trans=to_conn,
                                   file_names=data_files,
                                   qname='insertStmt',
                                   cfg=(dir_scfg, tcfg),
                                   out=insert_stats)

                for k in file_stats.keys():
                    assert insert_stats[k] == file_stats[
                        k], 'Insert vs file count diff: %s<>%s for file \n%s' % (
                            insert_stats[k], file_stats[k], k)
                toDB.commit_transaction(trans=to_conn)

    if 0:
        Email.send_email(**email_args)
Example #13
def run():
    skip = 2

    do_not_load = []
    for _source, val in cli.cfg['dump'].items():
        cli.set_source(_source)
        _src_class = list(val.keys())[0]

        DirReader = create_reader(aname=_src_class, app_init=app_init)

        cli.set_source(_source)
        dir_scfg = cli.get_dcfg(_src_class)
        path = cli.get_parsed(ckey='dumpDir', cfg=dir_scfg)
        ok_files = InOut(file_names=[])
        DirReader.glob_dir(path=path, out=ok_files, ext='*.ok')

        loaded = {}

        for _trg_class, val in cli.cfg['target'][_source].items():

            cli.tcfg = tcfg = cli.get_tcfg(_trg_class)

            _dbname = tcfg["targetDb"]
            toDB = create_writer(aname=_trg_class, app_init=app_init)

            do_not_delete = tcfg['doNotDeleteTables']
            do_not_load = tcfg['doNotLoadTables']
            to_conn = InOut()
            toDB.begin_transaction(env=tcfg['targetDb'], out=to_conn)
            toSchema = tcfg['targetSchema']
            stmt = 'set search_path to %s' % toSchema
            psql(stmt)
            to_conn.cur.execute(stmt)

            pkstats = {}
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File',
                                       app_init=app_init,
                                       file_name=okfn,
                                       scfg=dir_scfg)
                okdir, okname = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)

                out_files = InOut(file_names=[])

                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')

                if 1:  # Check whether some files are missing in config
                    ftlist = []

                    for out_fn in out_files.file_names:
                        print(out_fn)
                        ftlist.append(os.path.basename(out_fn).split('.')[1])

                    pfmt([[x] for x in ftlist], ['Files->Tables'])

                    ctables = cli.tcfg['targetTables'].keys()
                    extra_file_tables = list(set(ftlist) - set(ctables))
                    pfmt([[x] for x in extra_file_tables],
                         ['Tables not in config.'])
                    extra_config_tables = list(set(ctables) - set(ftlist))
                    pfmt([[x] for x in extra_config_tables],
                         ['Tables in config but not in file names.'])
                    assert not extra_file_tables, 'Tables %s are not listed in config["targetTables"].' % extra_file_tables

                for outfn in out_files.file_names:  # Master first

                    outFile = create_reader(aname='File',
                                            app_init=app_init,
                                            file_name=outfn,
                                            scfg=dir_scfg)

                    outbn = os.path.basename(outfn)
                    tbl = outbn.split('.')[1]
                    outTbl = 'tmp_PK_%s' % tbl
                    outCols = outFile.get_header_cols()
                    apxCols = [('MartModifiedDate', 'timestamp'),
                               ('AsOfFrom', 'timestamp'),
                               ('AsOfTo', 'timestamp'), ('MD5', 'char(22)')]
                    outTblCols = toDB.get_create_col_list(outCols, apx=apxCols)

                    toCols = toDB.get_col_types(toSchema, tbl)
                    pp(toCols)

                    toDB.desc_tmp_table(outTbl, outCols + apxCols)

                    do_not_delete.append(outTbl)

                    try:

                        stmt = 'drop table %s' % outTbl
                        to_conn.cur.execute(stmt)
                    except Exception as ex:
                        if 'Table "%s" does not exist' % outTbl not in str(ex):
                            raise
                    psql(outfn)
                    stmt = 'CREATE LOCAL TEMPORARY TABLE %s ( %s )\nON COMMIT PRESERVE ROWS' % (
                        outTbl, ', \n'.join(
                            ['%s %s' % tuple(col) for col in toCols]))
                    pfmt([[stmt]], ['Create master temp PK' + outTbl])
                    toDB.exec_ddl(stmt)
                    if 1:  #//Load data into PK table

                        fmt_cols = {}
                        mmDt = okFile.get_value(coords=(0, 0), skip=skip)

                        md5val = (base64.b64encode(
                            hashlib.md5(b'test').digest()))

                        apx = OrderedDict()
                        apx['MartModifiedDate'] = mmDt
                        apx['AsOfFrom'] = mmDt
                        apx['AsOfTo'] = "12/31/9999"
                        apx['MD5'] = ''  #//defined on row level

                        pk_outfn = '%s.pk' % outfn
                        colsep = dir_scfg['columnDelimiter']

                        with open(pk_outfn, 'wb') as pkfh:
                            with open(outfn, 'rb') as outfh:
                                line = outfh.readline().strip()
                                pkfh.write(line +
                                           colsep.join(apx.keys()).encode() +
                                           os.linesep.encode())
                                line = outfh.readline().strip()
                                apxTypes = colsep.join(
                                    [col[1] for col in apxCols])
                                pkfh.write(line + apxTypes.encode() +
                                           os.linesep.encode())
                                line = outfh.readline().strip()
                                while line:
                                    md5 = (base64.b64encode(
                                        hashlib.md5(line.replace(
                                            b'|', b'')).digest()))
                                    apx['MD5'] = md5.decode('ascii',
                                                            'ignore').strip(
                                                                '=')  #// REDO

                                    pkfh.write(
                                        line +
                                        colsep.join(apx.values()).encode() +
                                        os.linesep.encode())
                                    line = outfh.readline().strip()
                        outPkFile = create_reader(aname='File',
                                                  app_init=app_init,
                                                  file_name=pk_outfn,
                                                  scfg=dir_scfg)
                        outPkFile.set_alt_cols()

                        schema = tcfg['targetSchema']
                        toDB.load_grds_file(trans=to_conn,
                                            file_obj=outPkFile,
                                            schema=schema,
                                            table_name=outTbl,
                                            qname='insertStmt',
                                            fmt_cols=fmt_cols,
                                            cfg=(dir_scfg, tcfg),
                                            skip=skip,
                                            stats=pkstats)
                        loaded[outbn] = outTbl
                        #outPkFile.delete()

            stats = {}
            deleted = {}
            processed = []
            not_processed = []
            for okfn in ok_files.file_names:
                okFile = create_reader(aname='File',
                                       app_init=app_init,
                                       file_name=okfn,
                                       scfg=dir_scfg)
                okdir, _ = os.path.splitext(okfn)
                okbn = os.path.basename(okdir)
                assert os.path.isdir(okdir)
                snap_df = cli.get_dest_folder(okdir)
                if os.path.isdir(snap_df):
                    log.warning('[%s]Destination folder exists: [%s]' %
                                (okdir, snap_df))
                    not_processed.append(okfn)
                    continue

                out_files = InOut(file_names=[])
                DirReader.glob_dir(path=okdir, out=out_files, ext='*.out')
                apx = dict(
                    MartModifiedDate=okFile.get_value(coords=(0, 0), skip=skip))

                if 0:
                    g = input("Continue?")  # raw_input in the original Python 2 code

                not_loaded = {}

                for table_name in ftlist:
                    tmpTbl = 'tmp_PK_%s' % table_name
                    toCols = toDB.get_tab_cols(tmpTbl)
                    #pp(toCols)
                    toDB.desc_table(None, tmpTbl)
                    toDB.desc_table(toSchema, table_name)
                    if table_name in ['TxnLookupMap']:

                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} ) 
select distinct {2} 
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.{5} = tmid.{5}
AND ta.{6} = tmid.{6}
AND ta.ValidFrom = tmid.ValidFrom and ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode(), toCols[1][0].decode(),
                        toCols[2][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    elif table_name in [
                            'G3Lookup', 'GCLookup', 'GISLookup', 'GPSLookup',
                            'GPXLookup', 'GPosLookup', 'GTxLookup',
                            'FundToBusinessUnitMap', 'TxEditReason'
                    ]:

                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} )
select distinct {2}
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.{5} = tmid.{5}
AND ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL 
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode(), toCols[1][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])
                    else:
                        tmpCols = ',\n  '.join(
                            ['tmid.%s' % col[0].decode() for col in toCols])
                        ins = """ 
insert into {0} ( {1} )
select distinct {2}
from {3} tmid LEFT JOIN {0} ta ON ta.{4} = tmid.{4}
AND ta.AsOfTo = tmid.AsOfTo
where ta.MD5 <> tmid.MD5
OR ta.{4} is NULL ;
""".format(table_name, ',\n  '.join([col[0].decode() for col in toCols]),
                        tmpCols, tmpTbl, toCols[0][0].decode())
                        psql(ins)
                        inserted = toDB.exec_dml(ins,
                                                 trans=to_conn,
                                                 commit=False)
                        pfmtd([dict(Inserted=inserted)])

                if 1:
                    toDB.commit_transaction(trans=to_conn)

                    pfmt([[k] + list(v.values())[1:]
                          for k, v in pkstats.items()], [
                              'Table', 'Accepted', 'Rejected', 'Line count',
                              'Skip', 'Diff'
                          ], 'Load completed'.upper())
                    pfmt([(k, v) for k, v in loaded.items()],
                         ['Loaded Files', 'Loaded Tables'])
                    pfmt([(k, v) for k, v in not_loaded.items()],
                         ['Not loaded Files', 'Not loaded Tables'])
                    assert os.path.isdir(okdir)
                    if 0:
                        cli.MoveSnapFolder(okdir)
                    processed.append(dict(ProcessedFile=okfn))

            if not ok_files.file_names:
                pfmtd([
                    dict(NoFiles='No OK files at working dir: [ %s ]' %
                         cli.pa[0])
                ])

            pfmtd(processed)
            pfmtd(not_processed)

    if 0:
        email_args.update(dict(cli_stats=None))
        Email.send_email(**email_args)
        cli.done()
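
The per-row checksum in Example #13 is an MD5 over the line with column delimiters stripped, base64-encoded and with the '=' padding trimmed. Isolated as a sketch:

import base64
import hashlib

def row_md5(line, sep=b'|'):
    # Delimiters are removed first so reformatting the row does not change
    # its hash; trailing base64 padding is dropped as in the original.
    digest = hashlib.md5(line.replace(sep, b'')).digest()
    return base64.b64encode(digest).decode('ascii').rstrip('=')

print(row_md5(b'42|ACME|2020-01-02'))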