def __init__(self, table_name):
    """Initialize the sqoop job wrapper for *table_name*.

    Sets up per-table logging and locking, loads the environment YAML
    referenced by the module-level ``config``, and builds the sqoop
    table / json / pyhive helpers.
    """
    self.logger = EtlLogger.get_logger(table_name)  # use table name as the log name
    self.lock = JobLock(table_name)  # use table name as the lock name
    # safe_load avoids constructing arbitrary Python objects from the YAML
    # file, and the context manager closes the handle (the original
    # yaml.load(open(...)) leaked it and used the unsafe default loader).
    with open(config['ENV']) as env_file:
        self.env = yaml.safe_load(env_file)
    self.config = config
    self.table = sqoop_table(table_name, config, self.logger)
    self.json = sqoop_json(table_name, config, self.logger)
    self.pyhive = sqoop_pyhive(table_name, config, self.logger)
def __init__(self, table_name):
    """Initialize the sqoop job wrapper for *table_name*.

    Sets up per-table logging and locking, loads the environment YAML
    referenced by the module-level ``config``, and builds the sqoop
    table / json / pyhive helpers.
    """
    self.logger = EtlLogger.get_logger(table_name)  # use table name as the log name
    self.lock = JobLock(table_name)  # use table name as the lock name
    # safe_load avoids constructing arbitrary Python objects from the YAML
    # file, and the context manager closes the handle (the original
    # yaml.load(open(...)) leaked it and used the unsafe default loader).
    with open(config['ENV']) as env_file:
        self.env = yaml.safe_load(env_file)
    self.config = config
    self.table = sqoop_table(table_name, config, self.logger)
    self.json = sqoop_json(table_name, config, self.logger)
    self.pyhive = sqoop_pyhive(table_name, config, self.logger)
def __init__(self):
    """Initialize the job: logging, locking, env config, and DB session.

    Reads the environment YAML referenced by the module-level ``config``,
    opens an OnlineDB connection against its DSN, and runs the configured
    SET_SCHEMA_SQL so subsequent statements target the right schema.
    """
    self.logger = EtlLogger.get_logger(self.__class__.__name__)  # use class name as the log name
    self.lock = JobLock(self.__class__.__name__)  # use class name as the lock name
    # safe_load avoids constructing arbitrary Python objects from the YAML
    # file, and the context manager closes the handle (the original
    # yaml.load(open(...)) leaked it and used the unsafe default loader).
    with open(config['ENV']) as env_file:
        self.env = yaml.safe_load(env_file)
    self.db = OnlineDB(self.env['DSN'], logger=self.logger)
    # Pin the session to the configured schema before any other SQL runs.
    set_schema_sql = self.env['SET_SCHEMA_SQL']
    self.db.executeSQL(set_schema_sql)
    self.config = config
def __init__(self):
    """Initialize the job: logging, locking, env config, and DB session.

    Reads the environment YAML referenced by the module-level ``config``,
    opens an OnlineDB connection against its DSN, and runs the configured
    SET_SCHEMA_SQL so subsequent statements target the right schema.
    """
    self.logger = EtlLogger.get_logger(self.__class__.__name__)  # use class name as the log name
    self.lock = JobLock(self.__class__.__name__)  # use class name as the lock name
    # safe_load avoids constructing arbitrary Python objects from the YAML
    # file, and the context manager closes the handle (the original
    # yaml.load(open(...)) leaked it and used the unsafe default loader).
    with open(config['ENV']) as env_file:
        self.env = yaml.safe_load(env_file)
    self.db = OnlineDB(self.env['DSN'], logger=self.logger)
    # Pin the session to the configured schema before any other SQL runs.
    set_schema_sql = self.env['SET_SCHEMA_SQL']
    self.db.executeSQL(set_schema_sql)
    self.config = config
self.db.executeSQL(self.config['INSERT_COLUMN_MISMATCH'], True) #[(res,)] = self.db.retrieveSQLArgs(check_sql, args[0]) def transfer_schema_data(self): first_host = self.config['FIRST_HOST'] second_host = self.config['SECOND_HOST'] self.db.executeSQL(self.config['TRUNCATE_TEMP_COLUMNS'], True) self.db.executeSQL(self.config['TRUNCATE_TABLE_MISMATCH'], True) self.db.executeSQL(self.config['TRUNCATE_COLUMN_MISMATCH'], True) query="time /opt/vertica/bin/vsql -h "+second_host+" -U "+self.env['VERTICA_USER']+" -w "+self.env['VERTICA_PASSWORD']+" -c " query=query+" \"CONNECT TO VERTICA DW USER "+self.env['VERTICA_USER']+" PASSWORD '"+self.env['VERTICA_PASSWORD']+"' ON '"+first_host+"',"+self.env['TCP_PORT']+";" query=query+"EXPORT TO VERTICA DW.schema.temp_columns AS select * from DW.v_catalog.columns;\"" print query subprocess.call(query,shell=True) if __name__ == "__main__": yaml_file = sys.argv[0].replace(".py", ".yaml") g_logger = EtlLogger.get_logger(sys.argv[0].replace('.py', '')) g_logger.info( "Begin load: %s", datetime.now()) g_logger.info("Starting cluster data comparator ...") dh = schema_mismatch(yaml_file) start_time = time.time() dh.transfer_schema_data() dh.find_mismatch() end_time = time.time() g_logger.info("Finished comparison. Took %s seconds" % (end_time - start_time))