def __init__(self, table_name):
    self.logger = EtlLogger.get_logger(table_name)  # use the table name as the log name
    self.lock = JobLock(table_name)  # use the table name as the lock name
    with open(config['ENV']) as env_file:
        self.env = yaml.safe_load(env_file)  # safe_load avoids executing arbitrary YAML tags
    self.config = config
    self.table = sqoop_table(table_name, config, self.logger)
    self.json = sqoop_json(table_name, config, self.logger)
    self.pyhive = sqoop_pyhive(table_name, config, self.logger)
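This constructor appears to belong to a Sqoop-driven ETL job class that reads a module-level config dict. A minimal usage sketch, assuming a hypothetical enclosing class named SqoopEtlJob and an invented config path (only the attribute names come from the snippet above):

    config = {'ENV': '/etc/etl/env.yaml'}  # assumed: path to the environment YAML

    job = SqoopEtlJob('orders')  # acquires JobLock('orders') and builds the sqoop wrappers
    job.logger.info("sqoop wrappers ready for table %s", 'orders')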
Example #2
def __init__(self):
    # Read the environment configuration file (YAML)
    self.logger = EtlLogger.get_logger(self.__class__.__name__)  # use class name as the log name
    self.lock = JobLock(self.__class__.__name__)  # use class name as the lock name
    with open(config['ENV']) as env_file:
        self.env = yaml.safe_load(env_file)  # safe_load avoids executing arbitrary YAML tags
    self.db = OnlineDB(self.env['DSN'], logger=self.logger)
    set_schema_sql = self.env['SET_SCHEMA_SQL']
    self.db.executeSQL(set_schema_sql)  # switch to the schema configured for this job
    self.config = config
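This constructor only reads two keys from the environment file. A hypothetical sketch of that YAML, with key names taken from the code and all values invented:

    # env.yaml (hypothetical values; key names come from the snippet)
    DSN: VerticaDSN
    SET_SCHEMA_SQL: SET SEARCH_PATH TO my_schema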
Example #3
        self.db.executeSQL(self.config['INSERT_COLUMN_MISMATCH'], True)
        # [(res,)] = self.db.retrieveSQLArgs(check_sql, args[0])

    def transfer_schema_data(self):
        first_host = self.config['FIRST_HOST']
        second_host = self.config['SECOND_HOST']
        # Clear the staging and mismatch tables before re-populating them
        self.db.executeSQL(self.config['TRUNCATE_TEMP_COLUMNS'], True)
        self.db.executeSQL(self.config['TRUNCATE_TABLE_MISMATCH'], True)
        self.db.executeSQL(self.config['TRUNCATE_COLUMN_MISMATCH'], True)

        # Connect to the second host with vsql, open a CONNECT TO VERTICA
        # link back to the first host, and export its column catalog into
        # the staging table so the two catalogs can be compared locally.
        query = ("time /opt/vertica/bin/vsql -h " + second_host +
                 " -U " + self.env['VERTICA_USER'] +
                 " -w " + self.env['VERTICA_PASSWORD'] +
                 " -c \"CONNECT TO VERTICA DW USER " + self.env['VERTICA_USER'] +
                 " PASSWORD '" + self.env['VERTICA_PASSWORD'] + "'"
                 " ON '" + first_host + "'," + self.env['TCP_PORT'] + ";"
                 "EXPORT TO VERTICA DW.schema.temp_columns AS"
                 " select * from DW.v_catalog.columns;\"")
        print(query)  # note: this echoes the password to stdout
        subprocess.call(query, shell=True)
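Concatenating credentials into one shell string and running it with shell=True is fragile: any quote or semicolon in a value breaks the command. A sketch of the same vsql call as an argument list, assuming the same method context (self.env, first_host, second_host) as above; the -w flag still exposes the password in the process list, but the shell never parses the statement:

        # Same CONNECT/EXPORT statement, passed as an argv list; the
        # flags (-h/-U/-w/-c) are the ones used in the snippet above.
        sql = ("CONNECT TO VERTICA DW USER {u} PASSWORD '{p}' ON '{h}',{port};"
               "EXPORT TO VERTICA DW.schema.temp_columns AS "
               "select * from DW.v_catalog.columns;").format(
                   u=self.env['VERTICA_USER'],
                   p=self.env['VERTICA_PASSWORD'],
                   h=first_host,
                   port=self.env['TCP_PORT'])
        subprocess.call(['/opt/vertica/bin/vsql',
                         '-h', second_host,
                         '-U', self.env['VERTICA_USER'],
                         '-w', self.env['VERTICA_PASSWORD'],
                         '-c', sql])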
             
if __name__ == "__main__":
    # Derive the YAML config file name from the script name (foo.py -> foo.yaml)
    yaml_file = sys.argv[0].replace(".py", ".yaml")
    g_logger = EtlLogger.get_logger(sys.argv[0].replace(".py", ""))
    g_logger.info("Begin load: %s", datetime.now())
    g_logger.info("Starting cluster data comparator ...")
    dh = schema_mismatch(yaml_file)
    start_time = time.time()
    dh.transfer_schema_data()
    dh.find_mismatch()
    end_time = time.time()
    g_logger.info("Finished comparison. Took %s seconds", end_time - start_time)