def __init__(self, start_date=None, end_date=None):
    """
    Remember the optional date window and set up the process lock and
    the application logger.
    """
    self._start_date = start_date
    self._end_date = end_date
    self.logger = get_etl_logger(APP_NAME)
    # One lock per app so only a single instance runs at a time.
    self.lock = pid.PidFile(
        pidname="%s.LOCK" % APP_NAME,
        piddir=LOCK_ROOT,
        enforce_dotpid_postfix=False)
def logger(self):
    """
    Where to output our logging.

    Lazily built on first access, named after the table and writing
    under LOG_DIR; cached on the instance afterwards.
    """
    cached = self._logger
    if cached is not None:
        return cached
    self._logger = get_etl_logger(self.table_name, LOG_DIR)
    return self._logger
def test_None_log_directory(self):
    """A None log_directory falls back to the default LOG_DIRECTORY."""
    logger = get_etl_logger(
        log_name='test',
        log_directory=None,
        log_format=None,
        log_level=NEW_LOG_LEVEL)
    expected = os.path.join(
        os.path.abspath(LOG_DIRECTORY), '.'.join(['test', 'log']))
    self.assertEqual(logger.handlers[0].baseFilename, expected)
def setUp(self):
    """Build an Extractor and feed it every record from the JSON fixture."""
    print('')
    self.extractor = Extractor(
        NAME, SchemaConfig(SCHEMA_CONFIG_FILE), get_etl_logger(NAME))
    # The table under test is named after the fixture file itself.
    self.table_name = os.path.splitext(os.path.basename(JSON_FILE))[0]
    with open(JSON_FILE, 'r') as in_json:
        for raw in in_json:
            self.extractor.update_handler(json.loads(raw))
def get_logger():
    """
    Return the first already-configured logger among the known loggers
    for this package (APP_NAME, HARMONIZER_NAME, DEFAULT).  If none has
    been configured yet, build the default one with no log directory.
    """
    registered = logging.Logger.manager.loggerDict
    for name in (APP_NAME, HARMONIZER_NAME, DEFAULT):
        found = registered.get(name)
        if found is not None:
            return found
    return get_etl_logger(DEFAULT, log_directory=None)
def get_logger(name, debug=False):
    """
    Return a logger with the appropriate log level.

    DEBUG when *debug* is set, otherwise the level named by
    ENV['LOG_LEVEL'] (default INFO).  When attached to a terminal,
    also tell the user where the log file lives.
    """
    if debug:
        level = logging.DEBUG
    else:
        level = getattr(logging, ENV.get('LOG_LEVEL', 'INFO'))
    logger = get_etl_logger(name, log_level=level)
    if sys.stdin.isatty():
        sys.stderr.write("Logging to %s\n" % logger.handlers[0].baseFilename)
    return logger
def __init__(self, name, conf, queues, schema_config):
    """Wire up the base extractor client, schema config and pid lock."""
    super(APIExtractorClient, self).__init__(
        name, conf, queues, logger=get_etl_logger(name, LOG_DIR))
    self.schema_config = schema_config
    # App-wide lock: only one extractor client may run at a time.
    self.lock = pid.PidFile(
        pidname="%s.LOCK" % APP_NAME,
        piddir=LOCK_ROOT,
        enforce_dotpid_postfix=False)
    self._worker = None
def __init__(self, schema_file, tables=None):
    """
    Prepare the loader.

    :param schema_file: path to the schema config file (required).
    :param tables: optional collection of table names to load; when
        None or empty, every table configured in the schema is used.
    """
    # Required
    self.schema_file = schema_file
    # Constructor setup
    self.lock = pid.PidFile(
        pidname="%s.LOCK" % LOADER_APP_NAME,
        piddir=LOCK_ROOT,
        enforce_dotpid_postfix=False)
    self.logger = get_etl_logger(LOADER_APP_NAME, LOG_DIR)
    self.schema_config = SchemaConfig(self.schema_file)
    # Optional: an empty/None tables argument means "all configured
    # tables".  (The redundant `not bool(...)` double conversion was
    # simplified to plain truthiness.)
    self.tables = tables
    if not self.tables:
        self.tables = self.schema_config.configured_tables
def __init__(self, table_name, config):
    """
    Pivot the per-table config into the key/value column lists used
    later as kwargs, and set up the per-table lock, logger and temp
    output file.
    """
    self.table_name = table_name
    self.config = config
    self.tmp_table_name = '_'.join(['tmp', self.table_name])
    self.name = '-'.join([APP_NAME, self.table_name])
    self.lock = pid.PidFile(
        pidname="%s.LOCK" % self.name,
        piddir=LOCK_ROOT,
        enforce_dotpid_postfix=False)
    self.logger = get_etl_logger(self.name)
    self.tmp_file = os.path.join(
        OUTPUT_DIR, '.'.join([self.table_name, 'csv']))

    # Each entry of a config section is a one-item mapping; collect
    # either its single key or its single value for every entry.
    def _pivot(section, side):
        entries = self.config.get(section, [])
        if side == 'keys':
            return [list(entry.keys())[0] for entry in entries]
        return [list(entry.values())[0] for entry in entries]

    # These values are pivoted from the config and used for kwargs.
    self.keys_keys = _pivot('keys', 'keys')
    self.keys_values = _pivot('keys', 'values')
    self.values_keys = _pivot('values', 'keys')
    self.values_values = _pivot('values', 'values')

    # These items can be overridden in the config if needed but the
    # defaults usually work fine.
    self.join_keys = self.config.get('join_keys', self.keys_values)
    self.join_values = self.config.get('join_values', self.values_values)
    self.key_columns = self.config.get('key_columns', self.join_keys)
    self.set_columns = self.config.get('set_columns', self.values_values)

    # These are used for kwargs.
    self.destination_fields = self.keys_values + self.values_values
    self.source_fields = self.keys_keys + self.values_keys
    self.key_sep = self.config.get('key_sep')
    self._kwargs = None
    self._oxdb = None
""" Re-exporting data from the API DB. """ import sys import psycopg2 import pid from progressbar import ProgressBar, widgets, UnknownLength from retrying import retry, RetryError from ox_dw_logger import get_etl_logger from .extractor import Extractor from .settings import DEFAULT_SCHEMA_FILE, HARMONIZER_NAME, LOCK_ROOT, ENV from .schema_config import SchemaConfig API_JSONDB_CONFIG = ENV.get('API_JSONDB_CONFIG') LOGGER = get_etl_logger(HARMONIZER_NAME) MAX_ATTEMPTS = 5 WAIT_BETWEEN_ATTEMPTS = 2000 # ms # Object name to JSONDB table mapping. # Most of the time the object and table name are the same. # This is for when it doesn't # Keep this here since it is specific to the harmonizer and may change when # switching to postgres someday. OBJECT_TYPE_TO_TABLE = {'order': 'order_', 'user': '******'} def retry_if_db_error(exception): """ Only retry on a psycopg2.Error. Else raise error immediately. """ if isinstance(exception, psycopg2.Error):
def test_log_same_name(self):
    """Requesting a logger twice by the same name reuses the same file."""
    other = get_etl_logger(log_name=NEW_LOG_NAME)
    self.assertEqual(
        self.logger.handlers[0].baseFilename,
        other.handlers[0].baseFilename)
def setUp(self):
    """Create the logger under test with fully explicit settings."""
    self.logger = get_etl_logger(
        log_name=NEW_LOG_NAME,
        log_directory=NEW_LOG_DIR,
        log_format=NEW_LOG_FORMAT,
        log_level=NEW_LOG_LEVEL)
return {} ENV = get_config('env') CONFIG = get_config(APP_NAME) DB_CONFIG = ENV['DB_CONNECTIONS']['SNOWFLAKE']['KWARGS'] DB_NAME = DB_CONFIG.get('database') DB_SCHEMA = DB_CONFIG.get('schema') TMP = os.environ['TMP'] if 'TMP' in os.environ else '/tmp' TEMP_FILE_DIR = os.path.join(TMP, APP_NAME) DELIMITER = chr(30) MAX_ATTEMPTS = 5 WAIT_BETWEEN_ATTEMPTS = 2000 # ms LOGGER = get_etl_logger(APP_NAME) GET_COLUMNS = """ SELECT COLUMN_NAME, ORDINAL_POSITION FROM {0}.INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = ? AND TABLE_NAME = ? ORDER BY ORDINAL_POSITION""" INSERT_MISSING_COLUMNS = """ INSERT INTO monitor_sf_load (table_name, column_name) VALUES (?, ?)""" TRUNCATE_MONITOR_TABLE = """ TRUNCATE TABLE monitor_sf_load"""
def __init__(self):
    """Acquire the application logger and the single-instance pid lock."""
    self.logger = get_etl_logger(APP_NAME)
    self.lock = pid.PidFile(
        pidname="%s.LOCK" % APP_NAME,
        piddir=LOCK_ROOT,
        enforce_dotpid_postfix=False)
def run(config):
    """
    Required in the config are:
        STATEMENTS: Ordered list of SQL statements to run.
    Optional:
        DW_NAME: Data warehouse name. Required either in config file or
                 sql_runner argument, -d (--dw_name).
        APP_NAME: This is used for the logger. Defaults to LOAD_STATE_VAR
                  or sql_runner
        LOAD_STATE_VAR: If present will update this load state var.
    This will fail at the first statement that fails and will not continue.
    Be sure the use local temporary or temporary tables as there is no
    clean up.
    """
    job_name = config.get('APP_NAME', APP_NAME)
    logger = get_etl_logger(job_name)
    try:
        # Single-instance guard: holding the pid lock for the whole run.
        with pid.PidFile(pidname="%s.LOCK" % job_name,
                         piddir=LOCK_ROOT,
                         enforce_dotpid_postfix=False) as p_lock:
            logger.info("-------------------------------")
            logger.info("Running %s application with process id: %d",
                        job_name, p_lock.pid)
            logger.info("Starting %s for load_state_variable %s",
                        job_name, config.get('LOAD_STATE_VAR'))
            if sys.stdout.isatty():
                # Interactive run: point the user at the log file.
                sys.stderr.write("Logging all output to %s\n" %
                                 logger.handlers[0].baseFilename)
            logger.info("Connecting to %s", config.get('DW_NAME'))
            with OXDB(config.get('DW_NAME')) as oxdb:
                size = len(config.get('STATEMENTS'))
                # Set dynamic variables: a VARIABLES value that is itself
                # a SELECT is first expanded against the other variables,
                # then replaced by the single value the query returns
                # (the trailing comma unpacks a one-column row).
                for key, val in config.get('VARIABLES').items():
                    if str(val).lower().startswith('select '):
                        val %= config.get('VARIABLES')
                        config['VARIABLES'][key], = \
                            oxdb.get_executed_cursor(val).fetchone()
                for index, statement in enumerate(config.get('STATEMENTS'),
                                                  start=1):
                    # %-interpolate variables into the statement text.
                    statement %= config.get('VARIABLES')
                    logger.info("STATEMENT(%s/%s) %s;", index, size,
                                statement)
                    cursor = oxdb.get_executed_cursor(statement)
                    if str(statement).lower().startswith('select '):
                        # SELECTs stream their rows to stdout as CSV.
                        writer = \
                            csv.writer(
                                sys.stdout,
                                delimiter=config.get('FIELD_SEP',
                                                     DEFAULT_FIELD_SEP))
                        if config.get('HEADERS', False):
                            writer.writerow(col[0]
                                            for col in cursor.description)
                        for row in cursor:
                            writer.writerow(row)
                    else:
                        # NOTE(review): get_executed_cursor presumably
                        # already ran the statement; this execute() looks
                        # like it runs non-SELECT statements a second
                        # time -- confirm against OXDB's semantics.
                        cursor.execute(statement)
                if config.get('LOAD_STATE_VAR') is not None:
                    # Record successful completion in the load_state table.
                    logger.info("SETTING %s in load_state.",
                                config.get('LOAD_STATE_VAR'))
                    LoadState(
                        oxdb.connection,
                        variable_name=config.get('LOAD_STATE_VAR')).upsert()
                logger.info("Completed %s for load_state_variable %s",
                            job_name, config.get('LOAD_STATE_VAR'))
    except (pid.PidFileAlreadyRunningError, pid.PidFileAlreadyLockedError):
        # Another instance holds the lock; bow out quietly.
        logger.warning("Unable to get lock for %s application. Exiting...",
                       job_name)
    except Exception as err:
        # Log and re-raise so callers/cron see the failure.
        logger.error("Application %s FAILED. ERROR %s", job_name, err)
        raise Exception("Application %s FAILED. ERROR %s" % (job_name, err))
def setUp(self):
    """Create the schema plus the logger and options used by each test."""
    setup_schema()
    self.logger = get_etl_logger(JOB_NAME)
    self.options = OPTIONS(JOB_NAME, '2018-03-14_04', 1181, True)