def remove(self, name):
    logger.debug(self._context('Removing vault'))
    if not is_file(self._file_name(name)):
        logger.warning(self._context('Vault does not exist'))
    else:
        delete_file(self._file_name(name), ignore_errors=True)
        if is_file(self._file_name(name)):
            logger.warning('Unable to remove vault')

def test_delete_files():
    setup_test_files()

    # set the readonly.txt file to read/write so it can be deleted
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    if is_file(readonly_file_name):
        os.chmod(readonly_file_name, S_IWUSR | S_IREAD)

    # delete both the readonly.txt and the readwrite.txt files
    delete_files(f'{test_folder_path}/*.txt')

    # assert both files have been deleted
    assert is_file(f'{test_folder_path}/readwrite.txt') is False
    assert is_file(f'{test_folder_path}/readonly.txt') is False
    teardown_test_files()

def test_rename_file():
    setup_test_files()
    rename_file(f'{test_folder_path}/readwrite.txt', f'{test_folder_path}/readwrite_renamed.txt')
    assert is_file(f'{test_folder_path}/readwrite_renamed.txt') is True
    teardown_test_files()

def load(self, file_name, default_section_key=''):
    """Search across registered paths, loading all instances of file in path order."""
    logger.info(f'Searching for {file_name} across: {self.path_names}')
    for path_name in self.path_names:
        load_file_name = f'{path_name}/{file_name}'
        if is_file(load_file_name):
            self.load_file(load_file_name, default_section_key)

def connect(self, name, password):
    # assume failure
    self.name = name
    self.is_connected = False
    self.is_updated = False

    if not is_file(self._file_name(name)):
        warning_message = 'Vault does not exist'
        logger.warning(self._context(warning_message))
        raise self.exception(warning_message)
    else:
        logger.debug(self._context('Opening vault'))
        if not self._load_file():
            warning_message = self._context('Vault corrupt')
            logger.warning(warning_message)
            raise self.exception(warning_message)

        # key vault authentication
        if not self._authenticate():
            warning_message = self._context('Vault password authentication failed')
            logger.warning(warning_message)
            raise self.exception(warning_message)
        else:
            # we successfully connected to vault
            self.is_connected = True

            # remove password from secrets; we add it back when we disconnect
            del self.secrets[self.password_key]

def _listen(self):
    if not self.endpoint:
        return ''
    else:
        if is_file(self.endpoint):
            return load_text(self.endpoint, '')
        else:
            return ''

def find_file(self, file_name):
    # search for file across multiple paths/file types
    logger.info(f'Searching for {file_name} across: {self.path_names}')
    for path_name in self.path_names:
        load_file_name = f'{path_name}/{file_name}'
        if is_file(load_file_name):
            # return the first matching file name
            return load_file_name

def test_move_file():
    setup_test_files()
    move_file(f'{test_folder_path}/readwrite.txt', f'{test_folder_path}/working/readwrite_copy.txt')
    assert is_file(f'{test_folder_path}/working/readwrite_copy.txt') is True
    teardown_test_files()

def teardown_test_files():
    # set the readonly.txt file to read/write so it can be deleted if it exists
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    if is_file(readonly_file_name):
        os.chmod(readonly_file_name, S_IWUSR | S_IREAD)

    # clear the test/working folder
    # DISCUSS: We may want to remove the entire folder and recreate it in case there are sub-folders,
    # e.g. clear_folder(test_folder_path)
    clear_folder(f'{test_folder_path}/')

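# Hypothetical sketch only: setup_test_files() is called by the tests in this listing but
# is not shown here. Based on the files the tests expect (readwrite.txt, a read-only
# readonly.txt, and a working/ sub-folder), it might look roughly like the following;
# the real helper may differ.
def setup_test_files():
    # start from an empty test folder, then add a working sub-folder
    create_folder(test_folder_path)
    clear_folder(f'{test_folder_path}/')
    create_folder(f'{test_folder_path}/working')

    # create a writable test file
    with open(f'{test_folder_path}/readwrite.txt', 'w') as output_stream:
        output_stream.write('readwrite test data')

    # create a second test file and mark it read-only
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    with open(readonly_file_name, 'w') as output_stream:
        output_stream.write('readonly test data')
    os.chmod(readonly_file_name, S_IREAD)
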
def test_clear_folder():
    setup_test_files()

    # ToDo: add exception handling test

    # set the readonly.txt file to read/write so it can be deleted if it exists
    readonly_file_name = f'{test_folder_path}/readonly.txt'
    if is_file(readonly_file_name):
        os.chmod(readonly_file_name, S_IWUSR | S_IREAD)

    clear_folder(f'{test_folder_path}/')
    assert len(os.listdir(f'{test_folder_path}/')) == 0
    teardown_test_files()

def delete(self, blob_name):
    """Delete blob."""
    target_file_name = self._blob_file(blob_name)
    if not is_file(target_file_name):
        warning_message = "Blob name does not exist"
        logger.warning(self._context(warning_message, blob_name))
        is_success = False
    else:
        logger.debug(self._context("Deleting blob", blob_name))
        delete_file(target_file_name)
        is_success = True
    return is_success

def get(self, target_file_name, blob_name):
    """Download blob to target file name."""
    blob_folder = self._blob_folder()
    source_file_name = f"{blob_folder}/{blob_name}"
    if not is_file(source_file_name):
        logger.warning(self._context("Blob name does not exist", blob_name))
        is_success = False
    else:
        logger.debug(self._context(f"Getting {target_file_name}", blob_name))
        copy_file_if_exists(source_file_name, target_file_name)
        is_success = True
    return is_success

def load(self):
    if not is_file(self.file_name):
        # file doesn't exist, initialize object with default values
        logger.info(f'Initializing {self.file_name}')
        self.job_id = 1
        self.tables = dict()
    else:
        logger.info(f'Loading {self.file_name}')
        obj = load_jsonpickle(self.file_name)

        # load key attributes
        self.job_id = obj.job_id
        self.tables = obj.tables

def on_mouse_move(self, *args):
    mouse_pos = args[1]
    if not self.mouse_disabled():
        for child in self.children:
            if child.focus:
                child.background_color = child.focused_color
            elif child.collide_point(*child.to_widget(*mouse_pos)) \
                    and not (is_file(child.attrs) and self.moving):
                # while moving files, do not highlight icons under the mouse
                # unless they represent directories
                child.background_color = child.active_color
            else:
                child.background_color = child.unactive_color

def callback(self, path):
    _path, filename = split(path)
    # print('REMOTE SEARCH', self.text, path, thumb_dir not in path and self.text in filename.lower())
    if thumb_dir not in path and self.text in filename.lower():
        try:
            attrs = get_dir_attrs(path, self.sftp)
        except Exception as ex:
            ex_log(f'Failed to get attrs {ex}')
        else:
            attrs.thumbnail = self.thumbnail
            if is_file(attrs):
                attrs.path = path
            else:
                attrs.path = _path
            self.search_list.append(attrs)
            self.files_queue.put(attrs)

def is_stale(self):
    """Return True if pid file is stale (older than boot time or contains an inactive pid)."""
    reason = ''
    if is_file(self.file_name):
        # pid file exists, so check whether it's stale
        pid = self.get_pid()
        pid_datetime = file_modify_datetime(self.file_name)
        if pid_datetime < boot_datetime():
            reason = f' - pid file older ({pid_datetime}) than boot time ({boot_datetime()})'
        elif not is_process(pid):
            reason = f' - pid ({pid}) does not exist'

    status = bool(reason)
    logger.debug(f'{self}:is_stale({status}){reason}')
    return status

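# Hedged usage sketch: how a caller might combine is_file() and is_stale() as a
# single-instance guard. 'pid_file' stands for an instance of the class above;
# write_pid() is a hypothetical helper, not part of this listing.
def acquire_single_instance_lock(pid_file):
    if is_file(pid_file.file_name) and not pid_file.is_stale():
        # another live process still owns the pid file
        return False

    # pid file missing or stale; claim it for the current process
    pid_file.write_pid(os.getpid())   # hypothetical helper
    return True
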
def __init__(self, project_file=None):
    # session folder (acts as root path for job-specific folders)
    self.session_folder = '../sessions'

    # configuration engines
    self.config = None
    self.option = None

    # project metadata
    self.project = None
    self.namespace = None

    # project resources
    self.database = None
    self.schedule = None

    # project dataset-specific working folders
    self.state_folder = None
    self.work_folder = None
    self.publish_folder = None

    # project database connections (db_conn)
    self.source_db_conn = None
    self.target_db_conn = None

    # project file and name
    self.project_file = ''
    self.project_name = ''

    # if an optional project file was supplied, use it; otherwise try the command line
    if project_file:
        self.project_file = project_file
    elif len(sys.argv) > 1:
        self.project_file = sys.argv[1]

    # make sure we have a valid project file
    app_name = script_name()
    if not self.project_file:
        print(f'{app_name}: error - must specify project file')
    elif not is_file(f'../conf/{self.project_file}'):
        print(f'{app_name}: error - project file not found ({self.project_file})')
    else:
        # project file controls configuration
        self.project_name = just_file_stem(self.project_file)

def __init__(self, tenant):
    # tenant
    self.tenant = force_local_path(tenant.lower())

    # make sure we have a tenant
    create_folder(self.tenant)
    self.ad_file_name = force_file_ext(f'{self.tenant}/ad', 'json')

    # start in a non-authenticated state
    self.identity = None
    self.is_authenticated = False

    # load our directory
    if is_file(self.ad_file_name):
        self._load()
    else:
        self.identities = dict()

def create(self, name, password):
    # if already connected, then disconnect first
    if self.is_connected:
        self.disconnect()

    if is_file(self._file_name(name)):
        # update status
        logger.warning(self._context('Vault already exists'))
    else:
        # update status
        self.is_updated = True
        self.is_connected = True

        # create new key vault
        self.name = name
        self.password = password
        self.secrets = dict()
        self._save_file()
        logger.debug(self._context('Creating vault'))

def put(self, source_file_name, blob_name):
    """Upload source file name to blob."""
    if not is_file(source_file_name):
        warning_message = f"Source file does not exist ({source_file_name})"
        logger.warning(self._context(warning_message, blob_name))
        is_success = False
    else:
        logger.debug(self._context(f"Putting {source_file_name}", blob_name))

        # build blob target file and folder names
        blob_folder = self._blob_folder()
        target_file_name = f"{blob_folder}/{blob_name}"
        target_folder = just_path(target_file_name)

        # make sure the blob's target folder exists
        create_folder(target_folder)

        # then copy source file to blob container
        copy_file_if_exists(source_file_name, target_file_name)
        is_success = True

    return is_success

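# Hedged usage sketch of the put/get/delete methods above, following the
# BlobStore()/connect()/disconnect() pattern used by stage_file() below.
# The resource value, blob names, and file names are placeholders.
def blob_round_trip_example(resource):
    bs = BlobStore()
    bs.connect(resource)

    # upload a local file, download it under a new name, then remove the blob
    if bs.put('capture.zip', 'example/capture.zip'):
        bs.get('capture_copy.zip', 'example/capture.zip')
        bs.delete('example/capture.zip')

    bs.disconnect()
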
def stage_file(self, archive_capture_file_name):
    logger.info(f"Getting {archive_capture_file_name} from archive blob store")

    # make sure work folder exists and is empty
    clear_folder(self.work_folder)

    # connect to the archive blobstore
    resource = self.config(self.project.blobstore_archive)
    bs_archive = BlobStore()
    bs_archive.connect(resource)

    # extract dataset name and job id from archive capture file name
    dataset_name, _, job_id = just_file_stem(archive_capture_file_name).partition("#")

    # copy archive_capture_file_name to our local working folder
    capture_file_name = just_file_name(archive_capture_file_name)
    local_work_file_name = f"{self.work_folder}/{capture_file_name}"
    archive_capture_file_blob_name = f"{archive_capture_file_name}"
    bs_archive.get(local_work_file_name, archive_capture_file_blob_name)
    bs_archive.disconnect()

    # unzip the capture file we retrieved from archive
    with zipfile.ZipFile(local_work_file_name) as zf:
        zf.extractall(self.work_folder)

    # create the file's dataset_name schema if missing
    self.target_db_conn.create_schema(dataset_name)

    # process all table files in our work folder
    for file_name in sorted(glob.glob(f"{self.work_folder}/*.table")):
        table_name = just_file_stem(file_name)
        logger.info(f"Processing {table_name} ...")

        # always load table objects
        table_object = load_jsonpickle(f"{self.work_folder}/{table_name}.table")

        # skip table if no schema file exists
        schema_file_name = f"{self.work_folder}/{table_name}.schema"
        if not is_file(schema_file_name):
            logger.warning(f"Table skipped ({table_name}); schema file not found")
            continue

        # always load table schema
        table_schema = load_jsonpickle(schema_file_name)

        # always load table pk
        table_pk = load_text(f"{self.work_folder}/{table_name}.pk").strip()

        # extend table object with table and column names from table_schema object
        table_object.table_name = table_name
        table_object.column_names = [column_name for column_name in table_schema.columns]

        # if drop_table, drop table and exit
        if table_object.drop_table:
            logger.info("Table drop request; table_drop=1")
            self.target_db_conn.drop_table(dataset_name, table_name)
            return

        # convert table schema to our target database and add extended column definitions
        extended_definitions = "udp_jobid int, udp_timestamp datetime2".split(",")
        convert_to_mssql(table_schema, extended_definitions)

        # Future: support custom staging table type overrides
        # [table].table_type = <blank> | standard, columnar, memory, columnar-memory

        # handle cdc vs non-cdc table workflows differently
        logger.debug(f"{table_name}.cdc={table_object.cdc}, timestamp={table_object.timestamp}")
        if not table_object.cdc or table_object.cdc.lower() == "none" or not table_pk:
            # if table cdc=none, drop the target table
            logger.info(f"Table cdc=[{table_object.cdc}]; rebuilding table")
            self.target_db_conn.drop_table(dataset_name, table_name)

            # then re-create target table with latest schema
            # FUTURE: Add udp_pk, udp_nk, udp_nstk and other extended columns
            logger.info(f"Re-creating non-CDC table: {dataset_name}.{table_name}")
            self.target_db_conn.create_table_from_table_schema(dataset_name, table_name, table_schema, extended_definitions)

            # no cdc in effect for this table - insert directly to target table
            work_folder_obj = pathlib.Path(self.work_folder)
            batch_number = 0
            for json_file in sorted(work_folder_obj.glob(f"{table_name}#*.json")):
                # load rows from json file
                # input_stream = open(json_file)
                # rows = json.load(input_stream)
                # input_stream.close()
                rows = load_jsonpickle(json_file)

                # insert/upsert/merge *.json into target tables
                if not rows:
                    logger.info(f"Table {table_name} has 0 rows; no updates")
                else:
                    batch_number += 1
                    logger.info(f"Job {job_id}, batch {batch_number}, table {table_name}")
                    self.progress_message(f"loading {just_file_stem(capture_file_name)}({table_name}.{batch_number:04}) ...")

                    # convert date/datetime columns to date/datetime values
                    convert_data_types(rows, table_schema)
                    self.target_db_conn.bulk_insert_into_table(dataset_name, table_name, table_schema, rows)

        else:
            # table has cdc updates

            # create target table if it doesn't exist
            if not self.target_db_conn.does_table_exist(dataset_name, table_name):
                # FUTURE: Add udp_pk, udp_nk, udp_nstk and other extended columns
                logger.info(f"Creating table: {dataset_name}.{table_name}")
                self.target_db_conn.create_table_from_table_schema(dataset_name, table_name, table_schema, extended_definitions)

            # create temp table to receive captured changes
            # FUTURE: Create a database wrapper function for creating 'portable' temp table names vs hard-coding '#'.
            temp_table_name = f"_{table_name}"
            self.target_db_conn.drop_table(dataset_name, temp_table_name)
            self.target_db_conn.create_table_from_table_schema(dataset_name, temp_table_name, table_schema, extended_definitions)

            # insert captured updates into temp table
            work_folder_obj = pathlib.Path(self.work_folder)
            batch_number = 0
            for json_file in sorted(work_folder_obj.glob(f"{table_name}#*.json")):
                # load rows from json file
                # input_stream = open(json_file)
                # rows = json.load(input_stream)
                # input_stream.close()
                rows = load_jsonpickle(json_file)

                # insert/upsert/merge *.json into target tables
                if not rows:
                    logger.info(f"Table {table_name} has 0 rows; no updates")
                    break
                else:
                    batch_number += 1
                    logger.info(f"Job {job_id}, batch {batch_number}, table {table_name}")
                    self.progress_message(f"loading {just_file_stem(capture_file_name)}({table_name}.{batch_number:04}) ...")

                    # convert date/datetime columns to date/datetime values
                    convert_data_types(rows, table_schema)
                    self.target_db_conn.bulk_insert_into_table(dataset_name, temp_table_name, table_schema, rows)
            else:
                # merge (upsert) temp table to target table
                merge_cdc = cdc_merge.MergeCDC(table_object, extended_definitions)
                sql_command = merge_cdc.merge(dataset_name, table_pk)

                # TODO: Capture SQL commands in a sql specific log.
                logger.debug(sql_command)
                self.target_db_conn.cursor.execute(sql_command)

            # drop temp table after merge
            self.target_db_conn.drop_table(dataset_name, temp_table_name)

def test_is_file():
    setup_test_files()
    assert is_file(f'{test_folder_path}/readonly.txt') is True
    assert is_file(f'{test_folder_path}/ThisDoesNotExist.txt') is False
    teardown_test_files()