def __rename_files_in_a_folder(list_of_files, folder_path, new_filename):
    """
    Give every listed file in a folder the same new base name.

    Each file stays in folder_path and keeps the extension reported by io_util.get_extension(); only the base name
    is replaced.

    Args:
        list_of_files (list of strings): filenames to rename (no path, extension included)
        folder_path (string): the full pathname of the folder that holds the files in list_of_files
        new_filename (string): the new base name applied to every file in list_of_files

    Returns:
        None.

    Raises:
        Nothing is raised explicitly; any error from os.rename() propagates to the caller.
    """

    for current_name in list_of_files:

        # Full path of the file as it exists now.
        source_path = os.path.join(folder_path, current_name)

        # The extension (if any) of the existing file is carried over to the new name.
        extension = io_util.get_extension(source_path)
        target_name = "{}{}".format(new_filename, extension)

        # Rename in place: the target lives in the same folder as the source.
        os.rename(source_path, os.path.join(folder_path, target_name))
def __should_write_table(self, table_id, output_file_abs):
    """
    Decide whether the table-writing process should be run.

    Checks the following:
        * the ID of the Table is an existing Table ID
        * the output folder is a valid folder
        * the output file is not an already-existing Excel file in XLS format

    A failed XLS check increments self.warning_count, logs an error and adds a FAILURE record to the command status.

    Args:
        table_id: the ID of the Table to be written
        output_file_abs: the full pathname to the output file

    Returns:
        Boolean. False if any of the checks recorded False, otherwise True.
    """

    # Results of the individual checks. A False entry means the command should not be run.
    should_run_command = []

    # If the Table ID is not an existing Table ID, raise a FAILURE.
    should_run_command.append(
        validators.run_check(self, "IsTableIdExisting", "TableID", table_id, "FAIL"))

    # Get the full path to the output folder.
    output_folder_abs = io_util.get_path(output_file_abs)

    # If the output folder is not an existing folder, raise a FAILURE.
    should_run_command.append(
        validators.run_check(self, "IsFolderPathValid", "OutputFile", output_folder_abs, "FAIL"))

    # An existing XLS file can be neither appended to nor overwritten. The test is made against the output FILE
    # (not its folder) so that writing a brand-new XLS file into an existing folder is still allowed.
    if os.path.exists(output_file_abs) and io_util.get_extension(output_file_abs).upper() == ".XLS":

        message = 'At the current time, a Table object cannot be appended to or overwrite an existing Excel ' \
                  'file in XLS format.'
        recommendation = "Update the XLS file ({}) to an XLSX file or write the table " \
                         "to a new XLS file.".format(output_file_abs)

        self.warning_count += 1
        self.logger.error(message)
        self.command_status.add_to_log(
            CommandPhaseType.RUN,
            CommandLogRecord(CommandStatusType.FAILURE, message, recommendation))
        should_run_command.append(False)

    # Run only if none of the checks recorded a False.
    return False not in should_run_command
def __get_default_file_type(file_path):
    """
    Helper function to get the default FileType parameter value.

    The extension of file_path (as reported by io_util.get_extension) is upper-cased and looked up in a fixed
    table of compatible extensions.

    Args:
        file_path: the absolute path to the input File parameter

    Returns:
        The default FileType parameter value ("TAR" or "ZIP"). Returns None if the file extension does not
        correlate with a compatible FileType.
    """

    # Compatible file extensions and their corresponding FileType.
    # key: Uppercase file extension.
    # value: Uppercase file type.
    file_types_by_extension = {".TAR": "TAR", ".ZIP": "ZIP"}

    # One direct lookup; dict.get() yields None when the extension is not recognized.
    return file_types_by_extension.get(io_util.get_extension(file_path).upper())
def run_command(self):
    """
    Run the command. Download the file from the web and save it on the local computer.

    The file is first written to the output folder under a name derived from the URL ("<url filename>.zip" when the
    response is detected as a zip file, otherwise the URL's base name). Unless the output filename is '%f', the
    downloaded file is then renamed to the output filename, keeping the downloaded file's extension.

    Any exception raised while downloading (including an HTTP error status) is logged, recorded as a FAILURE in the
    command status and counted as a warning.

    Returns:
        None.

    Raises:
        RuntimeError if any warnings occurred during run_command method.
    """

    # Obtain the parameter values
    pv_URL = self.get_parameter_value("URL")
    pv_OutputFile = self.get_parameter_value("OutputFile", default_value=None)
    pv_Username = self.get_parameter_value("Username", default_value=None)
    pv_Password = self.get_parameter_value("Password", default_value=None)

    # Convert the pv_URL parameter to expand for ${Property} syntax.
    url_abs = self.command_processor.expand_parameter_value(pv_URL, self)

    # Use the OutputFile parameter when it is specified; otherwise fall back to the filename and extension of the URL.
    if pv_OutputFile:
        output_file_raw = pv_OutputFile
    else:
        output_file_raw = io_util.get_filename(pv_URL) + io_util.get_extension(pv_URL)

    # Convert the relative output path to an absolute path. Expand for ${Property} syntax.
    output_file_absolute = io_util.verify_path_for_os(
        io_util.to_absolute_path(
            self.command_processor.get_property('WorkingDir'),
            self.command_processor.expand_parameter_value(output_file_raw, self)))

    # Run the checks on the parameter values. Only continue if the checks passed.
    if self.__should_run_webget(output_file_absolute):

        try:

            # Get the output folder.
            output_folder = os.path.dirname(output_file_absolute)

            # Only attach basic-auth credentials when some were supplied, so that an unauthenticated request does
            # not carry an Authorization header built from None values.
            # Authentication Reference: http://docs.python-requests.org/en/master/user/authentication/
            auth = HTTPBasicAuth(pv_Username, pv_Password) if (pv_Username or pv_Password) else None

            # Get the URL file and convert it into a request Response object.
            # NOTE(review): verify=False disables TLS certificate verification - confirm this is intentional.
            r = requests.get(url_abs, auth=auth, verify=False, stream=True)

            try:

                # Fail on an HTTP error status (4xx/5xx) before touching any existing output file, so that an
                # error page is never saved as if it were the requested download.
                r.raise_for_status()

                # Get the filename of the URL and the output file
                url_filename = io_util.get_filename(url_abs)
                output_filename = io_util.get_filename(output_file_absolute)

                # Remove the output file if it already exists.
                if os.path.exists(output_file_absolute):
                    os.remove(output_file_absolute)

                # A zip response is saved as "<url filename>.zip"; anything else keeps the URL's base name.
                if zip_util.is_zip_file_request(r):
                    downloaded_name = "{}.zip".format(url_filename)
                else:
                    downloaded_name = os.path.basename(url_abs)

                # Download the file to the output folder.
                with open(os.path.join(output_folder, downloaded_name), "wb") as downloaded_file:
                    downloaded_file.write(r.content)

                # Determine if the downloaded file should be renamed. If the filename is %f then the filename
                # of the downloaded product should be the same as the url filename.
                if not output_filename == '%f':
                    self.__rename_files_in_a_folder(
                        list_of_files=[downloaded_name],
                        folder_path=output_folder,
                        new_filename=output_filename)

            finally:
                # The request was made with stream=True; release the connection even when an error occurs.
                r.close()

        # Log a FAILURE if an unexpected error occurs during the process
        except Exception:
            self.warning_count += 1
            message = "Unexpected error downloading file from URL {}.".format(
                url_abs)
            recommendation = "Check the log file for details."
            self.logger.error(message, exc_info=True)
            self.command_status.add_to_log(
                CommandPhaseType.RUN,
                CommandLogRecord(CommandStatusType.FAILURE, message, recommendation))

    # Determine success of command processing. Raise Runtime Error if any errors occurred
    if self.warning_count > 0:
        message = "There were {} warnings proceeding this command.".format(
            self.warning_count)
        raise RuntimeError(message)

    # Set command status type as SUCCESS if there are no errors.
    else:
        self.command_status.refresh_phase_severity(
            CommandPhaseType.RUN, CommandStatusType.SUCCESS)
def write_df_to_excel(df, excel_workbook_path, excel_worksheet_name, include_col_list, include_index):
    """
    Writes a pandas data frame object to an excel file.

    When the workbook already exists, the writer is pointed at the existing workbook before the data frame is
    written; otherwise a new workbook is created by the writer.

    Args:
        df (object): the pandas data frame object to write
        excel_workbook_path (str): the full pathname to an excel workbook (either existing or non-existing)
        excel_worksheet_name (str): the worksheet name to write to (either existing or non-existing)
        include_col_list (list of strings): A list of Table columns to write to the excel file
        include_index (bool): passed to DataFrame.to_excel() as its `index` argument. If TRUE, the data frame's
            row index is written. If FALSE, it is not.

    Returns:
        None
    """
    import importlib

    # Removes the default styling of the table (provided in the pandas library). The package has been moved over
    # time, so every known location is tried and the ones missing from the installed pandas are skipped.
    for module_name in ("pandas.io.formats.excel", "pandas.formats.format", "pandas.core.format"):
        try:
            formatting_module = importlib.import_module(module_name)
        except ImportError:
            continue
        formatting_module.header_style = None

    # If the output excel file does not already exist, let pandas configure the writer from the path alone.
    if not os.path.exists(excel_workbook_path):
        writer = pd.ExcelWriter(excel_workbook_path)

    # Write the table to an existing excel file in XLS format.
    # TODO egiles 2018-04-25 Currently this function does not work. Need to fix.
    elif io_util.get_extension(excel_workbook_path).upper() == ".XLS":
        writer = pd.ExcelWriter(excel_workbook_path, engine='xlwt')
        writer.book = xlwt.Workbook(excel_workbook_path)

    # Write the table to an existing excel file in XLSX format.
    # REf: https://stackoverflow.com/questions/20219254/
    # how-to-write-to-an-existing-excel-file-without-overwriting-data-using-pandas
    else:
        writer = pd.ExcelWriter(excel_workbook_path, engine="openpyxl")

        # Inform the writer object of the already-existing excel workbook and its worksheets.
        book = load_workbook(excel_workbook_path)
        writer.book = book
        writer.sheets = {ws.title: ws for ws in book.worksheets}

    # Write the df to the excel workbook with the given worksheet name.
    df.to_excel(writer, sheet_name=excel_worksheet_name, index=include_index, columns=include_col_list)
    writer.save()