示例#1
0
    def __rename_files_in_a_folder(list_of_files, folder_path, new_filename):
        """
        Rename each listed file inside one folder to a shared new base name.

        Every file stays in ``folder_path``; only its base name changes. The extension reported by
        ``io_util.get_extension`` for the existing file is appended to the new base name.

        Args:
            list_of_files (list of strings): filenames (no path, extension included) located in ``folder_path``
            folder_path (string): the full pathname of the folder that holds the files in ``list_of_files``
            new_filename (string): the new base name (no extension). The same value is applied to every file in
                ``list_of_files``.

        Returns:
             None.

        Raises:
            None.
        """

        for current_name in list_of_files:

            # Where the file lives right now.
            source_path = os.path.join(folder_path, current_name)

            # Carry the existing extension over to the new name. If the original filename had no extension, the
            # new filename has none either.
            extension = io_util.get_extension(source_path)
            renamed_file = "{}{}".format(new_filename, extension)

            # The destination is in the same folder as the source.
            target_path = os.path.join(folder_path, renamed_file)
            os.rename(source_path, target_path)
示例#2
0
    def __should_write_table(self, table_id, output_file_abs):
        """
        Decide whether the table should be written. Checks the following:
        * the ID of the Table is an existing Table ID
        * the output folder is a valid folder
        * the output file is not an already-existing Excel file in XLS format (appending to or overwriting an
          existing XLS file is not supported; writing a new XLS file is allowed)

        A failed XLS check increments ``self.warning_count``, logs an error and adds a FAILURE record to the
        command status log.

        Args:
            table_id: the ID of the Table to be written
            output_file_abs: the full pathname to the output file

        Returns:
            run_write: Boolean. If TRUE, the writing process should be run. If FALSE, it should not be run.
        """

        # List of Boolean values. The Boolean values correspond to the results of the following tests. If TRUE, the
        # test confirms that the command should be run.
        should_run_command = []

        # If the Table ID is not an existing Table ID, raise a FAILURE.
        should_run_command.append(
            validators.run_check(self, "IsTableIdExisting", "TableID", table_id, "FAIL"))

        # Get the full path to the output folder
        output_folder_abs = io_util.get_path(output_file_abs)

        # If the output folder is not an existing folder, raise a FAILURE.
        should_run_command.append(
            validators.run_check(self, "IsFolderPathValid", "OutputFile", output_folder_abs, "FAIL"))

        # Only an output FILE that already exists is a problem for the XLS format. The check is made against the
        # file itself (not its folder) so that a new XLS file can still be written into an existing folder.
        if os.path.exists(output_file_abs) and io_util.get_extension(output_file_abs).upper() == ".XLS":

            message = 'At the current time, a Table object cannot be appended to or overwrite an existing Excel ' \
                      'file in XLS format.'
            recommendation = "Update the XLS file ({}) to an XLSX file or write the table " \
                             "to a new XLS file.".format(output_file_abs)

            self.warning_count += 1
            self.logger.error(message)
            self.command_status.add_to_log(
                CommandPhaseType.RUN,
                CommandLogRecord(CommandStatusType.FAILURE, message, recommendation))
            should_run_command.append(False)

        # The process should be run only if none of the checks failed.
        return False not in should_run_command
    def __get_default_file_type(file_path):
        """
        Helper function that derives the default FileType parameter value from a file's extension.

        Arg:
            file_path: the absolute path to the input File parameter

        Returns: The default FileType parameter value ("TAR" or "ZIP"). Returns None if the file extension does not
            correlate with a compatible FileType.
        """

        # Lookup table of compatible file extensions.
        # key: Uppercase file extension (including the leading dot).
        # value: Uppercase file type.
        file_type_by_extension = {".TAR": "TAR", ".ZIP": "ZIP"}

        # Compare case-insensitively by upper-casing the input file's extension.
        extension = io_util.get_extension(file_path).upper()

        # dict.get yields None for an unrecognized extension.
        return file_type_by_extension.get(extension)
示例#4
0
    def run_command(self):
        """
        Run the command. Download the file from the web and save it on the local computer.

        The response body is first written into the output folder under a name derived from the URL
        ("<URL filename>.zip" when the response is recognized as a zip file, otherwise the basename of the URL).
        Unless the output filename is the literal '%f', the downloaded file is then renamed to the output filename,
        keeping the extension of the downloaded file.

        Returns: None.

        Raises:
            RuntimeError if any warnings occurred during run_command method.
        """

        # Obtain the parameter values
        pv_URL = self.get_parameter_value("URL")
        pv_OutputFile = self.get_parameter_value("OutputFile", default_value=None)
        pv_Username = self.get_parameter_value("Username", default_value=None)
        pv_Password = self.get_parameter_value("Password", default_value=None)

        # Convert the pv_URL parameter to expand for ${Property} syntax.
        url_abs = self.command_processor.expand_parameter_value(pv_URL, self)

        # Pick the (possibly relative) output file. If the OutputFile parameter is NOT specified, fall back to the
        # filename and extension of the URL parameter.
        if pv_OutputFile:
            output_file_relative = pv_OutputFile
        else:
            output_file_relative = io_util.get_filename(pv_URL) + io_util.get_extension(pv_URL)

        # Convert the relative path to an absolute path. Expand for ${Property} syntax.
        output_file_absolute = io_util.verify_path_for_os(
            io_util.to_absolute_path(
                self.command_processor.get_property('WorkingDir'),
                self.command_processor.expand_parameter_value(output_file_relative, self)))

        # Run the checks on the parameter values. Only continue if the checks passed.
        if self.__should_run_webget(output_file_absolute):

            try:

                # Get the output folder.
                output_folder = os.path.dirname(output_file_absolute)

                # Only send Basic credentials when a username was supplied. Building HTTPBasicAuth from None values
                # would send the literal credentials "None:None" with every anonymous request.
                # Authentication Reference: http://docs.python-requests.org/en/master/user/authentication/
                auth = HTTPBasicAuth(pv_Username, pv_Password) if pv_Username else None

                # Get the URL file and convert it into a request Response object
                # NOTE(review): verify=False disables TLS certificate verification. Left unchanged so existing
                # workflows keep working - confirm whether it is still required.
                # NOTE(review): the HTTP status code is not checked, so the body of an error response is saved like
                # a regular download - consider r.raise_for_status().
                r = requests.get(url_abs, auth=auth, verify=False, stream=True)

                # Get the filename of the URL and the output file
                url_filename = io_util.get_filename(url_abs)
                output_filename = io_util.get_filename(output_file_absolute)

                # Remove the output file if it already exists.
                if os.path.exists(output_file_absolute):
                    os.remove(output_file_absolute)

                # Name of the file as first written to the output folder. A zip response is always stored with a
                # ".zip" extension; anything else keeps the basename of the URL.
                if zip_util.is_zip_file_request(r):
                    downloaded_filename = "{}.zip".format(url_filename)
                else:
                    downloaded_filename = os.path.basename(url_abs)

                # Download the file to the output folder.
                with open(os.path.join(output_folder, downloaded_filename), "wb") as downloaded_file:
                    downloaded_file.write(r.content)

                # Determine if the downloaded file should be renamed. If the filename is %f then the filename
                # of the downloaded product should be the same as the url filename
                if output_filename != '%f':
                    self.__rename_files_in_a_folder(
                        list_of_files=[downloaded_filename],
                        folder_path=output_folder,
                        new_filename=output_filename)

            # Record a failure (instead of propagating) if an unexpected error occurs during the process. The
            # traceback is written to the log through exc_info.
            except Exception:

                self.warning_count += 1
                message = "Unexpected error downloading file from URL {}.".format(url_abs)
                recommendation = "Check the log file for details."
                self.logger.error(message, exc_info=True)
                self.command_status.add_to_log(
                    CommandPhaseType.RUN,
                    CommandLogRecord(CommandStatusType.FAILURE, message, recommendation))

        # Determine success of command processing. Raise Runtime Error if any errors occurred
        if self.warning_count > 0:
            message = "There were {} warnings proceeding this command.".format(self.warning_count)
            raise RuntimeError(message)

        # Set command status type as SUCCESS if there are no errors.
        else:
            self.command_status.refresh_phase_severity(CommandPhaseType.RUN, CommandStatusType.SUCCESS)
示例#5
0
def write_df_to_excel(df, excel_workbook_path, excel_worksheet_name, include_col_list, include_index):
    """
    Writes a pandas data frame object to an excel file.

    Args:
        df (object): the pandas data frame object to write
        excel_workbook_path (str): the full pathname to an excel workbook (either existing or non-existing)
        excel_worksheet_name (str): the worksheet name to write to (either existing or non-existing)
        include_col_list (list of strings): A list of Table columns to write to the excel file
        include_index (bool): If TRUE, write the data frame's index (row labels). If FALSE, do not write the index.
            The value is passed as the ``index`` argument of ``DataFrame.to_excel``.

    Returns: None
    """

    # Removes the default styling of the table (provided in the pandas library). The package has been moved over
    # time, so every known location is tried; a location that cannot be imported from the installed pandas is
    # skipped. Only ImportError is ignored so that unrelated errors are not hidden.
    try:
        import pandas.io.formats.excel
        pandas.io.formats.excel.header_style = None

    except ImportError:
        pass

    try:
        import pandas.formats.format
        pandas.formats.format.header_style = None

    except ImportError:
        pass

    try:
        import pandas.core.format
        pandas.core.format.header_style = None

    except ImportError:
        pass

    # If the output excel file does not already exist, let pandas configure the writer from the file path.
    if not os.path.exists(excel_workbook_path):

        # Set the writer object.
        writer = pd.ExcelWriter(excel_workbook_path)

    # TODO egiles 2018-04-25 Currently this function does not work. Need to fix.
    # Write the table to an existing excel file in XLS format.
    elif io_util.get_extension(excel_workbook_path).upper() == ".XLS":

        # Set the writer object.
        writer = pd.ExcelWriter(excel_workbook_path, engine='xlwt')

        # NOTE(review): this creates a new xlwt workbook rather than reading the existing file - presumably the
        # reason for the TODO above; left unchanged.
        writer.book = xlwt.Workbook(excel_workbook_path)

    # Write the table to an existing excel file in XLSX format.
    else:

        # REf: https://stackoverflow.com/questions/20219254/
        # how-to-write-to-an-existing-excel-file-without-overwriting-data-using-pandas

        # Set the writer object.
        writer = pd.ExcelWriter(excel_workbook_path, engine="openpyxl")

        # Inform the writer object of the already-existing excel workbook.
        book = load_workbook(excel_workbook_path)
        writer.book = book

        # Inform the writer object of the already-existing excel worksheets.
        writer.sheets = {ws.title: ws for ws in book.worksheets}

    # Write the df to the excel workbook with the given worksheet name.
    df.to_excel(writer, sheet_name=excel_worksheet_name, index=include_index, columns=include_col_list)

    # close() writes the workbook to disk. It is used instead of its synonym save(), which newer pandas versions
    # no longer provide.
    writer.close()