Exemplo n.º 1
0
    def __init__(self, output_txt=False, metapickle_path=None):
        """Set up paths, logging and the data loader for Qs pickle processing.

        Args:
            output_txt: Stored on the instance as ``output_txt``. Presumably
                toggles writing text copies of the merged data -- confirm
                against the methods that read it.
            metapickle_path: Stored on the instance as ``metapickle_path``;
                may be None.

        Side effects:
            Creates a Logger on the processor's log file, ensures the merged
            pickle directory exists, writes a start-up message to the log and
            builds a DataLoader going from the raw to the merged pickle dir.
        """
        # Caller-supplied options and fixed names
        self.output_txt = output_txt
        self.metapickle_path = metapickle_path
        self.statspickle_name = "Qs_summary_stats"

        # Tolerance for the difference between files. It exists to flag very
        # different dataframes rather than to carry any physical meaning.
        self.difference_tolerance = 0.02

        # File locations, all taken from the settings module
        self.root_dir = settings.root_dir
        self.pickle_source = settings.Qs_raw_pickles_dir
        self.pickle_destination = settings.Qs_merged_pickles_dir
        self.txt_destination = settings.Qs_merged_txt_dir
        log_path_parts = (settings.root_dir, 'log-files',
                          'QsPickleProcessor.txt')
        self.log_filepath = pjoin(*log_path_parts)

        # The logger has to exist before anything that reports through it
        self.logger = logger.Logger(self.log_filepath, default_verbose=True)
        hm.ensure_dir_exists(self.pickle_destination, self.logger)
        start_banner = ["Begin Qs Pickle Processor output", asctime()]
        self.logger.write(start_banner)

        # Loader reads from the raw pickles and targets the merged pickles
        self.loader = data_loading.DataLoader(
            self.pickle_source, self.pickle_destination, self.logger)
Exemplo n.º 2
0
    def __init__(self, root_dir, output_dir):
        """Initialise the crawler base, output directory and data loader.

        Args:
            root_dir: Directory passed to ``set_root`` and used as the
                loader's source; the log file lives in its ``log-files``
                subdirectory.
            output_dir: Directory that is created if missing and used as the
                loader's destination.
        """
        # One verbose logger is shared by the crawler base class, the
        # directory helper and the data loader.
        crawler_log = hlp_logger.Logger(
            pjoin(root_dir, 'log-files', 'QsExtractor.txt'),
            default_verbose=True,
        )
        hlp_crawler.Crawler.__init__(self, crawler_log)

        self.set_root(root_dir)
        self.output_dir = output_dir
        ensure_dir_exists(output_dir, crawler_log)

        self.loader = data_loading.DataLoader(root_dir, output_dir,
                                              crawler_log)
Exemplo n.º 3
0
    def __init__(self, source_dir, destination_dir=None, logger=None):
        """Record the source/destination directories and announce creation.

        Args:
            source_dir: Directory stored as ``source_dir``.
            destination_dir: Optional directory stored as
                ``destination_dir``. When given, it is created if missing and
                reported in the log.
            logger: Logger to report through. When None, a new
                ``logger_module.Logger(None)`` is created instead.
        """
        self.source_dir = source_dir

        # Fall back to a freshly built logger when the caller gives none
        if logger is None:
            logger = logger_module.Logger(None)
        self.logger = logger
        self.logger.write([
            "DataLoader created",
            f"Source dir is {self.source_dir}",
        ])

        self.destination_dir = destination_dir
        if destination_dir is None:
            # Nothing to prepare without a destination
            return

        ensure_dir_exists(self.destination_dir, self.logger)
        self.logger.write([f"Destination dir is {self.destination_dir}"])
Exemplo n.º 4
0
    def __init__(self, **kwargs):
        """Configure figure output options from keyword arguments.

        Kwargs (each read through the kwarg checker with the default shown):
        'figure_extension' is the file extension for figures ('png').

        'logger' is an optional logger object (None).

        'debug_mode' is a debug flag (False).

        'figure_root_dir' is the root directory for figures ('./').

        'figure_sub_dir' is the subdirectory of the root joined on to form
        ``figure_dir`` ('').

        Both the root directory and the resulting figure directory are
        created if they do not exist.
        """
        check = kwarg_checker.get_check_kwarg_fu(kwargs)

        # General options
        self.figure_extension = check('figure_extension', 'png')
        self.logger = check('logger', None)
        self.debug_mode = check('debug_mode', False)

        # Output locations
        self.figure_root_dir = check('figure_root_dir', './')
        figure_sub_dir = check('figure_sub_dir', '')
        self.figure_dir = pjoin(self.figure_root_dir, figure_sub_dir)

        # Root first, then the (possibly nested) figure directory
        for directory in (self.figure_root_dir, self.figure_dir):
            hpm.ensure_dir_exists(directory)
Exemplo n.º 5
0
    def __init__(self,
                 log_filepath="./log-crawler.txt",
                 default_verbose=True,
                 no_log=False):
        """Initialise logger state and write the opening banner.

        Args:
            log_filepath: Path of the log file. None or an empty string
                disables logging.
            default_verbose: Stored as ``verbose``.
            no_log: When truthy, logging is disabled regardless of
                ``log_filepath``.

        When logging is enabled, the log file's directory is created if
        needed. When it is disabled, a notice is printed and
        ``log_filepath`` is not set on the instance.
        """
        self.verbose = default_verbose
        self.global_indent = 0
        self.indent_str = ' ' * 4
        self.start_time = asctime()
        self.no_log = no_log or log_filepath is None or log_filepath == ""

        if not self.no_log:
            self.log_filepath = log_filepath
            log_dir, _ = os.path.split(log_filepath)
            ensure_dir_exists(log_dir, logger=self)
        else:
            print("Logger is set to not log")

        # Two section breaks followed by the start-of-run message
        for _ in range(2):
            self.write_section_break()
        self.write(f"Begin Logger run output at {self.start_time}")
Exemplo n.º 6
0
    def save_txt(self, data, filename, kwargs=None, is_path=False):
        """Write ``data`` to a text file through its ``to_csv`` method.

        Args:
            data: Object exposing ``to_csv(file, **kwargs)`` (presumably a
                pandas DataFrame or Series).
            filename: Name or path handed to ``self._get_filepath``.
            kwargs: Optional dict of keyword arguments for ``to_csv``. Entries
                override the defaults below. The dict is never modified.
            is_path: Forwarded to ``self._get_filepath``. When truthy, the
                parent directory of the resolved path is created if missing.

        Defaults applied unless overridden in ``kwargs``:
            sep='\\t', na_rep='NaN', float_format='%8.3f', index=False,
            header=True.
        """
        filepath = self._get_filepath(filename, is_path)
        if is_path:
            ensure_dir_exists(os.path.split(filepath)[0])

        # Build a fresh dict on every call. The previous version filled the
        # defaults into the caller's dict (or the shared default argument),
        # so settings leaked between calls and back to the caller.
        to_csv_kwargs = {
            'sep': '\t',
            'na_rep': 'NaN',
            'float_format': "%8.3f",
            'index': False,
            'header': True,
        }
        if kwargs:
            to_csv_kwargs.update(kwargs)

        with open(filepath, mode='tw') as txt_file:
            data.to_csv(txt_file, **to_csv_kwargs)
Exemplo n.º 7
0
#!/usr/bin/env python3

from os.path import join as pjoin
from helpyr.helpyr_misc import ensure_dir_exists

# Root of the data tree that everything below is derived from.
root_dir = "/home/alex/hacking/Qs_merger/tests/test_data"
# Alternative roots kept for reference:
#root_dir = "E:\LT_Qs_Combine\LT_Results" # Windows style path
#root_dir = "/home/alex/ubc/feed-timing/data" # Unix style path

# Light table bedload cutoff: maximum rate, in g/s.
lighttable_bedload_cutoff = 800

metapickle_name = 'Qs_metapickle'

# Output tree: one parent directory with a subdirectory per product.
output_dir = pjoin(root_dir, "Qs-merger-output")
Qs_raw_pickles_dir = pjoin(output_dir, "raw-pickles")
Qs_merged_pickles_dir = pjoin(output_dir, "merged-pickles")
Qs_merged_txt_dir = pjoin(output_dir, "merged-txts")

# Create the whole tree on import, parent first.
for _required_dir in (output_dir,
                      Qs_raw_pickles_dir,
                      Qs_merged_pickles_dir,
                      Qs_merged_txt_dir):
    ensure_dir_exists(_required_dir)
del _required_dir  # keep the loop variable out of the module namespace
Exemplo n.º 8
0
    def save_xlsx(self, data, filename, key_order=None, save_kwargs=None, add_path=False):
        """Save pandas data to an Excel workbook, one sheet per entry.

        Args:
            data: Either a dict mapping sheet names to pandas objects, or a
                single pandas object (written to one sheet).
            filename: Name or path handed to ``self._get_filepath``.
            key_order: Order in which sheets are written. For a dict it
                defaults to the sorted keys. For a single object it is the
                sheet name (a bare value or a one-element list) and defaults
                to 'Sheet1'.
            save_kwargs: Optional dict of extra keyword arguments for
                ``to_excel``.
            add_path: Forwarded to ``self._get_filepath``. When falsy, the
                parent directory of the resolved path is created if missing.
                NOTE(review): confirm this condition matches the semantics
                of ``_get_filepath``.

        After each sheet is written its column widths are set from the
        header text, with a minimum width of 5.
        """
        if save_kwargs is None:
            save_kwargs = {}

        # Get the path
        filepath = self._get_filepath(filename, add_path)
        if not add_path:
            ensure_dir_exists(os.path.split(filepath)[0])

        # Get the key order.
        if isinstance(data, dict):
            if key_order is None:
                # sorted() returns the new list. The previous
                # list(...).sort() call returned None, which broke the
                # sheet loop below.
                key_order = sorted(data)
        else:
            # Assume a single dataframe. Convert into a dict
            if key_order is None:
                key_order = ['Sheet1']
            elif not isinstance(key_order, list):
                key_order = [key_order]
            data = {key_order[0]: data}

        # Index columns only appear in the sheet when to_excel writes the
        # index (its default), so only then do they take part in the sizing.
        index_written = save_kwargs.get('index', True)
        min_col_width = 5

        self.logger.write(f"Saving data to file {os.path.split(filepath)[-1]}")
        self.logger.increase_global_indent()
        try:
            with pd.ExcelWriter(filepath) as xlsx_file:
                for sheet_key in key_order:
                    frame = data[sheet_key]

                    # Write data
                    self.logger.write(f"Writing to sheet {sheet_key}")
                    frame.to_excel(xlsx_file,
                                   sheet_name=sheet_key,
                                   **save_kwargs)

                    # Fix the column widths
                    index_cols = (list(frame.index.names)
                                  if index_written else [])
                    cols = index_cols + list(frame.columns)

                    # set_column(first, last, width) is the xlsxwriter
                    # worksheet API -- assumes that engine is in use.
                    sheet = xlsx_file.sheets[sheet_key]
                    for col_idx, col_name in enumerate(cols):
                        width = len(str(col_name)) + 0.5
                        if col_name == 'limb':
                            width += 2
                        sheet.set_column(col_idx, col_idx,
                                         max(width, min_col_width))
                self.logger.write(f"Wrapping things up...")
        finally:
            # Restore the log indentation even if writing failed
            self.logger.decrease_global_indent()

        self.logger.write(f"Finished saving!")
Exemplo n.º 9
0
    def save_figure(self, **kwargs):
        """
        Save a figure, with convenient filename assembly options.

        Kwargs:
        'figure_name' is a complete name for a figure. It will be used directly
        as the filename. Can't define with 'figure_name_parts'.

        'figure_name_parts' or 'fig_name_parts' is the ordered sequence of file
        name chunks. The chunks will be joined with 'sep' to generate the file
        name. Can't define with 'figure_name'.

        'sep' is the separator used to join the name parts. Defaults to '_'.

        'figure' is the handle for the figure to save. When omitted, the
        current pyplot figure is saved.

        'alt_subdir' is the optional alternate subdirectory to use instead of
        the one provided in the constructor. It is created if missing.

        Any unused kwargs are passed to plt.savefig or fig.savefig.
        'orientation' defaults to 'landscape' but can be overridden.

        Raises:
        AssertionError unless exactly one of 'figure_name',
        'figure_name_parts' and 'fig_name_parts' is provided.

        In debug mode nothing is saved; only a notice is printed or logged.
        """
        kwargs_copy = kwargs.copy()
        check = kwarg_checker.get_check_kwarg_fu(kwargs_copy, pop=True)

        figure_name = check('figure_name', None)
        figure_name_parts = check('figure_name_parts', None)
        fig_name_parts = check('fig_name_parts', None)
        sep = check('sep', '_')
        figure = check('figure', None)
        alt_subdir = check('alt_subdir', None)

        # Exactly one naming option may be used. A chained XOR is also true
        # when all three are given, so count the provided options instead.
        # Raised explicitly so the check survives `python -O`.
        name_options = (figure_name, figure_name_parts, fig_name_parts)
        n_provided = sum(option is not None for option in name_options)
        if n_provided != 1:
            raise AssertionError(
                "Provide exactly one of 'figure_name', 'figure_name_parts' "
                "or 'fig_name_parts'; got "
                f"figure_name={figure_name!r}, "
                f"figure_name_parts={figure_name_parts!r}, "
                f"fig_name_parts={fig_name_parts!r}")

        if figure_name is None:
            if figure_name_parts is None:
                figure_name = sep.join(fig_name_parts)
            else:
                figure_name = sep.join(figure_name_parts)

        # Check if an alternate subdirectory is desired
        if alt_subdir is None:
            destination_dir = self.figure_dir
        else:
            destination_dir = pjoin(self.figure_root_dir, alt_subdir)
            hpm.ensure_dir_exists(destination_dir)

        filename = f"{figure_name}.{self.figure_extension}"
        filepath = pjoin(destination_dir, filename)

        if self.debug_mode:
            msgs = ["!!!", f"   Not saving figure to {filepath}", "!!!"]
            if self.logger is None:
                for msg in msgs:
                    print(msg)
            else:
                self.logger.write(msgs)
            return

        msg = f"Saving figure to {filepath}"
        if self.logger is None:
            print(msg)
        else:
            self.logger.write(msg)

        # A caller-supplied orientation used to collide with the hard-coded
        # keyword and raise TypeError; treat 'landscape' as a default instead.
        kwargs_copy.setdefault('orientation', 'landscape')

        # Save the target figure if one was given, else the current figure
        save = plt.savefig if figure is None else figure.savefig
        save(filepath, **kwargs_copy)