예제 #1
0
def test_messenger_indentation_with(capfd):
    """
    Check that `Messenger.indentation()` only changes the indentation
    while inside the `with` block.

    Both the absolute (`indent=`) and the relative (`add_indent=`) forms
    are exercised; after each block the messenger must be back at its
    original indentation of 2 spaces.
    """
    text = "some string"
    # `end=""` keeps `print` from appending a newline to the captured output
    messenger = Messenger(verbose=True, indent=2, msg_fn=print, end="")

    # Absolute override: 6 spaces while inside the context
    with messenger.indentation(indent=6):
        messenger(text)
    stdout, _ = capfd.readouterr()
    assert stdout == " " * 6 + text

    # Leaving the context restores the 2 spaces
    messenger(text)
    stdout, _ = capfd.readouterr()
    assert stdout == " " * 2 + text

    # Relative override: 2 + 3 = 5 spaces while inside the context
    with messenger.indentation(add_indent=3):
        messenger(text)
    stdout, _ = capfd.readouterr()
    assert stdout == " " * 5 + text

    # Leaving the context restores the 2 spaces again
    messenger(text)
    stdout, _ = capfd.readouterr()
    assert stdout == " " * 2 + text
예제 #2
0
def test_messenger_print(capfd):
    """
    Check a `print`-backed `Messenger`: default indentation, per-call
    `indent` override and per-call `verbose=False`.
    """
    sentences = (
        "Ma name is not John",
        "Ma name is James",
        "I shall not repeat this",
    )
    # The positional arguments are expected to end up space-separated
    joined = " ".join(sentences)

    messenger = Messenger(verbose=True, indent=2, msg_fn=print)

    # Indentation from the constructor (2 spaces)
    messenger(*sentences)
    stdout, _ = capfd.readouterr()
    assert stdout == "  " + joined + "\n"

    # Indentation given for this call only (4 spaces)
    messenger(*sentences, indent=4)
    stdout, _ = capfd.readouterr()
    assert stdout == "    " + joined + "\n"

    # Verbosity switched off for this call only: nothing is printed
    messenger(*sentences, verbose=False)
    stdout, _ = capfd.readouterr()
    assert stdout == ""
예제 #3
0
def print_nan_stats(
        x: Union[np.ndarray, pd.DataFrame],
        message: str,
        messenger: Optional[Callable] = Messenger(
            verbose=True, indent=0, msg_fn=print),
        indent: Optional[int] = None) -> None:
    """
    Send a one-line summary of the NaNs in `x` through a messenger.

    The text handed to the messenger is
    `"<message>: <first nan_stats value> (<second nan_stats value>%)"`.
    The two values come from `nan_stats(x)`; presumably the NaN count
    and the NaN percentage (confirm against `nan_stats`).

    Parameters
    ----------
    x : `numpy.ndarray` or `pandas.DataFrame`
        The array / data frame passed to `nan_stats()`.
    message : str
        Text placed before the stats, separated from them by `": "`.
    messenger : `utipy.Messenger` or None
        Callable used to print/log/... the summary.
        It is first passed through `check_messenger()`.
        When `None`, no printing/logging is performed.
    indent : int or None
        Forwarded to the messenger call as `indent`.
        When `None`, the indentation is determined by `messenger`.
    """
    emit = check_messenger(messenger)
    nan_count, nan_percentage = nan_stats(x)
    summary = f"{message}: {nan_count} ({nan_percentage}%)"
    emit(summary, indent=indent)
예제 #4
0
def test_messenger_logger(capfd, caplog):
    """
    Check a `Messenger` whose messaging function is `logging.Logger.info`.

    Covers the default indentation, a per-call `indent` override, multiple
    positional arguments and per-call `verbose=False`.
    """
    logging.basicConfig()
    _logger = logging.getLogger("Mr.Logger")
    _logger.setLevel(logging.INFO)
    logger = Messenger(verbose=True, indent=2, msg_fn=_logger.info)

    def get_last_log_message():
        # The most recent record captured by the `caplog` fixture
        return caplog.records[-1].getMessage()

    # Default indentation
    logger("Ma name is not John")

    assert get_last_log_message() == "  Ma name is not John"

    # Override indentation
    logger("Ma name is not John", indent=4)

    assert get_last_log_message() == "    Ma name is not John"

    # Multiple args
    logger("Ma name is not John", "Ma name is James", indent=4)

    assert get_last_log_message() == "    Ma name is not John Ma name is James"

    # Disable verbosity
    # The messages go through the logger rather than stdout, so the
    # meaningful check is that no new log record was produced
    num_records_before = len(caplog.records)
    logger("Ma name is not John", verbose=False)

    assert len(caplog.records) == num_records_before

    # And nothing should have leaked to stdout either
    out, err = capfd.readouterr()
    assert out == ""
예제 #5
0
def mk_dir(path: Union[str, pathlib.Path],
           arg_name: Union[str, None] = "",
           raise_on_exists: bool = False,
           messenger: Optional[Callable] = Messenger(verbose=True,
                                                     indent=0,
                                                     msg_fn=print)):
    """
    Make directory if it doesn't exist.

    Missing parent directories are created as well.

    Parameters
    ----------
    path : str or `pathlib.Path`
        Path to directory to make.
    arg_name : str or None
        Name of path argument/variable. It is passed through
        `_prep_arg_name()` and the result is used as prefix in
        the message and in the error text.
    raise_on_exists : bool
        Whether to raise a `FileExistsError` when the path already exists.
        When `False`, an existing path makes the function return
        without doing anything.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.

    Raises
    ------
    FileExistsError
        When `raise_on_exists` is `True` and the path exists, either at
        the initial check or by the time the directory is created.
    """
    path = pathlib.Path(path)
    path_exists = path.exists()

    # Prepare arg name
    arg_name = _prep_arg_name(arg_name)

    # Check messenger (always returns Messenger instance)
    messenger = check_messenger(messenger)

    # Fail for existing directory (when specified)
    # Or exit function
    if path_exists:
        if raise_on_exists:
            raise FileExistsError(
                f"{arg_name}directory already exists: {path.resolve()}")
        return

    # Message user about the creation of a new directory
    messenger(f"{arg_name}directory does not exist and will be created: "
              f"{path.resolve()}")

    # Create new directory if it does not already exist
    try:
        path.mkdir(parents=True, exist_ok=not raise_on_exists)
    except FileExistsError as e:
        # In this case, the path was likely created between
        # our existence check and our creation attempt
        if raise_on_exists:
            # Chain the original error so the underlying OS error
            # stays visible as the direct cause in the traceback
            raise FileExistsError(
                f"{arg_name}directory already exists: {path.resolve()}"
            ) from e
        # NOTE(review): With `exist_ok=True`, `Path.mkdir()` only raises
        # `FileExistsError` when the path exists but is not a directory.
        # That case is deliberately ignored here (best effort) - confirm
        # that silently returning is the intended behavior.
예제 #6
0
def rm_dir(path: Union[str, pathlib.Path],
           arg_name: Union[str, None] = "",
           raise_missing: bool = False,
           raise_not_dir: bool = True,
           shutil_ignore_errors: bool = False,
           shutil_onerror: Optional[Callable] = None,
           messenger: Optional[Callable] = Messenger(verbose=True,
                                                     indent=0,
                                                     msg_fn=print)):
    """
    Delete a directory tree with `shutil.rmtree()` when it exists.

    Nothing is removed when the path is missing or is not a directory;
    depending on the `raise_*` flags, those cases either raise or
    return silently.

    Parameters
    ----------
    path : str or `pathlib.Path`
        Path to the directory to remove.
    arg_name : str or None
        Name of the path argument/variable. It is passed through
        `_prep_arg_name()` and the result prefixes the removal
        message and the error texts.
    raise_missing : bool
        Whether to raise a `RuntimeError` when the path does not exist.
    raise_not_dir : bool
        Whether to raise a `RuntimeError` when the path exists
        but is not a directory.
    shutil_ignore_errors : bool
        Passed to the `ignore_errors` argument in `shutil.rmtree()`.
    shutil_onerror : callable or None
        Passed to the `onerror` argument in `shutil.rmtree()`.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.

    Raises
    ------
    RuntimeError
        See `raise_missing` and `raise_not_dir`.
    """
    path = pathlib.Path(path)
    path_exists = path.exists()

    # Normalize the helpers before any branch needs them
    arg_name = _prep_arg_name(arg_name)
    messenger = check_messenger(messenger)

    # Missing path: raise when requested, otherwise there is nothing to do
    if not path_exists:
        if raise_missing:
            raise RuntimeError(f"{arg_name}path did not exist: {path}")
        return

    # Existing path that is not a directory: raise when requested,
    # otherwise leave it untouched
    if not path.is_dir():
        if raise_not_dir:
            raise RuntimeError(f"{arg_name}path was not a directory: {path}")
        return

    # Tell the user before the directory tree is deleted
    messenger(f"{arg_name}directory will be removed: {path.resolve()}")
    shutil.rmtree(path,
                  ignore_errors=shutil_ignore_errors,
                  onerror=shutil_onerror)
예제 #7
0
def test_check_messenger():
    """
    Check that `check_messenger()` returns `Messenger` instances
    (including subclass instances) unchanged and turns `None`
    into a non-verbose `Messenger`.
    """

    class SubMessenger(Messenger):
        def __init__(self) -> None:
            super().__init__(verbose=True, msg_fn=print, indent=0)

    # A plain Messenger comes back as the very same object
    plain = Messenger(verbose=True, indent=2, msg_fn=print)
    assert check_messenger(plain) is plain

    # So does an instance of a Messenger subclass
    derived = SubMessenger()
    assert check_messenger(derived) is derived

    # `None` is replaced by a Messenger with `verbose=False`
    assert isinstance(check_messenger(None), Messenger)
    assert not check_messenger(None).verbose
예제 #8
0
    def rm_dir(
        self,
        name: str,
        rm_paths: bool = True,
        raise_on_fail: bool = True,
        messenger: Optional[Callable] = Messenger(verbose=True,
                                                  indent=0,
                                                  msg_fn=print)
    ) -> None:
        """
        Remove the directory registered under `name` from disk.

        Parameters
        ----------
        name : str
            Name of the path to the directory that should be removed.
            The path is looked up with `self[name]`.
        rm_paths : bool
            Whether to afterwards call `self.rm_paths_in_dir()` for the
            removed directory with `rm_dir=True`; i.e. whether to remove
            all paths that are within the removed directory as well as
            the path to the directory itself.
            NOTE: For files that need to exist (e.g. those in the
            `in_files` collection), keeping the path after the file was
            removed causes downstream checks of the paths
            (see `.check_paths()`) to fail, as the files no longer exist.
            Those checks are called as part of some of the methods.
        raise_on_fail : bool
            Passed to `remove_dir()` as both `raise_missing` and
            `raise_not_dir`; i.e. whether to raise an error when the
            path does not exist or is not a directory.
        messenger : `utipy.Messenger` or None
            A `utipy.Messenger` instance used to print/log/... information.
            When `None`, no printing/logging is performed.
            The messenger determines the messaging function (e.g. `print`)
            and potential indentation.

        Raises
        ------
        ValueError
            When the path stored under `name` is `None`.
        """
        dir_path = self[name]
        if dir_path is None:
            raise ValueError(f"Path object for `{name}` was `None`.")

        # A single flag controls both failure modes of the removal
        remove_dir(
            path=dir_path,
            arg_name=f'{name} path',
            raise_missing=raise_on_fail,
            raise_not_dir=raise_on_fail,
            messenger=messenger,
        )

        if not rm_paths:
            return

        # Also forget the paths inside the directory and the directory's own path
        self.rm_paths_in_dir(dir_path=dir_path, rm_dir=True)
예제 #9
0
    def rm_tmp_dirs(
        self,
        rm_paths: bool = True,
        raise_on_fail: bool = True,
        messenger: Optional[Callable] = Messenger(verbose=True,
                                                  indent=0,
                                                  msg_fn=print)
    ) -> None:
        """
        Remove all temporary directories from disk.

        Calls `self.rm_dir()` once for every name in the
        `tmp_dirs` collection.

        Parameters
        ----------
        rm_paths : bool
            Whether to remove all paths that are within the
            removed directories and the paths to the directories
            themselves. Forwarded to `self.rm_dir()`.
        raise_on_fail : bool
            Whether to raise an error when a directory could not be
            removed. Forwarded to `self.rm_dir()`.
        messenger : `utipy.Messenger` or None
            A `utipy.Messenger` instance used to print/log/... information.
            When `None`, no printing/logging is performed.
            The messenger determines the messaging function (e.g. `print`)
            and potential indentation.
        """

        # TODO In case they are nested, we should check their existence
        # before deleting some of the directories, as that might
        # delete the existing ones
        # (I.e. find the top-level tmp dirs and remove those, and don't
        # try to remove those contained in them)

        # Take a snapshot of the names first, as removing paths may
        # change the collection while we are looping over it
        tmp_dir_names = list(self.get_collection(name="tmp_dirs").keys())

        # Delete each directory in `tmp_dirs`
        # `rm_paths` is forwarded so `rm_dir()` handles the path removal
        # (with the actual directory path) only when it was requested
        for name in tmp_dir_names:
            self.rm_dir(name=name,
                        rm_paths=rm_paths,
                        raise_on_fail=raise_on_fail,
                        messenger=messenger)
예제 #10
0
def test_messenger_kwargs(capfd):
    """
    Check that keyword arguments for the messaging function can be set
    at initialization and overridden per call without changing the
    defaults.
    """
    text = "some string"
    indented_text = "  " + text

    # `end` is a keyword argument of `print()` (normally `end='\n'`)
    messenger = Messenger(verbose=True, indent=2, msg_fn=print, end="")

    # The kwargs from initialization apply: no trailing newline
    messenger(text)
    stdout, _ = capfd.readouterr()
    assert stdout == indented_text

    # A call-specific kwarg wins over the one from initialization
    messenger(text, end=" - Dudley")
    stdout, _ = capfd.readouterr()
    assert stdout == indented_text + " - Dudley"

    # The override must not have changed the defaults
    messenger(text)
    stdout, _ = capfd.readouterr()
    assert stdout == indented_text
예제 #11
0
    def mk_output_dir(self,
                      name: str,
                      messenger: Optional[Callable] = Messenger(verbose=True,
                                                                indent=0,
                                                                msg_fn=print)):
        """
        Create the directory that the path named `name` is located in.

        The path is looked up with `self.get_path()` and its parent
        directory is handed to `mk_dir()`, with `name` as `arg_name`.

        Parameters
        ----------
        name : str
            Name of the path to create the output directory for.
        messenger : `utipy.Messenger` or None
            A `utipy.Messenger` instance used to print/log/... information.
            When `None`, no printing/logging is performed.
            The messenger determines the messaging function (e.g. `print`)
            and potential indentation.
        """
        # The directory to create is the parent of the stored path
        parent_dir = pathlib.Path(self.get_path(name=name)).parent
        mk_dir(path=parent_dir, arg_name=name, messenger=messenger)
예제 #12
0
def drop(data,
         value='NaN',
         thresh=0,
         direction='>',
         axis=0,
         include=None,
         exclude=None,
         copy=True,
         messenger: Optional[Callable] = Messenger(verbose=True,
                                                   indent=0,
                                                   msg_fn=print)):
    """
    Drop rows or columns from a pandas DataFrame based on their values.

    Rows / columns are dropped when a specific value, or any value,
    is represented too much, too little, etc.

    The arguments read as a command:
        'Drop [axis] if [value] appears [direction] than [thresh] percent of the time.'
    E.g.:
        'Drop columns if 0 appears more than 90 percent of the time.'
        'Drop rows if *any* value appears exactly 77 percent of the time.'

    Which rows / columns match is decided by `_find_exceeders()`;
    this function handles the argument checks, the `include` / `exclude`
    selection, the messaging and the actual dropping.

    Parameters
    ----------
    data : pd.DataFrame
        The data frame to drop rows / columns from.
    value : str / int / float
        The value to match. Cannot be `None`.
            Regular value,
            'any',
            'NaN',
            'inf'
    thresh : float
        Threshold.
        Percentage between 0-1.
    direction : str
        Operator sign for comparison.
            '>', '<', '>=', '<=', '=='.
    axis : int
        0 for columns, 1 for rows.
    include : list or None
        Names of columns / indices of rows to search within.
        `None` means ALL are included unless otherwise specified, see `exclude`.
        For columns, the search is restricted to these columns.
        For rows, matched rows that are not listed here are kept.
    exclude : list or None
        Names of columns / indices of rows NOT to search within.
        `None` means no columns / rows are excluded unless otherwise
        specified, see `include`.
        At most one of `include` and `exclude` can be specified.
    copy : bool
        Whether to work on `data.copy()` instead of `data` itself.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.

    Returns
    -------
    pd.DataFrame
        The result of `data.drop()` for the matched columns / rows.

    Raises
    ------
    ValueError
        When `value` is `None`, when `axis` is not 0 or 1, or when
        both `include` and `exclude` are specified.

    Examples
    --------

    Uncomment code to run.

    Remove all rows with any NaNs.
    # drop(data, value = 'NaN', axis = 1, thresh = 0,
    #      direction = '>')

    Remove all columns with only 1 unique value.
    I.e. the same value in 100% of the rows.
    # drop(data, value = 'any', axis = 0, thresh = 1,
    #      direction = '==')

    Remove all columns that have less than 30% NaNs
    # drop(data, value = 'NaN', axis = 0, thresh = 0.3,
    #      direction = '<')

    """

    # `None` becomes a non-verbose messenger
    messenger = check_messenger(messenger)

    # Argument checks
    if value is None:
        raise ValueError('value cannot be None.')

    if axis not in (0, 1):
        raise ValueError("`axis` must be 0 or 1")

    # Work on a copy when requested
    if copy:
        data = data.copy()

    if include is not None and exclude is not None:
        raise ValueError("Either include or exclude must be None.")

    if axis == 0:
        # Columns

        # An exclusion list is turned into the equivalent inclusion list
        if exclude is not None:
            include = [col for col in data.columns if col not in exclude]

        # Only search within the included columns (when specified)
        search_frame = data if include is None else data.filter(items=include)

        # Find the columns to drop
        to_drop = _find_exceeders(search_frame,
                                  value,
                                  thresh,
                                  direction,
                                  axis=axis)

        messenger(f'Dropped {len(to_drop)} columns.')

        return data.drop(to_drop, axis=1)

    # Rows (`axis` is 1 here)

    # Find the rows to drop
    to_drop = _find_exceeders(data, value, thresh, direction, axis=axis)

    # Keep the rows that are excluded / not included
    # TODO use sets instead
    if exclude is not None:
        to_drop = [i for i in to_drop if i not in exclude]
    elif include is not None:
        to_drop = [i for i in to_drop if i in include]

    messenger(f'Dropped {len(to_drop)} rows.')

    # `to_drop` indexes into `data.index` to get the row labels
    return data.drop(data.index[to_drop], axis=0)
예제 #13
0
    def mk_output_dirs(self,
                       collection: Optional[str] = None,
                       messenger: Optional[Callable] = Messenger(
                           verbose=True, indent=0, msg_fn=print)):
        """
        Create non-existing output directories.

        For the directory collections (`out_dirs`, `tmp_dirs`), the
        directories themselves are created. For the file collections
        (`out_files`, `tmp_files`), the directory each file is located
        in is created.

        Parameters
        ----------
        collection : str or None
            Name of collection to create output directories for.
                One of: ('out_dirs', 'out_files', 'tmp_dirs', 'tmp_files')
            When `None`, directories are created for all four collections.
        messenger : `utipy.Messenger` or None
            A `utipy.Messenger` instance used to print/log/... information.
            When `None`, no printing/logging is performed.
            The messenger determines the messaging function (e.g. `print`)
            and potential indentation.

        Raises
        ------
        ValueError
            When `collection` is not one of the output path collections
            or when a selected collection is `None`.
        """

        # The collections are processed in this order
        output_collections = ("out_dirs", "out_files", "tmp_dirs", "tmp_files")

        if collection is not None and collection not in output_collections:
            raise ValueError(
                f"`collection` must be one of the output path collections but was {collection}."
            )

        # Find which collections to create output dirs for
        # Exactly the requested collection or all of them
        if collection is None:
            selected_collections = output_collections
        else:
            selected_collections = (collection,)

        # Create output directories if they don't exist
        for collection_name in selected_collections:
            paths = self.get_collection(collection_name)
            if paths is None:
                raise ValueError(
                    f"`{collection_name}` collection was `None`.")

            # For files, we create the directory the file should be placed in
            is_file_collection = collection_name.endswith("_files")

            for k, v in paths.items():
                dir_path = pathlib.Path(v).parent if is_file_collection else v
                mk_dir(path=dir_path, arg_name=k, messenger=messenger)