def test_string_component_transform_factory(x, alg, example_func):
    """
    The factory-built transform must agree with the reference function.

    A ValueError whose message contains "is not in range" is tolerated
    (the inline note attributes it to a broken locale library on BSD);
    any other ValueError is re-raised.
    """
    transform = string_component_transform_factory(alg)
    try:
        text = py23_str(x)
        assert transform(text) == example_func(text)
    except ValueError as err:  # handle broken locale lib on BSD.
        tolerated = "is not in range" in str(err)
        if not tolerated:
            raise
def test_string_component_transform_factory(x, alg, example_func):
    """
    Compare the transform produced for `alg` against `example_func`.

    Both callables receive the py23_str form of `x`.  A ValueError is
    only swallowed when its text contains "is not in range".
    """
    func_under_test = string_component_transform_factory(alg)
    try:
        as_text = py23_str(x)
        produced = func_under_test(as_text)
        wanted = example_func(as_text)
        assert produced == wanted
    except ValueError as exc:  # handle broken locale lib on BSD.
        if "is not in range" in str(exc):
            return
        raise
Exemplo n.º 3
0
def test_path_splitter_splits_path_string_by_separator_and_removes_extension(
        x):
    """
    _path_splitter must split directories and peel off the extension.

    The expected value is built from pathlib: every part except the
    last, followed by the stem and the suffix of the final part.
    """
    assume(all(x))
    path_text = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1]
    as_path = pathlib.Path(path_text)
    leading = list(as_path.parts)[:-1]
    assert _path_splitter(path_text) == leading + [as_path.stem, as_path.suffix]
Exemplo n.º 4
0
def check_filters(filters):
    """
    Run range_check on every (low, high) pair of an iterable.

    Parameters
    ----------
    filters : iterable
        The collection of filters to check. Element 0 and element 1
        of each entry are handed to range_check.

    Returns
    -------
    A list with the result of range_check for each entry, or None
    when `filters` is falsy.

    Raises
    ------
    ValueError
        A ValueError raised by range_check is re-raised with the
        message prefixed by "Error in --filter: ".

    """
    if filters:
        try:
            return [range_check(pair[0], pair[1]) for pair in filters]
        except ValueError as problem:
            raise ValueError("Error in --filter: " + py23_str(problem))
    return None
Exemplo n.º 5
0
def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x):
    """
    utils.path_splitter must agree with pathlib on parts, stem and suffix.

    The path under test is all but the last two items of `x` joined as
    a path, then a dot, then the last item of `x`.
    """
    path_text = py23_str(pathlib.Path(*x[:-2])) + "." + x[-1]
    as_path = pathlib.Path(path_text)
    expected = tuple(as_path.parts)[:-1] + (as_path.stem, as_path.suffix)
    assert tuple(utils.path_splitter(path_text)) == expected
Exemplo n.º 6
0
def check_filters(filters):
    """
    Validate each filter of an iterable with range_check.

    Parameters
    ----------
    filters : iterable
        The filters to validate. Each entry is indexed at 0 and 1 and
        the two values are passed to range_check.

    Returns
    -------
    None when `filters` is falsy, otherwise a list containing what
    range_check returned for each entry.

    Raises
    ------
    ValueError
        Raised with the prefix "Error in --filter: " whenever
        range_check itself raises ValueError.

    """
    if not filters:
        return None
    checked = []
    try:
        for entry in filters:
            checked.append(range_check(entry[0], entry[1]))
    except ValueError as failure:
        raise ValueError("Error in --filter: " + py23_str(failure))
    return checked
Exemplo n.º 7
0
def test_path_splitter_splits_path_string_by_separator_and_removes_extension(
        x):
    """
    Check utils.path_splitter against a pathlib-derived expectation.

    Expected components are the pathlib parts minus the last one,
    followed by the stem and the suffix of the full path.
    """
    candidate = py23_str(pathlib.Path(*x[:-2])) + "." + x[-1]
    reference = pathlib.Path(candidate)
    head = tuple(reference.parts)[:-1]
    tail = (reference.stem, reference.suffix)
    assert tuple(utils.path_splitter(candidate)) == head + tail
Exemplo n.º 8
0
def check_filter(filt):
    """\
    Validate the (low, high) pairs of a filter with range_check.

    A falsy `filt` means "no filter" and yields None.  Otherwise a list
    with the result of range_check for each pair is returned.  A
    ValueError from range_check is re-raised with the message prefixed
    by 'Error in --filter: '.
    """
    if filt:
        try:
            return [range_check(bounds[0], bounds[1]) for bounds in filt]
        except ValueError as problem:
            raise ValueError('Error in --filter: '+py23_str(problem))
    # No filter was requested.
    return None
def check_filter(filt):
    """\
    Apply range_check to every pair in `filt`.

    Returns None when `filt` is falsy (no filter requested); otherwise
    returns the list of values produced by range_check, one per pair.
    If range_check raises ValueError, a new ValueError is raised whose
    message starts with 'Error in --filter: '.
    """
    # Nothing to validate when no filter was given.
    if not filt:
        return None
    results = []
    try:
        for pair in filt:
            results.append(range_check(pair[0], pair[1]))
    except ValueError as failure:
        raise ValueError('Error in --filter: ' + py23_str(failure))
    return results
Exemplo n.º 10
0
        def __new__(cls, x, y, alg=ns.DEFAULT):
            """
            Compare `x` and `y` using the natural-sort key for `alg`.

            Key functions are memoized per `alg` value in
            `cls.cached_keys`.  The value returned is the result of
            py23_cmp on the two keys, not an instance of `cls`.

            Raises ValueError when `alg` cannot be combined with
            `ns.DEFAULT` using the `|` operator.
            """
            try:
                ns.DEFAULT | alg
            except TypeError:
                raise ValueError(
                    "natsort_keygen: 'alg' argument must be from the enum 'ns'"
                    + ", got {}".format(py23_str(alg))
                )

            # Add the _DUMB option if the locale library is broken.
            if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
                alg |= NS_DUMB

            # Build the key function only the first time this alg is seen.
            cache = cls.cached_keys
            if alg not in cache:
                cache[alg] = natsort_keygen(alg=alg)
            keyfunc = cache[alg]

            return py23_cmp(keyfunc(x), keyfunc(y))
Exemplo n.º 11
0
        def __new__(cls, x, y, alg=ns.DEFAULT):
            """
            Return py23_cmp of the natural-sort keys of `x` and `y`.

            The key function for each distinct `alg` is created once
            with natsort_keygen and then reused from `cls.cached_keys`.
            A ValueError is raised if `ns.DEFAULT | alg` raises
            TypeError.
            """
            try:
                ns.DEFAULT | alg
            except TypeError:
                got = ", got {}".format(py23_str(alg))
                raise ValueError(
                    "natsort_keygen: 'alg' argument must be from the enum 'ns'"
                    + got
                )

            # Add the _DUMB option if the locale library is broken.
            if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
                alg |= NS_DUMB

            known_keys = cls.cached_keys
            if alg not in known_keys:
                known_keys[alg] = natsort_keygen(alg=alg)
            make_key = known_keys[alg]

            left = make_key(x)
            right = make_key(y)
            return py23_cmp(left, right)
Exemplo n.º 12
0
        def __new__(cls, x, y, alg=0, *args, **kwargs):
            """
            Compare `x` and `y` with the natural-sort key for `alg`.

            Keyword arguments are passed to _args_to_enum and the
            result is OR-ed into `alg`; extra positional arguments are
            accepted but not used.  Key functions are cached per `alg`
            in `cls.cached_keys`.  Returns the py23_cmp result for the
            two keys rather than an instance of `cls`.

            Raises ValueError when building `alg` raises TypeError.
            """
            try:
                alg = _args_to_enum(**kwargs) | alg
            except TypeError:
                prefix = ("natsort_keygen: 'alg' argument must be "
                          "from the enum 'ns'")
                raise ValueError(prefix + ', got {0}'.format(py23_str(alg)))

            # Add the _DUMB option if the locale library is broken.
            if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
                alg |= ns._DUMB

            cache = cls.cached_keys
            if alg not in cache:
                cache[alg] = natsort_keygen(alg=alg)
            keyfunc = cache[alg]

            return py23_cmp(keyfunc(x), keyfunc(y))
def _path_splitter(s, _d_match=re.compile(r'\.\d').match):
    """
    Split a string into its path components. Assumes a string is a path.

    Parameters
    ----------
    s : str | pathlib.PurePath
        The path to split. A PurePath is first converted with py23_str
        (only when has_pathlib is true).
    _d_match : callable, optional
        Matcher that recognizes a dot followed by a digit. An
        "extension" that matches is considered part of a decimal
        number and is not split off. Not meant to be overridden.

    Returns
    -------
    split : iterable
        The result of ichain over the directory components followed by
        the basename and each of its extensions. An empty input gives
        a single empty component instead of raising IndexError.
    """
    # If a PathLib Object, convert it to a string before splitting.
    if has_pathlib and isinstance(s, PurePath):
        s = py23_str(s)
    path_parts = deque()
    p_appendleft = path_parts.appendleft
    # Continue splitting the path from the back until we have reached
    # '..' or '.', or until there is nothing left to split.
    path_location = s
    while path_location != os_curdir and path_location != os_pardir:
        parent_path = path_location
        path_location, child_path = path_split(parent_path)
        if path_location == parent_path:
            # Splitting no longer makes progress (root or empty string).
            break
        p_appendleft(child_path)

    # This last append is the base path.
    # Only append if the string is non-empty.
    if path_location:
        p_appendleft(path_location)

    # Now, split off the file extensions using a similar method to above.
    # Continue splitting off file extensions until we reach a decimal number
    # or there are no more extensions.
    # We are not using built-in functionality of PathLib here because of
    # the recursive splitting up to a decimal.
    # An empty input leaves path_parts empty; fall back to the input
    # itself so that pop() cannot raise IndexError.
    base = path_parts.pop() if path_parts else s
    base_parts = deque()
    b_appendleft = base_parts.appendleft
    while True:
        front = base
        base, ext = path_splitext(front)
        if _d_match(ext) or not ext:
            # Reset base to before the split if the split is invalid.
            base = front
            break
        b_appendleft(ext)
    b_appendleft(base)

    # Return the split parent paths and then the split basename.
    return ichain(path_parts, base_parts)
Exemplo n.º 14
0
def path_splitter(s, _d_match=re.compile(r"\.\d").match):
    """
    Split a string into its path components.

    Assumes a string is a path or is path-like.

    Parameters
    ----------
    s : str | pathlib.Path
    _d_match : callable, optional
        Matcher that recognizes a dot followed by a digit. An
        "extension" that matches is considered part of a decimal
        number and is not split off. Not meant to be overridden.

    Returns
    -------
    split : iterable
        The path split by directory components and extensions, as
        produced by ichain. An empty input gives a single empty
        component instead of raising IndexError.

    Examples
    --------

        >>> tuple(path_splitter("this/thing.ext"))
        ({u}'this', {u}'thing', {u}'.ext')

    """
    if has_pathlib and isinstance(s, PurePath):
        s = py23_str(s)
    path_parts = deque()
    p_appendleft = path_parts.appendleft
    # Continue splitting the path from the back until we have reached
    # '..' or '.', or until there is nothing left to split.
    path_location = s
    while path_location != os_curdir and path_location != os_pardir:
        parent_path = path_location
        path_location, child_path = path_split(parent_path)
        if path_location == parent_path:
            # Splitting no longer makes progress (root or empty string).
            break
        p_appendleft(child_path)

    # This last append is the base path.
    # Only append if the string is non-empty.
    if path_location == os_curdir or path_location == os_pardir:
        # A relative anchor is kept verbatim. Replacing it with the
        # separator would make "../x" indistinguishable from "/x".
        p_appendleft(path_location)
    elif path_location:
        # Make sure the proper path separator for this OS is used
        # no matter what was actually given.
        p_appendleft(py23_str(os_sep))

    # Now, split off the file extensions using a similar method to above.
    # Continue splitting off file extensions until we reach a decimal number
    # or there are no more extensions.
    # We are not using built-in functionality of PathLib here because of
    # the recursive splitting up to a decimal.
    # An empty input leaves path_parts empty; fall back to the input
    # itself so that pop() cannot raise IndexError.
    base = path_parts.pop() if path_parts else s
    base_parts = deque()
    b_appendleft = base_parts.appendleft
    while True:
        front = base
        base, ext = path_splitext(front)
        if _d_match(ext) or not ext:
            # Reset base to before the split if the split is invalid.
            base = front
            break
        b_appendleft(ext)
    b_appendleft(base)

    # Return the split parent paths and then the split basename.
    return ichain(path_parts, base_parts)
            entries = [
                entry for entry in entries
                if keep_entry_range(entry, lows, highs, float, regex)
            ]
        if args.reverse_filter is not None:
            lows, highs = ([f[0] for f in args.reverse_filter],
                           [f[1] for f in args.reverse_filter])
            entries = [
                entry for entry in entries
                if not keep_entry_range(entry, lows, highs, float, regex)
            ]
        if args.exclude:
            exclude = set(args.exclude)
            entries = [
                entry for entry in entries
                if exclude_entry(entry, exclude, float, regex)
            ]

    # Print off the sorted results
    for entry in natsorted(entries, reverse=args.reverse, alg=alg):
        print(entry)


if __name__ == '__main__':
    # Script entry point: run main() and translate the two handled
    # exceptions into process exits.
    try:
        main()
    except ValueError as err:
        # sys.exit receives the error text instead of an integer status.
        sys.exit(py23_str(err))
    except KeyboardInterrupt:
        # Interrupted by the user: exit with status 1.
        sys.exit(1)
Exemplo n.º 16
0
def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x):
    """
    _path_splitter must return the pathlib parts with the last part
    replaced by its stem and suffix.

    Inputs with fewer than three items or with any falsy item are
    rejected through assume.
    """
    assume(len(x) > 2)
    assume(all(x))
    path_text = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1]
    as_path = pathlib.Path(path_text)
    directories = list(as_path.parts)[:-1]
    expected = directories + [as_path.stem] + [as_path.suffix]
    assert _path_splitter(path_text) == expected
Exemplo n.º 17
0
def test_path_splitter_splits_path_string_by_separator(x):
    """utils.path_splitter must yield the same components as pathlib parts."""
    path_text = py23_str(pathlib.Path(*x))
    produced = tuple(utils.path_splitter(path_text))
    expected = tuple(pathlib.Path(path_text).parts)
    assert produced == expected
Exemplo n.º 18
0
        if args.filter is not None:
            lows, highs = ([f[0] for f in args.filter],
                           [f[1] for f in args.filter])
            entries = [entry for entry in entries
                       if keep_entry_range(entry, lows, highs,
                                           float, regex)]
        if args.reverse_filter is not None:
            lows, highs = ([f[0] for f in args.reverse_filter],
                           [f[1] for f in args.reverse_filter])
            entries = [entry for entry in entries
                       if not keep_entry_range(entry, lows, highs,
                                               float, regex)]
        if args.exclude:
            exclude = set(args.exclude)
            entries = [entry for entry in entries
                       if exclude_entry(entry, exclude,
                                        float, regex)]

    # Print off the sorted results
    for entry in natsorted(entries, reverse=args.reverse, alg=alg):
        print(entry)


if __name__ == '__main__':
    # Only executed when the module is run as a script.
    try:
        main()
    except ValueError as failure:
        # Exit, handing the error text to sys.exit.
        sys.exit(py23_str(failure))
    except KeyboardInterrupt:
        # Exit with status 1 when interrupted.
        sys.exit(1)
Exemplo n.º 19
0
def test_path_splitter_splits_path_string_by_separator(x):
    """
    _path_splitter must return exactly the list of pathlib parts.

    Inputs with fewer than two items or with any falsy item are
    rejected through assume.
    """
    assume(len(x) > 1)
    assume(all(x))
    path_text = py23_str(pathlib.Path(*x))
    produced = _path_splitter(path_text)
    expected = list(pathlib.Path(path_text).parts)
    assert produced == expected
Exemplo n.º 20
0
def natsort_keygen(key=None, alg=ns.DEFAULT):
    """
    Build a key function for natural sorting.

    The returned callable is meant to be given as the `key`
    argument of functions such as the `sorted` builtin. Its
    behavior is fixed by the `key` and `alg` arguments
    supplied here.

    Parameters
    ----------
    key : callable, optional
        Forwarded to the generated key function. According to the
        original documentation it manipulates each input value before
        it is parsed for numbers, is **not** applied recursively, and
        takes one argument and returns one value.

    alg : ns enum, optional
        Flags that select the sorting algorithm. See the :class:`ns`
        class documentation for details. Defaults to `ns.DEFAULT`.

    Returns
    -------
    out : function
        A `partial` of `utils.natsort_key` with the string, bytes and
        number parsers already chosen, suitable for use as the `key`
        argument to functions such as `sorted`.

    Raises
    ------
    ValueError
        If combining `alg` with `ns.DEFAULT` raises TypeError.

    See Also
    --------
    natsorted
    natsort_key

    Examples
    --------
    `natsort_keygen` is a convenient way to create a custom key
    to sort lists in-place (for example).::

        >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
        >>> a.sort(key=natsort_keygen(alg=ns.REAL))
        >>> a
        [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3']

    """
    try:
        ns.DEFAULT | alg
    except TypeError:
        raise ValueError(
            "natsort_keygen: 'alg' argument must be from the enum 'ns'"
            + ", got {}".format(py23_str(alg))
        )

    # Add the NS_DUMB option if the locale library is broken.
    if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
        alg |= NS_DUMB

    # Pick the separators handed to the factory functions. The NUMAFTER
    # flag selects the "_max" variants; LOCALEALPHA selects the locale
    # flavor of `sep` only.
    locale_compat = natsort.compat.locale
    localized = alg & ns.LOCALEALPHA
    if alg & ns.NUMAFTER:
        pre_sep = locale_compat.null_string_max
        sep = locale_compat.null_string_locale_max if localized else pre_sep
    else:
        pre_sep = locale_compat.null_string
        sep = locale_compat.null_string_locale if localized else pre_sep
    regex = utils.regex_chooser(alg)

    # Create the high-level parsing function for strings from the
    # individual transformation steps.
    string_func = utils.parse_string_factory(
        alg,
        sep,
        regex.split,
        utils.input_string_transform_factory(alg),
        utils.string_component_transform_factory(alg),
        utils.final_data_transform_factory(alg, sep, pre_sep),
    )
    if alg & ns.PATH:
        string_func = utils.parse_path_factory(string_func)

    # Return the natsort key with the parsing path pre-chosen.
    parsers = dict(
        string_func=string_func,
        bytes_func=utils.parse_bytes_factory(alg),
        num_func=utils.parse_number_factory(alg, sep, pre_sep),
    )
    return partial(utils.natsort_key, key=key, **parsers)
Exemplo n.º 21
0
def test_string_component_transform_factory_with_FLOAT_returns_fast_float(x):
    """With ns.FLOAT the transform must equal fast_float with nan=-inf."""
    to_float = _string_component_transform_factory(ns.FLOAT)
    text = py23_str(x)
    expected = fast_float(text, nan=float('-inf'))
    assert to_float(text) == expected
Exemplo n.º 22
0
def test_path_splitter_splits_path_string_by_separator(x):
    """
    _path_splitter must return exactly the list of pathlib parts.

    Inputs containing any falsy item are rejected through assume.
    """
    assume(all(x))
    path_text = py23_str(pathlib.Path(*x))
    produced = _path_splitter(path_text)
    assert produced == list(pathlib.Path(path_text).parts)
Exemplo n.º 23
0
def test_post_split_function_returns_fast_int(x):
    """With algorithm 0 the post-split function must equal fast_int."""
    assume(x)
    post_split = _post_split_function(0)
    text = py23_str(x)
    assert post_split(text) == fast_int(text)
Exemplo n.º 24
0
def natsort_keygen(key=None, alg=ns.DEFAULT):
    """
    Create a function that turns values into natural-sort keys.

    The result is intended to be passed as the `key` argument
    to functions such as the `sorted` builtin. The `key` and
    `alg` arguments given here customize the generated function.

    Parameters
    ----------
    key : callable, optional
        Forwarded to the generated key function. The original
        documentation describes it as a single-argument callable that
        manipulates the input value before parsing for numbers and
        that is **not** applied recursively.

    alg : ns enum, optional
        Controls which algorithm `natsort` uses when sorting; see
        the :class:`ns` class documentation. Defaults to `ns.DEFAULT`.

    Returns
    -------
    out : function
        A `partial` of `utils.natsort_key` bound to the chosen parsing
        functions, suitable for passing as the `key` argument to
        functions such as `sorted`.

    Raises
    ------
    ValueError
        If `ns.DEFAULT | alg` raises TypeError.

    See Also
    --------
    natsorted
    natsort_key

    Examples
    --------
    `natsort_keygen` is a convenient way to create a custom key
    to sort lists in-place (for example).::

        >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
        >>> a.sort(key=natsort_keygen(alg=ns.REAL))
        >>> a
        [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3']

    """
    try:
        ns.DEFAULT | alg
    except TypeError:
        detail = ", got {}".format(py23_str(alg))
        raise ValueError(
            "natsort_keygen: 'alg' argument must be from the enum 'ns'" + detail
        )

    # Add the NS_DUMB option if the locale library is broken.
    if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
        alg |= NS_DUMB

    # Set some variables that will be passed to the factory functions.
    # `pre_sep` depends only on NUMAFTER; `sep` additionally depends on
    # LOCALEALPHA.
    loc = natsort.compat.locale
    num_after = alg & ns.NUMAFTER
    if alg & ns.LOCALEALPHA:
        sep = loc.null_string_locale_max if num_after else loc.null_string_locale
    else:
        sep = loc.null_string_max if num_after else loc.null_string
    pre_sep = loc.null_string_max if num_after else loc.null_string
    regex = utils.regex_chooser(alg)

    # Create the functions that will be used to split strings.
    transforms = (
        utils.input_string_transform_factory(alg),
        utils.string_component_transform_factory(alg),
        utils.final_data_transform_factory(alg, sep, pre_sep),
    )

    # Create the high-level parsing functions for strings, bytes, and numbers.
    string_func = utils.parse_string_factory(alg, sep, regex.split, *transforms)
    if alg & ns.PATH:
        string_func = utils.parse_path_factory(string_func)
    bytes_func = utils.parse_bytes_factory(alg)
    num_func = utils.parse_number_factory(alg, sep, pre_sep)

    # Return the natsort key with the parsing path pre-chosen.
    bound = {
        "key": key,
        "string_func": string_func,
        "bytes_func": bytes_func,
        "num_func": num_func,
    }
    return partial(utils.natsort_key, **bound)
Exemplo n.º 25
0
def test_path_splitter_splits_path_string_by_separator(x):
    """The components from utils.path_splitter must equal the pathlib parts."""
    candidate = py23_str(pathlib.Path(*x))
    from_splitter = tuple(utils.path_splitter(candidate))
    from_pathlib = tuple(pathlib.Path(candidate).parts)
    assert from_splitter == from_pathlib
Exemplo n.º 26
0
def _natsort_key(val, key, alg):
    """\
    Key to sort strings and numbers naturally.

    It works by separating out the numbers from the strings. This function for
    internal use only. See the natsort_keygen documentation for details of each
    parameter.

    The value is first treated as a string. If that fails with TypeError
    or AttributeError it is treated as bytes, then as an iterable that is
    parsed recursively (without `key`), and finally as a number.

    Parameters
    ----------
    val : {str, unicode}
    key : callable
        Applied once to `val` when not None; never passed to the
        recursive calls.
    alg : ns enum

    Returns
    -------
    out : tuple
        The modified value with numbers extracted.

    Raises
    ------
    ValueError
        If `alg` does not support the `&` operations used here, or if
        no regex is registered for the locale's decimal separator and
        that separator is neither '.' nor ','.

    """

    # Convert the arguments to the proper input tuple:
    # (number-algorithm bits, decimal point character).
    try:
        use_locale = alg & _ns['LOCALE']
        inp_options = (alg & _NUMBER_ALGORITHMS,
                       localeconv()['decimal_point'] if use_locale else '.')
    except TypeError:
        msg = "_natsort_key: 'alg' argument must be from the enum 'ns'"
        raise ValueError(msg+', got {0}'.format(py23_str(alg)))

    # Get the proper regex and conversion function.
    try:
        regex, num_function = _regex_and_num_function_chooser[inp_options]
    except KeyError:  # pragma: no cover
        # Unknown decimal separators get a descriptive error; any other
        # missing combination re-raises the KeyError unchanged.
        if inp_options[1] not in ('.', ','):  # pragma: no cover
            raise ValueError("_natsort_key: currently natsort only supports "
                             "the decimal separators '.' and ','. "
                             "Please file a bug report.")
        else:
            raise
    else:
        # Apply key if needed.
        if key is not None:
            val = key(val)

        # If this is a path, convert it.
        # An AttributeError is raised if not a string.
        split_as_path = False
        if alg & _ns['PATH']:
            try:
                val = _path_splitter(val)
            except AttributeError:
                pass
            else:
                # Record that this string was split as a path so that
                # we don't set PATH in the recursive call.
                split_as_path = True

        # Assume the input are strings, which is the most common case.
        # Apply the string modification if needed.
        # Keep the unmodified value; its first character is used below
        # for UNGROUPLETTERS when the locale library is "dumb".
        orig_val = val
        try:
            lowfirst = alg & _ns['LOWERCASEFIRST']
            dumb = dumb_sort() if use_locale else False
            # The case is swapped when exactly one of "dumb locale" and
            # LOWERCASEFIRST applies; when both apply nothing is swapped.
            if use_locale and dumb and not lowfirst:
                val = val.swapcase()  # Compensate for bad locale lib.
            elif lowfirst and not (use_locale and dumb):
                val = val.swapcase()
            if alg & _ns['IGNORECASE']:
                val = val.casefold() if PY_VERSION >= 3.3 else val.lower()
            gl = alg & _ns['GROUPLETTERS']
            ret = tuple(_number_extracter(val,
                                          regex,
                                          num_function,
                                          alg & _ns['TYPESAFE'],
                                          use_locale,
                                          gl or (use_locale and dumb)))
            # Handle NaN.
            if any(isfloat(x, num_only=True) and isnan(x) for x in ret):
                ret = _fix_nan(ret, alg)
            # For UNGROUPLETTERS, so the high level grouping can occur
            # based on the first letter of the string.
            # Do no locale transformation of the characters.
            # In this mode the result is a 2-tuple: (first-character
            # group, parsed key).
            if use_locale and alg & _ns['UNGROUPLETTERS']:
                if not ret:
                    return (ret, ret)
                elif ret[0] == null_string:
                    return ((b'' if use_pyicu else '',), ret)
                elif dumb:
                    if lowfirst:
                        return ((orig_val[0].swapcase(),), ret)
                    else:
                        return ((orig_val[0],), ret)
                else:
                    return ((val[0],), ret)
            else:
                return ret
        except (TypeError, AttributeError):
            # Check if it is a bytes type, and if so return as a
            # one element tuple.
            if type(val) in (bytes,):
                return (val.lower(),) if alg & _ns['IGNORECASE'] else (val,)
            # If not strings, assume it is an iterable that must
            # be parsed recursively. Do not apply the key recursively.
            # If this string was split as a path, turn off 'PATH'.
            try:
                was_path = alg & _ns['PATH']
                newalg = alg & _ALL_BUT_PATH
                # Re-add the PATH bit only when it was requested and the
                # value was not already split as a path above.
                newalg |= (was_path * (not split_as_path))
                return tuple([_natsort_key(x, None, newalg) for x in val])
            # If there is still an error, it must be a number.
            # Return as-is, with a leading empty string.
            except TypeError:
                n = null_string if use_locale else ''
                if isfloat(val, num_only=True) and isnan(val):
                    val = _fix_nan([val], alg)[0]
                # With PATH the pair is wrapped in an outer tuple.
                return ((n, val,),) if alg & _ns['PATH'] else (n, val,)
Exemplo n.º 27
0
def natsort_keygen(key=None, alg=0, **_kwargs):
    """\
    Build a key function for natural sorting.

    The generated key sorts strings and numbers naturally rather
    than lexicographically and is meant to be passed as the
    `key` argument to functions such as the `sorted` builtin.

    The generated function is customized by the arguments to
    `natsort_keygen`, including an optional `key` function which
    is forwarded to `natsort_key`.

    Parameters
    ----------
    key : callable, optional
        Forwarded to the generated key function. The original
        documentation describes it as a single-argument callable that
        manipulates the input value before parsing for numbers and
        that is **not** applied recursively.

    alg : ns enum, optional
        Controls which algorithm `natsort` uses when sorting; see
        the :class:`ns` class documentation. Defaults to 0.

    Returns
    -------
    out : function
        A `partial` of the `natsort_key` function with the parsing
        functions pre-selected, suitable for passing as the `key`
        argument to functions such as `sorted`.

    Raises
    ------
    ValueError
        If converting the legacy keyword arguments or combining them
        with `alg` raises TypeError.

    See Also
    --------
    natsorted

    Examples
    --------
    `natsort_keygen` is a convenient way to create a custom key
    to sort lists in-place (for example). Calling with no objects
    will return a plain `natsort_key` instance::

        >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
        >>> a.sort(key=natsort_keygen(alg=ns.REAL))
        >>> a
        [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3']

    """
    # Transform old arguments to the ns enum and merge them into alg.
    try:
        alg = _args_to_enum(**_kwargs) | alg
    except TypeError:
        raise ValueError(
            "natsort_keygen: 'alg' argument must be from the enum 'ns'"
            + ', got {0}'.format(py23_str(alg))
        )

    # Add the _DUMB option if the locale library is broken.
    if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
        alg |= ns._DUMB

    # Set some variable that will be passed to the factory functions
    if alg & ns.LOCALEALPHA:
        sep = natsort.compat.locale.null_string
    else:
        sep = ''
    regex = _regex_chooser[alg & ns._NUMERIC_ONLY]

    # Create the high-level parsing function for strings from the
    # pre-split, post-split and post-parse steps.
    string_func = _parse_string_function(
        alg,
        sep,
        regex.split,
        _pre_split_function(alg),
        _post_split_function(alg),
        _post_string_parse_function(alg, sep),
    )
    if alg & ns.PATH:
        string_func = _parse_path_function(string_func)

    # Return the natsort key with the parsing path pre-chosen.
    parsers = dict(
        string_func=string_func,
        bytes_func=_parse_bytes_function(alg),
        num_func=_parse_number_function(alg, sep),
    )
    return partial(_natsort_key, key=key, **parsers)
Exemplo n.º 28
0
def test_post_split_function_with_FLOAT_returns_fast_float(x):
    """With ns.FLOAT the post-split function must equal fast_float with nan=-inf."""
    assume(x)
    post_split = _post_split_function(ns.FLOAT)
    text = py23_str(x)
    expected = fast_float(text, nan=float('-inf'))
    assert post_split(text) == expected
Exemplo n.º 29
0
def test_path_splitter_splits_path_string_by_separator(x):
    """
    The components from _path_splitter must equal the pathlib parts.

    Inputs containing any falsy item are rejected through assume.
    """
    assume(all(x))
    path_text = py23_str(pathlib.Path(*x))
    produced = tuple(_path_splitter(path_text))
    expected = tuple(pathlib.Path(path_text).parts)
    assert produced == expected
Exemplo n.º 30
0
def test_string_component_transform_factory_returns_fast_int(x):
    """With algorithm 0 the component transform must equal fast_int."""
    to_int = _string_component_transform_factory(0)
    text = py23_str(x)
    assert to_int(text) == fast_int(text)
Exemplo n.º 31
0
def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x):
    """
    _path_splitter must yield the pathlib parts with the final part
    replaced by its stem and suffix.

    Inputs containing any falsy item are rejected through assume.
    """
    assume(all(x))
    path_text = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1]
    as_path = pathlib.Path(path_text)
    expected = tuple(as_path.parts)[:-1] + (as_path.stem, as_path.suffix)
    assert tuple(_path_splitter(path_text)) == expected
Exemplo n.º 32
0
def natsort_keygen(key=None, alg=0, **_kwargs):
    """\
    Create a function that turns values into natural-sort keys.

    The key sorts strings and numbers naturally, not
    lexicographically, and is designed for use as the `key`
    argument to functions such as the `sorted` builtin.

    The arguments to `natsort_keygen` customize the generated
    function; the optional `key` function is forwarded to
    `natsort_key`.

    Parameters
    ----------
    key : callable, optional
        Forwarded to the generated key function. According to the
        original documentation it manipulates each input value before
        it is parsed for numbers, is **not** applied recursively, and
        takes one argument and returns one value.

    alg : ns enum, optional
        Flags that select the sorting algorithm. See the :class:`ns`
        class documentation for details. Defaults to 0.

    Returns
    -------
    out : function
        A `partial` of the `natsort_key` function bound to the chosen
        parsing functions, suitable for passing as the `key`
        argument to functions such as `sorted`.

    Raises
    ------
    ValueError
        If converting the legacy keyword arguments or combining them
        with `alg` raises TypeError.

    See Also
    --------
    natsorted

    Examples
    --------
    `natsort_keygen` is a convenient way to create a custom key
    to sort lists in-place (for example). Calling with no objects
    will return a plain `natsort_key` instance::

        >>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
        >>> a.sort(key=natsort_keygen(alg=ns.REAL))
        >>> a
        [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3']

    """
    # Transform old arguments to the ns enum.
    try:
        alg = _args_to_enum(**_kwargs) | alg
    except TypeError:
        detail = ', got {0}'.format(py23_str(alg))
        raise ValueError(
            "natsort_keygen: 'alg' argument must be from the enum 'ns'" + detail
        )

    # Add the _DUMB option if the locale library is broken.
    if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
        alg |= ns._DUMB

    # Set some variable that will be passed to the factory functions
    sep = ''
    if alg & ns.LOCALEALPHA:
        sep = natsort.compat.locale.null_string
    regex = _regex_chooser[alg & ns._NUMERIC_ONLY]

    # Create the functions that will be used to split strings.
    steps = (
        _pre_split_function(alg),
        _post_split_function(alg),
        _post_string_parse_function(alg, sep),
    )

    # Create the high-level parsing functions for strings, bytes, and numbers.
    string_func = _parse_string_function(alg, sep, regex.split, *steps)
    if alg & ns.PATH:
        string_func = _parse_path_function(string_func)
    bytes_func = _parse_bytes_function(alg)
    num_func = _parse_number_function(alg, sep)

    # Return the natsort key with the parsing path pre-chosen.
    bound = {
        "key": key,
        "string_func": string_func,
        "bytes_func": bytes_func,
        "num_func": num_func,
    }
    return partial(_natsort_key, **bound)
Exemplo n.º 33
0
def _natsort_key(val, key, alg):
    """\
    Key to sort strings and numbers naturally.

    It works by separating out the numbers from the strings. This function
    is for internal use only. See the natsort_keygen documentation for
    details of each parameter.

    The input type is not checked up front. The value is first treated as
    a string; if that raises `TypeError` or `AttributeError` it is then
    tried, in order, as bytes, as an iterable (handled recursively), and
    finally as a plain number.

    Parameters
    ----------
    val : object
        The value to build a key for. Strings are the primary case;
        bytes, iterables and numbers are handled by the fallbacks
        described above.
    key : callable or None
        If not None, it is applied to `val` before any other processing.
        It is not applied to the elements of an iterable on recursion.
    alg : ns enum
        Bit flags selecting the algorithm. It must support ``&`` with the
        entries of `_ns`.

    Returns
    -------
    out : tuple
        The modified value with numbers extracted.

        - String input: the tuple built from `_number_extracter`. When
          locale handling and ``UNGROUPLETTERS`` are both active, a
          two-element tuple is returned instead, whose second element is
          that tuple and whose first element is used for high-level
          grouping.
        - Bytes input: a one-element tuple holding the bytes (lower-cased
          when ``IGNORECASE`` is set).
        - Iterable input: a tuple with the key of every element.
        - Anything else (assumed to be a number): ``(n, val)`` where `n`
          is `null_string` under locale handling and ``''`` otherwise;
          wrapped in one more tuple when ``PATH`` is set.

    Raises
    ------
    ValueError
        If `alg` cannot be combined with the `ns` flags, or if the
        locale's decimal separator is neither ``'.'`` nor ``','``.
    KeyError
        If no regex/conversion pair exists for the chosen options even
        though the decimal separator is a supported one.

    """

    # Convert the arguments to the proper input tuple:
    # (selected number algorithm bits, decimal separator to use).
    try:
        use_locale = alg & _ns['LOCALE']
        inp_options = (alg & _NUMBER_ALGORITHMS,
                       localeconv()['decimal_point'] if use_locale else '.')
    except TypeError:
        # The '&' failed, so 'alg' is not a value that combines with 'ns'.
        msg = "_natsort_key: 'alg' argument must be from the enum 'ns'"
        raise ValueError(msg + ', got {0}'.format(py23_str(alg)))

    # Get the proper regex and conversion function.
    try:
        regex, num_function = _regex_and_num_function_chooser[inp_options]
    except KeyError:  # pragma: no cover
        # Give a friendlier message when the lookup failed because of an
        # unsupported decimal separator; otherwise re-raise the KeyError.
        if inp_options[1] not in ('.', ','):  # pragma: no cover
            raise ValueError("_natsort_key: currently natsort only supports "
                             "the decimal separators '.' and ','. "
                             "Please file a bug report.")
        else:
            raise
    else:
        # Apply key if needed.
        if key is not None:
            val = key(val)

        # If this is a path, convert it.
        # An AttributeError is raised if not a string; in that case the
        # value is left untouched and handled by the code below.
        split_as_path = False
        if alg & _ns['PATH']:
            try:
                val = _path_splitter(val)
            except AttributeError:
                pass
            else:
                # Record that this string was split as a path so that
                # we don't set PATH in the recursive call.
                split_as_path = True

        # Assume the input are strings, which is the most common case.
        # Apply the string modification if needed.
        # Any TypeError/AttributeError raised in this block means the value
        # was not a string and sends control to the fallbacks in 'except'.
        # Keep the value as it was before case changes; the 'dumb' branch of
        # UNGROUPLETTERS below reads its first character.
        orig_val = val
        try:
            lowfirst = alg & _ns['LOWERCASEFIRST']
            # dumb_sort() is only consulted when locale handling is on.
            dumb = dumb_sort() if use_locale else False
            # The two branches are mutually exclusive: the case is swapped
            # either to compensate for the locale library or because
            # LOWERCASEFIRST was requested, but never for both at once
            # (the two swaps would cancel each other out).
            if use_locale and dumb and not lowfirst:  # pragma: no cover
                val = val.swapcase()  # Compensate for bad locale lib.
            elif lowfirst and not (use_locale and dumb):
                val = val.swapcase()
            if alg & _ns['IGNORECASE']:
                # str.casefold only exists on Python 3.3 and newer.
                val = val.casefold() if PY_VERSION >= 3.3 else val.lower()
            gl = alg & _ns['GROUPLETTERS']
            # The last argument is truthy when GROUPLETTERS was requested
            # or when a "dumb" locale library is being compensated for.
            ret = tuple(
                _number_extracter(val, regex, num_function,
                                  alg & _ns['TYPESAFE'], use_locale, gl
                                  or (use_locale and dumb)))
            # Handle NaN.
            if any(isfloat(x, num_only=True) and isnan(x) for x in ret):
                ret = _fix_nan(ret, alg)
            # For UNGROUPLETTERS, so the high level grouping can occur
            # based on the first letter of the string.
            # Do no locale transformation of the characters.
            if use_locale and alg & _ns['UNGROUPLETTERS']:
                if not ret:
                    # Nothing was extracted; there is no first letter.
                    return (ret, ret)
                elif ret[0] == null_string:
                    # The key starts with the null string placeholder, so
                    # group under an empty string of the matching type.
                    return ((b'' if use_pyicu else '', ), ret)
                elif dumb:  # pragma: no cover
                    # Use the first character of the value as it was before
                    # the case changes above, swapping it only when
                    # LOWERCASEFIRST is set.
                    if lowfirst:
                        return ((orig_val[0].swapcase(), ), ret)
                    else:
                        return ((orig_val[0], ), ret)
                else:
                    return ((val[0], ), ret)
            else:
                return ret
        except (TypeError, AttributeError):
            # Check if it is a bytes type, and if so return as a
            # one element tuple.
            # This is an exact type match; subclasses of bytes do not
            # take this branch.
            if type(val) in (bytes, ):
                return (val.lower(), ) if alg & _ns['IGNORECASE'] else (val, )
            # If not strings, assume it is an iterable that must
            # be parsed recursively. Do not apply the key recursively.
            # If this string was split as a path, turn off 'PATH'.
            try:
                was_path = alg & _ns['PATH']
                # Clear the PATH bit, then add it back only when PATH was
                # requested and the value was NOT already split as a path.
                newalg = alg & _ALL_BUT_PATH
                newalg |= (was_path * (not split_as_path))
                return tuple([_natsort_key(x, None, newalg) for x in val])
            # If there is still an error, it must be a number.
            # Return as-is, with a leading empty string.
            except TypeError:
                n = null_string if use_locale else ''
                if isfloat(val, num_only=True) and isnan(val):
                    val = _fix_nan([val], alg)[0]
                # With PATH set the pair is wrapped in one more tuple.
                return ((
                    n,
                    val,
                ), ) if alg & _ns['PATH'] else (
                    n,
                    val,
                )
Exemplo n.º 34
0
            (("", 6, "a-", 5, ".", 34, "e+", 1), ("/folder (", 1, ")/foo"), ("", 56.7)),
        ),
        (ns.REAL, (("", 6.0, "A", -50.34), ("/Folder (", 1.0, ")/Foo"), ("", 56.7))),
        (
            ns.LOWERCASEFIRST | ns.FLOAT | ns.NOEXP,
            (
                ("", 6.0, "a-", 5.034, "E+", 1.0),
                ("/fOLDER (", 1.0, ")/fOO"),
                ("", 56.7),
            ),
        ),
        (
            ns.PATH | ns.GROUPLETTERS,
            (
                (("", 6, "aA--", 5, "..", 34, "ee++", 1),),
                ((2 * py23_str(os.sep),), ("fFoollddeerr  ((", 1, "))"), ("fFoooo",)),
                (("", 56.7),),
            ),
        ),
    ],
)
def test_natsort_keygen_handles_arbitrary_input(arbitrary_input, alg, expected):
    """Check the key generated for `alg` maps `arbitrary_input` to `expected`."""
    # Build the key function once, then compare its output for the fixture
    # input against the parametrized expectation.
    generated_key = natsort_keygen(alg=alg)
    actual = generated_key(arbitrary_input)
    assert actual == expected


@pytest.mark.parametrize(
    "alg, expected",
    [
        (ns.DEFAULT, (b"6A-5.034e+1",)),
        (ns.IGNORECASE, (b"6a-5.034e+1",)),