예제 #1
0
def ensure_list__(obj, atom_types=(str,)):
    """
    Strict variant of `ensure_list`: never wraps `obj` into a singleton list.

    Where `ensure_list` would return `[obj]`, this function raises instead.

    :param obj: the object to convert.
    :param atom_types: a type or tuple of types treated as atoms; an `obj` of one of these types is rejected.
        A falsy value (e.g. `None` or `()`) disables the atom check.
    :return: `obj` itself if it is a list; `list(obj)` if it is a tuple or the `iterable` helper accepts it;
        `None` if `obj` is `None`.
    :raises TypeError: if `obj` is an instance of one of `atom_types`, or if it is not a list, tuple,
        `None`, or something the `iterable` helper accepts.

    For example,
    >>> from utix.listex import ensure_list__
    >>> ensure_list__((1, 2, 3, 4))
    [1, 2, 3, 4]
    >>> ensure_list__((1, 2, 3, 4), atom_types=(str, tuple))  # raises TypeError
    """
    # Atoms are checked first so that iterable atoms (e.g. strings) are rejected rather than exploded.
    if atom_types and isinstance(obj, atom_types):
        raise TypeError(f'the provided object `{obj}` is of one of the atom types `{atom_types}` and hence cannot be converted to a list')

    # `None` is passed through untouched.
    if obj is None:
        return None

    # An existing list is returned as-is (same object, no copy).
    if isinstance(obj, list):
        return obj

    # Tuples and any other iterable are materialized into a new list.
    if isinstance(obj, tuple) or iterable(obj):
        return list(obj)

    raise TypeError(f'the provided object `{obj}` cannot be converted to a list equivalent; it must be a Python list, tuple or an iterable not of types in `{atom_types}`')
예제 #2
0
def ensure_list(obj, atom_types=(str,)):
    """
    Return a list equivalent of `obj`, so that a parameter may be given either as a single element or as a collection.

    The rules are applied in this order:
    1) if `atom_types` is truthy and `obj` is an instance of one of them, return the singleton `[obj]`;
    2) if `obj` is `None`, return `None`;
    3) if `obj` is a list, return that same list object (no copy);
    4) if `obj` is a tuple, or the `iterable` helper accepts it, return `list(obj)`;
    5) otherwise return the singleton `[obj]`.

    :param obj: the object to convert.
    :param atom_types: a type or tuple of types that must be wrapped rather than iterated over.
    :return: a list as described above, or `None` when `obj` is `None`.

    For example,
    >>> import utix.listex as lx
    >>> a = [1, 2, 3, 4]
    >>> lx.ensure_list(a) is a
    True
    >>> lx.ensure_list((1, 2, 3, 4))
    [1, 2, 3, 4]
    >>> lx.ensure_list(x for x in range(4))
    [0, 1, 2, 3]
    >>> lx.ensure_list(1)
    [1]
    >>> lx.ensure_list((1, 2, 3, 4), atom_types=(str, tuple))
    [(1, 2, 3, 4)]
    """
    # Atom check comes first: an atom is wrapped even when it is itself iterable (e.g. a string).
    if atom_types and isinstance(obj, atom_types):
        return [obj]

    # `None` and real lists are handed back unchanged.
    if obj is None or isinstance(obj, list):
        return obj

    # Tuples and other iterables are copied into a fresh list.
    if isinstance(obj, tuple) or iterable(obj):
        return list(obj)

    # Anything else is a single element.
    return [obj]
예제 #3
0
def check_legends(X, legends=None):
    """
    Emit a warning when the number of legends does not match what `X` appears to contain.

    Nothing is checked when `legends` is `None`. Otherwise only the first element of `X` is inspected:
    if the `iterable` helper accepts it and it has a length, that length is compared with `len(legends)`;
    if it is not iterable, exactly one legend is expected.

    :param X: an indexable collection; `X[0]` is read whenever `legends` is given.
    :param legends: the legend labels, or `None` to skip the check.
    :return: `None`; mismatches are reported through `warnings.warn`, not raised.
    """
    if legends is None:
        return

    first = X[0]

    if not iterable(first):
        # A non-iterable first element is treated as a single sequence being plotted.
        if len(legends) != 1:
            warnings.warn(f'plot one sequence with {len(legends)} legends')
        return

    # Iterables without `__len__` (e.g. generators) cannot be measured, so they are not checked.
    if hasattr(first, '__len__') and len(first) != len(legends):
        warnings.warn(
            f'plot {len(first)} sequences with {len(legends)} legends')
예제 #4
0
def rouge_n(hypothesis: Union[str, Iterator],
            reference: Union[str, Iterator],
            n: Union[int, Iterator[int]] = 2,
            out: dict = None,
            ignore_tokens=None):
    """
    Computes the rouge-n scores, i.e. the n-gram precision/recall/F1-score between the hypothesis and the reference.
    The n-grams of each side are collected into a set (each distinct n-gram counts once), then
    1) the rouge-n-p is the overlap size divided by the number of distinct hypothesis n-grams;
    2) the rouge-n-r is the overlap size divided by the number of distinct reference n-grams;
    3) and the rouge-n-f is the F1-score between the rouge-n-p and the rouge-n-r.

    :param hypothesis: the hypothesis text (split on whitespace), or an iterable of hypothesis tokens.
        One-shot iterators are supported; they are materialized once up front.
    :param reference: the reference text (split on whitespace), or an iterable of reference tokens.
    :param n: an integer or an iterable of integers; scores are computed for each distinct value.
    :param out: an optional dictionary; the computed scores are written into it under the keys
        `rouge_{n}_p`, `rouge_{n}_r` and `rouge_{n}_f`. A new dictionary is used if omitted.
    :param ignore_tokens: tokens to drop from both sides before computing n-grams. If the `iterable` helper
        accepts this value, tokens are dropped by membership (`in`); otherwise by equality with the value.
    :return: the result of `xfdict(out)`, a mapping containing the scores.
    """

    def _as_tokens(text_or_tokens):
        # Strings are whitespace-tokenized. Lists and tuples can be traversed repeatedly and are kept as-is.
        # Anything else may be a one-shot iterator: it must be materialized, otherwise the first
        # n-gram pass would exhaust it and every further `n` would see an empty sequence.
        if isinstance(text_or_tokens, str):
            return text_or_tokens.split()
        if isinstance(text_or_tokens, (list, tuple)):
            return text_or_tokens
        return tuple(text_or_tokens)

    hypothesis = _as_tokens(hypothesis)
    reference = _as_tokens(reference)

    if ignore_tokens is not None:
        if iterable(ignore_tokens):
            hypothesis = tuple(x for x in hypothesis if x not in ignore_tokens)
            reference = tuple(x for x in reference if x not in ignore_tokens)
        else:
            hypothesis = tuple(x for x in hypothesis if x != ignore_tokens)
            reference = tuple(x for x in reference if x != ignore_tokens)

    if out is None:
        out = {}

    def _rouge_n(n):
        hyp_ngrams = set(ngrams(hypothesis, n))
        ref_ngrams = set(ngrams(reference, n))
        hyp_len = len(hyp_ngrams)
        ref_len = len(ref_ngrams)
        overlap_len = len(hyp_ngrams.intersection(ref_ngrams))

        # An empty side yields a score of 0 instead of a division by zero.
        precision = 0.0 if hyp_len == 0 else overlap_len / hyp_len
        recall = 0.0 if ref_len == 0 else overlap_len / ref_len
        out[f"rouge_{n}_p"] = precision
        out[f"rouge_{n}_r"] = recall
        # The small epsilon keeps the F1 denominator non-zero when both precision and recall are 0.
        out[f"rouge_{n}_f"] = 2.0 * ((precision * recall) /
                                     (precision + recall + 1e-8))

    if isinstance(n, int):
        _rouge_n(n)
    else:
        # Duplicated values of `n` are collapsed so each order is computed once.
        for _n in set(n):
            _rouge_n(_n)

    return xfdict(out)
예제 #5
0
def ensure_list_or_tuple__(obj, atom_types=(str,)):
    """
    Strict list-or-tuple conversion: like `ensure_list__`, but a tuple is returned as-is instead of being copied into a list.

    :param obj: the object to convert.
    :param atom_types: a type or tuple of types treated as atoms; an `obj` of one of these types is rejected.
        A falsy value disables the atom check.
    :return: `obj` itself if it is a list, a tuple or `None`; otherwise `list(obj)` if the `iterable` helper accepts it.
    :raises TypeError: if `obj` is an instance of one of `atom_types`, or is neither a list, a tuple, `None`,
        nor something the `iterable` helper accepts.
    """
    # Atoms are rejected up front, before any container check.
    if atom_types and isinstance(obj, atom_types):
        raise TypeError(f'the provided object `{obj}` is of one of the atom types `{atom_types}` and hence cannot be converted to a list')

    # Lists, tuples and `None` are returned unchanged (same object).
    if obj is None or isinstance(obj, (list, tuple)):
        return obj

    if not iterable(obj):
        raise TypeError(f'the provided object `{obj}` cannot be converted to a list equivalent; it must be a Python list, tuple or an iterable not of types in `{atom_types}`')

    # Any other iterable is materialized into a list.
    return list(obj)
예제 #6
0
def ensure_list_or_tuple(obj, atom_types=(str,)):
    """
    Lenient list-or-tuple conversion: like `ensure_list`, but a tuple is returned as-is instead of being copied into a list.

    :param obj: the object to convert.
    :param atom_types: a type or tuple of types that must be wrapped rather than iterated over.
        A falsy value disables the atom check.
    :return: `[obj]` if `obj` is an atom; `obj` itself if it is a list, a tuple or `None`;
        `list(obj)` if the `iterable` helper accepts it; otherwise the singleton `[obj]`.
    """
    # An atom is wrapped even when it is itself iterable (e.g. a string).
    if atom_types and isinstance(obj, atom_types):
        return [obj]

    # Lists, tuples and `None` are returned unchanged (same object).
    if obj is None or isinstance(obj, (list, tuple)):
        return obj

    # Other iterables are materialized; everything else becomes a single-element list.
    return list(obj) if iterable(obj) else [obj]