Example #1
import random

import pytest

from concept_formation import utils  # module under test (assumed import path)


def test_mean():
    with pytest.raises(ValueError):
        utils.mean([])

    vals = [random.normalvariate(0, 1) for i in range(1000)]
    assert abs(utils.mean(vals) - 0) < 0.1
    assert utils.mean([1, 1, 1]) == 1
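The behaviour pinned down here (a ValueError on an empty list, the arithmetic mean otherwise) is simple to state. A minimal sketch of such a helper, not the library's actual implementation:

def mean(values):
    """Arithmetic mean of a non-empty sequence; raises ValueError when empty."""
    if len(values) == 0:
        raise ValueError("cannot take the mean of an empty sequence")
    return sum(values) / len(values)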
Example #2
from re import search

from concept_formation.structure_mapper import StructureMapper  # assumed import path
from concept_formation.utils import mean  # assumed import path


def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance.

    The instance should not contain the attribute; if it does, a shallow copy
    without the attribute is used instead.
    """
    if attr in instance:
        instance = {a: instance[a] for a in instance if a != attr}
    concept = tree.categorize(instance)

    if isinstance(val, dict):
        structure_mapper = StructureMapper(concept)
        temp_instance = structure_mapper.transform(instance)
        mapping = structure_mapper.get_mapping()

        #temp_instance = flatten_json(instance)
        #mapping = flat_match(concept, temp_instance)
        #temp_instance = rename_flat(temp_instance, mapping)

        probs = [concept.get_probability(sub_attr, temp_instance[sub_attr]) 
                 for sub_attr in temp_instance 
                 if search('^' + mapping[attr], sub_attr)]
        return mean(probs)
    else:
        return concept.get_probability(attr, val)
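For a component (dict) value, this version renames the instance's sub-attributes via the concept's mapping and then averages their individual probabilities. A toy illustration of that averaging step, with made-up names and a stand-in probability table rather than a real concept:

from re import search
from statistics import mean

# Hypothetical flattened sub-attribute probabilities for two components.
sub_attr_probs = {"obj1.color": 0.8, "obj1.shape": 0.6, "obj2.color": 0.1}
mapped_attr = "obj1"  # stand-in for mapping[attr]

probs = [p for sub_attr, p in sub_attr_probs.items()
         if search('^' + mapped_attr, sub_attr)]
print(mean(probs))  # ~0.7: only obj1's sub-attributes are averaged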
Example #3
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy is
    created to allow the attribute to be predicted.

    .. warning:: This is an older function in the library and we are not quite
        sure how to set it up for component values under the new
        representation, so for the time being it will raise an Exception if
        it encounters a component.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    """
    if attr in instance:
        instance = {a: instance[a] for a in instance if a != attr}
    concept = tree.categorize(instance)

    if isinstance(val, dict):
        raise Exception(
            "Probability cannot be estimated on component attributes!")
        # NOTE: the code below is unreachable because of the raise above; it is
        # the earlier component-handling logic, kept for reference.
        structure_mapper = StructureMapper(concept)
        temp_instance = structure_mapper.transform(instance)
        mapping = structure_mapper.get_mapping()

        # temp_instance = flatten_json(instance)
        # mapping = flat_match(concept, temp_instance)
        # temp_instance = rename_flat(temp_instance, mapping)

        probs = [
            concept.probability(sub_attr, temp_instance[sub_attr])
            for sub_attr in temp_instance
            if search(r'^' + mapping[attr], sub_attr)
        ]
        return mean(probs)
    else:
        return concept.probability(attr, val)
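The nominal branch is the one still supported. A minimal usage sketch, assuming the CobwebTree named in the docstring and a batch fit method (training incrementally with ifit per instance would work as well):

from concept_formation.cobweb import CobwebTree  # import path per the docstring above

tree = CobwebTree()
tree.fit([{"color": "red", "shape": "square"},
          {"color": "red", "shape": "circle"},
          {"color": "blue", "shape": "circle"}])

# Probability of color == 'red' for an instance whose color is unknown.
print(probability(tree, {"shape": "square"}, "color", "red"))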
Example #4
from math import sqrt
from statistics import mean  # plain arithmetic mean (stand-in for the library's own helper)

import numpy as np
from scipy import linalg
from scipy.stats import t


def lowess(x, y, f=1./3., iter=3, confidence=0.95):
    """
    Performs Lowess smoothing

    Code adapted from: https://gist.github.com/agramfort/850437

    lowess(x, y, f=1./3., iter=3, confidence=0.95) -> yest, lower, upper

    Lowess smoother: Robust locally weighted regression.
    The lowess function fits a nonparametric regression curve to a scatterplot.
    The arrays x and y contain an equal number of elements; each pair
    (x[i], y[i]) defines a data point in the scatterplot. The function returns
    the estimated (smooth) values of y along with lower and upper confidence
    bounds.

    The smoothing span is given by f. A larger value for f will result in a
    smoother curve. The number of robustifying iterations is given by iter. The
    function will run faster with a smaller number of iterations.

    .. todo:: double check that the confidence bounds are correct
    """
    n = len(x)
    r = int(np.ceil(f*n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w**3)**3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iter):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights*y), np.sum(weights*y*x)])
            A = np.array([[np.sum(weights), np.sum(weights*x)],
                          [np.sum(weights*x), np.sum(weights*x*x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1]*x[i]

        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta**2)**2

    h = np.zeros(n)
    # For each point, pool the squared residuals of all points sharing its x
    # value to estimate a standard error for the confidence band.
    for x_idx, x_val in enumerate(x):
        r2 = np.array([v*v for i, v in enumerate(residuals) if x[i] == x_val])
        n = len(r2)
        se = sqrt(mean(r2)) / sqrt(len(r2))
        h[x_idx] = se * t.ppf((1 + confidence) / 2., n - 1)

    return yest, yest-h, yest+h
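A short usage sketch, assuming the definition and imports above. The confidence band pools residuals across points that share an x value, so the synthetic data below provides several samples per x:

np.random.seed(0)
xs = np.repeat(np.arange(20, dtype=float), 5)  # five noisy samples per x value
ys = np.sin(xs / 3.0) + 0.3 * np.random.normal(size=xs.shape)

yest, lower, upper = lowess(xs, ys, f=0.3, iter=3, confidence=0.95)
print(yest[:3])             # smoothed estimates
print((upper - lower)[:3])  # width of the 95% confidence band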
Example #5
def test_cv_mean():
    for i in range(10):
        values = [random.normalvariate(0, 1) for i in range(100)]
        cv = ContinuousValue()
        cv.update_batch(values)
        assert abs(cv.mean - utils.mean(values)) < 1e-11
Example #6
def test_cv_mean(self):
    for i in range(10):
        values = [random.normalvariate(0, 1) for i in range(100)]
        cv = ContinuousValue()
        cv.update_batch(values)
        assert abs(cv.mean - utils.mean(values)) < 1e-11
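Both variants of this test check that an incrementally maintained mean agrees with a batch computation. Such a running mean is typically kept with a Welford-style update; an illustrative sketch (the names here are not the library's):

class RunningMean:
    """Illustrative incremental mean, analogous to what the tests above exercise."""

    def __init__(self):
        self.num = 0
        self.mean = 0.0

    def update(self, value):
        # Shift the mean by the error divided by the new count; exact up to
        # floating-point rounding, hence the 1e-11 tolerance in the tests.
        self.num += 1
        self.mean += (value - self.mean) / self.num

    def update_batch(self, values):
        for value in values:
            self.update(value)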