Пример #1
0
def weighted_replicate(seq, weights, n):
    """
    Return n selections from seq, with the count of each element of
    seq proportional to the corresponding weight (filling in fractions
    randomly).
    >>> weighted_replicate('ABC', [1, 2, 1], 4)
    ['A', 'B', 'B', 'C']
    """
    assert len(seq) == len(weights)
    weights = normalize(weights)
    wholes = [int(w * n) for w in weights]
    fractions = [(w * n) % 1 for w in weights]
    return (flatten([x] * nx for x, nx in zip(seq, wholes)) +
            weighted_sample_with_replacement(n - sum(wholes), seq, fractions))
Пример #2
0
    def train(dataset):
        examples, target = dataset.examples, dataset.target
        N = len(examples)
        epsilon = 1 / (2 * N)
        w = [1 / N] * N
        h, z = [], []
        for k in range(K):
            h_k = L(dataset, w)
            h.append(h_k)
            error = sum(weight for example, weight in zip(examples, w)
                        if example[target] != h_k(example))

            # Avoid divide-by-0 from either 0% or 100% error rates:
            error = clip(error, epsilon, 1 - epsilon)
            for j, example in enumerate(examples):
                if example[target] == h_k(example):
                    w[j] *= error / (1 - error)
            w = normalize(w)
            z.append(math.log((1 - error) / error))
        return WeightedMajority(h, z)
Пример #3
0
def ada_boost(dataset, L, K):
    """[Figure 18.34]"""

    examples, target = dataset.examples, dataset.target
    n = len(examples)
    eps = 1 / (2 * n)
    w = [1 / n] * n
    h, z = [], []
    for k in range(K):
        h_k = L(dataset, w)
        h.append(h_k)
        error = sum(weight for example, weight in zip(examples, w)
                    if example[target] != h_k(example))
        # avoid divide-by-0 from either 0% or 100% error rates
        error = clip(error, eps, 1 - eps)
        for j, example in enumerate(examples):
            if example[target] == h_k(example):
                w[j] *= error / (1 - error)
        w = normalize(w)
        z.append(math.log((1 - error) / error))
    return weighted_majority(h, z)
Пример #4
0
def information_content(values):
    """Number of bits to represent the probability distribution in values."""
    probabilities = normalize(removeall(0, values))
    return sum(-p * math.log2(p) for p in probabilities)