Example #1
from scipy.special import comb as combinations  # assumed source of `combinations` (accepts exact=True)


def check_sample_int_distribution(sample_without_replacement):
    # This test is heavily inspired by test_random.py from the Python core.
    #
    # For the entire allowable range of 0 <= k <= N, validate that the
    # sampler eventually generates every possible combination.
    n_population = 10

    # A large number of trials prevents false negatives without slowing down
    # the normal case.
    n_trials = 10000

    for n_samples in range(n_population):
        # Counting the number of combinations is not as good as counting the
        # number of permutations. However, it works with sampling algorithms
        # that do not provide a random permutation of the subset of integers.
        n_expected = combinations(n_population, n_samples, exact=True)

        output = {}
        for i in range(n_trials):
            output[frozenset(sample_without_replacement(n_population,
                                                        n_samples))] = None

            if len(output) == n_expected:
                break
        else:
            raise AssertionError(
                "number of combinations != expected number (%s != %s)" %
                (len(output), n_expected))
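
A quick way to exercise this check, assuming the sampler under test is sklearn.utils.random.sample_without_replacement (an assumption; any callable taking (n_population, n_samples) and returning distinct integers fits):

# Usage sketch; sample_without_replacement from scikit-learn is an assumed
# stand-in for whichever sampler the check is meant to validate.
from sklearn.utils.random import sample_without_replacement

# Passes silently: every one of C(10, k) subsets shows up within 10000 draws.
check_sample_int_distribution(sample_without_replacement)


# A deliberately biased sampler trips the check:
def constant_sampler(n_population, n_samples):
    # Always returns the same n_samples integers, so only one subset appears.
    return list(range(n_samples))


try:
    check_sample_int_distribution(constant_sampler)
except AssertionError as exc:
    print("caught expected failure:", exc)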
Example #2
import random

import numpy as np
from scipy.special import comb as combinations  # assumed source of `combinations`

# LAMBDA1, LAMBDA2 and SINGLETON_CLASS are module-level constants defined
# elsewhere in the original module.


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        c = list(zip(inputs, targets))
        random.shuffle(c)
        inputs, targets = zip(*c)

    # Floor division drops the trailing elements that do not fill a full batch.
    for i in range(len(inputs) // batchsize):
        start, end = i * batchsize, (i + 1) * batchsize
        targs = np.array(targets[start:end])
        n = len(targs)
        if not (LAMBDA1 == LAMBDA2 == 0.0):
            targs = targs.reshape((n, 1))
            # Pairwise class-equality boolean matrix, n x n.
            bool_mat = targs == targs.T
            # Make singletons unequal to each other (otherwise they would
            # share the singleton class).
            bool_mat *= (targs != SINGLETON_CLASS)
            # The line above makes the diagonal False for singletons; reset it.
            np.fill_diagonal(bool_mat, True)
            total_same_class_pairs = (bool_mat.sum() - n) / 2.
            total_diff_class_pairs = combinations(n, 2) - total_same_class_pairs

            if total_same_class_pairs == 0:
                print('No pairs of the same class!')
            if total_diff_class_pairs == 0:
                print('No pairs from different classes!')

            # Needed to do proper summations over the distances.
            same_clust_mat = bool_mat * (1.0 / total_same_class_pairs)
            diff_clust_mat = (~bool_mat) * (1.0 / total_diff_class_pairs)
            np.fill_diagonal(same_clust_mat, 0)
            np.fill_diagonal(diff_clust_mat, 0)
            same_clust_mat = np.triu(same_clust_mat)
            diff_clust_mat = np.triu(diff_clust_mat)
        else:
            same_clust_mat = np.zeros((n, n))
            diff_clust_mat = np.zeros((n, n))

        targs = targs.reshape(n)
        yield inputs[start:end], tuple(targs), same_clust_mat, diff_clust_mat
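
A minimal driver for the generator above, under stated assumptions: LAMBDA1, LAMBDA2 and SINGLETON_CLASS are given illustrative values here (they are not part of the snippet), and the toy inputs and labels are made up.

# Usage sketch; the globals and toy data below are illustrative assumptions,
# not values from the original module.
import numpy as np

LAMBDA1, LAMBDA2 = 0.5, 0.5     # non-zero, so the pair matrices are computed
SINGLETON_CLASS = -1            # label treated as "no class" / singleton

inputs = np.random.randn(8, 4)          # 8 samples, 4 features
targets = [0, 0, 1, 1, -1, 2, 2, -1]    # two singletons labelled -1

for batch_x, batch_y, same_mat, diff_mat in iterate_minibatches(
        inputs, targets, batchsize=4, shuffle=False):
    # Each matrix is upper-triangular; its non-zero entries are uniform weights
    # that sum to 1 over same-class and different-class pairs respectively.
    print(batch_y, same_mat.sum(), diff_mat.sum())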