# Assumed import: `combinations` is taken to be scipy's binomial-coefficient
# helper (scipy.special.comb), which matches the (N, k, exact=True) call below.
from scipy.special import comb as combinations


def check_sample_int_distribution(sample_without_replacement):
    # This test is heavily inspired by test_random.py from CPython.
    #
    # For the entire allowable range of 0 <= k <= N, validate that
    # sample generates all possible permutations.
    n_population = 10

    # A large number of trials prevents false negatives without slowing down
    # the normal case.
    n_trials = 10000

    for n_samples in range(n_population):
        # Counting the number of combinations is not as good as counting the
        # number of permutations. However, it works with sampling algorithms
        # that do not provide a random permutation of the subset of integers.
        n_expected = combinations(n_population, n_samples, exact=True)

        output = {}
        for i in range(n_trials):
            output[frozenset(
                sample_without_replacement(n_population, n_samples))] = None

            if len(output) == n_expected:
                break
        else:
            raise AssertionError(
                "number of combinations != number of expected (%s != %s)"
                % (len(output), n_expected))
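# Usage sketch (an assumption, not part of the original test): the check
# accepts any sampler with a (n_population, n_samples) signature;
# scikit-learn's sample_without_replacement is one such callable and is
# assumed to be available here.
if __name__ == "__main__":
    from sklearn.utils.random import sample_without_replacement

    check_sample_int_distribution(sample_without_replacement)
    print("check_sample_int_distribution passed")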
# Assumed imports for this snippet; LAMBDA1, LAMBDA2, and SINGLETON_CLASS are
# module-level constants expected to be defined elsewhere in the original
# script. `combinations` is again taken to be scipy's binomial coefficient.
import random

import numpy as np
from scipy.special import comb as combinations


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        c = list(zip(inputs, targets))
        random.shuffle(c)
        inputs, targets = zip(*c)
    # Cut off the elements that don't fit into a full batch by rounding down.
    for i in xrange(len(inputs) / batchsize):
        start, end = i * batchsize, (i + 1) * batchsize
        targs = np.array(targets[start:end])
        n = len(targs)
        if not (LAMBDA1 == LAMBDA2 == 0.0):
            targs = targs.reshape((n, 1))
            # Pairwise class-equality boolean matrix, n x n.
            bool_mat = targs == targs.T
            # Make singletons unequal to each other (otherwise they would all
            # share the same class).
            bool_mat *= (targs != SINGLETON_CLASS)
            # The step above also makes the diagonal False for singletons, so
            # reset it to True.
            np.fill_diagonal(bool_mat, True)
            total_same_class_pairs = (bool_mat.sum() - n) / 2.
            total_diff_class_pairs = combinations(n, 2) - total_same_class_pairs
            if total_same_class_pairs == 0:
                print 'No pairs of same class!'
            if total_diff_class_pairs == 0:
                print 'No pairs from different classes!'
            # Needed to do proper summations over the distances.
            same_clust_mat = bool_mat * (1.0 / total_same_class_pairs)
            diff_clust_mat = (bool_mat == False) * (1.0 / total_diff_class_pairs)
            np.fill_diagonal(same_clust_mat, 0)
            np.fill_diagonal(diff_clust_mat, 0)
            same_clust_mat = np.triu(same_clust_mat)
            diff_clust_mat = np.triu(diff_clust_mat)
        else:
            same_clust_mat = np.zeros((n, n))
            diff_clust_mat = np.zeros((n, n))
        targs = targs.reshape(n)
        yield inputs[start:end], tuple(targs), same_clust_mat, diff_clust_mat
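# Hedged usage sketch (illustrative only, not from the original script): a
# typical training loop consumes the generator once per epoch. `X_train`,
# `y_train`, `num_epochs`, and `train_fn` are hypothetical placeholders, and
# LAMBDA1, LAMBDA2, and SINGLETON_CLASS must already be defined at module
# level for the generator to run.
#
# for epoch in xrange(num_epochs):
#     for batch in iterate_minibatches(X_train, y_train, 128, shuffle=True):
#         batch_inputs, batch_targets, same_clust_mat, diff_clust_mat = batch
#         loss = train_fn(batch_inputs, batch_targets,
#                         same_clust_mat, diff_clust_mat)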