def sample_filter(category1, category2, averaging, nrefits):
    """Decide whether a sampled parameter combination should be run.

    A combination is rejected when ``category2`` is greater than
    ``category1``; every other combination is accepted. ``averaging`` and
    ``nrefits`` are part of the signature but are not inspected here.

    Returns:
        ``False`` when ``category2 > category1``, ``True`` otherwise.
    """
    return not category2 > category1


def func(category1, category2, averaging, nrefits):
    """Fit one ``HTest`` on a pair of categories and report the outcome.

    Args:
        category1: First category, forwarded to ``get_categories``.
        category2: Second category, forwarded to ``get_categories``.
        averaging: Forwarded to ``HTest`` as its ``averaging`` argument.
        nrefits: Forwarded to ``HTest.fit`` as its ``nrefits`` argument.

    Returns:
        A dict with keys ``pvalue`` (read from the fitted test) and
        ``elapsed_time`` (seconds between entering this function and the
        end of the fit, so it covers data retrieval as well).
    """
    began = time.time()

    # The global NumPy RNG is pinned to a value derived from the pair only
    # while the data is fetched, then re-seeded without an explicit value.
    # NOTE(review): presumably this makes the data draw reproducible per
    # pair while leaving the fit itself unseeded - confirm against
    # get_categories.
    np.random.seed(10 * category1 + category2)
    first_sample, second_sample = get_categories(category1, category2)
    np.random.seed()

    test = HTest(
        dataloader_workers=0,
        verbose=1,
        distribution="bernoulli",
        averaging=averaging,
    )
    test.fit(first_sample, second_sample, nrefits=nrefits)

    # Stop the clock before reading the result attribute.
    elapsed = time.time() - began
    return {"pvalue": test.pvalue, "elapsed_time": elapsed}


# Hand the parameter grid, the worker and the filter to the study driver.
do_simulation_study(
    to_sample,
    func,
    db,
    ResultVAECIFARHTest,
    sample_filter=sample_filter,
)
if ncomparisons == 1 and averaging == "median": return False return True def func(distribution, no_instances, dissimilarity, ncomparisons, averaging): def data_gen(size, dim, mu): res = np.linspace(0.2, 0.9, dim) res = stats.lognorm.rvs(res, scale=2, size=(size, dim)) res -= stats.lognorm.rvs(0.5, scale=2, size=(size, 1)) res += stats.norm.rvs(loc=mu, scale=2, size=(size, 1)) return res start_time = time.time() y_train0 = data_gen(no_instances, 10, 0) y_train1 = data_gen(no_instances, 10, dissimilarity) htest = HTest(dataloader_workers=0, verbose=1, averaging=averaging) htest.fit(y_train0, y_train1, 10000, ncomparisons=ncomparisons) elapsed_time = time.time() - start_time return dict( pvalue=htest.pvalue, elapsed_time=elapsed_time, ) do_simulation_study(to_sample, func, db, ResultVAEHTest, max_count=200, sample_filter=sample_filter)
retrain_permutations = retrain_permutations, estimator = estimator, method = method, ) elif estimator == "rf": nn_obj = NNPTest( y_train = y_train, x_train = np.delete(x_train_n, feature_testedd, 1), x_to_permutate = x_train_n[:, feature_testedd], retrain_permutations = retrain_permutations, estimator = "rf", method = method, n_estimators = 300, ) elif estimator == "linear": nn_obj = NNPTest( y_train = y_train, x_train = np.delete(x_train_n, feature_testedd, 1), x_to_permutate = x_train_n[:, feature_testedd], retrain_permutations = retrain_permutations, estimator = "linear", method = method, ) return dict( pvalue=nn_obj.pvalue, elapsed_time=nn_obj.elapsed_time, ) do_simulation_study(to_sample, func, db, ResultRealData, max_count=1, sample_filter=sample_filter)
from cifar_compare_db_structure import ResultVAECIFARCompare, db
from vaecompare import Compare
from sstudy import do_simulation_study
from utils import get_categories

# Parameter grid: both categories range over 0..9.
to_sample = {
    "category1": range(10),
    "category2": range(10),
}


def sample_filter(category1, category2):
    """Decide whether a sampled category pair should be run.

    Returns:
        ``False`` when ``category2 > category1``, ``True`` otherwise.
    """
    return not category2 > category1


def func(category1, category2):
    """Fit one ``Compare`` model on a pair of categories and report it.

    Args:
        category1: First category, forwarded to ``get_categories``.
        category2: Second category, forwarded to ``get_categories``.

    Returns:
        A dict with keys ``samples`` (the fitted object's ``samples``
        attribute serialized with ``pickle.dumps``) and ``elapsed_time``
        (seconds spent fetching the data and fitting).
    """
    began = time.time()
    first_sample, second_sample = get_categories(category1, category2)

    comparer = Compare(dataloader_workers=0, verbose=2, distribution="bernoulli")
    # The third positional argument (10000) is passed through unchanged; its
    # meaning is defined by Compare.fit - TODO confirm.
    comparer.fit(first_sample, second_sample, 10000)

    # Stop the clock before serializing, so pickling is not timed.
    elapsed = time.time() - began
    return {
        "samples": pickle.dumps(comparer.samples),
        "elapsed_time": elapsed,
    }


# Hand the parameter grid, the worker and the filter to the study driver.
do_simulation_study(
    to_sample,
    func,
    db,
    ResultVAECIFARCompare,
    sample_filter=sample_filter,
    max_count=90,
)