def do_run():
    print('Random\n')
    dis.pairwise(dat.matrix)
    ndis.vecpair(dat.matrix)
    cov.correl(dat.matrix)
    ncov.veccor(dat.matrix)
    entrop(dat.matrix)
    # print('\nIris\n')
def test_algorithms():
    n = 10
    delta = 0.1
    pmodel = pairwise(n)
    k = 1
    # generate a BTL model with probabilities close to one and zero
    pmodel.generate_deterministic_BTL([k * i / float(n) for i in range(n)])
    print("largest entry: ", amax(pmodel.P))
    print("model complexity: ", pmodel.top1H())
    kset = [1, n]
    ar = ARalg(pmodel, kset)
    ar.rank(0.1)
    print('AR, # Comparisons:', ar.pairwise.ctr)
    print('..succeeded' if ar.evaluate_perfect_recovery() else '..failed')
    plpac = PLPAC(pmodel)
    plpac.rank(delta)
    print('PLPAC, # Comparisons:', plpac.pairwise.ctr)
    print('..succeeded' if plpac.evaluate_perfect_recovery() else '..failed')
    alg = topkalg(pmodel, 1)
    alg.rank()
    print('AR2, #comparisons:', alg.pairwise.ctr)
    print('..succeeded' if alg.evaluate_perfect_recovery() else '..failed')
    btm = BTM(pmodel)
    btm.rank(delta)
    print('BTM, #comparisons:', btm.pairwise.ctr)
    print('..succeeded' if btm.evaluate_perfect_recovery() else '..failed')
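# A minimal entry-point sketch for running the smoke test above directly. It
# assumes the module-level imports (pairwise, ARalg, PLPAC, topkalg, BTM, amax)
# are in place; this guard is illustrative and not part of the original script.
if __name__ == '__main__':
    test_algorithms()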
def reproduce_figure_selection_confidence_interval():
    n = 5
    pmodel = pairwise(n)
    pmodel.generate_const(0.1)
    k = 2
    rule = 4
    alg = topkalg(pmodel, k, rule)
    varydelta(alg, "./dat/cmprules3")
def get_points(arrangement, table):
    # close the circle so the last element is also scored against the first
    arrangement = list(arrangement)
    arrangement.append(arrangement[0])
    points = 0
    for a, b in pairwise(arrangement):
        points += get_points_for_a_next_to_b(a, b, table)
    return points
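# get_points() above consumes pairwise(arrangement) as consecutive, overlapping
# neighbour pairs around the closed circle. A minimal sketch of such a helper,
# assuming the standard itertools recipe is the variant imported here; the name
# pairwise_sketch is hypothetical and only for illustration.
from itertools import tee

def pairwise_sketch(iterable):
    """Yield overlapping pairs: (s0, s1), (s1, s2), (s2, s3), ..."""
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)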
def exp_fig_varyn(ns, const=5, nit=300, std=0):
    k = 1
    rule = 7
    #ns = [10,15,20,25,30,35,40]
    models = [pairwise(n) for n in ns]
    for pmodel in models:
        pmodel.generate_deterministic_BTL(
            [const * i / float(pmodel.n**1.1) for i in range(pmodel.n)])
        pmodel.uniform_perturb(std)
        print(amax(pmodel.P))
        print(pmodel.scores())
    pmodel = pairwise(2)
    alg = topkalg(pmodel, k, rule)
    plpac = PLPAC(pmodel)
    btm = BTM(pmodel)
    algorithms = [alg, plpac, btm]
    result = compare_models(models, algorithms, nit, False)
    ar = result[:, 3]
    plpac = result[:, 5]
    btmb = result[:, 7]
    return result
def exp_fig6():
    #n = 5
    n = 10
    delta = 0.1
    #nit = 500
    nit = 10
    #kset = range(0,n*(n-1)/2+1)
    kset = range(0, 15)
    result = zeros((len(kset), 12))
    for k in kset:
        sampcomp = [[], [], []]
        successp = [[], [], []]
        for it in range(nit):
            # for each instance, generate a random model...
            pmodel = pairwise(n)
            pmodel.generate_deterministic_BTL([log(1 + i) for i in range(n)])
            oncemore = True
            while pmodel.top1H() > 250000 or oncemore:
                oncemore = False
                # find off diagonals
                offdiags = []
                for i in range(0, n):
                    offdiags += [(i, j) for j in range(i + 1, n)]
                offdiags = random.permutation(offdiags)
                for (i, j) in offdiags[:k]:
                    pmodel.P[i, j] = 0.5 * random.rand()
                    pmodel.P[j, i] = 1 - pmodel.P[i, j]
                pmodel.sortP()  # to make sure 0 is the top item
            print(pmodel.top1H())
            alg = topkalg(pmodel, 1)
            plpac = PLPAC(pmodel, pmodel.top1H() * 50)
            btm = BTM(pmodel, pmodel.top1H() * 50)
            for nalg, alg in enumerate([alg, plpac, btm]):
                alg.rank(delta)
                sampcomp[nalg].append(alg.pairwise.ctr / pmodel.top1H())
                successp[nalg].append(alg.evaluate_perfect_recovery())
                print(nalg, alg.pairwise.ctr / pmodel.top1H())
                print(nalg, 'succeeded' if alg.evaluate_perfect_recovery() else 'failed')
        result[k, 0] = k / float(n * (n - 1) / 2)
        for nalg in range(3):
            result[k, 3 + 2 * nalg] = mean(sampcomp[nalg])
            result[k, 3 + 2 * nalg + 1] = sqrt(var(sampcomp[nalg]))
            result[k, 9 + nalg] = 1 - mean(successp[nalg])
    savetxt("./fig/generalization_n10.dat", result, delimiter='\t')
def exp_revision(relative=False):
    n = 10
    #nit = 400
    nit = 50
    ks = range(1, 130, 10)
    models = [pairwise(n) for i in ks]
    for pmodel, k in zip(models, ks):
        pmodel.generate_deterministic_BTL(
            [log(0.09 * k + i) for i in range(n)])
    alg = topkalg(pmodel, 1, 7)  # original AR algorithm
    savage = topkalg(pmodel, 1, 2)  # SAVAGE algorithm from Urvoy et al. 2013
    algorithms = [alg, savage]
    result = compare_models(models, algorithms, nit, relative)
    savetxt("./dat/comparison_vary_closeness_linsep_rev.dat", result, delimiter='\t')
def exp_fig4b(relative=False):
    n = 10
    nit = 200
    kset = range(1, n)
    models = [pairwise(n) for k in kset]
    for pmodel, k in zip(models, kset):
        pmodel.generate_deterministic_BTL(
            [0.6 * k * i / float(n) for i in range(n)])
    k = 1
    rule = 7
    alg = topkalg(pmodel, k, rule)
    plpac = PLPAC(pmodel)
    btm = BTM(pmodel)
    algorithms = [alg, plpac, btm]
    result = compare_models(models, algorithms, nit, relative)
    savetxt("./fig/comparison_vary_closeness_extreme.dat", result, delimiter='\t')
def exp_fig4a(relative=False):
    n = 10
    #nit = 400
    nit = 200
    ks = range(1, 130, 10)
    #ks = [1]
    models = [pairwise(n) for i in ks]
    for pmodel, k in zip(models, ks):
        pmodel.generate_deterministic_BTL(
            [log(0.09 * k + i) for i in range(n)])
    k = 1
    rule = 7
    alg = topkalg(pmodel, k, rule)
    plpac = PLPAC(pmodel)
    btm = BTM(pmodel)
    #alg2 = topkalg(pmodel,6)
    #algorithms = [alg,plpac,btm,alg2]
    algorithms = [alg, plpac, btm]
    result = compare_models(models, algorithms, nit, relative)
    savetxt("./fig/comparison_vary_closeness_linsep.dat", result, delimiter='\t')
import EntropyComputation as entr
import datums as dat
import pairwise as dis
import pairwiseVectorization as ndis
import covariance as cov
import CorrelationVectorized as ncov

print('Random\n')
dis.pairwise(dat.matrix)
ndis.vecpair(dat.matrix)
cov.correl(dat.matrix)
ncov.veccor(dat.matrix)

# print('\nIris\n')
# dis.pairwise(dat.iris)
# ndis.vecpair(dat.iris)
# cov.correl(dat.iris)
# ncov.veccor(dat.iris)

# print('\nBreast Cancer\n')
# dis.pairwise(dat.breast_cancer)
# ndis.vecpair(dat.breast_cancer)
# cov.correl(dat.breast_cancer)
# ncov.veccor(dat.breast_cancer)

# print('\nDigits\n')
# dis.pairwise(dat.digits)
# ndis.vecpair(dat.digits)
# cov.correl(dat.digits)
# ncov.veccor(dat.digits)
p = PorterStemmer()
index = {}
count_term = {}
start_time = time.time()

# load the stopword list
with open(stopwords_file, "r") as file:
    stopwords = map(lambda line: line.strip(), file.readlines())

# parse the inverted index: each line is a term, its total count,
# then alternating (document, count) tokens
with open(index_file, "r") as index_file:
    lines = index_file.readlines()
    for line in lines:
        entry = line.split(" ")
        documents = entry[2:]
        dictionary = defaultdict(int)
        for document, count in pairwise(documents):
            dictionary[document] = int(count)
        count_term.update({entry[0]: int(entry[1])})
        index.update({entry[0]: dictionary})

# document lengths; the last entry holds the total corpus length
with open(lengths_file, "r") as lengths:
    documents_lengths = map(lambda line: line.strip(), lengths.readlines())
    documents_lengths = [int(length) for length in documents_lengths]
    corpus_length = documents_lengths[len(documents_lengths) - 1]
    del documents_lengths[len(documents_lengths) - 1]

# normalize the query: strip punctuation, drop stopwords, stem
for term in query:
    term = term.translate(None, string.punctuation)
    if term not in stopwords:
        inner_query.append(p.stem(term.lower(), 0, len(term) - 1))
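# Note: the index-loading loop above treats pairwise(documents) as
# (document, count) pairs, which implies a non-overlapping "two tokens at a
# time" grouping rather than the overlapping itertools recipe. A minimal sketch
# of that variant, as an assumption about the helper this script imports; the
# name pairwise_chunks is hypothetical.
def pairwise_chunks(iterable):
    """Yield non-overlapping pairs: (s0, s1), (s2, s3), ..."""
    it = iter(iterable)
    return zip(it, it)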
def entrop(X):
    print('starting running .....')
    np.random.seed(100)
    params = range(10, 141, 10)  # different param settings
    nparams = len(params)  # number of different parameters
    perf_loop = np.zeros([10, nparams])  # 10 trials = 10 rows, each parameter is a column
    perf_cool = np.zeros([10, nparams])
    counter = 0
    for ncols in X:
        nrows = len(X[0])
        print("matrix dimensions: ", nrows, ncols)
        for i in range(10):
            #X = np.random.randint(0,20,[nrows,ncols]) # random matrix
            # you need to use random.rand(...) for a float matrix
            st = time.time()
            entropy_dist = dis.pairwise(X)
            et = time.time()
            perf_loop[i, counter] = et - st  # time difference
            st = time.time()
            entropy_npdist = ndis.vecpair(X)
            et = time.time()
            perf_cool[i, counter] = et - st
            assert np.isclose(entropy_dist, entropy_npdist, atol=1e-06)
        counter = counter + 1
    mean_loop = np.mean(perf_loop, axis=0)  # mean time for each parameter setting (over 10 trials)
    mean_cool = np.mean(perf_cool, axis=0)
    std_loop = np.std(perf_loop, axis=0)  # standard deviation
    std_cool = np.std(perf_cool, axis=0)
    import matplotlib.pyplot as plt
    plt.errorbar(params, mean_loop[0:nparams], yerr=std_loop[0:nparams],
                 color='red', label='Loop Solution')
    plt.errorbar(params, mean_cool[0:nparams], yerr=std_cool[0:nparams],
                 color='blue', label='Matrix Solution')
    plt.xlabel('Number of Cols of the Matrix')
    plt.ylabel('Running Time (Seconds)')
    plt.legend()
    plt.savefig('CompareEntropyFig.pdf')
    # plt.show()  # uncomment this if you want to see it right away
    print("result is written to CompareEntropyFig.pdf")
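# For context, a sketch of the loop-vs-vectorized contrast that entrop() times
# above (it reuses the module's numpy import, np). These are illustrative
# stand-ins under the assumption that dis.pairwise / ndis.vecpair compute the
# same quantity two ways; they are not the actual implementations in
# pairwise.py / pairwiseVectorization.py.
def row_entropy_loop(X):
    """Shannon entropy of each row, computed with an explicit Python loop."""
    out = np.zeros(len(X))
    for i, row in enumerate(X):
        p = np.asarray(row, dtype=float)
        p = p / p.sum()
        p = p[p > 0]  # drop zero-probability entries before taking the log
        out[i] = -(p * np.log2(p)).sum()
    return out

def row_entropy_vectorized(X):
    """The same row entropies computed with NumPy broadcasting, no Python loop."""
    P = np.asarray(X, dtype=float)
    P = P / P.sum(axis=1, keepdims=True)
    safe = np.where(P > 0, P, 1.0)  # avoid log(0); those terms contribute zero
    return -(P * np.log2(safe)).sum(axis=1)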