        'lambdaS': lambdaS, 'lambdaG': lambdaG }
init_S = 'random'
init_FG = 'kmeans'

metrics = ['MSE', 'R^2', 'Rp']

# Load in data
R_true = numpy.loadtxt(input_folder + "R_true.txt")

# For each noise ratio, generate mask matrices for each attempt
M_attempts = 100
all_Ms = [
    [try_generate_M(I, J, fraction_unknown, M_attempts) for r in range(0, repeats)]
    for noise in noise_ratios
]
all_Ms_test = [[calc_inverse_M(M) for M in Ms] for Ms in all_Ms]

# Make sure each M has no empty rows or columns
def check_empty_rows_columns(M, fraction):
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, c in enumerate(sums_rows):
        assert c != 0, "Fully unobserved row in M, row %s. Fraction %s." % (i, fraction)
    for j, c in enumerate(sums_columns):
        assert c != 0, "Fully unobserved column in M, column %s. Fraction %s." % (j, fraction)
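# The mask helpers used above (try_generate_M, calc_inverse_M) come from the project's
# own modules. The following is a hedged, hypothetical sketch of their assumed behaviour,
# not the project's actual implementation: draw a random binary mask with roughly the
# requested fraction of held-out (zero) entries, retry until no row or column is fully
# unobserved, and take the test mask as the complement of the training mask.
import numpy

def _sketch_generate_M(I, J, fraction_unknown):
    # 1 = observed entry, 0 = held-out entry.
    return numpy.array(numpy.random.random((I, J)) > fraction_unknown, dtype=float)

def _sketch_try_generate_M(I, J, fraction_unknown, attempts):
    # Retry until the mask has no empty rows or columns.
    for attempt in range(attempts):
        M = _sketch_generate_M(I, J, fraction_unknown)
        if M.sum(axis=0).all() and M.sum(axis=1).all():
            return M
    raise Exception("Could not generate M without empty rows or columns in %s attempts." % attempts)

def _sketch_calc_inverse_M(M):
    # Test mask: the complement of the training mask.
    return 1. - M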
init_FG = 'kmeans'
minimum_TN = 0.1

metrics = ['MSE', 'R^2', 'Rp']

# Load in data
R = numpy.loadtxt(input_folder + "R.txt")

# Seed all of the methods the same
numpy.random.seed(3)

# Generate matrices M - one list of M's for each fraction
M_attempts = 100
all_Ms = [
    [try_generate_M(I, J, fraction, M_attempts) for r in range(0, repeats)]
    for fraction in fractions_unknown
]
all_Ms_test = [[calc_inverse_M(M) for M in Ms] for Ms in all_Ms]

# Make sure each M has no empty rows or columns
def check_empty_rows_columns(M, fraction):
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, c in enumerate(sums_rows):
        assert c != 0, "Fully unobserved row in M, row %s. Fraction %s." % (i, fraction)
    for j, c in enumerate(sums_columns):
        assert c != 0, "Fully unobserved column in M, column %s. Fraction %s." % (j, fraction)

for Ms, fraction in zip(all_Ms, fractions_unknown):
    for M in Ms:
        check_empty_rows_columns(M, fraction)
alpha, beta = 100., 1. #1., 1.
tau = alpha / beta
lambdaF = numpy.ones((I, true_K))
lambdaS = numpy.ones((true_K, true_L))
lambdaG = numpy.ones((J, true_L))

classifier = bnmtf_gibbs_optimised
initFG = 'kmeans'
initS = 'random'
search_metric = 'AIC'

# Generate data
(_, _, _, _, _, R) = generate_dataset(I, J, true_K, true_L, lambdaF, lambdaS, lambdaG, tau)
M = try_generate_M(I, J, fraction_unknown, attempts_M)

# Run the greedy search. The priors lambdaF, lambdaS, lambdaG need to be a single value (recall K and L are unknown)
priors = { 'alpha': alpha, 'beta': beta,
           'lambdaF': lambdaF[0, 0], 'lambdaS': lambdaS[0, 0], 'lambdaG': lambdaG[0, 0] }
greedy_search = GreedySearch(classifier, values_K, values_L, R, M, priors,
                             initS, initFG, iterations, restarts)
greedy_search.search(search_metric, burn_in, thinning)

# Plot the performances of all four metrics
for metric in ['loglikelihood', 'BIC', 'AIC', 'MSE']:
    # Make three lists of indices X, Y, Z (K, L, metric)
    KLvalues = numpy.array(greedy_search.all_values(metric))
    (list_values_K, list_values_L, values) = zip(*KLvalues)

    # Set up a regular grid of interpolation points
    Ki, Li = (numpy.linspace(min(list_values_K), max(list_values_K), 100),
              numpy.linspace(min(list_values_L), max(list_values_L), 100))
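    # The fragment stops after setting up the interpolation grid. What follows is a
    # hedged sketch of a plausible continuation (assumed, not the original script's
    # code): interpolate the scattered (K, L, metric) points onto the regular grid
    # with scipy's griddata and draw a filled contour plot with matplotlib.
    from scipy.interpolate import griddata
    import matplotlib.pyplot as plt

    Ki_grid, Li_grid = numpy.meshgrid(Ki, Li)
    Zi = griddata((numpy.array(list_values_K), numpy.array(list_values_L)),
                  numpy.array(values), (Ki_grid, Li_grid), method='linear')
    plt.figure()
    plt.contourf(Ki_grid, Li_grid, Zi)
    plt.xlabel('K'); plt.ylabel('L'); plt.title(metric)
    plt.colorbar()
    plt.show()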
init_S = 'random'
init_FG = 'kmeans'

metrics = ['MSE', 'R^2', 'Rp']

# Load in data
R = numpy.loadtxt(input_folder + "R.txt")

# Seed all of the methods the same
numpy.random.seed(3)

# Generate matrices M - one list of M's for each fraction
M_attempts = 100
all_Ms = [
    [try_generate_M(I, J, fraction, M_attempts) for r in range(0, repeats)]
    for fraction in fractions_unknown
]
all_Ms_test = [[calc_inverse_M(M) for M in Ms] for Ms in all_Ms]

# Make sure each M has no empty rows or columns
def check_empty_rows_columns(M, fraction):
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, c in enumerate(sums_rows):
        assert c != 0, "Fully unobserved row in M, row %s. Fraction %s." % (i, fraction)
    for j, c in enumerate(sums_columns):
        assert c != 0, "Fully unobserved column in M, column %s. Fraction %s." % (j, fraction)
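# Presumably this script then validates every generated mask the way the companion
# script above does. The loop below is an assumed continuation, mirroring that script:
for Ms, fraction in zip(all_Ms, fractions_unknown):
    for M in Ms:
        check_empty_rows_columns(M, fraction)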
attempts_M = 100

alpha, beta = 100., 1. #1., 1.
tau = alpha / beta
lambdaF = numpy.ones((I, true_K))
lambdaS = numpy.ones((true_K, true_L))
lambdaG = numpy.ones((J, true_L))

classifier = bnmtf_gibbs_optimised
initFG = 'kmeans'
initS = 'random'

# Generate data
(_, _, _, _, _, R) = generate_dataset(I, J, true_K, true_L, lambdaF, lambdaS, lambdaG, tau)
M = try_generate_M(I, J, fraction_unknown, attempts_M)

# Run the grid search. The priors lambdaF, lambdaS, lambdaG need to be a single value (recall K and L are unknown)
priors = { 'alpha': alpha, 'beta': beta,
           'lambdaF': lambdaF[0, 0], 'lambdaS': lambdaS[0, 0], 'lambdaG': lambdaG[0, 0] }
grid_search = GridSearch(classifier, values_K, values_L, R, M, priors,
                         initS, initFG, iterations, restarts)
grid_search.search(burn_in, thinning)

# Plot the performances of all four metrics
for metric in ['loglikelihood', 'BIC', 'AIC', 'MSE']:
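    # The fragment cuts off at the loop header. A hedged sketch of a possible body
    # follows (assumed, not the original script's code). It assumes the search object
    # exposes the per-(K, L) values of each metric as a 2-D array via
    # grid_search.all_values(metric) - an assumption made by analogy with the greedy
    # search's API above - and renders them as a heatmap over the (K, L) grid.
    import matplotlib.pyplot as plt

    performances = numpy.array(grid_search.all_values(metric))  # assumed shape: (len(values_K), len(values_L))
    plt.figure()
    plt.imshow(performances, origin='lower', aspect='auto')
    plt.xticks(range(len(values_L)), values_L)
    plt.yticks(range(len(values_K)), values_K)
    plt.xlabel('L'); plt.ylabel('K'); plt.title(metric)
    plt.colorbar()
    plt.show()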