def _train(self, data, params, verbose):
    """Fit a linear CorEx model on ``data`` and return per-variable cluster labels.

    Uses ``params['n_hidden']``, ``params['max_iter']`` and ``params['anneal']``
    to configure the model; cluster labels come from the mutual-information
    matrix of the fitted model.
    """
    import linearcorex

    if verbose:
        print("Training {} ...".format(self.name))
    t_start = time.time()

    model = linearcorex.Corex(n_hidden=params['n_hidden'],
                              max_iter=params['max_iter'],
                              anneal=params['anneal'])
    model.fit(data)
    # assign each observed variable to the latent factor with the highest MI
    labels = model.mis.argmax(axis=0)

    if verbose:
        print("\tElapsed time {:.1f}s".format(time.time() - t_start))
    return labels
def _train(self, train_data, params, verbose):
    """Fit one linear CorEx model per dataset and return their covariances.

    ``train_data`` is an iterable of data matrices; a fresh model is trained
    on each one. Returns ``(covariances, None)`` where the second slot is a
    placeholder kept for interface compatibility.
    """
    import linearcorex

    if verbose:
        print("Training {} ...".format(self.name))
    t_start = time.time()

    covariances = []
    for dataset in train_data:
        model = linearcorex.Corex(n_hidden=params['n_hidden'],
                                  max_iter=params['max_iter'],
                                  anneal=params['anneal'])
        model.fit(dataset)
        covariances.append(model.get_covariance())

    if verbose:
        print("\tElapsed time {:.1f}s".format(time.time() - t_start))
    return covariances, None
def test_corex():
    r"""Check that PyTorch linear CorEx matches the reference implementation.

    Both implementations are trained on the same five 32-sample chunks of the
    bundled test data; their mean negative log-likelihood scores must agree to
    within 1% (relative).
    """
    print("=" * 100)
    print("Testing PyTorch Linear CorEx ...")

    # load the test data shipped alongside this file
    resources = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'resources')
    data = np.load(os.path.join(resources, 'test_corex_data.npy'))
    print("Data is loaded, shape = {}".format(data.shape))

    def _chunk_scores(make_model):
        # train one freshly-built model per 32-sample chunk, collect NLL scores
        scores = []
        for i in tqdm(range(5)):
            chunk = data[32 * i:32 * (i + 1)]
            model = make_model()
            model.fit(chunk)
            cov = model.get_covariance()
            scores.append(calculate_nll_score(data=[chunk], covs=[cov]))
        return scores

    # reference implementation first, then the PyTorch one
    lc_scores = _chunk_scores(lambda: linearcorex.Corex(n_hidden=8, max_iter=500, verbose=0))
    pylc_scores = _chunk_scores(lambda: Corex(nv=128, n_hidden=8, max_iter=1000, verbose=0))

    lc_mean = np.mean(lc_scores)
    pylc_mean = np.mean(pylc_scores)
    print("pylc score: {:.4f}, lc score: {:.4f}".format(pylc_mean, lc_mean))
    # PyTorch version must not be more than 1% worse (relative) than reference
    assert (pylc_mean - lc_mean) / (np.abs(lc_mean) + 1e-6) < 0.01
# NOTE(review): this block is Python 2 (print statements, cPickle) -- it will
# not parse under Python 3. Left byte-identical; only comments added.
if verbose:
    print '\nData summary: X has %d rows and %d columns' % X.shape
    print 'Variable names are: ' + ','.join(map(str, list(enumerate(variable_names))))
# Run CorEx on data
if verbose:
    print 'Getting CorEx results'
if not options.regraph:
    # Train a stack of CorEx layers: layer 0 sees the raw data X, each later
    # layer is fit on the transformed output of the previous layer.
    for l, layer in enumerate(layers):
        if verbose:
            print "Layer ", l
        if l == 0:
            t0 = time()
            corexes = [lc.Corex(n_hidden=layer, verbose=verbose, gaussianize=options.gaussianize,
                                missing_values=options.missing, eliminate_synergy=options.additive,
                                gpu=options.gpu, max_iter=options.max_iter).fit(X)]
            print 'Time for first layer: %0.2f' % (time() - t0)
            X_prev = X
        else:
            X_prev = corexes[-1].transform(X_prev)
            corexes.append(lc.Corex(n_hidden=layer, verbose=verbose, gaussianize=options.gaussianize,
                                    gpu=options.gpu, eliminate_synergy=options.additive,
                                    max_iter=options.max_iter).fit(X_prev))
    # Persist each trained layer to <output>/layer_<l>.dat.
    for l, corex in enumerate(corexes):
        # The learned model can be loaded again using ce.Corex().load(filename)
        print 'TC at layer %d is: %0.3f' % (l, corex.tc)
        cPickle.dump(corex, safe_open(options.output + '/layer_' + str(l) + '.dat', 'w'))
else:
    # --regraph: skip retraining and reload previously pickled layers instead.
    corexes = [cPickle.load(open(options.output + '/layer_' + str(l) + '.dat')) for l in range(len(layers))]
import linearcorex as lc
import numpy as np


def _section(title, value):
    # Print one report section: a header followed by its value.
    print('\n%s\n==================' % title)
    print(value)


print('\nInput Matrix\n==================')
# columns: A A A A A iA C C A C
# (two latent groups; column 6 presumably anti-correlated with the A group)
X = np.array([[0.01, 0.01, 0.01, 0.01, 0.01, 1.00, 1.00, 1.00, 0.01, 1.00],
              [0.01, 0.01, 0.01, 0.01, 0.01, 1.00, 0.00, 0.00, 0.01, 0.00],
              [1.00, 1.00, 1.00, 1.00, 1.00, 0.01, 0.00, 0.00, 1.00, 0.00],
              [1.00, 1.00, 1.00, 1.00, 1.00, 0.01, 1.00, 1.00, 1.00, 1.00],
              [1.00, 1.00, 1.00, 1.00, 1.00, 0.01, 1.00, 1.00, 1.00, 1.00]])
print('%s' % str(X))

print('\nFitting...\n==================')
out = lc.Corex(n_hidden=2, max_iter=1000, verbose=True)
out.fit(X)

# Report the fitted model.
_section('Clusters', out.clusters())
_section('Covariance', out.get_covariance())
_section('TCS', out.tcs)
_section('TC', out.tc)

print('\nPrediction\n==================')
sample = np.array([[1., 1., 1., 1., 1., 0., 1., 1., 1., 1.]])
p, log_z = out.transform(sample, details=True)
  # NOTE(review): this chunk starts mid-statement -- the first line closes a
  # print(...) call opened in the preceding (unseen) part of the file.
  ','.join(map(str, list(enumerate(variable_names)))))
# Run CorEx on data
if verbose:
    print('Getting CorEx results')
if not options.regraph:
    # Train a stack of CorEx layers: layer 0 sees the raw data X, each later
    # layer is fit on the transformed output of the previous layer.
    for l, layer in enumerate(layers):
        if verbose:
            print("Layer ", l)
        if l == 0:
            t0 = time()
            corexes = [
                lc.Corex(n_hidden=layer, verbose=verbose,
                         gaussianize=options.gaussianize,
                         missing_values=options.missing,
                         discourage_overlap=options.additive,
                         gpu=options.gpu, max_iter=options.max_iter).fit(X)
            ]
            print('Time for first layer: %0.2f' % (time() - t0))
            X_prev = X
        else:
            X_prev = corexes[-1].transform(X_prev)
            corexes.append(
                lc.Corex(n_hidden=layer, verbose=verbose,
                         gaussianize=options.gaussianize, gpu=options.gpu,
                         discourage_overlap=options.additive,
                         max_iter=options.max_iter).fit(X_prev))
# Sweep over candidate latent-factor counts, starting from the raw input.
previous_latent_factors = input_matrix
latent_factors = START_NUMBER_FACTORS
while latent_factors <= END_NUMBER_FACTORS:
    print('========= LATENT FACTOR %d =========' % latent_factors)
    # Best CorEx run (highest total correlation) across REPETITIONS restarts.
    best = {
        'clusters': [],
        'tcs': [],
        'tc': 0,
    }
    for repetition in range(0, REPETITIONS):
        print('Executing with %d latent factors, repetition %d...' % (latent_factors, repetition))
        print('Fitting...')
        out = lc.Corex(n_hidden=latent_factors, max_iter=10000, verbose=True)
        fit = out.fit(previous_latent_factors)
        transform = out.transform(previous_latent_factors, details=True)
        clusters = out.clusters()
        tcs = out.tcs
        tc = out.tc
        print('Clusters:\n%s' % str(clusters))
        print('TCS:\n%s' % str(tcs))
        print('TC:\n%.4f' % tc)
        # Keep the restart with the highest total correlation seen so far.
        if tc > best['tc']:
            best['clusters'] = clusters
            best['tcs'] = tcs
            best['tc'] = tc
            best['fit'] = fit
    # NOTE(review): latent_factors is presumably advanced later in the loop
    # body (past this chunk) -- confirm, otherwise this would loop forever.
def main():
    """Run one clustering-recovery experiment and pickle its results.

    Generates approximately-modular synthetic data, fits either the reference
    ``linearcorex`` implementation or the PyTorch one, scores the recovered
    clustering against the ground truth with the adjusted Rand index, and
    dumps all settings plus the score to ``<output_dir>/<run_id>.pkl``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--n_hidden', '-m', default=64, type=int,
                            help='number of hidden variables')
    arg_parser.add_argument('--n_observed', '-p', default=128, type=int,
                            help='number of observed variables')
    arg_parser.add_argument('--snr', '-s', default=0.1, type=float,
                            help='signal-to-noise ratio')
    arg_parser.add_argument('--n_samples', '-n', default=300, type=int,
                            help='number of samples')
    arg_parser.add_argument('--num_extra_parents', default=0.1, type=float,
                            help='average number of extra parents for each x_i')
    arg_parser.add_argument('--num_correlated_zs', default=0, type=int,
                            help='number of zs each z_i is correlated with (besides z_i itself)')
    arg_parser.add_argument('--random_scale', dest='random_scale', action='store_true',
                            help='if true x_i will have random scales')
    arg_parser.add_argument('--method', '-M', type=str,
                            choices=['linearcorex', 'pycorex'], default='pycorex',
                            help='which implementation of corex to use')
    arg_parser.add_argument('--device', '-d', type=str, default='cpu',
                            help='which device to use for pytorch corex')
    arg_parser.add_argument('--output_dir', '-o', type=str, default='outputs/blessing/')
    arg_parser.set_defaults(random_scale=False)
    args = arg_parser.parse_args()
    print(args)

    n_observed = args.n_observed
    n_hidden = args.n_hidden
    signal_to_noise = args.snr
    n_samples = args.n_samples
    # ground-truth clusters are contiguous equal-size groups, so p must divide
    assert n_observed % n_hidden == 0

    # generate some data
    data, _ = data_tools.generate_approximately_modular(
        nv=n_observed, m=n_hidden, ns=n_samples, snr=signal_to_noise,
        num_extra_parents=args.num_extra_parents,
        num_correlated_zs=args.num_correlated_zs,
        random_scale=args.random_scale)

    # select the method
    use_reference = (args.method == 'linearcorex')
    if use_reference:
        method = linearcorex.Corex(n_hidden=n_hidden, verbose=1)
    else:
        method = PyCorex(nv=n_observed, n_hidden=n_hidden, verbose=2,
                         max_iter=10000, tol=1e-6, device=args.device,
                         optimizer_class=torch.optim.Adam,
                         optimizer_params={'lr': 0.01})

    # train, then score the recovered clustering against the ground truth
    true_clusters = np.arange(n_hidden).repeat(n_observed // n_hidden, axis=0)
    method.fit(data)
    if use_reference:
        # reference implementation: variable goes to its max-MI latent factor
        pred_clusters = method.mis.argmax(axis=0)
    else:
        pred_clusters = method.clusters()
    score = adjusted_rand_score(labels_true=true_clusters, labels_pred=pred_clusters)
    print(pred_clusters, score)

    # save the results under a random run id
    run_id = str(np.random.choice(10**18))
    save_dict = {
        'p': n_observed,
        'm': n_hidden,
        'snr': signal_to_noise,
        'n': n_samples,
        'num_extra_parents': args.num_extra_parents,
        'num_correlated_zs': args.num_correlated_zs,
        'random_scale': args.random_scale,
        'method': args.method,
        'score': score,
        'run_id': run_id,
    }
    output_file = os.path.join(args.output_dir, run_id + '.pkl')
    make_sure_path_exists(output_file)
    with open(output_file, 'wb') as fout:
        pickle.dump(save_dict, fout)
def __perform_corex(self):
    """Reduce the embedding to 2 CorEx factors, min-max scale, and transpose."""
    factors = lc.Corex(n_hidden=2).fit_transform(self.embedding)
    scaled = MinMaxScaler().fit_transform(factors)
    return scaled.transpose()