def perform_experiment(params):
    """Run a two-phase optimisation for several seeds and save the results.

    For every seed in ``range(samples)`` the matrices are first produced by
    ``experiments.default_sample`` using an initial optimizer built from
    ``init_iters`` ``regularizers.Trivial`` instances, and are then passed
    to ``optimizer.run``. Both optimizers share a single callback, whose
    results are written to ``output_path``.

    Args:
        params: A 7-tuple ``(train_n_dw_matrix, test_n_dw_matrix, optimizer,
            T, samples, init_iters, output_path)``. The function takes one
            tuple rather than seven arguments, exactly as the original
            tuple-parameter signature did.

    Raises:
        ValueError: If ``params`` does not contain exactly seven items.
    """
    # Tuple parameters in a signature were removed in Python 3 (PEP 3113),
    # so the single tuple argument is unpacked explicitly instead.
    (train_n_dw_matrix, test_n_dw_matrix, optimizer,
     T, samples, init_iters, output_path) = params

    # presumably one Trivial regularizer per initial iteration -- TODO confirm
    init_optimizer = default.Optimizer([regularizers.Trivial()] * init_iters)
    callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        test_n_dw_matrix=test_n_dw_matrix,
    )
    init_optimizer.iteration_callback = callback
    optimizer.iteration_callback = callback

    for seed in range(samples):
        print(seed)
        # finish_launch=False: the launch is finished by hand below, after
        # the second optimizer has also run.
        plsa_phi, plsa_theta = experiments.default_sample(
            train_n_dw_matrix=train_n_dw_matrix,
            T=T,
            seed=seed,
            optimizer=init_optimizer,
            finish_launch=False,
        )
        optimizer.run(train_n_dw_matrix, plsa_phi, plsa_theta)
        if optimizer.iteration_callback:
            optimizer.iteration_callback.finish_launch()

    # NOTE(review): original indentation was lost; saving once after all
    # seeds is assumed -- confirm.
    optimizer.iteration_callback.save_results(output_path)
def perform_experiment(n_dw_matrix, optimizer, T, samples):
    """Sample the model for each seed with a timing-enabled callback.

    A callback created by ``experiments.default_callback`` (with
    ``measure_time=True``) is attached to ``optimizer``; then
    ``experiments.default_sample`` is invoked once per seed in
    ``range(samples)``. Finally ``timed_default.SimpleTimer.total_times``
    is printed.

    Args:
        n_dw_matrix: Matrix passed to the callback as ``train_n_dw_matrix``
            and to ``experiments.default_sample``.
        optimizer: Object that receives the callback and is passed to
            ``experiments.default_sample``.
        T: Forwarded to ``experiments.default_sample``.
        samples: Number of seeds; seeds are ``0 .. samples - 1``.
    """
    callback_options = dict(
        train_n_dw_matrix=n_dw_matrix,
        top_pmi_sizes=[5, 10, 20, 30],
        top_avg_jaccard_sizes=[10, 50, 100, 200],
        measure_time=True,
    )
    optimizer.iteration_callback = experiments.default_callback(
        **callback_options
    )

    for seed in range(samples):
        print(seed)
        experiments.default_sample(n_dw_matrix, T, seed, optimizer)

    # NOTE(review): original indentation was lost; printing the timers once
    # after all seeds is assumed -- confirm.
    print(timed_default.SimpleTimer.total_times)
def perform_experiment(params):
    """Sample the model for each seed and save the collected results.

    A callback from ``experiments.default_callback`` is attached to the
    optimizer, ``experiments.default_sample`` is called once per seed in
    ``range(samples)``, and the callback's results are written to
    ``output_path``.

    Args:
        params: A 6-tuple ``(train_n_dw_matrix, test_n_dw_matrix, optimizer,
            T, samples, output_path)``. The function takes one tuple rather
            than six arguments, exactly as the original tuple-parameter
            signature did.

    Raises:
        ValueError: If ``params`` does not contain exactly six items.
    """
    # Tuple parameters in a signature were removed in Python 3 (PEP 3113),
    # so the single tuple argument is unpacked explicitly instead.
    (train_n_dw_matrix, test_n_dw_matrix, optimizer,
     T, samples, output_path) = params

    optimizer.iteration_callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        test_n_dw_matrix=test_n_dw_matrix,
        top_pmi_sizes=[5, 10, 20, 30],
        top_avg_jaccard_sizes=[10, 50, 100, 200],
    )

    for seed in range(samples):
        print(seed)
        experiments.default_sample(train_n_dw_matrix, T, seed, optimizer)

    # NOTE(review): original indentation was lost; saving once after all
    # seeds is assumed -- confirm.
    optimizer.iteration_callback.save_results(output_path)
def perform_iteration_dependency_experiment(params):
    """Sample the model for each seed, evaluating with ``iter_eval_step=5``.

    A callback from ``experiments.default_callback`` (created with
    ``uniqueness_measures=True`` and ``iter_eval_step=5``) is attached to
    the optimizer, ``experiments.default_sample`` is called once per seed in
    ``range(samples)``, and the callback's results are written to
    ``output_path``.

    Args:
        params: A 6-tuple ``(train_n_dw_matrix, test_n_dw_matrix, optimizer,
            T, samples, output_path)``. The function takes one tuple rather
            than six arguments, exactly as the original tuple-parameter
            signature did.

    Raises:
        ValueError: If ``params`` does not contain exactly six items.
    """
    # Tuple parameters in a signature were removed in Python 3 (PEP 3113),
    # so the single tuple argument is unpacked explicitly instead.
    (train_n_dw_matrix, test_n_dw_matrix, optimizer,
     T, samples, output_path) = params

    optimizer.iteration_callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        test_n_dw_matrix=test_n_dw_matrix,
        uniqueness_measures=True,
        iter_eval_step=5,
    )

    for seed in range(samples):
        print(seed)
        experiments.default_sample(train_n_dw_matrix, T, seed, optimizer)

    # NOTE(review): original indentation was lost; saving once after all
    # seeds is assumed -- confirm.
    optimizer.iteration_callback.save_results(output_path)
def perform_alpha_dependency_experiment(params):
    """Evaluate the final sample of each seed inside one callback launch.

    Unlike the per-iteration experiments, the callback is not attached to
    the optimizer here: it is started manually, called with iteration index
    ``0`` on whatever ``experiments.default_sample`` returns for each seed,
    then finished and saved to ``output_path``.

    Args:
        params: A 5-tuple ``(train_n_dw_matrix, optimizer, T, samples,
            output_path)``. The function takes one tuple rather than five
            arguments, exactly as the original tuple-parameter signature
            did.

    Raises:
        ValueError: If ``params`` does not contain exactly five items.
    """
    # Tuple parameters in a signature were removed in Python 3 (PEP 3113),
    # so the single tuple argument is unpacked explicitly instead.
    train_n_dw_matrix, optimizer, T, samples, output_path = params

    callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        uniqueness_measures=True,
    )

    # NOTE(review): original indentation was lost; a single launch spanning
    # all seeds (start before the loop, finish after it) is assumed.
    callback.start_launch()
    for seed in range(samples):
        print(seed)
        sampled = experiments.default_sample(
            train_n_dw_matrix, T, seed, optimizer
        )
        # The sample result is splatted after the iteration index 0.
        callback(0, *sampled)
    callback.finish_launch()
    callback.save_results(output_path)
def perform_experiment(train_n_dw_matrix, test_n_dw_matrix, optimizer, T,
                       samples, output_path, tau, path_phi_output):
    """Sample the model for each seed, save metrics and pickle the last phi.

    A callback from ``experiments.default_callback`` is attached to the
    optimizer and ``experiments.default_sample`` is called once per seed in
    ``range(samples)``. Afterwards the callback results are saved to
    ``output_path`` and the phi matrix of the last seed is pickled to
    ``path_phi_output``.

    Args:
        train_n_dw_matrix: Matrix passed to the callback and the sampler.
        test_n_dw_matrix: Matrix passed to the callback.
        optimizer: Object that receives the callback and is passed to
            ``experiments.default_sample``.
        T: Forwarded to ``experiments.default_sample``.
        samples: Number of seeds; must be at least 1.
        output_path: Destination for the callback's ``save_results``.
        tau: Forwarded as the fifth positional argument of
            ``experiments.default_sample``.
        path_phi_output: File path the last phi is pickled to.

    Returns:
        The ``(expphi, exptheta)`` pair produced for the last seed.

    Raises:
        ValueError: If ``samples`` is less than 1.
    """
    # With no seeds the loop below would never bind expphi/exptheta, and the
    # function would fail with UnboundLocalError only after saving results
    # and truncating the phi file. Fail early, before any side effect.
    if samples < 1:
        raise ValueError(
            'samples must be at least 1, got {!r}'.format(samples))

    optimizer.iteration_callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        test_n_dw_matrix=test_n_dw_matrix,
        top_pmi_sizes=[5, 10, 20, 30],
        top_avg_jaccard_sizes=[10, 50, 100, 200],
        measure_time=False,
    )

    # Each seed overwrites the previous pair; only the last one is kept.
    for seed in range(samples):
        expphi, exptheta = experiments.default_sample(
            train_n_dw_matrix, T, seed, optimizer, tau)

    # NOTE(review): original indentation was lost; saving and pickling once
    # after all seeds is assumed -- confirm.
    optimizer.iteration_callback.save_results(output_path)
    with open(path_phi_output, 'wb') as resource_file:
        pickle.dump(expphi, resource_file)
    return expphi, exptheta
def perform_ww_experiment(params):
    """Run ``symmetric_sample`` per seed and save one result file per seed.

    The optimizer gets a ``Callback(n_ww_matrix)`` as its iteration
    callback. For each seed in ``range(samples)`` a separate callback from
    ``experiments.default_callback`` evaluates the final ``phi``/``theta``
    once (iteration index ``0``); the matrices and the evaluated properties
    are then saved to ``<output_dir>/seed_<seed>.pkl`` through
    ``callbacks.save_results``.

    Args:
        params: A 5-tuple ``(n_ww_matrix, optimizer, T, samples,
            output_dir)``. The function takes one tuple rather than five
            arguments, exactly as the original tuple-parameter signature
            did.

    Raises:
        ValueError: If ``params`` does not contain exactly five items.
    """
    # Tuple parameters in a signature were removed in Python 3 (PEP 3113),
    # so the single tuple argument is unpacked explicitly instead.
    n_ww_matrix, optimizer, T, samples, output_dir = params

    optimizer.iteration_callback = Callback(n_ww_matrix)

    for seed in range(samples):
        print('Seed', seed)
        seed_callback = experiments.default_callback(
            train_n_dw_matrix=n_ww_matrix,
            top_avg_jaccard_sizes=[10, 50, 100, 200],
        )
        phi, theta, n_tw, n_dt = symmetric_sample(
            n_ww_matrix, T, seed, optimizer)

        # One launch holding one evaluation of the final matrices.
        seed_callback.start_launch()
        seed_callback(0, phi, theta)
        seed_callback.finish_launch()

        result = dict(phi=phi, theta=theta, n_tw=n_tw, n_dt=n_dt)
        # value[0][0] -- presumably first launch, first recorded evaluation;
        # TODO confirm against the callback's result layout.
        result['properties'] = {
            key: value[0][0]
            for key, value in seed_callback.result.items()
        }
        callbacks.save_results(
            result,
            os.path.join(output_dir, 'seed_{}.pkl'.format(seed)),
        )
def perform_experiment(train_n_dw_matrix, test_n_dw_matrix, T, num_2_token):
    """Compare a gensim ``LdaModel`` with the project optimizer per seed.

    For each of the fixed seeds ``42, 7, 777, 12`` the function trains a
    gensim ``LdaModel`` on the training matrix, prints its perplexity,
    runs ``default.Optimizer`` from a seeded random ``phi`` and a uniform
    ``theta``, evaluates both models with one callback and prints every
    measured value, first for ``'artm'`` and then for ``'gensim'``.

    Args:
        train_n_dw_matrix: Matrix whose rows expose ``indices`` and ``data``
            and which has a two-element ``shape`` (assumes a row-iterable
            scipy sparse matrix -- TODO confirm).
        test_n_dw_matrix: Matrix passed to the evaluation callback.
        T: Number of topics (``num_topics`` for gensim, first dimension of
            ``phi``).
        num_2_token: Passed to gensim as ``id2word``.
    """
    # Materialise every document as a list: on Python 3 ``zip`` returns a
    # one-shot iterator, so each document would be empty after the first
    # traversal, while the corpus is reused for all seeds and for training,
    # theta extraction and perplexity.
    train_corpus = [
        list(zip(row.indices, row.data)) for row in train_n_dw_matrix
    ]
    # The shape does not depend on the seed, so it is read once.
    D, W = train_n_dw_matrix.shape

    for seed in [42, 7, 777, 12]:
        model = LdaModel(
            train_corpus,
            alpha='auto',
            id2word=num_2_token,
            num_topics=T,
            iterations=500,
            random_state=seed,
        )
        gensim_phi = exp_common.get_phi(model)
        gensim_theta = exp_common.get_theta(train_corpus, model)
        print('gensim perplexity')
        print(np.exp(-model.log_perplexity(train_corpus)))

        random_gen = np.random.RandomState(seed)
        phi = common.get_prob_matrix_by_counters(
            random_gen.uniform(size=(T, W)).astype(np.float64))
        theta = common.get_prob_matrix_by_counters(
            np.ones(shape=(D, T)).astype(np.float64))
        # presumably 100 iterations, one regularizer each -- TODO confirm
        phi, theta = default.Optimizer(
            [regularizers.Additive(0.1, 0.)] * 100,
            verbose=False,
        ).run(train_n_dw_matrix, phi, theta)

        callback = experiments.default_callback(
            train_n_dw_matrix=train_n_dw_matrix,
            test_n_dw_matrix=test_n_dw_matrix,
            top_pmi_sizes=[5, 10, 20, 30],
            top_avg_jaccard_sizes=[10, 50, 100, 200],
            measure_time=True,
        )
        callback.start_launch()
        # Index 0 holds the project model, index 1 the gensim model; the
        # report below reads the values back by those indices.
        callback(0, phi, theta)
        callback(1, gensim_phi, gensim_theta)

        for label, index in (('artm', 0), ('gensim', 1)):
            print(label)
            for name, values in callback.launch_result.items():
                print('\t{}: {}'.format(name, values[index]))