def test_data_size_vs_diff(dm, given_dict, infer_dict):
    # Read all data from the data model
    dm.read_data(normalize_data=False)
    #attr_list = [U_UNIVERSITY_CODE, PROGRAM_CODE, UNIVERSITY, MAJOR_CODE, TERM]
    attr_list = [U_UNIVERSITY_CODE, PROGRAM_CODE, UNIVERSITY]
    #attr_list = [MAJOR_CODE, PROGRAM_CODE, TERM]
    # Size of the data, split into ten steps
    data_size = len(dm.data)
    step_size = data_size // 10
    # Collect the data size and the |estimate - actual| gap at each step
    size = []
    accuracy = []
    for i in range(step_size, data_size, step_size):
        dm_test = DataModel("")
        dm_test.set_data(dm.data[:i])
        exp_test = Experimenter(dm_test, attr_list)
        actual = exp_test.get_actual_result(given_dict, infer_dict)
        estimation = exp_test.generic_get_estimated_result(given_dict, infer_dict)
        size.append(i)
        accuracy.append(abs(estimation - actual))
        print("Step:%d--->Actual:%f--->Estimate:%f" % (i, actual, estimation))
        print("-------------------------------------------------------------")
    plt.figure()
    plt.plot(size, accuracy)
    plt.title("Data Size vs Accuracy")
    plt.show()
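# Example invocation (a minimal usage sketch; the data path and the attribute values in
# given_dict/infer_dict are illustrative assumptions, not taken from the code above):
#
#     dm = DataModel("../data/admissions.csv")
#     test_data_size_vs_diff(dm,
#                            given_dict={PROGRAM_CODE: "CS"},
#                            infer_dict={UNIVERSITY: "Example University"})
#
# given_dict appears to hold the observed attribute/value pairs and infer_dict the pair being
# inferred; the resulting plot shows |estimate - actual| against the number of records used,
# in steps of one tenth of the full dataset.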
def main(args):
    dm = DataModel(args.gig_file, args.chat_file)
    dm.read_data()
    exp = Experimenter(dm)
    if args.classify:
        scores = exp.classify_gigs()
    if args.feature_values:
        scores = exp.evaluate_feature_values()
    return dm
def main():
    debug = 0
    sample_size = 8192
    #database = MFPT(debug=debug)
    database = Paderborn(debug=debug)
    database_acq = database.load()
    #print(database_acq)
    database_exp = Experimenter(database_acq, sample_size)
    database_exp.perform(Classifiers(), Scoring())
def __init__(self, trials=3):
    self.exp = Experimenter()
    self.mongo_coll_conn = Mongo(collection_name='optimisation')
    self.trials = trials
    self.config = load_config()
    self.task_type = self.config['Utils']['task_type']
    self.data_name = self.config['Utils'][self.task_type]['data_name']
def main(args):
    dm = DataModel(args.data_file)
    dm.read_data(to_read_count=10000)
    exp = Experimenter(dm,
                       process_datamodel=True,
                       serialise=False)
    t1 = time.time()
    exp.perform_multiclass_experiment(
        pred_mode=INDEPENDENT,
        use_exclusion=True,
        need_to_extract_features=True,
        prediction_file='../results/predictions_multiclass_independent_englishonly_legibleonly_wordunibigram_chartrigram_10000.csv',
        result_file='../results/results_multiclass_independent_englishonly_legibleonly_wordunibigram_chartrigram_10000.txt',
        english_only=True,
        legible_only=True)
    t2 = time.time()
    timeused = t2 - t1
    # Break the elapsed seconds into hours, minutes within the hour, and seconds
    logging.getLogger(LOGGER).info(
        'Time used in experiment (hour:min:sec): %d:%d:%d' %
        (timeused // 3600, (timeused % 3600) // 60, timeused % 60))
    return exp
def main(args):
    dm = DataModel(args.train_file)
    dm.read_train_data()
    exp = Experimenter(dm)
    distances = [x.get_distance() for x in dm.data]
    print(max(distances))
    print(min(distances))
    print(stats.mean(distances))
    t1 = time.time()
    t2 = time.time()
    timeused = t2 - t1
    # Break the elapsed seconds into hours, minutes within the hour, and seconds
    logging.getLogger(LOGGER).info(
        'Time used in experiment (hour:min:sec): %d:%d:%d' %
        (timeused // 3600, (timeused % 3600) // 60, timeused % 60))
    return exp
if args.check_estimator:
    common_params['eval_metrics'] += \
        '-CPrecIPSin0.0_10-CPrecIPSin0.0_100-CDCGIPSin0.0_100000-CARIPSin0.0' + \
        '-CPrecIPSin0.001_10-CPrecIPSin0.001_100-CDCGIPSin0.001_100000-CARIPSin0.001' + \
        '-CPrecIPSin0.003_10-CPrecIPSin0.003_100-CDCGIPSin0.003_100000-CARIPSin0.003' + \
        '-CPrecIPSin0.01_10-CPrecIPSin0.01_100-CDCGIPSin0.01_100000-CARIPSin0.01' + \
        '-CPrecIPSin0.03_10-CPrecIPSin0.03_100-CDCGIPSin0.03_100000-CARIPSin0.03' + \
        '-CPrecIPSin0.1_10-CPrecIPSin0.1_100-CDCGIPSin0.1_100000-CARIPSin0.1' + \
        '-CPrecIPSin0.3_10-CPrecIPSin0.3_100-CDCGIPSin0.3_100000-CARIPSin0.3'
else:
    common_params['eval_metrics'] = 'Prec_10-Prec_100' + '-CPrec_10-CPrec_100-CDCG_100000-CAR'
    if args.check_estimator:
        common_params['eval_metrics'] += \
            '-CPrecIPSin0.01_10-CPrecIPSin0.01_100-CDCGIPSin0.01_100000-CARIPSin0.01'

# set up experimenter
experimenter = Experimenter()
list_params = experimenter.set_search_params(args.cond_search, args.type_search)
list_params = experimenter.set_common_params(list_params, common_params)

save_result_file = dir_data_prepared + "result/" + \
    datetime.now().strftime('%Y%m%d_%H%M%S') + "_" + args.type_model + "_" + \
    args.name_experiment + '_tlt' + str(args.time_length_train) + ".csv"
if phase == 'test_phase':
    save_result_file = save_result_file.replace('.csv', '_test.csv')
print('save_result_file is {}'.format(save_result_file))

save_result_dir = os.path.dirname(save_result_file)
if not os.path.exists(save_result_dir):
    os.mkdir(save_result_dir)

print('Start experiment.')
t_init = datetime.now()
"num_factor": 40 } files["movietweetings-gte.csv"] = { "learning_rate": 0.0001, "reg_rate": 0.0, "batch_size": 256, "num_factor": 40 } files["ml-100k-gte.csv"] = { "learning_rate": 0.0005, "reg_rate": 0.0, "batch_size": 256, "num_factor": 40 } experimenter = Experimenter() experimenter.config_gpu() experimenter.addSamplingApproach(Cosine) experimenter.addSamplingApproach(TF_IDF) experimenter.addSamplingApproach(ARM) experimenter.addSamplingApproach(Random) experimenter.addMaxRejection(TotalLimit) experimenter.addMaxRejection(UniqueLimit) experimenter.addMaxRejection(Q3Total) experimenter.addMaxRejection(Q3Unique) experimenter.setModel(NeuMF) experimenter.setParameterFiles(files) experimenter.execute()
import argparse

import numpy as np

from experiments import EXPERIMENTS
from experimenter import Experimenter
import utils

parser = argparse.ArgumentParser()
parser.add_argument('-n', '--trials', type=int, default=1,
                    help="How many trials?")
parser.add_argument('-tb', '--tensorboard', action='store_true',
                    help="Should we write to TensorBoard")

rng = np.random.default_rng()

experiments = [0]
# experiments = list(sorted(EXPERIMENTS.keys()))
# experiments = list(range(31, 51))
# Experiment 0 is a test experiment
# experiments.remove(0)

if __name__ == '__main__':
    args = parser.parse_args()
    print(f"Running {args.trials} trial(s) of experiments {experiments}.")
    exp_runner = Experimenter(write_to_tensorboard=args.tensorboard)
    for exp in experiments:
        for _ in range(args.trials):
            exp_runner.run_experiment(exp, rng)
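# Example command lines (the script name run_experiments.py is an assumption; the flags are the
# ones defined by the parser above):
#
#     python run_experiments.py                 # one trial of experiment 0, no TensorBoard logging
#     python run_experiments.py -n 5 -tb        # five trials per experiment, with TensorBoard logging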