from argparse import ArgumentParser

from utils import name_to_classifier_object
from datasets import load_datasets, load_dataset, create_dataset

# Argument parser
parser = ArgumentParser(description='Collect data')
# NOTE(review): `required=True` makes the `default` below unreachable; one of
# the two is probably unintended. Behaviour is left as it was.
parser.add_argument('-a', '--algorithm', type=str, required=True,
                    default='rnd_forest',
                    help='The learning algorithm, one of '
                         '[rnd_forest, log_reg, svm, naive_bayes]')

# Exactly one dataset source must be given: synthetic or an arff file.
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-s', '--synthetic', type=str,
                   help='Create a synthetic dataset with the given parameters')
group.add_argument('-l', '--load-arff', type=str,
                   help='Load dataset from arff file with the given name')

parser.add_argument('-d', '--percentage-data', type=float, required=True,
                    help='The percentage of data used')
parser.add_argument('parameter', metavar='parameter', nargs='*',
                    help='Parameters to the algorithm in the form '
                         '<param_name>:<int|float>:<number>')
args = parser.parse_args()

classifier = name_to_classifier_object(args.algorithm)

if args.synthetic:
    # SECURITY: eval() executes arbitrary code taken from the command line.
    # Acceptable only while the caller is trusted; a literal-only parser
    # would be safer if the argument is always a plain Python literal.
    dataset = create_dataset(eval(args.synthetic))
elif args.load_arff:
    dataset = load_dataset({'name': args.load_arff})

# Split each '<param_name>:<int|float>:<number>' argument into parallel lists
# of names and converted values.
param_names = []
param_values = []
for param_str in args.parameter:
    name, type_, value_str = param_str.split(':')
    param_names.append(name)
    if type_ == 'int':
        param_values.append(int(value_str))
    elif type_ == 'float':
        # NOTE(review): the source is cut off right after this condition;
        # the float conversion is the presumed continuation, mirroring the
        # 'int' branch -- confirm against the complete file.
        param_values.append(float(value_str))
# One record per algorithm under comparison. 'time' and 'score' each collect
# one entry per evaluated data percentage.
algorithms = [
    {'name': 'rnd_forest', 'parameters': {'n_estimators': 50},
     'time': [], 'score': []},
    {'name': 'log_reg', 'parameters': {}, 'time': [], 'score': []},
]

# Percentage of data values
data_range = exp_incl_float_range(0.1, 10, 1, 1.5)


def draw(ax, plt):
    """Clear ``ax`` and replot the score curves of the first two algorithms.

    Each curve is drawn against the leading part of ``data_range`` that
    matches the number of scores recorded so far for that algorithm, so the
    function can be called while measurements are still being collected.

    ax  -- the axes object to draw on.
    plt -- the pyplot-like object whose ``draw()`` refreshes the figure.
    """
    ax.cla()
    # First algorithm in red, second in blue.
    for algorithm_data, line_format in zip(algorithms, ('r-', 'b-')):
        scores = algorithm_data['score']
        ax.plot(data_range[:len(scores)], scores, line_format)
    ax.set_xlabel('% data')
    ax.set_ylabel('Score')
    plt.draw()


plt.ion()
fig, ax = plt.subplots(1, 1)

# NOTE(review): the loop nesting was reconstructed from a source whose
# indentation was lost; draw() is assumed to run after every single
# measurement -- confirm against the original layout.
for percentage_data in data_range:
    for algorithm_data in algorithms:
        # Single-argument parenthesised print: same output on Python 2,
        # and also valid on Python 3.
        print('{}; {}'.format(algorithm_data['name'], str(percentage_data)))
        cl = name_to_classifier_object(algorithm_data['name'])
        # `elapsed` rather than `time`, so a `time` module is never shadowed.
        elapsed, score = generate_datum(dataset, cl, percentage_data,
                                        algorithm_data['parameters'])
        algorithm_data['time'].append(elapsed)
        algorithm_data['score'].append(score)
        draw(ax, plt)

plt.show()
# The output file name carries a microsecond-resolution timestamp.
plt.savefig(FIG_DIR + 'fig_{}.pdf'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S-%f')), format='pdf')