def hist(iterations, conf):
    """Repeatedly train an SLP and an MLP and histogram their best epochs.

    Each run draws a fresh data split, trains a single-layer perceptron
    (no hidden layer) and a multi-layer perceptron (61 hidden units), and
    tallies the 1-based epoch at which each net achieved its best training
    error (reported via the ``best_train_i`` global, which is presumably
    written by ``default_callbacks`` during ``fit`` — TODO confirm).
    Both tallies are plotted at the end.

    Args:
        iterations: number of independent training runs to aggregate.
        conf: configuration dict (``epochs``, ``datasets``, ``ratio``,
            ``features``, ``balanced``, ``type``).
    """
    global train_errors, valid_errors, best_train_i, config
    config = conf
    best_epoch_slp = np.zeros(conf['epochs'])
    best_epoch_mlp = np.zeros(conf['epochs'])
    # BUG FIX: the loop previously ran conf['epochs'] times and ignored the
    # `iterations` parameter entirely; each pass is one full training run,
    # so it repeats `iterations` times now.
    for i in range(iterations):
        # BUG FIX: five values (including ids) are unpacked, so getData must
        # be asked for ids — it was called with return_ids=False (the
        # 4-tuple form; compare the return_ids=True call in train()).
        training_data, training_targets, valid_data, valid_target, ids = getData(
            size=conf['datasets'], ratio=conf['ratio'], features=conf['features'],
            balanced=conf['balanced'], type=conf['type'], return_ids=True)
        # Reset the error traces the callbacks write into before each fit.
        train_errors = np.zeros(conf['epochs'])
        valid_errors = np.zeros(conf['epochs'])
        slp = getNet(units=[], n_iter=conf['epochs'], callbacks=default_callbacks)
        pipeline_slp = learning_utils.getPipeline(training_data, slp, 'neural_network')
        pipeline_slp.fit(training_data, training_targets)
        # best_train_i is 1-based, hence the -1 when indexing the tally.
        best_epoch_slp[best_train_i - 1] += 1
        train_errors = np.zeros(conf['epochs'])
        valid_errors = np.zeros(conf['epochs'])
        mlp = getNet(units=[61], n_iter=conf['epochs'], callbacks=default_callbacks)
        pipeline_mlp = learning_utils.getPipeline(training_data, mlp, 'neural_network')
        pipeline_mlp.fit(training_data, training_targets)
        best_epoch_mlp[best_train_i - 1] += 1
    learning_utils.plot_hist(best_epoch_slp, best_epoch_mlp)
def train(conf):
    """Cross-validate two RBF-kernel SVMs and fit the first one.

    Builds SVC pipelines for C=1 and C=100, reports 5-fold CV scores for
    both, then fits and returns the C=1 pipeline.

    Args:
        conf: configuration dict (``datasets``, ``ratio``, ``type``,
            ``balanced``).

    Returns:
        Tuple of (fitted C=1 pipeline, array of training-data column names).
    """
    training_data, training_targets, ids = learning_utils.getData(
        conf["datasets"],
        ratio=conf["ratio"],
        type=conf["type"],
        split=True,
        balanced=conf["balanced"],
        shuffle=True,
        return_ids=True,
    )
    clf = svm.SVC(C=1, kernel="rbf")
    clf1 = svm.SVC(C=100, kernel="rbf")  # loss=conf['loss_type'])
    # clf1 = svm.LinearSVC(C=.9)
    # Attach sample ids so downstream consumers can trace predictions.
    clf.ids = ids
    pipeline = learning_utils.getPipeline(training_data, clf, "svm")
    pipeline1 = learning_utils.getPipeline(training_data, clf1, "svm")
    scores = cross_validation.cross_val_score(pipeline, training_data, training_targets, cv=5, verbose=True, n_jobs=-1)
    scores1 = cross_validation.cross_val_score(
        pipeline1, training_data, training_targets, cv=5, verbose=True, n_jobs=-1
    )
    # BUG FIX: `print scores` is Python-2-only syntax; the call form prints
    # identically for a single argument and also parses under Python 3.
    print(scores)
    print(scores1)
    return pipeline.fit(training_data, training_targets), training_data.columns.values
def scores(conf):
    """Assemble a verbose grid search over neural-network hyperparameters.

    Candidate architectures are single-sigmoid-hidden-layer nets of varying
    width with a softmax output.  The search object is returned unfitted,
    together with the training data and targets, so the caller launches it.

    Returns:
        Tuple of (GridSearchCV instance, training data, training targets).
    """
    hidden_widths = (10, 20, 35, 50, 75)
    param_grid = {
        'nn__n_iter': range(10, 25),
        # One sigmoid hidden layer per candidate width, softmax output.
        'nn__layers': [
            [Layer(type="Sigmoid", units=width, name="h0"),
             Layer(type="Softmax", name="output")]
            for width in hidden_widths
        ],
        'nn__learning_rate': [.001, .01, .05],
        'nn__batch_size': [1, 10, 25, 50],
        'nn__dropout_rate': [0, .1, .25, .5],
    }
    data, targets = learning_utils.getData(conf['datasets'], type=conf['type'],
                                           split=True, balanced=conf['balanced'],
                                           shuffle=True)
    network = getNet(valid_size=0)
    pipe = learning_utils.getPipeline(data, network, 'nn')
    search = GridSearchCV(pipe, param_grid, verbose=10)
    return search, data, targets
def train(conf):
    """Cross-validate two RBF-kernel SVMs and fit the first one.

    Builds SVC pipelines for C=1 and C=100, reports 5-fold CV scores for
    both, then fits and returns the C=1 pipeline.

    Args:
        conf: configuration dict (``datasets``, ``ratio``, ``type``,
            ``balanced``).

    Returns:
        Tuple of (fitted C=1 pipeline, array of training-data column names).
    """
    training_data, training_targets, ids = learning_utils.getData(conf['datasets'], ratio=conf['ratio'],
                                                                  type=conf['type'], split=True,
                                                                  balanced=conf['balanced'], shuffle=True,
                                                                  return_ids=True)
    clf = svm.SVC(C=1, kernel='rbf')
    clf1 = svm.SVC(C=100, kernel='rbf')  # loss=conf['loss_type'])
    # clf1 = svm.LinearSVC(C=.9)
    # Attach sample ids so downstream consumers can trace predictions.
    clf.ids = ids
    pipeline = learning_utils.getPipeline(training_data, clf, 'svm')
    pipeline1 = learning_utils.getPipeline(training_data, clf1, 'svm')
    scores = cross_validation.cross_val_score(pipeline, training_data, training_targets, cv=5, verbose=True, n_jobs=-1)
    scores1 = cross_validation.cross_val_score(pipeline1, training_data, training_targets, cv=5, verbose=True, n_jobs=-1)
    # BUG FIX: `print scores` is Python-2-only syntax; the call form prints
    # identically for a single argument and also parses under Python 3.
    print(scores)
    print(scores1)
    return pipeline.fit(training_data, training_targets), training_data.columns.values
def train(conf):
    """Fit a depth-limited decision tree and report its 5-fold CV accuracy.

    Args:
        conf: configuration dict (``datasets``, ``type``, ``balanced``,
            ``criterion``).

    Returns:
        Tuple of (fitted pipeline, array of training-data column names).
    """
    data, targets = learning_utils.getData(conf['datasets'], type=conf['type'],
                                           split=True, balanced=conf['balanced'],
                                           shuffle=True)
    classifier = tree.DecisionTreeClassifier(criterion=conf['criterion'],
                                             splitter='best', max_depth=12)
    # clf = RandomForestClassifier(criterion=conf['criterion'], max_depth=12)
    pipe = learning_utils.getPipeline(data, classifier, 'decision_tree')
    cv_scores = cross_validation.cross_val_score(pipe, data, targets, cv=5)
    mean_score = cv_scores.mean()
    spread = cv_scores.std() * 2
    print("Accuracy: %0.2f (+/- %0.2f)" % (mean_score, spread))
    fitted = pipe.fit(data, targets)
    return fitted, data.columns.values
def scores(conf):
    """Grid-search SVM hyperparameters (C, gamma, kernel) and report results.

    Delegates the actual search and reporting to ``learning_utils.gs``.

    Args:
        conf: configuration dict (``datasets``, ``type``, ``balanced``).
    """
    grid = {
        'svm__C': np.logspace(-2, 5, 8),
        'svm__gamma': np.logspace(-9, 2, 12),
        'svm__kernel': ['linear', 'rbf'],
    }
    data, targets = learning_utils.getData(conf['datasets'], type=conf['type'],
                                           split=True, balanced=conf['balanced'],
                                           shuffle=True)
    pipe = learning_utils.getPipeline(data, svm.SVC(), 'svm')
    search = GridSearchCV(pipe, grid, n_jobs=-1, verbose=1)
    learning_utils.gs(search, data, targets)
def scores(conf):
    """Grid-search SVM hyperparameters (C, gamma, kernel) and report results.

    Delegates the actual search and reporting to ``learning_utils.gs``.

    Args:
        conf: configuration dict (``datasets``, ``type``, ``balanced``).
    """
    search_space = {
        "svm__C": np.logspace(-2, 5, 8),
        "svm__gamma": np.logspace(-9, 2, 12),
        "svm__kernel": ["linear", "rbf"],
    }
    training_data, training_targets = learning_utils.getData(
        conf["datasets"],
        type=conf["type"],
        split=True,
        balanced=conf["balanced"],
        shuffle=True,
    )
    estimator = svm.SVC()
    svm_pipeline = learning_utils.getPipeline(training_data, estimator, "svm")
    searcher = GridSearchCV(svm_pipeline, search_space, n_jobs=-1, verbose=1)
    learning_utils.gs(searcher, training_data, training_targets)
def train_custom(conf, plot_path, debug, verbose, gs_params=None, callbacks=default_callbacks):
    """Compare feature-set choices by mean validation error over 100 runs.

    For each feature-selection mode ('all', 'md', 'mir', 'feat_sel',
    'random') a network is trained 100 times; the per-run validation error
    traces are averaged and the five mean curves are plotted together.

    Args:
        conf: configuration dict (units, rates, epochs, ...); mutated in
            place (``datasets``, ``units``, ``n_input``, ``features``).
        plot_path: path stored in the module-level ``plot`` global.
        debug, verbose: forwarded to ``getNet``.
        gs_params: unused here; kept for signature parity with ``train``.
        callbacks: training callbacks forwarded to ``getNet``.
    """
    global config, plot, train_errors, valid_errors
    plot = plot_path
    train_errors = np.zeros(conf['epochs'])
    valid_errors = np.zeros(conf['epochs'])
    # BUG FIX: the per-mode accumulator was named `all`, shadowing the
    # builtin; renamed to errors_per_run / mean_errors.
    mean_errors = list()
    for x in ['all', 'md', 'mir', 'feat_sel', 'random']:
        errors_per_run = list()
        if x in ('all', 'md', 'mir'):
            training_data, training_targets, valid_data, valid_targets = getData(conf['datasets'], 0, None, None, type=x)
        elif x == 'random':
            from utils import features
            import random
            # Sample a random subset (1..115) of all known feature names.
            conf['features'] = random.sample(np.hstack(features.values()), random.randint(1, 115))
            training_data, training_targets, valid_data, valid_targets = getData(conf['datasets'], 0, conf['features'], balanced=conf['balanced'], type=x)
        else:
            training_data, training_targets, valid_data, valid_targets = getData(conf['datasets'], 0, conf['features'], balanced=conf['balanced'], type=x)
        # if there is not enough data available
        conf['datasets'] = training_data.shape[0]
        if conf['units'] is None:
            conf['units'] = [int(math.ceil((training_data.shape[1] + 7) / 2))]
        conf['n_input'] = training_data.shape[1]
        config = conf
        units = [int(math.ceil((training_data.shape[1] + 7) / 2))]
        for i in range(1, 101):
            net = getNet(units, conf['learning_rate'], conf['epochs'], conf['learning_rule'],
                         conf['batch_size'], conf['weight_decay'], conf['dropout_rate'],
                         conf['loss_type'], n_stable=conf['n_stable'], debug=debug,
                         verbose=verbose, callbacks=callbacks,
                         # valid_set=(valid_data, valid_targets)
                         valid_size=conf['ratio']
                         )
            pipeline = learning_utils.getPipeline(training_data, net)
            pipeline.fit(training_data, training_targets)
            # NOTE(review): this appends a reference to the shared global
            # `valid_errors` array, not a copy — if the callbacks mutate that
            # same array in place across runs, all 100 entries alias one
            # buffer. Verify against the callback implementation.
            errors_per_run.append(valid_errors)
        mean_errors.append(np.array(errors_per_run).mean(axis=0))
    learning_utils.plot_lines(data=mean_errors, labels=["all", "md", "mir", "feat_sel", "random"],
                              xlabel="number of epochs", ylabel=config['loss_type'],
                              title="mean training and validation error", suptitle=None,
                              path="learning/nn/plots/comb/test.png")
def scores(conf):
    """Grid-search hyperparameters for the configured tree classifier.

    ``conf['tree']`` selects the estimator: 'tree' (DecisionTree),
    'random' (RandomForest) or 'extra' (ExtraTrees).  The search itself is
    delegated to ``learning_utils.gs``.

    Args:
        conf: configuration dict (``tree``, ``datasets``, ``type``,
            ``balanced``).

    Raises:
        ValueError: if ``conf['tree']`` is not one of the three known kinds.
    """
    if conf['tree'] == 'tree':
        parameters = {
            'tree__criterion': ['gini', 'entropy'],
            'tree__splitter': ['best', 'random'],
            'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
            'tree__max_depth': range(1, 20),
            'tree__presort': [True, False]
        }
        clf = tree.DecisionTreeClassifier()
    elif conf['tree'] == 'random':
        parameters = {
            'tree__criterion': ['gini', 'entropy'],
            'tree__n_estimators': range(5, 50),
            'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
            'tree__max_depth': range(1, 20),
            'tree__bootstrap': [True, False],
        }
        clf = ensemble.RandomForestClassifier()
    elif conf['tree'] == 'extra':
        parameters = {
            'tree__criterion': ['gini', 'entropy'],
            'tree__n_estimators': range(5, 50),
            'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
            'tree__max_depth': range(1, 20),
            'tree__bootstrap': [True, False],
        }
        clf = ensemble.ExtraTreesClassifier()
    else:
        # BUG FIX: an unrecognised value previously fell through and crashed
        # later with an unhelpful NameError on `clf`; fail fast instead.
        raise ValueError("unknown tree type: %r" % (conf['tree'],))
    training_data, training_targets = learning_utils.getData(conf['datasets'], type=conf['type'],
                                                             split=True, balanced=conf['balanced'],
                                                             shuffle=True)
    pipeline = learning_utils.getPipeline(training_data, clf, 'tree')
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
    learning_utils.gs(grid_search, training_data, training_targets)
def train(conf, plot_path, debug, verbose, gs_params=None, callbacks=default_callbacks):
    """Train a neural network, or sweep hidden-unit counts if configured.

    When ``conf['unit_range']`` is set, one network is trained per hidden
    unit count in the range and the error curves (collected in the
    ``unit_iter_*`` module globals, presumably by the callbacks — TODO
    confirm) are plotted; nothing is returned in that mode.  Otherwise a
    single network is trained and the fitted model (with sample ids
    attached) is returned.

    Args:
        conf: configuration dict; mutated in place (``datasets``,
            ``units``, ``n_input``).
        plot_path: path stored in the module-level ``plot`` global.
        debug, verbose: forwarded to ``getNet``.
        gs_params: optional grid-search parameter dict; only its keys are
            inspected here.
        callbacks: training callbacks forwarded to ``getNet``.

    Returns:
        The fitted model (single-training mode) or None (sweep mode).
    """
    global config, plot, train_errors, valid_errors
    plot = plot_path
    # BUG FIX: gs_params defaults to None, so the bare membership test
    # `'neural_network__n_iter' in gs_params` raised TypeError whenever the
    # default was used; guard for None first.
    if gs_params and 'neural_network__n_iter' in gs_params:
        # 20 slots matches the n_iter grid used during grid search.
        train_errors = np.zeros(20)
        valid_errors = np.zeros(20)
    else:
        train_errors = np.zeros(conf['epochs'])
        valid_errors = np.zeros(conf['epochs'])
    training_data, training_targets, valid_data, valid_targets, ids = getData(conf['datasets'], conf['ratio'],
                                                                              conf['features'],
                                                                              balanced=conf['balanced'],
                                                                              type=conf['type'], return_ids=True)
    # if there is not enough data available
    conf['datasets'] = training_data.shape[0]
    if conf['units'] is None:
        # Default hidden-layer width: halfway between input size and output.
        conf['units'] = [int(math.ceil((training_data.shape[1] + 7) / 2))]
    conf['n_input'] = training_data.shape[1]
    config = conf
    if conf['unit_range'] is not None:
        del conf['units']
        for units in range(conf['unit_range'][0], conf['unit_range'][1] + 1):
            net = getNet([units], conf['learning_rate'], conf['epochs'], conf['learning_rule'],
                         conf['batch_size'], conf['weight_decay'], conf['dropout_rate'],
                         conf['loss_type'], n_stable=conf['n_stable'], debug=debug,
                         verbose=verbose, callbacks=callbacks,
                         valid_size=conf['ratio']
                         )
            pipeline = learning_utils.getPipeline(training_data, net, 'neural_network')
            pipeline.fit(training_data, training_targets)
        # Final-epoch errors per unit count.
        learning_utils.plot_lines(data=[unit_iter_train_error, unit_iter_valid_error],
                                  labels=["Training error", "Validation error"],
                                  xlabel="number of hidden units", ylabel=config['loss_type'],
                                  title="training and validation error", suptitle=None, conf=config,
                                  additionals=[
                                      [np.array(unit_iter_train_error).argmin() + conf['unit_range'][0],
                                       np.array(unit_iter_train_error).min()],
                                      [np.array(unit_iter_valid_error).argmin() + conf['unit_range'][0],
                                       np.array(unit_iter_valid_error).min()]],
                                  begin=conf['unit_range'][0],
                                  path="learning/nn/plots/unit_iter/{}_{}.png".format(conf['unit_range'],
                                                                                      conf['epochs']))
        # Best-epoch errors per unit count.
        learning_utils.plot_lines(data=[unit_iter_best_train_error, unit_iter_best_valid_error],
                                  labels=["Training error", "Validation error"],
                                  xlabel="number of hidden units", ylabel=config['loss_type'],
                                  title="training and validation error", suptitle=None, conf=config,
                                  additionals=[
                                      [np.array(unit_iter_best_train_error).argmin() + conf['unit_range'][0],
                                       np.array(unit_iter_best_train_error).min()],
                                      [np.array(unit_iter_best_valid_error).argmin() + conf['unit_range'][0],
                                       np.array(unit_iter_best_valid_error).min()]],
                                  begin=conf['unit_range'][0],
                                  path="learning/nn/plots/unit_iter/{}_{}_{}.png".format(conf['unit_range'],
                                                                                         conf['epochs'], "best"))
    else:
        net = getNet(conf['units'], conf['learning_rate'], conf['epochs'], conf['learning_rule'],
                     conf['batch_size'], conf['weight_decay'], conf['dropout_rate'],
                     conf['loss_type'], n_stable=conf['n_stable'], debug=debug,
                     verbose=verbose, callbacks=callbacks,
                     # valid_set=(np.array(valid_data), valid_targets),
                     valid_size=conf['ratio']
                     )
        pipeline = learning_utils.getPipeline(training_data, net, 'neural_network')
        model = pipeline.fit(training_data, training_targets)
        # Attach sample ids so downstream consumers can trace predictions.
        model.ids = ids
        return model