def save(name, ext, target, sep=",", dec=".", rem=None, hidden=None, lr=0.1, pat=5, d=1, l="mse", act="tanh", e=200, bsize=100, verb=0, ts=.2):
    """Train a Regressor on the "<name>.<ext>" data set and save it to disk.

    Parameters
    ----------
    name, ext : str
        Data file name and extension; the file loaded is "<name>.<ext>".
    target : str
        Name of the target column passed to Dataset.
    sep, dec : str
        Field separator and decimal mark of the data file.
    rem : list, optional
        Columns to remove (defaults to an empty list).
    hidden : list, optional
        Hidden-layer sizes for the Regressor (defaults to [100]).
    lr, pat, d, l, act, e, bsize : hyperparameters
        Learning rate, patience, delta, loss name, activation, epochs,
        batch size — forwarded to Regressor / fit.
    verb : int
        Verbosity; > 0 also plots training curves and returns the Dataset.
    ts : float
        Test-set fraction forwarded to Dataset as testSize.

    Returns
    -------
    Dataset or None
        The Dataset instance when verb > 0, otherwise None.
    """
    # Mutable default arguments ([] / [100]) are shared across calls; use
    # None sentinels and materialize the historical defaults here instead.
    rem = [] if rem is None else rem
    hidden = [100] if hidden is None else hidden

    ds = Dataset("{}.{}".format(name, ext), target, sep, dec, rem, testSize=ts)

    # Split the data: hold out 10% of the training portion for validation.
    Xtrain, Xvalid, yTrain, yValid = train_test_split(ds.Xtrain, ds.ytrain.values, test_size=0.1, shuffle=True)

    model = Regressor(hidden=hidden, lr=lr, pat=pat, delta=d, loss=l, act=act)
    model.fit(Xtrain, yTrain, Xvalid, yValid, ep=e, bs=bsize, v=verb)
    if verb > 0:
        model.plot()

    print("Metrics for", name, "data set:")
    model.metrics(ds.Xtest, ds.ytest.values)

    print("Saving model...")
    model.save(name)
    if verb > 0:
        return ds
    print()
def Mw_PolyReg(MagSize, MagError, Coeff):
    """Convert a magnitude via a polynomial defined by *Coeff*.

    Returns a ``(magnitude, error)`` tuple; the propagated error is not
    implemented yet and is always ``None`` (``MagError`` is currently unused).
    """
    converted = Reg.PolyFun(Coeff, MagSize)
    # Error propagation still to implement.
    return converted, None
# NOTE(review): this chunk opens mid-expression — the assignment and opening
# bracket of this hyperparameter grid (presumably parameters_decisiontree,
# given its use below) lie outside the visible span.
    'max_depth': 1,
    'max_leaf_nodes': 2
}, {
    'max_depth': 3,
    'max_leaf_nodes': 2
}, {
    'max_depth': 5,
    'max_leaf_nodes': 5
}, {
    'max_depth': 7,
    'max_leaf_nodes': 5
}]

# create Regressors (a Regressor is one algorithm and a list of parameters)
models = [
    Regressor(name="RandomForest", model=RandomForestRegressor, parameters=parameters_randomforest),
    Regressor(name="DecisionTree", model=DecisionTreeRegressor, parameters=parameters_decisiontree)
]

# create a DataSetting bundling data, candidate models and the loss metric
ds = DataSetting(y=y, x=x, models=models, loss_function=rmse)

# fit all models over their parameter grids
ds.evaluate_all()

# show results (per-model losses)
ds.collect_losses()
import DataStructure
import MonteCarloSimulation
import XMLInterface
import Classifier
import Regressor
import HTMLInterface

# =============================================================================
# Main body
# =============================================================================
# Load a system diagram from XML, run a Monte Carlo simulation over it, score
# classification/regression surrogates for one gate, and export an HTML report.

# Build the in-memory diagram from the project XML file.
diagram = DataStructure.Diagram()
xmlParser = XMLInterface.XMLParser()
xmlParser.ImportDiagramXMLFormat('ControlSystemProject.xml', diagram)

calculator = MonteCarloSimulation.Calculator()
checker = MonteCarloSimulation.Checker()
classifier = Classifier.Classifier()
regressor = Regressor.Regressor()
htmlInterface = HTMLInterface.HTMLCreator()

# Only run the (expensive) simulation when the diagram satisfies its
# structural constraints.
if checker.CheckConstraints(diagram) is True:
    noOfTrials = 10000
    statistics = calculator.MeasureStatistics(noOfTrials, diagram)
    histogramName = "Histogram"
    gateName = "MidProject"  # gate whose ML surrogates are evaluated below
    accuracyOfClassificationMethods = classifier.AccuracyOfClassificationMethods(noOfTrials, diagram, gateName)
    accuracyOfRegressionMethods = regressor.AccuracyOfRegressionMethods(noOfTrials, diagram, gateName)
    # Render everything into an HTML report based on the template file.
    templateFile = "DiagramTemplate.html"
    targetFile = "Risk Assessment.html"
    htmlInterface.ExportDiagramAtHTMLFormat(diagram, templateFile, targetFile, statistics, histogramName, accuracyOfClassificationMethods, accuracyOfRegressionMethods)
# NOTE(review): chunk starts mid-function — this `return` belongs to a
# definition whose `def` line lies outside the visible span.
    return sum(X, 1)

# Random binary gene pool: 300 individuals x 6 genes. Values below 0.5 become
# 0, the remainder become 1 (the two masks together binarize every entry).
GENEPOOL = rand(300, 6)
GENEPOOL[GENEPOOL < 0.5] = 0
GENEPOOL[GENEPOOL > 0] = 1
G_List = zeros([GENEPOOL.shape[0], 1])  # fitness (SSE) per individual

# Synthetic regression data on [-10, 10): noisy training targets,
# noise-free test targets. linfun is defined outside this span.
N = 1000
X_Train = rand(N) * 20 - 10
Y_Train = linfun(X_Train) + rand(X_Train.size)
X_Test = rand(100) * 20 - 10
Y_Test = linfun(X_Test)

# Evaluate every genome: train a Regressor configured by the gene vector and
# score it by sum of squared errors on the test set.
for GEN_I in range(GENEPOOL.shape[0]):
    print(GENEPOOL[GEN_I, :])
    if not sum(GENEPOOL[GEN_I, :]) == 0:
        Reg = Regressor(GENEPOOL[GEN_I, :])
        Reg.learn(X_Train, Y_Train)
        R = sum((Reg.eval(X_Test) - Y_Test)**2)
    else:
        R = 1E16  # all-zero genome selects nothing — penalize heavily
    G_List[GEN_I] = R

# Report the best (lowest-SSE) genome.
minind = argmin(G_List)
print(G_List[minind])
print(GENEPOOL[minind, :])
import numpy as np
import sys
sys.path.insert(0, '../')
import Regressor

# Parse the yacht hydrodynamics data set: whitespace-separated rows that are
# expected to contain 7 fields (6 features + the target in column 6).
row_list = []
# `with` guarantees the data file is closed (the original leaked the handle).
with open('yacht_hydrodynamics.data', 'r') as data:
    for line in data:
        # str.split() with no argument collapses runs of whitespace and drops
        # empty fields. This replaces the original clean-up loop, which
        # deleted elements from the list while iterating over it — that skips
        # the element after each deletion and can raise IndexError on blank
        # lines.
        line_list = line.split()
        if not line_list:
            continue  # ignore blank lines entirely
        if len(line_list) != 7:
            # Flag malformed rows but keep them, matching the original flow.
            print(line_list)
        row_list.append(line_list)

# Column 6 is the regression target. (As in the original, the full row —
# including the target column — is passed as the feature matrix.)
output_list = [row[6] for row in row_list]
Regressor.callAllRegressor(np.array(row_list).astype(float),
                           np.array(output_list).astype(float),
                           len(row_list))
import numpy as np
import sys
sys.path.insert(0, '../')
import Regressor

# Load the Parkinson's telemonitoring data (comma-separated). Column 4 is
# motor_UPDRS and column 5 is total_UPDRS; all remaining columns are features.
# `with` guarantees the file handle is closed (the original leaked it).
with open('parkinsons_updrs.data', 'r') as park_file:
    x = [line.strip().split(',') for line in park_file]

# Extract both targets before mutating the rows.
motor_UPDRS_list = [row[4] for row in x]
total_UPDRS_list = [row[5] for row in x]

# Remove both target columns from every feature row. The original did
# `del val[4]` followed by `del val[5]`: after the first delete shifts the
# row left, the second delete removes what was originally column 6 — so
# total_UPDRS stayed in the feature matrix. Deleting the slice removes the
# intended columns 4 and 5 atomically.
for val in x:
    del val[4:6]

# Train the model using the training sets, once per target.
print('MOTOR')
Regressor.callAllRegressor(np.array(x).astype(float), np.array(motor_UPDRS_list).astype(float), len(x))
print('TOTAL')
Regressor.callAllRegressor(np.array(x).astype(float), np.array(total_UPDRS_list).astype(float), len(x))
def test(benchmark, violated_const_ratio, test_seed, start_point_seed=2):
    """Benchmark plain vs. constraint-regularized regressors over many splits.

    For each training-set size and each of 20 split seeds, trains four models
    (plain Regressor; SBRregressor with a fixed multiplier; SBRregressor with
    a gradually-updated multiplier; SBRregressor2 with per-constraint
    multipliers), records their test performance plus a median-predictor
    baseline, and pickles the whole result dict under ``results/``.

    Parameters
    ----------
    benchmark : benchmark identifier forwarded to Dataset via ``params``.
    violated_const_ratio : float
        Used to create a training set with a specific ratio of violated
        constraints.
    test_seed : int
        Data-generation seed (stored in ``params`` and in the result dict).
    start_point_seed : int
        Random seed of pytorch, so all models start from the same initial
        point.
    """
    res = {'test_seed': test_seed}  # store all model results
    params = {
        'epochs': 150,
        'n_data': 4000,
        'batch_size': 256,
        'violated_const_ratio': violated_const_ratio,  # training-set violated-constraint ratio
        'benchmark': benchmark,
        'split': [0.5, 0.25, 0.25],
        'seed': test_seed
    }
    d_trainall = Dataset(params, 'train', 'cpu')
    d_test = Dataset(params, 'test', 'cpu')
    d_valall = Dataset(params, 'valid', 'cpu')
    res['d_test'] = d_test
    X_test, y_test = d_test._dataset
    val_size = 300  # fix validation set size

    for train_size in [200, 400, 600, 800, 1000]:
        res[train_size] = {}
        for split_seed in range(20):
            print('train size = {} , seed = {}'.format(train_size, split_seed))
            np.random.seed(split_seed)

            # Subsample the training pool. NOTE(review): np.random.choice
            # defaults to replace=True, so the subset may contain duplicate
            # rows — confirm this is intended.
            X_train, y_train = copy.deepcopy(d_trainall._dataset)
            idx_train = np.random.choice(list(range(len(X_train))), train_size)
            X_train, y_train = X_train[idx_train, :], y_train[idx_train]
            d_train = copy.deepcopy(d_trainall)
            d_train._dataset = (X_train, y_train)
            # Baseline: constant median-of-train prediction.
            y_med_pred = np.median(y_train) * np.ones(len(y_test))

            # Subsample a fixed-size validation set the same way.
            X_val, y_val = copy.deepcopy(d_valall._dataset)
            idx_val = np.random.choice(list(range(len(X_val))), val_size)
            X_val, y_val = X_val[idx_val, :], y_val[idx_val]
            d_val = copy.deepcopy(d_valall)
            d_val._dataset = (X_val, y_val)

            res[train_size][split_seed] = {'d_val': d_val, 'd_train': d_train}

            # Plain regressor. start_point_seed is the pytorch random seed —
            # makes sure all models start from the same initial point.
            model_1 = Regressor(params, d_train, d_test, d_val, start_point_seed)
            model_1.train()
            tmp = model_1.test()
            res[train_size][split_seed]['model_1_perf'] = copy.deepcopy(tmp)

            # Regularization with single multiplier = 1 (fixed).
            model_2_1 = SBRregressor(params, d_train, d_test, d_val, start_point_seed)
            model_2_1.train(options={'mult_fixed': True})
            tmp = model_2_1.test()
            res[train_size][split_seed]['model_2_1_perf'] = copy.deepcopy(tmp)

            # Regularization with a single multiplier updated gradually,
            # starting from 0. The learning rate is tuned once (split_seed 0)
            # and reused for the remaining seeds.
            model_2 = SBRregressor(params, d_train, d_test, d_val, start_point_seed)
            if split_seed == 0:
                model_2.opt_lr_rate()
                best_lr_model_2 = copy.deepcopy(model_2._LR_rate)
            else:
                model_2._LR_rate = copy.deepcopy(best_lr_model_2)
            model_2.train(options={'mult_fixed': False})
            tmp = model_2.test()
            res[train_size][split_seed]['model_2_perf'] = copy.deepcopy(tmp)

            # Regularization with a multiplier per constraint, each updated
            # gradually, starting from 0; same once-per-size LR tuning.
            model_3 = SBRregressor2(params, d_train, d_test, d_val, start_point_seed)
            if split_seed == 0:
                model_3.opt_lr_rate()
                best_lr_model_3 = copy.deepcopy(model_3._LR_rate)
            else:
                model_3._LR_rate = copy.deepcopy(best_lr_model_3)
            model_3.train(options={'mult_fixed': False})
            tmp = model_3.test()
            res[train_size][split_seed]['model_3_perf'] = copy.deepcopy(tmp)

            res[train_size][split_seed]['model_3'] = copy.deepcopy(model_3)
            res[train_size][split_seed]['model_2'] = copy.deepcopy(model_2)
            res[train_size][split_seed]['model_1'] = copy.deepcopy(model_1)
            res[train_size][split_seed]['model_2_1'] = copy.deepcopy(model_2_1)
            # Dummy-model baseline: MAE of the constant median prediction.
            res[train_size][split_seed]['dump_model_perf'] = [
                copy.deepcopy(mae(y_test, y_med_pred)), 0, 0
            ]

    filename = str(benchmark) + '_test_seed_{}'.format(test_seed) + \
        "_vconst" + str(violated_const_ratio) + '_start_point_seed_' + str(start_point_seed) + '.pkl'
    # `with` replaces the original bare open(): the handle was never closed,
    # risking an unflushed/truncated pickle on interpreter exit.
    with open(path_to_repo + 'results/' + filename, 'wb') as file_handle:
        pickle.dump(res, file_handle)