# --- Validation metrics for the model fitted earlier in the script ---
# `prediccions`, `sample_validation` and `N_valid` are defined above this section.
MSE_valid = np.sum((sample_validation.rings - prediccions)**2) / N_valid
print("MSE validation MLP:", MSE_valid)

# Normalised MSE = SSE / total sum of squares.  (N - 1) * variance equals the
# total sum of squares only for the sample variance (ddof=1); NumPy's default
# ddof=0 would inflate NMSE by a factor N / (N - 1).
NMSE_val = np.sum((sample_validation.rings - prediccions)**2) / (
    (N_valid - 1) * np.var(sample_validation.rings, ddof=1))
print("NMSE validation MLP:", NMSE_val)

R_squared = (1 - NMSE_val) * 100
print("Our model explain the {}% of the validation variance".format(R_squared))

#%% MLP MultiLayer
model_nnet = MLPRegressor(hidden_layer_sizes=[128, 64, 32],
                          alpha=0,
                          activation="logistic",
                          learning_rate='constant',
                          solver='lbfgs')

# --- First training stage ---
# NOTE(review): scikit-learn documents learning_rate_init as used only by the
# 'sgd' and 'adam' solvers, so with solver='lbfgs' these assignments are kept
# for parity with the original script but presumably have no effect.
model_nnet.learning_rate_init = 1e-3
model_nnet.max_iter = 500
model_nnet.fit(sample.loc[:, "male":"shell_weight"], sample.rings)
print("Final loss 1st training module: ", model_nnet.loss_)

# Validation metrics after the first stage.
prediccions = model_nnet.predict(sample_validation.loc[:, "male":"shell_weight"])
MAE = np.sum(abs(sample_validation.rings - prediccions)) / N_valid
print("MAE on validation data before refining:", MAE)

NMSE_val = np.sum((sample_validation.rings - prediccions)**2) / (
    (N_valid - 1) * np.var(sample_validation.rings, ddof=1))
print("NMSE validation MLP before refining:", NMSE_val)

# --- Second (refining) training stage ---
# Without warm_start the second fit() discards the first-stage weights and
# retrains from a fresh initialisation; enable it so this stage actually
# refines the previous solution.
model_nnet.warm_start = True
model_nnet.learning_rate_init = 1e-5
model_nnet.max_iter = 500
model_nnet.fit(sample.loc[:, "male":"shell_weight"], sample.rings)

# Dump the estimator's hyper-parameters; the context manager guarantees the
# file is flushed and closed (the original left the handle open).
with open('coeficients/mlp_multilayer', 'w') as params_file:
    print(model_nnet.get_params(), file=params_file)
#print("Coeficients:", model_nnet.coefs_, "Biasis:", model_nnet.intercepts_)
print("Final loss: ", model_nnet.loss_)
def train_ANN(descriptors_filename, target_values_filename, architecture,
              ANN_seed, split_seed, T):
    """Cross-validate an MLP regressor and record statistics at each checkpoint in T.

    Parameters
    ----------
    descriptors_filename : path of a CSV file with a 'CID' column; every
        column after the first one is used as a feature.
    target_values_filename : path of a CSV file with 'CID' and 'a' columns;
        'a' holds the target value.
    architecture : passed to MLPRegressor as ``hidden_layer_sizes``.
    ANN_seed : ``random_state`` of the MLPRegressor.
    split_seed : ``random_state`` of the shuffled KFold split.
    T : iterable of values assigned in turn to ``reg.max_iter``; each value
        is also the key under which that checkpoint's statistics are stored.

    Returns
    -------
    dict
        ``R[key][fold][t]`` for every ``key`` in the module-level ``R_key``.
        The loop writes the keys "R2train", "R2test", "R2all", "MAEtrain",
        "MAEtest", "MAEall", "time" and "reg", so ``R_key`` must contain
        them. "reg" holds a deep copy of the regressor at that checkpoint.

    Raises
    ------
    SystemExit
        With status 1 (after a message on stderr) when a CID in the
        descriptor file has no entry in the target-value file.

    Relies on the module-level names ``R_key`` and ``CV`` (number of folds).
    """
    ########## preprocess ##########
    ### read files ###
    fv = pd.read_csv(descriptors_filename)
    value = pd.read_csv(target_values_filename)

    ### prepare training set ###
    CIDs = np.array(fv['CID'])

    # CID -> feature vector (first column of the descriptor table is skipped)
    fv_dict = dict(zip(CIDs, fv.values[:, 1:]))
    # CID -> target value
    target_dict = dict(zip(np.array(value['CID']), np.array(value['a'])))

    # target_values_filename must cover every CID found in descriptors_filename
    for cid in CIDs:
        if cid not in target_dict:
            sys.stderr.write('error: {} misses the target value of CID {}\n'.format(target_values_filename, cid))
            # sys.exit instead of the site-provided exit(), which is not
            # guaranteed to exist in every interpreter configuration.
            sys.exit(1)

    # build x and y with the CIDs in ascending order
    CIDs.sort()
    x = np.array([fv_dict[cid] for cid in CIDs])
    y = np.array([target_dict[cid] for cid in CIDs])

    ### prepare learning ###
    # initialize an ANN - MLP regressor
    reg = MLPRegressor(activation='relu', solver='adam',
                       alpha=1e-5, hidden_layer_sizes=architecture,
                       random_state=ANN_seed, early_stopping=False)

    # result container: R[<key>][fold][t]
    R = {key: [dict() for _ in range(CV)] for key in R_key}

    # separate the data randomly for cross-validation
    kf = KFold(n_splits=CV, shuffle=True, random_state=split_seed)

    ### start learning experiments ###
    for fold, (train, test) in enumerate(kf.split(x)):
        x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
        # every fold starts from a fresh initialisation
        reg.warm_start = False
        start = time.time()
        print("\n\n### (ANN_seed, split_seed)=({},{}), fold={}/{} ###".format(ANN_seed, split_seed, fold+1, CV))
        print("# t\ttrain\ttest\ttime")

        # learn ANN, pausing at each checkpoint t in order to record stats
        # NOTE(review): once warm_start is True, each fit() call continues
        # from the previous weights and may run up to max_iter further
        # iterations, so the total iteration count at checkpoint t is
        # presumably cumulative rather than exactly t -- confirm against the
        # scikit-learn version in use.
        for t in T:
            reg.max_iter = t
            reg.fit(x_train, y_train)
            reg.warm_start = True

            # predictions needed for the MAE figures
            pred = reg.predict(x)
            pred_train = reg.predict(x_train)
            pred_test = reg.predict(x_test)

            # prediction score (R^2)
            R["R2train"][fold][t] = reg.score(x_train, y_train)
            R["R2test"][fold][t] = reg.score(x_test, y_test)
            R["R2all"][fold][t] = reg.score(x, y)

            # MAE
            R["MAEtrain"][fold][t] = mean_absolute_error(y_train, pred_train)
            R["MAEtest"][fold][t] = mean_absolute_error(y_test, pred_test)
            R["MAEall"][fold][t] = mean_absolute_error(y, pred)

            # elapsed time since the fold started, and a snapshot of the model
            R["time"][fold][t] = time.time() - start
            R["reg"][fold][t] = copy.deepcopy(reg)
            print("{}\t{:.4f}\t{:.4f}\t{:.4f}".format(t, R["R2train"][fold][t], R["R2test"][fold][t], R["time"][fold][t]))

    return R