def osvm(train_X, train_y, test_X, test_y):
    """Fit a decision-tree regression baseline and score its test predictions.

    NOTE(review): despite the name, this baseline trains a
    DecisionTreeRegressor, not an ordinal SVM — presumably a leftover name;
    confirm against the rest of the project before renaming.

    Returns whatever EvaluatorEX.eval(pred, test_y, 1) produces
    (a project-defined metric record).
    """
    model = DecisionTreeRegressor()
    model.fit(train_X, train_y)
    predictions = model.predict(test_X)
    return EvaluatorEX().eval(predictions, test_y, 1)
def nnregression(train_X, train_y, valid_X, valid_y, test_X, test_y,
                 hidden_size=128, lr=1e-3, epoch=256):
    """Train a 3-hidden-layer MLP regressor and evaluate it on the test split.

    The hidden width, learning rate, and epoch count were hard-coded in the
    original; they are now keyword parameters whose defaults reproduce the
    original behavior exactly, so existing callers (including
    ``pool.starmap(nnregression, trial_data)`` with 6-tuples) are unaffected.

    Parameters
    ----------
    train_X, train_y : training features / targets.
    valid_X, valid_y : validation split forwarded to ``BasicRegressor.fit``.
    test_X, test_y   : held-out split used for the final evaluation.
    hidden_size : width of each hidden layer (default 128).
    lr : learning rate passed to ``BasicRegressor.fit`` (default 1e-3).
    epoch : number of training epochs (default 256).

    Returns
    -------
    The result of ``EvaluatorEX().eval(pred_y, test_y, 1)`` — a
    project-defined metric record.
    """
    # Three ReLU hidden layers; dropout was tried and deliberately disabled
    # in the original code, so it is omitted here.
    hiddenlayers = nn.Sequential(
        nn.Linear(train_X.shape[1], hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
    )
    regressor = BasicRegressor(hiddenlayers=hiddenlayers, hidden_size=hidden_size)
    regressor.fit(train_X, train_y, valid_X, valid_y, lr=lr, epoch=epoch)
    pred_y = regressor.predict(test_X)
    evaluator = EvaluatorEX()
    return evaluator.eval(pred_y, test_y, 1)
def search_nnr(config):
    """Hyperparameter-search trial for the MLP regressor over CV folds.

    For each fold in ``load_data(config['data'])`` this trains a 3-layer MLP
    (width ``config['unit']``, dropout ``config['dropout']``), records the
    validation loss and test MAE, and additionally computes a
    rank-rescored MAE: each test prediction is mapped to its rank among the
    model's train+valid predictions, and replaced by the ground-truth value
    at the same rank in the sorted train+valid targets.

    Fix: removed the unused local ``evalutor = EvaluatorEX()`` — it was
    created but never used anywhere in this function.

    Parameters
    ----------
    config : dict with keys 'data', 'unit', 'dropout', 'lr', 'epoch',
        'return' (bool: return per-fold metadata instead of reporting).

    Returns
    -------
    list of per-fold dicts {'gt', 'pred', 'rty'} when ``config['return']``
    is truthy; otherwise reports aggregate metrics via ``tune.report`` and
    returns None.
    """
    cv_data = load_data(config['data'])
    unit = config['unit']
    dropout = config['dropout']
    val_losses = []
    test_maes = []
    rescore_maes = []
    metas = []
    for train_X, train_y, valid_X, valid_y, test_X, test_y in cv_data:
        hiddenlayers = nn.Sequential(
            nn.Linear(train_X.shape[1], unit),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(unit, unit),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(unit, unit),
            nn.Dropout(dropout),
            nn.ReLU(),
        )
        regressor = BasicRegressor(hiddenlayers=hiddenlayers, hidden_size=unit)
        regressor.fit(train_X, train_y, valid_X, valid_y,
                      lr=config['lr'], epoch=config['epoch'],
                      cuda=False, batch_size=32)
        val_losses.append(regressor.val_loss)
        pred_y = regressor.predict(test_X, cuda=False)
        test_maes.append(np.mean(np.abs(pred_y - test_y)))
        # Rank-rescoring: count, for each test prediction, how many
        # train+valid predictions it exceeds; use that rank to index into
        # the sorted train+valid ground truths.
        rescored = regressor.predict(np.concatenate([train_X, valid_X], axis=0))
        N = len(rescored)
        idxs = np.sum((pred_y.reshape(-1, 1) > rescored).astype('int'),
                      axis=1).clip(max=N - 1)
        gts = np.concatenate([train_y, valid_y]).reshape(-1)
        gts = np.sort(gts)
        rty = gts[idxs]
        rescore_maes.append(np.mean(np.abs(rty - test_y)))
        metas.append({'gt': test_y, 'pred': pred_y, 'rty': rty})
    if config['return']:
        return metas
    else:
        tune.report(
            mae=np.mean(test_maes),
            std=np.std(test_maes),
            val_loss=np.mean(val_losses),
            rty=np.mean(rescore_maes),
        )
def search_ORARS(config):
    """Hyperparameter-search trial for the ORARS ordinal-regression model.

    For each CV fold this builds an ``OrdinalRegression`` wrapper around
    ``OrdRegNet``, trains it on (train, valid) pair sets, and evaluates test
    items in chunks of 64 against the train anchors. When ``config['full']``
    is falsy, the fold is subsampled to 256 train/valid items (quick search
    mode) with a larger batch size.

    Fixes: removed the unused local ``evalutor = EvaluatorEX()``; replaced
    the non-idiomatic ``== False`` comparison with ``not``; reused the
    ``dropout`` local instead of a second ``config['dropout']`` lookup.

    Parameters
    ----------
    config : dict with keys 'data', 'unit', 'dropout', 'lr', 'epoch',
        'full' (bool: full run vs. subsampled search run).

    Returns
    -------
    None. Prints aggregates in full mode, otherwise reports them through
    ``tune.report``.
    """
    cv_data = load_data(config['data'])
    unit = config['unit']
    dropout = config['dropout']
    val_losses = []
    test_maes = []
    for train_X, train_y, valid_X, valid_y, test_X, test_y in cv_data:
        if not config['full']:
            # Quick-search mode: cap the fold at 256 items per split.
            train_X = train_X[:256]
            train_y = train_y[:256]
            valid_X = valid_X[:256]
            valid_y = valid_y[:256]
            batch_size = 512
        else:
            batch_size = 128
        org_config = {
            'model_protocol': OrdRegNet,
            'model_args': {
                # Pairwise input: two feature vectors concatenated.
                'units': [train_X.shape[1] * 2, unit, unit, unit, 2],
                'dropout': dropout
            },
            'config': {
                'lr': config['lr'],
                'cuda': True,
                'generate_weight': True,
                'bin_width': 10000,
                'batch_size': batch_size,
                'soft_scale': 1,
                'epoch': config['epoch'],
            },
            # Fresh log dir per trial so parallel trials never collide.
            'log_dir': f'exp/{uuid4()}',
            'ignore_check_dirs': False,
            'remove_old': True
        }
        org = OrdinalRegression(**org_config)
        anchor_set = list(zip(train_X, train_y))
        valid_set = list(zip(valid_X, valid_y))
        org.train(anchor_set, valid_set, show_time=True)
        val_losses.append(org.val_loss)
        print(org.val_loss)
        # Evaluate the test set in chunks of 64 to bound memory use.
        test_set = list(zip(test_X, test_y))
        preds = []
        for i in range(0, len(test_set), 64):
            sub_set = test_set[i:i + 64]
            pred = org.eval(anchor_set, sub_set)[1]
            preds.append(pred)
        pred = np.concatenate(preds)
        mae = np.mean(np.abs(pred - test_y))
        test_maes.append(mae)
    if config['full']:
        print({
            'test_mae': np.mean(test_maes),
            'val_loss': np.mean(val_losses),
        })
    else:
        tune.report(
            mae=np.mean(test_maes),
            std=np.std(test_maes),
            val_loss=np.mean(val_losses),
        )
# NOTE(review): this line is a formatting-mangled paste fusing two units.
# (a) The leading `org.train(...)` / `return org.eval(...)` pair is the TAIL of a
#     function whose `def` header is not in this chunk — presumably ORARS, which
#     the __main__ block below calls via `ORARS(*x)`; confirm against the full file.
# (b) The `if __name__ == "__main__":` script: switches matplotlib to the AGG
#     backend, loads CV folds from the CLI 'data' argument, runs the nnregression
#     baseline across a 5-worker multiprocessing Pool, accumulates results in an
#     EvaluatorEX, prints the NNR mean, then runs ORARS serially per fold
#     (a starmap variant is commented out) and accumulates those results too.
# Code tokens are left byte-identical; restore the original line breaks and
# indentation before this file can execute.
org.train(anchor_set, valid_set, show_time=False) return org.eval(anchor_set, list(zip(test_X, test_y)), load_best_model=True)[0] if __name__ == "__main__": matplotlib.use('AGG') parser = ArgumentParser() parser.add_argument('data') args = parser.parse_args() trial_data = load_data(args.data) pool = Pool(5) res = pool.starmap(nnregression, trial_data) evaluator = EvaluatorEX() for x in res: evaluator.add_record(x) print('NNR', evaluator.get_current_mean()) # res=pool.starmap(osvm,trial_data) # evaluator=EvaluatorEX() # for x in res: # evaluator.add_record(x) # print('OrdinalSVM',evaluator.get_current_mean()) res = [ORARS(*x) for x in trial_data] # res=pool.starmap(ORARS,trial_data) evaluator = EvaluatorEX() for x in res: evaluator.add_record(x)