def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    """Fit a tree ensemble on Madelon with early stopping and score it.

    Args:
        snapshotroot: Not used by this function.
        ensembleType: Callable that builds the classifier. It is called with
            ``max_depth``, ``use_label_encoder``, ``tree_method``,
            ``n_estimators`` and ``random_state`` keyword arguments
            (presumably an XGBoost scikit-learn style class -- confirm).
        numTrees: Value passed as ``n_estimators``; one tenth of it (at
            least 1) is used as the early-stopping patience.
        depth: Value passed as ``max_depth``.
        seed: Value passed as ``random_state``.

    Returns:
        A tuple ``(accuracy, losses)``: the fraction of test predictions
        equal to the test labels, and an array of the per-round "logloss"
        values recorded under the ``"validation_1"`` key of the
        classifier's evaluation results.
    """
    train_x, train_y, test_x, test_y = datasets.load_madelon()

    # Labels are cast to 32-bit integers before training / comparison.
    train_y, test_y = train_y.astype(np.int32), test_y.astype(np.int32)

    # Carve a validation split out of the training data.
    # NOTE(review): 1500 is presumably the number of samples kept for
    # training -- confirm against balanced_shuffle.
    train_x, train_y, val_x, val_y = balanced_shuffle(train_x, train_y, 1500)

    metric = "logloss"
    patience = max(1, int(0.1 * numTrees))

    clf = ensembleType(
        max_depth=depth,
        use_label_encoder=False,
        tree_method="exact",
        n_estimators=numTrees,
        random_state=seed,
    )

    # Both splits are monitored; the second entry is the held-out one.
    watched = [(train_x, train_y), (val_x, val_y)]
    clf.fit(
        train_x,
        train_y,
        eval_set=watched,
        eval_metric=metric,
        verbose=False,
        early_stopping_rounds=patience,
    )

    print(
        f"best iteration = {clf.best_iteration}, best_score = {clf.best_score}, best_ntree_limit = {clf.best_ntree_limit}"
    )

    history = clf.evals_result()
    hits = clf.predict(test_x) == test_y

    return hits.mean(), np.array(history["validation_1"][metric])
def train(snapshotroot, ensembleType, numTrees, depth, seed=0):
    """Fit an ensemble classifier on Madelon and return its test score.

    Args:
        snapshotroot: Not used by this function.
        ensembleType: Callable that builds the classifier. It is called with
            ``random_state``, ``n_estimators``, ``max_features`` and
            ``max_depth`` keyword arguments, and the result must provide
            ``fit`` and ``score``.
        numTrees: Value passed as ``n_estimators``.
        depth: Value passed as ``max_depth``.
        seed: Value passed as ``random_state``.

    Returns:
        Whatever ``score`` returns for the test split (presumably mean
        accuracy for a scikit-learn style classifier -- confirm).
    """
    train_x, train_y, test_x, test_y = datasets.load_madelon()

    # Labels are cast to 32-bit integers before fitting / scoring.
    train_y, test_y = (
        labels.astype(np.int32) for labels in (train_y, test_y)
    )

    settings = {
        "random_state": seed,
        "n_estimators": numTrees,
        "max_features": "sqrt",
        "max_depth": depth,
    }
    model = ensembleType(**settings)
    model.fit(train_x, train_y)

    return model.score(test_x, test_y)
def train(snapshotroot, device, forestType, numTrees, depth):
    """Train ``Net`` on Madelon with an annealed feature-selection schedule.

    Every epoch the training set is reshuffled, ``numKeep`` is passed to
    ``net.features.group_select`` (presumably the number of input features
    to keep -- confirm against ``Net``), one pass of mini-batch Adam updates
    is run, the weights are saved to ``<snapshotroot>/epoch_<n>`` and the
    validation loss is measured. After the last epoch the snapshot with the
    lowest validation loss is reloaded and evaluated on the test split.

    Args:
        snapshotroot: Directory that receives one state-dict file per epoch.
            It must already exist; this function does not create it.
        device: Torch device the model and all data tensors are moved to.
        forestType: Forwarded to ``Net`` as its first argument.
        numTrees: Forwarded to ``Net`` as its second argument.
        depth: Forwarded to ``Net`` as its third argument.

    Returns:
        A tuple ``(accuracy, valLosses)``: the test accuracy of the best
        snapshot as a float, and a NumPy array with the validation loss of
        each epoch.
    """
    xtrain, ytrain, xtest, ytest = datasets.load_madelon()

    # Labels
    ytrain = ytrain.astype(np.int32)
    ytest = ytest.astype(np.int32)

    # NOTE(review): 1500 is presumably the number of samples kept for
    # training -- confirm against balanced_shuffle.
    xtrain, ytrain, xval, yval = balanced_shuffle(xtrain, ytrain, 1500)

    net = Net(forestType, numTrees, depth).to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Transfer this data to the device
    xtrain = torch.from_numpy(xtrain).type(torch.float32).to(device)
    ytrain = torch.from_numpy(ytrain).type(torch.long).to(device)

    xval = torch.from_numpy(xval).type(torch.float32).to(device)
    yval = torch.from_numpy(yval).type(torch.long).to(device)

    xtest = torch.from_numpy(xtest).type(torch.float32).to(device)
    ytest = torch.from_numpy(ytest).type(torch.long).to(device)

    # Start with a small learning rate; it is raised once the feature
    # schedule has bottomed out (see below).
    optimizer = optim.Adam(net.parameters(), lr=1e-4)

    numEpochs = 200
    batchSize = 25

    indices = list(range(xtrain.shape[0]))

    # If no epoch ever beats bestLoss, the last epoch's snapshot is used.
    bestEpoch = numEpochs - 1
    bestLoss = 1000.0

    mu = 10.0
    targetKeep = 10
    valLosses = np.zeros([numEpochs])
    ratesUpdated = False

    for epoch in range(numEpochs):
        random.shuffle(indices)

        xtrain = xtrain[indices, :]
        ytrain = ytrain[indices]

        # Annealing schedule: 500 at epoch 0, clamped to targetKeep from the
        # halfway point on; mu controls how quickly it decays in between.
        # NOTE(review): 500 is presumably the Madelon feature count -- confirm.
        numKeep = targetKeep + (500 - targetKeep) * max(
            0.0, (numEpochs - 2.0 * epoch) / (2.0 * epoch * mu + numEpochs))
        numKeep = int(numKeep)

        print(f"Info: Epoch = {epoch}, numKeep = {numKeep}", flush=True)

        net.features.group_select(numKeep)

        # Switch to the larger learning rate exactly once, the first time
        # the schedule reaches its floor.
        if numKeep <= targetKeep and not ratesUpdated:
            ratesUpdated = True
            print("Info: Updating learning rates...", flush=True)
            for g in optimizer.param_groups:
                g['lr'] = 1e-3

        runningLoss = 0.0
        count = 0
        for xbatch, ybatch in batches(xtrain, ytrain, batchSize):
            optimizer.zero_grad()

            outputs = net(xbatch)
            loss = criterion(outputs, ybatch)

            loss.backward()
            optimizer.step()

            # Accumulate a Python float, not the tensor: summing the loss
            # tensors would keep autograd history for every batch alive
            # until the end of the epoch.
            runningLoss += loss.item()
            count += 1

        meanLoss = runningLoss / count

        snapshotFile = os.path.join(snapshotroot, f"epoch_{epoch}")
        torch.save(net.state_dict(), snapshotFile)

        # The whole validation split is evaluated in a single forward pass.
        with torch.no_grad():
            net.train(False)
            valLoss = criterion(net(xval), yval).item()
            net.train(True)

        if valLoss < bestLoss:
            bestLoss = valLoss
            bestEpoch = epoch

        valLosses[epoch] = valLoss

        print(
            f"Info: epoch = {epoch}, loss = {meanLoss}, validation loss = {valLoss}",
            flush=True)

    # Rebuild the network from the best snapshot (loaded via CPU so the
    # file can be read regardless of the device it was saved from).
    snapshotFile = os.path.join(snapshotroot, f"epoch_{bestEpoch}")

    net = Net(forestType, numTrees, depth)
    net.load_state_dict(torch.load(snapshotFile, map_location="cpu"))
    net = net.to(device)

    # The whole test split is evaluated in a single forward pass.
    with torch.no_grad():
        net.train(False)
        predictions = torch.argmax(net(xtest), dim=1)
        totalCorrect = torch.sum(predictions == ytest).item()
        count = xtest.shape[0]

    accuracy = float(totalCorrect) / float(count)
    print(
        f"Info: Best epoch = {bestEpoch}, test accuracy = {accuracy}, misclassification rate = {1.0 - accuracy}",
        flush=True)

    return accuracy, valLosses