def _as_percent(fraction):
    """Return ``fraction`` expressed as a percentage with two decimals."""
    # Scale first, round last: ``round(x, 4) * 100`` multiplies after rounding
    # and so re-introduces binary floating-point noise in the printed value
    # (for example 0.0007 -> 0.07000000000000001).
    return round(fraction * 100, 2)


def test_model(Classifier, prep_type, **clfargs):
    """Print evaluation scores for one classifier, without and with PCA.

    The data comes from ``prep(prep_type)``; only the first two returned
    items (``X`` and ``y``) are used. The classifier is built once with the
    caller's keyword arguments and once more with ``pca=True`` and
    ``n_components=4`` added.

    Args:
        Classifier: Callable accepting ``X``, ``y`` and extra keyword
            arguments. The returned object must provide
            ``cross_val_score()``, ``unstratified_score()`` and a
            ``cv_score`` attribute whose value has a ``mean()`` method.
        prep_type: Passed unchanged to ``prep``.
        **clfargs: Extra keyword arguments forwarded to ``Classifier``.

    Returns:
        None. Results are written to standard output.
    """
    X, y, *_ = prep(prep_type)

    clf = Classifier(X=X, y=y, **clfargs)
    print("No PCA, 5-fold cross-val score...")
    # cross_val_score() is evaluated before cv_score is read; presumably the
    # call is what populates cv_score -- TODO confirm against the classifier.
    mean_cv = clf.cross_val_score().mean()
    print("Score = {score}% ({scores})".format(
        score=_as_percent(mean_cv),
        scores=clf.cv_score))

    print("No PCA, unstratified score...")
    print("Score = {score}%".format(
        score=_as_percent(clf.unstratified_score())))

    clf = Classifier(X=X, y=y, pca=True, n_components=4, **clfargs)
    print("PCA, 5-fold-cross-val score")
    # NOTE(review): unlike the branch above, cv_score is read here without an
    # explicit cross_val_score() call -- confirm the attribute is available
    # on a freshly constructed classifier.
    print("Score = {score}% ({scores})".format(
        score=_as_percent(clf.cv_score.mean()),
        scores=clf.cv_score))
# prepare for pytorch by moving numpy arrays to torch arrays batch["input"] = torch.from_numpy(input) for key in ["target", "index"]: if isinstance(batch[key], np.ndarray): batch[key] = torch.from_numpy(batch[key]) return batch, drop data = data.recsys.ml100k(0.) dataloader = torch.utils.data.DataLoader(data, num_workers=1, collate_fn=collate_fn, batch_size=80000, shuffle=True) index = prep(data.index, dtype="int") if use_cuda: index = index.cuda() units = 100 # build model enc = SparseSequential( index, SparseExchangeable(5, units, index), nn.LeakyReLU(), torch.nn.Dropout(p=0.5), SparseExchangeable(units, units, index), nn.LeakyReLU(), torch.nn.Dropout(p=0.5), SparseExchangeable(units, units, index), nn.LeakyReLU(),
# NOTE(review): the statements down to the "BAYES" banner use `Classifier`,
# `prep_type` and `clfargs`, so they appear to be the tail of a function body
# whose `def` line lies above this chunk -- confirm before editing.
    X, y, *_ = prep(prep_type)
    clf = Classifier(X=X, y=y, **clfargs)
    print("No PCA, 5-fold cross-val score...")
    # cross_val_score() is called before cv_score is read on the same object.
    # NOTE(review): round(..., 4) * 100 multiplies after rounding, so the
    # printed percentage can carry floating-point noise.
    print("Score = {score}% ({scores})".format(
        score=round(clf.cross_val_score().mean(), 4) * 100,
        scores=clf.cv_score))
    print("No PCA, unstratified score...")
    print("Score = {score}%".format(score=round(clf.unstratified_score(), 4) * 100))
    # Rebuild the classifier on the same data with PCA switched on.
    clf = Classifier(X=X, y=y, pca=True, n_components=4, **clfargs)
    print("PCA, 5-fold-cross-val score")
    # Here cv_score is read directly, with no cross_val_score() call first.
    print("Score = {score}% ({scores})".format(
        score=round(clf.cv_score.mean(), 4) * 100,
        scores=clf.cv_score))

# Driver script: print a banner, then report scores for each classifier.
# The prep_type string ("basic" or "scale") is handed to prep() by test_model.
print("BAYES")
test_model(Classifier=BayesClassifier, prep_type="basic")
print("SVM")
test_model(Classifier=SVM, prep_type="scale")
print("RANDOM FOREST")
test_model(Classifier=RForest, prep_type="basic")
print("KNEIGHBOURS")
test_model(Classifier=KNeigh, prep_type="scale")
# FFNN does not go through test_model: it is built once with PCA enabled and
# only its cv_score is reported.
print("MLP")
X, y, *_ = prep("basic")
clf = FFNN(X=X, y=y, pca=True, n_components=4)
print("PCA, 5-fold-cross-val score")
print("Score = {score}% ({scores})".format(
    score=round(clf.cv_score.mean(), 4) * 100,
    scores=clf.cv_score))
print("RIDGE")
test_model(Classifier=RidgeCV, prep_type="scale")