def main():
    """Run the decision tree pipeline and generate a submission.

    Steps, in order:
      1. Resolve and load the settings, then initialise the logger.
      2. Load the train and test sets and scale them.
      3. Extract image features via ``DataLoader.load_from_images``.
      4. Select all features, then apply ``auto_remove_lasso(0.17)``.
      5. Apply ``DataReducer.pca_data_reduction`` to the selected features.
      6. Register the reduced data and a default ``DecisionTreeClassifier``
         with the ``ModelSelector``, pick the best model (``k=10``, with
         plotting) and write the submission.
    """
    settings_path = get_settings_path_from_arg("decision_tree")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "decision_tree_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training
    # (The data is loaded exactly once; a previous duplicate DataLoader that
    # loaded the same files and was immediately discarded has been removed.)
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    # k is ten times the number of classes; k and k * 3 are forwarded to
    # load_from_images (their exact meaning is defined by DataLoader).
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(
        dl.id_train, dl.x_train, dl.y_train,
        dl.id_test, dl.x_test
    )
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(
        dl.id_train, dr.x_train, dl.y_train,
        dl.id_test, dr.x_test
    )
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add classifier to model selector
    # NOTE(review): the classifier is registered under an empty label; confirm
    # whether ModelSelector expects a descriptive name here.
    clf = DecisionTreeClassifier()
    ms.add_classifier("", clf)

    # Get best model
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the QDA pipeline and generate a submission.

    Steps, in order:
      1. Resolve and load the settings, then initialise the logger.
      2. Load the train and test sets and scale them.
      3. Extract image features via ``DataLoader.load_from_images``.
      4. Select all features, then apply ``auto_remove_lasso(0.17)``.
      5. Apply ``DataReducer.pca_data_reduction`` to the selected features.
      6. Register the reduced data and three
         ``QuadraticDiscriminantAnalysis`` variants with the
         ``ModelSelector``, pick the best model (``k=10``, with plotting)
         and write the submission.
    """
    settings_path = get_settings_path_from_arg("qda_classifier")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "qda_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    # k is ten times the number of classes; k and k * 3 are forwarded to
    # load_from_images (their exact meaning is defined by DataLoader).
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    # Without this registration the reduced DataSelector built above is
    # never handed to the ModelSelector.
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector
    clf = QuadraticDiscriminantAnalysis(store_covariance=True)
    ms.add_classifier("store_covariance=True", clf)

    clf = QuadraticDiscriminantAnalysis(store_covariance=False)
    ms.add_classifier("store_covariance=False", clf)

    # The deprecated ``store_covariances`` keyword is intentionally not
    # passed: it was only ever given its default (None) and is rejected by
    # scikit-learn releases that removed it.
    clf = QuadraticDiscriminantAnalysis(
        priors=None,
        reg_param=0.0,
        store_covariance=False,
        tol=0.0001,
    )
    ms.add_classifier("final", clf)

    # Get best model
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the Bernoulli naive Bayes pipeline and generate a submission.

    Steps, in order:
      1. Resolve and load the settings, then initialise the logger.
      2. Load the train and test sets and scale them.
      3. Extract image features via ``DataLoader.load_from_images``.
      4. Select all features, then apply ``auto_remove_lasso(0.17)``.
      5. Apply ``DataReducer.pca_data_reduction`` to the selected features.
      6. Register the reduced data and a
         ``BernoulliNB(alpha=3.5, binarize=0.03)`` classifier with the
         ``ModelSelector``, pick the best model (``k=10``) and write the
         submission.
    """
    settings_path = get_settings_path_from_arg("naive_bayes_classifier")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "naive_bayes_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    # k is ten times the number of classes; k and k * 3 are forwarded to
    # load_from_images (their exact meaning is defined by DataLoader).
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    # Built only after image feature extraction; an earlier selector that was
    # created before extraction and overwritten unused has been removed.
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add classifier to model selector
    clf = BernoulliNB(alpha=3.5, binarize=0.03)
    ms.add_classifier("bernoulli", clf)

    # Get best model
    ms.get_best_model(k=10)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the random forest pipeline and generate a submission.

    Steps, in order:
      1. Resolve and load the settings, then initialise the logger.
      2. Load the train and test sets and scale them.
      3. Extract image features via ``DataLoader.load_from_images``.
      4. Select all features, then apply ``auto_remove_lasso(0.17)``.
      5. Apply ``DataReducer.pca_data_reduction`` to the selected features.
      6. Register the reduced data and a ``RandomForestClassifier`` with the
         ``ModelSelector``, pick the best model (``k=10``) and write the
         submission.
    """
    settings_path = get_settings_path_from_arg("random_forest_classifier")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "random_forest_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction
    # k is ten times the number of classes; k and k * 3 are forwarded to
    # load_from_images (their exact meaning is defined by DataLoader).
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector
    clf = RandomForestClassifier(
        n_estimators=13,
        random_state=0,
        min_samples_leaf=3,
        bootstrap=False,
    )
    # The label mirrors the constructor arguments above so the registered
    # name identifies the exact configuration.
    ms.add_classifier(
        "n_estimators=13, random_state=0, min_samples_leaf=3, bootstrap=False",
        clf,
    )

    # Get best model
    ms.get_best_model(k=10)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the k-nearest-neighbours pipeline and generate a submission.

    Loads and scales the data, extracts image features, applies
    ``auto_remove_lasso(0.17)`` followed by
    ``DataReducer.pca_data_reduction``, then registers one
    ``KNeighborsClassifier`` (``weights="distance"``, ``p=1``) per
    neighbour count from 1 to 39 with the ``ModelSelector``. Finally the
    best model is chosen (``k=10``, with plotting) and the submission is
    written.
    """
    config = load_settings(get_settings_path_from_arg("k_neighbors_classifier"))
    init_logger(config.log.dir, "k_neighbors_classifier", logging.DEBUG)

    selector = ModelSelector()

    # Raw train / test data, scaled in place by the loader.
    loader = DataLoader()
    loader.load_train(config.data.train_path)
    loader.load_test(config.data.test_path)
    loader.scale_data()

    # Image features: both size arguments derive from the class count.
    k = np.size(loader.classes) * 10
    loader.load_from_images(config.data.image_path, k, k * 3, verbose=False)

    # Full feature set, then lasso-based removal.
    full_features = DataSelector(
        loader.id_train,
        loader.x_train,
        loader.y_train,
        loader.id_test,
        loader.x_test,
    )
    full_features.add_all()
    full_features.auto_remove_lasso(0.17)

    # PCA on whatever the lasso step kept.
    reducer = DataReducer(full_features.train_x, full_features.test_x)
    reducer.pca_data_reduction()

    reduced_features = DataSelector(
        loader.id_train,
        reducer.x_train,
        loader.y_train,
        loader.id_test,
        reducer.x_test,
    )
    reduced_features.add_all()
    selector.add_selector("all_feature", reduced_features)

    # One candidate classifier per neighbour count in [1, 39].
    for n_neighbors in range(1, 40):
        label = "k_{}_distance_p1".format(n_neighbors)
        candidate = KNeighborsClassifier(n_neighbors, weights="distance", p=1)
        selector.add_classifier(label, candidate)

    # Pick the winner and emit the submission file.
    selector.get_best_model(k=10, plot=True)
    selector.generate_submission(config.data.submission_dir, loader.classes)