def main():
    """Train a decision tree classifier on the leaf dataset and write a
    submission file.

    Pipeline: load + scale tabular data, append image-derived features,
    lasso-prune features, PCA-reduce, then cross-validate the classifier
    via ModelSelector.
    """
    settings_path = get_settings_path_from_arg("decision_tree")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "decision_tree_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training data.
    # BUGFIX: the original created and loaded a DataLoader twice; the first
    # instance was discarded (and never scaled). A single loader suffices.
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction: k clusters scaled by the number of classes.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector over the combined (tabular + image) features.
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso to drop weak features (threshold 0.17, as in sibling scripts).
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction via PCA, then rebuild the selector on the
    # reduced feature matrices.
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # Add data selection to model selector.
    ms.add_selector("all_feature", ds)

    clf = DecisionTreeClassifier()
    ms.add_classifier("", clf)

    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Train QDA classifier variants on the leaf dataset and write a
    submission file.

    Pipeline mirrors the sibling classifier scripts: load + scale data,
    append image features, lasso-prune, PCA-reduce, register the selector
    and several QDA configurations, then pick the best by k-fold CV.
    """
    settings_path = get_settings_path_from_arg("qda_classifier")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "qda_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training data.
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector.
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso.
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction.
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # BUGFIX: the original never registered the selector, so the prepared
    # features were unused; every sibling script does this.
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector.
    clf = QuadraticDiscriminantAnalysis(store_covariance=True)
    ms.add_classifier("store_covariance=True", clf)
    clf = QuadraticDiscriminantAnalysis(store_covariance=False)
    ms.add_classifier("store_covariance=False", clf)
    # BUGFIX: dropped the deprecated `store_covariances` alias, which was
    # removed from scikit-learn and raises TypeError when passed together
    # with `store_covariance`.
    clf = QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0,
                                        store_covariance=False, tol=0.0001)
    ms.add_classifier("final", clf)

    # Get best model.
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Train a Bernoulli naive Bayes classifier on the leaf dataset and
    write a submission file.

    Pipeline: load + scale data, append image features, lasso-prune,
    PCA-reduce, then cross-validate via ModelSelector.
    """
    settings_path = get_settings_path_from_arg("naive_bayes_classifier")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "naive_bayes_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training data.
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction.
    # BUGFIX: the original built and add_all()-ed a DataSelector BEFORE
    # extracting image features, then immediately rebuilt it — the first
    # selector was dead code and has been removed.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector over the combined features.
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso.
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction.
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # Add data selection to model selector.
    ms.add_selector("all_feature", ds)

    # Add classifier to model selector.
    clf = BernoulliNB(alpha=3.5, binarize=0.03)
    ms.add_classifier("bernoulli", clf)

    # Get best model.
    ms.get_best_model(k=10)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Train a random forest classifier on the leaf dataset and write a
    submission file.

    Pipeline: load + scale data, append image features, lasso-prune,
    PCA-reduce, then cross-validate via ModelSelector.
    """
    settings_path = get_settings_path_from_arg("random_forest_classifier")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "random_forest_classifier", logging.DEBUG)

    ms = ModelSelector()

    # Load test and training data.
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector.
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test, dl.x_test)
    ds.add_all()

    # Use lasso.
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction.
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test, dr.x_test)
    ds.add_all()

    # Add data selection to model selector.
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector.
    clf = RandomForestClassifier(n_estimators=13, random_state=0,
                                 min_samples_leaf=3, bootstrap=False)
    # BUGFIX: label said "bootsrap=False" — corrected the typo so the
    # reported configuration matches the actual constructor arguments.
    ms.add_classifier(
        "n_estimators=13, random_state=0, min_samples_leaf=3, bootstrap=False",
        clf)

    # Get best model.
    ms.get_best_model(k=10)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Sweep k-nearest-neighbour classifiers (k = 1..39) over the leaf
    dataset and write a submission for the best one.

    The feature pipeline matches the sibling scripts: scaled tabular data
    plus image features, lasso pruning, then PCA reduction.
    """
    settings = load_settings(get_settings_path_from_arg("k_neighbors_classifier"))
    init_logger(settings.log.dir, "k_neighbors_classifier", logging.DEBUG)

    selector_pool = ModelSelector()

    # Bring in the raw train/test tables and normalise them.
    loader = DataLoader()
    loader.load_train(settings.data.train_path)
    loader.load_test(settings.data.test_path)
    loader.scale_data()

    # Augment with image-derived features; cluster count scales with the
    # number of target classes.
    n_clusters = np.size(loader.classes) * 10
    loader.load_from_images(settings.data.image_path, n_clusters,
                            n_clusters * 3, verbose=False)

    # First selection pass over the full feature set.
    selection = DataSelector(loader.id_train, loader.x_train, loader.y_train,
                             loader.id_test, loader.x_test)
    selection.add_all()
    # Prune weak features via lasso.
    selection.auto_remove_lasso(0.17)

    # Compress the surviving features with PCA and rebuild the selection
    # on the reduced matrices.
    reducer = DataReducer(selection.train_x, selection.test_x)
    reducer.pca_data_reduction()
    selection = DataSelector(loader.id_train, reducer.x_train, loader.y_train,
                             loader.id_test, reducer.x_test)
    selection.add_all()

    selector_pool.add_selector("all_feature", selection)

    # Register one distance-weighted, Manhattan-metric KNN per k value.
    for n_neighbors in range(1, 40):
        candidate = KNeighborsClassifier(n_neighbors, weights="distance", p=1)
        selector_pool.add_classifier("k_{}_distance_p1".format(n_neighbors),
                                     candidate)

    # Cross-validate, pick the winner, and emit the submission file.
    selector_pool.get_best_model(k=10, plot=True)
    selector_pool.generate_submission(settings.data.submission_dir,
                                      loader.classes)
    # NOTE(review): continuation of a function whose definition starts above
    # this chunk — logs the accumulated histogram list and returns it.
    # Indentation level of the enclosing def is assumed — TODO confirm.
    logging.debug("[ImageFeatureExtractor] Histogram:{}".format(histo_list))
    return histo_list


def extract_descriptor_to_csv(img_path):
    """Extract image descriptor into csv"""
    # NOTE(review): stub — no implementation in this chunk; returns None.


def extract_feature_to_csv(img_path, ids, k, batch_size=None, verbose=False):
    """Extract feature from image and save to csv"""
    # NOTE(review): stub — no implementation in this chunk; returns None.


if __name__ == "__main__":
    # Script entry: load settings, read the train/test CSVs, and print the
    # features extracted from the test images.
    settings_path = get_settings_path_from_arg("image_feature_extractor")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "image_feature_extractor", logging.DEBUG)
    train_data = pd.read_csv(settings.data.train_path)
    test_data = pd.read_csv(settings.data.test_path)
    # Separate the label column from the training features.
    species = train_data["species"]
    train_data = train_data.drop("species", axis=1)
    # k scales with the number of training rows; batch_size with the number
    # of image files — presumably tuning knobs for get_feature. TODO confirm.
    k = np.size(species) * 10
    batch_size = np.size(os.listdir(settings.data.image_path)) * 3
    print(
        get_feature(settings.data.image_path, test_data['id'], k, batch_size,
                    verbose=False))
def main():
    """Benchmark a battery of classifiers on the leaf dataset and report
    the best one via 10-fold cross-validation.

    Feature pipeline: scaled tabular data plus image features, lasso
    pruning, then PCA reduction — identical to the single-model scripts.
    """
    settings = load_settings(get_settings_path_from_arg("main_classifier"))
    init_logger(settings.log.dir, "main_classifier", logging.DEBUG)

    selector_pool = ModelSelector()

    # Load and normalise the train/test tables.
    loader = DataLoader()
    loader.load_train(settings.data.train_path)
    loader.load_test(settings.data.test_path)
    loader.scale_data()

    # Append image-derived features; cluster count scales with class count.
    n_clusters = np.size(loader.classes) * 10
    loader.load_from_images(settings.data.image_path, n_clusters,
                            n_clusters * 3, verbose=False)

    # First selection pass, then lasso pruning.
    selection = DataSelector(loader.id_train, loader.x_train, loader.y_train,
                             loader.id_test, loader.x_test)
    selection.add_all()
    selection.auto_remove_lasso(0.17)

    # PCA-compress the pruned features and rebuild the selection.
    reducer = DataReducer(selection.train_x, selection.test_x)
    reducer.pca_data_reduction()
    selection = DataSelector(loader.id_train, reducer.x_train, loader.y_train,
                             loader.id_test, reducer.x_test)
    selection.add_all()

    selector_pool.add_selector("all_feature", selection)

    # Candidate models, registered in the same order as before.
    candidates = [
        ("KNN", KNeighborsClassifier(6, weights="distance", p=1)),
        ("AdaBoost", AdaBoostClassifier(learning_rate=0.01)),
        ("Decision Tree", DecisionTreeClassifier(min_impurity_decrease=0.02)),
        ("Gaussian NB", GaussianNB()),
        ("LDA", LinearDiscriminantAnalysis()),
        ("Gradient Boosting",
         GradientBoostingClassifier(n_estimators=100, random_state=0,
                                    min_samples_leaf=3, verbose=True)),
        ("Bernoulli NB", BernoulliNB(alpha=3.5, binarize=0.03)),
        ("NN Keras", NNKeras(selection.selected_x_test.shape[1])),
        ("SVC", SVC(probability=True, C=1000, gamma=1)),
        ("NuSVC", NuSVC(nu=0.1, gamma=10, probability=True)),
        ("QDA", QuadraticDiscriminantAnalysis(store_covariance=True)),
        ("Random Forest",
         RandomForestClassifier(n_estimators=13, random_state=0,
                                min_samples_leaf=3, bootstrap=False)),
    ]
    for label, model in candidates:
        selector_pool.add_classifier(label, model)

    # Cross-validate everything and plot the comparison.
    selector_pool.get_best_model(k=10, plot=True)
        # NOTE(review): continuation of a method whose definition starts
        # above this chunk — wraps the reduced test matrix in a DataFrame
        # with the generated PCA column names.
        self.pca_x_test = pd.DataFrame(self.pca_x_test, columns=col_names)

    def get_pca_x_train(self):
        """ get x_train after pca """
        # Accessor for the PCA-transformed training features.
        return self.pca_x_train

    def get_pca_x_test(self):
        """ get x_test after pca """
        # Accessor for the PCA-transformed test features.
        return self.pca_x_test


if __name__ == "__main__":
    # Script entry: demonstrate PCA reduction on the train/test CSVs.
    settings_path = get_settings_path_from_arg("data_reducer")
    settings = load_settings(settings_path)
    init_logger(settings.log.dir, "data_reducer", logging.DEBUG)
    train_data = pd.read_csv(settings.data.train_path)
    test_data = pd.read_csv(settings.data.test_path)
    # Drop the label column; PCA operates on features only.
    train_data = train_data.drop("species", axis=1)
    dr = DataReducer(train_data, test_data)
    dr.pca_data_reduction()
    pca_train_data = dr.get_pca_x_train()
    pca_test_data = dr.get_pca_x_test()
    print(pca_train_data.shape)
    print(pca_test_data)