Exemplo n.º 1
0
def main():
    """Run the decision tree experiment and write a submission.

    Steps, in order: resolve and load the "decision_tree" settings, set up
    logging, load and scale the train/test data, add image-derived features,
    apply the lasso-based removal step, reduce the remaining features with
    PCA, register the resulting data and a default ``DecisionTreeClassifier``
    with the ``ModelSelector``, then ask it for the best model and generate
    the submission.
    """
    settings_path = get_settings_path_from_arg("decision_tree")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "decision_tree_classifier", logging.DEBUG)
    ms = ModelSelector()

    # Load test and training data once (the previous version loaded the same
    # files twice and discarded the first loader).
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction; both size arguments are derived from the
    # number of classes.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(
        dl.id_train, dl.x_train, dl.y_train,
        dl.id_test, dl.x_test
    )
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction, then rebuild the selector around the reduced
    # feature matrices.
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(
        dl.id_train, dr.x_train, dl.y_train,
        dl.id_test, dr.x_test
    )
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add classifier to model selector.
    # NOTE(review): the classifier is registered under an empty label; kept
    # as-is because the label may be used in generated output -- confirm.
    clf = DecisionTreeClassifier()
    ms.add_classifier("", clf)

    # Get best model
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the quadratic discriminant analysis experiment and write a submission.

    Steps, in order: resolve and load the "qda_classifier" settings, set up
    logging, load and scale the train/test data, add image-derived features,
    apply the lasso-based removal step, reduce the remaining features with
    PCA, register the resulting data and three
    ``QuadraticDiscriminantAnalysis`` configurations with the
    ``ModelSelector``, then ask it for the best model and generate the
    submission.
    """
    settings_path = get_settings_path_from_arg("qda_classifier")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "qda_classifier", logging.DEBUG)
    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction; both size arguments are derived from the
    # number of classes.
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test,
                      dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction, then rebuild the selector around the reduced
    # feature matrices.
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test,
                      dr.x_test)
    ds.add_all()

    # Add data selection to model selector. This registration was missing,
    # so the prepared features never reached the model selector.
    ms.add_selector("all_feature", ds)

    # Add Classifier to model selector
    clf = QuadraticDiscriminantAnalysis(store_covariance=True)
    ms.add_classifier("store_covariance=True", clf)

    clf = QuadraticDiscriminantAnalysis(store_covariance=False)
    ms.add_classifier("store_covariance=False", clf)

    # The deprecated ``store_covariances`` keyword is no longer passed: None
    # was its default, and scikit-learn releases that removed the parameter
    # reject it with a TypeError.
    clf = QuadraticDiscriminantAnalysis(priors=None,
                                        reg_param=0.0,
                                        store_covariance=False,
                                        tol=0.0001)
    ms.add_classifier("final", clf)

    # Get best model
    ms.get_best_model(k=10, plot=True)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the Bernoulli naive Bayes experiment and write a submission.

    Steps, in order: resolve and load the "naive_bayes_classifier" settings,
    set up logging, load and scale the train/test data, add image-derived
    features, apply the lasso-based removal step, reduce the remaining
    features with PCA, register the resulting data and a ``BernoulliNB``
    classifier with the ``ModelSelector``, then ask it for the best model and
    generate the submission.
    """
    settings_path = get_settings_path_from_arg("naive_bayes_classifier")
    settings = load_settings(settings_path)

    init_logger(settings.log.dir, "naive_bayes_classifier", logging.DEBUG)
    ms = ModelSelector()

    # Load test and training
    dl = DataLoader()
    dl.load_train(settings.data.train_path)
    dl.load_test(settings.data.test_path)
    dl.scale_data()

    # Image feature extraction; both size arguments are derived from the
    # number of classes. (A DataSelector used to be built before this step
    # and was overwritten without ever being used; it has been dropped.)
    k = np.size(dl.classes) * 10
    dl.load_from_images(settings.data.image_path, k, k * 3, verbose=False)

    # Add Data Selector
    ds = DataSelector(dl.id_train, dl.x_train, dl.y_train, dl.id_test,
                      dl.x_test)
    ds.add_all()

    # Use lasso
    ds.auto_remove_lasso(0.17)

    # Dimensionality reduction, then rebuild the selector around the reduced
    # feature matrices.
    dr = DataReducer(ds.train_x, ds.test_x)
    dr.pca_data_reduction()
    ds = DataSelector(dl.id_train, dr.x_train, dl.y_train, dl.id_test,
                      dr.x_test)
    ds.add_all()

    # Add data selection to model selector
    ms.add_selector("all_feature", ds)

    # Add classifier to model selector
    clf = BernoulliNB(alpha=3.5, binarize=0.03)
    ms.add_classifier("bernoulli", clf)

    # Get best model
    ms.get_best_model(k=10)
    ms.generate_submission(settings.data.submission_dir, dl.classes)
def main():
    """Run the random forest experiment and write a submission.

    Loads the "random_forest_classifier" settings, prepares the data (scaling,
    image-derived features, lasso-based removal, PCA reduction), registers the
    data and one ``RandomForestClassifier`` with the ``ModelSelector``, then
    asks it for the best model and generates the submission.
    """
    config = load_settings(get_settings_path_from_arg("random_forest_classifier"))

    init_logger(config.log.dir, "random_forest_classifier", logging.DEBUG)
    selector = ModelSelector()

    # Read both data sets and scale them.
    loader = DataLoader()
    loader.load_train(config.data.train_path)
    loader.load_test(config.data.test_path)
    loader.scale_data()

    # Augment with image features; both size arguments scale with the number
    # of classes.
    base_size = np.size(loader.classes) * 10
    loader.load_from_images(
        config.data.image_path, base_size, base_size * 3, verbose=False
    )

    # First pass: take every feature, then apply the lasso-based removal.
    raw_features = DataSelector(
        loader.id_train, loader.x_train, loader.y_train,
        loader.id_test, loader.x_test,
    )
    raw_features.add_all()
    raw_features.auto_remove_lasso(0.17)

    # Second pass: PCA on what is left, wrapped in a fresh selector.
    reducer = DataReducer(raw_features.train_x, raw_features.test_x)
    reducer.pca_data_reduction()
    reduced_features = DataSelector(
        loader.id_train, reducer.x_train, loader.y_train,
        loader.id_test, reducer.x_test,
    )
    reduced_features.add_all()
    selector.add_selector("all_feature", reduced_features)

    # Register the single forest configuration under a descriptive label.
    # NOTE(review): the label spells "bootsrap"; left untouched because the
    # label is runtime text that may appear in generated output.
    forest = RandomForestClassifier(
        n_estimators=13,
        random_state=0,
        min_samples_leaf=3,
        bootstrap=False,
    )
    forest_label = (
        "n_estimators=13, random_state=0, min_samples_leaf=3, bootsrap=False"
    )
    selector.add_classifier(forest_label, forest)

    # Pick the best model and emit the submission.
    selector.get_best_model(k=10)
    selector.generate_submission(config.data.submission_dir, loader.classes)
def main():
    """Run the k-nearest-neighbours experiment and write a submission.

    Loads the "k_neighbors_classifier" settings, prepares the data (scaling,
    image-derived features, lasso-based removal, PCA reduction), registers the
    data plus one distance-weighted ``KNeighborsClassifier`` (``p=1``) for
    each neighbour count from 1 to 39 with the ``ModelSelector``, then asks it
    for the best model and generates the submission.
    """
    config = load_settings(get_settings_path_from_arg("k_neighbors_classifier"))

    init_logger(config.log.dir, "k_neighbors_classifier", logging.DEBUG)
    selector = ModelSelector()

    # Read both data sets and scale them.
    loader = DataLoader()
    loader.load_train(config.data.train_path)
    loader.load_test(config.data.test_path)
    loader.scale_data()

    # Augment with image features; both size arguments scale with the number
    # of classes.
    base_size = np.size(loader.classes) * 10
    loader.load_from_images(
        config.data.image_path, base_size, base_size * 3, verbose=False
    )

    # First pass: take every feature, then apply the lasso-based removal.
    raw_features = DataSelector(
        loader.id_train, loader.x_train, loader.y_train,
        loader.id_test, loader.x_test,
    )
    raw_features.add_all()
    raw_features.auto_remove_lasso(0.17)

    # Second pass: PCA on what is left, wrapped in a fresh selector.
    reducer = DataReducer(raw_features.train_x, raw_features.test_x)
    reducer.pca_data_reduction()
    reduced_features = DataSelector(
        loader.id_train, reducer.x_train, loader.y_train,
        loader.id_test, reducer.x_test,
    )
    reduced_features.add_all()
    selector.add_selector("all_feature", reduced_features)

    # One candidate per neighbour count, 1 through 39 inclusive.
    label_template = "k_{}_distance_p1"
    neighbour_count = 1
    while neighbour_count < 40:
        candidate = KNeighborsClassifier(
            neighbour_count, weights="distance", p=1
        )
        selector.add_classifier(
            label_template.format(neighbour_count), candidate
        )
        neighbour_count += 1

    # Pick the best model and emit the submission.
    selector.get_best_model(k=10, plot=True)
    selector.generate_submission(config.data.submission_dir, loader.classes)