Пример #1
0
def combined_symmetry_train_classifier(
        data=None,
        data_backup_filename='ShapeAndTextureScores',
        filename_to_save_model='ShapeAndTextureModel'):
    """Fit a random forest on the merged shape and texture symmetry scores.

    The scores table must expose a `Labels` column (expert diagnosis about
    symmetry, PH2 Dataset). The first 50 rows are held out for validation and
    every remaining row is used for training.

    # Arguments :
        data:   As returned by the texture_symmetry_scores function (optional).
                Its first column is left out of the feature list.
        data_backup_filename:   Only if data is None, base name (without the
                ".csv" extension) of the file read from the package's
                "data/patchesDataSet" directory. The first two columns of
                the loaded table are left out of the feature list.
        filename_to_save_model: String or None. When truthy, the fitted
                classifier is passed to `save_model` under this name.

    # Outputs :
        clf: The fitted classifier.
        acc: The accuracy score of the classifier on the 50 held-out rows.
    """
    read_from_disk = data is None
    if read_from_disk:
        csv_path = f"{package_path()}/data/patchesDataSet/{data_backup_filename}.csv"
        data = pd.read_csv(csv_path, index_col=False)

    feature_names = list(data)
    if read_from_disk:
        # NOTE(review): presumably the CSV carries an extra leading column
        # (e.g. a saved index) ahead of the labels -- confirm against the
        # code that writes the file.
        del feature_names[0]
    del feature_names[0]  # The labels column is not a feature.

    holdout = 50  # Number of leading rows reserved for validation.
    samples = data[feature_names]
    targets = data.Labels
    train_X, train_y = samples[holdout:], targets[holdout:]
    val_X, val_y = samples[:holdout], targets[:holdout]

    forest = RandomForestClassifier(
        n_estimators=10,
        max_leaf_nodes=3,
        random_state=2,
    )
    forest.fit(train_X, train_y)

    accuracy = accuracy_score(val_y, forest.predict(val_X))

    # Persist only after the evaluation succeeded.
    if filename_to_save_model:
        save_model(forest, filename_to_save_model)

    return forest, accuracy
Пример #2
0
def classifierTrainer(maxLeafNodes,
                      data=None,
                      data_backup_file='patchesDataSet/Features',
                      filename_to_save_model='PatchClassifierModel'):
    """Train a random forest classifier with data from the patchesDataSet.

    The table must expose a `Result` column, which is also expected to be its
    last column. The first 500 rows are held out for validation and every
    remaining row is used for training.

    # Arguments :
        maxLeafNodes: Int or None. Grow trees with max_leaf_nodes in best-first fashion. Best nodes are
        defined as relative reduction in impurity. If None then unlimited number of leaf nodes (scikit-learn
        RandomForestClassifier() documentation).
        data: Features table (optional). When None, the table is read from
        the CSV file designated by `data_backup_file`.
        data_backup_file: Only if data is None, path (without the ".csv"
        extension) of the file to load. It is looked up first under the
        package's "data/patchesDataSet" directory and, if no such file
        exists, directly under the package's "data" directory.
        filename_to_save_model: String or None. When truthy, the fitted
        classifier is passed to `save_model` under this name.

    # Outputs :
        clf: The fitted classifier.
        acc: The accuracy score of the classifier on the 500 held-out rows.
    """
    # Imported locally so the function does not rely on a module-level import.
    import os

    read_from_disk = data is None
    if read_from_disk:
        data_dir = f"{package_path()}/data"
        csv_path = f"{data_dir}/patchesDataSet/{data_backup_file}.csv"
        if not os.path.isfile(csv_path):
            # The default `data_backup_file` already starts with
            # "patchesDataSet/", so the path above repeats that directory.
            # Fall back to the non-duplicated location, but only when the
            # legacy path is missing, so existing layouts keep working.
            fallback_path = f"{data_dir}/{data_backup_file}.csv"
            if os.path.isfile(fallback_path):
                csv_path = fallback_path
        # If neither file exists, read_csv reports the legacy path as before.
        data = pd.read_csv(csv_path, index_col=False)

    features = list(data)
    if read_from_disk:
        # NOTE(review): presumably the CSV carries an extra leading column
        # (e.g. a saved index) -- confirm against the code writing the file.
        del features[0]
    del features[-1]  # Remove `Result` colname.

    holdout = 500  # Number of leading rows reserved for validation.
    samples = data[features]
    targets = data.Result
    trainX, trainy = samples[holdout:], targets[holdout:]
    valX, valy = samples[:holdout], targets[:holdout]

    clf = RandomForestClassifier(max_leaf_nodes=maxLeafNodes, random_state=2)
    clf.fit(trainX, trainy)

    if filename_to_save_model:
        save_model(clf, filename_to_save_model)

    preds = clf.predict(valX)
    acc = accuracy_score(valy, preds)

    return clf, acc
Пример #3
0
def example():
    """Run the end-to-end workflow of this file as a usage example.

    Calls `dataExtractorForTraining`, trains a classifier with
    `classifierTrainer` using 200 as the leaf-node limit, prints the resulting
    accuracy and saves the model as "SimilarPatchClassifier".
    """
    dataExtractorForTraining(10, 199, 4)

    # `classifierTrainer` already saves the model under its own default name;
    # the explicit call below stores a second copy under another name.
    model, accuracy = classifierTrainer(200)
    print(f'Accuracy score: {accuracy}')
    save_model(model, "SimilarPatchClassifier")