Пример #1
0
def selectBestModel(project_file, results_model_file):
    """Report the best classification result of a project and build its model.

    An HTML summary is written to ``results_model_file + '.results.html'``.
    If the project's results directory exists, the top entry returned by
    ``ClassificationResults.best`` is printed, its confusion matrix is
    appended to the summary, ``trainSVMHistory`` is called with the matching
    parameter file, and that parameter file is copied to
    ``results_model_file + '.param'``. If the directory does not exist, a
    "Results not found" summary is written instead.

    Args:
        project_file: Path to the YAML project file; the keys ``className``
            and ``resultsDirectory`` are read from it.
        results_model_file: Path prefix used for every generated file.
    """
    # The report is opened first (as before) and is now closed
    # deterministically, even if a later step raises.
    with open(results_model_file + '.results.html', 'w') as f:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects and newer PyYAML releases require a Loader
        # argument; consider yaml.safe_load if project files are plain data.
        with open(project_file, 'r') as pfile:
            project = yaml.load(pfile)
        className = project['className']
        results_dir = project['resultsDirectory']

        if not os.path.exists(results_dir):
            print("RESULT No results found for  %s : cannot build a model" %
                  project_file)
            # The original formatted an undefined name (`collection`) here,
            # which raised NameError; the class name is what the success
            # branch reports in the same heading.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' %
                    (className, project_file))
            return

        classifierType = None  # all types

        cr = ClassificationResults()
        print('Loading all results...')
        cr.readResults(results_dir)

        # Third tuple field (the parameters) is not needed here.
        accuracy, filename, _ = cr.best(1, classifierType)[0]
        print("RESULT " + project_file + '\t' + str(accuracy) + '\t' +
              filename)

        f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' %
                (className, project_file, accuracy))

        cm = ConfusionMatrix()
        cm.load(filename)
        f.write(cm.toHtml())

        # Derive the parameter file name from the result file name.
        filename = filename.replace('.result', '.param')

        trainSVMHistory(project_file, filename, results_model_file, className)
        shutil.copyfile(filename, results_model_file + '.param')
Пример #2
0
def selectBestModel(project_file, results_model_file):
    """Report the best classification result of a project and build its model.

    An HTML summary is written to ``results_model_file + '.results.html'``.
    If the project's results directory exists, the top entry returned by
    ``ClassificationResults.best`` is printed, its confusion matrix is
    appended to the summary, ``trainSVMHistory`` is called with the matching
    parameter file, and that parameter file is copied to
    ``results_model_file + '.param'``. If the directory does not exist, a
    "Results not found" summary is written instead.

    Args:
        project_file: Path to the YAML project file; the keys ``className``
            and ``resultsDirectory`` are read from it.
        results_model_file: Path prefix used for every generated file.
    """
    # Every print below passes one pre-built string in parentheses, which
    # produces the same output under Python 2 and Python 3.
    with open(results_model_file + '.results.html', 'w') as f:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects and newer PyYAML releases require a Loader
        # argument; consider yaml.safe_load if project files are plain data.
        with open(project_file, 'r') as pfile:
            project = yaml.load(pfile)
        className = project['className']
        results_dir = project['resultsDirectory']

        if not os.path.exists(results_dir):
            print("RESULT No results found for  %s : cannot build a model"
                  % project_file)
            # The original formatted an undefined name (`collection`) here,
            # which raised NameError; the class name is what the success
            # branch reports in the same heading.
            f.write('<h1>%s (%s) </h1>\nResults not found\n'
                    % (className, project_file))
            return

        classifierType = None  # all types

        cr = ClassificationResults()
        print('Loading all results...')
        cr.readResults(results_dir)

        # Third tuple field (the parameters) is not needed here.
        accuracy, filename, _ = cr.best(1, classifierType)[0]
        print("RESULT " + project_file + '\t' + str(accuracy) + '\t' + filename)

        f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n'
                % (className, project_file, accuracy))

        cm = ConfusionMatrix()
        cm.load(filename)
        f.write(cm.toHtml())

        # Derive the parameter file name from the result file name.
        filename = filename.replace('.result', '.param')

        trainSVMHistory(project_file, filename, results_model_file, className)
        shutil.copyfile(filename, results_model_file + '.param')
Пример #3
0
def selectBestModel():
    """Command-line entry point: report the best result and build its model.

    Expects two positional arguments, ``project_file`` and
    ``results_model_file``. When either is missing, the usage text is printed
    and the process exits with status 1.

    An HTML summary is written to ``results_model_file + '.results.html'``.
    If the project's results directory exists, the top entry returned by
    ``ClassificationResults.best`` is printed, its confusion matrix is
    appended to the summary, ``trainSVMHistory`` is called with the matching
    parameter file, and that parameter file is copied to
    ``results_model_file + '.param'``. If the directory does not exist, a
    "Results not found" summary is written instead.
    """
    parser = OptionParser(
        usage='%prog [options] project_file results_model_file')

    _, args = parser.parse_args()

    # Only a missing positional argument is expected here, so catch exactly
    # that instead of swallowing every exception with a bare `except`.
    try:
        project_file = args[0]
        results_model_file = args[1]
    except IndexError:
        parser.print_help()
        sys.exit(1)

    # Every print below passes one pre-built string in parentheses, which
    # produces the same output under Python 2 and Python 3.
    with open(results_model_file + '.results.html', 'w') as f:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects and newer PyYAML releases require a Loader
        # argument; consider yaml.safe_load if project files are plain data.
        with open(project_file, 'r') as pfile:
            project = yaml.load(pfile)
        className = project['className']
        results_dir = project['resultsDirectory']

        if not os.path.exists(results_dir):
            print("RESULT No results found for  %s : cannot build a model"
                  % project_file)
            # The original formatted an undefined name (`collection`) here,
            # which raised NameError; the class name is what the success
            # branch reports in the same heading.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' %
                    (className, project_file))
            return

        classifierType = None  # all types

        cr = ClassificationResults()
        print('Loading all results...')
        cr.readResults(results_dir)

        # Third tuple field (the parameters) is not needed here.
        accuracy, filename, _ = cr.best(1, classifierType)[0]
        print("RESULT " + project_file + '\t' + str(accuracy) + '\t' + filename)

        f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' %
                (className, project_file, accuracy))

        cm = ConfusionMatrix()
        cm.load(filename)
        f.write(cm.toHtml())

        # Derive the parameter file name from the result file name.
        filename = filename.replace('.result', '.param')

        trainSVMHistory(project_file, filename, results_model_file, className)
        shutil.copyfile(filename, results_model_file + '.param')
Пример #4
0
def update_parameters(project_file, c_values, gamma_values, preprocessing_values):
    """Rewrite the SVM settings of a project file with the given overrides.

    The project file is parsed as YAML, each entry under
    ``classifiers -> svm`` is updated, and the whole document is dumped back
    to the same path. An override whose value is falsy (``None``, empty
    list, ...) leaves the corresponding setting untouched.

    Args:
        project_file: Path of the YAML project file to modify in place.
        c_values: New value for the ``C`` setting.
        gamma_values: New value for the ``gamma`` setting.
        preprocessing_values: New value for the ``preprocessing`` setting.
    """
    with open(project_file, "r") as source:
        config = yaml.load(source)

    # Keep only the overrides that were actually supplied, in the order the
    # settings are written.
    candidates = (
        ('C', c_values),
        ('gamma', gamma_values),
        ('preprocessing', preprocessing_values),
    )
    overrides = [(key, value) for key, value in candidates if value]

    for svm_entry in config['classifiers']['svm']:
        for key, value in overrides:
            svm_entry[key] = value

    with open(project_file, "w") as target:
        yaml.dump(config, target)
def select_best_model(project_dir):
    """Pick the top classification result of a project and summarise it.

    The project description is read from ``PROJECT_FILE_NAME`` inside
    ``project_dir``. The first entry returned by
    ``ClassificationResults.best(1, None)`` supplies the accuracy, result
    file and parameters; ``train_svm_history`` is then called with those
    parameters and a history path inside ``project_dir``.

    Args:
        project_dir: Directory that contains the YAML project file.

    Returns:
        Dictionary with the following keys:
            - parameters: parameters of the selected result;
            - accuracy: its accuracy rounded to two decimals;
            - confusion_matrix: for every key of the loaded confusion
                matrix, a dict mapping each predicted key to the number of
                items stored under it;
            - history_path: path passed to ``train_svm_history``.
    """
    project_path = os.path.join(project_dir, PROJECT_FILE_NAME)
    with open(project_path) as handle:
        project = yaml.load(handle)

    class_name = project["className"]
    all_results = ClassificationResults()
    all_results.readResults(project["resultsDirectory"])
    top_accuracy, top_result_file, top_params = all_results.best(1, None)[0]

    matrix = ConfusionMatrix()
    matrix.load(top_result_file)
    # Collapse each cell to the count of items it holds.
    cell_counts = {
        actual: {predicted: len(items) for predicted, items in row.items()}
        for actual, row in matrix.matrix.items()
    }

    history_path = os.path.join(project_dir, "%s.history" % class_name)
    train_svm_history(project, top_params, history_path)

    summary = {}
    summary["parameters"] = top_params
    summary["accuracy"] = round(top_accuracy, 2)
    summary["confusion_matrix"] = cell_counts
    summary["history_path"] = history_path
    return summary
def select_best_model(project_dir):
    """Choose the best-scoring result of a project and describe it.

    Reads the YAML project file named ``PROJECT_FILE_NAME`` from
    ``project_dir``, takes the first entry of
    ``ClassificationResults.best(1, None)`` and calls ``train_svm_history``
    with that entry's parameters and a history path inside ``project_dir``.

    Args:
        project_dir: Directory that contains the YAML project file.

    Returns:
        Dictionary with the following keys:
            - parameters: parameters of the selected result;
            - accuracy: its accuracy rounded to two decimals;
            - confusion_matrix: for every key of the loaded confusion
                matrix, a dict mapping each predicted key to the number of
                items stored under it;
            - history_path: path passed to ``train_svm_history``.
    """
    with open(os.path.join(project_dir, PROJECT_FILE_NAME)) as stream:
        project = yaml.load(stream)

    name = project["className"]
    collected = ClassificationResults()
    collected.readResults(project["resultsDirectory"])
    winner = collected.best(1, None)[0]
    accuracy, result_file, params = winner

    confusion = ConfusionMatrix()
    confusion.load(result_file)

    # Replace every cell by the number of items it contains.
    counts_by_actual = {}
    for actual, row in confusion.matrix.items():
        counts_by_actual[actual] = {
            predicted: len(items) for predicted, items in row.items()
        }

    history_file = os.path.join(project_dir, "%s.history" % name)
    train_svm_history(project, params, history_file)

    return dict(
        parameters=params,
        accuracy=round(accuracy, 2),
        confusion_matrix=counts_by_actual,
        history_path=history_file,
    )
Пример #7
0
def retrainModel(project_file, params_file, output_file):
    """Call ``trainSVMHistory`` for a project using a given parameter file.

    Args:
        project_file: Path to the YAML project file; its ``className`` key
            is read and forwarded.
        params_file: Parameter file forwarded to ``trainSVMHistory``.
        output_file: Output path forwarded to ``trainSVMHistory``.
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects and newer PyYAML releases require a Loader argument;
    # consider yaml.safe_load if project files are plain data.
    # The context manager closes the project file, which was leaked before.
    with open(project_file, 'r') as pfile:
        project = yaml.load(pfile)
    class_name = project['className']

    trainSVMHistory(project_file, params_file, output_file, class_name)
Пример #8
0
def generate_params_report(project_file, result_file):
    """Write a tab-separated report of all classification results of a project.

    Every result returned by ``ClassificationResults.best`` becomes one row
    holding its accuracy, normalized accuracy, the rank under each of the two
    measures, the result/parameter file names and the model settings. When
    the project's results directory does not exist, a hint is printed and no
    file is written.

    Args:
        project_file: Path to the YAML project file; its ``resultsDirectory``
            key is read.
        result_file: Name of the TSV file, created in the directory of
            ``project_file``.
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects and newer PyYAML releases require a Loader argument;
    # consider yaml.safe_load if project files are plain data.
    # The context manager closes the project file, which was leaked before.
    with open(project_file, 'r') as pfile:
        project = yaml.load(pfile)
    project_file_dir = os.path.dirname(project_file)
    results_dir = project['resultsDirectory']

    tsv_file = os.path.join(project_file_dir, result_file)

    if not os.path.exists(results_dir):
        print('No results found for {} in its "resultsDirectory" ({}).\n'
              'If the project was moved you may want to manually update '
              'this field on the project file.'.format(project_file,
                                                       results_dir))
        return

    classifier_type = None  # all types

    cr = ClassificationResults()
    print('Loading all results...')
    cr.read_results(results_dir)

    n_results = len(cr.results)
    results = cr.best(n_results, classifier_type)
    # results is a list of tuples sorted by accuracy with these fields:
    # (accuracy, std, norm_accuracy, norm_std, results_file, results)

    # Map each accuracy rank (1-based position in `results`) to its rank by
    # normalized accuracy. A dict gives constant-time lookups per row and,
    # unlike unpacking `zip(*...)`, also works when there are no results.
    by_norm_accuracy = sorted(
        ((entry[2], position) for position, entry in enumerate(results, 1)),
        reverse=True)
    norm_rank = {
        position: rank
        for rank, (_, position) in enumerate(by_norm_accuracy, 1)
    }

    fieldnames = [
        'accuracy_rank', 'accuracy', 'accuracy_std',
        'normalized_accuracy_rank', 'normalized_accuracy',
        'normalized_accuracy_std', 'results_file', 'param_file', 'kernel',
        'C', 'preprocessing', 'balance_classes', 'type', 'classifier',
        'gamma', 'evaluation'
    ]

    rows = []
    for k, (accuracy, acc_std, norm_accuracy, norm_std, results_file,
            results_param) in enumerate(results, 1):
        model = results_param['model']
        rows.append({
            'accuracy_rank': k,
            'accuracy': accuracy,
            'accuracy_std': acc_std,
            'normalized_accuracy_rank': norm_rank[k],
            'normalized_accuracy': norm_accuracy,
            'normalized_accuracy_std': norm_std,
            'results_file': results_file,
            # NOTE(review): str.rstrip removes a trailing run of the given
            # characters, not the literal suffix 'results'; behaviour is
            # kept as-is, confirm result files always end in '.results'.
            'param_file': results_file.rstrip('results') + 'params',
            'kernel': model['kernel'],
            'C': model['C'],
            'preprocessing': model['preprocessing'],
            'balance_classes': model['balanceClasses'],
            'type': model['type'],
            'classifier': model['classifier'],
            'gamma': model['gamma'],
            'evaluation': results_param['evaluation']
        })

    with open(tsv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=fieldnames,
                                delimiter='\t')
        writer.writeheader()
        writer.writerows(rows)