Exemplo n.º 1
def selectBestModel(project_file, results_model_file):
    """Report on the best classification result of a project and build its model.

    The YAML project file is read for its ``className`` and
    ``resultsDirectory`` entries.  If the results directory exists, the first
    entry returned by ``ClassificationResults.best(1, None)`` is reported,
    ``trainSVMHistory`` is called with the matching ``.param`` file, and that
    file is copied to ``<results_model_file>.param``.  Otherwise a
    "Results not found" report is written.

    In both cases an HTML report is written to
    ``<results_model_file>.results.html``.

    Args:
        project_file: path of the YAML project file.
        results_model_file: path prefix of every file written by this
            function.
    """
    # The report is opened first (as before) so that it exists even when the
    # project cannot be loaded; the context manager guarantees it is closed.
    with open(results_model_file + '.results.html', 'w') as f:
        # NOTE(review): yaml.load can build arbitrary objects from untrusted
        # input; consider yaml.safe_load if project files can come from
        # outside.
        with open(project_file, 'r') as project_stream:
            project = yaml.load(project_stream)
        className = project['className']
        results_dir = project['resultsDirectory']

        if os.path.exists(results_dir):
            classifierType = None  # all types

            cr = ClassificationResults()
            print('Loading all results...')
            # NOTE(review): nothing visible here feeds `results_dir` into `cr`
            # before it is queried -- confirm a loading step is not missing.

            accuracy, filename, params = cr.best(1, classifierType)[0]
            print("RESULT " + project_file + '\t' + str(accuracy) + '\t' +
                  filename)

            f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' %
                    (className, project_file, accuracy))

            # NOTE(review): the matrix is created but never used in the
            # visible code -- confirm whether it should be loaded and added
            # to the report.
            cm = ConfusionMatrix()

            # The parameter file shares its name with the result file.
            filename = filename.replace('.result', '.param')

            trainSVMHistory(project_file, filename, results_model_file,
                            className)
            shutil.copyfile(filename, results_model_file + '.param')

        else:
            print("RESULT " + "No results found for ", project_file,
                  ": cannot build a model")
            # `className` replaces the undefined name `collection`, matching
            # the header written in the success branch.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' %
                    (className, project_file))
Exemplo n.º 2
    def testAccuraciesNFold(self):
        # Counts handed to populateFold, one tuple per fold.
        fold_counts = [
            (1, 0, 1, 0),  # fold 0, acc = 100%
            (0, 1, 0, 1),  # fold 1, acc = 0%
        ]

        matrix = ConfusionMatrix()
        for fold_index, counts in enumerate(fold_counts):
            matrix = self.populateFold(matrix, *counts, fold=fold_index)

        # The overall accuracy is expected to be the mean of both folds.
        self.assertEqual(matrix.accuracy(), 50.0)
Exemplo n.º 3
    def testNormalizedAccuracy(self):
        # Confusion matrix under test:
        #      A   B
        #  A | 3 | 1 |
        #  B | 6 | 2 |
        # The raw accuracy is 5 / 12 ~= 41.67%.  The normalized accuracy
        # instead divides each class contribution by the class population
        # (A accuracy: 3 / 4, B accuracy: 2 / 8), which yields 50%.
        matrix = self.populateFold(ConfusionMatrix(), 3, 1, 2, 6, fold=0)

        expected_raw_accuracy = 100 * 5. / 12
        self.assertEqual(matrix.accuracy(), expected_raw_accuracy)
        self.assertEqual(matrix.normalizedAccuracy(), 50.0)
Exemplo n.º 4
def selectBestModel():
    """Command-line entry point: report the best result and build its model.

    Two positional command-line arguments are required: ``project_file`` (a
    YAML project description) and ``results_model_file`` (path prefix of the
    files written here).  If the project's results directory exists, the
    first entry returned by ``ClassificationResults.best(1, None)`` is
    reported, ``trainSVMHistory`` is called with the matching ``.param``
    file, and that file is copied to ``<results_model_file>.param``.
    Otherwise a "Results not found" report is written.

    In both cases an HTML report is written to
    ``<results_model_file>.results.html``.
    """
    parser = OptionParser(
        usage='%prog [options] project_file results_model_file')

    options, args = parser.parse_args()

    if len(args) < 2:
        # parser.error() prints the usage text and exits with status 2.
        parser.error('project_file and results_model_file are required')

    project_file = args[0]
    results_model_file = args[1]

    # The context manager guarantees the report is flushed and closed.
    with open(results_model_file + '.results.html', 'w') as f:
        # NOTE(review): yaml.load can build arbitrary objects from untrusted
        # input; consider yaml.safe_load if project files can come from
        # outside.
        with open(project_file, 'r') as project_stream:
            project = yaml.load(project_stream)
        className = project['className']
        results_dir = project['resultsDirectory']

        if os.path.exists(results_dir):
            classifierType = None  # all types

            cr = ClassificationResults()
            # Single-argument print(...) calls emit the same text under
            # Python 2 and Python 3.
            print('Loading all results...')
            # NOTE(review): nothing visible here feeds `results_dir` into `cr`
            # before it is queried -- confirm a loading step is not missing.

            accuracy, filename, params = cr.best(1, classifierType)[0]
            print("RESULT " + project_file + '\t' + str(accuracy) + '\t' +
                  filename)

            f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' %
                    (className, project_file, accuracy))

            # NOTE(review): the matrix is created but never used in the
            # visible code -- confirm whether it should be loaded and added
            # to the report.
            cm = ConfusionMatrix()

            # The parameter file shares its name with the result file.
            filename = filename.replace('.result', '.param')

            trainSVMHistory(project_file, filename, results_model_file,
                            className)
            shutil.copyfile(filename, results_model_file + '.param')

        else:
            # The doubled space after "for" reproduces the output of the
            # original comma-separated print statement.
            print("RESULT " + "No results found for  %s : cannot build a model"
                  % project_file)
            # `className` replaces the undefined name `collection`, matching
            # the header written in the success branch.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' %
                    (className, project_file))
Exemplo n.º 5
    def readResults(self, dir):
        """Register every ``*.result`` file found in the given directory.

        For each matching file a ConfusionMatrix object is created, the file
        with the same base name and a ``.param`` extension is parsed as YAML,
        and a ``(filename, confusion_matrix, params)`` tuple is appended to
        ``self.results``.

        Args:
            dir: directory in which files matching ``*.result`` are looked up.
        """
        resultFiles = glob.glob(join(dir, '*.result'))
        # NOTE(review): the progress object is created but never advanced in
        # this method -- confirm whether an update call is missing.
        progress = TextProgress(len(resultFiles))

        for filename in resultFiles:
            # NOTE(review): the matrix is stored exactly as constructed;
            # nothing here fills it from `filename` -- confirm this is
            # intended.
            cm = ConfusionMatrix()

            paramFile = splitext(filename)[0] + '.param'
            # Close the parameter file right away instead of leaving the
            # handle to the garbage collector.
            # NOTE(review): yaml.load can build arbitrary objects from
            # untrusted input; consider yaml.safe_load.
            with open(paramFile) as paramStream:
                params = yaml.load(paramStream.read())

            self.results.append((filename, cm, params))

Exemplo n.º 6
    def testStdNfold(self):
        # Counts handed to populateFold, one tuple per fold.
        fold_counts = [
            (0, 1, 0, 1),  # fold 0, acc = 0%
            (1, 9, 1, 9),  # fold 1, acc = 10%
            (2, 8, 2, 8),  # fold 2, acc = 20%
        ]

        matrix = ConfusionMatrix()
        for fold_index, counts in enumerate(fold_counts):
            matrix = self.populateFold(matrix, *counts, fold=fold_index)

        # Analytic standard deviation: each fold contributes the square of
        # its accuracy minus the mean accuracy (-10, 0 and +10 here).
        deviations = (-10., 0., 10.)
        squared_deviations = [delta * delta for delta in deviations]
        expected_std = sqrt(sum(squared_deviations) / 3.)

        self.assertEqual(matrix.stdNfold(), expected_std)
def select_best_model(project_dir):
    """Selects most accurate classifier parameters for the specified project.

    Args:
        project_dir: Path to the directory that contains the project file
            (named PROJECT_FILE_NAME) in YAML format.

    Returns:
        Dictionary that contains information about best model for the dataset:
            - parameters: classifier parameters for selected model;
            - accuracy: accuracy of selected model, rounded to two decimals;
            - confusion_matrix: simplified version of confusion matrix for
                selected model, where every cell is replaced by its length;
            - history_path: path to the history file generated using returned
                set of parameters for the best model.
    """
    # NOTE(review): yaml.load can build arbitrary objects from untrusted
    # input; consider yaml.safe_load if project files can come from outside.
    with open(os.path.join(project_dir, PROJECT_FILE_NAME)) as project_file:
        project = yaml.load(project_file)

    classifierName = project["className"]
    # NOTE(review): nothing visible here loads any results into `results`
    # before it is queried -- confirm a loading step is not missing.
    results = ClassificationResults()
    best_accuracy, best_result_file, best_params = results.best(1, None)[0]

    # NOTE(review): the matrix is used exactly as constructed; nothing here
    # fills it from `best_result_file` -- confirm this is intended.
    cm = ConfusionMatrix()
    simplified_cm = {
        key: {
            predicted_key: len(predicted_val)
            for predicted_key, predicted_val in val.items()
        }
        for key, val in cm.matrix.items()
    }

    history_file_path = os.path.join(project_dir,
                                     "%s.history" % classifierName)
    train_svm_history(project, best_params, history_file_path)

    return {
        "parameters": best_params,
        "accuracy": round(best_accuracy, 2),
        "confusion_matrix": simplified_cm,
        "history_path": history_file_path,
    }
Exemplo n.º 8
    def testStdNfoldNormalizedAccuracies(self):
        # Same scenario as the plain std test, but on normalized accuracies.
        fold_counts = [
            (0, 1, 0, 1),    # fold 0, normalized acc = 0%
            (4, 16, 0, 10),  # fold 1, normalized acc = 10%
            (2, 8, 2, 8),    # fold 2, normalized acc = 20%
        ]

        matrix = ConfusionMatrix()
        for fold_index, counts in enumerate(fold_counts):
            matrix = self.populateFold(matrix, *counts, fold=fold_index)

        # Analytic standard deviation: each fold contributes the square of
        # its accuracy minus the mean accuracy (-10, 0 and +10 here).
        deviations = (-10., 0., 10.)
        squared_deviations = [delta * delta for delta in deviations]
        expected_std = sqrt(sum(squared_deviations) / 3.)

        self.assertEqual(matrix.stdNfold(normalizedAccuracies=True),
                         expected_std)

        # Also make sure that this test does not work without the normalization
        # NOTE(review): no assertion follows the comment above in this block --
        # confirm whether that check is missing.
Exemplo n.º 9
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
# You should have received a copy of the Affero GNU General Public License     
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

from __future__ import print_function
import sys
from gaia2.classification import ConfusionMatrix

# Script: write the HTML rendering of a ConfusionMatrix to the file named by
# the second command-line argument.

# Show the usage text only when an argument is missing, then stop.
if len(sys.argv) < 3:
    print('Usage: %s <results_file> <confusion_matrix_html_file>' % sys.argv[0])
    sys.exit(1)

results = sys.argv[1]
output_html = sys.argv[2]

# NOTE(review): `results` is not consumed anywhere in the visible code, so the
# matrix is rendered exactly as constructed -- confirm whether a step that
# loads the results file into `cm` is missing.
cm = ConfusionMatrix()
# The context manager makes sure the HTML file is flushed and closed.
with open(output_html, 'w') as html_file:
    html_file.write(cm.toHtml())