Пример #1
0
 def runModel(self, kind, conf):
     """Build, run and record the model configured by ``conf``.

     A child ``ClassificationExperiment`` of the current iteration's
     experiment is created and exported. The class given by
     ``conf.model_class`` is then instantiated with ``cv_monitoring=True``
     and run. The resulting model is stored in ``self.models[kind]`` and the
     sum of its training and testing execution times in ``self.times[kind]``.

     Args:
         kind: Key under which the model and its timing are stored; it is
             also the last component of the experiment name.
         conf: Classifier configuration; must expose ``model_class``.
     """
     self.setDatasets(conf)
     # Child experiment, named AL<experiment id>-Iter<iteration number>-<kind>
     parent_exp = self.iteration.experiment
     exp_name = '-'.join([
         'AL' + str(parent_exp.experiment_id),
         'Iter' + str(self.iteration.iteration_number),
         kind,
     ])
     child_exp = ClassificationExperiment(
         parent_exp.project,
         parent_exp.dataset,
         parent_exp.db,
         parent_exp.cursor,
         experiment_name=exp_name,
         experiment_label=parent_exp.experiment_label,
         parent=parent_exp.experiment_id)
     child_exp.setFeaturesFilenames(parent_exp.features_filenames)
     child_exp.setClassifierConf(conf)
     child_exp.createExperiment()
     child_exp.export()
     # Instantiate and run the model
     trained_model = conf.model_class(child_exp, self.datasets,
                                      cv_monitoring=True)
     trained_model.run()
     self.models[kind] = trained_model
     # Record the total (training + testing) execution time
     self.times[kind] = (trained_model.training_execution_time +
                         trained_model.testing_execution_time)
Пример #2
0
 def createMulticlassExperiment(self):
     """Create, export and return the alerts multiclass experiment.

     The new ``ClassificationExperiment`` is a child of
     ``self.supervised_exp`` and reuses its project, dataset, database
     handles, label and features files. It is configured with a
     'LogisticRegression' classifier built by ``ClassifierConfFactory``
     from a fixed parameter set; only ``num_folds`` is taken from the
     supervised experiment's classification configuration.

     Returns:
         The created and exported ``ClassificationExperiment``.
     """
     parent_exp = self.supervised_exp
     exp_name = parent_exp.experiment_name + '_alertsMulticlassClassifier'
     child_exp = ClassificationExperiment(
         parent_exp.project,
         parent_exp.dataset,
         parent_exp.db,
         parent_exp.cursor,
         experiment_name=exp_name,
         experiment_label=parent_exp.experiment_label,
         parent=parent_exp.experiment_id)
     child_exp.setFeaturesFilenames(parent_exp.features_filenames)
     # Classifier parameters; the test configuration is added once built.
     params = {
         'num_folds': parent_exp.classification_conf.num_folds,
         'sample_weight': False,
         'families_supervision': True,
         'optim_algo': 'sag',
         'alerts_conf': None,
     }
     unlabeled_test = TestConfiguration()
     unlabeled_test.setUnlabeled(labels_annotations='annotations')
     params['test_conf'] = unlabeled_test
     factory = ClassifierConfFactory.getFactory()
     classifier_conf = factory.fromParam('LogisticRegression', params)
     child_exp.setClassifierConf(classifier_conf)
     child_exp.createExperiment()
     child_exp.export()
     return child_exp
Пример #3
0
 def runNaiveBayes(self):
     """Train a Gaussian naive Bayes model on all instances and predict.

     Steps, in order:
       1. Create and export a child ``ClassificationExperiment`` named
          ``AL<experiment id>-Iter<iteration number>-all-NaiveBayes``.
       2. Temporarily set the families of the test instances to
          ``self.lr_predicted_labels`` and train on the union of the
          train and test instances.
       3. Reset the test families to ``None`` and predict on the test
          instances.

     Results are stored in ``self.naive_bayes``,
     ``self.nb_predicted_log_proba``, ``self.nb_predicted_labels``,
     ``self.nb_class_labels`` and ``self.nb_time``.
     """
     # Experiment holding the naive Bayes model
     parent_exp = self.iteration.experiment
     exp_name = ('AL' + str(parent_exp.experiment_id) +
                 '-Iter' + str(self.iteration.iteration_number) +
                 '-all-NaiveBayes')
     nb_exp = ClassificationExperiment(
         parent_exp.project,
         parent_exp.dataset,
         parent_exp.db,
         parent_exp.cursor,
         experiment_name=exp_name,
         experiment_label=parent_exp.experiment_label,
         parent=parent_exp.experiment_id)
     nb_exp.setFeaturesFilenames(parent_exp.features_filenames)
     unlabeled_test = TestConfiguration()
     unlabeled_test.setUnlabeled(labels_annotations='annotations')
     nb_conf = GaussianNaiveBayesConfiguration(
         parent_exp.conf.models_conf['multiclass'].num_folds,
         False,
         True,
         unlabeled_test)
     nb_exp.setClassifierConf(nb_conf)
     nb_exp.createExperiment()
     nb_exp.export()
     # The classifier is trained on all the data: the test instances get
     # the labels in self.lr_predicted_labels as families and are merged
     # with the train instances.
     self.datasets.test_instances.families = list(self.lr_predicted_labels)
     merged_datasets = ClassifierDatasets(nb_exp,
                                          nb_exp.classification_conf)
     merged_train = Instances()
     merged_train.union(self.datasets.train_instances,
                        self.datasets.test_instances)
     merged_datasets.train_instances = merged_train
     merged_datasets.test_instances = None
     merged_datasets.setSampleWeights()
     self.evalClusteringPerf(merged_datasets.train_instances)
     # Train the naive Bayes detection model
     self.naive_bayes = GaussianNaiveBayes(nb_exp, merged_datasets)
     self.naive_bayes.training()
     self.nb_time = self.naive_bayes.training_execution_time
     # Drop the temporary families before predicting on the test instances
     num_test = self.datasets.test_instances.numInstances()
     self.datasets.test_instances.families = [None] * num_test
     # The log-probabilities are computed before the timer starts, so only
     # the predict() call below is added to nb_time.
     self.nb_predicted_log_proba = (
         self.naive_bayes.pipeline.predict_log_proba(
             self.datasets.test_instances.getFeatures()))
     predict_start = time.time()
     self.nb_predicted_labels = self.naive_bayes.pipeline.predict(
         self.datasets.test_instances.getFeatures())
     self.nb_time += time.time() - predict_start
     self.nb_class_labels = self.naive_bayes.class_labels
Пример #4
0
 def createMulticlassExperiment(self):
     """Create, export and return the multiclass analysis experiment.

     The experiment is a child of the current iteration's experiment, is
     named ``AL<experiment id>-Iter<iteration number>-<label>-analysis``
     and uses the classification configuration held by
     ``self.rare_category_detection_conf``.

     Returns:
         The created and exported ``ClassificationExperiment``.
     """
     classifier_conf = self.rare_category_detection_conf.classification_conf
     parent_exp = self.iteration.experiment
     exp_name = ('AL' + str(parent_exp.experiment_id) +
                 '-Iter' + str(self.iteration.iteration_number) +
                 '-' + self.label +
                 '-analysis')
     analysis_exp = ClassificationExperiment(
         parent_exp.project,
         parent_exp.dataset,
         parent_exp.session,
         experiment_name=exp_name,
         labels_id=parent_exp.labels_id,
         parent=parent_exp.experiment_id)
     analysis_exp.setFeaturesFilenames(parent_exp.features_filenames)
     analysis_exp.setClassifierConf(classifier_conf)
     analysis_exp.createExperiment()
     analysis_exp.export()
     return analysis_exp
 def createNaiveBayesConf(self):
     """Create the naive Bayes experiment and return its configuration.

     A child ``ClassificationExperiment`` of ``self.experiment`` named
     ``AL<experiment id>-Iter<iteration number>-all-NaiveBayes`` is
     created and exported with a ``GaussianNaiveBayesConfiguration``.
     The experiment object itself is not returned.

     Returns:
         The ``GaussianNaiveBayesConfiguration`` attached to the experiment.
     """
     parent_exp = self.experiment
     exp_name = ('AL' + str(parent_exp.experiment_id) +
                 '-Iter' + str(self.iteration.iteration_number) +
                 '-all-NaiveBayes')
     nb_exp = ClassificationExperiment(
         parent_exp.project,
         parent_exp.dataset,
         parent_exp.session,
         experiment_name=exp_name,
         labels_id=parent_exp.labels_id,
         parent=parent_exp.experiment_id)
     nb_exp.setFeaturesFilenames(parent_exp.features_filenames)
     unlabeled_test = TestConfiguration()
     unlabeled_test.setUnlabeled(labels_annotations='annotations')
     # The number of folds comes from the 'multiclass' model configuration
     nb_conf = GaussianNaiveBayesConfiguration(
         parent_exp.conf.models_conf['multiclass'].num_folds,
         False,
         True,
         unlabeled_test)
     nb_exp.setClassifierConf(nb_conf)
     nb_exp.createExperiment()
     nb_exp.export()
     return nb_conf
Пример #6
0
class RareCategoryDetectionAnnotationQueriesExp(
        RareCategoryDetectionAnnotationQueries):
    """Rare category detection annotation queries backed by experiments.

    Extends ``RareCategoryDetectionAnnotationQueries`` with the creation and
    export of the child experiments (multiclass analysis and clustering)
    attached to the current iteration's experiment.
    """

    def __init__(self,
                 iteration,
                 label,
                 proba_min,
                 proba_max,
                 multiclass_model=None,
                 multiclass_exp=None):
        """Initialise the queries and remember the experiment context.

        Args:
            iteration: Current iteration; its ``experiment`` attribute is
                stored as ``self.experiment``.
            label: Forwarded to the parent constructor.
            proba_min: Forwarded to the parent constructor.
            proba_max: Forwarded to the parent constructor.
            multiclass_model: Optional multiclass model, forwarded to the
                parent constructor.
            multiclass_exp: Optional multiclass experiment, stored as
                ``self.multiclass_exp``.
        """
        # Forward the caller's model: a hard-coded None here would silently
        # discard the ``multiclass_model`` argument.
        RareCategoryDetectionAnnotationQueries.__init__(
            self,
            iteration,
            label,
            proba_min,
            proba_max,
            multiclass_model=multiclass_model)
        self.multiclass_exp = multiclass_exp
        self.experiment = iteration.experiment

    def getMulticlassConf(self):
        """Create the multiclass analysis experiment and return its conf.

        A child ``ClassificationExperiment`` named
        ``AL<experiment id>-Iter<iteration number>-<label>-analysis`` is
        created, exported and stored in ``self.multiclass_exp``, replacing
        any previous value.

        Returns:
            The classification configuration taken from
            ``self.rare_category_detection_conf``.
        """
        conf = self.rare_category_detection_conf.classification_conf
        exp = self.experiment
        name = '-'.join([
            'AL' + str(exp.experiment_id),
            'Iter' + str(self.iteration.iteration_number), self.label,
            'analysis'
        ])
        self.multiclass_exp = ClassificationExperiment(
            exp.project,
            exp.dataset,
            exp.session,
            experiment_name=name,
            labels_id=exp.labels_id,
            parent=exp.experiment_id)
        self.multiclass_exp.setFeaturesFilenames(exp.features_filenames)
        self.multiclass_exp.setClassifierConf(conf)
        self.multiclass_exp.createExperiment()
        self.multiclass_exp.export()
        return conf

    def createClusteringExperiment(self):
        """Create, export and return the clustering experiment.

        The ``ClusteringConfiguration`` is built from the number of
        categories in ``self.categories``, so ``setCategories`` must have
        been called first. The experiment is named
        ``AL<experiment id>-Iter<iteration number>-<label>-clustering``.

        Returns:
            The created and exported ``ClusteringExperiment``.
        """
        conf = ClusteringConfiguration(self.categories.numCategories())
        exp = self.experiment
        name = '-'.join([
            'AL' + str(exp.experiment_id),
            'Iter' + str(self.iteration.iteration_number), self.label,
            'clustering'
        ])
        clustering_exp = ClusteringExperiment(exp.project,
                                              exp.dataset,
                                              exp.session,
                                              conf,
                                              labels_id=exp.labels_id,
                                              experiment_name=name,
                                              parent=exp.experiment_id)
        clustering_exp.setFeaturesFilenames(exp.features_filenames)
        clustering_exp.createExperiment()
        clustering_exp.export()
        return clustering_exp

    def generateClusteringVisualization(self):
        """Export a clustering of the categories when families are analysed.

        When ``self.families_analysis`` is true, a clustering experiment is
        created and a ``Clustering`` built from the categories' instances
        and assigned categories is exported to its output directory.
        Otherwise ``self.clustering_exp`` is set to ``None``.
        """
        if self.families_analysis:
            self.clustering_exp = self.createClusteringExperiment()
            clustering = Clustering(self.categories.instances,
                                    self.categories.assigned_categories)
            clustering.generateClustering(None, None)
            clustering.export(self.clustering_exp.getOutputDirectory())
        else:
            self.clustering_exp = None

    def setCategories(self, all_instances, assigned_categories,
                      predicted_proba):
        """Build ``self.categories`` as a ``CategoriesExp``.

        Reads ``self.multiclass_exp`` and ``self.multiclass_model``; the
        model must be set, since its ``class_labels`` are passed on.

        Args:
            all_instances: Forwarded to ``CategoriesExp``.
            assigned_categories: Forwarded to ``CategoriesExp``.
            predicted_proba: Forwarded to ``CategoriesExp``.
        """
        self.categories = CategoriesExp(self.multiclass_exp, self.iteration,
                                        all_instances, assigned_categories,
                                        predicted_proba, self.label,
                                        self.multiclass_model.class_labels)