Exemplo n.º 1
0
 def __init__(self, iteration):
     """Initialise the strategy and its uncertain-query generator.

     The number of queries is read from
     ``self.iteration.experiment.conf.batch``; both probability bounds
     handed to ``UncertainAnnotationQueries`` are ``None``.
     """
     QueryStrategy.__init__(self, iteration)
     batch_size = self.iteration.experiment.conf.batch
     # The last two arguments are the min/max probability bounds.
     self.annotations = UncertainAnnotationQueries(
         self.iteration, batch_size, None, None)
Exemplo n.º 2
0
 def __init__(self, iteration):
     """Initialise the strategy and its three query generators.

     ``eps`` and ``num_uncertain`` are read from ``self.iteration.conf``.
     The generators are stored as ``self.uncertain``, ``self.malicious``
     and ``self.benign``.
     """
     QueryStrategy.__init__(self, iteration)
     conf = self.iteration.conf
     eps = conf.eps
     self.uncertain = UncertainAnnotationQueries(self.iteration,
                                                 conf.num_uncertain, 0, 1)
     # NOTE(review): the two trailing numbers are presumably probability
     # bounds ([1 - eps, 1] for malicious, [0, eps] for benign) -- confirm
     # against RareCategoryDetectionAnnotationQueriesExp.
     malicious_low = 1 - eps
     self.malicious = RareCategoryDetectionAnnotationQueriesExp(
         self.iteration, 'malicious', malicious_low, 1)
     benign_high = eps
     self.benign = RareCategoryDetectionAnnotationQueriesExp(
         self.iteration, 'benign', 0, benign_high)
Exemplo n.º 3
0
class IlabExp(Ilab):
    """Ilab strategy using ``RareCategoryDetectionAnnotationQueriesExp``.

    The instance keeps three query generators (``uncertain``, ``malicious``
    and ``benign``) and writes a JSON description of their annotation types
    into the iteration directory while queries are being generated.
    """

    def __init__(self, iteration):
        """Create the three query generators.

        ``QueryStrategy.__init__`` is called directly rather than
        ``Ilab.__init__``.
        """
        QueryStrategy.__init__(self, iteration)
        conf = self.iteration.conf
        eps = conf.eps
        self.uncertain = UncertainAnnotationQueries(self.iteration,
                                                    conf.num_uncertain, 0, 1)
        # NOTE(review): the trailing numbers are presumably probability
        # bounds -- confirm against the query classes.
        malicious_low = 1 - eps
        self.malicious = RareCategoryDetectionAnnotationQueriesExp(
            self.iteration, 'malicious', malicious_low, 1)
        benign_high = eps
        self.benign = RareCategoryDetectionAnnotationQueriesExp(
            self.iteration, 'benign', 0, benign_high)

    def generateAnnotationQueries(self):
        """Run the three generators in order and accumulate their time.

        The annotation types file is rewritten after each generator, and
        the instance ids selected by the uncertain generator are passed to
        the two others as ``already_queried``.  The global clustering
        evaluation is computed last.
        """

        def run_timed(queries, **run_kwargs):
            # Run one generator and add its duration to the running total.
            queries.run(**run_kwargs)
            self.generate_queries_time += queries.generate_queries_time

        self.generate_queries_time = 0

        run_timed(self.uncertain)
        self.exportAnnotationsTypes(malicious=False, benign=False)

        already_queried = self.uncertain.getInstanceIds()

        run_timed(self.malicious, already_queried=already_queried)
        self.exportAnnotationsTypes(malicious=True, benign=False)

        run_timed(self.benign, already_queried=already_queried)
        self.exportAnnotationsTypes()

        self.globalClusteringEvaluation()

    def exportAnnotationsTypes(self, malicious=True, benign=True):
        """Write ``annotations_types.json`` in the iteration directory.

        ``malicious`` / ``benign`` select whether the corresponding entry is
        described or left as ``None``.  The ``uncertain`` entry is always
        ``{'type': 'individual', 'clustering_exp': None}``.
        """

        def describe(queries):
            # Annotation type plus the id of the clustering experiment,
            # or None when the generator has no clustering experiment.
            annotations_type = queries.annotations_type
            clustering_exp = queries.clustering_exp
            if clustering_exp is None:
                exp_id = None
            else:
                exp_id = clustering_exp.experiment_id
            return {'type': annotations_type, 'clustering_exp': exp_id}

        types = {
            'uncertain': {
                'type': 'individual',
                'clustering_exp': None
            },
            'malicious': describe(self.malicious) if malicious else None,
            'benign': describe(self.benign) if benign else None
        }
        filename = self.iteration.iteration_dir + 'annotations_types.json'
        with open(filename, 'w') as f:
            json.dump(types, f, indent=2)
Exemplo n.º 4
0
class UncertaintySampling(QueryStrategy):
    """Query strategy backed by a single ``UncertainAnnotationQueries``."""

    def __init__(self, iteration):
        """Create the query generator.

        The number of queries is read from
        ``self.iteration.experiment.conf.batch``; both probability bounds
        are ``None``.
        """
        QueryStrategy.__init__(self, iteration)
        batch_size = self.iteration.experiment.conf.batch
        # The last two arguments are the min/max probability bounds.
        self.annotations = UncertainAnnotationQueries(
            self.iteration, batch_size, None, None)

    def generateAnnotationQueries(self):
        """Run the generator and record how long it took."""
        queries = self.annotations
        queries.run()
        self.generate_queries_time = queries.generate_queries_time

    def annotateAuto(self):
        """Delegate automatic annotation to the generator."""
        self.annotations.annotateAuto()

    def getManualAnnotations(self):
        """Delegate manual annotation retrieval to the generator."""
        self.annotations.getManualAnnotations()

    ###############################
    ## Execution time monitoring ##
    ###############################

    def executionTimeHeader(self):
        """Return ``'binary_model'`` followed by the base-class header."""
        columns = ['binary_model']
        columns.extend(QueryStrategy.executionTimeHeader(self))
        return columns

    def executionTimeMonitoring(self):
        """Return the binary model time followed by the base-class values."""
        binary_time = self.iteration.train_test_validation.times['binary']
        values = [binary_time]
        values.extend(QueryStrategy.executionTimeMonitoring(self))
        return values

    def executionTimeDisplay(self):
        """Return the 'Binary model' dataset plus the base-class ones."""
        datasets = [PlotDataset(None, 'Binary model')]
        return datasets + QueryStrategy.executionTimeDisplay(self)
Exemplo n.º 5
0
class Ilab(QueryStrategy):
    """Query strategy combining three annotation query generators.

    ``uncertain`` is an ``UncertainAnnotationQueries``; ``malicious`` and
    ``benign`` are ``RareCategoryDetectionAnnotationQueries``.
    """

    def __init__(self, iteration):
        """Create the three generators from ``iteration.experiment.conf``."""
        QueryStrategy.__init__(self, iteration)
        conf = iteration.experiment.conf
        eps = conf.eps
        self.uncertain = UncertainAnnotationQueries(
            self.iteration, conf.num_uncertain, 0, 1)
        # NOTE(review): the trailing numbers are presumably probability
        # bounds -- confirm against RareCategoryDetectionAnnotationQueries.
        malicious_low = 1 - eps
        self.malicious = RareCategoryDetectionAnnotationQueries(
            self.iteration, 'malicious', malicious_low, 1)
        benign_high = eps
        self.benign = RareCategoryDetectionAnnotationQueries(
            self.iteration, 'benign', 0, benign_high)

    def generateAnnotationQueries(self):
        """Run the three generators in order and accumulate their time.

        The annotation types file is rewritten after each generator, and
        the instance ids selected by the uncertain generator are passed to
        the two others as ``already_queried``.  The global clustering
        evaluation is computed last.
        """

        def run_timed(queries, **run_kwargs):
            # Run one generator and add its duration to the running total.
            queries.run(**run_kwargs)
            self.generate_queries_time += queries.generate_queries_time

        self.generate_queries_time = 0

        run_timed(self.uncertain)
        self.exportAnnotationsTypes(malicious=False, benign=False)

        already_queried = self.uncertain.getInstanceIds()

        run_timed(self.malicious, already_queried=already_queried)
        self.exportAnnotationsTypes(malicious=True, benign=False)

        run_timed(self.benign, already_queried=already_queried)
        self.exportAnnotationsTypes()

        self.globalClusteringEvaluation()

    def annotateAuto(self):
        """Call ``annotateAuto`` on uncertain, malicious, then benign."""
        for name in ('uncertain', 'malicious', 'benign'):
            getattr(self, name).annotateAuto()

    def getManualAnnotations(self):
        """Call ``getManualAnnotations`` on uncertain, malicious, benign."""
        for name in ('uncertain', 'malicious', 'benign'):
            getattr(self, name).getManualAnnotations()

    def getClusteringsEvaluations(self):
        """Return the global evaluation under ``'all'``.

        The ``'malicious'`` and ``'benign'`` entries are always ``None``.
        """
        return {
            'all': self.global_clustering_perf,
            'malicious': None,
            'benign': None
        }

    def globalClusteringEvaluation(self):
        """Evaluate the malicious and benign clusterings together.

        Benign cluster ids are shifted past the largest id collected so far
        so that the two id ranges do not overlap.  The result is stored in
        ``self.global_clustering_perf`` (``None`` when no cluster exists).
        """
        cluster_ids = []
        families = []

        malicious_categories = self.malicious.categories
        if malicious_categories is not None:
            cluster_ids.extend(malicious_categories.assigned_categories)
            families.extend(
                malicious_categories.instances.getFamilies(true_labels=True))

        benign_categories = self.benign.categories
        if benign_categories is not None:
            highest = max(cluster_ids) if cluster_ids else 0
            shifted = [
                x + highest + 1
                for x in benign_categories.assigned_categories
            ]
            cluster_ids.extend(shifted)
            families.extend(
                benign_categories.instances.getFamilies(true_labels=True))

        if not cluster_ids:
            self.global_clustering_perf = None
            return
        self.global_clustering_perf = PerformanceIndicators()
        self.global_clustering_perf.generateEvaluation(cluster_ids, families)

    ###############################
    ## Execution time monitoring ##
    ###############################

    def executionTimeHeader(self):
        """Return this strategy's columns followed by the base-class ones."""
        columns = ['binary_model', 'malicious_clustering', 'benign_clustering']
        columns.extend(QueryStrategy.executionTimeHeader(self))
        return columns

    def executionTimeMonitoring(self):
        """Return the values matching ``executionTimeHeader``."""
        values = [
            self.iteration.train_test_validation.times['binary'],
            self.malicious.analysis_time,
            self.benign.analysis_time,
        ]
        values.extend(QueryStrategy.executionTimeMonitoring(self))
        return values

    def executionTimeDisplay(self):
        """Return the plot datasets matching ``executionTimeHeader``."""

        def styled(title, linestyle, label):
            # Dataset drawn with the given line style and the label's color.
            dataset = PlotDataset(None, title)
            dataset.setLinestyle(linestyle)
            dataset.setColor(colors_tools.getLabelColor(label))
            return dataset

        datasets = [
            PlotDataset(None, 'Binary model'),
            styled('Malicious Analysis', 'dotted', 'malicious'),
            styled('Benign Analysis', 'dashed', 'benign'),
        ]
        return datasets + QueryStrategy.executionTimeDisplay(self)

    def exportAnnotationsTypes(self, malicious=True, benign=True):
        """Write ``annotations_types.json`` in the output directory.

        ``malicious`` / ``benign`` select whether the corresponding
        annotation type is exported or left as ``None``.
        """
        types = {
            'uncertain': 'individual',
            'malicious': self.malicious.annotations_type if malicious else None,
            'benign': self.benign.annotations_type if benign else None
        }
        filename = self.iteration.output_directory + 'annotations_types.json'
        with open(filename, 'w') as f:
            json.dump(types, f, indent=2)
Exemplo n.º 6
0
class Ilab(QueryStrategy):
    """Query strategy combining three annotation query generators.

    ``uncertain`` is an ``UncertainAnnotationQueries``; ``malicious`` and
    ``benign`` are ``RareCategoryDetectionAnnotationQueries``.
    """

    def __init__(self, iteration):
        """Create the three generators from ``self.iteration.conf``."""
        QueryStrategy.__init__(self, iteration)
        conf = self.iteration.conf
        eps = conf.eps
        self.uncertain = UncertainAnnotationQueries(self.iteration,
                                                    conf.num_uncertain, 0, 1)
        # NOTE(review): the trailing numbers are presumably probability
        # bounds -- confirm against RareCategoryDetectionAnnotationQueries.
        malicious_low = 1 - eps
        self.malicious = RareCategoryDetectionAnnotationQueries(
            self.iteration, 'malicious', malicious_low, 1)
        benign_high = eps
        self.benign = RareCategoryDetectionAnnotationQueries(
            self.iteration, 'benign', 0, benign_high)

    def generateAnnotationQueries(self):
        """Run the three generators in order and accumulate their time.

        The instance ids selected by the uncertain generator are passed to
        the two others as ``already_queried``.  The global clustering
        evaluation is computed last.
        """

        def run_timed(queries, **run_kwargs):
            # Run one generator and add its duration to the running total.
            queries.run(**run_kwargs)
            self.generate_queries_time += queries.generate_queries_time

        self.generate_queries_time = 0
        run_timed(self.uncertain)
        already_queried = self.uncertain.getInstanceIds()
        run_timed(self.malicious, already_queried=already_queried)
        run_timed(self.benign, already_queried=already_queried)
        self.globalClusteringEvaluation()

    def annotateAuto(self):
        """Call ``annotateAuto`` on uncertain, malicious, then benign."""
        for name in ('uncertain', 'malicious', 'benign'):
            getattr(self, name).annotateAuto()

    def getManualAnnotations(self):
        """Call ``getManualAnnotations`` on uncertain, malicious, benign."""
        for name in ('uncertain', 'malicious', 'benign'):
            getattr(self, name).getManualAnnotations()

    def getClusteringsEvaluations(self):
        """Return the global evaluation under ``'all'``.

        The ``'malicious'`` and ``'benign'`` entries are always ``None``.
        """
        return {
            'all': self.global_clustering_perf,
            'malicious': None,
            'benign': None
        }

    def globalClusteringEvaluation(self):
        """Evaluate the malicious and benign clusterings together.

        Benign cluster ids are shifted past the largest id collected so far
        so that the two id ranges do not overlap.  The result is stored in
        ``self.global_clustering_perf`` (``None`` when no cluster exists).
        """
        cluster_ids = []
        families = []

        malicious_categories = self.malicious.categories
        if malicious_categories is not None:
            cluster_ids.extend(malicious_categories.assigned_categories)
            families.extend(
                malicious_categories.instances.getFamilies(true_labels=True))

        benign_categories = self.benign.categories
        if benign_categories is not None:
            highest = max(cluster_ids) if cluster_ids else 0
            shifted = [
                x + highest + 1
                for x in benign_categories.assigned_categories
            ]
            cluster_ids.extend(shifted)
            families.extend(
                benign_categories.instances.getFamilies(true_labels=True))

        if not cluster_ids:
            self.global_clustering_perf = None
            return
        self.global_clustering_perf = PerformanceIndicators()
        self.global_clustering_perf.generateEvaluation(cluster_ids, families)

    def checkAnnotationQueriesAnswered(self):
        """Tell whether all three generators had their queries answered.

        Generators are checked in the order uncertain, malicious, benign,
        stopping with ``False`` at the first falsy answer.  When the first
        two checks pass, the benign generator's answer is returned as is.
        """
        if not self.uncertain.checkAnnotationQueriesAnswered():
            return False
        if not self.malicious.checkAnnotationQueriesAnswered():
            return False
        return self.benign.checkAnnotationQueriesAnswered()

    ###############################
    ## Execution time monitoring ##
    ###############################

    def executionTimeHeader(self):
        """Return the three column names of the execution time report."""
        return ['malicious_queries', 'uncertain_queries', 'benign_queries']

    def executionTimeMonitoring(self):
        """Return the values matching ``executionTimeHeader``.

        The uncertain entry adds the binary model update time to the
        uncertain query generation time; the two others add analysis time
        and query generation time.
        """
        malicious_time = (self.malicious.analysis_time +
                          self.malicious.generate_queries_time)
        uncertain_time = (self.iteration.update_model.times['binary'] +
                          self.uncertain.generate_queries_time)
        benign_time = (self.benign.analysis_time +
                       self.benign.generate_queries_time)
        return [malicious_time, uncertain_time, benign_time]

    def executionTimeDisplay(self):
        """Return the plot datasets matching ``executionTimeHeader``."""

        def styled(title, linestyle, label):
            # Dataset drawn with the given line style and the label's color.
            dataset = PlotDataset(None, title)
            dataset.setLinestyle(linestyle)
            dataset.setColor(colors_tools.getLabelColor(label))
            return dataset

        uncertain = PlotDataset(None, 'Uncertain Queries')
        malicious = styled('Malicious Queries', 'dotted', 'malicious')
        benign = styled('Benign Queries', 'dashed', 'benign')
        return [malicious, uncertain, benign]