示例#1
0
 def fromJson(obj, db, cursor):
     """Rebuild a ProjectionExperiment from its JSON description.

     obj must provide the 'conf', 'project' and 'dataset' entries; db and
     cursor are forwarded to the ProjectionExperiment constructor as is.
     Returns the experiment once its configuration has been attached.
     """
     # The configuration is deserialized before the experiment is created.
     projection_conf = ProjectionConfFactory.getFactory().fromJson(
         obj['conf'])
     exp = ProjectionExperiment(obj['project'], obj['dataset'], db, cursor)
     Experiment.expParamFromJson(exp, obj)
     exp.setConf(projection_conf)
     return exp
 def generateParser(parser, binary=True):
     """Register the active learning command line arguments on parser.

     Calls, in order, Experiment.projectDatasetFeturesParser, then
     ActiveLearningConfiguration.generateActiveLearningArguments and
     generateSupervisedLearningArguments (binary is forwarded to the
     latter). Returns the value produced by
     generateActiveLearningArguments.
     """
     Experiment.projectDatasetFeturesParser(parser)
     conf_cls = ActiveLearningConfiguration
     active_learning_group = conf_cls.generateActiveLearningArguments(parser)
     conf_cls.generateSupervisedLearningArguments(parser, binary=binary)
     return active_learning_group
示例#3
0
 def __init__(self, project, dataset, db, cursor, experiment_name=None):
     """Initialize the base Experiment and set kind to 'Validation'.

     All the arguments are forwarded to Experiment.__init__ unchanged.
     """
     base_kwargs = {'experiment_name': experiment_name}
     Experiment.__init__(self, project, dataset, db, cursor, **base_kwargs)
     self.kind = 'Validation'
 def fromJson(obj, db, cursor):
     """Rebuild a ClassificationExperiment from its JSON description.

     obj must provide the 'project', 'dataset' and 'classification_conf'
     entries. The classifier configuration is deserialized after the
     experiment parameters are restored, because the factory receives the
     experiment itself. Returns the configured experiment.
     """
     exp = ClassificationExperiment(obj['project'], obj['dataset'], db,
                                    cursor)
     Experiment.expParamFromJson(exp, obj)
     conf_factory = ClassifierConfFactory.getFactory()
     exp.setClassifierConf(
         conf_factory.fromJson(obj['classification_conf'], exp))
     return exp
示例#5
0
 def initLabels(self, labels_filename=None, overwrite=True):
     """Check the labels requirement, then delegate to Experiment.initLabels.

     Raises ValueError when self.conf is a
     SemiSupervisedProjectionConfiguration and labels_filename is None.
     Otherwise labels_filename and overwrite are forwarded unchanged.
     """
     needs_labels = isinstance(self.conf,
                               SemiSupervisedProjectionConfiguration)
     if needs_labels and labels_filename is None:
         raise ValueError(
             'Semi supervised projections require annotated instances. '
             'labels_filename must be specified.')
     Experiment.initLabels(
         self, labels_filename=labels_filename, overwrite=overwrite)
 def fromJson(obj, db, cursor):
     """Rebuild an ActiveLearningExperiment from its JSON description.

     obj must provide the 'project', 'dataset', 'labeling_method',
     'validation_conf' and 'conf' entries. A None 'validation_conf' entry
     leaves experiment.validation_conf set to None. Returns the experiment.
     """
     exp = ActiveLearningExperiment(obj['project'], obj['dataset'], db,
                                    cursor)
     Experiment.expParamFromJson(exp, obj)
     exp.labeling_method = obj['labeling_method']
     # Validation configuration: the attribute is reset first so that it
     # always exists, then filled only when a configuration was serialized.
     exp.validation_conf = None
     validation_json = obj['validation_conf']
     if validation_json is not None:
         exp.validation_conf = TestConfiguration.fromJson(validation_json,
                                                          exp)
     conf_factory = ActiveLearningConfFactory.getFactory()
     exp.conf = conf_factory.fromJson(obj['conf'], exp)
     return exp
示例#7
0
 def loadTrueLabels(self):
     """Load the ground truth labels of the dataset when they are available.

     The file 'labels/true_labels.csv' is looked up under the directory
     returned by dir_tools.getDatasetDirectory(self.project, self.dataset).
     When it is missing, a message is written to stderr and nothing else
     is done. Otherwise an Experiment named 'true_labels' is created and
     its initLabels method is called with 'true_labels.csv'.
     """
     labels_file = dir_tools.getDatasetDirectory(self.project, self.dataset)
     labels_file += 'labels/true_labels.csv'
     # Loads the true labels in the table TrueLabels if the file exists
     # Otherwise the table TrueLabels is not created
     if not dir_tools.checkFileExists(labels_file):
         # sys.stderr.write behaves the same on Python 2 and Python 3,
         # unlike the Python 2 only "print >>" statement. The explicit
         # newline reproduces what the print statement appended.
         sys.stderr.write('No ground truth labels for this dataset\n')
         return
     exp = Experiment(self.project,
                      self.dataset,
                      self.db,
                      self.cursor,
                      experiment_name='true_labels')
     exp.initLabels('true_labels.csv')
 def __init__(self,
              project,
              dataset,
              db,
              cursor,
              experiment_name=None,
              experiment_label=None,
              parent=None):
     """Initialize the base Experiment and set kind to 'Classification'.

     All the arguments are forwarded to Experiment.__init__ unchanged.
     """
     base_kwargs = {
         'experiment_name': experiment_name,
         'experiment_label': experiment_label,
         'parent': parent,
     }
     Experiment.__init__(self, project, dataset, db, cursor, **base_kwargs)
     self.kind = 'Classification'
示例#9
0
    def generateParser(parser):
        """Register the classification command line arguments on parser.

        Adds, in order: the arguments registered by
        Experiment.projectDatasetFeturesParser, the --num-folds,
        --multilabel and --sample-weight options, a validation argument
        group (--test-size, --validation-dataset) and an
        'Alerts parameters' argument group.

        parser is expected to expose the argparse.ArgumentParser API
        (add_argument / add_argument_group). Nothing is returned.
        """
        Experiment.projectDatasetFeturesParser(parser)

        parser.add_argument('--num-folds', type=int, default=4)
        parser.add_argument('--multilabel', action='store_true', default=False)
        sample_weight_help = 'When set to True, the detection model is learned with '
        sample_weight_help += 'sample weights inverse to the proportion of the family '
        sample_weight_help += 'in the dataset. Useless if the families are not specified.'
        parser.add_argument('--sample-weight',
                            action='store_true',
                            default=False,
                            help=sample_weight_help)

        ## Validation parameters
        # NOTE: this text is used as the title of the argument group.
        validation_help = 'Validation parameters: \n '
        validation_help += 'The detection model is validated with a proportion of '
        validation_help += 'the instances in the input dataset, or with a separate validation'
        validation_help += ' dataset. By default 10% of the instances are used for validation'
        validation_group = parser.add_argument_group(validation_help)
        validation_group.add_argument('--test-size', type=float, default=0.1)
        validation_group.add_argument('--validation-dataset', default=None)

        ## Alerts
        alerts_group = parser.add_argument_group('Alerts parameters')
        # type=int so that a value given on the command line is an int,
        # like the default, instead of a raw string.
        alerts_group.add_argument(
            '--top-n-alerts',
            type=int,
            default=100,
            help='Number of most confident alerts displayed.')
        alerts_group.add_argument(
            '--detection-threshold',
            type=float,
            default=0.8,
            help=
            'An alert is raised if the predicted probability of maliciousness '
            + 'is above this threshold.')
        alerts_group.add_argument(
            '--clustering-algo',
            default='Kmeans',
            choices=['Kmeans', 'GaussianMixture'],
            help='Clustering algorithm to analyse the alerts.')
        alerts_group.add_argument(
            '--num-clusters',
            type=int,
            default=4,
            help='Number of clusters built from the alerts.')
示例#10
0
 def toJson(self):
     """Return the JSON description of this active learning experiment.

     Starts from the mapping returned by Experiment.toJson and adds the
     '__type__', 'labeling_method', 'validation_conf' (None when no
     validation configuration is set) and 'conf' entries.
     """
     serialized = Experiment.toJson(self)
     serialized['__type__'] = 'ActiveLearningExperiment'
     serialized['labeling_method'] = self.labeling_method
     validation = self.validation_conf
     serialized['validation_conf'] = (None if validation is None
                                      else validation.toJson())
     serialized['conf'] = self.conf.toJson()
     return serialized
示例#11
0
 def __init__(self,
              project,
              dataset,
              db,
              cursor,
              conf,
              experiment_name=None,
              experiment_label=None,
              parent=None):
     """Initialize the base Experiment, then store kind and conf.

     Every argument except conf is forwarded to Experiment.__init__;
     conf is kept as self.conf and kind is set to 'Clustering'.
     """
     base_kwargs = {
         'experiment_name': experiment_name,
         'experiment_label': experiment_label,
         'parent': parent,
     }
     Experiment.__init__(self, project, dataset, db, cursor, **base_kwargs)
     self.kind = 'Clustering'
     self.conf = conf
示例#12
0
    def generateParser(parser):
        """Register the projection command line arguments on parser.

        Adds the arguments registered by
        Experiment.projectDatasetFeturesParser, then --families-supervision,
        --labels/-l (stored as labels_file) and --num-components.
        Nothing is returned.
        """
        Experiment.projectDatasetFeturesParser(parser)

        families_help = (
            'When set to True, the semi-supervision is based on the families '
            'instead of the binary labels. Useless if an unsupervised projection method is used.'
        )
        parser.add_argument('--families-supervision',
                            action='store_true',
                            default=False,
                            help=families_help)

        labels_help = ('CSV file containing the labels of some instances. '
                       'These labels are used for semi-supervised projections.')
        parser.add_argument('--labels',
                            '-l',
                            dest='labels_file',
                            default=None,
                            help=labels_help)

        parser.add_argument('--num-components', type=int, default=None)
示例#13
0
 def generateParser(parser):
     """Register the clustering command line arguments on parser.

     Adds the generic arguments, the clustering arguments (--num-clusters,
     --label) and a 'Projection parameters' argument group
     (--projection-algo, --families-supervision, --labels/-l).
     Nothing is returned.
     """
     # Help texts are assembled up front so the registrations stay compact.
     label_help = (
         'The clustering is built from all the instances in the dataset, '
         'or only from the benign or malicious ones. '
         'By default, the clustering is built from all the instances. '
         'The malicious and benign instances are selected according to '
         'the ground truth labels stored in labels/true_labels.csv.')
     projection_help = (
         'Projection performed before building the clustering. '
         'By default the instances are not projected.')
     families_help = (
         'When set to True, the semi-supervision is based on the families '
         'instead of the binary labels. Useless if an unsupervised projection method is used.'
     )
     labels_help = ('CSV file containing the labels of some instances. '
                    'These labels are used for semi-supervised projections.')

     # Generic arguments
     Experiment.projectDatasetFeturesParser(parser)

     # Clustering arguments
     parser.add_argument('--num-clusters', type=int, default=4)
     parser.add_argument('--label',
                         choices=['all', 'malicious', 'benign'],
                         default='all',
                         help=label_help)

     # Projection arguments
     group = parser.add_argument_group('Projection parameters')
     group.add_argument(
         '--projection-algo',
         choices=['Pca', 'Rca', 'Lda', 'Lmnn', 'Nca', 'Itml', None],
         default=None,
         help=projection_help)
     group.add_argument('--families-supervision',
                        action='store_true',
                        default=False,
                        help=families_help)
     group.add_argument('--labels',
                        '-l',
                        dest='labels_file',
                        default=None,
                        help=labels_help)
 def fromJson(obj, db, cursor):
     """Rebuild a DescriptiveStatisticsExperiment from its JSON description.

     obj must provide the 'project' and 'dataset' entries; the remaining
     parameters are restored by Experiment.expParamFromJson.
     """
     exp = DescriptiveStatisticsExperiment(
         obj['project'], obj['dataset'], db, cursor)
     Experiment.expParamFromJson(exp, obj)
     return exp
示例#15
0
 def toJson(self):
     """Return the JSON description of this classification experiment.

     Starts from the mapping returned by Experiment.toJson and adds the
     '__type__' and 'classification_conf' entries.
     """
     serialized = Experiment.toJson(self)
     serialized['__type__'] = 'ClassificationExperiment'
     classifier_json = self.classification_conf.toJson()
     serialized['classification_conf'] = classifier_json
     return serialized
示例#16
0
 def __init__(self, project, dataset, db, cursor):
     """Initialize the base Experiment for an 'ActiveLearning' experiment.

     labeling_method starts as None; it is assigned later (fromJson sets
     it from the serialized description, for instance).
     """
     base_args = (project, dataset, db, cursor)
     Experiment.__init__(self, *base_args)
     self.kind = 'ActiveLearning'
     self.labeling_method = None
示例#17
0
 def toJson(self):
     """Return the JSON description of this clustering experiment.

     Starts from the mapping returned by Experiment.toJson and adds the
     '__type__' and 'conf' entries.
     """
     serialized = Experiment.toJson(self)
     serialized['__type__'] = 'ClusteringExperiment'
     conf_json = self.conf.toJson()
     serialized['conf'] = conf_json
     return serialized
示例#18
0
 def fromJson(obj, db, cursor):
     """Rebuild a ClusteringExperiment from its JSON description.

     obj must provide the 'conf', 'project' and 'dataset' entries. The
     configuration is deserialized first because the ClusteringExperiment
     constructor takes it as an argument.
     """
     clustering_conf = ClusteringConfFactory.getFactory().fromJson(
         obj['conf'])
     exp = ClusteringExperiment(
         obj['project'], obj['dataset'], db, cursor, clustering_conf)
     Experiment.expParamFromJson(exp, obj)
     return exp
 def toJson(self):
     """Return the JSON description of this descriptive statistics experiment.

     Starts from the mapping returned by Experiment.toJson and only adds
     the '__type__' entry.
     """
     serialized = Experiment.toJson(self)
     serialized['__type__'] = 'DescriptiveStatisticsExperiment'
     return serialized
示例#20
0
 def toJson(self):
     """Return the JSON description of this projection experiment.

     Starts from the mapping returned by Experiment.toJson and adds the
     '__type__' and 'conf' entries.
     """
     serialized = Experiment.toJson(self)
     serialized['__type__'] = 'ProjectionExperiment'
     conf_json = self.conf.toJson()
     serialized['conf'] = conf_json
     return serialized
示例#21
0
 def fromJson(obj, db, cursor):
     """Rebuild a ValidationExperiment from its JSON description.

     obj must provide the 'project' and 'dataset' entries; the remaining
     parameters are restored by Experiment.expParamFromJson.
     """
     exp = ValidationExperiment(obj['project'], obj['dataset'], db, cursor)
     Experiment.expParamFromJson(exp, obj)
     return exp
示例#22
0
 def toJson(self):
     """Return the JSON description of this validation experiment.

     Starts from the mapping returned by Experiment.toJson and only adds
     the '__type__' entry.
     """
     serialized = Experiment.toJson(self)
     serialized['__type__'] = 'ValidationExperiment'
     return serialized
 def __init__(self, project, dataset, db, cursor):
     """Initialize the base Experiment and set kind to 'DescriptiveStatistics'.

     All the arguments are forwarded to Experiment.__init__ unchanged.
     """
     base_args = (project, dataset, db, cursor)
     Experiment.__init__(self, *base_args)
     self.kind = 'DescriptiveStatistics'