def fromJson(obj, db, cursor):
    """Rebuild a ProjectionExperiment from its JSON representation *obj*."""
    exp = ProjectionExperiment(obj['project'], obj['dataset'], db, cursor)
    Experiment.expParamFromJson(exp, obj)
    # The projection configuration is delegated to its own factory.
    exp.setConf(ProjectionConfFactory.getFactory().fromJson(obj['conf']))
    return exp
def generateParser(parser, binary=True):
    """Register the generic and active-learning arguments on *parser*.

    Returns the active-learning argument group so callers can extend it.
    """
    Experiment.projectDatasetFeturesParser(parser)
    group = ActiveLearningConfiguration.generateActiveLearningArguments(
            parser)
    ActiveLearningConfiguration.generateSupervisedLearningArguments(
            parser, binary=binary)
    return group
def __init__(self, project, dataset, db, cursor, experiment_name=None):
    """Validation experiment: generic Experiment with kind 'Validation'."""
    Experiment.__init__(self, project, dataset, db, cursor,
                        experiment_name=experiment_name)
    self.kind = 'Validation'
def fromJson(obj, db, cursor):
    """Rebuild a ClassificationExperiment from its JSON representation."""
    exp = ClassificationExperiment(obj['project'], obj['dataset'], db,
                                   cursor)
    Experiment.expParamFromJson(exp, obj)
    factory = ClassifierConfFactory.getFactory()
    exp.setClassifierConf(
            factory.fromJson(obj['classification_conf'], exp))
    return exp
def initLabels(self, labels_filename=None, overwrite=True):
    """Initialize the labels, enforcing the semi-supervised requirement.

    Raises ValueError when the projection configuration is semi-supervised
    and no labels_filename is provided.
    """
    semi_supervised = isinstance(self.conf,
                                 SemiSupervisedProjectionConfiguration)
    if semi_supervised and labels_filename is None:
        raise ValueError(
                'Semi supervised projections require annotated instances. '
                'labels_filename must be specified.')
    Experiment.initLabels(self, labels_filename=labels_filename,
                          overwrite=overwrite)
def fromJson(obj, db, cursor):
    """Rebuild an ActiveLearningExperiment from its JSON representation."""
    exp = ActiveLearningExperiment(obj['project'], obj['dataset'], db,
                                   cursor)
    Experiment.expParamFromJson(exp, obj)
    exp.labeling_method = obj['labeling_method']
    # The validation configuration is optional (None when absent).
    validation = obj['validation_conf']
    exp.validation_conf = None
    if validation is not None:
        exp.validation_conf = TestConfiguration.fromJson(validation, exp)
    exp.conf = ActiveLearningConfFactory.getFactory().fromJson(obj['conf'],
                                                               exp)
    return exp
def loadTrueLabels(self):
    """Load labels/true_labels.csv into the TrueLabels table when present.

    When the dataset has no ground-truth file, a warning is printed and
    the TrueLabels table is simply not created.
    """
    dataset_dir = dir_tools.getDatasetDirectory(self.project, self.dataset)
    labels_file = dataset_dir + 'labels/true_labels.csv'
    if not dir_tools.checkFileExists(labels_file):
        print >> sys.stderr, 'No ground truth labels for this dataset'
        return
    exp = Experiment(self.project, self.dataset, self.db, self.cursor,
                     experiment_name='true_labels')
    exp.initLabels('true_labels.csv')
def __init__(self, project, dataset, db, cursor, experiment_name=None,
             experiment_label=None, parent=None):
    """Classification experiment; setup is delegated to Experiment."""
    Experiment.__init__(self, project, dataset, db, cursor,
                        experiment_name=experiment_name,
                        experiment_label=experiment_label,
                        parent=parent)
    self.kind = 'Classification'
def generateParser(parser):
    """Register the classification experiment arguments on *parser*.

    Adds the generic project/dataset/features arguments, the learning
    parameters, a validation parameters group and an alerts group.
    """
    Experiment.projectDatasetFeturesParser(parser)
    parser.add_argument('--num-folds', type=int, default=4)
    parser.add_argument('--multilabel', action='store_true', default=False)
    sample_weight_help = 'When set to True, the detection model is learned with '
    sample_weight_help += 'sample weights inverse to the proportion of the family '
    sample_weight_help += 'in the dataset. Useless if the families are not specified.'
    parser.add_argument('--sample-weight',
                        action='store_true',
                        default=False,
                        help=sample_weight_help)
    ## Validation parameters
    validation_help = 'Validation parameters: \n '
    validation_help += 'The detection model is validated with a proportion of '
    validation_help += 'the instances in the input dataset, or with a separate validation'
    validation_help += ' dataset. By default 10% of the instances are used for validation'
    validation_group = parser.add_argument_group(validation_help)
    validation_group.add_argument('--test-size', type=float, default=0.1)
    validation_group.add_argument('--validation-dataset', default=None)
    ## Alerts
    alerts_group = parser.add_argument_group('Alerts parameters')
    alerts_group.add_argument(
        '--top-n-alerts',
        # BUG FIX: type=int was missing, so a value given on the command
        # line was kept as a string while the default is an int.
        type=int,
        default=100,
        help='Number of most confident alerts displayed.')
    alerts_group.add_argument(
        '--detection-threshold',
        type=float,
        default=0.8,
        help=
        'An alert is raised if the predicted probability of maliciousness ' +
        'is above this threshold.')
    alerts_group.add_argument(
        '--clustering-algo',
        default='Kmeans',
        choices=['Kmeans', 'GaussianMixture'],
        help='Clustering algorithm to analyse the alerts.')
    alerts_group.add_argument(
        '--num-clusters',
        type=int,
        default=4,
        help='Number of clusters built from the alerts.')
def toJson(self):
    """Serialize this active learning experiment to a JSON-ready dict."""
    conf = Experiment.toJson(self)
    conf['__type__'] = 'ActiveLearningExperiment'
    conf['labeling_method'] = self.labeling_method
    validation = self.validation_conf
    # The validation configuration is optional.
    conf['validation_conf'] = (validation.toJson()
                               if validation is not None else None)
    conf['conf'] = self.conf.toJson()
    return conf
def __init__(self, project, dataset, db, cursor, conf,
             experiment_name=None, experiment_label=None, parent=None):
    """Clustering experiment built from a clustering configuration *conf*."""
    Experiment.__init__(self, project, dataset, db, cursor,
                        experiment_name=experiment_name,
                        experiment_label=experiment_label,
                        parent=parent)
    self.kind = 'Clustering'
    self.conf = conf
def generateParser(parser):
    """Register the projection experiment arguments on *parser*."""
    Experiment.projectDatasetFeturesParser(parser)
    families_help = ('When set to True, the semi-supervision is based on '
                     'the families instead of the binary labels. Useless '
                     'if an unsupervised projection method is used.')
    parser.add_argument('--families-supervision',
                        action='store_true',
                        default=False,
                        help=families_help)
    labels_help = ('CSV file containing the labels of some instances. '
                   'These labels are used for semi-supervised projections.')
    parser.add_argument('--labels', '-l',
                        dest='labels_file',
                        default=None,
                        help=labels_help)
    parser.add_argument('--num-components', type=int, default=None)
def generateParser(parser):
    """Register the clustering experiment arguments on *parser*."""
    # Generic arguments
    Experiment.projectDatasetFeturesParser(parser)
    # Clustering arguments
    parser.add_argument('--num-clusters', type=int, default=4)
    label_help = ('The clustering is built from all the instances in the '
                  'dataset, or only from the benign or malicious ones. '
                  'By default, the clustering is built from all the '
                  'instances. The malicious and benign instances are '
                  'selected according to the ground truth labels stored '
                  'in labels/true_labels.csv.')
    parser.add_argument('--label',
                        choices=['all', 'malicious', 'benign'],
                        default='all',
                        help=label_help)
    # Projection arguments
    projection_group = parser.add_argument_group('Projection parameters')
    projection_group.add_argument(
            '--projection-algo',
            choices=['Pca', 'Rca', 'Lda', 'Lmnn', 'Nca', 'Itml', None],
            default=None,
            help='Projection performed before building the clustering. '
                 'By default the instances are not projected.')
    projection_group.add_argument(
            '--families-supervision',
            action='store_true',
            default=False,
            help='When set to True, the semi-supervision is based on the '
                 'families instead of the binary labels. Useless if an '
                 'unsupervised projection method is used.')
    projection_group.add_argument(
            '--labels', '-l',
            dest='labels_file',
            default=None,
            help='CSV file containing the labels of some instances. '
                 'These labels are used for semi-supervised projections.')
def fromJson(obj, db, cursor):
    """Rebuild a DescriptiveStatisticsExperiment from its JSON dict."""
    exp = DescriptiveStatisticsExperiment(obj['project'], obj['dataset'],
                                          db, cursor)
    Experiment.expParamFromJson(exp, obj)
    return exp
def toJson(self):
    """Serialize this classification experiment to a JSON-ready dict."""
    conf = Experiment.toJson(self)
    conf.update({
        '__type__': 'ClassificationExperiment',
        'classification_conf': self.classification_conf.toJson(),
    })
    return conf
def __init__(self, project, dataset, db, cursor, experiment_name=None):
    """Active learning experiment.

    experiment_name: optional name forwarded to Experiment.__init__,
    added for consistency with the other experiment constructors
    (backward compatible: defaults to None).
    """
    Experiment.__init__(self, project, dataset, db, cursor,
                        experiment_name=experiment_name)
    self.kind = 'ActiveLearning'
    self.labeling_method = None
    # BUG FIX: toJson() reads validation_conf and conf; initialize them
    # so a freshly constructed instance can always be serialized
    # (fromJson overwrites them when deserializing).
    self.validation_conf = None
    self.conf = None
def toJson(self):
    """Serialize this clustering experiment to a JSON-ready dict."""
    conf = Experiment.toJson(self)
    conf.update({
        '__type__': 'ClusteringExperiment',
        'conf': self.conf.toJson(),
    })
    return conf
def fromJson(obj, db, cursor):
    """Rebuild a ClusteringExperiment (and its configuration) from JSON."""
    clustering_conf = ClusteringConfFactory.getFactory().fromJson(
            obj['conf'])
    exp = ClusteringExperiment(obj['project'], obj['dataset'], db, cursor,
                               clustering_conf)
    Experiment.expParamFromJson(exp, obj)
    return exp
def toJson(self):
    """Serialize this descriptive statistics experiment to a dict."""
    conf = Experiment.toJson(self)
    conf.update({'__type__': 'DescriptiveStatisticsExperiment'})
    return conf
def toJson(self):
    """Serialize this projection experiment to a JSON-ready dict."""
    conf = Experiment.toJson(self)
    conf.update({
        '__type__': 'ProjectionExperiment',
        'conf': self.conf.toJson(),
    })
    return conf
def fromJson(obj, db, cursor):
    """Rebuild a ValidationExperiment from its JSON representation."""
    exp = ValidationExperiment(obj['project'], obj['dataset'], db, cursor)
    Experiment.expParamFromJson(exp, obj)
    return exp
def toJson(self):
    """Serialize this validation experiment to a JSON-ready dict."""
    conf = Experiment.toJson(self)
    conf.update({'__type__': 'ValidationExperiment'})
    return conf
def __init__(self, project, dataset, db, cursor):
    """Descriptive statistics experiment over *dataset* in *project*."""
    Experiment.__init__(self, project, dataset, db, cursor)
    self.kind = 'DescriptiveStatistics'