示例#1
0
 def get_naive_bayes_conf(self):
     """Return a default multiclass ``GaussianNaiveBayes`` configuration.

     The number of folds and the number of jobs are read from the
     hyper-parameter optimisation settings of the experiment's classifier
     configuration.

     Side effect: a ``DiademExp`` named
     ``AL<exp_id>-Iter<iter_num>-all-NaiveBayes``, whose parent is the
     current experiment, is instantiated with the current session. The
     instance is not kept.
     """
     experiment = self.exp
     parent_conf = experiment.exp_conf
     child_name = 'AL%d-Iter%d-all-NaiveBayes' % (experiment.exp_id,
                                                  self.iteration.iter_num)
     optim = parent_conf.core_conf.classifier_conf.hyperparam_conf.optim_conf
     # The fourth positional argument is the multiclass flag.
     naive_bayes_conf = classifiers.get_factory().get_default(
         'GaussianNaiveBayes', optim.num_folds, optim.n_jobs, True,
         experiment.logger)
     classification_conf = ClassificationConf(
         naive_bayes_conf,
         UnlabeledLabeledConf(experiment.logger),
         experiment.logger)
     # Rebuild a FeaturesConf from the parent's settings instead of
     # passing the parent's object along.
     src = parent_conf.features_conf
     features_conf = FeaturesConf(src.input_features,
                                  src.sparse,
                                  src.logger,
                                  filter_in_f=src.filter_in_f,
                                  filter_out_f=src.filter_out_f)
     child_conf = DiademConf(parent_conf.secuml_conf,
                             parent_conf.dataset_conf,
                             features_conf,
                             parent_conf.annotations_conf,
                             classification_conf,
                             None,
                             name=child_name,
                             parent=experiment.exp_id)
     # NOTE(review): the experiment object is discarded on purpose;
     # presumably its constructor registers the experiment - confirm.
     DiademExp(child_conf, session=experiment.session)
     return naive_bayes_conf
示例#2
0
 def gen_parser():
     """Create the command-line parser used to learn a detection model.

     The experiment and classification options are registered on the main
     parser. One required sub-command (stored in ``model_class``) is added
     per model known to the classifiers factory. Supervised and
     semi-supervised models additionally receive an optional annotations
     argument.

     Returns:
         The configured ``argparse.ArgumentParser``.
     """
     description = ('Learn a detection model. '
                    'The ground-truth must be stored in '
                    'annotations/ground_truth.csv.')
     parser = argparse.ArgumentParser(description=description)
     ExpConf.gen_parser(parser)
     ClassificationConf.gen_parser(parser)
     factory = classifiers.get_factory()
     model_names = factory.get_methods()
     # 'AlreadyTrained' is registered separately after the loop.
     model_names.remove('AlreadyTrained')
     subparsers = parser.add_subparsers(dest='model_class')
     subparsers.required = True
     annotations_help = ('CSV file containing the annotations of '
                         'some or all the instances.')
     for model_name in model_names:
         sub_parser = subparsers.add_parser(model_name)
         factory.gen_parser(model_name, sub_parser)
         model_type = get_classifier_type(factory.get_class(model_name))
         if model_type not in (ClassifierType.supervised,
                               ClassifierType.semisupervised):
             continue
         AnnotationsConf.gen_parser(sub_parser,
                                    required=False,
                                    message=annotations_help)
     # Sub-command for a model that has already been trained.
     trained_parser = subparsers.add_parser('AlreadyTrained')
     factory.gen_parser('AlreadyTrained', trained_parser)
     return parser
示例#3
0
 def gen_parser(parser):
     """Register the alert-related options on ``parser``.

     Adds an 'Alerts parameters' argument group containing:

     * ``--detection-threshold`` (float, default 0.5);
     * ``--alerts-classif`` and ``--alerts-clustering``, which are mutually
       exclusive and both default to ``None``;
     * ``--num-alerts-clusters`` (int, default 4).
     """
     alerts_group = parser.add_argument_group('Alerts parameters')
     threshold_help = ('An alert is triggered if the predicted probability of '
                       'maliciousness is above this threshold. '
                       'Default: 0.5.')
     alerts_group.add_argument('--detection-threshold',
                               type=float,
                               default=0.5,
                               help=threshold_help)

     # At most one analysis method (classification or clustering) may be
     # selected on the command line.
     analysis_group = alerts_group.add_mutually_exclusive_group(required=False)
     supervised_models = classifiers.get_factory().get_methods(
         ClassifierType.supervised)
     classif_help = ('Supervised model trained to cluster the '
                     'alerts according to the malicious families '
                     'defined in the training dataset. '
                     'Default: None.')
     analysis_group.add_argument('--alerts-classif',
                                 default=None,
                                 choices=supervised_models,
                                 help=classif_help)
     clustering_methods = cluster_conf.get_factory().get_methods()
     clustering_help = ('Clustering algorithm to analyze the alerts. '
                        'Default: None.')
     analysis_group.add_argument('--alerts-clustering',
                                 default=None,
                                 choices=clustering_methods,
                                 help=clustering_help)

     num_clusters_help = ('Number of clusters built from the alerts. '
                          'Default: 4.')
     alerts_group.add_argument('--num-alerts-clusters',
                               type=int,
                               default=4,
                               help=num_clusters_help)
示例#4
0
 def _create_naive_bayes_conf(self):
     """Return a default multiclass ``GaussianNaiveBayes`` configuration.

     The number of folds and the number of jobs come from the
     hyper-parameter optimisation settings of the experiment's multiclass
     model. A ``DiademConf`` named
     ``AL<exp_id>-Iter<iter_num>-all-NaiveBayes``, whose parent is the
     current experiment, is also instantiated but not kept.
     """
     experiment = self.exp
     parent_conf = experiment.exp_conf
     child_name = 'AL%d-Iter%d-all-NaiveBayes' % (experiment.exp_id,
                                                  self.iteration.iter_num)
     model_conf = parent_conf.core_conf.multiclass_model.classifier_conf
     optim = model_conf.hyperparam_conf.optim_conf
     # The fourth positional argument is the multiclass flag.
     naive_bayes_conf = classifiers.get_factory().get_default(
         'GaussianNaiveBayes', optim.num_folds, optim.n_jobs, True,
         experiment.logger)
     classification_conf = ClassificationConf(
         naive_bayes_conf,
         UnlabeledLabeledConf(experiment.logger),
         experiment.logger)
     # NOTE(review): the resulting configuration object is discarded;
     # any effect relies on the DiademConf constructor - confirm.
     DiademConf(parent_conf.secuml_conf,
                parent_conf.dataset_conf,
                parent_conf.features_conf,
                parent_conf.annotations_conf,
                classification_conf,
                None,
                name=child_name,
                parent=experiment.exp_id)
     return naive_bayes_conf
示例#5
0
def _rcd_conf(args, logger):
    """Build an ``RcdStrategyConf`` from parsed command-line arguments.

    The strategy wraps a default multiclass ``LogisticRegression``
    classification configuration (no fold / job settings) and uses
    ``args.cluster_strategy``, ``args.num_annotations`` and the
    ``'uniform'`` setting.
    """
    logistic_conf = classifiers.get_factory().get_default(
        'LogisticRegression', None, None, True, logger)
    test_conf = UnlabeledLabeledConf(logger)
    classification_conf = ClassificationConf(logistic_conf, test_conf, logger)
    return RcdStrategyConf(classification_conf,
                           args.cluster_strategy,
                           args.num_annotations,
                           'uniform',
                           logger)
示例#6
0
文件: aladin.py 项目: zzszmyf/SecuML
 def _get_lr_conf(self, validation_conf, logger, multiclass=False):
     """Return a ``ClassificationConf`` wrapping a default logistic regression.

     Args:
         validation_conf: forwarded as ``validation_conf`` to
             ``ClassificationConf``.
         logger: logger handed to every configuration object created here.
         multiclass: multiclass flag passed to the classifiers factory.
     """
     logistic_conf = classifiers.get_factory().get_default(
         'LogisticRegression', None, None, multiclass, logger)
     test_conf = UnlabeledLabeledConf(logger)
     return ClassificationConf(logistic_conf, test_conf, logger,
                               validation_conf=validation_conf)
示例#7
0
 def gen_main_model_parser(parser):
     """Register the main classification model options on ``parser``.

     Adds a 'Classification model parameters' group with a
     ``--model-class`` option (default: ``LogisticRegression``) restricted
     to the supervised models of the classifiers factory, then lets
     ``HyperparamConf`` add its own options to the same group.
     """
     model_group = parser.add_argument_group(
         'Classification model parameters')
     supervised_models = classifiers.get_factory().get_methods(supervised=True)
     model_help = ('Model class trained at each iteration. '
                   'Default: LogisticRegression.')
     model_group.add_argument('--model-class',
                              choices=supervised_models,
                              default='LogisticRegression',
                              help=model_help)
     HyperparamConf.gen_parser(model_group, None, True, subgroup=False)
示例#8
0
 def from_json(obj, logger):
     """Rebuild an ``AlertsConf`` from its JSON representation.

     Returns ``None`` when ``obj`` is ``None``. Otherwise at most one of
     the classifier / clustering configurations is restored: the
     clustering entry is only looked at when the classifier entry is
     ``None``.
     """
     if obj is None:
         return None
     classifier_conf = clustering_conf = None
     if obj['classifier_conf'] is not None:
         classifier_conf = classifiers.get_factory().from_json(
             obj['classifier_conf'], logger)
     else:
         # Only read the clustering entry when no classifier is stored.
         if obj['clustering_conf'] is not None:
             clustering_conf = cluster_conf.get_factory().from_json(
                 obj['clustering_conf'], logger)
     return AlertsConf(obj['detection_threshold'], classifier_conf,
                       clustering_conf, logger)
示例#9
0
 def from_args(args):
     """Build a ``DiademConf`` from parsed command-line arguments.

     Args:
         args: namespace produced by the Diadem argument parser.

     Returns:
         The ``DiademConf`` describing the experiment.

     Raises:
         InvalidInputArguments: when the requested options are
             incompatible, i.e. supervised alert classification with an
             unsupervised model, alert analysis with a multiclass model,
             alert analysis in streaming mode, or sparse features with a
             model that does not accept them.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     classif_conf = ClassificationConf.from_args(args, secuml_conf.logger)
     model_class = classifiers.get_factory().get_class(args.model_class)
     classifier_type = get_classifier_type(model_class)
     # Only supervised and semi-supervised models read an annotations file.
     if classifier_type in (ClassifierType.supervised,
                            ClassifierType.semisupervised):
         annotations_conf = AnnotationsConf(args.annotations_file, None,
                                            secuml_conf.logger)
     else:
         annotations_conf = AnnotationsConf(None, None, secuml_conf.logger)
     already_trained = None
     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
     alerts_conf = AlertsConf.from_args(args, secuml_conf.logger)
     if (classifier_type == ClassifierType.unsupervised
             and alerts_conf.classifier_conf is not None):
         raise InvalidInputArguments('Supervised classification of the '
                                     'alerts is not supported for '
                                     'unsupervised model classes. ')
     if classif_conf.classifier_conf.multiclass:
         if alerts_conf.with_analysis():
             raise InvalidInputArguments('Alerts analysis is not supported '
                                         'for multiclass models. ')
         # Multiclass models carry no alerts configuration at all.
         alerts_conf = None
     # alerts_conf may have been reset to None just above (multiclass
     # model), so it must be checked before calling with_analysis();
     # otherwise a multiclass model tested in streaming mode crashes with
     # an AttributeError instead of building its configuration.
     if (classif_conf.test_conf.method == 'dataset'
             and classif_conf.test_conf.streaming
             and alerts_conf is not None
             and alerts_conf.with_analysis()):
         raise InvalidInputArguments('Alerts analysis is not supported '
                                     'in streaming mode. ')
     dataset_conf = DatasetConf.from_args(args, secuml_conf.logger)
     features_conf = FeaturesConf.from_args(args, secuml_conf.logger)
     if (features_conf.sparse
             and not classif_conf.classifier_conf.accept_sparse):
         raise InvalidInputArguments('%s does not support sparse '
                                     'features. ' % args.model_class)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       classif_conf,
                       alerts_conf,
                       name=args.exp_name,
                       already_trained=already_trained,
                       no_training_detection=args.no_training_detection)
示例#10
0
 def from_args(self, method, args, logger):
     """Build the configuration of strategy ``method`` from ``args``.

     A validation configuration is created only when
     ``args.validation_datasets`` is set. When the strategy class declares
     a main model type, ``args.multiclass`` is overwritten accordingly
     (``True`` only for ``'multiclass'``) and a ``ClassificationConf`` is
     built for the model named by ``args.model_class``; otherwise the main
     model configuration is ``None``.
     """
     if args.validation_datasets is not None:
         validation_conf = ValidationDatasetsConf.from_args(args, logger)
     else:
         validation_conf = None
     strategy_class = self.get_class(method)
     model_kind = strategy_class.main_model_type()
     if model_kind is None:
         # The strategy does not rely on a main classification model.
         return strategy_class.from_args(args, None, validation_conf,
                                         logger)
     factory = classifiers.get_factory()
     # The classifier factory reads the multiclass flag from args.
     args.multiclass = model_kind == 'multiclass'
     classifier_conf = factory.from_args(args.model_class, args, logger)
     main_model_conf = ClassificationConf(classifier_conf,
                                          UnlabeledLabeledConf(logger),
                                          logger,
                                          validation_conf=validation_conf)
     return strategy_class.from_args(args, main_model_conf, validation_conf,
                                     logger)
示例#11
0
 def from_json(conf_json, secuml_conf):
     """Rebuild a ``TestConf`` from its JSON representation.

     The dataset, features, annotations and classifier configurations are
     restored from their respective entries (the classifier one is stored
     under ``'core_conf'``), and the experiment identifier is set from
     ``conf_json['exp_id']``.
     """
     logger = secuml_conf.logger
     dataset_conf = DatasetConf.from_json(conf_json['dataset_conf'], logger)
     features_conf = FeaturesConf.from_json(conf_json['features_conf'],
                                            logger)
     annotations_conf = AnnotationsConf.from_json(
         conf_json['annotations_conf'], logger)
     classifier_conf = classifiers.get_factory().from_json(
         conf_json['core_conf'], logger)
     # These entries are forwarded unchanged as keyword arguments.
     metadata = {key: conf_json[key]
                 for key in ('name', 'parent', 'fold_id', 'kind')}
     restored = TestConf(secuml_conf, dataset_conf, features_conf,
                         annotations_conf, classifier_conf, **metadata)
     restored.exp_id = conf_json['exp_id']
     return restored
示例#12
0
 def from_args(args):
     """Build a ``DiademConf`` from parsed command-line arguments.

     The annotations file given on the command line is only used for
     supervised and semi-supervised model classes. ``already_trained`` is
     set to ``args.model_exp_id`` only for the 'AlreadyTrained' model
     class.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     logger = secuml_conf.logger
     classif_conf = ClassificationConf.from_args(args, logger)
     model_class = classifiers.get_factory().get_class(args.model_class)
     uses_annotations = get_classifier_type(model_class) in (
         ClassifierType.supervised, ClassifierType.semisupervised)
     annotations_file = args.annotations_file if uses_annotations else None
     annotations_conf = AnnotationsConf(annotations_file, None, logger)
     is_trained = args.model_class == 'AlreadyTrained'
     already_trained = args.model_exp_id if is_trained else None
     dataset_conf = DatasetConf.from_args(args, logger)
     features_conf = FeaturesConf.from_args(args, logger)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       classif_conf,
                       name=args.exp_name,
                       already_trained=already_trained)
示例#13
0
 def gen_parser():
     """Create the command-line parser used to train and evaluate a model.

     The main parser receives the experiment options and a
     ``--no-training-detection`` flag. One required sub-command (stored in
     ``model_class``) is added per model known to the classifiers factory;
     each of them, as well as the 'AlreadyTrained' sub-command, gets the
     classification and alerts options. Supervised and semi-supervised
     models also get an annotations argument, which is only mandatory for
     semi-supervised models (supervised ones default to GROUND_TRUTH).

     Returns:
         The configured ``argparse.ArgumentParser``.
     """
     parser = argparse.ArgumentParser(
         description='Train and evaluate a detection model. ')
     ExpConf.gen_parser(parser, sparse=True)
     parser.add_argument('--no-training-detection',
                         action='store_true',
                         default=False,
                         help='''When specified, the detection model is
                                 not applied to the training instances. ''')
     factory = classifiers.get_factory()
     model_names = factory.get_methods()
     # 'AlreadyTrained' is registered separately after the loop.
     model_names.remove('AlreadyTrained')
     subparsers = parser.add_subparsers(dest='model_class')
     subparsers.required = True
     for model_name in model_names:
         sub_parser = subparsers.add_parser(model_name)
         factory.gen_parser(model_name, sub_parser)
         model_type = get_classifier_type(factory.get_class(model_name))
         if model_type in (ClassifierType.supervised,
                           ClassifierType.semisupervised):
             message = '''CSV file containing the annotations of some
                          instances, or GROUND_TRUTH to use the ground
                          truth annotations stored in idents.csv. '''
             if model_type == ClassifierType.supervised:
                 default = 'GROUND_TRUTH'
                 message = '%s Default: GROUND_TRUTH.' % message
             else:
                 default = None
             # Without a default value the annotations become mandatory.
             AnnotationsConf.gen_parser(sub_parser,
                                        required=default is None,
                                        default=default,
                                        message=message)
         ClassificationConf.gen_parser(sub_parser)
         AlertsConf.gen_parser(sub_parser)
     # Sub-command for a model that has already been trained.
     trained_parser = subparsers.add_parser('AlreadyTrained')
     factory.gen_parser('AlreadyTrained', trained_parser)
     ClassificationConf.gen_parser(trained_parser)
     AlertsConf.gen_parser(trained_parser)
     return parser
示例#14
0
 def from_args(args, logger):
     """Build an ``AlertsConf`` from parsed command-line arguments.

     When ``args.alerts_classif`` is set, a default multiclass
     configuration of that model is created, using ``args.num_folds`` and
     ``args.n_jobs`` if the namespace defines them (``None`` otherwise).
     Failing that, when ``args.alerts_clustering`` is set, the matching
     clustering configuration is created with ``args.num_alerts_clusters``.
     """
     classifier_conf = clustering_conf = None
     if args.alerts_classif is not None:
         # Not every sub-parser defines these options.
         num_folds = getattr(args, 'num_folds', None)
         n_jobs = getattr(args, 'n_jobs', None)
         # The fourth positional argument is the multiclass flag.
         classifier_conf = classifiers.get_factory().get_default(
             args.alerts_classif, num_folds, n_jobs, True, logger)
     elif args.alerts_clustering is not None:
         clustering_class = cluster_conf.get_factory().get_class(
             args.alerts_clustering)
         clustering_conf = clustering_class(logger, args.num_alerts_clusters)
     return AlertsConf(args.detection_threshold, classifier_conf,
                       clustering_conf, logger)