Пример #1
0
 def gen_parser():
     """Build the command-line parser with one sub-command per model class.

     Each model listed by the classifiers factory (except 'AlreadyTrained')
     gets its own subparser; supervised and semi-supervised models also get
     the (non-required) annotations arguments. 'AlreadyTrained' is registered
     last, without annotations arguments.
     """
     description = ('Learn a detection model. '
                    'The ground-truth must be stored in '
                    'annotations/ground_truth.csv.')
     annotations_help = ('CSV file containing the annotations of '
                         'some or all the instances.')
     parser = argparse.ArgumentParser(description=description)
     ExpConf.gen_parser(parser)
     ClassificationConf.gen_parser(parser)
     factory = classifiers.get_factory()
     model_names = factory.get_methods()
     # 'AlreadyTrained' is handled separately after the loop.
     model_names.remove('AlreadyTrained')
     model_parsers = parser.add_subparsers(dest='model_class')
     model_parsers.required = True
     for name in model_names:
         sub = model_parsers.add_parser(name)
         factory.gen_parser(name, sub)
         kind = get_classifier_type(factory.get_class(name))
         if kind not in [ClassifierType.supervised,
                         ClassifierType.semisupervised]:
             continue
         AnnotationsConf.gen_parser(sub, required=False,
                                    message=annotations_help)
     # Sub-command for a model that has already been trained.
     trained_parser = model_parsers.add_parser('AlreadyTrained')
     factory.gen_parser('AlreadyTrained', trained_parser)
     return parser
Пример #2
0
 def _create_detection_exp(self, kind, classifier_conf, fold_id=None):
     """Create the DetectionExp child experiment for ``kind``.

     For kind 'validation', and for kind 'test' when the test method is
     'dataset', the experiment targets the corresponding ``test_dataset``
     with 'ground_truth.csv' annotations. Otherwise the dataset and
     annotations of the parent experiment are reused.

     ``classifier_conf`` is accepted but not used by this method.
     """
     parent_id = self.exp_conf.exp_id
     name = 'DIADEM_%i_Detection_%s' % (parent_id, kind)
     if fold_id is not None:
         name = '%s_fold_%i' % (name, fold_id)
     secuml_conf = self.exp_conf.secuml_conf
     logger = secuml_conf.logger
     # Pick the configuration that names the external dataset, if any.
     uses_external_dataset = True
     external_conf = None
     if kind == 'validation':
         external_conf = self.validation_conf
     elif kind == 'test' and self.test_conf.method == 'dataset':
         external_conf = self.test_conf
     else:
         uses_external_dataset = False
     if uses_external_dataset:
         dataset_conf = DatasetConf(self.exp_conf.dataset_conf.project,
                                    external_conf.test_dataset,
                                    self.exp_conf.secuml_conf.logger)
         annotations_conf = AnnotationsConf('ground_truth.csv', None,
                                            logger)
     else:
         dataset_conf = self.exp_conf.dataset_conf
         annotations_conf = self.exp_conf.annotations_conf
     features_conf = self.exp_conf.features_conf
     detection_conf = DetectionConf(
         secuml_conf, dataset_conf, features_conf, annotations_conf,
         self._get_alerts_conf(fold_id),
         name=name, parent=parent_id, fold_id=fold_id, kind=kind)
     return DetectionExp(detection_conf, session=self.session)
Пример #3
0
 def gen_parser():
     """Build the command-line parser of the clustering experiment.

     Registers the common experiment arguments, the annotations arguments,
     the ``--label`` option and one sub-command per algorithm listed by the
     clustering factory.
     """
     # The help texts keep their embedded line breaks and indentation.
     annotations_help = '''CSV file containing the annotations of some
                            instances, or GROUND_TRUTH to use the ground
                            truth annotations stored in idents.csv.
                            These annotations are used for semi-supervised
                            projections.'''
     label_help = '''The clustering is built from all the instances in the
                      dataset, or only from the benign or malicious ones.
                      By default, the clustering is built from all the
                      instances. The malicious and benign instances are
                      selected according to the ground-truth stored in
                      idents.csv.'''
     parser = argparse.ArgumentParser(
         description='Clustering of the data for data exploration.')
     ExpConf.gen_parser(parser)
     AnnotationsConf.gen_parser(parser, message=annotations_help)
     parser.add_argument('--label',
                         choices=['all', 'malicious', 'benign'],
                         default='all',
                         help=label_help)
     algo_parsers = parser.add_subparsers(dest='algo')
     algo_parsers.required = True
     factory = clustering_conf.get_factory()
     for algo_name in factory.get_methods():
         sub = algo_parsers.add_parser(algo_name)
         factory.gen_parser(algo_name, sub)
     return parser
Пример #4
0
 def gen_parser():
     """Build the features-analysis parser with required annotations."""
     annotations_help = ('CSV file containing the annotations of some or all'
                         ' the instances.')
     parser = argparse.ArgumentParser(description='Features Analysis')
     ExpConf.gen_parser(parser, filters=False)
     AnnotationsConf.gen_parser(parser, required=True,
                                message=annotations_help)
     return parser
Пример #5
0
 def gen_parser():
     """Build the command-line parser of the rare category detection.

     The annotations file is optional and defaults to
     'init_annotations.csv'. The 'Rcd' strategy arguments are added directly
     to the main parser.
     """
     annotations_help = ('CSV file containing the initial annotations '
                         'used to learn the first supervised detection '
                         'model.')
     parser = argparse.ArgumentParser(
         description='Rare Category Detection',
         formatter_class=argparse.RawTextHelpFormatter)
     ExpConf.gen_parser(parser, filters=True, sparse=True)
     AnnotationsConf.gen_parser(parser,
                                default='init_annotations.csv',
                                required=False,
                                message=annotations_help)
     rcd_factory = strategies_conf.get_factory()
     rcd_factory.gen_parser('Rcd', parser)
     return parser
Пример #6
0
 def gen_parser():
     """Build the features-analysis parser.

     Annotations are optional here, and a ``--multiclass`` flag (off by
     default) is added.
     """
     annotations_help = ('CSV file containing the annotations of some or '
                         'all the instances.')
     multiclass_help = ('The instances are grouped according to '
                        'their families instead of their binary '
                        'labels.')
     parser = argparse.ArgumentParser(description='Features Analysis')
     ExpConf.gen_parser(parser, filters=False, sparse=True)
     AnnotationsConf.gen_parser(parser, required=False,
                                message=annotations_help)
     parser.add_argument('--multiclass', default=False, action='store_true',
                         help=multiclass_help)
     return parser
Пример #7
0
 def from_args(args):
     """Build a DiademConf from parsed command-line arguments.

     When the model class is 'AlreadyTrained', no annotations file is used
     and ``args.model_exp_id`` is forwarded as ``already_trained``.
     """
     secuml_conf = ExpConf.common_from_args(args)
     logger = secuml_conf.logger
     core_conf = ClassificationConf.from_args(args, logger)
     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
         annotations_file = None
     else:
         already_trained = None
         annotations_file = args.annotations_file
     annotations_conf = AnnotationsConf(annotations_file, None, logger)
     dataset_conf = DatasetConf.from_args(args, logger)
     features_conf = FeaturesConf.from_args(args, logger)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       core_conf,
                       name=args.exp_name,
                       already_trained=already_trained)
Пример #8
0
 def gen_parser():
     """Build the command-line parser of the projection experiment.

     One sub-command is registered per algorithm listed by the projection
     factory.
     """
     annotations_help = ('CSV file containing the annotations of some'
                         ' instances. These annotations are used for '
                         'semi-supervised projections.')
     parser = argparse.ArgumentParser(
         description='Projection of the data for data visualization.')
     ExpConf.gen_parser(parser)
     AnnotationsConf.gen_parser(parser, message=annotations_help)
     algo_parsers = parser.add_subparsers(dest='algo')
     algo_parsers.required = True
     for algo_name in projection_conf.get_factory().get_methods():
         sub = algo_parsers.add_parser(algo_name)
         projection_conf.get_factory().gen_parser(algo_name, sub)
     return parser
Пример #9
0
 def _create_test_exp(self, fold_id=None):
     """Create the TestExp child experiment.

     When the test method is 'dataset', the experiment targets
     ``self.test_conf.test_dataset`` with 'ground_truth.csv' annotations.
     Otherwise the dataset and annotations of the parent experiment are
     reused.
     """
     parent_id = self.exp_conf.exp_id
     name = 'DIADEM_%i_Test' % parent_id
     if fold_id is not None:
         name = '%s_fold_%i' % (name, fold_id)
     secuml_conf = self.exp_conf.secuml_conf
     logger = secuml_conf.logger
     if self.test_conf.method != 'dataset':
         dataset_conf = self.exp_conf.dataset_conf
         annotations_conf = self.exp_conf.annotations_conf
     else:
         dataset_conf = DatasetConf(self.exp_conf.dataset_conf.project,
                                    self.test_conf.test_dataset,
                                    self.exp_conf.secuml_conf.logger)
         annotations_conf = AnnotationsConf('ground_truth.csv', None,
                                            logger)
     features_conf = self.exp_conf.features_conf
     conf = TestConf(
         secuml_conf, dataset_conf, features_conf, annotations_conf,
         self.exp_conf.core_conf.classifier_conf,
         name=name, parent=parent_id, fold_id=fold_id, kind='test')
     return TestExp(conf,
                    alerts_conf=self._get_alerts_conf(fold_id),
                    session=self.session)
Пример #10
0
 def gen_parser():
     """Build the command-line parser of the active learning experiment.

     One sub-command is registered per strategy listed by the strategies
     factory; the annotations file is optional with no default.
     """
     annotations_help = ('CSV file containing the initial annotations '
                         'used to learn the first detection model.')
     parser = argparse.ArgumentParser(
         description='Active Learning',
         formatter_class=argparse.RawTextHelpFormatter)
     ExpConf.gen_parser(parser, filters=True, sparse=True)
     AnnotationsConf.gen_parser(parser, default=None, required=False,
                                message=annotations_help)
     strategy_parsers = parser.add_subparsers(dest='strategy')
     strategy_parsers.required = True
     for name in strategies_conf.get_factory().get_methods():
         sub = strategy_parsers.add_parser(name)
         strategies_conf.get_factory().gen_parser(name, sub)
     return parser
Пример #11
0
 def from_args(args):
     """Build a DiademConf from parsed command-line arguments.

     Annotations are read from ``args.annotations_file`` only for supervised
     and semi-supervised model classes. For multiclass models that do not
     request alerts analysis, no alerts configuration is kept (None is passed
     to DiademConf).

     Raises:
         InvalidInputArguments: if supervised classification of the alerts is
             requested for an unsupervised model class, if alerts analysis is
             requested for a multiclass model or in streaming mode, or if
             sparse features are requested for a model that does not accept
             them.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     logger = secuml_conf.logger
     classif_conf = ClassificationConf.from_args(args, logger)
     model_class = classifiers.get_factory().get_class(args.model_class)
     classifier_type = get_classifier_type(model_class)
     if classifier_type in [
             ClassifierType.supervised, ClassifierType.semisupervised
     ]:
         annotations_conf = AnnotationsConf(args.annotations_file, None,
                                            logger)
     else:
         annotations_conf = AnnotationsConf(None, None, logger)
     already_trained = None
     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
     alerts_conf = AlertsConf.from_args(args, logger)
     if (classifier_type == ClassifierType.unsupervised
             and alerts_conf.classifier_conf is not None):
         raise InvalidInputArguments('Supervised classification of the '
                                     'alerts is not supported for '
                                     'unsupervised model classes. ')
     if classif_conf.classifier_conf.multiclass:
         if alerts_conf.with_analysis():
             raise InvalidInputArguments('Alerts analysis is not supported '
                                         'for multiclass models. ')
         alerts_conf = None
     # alerts_conf is None for multiclass models (reset just above), so it
     # must be checked before with_analysis() is called; otherwise a
     # multiclass model tested on a streamed dataset fails with an
     # AttributeError instead of building the configuration.
     if (alerts_conf is not None
             and classif_conf.test_conf.method == 'dataset'
             and classif_conf.test_conf.streaming
             and alerts_conf.with_analysis()):
         raise InvalidInputArguments('Alerts analysis is not supported '
                                     'in streaming mode. ')
     dataset_conf = DatasetConf.from_args(args, logger)
     features_conf = FeaturesConf.from_args(args, logger)
     if (features_conf.sparse
             and not classif_conf.classifier_conf.accept_sparse):
         raise InvalidInputArguments('%s does not support sparse '
                                     'features. ' % args.model_class)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       classif_conf,
                       alerts_conf,
                       name=args.exp_name,
                       already_trained=already_trained,
                       no_training_detection=args.no_training_detection)
Пример #12
0
 def gen_parser():
     """Build the command-line parser of the projection experiment.

     One sub-command is registered per algorithm listed by the projection
     factory.
     """
     # The help text keeps its embedded line breaks and indentation.
     annotations_help = '''CSV file containing the annotations of some
                         instances, or GROUND_TRUTH to use the ground
                         truth annotations stored in idents.csv.
                         These annotations are used for semi-supervised
                         projections and are displayed in the GUI.'''
     parser = argparse.ArgumentParser(
         description='Projection of the data for data visualization.')
     ExpConf.gen_parser(parser)
     AnnotationsConf.gen_parser(parser, message=annotations_help)
     algo_parsers = parser.add_subparsers(dest='algo')
     algo_parsers.required = True
     for algo_name in projection_conf.get_factory().get_methods():
         sub = algo_parsers.add_parser(algo_name)
         projection_conf.get_factory().gen_parser(algo_name, sub)
     return parser
Пример #13
0
 def from_args(args):
     """Build a DiademConf from parsed command-line arguments.

     Annotations are read from ``args.annotations_file`` only for supervised
     and semi-supervised model classes. ``args.model_exp_id`` is forwarded
     as ``already_trained`` when the model class is 'AlreadyTrained'.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     logger = secuml_conf.logger
     classif_conf = ClassificationConf.from_args(args, logger)
     model_class = classifiers.get_factory().get_class(args.model_class)
     classifier_type = get_classifier_type(model_class)
     uses_annotations = classifier_type in [ClassifierType.supervised,
                                            ClassifierType.semisupervised]
     if uses_annotations:
         annotations_conf = AnnotationsConf(args.annotations_file, None,
                                            logger)
     else:
         annotations_conf = AnnotationsConf(None, None, logger)
     if args.model_class == 'AlreadyTrained':
         already_trained = args.model_exp_id
     else:
         already_trained = None
     dataset_conf = DatasetConf.from_args(args, logger)
     features_conf = FeaturesConf.from_args(args, logger)
     return DiademConf(secuml_conf,
                       dataset_conf,
                       features_conf,
                       annotations_conf,
                       classif_conf,
                       name=args.exp_name,
                       already_trained=already_trained)
Пример #14
0
 def from_args(args):
     """Build an RcdConf from parsed command-line arguments."""
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     dataset = DatasetConf.from_args(args, log)
     features = FeaturesConf.from_args(args, log)
     annotations = AnnotationsConf(args.annotations_file, None, log)
     # The strategy is always 'Rcd' for this experiment.
     strategy_factory = strategies_conf.get_factory()
     strategy = strategy_factory.from_args('Rcd', args, log)
     return RcdConf(secuml_conf,
                    dataset,
                    features,
                    annotations,
                    strategy,
                    name=args.exp_name)
Пример #15
0
 def from_args(args):
     """Build a ProjectionConf from parsed command-line arguments.

     The projection algorithm configuration is built by the projection
     factory from ``args.algo``.
     """
     secuml_conf = ExpConf.common_from_args(args)
     log = secuml_conf.logger
     dataset = DatasetConf.from_args(args, log)
     features = FeaturesConf.from_args(args, log)
     annotations = AnnotationsConf(args.annotations_file, None, log)
     algo_factory = projection_conf.get_factory()
     algo_conf = algo_factory.from_args(args.algo, args, log)
     return ProjectionConf(secuml_conf,
                           dataset,
                           features,
                           annotations,
                           algo_conf,
                           name=args.exp_name)
Пример #16
0
 def gen_parser():
     """Build the parser used to train and evaluate a detection model.

     Each model listed by the classifiers factory (except 'AlreadyTrained')
     gets a subparser with its own arguments plus the classification and
     alerts arguments. Supervised models get an annotations argument that
     defaults to GROUND_TRUTH; semi-supervised models get a required one
     with no default. 'AlreadyTrained' is registered last, without
     annotations arguments.
     """
     # The help texts keep their embedded line breaks and indentation.
     detection_help = '''When specified, the detection model is
                                 not applied to the training instances. '''
     base_message = '''CSV file containing the annotations of some
                          instances, or GROUND_TRUTH to use the ground
                          truth annotations stored in idents.csv. '''
     parser = argparse.ArgumentParser(
         description='Train and evaluate a detection '
         'model. ')
     ExpConf.gen_parser(parser, sparse=True)
     parser.add_argument('--no-training-detection', action='store_true',
                         default=False, help=detection_help)
     factory = classifiers.get_factory()
     model_names = factory.get_methods()
     # 'AlreadyTrained' is handled separately after the loop.
     model_names.remove('AlreadyTrained')
     model_parsers = parser.add_subparsers(dest='model_class')
     model_parsers.required = True
     for name in model_names:
         sub = model_parsers.add_parser(name)
         factory.gen_parser(name, sub)
         kind = get_classifier_type(factory.get_class(name))
         if kind == ClassifierType.supervised:
             # Supervised models fall back to the ground truth.
             AnnotationsConf.gen_parser(
                 sub, required=False, default='GROUND_TRUTH',
                 message='%s Default: GROUND_TRUTH.' % base_message)
         elif kind == ClassifierType.semisupervised:
             AnnotationsConf.gen_parser(sub, required=True, default=None,
                                        message=base_message)
         ClassificationConf.gen_parser(sub)
         AlertsConf.gen_parser(sub)
     # Sub-command for a model that has already been trained.
     trained_parser = model_parsers.add_parser('AlreadyTrained')
     factory.gen_parser('AlreadyTrained', trained_parser)
     ClassificationConf.gen_parser(trained_parser)
     AlertsConf.gen_parser(trained_parser)
     return parser
Пример #17
0
 def from_args(args):
     """Build a FeaturesAnalysisConf from parsed command-line arguments.

     None is passed as the fifth positional argument of the configuration.
     """
     secuml_conf = ExpConf.common_from_args(args)
     log = secuml_conf.logger
     dataset = DatasetConf.from_args(args, log)
     features = FeaturesConf.from_args(args, log)
     annotations = AnnotationsConf(args.annotations_file, None, log)
     return FeaturesAnalysisConf(secuml_conf, dataset, features, annotations,
                                 None, name=args.exp_name)
Пример #18
0
 def from_args(args):
     """Build an ActiveLearningConf from parsed command-line arguments.

     The strategy configuration is built by the strategies factory from
     ``args.strategy``.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     dataset = DatasetConf.from_args(args, log)
     features = FeaturesConf.from_args(args, log)
     annotations = AnnotationsConf(args.annotations_file, None, log)
     strategy_factory = strategies_conf.get_factory()
     strategy = strategy_factory.from_args(args.strategy, args, log)
     return ActiveLearningConf(secuml_conf,
                               dataset,
                               features,
                               annotations,
                               strategy,
                               name=args.exp_name)
Пример #19
0
 def from_json(conf_json, secuml_conf):
     """Rebuild an RcdConf from its JSON dictionary.

     The experiment identifier stored under 'exp_id' is restored on the
     returned configuration.
     """
     log = secuml_conf.logger
     dataset = DatasetConf.from_json(conf_json['dataset_conf'], log)
     features = FeaturesConf.from_json(conf_json['features_conf'], log)
     annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                             log)
     strategy = strategies_conf.get_factory().from_json(
         conf_json['core_conf'], log)
     rcd_conf = RcdConf(secuml_conf,
                        dataset,
                        features,
                        annotations,
                        strategy,
                        name=conf_json['name'],
                        parent=conf_json['parent'])
     rcd_conf.exp_id = conf_json['exp_id']
     return rcd_conf
Пример #20
0
 def from_json(conf_json, secuml_conf):
     """Rebuild a FeaturesAnalysisConf from its JSON dictionary.

     None is passed as the fifth positional argument, and the experiment
     identifier stored under 'exp_id' is restored on the result.
     """
     dataset = DatasetConf.from_json(conf_json['dataset_conf'],
                                     secuml_conf.logger)
     features = FeaturesConf.from_json(conf_json['features_conf'],
                                       secuml_conf.logger)
     annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                             secuml_conf.logger)
     analysis_conf = FeaturesAnalysisConf(
         secuml_conf, dataset, features, annotations, None,
         name=conf_json['name'], parent=conf_json['parent'])
     analysis_conf.exp_id = conf_json['exp_id']
     return analysis_conf
Пример #21
0
 def from_args(args):
     """Build a ClusteringConf from parsed command-line arguments.

     The algorithm configuration is built by the clustering factory from
     ``args.algo``; ``args.label`` is forwarded as ``label``.
     """
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     dataset = DatasetConf.from_args(args, log)
     features = FeaturesConf.from_args(args, log)
     annotations = AnnotationsConf(args.annotations_file, None, log)
     algo_factory = clustering_conf.get_factory()
     algo_conf = algo_factory.from_args(args.algo, args, log)
     return ClusteringConf(secuml_conf, dataset, features, annotations,
                           algo_conf, name=args.exp_name, label=args.label)
Пример #22
0
 def from_args(args):
     """Build a FeaturesAnalysisConf from parsed command-line arguments.

     Raises:
         InvalidInputArguments: if ``--multiclass`` is set while no
             annotations file is given.
     """
     missing_annotations = args.annotations_file is None
     if missing_annotations and args.multiclass:
         raise InvalidInputArguments('--annotations <file> is required. '
                                     'An annotation file must be specified '
                                     'to group the instances according to '
                                     'their families.')
     secuml_conf = ExpConf.secuml_conf_from_args(args)
     log = secuml_conf.logger
     dataset = DatasetConf.from_args(args, log)
     features = FeaturesConf.from_args(args, log)
     annotations = AnnotationsConf(args.annotations_file, None, log)
     return FeaturesAnalysisConf(secuml_conf, dataset, features, annotations,
                                 args.multiclass, name=args.exp_name)
Пример #23
0
 def from_json(conf_json, secuml_conf):
     """Rebuild a DiademConf from its JSON dictionary.

     The experiment identifier stored under 'exp_id' is restored on the
     returned configuration.
     """
     dataset = DatasetConf.from_json(conf_json['dataset_conf'],
                                     secuml_conf.logger)
     features = FeaturesConf.from_json(conf_json['features_conf'],
                                       secuml_conf.logger)
     annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                             secuml_conf.logger)
     classification = ClassificationConf.from_json(conf_json['core_conf'],
                                                   secuml_conf.logger)
     diadem_conf = DiademConf(
         secuml_conf, dataset, features, annotations, classification,
         name=conf_json['name'],
         parent=conf_json['parent'],
         already_trained=conf_json['already_trained'])
     diadem_conf.exp_id = conf_json['exp_id']
     return diadem_conf
Пример #24
0
 def from_json(conf_json, secuml_conf):
     """Rebuild a ProjectionConf from its JSON dictionary.

     The experiment identifier stored under 'exp_id' is restored on the
     returned configuration.
     """
     dataset = DatasetConf.from_json(conf_json['dataset_conf'],
                                     secuml_conf.logger)
     features = FeaturesConf.from_json(conf_json['features_conf'],
                                       secuml_conf.logger)
     annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                             secuml_conf.logger)
     algo_factory = projection_conf.get_factory()
     algo_conf = algo_factory.from_json(conf_json['core_conf'],
                                        secuml_conf.logger)
     result = ProjectionConf(
         secuml_conf, dataset, features, annotations, algo_conf,
         name=conf_json['name'], parent=conf_json['parent'])
     result.exp_id = conf_json['exp_id']
     return result
Пример #25
0
 def from_json(conf_json, secuml_conf):
     """Rebuild a TestConf from its JSON dictionary.

     The classifier configuration is rebuilt by the classifiers factory
     from 'core_conf'; the identifier stored under 'exp_id' is restored on
     the returned configuration.
     """
     log = secuml_conf.logger
     dataset = DatasetConf.from_json(conf_json['dataset_conf'], log)
     features = FeaturesConf.from_json(conf_json['features_conf'], log)
     annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                             log)
     classifier = classifiers.get_factory().from_json(
         conf_json['core_conf'], log)
     test_conf = TestConf(secuml_conf,
                          dataset,
                          features,
                          annotations,
                          classifier,
                          name=conf_json['name'],
                          parent=conf_json['parent'],
                          fold_id=conf_json['fold_id'],
                          kind=conf_json['kind'])
     test_conf.exp_id = conf_json['exp_id']
     return test_conf
Пример #26
0
 def from_json(conf_json, secuml_conf):
     """Rebuild a DetectionConf from its JSON dictionary.

     The alerts configuration is rebuilt from 'core_conf' unless that entry
     is None; the identifier stored under 'exp_id' is restored on the
     returned configuration.
     """
     log = secuml_conf.logger
     dataset = DatasetConf.from_json(conf_json['dataset_conf'], log)
     features = FeaturesConf.from_json(conf_json['features_conf'], log)
     annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                             log)
     alerts = None
     if conf_json['core_conf'] is not None:
         alerts = AlertsConf.from_json(conf_json['core_conf'], log)
     detection_conf = DetectionConf(secuml_conf,
                                    dataset,
                                    features,
                                    annotations,
                                    alerts,
                                    name=conf_json['name'],
                                    parent=conf_json['parent'],
                                    fold_id=conf_json['fold_id'],
                                    kind=conf_json['kind'])
     detection_conf.exp_id = conf_json['exp_id']
     return detection_conf
Пример #27
0
 def from_json(conf_json, secuml_conf):
     """Rebuild a ClusteringConf from its JSON dictionary.

     The algorithm configuration is rebuilt by the clustering factory from
     'core_conf' unless that entry is None; the identifier stored under
     'exp_id' is restored on the returned configuration.
     """
     dataset = DatasetConf.from_json(conf_json['dataset_conf'],
                                     secuml_conf.logger)
     features = FeaturesConf.from_json(conf_json['features_conf'],
                                       secuml_conf.logger)
     annotations = AnnotationsConf.from_json(conf_json['annotations_conf'],
                                             secuml_conf.logger)
     if conf_json['core_conf'] is None:
         algo_conf = None
     else:
         algo_factory = clustering_conf.get_factory()
         algo_conf = algo_factory.from_json(conf_json['core_conf'],
                                            secuml_conf.logger)
     result = ClusteringConf(
         secuml_conf, dataset, features, annotations, algo_conf,
         name=conf_json['name'],
         parent=conf_json['parent'],
         label=conf_json['label'])
     result.exp_id = conf_json['exp_id']
     return result
Пример #28
0
 def _create_detection_conf(self, kind, classifier_conf, fold_id=None):
     """Return the list of DetectionConf objects for ``kind``.

     For kind 'validation', and for kind 'test' when the test method is
     'datasets', one configuration is built per entry of the
     ``validation_datasets`` attribute of ``self.<kind>_conf``, with
     'ground_truth.csv' annotations and, in streaming mode, a streaming copy
     of the features configuration. Otherwise a single configuration reuses
     the dataset, annotations and features of the parent experiment.

     The alerts configuration is only forwarded when ``fold_id`` is None and
     ``kind`` is not 'train'. ``classifier_conf`` is accepted but not used
     by this method.
     """
     parent_id = self.exp_conf.exp_id
     name = 'DIADEM_%i_Detection_%s' % (parent_id, kind)
     if fold_id is not None:
         name = '%s_fold_%i' % (name, fold_id)
     secuml_conf = self.exp_conf.secuml_conf
     logger = secuml_conf.logger
     on_external_datasets = kind == 'validation' or (
         kind == 'test' and self.test_conf.method == 'datasets')
     if not on_external_datasets:
         dataset_confs = [self.exp_conf.dataset_conf]
         annotations_conf = self.exp_conf.annotations_conf
         features_conf = self.exp_conf.features_conf
     else:
         # Resolves to self.validation_conf or self.test_conf.
         source_conf = getattr(self, '%s_conf' % kind)
         annotations_conf = AnnotationsConf('ground_truth.csv', None,
                                            logger)
         features_conf = self.exp_conf.features_conf
         if source_conf.streaming:
             features_conf = features_conf.copy_streaming(
                 source_conf.stream_batch)
         dataset_confs = []
         for dataset_name in source_conf.validation_datasets:
             dataset_confs.append(
                 DatasetConf(self.exp_conf.dataset_conf.project,
                             dataset_name,
                             self.exp_conf.secuml_conf.logger))
     if fold_id is None and kind != 'train':
         alerts_conf = self.exp_conf.alerts_conf
     else:
         alerts_conf = None
     detection_confs = []
     for dataset_conf in dataset_confs:
         detection_confs.append(
             DetectionConf(secuml_conf, dataset_conf, features_conf,
                           annotations_conf, alerts_conf,
                           name=name, parent=parent_id, fold_id=fold_id,
                           kind=kind))
     return detection_confs