def fromJson(obj, experiment):
    """Rebuild an UncertaintySamplingConfiguration from a decoded JSON object.

    ``obj`` is indexed with 'models_conf' (which must hold 'binary'), 'auto',
    'budget' and 'batch'. ``experiment`` is passed through to the classifier
    configuration factory.
    """
    factory = ClassifierConfFactory.getFactory()
    binary_conf = factory.fromJson(obj['models_conf']['binary'], experiment)
    return UncertaintySamplingConfiguration(
        obj['auto'], obj['budget'], obj['batch'], binary_conf)
def createMulticlassExperiment(self):
    """Create, export and return a multiclass experiment derived from
    ``self.supervised_exp``.

    The new ClassificationExperiment reuses the supervised experiment's
    project, dataset, db, cursor, label and features filenames, is named
    '<experiment_name>_alertsMulticlassClassifier', and records the supervised
    experiment's id as its parent. It is configured with a
    'LogisticRegression' classifier conf built from fixed parameters plus the
    supervised experiment's ``num_folds``.
    """
    source = self.supervised_exp
    child_name = source.experiment_name + '_alertsMulticlassClassifier'
    multiclass_exp = ClassificationExperiment(
        source.project, source.dataset, source.db, source.cursor,
        experiment_name=child_name,
        experiment_label=source.experiment_label,
        parent=source.experiment_id)
    multiclass_exp.setFeaturesFilenames(source.features_filenames)

    classifier_params = {
        'num_folds': source.classification_conf.num_folds,
        'sample_weight': False,
        'families_supervision': True,
        'optim_algo': 'sag',
        'alerts_conf': None,
    }
    unlabeled_test = TestConfiguration()
    unlabeled_test.setUnlabeled(labels_annotations='annotations')
    classifier_params['test_conf'] = unlabeled_test

    factory = ClassifierConfFactory.getFactory()
    multiclass_exp.setClassifierConf(
        factory.fromParam('LogisticRegression', classifier_params))
    multiclass_exp.createExperiment()
    multiclass_exp.export()
    return multiclass_exp
def fromJson(obj, experiment):
    """Rebuild a CesaBianchiConfiguration from a decoded JSON object.

    ``obj`` is indexed with 'models_conf' (which must hold 'binary'), 'auto',
    'budget', 'batch' and 'b'. ``experiment`` is passed through to the
    classifier configuration factory.
    """
    factory = ClassifierConfFactory.getFactory()
    binary_conf = factory.fromJson(obj['models_conf']['binary'], experiment)
    return CesaBianchiConfiguration(
        obj['auto'], obj['budget'], obj['batch'], obj['b'], binary_conf)
def fromJson(obj, session):
    """Rebuild a ClassificationExperiment from a decoded JSON object.

    The experiment is constructed from ``obj['project']``, ``obj['dataset']``
    and ``session``, populated through ``Experiment.expParamFromJson``, and
    then given the classifier conf decoded from ``obj['classification_conf']``.
    """
    exp = ClassificationExperiment(obj['project'], obj['dataset'], session)
    Experiment.expParamFromJson(exp, obj)
    factory = ClassifierConfFactory.getFactory()
    exp.setClassifierConf(factory.fromJson(obj['classification_conf'], exp))
    return exp
def fromJson(obj, experiment):
    """Rebuild an IlabConfiguration from a decoded JSON object.

    ``obj`` is indexed with 'rare_category_detection_conf', 'models_conf'
    (which must hold 'binary'), 'auto', 'budget', 'num_uncertain' and 'eps'.
    ``experiment`` is passed through to the classifier configuration factory.
    """
    # The rare category detection strategy is decoded first, as in the
    # serialized layout's original consumer.
    rcd_conf = RareCategoryDetectionStrategy.fromJson(
        obj['rare_category_detection_conf'])
    factory = ClassifierConfFactory.getFactory()
    binary_conf = factory.fromJson(obj['models_conf']['binary'], experiment)
    return IlabConfiguration(
        obj['auto'], obj['budget'], rcd_conf,
        obj['num_uncertain'], obj['eps'], binary_conf)
def fromJson(obj, experiment):
    """Rebuild a RareCategoryDetectionConfiguration from a decoded JSON object.

    ``obj`` is indexed with 'models_conf' (which must hold 'multiclass'),
    'rare_category_detection_conf', 'auto' and 'budget'. ``experiment`` is
    passed through to the classifier configuration factory.
    """
    factory = ClassifierConfFactory.getFactory()
    multiclass_conf = factory.fromJson(
        obj['models_conf']['multiclass'], experiment)
    rcd_conf = RareCategoryDetectionStrategy.fromJson(
        obj['rare_category_detection_conf'])
    return RareCategoryDetectionConfiguration(
        obj['auto'], obj['budget'], rcd_conf, multiclass_conf)
def fromJson(obj, experiment):
    """Rebuild a RandomSamplingConfiguration from a decoded JSON object.

    ``obj`` is indexed with 'validation_conf' (may be None), 'models_conf'
    (which must hold 'binary'), 'auto', 'budget' and 'batch'. A non-None
    validation entry is decoded with ``TestConfiguration.fromJson``;
    otherwise None is handed to the configuration.
    """
    if obj['validation_conf'] is None:
        validation_conf = None
    else:
        validation_conf = TestConfiguration.fromJson(
            obj['validation_conf'], experiment)

    factory = ClassifierConfFactory.getFactory()
    binary_conf = factory.fromJson(obj['models_conf']['binary'], experiment)
    return RandomSamplingConfiguration(
        obj['auto'], obj['budget'], obj['batch'],
        binary_conf, validation_conf)
def gornitzBinaryModelConf():
    """Return the 'Sssvdd' classifier conf built from fixed parameters.

    The parameters are 4 folds, no sample weighting, no families supervision
    and no alerts conf. The resulting conf is switched to unlabeled mode with
    ``labels_annotations='annotations'`` before being returned.
    """
    fixed_params = {
        'num_folds': 4,
        'sample_weight': False,
        'families_supervision': False,
        'alerts_conf': None,
    }
    factory = ClassifierConfFactory.getFactory()
    model_conf = factory.fromParam('Sssvdd', fixed_params)
    # NOTE(review): setUnlabeled is called on the classifier conf itself, not
    # on a TestConfiguration passed as 'test_conf' -- confirm the conf object
    # exposes this method.
    model_conf.setUnlabeled(labels_annotations='annotations')
    return model_conf
def fromJson(obj, experiment):
    """Rebuild an AladinConfiguration from a decoded JSON object.

    ``obj`` is indexed with 'validation_conf' (may be None), 'models_conf'
    (which must hold 'binary'), 'auto', 'budget' and 'num_annotations'. A
    non-None validation entry is decoded with ``TestConfiguration.fromJson``;
    otherwise None is handed to the configuration.
    """
    if obj['validation_conf'] is None:
        validation_conf = None
    else:
        validation_conf = TestConfiguration.fromJson(
            obj['validation_conf'], experiment)

    factory = ClassifierConfFactory.getFactory()
    binary_conf = factory.fromJson(obj['models_conf']['binary'], experiment)
    return AladinConfiguration(
        obj['auto'], obj['budget'], obj['num_annotations'],
        binary_conf, validation_conf)
def generateParser():
    """Build the command-line parser for learning a detection model.

    The top-level parser receives the options added by
    ``Experiment.projectDatasetFeturesParser``. One sub-command is registered
    per supported model name (stored in ``args.model``), and each sub-parser
    is filled in by the classifier configuration factory.
    """
    description = ('Learn a detection model. '
                   'The labels must be stored in labels/true_labels.csv.')
    parser = argparse.ArgumentParser(description=description)
    Experiment.projectDatasetFeturesParser(parser)

    model_names = ('LogisticRegression', 'Svc', 'GaussianNaiveBayes',
                   'DecisionTree', 'RandomForest', 'GradientBoosting')
    subparsers = parser.add_subparsers(dest='model')
    factory = ClassifierConfFactory.getFactory()
    for model_name in model_names:
        factory.generateParser(model_name, subparsers.add_parser(model_name))
    return parser
def setExperimentFromArgs(self, args):
    """Configure this experiment from parsed command-line arguments, then
    export it.

    Reads ``args.features_files``, ``args.model`` and ``args.labels``. The
    classifier conf is built by the classifier configuration factory from
    ``args`` and this experiment.

    Raises:
        Exception: whatever ``self.initLabels`` raised, propagated unchanged
            after a hint about the expected labels file has been printed.
            ``self.export()`` is not reached in that case.
    """
    self.setFeaturesFilenames(args.features_files)
    factory = ClassifierConfFactory.getFactory()
    conf = factory.fromArgs(args.model, args, self)
    self.setClassifierConf(conf)
    try:
        self.initLabels(args.labels)
    except Exception:
        message = 'The ground truth labels must be provided in true_labels.csv '
        message += 'to run SecuML_classification.'
        # The parenthesised single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print function.
        print(message)
        # A bare raise keeps the original traceback; ``raise e`` would restart
        # it at this line on Python 2 and hide where initLabels failed.
        raise
    self.export()
def generateParamsFromArgs(args, experiment):
    """Build the configuration parameters from parsed arguments, using a
    fixed 'LogisticRegression' binary model.

    The binary model conf uses 4 folds, no sample weighting, no families
    supervision and an unlabeled test configuration. It is handed to
    ``ActiveLearningConfiguration.generateParamsFromArgs``, whose result is
    extended with ``args.num_annotations`` under 'num_annotations'.
    """
    binary_params = {
        'num_folds': 4,
        'sample_weight': False,
        'families_supervision': False,
    }
    unlabeled_test = TestConfiguration()
    unlabeled_test.setUnlabeled(labels_annotations='annotations')
    binary_params['test_conf'] = unlabeled_test

    factory = ClassifierConfFactory.getFactory()
    binary_conf = factory.fromParam('LogisticRegression', binary_params)

    params = ActiveLearningConfiguration.generateParamsFromArgs(
        args, experiment, binary_model_conf=binary_conf)
    params['num_annotations'] = args.num_annotations
    return params
def aladinMulticlassModelConf():
    """Return the 'LogisticRegression' multiclass conf built from fixed
    parameters.

    The parameters are 4 folds, no sample weighting, families supervision
    enabled, no alerts conf, the 'liblinear' optimisation algorithm and an
    unlabeled test configuration.
    """
    fixed_params = {
        'num_folds': 4,
        'sample_weight': False,
        'families_supervision': True,
        'alerts_conf': None,
        'optim_algo': 'liblinear',
    }
    unlabeled_test = TestConfiguration()
    unlabeled_test.setUnlabeled(labels_annotations='annotations')
    fixed_params['test_conf'] = unlabeled_test

    return ClassifierConfFactory.getFactory().fromParam(
        'LogisticRegression', fixed_params)
def generateParamsFromArgs(args):
    """Extend the base active-learning parameters with a rare category
    detection strategy, 'num_uncertain' and 'eps'.

    The strategy wraps a 'LogisticRegression' multiclass conf
    (``args.num_folds`` folds, no sample weighting, families supervision
    enabled, unlabeled test configuration) together with
    ``args.cluster_strategy``, ``args.num_annotations`` and 'uniform'.
    'eps' is fixed to 0.49.
    """
    params = ActiveLearningConfiguration.generateParamsFromArgs(args)

    multiclass_params = {
        'num_folds': args.num_folds,
        'sample_weight': False,
        'families_supervision': True,
    }
    unlabeled_test = TestConfiguration()
    unlabeled_test.setUnlabeled(labels_annotations='annotations')
    multiclass_params['test_conf'] = unlabeled_test

    factory = ClassifierConfFactory.getFactory()
    multiclass_conf = factory.fromParam('LogisticRegression',
                                        multiclass_params)

    params['rare_category_detection_conf'] = RareCategoryDetectionStrategy(
        multiclass_conf, args.cluster_strategy, args.num_annotations,
        'uniform')
    params['num_uncertain'] = args.num_uncertain
    params['eps'] = 0.49
    return params
def generateParamsFromArgs(args):
    """Build the base active-learning parameters from parsed arguments.

    A binary model conf of class ``args.model_class`` is created with
    ``args.num_folds``, ``args.sample_weight``, no families supervision and
    an unlabeled test configuration. The returned dict holds 'auto',
    'budget' and 'binary_model_conf'.
    """
    binary_params = {
        'num_folds': args.num_folds,
        'sample_weight': args.sample_weight,
        'families_supervision': False,
    }
    unlabeled_test = TestConfiguration()
    unlabeled_test.setUnlabeled(labels_annotations='annotations')
    binary_params['test_conf'] = unlabeled_test

    factory = ClassifierConfFactory.getFactory()
    binary_conf = factory.fromParam(args.model_class, binary_params)

    return {
        'auto': args.auto,
        'budget': args.budget,
        'binary_model_conf': binary_conf,
    }
def generateParamsFromArgs(args, experiment):
    """Extend the base active-learning parameters with the rare category
    detection settings.

    A multiclass conf of class ``args.model_class`` is created with
    ``args.num_folds``, no sample weighting, families supervision enabled,
    no alerts conf and an unlabeled test configuration. It is stored under
    'multiclass_model_conf' and also wrapped, with ``args.cluster_strategy``,
    ``args.num_annotations`` and 'uniform', in the strategy stored under
    'rare_category_detection_conf'. 'num_annotations' is copied from ``args``.
    """
    params = ActiveLearningConfiguration.generateParamsFromArgs(
        args, experiment)

    multiclass_params = {
        'num_folds': args.num_folds,
        'sample_weight': False,
        'families_supervision': True,
        'alerts_conf': None,
    }
    unlabeled_test = TestConfiguration()
    unlabeled_test.setUnlabeled(labels_annotations='annotations')
    multiclass_params['test_conf'] = unlabeled_test

    factory = ClassifierConfFactory.getFactory()
    multiclass_conf = factory.fromParam(args.model_class, multiclass_params)

    params['rare_category_detection_conf'] = RareCategoryDetectionStrategy(
        multiclass_conf, args.cluster_strategy, args.num_annotations,
        'uniform')
    params['num_annotations'] = args.num_annotations
    params['multiclass_model_conf'] = multiclass_conf
    return params
def generateParamsFromArgs(args, experiment, binary_model_conf=None):
    """Build the base active-learning parameters from parsed arguments.

    When ``binary_model_conf`` is None, a conf of class ``args.model_class``
    is created with ``args.num_folds``, ``args.sample_weight``, no families
    supervision and an unlabeled test configuration; otherwise the supplied
    conf is used as is.

    The returned dict holds 'auto', 'budget', 'binary_model_conf' and
    'validation_conf'. The latter is None unless ``args.validation_dataset``
    is set, in which case it is a TestConfiguration pointed at that dataset
    for ``experiment``.
    """
    if binary_model_conf is None:
        binary_params = {
            'num_folds': args.num_folds,
            'sample_weight': args.sample_weight,
            'families_supervision': False,
        }
        unlabeled_test = TestConfiguration()
        unlabeled_test.setUnlabeled(labels_annotations='annotations')
        binary_params['test_conf'] = unlabeled_test
        factory = ClassifierConfFactory.getFactory()
        binary_model_conf = factory.fromParam(args.model_class, binary_params)

    active_learning_params = {
        'auto': args.auto,
        'budget': args.budget,
        'binary_model_conf': binary_model_conf,
    }

    if args.validation_dataset is None:
        validation_conf = None
    else:
        validation_conf = TestConfiguration()
        validation_conf.setTestDataset(args.validation_dataset, experiment)
    active_learning_params['validation_conf'] = validation_conf
    return active_learning_params
def fromJson(obj):
    """Rebuild a RareCategoryDetectionStrategy from a decoded JSON object.

    ``obj`` is indexed with 'classification_conf', 'cluster_strategy',
    'num_annotations' and 'cluster_weights'. The classification conf is
    decoded by the classifier configuration factory with None passed in the
    experiment position.
    """
    factory = ClassifierConfFactory.getFactory()
    classifier_conf = factory.fromJson(obj['classification_conf'], None)
    return RareCategoryDetectionStrategy(
        classifier_conf,
        obj['cluster_strategy'],
        obj['num_annotations'],
        obj['cluster_weights'])