def getDiademDetectionChildExp(diadem_exp_id, child_type, fold_id, dataset):
    """Return, as JSON, the DIADEM row of a detection child experiment.

    ``fold_id`` and ``dataset`` are received as strings: the literal
    ``'None'`` means "no value", and any other ``fold_id`` is converted
    with ``int``.

    * ``child_type == 'cv'``: the row is looked up directly with
      ``exp_id == diadem_exp_id``.
    * otherwise: the row is searched among the 'Detection' children of a
      parent experiment. When a specific dataset is requested (neither
      ``None`` nor ``'all'``), the parent is first resolved to the
      matching 'Detection' child of ``diadem_exp_id`` and the result is
      restricted to that dataset; else the parent is ``diadem_exp_id``.

    In every case the row must match ``child_type`` and the fold, and
    ``Query.one()`` is used, so exactly one row must match.
    """
    fold = None if fold_id == 'None' else int(fold_id)
    dataset_name = None if dataset == 'None' else dataset
    specific_dataset = dataset_name is not None and dataset_name != 'all'

    def _detection_children(parent_id):
        # DIADEM rows whose experiment is a 'Detection' child of parent_id.
        return (session.query(DiademExpAlchemy)
                .join(DiademExpAlchemy.exp)
                .join(ExpAlchemy.parents)
                .filter(ExpAlchemy.kind == 'Detection')
                .filter(ExpRelationshipsAlchemy.parent_id == parent_id))

    if child_type == 'cv':
        query = (session.query(DiademExpAlchemy)
                 .filter(DiademExpAlchemy.exp_id == diadem_exp_id))
    elif specific_dataset:
        # Resolve the intermediate parent first, then look below it.
        parent_id = (_detection_children(diadem_exp_id)
                     .filter(DiademExpAlchemy.type == child_type)
                     .filter(DiademExpAlchemy.fold_id == fold)
                     .one()
                     .exp_id)
        query = (_detection_children(parent_id)
                 .join(DiademExpAlchemy.dataset)
                 .filter(DatasetsAlchemy.dataset == dataset_name))
    else:
        query = _detection_children(diadem_exp_id)

    row = (query
           .filter(DiademExpAlchemy.type == child_type)
           .filter(DiademExpAlchemy.fold_id == fold)
           .one())
    return jsonify(db_row_to_json(row))
def getAlerts(exp_id, analysis_type):
    """Return, as JSON, at most ``NUM_MAX_DISPLAY`` alerts of an experiment.

    An alert is a prediction of ``exp_id`` whose probability reaches the
    configured detection threshold when the experiment has probabilities,
    or whose predicted value is ``MALICIOUS`` otherwise.

    ``analysis_type``:
        * ``'topN'``: order by decreasing probability (or decreasing score
          when there are no probabilities but scoring is available);
        * ``'random'``: apply the database-specific ``'random_order'``;
        * anything else: no explicit ordering.
    """
    exp = update_curr_exp(exp_id)

    # Does this experiment provide probabilities? scores?
    diadem_row = (session.query(DiademExpAlchemy)
                  .filter(DiademExpAlchemy.exp_id == exp_id)
                  .one())
    has_proba = diadem_row.proba
    has_scores = diadem_row.with_scoring

    # Select the alerts.
    alerts = (session.query(PredictionsAlchemy)
              .filter(PredictionsAlchemy.exp_id == exp_id))
    if has_proba:
        min_proba = exp.exp_conf.core_conf.detection_threshold
        alerts = alerts.filter(PredictionsAlchemy.proba >= min_proba)
    else:
        alerts = alerts.filter(PredictionsAlchemy.value == MALICIOUS)

    # Order them according to the requested analysis.
    if analysis_type == 'random':
        alerts = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                       (alerts, ))
    elif analysis_type == 'topN' and (has_proba or has_scores):
        if has_proba:
            ranking = PredictionsAlchemy.proba
        else:
            ranking = PredictionsAlchemy.score
        alerts = alerts.order_by(ranking.desc())

    return jsonify(_predictions_results(alerts.limit(NUM_MAX_DISPLAY)))
def getAlerts(exp_id, analysis_type):
    """Return, as JSON, at most ``TOP_N_ALERTS`` alerts of an experiment.

    When the experiment has probabilities, only predictions whose
    probability reaches the configured detection threshold are kept;
    otherwise every prediction of ``exp_id`` is a candidate.

    ``analysis_type == 'topN'`` orders by decreasing probability (only
    when probabilities exist); ``'random'`` applies the database-specific
    ``'random_order'``.

    The JSON object has two parallel sequences: ``'instances'`` (instance
    ids) and ``'proba'`` (their probabilities).
    """
    exp = update_curr_exp(exp_id)

    # Does this experiment provide probabilities?
    has_proba = (session.query(DiademExpAlchemy)
                 .filter(DiademExpAlchemy.exp_id == exp_id)
                 .one()
                 .proba)
    min_proba = (exp.exp_conf.core_conf.detection_threshold
                 if has_proba else None)

    # Select the alerts.
    alerts = (session.query(PredictionsAlchemy)
              .filter(PredictionsAlchemy.exp_id == exp_id))
    if has_proba:
        alerts = alerts.filter(PredictionsAlchemy.proba >= min_proba)

    if analysis_type == 'random':
        alerts = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                       (alerts,))
    elif analysis_type == 'topN' and has_proba:
        alerts = alerts.order_by(PredictionsAlchemy.proba.desc())

    rows = alerts.limit(TOP_N_ALERTS).all()
    if not rows:
        return jsonify({'instances': [], 'proba': []})
    ids = tuple(row.instance_id for row in rows)
    probas = tuple(row.proba for row in rows)
    return jsonify({'instances': ids, 'proba': probas})
def get_train_exp(exp_id):
    """Return the id of the train experiment associated with ``exp_id``.

    If ``exp_id`` is itself a 'train' experiment, it is returned as is.
    If it is a 'test' experiment, the 'train' experiment sharing the same
    ``diadem_id`` and ``fold_id`` is looked up.

    Raises:
        AssertionError: if the experiment type is neither 'train' nor
            'test'.

    ``Query.one()`` is used for both lookups, so each of them must match
    exactly one row.
    """
    row = (session.query(DiademExpAlchemy)
           .filter(DiademExpAlchemy.exp_id == exp_id)
           .one())
    if row.type == 'train':
        return exp_id
    if row.type == 'test':
        train_row = (session.query(DiademExpAlchemy)
                     .filter(DiademExpAlchemy.diadem_id == row.diadem_id)
                     .filter(DiademExpAlchemy.fold_id == row.fold_id)
                     .filter(DiademExpAlchemy.type == 'train')
                     .one())
        return train_row.exp_id
    # Raise explicitly: a bare ``assert False`` is removed when Python runs
    # with -O, which would make this function silently return None.
    raise AssertionError(
        'Unexpected experiment type %r for experiment %r.'
        % (row.type, exp_id))
def get_feature_user_ids(session, features):
    """Return the ``user_id`` of each feature, in the order of ``features``.

    Args:
        session: database session used to run the queries.
        features: iterable of feature ids.

    Returns:
        A list with one ``user_id`` per element of ``features``.

    One query is issued per feature id and ``Query.one()`` is used, so
    every id must match exactly one ``FeaturesAlchemy`` row.
    """
    return [
        session.query(FeaturesAlchemy)
        .filter(FeaturesAlchemy.id == feature_id)
        .one()
        .user_id
        for feature_id in features
    ]
def getDatasets(project):
    """Return, as JSON, the names of the datasets of ``project``.

    The response is always an object of the form ``{'datasets': [...]}``.
    When ``get_project_id`` returns ``None`` for ``project``, the list is
    empty.
    """
    project_id = get_project_id(session, project)
    if project_id is None:
        # Keep the same response shape as the regular path instead of
        # returning a bare Python list.
        return jsonify({'datasets': []})
    rows = (session.query(DatasetsAlchemy)
            .filter(DatasetsAlchemy.project_id == project_id)
            .all())
    return jsonify({'datasets': [row.dataset for row in rows]})
def get_train_exp(exp_id):
    """Return the id of the 'Train' experiment that is a sibling of ``exp_id``.

    The parent of ``exp_id`` is read from the experiment relationships,
    then the child of that parent whose kind is 'Train' and whose fold
    matches the fold of ``exp_id`` is returned.

    Every lookup uses ``Query.one()`` and must match exactly one row.
    """
    current = (session.query(DiademExpAlchemy)
               .filter(DiademExpAlchemy.exp_id == exp_id)
               .one())

    # Parent DIADEM experiment of exp_id.
    parent_id = (session.query(ExpRelationshipsAlchemy)
                 .filter(ExpRelationshipsAlchemy.child_id == exp_id)
                 .one()
                 .parent_id)

    # 'Train' child of that parent, for the same fold.
    train = (session.query(DiademExpAlchemy)
             .join(DiademExpAlchemy.exp)
             .join(ExpAlchemy.parents)
             .filter(ExpRelationshipsAlchemy.parent_id == parent_id)
             .filter(ExpAlchemy.kind == 'Train')
             .filter(DiademExpAlchemy.fold_id == current.fold_id)
             .one())
    return train.exp_id
def getAnnotationsTypes(exp_id, iteration):
    """Return, as JSON, the annotation attributes of one ILAB iteration.

    The ``IlabExpAlchemy`` row matching ``exp_id`` and ``iteration`` is
    fetched with ``Query.one()`` and its ``uncertain``, ``malicious`` and
    ``benign`` attributes are returned under the same keys.
    """
    row = (session.query(IlabExpAlchemy)
           .filter(IlabExpAlchemy.id == exp_id)
           .filter(IlabExpAlchemy.iter == iteration)
           .one())
    annotations = {
        'uncertain': row.uncertain,
        'malicious': row.malicious,
        'benign': row.benign,
    }
    return jsonify(annotations)
def getIdent(exp_id, instance_id):
    """Return, as JSON, the ``ident`` and user id of an instance.

    The instance is reached through its dataset, the dataset's features
    sets and their experiments, restricted to experiment ``exp_id`` and
    instance ``instance_id``. ``Query.one()`` requires exactly one match.
    """
    instance = (session.query(InstancesAlchemy)
                .join(InstancesAlchemy.dataset)
                .join(DatasetsAlchemy.features)
                .join(FeaturesSetsAlchemy.experiments)
                .filter(ExpAlchemy.id == exp_id)
                .filter(InstancesAlchemy.id == instance_id)
                .one())
    return jsonify({
        'ident': instance.ident,
        'user_id': instance.user_instance_id,
    })
def getAlertsClusteringExpId(test_exp_id):
    """Return the id of the 'alerts' child experiment of ``test_exp_id``.

    The id is returned as a string. When no such child exists
    (``NoResultFound``), the string ``'None'`` is returned instead.
    """
    alerts_child = (session.query(ExpRelationshipsAlchemy)
                    .join(ExpRelationshipsAlchemy.child)
                    .join(ExpAlchemy.diadem_exp)
                    .filter(ExpRelationshipsAlchemy.parent_id == test_exp_id)
                    .filter(DiademExpAlchemy.type == 'alerts'))
    try:
        relationship = alerts_child.one()
    except NoResultFound:
        return 'None'
    return str(relationship.child_id)
def _get_parent_id(diadem_exp_id, child_type):
    """Return the id of the experiment under which children are searched.

    ``dataset`` and ``fold_id`` are not parameters of this function: they
    are read from the surrounding scope.

    When ``dataset`` is ``None`` or ``'all'``, ``diadem_exp_id`` itself is
    returned. Otherwise the 'Detection' child of ``diadem_exp_id`` with
    the given ``child_type`` and ``fold_id`` is looked up with
    ``Query.one()`` and its ``exp_id`` is returned.
    """
    if dataset is not None and dataset != 'all':
        detection_child = (
            session.query(DiademExpAlchemy)
            .join(DiademExpAlchemy.exp)
            .join(ExpAlchemy.parents)
            .filter(ExpAlchemy.kind == 'Detection')
            .filter(ExpRelationshipsAlchemy.parent_id == diadem_exp_id)
            .filter(DiademExpAlchemy.type == child_type)
            .filter(DiademExpAlchemy.fold_id == fold_id)
            .one())
        return detection_child.exp_id
    return diadem_exp_id
def get_train_exp(exp_id):
    """Return the id of the train experiment associated with ``exp_id``.

    If ``exp_id`` is itself a 'train' experiment, it is returned as is.
    If it is a 'test' experiment, its parent is read from the experiment
    relationships and the 'train' child of that parent with the same
    ``fold_id`` is returned.

    Raises:
        AssertionError: if the experiment type is neither 'train' nor
            'test'.

    Every lookup uses ``Query.one()`` and must match exactly one row.
    """
    row = (session.query(DiademExpAlchemy)
           .filter(DiademExpAlchemy.exp_id == exp_id)
           .one())
    if row.type == 'train':
        return exp_id
    if row.type == 'test':
        # Parent DIADEM experiment of the test experiment.
        diadem_exp_id = (session.query(ExpRelationshipsAlchemy)
                         .filter(ExpRelationshipsAlchemy.child_id == exp_id)
                         .one()
                         .parent_id)
        # 'train' child of that parent, for the same fold.
        train_row = (
            session.query(DiademExpAlchemy)
            .join(DiademExpAlchemy.exp)
            .join(ExpAlchemy.parents)
            .filter(ExpRelationshipsAlchemy.parent_id == diadem_exp_id)
            .filter(DiademExpAlchemy.fold_id == row.fold_id)
            .filter(DiademExpAlchemy.type == 'train')
            .one())
        return train_row.exp_id
    # Raise explicitly: a bare ``assert False`` is removed when Python runs
    # with -O, which would make this function silently return None.
    raise AssertionError(
        'Unexpected experiment type %r for experiment %r.'
        % (row.type, exp_id))
def getDiademChildExp(diadem_exp_id, child_type, fold_id):
    """Return, as JSON, the DIADEM row of a child experiment.

    ``fold_id`` is received as a string: ``'None'`` means no fold, any
    other value is converted with ``int``.

    For ``child_type == 'cv'`` the row with ``exp_id == diadem_exp_id`` is
    used; otherwise the row is searched among the children of
    ``diadem_exp_id``. In both cases it must match ``child_type`` and the
    fold, and ``Query.one()`` requires exactly one match. The row is
    serialized with ``db_row_to_json``.
    """
    fold = None if fold_id == 'None' else int(fold_id)
    candidates = session.query(DiademExpAlchemy)
    if child_type == 'cv':
        candidates = candidates.filter(
            DiademExpAlchemy.exp_id == diadem_exp_id)
    else:
        candidates = (candidates
                      .join(DiademExpAlchemy.exp)
                      .join(ExpAlchemy.parents)
                      .filter(ExpRelationshipsAlchemy.parent_id
                              == diadem_exp_id))
    row = (candidates
           .filter(DiademExpAlchemy.type == child_type)
           .filter(DiademExpAlchemy.fold_id == fold)
           .one())
    return jsonify(db_row_to_json(row))
def getFeaturesInfo(exp_id):
    """Return, as JSON, the description of every feature of an experiment.

    The features are those whose ``set_id`` equals the features set id of
    the experiment configuration. The result maps each feature id to a
    dictionary with its ``type``, ``user_id``, ``name`` and
    ``description``.
    """
    exp = update_curr_exp(exp_id)
    set_id = exp.exp_conf.features_conf.set_id
    features = (session.query(FeaturesAlchemy)
                .filter(FeaturesAlchemy.set_id == set_id)
                .all())
    info = {}
    for feature in features:
        info[feature.id] = {
            'type': feature.type,
            'user_id': feature.user_id,
            'name': feature.name,
            'description': feature.description,
        }
    return jsonify(info)
def getPredictionsScores(exp_id, range_, label):
    """Return, as JSON, predictions whose score lies in a given range.

    ``range_`` is a string of the form ``'<min> - <max>'``; both bounds
    are parsed with ``float`` and are inclusive. Unless ``label`` is
    ``'all'``, only instances whose ground-truth label equals ``label``
    are kept.

    The query orders by increasing score, then the database-specific
    ``'random_order'`` is applied, and at most ``NUM_MAX_DISPLAY`` rows
    are passed to ``_predictions_results``.
    """
    bounds = range_.split(' - ')
    lowest, highest = map(float, bounds)

    predictions = (session.query(PredictionsAlchemy)
                   .filter(PredictionsAlchemy.exp_id == exp_id)
                   .filter(PredictionsAlchemy.score >= lowest)
                   .filter(PredictionsAlchemy.score <= highest)
                   .order_by(PredictionsAlchemy.score.asc()))
    if label != 'all':
        predictions = (predictions
                       .join(PredictionsAlchemy.instance)
                       .join(InstancesAlchemy.ground_truth)
                       .filter(GroundTruthAlchemy.label == label))
    predictions = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                        (predictions, ))
    return jsonify(_predictions_results(predictions.limit(NUM_MAX_DISPLAY)))
def _get_errors(exp_id, fn_fp):
    """Return the prediction errors of an experiment.

    ``fn_fp == 'FN'`` selects predictions with value ``BENIGN`` whose
    instance label is the boolean form of ``MALICIOUS``; any other value
    selects predictions with value ``MALICIOUS`` whose instance label is
    the boolean form of ``BENIGN``.

    The database-specific ``'random_order'`` is applied and at most
    ``NUM_MAX_DISPLAY`` rows are passed to ``_predictions_results``.
    """
    if fn_fp == 'FN':
        predicted, actual = BENIGN, label_str_to_bool(MALICIOUS)
    else:
        predicted, actual = MALICIOUS, label_str_to_bool(BENIGN)

    errors = (session.query(PredictionsAlchemy)
              .filter(PredictionsAlchemy.exp_id == exp_id)
              .filter(PredictionsAlchemy.value == predicted)
              .join(PredictionsAlchemy.instance)
              .filter(InstancesAlchemy.label == actual))
    errors = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                   (errors, ))
    return _predictions_results(errors.limit(NUM_MAX_DISPLAY))
def getPredictionsProbas(exp_id, index, label):
    """Return, as JSON, predictions whose probability lies in one bin.

    ``index`` is converted with ``int`` and designates the bin
    ``[index * 0.1, (index + 1) * 0.1]`` (both bounds inclusive). Unless
    ``label`` is ``'all'``, only instances whose ground-truth label equals
    ``label`` are kept.

    The query orders by increasing probability, then the database-specific
    ``'random_order'`` is applied, and at most ``NUM_MAX_DISPLAY`` rows
    are passed to ``_predictions_results``.
    """
    bin_index = int(index)
    lowest = bin_index * 0.1
    highest = (bin_index + 1) * 0.1

    predictions = (session.query(PredictionsAlchemy)
                   .filter(PredictionsAlchemy.exp_id == exp_id)
                   .filter(PredictionsAlchemy.proba >= lowest)
                   .filter(PredictionsAlchemy.proba <= highest)
                   .order_by(PredictionsAlchemy.proba.asc()))
    if label != 'all':
        predictions = (predictions
                       .join(PredictionsAlchemy.instance)
                       .join(InstancesAlchemy.ground_truth)
                       .filter(GroundTruthAlchemy.label == label))
    predictions = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                        (predictions, ))
    return jsonify(_predictions_results(predictions.limit(NUM_MAX_DISPLAY)))
def _get_errors(exp_id, fn_fp):
    """Return the prediction errors of an experiment.

    ``fn_fp == 'FN'`` selects predictions with value 'benign' whose
    ground-truth label is 'malicious'; any other value selects
    predictions with value 'malicious' whose ground-truth label is
    'benign'.

    The database-specific ``'random_order'`` is applied and at most
    ``NUM_MAX_DISPLAY`` rows are passed to ``_predictions_results``.
    """
    if fn_fp == 'FN':
        predicted, actual = 'benign', 'malicious'
    else:
        predicted, actual = 'malicious', 'benign'

    errors = (session.query(PredictionsAlchemy)
              .filter(PredictionsAlchemy.exp_id == exp_id)
              .filter(PredictionsAlchemy.value == predicted)
              .join(PredictionsAlchemy.instance)
              .join(InstancesAlchemy.ground_truth)
              .filter(GroundTruthAlchemy.label == actual))
    errors = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                   (errors, ))
    return _predictions_results(errors.limit(NUM_MAX_DISPLAY))
def getAllExperiments(project, dataset):
    """Return, as JSON, the experiments of a dataset that have no parent.

    Experiments are grouped by ``kind``. Each group is a list of
    ``(id, name)`` pairs sorted by decreasing id.
    """
    top_level = (session.query(ExpAlchemy)
                 .join(ExpAlchemy.features_set)
                 .join(FeaturesSetsAlchemy.dataset)
                 .outerjoin(ExpAlchemy.parents)
                 .filter(DatasetsAlchemy.project == project)
                 .filter(DatasetsAlchemy.dataset == dataset)
                 # The outer join leaves parent_id NULL for experiments
                 # without any parent relationship.
                 .filter(ExpRelationshipsAlchemy.parent_id == null()))

    by_kind = {}
    for exp in top_level.all():
        by_kind.setdefault(exp.kind, []).append((exp.id, exp.name))

    newest_first = {
        kind: sorted(pairs, key=operator.itemgetter(0), reverse=True)
        for kind, pairs in by_kind.items()
    }
    return jsonify(newest_first)
def getFeaturesInfo(exp_id):
    """Return, as JSON, the description of every feature of an experiment.

    Feature ids and types are read from ``features_types.json`` in the
    experiment output directory. For each of them, the matching
    ``FeaturesAlchemy`` row is fetched with ``Query.one()`` to obtain
    ``user_id``, ``name`` and ``description``.

    The result maps each feature id to a dictionary with its ``type``,
    ``user_id``, ``name`` and ``description``.
    """
    exp = update_curr_exp(exp_id)
    types_path = path.join(exp.output_dir(), 'features_types.json')
    with open(types_path, 'r') as types_file:
        types_by_id = json.load(types_file)

    def _describe(feature_id, feature_type):
        # Complete the stored type with the database description.
        feature = (session.query(FeaturesAlchemy)
                   .filter(FeaturesAlchemy.id == feature_id)
                   .one())
        return {
            'type': feature_type,
            'user_id': feature.user_id,
            'name': feature.name,
            'description': feature.description,
        }

    return jsonify({
        feature_id: _describe(feature_id, feature_type)
        for feature_id, feature_type in types_by_id.items()
    })
def getPredictions(exp_id, index, label):
    """Return, as JSON, predictions whose probability lies in one bin.

    ``index`` is converted with ``int`` and designates the bin
    ``[index * 0.1, (index + 1) * 0.1]`` (both bounds inclusive). Unless
    ``label`` is ``'all'``, only instances whose ground-truth label equals
    ``label`` are kept.

    The JSON object has two parallel sequences: ``'instances'`` (instance
    ids) and ``'proba'`` (their probabilities).
    """
    bin_index = int(index)
    lowest = bin_index * 0.1
    highest = (bin_index + 1) * 0.1

    selection = (session.query(PredictionsAlchemy)
                 .filter(PredictionsAlchemy.exp_id == exp_id)
                 .filter(PredictionsAlchemy.proba >= lowest)
                 .filter(PredictionsAlchemy.proba <= highest))
    if label != 'all':
        selection = (selection
                     .join(PredictionsAlchemy.instance)
                     .join(InstancesAlchemy.ground_truth)
                     .filter(GroundTruthAlchemy.label == label))

    rows = selection.all()
    if not rows:
        return jsonify({'instances': [], 'proba': []})
    ids = tuple(row.instance_id for row in rows)
    probas = tuple(row.proba for row in rows)
    return jsonify({'instances': ids, 'proba': probas})
def getDiademChildExp(diadem_exp_id, child_type, fold_id):
    """Return, as JSON, the main attributes of a DIADEM child experiment.

    ``fold_id`` is received as a string: ``'None'`` means no fold, any
    other value is converted with ``int``.

    For ``child_type == 'cv'`` the row with ``exp_id == diadem_exp_id`` is
    used; otherwise the row is searched among the children of
    ``diadem_exp_id``. In both cases it must match ``child_type`` and the
    fold, and ``Query.one()`` requires exactly one match.

    The JSON object exposes ``exp_id``, ``perf_monitoring``,
    ``model_interpretation``, ``predictions_interpretation`` and
    ``alerts``.
    """
    fold = None if fold_id == 'None' else int(fold_id)
    candidates = session.query(DiademExpAlchemy)
    if child_type == 'cv':
        candidates = candidates.filter(
            DiademExpAlchemy.exp_id == diadem_exp_id)
    else:
        candidates = (candidates
                      .join(DiademExpAlchemy.exp)
                      .join(ExpAlchemy.parents)
                      .filter(ExpRelationshipsAlchemy.parent_id
                              == diadem_exp_id))
    child = (candidates
             .filter(DiademExpAlchemy.type == child_type)
             .filter(DiademExpAlchemy.fold_id == fold)
             .one())
    summary = {
        'exp_id': child.exp_id,
        'perf_monitoring': child.perf_monitoring,
        'model_interpretation': child.model_interpretation,
        'predictions_interpretation': child.predictions_interpretation,
        'alerts': child.alerts,
    }
    return jsonify(summary)
def getPredictions(exp_id, predicted_value, right_wrong, multiclass):
    """Return, as JSON, the predictions of an experiment for one value.

    Args:
        exp_id: id of the experiment.
        predicted_value: predicted value the predictions must have.
        right_wrong: ``'all'`` to keep every prediction, ``'right'`` to
            keep those whose ground truth equals ``predicted_value``,
            ``'wrong'`` to keep those whose ground truth differs.
        multiclass: the string ``'true'`` to compare with the ground-truth
            ``family``; any other value compares with the ``label``.

    Raises:
        AssertionError: if ``right_wrong`` is not one of ``'all'``,
            ``'right'`` or ``'wrong'``.

    The database-specific ``'random_order'`` is applied and at most
    ``NUM_MAX_DISPLAY`` rows are passed to ``_predictions_results``.
    """
    is_multiclass = multiclass == 'true'
    query = (session.query(PredictionsAlchemy)
             .filter(PredictionsAlchemy.exp_id == exp_id)
             .filter(PredictionsAlchemy.value == predicted_value))
    if right_wrong != 'all':
        query = (query
                 .join(PredictionsAlchemy.instance)
                 .join(InstancesAlchemy.ground_truth))
        truth = getattr(GroundTruthAlchemy,
                        'family' if is_multiclass else 'label')
        if right_wrong == 'right':
            query = query.filter(truth == predicted_value)
        elif right_wrong == 'wrong':
            query = query.filter(truth != predicted_value)
        else:
            # Raise explicitly: a bare ``assert False`` is removed when
            # Python runs with -O, which would silently return the
            # predictions without any right/wrong filtering.
            raise AssertionError(
                'Unexpected right_wrong value: %r.' % (right_wrong,))
    query = call_specific_db_func(secuml_conf.db_type, 'random_order',
                                  (query, ))
    query = query.limit(NUM_MAX_DISPLAY)
    return jsonify(_predictions_results(query))
def hasGroundTruth(project, dataset):
    """Tell whether a dataset has a ground-truth hash.

    Returns the string ``'True'`` when ``ground_truth_hash`` of the
    dataset row is not ``None`` and ``'False'`` otherwise. The row is
    fetched with ``Query.one()`` and must therefore exist and be unique.
    """
    dataset_row = (session.query(DatasetsAlchemy)
                   .filter(DatasetsAlchemy.project == project)
                   .filter(DatasetsAlchemy.dataset == dataset)
                   .one())
    has_hash = dataset_row.ground_truth_hash is not None
    return str(has_hash)
def getDatasets(project):
    """Return, as JSON, the names of the datasets of ``project``.

    The response is an object of the form ``{'datasets': [...]}``.
    """
    rows = (session.query(DatasetsAlchemy)
            .filter(DatasetsAlchemy.project == project)
            .all())
    names = [row.dataset for row in rows]
    return jsonify({'datasets': names})
def getProjects():
    """Return, as JSON, the distinct project names found in the datasets.

    The response is an object of the form ``{'projects': [...]}``.
    """
    rows = session.query(DatasetsAlchemy.project).distinct().all()
    names = [row.project for row in rows]
    return jsonify({'projects': names})
def getRcdClusteringId(exp_id, iteration):
    """Return, as JSON, the clustering experiment of one iteration.

    The ``RcdClusteringExpAlchemy`` row matching ``exp_id`` and
    ``iteration`` is fetched with ``Query.one()`` and its
    ``clustering_exp`` attribute is returned under the key
    ``'clustering_exp_id'``.
    """
    row = (session.query(RcdClusteringExpAlchemy)
           .filter(RcdClusteringExpAlchemy.id == exp_id)
           .filter(RcdClusteringExpAlchemy.iter == iteration)
           .one())
    return jsonify({'clustering_exp_id': row.clustering_exp})
def getDiademExp(exp_id):
    """Return, as JSON, the DIADEM row of experiment ``exp_id``.

    The row is fetched with ``Query.one()`` (exactly one match required)
    and serialized with ``db_row_to_json``.
    """
    row = (session.query(DiademExpAlchemy)
           .filter(DiademExpAlchemy.exp_id == exp_id)
           .one())
    return jsonify(db_row_to_json(row))
def predictionsInterpretation(exp_id):
    """Return ``predictions_interpretation`` of an experiment as a string.

    The value is read from the first DIADEM row matching ``exp_id`` and
    converted with ``str``.
    """
    matching = (session.query(DiademExpAlchemy)
                .filter(DiademExpAlchemy.exp_id == exp_id))
    # first() rather than one(): a train experiment can be shared by
    # several DIADEM experiments, so several rows may match.
    row = matching.first()
    return str(row.predictions_interpretation)
def get_user_instance_ids(instance_ids):
    """Return the ``user_instance_id`` of the given instances.

    The rows are ordered by instance id, so the result follows increasing
    ``InstancesAlchemy.id`` rather than the order of ``instance_ids``.
    """
    rows = (session.query(InstancesAlchemy)
            .filter(InstancesAlchemy.id.in_(instance_ids))
            .order_by(InstancesAlchemy.id)
            .all())
    return [row.user_instance_id for row in rows]