def mergeFamilies(exp_id, annotations_id, label, families, new_family):
    """Merge families through ``annotations_db_tools.merge_families``.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp`` to
            locate the output directory holding ``user_actions.log``.
        annotations_id: forwarded to ``merge_families``.
        label: forwarded to ``merge_families``.
        families: comma-separated string; it is split on ',' before being
            forwarded.
        new_family: forwarded to ``merge_families``.

    Returns:
        An empty string.
    """
    families = families.split(',')
    annotations_db_tools.merge_families(session, annotations_id, label,
                                        families, new_family)
    session.commit()
    if user_exp:
        exp = update_curr_exp(exp_id)
        filename = path.join(exp.output_dir(), 'user_actions.log')
        fields = [datetime.datetime.now(), 'merge_families', new_family]
        fields.extend(families)
        # Mode 'a' creates the file when it does not exist yet, so no
        # prior existence check is required.
        with open(filename, 'a') as f:
            # Terminate the entry so that each logged action sits on its
            # own line instead of being glued to the next one.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
def changeFamilyLabel(exp_id, annotations_id, label, family):
    """Change a family label via ``annotations_db_tools.change_family_label``.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp`` to
            locate the output directory holding ``user_actions.log``.
        annotations_id: forwarded to ``change_family_label``.
        label: forwarded to ``change_family_label``.
        family: forwarded to ``change_family_label``.

    Returns:
        An empty string.
    """
    annotations_db_tools.change_family_label(session, annotations_id, label,
                                             family)
    session.commit()
    if user_exp:
        exp = update_curr_exp(exp_id)
        filename = path.join(exp.output_dir(), 'user_actions.log')
        fields = [datetime.datetime.now(), 'change_family_label', family,
                  label]
        # Mode 'a' creates the file when it does not exist yet, so no
        # prior existence check is required.
        with open(filename, 'a') as f:
            # Terminate the entry so that each logged action sits on its
            # own line instead of being glued to the next one.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
def getInstance(exp_id, view_id, instance_id):
    """Display an instance through the project-specific view module.

    The module ``secuml.web.views.projects.<project>`` is imported
    dynamically and its ``get_instance`` function builds the result.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        view_id: view identifier; the literal string 'None' is converted
            to ``None``.
        instance_id: instance identifier, resolved with
            ``idents_tools.get_ident``.

    Returns:
        Whatever the project module's ``get_instance`` returns, or the
        tuple ``('Unable to display the instance', ident)`` when an
        ImportError is caught.
    """
    try:
        selected_view = None if view_id == 'None' else view_id
        experiment = update_curr_exp(exp_id)
        dataset_conf = experiment.exp_conf.dataset_conf
        ident, user_id = idents_tools.get_ident(session,
                                                dataset_conf.dataset_id,
                                                instance_id)
        project = experiment.exp_conf.dataset_conf.project
        module_name = 'secuml.web.views.projects.%s' % project
        project_views = importlib.import_module(module_name)
        return project_views.get_instance(experiment, selected_view, user_id,
                                          ident)
    except ImportError as error:
        hint = ('Please create the project file "%s.py" in '
                'secuml/web/views/projects/') % project
        app.logger.error(str(error))
        app.logger.error(hint)
        # NOTE(review): if this is a Flask view, the second tuple element is
        # presumably interpreted as a status or headers - confirm that
        # returning ``ident`` there is intended.
        return 'Unable to display the instance', ident
def activeLearningMonitoring(exp_id, iteration, kind, sub_kind):
    """Send a monitoring PNG stored under the iteration directory.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        iteration: iteration whose sub-directory of the experiment output
            directory is read.
        kind: one of 'labels', 'families', 'clustering' or 'time'; selects
            which image is sent.
        sub_kind: used to build the file name for the 'labels' and
            'clustering' kinds, ignored otherwise.

    Returns:
        The result of ``send_file`` with the 'image/png' mimetype, or the
        string 'FileNotFoundError' when the image does not exist or
        ``kind`` is not recognised.
    """
    experiment = update_curr_exp(exp_id)
    directory = path.join(experiment.output_dir(), str(iteration))
    if kind == 'labels':
        filename = path.join(directory, 'labels_monitoring',
                             'iteration' + '_' + sub_kind + '.png')
    elif kind == 'families':
        filename = path.join(directory, 'labels_monitoring',
                             'families_monitoring.png')
    elif kind == 'clustering':
        filename = path.join(directory, 'clustering_evaluation',
                             sub_kind + '_monitoring.png')
    elif kind == 'time':
        filename = path.join(directory, 'execution_times.png')
    else:
        # An unknown kind used to leave ``filename`` unbound and crash with
        # UnboundLocalError; no image can exist for it, so report it the
        # same way as a missing file.
        return 'FileNotFoundError'
    try:
        return send_file(filename, mimetype='image/png')
    except FileNotFoundError:
        return 'FileNotFoundError'
def getFeaturesInfo(exp_id):
    """Return the type and database description of each feature as JSON.

    The feature ids and types are read from ``features_types.json`` in the
    experiment output directory; ``user_id``, ``name`` and ``description``
    are fetched from ``FeaturesAlchemy`` for each id.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.

    Returns:
        The ``jsonify`` result of a dict mapping each feature id to its
        'type', 'user_id', 'name' and 'description'.
    """
    exp = update_curr_exp(exp_id)
    with open(path.join(exp.output_dir(), 'features_types.json'), 'r') as f:
        features_types = json.load(f)
    infos = {}
    for feature_id in features_types:
        # ``one()`` is kept so the lookup still requires exactly one row.
        row = (session.query(FeaturesAlchemy)
               .filter(FeaturesAlchemy.id == feature_id)
               .one())
        infos[feature_id] = {
            'type': features_types[feature_id],
            'user_id': row.user_id,
            'name': row.name,
            'description': row.description,
        }
    return jsonify(infos)
def updateAnnotation(exp_id, annotations_id, iter_num, instance_id, label,
                     family, method):
    """Update an annotation via ``annotations_db_tools.update_annotation``.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp`` to
            locate the output directory holding ``user_actions.log``.
        annotations_id: forwarded to ``update_annotation``.
        iter_num: the literal string 'None' is converted to ``None``; any
            other value is converted with ``int``.
        instance_id: forwarded to ``update_annotation``.
        label: forwarded to ``update_annotation``.
        family: forwarded to ``update_annotation``.
        method: forwarded to ``update_annotation``.

    Returns:
        An empty string.
    """
    iter_num = None if iter_num == 'None' else int(iter_num)
    annotations_db_tools.update_annotation(session, annotations_id,
                                           instance_id, label, family,
                                           iter_num, method)
    session.commit()
    if user_exp:
        exp = update_curr_exp(exp_id)
        filename = path.join(exp.output_dir(), 'user_actions.log')
        fields = [datetime.datetime.now(), 'update_annotation', iter_num,
                  instance_id, label, family, method]
        # Mode 'a' creates the file when it does not exist yet, so no
        # prior existence check is required.
        with open(filename, 'a') as f:
            # Terminate the entry so that each logged action sits on its
            # own line instead of being glued to the next one.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
def getHexBin(exp_id, x, y):
    """Return the hexbin data of ``c_<x>_<y>_hexbin.json`` as JSON.

    For every entry but the first, each of the 'malicious' and 'ok'
    instance lists is randomly down-sampled to ``NUM_MAX_INSTANCES`` when
    its recorded count exceeds that limit, then sorted and completed with
    the ids returned by ``get_user_instance_ids``.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        x: first component of the file name.
        y: second component of the file name.

    Returns:
        The ``jsonify`` result of the updated hexbin list.
    """
    kinds = ('malicious', 'ok')
    experiment = update_curr_exp(exp_id)
    hexbin_file = path.join(experiment.output_dir(),
                            '_'.join(('c', x, y, 'hexbin.json')))
    with open(hexbin_file, 'r') as f:
        hex_bins = json.load(f)
    for hex_bin in hex_bins[1:]:
        # Down-sample both kinds first, so the random draws happen in the
        # same order as before any user id lookup.
        for kind in kinds:
            instances_key = '%s_instances' % kind
            if hex_bin['num_%s_instances' % kind] > NUM_MAX_INSTANCES:
                hex_bin[instances_key] = random.sample(
                    hex_bin[instances_key], NUM_MAX_INSTANCES)
        for kind in kinds:
            instances_key = '%s_instances' % kind
            sorted_ids = sorted(hex_bin[instances_key])
            hex_bin['%s_user_ids' % kind] = get_user_instance_ids(sorted_ids)
            hex_bin[instances_key] = sorted_ids
    return jsonify(hex_bins)
def getPredictions(exp_id, index, label):
    """Return the instances whose predicted probability is in a given bin.

    The bin covers ``[index * 0.1, (index + 1) * 0.1]``, both bounds
    included, and is applied to the 'predicted_proba' column of
    ``predictions.csv`` in the experiment output directory.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        index: bin index, converted with ``int``.
        label: 'malicious' keeps rows whose 'ground_truth' is True,
            'benign' keeps rows whose 'ground_truth' is False; any other
            value (including 'all') applies no ground-truth filter.

    Returns:
        The ``jsonify`` result of a dict with the selected 'instances'
        (index values as ints) and their 'proba'.
    """
    exp = update_curr_exp(exp_id)
    predictions_file = path.join(exp.output_dir(), 'predictions.csv')
    bin_index = int(index)
    lower = bin_index * 0.1
    upper = (bin_index + 1) * 0.1
    with open(predictions_file, 'r') as f:
        data = pd.read_csv(f, header=0, index_col=0)
    probas = data.loc[:, 'predicted_proba']
    data = data.loc[(probas >= lower) & (probas <= upper), :]
    wanted_truth = {'malicious': True, 'benign': False}
    if label in wanted_truth:
        truth = data.loc[:, 'ground_truth']
        data = data.loc[truth == wanted_truth[label], :]
    instances = [int(instance) for instance in data.index.values]
    return jsonify({'instances': instances,
                    'proba': list(data['predicted_proba'])})
def getSortedFeatures(exp_id, criterion):
    """Return the features of ``scores.csv`` ordered by a criterion.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        criterion: 'alphabet' sorts the feature names and returns no
            values; 'null_variance' keeps the rows whose 'variance' is 0
            and reports the 'variance' column; any other value is handed
            to ``sort_data_frame`` and used as the reported column.

    Returns:
        The ``jsonify`` result of a dict with 'features', 'values'
        (formatted with '%.2f'), 'pvalues' (formatted with '%.2E', or
        None when the '<criterion>_pvalues' column is absent) and
        'user_ids'.
    """
    exp = update_curr_exp(exp_id)
    scores = pd.read_csv(path.join(exp.output_dir(), 'scores.csv'),
                         header=0, index_col=0)

    if criterion == 'alphabet':
        features = sorted(scores.index.values.tolist())
        return jsonify({
            'features': features,
            'values': None,
            'pvalues': None,
            'user_ids': get_feature_user_ids(session, features),
        })

    if criterion == 'null_variance':
        scores = scores.loc[scores.loc[:, 'variance'] == 0, :]
        criterion = 'variance'
    else:
        sort_data_frame(scores, criterion, False, True)

    features = scores.index.values.tolist()
    values = ['%.2f' % value for value in scores[criterion].tolist()]
    pvalues = None
    pvalues_col = '_'.join((criterion, 'pvalues'))
    if pvalues_col in scores.columns:
        pvalues = ['%.2E' % Decimal(pvalue)
                   for pvalue in scores[pvalues_col].tolist()]
    return jsonify({
        'features': features,
        'values': values,
        'pvalues': pvalues,
        'user_ids': get_feature_user_ids(session, features),
    })
def getTopWeightedFeatures(exp_id, instance_id, size):
    """Return a bar plot of the features with the largest weighted values.

    The instance features are transformed by the classifier's 'scaler'
    step and multiplied by the ``coef_`` of its 'model' step; the ``size``
    entries with the largest absolute weighted value are plotted, largest
    first.

    Args:
        exp_id: experiment identifier, passed to ``get_classifier`` and
            ``update_curr_exp``.
        instance_id: instance identifier, converted with ``int``.
        size: number of features to keep, converted with ``int``.

    Returns:
        The ``jsonify`` result of the bar plot's ``to_json`` output.
    """
    instance_id = int(instance_id)
    classifier = get_classifier(exp_id)
    exp = update_curr_exp(exp_id)
    f_names, f_values = FeaturesFromExp.get_instance(exp, instance_id)

    # The scaler receives a single-row 2D array.
    row = np.reshape(f_values, (1, -1))
    scaled_row = classifier.named_steps['scaler'].transform(row)
    weighted_row = np.multiply(scaled_row,
                               classifier.named_steps['model'].coef_)

    # Ascending sort followed by a reversed tail slice: keeps the same
    # ordering among ties as sorting in place and slicing backwards.
    ranked = sorted(zip(f_names, f_values, weighted_row[0]),
                    key=lambda feature: abs(feature[2]))
    top = ranked[:-int(size) - 1:-1]
    top_names, top_values, top_weights = list(zip(*top))

    labels = [str(name) for name in top_names]
    tooltips = ['%s (%.2f)' % (name, value)
                for name, value in zip(top_names, top_values)]
    barplot = BarPlot(labels)
    dataset = PlotDataset(top_weights, None)
    dataset.set_color(red)
    barplot.add_dataset(dataset)
    return jsonify(barplot.to_json(tooltip_data=tooltips))
def getFeatureScores(exp_id, feature):
    """Send ``<output_dir>/<feature>/scores.json`` for the experiment.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        feature: name of the sub-directory holding ``scores.json``.

    Returns:
        The result of ``send_file`` for that file.
    """
    exp = update_curr_exp(exp_id)
    scores_file = path.join(exp.output_dir(), feature, 'scores.json')
    return send_file(scores_file)
def getReconsErrors(exp_id):
    """Send ``reconstruction_errors.csv`` from the experiment output dir.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.

    Returns:
        The result of ``send_file`` for that file.
    """
    experiment = update_curr_exp(exp_id)
    return send_file(path.join(experiment.output_dir(),
                               'reconstruction_errors.csv'))
def getCumExplVar(exp_id):
    """Send ``cumuled_explained_variance.csv`` from the experiment output dir.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.

    Returns:
        The result of ``send_file`` for that file.
    """
    experiment = update_curr_exp(exp_id)
    return send_file(path.join(experiment.output_dir(),
                               'cumuled_explained_variance.csv'))
def getProjectionMatrix(exp_id):
    """Send ``projection_matrix.csv`` from the experiment output dir.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.

    Returns:
        The result of ``send_file`` for that file.
    """
    experiment = update_curr_exp(exp_id)
    return send_file(path.join(experiment.output_dir(),
                               'projection_matrix.csv'))
def getFamiliesInstancesToAnnotate(exp_id, iter, label):
    """Send ``toannotate_<label>.json`` from an iteration directory.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        iter: iteration whose sub-directory of the output directory is read.
        label: label inserted in the file name.

    Returns:
        The result of ``send_file`` for that file.
    """
    experiment = update_curr_exp(exp_id)
    iteration_dir = path.join(experiment.output_dir(), str(iter))
    return send_file(path.join(iteration_dir, 'toannotate_%s.json' % label))
def getHexBin(exp_id, x, y):
    """Send ``c_<x>_<y>_hexbin.json`` from the experiment output dir.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        x: first component of the file name.
        y: second component of the file name.

    Returns:
        The result of ``send_file`` for that file.
    """
    experiment = update_curr_exp(exp_id)
    hexbin_name = '_'.join(('c', x, y, 'hexbin.json'))
    return send_file(path.join(experiment.output_dir(), hexbin_name))
def getFeatures(exp_id, instance_id):
    """Return the features of an instance as a JSON name/value mapping.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        instance_id: instance identifier, converted with ``int``.

    Returns:
        The ``jsonify`` result of a dict mapping each feature name to the
        value found at the same position.
    """
    instance_id = int(instance_id)
    experiment = update_curr_exp(exp_id)
    names, values = FeaturesFromExp.get_instance(experiment, instance_id)
    features = {}
    for position, value in enumerate(values):
        features[names[position]] = value
    return jsonify(features)
def rcdAnnotations(exp_id, iteration):
    """Render ``active_learning/rcd_annotations.html`` for an experiment.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        iteration: not used by this function.

    Returns:
        The rendered template, given the experiment's project name.
    """
    dataset_conf = update_curr_exp(exp_id).exp_conf.dataset_conf
    return render_template('active_learning/rcd_annotations.html',
                           project=dataset_conf.project)
def displayAlerts(exp_id, analysis_type):
    """Render ``diadem/alerts.html`` for an experiment.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        analysis_type: not used by this function.

    Returns:
        The rendered template, given the experiment's project name.
    """
    dataset_conf = update_curr_exp(exp_id).exp_conf.dataset_conf
    return render_template('diadem/alerts.html',
                           project=dataset_conf.project)
def get_classifier(exp_id):
    """Load ``model.out`` from the output dir of the training experiment.

    Args:
        exp_id: experiment identifier; the training experiment is resolved
            with ``get_train_exp``.

    Returns:
        The object returned by ``joblib.load``.
    """
    train_exp = update_curr_exp(get_train_exp(exp_id))
    model_file = path.join(train_exp.output_dir(), 'model.out')
    return joblib.load(model_file)
def getClusterLabelsFamilies(exp_id, cluster_id):
    """Return the labels and families of a cluster as JSON.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        cluster_id: cluster identifier, converted with ``int``.

    Returns:
        The ``jsonify`` result of the clustering's ``get_labels_families``.
    """
    cluster_id = int(cluster_id)
    experiment = update_curr_exp(exp_id)
    clustering = ClustersExp.from_json(experiment.output_dir())
    labels_families = clustering.get_labels_families(experiment, cluster_id)
    return jsonify(labels_families)
def getClusterLabel(exp_id, selected_cluster):
    """Return the label of a cluster.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        selected_cluster: cluster identifier, converted with ``int``.

    Returns:
        The value returned by the clustering's ``get_label``.
    """
    selected_cluster = int(selected_cluster)
    experiment = update_curr_exp(exp_id)
    clustering = ClustersExp.from_json(experiment.output_dir())
    return clustering.get_label(selected_cluster)
def activeLearningModelsMonitoring(exp_id, iter, train_test):
    """Send ``<iter>/model_perf/<train_test>.png`` as a PNG image.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        iter: iteration whose sub-directory of the output directory is read.
        train_test: base name of the PNG file.

    Returns:
        The result of ``send_file`` with the 'image/png' mimetype.
    """
    experiment = update_curr_exp(exp_id)
    image = path.join(experiment.output_dir(), str(iter), 'model_perf',
                      '%s.png' % train_test)
    return send_file(image, mimetype='image/png')
def currentAnnotationIteration(exp_id):
    """Return the experiment's current iteration as a string.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.

    Returns:
        ``str`` of the value returned by the experiment's
        ``get_current_iter``.
    """
    current_iter = update_curr_exp(exp_id).get_current_iter()
    return str(current_iter)
def predictionsAnalysis(train_exp_id, index):
    """Render ``diadem/predictions.html`` for an experiment.

    Args:
        train_exp_id: experiment identifier, passed to ``update_curr_exp``.
        index: not used by this function.

    Returns:
        The rendered template, given the experiment's project name.
    """
    dataset_conf = update_curr_exp(train_exp_id).exp_conf.dataset_conf
    return render_template('diadem/predictions.html',
                           project=dataset_conf.project)
def getCriterionDensity(exp_id, criterion):
    """Send ``<criterion>_density.png`` from the experiment output dir.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        criterion: prefix of the PNG file name.

    Returns:
        The result of ``send_file`` for that file.
    """
    exp = update_curr_exp(exp_id)
    density_image = path.join(exp.output_dir(), '%s_density.png' % criterion)
    return send_file(density_image)
def displayErrors(exp_id, error_kind):
    """Render ``diadem/errors.html`` for an experiment.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        error_kind: not used by this function.

    Returns:
        The rendered template, given the experiment's project name.
    """
    dataset_conf = update_curr_exp(exp_id).exp_conf.dataset_conf
    return render_template('diadem/errors.html',
                           project=dataset_conf.project)
def getFamiliesInstancesToAnnotate(exp_id, iteration, predicted_label):
    """Send ``toannotate_<predicted_label>.json`` from an iteration directory.

    Args:
        exp_id: experiment identifier, passed to ``update_curr_exp``.
        iteration: iteration whose sub-directory of the output directory
            is read.
        predicted_label: label inserted in the file name.

    Returns:
        The result of ``send_file`` for that file.
    """
    experiment = update_curr_exp(exp_id)
    iteration_dir = path.join(experiment.output_dir(), str(iteration))
    json_name = 'toannotate_' + predicted_label + '.json'
    return send_file(path.join(iteration_dir, json_name))