def removeClusterLabel(experiment_id, selected_cluster, num_results):
    """Remove the label attached to one cluster of an experiment.

    ``selected_cluster`` and ``num_results`` are converted with ``int``
    before being forwarded to ``Clustering.removeClusterLabel``.

    Returns an empty string.
    """
    cluster_index = int(selected_cluster)
    result_count = int(num_results)
    current_experiment = updateCurrentExperiment(experiment_id)
    Clustering.fromJson(current_experiment).removeClusterLabel(
        cluster_index, result_count)
    return ''
def getValidationDataset(experiment_id):
    """Return the dataset the experiment is validated on.

    When the test configuration's ``method`` is ``'test_dataset'`` the
    configured ``test_dataset`` is returned; otherwise the experiment's
    own ``dataset`` is returned.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    validation_conf = current_experiment.classification_conf.test_conf
    if validation_conf.method != 'test_dataset':
        return current_experiment.dataset
    return validation_conf.test_dataset
def activeLearningSuggestionsMonitoring(experiment_id, iteration):
    """Send the high-confidence suggestions accuracy image.

    The image is read from the output sub-directory of the iteration
    that precedes ``iteration`` (``int(iteration) - 1``).
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    image_path = (current_experiment.getOutputDirectory()
                  + str(int(iteration) - 1) + '/'
                  + 'suggestions_accuracy/'
                  + 'labels_families'
                  + '_high_confidence_suggestions.png')
    return send_file(image_path)
def getInstancesToAnnotate(experiment_id, iteration, predicted_label):
    """Return the instance ids listed in an iteration's 'toannotate' CSV.

    The file ``toannotate_<predicted_label>.csv`` is read from the
    iteration's output sub-directory and its ``instance_id`` column is
    returned as JSON under the ``'instances'`` key.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    iteration_dir = (current_experiment.getOutputDirectory()
                     + str(iteration) + '/')
    csv_path = iteration_dir + 'toannotate_' + predicted_label + '.csv'
    annotation_queries = pd.read_csv(csv_path)
    return jsonify({'instances': list(annotation_queries.instance_id)})
def getNumComponents(experiment_id):
    """Return, as a string, the number of projection components.

    The value is the number of comma-separated fields in the first line
    of ``projection_matrix.csv`` minus one (presumably the first column
    is a row identifier -- confirm against the file writer).
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    matrix_path = (current_experiment.getOutputDirectory()
                   + 'projection_matrix.csv')
    with open(matrix_path, 'r') as matrix_file:
        header_fields = matrix_file.readline().split(',')
    return str(len(header_fields) - 1)
def getClusterLabelFamilyIds(experiment_id, selected_cluster, label, family,
                             num_results):
    """Return the ids of a cluster's instances for a label and family.

    The ids returned by ``Clustering.getClusterLabelFamilyIds`` are
    passed through ``listResultWebFormat`` together with
    ``int(num_results)`` and sent back as JSON.
    """
    cluster_index = int(selected_cluster)
    result_count = int(num_results)
    current_experiment = updateCurrentExperiment(experiment_id)
    clustering = Clustering.fromJson(current_experiment)
    matching_ids = clustering.getClusterLabelFamilyIds(
        cluster_index, label, family)
    return jsonify(listResultWebFormat(matching_ids, result_count))
def getNumElements(experiment_id, selected_cluster):
    """Return the number of instances of a cluster as JSON.

    The response has a single ``'num_elements'`` key.
    """
    cluster_index = int(selected_cluster)
    current_experiment = updateCurrentExperiment(experiment_id)
    chosen_cluster = Clustering.fromJson(
        current_experiment).clusters[cluster_index]
    return jsonify({'num_elements': chosen_cluster.numInstances()})
def getStatsPlot(experiment_id, plot_type, feature):
    """Send the statistics plot file of a feature.

    The file lives in the ``<feature>/`` sub-directory of the experiment
    output directory. Plot types whose name contains ``'histogram'`` are
    served from a ``.json`` file, every other plot type from a ``.png``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    feature_dir = current_experiment.getOutputDirectory() + feature + '/'
    extension = '.json' if 'histogram' in plot_type else '.png'
    return send_file(feature_dir + plot_type + extension)
def addClusterLabel(experiment_id, selected_cluster, num_results, label,
                    family, label_iteration, label_method):
    """Attach a label and family to one cluster of an experiment.

    ``selected_cluster`` and ``num_results`` are converted with ``int``;
    the remaining arguments are forwarded unchanged to
    ``Clustering.addClusterLabel``.

    Returns an empty string.
    """
    cluster_index = int(selected_cluster)
    result_count = int(num_results)
    current_experiment = updateCurrentExperiment(experiment_id)
    Clustering.fromJson(current_experiment).addClusterLabel(
        cluster_index, result_count, label, family,
        label_iteration, label_method)
    return ''
def getLabelsFamilies(experiment_id, iteration_max):
    """Return the result of ``labels_tools.getLabelsFamilies`` as JSON.

    ``iteration_max`` is the literal string ``'None'`` to mean "no
    limit"; any other value is converted with ``int``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    max_iteration = None if iteration_max == 'None' else int(iteration_max)
    families_by_label = labels_tools.getLabelsFamilies(
        session, current_experiment.labels_id, iteration_max=max_iteration)
    return jsonify(families_by_label)
def getClustersLabels(experiment_id):
    """Return the id and label of every cluster as JSON.

    The response is ``{'clusters': [{'id': ..., 'label': ...}, ...]}``
    with one entry per cluster index in ``range(num_clusters)``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    clustering = Clustering.fromJson(current_experiment)
    # Every cluster is listed, empty ones included: the filter on
    # numInstances() > 0 is currently disabled.
    cluster_entries = [
        {'id': index, 'label': clustering.clusters[index].label}
        for index in range(clustering.num_clusters)
    ]
    return jsonify({'clusters': cluster_entries})
def supervisedLearningMonitoring(experiment_id, train_test, kind):
    """Send a supervised-learning monitoring file.

    The file is ``<output directory><train_test>/<kind>`` with a ``.png``
    extension when ``kind`` is ``'ROC'`` and ``.json`` otherwise.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    base_path = (current_experiment.getOutputDirectory()
                 + train_test + '/' + kind)
    extension = '.png' if kind == 'ROC' else '.json'
    return send_file(base_path + extension)
def getClusterInstancesVisu(experiment_id, selected_cluster, c_e_r,
                            num_results):
    """Return instances of a cluster selected for visualisation.

    ``Clustering.getClusterInstancesVisu`` is called with
    ``random=True``; the entry stored under ``c_e_r`` in its result is
    returned as JSON, keyed by the integer cluster index.
    (``c_e_r`` presumably selects center / edge / random instances --
    confirm against ``Clustering``.)
    """
    result_count = int(num_results)
    cluster_index = int(selected_cluster)
    current_experiment = updateCurrentExperiment(experiment_id)
    clustering = Clustering.fromJson(current_experiment)
    instances_by_kind = clustering.getClusterInstancesVisu(
        cluster_index, result_count, random=True)
    return jsonify({cluster_index: instances_by_kind[c_e_r]})
def getIterationSupervisedExperiment(experiment_id, iteration):
    """Return the models experiment entry of an iteration, as a string.

    ``<iteration>/models_experiments.json`` is loaded from the experiment
    output directory and the value stored under ``'binary'`` (when the
    experiment's ``models_conf`` has a ``'binary'`` key) or
    ``'multiclass'`` (otherwise) is returned through ``str``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    # NOTE(review): the Iteration object is never read afterwards. The
    # call is kept because its constructor is not visible here and may
    # validate the iteration or have side effects -- confirm and drop.
    Iteration(current_experiment, int(iteration))
    if 'binary' in current_experiment.conf.models_conf.keys():
        model_kind = 'binary'
    else:
        model_kind = 'multiclass'
    json_path = (current_experiment.getOutputDirectory()
                 + str(iteration) + '/models_experiments.json')
    with open(json_path, 'r') as json_file:
        experiments_by_kind = json.load(json_file)
    return str(experiments_by_kind[model_kind])
def getInstance(experiment_id, view_id, instance_id, ident):
    """Delegate the display of an instance to the project's view module.

    The module ``SecuML_web.base.views.Projects.<project>`` is imported
    dynamically and its ``getInstance`` is called. A ``view_id`` equal to
    the string ``'None'`` is passed on as ``None``.

    On ``IOError`` the error is logged and the tuple
    ``('Unable to display the instance', ident)`` is returned.
    """
    try:
        requested_view = None if view_id == 'None' else view_id
        current_experiment = updateCurrentExperiment(experiment_id)
        module_name = ('SecuML_web.base.views.Projects.'
                       + current_experiment.project)
        project_views = importlib.import_module(module_name)
        return project_views.getInstance(
            current_experiment, requested_view, instance_id, ident)
    except IOError as error:
        app.logger.error(error)
        # NOTE(review): this returns a 2-tuple. If this function is a
        # Flask view, the second element is interpreted as the response
        # status, which is probably not what is wanted for ``ident`` --
        # confirm the intent before changing the return shape.
        return 'Unable to display the instance', ident
def getLabeledInstances(experiment_id):
    """Return the annotated instance ids of both labels as JSON.

    The response has a ``'malicious'`` and a ``'benign'`` key, each
    holding the result of ``labels_tools.getLabelIds`` called with
    ``annotation=True``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    labels_id = current_experiment.labels_id
    annotated_ids = {}
    for label_name in ('malicious', 'benign'):
        annotated_ids[label_name] = labels_tools.getLabelIds(
            session, label_name, labels_id, annotation=True)
    return jsonify(annotated_ids)
def activeLearningModelsMonitoring(experiment_id, iteration,
                                   train_cv_validation):
    """Send the models performance monitoring image of an iteration.

    The image name is
    ``<binary|multiclass>_<train_cv_validation>_<auc|accuracy>_monitoring.png``
    inside ``models_performance/`` of the iteration output directory.
    ``binary``/``auc`` is used when the experiment's ``models_conf`` has
    a ``'binary'`` key, ``multiclass``/``accuracy`` otherwise.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    current_iteration = Iteration(current_experiment, int(iteration))
    if 'binary' in current_experiment.conf.models_conf.keys():
        model_kind, metric = 'binary', 'auc'
    else:
        model_kind, metric = 'multiclass', 'accuracy'
    image_name = (model_kind + '_' + train_cv_validation + '_'
                  + metric + '_monitoring.png')
    image_path = (current_iteration.output_directory
                  + 'models_performance/' + image_name)
    return send_file(image_path, mimetype='image/png')
def runNextIteration(experiment_id, iteration_number):
    """Schedule the next iteration and optionally log the user action.

    The ``celeryRunNextIteration`` task is queued asynchronously. When
    the module-level ``user_exp`` flag is truthy, a line
    ``<timestamp>,nextIteration,<iteration_number>`` is appended to
    ``user_actions.log`` in the experiment output directory.

    Returns the string form of the object returned by ``apply_async()``.
    """
    task_description = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        current_experiment = updateCurrentExperiment(experiment_id)
        log_path = (current_experiment.getOutputDirectory()
                    + 'user_actions.log')
        log_fields = [datetime.datetime.now(), 'nextIteration',
                      iteration_number]
        # Append mode creates the file when it is missing, so no prior
        # existence check is needed. ``write`` is used instead of the
        # Python-2-only ``print >> f`` form; the output is identical.
        with open(log_path, 'a') as log_file:
            log_file.write(','.join(map(str, log_fields)) + '\n')
    return task_description
def getPredictions(experiment_id, train_test, index):
    """Return the instances whose predicted probability is in one bin.

    ``<train_test>/predictions.csv`` is read from the experiment output
    directory and filtered by ``matrix_tools.extractRowsWithThresholds``
    on the ``'predicted_proba'`` column with the bounds
    ``index * 0.1`` and ``(index + 1) * 0.1``.

    The JSON response holds the selected row index under ``'instances'``
    and the matching probabilities under ``'proba'``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    predictions_path = (current_experiment.getOutputDirectory()
                        + train_test + '/predictions.csv')
    bin_index = int(index)
    lower_bound = bin_index * 0.1
    upper_bound = (bin_index + 1) * 0.1
    with open(predictions_path, 'r') as predictions_file:
        predictions = pd.read_csv(predictions_file, header=0, index_col=0)
    selected = matrix_tools.extractRowsWithThresholds(
        predictions, lower_bound, upper_bound, 'predicted_proba')
    # tolist() yields native Python scalars; list(ndarray) would yield
    # numpy scalars, which the JSON encoder does not always accept.
    instance_ids = selected.index.tolist()
    probabilities = selected['predicted_proba'].tolist()
    return jsonify({'instances': instance_ids, 'proba': probabilities})
def removeLabel(experiment_id, inst_experiment_label_id, iteration_number,
                instance_id):
    """Remove the label of an instance and optionally log the action.

    When the module-level ``user_exp`` flag is truthy, a line
    ``<timestamp>,removeLabel,<instance_id>`` is appended to
    ``user_actions.log`` in the experiment output directory.
    ``iteration_number`` is accepted but not used.

    Returns an empty string.
    """
    labels_tools.removeLabel(session, inst_experiment_label_id, instance_id)
    if user_exp:
        current_experiment = updateCurrentExperiment(experiment_id)
        log_path = (current_experiment.getOutputDirectory()
                    + 'user_actions.log')
        log_fields = [datetime.datetime.now(), 'removeLabel', instance_id]
        # Append mode creates the file when it is missing, so no prior
        # existence check is needed. ``write`` is used instead of the
        # Python-2-only ``print >> f`` form; the output is identical.
        with open(log_path, 'a') as log_file:
            log_file.write(','.join(map(str, log_fields)) + '\n')
    return ''
def currentAnnotations(experiment_id, iteration):
    """Render the current annotations page and optionally log the action.

    The ``ActiveLearning/current_annotations.html`` template is rendered
    with the experiment's project. When the module-level ``user_exp``
    flag is truthy, a line ``<timestamp>,displayAnnotatedInstances`` is
    appended to ``user_actions.log`` in the experiment output directory.
    ``iteration`` is accepted but not used.

    Returns the rendered page.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    page = render_template('ActiveLearning/current_annotations.html',
                           project=current_experiment.project)
    if user_exp:
        log_path = (current_experiment.getOutputDirectory()
                    + 'user_actions.log')
        log_fields = [datetime.datetime.now(), 'displayAnnotatedInstances']
        # Append mode creates the file when it is missing, so no prior
        # existence check is needed. ``write`` is used instead of the
        # Python-2-only ``print >> f`` form; the output is identical.
        with open(log_path, 'a') as log_file:
            log_file.write(','.join(map(str, log_fields)) + '\n')
    return page
def activeLearningMonitoring(experiment_id, iteration, kind, sub_kind):
    """Send an active-learning monitoring image of an iteration.

    ``kind`` selects the image inside the iteration output directory:

    * ``'labels'``: ``labels_monitoring/iteration_<sub_kind>.png``
    * ``'families'``: ``labels_monitoring/families_monitoring.png``
    * ``'clustering'``: ``clustering_evaluation/<sub_kind>_monitoring.png``
    * ``'time'``: ``execution_time_monitoring.png``

    Raises:
        ValueError: if ``kind`` is none of the values above (previously
            this surfaced as an ``UnboundLocalError`` on ``filename``).
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    current_iteration = Iteration(current_experiment, int(iteration))
    directory = current_iteration.output_directory
    if kind == 'labels':
        filename = (directory + 'labels_monitoring/'
                    + 'iteration' + '_' + sub_kind + '.png')
    elif kind == 'families':
        filename = (directory + 'labels_monitoring/'
                    + 'families_monitoring.png')
    elif kind == 'clustering':
        filename = (directory + 'clustering_evaluation/'
                    + sub_kind + '_monitoring.png')
    elif kind == 'time':
        filename = directory + 'execution_time_monitoring.png'
    else:
        raise ValueError('Unknown monitoring kind: ' + str(kind))
    return send_file(filename, mimetype='image/png')
def getFamiliesInstances(experiment_id, label, iteration_max):
    """Return, for each family of a label, its instance ids as JSON.

    The families are the entry stored under ``label`` in the result of
    ``labels_tools.getLabelsFamilies``; each one is mapped to the result
    of ``labels_tools.getLabelFamilyIds``. ``iteration_max`` is the
    literal string ``'None'`` to mean "no limit"; any other value is
    converted with ``int``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    max_iteration = None if iteration_max == 'None' else int(iteration_max)
    label_families = labels_tools.getLabelsFamilies(
        session, current_experiment.labels_id,
        iteration_max=max_iteration)[label]
    instances_by_family = {
        family_name: labels_tools.getLabelFamilyIds(
            session, current_experiment.labels_id, label,
            family=family_name, iteration_max=max_iteration)
        for family_name in label_families
    }
    return jsonify(instances_by_family)
def getAlerts(experiment_id, analysis_type):
    """Return the alerts of an experiment, possibly truncated, as JSON.

    ``alerts.csv`` is read from the experiment output directory. When it
    holds more rows than the configured ``num_max_alerts``:

    * ``analysis_type == 'topN'`` keeps the first ``num_max_alerts``
      rows in file order (presumably the file is sorted by probability
      -- confirm against the writer);
    * ``analysis_type == 'random'`` keeps a random sample of that size;
    * any other value keeps every row.

    The response holds the row index values under ``'instances'`` and an
    index -> ``predicted_proba`` mapping under ``'proba'``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    alerts_path = current_experiment.getOutputDirectory() + 'alerts.csv'
    with open(alerts_path, 'r') as alerts_file:
        alerts_frame = pd.read_csv(alerts_file, header=0, index_col=0)
    test_conf = current_experiment.classification_conf.test_conf
    max_alerts = test_conf.alerts_conf.num_max_alerts
    # Each item is an (index, predicted_proba) tuple.
    alerts = list(alerts_frame[['predicted_proba']].itertuples())
    too_many = max_alerts < len(alerts)
    if too_many and analysis_type == 'topN':
        alerts = alerts[:max_alerts]
    elif too_many and analysis_type == 'random':
        alerts = random.sample(alerts, max_alerts)
    instance_ids = [alert[0] for alert in alerts]
    return jsonify({'instances': instance_ids, 'proba': dict(alerts)})
def changeFamilyLabel(experiment_id, label, family):
    """Change the label of a family and optionally log the action.

    When the module-level ``user_exp`` flag is truthy, a line
    ``<timestamp>,changeFamilyLabel,<family>,<label>`` is appended to
    ``user_actions.log`` in the experiment output directory.

    Returns an empty string.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = current_experiment.labels_id
    labels_tools.changeFamilyLabel(session, label, family,
                                   experiment_label_id)
    if user_exp:
        log_path = (current_experiment.getOutputDirectory()
                    + 'user_actions.log')
        log_fields = [datetime.datetime.now(), 'changeFamilyLabel',
                      family, label]
        # Append mode creates the file when it is missing, so no prior
        # existence check is needed. ``write`` is used instead of the
        # Python-2-only ``print >> f`` form; the output is identical.
        with open(log_path, 'a') as log_file:
            log_file.write(','.join(map(str, log_fields)) + '\n')
    return ''
def mergeFamilies(experiment_id, label, families, new_family_name):
    """Merge several families into one and optionally log the action.

    ``families`` is a comma-separated string; it is split before being
    passed to ``labels_tools.mergeFamilies``. When the module-level
    ``user_exp`` flag is truthy, a line
    ``<timestamp>,mergeFamilies,<new_family_name>,<family>,...`` is
    appended to ``user_actions.log`` in the experiment output directory.

    Returns an empty string.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = current_experiment.labels_id
    family_names = families.split(',')
    labels_tools.mergeFamilies(session, label, family_names,
                               new_family_name, experiment_label_id)
    if user_exp:
        log_path = (current_experiment.getOutputDirectory()
                    + 'user_actions.log')
        log_fields = [datetime.datetime.now(), 'mergeFamilies',
                      new_family_name] + family_names
        # Append mode creates the file when it is missing, so no prior
        # existence check is needed. ``write`` is used instead of the
        # Python-2-only ``print >> f`` form; the output is identical.
        with open(log_path, 'a') as log_file:
            log_file.write(','.join(map(str, log_fields)) + '\n')
    return ''
def getClusterStats(experiment_id):
    """Return a bar plot of the number of instances per cluster as JSON.

    One bar is produced per cluster index in ``range(num_clusters)``;
    bars are named after the cluster labels.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    clustering = Clustering.fromJson(current_experiment)
    # Empty clusters are included: the filter on the number of instances
    # is currently disabled.
    summaries = [
        (len(clustering.clusters[index].instances_ids),
         clustering.clusters[index].label)
        for index in range(clustering.num_clusters)
    ]
    cluster_sizes = [size for size, _ in summaries]
    cluster_labels = [name for _, name in summaries]
    barplot = BarPlot(cluster_labels)
    barplot.addDataset(PlotDataset(cluster_sizes, 'Num. Instances'))
    return jsonify(barplot.toJson())
def addLabel(experiment_id, inst_experiment_label_id, iteration_number,
             instance_id, label, family, method, annotation):
    """Add a label to an instance and optionally log the action.

    ``annotation`` is turned into a boolean that is ``True`` only for
    the exact string ``'true'``. When the module-level ``user_exp`` flag
    is truthy, a comma-separated line with the timestamp, ``addLabel``,
    the iteration number, instance id, label, family, method and
    annotation flag is appended to ``user_actions.log`` in the
    experiment output directory.

    Returns an empty string.
    """
    is_annotation = annotation == 'true'
    labels_tools.addLabel(session, inst_experiment_label_id, instance_id,
                          label, family, iteration_number, method,
                          is_annotation)
    if user_exp:
        current_experiment = updateCurrentExperiment(experiment_id)
        log_path = (current_experiment.getOutputDirectory()
                    + 'user_actions.log')
        log_fields = [datetime.datetime.now(), 'addLabel',
                      iteration_number, instance_id, label, family,
                      method, is_annotation]
        # Append mode creates the file when it is missing, so no prior
        # existence check is needed. ``write`` is used instead of the
        # Python-2-only ``print >> f`` form; the output is identical.
        with open(log_path, 'a') as log_file:
            log_file.write(','.join(map(str, log_fields)) + '\n')
    return ''
def getFamiliesBarplot(experiment_id, iteration, label):
    """Return a bar plot of the instance count per family as JSON.

    Counts come from ``labels_tools.getFamiliesCounts``; bars are sorted
    by family name in ascending order and coloured with the label's
    colour. ``iteration`` is ``None`` or the literal string ``'None'``
    to mean "no limit"; any other value is converted with ``int``, as
    the other handlers of this file do for ``iteration_max``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = current_experiment.labels_id
    if iteration is None or iteration == 'None':
        max_iteration = None
    else:
        max_iteration = int(iteration)
    # NOTE(review): this handler uses ``experiment.session`` whereas the
    # other handlers use the module-level ``session`` -- confirm.
    family_counts = labels_tools.getFamiliesCounts(
        current_experiment.session, experiment_label_id,
        iteration_max=max_iteration, label=label)
    # Materialise the keys once so both columns share the same order.
    family_names = list(family_counts.keys())
    df = pd.DataFrame({
        'families': family_names,
        'counts': [family_counts[name] for name in family_names],
    })
    matrix_tools.sortDataFrame(df, 'families', ascending=True, inplace=True)
    barplot = BarPlot(list(df['families']))
    dataset = PlotDataset(list(df['counts']), 'Num. Instances')
    dataset.setColor(colors_tools.getLabelColor(label))
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
def getFamiliesPerformance(experiment_id, train_test, label, threshold):
    """Return a bar plot of the per-family performance as JSON.

    For ``label == 'malicious'`` the file ``tp_families_thresholds.csv``
    is read and the dataset is named ``'Detection Rate'``; for
    ``label == 'benign'`` the file ``fp_families_thresholds.csv`` is read
    and the dataset is named ``'False Positive Rate'``. Both files live
    in ``<train_test>/families/`` of the experiment output directory.

    ``threshold`` is divided by 100 and the row whose index value is the
    closest to it (the last index value excluded) is plotted.

    Raises:
        ValueError: if ``label`` is neither ``'malicious'`` nor
            ``'benign'`` (previously the dataset name was left unbound
            and the file name lacked its prefix).
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    if label == 'malicious':
        file_prefix, rate_name = 'tp_', 'Detection Rate'
    elif label == 'benign':
        file_prefix, rate_name = 'fp_', 'False Positive Rate'
    else:
        raise ValueError('Unsupported label: ' + str(label))
    csv_path = (current_experiment.getOutputDirectory() + train_test
                + '/families/' + file_prefix + 'families_thresholds.csv')
    with open(csv_path, 'r') as csv_file:
        perf = pd.read_csv(csv_file, header=0, index_col=0)
    # NOTE(review): the family names drop the last column while the row
    # values below keep every column -- confirm this is intended.
    families = list(perf.columns.values[:-1])
    target = float(threshold) / 100
    candidate_thresholds = list(perf.index[:-1])
    closest_threshold = min(candidate_thresholds,
                            key=lambda value: abs(value - target))
    row_values = list(perf.loc[closest_threshold])
    barplot = BarPlot(families)
    barplot.addDataset(PlotDataset(row_values, rate_name))
    return jsonify(barplot.toJson())