def display(self, directory):
    """Write the prediction bar plots as JSON files.

    Two files are written by concatenating their names to `directory`
    (which must therefore already end with a path separator):

    * ``predictions_barplot.json``: number of elements in each entry of
      ``self.ranges``.
    * ``predictions_barplot_labels.json``: the same counts, split on the
      truthiness of each element's ``'true_label'`` entry
      (truthy -> 'malicious', falsy -> 'benign').

    The bars are labelled with ten 10%-wide buckets, so ``self.ranges`` is
    presumably a sequence of ten collections -- confirm against the caller.
    """
    labels = ['%d-%d%%' % (10 * i, 10 * (i + 1)) for i in range(10)]

    # Overall number of instances per range.
    barplot = BarPlot(labels)
    # Real lists rather than map()/filter() objects: under Python 3 those
    # are lazy iterators, and len() of a filter object raises TypeError.
    barplot.addDataset([len(r) for r in self.ranges],
                       colors_tools.getLabelColor('all'),
                       'numInstances')
    filename = directory + 'predictions_barplot.json'
    with open(filename, 'w') as f:
        barplot.display(f)

    # Same counts, split by true label.
    barplot = BarPlot(labels)
    malicious_ranges = [[x for x in r if x['true_label']]
                        for r in self.ranges]
    benign_ranges = [[x for x in r if not x['true_label']]
                     for r in self.ranges]
    barplot.addDataset([len(r) for r in malicious_ranges],
                       colors_tools.getLabelColor('malicious'),
                       'malicious')
    barplot.addDataset([len(r) for r in benign_ranges],
                       colors_tools.getLabelColor('benign'),
                       'benign')
    filename = directory + 'predictions_barplot_labels.json'
    with open(filename, 'w') as f:
        barplot.display(f)
def executionTimeDisplay(self):
    """Return the styled, data-less plot datasets for the three query kinds.

    Each dataset is created with ``None`` as its data. The malicious one
    is dotted and the benign one dashed, each with its label colour; the
    uncertain one keeps the PlotDataset defaults.

    Returns:
        list: ``[malicious, uncertain, benign]`` PlotDataset objects.
    """
    uncertain_queries = PlotDataset(None, 'Uncertain Queries')

    malicious_queries = PlotDataset(None, 'Malicious Queries')
    malicious_queries.setLinestyle('dotted')
    malicious_queries.setColor(colors_tools.getLabelColor('malicious'))

    benign_queries = PlotDataset(None, 'Benign Queries')
    benign_queries.setLinestyle('dashed')
    benign_queries.setColor(colors_tools.getLabelColor('benign'))

    return [malicious_queries, uncertain_queries, benign_queries]
def executionTimeDisplay(self):
    """Return this strategy's plot datasets followed by the base ones.

    Three data-less PlotDataset objects are created ('Binary model',
    'Malicious Analysis' and 'Benign Analysis'). The malicious one is
    dotted and the benign one dashed, each with its label colour; the
    binary-model one keeps the PlotDataset defaults.

    Returns:
        list: ``[binary_model, malicious, benign]`` followed by the list
        returned by ``QueryStrategy.executionTimeDisplay(self)``.
    """
    model_dataset = PlotDataset(None, 'Binary model')

    malicious_analysis = PlotDataset(None, 'Malicious Analysis')
    malicious_analysis.setLinestyle('dotted')
    malicious_analysis.setColor(colors_tools.getLabelColor('malicious'))

    benign_analysis = PlotDataset(None, 'Benign Analysis')
    benign_analysis.setLinestyle('dashed')
    benign_analysis.setColor(colors_tools.getLabelColor('benign'))

    own_datasets = [model_dataset, malicious_analysis, benign_analysis]
    return own_datasets + QueryStrategy.executionTimeDisplay(self)
def generatePlotDatasets(self, instances):
    """Populate ``self.plot_datasets`` with the values of ``self.feature``.

    When ``self.has_true_labels`` is truthy, one dataset is built per true
    label (keys 'malicious' and 'benign'), each restricted to the
    instances carrying that true label. Otherwise a single dataset is
    stored under the key 'all'. Every dataset is coloured with the colour
    of its label.

    Args:
        instances: object providing getFeatureValues, getInstancesFromIds,
            getMaliciousIds and getBenignIds.
    """
    self.plot_datasets = {}

    # No ground truth: a single dataset covers every instance.
    if not self.has_true_labels:
        self.plot_datasets['all'] = PlotDataset(
            instances.getFeatureValues(self.feature), 'all')
        self.plot_datasets['all'].setColor(colors_tools.getLabelColor('all'))
        return

    # Ground truth available: one dataset per label, malicious first.
    ids_getters = (('malicious', instances.getMaliciousIds),
                   ('benign', instances.getBenignIds))
    for label, get_ids in ids_getters:
        subset = instances.getInstancesFromIds(get_ids(true_labels=True))
        dataset = PlotDataset(subset.getFeatureValues(self.feature), label)
        dataset.setColor(colors_tools.getLabelColor(label))
        self.plot_datasets[label] = dataset
def plotEvolutionMonitoring(self, estimator=None):
    """Plot, per clustering, the evolution of an estimator over iterations.

    With ``estimator=None`` the method calls itself once for every entry
    of ``self.homogeneity_estimators + self.adjusted_estimators``.

    Otherwise one curve is drawn per key returned by
    ``self.annotations.getClusteringsEvaluations()``, reading the column
    ``'<key>_<estimator>'`` of ``self.data``. The figure is saved as
    ``self.output_directory + estimator + '_monitoring.png'``.

    Args:
        estimator: name of the estimator to plot, or None for all of them.
    """
    if estimator is None:
        for name in self.homogeneity_estimators + self.adjusted_estimators:
            self.plotEvolutionMonitoring(estimator=name)
        return

    x_values = range(self.monitoring.iteration_number)
    plt.clf()
    y_max = 1
    evaluations = self.annotations.getClusteringsEvaluations()
    for clustering in evaluations.keys():
        curve_color = colors_tools.getLabelColor(clustering)
        column = clustering + '_' + estimator
        plt.plot(x_values, self.data.loc[:][column],
                 label=clustering.title() + ' Clustering',
                 color=curve_color, linewidth=4, marker='o')

    plt.ylim(0, y_max)
    plt.xlabel('Iteration')
    plt.ylabel(estimator)
    # The legend is anchored just above the axes and spans their width.
    legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                        ncol=2, mode='expand', borderaxespad=0.,
                        fontsize='large')
    output_path = self.output_directory + (estimator + '_monitoring.png')
    # bbox_extra_artists keeps the out-of-axes legend inside the tight
    # bounding box of the saved image.
    plt.savefig(output_path, bbox_extra_artists=(legend, ),
                bbox_inches='tight')
    plt.clf()
def plotFamiliesEvolutionMonitoring(self):
    """Plot the families discovered against the number of annotations.

    One curve is drawn per label ('malicious', 'benign') from
    ``self.evolutions[label]['families']``, with
    ``self.evolutions['global']['annotations']`` on the x axis.

    When ``self.has_true_labels`` is truthy, each curve is divided by the
    number of families returned by ``getFamiliesValues`` for that label,
    and the y axis is capped at 1. Otherwise the raw values are plotted
    and the cap is the max of the two ``self.families_monitoring`` entries.

    The figure is saved as
    ``self.output_directory + 'families_monitoring.png'``.
    """
    annotations = self.evolutions['global']['annotations']
    plt.clf()
    if self.has_true_labels:
        max_value = 1
    else:
        # NOTE(review): max() compares the two entries directly, so they
        # are presumably scalars -- confirm.
        max_value = max(self.families_monitoring['malicious'],
                        self.families_monitoring['benign'])
    for l in ['malicious', 'benign']:
        evolution = self.evolutions[l]['families']
        if self.has_true_labels:
            num_families = len(
                self.monitoring.datasets.instances.getFamiliesValues(
                    label=l, true_labels=True))
            # float() forces true division: with integer counts under
            # Python 2 (without `from __future__ import division`) the
            # proportions would be truncated to 0 or 1.
            evolution = [x / float(num_families) for x in evolution]
        color = colors_tools.getLabelColor(l)
        plt.plot(annotations, evolution, label=l.title(), color=color,
                 linewidth=4, marker='o')
    plt.ylim(0, max_value)
    plt.xlabel('Num Annotations')
    plt.ylabel('Prop. Families Discovered')
    # The legend is anchored just above the axes and spans their width.
    lgd = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2,
                     mode='expand', borderaxespad=0., fontsize='x-large')
    filename = self.output_directory
    filename += 'families_monitoring.png'
    # bbox_extra_artists keeps the out-of-axes legend inside the tight
    # bounding box of the saved image.
    plt.savefig(filename, bbox_extra_artists=(lgd, ), bbox_inches='tight')
    plt.clf()
def addFold(self, fold_id, true_labels, predicted_proba, predicted_scores):
    """Add the ROC curve of one fold to the plot and to the running mean.

    Nothing happens when `true_labels` is empty. Otherwise the fold's ROC
    curve is computed from `predicted_proba` if ``self.probabilist_model``
    is truthy, else from `predicted_scores`. The curve is interpolated on
    ``self.mean_fpr``, accumulated into ``self.mean_tpr`` and drawn on
    ``self.ax1``.

    ``self.thresholds`` is overwritten (not accumulated) on every call,
    so it holds the interpolated thresholds of the last fold added.

    Args:
        fold_id: fold number, used in the legend when there are several
            folds.
        true_labels: ground-truth labels of the fold.
        predicted_proba: predicted probabilities.
        predicted_scores: predicted decision scores.
    """
    if len(true_labels) == 0:
        return

    scores = predicted_proba if self.probabilist_model else predicted_scores
    fpr, tpr, thresholds = roc_curve(true_labels, scores)

    self.mean_tpr += interp(self.mean_fpr, fpr, tpr)
    self.thresholds = interp(self.mean_fpr, fpr, thresholds)
    # Pin the end points of the interpolated curves.
    self.mean_tpr[0] = 0.0
    self.thresholds[0] = 1.0
    self.thresholds[-1] = 0.0

    roc_auc = auc(fpr, tpr)
    if self.num_folds > 1:
        # Several folds: thin line, default colour, fold id in the legend.
        plot_options = {
            'lw': 1,
            'label': 'ROC fold %d (area = %0.2f)' % (fold_id, roc_auc),
        }
    else:
        # Single fold: thick line drawn with the 'all' label colour.
        plot_options = {
            'lw': 3,
            'color': colors_tools.getLabelColor('all'),
            'label': 'ROC (area = %0.2f)' % (roc_auc),
        }
    self.ax1.plot(fpr, tpr, **plot_options)
def getFamiliesPerformance(project, dataset, experiment, train_test, label,
                           threshold):
    """Return the per-family performance bar plot at a given threshold.

    Reads ``<getDir(...)><train_test>/families/<prefix>families_thresholds.csv``
    where the prefix is ``tp_`` for 'malicious' and ``fp_`` for 'benign'.
    The row whose index is closest to ``threshold / 100`` is selected and
    returned as a JSON bar plot.

    Args:
        project, dataset, experiment: forwarded to ``getDir``.
        train_test: sub-directory name appended to the experiment directory.
        label: 'malicious' (detection rate) or 'benign' (false positive
            rate).
        threshold: threshold as a percentage; it is converted with
            ``float(threshold) / 100``.

    Returns:
        The result of ``jsonify(barplot.barplot)``.

    Raises:
        ValueError: if `label` is neither 'malicious' nor 'benign'.
    """
    # Validate the label first. Previously an unknown label left `tp_fp`
    # unbound, and the request failed later with an unrelated IOError or
    # NameError.
    label_settings = {
        'malicious': ('tp_', 'Detection Rate'),
        'benign': ('fp_', 'False Positive Rate'),
    }
    if label not in label_settings:
        raise ValueError("label must be 'malicious' or 'benign', got %r"
                         % (label,))
    prefix, tp_fp = label_settings[label]

    filename = getDir(project, dataset, experiment) + train_test + '/families/'
    filename += prefix
    filename += 'families_thresholds.csv'
    with open(filename, 'r') as f:
        perf = pd.read_csv(f, header=0, index_col=0)

    # NOTE(review): the last column is dropped from the bar labels, but the
    # selected row below keeps every column -- confirm this is intended.
    families = list(perf.columns.values[:-1])
    threshold = float(threshold) / 100
    # The last index entry is excluded from the candidate thresholds.
    thresholds = list(perf.index[:-1])
    # Closest available threshold; the first one wins on ties.
    threshold_value = min(thresholds, key=lambda t: abs(t - threshold))
    perf = list(perf.loc[threshold_value])

    barplot = BarPlot(families)
    barplot.addDataset(perf, colors_tools.getLabelColor('all'), tp_fp)
    return jsonify(barplot.barplot)
def getFamiliesBarplot(project, dataset, experiment_id, iteration, label):
    """Return a JSON bar plot of the number of instances per family.

    The experiment is loaded through ``ExperimentFactory`` and the family
    counts are fetched with ``labels_tools.getFamiliesCounts``. Bars are
    sorted by family name (ascending) and drawn with the colour of `label`.

    Args:
        project, dataset, experiment_id: identify the experiment to load.
        iteration: maximum iteration passed to ``getFamiliesCounts``; the
            string ``'None'`` is converted to ``None``.
        label: label passed to ``getFamiliesCounts`` and used for the bar
            colour.

    Returns:
        The result of ``jsonify(barplot.barplot)``.
    """
    experiment = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    experiment_label_id = experiment.experiment_label_id
    # The iteration arrives as text, so 'None' stands for "no limit".
    if iteration == 'None':
        iteration = None
    family_counts = labels_tools.getFamiliesCounts(cursor,
                                                   experiment_label_id,
                                                   iteration_max=iteration,
                                                   label=label)
    # Materialise the keys once: on Python 3 dict.keys() is a view, which
    # some pandas versions refuse as column data. Reusing the same list
    # also keeps families and counts aligned.
    families = list(family_counts.keys())
    df = pd.DataFrame({
        'families': families,
        'counts': [family_counts[k] for k in families],
    })
    matrix_tools.sortDataFrame(df, 'families', ascending=True, inplace=True)
    barplot = BarPlot(list(df['families']))
    barplot.addDataset(list(df['counts']),
                       colors_tools.getLabelColor(label),
                       'Num. Instances')
    return jsonify(barplot.barplot)
def getFamiliesBarplot(experiment_id, iteration, label):
    """Return a JSON bar plot of the number of instances per family.

    The experiment is obtained from ``updateCurrentExperiment`` and the
    family counts are fetched with ``labels_tools.getFamiliesCounts``
    using the experiment's session. Bars are sorted by family name
    (ascending) and drawn with the colour of `label`.

    Args:
        experiment_id: identifier passed to ``updateCurrentExperiment``.
        iteration: maximum iteration passed to ``getFamiliesCounts``; the
            string ``'None'`` is converted to ``None``.
        label: label passed to ``getFamiliesCounts`` and used for the bar
            colour.

    Returns:
        The result of ``jsonify(barplot.toJson())``.
    """
    experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = experiment.labels_id
    # The iteration arrives as text, so 'None' stands for "no limit".
    if iteration == 'None':
        iteration = None
    family_counts = labels_tools.getFamiliesCounts(experiment.session,
                                                   experiment_label_id,
                                                   iteration_max=iteration,
                                                   label=label)
    # Materialise the keys once: on Python 3 dict.keys() is a view, which
    # some pandas versions refuse as column data. Reusing the same list
    # also keeps families and counts aligned.
    families = list(family_counts.keys())
    df = pd.DataFrame({
        'families': families,
        'counts': [family_counts[k] for k in families],
    })
    matrix_tools.sortDataFrame(df, 'families', ascending=True, inplace=True)
    barplot = BarPlot(list(df['families']))
    dataset = PlotDataset(list(df['counts']), 'Num. Instances')
    dataset.setColor(colors_tools.getLabelColor(label))
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
def setDefaultValues(self):
    """Reset the drawing attributes to their defaults.

    Sets ``color`` to the colour of the 'all' label, ``linewidth`` to 3,
    ``linestyle`` to 'solid' and ``marker`` to 'o'.
    """
    default_color = colors_tools.getLabelColor('all')
    self.color = default_color
    self.linewidth = 3
    self.linestyle = 'solid'
    self.marker = 'o'
def displayFamiliesDistribution(self, directory, label=None):
    """Plot the distribution of the predictions of each family.

    With ``label=None`` the method calls itself for 'malicious' and then
    for 'benign'. Otherwise, for every family of the given label
    (``self.malicious_families`` when `label` is 'malicious', else
    ``self.benign_families``):

    * its density curve is added to a shared figure, saved as
      ``directory + label + '_families_prediction_distributions.png'``;
    * the same curve is saved on its own as
      ``directory + label + '_family_' + family +
      '_prediction_distributions.png'``.

    Densities are estimated with a Gaussian kernel (bandwidth 0.1). When
    the variance of the predictions is below 1e-5, a spike of height 1 at
    the mean is drawn instead of a fitted density.

    Args:
        directory: output directory, concatenated as-is with the file
            names (so it must end with a path separator).
        label: 'malicious', any other value for the benign families, or
            None for both.
    """
    if label is None:
        self.displayFamiliesDistribution(directory, label='malicious')
        self.displayFamiliesDistribution(directory, label='benign')
        return
    if label == 'malicious':
        families = self.malicious_families
    else:
        families = self.benign_families

    bandwidth = 0.1
    num_points = 50
    eps = 0.00001
    # Line styles cycle over the families to tell the curves apart.
    linestyles = ('solid', 'dashed', 'dotted')
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth)
    fig, ax = plt.subplots(1, 1)

    for i, family in enumerate(families):
        predictions_np = np.asarray(families[family])
        linestyle = linestyles[i % 3]
        linewidth = 2
        if np.var(predictions_np) < eps:
            # (Nearly) constant predictions: draw a spike at the mean
            # instead of fitting a density.
            linewidth = 4
            mean = np.mean(predictions_np)
            x = np.arange(0, 1, 0.1)
            # mean - eps and mean + eps bracket the spike so that it
            # drops back to 0 immediately on both sides.
            x = np.sort(np.append(x, [mean, mean - eps, mean + eps]))
            density = [1 if v == mean else 0 for v in x]
        else:
            density_predictions = [[p] for p in predictions_np]
            kde.fit(density_predictions)
            # The x-axis step is derived from the spread of the
            # predictions.
            p_max = np.amax(predictions_np)
            p_min = np.amin(predictions_np)
            delta = p_max - p_min
            density_delta = 1.1 * delta
            x = np.arange(0, 1, density_delta / num_points)
            x_density = [[y] for y in x]
            # kde.score_samples returns the log of the density.
            log_density = kde.score_samples(x_density).tolist()
            # A list, not map(): the density is plotted twice below, and a
            # Python 3 map object is a one-shot iterator.
            density = [math.exp(v) for v in log_density]

        # Curve on the shared figure.
        ax.plot(x, density, label=family, linewidth=linewidth,
                linestyle=linestyle)

        # Same curve on a figure dedicated to this family.
        fig_f, ax_f = plt.subplots(1, 1)
        ax_f.plot(x, density, linewidth=4,
                  color=colors_tools.getLabelColor(label))
        ax_f.set_title(family)
        ax_f.set_xlabel('P(Malicious)')
        ax_f.set_ylabel('Density')
        filename = directory
        filename += label + '_family_' + family
        filename += '_prediction_distributions.png'
        fig_f.savefig(filename)
        plt.close(fig_f)

    ax.legend(bbox_to_anchor=(0., 0.95, 1., .102), loc=3, ncol=5,
              mode='expand', borderaxespad=0., fontsize='xx-small')
    ax.set_xlabel('P(Malicious)')
    ax.set_ylabel('Density')
    filename = directory
    filename += label + '_families_prediction_distributions.png'
    fig.savefig(filename)
    plt.close(fig)