示例#1
0
def removeClusterLabel(experiment_id, selected_cluster, num_results):
    """Forward a label removal request to the experiment's clustering.

    ``selected_cluster`` and ``num_results`` are converted with ``int()``
    and passed to ``Clustering.removeClusterLabel``. Always returns an
    empty string.
    """
    cluster_index = int(selected_cluster)
    max_results = int(num_results)
    current_experiment = updateCurrentExperiment(experiment_id)
    loaded_clustering = Clustering.fromJson(current_experiment)
    loaded_clustering.removeClusterLabel(cluster_index, max_results)
    return ''
示例#2
0
def getValidationDataset(experiment_id):
    """Return the dataset the experiment is validated on.

    When the test configuration method is ``'test_dataset'`` the
    configured ``test_dataset`` is returned; otherwise the experiment's
    own ``dataset`` is returned.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    validation_conf = current_experiment.classification_conf.test_conf
    uses_dedicated_dataset = validation_conf.method == 'test_dataset'
    return (validation_conf.test_dataset if uses_dedicated_dataset
            else current_experiment.dataset)
示例#3
0
def activeLearningSuggestionsMonitoring(experiment_id, iteration):
    """Send the high-confidence suggestions accuracy plot.

    The PNG is looked up in the ``suggestions_accuracy/`` sub-directory of
    iteration ``int(iteration) - 1`` inside the experiment output
    directory, and returned through ``send_file``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    iteration_dir = (current_experiment.getOutputDirectory()
                     + str(int(iteration) - 1) + '/')
    plot_name = 'labels_families' + '_high_confidence_suggestions.png'
    return send_file(iteration_dir + 'suggestions_accuracy/' + plot_name)
示例#4
0
def getInstancesToAnnotate(experiment_id, iteration, predicted_label):
    """Return the instance ids listed in an iteration's to-annotate CSV.

    Reads ``<output dir><iteration>/toannotate_<predicted_label>.csv`` and
    returns its ``instance_id`` column as ``{'instances': [...]}`` via
    ``jsonify``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    iteration_dir = current_experiment.getOutputDirectory() + str(iteration) + '/'
    csv_path = iteration_dir + 'toannotate_' + predicted_label + '.csv'
    instance_ids = pd.read_csv(csv_path).instance_id
    return jsonify({'instances': list(instance_ids)})
示例#5
0
def getNumComponents(experiment_id):
    """Return, as a string, the component count of the projection matrix.

    The count is the number of comma-separated fields on the first line of
    ``projection_matrix.csv`` minus one, i.e. the number of commas on that
    line.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    matrix_path = current_experiment.getOutputDirectory() + 'projection_matrix.csv'
    with open(matrix_path, 'r') as matrix_file:
        first_line = matrix_file.readline()
    # N comma-separated fields are delimited by exactly N - 1 commas.
    return str(first_line.count(','))
示例#6
0
def getClusterLabelFamilyIds(experiment_id, selected_cluster, label, family, num_results):
    """Return the ids of a cluster's instances for a given label and family.

    The ids come from ``Clustering.getClusterLabelFamilyIds`` and are
    passed through ``listResultWebFormat`` together with
    ``int(num_results)`` before being returned with ``jsonify``.
    """
    cluster_index = int(selected_cluster)
    max_results = int(num_results)
    current_experiment = updateCurrentExperiment(experiment_id)
    loaded_clustering = Clustering.fromJson(current_experiment)
    matching_ids = loaded_clustering.getClusterLabelFamilyIds(
        cluster_index, label, family)
    return jsonify(listResultWebFormat(matching_ids, max_results))
示例#7
0
def getNumElements(experiment_id, selected_cluster):
    """Return ``{'num_elements': <count>}`` for the selected cluster.

    The count is ``numInstances()`` of the cluster at index
    ``int(selected_cluster)`` in the experiment's clustering.
    """
    cluster_index = int(selected_cluster)
    current_experiment = updateCurrentExperiment(experiment_id)
    chosen_cluster = Clustering.fromJson(current_experiment).clusters[cluster_index]
    return jsonify({'num_elements': chosen_cluster.numInstances()})
def getStatsPlot(experiment_id, plot_type, feature):
    """Send a statistics plot file for one feature.

    The file lives in ``<output dir><feature>/`` and is named after
    ``plot_type``: a ``.json`` file when ``plot_type`` contains
    ``'histogram'``, a ``.png`` file otherwise.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    feature_dir = current_experiment.getOutputDirectory() + feature + '/'
    extension = '.json' if 'histogram' in plot_type else '.png'
    return send_file(feature_dir + plot_type + extension)
示例#9
0
def addClusterLabel(experiment_id, selected_cluster, num_results, label,
                    family, label_iteration, label_method):
    """Forward a labelling request to the experiment's clustering.

    ``selected_cluster`` and ``num_results`` are converted with ``int()``;
    the remaining arguments are passed unchanged to
    ``Clustering.addClusterLabel``. Always returns an empty string.
    """
    cluster_index = int(selected_cluster)
    max_results = int(num_results)
    current_experiment = updateCurrentExperiment(experiment_id)
    loaded_clustering = Clustering.fromJson(current_experiment)
    loaded_clustering.addClusterLabel(
        cluster_index, max_results, label, family,
        label_iteration, label_method)
    return ''
示例#10
0
文件: labels.py 项目: Piuliss/SecuML
def getLabelsFamilies(experiment_id, iteration_max):
    """Return the result of ``labels_tools.getLabelsFamilies`` as JSON.

    ``iteration_max`` is the string ``'None'`` (meaning no bound is passed
    on) or a value convertible with ``int()``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    max_iteration = None if iteration_max == 'None' else int(iteration_max)
    families_by_label = labels_tools.getLabelsFamilies(
        session, current_experiment.labels_id, iteration_max=max_iteration)
    return jsonify(families_by_label)
示例#11
0
def getClustersLabels(experiment_id):
    """Return ``{'clusters': [{'id': ..., 'label': ...}, ...]}`` as JSON.

    One entry is produced for every index in ``range(num_clusters)``; no
    filtering on cluster size is applied, so empty clusters are listed too.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    loaded_clustering = Clustering.fromJson(current_experiment)
    cluster_entries = [
        {'id': index, 'label': loaded_clustering.clusters[index].label}
        for index in range(loaded_clustering.num_clusters)
    ]
    return jsonify({'clusters': cluster_entries})
示例#12
0
def supervisedLearningMonitoring(experiment_id, train_test, kind):
    """Send a supervised-learning monitoring file.

    The file is ``<output dir><train_test>/<kind>`` with a ``.png``
    extension when ``kind`` is ``'ROC'`` and ``.json`` for any other kind.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    base_path = current_experiment.getOutputDirectory() + train_test + '/' + kind
    extension = '.png' if kind == 'ROC' else '.json'
    return send_file(base_path + extension)
示例#13
0
def getClusterInstancesVisu(experiment_id, selected_cluster, c_e_r, num_results):
    """Return one entry of a cluster's visualisation instances as JSON.

    Calls ``Clustering.getClusterInstancesVisu`` with ``random=True`` and
    returns ``{<cluster index>: result[c_e_r]}``.
    """
    max_results = int(num_results)
    cluster_index = int(selected_cluster)
    current_experiment = updateCurrentExperiment(experiment_id)
    loaded_clustering = Clustering.fromJson(current_experiment)
    visu_instances = loaded_clustering.getClusterInstancesVisu(
        cluster_index, max_results, random=True)
    return jsonify({cluster_index: visu_instances[c_e_r]})
示例#14
0
def getIterationSupervisedExperiment(experiment_id, iteration):
    """Return an entry of an iteration's ``models_experiments.json``.

    The entry is looked up under ``'binary'`` when that key is present in
    the experiment's ``models_conf`` and under ``'multiclass'`` otherwise,
    and is returned converted with ``str()``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    # The Iteration object is not used afterwards; it is still constructed
    # because its constructor may have side effects and ``int(iteration)``
    # rejects non-numeric input.
    iteration_object = Iteration(current_experiment, int(iteration))
    has_binary_model = 'binary' in current_experiment.conf.models_conf.keys()
    model_kind = 'binary' if has_binary_model else 'multiclass'
    json_path = (current_experiment.getOutputDirectory()
                 + str(iteration) + '/models_experiments.json')
    with open(json_path, 'r') as json_file:
        experiments_by_kind = json.load(json_file)
    return str(experiments_by_kind[model_kind])
示例#15
0
def getInstance(experiment_id, view_id, instance_id, ident):
    """Delegate instance display to the project-specific views module.

    The module ``SecuML_web.base.views.Projects.<project>`` is imported
    dynamically and its ``getInstance`` is called. A ``view_id`` equal to
    the string ``'None'`` is passed on as ``None``. On ``IOError`` the
    error is logged and an error message is returned instead.
    """
    try:
        requested_view = None if view_id == 'None' else view_id
        current_experiment = updateCurrentExperiment(experiment_id)
        module_name = ('SecuML_web.base.views.Projects.' +
                       current_experiment.project)
        project_views = importlib.import_module(module_name)
        return project_views.getInstance(
            current_experiment, requested_view, instance_id, ident)
    except IOError as error:
        app.logger.error(error)
        # NOTE(review): this returns a 2-tuple (message, ident); confirm
        # that the caller expects ``ident`` as the second element.
        return 'Unable to display the instance', ident
示例#16
0
文件: labels.py 项目: Piuliss/SecuML
def getLabeledInstances(experiment_id):
    """Return the annotated instance ids for each label as JSON.

    The response maps ``'malicious'`` and ``'benign'`` to the result of
    ``labels_tools.getLabelIds`` called with ``annotation=True``.
    """
    labels_id = updateCurrentExperiment(experiment_id).labels_id
    ids_by_label = {}
    for label_name in ('malicious', 'benign'):
        ids_by_label[label_name] = labels_tools.getLabelIds(
            session, label_name, labels_id, annotation=True)
    return jsonify(ids_by_label)
示例#17
0
def activeLearningModelsMonitoring(experiment_id, iteration, train_cv_validation):
    """Send the model performance monitoring plot of an iteration.

    The PNG name is ``<binary|multiclass>_<train_cv_validation>_<auc|accuracy>``
    followed by ``_monitoring.png``; ``binary``/``auc`` are used when the
    experiment's ``models_conf`` has a ``'binary'`` key, and
    ``multiclass``/``accuracy`` otherwise.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    current_iteration = Iteration(current_experiment, int(iteration))
    if 'binary' in current_experiment.conf.models_conf.keys():
        problem_kind, metric = 'binary', 'auc'
    else:
        problem_kind, metric = 'multiclass', 'accuracy'
    performance_dir = current_iteration.output_directory + 'models_performance/'
    plot_name = (problem_kind + '_' + train_cv_validation + '_' + metric
                 + '_monitoring.png')
    return send_file(performance_dir + plot_name, mimetype='image/png')
示例#18
0
def runNextIteration(experiment_id, iteration_number):
    """Schedule the next iteration and optionally log the user action.

    The Celery task ``celeryRunNextIteration`` is submitted asynchronously
    and the string form of the value returned by ``apply_async()`` is
    returned. When the module-level ``user_exp`` flag is truthy, a line
    ``<timestamp>,nextIteration,<iteration_number>`` is appended to
    ``user_actions.log`` in the experiment output directory.
    """
    res = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = experiment.getOutputDirectory() + 'user_actions.log'
        fields = [datetime.datetime.now(), 'nextIteration', iteration_number]
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no separate
        # existence check (and no risk of truncating a file created in
        # between) is needed. f.write() behaves the same on Python 2 and 3,
        # unlike the Python 2-only ``print >> f`` statement.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return res
示例#19
0
def getPredictions(experiment_id, train_test, index):
    """Return the instances whose predicted probability falls in one bin.

    Bin ``index`` is bounded by ``index * 0.1`` and ``(index + 1) * 0.1``;
    both bounds are handed to ``matrix_tools.extractRowsWithThresholds``
    for the ``predicted_proba`` column of ``<train_test>/predictions.csv``.
    The response contains the selected row index values
    (``'instances'``) and their probabilities (``'proba'``).
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    predictions_path = (current_experiment.getOutputDirectory()
                        + train_test + '/predictions.csv')
    bin_index = int(index)
    lower_bound = bin_index * 0.1
    upper_bound = (bin_index + 1) * 0.1
    with open(predictions_path, 'r') as predictions_file:
        predictions = pd.read_csv(predictions_file, header=0, index_col=0)
    in_bin = matrix_tools.extractRowsWithThresholds(
        predictions, lower_bound, upper_bound, 'predicted_proba')
    return jsonify({
        'instances': list(in_bin.index.values),
        'proba': list(in_bin['predicted_proba']),
    })
示例#20
0
文件: labels.py 项目: Piuliss/SecuML
def removeLabel(experiment_id, inst_experiment_label_id, iteration_number,
                instance_id):
    """Remove an instance's label and optionally log the user action.

    The removal is delegated to ``labels_tools.removeLabel``. When the
    module-level ``user_exp`` flag is truthy, a line
    ``<timestamp>,removeLabel,<instance_id>`` is appended to
    ``user_actions.log`` in the experiment output directory.
    ``iteration_number`` is accepted but not used. Always returns an empty
    string.
    """
    labels_tools.removeLabel(session, inst_experiment_label_id, instance_id)
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = experiment.getOutputDirectory() + 'user_actions.log'
        fields = [datetime.datetime.now(), 'removeLabel', instance_id]
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no separate
        # existence check (and no risk of truncating a file created in
        # between) is needed. f.write() behaves the same on Python 2 and 3,
        # unlike the Python 2-only ``print >> f`` statement.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return ''
示例#21
0
def currentAnnotations(experiment_id, iteration):
    """Render the current-annotations page and optionally log the action.

    The page is rendered from ``ActiveLearning/current_annotations.html``
    with the experiment's project. When the module-level ``user_exp`` flag
    is truthy, a line ``<timestamp>,displayAnnotatedInstances`` is appended
    to ``user_actions.log`` in the experiment output directory.
    ``iteration`` is accepted but not used.
    """
    experiment = updateCurrentExperiment(experiment_id)
    page = render_template('ActiveLearning/current_annotations.html',
                           project=experiment.project)
    if user_exp:
        filename = experiment.getOutputDirectory() + 'user_actions.log'
        fields = [datetime.datetime.now(), 'displayAnnotatedInstances']
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no separate
        # existence check (and no risk of truncating a file created in
        # between) is needed. f.write() behaves the same on Python 2 and 3,
        # unlike the Python 2-only ``print >> f`` statement.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return page
示例#22
0
def activeLearningMonitoring(experiment_id, iteration, kind, sub_kind):
    """Send one of the active-learning monitoring plots of an iteration.

    Supported values of ``kind`` and the file served (relative to the
    iteration output directory):

    * ``'labels'``     -> ``labels_monitoring/iteration_<sub_kind>.png``
    * ``'families'``   -> ``labels_monitoring/families_monitoring.png``
    * ``'clustering'`` -> ``clustering_evaluation/<sub_kind>_monitoring.png``
    * ``'time'``       -> ``execution_time_monitoring.png``

    Raises:
        ValueError: if ``kind`` is none of the values above (previously
            this surfaced as an UnboundLocalError on ``filename``).
    """
    experiment = updateCurrentExperiment(experiment_id)
    active_learning = Iteration(experiment, int(iteration))
    directory = active_learning.output_directory
    # The kinds are mutually exclusive, hence a single if/elif chain.
    if kind == 'labels':
        filename = directory + 'labels_monitoring/'
        filename += 'iteration' + '_' + sub_kind + '.png'
    elif kind == 'families':
        filename = directory + 'labels_monitoring/' + 'families_monitoring.png'
    elif kind == 'clustering':
        filename = directory + 'clustering_evaluation/'
        filename += sub_kind + '_monitoring.png'
    elif kind == 'time':
        filename = directory + 'execution_time_monitoring.png'
    else:
        raise ValueError('Unknown monitoring kind: ' + str(kind))
    return send_file(filename, mimetype='image/png')
示例#23
0
文件: labels.py 项目: Piuliss/SecuML
def getFamiliesInstances(experiment_id, label, iteration_max):
    """Return, for each family of ``label``, its instance ids as JSON.

    The families are the ``label`` entry of
    ``labels_tools.getLabelsFamilies``; each one is mapped to the result
    of ``labels_tools.getLabelFamilyIds``. ``iteration_max`` is the string
    ``'None'`` (no bound is passed on) or a value convertible with
    ``int()``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    max_iteration = None if iteration_max == 'None' else int(iteration_max)
    label_families = labels_tools.getLabelsFamilies(
        session, current_experiment.labels_id,
        iteration_max=max_iteration)[label]
    ids_by_family = {
        family_name: labels_tools.getLabelFamilyIds(
            session,
            current_experiment.labels_id,
            label,
            family=family_name,
            iteration_max=max_iteration)
        for family_name in label_families
    }
    return jsonify(ids_by_family)
示例#24
0
def getAlerts(experiment_id, analysis_type):
    """Return the alerts of an experiment, capped to the configured maximum.

    Alerts are the rows of ``alerts.csv`` (row index value plus
    ``predicted_proba``). When there are more rows than
    ``num_max_alerts``, ``'topN'`` keeps the first ones in file order and
    ``'random'`` draws a random sample; any other ``analysis_type`` leaves
    the list untouched. The response holds the row index values
    (``'instances'``) and an index-to-probability mapping (``'proba'``).
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    alerts_path = current_experiment.getOutputDirectory() + 'alerts.csv'
    with open(alerts_path, 'r') as alerts_file:
        predictions = pd.read_csv(alerts_file, header=0, index_col=0)
    test_conf = current_experiment.classification_conf.test_conf
    limit = test_conf.alerts_conf.num_max_alerts
    # Each row is an (index value, predicted_proba) pair.
    rows = list(predictions[['predicted_proba']].itertuples())
    if limit < len(rows):
        if analysis_type == 'topN':
            rows = rows[:limit]
        elif analysis_type == 'random':
            rows = random.sample(rows, limit)
    instance_ids = [row[0] for row in rows]
    return jsonify({'instances': instance_ids, 'proba': dict(rows)})
示例#25
0
文件: labels.py 项目: Piuliss/SecuML
def changeFamilyLabel(experiment_id, label, family):
    """Change the label of a family and optionally log the user action.

    The change is delegated to ``labels_tools.changeFamilyLabel``. When
    the module-level ``user_exp`` flag is truthy, a line
    ``<timestamp>,changeFamilyLabel,<family>,<label>`` is appended to
    ``user_actions.log`` in the experiment output directory. Always
    returns an empty string.
    """
    experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = experiment.labels_id
    labels_tools.changeFamilyLabel(session, label, family, experiment_label_id)
    if user_exp:
        filename = experiment.getOutputDirectory() + 'user_actions.log'
        fields = [datetime.datetime.now(), 'changeFamilyLabel', family, label]
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no separate
        # existence check (and no risk of truncating a file created in
        # between) is needed. f.write() behaves the same on Python 2 and 3,
        # unlike the Python 2-only ``print >> f`` statement.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return ''
示例#26
0
文件: labels.py 项目: Piuliss/SecuML
def mergeFamilies(experiment_id, label, families, new_family_name):
    """Merge several families into one and optionally log the user action.

    ``families`` is a comma-separated string; it is split and passed to
    ``labels_tools.mergeFamilies``. When the module-level ``user_exp``
    flag is truthy, a line
    ``<timestamp>,mergeFamilies,<new_family_name>,<family>,...`` is
    appended to ``user_actions.log`` in the experiment output directory.
    Always returns an empty string.
    """
    experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = experiment.labels_id
    families = families.split(',')
    labels_tools.mergeFamilies(session, label, families, new_family_name,
                               experiment_label_id)
    if user_exp:
        filename = experiment.getOutputDirectory() + 'user_actions.log'
        fields = [datetime.datetime.now(), 'mergeFamilies', new_family_name]
        fields += families
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no separate
        # existence check (and no risk of truncating a file created in
        # between) is needed. f.write() behaves the same on Python 2 and 3,
        # unlike the Python 2-only ``print >> f`` statement.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return ''
示例#27
0
def getClusterStats(experiment_id):
    """Return a bar plot (as JSON) of the number of instances per cluster.

    Every cluster index in ``range(num_clusters)`` contributes one bar,
    labelled with the cluster's ``label``; no filtering on cluster size is
    applied, so empty clusters are included.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    loaded_clustering = Clustering.fromJson(current_experiment)
    cluster_indexes = range(loaded_clustering.num_clusters)
    sizes = [len(loaded_clustering.clusters[index].instances_ids)
             for index in cluster_indexes]
    bar_labels = [loaded_clustering.clusters[index].label
                  for index in cluster_indexes]
    barplot = BarPlot(bar_labels)
    barplot.addDataset(PlotDataset(sizes, 'Num. Instances'))
    return jsonify(barplot.toJson())
示例#28
0
文件: labels.py 项目: Piuliss/SecuML
def addLabel(experiment_id, inst_experiment_label_id, iteration_number,
             instance_id, label, family, method, annotation):
    """Add a label to an instance and optionally log the user action.

    ``annotation`` is interpreted as a boolean that is true only for the
    exact string ``'true'``. The label is stored through
    ``labels_tools.addLabel``. When the module-level ``user_exp`` flag is
    truthy, a comma-separated line with the timestamp, ``addLabel``,
    iteration number, instance id, label, family, method and annotation
    flag is appended to ``user_actions.log`` in the experiment output
    directory. Always returns an empty string.
    """
    annotation = annotation == 'true'
    labels_tools.addLabel(session, inst_experiment_label_id, instance_id,
                          label, family, iteration_number, method, annotation)
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = experiment.getOutputDirectory() + 'user_actions.log'
        fields = [
            datetime.datetime.now(), 'addLabel', iteration_number, instance_id,
            label, family, method, annotation
        ]
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no separate
        # existence check (and no risk of truncating a file created in
        # between) is needed. f.write() behaves the same on Python 2 and 3,
        # unlike the Python 2-only ``print >> f`` statement.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return ''
示例#29
0
def getFamiliesBarplot(experiment_id, iteration, label):
    """Return a bar plot (as JSON) of the instance count of each family.

    Counts come from ``labels_tools.getFamiliesCounts`` for ``label``;
    bars are ordered by ascending family name and coloured with the
    label's colour. An ``iteration`` equal to the string ``'None'`` is
    passed on as ``None``.
    """
    current_experiment = updateCurrentExperiment(experiment_id)
    labels_id = current_experiment.labels_id
    # NOTE(review): unlike the sibling handlers, ``iteration`` is forwarded
    # without int() conversion -- confirm getFamiliesCounts accepts a string.
    max_iteration = None if iteration == 'None' else iteration
    counts_by_family = labels_tools.getFamiliesCounts(
        current_experiment.session,
        labels_id,
        iteration_max=max_iteration,
        label=label)
    family_names = counts_by_family.keys()
    frame = pd.DataFrame({
        'families': family_names,
        'counts': [counts_by_family[name] for name in family_names],
    })
    matrix_tools.sortDataFrame(frame, 'families', ascending=True, inplace=True)
    barplot = BarPlot(list(frame['families']))
    counts_dataset = PlotDataset(list(frame['counts']), 'Num. Instances')
    counts_dataset.setColor(colors_tools.getLabelColor(label))
    barplot.addDataset(counts_dataset)
    return jsonify(barplot.toJson())
示例#30
0
def getFamiliesPerformance(experiment_id, train_test, label, threshold):
    """Return a bar plot (as JSON) of per-family performance at a threshold.

    For ``label == 'malicious'`` the file ``tp_families_thresholds.csv``
    is read and the dataset is named 'Detection Rate'; for
    ``label == 'benign'`` the file ``fp_families_thresholds.csv`` is read
    and the dataset is named 'False Positive Rate'. ``threshold`` is
    divided by 100 and the row whose index value is closest to the result
    (the last index value is not considered) provides the plotted values.

    Raises:
        ValueError: if ``label`` is neither ``'malicious'`` nor
            ``'benign'`` (previously this left ``tp_fp`` unbound and built
            a file name without its prefix).
    """
    experiment = updateCurrentExperiment(experiment_id)
    if label == 'malicious':
        prefix, tp_fp = 'tp_', 'Detection Rate'
    elif label == 'benign':
        prefix, tp_fp = 'fp_', 'False Positive Rate'
    else:
        raise ValueError('Unknown label: ' + str(label))
    filename = experiment.getOutputDirectory() + train_test + '/families/'
    filename += prefix + 'families_thresholds.csv'
    with open(filename, 'r') as f:
        perf = pd.read_csv(f, header=0, index_col=0)
    # The last column is left out of the bar labels.
    families = list(perf.columns.values[:-1])
    threshold = float(threshold) / 100
    thresholds = list(perf.index[:-1])
    # Pick the available threshold closest to the requested one.
    threshold_value = min(thresholds, key=lambda t: abs(t - threshold))
    # NOTE(review): this row keeps every column while ``families`` drops
    # the last one, so it holds one more value than there are labels --
    # confirm this is intended.
    values = list(perf.loc[threshold_value])
    barplot = BarPlot(families)
    barplot.addDataset(PlotDataset(values, tp_fp))
    return jsonify(barplot.toJson())