Exemplo n.º 1
0
def mergeFamilies(exp_id, annotations_id, label, families, new_family):
    """Merge several families into a single one and log the user action.

    Args:
        exp_id: Identifier of the experiment, passed to ``update_curr_exp``.
        annotations_id: Identifier of the annotations to update.
        label: Label passed to ``annotations_db_tools.merge_families``.
        families: Comma-separated names of the families to merge.
        new_family: Name of the resulting family.

    Returns:
        An empty string.
    """
    families = families.split(',')
    annotations_db_tools.merge_families(session, annotations_id, label,
                                        families, new_family)
    session.commit()
    if user_exp:
        exp = update_curr_exp(exp_id)
        filename = path.join(exp.output_dir(), 'user_actions.log')
        entry = ','.join(map(str, [datetime.datetime.now(),
                                   'merge_families',
                                   new_family] + families))
        # Mode 'a' creates the file when it is missing, so no existence check
        # is required. Every entry ends with a newline so that successive
        # actions do not run together on one line of the log.
        with open(filename, 'a') as f:
            f.write(entry + '\n')
    return ''
Exemplo n.º 2
0
def changeFamilyLabel(exp_id, annotations_id, label, family):
    """Change the label of a family and log the user action.

    Args:
        exp_id: Identifier of the experiment, passed to ``update_curr_exp``.
        annotations_id: Identifier of the annotations to update.
        label: Label passed to ``annotations_db_tools.change_family_label``.
        family: Name of the family whose label is changed.

    Returns:
        An empty string.
    """
    annotations_db_tools.change_family_label(session, annotations_id, label,
                                             family)
    session.commit()
    if user_exp:
        exp = update_curr_exp(exp_id)
        filename = path.join(exp.output_dir(), 'user_actions.log')
        entry = ','.join(
            map(str, [
                datetime.datetime.now(), 'change_family_label', family, label
            ]))
        # Mode 'a' creates the file when it is missing, so no existence check
        # is required. Every entry ends with a newline so that successive
        # actions do not run together on one line of the log.
        with open(filename, 'a') as f:
            f.write(entry + '\n')
    return ''
Exemplo n.º 3
0
def getInstance(exp_id, view_id, instance_id):
    """Display an instance through the view module of its project.

    The module ``secuml.web.views.projects.<project>`` is imported
    dynamically and its ``get_instance`` function produces the response.

    Args:
        exp_id: Identifier of the experiment, passed to ``update_curr_exp``.
        view_id: Identifier of the view; the string ``'None'`` is converted
            to ``None``.
        instance_id: Identifier of the instance to display.

    Returns:
        The value returned by the project's ``get_instance``; when the
        project module cannot be imported, an error message with HTTP
        status 500.
    """
    if view_id == 'None':
        view_id = None
    experiment = update_curr_exp(exp_id)
    dataset_conf = experiment.exp_conf.dataset_conf
    ident, user_id = idents_tools.get_ident(session, dataset_conf.dataset_id,
                                            instance_id)
    project = dataset_conf.project
    # Only the dynamic import is guarded: the error message below is
    # specifically about a missing project module.
    try:
        module = importlib.import_module('secuml.web.views.projects.%s' %
                                         project)
    except ImportError as e:
        app.logger.error(str(e))
        app.logger.error('Please create the project file "%s.py" in '
                         'secuml/web/views/projects/' % project)
        # A 2-tuple returned from a Flask view is (body, status): the
        # identifier belongs in the message, not in the status slot.
        return 'Unable to display the instance %s' % ident, 500
    return module.get_instance(experiment, view_id, user_id, ident)
Exemplo n.º 4
0
def activeLearningMonitoring(exp_id, iteration, kind, sub_kind):
    """Send a monitoring image of an active learning iteration.

    Args:
        exp_id: Identifier of the experiment, passed to ``update_curr_exp``.
        iteration: Iteration whose sub-directory contains the image.
        kind: One of ``'labels'``, ``'families'``, ``'clustering'`` or
            ``'time'``.
        sub_kind: Used to build the file name for the ``'labels'`` and
            ``'clustering'`` kinds; ignored otherwise.

    Returns:
        The PNG file, or the string ``'FileNotFoundError'`` when
        ``send_file`` raises ``FileNotFoundError``.

    Raises:
        ValueError: If ``kind`` is not one of the supported values.
    """
    experiment = update_curr_exp(exp_id)
    directory = path.join(experiment.output_dir(), str(iteration))
    if kind == 'labels':
        filename = path.join(directory, 'labels_monitoring',
                             'iteration' + '_' + sub_kind + '.png')
    elif kind == 'families':
        filename = path.join(directory, 'labels_monitoring',
                             'families_monitoring.png')
    elif kind == 'clustering':
        filename = path.join(directory, 'clustering_evaluation',
                             sub_kind + '_monitoring.png')
    elif kind == 'time':
        filename = path.join(directory, 'execution_times.png')
    else:
        # Fail explicitly instead of hitting an unbound `filename` below.
        raise ValueError('Unknown monitoring kind: %r' % (kind,))
    try:
        return send_file(filename, mimetype='image/png')
    except FileNotFoundError:
        return 'FileNotFoundError'
Exemplo n.º 5
0
def getFeaturesInfo(exp_id):
    """Return, as JSON, the type and database metadata of each feature.

    The types are read from ``features_types.json`` in the experiment output
    directory; ``user_id``, ``name`` and ``description`` come from the
    ``FeaturesAlchemy`` row whose ``id`` matches the feature identifier.
    """
    exp = update_curr_exp(exp_id)
    types_path = path.join(exp.output_dir(), 'features_types.json')
    with open(types_path, 'r') as f:
        features_types = json.load(f)
    infos = {}
    for feature_id, feature_type in features_types.items():
        # Exactly one row is expected for each feature identifier.
        row = (session.query(FeaturesAlchemy)
               .filter(FeaturesAlchemy.id == feature_id)
               .one())
        infos[feature_id] = {
            'type': feature_type,
            'user_id': row.user_id,
            'name': row.name,
            'description': row.description
        }
    return jsonify(infos)
Exemplo n.º 6
0
def updateAnnotation(exp_id, annotations_id, iter_num, instance_id, label,
                     family, method):
    """Update the annotation of an instance and log the user action.

    Args:
        exp_id: Identifier of the experiment, passed to ``update_curr_exp``.
        annotations_id: Identifier of the annotations to update.
        iter_num: Iteration number as a string; ``'None'`` is converted to
            ``None``, any other value to ``int``.
        instance_id: Identifier of the annotated instance.
        label: Label of the annotation.
        family: Family of the annotation.
        method: Annotation method passed to the database helper.

    Returns:
        An empty string.
    """
    iter_num = None if iter_num == 'None' else int(iter_num)
    annotations_db_tools.update_annotation(session, annotations_id,
                                           instance_id, label, family,
                                           iter_num, method)
    session.commit()
    if user_exp:
        exp = update_curr_exp(exp_id)
        filename = path.join(exp.output_dir(), 'user_actions.log')
        entry = ','.join(map(str, [datetime.datetime.now(),
                                   'update_annotation',
                                   iter_num, instance_id, label, family,
                                   method]))
        # Mode 'a' creates the file when it is missing, so no existence check
        # is required. Every entry ends with a newline so that successive
        # actions do not run together on one line of the log.
        with open(filename, 'a') as f:
            f.write(entry + '\n')
    return ''
Exemplo n.º 7
0
def getHexBin(exp_id, x, y):
    """Return, as JSON, the hexagonal bins stored in ``c_<x>_<y>_hexbin.json``.

    For every bin except the first element of the file, each instance list
    is down-sampled to at most ``NUM_MAX_INSTANCES`` identifiers, sorted, and
    completed with the matching user instance identifiers.
    """
    experiment = update_curr_exp(exp_id)
    hexbin_path = path.join(experiment.output_dir(),
                            '_'.join(['c', x, y, 'hexbin.json']))
    with open(hexbin_path, 'r') as f:
        hex_bins = json.load(f)
    kinds = ('malicious', 'ok')
    for hex_bin in hex_bins[1:]:
        # First pass: cap the number of instances kept for each kind.
        for kind in kinds:
            key = '%s_instances' % kind
            if hex_bin['num_' + key] > NUM_MAX_INSTANCES:
                hex_bin[key] = random.sample(hex_bin[key], NUM_MAX_INSTANCES)
        # Second pass: sort the identifiers and attach the user identifiers.
        for kind in kinds:
            key = '%s_instances' % kind
            ids = hex_bin[key]
            ids.sort()
            hex_bin['%s_user_ids' % kind] = get_user_instance_ids(ids)
            hex_bin[key] = ids
    return jsonify(hex_bins)
Exemplo n.º 8
0
def getPredictions(exp_id, index, label):
    """Return the instances whose predicted probability falls in a bin.

    The bin is ``[index * 0.1, (index + 1) * 0.1]``, both bounds included.
    When ``label`` is ``'malicious'`` or ``'benign'`` the selection is further
    restricted on the ``ground_truth`` column; any other value keeps every
    instance of the bin.

    Returns:
        A JSON object with the keys ``instances`` and ``proba``.
    """
    exp = update_curr_exp(exp_id)
    filename = path.join(exp.output_dir(), 'predictions.csv')
    bin_index = int(index)
    lower = bin_index * 0.1
    upper = (bin_index + 1) * 0.1
    with open(filename, 'r') as f:
        data = pd.read_csv(f, header=0, index_col=0)
    proba_col = data.loc[:, 'predicted_proba']
    in_bin = (proba_col >= lower) & (proba_col <= upper)
    data = data.loc[in_bin, :]
    if label == 'malicious':
        data = data.loc[data.loc[:, 'ground_truth'] == True, :]  # noqa: E712
    elif label == 'benign':
        data = data.loc[data.loc[:, 'ground_truth'] == False, :]  # noqa: E712
    selected_instances = [int(x) for x in list(data.index.values)]
    proba = list(data['predicted_proba'])
    return jsonify({'instances': selected_instances, 'proba': proba})
Exemplo n.º 9
0
def getSortedFeatures(exp_id, criterion):
    """Return, as JSON, the features ordered according to ``criterion``.

    Args:
        exp_id: Identifier of the experiment, passed to ``update_curr_exp``.
        criterion: ``'alphabet'`` sorts the feature identifiers and returns
            no values; ``'null_variance'`` keeps the features whose
            ``variance`` is 0; any other value is handed to
            ``sort_data_frame`` as the column to sort on.

    Returns:
        A JSON object with the keys ``features``, ``values``, ``pvalues``
        and ``user_ids``.
    """
    exp = update_curr_exp(exp_id)
    scores = pd.read_csv(path.join(exp.output_dir(), 'scores.csv'),
                         header=0, index_col=0)

    if criterion == 'alphabet':
        features = sorted(scores.index.values.tolist())
        user_ids = get_feature_user_ids(session, features)
        return jsonify({
            'features': features,
            'values': None,
            'pvalues': None,
            'user_ids': user_ids
        })

    if criterion == 'null_variance':
        scores = scores.loc[scores.loc[:, 'variance'] == 0, :]
        criterion = 'variance'
    else:
        sort_data_frame(scores, criterion, False, True)

    features = scores.index.values.tolist()
    values = ['%.2f' % v for v in scores[criterion].tolist()]

    # P-values are only reported when the matching column exists.
    pvalues = None
    pvalues_col = '_'.join([criterion, 'pvalues'])
    if pvalues_col in scores.columns:
        pvalues = ['%.2E' % Decimal(v) for v in scores[pvalues_col].tolist()]

    user_ids = get_feature_user_ids(session, features)
    return jsonify({
        'features': features,
        'values': values,
        'pvalues': pvalues,
        'user_ids': user_ids
    })
Exemplo n.º 10
0
def getTopWeightedFeatures(exp_id, instance_id, size):
    """Return a bar plot of the ``size`` most heavily weighted features.

    The feature values of the instance are transformed by the classifier's
    ``scaler`` step and multiplied by the ``coef_`` of its ``model`` step.
    The features with the largest absolute weighted values are plotted,
    largest first.
    """
    instance_id = int(instance_id)
    classifier = get_classifier(exp_id)
    exp = update_curr_exp(exp_id)
    f_names, f_values = FeaturesFromExp.get_instance(exp, instance_id)

    # Scale the single instance (reshaped into one row), then weight it.
    scaler = classifier.named_steps['scaler']
    scaled_values = scaler.transform(np.reshape(f_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  classifier.named_steps['model'].coef_)

    # Ascending sort on the absolute weight; the reversed slice then takes
    # the last `size` entries, from the largest downwards.
    ranked = sorted(zip(f_names, f_values, weighted_values[0]),
                    key=lambda triple: abs(triple[2]))
    top = ranked[:-int(size) - 1:-1]
    f_names, f_values, f_weighted = zip(*top)

    labels = [str(name) for name in f_names]
    tooltips = ['%s (%.2f)' % (name, value)
                for name, value in zip(f_names, f_values)]
    barplot = BarPlot(labels)
    dataset = PlotDataset(f_weighted, None)
    dataset.set_color(red)
    barplot.add_dataset(dataset)
    return jsonify(barplot.to_json(tooltip_data=tooltips))
Exemplo n.º 11
0
def getFeatureScores(exp_id, feature):
    """Send ``<feature>/scores.json`` from the experiment output directory."""
    output_dir = update_curr_exp(exp_id).output_dir()
    scores_path = path.join(output_dir, feature, 'scores.json')
    return send_file(scores_path)
Exemplo n.º 12
0
def getReconsErrors(exp_id):
    """Send ``reconstruction_errors.csv`` from the experiment output dir."""
    output_dir = update_curr_exp(exp_id).output_dir()
    return send_file(path.join(output_dir, 'reconstruction_errors.csv'))
Exemplo n.º 13
0
def getCumExplVar(exp_id):
    """Send ``cumuled_explained_variance.csv`` from the output directory."""
    output_dir = update_curr_exp(exp_id).output_dir()
    return send_file(path.join(output_dir, 'cumuled_explained_variance.csv'))
Exemplo n.º 14
0
def getProjectionMatrix(exp_id):
    """Send ``projection_matrix.csv`` from the experiment output directory."""
    output_dir = update_curr_exp(exp_id).output_dir()
    return send_file(path.join(output_dir, 'projection_matrix.csv'))
Exemplo n.º 15
0
def getFamiliesInstancesToAnnotate(exp_id, iter, label):
    """Send ``toannotate_<label>.json`` from the directory of an iteration."""
    output_dir = update_curr_exp(exp_id).output_dir()
    iteration_dir = str(iter)
    basename = 'toannotate_%s.json' % label
    return send_file(path.join(output_dir, iteration_dir, basename))
Exemplo n.º 16
0
def getHexBin(exp_id, x, y):
    """Send ``c_<x>_<y>_hexbin.json`` from the experiment output directory."""
    output_dir = update_curr_exp(exp_id).output_dir()
    basename = '_'.join(['c', x, y, 'hexbin.json'])
    return send_file(path.join(output_dir, basename))
Exemplo n.º 17
0
def getFeatures(exp_id, instance_id):
    """Return, as JSON, the feature values of an instance keyed by name."""
    instance_id = int(instance_id)
    experiment = update_curr_exp(exp_id)
    f_names, f_values = FeaturesFromExp.get_instance(experiment, instance_id)
    features = {}
    # Names and values are matched by position.
    for position, value in enumerate(f_values):
        features[f_names[position]] = value
    return jsonify(features)
Exemplo n.º 18
0
def rcdAnnotations(exp_id, iteration):
    """Render ``active_learning/rcd_annotations.html`` for the experiment.

    ``iteration`` is not used by this function.
    """
    project = update_curr_exp(exp_id).exp_conf.dataset_conf.project
    return render_template('active_learning/rcd_annotations.html',
                           project=project)
Exemplo n.º 19
0
def displayAlerts(exp_id, analysis_type):
    """Render ``diadem/alerts.html`` for the experiment's project.

    ``analysis_type`` is not used by this function.
    """
    project = update_curr_exp(exp_id).exp_conf.dataset_conf.project
    return render_template('diadem/alerts.html', project=project)
Exemplo n.º 20
0
def get_classifier(exp_id):
    """Load ``model.out`` from the output directory of the train experiment.

    The train experiment is the one whose identifier ``get_train_exp``
    returns for ``exp_id``.
    """
    train_exp = update_curr_exp(get_train_exp(exp_id))
    model_path = path.join(train_exp.output_dir(), 'model.out')
    return joblib.load(model_path)
Exemplo n.º 21
0
def getClusterLabelsFamilies(exp_id, cluster_id):
    """Return, as JSON, the result of ``get_labels_families`` for a cluster."""
    cluster_id = int(cluster_id)
    experiment = update_curr_exp(exp_id)
    output_dir = experiment.output_dir()
    clustering = ClustersExp.from_json(output_dir)
    labels_families = clustering.get_labels_families(experiment, cluster_id)
    return jsonify(labels_families)
Exemplo n.º 22
0
def getClusterLabel(exp_id, selected_cluster):
    """Return the result of the clustering's ``get_label`` for a cluster."""
    cluster_index = int(selected_cluster)
    output_dir = update_curr_exp(exp_id).output_dir()
    return ClustersExp.from_json(output_dir).get_label(cluster_index)
Exemplo n.º 23
0
def activeLearningModelsMonitoring(exp_id, iter, train_test):
    """Send ``<train_test>.png`` from the ``model_perf`` directory.

    The image is looked up in ``<output_dir>/<iter>/model_perf``.
    """
    output_dir = update_curr_exp(exp_id).output_dir()
    image_path = path.join(output_dir, str(iter), 'model_perf',
                           '%s.png' % train_test)
    return send_file(image_path, mimetype='image/png')
Exemplo n.º 24
0
def currentAnnotationIteration(exp_id):
    """Return the experiment's current iteration, converted to a string."""
    current_iter = update_curr_exp(exp_id).get_current_iter()
    return str(current_iter)
Exemplo n.º 25
0
def predictionsAnalysis(train_exp_id, index):
    """Render ``diadem/predictions.html`` for the experiment's project.

    ``index`` is not used by this function.
    """
    project = update_curr_exp(train_exp_id).exp_conf.dataset_conf.project
    return render_template('diadem/predictions.html', project=project)
Exemplo n.º 26
0
def getCriterionDensity(exp_id, criterion):
    """Send ``<criterion>_density.png`` from the experiment output dir."""
    output_dir = update_curr_exp(exp_id).output_dir()
    basename = '%s_density.png' % criterion
    return send_file(path.join(output_dir, basename))
Exemplo n.º 27
0
def displayErrors(exp_id, error_kind):
    """Render ``diadem/errors.html`` for the experiment's project.

    ``error_kind`` is not used by this function.
    """
    project = update_curr_exp(exp_id).exp_conf.dataset_conf.project
    return render_template('diadem/errors.html', project=project)
Exemplo n.º 28
0
def getFamiliesInstancesToAnnotate(exp_id, iteration, predicted_label):
    """Send ``toannotate_<predicted_label>.json`` for an iteration.

    The file is looked up in ``<output_dir>/<iteration>``.
    """
    output_dir = update_curr_exp(exp_id).output_dir()
    basename = 'toannotate_' + predicted_label + '.json'
    return send_file(path.join(output_dir, str(iteration), basename))