def getTopWeightedFeatures(project, dataset, experiment, instance_dataset,
                           inst_exp_id, instance_id, size):
    """Return a barplot (JSON) of the `size` features of one instance with
    the largest absolute weighted value (scaled value * model coefficient).

    The model comes from (dataset, experiment); the instance features come
    from (instance_dataset, inst_exp_id).
    """
    instance_id = int(instance_id)
    model_experiment_obj = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment, db, cursor)
    validation_experiment = ExperimentFactory.getFactory().fromJson(
        project, instance_dataset, inst_exp_id, db, cursor)
    # Get the features of the selected instance.
    features_names, features_values = validation_experiment.getFeatures(
        instance_id)
    features_values = [float(value) for value in features_values]
    # Get the pipeline with the scaler and the logistic model.
    pipeline = model_experiment_obj.getModelPipeline()
    # Scale the features before applying the model coefficients.
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)
    # list(zip(...)) instead of map(lambda ...): on Python 3, map returns
    # a lazy iterator which has no .sort(); a list behaves identically on
    # both Python 2 and 3.
    features = list(zip(features_names, features_values,
                        weighted_values[0]))
    features.sort(key=lambda tup: abs(tup[2]))
    # Keep the `size` entries with the largest |weighted value|, ordered
    # by decreasing magnitude.
    features = features[:-int(size) - 1:-1]
    # Tooltips display the raw (unscaled) feature values.
    tooltips = [x[1] for x in features]
    barplot = BarPlot([x[0] for x in features])
    barplot.addDataset([x[2] for x in features], colors_tools.red, None)
    barplot.addTooltips(tooltips)
    return jsonify(barplot.barplot)
def getAnnotationsTypes(project, dataset, experiment_id, iteration):
    """Serve the annotations-types JSON file of a given iteration."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    iteration_dir = dir_tools.getExperimentOutputDirectory(exp) + \
        str(iteration) + '/'
    return send_file(iteration_dir + 'annotations_types.json')
def getConf(project, dataset, experiment_id):
    """Return the experiment configuration as JSON, augmented with a flag
    telling whether the dataset has ground-truth labels."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    configuration = exp.toJson()
    mysql_tools.useDatabase(cursor, project, dataset)
    configuration['has_true_labels'] = labels_tools.hasTrueLabels(cursor)
    return jsonify(configuration)
def removeExperimentDB(self):
    """Delete this experiment from the database and the filesystem.

    Children experiments are removed recursively first.  The experiment
    labels are removed only for a root experiment (no parent), since
    children share their parent's labels.  Finally the Experiments rows
    and the output directory are deleted.
    """
    experiment_id, experiment_label_id = self.isInDB()
    # Nothing to do if the experiment was never stored.
    if experiment_id is None:
        return
    self.experiment_id = experiment_id
    self.experiment_label_id = experiment_label_id
    ## Remove children experiments
    children = experiment_db_tools.getChildren(self.cursor, experiment_id)
    for child in children:
        child_exp = ExperimentFactory.getFactory().fromJson(
            self.project, self.dataset, child, self.db, self.cursor)
        child_exp.removeExperimentDB()
    # Only a root experiment owns (and may delete) the labels.
    if self.parent is None:
        labels_tools.removeExperimentLabels(self.cursor,
                                            experiment_label_id)
    self.cursor.execute(
        'DELETE FROM Experiments \
         WHERE name = %s \
         AND kind = %s', (
            self.experiment_name,
            self.kind,
        ))
    self.db.commit()
    # Remove the experiment output files as well.
    experiment_dir = dir_tools.getExperimentOutputDirectory(self)
    dir_tools.removeDirectory(experiment_dir)
def activeLearningSuggestionsMonitoring(project, dataset, experiment_id,
                                        iteration):
    """Serve the accuracy plot of the high-confidence suggestions
    computed at the previous iteration."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    # The suggestions accuracy is stored under the previous iteration.
    prev_iteration_dir = dir_tools.getExperimentOutputDirectory(exp) + \
        str(int(iteration) - 1) + '/'
    png_file = (prev_iteration_dir + 'suggestions_accuracy/' +
                'labels_families' + '_high_confidence_suggestions.png')
    return send_file(png_file)
def getClusterPredictedLabel(project, dataset, experiment_id,
                             selected_cluster):
    """Return the label predicted for the selected cluster."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    clustering = Clustering.fromJson(exp)
    return clustering.getClusterLabel(int(selected_cluster))
def getClusterLabelsFamilies(project, dataset, experiment_id,
                             selected_cluster):
    """Return (as JSON) the labels/families of the selected cluster."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    families = Clustering.fromJson(exp).getClusterLabelsFamilies(
        int(selected_cluster))
    return jsonify(families)
def getFeatures(project, dataset, experiment, instance_dataset,
                instance_id):
    """Return (as JSON) the [name, value] feature pairs of one instance.
    """
    instance_id = int(instance_id)
    mysql_tools.useDatabase(cursor, project, dataset)
    experiment_obj = ExperimentFactory.getFactory().fromJson(
        project, instance_dataset, experiment, db, cursor)
    features_names, features_values = experiment_obj.getFeatures(
        instance_id)
    # list(zip(...)): on Python 3 zip is a lazy iterator that jsonify
    # cannot serialize; a list behaves identically on Python 2 and 3.
    features = list(zip(features_names, features_values))
    return jsonify(features)
def getFamiliesInstancesToAnnotate(project, dataset, experiment_id,
                                   iteration, predicted_label):
    """Serve the to-annotate JSON file of a predicted label for a given
    iteration."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    iteration_dir = dir_tools.getExperimentOutputDirectory(exp) + \
        str(iteration) + '/'
    return send_file(iteration_dir + 'toannotate_' + predicted_label +
                     '.json')
def getValidationDataset(project, dataset, experiment_id):
    """Return the dataset used for validation: the separate test dataset
    when one is configured, the experiment's own dataset otherwise."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    test_conf = exp.classification_conf.test_conf
    if test_conf.method != 'test_dataset':
        return dataset
    return test_conf.test_dataset
def getNumComponents(project, dataset, experiment_id):
    """Return (as a string) the number of components of the projection,
    read from the header of the projection matrix CSV file."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    matrix_file = dir_tools.getExperimentOutputDirectory(exp) + \
        'projection_matrix.csv'
    with open(matrix_file, 'r') as f:
        # First header column is the feature name; the rest are the
        # components.
        num_components = len(f.readline().split(',')) - 1
    return str(num_components)
def getNumElements(project, dataset, experiment_id, selected_cluster):
    """Return (as JSON) the number of instances in the selected cluster.
    """
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    cluster = Clustering.fromJson(exp).clusters[int(selected_cluster)]
    return jsonify({'num_elements': cluster.numInstances()})
def removeClusterLabel(project, dataset, experiment_id, selected_cluster,
                       num_results):
    """Remove the label of the selected cluster and commit the change."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    clustering = Clustering.fromJson(exp)
    clustering.removeClusterLabel(int(selected_cluster), int(num_results))
    db.commit()
    return ''
def getClustersLabels(project, dataset, experiment_id):
    """Return (as JSON) the identifiers ('c_<i>') of the non-empty
    clusters of the experiment.

    NOTE(review): a function with the same name is defined later in this
    module and shadows this one at import time — confirm which definition
    the web routes actually bind to.
    """
    experiment = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                         experiment_id, db,
                                                         cursor)
    clustering = Clustering.fromJson(experiment)
    # Do not consider empty clusters for visualization
    labels = []
    for c in range(clustering.num_clusters):
        if clustering.clusters[c].numInstances() > 0:
            labels.append('c_' + str(c))
    return jsonify({'labels': labels})
def getClustersLabels(project, dataset, experiment_id):
    """Return (as JSON) the id and label of every cluster of the
    experiment, including empty ones."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    clustering = Clustering.fromJson(exp)
    clusters = [{'id': c, 'label': clustering.clusters[c].label}
                for c in range(clustering.num_clusters)]
    return jsonify({'clusters': clusters})
def getStatsPlot(project, dataset, experiment_id, plot_type, feature):
    """Serve a statistics plot for a feature: JSON for histogram plots,
    PNG for every other plot type."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    feature_dir = dir_tools.getExperimentOutputDirectory(exp) + \
        feature + '/'
    extension = '.json' if 'histogram' in plot_type else '.png'
    return send_file(feature_dir + plot_type + extension)
def getClusterLabelFamilyIds(project, dataset, experiment_id,
                             selected_cluster, label, family, num_results):
    """Return (as JSON, web list format) the instance ids of a given
    label/family inside the selected cluster."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    clustering = Clustering.fromJson(exp)
    ids = clustering.getClusterLabelFamilyIds(int(selected_cluster),
                                              label, family)
    return jsonify(web_tools.listResultWebFormat(ids, int(num_results)))
def getInstancesToAnnotate(project, dataset, experiment_id, iteration,
                           predicted_label):
    """Return (as JSON) the instance ids to annotate for a predicted
    label at a given iteration."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    csv_file = dir_tools.getExperimentOutputDirectory(exp) + \
        str(iteration) + '/' + 'toannotate_' + predicted_label + '.csv'
    annotations = pd.read_csv(csv_file)
    return jsonify({'instances': list(annotations.instance_id)})
def getClusterInstancesVisu(project, dataset, experiment_id,
                            selected_cluster, c_e_r, num_results):
    """Return (as JSON) the instance ids to display for the selected
    cluster, for one of the center/edge/random (c_e_r) groups."""
    selected_cluster = int(selected_cluster)
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    clustering = Clustering.fromJson(exp)
    visu = clustering.getClusterInstancesVisu(
        selected_cluster, int(num_results), random=True)
    return jsonify({selected_cluster: visu[c_e_r]})
def addClusterLabel(project, dataset, experiment_id, selected_cluster,
                    num_results, label, family, label_iteration,
                    label_method):
    """Label the selected cluster and commit the change."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    clustering = Clustering.fromJson(exp)
    clustering.addClusterLabel(int(selected_cluster), int(num_results),
                               label, family, label_iteration,
                               label_method)
    db.commit()
    return ''
def getIterationSupervisedExperiment(project, dataset, experiment_id,
                                     iteration):
    """Return (as a string) the id of the supervised experiment (binary
    or multiclass) of a given active-learning iteration."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    active_learning = Iteration(exp, int(iteration))
    if 'binary' in exp.conf.models_conf.keys():
        binary_multiclass = 'binary'
    else:
        binary_multiclass = 'multiclass'
    models_exp_file = active_learning.output_directory + \
        'models_experiments.json'
    with open(models_exp_file, 'r') as f:
        models_exp = json.load(f)
    return str(models_exp[binary_multiclass])
def getTopModelCoefficients(project, dataset, experiment, size):
    """Return a barplot (JSON) of the `size` model coefficients with the
    largest absolute value."""
    size = int(size)
    model_experiment_obj = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment, db, cursor)
    pipeline = model_experiment_obj.getModelPipeline()
    model_coefficients = pipeline.named_steps['model'].coef_[0]
    features_names = model_experiment_obj.getFeaturesNames()
    # list(zip(...)) instead of map(lambda ...): on Python 3, map returns
    # a lazy iterator which has no .sort(); a list behaves identically on
    # both Python 2 and 3.
    coefficients = list(zip(features_names, model_coefficients))
    coefficients.sort(key=lambda tup: abs(tup[1]))
    # Keep the `size` coefficients with the largest magnitude, ordered by
    # decreasing |value|.
    coefficients = coefficients[:-size - 1:-1]
    barplot = BarPlot([x[0] for x in coefficients])
    barplot.addDataset([x[1] for x in coefficients], '#d9534f', None)
    return jsonify(barplot.barplot)
def activeLearningModelsMonitoring(project, dataset, experiment_id,
                                   iteration, train_cv_validation):
    """Serve the performance monitoring plot (AUC for binary models,
    accuracy otherwise) of the models of a given iteration."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    active_learning = Iteration(exp, int(iteration))
    if 'binary' in exp.conf.models_conf.keys():
        binary_multiclass, estimator = 'binary', 'auc'
    else:
        binary_multiclass, estimator = 'multiclass', 'accuracy'
    png_file = (active_learning.output_directory +
                'models_performance/' +
                binary_multiclass + '_' + train_cv_validation + '_' +
                estimator + '_monitoring.png')
    return send_file(png_file, mimetype='image/png')
def getAlertsClusteringExperimentId(project, dataset, experiment_id):
    """Return (as a string) the id of the clustering experiment built on
    the alerts of the given detection experiment.

    Raises:
        ValueError: if the validation method is neither 'random_split'
            nor 'test_dataset'.
    """
    experiment = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    test_conf = experiment.classification_conf.test_conf
    if test_conf.method == 'random_split':
        test_dataset = dataset
        test_exp_id = experiment_id
    elif test_conf.method == 'test_dataset':
        test_dataset = test_conf.test_exp.dataset
        test_exp_id = test_conf.test_exp.experiment_id
    else:
        # Previously fell through with test_dataset/test_exp_id unbound,
        # raising a confusing NameError; fail explicitly instead.
        raise ValueError(
            'Unsupported validation method: %s' % test_conf.method)
    mysql_tools.useDatabase(cursor, project, test_dataset)
    clustering_experiment_id = \
        AlertsMonitoring.AlertsMonitoring.getAlertsClusteringExperimentId(
            cursor, test_exp_id)
    return str(clustering_experiment_id)
def getTopModelFeatures(experiment_id, size):
    """Return a barplot (JSON) of the `size` most important model
    features (largest absolute coefficient/importance)."""
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    model_coefficients = exp.getTopFeatures()
    features_names = exp.getFeaturesNames()
    # list(zip(...)) instead of map(lambda ...): on Python 3, map returns
    # a lazy iterator which has no .sort(); a list behaves identically on
    # both Python 2 and 3.
    coefficients = list(zip(features_names, model_coefficients))
    coefficients.sort(key=lambda tup: abs(tup[1]))
    # Keep the `size` features with the largest magnitude, ordered by
    # decreasing |value|.
    coefficients = coefficients[:-size - 1:-1]
    barplot = BarPlot([x[0] for x in coefficients])
    dataset = PlotDataset([x[1] for x in coefficients], None)
    # Highlight in red only when the importance is a signed weight.
    if exp.classification_conf.featureImportance() == 'weight':
        dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
def getAlerts(project, dataset, experiment_id, analysis_type):
    """Return (as JSON) the alerts of an experiment and their predicted
    probabilities; the list is truncated (topN) or sampled (random) when
    it exceeds the configured maximum number of alerts."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    alerts_file = dir_tools.getExperimentOutputDirectory(exp) + \
        'alerts.csv'
    with open(alerts_file, 'r') as f:
        data = pd.read_csv(f, header=0, index_col=0)
    alerts_conf = exp.classification_conf.test_conf.alerts_conf
    num_max_alerts = alerts_conf.num_max_alerts
    # Each entry is a (instance_id, predicted_proba) tuple.
    alerts = list(data[['predicted_proba']].itertuples())
    if num_max_alerts < len(alerts):
        if analysis_type == 'topN':
            alerts = alerts[:num_max_alerts]
        elif analysis_type == 'random':
            alerts = random.sample(alerts, num_max_alerts)
    return jsonify({'instances': [alert[0] for alert in alerts],
                    'proba': dict(alerts)})
def getPredictions(project, dataset, experiment_id, train_test, index):
    """Return (as JSON) the instances whose predicted probability falls
    in the decile [index * 0.1, (index + 1) * 0.1], with their
    probabilities."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    predictions_file = dir_tools.getExperimentOutputDirectory(exp) + \
        train_test + '/predictions.csv'
    index = int(index)
    min_value = index * 0.1
    max_value = (index + 1) * 0.1
    with open(predictions_file, 'r') as f:
        data = pd.read_csv(f, header=0, index_col=0)
    data = matrix_tools.extractRowsWithThresholds(data, min_value,
                                                 max_value,
                                                 'predicted_proba')
    return jsonify({'instances': list(data.index.values),
                    'proba': list(data['predicted_proba'])})
def runNextIteration(project, dataset, experiment_id, iteration_number):
    """Trigger the next active-learning iteration asynchronously and
    return the celery task id (as a string).

    When user experiments are enabled, the action is appended to the
    experiment's user-actions log.
    """
    res = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        file_exists = dir_tools.checkFileExists(filename)
        mode = 'a' if file_exists else 'w'
        to_print = [datetime.datetime.now(), 'nextIteration',
                    iteration_number]
        to_print = ','.join(map(str, to_print))
        # f.write instead of `print >> f`: identical output, but valid
        # syntax on both Python 2 and Python 3.
        with open(filename, mode) as f:
            f.write(to_print + '\n')
    return res
def activeLearningMonitoring(project, dataset, experiment_id, iteration,
                             kind, sub_kind):
    """Serve a monitoring plot (labels, families, clustering or execution
    time) of a given active-learning iteration."""
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    directory = Iteration(exp, int(iteration)).output_directory
    if kind == 'labels':
        filename = directory + 'labels_monitoring/' + \
            'iteration' + '_' + sub_kind + '.png'
    elif kind == 'families':
        filename = directory + 'labels_monitoring/' + \
            'families_monitoring.png'
    elif kind == 'clustering':
        filename = directory + 'clustering_evaluation/' + \
            sub_kind + '_monitoring.png'
    elif kind == 'time':
        filename = directory + 'execution_time_monitoring.png'
    return send_file(filename, mimetype='image/png')
def currentAnnotations(project, dataset, experiment_id, iteration):
    """Render the current-annotations page.

    When user experiments are enabled, the display action is appended to
    the experiment's user-actions log.
    """
    page = render_template('ActiveLearning/current_annotations.html',
                           project=project)
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        file_exists = dir_tools.checkFileExists(filename)
        mode = 'a' if file_exists else 'w'
        to_print = [datetime.datetime.now(), 'displayAnnotatedInstances']
        to_print = ','.join(map(str, to_print))
        # f.write instead of `print >> f`: identical output, but valid
        # syntax on both Python 2 and Python 3.
        with open(filename, mode) as f:
            f.write(to_print + '\n')
    return page