def initFromFile(self, labels_filename):
    """Bulk-load an initial labels file into the ``Labels`` table.

    The file is looked up as ``labels/<labels_filename>`` inside the dataset
    directory of ``self.project`` / ``self.dataset``. Its header line decides
    which columns are imported: a header with exactly three comma-separated
    fields means the file also carries a family column; otherwise every row
    gets the family ``"other"``.

    Every imported row is tagged with ``self.experiment_label_id``,
    iteration 0, method ``"init"`` and annotation ``"0"``. The statement is
    executed on ``self.cursor``, committed on ``self.db``, and then
    ``self.checkLabelsValidity()`` is called.

    Raises:
        ValueError: if the labels file does not exist.
    """
    labels_path = dir_tools.getDatasetDirectory(self.project, self.dataset)
    labels_path = labels_path + 'labels/' + labels_filename
    if not dir_tools.checkFileExists(labels_path):
        raise ValueError('The labels file %s does not exist.' % labels_path)

    # Only the header is inspected: three fields <=> families are provided.
    with open(labels_path, 'r') as labels_file:
        has_families = len(labels_file.readline().split(',')) == 3

    if has_families:
        column_list = '(instance_id, label, family) '
        family_default = ''
    else:
        column_list = '(instance_id, label) '
        # No family column in the file: assign a constant one.
        family_default = 'family = "other",'

    statement = ''.join([
        'LOAD DATA LOCAL INFILE \'', labels_path, '\' ',
        'INTO TABLE Labels ',
        'FIELDS TERMINATED BY \',\' ',
        'IGNORE 1 LINES ',
        column_list,
        'SET experiment_label_id = ', str(self.experiment_label_id), ', ',
        family_default,
        'iteration = 0, ',
        'method = "init", ',
        'annotation = "0"',
        ';',
    ])
    self.cursor.execute(statement)
    self.db.commit()
    self.checkLabelsValidity()
def runNextIteration(experiment_id, iteration_number):
    """Dispatch ``celeryRunNextIteration`` asynchronously.

    When the ``user_exp`` flag is set, a line
    ``<timestamp>,nextIteration,<iteration_number>`` is appended to
    ``user_actions.log`` in the experiment's output directory.

    Returns:
        The string form of the object returned by ``apply_async()``.
    """
    task_repr = str(celeryRunNextIteration.s().apply_async())
    if not user_exp:
        return task_repr

    experiment = updateCurrentExperiment(experiment_id)
    log_path = experiment.getOutputDirectory() + 'user_actions.log'
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    # The record is built before the file is opened, so a formatting error
    # never leaves an empty log file behind.
    entries = [datetime.datetime.now(), 'nextIteration', iteration_number]
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return task_repr
def loadTrueLabels(self):
    """Load the dataset's ground-truth labels, if a file for them exists.

    Looks for ``labels/true_labels.csv`` in the dataset directory of
    ``self.project`` / ``self.dataset``. If it is present, an ``Experiment``
    named ``'true_labels'`` is created on this object's database handles and
    its labels are initialised from ``'true_labels.csv'``. If it is absent, a
    notice is written to stderr and nothing else happens.
    """
    truth_path = dir_tools.getDatasetDirectory(self.project, self.dataset)
    truth_path = truth_path + 'labels/true_labels.csv'
    # Loads the true labels in the table TrueLabels if the file exists
    # Otherwise the table TrueLabels is not created
    if dir_tools.checkFileExists(truth_path):
        true_labels_exp = Experiment(self.project, self.dataset,
                                     self.db, self.cursor,
                                     experiment_name='true_labels')
        true_labels_exp.initLabels('true_labels.csv')
    else:
        sys.stderr.write('No ground truth labels for this dataset' + '\n')
def currentAnnotations(experiment_id, iteration):
    """Render the current-annotations page for an experiment.

    The page is rendered from ``ActiveLearning/current_annotations.html``
    with the experiment's project. When the ``user_exp`` flag is set, a line
    ``<timestamp>,displayAnnotatedInstances`` is appended to
    ``user_actions.log`` in the experiment's output directory.

    Returns:
        The rendered page.
    """
    experiment = updateCurrentExperiment(experiment_id)
    rendered = render_template('ActiveLearning/current_annotations.html',
                               project=experiment.project)
    if not user_exp:
        return rendered

    log_path = experiment.getOutputDirectory() + 'user_actions.log'
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [datetime.datetime.now(), 'displayAnnotatedInstances']
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return rendered
def removeLabel(experiment_id, inst_experiment_label_id, iteration_number,
                instance_id):
    """Remove the label of one instance via ``labels_tools.removeLabel``.

    When the ``user_exp`` flag is set, a line
    ``<timestamp>,removeLabel,<instance_id>`` is appended to
    ``user_actions.log`` in the experiment's output directory.
    ``iteration_number`` is accepted but not used by this function.

    Returns:
        An empty string.
    """
    labels_tools.removeLabel(session, inst_experiment_label_id, instance_id)
    if not user_exp:
        return ''

    experiment = updateCurrentExperiment(experiment_id)
    log_path = experiment.getOutputDirectory() + 'user_actions.log'
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [datetime.datetime.now(), 'removeLabel', instance_id]
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return ''
def runNextIteration(project, dataset, experiment_id, iteration_number):
    """Dispatch ``celeryRunNextIteration`` asynchronously.

    When the ``user_exp`` flag is set, the experiment is rebuilt through
    ``ExperimentFactory`` and a line
    ``<timestamp>,nextIteration,<iteration_number>`` is appended to
    ``user_actions.log`` in its output directory.

    Returns:
        The string form of the object returned by ``apply_async()``.
    """
    task_repr = str(celeryRunNextIteration.s().apply_async())
    if not user_exp:
        return task_repr

    experiment = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    log_path = (dir_tools.getExperimentOutputDirectory(experiment)
                + 'user_actions.log')
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [datetime.datetime.now(), 'nextIteration', iteration_number]
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return task_repr
def currentAnnotations(project, dataset, experiment_id, iteration):
    """Render the current-annotations page for a project.

    The page is rendered from ``ActiveLearning/current_annotations.html``.
    When the ``user_exp`` flag is set, the experiment is rebuilt through
    ``ExperimentFactory`` and a line
    ``<timestamp>,displayAnnotatedInstances`` is appended to
    ``user_actions.log`` in its output directory.

    Returns:
        The rendered page.
    """
    rendered = render_template('ActiveLearning/current_annotations.html',
                               project=project)
    if not user_exp:
        return rendered

    experiment = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    log_path = (dir_tools.getExperimentOutputDirectory(experiment)
                + 'user_actions.log')
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [datetime.datetime.now(), 'displayAnnotatedInstances']
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return rendered
def changeFamilyLabel(experiment_id, label, family):
    """Change the label of a family via ``labels_tools.changeFamilyLabel``.

    The labels identifier is taken from the experiment's ``labels_id``.
    When the ``user_exp`` flag is set, a line
    ``<timestamp>,changeFamilyLabel,<family>,<label>`` is appended to
    ``user_actions.log`` in the experiment's output directory.

    Returns:
        An empty string.
    """
    experiment = updateCurrentExperiment(experiment_id)
    labels_tools.changeFamilyLabel(session, label, family,
                                   experiment.labels_id)
    if not user_exp:
        return ''

    log_path = experiment.getOutputDirectory() + 'user_actions.log'
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [datetime.datetime.now(), 'changeFamilyLabel', family, label]
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return ''
def mergeFamilies(experiment_id, label, families, new_family_name):
    """Merge several families into one via ``labels_tools.mergeFamilies``.

    ``families`` arrives as a single comma-separated string and is split
    into a list before being passed on. The labels identifier is taken from
    the experiment's ``labels_id``. When the ``user_exp`` flag is set, a line
    ``<timestamp>,mergeFamilies,<new_family_name>,<family>,...`` is appended
    to ``user_actions.log`` in the experiment's output directory.

    Returns:
        An empty string.
    """
    experiment = updateCurrentExperiment(experiment_id)
    family_names = families.split(',')
    labels_tools.mergeFamilies(session, label, family_names, new_family_name,
                               experiment.labels_id)
    if not user_exp:
        return ''

    log_path = experiment.getOutputDirectory() + 'user_actions.log'
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [datetime.datetime.now(), 'mergeFamilies', new_family_name]
    entries.extend(family_names)
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return ''
def changeFamilyLabel(project, dataset, experiment_id, experiment_label_id,
                      label, family):
    """Change the label of a family via ``labels_tools.changeFamilyLabel``.

    ``mysql_tools.useDatabase`` is called with the project and dataset before
    the change is made. When the ``user_exp`` flag is set, the experiment is
    rebuilt through ``ExperimentFactory`` and a line
    ``<timestamp>,changeFamilyLabel,<family>,<label>`` is appended to
    ``user_actions.log`` in its output directory.

    Returns:
        An empty string.
    """
    mysql_tools.useDatabase(cursor, project, dataset)
    labels_tools.changeFamilyLabel(cursor, label, family, experiment_label_id)
    if not user_exp:
        return ''

    experiment = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    log_path = (dir_tools.getExperimentOutputDirectory(experiment)
                + 'user_actions.log')
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [datetime.datetime.now(), 'changeFamilyLabel', family, label]
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return ''
def mergeFamilies(project, dataset, experiment_id, experiment_label_id, label,
                  families, new_family_name):
    """Merge several families into one via ``labels_tools.mergeFamilies``.

    ``families`` arrives as a single comma-separated string and is split
    into a list first. ``mysql_tools.useDatabase`` is then called with the
    project and dataset before the merge. When the ``user_exp`` flag is set,
    the experiment is rebuilt through ``ExperimentFactory`` and a line
    ``<timestamp>,mergeFamilies,<new_family_name>,<family>,...`` is appended
    to ``user_actions.log`` in its output directory.

    Returns:
        An empty string.
    """
    family_names = families.split(',')
    mysql_tools.useDatabase(cursor, project, dataset)
    labels_tools.mergeFamilies(cursor, label, family_names, new_family_name,
                               experiment_label_id)
    if not user_exp:
        return ''

    experiment = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    log_path = (dir_tools.getExperimentOutputDirectory(experiment)
                + 'user_actions.log')
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [datetime.datetime.now(), 'mergeFamilies', new_family_name]
    entries.extend(family_names)
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return ''
def addLabel(experiment_id, inst_experiment_label_id, iteration_number,
             instance_id, label, family, method, annotation):
    """Add a label to one instance via ``labels_tools.addLabel``.

    ``annotation`` is converted to a boolean: it is True only when the
    incoming value equals the string ``'true'``. When the ``user_exp`` flag
    is set, a line
    ``<timestamp>,addLabel,<iteration_number>,<instance_id>,<label>,<family>,<method>,<annotation>``
    is appended to ``user_actions.log`` in the experiment's output directory.

    Returns:
        An empty string.
    """
    is_annotation = (annotation == 'true')
    labels_tools.addLabel(session, inst_experiment_label_id, instance_id,
                          label, family, iteration_number, method,
                          is_annotation)
    if not user_exp:
        return ''

    experiment = updateCurrentExperiment(experiment_id)
    log_path = experiment.getOutputDirectory() + 'user_actions.log'
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [
        datetime.datetime.now(), 'addLabel', iteration_number, instance_id,
        label, family, method, is_annotation,
    ]
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return ''
def removeLabel(project, dataset, experiment_id, inst_dataset,
                inst_experiment_label_id, iteration_number, instance_id):
    """Remove the label of one instance via ``labels_tools.removeLabel``.

    ``mysql_tools.useDatabase`` is called with the project and
    ``inst_dataset`` (not ``dataset``) before the removal, and the change is
    committed on ``db``. When the ``user_exp`` flag is set, the experiment is
    rebuilt through ``ExperimentFactory`` from ``dataset`` and a line
    ``<timestamp>,removeLabel,<project>,<dataset>,<instance_id>`` is appended
    to ``user_actions.log`` in its output directory.
    ``iteration_number`` is accepted but not used by this function.

    Returns:
        An empty string.
    """
    mysql_tools.useDatabase(cursor, project, inst_dataset)
    labels_tools.removeLabel(cursor, inst_experiment_label_id, instance_id)
    db.commit()
    if not user_exp:
        return ''

    experiment = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    log_path = (dir_tools.getExperimentOutputDirectory(experiment)
                + 'user_actions.log')
    open_mode = 'a' if dir_tools.checkFileExists(log_path) else 'w'
    entries = [
        datetime.datetime.now(), 'removeLabel', project, dataset, instance_id,
    ]
    record = ','.join(str(entry) for entry in entries)
    with open(log_path, open_mode) as log_file:
        log_file.write(record + '\n')
    return ''
def loadTrueLabels(self):
    """Import the dataset's ground-truth labels into ``true_labels``.

    Looks for ``labels/true_labels.csv`` in the dataset directory of
    ``self.project`` / ``self.dataset``. If the file is missing, a notice is
    written to stderr and nothing is imported.

    Otherwise the CSV is bulk-loaded into a temporary table
    ``true_labels_import`` (``LOAD DATA LOCAL INFILE`` on MySQL,
    ``COPY ... FROM STDIN`` on PostgreSQL), each row is matched against
    ``instances`` on ``user_instance_id`` and ``dataset_id`` to fill in the
    internal instance id, and the result is inserted into ``true_labels``.
    A header with exactly three comma-separated fields means the file
    carries a family column; otherwise the temporary table's default family
    ``'other'`` applies. If the backend is neither MySQL nor PostgreSQL,
    no statement is executed.

    The raw connection is always handed back to
    ``db_tools.closeRawConnection``, even when a statement raises, so a
    failed import no longer leaks the connection and cursor.
    """
    labels_file = dir_tools.getDatasetDirectory(self.project, self.dataset)
    labels_file += 'labels/true_labels.csv'
    # Loads the true labels in the table TrueLabels if the file exists
    # Otherwise the table TrueLabels is not created
    if not dir_tools.checkFileExists(labels_file):
        sys.stderr.write('No ground truth labels for this dataset\n')
        return

    ## Check whether the file contains families
    with open(labels_file, 'r') as f:
        families = len(f.readline().split(',')) == 3

    # The final copy from the staging table is the same for both backends.
    insert_query = (
        'INSERT INTO true_labels(instance_id, dataset_id, label, family) '
        'SELECT t.id, t.dataset_id, t.label, t.family '
        'FROM true_labels_import AS t;'
    )

    db, cursor = db_tools.getRawConnection()
    # NOTE(review): whether closeRawConnection commits is not visible here;
    # the original relied on it on the success path, and it is now also
    # reached on the error path - confirm that is acceptable.
    try:
        if db_tools.isMysql():
            query = (
                'CREATE TEMPORARY TABLE true_labels_import('
                'user_instance_id integer PRIMARY KEY, '
                'label varchar(200), '
                'family varchar(200) DEFAULT \'other\', '
                'dataset_id integer DEFAULT ' + str(self.dataset_id) + ', '
                'id integer DEFAULT NULL'
                ');'
            )
            cursor.execute(query)

            if families:
                columns = '(user_instance_id, label, family) '
            else:
                columns = '(user_instance_id, label) '
            query = (
                'LOAD DATA LOCAL INFILE \'' + labels_file + '\' '
                'INTO TABLE true_labels_import '
                'FIELDS TERMINATED BY \',\' '
                'IGNORE 1 LINES '
                + columns +
                ';'
            )
            cursor.execute(query)

            # Resolve user-facing instance ids to internal ids.
            query = (
                'UPDATE true_labels_import t '
                'JOIN instances i '
                'ON i.user_instance_id = t.user_instance_id '
                'AND i.dataset_id = t.dataset_id '
                'SET t.id = i.id;'
            )
            cursor.execute(query)
            cursor.execute(insert_query)
        elif db_tools.isPostgresql():
            query = (
                'CREATE TEMPORARY TABLE true_labels_import('
                'user_instance_id integer PRIMARY KEY, '
                'label true_labels_enum, '
                'family varchar(200) DEFAULT \'other\', '
                'dataset_id integer DEFAULT ' + str(self.dataset_id) + ', '
                'id integer DEFAULT NULL'
                ');'
            )
            cursor.execute(query)

            if families:
                query = 'COPY true_labels_import(user_instance_id,label,family) '
            else:
                query = 'COPY true_labels_import(user_instance_id,label) '
            query += 'FROM STDIN '
            query += 'WITH CSV HEADER DELIMITER AS \',\' ;'
            with open(labels_file, 'r') as f:
                cursor.copy_expert(sql=query, file=f)

            # Resolve user-facing instance ids to internal ids.
            query = (
                'UPDATE true_labels_import AS t '
                'SET id = i.id '
                'FROM instances AS i '
                'WHERE i.user_instance_id = t.user_instance_id '
                'AND i.dataset_id = t.dataset_id;'
            )
            cursor.execute(query)
            cursor.execute(insert_query)
    finally:
        db_tools.closeRawConnection(db, cursor)
def initFromFile(self, labels_filename):
    """Register this experiment's labels and import them from a file.

    The labels type is derived from ``labels_filename``: ``None`` gives
    ``'none'``, ``'true_labels.csv'`` gives ``'true_labels'``, anything else
    gives ``'partial_labels'``. An ``ExperimentsLabelsAlchemy`` row is added
    and committed on ``self.session``, and ``self.labels_id`` /
    ``self.labels_type`` are set from it.

    Only for ``'partial_labels'`` is a file read: ``labels/<labels_filename>``
    in the dataset directory is bulk-loaded into a temporary table
    ``labels_import`` (``LOAD DATA LOCAL INFILE`` on MySQL,
    ``COPY ... FROM STDIN`` on PostgreSQL), each row is matched against
    ``instances`` on ``user_instance_id`` within ``self.dataset_id``, and
    the result is inserted into ``labels``. A header with exactly three
    comma-separated fields means the file carries a family column;
    otherwise the temporary table's default family ``'other'`` applies.
    Imported rows get the temporary table's defaults: iteration 0, method
    ``'init'`` and annotation True.

    The raw connection is always handed back to
    ``db_tools.closeRawConnection``, even when a statement raises, so a
    failed import no longer leaks the connection and cursor.

    Raises:
        ValueError: if a partial labels file is requested but does not exist.
    """
    if labels_filename is None:
        labels_type = 'none'
    elif labels_filename == 'true_labels.csv':
        labels_type = 'true_labels'
    else:
        labels_type = 'partial_labels'

    exp_labels = db_tables.ExperimentsLabelsAlchemy(
        experiment_id=self.experiment_id, labels_type=labels_type)
    self.session.add(exp_labels)
    # Commit first so that the generated labels_id can be read back.
    self.session.commit()
    self.labels_id = exp_labels.labels_id
    self.labels_type = labels_type

    if labels_type == 'partial_labels':
        filename = dir_tools.getDatasetDirectory(self.project, self.dataset)
        filename += 'labels/' + labels_filename
        if not dir_tools.checkFileExists(filename):
            raise ValueError('The labels file %s does not exist.' % filename)

        ## Check whether the file contains families
        with open(filename, 'r') as f:
            families = len(f.readline().split(',')) == 3

        # The final copy from the staging table is the same for both backends.
        insert_query = (
            'INSERT INTO labels(instance_id,labels_id,label,family,iteration,method,annotation) '
            'SELECT instance_id,labels_id,label,family,iteration,method,annotation '
            'FROM labels_import;'
        )

        db, cursor = db_tools.getRawConnection()
        # NOTE(review): whether closeRawConnection commits is not visible
        # here; it is now also reached on the error path - confirm that is
        # acceptable.
        try:
            if db_tools.isMysql():
                query = (
                    'CREATE TEMPORARY TABLE labels_import('
                    'instance_id integer, '
                    'labels_id integer DEFAULT ' + str(self.labels_id) + ', '
                    'user_instance_id integer, '
                    'label varchar(200), '
                    'family varchar(200) DEFAULT \'other\', '
                    'iteration integer DEFAULT 0, '
                    'method varchar(200) DEFAULT \'init\', '
                    'annotation boolean DEFAULT True'
                    ');'
                )
                cursor.execute(query)

                if families:
                    columns = '(user_instance_id, label, family) '
                else:
                    columns = '(user_instance_id, label) '
                query = (
                    'LOAD DATA LOCAL INFILE \'' + filename + '\' '
                    'INTO TABLE labels_import '
                    'FIELDS TERMINATED BY \',\' '
                    'IGNORE 1 LINES '
                    + columns +
                    ';'
                )
                cursor.execute(query)

                # Resolve user-facing instance ids to internal ids.
                query = (
                    'UPDATE labels_import l '
                    'JOIN instances i '
                    'ON i.user_instance_id = l.user_instance_id '
                    'AND i.dataset_id = ' + str(self.dataset_id) + ' '
                    'SET l.instance_id = i.id;'
                )
                cursor.execute(query)
                cursor.execute(insert_query)
            elif db_tools.isPostgresql():
                query = (
                    'CREATE TEMPORARY TABLE labels_import('
                    'instance_id integer, '
                    'labels_id integer DEFAULT ' + str(self.labels_id) + ', '
                    'user_instance_id integer, '
                    'label labels_enum, '
                    'family varchar(200) DEFAULT \'other\', '
                    'iteration integer DEFAULT 0, '
                    'method varchar(200) DEFAULT \'init\', '
                    'annotation boolean DEFAULT True'
                    ');'
                )
                cursor.execute(query)

                if families:
                    query = 'COPY labels_import(user_instance_id,label,family) '
                else:
                    query = 'COPY labels_import(user_instance_id,label) '
                query += 'FROM STDIN '
                query += 'WITH CSV HEADER DELIMITER AS \',\' ;'
                with open(filename, 'r') as f:
                    cursor.copy_expert(sql=query, file=f)

                # Resolve user-facing instance ids to internal ids.
                query = (
                    'UPDATE labels_import AS l '
                    'SET instance_id = i.id '
                    'FROM instances AS i '
                    'WHERE i.user_instance_id = l.user_instance_id '
                    'AND i.dataset_id = ' + str(self.dataset_id) + ';'
                )
                cursor.execute(query)
                cursor.execute(insert_query)
        finally:
            db_tools.closeRawConnection(db, cursor)
        # NOTE(review): the flattened source does not show whether this
        # commit sat inside or after the partial-labels branch; it is kept
        # next to the raw import it follows - confirm.
        self.session.commit()