def test_project_data(db, test_project):
    """Attach the standard no-label CSV rows to the test project and return it.

    NOTE(review): an identical fixture definition appears again later in the
    file and shadows this one at import time — confirm which copy should stay.
    """
    rows = read_test_data_backend(file="./core/data/test_files/test_no_labels.csv")
    add_data(test_project, rows)
    return test_project
def test_project_all_irr_3_coders_data(db, test_project_all_irr_3_coders):
    """Load the no-label test CSV into the 100%-IRR, 3-coder project and return it."""
    project = test_project_all_irr_3_coders
    unlabeled = read_test_data_backend(file="./core/data/test_files/test_no_labels.csv")
    add_data(project, unlabeled)
    return project
def test_project_all_irr_data(db, test_profile):
    """Build a project set to 100% IRR with 2 coders and seed it with unlabeled rows.

    NOTE(review): a second, identical definition of this fixture appears later
    in the file and shadows this one — confirm which copy should remain.
    """
    proj = create_project('test_project', test_profile, 100, 2)
    add_data(proj, read_test_data_backend(file='./core/data/test_files/test_no_labels.csv'))
    return proj
def test_project_labeled(test_project):
    """A project that has labeled data."""
    # Register every seed label on the project before loading labeled rows.
    for seed in SEED_LABELS:
        Label.objects.create(name=seed, project=test_project)
    labeled_rows = read_test_data_backend(
        file="./core/data/test_files/test_some_labels.csv"
    )
    add_data(test_project, labeled_rows)
    return test_project
def test_project_gnb_data_tfidf(db, test_profile, tmpdir, settings):
    """Create a GNB-classifier project, load test data, and save a TF-IDF matrix.

    Fixes over the original:
    - The old docstring claimed the fixture creates the project "without any
      data", but the body loads the no-label CSV and builds a TF-IDF matrix
      from it; the docstring is corrected.
    - Removed the dead statement ``Data.objects.filter(project=proj)`` — the
      queryset was never assigned or evaluated, so it had no effect.
    """
    proj = create_project("test_project", test_profile, classifier="gnb")
    test_data = read_test_data_backend(file="./core/data/test_files/test_no_labels.csv")
    add_data(proj, test_data)
    # Build the TF-IDF matrix from the project's loaded data.
    matrix = create_tfidf_matrix(proj.pk)[0]
    # Point TF_IDF_PATH at a temp dir so the matrix is written in isolation.
    data_temp = tmpdir.mkdir("data").mkdir("tf_idf")
    settings.TF_IDF_PATH = str(data_temp)
    save_tfidf_matrix(matrix, proj.pk)
    return proj
def test_fill_multiple_projects(db, test_queue, test_profile):
    """Filling a queue must draw only from the queue's own project data."""
    own_count = test_queue.project.data_set.count()
    # Request one more item than the project owns, so a buggy fill would
    # have to reach into the other project to satisfy the length.
    test_queue.length = own_count + 1
    test_queue.save()
    other_project = create_project("test_project2", test_profile)
    add_data(
        other_project,
        read_test_data_backend(file="./core/data/test_files/test_no_labels.csv"),
    )
    fill_queue(test_queue, orderby="random")
    # The queue caps at its own project's data and contains nothing foreign.
    assert test_queue.data.count() == own_count
    assert all(item.project == test_queue.project for item in test_queue.data.all())
def test_project_svm_data_tfidf(db, test_profile, tmpdir, settings):
    """Create an SVM-classifier project, load test data, and save a TF-IDF matrix.

    Fixes over the original:
    - The old docstring claimed the fixture creates the project "without any
      data", but the body loads the no-label CSV and builds a TF-IDF matrix
      from it; the docstring is corrected.
    - Removed the dead statement ``Data.objects.filter(project=proj)`` — the
      queryset was never assigned or evaluated, so it had no effect.
    """
    proj = create_project('test_project', test_profile, classifier="svm")
    test_data = read_test_data_backend(file='./core/data/test_files/test_no_labels.csv')
    add_data(proj, test_data)
    # Build the TF-IDF matrix from the project's loaded data.
    matrix = create_tfidf_matrix(proj.pk)[0]
    # Point TF_IDF_PATH at a temp dir so the matrix is written in isolation.
    data_temp = tmpdir.mkdir('data').mkdir('tf_idf')
    settings.TF_IDF_PATH = str(data_temp)
    save_tfidf_matrix(matrix, proj.pk)
    return proj
def test_add_data_no_labels(db, test_project):
    """Every row loaded from the no-label CSV ends up as a Data record.

    NOTE(review): a second definition of this test appears later in the file
    and shadows this one at import time — confirm which copy should remain.
    """
    rows = add_data(
        test_project,
        read_test_data_backend(file='./core/data/test_files/test_no_labels.csv'),
    )
    for _, record in rows.iterrows():
        assert_obj_exists(
            Data,
            {
                'upload_id_hash': record['id_hash'],
                'hash': record['hash'],
                'project': test_project,
            },
        )
def test_init_redis_multiple_projects(db, test_project_data, test_redis, test_profile):
    """init_redis must rebuild queues across projects, whether filled or empty."""
    # Project 1: one filled queue and one empty queue.
    first_filled = add_queue(test_project_data, 10)
    fill_queue(first_filled, orderby="random")
    add_queue(test_project_data, 10)
    # Project 2: same layout, built from scratch with its own data.
    second_project = create_project("test_project2", test_profile)
    add_data(
        second_project,
        read_test_data_backend(file="./core/data/test_files/test_no_labels.csv"),
    )
    second_filled = add_queue(second_project, 10)
    fill_queue(second_filled, orderby="random")
    add_queue(second_project, 10)
    # Wipe redis entirely, then rebuild it from the database state.
    test_redis.flushdb()
    init_redis()
    assert_redis_matches_db(test_redis)
def test_add_data_no_labels(db, test_project):
    """Adding the no-label CSV creates a matching Data object for each row.

    NOTE(review): this re-defines a test of the same name earlier in the file,
    shadowing it — confirm which copy should remain.
    """
    loaded = read_test_data_backend(file="./core/data/test_files/test_no_labels.csv")
    result_df = add_data(test_project, loaded)
    for _, row in result_df.iterrows():
        expected = {
            "upload_id_hash": row["id_hash"],
            "hash": row["hash"],
            "project": test_project,
        }
        assert_obj_exists(Data, expected)
def seed_project(creator, name, description, data_file, label_list, perm_list, classifier):
    """Create and fully initialize a project: labels, permissions, queues, data, tasks.

    Args:
        creator: profile that owns the project.
        name: project name.
        description: project description.
        data_file: path to the CSV to load via ``read_test_data_backend``.
        label_list: label names to create for the project.
        perm_list: profiles to grant CODER permission.
        classifier: classifier identifier stored on the project.

    Returns:
        The created ``Project`` instance.

    BUGFIX: the label-creation loop previously reused the variable ``name``,
    shadowing the project-name parameter; it is renamed to ``label_name``.
    """
    project = Project.objects.create(
        name=name, description=description, creator=creator, classifier=classifier
    )
    TrainingSet.objects.create(project=project, set_number=0)

    labels = [
        Label.objects.create(name=label_name, project=project)
        for label_name in label_list
    ]
    permissions = [
        ProjectPermissions.objects.create(
            profile=perm, project=project, permission="CODER"
        )
        for perm in perm_list
    ]

    # Batch size scales with the number of labels.
    batch_size = 10 * len(labels)
    project.batch_size = batch_size
    project.save()

    # Coders are everyone with a permission plus the creator.
    num_coders = len(permissions) + 1
    q_length = find_queue_length(batch_size, num_coders)
    queue = add_queue(project=project, length=q_length, type="normal")

    # Load the data file; the admin queue is sized to hold every row.
    f_data = read_test_data_backend(file=data_file)
    data_length = len(f_data)
    add_queue(project=project, length=data_length, type="admin")
    irr_queue = add_queue(project=project, length=2000000, type="irr")
    new_df = add_data(project, f_data)
    fill_queue(queue, irr_queue=irr_queue, orderby="random", batch_size=batch_size)
    save_data_file(new_df, project.pk)

    # Run TF-IDF creation and the model-trigger check synchronously (.apply).
    tasks.send_tfidf_creation_task.apply(args=[project.pk])
    tasks.send_check_and_trigger_model_task.apply(args=[project.pk])
    return project
def test_add_data_with_labels(db, test_project_labels):
    """Loaded rows become Data records; labeled rows also get a DataLabel.

    NOTE(review): a second definition of this test appears later in the file
    and shadows this one at import time — confirm which copy should remain.
    """
    loaded = read_test_data_backend(file='./core/data/test_files/test_some_labels.csv')
    frame = add_data(test_project_labels, loaded)
    for _, entry in frame.iterrows():
        assert_obj_exists(
            Data,
            {
                'upload_id_hash': entry['id_hash'],
                'hash': entry['hash'],
                'project': test_project_labels,
            },
        )
        # Only rows with a populated Label column should carry a DataLabel.
        if not pd.isnull(entry['Label']):
            assert_obj_exists(
                DataLabel,
                {
                    'data__hash': entry['hash'],
                    'profile': test_project_labels.creator,
                    'label__name': entry['Label'],
                },
            )
def test_add_data_with_labels(db, test_project_labels):
    """Each CSV row yields a Data object; rows with a label yield a DataLabel too.

    NOTE(review): this re-defines a test of the same name earlier in the file,
    shadowing it — confirm which copy should remain.
    """
    result_df = add_data(
        test_project_labels,
        read_test_data_backend(file="./core/data/test_files/test_some_labels.csv"),
    )
    for _, row in result_df.iterrows():
        data_lookup = {
            "upload_id_hash": row["id_hash"],
            "hash": row["hash"],
            "project": test_project_labels,
        }
        assert_obj_exists(Data, data_lookup)
        if pd.isnull(row["Label"]):
            continue
        label_lookup = {
            "data__hash": row["hash"],
            "profile": test_project_labels.creator,
            "label__name": row["Label"],
        }
        assert_obj_exists(DataLabel, label_lookup)
def test_project_data(db, test_project):
    """Creates the test project and adds test data to it.

    NOTE(review): this re-defines a fixture of the same name earlier in the
    file, shadowing it — confirm which copy should remain.
    """
    add_data(
        test_project,
        read_test_data_backend(file="./core/data/test_files/test_no_labels.csv"),
    )
    return test_project
def test_project_all_irr_data(db, test_profile):
    """Creates the test project with 100% irr and adds test data to it.

    NOTE(review): this re-defines a fixture of the same name earlier in the
    file, shadowing it — confirm which copy should remain.
    """
    irr_project = create_project("test_project", test_profile, 100, 2)
    no_label_rows = read_test_data_backend(
        file="./core/data/test_files/test_no_labels.csv"
    )
    add_data(irr_project, no_label_rows)
    return irr_project