Example #1
def test_dep_parent1(config):
    # Test that when the parent has no highlight but the child does, the child keeps its own highlight
    iaa_files_path = test_utils.make_test_directory(config, 'dep_parent1')
    outpath = test_utils.make_test_directory(config, 'out_dep_parent1')
    # source_task_id generated by smashing keyboard
    all_schema=[ [{"agreed_Answer": 2, "question_Number": 4, "namespace": 'Covid_Sources_2002_03_20v2.1','highlighted_indices': test_utils.make_highlight_indices(10,30)},{"agreed_Answer": 1, "question_Number": 3, "namespace": 'Covid_Sources_2002_03_20v2.1'},4],
                 [{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning','highlighted_indices': test_utils.make_highlight_indices(10,30)},{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Reasoning'},2]
                 ]

    for i in all_schema:
        print(i)
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id="auhfdaiughfs")
        iaa.add_row(i[1])
        iaa.add_row(i[0])
        fin_path = iaa.export()
        data_path = config['data_dir']
        schema_path = data_path + '/schemas'
        dh_path = None #doesn't get used by dependency but is still an argument
        eval_dependency(dh_path, iaa_files_path, schema_path, outpath)
        for root, dir, files in os.walk(outpath):
            for file in files:
                #should be only 1 file for this case, so just run it on the only one
                # if there's more than 1 then you can get fancy
                out_df  = pd.read_csv(os.path.join(outpath, file), encoding='utf-8')
        # the child already has its own highlight, which should be preserved
        assert len(out_df) == 2
        child_q = out_df[out_df['question_Number'] == i[2]]
        hl = child_q['highlighted_indices'].iloc[0]
        assert len(hl) > 18
        assert '10' in hl
        assert '29' in hl
Example #2
def test_dep_sample(config):
    iaa_files_path = test_utils.make_test_directory(config, 'dep_sample')
    out_path = test_utils.make_test_directory(config, 'out_dep_sample')
    # source_task_id generated by smashing keyboard
    iaa = IAA_task(out_folder=iaa_files_path, source_task_id='kjncsa87nxao21899102j1j2')
    iaa.add_row({"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Probability',
                 'highlighted_indices': test_utils.make_highlight_indices(10,30)})
    iaa.add_row({"agreed_Answer": 3, "question_Number": 2, "namespace": 'Covid_Probability'})
    fin_path = iaa.export()
    data_path = config['data_dir']
    schema_path = data_path + '/schemas'
    dh_path = None #doesn't get used by dependency but is still an argument

    eval_dependency(dh_path, iaa_files_path, schema_path, out_path)

    for root, dir, files in os.walk(out_path):
        for file in files:
            #should be only 1 file for this case, so just run it on the only one
            # if there's more than 1 then you can get fancy
            out_df  = pd.read_csv(os.path.join(out_path, file), encoding='utf-8')

    # Q2 has no highlight of its own, so it should inherit Q1's
    assert len(out_df) == 2
    q_two = out_df[out_df['question_Number'] == 2]
    hl = q_two['highlighted_indices'].iloc[0]
    assert len(hl) > 18
    assert '10' in hl
    assert '29' in hl
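# Both dependency tests above only pin down the shape of
# test_utils.make_highlight_indices: the asserts want a string containing every
# index from start up to (but not including) end. A minimal sketch of a
# compatible helper, assuming the real one (in test_utils) stringifies a list
# of character offsets:
def make_highlight_indices(start, end):
    # '[10, 11, ..., 29]' survives the CSV round-trip as a plain string
    return str(list(range(start, end)))

assert '10' in make_highlight_indices(10, 30)
assert '29' in make_highlight_indices(10, 30)
assert len(make_highlight_indices(10, 30)) > 18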
def test_weighting_sample(config):
    out_path = test_utils.make_test_directory(config, 'weighting_sample_test')
    weight_out_folder = test_utils.make_test_directory(
        config, 'out_weighting_sample_test')
    weight_df = pd.read_csv(weight_path)
    iaa = dep_iaa(out_folder=out_path, source_task_id='weightsampletests')
    namespace = "Covid2_Evidence2020_09_20"
    # -.5 points, from the weight key in the config folder and the agreement_score
    iaa.add_row({
        "namespace": namespace,
        "agreed_Answer": 2,
        "question_Number": 4,
        "agreement_score": 1
    })
    #-2 points from ./config/weight_key and agreement score
    iaa.add_row({
        "namespace": namespace,
        "agreed_Answer": 1,
        "question_Number": 8,
        "agreement_score": .5
    })
    # +1.5 points from ./config/weight_key and agreement score
    iaa.add_row({
        "namespace": namespace,
        "agreed_Answer": 1,
        "question_Number": 12,
        "agreement_score": .75
    })
    fin_path = iaa.export()
    # weighting will output the actual pandas dataframe instead of the directory
    # if you look into the Weighting.py file, you can see the paths it writes to
    weighting_out = launch_Weighting(out_path, weight_out_folder)
    points = weighting_out['agreement_adjusted_points']
    weighting_out.to_csv(weight_out_folder + "/Point_recs_.csv",
                         encoding='utf-8')
    tot = points.sum()
    print(weighting_out)
    assert tot == -1
    assert len(weighting_out.index) == 3
    for index, row in weighting_out.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        adjusted_points = row['agreement_adjusted_points']
        ag_score = row['agreement_score']
        nm = weight_df[weight_df['namespace'] == namespace]
        qa = nm[(nm['Question_Number'] == question_num)
                & (nm['Answer_Number'] == answer_num)]
        pr = qa['Point_Recommendation'].iloc[0]
        correct_weight = pr * ag_score
        assert adjusted_points == correct_weight, "Q" + str(
            question_num) + "A" + str(answer_num) + " points: " + str(
                adjusted_points) + ", weight_df: " + str(correct_weight)
def test_random_evidence(config):
    weight_df = pd.read_csv(weight_path)
    namespace = "Covid2_Evidence2020_09_20"
    weight_df = weight_df[weight_df['namespace'] == namespace]
    out_path = test_utils.make_test_directory(
        config, 'weighting_evidence_random_test')
    iaa = dep_iaa(out_folder=out_path, source_task_id='3random')
    sample_df = weight_df.sample(3)

    for index, row in sample_df.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        iaa.add_row({
            "namespace": namespace,
            "agreed_Answer": answer_num,
            "question_Number": question_num,
            "agreement_score": 1
        })
    fin_path = iaa.export()
    weighting_out = launch_Weighting(out_path)
    assert weighting_out.shape[0] == 3
    for index, row in weighting_out.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        adjusted_points = row['agreement_adjusted_points']
        ag_score = row['agreement_score']
        nm = weight_df[weight_df['namespace'] == namespace]
        qa = nm[(nm['Question_Number'] == question_num)
                & (nm['Answer_Number'] == answer_num)]
        pr = qa['Point_Recommendation'].iloc[0]
        correct_weight = pr * ag_score
        assert adjusted_points == correct_weight, "Q" + str(
            question_num) + "A" + str(answer_num) + " points: " + str(
                adjusted_points) + ", weight_df: " + str(correct_weight)
        print("evidence random row", index + 1, "checks out")
Example #5
def test_iaa_checklist_diff_agree(config, tmpdir):
    test_path = test_utils.make_test_directory(
        config, 'test_iaa_checklist_diff_agree')
    # source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id='apply_to_all')
    for i in range(9):
        for j in range(i + 1):
            dh.add_row({
                'answer_label': 'T1.Q2.A' + str(j),
                'namespace': 'Covid2_Reasoning_2020_09_20',
                'contributor_uuid': 'Daniel' + str(i)
            })
    fin_path = dh.export()

    data_path = config['data_dir']
    schema_path = data_path + '/schemas'

    iaa_out = calc_agreement_directory(test_path,
                                       schema_path,
                                       config['IAA_config_dir'],
                                       test_utils.texts_dir,
                                       outDirectory=tmpdir)
    print(iaa_out)
    for root, dir, files in os.walk(iaa_out):
        for file in files:
            # should be only 1 file for this case, so just run it on the only one
            # if there's more than 1 then you can get fancy
            out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8')
            print(out_df['agreed_Answer'])
            temp = out_df['agreed_Answer'].tolist()
            assert int(temp[0]) == 1
            assert int(temp[2]) == 3
            assert out_df['agreement_score'][0] != out_df['agreement_score'][3]
            print("++++++++++++++")
            print(out_df['agreement_score'].tolist())
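# The datahunt rows above encode a vote as 'T<task>.Q<question>.A<answer>',
# e.g. 'T1.Q2.A7'. A small sketch of pulling the parts back out (a hypothetical
# helper for illustration; the pipeline does its own parsing):
def parse_answer_label(label):
    task, question, answer = label.split('.')
    return int(task[1:]), int(question[1:]), int(answer[1:])

assert parse_answer_label('T1.Q2.A7') == (1, 2, 7)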
Example #6
def test_iaa_constructor(config, tmpdir):
    test_path = test_utils.make_test_directory(config, 'test_test_iaa_evi_q5')
    #source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id='owhdnoicaunhcio32ewda')
    dh.add_row({
        'answer_label': 'T1.Q2.A2',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'Michael'
    })
    dh.add_row({
        'answer_label': 'T1.Q2.A2',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'Dwight'
    })
    fin_path = dh.export()
    data_path = config['data_dir']
    schema_path = data_path + '/schemas'

    iaa_out = calc_agreement_directory(test_path,
                                       schema_path,
                                       config['IAA_config_dir'],
                                       test_utils.texts_dir,
                                       outDirectory=tmpdir)
    print(iaa_out)
    for root, dir, files in os.walk(iaa_out):
        for file in files:
            #should be only 1 file for this case, so just run it on the only one
            # if there's more than 1 then you can get fancy
            out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8')
    #9 answer choices to a checklist question
    assert len(out_df) == 6
Example #7
def test_dep_parent(config):
    # Test that when the parent has a highlight and the children don't, the dependency output carries the parent's highlight down
    iaa_files_path = test_utils.make_test_directory(config, 'dep_parent')
    outpath = test_utils.make_test_directory(config, 'out_dep_parent')
    # source_task_id generated by smashing keyboard
    all_schema=[ [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Probability','highlighted_indices': test_utils.make_highlight_indices(10,30)},{"agreed_Answer": 3, "question_Number": 2, "namespace": 'Covid_Probability'},2],
                 [{"agreed_Answer": 2, "question_Number": 1, "namespace": 'Covid_Languagev1.1','highlighted_indices': test_utils.make_highlight_indices(10,30)},{"agreed_Answer": 3, "question_Number": 3, "namespace": 'Covid_Languagev1.1'},3],
                 [{"agreed_Answer": 4, "question_Number": 15, "namespace": 'Covid_Holisticv1.2','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 1, "question_Number": 16, "namespace": 'Covid_Holisticv1.2'},16],
                 [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Evidence2020_03_21',
                   'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
                  {"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Evidence2020_03_21',
                   'highlighted_indices': test_utils.make_highlight_indices(10, 30)},
                  {"agreed_Answer": 3, "question_Number": 4, "namespace": 'Covid_Evidence2020_03_21'}, 4],
                 [{"agreed_Answer": 1, "question_Number": 1, "namespace": 'Covid_Reasoning', 'highlighted_indices': test_utils.make_highlight_indices(80, 120)},{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 1,
                                                                                      "question_Number": 7,
                                                                                      "namespace": 'Covid_Reasoning'}, 7]
                 ]

    # These two cases fail when added to all_schema; the cause is not yet understood, since they follow the same pattern:
    #[{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Evidence2020_03_21','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 3, "question_Number": 4, "namespace": 'Covid_Evidence2020_03_21'}, 4],
    #[{"agreed_Answer": 1, "question_Number": 2, "namespace": 'Covid_Reasoning','highlighted_indices': test_utils.make_highlight_indices(10, 30)},{"agreed_Answer": 1, "question_Number": 7, "namespace": 'Covid_Reasoning'}, 7]
    for i in all_schema:
        print(i)
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id="auhfdaiughfs")
        for row in i:
            if isinstance(row, dict):
                iaa.add_row(row)
        fin_path = iaa.export()
        data_path = config['data_dir']
        schema_path = data_path + '/schemas'
        dh_path = None #doesn't get used by dependency but is still an argument
        eval_dependency(dh_path, iaa_files_path, schema_path, outpath)
        for root, dir, files in os.walk(outpath):
            for file in files:
                # should be only 1 file for this case, so just run it on the only one
                # if there's more than 1 then you can get fancy
                out_df = pd.read_csv(os.path.join(outpath, file), encoding='utf-8')
                # the child question should carry the parent's highlight
                child_q = out_df[out_df['question_Number'] == i[-1]]
                hl = child_q['highlighted_indices'].iloc[0]
                assert len(hl) > 18
                assert '10' in hl
                assert '29' in hl
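# The rule these dependency tests exercise: a child question with no highlight
# of its own inherits its parent's highlight, while a child that already has
# one (Example #1) keeps it. A toy sketch of that rule on plain dicts,
# illustrative only, not the real eval_dependency:
def inherit_highlight(parent_row, child_row):
    if not child_row.get('highlighted_indices'):
        child_row['highlighted_indices'] = parent_row.get('highlighted_indices')
    return child_row

parent = {'highlighted_indices': '[10, 11, 29]'}
assert inherit_highlight(parent, {})['highlighted_indices'] == '[10, 11, 29]'
assert inherit_highlight(parent, {'highlighted_indices': '[5]'})['highlighted_indices'] == '[5]'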
def test_he_low_info_true_low_counts(config):
    tua_path = test_utils.make_test_directory(
        config, 'he_tua_input_low_info_true_low_counts')
    scoring_path = test_utils.make_test_directory(
        config, 'he_scoring_input_low_info_true_low_counts')
    #out_path = test_utils.make_test_directory(config, 'out_he_low_info_true_low_counts')

    pa = point_assignment(out_folder=scoring_path,
                          article_num='520',
                          source_task_id='practice_makes+[perfect')
    pa.add_row({
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'Answer_Number': 3,
        'points': 5,
        "Question_Number": 5,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(10, 30)
    })

    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    new_tua.add_row({
        'topic_name': 'argument',
        'start_pos': 10,
        'end_pos': 30,
        'tua_uuid': 'test1'
    })

    hol_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    #scientific discovery
    hol_dep.add_row({
        "namespace": "Covid2_Holistic_2020_09_20",
        "agreed_Answer": 5,
        "question_Number": 1,
        "agreement_score": 1,
        "tua_uuid": 'test1'
    })
    hol_dep.export()
    points = eval_triage_scoring(new_tua.df, pa.df, scoring_path)
    #points.to_csv(out_path+'/AssessedPoints.csv', encoding = 'utf-8')
    assert len(points) == 2
    assert points['points'].sum() == 3
def test_import_tags_iaa_when_not_adj(config, tmpdir):
    iaa_path = test_utils.make_test_directory(config,
                                              'imptags_iaa_iaa_when_not_adj')
    adj_path = test_utils.make_test_directory(config,
                                              'imptags_adj_iaa_when_not_adj')
    schema_path = config['data_dir'] + '/schemas'
    schema_namespace = 'Covid_Evidence2020_03_21'
    # source_task_id generated by smashing keyboard
    task_id = 'adjudicated'
    iaa = IAA_task(out_folder=iaa_path, source_task_id=task_id)
    iaa.add_row({
        "question_Number": 1,
        "agreed_Answer": 3,
        'namespace': schema_namespace
    })
    iaa.export()
    adj = adjudicator(out_folder=adj_path, source_task_id=task_id)
    adj.add_row({'topic_name': '01.02.02', 'namespace': schema_namespace})
    adj.export()
    task_id2 = 'not_adjudicated'
    iaa = IAA_task(out_folder=iaa_path, source_task_id=task_id2)
    iaa.add_row({
        "question_Number": 2,
        "agreed_Answer": 2,
        'namespace': schema_namespace
    })
    iaa.export()
    i_tags = import_tags(iaa_path, adj_path, schema_path, tmpdir)
    print('temp dir is:', tmpdir)
    #i_tags is directory holding all the import tags
    for root, dir, files in os.walk(i_tags):
        for file in files:
            #produces 2 files with the same answer/questions in them
            i_df = pd.read_csv(os.path.join(i_tags, file), encoding='utf-8')
            assert len(i_df) == 1
            assert test_utils.count_matching_rows(i_df, {
                'agreed_Answer': 2,
                'question_Number': 2
            }) == 1
            assert test_utils.count_matching_rows(i_df, {
                'agreed_Answer': 1,
                'question_Number': 3
            }) == 0
def test_op_ed_not_op_ed(config):
    out_path = test_utils.make_test_directory(
        config, 'weighting_test_op_ed_not_op_ed')
    weight_out_folder = test_utils.make_test_directory(
        config, 'out_weighting_not_op_ed')
    weight_df = pd.read_csv(weight_path)
    holi_iaa = dep_iaa(out_folder=out_path, source_task_id='holisticIAA')
    # -.5 points, from the weight key in the config folder and the agreement_score
    holi_iaa.add_row({
        "namespace": "Covid2_Holistic_2020_09_20",
        "agreed_Answer": 3,
        "question_Number": 5,
        "agreement_score": 1
    })

    holi_iaa.export()
    reas_iaa = dep_iaa(out_folder=out_path, source_task_id='reasoningIAA')
    reasoning_namespace = "Covid2_Reasoning_2020_09_20"
    reas_iaa.add_row({
        "namespace": reasoning_namespace,
        "agreed_Answer": 1,
        "question_Number": 2,
        "agreement_score": .5
    })
    reas_iaa.export()
    # weighting will output the actual pandas dataframe instead of the directory
    # if you look into the Weighting.py file, you can see the paths it writes to
    weighting_out = launch_Weighting(out_path)
    weighting_out.to_csv(weight_out_folder + "/Point_recs_.csv",
                         encoding='utf-8')

    nm = weight_df[weight_df['namespace'] == reasoning_namespace]
    qa = nm[(nm['Question_Number'] == 2) & (nm['Answer_Number'] == 1)]
    pr = qa['Point_Recommendation'].iloc[0]
    ag_rows = weighting_out[weighting_out['agreement_score'] == .5]
    ag_score = ag_rows['agreement_score'].iloc[0]
    correct_weight = pr * ag_score
    adjusted_points = ag_rows['agreement_adjusted_points'].iloc[0]
    assert adjusted_points == correct_weight, "Q" + str(2) + "A" + str(
        1) + " points: " + str(adjusted_points) + ", weight_df: " + str(
            correct_weight)
def test_language_weighting(config):
    #Import the csv containing the weights for each question
    weight_df = pd.read_csv(weight_path)

    #Set up paths for test data to be stored at
    out_path = test_utils.make_test_directory(
        config, 'weighting_language_calculation_test')
    iaa = dep_iaa(out_folder=out_path, source_task_id='languageweights')

    #Generate an IAA with random agreement scores for each question and answer in the schema
    namespace = "Covid_Languagev1.1"
    weight_df = weight_df[weight_df['namespace'] == namespace]
    for index, row in weight_df.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        agree_score = random.random()
        namespace = "Covid_Languagev1.1"
        iaa.add_row({
            "namespace": namespace,
            "agreed_Answer": answer_num,
            "question_Number": question_num,
            "agreement_score": agree_score
        })

    #Export the data as a dataframe
    fin_path = iaa.export()
    weighting_out = launch_Weighting(out_path)

    #Check that weights (point_recs) * agreement_scores = adjusted_scores
    assert weight_df.shape[0] == weighting_out.shape[0]
    point_recs = weight_df['Point_Recommendation'].to_numpy()
    agreement_scores = weighting_out['agreement_score'].to_numpy()
    adjusted_points = weighting_out['agreement_adjusted_points'].to_numpy()
    print("scores:", agreement_scores[:5])
    print("weights:", point_recs[:5])
    print("adjusted:", adjusted_points[:5])
    assert np.array_equal(point_recs * agreement_scores, adjusted_points)
    for index, row in weighting_out.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        adjusted_points = row['agreement_adjusted_points']
        ag_score = row['agreement_score']
        nm = weight_df[weight_df['namespace'] == namespace]
        qa = nm[(nm['Question_Number'] == question_num)
                & (nm['Answer_Number'] == answer_num)]
        pr = qa['Point_Recommendation'].iloc[0]
        correct_weight = pr * ag_score
        assert adjusted_points == correct_weight, "Q" + str(
            question_num) + "A" + str(answer_num) + " points: " + str(
                adjusted_points) + ", weight_df: " + str(correct_weight)

    print("No differences found in language weighting.")
    print()
Example #12
def test_all_no_parent_pass(config):
    iaa_files_path = test_utils.make_test_directory(config, 'dep_all_orphans_pass')
    out_path = test_utils.make_test_directory(config, 'dep_all_orphans_pass_out')
    #The questions with no parents in the Evidence schema are 1, 9, 12, 13, and 14
    numAnswers = {1:3, 9:3, 12:4, 13:10, 14:10}
    iaa = IAA_task(out_folder=iaa_files_path, source_task_id='batman')
    for i in [1, 9, 12, 13, 14]:
        for j in range(1, numAnswers[i]+1):
            iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": i, "agreed_Answer": j})
    # Question 3 has Question 2 as a parent, so it should never appear in any dependencies
    iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": 3, "agreed_Answer": 1})
    fin_path = iaa.export()
    data_path = config['data_dir']
    schema_path = data_path + '/schemas'
    dh_path = None

    eval_dependency(dh_path, iaa_files_path, schema_path, out_path)
    for root, dir, files in os.walk(out_path):
        for file in files:
            out_df  = pd.read_csv(os.path.join(out_path, file), encoding='utf-8')
            assert len(out_df) == 30
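# Sanity arithmetic behind the 30: each orphan question contributes one output
# row per answer choice, and the Q3 row is dropped because its parent (Q2)
# never passed:
numAnswers = {1: 3, 9: 3, 12: 4, 13: 10, 14: 10}
assert sum(numAnswers.values()) == 30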
Example #13
def test_bad_parent(config):
    iaa_files_path = test_utils.make_test_directory(config, 'dep_bad_dad')
    out_path = test_utils.make_test_directory(config, 'dep_bad_dad_out')

    parents = {1:[2], 2:[3,4,5], 5:[6], 9:[10,11]}
    childNumAnswers = {2:9, 3:1, 4:6, 5:5, 6:3, 7:1, 8:5, 10:5, 11:5}
    for parent in parents:
        iaa = IAA_task(out_folder=iaa_files_path, source_task_id='gru' + str(parent))
        iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": parent, "agreed_Answer": 'U'})
        for child in parents[parent]:
            for j in range(1, childNumAnswers[child]+1):
                iaa.add_row({"namespace": "Covid_Evidence2020_03_21", "question_Number": child, "agreed_Answer": j})
        fin_path = iaa.export()
    data_path = config['data_dir']
    schema_path = data_path + '/schemas'
    dh_path = None

    eval_dependency(dh_path, iaa_files_path, schema_path, out_path)
    for root, dir, files in os.walk(out_path):
        for file in files:
            out_df  = pd.read_csv(os.path.join(out_path, file), encoding='utf-8')
            assert len(out_df) == 0, "failing file is " + str(file)
def test_import_tags_adj_1_iaa_1_disagree(config, tmpdir):
    iaa_path = test_utils.make_test_directory(
        config, 'imptags_iaa_1_iaa_1_adj_disagree')
    adj_path = test_utils.make_test_directory(
        config, 'imptags_adj_1_iaa_1_adj_disagree')
    schema_path = config['data_dir'] + '/schemas'
    schema_namespace = 'Covid_Evidence2020_03_21'
    # source_task_id generated by smashing keyboard
    task_id = 'nc87wehcolfg6caanc9w'
    iaa = IAA_task(out_folder=iaa_path, source_task_id=task_id)
    iaa.add_row({
        "question_Number": 1,
        "agreed_Answer": 3,
        'namespace': schema_namespace
    })
    iaa.export()
    adj = adjudicator(out_folder=adj_path, source_task_id=task_id)
    adj.add_row({'topic_name': '01.02.02', 'namespace': schema_namespace})
    adj.export()
    i_tags = import_tags(iaa_path, adj_path, schema_path, tmpdir)
    print('temp dir is:', tmpdir)
    #i_tags is directory holding all the import tags
    for root, dir, files in os.walk(i_tags):
        for file in files:
            #should be only 1 file for this case, so just run it on the only one
            # if there's more than 1 then you can get fancy
            i_df = pd.read_csv(os.path.join(i_tags, file), encoding='utf-8')

    assert len(i_df) == 1
    assert test_utils.count_matching_rows(i_df, {
        'agreed_Answer': 2,
        'question_Number': 2
    }) == 1
    assert test_utils.count_matching_rows(i_df, {
        'agreed_Answer': 1,
        'question_Number': 3
    }) == 0
def test_evidence_weighting(config):
    weight_df = pd.read_csv(weight_path)

    out_path = test_utils.make_test_directory(
        config, 'weighting_evidence_calculation_test')
    iaa = dep_iaa(out_folder=out_path, source_task_id='evidenceweights')
    namespace = "Covid2_Evidence2020_09_20"
    weight_df = weight_df[weight_df['namespace'] == namespace]
    for index, row in weight_df.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        agree_score = random.random()
        namespace = "Covid2_Evidence2020_09_20"
        iaa.add_row({
            "namespace": namespace,
            "agreed_Answer": answer_num,
            "question_Number": question_num,
            "agreement_score": agree_score
        })

    fin_path = iaa.export()
    weighting_out = launch_Weighting(out_path)

    assert weight_df.shape[0] == weighting_out.shape[0]
    point_recs = weight_df['Point_Recommendation'].to_numpy()
    agreement_scores = weighting_out['agreement_score'].to_numpy()
    adjusted_points = weighting_out['agreement_adjusted_points'].to_numpy()
    print("scores:", agreement_scores[:5])
    print("weights:", point_recs[:5])
    print("adjusted:", adjusted_points[:5])
    assert np.array_equal(point_recs * agreement_scores, adjusted_points)
    for index, row in weighting_out.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        adjusted_points = row['agreement_adjusted_points']
        ag_score = row['agreement_score']
        nm = weight_df[weight_df['namespace'] == namespace]
        qa = nm[(nm['Question_Number'] == question_num)
                & (nm['Answer_Number'] == answer_num)]
        pr = qa['Point_Recommendation'].iloc[0]
        correct_weight = pr * ag_score
        assert adjusted_points == correct_weight, "Q" + str(
            question_num) + "A" + str(answer_num) + " points: " + str(
                adjusted_points) + ", weight_df: " + str(correct_weight)

    print("No differences found in evidence weighting.")
    print()
def test_iaa_evi_3q(config):
    out_path = test_utils.make_test_directory(config, 'iaa_evi_q5')
    #source_task_id generated by smashing keyboard
    iaa = IAA_task(out_folder=out_path,
                   source_task_id='kjncsa87nxao21899102j1j2')
    iaa.add_row({"agreed_Answer": 800, "question_Number": 8})
    iaa.add_row({"agreed_Answer": 800, "question_Number": 5})
    iaa.add_row({"agreed_Answer": 800, "question_Number": 5})
    fin_path = iaa.export()
    read_iaa = pd.read_csv(fin_path, encoding='utf-8')
    assert len(read_iaa) == 3
    count = test_utils.count_matching_rows(read_iaa, {
        'agreed_Answer': 800,
        'question_Number': 8
    })
    assert count == 1
    count = test_utils.count_matching_rows(read_iaa, {'agreed_Answer': 800})
    assert count == 3
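# test_utils.count_matching_rows is used throughout as "how many rows match
# every column/value pair in this dict". A compatible sketch (hypothetical;
# the real helper lives in test_utils):
import pandas as pd

def count_matching_rows(df, filters):
    mask = pd.Series(True, index=df.index)
    for col, val in filters.items():
        mask &= df[col] == val
    return int(mask.sum())

df = pd.DataFrame({'agreed_Answer': [800, 800, 800],
                   'question_Number': [8, 5, 5]})
assert count_matching_rows(df, {'agreed_Answer': 800, 'question_Number': 8}) == 1
assert count_matching_rows(df, {'agreed_Answer': 800}) == 3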
def test_random_language(config):
    #Import the csv containing the weights for each question
    weight_df = pd.read_csv(weight_path)
    namespace = "Covid_Languagev1.1"
    weight_df = weight_df[weight_df['namespace'] == namespace]

    #Set up paths for test data to be stored at
    out_path = test_utils.make_test_directory(
        config, 'weighting_language_random_test')

    #Create IAA file with 3 random rows with agreement scores of 1
    iaa = dep_iaa(out_folder=out_path, source_task_id='3random')
    sample_df = weight_df.sample(3)

    for index, row in sample_df.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        iaa.add_row({
            "namespace": namespace,
            "agreed_Answer": answer_num,
            "question_Number": question_num,
            "agreement_score": 1
        })

    #Export the data as a dataframe and check if all 3 rows have the correct adjusted weight
    fin_path = iaa.export()
    weighting_out = launch_Weighting(out_path)
    assert weighting_out.shape[0] == 3
    for index, row in weighting_out.iterrows():
        question_num = row['Question_Number']
        answer_num = row['Answer_Number']
        adjusted_points = row['agreement_adjusted_points']
        ag_score = row['agreement_score']
        nm = weight_df[weight_df['namespace'] == namespace]
        qa = nm[(nm['Question_Number'] == question_num)
                & (nm['Answer_Number'] == answer_num)]
        pr = qa['Point_Recommendation'].iloc[0]
        correct_weight = pr * ag_score
        assert adjusted_points == correct_weight, "Q" + str(
            question_num) + "A" + str(answer_num) + " points: " + str(
                adjusted_points) + ", weight_df: " + str(correct_weight)

        print("language random row", index + 1, "checks out")
Example #18
def test_iaa_checklist_maxans(config, tmpdir):
    test_path = test_utils.make_test_directory(config,
                                               'test_iaa_checklist_max_ans')
    # source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id='apply_to_all')
    for i in range(5):
        dh.add_row({
            'answer_label': 'T1.Q2.A7',
            'namespace': 'Sources2021_02_23',
            'contributor_uuid': 'mturk' + str(i)
        })
    for i in range(5):
        dh.add_row({
            'answer_label': 'T1.Q10.A7',
            'namespace': 'Sources2021_02_23',
            'contributor_uuid': 'mturk' + str(i)
        })
    fin_path = dh.export()
    data_path = config['data_dir']
    schema_path = data_path + '/schemas'

    iaa_out = calc_agreement_directory(test_path,
                                       schema_path,
                                       config['IAA_config_dir'],
                                       test_utils.texts_dir,
                                       outDirectory=tmpdir)
    print(iaa_out)
    for root, dir, files in os.walk(iaa_out):
        for file in files:
            # should be only 1 file for this case, so just run it on the only one
            # if there's more than 1 then you can get fancy
            out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8')
            print(out_df['agreed_Answer'])
            temp = out_df['agreed_Answer'].tolist()
            # for i in range(9):
            #     assert temp[i] == 'L'
            print("++++++++++++++")
            print(out_df['coding_perc_agreement'].tolist())
Example #19
def test_highlights(config, tmpdir):
    test_path = test_utils.make_test_directory(config, 'test_iaa_highlights')

    # source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id='highlights')

    for i in range(10):
        dh.add_row({'answer_label': 'T1.Q2.A' + str((i % 3) + 1), 'namespace': 'Covid_Evidence2020_03_21',
                    'contributor_uuid': 'Daniel' + str(i), 'start_pos': 0, 'end_pos': 20})
    fin_path = dh.export()

    data_path = config['data_dir']
    schema_path = data_path + '/schemas'

    # out_path = test_utils.make_test_directory(config, 'out_iaa_hl_everythingpass')
    iaa_out = calc_agreement_directory(test_path, schema_path, config['IAA_config_dir'], test_utils.texts_dir,
                                       outDirectory=tmpdir)
    print(iaa_out)
    for root, dir, files in os.walk(iaa_out):
        for file in files:
            # should be only 1 file for this case, so just run it on the only one
            # if there's more than 1 then you can get fancy
            out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8')
            temp = out_df["highlighted_indices"]

            for i in range(8):
                counter = 1
                # strip the surrounding brackets, then split into index strings
                indices = [s.strip() for s in temp[i][1:-1].split(',')]
                if indices != ['']:
                    for num in indices:
                        assert int(num) == counter
                        counter += 1

            print("++++++++++++++")
            print(out_df['coding_perc_agreement'].tolist())
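# The hand-rolled parsing above turns the CSV'd highlighted_indices string
# ('[1, 2, 3]') back into numbers. ast.literal_eval does the same job more
# directly, assuming the column really is a stringified Python list:
import ast

def parse_highlights(cell):
    # empty highlight lists round-trip as '[]'
    return ast.literal_eval(cell) if cell else []

assert parse_highlights('[1, 2, 3]') == [1, 2, 3]
assert parse_highlights('[]') == []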
def test_op_ed_when_op_ed(config):
    out_path = test_utils.make_test_directory(
        config, 'weighting_test_op_ed_when_op_ed')
    weight_df = pd.read_csv(weight_path)
    holi_iaa = dep_iaa(out_folder=out_path, source_task_id='holisticIAA')
    holi_namespace = "Covid2_Holistic_2020_09_20"
    holi_iaa.add_row({
        "namespace": holi_namespace,
        "agreed_Answer": 3,
        "question_Number": 1,
        "agreement_score": 1
    })

    holi_iaa.export()
    reas_iaa = dep_iaa(out_folder=out_path, source_task_id='reasoningIAA')
    reasoning_namespace = "Covid2_Reasoning_2020_09_20"
    reas_iaa.add_row({
        "namespace": reasoning_namespace,
        "agreed_Answer": 1,
        "question_Number": 2,
        "agreement_score": .5
    })
    reas_iaa.export()
    # weighting will output the actual pandas dataframe instead of the directory
    # if you look into the Weighting.py file, you can see the paths it writes to
    weighting_out = launch_Weighting(out_path)
    nm = weight_df[weight_df['namespace'] == reasoning_namespace]
    qa = nm[(nm['Question_Number'] == 2) & (nm['Answer_Number'] == 1)]
    pr = qa['Op-Ed'].iloc[0]
    ag_rows = weighting_out[weighting_out['agreement_score'] == .5]
    ag_score = ag_rows['agreement_score'].iloc[0]
    correct_weight = pr * ag_score
    adjusted_points = ag_rows['agreement_adjusted_points'].iloc[0]
    assert adjusted_points == correct_weight, "Q" + str(2) + "A" + str(
        1) + " points: " + str(adjusted_points) + ", weight_df: " + str(
            correct_weight)
Example #21
def test_master(config):
    dh_path = test_utils.make_test_directory(config, 'mn_dh_')
    iaa_path = test_utils.make_test_directory(config, 'out_mn_iaa')
    scoring_path = test_utils.make_test_directory(config, 'out_mn_scoring')
    tua_path = test_utils.make_test_directory(config, 'mn_tua_')
    viz_path = test_utils.make_test_directory(config, 'out_mn_viz')
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh1',
                  article_num='520',
                  article_text_length=2900)
    for i in range(9):
        for j in range(i + 1):
            dh.add_row({
                'answer_label': 'T1.Q2.A' + str(j),
                'namespace': 'Covid2_Reasoning_2020_09_20',
                'contributor_uuid': 'Daniel' + str(i)
            })
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh2',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Probability2020_09_20',
            20,
            50,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x * 2)
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh3',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Probability2020_09_20',
            100,
            150,
            start_shift=lambda x: x - 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: 2)
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh4',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Probability2020_09_20',
            100,
            150,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: (x + 8) / 5)
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh5',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Holistic_2020_09_20',
            0,
            0,
            start_shift=lambda x: x,
            end_shift=lambda x: x,
            answer=lambda x: 1)
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh6',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Evidence2020_09_20',
            100,
            150,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: (x + 6) / 4)
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh7',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Sources_2002_09_20',
            100,
            150,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: 6,
            tua='qs1')
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh8',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Sources_2002_09_20',
            500,
            760,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: 6,
            tua='qs2')
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh9',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Sources_2002_09_20',
            100,
            150,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: 6,
            tua='qs3')
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh10',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Sources_2002_09_20',
            500,
            760,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: 6,
            tua='qs4')
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh11',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Sources_2002_09_20',
            100,
            150,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: 6,
            tua='qs5')
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh12',
                  article_num='520',
                  article_text_length=2900)
    make_dh(dh,
            'Covid2_Sources_2002_09_20',
            500,
            760,
            start_shift=lambda x: x + 5,
            end_shift=lambda x: x + 5,
            answer=lambda x: 6,
            tua='qs6')
    dh.export()
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh13',
                  article_num='520',
                  article_text_length=2900)
    for i in range(9):
        for j in range(7):
            dh.add_row({
                'answer_label': 'T1.Q1.A' + str(j),
                'namespace': 'Covid_Languagev1.1',
                'contributor_uuid': 'User' + str(i),
                'start_pos': 10 * j,
                'end_pos': 12 * j
            })
    for i in range(9):
        for j in range(7):
            dh.add_row({
                'answer_label': 'T1.Q1.A' + str(j),
                'namespace': 'Covid_Languagev1.1',
                'contributor_uuid': 'User' + str(i),
                'start_pos': 19 * j,
                'end_pos': 20 * j + 10
            })
    make_dh(
        dh,
        'Covid_Languagev1.1',
        1,
        20,
        start_shift=lambda x: x + 5,
        end_shift=lambda x: x + 5,
        answer=lambda x: (x + 2) % 4,
    )
    dh.export()

    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    new_tua.add_row({
        'topic_name': 'Assertions',
        'start_pos': 10,
        'end_pos': 30,
        'tua_uuid': 'a1'
    })
    new_tua.add_row({
        'topic_name': 'Assertions',
        'start_pos': 40,
        'end_pos': 80,
        'tua_uuid': 'a2'
    })
    new_tua.add_row({
        'topic_name': 'Assertions',
        'start_pos': 40,
        'end_pos': 80,
        'tua_uuid': 'a3'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 20,
        'end_pos': 80,
        'tua_uuid': 'qs1'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 40,
        'end_pos': 60,
        'tua_uuid': 'qs2'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 20,
        'end_pos': 80,
        'tua_uuid': 'qs3'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 40,
        'end_pos': 60,
        'tua_uuid': 'qs4'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 20,
        'end_pos': 80,
        'tua_uuid': 'qs5'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 40,
        'end_pos': 60,
        'tua_uuid': 'qs6'
    })
    new_tua.export()
    calculate_scores_master(dh_path,
                            config['test_dir'] + config['texts_dir'],
                            config['IAA_config_dir'],
                            config['schema_dir'],
                            iaa_path,
                            scoring_path,
                            push_aws=False,
                            tua_dir=tua_path,
                            viz_dir=viz_path,
                            reporting=True)
    for root, dir, files in os.walk(viz_path):
        for file in files:
            viz_file = pd.read_csv(viz_path + '/' + file, encoding='utf-8')
    print(len(viz_file))
    points = viz_file['Points'].dropna()
    assert sum(points) < -31
    assert sum(points) > -31.1
def test_he_vague_sources_false(config):
    tua_path = test_utils.make_test_directory(config,
                                              'he_tua_vague_sources_false')
    scoring_path = test_utils.make_test_directory(
        config, 'he_scoring_vague_sources_false')
    out_path = test_utils.make_test_directory(config,
                                              'out_he_vague_sources_false')
    # a 2800-character article is considered standard; the threshold for scoring is 4 vague sources per 2800 characters
    pa = point_assignment(out_folder=scoring_path,
                          article_num='520',
                          source_task_id='practice_makes+[perfect',
                          article_text_length=2900)
    pa.add_row({
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'Answer_Number': 3,
        'points': 0,
        "Question_Number": 5,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(10, 30)
    })
    pa.export()
    src_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='qs1',
                      article_num='520',
                      article_text_length=2900)
    # scientific discovery
    src_dep.add_row({
        'namespace': 'Covid2_Sources_2002_09_20',
        'agreed_Answer': 5,
        "question_Number": 2,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(10, 30),
        'tua_uuid': 'tua3'
    })
    src_dep.export()
    src_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='qs2',
                      article_num='520',
                      article_text_length=2900)
    src_dep.add_row({
        'namespace': 'Covid2_Sources_2002_09_20',
        'agreed_Answer': 6,
        "question_Number": 2,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(15, 38),
        'tua_uuid': 'tua3'
    })
    src_dep.add_row({
        'namespace': 'Covid2_Sources_2002_09_20',
        'agreed_Answer': 8,
        "question_Number": 5,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(7, 27),
        'tua_uuid': 'tua3'
    })
    src_dep.export()
    src_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='qs3',
                      article_num='520',
                      article_text_length=2900)
    src_dep.add_row({
        'namespace': 'Covid2_Sources_2002_09_20',
        'agreed_Answer': 7,
        "question_Number": 5,
        'agreement_score': 1,
        'highlighted_indices': test_utils.make_highlight_indices(15, 38),
        'tua_uuid': 'tua3'
    })
    src_dep.export()

    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id',
                  article_text_length=2900)
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 10,
        'end_pos': 30,
        'tua_uuid': 'tua1'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 50,
        'end_pos': 120,
        'tua_uuid': 'tua2'
    })
    new_tua.add_row({
        'topic_name': 'Quoted Sources',
        'start_pos': 900,
        'end_pos': 1020,
        'tua_uuid': 'tua3'
    })
    new_tua.export()

    points = eval_triage_scoring(new_tua.df, pa.df, scoring_path)
    points.to_csv(out_path + '/AssessedPoints.csv', encoding='utf-8')

    assert points['points'].sum() == -4
    assert len(points) == 3
    hl = points[points['points'] == -2]['highlighted_indices'].iloc[0]
    assert all([str(i) in hl for i in range(900, 1020)])
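# A sketch of the threshold scaling described in the comment above, assuming
# the baseline of 4 vague sources is scaled linearly by article length (the
# real logic lives in the triage scoring code):
def vague_source_threshold(article_text_length, base=4, standard_length=2800):
    return base * article_text_length / standard_length

assert round(vague_source_threshold(2900), 2) == 4.14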
Example #23
            raise NameError(
                'Params', params,
                ' must include values for namespace, Question_Number, Answer_Number, and agreement_adjusted_points'
            )
        new_row['schema'] = self.schema
        return new_row

    def set_out_name(self, filetype, source_task_id):
        return 'SortedPts.csv'


if __name__ == '__main__':
    # this is broken because it isn't given real path data
    with open('test_config.json') as json_file:
        config = json.load(json_file)
    dh_path = test_utils.make_test_directory(config, 'mn_dh_')
    dh = datahunt(out_folder=dh_path,
                  source_task_id='dh13',
                  article_num='520',
                  article_text_length=2900)
    for i in range(9):
        for j in range(7):
            dh.add_row({
                'answer_label': 'T1.Q1.A' + str(j),
                'namespace': 'Covid_Languagev1.1',
                'contributor_uuid': 'User' + str(i),
                'start_pos': 10 * j,
                'end_pos': 12 * j
            })
    for i in range(9):
        for j in range(7):
            dh.add_row({
                'answer_label': 'T1.Q1.A' + str(j),
                'namespace': 'Covid_Languagev1.1',
                'contributor_uuid': 'User' + str(i),
                'start_pos': 19 * j,
                'end_pos': 20 * j + 10
            })
Example #24
def test_point_assignment_source(config):
    tua_path = test_utils.make_test_directory(config, 'pa_source_tua')
    scoring_path = test_utils.make_test_directory(config, 'pa_source_dep')

    weight = weighted(out_folder=scoring_path,
                      article_num='520',
                      source_task_id='source_scaling')
    for i in range(1, 8):
        hl = ((i - 1) * 10, i * 10 - 1)  # creates highlights (0, 9) for 1, (10, 19) for 2, etc.
        weight.add_row({
            'schema': 'Probability',
            'namespace': 'Covid_Probability',
            'Answer_Number': 1,
            'agreement_adjusted_points': 5,
            "Question_Number": i,
            'agreement_score': 1,
            'highlighted_indices': test_utils.make_highlight_indices(hl[0], hl[1])
        })
    weight_df = weight.df

    #Note that each TUA should have its own ID and unique set of highlights (TUA highlights should not overlap!)
    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    for i in range(1, 8):
        hl = ((i - 1) * 10, i * 10 - 1)
        new_tua.add_row({
            'topic_name': 'source',
            'start_pos': hl[0],
            'end_pos': hl[1],
            'tua_uuid': str(i)
        })

    arg_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    for i in range(1, 8):
        arg_dep.add_row({
            "namespace": "Covid_Sources_2002_03_20v2.1",
            "agreed_Answer": i,
            "question_Number": 8,
            "agreement_score": 1,
            "tua_uuid": str(i)
        })
    new_tua.export()
    arg_dep.export()

    tuas, weights, tua_raw = pointSort(
        scoring_path,
        input_dir=None,
        weights=weight_df,
        scale_guide_dir=config['IAA_config_dir'] +
        '/point_assignment_scaling_guide.csv',
        tua_dir=tua_path,
        reporting=True)
    print("WEIGHTS:", weights)
    assert len(weights) == 7
    #Ensure all point assignments are accurate with point_assignment_scaling_guide.csv
    assert weights['points'].iloc[0] == 5 * 2
    assert weights['points'].iloc[1] == 5 * 1.5
    assert weights['points'].iloc[2] == 5 * 1
    assert weights['points'].iloc[3] == 5 * 0.5
    assert weights['points'].iloc[4] == 5 * 0
    assert weights['points'].iloc[5] == 5 * -0.5
    assert weights['points'].iloc[6] == 5 * 0
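# As inferred from the asserts above, the source scaling guide maps source
# answers 1-7 to these multipliers on agreement_adjusted_points:
source_scaling = {1: 2, 2: 1.5, 3: 1, 4: 0.5, 5: 0, 6: -0.5, 7: 0}
assert [5 * source_scaling[i] for i in range(1, 8)] == \
    [10, 7.5, 5, 2.5, 0, -2.5, 0]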
Example #25
def test_point_assignment_source_in_weight_highlight(config):
    tua_path = test_utils.make_test_directory(config, 'pa_source_wh_tua')
    scoring_path = test_utils.make_test_directory(config, 'pa_source_wh_dep')

    weight = weighted(out_folder=scoring_path,
                      article_num='520',
                      source_task_id='source_scaling')
    for i in range(1, 8):
        hl = ((i - 1) * 10, i * 10 - 1)  # (0, 9), (10, 19), ...
        weight.add_row({
            'schema': 'Probability',
            'namespace': 'Covid_Probability',
            'Answer_Number': 1,
            'agreement_adjusted_points': 9,
            "Question_Number": i,
            'agreement_score': 1,
            'highlighted_indices': test_utils.make_highlight_indices(hl[0], hl[1])
        })
    weight_df = weight.df

    new_tua = tua(out_folder=tua_path,
                  article_num='520',
                  source_task_id='tua_task_id')
    for i in range(1, 8):
        hl = ((i - 1) * 10 + 1, i * 10 - 2)  # (1, 8), (11, 18), ...
        new_tua.add_row({
            'topic_name': 'source',
            'start_pos': hl[0],
            'end_pos': hl[1],
            'tua_uuid': str(i)
        })

    arg_dep = dep_iaa(out_folder=scoring_path,
                      source_task_id='doesnt matter',
                      article_num='520')
    for i in range(1, 8):
        arg_dep.add_row({
            "namespace": "Covid_Sources_2002_03_20v2.1",
            "agreed_Answer": i,
            "question_Number": 8,
            "agreement_score": 1,
            "tua_uuid": str(i)
        })
    new_tua.export()
    arg_dep.export()

    tuas, weights, tua_raw = pointSort(
        scoring_path,
        input_dir=None,
        weights=weight_df,
        scale_guide_dir=config['IAA_config_dir'] +
        '/point_assignment_scaling_guide.csv',
        tua_dir=tua_path,
        reporting=True)

    assert len(weights) == 7
    assert weights['points'].iloc[0] == 9 * 2
    assert weights['points'].iloc[1] == 9 * 1.5
    assert weights['points'].iloc[2] == 9 * 1
    assert weights['points'].iloc[3] == 9 * 0.5
    assert weights['points'].iloc[4] == 9 * 0
    assert weights['points'].iloc[5] == 9 * -0.5
    assert weights['points'].iloc[6] == 9 * 0
Example #26
def test_checklist_coding_multiple_hl(config):
    test_path = test_utils.make_test_directory(
        config, 'test_iaa_checklist_coding_multiple_hl')
    out_path = test_utils.make_test_directory(
        config, 'out_test_iaa_checklist_coding_multiple_hl')
    # source_task_id generated by smashing keyboard
    dh = datahunt(out_folder=test_path, source_task_id='highlights')

    dh.add_row({
        'answer_label': 'T1.Q2.A2',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'C1',
        'start_pos': 0,
        'end_pos': 20
    })
    dh.add_row({
        'answer_label': 'T1.Q2.A2',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'C1',
        'start_pos': 30,
        'end_pos': 80
    })
    dh.add_row({
        'answer_label': 'T1.Q2.A4',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'C1',
        'start_pos': 30,
        'end_pos': 80
    })
    dh.add_row({
        'answer_label': 'T1.Q2.A4',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'C2',
        'start_pos': 30,
        'end_pos': 80
    })
    dh.add_row({
        'answer_label': 'T1.Q2.A4',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'C3',
        'start_pos': 30,
        'end_pos': 80
    })
    dh.add_row({
        'answer_label': 'T1.Q2.A4',
        'namespace': 'Covid2_Reasoning_2020_09_20',
        'contributor_uuid': 'C5',
        'start_pos': 30,
        'end_pos': 80
    })
    fin_path = dh.export()

    data_path = config['data_dir']
    schema_path = data_path + '/schemas'

    # out_path = test_utils.make_test_directory(config, 'out_iaa_hl_everythingpass')
    iaa_out = calc_agreement_directory(test_path,
                                       schema_path,
                                       config['IAA_config_dir'],
                                       test_utils.texts_dir,
                                       outDirectory=out_path)
    for root, dir, files in os.walk(out_path):
        for file in files:
            # should be only 1 file for this case, so just run it on the only one
            # if there's more than 1 then you can get fancy
            out_df = pd.read_csv(os.path.join(iaa_out, file), encoding='utf-8')
    out_df['agreed_Answer'] = out_df['agreed_Answer'].apply(str)
    answers = out_df['agreed_Answer']
    print('answers', answers.isin(['4']))
    assert not answers.isin(['2']).any()
    assert len(out_df) == 6
    assert answers.isin(['4']).any()
    row_4 = out_df[out_df['agreed_Answer'] == '4']
    print(row_4.columns)

    print("row 4\n", row_4)
    print(row_4['highlighted_indices'])
    r4_hl = row_4['highlighted_indices'].iloc[0]
    assert all([str(i) in r4_hl for i in range(30, 80)])