示例#1
0
def checkIdenticals():
    """Report which 2011 WOS ids of the new data also occur in the old data.

    Loads the old data via ``ptd.getDataWithMeta()`` and the new data via
    ``ptd.getUnlabelledData()``, restricts both to publication year 2011 and
    compares their ``WOS`` values.  Everything is reported on stdout: the
    sizes of the inputs, a five-item sample of each id list, every matching
    id, the number of matches, and the size of
    ``ptd.getUnlabelledDataAsList()`` before and after the matching records
    are dropped.

    A new-side id is reported once per occurrence in the new data when it is
    present among the old ids, regardless of how many times the old data
    repeats it.

    Returns:
        None.  The filtered list is only used for the printed length.
    """
    old = ptd.getDataWithMeta()
    old_2011 = old[old.Publication_year == 2011]
    old_2011_wos = old_2011.WOS.tolist()

    new = ptd.getUnlabelledData()
    print("len of new data: {}".format(len(new)))
    # NOTE(review): the year is compared as the string "2011" here but as the
    # int 2011 above; presumably the two loaders yield different column
    # types -- confirm against ptd.
    new_2011 = new[new.Publication_year == "2011"]
    new_2011_wos = new_2011.WOS.tolist()

    print("old length 2011: {}".format(len(old_2011_wos)))
    print("new length 2011: {}".format(len(new_2011_wos)))

    # Single-argument print(...) behaves the same as a statement on Python 2
    # and as a function on Python 3.
    print(old_2011_wos[:5])
    print(new_2011_wos[:5])

    # Set lookup replaces the pairwise comparison of both id lists.
    old_wos_seen = set(old_2011_wos)
    identical = []
    for wos in new_2011_wos:
        # ``wos == wos`` is False only for NaN; a missing id must never be
        # treated as a match, which plain set membership could do by identity.
        if wos == wos and wos in old_wos_seen:
            print("{}\n{}\n".format(wos, wos))
            identical.append(wos)

    print("Number of identical papers = {}".format(len(identical)))

    new_data = ptd.getUnlabelledDataAsList()

    # NOTE: the labels below say "old", but the list being measured is the
    # unlabelled (new) data; the wording is kept so the output is unchanged.
    print("len of old before: {}".format(len(new_data)))
    identical_lookup = set(identical)
    new_data_after = [
        dic for dic in new_data if dic["WOS"] not in identical_lookup
    ]

    print("len of old after: {}".format(len(new_data_after)))
示例#2
0
# Tally the predicted stance labels.  ``favor_c`` and ``against_c`` are
# incremented in place, so they must already be defined when this runs;
# anything that is neither "FAVOR" nor "AGAINST" is counted in ``none_c``.
none_c = 0
for label in predictions:
    if label == "FAVOR":
        favor_c += 1
    elif label == "AGAINST":
        against_c += 1
    else:
        none_c += 1

# Emit the overall distribution as one newline-joined report.
summary_lines = [
    "\nThe distribution of predictions are: ",
    "\tFAVOR:  \t{}".format(favor_c),
    "\tAGAINST:\t{}".format(against_c),
    "\tNONE:   \t{}".format(none_c),
]
print("\n".join(summary_lines))

# Collect the distinct publication years first: ``unlabelled_data`` is rebound
# to the result of ptd.getUnlabelledDataAsList() by the statement after it.
all_years = unlabelled_data.Publication_year.tolist()
unique_years = list(set(all_years))
unlabelled_data = ptd.getUnlabelledDataAsList()

# Attach each prediction to the record found at the same position.
for position, record in enumerate(unlabelled_data):
    record["Stance"] = predictions[position]

for year in unique_years:
    favor_c = 0
    against_c = 0
    none_c = 0
    for dic in unlabelled_data:
        if dic["Publication_year"] == year:
            if dic["Stance"] == "AGAINST":
                against_c += 1
            elif dic["Stance"] == "FAVOR":
                favor_c += 1
            else: