示例#1
0
def getInfoAboutStance(stance="All"):
    """Print a metadata report for one stance, or for every record.

    Args:
        stance: Value handed to ``getStanceData`` to select the records.
            The default ``"All"`` skips the filtering and uses every record
            returned by ``ptd.getMetaDataAsList()``.

    Returns:
        None. The report is written to stdout; the body of each section is
        delegated to its helper (``getTitleInfo``, ``getPublicationYear``,
        ``getLanguage``, ``getRefs``, ``getOrgInfo``, ``getPubLength``,
        ``getAuthor``, ``getHeader``, ``getSubject``).
    """
    if stance == "All":
        data = pd.DataFrame(ptd.getMetaDataAsList())
    else:
        data = getStanceData(stance)

    rule = 80 * "-"

    # Every print below is a call with exactly one argument: that form emits
    # the same text on Python 2 and Python 3, unlike `print x` statements
    # (SyntaxError on 3) or a bare `print` (prints nothing on 3).
    print("SHOWING INFORMATION FOR THE SELECTED STANCE: '{}'\n".format(stance))
    print("Data consist of {} records".format(len(data)))
    print("")

    # (title, draw a rule under the title?, reporter).
    # The lambdas postpone both the column access and the helper lookup until
    # the section is actually reached, so a failure in a late section still
    # lets the earlier sections print.
    sections = [
        ("TITLE INFO", False, lambda: getTitleInfo(data.Title)),
        ("YEAR INFO", False, lambda: getPublicationYear(data.Publication_year)),
        ("LANGUAGE INFO", False, lambda: getLanguage(data.Language)),
        ("REFERENCE INFO", False, lambda: getRefs(data.References)),
        ("ORGANIZATION INFO", False, lambda: getOrgInfo(data.Organization_info)),
        ("PUBLICATION LENGTH", False, lambda: getPubLength(data.Publication_length)),
        ("AUTHOR INFO", False, lambda: getAuthor(data.Authors)),
        ("HEADER AND SUB-HEADER INFO ", True,
         lambda: getHeader(data.Headers, data.Sub_headers)),
        ("SUBJECT INFO", True, lambda: getSubject(data.Subjects)),
    ]

    last = len(sections) - 1
    for index, (title, rule_under_title, report) in enumerate(sections):
        print(rule)
        print(title)
        if rule_under_title:
            print(rule)
        report()
        # The final section is not followed by a closing rule.
        if index != last:
            print(rule + "\n\n")
示例#2
0
def storeLanguageToJson(stance="All"):
    """Count how often each language occurs and store the counts as JSON.

    The ``Language`` column is taken from every record when ``stance`` is
    ``"All"``, otherwise from ``getStanceData(stance)``. Missing values are
    ignored. A short summary and the per-language counts are printed, then
    the ``{language: count}`` mapping is written with ``json.dump``.

    Args:
        stance: Stance used to select records, or ``"All"`` for no filtering.

    Returns:
        None.
    """
    if stance == "All":
        column = pd.DataFrame(ptd.getMetaDataAsList()).Language
    else:
        column = getStanceData(stance).Language

    # dropna() returns a new Series, so nothing is modified in place.
    # The literal string "nan" is filtered as well because it has always been
    # treated as "missing" by this function.
    languages = [name for name in column.dropna().tolist() if name != "nan"]

    uniq = list(set(languages))
    print("There are {} different languages".format(len(uniq)))
    print("These languages are:\n{}".format(uniq))

    # Integer counters: occurrences are whole numbers, so they are reported
    # and serialised as e.g. 3 rather than 3.0.
    counts = dict.fromkeys(uniq, 0)
    for name in languages:
        counts[name] += 1
    for name in counts:
        print("{} \t: {}".format(name, counts[name]))

    # NOTE(review): the file name is fixed to lang_NONE.json whatever `stance`
    # is, so runs for different stances overwrite each other. Left unchanged
    # because the consumers of this file are not visible here - confirm
    # whether it should be derived from `stance`.
    with open("../TextFiles/meta_data/lang_NONE.json", "w") as f:
        json.dump(counts, f)
示例#3
0
def getAllData():
    """Return every metadata record as a pandas DataFrame.

    The records come from ``ptd.getMetaDataAsList()`` and are handed to
    ``pd.DataFrame`` without any filtering.
    """
    return pd.DataFrame(ptd.getMetaDataAsList())
示例#4
0
def getAllData():
    """Build a pandas DataFrame from the complete metadata list.

    Returns:
        ``pd.DataFrame`` constructed from ``ptd.getMetaDataAsList()``; no rows
        are filtered out.
    """
    return pd.DataFrame(ptd.getMetaDataAsList())
示例#5
0
def getStanceData(stance):
    """Return the metadata rows whose ``Stance`` column equals ``stance``.

    Args:
        stance: Value compared (with ``==``) against the ``Stance`` column.

    Returns:
        The matching subset of the DataFrame built from
        ``ptd.getMetaDataAsList()``.
    """
    frame = pd.DataFrame(ptd.getMetaDataAsList())
    matches = frame.Stance == stance
    return frame[matches]
示例#6
0
def getCount(stance="AGAINST"):
    """Report how many title words of the other stances occur in a stance's topics.

    Loads the pickled ``<type>_<stance>_count.pkl`` and
    ``<type>_<stance>_tfidf.pkl`` files, collects element ``[1]`` of every
    entry as the topic vocabulary, and prints:

    * the size of that vocabulary,
    * the number of unique title words (titles split on single spaces) for
      AGAINST, FAVOR and NONE,
    * for each of two other stances, how many of its unique title words are
      part of the vocabulary.

    Args:
        stance: Stance whose topic files are loaded. ``"AGAINST"`` is compared
            with FAVOR and NONE, ``"FAVOR"`` with AGAINST and NONE; any other
            value is compared with AGAINST and FAVOR.

    Returns:
        None. All results are printed.
    """
    # To analyse the title models instead, use ".../TopicModelling/title/"
    # together with the "title_" prefix.
    base = "../DataProcessing/TopicModelling/abstract/"
    type_ = "abstract_"     # "abstract_" or "title_"

    # Pickle data is binary: opening in "rb" is required on Python 3 and
    # avoids newline translation on platforms that do it in text mode.
    # NOTE(review): pickle.load executes arbitrary code from the file; these
    # paths are assumed to be trusted, project-generated artefacts.
    with open(base + type_ + stance + "_count.pkl", "rb") as f:
        count_topics = pickle.load(f)
    with open(base + type_ + stance + "_tfidf.pkl", "rb") as f:
        tfidf_topics = pickle.load(f)
    data = count_topics + tfidf_topics

    # A set gives O(1) membership tests for the overlap computation below.
    uniq_words = set(word[1] for word in data)
    print("# of uniq words in {} topic is: {}".format(stance, len(uniq_words)))

    df = pd.DataFrame(ptd.getMetaDataAsList())
    title_words = {}
    for label in ("FAVOR", "NONE", "AGAINST"):
        titles = df[df.Stance == label].Title.tolist()
        title_words[label] = set(
            word for title in titles for word in title.split(" ")
        )

    print("Length of unique words in {} is {}.\nLength of unique words in {} is {}.\nLength of unique words in {} is {}".format("AGAINST", len(title_words["AGAINST"]), "FAVOR", len(title_words["FAVOR"]), "NONE", len(title_words["NONE"])))
    # Single-argument call so the blank line is emitted on Python 2 and 3.
    print("")

    # The two stances to compare against, in reporting order. For a stance
    # outside the three known labels nothing is filtered out, and the slice
    # keeps AGAINST and FAVOR.
    first, second = [
        label for label in ("AGAINST", "FAVOR", "NONE") if label != stance
    ][:2]

    # Each overlap is computed independently, so the second figure never
    # carries over the first one.
    print("Number of title words from {} found in {} is {}\n".format(
        first, stance, len(title_words[first] & uniq_words)))
    print("Number of title words from {} found in {} is {}".format(
        second, stance, len(title_words[second] & uniq_words)))