Exemplo n.º 1
0
def main():
    """Run the TextRank analysis on stored descriptions and upload the result.

    Opens the input and output database pipelines, requests descriptions
    from the input side via ``get_descriptions(20)``, feeds them to
    ``get_summarised_textrank_results`` and stores the outcome under the
    ``'textrank'`` label after clearing the existing entries for that label.
    """
    # Input and output databases are reached through the same host and port.
    server = "weert.lucimmerzeel.nl"
    server_port = "5432"

    source_db = pipeline_in(server, server_port, "pocdb", "pocuser", "pocuser")
    target_db = pipeline_out(
        server, server_port, "InnoDB-test", "innouser", "innouser"
    )

    print("analysing")
    descriptions = source_db.get_descriptions(20)
    textrank_keywords = get_summarised_textrank_results(descriptions)

    # Clear first, then upload, so the upload follows a clean slate
    # (presumably clear_entries_institute removes earlier 'textrank' rows —
    # confirm against the pipeline_out implementation).
    target_db.clear_entries_institute('textrank')
    print("Uploading...")
    target_db.add_dict(textrank_keywords, 'textrank')
Exemplo n.º 2
0
def main():
    """Run the baseline analysis on stored descriptions and upload the result.

    Opens the input and output database pipelines, requests descriptions
    from the input side via ``get_descriptions(10)``, passes them through
    ``get_baseline_results`` and stores the outcome under the ``"baseline"``
    label with ``add_dict``.
    """
    # Input and output databases are reached through the same host and port.
    server = "weert.lucimmerzeel.nl"
    server_port = "5432"

    # Set to True to call clear_entries_institute('baseline') before the
    # upload; it is off here, so existing entries are left untouched.
    clear_institute = False

    source_db = pipeline_in(server, server_port, "pocdb", "pocuser", "pocuser")
    target_db = pipeline_out(
        server, server_port, "InnoDB-test", "innouser", "innouser"
    )

    print("analysing")
    descriptions = source_db.get_descriptions(10)
    baseline_results = get_baseline_results(descriptions)

    if clear_institute:
        target_db.clear_entries_institute('baseline')

    print("Uploading...")
    target_db.add_dict(baseline_results, "baseline")
Exemplo n.º 3
0
    df = pd.DataFrame(tfIdf[0].T.todense(),
                      index=tfIdfVectorizer.get_feature_names(),
                      columns=["TF-IDF"])
    df = df.sort_values('TF-IDF', ascending=False)
    # Select the Top25 most important words.
    df = df.head(25)
    # Transform the dataframe to a dict.
    tmp = df.to_dict("dict")["TF-IDF"]

    new_freq = {}
    # Copy the word frequency from freq to new_freq using the important words as keys.
    for word in tmp:
        if word in freq:
            new_freq[word] = freq[word]

    return new_freq


if __name__ == "__main__":
    # Manual run: pull descriptions from the input database, run the TF-IDF
    # helper on them and print each result entry.
    host, port = "weert.lucimmerzeel.nl", "5432"
    database_in, user_in, password_in = "pocdb", "pocuser", "pocuser"
    db_in = pipeline_in(host, port, database_in, user_in, password_in)

    dataset = db_in.get_descriptions(1000)
    results = TF_IDF_get_results(dataset)

    # Iterating yields the keys; each line prints the stored value first and
    # the key second (presumably frequency then word — confirm against
    # TF_IDF_get_results).
    for key in results:
        value = results[key]
        print(value, "\t:", key)