Пример #1
0
def save_newslist_to_db():
    """Crawl news for every university and store it in ``NewsPOA.newslist``.

    A university is skipped when ``newslist`` already contains at least one
    document whose ``Uname`` equals its ``zh_name``.  Otherwise
    ``request_baidu_news`` is called for pages 1..``MAX_PAGE_NUMBERS`` and
    the returned documents are inserted in one batch.

    Connection host/port come from ``get_database_dict_info()``; the client
    is always closed before returning, even if a crawl or insert fails.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        newslist = conn.NewsPOA["newslist"]

        for uni in university_list:
            if newslist.find({"Uname": uni["zh_name"]}).count() != 0:
                continue
            news_documents_list = request_baidu_news(uni["zh_name"], 1,
                                                     MAX_PAGE_NUMBERS,
                                                     uni["en_name"])
            if not news_documents_list:
                # pymongo rejects an empty bulk insert with InvalidOperation;
                # skip this university instead of aborting the whole crawl.
                # Nothing is stored, so it is retried on the next run.
                print(uni["zh_name"], "没有抓取到新闻,跳过")
                continue
            newslist.insert(news_documents_list)
            print(uni["zh_name"], "的新闻列表保存成功")
    finally:
        conn.close()

    print("新闻全部爬取完毕")
def db_to_dataset_folder():
    """Export matching news from ``NewsPOA.newslist`` as segmented text files.

    Every document whose ``body`` matches the ``$regex`` filter in the query
    below is written to
    ``../text_classification/dataset/news_category_dataset/<title>.txt``.

    The characters in ``to_filt`` are removed from both title and body, the
    cleaned title also serves as the file name.  Title and body are then
    segmented with ``jieba.cut`` and every token is written followed by a
    single space; tokens that are exactly a newline are dropped.

    Files are written as UTF-8 so the export does not depend on the
    platform's default encoding.
    """
    db_config = get_database_dict_info()
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        dataset = conn.NewsPOA["newslist"].find(
            {
                "$or": [
                    {"body": {"$regex": ".*学术论坛.*"}}
                ],
            }
        )

        # Same character set as before (it includes the backslash), but the
        # backslash is now spelled as an explicit escape instead of relying on
        # the invalid sequences "\{" and "\}", which newer Pythons warn about.
        to_filt = "/,.<》;:‘\"[]\\{}-_=+!~`@#$%^&*()"
        # One translate() pass deletes every filtered character at once.
        strip_table = str.maketrans("", "", to_filt)

        for data in dataset:
            data_title = data["title"].translate(strip_table)
            data_body = data["body"].translate(strip_table)
            out_path = (
                "../text_classification/dataset/news_category_dataset/"
                + data_title + ".txt"
            )
            with open(out_path, "w", encoding="utf-8") as f:
                data_to_save = []
                data_to_save.extend(jieba.cut(data_title))
                data_to_save.extend(jieba.cut(data_body))
                # Each kept token is followed by one space (including the
                # last one), exactly like the former write-per-token loop.
                f.write("".join(
                    item + " " for item in data_to_save if item != "\n"
                ))
    finally:
        conn.close()
Пример #3
0
def save_result_poa_list_to_db():
    """Rebuild ``NewsPOA.news`` from the per-university JSON result files.

    The ``news`` collection is dropped first.  Then, for every university,
    ``../news_result/<zh_name>.json`` is loaded with ``load_json_file``, each
    entry is passed through ``predict_poa_result_from_documnet_dict`` and the
    results are inserted in one batch.

    Connection host/port come from ``get_database_dict_info()``; the client
    is always closed before returning.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        news = conn.NewsPOA["news"]
        news.drop()

        for uni in university_list:
            print("开始", uni["zh_name"])
            json_path = "../news_result/" + uni["zh_name"] + ".json"
            # NOTE(review): assumes load_json_file returns a list of
            # documents; the original indexed it by position.
            current_uni_news_list = load_json_file(json_path)
            result_uni_news_list = [
                predict_poa_result_from_documnet_dict(document)
                for document in current_uni_news_list
            ]

            # pymongo rejects an empty bulk insert, so a university without
            # any news must not reach insert().
            if result_uni_news_list:
                news.insert(result_uni_news_list)
            print(uni["zh_name"], "的新闻分析完毕,共有", str(len(result_uni_news_list)),
                  "条")
    finally:
        conn.close()
def get_connected_database():
    """Return a handle to the configured database.

    Host, port and database name are read from ``get_database_dict_info()``.
    Unless the host is ``127.0.0.1`` or ``localhost``, the handle is
    authenticated with the configured ``user`` / ``password`` first; those
    two keys are only read in that case.
    """
    settings = get_database_dict_info()
    client = MongoClient(settings['host'], settings['port'])
    db = client[settings['database']]
    is_local = settings['host'] in ('127.0.0.1', 'localhost')
    if not is_local:
        db.authenticate(settings['user'], settings['password'])
    return db
Пример #5
0
def insert_university_list():
    """Insert the university list into ``NewsPOA.universitylist``.

    The documents come from ``get_university_list()`` and are inserted in a
    single batch.  Connection host/port come from
    ``get_database_dict_info()``; the client is closed before returning, even
    when the insert fails.
    """
    # Fetch the university list and the database configuration.
    university_list = get_university_list()
    db_config = get_database_dict_info()

    # Open the database connection.
    conn = MongoClient(db_config["host"], db_config["port"])
    try:
        conn.NewsPOA["universitylist"].insert(university_list)
    finally:
        # Release the client's sockets instead of leaking them.
        conn.close()
    print("学校列表表创建成功")
Пример #6
0
def add_negative_news_from_old_db():
    """Copy missing negative news from ``ResultPOA.news`` to ``NewsPOA.newslist``.

    Documents with ``sentiment == "-1"`` are read from both collections.
    Every old document for which ``judge_url_in_list`` reports that its
    ``url`` is not in the new list is tagged with ``media = "unkown"`` and
    ``ranking = "300"`` and inserted into ``NewsPOA.newslist``.  Afterwards
    ``create_news_numbers_info()`` is called.

    Both clients are closed even when a query or the insert fails.
    """
    # NOTE(review): the server address is hard-coded here rather than taken
    # from get_database_dict_info() -- confirm this is intentional.
    new_conn = MongoClient("121.42.236.250", 27034)
    old_conn = MongoClient("121.42.236.250", 27034)
    try:
        old_neg_news_list = list(
            old_conn.ResultPOA["news"].find({"sentiment": "-1"}))
        new_neg_news_list = list(
            new_conn.NewsPOA["newslist"].find({"sentiment": "-1"}))

        add_neg_list = []
        for current_news in old_neg_news_list:
            # The explicit comparison with False is kept on purpose: the
            # helper is defined elsewhere and may return non-bool values.
            if judge_url_in_list(new_neg_news_list,
                                 current_news["url"]) == False:
                # "unkown" (sic) is stored data; left unchanged so existing
                # documents and consumers keep matching.
                current_news["media"] = "unkown"
                current_news["ranking"] = "300"
                add_neg_list.append(current_news)

        # pymongo rejects an empty bulk insert; with nothing to add we still
        # want the news-number table refreshed below.
        if add_neg_list:
            new_conn.NewsPOA["newslist"].insert(add_neg_list)

        create_news_numbers_info()
    finally:
        new_conn.close()
        old_conn.close()
Пример #7
0
def compute_score():
    """Recompute the per-university media scores in ``NewsPOA.influence``.

    The ``influence`` collection is dropped first.  For every university all
    ``newslist`` documents with a matching ``Uname`` are read, and each one
    adds ``1 / (ranking / 100 + 1)`` to the score of its ``media``.  One
    ``{"Uname", "media", "score"}`` document per media is then inserted.

    The client is closed before returning, even when a query fails.
    """
    # Fetch the university list.
    university_list = get_university_list()

    # Open the database connection.
    # NOTE(review): the server address is hard-coded -- confirm intentional.
    conn = MongoClient("121.42.236.250", 27034)
    try:
        NewsPOA = conn.NewsPOA
        NewsPOA["influence"].drop()

        for uni in university_list:
            uni_name = uni['zh_name']

            uni_news_list = NewsPOA["newslist"].find({"Uname": uni_name})
            print("开始计算 ", uni_name, "的数据...")
            score = {}
            for news in uni_news_list:
                media = news["media"]
                weight = 1 / (float(news["ranking"]) / 100 + 1)
                score[media] = score.get(media, 0) + weight

            score_list = [
                {"Uname": uni_name, "media": media, "score": value}
                for media, value in score.items()
            ]

            # A university without any news yields no scores; pymongo
            # rejects an empty bulk insert, so skip it in that case.
            if score_list:
                NewsPOA["influence"].insert(score_list)
            print(uni_name, "的数据保存完毕")
    finally:
        conn.close()
def get_newslist():
    """Return the ``newslist`` collection of the ``NewsPOA`` database.

    The client is created from the host/port in ``get_database_dict_info()``
    and stays open, because the returned collection depends on it.
    """
    settings = get_database_dict_info()
    client = MongoClient(settings["host"], settings["port"])
    return client.NewsPOA["newslist"]
Пример #9
0
def create_news_numbers_info():
    """Rebuild ``NewsPOA.newsNumber`` with per-university news counts.

    For every university one document is produced with the keys ``Uname``,
    ``abbr``, ``studyNumber``, ``activityNumber``, ``entranceNumber`` and
    ``socialNumber``.  Each ``*Number`` value is a four-element list: the
    number of ``NewsPOA.news`` documents of that classification with
    sentiment ``"-1"``, ``"0"`` and ``"1"`` (in that order), followed by the
    sum of the three.

    All counts are computed first; only then is ``newsNumber`` dropped and
    refilled.  The client is closed before returning.
    """
    categories = ("study", "activity", "entrance", "social")
    sentiments = ("-1", "0", "1")

    # Fetch the university list.
    university_list = get_university_list()

    # Open the database connection.
    # NOTE(review): the server address is hard-coded -- confirm intentional.
    conn = MongoClient("121.42.236.250", 27034)
    try:
        NewsPOA = conn.NewsPOA
        news = NewsPOA["news"]
        news_number_list = []

        for uni in university_list:
            entry = {"Uname": uni["zh_name"], "abbr": uni["en_name"]}
            for category in categories:
                counts = [
                    news.find({
                        "Uname": uni["zh_name"],
                        "classification": category,
                        "sentiment": sentiment,
                    }).count()
                    for sentiment in sentiments
                ]
                # Fourth element is the total over the three sentiments.
                counts.append(sum(counts))
                entry[category + "Number"] = counts
            news_number_list.append(entry)

        NewsPOA["newsNumber"].drop()
        # pymongo rejects an empty bulk insert; with no universities the
        # collection is simply left empty after the drop.
        if news_number_list:
            NewsPOA["newsNumber"].insert(news_number_list)
    finally:
        conn.close()
    print("新闻数量表保存成功")
Пример #10
0
def reset_sentiment_category():
    db_config = get_database_dict_info()
    conn = MongoClient(db_config["host"], db_config["port"])
    NewsPOA = conn.NewsPOA