Пример #1
0
def predict(extract_fun):
    """
    对结果进行预测
    :param extract_fun:
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    # 恢复模型
    clf = joblib.load("model_" + extract_fun.__name__ + ".pkl")

    # 二分网络
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # 进行投影
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("投影完成")
    print(time.time())

    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    f = open("predict_" + extract_fun.__name__ + ".txt", "w", encoding="utf-8")
    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]
        if extract_fun.__name__ == "extract_direct":
            feature = extract_direct(graph, user_id, att_id)
        elif extract_fun.__name__ == "extract_indirect":
            feature = extract_indirect(graph, prjv_graph, prja_graph, user_id,
                                       att_id)

        result = clf.predict([feature])[0]
        f.writelines("%s\t%s\t%s\t%s\n" % (user_id, att_id, is_link, result))

    f.close()
    cursor.close()
    conn.close()
Пример #2
0
def init_graph():
    conn = get_conn()
    cursor = conn.cursor()

    graph = networkx.Graph()
    # 添加 游客节点
    VNodes = get_v_nodes()
    for node in VNodes:
        graph.add_node(node, bipartite=0)
    # 添加景点节点
    ANodes = get_a_nodes()
    for node in ANodes:
        graph.add_node(node, bipartite=1)

    sql = "select * from public.train_set"
    cursor.execute(sql)
    rows = cursor.fetchall()
    for row in rows:
        user_id = row[7]
        classroute = row[2]
        classroutestr = row[3]

        for atrraction_id in classroute:
            if (VName(user_id), AName(atrraction_id)) in graph.edges:
                graph[VName(user_id)][AName(atrraction_id)]["weight"] += 1
            else:
                graph.add_edge(VName(user_id), AName(atrraction_id), weight=1)

    sql = "select * from public.test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()
    for row in rows:
        user_id = row[7]
        classroute = row[2]
        classroutestr = row[3]

        for atrraction_id in classroute[:-1]:
            if (VName(user_id), AName(atrraction_id)) in graph.edges:
                graph[VName(user_id)][AName(atrraction_id)]["weight"] += 1
            else:
                graph.add_edge(VName(user_id), AName(atrraction_id), weight=1)
    return graph
Пример #3
0
def train(extract_fun):
    """
    训练模型
    :param: extract_fun
    :return:
    """
    # 读取数据
    conn = get_conn()
    cursor = conn.cursor()

    # 二分网络
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # 进行投影
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("网络投影完成")
    print(time.time())

    sql = "select * from public.ml_train_set"
    cursor.execute(sql)
    rows = cursor.fetchall()
    print(len(rows))

    # 保存训练数据
    X_list = list()
    Y_list = list()

    i = 0
    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])

        i += 1
        print(i)
        # print(time.time())

        if extract_fun.__name__ == "extract_direct":
            feature = extract_direct(graph, user_id, att_id)
        elif extract_fun.__name__ == "extract_indirect":
            feature = extract_indirect(graph, prjv_graph, prja_graph, user_id,
                                       att_id)
            # print(feature)
        else:
            print("wrong function")
            break
        X_list.append(feature)
        if row[4]:
            Y_list.append(1)
        else:
            Y_list.append(-1)
    print("生成训练数据")
    print(time.time())

    cursor.close()
    conn.close()

    # 记录X_list, Y_list
    f = open("param_" + extract_fun.__name__ + ".txt", "w", encoding="utf-8")
    f.writelines(json.dumps(X_list) + "\n")
    f.writelines(json.dumps(Y_list) + "\n")
    f.close()
    print("训练数据保存成功")
    print(time.time())

    clf = svm.SVC(kernel="linear")
    clf.fit(X_list, Y_list)
    print("训练数据结束")
    print(time.time())

    joblib.dump(clf, "model_" + extract_fun.__name__ + ".pkl")
    print("保存模型")
    print(time.time())
Пример #4
0
def recommend_test(extract_fun, tuned_params):
    """
    根据GridSearchCV求得的参数  检验调参结果
    :param tuned_params:
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    # 读数据
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # 构建模型
    clf = svm.SVC(kernel=tuned_params["kernel"],
                  C=tuned_params["C"],
                  gamma=tuned_params["gamma"])
    f = open("param_" + extract_fun.__name__ + ".txt", "r", encoding="utf-8")
    x_list = f.readline()
    x_list.split()
    y_list = f.readline()
    y_list.split()
    f.close()
    x_list = json.loads(x_list)
    y_list = json.loads(y_list)
    clf.fit(x_list, y_list)

    # 二分网络
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # 进行投影
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("投影完成")
    print(time.time())
    # 所有的景点
    a_nodes = list(get_node_id_dict().keys())

    # 记录结果数据
    f = open("recommend_" + extract_fun.__name__ + "_" +
             tuned_params["kernel"] + "_C" + str(tuned_params["C"]) +
             "_gamma" + str(tuned_params["gamma"]) + ".txt",
             "w",
             encoding="utf-8")

    i = 0
    for row in rows:
        i += 1
        print(i)
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # 待预测的集合
        left_set = set(a_nodes) - set(classroute[0:-1])

        recommendation = dict()
        for anode in left_set:
            anode = AName(anode)
            if extract_fun.__name__ == "extract_direct":
                feature = extract_direct(graph, user_id, anode)
            elif extract_fun.__name__ == "extract_indirect":
                feature = extract_indirect(graph, prjv_graph, prja_graph,
                                           user_id, anode)

            result = clf.predict([feature])[0]
            dis = abs(clf.decision_function([feature]))
            if result == 1:
                recommendation[anode] = dis[0]

        recommendation = dict(
            sorted(recommendation.items(), key=lambda x: x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" %
                (user_id, classroute[:-1], att_id, json.dumps(recommendation)))

    f.close()
    cursor.close()
    conn.close()
Пример #5
0
def recommend_list(extract_fun):
    """
    利用之前生成的模型 进行推荐
    :param extract_fun:
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    # 读数据
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # 读模型
    clf = joblib.load("model_" + extract_fun.__name__ + ".pkl")

    # 二分网络
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # 进行投影
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("投影完成")
    print(time.time())
    # 所有的景点
    a_nodes = list(get_node_id_dict().keys())

    # 记录结果数据
    f = open("recommend_" + extract_fun.__name__ + ".txt",
             "w",
             encoding="utf-8")

    for row in rows:
        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # 待预测的集合
        left_set = set(a_nodes) - set(classroute[0:-1])

        recommendation = dict()
        for anode in left_set:
            anode = AName(anode)
            if extract_fun.__name__ == "extract_direct":
                feature = extract_direct(graph, user_id, anode)
            elif extract_fun.__name__ == "extract_indirect":
                feature = extract_indirect(graph, prjv_graph, prja_graph,
                                           user_id, anode)

            result = clf.predict([feature])[0]
            dis = abs(clf.decision_function([feature]))
            if result == 1:
                recommendation[anode] = dis[0]

        recommendation = dict(
            sorted(recommendation.items(), key=lambda x: x[1], reverse=True))
        f.write("%s\t%s\t%s\t%s\n" %
                (user_id, classroute[:-1], att_id, json.dumps(recommendation)))

    f.close()
    cursor.close()
    conn.close()
Пример #6
0
def write_train_feature(func, have_sd=0):
    """
    生成训练集特征
    并写入文件
    :param func:
    :param have_sd: 间接特征 是否含有最短距离
    :return:
    """
    if have_sd:
        file_name = func.__name__ + "_has_sd_train.csv"
    else:
        file_name = func.__name__ + "_train.csv"

    # 读取数据
    conn = get_conn()
    cursor = conn.cursor()

    # 二分网络
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # 进行投影
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("网络投影完成")
    print(time.time())

    sql = "select * from public.ml_train_set"
    cursor.execute(sql)
    rows = cursor.fetchall()
    print("len_rows:" + str(len(rows)))

    if func.__name__ == "extract_direct":
        title = ["label", "snv", "sna", "cn", "jc", "aa", "pa", "sd"]
    elif func.__name__ == "extract_indirect":
        if have_sd:
            title = [
                "label", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa", "prj_sdv", "prj_sda"
            ]
        else:
            title = [
                "label", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa"
            ]
    else:
        print("函数错误")
        return

    i = 0
    train_f = []
    for row in rows:
        i += 1
        print(i)

        user_id = VName(row[0])
        att_id = AName(row[1])

        if func.__name__ == "extract_direct":
            feature = func(graph, user_id, att_id)
        elif func.__name__ == "extract_indirect":
            feature = func(graph, prjv_graph, prja_graph, user_id, att_id,
                           have_sd)
        else:
            print("函数错误")

        if row[4]:
            line = [1]
            line.extend(feature)
            train_f.append(line)
        else:
            line = [0]
            line.extend(feature)
            train_f.append(line)

    # 写入到csv文件中
    df = pd.DataFrame(train_f, columns=title)
    df.to_csv(file_name, encoding="utf-8")

    print("训练特征保存完成")
Пример #7
0
def write_test_feature(func, has_sd=0):
    """
    保存测试集的特征
    :param func:
    :return:
    """
    conn = get_conn()
    cursor = conn.cursor()

    if has_sd:
        file_name = func.__name__ + "_has_sd_test.csv"
    else:
        file_name = func.__name__ + "_test.csv"

    if func.__name__ == "extract_direct":
        title = ["anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd"]
    elif func.__name__ == "extract_indirect":
        if has_sd:
            title = [
                "anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa", "prj_sdv", "prj_sda"
            ]
        else:
            title = [
                "anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd", "prj_cnv",
                "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
                "prj_pav", "prj_paa"
            ]
    else:
        print("函数错误")
        return

    # 读数据
    sql = "select * from public.ml_test_set"
    cursor.execute(sql)
    rows = cursor.fetchall()

    # 二分网络
    graph = init_graph()
    print("构建二分网络完成")
    print(time.time())
    # 进行投影
    v_nodes = get_v_nodes()
    a_nodes = get_a_nodes()
    prjv_graph = project(graph, v_nodes)
    prja_graph = project(graph, a_nodes)
    print("投影完成")
    print(time.time())
    # 所有的景点
    a_nodes = list(get_node_id_dict().keys())

    test_f = []
    i = 0
    for row in rows:
        print(i)
        i += 1

        user_id = VName(row[0])
        att_id = AName(row[1])
        is_link = row[4]

        if not is_link:
            continue

        sql = "select classroute from public.route_0320 where id={user_id}".format(
            user_id=row[0])
        cursor.execute(sql)
        result = cursor.fetchone()
        classroute = result[0]

        # 待预测的集合
        left_set = set(a_nodes) - set(classroute[0:-1])

        for anode in left_set:
            anode = AName(anode)
            if func.__name__ == "extract_direct":
                feature = func(graph, user_id, anode)
            elif func.__name__ == "extract_indirect":
                feature = func(graph, prjv_graph, prja_graph, user_id, anode,
                               has_sd)
            else:
                print("函数名错误")

            line = [anode]
            line.extend(feature)
            test_f.append(line)

    # 写入到csv文件中
    df = pd.DataFrame(test_f, columns=title)
    df.to_csv(file_name, encoding="utf-8")

    print("测试特征保存完成")