Exemplo n.º 1
0
def test():
    """
    Run the user-based recommender for every row of ``public.test_set``.

    For each row, ``recommend_by_user`` is called with the user id
    (``row[7]``) and ``20`` as its last argument.  One tab-separated line is
    written to ``recommend_20.txt``: the user id, the route (``row[2]``)
    without its last element, ``AName`` of that last element, and the
    recommendation serialised as JSON.

    :return: None
    """
    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Initialise the user/item tables
        user_dict, item_user = form_data()
        print("初始化矩阵完成")
        print(time.time())

        cursor.execute("select * from public.test_set")
        rows = cursor.fetchall()

        # ``with`` guarantees the result file is flushed and closed even when
        # a recommendation call raises part-way through.
        with open("recommend_20.txt", "w", encoding="utf-8") as f:
            for done, row in enumerate(rows, start=1):
                # Progress report every 100 rows
                if done % 100 == 0:
                    print("完成:" + str(done))

                user = row[7]
                classroute = row[2]

                rec_dict = recommend_by_user(user_dict, item_user, user, 20)
                f.write("%s\t%s\t%s\t%s\n" %
                        (user, classroute[:-1], AName(classroute[-1]),
                         json.dumps(rec_dict)))
    finally:
        # Release the database resources on success and on failure alike.
        cursor.close()
        conn.close()
Exemplo n.º 2
0
def test(algorithm, lam=0.5):
    """
    Run a graph-based algorithm for every row of ``public.test_set``.

    Each row yields one tab-separated line in the output file: the user id
    (``row[7]``), the route (``row[2]``) without its last element, that last
    element, and ``str()`` of the algorithm's result.  The file is named
    ``hunhe_<lam>.txt`` when ``algorithm.__name__ == "hunhe"`` and
    ``<algorithm.__name__>.txt`` otherwise.

    :param algorithm: callable invoked as ``algorithm(graph, user_id)``, or
        as ``algorithm(graph, user_id, lam)`` when it is named ``hunhe``.
    :param lam: extra argument passed only to the ``hunhe`` algorithm.
    :return: None
    """
    is_hunhe = algorithm.__name__ == "hunhe"

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Read the evaluation rows
        cursor.execute(r"select * from public.test_set")
        rows = cursor.fetchall()

        # Build the bipartite graph
        graph = init_graph()

        if is_hunhe:
            out_name = "hunhe_" + str(lam) + ".txt"
        else:
            out_name = algorithm.__name__ + ".txt"

        # Write the results; ``with`` closes the file even if the algorithm
        # raises for some user.
        with open(out_name, "w", encoding="utf-8") as f:
            for row in rows:
                user_id = row[7]
                classroute = row[2]

                if is_hunhe:
                    result = algorithm(graph, user_id, lam)
                else:
                    result = algorithm(graph, user_id)
                f.write("%s\t%s\t%s\t%s\n" %
                        (user_id, classroute[:-1], classroute[-1],
                         str(result)))
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 3
0
def gen_nodes_list():
    """
    Build the list of all node names, visitors first and attractions after.

    :return: list holding ``VName(row[7])`` for every row of
        ``public.route_0320`` followed by ``AName(row[0])`` for every row of
        ``public.node_1023``, in the order the queries return them.
    """
    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Visitor nodes first
        cursor.execute("select * from public.route_0320")
        nodes_list = [VName(row[7]) for row in cursor.fetchall()]

        # Then the attraction nodes
        cursor.execute("select * from public.node_1023")
        nodes_list.extend(AName(row[0]) for row in cursor.fetchall())
        return nodes_list
    finally:
        # The original never released these; close them on every exit path.
        cursor.close()
        conn.close()
Exemplo n.º 4
0
def init_graph():
    """
    Build the weighted visitor/attraction graph from ``public.ml_graph_set``.

    Nodes from ``get_v_nodes()`` are added with ``bipartite=0`` and nodes
    from ``get_a_nodes()`` with ``bipartite=1``.  Every row of
    ``public.ml_graph_set`` contributes to the edge between
    ``VName(row[0])`` and ``AName(row[1])``: the first occurrence creates the
    edge with ``weight=1`` and each repeat increments the weight.

    :return: the populated ``networkx.Graph``
    """
    conn = get_conn()
    cursor = conn.cursor()
    try:
        graph = networkx.Graph()

        # Visitor nodes
        for node in get_v_nodes():
            graph.add_node(node, bipartite=0)
        # Attraction nodes
        for node in get_a_nodes():
            graph.add_node(node, bipartite=1)

        # Read the visit records
        cursor.execute(r"select * from public.ml_graph_set")
        for row in cursor.fetchall():
            visitor = VName(row[0])
            attraction = AName(row[1])

            if (visitor, attraction) in graph.edges:
                graph[visitor][attraction]["weight"] += 1
            else:
                graph.add_edge(visitor, attraction, weight=1)

        return graph
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 5
0
def init_set():
    """
    Split ``public.route`` into ``md_train_set`` and ``md_test_set``.

    90% of the rows (``int(0.9 * count)``) are drawn with ``random.sample``.
    For a row whose id (``row[7]``) satisfies ``id_in_set(train_set, id)``,
    every visit of the route goes to ``md_train_set``.  For any other row,
    all visits but the last go to ``md_train_set`` and the last visit goes to
    ``md_test_set``.  Each source row is committed separately.

    Rows with an empty route are skipped; they have no visit to insert and
    previously crashed on ``classroute[-1]`` in the held-out branch.

    :return: None
    """
    train_insert_sql = """
                            insert into public.md_train_set(user_id, attraction, attractionstr, visittime) 
                            values(%s, %s, %s, %s);
                    """
    test_insert_sql = """
                        insert into public.md_test_set(user_id, attraction, attractionstr, visittime) 
                            values(%s, %s, %s, %s);
            """

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Attraction id -> attraction string lookup
        node_dict = get_node_id_dict()

        # Number of routes
        cursor.execute(r"select count(*) from public.route")
        number = cursor.fetchone()[0]
        print(number)

        # Fetch every route
        cursor.execute(r"select * from public.route order by id")
        rows = cursor.fetchall()

        train_number = int(0.9 * number)
        train_set = random.sample(rows, train_number)

        for row in rows:
            classroute = row[2]
            routetime = row[4]
            user_id = row[7]

            if not classroute:
                continue

            held_out = not id_in_set(train_set, user_id)
            # Held-out users keep their final visit for the test set.
            kept = len(classroute) - 1 if held_out else len(classroute)
            for i in range(kept):
                cursor.execute(
                    train_insert_sql,
                    (user_id, classroute[i], node_dict[classroute[i]],
                     routetime[i]))
            if held_out:
                cursor.execute(
                    test_insert_sql,
                    (user_id, classroute[-1], node_dict[classroute[-1]],
                     routetime[-1]))
            conn.commit()
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 6
0
def _node_index(index_of, name):
    """Return the position of *name*, raising ValueError like ``list.index``."""
    position = index_of.get(name)
    if position is None:
        raise ValueError("%r is not in list" % (name,))
    return position


def _mark_visits(matrix, index_of, rows, drop_last):
    """Write the +i / -i entries for every (user, attraction) visit in *rows*."""
    for row in rows:
        classroute = row[2][:-1] if drop_last else row[2]
        user_index = _node_index(index_of, VName(row[7]))

        for att_id in classroute:
            att_index = _node_index(index_of, AName(att_id))
            matrix[user_index, att_index] = complex(0.0, 1.0)
            matrix[att_index, user_index] = complex(0.0, -1.0)


def init_matrix():
    """
    Build the complex adjacency matrix over all nodes of ``gen_nodes_list()``.

    The matrix is ``n x n`` and starts as all zeros.  For each visit,
    entry ``[user, attraction]`` is set to ``1j`` and
    ``[attraction, user]`` to ``-1j``.  Rows of ``public.train_set``
    contribute their whole route (``row[2]``); rows of ``public.test_set``
    contribute everything except the last element of the route.

    ``complex`` and ``np.asmatrix`` replace ``np.complex`` and ``np.mat``,
    which no longer exist in current NumPy releases; the result is still an
    ``np.matrix`` so ``**`` keeps meaning matrix power for callers.

    :return: ``np.matrix`` of complex values
    :raises ValueError: if a user or attraction name is not in the node list
    """
    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Matrix dimension
        nodes_list = gen_nodes_list()
        n = len(nodes_list)

        # First-occurrence position of each name, matching list.index()
        # semantics without its linear scan on every lookup.
        index_of = {}
        for position, name in enumerate(nodes_list):
            index_of.setdefault(name, position)

        # Initialise the matrix
        matrix = np.asmatrix(np.full((n, n), complex(0.0, 0.0)))

        # Fill in the training data
        cursor.execute("select * from public.train_set")
        _mark_visits(matrix, index_of, cursor.fetchall(), drop_last=False)

        # Fill in the test data, hiding the last stop of every route
        cursor.execute("select * from public.test_set")
        _mark_visits(matrix, index_of, cursor.fetchall(), drop_last=True)

        return matrix
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 7
0
def get_a_nodes():
    """
    Return the set of attraction node names.

    :return: set of ``AName(num)`` for every ``num`` in ``public.node_1023``
    """
    conn = get_conn()
    cursor = conn.cursor()
    try:
        cursor.execute("select num from public.node_1023")
        return {AName(row[0]) for row in cursor.fetchall()}
    finally:
        # Close even when the query or AName raises.
        cursor.close()
        conn.close()
Exemplo n.º 8
0
def get_v_nodes():
    """
    Return the set of visitor node names.

    :return: set of ``VName(id)`` for every ``id`` in ``public.route_0320``
    """
    conn = get_conn()
    cursor = conn.cursor()
    try:
        cursor.execute("select id from public.route_0320")
        return {VName(row[0]) for row in cursor.fetchall()}
    finally:
        # Close even when the query or VName raises.
        cursor.close()
        conn.close()
Exemplo n.º 9
0
def predict(extract_fun):
    """
    Predict the link label of every row in ``public.ml_test_set``.

    The classifier is loaded from ``model_<extract_fun.__name__>.pkl``.  For
    each row a feature vector is built for the pair
    ``(VName(row[0]), AName(row[1]))`` and one tab-separated line is written
    to ``predict_<extract_fun.__name__>.txt``: user node, attraction node,
    the stored ``islink`` flag (``row[4]``) and the predicted label.

    :param extract_fun: feature extractor; only its ``__name__`` is used, and
        it must be ``extract_direct`` or ``extract_indirect``.
    :raises ValueError: for any other extractor (previously this surfaced as
        an unbound ``feature`` variable inside the loop).
    :return: None
    """
    fun_name = extract_fun.__name__
    if fun_name not in ("extract_direct", "extract_indirect"):
        raise ValueError("unsupported feature extractor: " + fun_name)

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Restore the model
        clf = joblib.load("model_" + fun_name + ".pkl")

        # Bipartite network
        graph = init_graph()
        print("构建二分网络完成")
        print(time.time())
        # Projections
        v_nodes = get_v_nodes()
        a_nodes = get_a_nodes()
        prjv_graph = project(graph, v_nodes)
        prja_graph = project(graph, a_nodes)
        print("投影完成")
        print(time.time())

        cursor.execute("select * from public.ml_test_set")
        rows = cursor.fetchall()

        with open("predict_" + fun_name + ".txt", "w",
                  encoding="utf-8") as f:
            for row in rows:
                user_id = VName(row[0])
                att_id = AName(row[1])
                is_link = row[4]
                if fun_name == "extract_direct":
                    feature = extract_direct(graph, user_id, att_id)
                else:
                    feature = extract_indirect(graph, prjv_graph, prja_graph,
                                               user_id, att_id)

                result = clf.predict([feature])[0]
                f.write("%s\t%s\t%s\t%s\n" %
                        (user_id, att_id, is_link, result))
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 10
0
def init_graph():
    """
    Build the weighted visitor/attraction graph from the train and test sets.

    Nodes from ``get_v_nodes()`` get ``bipartite=0`` and nodes from
    ``get_a_nodes()`` get ``bipartite=1``.  Every attraction in a
    ``public.train_set`` route (``row[2]``) adds or strengthens the edge to
    the route's user (``row[7]``).  ``public.test_set`` routes do the same
    but leave out their last element.

    :return: the populated ``networkx.Graph``
    """
    conn = get_conn()
    cursor = conn.cursor()
    try:
        graph = networkx.Graph()
        # Visitor nodes
        for node in get_v_nodes():
            graph.add_node(node, bipartite=0)
        # Attraction nodes
        for node in get_a_nodes():
            graph.add_node(node, bipartite=1)

        def add_visits(user_id, attraction_ids):
            # New edges start at weight 1; repeat visits increment the weight.
            visitor = VName(user_id)
            for attraction_id in attraction_ids:
                attraction = AName(attraction_id)
                if (visitor, attraction) in graph.edges:
                    graph[visitor][attraction]["weight"] += 1
                else:
                    graph.add_edge(visitor, attraction, weight=1)

        cursor.execute("select * from public.train_set")
        for row in cursor.fetchall():
            add_visits(row[7], row[2])

        cursor.execute("select * from public.test_set")
        for row in cursor.fetchall():
            # The last stop of a test route is held back from the graph.
            add_visits(row[7], row[2][:-1])

        return graph
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 11
0
def get_node_loc_dict():
    """
    Map every attraction node name to its coordinates.

    :return: dict of ``AName(row[0]) -> (row[2], row[3])`` for every row of
        ``public.node_1023``; the original names the two values ``lon`` and
        ``lat``.
    """
    conn = get_conn()
    cursor = conn.cursor()
    try:
        cursor.execute("select * from public.node_1023")
        return {AName(row[0]): (row[2], row[3]) for row in cursor.fetchall()}
    finally:
        # The original never released these; close them on every exit path.
        cursor.close()
        conn.close()
Exemplo n.º 12
0
def init_set():
    """
    Split ``public.route_0320`` into ``public.train_set`` and ``public.test_set``.

    90% of the rows (``int(0.9 * count)``) are drawn with ``random.sample``.
    A row whose id (``row[7]``) satisfies ``id_in_set(train_set, id)`` is
    copied unchanged into ``train_set``; every other row is copied into
    ``test_set``.  Each row is committed separately.

    :return: None
    """
    train_insert_sql = """
                            insert into public.train_set(id_base64,route, classroute, classroutestr, routetime, starttime,
                            endtime, id, route_length) values(%s, %s, %s, %s, %s, %s, %s, %s, %s);
                    """
    test_insert_sql = """
                            insert into public.test_set(id_base64,route, classroute, classroutestr, routetime, starttime,
                            endtime, id, route_length) values(%s, %s, %s, %s, %s, %s, %s, %s, %s);
                    """

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Number of routes
        cursor.execute(r"select count(*) from public.route_0320")
        number = cursor.fetchone()[0]
        print(number)

        # Fetch every route
        cursor.execute(r"select * from public.route_0320 order by id")
        rows = cursor.fetchall()

        train_number = int(0.9 * number)
        train_set = random.sample(rows, train_number)

        for row in rows:
            if id_in_set(train_set, row[7]):
                insert_sql = train_insert_sql
            else:
                insert_sql = test_insert_sql
            cursor.execute(insert_sql, row)
            conn.commit()
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 13
0
def cal_train_distance(save_dis_file="train_distance.csv"):
    """
    Compute the distance features of the training set and save them as CSV.

    For every row of ``public.ml_train_set`` the user's route is read from
    ``public.route_0320`` (column 2, without its last element) and passed to
    ``get_dis_feature`` together with the candidate attraction
    ``AName(row[1])``.  The results are written with the columns
    ``min_d``, ``mean_d``, ``max_d`` and ``last_d``.

    :param save_dis_file: path of the CSV file to write
    :return: None
    """
    title = ["min_d", "mean_d", "max_d", "last_d"]
    # Bound parameter instead of str.format, matching the file's inserts.
    route_sql = "select * from public.route_0320 where id=%s"

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Coordinates of every attraction node
        node_loc_dict = get_node_loc_dict()

        cursor.execute("select * from public.ml_train_set")
        rows = cursor.fetchall()

        distance = []
        for row in rows:
            att_id = AName(row[1])

            cursor.execute(route_sql, (row[0],))
            item = cursor.fetchone()
            classroute = item[2][:-1]

            distance.append(
                get_dis_feature(node_loc_dict, classroute, att_id))
    finally:
        cursor.close()
        conn.close()

    # Write to the CSV file
    df = pd.DataFrame(distance, columns=title)
    df.to_csv(save_dis_file, encoding="utf-8")
Exemplo n.º 14
0
def init_set():
    """
    Populate ``ml_graph_set``, ``ml_train_set`` and ``ml_test_set``.

    From ``public.train_set``, 20% of the rows (``int(0.2 * count)``) are
    drawn with ``random.sample``.  Rows whose id (``row[7]``) does NOT
    satisfy ``id_in_set(train_set, id)`` put every visit into
    ``ml_graph_set``.  The other rows put all visits but the last into
    ``ml_graph_set`` and produce labelled examples in ``ml_train_set``: the
    last visit with ``islink=True`` plus two randomly chosen attractions
    that are not on the route with ``islink=False``.

    Every row of ``public.test_set`` is handled like the second case, with
    its labelled examples going to ``ml_test_set``.  Each source row is
    committed separately.

    :return: None
    :raises ValueError: from ``random.sample`` when fewer than two
        attractions remain outside a route
    """
    graph_insert_sql = """
                            insert into public.ml_graph_set(user_id, attraction, attractionstr, visittime, islink) 
                            values(%s, %s, %s, %s, %s);
                    """
    train_insert_sql = """
                        insert into public.ml_train_set(user_id, attraction, attractionstr, visittime, islink) 
                            values(%s, %s, %s, %s, %s);
            """
    test_insert_sql = """
                    insert into public.ml_test_set(user_id, attraction, attractionstr, visittime, islink) 
                        values(%s, %s, %s, %s, %s);
        """

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Attraction id -> attraction string lookup
        node_dict = get_node_id_dict()
        all_nodes = list(node_dict.keys())

        def insert_visits(user_id, classroute, routetime, count):
            # Record the first *count* visits of the route as graph edges.
            for i in range(count):
                cursor.execute(
                    graph_insert_sql,
                    (user_id, classroute[i], node_dict[classroute[i]],
                     routetime[i], True))

        def insert_examples(insert_sql, user_id, classroute, routetime):
            # Positive example: the last visit of the route.
            cursor.execute(
                insert_sql,
                (user_id, classroute[-1], node_dict[classroute[-1]],
                 routetime[-1], True))
            # Two negative examples from attractions not on the route.
            # random.sample() needs a sequence: sets raise TypeError on
            # Python >= 3.11.
            left_nodes = set(all_nodes) - set(classroute)
            for neg in random.sample(list(left_nodes), 2):
                cursor.execute(insert_sql,
                               (user_id, neg, node_dict[neg], None, False))

        # Number of training routes
        cursor.execute(r"select count(*) from public.train_set")
        number = cursor.fetchone()[0]
        print(number)

        # train_set feeds ml_train_set and ml_graph_set
        cursor.execute("select * from public.train_set")
        rows = cursor.fetchall()

        train_number = int(0.2 * number)
        train_set = random.sample(rows, train_number)

        for row in rows:
            classroute = row[2]
            routetime = row[4]
            user_id = row[7]
            if not id_in_set(train_set, user_id):
                insert_visits(user_id, classroute, routetime,
                              len(classroute))
            else:
                insert_visits(user_id, classroute, routetime,
                              len(classroute) - 1)
                insert_examples(train_insert_sql, user_id, classroute,
                                routetime)
            conn.commit()

        # test_set feeds ml_test_set and ml_graph_set
        cursor.execute("select * from public.test_set")
        for row in cursor.fetchall():
            classroute = row[2]
            routetime = row[4]
            user_id = row[7]
            insert_visits(user_id, classroute, routetime,
                          len(classroute) - 1)
            insert_examples(test_insert_sql, user_id, classroute, routetime)
            conn.commit()
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 15
0
def recommend_list(route_length=3, coeffs=None):
    """
    Score unvisited attractions for every test user via matrix powers.

    The matrix from ``init_matrix()`` is combined as
    ``M**3 [+ M**5 / c0 [+ M**7 / c1]]`` depending on ``route_length``.  For
    each ``public.test_set`` row, every attraction id from
    ``get_node_id_dict()`` that is not in the route minus its last element
    is scored with the imaginary part of the matrix entry
    ``[user, attraction]``.  Zero scores are dropped and the rest are sorted
    in descending order.  One tab-separated line per user is written to
    ``cn_route_<route_length>.txt``.

    :param route_length: 3, 5 or 7; any other value prints a message and
        returns ``None``.
    :param coeffs: optional divisors; ``coeffs[0]`` divides ``M**5`` and
        ``coeffs[1]`` divides ``M**7``.  When empty or ``None`` the built-in
        defaults are used.
    :return: dict mapping user node name to a dict with the keys
        ``user_id``, ``classroute``, ``answer`` and ``recommend``.
    :raises ValueError: if a user or attraction name is not in the node list
    """
    nodes_list = gen_nodes_list()
    matrix = init_matrix()

    conn = get_conn()
    cursor = conn.cursor()
    try:
        if route_length == 3:
            final_matrix = (matrix ** 3)
        elif route_length == 5:
            if coeffs:
                final_matrix = (matrix ** 3) + (matrix ** 5) / coeffs[0]
            else:
                final_matrix = (matrix ** 3) + (matrix ** 5) / 20
        elif route_length == 7:
            # NOTE(review): the default divisor of M**5 is 120 here but 20 in
            # the route_length == 5 branch -- confirm which one is intended.
            if coeffs:
                final_matrix = ((matrix ** 3) + (matrix ** 5) / coeffs[0] +
                                (matrix ** 7) / coeffs[1])
            else:
                final_matrix = ((matrix ** 3) + (matrix ** 5) / 120 +
                                (matrix ** 7) / 5040)
        else:
            print("暂时没有对应的公式")
            return

        cursor.execute("select * from public.test_set")
        rows = cursor.fetchall()

        # First-occurrence position of each name, matching list.index()
        # semantics without its linear scan on every lookup.
        index_of = {}
        for position, name in enumerate(nodes_list):
            index_of.setdefault(name, position)

        def node_index(name):
            position = index_of.get(name)
            if position is None:
                raise ValueError("%r is not in list" % (name,))
            return position

        file_name = "cn_route_" + str(route_length) + ".txt"
        # All attraction ids
        a_nodes = list(get_node_id_dict().keys())
        # Recommendation results of every test case
        record_result = dict()

        # The original never closed this file, so buffered lines could be
        # lost; ``with`` flushes and closes it on every exit path.
        with open(file_name, "w", encoding="utf-8") as f:
            for row in rows:
                classroute = row[2]
                user_id = VName(row[7])
                user_index = node_index(user_id)

                # Remaining candidate attractions
                left_nodes = list(set(a_nodes) - set(classroute[:-1]))
                result_dict = dict()
                for node in left_nodes:
                    node = AName(node)
                    att_index = node_index(node)

                    imag_coeff = final_matrix[user_index, att_index].imag
                    if imag_coeff != 0.0:
                        result_dict[node] = imag_coeff

                # Sort the candidates by score, best first
                result_dict = dict(sorted(result_dict.items(),
                                          key=lambda x: x[1], reverse=True))
                # Record the recommendation
                f.write("%s\t%s\t%s\t%s\n" %
                        (user_id, classroute[:-1], AName(classroute[-1]),
                         json.dumps(result_dict)))
                record_result[user_id] = {
                    "user_id": user_id,
                    "classroute": classroute[-1],
                    "answer": AName(classroute[-1]),
                    "recommend": result_dict,
                }
        return record_result
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 16
0
def write_train_feature(func, have_sd=0):
    """
    Generate the training-set features and write them to a CSV file.

    Every row of ``public.ml_train_set`` becomes one CSV line: a label
    (``1`` when ``row[4]`` is truthy, else ``0``) followed by the features
    ``func`` returns for the pair ``(VName(row[0]), AName(row[1]))``.  The
    file is ``<func.__name__>_has_sd_train.csv`` when ``have_sd`` is truthy
    and ``<func.__name__>_train.csv`` otherwise.

    The extractor name is checked before any database or graph work, as
    ``write_test_feature`` does, so an unsupported function no longer builds
    the graph and leaves a connection open before bailing out.

    :param func: feature extractor named ``extract_direct`` (called as
        ``func(graph, user, attraction)``) or ``extract_indirect`` (called
        with the two projected graphs and ``have_sd`` as well).
    :param have_sd: whether the indirect features include the shortest
        distance; adds the ``prj_sdv`` / ``prj_sda`` columns.
    :return: None
    """
    func_name = func.__name__
    direct_title = ["label", "snv", "sna", "cn", "jc", "aa", "pa", "sd"]
    if func_name == "extract_direct":
        title = direct_title
    elif func_name == "extract_indirect":
        title = direct_title + [
            "prj_cnv", "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
            "prj_pav", "prj_paa"
        ]
        if have_sd:
            title += ["prj_sdv", "prj_sda"]
    else:
        print("函数错误")
        return

    if have_sd:
        file_name = func_name + "_has_sd_train.csv"
    else:
        file_name = func_name + "_train.csv"

    # Read the data
    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Bipartite network
        graph = init_graph()
        print("构建二分网络完成")
        print(time.time())
        # Projections
        v_nodes = get_v_nodes()
        a_nodes = get_a_nodes()
        prjv_graph = project(graph, v_nodes)
        prja_graph = project(graph, a_nodes)
        print("网络投影完成")
        print(time.time())

        cursor.execute("select * from public.ml_train_set")
        rows = cursor.fetchall()
        print("len_rows:" + str(len(rows)))

        train_f = []
        for i, row in enumerate(rows, start=1):
            print(i)

            user_id = VName(row[0])
            att_id = AName(row[1])

            if func_name == "extract_direct":
                feature = func(graph, user_id, att_id)
            else:
                feature = func(graph, prjv_graph, prja_graph, user_id,
                               att_id, have_sd)

            line = [1 if row[4] else 0]
            line.extend(feature)
            train_f.append(line)
    finally:
        cursor.close()
        conn.close()

    # Write to the CSV file
    df = pd.DataFrame(train_f, columns=title)
    df.to_csv(file_name, encoding="utf-8")

    print("训练特征保存完成")
0
def write_test_feature(func, has_sd=0):
    """
    Generate the test-set features and write them to a CSV file.

    Only rows of ``public.ml_test_set`` with a truthy ``row[4]`` are used.
    For each, the user's route is read from ``public.route_0320``.  Features
    are produced for every attraction id from ``get_node_id_dict()`` that is
    not in the route minus its last element.  Each CSV line is the
    attraction node name followed by its features.  The file is
    ``<func.__name__>_has_sd_test.csv`` when ``has_sd`` is truthy and
    ``<func.__name__>_test.csv`` otherwise.

    :param func: feature extractor named ``extract_direct`` or
        ``extract_indirect``; any other name prints a message and returns.
    :param has_sd: whether the indirect features include the shortest
        distance; adds the ``prj_sdv`` / ``prj_sda`` columns.
    :return: None
    """
    func_name = func.__name__
    direct_title = ["anode", "snv", "sna", "cn", "jc", "aa", "pa", "sd"]
    if func_name == "extract_direct":
        title = direct_title
    elif func_name == "extract_indirect":
        title = direct_title + [
            "prj_cnv", "prj_cna", "prj_jcv", "prj_jca", "prj_aav", "prj_aaa",
            "prj_pav", "prj_paa"
        ]
        if has_sd:
            title += ["prj_sdv", "prj_sda"]
    else:
        # Checked before the connection is opened so nothing leaks here.
        print("函数错误")
        return

    if has_sd:
        file_name = func_name + "_has_sd_test.csv"
    else:
        file_name = func_name + "_test.csv"

    # Bound parameter instead of str.format, matching the file's inserts.
    route_sql = "select classroute from public.route_0320 where id=%s"

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Read the data
        cursor.execute("select * from public.ml_test_set")
        rows = cursor.fetchall()

        # Bipartite network
        graph = init_graph()
        print("构建二分网络完成")
        print(time.time())
        # Projections
        v_nodes = get_v_nodes()
        a_nodes = get_a_nodes()
        prjv_graph = project(graph, v_nodes)
        prja_graph = project(graph, a_nodes)
        print("投影完成")
        print(time.time())
        # All attraction ids
        all_attractions = list(get_node_id_dict().keys())

        test_f = []
        for i, row in enumerate(rows):
            print(i)

            user_id = VName(row[0])
            is_link = row[4]

            # Negative examples are skipped
            if not is_link:
                continue

            cursor.execute(route_sql, (row[0],))
            classroute = cursor.fetchone()[0]

            # Candidates to score
            left_set = set(all_attractions) - set(classroute[0:-1])

            for anode in left_set:
                anode = AName(anode)
                if func_name == "extract_direct":
                    feature = func(graph, user_id, anode)
                else:
                    feature = func(graph, prjv_graph, prja_graph, user_id,
                                   anode, has_sd)

                line = [anode]
                line.extend(feature)
                test_f.append(line)
    finally:
        cursor.close()
        conn.close()

    # Write to the CSV file
    df = pd.DataFrame(test_f, columns=title)
    df.to_csv(file_name, encoding="utf-8")

    print("测试特征保存完成")
Exemplo n.º 18
0
def correct_data():
    """
    Copy ``public.route`` into ``public.route_0320`` without same-day revisits.

    For every source row the parallel lists ``classroute`` (``row[2]``),
    ``classroutestr`` (``row[3]``) and ``routetime`` (``row[4]``) are
    rebuilt.  A stop that is already in the rebuilt route is dropped when
    ``is_same_day`` holds between the time of its most recent kept
    occurrence and the new visit time; otherwise it is appended.  Rows with
    an empty route are skipped.  The other columns are copied unchanged and
    each row is committed separately.

    :return: None
    """
    insert_sql = """
                    insert into public.route_0320(id_base64,route, classroute, classroutestr, routetime, starttime,
                            endtime, id, route_length) values(%s, %s, %s, %s, %s, %s, %s, %s, %s);
        """

    conn = get_conn()
    cursor = conn.cursor()
    try:
        cursor.execute(r"select * from public.route")
        for row in cursor.fetchall():
            src_route, src_names, src_times = row[2], row[3], row[4]

            # Empty trajectory: skip
            if not src_route:
                continue

            classroute = [src_route[0]]
            classroutestr = [src_names[0]]
            routetime = [src_times[0]]

            for pos in range(1, len(src_route)):
                stop = src_route[pos]
                if stop in classroute:
                    # Index of the most recent kept occurrence of this stop
                    last_kept = (len(classroute) - 1 -
                                 classroute[::-1].index(stop))
                    if is_same_day(routetime[last_kept], src_times[pos]):
                        continue
                classroute.append(stop)
                classroutestr.append(src_names[pos])
                routetime.append(src_times[pos])

            cursor.execute(
                insert_sql,
                (row[0], row[1], classroute, classroutestr, routetime,
                 row[5], row[6], row[7], row[8]))
            conn.commit()
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 19
0
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import sys
sys.path.append("../")
from postgresql import get_conn, get_node_id_dict

if __name__ == "__main__":
    # Smoke test: open a connection and print the node-id dictionary.
    # The cursor is not used by the visible code; it is kept, and now
    # released, so get_conn()/cursor() are still exercised as before.
    conn = get_conn()
    cursor = conn.cursor()
    try:
        print(get_node_id_dict())
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 20
0
def form_data():
    """
    Build the user table and the item table used by the recommender.

    ``user_dict[user][attraction]`` and ``item_user[attraction][user]`` both
    count how often the attraction occurs in the user's route.  Rows of
    ``public.train_set`` contribute their whole route (``row[2]``); rows of
    ``public.test_set`` contribute everything except the last element.

    As in the original, a user's entry in ``user_dict`` is reset each time a
    row for that user is processed, while ``item_user`` keeps accumulating.

    :return: tuple ``(user_dict, item_user)``
    """
    # Item table and user table
    item_user = dict()
    user_dict = dict()

    def record(user_id, attractions):
        visits = user_dict[user_id] = dict()
        for att in attractions:
            visits[att] = visits.get(att, 0) + 1
            visitors = item_user.setdefault(att, {})
            visitors[user_id] = visitors.get(user_id, 0) + 1

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Training routes are recorded in full
        cursor.execute("select * from public.train_set")
        for row in cursor.fetchall():
            record(row[7], row[2])

        # Test routes are recorded without their last stop
        cursor.execute("select * from public.test_set")
        for row in cursor.fetchall():
            record(row[7], row[2][:-1])
    finally:
        cursor.close()
        conn.close()
    return user_dict, item_user
Exemplo n.º 21
0
def train(extract_fun):
    """
    Train a linear SVM on ``public.ml_train_set`` and save it.

    A feature vector is built for every row's pair
    ``(VName(row[0]), AName(row[1]))``; the label is ``1`` when ``row[4]`` is
    truthy and ``-1`` otherwise.  Features and labels are dumped as two JSON
    lines to ``param_<extract_fun.__name__>.txt`` and the fitted classifier
    is stored in ``model_<extract_fun.__name__>.pkl``.

    :param extract_fun: feature extractor; only its ``__name__`` is used, and
        it must be ``extract_direct`` or ``extract_indirect``.
    :raises ValueError: for any other extractor.  Previously the loop broke
        out and the SVM was then fitted on incomplete or empty data.
    :return: None
    """
    fun_name = extract_fun.__name__
    if fun_name not in ("extract_direct", "extract_indirect"):
        raise ValueError("wrong function: " + fun_name)

    # Read the data
    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Bipartite network
        graph = init_graph()
        print("构建二分网络完成")
        print(time.time())
        # Projections
        v_nodes = get_v_nodes()
        a_nodes = get_a_nodes()
        prjv_graph = project(graph, v_nodes)
        prja_graph = project(graph, a_nodes)
        print("网络投影完成")
        print(time.time())

        cursor.execute("select * from public.ml_train_set")
        rows = cursor.fetchall()
        print(len(rows))

        # Training data
        X_list = list()
        Y_list = list()

        for i, row in enumerate(rows, start=1):
            user_id = VName(row[0])
            att_id = AName(row[1])
            print(i)

            if fun_name == "extract_direct":
                feature = extract_direct(graph, user_id, att_id)
            else:
                feature = extract_indirect(graph, prjv_graph, prja_graph,
                                           user_id, att_id)
            X_list.append(feature)
            Y_list.append(1 if row[4] else -1)
        print("生成训练数据")
        print(time.time())
    finally:
        cursor.close()
        conn.close()

    # Persist X_list and Y_list, one JSON document per line
    with open("param_" + fun_name + ".txt", "w", encoding="utf-8") as f:
        f.write(json.dumps(X_list) + "\n")
        f.write(json.dumps(Y_list) + "\n")
    print("训练数据保存成功")
    print(time.time())

    clf = svm.SVC(kernel="linear")
    clf.fit(X_list, Y_list)
    print("训练数据结束")
    print(time.time())

    joblib.dump(clf, "model_" + fun_name + ".pkl")
    print("保存模型")
    print(time.time())
Exemplo n.º 22
0
def recommend_test(extract_fun, tuned_params):
    """
    Check tuned SVM parameters by producing recommendations.

    An ``svm.SVC`` is built from ``tuned_params`` and fitted on the two JSON
    lines stored in ``param_<extract_fun.__name__>.txt``.  Only rows of
    ``public.ml_test_set`` with a truthy ``row[4]`` are used.  For each,
    every attraction id from ``get_node_id_dict()`` that is not in the user's
    route minus its last element is classified.  Attractions predicted as
    ``1`` are kept with the absolute decision-function value as score and
    sorted in descending order.  One tab-separated line per user is written
    to ``recommend_<name>_<kernel>_C<C>_gamma<gamma>.txt``.

    :param extract_fun: feature extractor; only its ``__name__`` is used, and
        it must be ``extract_direct`` or ``extract_indirect``.
    :param tuned_params: mapping with the keys ``kernel``, ``C`` and
        ``gamma``.
    :raises ValueError: for an unsupported extractor (previously this
        surfaced as an unbound ``feature`` variable inside the loop).
    :return: None
    """
    fun_name = extract_fun.__name__
    if fun_name not in ("extract_direct", "extract_indirect"):
        raise ValueError("unsupported feature extractor: " + fun_name)

    # Bound parameter instead of str.format, matching the file's inserts.
    route_sql = "select classroute from public.route_0320 where id=%s"

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Read the data
        cursor.execute("select * from public.ml_test_set")
        rows = cursor.fetchall()

        # Build and fit the model
        clf = svm.SVC(kernel=tuned_params["kernel"],
                      C=tuned_params["C"],
                      gamma=tuned_params["gamma"])
        with open("param_" + fun_name + ".txt", "r",
                  encoding="utf-8") as param_file:
            x_list = json.loads(param_file.readline())
            y_list = json.loads(param_file.readline())
        clf.fit(x_list, y_list)

        # Bipartite network
        graph = init_graph()
        print("构建二分网络完成")
        print(time.time())
        # Projections
        v_nodes = get_v_nodes()
        a_nodes = get_a_nodes()
        prjv_graph = project(graph, v_nodes)
        prja_graph = project(graph, a_nodes)
        print("投影完成")
        print(time.time())
        # All attraction ids
        all_attractions = list(get_node_id_dict().keys())

        out_name = ("recommend_" + fun_name + "_" +
                    tuned_params["kernel"] + "_C" + str(tuned_params["C"]) +
                    "_gamma" + str(tuned_params["gamma"]) + ".txt")

        # Result file; ``with`` closes it even if a prediction raises.
        with open(out_name, "w", encoding="utf-8") as f:
            for i, row in enumerate(rows, start=1):
                print(i)
                user_id = VName(row[0])
                att_id = AName(row[1])
                is_link = row[4]

                if not is_link:
                    continue

                cursor.execute(route_sql, (row[0],))
                classroute = cursor.fetchone()[0]

                # Candidates to score
                left_set = set(all_attractions) - set(classroute[0:-1])

                recommendation = dict()
                for anode in left_set:
                    anode = AName(anode)
                    if fun_name == "extract_direct":
                        feature = extract_direct(graph, user_id, anode)
                    else:
                        feature = extract_indirect(graph, prjv_graph,
                                                   prja_graph, user_id,
                                                   anode)

                    predicted = clf.predict([feature])[0]
                    dis = abs(clf.decision_function([feature]))
                    if predicted == 1:
                        recommendation[anode] = dis[0]

                recommendation = dict(
                    sorted(recommendation.items(), key=lambda x: x[1],
                           reverse=True))
                f.write("%s\t%s\t%s\t%s\n" %
                        (user_id, classroute[:-1], att_id,
                         json.dumps(recommendation)))
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 23
0
def recommend_list(extract_fun):
    """
    Produce recommendations with a previously saved model.

    The classifier is loaded from ``model_<extract_fun.__name__>.pkl``.
    Only rows of ``public.ml_test_set`` with a truthy ``row[4]`` are used.
    For each, every attraction id from ``get_node_id_dict()`` that is not in
    the user's route minus its last element is classified.  Attractions
    predicted as ``1`` are kept with the absolute decision-function value as
    score and sorted in descending order.  One tab-separated line per user is
    written to ``recommend_<extract_fun.__name__>.txt``.

    :param extract_fun: feature extractor; only its ``__name__`` is used, and
        it must be ``extract_direct`` or ``extract_indirect``.
    :raises ValueError: for an unsupported extractor (previously this
        surfaced as an unbound ``feature`` variable inside the loop).
    :return: None
    """
    fun_name = extract_fun.__name__
    if fun_name not in ("extract_direct", "extract_indirect"):
        raise ValueError("unsupported feature extractor: " + fun_name)

    # Bound parameter instead of str.format, matching the file's inserts.
    route_sql = "select classroute from public.route_0320 where id=%s"

    conn = get_conn()
    cursor = conn.cursor()
    try:
        # Read the data
        cursor.execute("select * from public.ml_test_set")
        rows = cursor.fetchall()

        # Load the model
        clf = joblib.load("model_" + fun_name + ".pkl")

        # Bipartite network
        graph = init_graph()
        print("构建二分网络完成")
        print(time.time())
        # Projections
        v_nodes = get_v_nodes()
        a_nodes = get_a_nodes()
        prjv_graph = project(graph, v_nodes)
        prja_graph = project(graph, a_nodes)
        print("投影完成")
        print(time.time())
        # All attraction ids
        all_attractions = list(get_node_id_dict().keys())

        # Result file; ``with`` closes it even if a prediction raises.
        with open("recommend_" + fun_name + ".txt", "w",
                  encoding="utf-8") as f:
            for row in rows:
                user_id = VName(row[0])
                att_id = AName(row[1])
                is_link = row[4]

                if not is_link:
                    continue

                cursor.execute(route_sql, (row[0],))
                classroute = cursor.fetchone()[0]

                # Candidates to score
                left_set = set(all_attractions) - set(classroute[0:-1])

                recommendation = dict()
                for anode in left_set:
                    anode = AName(anode)
                    if fun_name == "extract_direct":
                        feature = extract_direct(graph, user_id, anode)
                    else:
                        feature = extract_indirect(graph, prjv_graph,
                                                   prja_graph, user_id,
                                                   anode)

                    predicted = clf.predict([feature])[0]
                    dis = abs(clf.decision_function([feature]))
                    if predicted == 1:
                        recommendation[anode] = dis[0]

                recommendation = dict(
                    sorted(recommendation.items(), key=lambda x: x[1],
                           reverse=True))
                f.write("%s\t%s\t%s\t%s\n" %
                        (user_id, classroute[:-1], att_id,
                         json.dumps(recommendation)))
    finally:
        cursor.close()
        conn.close()
Exemplo n.º 24
0
def recommend(test_file, model_path, save_file):
    """
    Produce recommendations from a saved XGBoost model.

    The model is loaded from ``model_path`` and applied to every line of the
    feature CSV ``test_file`` (the ``anode`` column is the candidate name,
    the remaining columns are the features).  Predictions are consumed
    positionally.  For each row of ``public.ml_test_set`` with a truthy
    ``row[4]``, the next ``n`` predictions belong to that user, where ``n``
    is the number of attraction ids from ``get_node_id_dict()`` that are not
    in the user's route minus its last element.  Each user's candidates are
    sorted by prediction in descending order and written as one tab-separated
    line to ``save_file``.

    :param test_file: path of the feature CSV containing an ``anode`` column
    :param model_path: path of the saved XGBoost model
    :param save_file: path of the result file to write
    :return: None
    """
    # Load the model
    model = xgb.Booster()
    model.load_model(model_path)

    # Build the feature matrix
    test_data = pd.read_csv(test_file)
    anode_list = list(test_data["anode"])
    test_set = test_data.drop("anode", axis=1)
    xgb_test = xgb.DMatrix(test_set)

    # Predict
    preds = model.predict(xgb_test)

    # Bound parameter instead of str.format, matching the file's inserts.
    route_sql = "select classroute from public.route_0320 where id=%s"

    conn = get_conn()
    cursor = conn.cursor()
    try:
        cursor.execute("select * from public.ml_test_set ")
        rows = cursor.fetchall()

        # All attraction ids
        a_nodes = list(get_node_id_dict().keys())

        # Position of the first prediction of the current user
        offset = 0
        with open(save_file, "w", encoding="utf-8") as f:
            for row in rows:
                user_id = VName(row[0])
                att_id = AName(row[1])
                is_link = row[4]

                # Negative test examples are skipped
                if not is_link:
                    continue

                cursor.execute(route_sql, (row[0],))
                classroute = cursor.fetchone()[0]

                # Candidates that were scored for this user
                left_set = set(a_nodes) - set(classroute[0:-1])
                n = len(left_set)

                scores = dict()
                for i in range(n):
                    scores[anode_list[offset + i]] = preds[offset + i]
                offset += n

                scores = dict(sorted(scores.items(), key=lambda x: x[1],
                                     reverse=True))
                f.write("%s\t%s\t%s\t%s\n" %
                        (user_id, classroute[:-1], att_id,
                         json.dumps(scores, cls=MyEncoder)))
    finally:
        cursor.close()
        conn.close()