Exemplo n.º 1
0
def test():
    """Apply rank/trust values from ``testRelationRaw.txt`` to the ``freebase`` table.

    Every line of the file is split on tabs; the first five fields are read as
    triplet, real relation, max-trust relation, rank and trust.  The k-th line
    (1-based) updates the row whose ``id`` is k, writing ``1 - float(trust)``
    into the ``trsut`` column.  Each update is committed on its own, and the
    parsed fields plus the generated SQL are echoed to stdout.

    :return: None
    """
    source_path = "testRelationRaw.txt"
    # NOTE(review): "trsut" looks like a typo of "trust", but it has to match
    # the real column name -- confirm against the schema before changing it.
    update_stmt = 'update freebase set rank_value="{}", trsut="{}", max_trust_relation="{}" where id={}'
    conn, cursor = init()
    with open(source_path, 'r') as handle:
        records = handle.readlines()
        print(len(records))
        row_id = 0
        for row_id, raw in enumerate(records, start=1):
            fields = raw.split('\t')
            parsed = (fields[0], fields[1], fields[2], fields[3], fields[4])
            triplet, real_relation, max_trust_relation, rank, trust = parsed
            for value in parsed:
                print(value)
            statement = update_stmt.format(
                rank, str(1 - float(trust)), max_trust_relation, row_id)
            print(statement)
            cursor.execute(statement)
            conn.commit()
    # The reported number is one past the last id that was updated.
    print(row_id + 1)
Exemplo n.º 2
0
def get_entity_name(entity_id):
    """Look up one row of ``freebase_entity_id`` by its ``entity_id``.

    :param entity_id: identifier as a string; it is appended to the SQL text
        verbatim (a non-string raises ``TypeError`` on concatenation).
    :return: the second column of the first matching row, or ``""`` when the
        query yields no rows.
    """
    # NOTE(review): the query is assembled by string concatenation, so the
    # caller must only pass trusted identifiers.
    query = "select * from freebase_entity_id where entity_id=" + entity_id
    conn, cursor = init()  # initialise MySQL
    cursor.execute(query)
    rows = cursor.fetchall()
    return rows[0][1] if rows else ""
Exemplo n.º 3
0
def get_weibo_user_follow(user_id=''):
    """Query the follow list of a Weibo user.

    :param user_id: id substituted into the module-level
        ``user_follow_template``.
    :return: a ``{'message': ...}`` dict when ``user_id`` is empty or when the
        query yields no rows; otherwise ``None`` (the fetched rows are not
        used for anything yet).
    """
    if user_id == '':
        return {'message': '请输入user_id'}
    conn, cursor = init()
    cursor.execute(user_follow_template.format(user_id))
    follow_rows = cursor.fetchall()
    if not follow_rows:
        return {'message': '该用户关注的人为0'}
    # TODO(review): rows are fetched but nothing is built from them.
    return None
Exemplo n.º 4
0
def advogato_data_KG_source(kind, entity, level):
    """Build a force-graph description of the edges leaving ``entity``.

    The first level is queried here; ``helper`` is then called with the
    collected state and ``level - 1`` (presumably to expand further levels --
    its implementation is not visible from this function).

    :param kind: ``'freebase'`` selects the ``freebase`` table; any other
        value uses the module-level ``source_template``.
    :param entity: entity whose outgoing rows are fetched; nodes equal to it
        get the "指定实体" category, all others "关联实体".
    :param level: depth value; ``level - 1`` is forwarded to ``helper``.
    :return: dict with keys ``type``, ``categories``, ``nodes`` and ``links``.
    """
    source_sql = source_template
    if kind == 'freebase':
        source_sql = "select * from freebase where source_entity='{}'"
    conn, cursor = init()
    # NOTE(review): ``entity`` is spliced into the SQL text; only pass
    # trusted values.
    cursor.execute(source_sql.format(entity))
    results = cursor.fetchall()

    no_repeat_categories = ['指定实体', '关联实体']
    category_to_int = {"指定实体": 0, "关联实体": 1}
    contents = {"type": "force"}  # attribute: type
    print(len(results))
    print(no_repeat_categories)
    contents["categories"] = [
        {'name': category, 'keyword': {}, 'base': category}
        for category in no_repeat_categories
    ]
    print(contents)

    entity01 = {}    # node name -> 1 if already queried, 0 if not yet
    nodes = []
    links = []
    real_nodes = []  # node names in insertion order (shared with helper)
    # Position of every name in real_nodes; replaces the repeated
    # ``in real_nodes`` / ``real_nodes.index`` linear scans.
    node_index = {}

    for processed, row in enumerate(results, start=1):
        source_node, target_node = row[1], row[2]
        entity01[source_node] = 1  # already queried
        entity01[target_node] = 0  # not queried yet
        for name in (source_node, target_node):
            if name in node_index:
                continue
            node_index[name] = len(real_nodes)
            real_nodes.append(name)
            category = "指定实体" if name == entity else "关联实体"
            nodes.append({
                'name': name,
                'value': str(name),
                'category': category_to_int[category],
            })
        links.append({
            'source': node_index[source_node],
            'target': node_index[target_node],
            'value': row[3],
        })
        # Stop once more than max_nodes_per_level rows have been processed.
        if processed > max_nodes_per_level:
            break

    # NOTE(review): helper always receives source_template, even when
    # kind == 'freebase' selected a different first-level query -- confirm
    # this is intended.
    helper(entity01, nodes, level - 1, conn, cursor, links, category_to_int, real_nodes, source_template)
    contents['nodes'] = list(nodes)
    contents['links'] = list(links)
    return contents
Exemplo n.º 5
0
def advogato_data_KG_target(kind, entity):
    '''
    Collect up to three levels of edges starting from ``entity``.

    Not really used at the moment.

    Level one is fetched with ``source_template`` (``kind == 'source'``) or
    ``target_template`` (any other kind); levels two and three follow the
    targets of the previous level through ``source_template``.  A level stops
    growing as soon as it holds more than ``max_nodes_per_level`` edges.
    Previously the ``break`` only left the inner loop, so every remaining
    source still ran a query and contributed one extra row.

    :param kind: ``'source'`` or anything else, see above.
    :param entity: entity substituted into the first-level query.
    :return: list of ``{'source', 'target', 'type', 'rela'}`` dicts, level by
        level.
    '''
    conn, cursor = init()

    def outgoing(source):
        # Rows for one second/third-level source.
        cursor.execute(source_template.format(source))
        return cursor.fetchall()

    def capped_edges(row_batches):
        # Turn rows into edge dicts, ending the whole level once the cap is
        # exceeded; row_batches is consumed lazily so no further queries run.
        edges = []
        for rows in row_batches:
            for row in rows:
                edges.append({'source': row[1], 'target': row[2],
                              'type': "resolved", 'rela': row[3]})
                if len(edges) > max_nodes_per_level:
                    return edges
        return edges

    if kind == 'source':
        sql = source_template.format(entity)
    else:
        sql = target_template.format(entity)
    cursor.execute(sql)
    results = cursor.fetchall()
    print(len(results))

    first_level = capped_edges([results])
    second_level = capped_edges(
        outgoing(edge['target']) for edge in first_level)
    third_level = capped_edges(
        outgoing(edge['target']) for edge in second_level)

    nodes = first_level + second_level + third_level  # final result
    print(nodes)

    return nodes
Exemplo n.º 6
0
def load_entity2id_into_mysql(file=path + "/freebase/entity2id.txt", sp='\t'):
    """Insert ``entity_name``/``entity_id`` pairs from a text file into ``freebase_entity_id``.

    Each non-blank line is split on ``sp``; the first two fields become
    ``entity_name`` and ``entity_id``.  The line ending is removed before
    splitting so the last field is not stored with a trailing newline, and
    blank lines are skipped instead of raising ``IndexError``.

    :param file: path of the input file.
    :param sp: field separator.
    :return: None; the number of inserted lines is printed.
    """
    sql = 'insert into freebase_entity_id (entity_name, entity_id) values ("{}","{}")'
    cnt = 0
    with open(file, 'r') as data:
        conn, cursor = init()  # initialise MySQL
        for line in data:
            line = line.rstrip('\r\n')
            if not line:
                continue
            contents = line.split(sp)
            entity_name = contents[0]
            entity_id = contents[1]
            print(entity_name, entity_id)
            cnt += 1
            # NOTE(review): values are spliced into the SQL text; a field
            # containing a double quote will break the statement.
            statement = sql.format(entity_name, entity_id)
            execute(cursor, conn, statement)
            print(statement)
        print(cnt)
Exemplo n.º 7
0
def get_weibo_user_profile(user_id=''):
    """Return the formatted profile text of a Weibo user.

    :param user_id: id substituted into the module-level
        ``user_profile_template``.
    :return: ``user_profile_info`` formatted with columns 1-5 of the first
        profile row, or a ``{'message': ...}`` dict when ``user_id`` is empty
        or no profile row exists (the latter used to raise ``IndexError``).
    """
    if user_id == '':
        return {'message': '请输入user_id'}
    conn, cursor = init()
    cursor.execute(user_profile_template.format(user_id))
    results = cursor.fetchall()
    if not results:
        return {'message': '未找到该用户'}
    # Always use the first row: indexing the row collection itself (as
    # happened when more than one row came back) yields rows, not columns.
    profile = results[0]
    info = user_profile_info.format(
        profile[1],
        profile[2],
        profile[3],
        profile[4],
        profile[5],
    )
    return info
Exemplo n.º 8
0
def load_into_mysql(file=path + "/freebase/test.txt", sp='\t'):
    """Insert (source, target, relation) triples from a text file into ``freebase``.

    Each non-blank line is split on ``sp``; the first three fields become
    ``source_entity``, ``target_entity`` and ``relation``.  The line ending is
    removed before splitting so ``relation`` is not stored with a trailing
    newline, and blank lines are skipped instead of raising ``IndexError``.

    :param file: path of the input file.
    :param sp: field separator.
    :return: None; the number of inserted lines is printed.
    """
    sql = 'insert into freebase (source_entity, target_entity, relation) values ("{}","{}","{}")'
    cnt = 0
    with open(file, 'r') as data:
        conn, cursor = init()  # initialise MySQL
        for line in data:
            line = line.rstrip('\r\n')
            if not line:
                continue
            contents = line.split(sp)
            source = contents[0]
            target = contents[1]
            relation = contents[2]
            print(source, target, relation)
            cnt += 1
            # NOTE(review): values are spliced into the SQL text; a field
            # containing a double quote will break the statement.
            statement = sql.format(source, target, relation)
            execute(cursor, conn, statement)
            print(statement)
        print(cnt)
Exemplo n.º 9
0
def load_data_into_mysql():
    """Load the space-separated ``advogato`` edge list into the database.

    Each line is stripped and split on single spaces into source, target and
    relation.  Lines whose source equals the target are skipped; every other
    line is echoed, formatted into the module-level ``sql_template`` and
    passed to ``execute``.  The number of executed statements is printed at
    the end.

    :return: None
    """
    dataset = "advogato"
    report = "source = {}, target = {}, and relation = {}"
    inserted = 0
    conn, cursor = init()
    with open(dataset, 'r') as handle:
        for raw in handle:
            fields = raw.strip().split(" ")
            source, target, relation = fields[0], fields[1], fields[2]
            if source != target:
                print(report.format(source, target, relation))
                execute(cursor, conn,
                        sql_template.format(source, target, relation))
                inserted += 1
    print(inserted)
Exemplo n.º 10
0
def get_freebase_info(source, target):
    """Fetch rank/trust information for the edge between two entities.

    Both ids are resolved to names with ``get_entity_name`` and the first
    ``freebase`` row matching that (source, target) pair is read.

    :param source: entity id of the source node.
    :param target: entity id of the target node.
    :return: ``{}`` when no row matches; otherwise a dict with ``rank``,
        ``trust`` and ``max_trust_relation`` taken from columns 4, 5 and 6 of
        the first matching row.
    """
    source_name = get_entity_name(source)
    target_name = get_entity_name(target)
    # The names are sent as query parameters rather than formatted into the
    # SQL text, so a name containing a quote can neither break nor alter the
    # statement.  "%s" is the placeholder style of the MySQL DB-API drivers;
    # NOTE(review): confirm init() returns such a cursor.
    query = "select * from freebase where source_entity=%s and target_entity=%s"
    params = (source_name, target_name)
    conn, cursor = init()  # initialise MySQL
    print(query, params)
    cursor.execute(query, params)
    result = cursor.fetchall()
    if not result:
        print("hahahahha")
        return {}
    content = tuple(result[0])
    print("content:", content)
    infos = {
        'rank': content[4],
        'trust': content[5],
        'max_trust_relation': content[6],
    }
    print(infos)
    return infos
Exemplo n.º 11
0
def generate_weibo_user_graph(user_id=''):
    """Build a force-graph description (categories, nodes, links) for one Weibo user.

    The graph contains the user, three counter nodes (posts, follows, fans),
    one node per followed account linked from the follow counter, and for
    every post a content node linked from the post counter plus six attribute
    nodes (place, time, likes, reposts, comments, tool) linked from the
    content node.  Link endpoints are the position of the first node carrying
    that name.

    :param user_id: id substituted into the module-level
        ``user_profile_template``, ``user_follow_template`` and
        ``weibo_contents_template``.
    :return: ``{'message': ...}`` when ``user_id`` is empty; ``("", "")`` when
        no profile row exists; otherwise ``(contents, user_name)`` where
        ``contents`` has the keys ``type``, ``categories``, ``nodes`` and
        ``links``.
    """
    if user_id == '':
        return {'message': '请输入user_id'}

    # Categories; the position in this list is the category number.
    no_repeat_categories = [
        '关注数量', '粉丝', '微博内容数量', '原创微博', '转发微博', '微博发布地点', '微博发布时间', '微博点赞数',
        '微博转发数', '微博评论数', '微博发布工具', '关注', '微博检索用户'
    ]
    category_to_int = {
        name: number for number, name in enumerate(no_repeat_categories)
    }
    contents = {
        "type": "force",
        "categories": [
            {'name': category, 'keyword': {}, 'base': category}
            for category in no_repeat_categories
        ],
    }

    nodes = []
    links = []
    # Name -> index of the first node with that name.  Gives the same answer
    # as scanning a name list with ``list.index`` but in constant time.
    first_index = {}

    def add_node(name, value, category):
        # Append a node and remember its position if the name is new.
        first_index.setdefault(name, len(nodes))
        nodes.append({
            'name': name,
            'value': value,
            'category': category_to_int[category],
        })

    def add_link(source_name, target_name, value):
        # Connect the first nodes registered under the two names.
        links.append({
            'source': first_index[source_name],
            'target': first_index[target_name],
            'value': value,
        })

    # User profile.
    conn, cursor = init()
    cursor.execute(user_profile_template.format(user_id))
    profile = cursor.fetchall()
    if len(profile) == 0:
        return "", ""
    # Always take the first row; with several rows the row collection itself
    # used to be indexed as if it were a single row.
    profile = profile[0]
    user_name = profile[1]

    add_node(user_name, user_name, '微博检索用户')
    add_node('微博内容数量', profile[3], '微博内容数量')
    add_node('关注数量', profile[4], '关注数量')
    add_node('粉丝', profile[5], '粉丝')

    add_link(user_name, '关注数量', profile[4])
    add_link(user_name, '粉丝', profile[5])
    add_link(user_name, '微博内容数量', profile[3])

    # Followed accounts.
    cursor.execute(user_follow_template.format(user_id))
    for follow in cursor.fetchall():
        follow_name = follow[3]
        follow_link = follow[4]
        add_node(follow_name, follow_link, '关注')
        add_link('关注数量', follow_name, follow_link)

    # Posts and their attributes.
    cursor.execute(weibo_contents_template.format(user_id))
    for cnt, content in enumerate(cursor.fetchall(), start=1):
        text = content[3]
        # (category, value) pairs; the node name is the category plus the
        # 1-based post number.
        attributes = (
            ('微博发布地点', content[4]),
            ('微博发布时间', content[5]),
            ('微博点赞数', content[6]),
            ('微博转发数', content[7]),
            ('微博评论数', content[8]),
            ('微博发布工具', content[9]),
        )
        post_name = '微博内容' + str(cnt)
        # Text containing the repost-reason marker is categorised as a
        # repost, everything else as an original post.
        post_category = '转发微博' if '转发理由' in text else '原创微博'
        add_node(post_name, text, post_category)
        add_link('微博内容数量', post_name, text)
        for label, value in attributes:
            attribute_name = label + str(cnt)
            add_node(attribute_name, value, label)
            add_link(post_name, attribute_name, value)

    contents['nodes'] = nodes
    contents['links'] = links

    return contents, user_name
Exemplo n.º 12
0
            print(rank)
            print(trust)
            sql = sql_template.format(rank, str(1 - float(trust)),
                                      max_trust_relation, cnt)
            print(sql)
            cursor.execute(sql)
            conn.commit()
            cnt += 1
    print(cnt)


if __name__ == '__main__':
    file = "testRelationRaw.txt"
    cnt = 1
    sql_template = 'update freebase set rank_value="{}", trsut="{}", max_trust_relation="{}" where id={}'
    conn, cursor = init()
    with open(file, 'r') as data:
        lines = data.readlines()
        print(len(lines))
        for line in lines:
            contents = line.split('\t')
            triplet = contents[0]
            real_relation = contents[1]
            max_trust_relation = contents[2]
            rank = contents[3]
            trust = contents[4]
            print(triplet)
            print(real_relation)
            print(max_trust_relation)
            print(rank)
            print(trust)