Example #1
import os

import tool.util as util  # project-local helpers (assumed import path, as in Examples #5/#6)
# mcs() and mcs_ratio() are assumed to be defined in this module (a sketch follows this example).


def cal_mcs(pkl_dir, mcs_dir, is_front, key_word, lap=1):
    f_list = util.get_file_list(pkl_dir, '.pkl')
    os.chdir(pkl_dir)
    # Sort in ascending order
    nw_list = sorted(f_list)

    record_list = []
    num_list = []
    enum_list = []
    ii = len(nw_list) - 1
    # g2 is the later network, g1 the earlier one; start from the very last network in the list
    g2 = util.get_nw(nw_list[ii])

    # Iteratively build the common subgraph over a window of lap networks
    k = 1
    while k < lap:
        g2 = mcs(g2, util.get_nw(nw_list[ii - k]))
        k += 1

    while ii > 0:
        jj = ii
        ii -= lap
        # print(nw_list[ii])

        g1 = util.get_nw(nw_list[ii])
        # Iteratively build the common subgraph for this window
        k = 1
        while k < lap:
            g1 = mcs(g1, util.get_nw(nw_list[ii - k]))
            k += 1

        # Common subgraph of the two windows (keep g1 intact so the ratio below can use it)
        sub = mcs(g2, g1)

        # Build the output file name
        filename = nw_list[ii][0:-4] + '-' + nw_list[jj][0:-4] + '.pkl'

        # 2016-09-20: save the result for inspection
        util.save_nw(
            sub, 'D://semantic analysis//nalyze_data//result//过程结果//连通子图//' +
            filename)

        if is_front:
            # Ratio of the common subgraph relative to network 1 (the earlier window)
            pr = mcs_ratio(sub, g1, key_word)
        else:
            # Ratio of the common subgraph relative to network 2 (the later window)
            pr = mcs_ratio(sub, g2, key_word)
        record_list.append(nw_list[jj][0:-4] + '\t' + str(pr))

        num_list.append(nw_list[jj][0:-4] + '\t' + str(sub.number_of_nodes()))
        enum_list.append(nw_list[jj][0:-4] + '\t' + str(sub.number_of_edges()))

        # Count nodes
        # with open(mcs_dir + filename[0:-4]+'.txt','w',encoding='utf-8') as file:
        #     for node in g1.nodes():
        #         file.write(node+'\n')
        # util.save_nw(g1,mcs_dir + filename)
        g2 = sub

    # Persist the statistics (assumed; mirrors cal_node_mcs in Example #4,
    # without which record_list, num_list and enum_list would go unused)
    util.save_file(mcs_dir + key_word + 'mcs.txt', record_list)
    util.save_file(mcs_dir + 'n' + key_word + 'mcs.txt', num_list)
    util.save_file(mcs_dir + 'e' + key_word + 'mcs.txt', enum_list)
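The helpers mcs and mcs_ratio are not shown on this page. A minimal sketch consistent with the call sites above, assuming the "maximum common subgraph" is approximated by the node/edge intersection and that the ratio is a node-count fraction (both are assumptions, not the project's confirmed definitions):

import networkx as nx


def mcs(g1, g2):
    # Keep the nodes present in both graphs and the edges present in both graphs.
    common = set(g1.nodes()) & set(g2.nodes())
    g = nx.Graph()
    g.add_nodes_from(common)
    g.add_edges_from((u, v) for u, v in g1.edges()
                     if u in common and v in common and g2.has_edge(u, v))
    return g


def mcs_ratio(sub, g, key_word):
    # One plausible definition: the share of g's nodes that survive in the
    # common subgraph (key_word kept only for signature compatibility).
    return sub.number_of_nodes() / max(g.number_of_nodes(), 1)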
Example #2
import time
from ctypes import c_char_p

import pynlpir
import tool.util as util  # project-local helpers (assumed import path, as in Examples #5/#6)
# mk_dir() and create_matrix() are assumed to be defined elsewhere in this module.


def main():
    # Directory where the results are saved
    result_dir = r'D:\semantic analysis\新结果\共现网络//'
    txt_dir = r"D:\semantic analysis\新纯文本\1常用词//"
    # k_list = util.get_key_list()
    # k_list = ['不约而同', '喜闻乐见', '努力', '感觉', '简单', '无聊', '希望', '美好']
    # Key (center) words
    k_list = ['美好']
    # Path to the jieba segmentation dictionary
    # jieba.set_dictionary("D:\semantic analysis\分词\词库\导出结果\dict1.txt")
    # jieba.initialize()
    pynlpir.open()
    # Register each keyword as an NLPIR user word so the segmenter keeps it whole
    for key in k_list:
        print(key)
        pynlpir.nlpir.AddUserWord(c_char_p(key.encode()))

    for key in k_list:
        print(key)
        # Text files for this keyword
        file_list = util.get_file_list(txt_dir + key, ".txt")
        # Create the output directories
        mk_dir(result_dir + key)
        # mk_dir(result_dir+key+'//w')
        mk_dir(result_dir + key + '//p')

        for n_file in file_list:
            s_list = util.get_list_from_file(txt_dir + key + "//" + n_file)
            # Deduplicate sentences to avoid double counting
            print(len(s_list))
            s_list = list(set(s_list))
            print(len(s_list))

            # Build the co-occurrence networks for all sentences
            # ps_list, mn, pps_list,pmn = create_matrix(s_list,key)
            pps_list, pmn = create_matrix(s_list, key)

            pkl_name = n_file[:-4] + '.pkl'

            # for w_list in ps_list:
            #     # Build the network for the whole sentence
            #     mn.add_edges(w_list)
            # util.save_nw(mn.get_network(), result_dir+key+'//w//' + pkl_name)

            for w_list in pps_list:
                # Add n-gram edges for each keyword-containing clause
                # pmn.add_edges(w_list)
                pmn.add_gram_edges(w_list)
            util.save_nw(pmn.get_network(),
                         result_dir + key + '//p//' + pkl_name)

            print(n_file)
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))

            with open(result_dir + key + '//record.txt', 'a',
                      encoding='utf-8') as rf:
                rf.write(n_file + '\n')
    pynlpir.close()
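util.save_nw and util.get_nw round-trip both networkx graphs and plain sets (see Examples #5 and #6), so they are presumably thin pickle wrappers. A sketch under that assumption:

import pickle


def save_nw(obj, path):
    # Serialize a networkx graph or a plain set to disk.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def get_nw(path):
    # Load whatever save_nw() stored.
    with open(path, 'rb') as f:
        return pickle.load(f)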
Example #3
import time

import jieba
import networkx as nx
import tool.util as util  # project-local helpers (assumed import path)
# mk_dir() and add_s2g() are assumed to be defined elsewhere in this module.


def main():
    k_list = util.get_key_list()
    # Raw string so the backslashes in the Windows path are not treated as escapes
    jieba.set_dictionary(r"D:\semantic analysis\分词\词库\导出结果\dict1.txt")
    jieba.initialize()
    for key in k_list:
        print(key)
        # Keep the directory so files can be opened by full path below (as in Example #2)
        txt_dir = 'D://semantic analysis//analyze_data//fc//' + key
        file_list = util.get_file_list(txt_dir, ".txt")
        # Create the output directories
        mk_dir('./w')
        mk_dir('./p')

        for n_file in file_list:
            s_list = util.get_list_from_file(txt_dir + '//' + n_file)
            # Deduplicate sentences to avoid double counting
            print(len(s_list))
            s_list = list(set(s_list))
            print(len(s_list))
            wg = nx.Graph()
            pg = nx.Graph()

            for sentence in s_list:
                # Build the network for the whole sentence
                ll = util.input_filer(sentence)
                wg = add_s2g(wg, ' '.join(ll))

                # Build the network only from the clauses that contain the keyword
                for ss in ll:
                    if key in ss:
                        pg = add_s2g(pg, ss)

            pkl_name = n_file[:-4] + '.pkl'
            util.save_nw(pg, './p/' + pkl_name)
            util.save_nw(wg, './w/' + pkl_name)

            print(n_file)
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))

            with open('record.txt', 'a', encoding='utf-8') as rf:
                rf.write(n_file + '\n')
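add_s2g is not shown; a plausible sketch, assuming it segments the text with jieba (initialized above) and links each pair of adjacent words:

import jieba


def add_s2g(g, sentence):
    # Segment the text and add an edge between neighbouring words.
    words = [w for w in jieba.cut(sentence) if w.strip()]
    for a, b in zip(words, words[1:]):
        if a != b:  # ignore self-loops
            g.add_edge(a, b)
    return g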
Example #4
import os

import tool.util as util  # project-local helpers (assumed import path)
# mcs() is assumed to be defined in this module (see the sketch after Example #1).


def cal_node_mcs(pkl_dir, mcs_dir, key_word, lap=2):
    f_list = util.get_file_list(pkl_dir, '.pkl')
    os.chdir(pkl_dir)
    # Sort in ascending order
    nw_list = sorted(f_list)
    record_list = []
    num_list = []
    enum_list = []
    ii = len(nw_list) - 1

    while (ii - lap + 1) >= 0:
        # print(nw_list[ii])
        g1 = util.get_nw(nw_list[ii])
        # Iteratively build the common subgraph for this window
        k = 1
        while k < lap:
            g1 = mcs(g1, util.get_nw(nw_list[ii - k]))
            k += 1

        # Build the output file name
        filename = nw_list[ii][0:-4] + '.pkl'

        # Save the result (use a new name rather than shadowing the pkl_dir parameter)
        out_dir = r"D:\semantic analysis\公共子图节点数\新词\30公共子图//" + key_word + "//"
        util.create_directory(out_dir)
        util.save_nw(g1, out_dir + filename)

        num_list.append(nw_list[ii][0:-4] + '\t' + str(g1.number_of_nodes()))
        enum_list.append(nw_list[ii][0:-4] + '\t' + str(g1.number_of_edges()))

        # Count nodes
        # with open(mcs_dir + filename[0:-4]+'.txt','w',encoding='utf-8') as file:
        #     for node in g1.nodes():
        #         file.write(node+'\n')
        # util.save_nw(g1,mcs_dir + filename)

        ii -= lap

    # util.save_file(mcs_dir + key_word+'mcs.txt', record_list)
    util.save_file(mcs_dir + 'n' + key_word + 'mcs.txt', num_list)
    util.save_file(mcs_dir + 'e' + key_word + 'mcs.txt', enum_list)
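util.get_file_list and util.create_directory appear throughout these examples; since the call sites concatenate directory and bare file name themselves, the helpers are presumably as simple as this sketch (assumed, not confirmed):

import os


def get_file_list(directory, suffix):
    # Bare file names in `directory` ending with `suffix` (not full paths).
    return [f for f in os.listdir(directory) if f.endswith(suffix)]


def create_directory(path):
    # Create the directory (and any parents) if it does not exist yet.
    os.makedirs(path, exist_ok=True)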
Example #5
import tool.util as util
import os

key_list = util.get_key_list2()

# For each keyword, intersect every pair of consecutive word-set snapshots
# and save the overlap under the earlier snapshot's file name
for keyword in key_list:
    print(keyword)
    dirr = 'D:\\semantic analysis\\常用词的分词集合\\' + keyword
    os.chdir(dirr)
    pkl_list = util.get_file_list(dirr, '.pkl')
    pkl_list = sorted(pkl_list)
    util.create_directory(r"D:\semantic analysis\常用词的分词集合1//" + keyword)
    i = 0
    s = util.get_nw(pkl_list[0])
    while i < len(pkl_list) - 1:
        s1 = util.get_nw(pkl_list[i + 1])
        util.save_nw(
            s & s1,
            r"D:\semantic analysis\常用词的分词集合1//" + keyword + "//" + pkl_list[i])
        s = s1
        i += 1
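Each loop step computes the vocabulary shared by two consecutive snapshots; with the first two test sets saved in Example #6 it works out as:

s = {"我们", "你们", "他们", "怎么", "天气", "很好", "哈哈"}   # snapshot 1
s1 = {"基本", "数据", "文章", "集合", "天气", "很好", "哈哈"}  # snapshot 2
print(s & s1)  # {'天气', '很好', '哈哈'}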
Example #6
import tool.util as util
import os

# key_list = util.get_key_list2()
#
# for keyword in key_list:
#     print(keyword)
#     dirr = 'D:\semantic analysis\分词网络\pNet2\\' + keyword + '//p//'
#     os.chdir(dirr)
#     pkl_list = util.get_file_list(dirr, '.pkl')
#     util.create_directory(r"D:\semantic analysis\常用词的分词集合//"+keyword)
#     for pkl in pkl_list:
#         g = util.get_nw(pkl)
#         s = set(g.nodes())
#         util.save_nw(s,r"D:\semantic analysis\常用词的分词集合//"+keyword+"//"+pkl)



# Test data: five word-set snapshots
util.save_nw(set(["我们", "你们", "他们", "怎么", "天气", "很好", "哈哈"]), r"D:\semantic analysis\测试//1.pkl")
util.save_nw(set(["基本", "数据", "文章", "集合", "天气", "很好", "哈哈"]), r"D:\semantic analysis\测试//2.pkl")
util.save_nw(set(["重复", "你们", "他们", "怎么", "天气", "消除", "元素"]), r"D:\semantic analysis\测试//3.pkl")
util.save_nw(set(["我们", "排序", "他们", "转换", "类型", "很好", "哈哈"]), r"D:\semantic analysis\测试//4.pkl")
util.save_nw(set(["危机", "为何", "他们", "转换", "不会", "友情", "哈哈"]), r"D:\semantic analysis\测试//5.pkl")
import tool.util as util
import os

# For each keyword, compute the words that first appear in each snapshot
# (cumulative union minus the previous cumulative union)
key_list = util.get_key_list2()

for keyword in key_list:
    print(keyword)
    dirr = 'D:\\semantic analysis\\常用词的分词集合\\' + keyword
    os.chdir(dirr)
    pkl_list = util.get_file_list(dirr, '.pkl')
    pkl_list = sorted(pkl_list)
    util.create_directory(r"D:\semantic analysis\2016-10-05结果//" + keyword)
    i = 0
    s = util.get_nw(pkl_list[0])
    while i < len(pkl_list):
        s1 = util.get_nw(pkl_list[i]) | s  # cumulative vocabulary up to snapshot i
        s2 = s1 - s  # words that first appear in snapshot i (empty for i == 0)
        print(len(s2))
        util.save_nw(
            s2, r"D:\semantic analysis\2016-10-05结果//" + keyword + "//" +
            pkl_list[i])
        s = s1
        i += 1
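With the test data above, one loop step works out as follows (snapshot 2 against the vocabulary accumulated so far):

s = {"我们", "你们", "他们", "怎么", "天气", "很好", "哈哈"}       # cumulative vocabulary after snapshot 1
s1 = {"基本", "数据", "文章", "集合", "天气", "很好", "哈哈"} | s  # union with snapshot 2
print(s1 - s)  # {'基本', '数据', '文章', '集合'}: words first seen in snapshot 2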