Example #1
def loop_compare(keyword_list, pkl_dir1, txt_dir1, result_dir, mode=1, lap=1):
    for key in keyword_list:
        print(key)
        if mode == 0:
            util.create_directory(result_dir + key + "//")

        pkl_dir = pkl_dir1.format(key)
        txt_dir = txt_dir1.format(key)

        # Get the list of dates
        d_list = util.get_file_list(pkl_dir, '.pkl')
        d_list = [d.split(".")[0] for d in d_list]

        result_list = []
        # Sort in ascending order
        d_list = sorted(d_list)
        ii = len(d_list) - 1

        while ii - lap >= 0:
            g1 = get_core_graph(pkl_dir + d_list[ii] + ".pkl")
            d1 = get_txt_dict(txt_dir + d_list[ii] + ".txt")

            # Iteratively compose the graph with the previous snapshots
            k = 1
            while k < lap:
                g1 = nx.compose(g1, util.get_nw(pkl_dir + d_list[ii - k] + ".pkl"))
                k += 1
            result_list.append(compare_function(d1, g1))
            ii -= lap
        util.save_file(result_dir + key + ".txt", result_list)
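
These examples all lean on a shared tool.util module that is not included here. As a rough sketch only, the helpers could plausibly look like the following; the signatures are inferred from how the calls are used in the snippets and are assumptions, not the module's actual code.

# Hypothetical reconstruction of the tool.util helpers used throughout these examples.
import os
import pickle


def get_file_list(directory, suffix):
    # Names of all files in `directory` ending with `suffix`
    return [f for f in os.listdir(directory) if f.endswith(suffix)]


def get_nw(path):
    # Load a pickled networkx graph (or any other pickled object)
    with open(path, 'rb') as f:
        return pickle.load(f)


def save_nw(graph, path):
    # Pickle a graph to disk
    with open(path, 'wb') as f:
        pickle.dump(graph, f)


def get_list_from_file(path):
    # Read a text file as a list of stripped lines
    with open(path, encoding='utf-8') as f:
        return [line.rstrip('\n') for line in f]


def save_file(path, lines, append=False):
    # Write an iterable of items to a text file, one per line
    with open(path, 'a' if append else 'w', encoding='utf-8') as f:
        for item in lines:
            f.write(str(item) + '\n')


def create_directory(path):
    # Create the directory if it does not already exist
    os.makedirs(path, exist_ok=True)
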
def main():
    # Directory where results are saved
    result_dir = r'D:\semantic analysis\2016-10-05结果\html标记分句2//'
    txt_dir = r"D:\semantic analysis\2016-10-05结果\新词分句//"
    set_dir = r"D:\semantic analysis\2016-10-05结果\新词//"

    k_list = util.get_key_list()

    for key in k_list:
        print(key)
        # Text files for this keyword
        file_list = sorted(util.get_file_list(txt_dir + key, ".txt"))
        # Set (.pkl) files for this keyword
        set_list = sorted(util.get_file_list(set_dir + key, ".pkl"))

        util.create_directory(result_dir + "新词//" + key + "//")

        i = 0
        while i < len(file_list):
            s_list = util.get_list_from_file(txt_dir + key + "//" +
                                             set_list[i][0:-4] + ".txt")
            new_word_list = util.get_nw(set_dir + key + "//" + set_list[i])
            # Filter out duplicate sentences to avoid redundant computation
            s_list = list(set(s_list))
            w_list = remark(s_list, new_word_list, key)
            html_name = file_list[i][:-4] + '.html'
            util.save_file(result_dir + "新词//" + key + "//" + html_name,
                           w_list)
            i += 1
def loop_compare(com_function,
                 keyword_list,
                 pkl_dir1,
                 result_dir,
                 mode=1,
                 lap=1,
                 type="pkl"):
    for key in keyword_list:
        global keyword
        keyword = key
        print(key)
        if mode == 0:
            util.create_directory(result_dir + key + "//")
        pkl_dir = pkl_dir1.format(key)
        f_list = util.get_file_list(pkl_dir, '.pkl')
        os.chdir(pkl_dir)
        result_list = []
        # Sort in ascending order
        nw_list = sorted(f_list)
        ii = len(nw_list) - 1

        while ii - 2 * lap >= 0:
            g2 = util.get_nw(nw_list[ii])
            # Iteratively compose the graph (disabled)
            # k = 1
            # while k < lap:
            #     g2 = nx.compose(g2, util.get_nw(nw_list[ii - k]))
            #     k += 1

            ii -= lap
            g1 = util.get_nw(nw_list[ii])
            # Iteratively compose the graph (disabled)
            # k = 1
            # while k < lap:
            #     g1 = nx.compose(g1, util.get_nw(nw_list[ii - k]))
            #     k += 1

            # Generate connected subgraphs
            # Mutual ratio
            if mode == 1:
                r1, r2 = com_function(copy.deepcopy(g1), copy.deepcopy(g2))
                result_list.append(nw_list[ii + lap][0:-4] + "\t" + str(r1))
                result_list.append(nw_list[ii][0:-4] + "\t" + str(r2))
            # One-to-one
            elif mode == 0:
                result_list = com_function(copy.deepcopy(g1),
                                           copy.deepcopy(g2))
                util.save_file(
                    result_dir + key + "//" + nw_list[ii + lap][0:-4] + ".txt",
                    result_list)
            # n-to-one
            elif mode == 2:
                r1 = com_function(copy.deepcopy(g1), copy.deepcopy(g2), type)
                result_list.append(nw_list[ii + lap][0:-4] + "\t" + str(r1))

            ii -= lap
        if mode != 0:
            result_list.reverse()
            util.save_file(result_dir + key + ".txt", result_list)
def main2():
    fir = "D:\semantic analysis\整理文本\正能量//"
    xml_list = util.get_file_list(fir, "txt")
    for xml in xml_list:
        w_list = set(util.get_list_from_file(fir + xml))
        r_list = remark(w_list, (['正能量']), None)
        html_name = xml[:-4] + '.html'
        util.save_file(fir + html_name, r_list)
def loop_compare(com_function,
                 keyword_list,
                 pkl_dir1,
                 result_dir,
                 mode=1,
                 lap=1,
                 type="pkl"):
    for key in keyword_list:
        print(key)
        if mode == 0:
            util.create_directory(result_dir + key + "//")
        pkl_dir = pkl_dir1.format(key)
        f_list = util.get_file_list(pkl_dir, '.txt')
        os.chdir(pkl_dir)
        result_list = []
        # Sort in ascending order
        nw_list = sorted(f_list)
        ii = len(nw_list) - 1

        while ii - 2 * lap >= 0:
            g2 = util.txt2dict(util.get_list_from_file(nw_list[ii]))
            # Iteratively compose the graph
            k = 1
            while k < lap:
                g2 = nx.compose(g2, util.get_nw(nw_list[ii - k]))
                k += 1

            ii -= lap
            g1 = util.txt2dict(util.get_list_from_file(nw_list[ii]))
            d1 = util.get_nw(
                "D:\semantic analysis\新结果\去虚词去单字共现网络//{0}//p//".format(key) +
                nw_list[ii].split(".")[0] + ".pkl")
            # Iteratively compose the graph
            k = 1
            while k < lap:
                g1 = nx.compose(g1, util.get_nw(nw_list[ii - k]))
                k += 1

            # Generate connected subgraphs
            if mode == 1:
                r1, r2 = com_function(copy.deepcopy(g1), copy.deepcopy(g2))
                result_list.append(nw_list[ii + lap][0:-4] + "\t" + str(r1))
                result_list.append(nw_list[ii][0:-4] + "\t" + str(r2))
            elif mode == 0:
                result_list = com_function(copy.deepcopy(g1),
                                           copy.deepcopy(g2))
                util.save_file(
                    result_dir + key + "//" + nw_list[ii + lap][0:-4] + ".txt",
                    result_list)
            elif mode == 2:
                r1 = com_function(copy.deepcopy(g1), copy.deepcopy(g2), d1)
                # result_list.append(str(r1))
                result_list.append(nw_list[ii + lap][0:-4] + "\t" + str(r1))

            ii -= lap
        if mode != 0:
            result_list.reverse()
            util.save_file(result_dir + key + ".txt", result_list)
def main1():
    # date_list = ["2012-08-05","2011-04-05","2011-03-28","2011-10-20","2012-12-30","2011-07-30","2011-06-09","2012-02-05","2012-12-16","2011-08-01","2011-05-19","2013-09-01","2012-08-01","2013-12-01"]
    # key_list = ["吐槽","纠结","淡定","自拍","正能量","山寨","达人","腹黑","接地气","扯淡","闷骚","不明觉厉","完爆","人艰不拆"]
    date_list = [
        "2013-12-31", "2013-12-31", "2013-12-31", "2013-12-31", "2013-12-31",
        "2013-12-31", "2013-12-31", "2013-12-31", "2013-12-31", "2013-12-31",
        "2013-12-31"
    ]
    key_list = [
        '努力',
        '感觉',
        '简单',
        '无聊',
        '希望',
        '美好',
        '气质',
        '害怕',
        '喜欢',
        '不约而同',
        '喜闻乐见',
    ]

    # Directory where results are saved
    result_dir = r'D:\semantic analysis\2016-10-09结果\html标记结果//'
    txt_dir = r"D:\semantic analysis\纯文本\常用词分句//"
    set_dir = r"D:\semantic analysis\2016-10-09结果\中间结果//"

    i = 0

    while i < len(key_list):
        key = key_list[i]
        print(key)
        # Text files for this keyword
        file_list = sorted(util.get_file_list(txt_dir + key, ".txt"))
        # Set (.pkl) files for this keyword
        set_dir_list = util.get_file_list(set_dir + key, ".pkl")
        set_list = []
        for set_list_dir in set_dir_list:
            set_list.append(util.get_nw(set_dir + key + "//" + set_list_dir))
            print(set_list_dir)

        util.create_directory(result_dir + key + "//")
        rr = cal_index2(date_list[i], txt_dir + key_list[i])
        j = 0
        # For each segment
        while j < len(rr):
            k = 0
            while k < rr[j]:
                print(file_list[k][:-4])
                print(rr[j])
                txt_list = util.get_list_from_file(txt_dir + key + "//" +
                                                   file_list[k])
                w_list = remark(txt_list, set_list[j], key)
                html_name = file_list[k][:-4] + '.html'
                util.save_file(result_dir + key + "//" + html_name, w_list)
                k += 1
            j += 1
        i += 1
def main1(keyword):
    dirr = 'D:\semantic analysis\pNet1\\' + keyword + '//p//'
    r_dir = 'D:\semantic analysis//3次采集结果\连续比例4//'
    pkl_list = util.get_file_list(dirr, '.pkl')
    pkl_list = sorted(pkl_list)
    for pkl in pkl_list:
        print(pkl)
    ll = len(pkl_list) - 1
    ii = ll
    g = util.get_nw(dirr + '\\' + pkl_list[ii])
    r_list = []
    n_list = []

    # Build the common subgraph of five graphs
    while ii >= ll - 3:
        ii -= 1
        g2 = util.get_nw(dirr + '\\' + pkl_list[ii])
        g = mcs(g2, g)
        print(pkl_list[ii] + '\t' + str(g.number_of_nodes()))

    ii = len(pkl_list) - 1
    while ii > 0:
        ii -= 1
        g2 = util.get_nw(dirr + '\\' + pkl_list[ii])
        rr, nn = mcs_ratio_advanced(g2, g, keyword)
        r_list.append(pkl_list[ii][0:-4] + '\t' + str(rr))
        n_list.append(pkl_list[ii][0:-4] + '\t' + str(nn))
    util.save_file(r_dir + keyword + '.txt', r_list)
    util.save_file(r_dir + 'n' + keyword + '.txt', n_list)


# for key in key_list:
#     print(key)
#     main1(key)

# g2 = util.get_nw('D:\semantic analysis\c_date\给力\p//2011-03-31.pkl')
# g1 = util.get_nw('D:\semantic analysis\c_date\给力\p//2011-03-30.pkl')
# g1 = util.get_nw('D:\semantic analysis\c_date\给力\p//2011-03-29.pkl')
# g4 = util.get_nw('D:\semantic analysis\c_date\给力\p//2011-03-28.pkl')
#
#
# g5 = mcs(g2, g1)
# g6 = mcs(g1, g5)
# g7 = mcs(g4, g6)
# r = g1.number_of_nodes() / g2.number_of_nodes()
# print("的节点数:"+str(g2.number_of_nodes()))
# print("的节点数:"+str(g1.number_of_nodes()))
# print("两者公共子图的节点数:"+str(g5.number_of_nodes()))
# print("两者公共子图的节点数:"+str(g6.number_of_nodes()))
# print("两者公共子图的节点数:"+str(g7.number_of_nodes()))

# print("比值1:" + str(g1.number_of_nodes() / g2.number_of_nodes()))
# print("比值2:" + str(g1.number_of_nodes() / g1.number_of_nodes()))
Example #8
    def post(self):
        username = self.get_argument('username')
        answer_id = safe_str_to_int(self.get_argument('answer_id'))
        ask_content = self.get_argument('ask_content')
        original_question_id = safe_str_to_int(self.get_argument('original_question_id'))
        be_asked_username = self.get_argument('be_asked_username')

        options = None
        # Get the image/audio files attached to the question
        files = self.request.files
        if files:
            keys = ['ask_pic_file', 'ask_sound_file']
            for key in keys:
                if key in files:
                    tmp_file = files[key][0]
                    file_name = tmp_file['filename']
                    from tool.util import get_file_extension, save_file
                    suffix = get_file_extension(file_name)
                    from dbop.dbQuestion import get_latest_id
                    index = get_latest_id("tb_ask")
                    new_file_name = "{0}_{1}{2}".format("ask", index, suffix)
                    msg0 = "[in postQuestionServer] new_file_name=" + new_file_name
                    logging.info(msg0)
                    file_content = tmp_file['body']
                    # Inject the URL field info
                    tmp_dict = dict()
                    if key == 'ask_pic_file':
                        tmp_dict['ask_pic_url'] = save_file(new_file_name, file_content, 2)
                        tmp_dict['ask_pic_url'] = "http://" + ConfigManager().get_config('host') + ":" + \
                                                  str(ConfigManager().get_config('port')) + tmp_dict['ask_pic_url']
                    elif key == 'ask_sound_file':
                        tmp_dict['ask_sound_url'] = save_file(new_file_name, file_content, 3)
                        tmp_dict['ask_sound_url'] = "http://" + ConfigManager().get_config('host') + ":" + \
                                                    str(ConfigManager().get_config('port')) + tmp_dict['ask_sound_url']
                    if options is None:
                        options = tmp_dict.copy()
                    else:
                        options.update(tmp_dict)

        self.set_header("Content-Type", "application/json;charset=utf8")
        result = ask_question(username, answer_id, ask_content, original_question_id, be_asked_username,
                              options=options)
        self.write(result)
        self.finish()
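
For context, a Tornado handler like the one above has to be mounted in a tornado.web.Application to be reachable over HTTP. A hypothetical registration sketch follows; the URL path, port, and the class name PostQuestionHandler are placeholders, since the real routing is not shown in the source.

import tornado.ioloop
import tornado.web


class PostQuestionHandler(tornado.web.RequestHandler):
    # Stand-in for the handler class that owns the post() method above;
    # the real class name and module layout are not given in the source.
    def post(self):
        self.write({"status": "ok"})


application = tornado.web.Application([
    (r"/postQuestion", PostQuestionHandler),
])

if __name__ == "__main__":
    application.listen(8888)
    tornado.ioloop.IOLoop.current().start()
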
Example #9
def loop_compare(com_function,
                 keyword_list,
                 pkl_dir1,
                 result_dir,
                 mode=1,
                 lap=1,
                 type="pkl"):
    for keyword in keyword_list:
        pkl_dir = pkl_dir1.format(keyword)
        f_list = util.get_file_list(pkl_dir, '.pkl')
        os.chdir(pkl_dir)
        # Sort in ascending order
        nw_list = sorted(f_list)

        record_list = []
        ii = len(nw_list) - 1
        # g2 is graph #2 and g1 is graph #1; load the most recent network here
        g2 = util.get_nw(nw_list[ii])

        # Iteratively compose the graph
        k = 1
        while k < lap:
            g2 = nx.compose(g2, util.get_nw(nw_list[ii - k]))
            k += 1

        while ii - lap >= 0:
            jj = ii
            ii -= lap
            # print(nw_list[ii])

            g1 = util.get_nw(nw_list[ii])
            # Iteratively compose the graph
            k = 1
            while k < lap:
                g1 = nx.compose(g1, util.get_nw(nw_list[ii - k]))
                k += 1

            # Compute the ratio
            r1 = com_function(g1, g2)
            record_list.append(nw_list[jj][0:-4] + '\t' + str(r1))

            g2 = g1
        record_list.reverse()
        util.save_file(result_dir + keyword + ".txt", record_list)
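
A hypothetical call site for this loop_compare. The comparison function shown here (node_retention_ratio), the keyword list, and the directory pattern are illustrative assumptions; the original code passes its own com_function, which is not shown.

def node_retention_ratio(g1, g2):
    # Fraction of g2's nodes that also appear in the earlier graph g1
    if g2.number_of_nodes() == 0:
        return 0.0
    return len(set(g1.nodes()) & set(g2.nodes())) / g2.number_of_nodes()


# Placeholder keyword list and directory pattern, for illustration only.
loop_compare(node_retention_ratio,
             ["给力", "吐槽"],
             r"D:\semantic analysis\pNet1\{0}//p//",
             r"D:\semantic analysis\results//",
             lap=1)
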
Example #10
    def post(self):
        username = self.get_argument('username')
        password = self.get_argument('password')
        grade = safe_str_to_int(self.get_argument('grade'))
        identifier = safe_str_to_int(self.get_argument('identifier'))
        nickname = self.get_argument('nickname')
        subject = self.get_argument('subject', None)
        serial_number = self.get_argument('serial_number', None)
        options = self.get_argument('options', None)

        # Get the user's avatar
        files = self.request.files
        if files:
            key = 'avatar_file'
            if key in files:
                avatar_file = files[key][0]
                file_name = avatar_file['filename']
                from tool.util import get_file_extension, save_file
                suffix = get_file_extension(file_name)
                from dbop.dbUser import get_latest_id
                index = get_latest_id(username, is_new=True)
                new_file_name = "{0}_{1}{2}".format("user", index, suffix)
                msg0 = "[in registerServer] new_file_name=" + new_file_name
                logging.info(msg0)
                file_content = avatar_file['body']
                # Inject the avatar URL field info
                tmp_dict = dict()
                tmp_dict['avatar_url'] = save_file(new_file_name, file_content, 1)
                tmp_dict['avatar_url'] = "http://" + ConfigManager().get_config('host') + ":" + \
                                         str(ConfigManager().get_config('port')) + tmp_dict['avatar_url']
                if options:
                    options = safe_str_to_dict(options)
                    options.update(tmp_dict)
                else:
                    options = tmp_dict.copy()
        else:
            # Inject the system default avatar
            from tool.util import get_system_default_avatar_url
            tmp_dict = dict()
            tmp_dict['avatar_url'] = get_system_default_avatar_url()
            if options:
                options = safe_str_to_dict(options)
                options.update(tmp_dict)
            else:
                options = tmp_dict.copy()

        if subject:
            subject = safe_str_to_int(subject)
        if options:
            options = safe_str_to_dict(options)
            logging.info(options)

        self.set_header("Content-Type", "application/json;charset=utf8")
        result = register(username, password, grade, identifier, nickname, subject, serial_number, options=options)
        self.write(result)
        self.finish()
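
safe_str_to_int and safe_str_to_dict are imported helpers that none of these handlers define. A rough sketch of their presumed behavior (tolerant conversion that returns None on bad input; the exact contract is an assumption):

import json


def safe_str_to_int(value):
    # Presumed behavior: best-effort int conversion, None on failure
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def safe_str_to_dict(value):
    # Presumed behavior: parse a JSON-encoded dict, None on failure
    if isinstance(value, dict):
        return value
    try:
        return json.loads(value)
    except (TypeError, ValueError):
        return None
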
Example #11
def cal_node_mcs(pkl_dir, mcs_dir, key_word, lap=2):
    f_list = util.get_file_list(pkl_dir, '.pkl')
    os.chdir(pkl_dir)
    # Sort in ascending order
    nw_list = sorted(f_list)
    record_list = []
    num_list = []
    enum_list = []
    ii = len(nw_list) - 1

    while (ii - lap + 1) >= 0:
        # print(nw_list[ii])
        g1 = util.get_nw(nw_list[ii])
        # Iteratively compute the common subgraph
        k = 1
        while k < lap:
            g1 = mcs(g1, util.get_nw(nw_list[ii - k]))
            k += 1

        # Build the output file name
        filename = nw_list[ii][0:-4] + '.pkl'

        # Save the result
        pkl_dir = r"D:\semantic analysis\公共子图节点数\新词\30公共子图//" + key_word + "//"
        util.create_directory(pkl_dir)
        util.save_nw(g1, pkl_dir + nw_list[ii][0:-4])

        num_list.append(nw_list[ii][0:-4] + '\t' + str(g1.number_of_nodes()))
        enum_list.append(nw_list[ii][0:-4] + '\t' + str(g1.number_of_edges()))

        # Count the nodes (disabled)
        # with open(mcs_dir + filename[0:-4]+'.txt','w',encoding='utf-8') as file:
        #     for node in g1.nodes():
        #         file.write(node+'\n')
        # util.save_nw(g1,mcs_dir + filename)

        ii -= lap

    # util.save_file(mcs_dir + key_word+'mcs.txt', record_list)
    util.save_file(mcs_dir + 'n' + key_word + 'mcs.txt', num_list)
    util.save_file(mcs_dir + 'e' + key_word + 'mcs.txt', enum_list)
Example #12
def loop_key2(pkl_dir, result_dir, key_word, lap=1):
    pkl_dir = pkl_dir.format(key_word)
    f_list = util.get_file_list(pkl_dir, '.pkl')
    os.chdir(pkl_dir)
    # Sort in ascending order
    nw_list = sorted(f_list)
    ii = 0
    # g2 is graph #2 and g1 is graph #1; start from the earliest network here
    g1 = util.get_nw(nw_list[ii])
    util.create_directory(result_dir + key_word)

    while ii < len(nw_list) - lap:
        ii += lap
        g2 = util.get_nw(nw_list[ii])

        # Build the output file name
        filename = nw_list[ii][0:-4] + '.txt'

        result_list = extract_new_nodes_attributes(g1, g2)
        util.save_file(result_dir + key_word + "//" + filename, result_list)
        g1 = nx.compose(g1, g2)
def main():
    # Directory where results are saved
    result_dir = r'D:\semantic analysis\新纯文本\1新词分句//'
    txt_dir = r"D:\semantic analysis\新纯文本\1新词//"

    k_list = util.get_key_list()

    for key in k_list:
        print(key)
        # Text files for this keyword
        file_list = util.get_file_list(txt_dir + key, ".txt")

        # Create the directory
        # mk_dir(result_dir+"新词整句//"+key)
        mk_dir(result_dir + key)

        for file in file_list:
            s_list = util.get_list_from_file(txt_dir + key + "//" + file)
            # Filter out duplicate sentences to avoid redundant computation (disabled)
            # s_list = list(set(s_list))
            w_list, p_list = extract_sentence(s_list, key)
            util.save_file(result_dir + key + "//" + file, p_list, True)
Example #14
def loop_key(pkl_dir, result_dir, key_word, lap=1):
    pkl_dir = pkl_dir.format(key_word)
    f_list = util.get_file_list(pkl_dir, '.pkl')
    os.chdir(pkl_dir)
    # Sort in ascending order
    nw_list = sorted(f_list)
    ii = len(nw_list) - 1
    # g2 is graph #2 and g1 is graph #1; load the most recent network here
    g2 = util.get_nw(nw_list[ii])
    util.create_directory(result_dir + key_word)

    while ii > 0:
        jj = ii
        ii -= lap
        # print(nw_list[ii])
        g1 = util.get_nw(nw_list[ii])

        # Build the output file name
        filename = nw_list[ii][0:-4] + '-' + nw_list[jj][0:-4] + '.txt'

        result_list = cal_connect_real_probability(g1, g2, key_word)
        util.save_file(result_dir + key_word + "//" + filename, result_list)

        g2 = g1
Example #15
        r"D:\semantic analysis\2016-10-09结果\词频\希望//2011-09-30.txt")
    dd = util.txt2dict(w_list)


key_list = util.get_key_list2() + util.get_key_list()
txt_dir = r"D:\semantic analysis\2016-10-09结果\词频1//"
for key in key_list:
    print(key)
    file_dir = txt_dir + key
    dict_list = []
    r_list = []
    file_list = util.get_file_list(file_dir, ".txt")
    for file_name in file_list:
        word_list = sorted(
            util.get_list_from_file(txt_dir + key + "//" + file_name))
        dict_list.append(util.txt2dict(word_list))

    # Loop to compute the ratios
    i = 1
    dict1 = dict_list[0]
    while i < len(dict_list):
        dict2 = dict_list[i]
        r_list.append(file_list[i - 1][0:-4] + "\t" +
                      str(cal_mcs_ratio(dict1, dict2, key)))
        dict1 = dict2.copy()
        i += 1

    util.save_file(
        r"D:\semantic analysis\2016-10-12结果\自身比例节点数//" + key + ".txt", r_list,
        False)
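
cal_mcs_ratio is not defined in this snippet. Under the assumption that it measures how many words two adjacent frequency dicts share, relative to the earlier one (this interpretation, including how the keyword itself is treated, is a guess), a sketch could be:

def cal_mcs_ratio(dict1, dict2, keyword):
    # Assumed meaning: fraction of dict1's words (excluding the keyword itself)
    # that also appear in dict2. Only a guess at the original metric.
    words1 = set(dict1) - {keyword}
    words2 = set(dict2) - {keyword}
    if not words1:
        return 0.0
    return len(words1 & words2) / len(words1)
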
        low_value -= 1

    # Build the target set for the statistics
    target_set = set()
    while low_index > high_index:
        target_set.add(rank_key[low_index])
        low_index -= 1
    node_sum = len(target_set)

    r_list = []
    r_list1 = []
    dict_list, key_dict_value = util.get_objdict_list(
        r"D:\semantic analysis\2016-10-09结果\词频月//" + key, ".txt")
    for dict_key in key_dict_value:
        word_dict = dict_list[dict_key]
        temp_set = set()
        for k, v in word_dict.items():
            temp_set.add(k)

        sum1 = len(temp_set & target_set)

        r_list.append(dict_key[0:-4] + "\t" + str(sum1))
        r_list1.append(dict_key[0:-4] + "\t" + str(sum1 / node_sum))

    util.save_file(
        r"D:\semantic analysis\2016-10-12结果\2010年保留比例\新词\数量//" + key + ".txt",
        r_list)
    util.save_file(
        r"D:\semantic analysis\2016-10-12结果\2010年保留比例\新词\比例//" + key + ".txt",
        r_list1)
    def post(self):
        username = self.get_argument('username')
        nickname = self.get_argument('nickname', None)
        phone_number = self.get_argument('phone_number', None)
        name = self.get_argument('name', None)
        sex = safe_str_to_dict(self.get_argument('sex', None))
        birthday = self.get_argument('birthday', None)
        address = self.get_argument('address', None)
        grade = safe_str_to_int(self.get_argument('grade', None))
        subject = safe_str_to_int(self.get_argument('subject', None))

        options = dict()
        if nickname:
            options['nickname'] = nickname
        if phone_number:
            options['phone_number'] = phone_number
        if name:
            options['name'] = name
        if sex:
            options['sex'] = sex
        if birthday:
            options['birthday'] = birthday
        if address:
            options['address'] = address
        if grade:
            options['grade'] = grade
        if subject:
            options['subject'] = subject

        # Get the user's avatar
        props = None
        files = self.request.files
        logging.info("start!!!")
        logging.info(files)
        if files:
            key = 'avatar_file'
            if key in files:
                avatar_file = files[key][0]
                file_name = avatar_file['filename']
                from tool.util import get_file_extension, save_file
                suffix = get_file_extension(file_name)
                from dbop.dbUser import get_latest_id
                index = get_latest_id(username)
                new_file_name = "{0}_{1}{2}".format("user", index, suffix)
                msg0 = "[in modifyPersonalInformationServer] new_file_name=" + new_file_name
                logging.info(msg0)
                file_content = avatar_file['body']
                # Inject the avatar URL field info
                tmp_dict = dict()
                tmp_dict['avatar_url'] = save_file(new_file_name, file_content, 1)
                tmp_dict['avatar_url'] = "http://" + ConfigManager().get_config('host') + ":" + \
                                         str(ConfigManager().get_config('port')) + tmp_dict['avatar_url']
                if props:
                    props = safe_str_to_dict(props)
                    props.update(tmp_dict)
                else:
                    props = tmp_dict.copy()

        logging.info("yes!!!")
        logging.info(props)
        self.set_header("Content-Type", "application/json;charset=utf8")
        result = modify_personal_information(username, props=options, options=props)
        self.write(result)
        self.finish()
Example #18
# Count the number of nodes in every subgraph
import tool.util as util
import networkx as nx


def count_num_of_node(pkl_dir):
    pkl_file_list = util.get_file_list(pkl_dir, '.pkl')
    r_list = []
    for file in pkl_file_list:
        g = util.get_nw(file)
        s = file[0:10] + '\t' + str(g.number_of_nodes())
        r_list.append(s)
    return r_list


key_list = util.get_key_list()
pkl_dir1 = r'D:\semantic analysis\pNet1//'
df_dir = r'D://semantic analysis//3次采集结果//节点数//'
# for key in key_list:
#     util.create_directory(df_dir+key)
for key in key_list:
    print(key)
    r = count_num_of_node(pkl_dir1 + key + '//p//')
    util.save_file(df_dir + key + '.txt', r)
Example #19
# Compute node count ratios
# Date    Count
import tool.util as util

file_list = util.get_file_list('D:\semantic analysis//3次采集结果\节点数', '.txt')

for f in file_list:
    x_list = []
    y_list = []
    ry_list = []
    r_data_list = []
    data_list = util.get_list_from_file(f)
    for data in data_list:
        item = data.split('\t')
        x_list.append(item[0])
        y_list.append(float(item[1]))

    my = max(y_list)
    i = 0
    while i < len(x_list):
        r_data_list.append(x_list[i] + '\t' + str(y_list[i] / my))
        i += 1
    util.save_file(
        'D:\semantic analysis//3次采集结果\节点数' + '//' + f[0:-4] + 'mcs.txt',
        r_data_list)
Example #20
for key in key_word:
    print(key)
    pynlpir.nlpir.AddUserWord(c_char_p(key.encode()))

result_dir = r"D:\semantic analysis\新结果\去重去虚词去单字词频数//"
fold_list_dir = r"D:\semantic analysis\新纯文本\1常用词分句//"
for key in key_word:
    print(key)
    file_list = sorted(util.get_file_list(fold_list_dir + key, ".txt"))
    # Loop over the files
    for txt_file in file_list:
        print(txt_file)
        # Remove duplicates
        s_list = set(
            util.get_list_from_file(fold_list_dir + key + "//" + txt_file))
        # Get the word-segmentation dict
        rr = count_word(s_list, key)
        # if "无力" in rr:
        #     print(rr["无力"]/rr["吐槽"])

        # Sort the keys (by value)
        kk = sort_by_value(rr)
        w_list = create_dict_list(kk, rr)

        # Create the directory
        util.create_directory(result_dir + key)
        util.save_file(result_dir + key + "//" + txt_file, w_list, False)

# Close the word segmentation tool
pynlpir.close()
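
sort_by_value and create_dict_list are referenced above but not shown. A minimal sketch of what they plausibly do (order words by descending count, then render "word<TAB>count" lines); both names and behaviors are assumptions inferred from how the results are saved:

def sort_by_value(word_dict):
    # Keys of the dict ordered by descending count
    return sorted(word_dict, key=word_dict.get, reverse=True)


def create_dict_list(sorted_keys, word_dict):
    # One "word<TAB>count" line per word, in the given order
    return [k + "\t" + str(word_dict[k]) for k in sorted_keys]
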
Example #21
import tool.util as util
# Count how many times every word occurs across all files
key_list = util.get_key_list() + util.get_key_list2()
txt_dir = r"D:\semantic analysis\2016-10-09结果\词频1//"
r_dir = r"D:\semantic analysis\2016-10-09结果\总数1//"
for key in key_list:
    print(key)
    file_dir = txt_dir + key
    dict_list = []
    r_list = []
    file_list = util.get_file_list(file_dir, ".txt")
    for file_name in file_list:
        word_list = sorted(
            util.get_list_from_file(txt_dir + key + "//" + file_name))
        dict_list.append(util.txt2dict(word_list))

    r_dict = dict_list[0]
    for word_dict in dict_list:
        r_dict = util.union_dicts(r_dict, word_dict)

    # Sort the keys (by value)
    kk = util.sort_by_value(r_dict)
    w_list = util.create_dict_list(kk, r_dict)

    # Save the result
    util.save_file(r"D:\semantic analysis\2016-10-09结果\总数1//" + key + ".txt",
                   w_list, False)
Example #22
import tool.util as util

key_list = util.get_key_list() + util.get_key_list2()

fre_path = r"D:\semantic analysis\结果\去重频率//"
result_path = r"D:\semantic analysis\结果\累计位置前50//"

def get_acc_ratio(ratio_list):
    if len(ratio_list)-1 < 1:
        return 0
    s = 0
    for index, r in enumerate(ratio_list):
        s += r
        # if s > 0.6:
        #     return index/(len(ratio_list)-1)
        if index > 50 or index == len(ratio_list)-1:
            return s

for key in key_list:
    file_list = util.get_file_list(fre_path + key + "//", ".txt")
    result_list = []
    for file in file_list:
        rl = util.get_list_from_dicttxt(fre_path + key + "//" + file)
        result_list.append(get_acc_ratio(rl))
    util.save_file(result_path + key + ".txt", result_list)
            w_file.write("http://weibo.com/" + str(c[0]) +
                         "/info?mod=pedit_more" + '\n')

    # with open("raddress2.txt","w") as rfile:
    #     for ii in cur:
    #         rfile.write(ii[1]+'\n')
    cur.close()  # Close the cursor
    conn.commit()  # Commit any pending transaction (a no-op for databases without transaction support)
    conn.close()  # Close the database connection and release its resources


dd = [
    "2010-05-15", "2010-08-02", "2010-09-26", "2010-11-03", "2011-01-11",
    "2011-03-20", "2011-06-01", "2011-10-16", "2012-02-16", "2012-07-09",
    "2012-11-19"
]

for d in dd:
    extract_url("fh", d)

ll = util.get_list_from_file(r"D:\semantic analysis\用户信息//userUrl.txt")
lll = util.get_list_from_file(r"D:\semantic analysis\用户信息//user.txt")
print(len(ll))
ss = set(ll)
ss1 = set(lll)
print(len(ss))
ss = ss - ss1
print(len(ss))
ll = list(ss)
util.save_file(r"D:\semantic analysis\用户信息//userUrl1.txt", ll)
    while i < len(set_list):
        r_set0 = r_set0 | util.get_nw(pkl_dir + set_list[i])
        r_list.append(set_list[i][0:-4] + '\t' + str(len(r_set0)))
        d_list.append(len(r_set0))
        i += 1

    i = 0
    r_list2 = []
    d_list2 = []
    while i < len(r_list)-1:
        r_list2.append(set_list[i][0:-4] + '\t' + str(d_list[i+1]-d_list[i]))
        d_list2.append(d_list[i+1]-d_list[i])
        i += 1

    i = 15
    r_list3 = []
    while i < len(d_list2):
        r_list3.append(set_list[i][0:-4] + '\t' + str(d_list2[i]/d_list[i]))
        i += 1

    return r_list, r_list2,r_list3


key_list = util.get_key_list()
for key in key_list:
    print(key)
    r1, r2, r3 = cal_union_set(key, "D:\semantic analysis\分词集合//" + key + "//")
    # util.save_file(r"D:\semantic analysis\2016-10-03结果\增量//" + key + ".txt", r2)
    # util.save_file(r"D:\semantic analysis\2016-10-03结果\总量//" + key + ".txt", r1)
    util.save_file(r"D:\semantic analysis\2016-10-03结果\比例//" + key + ".txt", r3)
Example #25
    def phantomjs_screen_html(self):
        util.save_file(self.driver.page_source.encode('utf-8'), './hj.html')
Example #26
import tool.util as util


def get_num_sentence(file_path):
    return len(set(util.get_list_from_file(file_path)))


key_list = util.get_key_list2()

root = r"D:\semantic analysis\新纯文本\1常用词/"
for key in key_list:
    print(key)
    file_list = util.get_file_list(root + key, ".txt")
    r_list = []
    for file in file_list:
        ss = get_num_sentence(root + key + "//" + file)
        r_list.append(file[0:-4] + "\t" + str(ss))
    util.save_file(r"D:\semantic analysis\新结果//去重句子数//常用词//" + key + ".txt",
                   r_list)