Exemplo n.º 1
0
def check():
    '''Download the release page and extract the newest version info.

    Returns:
        A ``(newver, download_url)`` tuple holding the two texts captured
        between the version marker / URL marker and their end markers.

    Raises:
        Exception: if the page cannot be downloaded, cannot be decoded as
            UTF-8, or does not contain the expected markers.
    '''
    fetcher_info = FetcherInfo()
    f = Fetcher(fetcher_info)
    url = 'http://www.cnblogs.com/animalize/p/4773363.html'

    # Catch Exception instead of using a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit are not reported as download errors,
    # and chain the cause so the original traceback is kept.
    try:
        data = f.fetch_url(url)
    except Exception as e:
        raise Exception('无法下载“版本发布网页”') from e

    try:
        html = data.decode('utf-8')
    except Exception as e:
        raise Exception('无法用utf-8解码“版本发布网页”') from e

    # DOTALL lets ".*?" span the lines between the two marker pairs.
    p = r'【最新版本】(.*?)【结束】.*?【更新网址】(.*?)【结束】'
    r = red.re_dict(p, red.DOTALL)
    m = r.search(html)
    if not m:
        raise Exception('无法从“版本发布网页”提取最新的版本号')

    newver = m.group(1)
    download_url = m.group(2)

    return newver, download_url
Exemplo n.º 2
0
    def process_1(self):
        '''Custom processing: rewrite raw "@@name##" quotes in every reply.

        Each match in a reply's text is replaced by a "reply to <name>" line,
        the quoted text wrapped in quote-begin / quote-end markers, and then
        the remaining reply text.  The total number of substitutions is
        printed when done.
        '''
        print('>处理引用')

        # The leading "(?=(...))" + "\1" pair emulates an atomic group
        # "(?>...)": the lookahead captures once and the backreference then
        # consumes exactly that text.
        # Groups used by the replacement: 2 = name between "@@" and "##",
        # 4 = quoted text before the separator, 5 = text after it.
        # NOTE(review): group 3 plus the negative lookahead appear to demand
        # that only one separator line remains after this point - confirm
        # against the value of re_separater.
        fragments = [
            r'^(?=(.*@@(\S{1,16})##))',
            r'\1',
            r'.*?',
            r'(?<=\n)',
            r'(?=(.*?(?<=\n)',
            re_separater,
            r'\s+))',
            r'(?!\3.*?(?<=\n)',
            re_separater,
            r'\s+)',
            r'\s*(.*?)\s*',
            re_separater,
            r'\s+(.*)',
        ]
        quote_re = red.re_dict(''.join(fragments), red.DOTALL)
        template = r'回复 \2:\n【引用开始】\4\n【引用结束】\n\5'

        quote_count = 0
        for reply in self.rlist:
            reply.text, hits = quote_re.subn(template, reply.text)
            quote_count += hits

        print('...处理了{0}条引用'.format(
            color.fore_color(quote_count, color.Fore.CYAN)))
Exemplo n.º 3
0
    def mark_reduplicate(self):
        '''Flag replies whose text equals that of the reply just before them.

        Blank replies are never flagged.  A flagged reply gets a note added
        through self.append_note and its suggest flag cleared.
        '''
        print('>检查相邻重复:')

        blank = red.re_dict(r'^\s*$')
        duplicates = []
        previous = None

        # Pass 1: only collect.  Texts stay untouched here so the comparison
        # with the following reply still sees the original text.
        for current in self.rlist:
            is_repeat = (previous
                         and previous.text == current.text
                         and not blank.match(current.text))
            if is_repeat:
                duplicates.append(current)
            previous = current

        # Pass 2: annotate what was collected.
        for reply in duplicates:
            reply.text = self.append_note(reply.text, '【与上一条回复重复】')
            reply.suggest = False

        # Red when anything was flagged, green otherwise.
        total = len(duplicates)
        shade = color.Fore.RED if total else color.Fore.GREEN
        print('...标记了{0}个重复回复'.format(color.fore_color(total, shade)))
Exemplo n.º 4
0
    def mark_multireply(self):
        '''Merge consecutive replies that carry the same quote.

        Only replies with a truthy suggest flag take part.  When a reply's
        text up to (not including) the quote-end marker equals that of the
        previous quoting reply, its remaining text is appended to that
        previous reply as a supplement, and the reply itself is emptied and
        its suggest flag cleared.  The number of merged replies is printed.
        '''
        print('>开始标记 连续重复引用的回复')

        r = red.re_dict(r'^(.*?【引用开始】.*?)【引用结束】\n?(.*)$', red.S)
        last_reply = None  # most recent kept reply that contains a quote
        last_quote = None  # group 1 (text up to the quote end) of that reply
        count = 0

        for rpl in self.rlist:
            # Skipped replies neither take part nor reset the current run.
            if not rpl.suggest:
                continue

            m = r.match(rpl.text)
            if m is None:
                # A reply without a quote ends the current run.
                last_reply = None
                last_quote = None
                continue

            temp = m.group(1)
            if last_quote == temp:
                # Same quote as the previous reply: fold this one into it.
                last_reply.text += '\n\n【补充回复】\n' + m.group(2)
                rpl.text = ''
                rpl.suggest = False
                count += 1
            else:
                last_reply = rpl
                last_quote = temp

        # Red when anything was merged, green otherwise.
        if count:
            color_p = color.fore_color(count, color.Fore.RED)
        else:
            color_p = color.fore_color(count, color.Fore.GREEN)

        print('...有{0}个连续重复引用的回复'.format(color_p))
Exemplo n.º 5
0
    def mark_reduplicate(self):
        '''Flag replies whose text equals that of the reply just before them.

        Blank replies are never flagged.  A flagged reply gets a note added
        through self.append_note and its suggest flag cleared.
        '''
        print('>检查相邻重复:')

        blank = red.re_dict(r'^\s*$')
        duplicates = []
        previous = None

        # Pass 1: only collect.  Texts stay untouched here so the comparison
        # with the following reply still sees the original text.
        for current in self.rlist:
            is_repeat = (previous
                         and previous.text == current.text
                         and not blank.match(current.text))
            if is_repeat:
                duplicates.append(current)
            previous = current

        # Pass 2: annotate what was collected.
        for reply in duplicates:
            reply.text = self.append_note(reply.text, '【与上一条回复重复】')
            reply.suggest = False

        # Red when anything was flagged, green otherwise.
        total = len(duplicates)
        shade = color.Fore.RED if total else color.Fore.GREEN
        print('...标记了{0}个重复回复'.format(color.fore_color(total, shade)))
Exemplo n.º 6
0
def check():
    '''Download the release page and extract the newest version info.

    Returns:
        A ``(newver, download_url)`` tuple holding the two texts captured
        between the version marker / URL marker and their end markers.

    Raises:
        Exception: if the page cannot be downloaded, cannot be decoded as
            UTF-8, or does not contain the expected markers.
    '''
    fetcher_info = FetcherInfo()
    f = Fetcher(fetcher_info)
    url = 'http://www.cnblogs.com/animalize/p/4773363.html'

    # Catch Exception instead of using a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit are not reported as download errors,
    # and chain the cause so the original traceback is kept.
    try:
        data = f.fetch_url(url)
    except Exception as e:
        raise Exception('无法下载“版本发布网页”') from e

    try:
        html = data.decode('utf-8')
    except Exception as e:
        raise Exception('无法用utf-8解码“版本发布网页”') from e

    # DOTALL lets ".*?" span the lines between the two marker pairs.
    p = r'【最新版本】(.*?)【结束】.*?【更新网址】(.*?)【结束】'
    r = red.re_dict(p, red.DOTALL)
    m = r.search(html)
    if not m:
        raise Exception('无法从“版本发布网页”提取最新的版本号')

    newver = m.group(1)
    download_url = m.group(2)

    return newver, download_url
Exemplo n.º 7
0
    def has_quote(reply):
        '''Return True when the quote pattern matches reply.text.'''
        # NOTE(review): no DOTALL flag is passed, so if red.re_dict behaves
        # like re.compile the "." here cannot cross a newline - confirm this
        # is intended.
        quote_re = red.re_dict(r'^.*?【引用开始】.*?【引用结束】')
        return bool(quote_re.search(reply.text))
Exemplo n.º 8
0
    def has_quote(reply):
        '''Return True when the quote pattern matches reply.text.'''
        # NOTE(review): no DOTALL flag is passed, so if red.re_dict behaves
        # like re.compile the "." here cannot cross a newline - confirm this
        # is intended.
        quote_re = red.re_dict(r'^.*?【引用开始】.*?【引用结束】')
        return bool(quote_re.search(reply.text))
Exemplo n.º 9
0
    def has_unhandled_quote(self, reply):
        '''Return True when reply.text still holds a raw "@@name##" tag.'''
        raw_tag = red.re_dict(r'@@\S{1,16}##')
        # A second check against re_datetime is currently disabled.
        return bool(raw_tag.search(reply.text))
Exemplo n.º 10
0
    def has_unhandled_quote(self, reply):
        '''Return True when reply.text still holds a raw "@@name##" tag.'''
        raw_tag = red.re_dict(r'@@\S{1,16}##')
        # A second check against re_datetime is currently disabled.
        return bool(raw_tag.search(reply.text))
Exemplo n.º 11
0
    def get_processor(all_list):
        '''Look up the processor named in the first entry of all_list.

        Returns None when all_list is empty or its first entry carries no
        "<processor: NAME>" tag; otherwise returns whatever
        BaseProcessor.get_processor(NAME) gives back.
        '''
        if not all_list:
            return None

        tag = red.re_dict(r'<processor:\s*(.*?)\s*>').search(all_list[0])
        if not tag:
            return None

        return BaseProcessor.get_processor(tag.group(1))
Exemplo n.º 12
0
        def should_pick(reply):
            '''Return False for blank replies and replies ending in a note.'''
            blank = red.re_dict(r'^\s*$')
            if blank.match(reply.text):
                return False

            # Replies annotated as duplicate or as unprocessable are skipped.
            excluded_endings = ('【与上一条回复重复】', '【无法处理的回复】')
            return not reply.text.endswith(excluded_endings)
Exemplo n.º 13
0
def is_url(url):
    '''Return True when url matches the http(s) URL pattern below.'''
    url_pattern = (
        r'^https?://'  # scheme
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # dotted domain name
        r'localhost|'  # or the literal host "localhost"
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # or four dot-separated digit groups
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$'  # optional path / query up to the end
    )
    matcher = red.re_dict(url_pattern, red.IGNORECASE | red.A)
    return bool(matcher.match(url))
Exemplo n.º 14
0
def bp_to_internal2(infile):
    '''Parse arranged text into intermediate form 2.

    A reply starts at a line beginning with ``<time>`` and ends at a line
    beginning with ``<mark>``; the lines in between are its text.  Lines
    outside any reply are kept as plain strings.

    Args:
        infile: Iterable file-like object yielding text lines.  It is always
            closed by this function, also when parsing raises.

    Returns:
        A list mixing plain lines (str, trailing newline stripped) and
        BPReply objects, in file order.  Parsing stops early, after printing
        a message, on a mismatched marker or an unparsable date.
    '''
    all_list = []

    pattern = red.re_dict(r'<(\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d)')

    def dt(s):
        return datetime.strptime(s, '%Y-%m-%d  %H:%M:%S')

    temp = []
    temp_date = None
    in_reply = False

    # try/finally: the file used to stay open when e.g. strptime raised.
    try:
        for line in infile:
            line = line.rstrip('\n')

            if line.startswith('<time>'):
                if in_reply:
                    print('格式错误:回复文本的前后包括标志不配对。\n',
                          '丢失<mark>行')
                    break
                m = pattern.search(line)
                if not m:
                    print('无法解析日期')
                    break
                temp_date = dt(m.group(1))
                in_reply = True

            elif line.startswith('<mark>'):
                if not in_reply:
                    print('格式错误:回复文本的前后包括标志不配对。\n',
                          '丢失<time>行')
                    break
                # A trailing block character marks the reply as selected.
                select = line.endswith('█')
                # Store the finished reply.
                rpl = BPReply(temp_date, '\n'.join(temp), select)
                all_list.append(rpl)

                temp.clear()
                in_reply = False

            elif in_reply:
                temp.append(line)

            else:
                all_list.append(line)
    finally:
        infile.close()

    if in_reply:
        print('格式错误:最后一个回复文本的前后包括标志不配对。')

    return all_list
Exemplo n.º 15
0
def is_url(url):
    '''Return True when url matches the http(s) URL pattern below.'''
    url_pattern = (
        r'^https?://'  # scheme
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # dotted domain name
        r'localhost|'  # or the literal host "localhost"
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # or four dot-separated digit groups
        r'(?::\d+)?'  # optional port
        r'(?:/?|[/?]\S+)$'  # optional path / query up to the end
    )
    matcher = red.re_dict(url_pattern, red.IGNORECASE | red.A)
    return bool(matcher.match(url))
Exemplo n.º 16
0
def lianzai_fliter(processor, regex, flags):
    '''Serial filter.  Note: only the suggest flag is touched, not select.

    Among replies whose suggest flag is set, those that contain no quote
    (per processor.has_quote) and whose text matches regex stay suggested
    and are counted; all others get suggest cleared.
    '''
    print('>连载过滤器\n...正则式:{0}'.format(regex))

    matcher = red.re_dict(regex, flags)
    kept = 0

    for reply in processor.rlist:
        if not reply.suggest:
            continue
        if processor.has_quote(reply) or not matcher.search(reply.text):
            reply.suggest = False
        else:
            kept += 1

    print('...选择了{0}条回复作为连载'.format(kept))
Exemplo n.º 17
0
    def mark_empty(self):
        '''Clear the suggest flag of every reply whose text is blank.'''
        print('>标记空白回复:')

        blank = red.re_dict(r'^\s*$')
        blank_count = 0

        for reply in self.rlist:
            if not blank.match(reply.text):
                continue
            reply.suggest = False
            blank_count += 1

        # Red when anything was marked, green otherwise.
        shade = color.Fore.RED if blank_count else color.Fore.GREEN
        print('...标记了{0}个空白回复'.format(
            color.fore_color(blank_count, shade)))
Exemplo n.º 18
0
    def mark_empty(self):
        '''Clear the suggest flag of every reply whose text is blank.'''
        print('>标记空白回复:')

        blank = red.re_dict(r'^\s*$')
        blank_count = 0

        for reply in self.rlist:
            if not blank.match(reply.text):
                continue
            reply.suggest = False
            blank_count += 1

        # Red when anything was marked, green otherwise.
        shade = color.Fore.RED if blank_count else color.Fore.GREEN
        print('...标记了{0}个空白回复'.format(
            color.fore_color(blank_count, shade)))
Exemplo n.º 19
0
    def do_re_list(self):
        '''Apply every substitution rule in self.re_list to every reply.

        Each rule is read by index: rule[0] is an iterable of pattern
        fragments (joined into one pattern), rule[1] the regex flags and
        rule[2] the replacement.  Prints how many (reply, rule) pairs
        produced at least one substitution.
        '''
        print('>用正则式列表替换')

        # Compile into a local list instead of appending the compiled regex
        # to each rule: appending made every call grow the rule lists and
        # required the rules to be mutable.
        compiled_rules = [
            (red.re_dict(''.join(rule[0]), rule[1]), rule[2])
            for rule in self.re_list
        ]

        process_count = 0
        for rpl in self.rlist:
            for regex, replacement in compiled_rules:
                rpl.text, n = regex.subn(replacement, rpl.text)
                if n > 0:
                    process_count += 1

        print('...做了{0}次替换'.format(process_count))
Exemplo n.º 20
0
    def do_re_list(self):
        '''Apply every substitution rule in self.re_list to every reply.

        Each rule is read by index: rule[0] is an iterable of pattern
        fragments (joined into one pattern), rule[1] the regex flags and
        rule[2] the replacement.  Prints how many (reply, rule) pairs
        produced at least one substitution.
        '''
        print('>用正则式列表替换')

        # Compile into a local list instead of appending the compiled regex
        # to each rule: appending made every call grow the rule lists and
        # required the rules to be mutable.
        compiled_rules = [
            (red.re_dict(''.join(rule[0]), rule[1]), rule[2])
            for rule in self.re_list
        ]

        process_count = 0
        for rpl in self.rlist:
            for regex, replacement in compiled_rules:
                rpl.text, n = regex.subn(replacement, rpl.text)
                if n > 0:
                    process_count += 1

        print('...做了{0}次替换'.format(process_count))
Exemplo n.º 21
0
    def process_1(self):
        '''Custom processing: rewrite raw "@@name##" quotes in every reply.

        Each match in a reply's text is replaced by a "reply to <name>" line,
        the quoted text wrapped in quote-begin / quote-end markers, and then
        the remaining reply text.  The total number of substitutions is
        printed when done.
        '''
        print('>处理引用')

        # The leading "(?=(...))" + "\1" pair emulates an atomic group
        # "(?>...)": the lookahead captures once and the backreference then
        # consumes exactly that text.
        # Groups used by the replacement: 2 = name between "@@" and "##",
        # 4 = quoted text before the separator, 5 = text after it.
        # NOTE(review): group 3 plus the negative lookahead appear to demand
        # that only one separator line remains after this point - confirm
        # against the value of re_separater.
        fragments = [
            r'^(?=(.*@@(\S{1,16})##))',
            r'\1',
            r'.*?',
            r'(?<=\n)',
            r'(?=(.*?(?<=\n)',
            re_separater,
            r'\s+))',
            r'(?!\3.*?(?<=\n)',
            re_separater,
            r'\s+)',
            r'\s*(.*?)\s*',
            re_separater,
            r'\s+(.*)',
        ]
        quote_re = red.re_dict(''.join(fragments), red.DOTALL)
        template = r'回复 \2:\n【引用开始】\4\n【引用结束】\n\5'

        quote_count = 0
        for reply in self.rlist:
            reply.text, hits = quote_re.subn(template, reply.text)
            quote_count += hits

        print('...处理了{0}条引用'.format(
            color.fore_color(quote_count, color.Fore.CYAN)))
Exemplo n.º 22
0
def bp_to_final(infile, keep_discard=True, label=0):
    '''Compile arranged text into the final text and the discarded text.

    A reply starts at a line beginning with ``<time>`` and ends at a line
    beginning with ``<mark>``.  A reply whose ``<mark>`` line ends with the
    block character is picked and goes to the output; an unpicked reply
    with any non-blank line goes to the discard text.

    Args:
        infile: Iterable file-like object yielding text lines.  It is always
            closed by this function, also when an error is raised.
        keep_discard: When true and something was discarded, the discard
            stream is built as well.
        label: 0 adds no labels; 1 puts a page header at the spot of each
            page line whose page has at least one picked reply; 2 prefixes
            every picked reply with its ordinal, post time and page number.

    Returns:
        ``(output, discard, info_list, chinese_ct)``: a StringIO with the
        final text, a StringIO with the discarded text (or None), the list
        of ``<tiezi>`` header texts, and the result of count_chinese on the
        final text.
    '''
    class placeholder:
        def __init__(self, posi=0, pagenum=0, show=False):
            self.posi = posi
            self.pagenum = pagenum
            self.show = show

    info_list = list()
    # Index 0 is a sentinel so holder_list[-1] is valid before any page line.
    holder_list = [placeholder()]

    text_list = list()
    abandon_list = list()

    pickcount, allcount = 0, 0

    # Turns an [img]...[/img] tag (optionally numbered) into a picture marker.
    picr = (r'\[img\s*(\d+|)\].*?\[/img\]')
    pattern = red.re_dict(picr)

    # Extracts the page number.
    re_pagenum = red.re_dict(r'^<page>页号:\s*(\d+)\s*$')

    # Extracts the time: group 1 is the date with a 2-digit year,
    # group 2 the hour:minute part.
    p_time = (r'^<time>[^<]*<\d\d(\d\d-\d{1,2}-\d{1,2})\s+'
              r'(\d{1,2}:\d{1,2})')
    re_time = red.re_dict(p_time)

    # Read the arranged text.
    in_reply = False
    temp = list()

    current_page = 0
    current_time = ''

    # try/finally so the file is closed even if processing raises.
    try:
        for line in infile:
            if line.startswith('<time>'):
                if in_reply:
                    print('格式错误:回复文本的前后包括标志不配对。\n',
                          '丢失<mark>行')
                    break
                in_reply = True

                # The post time is only needed for floor labels.
                if label == 2:
                    m = re_time.search(line)
                    if m:
                        current_time = m.group(1) + ' ' + m.group(2)
                    else:
                        current_time = ''

            elif line.startswith('<mark>'):
                if not in_reply:
                    print('格式错误:回复文本的前后包括标志不配对。\n',
                          '丢失<time>行')
                    break

                if line.endswith(('█\n', '█')):
                    pickcount += 1

                    if label == 1:
                        # The current page now has a picked reply.
                        holder_list[-1].show = True
                    elif label == 2:
                        floor_label = ('№.%d ☆☆☆'
                                       ' 发表于%s  P.%d '
                                       '☆☆☆\n'
                                       '-------------------------'
                                       '-------------------------'
                                       '\n')
                        floor_label = floor_label % \
                            (pickcount, current_time, current_page)
                        text_list.append(floor_label)

                    text_list.extend(temp)
                    text_list.append('\n')

                elif any(item.strip() for item in temp):
                    # Unpicked but not blank: keep it in the discard text.
                    abandon_list.extend(temp)
                    abandon_list.append('∞∞∞∞∞∞∞∞∞∞∞∞∞∞∞∞∞∞∞∞\n\n')

                temp.clear()
                allcount += 1
                in_reply = False

            elif in_reply:
                line = pattern.sub(r'【一张图片\1】', line)
                temp.append(line)

            # Because of the previous elif, in_reply is false from here on.
            elif not text_list and not abandon_list and \
                    line.startswith('<tiezi>'):
                info_list.append(line[len('<tiezi>'):])

            elif label != 0:
                m = re_pagenum.search(line)
                if m:
                    current_page = int(m.group(1))
                    if label == 1:
                        # Reserve a slot; it is filled in below only if the
                        # page turns out to have a picked reply.
                        text_list.append('')
                        holder = placeholder(len(text_list)-1,
                                             current_page
                                             )
                        holder_list.append(holder)
    finally:
        infile.close()

    if in_reply:
        print('格式错误:最后一个回复文本的前后包括标志不配对。')

    # Page headers.
    if label == 1:
        for holder in holder_list[1:]:
            if holder.show:
                page_label = ('☆☆☆☆☆'
                              ' 进入第%d页 '
                              '☆☆☆☆☆\n'
                              '----------------'
                              '----------------'
                              '\n\n') % holder.pagenum
                text_list[holder.posi] = page_label

    color_p1 = color.fore_color(allcount, color.Fore.YELLOW)
    color_p2 = color.fore_color(pickcount, color.Fore.YELLOW)
    print('共有{0}条回复,选择了其中{1}条回复'.format(color_p1, color_p2))

    # Output content ============
    # Join the pieces.
    if info_list:
        s_iter = itertools.chain(info_list, '\n', text_list)
    else:
        s_iter = iter(text_list)
    s = ''.join(s_iter)

    # Runs of three or more picture markers collapse into one marker.
    s = red.sub(r'(?:【一张图片(\d+|)】\s+){3,}',
                r'【多张图片\1】\n\n',
                s)

    # Remaining pairs of picture markers collapse into one marker.
    s = red.sub(r'(?:【一张图片(\d+|)】\s+){2}',
                r'【两张图片\1】\n\n',
                s)

    # Output stream.
    output = StringIO(s)

    # Chinese character count.
    chinese_ct = count_chinese(s)

    # Discarded text.
    if keep_discard and abandon_list:
        s_iter = itertools.chain(info_list, '\n', abandon_list)
        s = ''.join(s_iter)
        discard = StringIO(s)
    else:
        discard = None

    return output, discard, info_list, chinese_ct
Exemplo n.º 23
0
    def statistic(self):
        '''Print reply counts, length statistics and a length histogram.

        Blank replies and replies whose text ends with the duplicate note or
        the unprocessable note are left out of the length figures.
        '''
        # Overall counts --------------------------
        print('回复总数:', len(self.rlist))
        print('选择的回复数:', len([rp for rp in self.rlist if rp.select]))

        print()

        # Length statistics --------------------------
        print('以下的统计不包括空白、重复和无法处理的回复:\n')

        blank_re = red.re_dict(r'^\s*$')
        excluded_endings = ('【与上一条回复重复】', '【无法处理的回复】')

        def should_pick(reply):
            '''Return False for replies that must not be counted.'''
            if blank_re.match(reply.text):
                return False
            return not reply.text.endswith(excluded_endings)

        triples = [self.reply_len_quote(rp)
                   for rp in self.rlist if should_pick(rp)]

        # Each triple is read as: [0] length of the quoted part (-1 when the
        # reply has no quote), [1] length of the reply part of a quoting
        # reply, [2] length of a reply without quote.
        with_quote = [t for t in triples if t[0] != -1]
        qlenlist = [t[0] for t in with_quote]
        rlenlist = [t[1] for t in with_quote]
        noqlenlist = [t[2] for t in triples if t[0] == -1]
        del triples, with_quote

        def num(lst, func):
            '''Apply func to lst, or give 0 for an empty list.'''
            return func(lst) if lst else 0

        print('           (引用部分 回复部分) 无引用回复')
        quoted_total = len(qlenlist)
        plain_total = len(noqlenlist)
        print(' 总 数 :   {0:<8}  +       {1:<8} = {2}'.format(
            quoted_total, plain_total, quoted_total + plain_total))

        # One output row per aggregate: (row template, aggregate function).
        summary_rows = (
            ('最长的字数:   {0:<8} {1:<8} {2:<8}', max),
            ('字数平均数:   {0:<8.2f} {1:<8.2f} {2:<8.2f}', statistics.mean),
            ('字数中位数:   {0:<8.0f} {1:<8.0f} {2:<8.0f}', statistics.median),
            ('总体标准差:   {0:<8.2f} {1:<8.2f} {2:<8.2f}', statistics.pstdev),
        )
        for template, func in summary_rows:
            print(template.format(num(qlenlist, func),
                                  num(rlenlist, func),
                                  num(noqlenlist, func)))

        # Length distribution ------------------------------

        # Bucket bounds: a leading 0, then e**x rounded for x = 2.0, 2.5, ...
        e_table = [0, 7, 12, 20, 33, 55, 90, 148, 245, 403,
                   665, 1097, 1808, 2981, 4915, 8103, 13360]

        def get_len_distribution(lenlist):
            '''Count lengths per bucket; the extra last slot is overflow.'''
            buckets = [0] * (len(e_table) + 1)
            for length in lenlist:
                # First bound above the length, or the overflow slot (-1).
                slot = next(
                    (idx for idx, bound in enumerate(e_table)
                     if length < bound),
                    -1)
                buckets[slot] += 1
            return buckets

        qdis = get_len_distribution(qlenlist)
        rdis = get_len_distribution(rlenlist)
        ndis = get_len_distribution(noqlenlist)

        # Print the distribution.
        print('\n字数分布')
        print(' ' * 16, '(引用部分 回复部分) 无引用回复')

        row = '{0:>6}<= x <{1:<5} : {2:<8} {3:<8} {4:<8}'
        for low, high, q_n, r_n, n_n in zip(e_table, e_table[1:],
                                            qdis[1:], rdis[1:], ndis[1:]):
            print(row.format(low, high, q_n, r_n, n_n))
        print('{0:>6}<= x        : {1:<8} {2:<8} {3:<8}'.format(
            e_table[-1], qdis[-1], rdis[-1], ndis[-1]))

        print(' ' * 8, '=' * 35)
        totals = '总数 : {0:<8} {1:<8} {2:<8}'.format(
            len(qlenlist), len(rlenlist), len(noqlenlist))
        print(' ' * 12, totals)
Exemplo n.º 24
0
    def statistic(self):
        '''Print reply counts, length statistics and a length histogram.

        Blank replies and replies whose text ends with the duplicate note or
        the unprocessable note are left out of the length figures.
        '''
        # Overall counts --------------------------
        print('回复总数:', len(self.rlist))
        print('选择的回复数:', len([rp for rp in self.rlist if rp.select]))

        print()

        # Length statistics --------------------------
        print('以下的统计不包括空白、重复和无法处理的回复:\n')

        blank_re = red.re_dict(r'^\s*$')
        excluded_endings = ('【与上一条回复重复】', '【无法处理的回复】')

        def should_pick(reply):
            '''Return False for replies that must not be counted.'''
            if blank_re.match(reply.text):
                return False
            return not reply.text.endswith(excluded_endings)

        triples = [self.reply_len_quote(rp)
                   for rp in self.rlist if should_pick(rp)]

        # Each triple is read as: [0] length of the quoted part (-1 when the
        # reply has no quote), [1] length of the reply part of a quoting
        # reply, [2] length of a reply without quote.
        with_quote = [t for t in triples if t[0] != -1]
        qlenlist = [t[0] for t in with_quote]
        rlenlist = [t[1] for t in with_quote]
        noqlenlist = [t[2] for t in triples if t[0] == -1]
        del triples, with_quote

        def num(lst, func):
            '''Apply func to lst, or give 0 for an empty list.'''
            return func(lst) if lst else 0

        print('           (引用部分 回复部分) 无引用回复')
        quoted_total = len(qlenlist)
        plain_total = len(noqlenlist)
        print(' 总 数 :   {0:<8}  +       {1:<8} = {2}'.format(
            quoted_total, plain_total, quoted_total + plain_total))

        # One output row per aggregate: (row template, aggregate function).
        summary_rows = (
            ('最长的字数:   {0:<8} {1:<8} {2:<8}', max),
            ('字数平均数:   {0:<8.2f} {1:<8.2f} {2:<8.2f}', statistics.mean),
            ('字数中位数:   {0:<8.0f} {1:<8.0f} {2:<8.0f}', statistics.median),
            ('总体标准差:   {0:<8.2f} {1:<8.2f} {2:<8.2f}', statistics.pstdev),
        )
        for template, func in summary_rows:
            print(template.format(num(qlenlist, func),
                                  num(rlenlist, func),
                                  num(noqlenlist, func)))

        # Length distribution ------------------------------

        # Bucket bounds: a leading 0, then e**x rounded for x = 2.0, 2.5, ...
        e_table = [0, 7, 12, 20, 33, 55, 90, 148, 245, 403,
                   665, 1097, 1808, 2981, 4915, 8103, 13360]

        def get_len_distribution(lenlist):
            '''Count lengths per bucket; the extra last slot is overflow.'''
            buckets = [0] * (len(e_table) + 1)
            for length in lenlist:
                # First bound above the length, or the overflow slot (-1).
                slot = next(
                    (idx for idx, bound in enumerate(e_table)
                     if length < bound),
                    -1)
                buckets[slot] += 1
            return buckets

        qdis = get_len_distribution(qlenlist)
        rdis = get_len_distribution(rlenlist)
        ndis = get_len_distribution(noqlenlist)

        # Print the distribution.
        print('\n字数分布')
        print(' ' * 16, '(引用部分 回复部分) 无引用回复')

        row = '{0:>6}<= x <{1:<5} : {2:<8} {3:<8} {4:<8}'
        for low, high, q_n, r_n, n_n in zip(e_table, e_table[1:],
                                            qdis[1:], rdis[1:], ndis[1:]):
            print(row.format(low, high, q_n, r_n, n_n))
        print('{0:>6}<= x        : {1:<8} {2:<8} {3:<8}'.format(
            e_table[-1], qdis[-1], rdis[-1], ndis[-1]))

        print(' ' * 8, '=' * 35)
        totals = '总数 : {0:<8} {1:<8} {2:<8}'.format(
            len(qlenlist), len(rlenlist), len(noqlenlist))
        print(' ' * 12, totals)