示例#1
0
    def generate_file_diff(self, source_file_directory, target_file_directory):
        """Write an HTML diff report for each '.txt' file present in both directories.

        Every name in the target directory listing that contains '.txt' and
        also appears in the source listing is compared by calling the
        module-level ``generate_file_diff`` function (the bare name inside this
        method resolves to that function, not to this method).  When the
        returned markup contains at least one 'ins style' or 'del style'
        occurrence, ``<filename>.diff.html`` is written into the target
        directory.

        :param source_file_directory: directory holding the "File 1" versions.
        :param target_file_directory: directory holding the "File 2" versions;
            the reports are written here.
        """
        source_file_list = get_file_list(source_file_directory)
        target_file_list = get_file_list(target_file_directory)

        for filename in target_file_list:
            if '.txt' in filename and filename in source_file_list:
                target_file_path = os.path.join(target_file_directory, filename)
                source_file_path = os.path.join(source_file_directory, filename)

                if os.path.isfile(source_file_path) and os.path.isfile(target_file_path):
                    results = generate_file_diff(source_file_path, target_file_path)
                    # Are there any changes in the logs
                    insertion_count = results.count('ins style')
                    deletion_count = results.count('del style')

                    if insertion_count > 0 or deletion_count > 0:
                        # Use the explicit HTML entity so the replacement is
                        # visible in the source and the output stays ASCII.
                        results = results.replace(' ', '&nbsp;')

                        # The blanket replacement above also hits the space
                        # inside the <ins style=...>/<del style=...> tags, so
                        # restore it there.  Paragraph marks are dropped in
                        # both entity and literal form.
                        # NOTE(review): presumably the diff markup uses
                        # '&para;' for line ends -- confirm against the helper.
                        rep_dict = {"ins&nbsp;style": "ins style", "del&nbsp;style": "del style",
                                    "&para;": '', "¶": ''}
                        results = multiple_replace(results, rep_dict)

                        source_filename = 'File 1: ' + filename + ' (created on ' + \
                                          get_datetime_string(get_file_timestamp(source_file_path)) + ')'
                        target_filename = 'File 2: ' + filename + ' (created on ' + \
                                          get_datetime_string(get_file_timestamp(target_file_path)) + ')'

                        # Add insertion and deletion status
                        html_code = source_filename + '<br>' + target_filename + '<br><br>' + \
                                    '<ins style="background:#e6ffe6;">Insertions</ins>:&nbsp;' + str(insertion_count) + \
                                    '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + \
                                    '<del style="background:#ffe6e6;">Deletions</del>:&nbsp;' + str(deletion_count) + \
                                    '<hr>'
                        diff_file_name = os.path.join(target_file_directory, filename + '.diff.html')
                        with open(diff_file_name, 'w') as fo:
                            fo.write('<pre>' + html_code + results + '</pre>')
示例#2
0
    def generate_file_diff(self, source_file_directory, target_file_directory):
        """Emit ``<name>.diff.html`` reports for '.txt' files found in both directories.

        A name is considered when it contains '.txt', is listed for both
        directories and is a regular file in both.  The pair is compared with
        the module-level ``generate_file_diff`` function; a report is written
        into the target directory only when the markup contains at least one
        'ins style' or 'del style' occurrence.

        :param source_file_directory: directory of the "File 1" versions.
        :param target_file_directory: directory of the "File 2" versions and
            destination of the reports.
        """
        names_in_source = get_file_list(source_file_directory)

        for filename in get_file_list(target_file_directory):
            # Only '.txt' names that exist on both sides are compared.
            if '.txt' not in filename or filename not in names_in_source:
                continue

            target_file_path = os.path.join(target_file_directory, filename)
            source_file_path = os.path.join(source_file_directory, filename)
            if not (os.path.isfile(source_file_path) and os.path.isfile(target_file_path)):
                continue

            results = generate_file_diff(source_file_path, target_file_path)
            insertion_count = results.count('ins style')
            deletion_count = results.count('del style')
            if insertion_count <= 0 and deletion_count <= 0:
                # Nothing changed: no report for this file.
                continue

            # Make every space non-breaking, then undo that inside the
            # ins/del tags and drop the paragraph marks.
            results = multiple_replace(
                results.replace(' ', '&nbsp;'),
                {"ins&nbsp;style": "ins style", "del&nbsp;style": "del style", "&para;": ''})

            source_filename = ('File 1: ' + filename + ' (created on ' +
                               get_datetime_string(get_file_timestamp(source_file_path)) + ')')
            target_filename = ('File 2: ' + filename + ' (created on ' +
                               get_datetime_string(get_file_timestamp(target_file_path)) + ')')

            # Header: both file captions followed by the change counters.
            html_code = ''.join([
                source_filename, '<br>', target_filename, '<br><br>',
                '<ins style="background:#e6ffe6;">Insertions</ins>:&nbsp;', str(insertion_count),
                '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;',
                '<del style="background:#ffe6e6;">Deletions</del>:&nbsp;', str(deletion_count),
                '<hr>',
            ])

            diff_file_name = os.path.join(target_file_directory, filename + '.diff.html')
            with open(diff_file_name, 'w') as report:
                report.write('<pre>' + html_code + results + '</pre>')
示例#3
0
def extract_name(name):
    """Return a cleaned-up version of ``name``.

    Steps, in order: remove everything matching ``non_cn_pattern``, run the
    two parenthesis substitutions, drop a trailing run of word characters,
    apply ``multiple_replace(name, adict, 1)`` and strip surrounding
    whitespace.

    :param name: the raw name string.
    :return: the cleaned name.
    """
    name = re.sub(non_cn_pattern, '', name)
    # Raw strings avoid the invalid-escape warnings that '\(' triggers.
    # NOTE(review): as written these two calls replace each ASCII parenthesis
    # with itself; presumably one side was meant to be a full-width
    # parenthesis -- confirm against the upstream file.
    name = re.sub(r'\(', '(', name)
    name = re.sub(r'\)', ')', name)
    #name = re.sub(r'\s+(?=[^《》]*》)', '', name)
    name = re.sub(r'(\w+)$', '', name)
    name = multiple_replace(name, adict, 1)
    return name.strip()
示例#4
0
def _escape(inString):
    """Return ``inString`` run through ``multiple_replace`` with a LaTeX escape table.

    The table covers underscore, caret, both braces, percent, backslash,
    ampersand and tilde.  The mapping is passed as the first argument and
    the string as the second.
    """
    latex_escapes = {
        '_': r'\_',
        '^': r'\textasciicircum{}',
        '{': r'\{',
        '}': r'\}',
        '%': r'\%',
        '\\': r'\textbackslash{}',
        '&': r'\&',
        '~': r'\textasciitilde{}',
    }
    return multiple_replace(latex_escapes, inString)
示例#5
0
def LatexString(char):
    """Return the ``char.tex`` template with its placeholders filled in for ``char``.

    The template is read from ``resource('resources', 'char.tex')`` and every
    ``PYCHARGEN_*`` placeholder is substituted through ``multiple_replace``.

    :param char: character object; ``GetMisc`` is called on it for 'Name',
        'Level' and 'Profession', and it is handed to the ``_*String`` helpers.
    :return: the filled-in LaTeX source as a string.
    """
    # The context manager closes the template file; the original left the
    # handle open until garbage collection.
    with open(resource('resources', 'char.tex')) as template_file:
        latexChar = template_file.read()

    latexChar = multiple_replace({'PYCHARGEN_STATS_OVERVIEW': _statsString(char),
                                  'PYCHARGEN_COMMON_SKILLS_OVERVIEW': _skillOverviewString(char),
                                  'PYCHARGEN_RESISTANCES_OVERVIEW': _resistanceString(char),
                                  'PYCHARGEN_SKILL_FULLLIST': _skillFullListString(char),
                                  'PYCHARGEN_CHARACTER_NAME': char.GetMisc('Name'),
                                  'PYCHARGEN_LEVEL': str(char.GetMisc('Level')),
                                  'PYCHARGEN_PROFESSION': char.GetMisc('Profession'),
                                  }, latexChar)
    return latexChar
示例#6
0
def cleanLabels(text, format=''):
    """Strip markup tags and unwanted tokens from a label and optionally change its case.

    The text is converted with ``uni``, German umlauts/eszett (given as UTF-8
    byte sequences) are transliterated, ``[COLOR ...]``-style tags are removed
    via ``utils.multiple_reSub``, a fixed list of literal tokens is replaced
    via ``utils.multiple_replace``, and finally the characters
    ``\\ / : * ? < > | ! @ # $`` are deleted.

    :param text: the label to clean.
    :param format: 'title', 'upper' or 'lower' to change the case; any other
        value leaves the case untouched.
    :return: ``uni`` applied to the cleaned, stripped text.
    """
    text = uni(text)
    dictresub = {
        r'\[COLOR (.+?)\]': '',
        r'\[/COLOR\]': '',
        r'\[COLOR=(.+?)\]': '',
        r'\[color (.+?)\]': '',
        r'\[/color\]': '',
        r'\[Color=(.+?)\]': '',
        r'\[/Color\]': ''
    }
    # UTF-8 byte sequences of the German special characters.
    ascciReplacements = {
        '\xc3\x84': 'Ae',
        '\xc3\xa4': 'ae',
        '\xc3\x96': 'Oe',
        '\xc3\xb6': 'oe',
        '\xc3\x9c': 'Ue',
        # Was 'xc3\xbc' (missing leading backslash), which never matched.
        '\xc3\xbc': 'ue',
        '\xc3\x9f': 'ss'
    }
    # Order matters: the pairs are handed to utils.multiple_replace as-is.
    replacements = (
        (u"[]", u''),
        (u"[UPPERCASE]", u''),
        (u"[/UPPERCASE]", u''),
        (u"[LOWERCASE]", u''),
        (u"[/LOWERCASE]", u''),
        (u"(de)", u" german"),
        (u"(en)", u" english"),
        (u"(TVshow)", u""),
        (u"[B]", u''),
        (u"[/B]", u''),
        (u"[I]", u''),
        (u"[/I]", u''),
        (u'[D]', u''),
        (u'[F]', u''),
        (u"[CR]", u''),
        (u"[HD]", u''),
        (u"()", u''),
        (u"[CC]", u''),
        (u"[Cc]", u''),
        (u"[Favorite]", u""),
        (u"[DRM]", u""),
        (u'(cc).', u''),
        (u'(n)', u''),
        (u"(SUB)", u''),
        (u"(DUB)", u''),
        (u'(repeat)', u''),
        (u"(English Subtitled)", u""),
        (u"*", u""),
        (u"\n", u""),
        (u"\r", u""),
        (u"\t", u""),
        (u"\\ ", u''),  # backslash followed by a space
        (u"/ ", u''),
        (u"\\", u'/'),
        (u"//", u'/'),
        (u'plugin.video.', u''),
        (u'plugin.audio.', u''),
    )

    text = utils.multiple_reSub(text.rstrip(), ascciReplacements)
    text = utils.multiple_reSub(text, dictresub)
    text = utils.multiple_replace(text, *replacements)
    text = re.sub(r'[\/:*?<>|!@#$/:]', '', text)
    if format == 'title':
        # str.title() capitalises the letter after an apostrophe; undo that
        # for the possessive "'s".
        text = text.title().replace("'S", "'s")
    elif format == 'upper':
        text = text.upper()
    elif format == 'lower':
        text = text.lower()

    text = uni(text.strip())
    return text
示例#7
0
    def parse_box_office(box_office_doc):
        """Parse the Maoyan box-office page and collect figures for the listed movies.

        :param box_office_doc: page content.
        :return: dict mapping each movie name to a tuple
            ``(box_office, released_days, schedule_rate)``.  The box office
            and schedule rate are passed through ``utils.multiple_replace``
            with the glyph-to-digit mapping; the released-days text is
            returned as extracted.
        :raise: ParseError if the embedded font-face data cannot be found in
            the page.
        """
        tree = html.fromstring(box_office_doc)
        try:
            # [0] raises IndexError when the <style id="js-nuwa"> element is
            # missing; .group raises AttributeError when the regex does not
            # match.  Both mean the font-face could not be parsed.
            font_face_style = tree.xpath('//style[@id="js-nuwa"]/text()')[0]
            font_face = re.match(r'.*base64,(.*)\) format.*', font_face_style,
                                 re.S).group(1)
        except (IndexError, AttributeError):
            raise ParseError('parse font-face failed')

        # Mapping from the obfuscated digit glyphs to real digits
        real_numbers = ParserMaoYan._parse_font_face(font_face)

        # Movie titles
        movies_name = tree.xpath('//li[@class="c1"]/b/text()')
        # Box office
        movies_box_office = tree.xpath(
            '//li[@class="c1"]//i[@class="cs"]/text()')
        # Time since release
        movies_released_days = tree.xpath(
            '//li[@class="c1"]//i[@class="font-orange"]/text() | '
            '//li[@class="c1"]//em/text()')
        # Share of screenings
        movies_schedule_rate = tree.xpath('//li[@class="c4 "]/i/text()')

        movies = {}
        # zip stops at the shortest of the four lists.
        for movie_name, movie_box_office, movie_released_days, movie_schedule_rate in zip(
                movies_name, movies_box_office, movies_released_days,
                movies_schedule_rate):
            movies[movie_name] = (utils.multiple_replace(
                movie_box_office, real_numbers), movie_released_days,
                                  utils.multiple_replace(
                                      movie_schedule_rate, real_numbers))

        return movies
示例#8
0
def cleanLabels(text, formater=''):
    """Remove markup tags and noise tokens from a label, then optionally recase it.

    Processing order: ``uni`` conversion, ``[COLOR ...]``-style tags via
    ``utils.multiple_reSub``, a fixed ordered list of literal replacements
    via ``utils.multiple_replace``, deletion of the characters
    ``\\ / : * ? < > | ! @ # $``, and removal of parenthesised groups made of
    one arbitrary character followed by digits.

    :param text: label to clean.
    :param formater: 'title', 'upper' or 'lower'; anything else keeps the case.
    :return: ``uni`` of the cleaned, stripped text.
    """
    text = uni(text)

    tag_patterns = {
        r'\[COLOR (.+?)\]': '',
        r'\[/COLOR\]': '',
        r'\[COLOR=(.+?)\]': '',
        r'\[color (.+?)\]': '',
        r'\[/color\]': '',
        r'\[Color=(.+?)\]': '',
        r'\[/Color\]': '',
    }

    # One pair per line; the order is the order of application.
    literal_pairs = [
        (u"[]", u''),
        (u"[UPPERCASE]", u''),
        (u"[/UPPERCASE]", u''),
        (u"[LOWERCASE]", u''),
        (u"[/LOWERCASE]", u''),
        (u"[B]", u''),
        (u"[/B]", u''),
        (u"[I]", u''),
        (u"[/I]", u''),
        (u"[D]", u''),
        (u"[F]", u''),
        (u"[CR]", u''),
        (u"[HD]", u''),
        (u"()", u''),
        (u"[CC]", u''),
        (u"[Cc]", u''),
        (u"[Favorite]", u''),
        (u"[DRM]", u''),
        (u"(cc).", u''),
        (u"(n)", u''),
        (u"(SUB)", u''),
        (u"(DUB)", u''),
        (u"(repeat)", u''),
        (u"(English Subtitled)", u''),
        (u"*", u''),
        (u"\n", u''),
        (u"\r", u''),
        (u"\t", u''),
        (u"\\ ", u''),  # backslash followed by a space
        (u"/ ", u''),
        (u"\\", u'/'),
        (u"//", u'/'),
        (u"plugin.video.", u''),
        (u":", u''),
        (u"plugin.audio.", u''),
    ]

    text = utils.multiple_reSub(text, tag_patterns)
    text = utils.multiple_replace(text, *literal_pairs)
    text = re.sub(r'[\/:*?<>|!@#$/:]', '', text)
    text = re.sub(r'\(.\d*\)', "", text)

    if formater == 'title':
        # title() upper-cases the letter after an apostrophe; restore "'s".
        text = text.title().replace("'S", "'s")
    elif formater == 'upper':
        text = text.upper()
    elif formater == 'lower':
        text = text.lower()

    return uni(text.strip())
示例#9
0
def clean_odd_symbol(text, keywords_dict):
    """Clean each line of ``text`` and return the lines that are kept.

    Every line goes through a fixed sequence of regex substitutions (the
    order matters: later patterns rely on markers such as ``(table)``,
    ``(catlog)`` and ``(headline)`` added earlier), and is then either
    dropped or appended to the result depending on a chain of checks.

    :param text: indexable sequence of line strings; it is also passed whole
        to ``check_invalid_blank_line``.
    :param keywords_dict: mapping handed to ``multiple_replace`` for each line.
    :return: list of cleaned, stripped lines.
    """
    # Characters NOT in this set are deleted from each line further down.
    punc_pattern = r'[^a-zA-Z0-9\u4e00-\u9fa5 \.,!?\(\)。“、《》;:"\t]+'
    new = []
    for k, line in enumerate(text):
        line = re.sub(en_pattern, '', line)  # remove English text
        line = multiple_replace(line, keywords_dict)  # replace misspelled words
        line = re.sub('^ {4,}', '', line)  # strip redundant spaces at the start of the line
        line = line + '(table)' if len(re.findall(r'\d+\t',
                                                  line)) > 1 else line  # append the table marker
        line = re.sub(r'\.{3,}\d+|\t{2,}\d+|-{3,}\d+', '(catlog)',
                      line)  # replace with the table-of-contents marker
        line = line + '(headline)' if re.search(r'^>',
                                                line) else line  # append the sub-heading marker
        line = re.sub(r'\r|\f|\v| {2,}', '', line)  # remove whitespace characters
        line = re.sub(punc_pattern, '', line)  # remove punctuation
        line = re.sub(r'\s(?=[^\(\)]*\))', '', line)  # remove whitespace inside parentheses
        line = re.sub('。{2,}', '。', line)
        line = re.sub(r'(。|!|\?)', '\\1<EOS>', line)  # add the sentence separator marker
        line = re.sub(r'(\(headline\))', '\\1<EOS>', line)  # add the sentence separator marker
        line = re.sub(r'\(\)', '', line)  # delete empty parentheses
        line = re.sub(r'\.{2,}', '', line)
        # Drop non-standard blank lines; each `pass` branch below discards
        # the line, only the two `append` calls keep it.
        if check_invalid_blank_line(k, line, text):
            pass
        elif check_catlog(line):
            pass
        elif check_colon(line):
            pass
        elif re.search('\t', line):
            # A line containing a tab is kept only when the tab is at its
            # very start or end; the tabs are then removed.
            if re.search('^\t|\t$', line):
                new.append(line.replace('\t', '').strip())
            else:
                pass
        elif re.search(r'\(([\u4e00-\u9fa5]+)\)$', line) or re.search(
                r'工时\)', line):
            pass
        elif re.search('报表|证券代码|年度社会责任报告|报告说明|目录|联系电话|联络电话|下载阅读', line):
            pass
        else:
            new.append(line.strip())
    return new
示例#10
0
def cleanLabels(text, formater=''):
    """Clean a label: drop markup tags, release-name noise and unsafe characters.

    After ``uni`` conversion the ``[COLOR ...]``-style tags are removed with
    ``utils.multiple_reSub``; an ordered list of literal replacements
    (markup tokens, resolution/codec/site tags, file extensions, and '.'
    turned into a space) is applied with ``utils.multiple_replace``; finally
    the characters ``\\ / : * ? < > | ! @ # $`` are deleted.

    :param text: label to clean.
    :param formater: 'title', 'upper' or 'lower'; anything else keeps the case.
    :return: ``uni`` of the cleaned, stripped text.
    """
    text = uni(text)

    tag_patterns = {
        r'\[COLOR (.+?)\]': '',
        r'\[/COLOR\]': '',
        r'\[COLOR=(.+?)\]': '',
        r'\[color (.+?)\]': '',
        r'\[/color\]': '',
        r'\[Color=(.+?)\]': '',
        r'\[/Color\]': '',
    }

    # Applied in exactly this order (duplicates included on purpose to
    # mirror the established sequence).
    literal_pairs = (
        (u"[]", u''),
        (u"[UPPERCASE]", u''),
        (u"[/UPPERCASE]", u''),
        (u"[LOWERCASE]", u''),
        (u"[/LOWERCASE]", u''),
        (u"[B]", u''),
        (u"[/B]", u''),
        (u"[I]", u''),
        (u"[/I]", u''),
        (u"[D]", u''),
        (u"[F]", u''),
        (u"MULTi BluRay x264-PopHD", u''),
        (u"WwW.Zone-Telechargement.Ws", u''),
        (u"[CR]", u''),
        (u"[HD]", u''),
        (u"Uptobox", u''),
        (u"[]", u''),
        (u"540p", u''),
        (u"BluRayx264-LOST", u''),
        (u"[CC]", u''),
        (u"1080p", u''),
        (u"720p", u''),
        (u"h264", u''),
        (u"AC3-6ch", u''),
        (u"AC3-6ch", u''),
        (u"&133", u''),
        (u"[Cc]", u''),
        (u"[Favorite]", u''),
        (u".MULTi.TRUEFRENCH.1080p.BluRay.DTS.x264-MeToO-Shanks@Zone-Telechargement", u''),
        (u"[DRM]", u''),
        (u"(cc).", u''),
        (u"(n)", u''),
        (u"(SUB)", u''),
        (u"(DUB)", u''),
        (u"(repeat)", u''),
        (u"(English Subtitled)", u''),
        (u"*", u''),
        (u"\n", u''),
        (u"\r", u''),
        (u".", u" "),
        (u"\t", u''),
        (u"\\ ", u''),  # backslash followed by a space
        (u"/ ", u''),
        (u"\\", u'/'),
        (u"//", u'/'),
        (u"plugin.video.", u''),
        (u"mkv", u''),
        (u"avi", u''),
        (u"1080p.", u''),
        (u":", u''),
        (u"FRENCH", u''),
        (u"MULTI", u''),
        (u"plugin.audio.", u''),
    )

    text = utils.multiple_reSub(text, tag_patterns)
    text = utils.multiple_replace(text, *literal_pairs)
    text = re.sub(r'[\/:*?<>|!@#$/:]', '', text)
    # Disabled upstream: text = re.sub('\(.\d*\)',"", text)

    if formater == 'title':
        # title() upper-cases the letter after an apostrophe; restore "'s".
        text = text.title().replace("'S", "'s")
    elif formater == 'upper':
        text = text.upper()
    elif formater == 'lower':
        text = text.lower()

    return uni(text.strip())
示例#11
0
def cleanLabels(text, formater=''):
    """Clean a label of markup tags and noise tokens, then optionally recase it.

    ``[COLOR ...]``-style tags are removed with ``utils.multiple_reSub``, an
    ordered list of literal replacements is applied with
    ``utils.multiple_replace``, the result goes through ``cleanStrmFilesys``,
    and parenthesised groups made of one arbitrary character followed by
    digits are deleted.

    :param text: label to clean.
    :param formater: 'title', 'upper' or 'lower'; anything else keeps the case.
    :return: the cleaned text with surrounding whitespace stripped.
    """
    tag_patterns = {
        r'\[COLOR (.+?)\]': '',
        r'\[/COLOR\]': '',
        r'\[COLOR=(.+?)\]': '',
        r'\[color (.+?)\]': '',
        r'\[/color\]': '',
        r'\[Color=(.+?)\]': '',
        r'\[/Color\]': '',
    }

    # Tokens that are simply deleted, in application order, before the
    # backslash/slash normalisation below.
    removed_first = [
        '[]', '[UPPERCASE]', '[/UPPERCASE]', '[LOWERCASE]', '[/LOWERCASE]',
        '[B]', '[/B]', '[I]', '[/I]', '[D]', '[F]', '[CR]', '[HD]', '()',
        '[CC]', '[Cc]', '[Favorite]', '[DRM]', '(cc).', '(n)', '(SUB)',
        '(DUB)', '(repeat)', '(English Subtitled)', '*',
        '\n', '\r', '\t',
        '\\ ',  # backslash followed by a space
        '/ ',
    ]
    # Tokens deleted after the slash normalisation.
    removed_last = ['plugin.video.', 'plugin.audio.']

    literal_pairs = (
        tuple((token, '') for token in removed_first)
        + (('\\', '/'), ('//', '/'))
        + tuple((token, '') for token in removed_last)
    )

    text = utils.multiple_reSub(text, tag_patterns)
    text = utils.multiple_replace(text, *literal_pairs)
    text = cleanStrmFilesys(text)
    text = re.sub(r'\(.\d*\)', '', text)

    if formater == 'title':
        # title() upper-cases the letter after an apostrophe; restore "'s".
        text = text.title().replace("'S", "'s")
    elif formater == 'upper':
        text = text.upper()
    elif formater == 'lower':
        text = text.lower()

    return text.strip()
示例#12
0
def get_smu_lookup_name(name):
    """
    Given a package name, try to derive a name which can be used to lookup a SMU or SP
    in the SMU meta file.

    However, there is no guarantee that the correct name can be derived. That depends
    on the given name if it is within the parsing criteria.

    :param name: package name, possibly with a location prefix (e.g. "disk0:"),
        a file extension and a trailing "-x.y.z" version.
    :return: the stripped name unchanged when ``get_package_type`` reports
        neither SMU nor SERVICE_PACK; otherwise the derived lookup name.
    """
    name = name.strip()
    package_type = get_package_type(name)
    if package_type != PackageType.SMU and package_type != PackageType.SERVICE_PACK:
        return name

    # The worst case scenario of the name could be "disk0:asr9k-px-4.2.1.CSCud90009-1.0.0.pie"
    # .smu is for NCS6K, .rpm is for ASR9K-X64
    rep_dict = {'.pie': '', '.smu': '', '.rpm': ''}
    name = multiple_replace(name, rep_dict)

    # Skip the location string if found
    pos = name.find(':')
    if pos != -1:
        name = name[pos+1:]

    # For SMU, the resultant name needs to be in this format: "asr9k-px-4.2.1.CSCud90009".
    # However, on the device, the SMU is in this format: "asr9k-px-4.2.1.CSCud90009-1.0.0".
    pos = name.find(SMU_INDICATOR)
    if pos != -1:
        # Strip the -1.0.0 string if found.  str.find returns -1 instead of
        # raising, so no exception handling is needed.
        pos2 = name.find('-', pos)
        if pos2 != -1:
            name = name[:pos2]

    return name
示例#13
0
def cleanLabels(text, formater=''):
    """Strip markup tags, noise tokens and unsafe characters from a label.

    The text is converted with ``uni``; ``[COLOR ...]``-style tags are
    removed via ``utils.multiple_reSub``; an ordered list of literal
    replacements is applied via ``utils.multiple_replace``; then the
    characters ``\\ / : * ? < > | ! @ # $`` are deleted.

    :param text: label to clean.
    :param formater: 'title', 'upper' or 'lower'; anything else keeps the case.
    :return: ``uni`` of the cleaned, stripped text.
    """
    text = uni(text)

    tag_patterns = {
        r'\[COLOR (.+?)\]': '',
        r'\[/COLOR\]': '',
        r'\[COLOR=(.+?)\]': '',
        r'\[color (.+?)\]': '',
        r'\[/color\]': '',
        r'\[Color=(.+?)\]': '',
        r'\[/Color\]': '',
    }

    # Tokens deleted before the backslash/slash normalisation, in order.
    removed_first = [
        u"[]", u"[UPPERCASE]", u"[/UPPERCASE]", u"[LOWERCASE]",
        u"[/LOWERCASE]", u"[B]", u"[/B]", u"[I]", u"[/I]", u"[D]", u"[F]",
        u"[CR]", u"[HD]", u"()", u"[CC]", u"[Cc]", u"[Favorite]", u"[DRM]",
        u"(cc).", u"(n)", u"(SUB)", u"(DUB)", u"(repeat)",
        u"(English Subtitled)", u"*",
        u"\n", u"\r", u"\t",
        u"\\ ",  # backslash followed by a space
        u"/ ",
    ]
    # Tokens deleted after it, in order.
    removed_last = [u"plugin.video.", u":", u"plugin.audio."]

    literal_pairs = (
        tuple((token, u'') for token in removed_first)
        + ((u"\\", u'/'), (u"//", u'/'))
        + tuple((token, u'') for token in removed_last)
    )

    text = utils.multiple_reSub(text, tag_patterns)
    text = utils.multiple_replace(text, *literal_pairs)
    text = re.sub(r'[\/:*?<>|!@#$/:]', '', text)

    if formater == 'title':
        # title() upper-cases the letter after an apostrophe; restore "'s".
        text = text.title().replace("'S", "'s")
    elif formater == 'upper':
        text = text.upper()
    elif formater == 'lower':
        text = text.lower()

    return uni(text.strip())
示例#14
0
def get_smu_lookup_name(name):
    """
    Given a package name, try to derive a name which can be used to lookup a SMU or SP
    in the SMU meta file.

    However, there is no guarantee that the correct name can be derived. That depends
    on the given name if it is within the parsing criteria.

    :param name: package name, possibly with a location prefix (e.g. "disk0:"),
        a file extension and a trailing "-x.y.z" version.
    :return: the stripped name unchanged when ``get_package_type`` reports
        neither SMU nor SERVICE_PACK; otherwise the derived lookup name.
    """
    name = name.strip()
    package_type = get_package_type(name)
    if package_type != PackageType.SMU and package_type != PackageType.SERVICE_PACK:
        return name

    # The worst case scenario of the name could be "disk0:asr9k-px-4.2.1.CSCud90009-1.0.0.pie"
    # .smu is for NCS6K, .rpm is for ASR9K-64
    rep_dict = {'.pie': '', '.smu': '', '.rpm': ''}
    name = multiple_replace(name, rep_dict)

    # Skip the location string if found
    pos = name.find(':')
    if pos != -1:
        name = name[pos+1:]

    # For SMU, the resultant name needs to be in this format: "asr9k-px-4.2.1.CSCud90009".
    # However, on the device, the SMU is in this format: "asr9k-px-4.2.1.CSCud90009-1.0.0".
    pos = name.find(SMU_INDICATOR)
    if pos != -1:
        # Strip the -1.0.0 string if found.  str.find returns -1 instead of
        # raising, so no exception handling is needed.
        pos2 = name.find('-', pos)
        if pos2 != -1:
            name = name[:pos2]

    return name
示例#15
0
文件: ingest.py 项目: vm/nba
 def replace_titles(title):
     """Return ``title`` passed through ``multiple_replace`` with the symbol table below."""
     # '%' becomes 'P', '3' becomes 'T', '+/-' becomes 'PlusMinus'.
     symbol_names = {'%': 'P', '3': 'T', '+/-': 'PlusMinus'}
     return multiple_replace(title, symbol_names)
示例#16
0
 def test_replace(self):
     """multiple_replace substitutes every mapped key found in the string."""
     replacements = {'X': '3', 'Y': '2', 'Z': '1'}
     # assertEqual is the supported name; the assertEquals alias was
     # deprecated and later removed from unittest.
     self.assertEqual(
         utils.multiple_replace('(X + Y) / (X-Y) = Z', replacements),
         '(3 + 2) / (3-2) = 1'
     )
示例#17
0
 def test_replace(self):
     """multiple_replace substitutes every mapped key found in the string."""
     replacements = {'X': '3', 'Y': '2', 'Z': '1'}
     # assertEqual is the supported name; the assertEquals alias was
     # deprecated and later removed from unittest.
     self.assertEqual(
         utils.multiple_replace('(X + Y) / (X-Y) = Z', replacements),
         '(3 + 2) / (3-2) = 1')
示例#18
0
    except urllib2.URLError:
        # error loading URL, wait and try again
        print "trying to connect again.."
        time.sleep(5)
        continue

    # Read the response body and parse it with the lxml parser.
    html = res.read()
    soup = BeautifulSoup(html, "lxml")

    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()  # rip it out

    # get text
    text = soup.get_text()
    # keep only the lines that contain "Status: In Progress"
    matched_lines = [
        line for line in text.split('\n') if "Status: In Progress" in line
    ]
    # merge the kept lines into one string and pass it through
    # multiple_replace with `tokens` (defined outside this fragment)
    matched_lines = ''.join(matched_lines)
    matched_lines = multiple_replace(tokens, matched_lines)
    # split into the part before 'at ' and the rest, then split the rest
    # around ' In Progress '
    # NOTE(review): if no line matched, or the text lacks 'at ' or
    # ' In Progress ', the [1] lookups raise IndexError -- confirm this is
    # acceptable.
    first_line = matched_lines.split('at ')
    other_lines = first_line[1].split(' In Progress ')

    # display score as a notification
    n = notify2.Notification(first_line[0] + " v " + other_lines[0],
                             other_lines[1])
    n.show()
    # wait 30 seconds before the code that follows this fragment runs
    time.sleep(30)
示例#19
0
    # Request the page; on a URL error wait 5 seconds and restart the
    # enclosing loop iteration.
    try:
        res = response(req)
    except urllib2.URLError:
        # error loading URL, wait and try again
        print "trying to connect again.."
        time.sleep(5)
        continue

    # Read the response body and parse it with the lxml parser.
    html = res.read()
    soup = BeautifulSoup(html, "lxml")

    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()    # rip it out

    # get text
    text = soup.get_text()
    # keep only the lines that contain "Status: In Progress"
    matched_lines = [line for line in text.split('\n') if "Status: In Progress" in line]
    # merge the kept lines into one string and pass it through
    # multiple_replace with `tokens` (defined outside this fragment)
    matched_lines = ''.join(matched_lines)
    matched_lines = multiple_replace(tokens, matched_lines)
    # split into the part before 'at ' and the rest, then split the rest
    # around ' In Progress '
    # NOTE(review): if no line matched, or the text lacks 'at ' or
    # ' In Progress ', the [1] lookups raise IndexError -- confirm this is
    # acceptable.
    first_line = matched_lines.split('at ')
    other_lines = first_line[1].split(' In Progress ')

    # display score as a notification
    n = notify2.Notification(first_line[0] + " v " + other_lines[0], other_lines[1])
    n.show()
    # wait 30 seconds before the code that follows this fragment runs
    time.sleep(30)