Python re_search示例，cloudtb.textools.re_search Python示例

示例#1

0

显示文件

文件： wp_formatting.py 项目： bahuafeng/wordtex

    def __call__(self, texpart, *args, **kwargs):
        r'''Rewrite the ``\item`` entries of ``texpart`` as start/end pairs.

        An ``\item`` may or may not have an end point of its own
        (particularly the last one), so every entry found is rewritten as
        ``\startitem <body>\enditem `` to give later processing an explicit
        start and stop.

        Arguments:
            texpart - object whose ``text_data`` is replaced and whose
                ``update_text`` method is called several times
            *args, **kwargs - accepted but not used here

        Side effects:
            ``self.count`` grows by one for every item found and
            ``texpart.no_update_text`` is set to False.
        '''
        # itemize and enumerate have to be handled first for nested lists
        nested_rules = {name: begin_dict[name]
                        for name in ('itemize', 'enumerate')}
        texpart.no_update_text = False
        texpart.update_text(nested_rules)

        # split every plain string into the pieces produced by re_search;
        # everything else is carried over untouched
        item_re = re.compile(r'\\item ([\w\W]*?)(?=(\\item|$))')
        pieces = []
        for chunk in texpart.text_data:
            # exact type match (not isinstance), as the surrounding code does
            if type(chunk) not in (str, unicode):
                pieces.append(chunk)
                continue
            pieces.extend(textools.re_search(item_re, chunk))

        rebuilt = []
        for piece in pieces:
            if type(piece) not in (str, unicode, texlib.TexPart):
                # anything else is handled as a match of item_re
                self.count += 1
                item_body = piece.group(1)
                assert r'\end{itemize}' not in item_body
                rebuilt.append(r'\startitem ' + item_body + r'\enditem ')
            else:
                rebuilt.append(piece)
        texpart.text_data = texlib.reform_text(rebuilt, no_indicators=True)

        # now that each item has a start and an end, process them with an
        # 'item' rule built around the <li> ... </li> pair
        item_rule = tp(add_outside=('<li>', '</li>'), no_outer_pgraphs=True)
        list_rules = build_dict('list_call', [['item', item_rule]],
                                r'\\start{0} ', None, r'\\end{0}')
        texpart.update_text(use_dict=list_rules)
        texpart.update_text()

示例#2

0

显示文件

文件： RegExp.py 项目： vitiral/SearchTheSky

    def update_formatting(self):
        """Re-run the current regular expression and refresh the displayed text.

        Steps, in order:
          1. Set ``_disable_signals`` and clear any previous error.
          2. Read the cursor position and the widget's HTML, strip the
             formatting with ``richtext.deformat_html`` and work out the
             cursor position in the deformatted text.
          3. If the pattern compiles, hand its groups to
             ``Replace_groups_model``.
          4. Search the deformatted text. For three patterns that are
             rejected outright, for a search with no match, and for a
             pattern that fails to compile, an error is set, the plain text
             is put back, the cursor is restored and the method returns.
          5. Otherwise build the HTML for either the match view or the
             replace preview (depending on ``Radio_match``), set it, restore
             the cursor and remember the results in ``_researched`` and
             ``_html_list``.

        ``_disable_signals`` is reset to False on both return paths; it is
        not reset if an unexpected exception propagates out of the method.
        """
        self._disable_signals = True
        self.clear_error()

        rsearch_rtext = researched_richtext
        self._update = False
        # print 'Updating', time.time()
        qtpos = self.get_text_cursor_pos()  # visible pos
        # print 'Got pos', qtpos
        raw_html = self.getHtml()
        # We need the "true position", i.e. the position without our own
        # formats added in. I think this is the best way to do it.
        deformated = richtext.deformat_html(
            raw_html, (richtext.KEEPIF["black-bold"], richtext.KEEPIF["red-underlined-bold"])
        )
        deformated_str = richtext.get_str_formated_true(deformated)

        #            assert(len(deformated_str) <= len(self.getText()))
        true_pos = richtext.get_position(deformated, visible_position=qtpos)[0]

        regexp = self.get_regexp()
        try:
            re.compile(regexp)
        except Exception as E:
            # Pattern does not compile: leave the replace-groups model as it
            # is. The failure is reported further down when the search runs.
            pass
        else:
            self.Replace_groups_model.set_groups(textools.get_regex_groups(regexp))

        #            import pprint
        #            pprint.pprint(self.Replace_groups_model.data)

        error = None
        # These slow it down a lot and are not really useful. Just
        # display an error.
        if regexp == ".":
            error = "'.' -- Matches everything, not displayed"
        elif regexp == "\w":
            error = "'\w' -- Matches all characters, not displayed"
        elif regexp == "":
            error = "'' -- Results not displayed, matches between every" " character."
        else:
            try:
                researched = textools.re_search(regexp, deformated_str)
                # a result holding nothing but one plain string is treated
                # as "nothing matched"
                if len(researched) == 1 and type(researched[0]) == str:
                    error = "No Match Found"
            except re.sre_compile.error as E:
                error = str(E)
        if error:
            print error
            self.set_error(error)
            # Believe it or not, setText will add formatting!
            # Have to explicitly set html.
            # NOTE(review): the comment above asks for html to be set
            # explicitly, yet setText is what is called here -- confirm
            # which one is intended.
            self.setText(deformated_str)
            print "er setting pos", true_pos
            self.set_text_cursor_pos(true_pos, no_anchor=True)
            self._disable_signals = False
            return

        # Set the html to the correct values. `researched` is always bound
        # here: every path that skips the search above also sets `error`.
        if self.Radio_match.isChecked():
            print "doing match"
            html_list = rsearch_rtext.re_search_format_html(researched)
        else:
            print "doing replace"
            rlist = self.get_replace()
            replaced = textools.re_search_replace(researched, rlist, preview=True)
            html_list = rsearch_rtext.re_search_format_html(replaced)

        raw_html = richtext.get_str_formated_html(html_list)
        self.setHtml(raw_html)

        # map the format-free position back onto the newly formatted text
        visible_pos = richtext.get_position(html_list, true_position=true_pos)[1]
        print "new visible pos", visible_pos
        self.set_text_cursor_pos(visible_pos, no_anchor=True)

        # keep the results around on the instance
        self._researched = researched
        self._html_list = html_list
        self._disable_signals = False

示例#3

0

显示文件

文件： texlib.py 项目： bahuafeng/wordtex

def get_text_data(text_objects, texpart_constructor, return_first = False):
    '''
    Primary function for converting data into TexParts.

    Inputs:
        text_objects - list of strings and TexParts, must have been
            formatted by reform_text
        texpart_constructor - the constructor used, normally defined in
            a list in wp_formatting.py; its match_re attribute supplies
            the (inside, starters, end) pattern lists
        return_first - only used by the get_document function, returns only
            the first object found; passed straight on to convert_inout
    Output:
        whatever convert_inout builds from the tagged list described
        below, i.e. the text_data that is held in TexPart objects

    How it works:
    Every string is split at each place one of the patterns matches, then
    each resulting piece gets a tag, giving a list such as
        (2, txt3),       # piece that opened the outermost group
        (True, txt1),
        (True, txt2),
        (True, TexPart),
        (3, txt4),       # piece that closed the outermost group
        (False, txt2),
        (False, TexPart),
        etc...
    True means the piece is inside the match parameters, False means it is
    outside. Empty strings and None are dropped.

    The inside list takes precedence over the starter list, and the starter
    list takes precedence over the end list: something that matches inside
    is never tested against starter, and so on. It is best to make the
    "insides" specific and to avoid special re characters like .* in them.

    A starter embedded in an inside is considered inside. For instance in
    /iffalse /ifblog no hello to world /fi /fi -- ifblog is inside /iffalse
    '''
    inside_list, starters_list, end_list = texpart_constructor.match_re
    re_in = textools.re_in

    # a single pattern matching any marker; strings are split around it
    splitter = re.compile('|'.join(inside_list + starters_list + end_list))

    # split up the text; anything that is not exactly a str passes through
    pieces = []
    for obj in text_objects:
        if type(obj) is str:
            found = textools.re_search(splitter, obj)
            pieces.extend(textools.get_iter_str_researched(found))
        else:
            pieces.append(obj)

    inside_res = [re.compile(pattern) for pattern in inside_list]
    starter_res = [re.compile(pattern) for pattern in starters_list]
    end_res = [re.compile(pattern) for pattern in end_list]

    depth = 0       # how many groups are currently open
    inout = []
    #TODO: It has to match arbitrary if statements. I think this should be
    # pretty easy
    for txt in pieces:
        assert depth >= 0
        if txt in (None, ''):
            continue

        tag = None  # becomes 2 / 3 when this piece opens / closes a group
        if type(txt) is TexPart:
            pass    # TexParts have already been processed
        elif re_in(txt, inside_res):
            if depth == 0:
                tag = 2
            depth += 1
        elif depth > 0 and re_in(txt, starter_res):
            # i.e. if you wrote something like /iffalse /ifblog
            depth += 1
        elif depth > 0 and re_in(txt, end_res):
            # ends only count while a group is open
            depth -= 1
            if depth == 0:
                tag = 3

        if tag is None:
            tag = depth > 0
        inout.append((tag, txt))

    return convert_inout(inout, texpart_constructor,
                         return_first=return_first)

示例#4

0

显示文件

文件： RegExp.py 项目： vitiral/SearchTheSky

 def init_node_researched(self, node):
     """Fill in ``node.researched`` from the file at ``node.full_path``.

     Does nothing when ``node.researched`` has already been set (is not
     None). Otherwise the whole file is read and searched with
     ``self._regexp_text`` through ``textools.re_search``, and the result
     is stored on the node.

     Arguments:
         node - object with ``researched`` and ``full_path`` attributes
     """
     # Identity test: `!= None` would go through the cached object's own
     # comparison methods and could misreport an existing result.
     if node.researched is not None:
         return
     with open(node.full_path) as f:
         text = f.read()
     node.researched = textools.re_search(self._regexp_text, text)