示例#1
0
	def reset(self):
		"""Blank the collected fields, then reset the base HTMLParser state."""
		# text fields start out empty
		self.title = ''
		self.description = ''
		self.elementName = ''
		self.printp = True
		self.puredata = []
		HTMLParser.reset(self)
示例#2
0
    def _reset(self, style):
        """Reset the parser and remember *style*.

        Resets the base HTMLParser state, stores ``style`` on ``self._style``,
        empties ``self.errors`` and finally calls ``self._iReset()``.
        """
        HTMLParser.reset(self)
        self._style = style
        # start over with an empty error list
        self.errors = []
        self._iReset()
示例#3
0
 def reset(self):
     """Reset the base parser and clear the synonym lists, flags and stack."""
     HTMLParser.reset(self)
     # collected results
     self.synonyms, self.possible_synonyms = [], []
     # boolean state flags all start out False
     self._div_class_p = False
     self._tyda_trans_intro = False
     self._tyda_assoc_word = False
     self._possible_synonym = False
     self._stack = []
示例#4
0
文件: extract.py 项目: fnl/libfnl
 def reset(self):
     """
     Prepare the parser for a new document.

     Resets the base HTMLParser state and replaces ``_string``,
     ``_elements``, ``_ignored`` and ``tags`` with fresh empty containers.
     """
     HTMLParser.reset(self)
     self.tags = {}
     self._string, self._elements, self._ignored = [], [], []
示例#5
0
    def reset(self):
        """Reset the base parser and clear the output and link state."""
        HTMLParser.reset(self)

        # output accumulated so far
        self.result = ""
        self.verbatim = 0
        self.list_stack = []

        # link bookkeeping
        self.links = []
        self.link_open = False
        self.link_text = ""
        self.link_href = ""
示例#6
0
    def __init__(self, filters):
        """Initialise the parser state and install *filters*.

        ``filters`` is passed unchanged to ``self._install_filters`` after
        the stack, pools and handler lists have been created empty.
        """
        HTMLParser.__init__(self)
        HTMLParser.reset(self)

        # parsing state
        self._stack = []
        self._flag_pool, self._value_pool = {}, {}

        # handler lists, empty before the filters are installed
        self._starttag_handlers = []
        self._endtag_handlers = []
        self._data_handlers = []

        self._install_filters(filters)
示例#7
0
def remove_html(text):
    """Strip HTML tags from *text* and decode ``&lt;`` / ``&gt;``.

    Every ``<...>`` span (shortest match, containing no further ``<``) is
    removed first; afterwards the two entities are replaced by literal
    ``<`` and ``>``.  No other entities are decoded.

    Returns the cleaned string.
    """
    stripped = re.sub(r'<[^<]+?>', '', text)
    # decode only after tag removal so the restored brackets are kept
    return stripped.replace('&lt;', '<').replace('&gt;', '>')
示例#8
0
def remove_html(text):
    """Strip HTML tags from *text* and decode ``&lt;`` / ``&gt;``.

    Every ``<...>`` span (shortest match, containing no further ``<``) is
    removed first; afterwards the two entities are replaced by literal
    ``<`` and ``>``.  No other entities are decoded.

    Returns the cleaned string.
    """
    stripped = re.sub(r"<[^<]+?>", "", text)
    # decode only after tag removal so the restored brackets are kept
    return stripped.replace("&lt;", "<").replace("&gt;", ">")
 def reset(self):
     """Reset the base parser, clear ``links`` and switch extraction off."""
     HTMLParser.reset(self)
     self.links = []
     self.extracting = False
示例#10
0
 def reset(self):
     """Reset the base parser and empty the ``_cleaned`` string."""
     HTMLParser.reset(self)
     # nothing has been cleaned yet
     self._cleaned = ''
示例#11
0
 def reset(self):
     """Reset the base parser and start with an empty stack and pools."""
     HTMLParser.reset(self)
     self._flag_pool, self._value_pool = {}, {}
     self._stack = []
示例#12
0
 def reset(self):
     """Reset the base parser state and empty the ``urls`` list."""
     HTMLParser.reset(self)
     # no URLs collected yet
     self.urls = []
示例#13
0
 def reset(self):
     """Empty the ``data`` list, then reset the base HTMLParser state."""
     # start with no collected data
     self.data = []
     HTMLParser.reset(self)
示例#14
0
 def reset(self):
     """Reset the base parser and set title, timestamp and curclass to None."""
     HTMLParser.reset(self)
     # nothing has been seen yet
     self.curclass = None
     self.timestamp = None
     self.title = None
示例#15
0
文件: parsers.py 项目: mnjstwins/riko
 def reset(self):
     """Reset the base parser and replace ``data`` with a fresh StringIO."""
     HTMLParser.reset(self)
     # new, empty in-memory buffer
     self.data = StringIO()
 def reset(self):
     """Reset the base parser and empty the ``data`` and ``p`` lists."""
     HTMLParser.reset(self)
     self.data, self.p = [], []
示例#17
0
 def reset(self):
     """Reset the base parser state and empty the ``sounds`` list."""
     HTMLParser.reset(self)
     # no sounds collected yet
     self.sounds = []
示例#18
0
 def reset(self):
     """Reset the base parser, clear ``sounds`` and blank ``prev_tag``."""
     HTMLParser.reset(self)
     self.prev_tag = ""
     self.sounds = []
示例#19
0
文件: autorss.py 项目: Fuzzwah/riko
 def reset(self):
     """Reset the base parser state and empty the ``entry`` list."""
     HTMLParser.reset(self)
     # start without any entries
     self.entry = []
示例#20
0
	def reset(self):
		"""Clear the match results, then reset the base HTMLParser state."""
		self.matches = []
		self.found = False
		HTMLParser.reset(self)
示例#21
0
class HyperlinkManager(HTMLParser):
    """Attach hyperlink, colour and font-style tags to a Tk text widget.

    Besides handing out tag names (``add``, ``color``, ``bg``, ``colorbg``)
    the manager can render a small HTML-like markup into a widget through
    ``add_markup``.  Parsing is delegated to a separate ``HTMLParser``
    instance (``self.parser``) whose handler callbacks are rebound to this
    object's ``handle_*`` methods.
    """

    # Markup elements that map directly onto a pre-configured style tag.
    _STYLE_TAGS = {"b": "bold", "i": "italic", "u": "underline"}
    # Every element for which handle_starttag pushes a stack entry; only
    # these may pop an entry again in handle_endtag.
    _STACKED_TAGS = frozenset(_STYLE_TAGS) | {"a", "font"}

    def __init__(self, text):
        """Configure the style tags on *text* and set up the markup parser.

        ``text`` is the widget on which the "hyper", "bold", "italic" and
        "underline" tags are configured and the hyperlink events are bound.
        """
        self.text = text
        self.text.tag_config("hyper", foreground="blue", underline=1)
        self.text.tag_bind("hyper", "<Enter>", self._enter)
        self.text.tag_bind("hyper", "<Leave>", self._leave)
        self.text.tag_bind("hyper", "<Button-1>", self._click)
        # bold / italic variants are derived from the widget's current font
        bold_font = font.Font(text, self.text.cget("font"))
        bold_font.configure(weight="bold")
        self.text.tag_config("bold", font=bold_font)
        italic_font = font.Font(text, self.text.cget("font"))
        italic_font.configure(slant="italic")
        self.text.tag_config("italic", font=italic_font)
        self.text.tag_config("underline", underline=1)
        # A separate parser does the actual parsing; its callbacks are
        # redirected to the handlers defined on this manager.
        self.parser = HTMLParser()
        self.parser.handle_starttag = self.handle_starttag
        self.parser.handle_endtag = self.handle_endtag
        self.parser.handle_data = self.handle_data
        self.reset()

    def reset(self):
        """Forget all registered links and colour tags."""
        self.links = {}
        self.colors = []
        self.bgs = []
        self.colorbgs = []

    def add(self, action):
        """Register *action* as a link and return the tags to apply.

        Returns a ``("hyper", "hyper-<n>")`` tuple for use with the
        associated text widget; ``action`` is called when the link is
        clicked.
        """
        tag = "hyper-{}".format(len(self.links))
        self.links[tag] = action
        return "hyper", tag

    def color(self, color):
        """Return a 1-tuple with the foreground tag for *color*.

        The tag is configured on the widget the first time it is requested.
        """
        tag = "color-{}".format(color)
        if tag not in self.colors:
            self.colors.append(tag)
            self.text.tag_config(tag, foreground=color)
            # keep the hyperlink tag above the newly created tag
            self.text.tag_raise("hyper")
        return (tag, )

    def bg(self, color):
        """Return a 1-tuple with the background tag for *color*.

        The tag is configured on the widget the first time it is requested.
        """
        tag = "bg-{}".format(color)
        if tag not in self.bgs:
            self.bgs.append(tag)
            self.text.tag_config(tag, background=color)
            self.text.tag_raise("hyper")
        return (tag, )

    def colorbg(self, color, bg):
        """Return a 1-tuple with the combined foreground/background tag.

        The tag is configured on the widget the first time it is requested.
        """
        tag = "colorbg-{}|{}".format(color, bg)
        if tag not in self.colorbgs:
            self.colorbgs.append(tag)
            self.text.tag_config(tag, foreground=color, background=bg)
            self.text.tag_raise("hyper")
        return (tag, )

    def _enter(self, event):
        """Show the hand cursor while the pointer is over a link."""
        self.text.config(cursor="hand2")

    def _leave(self, event):
        """Restore the default cursor when the pointer leaves a link."""
        self.text.config(cursor="")

    def _click(self, event):
        """Call the action of the first ``hyper-*`` tag under the pointer."""
        for tag in self.text.tag_names(tkinter.CURRENT):
            if tag.startswith("hyper-"):
                self.links[tag]()
                return

    def handle_starttag(self, tag, attrs):
        """Push the widget tags for a recognised markup element.

        ``b``/``i``/``u`` push their style tag, ``a`` registers a link built
        by ``self.parser_handler(href)``, and ``font`` pushes a colour tag
        chosen from its ``color`` / ``bg`` attributes.  Any other element
        pushes nothing.
        """
        if tag in self._STYLE_TAGS:
            self.parser_tags.append([self._STYLE_TAGS[tag]])
        elif tag == "a":
            ref = ""
            for k, v in attrs:
                if k == "href":
                    ref = v
            self.parser_tags.append(self.add(self.parser_handler(ref)))
        elif tag == "font":
            color = ""
            bg = ""
            for k, v in attrs:
                if k == "color":
                    color = v
                elif k == "bg":
                    bg = v
            if color and bg:
                self.parser_tags.append(self.colorbg(color, bg))
            elif bg:
                self.parser_tags.append(self.bg(bg))
            else:
                self.parser_tags.append(self.color(color))

    def handle_endtag(self, tag):
        """Pop the entry pushed by the matching start tag.

        Only elements that push an entry in ``handle_starttag`` pop one
        here; the end tag of any other element (e.g. ``</p>``) must not
        remove the style of an enclosing element.
        """
        if tag in self._STACKED_TAGS and self.parser_tags:
            self.parser_tags = self.parser_tags[:-1]

    def handle_data(self, data):
        """Insert *data* into the target widget with all active tags."""
        # flatten the stack of tag groups, innermost tag first
        active = [name for group in self.parser_tags for name in group]
        self.parser_widget.insert(tkinter.INSERT, data,
                                  tuple(reversed(active)))

    def add_markup(self, text, widget, handler):
        """Parse the markup in *text* and insert the result into *widget*.

        ``handler`` is called with the ``href`` of every ``<a>`` element and
        must return the action to register for that link.
        """
        self.parser_tags = []
        self.parser_widget = widget
        self.parser_handler = handler
        self.parser.reset()
        self.parser.feed(text)
        # flush text the parser is still buffering (e.g. a trailing "&")
        self.parser.close()
示例#22
0
文件: text.py 项目: ww9rivers/c9r
 def reset(self):
     """Reset the parser and return ``self``.

     Resets the base HTMLParser state, empties ``segments`` and sets
     ``bodymode`` to True.
     """
     HTMLParser.reset(self)
     self.segments = []
     self.bodymode = True
     return self
示例#23
0
 def reset(self):
     """Reset the base parser and make ``links`` an exhausted iterator."""
     HTMLParser.reset(self)
     # iterator over an empty list: yields nothing
     self.links = iter([])
示例#24
0
 def reset(self):
     """Blank the ``output`` string, then reset the base HTMLParser state."""
     # nothing has been produced yet
     self.output = ""
     HTMLParser.reset(self)
示例#25
0
 def reset(self):
     """Reset the base parser state and start with an empty ``links`` set."""
     HTMLParser.reset(self)
     # no links collected yet
     self.links = set()
示例#26
0
	def reset(self):
		"""Clear the private link/title state, then reset the base parser."""
		self.__get_title = False
		self.__title = ''
		self.__links = []
		HTMLParser.reset(self)
示例#27
0
 def reset(self):
     """Restore the initial formatting state, then reset the base parser."""
     self.active_tags = []
     self.indent = 0
     # both flags start out switched off
     self.do_lstrip = False
     self.end_ul = False
     HTMLParser.reset(self)
示例#28
0
 def reset(self):
     """Reset the base parser and clear the card-parsing state."""
     HTMLParser.reset(self)
     self._parsed_cards, self._current_card = [], []
     self._rules_text = ''
     self._level = 0
 def reset(self):
     """Reset the base parser state and empty the ``links`` list."""
     HTMLParser.reset(self)
     # no links collected yet
     self.links = []
示例#30
0
 def reset(self):
     """Reset the base parser and clear the script-JSON data and flag."""
     HTMLParser.reset(self)
     self._found_script_json = False
     self.script_json_data = []
示例#31
0
 def reset(self):
     """Empty the ``pieces`` list, then reset the base HTMLParser state."""
     # start with no collected pieces
     self.pieces = []
     HTMLParser.reset(self)
示例#32
0
    def reset(self):
        """Clear the line buffers and flag, then reset the base parser."""
        self.line, self.lines = [], []
        self.search_results = False

        HTMLParser.reset(self)
示例#33
0
 def reset(self):
   """Empty the data buffer, then reset the base HTMLParser state."""
   # start with an empty buffer
   self._data_buffer = []
   HTMLParser.reset(self)
示例#34
0
 def reset(self):
     """Reset the base parser state, then start with an empty ``url_set``."""
     # note the order: the base reset runs first
     HTMLParser.reset(self)
     self.url_set = set()
示例#35
0
 def reset(self):
     """Reset the base parser and empty the ``_cleaned`` string."""
     HTMLParser.reset(self)
     # nothing has been cleaned yet
     self._cleaned = ''
示例#36
0
文件: parser.py 项目: wruibo/ispider
 def reset(self):
     """Reset the parser by delegating to ``HTMLParser.reset``."""
     # no state of its own is touched here
     HTMLParser.reset(self)
示例#37
0
 def reset(self):
     """Reset the base parser state and empty the ``tags`` list."""
     HTMLParser.reset(self)
     # no tags recorded yet
     self.tags = []
 def reset(self):
     """Clear pieces, state, buffer and ``inli``, then reset the base parser."""
     self.pieces, self.buf = [], []
     self.state = ""
     self.inli = 0
     HTMLParser.reset(self)
 def reset(self):
     """Empty the ``allData`` list, then reset the base HTMLParser state."""
     # start with no collected data
     self.allData = []
     HTMLParser.reset(self)
示例#40
0
 def reset(self):
     """Reset the base parser and empty the object and tag stacks."""
     HTMLParser.reset(self)
     self.toplevel_objects = []
     self.objstack, self.tagstack = [], []
示例#41
0
	def reset(self):
		"""Reset the base parser state and empty the ``urls`` list."""
		HTMLParser.reset(self)
		# no URLs collected yet
		self.urls = []
示例#42
0
 def reset(self):
     """Reset the base parser and make ``entry`` an exhausted iterator."""
     HTMLParser.reset(self)
     # iterator over an empty list: yields nothing
     self.entry = iter([])
示例#43
0
 def reset(self):
     """Reset the base parser state and empty the ``urls`` list."""
     HTMLParser.reset(self)
     # no URLs collected yet
     self.urls = []
示例#44
0
 def reset(self):
     '''Reset the HTMLParser instance.

     Sets ``text`` to None, ``flag`` to 'start' and ``ips`` to an empty
     list before resetting the base parser state.
     '''
     self.ips = []
     self.flag = 'start'
     self.text = None
     HTMLParser.reset(self)
示例#45
0
 def reset(self):
     """Empty the data buffer, then reset the base HTMLParser state."""
     # start with an empty buffer
     self._data_buffer = []
     HTMLParser.reset(self)
 def reset(self):
     """Empty ``tags`` in place, blank ``name`` and reset the base parser.

     ``self.tags`` must already exist; it is cleared rather than replaced,
     so other references to the same list see it emptied as well.
     """
     del self.tags[:]
     self.name = ''
     HTMLParser.reset(self)
示例#47
0
 def reset(self):
     """Reset the base parser and clear ``data``, ``relay`` and ``login``."""
     HTMLParser.reset(self)
     self.login = False
     self.relay = ""
     self.data = {}
示例#48
0
 def reset(self):
     """Clear the private parse state and reset the base parser.

     ``airportInfo`` is replaced by ``LocationInfo(0, 0)``.  Returns
     whatever ``HTMLParser.reset`` returns.
     """
     self.__currentKey = None
     self.__th = False
     self.airportInfo = LocationInfo(0, 0)
     base_result = HTMLParser.reset(self)
     return base_result
示例#49
0
 def reset(self):
     """Reset the base parser and empty the ``parts`` and ``tags`` lists."""
     HTMLParser.reset(self)
     self.parts, self.tags = [], []
 def reset(self):
     """Reset the base parser and clear orders, raw stats and ``recording``."""
     HTMLParser.reset(self)
     self.recording = 0
     self.orders, self.rawStats = [], []
示例#51
0
 def reset(self):
     """Reset the base parser and set title, timestamp and curclass to None."""
     HTMLParser.reset(self)
     # nothing has been seen yet
     self.curclass = None
     self.timestamp = None
     self.title = None
    def reset(self):
        """Reset the base parser and forget the base URL and all links."""
        HTMLParser.reset(self)

        self.links = []
        # neither a base URL nor a current link is known yet
        self.current_link = None
        self.base_url = None
示例#53
0
 def reset(self):
     """Reset the base parser state and empty the ``_xml`` list."""
     HTMLParser.reset(self)
     # nothing collected yet
     self._xml = []
示例#54
0
 def reset(self):
     """Reset the base parser, clear ``sounds`` and blank ``prev_tag``."""
     HTMLParser.reset(self)
     self.prev_tag = ""
     self.sounds = []
示例#55
0
文件: parsers.py 项目: nerevu/riko
 def reset(self):
     """Reset the base parser and replace ``data`` with a fresh StringIO."""
     HTMLParser.reset(self)
     # new, empty in-memory buffer
     self.data = StringIO()
示例#56
0
 def reset(self):
     """Clear ``pieces`` and ``encoding``, then reset the base parser."""
     self.encoding = None
     self.pieces = []
     HTMLParser.reset(self)
示例#57
0
 def reset(self):
     """Set ``encoding`` to None, then reset the base HTMLParser state."""
     # no encoding recorded yet
     self.encoding = None
     HTMLParser.reset(self)
示例#58
0
 def reset(self):
     """Reset the base parser state and empty the ``out`` list."""
     HTMLParser.reset(self)
     # no output collected yet
     self.out = []
示例#59
0
 def reset(self):
     """Reset the base parser state and empty the ``out`` list."""
     HTMLParser.reset(self)
     # no output collected yet
     self.out = []
 def reset(self):
     """Reset the base parser state and empty the ``level_stack`` list."""
     HTMLParser.reset(self)
     # the level stack starts out empty
     self.level_stack = []