Python toStr примеры, pyglossary.text_utils.toStr Python примеры использования

Пример #1

0

Показать файл

Файл: ui_tk.py Проект: tughluq/pyglossary

 def combobox_o_changed(self, event):
     """
     React to a change of the output-format combobox.

     Returns immediately when the placeholder item (``noneItem``) is
     selected. Otherwise, when the ``auto_set_out`` preference is on, the
     output entry is empty and the input path contains a dot, an output
     path is built from the input path without its last extension plus
     the ``Glossary.descExt`` value of the selected format, and inserted
     into the output entry.

     :param event: event object passed by the caller; not used here
     """
     formatD = self.combobox_o.get()
     if formatD == noneItem:
         return
     # The looked-up value is not used below; the lookup is kept so that a
     # description missing from Glossary.descFormat still raises here.
     Glossary.descFormat[formatD]
     if not self.pref['auto_set_out']:
         return
     pathI = toStr(self.entry_i.get())
     pathO = toStr(self.entry_o.get())
     formatOD = self.combobox_o.get()
     if formatOD is not None and not pathO and '.' in pathI:
         extO = Glossary.descExt[formatOD]
         # splitext()[0] is the path without its last extension
         pathO = os.path.splitext(pathI)[0] + extO
         self.entry_o.insert(0, pathO)

Пример #2

0

Показать файл

Файл: ui_tk.py Проект: flyhawk007/pyglossary

 def combobox_o_changed(self, event):
     """
     Handle selection of a different output format.

     Nothing happens when the placeholder item (``noneItem``) is selected.
     Otherwise, if the ``auto_set_out`` preference is enabled, the output
     entry is empty and the input path contains a dot, the output entry
     is filled with the input path stripped of its last extension plus
     the ``Glossary.descExt`` value of the selected format.

     :param event: event object passed by the caller; not used here
     """
     formatD = self.combobox_o.get()
     if formatD==noneItem:
         return
     # Result unused; the lookup is kept so that a description missing
     # from Glossary.descFormat still raises at this point.
     Glossary.descFormat[formatD]
     if not self.pref['auto_set_out']:
         return
     pathI = toStr(self.entry_i.get())
     pathO = toStr(self.entry_o.get())
     formatOD = self.combobox_o.get()
     if formatOD is not None and not pathO and '.' in pathI:
         extO = Glossary.descExt[formatOD]
         # splitext()[0] is the path without its last extension
         pathO = os.path.splitext(pathI)[0] + extO
         self.entry_o.insert(0, pathO)

Пример #3

0

Показать файл

Файл: ui_tk.py Проект: codeaten/pyglossary

 def convert(self):
     """
     Write the loaded glossary to the output path in the selected format.

     ``self.running`` is set to True for the duration of the write and is
     always reset to False afterwards, even when the write raises.

     :return: True on success; False when the glossary is empty; None
         when the output path or the output format is empty
     """
     if len(self.glos)==0:
         log.error('Input glossary has no word! Be sure to click "Load" before "Convert", '+\
             'or just click "Apply" instead.')
         return False
     oPath = toStr(self.entry_o.get())
     if not oPath:
         log.critical('Output file path is empty!')
         return
     formatD = self.combobox_o.get()
     if formatD in (noneItem, ''):
         log.critical('Output format is empty!')
         return
     log.info('Converting to %s, please wait...'%formatD)
     self.running = True
     try:
         formatName = Glossary.descFormat[formatD]
         t0 = time.time()
         self.glos.write(oPath, format=formatName)
         self.oPath = oPath
         log.info('writing %s file: "%s" done.'%(formatName, oPath))
         # NOTE: the message says "time left", but the value logged is the
         # time elapsed since t0.
         log.info('time left = %3f seconds'%(time.time()-t0))
     finally:
         # never leave the "running" flag stuck when the write fails
         self.running = False
     return True

Пример #4

0

Показать файл

Файл: ui_tk.py Проект: tughluq/pyglossary

 def convert(self):
     """
     Write the glossary to the output path in the selected output format.

     ``self.running`` is True while the write is in progress and is
     always reset to False afterwards, even when the write raises.

     :return: True on success; None when the output path or the output
         format is empty
     """
     oPath = toStr(self.entry_o.get())
     if not oPath:
         log.critical('Output file path is empty!')
         return
     formatD = self.combobox_o.get()
     if formatD in (noneItem, ''):
         log.critical('Output format is empty!')
         return
     log.info('Converting to %s, please wait...' % formatD)
     self.running = True
     try:
         formatName = Glossary.descFormat[formatD]
         t0 = time.time()
         self.glos.write(oPath, format=formatName)
         self.oPath = oPath
         log.info('writing %s file: "%s" done.' % (formatName, oPath))
         # NOTE: the message says "time left", but the value logged is the
         # time elapsed since t0.
         log.info('time left = %3f seconds' % (time.time() - t0))
     finally:
         # never leave the "running" flag stuck when the write fails
         self.running = False
     return True

Пример #5

0

Показать файл

Файл: ui_tk.py Проект: flyhawk007/pyglossary

 def entry_changed(self, event=None):
     """
     Synchronise the UI after the input or output path entry changed.

     For each of the two entries, when its text differs from the cached
     value (``self.pathI`` / ``self.pathO``):

     * a ``file://`` URL is converted with ``urlToPath`` and written back
       into the entry;
     * with the ``auto_set_for`` preference on, the matching combobox is
       set from the path's extension (looking through a trailing
       ``.gz`` / ``.bz2`` / ``.zip``);
     * for the input entry only, with ``auto_set_out`` on, an empty
       output entry is filled from the input path and the selected
       output format.

     :param event: event object passed by the caller; not used here
     """
     pathI = toStr(self.entry_i.get())
     if self.pathI != pathI:
         if len(pathI)>7 and pathI[:7]=='file://':
             pathI = urlToPath(pathI)
             self.entry_i.delete(0, 'end')
             self.entry_i.insert(0, pathI)
         if self.pref['auto_set_for']:
             ext = os.path.splitext(pathI)[-1].lower()
             if ext in ('.gz', '.bz2', '.zip'):
                 # look at the extension underneath the compression suffix
                 ext = os.path.splitext(pathI[:-len(ext)])[-1].lower()
             # enumerate() instead of xrange(len(...)): works on Python 2 and 3
             for i, extList in enumerate(Glossary.readExt):
                 if ext in extList:
                     self.combobox_i.set(Glossary.readDesc[i])
                     break
         if self.pref['auto_set_out']:
             formatOD = self.combobox_o.get()
             pathO = toStr(self.entry_o.get())
             if formatOD != noneItem and not pathO and '.' in pathI:
                 extO = Glossary.descExt[formatOD]
                 pathO = os.path.splitext(pathI)[0] + extO
                 self.entry_o.delete(0, 'end')
                 self.entry_o.insert(0, pathO)
         self.pathI = pathI
     ##############################################
     pathO = toStr(self.entry_o.get())
     if self.pathO != pathO:
         if len(pathO)>7 and pathO[:7]=='file://':
             pathO = urlToPath(pathO)
             self.entry_o.delete(0, 'end')
             self.entry_o.insert(0, pathO)
         if self.pref['auto_set_for']:
             ext = os.path.splitext(pathO)[-1].lower()
             if ext in ('.gz', '.bz2', '.zip'):
                 ext = os.path.splitext(pathO[:-len(ext)])[-1].lower()
             for i, extList in enumerate(Glossary.writeExt):
                 if ext in extList:
                     self.combobox_o.set(Glossary.writeDesc[i])
                     break
         self.pathO = pathO

Пример #6

0

Показать файл

Файл: ui_tk.py Проект: tughluq/pyglossary

 def entry_changed(self, event=None):
     """
     Refresh dependent widgets after the input/output path entries change.

     Each entry is compared with its cached value (``self.pathI`` /
     ``self.pathO``). On a difference, a ``file://`` URL is converted with
     ``urlToPath`` and written back, and (with ``auto_set_for`` on) the
     matching format combobox is set from the path's extension. For the
     input entry, with ``auto_set_out`` on, an empty output entry is also
     filled from the input path and the selected output format.

     :param event: event object passed by the caller; not used here
     """
     archiveExts = ('.gz', '.bz2', '.zip')

     newPathI = toStr(self.entry_i.get())
     if self.pathI != newPathI:
         formatD = self.combobox_i.get()  # value currently unused
         if len(newPathI) > 7 and newPathI.startswith('file://'):
             newPathI = urlToPath(newPathI)
             self.entry_i.delete(0, 'end')
             self.entry_i.insert(0, newPathI)
         if self.pref['auto_set_for']:
             suffix = os.path.splitext(newPathI)[-1].lower()
             if suffix in archiveExts:
                 # use the extension underneath the compression suffix
                 inner = newPathI[:-len(suffix)]
                 suffix = os.path.splitext(inner)[-1].lower()
             for idx, known in enumerate(Glossary.readExt):
                 if suffix in known:
                     self.combobox_i.set(Glossary.readDesc[idx])
                     break
         if self.pref['auto_set_out']:
             formatOD = self.combobox_o.get()
             currentO = toStr(self.entry_o.get())
             if formatOD != noneItem and not currentO and '.' in newPathI:
                 extO = Glossary.descExt[formatOD]
                 currentO = ''.join(os.path.splitext(newPathI)[:-1]) + extO
                 self.entry_o.delete(0, 'end')
                 self.entry_o.insert(0, currentO)
         self.pathI = newPathI
     ##############################################
     newPathO = toStr(self.entry_o.get())
     if self.pathO != newPathO:
         formatD = self.combobox_o.get()  # value currently unused
         if len(newPathO) > 7 and newPathO.startswith('file://'):
             newPathO = urlToPath(newPathO)
             self.entry_o.delete(0, 'end')
             self.entry_o.insert(0, newPathO)
         if self.pref['auto_set_for']:
             suffix = os.path.splitext(newPathO)[-1].lower()
             if suffix in archiveExts:
                 inner = newPathO[:-len(suffix)]
                 suffix = os.path.splitext(inner)[-1].lower()
             for idx, known in enumerate(Glossary.writeExt):
                 if suffix in known:
                     self.combobox_o.set(Glossary.writeDesc[idx])
                     break
         self.pathO = newPathO

Пример #7

0

Показать файл

 def get_prefix(self, word: str) -> str:
     """
     Return the grouping prefix for ``word``.

     The prefix length is taken from ``self._group_by_prefix_length``.
     A falsy ``word`` gives None (despite the ``str`` annotation). A word
     whose first character sorts strictly between "Z" and "a" gives the
     literal "SPECIAL".
     """
     prefixLen = self._group_by_prefix_length
     if word:
         text = toStr(word)
         first = text[0]
         if first > "Z" and first < "a":
             return "SPECIAL"
         return text[:prefixLen]
     return None

Пример #8

0

Показать файл

Файл: bgl_reader_debug.py Проект: linzhp/pyglossary

	def samplesDumpFileWrite(self, text):
		"""
		Write ``text`` to the samples dump file, preceded by the file offset.

		When ``self.samplesDumpFile`` is falsy, the text is sent to
		``log.debug`` instead.

		:param text: value converted with ``toStr`` before writing
		"""
		text = toStr(text)
		if self.samplesDumpFile:
			offset = self.samplesDumpFile.tell()
			# "{0:#X}" is str.format() syntax; applying the "%" operator to it
			# raises TypeError because the template has no %-conversion.
			self.samplesDumpFile.write("\noffset = {0:#X}\n".format(offset))
			self.samplesDumpFile.write(text + "\n")
		else:
			log.debug(text)

Пример #9

0

Показать файл

Файл: bgl_reader_debug.py Проект: xiaoke912/pyglossary

 def samplesDumpFileWrite(self, text):
     """
     Append ``text`` to the samples dump file after an offset header.

     The header holds the file position (from ``tell()``) in hexadecimal.
     When ``self.samplesDumpFile`` is falsy the text goes to ``log.debug``.
     """
     message = toStr(text)
     if not self.samplesDumpFile:
         log.debug(message)
         return
     position = self.samplesDumpFile.tell()
     self.samplesDumpFile.write(f"\noffset = {position:#02x}\n")
     self.samplesDumpFile.write(message + "\n")

Пример #10

0

Показать файл

Файл: bgl_reader_debug.py Проект: xiaoke912/pyglossary

 def msgLogFileWrite(self, text):
     """
     Append ``text`` to the message log file after an offset header.

     When ``self.msgLogFile`` is falsy the text goes to ``log.debug``.
     """
     message = toStr(text)
     if not self.msgLogFile:
         log.debug(message)
         return
     position = self.msgLogFile.tell()
     # The offset is written so the log can be navigated in a hex editor:
     # read the messages in a text editor, then use the printed offset to
     # jump to the same place in a hex editor and inspect char codes.
     self.msgLogFile.write(f"\noffset = {position:#02x}\n")
     self.msgLogFile.write(message + "\n")

Пример #11

0

Показать файл

def get_prefix(word, length):
    """
    Return the first ``length`` characters of ``word`` as its prefix.

    A falsy ``word`` gives None. A word whose first character sorts
    strictly between "Z" and "a" gives the literal "SPECIAL".

    :param word: the word string
    :param length: prefix length
    :type  length: int
    """
    if word:
        text = toStr(word)
        first = text[0]
        if first > "Z" and first < "a":
            return "SPECIAL"
        return text[:length]  # FIXME (kept from original): return unicode?
    return None

Пример #12

0

Показать файл

Файл: ui_tk.py Проект: jeffzfw/pyglossary

 def load(self):
     """
     Read the glossary from the input path using the selected input format.

     When the placeholder item (``noneItem``) is selected, an empty
     format name is passed to ``self.glos.read``.

     :return: True when reading succeeded; False when ``self.glos.read``
         returned a falsy value; None when the input path is empty
     """
     iPath = toStr(self.entry_i.get())
     if not iPath:
         printAsError('Input file path is empty!')
         return
     formatD = self.combobox_i.get()
     if formatD == noneItem:
         formatName = ''
         print('Please wait...')
     else:
         formatName = Glossary.descFormat[formatD]
         print('Reading from %s, please wait...' % formatD)
     t0 = time.time()
     ex = self.glos.read(iPath, format=formatName)
     if not ex:
         print('reading %s file: "%s" failed.' % (formatName, iPath))
         return False
     print('reading %s file: "%s" done.\n%d words found.' % (
         formatName,
         iPath,
         len(self.glos.data),
     ))
     self.iPath = iPath
     self.glos.uiEdit()
     # user-facing message: spelling fixed ("Comleted" -> "Completed")
     self.progress(1.0, 'Loading Completed')
     if self.checkb_o_det.get():
         # NOTE: the message says "time left", but the value printed is the
         # time elapsed since t0.
         print('time left = %3f seconds' % (time.time() - t0))
         for x in self.glos.info:
             print('%s="%s"' % (x[0], x[1]))
     return True

Пример #13

0

Показать файл

Файл: ui_tk.py Проект: flyhawk007/pyglossary

 def load(self):
     """
     Read the glossary from the input path using the selected input format.

     When the placeholder item (``noneItem``) is selected, an empty
     format name is passed to ``self.glos.read``.

     :return: True when reading succeeded; False when ``self.glos.read``
         returned a falsy value; None when the input path is empty
     """
     iPath = toStr(self.entry_i.get())
     if not iPath:
         log.error('Input file path is empty!')
         return
     formatD = self.combobox_i.get()
     if formatD==noneItem:
         formatName = ''
         log.info('Please wait...')
     else:
         formatName = Glossary.descFormat[formatD]
         log.info('Reading from %s, please wait...'%formatD)
     t0 = time.time()
     ex = self.glos.read(iPath, format=formatName)
     if not ex:
         log.error('reading %s file: "%s" failed.'%(formatName, iPath))
         return False
     log.info('reading %s file: "%s" done.\n%d words found.'%(
         formatName,
         iPath,
         len(self.glos.data),
     ))
     self.iPath = iPath
     self.glos.uiEdit()
     # user-facing message: spelling fixed ("Comleted" -> "Completed")
     self.progress(1.0, 'Loading Completed')
     if self.checkb_o_det.get():
         # NOTE: the message says "time left", but the value logged is the
         # time elapsed since t0.
         log.info('time left = %3f seconds'%(time.time()-t0))
         for x in self.glos.info:
             log.info('%s="%s"'%(x[0], x[1]))
     return True

Пример #14

0

Показать файл

Файл: bgl_reader_debug.py Проект: xiaoke912/pyglossary

 def processDefiStat(self, fields, b_defi, b_key):
     """
     Update definition statistics held in ``self.metadata2``.

     For a single-encoding entry, character samples are printed for
     ``fields.b_defi`` and, when ``self.metadata2`` is truthy, the
     processed / ASCII / UTF-8 counters are updated. Independently,
     ``metadata2.isDefiASCII`` is cleared once ``fields.u_defi`` is found
     not to be ASCII.

     :param fields: object providing ``singleEncoding``, ``b_defi``,
         ``encoding`` and ``u_defi``
     :param b_defi: not used by this method (``fields.b_defi`` is read)
     :param b_key: key included in the sample label
     """
     if fields.singleEncoding:
         self.findAndPrintCharSamples(
             fields.b_defi,
             f"defi, key = {b_key}",
             fields.encoding,
         )
         stats = self.metadata2
         if stats:
             stats.defiProcessedCount += 1
             if isASCII(toStr(fields.b_defi)):
                 stats.defiAsciiCount += 1
             try:
                 fields.b_defi.decode("utf-8")
                 decodable = True
             except UnicodeError:
                 decodable = False
             if decodable:
                 stats.defiUtf8Count += 1
     stats = self.metadata2
     if stats and stats.isDefiASCII and not isASCII(fields.u_defi):
         stats.isDefiASCII = False

Пример #15

0

Показать файл

Файл: ui_tk.py Проект: jeffzfw/pyglossary

 def convert(self):
     """
     Write the loaded glossary to the output path in the selected format.

     ``self.running`` is True while the write is in progress and is
     always reset to False afterwards, even when the write raises.

     :return: True on success; False when the glossary has no data; None
         when the output path or the output format is empty
     """
     if len(self.glos.data) == 0:
         printAsError('Input glossary has no word! Be sure to click "Load" before "Convert", '+\
             'or just click "Apply" instead.')
         return False
     oPath = toStr(self.entry_o.get())
     if not oPath:
         printAsError('Output file path is empty!')
         return
     formatD = self.combobox_o.get()
     if formatD in (noneItem, ''):
         printAsError('Output format is empty!')
         return
     print('Converting to %s, please wait...' % formatD)
     self.running = True
     try:
         formatName = Glossary.descFormat[formatD]
         t0 = time.time()
         self.glos.write(oPath, format=formatName)
         self.oPath = oPath
         print('writing %s file: "%s" done.' % (formatName, oPath))
         if self.checkb_o_det.get():
             # NOTE: the message says "time left", but the value printed is
             # the time elapsed since t0.
             print('time left = %3f seconds' % (time.time() - t0))
     finally:
         # never leave the "running" flag stuck when the write fails
         self.running = False
     return True

Пример #16

0

Показать файл

def _mktitle(title_element, include_opts=()):
    """
    Build a title string from an element's text and selected child parts.

    Starts with ``title_element.text`` and walks the direct children:
    the tail of every ``nu`` child is appended; for every ``opt`` child
    its text is appended only when its zero-based position among the
    ``opt`` children is in ``include_opts``, and its tail is always
    appended. Missing (None or empty) text and tail values are skipped,
    so an element without any text gives an empty title instead of
    raising.

    :param title_element: object exposing ``.text`` and iteration over
        children that have ``.tag``, ``.text`` and ``.tail`` (presumably
        an ElementTree/lxml element -- confirm against callers)
    :param include_opts: container of ``opt`` indexes whose text is kept
    :return: the stripped title passed through ``toStr``
    """
    parts = []
    if title_element.text:
        parts.append(title_element.text)
    opt_i = -1
    for c in title_element:
        if c.tag == 'nu':
            if c.tail:
                parts.append(c.tail)
        elif c.tag == 'opt':
            opt_i += 1
            # c.text may be None for an empty <opt/>; skip it then
            if opt_i in include_opts and c.text:
                parts.append(c.text)
            if c.tail:
                parts.append(c.tail)
    return toStr(''.join(parts).strip())

Пример #17

0

Показать файл

Файл: bgl_reader_debug.py Проект: xiaoke912/pyglossary

 def rawDumpFileWriteData(self, text):
     """
     Write ``text`` (converted with ``toStr``) to the raw dump file.

     Nothing is written when ``self.rawDumpFile`` is falsy.
     """
     converted = toStr(text)
     # Encoding with "unicode_escape" was tried here but escapes too many
     # chars (for example äöü), so the text is written as is.
     if not self.rawDumpFile:
         return
     self.rawDumpFile.write(converted)

Пример #18

0

Показать файл

Файл: bgl_reader_debug.py Проект: xiaoke912/pyglossary

 def rawDumpFileWriteText(self, text):  # FIXME
     """
     Write ``text`` (converted with ``toStr``) to the raw dump file.

     Nothing is written when ``self.rawDumpFile`` is falsy.
     """
     converted = toStr(text)
     if not self.rawDumpFile:
         return
     self.rawDumpFile.write(converted)

Пример #19

0

Показать файл

Файл: _dict.py Проект: Forwardboy009/pyglossary

def format_clean_content(title, body, BeautifulSoup):
    """
    Rewrite an article body into the cleaned-up markup used for output.

    Heavily integrated with the output of the dsl reader plugin, and with
    xdxf as well. Rewrites applied:

    * class="sec" => d:priority="2"
    * style="color:steelblue" => class="ex"
    * class="p" style="color:green" => class="p"
    * style="color:green" => class="c"
    * style="margin-left:{}em" => class="m{}"
    * <u> => <span class="u">, <s> => <del>

    :param title: str | None; when truthy it is added as an <h1> header
    :param body: the article markup
    :param BeautifulSoup: the BeautifulSoup module, or a falsy value to
        use the regex/replace based fallback
    :return: the rewritten content
    """
    # xhtml is strict
    if BeautifulSoup:
        soup = BeautifulSoup.BeautifulSoup(body, "lxml", from_encoding='utf-8')
        # difference between 'lxml' and 'html.parser'
        if soup.body:
            soup = soup.body

        for tag in soup(class_='sec'):
            tag['class'].remove('sec')
            if not tag['class']:
                del tag['class']
            tag['d:priority'] = "2"
        for tag in soup(lambda x: 'color:steelblue' in x.get('style', '')):
            remove_style(tag, 'color:steelblue')
            if 'ex' not in tag.get('class', []):
                tag['class'] = tag.get('class', []) + ['ex']
        for tag in soup(is_green):
            remove_style(tag, 'color:green')
            if 'p' not in tag.get('class', []):
                tag['class'] = tag.get('class', []) + ['c']
        for tag in soup(True):
            if 'style' in tag.attrs:
                m = margin_re.search(tag['style'])
                if m:
                    remove_style(tag, m.group(0))
                    tag['class'] = tag.get('class', []) + ['m' + m.group(1)]
        for tag in soup.select('[href]'):
            href = tag['href']
            # anything that is not an http(s) URL becomes a dictionary link
            if not href.startswith(('http:', 'https:')):
                tag['href'] = 'x-dictionary:d:%s' % href
        for tag in soup('u'):
            tag.name = 'span'
            tag['class'] = tag.get('class', []) + ['u']
        for tag in soup('s'):
            tag.name = 'del'

        if title:
            h1 = BeautifulSoup.Tag(name='h1')
            h1.string = title
            soup.insert(0, h1)
        # hence the name BeautifulSoup
        content = toStr(soup.encode_contents())
    else:
        # somewhat analogous to what BeautifulSoup is supposed to do
        body = em0_9_re.sub(em0_9_sub, body)
        body = em0_9_ex_re.sub(em0_9_ex_sub, body)
        body = href_re.sub(href_sub, body)

        body = body \
            .replace('<i style="color:green">', '<i class="c">') \
            .replace('<i class="p" style="color:green">', '<i class="p">') \
            .replace('<span class="ex" style="color:steelblue">', '<span class="ex">') \
            .replace('<span class="sec ex" style="color:steelblue">', '<span class="sec ex">') \
            .replace('<u>', '<span class="u">').replace('</u>', '</span>') \
            .replace('<s>', '<del>').replace('</s>', '</del>')

        # nice header to display
        content = '<h1>%s</h1>%s' % (title, body) if title else body
        # raw strings: "\g" is an invalid escape sequence in a normal literal
        content = close_tag.sub(r'<\g<1> />', content)
        content = img_tag.sub(r'<img \g<1>/>', content)
    content = content.replace('&nbsp;', '&#160;')
    content = nonprintable.sub('', content)
    return content

Пример #20

0

Показать файл

Файл: _content.py Проект: turion2005/pyglossary

def prepare_content_with_soup(
	title: "Optional[str]",
	body: str,
	BeautifulSoup: "Any",
) -> str:
	"""
	Parse ``body`` with BeautifulSoup and rewrite it for output.

	Class/style attributes are normalised, ``xhtml:`` tag-name prefixes
	are dropped, links and ``src`` attributes are rewritten, click
	handlers are attached to picture and ``pos`` elements, ``<u>`` / ``<s>``
	are renamed, and ``title`` is inserted as an ``<h1>`` when it is truthy
	and ``body`` contains no "<h" substring.

	:param title: optional header text
	:param body: the article markup
	:param BeautifulSoup: the BeautifulSoup module
	:return: the encoded contents of the soup passed through ``toStr``
	"""
	soup = BeautifulSoup.BeautifulSoup(body, features="lxml")
	# difference between "lxml" and "html.parser"
	if soup.body:
		soup = soup.body

	# class="sec" => d:priority="2"
	for node in soup(class_="sec"):
		node["class"].remove("sec")
		if not node["class"]:
			del node["class"]
		node["d:priority"] = "2"

	# style="color:steelblue" => class="ex"
	for node in soup(lambda x: "color:steelblue" in x.get("style", "")):
		remove_style(node, "color:steelblue")
		if "ex" not in node.get("class", []):
			node["class"] = node.get("class", []) + ["ex"]

	# green style => class="c" unless the tag already has class "p"
	for node in soup(is_green):
		remove_style(node, "color:green")
		if "p" not in node.get("class", ""):
			node["class"] = node.get("class", []) + ["c"]

	# margin style matched by re_margin => class="m<group 1>"
	for node in soup(True):
		if "style" not in node.attrs:
			continue
		match = re_margin.search(node["style"])
		if match:
			remove_style(node, match.group(0))
			node["class"] = node.get("class", []) + ["m" + match.group(1)]

	for node in soup(lambda x: "xhtml:" in x.name):
		prefixed_name = node.name
		node.name = prefixed_name[len("xhtml:"):]
		if node.string:
			node.string = f"{node.string} "

	for node in soup.select("[href]"):
		target = cleanup_link_target(node["href"])

		if target.startswith("sound:"):
			fix_sound_link(target, node)

		elif target.startswith(("phonetics", "help:phonetics")):
			# for oxford9
			log.debug(f"phonetics: tag={node}")
			if node.audio and "name" in node.audio.attrs:
				node["onmousedown"] = "this.lastChild.play(); return false;"
				src_name = node.audio["name"].replace("#", "_")
				node.audio["src"] = f"{src_name}.mp3"

		elif not link_is_url(target):
			node["href"] = f"x-dictionary:d:{target}"

	show_big_picture = (
		'this.setAttribute("style", "display:none"); '
		'this.nextElementSibling.setAttribute("style", "display:block")'
	)
	for thumb in soup.find_all("div", "pic_thumb"):
		thumb["onclick"] = show_big_picture

	show_thumbnail = (
		'this.setAttribute("style", "display:none"), '
		'this.previousElementSibling.setAttribute("style", "display:block")'
	)
	for pic in soup.find_all("div", "big_pic"):
		pic["onclick"] = show_thumbnail

	# to unfold(expand) and fold(collapse) blocks
	# TODO: simplify this!
	toggle_script = (
		r'var e = this.parentElement.parentElement.parentElement'
		r'.querySelector("res-g vp-gs"); style = window.'
		r'getComputedStyle(e), display = style.getPropertyValue'
		r'("display"), "none" === e.style.display || "none" === display'
		r' ? e.style.display = "block" : e.style.display = "none", '
		r'this.className.match(/(?:^|\s)Clicked(?!\S)/) ? this.'
		r'className = this.className.replace('
		r'/(?:^|\s)Clicked(?!\S)/g, "") : this.setAttribute('
		r'"class", "Clicked")'
	)
	for pos in soup.find_all("pos", onclick="toggle_infl(this)"):
		pos["onclick"] = toggle_script

	# drop one leading slash from src attributes
	for node in soup.select("[src]"):
		src = node["src"]
		if src.startswith("/"):
			node["src"] = src[1:]
	for node in soup("u"):
		node.name = "span"
		node["class"] = node.get("class", []) + ["u"]
	for node in soup("s"):
		node.name = "del"

	if title and "<h" not in body:
		header = BeautifulSoup.Tag(name="h1")
		header.string = title
		soup.insert(0, header)

	# hence the name BeautifulSoup
	return toStr(soup.encode_contents())

Пример #21

0

Показать файл

def format_clean_content(title, body, BeautifulSoup):
    """
    Rewrite an article body into the cleaned-up markup used for output.

    Heavily integrated with the output of the dsl reader plugin, and with
    xdxf as well. Rewrites applied:

    * class="sec" => d:priority="2"
    * style="color:steelblue" => class="ex"
    * class="p" style="color:green" => class="p"
    * style="color:green" => class="c"
    * style="margin-left:{}em" => class="m{}"
    * <u> => <span class="u">, <s> => <del>

    :param title: str | None; when truthy it is added as an <h1> header
    :param body: the article markup
    :param BeautifulSoup: the BeautifulSoup module, or a falsy value to
        use the regex/replace based fallback
    :return: the rewritten content
    """
    # xhtml is strict
    if BeautifulSoup:
        soup = BeautifulSoup.BeautifulSoup(body, "lxml", from_encoding='utf-8')
        # difference between 'lxml' and 'html.parser'
        if soup.body:
            soup = soup.body

        for tag in soup(class_='sec'):
            tag['class'].remove('sec')
            if not tag['class']:
                del tag['class']
            tag['d:priority'] = "2"
        for tag in soup(lambda x: 'color:steelblue' in x.get('style', '')):
            remove_style(tag, 'color:steelblue')
            if 'ex' not in tag.get('class', []):
                tag['class'] = tag.get('class', []) + ['ex']
        for tag in soup(is_green):
            remove_style(tag, 'color:green')
            if 'p' not in tag.get('class', []):
                tag['class'] = tag.get('class', []) + ['c']
        for tag in soup(True):
            if 'style' in tag.attrs:
                m = margin_re.search(tag['style'])
                if m:
                    remove_style(tag, m.group(0))
                    tag['class'] = tag.get('class', []) + ['m' + m.group(1)]
        for tag in soup.select('[href]'):
            href = tag['href']
            # anything that is not an http(s) URL becomes a dictionary link
            if not href.startswith(('http:', 'https:')):
                tag['href'] = 'x-dictionary:d:%s' % href
        for tag in soup('u'):
            tag.name = 'span'
            tag['class'] = tag.get('class', []) + ['u']
        for tag in soup('s'):
            tag.name = 'del'

        if title:
            h1 = BeautifulSoup.Tag(name='h1')
            h1.string = title
            soup.insert(0, h1)
        # hence the name BeautifulSoup
        content = toStr(soup.encode_contents())
    else:
        # somewhat analogous to what BeautifulSoup is supposed to do
        body = em0_9_re.sub(em0_9_sub, body)
        body = em0_9_ex_re.sub(em0_9_ex_sub, body)
        body = href_re.sub(href_sub, body)

        body = body \
            .replace('<i style="color:green">', '<i class="c">') \
            .replace('<i class="p" style="color:green">', '<i class="p">') \
            .replace('<span class="ex" style="color:steelblue">', '<span class="ex">') \
            .replace('<span class="sec ex" style="color:steelblue">', '<span class="sec ex">') \
            .replace('<u>', '<span class="u">').replace('</u>', '</span>') \
            .replace('<s>', '<del>').replace('</s>', '</del>')

        # nice header to display
        content = '<h1>%s</h1>%s' % (title, body) if title else body
        # raw strings: "\g" is an invalid escape sequence in a normal literal
        content = close_tag.sub(r'<\g<1> />', content)
        content = img_tag.sub(r'<img \g<1>/>', content)
    content = content.replace('&nbsp;', '&#160;')
    content = nonprintable.sub('', content)
    return content

Python toStr примеры использования