def combobox_o_changed(self, event):
    """
    React to a change of the output-format combobox.

    When the ``auto_set_out`` preference is enabled and the output path
    entry is still empty, an output path is derived from the input path by
    replacing its extension with the one registered for the chosen format.

    :param event: the widget event that triggered this callback (unused)
    """
    formatD = self.combobox_o.get()
    if formatD is None or formatD == noneItem:
        return
    # NOTE: format-specific option widgets (Omnidic index, Babylon encoding)
    # used to be shown/hidden here; that code was already disabled.
    if not self.pref['auto_set_out']:
        return
    pathI = toStr(self.entry_i.get())
    pathO = toStr(self.entry_o.get())
    # Never overwrite a path the user already typed, and require something
    # that looks like an extension in the input path.
    if pathO or '.' not in pathI:
        return
    pathO = os.path.splitext(pathI)[0] + Glossary.descExt[formatD]
    self.entry_o.insert(0, pathO)
def combobox_o_changed(self, event):
    """
    Handle a new selection in the output-format combobox.

    If the ``auto_set_out`` preference is on, the output entry is empty and
    the input path contains a dot, the output entry is filled with the input
    path whose extension is swapped for the selected format's extension.

    :param event: the widget event that triggered this callback (unused)
    """
    formatD = self.combobox_o.get()
    if formatD is None or formatD == noneItem:
        return
    # NOTE: showing/hiding of format-specific widgets (Omnidic, Babylon,
    # Stardict) used to live here; that code was already disabled.
    if self.pref['auto_set_out']:
        pathI = toStr(self.entry_i.get())
        pathO = toStr(self.entry_o.get())
        # Only suggest a path when the user has not entered one yet.
        if not pathO and '.' in pathI:
            extO = Glossary.descExt[formatD]
            self.entry_o.insert(0, os.path.splitext(pathI)[0] + extO)
def convert(self):
    """
    Write the loaded glossary to the output path in the selected format.

    ``self.running`` is set for the duration of the write and is always
    cleared again, even when the writer raises.

    :return: ``True`` on success, ``False`` when no glossary is loaded,
        ``None`` when the output path or output format is missing
    """
    if len(self.glos) == 0:
        log.error(
            'Input glossary has no word! Be sure to click "Load" before "Convert", '
            'or just click "Apply" instead.'
        )
        return False
    oPath = toStr(self.entry_o.get())
    if not oPath:
        log.critical('Output file path is empty!')
        return None
    formatD = self.combobox_o.get()
    if formatD in (noneItem, ''):
        log.critical('Output format is empty!')
        return None
    log.info('Converting to %s, please wait...' % formatD)
    # Resolve the format before raising the running flag, so an unknown
    # description cannot leave the flag set.
    fmt = Glossary.descFormat[formatD]
    # NOTE: format-specific writers (Omnidic, Babylon) were dispatched here
    # in the past; that code was already disabled.
    self.running = True
    t0 = time.time()
    try:
        self.glos.write(oPath, format=fmt)
    finally:
        self.running = False
    self.oPath = oPath
    log.info('writing %s file: "%s" done.' % (fmt, oPath))
    # The value logged below is the elapsed time of the write.
    log.info('time left = %3f seconds' % (time.time() - t0))
    return True
def convert(self):
    """
    Write the glossary to the output path in the selected output format.

    ``self.running`` is set while the writer runs and is always cleared
    afterwards, even when the writer raises.

    :return: ``True`` on success, ``None`` when the output path or the
        output format is missing
    """
    oPath = toStr(self.entry_o.get())
    if not oPath:
        log.critical('Output file path is empty!')
        return None
    formatD = self.combobox_o.get()
    if formatD in (noneItem, ''):
        log.critical('Output format is empty!')
        return None
    log.info('Converting to %s, please wait...' % formatD)
    # Look the format up first so a KeyError cannot leave the flag set.
    fmt = Glossary.descFormat[formatD]
    # NOTE: format-specific writers (Omnidic, Babylon) were dispatched here
    # in the past; that code was already disabled.
    self.running = True
    t0 = time.time()
    try:
        self.glos.write(oPath, format=fmt)
    finally:
        self.running = False
    self.oPath = oPath
    log.info('writing %s file: "%s" done.' % (fmt, oPath))
    # The value logged below is the elapsed time of the write.
    log.info('time left = %3f seconds' % (time.time() - t0))
    return True
def entry_changed(self, event=None):
    """
    Re-sync cached paths and dependent widgets after a path entry changed.

    For the input entry and then the output entry, when the text differs
    from the cached ``self.pathI`` / ``self.pathO``:

    - a ``file://`` URL is converted with ``urlToPath`` and written back;
    - with the ``auto_set_for`` preference, the matching format combobox is
      set from the file extension (a trailing ``.gz``/``.bz2``/``.zip`` is
      skipped to reach the real extension);
    - for the input entry only, with ``auto_set_out`` and an output format
      already chosen, an empty output entry is filled with the input path
      carrying the output format's extension.

    :param event: the widget event that triggered this callback (unused)
    """
    pathI = toStr(self.entry_i.get())
    if self.pathI != pathI:
        if len(pathI) > 7 and pathI[:7] == 'file://':
            pathI = urlToPath(pathI)
            self.entry_i.delete(0, 'end')
            self.entry_i.insert(0, pathI)
        if self.pref['auto_set_for']:
            ext = os.path.splitext(pathI)[-1].lower()
            if ext in ('.gz', '.bz2', '.zip'):
                ext = os.path.splitext(pathI[:-len(ext)])[-1].lower()
            # zip() instead of xrange(len(...)): works on Python 2 and 3
            for extList, desc in zip(Glossary.readExt, Glossary.readDesc):
                if ext in extList:
                    self.combobox_i.set(desc)
                    break
        if self.pref['auto_set_out']:
            formatOD = self.combobox_o.get()
            pathO = toStr(self.entry_o.get())
            if formatOD != noneItem and not pathO and '.' in pathI:
                extO = Glossary.descExt[formatOD]
                pathO = os.path.splitext(pathI)[0] + extO
                self.entry_o.delete(0, 'end')
                self.entry_o.insert(0, pathO)
        self.pathI = pathI
    ##############################################
    pathO = toStr(self.entry_o.get())
    if self.pathO != pathO:
        if len(pathO) > 7 and pathO[:7] == 'file://':
            pathO = urlToPath(pathO)
            self.entry_o.delete(0, 'end')
            self.entry_o.insert(0, pathO)
        if self.pref['auto_set_for']:
            ext = os.path.splitext(pathO)[-1].lower()
            if ext in ('.gz', '.bz2', '.zip'):
                ext = os.path.splitext(pathO[:-len(ext)])[-1].lower()
            for extList, desc in zip(Glossary.writeExt, Glossary.writeDesc):
                if ext in extList:
                    self.combobox_o.set(desc)
                    break
        self.pathO = pathO
def entry_changed(self, event=None):
    """
    Bring cached paths and format widgets up to date after an entry edit.

    The input entry is handled first, then the output entry. For each one
    whose text differs from the cached ``self.pathI`` / ``self.pathO``:

    - a ``file://`` URL is replaced in the entry by ``urlToPath(...)``;
    - if ``auto_set_for`` is enabled, the corresponding format combobox is
      chosen from the file extension, looking past a compression suffix
      (``.gz``, ``.bz2``, ``.zip``);
    - input only: if ``auto_set_out`` is enabled, an output format is
      selected and the output entry is empty, the output entry is filled
      with the input path using the output format's extension.

    :param event: the widget event that triggered this callback (unused)
    """
    pathI = toStr(self.entry_i.get())
    if self.pathI != pathI:
        if len(pathI) > 7 and pathI[:7] == 'file://':
            pathI = urlToPath(pathI)
            self.entry_i.delete(0, 'end')
            self.entry_i.insert(0, pathI)
        if self.pref['auto_set_for']:
            ext = os.path.splitext(pathI)[-1].lower()
            if ext in ('.gz', '.bz2', '.zip'):
                # compressed file: use the extension in front of the suffix
                ext = os.path.splitext(pathI[:-len(ext)])[-1].lower()
            for extList, desc in zip(Glossary.readExt, Glossary.readDesc):
                if ext in extList:
                    self.combobox_i.set(desc)
                    break
        if self.pref['auto_set_out']:
            formatOD = self.combobox_o.get()
            pathO = toStr(self.entry_o.get())
            if formatOD != noneItem and not pathO and '.' in pathI:
                extO = Glossary.descExt[formatOD]
                pathO = os.path.splitext(pathI)[0] + extO
                self.entry_o.delete(0, 'end')
                self.entry_o.insert(0, pathO)
        self.pathI = pathI
    ##############################################
    pathO = toStr(self.entry_o.get())
    if self.pathO != pathO:
        if len(pathO) > 7 and pathO[:7] == 'file://':
            pathO = urlToPath(pathO)
            self.entry_o.delete(0, 'end')
            self.entry_o.insert(0, pathO)
        if self.pref['auto_set_for']:
            ext = os.path.splitext(pathO)[-1].lower()
            if ext in ('.gz', '.bz2', '.zip'):
                ext = os.path.splitext(pathO[:-len(ext)])[-1].lower()
            for extList, desc in zip(Glossary.writeExt, Glossary.writeDesc):
                if ext in extList:
                    self.combobox_o.set(desc)
                    break
        self.pathO = pathO
def get_prefix(self, word: str) -> "Optional[str]":
    """
    Return the grouping prefix for ``word``.

    :param word: the headword; it is passed through ``toStr`` before use
    :return: ``None`` for an empty word, ``"SPECIAL"`` when the first
        character sorts strictly between ``"Z"`` and ``"a"``, otherwise the
        first ``self._group_by_prefix_length`` characters of the word
    """
    length = self._group_by_prefix_length
    if not word:
        return None
    word = toStr(word)
    if "Z" < word[0] < "a":
        return "SPECIAL"
    return word[:length]
def samplesDumpFileWrite(self, text):
    """
    Append ``text`` to the samples dump file, preceded by the file offset.

    When no samples dump file is open, the text goes to the debug log
    instead.

    :param text: the text to record; converted with ``toStr`` first
    """
    text = toStr(text)
    if self.samplesDumpFile:
        offset = self.samplesDumpFile.tell()
        # The template uses {}-style fields, so it needs str.format();
        # applying % to it raised TypeError on every call.
        self.samplesDumpFile.write("\noffset = {0:#X}\n".format(offset))
        self.samplesDumpFile.write(text + "\n")
    else:
        log.debug(text)
def samplesDumpFileWrite(self, text):
    """
    Record ``text`` in the samples dump file, or in the debug log.

    With a samples dump file open, the current file offset is written in
    hexadecimal on its own line, followed by the text and a newline.

    :param text: the text to record; converted with ``toStr`` first
    """
    text = toStr(text)
    if not self.samplesDumpFile:
        # no dump file configured: fall back to the debug log
        log.debug(text)
        return
    dumpFile = self.samplesDumpFile
    offset = dumpFile.tell()
    dumpFile.write(f"\noffset = {offset:#02x}\n")
    dumpFile.write(text + "\n")
def msgLogFileWrite(self, text):
    """
    Record ``text`` in the message log file, or in the debug log.

    Each message is preceded by the current file offset in hexadecimal.
    Intended usage: open the log in both a text editor and a hex editor;
    read the messages in the text editor and use the printed offsets to
    jump to the matching place in the hex editor to inspect char codes.

    :param text: the message; converted with ``toStr`` first
    """
    text = toStr(text)
    if not self.msgLogFile:
        # no log file configured: fall back to the debug log
        log.debug(text)
        return
    logFile = self.msgLogFile
    offset = logFile.tell()
    logFile.write(f"\noffset = {offset:#02x}\n")
    logFile.write(text + "\n")
def get_prefix(word, length):
    """
    Compute the grouping prefix of ``word``.

    :param word: the word string; passed through ``toStr`` before use
    :param length: number of leading characters to keep
    :type length: int
    :return: ``None`` for an empty word, ``"SPECIAL"`` when the first
        character sorts strictly between ``"Z"`` and ``"a"``, otherwise the
        first ``length`` characters
    """
    if word:
        text = toStr(word)
        first = text[0]
        if first > "Z" and first < "a":
            return "SPECIAL"
        # FIXME (kept from the original): should this return unicode?
        return text[:length]
    return None
def load(self):
    """
    Read the glossary named in the input entry into ``self.glos``.

    The format comes from the input combobox; when nothing is selected an
    empty format string is passed to the reader.

    :return: ``True`` on success, ``False`` when reading failed, ``None``
        when the input path is empty
    """
    inputPath = toStr(self.entry_i.get())
    if not inputPath:
        printAsError('Input file path is empty!')
        return
    formatD = self.combobox_i.get()
    if formatD != noneItem:
        fmt = Glossary.descFormat[formatD]
        print('Reading from %s, please wait...' % formatD)
    else:
        fmt = ''
        print('Please wait...')
    startTime = time.time()
    # NOTE: format-specific readers (Omnidic, StarDict ext) were dispatched
    # here in the past; only the generic reader is used now.
    succeeded = self.glos.read(inputPath, format=fmt)
    if not succeeded:
        print('reading %s file: "%s" failed.' % (fmt, inputPath))
        return False
    wordCount = len(self.glos.data)
    print('reading %s file: "%s" done.\n%d words found.' % (
        fmt,
        inputPath,
        wordCount,
    ))
    self.iPath = inputPath
    self.glos.uiEdit()
    self.progress(1.0, 'Loading Comleted')
    if self.checkb_o_det.get():
        # details requested: elapsed time plus every info pair
        print('time left = %3f seconds' % (time.time() - startTime))
        for item in self.glos.info:
            print('%s="%s"' % (item[0], item[1]))
    return True
def load(self):
    """
    Read the glossary named in the input entry into ``self.glos``.

    The format comes from the input combobox; when nothing is selected an
    empty format string is passed to the reader.

    :return: ``True`` on success, ``False`` when reading failed, ``None``
        when the input path is empty
    """
    inputPath = toStr(self.entry_i.get())
    if not inputPath:
        log.error('Input file path is empty!')
        return
    formatD = self.combobox_i.get()
    if formatD != noneItem:
        fmt = Glossary.descFormat[formatD]
        log.info('Reading from %s, please wait...' % formatD)
    else:
        fmt = ''
        log.info('Please wait...')
    startTime = time.time()
    # NOTE: format-specific readers (Omnidic, StarDict ext) were dispatched
    # here in the past; only the generic reader is used now.
    succeeded = self.glos.read(inputPath, format=fmt)
    if not succeeded:
        log.error('reading %s file: "%s" failed.' % (fmt, inputPath))
        return False
    wordCount = len(self.glos.data)
    log.info('reading %s file: "%s" done.\n%d words found.' % (
        fmt,
        inputPath,
        wordCount,
    ))
    self.iPath = inputPath
    self.glos.uiEdit()
    self.progress(1.0, 'Loading Comleted')
    if self.checkb_o_det.get():
        # details requested: elapsed time plus every info pair
        log.info('time left = %3f seconds' % (time.time() - startTime))
        for item in self.glos.info:
            log.info('%s="%s"' % (item[0], item[1]))
    return True
# Gather debugging statistics for one definition.
#
# As written, the code:
#   * calls self.findAndPrintCharSamples(fields.b_defi, ..., fields.encoding)
#     when fields.singleEncoding is truthy;
#   * when self.metadata2 is set, increments defiProcessedCount, increments
#     defiAsciiCount if isASCII(toStr(fields.b_defi)) holds, and increments
#     defiUtf8Count if fields.b_defi decodes as UTF-8 (a UnicodeError only
#     skips that increment, it is deliberately not reported);
#   * clears self.metadata2.isDefiASCII once fields.u_defi is not ASCII.
#
# The b_defi parameter is not read here; fields.b_defi is used instead.
# NOTE(review): this definition has lost its line breaks, so whether the
# counter updates are nested under the singleEncoding check cannot be told
# from the text alone - restore the original layout before editing.
def processDefiStat(self, fields, b_defi, b_key): if fields.singleEncoding: self.findAndPrintCharSamples( fields.b_defi, f"defi, key = {b_key}", fields.encoding, ) if self.metadata2: self.metadata2.defiProcessedCount += 1 if isASCII(toStr(fields.b_defi)): self.metadata2.defiAsciiCount += 1 try: fields.b_defi.decode("utf-8") except UnicodeError: pass else: self.metadata2.defiUtf8Count += 1 if self.metadata2 and self.metadata2.isDefiASCII: if not isASCII(fields.u_defi): self.metadata2.isDefiASCII = False
def convert(self):
    """
    Write the loaded glossary to the output path in the selected format.

    ``self.running`` is set for the duration of the write and is always
    cleared again, even when the writer raises.

    :return: ``True`` on success, ``False`` when no glossary is loaded,
        ``None`` when the output path or output format is missing
    """
    if len(self.glos.data) == 0:
        printAsError(
            'Input glossary has no word! Be sure to click "Load" before "Convert", '
            'or just click "Apply" instead.'
        )
        return False
    oPath = toStr(self.entry_o.get())
    if not oPath:
        printAsError('Output file path is empty!')
        return None
    formatD = self.combobox_o.get()
    if formatD in (noneItem, ''):
        printAsError('Output format is empty!')
        return None
    print('Converting to %s, please wait...' % formatD)
    # Resolve the format before raising the running flag, so an unknown
    # description cannot leave the flag set.
    fmt = Glossary.descFormat[formatD]
    # NOTE: format-specific writers (Stardict, Omnidic, Babylon) were
    # dispatched here in the past; that code was already disabled.
    self.running = True
    t0 = time.time()
    try:
        self.glos.write(oPath, format=fmt)
    finally:
        self.running = False
    self.oPath = oPath
    print('writing %s file: "%s" done.' % (fmt, oPath))
    if self.checkb_o_det.get():
        # The value printed below is the elapsed time of the write.
        print('time left = %3f seconds' % (time.time() - t0))
    return True
def _mktitle(title_element, include_opts=()):
    """
    Build a title string from a title element and selected optional parts.

    The element's own text comes first. For each ``nu`` child only its tail
    is appended. For each ``opt`` child its text is appended when the
    child's zero-based position among the ``opt`` children is listed in
    ``include_opts``, and its tail is always appended.

    :param title_element: object with ``.text`` that iterates over children
        having ``.tag``, ``.text`` and ``.tail``
    :param include_opts: indexes of the ``opt`` children to include
    :return: the concatenated title, stripped and passed through ``toStr``
    """
    parts = [title_element.text]
    opt_i = -1
    for c in title_element:
        if c.tag == 'nu':
            parts.append(c.tail)
        elif c.tag == 'opt':
            opt_i += 1
            if opt_i in include_opts:
                parts.append(c.text)
            parts.append(c.tail)
    # .text / .tail are None when absent; skip those pieces instead of
    # failing on concatenation or on the final strip().
    return toStr(''.join(part for part in parts if part).strip())
def rawDumpFileWriteData(self, text):
    """
    Write ``text`` to the raw dump file, if one is open.

    The text is written as is: encoding it with "unicode_escape" was
    rejected because that escapes too many characters (for example äöü).

    :param text: the data to dump; converted with ``toStr`` first
    """
    converted = toStr(text)
    if not self.rawDumpFile:
        return
    self.rawDumpFile.write(converted)
def rawDumpFileWriteText(self, text):
    """
    Write ``text`` to the raw dump file, if one is open.

    FIXME (kept from the original, which does not say what needs fixing).

    :param text: the text to dump; converted with ``toStr`` first
    """
    converted = toStr(text)
    if not self.rawDumpFile:
        return
    self.rawDumpFile.write(converted)
def format_clean_content(title, body, BeautifulSoup):
    """
    Rewrite article markup into the stricter form emitted by this module.

    Heavily integrated with the output of the dsl reader plugin, and with
    xdxf as well. Rewrites performed:

    - class="sec"                       => d:priority="2"
    - style="color:steelblue"           => class="ex"
    - class="p" style="color:green"     => class="p"
    - style="color:green"               => class="c"
    - style="margin-left:{}em"          => class="m{}"
    - <s>                               => <del>

    :param title: str | None; when truthy it becomes a leading ``<h1>``
    :param body: the article markup
    :param BeautifulSoup: object exposing ``BeautifulSoup`` and ``Tag``; a
        falsy value selects the regex / string-replace fallback
    :return: the cleaned markup
    """
    # xhtml is strict
    if BeautifulSoup:
        soup = BeautifulSoup.BeautifulSoup(body, "lxml", from_encoding='utf-8')
        # difference between 'lxml' and 'html.parser'
        if soup.body:
            soup = soup.body

        for tag in soup(class_='sec'):
            tag['class'].remove('sec')
            if not tag['class']:
                del tag['class']
            tag['d:priority'] = "2"
        for tag in soup(lambda x: 'color:steelblue' in x.get('style', '')):
            remove_style(tag, 'color:steelblue')
            if 'ex' not in tag.get('class', []):
                tag['class'] = tag.get('class', []) + ['ex']
        for tag in soup(is_green):
            remove_style(tag, 'color:green')
            if 'p' not in tag.get('class', ''):
                tag['class'] = tag.get('class', []) + ['c']
        for tag in soup(True):
            if 'style' in tag.attrs:
                m = margin_re.search(tag['style'])
                if m:
                    remove_style(tag, m.group(0))
                    tag['class'] = tag.get('class', []) + ['m' + m.group(1)]
        for tag in soup.select('[href]'):
            href = tag['href']
            # anything that is not a web link becomes a dictionary link
            if not href.startswith(('http:', 'https:')):
                tag['href'] = 'x-dictionary:d:%s' % href
        for tag in soup('u'):
            tag.name = 'span'
            tag['class'] = tag.get('class', []) + ['u']
        for tag in soup('s'):
            tag.name = 'del'

        if title:
            h1 = BeautifulSoup.Tag(name='h1')
            h1.string = title
            soup.insert(0, h1)
        # hence the name BeautifulSoup
        content = toStr(soup.encode_contents())
    else:
        # somewhat analogue to what BeautifulSoup suppose to do
        body = em0_9_re.sub(em0_9_sub, body)
        body = em0_9_ex_re.sub(em0_9_ex_sub, body)
        body = href_re.sub(href_sub, body)

        body = (
            body
            .replace('<i style="color:green">', '<i class="c">')
            .replace('<i class="p" style="color:green">', '<i class="p">')
            .replace('<span class="ex" style="color:steelblue">', '<span class="ex">')
            .replace('<span class="sec ex" style="color:steelblue">', '<span class="sec ex">')
            .replace('<u>', '<span class="u">').replace('</u>', '</span>')
            .replace('<s>', '<del>').replace('</s>', '</del>')
        )

        # nice header to display
        content = '<h1>%s</h1>%s' % (title, body) if title else body

    # Raw strings: "\g" is not a valid string escape, so the non-raw
    # literals only worked by accident and trigger a warning.
    content = close_tag.sub(r'<\g<1> />', content)
    content = img_tag.sub(r'<img \g<1>/>', content)
    # NOTE(review): both literals below render identically here, which makes
    # this call look like a no-op - confirm the intended characters.
    content = content.replace(' ', ' ')
    content = nonprintable.sub('', content)
    return content
# Clean up article markup using a BeautifulSoup-like module and return the
# resulting markup as a string.
#
# Steps visible in the code, in order:
#   * parse `body` with BeautifulSoup.BeautifulSoup(body, features="lxml")
#     and continue on soup.body when it exists;
#   * class "sec" is removed and d:priority="2" is set; the styles
#     "color:steelblue", "color:green" and the margin matched by re_margin
#     are removed and replaced by classes "ex", "c" and "m<N>";
#   * tags whose name contains "xhtml:" lose that prefix;
#   * every [href]: the target is passed through cleanup_link_target, then
#     "sound:" links go to fix_sound_link, "phonetics" links (oxford9) get an
#     onmousedown handler and an .mp3 src on their <audio>, and targets for
#     which link_is_url() is false get the "x-dictionary:d:" prefix;
#   * div.pic_thumb / div.big_pic get onclick handlers that hide the clicked
#     element and show its sibling; <pos onclick="toggle_infl(this)"> gets an
#     inline handler that folds/unfolds a block;
#   * a leading "/" is stripped from [src]; <u> becomes <span class="u"> and
#     <s> becomes <del>;
#   * when `title` is truthy and `body` contains no "<h", an <h1> holding the
#     title is inserted first.
# The result is toStr(soup.encode_contents()).
#
# NOTE(review): this definition has lost its line breaks; the nesting of the
# statements inside the loops is not recoverable from the text alone -
# restore the original layout before changing any logic.
def prepare_content_with_soup( title: "Optional[str]", body: str, BeautifulSoup: "Any", ) -> str: soup = BeautifulSoup.BeautifulSoup(body, features="lxml") # difference between "lxml" and "html.parser" if soup.body: soup = soup.body for tag in soup(class_="sec"): tag["class"].remove("sec") if not tag["class"]: del tag["class"] tag["d:priority"] = "2" for tag in soup(lambda x: "color:steelblue" in x.get("style", "")): remove_style(tag, "color:steelblue") if "ex" not in tag.get("class", []): tag["class"] = tag.get("class", []) + ["ex"] for tag in soup(is_green): remove_style(tag, "color:green") if "p" not in tag.get("class", ""): tag["class"] = tag.get("class", []) + ["c"] for tag in soup(True): if "style" in tag.attrs: m = re_margin.search(tag["style"]) if m: remove_style(tag, m.group(0)) tag["class"] = tag.get("class", []) + ["m" + m.group(1)] for tag in soup(lambda x: "xhtml:" in x.name): old_tag_name = tag.name tag.name = old_tag_name[len("xhtml:"):] if tag.string: tag.string = f"{tag.string} " for tag in soup.select("[href]"): href = tag["href"] href = cleanup_link_target(href) if href.startswith("sound:"): fix_sound_link(href, tag) elif href.startswith("phonetics") or href.startswith("help:phonetics"): # for oxford9 log.debug(f"phonetics: tag={tag}") if tag.audio and "name" in tag.audio.attrs: tag["onmousedown"] = f"this.lastChild.play(); return false;" src_name = tag.audio["name"].replace("#", "_") tag.audio["src"] = f"{src_name}.mp3" elif not link_is_url(href): tag["href"] = f"x-dictionary:d:{href}" for thumb in soup.find_all("div", "pic_thumb"): thumb["onclick"] = 'this.setAttribute("style", "display:none"); ' \ 'this.nextElementSibling.setAttribute("style", "display:block")' for pic in soup.find_all("div", "big_pic"): pic["onclick"] = 'this.setAttribute("style", "display:none"), ' \ 'this.previousElementSibling.setAttribute("style", "display:block")' # to unfold(expand) and fold(collapse) blocks for pos in soup.find_all("pos", onclick="toggle_infl(this)"): 
# TODO: simplify this! pos["onclick"] = ( r'var e = this.parentElement.parentElement.parentElement' r'.querySelector("res-g vp-gs"); style = window.' r'getComputedStyle(e), display = style.getPropertyValue' r'("display"), "none" === e.style.display || "none" === display' r' ? e.style.display = "block" : e.style.display = "none", ' r'this.className.match(/(?:^|\s)Clicked(?!\S)/) ? this.' r'className = this.className.replace(' r'/(?:^|\s)Clicked(?!\S)/g, "") : this.setAttribute(' r'"class", "Clicked")' ) for tag in soup.select("[src]"): src = tag["src"] if src.startswith("/"): tag["src"] = src[1:] for tag in soup("u"): tag.name = "span" tag["class"] = tag.get("class", []) + ["u"] for tag in soup("s"): tag.name = "del" if title and "<h" not in body: h1 = BeautifulSoup.Tag(name="h1") h1.string = title soup.insert(0, h1) # hence the name BeautifulSoup # soup.insert(0,head) content = toStr(soup.encode_contents()) return content