def _load_url(self, url_u, encoding=None):
    """Fetch ``url_u``, decode it and show the result in the text view.

    Args:
        url_u: URL handed to ``fetcher.fetch``.
        encoding: Encoding used to decode the fetched bytes. When falsy,
            it is determined with ``decoder.detect_encoding`` from the
            fetched bytes.

    Side effects:
        Clears ``self.wordhitview``, replaces ``self.text`` with a new
        ``word.Text`` and pushes it to ``self.textview`` together with the
        encoding that was used and the URL.
    """
    # The existing word-hit list belongs to the previous text; drop it.
    self.wordhitview.clear_words()

    response = fetcher.fetch(url_u)
    charset = (encoding if encoding
               else decoder.detect_encoding(response.txt_byte))
    decoded = decoder.decode(response.txt_byte, charset)

    # Use the unwiki result when it is non-empty, otherwise fall back to
    # unhtml on the same decoded text.
    plain = unmarkup.unwiki(decoded)
    if not plain:
        plain = unmarkup.unhtml(decoded)

    # Install the new text object and display it.
    self.text = word.Text()
    self.text.set_from_txt_u(plain)
    self.textview.set_text(self.text, charset, url_u)
def url_handler(url_u, dir='/tmp/t'):
    """Fetch ``url_u`` and save its text plus a license notice under ``dir``.

    The fetched bytes are decoded with ``decoder.detect_decode``, passed
    through ``unmarkup.unwiki``, extended with a retrieval/CC-BY-SA notice,
    encoded with ``decoder.encode`` and written to
    ``<dir>/<urlrewrite.url_to_filename(url_u)>.txt``.

    Args:
        url_u: URL to fetch.
        dir: Output directory; created if it does not exist.

    Side effects:
        Sets ``os.environ["ORIG_FILENAMES"]`` to ``"1"`` and writes (or
        overwrites) the output file.
    """
    if not os.path.isdir(dir):
        os.makedirs(dir)

    # NOTE(review): presumably consulted by urlrewrite.url_to_filename --
    # confirm before moving or removing.
    os.environ["ORIG_FILENAMES"] = "1"
    filename = os.path.join(dir, urlrewrite.url_to_filename(url_u)) + '.txt'

    ret = fetcher.fetch(url_u)
    txt_u = decoder.detect_decode(ret.txt_byte)
    txt_u = unmarkup.unwiki(txt_u)

    # add license notice
    tm = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
    notice = u"\n\n%s\nRetrieved on %s from:\n %s" % ('-' * 78, tm, ret.url_u)
    notice += (u"\nLicensed under CC-BY-SA, see %s" %
               "http://creativecommons.org/licenses/by-sa/3.0/")
    txt_u += notice

    txt_byte = decoder.encode(txt_u)
    # Binary mode: the payload is already encoded, so it must not go through
    # a text-mode handle. The context manager guarantees the file is flushed
    # and closed even if the write fails.
    with open(filename, 'wb') as f:
        f.write(txt_byte)
def url_handler(url_u, dir='/tmp/t'):
    """Fetch ``url_u`` and save its text plus a license notice under ``dir``.

    The fetched bytes are decoded with ``decoder.detect_decode``, passed
    through ``unmarkup.unwiki``, extended with a retrieval/CC-BY-SA notice,
    encoded with ``decoder.encode`` and written to
    ``<dir>/<urlrewrite.url_to_filename(url_u)>.txt``.

    Args:
        url_u: URL to fetch.
        dir: Output directory; created if it does not exist.

    Side effects:
        Sets ``os.environ["ORIG_FILENAMES"]`` to ``"1"`` and writes (or
        overwrites) the output file.
    """
    if not os.path.isdir(dir):
        os.makedirs(dir)

    # NOTE(review): presumably consulted by urlrewrite.url_to_filename --
    # confirm before moving or removing.
    os.environ["ORIG_FILENAMES"] = "1"
    filename = os.path.join(dir, urlrewrite.url_to_filename(url_u)) + '.txt'

    ret = fetcher.fetch(url_u)
    txt_u = decoder.detect_decode(ret.txt_byte)
    txt_u = unmarkup.unwiki(txt_u)

    # add license notice
    tm = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
    notice = u"\n\n%s\nRetrieved on %s from:\n %s" % ('-'*78, tm, ret.url_u)
    notice += (u"\nLicensed under CC-BY-SA, see %s" %
               "http://creativecommons.org/licenses/by-sa/3.0/")
    txt_u += notice

    txt_byte = decoder.encode(txt_u)
    # Binary mode: the payload is already encoded, so it must not go through
    # a text-mode handle. The context manager guarantees the file is flushed
    # and closed even if the write fails.
    with open(filename, 'wb') as f:
        f.write(txt_byte)