def assert_html2Creole(self, raw_markup, raw_html, debug=False, **kwargs):
    """
    Convert >raw_html< with html2creole() and compare the generated markup
    with the given >raw_markup< reference string.

    raw_markup -- reference markup the conversion result must be equal to
    raw_html -- html code that gets converted
    debug -- if true, dump the intermediate texts via self._debug_text();
        the flag is also passed on to html2creole()
    **kwargs -- passed through unchanged to html2creole()

    Fails via self.assertEqual() if the generated markup differs from the
    reference.
    """
    # A test where reference and source are the same string checks nothing.
    self.assertNotEqual(raw_markup, raw_html)

    # prepare whitespace on test strings
    markup = self._prepare_text(raw_markup)
    assert isinstance(markup, unicode)
    if debug:
        # The label names this method (it used to name assert_Creole2html).
        self._debug_text("assert_html2Creole() markup", markup)

    html = self._prepare_text(raw_html)
    assert isinstance(html, unicode)

    # convert html code into creole markup
    out_string = html2creole(html, debug, **kwargs)
    if debug:
        self._debug_text("assert_html2Creole() html2creole", out_string)

    # compare
    self.assertEqual(out_string, markup)
def assert_html2creole(self, raw_creole, raw_html,
        strip_lines=False, debug=False,
        parser_kwargs=None, emitter_kwargs=None):
    """
    Convert >raw_html< with html2creole() and compare the generated markup
    with the given >raw_creole< reference string.

    raw_creole -- reference markup the conversion result must be equal to
    raw_html -- html code that gets converted
    strip_lines -- accepted for signature compatibility; not used here
    debug -- if true, dump the intermediate texts via self._debug_text();
        the flag is also passed on to html2creole()
    parser_kwargs -- dict passed positionally to html2creole()
        (default: a new empty dict)
    emitter_kwargs -- dict passed positionally to html2creole()
        (default: a new empty dict)

    Fails via self.assertEqual() if the generated markup differs from the
    reference. Any error raised by the comparison is re-raised after a
    marker line was printed.
    """
    # Create the default dicts per call: a `{}` default in the signature is
    # one shared object, so a mutation would leak into later calls.
    if parser_kwargs is None:
        parser_kwargs = {}
    if emitter_kwargs is None:
        emitter_kwargs = {}

    # A test where reference and source are the same string checks nothing.
    self.assertNotEqual(raw_creole, raw_html)

    # prepare whitespace on test strings
    markup = self._prepare_text(raw_creole)
    assert isinstance(markup, unicode)
    if debug:
        # The label names this method (it used to name assert_creole2html).
        self._debug_text("assert_html2creole() markup", markup)

    html = self._prepare_text(raw_html)
    assert isinstance(html, unicode)

    # convert html code into creole markup
    out_string = html2creole(html, debug, parser_kwargs, emitter_kwargs)
    if debug:
        self._debug_text("assert_html2creole() html2creole", out_string)

    # compare
    try:
        self.assertEqual(out_string, markup)
    except Exception:
        # Mark which conversion direction failed, then let the error through.
        print(" *** Error in html2creole:")
        raise
def assert_html2creole2(self, creole, html, debug=False, unknown_emit=None):
    """
    Run html2creole() on >html< and assert that the result is equal to the
    >creole< reference string.

    debug -- passed on to html2creole(); if true the converted text is also
        dumped via self._debug_text()
    unknown_emit -- passed on to html2creole() as keyword argument
    """
    converted = html2creole(html, debug, unknown_emit=unknown_emit)

    if debug:
        self._debug_text("assert_html2creole() html2creole", converted)

    self.assertEqual(converted, creole, msg="html2creole")
def run(self):
    """
    Import the actors listed in 'sina_actor_ids.txt' into self.mongo_wiki.

    The file holds comma separated actor ids. For every id the base info
    and the content are fetched (self.getActorBaseInfo / self.getActorContent),
    the content is converted html -> creole -> html, a slug that is not yet
    used in the collection is derived from the title, and the resulting
    document is inserted. Ids for which getActorBaseInfo() returns nothing
    are skipped.
    """
    tz = pytz.timezone("Asia/Taipei")

    # Disabled: rebuild the id file from the list pages.
    #list_urls = self.getListUrls()  # build the star list page urls
    #actor_ids = self.getActorIds(list_urls)  # collect the actor ids
    #ids = ','.join(actor_ids)
    #f = open('sina_actor_ids.txt', 'w+')
    #f.write(ids)
    #f.close
    #actor_ids = ['7915']

    # `with` closes the file; the former bare `f.close` (no parentheses)
    # never called the method.
    with open('sina_actor_ids.txt', 'r') as f:
        raw_ids = f.read()

    for actor_id in raw_ids.split(','):
        # Tolerate an empty file, a trailing comma and a trailing newline.
        actor_id = actor_id.strip()
        if not actor_id:
            continue

        actor_data = self.getActorBaseInfo(actor_id)
        if not actor_data:
            print('跳过组合')
            continue

        # pytz zones must be attached with localize(); replace(tzinfo=tz)
        # picks the zone's first (LMT) offset instead of the current one.
        now = tz.localize(datetime.now())
        actor_data['model'] = 'actor'
        actor_data['created_at'] = now
        actor_data['updated_at'] = now
        actor_data['title'] = actor_data['title'].replace('-', '·')
        print(actor_data)

        actor_content = self.getActorContent(actor_id)
        print(actor_content)

        # Strip the automatically generated wiki links.
        content = html2creole(actor_content)
        content = content.replace('[[', '').replace(']]', '')
        actor_data['content'] = content
        actor_data['html_cache'] = creole2html(content)

        # Build the slug proposal: the substitution runs on the gb18030
        # encoded title, the result is converted back to utf-8.
        gb_title = actor_data['title'].decode("utf-8").encode(
            "gb18030", "ignore")
        dashed = re.sub("[^\x61-\xff\d\w]", "-", gb_title)
        proposal = dashed.decode("gb18030", "ignore").encode(
            "utf-8").strip("-")
        slug = proposal

        # Collect every stored slug starting with the proposal.
        # NOTE(review): the slug goes unescaped into the pattern and the
        # character class above keeps e.g. '{' and '|' -- confirm this is
        # acceptable for the titles in use.
        regex = re.compile("^%s" % slug)
        taken_slugs = set(
            result['slug']
            for result in self.mongo_wiki.find({"slug": regex}))

        # Append "-2", "-3", ... until the slug is unused.
        i = 1
        while slug in taken_slugs:
            i = i + 1
            slug = proposal + "-" + str(i)

        actor_data['slug'] = slug
        self.mongo_wiki.insert(actor_data)
def run(self):
    """
    Import the actors listed in 'sina_actor_ids.txt' into self.mongo_wiki.

    The file holds comma separated actor ids. For every id the base info
    and the content are fetched (self.getActorBaseInfo / self.getActorContent),
    the content is converted html -> creole -> html, a slug that is not yet
    used in the collection is derived from the title, and the resulting
    document is inserted. Ids for which getActorBaseInfo() returns nothing
    are skipped.
    """
    tz = pytz.timezone("Asia/Taipei")

    # Disabled: rebuild the id file from the list pages.
    #list_urls = self.getListUrls()  # build the star list page urls
    #actor_ids = self.getActorIds(list_urls)  # collect the actor ids
    #ids = ','.join(actor_ids)
    #f = open('sina_actor_ids.txt', 'w+')
    #f.write(ids)
    #f.close
    #actor_ids = ['7915']

    # `with` closes the file; the former bare `f.close` (no parentheses)
    # never called the method.
    with open('sina_actor_ids.txt', 'r') as f:
        raw_ids = f.read()

    for actor_id in raw_ids.split(','):
        # Tolerate an empty file, a trailing comma and a trailing newline.
        actor_id = actor_id.strip()
        if not actor_id:
            continue

        actor_data = self.getActorBaseInfo(actor_id)
        if not actor_data:
            print('跳过组合')
            continue

        # pytz zones must be attached with localize(); replace(tzinfo=tz)
        # picks the zone's first (LMT) offset instead of the current one.
        now = tz.localize(datetime.now())
        actor_data['model'] = 'actor'
        actor_data['created_at'] = now
        actor_data['updated_at'] = now
        actor_data['title'] = actor_data['title'].replace('-', '·')
        print(actor_data)

        actor_content = self.getActorContent(actor_id)
        print(actor_content)

        # Strip the automatically generated wiki links.
        content = html2creole(actor_content)
        content = content.replace('[[', '').replace(']]', '')
        actor_data['content'] = content
        actor_data['html_cache'] = creole2html(content)

        # Build the slug proposal: the substitution runs on the gb18030
        # encoded title, the result is converted back to utf-8.
        gb_title = actor_data['title'].decode("utf-8").encode(
            "gb18030", "ignore")
        dashed = re.sub("[^\x61-\xff\d\w]", "-", gb_title)
        proposal = dashed.decode("gb18030", "ignore").encode(
            "utf-8").strip("-")
        slug = proposal

        # Collect every stored slug starting with the proposal.
        # NOTE(review): the slug goes unescaped into the pattern and the
        # character class above keeps e.g. '{' and '|' -- confirm this is
        # acceptable for the titles in use.
        regex = re.compile("^%s" % slug)
        taken_slugs = set(
            result['slug']
            for result in self.mongo_wiki.find({"slug": regex}))

        # Append "-2", "-3", ... until the slug is unused.
        i = 1
        while slug in taken_slugs:
            i = i + 1
            slug = proposal + "-" + str(i)

        actor_data['slug'] = slug
        self.mongo_wiki.insert(actor_data)
def convert_markup(raw_content, source_markup_no, dest_markup_no, request):
    """
    Convert >raw_content< from the source markup into the destination markup.

    Returns the content unchanged if both markups are the same or both are
    a html variant (MARKUP_HTML / MARKUP_HTML_EDITOR). Supported
    destinations are the html variants and MARKUP_CREOLE; any other
    destination raises NotImplementedError.

    For a non-html source the Django tags are cut out before the conversion
    and reassembled into the converted text afterwards.
    """
    page_msg = FileLikeMessages(request, messages.INFO)

    source_is_html = source_markup_no in (MARKUP_HTML, MARKUP_HTML_EDITOR)
    dest_is_html = dest_markup_no in (MARKUP_HTML, MARKUP_HTML_EDITOR)

    # Nothing to do ;)
    if source_markup_no == dest_markup_no:
        return raw_content
    if source_is_html and dest_is_html:
        return raw_content

    if dest_markup_no != MARKUP_CREOLE and not dest_is_html:
        raise NotImplementedError("Converting into %r not supported." % dest_markup_no)

    if source_is_html:
        # html -> html was handled above, so the destination is creole here
        # and there are no cut out Django tags to reassemble.
        from creole import html2creole
        return html2creole(raw_content)

    # Source markup is not html: cut out every Django tag from the content
    # and render the rest to html.
    assembler = DjangoTagAssembler()
    tagless_content, cut_data = assembler.cut_out(raw_content)
    html_content = convert(tagless_content, source_markup_no, page_msg)

    if dest_is_html:
        converted = html_content
    else:
        # Only creole is left as destination at this point.
        from creole import html2creole
        converted = html2creole(html_content)

    # Put the cut out Django tags back into the converted text.
    return assembler.reassembly(converted, cut_data)
== simple demo You can convert from: * from //creole// to **html** * from **html** back to //creole// === e.g. a table: |=headline 1 |= headline 2 | | 1.1. cell | 1.2. cell | | 2.1. cell | 2.2. cell | ---- More info on our [[http://code.google.com/p/python-creole/|Homepage]].""" print "*" * 79 print " Source creole markup text:" print "-" * 79 print source print "*" * 79 print " Convert it into html:" print "-" * 79 html = creole2html(source) print html print "*" * 79 print " Convert the html code back into creole:" print "-" * 79 creole = html2creole(html) print creole
* from //creole// to **html** * from **html** back to //creole// === e.g. a table: |=headline 1 |= headline 2 | | 1.1. cell | 1.2. cell | | 2.1. cell | 2.2. cell | ---- More info on our [[http://code.google.com/p/python-creole/|Homepage]].""" print "*" * 79 print " Source creole markup text:" print "-" * 79 print source print "*" * 79 print " Convert it into html:" print "-" * 79 html = creole2html(source) print html print "*" * 79 print " Convert the html code back into creole:" print "-" * 79 creole = html2creole(html) print creole