def _html_to_wiki(self, html):
    """
    Convert an HTML document to wiki markup.

    Raises forms.ValidationError if the HTML cannot be parsed.
    """
    converter = HTMLWiki([])
    try:
        return converter.from_html(html)
    except converter.ParseError:
        raise forms.ValidationError("Could not parse the HTML file.")
def clean_wikitext(self):
    """
    Clean the wikitext field: if the submitted markup language is HTML,
    convert the content to wiki markup; any other markup passes through
    unchanged.

    Raises forms.ValidationError if the HTML cannot be parsed.
    """
    if self.data['markup'] != 'html':
        # not HTML input: it's okay as-is
        return self.data['wikitext']
    # Delegate to the shared helper instead of duplicating the
    # HTMLWiki conversion and error handling inline.
    return self._html_to_wiki(self.data['wikitext'])
def convert_content(request, course_slug, page_label=None):
    """
    Convert between wikicreole and HTML (AJAX called in editor when switching
    editing modes).

    POST params: 'to' is the target language ('html' or anything else for
    wikitext); 'data' is the source text. Returns JSON {'data': converted}.
    """
    if request.method != 'POST':
        return ForbiddenResponse(request, 'POST only')
    if 'to' not in request.POST:
        return ForbiddenResponse(request, 'must send "to" language')
    if 'data' not in request.POST:
        # fixed typo in the error message: "sent" -> "send"
        return ForbiddenResponse(request, 'must send source "data"')

    offering = get_object_or_404(CourseOffering, slug=course_slug)
    to = request.POST['to']
    data = request.POST['data']

    if to == 'html':
        # convert wikitext to HTML
        # temporarily change the current version to get the result (but don't save)
        if page_label:
            page = get_object_or_404(Page, offering=offering, label=page_label)
            pv = page.current_version()
        else:
            # create temporary Page for conversion during creation
            p = Page(offering=offering)
            pv = PageVersion(page=p)
        pv.wikitext = data
        # no diff base: render the wikitext exactly as submitted
        pv.diff_from = None
        result = {'data': pv.html_contents()}
        return HttpResponse(json.dumps(result), content_type="application/json")
    else:
        # convert HTML to wikitext; unparseable HTML yields empty wikitext
        # rather than an error, so the editor can always switch modes
        converter = HTMLWiki([])
        try:
            wiki = converter.from_html(data)
        except converter.ParseError:
            wiki = ''
        result = {'data': wiki}
        return HttpResponse(json.dumps(result), content_type="application/json")
def __init__(self, offering, editor, *args, **kwargs):
    """Remember the offering/editor context and set up an HTML-to-wiki converter."""
    super(SiteImportForm, self).__init__(*args, **kwargs)
    self.offering = offering
    self.editor = editor
    self.converter = HTMLWiki([])
class SiteImportForm(forms.Form):
    """
    Form that imports a small site of HTML pages (starting from one URL,
    following links under the same base path) and converts each into a
    Page/PageVersion pair of wiki markup.
    """
    url = forms.URLField(required=True, label='URL', widget=forms.TextInput(attrs={'size': 70}))
    can_read = forms.ChoiceField(choices=READ_ACL_CHOICES, required=True, initial="ALL")
    can_write = forms.ChoiceField(choices=WRITE_ACL_CHOICES, required=True, initial="STAF")

    def __init__(self, offering, editor, *args, **kwargs):
        super(SiteImportForm, self).__init__(*args, **kwargs)
        self.converter = HTMLWiki([])
        self.offering = offering
        self.editor = editor

    def _labelize(self, url, title):
        """
        Choose a page label from the URL: the last non-empty path component,
        falling back to the second-last. Returns None if no component fits.
        (The title argument is currently unused but kept for interface
        stability.)
        """
        path = urlparse.urlsplit(url).path
        if path:
            parts = path.split('/')
            if len(parts) >= 1 and parts[-1]:
                return parts[-1]
            elif len(parts) >= 2 and parts[-2]:
                return parts[-2]

    def _import_page(self, url):
        """
        Fetch one URL and convert it to an (unsaved) Page and PageVersion.

        Returns (page, pageversion, urls) where urls are the links found in
        the document. Raises forms.ValidationError on fetch, content-type,
        or parse failure.
        """
        try:
            fh = urllib2.urlopen(url, timeout=20)
            if 'content-type' in fh.headers:
                ctype = fh.headers['content-type'].split(';')[0]
                is_html = ctype in ['text/html', 'application/xhtml+xml']
            else:
                is_html = False
            html = fh.read()
            fh.close()
        except Exception:
            # narrowed from a bare except: so KeyboardInterrupt/SystemExit
            # still propagate; any fetch failure becomes a form error
            raise forms.ValidationError('Could not fetch "%s".' % (url))

        if not is_html:
            raise forms.ValidationError('Not HTML at "%s".' % (url))

        try:
            wiki, title, urls = self.converter.from_html_full(html)
        except self.converter.ParseError:
            raise forms.ValidationError("Could not parse the HTML file %s." % (url))

        label = self._labelize(url, title)
        if not title:
            title = label
        page = Page(offering=self.offering, label=label)
        pv = PageVersion(page=page, editor=self.editor, title=title, wikitext=wiki,
                         comment="imported content")
        return page, pv, urls

    def clean_url(self):
        """
        Crawl from the submitted URL, importing every reachable page under
        the same base path (capped at 20 pages).

        Returns (found, errors): found maps page label -> (Page, PageVersion);
        errors is a list of per-URL error messages for pages that failed.
        """
        url = self.cleaned_data['url']
        if not url:
            return None
        baseurl = urlparse.urljoin(url, "./")
        needed = set([url])
        done = set()
        found = {}
        errors = []
        while needed:
            if len(found) >= 20:
                # safety cap on the crawl size
                break
            url = needed.pop()
            # mark visited up-front so a failing URL is never retried
            done.add(url)
            try:
                page, pv, newurls = self._import_page(url)
            except forms.ValidationError as e:
                errors.append(e.messages[0])
                # BUGFIX: must skip to the next URL here; previously the loop
                # fell through and used page/pv/newurls, which are unbound on
                # the first failure (NameError) or stale from a prior iteration
                continue
            # resolve relative links and keep only those under the base path
            newurls = set((urlparse.urljoin(url, u) for u in newurls))
            newurls = set((u for u in newurls if u.startswith(baseurl)))
            needed = needed | newurls - done
            found[page.label] = (page, pv)
        return found, errors