def process(self):
    """Convert ``self.original_filename`` to CNXML and redirect onwards.

    The input is first converted to ODT unless its extension is already
    ``.odt``.  The ODT file is then transformed, the resulting CNXML is
    cleaned, handed to ``save_cnxml`` together with ``self.save_dir`` and
    the extracted files, and finally validated.

    Returns:
        The ``render_conversionerror`` response when a ``ConversionError``
        is raised, the ``templates/error.pt`` rendering (carrying the
        traceback) for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
    """
    try:
        # Work out the ODT path; only non-ODT inputs need converting first.
        base_name, ext = os.path.splitext(self.original_filename)
        odt_path = str(base_name) + '.odt'
        if ext != '.odt':
            self._convert_to_odt(base_name)

        # Transform the ODT, then clean and save the CNXML with its files.
        tree, files, _errors = transform(odt_path)
        cnxml = clean_cnxml(etree.tostring(tree))
        save_cnxml(self.save_dir, cnxml, files.items())

        # Validation (jing) happens only after everything has been saved.
        validate_cnxml(cnxml)
    except ConversionError as conversion_error:
        return render_conversionerror(self.request, conversion_error.msg)
    except Exception:
        # Pass the traceback to write_traceback_to_zipfile and show it on
        # the generic error page; a stale session title is dropped first.
        trace = traceback.format_exc()
        self.write_traceback_to_zipfile(trace)
        session = self.request.session
        if 'title' in session:
            del session['title']
        return render_to_response(
            'templates/error.pt', {'traceback': trace}, request=self.request)

    self.request.session.flash(self.message)
    next_url = self.request.route_url(self.nextStep())
    return HTTPFound(location=next_url)
def process_gdocs_resource(self, save_dir, gdocs_resource_id, gdocs_access_token=None):
    """Fetch a Google Docs entry, convert its content to CNXML and save it.

    Args:
        save_dir: Passed to ``save_cnxml`` as the destination.
        gdocs_resource_id: Identifier handed to the client's ``GetDoc``.
        gdocs_access_token: Optional token string; when truthy it is
            wrapped in a ``gdata.gauth.AuthSubToken``, otherwise no token
            is used.

    Returns:
        A ``(title, filename)`` tuple: the entry's title text and the
        fixed string ``"Google Document"``.
    """
    # Log in to gdocs and obtain a client object.
    client = getAuthorizedGoogleDocsClient()

    # Build an AuthSub token only when an access-token string was supplied.
    if gdocs_access_token:
        token = gdata.gauth.AuthSubToken(gdocs_access_token)
    else:
        token = None

    # Look up the document entry, then download the document's contents.
    entry = client.GetDoc(gdocs_resource_id, None, token)
    content_url = entry.content.src
    html = client.get_file_content(content_url, token)

    # Transform (downloading images too), clean, save and validate.
    raw_cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
    cnxml = clean_cnxml(raw_cnxml)
    save_cnxml(save_dir, cnxml, objects.items())
    validate_cnxml(cnxml)

    # Return the title and filename.  An older comment warns that
    # returning this filename might kill the ability to do multiple tabs
    # in parallel, unless it gets offloaded onto the form again.
    title = entry.title.text
    return (title, "Google Document")
def process(self):
    """Handle the "new module" / "existing module" choice on ``self.form``.

    * ``newmodule`` set: source and target become ``'new'`` and an empty
      CNXML document (no files) is passed to ``save_cnxml``.
    * ``existingmodule`` set: source and target become
      ``'existingmodule'`` and the user is redirected to the
      ``choose-module`` route.
    * neither set: nothing is saved.

    Returns:
        An ``HTTPFound`` redirect (``choose-module`` for an existing
        module, otherwise the route named by ``self.nextStep()``), or an
        error rendering when an exception is raised.
    """
    try:
        if self.form.data.get('newmodule'):
            self.set_source('new')
            self.set_target('new')

            # Save empty cnxml and html files.
            blank_cnxml = self.empty_cnxml()
            no_files = []
            save_cnxml(self.save_dir, blank_cnxml, no_files)
        elif self.form.data.get('existingmodule'):
            self.set_source('existingmodule')
            self.set_target('existingmodule')
            chooser_url = self.request.route_url('choose-module')
            return HTTPFound(location=chooser_url)
    except ConversionError as conversion_error:
        return render_conversionerror(self.request, conversion_error.msg)
    # TODO: add a process decorator that has this bit of error handling
    except Exception:
        trace = traceback.format_exc()
        self.write_traceback_to_zipfile(trace)
        session = self.request.session
        if 'title' in session:
            del session['title']
        return render_to_response(
            'templates/error.pt', {'traceback': trace}, request=self.request)

    self.request.session.flash(self.message)
    next_url = self.request.route_url(self.nextStep())
    return HTTPFound(location=next_url)
def process(self):
    """Convert the LaTeX archive at ``self.original_filename`` to CNXML.

    The file is read in full, transformed with ``latex_to_cnxml``, and the
    cleaned CNXML is handed to ``save_cnxml`` (with ``self.save_dir`` and
    the extracted objects) before being validated.

    Returns:
        The ``render_conversionerror`` response when a ``ConversionError``
        is raised, the ``templates/error.pt`` rendering (carrying the
        traceback) for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
    """
    try:
        # The context manager guarantees the handle is closed even when
        # reading fails; previously the file object was never closed.
        # NOTE(review): the default (text) mode is kept as in the original;
        # if the archive is binary, 'rb' may be more appropriate - confirm.
        with open(self.original_filename) as f:
            latex_archive = f.read()

        # LaTeX 2 CNXML transformation
        cnxml, objects = latex_to_cnxml(latex_archive,
                                        self.original_filename)

        cnxml = clean_cnxml(cnxml)
        save_cnxml(self.save_dir, cnxml, objects.items())
        validate_cnxml(cnxml)
    except ConversionError as e:
        return render_conversionerror(self.request, e.msg)
    except Exception:
        # Pass the traceback to write_traceback_to_zipfile and show it on
        # the generic error page; a stale session title is dropped first.
        tb = traceback.format_exc()
        self.write_traceback_to_zipfile(tb)
        templatePath = 'templates/error.pt'
        response = {'traceback': tb}
        if 'title' in self.request.session:
            del self.request.session['title']
        return render_to_response(templatePath, response,
                                  request=self.request)

    self.request.session.flash(self.message)
    return HTTPFound(location=self.request.route_url(self.nextStep()))
def _process_gdocs_resource(klass, save_dir, html):
    """Convert Google Docs HTML to CNXML, then save and validate it.

    Args:
        klass: Unused by this function.
        save_dir: Passed to ``save_cnxml`` as the destination.
        html: Document HTML handed to ``gdocs_to_cnxml``.

    Returns:
        The fixed string ``"Google Document"``.
    """
    # Transformation (image download enabled).
    raw_cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)

    cleaned = clean_cnxml(raw_cnxml)
    save_cnxml(save_dir, cleaned, objects.items())
    validate_cnxml(cleaned)

    return "Google Document"
def process(self, form):
    """Convert ``self.original_filename`` to CNXML and redirect onwards.

    The input is first converted to ODT unless its extension is already
    ``.odt``.  The ODT file is then transformed, the resulting CNXML is
    cleaned, handed to ``save_cnxml`` together with ``self.save_dir`` and
    the extracted files, and finally validated.

    Args:
        form: Only forwarded to ``write_traceback_to_zipfile`` when an
            unexpected exception is handled.

    Returns:
        The ``render_conversionerror`` response when a ``ConversionError``
        is raised, the ``templates/error.pt`` rendering (carrying the
        traceback) for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
    """
    try:
        # Work out the ODT path; only non-ODT inputs need converting first.
        base_name, ext = os.path.splitext(self.original_filename)
        odt_path = str(base_name) + '.odt'
        if ext != '.odt':
            self._convert_to_odt(base_name)

        # Transform the ODT, then clean and save the CNXML with its files.
        tree, files, _errors = transform(odt_path)
        cnxml = clean_cnxml(etree.tostring(tree))
        save_cnxml(self.save_dir, cnxml, files.items())

        # Validation (jing) happens only after everything has been saved.
        validate_cnxml(cnxml)
    except ConversionError as conversion_error:
        return render_conversionerror(self.request, conversion_error.msg)
    except Exception:
        # Pass the traceback to write_traceback_to_zipfile and show it on
        # the generic error page; a stale session title is dropped first.
        trace = traceback.format_exc()
        self.write_traceback_to_zipfile(trace, form)
        session = self.request.session
        if 'title' in session:
            del session['title']
        return render_to_response(
            'templates/error.pt', {'traceback': trace}, request=self.request)

    self.request.session.flash(self.message)
    next_url = self.request.route_url(self.nextStep())
    return HTTPFound(location=next_url)
def process(self, form):
    """Convert the LaTeX archive at ``self.original_filename`` to CNXML.

    The file is read in full, transformed with ``latex_to_cnxml``, and the
    cleaned CNXML is handed to ``save_cnxml`` (with ``self.save_dir`` and
    the extracted objects) before being validated.

    Args:
        form: Only forwarded to ``write_traceback_to_zipfile`` when an
            unexpected exception is handled.

    Returns:
        The ``render_conversionerror`` response when a ``ConversionError``
        is raised, the ``templates/error.pt`` rendering (carrying the
        traceback) for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
    """
    try:
        # The context manager guarantees the handle is closed even when
        # reading fails; previously the file object was never closed.
        # NOTE(review): the default (text) mode is kept as in the original;
        # if the archive is binary, 'rb' may be more appropriate - confirm.
        with open(self.original_filename) as f:
            latex_archive = f.read()

        # LaTeX 2 CNXML transformation
        cnxml, objects = latex_to_cnxml(latex_archive,
                                        self.original_filename)

        cnxml = clean_cnxml(cnxml)
        save_cnxml(self.save_dir, cnxml, objects.items())
        validate_cnxml(cnxml)
    except ConversionError as e:
        return render_conversionerror(self.request, e.msg)
    except Exception:
        # Pass the traceback to write_traceback_to_zipfile and show it on
        # the generic error page; a stale session title is dropped first.
        tb = traceback.format_exc()
        self.write_traceback_to_zipfile(tb, form)
        templatePath = 'templates/error.pt'
        response = {'traceback': tb}
        if 'title' in self.request.session:
            del self.request.session['title']
        return render_to_response(templatePath, response,
                                  request=self.request)

    self.request.session.flash(self.message)
    return HTTPFound(location=self.request.route_url(self.nextStep()))
def process(self, form):
    """Handle the "new module" / "existing module" choice made on ``form``.

    * ``newmodule`` set: source and target become ``'new'`` and an empty
      CNXML document (no files) is passed to ``save_cnxml``.
    * ``existingmodule`` set: source and target become
      ``'existingmodule'`` and the user is redirected to the
      ``choose-module`` route.
    * neither set: nothing is saved.

    Args:
        form: Object whose ``data`` mapping is read for the two flags; it
            is also forwarded to ``write_traceback_to_zipfile`` when an
            unexpected exception is handled.

    Returns:
        An ``HTTPFound`` redirect (``choose-module`` for an existing
        module, otherwise the route named by ``self.nextStep()``), or an
        error rendering when an exception is raised.
    """
    try:
        if form.data.get('newmodule'):
            self.set_source('new')
            self.set_target('new')

            # Save empty cnxml and html files.
            blank_cnxml = self.empty_cnxml()
            no_files = []
            save_cnxml(self.save_dir, blank_cnxml, no_files)
        elif form.data.get('existingmodule'):
            self.set_source('existingmodule')
            self.set_target('existingmodule')
            chooser_url = self.request.route_url('choose-module')
            return HTTPFound(location=chooser_url)
    except ConversionError as conversion_error:
        return render_conversionerror(self.request, conversion_error.msg)
    # TODO: add a process decorator that has this bit of error handling
    except Exception:
        trace = traceback.format_exc()
        self.write_traceback_to_zipfile(trace, form)
        session = self.request.session
        if 'title' in session:
            del session['title']
        return render_to_response(
            'templates/error.pt', {'traceback': trace}, request=self.request)

    self.request.session.flash(self.message)
    next_url = self.request.route_url(self.nextStep())
    return HTTPFound(location=next_url)
def _process_gdocs_resource(klass, save_dir, html, kix=None):
    """Convert Google Docs HTML to CNXML, then save and validate it.

    Args:
        klass: Unused by this function.
        save_dir: Passed to ``save_cnxml`` as the destination.
        html: Document HTML handed to ``gdocs_to_cnxml``.
        kix: Optional value forwarded to ``gdocs_to_cnxml`` as
            ``kixcontent``.

    Returns:
        The fixed string ``"Google Document"``.
    """
    # Transformation (image download enabled).
    raw_cnxml, objects = gdocs_to_cnxml(
        html, kixcontent=kix, bDownloadImages=True)

    cleaned = clean_cnxml(raw_cnxml)
    save_cnxml(save_dir, cleaned, objects.items())
    validate_cnxml(cleaned)

    return "Google Document"
def process(self):
    """Import the document at the submitted URL and convert it to CNXML.

    The URL is read from ``self.form.data['url_text']``.  URLs matching the
    Google Docs pattern are delegated to ``self.process_gdocs_resource``;
    any other URL is downloaded, transformed with ``htmlsoup_to_cnxml``,
    cleaned, handed to ``save_cnxml`` and validated.  In both cases the
    session's ``title`` and ``filename`` entries are set.

    Returns:
        The ``render_conversionerror`` response when a ``ConversionError``
        is raised, the ``templates/error.pt`` rendering (carrying the
        traceback) for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
    """
    try:
        url = self.form.data['url_text']

        # Build a regex for Google Docs URLs (raw string; the pattern
        # itself is unchanged).
        regex = re.compile(
            r"^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
        r = regex.search(url)

        # Take special action for Google Docs URLs
        if r:
            gdocs_resource_id = r.groups()[0]
            doc_id = "document:" + gdocs_resource_id
            title, filename = self.process_gdocs_resource(self.save_dir,
                                                          doc_id)
            self.request.session['title'] = title
            self.request.session['filename'] = filename
        else:
            # download html:
            # Simple urlopen() will fail on mediawiki websites
            # eg. Wikipedia!
            import_opener = urllib2.build_opener()
            import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            import_request = import_opener.open(url)
            try:
                html = import_request.read()
            finally:
                # Release the connection even when reading fails; the
                # response used to be left open.
                import_request.close()

            # transformation
            cnxml, objects, html_title = htmlsoup_to_cnxml(
                html, bDownloadImages=True, base_or_source_url=url)
            self.request.session['title'] = html_title

            cnxml = clean_cnxml(cnxml)
            save_cnxml(self.save_dir, cnxml, objects.items())

            # Keep the info we need for next uploads.  Note that this
            # might kill the ability to do multiple tabs in parallel,
            # unless it gets offloaded onto the form again.
            self.request.session['filename'] = "HTML Document"

            # cnxml only exists on this branch, so validation lives here.
            validate_cnxml(cnxml)
    except ConversionError as e:
        return render_conversionerror(self.request, e.msg)
    except Exception:
        # Pass the traceback to write_traceback_to_zipfile and show it on
        # the generic error page; a stale session title is dropped first.
        tb = traceback.format_exc()
        self.write_traceback_to_zipfile(tb)
        templatePath = 'templates/error.pt'
        response = {'traceback': tb}
        if 'title' in self.request.session:
            del self.request.session['title']
        return render_to_response(templatePath, response,
                                  request=self.request)

    self.request.session.flash(self.message)
    return HTTPFound(location=self.request.route_url(self.nextStep()))
def process(self, form):
    """Import the document at the submitted URL and convert it to CNXML.

    The URL is read from ``form.data['url_text']``.  For URLs matching the
    Google Docs pattern the document's HTML export is requested and passed
    to ``GoogleDocProcessor.process_gdocs_resource``; when that request
    raises ``HttpError`` or does not return a 2xx status, a failure
    message is flashed and the user is redirected to the ``choose`` route.
    Any other URL is downloaded, transformed with ``htmlsoup_to_cnxml``,
    cleaned, handed to ``save_cnxml`` and validated.

    Args:
        form: Object whose ``data`` mapping holds ``url_text``; it is also
            forwarded to the Google Docs processor and to
            ``write_traceback_to_zipfile``.

    Returns:
        The Google Docs processor's result or the ``choose`` redirect for
        Google Docs URLs; otherwise an error rendering when an exception
        is raised, or an ``HTTPFound`` redirect to the route named by
        ``self.nextStep()``.
    """
    try:
        url = form.data['url_text']

        # Build a regex for Google Docs URLs (raw string; the pattern
        # itself is unchanged).
        regex = re.compile(
            r"^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
        r = regex.search(url)

        # Take special action for Google Docs URLs
        if r is not None:
            gdocs_resource_id = r.groups()[0]
            http = httplib2.Http()
            http.follow_redirects = False
            try:
                resp, html = http.request(
                    'https://docs.google.com/document/d/%s/export?format=html&confirm=no_antivirus' % gdocs_resource_id)
            except HttpError:
                # Deliberately ignored: execution falls through to the
                # failure flash and redirect below.
                pass
            else:
                # Check that status was OK, google docs sends a redirect
                # to a login page if not.  Floor division keeps the check
                # correct for every 2xx status even under true division.
                if resp.status // 100 == 2:
                    # Get the title (does not work anymore)
                    title = 'Untitled Google Document'

                    # Process it
                    P = GoogleDocProcessor(self.request)
                    return P.process_gdocs_resource(html, title, form)
            self.request.session.flash('Failed to convert google document')
            return HTTPFound(location=self.request.route_url('choose'))
        else:
            # download html:
            # Simple urlopen() will fail on mediawiki websites
            # eg. Wikipedia!
            import_opener = urllib2.build_opener()
            import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            import_request = import_opener.open(url)
            try:
                html = import_request.read()
            finally:
                # Release the connection even when reading fails; the
                # response used to be left open.
                import_request.close()

            # transformation
            cnxml, objects, html_title = htmlsoup_to_cnxml(
                html, bDownloadImages=True, base_or_source_url=url)
            self.request.session['title'] = html_title

            cnxml = clean_cnxml(cnxml)
            save_cnxml(self.save_dir, cnxml, objects.items())

            # Keep the info we need for next uploads.  Note that this
            # might kill the ability to do multiple tabs in parallel,
            # unless it gets offloaded onto the form again.
            self.request.session['filename'] = "HTML Document"

            validate_cnxml(cnxml)
    except ConversionError as e:
        return render_conversionerror(self.request, e.msg)
    except Exception:
        # Pass the traceback to write_traceback_to_zipfile and show it on
        # the generic error page; a stale session title is dropped first.
        tb = traceback.format_exc()
        self.write_traceback_to_zipfile(tb, form)
        templatePath = 'templates/error.pt'
        response = {'traceback': tb}
        if 'title' in self.request.session:
            del self.request.session['title']
        return render_to_response(templatePath, response,
                                  request=self.request)

    self.request.session.flash(self.message)
    return HTTPFound(location=self.request.route_url(self.nextStep()))
def process(self, form):
    """Import the document at the submitted URL and convert it to CNXML.

    The URL is read from ``form.data['url_text']``.  For URLs matching the
    Google Docs pattern both the HTML export and the ``kix`` export are
    requested and passed to ``GoogleDocProcessor.process_gdocs_resource``;
    when a request raises ``HttpError`` or the HTML export does not return
    a 2xx status, a failure message is flashed and the user is redirected
    to the ``choose`` route.  Any other URL is downloaded, transformed
    with ``htmlsoup_to_cnxml``, cleaned, handed to ``save_cnxml`` and
    validated.

    Args:
        form: Object whose ``data`` mapping holds ``url_text``; it is also
            forwarded to the Google Docs processor and to
            ``write_traceback_to_zipfile``.

    Returns:
        The Google Docs processor's result or the ``choose`` redirect for
        Google Docs URLs; otherwise an error rendering when an exception
        is raised, or an ``HTTPFound`` redirect to the route named by
        ``self.nextStep()``.
    """
    try:
        url = form.data['url_text']

        # Build a regex for Google Docs URLs (raw string; the pattern
        # itself is unchanged).
        regex = re.compile(
            r"^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
        r = regex.search(url)

        # Take special action for Google Docs URLs
        if r is not None:
            gdocs_resource_id = r.groups()[0]
            http = httplib2.Http()
            http.follow_redirects = False
            try:
                resp, html = http.request(
                    'https://docs.google.com/document/d/%s/export?format=html&confirm=no_antivirus' % gdocs_resource_id)
                # Only the body of the kix export is used; its status is
                # not inspected.
                resp2, kix = http.request(
                    'https://docs.google.com/feeds/download/documents/export/Export?id=%s&exportFormat=kix' % gdocs_resource_id)
            except HttpError:
                # Deliberately ignored: execution falls through to the
                # failure flash and redirect below.
                pass
            else:
                # Check that status was OK, google docs sends a redirect
                # to a login page if not.  Floor division keeps the check
                # correct for every 2xx status even under true division.
                if resp.status // 100 == 2:
                    # Get the title
                    title = 'Untitled Google Document'

                    # Process it
                    P = GoogleDocProcessor(self.request)
                    return P.process_gdocs_resource(html, title, form, kix)
            self.request.session.flash('Failed to convert google document')
            return HTTPFound(location=self.request.route_url('choose'))
        else:
            # download html:
            # Simple urlopen() will fail on mediawiki websites
            # eg. Wikipedia!
            import_opener = urllib2.build_opener()
            import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
            import_request = import_opener.open(url)
            try:
                html = import_request.read()
            finally:
                # Release the connection even when reading fails; the
                # response used to be left open.
                import_request.close()

            # transformation
            cnxml, objects, html_title = htmlsoup_to_cnxml(
                html, bDownloadImages=True, base_or_source_url=url)
            self.request.session['title'] = html_title

            cnxml = clean_cnxml(cnxml)
            save_cnxml(self.save_dir, cnxml, objects.items())

            # Keep the info we need for next uploads.  Note that this
            # might kill the ability to do multiple tabs in parallel,
            # unless it gets offloaded onto the form again.
            self.request.session['filename'] = "HTML Document"

            validate_cnxml(cnxml)
    except ConversionError as e:
        return render_conversionerror(self.request, e.msg)
    except Exception:
        # Pass the traceback to write_traceback_to_zipfile and show it on
        # the generic error page; a stale session title is dropped first.
        tb = traceback.format_exc()
        self.write_traceback_to_zipfile(tb, form)
        templatePath = 'templates/error.pt'
        response = {'traceback': tb}
        if 'title' in self.request.session:
            del self.request.session['title']
        return render_to_response(templatePath, response,
                                  request=self.request)

    self.request.session.flash(self.message)
    return HTTPFound(location=self.request.route_url(self.nextStep()))