def save_cnxml(save_dir, cnxml, files):
    """Store a conversion result in ``save_dir`` and build its HTML preview.

    Writes ``index.cnxml`` and every accompanying file, attempts to render
    the HTML preview, and always produces the zip archive through
    ``save_zip``.

    Args:
        save_dir: directory that receives the output.
        cnxml: the CNXML document content.
        files: iterable of ``(filename, content)`` pairs. It is iterated
            here and then handed on to ``save_zip``.

    Raises:
        ConversionError: when ``cnxml_to_htmlpreview`` raises
            ``libxml2.parserError``. The error carries the formatted
            traceback; the archive (without a preview) is written first.
    """
    # write CNXML output
    save_and_backup_file(save_dir, 'index.cnxml', cnxml)

    # write files
    for filename, content in files:
        # Binary mode is important. The with block closes the handle even
        # when the write itself fails.
        with open(os.path.join(save_dir, filename), 'wb') as f:
            f.write(content)

    # we generate the preview and save the error
    conversionerror = None
    try:
        htmlpreview = cnxml_to_htmlpreview(cnxml)
    except libxml2.parserError:
        conversionerror = traceback.format_exc()

    # Zip up all the files. This is done now, since we have all the files
    # available, and it also allows us to post a simple download link.
    # TODO: Do a filesize check
    if conversionerror is None:
        save_and_backup_file(save_dir, 'index.html', htmlpreview)
        save_zip(save_dir, cnxml, htmlpreview, files)
    else:
        # Still ship the archive, just without a preview, before reporting.
        save_zip(save_dir, cnxml, None, files)
        raise ConversionError(conversionerror)
def _download_module(self, module_url):
    """Fetch a module as a zip over SWORD, unpack it and build its preview.

    The connection is opened with the credentials stored under
    ``session['login']``. Everything from the ``sword`` path segment
    onwards is stripped from ``module_url`` before the download.

    Args:
        module_url: URL containing a ``sword`` path segment, for example
            ``http://cnx.org/Members/user001/m17222/sword/editmedia``.

    Raises:
        ValueError: if the URL path has no ``sword`` segment (raised by
            ``list.index``).
        ConversionError: when ``libxml2.parserError`` is raised while the
            preview is rendered or saved; carries the formatted traceback.
    """
    request = self.request
    session = request.session
    login = session['login']
    conn = sword2cnx.Connection(login.service_document_url,
                                user_name=login.username,
                                user_pass=login.password,
                                always_authenticate=True,
                                download_service_document=False)

    # Keep only the part of the path that precedes the 'sword' segment.
    parts = urlparse.urlsplit(module_url)
    path = parts.path.split('/')
    path = path[:path.index('sword')]
    module_url = '%s://%s%s' % (parts.scheme, parts.netloc, '/'.join(path))
    # example: http://cnx.org/Members/user001/m17222/sword/editmedia
    zip_file = conn.get_cnx_module(module_url=module_url, packaging='zip')

    save_dir = get_save_dir(request)
    if save_dir is None:
        request.session['upload_dir'], save_dir = create_save_dir(request)
    extract_to_save_dir(zip_file, save_dir)

    # The with block closes the file even when the read fails.
    with open(os.path.join(save_dir, 'index.cnxml'), 'rb') as cnxml_file:
        cnxml = cnxml_file.read()

    try:
        htmlpreview = cnxml_to_htmlpreview(cnxml)
        save_and_backup_file(save_dir, 'index.html', htmlpreview)
        files = get_files(save_dir)
        save_zip(save_dir, cnxml, htmlpreview, files)
    except libxml2.parserError:
        raise ConversionError(traceback.format_exc())
def process_import(save_dir_path, original_filepath, filename, download_url):
    """Convert an office document to an HTML preview packed in a zip file.

    The document is converted to ODT with the JOD converter client,
    transformed to CNXML, rendered to HTML and stored together with the
    extracted files as ``<filename>.zip`` inside ``save_dir_path``. An
    existing archive of that name is first renamed with a ``~`` suffix.

    Args:
        save_dir_path: directory that receives the ODT and zip files.
        original_filepath: path of the document to convert.
        filename: base name (without extension) used for the outputs.
        download_url: returned unchanged once processing succeeds.

    Returns:
        ``download_url``.

    Raises:
        IOError: if the ODT file cannot be opened after the conversion.
        Exception: whatever the converter raises propagates unchanged.
    """
    # convert from other office format to odt
    odt_filename = '%s.odt' % filename
    odt_filepath = str(os.path.join(save_dir_path, odt_filename))

    # run jod service; failures propagate with their original traceback
    converter = JOD.DocumentConverterClient()
    converter.convert(original_filepath, 'odt', odt_filepath)

    # check that the converted file exists and is readable
    with open(odt_filepath, 'r'):
        pass

    # convert to cnxml
    tree, files, _errors = transform(odt_filepath)
    cnxml = clean_cnxml(etree.tostring(tree))

    # convert to html
    html = cnxml_to_htmlpreview(cnxml)

    # produce zipfile in memory
    ram = StringIO()
    zip_archive = zipfile.ZipFile(ram, 'w')
    zip_archive.writestr('index.html', html)
    for fname, fdata in files.items():
        zip_archive.writestr(fname, fdata)
    zip_archive.close()

    # save zipfile, keeping any previous archive as a '~' backup
    zip_file_path = os.path.join(save_dir_path, '%s.zip' % filename)
    if os.path.exists(zip_file_path):
        os.rename(zip_file_path, zip_file_path + '~')
    with open(zip_file_path, 'wb') as f:
        f.write(ram.getvalue())

    return download_url
def process(self, zip_filename):
    """Unpack an uploaded CNXML zip, build its preview and validate it.

    The archive is extracted into ``self.save_dir`` and the zip itself is
    moved there as ``upload.zip``. ``index.cnxml`` is rendered to
    ``index.xhtml``, then the CNXML is cleaned and validated.

    Args:
        zip_filename: path of the uploaded zip file.

    Returns:
        On success, an ``HTTPFound`` redirect to the route named by
        ``self.nextStep()``, after flashing ``self.message``. On a
        ``ConversionError``, the result of ``render_conversionerror``.
        On any other exception, the rendered ``templates/error.pt`` page
        carrying the traceback.
    """
    try:
        self.zip_archive = zipfile.ZipFile(zip_filename, 'r')

        # Unzip into transform directory
        self.zip_archive.extractall(path=self.save_dir)

        # Rename ZIP file so that the user can download it again
        os.rename(zip_filename, os.path.join(self.save_dir, 'upload.zip'))

        # Read CNXML
        with open(os.path.join(self.save_dir, 'index.cnxml'), 'rt') as fp:
            cnxml = fp.read()

        # Convert the CNXML to XHTML for preview
        html = cnxml_to_htmlpreview(cnxml)
        with open(os.path.join(self.save_dir, 'index.xhtml'), 'w') as index:
            index.write(html)

        cnxml = clean_cnxml(cnxml)
        validate_cnxml(cnxml)

    except ConversionError as e:
        return render_conversionerror(self.request, e.msg)

    except Exception:
        tb = traceback.format_exc()
        # Pass only the traceback: no ``form`` name exists in this scope,
        # so referencing one here would raise NameError and hide the
        # original failure.
        self.write_traceback_to_zipfile(tb)
        templatePath = 'templates/error.pt'
        response = {'traceback': tb}
        if 'title' in self.request.session:
            del self.request.session['title']
        return render_to_response(templatePath, response,
                                  request=self.request)

    self.request.session.flash(self.message)
    return HTTPFound(location=self.request.route_url(self.nextStep()))
def save_cnxml(save_dir, cnxml, files):
    """Store a conversion result in ``save_dir`` and zip it as ``upload.zip``.

    Writes ``index.cnxml`` and every accompanying file, attempts to render
    the XHTML preview, and always writes an ``upload.zip`` holding the
    CNXML, the preview (when it rendered) and the accompanying files.

    Args:
        save_dir: directory that receives the output.
        cnxml: the CNXML document content.
        files: iterable of ``(filename, content)`` pairs. It is iterated
            twice, so it must be re-iterable (e.g. a list).

    Raises:
        ConversionError: when ``cnxml_to_htmlpreview`` raises
            ``libxml2.parserError``. The error carries the formatted
            traceback; the archive (without a preview) is written first.
    """
    # write CNXML output
    save_and_backup_file(save_dir, 'index.cnxml', cnxml)

    # write files
    for filename, content in files:
        # Binary mode is important. The with block closes the handle even
        # when the write itself fails.
        with open(os.path.join(save_dir, filename), 'wb') as f:
            f.write(content)

    # we generate the preview and save the error
    conversionerror = None
    try:
        htmlpreview = cnxml_to_htmlpreview(cnxml)
    except libxml2.parserError:
        conversionerror = traceback.format_exc()

    # Zip up all the files. This is done now, since we have all the files
    # available, and it also allows us to post a simple download link.
    # Note that we cannot use zipfile as context manager, as that is only
    # available from python 2.7
    # TODO: Do a filesize check
    ram = StringIO()
    zip_archive = zipfile.ZipFile(ram, 'w')
    zip_archive.writestr('index.cnxml', cnxml)
    if not conversionerror:
        save_and_backup_file(save_dir, 'index.xhtml', htmlpreview)
        zip_archive.writestr('index.xhtml', htmlpreview)
    for filename, fileObj in files:
        zip_archive.writestr(filename, fileObj)
    zip_archive.close()

    # Pass the bare file name, like the other save_and_backup_file calls
    # above; the directory is already given as the first argument.
    save_and_backup_file(save_dir, 'upload.zip', ram.getvalue(), mode='wb')

    if conversionerror:
        raise ConversionError(conversionerror)
def process(self, zip_filename):
    """Handle an uploaded CNXML zip: extract, preview, clean and validate.

    The archive contents land in ``self.save_dir``; the uploaded zip is
    moved alongside them as ``upload.zip`` and an ``index.xhtml`` preview
    is rendered from ``index.cnxml``.

    Returns the conversion-error page on ``ConversionError``, the generic
    error page (with traceback) on any other exception, and otherwise a
    redirect to the next step after flashing ``self.message``.
    """
    try:
        archive = zipfile.ZipFile(zip_filename, 'r')
        self.zip_archive = archive

        # Extract everything into the transform directory.
        target_dir = self.save_dir
        archive.extractall(path=target_dir)

        # Keep the upload under a fixed name so it can be downloaded again.
        kept_upload = os.path.join(target_dir, 'upload.zip')
        os.rename(zip_filename, kept_upload)

        # Load the CNXML that came with the archive.
        cnxml_path = os.path.join(target_dir, 'index.cnxml')
        with open(cnxml_path, 'rt') as source:
            raw_cnxml = source.read()

        # Render and store the XHTML preview.
        preview = cnxml_to_htmlpreview(raw_cnxml)
        preview_path = os.path.join(target_dir, 'index.xhtml')
        with open(preview_path, 'w') as target:
            target.write(preview)

        # Clean first, then validate the cleaned document.
        validate_cnxml(clean_cnxml(raw_cnxml))

    except ConversionError as error:
        return render_conversionerror(self.request, error.msg)

    except Exception:
        trace_text = traceback.format_exc()
        self.write_traceback_to_zipfile(trace_text)

        # Drop a stale title before showing the error page.
        session = self.request.session
        if 'title' in session:
            del session['title']

        return render_to_response('templates/error.pt',
                                  {'traceback': trace_text},
                                  request=self.request)

    self.request.session.flash(self.message)
    next_url = self.request.route_url(self.nextStep())
    return HTTPFound(location=next_url)
def choose_view(request): check_login(request) templatePath = 'templates/choose.pt' form = Form(request, schema=UploadSchema) field_list = [('upload', 'File')] # clear the session if 'transformerror' in request.session: del request.session['transformerror'] if 'title' in request.session: del request.session['title'] # Check for successful form completion if form.validate(): try: # Catch-all exception block # Create a directory to do the conversions now_string = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') # TODO: This has a good chance of being unique, but even so... temp_dir_name = '%s-%s' % (request.session['username'], now_string) save_dir = os.path.join( request.registry.settings['transform_dir'], temp_dir_name ) os.mkdir(save_dir) # Keep the info we need for next uploads. Note that this # might kill the ability to do multiple tabs in parallel, # unless it gets offloaded onto the form again. request.session['upload_dir'] = temp_dir_name if form.data['upload'] is not None: request.session['filename'] = form.data['upload'].filename # Google Docs Conversion # if we have a Google Docs ID and Access token. 
if form.data['gdocs_resource_id']: gdocs_resource_id = form.data['gdocs_resource_id'] gdocs_access_token = form.data['gdocs_access_token'] form.data['gdocs_resource_id'] = None form.data['gdocs_access_token'] = None (request.session['title'], request.session['filename']) = \ process_gdocs_resource(save_dir, \ gdocs_resource_id, \ gdocs_access_token) # HTML URL Import: elif form.data.get('url_text'): url = form.data['url_text'] form.data['url_text'] = None # Build a regex for Google Docs URLs regex = re.compile("^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/") r = regex.search(url) # Take special action for Google Docs URLs if r: gdocs_resource_id = r.groups()[0] (request.session['title'], request.session['filename']) = \ process_gdocs_resource(save_dir, "document:" + gdocs_resource_id) else: # download html: #html = urllib2.urlopen(url).read() # Simple urlopen() will fail on mediawiki websites like e.g. Wikipedia! import_opener = urllib2.build_opener() import_opener.addheaders = [('User-agent', 'Mozilla/5.0')] try: import_request = import_opener.open(url) html = import_request.read() # transformation cnxml, objects, html_title = htmlsoup_to_cnxml( html, bDownloadImages=True, base_or_source_url=url) request.session['title'] = html_title cnxml = clean_cnxml(cnxml) save_cnxml(save_dir, cnxml, objects.items()) # Keep the info we need for next uploads. Note that # this might kill the ability to do multiple tabs in # parallel, unless it gets offloaded onto the form # again. request.session['filename'] = "HTML Document" validate_cnxml(cnxml) except urllib2.URLError, e: request['errors'] = ['The URL %s could not be opened' %url,] response = { 'form': FormRenderer(form), } return render_to_response(templatePath, response, request=request) # Office, CNXML-ZIP or LaTeX-ZIP file else: # Save the original file so that we can convert, plus keep it. 
original_filename = os.path.join( save_dir, form.data['upload'].filename.replace(os.sep, '_')) saved_file = open(original_filename, 'wb') input_file = form.data['upload'].file shutil.copyfileobj(input_file, saved_file) saved_file.close() input_file.close() form.data['upload'] = None # Check if it is a ZIP file with at least index.cnxml or a LaTeX file in it try: zip_archive = zipfile.ZipFile(original_filename, 'r') is_zip_archive = ('index.cnxml' in zip_archive.namelist()) # Do we have a latex file? if not is_zip_archive: # incoming latex.zip must contain a latex.tex file, where "latex" is the base name. (latex_head, latex_tail) = os.path.split(original_filename) (latex_root, latex_ext) = os.path.splitext(latex_tail) latex_basename = latex_root latex_filename = latex_basename + '.tex' is_latex_archive = (latex_filename in zip_archive.namelist()) except zipfile.BadZipfile: is_zip_archive = False is_latex_archive = False # ZIP package from previous conversion if is_zip_archive: # Unzip into transform directory zip_archive.extractall(path=save_dir) # Rename ZIP file so that the user can download it again os.rename(original_filename, os.path.join(save_dir, 'upload.zip')) # Read CNXML with open(os.path.join(save_dir, 'index.cnxml'), 'rt') as fp: cnxml = fp.read() # Convert the CNXML to XHTML for preview html = cnxml_to_htmlpreview(cnxml) with open(os.path.join(save_dir, 'index.xhtml'), 'w') as index: index.write(html) cnxml = clean_cnxml(cnxml) validate_cnxml(cnxml) # LaTeX elif is_latex_archive: f = open(original_filename) latex_archive = f.read() # LaTeX 2 CNXML transformation cnxml, objects = latex_to_cnxml(latex_archive, original_filename) cnxml = clean_cnxml(cnxml) save_cnxml(save_dir, cnxml, objects.items()) validate_cnxml(cnxml) # OOo / MS Word Conversion else: # Convert from other office format to odt if needed odt_filename = original_filename filename, extension = os.path.splitext(original_filename) if(extension != '.odt'): odt_filename= '%s.odt' % filename 
command = '/usr/bin/soffice -headless -nologo -nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + escape_system(original_filename)[1:-1] + ',' + odt_filename + ')"' os.system(command) try: fp = open(odt_filename, 'r') fp.close() except IOError as io: raise ConversionError("%s not found" % original_filename) # Convert and save all the resulting files. tree, files, errors = transform(odt_filename) cnxml = clean_cnxml(etree.tostring(tree)) save_cnxml(save_dir, cnxml, files.items()) # now validate with jing validate_cnxml(cnxml)