def toCnxml(self, strXml, objZipFile):
    # Convert OpenOffice.org content XML into validated, tidied CNXML.
    #
    # strXml     -- the OOo XML, as a string.
    # objZipFile -- the open archive; styles.xml is read from it and it is
    #               passed on to addMathML().
    #
    # Returns str() of the CNXMLTIDY_XSL output.  Raises OOoImportError when
    # the generated CNXML does not validate.  The OOo-to-OOo cleanup, the
    # section tagging and the MathML embedding are best-effort: when one of
    # them fails, its input is carried forward unchanged.
    #
    # NOTE(review): documented with comments rather than a docstring on
    # purpose -- the method had no docstring, and ZPublisher only publishes
    # objects that carry one.  Confirm before converting to a docstring.

    # stow styles.xml in a tempfile
    styles_xml = objZipFile.read('styles.xml')
    (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
    try:
        # Nested try/finally (instead of try/except/finally) so the
        # descriptor is closed and the file removed even if os.write fails.
        try:
            os.write(tmpsfile, styles_xml)
        finally:
            os.close(tmpsfile)
        stylesPath = tmpsname

        #
        # not strictly required.  this xform removes empty paragraphs.
        # makes other oo 2 cnxml xforms possible.
        #
        try:
            strOOoXml = XMLService.transform(strXml, OO2OO_XSL, stylesPath=stylesPath)
            bMassaged = len(strOOoXml) != 0
        except Exception:
            bMassaged = False
        if not bMassaged:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to OOo XSL transform failed.")
            strOOoXml = strXml
    finally:
        # Clean up styles.xml tempfile
        os.remove(tmpsname)

    #
    # addSectionTags() calls the SAX parser.parse() which expects a file argument
    # thus we force the xml string into being a file object
    #
    try:
        strSectionedXml = addSectionTags(StringIO(strOOoXml))
        bAddedSections = len(strSectionedXml) > 0
    except Exception:
        bAddedSections = False
    if not bAddedSections:
        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add sections.")
        strSectionedXml = strOOoXml

    #
    # add external MathML as child of <draw:object> via SAX parser.
    #
    try:
        strMathedXml = addMathML(StringIO(strSectionedXml), objZipFile)
        bAddedMath = len(strMathedXml) > 0
    except Exception:
        bAddedMath = False
    if not bAddedMath:
        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add MathML.")
        strMathedXml = strSectionedXml

    #
    # oo 2 cnxml via xsl transform.
    #
    try:
        strCnxml = XMLService.transform(strMathedXml, OO2CNXML_XSL)
    except Exception:
        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to CNXML XSL transform failed.")
        # set strCnxml to invalid CNXML so that validation below fails and
        # the fallback path gets its chance
        strCnxml = '<>'

    #
    # Replace Word Symbol Font with correct entity
    #
    strCnxml = symbolReplace(strCnxml, UNICODE_DICTIONARY)

    #
    # Global id generation
    #
    strCnxml = autoIds(strCnxml, prefix='oo-')

    #
    # Error handling
    #
    errors = XMLService.validate(strCnxml)
    if errors:
        if not (bAddedSections or bAddedMath):
            # Nothing was doctored, so there is nothing simpler to retry with.
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                     "Invalid CNXML generated. errors were \n" + str(errors))
            raise OOoImportError("Generated CNXML is invalid")

        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                 "Invalid CNXML generated. Trying w/o sections and MathML. Errors were \n"
                 + str([str(e) for e in errors]))
        # NOTE(review): unlike the main path, this retry does not run
        # symbolReplace() -- confirm whether that is intentional.
        try:
            strCnxml = XMLService.transform(strXml, OO2CNXML_XSL)
            strCnxml = autoIds(strCnxml, prefix='oo-')
        except Exception:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                     "OOo to CNXML XSL transform failed again with the undoctored OOo Xml.")
            strCnxml = '<>'
        errors = XMLService.validate(strCnxml)
        if errors:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                     "Still...invalid CNXML. errors were \n" + str(errors))
            raise OOoImportError("Generated CNXML is invalid")

    #
    # Tidy up the CNXML
    #
    docCnxmlClean = XMLService.transform(strCnxml, CNXMLTIDY_XSL)
    return str(docCnxmlClean)
def SearchableText(self):
    """Return the module's default file source, run through the `baretext` stylesheet, for searching"""
    return XMLService.transform(self.getDefaultFile().getSource(), baretext)
objZipFile = zipfile.ZipFile(strZipFile, 'r')  # no 'rb' since 'b' => binary?

#
# Pass #1 - OOo Xml to OOo Xml xform - change one entry table & remove empty <text:p>
#
# This pass is best-effort: on an XMLParserError, or when the transform hands
# back an empty string, the untouched strOOoXml is carried forward.
import sys  # only needed for sys.exc_info() in the handler below

try:
    # styles.xml is copied into a tempfile whose path is given to the
    # stylesheet through the stylesPath parameter.
    styles_xml = objZipFile.read('styles.xml')
    (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
    try:
        try:
            os.write(tmpsfile, styles_xml)
        finally:
            os.close(tmpsfile)
        stylesPath = tmpsname
        strOutputMassageOOoXml = XMLService.transform(strOOoXml, OO2OO_XSL, stylesPath=stylesPath)
    finally:
        # Remove the tempfile whether or not the transform raised.
        os.remove(tmpsname)

    if len(strOutputMassageOOoXml) > 0:
        print("**** suceeded in OO 2 OO XSL transform to remove empty paragraphs.")
    else:
        # No exception exists on this path, so there is no error message to
        # append (referencing strErrorMsg here used to raise NameError).
        print("**** failed in OO 2 OO XSL transform to remove empty paragraphs. return an empty string. ignore and continue.\n")
        strOutputMassageOOoXml = strOOoXml
except XMLService.XMLParserError:
    # Fetched via sys.exc_info() so the handler parses the same way under
    # both the old "except X, e" and the newer "except X as e" grammars.
    strErrorMsg = sys.exc_info()[1]
    print("**** failed in OO 2 OO XSL transform to remove empty paragraphs. raised exception. ignore and continue.\n" + str(strErrorMsg))
    strOutputMassageOOoXml = strOOoXml
    #raise
def convert(self, data, outdata, **kwargs):
    """Input is a zip file. Output is idata, with getData being index.cnxml and subObjects being other siblings.

    `data` is the raw zip content.  Every top-level file other than
    index.cnxml and index_auto_generated.cnxml becomes a sub-object; a
    non-empty index.cnxml becomes the main data and its MDML metadata is
    extracted.  The names of sub-directories (whose contents are not
    imported), the ignored files and the extracted metadata are stored in
    outdata's metadata under 'subdirs', 'ignored' and 'metadata'.
    Returns `outdata`.
    """
    subdirs = {}
    ignored = []
    objects = {}
    mdata = {}

    fakefile = StringIO(data)
    try:
        # Called `archive` so the local cannot shadow a module named zipfile.
        archive = ZipFile(fakefile, 'r')
        try:
            namelist = archive.namelist()

            # Leading directory shared by every entry ('' when there is none).
            prefix = ''
            if len(namelist) > 1:
                prefix = os.path.commonprefix(namelist)
            elif len(namelist) == 1:
                prefix = namelist[0]
            # rfind() gives -1 when there is no slash, which slices to ''.
            prefix = prefix[:prefix.rfind("/") + 1]
            preflen = len(prefix)

            for name in namelist:
                modname = name[preflen:]
                if not modname:
                    # some zip programs store directories by themselves
                    continue
                isubdir = modname.find('/')
                if isubdir != -1:
                    # subdirs, incl. especially 'stylesheets', not imported
                    subdirs[modname[:isubdir]] = 1
                    continue
                ## disabled until we get a better handle on "viewable" export...
                #if modname == 'index.xhtml':  # do not import index.xhtml
                #    ignored.append('index.xhtml')
                #    continue
                ## probably also do the same with README
                if modname == 'index_auto_generated.cnxml':
                    # do not import autogenerated cnxml
                    ignored.append('index_auto_generated.cnxml')
                    continue

                unzipfile = archive.read(name)
                if modname != "index.cnxml":
                    objects[modname] = unzipfile
                    continue
                if not unzipfile:
                    ignored.append('index.cnxml')
                    continue

                outdata.setData(unzipfile)
                # Parse out the mdml for trusted import
                jsonstr = XMLService.transform(unzipfile, MDML2JSON_XSL)
                metadict = demjson.decode(jsonstr)

                # First, direct copies
                for k in ('abstract', 'title', 'language'):
                    val = metadict.get(k)
                    if isinstance(val, unicode):
                        val = val.encode('UTF-8')
                    if not val:
                        val = ''
                    mdata[k] = val

                # Now, unwrap one level of dict for lists
                for k in ('subjectlist', 'keywordlist'):
                    listdict = metadict.get(k)
                    if listdict:
                        lkey = list(listdict.keys())[0]  # should only be one
                        mlist = listdict[lkey]
                        if isinstance(mlist, basestring):
                            # a lone entry arrives as a bare string
                            listdict[lkey] = [mlist]
                        mdata.update(listdict)

                # Rename
                if 'content-id' in metadict:
                    mdata['objectId'] = metadict['content-id'].encode('UTF-8')
                if 'license' in metadict:
                    licensedict = metadict['license']
                    if 'url' in licensedict:
                        mdata['license'] = licensedict['url'].encode('UTF-8')
                    else:
                        mdata['license'] = licensedict['href'].encode('UTF-8')

                # DateTime strings
                for k in ('created', 'revised'):
                    if k in metadict:
                        mdata[k] = DateTime(metadict[k])

                # And the trickiest, unwrap and split roles (userids must be str, not unicode)
                if 'roles' in metadict:
                    roles = metadict['roles']['role']
                    if isinstance(roles, dict):
                        # presumably a lone role decodes to a dict rather
                        # than a one-element list (as the lists above do);
                        # iterating a dict here would raise TypeError.
                        roles = [roles]
                    mdata.update(dict([(r['type'] + 's', str(r['_text']).split())
                                       for r in roles]))
                    #FIXME need to do collaborators here, as well - untested below
                    mdata['collaborators'] = {}.fromkeys(
                        ' '.join([r['_text'] for r in roles]).encode('UTF-8').split()).keys()
        finally:
            archive.close()
    finally:
        fakefile.close()

    meta = outdata.getMetadata()
    meta['subdirs'] = subdirs.keys()
    meta['ignored'] = ignored
    meta['metadata'] = mdata

    outdata.setSubObjects(objects)
    return outdata
## Script (Python) "onEditChangeSet" ##bind container=container ##bind context=context ##bind namespace= ##bind script=script ##bind subpath=traverse_subpath ##parameters= ##title=Compute object differences ## from Products.CNXMLDocument.XMLService import XMLError from Products.CNXMLDocument import XMLService diffs = context.getDiffs() if not diffs: return "no changes" return XMLService.transform(diffs[0].htmlDiff(), "/home/simon/xml/cnxml/style/unibrowser.xsl")
def mdml2json(self, content):
    # Apply the MDML2JSON_XSL stylesheet to `content` and hand back whatever
    # XMLService.transform produces.
    # (Comment rather than docstring: the method had none, and ZPublisher
    # only publishes objects that carry a docstring.)
    jsonstr = XMLService.transform(content, MDML2JSON_XSL)
    return jsonstr
def convert(self, data, outdata, **kwargs):
    """Input is a zip file. Output is idata, with getData being index.cnxml and subObjects being other siblings.

    `data` is the raw zip content.  Every top-level file other than
    index.cnxml and index_auto_generated.cnxml becomes a sub-object; a
    non-empty index.cnxml becomes the main data and its MDML metadata is
    extracted.  The names of sub-directories (whose contents are not
    imported), the ignored files and the extracted metadata are stored in
    outdata's metadata under 'subdirs', 'ignored' and 'metadata'.
    Returns `outdata`.
    """
    subdirs = {}
    ignored = []
    objects = {}
    mdata = {}

    fakefile = StringIO(data)
    try:
        # Called `archive` so the local cannot shadow a module named zipfile.
        archive = ZipFile(fakefile, 'r')
        try:
            namelist = archive.namelist()

            # Leading directory shared by every entry ('' when there is none).
            prefix = ''
            if len(namelist) > 1:
                prefix = os.path.commonprefix(namelist)
            elif len(namelist) == 1:
                prefix = namelist[0]
            # rfind() gives -1 when there is no slash, which slices to ''.
            prefix = prefix[:prefix.rfind("/") + 1]
            preflen = len(prefix)

            for name in namelist:
                modname = name[preflen:]
                if not modname:
                    # some zip programs store directories by themselves
                    continue
                isubdir = modname.find('/')
                if isubdir != -1:
                    # subdirs, incl. especially 'stylesheets', not imported
                    subdirs[modname[:isubdir]] = 1
                    continue
                ## disabled until we get a better handle on "viewable" export...
                #if modname == 'index.xhtml':  # do not import index.xhtml
                #    ignored.append('index.xhtml')
                #    continue
                ## probably also do the same with README
                if modname == 'index_auto_generated.cnxml':
                    # do not import autogenerated cnxml
                    ignored.append('index_auto_generated.cnxml')
                    continue

                unzipfile = archive.read(name)
                if modname != "index.cnxml":
                    objects[modname] = unzipfile
                    continue
                if not unzipfile:
                    ignored.append('index.cnxml')
                    continue

                outdata.setData(unzipfile)
                # Parse out the mdml for trusted import
                jsonstr = XMLService.transform(unzipfile, MDML2JSON_XSL)
                metadict = demjson.decode(jsonstr)

                # First, direct copies
                for k in ('abstract', 'title', 'language'):
                    val = metadict.get(k)
                    if isinstance(val, unicode):
                        val = val.encode('UTF-8')
                    if not val:
                        val = ''
                    mdata[k] = val

                # Now, unwrap one level of dict for lists
                for k in ('subjectlist', 'keywordlist'):
                    listdict = metadict.get(k)
                    if listdict:
                        lkey = list(listdict.keys())[0]  # should only be one
                        mlist = listdict[lkey]
                        if isinstance(mlist, basestring):
                            # a lone entry arrives as a bare string
                            listdict[lkey] = [mlist]
                        mdata.update(listdict)

                # Rename
                if 'content-id' in metadict:
                    mdata['objectId'] = metadict['content-id'].encode('UTF-8')
                if 'license' in metadict:
                    licensedict = metadict['license']
                    if 'url' in licensedict:
                        mdata['license'] = licensedict['url'].encode('UTF-8')
                    else:
                        mdata['license'] = licensedict['href'].encode('UTF-8')

                # DateTime strings
                for k in ('created', 'revised'):
                    if k in metadict:
                        mdata[k] = DateTime(metadict[k])

                # And the trickiest, unwrap and split roles (userids must be str, not unicode)
                if 'roles' in metadict:
                    roles = metadict['roles']['role']
                    if isinstance(roles, dict):
                        # presumably a lone role decodes to a dict rather
                        # than a one-element list (as the lists above do);
                        # iterating a dict here would raise TypeError.
                        roles = [roles]
                    mdata.update(dict([(r['type'] + 's', str(r['_text']).split())
                                       for r in roles]))
                    #FIXME need to do collaborators here, as well - untested below
                    mdata['collaborators'] = {}.fromkeys(
                        ' '.join([r['_text'] for r in roles]).encode('UTF-8').split()).keys()
        finally:
            archive.close()
    finally:
        fakefile.close()

    meta = outdata.getMetadata()
    meta['subdirs'] = subdirs.keys()
    meta['ignored'] = ignored
    meta['metadata'] = mdata

    outdata.setSubObjects(objects)
    return outdata
def toCnxml(self, strXml, objZipFile):
    # Convert OpenOffice.org content XML into validated, tidied CNXML.
    #
    # strXml     -- the OOo XML, as a string.
    # objZipFile -- the open archive; styles.xml is read from it and it is
    #               passed on to addMathML().
    #
    # Returns str() of the CNXMLTIDY_XSL output.  Raises OOoImportError when
    # the generated CNXML does not validate.  The OOo-to-OOo cleanup, the
    # section tagging and the MathML embedding are best-effort: when one of
    # them fails, its input is carried forward unchanged.
    #
    # NOTE(review): documented with comments rather than a docstring on
    # purpose -- the method had no docstring, and ZPublisher only publishes
    # objects that carry one.  Confirm before converting to a docstring.

    # stow styles.xml in a tempfile
    styles_xml = objZipFile.read('styles.xml')
    (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
    try:
        # Nested try/finally (instead of try/except/finally) so the
        # descriptor is closed and the file removed even if os.write fails.
        try:
            os.write(tmpsfile, styles_xml)
        finally:
            os.close(tmpsfile)
        stylesPath = tmpsname

        #
        # not strictly required.  this xform removes empty paragraphs.
        # makes other oo 2 cnxml xforms possible.
        #
        try:
            strOOoXml = XMLService.transform(strXml, OO2OO_XSL, stylesPath=stylesPath)
            bMassaged = len(strOOoXml) != 0
        except Exception:
            bMassaged = False
        if not bMassaged:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to OOo XSL transform failed.")
            strOOoXml = strXml
    finally:
        # Clean up styles.xml tempfile
        os.remove(tmpsname)

    #
    # addSectionTags() calls the SAX parser.parse() which expects a file argument
    # thus we force the xml string into being a file object
    #
    try:
        strSectionedXml = addSectionTags(StringIO(strOOoXml))
        bAddedSections = len(strSectionedXml) > 0
    except Exception:
        bAddedSections = False
    if not bAddedSections:
        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add sections.")
        strSectionedXml = strOOoXml

    #
    # add external MathML as child of <draw:object> via SAX parser.
    #
    try:
        strMathedXml = addMathML(StringIO(strSectionedXml), objZipFile)
        bAddedMath = len(strMathedXml) > 0
    except Exception:
        bAddedMath = False
    if not bAddedMath:
        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add MathML.")
        strMathedXml = strSectionedXml

    #
    # oo 2 cnxml via xsl transform.
    #
    try:
        strCnxml = XMLService.transform(strMathedXml, OO2CNXML_XSL)
    except Exception:
        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to CNXML XSL transform failed.")
        # set strCnxml to invalid CNXML so that validation below fails and
        # the fallback path gets its chance
        strCnxml = '<>'

    #
    # Replace Word Symbol Font with correct entity
    #
    strCnxml = symbolReplace(strCnxml, UNICODE_DICTIONARY)

    #
    # Global id generation
    #
    strCnxml = autoIds(strCnxml, prefix='oo-')

    #
    # Error handling
    #
    errors = XMLService.validate(strCnxml)
    if errors:
        if not (bAddedSections or bAddedMath):
            # Nothing was doctored, so there is nothing simpler to retry with.
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                     "Invalid CNXML generated. errors were \n" + str(errors))
            raise OOoImportError("Generated CNXML is invalid")

        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                 "Invalid CNXML generated. Trying w/o sections and MathML. Errors were \n"
                 + str([str(e) for e in errors]))
        # NOTE(review): unlike the main path, this retry does not run
        # symbolReplace() -- confirm whether that is intentional.
        try:
            strCnxml = XMLService.transform(strXml, OO2CNXML_XSL)
            strCnxml = autoIds(strCnxml, prefix='oo-')
        except Exception:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                     "OOo to CNXML XSL transform failed again with the undoctored OOo Xml.")
            strCnxml = '<>'
        errors = XMLService.validate(strCnxml)
        if errors:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                     "Still...invalid CNXML. errors were \n" + str(errors))
            raise OOoImportError("Generated CNXML is invalid")

    #
    # Tidy up the CNXML
    #
    docCnxmlClean = XMLService.transform(strCnxml, CNXMLTIDY_XSL)
    return str(docCnxmlClean)
def cnxml2json(self, content):
    # Apply the CNXML2JSON_XSL stylesheet to `content` and hand back whatever
    # XMLService.transform produces.
    # (Comment rather than docstring: the method had none, and ZPublisher
    # only publishes objects that carry a docstring.)
    jsonstr = XMLService.transform(content, CNXML2JSON_XSL)
    return jsonstr
def mdml2json(self, content):
    # Apply the MDML2JSON_XSL stylesheet to `content` and hand back whatever
    # XMLService.transform produces.
    # (Comment rather than docstring: the method had none, and ZPublisher
    # only publishes objects that carry a docstring.)
    jsonstr = XMLService.transform(content, MDML2JSON_XSL)
    return jsonstr
def cnxml2json(self, content):
    # Apply the CNXML2JSON_XSL stylesheet to `content` and hand back whatever
    # XMLService.transform produces.
    # (Comment rather than docstring: the method had none, and ZPublisher
    # only publishes objects that carry a docstring.)
    jsonstr = XMLService.transform(content, CNXML2JSON_XSL)
    return jsonstr
def convert(self, data, outdata, **kwargs):
    """Input is a zip file. Output is idata, with getData being index.cnxml and subObjects being other siblings.

    `data` is the raw zip content.  The archive may hold at most one
    transformable document: an index.cnxml, one word-processor file or one
    LaTeX file; otherwise CNXImportError is raised.  A mets.xml entry is
    turned into meta['properties']; an index.cnxml becomes the main data
    and its link-groups are collected into meta['featured_links']; word and
    LaTeX files are handed to oo_to_cnxml / latex_to_folder (the latter
    reads kwargs['user_name']).  Remaining top-level files become
    sub-objects.  Finally an attribution note built from the 'journal',
    'year' and 'url' properties is inserted into the CNXML.
    Returns `outdata`.
    """
    wordexts = ('.odt', '.sxw', '.docx', '.rtf', '.doc')

    fakefile = StringIO(data)
    try:
        # Called `archive` so the local cannot shadow a module named zipfile.
        archive = ZipFile(fakefile, 'r')
        # try/finally: the archive and buffer used to stay open whenever
        # CNXImportError (or any transform error) was raised below.
        try:
            namelist = archive.namelist()

            # Leading directory shared by every entry ('' when there is none).
            prefix = ''
            if len(namelist) > 1:
                prefix = os.path.commonprefix(namelist)
            elif len(namelist) == 1:
                prefix = namelist[0]
            # rfind() gives -1 when there is no slash, which slices to ''.
            prefix = prefix[:prefix.rfind("/") + 1]

            # Strip prefix from namelist entries
            namelist = [name[len(prefix):] for name in namelist]
            zLOG.LOG("Sword Transform", zLOG.INFO, "files in zip=%s" % namelist)

            meta = outdata.getMetadata()
            meta['properties'] = {}
            objects = {}

            containsIndexCnxml = ('index.cnxml' in namelist)
            wordfiles = len([True for m in namelist for e in wordexts
                             if m.endswith(e)])
            latexfiles = len([True for m in namelist if m.endswith('.tex')])
            if sum([int(containsIndexCnxml), wordfiles, latexfiles]) > 1:
                # The upload contains more than one transformable file, ie
                # it has a index.cnxml and latex/word content, or it has both latex
                # and word content, or more than one latex or word file.
                raise CNXImportError(
                    "Import has more than one transformable file. It has "
                    "%d index.cnxml files, %d word files and "
                    "%d LaTeX files" % (containsIndexCnxml, wordfiles, latexfiles))

            for modname in namelist:
                if not modname:
                    # some zip programs show directories by themselves
                    continue
                if modname.find('/') != -1:
                    # subdirs, incl. especially 'stylesheets', not imported
                    continue

                unzipfile = archive.read(prefix + modname)

                if modname == "mets.xml":
                    # Write metadata
                    zLOG.LOG("Sword Transform", zLOG.INFO, "starting...")
                    simplified = XMLService.transform(unzipfile, SWORD2RME_XSL)
                    jsonstr = XMLService.transform(simplified, XML2JSON_XSL)
                    meta['properties'] = json.decode(jsonstr)
                elif modname == "index.cnxml":
                    # hook here for featured links
                    # elaborate the metadata returned in order to add the featured links.
                    meta['featured_links'] = []
                    if unzipfile:
                        outdata.setData(StringIO(unzipfile))
                        dom = parseString(unzipfile)
                        links = meta.get('featured_links', [])
                        for group in dom.getElementsByTagName('link-group'):
                            group_type = group.getAttribute('type').encode(self.encoding)
                            for link in group.getElementsByTagName('link'):
                                # NOTE(review): a <link> with no child node
                                # has firstChild None and fails here --
                                # confirm whether that can occur.
                                title = link.firstChild.toxml().encode(self.encoding)
                                url = link.getAttribute('url').encode(self.encoding)
                                strength = link.getAttribute('strength').encode(self.encoding)
                                links.append({'url': url,
                                              'title': title,
                                              'type': group_type,
                                              'strength': strength})
                        meta['featured_links'] = links
                elif containsIndexCnxml:
                    # index.cnxml is the document; everything else rides along
                    objects[modname] = unzipfile
                elif [True for e in wordexts if modname.endswith(e)]:
                    # This is a word file
                    oo_to_cnxml().convert(unzipfile, outdata, **kwargs)
                elif modname.endswith('.tex'):
                    # This is LaTeX
                    latex_to_folder().convert(unzipfile, outdata,
                                              original_file_name='sword-import-file.tex',
                                              user_name=kwargs['user_name'])
                    # LaTeX transform returns straight text, make it
                    # a file object
                    outdata.setData(StringIO(outdata.getData()))
                else:
                    objects[modname] = unzipfile
        finally:
            archive.close()
    finally:
        fakefile.close()

    meta = outdata.getMetadata()

    # Add attribution note to the cnxml
    props = meta['properties']
    params = {}
    for key in ('journal', 'year', 'url'):
        if unicode(key) in props:
            value = props[unicode(key)]
            if isinstance(value, unicode):
                value = value.encode('utf-8')
            params[key] = value

    zLOG.LOG("Sword Transform", zLOG.INFO, "attribution dict=%s" % params)
    data = outdata.getData()
    if data and len(data.getvalue()) > 0:
        attributed = XMLService.transform(data.getvalue(),
                                          SWORD_INSERT_ATTRIBUTION_XSL, **params)
        outdata.setData(StringIO(unicode(attributed, 'utf-8')))
    else:
        zLOG.LOG("Sword Transform", zLOG.INFO,
                 "Skipping adding attributions because no cnxml was generated...")

    #meta['subdirs'] = subdirs.keys()

    objects.update(outdata.getSubObjects())
    outdata.setSubObjects(objects)

    return outdata
strOOoXml = strInputOOoXml
doc = XMLService.parseString(strOOoXml)

objZipFile = zipfile.ZipFile(strZipFile, 'r')  # no 'rb' since 'b' => binary?

#
# Pass #1 - OOo Xml to OOo Xml xform - change one entry table & remove empty <text:p>
#
# This pass is best-effort: on an XMLParserError, or when the transform hands
# back an empty string, the untouched strOOoXml is carried forward.
import sys  # only needed for sys.exc_info() in the handler below

try:
    # styles.xml is copied into a tempfile whose path is given to the
    # stylesheet through the stylesPath parameter.
    styles_xml = objZipFile.read('styles.xml')
    (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
    try:
        try:
            os.write(tmpsfile, styles_xml)
        finally:
            os.close(tmpsfile)
        stylesPath = tmpsname
        strOutputMassageOOoXml = XMLService.transform(strOOoXml, OO2OO_XSL, stylesPath=stylesPath)
    finally:
        # Remove the tempfile whether or not the transform raised.
        os.remove(tmpsname)

    if len(strOutputMassageOOoXml) > 0:
        print("**** suceeded in OO 2 OO XSL transform to remove empty paragraphs.")
    else:
        # No exception exists on this path, so there is no error message to
        # append (referencing strErrorMsg here used to raise NameError).
        print("**** failed in OO 2 OO XSL transform to remove empty paragraphs. return an empty string. ignore and continue.\n")
        strOutputMassageOOoXml = strOOoXml
except XMLService.XMLParserError:
    # Fetched via sys.exc_info() so the handler parses the same way under
    # both the old "except X, e" and the newer "except X as e" grammars.
    strErrorMsg = sys.exc_info()[1]
    print("**** failed in OO 2 OO XSL transform to remove empty paragraphs. raised exception. ignore and continue.\n" + str(strErrorMsg))
    strOutputMassageOOoXml = strOOoXml
    #raise

# Dump the result of pass #1 next to the other outputs.
# print "wrting to : '" + strOutputMassageOOoXmlFileBase + '.oo2oo.xml'
# NOTE(review): the file is not closed within this span -- confirm that the
# code following it does so.
fileMassagedOutputXml = open(strOutputMassageOOoXmlFileBase + '.oo2oo.xml', "w")
fileMassagedOutputXml.write(str(strOutputMassageOOoXml))
def SearchableText(self):
    """Return the module's default file source, run through the `baretext` stylesheet, for searching"""
    return XMLService.transform(self.getDefaultFile().getSource(), baretext)