def toCnxml(self, strXml, objZipFile):
        """Transform OpenOffice.org document XML into tidied CNXML.

        Arguments:
          strXml     -- the OOo document XML to convert
          objZipFile -- the opened OOo archive; 'styles.xml' is read from it
                        and it is handed on to addMathML()

        The input goes through an OOo-to-OOo cleanup transform, section
        tagging and MathML insertion.  Each of those steps is best-effort: on
        an exception or empty output the step is logged and its input is
        passed on unchanged.  The result is transformed to CNXML, has symbols
        replaced and ids generated, and is validated.  If validation fails and
        sections or MathML had been added, the conversion is retried once from
        the untouched strXml.

        Returns the CNXML, after the CNXMLTIDY_XSL transform, as a str.
        Raises OOoImportError when the generated CNXML does not validate.
        """
        # local import: only needed to attach exception details to log entries
        import sys

        subsystem = "OOo2CNXML Transform"

        # stow styles.xml in a tempfile
        styles_xml = objZipFile.read('styles.xml')
        (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
        try:
            try:
                os.write(tmpsfile, styles_xml)
            finally:
                # never leak the descriptor, even when the write fails
                os.close(tmpsfile)

            #
            # not strictly required.  this xform removes empty paragraphs.
            # makes other oo 2 cnxml xforms possible.
            #
            try:
                strOOoXml = XMLService.transform(strXml, OO2OO_XSL, stylesPath=tmpsname)
                if len(strOOoXml) == 0:
                    zLOG.LOG(subsystem, zLOG.INFO, "OOo to OOo XSL transform failed.")
                    strOOoXml = strXml
            except Exception:
                zLOG.LOG(subsystem, zLOG.INFO, "OOo to OOo XSL transform failed.",
                         error=sys.exc_info())
                strOOoXml = strXml
        finally:
            # Clean up styles.xml tempfile on every path out of this step
            os.remove(tmpsname)

        #
        # addSectionTags() calls the SAX parser.parse() which expects a file argument
        # thus we force the xml string into being a file object
        #
        try:
            strSectionedXml = addSectionTags(StringIO(strOOoXml))
            bAddedSections = len(strSectionedXml) > 0
            if not bAddedSections:
                zLOG.LOG(subsystem, zLOG.INFO, "Failed to add sections.")
        except Exception:
            zLOG.LOG(subsystem, zLOG.INFO, "Failed to add sections.",
                     error=sys.exc_info())
            bAddedSections = False
        if not bAddedSections:
            strSectionedXml = strOOoXml

        #
        # add external MathML as child of <draw:object> via SAX parser.
        #
        try:
            strMathedXml = addMathML(StringIO(strSectionedXml), objZipFile)
            bAddedMath = len(strMathedXml) > 0
            if not bAddedMath:
                zLOG.LOG(subsystem, zLOG.INFO, "Failed to add MathML.")
        except Exception:
            zLOG.LOG(subsystem, zLOG.INFO, "Failed to add MathML.",
                     error=sys.exc_info())
            bAddedMath = False
        if not bAddedMath:
            strMathedXml = strSectionedXml

        #
        # oo 2 cnxml via xsl transform.
        #
        try:
            strCnxml = XMLService.transform(strMathedXml, OO2CNXML_XSL)
        except Exception:
            zLOG.LOG(subsystem, zLOG.INFO, "OOo to CNXML XSL transform failed.",
                     error=sys.exc_info())
            # set strCnxml to invalid CNXML ...
            strCnxml = '<>'

        #
        # Replace Word Symbol Font with correct entity
        #
        strCnxml = symbolReplace(strCnxml, UNICODE_DICTIONARY)

        #
        # Global id generation
        #
        strCnxml = autoIds(strCnxml, prefix='oo-')

        #
        # Error handling
        #
        errors = XMLService.validate(strCnxml)
        if errors:
            if not (bAddedSections or bAddedMath):
                # nothing optional was added, so there is nothing to retry without
                zLOG.LOG(subsystem, zLOG.INFO, "Invalid CNXML generated. errors were \n" + str(errors))
                raise OOoImportError("Generated CNXML is invalid")

            zLOG.LOG(subsystem, zLOG.INFO, "Invalid CNXML generated. Trying w/o sections and MathML. Errors were \n" + str([str(e) for e in errors]))

            # NOTE(review): unlike the first pass, the retry does not run
            # symbolReplace() -- confirm whether that is intentional.
            try:
                strCnxml = XMLService.transform(strXml, OO2CNXML_XSL)
                strCnxml = autoIds(strCnxml, prefix='oo-')
            except Exception:
                zLOG.LOG(subsystem, zLOG.INFO, "OOo to CNXML XSL transform failed again with the undoctored OOo Xml.",
                         error=sys.exc_info())
                strCnxml = '<>'

            errors = XMLService.validate(strCnxml)
            if errors:
                zLOG.LOG(subsystem, zLOG.INFO, "Still...invalid CNXML. errors were \n" + str(errors))
                raise OOoImportError("Generated CNXML is invalid")

        #
        # Tidy up the CNXML
        #
        docCnxmlClean = XMLService.transform(strCnxml, CNXMLTIDY_XSL)

        return str(docCnxmlClean)
 def SearchableText(self):
     """Return the default file's source run through the baretext transform.

     The result is what gets handed back as the searchable text of the module.
     """
     return XMLService.transform(self.getDefaultFile().getSource(), baretext)
示例#3
0
    objZipFile = zipfile.ZipFile(strZipFile,
                                 'r')  # no 'rb' since 'b' => binary?

    #
    # Pass #1 - OOo Xml to OOo Xml xform - change one entry table & remove empty <text:p>
    #
    try:
        styles_xml = objZipFile.read('styles.xml')
        (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
        os.write(tmpsfile, styles_xml)
        os.close(tmpsfile)
        stylesPath = tmpsname

        strOutputMassageOOoXml = XMLService.transform(strOOoXml,
                                                      OO2OO_XSL,
                                                      stylesPath=stylesPath)
        if len(strOutputMassageOOoXml) > 0:
            print "**** suceeded in OO 2 OO XSL transform to remove empty paragraphs."
        else:
            print "**** failed in OO 2 OO XSL transform to remove empty paragraphs. return an empty string. ignore and continue.\n" + str(
                strErrorMsg)
            strOutputMassageOOoXml = strOOoXml

        os.remove(tmpsname)
    except XMLService.XMLParserError, strErrorMsg:
        print "**** failed in OO 2 OO XSL transform to remove empty paragraphs. raised exception. ignore and continue.\n" + str(
            strErrorMsg)
        strOutputMassageOOoXml = strOOoXml
        #raise
    def convert(self, data, outdata, **kwargs):
        """Unpack a zipped module into outdata.

        Input is a zip file. Output is idata, with getData being index.cnxml
        and subObjects being other siblings.

        Arguments:
          data    -- raw contents of the zip archive
          outdata -- receives the result through setData(), setSubObjects()
                     and the mapping returned by getMetadata()
          kwargs  -- accepted for interface compatibility; not used here

        Returns outdata.  Its metadata mapping gains three keys:
          'subdirs'  -- first path component of every entry that sits in a
                        subdirectory (such entries are not imported)
          'ignored'  -- names of files deliberately left out
          'metadata' -- values decoded from the MDML found in index.cnxml
        """
        fakefile = StringIO(data)
        archive = ZipFile(fakefile, 'r')

        subdirs = {}
        ignored = []
        objects = {}
        mdata = {}
        try:
            namelist = archive.namelist()

            # Directory prefix shared by all entries: everything up to and
            # including the last '/' (rfind() == -1 yields the empty prefix).
            if len(namelist) > 1:
                prefix = os.path.commonprefix(namelist)
            elif namelist:
                prefix = namelist[0]
            else:
                prefix = ''
            prefix = prefix[:prefix.rfind("/") + 1]
            preflen = len(prefix)

            for name in namelist:
                modname = name[preflen:]
                if not modname:               # some zip programs store directories by themselves
                    continue
                isubdir = modname.find('/')
                if isubdir != -1:             # subdirs, incl. especially 'stylesheets', not imported
                    subdirs[modname[:isubdir]] = 1
                    continue
                ## disabled until we get a better handle on "viewable" export...
                #if modname == 'index.xhtml':  # do not import index.xhtml
                #  ignored.append('index.xhtml')
                #  continue
                ## probably also do the same with README
                if modname == 'index_auto_generated.cnxml':  # do not import autogenerated cnxml
                    ignored.append('index_auto_generated.cnxml')
                    continue

                unzipfile = archive.read(name)
                if modname != "index.cnxml":
                    objects[modname] = unzipfile
                    continue
                if not unzipfile:
                    ignored.append('index.cnxml')
                    continue

                outdata.setData(unzipfile)
                # Parse out the mdml for trusted import
                jsonstr = XMLService.transform(unzipfile, MDML2JSON_XSL)
                metadict = demjson.decode(jsonstr)

                # First, direct copies
                for k in ('abstract', 'title', 'language'):
                    val = metadict.get(k)
                    if isinstance(val, unicode):
                        val = val.encode('UTF-8')
                    mdata[k] = val or ''

                # Now, unwrap one level of dict for lists
                for k in ('subjectlist', 'keywordlist'):
                    listdict = metadict.get(k)
                    if listdict:
                        lkey = list(listdict.keys())[0]  # should only be one
                        mlist = listdict[lkey]
                        if isinstance(mlist, basestring):
                            # a lone entry arrives as a bare string; wrap it
                            listdict[lkey] = [mlist]
                        mdata.update(listdict)

                # Rename
                if 'content-id' in metadict:
                    mdata['objectId'] = metadict['content-id'].encode('UTF-8')
                if 'license' in metadict:
                    licenseinfo = metadict['license']
                    if 'url' in licenseinfo:
                        mdata['license'] = licenseinfo['url'].encode('UTF-8')
                    else:
                        mdata['license'] = licenseinfo['href'].encode('UTF-8')

                # DateTime strings
                for k in ('created', 'revised'):
                    if k in metadict:
                        mdata[k] = DateTime(metadict[k])

                # And the trickiest, unwrap and split roles (userids must be str, not unicode)
                if 'roles' in metadict:
                    roles = metadict['roles']['role']
                    # NOTE(review): assumes a lone role may decode to a single
                    # dict instead of a list, like the bare-string case handled
                    # for the subject/keyword lists -- confirm against the XSL.
                    if isinstance(roles, dict):
                        roles = [roles]
                    mdata.update(dict([(r['type'] + 's', str(r['_text']).split())
                                       for r in roles]))
                    #FIXME need to do collaborators here, as well - untested below
                    alltext = ' '.join([r['_text'] for r in roles])
                    mdata['collaborators'] = {}.fromkeys(alltext.encode('UTF-8').split()).keys()
        finally:
            # release the archive and its buffer even when a step above raises
            archive.close()
            fakefile.close()

        meta = outdata.getMetadata()
        meta['subdirs'] = subdirs.keys()
        meta['ignored'] = ignored
        meta['metadata'] = mdata

        outdata.setSubObjects(objects)
        return outdata
## Script (Python) "onEditChangeSet"
##bind container=container
##bind context=context
##bind namespace=
##bind script=script
##bind subpath=traverse_subpath
##parameters=
##title=Compute object differences
##
from Products.CNXMLDocument.XMLService import XMLError
from Products.CNXMLDocument import XMLService

diffs = context.getDiffs()
if not diffs:
    return "no changes"

return XMLService.transform(diffs[0].htmlDiff(), "/home/simon/xml/cnxml/style/unibrowser.xsl")
 def mdml2json(self, content):
     """Return content transformed with the MDML2JSON_XSL stylesheet."""
     jsonstr = XMLService.transform(content, MDML2JSON_XSL)
     return jsonstr
    def convert(self, data, outdata, **kwargs):
        """Unpack a zipped module into outdata.

        Input is a zip file. Output is idata, with getData being index.cnxml
        and subObjects being other siblings.

        Arguments:
          data    -- raw contents of the zip archive
          outdata -- receives the result through setData(), setSubObjects()
                     and the mapping returned by getMetadata()
          kwargs  -- accepted for interface compatibility; not used here

        Returns outdata.  Its metadata mapping gains three keys:
          'subdirs'  -- first path component of every entry that sits in a
                        subdirectory (such entries are not imported)
          'ignored'  -- names of files deliberately left out
          'metadata' -- values decoded from the MDML found in index.cnxml
        """
        fakefile = StringIO(data)
        archive = ZipFile(fakefile, 'r')

        subdirs = {}
        ignored = []
        objects = {}
        mdata = {}
        try:
            namelist = archive.namelist()

            # Directory prefix shared by all entries: everything up to and
            # including the last '/' (rfind() == -1 yields the empty prefix).
            if len(namelist) > 1:
                prefix = os.path.commonprefix(namelist)
            elif namelist:
                prefix = namelist[0]
            else:
                prefix = ''
            prefix = prefix[:prefix.rfind("/") + 1]
            preflen = len(prefix)

            for name in namelist:
                modname = name[preflen:]
                if not modname:  # some zip programs store directories by themselves
                    continue
                isubdir = modname.find('/')
                if isubdir != -1:  # subdirs, incl. especially 'stylesheets', not imported
                    subdirs[modname[:isubdir]] = 1
                    continue
                ## disabled until we get a better handle on "viewable" export...
                #if modname == 'index.xhtml':  # do not import index.xhtml
                #  ignored.append('index.xhtml')
                #  continue
                ## probably also do the same with README
                if modname == 'index_auto_generated.cnxml':  # do not import autogenerated cnxml
                    ignored.append('index_auto_generated.cnxml')
                    continue

                unzipfile = archive.read(name)
                if modname != "index.cnxml":
                    objects[modname] = unzipfile
                    continue
                if not unzipfile:
                    ignored.append('index.cnxml')
                    continue

                outdata.setData(unzipfile)
                # Parse out the mdml for trusted import
                jsonstr = XMLService.transform(unzipfile, MDML2JSON_XSL)
                metadict = demjson.decode(jsonstr)

                # First, direct copies
                for k in ('abstract', 'title', 'language'):
                    val = metadict.get(k)
                    if isinstance(val, unicode):
                        val = val.encode('UTF-8')
                    mdata[k] = val or ''

                # Now, unwrap one level of dict for lists
                for k in ('subjectlist', 'keywordlist'):
                    listdict = metadict.get(k)
                    if listdict:
                        lkey = list(listdict.keys())[0]  # should only be one
                        mlist = listdict[lkey]
                        if isinstance(mlist, basestring):
                            # a lone entry arrives as a bare string; wrap it
                            listdict[lkey] = [mlist]
                        mdata.update(listdict)

                # Rename
                if 'content-id' in metadict:
                    mdata['objectId'] = metadict['content-id'].encode('UTF-8')
                if 'license' in metadict:
                    licenseinfo = metadict['license']
                    if 'url' in licenseinfo:
                        mdata['license'] = licenseinfo['url'].encode('UTF-8')
                    else:
                        mdata['license'] = licenseinfo['href'].encode('UTF-8')

                # DateTime strings
                for k in ('created', 'revised'):
                    if k in metadict:
                        mdata[k] = DateTime(metadict[k])

                # And the trickiest, unwrap and split roles (userids must be str, not unicode)
                if 'roles' in metadict:
                    roles = metadict['roles']['role']
                    # NOTE(review): assumes a lone role may decode to a single
                    # dict instead of a list, like the bare-string case handled
                    # for the subject/keyword lists -- confirm against the XSL.
                    if isinstance(roles, dict):
                        roles = [roles]
                    mdata.update(dict([(r['type'] + 's', str(r['_text']).split())
                                       for r in roles]))
                    #FIXME need to do collaborators here, as well - untested below
                    alltext = ' '.join([r['_text'] for r in roles])
                    mdata['collaborators'] = {}.fromkeys(alltext.encode('UTF-8').split()).keys()
        finally:
            # release the archive and its buffer even when a step above raises
            archive.close()
            fakefile.close()

        meta = outdata.getMetadata()
        meta['subdirs'] = subdirs.keys()
        meta['ignored'] = ignored
        meta['metadata'] = mdata

        outdata.setSubObjects(objects)
        return outdata
    def toCnxml(self, strXml, objZipFile):
        """Transform OpenOffice.org document XML into tidied CNXML.

        Arguments:
          strXml     -- the OOo document XML to convert
          objZipFile -- the opened OOo archive; 'styles.xml' is read from it
                        and it is handed on to addMathML()

        The input goes through an OOo-to-OOo cleanup transform, section
        tagging and MathML insertion.  Each of those steps is best-effort: on
        an exception or empty output the step is logged and its input is
        passed on unchanged.  The result is transformed to CNXML, has symbols
        replaced and ids generated, and is validated.  If validation fails and
        sections or MathML had been added, the conversion is retried once from
        the untouched strXml.

        Returns the CNXML, after the CNXMLTIDY_XSL transform, as a str.
        Raises OOoImportError when the generated CNXML does not validate.
        """
        # local import: only needed to attach exception details to log entries
        import sys

        subsystem = "OOo2CNXML Transform"

        # stow styles.xml in a tempfile
        styles_xml = objZipFile.read('styles.xml')
        (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
        try:
            try:
                os.write(tmpsfile, styles_xml)
            finally:
                # never leak the descriptor, even when the write fails
                os.close(tmpsfile)

            #
            # not strictly required.  this xform removes empty paragraphs.
            # makes other oo 2 cnxml xforms possible.
            #
            try:
                strOOoXml = XMLService.transform(strXml, OO2OO_XSL, stylesPath=tmpsname)
                if len(strOOoXml) == 0:
                    zLOG.LOG(subsystem, zLOG.INFO, "OOo to OOo XSL transform failed.")
                    strOOoXml = strXml
            except Exception:
                zLOG.LOG(subsystem, zLOG.INFO, "OOo to OOo XSL transform failed.",
                         error=sys.exc_info())
                strOOoXml = strXml
        finally:
            # Clean up styles.xml tempfile on every path out of this step
            os.remove(tmpsname)

        #
        # addSectionTags() calls the SAX parser.parse() which expects a file argument
        # thus we force the xml string into being a file object
        #
        try:
            strSectionedXml = addSectionTags(StringIO(strOOoXml))
            bAddedSections = len(strSectionedXml) > 0
            if not bAddedSections:
                zLOG.LOG(subsystem, zLOG.INFO, "Failed to add sections.")
        except Exception:
            zLOG.LOG(subsystem, zLOG.INFO, "Failed to add sections.",
                     error=sys.exc_info())
            bAddedSections = False
        if not bAddedSections:
            strSectionedXml = strOOoXml

        #
        # add external MathML as child of <draw:object> via SAX parser.
        #
        try:
            strMathedXml = addMathML(StringIO(strSectionedXml), objZipFile)
            bAddedMath = len(strMathedXml) > 0
            if not bAddedMath:
                zLOG.LOG(subsystem, zLOG.INFO, "Failed to add MathML.")
        except Exception:
            zLOG.LOG(subsystem, zLOG.INFO, "Failed to add MathML.",
                     error=sys.exc_info())
            bAddedMath = False
        if not bAddedMath:
            strMathedXml = strSectionedXml

        #
        # oo 2 cnxml via xsl transform.
        #
        try:
            strCnxml = XMLService.transform(strMathedXml, OO2CNXML_XSL)
        except Exception:
            zLOG.LOG(subsystem, zLOG.INFO, "OOo to CNXML XSL transform failed.",
                     error=sys.exc_info())
            # set strCnxml to invalid CNXML ...
            strCnxml = '<>'

        #
        # Replace Word Symbol Font with correct entity
        #
        strCnxml = symbolReplace(strCnxml, UNICODE_DICTIONARY)

        #
        # Global id generation
        #
        strCnxml = autoIds(strCnxml, prefix='oo-')

        #
        # Error handling
        #
        errors = XMLService.validate(strCnxml)
        if errors:
            if not (bAddedSections or bAddedMath):
                # nothing optional was added, so there is nothing to retry without
                zLOG.LOG(subsystem, zLOG.INFO, "Invalid CNXML generated. errors were \n" + str(errors))
                raise OOoImportError("Generated CNXML is invalid")

            zLOG.LOG(subsystem, zLOG.INFO, "Invalid CNXML generated. Trying w/o sections and MathML. Errors were \n" + str([str(e) for e in errors]))

            # NOTE(review): unlike the first pass, the retry does not run
            # symbolReplace() -- confirm whether that is intentional.
            try:
                strCnxml = XMLService.transform(strXml, OO2CNXML_XSL)
                strCnxml = autoIds(strCnxml, prefix='oo-')
            except Exception:
                zLOG.LOG(subsystem, zLOG.INFO, "OOo to CNXML XSL transform failed again with the undoctored OOo Xml.",
                         error=sys.exc_info())
                strCnxml = '<>'

            errors = XMLService.validate(strCnxml)
            if errors:
                zLOG.LOG(subsystem, zLOG.INFO, "Still...invalid CNXML. errors were \n" + str(errors))
                raise OOoImportError("Generated CNXML is invalid")

        #
        # Tidy up the CNXML
        #
        docCnxmlClean = XMLService.transform(strCnxml, CNXMLTIDY_XSL)

        return str(docCnxmlClean)
 def cnxml2json(self, content):
     """Return content transformed with the CNXML2JSON_XSL stylesheet."""
     jsonstr = XMLService.transform(content, CNXML2JSON_XSL)
     return jsonstr
示例#10
0
 def mdml2json(self, content):
     """Return content transformed with the MDML2JSON_XSL stylesheet."""
     jsonstr = XMLService.transform(content, MDML2JSON_XSL)
     return jsonstr
示例#11
0
 def cnxml2json(self, content):
     """Return content transformed with the CNXML2JSON_XSL stylesheet."""
     jsonstr = XMLService.transform(content, CNXML2JSON_XSL)
     return jsonstr
    def convert(self, data, outdata, **kwargs):
        """Unpack a zipped deposit into outdata.

        Input is a zip file. Output is idata, with getData being index.cnxml
        and subObjects being other siblings.

        Arguments:
          data    -- raw contents of the zip archive
          outdata -- receives the result through setData(), setSubObjects()
                     and the mapping returned by getMetadata()
          kwargs  -- passed on to the word-processor converter; the LaTeX
                     branch reads kwargs['user_name']

        Top-level entries are handled by name: 'mets.xml' fills
        meta['properties'], 'index.cnxml' becomes the data and supplies
        meta['featured_links'], a word-processor or '.tex' file is run through
        its own converter when there is no index.cnxml, and everything else is
        kept as a sub-object.  Entries in subdirectories are skipped.  Finally
        the SWORD_INSERT_ATTRIBUTION_XSL transform is applied to the data, if
        any was produced.

        Returns outdata.
        Raises CNXImportError when the archive holds more than one
        transformable file.
        """
        wordexts = ('.odt', '.sxw', '.docx', '.rtf', '.doc')

        fakefile = StringIO(data)
        archive = ZipFile(fakefile, 'r')
        objects = {}
        try:
            namelist = archive.namelist()

            # Directory prefix shared by all entries: everything up to and
            # including the last '/' (rfind() == -1 yields the empty prefix).
            if len(namelist) > 1:
                prefix = os.path.commonprefix(namelist)
            elif namelist:
                prefix = namelist[0]
            else:
                prefix = ''
            prefix = prefix[:prefix.rfind("/") + 1]
            namelist = [name[len(prefix):] for name in namelist] # Strip prefix from namelist entries

            zLOG.LOG("Sword Transform", zLOG.INFO, "files in zip=%s" % namelist)
            meta = outdata.getMetadata()
            meta['properties'] = {}

            containsIndexCnxml = ('index.cnxml' in namelist)
            wordfiles = len([True for m in namelist for e in wordexts
                             if m.endswith(e)])
            latexfiles = len([True for m in namelist if m.endswith('.tex')])

            if sum([int(containsIndexCnxml), wordfiles, latexfiles]) > 1:
                # The upload contains more than one transformable file, ie
                # it has a index.cnxml and latex/word content, or it has both latex
                # and word content, or more than one latex or word file.
                raise CNXImportError(
                    "Import has more than one transformable file. It has "
                    "%d index.cnxml files, %d word files and "
                    "%d LaTeX files" % (containsIndexCnxml, wordfiles, latexfiles))

            for modname in namelist:
                if not modname:               # some zip programs show directories by themselves
                    continue
                if modname.find('/') != -1:   # subdirs, incl. especially 'stylesheets', not imported
                    continue
                unzipfile = archive.read(prefix + modname)
                if modname == "mets.xml":
                    # Write metadata
                    zLOG.LOG("Sword Transform", zLOG.INFO, "starting...")
                    simplified = XMLService.transform(unzipfile, SWORD2RME_XSL)
                    jsonstr = XMLService.transform(simplified, XML2JSON_XSL)
                    meta['properties'] = json.decode(jsonstr)
                elif modname == "index.cnxml":
                    # hook here for featured links
                    # elaborate the metadata returned in order to add the featured links.
                    meta['featured_links'] = []
                    if unzipfile:
                        outdata.setData(StringIO(unzipfile))
                        dom = parseString(unzipfile)
                        links = meta.get('featured_links', [])
                        for group in dom.getElementsByTagName('link-group'):
                            group_type = group.getAttribute('type').encode(self.encoding)
                            for link in group.getElementsByTagName('link'):
                                # an empty <link/> has no child to serialise
                                first = link.firstChild
                                if first is None:
                                    title = ''
                                else:
                                    title = first.toxml().encode(self.encoding)
                                url = link.getAttribute('url').encode(self.encoding)
                                strength = link.getAttribute('strength').encode(self.encoding)
                                links.append({'url': url,
                                              'title': title,
                                              'type': group_type,
                                              'strength': strength})
                        meta['featured_links'] = links
                elif containsIndexCnxml:
                    # index.cnxml is the content; everything else rides along
                    objects[modname] = unzipfile
                elif [True for e in wordexts if modname.endswith(e)]:
                    # This is a word file
                    oo_to_cnxml().convert(unzipfile, outdata, **kwargs)
                elif modname.endswith('.tex'):
                    # This is LaTeX
                    latex_to_folder().convert(unzipfile, outdata,
                                    original_file_name='sword-import-file.tex',
                                    user_name=kwargs['user_name'])
                    # LaTeX transform returns straight text, make it
                    # a file object
                    outdata.setData(StringIO(outdata.getData()))
                else:
                    objects[modname] = unzipfile
        finally:
            # release the archive and its buffer on every path, including the
            # CNXImportError raised above
            archive.close()
            fakefile.close()

        meta = outdata.getMetadata()

        # Add attribution note to the cnxml
        props = meta['properties']
        params = {}
        for key in ('journal', 'year', 'url'):
            if unicode(key) in props:
                value = props[unicode(key)]
                if isinstance(value, unicode):
                    value = value.encode('utf-8')
                params[key] = value

        zLOG.LOG("Sword Transform", zLOG.INFO, "attribution dict=%s" % params)
        data = outdata.getData()

        if data and len(data.getvalue()) > 0:
            attributed = XMLService.transform(data.getvalue(), SWORD_INSERT_ATTRIBUTION_XSL, **params)
            outdata.setData(StringIO(unicode(attributed,'utf-8')))
        else:
            zLOG.LOG("Sword Transform", zLOG.INFO, "Skipping adding attributions because no cnxml was generated...")

        #meta['subdirs'] = subdirs.keys()

        objects.update(outdata.getSubObjects())
        outdata.setSubObjects(objects)

        return outdata
     strOOoXml = strInputOOoXml
     doc = XMLService.parseString(strOOoXml)

     objZipFile = zipfile.ZipFile(strZipFile, 'r') # no 'rb' since 'b' => binary?

     #
     # Pass #1 - OOo Xml to OOo Xml xform - change one entry table & remove empty <text:p>
     #
     try:
         styles_xml = objZipFile.read('styles.xml')
         (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
         os.write(tmpsfile, styles_xml)
         os.close(tmpsfile)
         stylesPath=tmpsname

         strOutputMassageOOoXml = XMLService.transform(strOOoXml, OO2OO_XSL, stylesPath=stylesPath)
         if len(strOutputMassageOOoXml) > 0:
             print "**** suceeded in OO 2 OO XSL transform to remove empty paragraphs."
         else:
             print "**** failed in OO 2 OO XSL transform to remove empty paragraphs. return an empty string. ignore and continue.\n" + str(strErrorMsg)
             strOutputMassageOOoXml = strOOoXml

         os.remove(tmpsname)
     except XMLService.XMLParserError, strErrorMsg:
         print "**** failed in OO 2 OO XSL transform to remove empty paragraphs. raised exception. ignore and continue.\n" + str(strErrorMsg)
         strOutputMassageOOoXml = strOOoXml
         #raise

     # print "wrting to : '" + strOutputMassageOOoXmlFileBase + '.oo2oo.xml'
     fileMassagedOutputXml = open(strOutputMassageOOoXmlFileBase + '.oo2oo.xml', "w")
     fileMassagedOutputXml.write(str(strOutputMassageOOoXml))
 def SearchableText(self):
     """Return the default file's source run through the baretext transform.

     The result is what gets handed back as the searchable text of the module.
     """
     return XMLService.transform(self.getDefaultFile().getSource(), baretext)