Exemplo n.º 1
0
class TransformTool(UniqueObject, ActionProviderBase, Folder):

    id = 'portal_transforms'
    meta_type = id.title().replace('_', ' ')
    isPrincipiaFolderish = 1 # Show up in the ZMI

    __implements__ = iengine
    implements(IPortalTransformsTool)

    meta_types = all_meta_types = (
        { 'name'   : 'Transform',
          'action' : 'manage_addTransformForm'},
        { 'name'   : 'TransformsChain',
          'action' : 'manage_addTransformsChainForm'},
        )

    manage_addTransformForm = PageTemplateFile('addTransform', _www)
    manage_addTransformsChainForm = PageTemplateFile('addTransformsChain', _www)
    manage_cacheForm = PageTemplateFile('setCacheTime', _www)
    manage_editTransformationPolicyForm = PageTemplateFile('editTransformationPolicy', _www)
    manage_reloadAllTransforms = PageTemplateFile('reloadAllTransforms', _www)

    manage_options = ((Folder.manage_options[0],) + Folder.manage_options[2:] +
                      (
        { 'label'   : 'Caches',
          'action' : 'manage_cacheForm'},
        { 'label'   : 'Policy',
          'action' : 'manage_editTransformationPolicyForm'},
        { 'label'   : 'Reload transforms',
          'action' : 'manage_reloadAllTransforms'},
        )
                      )

    security = ClassSecurityInfo()

    def __init__(self, policies=None, max_sec_in_cache=3600):
        self._mtmap = PersistentMapping()
        self._policies = policies or PersistentMapping()
        self.max_sec_in_cache = max_sec_in_cache
        self._new_style_pt = 1

    # mimetype oriented conversions (iengine interface) ########################

    def unregisterTransform(self, name):
        """ unregister a transform
        name is the name of a registered transform
        """
        self._unmapTransform(getattr(self, name))
        if name in self.objectIds():
            self._delObject(name)


    def convertTo(self, target_mimetype, orig, data=None, object=None,
                  usedby=None, context=None, **kwargs):
        """Convert orig to a given mimetype

        * orig is an encoded string

        * data an optional idatastream object. If None a new datastream will be
        created and returned

        * optional object argument is the object on which is bound the data.
        If present that object will be used by the engine to bound cached data.

        * additional arguments (kwargs) will be passed to the transformations.
        Some usual arguments are : filename, mimetype, encoding

        return an object implementing idatastream or None if no path has been
        found.
        """
        target_mimetype = str(target_mimetype)

        if object is not None:
            cache = Cache(object)
            data = cache.getCache(target_mimetype)
            if data is not None:
                time, data = data
                if self.max_sec_in_cache == 0 or time < self.max_sec_in_cache:
                    return data

        if data is None:
            data = self._wrap(target_mimetype)

        registry = getToolByName(self, 'mimetypes_registry')

        if not getattr(aq_base(registry), 'classify', None):
            # avoid problems when importing a site with an old mimetype registry
            # XXX return None or orig?
            return None

        orig_mt = registry.classify(orig,
                                    mimetype=kwargs.get('mimetype'),
                                    filename=kwargs.get('filename'))
        orig_mt = str(orig_mt)
        if not orig_mt:
            log('Unable to guess input mime type (filename=%s, mimetype=%s)' %(
                kwargs.get('mimetype'), kwargs.get('filename')), severity=DEBUG)
            return None

        target_mt = registry.lookup(target_mimetype)
        if target_mt:
            target_mt = target_mt[0]
        else:
            log('Unable to match target mime type %s'% str(target_mimetype),
                severity=DEBUG)
            return None

        ## fastpath
        # If orig_mt and target_mt are the same, we only allow
        # a one-hop transform, a.k.a. filter.
        # XXX disabled filtering for now
        filter_only = False
        if orig_mt == str(target_mt):
            filter_only = True
            data.setData(orig)
            md = data.getMetadata()
            md['mimetype'] = str(orig_mt)
            if object is not None:
                cache.setCache(str(target_mimetype), data)
            return data

        ## get a path to output mime type
        requirements = self._policies.get(str(target_mt), [])
        path = self._findPath(orig_mt, target_mt, list(requirements))
        if not path and requirements:
            log('Unable to satisfy requirements %s' % ', '.join(requirements),
                severity=DEBUG)
            path = self._findPath(orig_mt, target_mt)

        if not path:
            log('NO PATH FROM %s TO %s : %s' % (orig_mt, target_mimetype, path),
                severity=DEBUG)
            return None #XXX raise TransformError

        if len(path) > 1:
            ## create a chain on the fly (sly)
            transform = chain()
            for t in path:
                transform.registerTransform(t)
        else:
            transform = path[0]

        result = transform.convert(orig, data, context=context, usedby=usedby, **kwargs)
        assert(idatastream.isImplementedBy(result),
               'result doesn\'t implemented idatastream')
        self._setMetaData(result, transform)

        # set cache if possible
        if object is not None and result.isCacheable():
            cache.setCache(str(target_mimetype), result)

        # return idatastream object
        return result

    security.declarePublic('convertToData')
    def convertToData(self, target_mimetype, orig, data=None, object=None,
                      usedby=None, context=None, **kwargs):
        """Convert to a given mimetype and return the raw data
        ignoring subobjects. see convertTo for more information
        """
        data =self.convertTo(target_mimetype, orig, data, object, usedby,
                       context, **kwargs)
        if data:
            return data.getData()
        return None

    security.declarePublic('convert')
    def convert(self, name, orig, data=None, context=None, **kwargs):
        """run a tranform of a given name on data

        * name is the name of a registered transform

        see convertTo docstring for more info
        """
        if not data:
            data = self._wrap(name)
        try:
            transform = getattr(self, name)
        except AttributeError:
            raise Exception('No such transform "%s"' % name)
        data = transform.convert(orig, data, context=context, **kwargs)
        self._setMetaData(data, transform)
        return data


    def __call__(self, name, orig, data=None, context=None, **kwargs):
        """run a transform by its name, returning the raw data product

        * name is the name of a registered transform.

        return an encoded string.
        see convert docstring for more info on additional arguments.
        """
        data = self.convert(name, orig, data, context, **kwargs)
        return data.getData()


    # utilities ###############################################################

    def _setMetaData(self, datastream, transform):
        """set metadata on datastream according to the given transform
        (mime type and optionaly encoding)
        """
        md = datastream.getMetadata()
        if hasattr(transform, 'output_encoding'):
            md['encoding'] = transform.output_encoding
        md['mimetype'] = transform.output

    def _wrap(self, name):
        """wrap a data object in an icache"""
        return datastream(name)

    def _unwrap(self, data):
        """unwrap data from an icache"""
        if idatastream.isImplementedBy(data):
            data = data.getData()
        return data

    def _mapTransform(self, transform):
        """map transform to internal structures"""
        registry = getToolByName(self, 'mimetypes_registry')
        inputs = getattr(transform, 'inputs', None)
        if not inputs:
            raise TransformException('Bad transform %s : no input MIME type' %
                                     (transform))
        for i in inputs:
            mts = registry.lookup(i)
            if not mts:
                msg = 'Input MIME type %r for transform %s is not registered '\
                      'in the MIME types registry' % (i, transform.name())
                raise TransformException(msg)
            for mti in mts:
                for mt in mti.mimetypes:
                    mt_in = self._mtmap.setdefault(mt, PersistentMapping())
                    output = getattr(transform, 'output', None)
                    if not output:
                        msg = 'Bad transform %s : no output MIME type'
                        raise TransformException(msg % transform.name())
                    mto = registry.lookup(output)
                    if not mto:
                        msg = 'Output MIME type %r for transform %s is not '\
                              'registered in the MIME types registry' % \
                              (output, transform.name())
                        raise TransformException(msg)
                    if len(mto) > 1:
                        msg = 'Wildcarding not allowed in transform\'s output '\
                              'MIME type'
                        raise TransformException(msg)

                    for mt2 in mto[0].mimetypes:
                        try:
                            if not transform in mt_in[mt2]:
                                mt_in[mt2].append(transform)
                        except KeyError:
                            mt_in[mt2] = PersistentList([transform])

    def _unmapTransform(self, transform):
        """unmap transform from internal structures"""
        registry = getToolByName(self, 'mimetypes_registry')
        for i in transform.inputs:
            for mti in registry.lookup(i):
                for mt in mti.mimetypes:
                    mt_in = self._mtmap.get(mt, {})
                    output = transform.output
                    mto = registry.lookup(output)
                    for mt2 in mto[0].mimetypes:
                        l = mt_in[mt2]
                        for i in range(len(l)):
                            if transform.name() == l[i].name():
                                l.pop(i)
                                break
                        else:
                            log('Can\'t find transform %s from %s to %s' % (
                                transform.name(), mti, mt),
                                severity=DEBUG)

    def _findPath(self, orig, target, required_transforms=()):
        """return the shortest path for transformation from orig mimetype to
        target mimetype
        """
        path = []

        if not self._mtmap:
            return None

        # naive algorithm :
        #  find all possible paths with required transforms
        #  take the shortest
        #
        # it should be enough since we should not have so much possible paths
        shortest, winner = 9999, None
        for path in self._getPaths(str(orig), str(target), required_transforms):
            if len(path) < shortest:
                winner = path
                shortest = len(path)

        return winner

    def _getPaths(self, orig, target, requirements, path=None, result=None):
        """return a all path for transformation from orig mimetype to
        target mimetype
        """
        if path is None:
            result = []
            path = []
            requirements = list(requirements)
        outputs = self._mtmap.get(orig)
        if outputs is None:
            return result
        path.append(None)
        for o_mt, transforms in outputs.items():
            for transform in transforms:
                required = 0
                name = transform.name()
                if name in requirements:
                    requirements.remove(name)
                    required = 1
                if transform in path:
                    # avoid infinite loop...
                    continue
                path[-1] = transform
                if o_mt == target:
                    if not requirements:
                        result.append(path[:])
                else:
                    self._getPaths(o_mt, target, requirements, path, result)
                if required:
                    requirements.append(name)
        path.pop()

        return result

    security.declarePrivate('manage_afterAdd')
    def manage_afterAdd(self, item, container):
        """ overload manage_afterAdd to finish initialization when the
        transform tool is added
        """
        Folder.manage_afterAdd(self, item, container)
        transforms.initialize(self)
        # XXX required?
        #try:
        #    # first initialization
        #    transforms.initialize(self)
        #except:
        #    # may fail on copy
        #    pass

    security.declareProtected(ManagePortal, 'manage_addTransform')
    def manage_addTransform(self, id, module, REQUEST=None):
        """ add a new transform to the tool """
        transform = Transform(id, module)
        self._setObject(id, transform)
        self._mapTransform(transform)
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')

    security.declareProtected(ManagePortal, 'manage_addTransform')
    def manage_addTransformsChain(self, id, description, REQUEST=None):
        """ add a new transform to the tool """
        transform = TransformsChain(id, description)
        self._setObject(id, transform)
        self._mapTransform(transform)
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')

    security.declareProtected(ManagePortal, 'manage_addTransform')
    def manage_setCacheValidityTime(self, seconds, REQUEST=None):
        """set  the lifetime of cached data in seconds"""
        self.max_sec_in_cache = int(seconds)
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')

    security.declareProtected(ManagePortal, 'reloadTransforms')
    def reloadTransforms(self, ids=()):
        """ reload transforms with the given ids
        if no ids, reload all registered transforms

        return a list of (transform_id, transform_module) describing reloaded
        transforms
        """
        if not ids:
            ids = self.objectIds()
        reloaded = []
        for id in ids:
            o = getattr(self, id)
            o.reload()
            reloaded.append((id, o.module))
        return reloaded

    # Policy handling methods #################################################

    def manage_addPolicy(self, output_mimetype, required_transforms, REQUEST=None):
        """ add a policy for a given output mime types"""
        registry = getToolByName(self, 'mimetypes_registry')
        if not registry.lookup(output_mimetype):
            raise TransformException('Unknown MIME type')
        if self._policies.has_key(output_mimetype):
            msg = 'A policy for output %s is yet defined' % output_mimetype
            raise TransformException(msg)

        required_transforms = tuple(required_transforms)
        self._policies[output_mimetype] = required_transforms
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_editTransformationPolicyForm')

    def manage_delPolicies(self, outputs, REQUEST=None):
        """ remove policies for given output mime types"""
        for mimetype in outputs:
            del self._policies[mimetype]
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_editTransformationPolicyForm')

    def listPolicies(self):
        """ return the list of defined policies

        a policy is a 2-uple (output_mime_type, [list of required transforms])
        """
        # XXXFIXME: backward compat, should be removed latter
        if not hasattr(self, '_policies'):
            self._policies = PersistentMapping()
        return self._policies.items()

    # mimetype oriented conversions (iengine interface) ########################

    def registerTransform(self, transform):
        """register a new transform

        transform isn't a Zope Transform (the wrapper) but the wrapped transform
        the persistence wrapper will be created here
        """
        # needed when call from transform.transforms.initialize which
        # register non zope transform
        module = str(transform.__module__)
        transform = Transform(transform.name(), module, transform)
        if not itransform.isImplementedBy(transform):
            raise TransformException('%s does not implement itransform' % transform)
        name = transform.name()
        __traceback_info__ = (name, transform)
        if name not in self.objectIds():
            self._setObject(name, transform)
            self._mapTransform(transform)

    security.declareProtected(ManagePortal, 'ZopeFind')
    def ZopeFind(self, *args, **kwargs):
        """Don't break ZopeFind feature when a transform can't be loaded
        """
        try:
            return Folder.ZopeFind(self, *args, **kwargs)
        except MissingBinary:
            log('ZopeFind: catched MissingBinary exception')

    security.declareProtected(View, 'objectItems')
    def objectItems(self, *args, **kwargs):
        """Don't break ZopeFind feature when a transform can't be loaded
        """
        try:
            return Folder.objectItems(self, *args, **kwargs)
        except MissingBinary:
            log('objectItems: catched MissingBinary exception')
            return []

    # available mimetypes ####################################################
    def listAvailableTextInputs(self):
        """ Returns a list of mimetypes that can be used as input for textfields
            by building a list of the inputs beginning with "text/" of all transforms.
        """
        available_types = []
        candidate_transforms = [object[1] for object in self.objectItems()]
        for candidate in candidate_transforms:
            for input in candidate.inputs:
                if input.startswith("text/") and input not in available_types:
                    available_types.append(input)
        return available_types
Exemplo n.º 2
0
class MimeTypesRegistry(UniqueObject, ActionProviderBase, Folder):
    """Mimetype registry that deals with
    a) registering types
    b) wildcarding of rfc-2046 types
    c) classifying data into a given type
    """

    __implements__ = (IMimetypesRegistry, ISourceAdapter)
    implements(IMimetypesRegistryTool)

    id        = 'mimetypes_registry'
    meta_type = 'MimeTypes Registry'
    isPrincipiaFolderish = 1 # Show up in the ZMI

    meta_types = all_meta_types = (
        { 'name'   : 'MimeType',
          'action' : 'manage_addMimeTypeForm'},
        )

    manage_options = (
        ( { 'label'   : 'MimeTypes',
            'action' : 'manage_main'},) +
        Folder.manage_options[2:]
        )

    manage_addMimeTypeForm = PageTemplateFile('addMimeType', _www)
    manage_main = PageTemplateFile('listMimeTypes', _www)
    manage_editMimeTypeForm = PageTemplateFile('editMimeType', _www)

    security = ClassSecurityInfo()

    # FIXME
    __allow_access_to_unprotected_subobjects__ = 1

    def __init__(self,):
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        # Major key -> minor IMimetype objects
        self._mimetypes  = PersistentMapping()
        # ext -> IMimetype mapping
        self.extensions = PersistentMapping()
        # glob -> (regex, mimetype) mapping
        self.globs = OOBTree()
        self.manage_addProperty('defaultMimetype', 'text/plain', 'string')
        self.manage_addProperty('unicodePolicies', 'strict ignore replace',
                                'tokens')
        self.manage_addProperty('unicodePolicy', 'unicodePolicies', 'selection')
        self.manage_addProperty('fallbackEncoding', 'latin1', 'string')

        # initialize mime types
        initialize(self)
        self._new_style_mtr = 1

    security.declareProtected(ManagePortal, 'register')
    def register(self, mimetype):
        """ Register a new mimetype

        mimetype must implement IMimetype
        """
        mimetype = aq_base(mimetype)
        assert IMimetype.isImplementedBy(mimetype)
        for t in mimetype.mimetypes:
            self.register_mimetype(t, mimetype)
        for extension in mimetype.extensions:
            self.register_extension(extension, mimetype)
        for glob in mimetype.globs:
            self.register_glob(glob, mimetype)

    security.declareProtected(ManagePortal, 'register_mimetype')
    def register_mimetype(self, mt, mimetype):
        major, minor = split(mt)
        if not major or not minor or minor == '*':
            raise MimeTypeException('Can\'t register mime type %s' % mt)
        group = self._mimetypes.setdefault(major, PersistentMapping())
        if group.has_key(minor):
            if group.get(minor) != mimetype:
                log('Warning: redefining mime type %s (%s)' % (
                    mt, mimetype.__class__))
        group[minor] = mimetype

    security.declareProtected(ManagePortal, 'register_extension')
    def register_extension(self, extension, mimetype):
        """ Associate a file's extension to a IMimetype

        extension is a string representing a file extension (not
        prefixed by a dot) mimetype must implement IMimetype
        """
        mimetype = aq_base(mimetype)
        if self.extensions.has_key(extension):
            if self.extensions.get(extension) != mimetype:
                log('Warning: redefining extension %s from %s to %s' % (
                    extension, self.extensions[extension], mimetype))
        # we don't validate fmt yet, but its ["txt", "html"]
        self.extensions[extension] = mimetype

    security.declareProtected(ManagePortal, 'register_glob')
    def register_glob(self, glob, mimetype):
        """ Associate a glob to a IMimetype

        glob is a shell-like glob that will be translated to a regex
        to match against whole filename.
        mimetype must implement IMimetype.
        """
        globs = getattr(self, 'globs', None)
        if globs is None:
            self.globs = globs = OOBTree()
        mimetype = aq_base(mimetype)
        existing = globs.get(glob)
        if existing is not None:
            regex, mt = existing
            if mt != mimetype:
                log('Warning: redefining glob %s from %s to %s' % (
                    glob, mt, mimetype))
        # we don't validate fmt yet, but its ["txt", "html"]
        pattern = re.compile(fnmatch.translate(glob))
        globs[glob] = (pattern, mimetype)

    security.declareProtected(ManagePortal, 'unregister')
    def unregister(self, mimetype):
        """ Unregister a new mimetype

        mimetype must implement IMimetype
        """
        assert IMimetype.isImplementedBy(mimetype)
        for t in mimetype.mimetypes:
            major, minor = split(t)
            group = self._mimetypes.get(major, {})
            if group.get(minor) == mimetype:
                del group[minor]
        for e in mimetype.extensions:
            if self.extensions.get(e) == mimetype:
                del self.extensions[e]
        globs = getattr(self, 'globs', None)
        if globs is not None:
            for glob in mimetype.globs:
                existing = globs.get(glob)
                if existing is None:
                    continue
                regex, mt = existing
                if mt == mimetype:
                    del globs[glob]

    security.declarePublic('mimetypes')
    def mimetypes(self):
        """Return all defined mime types, each one implements at least
        IMimetype
        """
        res = {}
        for g in self._mimetypes.values():
            for mt in g.values():
                res[mt] =1
        return [aq_base(mtitem) for mtitem in res.keys()]


    security.declarePublic('list_mimetypes')
    def list_mimetypes(self):
        """Return all defined mime types, as string"""
        return [str(mt) for mt in self.mimetypes()]

    security.declarePublic('lookup')
    def lookup(self, mimetypestring):
        """Lookup for IMimetypes object matching mimetypestring

        mimetypestring may have an empty minor part or containing a
        wildcard (*) mimetypestring may and IMimetype object (in this
        case it will be returned unchanged

        Return a list of mimetypes objects associated with the
        RFC-2046 name return an empty list if no one is known.
        """
        if IMimetype.isImplementedBy(mimetypestring):
            return (aq_base(mimetypestring), )
        __traceback_info__ = (repr(mimetypestring), str(mimetypestring))
        major, minor = split(str(mimetypestring))
        group = self._mimetypes.get(major, {})
        if not minor or minor == '*':
            res = group.values()
        else:
            res = group.get(minor)
            if res:
                res = (res,)
            else:
                return ()
        return tuple([aq_base(mtitem) for mtitem in res])

    security.declarePublic('lookupExtension')
    def lookupExtension(self, filename):
        """Lookup for IMimetypes object matching filename

        Filename maybe a file name like 'content.txt' or an extension
        like 'rest'

        Return an IMimetype object associated with the file's
        extension or None
        """
        if filename.find('.') != -1:
            base, ext = os.path.splitext(filename)
            ext = ext[1:] # remove the dot
            while self.suffix_map.has_key(ext):
                base, ext = os.path.splitext(base + self.suffix_map[ext])
                ext = ext[1:] # remove the dot
        else:
            ext = filename
            base = None

        # XXX This code below make no sense and may break because base
        # isn't defined.
        if self.encodings_map.has_key(ext) and base:
            encoding = self.encodings_map[ext]
            base, ext = os.path.splitext(base)
            ext = ext[1:] # remove the dot
        else:
            encoding = None
        return aq_base(self.extensions.get(ext))

    security.declarePublic('globFilename')
    def globFilename(self, filename):
        """Lookup for IMimetypes object matching filename

        Filename must be a complete filename with extension.

        Return an IMimetype object associated with the glob's or None
        """
        globs = getattr(self, 'globs', None)
        if globs is None:
            return None
        for key in globs.keys():
            glob, mimetype = globs[key]
            if glob.match(filename):
                return aq_base(mimetype)
        return None

    security.declarePublic('lookupGlob')
    def lookupGlob(self, glob):
        globs = getattr(self, 'globs', None)
        if globs is None:
            return None
        return aq_base(globs.get(glob))

    def _classifiers(self):
        return [mt for mt in self.mimetypes() if IClassifier.isImplementedBy(mt)]

    security.declarePublic('classify')
    def classify(self, data, mimetype=None, filename=None):
        """Classify works as follows:
        1) you tell me the rfc-2046 name and I give you an IMimetype
           object
        2) the filename includes an extension from which we can guess
           the mimetype
        3) we can optionally introspect the data
        4) default to self.defaultMimetype if no data was provided
           else to application/octet-stream of no filename was provided,
           else to text/plain

        Return an IMimetype object or None 
        """
        mt = None
        if mimetype:
            mt = self.lookup(mimetype)
            if mt:
                mt = mt[0]
        elif filename:
            mt = self.lookupExtension(filename)
            if mt is None:
                mt = self.globFilename(filename)
        if data and not mt:
            for c in self._classifiers():
                if c.classify(data):
                    mt = c
                    break
            if not mt:
                mstr = magic.guessMime(data)
                if mstr:
                    mt = self.lookup(mstr)[0]
        if not mt:
            if not data:
                mtlist = self.lookup(self.defaultMimetype)
            elif filename:
                mtlist = self.lookup('application/octet-stream')
            else:
                failed = 'text/x-unknown-content-type'
                filename = filename or ''
                data = data or ''
                ct, enc = guess_content_type(filename, data, None)
                if ct == failed:
                    ct = 'text/plain'
                mtlist = self.lookup(ct)
            if len(mtlist)>0:
                mt = mtlist[0]
            else:
                return None

        # Remove acquisition wrappers
        return aq_base(mt)

    def __call__(self, data, **kwargs):
        """ Return a triple (data, filename, mimetypeobject) given
        some raw data and optional paramters

        method from the isourceAdapter interface
        """
        mimetype = kwargs.get('mimetype', None)
        filename = kwargs.get('filename', None)
        encoding = kwargs.get('encoding', None)
        mt = None
        if hasattr(data, 'filename'):
            filename = os.path.basename(data.filename)
        elif hasattr(data, 'name'):
            filename = os.path.basename(data.name)

        if hasattr(data, 'read'):
            _data = data.read()
            if hasattr(data, 'seek'):
                data.seek(0)
            data = _data

        # We need to figure out if data is binary and skip encoding if
        # it is
        mt = self.classify(data, mimetype=mimetype, filename=filename)

        if not mt.binary and not type(data) is UnicodeType:
            # if no encoding specified, try to guess it from data
            if encoding is None:
                encoding = self.guess_encoding(data)

            # ugly workaround for
            # https://sourceforge.net/tracker/?func=detail&aid=1068001&group_id=75272&atid=543430
            # covered by
            # https://sourceforge.net/tracker/?func=detail&atid=355470&aid=843590&group_id=5470
            # dont remove this code unless python is fixed.
            if encoding is "macintosh":
                encoding = 'mac_roman'

            try:
                try:
                    data = unicode(data, encoding, self.unicodePolicy)
                except (ValueError, LookupError):
                    # wrong unicodePolicy
                    data = unicode(data, encoding)
            except:
                data = unicode(data, self.fallbackEncoding)

        return (data, filename, aq_base(mt))

    security.declarePublic('guess_encoding')
    def guess_encoding(self, data):
        """ Try to guess encoding from a text value if no encoding
        guessed, used the default charset from site properties (Zope)
        with a fallback to UTF-8 (should never happen with correct
        site_properties, but always raise Attribute error without
        Zope)
        """
        if type(data) is type(u''):
            # data maybe unicode but with another encoding specified
            data = data.encode('UTF-8')
        encoding = guess_encoding(data)
        if encoding is None:
            try:
                site_props = self.portal_properties.site_properties
                encoding = site_props.getProperty('default_charset', 'UTF-8')
            except:
                encoding = 'UTF-8'
        return encoding

    security.declareProtected(ManagePortal, 'manage_delObjects')
    def manage_delObjects(self, ids, REQUEST=None):
        """ delete the selected mime types """
        for id in ids:
            self.unregister(self.lookup(id)[0])
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')

    security.declareProtected(ManagePortal, 'manage_addMimeType')
    def manage_addMimeType(self, id, mimetypes, extensions, icon_path,
                           binary=0, globs=None, REQUEST=None):
        """add a mime type to the tool"""
        mt = MimeTypeItem(id, mimetypes, extensions=extensions,
                          binary=binary, icon_path=icon_path, globs=globs)
        self.register(mt)
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')

    security.declareProtected(ManagePortal, 'manage_editMimeType')
    def manage_editMimeType(self, name, new_name, mimetypes, extensions,
                            icon_path, binary=0, globs=None, REQUEST=None):
        """Edit a mime type by name
        """
        mt = self.lookup(name)[0]
        self.unregister(mt)
        mt.edit(new_name, mimetypes, extensions, icon_path=icon_path,
                binary=binary, globs=globs)
        self.register(mt)
        if REQUEST is not None:
            REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')