def _sanitized_basename(name):
    """Return the basename of `name` reduced to filename-safe characters.

    Surrounding spaces, tabs and the punctuation '.,-;' are trimmed, then
    every run of characters outside [A-Za-z0-9 ._-] collapses to a single
    '_'.  The result may be empty, e.g. for a name made only of punctuation.
    """
    trimmed = os.path.basename(name).strip(' \t.,-;')
    # The character class excludes CR and LF, so the result never contains
    # a line break.
    return re.sub(r'[^A-Za-z0-9 \._-]+', '_', trimmed)


def doc_title_with_good_suffix(doc):
    """Return title (perhaps unchanged) which reflects the MIME type.

    For example, a text/plain 'Readme' will be mapped to 'Readme.txt'.

    The base name is the first non-empty of the 'title' metadata,
    doc.original_name() and doc.id, sanitized to a conservative character
    set.  If that sanitizes to nothing (say, a title of '...'), the
    sanitized doc.id is used instead, so the result is not a bare extension.

    The extension is taken from the originals filename, or from the value
    returned by _check_for_webpage_complete() when that is truthy.  If that
    extension is acceptable to is_good_extension() it is kept (and not
    appended again when the name already ends with it); otherwise one is
    synthesized from the 'apparent-mime-type' or 'content-type' metadata,
    defaulting to text/plain.

    Raises AssertionError if the synthesized extension is itself rejected
    by is_good_extension().
    """
    orig_ext = os.path.splitext(originals_filename_for_doc(doc))[1]
    wpc = _check_for_webpage_complete(doc.originals_path())
    if wpc:
        # A truthy result overrides the extension of the originals filename.
        orig_ext = os.path.splitext(wpc)[1]

    filename = _sanitized_basename(
        doc.get_metadata('title') or doc.original_name() or doc.id)
    if not filename:
        # The chosen name consisted solely of trimmed characters.
        filename = _sanitized_basename(doc.id)

    orig_ext = orig_ext.lower()
    # e.g. for 'ReadMe', orig_ext == ''
    orig_ext_ok = bool(orig_ext) and is_good_extension(orig_ext)
    if orig_ext_ok and filename.lower().endswith(orig_ext):
        return filename
    # Popular entries from mimetypes.common_types not found in CONTENT_TYPES:
    #  image/pict .{pct,pic,pict}, application/rtf .rtf (no uplib parser)
    # Popular entries from mimetypes.types_map not found in CONTENT_TYPES:
    #  audio .aiff + many more, video .avi + many more, text .bat .h .c .css,
    #  html .htm .xml, msword .dot, image .bmp .jpeg .tiff, postscript .eps

    if orig_ext_ok:
        ext = orig_ext
    else:
        # Originals lacked an extension, or had a bad extension.  Synthesize one.
        ext = get_extension_for_type(doc.get_metadata('apparent-mime-type') or
                                     doc.get_metadata('content-type') or
                                     'text/plain')
        assert is_good_extension(ext)
    if not ext.startswith("."):
        ext = "." + ext
    return filename + ext
def is_good_extension(ext):
    """Tell whether an upload using this file extension could succeed.

    The extension (with or without one leading '.', in any case) is mapped
    to a content-type; the answer is true only when that content-type maps
    back to the same extension and the extension is one of the values of
    CONTENT_TYPES, which is where extensions/UploadDocument.py's
    _add_internal will want to find it.
    """
    # Drop at most one leading dot; the argument might also be ''.
    bare = ext[1:] if ext.startswith('.') else ext
    # get_content_type() punts on '.JPG' &c., letting mimetypes.guess_type()
    # deal with it, so normalize the case up front.
    bare = bare.lower()

    content_type = get_content_type('foo.' + bare)
    if content_type == 'application/octet-stream':
        # This binary type is absolutely unacceptable to _add_internal.
        return False
    if get_extension_for_type(content_type) != bare:
        # The content-type does not round-trip to this extension.
        return False
    return bare in CONTENT_TYPES.values()
def _separate_images(html):
    """Pull inline data-URI images out of `html`.

    Every match of _DATA_URI_PATTERN is replaced with
    'src="images/image-N.EXT"', where N counts matches from 0 and EXT comes
    from get_extension_for_type() applied to the match's
    "maintype/subtype".

    Returns a pair (new_html, images); `images` maps each generated image
    name to a (content_type, data) tuple.  Payloads whose "encoding" group
    is "base64" are decoded; any other payload is stored as matched.
    """
    images = {}
    pieces = []   # alternating untouched HTML slices and replacement attributes
    tail_start = 0
    # One left-to-right pass instead of re-searching and re-building the
    # whole string after each replacement.
    for counter, m in enumerate(_DATA_URI_PATTERN.finditer(html)):
        content_type = "%s/%s" % (m.group("maintype"), m.group("subtype"))
        data = m.group("data")
        if m.group("encoding") == "base64":
            # b64decode exists on both Python 2 and 3; base64.decodestring
            # was removed in Python 3.9.
            data = base64.b64decode(data)
        # NOTE(review): non-base64 data URIs are normally percent-encoded
        # (RFC 2397); such payloads are passed through undecoded here --
        # confirm whether _DATA_URI_PATTERN can match them.
        image_name = "image-%s.%s" % (counter, get_extension_for_type(content_type))
        images[image_name] = (content_type, data)
        pieces.append(html[tail_start:m.start()])
        pieces.append('src="images/%s"' % image_name)
        tail_start = m.end()
    if not images:
        # Nothing matched: hand back the input object untouched.
        return html, images
    pieces.append(html[tail_start:])
    return "".join(pieces), images