示例#1
0
    async def get_mimetype(self):
        """Return the mimetype for the file.

        If ``self._mimetype`` is already set (truthy) it is returned
        unchanged. Otherwise the bytes from ``self.get_file_bytes()`` are
        passed to ``puremagic.magic_string`` and the mime type is taken from
        the matches it reports.

        Returns:
            str: The detected mime type, or ``""`` when puremagic raises
            ``PureError``, reports no matches, or none of the matches that
            share the first result's confidence carries a mime type.

        """
        if self._mimetype:
            return self._mimetype

        try:
            results = puremagic.magic_string(await self.get_file_bytes())
        except puremagic.PureError:
            # puremagic could not identify the content; report "unknown".
            return ""

        # If for some reason we get a len 0 list
        if not results:  # pragma: nocover
            return ""

        # If we only have one result use it.
        if len(results) == 1:  # pragma: nocover
            return results[0].mime_type

        # Several matches: among those sharing the first result's confidence,
        # use the first one that actually has a mime_type. Fall back to ""
        # (instead of raising IndexError) when none of them has one.
        confidence = results[0].confidence
        return next(
            (
                match.mime_type
                for match in results
                if match.confidence == confidence and match.mime_type
            ),
            "",
        )
 def test_magic_string_with_filename_hint(self):
     """String identification: magic_string with hint|

     Reads the ``test.xlsx`` fixture from ``OFFICE_DIR`` and checks that,
     when the filename is supplied as a hint, the first match returned by
     ``puremagic.magic_string`` has the ``.xlsx`` extension.
     """
     xlsx_path = os.path.join(OFFICE_DIR, "test.xlsx")
     with open(xlsx_path, "rb") as handle:
         payload = handle.read()
     matches = puremagic.magic_string(payload, filename=xlsx_path)
     first_match = matches[0]
     self.assertEqual(".xlsx", first_match.extension)
 def test_magic_string_with_filename_hint(self):
     """String identification: magic_string with hint|

     Reads the ``test.xlsx`` fixture from ``OFFICE_DIR`` and checks that,
     when the filename is supplied as a hint, the first element of the
     first match returned by ``puremagic.magic_string`` is ``.xlsx``.
     """
     xlsx_path = os.path.join(OFFICE_DIR, "test.xlsx")
     with open(xlsx_path, "rb") as handle:
         payload = handle.read()
     matches = puremagic.magic_string(payload, filename=xlsx_path)
     first_match = matches[0]
     # NOTE(review): positional access assumes the extension is the first
     # field of a match; confirm against the puremagic version in use.
     self.assertEqual(".xlsx", first_match[0])
示例#4
0
 def mime_type(self):
     """Return the mime type to use for this object.

     Resolution order:

     1. ``self._mime_type`` when it is set (truthy).
     2. ``self.default_mime_type`` when ``self.auto_detect_mime_type`` is
        falsy.
     3. The value ``find_mime_type_recursive`` extracts from puremagic's
        matches for the file at ``self.path`` (when that path is a readable
        regular file) or, when no path is set, for ``self.body``.
     4. ``self.default_mime_type``, after logging a warning, whenever
        detection is not possible or yields nothing.
     """
     if self._mime_type:
         return self._mime_type
     if not self.auto_detect_mime_type:
         return self.default_mime_type

     path_is_readable_file = (
         self.path
         and os.path.isfile(self.path)
         and os.access(self.path, os.R_OK)
     )
     if path_is_readable_file:
         try:
             detected = find_mime_type_recursive(
                 puremagic.magic_file(self.path))
         except (PureError, ValueError):
             # Mirror the body branch: a failed identification must fall
             # back to the default rather than escape from this accessor.
             # ValueError is what puremagic is expected to raise for empty
             # input (e.g. a zero-byte file).
             detected = None
         if detected:
             return detected
         logger.warning(
             "Can't detect mime type of file '%s'. Using default mime type: %s"
             % (self.path, self.default_mime_type))
         return self.default_mime_type

     if not self.path and self.body:
         try:
             detected = find_mime_type_recursive(
                 puremagic.magic_string(self.body))
         except PureError:
             detected = None
         if detected:
             return detected
         logger.warning(
             "Can't detect mime type of body. Using default mime type: %s"
             % self.default_mime_type)
         return self.default_mime_type

     # Reached when the path is set but is not a readable regular file, and
     # also when neither a path nor a body is available.
     logger.warning(
         "File '%s' isn't readable. Skipping mime type auto detection, using default mime type: %s"
         % (self.path, self.default_mime_type))
     return self.default_mime_type
 def test_string_with_confidence(self):
     """String identification: magic_string          |

     Checks that the first element of the first match for the
     ``self.mp4magic`` bytes equals ``self.expect_ext``, and that empty
     input is rejected with ``ValueError``.
     """
     matches = puremagic.magic_string(bytes(self.mp4magic))
     top_match = matches[0]
     # NOTE(review): positional access assumes the extension is the first
     # field of a match; confirm against the puremagic version in use.
     self.assertEqual(self.expect_ext, top_match[0])
     with self.assertRaises(ValueError):
         puremagic.magic_string("")
 def test_string_with_confidence(self):
     """String identification: magic_string          |

     Checks that the first match for the ``self.mp4magic`` bytes has the
     extension ``self.expect_ext``, and that empty input is rejected with
     ``ValueError``.
     """
     matches = puremagic.magic_string(bytes(self.mp4magic))
     top_match = matches[0]
     self.assertEqual(self.expect_ext, top_match.extension)
     with self.assertRaises(ValueError):
         puremagic.magic_string("")
 def test_string_with_confidence(self):
     """String identification: magic_string          |

     Checks that ``self.expect_ext`` is found at position ``[1][0][0]`` of
     the value returned for the ``self.mp4magic`` bytes.
     """
     outcome = puremagic.magic_string(bytes(self.mp4magic))
     # NOTE(review): presumably element 1 of the result holds the list of
     # matches and each match starts with its extension; confirm against
     # the puremagic version this test targets.
     candidates = outcome[1]
     self.assertEqual(self.expect_ext, candidates[0][0])
示例#8
0
def fetch_remote_file_to_storage(remote_url,
                                 upload_to='',
                                 allowed_mime_types=(),
                                 timeout=None):
    """
    Fetches a remote url (or decodes a base64 data URI), and stores the
    content in DefaultStorage under a content-hash based name.

    The mime type and file extension are derived from the content itself via
    puremagic, with a fallback check for SVG markup. SVG content is passed
    through ``scrubSvgElementTree`` before it is stored.

    :param remote_url: URL to download, or a base64 encoded ``data:`` URI
    :param upload_to: prefix of the generated storage name
    :param allowed_mime_types: non-empty collection of acceptable mime types
    :param timeout: optional timeout forwarded to ``requests.get``; the
        default ``None`` keeps the previous behaviour of no timeout
    :return: (status_code, new_storage_name). ``new_storage_name`` is None
        when nothing was stored; ``status_code`` is 200 for data URIs and
        None when the remote did not answer with 200.
    :raises SuspiciousFileOperation: when no allowed mime types are given,
        a data URI carries no base64 marker, the derived mime type is not
        allowed, or no file extension could be determined
    """
    SVG_MIME_TYPE = 'image/svg+xml'

    if not allowed_mime_types:
        raise SuspiciousFileOperation("allowed mime types must be passed in")

    magic_strings = None
    content = None
    status_code = None

    if _is_data_uri(remote_url):
        # data:[<MIME-type>][;charset=<encoding>][;base64],<data>
        base64_marker = re.search('base64', remote_url)
        if base64_marker is None:
            # Without the marker there is no payload offset to decode from;
            # fail explicitly instead of with an AttributeError on None.
            raise SuspiciousFileOperation(
                "data URI is not base64 encoded")
        # Skip one character past the marker so the comma is dropped too.
        content = base64.b64decode(remote_url[base64_marker.end() + 1:])
        magic_strings = puremagic.magic_string(content)
        status_code = 200

    store = DefaultStorage()

    if magic_strings is None:
        r = requests.get(remote_url, stream=True, timeout=timeout)
        try:
            if r.status_code == 200:
                content = r.content
                magic_strings = puremagic.magic_string(content)
                status_code = r.status_code
        finally:
            # With stream=True the connection is only released once the body
            # has been consumed or the response is closed, so always close.
            r.close()

    if not (magic_strings and content):
        return status_code, None

    derived_mime_type = None
    derived_ext = None

    # Use the first puremagic match whose mime type is allowed.
    for magic_string in magic_strings:
        candidate_mime_type = getattr(magic_string, 'mime_type', None)
        if candidate_mime_type in allowed_mime_types:
            derived_mime_type = candidate_mime_type
            derived_ext = getattr(magic_string, 'extension', None)
            break

    # Fallback: treat content as SVG when an opening tag appears in the
    # first 1024 bytes and the stripped content ends with the closing tag.
    if (not derived_mime_type
            and b'<svg' in content[:1024]
            and content.strip().endswith(b'</svg>')):
        derived_mime_type = SVG_MIME_TYPE
        derived_ext = '.svg'

    # Reject disallowed content before spending any effort parsing it.
    if derived_mime_type not in allowed_mime_types:
        raise SuspiciousFileOperation(
            "{} is not an allowed mime type for upload".format(
                derived_mime_type))

    if not derived_ext:
        raise SuspiciousFileOperation(
            "could not determine a file extension")

    string_to_write_to_file = content
    if derived_mime_type == SVG_MIME_TYPE:
        # NOTE(review): this parses untrusted XML; confirm that ET is a
        # hardened parser (e.g. defusedxml) rather than the plain stdlib one.
        stripped_svg_element = ET.fromstring(content)
        scrubSvgElementTree(stripped_svg_element)
        string_to_write_to_file = ET.tostring(stripped_svg_element) or content

    storage_name = '{upload_to}/cached/{filename}{ext}'.format(
        upload_to=upload_to,
        filename=hashlib.sha256(string_to_write_to_file).hexdigest(),
        ext=derived_ext)

    # The name is derived from the content hash, so an existing entry
    # already holds the same bytes and does not need to be rewritten.
    if not store.exists(storage_name):
        store.save(storage_name, io.BytesIO(string_to_write_to_file))
    return status_code, storage_name