def match_video_audio_type_pattern(resource: bytes) -> MIMEType:
    """
    Match a resource against the audio or video type patterns, as specified in:
    https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern

    The table of audio/video patterns is consulted first; only when it
    yields nothing are the MP4, WebM and MP3 checks tried, in that order.

    Returns:
        The matching MIME type, or const.UNDEFINED when nothing matches.
    """
    table_match = match_pattern_from_table(resource, const.AUDIO_VIDEO_PATTERNS)
    if table_match != const.UNDEFINED:
        return table_match

    # Checks that are not expressed as table rows, paired with the MIME type
    # string reported when the check succeeds. Order matters.
    signature_checks = (
        (is_mp4_pattern, 'video/mp4'),
        (is_webm_pattern, 'video/webm'),
        (is_mp3_pattern, 'audio/mpeg'),
    )
    for has_signature, essence in signature_checks:
        if has_signature(resource):
            return parse_mime_type(essence)

    # Nothing matched: hand back the (undefined) result of the table lookup.
    return table_match
def test_parse_mime_type_basic():
    """A bare "type/subtype" string is split into its type and subtype."""
    parsed = mimetype.parse_mime_type("text/html")

    assert parsed.type == "text"
    assert parsed.subtype == "html"
def test_parse_mime_type_single_quoted_parameter():
    """
    A single parameter with a quoted value is parsed.

    The input carries extra text right after the closing quote; the test
    expects exactly one parameter whose value is the quoted part only.
    """
    raw = 'text/html;charset="shift_jis"iso-2022-jpi'
    parsed = mimetype.parse_mime_type(raw)

    assert parsed.type == "text"
    assert parsed.subtype == "html"

    assert len(parsed.parameters) == 1
    assert "charset" in parsed.parameters
    assert parsed.parameters['charset'] == "shift_jis"
def test_parse_mime_type_single_unquoted_parameter():
    """A single parameter with an unquoted value is parsed and its value kept as written."""
    raw = "text/html;charset=ISO-8859-1"
    parsed = mimetype.parse_mime_type(raw)

    assert parsed.type == "text"
    assert parsed.subtype == "html"

    assert len(parsed.parameters) == 1
    assert "charset" in parsed.parameters
    assert parsed.parameters['charset'] == "ISO-8859-1"
def get_resource_test_list(tags):
    """
    Build the argument list for @pytest.mark.parametrize from the test files.

    The files metadata is filtered by ``tags``; for each remaining entry the
    file is read in binary mode from the test files directory and paired
    with its expected MIME type.

    Returns:
        A list of ``pytest.param(expected_mime_type, resource)`` entries,
        each with an id of the form ``"<path> => <expected MIME type>"``.
    """
    params = []
    for entry in filter_files_metadata(tags):
        relative_path = entry['path']
        mime_string = entry['expected_mime_type']

        full_path = os.path.join(TEST_FILES_PATH, relative_path)
        expected_mime_type = parse_mime_type(mime_string)

        with open(full_path, "rb") as handle:
            resource = handle.read()

        params.append(
            pytest.param(
                expected_mime_type,
                resource,
                id=f"{relative_path} => {mime_string}",
            )
        )
    return params
def match_pattern_from_table(
    resource: bytes, table: List[List[bytes]]
) -> "MIMEType":
    """
    Return the MIME type of the first row of a pattern table that matches.

    Each row is read positionally: ``row[0]`` is the byte pattern,
    ``row[1]`` the pattern mask, ``row[2]`` the bytes to ignore and
    ``row[3]`` the MIME type string handed to ``parse_mime_type``.

    Args:
        resource: Byte sequence to test against every row.
        table: Rows of pattern data in the layout described above.

    Returns:
        The parsed MIME type of the first row whose pattern matches the
        resource, or ``const.UNDEFINED`` when no row matches.
    """
    for row in table:
        matched = match_pattern(
            resource=resource,
            pattern=row[0],
            mask=row[1],
            ignored=row[2],
        )
        if matched:
            # Parse the MIME type only for the winning row; parsing it up
            # front for every row would be wasted work on each miss.
            return parse_mime_type(row[3])
    return const.UNDEFINED
def match_image_type_pattern(resource: bytes) -> "MIMEType":
    """
    Match a resource against the image type patterns, as specified in:
    https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern

    Args:
        resource: Byte sequence to test against the image patterns.

    Returns:
        The image MIME type of the first row of ``const.IMAGE_PATTERNS``
        that matches the resource, or ``const.UNDEFINED`` otherwise.
    """
    # The row-by-row walk is exactly what match_pattern_from_table does, so
    # delegate to it instead of keeping a second copy of the loop here.
    return match_pattern_from_table(resource, const.IMAGE_PATTERNS)
def test_sniff_mislabeled_binary(self, mime, resource):
    """Check sniff_mislabeled_binary on hand-built plain text and binary inputs."""
    # NOTE(review): the result of mimetype_is_equal is not asserted here;
    # presumably the helper asserts internally - confirm.
    mimetype_is_equal(
        sniffpy.sniff_mislabeled_binary(resource),  # type computed by the sniffer
        parse_mime_type(mime),                      # type the test case expects
    )
def test_match_image_pattern(self, mime, resource):
    """Check the most important image MIME types against simulated content."""
    # NOTE(review): the result of mimetype_is_equal is not asserted here;
    # presumably the helper asserts internally - confirm.
    mimetype_is_equal(
        match.match_image_type_pattern(resource),  # type computed by the matcher
        parse_mime_type(mime),                     # type the test case expects
    )