def get(self, request, mid=None):
    """Serve the stored file of the ``Medium`` whose id is ``mid``.

    The MIME type is sniffed from the file on disk with ``magic_file`` and
    decides how the file is returned:

    * JPEG, PNG and GIF images are re-encoded as PNG and written into an
      ``HttpResponse``.
    * PDF, Python/plain/HTML text, MPEG/MP4 video and MPEG audio are handed
      to ``stream`` with their detected content type.
    * Anything else is handed to ``stream`` as an
      ``application/octet-stream`` attachment.

    ``request`` is not used. The ``Medium.objects.get`` lookup is not
    guarded, so any exception it raises propagates to the caller.
    """
    media = Medium.objects.get(id=mid)
    full_path = media.media.path
    mime_type = magic_file(full_path, mime=True)

    if mime_type in ('image/jpeg', 'image/png', 'image/gif'):
        # The payload is always re-encoded as PNG below, so the header must
        # say PNG too; advertising the sniffed type (e.g. image/jpeg) would
        # contradict the bytes actually sent.
        response = HttpResponse(content_type='image/png')
        im = img_open(full_path)
        im.save(response, 'PNG')
        return response

    if mime_type in (
        'application/pdf',
        'text/x-python',
        'text/plain',
        'text/html',
        'video/mpeg',
        'video/mp4',
        'audio/mpeg',
    ):
        return stream(full_path, contentType=mime_type)

    # Unknown type: offer it as a generic binary download.
    return stream(full_path, mode='attachment', contentType='application/octet-stream')
def calc_magic(stream):
    """Return the libmagic result for ``stream``.

    A fresh magic cookie is opened with ``MAGIC_SYMLINK`` and the default
    database is loaded (``magic_load(cookie, None)``). If
    ``get_fd_path(stream)`` yields a path, that path is identified with
    ``magic_file``; otherwise the stream is rewound to offset 0, read in
    full, and identified with ``magic_buffer``. The result is passed
    through ``magic.maybe_decode`` before being returned.

    The cookie is always closed, including when loading the database or
    the identification itself raises.
    """
    # Missing python-magic features:
    # - magic_descriptor (https://github.com/ahupp/python-magic/pull/227)
    # - direct support for symlink flag
    magic_cookie = magic.magic_open(magic.MAGIC_SYMLINK)
    try:
        # Loading happens inside the try so a failed load cannot leak the cookie.
        magic.magic_load(magic_cookie, None)
        fd_path = get_fd_path(stream)
        if fd_path:
            return magic.maybe_decode(magic.magic_file(magic_cookie, fd_path))
        # Handle BytesIO in-memory streams
        stream.seek(0, os.SEEK_SET)
        return magic.maybe_decode(magic.magic_buffer(magic_cookie, stream.read()))
    finally:
        magic.magic_close(magic_cookie)
def from_file(self, path):
    """Identify the file at ``path`` using this object's magic cookie.

    ``path`` is encoded to UTF-8 bytes and passed, together with
    ``self.cookie``, to ``magic.magic_file``; whatever that call returns
    is returned unchanged.
    """
    identify = magic.magic_file
    return identify(self.cookie, path.encode('utf-8'))
def ident(buf, length: int, path) -> Dict:
    """Identify a file from its leading bytes and libmagic's view of ``path``.

    Args:
        buf: Bytes from the start of the file; it is sliced and searched
            with ``find``.
        length: Number of usable bytes in ``buf``. When ``<= 0`` the
            default result is returned without inspecting anything.
        path: Path handed to ``magic.magic_file``.

    Returns:
        A dict with the keys ``ascii`` and ``hex`` (``dotdump`` / hexlified
        form of the first ``min(64, length)`` bytes), ``magic`` and ``mime``
        (the first label / mime libmagic returned, when present) and
        ``type``, which defaults to ``'unknown'``.

    ``type`` is resolved in priority order: a label matching ``custom``,
    then a mime found in ``trusted_mimes``, then the first label that
    matches an entry of ``tl_patterns``. A type not marked in
    ``recognized`` is reset to ``'unknown'``. Finally, for
    ``'document/office/unknown'`` the OLE root entry CLSID is looked up in
    ``OLE_CLSID_GUIDs``.

    Errors raised while querying or matching libmagic output are printed
    and otherwise ignored; errors in the CLSID lookup are ignored silently.
    """
    data = {
        'ascii': None,
        'hex': None,
        'magic': None,
        'mime': None,
        'type': 'unknown',
    }

    if length <= 0:
        return data

    header = buf[:min(64, length)]
    data['ascii'] = dotdump(header)
    data['hex'] = safe_str(hexlify(header))

    # noinspection PyBroadException
    try:
        # Collect the labels and mimes returned by libmagic; continuation
        # entries come back prefixed with "- ", which is stripped.
        labels = []
        if file_type:
            with magic_lock:
                labels = magic.magic_file(file_type, path).split(b'\n')
                labels = [
                    label[2:] if label.startswith(b'- ') else label
                    for label in labels
                ]

        mimes = []
        if mime_type:
            with magic_lock:
                mimes = magic.magic_file(mime_type, path).split(b'\n')
                mimes = [
                    mime[2:] if mime.startswith(b'- ') else mime
                    for mime in mimes
                ]

        # For user feedback set the mime and magic meta data to always be
        # the primary libmagic responses
        if labels:
            data['magic'] = safe_str(labels[0])

        if mimes and mimes[0] != b'':
            data['mime'] = safe_str(mimes[0])

        # Highest priority is given to labels produced by custom rules
        tagged = False
        for label in labels:
            label = dotdump(label)
            if custom.match(label):
                data['type'] = label.split('custom: ')[1].strip()
                tagged = True
                break

        # Second priority is mime types marked as trusted
        if not tagged:
            for mime in mimes:
                mime = dotdump(mime)
                if mime in trusted_mimes:
                    data['type'] = trusted_mimes[mime]
                    tagged = True
                    break

        # As a third priority try matching the tl_patterns
        if not tagged:
            minimum = len(tl_patterns)
            sl_tag = None

            # Try each label and see how far down the tl_patterns list we go
            # before we hit a match; the closer to the beginning of the list
            # we are, the better the tag match is. The final line of
            # tl_patterns matches anything and sets tag to 'unknown', so this
            # loop should never finish with sl_tag as None unless the
            # tl_patterns table has been changed inappropriately.
            for label in labels:
                label = dotdump(label)

                # ... match against our patterns and, ...
                index = 0
                for entry in tl_patterns:
                    if index >= minimum:
                        break
                    if entry[1].search(label):  # pylint:disable=E1101
                        break
                    index += 1

                # ... keep highest precedence (lowest index) match.
                if index < minimum:
                    minimum = index
                    sl_tag = subtype(label)
                    # If a label does match, take the best from that label.
                    # Further labels from magic are probably terrible.
                    break

            assert sl_tag is not None, "tl_patterns seems to be missing a match all => unknown rule at the end"

            # Based on the sub tag we found, figure out the top level tag to use
            tl_tag = sl_to_tl.get(sl_tag, tl_patterns[minimum][0])
            data['type'] = '/'.join((tl_tag, sl_tag))
    except Exception as e:
        # Identification is best-effort: report the problem and carry on
        # with whatever was determined so far.
        print(str(e))

    if not recognized.get(data['type'], False):
        data['type'] = 'unknown'

    if data['type'] == 'document/office/unknown':
        # noinspection PyBroadException
        try:
            root_entry_property_offset = buf.find(u"Root Entry".encode("utf-16-le"))
            if -1 != root_entry_property_offset:
                # Get root entry's GUID and try to guess document type
                clsid_offset = root_entry_property_offset + 0x50
                if len(buf) >= clsid_offset + 16:
                    clsid = buf[clsid_offset:clsid_offset + 16]
                    # Compare against a bytes literal: comparing the bytes
                    # slice with a str is always unequal on Python 3, which
                    # made the all-zero CLSID check a no-op.
                    if len(clsid) == 16 and clsid != b"\0" * len(clsid):
                        clsid_str = uuid.UUID(bytes_le=clsid)
                        clsid_str = clsid_str.urn.rsplit(':', 1)[-1].upper()
                        if clsid_str in OLE_CLSID_GUIDs:
                            data['type'] = OLE_CLSID_GUIDs[clsid_str]
        except Exception:
            pass

    return data
def ident(self, buf, length: int, path) -> Dict:
    """Identify a file from its leading bytes and libmagic's view of ``path``.

    Args:
        buf: Bytes from the start of the file; it is sliced and searched
            with ``find``.
        length: Number of usable bytes in ``buf``. When ``<= 0`` the
            default result is returned without inspecting anything.
        path: Path handed to ``magic.magic_file``.

    Returns:
        A dict with the keys ``ascii`` and ``hex`` (``dotdump`` / hexlified
        form of the first ``min(64, length)`` bytes), ``magic`` (first
        label after special-case reordering), ``mime`` (first non-empty
        mime) and ``type``, which defaults to ``"unknown"``.

    ``type`` is resolved in priority order: a label matching
    ``self.custom``, then a mime found in ``self.trusted_mimes``, then the
    first label matching an entry of ``self.compiled_magic_patterns``.
    Errors in that stage are logged through ``self.log`` and otherwise
    ignored; errors in the final OLE CLSID lookup are ignored silently.
    """
    data = {
        "ascii": None,
        "hex": None,
        "magic": None,
        "mime": None,
        "type": "unknown"
    }

    # Nothing to look at: return the defaults untouched.
    if length <= 0:
        return data

    # Only the first 64 bytes (or fewer) are dumped for display.
    header = buf[:min(64, length)]
    data["ascii"] = dotdump(header)
    data["hex"] = safe_str(hexlify(header))

    # noinspection PyBroadException
    try:
        # Loop over the labels returned by libmagic, ...
        labels = []
        mimes = []

        # Both libmagic queries are made while holding self.lock. When
        # libmagic raises, the exception's message is used in place of the
        # result.
        with self.lock:
            try:
                labels = magic.magic_file(self.file_type, path).split(b"\n")
            except magic.MagicException as me:
                labels = me.message.split(b"\n")
            try:
                mimes = magic.magic_file(self.mime_type, path).split(b"\n")
            except magic.MagicException as me:
                mimes = me.message.split(b"\n")

        # Drop the "- " prefix from prefixed entries and trim whitespace.
        mimes = [
            mime[2:].strip() if mime.startswith(b"- ") else mime.strip()
            for mime in mimes
        ]
        labels = [
            label[2:].strip() if label.startswith(b"- ") else label.strip()
            for label in labels
        ]

        # For user feedback set the mime and magic meta data to always be the primary
        # libmagic responses
        if len(labels) > 0:

            def find_special_words(word, labels):
                # Index of the first label containing `word`, or -1 if none does.
                for index, label in enumerate(labels):
                    if word in label:
                        return index
                return -1

            # If an expected label is not the first label returned by Magic, then make it so
            # Manipulating the mime accordingly varies between special word cases
            # Each case is (label fragment, whether the matching mime is moved too).
            special_word_cases = [
                (b"OLE 2 Compound Document : Microsoft Word Document", False),
                (b"Lotus 1-2-3 WorKsheet", True),
            ]
            for word, alter_mime in special_word_cases:
                index = find_special_words(word, labels)
                if index >= 0:
                    labels.insert(0, labels.pop(index))
                    # The mime at the same index is only moved when both
                    # lists have the same length.
                    if len(labels) == len(mimes) and alter_mime:
                        mimes.insert(0, mimes.pop(index))
            data["magic"] = safe_str(labels[0])

        # Report the first non-empty mime.
        for mime in mimes:
            if mime != b"":
                data["mime"] = safe_str(mime)
                break

        # First, let's try to find any custom types
        for label in labels:
            label = dotdump(label)

            if self.custom.match(label):
                data["type"] = label.split("custom: ")[1].strip()
                break

        # Second priority is mime types marked as trusted
        if data["type"] == "unknown":
            # Take the reference under the lock, then iterate outside it.
            with self.lock:
                trusted_mimes = self.trusted_mimes

            for mime in mimes:
                mime = dotdump(mime)

                if mime in trusted_mimes:
                    data["type"] = trusted_mimes[mime]
                    break

        # As a third priority try matching the magic_patterns
        if data["type"] == "unknown":
            found = False
            with self.lock:
                compiled_magic_patterns = self.compiled_magic_patterns

            # The first (label, pattern) pair that matches wins; `found`
            # carries the inner break out of the outer loop.
            for label in labels:
                for entry in compiled_magic_patterns:
                    if entry[1].search(dotdump(label)):  # pylint: disable=E1101
                        data['type'] = entry[0]
                        found = True
                        break
                if found:
                    break

    except Exception as e:
        # Identification is best-effort: log and continue with what we have.
        self.log.error(
            f"An error occured during file identification: {e.__class__.__name__}({str(e)})"
        )
        pass

    # If mime is text/* and type is unknown, set text/plain to trigger
    # language detection later.
    if data["type"] == "unknown" and data['mime'] is not None and data[
            'mime'].startswith("text/"):
        data["type"] = "text/plain"

    # Lookup office documents by GUID if we're still not sure what they are
    if data["type"] == "document/office/unknown":
        # noinspection PyBroadException
        try:
            root_entry_property_offset = buf.find(
                u"Root Entry".encode("utf-16-le"))
            if -1 != root_entry_property_offset:
                # Get root entry's GUID and try to guess document type
                clsid_offset = root_entry_property_offset + 0x50
                if len(buf) >= clsid_offset + 16:
                    clsid = buf[clsid_offset:clsid_offset + 16]
                    # Skip an all-zero CLSID.
                    if len(clsid) == 16 and clsid != b"\0" * len(clsid):
                        clsid_str = uuid.UUID(bytes_le=clsid)
                        clsid_str = clsid_str.urn.rsplit(":", 1)[-1].upper()
                        if clsid_str in OLE_CLSID_GUIDs:
                            data["type"] = OLE_CLSID_GUIDs[clsid_str]
                        else:
                            # Unknown CLSID: a UTF-16-LE "Details" string
                            # within 0x100 bytes past the root entry offset
                            # marks the file as quarantine/mcafee.
                            bup_details_offset = buf[:
                                                     root_entry_property_offset
                                                     + 0x100].find(
                                                         u"Details".encode(
                                                             "utf-16-le"))
                            if -1 != bup_details_offset:
                                data["type"] = "quarantine/mcafee"
        except Exception:
            pass

    return data
def contentType(self):
    """Return the MIME type that ``magic_file`` reports for ``self.media.path``."""
    # A second, unreachable ``return`` that referenced the undefined name
    # ``mime_type`` used to follow this line; it could never run and was removed.
    return magic_file(self.media.path, mime=True)