def _load_magic_file(self):
    """Pick the magic rule files to use and (re)open both libmagic handles.

    The default rule set is ``constants.MAGIC_RULE_PATH`` followed by the
    system ``magic.mgc``, joined with ``:``. When ``self.use_cache`` is set,
    a ``custom_magic`` entry is downloaded from the ``system`` cachestore
    to ``/tmp/custom.magic``; if that succeeds it replaces the first rule
    file. A ``FileStoreException`` during the download keeps the default.

    Finally, while holding ``self.lock``, ``self.file_type`` and
    ``self.mime_type`` are opened and loaded with the selected rule files.
    """
    system_magic = '/usr/share/file/magic.mgc'
    self.magic_file = ':'.join((constants.MAGIC_RULE_PATH, system_magic))

    if self.use_cache:
        self.log.info("Checking for custom magic file...")
        with get_cachestore('system', config=self.config,
                            datastore=self.datastore) as cache:
            try:
                custom_magic = "/tmp/custom.magic"
                cache.download('custom_magic', custom_magic)
                self.magic_file = ':'.join((custom_magic, system_magic))
                self.log.info("Custom magic file loaded!")
            except FileStoreException:
                self.log.info("No custom magic file found.")

    with self.lock:
        # Handle producing textual descriptions.
        description_flags = magic.MAGIC_CONTINUE + magic.MAGIC_RAW
        self.file_type = magic.magic_open(description_flags)
        magic.magic_load(self.file_type, self.magic_file)

        # Same flags plus MAGIC_MIME for the MIME handle.
        self.mime_type = magic.magic_open(description_flags + magic.MAGIC_MIME)
        magic.magic_load(self.mime_type, self.magic_file)
def get_file_mime_type(name):
    """Return libmagic's MIME description of the file at path ``name``.

    A ``magic.Magic`` wrapper is created and its cookie is replaced by one
    opened with ``MAGIC_NONE | MAGIC_MIME | MAGIC_MIME_ENCODING`` and loaded
    with ``None`` as the database path. The value returned is whatever
    ``from_file`` reports for ``name``.
    """
    detector = magic.Magic()
    mime_flags = magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING
    detector.cookie = magic.magic_open(mime_flags)
    magic.magic_load(detector.cookie, None)
    return detector.from_file(name)
def create_dataset_series(path: str, preprocess: Callable[[str], str]) -> Iterable:
    """Create dataset series.

    This is a generator function: nothing (including the logging and the
    file-type detection) happens until the result is first iterated.

    Arguments:
        path: The path of the file with the data
        preprocess: Preprocessor function, applied to every line of
            plain-text and gzipped files. It is NOT applied to items
            loaded with numpy.

    Returns:
        The dataset series: preprocessed lines for ``text/*`` and gzip
        files, or the items obtained by iterating ``np.load(path)`` for
        ``application/octet-stream`` files.

    Raises:
        Exception: if the detected MIME type is none of the above.
    """
    filetyper = magic.Magic(mime=True)
    filetyper.flags |= magic.MAGIC_SYMLINK
    filetyper.cookie = magic.magic_open(filetyper.flags)
    magic.magic_load(filetyper.cookie, None)

    log("Loading {}".format(path))
    file_type = filetyper.from_file(path)

    if file_type.startswith('text/'):
        reader = PlainTextFileReader(path)
        for line in reader.read():
            yield preprocess(line)
    elif file_type in ('application/gzip', 'application/x-gzip'):
        gzreader = GZipReader(path)
        for line in gzreader.read():
            yield preprocess(line)
    elif file_type == 'application/octet-stream':
        # A plain ``return np.load(path)`` inside a generator only sets the
        # StopIteration value, so callers iterating the series would get
        # nothing. Delegate to the loaded object instead.
        yield from np.load(path)
    else:
        raise Exception("Unsupported data type: {}, file {}".format(
            file_type, path))
def get_mime_type(content):
    """Describe ``content`` with libmagic and split the description once.

    Two flavours of the ``magic`` module are supported: one exposing
    ``magic.open`` (handle with ``load``/``buffer``) and one exposing
    ``magic.Magic`` (handle with ``from_buffer``).

    Returns:
        The list produced by splitting the description on the first
        ``'; '`` -- presumably ``[mime_type, 'charset=...']``; verify
        against the installed binding.
    """
    if hasattr(magic, "open"):
        detector = magic.open(magic.MAGIC_MIME)
        detector.load()
        description = detector.buffer(content)
    else:
        detector = magic.Magic()
        mime_flags = magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING
        detector.cookie = magic.magic_open(mime_flags)
        magic.magic_load(detector.cookie, None)
        description = detector.from_buffer(content)

    return description.split('; ', 1)
def get_cleaned_text_file_content(uploaded_file):
    """Read uploaded file, try to fix up encoding to UTF-8 and
    transform line endings into Unix style, then return the content as
    a UTF-8 encoded byte string. Errors are reported as
    django.core.exceptions.ValidationError exceptions.

    A false-y ``uploaded_file`` yields the empty text string ``u""``.
    Files larger than 10 * 1000 * 1000 bytes, files that libmagic does
    not describe as text, files without a detectable charset and files
    that cannot be decoded with the detected charset are rejected."""

    if not uploaded_file:
        return u""

    if uploaded_file.size and uploaded_file.size > 10 * 1000 * 1000:
        raise ValidationError("Text file too large (size %s)." % uploaded_file.size)

    # Upload chunks are byte strings, so they must be joined with a bytes
    # separator (a text separator raises TypeError on Python 3).
    content = b"".join(uploaded_file.chunks())

    # try to fixup encoding
    import magic
    if hasattr(magic, "open"):
        m = magic.open(magic.MAGIC_MIME)
        m.load()
        filetype = m.buffer(content)
    else:
        m = magic.Magic()
        m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
        magic.magic_load(m.cookie, None)
        filetype = m.from_buffer(content)

    if not filetype.startswith("text"):
        raise ValidationError("Uploaded file does not appear to be a text file.")

    match = re.search(r"charset=([\w-]+)", filetype)
    if not match:
        raise ValidationError("File has unknown encoding.")

    encoding = match.group(1)
    # Always decode, ASCII included: the line-ending clean-up and the final
    # encode below need text, and a stray non-ASCII byte then surfaces as a
    # ValidationError instead of an unhandled decoding error.
    try:
        content = content.decode(encoding)
    except Exception as e:
        raise ValidationError("Error decoding file (%s). Try submitting with UTF-8 encoding or remove non-ASCII characters." % str(e))

    # turn line-endings into Unix style
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    return content.encode("utf-8")
def parse_file_charset(self):
    """Check that the beginning of the uploaded file looks like ASCII.

    The first 4096 bytes of ``self.fd.file`` are described by libmagic
    (through whichever ``magic`` binding is installed). If the description
    does not contain ``'ascii'`` an error is added to ``self.parsed_info``.
    """
    import magic

    upload = self.fd.file
    upload.seek(0)
    sample = upload.read(4096)

    if hasattr(magic, "open"):
        detector = magic.open(magic.MAGIC_MIME)
        detector.load()
        description = detector.buffer(sample)
    else:
        detector = magic.Magic()
        mime_flags = magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING
        detector.cookie = magic.magic_open(mime_flags)
        magic.magic_load(detector.cookie, None)
        description = detector.from_buffer(sample)

    if 'ascii' not in description:
        self.parsed_info.add_error('A plain text ASCII document must be submitted.')
def put_identify_custom_magic_file(**_):
    """
    Save a new version of identify's custom LibMagic file

    Variables:
    None

    Arguments:
    None

    Data Block:
    <current custom.magic file>

    Result example:
    {"success": True}
    """
    data = request.json.encode('utf-8')

    magic_file = None
    try:
        # Write the submission to a temporary file so libmagic can try to
        # load it; the file is removed again in the outer ``finally``.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            magic_file = tmp.name
            tmp.write(data)

        test = None
        try:
            test = magic.magic_open(magic.MAGIC_CONTINUE + magic.MAGIC_RAW)
            magic.magic_load(test, magic_file)
        except magic.MagicException:
            return make_api_response({'success': False}, "The magic file you have submitted is invalid.", 400)
        finally:
            # The handle only exists to validate the submission; release it
            # so that each request does not leak a libmagic cookie.
            if test is not None:
                magic.magic_close(test)
    finally:
        if magic_file and os.path.exists(magic_file):
            os.unlink(magic_file)

    with forge.get_cachestore('system', config=config, datastore=STORAGE) as cache:
        # A submission identical to the shipped rule file means "no custom
        # file": drop the cached override instead of storing a copy.
        if hashlib.sha256(data).hexdigest() == get_sha256_for_file(constants.MAGIC_RULE_PATH):
            cache.delete('custom_magic')
        else:
            cache.save('custom_magic', data, ttl=ADMIN_FILE_TTL, force=True)

    # Notify components watching to reload magic file
    event_sender.send('identify', 'magic')

    return make_api_response({'success': True})
def calc_magic(stream):
    """Return libmagic's description of ``stream``.

    If ``get_fd_path(stream)`` gives a path, that path is examined with
    ``magic_file``; otherwise the stream is rewound and its whole content
    is examined with ``magic_buffer`` (e.g. in-memory ``BytesIO`` streams).
    The result is passed through ``magic.maybe_decode``.

    The libmagic cookie is always closed before returning or raising.
    """
    # Missing python-magic features:
    # - magic_descriptor (https://github.com/ahupp/python-magic/pull/227)
    # - direct support for symlink flag
    magic_cookie = magic.magic_open(magic.MAGIC_SYMLINK)
    try:
        # Loading happens inside the try so the cookie is released even
        # when the database cannot be loaded.
        magic.magic_load(magic_cookie, None)

        fd_path = get_fd_path(stream)
        if fd_path:
            return magic.maybe_decode(magic.magic_file(magic_cookie, fd_path))

        # Handle BytesIO in-memory streams
        stream.seek(0, os.SEEK_SET)
        return magic.maybe_decode(magic.magic_buffer(magic_cookie, stream.read()))
    finally:
        magic.magic_close(magic_cookie)
def parse_file_charset(self):
    """Verify that the start of the uploaded file is reported as ASCII.

    Reads the first 4096 bytes of ``self.fd.file``, asks libmagic (via the
    installed ``magic`` binding) to describe them and records an error on
    ``self.parsed_info`` when ``'ascii'`` is absent from the description.
    """
    import magic

    upload = self.fd.file
    upload.seek(0)
    head = upload.read(4096)

    if hasattr(magic, "open"):
        handle = magic.open(magic.MAGIC_MIME)
        handle.load()
        described_as = handle.buffer(head)
    else:
        handle = magic.Magic()
        wanted = magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING
        handle.cookie = magic.magic_open(wanted)
        magic.magic_load(handle.cookie, None)
        described_as = handle.from_buffer(head)

    if 'ascii' not in described_as:
        self.parsed_info.add_error(
            'A plain text document must be submitted.')
def parse_file_charset(self):
    """Verify that the uploaded file is reported as ASCII or UTF-8.

    The complete content of ``self.fd.file`` is read and described by
    libmagic through the installed ``magic`` binding. When the description
    mentions neither ``'ascii'`` nor ``'utf-8'``, an error quoting the
    description is added to ``self.parsed_info``.
    """
    import magic

    upload = self.fd.file
    upload.seek(0)
    payload = upload.read()

    if hasattr(magic, "open"):
        handle = magic.open(magic.MAGIC_MIME)
        handle.load()
        described_as = handle.buffer(payload)
    else:
        handle = magic.Magic()
        wanted = magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING
        handle.cookie = magic.magic_open(wanted)
        magic.magic_load(handle.cookie, None)
        described_as = handle.from_buffer(payload)

    acceptable = 'ascii' in described_as or 'utf-8' in described_as
    if not acceptable:
        self.parsed_info.add_error('A plain text ASCII document is required.  '
            'Found an unexpected encoding: "%s".  '
            'You probably have one or more non-ascii characters in your file.' % described_as
        )
def get_cleaned_text_file_content(uploaded_file):
    """Read uploaded file, try to fix up encoding to UTF-8 and
    transform line endings into Unix style, then return the content as
    a UTF-8 encoded byte string. Errors are reported as
    django.forms.ValidationError exceptions.

    A false-y ``uploaded_file`` yields the empty text string ``u""``.
    Files larger than 10 * 1000 * 1000 bytes, files that libmagic does
    not describe as text, files without a detectable charset and files
    that cannot be decoded with the detected charset are rejected."""

    if not uploaded_file:
        return u""

    if uploaded_file.size and uploaded_file.size > 10 * 1000 * 1000:
        raise django.forms.ValidationError("Text file too large (size %s)." % uploaded_file.size)

    # Upload chunks are byte strings, so they must be joined with a bytes
    # separator (a text separator raises TypeError on Python 3).
    content = b"".join(uploaded_file.chunks())

    # try to fixup encoding
    import magic
    if hasattr(magic, "open"):
        m = magic.open(magic.MAGIC_MIME)
        m.load()
        filetype = m.buffer(content)
    else:
        m = magic.Magic()
        m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
        magic.magic_load(m.cookie, None)
        filetype = m.from_buffer(content)

    if not filetype.startswith("text"):
        raise django.forms.ValidationError("Uploaded file does not appear to be a text file.")

    match = re.search(r"charset=([\w-]+)", filetype)
    if not match:
        raise django.forms.ValidationError("File has unknown encoding.")

    encoding = match.group(1)
    # Always decode, ASCII included: the line-ending clean-up and the final
    # encode below need text, and a stray non-ASCII byte then surfaces as a
    # ValidationError instead of an unhandled decoding error.
    try:
        content = content.decode(encoding)
    except Exception as e:
        raise django.forms.ValidationError("Error decoding file (%s). Try submitting with UTF-8 encoding or remove non-ASCII characters." % str(e))

    # turn line-endings into Unix style
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    return content.encode("utf-8")
from typing import List, Any, Callable
import magic

# tests: lint, mypy

# pylint: disable=invalid-name
# Type alias: a reader is any callable that takes a list of strings and
# returns an arbitrary value.
Reader = Callable[[List[str]], Any]

# Module-wide file-type detector, created once at import time.
# It starts as a MIME-reporting ``magic.Magic``; the MAGIC_SYMLINK flag is
# then OR-ed into its flags and its cookie is replaced by a fresh one opened
# with the combined flags.
FILETYPER = magic.Magic(mime=True)
FILETYPER.flags |= magic.MAGIC_SYMLINK
FILETYPER.cookie = magic.magic_open(FILETYPER.flags)
# NOTE(review): ``None`` as database path presumably selects libmagic's
# default magic database -- confirm against the installed binding.
magic.magic_load(FILETYPER.cookie, None)
x[1] = re.compile(x[1], re.IGNORECASE) custom = re.compile(r'^custom: ', re.IGNORECASE) ssdeep_from_file = None magic_lock = None file_type = None mime_type = None if platform.system() != 'Windows': import magic magic_lock = threading.Lock() file_type = magic.magic_open(magic.MAGIC_CONTINUE + magic.MAGIC_RAW) magic.magic_load(file_type, constants.RULE_PATH) mime_type = magic.magic_open(magic.MAGIC_CONTINUE + magic.MAGIC_MIME) magic.magic_load(mime_type, constants.RULE_PATH) try: # noinspection PyUnresolvedReferences import ssdeep # ssdeep requires apt-get cython and pip install ssdeep ssdeep_from_file = ssdeep.hash_from_file except ImportError: pass # ssdeep_from_file will be None if we fail to import ssdeep. # Translate the match object into a sub-type label. def subtype(label):
'''Check the type of a given file.

You need to install python-magic and libmagic; this program is best run
on a Linux platform.
'''
import magic

# Open a libmagic handle with default flags and load its database.
ms = magic.magic_open(magic.MAGIC_NONE)
ms.load()

fileName = r'c:\windows\notepad.exe'
# Read the sample through a context manager so the handle is closed.
with open(fileName, 'rb') as sample:
    data = sample.read()

# Parenthesised form prints the same text on Python 2 and Python 3.
print(ms.buffer(data))
search_in_files is a Python text-search module. The function search_in_text_files searches files for a regexp and returns the list of files that match it. By default, the search recurses into subfolders. """ import os import re from magic import open as magic_open from magic import MAGIC_MIME MagicMime = magic_open(MAGIC_MIME) MagicMime.load() def recursion(expression, folder, files): results = [] list_nodes = os.listdir(folder) for node_name in list_nodes: if node_name not in ['..', '.', '.git'] and \ node_name[-1] != '~': if re.search(files, node_name): node_path = os.path.join(folder, node_name) if os.path.isdir(node_path): results.extend(recursion(expression, node_path, files)) else: ctype = MagicMime.file(node_path)
def __init__(self, magic_file):
    """Open a libmagic handle loaded from ``magic_file``.

    Args:
        magic_file: Path of the magic database as a text string; it is
            encoded to UTF-8 bytes before being passed to ``magic_load``.

    The thread that created the handle is remembered in ``self.thread``.
    """
    flags = (magic.MAGIC_COMPRESS | magic.MAGIC_MIME | magic.MAGIC_CONTINUE
             | magic.MAGIC_PRESERVE_ATIME | magic.MAGIC_ERROR
             | magic.MAGIC_MIME_ENCODING)
    self.cookie = magic.magic_open(flags)
    magic.magic_load(self.cookie, magic_file.encode('utf-8'))
    # current_thread() is the supported spelling; currentThread() is a
    # deprecated alias that returns the same object.
    self.thread = threading.current_thread()
class FileUtil(object):
    """Helpers for MIME detection, base64 encoding and zipping of
    files and in-memory buffers. All helpers are class methods."""

    # libmagic handle opened with MAGIC_MIME. It is not used by any method
    # of this class; kept because it is part of the class' public surface.
    MAGIC_LOAD = magic.magic_open(magic.MAGIC_MIME)
    # MIME type libmagic reports for a tiny base64 sample ("MQ==" is the
    # base64 encoding of "1"); used as the reference in is_base64_encoded.
    TEXT_PLAIN = magic.from_buffer(b'MQ==\n', mime=True)
    # Template for generated file names; %s receives a random suffix.
    AUTO_NAME = "autonamed_buffer_data-%s.bin"

    @classmethod
    def auto_name(cls):
        """Return AUTO_NAME filled with 6 random uppercase letters/digits."""
        rn = ''.join(
            random.choices(string.ascii_uppercase + string.digits, k=6))
        return cls.AUTO_NAME % rn

    @classmethod
    def multipart_file_tuple(cls, filename, buffer=None, content_type=None,
                             custom_header=None):
        """Build a ``(name, fileobj, content_type, headers)`` tuple.

        Args:
            filename: Path (or just a name when ``buffer`` is given); only
                its basename is put in the tuple.
            buffer: Optional bytes to use as content instead of the file.
            content_type: MIME type; detected from the buffer, or from the
                file when no buffer is given, if omitted.
            custom_header: Optional dict of extra headers. A new empty dict
                is used when omitted, so callers never share one instance.

        Returns:
            The tuple described above. When ``buffer`` is None the file
            object is an open binary file that the caller must close.
        """
        if custom_header is None:
            custom_header = {}

        if content_type is None:
            if buffer is None:
                content_type = cls.file_mime_type(filename)
            else:
                content_type = cls.buffer_mime_type(buffer)

        if buffer is None:
            fileobj = open(filename, 'rb')
        else:
            fileobj = io.BytesIO(buffer)

        fname = os.path.basename(filename)
        return (fname, fileobj, content_type, custom_header)

    @classmethod
    def buffer_mime_type(cls, buffer):
        """Return the MIME type libmagic reports for ``buffer``."""
        return magic.from_buffer(buffer, mime=True)

    @classmethod
    def file_mime_type(cls, filename):
        """Return the MIME type libmagic reports for the file ``filename``."""
        return magic.from_file(filename, mime=True)

    @classmethod
    def buffer_base64_string(cls, buffer, strip_new_lines=True):
        """Return ``buffer`` (bytes) base64-encoded as a text string.

        With ``strip_new_lines`` the line breaks inserted by
        ``base64.encodebytes`` are removed.
        """
        r = base64.encodebytes(buffer).decode('utf8')
        if strip_new_lines:
            r = r.replace('\n', '')
        return r

    @classmethod
    def file_base64_string(cls, filename, strip_new_lines=True):
        """Return the base64 text of a file, or None if it does not exist."""
        if not os.path.exists(filename):
            return None
        with open(filename, 'rb') as fh:
            content = fh.read()
        return cls.buffer_base64_string(content, strip_new_lines)

    @classmethod
    def load_file(cls, filename):
        """Return ``(mime_type, content)`` of a file.

        Returns ``(None, None)`` when the file does not exist.
        """
        if not os.path.exists(filename):
            return None, None
        with open(filename, 'rb') as fh:
            content = fh.read()
        return cls.buffer_mime_type(content), content

    @classmethod
    def is_base64_encoded(cls, buffer):
        """Return True if the text ``buffer`` looks like base64 data.

        The buffer must be detected as TEXT_PLAIN and must decode as
        base64; any failure while encoding/decoding yields False.
        """
        mt = cls.buffer_mime_type(buffer)
        if mt != cls.TEXT_PLAIN:
            return False
        try:
            base64.decodebytes(buffer.encode('utf8'))
        except Exception:
            # Deliberately broad (bad padding, non-text input, ...), but no
            # longer swallows KeyboardInterrupt/SystemExit as a bare
            # ``except:`` did.
            return False
        return True

    @classmethod
    def zip_content(cls, name, buffer) -> bytes:
        """Return the bytes of a deflated zip archive holding one member,
        ``name``, whose content is ``buffer``."""
        obj = io.BytesIO()
        # The context manager finalises the archive and also closes it if
        # writing the member fails.
        with zipfile.ZipFile(obj, "a", zipfile.ZIP_DEFLATED, False) as zf_obj:
            zf_obj.writestr(name, buffer)
        return obj.getvalue()

    @classmethod
    def zip_content_b64(cls, name, buffer, strip_new_lines=True) -> str:
        """Zip ``buffer`` as member ``name`` and return the archive as a
        base64 text string (see buffer_base64_string)."""
        data = cls.zip_content(name, buffer)
        return cls.buffer_base64_string(data, strip_new_lines=strip_new_lines)