def content_type(self, content):
    """Return the MIME type libmagic detects for an in-memory payload.

    The payload is written to a named temporary file because detection is
    done through ``magic.detect_from_filename``.

    :param content: Raw bytes to classify.
    :return: The ``mime_type`` reported for the payload.
    """
    temp = tempfile.NamedTemporaryFile(delete=False)
    try:
        try:
            temp.write(content)
        finally:
            # Close before detection so the data is flushed to disk.
            temp.close()
        return magic.detect_from_filename(temp.name).mime_type
    finally:
        # Remove the scratch file even when writing or detection fails.
        os.remove(temp.name)
def file_mimetype_filemagic(filepath):
    """
    Look up the mimetype of an existing file through the file-magic module.

    :raises ImportError: ``magic`` python module not available.
    :raises IOError: ``filepath`` refers to a directory (errno 21) or to
        nothing at all (errno 2).

    :param filepath: Path to the (existing) file to determine the mimetype of.
    :type filepath: str

    :return: MIMETYPE string identifier.
    :rtype: str

    """
    # Imported first so a missing module is reported before any path checks.
    # noinspection PyUnresolvedReferences
    import magic  # type: ignore

    if not os.path.isfile(filepath):
        if os.path.isdir(filepath):
            raise IOError(21, "Is a directory: '%s'" % filepath)
        raise IOError(2, "No such file or directory: '%s'" % filepath)

    return magic.detect_from_filename(filepath).mime_type
async def sanitize(self):
    """Start sanitization tasks"""
    detected = magic.detect_from_filename(str(self.path))
    mimetype = detected.mime_type

    # Audio and video inputs are rejected outright.
    if mimetype.startswith(("video/", "audio/")):
        raise ValueError

    if mimetype.startswith("image/"):
        # An image is treated as a single page.
        self.pagenums = 1
    elif mimetype == "application/pdf":
        self._verify_password_pdf()
        self.pagenums = self._pagenums()
    else:
        # Everything else goes through the office-to-PDF conversion first.
        self._convert_office_file_to_pdf()
        self.pagenums = self._pagenums()

    self.batch = asyncio.Queue(self.pagenums)
    send(self.pagenums)

    publisher = asyncio.create_task(self._publish())
    consumer = asyncio.create_task(self._consume())

    try:
        await asyncio.gather(publisher, consumer)
    except subprocess.CalledProcessError:
        await cancel_task(publisher)

        # Drain the queue, cancelling every conversion task still waiting in it.
        while not self.batch.empty():
            pending = await self.batch.get()
            await cancel_task(pending)
            self.batch.task_done()

        raise
def TypeOfFile(path: str, default: str = None) -> str:
    """Return the MIME type libmagic reports for ``path``.

    Detection is best-effort: any failure yields ``default`` instead of
    raising.

    :param path: Path of the file to inspect.
    :param default: Value returned when detection fails (``None`` if omitted).
    :return: The detected MIME type, or ``default``.
    """
    try:
        # Inspect the file named by ``path`` (the builtin ``str`` type was
        # being passed here before, so detection could never succeed).
        return magic.detect_from_filename(path).mime_type
    except Exception:
        return default
def copy_file_to_python_env(ar, f):
    """Add file ``f`` to archive ``ar`` under its relocated ``lib64/`` name.

    Interpreter binaries under ``/usr/bin/python`` get a thunk plus a fixed-up
    binary instead. Any other path must live under ``/lib``, ``/lib64``,
    ``/usr/lib`` or ``/usr/lib64``.

    :param ar: Archive object; only its ``add`` method is used directly here.
    :param f: Absolute path of the file to package.
    :raises RuntimeError: ``f`` is outside the locations listed above.
    """
    if f.startswith("/usr/bin/python"):
        gen_python_thunk(ar, os.path.basename(f))
        fix_python_binary(ar, f)
        return

    # python tends to install in both /usr/lib and /usr/lib64, which doesn't
    # mean it is a package for the wrong arch, so both /lib and /lib64 are
    # handled. Copying blindly from /lib could be a problem, but the i686
    # packages were filtered out during dependency generation.
    relocated = f
    if relocated.startswith("/usr/"):
        relocated = relocated[len("/usr"):]

    if relocated.startswith("/lib/"):
        relocated = "lib64/" + relocated[len("/lib/"):]
    elif relocated.startswith("/lib64/"):
        relocated = "lib64/" + relocated[len("/lib64/"):]
    else:
        raise RuntimeError("unexpected path: don't know what to do with {}".format(f))

    # Copy the file instead of the link unless the link points into the
    # current directory. Such links are usually safe, but because the
    # directory structure is being rearranged, links that traverse paths are
    # very likely to break.
    leaves_directory = False
    if os.path.islink(f):
        link_target = os.readlink(f)
        leaves_directory = link_target != os.path.basename(link_target)

    if leaves_directory:
        ar.add(os.path.realpath(f), arcname=relocated)
        return

    m = magic.detect_from_filename(f)
    binary_types = ('application/x-sharedlib', 'application/x-pie-executable')
    if m and m.mime_type.startswith(binary_types):
        fix_sharedlib(ar, f, relocated)
        return

    # In case a directory is listed, do not include everything inside it: for
    # instance the python3 package owns site-packages, but packages that are
    # not being shipped could have filled it with stuff.
    ar.add(f, arcname=relocated, recursive=False)
def get_file_lines(self, filename: str):
    """
    Get lines from a file, which may or may not be compressed. If compressed then it will be uncompressed using ``gzip``
    as the algorithm.

    WIM images are not read directly; the output lines of ``/usr/bin/wiminfo`` are returned for them instead.

    Note that the gzip branch opens the file with mode ``'r'``, which ``gzip.open`` treats as binary, so its lines are
    ``bytes`` while the plain-text branch yields ``str``.

    :param filename: The name of the file to be read.
    :return: An array with all the lines, or an empty list when nothing could be read.
    """
    ftype = magic.detect_from_filename(filename)
    mime_type = ftype.mime_type

    if mime_type == "application/gzip":
        try:
            with gzip.open(filename, 'r') as f:
                return f.readlines()
        except Exception as error:
            # Stay best-effort, but leave a trace instead of failing silently.
            self.logger.info('Could not read gzip compressed file "%s": %s', filename, error)

    if mime_type == "application/x-ms-wim":
        cmd = "/usr/bin/wiminfo"
        if os.path.exists(cmd):
            cmd = "%s %s" % (cmd, filename)
            return utils.subprocess_get(cmd).splitlines()

        self.logger.info("no %s found, please install wimlib-utils", cmd)
    elif mime_type == "text/plain":
        with open(filename, 'r') as f:
            return f.readlines()
    else:
        self.logger.info(
            'Could not detect the filetype and read the content of file "%s". Returning nothing.', filename)

    return []
def known_errors(logfile, fail_log):
    """Search a build log for a known error and record the first match.

    The log, gzip-compressed or plain, is mapped into memory. Each pattern in
    ``err_type`` is tried in order; whenever one does not match, the generic
    ``error: (.*)`` pattern is tried before moving on. The first match is
    printed and handed to ``write_log``.

    :param logfile: Path of the log file; a missing or empty file is ignored.
    :param fail_log: Destination passed through to ``write_log``.
    """
    # Check existence first: os.path.getsize() raises on a missing file.
    if not os.path.exists(logfile):
        return
    sz = os.path.getsize(logfile)
    if sz <= 0:
        return

    if magic.detect_from_filename(logfile).mime_type == 'application/gzip':
        with open(logfile, "rb") as handle:
            tmp_mm = mmap.mmap(handle.fileno(), sz, access=mmap.ACCESS_READ)
            try:
                # The last four bytes of a gzip stream hold the uncompressed
                # size as a little-endian unsigned 32-bit integer.
                real_sz = struct.unpack("<I", tmp_mm[-4:])[0]
                if real_sz == 0:
                    # Nothing to scan, and a zero-length mapping cannot be created.
                    return
                # Anonymous mapping that receives the unpacked log.
                mm = mmap.mmap(-1, real_sz, prot=mmap.PROT_READ | mmap.PROT_WRITE)
                try:
                    gz = gzip.GzipFile(fileobj=tmp_mm)
                    try:
                        for line in gz:
                            mm.write(line)
                    finally:
                        gz.close()
                except BaseException:
                    # Do not leak the anonymous mapping if unpacking fails.
                    mm.close()
                    raise
            finally:
                tmp_mm.close()
    else:
        with io.open(logfile, "r", encoding="utf-8") as msgf:
            mm = mmap.mmap(msgf.fileno(), sz, access=mmap.ACCESS_READ)

    try:
        common_pattern = 'error: (.*)'.encode("utf-8")
        for pat in err_type:
            error = re.search(pat.encode("utf-8"), mm)
            if not error:
                error = re.search(common_pattern, mm)
            if error:
                message = error.group(0).decode('utf-8')
                print(message)
                write_log(message, fail_log)
                break
    finally:
        mm.close()
def cache(self, entry):
    """Return ``(mime_type, encoding, name)`` as detected for ``entry.path``.

    An ``OSError`` or ``ValueError`` raised during detection is re-raised as
    ``NoData``, chained to the original exception.
    """
    # pylint:disable=undefined-variable
    try:
        info = magic.detect_from_filename(entry.path)
        result = (info.mime_type, info.encoding, info.name)
    except (OSError, ValueError) as error:
        raise NoData from error
    return result
def _guess_mimetype(path) -> str:
    """
    Ask the file-magic module for the MIME type of ``path``. When the answer
    is too generic to be useful, fall back to a lookup on the text after the
    last ``.`` in the path; if that is unknown as well, the generic answer is
    returned after all.
    """
    detected = magic.detect_from_filename(path).mime_type
    if detected not in ("text/plain", "application/octet-stream"):
        return detected

    by_suffix = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "gif": "image/gif",
        "png": "image/png",
        "mp3": "audio/mp3",
        "mp4": "video/mp4",
        "htm": "text/html",
        "html": "text/html",
        "md": "text/markdown",
        "mkv": "video/x-matroska",
        "ogv": "video/ogg",
        "webm": "video/webm",
    }
    suffix = path.rsplit(".", 1)[-1].lower()
    if suffix in by_suffix:
        return by_suffix[suffix]
    return detected
def __init__(self, filepath):
    """
    Create a new FileElement.

    The content type is determined once, here: through ``magic`` when it is
    available and the file exists, otherwise through ``tika_detector`` when
    available, and finally by guessing from the file name.

    :param filepath: Path to the file to wrap.  If relative, it is
        interpreted as relative to the current working directory.
    :type filepath: str

    """
    super(DataFileElement, self).__init__()

    # Just expand a user-home `~` if present, keep relative if given.
    self._filepath = osp.expanduser(filepath)

    self._content_type = None
    # Probe the expanded path: the raw argument may still contain `~`, which
    # neither the existence check nor the detectors would resolve.
    if magic and osp.isfile(self._filepath):
        r = magic.detect_from_filename(self._filepath)
        self._content_type = r.mime_type
    elif tika_detector:
        try:
            self._content_type = tika_detector.from_file(self._filepath)
        except IOError as ex:
            self._log.warning(
                "Failed tika.detector.from_file content type "
                "detection (error: %s), falling back to file "
                "extension", str(ex))
    # If no tika detector or it failed for some reason
    if not self._content_type:
        self._content_type = mimetypes.guess_type(self._filepath)[0]
def get_new_front_cover_mime_type(self):
    """Return the MIME type of the file named by ``new_front_cover_sv``.

    Two incompatible ``magic`` modules are supported: one exposing
    ``detect_from_filename`` and one exposing ``from_file``.

    :raises AttributeError: the installed module offers neither function.
    """
    detect = getattr(magic, 'detect_from_filename', None)
    if detect is not None:
        cover_path = self.new_front_cover_sv.get()
        return detect(cover_path).mime_type

    from_file = getattr(magic, 'from_file', None)
    if from_file is not None:
        cover_path = self.new_front_cover_sv.get()
        return from_file(cover_path, mime=True)

    raise AttributeError
def testMimeRacemap(idr):
    """Fetch the race map for ``idr`` into ``tmp/`` and log its MIME type.

    Always returns ``True``.
    """
    target = "tmp/{idr}.png".format(idr=idr)
    source = archiveLib.VLMRACEMAP.format(idr=idr)

    session = archiveLib.makeSession()
    archiveLib.fetch_png(session, source, target)

    detected = magic.detect_from_filename(target)
    logging.info("Type %s is %s", idr, detected.mime_type)
    return True
def _magic_mimetype(self):
    """ This can be slow because it has to open the files.

    Returns ``None`` when the path does not exist.
    """
    if not self.exists():
        return None

    if not hasattr(magic, 'detect_from_filename'):
        # python-magic
        return magic.from_file(self.as_posix(), mime=True)

    # sys-apps/file python-magic api
    return magic.detect_from_filename(self).mime_type
def real_extension(self) -> str:
    """Return the extension that ``exts_dict`` maps this file's MIME type to.

    :raises IsADirectoryError: the wrapped path is a directory.
    :raises NotInDictionaryException: the MIME type has no (truthy) entry in
        ``exts_dict``.
    """
    if self.file.is_dir():
        raise IsADirectoryError(self.file)

    mime = fm.detect_from_filename(self.file).mime_type
    true_ext = exts_dict.get(mime)
    if not true_ext:
        raise NotInDictionaryException(self.file)

    log.info("Real extension: %s", true_ext)
    return true_ext
def retrieve_photo_paths(d, r):
    """Collect paths of files under ``d`` whose MIME type contains "jpeg".

    :param d: Directory to scan.
    :param r: When truthy, sub-directories are scanned as well; otherwise only
        files directly inside ``d`` are considered.
    :return: List of matching file paths, in walk order.
    """
    photos = []
    for root, _dirs, files in os.walk(d):
        is_top_level = root == d or root + "/" == d
        if not (is_top_level or r):
            continue
        for name in files:
            candidate = os.path.join(root, name)
            if "jpeg" in magic.detect_from_filename(candidate).mime_type:
                photos.append(candidate)
    return photos
def mime_type(file: str) -> str:
    """Return the MIME type of ``file``.

    libmagic is asked first; when it only reports the generic
    ``application/octet-stream``, the output of
    ``xdg-mime query filetype`` is returned instead.
    """
    detected = magic.detect_from_filename(file).mime_type
    if detected != 'application/octet-stream':
        return detected

    completed = subprocess.run(['xdg-mime', 'query', 'filetype', file],
                               stdout=subprocess.PIPE)
    return completed.stdout.decode('utf-8', errors='ignore').rstrip()
def add_db_record(cursor, filename, quiet):
    """Insert one row describing ``filename`` into the ``hashes`` table.

    The row holds the host name, absolute path, size, permissions, owner,
    group, ssdeep/MD5/SHA1 digests and the libmagic description of the file.

    :param cursor: Database cursor used for the ``INSERT``.
    :param filename: Path of the file to record.
    :param quiet: Progress is printed only when this is exactly ``False``.
    :return: ``True`` once the row is inserted, ``False`` if the file could
        not be stat'ed.
    """
    skip_hash = False
    absolute = os.path.abspath(filename)

    try:
        tempstat = os.stat(absolute)
    except OSError as err:
        print("[-] Couldn't open %s: %s" % (absolute, err))
        return False

    perms = oct(tempstat.st_mode)
    owner = tempstat.st_uid
    group = tempstat.st_gid
    size = tempstat.st_size

    # Skip hashing if the file is a FIFO, because the script will
    # just hang forever trying to read data to calculate a hash.
    if stat.S_ISFIFO(tempstat.st_mode):
        skip_hash = True
        fuzzy_hash = "FIFO"
        md5digest = "FIFO"
        sha1digest = "FIFO"

    # Determine file type with libmagic
    filetype = magic.detect_from_filename(absolute).name

    if quiet is False:
        print("[+] Adding %s -- %s" % (filename, filetype))

    if skip_hash is False:
        # Calculate ssdeep hash
        try:
            fuzzy_hash = ssdeep.hash_from_file(absolute)
        except IOError:
            fuzzy_hash = "PERMISSION DENIED"
        except UnicodeDecodeError:
            fuzzy_hash = "UNICODE DECODE ERROR"

        # Calculate MD5 and SHA1 in a single pass. The file is opened in
        # binary mode so the raw bytes are hashed, and the handle is closed
        # as soon as reading finishes.
        md5hash = hashlib.md5()
        sha1hash = hashlib.sha1()
        with open(absolute, "rb") as handle:
            for chunk in iter(lambda: handle.read(65536), b""):
                md5hash.update(chunk)
                sha1hash.update(chunk)
        md5digest = md5hash.hexdigest()
        sha1digest = sha1hash.hexdigest()

    cursor.execute(
        "INSERT INTO hashes VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME())",
        (HOSTNAME, absolute, size, perms, owner, group, fuzzy_hash, md5digest, sha1digest, filetype))

    return True
def get_file_mime_type(path):
    """Return the MIME type of ``path``, or ``''`` when detection fails.

    Works with either the file-magic module or the python-magic module.
    """
    try:
        detect = getattr(magic, 'detect_from_filename', None)
        if detect is None:
            # Using python-magic module: https://github.com/ahupp/python-magic
            return magic.from_file(path, mime=True)
        # Using file-magic module: https://github.com/file/file
        return detect(path).mime_type
    except Exception:
        return ''
def magic_type(self, data, isdata=False):
    """Return libmagic's description of ``data``.

    :param data: A file name, or the content itself when ``isdata`` is true
        (only the first 512 items are examined in that case).
    :param isdata: Selects between content and file-name detection.
    :return: The description, or an ``Error ...`` message string when the
        ``magic`` name is missing or detection fails.
    """
    try:
        if isdata:
            detected = magic.detect_from_content(data[0:512])
        else:
            detected = magic.detect_from_filename(data)
        return detected.name
    except NameError:
        return 'Error - file-magic library required.'
    except Exception as e:
        return 'Error getting magic type - %s' % e
def _mime(path):
    """Return the MIME type of ``path``, or ``"text/plain"`` if detection fails.

    Detection runs while ``_mime_lock`` is held.
    """
    # Acquire outside the try block so the lock is released exactly once and
    # only when it was actually taken.
    _mime_lock.acquire()
    try:
        x = magic.detect_from_filename(path)
        # NOTE(review): if `log` is a standard logging logger, ":" and x are
        # treated as %-format arguments of a message without placeholders --
        # confirm this call is intended.
        log.info(path, ":", x)
        return x.mime_type
    except Exception:
        return "text/plain"
    finally:
        _mime_lock.release()
def read_magic(path: Union[bytes, str, os.PathLike]) -> magic.FileMagic:
    """
    Detect the type of ``path`` with the 'file-magic' library.

    When detection raises ``UnicodeDecodeError`` (a 'file-magic' bug), a
    warning is logged and ``DEFAULT_FILE_MAGIC`` is returned instead.
    """
    try:
        detected = magic.detect_from_filename(path)
    except UnicodeDecodeError as exc:
        logger.warning("Failed to perform magic: %s; using fallback", exc)
        detected = DEFAULT_FILE_MAGIC
    return detected
def save_file(request, entity, data):
    """Store the uploaded file of ``data['content']`` and update ``entity``.

    The original file name is always recorded on the entity. When the entity
    has an id and an upload is present, the upload is copied below the
    configured ``file_storage_dir`` and the entity receives its MIME record
    and its size in megabytes.

    :return: ``entity`` after a successful save, otherwise ``False``.
    """
    input_file = data['content'].file
    entity.original_name = data['content'].filename
    storage_dir = request.registry.settings.get('file_storage_dir')

    if not (entity.id and input_file):
        return False

    # To avoid thousands of files in the same directory (which is bad), the
    # first three digits of the primary key (zero filled if < 100) each
    # become a directory level, for example:
    #   - content_id == 5    -> 0/0/5/5.ext
    #   - content_id == 24   -> 0/2/4/24.ext
    #   - content_id == 153  -> 1/5/3/153.ext
    #   - content_id == 1536 -> 1/5/3/1536.ext
    target_dir = os.path.join(storage_dir, entity.subpath)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    target_path = os.path.join(target_dir, entity.filename)

    # Copy the upload to its final destination; the destination is flushed
    # and closed when the with-block ends.
    input_file.seek(0)
    with open(target_path, 'wb') as destination:
        shutil.copyfileobj(input_file, destination)
    input_file.close()

    # A file must be associated with a MIME type (image/png, application/pdf,
    # etc). Rather than trusting the file extension, the magic number is
    # used, which identifies the format more reliably.
    mime_type = magic.detect_from_filename(target_path).mime_type
    major, minor = mime_type.split('/')

    # Fetch mime from database
    entity.mime = Mime.q_major_minor(request.dbsession, major, minor)

    # bytes -> megabytes
    entity.file_size = os.path.getsize(target_path) / 1024.0 / 1024.0

    return entity
def get_path_list(path_src, path_dst, ignore):
    """Plan the conversion of ``path_src`` (a file or a directory tree).

    :param path_src: Input file, or directory that is walked recursively.
        Names containing ``_xincoder_temp.html`` are left out of the walk.
    :param path_dst: Output directory; ``None`` selects ``PDFCode_Results/``
        below the input root.
    :param ignore: Comma separated substrings; a file whose base name contains
        one of them is never converted. ``''`` disables the filter.
    :return: ``(inputs, outputs, convert_mask, input_root)`` -- three parallel
        lists followed by the input root (with trailing ``/``). Outputs of
        files to convert carry an extra ``.pdf`` suffix.
    """
    if os.path.isfile(path_src):
        sources = [path_src]
        input_root = os.path.dirname(path_src) + '/'
    else:
        sources = []
        input_root = os.path.dirname(path_src + '/') + '/'
        for folder, _subfolders, names in os.walk(path_src):
            for name in names:
                if '_xincoder_temp.html' not in name:
                    sources.append(os.path.join(folder, name))

    if path_dst is None:
        path_dst = os.path.join(input_root, 'PDFCode_Results/')
    else:
        path_dst = os.path.dirname(path_dst + '/') + '/'

    # A file is converted when its MIME type looks textual.
    masks = []
    for source in sources:
        mime_type = magic.detect_from_filename(source).mime_type
        masks.append('text/' in mime_type or 'x-' in mime_type)

    # Files whose name contains an ignore token are never converted.
    if ignore != '':
        tokens = ignore.split(',')
        filtered = []
        for source, mask in zip(sources, masks):
            base_name = os.path.basename(source)
            is_ignored = any(token in base_name for token in tokens)
            filtered.append(mask & (not is_ignored))
        masks = filtered

    # Re-root every input below the destination and mark converted files.
    targets = []
    for source, mask in zip(sources, masks):
        relocated = source.replace(input_root, path_dst)
        targets.append(relocated + '.pdf' if mask else relocated)

    return sources, targets, masks, input_root
def is_pcap(file):
    """Tell whether ``file`` looks like a packet capture.

    A file qualifies when libmagic reports the pcap MIME type. For
    ``application/octet-stream`` it qualifies when the libmagic description
    mentions "pcap" or "capture", or when its first four bytes are listed in
    ``PCAP_MGC``. Any exception raised along the way results in ``False``.
    """
    with contextlib.suppress(Exception):
        report = magic.detect_from_filename(file)
        mime_type = report.mime_type

        if mime_type == 'application/vnd.tcpdump.pcap':
            return True

        if mime_type == 'application/octet-stream':
            description = report.name.casefold()
            if 'pcap' in description or 'capture' in description:
                return True

            # Last resort: compare the leading bytes with the known numbers.
            with open(file, 'rb') as test_file:
                leading_bytes = test_file.read(4)
            return leading_bytes in PCAP_MGC

    return False
def magicScan(target: Path) -> Dict[str, str]:
    '''Scan target with libmagic

    Args:
        target: A Path to target file

    Return:
        A dict with the keys "mime_type", "encoding" and "type_name"
    '''
    detected = magic.detect_from_filename(target)
    log.info("finish magicScan...")

    result = {}
    result["mime_type"] = detected.mime_type
    result["encoding"] = detected.encoding
    result["type_name"] = detected.name
    return result
def mime(path):
    """Return the MIME type to use for ``path``.

    ``.html``, ``.css`` and ``.js`` names (case-insensitive) are answered
    directly. Everything else is detected with libmagic while ``_mime_lock``
    is held; ``"text/plain"`` is returned if detection fails.
    """
    p = path.lower()
    if p.endswith(".html"):
        return "text/html"
    if p.endswith(".css"):
        return "text/css"
    if p.endswith(".js"):
        return "text/javascript"

    # Acquire outside the try block so the lock is released exactly once and
    # only when it was actually taken.
    _mime_lock.acquire()
    try:
        return magic.detect_from_filename(path).mime_type
    except Exception:
        return "text/plain"
    finally:
        _mime_lock.release()
def editarExtension(file_path):
    """!
    @brief Append the extension matching the file's detected MIME type.

    The file is renamed on disk. When no extension is known for the detected
    MIME type the file is left untouched.

    @param file_path Path of the file
    @return Path of the file after renaming, or the unchanged path when no
            extension could be determined
    """
    mime_type = magic.detect_from_filename(file_path).mime_type
    # Keep the None that guess_extension() returns for unknown types;
    # wrapping it in str() would produce the literal suffix "None".
    extension = mimetypes.guess_extension(mime_type, strict=False)
    if extension is None:
        return file_path

    # Normalise the JPEG spellings to the common ".jpg".
    if extension in ('.jpe', '.jpeg'):
        extension = '.jpg'

    new_path = file_path + extension
    os.rename(file_path, new_path)
    return new_path
def main(args):
    """Copy media files found below ``args.indir`` into ``args.outdir``.

    An ``unsorted`` directory is created inside ``args.outdir`` first. Every
    file whose MIME type mentions jpeg, quicktime or mp4 is then named from
    its EXIF data and passed to ``copy_rename_file``.
    """
    os.makedirs(os.path.join(args.outdir, "unsorted"), exist_ok=True)

    # Build the complete listing before anything is copied.
    source_root = os.path.expanduser(args.indir)
    found = []
    for folder, _dirs, names in os.walk(source_root):
        for name in names:
            found.append(os.path.join(folder, name))

    wanted = ('jpeg', 'quicktime', 'mp4')
    for current_fname in found:
        mime_type = magic.detect_from_filename(current_fname).mime_type
        if not any(kind in mime_type for kind in wanted):
            continue

        print("Previous file name ", current_fname)
        exif_mime_type_key = exif_key(mime_type)
        new_fname, year, month = name_from_exif(current_fname, exif_mime_type_key)
        copy_rename_file(args.outdir, year, month, current_fname, new_fname)
def traitement(self, name):
    """Process one inventory file and forward it as an XML datagram.

    Nothing happens unless ``name`` is an existing regular file. The file
    content is loaded (through ``zcat`` and ``parsejsoninventory`` when
    libmagic reports gzip, as plain text with newlines removed otherwise),
    reduced to its ``<REQUEST>...</REQUEST>`` element, passed through
    ``macadressclear``, and completed with an ``Mc:<mac>`` suffix before
    being sent with ``senddata`` to 127.0.0.1 on ``conf['port']``. The file
    is removed just before sending.

    Failures are not raised to the caller: each stage logs the error and
    prints the traceback to ``logoutput``.

    :param name: Path of the inventory file.
    """
    if os.path.isfile(str(name)):
        file_content = ""
        file_content1 = ""
        logging.getLogger().info("parse inventory %s", name)
        try:
            if (magic.detect_from_filename(name).mime_type == 'application/gzip'):
                # NOTE(review): the file name is interpolated into a shell
                # command line -- confirm that names cannot contain shell
                # metacharacters.
                com = 'zcat %s' % name
                file_content1 = os.popen(com).read()
                file_content = parsejsoninventory(str(name), file_content1)
            else:
                with open(name, 'r') as content_file:
                    file_content = content_file.read().replace('\n', '')
            # Keep only the REQUEST element. With no match, m is None and the
            # resulting AttributeError is handled by the outer except below.
            m = re.search('<REQUEST>.*<\/REQUEST>', file_content)
            file_content = str(m.group(0))
            try:
                file_content = macadressclear(file_content, "00:00:00:00:00:00")
                mac = mac_adressexml(file_content)
                try:
                    # Append "Mc:<mac address>" at the end of the datagram.
                    header = '<?xml version="1.0" encoding="utf-8"?>'
                    # Drop the trailing 10 characters (the length of the
                    # closing "</REQUEST>" tag), which is re-added below.
                    file_content = file_content[:-10]
                    xmldata = "%s%sMc:%s</REQUEST>" % (header, file_content, mac)
                    logging.getLogger().debug(
                        "XML recv from pxe client %s" % xmldata)
                    # The file is deleted before the datagram is sent.
                    os.remove(name)
                    senddata(xmldata, '127.0.0.1', conf['port'])
                except Exception as e:
                    traceback.print_exc(file=logoutput)
                    logging.getLogger().error(
                        "UDP error sending to %s:%d [%s]" % ('127.0.0.1', conf['port'], str(e)))
            except Exception as e:
                traceback.print_exc(file=logoutput)
                logging.getLogger().error("MAC address error %s" % str(e))
        except Exception as e:
            traceback.print_exc(file=logoutput)
            logging.getLogger().error("Error traitement file %s" % str(name))
            logging.getLogger().error("Error traitement %s" % str(e))
def get_mime_type(resource):
    """Return the MIME type of a local file or an HTTP(S) resource.

    :param resource: A file path, a ``file://`` URL, or an ``http://`` /
        ``https://`` URL. For web URLs the content type announced by the
        server is returned.
    :return: The MIME type string, or ``None`` when local detection yields
        nothing.
    :raises RuntimeError: the installed ``magic`` module offers neither
        ``detect_from_filename`` nor ``from_file``.
    """
    import magic

    if resource.startswith('file://'):
        resource = resource[len('file://'):]

    if resource.startswith(('http://', 'https://')):
        with urllib.request.urlopen(resource) as response:
            return response.info().get_content_type()

    if hasattr(magic, 'detect_from_filename'):
        # file-magic returns a result object carrying ``mime_type``.
        detected = magic.detect_from_filename(resource)
        if detected:
            return detected.mime_type
        return None

    if hasattr(magic, 'from_file'):
        # python-magic already returns the MIME type as a plain string, so it
        # must not be dereferenced through ``.mime_type``.
        return magic.from_file(resource, mime=True) or None

    raise RuntimeError('The installed magic version provides neither detect_from_filename nor from_file')
def upload_asset(path, owner, repo, tag):
    """Upload the file at ``path`` as an asset of the release tagged ``tag``.

    The API token is read from the ``GITHUB_TOKEN`` environment variable.

    :param path: Local path of the asset.
    :param owner: Repository owner.
    :param repo: Repository name.
    :param tag: Tag identifying the release.
    :return: ``1`` when the release lookup failed and ``create_release``
        returned a falsy value (the caller is expected to run the upload
        again); ``0`` in every other case.
    """
    token = os.environ['GITHUB_TOKEN']

    url = urljoin(GITHUB_API,
                  '/'.join(['repos', owner, repo, 'releases', 'tags', tag]))
    res = requests.get(url)

    j = json.loads(res.text)
    if check_status(res, j):
        # release must not exist, creating release from tag
        if create_release(owner, repo, tag, token):
            return 0
        # Need to start over with uploading now that release is created
        # Return 1 to indicate we need to run upload_asset again
        return 1

    # The upload URL is a URI template; drop the "{...}" part.
    upload_url = j['upload_url'].split('{')[0]

    fname = ntpath.basename(path)
    # Read the asset as raw bytes so binary files are sent unmodified.
    with open(path, 'rb') as f:
        contents = f.read()

    try:
        content_type = mime.from_file(path)
    except Exception:
        # Fall back to file-magic; the Content-Type header needs the MIME
        # type, not the human readable description.
        content_type = magic.detect_from_filename(path).mime_type

    headers = {'Content-Type': content_type, 'Authorization': token}
    params = {'name': fname}

    res = requests.post(upload_url, data=contents, auth=(owner, token),
                        headers=headers, params=params)

    j = json.loads(res.text)
    if check_status(res, j):
        return 0
    print('SUCCESS: %s uploaded' % fname)
    return 0
def upload_to_s3(bucket_name, file_paths):
    """Upload files to an S3 bucket, skipping the ones already up to date.

    A file is skipped when a key of the same name exists and its ETag equals
    the MD5 of the local file. Uploaded keys are made ``public-read``.

    :param bucket_name: Name of the destination bucket.
    :param file_paths: Iterable of path objects providing ``as_posix()`` and
        ``open()``; the first path component is dropped from the key name.
    :return: ``{"skipped": <count>, "uploaded": <count>}``
    """
    bucket = s3.get_bucket(bucket_name)

    skip_count = 0
    upload_count = 0

    for path in file_paths:
        dir_path = path.as_posix()
        parts = dir_path.split('/')
        key_name = os.path.join(*parts[1:])  # skip jekyll's _site directory name

        k = bucket.get_key(key_name)

        if k is not None:
            # file exists on S3; close the local handle once it is hashed
            with path.open("rb") as handle:
                md5_hash = hashlib.md5(handle.read()).hexdigest()
            # the ETag is compared without its first and last character
            if md5_hash == k.etag[1:-1]:
                # skip if it's the same file
                print("skipping {}".format(dir_path))
                skip_count += 1
                continue

        print("uploading {}".format(dir_path))

        mime_type = magic.detect_from_filename(dir_path).mime_type
        if dir_path.endswith(".css"):
            # libmagic doesn't set this correctly. set manually
            mime_type = "text/css"

        headers = get_s3_headers()
        headers['Content-Type'] = mime_type

        k = Key(bucket)
        k.name = key_name
        k.set_contents_from_filename(dir_path, headers=headers)
        k.set_acl("public-read")
        upload_count += 1

    return {"skipped": skip_count, "uploaded": upload_count}
def test_detect_from_filename(self):
    # Detection by file name must produce the result the fixture expects.
    detected = magic.detect_from_filename(self.filename)

    self.assert_result(detected)
elif "firewall,info" in line: (mes, dia, hora, ippublico, _, _, protocolo, _, _, _, _, macorg, _, _, _, operacao ) = line.split()[0:16] url = operacao.split(">")[1].split(":")[0] endereco = operacao.split(":")[0] metodo = protocolo else: print("Failed: {}".format(line)) return None return {'mes': mes, 'dia': dia, 'hora': hora, 'ip_rt': ippublico, 'cliente': endereco, 'metodo': metodo, 'url': url} if __name__ == "__main__": options, arq = parse_args() dtlimit = dt.datetime.now() dtlog = dtlimit - dt.timedelta(hours=1) if 'gzip' in magic.detect_from_filename(arq).mime_type: with gzip.open(arq,'r') as logfile: lfile = logfile.readlines() else: with open(arq,'r') as logfile: lfile = logfile.readlines() if options.filepart == True else [line for line in logfile.readlines() if line.startswith(dtlog.strftime("%b %d %H:"))] lines = [] for line in lfile: if "wigong.com.br/" in line: continue elif line.startswith(dtlimit.strftime("%b %d %H:")): break else: result = getvalues(line) if result is not None: lines.append(result)