Example #1
    def detect_type(self):
        if self.mime_type is None:
            text = magic.from_file(self.contentfile.path)
            mime = magic.from_file(self.contentfile.path, magic.MAGIC_MIME)
            if mime.startswith("image"):
                im = Image.open(self.contentfile.path)
                image_meta = im.info
                meta = dict()
                meta['size'] = im.size
                meta['format'] = im.format
                meta['mode'] = im.mode
                meta = json.dumps(meta)
                dim = {'width': im.size[0],
                       'height': im.size[1]}
            else:
                dim = None
                meta = text
            self.mime_type = mime
            self.detected_meta_data = meta
            self.dimensions = json.dumps(dim)
            self.save()

        if self.dimensions:
            dim = json.loads(self.dimensions)
        else:
            dim = None

        if not self.md5_hash:
            self.md5_hash = file_hash(self.contentfile.path)

        return dict(mime_type=self.mime_type,
                    description=self.detected_meta_data,
                    dimensions=dim)
Example #2
def attachment(path):
    """Create an attachment upload object from a filename Embeds the attachment as a data url."""
    filename = os.path.basename(path)
    mime_type, encoding = mimetypes.guess_type(path)
    major, minor = mime_type.split('/')
    try:
        detected_type = magic.from_file(path, mime=True).decode('ascii')
    except AttributeError:
        detected_type = magic.from_file(path, mime=True)
    # XXX This validation logic should move server-side.
    if not (detected_type == mime_type or
            detected_type == 'text/plain' and major == 'text'):
        raise ValueError('Wrong extension for %s: %s' % (detected_type, filename))
    with open(path, 'rb') as stream:
        attach = {'download': filename,
                  'type': mime_type,
                  'href': 'data:%s;base64,%s' % (mime_type, b64encode(stream.read()).decode('ascii'))}
        if mime_type in ('application/pdf', "application/zip", 'text/plain',
                         'text/tab-separated-values', 'text/html', 'application/msword', 'application/vnd.ms-excel',
                         'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'):
            # XXX Should use chardet to detect charset for text files here.
            return attach
        if major == 'image' and minor in ('png', 'jpeg', 'gif', 'tiff'):
            # XXX we should just convert our tiffs to pngs
            stream.seek(0, 0)
            im = Image.open(stream)
            im.verify()
            if im.format != minor.upper():
                msg = "Image file format %r does not match extension for %s"
                raise ValueError(msg % (im.format, filename))
            attach['width'], attach['height'] = im.size
            return attach
    raise ValueError("Unknown file type for %s" % filename)
Example #3
 def do_sample_type_detect(datafile):
     """
         Checks the datafile's type.
     """
     mtype = magic.from_file(datafile, mime=True)
     stype = magic.from_file(datafile)
     return (mtype, stype)
Example #4
	def processNewArchive(self, archPath, passwd=""):
		if magic.from_file(archPath, mime=True).decode("ascii") == 'application/zip':
			self.unprotectZip(archPath, passwd)
		elif magic.from_file(archPath, mime=True).decode("ascii") == 'application/x-rar':
			pass
		else:
			self.log.error("ArchCleaner called on file that isn't a rar or zip!")
			self.log.error("Called on file %s", archPath)
			self.log.error("Specified password '%s'", passwd)
			self.log.error("Inferred file type %s", magic.from_file(archPath, mime=True).decode("ascii"))
			raise NotAnArchive("ArchCleaner called on file that isn't a rar or zip!")

		# ArchPath will convert from rar to zip if needed, and returns the name of the resulting
		# file in either case
		try:
			archPath = self.cleanZip(archPath)
		except (zipfile.BadZipFile, rarfile.BadRarFile, DamagedArchive, NotAnArchive):
			self.log.error("Ignoring archive because it appears damaged.")
			return "damaged", archPath

		except:
			self.log.error("Unknown error??")
			for line in traceback.format_exc().split("\n"):
				self.log.error(line)
			return "damaged", archPath

		return "", archPath
Example #5
    def info(self, name):
        """Get file info from current_dir. Returns list."""
        stat_data = os.stat(os.path.join(self.current_dir, name))
        try:
            mime_type = magic.from_file(os.path.join(self.current_dir, name), mime=True).decode()
        except magic.MagicException:
            mime_type = 'application/octet-stream'
        file_info = {
            'name': name,
            'path': self.current_dir,
            'mime': mime_type,
            'type': mime_type.replace('/', '-'),
            'size': stat_data.st_size,
            'mode': format(stat_data.st_mode & 0o777, 'o'),
            'owner_id': stat_data.st_uid,
            'owner_name': getpwuid(stat_data.st_uid).pw_name,
            'group_id': stat_data.st_gid,
            'group_name': getgrgid(stat_data.st_gid).gr_name,
        }
        if mime_type == 'inode/symlink':
            file_info['real_path'] = os.path.realpath(os.path.join(self.current_dir, name))
            file_info['real_mime'] = magic.from_file(file_info['real_path'], mime=True).decode()
            file_info['real_type'] = file_info['real_mime'].replace('/', '-')

        return file_info
Example #6
    def test_from_file_str_and_bytes(self):
        filename = os.path.join(self.TESTDATA_DIR, "test.pdf")

        self.assertEqual('application/pdf',
                         magic.from_file(filename, mime=True))
        self.assertEqual('application/pdf',
                         magic.from_file(filename.encode('utf-8'), mime=True))
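Several examples in this collection call .decode("ascii") or .decode() on the result (Examples #2, #4, #5), while this test compares against a plain str: older python-magic builds returned bytes where current releases return str. A small compatibility shim, as a sketch (the helper name is illustrative):

import magic

def mime_of(path):
    # Normalize across python-magic versions: decode only when bytes come back.
    result = magic.from_file(path, mime=True)
    return result.decode("ascii") if isinstance(result, bytes) else result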
Example #7
 def fileprop(self,pathname):
     """
         Object holding data about the file for the storefiles process.
     """
     (mode,ino,dev,nlink,uid,gid,size,atime,mtime,ctime)=os.stat(pathname)
     prop=dict(
         mode=mode,
         inode=ino,
         dev=dev,
         nlink=nlink,
         uid=uid,
         gid=gid,
         size=size,
         atime=datetime.datetime.fromtimestamp(atime),
         mtime=datetime.datetime.fromtimestamp(mtime),
         ctime=datetime.datetime.fromtimestamp(ctime)
               )
     prop['filename']=os.path.basename(pathname)
     prop['description']=magic.from_file(pathname)
     prop['mime']=magic.from_file(pathname,mime=True)
     prop['ftype']=None
     for compression, unused in self.compressions:
         if str(os.path.splitext(pathname)[1]) in compression:
             prop['ftype']='cmp'
             break
     if not prop['ftype']:
         if os.path.islink(pathname):
             prop['ftype']='lnk'
         elif os.path.isdir(pathname):
             prop['ftype']='dir'
         elif os.path.isfile(pathname):
             prop['ftype']='file'
         else:
             prop['ftype']='unknown'
     return prop
Example #8
def test_various_word_formats():
    """
    The point here is that the various formats use different mime types.
    """
    path1 = 'tests/testdata/test-word-docx.docx'
    path2 = 'tests/testdata/test-word-97-2004.doc'
    assert magic.from_file(path1, mime=True) != magic.from_file(path2, mime=True)
Example #9
  def _produce_one_sample(self):
    dirname = os.path.dirname(self.path)
    if not check_dir(dirname):
      raise ValueError("Invalid data path.")
    with open(self.path, 'r') as fid:
      flist = [l.strip() for l in fid.xreadlines()]

    if self.shuffle:
      random.shuffle(flist)

    input_files = [os.path.join(dirname, 'input', f) for f in flist]
    output_files = [os.path.join(dirname, 'output', f) for f in flist]

    self.nsamples = len(input_files)

    input_queue, output_queue = tf.train.slice_input_producer(
        [input_files, output_files], shuffle=self.shuffle,
        seed=0123, num_epochs=self.num_epochs)

    if '16-bit' in magic.from_file(input_files[0]):
      input_dtype = tf.uint16
      input_wl = 65535.0
    else:
      input_wl = 255.0
      input_dtype = tf.uint8
    if '16-bit' in magic.from_file(output_files[0]):
      output_dtype = tf.uint16
      output_wl = 65535.0
    else:
      output_wl = 255.0
      output_dtype = tf.uint8

    input_file = tf.read_file(input_queue)
    output_file = tf.read_file(output_queue)

    if os.path.splitext(input_files[0])[-1] == '.jpg': 
      im_input = tf.image.decode_jpeg(input_file, channels=3)
    else:
      im_input = tf.image.decode_png(input_file, dtype=input_dtype, channels=3)

    if os.path.splitext(output_files[0])[-1] == '.jpg': 
      im_output = tf.image.decode_jpeg(output_file, channels=3)
    else:
      im_output = tf.image.decode_png(output_file, dtype=output_dtype, channels=3)

    # normalize input/output
    sample = {}
    with tf.name_scope('normalize_images'):
      im_input = tf.to_float(im_input)/input_wl
      im_output = tf.to_float(im_output)/output_wl

    inout = tf.concat([im_input, im_output], 2)
    fullres, inout = self._augment_data(inout, 6)

    sample['lowres_input'] = inout[:, :, :3]
    sample['lowres_output'] = inout[:, :, 3:]
    sample['image_input'] = fullres[:, :, :3]
    sample['image_output'] = fullres[:, :, 3:]
    return sample
Example #10
 def convert(self, known_input, method, expected_output):
     self.tmp_filename = getattr(self.helper, method)(os.path.join(self._in_fixtures, known_input))
     expected = self.expected_output_file(expected_output)
     # Check that the type of the file is exactly the same first of all:
     self.assertEqual(strip_kbps_from_file_info(magic.from_file(self.tmp_filename).decode()),
                       strip_kbps_from_file_info(magic.from_file(expected).decode()))
     # Now check that the files are identical:
     self.assertSameAudioLength(self.tmp_filename, expected)
Example #11
 def __init__(self, filepath):
     self.depth = filepath.strip("/").count("/")
     self.is_file = os.path.isfile(filepath)
     self.is_dir = os.path.isdir(filepath)
     self.is_link = os.path.islink(filepath)
     self.size = os.path.getsize(filepath)
     self.meta = magic.from_file(filepath).lower()
     self.mime = magic.from_file(filepath, mime=True)
     self.filepath = filepath
Example #12
def file_type(filename, ifname):
    """For use by irods iexecmd. Output an abstracted file type string.
 MIME isn't always good enough."""
    if magic.from_file(filename).decode().find('FITS image data') >= 0:
        print('FITS')
    elif magic.from_file(filename).decode().find('JPEG image data') >= 0:
        print('JPEG')
    elif magic.from_file(filename).decode().find('script text executable') >= 0:
        print('shell script')
    else:
        print('UNKNOWN')
Example #13
def readInput(inputfile):
    xlsx_match = re.compile(".*Excel.*"); 
    xls_match = re.compile("Composite Document.*");
    if (magic.from_file(inputfile) == 'ASCII text, with CRLF line terminators'):
        copyfile(inputfile, 'inventory_maintenance_db.csv');
        parsecsv('inventory_maintenance_db.csv');
    elif ((xlsx_match.match(magic.from_file(inputfile)) != None) | (xls_match.match(magic.from_file(inputfile)) != None)):
        csv_conversion = xls2csv(inputfile);
        parsecsv(csv_conversion); 
    else:
        raise IncompatibleFileType;
        return -1;
Example #14
def main():
    try:
        # https://github.com/ahupp/python-magic#usage
        print("{0} is \"{1}\"".format(sys.argv[1], magic.from_file(sys.argv[1])))
        print("MIME type: {0}".format(magic.from_file(sys.argv[1], mime=True)))
    except FileNotFoundError as e:
        print("[!] FileNotFoundError: {0}".format(e))
    except PermissionError as e:
        print("[!] PermissionError: {0}".format(e))
        print("[!] {0} may be a directory".format(sys.argv[1]))
    except Exception as e:
        print("[!] Exception: {0} ({1})".format(e, type(e)))
Example #15
	def scanFile(self, name):
			filepath = self.queue.get()
			ftype = magic.from_file(filepath)
			mtype = magic.from_file(filepath, mime=True)
			btype = magic.from_buffer(open(filepath).read(1024))
			print nameThread + ": hashes:", filetomd5(filepath),
			print "\n", nameThread, ": file magic:", ftype, mtype, btype,
			print "\n", nameThread, ": modified:", mtime(filepath),
			print "\n", nameThread, ": metadata change:", ctime(filepath),
			print "\n", nameThread, ": access time:", atime(filepath),
			print "\n", nameThread, ": user id and group id:", owner(filepath), group(filepath),
			print "\n", nameThread, ": current directory is:", top
Example #16
def upload_file(request, config):
    value_burn_after_read = request.form.getlist('burn')
    if value_burn_after_read:
        burn_after_read = True
    else:
        burn_after_read = False

    # Write tmp file on disk
    try:
        file_md5, tmp_full_filename = utils.write_tmpfile_to_disk(file=request.files['file'],
                                                                  dest_dir=config['TMP_FOLDER'])
    except IOError:
        return 'Server error, contact administrator\n'

    secure_name = secure_filename(request.files['file'].filename)

    with JsonDB(dbfile=config['FILE_LIST']) as db:

        # Just inform for debug purpose
        if db.lock_error:
            LOG.error("Unable to get lock during file upload %s" % file_md5)

        # Try to write the file to disk and the db. Returns false if the file was not written
        storage_full_filename = os.path.join(config['UPLOAD_FOLDER'], file_md5)
        mime_type = magic.from_file(tmp_full_filename, mime=True)
        _type = magic.from_file(tmp_full_filename)
        succed_add_file = add_new_file(filename=secure_name,
                                       source=tmp_full_filename,
                                       dest=storage_full_filename,
                                       mime_type=mime_type,
                                       type=_type,
                                       db=db,
                                       md5=file_md5,
                                       burn_after_read=burn_after_read)

    if not succed_add_file:
        # If the file is not in the db, there are 2 possible reasons:
        #  * We were not able to take the lock and write the file to the db.
        #  * An error occurred during file processing.
        # In either case, just tell the user to try again later.
        try:
            os.remove(tmp_full_filename)
        except OSError as e:
            LOG.error("Can't remove tmp file: %s" % e)

        LOG.info('Unable lock the db and find the file %s in db during upload' % file_md5)
        return 'Unable to upload the file, try again later ...\n'

    LOG.info("[POST] Client %s has successfully uploaded: %s (%s)"
             % (request.remote_addr, storage_full_filename, file_md5))
    return "%s/%s\n" % (utils.build_base_url(env=request.environ),
                        file_md5)
Example #17
def isDownloadDone():
    # Goes through all files in downloadDir
    print("Hi")
    for path, subdirs, files in os.walk(downloadDir):
        print("This")
        for filename in files:
            print("is")
            # Checks if there a file with .part extension
            # if(filename.endswith(".part")):
            print(magic.from_file(downloadDir + filename, mime=True))
            if magic.from_file(downloadDir + filename, mime=True) != "audio/mpeg":
                return False
    return True
Example #18
def allAMRAndSQLiteFiles(targetDir):
  amrFiles = []
  sqliteFiles = []
  files = glob.glob(targetDir + '/*')
  print('Inspecting {} files. This may take a while.'.format(len(files)))
  count = 0
  for f in files:
    if magic.from_file(f) == 'Adaptive Multi-Rate Codec (GSM telephony)':
      amrFiles.append(f)
    elif magic.from_file(f).startswith('SQLite 3.x database'):
      sqliteFiles.append(f)
    count += 1
    if count % 500 == 0:
      print('{} files remaining'.format(len(files) - count))
  return (amrFiles, sqliteFiles)
Example #19
def unpickle(filename):
    if not os.path.exists(filename):
        raise UnpickleError("Path '%s' does not exist." % filename)
    if magic.from_file(filename).startswith('gzip'):
        fo = gzip.open(filename, 'rb')
        dict = cPickle.load(fo)
    elif magic.from_file(filename).startswith('Zip'):
        fo = zipfile.ZipFile(filename, 'r', zipfile.ZIP_DEFLATED)
        dict = cPickle.loads(fo.read('data'))
    else:
        fo = open(filename, 'rb')
        dict = cPickle.load(fo)
    
    fo.close()
    return dict
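Example #19 dispatches on the human-readable description ('gzip ...', 'Zip ...'). The same logic can be written against MIME types instead; a Python 3 sketch under that assumption (newer libmagic reports 'application/gzip', older builds 'application/x-gzip'):

import gzip
import pickle
import zipfile

import magic

def load_pickled(filename):
    # Dispatch on the detected MIME type rather than the description prefix.
    mime = magic.from_file(filename, mime=True)
    if mime in ('application/gzip', 'application/x-gzip'):
        with gzip.open(filename, 'rb') as fo:
            return pickle.load(fo)
    if mime == 'application/zip':
        with zipfile.ZipFile(filename, 'r') as fo:
            return pickle.loads(fo.read('data'))
    with open(filename, 'rb') as fo:
        return pickle.load(fo)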
Example #20
    def prepare_sample(self):

        # Get mimetype if not supplied
        if not self.mimetype:
            self.mimetype = magic.from_file(self.path, mime=True)
            self.mimetype_str = magic.from_file(self.path)

        # Get file size
        self.size = os.stat(self.path).st_size

        # Give it a nice uuid
        self.uuid = str(uuid.uuid1())

        # Let it process
        self.process = True
Example #21
 def update_stats(self):
     """Check if file exists on disk and if so, update its metadata."""
     full_path = self.get_full_path()
     try:
         self.modified = timezone.make_aware(datetime.fromtimestamp(path.getmtime(full_path)))
         self.size = path.getsize(full_path)
         self.hash = md5sum(full_path)
         self.type = magic.from_file(full_path)
         self.mime = magic.from_file(full_path, mime=True)
     except FileNotFoundError:
         self.online = False
     else:
         self.online = True
     self.save()
     return self.online
Example #22
File: mime.py Project: dmdm/Stoma
def guess_mime_type(fn,
        magic_inst=magic.Magic(mime=True, mime_encoding=True, keep_going=True)):
    """
    Guesses mime-type from filename.

    Uses Python's ``mimetypes`` lib first; if no type could be determined, falls
    back to ``python-magic``.

    Returned encoding might be None.

    :param fn: Filename.
    :param magic_inst: Instance of :class:`magic.Magic`. Should be created with
        mime=True, mime_encoding=True, keep_going=True.
    :return: Tuple(mime_type, encoding).
    """
    # Try Python's native lib first
    mt, enc = mimetypes.guess_type(fn)
    # It may not find all types, e.g. it returns None for 'text/plain', so
    # fallback on python-magic.
    if not mt:
        if magic_inst:
            mt = magic_inst.from_file(fn).decode('ASCII')
        else:
            mt = magic.from_file(fn).decode('ASCII')
    if not enc:
        enc = None
    # In case magic returned several types on separate lines
    mt = mt.split(r'\012')[0]
    return mt, enc
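Hypothetical usage of the helper above (the file name is illustrative):

mime_type, encoding = guess_mime_type('report.pdf')
# mimetypes resolves '.pdf' on its own, so the python-magic fallback is never consulted here.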
Example #23
 def getFiletype(self, path):
     if MAGIC_AVAILABLE == MAGIC_PYTHON_FILE:
         ms = self._get_file_magic()
         ftype = ms.file(path)
     elif MAGIC_AVAILABLE == MAGIC_PYTHON_MAGIC:
         ftype = magic.from_file(path, mime=True)
     return ftype
Example #24
def studentGetFiles(pid, uid, subnum):
  try:
    problem = Problem.objects.get(id=pid)
    c, a = problem.getParents()
    if not (c in current_user.courseStudent or c in current_user.gradingCourses()):
      abort(403)

    u = User.objects.get(id=uid)
    s = problem.getSubmission(u, subnum)

    content = request.get_json()

    filepath = getSubmissionPath(c, a, problem, u, subnum)
    filepath = os.path.join(filepath, content['filename'])

    import magic

    fileType = magic.from_file(filepath, mime=True)
    fileType = fileType.split('/')

    if fileType[0] == 'text':
      try:
        f = codecs.open(filepath, encoding='utf-8', errors='ignore')
        content = f.read()
        return jsonify(majorType=fileType[0], minorType=fileType[1], content=content)
      except Exception as e:
        return jsonify(majorType=fileType[0], minorType=fileType[1], content=str(e))
        pass
      finally:
        f.close()
    else:
      return jsonify(majorType=fileType[0], minorType=fileType[1],\
       url=url_for('serveFiles', pid=pid, uid=uid, subnum=subnum, filename=content['filename']))
  except Problem.DoesNotExist:
    abort(404)
Example #25
def parse_dl(fname):
    try:
        filetype=magic.from_file(fname)
        filecontent=open(fname,'rb').read()
        (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime) = os.stat(fname)
        dl_file={'filename':fname, 'mimetype':filetype,'size':size,'atime':time.strftime("%a, %d %b %Y %X GMT", time.gmtime(atime)),'ctime':time.strftime("%a, %d %b %Y %X GMT", time.gmtime(ctime)),'mtime':time.strftime("%a, %d %b %Y %X GMT", time.gmtime(mtime))}
        if filetype=='HTML document, ASCII text, with CRLF line terminators' or filetype=='XML document text':
            dl_file['jar_href']=re.findall(r'\<jar\ href\=\"(.*?)\"',filecontent)[0]
            main_class_arr=re.findall('\<applet\-desc.*main\-class\=\"(.*?)\"',filecontent)
            if main_class_arr:
                dl_file['main_class']=main_class_arr[0]
            dl_file['parameters']={}
            for param,value in re.findall(r'<param name="(.*?)" value="(.*?)"',filecontent):
                dl_file['parameters'][param]=value
            
        m = hashlib.md5()
        m.update(filecontent)
        dl_file['md5']=m.digest().encode('hex')
    
        # computing sha1
        s = hashlib.sha1()               
        s.update(filecontent)
        dl_file['sha1']=s.digest().encode('hex')
    
        # computing ssdeep
        dl_file['ssdeep']=pydeep.hash_buf(filecontent)
        return dl_file
    except:
        print "Unable to stat the downloaded file"
Example #26
	def typeFilter(self, path):
		"""
		Pass in the pdfpath here, returns the uploaded file's MIME type
		"""	
		fileTypeString = magic.from_file(path, mime=True) # Stores the MIME string that describes the file type
		web.debug(fileTypeString)
		return fileTypeString
Example #27
    def get_type(self):
        """Get MIME file type.
        @return: file type.
        """
        file_type = None
        if HAVE_MAGIC:
            try:
                ms = magic.open(magic.MAGIC_NONE)
                ms.load()
                file_type = ms.file(self.file_path)
            except:
                try:
                    file_type = magic.from_file(self.file_path)
                except:
                    pass
            finally:
                try:
                    ms.close()
                except:
                    pass

        if file_type is None:
            try:
                p = subprocess.Popen(["file", "-b", self.file_path],
                                     stdout=subprocess.PIPE)
                file_type = p.stdout.read().strip()
            except:
                pass

        return file_type
Example #28
    def guess_filetype(self, filepath):
        # If we have python-magic, use it to determine file type
        typ = None
        if have_magic:
            try:
                # it seems there are conflicting versions of a 'magic'
                # module for python floating around...*sigh*
                if hasattr(magic, 'from_file'):
                    typ = magic.from_file(filepath, mime=True)

                elif magic_tester is not None:
                    descrip = magic_tester.file(filepath)
                    if descrip.startswith("FITS image data"):
                        return ('image', 'fits')

            except Exception as e:
                self.logger.warn("python-magic error: %s; falling back to 'mimetypes'" % (str(e)))

        if typ is None:
            try:
                typ, enc = mimetypes.guess_type(filepath)
            except Exception as e:
                self.logger.warn("mimetypes error: %s; can't determine file type" % (str(e)))

        if typ:
            typ, subtyp = typ.split('/')
            self.logger.debug("MIME type is %s/%s" % (typ, subtyp))
            return (typ, subtyp)

        raise ControlError("Can't determine file type of '%s'" % (filepath))
Example #29
 def _get_source_filetype(self):
     """
     Set the source filetype. First it tries to use magic and
     if import error it will just use the extension
     """
     if not hasattr(self, '_source_filetype'):
         if not isinstance(self.source, basestring):
             # Assuming a file-like object - we won't know it's type.
             return None
         try:
             import magic
         except ImportError:
             self._source_filetype = splitext(self.source)[1].lower().\
                replace('.', '').replace('jpeg', 'jpg')
         else:
             if hasattr(magic, 'from_file'):
                 # Adam Hupp's ctypes-based magic library
                 ftype = magic.from_file(self.source)
             else:
                 # Brett Funderburg's older python magic bindings
                 m = magic.open(magic.MAGIC_NONE)
                 m.load()
                 ftype = m.file(self.source)
             if ftype.find('Microsoft Office Document') != -1:
                 self._source_filetype = 'doc'
             elif ftype.find('PDF document') != -1:
                 self._source_filetype = 'pdf'
             elif ftype.find('JPEG') != -1:
                 self._source_filetype = 'jpg'
             else:
                 self._source_filetype = ftype
     return self._source_filetype
Example #30
def load_binary(path):
  magic_type = magic.from_file(path)
  if 'ELF' in magic_type:
    bv_type = binja.BinaryViewType['ELF']
  elif 'PE32' in magic_type:
    bv_type = binja.BinaryViewType['PE']
  elif 'Mach-O' in magic_type:
    bv_type = binja.BinaryViewType['Mach-O']
  else:
    bv_type = binja.BinaryViewType['Raw']

    # Can't do anything with Raw type
    log.fatal('Unknown binary type: "{}", exiting'.format(magic_type))
    exit(1)

  log.debug('Loading binary in binja...')
  bv = bv_type.open(path)
  bv.update_analysis_and_wait()

  # NOTE: at the moment binja will not load a binary
  # that doesn't have an entry point
  if len(bv) == 0:
    log.error('Binary could not be loaded in binja, is it linked?')
    exit(1)

  return bv
Example #31
def is_text_file(file: Path) -> bool:
    _logger.debug("Checking provided file %s", file)
    mimetype = from_file(str(file), mime=True)

    return file.is_file() and mimetype[:4] == "text"
Example #32
def define_file_type(filepath):
    print(filepath)
    try:
        print(magic.from_file(filepath, mime=True))
    except Exception as inst:
        print(inst)
Example #33
def is_elf_file(path):
    """Check whether 'path' is an ELF file."""
    return is_regular_file(path) and 'ELF' in magic.from_file(path)
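A MIME-based variant of the type check, as a sketch; the exact values libmagic reports vary with version and linkage (Example #36 compares against 'application/x-executable' for Unix executables):

import magic

def is_elf_mime(path):
    # Alternative to substring-matching the description: compare MIME types.
    return magic.from_file(path, mime=True) in (
        'application/x-executable',
        'application/x-pie-executable',
        'application/x-sharedlib',
        'application/x-object',
    )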
Example #34
counter = 1000000
for meme in data:
    filename = 'reddit/' + str(counter)
    success = False
    for i in range(3):
        try:
            urllib.urlretrieve(meme['meme_url'], filename)
        except Exception as e:
            print e, meme['meme_url']
            continue
        success = True
        break
    if not success:
        continue
    mime_type = magic.from_file(filename, mime=True)
    extension = mimetypes.guess_extension(mime_type)
    if extension in ['.jpeg', '.jpg', '.jpe', '.png']:
        if extension == '.jpe':
            extension = '.jpg'
        os.rename(filename, filename + extension)

        captions.append({'image_id': counter, 'caption': meme['title']})
        filenames.append({
            'image_id': counter,
            'file_name': filename + extension
        })
        counter += 1
    else:
        os.remove(filename)
Example #35
    def getFileInfo(self, filepath):
        # whitelist = ['Apple Desktop Services Store','POSIX tar archive (GNU)','POSIX','GNU','UBI image']
        file_type = magic.from_file(filepath)
        mime_type = magic.from_file(filepath, mime=True)

        return file_type, mime_type
Example #36
	def malware(self, msg, sample):

		filepath = "/tmp/mal"
    
		draft_dir = "/tmp/tmp_malw"

		if not os.path.exists(draft_dir):
			os.makedirs(draft_dir)

		# The plugins folder is called addins, because "plugins" is already used and causes an error in errbot
		if not os.path.exists( os.path.dirname(__file__) + '/addins'):
			os.makedirs(os.path.dirname(__file__) + '/addins')

		# If the file comes from the Internet
		if "http" in sample :

			self.download(msg, filepath, sample)

		#If the file must be uploaded
		else:

			filepath = sample

			#TODO add callback_stream to force bot to download the sample

			#self.send(msg.frm, "Send file: " + sample)
			#self.send_stream_request(msg.frm, open(filepath, 'rb'), name=sample, size=path.getsize(filepath), stream_type='document')


		#Check the filetype
		filetype = magic.from_file(filepath, mime=True)

		# In case the file is a text file or a script
		if (filetype == "text/x-shellscript" or filetype == "text/plain"):
			f = open(filepath, "r")
			toto=f.read()

			urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', toto)
    
			if urls != "":
                
				for url in urls:
                    
					clean_url = re.sub('[!@#$;&|]', '', url)
                    
					self.printMessage(msg, "Find payload: " + clean_url)

					maliciousFile = draft_dir + "/" + os.path.basename(clean_url)

					r = requests.get(clean_url, stream=True)

					if r.status_code == 200:
						with open(maliciousFile, 'wb') as f:
							for chunk in r.iter_content(1024):
								f.write(chunk)

					self.check_malware(msg, maliciousFile)

			else:
				self.printMessage(msg, "Can't find url in the file")

		# Check if the file is a Unix executable or a Windows PE
		elif (filetype == "application/x-executable" or filetype == "application/x-dosexec"):
			self.check_malware(msg, filepath)
Example #37
def check_mimetype(row):
    if os.path.isfile(str(row['file'])):
        row['mimetype'] = magic.from_file(row['file'], mime=True)
        row['size'] = os.stat(row['file']).st_size
    return row
Example #38
def get_mime(path):
    """ Fetches the mine type of a file """
    return magic.from_file(path, mime=True)
Example #39
def handle_image(file_name, uploaded_file_id, file_hash):
    """
		This function handles the uploaded files that are detected as images.
		Input: Uploaded Image file
		Output: 
	"""
    import magic
    from PIL import Image
    import os
    import json
    import mysql.connector
    from database import login_info
    import config
    import shutil
    import compareCompatibleFiles
    import convert_to_tiles
    import time
    execution_start_time = time.time()
    log_dictionary = {}
    log_dictionary['all_steps'] = "Image Detected\n"
    file_type = magic.from_file(file_name, mime=True)
    if "png" not in file_type:
        conversion_start_time = time.time()
        if not os.path.exists(config.store_converted_images):
            os.makedirs(config.store_converted_images)
        im = Image.open(file_name)
        new_filename = file_name.replace(config.store_main_uploads,
                                         config.store_converted_images)
        new_filename = new_filename.split(".")[0] + ".png"
        im.save(new_filename)
        file_name = new_filename
        conversion_end_time = time.time()
        total_time = str(
            round(conversion_end_time - conversion_start_time,
                  config.time_digit_precision))

        print("[" + str(uploaded_file_id) +
              "] Converting Image to PNG. Time: " + total_time)
        log_dictionary[
            'all_steps'] += "Converting Image to PNG. Time: %s\n" % (
                total_time)
        config.write_to_file(
            config.log_file_name, "[" + str(uploaded_file_id) +
            "] Converting Image to PNG. Time: " + total_time)

    db = mysql.connector.Connect(**login_info)
    cursor = db.cursor()

    img_info = {}

    image = Image.open(file_name)
    source_width, source_height = image.size

    img_info['dimensions'] = {
        "width": str(source_width),
        "height": str(source_height)
    }
    sql_query = 'UPDATE upload_file SET doc_info = "' + (str(
        json.dumps(img_info)
    ).replace('"', '\\"')).replace(
        "'", "\\'"
    ) + '", processed="1" WHERE processed="0" AND hash LIKE "' + file_hash + '";'
    cursor.execute(sql_query)
    db.commit()
    cursor.close()
    print("[" + str(uploaded_file_id) +
          "] Added Image details to 'upload_file' table")
    log_dictionary[
        'all_steps'] += "Added Image details to 'upload_file' table\n"
    config.write_to_file(
        config.log_file_name, "[" + str(uploaded_file_id) +
        "] Added Image details to 'upload_file' table")

    thumbnail_generation_start_time = time.time()
    newHeight = float(config.fixed_thumbnail_height)
    factor = newHeight / source_height
    newWidth = source_width * factor
    image.thumbnail((newWidth, newHeight), Image.ANTIALIAS)
    if not os.path.exists(config.store_thumbnails):
        os.makedirs(config.store_thumbnails)
    if config.store_converted_images in file_name:
        newfile_location = config.store_thumbnails + file_name.replace(
            config.store_converted_images, "") + ".png"
    else:
        newfile_location = config.store_thumbnails + file_name.replace(
            config.store_main_uploads, "") + ".png"
    image.save(newfile_location)

    thumbnail_blob = "NULL"
    generatedBy = "PNG"
    orientation = ""

    if config.add_thumb_blob_into_db == 1:
        with open(newfile_location, "rb") as img:
            thumbnail_blob = img.read()

    if source_width > source_height:
        orientation = "Landscape"
    else:
        orientation = "Portrait"

    db = mysql.connector.Connect(**login_info)
    cursor = db.cursor()
    cursor.execute(
        "INSERT INTO `thumbnail` VALUES (NULL, %s, %s, %s, %s, %s, %s)", (
            newfile_location,
            str(source_width),
            str(source_height),
            orientation,
            generatedBy,
            thumbnail_blob,
        ))
    db.commit()
    cursor.close()
    thumbnail_generation_end_time = time.time()
    total_time = str(
        round(thumbnail_generation_end_time - thumbnail_generation_start_time,
              config.time_digit_precision))
    print(
        "[" + str(uploaded_file_id) +
        "] Thumbnail Generation Complete. Added Details to 'thumbnail' table. Time: "
        + total_time)
    log_dictionary[
        'all_steps'] += "Thumbnail Generation Complete. Added Details to 'thumbnail' table. Time: " + total_time + "\n"
    config.write_to_file(
        config.log_file_name, "[" + str(uploaded_file_id) +
        "] Thumbnail Generation Complete. Added Details to 'thumbnail' table. Time: "
        + total_time)

    comparison_start_time = time.time()
    checkVariable, similarThumbnailID = compareCompatibleFiles.compare(
        newfile_location, uploaded_file_id, "NULL", source_width,
        source_height)
    comparison_end_time = time.time()
    total_time = str(
        round(comparison_end_time - comparison_start_time,
              config.time_digit_precision))
    print(
        "[" + str(uploaded_file_id) +
        '] Thumbnails Compared. Comparison Details added to \'thumb_comparison\' table. Time: '
        + total_time)
    log_dictionary[
        'all_steps'] += 'Thumbnails Compared. Comparison Details added to \'thumb_comparison\' table. Time: ' + total_time + "\n"
    config.write_to_file(
        config.log_file_name, "[" + str(uploaded_file_id) +
        '] Thumbnails Compared. Comparison Details added to \'thumb_comparison\' table. Time: '
        + total_time)

    if checkVariable == "True":
        high_res_start_time = time.time()
        png_blob = ""
        if config.store_converted_images in file_name:
            shutil.copy(
                file_name,
                file_name.replace(config.store_converted_images,
                                  config.store_high_res_images))
        else:
            shutil.copy(
                file_name,
                file_name.replace(config.store_main_uploads,
                                  config.store_high_res_images))
        if config.add_high_res_png_into_db == 1:
            with open(file_name, "rb") as img:
                png_blob = img.read()
            # print("[" + str(uploaded_file_id) + "," + str(page_number) + "] High Resolution PNG Generated. Details Added to 'image' table. Time: " + total_time)
        db = mysql.connector.Connect(**login_info)
        cursor = db.cursor()
        sql_query = "SELECT * FROM `thumbnail` WHERE dir LIKE '" + newfile_location + "'"
        cursor.execute(sql_query)
        thumbnail_id = 0
        for row in cursor:
            thumbnail_id = row[0]
            break
        cursor.close()

        db = mysql.connector.Connect(**login_info)
        cursor = db.cursor()
        cursor.execute(
            "INSERT INTO `image` VALUES (NULL, NULL, NULL, %s, %s, %s, %s, %s, %s, %s, %s);",
            (
                uploaded_file_id,
                str(thumbnail_id),
                "NULL",
                png_blob,
                "NULL",
                str(source_width),
                str(source_height),
                "",
            ))
        db.commit()
        high_res_end_time = time.time()
        total_time = str(
            round(high_res_end_time - high_res_start_time,
                  config.time_digit_precision))
        print(
            "[" + str(uploaded_file_id) +
            "] High Resolution PNG Generated. Details Added to 'image' table. Time: "
            + total_time)
        log_dictionary[
            'all_steps'] += "High Resolution PNG Generated. Details Added to 'image' table. Time: " + total_time + "\n"
        config.write_to_file(
            config.log_file_name, "[" + str(uploaded_file_id) +
            "] High Resolution PNG Generated. Details Added to 'image' table. Time: "
            + total_time)

        db = mysql.connector.Connect(**login_info)
        cursor = db.cursor()
        sql_query = """SELECT * FROM `image` WHERE upload_file_id LIKE '%s' AND page_number LIKE '%s'""" % (
            uploaded_file_id, "")
        cursor.execute(sql_query)
        current_image_id = 0
        for row in cursor:
            current_image_id = row[0]
            break
        cursor.close()
        tiles_start_time = time.time()
        log_dictionary = convert_to_tiles.generate_tiles(
            file_name, current_image_id, log_dictionary, "NULL",
            uploaded_file_id, tiles_start_time)
    else:
        # print("[" + str(uploaded_file_id) + '] Thumbnails Compared. Comparison Details added to \'thumb_comparison\' table.')
        log_dictionary[
            'all_steps'] += 'Thumbnail matches with Thumbnail ID: ' + similarThumbnailID + '\n'
        # Dont convert, abort process
    if config.keep_converted_images == 0 and config.store_converted_images in file_name:
        os.remove(file_name)
    log_dictionary['total_time'] = str(time.time() - execution_start_time)
    sql_query = "UPDATE upload_file SET log = '" + (str(
        json.dumps(log_dictionary)).replace('"', '\\"')).replace(
            "'",
            "\\'") + "' WHERE hash = '" + file_hash + "' AND processed = '1'"
    db = mysql.connector.Connect(**login_info)
    cursor = db.cursor()
    cursor.execute(sql_query)
    db.commit()
    cursor.close()
Example #40
    def __init__(self, path):
        """
        Read a firmware file and store its data ready for device programming.

        This class will try to guess the file type if python-magic is available.

        If python-magic indicates a plain text file, and if IntelHex is
        available, then the file will be treated as one of Intel HEX format.

        In all other cases, the file will be treated as a raw binary file.

        In both cases, the file's contents are stored in bytes for subsequent
        usage to program a device or to perform a crc check.

        Parameters:
            path -- A str with the path to the firmware file.

        Attributes:
            bytes: A bytearray with firmware contents ready to send to the device
        """
        self._crc32 = None
        firmware_is_hex = False

        if have_magic:
            file_type = bytearray(magic.from_file(path, True))

            # from_file() returns bytes with PY3, str with PY2. This comparison
            # will be True in both cases.
            if file_type == b'text/plain':
                firmware_is_hex = True
                mdebug(5, "Firmware file: Intel Hex")
            elif file_type == b'application/octet-stream':
                mdebug(5, "Firmware file: Raw Binary")
            else:
                error_str = "Could not determine firmware type. Magic " \
                            "indicates '%s'" % (file_type)
                raise CmdException(error_str)
        else:
            if os.path.splitext(path)[1][1:] in self.HEX_FILE_EXTENSIONS:
                firmware_is_hex = True
                mdebug(5, "Your firmware looks like an Intel Hex file")
            else:
                mdebug(5, "Cannot auto-detect firmware filetype: Assuming .bin")

            mdebug(10, "For more solid firmware type auto-detection, install "
                       "python-magic.")
            mdebug(10, "Please see the readme for more details.")

        if firmware_is_hex:
            if have_hex_support:
                self.bytes = bytearray(IntelHex(path).tobinarray())
                return
            else:
                error_str = "Firmware is Intel Hex, but the IntelHex library " \
                            "could not be imported.\n" \
                            "Install IntelHex in site-packages or program " \
                            "your device with a raw binary (.bin) file.\n" \
                            "Please see the readme for more details."
                raise CmdException(error_str)

        with open(path, 'rb') as f:
            self.bytes = bytearray(f.read())
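The class name is not shown in this excerpt; a hypothetical usage sketch, assuming it is exposed as something like FirmwareFile and that mdebug is in scope as above:

fw = FirmwareFile('firmware.hex')  # hypothetical class name and path
mdebug(5, "Firmware image is %d bytes long" % len(fw.bytes))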
Example #41
def from_file(f, mime=False):
    try:
        return magic.from_file(f, mime)
    except magic.MagicException as e:
        return e.message
Example #42
    return requests_session


DUMPS_CACHE_DIR = 'cache'
RESULTS_DIR = 'results'
CHUNK_SIZE = 4194304  # 4MiB

# There are two Python modules with the name `magic`, luckily both do
# the same thing.
# pylint: disable=no-member
if 'open' in dir(magic):
    _mime = magic.open(magic.MAGIC_MIME)
    _mime.load()
    mimetype = _mime.file
else:
    mimetype = lambda filename: magic.from_file(filename, mime=True)  # NOQA
# pylint: enable=no-member


def isplaintext(filename):
    '''
    Returns True if `filename` has mimetype == 'text/plain'.
    '''
    if os.path.islink(filename):
        filename = os.readlink(filename)
    return mimetype(filename).split(';')[0] == 'text/plain'


def smart_open(filename):
    '''
    Returns an open file object if `filename` is plain text, else assumes
Example #43
def main(args=None):
    args = parser.parse_args(args)
    try:
        print(magic.from_file(args.filename, mime=args.mime))
    except FileNotFoundError as e:
        return e
Example #44
def all_files():
    ls = run(['git', 'ls-files'], universal_newlines=True)
    for filename in ls.splitlines():
        if magic.from_file(filename, mime=True).split('/')[0] == 'text':
            yield filename
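Example #44 relies on a run() helper that is not shown; a self-contained variant using the standard library, as a sketch:

import subprocess

import magic

def all_text_files():
    # Yield git-tracked files whose detected MIME major type is "text".
    ls = subprocess.run(['git', 'ls-files'], capture_output=True, text=True, check=True).stdout
    for filename in ls.splitlines():
        if magic.from_file(filename, mime=True).split('/')[0] == 'text':
            yield filename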
Example #45
def getFiletype(path):
    return magic.from_file(path)
Example #46
def RunMontag():
    devnull = open(os.devnull, 'w')

    parser = argparse.ArgumentParser(
        description='e-book profanity scrubber',
        add_help=False,
        usage=f'{os.path.basename(__file__)} [options]')
    requiredNamed = parser.add_argument_group('required arguments')
    requiredNamed.add_argument('-i',
                               '--input',
                               required=True,
                               dest='input',
                               metavar='<STR>',
                               type=str,
                               default='',
                               help='Input file')
    requiredNamed.add_argument('-o',
                               '--output',
                               required=True,
                               dest='output',
                               metavar='<STR>',
                               type=str,
                               default='',
                               help='Output file')
    requiredNamed.add_argument(
        '-w',
        '--word-list',
        dest='swears',
        metavar='<STR>',
        type=str,
        default=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'swears.txt'),
        help='Profanity list text file (default: swears.txt)',
    )
    requiredNamed.add_argument(
        '-e',
        '--encoding',
        dest='encoding',
        metavar='<STR>',
        type=str,
        default='utf-8',
        help='Text encoding (default: utf-8)',
    )
    try:
        parser.error = parser.exit
        args = parser.parse_args()
    except SystemExit:
        parser.print_help()
        exit(2)

    # initialize the set of profanity
    swears = set(
        map(lambda x: x.lower(), [
            line.strip()
            for line in open(args.swears, 'r', encoding=args.encoding)
        ]))

    # determine the type of the ebook
    bookMagic = magic.from_file(args.input, mime=True)

    eprint(f'Processing "{args.input}" of type "{"".join(bookMagic)}"')

    with tempfile.TemporaryDirectory() as tmpDirName:
        metadataFileSpec = os.path.join(tmpDirName, 'metadata.opf')

        # save off the metadata to be restored after conversion
        eprint("Extracting metadata...")
        metadataExitCode = subprocess.call(
            ["ebook-meta", args.input, "--to-opf=" + metadataFileSpec],
            stdout=devnull,
            stderr=devnull)
        if metadataExitCode != 0:
            raise subprocess.CalledProcessError(
                metadataExitCode,
                f"ebook-meta {args.input} --to-opf={metadataFileSpec}")

        # convert the book from whatever format it is into epub for conversion
        if "epub" in bookMagic.lower():
            epubFileSpec = args.input
            wasEpub = True
        else:
            wasEpub = False
            epubFileSpec = os.path.join(tmpDirName, 'ebook.epub')
            eprint("Converting to EPUB...")
            toEpubExitCode = subprocess.call(
                ["ebook-convert", args.input, epubFileSpec],
                stdout=devnull,
                stderr=devnull)
            if toEpubExitCode != 0:
                raise subprocess.CalledProcessError(
                    toEpubExitCode,
                    f"ebook-convert {args.input} {epubFileSpec}")

        # todo: somehow links/TOCs tend to get messed up

        eprint("Processing book contents...")
        book = epub.read_epub(epubFileSpec)
        newBook = epub.EpubBook()
        newBook.spine = ['nav']
        documentNumber = 0
        for item in book.get_items():
            if item.get_type() == ebooklib.ITEM_DOCUMENT:
                documentNumber += 1
                cleanTokens = []
                for tokenNeedsCensoring, token in tagTokenizer(
                        item.get_content().decode(args.encoding)):
                    if tokenNeedsCensoring and (token.lower() in swears):
                        # print(f"censoring:→{token}←")
                        cleanTokens.append("*" * len(token))
                    else:
                        # print(f"including:→{token}←")
                        cleanTokens.append(token)
                    # if (len(cleanTokens) % 100 == 0):
                    #   eprint(f"Processed {len(cleanTokens)} tokens from section {documentNumber}...")
                item.set_content(''.join(cleanTokens).encode(args.encoding))
                newBook.spine.append(item)
                newBook.add_item(item)
            else:
                newBook.add_item(item)
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        # write epub (either final or intermediate)
        eprint("Generating output...")
        if args.output.lower().endswith('.epub'):
            epub.write_epub(args.output, newBook)
        else:
            cleanEpubFileSpec = os.path.join(tmpDirName, 'ebook_cleaned.epub')
            epub.write_epub(cleanEpubFileSpec, newBook)
            eprint("Converting...")
            fromEpubExitCode = subprocess.call(
                ["ebook-convert", cleanEpubFileSpec, args.output],
                stdout=devnull,
                stderr=devnull)
            if fromEpubExitCode != 0:
                raise subprocess.CalledProcessError(
                    fromEpubExitCode,
                    f"ebook-convert {cleanEpubFileSpec} {args.output}")

        # restore metadata
        eprint("Restoring metadata...")
        metadataExitCode = subprocess.call(
            ["ebook-meta", args.output, "--from-opf=" + metadataFileSpec],
            stdout=devnull,
            stderr=devnull)
        if metadataExitCode != 0:
            raise subprocess.CalledProcessError(
                metadataExitCode,
                f"ebook-meta {args.output} --from-opf={metadataFileSpec}")
Example #47
import os
import zipfile
import magic
import subprocess
import shutil

destination = '/home/mark/File-Entropy-master/malware'

print(destination)

for filename in os.listdir(destination):
    print(filename)
    file = os.path.join(destination, filename)
    magictype = magic.from_file(file)

    if any(x in magictype
           for x in ['upx', 'packed', 'compressed', 'extracting']):
        packedname = os.path.join(destination, 'packed-' + filename)
        os.rename(file, packedname)
        print('file was packed:', packedname)
        if 'upx' in magictype:
            print('found upx, attempting to unpack')
            # if its upx we can try to unpack it
            unpackedname = os.path.join(destination,
                                        'upx-unpacked-' + filename)
            shutil.copyfile(packedname, unpackedname)
            try:
                subprocess.check_output(['upx', '-d', unpackedname])
                print('unpacked as :', unpackedname)
            except:
                # bad practice but I don't care why upx failed
                pass
Example #48
        print compile_cmd
        #exit(-1)
        process = subprocess.Popen(compile_cmd,
                                   shell=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)

        out, err = process.communicate()
        errcode = process.returncode
        if errcode < 0:
            print '[ERROR]', errcode, err
            continue

        path = os.path.join(path, 'build-gcc')
        print path
        #exit(-1)

        for root, _, files in os.walk(path):
            for fname in files:
                fpath = os.path.join(root, fname)
                m = magic.from_file(fpath).lower()
                #print fpath, m
                if 'elf ' not in m:
                    continue
                print fpath, fpath.split('build-gcc/')[1]
                upload(fpath, fpath.split('build-gcc/')[1], name)

        os.remove(os.path.join(working_dir, uri))
        shutil.rmtree(os.path.join(working_dir, 'imagemagick-' + name))
        break
Example #49
def get_mime_for_file(filepath):
    return magic.from_file(filepath, mime=True)
Example #50
    def run(self):
        """ Main controller for Video2X

        This function controls the flow of video conversion
        and handles all necessary functions.
        """

        # external stop signal when called in a thread
        self.running = True

        # define process pool to contain processes
        self.process_pool = []

        # load driver modules
        DriverWrapperMain = getattr(importlib.import_module(f'wrappers.{self.driver}'), 'WrapperMain')
        self.driver_object = DriverWrapperMain(self.driver_settings)

        # load options from upscaler class into driver settings
        self.driver_object.load_configurations(self)

        # initialize FFmpeg object
        self.ffmpeg_object = Ffmpeg(self.ffmpeg_settings, extracted_frame_format=self.extracted_frame_format)

        # define processing queue
        self.processing_queue = queue.Queue()

        Avalon.info(_('Loading files into processing queue'))
        Avalon.debug_info(_('Input path(s): {}').format(self.input))

        # make output directory if the input is a list or a directory
        if isinstance(self.input, list) or self.input.is_dir():
            self.output.mkdir(parents=True, exist_ok=True)

        input_files = []

        # if input is single directory
        # put it in a list for compatibility with the following code
        if not isinstance(self.input, list):
            input_paths = [self.input]
        else:
            input_paths = self.input

        # flatten directories into file paths
        for input_path in input_paths:

            # if the input path is a single file
            # add the file's path object to input_files
            if input_path.is_file():
                input_files.append(input_path)

            # if the input path is a directory
            # add all files under the directory into the input_files (non-recursive)
            elif input_path.is_dir():
                input_files.extend([f for f in input_path.iterdir() if f.is_file()])

        output_paths = []

        for input_path in input_files:

            # get file type
            # try python-magic if it's available
            try:
                input_file_mime_type = magic.from_file(str(input_path.absolute()), mime=True)
                input_file_type = input_file_mime_type.split('/')[0]
                input_file_subtype = input_file_mime_type.split('/')[1]
            except Exception:
                input_file_mime_type = input_file_type = input_file_subtype = ''

            # if python-magic doesn't determine the file to be an image/video file
            # fall back to mimetypes to guess the file type based on the extension
            if input_file_type not in ['image', 'video']:
                # in case python-magic fails to detect file type
                # try guessing file mime type with mimetypes
                input_file_mime_type = mimetypes.guess_type(input_path.name)[0]
                input_file_type = input_file_mime_type.split('/')[0]
                input_file_subtype = input_file_mime_type.split('/')[1]

            Avalon.debug_info(_('File MIME type: {}').format(input_file_mime_type))

            # set default output file suffixes
            # if image type is GIF, default output suffix is also .gif
            if input_file_mime_type == 'image/gif':
                output_path = self.output / self.output_file_name_format_string.format(original_file_name=input_path.stem, extension='.gif')

            elif input_file_type == 'image':
                output_path = self.output / self.output_file_name_format_string.format(original_file_name=input_path.stem, extension=self.image_output_extension)

            elif input_file_type == 'video':
                output_path = self.output / self.output_file_name_format_string.format(original_file_name=input_path.stem, extension=self.video_output_extension)

            # if file is none of: image, image/gif, video
            # skip to the next task
            else:
                Avalon.error(_('File {} ({}) neither an image nor a video').format(input_path, input_file_mime_type))
                Avalon.warning(_('Skipping this file'))
                continue

            # if there is only one input file
            # do not modify output file suffix
            if isinstance(self.input, pathlib.Path) and self.input.is_file():
                output_path = self.output

            output_path_id = 0
            while str(output_path) in output_paths:
                output_path = output_path.parent / pathlib.Path(f'{output_path.stem}_{output_path_id}{output_path.suffix}')
                output_path_id += 1

            # record output path
            output_paths.append(str(output_path))

            # push file information into processing queue
            self.processing_queue.put((input_path.absolute(), output_path.absolute(), input_file_mime_type, input_file_type, input_file_subtype))

        # check argument sanity before running
        self._check_arguments()

        # record file count for external calls
        self.total_files = self.processing_queue.qsize()

        Avalon.info(_('Loaded files into processing queue'))
        # print all files in queue for debugging
        for job in self.processing_queue.queue:
            Avalon.debug_info(_('Input file: {}').format(job[0].absolute()))

        try:
            while not self.processing_queue.empty():

                # get new job from queue
                self.current_input_file, output_path, input_file_mime_type, input_file_type, input_file_subtype = self.processing_queue.get()

                # get current job starting time for GUI calculations
                self.current_processing_starting_time = time.time()

                # get video information JSON using FFprobe
                Avalon.info(_('Reading file information'))
                file_info = self.ffmpeg_object.probe_file_info(self.current_input_file)

                # create temporary directories for storing frames
                self.create_temp_directories()

                # start handling input
                # if input file is a static image
                if input_file_type == 'image' and input_file_subtype != 'gif':
                    Avalon.info(_('Starting upscaling image'))

                    # copy original file into the pre-processing directory
                    shutil.copy(self.current_input_file, self.extracted_frames / self.current_input_file.name)

                    width = int(file_info['streams'][0]['width'])
                    height = int(file_info['streams'][0]['height'])
                    framerate = self.total_frames = 1

                # elif input_file_mime_type == 'image/gif' or input_file_type == 'video':
                else:
                    Avalon.info(_('Starting upscaling video/GIF'))

                    # find index of video stream
                    video_stream_index = None
                    for stream in file_info['streams']:
                        if stream['codec_type'] == 'video':
                            video_stream_index = stream['index']
                            break

                    # exit if no video stream found
                    if video_stream_index is None:
                        Avalon.error(_('Aborting: No video stream found'))
                        raise StreamNotFoundError('no video stream found')

                    # get average frame rate of video stream
                    framerate = float(Fraction(file_info['streams'][video_stream_index]['r_frame_rate']))
                    width = int(file_info['streams'][video_stream_index]['width'])
                    height = int(file_info['streams'][video_stream_index]['height'])

                    # get total number of frames
                    Avalon.info(_('Getting total number of frames in the file'))

                    # if container stores total number of frames in nb_frames, fetch it directly
                    if 'nb_frames' in file_info['streams'][video_stream_index]:
                        self.total_frames = int(file_info['streams'][video_stream_index]['nb_frames'])

                    # otherwise call FFprobe to count the total number of frames
                    else:
                        self.total_frames = self.ffmpeg_object.get_number_of_frames(self.current_input_file, video_stream_index)

                # calculate scale width/height/ratio and scaling jobs if required
                Avalon.info(_('Calculating scaling parameters'))

                # create a local copy of the global output settings
                output_scale = self.scale_ratio
                output_width = self.scale_width
                output_height = self.scale_height

                # calculate output width and height if scale ratio is specified
                if output_scale is not None:
                    output_width = int(math.ceil(width * output_scale / 2.0) * 2)
                    output_height = int(math.ceil(height * output_scale / 2.0) * 2)

                else:
                    # scale keeping aspect ratio if only one of width/height is given
                    if output_width == 0 or output_width is None:
                        output_width = output_height / height * width

                    elif output_height == 0 or output_height is None:
                        output_height = output_width / width * height

                    output_width = int(math.ceil(output_width / 2.0) * 2)
                    output_height = int(math.ceil(output_height / 2.0) * 2)

                    # calculate required minimum scale ratio
                    output_scale = max(output_width / width, output_height / height)
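                # worked example (illustrative, not from the source): for a 1920x1080
                # input with only output_width = 2560 given, output_height becomes
                # 2560 / 1920 * 1080 = 1440; both values are already even, so the
                # required scale ratio is max(2560 / 1920, 1440 / 1080) = 1.3333...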

                # if driver is one of the drivers that doesn't support arbitrary scaling ratio
                # TODO: more documentations on this block
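                # these drivers only accept a fixed set of ratios (listed in
                # DRIVER_FIXED_SCALING_RATIOS), so the required overall ratio is
                # decomposed into a queue of supported passes whose product is at
                # least the target; the Lanczos downscaling step below then resizes
                # the final frames to the exact output resolution when needed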
                if self.driver in DRIVER_FIXED_SCALING_RATIOS:

                    # select the optimal driver scaling ratio to use
                    supported_scaling_ratios = sorted(DRIVER_FIXED_SCALING_RATIOS[self.driver])

                    remaining_scaling_ratio = math.ceil(output_scale)
                    self.scaling_jobs = []

                    # if the scaling ratio is 1.0
                    # apply the smallest scaling ratio available
                    if remaining_scaling_ratio == 1:
                        self.scaling_jobs.append(supported_scaling_ratios[0])
                    else:
                        while remaining_scaling_ratio > 1:
                            for ratio in supported_scaling_ratios:
                                if ratio >= remaining_scaling_ratio:
                                    self.scaling_jobs.append(ratio)
                                    remaining_scaling_ratio /= ratio
                                    break

                            else:
                                found = False
                                for i in supported_scaling_ratios:
                                    for j in supported_scaling_ratios:
                                        if i * j >= remaining_scaling_ratio:
                                            self.scaling_jobs.extend([i, j])
                                            remaining_scaling_ratio /= i * j
                                            found = True
                                            break
                                    if found is True:
                                        break

                                if found is False:
                                    self.scaling_jobs.append(supported_scaling_ratios[-1])
                                    remaining_scaling_ratio /= supported_scaling_ratios[-1]

                else:
                    self.scaling_jobs = [output_scale]

                # print file information
                Avalon.debug_info(_('Framerate: {}').format(framerate))
                Avalon.debug_info(_('Width: {}').format(width))
                Avalon.debug_info(_('Height: {}').format(height))
                Avalon.debug_info(_('Total number of frames: {}').format(self.total_frames))
                Avalon.debug_info(_('Output width: {}').format(output_width))
                Avalon.debug_info(_('Output height: {}').format(output_height))
                Avalon.debug_info(_('Required scale ratio: {}').format(output_scale))
                Avalon.debug_info(_('Upscaling jobs queue: {}').format(self.scaling_jobs))

                # extract frames from video
                if input_file_mime_type == 'image/gif' or input_file_type == 'video':
                    self.process_pool.append((self.ffmpeg_object.extract_frames(self.current_input_file, self.extracted_frames)))
                    self._wait()

                # if driver is waifu2x-caffe
                # pass pixel format output depth information
                if self.driver == 'waifu2x_caffe':
                    # get a dict of all pixel formats and corresponding bit depth
                    pixel_formats = self.ffmpeg_object.get_pixel_formats()

                    # try getting pixel format's corresponding bit depth
                    try:
                        self.driver_settings['output_depth'] = pixel_formats[self.ffmpeg_object.pixel_format]
                    except KeyError:
                        Avalon.error(_('Unsupported pixel format: {}').format(self.ffmpeg_object.pixel_format))
                        raise UnsupportedPixelError(f'unsupported pixel format {self.ffmpeg_object.pixel_format}')

                # upscale images one by one using waifu2x
                Avalon.info(_('Starting to upscale extracted frames'))
                upscale_begin_time = time.time()

                self.current_pass = 1
                if self.driver == 'waifu2x_caffe':
                    self.driver_object.set_scale_resolution(output_width, output_height)
                else:
                    self.driver_object.set_scale_ratio(self.scaling_jobs[0])
                self._upscale_frames(self.extracted_frames, self.upscaled_frames)
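                # for additional passes, the previous pass's output becomes the next
                # pass's input: extracted_frames is wiped and replaced with the
                # contents of upscaled_frames before each run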
                for job in self.scaling_jobs[1:]:
                    self.current_pass += 1
                    self.driver_object.set_scale_ratio(job)
                    shutil.rmtree(self.extracted_frames)
                    shutil.move(self.upscaled_frames, self.extracted_frames)
                    self.upscaled_frames.mkdir(parents=True, exist_ok=True)
                    self._upscale_frames(self.extracted_frames, self.upscaled_frames)

                Avalon.info(_('Upscaling completed'))
                Avalon.info(_('Average processing speed: {} seconds per frame').format((time.time() - upscale_begin_time) / self.total_frames))

                # downscale frames with Lanczos
                Avalon.info(_('Lanczos downscaling frames'))
                shutil.rmtree(self.extracted_frames)
                shutil.move(self.upscaled_frames, self.extracted_frames)
                self.upscaled_frames.mkdir(parents=True, exist_ok=True)

                for image in tqdm([i for i in self.extracted_frames.iterdir() if i.is_file() and i.name.endswith(self.extracted_frame_format)], ascii=True, desc=_('Downscaling')):
                    image_object = Image.open(image)

                    # if the image dimensions are not equal to the output size
                    # resize the image using Lanczos
                    if (image_object.width, image_object.height) != (output_width, output_height):
                        image_object.resize((output_width, output_height), Image.LANCZOS).save(self.upscaled_frames / image.name)
                        image_object.close()

                    # if the image's dimensions are already equal to the output size
                    # move image to the finished directory
                    else:
                        image_object.close()
                        shutil.move(image, self.upscaled_frames / image.name)

                # start handling output
                # output can be either GIF or video
                if input_file_type == 'image' and input_file_subtype != 'gif':

                    Avalon.info(_('Exporting image'))

                    # there should be only one image in the directory
                    shutil.move([f for f in self.upscaled_frames.iterdir() if f.is_file()][0], output_path)

                # elif input_file_mime_type == 'image/gif' or input_file_type == 'video':
                else:

                    # if the desired output is gif file
                    if output_path.suffix.lower() == '.gif':
                        Avalon.info(_('Converting extracted frames into GIF image'))
                        gifski_object = Gifski(self.gifski_settings)
                        self.process_pool.append(gifski_object.make_gif(self.upscaled_frames, output_path, framerate, self.extracted_frame_format, output_width, output_height))
                        self._wait()
                        Avalon.info(_('Conversion completed'))

                    # if the desired output is video
                    else:
                        # frames to video
                        Avalon.info(_('Converting extracted frames into video'))
                        self.process_pool.append(self.ffmpeg_object.assemble_video(framerate, self.upscaled_frames))
                        # f'{scale_width}x{scale_height}'
                        self._wait()
                        Avalon.info(_('Conversion completed'))

                        try:
                            # migrate audio tracks and subtitles
                            Avalon.info(_('Migrating audio, subtitles and other streams to upscaled video'))
                            self.process_pool.append(self.ffmpeg_object.migrate_streams(self.current_input_file,
                                                                                        output_path,
                                                                                        self.upscaled_frames))
                            self._wait()

                        # if failed to copy streams
                        # use file with only video stream
                        except subprocess.CalledProcessError:
                            traceback.print_exc()
                            Avalon.error(_('Failed to migrate streams'))
                            Avalon.warning(_('Trying to output video without additional streams'))

                            if input_file_mime_type == 'image/gif':
                                # copy will overwrite destination content if exists
                                shutil.copy(self.upscaled_frames / self.ffmpeg_object.intermediate_file_name, output_path)

                            else:
                                # construct output file path
                                output_file_name = f'{output_path.stem}{self.ffmpeg_object.intermediate_file_name.suffix}'
                                output_video_path = output_path.parent / output_file_name

                                # if output file already exists
                                # create temporary directory in output folder
                                # temporary directories generated by tempfile are guaranteed to be unique
                                # and won't conflict with other files
                                if output_video_path.exists():
                                    Avalon.error(_('Output video file exists'))

                                    temporary_directory = pathlib.Path(tempfile.mkdtemp(dir=output_path.parent))
                                    output_video_path = temporary_directory / output_file_name
                                    Avalon.info(_('Created temporary directory to contain file'))

                                # move file to new destination
                                Avalon.info(_('Writing intermediate file to: {}').format(output_video_path.absolute()))
                                shutil.move(self.upscaled_frames / self.ffmpeg_object.intermediate_file_name, output_video_path)

                # clean up temporary directories, mark this job as done and count it as processed
                self.cleanup_temp_directories()
                self.processing_queue.task_done()
                self.total_processed += 1

        except (Exception, KeyboardInterrupt, SystemExit) as e:
            with contextlib.suppress(ValueError, AttributeError):
                self.cleanup_temp_directories()
                self.running = False
            raise e

        # signal upscaling completion
        self.running = False
Example #51
def mp4togif( input_mp4_file, gif_file = None, duration = None, scale = 1.0 ):
    """
    This consists of voodoo FFmpeg_ magic that converts MP4_ to animated GIF_ reasonably well. Don't ask me how most of it works; just be on-your-knees-kissing-the-dirt grateful that MILLIONS of people hack on and into FFmpeg_ so that this information is available and the workflow works.
    
    This requires a working ``ffmpeg`` and ``ffprobe`` executable to work. If the input file is named ``<input>.mp4``, the output animated GIF file is named ``<input>.gif``.
    
    Here are resources that I used to get this working.
    
    * `Tutorial on high quality movie to animated GIF conversion <movie_2_gif_>`_. I hope this doesn't go away!
    
    * `Using FFPROBE to output JSON format <ffprobe_json_>`_.
    
    :param str input_mp4_file: the name of the valid MP4_ file.
    :param str gif_file: the (optional) name of the animated GIF_ file. If not provided, then creates a GIF file of some default name.
    :param float duration: duration, in seconds, of MP4_ file to use to make the animated GIF_. If ``None`` is provided, use the full movie. If provided, then must be :math:`\ge 1` seconds.
    :param float scale: scaling of the input width and height of the MP4_ file. Default is 1.0. Must be :math:`> 0`.
  
    .. seealso:: :py:meth:`make_square_mp4video <nprstuff.core.convert_image.make_square_mp4video>`.
    
    .. _GIF: https://en.wikipedia.org/wiki/GIF
    .. _movie_2_gif: http://blog.pkh.me/p/21-high-quality-gif-with-ffmpeg.html
    """
    from distutils.spawn import find_executable
    ffmpeg_exec = find_executable( 'ffmpeg' )
    ffprobe_exec = find_executable( 'ffprobe' )
    assert(all(map(lambda tok: tok is not None, ( ffmpeg_exec, ffprobe_exec ))))
    assert( os.path.basename( input_mp4_file ).endswith( '.mp4' ) )
    assert( os.path.isfile( input_mp4_file ) )
    if duration is not None: assert( duration >= 1.0 )
    assert( scale > 0.0 )
    #
    ## assert this is an MP4 file
    assert( 'ISO Media,' in magic.from_file( input_mp4_file ) )
    #
    ## GIF output and PALETTE file
    if gif_file is None: gif_file = input_mp4_file.replace('.mp4', '.gif' )
    else: assert( os.path.basename( gif_file ).endswith( '.gif' ) )
    palettefile = '%s.png' % str( uuid.uuid4( ) )
    #
    ## step #0: first scale the video if scale is not 1.0
    newmp4file = input_mp4_file
    if scale != 1.0:
        newmp4file = '%s.mp4' % str( uuid.uuid4( ) )
        #
        ## thought experiment: you want to scale a (divisible-by-two) MP4 file by some multiplier
        ## the OUTPUT file itself must have width AND height divisible by two
        ## the corporate knowledge is embedded in 'scale=ceil(iw*%0.2f)*2:ceil(ih*%0.2f)*2' % ( scale * 0.5, scale * 0.5 )
        ## intent of that video filter: scale width and height by HALF of scale, round up, then multiply by 2
        ## by definition this produces a final (scaled) width and height that are divisible by two
        ## solution to the not-divisible-by-2 error: https://stackoverflow.com/questions/20847674/ffmpeg-libx264-height-not-divisible-by-2
        cmd = [
            ffmpeg_exec, '-y', '-v', 'warning', '-i', input_mp4_file,
            '-vf', 'scale=ceil(iw*%0.2f)*2:ceil(ih*%0.2f)*2' % ( scale * 0.5, scale * 0.5 ),
            newmp4file ]
        logging.debug('COMMAND TO SCALE = %s.' % ' '.join( cmd ) )
        stdout_val = subprocess.check_output(
            cmd, stderr = subprocess.STDOUT )
        logging.debug( 'OUTPUT FFMPEG SCALE = %s.' % stdout_val )
    
    #
    ## get info JSON to get width, fps
    stdout_val = subprocess.check_output(
        [ ffprobe_exec, '-v', 'quiet', '-show_streams',
         '-show_format', '-print_format', 'json', newmp4file ],
        stderr = subprocess.STDOUT )
    mp4file_info = json.loads( stdout_val )
    logging.debug( 'mp4file_info = %s.' % mp4file_info )
    # from dictionary, get width
    width_of_mp4 = int( mp4file_info[ 'streams' ][ 0 ][ 'width' ] )
    fps_string = mp4file_info[ 'streams' ][ 0 ][ 'avg_frame_rate' ]
    fps = int( float( fps_string.split('/')[0] ) * 1.0 /
              float( fps_string.split('/')[1] ) )
    
    #
    ## now do the voodoo magic from resource #1
    ## step #1: create palette, run at fps
    args_mov_before = [ ]
    if duration is not None: args_mov_before = [ '-t', '%0.3f' % duration ]
    cmd = [
        ffmpeg_exec, '-y', '-v', 'warning', ] + args_mov_before + [
            '-i', newmp4file,
            '-vf', 'fps=%d,scale=%d:-1:flags=lanczos,palettegen' % ( fps, width_of_mp4 ),
            palettefile ]
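    # illustrative only (an assumption, not from the source): with duration=5.0,
    # fps=24 and width_of_mp4=640 the command above roughly expands to
    #   ffmpeg -y -v warning -t 5.000 -i <movie>.mp4 \
    #     -vf fps=24,scale=640:-1:flags=lanczos,palettegen <uuid>.png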
    proc = subprocess.Popen(cmd, stdout = subprocess.PIPE, stderr = subprocess.STDOUT )
    stdout_val, stderr_val = proc.communicate( )
    assert( os.path.isfile( palettefile ) )
    #
    ## step #2: take palette file, MP4 file, create animated GIF
    cmd = [
        ffmpeg_exec, '-y', '-v', 'warning' ] + args_mov_before + [
            '-i', newmp4file,
            '-i', palettefile, '-lavfi', 'fps=%d,scale=%d:-1:flags=lanczos[x];[x][1:v]paletteuse' % (
            fps, width_of_mp4 ), gif_file ]
    proc = subprocess.Popen(cmd, stdout = subprocess.PIPE, stderr = subprocess.STDOUT )
    stdout_val, stderr_val = proc.communicate( )
    #
    ## now batting cleanup
    try:
        if newmp4file != input_mp4_file: os.remove( newmp4file )
        os.remove( palettefile )
    except Exception as e:
        print( 'cleanup failed, reason:', e )
        pass
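# a minimal usage sketch (an illustration, not part of the original example):
# assuming a local clip.mp4 and working ffmpeg/ffprobe executables, this call
# would write clip.gif next to the input, using the first 5 seconds at half size
mp4togif('clip.mp4', duration=5.0, scale=0.5)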
Example #52
    def mimetype_lookup(file_object, debug_mode=True):
        """
        Obtain a file's mimetype given an Agave response file object.

        When developing locally (DEBUG == True), we can't assume that Corral is
        mounted, so we have to download the file into memory and pass its bytes
        to python-magic. In staging/prod where Corral is mounted, we
        build up the absolute path of the file and pass that to python-magic to
        get the mimetype.

        :param agavepy.agave.AttrDict file_object: Agave file object to look up.
        :param bool debug_mode: True if Debug mode is active; False otherwise.

        :return string mimeType: The mimetype to index with Elasticsearch.

        """
        if debug_mode:
            # In local dev, corral isn't mounted so we have to download the file to get its mimetype.
            import requests
            client = get_service_account_client()
            system = file_object['system']
            path = file_object['path']

            try:
                f = client.files.download(systemId=system, filePath=path)
                mimeType = magic.from_buffer(f.content, mime=True)
            except requests.HTTPError as e:
                if e.response.status_code == 501:
                    # This is a 'not implemented' error that should only get thrown for directories.
                    mimeType = 'text/directory'
                elif e.response.status_code == 404:
                    # The file cannot be retrieved; re-raise the original error.
                    raise
            return mimeType

        else:
            # In staging/prod, Corral is mounted and we can use the absolute path to get the mimetype.
            SYSTEM_ID_PATHS = [
                {'regex': r'^designsafe.storage.default$',
                'path': '/corral-repl/tacc/NHERI/shared'},
                {'regex': r'^designsafe.storage.community$',
                'path': '/corral-repl/tacc/NHERI/community'},
                {'regex': r'^designsafe.storage.published$',
                'path': '/corral-repl/tacc/NHERI/published'},
                {'regex': r'^project\-',
                'path': '/corral-repl/tacc/NHERI/projects'}
            ]
            for mapping in SYSTEM_ID_PATHS:
                if re.search(mapping['regex'], file_object['system']):
                    base_path = mapping['path']
                    if mapping['regex'] == r'^project\-':
                        base_path += '/' + file_object['system'][8:] 
                    break

            filePath = base_path + file_object['path']
            if os.path.isdir(filePath):
                mimeType = 'text/directory'
            else:
                mimeType = magic.from_file(filePath, mime=True)

            return mimeType
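    # a hedged usage sketch (illustrative; assumes the method is exposed as a
    # staticmethod and that the mapped Corral path is actually mounted):
    # mime = mimetype_lookup({'system': 'designsafe.storage.default',
    #                         'path': '/username/data.csv'}, debug_mode=False)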
Example #53
 def compat(self, filepath):
     return self.accepts_all or magic.from_file(filepath,
                                                mime=True) in self.accepts
Example #54
def get_desc_for_file(filepath):
    filepath = abspath(filepath)
    return magic.from_file(filepath)
Example #55
    def _produce_one_sample(self):
        dirname = os.path.dirname(self.path)
        if not check_dir(dirname):
            raise ValueError("Invalid data path.")
        with open(self.path, 'r') as fid:
            flist = [l.strip() for l in fid]

        if self.shuffle:
            random.shuffle(flist)

        input_files = [os.path.join(dirname, 'input', f) for f in flist]
        output_files = [os.path.join(dirname, 'output', f) for f in flist]

        self.nsamples = len(input_files)

        input_queue, output_queue = tf.train.slice_input_producer(
            [input_files, output_files],
            shuffle=self.shuffle,
            seed=123,
            num_epochs=self.num_epochs)

        if '16-bit' in magic.from_file(input_files[0]):
            input_dtype = tf.uint16
            input_wl = 65535.0
        else:
            input_wl = 255.0
            input_dtype = tf.uint8
        if '16-bit' in magic.from_file(output_files[0]):
            output_dtype = tf.uint16
            output_wl = 65535.0
        else:
            output_wl = 255.0
            output_dtype = tf.uint8

        input_file = tf.read_file(input_queue)
        output_file = tf.read_file(output_queue)

        if os.path.splitext(input_files[0])[-1] == '.jpg':
            im_input = tf.image.decode_jpeg(input_file, channels=3)
        else:
            im_input = tf.image.decode_png(input_file,
                                           dtype=input_dtype,
                                           channels=3)

        if os.path.splitext(output_files[0])[-1] == '.jpg':
            im_output = tf.image.decode_jpeg(output_file, channels=3)
        else:
            im_output = tf.image.decode_png(output_file,
                                            dtype=output_dtype,
                                            channels=3)

        # normalize input/output
        sample = {}
        with tf.name_scope('normalize_images'):
            im_input = tf.to_float(im_input) / input_wl
            im_output = tf.to_float(im_output) / output_wl

        inout = tf.concat([im_input, im_output], 2)
        fullres, inout = self._augment_data(inout, 6)

        sample['lowres_input'] = inout[:, :, :3]
        sample['lowres_output'] = inout[:, :, 3:]
        sample['image_input'] = fullres[:, :, :3]
        sample['image_output'] = fullres[:, :, 3:]
        return sample
Example #56
def make_aspected_mp4video( input_mp4_file, output_mp4_file, aspect = 'square', background = 'white' ):
    """
    More FFmpeg_ voodoo, this time to create a square (or 9/16 aspect or 16/9 aspect) MP4_ file for upload into Instagram_.

    This requires a working ``ffmpeg`` and ``ffprobe`` executable to work. The input file must be MP4_.

    Here are resources that I used to get this working.

    * `Padding movie file with FFmpeg <padding_movie_>`_.

    * `Using FFPROBE to output JSON format <ffprobe_json_>`_.

    :param str input_mp4_file: the name of the valid input MP4_ file.
    :param str output_mp4_file: the name of the valid output MP4_ file.
    :param str aspect: the aspect ratio to choose. Must be one of "square" (1/1), "916" (9/16 aspect: width 9 units, height 16 units), or "169" (16/9 aspect: width 16 units, height 9 units). Default is "square".
    :param str background: the background color to use for padding. Must be either "white" or "black". Default is "white".

    .. seealso:: :py:meth:`get_gif_video <nprstuff.core.convert_image.get_gif_video>`.

    .. _FFmpeg: https://ffmpeg.org
    .. _MP4: https://en.wikipedia.org/wiki/MPEG-4_Part_14
    .. _MKV: https://en.wikipedia.org/wiki/Matroska
    .. _Instagram: https://www.instagram.com
    .. _padding_movie: https://superuser.com/questions/1212106/add-border-to-video-ffmpeg
    .. _ffprobe_json: https://tanimislam.github.io/blog/ffprobe-to-get-output-in-json-format.html
    """
    from distutils.spawn import find_executable
    import shutil
    ffmpeg_exec = find_executable( 'ffmpeg' )
    ffprobe_exec = find_executable( 'ffprobe' )
    assert(all(map(lambda tok: tok is not None, ( ffmpeg_exec, ffprobe_exec ))))
    assert( os.path.basename( input_mp4_file ).endswith( '.mp4' ) )
    assert( os.path.isfile( input_mp4_file ) )
    assert( aspect in ('square', '916', '169') )
    assert( background in ('black', 'white') )
    #
    ## first dictionary of multiplication of width to height
    aspect_dict = { 'square' : 1, '916' : 9.0 / 16, '169' : 16.0 / 9 }
    #
    ## assert this is an MP4 file, and output ends in .mp4
    assert( 'ISO Media,' in magic.from_file( input_mp4_file ) )
    assert( os.path.basename( output_mp4_file ).endswith( '.mp4' ) )
    ## get info JSON to get width, fps
    stdout_val = subprocess.check_output(
        [ ffprobe_exec, '-v', 'quiet', '-show_streams',
         '-show_format', '-print_format', 'json', input_mp4_file ],
        stderr = subprocess.STDOUT )
    mp4file_info = json.loads( stdout_val )
    # from dictionary, get width and height
    width_of_mp4 = int( mp4file_info[ 'streams' ][ 0 ][ 'width' ] )
    height_of_mp4 = int( mp4file_info[ 'streams' ][ 0 ][ 'height' ] )
    asp = aspect_dict[ aspect ]
    #
    ## if input video already correctly aspected, copy to output mp4 file
    if int( width_of_mp4 ) == int( asp * height_of_mp4 ):
        shutil.copyfile( input_mp4_file, output_mp4_file )
        return
    #
    ## case #1: asp * height_of_mp4 > width_of_mp4, pad width
    elif asp * height_of_mp4 > width_of_mp4:
        filter_string = 'pad=w=%d:h=%d:x=%d:y=0:color=%s' % (
            width_of_mp4 + int( asp * height_of_mp4 - width_of_mp4 ),
            height_of_mp4, ( asp * height_of_mp4 - width_of_mp4 ) // 2, background )
    #
    ## case #2: asp * height_of_mp4 < width_of_mp4, pad height
    else:
        filter_string = 'pad=w=%d:h=%d:x=0:y=%d:color=%s' % (
            width_of_mp4, height_of_mp4 + int( width_of_mp4 / asp - height_of_mp4 ),
            ( width_of_mp4 / asp - height_of_mp4 ) // 2, background )
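    #
    ## worked example (an assumption for illustration, not from the source): a
    ## 1280x720 input with aspect='square' falls into case #2 since 1 * 720 < 1280;
    ## the height is padded from 720 to 1280 and the video is centered vertically
    ## at y = (1280 - 720) // 2 = 280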
    #
    ## now do the voodoo magic
    exec_cmd = [
        ffmpeg_exec, '-y', '-v', 'warning', '-i', input_mp4_file,
        '-vf', filter_string, output_mp4_file ]
    logging.info( 'CMD: %s' % ' '.join( exec_cmd ) )
    stdout_val = subprocess.check_output(
        exec_cmd, stderr = subprocess.STDOUT )
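# a minimal usage sketch (an illustration, not part of the original example):
# assuming input.mp4 exists and ffmpeg/ffprobe are on the PATH, this pads the
# clip to a square canvas with white borders and writes input_square.mp4
make_aspected_mp4video('input.mp4', 'input_square.mp4', aspect='square', background='white')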
Example #57
def file_mime(path, mime=True):
    try:
        return magic.from_file(path, mime=mime).decode('utf-8')
    except OSError:
        return None
Example #58
def file_mime_type(file):
    return (magic.from_file(file, mime=True))
Example #59
 def inspect(self, sample):
     sample.info[self.NAME] = {
         "magic": magic.from_file(sample.path),
         "mime": magic.from_file(sample.path, mime=True)
     }
Example #60
    def getContent(self):
        if self._caller != "databrowse":
            return None
        else:
            if self._content_mode == "full":
                try:
                    st = os.stat(self._fullpath)
                except IOError:
                    return "Failed To Get File Information: %s" % (
                        self._fullpath)
                else:
                    file_size = st[ST_SIZE]
                    file_mtime = time.asctime(time.localtime(st[ST_MTIME]))
                    file_ctime = time.asctime(time.localtime(st[ST_CTIME]))
                    file_atime = time.asctime(time.localtime(st[ST_ATIME]))
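                    # python-magic's from_file() is used on Windows; on other
                    # platforms the code goes through the libmagic bindings'
                    # magic.open()/magicstore.file() interface instead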
                    if platform.system() == "Windows":
                        contenttype = magic.from_file(self._fullpath,
                                                      mime=True)
                    else:
                        magicstore = magic.open(magic.MAGIC_MIME)
                        magicstore.load()
                        contenttype = magicstore.file(self._fullpath)
                    extension = os.path.splitext(self._fullpath)[1][1:]
                    icon = self._handler_support.GetIcon(
                        contenttype, extension)

                    downlink = self.getURL(self._relpath,
                                           content_mode="raw",
                                           download="true")

                    xmlroot = etree.Element('{%s}dbhdf' % self._namespace_uri,
                                            nsmap=self.nsmap,
                                            name=os.path.basename(
                                                self._relpath),
                                            resurl=self._web_support.resurl,
                                            downlink=downlink,
                                            icon=icon)

                    xmlchild = etree.SubElement(xmlroot,
                                                "filename",
                                                nsmap=self.nsmap)
                    xmlchild.text = os.path.basename(self._fullpath)

                    xmlchild = etree.SubElement(xmlroot,
                                                "path",
                                                nsmap=self.nsmap)
                    xmlchild.text = os.path.dirname(self._fullpath)

                    xmlchild = etree.SubElement(xmlroot,
                                                "size",
                                                nsmap=self.nsmap)
                    xmlchild.text = self.ConvertUserFriendlySize(file_size)

                    xmlchild = etree.SubElement(xmlroot,
                                                "mtime",
                                                nsmap=self.nsmap)
                    xmlchild.text = file_mtime

                    xmlchild = etree.SubElement(xmlroot,
                                                "ctime",
                                                nsmap=self.nsmap)
                    xmlchild.text = file_ctime

                    xmlchild = etree.SubElement(xmlroot,
                                                "atime",
                                                nsmap=self.nsmap)
                    xmlchild.text = file_atime

                    # Content Type
                    xmlchild = etree.SubElement(xmlroot,
                                                "contenttype",
                                                nsmap=self.nsmap)
                    xmlchild.text = contenttype

                    # File Permissions
                    xmlchild = etree.SubElement(xmlroot,
                                                "permissions",
                                                nsmap=self.nsmap)
                    xmlchild.text = self.ConvertUserFriendlyPermissions(
                        st[ST_MODE])

                    # User and Group
                    if platform.system() == "Linux":
                        try:
                            username = pwd.getpwuid(st[ST_UID])[0]
                        except KeyError:
                            username = ""
                        groupname = grp.getgrgid(st[ST_GID])[0]
                        xmlchild = etree.SubElement(xmlroot,
                                                    "owner",
                                                    nsmap=self.nsmap)
                        xmlchild.text = "%s:%s" % (username, groupname)

                    # Contents of File
                    f = open(self._fullpath)
                    xmlchild = etree.SubElement(xmlroot,
                                                "contents",
                                                nsmap=self.nsmap)
                    output, error = subprocess.Popen(
                        ['/usr/bin/h5dump', '-x', '-H', self._fullpath],
                        stdout=subprocess.PIPE).communicate()
                    output = output.replace(
                        'xmlns:hdf5="http://hdfgroup.org/HDF5/XML/schema/HDF5-File.xsd"',
                        'xmlns:hdf5="http://hdfgroup.org/DTDs/HDF5-File"')
                    xmlchild.append(etree.XML(output))
                    #xmlchild.text = f.read()

                    return xmlroot
            elif self._content_mode == "raw" and self._web_support.req.form[
                    'getimage'].value == "true" and 'hdfloc' in self._web_support.req.form:
                hdfpath = self._web_support.req.form['hdfloc'].value
                tagname = base64.urlsafe_b64encode(hdfpath)
                ext = 'png'
                if self.CacheFileExists(tagname, extension=ext):
                    size = os.path.getsize(
                        self.getCacheFileName(tagname, extension=ext))
                    f = self.getCacheFileHandler('rb', tagname, extension=ext)
                    self._web_support.req.response_headers[
                        'Content-Type'] = 'image/png'
                    self._web_support.req.response_headers[
                        'Content-Length'] = str(size)
                    self._web_support.req.start_response(
                        self._web_support.req.status,
                        self._web_support.req.response_headers.items())
                    self._web_support.req.output_done = True
                    if 'wsgi.file_wrapper' in self._web_support.req.environ:
                        return self._web_support.req.environ[
                            'wsgi.file_wrapper'](f, 1024)
                    else:
                        return iter(lambda: f.read(1024), '')
                else:
                    print(self._fullpath)
                    f = h5py.File(self._fullpath, 'r')
                    data = f.get(self._web_support.req.form['hdfloc'].value)
                    if len(data.value.shape) == 1:
                        pylab.figure()
                        pylab.plot(data.value)
                        imgf = self.getCacheFileHandler('w', tagname, 'png')
                        pylab.savefig(imgf)
                        imgf.close()
                        pylab.clf()
                    elif len(data.value.shape) == 2:
                        pylab.figure()
                        pylab.imshow(data.value, origin='lower')
                        imgf = self.getCacheFileHandler('w', tagname, 'png')
                        pylab.savefig(imgf)
                        imgf.close()
                        pylab.clf()
                    f.close()
                    size = os.path.getsize(
                        self.getCacheFileName(tagname, extension=ext))
                    f = self.getCacheFileHandler('rb', tagname, extension=ext)
                    self._web_support.req.response_headers[
                        'Content-Type'] = 'image/png'
                    self._web_support.req.response_headers[
                        'Content-Length'] = str(size)
                    self._web_support.req.start_response(
                        self._web_support.req.status,
                        self._web_support.req.response_headers.items())
                    self._web_support.req.output_done = True
                    if 'wsgi.file_wrapper' in self._web_support.req.environ:
                        return self._web_support.req.environ[
                            'wsgi.file_wrapper'](f, 1024)
                    else:
                        return iter(lambda: f.read(1024), '')
            elif self._content_mode == "raw":
                size = os.path.getsize(self._fullpath)
                if platform.system() == "Windows":
                    contenttype = magic.from_file(self._fullpath, mime=True)
                else:
                    magicstore = magic.open(magic.MAGIC_MIME)
                    magicstore.load()
                    contenttype = magicstore.file(self._fullpath)
                f = open(self._fullpath, "rb")
                self._web_support.req.response_headers[
                    'Content-Type'] = contenttype
                self._web_support.req.response_headers['Content-Length'] = str(
                    size)
                self._web_support.req.response_headers[
                    'Content-Disposition'] = "attachment; filename=" + os.path.basename(
                        self._fullpath)
                self._web_support.req.start_response(
                    self._web_support.req.status,
                    self._web_support.req.response_headers.items())
                self._web_support.req.output_done = True
                if 'wsgi.file_wrapper' in self._web_support.req.environ:
                    return self._web_support.req.environ['wsgi.file_wrapper'](
                        f, 1024)
                else:
                    return iter(lambda: f.read(1024), '')
            else:
                raise self.RendererException("Invalid Content Mode")
            pass