def get_media_info(self, path: str) -> MediaInfo:
    """Return the MediaInfo for *path*, consulting the ``files`` cache table first.

    A cached row is used only when the stored size, modification time and
    creation time all match the file's current attributes. On a miss the
    file is parsed, the XML is written to the cache and the connection is
    committed.

    :param path: path of the media file to inspect.
    :return: a MediaInfo built from the cached or freshly parsed XML.
    """
    # Current file attributes form the cache key together with the path
    created = os.path.getctime(path)
    modified = os.path.getmtime(path)
    size = os.path.getsize(path)

    cursor = self._conn.cursor()
    cursor.execute(
        'SELECT mediainfo FROM files WHERE path=? AND size=? AND modified=? AND created=?',
        (path, size, modified, created))
    cached = cursor.fetchone()

    # Cache hit: rebuild the object straight from the stored XML
    if cached is not None:
        return MediaInfo(cached[0])

    # Cache miss: parse the file, then remember the XML for next time
    logging.debug('Parsing MediaInfo from %s', path)
    xml = MediaInfo.parse(path, output='OLDXML')
    parsed = MediaInfo(xml)
    cursor.execute(
        'REPLACE INTO files (path, size, modified, created, mediainfo) VALUES (?, ?, ?, ?, ?)',
        (path, size, modified, created, xml))
    self._conn.commit()
    return parsed
def test_track_other_attributes(self):
    """The first General track exposes 5 other_file_size and 4 other_duration values.

    Nothing is asserted when the parsed data has no General track.
    """
    parsed = MediaInfo(self.xml_dom)
    general = next(
        (t for t in parsed.tracks if t.track_type == 'General'), None)
    if general is None:
        return
    self.assertEqual(5, len(general.other_file_size))
    self.assertEqual(4, len(general.other_duration))
def test_valid_video_track(self):
    """The first Video track reports codec 'DV' and scan type 'Interlaced'.

    Nothing is asserted when the parsed data has no Video track.
    """
    parsed = MediaInfo(self.xml_dom)
    video = next(
        (t for t in parsed.tracks if t.track_type == 'Video'), None)
    if video is None:
        return
    self.assertEqual('DV', video.codec)
    self.assertEqual('Interlaced', video.scan_type)
def test_track_other_attributes(self):
    """The first General track exposes 5 other_file_size and 4 other_duration values.

    The XML string is parsed into a minidom document before being handed
    to MediaInfo. Nothing is asserted when there is no General track.
    """
    document = minidom.parseString(self.xml_data)
    parsed = MediaInfo(document)
    general = next(
        (t for t in parsed.tracks if t.track_type == 'General'), None)
    if general is None:
        return
    self.assertEqual(5, len(general.other_file_size))
    self.assertEqual(4, len(general.other_duration))
def test_valid_video_track(self):
    """The first Video track reports codec 'DV' and scan type 'Interlaced'.

    The XML string is parsed into a minidom document before being handed
    to MediaInfo. Nothing is asserted when there is no Video track.
    """
    document = minidom.parseString(self.xml_data)
    parsed = MediaInfo(document)
    video = next(
        (t for t in parsed.tracks if t.track_type == 'Video'), None)
    if video is None:
        return
    self.assertEqual('DV', video.codec)
    self.assertEqual('Interlaced', video.scan_type)
def test_track_integer_attributes(self):
    """duration, bit_rate and sampling_rate of the first Audio track are ints.

    Nothing is asserted when the parsed data has no Audio track.
    """
    mi = MediaInfo(self.xml_data)
    for track in mi.tracks:
        if track.track_type == 'Audio':
            # assertIsInstance reports the offending value and its type on
            # failure, unlike assertTrue(isinstance(...)) which only says
            # "False is not true".
            self.assertIsInstance(track.duration, int)
            self.assertIsInstance(track.bit_rate, int)
            self.assertIsInstance(track.sampling_rate, int)
            break
def _execute(self, filename):
    """Run the executable at ``self.location`` on *filename* and wrap its output.

    'OLDXML' output is requested when ``self.version`` is at least (17, 10),
    plain 'XML' otherwise.

    :param filename: path of the file to analyse.
    :return: a MediaInfo built from the command's text output.
    """
    if self.version >= (17, 10):
        output_type = 'OLDXML'
    else:
        output_type = 'XML'

    command = [self.location, '--Output=' + output_type, '--Full', filename]
    raw_output = check_output(command)
    return MediaInfo(ensure_text(raw_output))
def test_track_existing_other_attributes(self):
    """The first General track of issue100.xml has other_format_list 'RTP / RTP'."""
    xml_path = os.path.join(data_dir, "issue100.xml")
    with open(xml_path) as handle:
        parsed = MediaInfo(handle.read())

    # Indexing the filtered list fails loudly if no General track exists
    general = [t for t in parsed.tracks if t.track_type == "General"][0]
    self.assertEqual(general.other_format_list, "RTP / RTP")
def test_mkv_stream_duration(self):
    """MKV duration is stored as float and this is a problem for TS constructor.

    Rewrites the two integer <Duration> values of SAMPLE as floats and
    checks that the resulting streams have the same durations as the
    streams built from the unmodified sample.
    """
    expected = meta.from_media_info(self.media_info)

    float_durations = (
        ('<Duration>6742</Duration>', '<Duration>6742.000000</Duration>'),
        ('<Duration>6740</Duration>', '<Duration>6740.000000</Duration>'),
    )
    xml = SAMPLE
    for integer_tag, float_tag in float_durations:
        xml = xml.replace(integer_tag, float_tag)

    actual = meta.from_media_info(MediaInfo(xml))
    self.assertEqual(len(expected), len(actual))
    for got, want in zip(actual, expected):
        self.assertEqual(got.duration, want.duration)
def setup_mediainfo(executor, monkeypatch, options):
    """Select the 'mediainfo' provider and patch *executor* to serve canned data.

    ``MediaInfoExecutor.get_executor_instance`` is replaced by a Mock that
    returns *executor*, and ``executor.extract_info`` is replaced by a
    function that builds a MediaInfo from the returned dict when the
    filename is a key in it, falling back to the original ``extract_info``
    otherwise.

    :param executor: executor object whose ``extract_info`` gets wrapped.
    :param monkeypatch: object providing ``setattr`` for the patches.
    :param options: mapping whose 'provider' entry is set to 'mediainfo'.
    :return: the (initially empty) dict of filename -> MediaInfo input.
    """
    options['provider'] = 'mediainfo'
    api.available_providers.clear()

    monkeypatch.setattr(MediaInfoExecutor, 'get_executor_instance',
                        Mock(return_value=executor))

    canned = {}
    # Captured before patching so the fallback reaches the real implementation
    real_extract_info = executor.extract_info

    def fake_extract_info(filename):
        if filename in canned:
            return MediaInfo(canned[filename])
        return real_extract_info(filename)

    monkeypatch.setattr(executor, 'extract_info', fake_extract_info)
    return canned
def media_length(media_path):
    """
    Use MediaInfo to obtain the length of a media file.

    When ``MediaInfo.can_parse()`` is false, the ``mediainfo`` command is
    run instead, first with ``--Inform=OLDXML`` and then, if the output
    does not start with an XML declaration, without it.

    :param media_path: The file path to be checked.
    :return: The ``duration`` of the first track (milliseconds, according
        to the original comment in this function).
    """
    if MediaInfo.can_parse():
        return MediaInfo.parse(media_path).tracks[0].duration

    raw = check_output(
        ['mediainfo', '-f', '--Output=XML', '--Inform=OLDXML', media_path])
    if not raw.startswith(b'<?xml'):
        # Retry without the OLDXML inform option
        raw = check_output(['mediainfo', '-f', '--Output=XML', media_path])
    return MediaInfo(raw.decode("utf-8")).tracks[0].duration
def _parse(self, filename):
    """Open *filename* through the native library and return its MediaInfo.

    :param filename: path passed to ``MediaInfo_Open``.
    :return: a MediaInfo built from the XML returned by ``MediaInfo_Inform``.
    """
    native = self.native_lib

    # Create a MediaInfo handle
    mi_handle = native.MediaInfo_New()
    native.MediaInfo_Option(mi_handle, 'CharSet', 'UTF-8')

    # Fix for https://github.com/sbraz/pymediainfo/issues/22
    # Python 2 does not change LC_CTYPE
    # at startup: https://bugs.python.org/issue6203
    locale_unset = (sys.version_info < (3, )
                    and os.name == 'posix'
                    and locale.getlocale() == (None, None))
    if locale_unset:
        locale.setlocale(locale.LC_CTYPE, locale.getdefaultlocale())

    for option, value in (('Inform', 'XML'), ('Complete', '1')):
        native.MediaInfo_Option(None, option, value)

    native.MediaInfo_Open(mi_handle, filename)
    report = native.MediaInfo_Inform(mi_handle, 0)

    # Delete the handle
    native.MediaInfo_Close(mi_handle)
    native.MediaInfo_Delete(mi_handle)
    return MediaInfo(report)
def get_media_info(file):
    """Run ``mediainfo`` on *file* and split the result into general info and tracks.

    Non-general tracks get their ``track_id`` renumbered: the ids are shifted
    so the smallest becomes 0, and if the shifted ids are not exactly
    ``0 .. len(tracks) - 1`` (or cannot be read as integers) the tracks are
    simply numbered in the order they were reported.

    :param file: path of the media file to inspect.
    :return: ``(general_info, tracks)`` where ``general_info`` is the General
        track with ``folder_name`` and ``file_name`` (without extension)
        attached, and ``tracks`` is the list of all other tracks.
    :raises ValueError: if mediainfo reported no General track.
    """
    xml_io = StringIO()
    sh.mediainfo('--Output=XML', '-f', file, _out=xml_io)
    info = MediaInfo(xml_io.getvalue())

    general_info = None
    tracks = []
    for track in info.tracks:
        if track.track_type.lower() == 'general':
            general_info = track
        else:
            tracks.append(track)

    # Previously a missing General track surfaced as an UnboundLocalError
    # further down; fail with an explicit message instead.
    if general_info is None:
        raise ValueError('no General track found for %r' % (file,))

    try:
        raw_ids = [int(track.track_id) for track in tracks]
        min_id = min(raw_ids)
        track_ids = [raw_id - min_id for raw_id in raw_ids]
    except (TypeError, ValueError):
        # Missing/non-numeric ids, or no tracks at all (min of empty list)
        track_ids = []

    expected_ids = range(0, len(tracks))
    if set(track_ids) != set(expected_ids):
        track_ids = expected_ids

    for track, track_id in zip(tracks, track_ids):
        track.track_id = track_id

    folder_name, file_name = os.path.split(file)
    filename, _ext = os.path.splitext(file_name)
    general_info.folder_name = folder_name
    general_info.file_name = filename

    return general_info, tracks
def setUp(self):
    """Load sample.xml into ``self.xml_data`` and parse it into ``self.mi``."""
    sample_path = os.path.join(data_dir, 'sample.xml')
    with open(sample_path, 'r') as handle:
        self.xml_data = handle.read()
    self.mi = MediaInfo(self.xml_data)
def test_parse_invalid_xml(self):
    """A MediaInfo built from the DOM of ``self.xml_data`` has no tracks."""
    dom = MediaInfo.parse_xml_data_into_dom(self.xml_data)
    parsed = MediaInfo(dom)
    track_count = len(parsed.tracks)
    self.assertEqual(track_count, 0)
def extract_info(self, filename):
    """Return a MediaInfo built from the result of ``self._execute(filename)``."""
    return MediaInfo(self._execute(filename))
def test_load_mediainfo_from_string(self):
    """Building MediaInfo directly from the XML string yields 4 tracks."""
    parsed = MediaInfo(self.xml_data)
    track_count = len(parsed.tracks)
    self.assertEqual(4, track_count)
def test_getting_attribute_that_doesnot_exist(self):
    """Reading an unknown attribute on a track yields None."""
    mi = MediaInfo(self.xml_data)
    # assertIsNone shows the unexpected value on failure, unlike
    # assertTrue(x is None) which only reports "False is not true".
    self.assertIsNone(mi.tracks[0].does_not_exist)
def setUp(self) -> None:
    """Parse the module-level SAMPLE into ``self.media_info``."""
    parsed_sample = MediaInfo(SAMPLE)
    self.media_info = parsed_sample
def __init__(self, xml_string):
    """Parse *xml_string* and keep both the MediaInfo object and its data.

    :param xml_string: XML passed to the MediaInfo constructor.
    """
    parsed = MediaInfo(xml_string)
    self.mediainfo = parsed
    # Result of MediaInfo.to_data() for the parsed document
    self.metadata = parsed.to_data()
def selectFiles(video_files, source, files, checkBitRate=False, v_br=0.0, a_br=0.0): total_duration = 0. for f in video_files: if not f.startswith(source): continue cmd = 'mediainfo -f --Output=OLDXML \"%s\"' % f p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE) xml_mediaInfo, err = p.communicate() mediaInfo = MediaInfo(xml_mediaInfo) general = getTrack(mediaInfo, 'General') total_br = 0. # leave already transcoded files untouched if general: #print [attr for attr in dir(general) if not attr.startswith('__')] if general.comment: if 'ffmpeg: ' in general.comment: print 'File', f, 'already transcoded. Skipping...' continue if general.overall_bit_rate: total_br = float( general.overall_bit_rate ) else: print 'Missing total bit rate info for', f, 'Skipping...' os.system('mediainfo \"%s\"' % f) continue else: print 'Problem with getting general info for', f, 'Skipping...' os.system('mediainfo \"%s\"' % f) continue audio_br = 0. channels = 0. audio = getTrack(mediaInfo, 'Audio') if audio: #print [attr for attr in dir(audio) if not attr.startswith('__')] if audio.bit_rate: audio_br = float( audio.bit_rate ) else: print 'Missing audio bit rate info for', f, 'Skipping...' os.system('mediainfo \"%s\"' % f) continue if audio.channel_s: channels = float( audio.channel_s ) else: print 'Missing audio channels info for', f, 'Skipping...' os.system('mediainfo \"%s\"' % f) continue else: print 'Problem with finding audio stream for', f, 'Will be transcoded without an audio stream' os.system('mediainfo \"%s\"' % f) video_br = 0. video = getTrack(mediaInfo, 'Video') if video: #print [attr for attr in dir(video) if not attr.startswith('__')] if video.bit_rate: video_br = float( video.bit_rate ) else: print 'Missing video bit rate info for', f else: print 'Problem with finding video stream for', f, 'Skipping...' 
os.system('mediainfo \"%s\"' % f) continue # leave .mp4 and .mkv files that already meet the bit rate requirements untouched extensions = ('.mp4', '.mkv') if checkBitRate and f.lower().endswith(extensions) and video.bit_rate and video_br < v_br and ( (audio and audio_br < (channels * a_br)/2.) or not audio ): print 'File', f, 'already meets the bit rate requirements. Skipping...' print ' Video bit rate:', video_br/1e6, 'Mbps' print ' Audio channels:', int(channels) print ' Audio bit rate:', audio_br/1e3, 'kbps' continue # detect UHD videos if video.width: if int( video.width ) > 1920: print 'File', f, 'has width greater than 1920 pixels:', video.width else: print 'Problem with getting video width info for', f os.system('mediainfo \"%s\"' % f) # duration stored in ms, converting to s if general.duration: total_duration += float( general.duration ) / 1e3 else: print 'Problem with getting duration info for', f, 'Skipping...' os.system('mediainfo \"%s\"' % f) continue files.append([f, os.path.getsize(f), xml_mediaInfo]) return total_duration
def test_populate_tracks(self):
    """A MediaInfo built from the minidom document of the sample has 4 tracks."""
    document = minidom.parseString(self.xml_data)
    track_count = len(MediaInfo(document).tracks)
    self.assertEqual(4, track_count)
def main(): # usage description usage = "Usage: python %prog [options] \nExample: python %prog -s /home/ferencek/Pictures/ -d test_transcode" # input parameters parser = OptionParser(usage=usage) parser.add_option("-s", "--source", dest="source", help="Source folder (This parameter is mandatory)", metavar="SOURCE") parser.add_option("-d", "--destination", dest="destination", help="Destination folder (This parameter is mandatory)", metavar="DESTINATION") parser.add_option("-t", "--transcode", dest="transcode", action='store_true', help="Transcode selected files", default=False) parser.add_option("-r", "--rescan", dest="rescan", action='store_true', help="Force rescan of the source folder", default=False) parser.add_option("-n", "--dry_run", dest="dry_run", action="store_true", help="Perform a transcoding dry run", default=False) parser.add_option("--deint", dest="deint", action='store_true', help="Enable deinterlacing", default=False) parser.add_option("--size", dest="size", action='store_true', help="Check transcoded file size", default=False) (options, args) = parser.parse_args() # make sure all necessary input parameters are provided if not (options.source and options.destination): print 'Mandatory parameters missing' print '' parser.print_help() sys.exit(1) # define audio encoder audio_enc = 'aac' cmd = 'ffmpeg -encoders' p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() if 'libfdk_aac' in out: audio_enc = 'libfdk_aac' #print audio_enc source = options.source # make sure the source path is defined as an absolute path if not source.startswith('/'): source = os.path.join( os.path.abspath('.'), source ) # make sure the source path end with '/' source = source.rstrip('/') + '/' # bit rate thresholds (in bps) # video bit rate v_br = 4.1e6 # audio bit rate a_br = 140e3 extensions = ('.mp4', '.m4v', '.mov', '.3gp', '.3g2', '.mpg', '.mpeg', '.mj2', '.wmv', '.avi', '.webm', '.mkv') extensions_ts = ('.ts', 
'.mts') #extensions += extensions_ts video_files = [] selected_files = [] selected_files_list = [] total_duration = 0. shouldRebuild = options.rescan if os.path.exists('video_files_all.pkl') and not shouldRebuild: with open('video_files_all.pkl', 'rb') as fpkl: (path, video_files) = pickle.load(fpkl) if not path.rstrip('/') in source: print 'Source folder', source, 'different from the cached folder', path print 'Source folder will be rescanned...' print '' shouldRebuild = True del video_files [:] else: print 'Pickled list of all video files loaded' if not os.path.exists('video_files_all.pkl') or shouldRebuild: print 'Building pickled list of all video files...' with open('video_files_all.pkl', 'wb') as fpkl: collectFiles(source, extensions, video_files) dump = (source, video_files) pickle.dump(dump, fpkl) print 'Pickled list of all video files built' file_list_all = open('video_files_all.txt','w') for v in video_files: file_list_all.write(v+'\n') file_list_all.close() print '\nFound', len(video_files), 'video files in', source, '\n' if os.path.exists('video_files_selected.pkl') and not shouldRebuild: with open('video_files_selected.pkl', 'rb') as fpkl: (path, total_duration, selected_files) = pickle.load(fpkl) if not path.rstrip('/') in source: shouldRebuild = True del selected_files [:] del selected_files_list [:] else: print 'Pickled list of selected video files loaded' if os.path.exists('video_files_selected.txt') and not shouldRebuild: with open('video_files_selected.txt', 'rb') as ftxt: selected_files_list = ftxt.read().splitlines() #print selected_files_list if not os.path.exists('video_files_selected.pkl') or shouldRebuild: print 'Building pickled list of selected video files...' 
with open('video_files_selected.pkl', 'wb') as fpkl: total_duration = selectFiles(video_files, source, selected_files, checkBitRate=True, v_br=v_br, a_br=a_br) dump = (source, total_duration, selected_files) pickle.dump(dump, fpkl) print 'Pickled list of selected video files built' for v in selected_files: selected_files_list.append(v[0]) file_list_selected = open('video_files_selected.txt','w') file_list_skipped = open('video_files_skipped.txt','w') for v in video_files: if v in selected_files_list: file_list_selected.write(v+'\n') else: file_list_skipped.write(v+'\n') file_list_selected.close() file_list_skipped.close() print '\nSelected', len(selected_files), 'video files in', source, 'with a total duration of', str(datetime.timedelta(seconds=total_duration)), '\n' if options.transcode: source_prefix = source destination = options.destination totalSizeBefore = 0 totalSizeAfter = 0 file_list_processed = open('video_files_processed.txt','w') file_list_failed = open('video_files_failed.txt','w') for counter, f in enumerate(selected_files, 1): if not f[0] in selected_files_list: continue filename = os.path.basename(f[0]) bv = '4M' ba = '128k' mediaInfo = MediaInfo(f[2]) general = getTrack(mediaInfo, 'General') audio = getTrack(mediaInfo, 'Audio') video = getTrack(mediaInfo, 'Video') total_br = float( general.overall_bit_rate ) audio_br = ( float( audio.bit_rate ) if (audio and audio.bit_rate) else 0. ) channels = ( float( audio.channel_s ) if (audio and audio.channel_s) else 0. 
) # if mono, reduce the audio bit rate if int( channels ) == 1: ba = '64k' # make sure the destination path is defined as an absolute path if not destination.startswith('/'): destination = os.path.join( os.path.abspath('.'), destination ) dest_folder = os.path.join( destination, os.path.dirname(f[0])[len(source_prefix):] ) #print dest_folder if not os.path.exists(dest_folder) and not options.dry_run: os.system('mkdir -p \"%s\"' % dest_folder) print '===============================================' os.system('echo `date`') print 'Processing file', counter print f[0] print '' video_br = 0. # check video bit rate info if video.bit_rate: video_br = float( video.bit_rate ) else: print 'Missing video bit rate info, setting it to (total - audio)' print ' Total bit rate:', total_br/1e6, 'Mbps' print ' Video bit rate: N/A' print ' Audio bit rate:', audio_br/1e3, 'kbps' print ' Audio channels:', int(channels) video_br = total_br - audio_br print ' Fixed video bit rate:', video_br/1e6, 'Mbps' # check for corrupt total bit rate corrupt_total_br = False if video_br > 1.1 * total_br: corrupt_total_br = True print 'Total bit rate info is potentially corrupt. Assuming video and audio bit rates to be correct...' print ' Total bit rate:', total_br/1e6, 'Mbps' print ' Video bit rate:', video_br/1e6, 'Mbps' print ' Audio bit rate:', audio_br/1e3, 'kbps' print ' Audio channels:', int(channels) # figure out transcoding and repacking status copy_video = False if video_br < v_br: print 'File already meets the video bit rate and codec requirements. Video stream will be repacked...' print ' Video bit rate:', video_br/1e6, 'Mbps' copy_video = True unsupported_audio_codecs = ['raw', 'samr'] audio_codec = '' if audio: if audio.codec_id: audio_codec = audio.codec_id.strip() elif audio.id: audio_codec = audio.id.strip() copy_audio = False if audio and audio_br < (channels * a_br)/2. 
and audio_codec not in unsupported_audio_codecs: print 'File already meets the audio bit rate and codec requirements. Audio stream will be repacked...' print ' Audio channels:', int(channels) print ' Audio bit rate:', audio_br/1e3, 'kbps' copy_audio = True # comment comment = 'ffmpeg: video and audio transcode' if audio: if copy_video and copy_audio: comment = 'ffmpeg: video and audio repack' elif (copy_video and not copy_audio) or (not copy_video and copy_audio): comment = 'ffmpeg: video ' + ('repack' if copy_video else 'transcode') + ', audio ' + ('repack' if copy_audio else 'transcode') else: comment = 'ffmpeg: video ' + ('repack' if copy_video else 'transcode') + ', no audio' # video encoding options video_filt = '' if options.deint: video_filt = '-vf "yadif=0:-1:0" ' # workaround for files with JPEG-based codecs (by default pix_fmt=yuvj422p is used which results in strange artifacts in an x265-encoded video stream) pix_fmt = '' jpeg_codecs = ['jpeg', 'mjpg'] if not copy_video: video_codec = (video.codec_id.lower() if video.codec_id else '') chroma_subsampling = (video.chroma_subsampling.strip() if video.chroma_subsampling else '') if video_codec in jpeg_codecs and chroma_subsampling == '4:2:2': pix_fmt = ' -pix_fmt yuv422p' video_options_1st_pass = '******' % (video_filt, bv, pix_fmt) video_options = '%s-c:v libx265 -b:v %s -x265-params pass=2%s' % (video_filt, bv, pix_fmt) if copy_video: video_options = '-c:v copy' # audio encoding options if audio: audio_options = '-c:a %s -b:a %s' % (audio_enc, ba) if copy_audio: audio_options = '-c:a copy' else: audio_options = '-an' fmt = 'mp4' extensions = ('.mp4', '.m4v', '.mov', '.3gp', '.3g2') if ( not copy_video and not copy_audio and not filename.lower().endswith(extensions_ts) ) or filename.lower().endswith(extensions): filename = os.path.splitext(filename)[0] + '.mp4' else: filename = os.path.splitext(filename)[0] + '.mkv' fmt = 'matroska' dest_path = os.path.join(dest_folder, filename) if not copy_video: cmd = 
'ffmpeg -i \"%s\" -vsync 0 %s -an -f %s -y /dev/null' % (f[0], video_options_1st_pass, fmt) print '' print cmd print '' if not options.dry_run: r = os.system(cmd) if r: print 'ffmpeg 1st pass failed! Skipping...' file_list_failed.write(f[0] + '\n') continue cmd = 'ffmpeg -i \"%s\" -vsync 0 %s %s -map_metadata 0 -metadata comment="%s" -y \"%s\"' % (f[0], video_options, audio_options, comment, dest_path) print '' print cmd print '' if not options.dry_run: r = os.system(cmd) if r: if f[0].lower().endswith('.mpg'): print 'ffmpeg failed! Attempting recovery...' cmd = cmd.replace('ffmpeg -i', 'ffmpeg -fflags +genpts -i') print '' print cmd print '' r = os.system(cmd) if r: print 'ffmpeg failed again! Skipping...' file_list_failed.write(f[0] + '\n') continue else: if not copy_video: print 'ffmpeg 2nd pass failed! Skipping...' else: print 'ffmpeg failed! Skipping...' file_list_failed.write(f[0] + '\n') continue if not options.dry_run: cmd = 'touch -r \"%s\" \"%s\"' % ( f[0], dest_path ) print '' print cmd print '' os.system(cmd) file_list_processed.write(f[0] + ' : ' + dest_path + '\n') totalSizeBefore += f[1] if not options.dry_run or options.size: totalSizeAfter += os.path.getsize( dest_path ) file_list_processed.close() file_list_failed.close() print '===============================================' os.system('echo `date`') print '' print '\nTotal size before transcoding:', float(totalSizeBefore)/(1024.0**3), 'GB' print 'Total size after transcoding:', float(totalSizeAfter)/(1024.0**3), 'GB\n'
def test_populate_tracks(self):
    """A MediaInfo built from ``self.xml_data`` has 3 tracks."""
    parsed = MediaInfo(self.xml_data)
    track_count = len(parsed.tracks)
    self.assertEqual(3, track_count)
def setUp(self):
    """Parse the sample media files and load other_track.xml from ``data_dir``."""
    def sample(name):
        # Full path of a fixture inside the test data directory
        return os.path.join(data_dir, name)

    self.mi_audio = MediaInfo.parse(sample("sample.mp4"))
    self.mi_text = MediaInfo.parse(sample("sample.mkv"))
    self.mi_image = MediaInfo.parse(sample("empty.gif"))
    with open(sample("other_track.xml")) as handle:
        self.mi_other = MediaInfo(handle.read())
def setUp(self):
    """Load sample.xml into ``self.xml_data`` and parse it into ``self.media_info``."""
    sample_path = os.path.join(data_dir, "sample.xml")
    with open(sample_path, "r") as handle:
        self.xml_data = handle.read()
    self.media_info = MediaInfo(self.xml_data)
def test_valid_video_track(self):
    """The first Video track reports codec 'AVC'.

    Nothing is asserted when the parsed data has no Video track.
    """
    parsed = MediaInfo(self.xml_data)
    video = next(
        (t for t in parsed.tracks if t.track_type == 'Video'), None)
    if video is None:
        return
    self.assertEqual('AVC', video.codec)