def get_creation_date(_path): """ Simple function to retrieve the creation date from the file's metdata Args: _path the full path to the file. """ # Initialise result _creation_date = None # Using the hachoir metadata library retrieve file metadata hachoir_config.quiet = True try: parser = createParser(unicodeFilename(_path), _path) if parser: metadata = extractMetadata(parser) if metadata: _creation_date = metadata.get("creation_date") except Exception: pass # Validate and use ctime if not available if not _creation_date: _ctime = os.path.getctime(_path) _creation_date = datetime.datetime.fromtimestamp(_ctime) # Return result return _creation_date
def which_type(self, path):
    """
    Analyzes the image provided and attempts to determine whether it is a
    poster, banner or fanart.

    :param path: full path to the image
    :return: BANNER, POSTER, FANART if it concluded one of them, or None if
        the image was none of these (or didn't exist / had no usable metadata)
    """
    if not os.path.isfile(path):
        sickrage.app.log.warning("Couldn't check the type of " + str(path) + " cause it doesn't exist")
        return None

    with io.open(path, 'rb') as fh:
        img_metadata = extractMetadata(guessParser(StringInputStream(fh.read())))

    if not img_metadata:
        sickrage.app.log.debug(
            "Unable to get metadata from " + str(path) + ", not using your existing image")
        return None

    width = float(img_metadata.get('width', 0))
    height = float(img_metadata.get('height', 0))
    # Guard against a zero/absent height, which previously raised
    # ZeroDivisionError before any ratio could be computed.
    if not height:
        sickrage.app.log.debug(
            "Unable to get metadata from " + str(path) + ", not using your existing image")
        return None

    img_ratio = width / height

    # most posters are around 0.68 width/height ratio (eg. 680/1000)
    if 0.55 < img_ratio < 0.8:
        return self.POSTER
    # most banners are around 5.4 width/height ratio (eg. 758/140)
    elif 5 < img_ratio < 6:
        return self.BANNER
    # most fanart are around 1.77777 width/height ratio (eg. 1280/720 and 1920/1080)
    elif 1.7 < img_ratio < 1.8:
        return self.FANART
    else:
        sickrage.app.log.warning("Image has size ratio of " + str(img_ratio) + ", unknown type")
def qualityFromFileMeta(filename):
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata
    from hachoir_core import config as hachoir_config
    # Silence hachoir's own warning output.
    hachoir_config.quiet = True
    if os.path.isfile(filename):
        base_filename = os.path.basename(filename)
        # Source hints are taken from the filename, not the media stream.
        bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
        webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None
        # Feed successive buffered chunks to hachoir until one parses.
        for byte in readFileBuffered(filename):
            try:
                file_metadata = extractMetadata(guessParser(StringInputStream(byte)))
                # Look for a 'height' entry in the top-level metadata and all groups.
                for metadata in chain([file_metadata], file_metadata.iterGroups()):
                    height = metadata.get('height', 0)
                    if height > 1000:
                        return ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
                    elif height > 680 and height < 800:
                        return ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
                    elif height < 680:
                        return (Quality.SDTV, Quality.SDDVD)[
                            re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
                    # NOTE(review): heights in the 800-1000 range fall through to
                    # the next chunk/group — presumably intentional, verify.
            except:
                # Deliberate per-chunk retry: an unparseable chunk is skipped.
                continue
    return Quality.UNKNOWN
def qualityFromFileMeta(filename):
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata
    from hachoir_core.log import log
    # Keep hachoir from printing to stdout.
    log.use_print = False
    if ek(os.path.isfile, filename):
        base_filename = ek(os.path.basename, filename)
        # Source hints come from the filename, not the media stream.
        bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
        webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None
        try:
            with ek(io.open, filename, "rb") as file:
                file_metadata = extractMetadata(guessParser(StringInputStream(file.read())))
                if file_metadata:
                    # Scan the top-level metadata and every group for a height.
                    for metadata in chain([file_metadata], file_metadata.iterGroups()):
                        height = metadata.get('height', None)
                        if height and height > 1000:
                            return ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
                        elif height and height > 680 and height < 800:
                            return ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
                        elif height and height < 680:
                            return (Quality.SDTV, Quality.SDDVD)[
                                re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
        except Exception as e:
            sickbeard.logger.log(ex(e))
    return Quality.UNKNOWN
def classify(path, rootdir):  # add an extra argument here to take the root dir :)
    """Classify a media file by extracting its hachoir metadata.

    NOTE(review): the visible span ends right after metadata extraction;
    the classification logic presumably continues beyond this view.
    """
    print 'path given: ', path, ' RootDir: ', rootdir
    foo = path.rsplit('/', 1)
    fname = foo[1]
    # defaults audio, video:
    artist = album = genre = 'unknown'
    # defaults image:
    latitude = longitude = 0
    city = state = country = 'unknown'
    year = '1960'
    month = 'January'
    # here we go :
    filename = path
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print >>stderr, "Unable to parse file"
        # Hard exit — this function is presumably used from a CLI script.
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def _extractMetadata(self):
    """
    Extract metadata from file on client or server using hachoir-metadata.

    Populates ``self.metadata`` with a {description: joined-text} mapping,
    or sets it to None when parsing/extraction fails.
    """
    try:
        media_parser = createParser(unicode(self.path), str(self.path))
        if media_parser is None:
            raise HachoirError
        meta = extractMetadata(media_parser)
        if meta is None:
            raise HachoirError
        self.metadata = dict()
        for entry in sorted(meta):
            if not entry.values:
                continue
            # Join multi-valued entries into one comma-separated string.
            self.metadata[entry.description] = ', '.join([v.text for v in entry.values])
    except HachoirError:
        self.metadata = None
def get_meta(self, file_path):
    """Get the meta information.

    Returns a (mime_type, json-encoded-info) tuple. When hachoir cannot
    build a parser, a hard-coded mime type is returned for a few known
    extensions with 'null' in place of the metadata payload.
    """
    self.check_extension(file_path)
    filename, realname = unicodeFilename(file_path), file_path
    parser = createParser(filename, realname)
    if parser is None:
        # Table of extension -> mime fallbacks used when parsing fails.
        fallbacks = (
            ('.mov', 'video/quicktime'),
            ('.mpg', 'video/mpeg'),
            ('.jpg', 'image/jpeg'),
            ('.bup', 'video/dvd'),
            ('.vob', 'video/dvd'),
            ('.ifo', 'video/dvd'),
        )
        lowered = file_path.lower()
        for suffix, mime in fallbacks:
            if lowered.endswith(suffix):
                return mime, 'null'
    metadata = extractMetadata(parser)
    mime_type = parser.mime_type
    info = {}
    for data in sorted(metadata or ()):
        if data.values:
            info[data.key] = [item.text for item in data.values]
    return mime_type, json.dumps(info)
def parse_metadata(path):
    """Parse hachoir metadata for *path* into a {key: value(s)} dict.

    Returns None when the file cannot be parsed or yields no metadata.
    Values are parsed back out of hachoir's plaintext export, so repeated
    keys accumulate into lists.
    """
    try:
        parser = createParser(unicode(path))
    except InputStreamError:
        return
    if not parser:
        return
    try:
        metadata = extractMetadata(parser, appsettings.INFO_QUALITY)
    except HachoirError:
        return
    if not metadata:
        return
    data = {}
    # Walk the plaintext export: group headers have no leading '-',
    # entries look like "- key: value".
    text = metadata.exportPlaintext(priority=None, human=False)
    for line in text:
        if not line.strip().startswith('-'):
            # Group header line: remember the key, no value yet.
            key = line.strip().lower().split(':')[0]
            value = []
        else:
            key = line.strip().split('- ')[1].split(': ')[0]
            # Everything after "key: " is the value text.
            value = line.split(key)[1][2:]
        if key in data:
            # Accumulate duplicate keys into a list.
            if hasattr(data[key], '__iter__'):
                value = data[key] + [value]
            else:
                value = [data[key], value]
        if value:
            data[key] = value
    return data
def get_creation_date(file_path):
    """Return the file's creation date as 'YYYY-MM-DD', or None on failure.

    :param file_path: path to a media file readable by hachoir
    :return: formatted date string, or None when parsing/extraction fails
    """
    try:
        parser = createParser(file_path)
        metadata = extractMetadata(parser, 0.5)
        return metadata['creation_date'].strftime('%Y-%m-%d')
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; any parse/extract failure yields None.
        return None
def extract_metadata_from_file(filename):
    """Parse *filename* with hachoir and return the extracted metadata.

    Raises ValueError when hachoir cannot build a parser for the file.
    """
    media_parser = createParser(filename)
    if not media_parser:
        raise ValueError("Could not parse %s" % filename)
    return extractMetadata(media_parser)
def extract_metadata(audio):
    """Guess a parser for the raw *audio* bytes and return its metadata.

    Raises ValueError when no parser can be guessed for the stream.
    """
    byte_stream = StringInputStream(audio)
    guessed = guessParser(byte_stream)
    if not guessed:
        raise ValueError("Could not parse the stream")
    return extractMetadata(guessed)
def get_file_metadata(path):
    """Return a dict of hachoir metadata for *path* plus its size.

    Always contains at least 'size' for an existing regular file; metadata
    keys map to a single value or a list when multi-valued. Returns {} for
    non-files or on unexpected errors.
    """
    rdata = {}
    # Pre-bind so the `finally` clause below cannot hit a NameError when
    # createParser itself raises before `parser` is assigned (original bug).
    parser = None
    if os.path.isfile(path):
        try:
            parser = createParser(unicodeFilename(path), path)
            rdata["size"] = os.stat(path).st_size
            if parser:
                try:
                    metadata = extractMetadata(parser)
                    if metadata:
                        # Single values stay scalar; multi-values become lists.
                        rdata.update(
                            (md.key,
                             md.values[0].value if len(md.values) == 1
                             else [value.value for value in md.values])
                            for md in metadata if md.values
                        )
                except HachoirError as e:
                    logging.exception(e)
        except NullStreamError:
            # Empty stream: report a zero size rather than failing.
            rdata["size"] = 0
        except BaseException as e:
            logging.exception(e)
        finally:
            # hachoir does not close the underlying file itself.
            if parser and parser.stream and parser.stream._input and not parser.stream._input.closed:
                parser.stream._input.close()
    return rdata
def get_file_date(root, file): date = "" try: filename = "{}/{}".format(root,file) filename, realname = unicodeFilename(filename), filename parser = createParser(filename, realname) if not parser: print >>stderr, "Unable to parse file {}".format(filename) try: actualstderr = sys.stderr sys.stderr = open(os.devnull,'w') metadata = extractMetadata(parser) sys.stderr = actualstderr except HachoirError, err: print "Metadata extraction error: %s" % unicode(err) metadata = None if not metadata: print "Unable to extract metadata, {}".format(filename) text = metadata.exportPlaintext() date = "" # Tracer()() for line in text: if line[0:10] == "- Creation": match = re.search('(\d+-\d+-\d+ \d+:\d+:\d+)', line) if match: date = time.strptime(match.groups()[0], '%Y-%m-%d %H:%M:%S') return date
def _guess_from_metadata(self):
    """Guess episode info from the 'title'/'comment' metadata of self.files.

    Returns a list of guessit results, one per usable title/comment entry.
    """
    # Split "key: value" pairs out of normalized plaintext lines.
    parse = lambda s: s.split(":")
    guesses = []
    for filename in self.files:
        filename = get_filename(filename)
        if not isinstance(filename, unicode):
            filename, realname = unicodeFilename(filename), filename
        else:
            realname = filename
        parser = createParser(filename, realname)
        if parser:
            try:
                metadata = extractMetadata(parser)
            except HachoirError:
                # Skip files whose metadata cannot be extracted.
                continue
            for line in metadata.exportPlaintext():
                # Keep only lines mentioning a comment or title, then run
                # each value through guessit's episode parser.
                entries = dict((parse(normalize(l)) for l in line if 'comment' in l or 'title' in l))
                entries = dict(((k, guessit.guess_episode_info(v)) for (k, v) in entries.items()))
                if 'title' in entries:
                    guesses.append(entries['title'])
                elif 'comment' in entries:
                    guesses.append(entries['comment'])
    return guesses
def media_from_file(infile, batch, user, manual=False):
    """Creates an instance of correct Media class from an open file.

    Guesses the media type from hachoir metadata, allocates a slug id from
    the database sequence, builds and saves the model instance. Returns the
    saved instance, or None when no media class matches.
    """
    stream = InputIOStream(infile)
    parser = hachoir_parser.guessParser(stream)
    metadata = hachoir_metadata.extractMetadata(parser)
    model_class = klass_from_metadata(metadata, infile.name)
    if not model_class:
        # TODO: need to test different errors
        log.warn('no media found for: %s', infile.name)
        return None
    else:
        mediatype = model_class.mediatype()
    # Allocate a unique slug id straight from the DB sequence.
    cursor = connection.cursor()
    cursor.execute("SELECT nextval ('gallery_mediabase_id_seq')")
    slugid = cursor.fetchone()[0]
    slug = '%s.%d' % (user.username, slugid)
    args = {'owner': user, 'slug': slug, 'status': 'uploaded',
            'textheight': 50, 'batch': batch}
    if not manual:
        if hasattr(model_class, 'IKOptions'):
            # we're some type of image object
            args['image'] = infile
        else:
            args['filefield'] = infile
    # Copy image/video dimensions from metadata when present.
    for dimension in ('width', 'height'):
        dimvalue = metadata.get(dimension, False)
        if dimvalue:
            args[dimension] = dimvalue
    # Non-flv videos need a server-side encode pass.
    if mediatype == 'video' and not infile.name.endswith('flv'):
        args['encode'] = True
    if metadata.has('creation_date'):
        year = metadata.get('creation_date', None)
        if year:
            year = year.year
            args['year'] = year
    instance = model_class(**args)
    if manual:
        fn = os.path.basename(infile.name)
        fileobj = File(infile)
        log.debug('manual creation of %s: %s', mediatype, fn)
        if hasattr(model_class, 'IKOptions'):
            # we're some type of image object
            instance.image.save(fn, fileobj)
        else:
            instance.filefield.save(fn, fileobj)
    instance.save()
    log.debug('Saved %s: %s' % (mediatype, instance.get_fname()))
    return instance
def googlesearch():
    """Search Google for office documents on `domain`, download each hit,
    extract its hachoir metadata and insert a row per file into the
    `metadata` table.

    NOTE(review): relies on module-level globals (`domain`, `sid`,
    `executor`, `connection`) not visible in this span — verify they exist.
    """
    print "Searching google for files..."
    # set up browser
    browse = mechanize.Browser()
    cookiejar = cookielib.LWPCookieJar()
    browse.set_cookiejar(cookiejar)
    browse.set_handle_equiv(True)
    browse.set_handle_redirect(True)
    browse.set_handle_referer(True)
    browse.set_handle_robots(False)
    browse.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    browse.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]
    # response = browse.open("https://www.google.com/#q=filetype: %s + %s" % (filetype, domain))
    for filetype in ["doc", "docx", "ppt", "xls"]:
        response = browse.open("https://www.google.com")
        browse.select_form(nr=0)
        browse.form["q"] = "filetype:%s site:%s" % (filetype, domain)
        browse.submit()
        results = browse.response().read()
        soup = BeautifulSoup(results, "lxml")
        sidlist = []
        namelist = []
        typelist = []
        metalist = []
        counter = 1
        # Google wraps result links as /url?q=<target>.
        for link in soup.find_all("a", href=re.compile("/url")):
            link = link.get("href")
            if link.startswith("/url?q="):
                link = link[len("/url?q="):]
                link = link.split("." + filetype)[0]
                # print str(link + ".pdf")
                filename = "%s%s.%s" % (domain, counter, filetype)
                try:
                    downfile = browse.retrieve(str(link + "." + filetype), filename)[0]
                    filename = downfile
                    filename, realname = unicodeFilename(filename), filename
                    parser = createParser(filename, realname)
                    metadata = extractMetadata(parser)
                    text = metadata.exportPlaintext()
                    charset = getTerminalCharset()
                    sidlist.append(sid)
                    typelist.append(str(filetype))
                    namelist.append(str(filename))
                    metalist.append(str(text))
                    counter += 1
                except:
                    # Best-effort scraping: skip any download/parse failure.
                    pass
        for meta in zip(sidlist, typelist, namelist, metalist):
            executor.execute("INSERT INTO metadata VALUES (?,?,?,?)", meta)
        # for line in text:
        #     print makePrintable(line, charset)
        connection.commit()
def getData(self):
    """Extract hachoir metadata for ``self.filename``.

    NOTE(review): the visible span ends with `metadata` bound but neither
    returned nor stored on self — the function presumably continues beyond
    this view.
    """
    filename, realname = unicodeFilename(self.filename), self.filename
    parser = createParser(filename, realname)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def get_duration(fn):
    """Return the media duration value for *fn*, or None when unavailable."""
    # Hand hachoir only the first 64 KiB; otherwise it may read the whole file.
    with open(fn, 'rb') as source:
        head = StringIO(source.read(64 * 1024))
        parser = guessParser(InputIOStream(head, filename=unicode(fn), tags=[]))
        meta = extractMetadata(parser)
        if not meta:
            return
        item = meta.getItem('duration', 0)
        return item and item.value
def extract_title(filename):
    """Return the 'title' metadata entry for *filename*, or None.

    :param filename: path to a file hachoir can parse
    :return: title value, or None when the file can't be parsed or has no metadata
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        # Unparseable file: previously this crashed in extractMetadata/exportPlaintext.
        return None
    metadata = extractMetadata(parser)
    if not metadata:
        return None
    # Dropped the unused exportPlaintext() call (dead work that also crashed
    # with AttributeError when metadata was None).
    return metadata.get('title')
def _sync_file(self, manager, file_path, node, upload_pool):
    """Sync one local file to SmugMug, uploading only when it differs.

    Comparison strategy depends on the file type:
    - videos: SmugMug mutates them server-side, so compare modification
      timestamps (within 1 second) instead of MD5;
    - HEIC: recoded to JPEG server-side with no usable metadata, so they
      are assumed unchanged and never re-uploaded;
    - everything else: compare MD5 against SmugMug's ArchivedMD5.
    """
    if self._aborting:
        return
    with manager.start_task(1, '* Syncing file "%s"...' % file_path):
        file_name = file_path.split(os.sep)[-1].strip()
        with open(file_path, 'rb') as f:
            file_content = f.read()

        file_root, file_extension = os.path.splitext(file_name)
        if file_extension.lower() == '.heic':
            # SmugMug converts HEIC files to JPEG and renames them in the process
            renamed_file = file_root + '_' + file_extension[1:] + '.JPG'
            remote_file = node.get_child(renamed_file)
        else:
            remote_file = node.get_child(file_name)

        if remote_file:
            if remote_file['Format'].lower() in VIDEO_EXT:
                # Video files are modified by SmugMug server side, so we cannot use
                # the MD5 to check if the file needs a re-sync. Use the last
                # modification time instead.
                remote_time = datetime.datetime.strptime(
                    remote_file.get('ImageMetadata')['DateTimeModified'],
                    '%Y-%m-%dT%H:%M:%S')
                try:
                    parser = guessParser(StringInputStream(file_content))
                    metadata = extractMetadata(parser)
                    file_time = max(
                        metadata.getValues('last_modification') +
                        metadata.getValues('creation_date'))
                except Exception as err:
                    # Fall back to the filesystem mtime when hachoir fails.
                    print('Failed extracting metadata for file "%s".' % file_path)
                    file_time = datetime.datetime.fromtimestamp(
                        os.path.getmtime(file_path))
                time_delta = abs(remote_time - file_time)
                same_file = (time_delta <= datetime.timedelta(seconds=1))
            elif file_extension.lower() == '.heic':
                # HEIC files are recoded to JPEG's server side by SmugMug so we cannot
                # use MD5 to check if file needs a re-sync. Moreover, no image
                # metadata (e.g. time taken timestamp) is kept in SmugMug that would
                # allow us to tell if the file is the same. Hence, for now we just
                # assume HEIC files never change and we never re-upload them.
                same_file = True
            else:
                remote_md5 = remote_file['ArchivedMD5']
                file_md5 = hashlib.md5(file_content).hexdigest()
                same_file = (remote_md5 == file_md5)

            if same_file:
                return  # File already exists on Smugmug

        if self._aborting:
            return
        upload_pool.add(self._upload_media, manager, node, remote_file,
                        file_path, file_name, file_content)
def _sync_file(self, manager, file_path, node, upload_pool):
    """Sync one local file (plus optional sidecar JSON) to SmugMug.

    Videos are compared by modification time (SmugMug mutates them server
    side); everything else by MD5. If the file already exists remotely its
    metadata is patched from the sidecar ``<file>.json``; otherwise the file
    is queued for upload with ``X-Smug-`` prefixed sidecar keys.
    """
    if self._aborting:
        return
    with manager.start_task(1, '* Syncing file "%s"...' % file_path):
        file_name = file_path.split(os.sep)[-1].strip()
        with open(file_path, 'rb') as f:
            file_content = f.read()
        remote_file = node.get_child(file_name)

        try:
            with open(file_path + ".json", 'rb') as json_file:
                json_file_content = json.load(json_file)
        except Exception:
            # Fallback must be a dict (was json.dumps(...), i.e. a *string*,
            # which broke both the patch call and the key-prefix loop below).
            json_file_content = {"Keywords": "auto"}

        if remote_file:
            if remote_file['Format'].lower() in VIDEO_EXT:
                # Video files are modified by SmugMug server side, so we cannot use
                # the MD5 to check if the file needs a re-sync. Use the last
                # modification time instead.
                remote_time = datetime.datetime.strptime(
                    remote_file.get('ImageMetadata')['DateTimeModified'],
                    '%Y-%m-%dT%H:%M:%S')
                try:
                    parser = guessParser(StringInputStream(file_content))
                    metadata = extractMetadata(parser)
                    file_time = max(
                        metadata.getValues('last_modification') +
                        metadata.getValues('creation_date'))
                except Exception as err:
                    print('Failed extracting metadata for file "%s".' % file_path)
                    file_time = datetime.datetime.fromtimestamp(
                        os.path.getmtime(file_path))
                time_delta = abs(remote_time - file_time)
                same_file = (time_delta <= datetime.timedelta(seconds=1))
            else:
                remote_md5 = remote_file['ArchivedMD5']
                file_md5 = hashlib.md5(file_content).hexdigest()
                same_file = (remote_md5 == file_md5)

            if same_file:
                remote_file.patch('Image', json=json_file_content)
                return  # File already exists on Smugmug

        if self._aborting:
            return
        # Iterate over a snapshot: mutating the dict while iterating it
        # raised RuntimeError in the original.
        for key in list(json_file_content):
            json_file_content["X-Smug-" + key] = json_file_content.pop(key)
        upload_pool.add(self._upload_media, manager, node, remote_file,
                        file_path, file_name, file_content, json_file_content)
def from_string(self, data):
    """Extract hachoir metadata from a raw byte string *data*."""
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata
    byte_stream = StringInputStream(data)
    guessed = guessParser(byte_stream)
    return extractMetadata(guessed)
def getMetadata(filename):
    """Return the hachoir metadata object for *filename*.

    Exits the process (status 1) when the file cannot be parsed —
    presumably this is used from a CLI script.
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print "Unable to parse file"
        exit(1)
    metadata = extractMetadata(parser)
    return metadata
def qualityFromFileMeta(filename):  # pylint: disable=too-many-branches
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    # Keep hachoir from printing its own warnings.
    log.use_print = False

    try:
        parser = createParser(filename)
    except Exception:  # pylint: disable=broad-except
        parser = None

    if not parser:
        return Quality.UNKNOWN

    try:
        metadata = extractMetadata(parser)
    except Exception:  # pylint: disable=broad-except
        metadata = None

    try:
        # hachoir does not close the underlying file itself.
        parser.stream._input.close()  # pylint: disable=protected-access
    except Exception:  # pylint: disable=broad-except
        pass

    if not metadata:
        return Quality.UNKNOWN

    # Pull the video height from the top-level metadata, falling back to
    # any metadata group that carries one.
    height = 0
    if metadata.has('height'):
        height = int(metadata.get('height') or 0)
    else:
        test = getattr(metadata, "iterGroups", None)
        if callable(test):
            for metagroup in metadata.iterGroups():
                if metagroup.has('height'):
                    height = int(metagroup.get('height') or 0)

    if not height:
        return Quality.UNKNOWN

    base_filename = ek(path.basename, filename)
    # Source hints (bluray/webdl) come from the filename, not the stream.
    bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
    webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

    ret = Quality.UNKNOWN
    if height > 1000:
        ret = ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
    elif 680 < height < 800:
        ret = ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
    elif height < 680:
        ret = (Quality.SDTV, Quality.SDDVD)[
            re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
    # NOTE(review): heights in 800-1000 deliberately(?) stay UNKNOWN — confirm.
    return ret
def which_type(self, image_path):
    """
    Analyzes the image provided and attempts to determine whether it is a
    poster, banner or fanart.

    :param image_path: full path to the image
    :return: BANNER, POSTER, FANART if it concluded one of them, or None if
        the image was none of these (or didn't exist / couldn't be parsed)
    """
    if not os.path.isfile(image_path):
        logger.log(
            u"Couldn't check the type of {image_path} cause it doesn't exist"
            .format(image_path=image_path), logger.WARNING)
        return None

    if try_int(os.path.getsize(image_path)) == 0:
        logger.log(
            u'Image has 0 bytes size. Deleting it: {image_path}'.format(
                image_path=image_path), logger.WARNING)
        try:
            os.remove(image_path)
        except OSError as e:
            logger.log(
                u"Could't delete file: '{image_path}'. Please manually delete it. Error: {error_msg}"
                .format(image_path=image_path, error_msg=e), logger.WARNING)
        return

    # use hachoir to parse the image for us
    img_parser = createParser(image_path)
    if not img_parser:
        # createParser can return None; extractMetadata(None) would crash.
        logger.log(
            u"Unable to get metadata from {image_path}, not using your existing image"
            .format(image_path=image_path), logger.DEBUG)
        return None

    img_metadata = extractMetadata(img_parser)
    if not img_metadata:
        # Close the parser's file handle even on this early return (it leaked).
        img_parser.stream._input.close()
        logger.log(
            u"Unable to get metadata from {image_path}, not using your existing image"
            .format(image_path=image_path), logger.DEBUG)
        return None

    width = float(img_metadata.get('width') or 0)
    height = float(img_metadata.get('height') or 0)
    img_parser.stream._input.close()

    if not height:
        # A zero/missing height previously raised ZeroDivisionError/TypeError.
        logger.log(
            u"Image has invalid dimensions, unknown type", logger.WARNING)
        return

    img_ratio = width / height

    # most posters are around 0.68 width/height ratio (eg. 680/1000)
    if 0.55 < img_ratio < 0.8:
        return self.POSTER
    # most banners are around 5.4 width/height ratio (eg. 758/140)
    elif 5 < img_ratio < 6:
        return self.BANNER
    # most fanart are around 1.77777 width/height ratio (eg. 1280/720 and 1920/1080)
    elif 1.7 < img_ratio < 1.8:
        return self.FANART
    else:
        logger.log(
            u"Image has size ratio of {img_ratio}, unknown type".format(
                img_ratio=img_ratio), logger.WARNING)
        return
def get_metadata(self, fullurl):
    """Return hachoir metadata for *fullurl*, or the string 'not' on failure."""
    result = 'not'
    try:
        uni_name, real_name = unicode(fullurl), fullurl
        media_parser = createParser(uni_name, real_name)
        result = extractMetadata(media_parser)
    except Exception as e:
        print ("Error getting metadata ", e.args)
    return result
def from_string(self, data):
    """Guess a parser for the raw bytes *data* and extract its metadata."""
    from hachoir_core.stream import StringInputStream
    from hachoir_metadata import extractMetadata
    from hachoir_parser import guessParser
    return extractMetadata(guessParser(StringInputStream(data)))
def getinfo(rootdir, extensions=(".avi", ".mp4", ".mov")):
    """Walk *rootdir* and yield (path, metadata) for each matching media file.

    Directories and files are visited in sorted order.
    """
    if not isinstance(rootdir, unicode):
        rootdir = rootdir.decode(sys.getfilesystemencoding())
    for current_dir, subdirs, entries in os.walk(rootdir):
        # In-place sorts make os.walk descend deterministically.
        subdirs.sort()
        entries.sort()
        for entry in entries:
            if not entry.endswith(extensions):
                continue
            full_path = os.path.join(current_dir, entry)
            yield full_path, extractMetadata(createParser(full_path))
def _get_hachoir_metadata(blob_path):
    """Extract hachoir metadata for *blob_path* at best quality.

    Exits the process when parsing fails. NOTE(review): the visible span
    ends with `metadata` bound but not returned — presumably truncated.
    """
    parser = createParser(blob_path)
    if not parser:
        print "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser, quality=metadata_item.QUALITY_BEST)
    except HachoirError, err:
        print "Metadata extraction error: {}".format(err)
        metadata = None
def __get_hd_tag__(self, video):
    """Return an HD tag for *video* based on its width.

    1280 wide -> 404, 1920 wide -> 1604, anything else -> 104
    (presumably SD/720p/1080p flags for a tagging tool — verify meaning).
    """
    meta = extractMetadata(createParser(unicodeFilename(video)))
    width = meta.get('width')
    if width == 1920:
        return 1604
    if width == 1280:
        return 404
    return 104
def getinfo(rootdir, extensions=(".avi", ".mp4", ".mov")):
    """Yield (path, hachoir metadata) for every matching file under *rootdir*."""
    if not isinstance(rootdir, unicode):
        rootdir = rootdir.decode(sys.getfilesystemencoding())
    for dirpath, dirs, files in os.walk(rootdir):
        # Sort in place so the walk order is deterministic.
        dirs.sort()
        files.sort()
        matching = (name for name in files if name.endswith(extensions))
        for name in matching:
            media_path = os.path.join(dirpath, name)
            yield media_path, extractMetadata(createParser(media_path))
def get_video_creation_date_metadata(fname): """ Returns the "Creation date" entry from the metadata of a file The return string will have the format '- Creation date: YYYY-MM-DD HH:MM:SS' or if no metadata is found or the file is not valid or doesn't exist, an exception will be thrown :param fname: Name of file to read the metadata from :returns: creation data metadata in specified format :Example: >>> import fileops >>> print fileops.get_video_creation_date_metadata("IMG_1234.JPG") '- Creation date: 2013-09-30 15:21:42' """ # suppress errors from hachoir calls, use our own logging hachoir_core.config.quiet = True # try to access tags associated with video files using # hachoir parser try: fname, realname = hachoir_core.cmd_line.unicodeFilename( fname), fname parser = hachoir_parser.createParser(fname, realname) except: raise VideoMetadataError, "Unable to parse video file" if not parser: raise VideoMetadataError, "Unable to parse video file" try: metadata = hachoir_metadata.extractMetadata(parser) except HachoirError: raise VideoMetadataError, "Error extracting metadata " finally: # hachoir doesn't close the file associated with # the parser object, hence need to do this parser.stream._input.close() if not metadata: raise VideoMetadataError, "No metadata found" text = metadata.exportPlaintext() for line in text: printable = hachoir_core.tools.makePrintable(line, hachoir_core.i18n.getTerminalCharset()) if "Creation date" in printable: return printable raise VideoMetadataError, "No 'Creation date' found in metadata"
def extract_metadata(self, file):
    """Extract hachoir metadata from an open *file* object.

    Raises MetadataException wrapping any hachoir or type error.
    """
    # Don't let hachoir truncate long string values.
    config.MAX_STR_LENGTH = float("inf")
    try:
        display_name = file.name
        if not isinstance(display_name, unicode):
            display_name = unicodeFilename(display_name)
        io_stream = InputIOStream(file, source="file:%s" % display_name,
                                  tags=[], filename=display_name)
        return extractMetadata(guessParser(io_stream))
    except (HachoirError, TypeError) as exc:
        raise MetadataException(exc)
def getData(self):
    """Extract hachoir metadata for ``self.filename``.

    Returns the string "error" when no parser can be created.
    NOTE(review): on success the visible span binds `metadata` without
    returning it — presumably the function continues beyond this view.
    """
    filename, realname = unicodeFilename(self.filename), self.filename
    try:
        parser = createParser(filename, realname)
    except:
        return "error"
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def get_mov_created_date(path):
    """Return the creation date parsed from a .mov file's metadata.

    :param path: path to the candidate file
    :return: datetime of the 'Creation date' metadata line, or None when the
        file is not a .mov, cannot be parsed, or has no creation date
    """
    if os.path.splitext(path)[1].lower() != '.mov':
        return None
    filename, realname = hachoir_core.cmd_line.unicodeFilename(path), path
    parser = hachoir_parser.createParser(filename, realname)
    if not parser:
        # Previously extractMetadata(None) / metadata.exportPlaintext()
        # raised AttributeError here.
        return None
    metadata = hachoir_metadata.extractMetadata(parser)
    if not metadata:
        return None
    for line in metadata.exportPlaintext():
        m = re.search(r'Creation date:\s*(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2}:\d{2})', line, re.I)
        if m:
            return datetime.strptime(m.group(1) + ' ' + m.group(2), '%Y-%m-%d %H:%M:%S')
    return None
def metadata_for(filename):
    """Extract hachoir metadata for *filename*; exits the process when the
    file cannot be parsed. Returns None on extraction errors.

    NOTE(review): the visible span ends with `metadata` bound but not
    returned — presumably the function continues beyond this view.
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def loadMetadata(self):
    """Load the metadata, either using Hachoir, ... either using mplayer.

    Sets ``self.metadata`` to the hachoir metadata object, or None when
    extraction fails. Does nothing when no video file is set.
    """
    if len(self.videoFile) != 0:
        filename = OP.join(self.videoPath, self.videoFile)
        filename, realname = unicodeFilename(filename), filename
        myParser = createParser(filename, realname)
        try:
            self.metadata = extractMetadata(myParser)
        except HachoirError, err:
            print "Metadata extraction error: %s" % unicode(err)
            self.metadata = None
def metadata_for_video(filename):
    """Extract hachoir metadata for a video *filename*; exits the process
    when the file cannot be parsed.

    NOTE(review): the visible span ends with `metadata` bound but not
    returned — presumably the function continues beyond this view.
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print "Unable to parse file"
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def parse(self):
    """Parse ``self.filename`` with hachoir and extract its metadata.

    Writes a message to stderr and returns early when no parser can be
    created; sets ``metadata`` to None on extraction errors.
    """
    filename, realname = unicodeFilename(self.filename), self.filename
    parser = hachoir_parser.createParser(filename, realname)
    if not parser:
        sys.stderr.write("Unable to parse file %s/n" % self.filename)
        return
    try:
        ## TODO: this call emits a warning when there is no GPS data
        metadata = hachoir_metadata.extractMetadata(parser)
    except HachoirError, err:
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def getMetadata(filename):
    """Return the metadata of *filename* as a list of plaintext lines.

    :param filename: path to a file hachoir can parse
    :return: list of exported metadata lines, or None when parsing or
        extraction fails
    """
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    try:
        metadata = extractMetadata(parser)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return None
    if metadata is not None:
        return metadata.exportPlaintext()
    return None
def get_video_creation_date_metadata(fname): """ Returns the "Creation date" entry from the metadata of a file The return string will have the format '- Creation date: YYYY-MM-DD HH:MM:SS' or if no metadata is found or the file is not valid or doesn't exist, an exception will be thrown :param fname: Name of file to read the metadata from :returns: creation data metadata in specified format :Example: >>> import fileops >>> print fileops.get_video_creation_date_metadata("IMG_1234.JPG") '- Creation date: 2013-09-30 15:21:42' """ # suppress errors from hachoir calls, use our own logging hachoir_core.config.quiet = True # try to access tags associated with video files using # hachoir parser try: fname, realname = hachoir_core.cmd_line.unicodeFilename(fname), fname parser = hachoir_parser.createParser(fname, realname) except: raise VideoMetadataError, "Unable to parse video file" if not parser: raise VideoMetadataError, "Unable to parse video file" try: metadata = hachoir_metadata.extractMetadata(parser) except HachoirError: raise VideoMetadataError, "Error extracting metadata " finally: # hachoir doesn't close the file associated with # the parser object, hence need to do this parser.stream._input.close() if not metadata: raise VideoMetadataError, "No metadata found" text = metadata.exportPlaintext() for line in text: printable = hachoir_core.tools.makePrintable( line, hachoir_core.i18n.getTerminalCharset()) if "Creation date" in printable: return printable raise VideoMetadataError, "No 'Creation date' found in metadata"
def processFile(self, filename):
    """Parse *filename* and extract its hachoir metadata.

    Returns None when the file cannot be parsed or extraction fails.
    NOTE(review): the visible span ends without a success-path return —
    presumably the function continues beyond this view.
    """
    filename, realname = unicodeFilename(filename), filename
    print u"[%s] Process file %s..." % (self.total, filename)
    parser = createParser(filename, realname)
    if not parser:
        print >> stderr, "Unable to parse file"
        return None
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print >> stderr, "Metadata extraction error: %s" % unicode(err)
        return None
def metadata_for(filename):
    """Extract and (presumably, further down) print hachoir metadata for
    *filename*; exits the process when the file cannot be parsed.

    NOTE(review): the visible span ends with `metadata` bound but not
    returned — the function presumably continues beyond this view.
    """
    print '\nprinting metadata...\n'
    filename, realname = unicodeFilename(filename), filename
    parser = createParser(filename, realname)
    if not parser:
        print("Unable to parse file")
        exit(1)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        print("Metadata extraction error: %s" % unicode(err))
        metadata = None
def main(filename="default_64.png"):
    """Debug entry point: parse *filename*, print its fields and metadata.

    NOTE(review): reaches into the parser's private `_fields` attribute —
    relies on hachoir internals.
    """
    filename, realname = unicodeFilename(filename), filename
    p = hachoir_parser.createParser(filename, realname)
    print "Fields: ", p._fields
    metadata = extractMetadata(p)
    print_metadata(metadata)
    fields = p._fields
    print "fields: ", fields
    for i in print_recursively(fields):
        print i
def getMetaData(self, filepath):
    """Extract hachoir metadata for *filepath*; exits the process on any
    parse or extraction failure.

    NOTE(review): the visible span ends with `metadata` bound but not
    returned — presumably the function continues beyond this view.
    """
    ufilepath = hachoir_core.cmd_line.unicodeFilename(str(filepath))
    parser = hachoir_parser.createParser(ufilepath, filepath)
    if not parser:
        self.output("Unable to parse file")
        sys.exit(1)
    try:
        metadata = hachoir_metadata.extractMetadata(parser)
    except HachoirError, err:
        self.output("Metadata extraction error: %s" % unicode(err))
        sys.exit(1)
def _parse_file(filename):
    """Extract metadata from *filename*, reading at most the first 64 KiB."""
    # Workaround to fix unicode path problem on different OSs: Windows gets a
    # plain binary open, other platforms use the File wrapper.
    source = open(filename, 'rb') if sys.platform == 'win32' else File(filename)
    try:
        head = StringIO(source.read(1024 * 64))
        parser = guessParser(InputIOStream(head, filename=unicode(filename), tags=[]))
        result = extractMetadata(parser)
    finally:
        source.close()
    return result
def get_metadata(self, filepath):
    '''
    Gets video metadata using hachoir_parser

    filepath: str absolute path to movie file

    Combines metadata parsed from the filename (via PTN) with container
    metadata extracted by hachoir.

    On failure, can return empty dict

    NOTE(review): the visible span ends inside the exception handler —
    the return of `data` presumably follows beyond this view.

    Returns dict
    '''
    logging.info('Gathering metada for {}.'.format(filepath))
    # Fixed skeleton so callers always see these keys.
    data = {
        'title': '',
        'year': '',
        'resolution': '',
        'releasegroup': '',
        'audiocodec': '',
        'videocodec': '',
        'source': '',
        'imdbid': '',
        'size': ''
    }

    titledata = PTN.parse(os.path.basename(filepath))
    # this key is useless
    titledata.pop('excess', None)
    # Make sure this matches our key names
    if 'codec' in titledata:
        titledata['videocodec'] = titledata.pop('codec')
    if 'audio' in titledata:
        titledata['audiocodec'] = titledata.pop('audio')
    if 'quality' in titledata:
        titledata['source'] = titledata.pop('quality')
    if 'group' in titledata:
        titledata['releasegroup'] = titledata.pop('group')
    if 'resolution' in titledata:
        titledata['resolution'] = titledata['resolution'].upper()
    data.update(titledata)

    metadata = None
    try:
        parser = createParser(filepath)
        extractor = extractMetadata(parser)
        metadata = extractor.exportDictionary(human=False)
        # hachoir doesn't close the underlying file itself.
        parser.stream._input.close()
        data.update(metadata)
    except Exception, e:  # noqa
        logging.warning('Unable to parse metadata.', exc_info=True)
def get_duration(filename):
    """Return the duration of the movie *filename* in seconds, or None on failure."""
    duration = None
    try:
        if HAVE_HACHOIR:
            filename = unicode(filename, "utf-8")
            parser = createParser(filename)
            metadata = extractMetadata(parser, quality=1.0)
            duration = metadata.getValues('duration')[0].total_seconds()
            return duration
    except Exception:
        # BUG FIX: a bare "except:" also swallowed SystemExit and
        # KeyboardInterrupt; catch only ordinary errors and log them.
        logging.error("error while getting duration metadata from movie (%s)", filename)
        logging.error(traceback.format_exc())
    # Falls through here when hachoir is unavailable or extraction failed.
    return None
def metadata_for_filelike(filelike):
    """Extract hachoir metadata from a seekable file-like object.

    Returns the raw private metadata mapping, or None when the object
    cannot be rewound, parsed, or have metadata extracted.
    """
    try:
        filelike.seek(0)
    except (AttributeError, IOError):
        return None
    parser = guessParser(InputIOStream(filelike, None, tags=[]))
    if not parser:
        return None
    try:
        extracted = extractMetadata(parser)
    except HachoirError:
        return None
    # Reach into the name-mangled attribute to hand back the raw data dict.
    return extracted._Metadata__data
def get_hachoir_create_date(fname): """Get media create date using hachoir library""" global log retval = None filename, realname = unicodeFilename(fname), fname parser = createParser(filename, realname) if not parser: log.critical('Unable to parse file ' + fname) return retval try: metadata = extractMetadata(parser) except HachoirError, err: log.critical('Metadata extraction error for ' + fname + ' - ' + unicode(err)) metadata = None
def get_metadata(filename): from hachoir_core.error import HachoirError from hachoir_parser import createParser from hachoir_metadata import extractMetadata # filename, realname = unicodeFilename(filename), filename parser = createParser(filename, filename) if not parser: return "Unable to parse file" try: metadata = extractMetadata(parser) except HachoirError, err: return "Metadata extraction error: %s" % unicode(err)
def showvideo(request):
    # Django view: on POST, saves an uploaded video plus a metadata record
    # derived from hachoir's plaintext metadata dump, then renders the page.
    if request.method =='POST':
        video=Video()
        video.title=request.POST.get('title')
        video.name=request.POST.get('title')
        video.description=request.POST.get('description')
        video.tags=request.POST.get('tags')
        video.categories=request.POST.get('categories')
        video.videofile=request.FILES['videofile']
        # Strip the upload_to prefix to get the bare file name.
        filename ="%s"% (video.videofile)
        filename = filename.replace("videos/",'')
        video.save()
        meta_video=VideoMetadata()
        meta_video.meta_title_id=video.title
        # NOTE(review): hardcoded absolute media path — should come from
        # settings.MEDIA_ROOT; this will break on any other machine.
        filename = "/home/priyanka/my_projects/tv/vidgyor/media/videos/%s"%(filename)
        filename=filename.replace(" ","_")
        filename, realname = filename, filename.encode('utf-8')
        parser = createParser(filename, realname)
        # NOTE(review): createParser/extractMetadata can return None for
        # unparseable files; exportPlaintext would then raise — confirm
        # uploads are always parseable media.
        metadata = extractMetadata(parser)
        text = metadata.exportPlaintext()
        # Scrape duration and creation date out of hachoir's plaintext
        # "- Key: value" lines.
        for item in text:
            if 'Duration' in item:
                dur, dur_val = item.split(": ")
                dur=dur[2:]
                meta_video.duration=dur_val
            elif 'Creation ' in item:
                cre, cre_date = item.split(": ")
                cre=cre[2:]
                meta_video.created_on=cre_date
        meta_video.save()
    #lastvideo= Video.objects.last()
    #videofile= lastvideo.videofile
    #form= VideoForm(reqiuest.POST or None, request.FILES or None)
    #if form.is_valid():
    #    form.save()
    #context= {'videofile': videofile,
    #          'form': form
    #         }
    return render(request,'main_page.html')# 'videos.html')#, context)
def qualityFromFileMeta(filename):
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    from hachoir_parser import createParser
    from hachoir_metadata import extractMetadata

    # Best-effort parse: any failure simply means "unknown quality".
    try:
        media_parser = createParser(filename)
    except Exception:
        media_parser = None
    if not media_parser:
        return Quality.UNKNOWN

    try:
        media_info = extractMetadata(media_parser)
    except Exception:
        media_info = None
    try:
        media_parser.stream._input.close()
    except:
        pass
    if not media_info:
        return Quality.UNKNOWN

    # Look for a height on the top-level metadata first, then in any group
    # (the last matching group wins, as in the original scan).
    frame_height = 0
    if media_info.has('height'):
        frame_height = int(media_info.get('height') or 0)
    elif callable(getattr(media_info, "iterGroups", None)):
        for group in media_info.iterGroups():
            if group.has('height'):
                frame_height = int(group.get('height') or 0)
    if not frame_height:
        return Quality.UNKNOWN

    # Map the frame height onto a quality bucket.
    if frame_height > 1040:
        return Quality.FULLHDTV
    if 680 < frame_height < 760:
        return Quality.HDTV
    if frame_height < 680:
        return Quality.SDTV
    return Quality.UNKNOWN
def which_type(self, path):
    """
    Analyzes the image provided and attempts to determine whether it is a poster or banner.

    returns: BANNER, POSTER if it concluded one or the other, or None if the image was neither (or didn't exist)
    path: full path to the image
    """
    if not ek.ek(os.path.isfile, path):
        logger.log(
            u"Couldn't check the type of " + str(path) +
            " cause it doesn't exist", logger.WARNING)
        return None

    # use hachoir to parse the image for us
    img_parser = createParser(path)
    # BUG FIX: createParser returns None for unreadable/unknown files; the
    # old code passed None straight to extractMetadata and crashed.
    if not img_parser:
        logger.log(
            u"Unable to get metadata from " + str(path) +
            ", not using your existing image", logger.DEBUG)
        return None
    try:
        img_metadata = extractMetadata(img_parser)
        if not img_metadata:
            logger.log(
                u"Unable to get metadata from " + str(path) +
                ", not using your existing image", logger.DEBUG)
            return None
        img_ratio = float(img_metadata.get('width')) / float(
            img_metadata.get('height'))
    finally:
        # BUG FIX: the stream was only closed on the success path before,
        # leaking a file handle whenever metadata was missing.
        img_parser.stream._input.close()

    # most posters are around 0.68 width/height ratio (eg. 680/1000)
    if 0.55 < img_ratio < 0.8:
        return self.POSTER

    # most banners are around 5.4 width/height ratio (eg. 758/140)
    elif 5 < img_ratio < 6:
        return self.BANNER

    # most fanart are around 1.77777 width/height ratio (eg. 1280/720 and 1920/1080)
    elif 1.7 < img_ratio < 1.8:
        return self.FANART
    else:
        logger.log(
            u"Image has size ratio of " + str(img_ratio) +
            ", unknown type", logger.WARNING)
        return None
def getMetaData(filename): text = "" filename, realname = hachoir_core.cmd_line.unicodeFilename( filename), filename print "filename: " + filename print "realname: " + realname parser = hachoir_parser.createParser(filename, realname) if not parser: print >> sys.stderr, "Unable to parse file" return text try: metadata = hachoir_metadata.extractMetadata(parser) except HachoirError, err: print "Metadata extraction error: %s" % unicode(err) metadata = None
def qualityFromFileMeta(filename):
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata
    from hachoir_core.log import log
    log.use_print = False

    # Guard clause: nothing to inspect if the path is not a regular file.
    if not ek(os.path.isfile, filename):
        return Quality.UNKNOWN

    base_filename = ek(os.path.basename, filename)
    # Source hints taken from the file name itself.
    bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
    webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

    try:
        with ek(io.open, filename, "rb") as fh:
            parsed = extractMetadata(guessParser(StringInputStream(fh.read())))
            if parsed:
                # Scan the top-level metadata and every group for a usable height.
                for meta in chain([parsed], parsed.iterGroups()):
                    frame_height = meta.get('height', None)
                    if not frame_height:
                        continue
                    if frame_height > 1000:
                        return ((Quality.FULLHDTV,
                                 Quality.FULLHDBLURAY)[bluray],
                                Quality.FULLHDWEBDL)[webdl]
                    if 680 < frame_height < 800:
                        return ((Quality.HDTV,
                                 Quality.HDBLURAY)[bluray],
                                Quality.HDWEBDL)[webdl]
                    if frame_height < 680:
                        return (Quality.SDTV, Quality.SDDVD)[re.search(
                            r'dvd|b[rd]rip|blue?-?ray', base_filename,
                            re.I) is not None]
    except Exception as e:
        sickbeard.logger.log(ex(e))

    return Quality.UNKNOWN
def attributes(self, node): attr = VMap() attr.thisown = False file = node.open() parser = guessParser(StringInputStream(file.read())) file.close() if not parser: attr["info"] = Variant("unable to read metadata") return attr try: metadata = extractMetadata(parser) for data in metadata: if not(any(data.values)): continue attr[data.key] = Variant("; ".join([str(val.value) for val in data.values])) except HachoirError, err: attr["info"] = Variant("error while reading metadata")