def extract_metadata(audio):
    """Parse an in-memory audio blob with hachoir and return its metadata.

    :param audio: raw bytes of the audio stream
    :raises ValueError: when no hachoir parser recognises the stream
    """
    stream = StringInputStream(audio)
    parser = guessParser(stream)
    if parser is None:
        raise ValueError("Could not parse the stream")
    return extractMetadata(parser)
def getFragment(self, frag):
    """Return a parser for *frag*'s sub-stream.

    Falls back to returning *frag* unchanged (after logging a warning)
    when no parser can be created for the mini stream.
    """
    sub_stream = frag.getSubIStream()
    parser = guessParser(sub_stream)
    if parser:
        return parser
    warning("Unable to create the OLE2 mini stream parser!")
    return frag
def which_type(self, path):
    """
    Analyzes the image provided and attempts to determine whether it is a
    poster, banner or fanart.

    :param path: full path to the image
    :return: BANNER, POSTER or FANART if it concluded one of them, or None
        if the image was none of these (or didn't exist / was unreadable)
    """
    if not os.path.isfile(path):
        sickrage.app.log.warning("Couldn't check the type of " + str(path) + " cause it doesn't exist")
        return None

    with io.open(path, 'rb') as fh:
        img_metadata = extractMetadata(guessParser(StringInputStream(fh.read())))

    if not img_metadata:
        sickrage.app.log.debug(
            "Unable to get metadata from " + str(path) + ", not using your existing image")
        return None

    height = float(img_metadata.get('height', 0))
    if not height:
        # fix: previously a zero/missing height raised ZeroDivisionError
        sickrage.app.log.debug(
            "Unable to get metadata from " + str(path) + ", not using your existing image")
        return None

    img_ratio = float(img_metadata.get('width', 0)) / height

    # most posters are around 0.68 width/height ratio (eg. 680/1000)
    if 0.55 < img_ratio < 0.8:
        return self.POSTER
    # most banners are around 5.4 width/height ratio (eg. 758/140)
    elif 5 < img_ratio < 6:
        return self.BANNER
    # most fanart are around 1.77777 width/height ratio (eg. 1280/720 and 1920/1080)
    elif 1.7 < img_ratio < 1.8:
        return self.FANART
    else:
        sickrage.app.log.warning("Image has size ratio of " + str(img_ratio) + ", unknown type")
def to_python(self, data):
    """Validate that the uploaded file is an MP3 (audio/mpeg) stream.

    Returns the cleaned file object from the parent field (rewound to the
    start) or None. Raises forms.ValidationError with the 'invalid_format'
    message when the content is not recognised as MP3.
    """
    f = super(AudioField, self).to_python(data)
    if f is None:
        return None

    if hasattr(data, 'temporary_file_path'):
        file = open(data.temporary_file_path(), 'rb')
    elif hasattr(data, 'read'):
        file = BytesIO(data.read())
    else:
        file = BytesIO(data['content'])

    try:
        parser = guessParser(InputIOStream(file))
        # A missing parser, a failed validation or a non-MP3 mime type all
        # count as an invalid upload (previously a None parser triggered an
        # AttributeError that was caught by the generic handler below).
        if not (parser and parser.validate() and parser.mime_type == u'audio/mpeg'):
            raise Exception
    except ImportError:
        raise
    except Exception:  # not an mp3
        raise forms.ValidationError(self.error_messages['invalid_format'])
    finally:
        # fix: the probe handle (notably the temporary-file one) was leaked
        file.close()

    if hasattr(f, 'seek') and callable(f.seek):
        f.seek(0)
    return f
def getOLE2Parser(ole2, path):
    """Locate the first fragment of stream *path* inside the OLE2 container
    and return a parser for its sub-stream."""
    name = "%s[0]" % path
    if name in ole2:
        fragment = ole2[name]
    else:
        # Not a direct child: look it up from the root parser instead.
        fragment = getRootParser(ole2)[name]
    return guessParser(fragment.getSubIStream())
def useRoot(self, root):
    """Parse the OLE2 mini stream under *root* and extract from it.

    Returns None when the mini stream cannot be parsed. (Previously the
    None parser was still passed into _extract(), which would fail.)
    """
    stream = root.getSubIStream()
    ministream = guessParser(stream)
    if not ministream:
        warning("Unable to create the OLE2 mini stream parser!")
        return None
    return self._extract(ministream, main_document=False)
def qualityFromFileMeta(filename):
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata
    from hachoir_core.log import log
    # Silence hachoir's console logging while probing the file.
    log.use_print = False

    if ek(os.path.isfile, filename):
        base_filename = ek(os.path.basename, filename)
        # Source hints derived from the file name itself.
        bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
        webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None
        try:
            with ek(io.open, filename, "rb") as file:
                file_metadata = extractMetadata(guessParser(StringInputStream(file.read())))
                if file_metadata:
                    # Inspect the container metadata and every stream group
                    # until one of them reports a usable height.
                    for metadata in chain([file_metadata], file_metadata.iterGroups()):
                        height = metadata.get('height', None)
                        if height and height > 1000:
                            # 1080p-class sources
                            return ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
                        elif height and height > 680 and height < 800:
                            # 720p-class sources
                            # NOTE(review): heights in 800-1000 fall through
                            # to UNKNOWN — confirm that gap is intentional
                            return ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
                        elif height and height < 680:
                            # SD sources; DVD-ish names map to SDDVD
                            return (Quality.SDTV, Quality.SDDVD)[re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
        except Exception as e:
            sickbeard.logger.log(ex(e))

    return Quality.UNKNOWN
def qualityFromFileMeta(filename):
    """
    Get quality from file metadata

    :param filename: Filename to analyse
    :return: Quality prefix
    """
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata
    from hachoir_core import config as hachoir_config

    hachoir_config.quiet = True

    if os.path.isfile(filename):
        base_filename = os.path.basename(filename)
        # Source hints derived from the file name itself.
        bluray = re.search(r"blue?-?ray|hddvd|b[rd](rip|mux)", base_filename, re.I) is not None
        webdl = re.search(r"web.?dl|web(rip|mux|hd)", base_filename, re.I) is not None

        for byte in readFileBuffered(filename):
            try:
                file_metadata = extractMetadata(guessParser(StringInputStream(byte)))
                if not file_metadata:
                    # fix: a None result previously raised AttributeError
                    # which the bare except silently swallowed
                    continue
                for metadata in chain([file_metadata], file_metadata.iterGroups()):
                    height = metadata.get('height', 0)
                    if height > 1000:
                        # 1080p-class sources
                        return ((Quality.FULLHDTV, Quality.FULLHDBLURAY)[bluray], Quality.FULLHDWEBDL)[webdl]
                    elif height > 680 and height < 800:
                        # 720p-class sources
                        return ((Quality.HDTV, Quality.HDBLURAY)[bluray], Quality.HDWEBDL)[webdl]
                    elif height < 680:
                        # SD sources; DVD-ish names map to SDDVD
                        return (Quality.SDTV, Quality.SDDVD)[
                            re.search(r'dvd|b[rd]rip|blue?-?ray', base_filename, re.I) is not None]
            except Exception:
                # narrowed from a bare except: keep the best-effort scan,
                # but no longer catch SystemExit/KeyboardInterrupt
                continue

    return Quality.UNKNOWN
def media_from_file(infile, batch, user, manual=False):
    """Creates an instance of correct Media class from an open file"""
    stream = InputIOStream(infile)
    parser = hachoir_parser.guessParser(stream)
    metadata = hachoir_metadata.extractMetadata(parser)
    model_class = klass_from_metadata(metadata, infile.name)
    if not model_class:
        # TODO: need to test different errors
        log.warn('no media found for: %s', infile.name)
        return None
    else:
        mediatype = model_class.mediatype()
        # Allocate the next primary key from the shared sequence so the slug
        # can embed it before the row exists.
        cursor = connection.cursor()
        cursor.execute("SELECT nextval ('gallery_mediabase_id_seq')")
        slugid = cursor.fetchone()[0]
        slug = '%s.%d' % (user.username, slugid)
        args = {'owner': user,
                'slug': slug,
                'status': 'uploaded',
                'textheight' : 50,
                'batch': batch}
        if not manual:
            # Automatic path: attach the open file directly to the model field.
            if hasattr(model_class, 'IKOptions'):
                # we're some type of image object
                args['image'] = infile
            else:
                args['filefield'] = infile
        for dimension in ('width', 'height'):
            dimvalue = metadata.get(dimension, False)
            if dimvalue:
                args[dimension] = dimvalue
        if mediatype == 'video' and not infile.name.endswith('flv'):
            # Non-FLV videos are flagged for transcoding.
            args['encode'] = True
        if metadata.has('creation_date'):
            year = metadata.get('creation_date', None)
            if year:
                year = year.year
                args['year'] = year
        instance = model_class(**args)
        if manual:
            # Manual path: save the file through the model's storage field.
            fn = os.path.basename(infile.name)
            fileobj = File(infile)
            log.debug('manual creation of %s: %s', mediatype, fn)
            if hasattr(model_class, 'IKOptions'):
                # we're some type of image object
                instance.image.save(fn, fileobj)
            else:
                instance.filefield.save(fn, fileobj)
        instance.save()
        log.debug('Saved %s: %s' % (mediatype, instance.get_fname()))
        return instance
def get_duration(fn):
    """Return the 'duration' metadata value for *fn*, if extractable.

    Only the first 64 KiB are handed to hachoir, otherwise it might try to
    read the whole file.
    """
    with open(fn, 'rb') as source:
        head = StringIO(source.read(64 * 1024))
    parser = guessParser(InputIOStream(head, filename=unicode(fn), tags=[]))
    meta = extractMetadata(parser)
    if meta:
        item = meta.getItem('duration', 0)
        return item and item.value
def from_string(self, data):
    """Extract hachoir metadata from a raw byte string and return it."""
    from hachoir_core.stream import StringInputStream
    from hachoir_parser import guessParser
    from hachoir_metadata import extractMetadata

    parser = guessParser(StringInputStream(data))
    metadata = extractMetadata(parser)
    # plaintext = metadata.exportPlaintext(line_prefix=u"")
    return metadata
def extract_metadata(self, file):
    """Extract hachoir metadata from an open *file* object.

    :raises MetadataException: wrapping any hachoir or type error.
    """
    # Lift hachoir's string-length cap so long values are not truncated.
    config.MAX_STR_LENGTH = float("inf")
    try:
        filename = file.name
        if not isinstance(filename, unicode):
            filename = unicodeFilename(filename)
        source = "file:%s" % filename
        stream = InputIOStream(file, source=source, tags=[], filename=filename)
        return extractMetadata(guessParser(stream))
    except (HachoirError, TypeError) as e:
        raise MetadataException(e)
def extract(self):
    # Count this extraction attempt and reset the display prefix.
    self.nb_extract += 1
    self.prefix = ""
    data = self.data.tostring()
    stream = InputIOStream(StringIO(data), filename=self.filename)
    # Create parser
    try:
        parser = guessParser(stream)
    except InputStreamError, err:
        # Stream errors are tolerated: fall back to "no parser".
        parser = None
def getField(self, fieldset, main_document, name):
    """Fetch field *name* from *fieldset*.

    For main documents the field's sub-stream is parsed and the parser is
    returned; otherwise the raw field is returned. Returns None when the
    field is missing or (main documents only) no parser can be created.
    """
    if name not in fieldset:
        return None
    # _feedAll() is needed to make sure that we get all fragments
    # eg. summary[0], summary[1], ..., summary[n]
    fieldset._feedAll()
    field = fieldset[name]
    if not main_document:
        return field
    parser = guessParser(field.getSubIStream())
    if not parser:
        warning("Unable to create the OLE2 parser for %s!" % name)
        return None
    return parser
def metadata_for_filelike(filelike):
    """Return the raw hachoir metadata mapping for a file-like object.

    Returns None when the object is not seekable, not parseable, or
    metadata extraction fails.
    """
    try:
        filelike.seek(0)
    except (AttributeError, IOError):
        return None
    parser = guessParser(InputIOStream(filelike, None, tags=[]))
    if parser is None:
        return None
    try:
        extracted = extractMetadata(parser)
    except HachoirError:
        return None
    # Expose hachoir's private per-key data mapping directly.
    return extracted._Metadata__data
def _parse_file(filename):
    """Extract metatata from file"""
    # Workaround to fix unicode path problem on different OSs
    if sys.platform == 'win32':
        handle = open(filename, 'rb')
    else:
        handle = File(filename)
    try:
        # Only feed the first 64 KiB to hachoir.
        head = StringIO(handle.read(1024 * 64))
        parser = guessParser(InputIOStream(head, filename=unicode(filename), tags=[]))
        metadata = extractMetadata(parser)
    finally:
        handle.close()
    return metadata
def _duration(self):
    """ File duration in sec """
    # Return the cached value when a previous call already computed it.
    cached = getattr(self, '_duration_cache', None)
    if cached:
        return cached
    duration = extractMetadata(guessParser(InputIOStream(self))).get('duration')
    if not duration:
        raise Exception(u'Not an audio file')
    seconds = duration.seconds
    self._duration_cache = seconds
    return seconds
def run():
    # Main urwid event loop: dispatch keyboard/resize events to the UI.
    msg = _resize = retry = 0
    events = ( "window resize", )
    profile_display = args.profile_display
    while True:
        for e in events:
            try:
                if e == "window resize":
                    size = ui.get_cols_rows()
                    resize = log.height
                else:
                    e = top.keypress(size, e)
                    if e is None:
                        pass
                    elif e in ('f1', '?'):
                        # Toggle/help tab: select it if open, append otherwise.
                        try:
                            body.select(body.tabs.index(help))
                        except ValueError:
                            body.append(help)
                            resize = log.height
                    elif e in ('esc', 'ctrl w'):
                        body.close()
                        if body.box_widget is None:
                            return
                        resize = log.height
                    elif e == '+':
                        # Shrink the log pane.
                        if log.height:
                            resize = log.height - 1
                    elif e == '-':
                        # Grow the log pane.
                        resize = log.height + 1
                    elif e == 'q':
                        return
            #except AssertionError:
            #    hachoir_log.error(getBacktrace())
            except NewTab_Stream, e:
                # Open a sub-stream in a new tree tab.
                stream = e.field.getSubIStream()
                logger.objects[stream] = e = "%u/%s" % (body.active, e.field.absolute_address)
                parser = guessParser(stream)
                if not parser:
                    hachoir_log.error(_("No parser found for %s") % stream.source)
                else:
                    logger.objects[parser] = e
                    body.append((e, TreeBox(charset, Node(parser, None), preload_fields, None, options)))
                    resize = log.height
            except NeedInput, e:
                input.do(*e.args)
        if profile_display:
            # NOTE(review): reconstructed nesting — confirm this tail ran
            # once per outer iteration in the original layout
            events = events[1:]
            break
def extract(self, myfile):
    """Extract hachoir metadata from *myfile* (wrapper with open()/name)."""
    if not self.available():
        return
    dataIO = myfile.open("r")
    # Build a unicode filename for hachoir's source tagging.
    filename, realname = unicodeFilename(myfile.name), myfile.name
    source = "file:%s" % filename
    args = {"tags" : [], "filename" : filename}
    stream = InputIOStream(dataIO, source=source, **args)
    parser = guessParser(stream)
    try:
        metadata = extractMetadata(parser)
    except HachoirError, err:
        # Extraction failures are reported but not fatal.
        print "Metadata extraction error: %s" % unicode(err)
        metadata = None
def get_track(self, track):
    # Try each candidate URL in turn until one downloads and parses.
    for url in track.urls:
        f, parser = None, None
        try:
            f = self.download_url(url)
            parser = guessParser(InputIOStream(f))
        except Exception, e:
            # Best effort: log and move on to the next URL.
            LOG.exception('Unable to handle url: %s' % url)
            continue
        if parser:
            metadata = extractMetadata(parser)
            new_track = models.Track(reference_key = track.reference_key, url = url)
            new_track.title = metadata.get('title')
            new_track.artist = metadata.get('author')
            # 'duration' is a timedelta: fold days + seconds into seconds.
            new_track.duration = 24 * 60 * 60 * metadata.get('duration').days + metadata.get('duration').seconds
            return new_track
def getMetadata(vidFile):
    """Return hachoir metadata for *vidFile*, or None if it cannot be read."""
    try:
        vidFile.seek(0)
    except (AttributeError, IOError):
        return None
    parser = guessParser(InputIOStream(vidFile, None, tags=[]))
    if parser is None:
        return None
    try:
        return extractMetadata(parser)
    except HachoirError:
        return None
def attributes(self, node):
    # Build a VMap of metadata attributes extracted from the node's content.
    attr = VMap()
    attr.thisown = False
    file = node.open()
    parser = guessParser(StringInputStream(file.read()))
    file.close()
    if not parser:
        attr["info"] = Variant("unable to read metadata")
        return attr
    try:
        metadata = extractMetadata(parser)
        for data in metadata:
            # Skip entries whose values are all empty/falsy.
            if not(any(data.values)):
                continue
            # Join multi-valued entries with "; ".
            attr[data.key] = Variant("; ".join([str(val.value) for val in data.values]))
    except HachoirError, err:
        attr["info"] = Variant("error while reading metadata")
def useSummary(self, summary): # FIXME: Remove this hack # Problem: there is no method to get all fragments from a file summary.parent._feedAll() # --- stream = summary.getSubIStream() summary = guessParser(stream) if not summary: print "Unable to create summary parser" if "os" in summary: self.os = summary["os"].display if "section[0]" not in summary: return summary = summary["section[0]"] for property in summary.array("property_index"): self.useProperty(summary, property)
def _verify_download(self, file_name=None):
    """
    Checks the saved file to see if it was actually valid, if not then
    consider the download a failure.
    """
    # primitive verification of torrents, just make sure we didn't get a
    # text file or something
    if not file_name.endswith('torrent'):
        return True
    try:
        with open(file_name, 'rb') as handle:
            parsed = guessParser(StringInputStream(handle.read()))
        if parsed._getMimeType() == 'application/x-bittorrent':
            return True
    except Exception as e:
        sickrage.srCore.srLogger.debug("Failed to validate torrent file: {}".format(e.message))
    sickrage.srCore.srLogger.debug("Result is not a valid torrent file")
    return False
def _verify_download(self, file_name=None):
    """
    Checks the saved file to see if it was actually valid, if not then
    consider the download a failure.
    """
    result = True

    # primitive verification of torrents, just make sure we didn't get a text file or something
    if GenericProvider.TORRENT == self.providerType:
        parser = stream = None
        try:
            stream = FileInputStream(file_name)
            parser = guessParser(stream)
        except Exception:
            # narrowed from a bare except: an unreadable file simply
            # means the download is not a valid torrent
            pass
        result = parser and 'application/x-bittorrent' == parser.mime_type
        # always release the underlying file handle; guard against the
        # stream never having been created
        try:
            if stream:
                stream._input.close()
        except Exception:
            pass

    return result
def _verify_download(self, file_name=None):
    """
    Checks the saved file to see if it was actually valid, if not then
    consider the download a failure.
    """
    # primitive verification of torrents, just make sure we didn't get a
    # text file or something
    if not file_name.endswith(GenericProvider.TORRENT):
        return True
    try:
        for chunk in readFileBuffered(file_name):
            mime_type = guessParser(StringInputStream(chunk))._getMimeType()
            if mime_type == "application/x-bittorrent":
                # clean up
                del mime_type
                return True
    except Exception as e:
        sickrage.srLogger.debug("Failed to validate torrent file: {}".format(e.message))
    sickrage.srLogger.debug("Result is not a valid torrent file")
    return False
def metadata_for_filelike(filelike):
    """Return a plain dict mapping metadata keys to their first value.

    Returns None when the object is not seekable, not parseable, or
    metadata extraction fails.
    """
    try:
        filelike.seek(0)
    except (AttributeError, IOError):
        return None
    parser = guessParser(InputIOStream(filelike, None, tags=[]))
    if parser is None:
        return None
    try:
        metadata = extractMetadata(parser)
    except HachoirError:
        return None
    result = {}
    for _, item in metadata._Metadata__data.iteritems():
        if item.values:
            result[item.key] = item.values[0].value
    return result
def META_OLECF(s, buff):
    """Extract OLE compound-file metadata from *buff* into a plain dict.

    Single-valued entries map key -> text; multi-valued entries map
    key -> list of texts. Returns an empty dict when the buffer cannot
    be parsed or yields no metadata.
    """
    META_DICT = { }

    try:
        stream = InputIOStream(StringIO(buff))
        parser = guessParser(stream)
        meta = extractMetadata(parser)
    except Exception:  # narrowed from a bare except
        return META_DICT

    # fix: extractMetadata can return None for unrecognised content, which
    # previously crashed on sorted(None) outside the try block
    if not meta:
        return META_DICT

    for data in sorted(meta):
        if data.values:
            if len(data.values) == 1:
                META_DICT['%s' % data.key] = data.values[0].text
            else:
                values = []
                for value in data.values:
                    values.append(value.text)
                META_DICT['%s' % data.key] = values

    return META_DICT
def main(self):
    # Extract JPEG pictures and sounds from a Flash (SWF) document given
    # on the command line.
    if len(argv) != 2:
        print >>stderr, "usage: %s document.swf" % argv[0]
        exit(1)
    realname = argv[1]
    filename = unicodeFilename(realname)
    parser = createParser(filename, real_filename=realname)
    if parser["signature"].value == "CWS":
        # Compressed SWF: re-parse the inflated payload instead.
        deflate_swf = parser["compressed_data"].getSubIStream()
        parser = guessParser(deflate_swf)
    if "jpg_table/data" in parser:
        # JPEG pictures with common header
        jpeg_header = parser["jpg_table/data"].value[:-2]
        for field in parser.array("def_bits"):
            jpeg_content = field["image"].value[2:]
            if self.verbose:
                print "Extract JPEG from %s" % field.path
            self.storeJPEG(jpeg_header + jpeg_content)
    # JPEG in format 2/3
    for field in parser.array("def_bits_jpeg2"):
        self.extractFormat2(field)
    for field in parser.array("def_bits_jpeg3"):
        self.extractFormat2(field)
    # Extract sound
    #self.extractSound(parser)
    self.extractSound2(parser)
    # Does it extract anything?
    if self.jpg_index == 1:
        print "No JPEG picture found."
    if self.snd_index == 1:
        print "No sound found."
usage = "usage: %prog <file_name>" op = OptionParser(usage) (options, args) = op.parse_args() if len(args) != 1: op.print_help() sys.exit(1) inputFileName = unicode(args[0]) try: stream = FileInputStream(inputFileName) except InputStreamError, err: exit("Unable to open file: %s" % err) try: data = guessParser(stream) if not data: exit("Unable to parse file: %s" % inputFileName) for struct in data.allFeatures(): print "%08X: %s = %s" % ( (struct.address) / 8, struct.path, struct.display) return 1 for struct in data: print "%08X: %s = %s" % ( (struct.address) / 8, struct.path, struct.display) try: iter_exists = getattr(struct, "__iter__", None) except AttributeError:
def upload(request):
    """View that displays the upload form and processes upload form
    submissions."""
    # Django's 'permission_required' decorator redirects to the login
    # form even if the user is already logged in. That sucks, so we
    # don't use it, and we do the permission check in the code
    # instead.
    if not request.user.has_perm('gallery.can_upload'):
        raise PermissionDenied
    if request.method == 'POST':
        mediatype_form = MediaTypeForm(request.POST)
        upload_formset = UploadFormSet(request.POST, request.FILES)
        if 'cancel' in request.POST:
            request.notifications.add(_('Upload canceled.'))
            return HttpResponseRedirect(reverse('bm.gallery.views.index'))
        # Only moderators may upload video.
        if (mediatype_form.data['mediatype'] == 'video' and
                not request.user.has_perm('gallery.can_review')):
            raise PermissionDenied
        if mediatype_form.is_valid() and upload_formset.is_valid():
            instances = []
            # Hard cap on batch size.
            if len([u for u in upload_formset.cleaned_data if u]) >= 10:
                raise PermissionDenied
            for cleaned_data in upload_formset.cleaned_data:
                if not cleaned_data:
                    continue
                file_ = cleaned_data['file_']
                # extract media metadata
                stream = InputIOStream(file_)
                parser = hachoir_parser.guessParser(stream)
                metadata = hachoir_metadata.extractMetadata(parser)
                # create model instance
                mediatype = mediatype_form.cleaned_data['mediatype']
                model_class = models.mediatype_map[mediatype]['klass']
                # Allocate the primary key up front so the slug can embed it.
                cursor = connection.cursor()
                cursor.execute("SELECT nextval ('gallery_mediabase_id_seq')")
                id_ = cursor.fetchone()[0]
                slug = '%s.%d' % (request.user.username, id_)
                model_args = {'id': id_,
                              'owner': request.user,
                              'slug': slug,
                              'status': 'uploaded'}
                if hasattr(model_class, 'IKOptions'):
                    # we're some type of image object
                    model_args['image'] = file_
                else:
                    model_args['filefield'] = file_
                for dimension in ('width', 'height'):
                    dimvalue = metadata.get(dimension, False)
                    if dimvalue:
                        model_args[dimension] = dimvalue
                if mediatype == 'video' and not file_.name.endswith('flv'):
                    # Non-FLV videos are flagged for transcoding.
                    model_args['encode'] = True
                try:
                    year = metadata.get('creation_date').year
                    model_args['year'] = year
                except ValueError:
                    # no creation date in metadata
                    pass
                instance = model_class(**model_args)
                instances.append(instance)
            # we're not using Django's transaction middleware, so we
            # fake our own transaction behavior here. should probably
            # switch to using the middleware...
            try:
                for instance in instances:
                    instance.save()
            except:
                for instance in instances:
                    instance.delete()
                raise
            request.notifications.add(_('Resources uploaded.'))
            url = '%s/edit' % instance.get_absolute_url()
            batch_length = len(instances)
            if batch_length > 1:
                # Pass the whole batch to the edit page via query string.
                ids = [str(i.id) for i in instances]
                url = '%s?batch_length=%d&ids=%s' % (url, batch_length,
                                                     ','.join(ids))
            return HttpResponseRedirect(url)
    else:
        mediatype_form = MediaTypeForm()
        upload_formset = UploadFormSet()
    # only moderators can upload video
    if not request.user.has_perm('gallery.can_review'):
        mediatype_field = mediatype_form.fields['mediatype']
        choices = mediatype_field.choices
        mediatype_field.choices = [choice for choice in choices
                                   if choice[0] != 'video']
    return render_to_response('gallery/upload.html',
                              {'mediatype_form': mediatype_form,
                               'upload_formset': upload_formset},
                              context_instance=RequestContext(request))
def ParseFileObject(self, parser_mediator, file_object, **kwargs):
    """Parses a file-like object using Hachoir.

    Args:
      parser_mediator: A parser context object (instance of ParserContext).
      file_object: A file-like object.

    Raises:
      UnableToParseFile: when the file cannot be parsed.
    """
    file_name = parser_mediator.GetDisplayName()

    try:
        fstream = hachoir_core.stream.InputIOStream(file_object, None, tags=[])
    except hachoir_core.error.HachoirError as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.NAME, file_name, exception))

    if not fstream:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.NAME, file_name, 'Not fstream'))

    try:
        doc_parser = hachoir_parser.guessParser(fstream)
    except hachoir_core.error.HachoirError as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.NAME, file_name, exception))

    if not doc_parser:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.NAME, file_name, 'Not parser'))

    try:
        metadata = hachoir_metadata.extractMetadata(doc_parser)
    except (AssertionError, AttributeError) as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.NAME, file_name, exception))

    try:
        metatext = metadata.exportPlaintext(human=False)
    except AttributeError as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.NAME, file_name, exception))

    if not metatext:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: No metadata'.format(
                self.NAME, file_name))

    attributes = {}
    extracted_events = []
    for meta in metatext:
        # Lines of interest in the plaintext export look like "- Key: value".
        if not meta.startswith('-'):
            continue
        if len(meta) < 3:
            continue

        key, _, value = meta[2:].partition(': ')
        key2, _, value2 = value.partition(': ')
        if key2 == 'LastPrinted' and value2 != 'False':
            # Nested "LastPrinted" entries carry a printable time string.
            date_object = timelib.Timestamp.FromTimeString(
                value2, timezone=parser_mediator.timezone)
            if isinstance(date_object, datetime.datetime):
                extracted_events.append((date_object, key2))

        try:
            date = metadata.get(key)
            if isinstance(date, datetime.datetime):
                extracted_events.append((date, key))
        except ValueError:
            pass

        # Collect repeated keys into lists, single keys as scalars.
        if key in attributes:
            if isinstance(attributes.get(key), list):
                attributes[key].append(value)
            else:
                old_value = attributes.get(key)
                attributes[key] = [old_value, value]
        else:
            attributes[key] = value

    if not extracted_events:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.NAME, file_name, 'No events discovered'))

    for date, key in extracted_events:
        event_object = HachoirEvent(date, key, attributes)
        parser_mediator.ProduceEvent(event_object)
def Parse(self, file_entry):
    """Extract data from a file using Hachoir.

    Args:
      file_entry: A file entry object.

    Yields:
      An event object (instance of EventObject) that contains the parsed
      attributes.
    """
    file_object = file_entry.GetFileObject()

    try:
        fstream = hachoir_core.stream.InputIOStream(file_object, None, tags=[])
    except hachoir_core.error.HachoirError as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.parser_name, file_entry.name, exception))

    if not fstream:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.parser_name, file_entry.name, 'Not fstream'))

    try:
        doc_parser = hachoir_parser.guessParser(fstream)
    except hachoir_core.error.HachoirError as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.parser_name, file_entry.name, exception))

    if not doc_parser:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.parser_name, file_entry.name, 'Not parser'))

    try:
        metadata = hachoir_metadata.extractMetadata(doc_parser)
    except (AssertionError, AttributeError) as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.parser_name, file_entry.name, exception))

    try:
        metatext = metadata.exportPlaintext(human=False)
    except AttributeError as exception:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.parser_name, file_entry.name, exception))

    if not metatext:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: No metadata'.format(
                self.parser_name, file_entry.name))

    attributes = {}
    extracted_events = []
    for meta in metatext:
        # Lines of interest in the plaintext export look like "- Key: value".
        if not meta.startswith('-'):
            continue
        if len(meta) < 3:
            continue

        key, _, value = meta[2:].partition(': ')
        key2, _, value2 = value.partition(': ')
        if key2 == 'LastPrinted' and value2 != 'False':
            # Nested "LastPrinted" entries carry a printable time string.
            date_object = timelib.StringToDatetime(
                value2, timezone=self._pre_obj.zone)
            if isinstance(date_object, datetime.datetime):
                extracted_events.append((date_object, key2))

        try:
            date = metadata.get(key)
            if isinstance(date, datetime.datetime):
                extracted_events.append((date, key))
        except ValueError:
            pass

        # Collect repeated keys into lists, single keys as scalars.
        if key in attributes:
            if isinstance(attributes.get(key), list):
                attributes[key].append(value)
            else:
                old_value = attributes.get(key)
                attributes[key] = [old_value, value]
        else:
            attributes[key] = value

    if not extracted_events:
        raise errors.UnableToParseFile(
            u'[{0:s}] unable to parse file {1:s}: {2:s}'.format(
                self.parser_name, file_entry.name, 'No events discovered'))

    for date, key in extracted_events:
        yield HachoirEvent(date, key, attributes)