def onMatch(self, field):
    """Print one ':'-separated line describing a matched field.

    The line is assembled from the enabled pieces, in this order:
    filename, address, field path, field value.  Nothing is printed when
    ``self.display`` is false or when no piece is enabled.  When
    ``self.display_percent`` is set, a "[xx.x%] " prefix is written to
    stderr before the line goes to stdout.
    """
    if not self.display:
        return

    # The address is needed both for the address column and the percentage.
    if self.display_percent or self.display_address:
        address = field.absolute_address

    parts = []
    if self.display_filename:
        parts.append(makePrintable(self.filename, self.term_charset))
    if self.display_address:
        byte_offset, bit_offset = divmod(address, 8)
        if bit_offset:
            parts.append("%u.%u" % (byte_offset, bit_offset))
        else:
            parts.append(str(byte_offset))
    if self.display_path:
        parts.append(field.path)
    if self.display_value:
        parts.append(makePrintable(field.value, self.term_charset))

    if not parts:
        return
    line = ":".join(parts)

    if self.display_percent:
        progress = float(address) * 100 / field.parent.root.size
        # Flush stdout first so the stderr prefix lands right before the line.
        sys.stdout.flush()
        sys.stderr.write("[%02.1f%%] " % progress)
        sys.stderr.flush()
    print(line)
def unicodeFilename(filename, charset=None):
    """Decode ``filename`` to unicode using ``charset``.

    When ``charset`` is empty, the value of getTerminalCharset() is used.
    If decoding raises UnicodeDecodeError, the result of
    makePrintable(filename, charset, to_unicode=True) is returned instead.
    """
    encoding = charset or getTerminalCharset()
    try:
        decoded = unicode(filename, encoding)
    except UnicodeDecodeError:
        decoded = makePrintable(filename, encoding, to_unicode=True)
    return decoded
def createFields(self):
    """Yield the fields of a length-prefixed string.

    Yields a "length" String (the bytes before the ':' separator), a
    one-byte "separator" String, and then a "value" field: a String for
    lengths below 512, RawBytes otherwise.  Nothing is yielded for the
    value when the declared length is zero.

    Raises ParserError if no ':' is found within the search window, if the
    length prefix is empty, or if it does not parse as a non-negative int.
    """
    start = self.absolute_address
    search_end = start + (MAX_STRING_LENGTH + 1) * 8
    prefix_size = self.stream.searchBytesLength(b':', False, start, search_end)
    if prefix_size is None:
        raise ParserError("Torrent: unable to find string separator (':')")
    if not prefix_size:
        raise ParserError("Torrent: error: no string length!")

    length_field = String(self, "length", prefix_size, "String length")
    yield length_field

    try:
        size = int(length_field.value)
    except ValueError:
        # Use a negative sentinel so the range check below reports the error.
        size = -1
    if size < 0:
        raise ParserError(
            "Invalid string length (%s)"
            % makePrintable(length_field.value, "ASCII"))

    yield String(self, "separator", 1, "String length/value separator")

    if not size:
        self.info("Empty string: len=%i" % size)
        return

    if size < 512:
        yield String(self, "value", size, "String value",
                     charset="ISO-8859-1")
    else:
        # Probably raw data
        yield RawBytes(self, "value", size, "Raw data")
def __init__(self, parent, name, length, description=None, parser=None,
             filename=None, mime_type=None, parser_class=None):
    """Initialise the byte field and register a tagged sub-input-stream factory.

    A non-unicode ``filename`` is converted with makePrintable(...,
    "ISO-8859-1").  If a filename is given and ``description`` is empty, a
    default 'File "<name>" (<size>)' description is built.
    """
    if filename:
        if not isinstance(filename, unicode):
            filename = makePrintable(filename, "ISO-8859-1")
        description = description or (
            'File "%s" (%s)' % (filename, humanFilesize(length)))
    Bytes.__init__(self, parent, name, length, description)

    def createInputStream(cis, **args):
        # Append the tags derived from the constructor arguments to whatever
        # "tags" list the caller supplied (or to a fresh one).
        stream_tags = args.setdefault("tags", [])
        if parser_class:
            stream_tags.append(("class", parser_class))
        if parser is not None:
            stream_tags.append(("id", parser.PARSER_TAGS["id"]))
        if mime_type:
            stream_tags.append(("mime", mime_type))
        if filename:
            stream_tags.append(("filename", filename))
        return cis(**args)

    self.setSubIStream(createInputStream)
def getFieldType(self):
    """Return the base field type followed by "<charset[,strip=...]>".

    When ``self._strip`` is a (byte or unicode) string, its makePrintable
    form is shown; any other truthy value is shown as "strip=True".
    """
    details = self.charset
    strip = self._strip
    if strip:
        if isinstance(strip, (str, unicode)):
            details += ",strip=%s" % makePrintable(strip, "ASCII", quote="'")
        else:
            details += ",strip=True"
    return "%s<%s>" % (Bytes.getFieldType(self), details)
def getFieldType(self):
    """Return the base field type followed by "<charset[,strip=...]>".

    When ``self._strip`` is a str, its makePrintable form is shown; any
    other truthy value is shown as "strip=True".
    """
    details = self.charset
    strip = self._strip
    if strip:
        if isinstance(strip, str):
            details += ",strip=%s" % makePrintable(strip, "ASCII", quote="'")
        else:
            details += ",strip=True"
    return "%s<%s>" % (Bytes.getFieldType(self), details)
def createDisplay(self, human=True):
    """Build the display text for this string field.

    In raw mode (``human`` false) the cached raw value is used, computing
    it on first use.  Text longer than ``config.max_string_length`` is cut
    and suffixed with "(...)".  The human form of a field with a charset is
    double-quoted with embedded quotes backslash-escaped, or "(empty)" when
    there is no text; every other case is quoted by makePrintable.
    """
    if human:
        if self._charset:
            text = makePrintable(self.value, "ISO-8859-1")
        else:
            text = self.value
    else:
        if self._raw_value is None:
            self._raw_value = GenericString.createValue(self, False)
        text = makePrintable(self._raw_value, "ASCII")

    # Truncate string if needed
    limit = config.max_string_length
    if limit < len(text):
        text = "%s(...)" % text[:limit]

    if human and self._charset:
        if not text:
            return "(empty)"
        return '"%s"' % text.replace('"', '\\"')
    return makePrintable(text, "ASCII", quote='"')
def createDisplay(self, human=True):
    """Build the unicode display text for this string field.

    In raw mode (``human`` false) the cached raw value is used, computing
    it on first use.  Text longer than ``config.max_string_length`` is cut
    and suffixed with "(...)".  The human form of a field with a charset is
    double-quoted with embedded quotes backslash-escaped, or the translated
    "(empty)" marker when there is no text; every other case is quoted by
    makePrintable.
    """
    if human:
        if self._charset:
            text = makePrintable(self.value, "ISO-8859-1", to_unicode=True)
        else:
            text = self.value
    else:
        if self._raw_value is None:
            self._raw_value = GenericString.createValue(self, False)
        text = makePrintable(self._raw_value, "ASCII", to_unicode=True)

    # Truncate string if needed
    limit = config.max_string_length
    if limit < len(text):
        text = "%s(...)" % text[:limit]

    if human and self._charset:
        if not text:
            return _("(empty)")
        return '"%s"' % text.replace('"', '\\"')
    return makePrintable(text, "ASCII", quote='"', to_unicode=True)
def __init__(self, parent, name, length, decompressor, description=None,
             parser=None, filename=None, mime_type=None, parser_class=None):
    """Initialise the byte field and set up its decompressed input stream.

    A non-unicode ``filename`` is converted with makePrintable(...,
    "ISO-8859-1").  If a filename is given and ``description`` is empty, a
    default 'File "<name>" (<size>)' description is built.
    """
    if filename:
        if not isinstance(filename, unicode):
            filename = makePrintable(filename, "ISO-8859-1")
        description = description or (
            'File "%s" (%s)' % (filename, humanFilesize(length)))
    Bytes.__init__(self, parent, name, length, description)
    self.setupInputStream(
        decompressor, parser, filename, mime_type, parser_class)
def _createDisplay(self, human):
    """Build the quoted display text for this byte field.

    At most ``config.max_byte_length`` bytes are shown.  When
    ``self._getValue`` is a plain function the field value is used;
    otherwise the bytes are read from the parent stream once and cached in
    ``self._display``.  "(...)" marks output considered truncated.
    """
    limit = config.max_byte_length
    if isinstance(self._getValue, types.FunctionType):
        shown = makePrintable(self.value[:limit], "ASCII")
    else:
        if self._display is None:
            start = self.absolute_address
            count = min(self._size // 8, limit)
            self._display = self._parent.stream.readBytes(start, count)
        shown = makePrintable(self._display, "ASCII")

    truncated = (8 * len(shown) < self._size)
    if not human:
        template = '"%s(...)"' if truncated else '"%s"'
        return template % shown
    if truncated:
        shown += "(...)"
    return makePrintable(shown, "latin-1", quote='"')
def _getDescription(self):
    """Return the field description, computing and caching it on first use.

    A byte-string result of createDescription() is converted with
    makePrintable(..., "ISO-8859-1", to_unicode=True).  If anything raises
    while computing it, the error is reported through self.error() and the
    description becomes the empty string.
    """
    if self._description is not None:
        return self._description
    try:
        created = self._description = self.createDescription()
        if isinstance(created, str):
            self._description = makePrintable(
                created, "ISO-8859-1", to_unicode=True)
    except Exception as err:
        self.error("Error getting description: " + unicode(err))
        self._description = ""
    return self._description
def _getDescription(self):
    """Return the field description, computing and caching it on first use.

    A str result of createDescription() is passed through
    makePrintable(..., "ISO-8859-1").  If anything raises while computing
    it, the error is reported through self.error() and the description
    becomes the empty string.
    """
    if self._description is not None:
        return self._description
    try:
        created = self._description = self.createDescription()
        if isinstance(created, str):
            self._description = makePrintable(created, "ISO-8859-1")
    except Exception as err:
        self.error("Error getting description: " + str(err))
        self._description = ""
    return self._description
def _createDisplay(self, human):
    """Build the quoted display text for this byte field.

    At most ``config.max_byte_length`` bytes are shown.  When
    ``self._getValue`` is a plain function the field value is used;
    otherwise the bytes are read from the parent stream once and cached in
    ``self._display``.  The raw form (``human`` false) is rendered through
    str2hex with a "\\x%02x" format.  "(...)" marks output considered
    truncated.
    """
    limit = config.max_byte_length
    if isinstance(self._getValue, types.FunctionType):
        shown = makePrintable(self.value[:limit], "ASCII")
    else:
        if self._display is None:
            start = self.absolute_address
            count = min(self._size // 8, limit)
            self._display = self._parent.stream.readBytes(start, count)
        shown = makePrintable(self._display, "ASCII")

    truncated = (8 * len(shown) < self._size)
    if not human:
        shown = str2hex(shown, format=r"\x%02x")
        template = '"%s(...)"' if truncated else '"%s"'
        return template % shown
    if truncated:
        shown += "(...)"
    return makePrintable(shown, "latin-1", quote='"', to_unicode=True)
def getFilename(self):
    """Return the entry's short file name as "name[.ext][/]".

    The "name" field value is passed through makePrintable (when it is a
    str), the "ext" field is appended after a dot when non-empty, and a
    trailing "/" is added for directories unless ``self.LFN`` is set.

    A leading 0x05 in the stored name stands for a real first character of
    0xE5 (FAT reserves 0xE5 as the first byte for deleted entries), so it
    is translated back to "\\xE5".
    """
    name = self["name"].value

    # The previous test `name[0] == 5` compared a one-character str with an
    # int, so it never matched, and it raised IndexError on an empty name.
    # Detect the marker on the stored value instead.
    # NOTE(review): the check is done before makePrintable on the assumption
    # that makePrintable would rewrite a raw control character -- confirm.
    has_e5_marker = isinstance(name, str) and name.startswith("\x05")
    if has_e5_marker:
        name = name[1:]

    if isinstance(name, str):
        name = makePrintable(name, "ASCII")

    ext = self["ext"].value
    if ext:
        name += "." + ext

    if has_e5_marker:
        name = "\xE5" + name

    if not self.LFN and self["directory"].value:
        name += "/"
    return name
def processFile(self, filename):
    """Extract metadata from ``filename`` and return it as one '; '-joined row.

    The row starts with the printable filename, followed by the text of
    each field in ``self.fields`` (empty when the metadata lacks it).
    Returns None, after printing a message to stderr, when the file cannot
    be parsed or no metadata can be extracted.
    """
    print("[%s] Process file %s..." % (self.total, filename))

    # NOTE(review): the parser is never closed in this function -- confirm
    # whether createParser()'s result should be closed or used in a `with`.
    parser = createParser(filename)
    if not parser:
        print("Unable to parse file", file=stderr)
        return None

    try:
        metadata = extractMetadata(parser)
    except Exception as err:
        print("Metadata extraction error: %s" % str(err), file=stderr)
        return None
    if not metadata:
        print("Unable to extract metadata", file=stderr)
        return None

    columns = [makePrintable(filename, self.charset)]
    columns.extend(
        makePrintable(metadata.getText(field, ''), self.charset)
        for field in self.fields
    )
    return '; '.join(columns)
def _createDisplay(self, human):
    """Build the quoted display text for this byte field.

    At most ``config.max_byte_length`` bytes are shown.  The field value is
    tried first; if that raises, the bytes are read from the parent stream
    (once, then cached in ``self._display``).  The raw form (``human``
    false) is rendered through str2hex with a "\\x%02x" format.  "(...)"
    marks output considered truncated.
    """
    limit = config.max_byte_length
    try:
        shown = makePrintable(self.value[:limit], "ASCII")
    except Exception:
        # Best-effort fallback: show the bytes straight from the stream.
        if self._display is None:
            start = self.absolute_address
            count = min(self._size // 8, limit)
            self._display = self._parent.stream.readBytes(start, count)
        shown = makePrintable(self._display, "ASCII")

    truncated = (8 * len(shown) < self._size)
    if not human:
        shown = str2hex(shown, format=r"\x%02x")
        template = '"%s(...)"' if truncated else '"%s"'
        return template % shown
    if truncated:
        shown += "(...)"
    return makePrintable(shown, "latin-1", quote='"', to_unicode=True)
def __str__(self):
    r"""
    Render the metadata as a single ASCII string, one exported line per row.

    Every line returned by exportPlaintext() goes through
    makePrintable(line, "ASCII"); the results are joined with "\n".

    >>> a = RootMetadata()
    >>> a.author = "haypo"
    >>> a.copyright = unicode("© Hachoir", "UTF-8")
    >>> print a
    Metadata:
    - Author: haypo
    - Copyright: \xa9 Hachoir

    @see __unicode__() and exportPlaintext()
    """
    printable_lines = [
        makePrintable(entry, "ASCII") for entry in self.exportPlaintext()
    ]
    return "\n".join(printable_lines)
def description(self):
    """str: Informal description of this field. Cached.

    A description may summarise how the field is used or, for field sets,
    hint at the contents without the node having to be expanded.

    The value comes from createDescription() on first access; a str result
    is passed through makePrintable(..., "ISO-8859-1").  If computing it
    raises, the error is reported through self.error() and the description
    becomes the empty string.
    """
    if self._description is not None:
        return self._description
    try:
        created = self._description = self.createDescription()
        if isinstance(created, str):
            self._description = makePrintable(created, "ISO-8859-1")
    except Exception as err:
        self.error("Error getting description: " + str(err))
        self._description = ""
    return self._description
def __init__(self, parent, name, length, decompressor, description=None,
             parser=None, filename=None, mime_type=None, parser_class=None):
    """Initialise the byte field and set up its decompressed input stream.

    A non-str ``filename`` is converted with makePrintable(...,
    "ISO-8859-1").  If a filename is given and ``description`` is empty, a
    default 'File "<name>" (<size>)' description is built.
    """
    if filename:
        if not isinstance(filename, str):
            filename = makePrintable(filename, "ISO-8859-1")
        description = description or (
            'File "%s" (%s)' % (filename, humanFilesize(length)))
    Bytes.__init__(self, parent, name, length, description)
    self.setupInputStream(
        decompressor, parser, filename, mime_type, parser_class)
def processID3v2(self, field):
    """Store the text of one ID3v2 frame on the attribute mapped to its tag.

    Frames without "content" or without a "text" entry are ignored.  When
    the content has a non-empty "title", the stored value is
    "<title>: <text>".  Tags missing from ``self.TAG_TO_KEY`` are skipped,
    with a warning when the tag is non-empty.
    """
    # Read value
    if "content" not in field:
        return
    content = field["content"]
    if "text" not in content:
        return
    has_title = "title" in content and content["title"].value
    if has_title:
        value = "%s: %s" % (content["title"].value, content["text"].value)
    else:
        value = content["text"].value

    # Known tag?
    tag = field["tag"].value
    if tag in self.TAG_TO_KEY:
        setattr(self, self.TAG_TO_KEY[tag], value)
        return
    if not tag:
        return
    if isinstance(tag, str):
        tag = makePrintable(tag, "ISO-8859-1")
    self.warning("Skip ID3v2 tag %s: %s" % (tag, value))
def processID3v2(self, field):
    """Store the text of one ID3v2 frame on the attribute mapped to its tag.

    Frames without "content" or without a "text" entry are ignored.  When
    the content has a non-empty "title", the stored value is
    "<title>: <text>".  Tags missing from ``self.TAG_TO_KEY`` are skipped,
    with a warning when the tag is non-empty.
    """
    # Read value
    if "content" not in field:
        return
    content = field["content"]
    if "text" not in content:
        return
    has_title = "title" in content and content["title"].value
    if has_title:
        value = "%s: %s" % (content["title"].value, content["text"].value)
    else:
        value = content["text"].value

    # Known tag?
    tag = field["tag"].value
    if tag in self.TAG_TO_KEY:
        setattr(self, self.TAG_TO_KEY[tag], value)
        return
    if not tag:
        return
    if isinstance(tag, str):
        tag = makePrintable(tag, "ISO-8859-1", to_unicode=True)
    self.warning("Skip ID3v2 tag %s: %s" % (tag, value))
def __repr__(self, **kw):
    """Return "<ClassName 'text'>", where text is the printable __str__ output.

    Keyword arguments are forwarded unchanged to ``__str__``.
    """
    printable = makePrintable(self.__str__(**kw), 'ASCII')
    class_name = self.__class__.__name__
    return "<%s '%s'>" % (class_name, printable)
def createDisplay(self):
    """Return the field value as a double-quoted printable unicode string."""
    shown = makePrintable(self.value, "UTF-8", quote='"', to_unicode=True)
    return shown
def __str__(self):
    """Return ``self.text`` passed through makePrintable with "ASCII"."""
    printable = makePrintable(self.text, 'ASCII')
    return printable
def processFile(values, filename, display_filename=False, priority=None,
                human=True, display=True):
    """Parse ``filename`` and print its metadata, description or MIME type.

    Metadata is extracted unless ``values.mime`` or ``values.type`` is set,
    in which case the parser's MIME type or description is reported
    instead.  With ``display_filename`` each printed line is prefixed with
    "<filename>: ".  Nothing is printed when ``display`` is false.

    Returns False when the input stream cannot be opened, no parser is
    found, or metadata extraction yields nothing; True otherwise.
    """
    charset = getTerminalCharset()

    # Create parser
    try:
        tags = ([("id", values.force_parser), None]
                if values.force_parser else None)
        parser = createParser(filename, tags=tags)
    except InputStreamError as err:
        error(str(err))
        return False
    if not parser:
        error("Unable to parse file: %s" % filename)
        return False

    with parser:
        # Extract metadata
        extract_metadata = not (values.mime or values.type)
        if extract_metadata:
            try:
                metadata = extractMetadata(parser, values.quality)
            except Exception as err:
                error(str(err))
                metadata = None
            if not metadata:
                parser.error(
                    "Hachoir can't extract metadata, but is able to parse: %s"
                    % filename)
                return False
        elif values.type:
            result = parser.description
        else:
            result = parser.mime_type

        # Rendering is kept inside the `with` so the parser is still open
        # while the results are printed.
        if display:
            # Display metadatas on stdout
            if extract_metadata:
                lines = metadata.exportPlaintext(priority=priority,
                                                 human=human)
                if not lines:
                    lines = ["(no metadata, priority may be too small)"]
                for line in lines:
                    if display_filename:
                        line = "%s: %s" % (filename, line)
                    print(makePrintable(line, charset))
            else:
                summary = result
                if display_filename:
                    summary = "%s: %s" % (filename, summary)
                print(summary)
    return True
def processFile(values, filename, display_filename=False, priority=None,
                human=True, display=True):
    """Parse ``filename`` and report its metadata, description or MIME type.

    Metadata is extracted unless ``values.mime`` or ``values.type`` is set,
    in which case the parser's MIME type or description is reported
    instead.  When ``config`` has a ``RESULT_DICTS`` attribute, a dict form
    of the result is appended to it.  When ``display`` is true the result
    is also printed, each line prefixed with "<filename>: " if
    ``display_filename`` is set.

    Returns False when the input stream cannot be opened, no parser is
    found, or metadata extraction yields nothing; True otherwise.
    """
    charset = getTerminalCharset()

    # Create parser
    try:
        tags = ([("id", values.force_parser), None]
                if values.force_parser else None)
        parser = createParser(filename, tags=tags)
    except InputStreamError as err:
        error(str(err))
        return False
    if not parser:
        error("Unable to parse file: %s" % filename)
        return False

    with parser:
        # Extract metadata
        extract_metadata = not (values.mime or values.type)
        if extract_metadata:
            try:
                metadata = extractMetadata(parser, values.quality)
            except Exception as err:
                error(str(err))
                metadata = None
            if not metadata:
                parser.error(
                    "Hachoir can't extract metadata, but is able to parse: %s"
                    % filename)
                return False
        elif values.type:
            result = parser.description
        else:
            result = parser.mime_type

        # Reporting is kept inside the `with` so the parser is still open
        # while the results are collected and printed.
        if hasattr(config, 'RESULT_DICTS'):
            # Append a Python dictionary, for use from within Python code
            if extract_metadata:
                record = metadata.exportDictionary(priority=priority,
                                                   human=human)
                if not record:
                    record = {
                        "message": "(no metadata, priority may be too small)"}
            elif values.type:
                record = {('type' if values.raw else 'Type'): result}
            else:
                record = {
                    ('mime_type' if values.raw else 'MIME type'): result}
            if display_filename:
                record.setdefault("file path", filename)
            config.RESULT_DICTS.append(record)

        if display:
            # Display metadatas on stdout
            if extract_metadata:
                lines = metadata.exportPlaintext(priority=priority,
                                                 human=human)
                if not lines:
                    lines = ["(no metadata, priority may be too small)"]
                for line in lines:
                    if display_filename:
                        line = "%s: %s" % (filename, line)
                    print(makePrintable(line, charset))
            else:
                summary = result
                if display_filename:
                    summary = "%s: %s" % (filename, summary)
                print(summary)
    return True
def __str__(self):
    """Return str(self.regex) passed through makePrintable with "ASCII"."""
    pattern_text = str(self.regex)
    return makePrintable(pattern_text, 'ASCII')
def createDisplay(self):
    """Return the field value as a single-quoted printable unicode string."""
    shown = makePrintable(self.value, "ASCII", to_unicode=True, quote="'")
    return shown
def createRawDisplay(self):
    """Return the raw display text of the field value as unicode.

    Byte strings go through makePrintable(..., "ASCII", to_unicode=True);
    any other value is converted with unicode().
    """
    raw = self.value
    if not isinstance(raw, str):
        return unicode(raw)
    return makePrintable(raw, "ASCII", to_unicode=True)
def createDisplay(self):
    """Return the field value as a single-quoted printable string."""
    shown = makePrintable(self.value, "ASCII", quote="'")
    return shown
def createDisplay(self):
    """Return "<padding pattern=...>" or fall back to the Bytes display.

    The pattern form is used only when ``self._display_pattern`` is set.
    """
    if not self._display_pattern:
        return Bytes.createDisplay(self)
    quoted_pattern = makePrintable(self.pattern, "ASCII", quote="'")
    return u"<padding pattern=%s>" % quoted_pattern
def createDisplay(self):
    """Return "<padding pattern=...>" or fall back to the Bytes display.

    The pattern form is used only when ``self._display_pattern`` is set.
    """
    if not self._display_pattern:
        return Bytes.createDisplay(self)
    quoted_pattern = makePrintable(self.pattern, "ASCII", quote="'")
    return "<padding pattern=%s>" % quoted_pattern
def update(self, node):
    """Rebuild the text line for ``node`` and pass it to ``node.setText``.

    The line consists of an indentation/marker prefix, the field address
    (or "-> " for zero-size fields), the name, and, depending on the bits
    set in ``self.flags``, the value, description, type and size.
    """
    field = node.field
    flags = self.flags

    # Prefix: indentation plus a marker for nested nodes; the root node
    # shows the stream source instead of a field name.
    if node.depth:
        label = ' ' * (3 * node.depth - 2)
        if node.childs:
            label += '- '
        elif field.is_field_set:
            label += '+ '
        else:
            label += ' '
        name = field.name
    else:
        label = ''
        name = field.stream.source

    # Address, shown as byte[.bit] in hexadecimal or decimal.
    if field.size:
        if flags & self.use_absolute_address:
            address = field.absolute_address
        else:
            address = field.address
        byte_part = address // 8
        bit_part = address % 8
        display_bits = bit_part != 0 or (field.size % 8) != 0
        if flags & self.hex_address:
            if display_bits:
                position = "%04x.%x" % (byte_part, bit_part)
            else:
                position = "%04x" % byte_part
        else:
            if display_bits:
                position = "%u.%u" % (byte_part, bit_part)
            else:
                position = "%u" % byte_part
        label += position
        label += ") " + name
    else:
        label += "-> " + name

    # Value: the raw display turns off makePrintable's smart mode below.
    smart_display = True
    if flags & self.display_value and field.hasValue():
        if flags & self.human_size:
            shown_value = field.display
        else:
            shown_value = field.raw_display
            smart_display = False
        label += "= %s" % shown_value

    # Description
    if field.description and flags & self.display_description:
        description = field.description
        if not (flags & self.human_size):
            description = makePrintable(description, "ASCII")
        label += ": %s" % description

    # Type and/or size, comma-separated inside parentheses.
    if ((flags & self.display_size) and field.size) \
            or (flags & self.display_type):
        extras = []
        if flags & self.display_type:
            extras.append(field.getFieldType())
        if flags & self.display_size:
            if field.size % 8:
                extras.append(humanBitSize(field.size))
            else:
                byte_count = field.size // 8
                if flags & self.human_size:
                    extras.append(humanFilesize(byte_count))
                else:
                    extras.append("%u bytes" % byte_count)
        label += " (%s)" % ", ".join(extras)

    label = makePrintable(label, self.charset, smart=smart_display)
    node.setText(label, self.flags)
def processHeader(self, header):
    """Fill metadata attributes from the sections present in ``header``.

    Handles, when present: extended descriptors ("ext_desc"), file
    properties ("file_prop"), the codec list, every "stream_prop" entry
    (one audio/video metadata group each) and the "metadata" block
    (title, author, copyright).
    """
    codec_names = []
    is_vbr = None

    if "ext_desc/content" in header:
        # Extract all data from ext_desc
        data = {}
        for desc in header.array("ext_desc/content/descriptor"):
            self.useExtDescItem(desc, data)

        # Have ToolName and ToolVersion? If yes, group them to producer key
        if "ToolName" in data and "ToolVersion" in data:
            self.producer = "%s (version %s)" % (
                data["ToolName"], data["ToolVersion"])
            del data["ToolName"]
            del data["ToolVersion"]

        # "IsVBR" key
        if "IsVBR" in data:
            is_vbr = (data.pop("IsVBR") == 1)

        # Store data: mapped keys go to their attribute, anything else is
        # stored as a "key=value" comment.
        for key, value in data.iteritems():
            if key in self.EXT_DESC_TO_ATTR:
                attr = self.EXT_DESC_TO_ATTR[key]
            else:
                if isinstance(key, str):
                    key = makePrintable(key, "ISO-8859-1", to_unicode=True)
                value = "%s=%s" % (key, value)
                attr = "comment"
            setattr(self, attr, value)

    if "file_prop/content" in header:
        self.useFileProp(header["file_prop/content"], is_vbr)

    if "codec_list/content" in header:
        for codec in header.array("codec_list/content/codec"):
            if "name" not in codec:
                continue
            text = codec["name"].value
            if "desc" in codec and codec["desc"].value:
                text = "%s (%s)" % (text, codec["desc"].value)
            codec_names.append(text)

    audio_index = 1
    video_index = 1
    for index, stream_prop in enumerate(header.array("stream_prop")):
        if "content/audio_header" in stream_prop:
            meta = Metadata(self)
            self.streamProperty(header, index, meta)
            self.streamAudioHeader(stream_prop["content/audio_header"], meta)
            # The counter advances only when addGroup() reports success.
            if self.addGroup("audio[%u]" % audio_index, meta,
                             "Audio stream #%u" % audio_index):
                audio_index += 1
        elif "content/video_header" in stream_prop:
            meta = Metadata(self)
            self.streamProperty(header, index, meta)
            self.streamVideoHeader(stream_prop["content/video_header"], meta)
            if self.addGroup("video[%u]" % video_index, meta,
                             "Video stream #%u" % video_index):
                video_index += 1

    if "metadata/content" in header:
        info = header["metadata/content"]
        try:
            self.title = info["title"].value
            self.author = info["author"].value
            self.copyright = info["copyright"].value
        except MissingField:
            # Best effort: keep whatever was set before the missing field.
            pass
def processHeader(self, header):
    """Fill metadata attributes from the sections present in ``header``.

    Handles, when present: extended descriptors ("ext_desc"), file
    properties ("file_prop"), the codec list, every "stream_prop" entry
    (one audio/video metadata group each) and the "metadata" block
    (title, author, copyright).
    """
    codec_names = []
    is_vbr = None

    if "ext_desc/content" in header:
        # Extract all data from ext_desc
        data = {}
        for desc in header.array("ext_desc/content/descriptor"):
            self.useExtDescItem(desc, data)

        # Have ToolName and ToolVersion? If yes, group them to producer key
        if "ToolName" in data and "ToolVersion" in data:
            self.producer = "%s (version %s)" % (
                data["ToolName"], data["ToolVersion"])
            del data["ToolName"]
            del data["ToolVersion"]

        # "IsVBR" key
        if "IsVBR" in data:
            is_vbr = (data.pop("IsVBR") == 1)

        # Store data: mapped keys go to their attribute, anything else is
        # stored as a "key=value" comment.
        for key, value in data.items():
            if key in self.EXT_DESC_TO_ATTR:
                attr = self.EXT_DESC_TO_ATTR[key]
            else:
                if isinstance(key, str):
                    key = makePrintable(key, "ISO-8859-1")
                value = "%s=%s" % (key, value)
                attr = "comment"
            setattr(self, attr, value)

    if "file_prop/content" in header:
        self.useFileProp(header["file_prop/content"], is_vbr)

    if "codec_list/content" in header:
        for codec in header.array("codec_list/content/codec"):
            if "name" not in codec:
                continue
            text = codec["name"].value
            if "desc" in codec and codec["desc"].value:
                text = "%s (%s)" % (text, codec["desc"].value)
            codec_names.append(text)

    audio_index = 1
    video_index = 1
    for index, stream_prop in enumerate(header.array("stream_prop")):
        if "content/audio_header" in stream_prop:
            meta = Metadata(self)
            self.streamProperty(header, index, meta)
            self.streamAudioHeader(stream_prop["content/audio_header"], meta)
            # The counter advances only when addGroup() reports success.
            if self.addGroup("audio[%u]" % audio_index, meta,
                             "Audio stream #%u" % audio_index):
                audio_index += 1
        elif "content/video_header" in stream_prop:
            meta = Metadata(self)
            self.streamProperty(header, index, meta)
            self.streamVideoHeader(stream_prop["content/video_header"], meta)
            if self.addGroup("video[%u]" % video_index, meta,
                             "Video stream #%u" % video_index):
                video_index += 1

    if "metadata/content" in header:
        info = header["metadata/content"]
        try:
            self.title = info["title"].value
            self.author = info["author"].value
            self.copyright = info["copyright"].value
        except MissingField:
            # Best effort: keep whatever was set before the missing field.
            pass
def createDisplay(self):
    """Return the field value as a double-quoted printable string."""
    shown = makePrintable(self.value, "UTF-8", quote='"')
    return shown
def createRawDisplay(self):
    """Return the raw display text of the field value.

    str values go through makePrintable(..., "ASCII"); any other value is
    converted with str().
    """
    raw = self.value
    if not isinstance(raw, str):
        return str(raw)
    return makePrintable(raw, "ASCII")