def extract(self, icon):
    """Extract per-icon metadata from an ICO/CUR file.

    One ``image[N]`` metadata group is added per ``icon_header`` entry,
    carrying size, color count, bit depth, optional codec and compression
    rate.
    """
    for index, header in enumerate(icon.array("icon_header")):
        image = Metadata(self)
        # Read size and colors from header
        image.width = header["width"].value
        image.height = header["height"].value
        bpp = header["bpp"].value
        nb_colors = header["nb_color"].value
        if nb_colors != 0:
            image.nb_colors = nb_colors
        # bpp == 0 means "not stored": derive it from the palette size,
        # else fall back to 8 bits per pixel.
        if bpp == 0 and nb_colors in self.color_to_bpp:
            bpp = self.color_to_bpp[nb_colors]
        elif bpp == 0:
            bpp = 8
        image.bits_per_pixel = bpp
        image.setHeader(_("Icon #%u (%sx%s)")
                        % (1 + index,
                           image.get("width", "?"),
                           image.get("height", "?")))
        # Read compression from data (if available)
        key = "icon_data[%u]/header/codec" % index
        if key in icon:
            image.compression = icon[key].display
        key = "icon_data[%u]/pixels" % index
        if key in icon:
            computeComprRate(image, icon[key].size)
        # Store new image
        self.addGroup("image[%u]" % index, image)
def extractAVI(self, headers, **kwargs):
    """Extract AVI metadata: video/audio streams, header, bit rate, index.

    Keyword arguments:
        scan_index (bool, default True): mention the audio/video index
            in the comment when one is present.

    Fix: the previous ``(True, kwargs['scan_index'])['scan_index' in kwargs]``
    built the tuple eagerly, so ``kwargs['scan_index']`` raised ``KeyError``
    whenever the keyword was absent; ``dict.get`` gives the intended default.
    """
    audio_index = 1
    for stream in headers.array("stream"):
        if "stream_hdr/stream_type" not in stream:
            continue
        stream_type = stream["stream_hdr/stream_type"].value
        if stream_type == "vids":
            if "stream_hdr" in stream:
                meta = Metadata(self)
                self.extractAVIVideo(stream["stream_hdr"], meta)
                self.addGroup("video", meta, "Video stream")
        elif stream_type == "auds":
            if "stream_fmt" in stream:
                meta = Metadata(self)
                self.extractAVIAudio(stream["stream_fmt"], meta)
                self.addGroup("audio[%u]" % audio_index, meta,
                              "Audio stream")
                audio_index += 1
    if "avi_hdr" in headers:
        self.useAviHeader(headers["avi_hdr"])
    # Compute global bit rate: movie size is in bytes, hence * 8
    if self.has("duration") and "/movie/size" in headers:
        self.bit_rate = float(headers["/movie/size"].value) * 8 \
            / timedelta2seconds(self.get('duration'))
    # Video has index?
    scan_index = kwargs.get('scan_index', True)
    if scan_index and "/index" in headers:
        self.comment = _("Has audio/video index (%s)") \
            % humanFilesize(headers["/index"].size // 8)
def processSubtitle(self, track):
    """Build a metadata group for one subtitle track."""
    meta = Metadata(self)
    self.trackCommon(track, meta)
    try:
        # CodecID names the subtitle format; the field may be missing.
        meta.compression = track["CodecID/string"].value
    except MissingField:
        pass
    self.addGroup("subtitle[]", meta, "Subtitle")
def processVideo(self, track):
    """Build a metadata group for one video track (codec + pixel size)."""
    meta = Metadata(self)
    self.trackCommon(track, meta)
    try:
        meta.compression = track["CodecID/string"].value
        if "Video" in track:
            video_info = track["Video"]
            meta.width = video_info["PixelWidth/unsigned"].value
            meta.height = video_info["PixelHeight/unsigned"].value
    except MissingField:
        pass
    self.addGroup("video[]", meta, "Video stream")
def processAudio(self, track):
    """Build a metadata group for one audio track.

    Reads sample rate, channel count, bit depth and codec where present.
    """
    meta = Metadata(self)
    self.trackCommon(track, meta)
    if "Audio" in track:
        rate = self.getDouble(track, "Audio/SamplingFrequency")
        if rate is not None:
            meta.sample_rate = rate
        if "Audio/Channels/unsigned" in track:
            meta.nb_channel = track["Audio/Channels/unsigned"].value
        if "Audio/BitDepth/unsigned" in track:
            meta.bits_per_sample = track["Audio/BitDepth/unsigned"].value
    if "CodecID/string" in track:
        meta.compression = track["CodecID/string"].value
    self.addGroup("audio[]", meta, "Audio stream")
def extract(self, tar):
    """Add one metadata group per TAR member, up to the processing cap."""
    limit = maxNbFile(self)
    for position, entry in enumerate(tar.array("file")):
        if limit is not None and limit <= position:
            self.warning("TAR archive contains many files, "
                         "but only first %s files are processed" % limit)
            break
        meta = Metadata(self)
        self.extractFile(entry, meta)
        if meta.has("filename"):
            title = 'File "%s"' % meta.getText('filename')
        else:
            title = "File"
        self.addGroup(entry.name, meta, title)
def processFile(self, field):
    """Describe one ZIP entry and register it as a metadata group.

    Sizes come from the trailing data descriptor when present, otherwise
    from the local header fields.
    """
    meta = Metadata(self)
    meta.filename = field["filename"].value
    meta.creation_date = field["last_mod"].value
    meta.compression = field["compression"].display
    if "data_desc" in field:
        meta.file_size = field["data_desc/file_uncompressed_size"].value
        packed = field["data_desc/file_compressed_size"].value
        if packed:
            meta.compr_size = packed
    else:
        meta.file_size = field["uncompressed_size"].value
        packed = field["compressed_size"].value
        if packed:
            meta.compr_size = packed
    computeCompressionRate(meta)
    self.addGroup(field.name, meta, "File \"%s\"" % meta.get('filename'))
def extract(self, tar):
    """List the files stored in a TAR archive (bounded by maxNbFile)."""
    cap = maxNbFile(self)
    for num, member in enumerate(tar.array("file")):
        if cap is not None and cap <= num:
            self.warning("TAR archive contains many files, "
                         "but only first %s files are processed" % cap)
            break
        meta = Metadata(self)
        self.extractFile(member, meta)
        # Localized title, with the filename when one was extracted.
        title = (_('File "%s"') % meta.getText('filename')
                 if meta.has("filename") else _("File"))
        self.addGroup(member.name, meta, title)
def extract(self, mar):
    """Describe a Microsoft Archive: global info plus one group per file."""
    self.comment = "Contains %s files" % mar["nb_file"].value
    self.format_version = ("Microsoft Archive version %s"
                           % mar["version"].value)
    cap = maxNbFile(self)
    for num, entry in enumerate(mar.array("file")):
        if cap is not None and cap <= num:
            self.warning("MAR archive contains many files, "
                         "but only first %s files are processed" % cap)
            break
        meta = Metadata(self)
        meta.filename = entry["filename"].value
        meta.compression = "None"  # MAR stores members uncompressed
        meta.file_size = entry["filesize"].value
        self.addGroup(entry.name, meta,
                      "File \"%s\"" % meta.getText('filename'))
def traverse_dir(self, p_dir_entry, p_cur_path, p_list):
    """Recursively walk an ISO 9660 directory extent.

    p_dir_entry: directory record whose extent is scanned.
    p_cur_path:  path prefix accumulated so far (ends with a separator).
    p_list:      flat list of parsed directory-record fields, searched by
                 absolute location via ``self.find_entry``.

    Adds one ``file[]`` metadata group per plain file found; recurses into
    sub-directories (file_flags bit 1 set).
    """
    # Byte offset of the extent and its total length in bytes.
    l_loc = p_dir_entry["extent_loc"].value * SECTOR_SIZE
    l_len = p_dir_entry["size"].value
    l_read = 0
    if self.DEBUG is True:
        print(80 * "*")
        for l_index, l_field in enumerate(p_list):
            print("[%d] 0x%0.8x" % (l_index, l_field.absolute_address // 8))
        print(80 * "*")
    while l_read < l_len:
        l_entry = self.find_entry(l_loc, p_list)
        if l_entry is not None:
            if self.DEBUG:
                for l_field in l_entry:
                    print("%#x:%s=%s" % (l_field.absolute_address // 8,
                                         l_field.name, l_field.display))
            l_new_len = l_entry["rec_length"].value
            l_read += l_new_len
            # name_length <= 1 covers the "." and ".." pseudo-entries,
            # which are skipped.
            if l_entry["name_length"].value > 1:
                l_filename = self.get_filename(l_entry)
                if l_entry["file_flags"].value & 2:
                    # Bit 1 set: entry is a sub-directory -- recurse.
                    if self.DEBUG:
                        print("entering directory %s" % l_filename)
                    self.traverse_dir(
                        l_entry,
                        "%s%s%s" % (p_cur_path, l_filename, sep),
                        p_list)
                    if self.DEBUG:
                        print("leaving directory %s" % l_filename)
                else:
                    # Plain file: record path, dates and size.
                    (acc_time, crea_time, mod_time) = self.get_dates(l_entry)
                    meta = Metadata(self)
                    meta.filename = "%s%s" % (p_cur_path, l_filename)
                    meta.last_modification = mod_time
                    meta.creation_date = crea_time
                    meta.file_size = l_entry["size"].value
                    self.addGroup("file[]", meta,
                                  "File \"%s\"" % meta.get('filename'))
                    if self.DEBUG:
                        print("adding file[] %s" % meta.get('filename'))
            l_loc = l_loc + l_new_len
        else:
            # No record at this offset: directory records never straddle a
            # sector boundary, so skip ahead to the next sector.
            l_node_sec, l_node_rest = divmod(l_loc, SECTOR_SIZE)
            if self.DEBUG:
                print(
                    "no entry found at %#x, skipping %d bytes to sector boundary"
                    % (l_loc, SECTOR_SIZE - l_node_rest))
            l_loc = (l_node_sec + 1) * SECTOR_SIZE
            l_read += (SECTOR_SIZE - l_node_rest)
def extract(self, flv):
    """Extract FLV metadata: first audio/video tags, AMF metadata block,
    format version, and the global bit rate when a duration is known."""
    if "video[0]" in flv:
        meta = Metadata(self)
        self.extractVideo(flv["video[0]"], meta)
        self.addGroup("video", meta, "Video stream")
    if "audio[0]" in flv:
        meta = Metadata(self)
        self.extractAudio(flv["audio[0]"], meta)
        self.addGroup("audio", meta, "Audio stream")
    # TODO: Compute duration.
    # One technique: use the last video/audio chunk and read its
    # timestamp -- but that is very slow.
    self.format_version = flv.description
    if "metadata/entry[1]" in flv:
        # NOTE(review): entry[1] presumably holds the onMetaData values
        # (entry[0] being the event name) -- confirm against the FLV parser.
        self.extractAMF(flv["metadata/entry[1]"])
    if self.has('duration'):
        # flv.size is a hachoir field size (bits), so no * 8 is needed here.
        self.bit_rate = flv.size / timedelta2seconds(self.get('duration'))
def useFile(self, field):
    """Register one archive member described by *field* as a group."""
    meta = Metadata(self)
    meta.filename = field["filename"].value
    meta.file_size = field["filesize"].value
    meta.creation_date = field["timestamp"].value
    attributes = field["attributes"].value
    # "(none)" is the parser's placeholder for absent attributes.
    if attributes != "(none)":
        meta.file_attr = attributes
    title = ("File \"%s\"" % meta.getText('filename')
             if meta.has("filename") else "File")
    self.addGroup(field.name, meta, title)
def useStreamProp(self, stream, index):
    """Describe one RealMedia stream-properties chunk as ``stream[index]``."""
    meta = Metadata(self)
    meta.comment = "Start: %s" % stream["stream_start"].value
    if getValue(stream, "mime_type") == "logical-fileinfo":
        # Pseudo-stream carrying file-level properties, not media data.
        for prop in stream.array("file_info/prop"):
            self.useFileInfoProp(prop)
    else:
        meta.bit_rate = stream["avg_bit_rate"].value
        meta.duration = timedelta(milliseconds=stream["duration"].value)
        meta.mime_type = getValue(stream, "mime_type")
        meta.title = getValue(stream, "desc")
    self.addGroup("stream[%u]" % index, meta, "Stream #%u" % (1 + index))
def extract(self, ogg):
    """Extract Ogg metadata from the first pages (codec headers, comments)
    and estimate the duration from the last page's granule position."""
    # Divisor turning a granule position into seconds: the sample rate for
    # Vorbis audio, or the frame rate for raw video.
    granule_quotient = None
    for index, page in enumerate(ogg.array("page")):
        if "segments" not in page:
            continue
        page = page["segments"]
        if "vorbis_hdr" in page:
            meta = Metadata(self)
            self.vorbisHeader(page["vorbis_hdr"], meta)
            self.addGroup("audio[]", meta, "Audio")
            if not granule_quotient and meta.has("sample_rate"):
                granule_quotient = meta.get('sample_rate')
        if "theora_hdr" in page:
            meta = Metadata(self)
            self.theoraHeader(page["theora_hdr"], meta)
            self.addGroup("video[]", meta, "Video")
        if "video_hdr" in page:
            meta = Metadata(self)
            self.videoHeader(page["video_hdr"], meta)
            self.addGroup("video[]", meta, "Video")
            if not granule_quotient and meta.has("frame_rate"):
                granule_quotient = meta.get('frame_rate')
        if "comment" in page:
            readVorbisComment(self, page["comment"])
        if 3 <= index:
            # Only process pages 0..3: headers live at the stream start.
            break
    # Compute duration from the final absolute granule position.
    if granule_quotient and QUALITY_NORMAL <= self.quality:
        page = ogg.createLastPage()
        if page and "abs_granule_pos" in page:
            try:
                self.duration = timedelta(
                    seconds=float(page["abs_granule_pos"].value)
                    / granule_quotient)
            except OverflowError:
                pass
def useFile(self, field):
    """Register one archive member described by *field* (localized title)."""
    meta = Metadata(self)
    meta.filename = field["filename"].value
    meta.file_size = field["filesize"].value
    meta.creation_date = field["timestamp"].value
    file_attributes = field["attributes"].value
    # "(none)" marks the absence of attributes in the parser's display.
    if file_attributes != "(none)":
        meta.file_attr = file_attributes
    if meta.has("filename"):
        title = _("File \"%s\"") % meta.getText('filename')
    else:
        title = _("File")
    self.addGroup(field.name, meta, title)
def extract(self, mar):
    """Summarize a Microsoft Archive and register each contained file."""
    self.comment = "Contains %s files" % mar["nb_file"].value
    self.format_version = ("Microsoft Archive version %s"
                           % mar["version"].value)
    limit = maxNbFile(self)
    for pos, member in enumerate(mar.array("file")):
        if limit is not None and limit <= pos:
            self.warning("MAR archive contains many files, "
                         "but only first %s files are processed" % limit)
            break
        meta = Metadata(self)
        meta.filename = member["filename"].value
        meta.compression = "None"  # entries are stored as-is
        meta.file_size = member["filesize"].value
        self.addGroup(member.name, meta,
                      "File \"%s\"" % meta.getText('filename'))
def extract(self, ogg):
    """Scan the first Ogg pages for codec headers, then estimate duration
    from the last page's absolute granule position."""
    # Divisor converting granule positions to seconds (sample or frame rate).
    granule_quotient = None
    for page_index, raw_page in enumerate(ogg.array("page")):
        if "segments" not in raw_page:
            continue
        segments = raw_page["segments"]
        if "vorbis_hdr" in segments:
            meta = Metadata(self)
            self.vorbisHeader(segments["vorbis_hdr"], meta)
            self.addGroup("audio[]", meta, "Audio")
            if not granule_quotient and meta.has("sample_rate"):
                granule_quotient = meta.get('sample_rate')
        if "theora_hdr" in segments:
            meta = Metadata(self)
            self.theoraHeader(segments["theora_hdr"], meta)
            self.addGroup("video[]", meta, "Video")
        if "video_hdr" in segments:
            meta = Metadata(self)
            self.videoHeader(segments["video_hdr"], meta)
            self.addGroup("video[]", meta, "Video")
            if not granule_quotient and meta.has("frame_rate"):
                granule_quotient = meta.get('frame_rate')
        if "comment" in segments:
            readVorbisComment(self, segments["comment"])
        if page_index >= 3:
            break  # only pages 0..3 are inspected; headers come first
    # Duration = last granule position / granule quotient.
    if granule_quotient and QUALITY_NORMAL <= self.quality:
        last_page = ogg.createLastPage()
        if last_page and "abs_granule_pos" in last_page:
            try:
                self.duration = timedelta(
                    seconds=float(last_page["abs_granule_pos"].value)
                    / granule_quotient)
            except OverflowError:
                pass
def extract(self, rar):
    """Extract RAR archive metadata: format version, archive-level flags
    (recovery record, authenticity info, password, multi-volume, solid),
    and one metadata group per stored file (bounded by maxNbFile).

    Fixes:
    - ``l_format_version`` could be left unbound (NameError) when the
      signature matched none of the known markers; it now defaults to
      "unknown", which also triggers the existing TODO warning.
    - Removed five dead initial assignments to the flag locals, which
      were unconditionally overwritten a few lines later.
    """
    l_max_nb = maxNbFile(self)
    # --- Format version from the signature bytes ---
    l_rarformat = rar["signature"].value
    l_format_version = "unknown"
    if l_rarformat == b"RE~^":
        l_format_version = "1.4"
    elif l_rarformat[0:6] == b"Rar!\x1A\x07":
        l_marker = l_rarformat[6:7]
        if l_marker == b"\x00":
            l_format_version = "1.5"  # RAR 4
        elif l_marker == b"\x01":
            l_format_version = "5.0"
        elif l_marker == b"\x02":
            l_format_version = "> 5.0"
    self.format_version = "RAR version %s" % l_format_version
    if l_format_version != "1.5":
        # Only the 1.5 layout is fully handled below.
        self.warning("RAR TODO: unknown format_version \"%s\" "
                     % l_format_version)
    # --- Archive-level flags ---
    if rar["/archive_start/flags/has_comment"].value:
        self.warning("RAR TODO: comment extraction not implemented")
        self.comment = "HACHOIR: comment extraction not implemented"
    l_has_recovery_record = rar[
        "/archive_start/flags/has_recovery_record"].value
    l_has_auth_verification = rar[
        "/archive_start/flags/has_auth_information"].value
    l_has_password = rar["/archive_start/flags/is_locked"].value
    l_is_multivolume = rar["/archive_start/flags/vol"].value
    l_is_solid = rar["/archive_start/flags/is_solid"].value
    is_first_vol = rar["/archive_start/flags/is_first_vol"].value
    # --- Service sub-blocks may set flags the header did not ---
    for l_index, l_field in enumerate(rar.array("new_sub_block")):
        l_sub_name = l_field["filename"].value
        if l_sub_name == "CMT":
            self.warning("RAR TODO: comment unpacking not implemented")
            self.comment = "HACHOIR: comment unpacking not implemented"
        elif l_sub_name == "AV":
            l_has_auth_verification = True
        elif l_sub_name == "RR":
            l_has_recovery_record = True
        else:
            self.warning("RAR TODO: unknown sub_block \"%s\" " % l_sub_name)
    self.has_recovery_record = l_has_recovery_record
    self.has_auth_verification = l_has_auth_verification
    self.has_password = l_has_password
    self.is_multivolume = l_is_multivolume
    self.is_solid = l_is_solid
    self.is_first_vol = is_first_vol
    # --- One metadata group per archived file ---
    for l_index, l_field in enumerate(rar.array("file")):
        if l_max_nb is not None and l_max_nb <= l_index:
            self.warning(
                "RAR archive contains many files, but only first %s files are processed"
                % l_max_nb)
            break
        l_meta = Metadata(self)
        l_meta.filename = l_field["filename"].value
        l_meta.last_modification = l_field["ftime"].value
        l_meta.os = l_field["host_os"].display
        l_meta.application_version = l_field["version"].display
        l_meta.compression = l_field["method"].display
        l_meta.file_size = l_field["uncompressed_size"].value
        l_meta.compr_size = l_field["compressed_size"].value
        self.addGroup(l_field.name, l_meta,
                      "File \"%s\"" % l_meta.get('filename'))
def extract(self, icon):
    """Extract per-icon metadata (size, colors, depth, codec, compression
    rate) from an ICO/CUR file, one ``image[N]`` group per icon header."""
    for index, header in enumerate(icon.array("icon_header")):
        image = Metadata(self)
        # Read size and colors from header
        image.width = header["width"].value
        image.height = header["height"].value
        bpp = header["bpp"].value
        nb_colors = header["nb_color"].value
        if nb_colors != 0:
            image.nb_colors = nb_colors
        # A zero bpp is "unspecified": infer it from the palette size,
        # defaulting to 8 bits per pixel.
        if bpp == 0 and nb_colors in self.color_to_bpp:
            bpp = self.color_to_bpp[nb_colors]
        elif bpp == 0:
            bpp = 8
        image.bits_per_pixel = bpp
        image.setHeader(
            "Icon #%u (%sx%s)"
            % (1 + index,
               image.get("width", "?"),
               image.get("height", "?")))
        # Read compression from data (if available)
        key = "icon_data[%u]/header/codec" % index
        if key in icon:
            image.compression = icon[key].display
        key = "icon_data[%u]/pixels" % index
        if key in icon:
            computeComprRate(image, icon[key].size)
        # Store new image
        self.addGroup("image[%u]" % index, image)
def processHeader(self, header):
    """Extract ASF/WMV header metadata.

    Handles, in order: the extended-description block (mapped to metadata
    attributes or folded into comments), the file-properties block, the
    codec list, per-stream audio/video properties, and the basic
    title/author/copyright block.
    """
    # NOTE(review): `compression` is filled from the codec list but never
    # consumed in this block -- possibly the assignment to
    # self.compression lives outside this view; confirm.
    compression = []
    is_vbr = None
    if "ext_desc/content" in header:
        # Extract all data from ext_desc
        data = {}
        for desc in header.array("ext_desc/content/descriptor"):
            self.useExtDescItem(desc, data)
        # Have ToolName and ToolVersion? If yes, group them to producer key
        if "ToolName" in data and "ToolVersion" in data:
            self.producer = "%s (version %s)" % (data["ToolName"],
                                                 data["ToolVersion"])
            del data["ToolName"]
            del data["ToolVersion"]
        # "IsVBR" key: remembered and passed to useFileProp below.
        if "IsVBR" in data:
            is_vbr = (data["IsVBR"] == 1)
            del data["IsVBR"]
        # Store remaining data: known keys map to metadata attributes,
        # unknown ones become "key=value" comments.
        for key, value in data.items():
            if key in self.EXT_DESC_TO_ATTR:
                key = self.EXT_DESC_TO_ATTR[key]
            else:
                if isinstance(key, str):
                    key = makePrintable(key, "ISO-8859-1")
                value = "%s=%s" % (key, value)
                key = "comment"
            setattr(self, key, value)
    if "file_prop/content" in header:
        self.useFileProp(header["file_prop/content"], is_vbr)
    if "codec_list/content" in header:
        # Collect "name (desc)" strings for every listed codec.
        for codec in header.array("codec_list/content/codec"):
            if "name" in codec:
                text = codec["name"].value
                if "desc" in codec and codec["desc"].value:
                    text = "%s (%s)" % (text, codec["desc"].value)
                compression.append(text)
    audio_index = 1
    video_index = 1
    for index, stream_prop in enumerate(header.array("stream_prop")):
        if "content/audio_header" in stream_prop:
            meta = Metadata(self)
            self.streamProperty(header, index, meta)
            self.streamAudioHeader(stream_prop["content/audio_header"],
                                   meta)
            # addGroup returns a truthy value on success; only then does
            # the stream counter advance.
            if self.addGroup("audio[%u]" % audio_index, meta,
                             "Audio stream #%u" % audio_index):
                audio_index += 1
        elif "content/video_header" in stream_prop:
            meta = Metadata(self)
            self.streamProperty(header, index, meta)
            self.streamVideoHeader(stream_prop["content/video_header"],
                                   meta)
            if self.addGroup("video[%u]" % video_index, meta,
                             "Video stream #%u" % video_index):
                video_index += 1
    if "metadata/content" in header:
        info = header["metadata/content"]
        try:
            self.title = info["title"].value
            self.author = info["author"].value
            self.copyright = info["copyright"].value
        except MissingField:
            pass