def _get_config(archive: tarfile.TarFile) -> Tuple[Dict, Dict, File]:
    """
    Extracts Docker image archive manifest and configuration.
    Returns a tuple with:
    - the deserialized manifest,
    - the deserialized configuration,
    - the configuration File object to scan.

    :raises InvalidDockerArchiveException: if the manifest or the config
        file is missing or cannot be extracted.
    """
    manifest_file = archive.extractfile("manifest.json")
    if manifest_file is None:
        raise InvalidDockerArchiveException("No manifest file found.")
    # Docker archives store the manifest as a one-element JSON array.
    manifest = json.load(manifest_file)[0]

    config_file_path = manifest.get("Config")
    # BUG FIX: TarFile.getmember raises KeyError when the member is absent
    # (it never returns None), so the previous `is None` check was dead code.
    try:
        config_file_info = archive.getmember(config_file_path)
    except KeyError:
        raise InvalidDockerArchiveException("No config file found.") from None

    config_file = archive.extractfile(config_file_info)
    if config_file is None:
        raise InvalidDockerArchiveException(
            "Config file could not be extracted.")

    config_file_content = config_file.read().decode()
    return (
        manifest,
        json.loads(config_file_content),
        File(config_file_content, filename="Dockerfile or build-args"),
    )
def read_file_from_image(img: tarfile.TarFile, file_path: str, autoclose=False) -> bytes:
    """Return the raw bytes of *file_path* stored inside the image tarball.

    When *autoclose* is true the extracted handle is closed deterministically
    after reading; otherwise it is left to the garbage collector.
    """
    if not autoclose:
        return img.extractfile(file_path).read()
    with closing(img.extractfile(file_path)) as handle:
        return handle.read()
def store_song(connection, tar: tarfile.TarFile, song: SongInfo):
    """Insert *song* into the karaoke_song table and upload its media files
    (notes, mp3, cover, optional background/video) from *tar* to S3.

    The DB insert and the uploads are not atomic: the connection context
    commits the row, while uploads happen inside it.
    """
    client = boto3.client('s3')
    # All uploads go to the same public bucket.
    opts = {'ACL': 'public-read', 'Bucket': 'music.ponytone.online'}
    with connection:
        with connection.cursor() as cur:
            stuff = song._asdict()
            if song.parts:
                # psycopg2 needs an explicit JSON wrapper for list values.
                stuff['parts'] = psycopg2.extras.Json(song.parts)
            q = cur.execute(
                """
                INSERT INTO karaoke_song (title, artist, transcriber, genre, updated, "language",
                                          "length", preview_start, song_year, is_mlk, cover_image, parts)
                VALUES (%(title)s, %(artist)s, %(transcriber)s, %(genre)s, %(updated)s, %(language)s,
                        %(length)s, %(preview_start)s, %(song_year)s, %(is_mlk)s, %(cover)s, %(parts)s)
                RETURNING id""", stuff)
            # RETURNING id gives us the new primary key for the S3 prefix.
            id, = cur.fetchone()
            print(f"Inserted into DB: #{id}")
            # All media paths are relative to the notes file's directory.
            dirname = os.path.dirname(song.notes)
            f = tar.extractfile(song.notes)
            client.put_object(Body=f, Key=f"{id}/notes.txt", ContentType="text/plain", **opts)
            f = tar.extractfile(os.path.join(dirname, song.mp3))
            # NOTE(review): this message prints before the upload call below
            # actually runs (same for the cover) — confirm intended ordering.
            print("Uploaded MP3")
            client.put_object(Body=f, Key=f"{id}/{song.mp3}", ContentType="audio/mpeg", **opts)
            f = tar.extractfile(os.path.join(dirname, song.cover))
            print("Uploaded cover")
            client.put_object(Body=f, Key=f"{id}/{song.cover}",
                              ContentType=mimetypes.guess_type(song.cover)[0], **opts)
            if song.background:
                f = tar.extractfile(os.path.join(dirname, song.background))
                client.put_object(Body=f, Key=f"{id}/{song.background}",
                                  ContentType=mimetypes.guess_type(
                                      song.background)[0], **opts)
                print("Uploaded background")
            if song.video:
                f = tar.extractfile(os.path.join(dirname, song.video))
                client.put_object(Body=f, Key=f"{id}/{song.video}",
                                  ContentType=mimetypes.guess_type(
                                      song.video)[0], **opts)
                print("Uploaded video")
    print("Committed")
def extract_file(tar: tarfile.TarFile, name: str) -> IO[bytes]:
    """
    Helper for getting a file handle to the database file in the tar archive.
    Needed because we don't necessarily know the name of its containing folder.

    :raises tarfile.TarError: if the archive does not contain the database file
    """
    for member in tar.getmembers():
        if member.isfile() and member.name.endswith(name):
            handle = tar.extractfile(member)
            if handle is None:
                # Because we verified the checksum earlier, this should only
                # be possible if maxmind actually served us a bad file.
                raise tarfile.TarError("Tar archive did not contain the database file!")
            return handle
    raise tarfile.TarError("Tar archive did not contain the database file!")
def detect( cls, target_file, magic_type ):
    """Identify a Python sdist named ``<package>-<version>.tar.gz``.

    Returns an instance describing the package, or None when the file name
    or magic type does not match, or when no PKG-INFO is present.
    """
    filename = os.path.basename( target_file.name )
    if not filename.endswith( '.tar.gz'):
        return None
    if not magic_type.startswith( 'gzip compressed data' ):
        return None
    ( filename, _, _ ) = filename.rsplit( '.', 2 )
    try:
        ( package, version ) = filename.rsplit( '-', 1 )  # ie: cinp-0.9.2.tar.gz
    except ValueError:
        return None
    gzfile = GzipFile( fileobj=target_file.file, mode='r' )
    # renamed local (previously shadowed the `tarfile` module name)
    archive = TarFile( fileobj=gzfile, mode='r' )
    # BUG FIX: the handles previously leaked when PKG-INFO was missing;
    # close them on every exit path.
    try:
        try:
            info = archive.extractfile( '{0}/PKG-INFO'.format( filename ) )
        except KeyError:
            return None
    finally:
        archive.close()
        gzfile.close()
    if info is None:
        return None
    return cls( filename, package, 'all', version, 'python' )
def detect( cls, target_file, magic_type ):
    """Identify an Ansible Galaxy collection ``<ns>-<name>-<version>.tar.gz``.

    Returns an instance describing the collection, or None when the file
    does not look like a Galaxy archive.

    :raises ValueError: when the archive has a MANIFEST.json but the file
        name is not ``namespace-name-version``.
    """
    filename = os.path.basename( target_file.name )
    if not filename.endswith( '.tar.gz'):
        return None
    if not magic_type.startswith( 'gzip compressed data' ):
        return None
    ( filename, _, _ ) = filename.rsplit( '.', 2 )
    gzfile = GzipFile( fileobj=target_file.file, mode='r' )
    # renamed local (previously shadowed the `tarfile` module name)
    archive = TarFile( fileobj=gzfile, mode='r' )
    # BUG FIX: the handles previously leaked when MANIFEST.json was missing
    # or unparsable; close them on every exit path.
    try:
        try:
            manifest = json.loads( archive.extractfile( 'MANIFEST.json' ).read() )
        except ( KeyError, TypeError, json.JSONDecodeError ):
            return None
    finally:
        archive.close()
        gzfile.close()
    if 'collection_info' not in manifest:
        return None
    try:
        ( namespace, name, version ) = filename.split( '-' )
    except ValueError:
        raise ValueError( 'Unrecognized Galaxy file name Format' )
    return cls( filename, '{0}-{1}'.format( namespace, name ), 'all', version, 'galaxy' )
def load_from_file(self, f): tar = TarFile(f, "r") # load info file f = tar.extractfile("info.py") self.agedesc, self.generation = eval(f.read(-1), {"__builtins__": None}) f.close() # load agents for info in tar.getmembers(): if (splitext(info.name)[1]==".agt" and info.isfile()): f = tar.extractfile(info) self.add(Agent(self.agedesc, file = f)) f.close() tar.close()
def _extract_station(stations_tar: tarfile.TarFile,
                     tar_member: tarfile.TarInfo) -> Dict[str, DataFrame]:
    """Read one station CSV out of *stations_tar*, normalise its columns and
    units, and return ``{station_id: dataframe}``.

    Returns None for members that are not CSV files (the annotation is
    effectively Optional).
    """
    if not tar_member.name.endswith(".csv"):
        return None
    # Read the records from the provided station
    data = read_file(
        stations_tar.extractfile(tar_member),
        file_type="csv",
        usecols=_COLUMN_MAPPING.keys()).rename(columns=_COLUMN_MAPPING)
    # Fix data types
    noaa_station = tar_member.name.replace(".csv", "")
    data["noaa_station"] = noaa_station
    # conv_dist / conv_temp convert the raw NOAA units — see their definitions.
    data["rainfall"] = data["rainfall"].apply(conv_dist)
    data["snowfall"] = data["snowfall"].apply(conv_dist)
    data["dew_point"] = data["dew_point"].apply(conv_temp)
    for temp_type in ("average", "minimum", "maximum"):
        col = f"{temp_type}_temperature"
        data[col] = data[col].apply(conv_temp)
    # Compute the relative humidity from the dew point and average temperature
    data["relative_humidity"] = data.apply(
        lambda x: relative_humidity(x["average_temperature"], x["dew_point"]), axis=1)
    return {noaa_station: data}
def get_openface(file_):
    """Parse the first OpenFace CSV found in the tar archive *file_* and
    return the set of frame numbers whose detection failed (success flag
    not set, or confidence below 0.98).

    Honours the module-level ONLY_ODD switch: when set, only odd frames
    are considered.
    """
    with TarFile(file_) as tar_file:
        # keep IndexError semantics when no CSV member exists
        rows = tar_file.extractfile([
            x for x in tar_file.getmembers() if x.path.endswith(".csv")
        ][0]).readlines()
    failed = set()
    reference = {}
    for i, line in enumerate(rows):
        split_line = line.decode("utf-8").strip().split(",")
        if i == 0:
            # header row: map column name -> index
            reference = {x: split_line.index(x) for x in split_line}
            continue
        frame = int(split_line[reference["frame"]])
        if not ONLY_ODD or frame % 2 == 1:
            confidence = float(split_line[reference["confidence"]])
            # BUG FIX: bool("0") is True, so the old `bool(cell)` success
            # check was truthy for ANY non-empty cell and could never flag
            # a failure. OpenFace writes 0/1; treat "0" as failure.
            success = split_line[reference["success"]].strip() != "0"
            if not success or confidence < 0.98:
                failed.add(frame)
    return failed
def _filter_single_tar( in_file: tarfile.TarFile, remove_entries, ): temp_fh = tempfile.TemporaryFile() temptar = tarfile.TarFile(fileobj=temp_fh, mode='w') for tar_info in in_file: if not tar_info.isfile(): temptar.addfile(tar_info) continue if tar_info.name in remove_entries: logging.debug(f'purging entry: {tar_info.name}') continue # copy entry entry = in_file.extractfile(tar_info) temptar.addfile(tar_info, fileobj=entry) size = temp_fh.tell() temp_fh.flush() temp_fh.seek(0) return temp_fh, size
def extract_docker_layer(img: tarfile.TarFile, layer_id: str, extract_path: str):
    """Extract ``<layer_id>/layer.tar`` from the image archive into
    *extract_path*, then process AUFS whiteout markers (``.wh.*`` files)
    by deleting both the marker and the file it masks.
    """
    # SECURITY NOTE(review): extractall() on untrusted archives is subject
    # to path traversal — confirm the image source is trusted.
    with tarfile.open(fileobj=img.extractfile('%s/layer.tar' % layer_id),
                      errorlevel=0, dereference=True) as layer:
        layer.extractall(path=extract_path)
        log.debug('processing whiteouts')
        for member in layer.getmembers():
            path = member.path
            if path.startswith('.wh.') or '/.wh.' in path:
                # the masked file name is the whiteout name minus the
                # '.wh.' prefix
                if path.startswith('.wh.'):
                    newpath = path[4:]
                else:
                    newpath = path.replace('/.wh.', '/')
                try:
                    log.debug('removing path %s', newpath)
                    # NOTE(review): these paths are relative to the current
                    # working directory, not extract_path — presumably the
                    # caller chdirs into extract_path first; confirm.
                    os.unlink(path)
                    os.unlink(newpath)
                except OSError as err:
                    # already-absent files are fine; anything else is real
                    if err.errno != errno.ENOENT:
                        raise
def __init__(self, archive: tarfile.TarFile, name: str):
    """Open layer *name* from the image *archive* and index its contents.

    Populates:
    - self.files: names of regular entries in the layer,
    - self.mask: path prefixes / names deleted by AUFS whiteout markers
      ('.wh.<name>' masks one file, '.wh..wh..opq' masks a directory).

    :raises RuntimeError: when the member cannot be extracted.
    """
    buffer = archive.extractfile(name)
    if buffer is None:
        raise RuntimeError("No buffer for layer")
    # superclass presumably opens the layer tar from this buffer and
    # exposes it as self.archive — TODO confirm against base class
    super().__init__(buffer)
    self.name = name
    self.files = set()
    self.mask = set()
    for member in self.archive.getmembers():
        path = Path(member.name)
        if not path.name.startswith('.wh.'):
            self.files.add(member.name)
            continue
        if len(path.parent.name) > 0:
            prefix = str(path.parent) + '/'
        else:
            prefix = ''
        if path.name == '.wh..wh..opq':
            # Discard everything in the same directory
            self.mask.add(prefix)
        else:
            # Just discard one file
            self.mask.add(prefix + path.name[4:])
def _extract_tarinfo(tf: tarfile.TarFile, parent_info: Dict, extensions=IMG_EXTENSIONS):
    """Walk the members of *tf*, collecting image samples into
    ``parent_info['samples']`` and recursing into nested ``.tar`` members
    (recorded under ``parent_info['children']``).

    Returns the total number of image samples found, recursively.
    """
    sample_count = 0
    for index, member in enumerate(tf):
        if not member.isfile():
            continue
        dirname, basename = os.path.split(member.path)
        name, ext = os.path.splitext(basename)
        ext = ext.lower()
        if ext == '.tar':
            child_info = dict(
                name=member.name,
                path=os.path.join(parent_info['path'], name),
                ti=member,
                children=[],
                samples=[],
            )
            # stream-mode ('r|') read of the nested archive
            with tarfile.open(fileobj=tf.extractfile(member), mode='r|') as nested:
                sample_count += _extract_tarinfo(nested, child_info, extensions=extensions)
            _logger.debug(
                f'{index}/?. Extracted child tarinfos from {member.name}. {len(child_info["samples"])} images.'
            )
            parent_info['children'].append(child_info)
        elif ext in extensions:
            parent_info['samples'].append(member)
            sample_count += 1
    return sample_count
def determine_osinfo(tarfh: tarfile.TarFile) -> um.OperatingSystemId:
    '''
    tries to determine the operating system identification, roughly as specified by
        https://www.freedesktop.org/software/systemd/man/os-release.html
    and otherwise following some conventions believed to be common.

    The argument (an opened tarfile) is being read from its initial position, possibly
    (but not necessarily) to the end. The underlying stream does not need to be seekable.
    It is the caller's responsibility to close the tarfile handle after this function returns.

    The tarfile is expected to contain a directory tree from a "well-known" unix-style
    operating system distribution. In particular, the following (GNU/) Linux distributions
    are well-supported:
    - alpine
    - debian
    - centos

    In case nothing was recognised within the given tarfile, the returned
    OperatingSystemId's attributes will all be `None`.
    '''
    # Only these release/identification files are inspected.
    known_fnames = (
        'debian_version',
        'centos-release',
        'os-release',
    )
    os_info = {}
    for info in tarfh:
        fname = info.name.split('/')[-1]
        if not fname in known_fnames:
            continue
        if info.issym():
            # we assume fnames are the same (this assumption might not always be correct)
            continue
        if not info.isfile():
            continue
        # found an "interesting" file
        contents = tarfh.extractfile(info).read().decode('utf-8')
        if fname == 'os-release':
            for k, v in _parse_os_release(contents):
                if k in os_info:
                    # keep a semver-parseable VERSION_ID over an unparseable one;
                    # otherwise earlier (non-os-release) values win
                    if k == 'VERSION_ID' and version.is_semver_parseable(v) and \
                            not version.is_semver_parseable(os_info[k]):
                        pass
                    else:
                        continue  # os-release has lesser precedence
                os_info[k] = v
            if os_info.get('ID') == 'ubuntu' and (ver := os_info.get('VERSION')):
                # of _course_ ubuntu requires a special hack
                os_info['VERSION_ID'] = ver.split(' ', 1)[0]
        elif fname == 'centos-release':
            for k, v in _parse_centos_release(contents):
                os_info[k] = v
    # NOTE(review): the visible code never builds or returns an
    # OperatingSystemId despite the annotation — the function body appears
    # truncated here; confirm the trailing return exists upstream.
def parse_backup_label(self, basebackup_path):
    """Return the starting WAL segment name recorded in the base backup's
    ``backup_label`` member, or None when no START WAL LOCATION line exists.
    """
    # BUG FIX: the TarFile handle previously leaked; the context manager
    # closes it even if extraction fails.
    with TarFile(basebackup_path) as tar:
        content = tar.extractfile("backup_label").read()  # pylint: disable=no-member
    for line in content.split(b"\n"):
        if line.startswith(b"START WAL LOCATION"):
            # line format: START WAL LOCATION: X/Y (file <segment>)
            start_wal_segment = line.split(b" ")[5].strip(b")").decode("utf8")
            self.log.debug("Found: %r as starting wal segment", start_wal_segment)
            return start_wal_segment
def from_package(cls, package: TarFile, url: str, config: ConfigFile) -> 'EnotConfig':
    """Build an EnotConfig from the ``enot_config.json`` stored in *package*.

    When a dep-level *config* is given and carries a fullname, it overrides
    the one parsed from the package.
    """
    f = package.extractfile('enot_config.json')
    # BUG FIX: close the extracted member handle deterministically
    # (it previously leaked).
    try:
        content = f.read()
    finally:
        f.close()
    conf = cls(json.loads(content.decode('utf-8')), url=url)
    if config is not None:
        if config.fullname:
            # overwrite fullname by package's fullname (from dep.config).
            conf.fullname = config.fullname
    return conf
def load_from_file(self, f): tar = TarFile(f, "r") # load info file f = tar.extractfile("info.py") self.agedesc, self.generation = eval(f.read(-1), {"__builtins__": None}) f.close() # load agents for info in tar.getmembers(): if (splitext(info.name)[1] == ".agt" and info.isfile()): f = tar.extractfile(info) self.add(Agent(self.agedesc, file=f)) f.close() tar.close()
def _readManifest(self): raw = TarFile(self.filename) manifest = raw.extractfile('manifest.json').read().decode() raw.close() self._manifest = json.loads(manifest)[0]
def _extract_tar_file(tar: tarfile.TarFile, f: tarfile.TarInfo, path: Path):
    """Stream tar member *f* into *path* in fixed-size chunks."""
    with tar.extractfile(f) as source, open(str(path), 'wb') as sink:
        chunk = source.read(_TAR_BUFFER_SIZE)
        while chunk:
            sink.write(chunk)
            chunk = source.read(_TAR_BUFFER_SIZE)
def tar_contents(cls, tarfile_name, passwd=None, names=None):
    """Return ``{member_name: bytes}`` for the requested *names* (all
    members when *names* is None).

    *passwd* is accepted for interface compatibility and is unused here.
    """
    results = {}
    # BUG FIX: close the archive deterministically (it previously leaked).
    with TarFile(tarfile_name) as tf:
        if names is None:
            names = tf.getnames()
        for n in names:
            results[n] = tf.extractfile(n).read()
    return results
def song_info(tar: tarfile.TarFile, path: str):
    """Parse the UltraStar-style song file at *path* inside *tar* and build
    a SongInfo record.

    Returns None when the file cannot be parsed or the referenced MP3 is
    missing from the archive.
    """
    content = tar.extractfile(path)
    parsed = parse(content.read())
    if parsed is None:
        return None
    artist = parsed.get('ARTIST')
    transcriber = parsed.get('CREATOR')
    genre = parsed.get('GENRE', 'Pony')
    language = parsed.get('LANGUAGE', 'English')
    title = parsed.get('TITLE')
    cover = parsed['COVER']
    # YEAR of '0' (or absent) maps to None
    song_year = int(parsed.get('YEAR', '0')) or None
    try:
        updated = dateutil.parser.parse(
            parsed['UPDATED'], MLKDateParserInfo()) if 'UPDATED' in parsed else None
    except ValueError:
        print(f"Couldn't parse date: {parsed['UPDATED']}")
        updated = None
    # media paths are relative to the song file's directory
    mp3_path = os.path.join(os.path.dirname(path), parsed['MP3'])
    try:
        tar.getmember(mp3_path)
    except KeyError:
        return None
    if 'END' in parsed:
        duration = int(parsed['END']) / 1000
    else:
        duration = mutagen.mp3.MP3(tar.extractfile(mp3_path)).info.length
    if 'START' in parsed:
        duration -= float(parsed['START'].replace(',', '.'))
    is_mlk = 'mylittlekaraoke' in parsed.get('COMMENT', '')
    mp3 = parsed['MP3']
    background = parsed.get('BACKGROUND')
    video = parsed.get('VIDEO')
    # BUG FIX: `if 'P1' and 'P2' in parsed:` only tested membership of 'P2'
    # ('P1' is a truthy literal); both duet parts must be present.
    if 'P1' in parsed and 'P2' in parsed:
        parts = [parsed['P1'], parsed['P2']]
    else:
        parts = None
    preview_start = float(parsed['PREVIEWSTART'].replace(
        ',', '.')) if 'PREVIEWSTART' in parsed else None
    return SongInfo(title, artist, genre, song_year, duration, language,
                    transcriber, is_mlk, updated, path, mp3, background,
                    video, preview_start, parts, cover)
class TarFileWrapper(ArchiveFileWrapper):
    """Adapter exposing a tar archive through the ArchiveFileWrapper interface."""

    def __init__(self, fh, *args, **kwargs):
        # Wrap an already-open file object; TarFile auto-detects the format.
        self.archive = TarFile(fileobj=fh)
        super(TarFileWrapper, self).__init__(*args, **kwargs)

    def extract_file(self, *args, **kwarg):
        # Delegates to TarFile.extractfile (returns a file object, or None
        # for non-regular members).
        return self.archive.extractfile(*args, **kwarg)

    def names(self):
        # All member names in archive order.
        return self.archive.getnames()
def extractfile(tar_file: TarFile, name: str, filtr: Callable[[str], str],
                line: int = 1) -> Optional[Stream]:
    """Extract the specified file from a tarball using the specified filter
    and an optional line offset. Returns None when the member is missing or
    not extractable."""
    try:
        handle = tar_file.extractfile(name)
    except KeyError:
        return None
    if handle is None:
        return None
    decoded = (raw.decode('utf-8') for raw in handle.readlines())
    return Stream(decoded, filtr, line)
def tarfile_extract_single_file(tf: tarfile.TarFile, member: str, extract_path: str):
    """Extract a single *member* of *tf* to *extract_path*, creating the
    destination's parent directories as needed."""
    print("Extracting {} to {}".format(member, extract_path))
    with tf.extractfile(member) as source:
        payload = source.read()
    os.makedirs(os.path.dirname(extract_path), mode=0o755, exist_ok=True)
    with open(extract_path, "wb") as sink:
        sink.write(payload)
def _extract_tar_file(tar: tarfile.TarFile, f: tarfile.TarInfo, path: Path):
    """Stream tar member *f* into *path* in fixed-size chunks, creating the
    destination's parent directories first."""
    parent = path.parent
    if not parent.exists():
        parent.mkdir(parents=True)
    with tar.extractfile(f) as source, open(str(path), 'wb') as sink:
        chunk = source.read(_TAR_BUFFER_SIZE)
        while chunk:
            sink.write(chunk)
            chunk = source.read(_TAR_BUFFER_SIZE)
def load_bin(self, bin_models: tarfile.TarFile) -> None:
    """
    Load model weights from a tar file (binary form) into our models.

    :param bin_models: tar file containing model weights as io.BytesIO
    :return: None
    """
    city_models = [
        (bin_models.extractfile(member), member.name)
        for member in bin_models
    ]
    self.agent.load_bin(city_models)
def layerInfo(self, layer):
    """Return the parsed ``<layer>/json`` metadata for *layer*, caching the
    result in ``self._layerInfo``.
    """
    try:
        return self._layerInfo[layer]
    except KeyError:
        pass
    # BUG FIX: use the context manager so the archive is closed even when
    # extraction or JSON decoding raises (it previously leaked).
    with TarFile(self.filename) as raw:
        config = raw.extractfile('{0}/json'.format(layer)).read().decode()
    self._layerInfo[layer] = json.loads(config)
    return self._layerInfo[layer]
def extracttarfile(tar: tarfile.TarFile, member: tarfile.TarInfo) -> Text:
    """Extract the given member from the tarfile.

    Return the path of the file where we output it. If this isn't a regular
    file or a link, the tarfile module returns `None`; in that case return
    None as the output file name.
    """
    outfilename = os.path.join(conf['datadir'], member.name)
    source = tar.extractfile(member)
    if not source:
        return None
    with open(outfilename, 'wb') as sink:
        sink.write(source.read())
    return outfilename
def config(self):
    """Return the image's configuration JSON (as referenced by the
    manifest's ``Config`` entry), caching it in ``self._config``.
    Loads the manifest first if needed.
    """
    if self._config is not None:
        return self._config
    if self._manifest is None:
        self._readManifest()
    # BUG FIX: use the context manager so the archive is closed even when
    # extraction or JSON decoding raises (it previously leaked).
    with TarFile(self.filename) as raw:
        config = raw.extractfile(self._manifest['Config']).read().decode()
    self._config = json.loads(config)
    return self._config
def test_bundler_basic(self):
    """Test the bundler to stream a tarfile."""
    with BuildSampleData() as sample_files:
        md_obj = MetaData([MetaObj(value='SomethingReal')])
        # delete=False so the tar can be reopened by name after closing
        bundle_fd = NamedTemporaryFile(delete=False)
        bundle = bundler.Bundler(md_obj, sample_files)
        bundle.stream(bundle_fd)
        bundle_fd.close()
        self.assertTrue(bundle_fd)
        # the streamed bundle must be a valid tar containing metadata.txt
        check_tar = TarFile(bundle_fd.name, 'r')
        md_fd = check_tar.extractfile('metadata.txt')
        self.assertTrue(md_fd)
        md_bytes = md_fd.read()
        # metadata.txt must hold parseable JSON
        self.assertTrue(loads(md_bytes.decode('utf8')))
def _open_archive_file(self, archive: TarFile, name: str) -> IO[bytes]:
    """Return a readable handle for member *name*, scanning *archive*
    sequentially via TarFile.next().

    :raises ObjectDoesNotExistError: when the member is absent or cannot
        be extracted.
    """
    member = archive.next()
    while member is not None:
        if member.name == name:
            handle = archive.extractfile(member)
            if handle is None:
                break
            return handle
        member = archive.next()
    # noinspection PyProtectedMember
    raise ObjectDoesNotExistError(f'File {name} is missing in archive',
                                  self._file_storage._driver, archive.name)
def _open_archive_file(self, archive: TarFile, name: str) -> IO[bytes]:
    """Return a readable handle for member *name*, scanning *archive*
    sequentially via TarFile.next().

    :raises ObjectDoesNotExistError: when the member is absent or cannot
        be extracted.
    """
    member = archive.next()
    while member is not None:
        if member.name == name:
            handle = archive.extractfile(member)
            if handle is None:
                break
            return handle
        member = archive.next()
    raise ObjectDoesNotExistError(
        'File {} is missing in archive'.format(name),
        self._file_storage._driver, archive.name)
def load_section(tf: TarFile, info: TarInfo) -> Table:
    """Read one gzip-compressed Tycho-2 supplement section (*info*) from the
    archive *tf* using the CDS reader configured from the archive's ReadMe,
    and return it with TYC columns parsed and magnitudes as float64 mags.
    """
    with tf.extractfile('./ReadMe') as readme:
        col_names = ['Bmag', 'Vmag', 'e_Bmag', 'e_Vmag', 'd3', 'TYC1', 'TYC2', 'TYC3',
                     'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag', 'Kmag', 'e_Kmag', 'SpType']
        # the ReadMe describes the fixed-width layout of the cc*.dat files
        reader = io_ascii.get_reader(io_ascii.Cds, readme=readme,
                                     include_names=col_names)
        reader.data.table_name = 'cc*.dat'
    print(' Loading ' + os.path.basename(info.name))
    # the member itself is gzip-compressed; wrap it before reading
    with tf.extractfile(info) as gzf, gzip.open(gzf, 'rb') as f:
        section = reader.read(f)
    # TYC1 == 0 rows carry no Tycho identifier — drop them
    section = section[section['TYC1'] != 0]
    parse_tyc_cols(section)
    convert_cols = ['Bmag', 'Vmag', 'e_Bmag', 'e_Vmag',
                    'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag', 'Kmag', 'e_Kmag']
    for col in convert_cols:
        section[col] = section[col].astype(np.float64)
        section[col].convert_unit_to(u.mag)
        section[col].format = '.3f'
    return section
def get_root_json_from_image(img: tarfile.TarFile) -> Tuple[str, dict]:
    """
    Every docker image has a root .json file with the metadata information.
    This function locates this file, loads it and returns its content
    together with the layer id (the file name without the extension).

    Returns (None, None) when no top-level .json member exists.

    >>> get_root_json_from_image(img)
    ('db079554b4d2f7c65c4df3adae88cb72d051c8c3b8613eb44e86f60c945b1ca7', dict(...))
    """
    for f in img.getmembers():
        # the root metadata file lives at the archive top level
        if f.name.endswith("json") and "/" not in f.name:
            c = img.extractfile(f.name).read()
            if hasattr(c, "decode"):
                c = c.decode()
            return f.name.split(".")[0], json.loads(c)
    return None, None
def test_can_put_extracted_file_from_tar(self):
    """Verify that a file handle returned by TarFile.extractfile can be
    streamed directly as an S3 object body."""
    tempdir = self.make_tempdir()
    tarname = os.path.join(tempdir, "mytar.tar")
    filename = os.path.join(tempdir, "foo")
    # Set up a file to add the tarfile.
    with open(filename, "w") as f:
        f.write("bar")
    # Setup the tar file by adding the file to it.
    # Note there is no context handler for TarFile in python 2.6
    try:
        tar = TarFile(tarname, "w")
        tar.add(filename, "foo")
    finally:
        tar.close()
    # See if an extracted file can be uploaded to s3.
    try:
        tar = TarFile(tarname, "r")
        # closing() ensures the extracted handle is released after upload
        with closing(tar.extractfile("foo")) as f:
            self.assert_can_put_object(body=f)
    finally:
        tar.close()
# Python 2 script: reads hourly Wikipedia "projectcounts" files out of a tar
# archive and prints, for each English-Wikipedia line, the elapsed seconds
# since 2008-01-01 and the request count normalised against MAX_REQUESTS.
FILENAME = "projectcounts-2008.tar"
# cap: 40% of the observed yearly maximum request count
MAX_REQUESTS = 33056088 * 0.4

if __name__ == "__main__":
    tar = TarFile(FILENAME)
    inidate = datetime(year=2008, month=1, day=1)
    maxrequests = 0
    for filename in tar.getnames():
        # member names look like: projectcounts-YYYYMMDD-HHMMSS
        pre, date, time = filename.split("-")
        year = int(date[0:4])
        month = int(date[4:6])
        day = int(date[6:8])
        hour = int(time[0:2])
        minute = int(time[2:4])
        second = int(time[4:6])
        date = datetime(year=year, month=month, day=day, hour=hour,
                        minute=minute, second=second)
        td = date - inidate
        # offset of this sample from the start of the year, in seconds
        seconds = td.days * 24 * 60 * 60 + td.seconds
        f = tar.extractfile(filename)
        for line in f.readlines():
            # only the English Wikipedia ("en - ...") rows are of interest
            if line.startswith("en -"):
                line = line.replace("\n", "").replace("\r", "")
                lineSplit = line.split(" ")
                requests = int(lineSplit[2])
                # clamp outliers to the cap before normalising
                if requests > MAX_REQUESTS:
                    requests = MAX_REQUESTS
                print "%d %.2f" % (seconds, float(requests) / MAX_REQUESTS)
                if requests > maxrequests:
                    maxrequests = requests
def create_new_docker_image(manifest: dict,
                            image_output_path: str,
                            img: tarfile.TarFile,
                            old_layer_digest: str,
                            new_layer_path: str,
                            new_layer_digest: str,
                            json_metadata_last_layer: dict = None,
                            json_metadata_root: dict = None):
    """Write a new docker image tar to *image_output_path*, copying *img*
    but replacing the layer identified by *old_layer_digest* with the layer
    file at *new_layer_path* (digest *new_layer_digest*), and rewriting the
    manifest, per-layer metadata and root metadata accordingly.
    """
    with tarfile.open(image_output_path, "w") as s:
        for f in img.getmembers():
            log.debug(" _> Processing file: {}".format(f.name))
            # Add new manifest
            if f.name == "manifest.json":
                # Dump Manifest to JSON
                new_manifest_json = json.dumps(manifest).encode()
                replace_or_append_file_to_layer("manifest.json",
                                                new_manifest_json, s)
            #
            # NEW LAYER INFO
            #
            elif old_layer_digest in f.name:
                # Skip for old layer.tar file
                if f.name == "{}/layer.tar".format(old_layer_digest) or \
                        "/" not in f.name:
                    log.debug(" _> Replacing layer {} by {}".format(
                        f.name, new_layer_digest))
                    replace_or_append_file_to_layer("{}/layer.tar".format(
                        new_layer_digest), new_layer_path, s)
                else:
                    #
                    # Extra files: "json" and "VERSION"
                    #
                    c = read_file_from_image(img, f.name)
                    if "json" in f.name:
                        # Modify the JSON content to add the new
                        # hash
                        if json_metadata_last_layer:
                            c = json.dumps(json_metadata_last_layer).encode()
                        else:
                            c = c.decode().replace(old_layer_digest,
                                                   new_layer_digest).encode()
                    # re-home the extra file under the new digest's directory
                    replace_or_append_file_to_layer("{}/{}".format(
                        new_layer_digest, os.path.basename(f.name)), c, s)
            #
            # Root .json file with the global info
            #
            elif "repositories" in f.name:
                c = read_file_from_image(img, f, autoclose=False)
                j = json.loads(c.decode())
                image = list(j.keys())[0]
                tag = list(j[image].keys())[0]
                # Update the latest layer
                j[image][tag] = new_layer_digest
                new_c = json.dumps(j).encode()
                replace_or_append_file_to_layer(f.name, new_c, s)
            elif ".json" in f.name and "/" not in f.name:
                c = read_file_from_image(img, f, autoclose=False)
                # Modify the JSON content to add the new
                # hash
                if json_metadata_root:
                    j = json_metadata_root
                else:
                    j = json.loads(c.decode())
                    # the last diff_id corresponds to the replaced layer
                    j["rootfs"]["diff_ids"][-1] = \
                        "sha256:{}".format(new_layer_digest)
                new_c = json.dumps(j).encode()
                replace_or_append_file_to_layer(f.name,
                                                new_c, s)
            # Add the rest of files / dirs
            else:
                s.addfile(f, img.extractfile(f))