def writeDataFile( self, filename, text, content_type, subdir=None ): """ See IExportContext. """ if subdir is not None: filename = '/'.join( ( subdir, filename ) ) parents = filename.split('/')[:-1] while parents: path = '/'.join(parents) + '/' if path not in self._archive.getnames(): info = TarInfo(path) info.type = DIRTYPE # tarfile.filemode(0755) == '-rwxr-xr-x' info.mode = 0755 info.mtime = time.time() self._archive.addfile(info) parents.pop() info = TarInfo(filename) if isinstance(text, basestring): stream = StringIO(text) info.size = len(text) else: # Assume text is an instance of a class like # Products.Archetypes.WebDAVSupport.PdataStreamIterator, # as in the case of ATFile stream = text.file info.size = text.size info.mtime = time.time() self._archive.addfile( info, stream )
def writeDataFile( self, filename, text, content_type, subdir=None ): """ See IExportContext. """ if subdir is not None: filename = '/'.join( ( subdir, filename ) ) parents = filename.split('/')[:-1] while parents: path = '/'.join(parents) + '/' if path not in self._archive.getnames(): info = TarInfo(path) info.type = DIRTYPE # tarfile.filemode(0755) == '-rwxr-xr-x' info.mode = 0755 info.mtime = time.time() self._archive.addfile(info) parents.pop() info = TarInfo(filename) if isinstance(text, str): stream = StringIO(text) info.size = len(text) elif isinstance(text, unicode): raise ValueError("Unicode text is not supported, even if it only " "contains ascii. Please encode your data. See " "GS 1.7.0 changes for more") else: # Assume text is an instance of a class like # Products.Archetypes.WebDAVSupport.PdataStreamIterator, # as in the case of ATFile stream = text.file info.size = text.size info.mtime = time.time() self._archive.addfile( info, stream )
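Per the ValueError branch above, callers are expected to hand writeDataFile already-encoded text (or a stream-like object), never unicode. A minimal caller sketch, assuming a hypothetical export-context object named context that exposes the method above:

title = u'caf\xe9'
payload = title.encode('utf-8')  # encode explicitly; passing unicode raises ValueError
context.writeDataFile('titles.txt', payload, 'text/plain', subdir='setup_tool')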
def writeDataFile( self, filename, text, content_type, subdir=None ): """ See IExportContext. """ mod_time = time.time() if subdir is not None: elements = subdir.split('/') parents = filter(None, elements) while parents: dirname = os.path.join(*parents) try: self._archive.getmember(dirname+'/') except KeyError: info = TarInfo(dirname) info.size = 0 info.mode = 509 info.mtime = mod_time info.type = DIRTYPE self._archive.addfile(info, StringIO()) parents = parents[:-1] filename = '/'.join( ( subdir, filename ) ) stream = StringIO( text ) info = TarInfo( filename ) info.size = len( text ) info.mode = 436 info.mtime = mod_time self._archive.addfile( info, stream )
def _addToArchive(output, filename, archive): output.seek(0) xmlInfo = TarInfo(filename) xmlInfo.size = len(output.getbuffer()) xmlInfo.mtime = time.time() archive.addfile(xmlInfo,output) output.close()
def put(self, content, filename="", file_hash=""): """ Store file information in hashed tree """ if not filename and not file_hash: raise ValueError('Filename or FileHash is mandatory') if filename: # File accesibility if not os.path.exists(filename): raise IOError('Unaccesible file %s', filename) # Calc hash file_hash = self._get_hash(filename) if not file_hash: raise ValueError('Hash of file is mandatory') # Get file path for hash path, tarfile, hashed_filename = self._get_path(file_hash) # Create file path try: os.makedirs(path) except WindowsError: pass except OSError: pass # Open tarfile if self.external_compressor: # External compressor is not suited for adding files. raise ValueError('You cannot use external compressor for write files') with TarFile.open(name=os.path.join(path, tarfile), mode='a') as tar: with FileLock(os.path.join(path, tarfile)) as lock: # Test if file already exists into tarfile try: tar.getmember(hashed_filename) raise ValueError('Member already exists') except KeyError: pass except: raise data = self.encoder.encode(content) if self.internal_compressor: data = self.internal_compressor.compress(data) data_file = StringIO(data) mtime = time.time() ti = TarInfo(hashed_filename) ti.size = data_file.len ti.mtime = mtime tar.addfile(tarinfo=ti, fileobj=data_file) tar.close() return file_hash
def _dump_files(self, tar): """ Dump all uploaded media to the archive. """ # Loop through all models and find FileFields for model in apps.get_models(): # Get the name of all file fields in the model field_names = [] for field in model._meta.fields: if isinstance(field, models.FileField): field_names.append(field.name) # If any were found, loop through each row if len(field_names): for row in model.objects.all(): for field_name in field_names: field = getattr(row, field_name) if field: field.open() info = TarInfo(field.name) info.size = field.size tar.addfile(info, field) field.close()
def move_certs(self, paths): self.log.info("Staging internal ssl certs for %s", self._log_name) yield self.pull_image(self.move_certs_image) # create the volume volume_name = self.format_volume_name(self.certs_volume_name, self) # create volume passes even if it already exists self.log.info("Creating ssl volume %s for %s", volume_name, self._log_name) yield self.docker('create_volume', volume_name) # create a tar archive of the internal cert files # docker.put_archive takes a tarfile and a running container # and unpacks the archive into the container nb_paths = {} tar_buf = BytesIO() archive = TarFile(fileobj=tar_buf, mode='w') for key, hub_path in paths.items(): fname = os.path.basename(hub_path) nb_paths[key] = '/certs/' + fname with open(hub_path, 'rb') as f: content = f.read() tarinfo = TarInfo(name=fname) tarinfo.size = len(content) tarinfo.mtime = os.stat(hub_path).st_mtime tarinfo.mode = 0o644 archive.addfile(tarinfo, BytesIO(content)) archive.close() tar_buf.seek(0) # run a container to stage the certs, # mounting the volume at /certs/ host_config = self.client.create_host_config( binds={ volume_name: {"bind": "/certs", "mode": "rw"}, }, ) container = yield self.docker('create_container', self.move_certs_image, volumes=["/certs"], host_config=host_config, ) container_id = container['Id'] self.log.debug( "Container %s is creating ssl certs for %s", container_id[:12], self._log_name, ) # start the container yield self.docker('start', container_id) # stage the archive to the container try: yield self.docker( 'put_archive', container=container_id, path='/certs', data=tar_buf, ) finally: yield self.docker('remove_container', container_id) return nb_paths
def addFile(tar, dest, file, file_size): if dest not in written_files: info = TarInfo(dest) info.size = file_size info.mtime = now info.mode = 0777 tar.addfile(info, fileobj=file) written_files.add(dest)
def generate_tar(entries): tar_buf = BytesIO() tar_file = TarFile(mode="w", fileobj=tar_buf) for path, contents in entries.items(): tar_info = TarInfo(name=path) tar_info.size = len(contents) tar_file.addfile(tar_info, fileobj=BytesIO(contents)) tar_file.close() # close before copying the buffer so the end-of-archive blocks are written return BytesIO(tar_buf.getvalue())
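A usage sketch for generate_tar above, assuming entries maps archive member paths to byte strings (as the loop implies) and that TarFile is imported from tarfile; the member names are illustrative:

stream = generate_tar({"config/app.json": b"{}", "README": b"hello\n"})
with TarFile(mode="r", fileobj=stream) as tf:
    print(tf.getnames())  # ['config/app.json', 'README']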
def _dump_meta(self, tar): """ Dump metadata to the archive. """ data = MixedIO() dump({'version': __version__}, data) info = TarInfo('meta.json') info.size = data.rewind() tar.addfile(info, data)
def handle(self, *args, **kwargs): """ Process the command. """ tar = TarFile.open(datetime.today().strftime("2buntu-backup-%Y-%m-%d-%H-%M-%S.tar.bz2"), "w:bz2") for name in self.DATABASE_MODELS: f = MixedIO() call_command("dumpdata", name, format="json", stdout=f) info = TarInfo("%s.json" % name.split(".")[1]) info.size = f.rewind() tar.addfile(info, f) for model in self.IMAGE_MODELS: for item in model.objects.all(): if item.image: info = TarInfo(item.image.name) info.size = item.image.size tar.addfile(info, item.image) self.stdout.write("Backup completed.")
def addString(tar, dest, string): if dest not in written_files: print dest, string info = TarInfo(dest) info.size = len(string) info.mtime = now info.mode = 0777 file = StringIO(string) tar.addfile(info, fileobj=file) file.close() written_files.add(dest)
def given_download(self, payload: Dict[str, bytes], compression: str): resource_id = '{}.tar.{}'.format(uuid4(), compression) download_filename = join(self._content_root, resource_id) with self.sync._open(download_filename, 'w') as archive: for filename, content in payload.items(): tarinfo = TarInfo(filename) tarinfo.size = len(content) archive.addfile(tarinfo, BytesIO(content)) self.email_server_client_mock.download.return_value = resource_id
def _addMember(path, data, modtime): from tarfile import DIRTYPE elements = path.split('/') parents = filter(None, [elements[x] for x in range(len(elements))]) for parent in parents: info = TarInfo() info.name = parent info.size = 0 info.mtime = modtime info.type = DIRTYPE archive.addfile(info, StringIO()) _addOneMember(path, data, modtime)
def create_file_from_string(self, filename, content): """Create a file with the contents passed as a string. :param filename: the path to put the file at inside the tarfile. :param content: the content to put in the created file. """ tarinfo = TarInfo(name=filename) tarinfo.size = len(content) self._set_defaults(tarinfo) fileobj = StringIO(content) self.addfile(tarinfo, fileobj=fileobj)
def _dump_db(self, tar): """ Dump the rows in each model to the archive. """ # Dump the tables to a MixedIO data = MixedIO() call_command('dumpdata', all=True, format='json', indent=self.attr.get('ARCHIVE_DB_INDENT'), exclude=self.attr.get('ARCHIVE_EXCLUDE'), stdout=data) info = TarInfo(DB_DUMP) info.size = data.rewind() tar.addfile(info, data)
def write_lines_to_tarball(tar_ball, tar_info, lines): """ Writes the relevant lines to the tar ball """ txt = '\n'.join(lines) txt = txt.encode('utf-8') with BytesIO(txt) as tar_file: info = TarInfo(name=tar_info.name) info.size = len(txt) tar_ball.addfile(info, fileobj=tar_file) pass
def writeDataFile( self, filename, text, content_type, subdir=None ): """ See IExportContext. """ if subdir is not None: filename = '/'.join( ( subdir, filename ) ) stream = StringIO( text ) info = TarInfo( filename ) info.size = len( text ) info.mtime = time.time() self._archive.addfile( info, stream )
def _tar_file(items): """Helper to create an in-memory tar file with multiple files.""" tar_fileobj = BytesIO() tf = TarFile.open(mode="w|", fileobj=tar_fileobj) for item_name, item_bytes in items: ti = TarInfo(name=item_name) content_as_bytes = item_bytes.encode("utf-8") ti.size = len(content_as_bytes) tf.addfile(ti, BytesIO(content_as_bytes)) tf.close() tar_fileobj.seek(0) return tar_fileobj
def write_package(): tbs = ForgeClient.UPLOAD_TAR_BUFFER_SIZE with TarFile.open(mode="w|gz", fileobj=body, bufsize=tbs, dereference=True) as tar: for file in files: self.debug("Sending %s", file) ti = TarInfo(file) fp = os.path.join(self.path, file) ti.size = os.path.getsize(fp) ti.mode = 0o666 with open(fp, "rb") as fd: tar.addfile(ti, fileobj=fd) body.close()
def run_ics(group: str, turtle: bytes, extra_files: List[str] = (), extra_data: List[bytes] = ()): client = docker.from_env() files = ['data.ttl'] if len(extra_files) > 0: files.extend(extra_files) if len(extra_data) > 0: files.extend(f"extra_{i}.ttl" for i in range(0, len(extra_data))) tests = client.containers.create( 'gsscogs/gdp-sparql-tests', command=f'''sparql-test-runner -t /usr/local/tests/{group} -m 10 ''' f'''{" ".join('/tmp/' + f for f in files)}''') archive = BytesIO() with TarFile(fileobj=archive, mode='w') as t: ttl = TarInfo('data.ttl') ttl.size = len(turtle) ttl.mtime = time.time() t.addfile(ttl, BytesIO(turtle)) for filename in extra_files: actual_path = Path('features') / 'fixtures' / 'extra' / filename with actual_path.open('rb') as actual_file: extra_file = t.gettarinfo(arcname=filename, fileobj=actual_file) t.addfile(extra_file, actual_file) for i, add_turtle in enumerate(extra_data): filename = f'extra_{i}.ttl' add_ttl = TarInfo(filename) add_ttl.size = len(add_turtle) add_ttl.mtime = time.time() t.addfile(add_ttl, BytesIO(add_turtle)) archive.seek(0) tests.put_archive('/tmp/', archive) tests.start() response = tests.wait() sys.stdout.write(tests.logs().decode('utf-8')) return response['StatusCode']
def uploadDF(dataflowName): dataflowStr = None udfs = {} dataflowPath = os.path.join(path, "dataflows", dataflowName) with open(os.path.join(dataflowPath, "dataflowInfo.json"), 'r') as df: dataflowStr = df.read() if os.path.exists(dataflowPath + "/udfs/"): for udf in os.listdir(os.path.join(dataflowPath, "udfs")): with open(os.path.join(dataflowPath, "udfs", udf), 'r') as udfFile: udfs[udf] = udfFile.read() retinaBuf = io.BytesIO() with tarfile.open(fileobj=retinaBuf, mode="w:gz") as tar: info = TarInfo("dataflowInfo.json") info.size = len(dataflowStr) tar.addfile(info, io.BytesIO(bytearray(dataflowStr, "utf-8"))) # # ##udfs directory if udfs: info = TarInfo("udfs") info.type = tarfile.DIRTYPE info.mode = 0o755 tar.addfile(info) # ##Add udf to the above dir for udfName, udfCode in udfs.items(): info = TarInfo(name="udfs/" + udfName) info.size = len(udfCode) info.mode = 0o755 tar.addfile(info, io.BytesIO(bytearray(udfCode, "utf-8"))) try: retina.delete(dataflowName) except: print("Dataflow deletion failed!", dataflowName, availableRetinas) retina.add(dataflowName, retinaBuf.getvalue())
def from_str(cls, filename, string): self = cls() b_str = string.encode('utf-8') info = TarInfo(filename) fileobj = io.BytesIO() info.size = fileobj.write(b_str) fileobj.seek(0) self.addfile(tarinfo=info, fileobj=fileobj) return self
def tarball_images( images: List[Image.Image], *, name: str = None, animated: bool = False, format: str = "png", extras: List[Tuple[str, BytesIO]], ) -> BytesIO: fp = BytesIO() tar = TarFile(mode="w", fileobj=fp) for idx, image in enumerate(images): f = BytesIO() if animated: image[0].save(f, format, append_images=image[1:], save_all=True, loop=0) else: image.save(f, format) f.seek(0) if name: info = TarInfo(f"{name}_{idx}.{format}") else: info = TarInfo(f"{idx}.{format}") info.size = len(f.getbuffer()) tar.addfile(info, fileobj=f) for extra in extras: info = TarInfo(extra[0] or "_.txt") info.size = len(extra[1].getbuffer()) tar.addfile(info, fileobj=extra[1]) tar.close() fp.seek(0) return fp
def _dump_meta(self, tar): """ Dump metadata to the archive. """ data = MixedIO() meta_dict = OrderedDict(( ('version', __version__), ('db_file', DB_DUMP), ('media_folder', MEDIA_DIR), ('settings', self.attr.settings_dict()), )) dump(meta_dict, data, indent=2) info = TarInfo(META_DUMP) info.size = data.rewind() tar.addfile(info, data)
def generate_dataset(config): destination = config.absolute(File.COMPRESSED_DATASET) csv_lines = [REPO_LIST_HEADERS] for lang, ext in config.extensions.items(): for pos in range(REPO_PER_LANG): full_name = f'lang_{ext}/repo_{pos:02}' csv_lines.append(REPO_LINE.format(full_name=full_name, lang=lang)) csv_bytes = '\n'.join(csv_lines).encode() with TarFile.open(destination, 'w:gz') as tar_file: tar_info = TarInfo(DATASET_FILENAME) tar_info.size = len(csv_bytes) tar_file.addfile(tar_info, BytesIO(csv_bytes)) return True, 200
def run_csv2rdf(csv_filename: str, metadata_filename: str, csv_io: TextIO, metadata_io: TextIO): client = docker.from_env() csv2rdf = client.containers.create( 'gsscogs/csv2rdf', command=f'csv2rdf -m annotated -o /tmp/output.ttl -t /tmp/{csv_filename} -u /tmp/{metadata_filename}' ) archive = BytesIO() metadata_io.seek(0, SEEK_END) metadata_size = metadata_io.tell() metadata_io.seek(0) csv_io.seek(0, SEEK_END) csv_size = csv_io.tell() csv_io.seek(0) with TarFile(fileobj=archive, mode='w') as t: tis = TarInfo(str(metadata_filename)) tis.size = metadata_size tis.mtime = time.time() t.addfile(tis, BytesIO(metadata_io.read().encode('utf-8'))) tic = TarInfo(str(csv_filename)) tic.size = csv_size tic.mtime = time.time() t.addfile(tic, BytesIO(csv_io.read().encode('utf-8'))) archive.seek(0) csv2rdf.put_archive('/tmp/', archive) csv2rdf.start() response = csv2rdf.wait() sys.stdout.write(csv2rdf.logs().decode('utf-8')) assert_equal(response['StatusCode'], 0) output_stream, output_stat = csv2rdf.get_archive('/tmp/output.ttl') output_archive = BytesIO() for line in output_stream: output_archive.write(line) output_archive.seek(0) with TarFile(fileobj=output_archive, mode='r') as t: output_ttl = t.extractfile('output.ttl') return output_ttl.read()
def create_archive(filepaths): tarstream = BytesIO() tarfile = TarFile(fileobj=tarstream, mode='w') for filepath in filepaths: file = open(filepath, 'rb') file_data = file.read() file.close() tarinfo = TarInfo(name=basename(file.name)) tarinfo.size = len(file_data) tarinfo.mtime = time() tarfile.addfile(tarinfo, BytesIO(file_data)) tarfile.close() tarstream.seek(0) return tarstream
def run(self, args, argv): # Create a temporary tarball with our whole build context and # dockerfile for the update tmp = tempfile.NamedTemporaryFile(suffix="dckr.tar.gz") tmp_tar = TarFile(fileobj=tmp, mode='w') # Add the executable to the tarball, using the current # configured binfmt_misc path. If we don't get a path then we # only need the support libraries copied ff, enabled = _check_binfmt_misc(args.executable) if not enabled: print("binfmt_misc not enabled, update disabled") return 1 if ff: tmp_tar.add(args.executable, arcname=ff) # Add any associated libraries libs = _get_so_libs(args.executable) if libs: for l in libs: tmp_tar.add(os.path.realpath(l), arcname=l) # Create a Docker buildfile df = StringIO() df.write(u"FROM %s\n" % args.tag) df.write(u"ADD . /\n") df_bytes = BytesIO(bytes(df.getvalue(), "UTF-8")) df_tar = TarInfo(name="Dockerfile") df_tar.size = df_bytes.getbuffer().nbytes tmp_tar.addfile(df_tar, fileobj=df_bytes) tmp_tar.close() # reset the file pointers tmp.flush() tmp.seek(0) # Run the build with our tarball context dkr = Docker() dkr.update_image(args.tag, tmp, quiet=args.quiet) return 0
def _add_entry( self, name: str, type: bytes, mode: int, mtime: int, size: int, data: Optional[IO[bytes]], linkname: str = "", ) -> None: info = TarInfo(name) info.type = type info.mode = mode info.size = size info.mtime = mtime info.linkname = linkname return self._inner.addfile(info, data)
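A hedged usage sketch for the _add_entry helper above; writer, the member names, and the timestamp are illustrative assumptions, while REGTYPE and DIRTYPE are the standard tarfile type constants:

from io import BytesIO
from tarfile import REGTYPE, DIRTYPE

payload = b'hello world\n'
# regular file: needs a size and a readable byte stream
writer._add_entry('app/hello.txt', REGTYPE, 0o644, 1700000000, len(payload), BytesIO(payload))
# directory: carries no data, so size is 0 and the file object is None
writer._add_entry('app/', DIRTYPE, 0o755, 1700000000, 0, None)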
def run(self, args, argv): # Create a temporary tarball with our whole build context and # dockerfile for the update tmp = tempfile.NamedTemporaryFile(suffix="dckr.tar.gz") tmp_tar = TarFile(fileobj=tmp, mode='w') # Add the executable to the tarball, using the current # configured binfmt_misc path. If we don't get a path then we # only need the support libraries copied ff, enabled = _check_binfmt_misc(args.executable) if not enabled: print("binfmt_misc not enabled, update disabled") return 1 if ff: tmp_tar.add(args.executable, arcname=ff) # Add any associated libraries libs = _get_so_libs(args.executable) if libs: for l in libs: tmp_tar.add(os.path.realpath(l), arcname=l) # Create a Docker buildfile df = StringIO() df.write("FROM %s\n" % args.tag) df.write("ADD . /\n") df.seek(0) df_tar = TarInfo(name="Dockerfile") df_tar.size = len(df.buf) tmp_tar.addfile(df_tar, fileobj=df) tmp_tar.close() # reset the file pointers tmp.flush() tmp.seek(0) # Run the build with our tarball context dkr = Docker() dkr.update_image(args.tag, tmp, quiet=args.quiet) return 0
def file_write(self, path, content, mode=None, owner=None, group=None, append=False, hide=False, sudo=False): """ Writes a file to the container @param path: path of the file @param content: content to be put in the file @param mode: file mode @param owner: owner of the file @param group: group of the file @param append: append content to the file @param hide: hide (debug) logs @raise runtimeError: path for file couldn't be created """ if append and self.exists(path): content = self.file_read(path) + content file_name = os.path.basename(path) dir_name = os.path.dirname(path) buf = BytesIO() with TarFile("write_file", mode='w', fileobj=buf) as tarf: f = BytesIO() length = f.write(content.encode('utf8')) f.seek(0) tari = TarInfo(name=file_name) tari.size = length if not mode is None: tari.mode = mode if not owner is None: tari.uname = owner if not group is None: tari.gname = group tarf.addfile(tari, f) if not self.exists(dir_name): result = self.container.exec_run("mkdir -p %s" % dir_name) if result.exit_code != 0: raise RuntimeError("Could not create path %s!\n%s" % (dir_name, result.output)) self.container.put_archive(dir_name, buf.getvalue())
def _dump_db(self, tar): """ Dump the rows in each model to the archive. """ # Determine the list of models to exclude exclude = getattr(settings, 'ARCHIVE_EXCLUDE', ( 'auth.Permission', 'contenttypes.ContentType', 'sessions.Session', )) # Dump the tables to a MixedIO data = MixedIO() call_command('dumpdata', all=True, format='json', exclude=exclude, stdout=data) info = TarInfo('data.json') info.size = data.rewind() tar.addfile(info, data)
def _unpack_data(self, tar: TarFile, data_archive: TarFile): with io.BytesIO( str.encode("\n".join([ member.name.lstrip(".") for member in data_archive if member.name.lstrip(".") ]) + "\n")) as fileobj: info = TarInfo("list") info.size = fileobj.getbuffer().nbytes self._unpack_info_file(tar, info, fileobj) names = tar.getnames() for member in (member for member in data_archive if member.name not in names): if member.islnk() or member.issym() or member.isdir(): tar.addfile(member) else: with data_archive.extractfile(member) as fileobj: tar.addfile(member, fileobj)
def stream_context(self): """Start streaming the tar context for Docker.""" with TarFile.open( mode='w|', fileobj=getattr(sys.stdout, 'buffer', sys.stdout) ) as tarfile: tarfile.add( self.context, arcname='.', exclude=self.exclude ) tarinfo = TarInfo('./Dockerfile') tarinfo.size = len(self.dockerfile) tarfile.addfile(tarinfo, BytesIO(self.dockerfile.encode('UTF-8'))) tarfile.close() sys.stdout.flush()
def add_str(self, name, content, ftype, mode, mtime=None, uid=None, gid=None, uname=None, gname=None): ''' Add a string in memory as a file in tarball ''' if isinstance(name, unicode): name = name.encode("UTF-8") ti = TarInfo(name) # set tarinfo attribute for v in ("name", "ftype", "mode", "mtime", "uid", "gid", "uname", "gname"): if vars()[v] is not None: vars(ti)[v] = vars()[v] # set mtime to current if not specified if mtime is None: ti.mtime = int(time()) # unicode char is encoded in UTF-8, has changelog must be in UTF-8 if isinstance(content, unicode): content = content.encode("UTF-8") ti.size = len(content) if content is not None else 0 self.addfile(ti, StringIO(content))
def tarfile(self, format, filename, content_type): from .root.histogram import Histogram from .combination import Combination imgformat = "eps" tarred_contents = StringIO() with closing(open_tar(mode="w" + format, fileobj=tarred_contents)) as tar: for key, context in self.resource_to_render.indexed_contexts: if not context_renderable_as(context, imgformat): continue name = "/".join(map(str, key)) content = context.rendered(imgformat).content.body info = TarInfo(name=name + "." + imgformat) info.size = len(content) tar.addfile(tarinfo=info, fileobj=StringIO(content)) return Response(tarred_contents.getvalue(), content_type=content_type, content_disposition=("Content-Disposition: attachment; filename={0};" .format(filename)))
def writeDataFile(self, filename, text, content_type, subdir=None): """ See IExportContext. """ if subdir is not None: filename = '/'.join((subdir, filename)) parents = filename.split('/')[:-1] while parents: path = '/'.join(parents) + '/' if path not in self._archive.getnames(): info = TarInfo(path) info.type = DIRTYPE info.mtime = time.time() self._archive.addfile(info) parents.pop() stream = StringIO(text) info = TarInfo(filename) info.size = len(text) info.mtime = time.time() self._archive.addfile(info, stream)
def run(self, args, argv): # Create a temporary tarball with our whole build context and # dockerfile for the update tmp = tempfile.NamedTemporaryFile(suffix="dckr.tar.gz") tmp_tar = TarFile(fileobj=tmp, mode='w') # Add the executable to the tarball bn = os.path.basename(args.executable) ff = "/usr/bin/%s" % bn tmp_tar.add(args.executable, arcname=ff) # Add any associated libraries libs = _get_so_libs(args.executable) if libs: for l in libs: tmp_tar.add(os.path.realpath(l), arcname=l) # Create a Docker buildfile df = StringIO() df.write("FROM %s\n" % args.tag) df.write("ADD . /\n") df.seek(0) df_tar = TarInfo(name="Dockerfile") df_tar.size = len(df.buf) tmp_tar.addfile(df_tar, fileobj=df) tmp_tar.close() # reset the file pointers tmp.flush() tmp.seek(0) # Run the build with our tarball context dkr = Docker() dkr.update_image(args.tag, tmp, quiet=args.quiet) return 0
def archivestream(self, ticket): stream = cStringIO.StringIO() with tarfile.open(mode='w|', fileobj=stream): for filepath, arcpath, cont_name, cont_id, f_size, f_modified in ticket[ 'target']: tarinfo = TarInfo() tarinfo.name = arcpath.lstrip('/') tarinfo.size = f_size tarinfo.mtime = datetime_to_epoch(f_modified) tarinfo_buf = tarinfo.tobuf() signed_url = None try: signed_url = files.get_signed_url(filepath, config.fs) except fs.errors.ResourceNotFound: pass if signed_url: content_generator = self.stream_file_signed_url( signed_url, tarinfo_buf, (filepath, cont_name, cont_id, arcpath)) else: content_generator = self.stream_regular_file( filepath, tarinfo_buf, (filepath, cont_name, cont_id, arcpath)) for chunk in content_generator: yield chunk self.log_user_access( AccessType.download_file, cont_name=cont_name, cont_id=cont_id, filename=os.path.basename(arcpath), origin_override=ticket['origin'], download_ticket=ticket['_id']) # log download yield stream.getvalue() # get tar stream trailer stream.close()
def tar(host, backup, share, path): binary_stdout = stdout.buffer fbak = Fruitbak(confdir = Path('/dev/shm/conf')) backup = fbak[host][backup] if path is None: share, path = backup.locate_path(share) else: share = backup[share] def iterator(): for dentry in share.find(path): if dentry.is_file and not dentry.is_hardlink: yield from dentry.hashes with fbak.pool.agent().readahead(iterator()) as reader: for dentry in share.find(path): name = dentry.name or b'.' i = TarInfo(fsdecode(bytes(name))) i.mode = dentry.mode & 0o7777 i.uid = dentry.uid i.gid = dentry.gid i.mtime = dentry.mtime // 1000000000 if dentry.is_hardlink: i.type = LNKTYPE hardlink = dentry.hardlink or b'.' i.linkname = fsdecode(bytes(hardlink)) elif dentry.is_file: i.type = REGTYPE i.size = dentry.size elif dentry.is_symlink: i.type = SYMTYPE i.linkname = fsdecode(bytes(dentry.symlink)) elif dentry.is_chardev: i.type = CHRTYPE i.devmajor = dentry.major i.devminor = dentry.minor elif dentry.is_blockdev: i.type = BLKTYPE i.devmajor = dentry.major i.devminor = dentry.minor elif dentry.is_directory: i.type = DIRTYPE elif dentry.is_fifo: i.type = FIFOTYPE else: continue binary_stdout.write(i.tobuf(GNU_FORMAT)) if dentry.is_file and not dentry.is_hardlink: for hash in dentry.hashes: action = next(reader) if action.exception: raise action.exception[1] binary_stdout.write(action.value) padding = -i.size % BLOCKSIZE if padding: binary_stdout.write(bytes(padding)) binary_stdout.write(b'\0' * (BLOCKSIZE*2))
def update(): logging.info('Backup update started.') global args # Compare archive contents # dir_path, basename = path.split(abspath) # ext = ' ' # while len(ext) > 0: # basename, ext = path.splitext(basename) lst_name = path.join(dir_path, basename + '.lst.gz') try: logging.info("Collect backed up files info") backed = {} if not path.exists(lst_name): with TarFile.open(args.dst, 'r', ignore_zeros=True, errorlevel=0, encoding='mbcs', errors='utf-8') as arc: try: member = arc.next() while member is not None: try: # if sys.version_info[0] > 2: # fn = member.name + u'' # else: fn = member.name.decode('cp1251', errors='replace') backed[fn] = member with gzip.open(lst_name, 'a') as f: f.write(fn.encode('utf8', errors='replace')) # if sys.version_info[0] > 2: # f.write(bytes('\t' + str(member.mtime) + '\t' + str(member.size) + '\n', 'utf8')) # else: f.write('\t' + str(member.mtime) + '\t' + str(member.size) + '\n') except UnicodeEncodeError as e: logging.warning('UnicodeEncodeError: ' + str(e), exc_info=True) member = arc.next() except IOError as e: logging.warning('IOError: ' + str(e), exc_info=True) n = 1 incpath = abspath.replace(basename, basename + '_inc%s' % n) while path.exists(incpath): with TarFile.open(incpath, 'r', ignore_zeros=True, errorlevel=0, encoding='mbcs', errors='utf-8') as arc: try: member = arc.next() while member is not None: try: # if sys.version_info[0] > 2: # fn = member.name # else: fn = member.name.decode('utf8', errors='replace') if fn not in backed: backed[fn] = member with gzip.open(lst_name, 'a') as f: f.write( fn.encode('utf8', errors='replace')) # if sys.version_info[0] > 2: # f.write(bytes('\t' + str(member.mtime) + '\t' + str(member.size) + '\n', # 'utf8')) # else: f.write('\t' + str(member.mtime) + '\t' + str(member.size) + '\n') except UnicodeEncodeError as e: logging.warning('UnicodeEncodeError: ' + str(e), exc_info=True) member = arc.next() except IOError as e: logging.warning('IOError: ' + str(e), exc_info=True) n += 1 incpath = abspath.replace(basename, basename + '_inc%s' % n) else: with gzip.open(lst_name, 'r') as f: for line in f: fn = b'' mtime = fsize = 0 v = line.split(b'\t') if len(v) > 2: fn, mtime, fsize = v fsize = fsize.replace(b'\r', b'') fsize = fsize.replace(b'\n', b'') else: fn, mtime = v mtime = mtime.replace(b'\r', b'') mtime = mtime.replace(b'\n', b'') fn = unicode(fn, 'utf8') info = TarInfo(fn) info.mtime = int(mtime) info.size = int(fsize) backed[fn] = info n = 1 incpath = abspath.replace(basename, basename + '_inc%s' % n) while path.exists(incpath): if path.getsize(incpath) < 2048: try: with TarFile.open(incpath, 'r', ignore_zeros=True, errorlevel=0, encoding='mbcs', errors='utf-8') as arc: try: member = arc.next() if member is None: arc.close() logging.warning(incpath + ' is empty. Removing.') remove(incpath) break except IOError as e: logging.warning('IOError: ' + str(e), exc_info=True) except TarError as e: logging.warning('TarError: ' + str(e) + '. 
Removing ' + incpath + '.') remove(incpath) break n += 1 incpath = abspath.replace(basename, basename + '_inc%s' % n) updatedlst = path.join(dir_path, basename + updated_postfix) if path.exists(updatedlst): remove(updatedlst) exception_thrown = False added_count = 0 with TarFile.open(incpath, 'w:gz', ignore_zeros=True, encoding='mbcs', errors='utf-8') as arc,\ gzip.open(lst_name, 'a') as lst: for dirpath, dirnames, filenames in walk(args.src): if exception_thrown: break for filename in filenames: fn = path.join(dirpath, filename) op = ' added' key = fn[3:].replace('\\', '/') if key in backed: if backed[key].mtime < int(path.getmtime(fn)): op = ' updated' backed.pop(key) else: logging.debug(fn[3:] + ' up to date') backed.pop(key) continue try: arc.add(fn) added_count += 1 lst.write(key.encode('utf8', errors='replace')) # if sys.version_info[0] > 2: # lst.write(bytes('\t' + str(int(path.getmtime(fn))) + '\t' + str(path.getsize(fn)) + '\n', # 'utf8')) # else: lst.write( '\t' + str(int(path.getmtime(fn))) + '\t' + str(path.getsize(fn)) + '\n', ) if op == ' updated': with gzip.open(updatedlst, 'a') as u: u.write( key.encode('utf8', errors='replace') + b'\n') logging.info(fn[3:] + op) except CompressionError as e: logging.warning(fn + ': CompressionError: ' + str(e), exc_info=True) except StreamError as e: logging.warning(fn + ': StreamError: ' + str(e), exc_info=True) except UnicodeEncodeError as e: logging.warning(fn + ': UnicodeEncodeError: ' + str(e), exc_info=True) except IOError as e: logging.warning(fn + ': IOError: ' + str(e), exc_info=True) exception_thrown = True break except Exception as e: logging.warning(fn + ': Exception: ' + str(e), exc_info=True) # Remove backup file if no files added if not added_count: remove(incpath) if len(backed) > 0: with gzip.open(path.join(dir_path, basename + delete_postfix), 'w') as dl: for k in backed.keys(): try: dl.write(k.encode('utf8', errors='replace')) dl.write(b'\n') except Exception as e: logging.warning(str(e), exc_info=True) if exception_thrown: return exception_thrown else: logging.info("Done.") return exception_thrown except ReadError: create()
def get_artifact_file(artifact_file_name: str): artifact_file_path = get_artifact_file_path(artifact_file_name) artifact_file = open(artifact_file_path, mode='r+b') artifact_file_info = TarInfo(artifact_file_name) artifact_file_info.size = get_file_size(artifact_file) return artifact_file_info, artifact_file
def generate_biom_and_metadata_release(study_status='public'): """Generate a list of biom/meatadata filepaths and a tgz of those files Parameters ---------- study_status : str, optional The study status to search for. Note that this should always be set to 'public' but having this exposed helps with testing. The other options are 'private' and 'sandbox' """ studies = qdb.study.Study.get_by_status(study_status) qiita_config = ConfigurationManager() working_dir = qiita_config.working_dir portal = qiita_config.portal bdir = qdb.util.get_db_files_base_dir() time = datetime.now().strftime('%m-%d-%y %H:%M:%S') data = [] for s in studies: # [0] latest is first, [1] only getting the filepath sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir) for a in s.artifacts(artifact_type='BIOM'): if a.processing_parameters is None or a.visibility != study_status: continue merging_schemes, parent_softwares = a.merging_scheme software = a.processing_parameters.command.software software = '%s v%s' % (software.name, software.version) for x in a.filepaths: if x['fp_type'] != 'biom' or 'only-16s' in x['fp']: continue fp = relpath(x['fp'], bdir) for pt in a.prep_templates: categories = pt.categories() platform = '' target_gene = '' if 'platform' in categories: platform = ', '.join( set(pt.get_category('platform').values())) if 'target_gene' in categories: target_gene = ', '.join( set(pt.get_category('target_gene').values())) for _, prep_fp in pt.get_filepaths(): if 'qiime' not in prep_fp: break prep_fp = relpath(prep_fp, bdir) # format: (biom_fp, sample_fp, prep_fp, qiita_artifact_id, # platform, target gene, merging schemes, # artifact software/version, # parent sofware/version) data.append( (fp, sample_fp, prep_fp, a.id, platform, target_gene, merging_schemes, software, parent_softwares)) # writing text and tgz file ts = datetime.now().strftime('%m%d%y-%H%M%S') tgz_dir = join(working_dir, 'releases') create_nested_path(tgz_dir) tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status)) tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status)) txt_lines = [ "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t" "target gene\tmerging scheme\tartifact software\tparent software" ] with topen(tgz_name, "w|gz") as tgz: for biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv in data: txt_lines.append( "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv)) tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False) tgz.add(join(bdir, sample_fp), arcname=sample_fp, recursive=False) tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False) info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts)) txt_hd = BytesIO() txt_hd.write(bytes('\n'.join(txt_lines), 'ascii')) txt_hd.seek(0) info.size = len(txt_hd.read()) txt_hd.seek(0) tgz.addfile(tarinfo=info, fileobj=txt_hd) with open(tgz_name, "rb") as f: md5sum = md5() for c in iter(lambda: f.read(4096), b""): md5sum.update(c) rename(tgz_name, tgz_name_final) vals = [('filepath', tgz_name_final[len(working_dir):], r_client.set), ('md5sum', md5sum.hexdigest(), r_client.set), ('time', time, r_client.set)] for k, v, f in vals: redis_key = '%s:release:%s:%s' % (portal, study_status, k) # important to "flush" variables to avoid errors r_client.delete(redis_key) f(redis_key, v)
def plot_predictions(self): epoch, batch, data = self.get_next_batch(train=False) # get a test batch num_classes = self.test_data_provider.get_num_classes() NUM_ROWS = 2 NUM_COLS = 4 NUM_IMGS = NUM_ROWS * NUM_COLS if not self.save_preds else data[0].shape[1] NUM_TOP_CLASSES = min(num_classes, 5) # show this many top labels NUM_OUTPUTS = self.model_state["layers"][self.softmax_name]["outputs"] PRED_IDX = 1 label_names = [lab.split(",")[0] for lab in self.test_data_provider.batch_meta["label_names"]] if self.only_errors: preds = n.zeros((data[0].shape[1], NUM_OUTPUTS), dtype=n.single) else: preds = n.zeros((NUM_IMGS, NUM_OUTPUTS), dtype=n.single) # rand_idx = nr.permutation(n.r_[n.arange(1), n.where(data[1] == 552)[1], n.where(data[1] == 795)[1], n.where(data[1] == 449)[1], n.where(data[1] == 274)[1]])[:NUM_IMGS] rand_idx = nr.randint(0, data[0].shape[1], NUM_IMGS) if NUM_IMGS < data[0].shape[1]: data = [n.require(d[:, rand_idx], requirements="C") for d in data] # data += [preds] # Run the model print [d.shape for d in data], preds.shape self.libmodel.startFeatureWriter(data, [preds], [self.softmax_name]) IGPUModel.finish_batch(self) print preds data[0] = self.test_data_provider.get_plottable_data(data[0]) if self.save_preds: if not gfile.Exists(self.save_preds): gfile.MakeDirs(self.save_preds) preds_thresh = preds > 0.5 # Binarize predictions data[0] = data[0] * 255.0 data[0][data[0] < 0] = 0 data[0][data[0] > 255] = 255 data[0] = n.require(data[0], dtype=n.uint8) dir_name = "%s_predictions_batch_%d" % (os.path.basename(self.save_file), batch) tar_name = os.path.join(self.save_preds, "%s.tar" % dir_name) tfo = gfile.GFile(tar_name, "w") tf = TarFile(fileobj=tfo, mode="w") for img_idx in xrange(NUM_IMGS): img = data[0][img_idx, :, :, :] imsave = Image.fromarray(img) prefix = ( "CORRECT" if data[1][0, img_idx] == preds_thresh[img_idx, PRED_IDX] else "FALSE_POS" if preds_thresh[img_idx, PRED_IDX] == 1 else "FALSE_NEG" ) file_name = "%s_%.2f_%d_%05d_%d.png" % ( prefix, preds[img_idx, PRED_IDX], batch, img_idx, data[1][0, img_idx], ) # gf = gfile.GFile(file_name, "w") file_string = StringIO() imsave.save(file_string, "PNG") tarinf = TarInfo(os.path.join(dir_name, file_name)) tarinf.size = file_string.tell() file_string.seek(0) tf.addfile(tarinf, file_string) tf.close() tfo.close() # gf.close() print "Wrote %d prediction PNGs to %s" % (preds.shape[0], tar_name) else: fig = pl.figure(3, figsize=(12, 9)) fig.text(0.4, 0.95, "%s test samples" % ("Mistaken" if self.only_errors else "Random")) if self.only_errors: # what the net got wrong if NUM_OUTPUTS > 1: err_idx = [i for i, p in enumerate(preds.argmax(axis=1)) if p not in n.where(data[2][:, i] > 0)[0]] else: err_idx = n.where(data[1][0, :] != preds[:, 0].T)[0] print err_idx err_idx = r.sample(err_idx, min(len(err_idx), NUM_IMGS)) data[0], data[1], preds = data[0][:, err_idx], data[1][:, err_idx], preds[err_idx, :] import matplotlib.gridspec as gridspec import matplotlib.colors as colors cconv = colors.ColorConverter() gs = gridspec.GridSpec(NUM_ROWS * 2, NUM_COLS, width_ratios=[1] * NUM_COLS, height_ratios=[2, 1] * NUM_ROWS) # print data[1] for row in xrange(NUM_ROWS): for col in xrange(NUM_COLS): img_idx = row * NUM_COLS + col if data[0].shape[0] <= img_idx: break pl.subplot(gs[(row * 2) * NUM_COLS + col]) # pl.subplot(NUM_ROWS*2, NUM_COLS, row * 2 * NUM_COLS + col + 1) pl.xticks([]) pl.yticks([]) img = data[0][img_idx, :, :, :] img = img.squeeze() if len(img.shape) > 2: # more than 2 dimensions if img.shape[2] is 2: # if two channels # copy 2nd 
to 3rd channel for visualization a1 = img a2 = img[:, :, 1] a2 = a2[:, :, n.newaxis] img = n.concatenate((a1, a2), axis=2) pl.imshow(img, interpolation="lanczos") else: pl.imshow(img, interpolation="lanczos", cmap=pl.gray()) show_title = data[1].shape[0] == 1 true_label = [int(data[1][0, img_idx])] if show_title else n.where(data[1][:, img_idx] == 1)[0] # print true_label # print preds[img_idx,:].shape # print preds[img_idx,:].max() true_label_names = [label_names[i] for i in true_label] img_labels = sorted(zip(preds[img_idx, :], label_names), key=lambda x: x[0])[-NUM_TOP_CLASSES:] # print img_labels axes = pl.subplot(gs[(row * 2 + 1) * NUM_COLS + col]) height = 0.5 ylocs = n.array(range(NUM_TOP_CLASSES)) * height pl.barh( ylocs, [l[0] for l in img_labels], height=height, color=["#ffaaaa" if l[1] in true_label_names else "#aaaaff" for l in img_labels], ) # pl.title(", ".join(true_labels)) if show_title: pl.title(", ".join(true_label_names), fontsize=15, fontweight="bold") else: print true_label_names pl.yticks( ylocs + height / 2, [l[1] for l in img_labels], x=1, backgroundcolor=cconv.to_rgba("0.65", alpha=0.5), weight="bold", ) for line in enumerate(axes.get_yticklines()): line[1].set_visible(False) # pl.xticks([width], ['']) # pl.yticks([]) pl.xticks([]) pl.ylim(0, ylocs[-1] + height) pl.xlim(0, 1)
def make_dockerfile(obj): """Transform obj to a docker tar. """ if isinstance(obj, TarReader): return obj if isinstance(obj, str) and os.path.isfile(obj): archive = None ext = os.path.splitext(obj)[1] if ext == '.tar': encoding = None archive = True elif ext in ('.tgz', '.gz'): encoding = 'gzip' archive = True elif ext in ('.tbz', '.tbz2', '.tb2', '.bz2'): encoding = 'bz2' archive = True elif ext in ('.tz', '.Z'): encoding = 'compress' archive = True elif ext in ('.tlz', '.lz', '.lzma'): encoding = 'lzma' archive = True elif ext in ('.txz', '.xz'): # lzma & lzma2 encoding = 'xz' archive = True with open(obj, 'rb') as file: if archive: return TarReader(file, encoding) # Let's pretend it's a single Dockerfile. Open it try: obj = TarFile.open(fileobj=file) except TarError: obj = io.BytesIO(file.read()) if isinstance(obj, str) and os.path.isdir(obj): raise NotImplementedError('Currently not implemented') if isinstance(obj, str): raise ValueError('%r is not a Dockerfile' % obj) if isinstance(obj, io.StringIO): obj = io.BytesIO(obj.getvalue().encode('utf-8')) if isinstance(obj, io.BytesIO): out = io.BytesIO() info = TarInfo('Dockerfile') info.size = len(obj.getvalue()) tar = TarFile.open(fileobj=out, mode='w') tar.addfile(info, obj) tar.close() obj = tar if isinstance(obj, str) and os.path.isdir(obj): # it's a docker context, Make a tar and compress it tar = TarFile.open(fileobj=NamedTemporaryFile(), mode='w:gz') tar.add(obj, arcname='.') tar.close() obj = tar.fileobj if isinstance(obj, TarFile): obj.close() obj = obj.fileobj if isinstance(obj, gzip.GzipFile): return TarReader(obj, 'gzip') if isinstance(obj, bz2.BZ2File): return TarReader(obj, 'bz2') if isinstance(obj, lzma.LZMAFile): return TarReader(obj, 'xz') return TarReader(obj)
def generate_biom_and_metadata_release(study_status='public'): """Generate a list of biom/meatadata filepaths and a tgz of those files Parameters ---------- study_status : str, optional The study status to search for. Note that this should always be set to 'public' but having this exposed helps with testing. The other options are 'private' and 'sandbox' """ studies = qdb.study.Study.get_by_status(study_status) qiita_config = ConfigurationManager() working_dir = qiita_config.working_dir portal = qiita_config.portal bdir = qdb.util.get_db_files_base_dir() time = datetime.now().strftime('%m-%d-%y %H:%M:%S') data = [] for s in studies: # [0] latest is first, [1] only getting the filepath sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir) for a in s.artifacts(artifact_type='BIOM'): if a.processing_parameters is None: continue cmd_name = a.processing_parameters.command.name # this loop is necessary as in theory an artifact can be # generated from multiple prep info files human_cmd = [] for p in a.parents: pp = p.processing_parameters pp_cmd_name = pp.command.name if pp_cmd_name == 'Trimming': human_cmd.append('%s @ %s' % ( cmd_name, str(pp.values['length']))) else: human_cmd.append('%s, %s' % (cmd_name, pp_cmd_name)) human_cmd = ', '.join(human_cmd) for _, fp, fp_type in a.filepaths: if fp_type != 'biom' or 'only-16s' in fp: continue fp = relpath(fp, bdir) # format: (biom_fp, sample_fp, prep_fp, qiita_artifact_id, # human readable name) for pt in a.prep_templates: for _, prep_fp in pt.get_filepaths(): if 'qiime' not in prep_fp: break prep_fp = relpath(prep_fp, bdir) data.append((fp, sample_fp, prep_fp, a.id, human_cmd)) # writing text and tgz file ts = datetime.now().strftime('%m%d%y-%H%M%S') tgz_dir = join(working_dir, 'releases') if not exists(tgz_dir): makedirs(tgz_dir) tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status)) tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status)) txt_hd = StringIO() with topen(tgz_name, "w|gz") as tgz: # writing header for txt txt_hd.write( "biom_fp\tsample_fp\tprep_fp\tqiita_artifact_id\tcommand\n") for biom_fp, sample_fp, prep_fp, artifact_id, human_cmd in data: txt_hd.write("%s\t%s\t%s\t%s\t%s\n" % ( biom_fp, sample_fp, prep_fp, artifact_id, human_cmd)) tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False) tgz.add(join(bdir, sample_fp), arcname=sample_fp, recursive=False) tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False) txt_hd.seek(0) info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts)) info.size = len(txt_hd.buf) tgz.addfile(tarinfo=info, fileobj=txt_hd) with open(tgz_name, "rb") as f: md5sum = md5() for c in iter(lambda: f.read(4096), b""): md5sum.update(c) rename(tgz_name, tgz_name_final) vals = [ ('filepath', tgz_name_final[len(working_dir):], r_client.set), ('md5sum', md5sum.hexdigest(), r_client.set), ('time', time, r_client.set)] for k, v, f in vals: redis_key = '%s:release:%s:%s' % (portal, study_status, k) # important to "flush" variables to avoid errors r_client.delete(redis_key) f(redis_key, v)
def _addOneMember(path, data, modtime): stream = StringIO(data) info = TarInfo(path) info.size = len(data) info.mtime = modtime archive.addfile(info, stream)
def compute(self, conn, data=None): tarinfo = TarInfo() tarinfo.name = self.name tarinfo.mode = 0o700 tarinfo.uid = 0 tarinfo.gid = 0 tarinfo.type = REGTYPE tarinfo.linkname = "" if self.name == CONTAINER_PROPERTIES: meta = data or conn.container_get_properties(self.acct, self.ref) tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True)) self._filesize = tarinfo.size self._buf = tarinfo.tobuf(format=PAX_FORMAT) return elif self.name == CONTAINER_MANIFEST: tarinfo.size = len(json.dumps(data, sort_keys=True)) self._filesize = tarinfo.size self._buf = tarinfo.tobuf(format=PAX_FORMAT) return entry = conn.object_get_properties(self.acct, self.ref, self.name) properties = entry['properties'] # x-static-large-object if properties.get(SLO, False): tarinfo.size = int(properties.get(SLO_SIZE)) _, slo = conn.object_fetch(self.acct, self.ref, self.name, properties=False) self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict) self._checksums = {} # format MD5 to share same format as multi chunks object offset = 0 for idx, ck in enumerate(self._slo): self._checksums[idx] = { 'hash': ck['hash'].upper(), 'size': ck['bytes'], 'offset': offset } offset += ck['bytes'] else: tarinfo.size = int(entry['length']) meta, chunks = conn.object_locate(self.acct, self.ref, self.name, properties=False) storage_method = STORAGE_METHODS.load(meta['chunk_method']) chunks = _sort_chunks(chunks, storage_method.ec) for idx in chunks: chunks[idx] = chunks[idx][0] del chunks[idx]['url'] del chunks[idx]['score'] del chunks[idx]['pos'] self._checksums = chunks self._filesize = tarinfo.size # XATTR # do we have to store basic properties like policy, ... ? for key, val in properties.items(): assert isinstance(val, basestring), \ "Invalid type for %s:%s:%s" % (self.acct, self.name, key) if self.slo and key in SLO_HEADERS: continue tarinfo.pax_headers[SCHILY + key] = val tarinfo.pax_headers['mime_type'] = entry['mime_type'] self._buf = tarinfo.tobuf(format=PAX_FORMAT)
def tar_add_file_from_string(tar, tar_path, filename, content): file = StringIO.StringIO(content) info = TarInfo(tar_join_path(tar_path, filename)) info.size = len(content) tar.addfile(info, file)