def download(self, remote_path, local_path):
    """Fetch ``remote_path`` from the VM into ``local_path``.

    The file is first copied (with sudo) into the Vagrant shared folder,
    made readable by the ``vagrant`` user, then moved on the host side to
    its final destination.  Returns True on success, False on failure.
    """
    if self.use_chroot:
        remote_path = join_root(PosixPath('/experimentroot'), remote_path)

    temp = make_unique_name(b'reprozip_output_')
    rtemp = PosixPath('/vagrant') / temp  # path as seen inside the VM
    ltemp = self.target / temp            # same file as seen from the host

    # Copy file to shared folder
    logging.info("Copying file to shared folder...")
    chan = self.ssh.get_transport().open_session()
    cp_cmd = '/bin/cp %s %s' % (
        shell_escape(remote_path.path),
        shell_escape(rtemp.path))
    chown_cmd = '/bin/chown vagrant %s' % shell_escape(rtemp.path)
    chmod_cmd = '/bin/chmod 644 %s' % shell_escape(rtemp.path)
    chan.exec_command('/usr/bin/sudo /bin/sh -c %s' % shell_escape(
        ' && '.join((cp_cmd, chown_cmd, chmod_cmd))))
    # FIX: close the channel once the command has finished; it was
    # previously leaked (the upload_file counterparts all close it)
    status = chan.recv_exit_status()
    chan.close()
    if status != 0:
        logging.critical("Couldn't copy file in virtual machine")
        try:
            ltemp.remove()
        except OSError:
            pass
        return False

    # Move file to final destination
    try:
        ltemp.rename(local_path)
    except OSError as e:
        logging.critical("Couldn't download output file: %s\n%s",
                         remote_path, str(e))
        ltemp.remove()
        return False
    return True
def chroot_run(args):
    """Runs the command in the chroot.

    Builds one shell command per selected run (environment, working
    directory, argv), wraps each in ``chroot --userspec``, chains them with
    ``&&`` and executes the whole thing through the shell, with X11
    forwarding set up when requested.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'chroot')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        # cd into the run's working directory, then start from a clean
        # environment (env -i) populated only with the recorded variables
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000),
                              run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    # X11 setup commands (e.g. xauth) run inside the chroot, before the runs
    cmds = [
        'chroot %s /bin/sh -c %s' % (shell_escape(unicode_(root)),
                                     shell_escape(c))
        for c in x11.init_cmds
    ] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'chroot')
def finalize(self):
    """Commit uploaded files into a new Docker image.

    Writes a Dockerfile COPYing every file recorded in ``docker_copy``
    from the build directory, builds a new image from the current one,
    and untags the previous image unless it is the initial one.  The
    build directory is removed in every case.
    """
    if not self.docker_copy:
        # Nothing was uploaded: no new image needed
        self.build_directory.rmtree()
        return

    from_image = self.unpacked_info['current_image']

    with self.build_directory.open('w', 'Dockerfile',
                                   encoding='utf-8',
                                   newline='\n') as dockerfile:
        dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
        for src, target in self.docker_copy:
            # FIXME : spaces in filenames will probably break Docker
            dockerfile.write(
                'COPY \\\n %s \\\n %s\n' % (shell_escape(
                    unicode_(src)), shell_escape(unicode_(target))))

        if self.docker_copy:
            # Give the copied files to the experiment's user
            dockerfile.write('RUN /busybox chown 1000:1000 \\\n'
                             ' %s\n' % ' \\\n '.join(
                                 shell_escape(unicode_(target))
                                 for src, target in self.docker_copy))
        # TODO : restore permissions?

    image = make_unique_name(b'reprounzip_image_')
    retcode = subprocess.call(self.docker_cmd +
                              ['build', '-t', image, '.'],
                              cwd=self.build_directory.path)
    if retcode != 0:
        logging.critical("docker build failed with code %d", retcode)
        sys.exit(1)
    else:
        logging.info("New image created: %s", image.decode('ascii'))
        if from_image != self.unpacked_info['initial_image']:
            logging.info("Untagging previous image %s",
                         from_image.decode('ascii'))
            retcode = subprocess.call(self.docker_cmd +
                                      ['rmi', from_image])
            if retcode != 0:
                # Best-effort cleanup: a leftover image is not fatal
                logging.warning(
                    "Can't remove previous image, docker "
                    "returned %d", retcode)
        self.unpacked_info['current_image'] = image
        write_dict(self.target, self.unpacked_info)

    self.build_directory.rmtree()
def finalize(self):
    """Build a new Docker image containing the uploaded input files.

    Generates a Dockerfile (FROM current image + COPY of each recorded
    upload + chown), runs ``docker build``, records the new image name,
    and untags the previous image unless it is the initial one.  Always
    removes the build directory.
    """
    if not self.docker_copy:
        # No uploads recorded: keep the current image
        self.build_directory.rmtree()
        return

    from_image = self.unpacked_info['current_image']

    with self.build_directory.open('w', 'Dockerfile',
                                   encoding='utf-8',
                                   newline='\n') as dockerfile:
        dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
        for src, target in self.docker_copy:
            # FIXME : spaces in filenames will probably break Docker
            dockerfile.write(
                'COPY \\\n %s \\\n %s\n' % (
                    shell_escape(unicode_(src)),
                    shell_escape(unicode_(target))))

        if self.docker_copy:
            # Hand ownership of the copied files to the experiment's user
            dockerfile.write('RUN /busybox chown 1000:1000 \\\n'
                             ' %s\n' % ' \\\n '.join(
                                 shell_escape(unicode_(target))
                                 for src, target in self.docker_copy))
        # TODO : restore permissions?

    image = make_unique_name(b'reprounzip_image_')
    retcode = subprocess.call(self.docker_cmd +
                              ['build', '-t', image, '.'],
                              cwd=self.build_directory.path)
    if retcode != 0:
        logger.critical("docker build failed with code %d", retcode)
        sys.exit(1)
    else:
        logger.info("New image created: %s", image.decode('ascii'))
        if from_image != self.unpacked_info['initial_image']:
            logger.info("Untagging previous image %s",
                        from_image.decode('ascii'))
            retcode = subprocess.call(self.docker_cmd +
                                      ['rmi', from_image])
            if retcode != 0:
                # Best-effort: failing to remove the old image is not fatal
                logger.warning("Can't remove previous image, docker "
                               "returned %d", retcode)
        self.unpacked_info['current_image'] = image
        write_dict(self.target, self.unpacked_info)

    self.build_directory.rmtree()
def upload_file(self, local_path, input_path):
    """Upload ``local_path`` over SCP, then sudo-move it into place.

    The file is first uploaded to a unique name under /tmp, then chown'd
    and chmod'd to match the destination's existing owner/mode before
    being moved over it.
    """
    if self.use_chroot:
        remote_path = join_root(PosixPath('/experimentroot'),
                                PosixPath(input_path))
    else:
        remote_path = input_path

    # Upload to a temporary file first
    logging.info("Uploading file via SCP...")
    rtemp = PosixPath(make_unique_name(b'/tmp/reprozip_input_'))
    self.client_scp.put(local_path.path, rtemp.path, recursive=False)

    # Move it
    logging.info("Moving file into place...")
    chan = self.ssh.get_transport().open_session()
    chown_cmd = '/bin/chown --reference=%s %s' % (shell_escape(
        remote_path.path), shell_escape(rtemp.path))
    chmod_cmd = '/bin/chmod --reference=%s %s' % (shell_escape(
        remote_path.path), shell_escape(rtemp.path))
    mv_cmd = '/bin/mv %s %s' % (shell_escape(
        rtemp.path), shell_escape(remote_path.path))
    # Commands joined with ';' (not '&&') so the mv still runs even if
    # chown/chmod fail -- presumably because the destination may not exist
    # yet, in which case --reference has nothing to copy from
    chan.exec_command('/usr/bin/sudo /bin/sh -c %s' % shell_escape(';'.join(
        (chown_cmd, chmod_cmd, mv_cmd))))
    if chan.recv_exit_status() != 0:
        logging.critical("Couldn't move file in virtual machine")
        sys.exit(1)
    chan.close()
def upload_file(self, local_path, input_path):
    """Send a local file to the remote machine over SCP, then move it
    into its final location, copying the destination's owner and mode.
    """
    if self.use_chroot:
        destination = join_root(PosixPath('/experimentroot'), input_path)
    else:
        destination = input_path

    # Upload to a temporary file first
    logging.info("Uploading file via SCP...")
    tmp_remote = PosixPath(make_unique_name(b'/tmp/reprozip_input_'))
    self.client_scp.put(local_path.path, tmp_remote.path, recursive=False)

    # Move it
    logging.info("Moving file into place...")
    session = self.ssh.get_transport().open_session()
    esc_dest = shell_escape(destination.path)
    esc_tmp = shell_escape(tmp_remote.path)
    # ';' rather than '&&': the mv must run even if chown/chmod fail
    steps = (
        '/bin/chown --reference=%s %s' % (esc_dest, esc_tmp),
        '/bin/chmod --reference=%s %s' % (esc_dest, esc_tmp),
        '/bin/mv %s %s' % (esc_tmp, esc_dest),
    )
    session.exec_command('/usr/bin/sudo /bin/sh -c %s' %
                         shell_escape(';'.join(steps)))
    if session.recv_exit_status() != 0:
        logging.critical("Couldn't move file in virtual machine")
        sys.exit(1)
    session.close()
def upload_file(self, local_path, input_path):
    """Copy ``local_path`` into the VM through the Vagrant shared folder,
    then sudo-move it into place with the destination's owner/mode.
    """
    if self.use_chroot:
        remote_path = join_root(PosixPath('/experimentroot'), input_path)
    else:
        remote_path = input_path

    temp = make_unique_name(b'reprozip_input_')
    ltemp = self.target / temp              # host-side path in shared folder
    rtemp = PosixPath('/vagrant') / temp    # same file as seen from the VM

    # Copy file to shared folder
    logging.info("Copying file to shared folder...")
    local_path.copyfile(ltemp)

    # Move it
    logging.info("Moving file into place...")
    chan = self.ssh.get_transport().open_session()
    chown_cmd = '/bin/chown --reference=%s %s' % (shell_escape(
        remote_path.path), shell_escape(rtemp.path))
    chmod_cmd = '/bin/chmod --reference=%s %s' % (shell_escape(
        remote_path.path), shell_escape(rtemp.path))
    mv_cmd = '/bin/mv %s %s' % (shell_escape(
        rtemp.path), shell_escape(remote_path.path))
    chan.exec_command('/usr/bin/sudo /bin/sh -c %s' %
                      shell_escape(' && '.join(
                          (chown_cmd, chmod_cmd, mv_cmd))))
    if chan.recv_exit_status() != 0:
        logging.critical("Couldn't move file in virtual machine")
        # Clean up the temporary copy from the shared folder, best-effort
        try:
            ltemp.remove()
        except OSError:
            pass
        sys.exit(1)
    chan.close()
def cmd(lst, add=None):
    """Run a reprounzip command inside args.directory.

    *add*, when given, is a raw shell suffix appended verbatim, which
    forces execution through the shell.  Raises on nonzero exit.
    """
    if not add:
        logging.info("cmd: %s", ' '.join(lst))
        subprocess.check_call(rpuz + lst, cwd=args.directory)
        return
    logging.info("cmd: %s %s", ' '.join(lst), add)
    escaped = [shell_escape(a) for a in (rpuz + lst)]
    command_line = ' '.join(escaped) + ' ' + add
    subprocess.check_call(command_line, shell=True, cwd=args.directory)
def chroot_run(args):
    """Runs the command in the chroot.

    Assembles one shell command per selected run, wraps each in
    ``chroot --userspec`` and executes them joined with ``&&``, with X11
    forwarding when requested.
    """
    target = Path(args.target[0])
    # Sanity-check that this directory was unpacked by the chroot unpacker
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # X11 handler
    x11 = X11Handler(args.x11, ('local', socket.gethostname()),
                     args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        # Start from a clean environment containing only recorded variables
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000),
                              run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    # X11 setup commands run inside the chroot, before the actual runs
    cmds = ['chroot %s /bin/sh -c %s' % (shell_escape(unicode_(root)),
                                         shell_escape(c))
            for c in x11.init_cmds] + cmds
    cmds = ' && '.join(cmds)

    # Starts forwarding
    forwarders = []
    for portnum, connector in x11.port_forward:
        fwd = LocalForwarder(connector, portnum)
        forwarders.append(fwd)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
def chroot_run(args):
    """Runs the command in the chroot.

    Builds one shell command per selected run (recorded environment,
    working directory, argv), wraps each in ``chroot --userspec`` and
    executes them joined with ``&&``.
    """
    target = Path(args.target[0])
    # Sanity-check that this directory was unpacked by the chroot unpacker
    read_dict(target / '.reprounzip', 'chroot')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        # Clean environment (env -i) with only the recorded variables
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        userspec = '%s:%s' % (run.get('uid', 1000),
                              run.get('gid', 1000))
        cmd = 'chroot --userspec=%s %s /bin/sh -c %s' % (
            userspec,
            shell_escape(unicode_(root)),
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
def do_vistrails(target):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.
    """
    record_usage(do_vistrails=True)
    unpacker = signals.unpacker
    dot_vistrails = Path('~/.vistrails').expand_user()

    runs, packages, other_files = load_config(target / 'config.yml',
                                              canonical=True)
    for i, run in enumerate(runs):
        module_name = write_cltools_module(run, dot_vistrails)

    # NOTE(review): `i`, `run` and `module_name` leak out of the loop above,
    # so the workflow written below only references the LAST run -- confirm
    # whether multi-run experiments are intended to be supported here

    # Writes VisTrails workflow
    bundle = target / 'vistrails.vt'
    logging.info("Writing VisTrails workflow %s...", bundle)
    vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
    try:
        with vtdir.open('w', 'vistrail',
                        encoding='utf-8', newline='\n') as fp:
            vistrail = VISTRAILS_TEMPLATE
            cmdline = ' '.join(shell_escape(arg)
                               for arg in run['argv'])
            vistrail = vistrail.format(
                date='2014-11-12 15:31:18',
                unpacker=unpacker,
                directory=escape_xml(str(target.absolute())),
                cmdline=escape_xml(cmdline),
                module_name=module_name,
                run=i)
            fp.write(vistrail)

        # Zip the temporary directory into the .vt bundle
        with bundle.open('wb') as fp:
            z = zipfile.ZipFile(fp, 'w')
            with vtdir.in_dir():
                for path in Path('.').recursedir():
                    z.write(str(path))
            z.close()
    finally:
        vtdir.rmtree()
def do_vistrails(target):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.
    """
    record_usage(do_vistrails=True)
    unpacker = signals.unpacker
    dot_vistrails = Path('~/.vistrails').expand_user()

    runs, packages, other_files = load_config(target / 'config.yml',
                                              canonical=True)
    for i, run in enumerate(runs):
        module_name = write_cltools_module(run, dot_vistrails)

    # NOTE(review): the template below uses `i`, `run` and `module_name`
    # left over from the loop, i.e. only the last run is represented in the
    # generated workflow -- verify this is intentional

    # Writes VisTrails workflow
    bundle = target / 'vistrails.vt'
    logging.info("Writing VisTrails workflow %s...", bundle)
    vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
    try:
        with vtdir.open('w', 'vistrail',
                        encoding='utf-8', newline='\n') as fp:
            vistrail = VISTRAILS_TEMPLATE
            cmdline = ' '.join(shell_escape(arg)
                               for arg in run['argv'])
            vistrail = vistrail.format(date='2014-11-12 15:31:18',
                                       unpacker=unpacker,
                                       directory=escape_xml(
                                           str(target.absolute())),
                                       cmdline=escape_xml(cmdline),
                                       module_name=module_name,
                                       run=i)
            fp.write(vistrail)

        # Zip the temporary directory into the .vt bundle
        with bundle.open('wb') as fp:
            z = zipfile.ZipFile(fp, 'w')
            with vtdir.in_dir():
                for path in Path('.').recursedir():
                    z.write(str(path))
            z.close()
    finally:
        vtdir.rmtree()
def upload_file(self, local_path, input_path):
    """Copy a local file into the VM via the Vagrant shared folder, then
    sudo-move it into place, copying the destination's owner and mode.
    """
    if self.use_chroot:
        dest = join_root(PosixPath('/experimentroot'), input_path)
    else:
        dest = input_path

    name = make_unique_name(b'reprozip_input_')
    host_temp = self.target / name              # host side of shared folder
    guest_temp = PosixPath('/vagrant') / name   # same file, VM side

    # Copy file to shared folder
    logging.info("Copying file to shared folder...")
    local_path.copyfile(host_temp)

    # Move it
    logging.info("Moving file into place...")
    session = self.ssh.get_transport().open_session()
    script = ' && '.join((
        '/bin/chown --reference=%s %s' % (shell_escape(dest.path),
                                          shell_escape(guest_temp.path)),
        '/bin/chmod --reference=%s %s' % (shell_escape(dest.path),
                                          shell_escape(guest_temp.path)),
        '/bin/mv %s %s' % (shell_escape(guest_temp.path),
                           shell_escape(dest.path)),
    ))
    session.exec_command('/usr/bin/sudo /bin/sh -c %s' %
                         shell_escape(script))
    if session.recv_exit_status() != 0:
        logging.critical("Couldn't move file in virtual machine")
        # Best-effort removal of the leftover temporary copy
        try:
            host_temp.remove()
        except OSError:
            pass
        sys.exit(1)
    session.close()
def directory_run(args):
    """Runs the command in the directory.

    Runs the selected runs directly on the host, pointing the dynamic
    linker and PATH at the unpacked ``root`` directory, and rewriting
    absolute command-line arguments into that directory when they exist
    there.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    lib_dirs = []
    logger.debug("Running: %s", "/sbin/ldconfig -v -N")
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    try:
        # ldconfig -v prints directory names unindented, ending with ':'
        for line in stdout.splitlines():
            if len(line) < 2 or line[0] in (b' ', b'\t'):
                continue
            if line.endswith(b':'):
                lib_dirs.append(Path(line[:-1]))
    finally:
        if p.returncode != 0:
            raise subprocess.CalledProcessError(p.returncode,
                                                ['/sbin/ldconfig', '-v',
                                                 '-N'])

    # Point the dynamic linker at the same directories inside root
    lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join(
        shell_escape(str(join_root(root, d)))
        for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            str(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            # Pass the host's display through to the experiment
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in environ.items()
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string
        path = ':'.join(str(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in range(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                # NOTE(review): `p.is_absolute` is accessed without calling
                # it; if it is a method this is always truthy -- confirm
                # against the Path class used here
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                            (len(rp.components) > 3 and
                             rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logger.warning("Rewrote command-line as: %s",
                               ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    logger.debug("Running: %s", cmds)
    retcode = interruptible_call(cmds, shell=True)
    print("\n*** Command finished, status: %d\n" % retcode,
          file=sys.stderr)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
def docker_run(args):
    """Runs the experiment in the container.

    Starts a fresh container from the current image, runs the selected
    runs inside it (optionally detached, optionally with X11 forwarding),
    then commits the container to a new image so filesystem changes are
    preserved, and cleans up the container and the previous image.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're "
                         "detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run "
                         "setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host),
                             args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            # Warn if the docker daemon doesn't look local: the container
            # probably can't reach the forwarded display on this machine
            if (docker_host != local_ip and
                    docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. "
                    "Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        # Clean environment (busybox env -i) with only recorded variables
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        # Drop to the experiment's uid/gid using the bundled rpzsudo
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid,
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call([
            'docker', 'run', b'--name=' + container, '-h', hostname, '-d',
            '-t'
        ] + args.docker_option + [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(
        ['docker', 'run', b'--name=' + container, '-h', hostname,
         '-i', '-t'] +
        args.docker_option +
        [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s",
                      container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_run(args):
    """Runs the experiment in the virtual machine.

    Builds one shell command per selected run, wraps them in either a
    chroot or a plain sudo depending on how the VM was set up, swaps the
    VM's hostname for the duration of the run, and executes everything
    over SSH.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info.get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        # busybox provides env inside the chroot; the system env otherwise
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    # X11 setup commands run first, inside the chroot when applicable
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname
    # after the run, which might cause issues if several
    # "reprounzip vagrant run" are running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target, unpacked_info['use_chroot'])

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def machine_setup(target, use_chroot):
    """Prepare the machine and get SSH parameters from ``vagrant ssh``.

    Brings the VM up if necessary, parses ``vagrant ssh-config`` into a
    dict of connection parameters suitable for paramiko, and (when
    ``use_chroot``) rbind-mounts /dev and /proc into /experimentroot.
    """
    try:
        out = check_output(['vagrant', 'ssh-config'],
                           cwd=target.path,
                           stderr=subprocess.PIPE)
    except subprocess.CalledProcessError:
        # Makes sure the VM is running
        logging.info("Calling 'vagrant up'...")
        try:
            retcode = subprocess.check_call(['vagrant', 'up'],
                                            cwd=target.path)
        except OSError:
            logging.critical("vagrant executable not found")
            sys.exit(1)
        else:
            if retcode != 0:
                logging.critical("vagrant up failed with code %d", retcode)
                sys.exit(1)
        # Try again
        out = check_output(['vagrant', 'ssh-config'], cwd=target.path)

    # Parse "Key value" lines from vagrant ssh-config
    vagrant_info = {}
    for line in out.split(b'\n'):
        line = line.strip().split(b' ', 1)
        if len(line) != 2:
            continue
        value = line[1].decode('utf-8')
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            # Vagrant should really be escaping special characters here, but
            # it's not -- https://github.com/mitchellh/vagrant/issues/6428
            value = value[1:-1]
        vagrant_info[line[0].decode('utf-8').lower()] = value

    if 'identityfile' in vagrant_info:
        key_file = vagrant_info['identityfile']
    else:
        key_file = Path('~/.vagrant.d/insecure_private_key').expand_user()
    info = dict(hostname=vagrant_info.get('hostname', '127.0.0.1'),
                port=int(vagrant_info.get('port', 2222)),
                username=vagrant_info.get('user', 'vagrant'),
                key_filename=key_file)
    logging.debug("SSH parameters from Vagrant: %s@%s:%s, key=%s",
                  info['username'], info['hostname'], info['port'],
                  info['key_filename'])

    if use_chroot:
        # Mount directories
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(IgnoreMissingKey())
        ssh.connect(**info)
        chan = ssh.get_transport().open_session()
        chan.exec_command(
            '/usr/bin/sudo /bin/sh -c %s' % shell_escape(
                'for i in dev proc; do '
                'if ! grep "^/experimentroot/$i$" /proc/mounts; then '
                'mount -o rbind /$i /experimentroot/$i; '
                'fi; '
                'done'))
        # FIX: wait for the mount loop to complete before closing the
        # connection; previously ssh.close() followed exec_command
        # immediately, so the command could be cut short and any mount
        # failure went unreported
        if chan.recv_exit_status() != 0:
            logging.warning("Failed to mount directories in the virtual "
                            "machine")
        ssh.close()
    return info
def do_vistrails(target, pack=None, **kwargs):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.
    """
    record_usage(do_vistrails=True)

    config = load_config(target / 'config.yml', canonical=True)

    # Writes VisTrails workflow
    bundle = target / 'vistrails.vt'
    logging.info("Writing VisTrails workflow %s...", bundle)
    vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
    ids = IdScope()
    try:
        with vtdir.open('w', 'vistrail',
                        encoding='utf-8', newline='\n') as fp:
            wf = Workflow(fp, ids)

            # Directory module, refering to this directory
            d = wf.add_module('%s:Directory' % rpz_id, rpz_version)
            wf.add_function(d, 'directory',
                            [(directory_sig, str(target.resolve()))])

            connect_from = d

            # One Run module per run, chained through the 'experiment' port
            for i, run in enumerate(config.runs):
                inputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                if i in f.read_runs)
                outputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                 if i in f.write_runs)
                ports = itertools.chain((('input', p) for p in inputs),
                                        (('output', p) for p in outputs))

                # Run module
                r = wf.add_module('%s:Run' % rpz_id, rpz_version)
                wf.add_function(r, 'cmdline', [
                    (string_sig,
                     ' '.join(shell_escape(arg) for arg in run['argv']))])
                wf.add_function(r, 'run_number', [(integer_sig, i)])

                # Port specs for input/output files
                for type_, name in ports:
                    wf.add_port_spec(r, name, type_, [file_pkg_mod])

                # Draw connection
                wf.connect(connect_from, experiment_sig, 'experiment',
                           r, experiment_sig, 'experiment')
                connect_from = r

            wf.close()

        # Zip the temporary directory into the .vt bundle
        with bundle.open('wb') as fp:
            z = zipfile.ZipFile(fp, 'w')
            with vtdir.in_dir():
                for path in Path('.').recursedir():
                    z.write(str(path))
            z.close()
    finally:
        vtdir.rmtree()
def _print_package_info(pack, info, verbosity=1):
    """Pretty-print the information collected about a pack file.

    :param pack: path of the ``.rpz`` file (has a ``size()`` method).
    :param info: nested dict of collected metadata ('pack', 'meta',
        'runs', 'inputs_outputs', 'unpacker_status' sections).
    :param verbosity: 1 for a summary, 2 adds run/file details, 3 adds
        internal counters.
    """
    print("Pack file: %s" % pack)
    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))

    info_pack = info.get('pack')
    if info_pack:
        if 'total_size' in info_pack:
            print("Unpacked size: %s" % hsize(info_pack['total_size']))
        if 'total_paths' in info_pack:
            print("Total packed paths: %d" % info_pack['total_paths'])
        if verbosity >= 3:
            print(" Files: %d" % info_pack['files'])
            print(" Directories: %d" % info_pack['dirs'])
            if info_pack.get('symlinks'):
                print(" Symbolic links: %d" % info_pack['symlinks'])
            if info_pack.get('hardlinks'):
                print(" Hard links: %d" % info_pack['hardlinks'])
        if info_pack.get('others'):
            print(" Unknown (what!?): %d" % info_pack['others'])

    print("\n----- Metadata -----")
    info_meta = info['meta']
    if verbosity >= 3:
        print("Total paths: %d" % info_meta['total_paths'])
        print("Listed packed paths: %d" % info_meta['packed_paths'])
    if info_meta.get('packages'):
        print("Total software packages: %d" % info_meta['packages'])
        print("Packed software packages: %d" %
              info_meta['packed_packages'])
        if verbosity >= 3:
            print("Files from packed software packages: %d" %
                  info_meta['packed_packages_files'])
            print("Files from unpacked software packages: %d" %
                  info_meta['unpacked_packages_files'])
    if 'architecture' in info_meta:
        print("Architecture: %s (current: %s)" %
              (info_meta['architecture'], platform.machine().lower()))
    if 'distribution' in info_meta:
        distribution = ' '.join(t for t in info_meta['distribution'] if t)
        current_distribution = [distro.id(), distro.version()]
        current_distribution = ' '.join(t for t in current_distribution if t)
        print("Distribution: %s (current: %s)" %
              (distribution, current_distribution or "(not Linux)"))

    if 'runs' in info:
        runs = info['runs']
        print("Runs (%d):" % len(runs))
        for run in runs:
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            # A single default-named run is shown without its id
            if len(runs) == 1 and run['id'] == "run0":
                print(" %s" % cmdline)
            else:
                print(" %s: %s" % (run['id'], cmdline))
            if verbosity >= 2:
                print(" wd: %s" % run['workingdir'])
                if 'signal' in run:
                    print(" signal: %d" % run['signal'])
                else:
                    print(" exitcode: %d" % run['exitcode'])
                if run.get('walltime') is not None:
                    print(" walltime: %s" % run['walltime'])

    inputs_outputs = info.get('inputs_outputs')
    if inputs_outputs:
        if verbosity < 2:
            print("Inputs/outputs files (%d): %s" %
                  (len(inputs_outputs), ", ".join(sorted(inputs_outputs))))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in sorted(inputs_outputs.items()):
                t = []
                if f['read_runs']:
                    t.append("in")
                if f['write_runs']:
                    t.append("out")
                print(" %s (%s): %s" % (name, ' '.join(t), f['path']))

    unpacker_status = info.get('unpacker_status')
    if unpacker_status:
        print("\n----- Unpackers -----")
        for s, n in [(COMPAT_OK, "Compatible"),
                     (COMPAT_MAYBE, "Unknown"),
                     (COMPAT_NO, "Incompatible")]:
            # Only the compatible list is shown at low verbosity
            if s != COMPAT_OK and verbosity < 2:
                continue
            if s not in unpacker_status:
                continue
            upks = unpacker_status[s]
            print("%s (%d):" % (n, len(upks)))
            for upk_name, msg in upks:
                if msg is not None:
                    print(" %s (%s)" % (upk_name, msg))
                else:
                    print(" %s" % upk_name)
def docker_run(args):
    """Runs the experiment in the container.

    Builds one shell command per selected run (cd + env + argv, executed
    through /rpzsudo and /busybox inside the image), starts a new Docker
    container from the current image, then commits the container into a new
    image so filesystem changes persist for later runs/downloads.

    :param args: parsed command-line arguments; uses ``target``, ``cmdline``,
        ``run``, ``detach``, ``x11``, ``tunneled_x11``, ``x11_display`` and
        ``docker_option``.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check: X11 forwarding needs the forwarder threads in this
    # process, which detaching would abandon
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            # Heuristic: warn when the Docker daemon looks remote, since the
            # container probably can't reach the local X display directly
            if (docker_host != local_ip and
                    docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        # Wrap in rpzsudo so the command runs under the original uid/gid
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid,
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     args.docker_option +
                                     [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 args.docker_option +
                                 [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect", since "docker run"'s own status
    # is the docker client's, not the experiment's
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s",
                      container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_run(args):
    """Runs the experiment in the virtual machine.

    Assembles one shell command per selected run, wraps them in a chroot (or
    sudo -u) depending on how the VM was set up, temporarily sets the VM's
    hostname to the original machine's, and executes everything over SSH.

    :param args: parsed command-line arguments; uses ``target``, ``cmdline``,
        ``run``, ``x11``, ``x11_display``, ``no_stdin`` and ``no_pty``.
    """
    target = Path(args.target[0])
    # Defaults to chroot mode if the metadata doesn't say otherwise
    use_chroot = read_dict(target / '.reprounzip').get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        # NOTE(review): unlike the other unpackers here, the variable NAME is
        # not shell-escaped, only the value — presumably names are known-safe
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        # X11 setup commands also have to run inside the chroot
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run"
    # are running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = get_ssh_parameters(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    sys.stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    signals.post_run(target=target, retcode=retcode)
def finalize(self):
    """Builds a new Docker image from the uploaded files.

    Writes a Dockerfile that COPYs each uploaded file into place and chowns
    it to the ownership recorded in the pack (falling back to the first run's
    uid/gid, then 1000), runs ``docker build``, and records the new image as
    ``current_image``. The temporary build directory is always removed.
    """
    if not self.docker_copy:
        self.build_directory.rmtree()
        return

    from_image = self.unpacked_info['current_image']

    with self.build_directory.open('w', 'Dockerfile',
                                   encoding='utf-8',
                                   newline='\n') as dockerfile:
        dockerfile.write('FROM %s\n\n' % from_image.decode('ascii'))
        for src, target in self.docker_copy:
            # FIXME : spaces in filenames will probably break Docker
            dockerfile.write(
                'COPY \\\n    %s \\\n    %s\n' % (
                    shell_escape(str(src)),
                    shell_escape(str(target))))

        # BUGFIX: the tar was previously reopened (and never closed) for
        # every copied file, leaking file handles; open it once instead
        tar = tarfile.open(str(self.target / 'data.tgz'), 'r:*')
        try:
            for src, target in self.docker_copy:
                uid = gid = None

                # Keep permissions if the file is already in there
                try:
                    info = tar.getmember(
                        str(join_root(PosixPath(b'DATA'), target)))
                    uid, gid = info.uid, info.gid
                except KeyError:
                    pass

                # Otherwise default on the first run's UID/GID
                if uid is None:
                    uid, gid = self.default_ownership

                # Lastly, use 1000
                if uid is None:
                    uid = gid = 1000

                # JSON-encode the path for Dockerfile exec-form RUN
                dockerfile.write('RUN ["/busybox", "chown", "%d:%d", %s]\n' % (
                    uid, gid, json.dumps(str(target)),
                ))
        finally:
            tar.close()

    image = make_unique_name(b'reprounzip_image_')
    retcode = subprocess.call(self.docker_cmd +
                              ['build', '-t', image, '.'],
                              cwd=self.build_directory.path)
    if retcode != 0:
        logger.critical("docker build failed with code %d", retcode)
        sys.exit(1)
    else:
        logger.info("New image created: %s", image.decode('ascii'))
        # Untag the intermediate image we built from, unless it is the
        # original one produced by setup/build
        if from_image != self.unpacked_info['initial_image']:
            logger.info("Untagging previous image %s",
                        from_image.decode('ascii'))
            retcode = subprocess.call(self.docker_cmd + ['rmi', from_image])
            if retcode != 0:
                logger.warning("Can't remove previous image, docker "
                               "returned %d", retcode)
        self.unpacked_info['current_image'] = image
        write_dict(self.target, self.unpacked_info)

    self.build_directory.rmtree()
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # BUGFIX: without this exit, 'installer' is unbound and the
            # script-writing code below raises NameError
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8',
                                    newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
def vagrant_run(args):
    """Runs the experiment in the virtual machine.

    Handles port forwarding (reloading the VM if new forwards are needed),
    X11 forwarding, and builds one shell command per selected run to execute
    over SSH, inside a chroot or via sudo depending on the setup mode.

    :param args: parsed command-line arguments; uses ``target``, ``cmdline``,
        ``run``, ``expose_port``, ``x11``, ``x11_display``, ``no_stdin`` and
        ``no_pty``.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    ports = parse_ports(args.expose_port)

    # If the requested ports are not a subset of the ones already set on the
    # VM, we have to update the Vagrantfile and issue `vagrant reload`, which
    # will reboot the machine
    req_ports = set(ports)
    set_ports = set(unpacked_info.get('ports', []))
    if not req_ports.issubset(set_ports):
        # Build new set of forwarded ports: the ones already set + the one
        # just requested
        # The ones we request now override the previous config
        all_ports = dict((host, (guest, proto))
                         for host, guest, proto in set_ports)
        for host, guest, proto in req_ports:
            all_ports[host] = guest, proto
        unpacked_info['ports'] = sorted(
            (host, guest, proto)
            for host, (guest, proto) in iteritems(all_ports))

        write_vagrantfile(target, unpacked_info)
        logger.info("Some requested ports are not yet forwarded, running "
                    "'vagrant reload'")
        retcode = subprocess.call(['vagrant', 'reload', '--no-provision'],
                                  cwd=target.path)
        if retcode != 0:
            logger.critical("vagrant reload failed with code %d, aborting",
                            retcode)
            sys.exit(1)
        write_dict(target, unpacked_info)

    # X11 handler
    if unpacked_info['gui']:
        x11 = LocalX11Handler()
    else:
        x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        # In chroot mode only busybox is guaranteed to exist inside the root
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        # X11 setup commands also have to run inside the chroot
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run"
    # are running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            # Only the last --memory option given takes effect
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # BUGFIX: without this exit, 'installer' is unbound and the
            # script-writing code below raises NameError
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))

                fp.write(
                    '\n'
                    'cp /etc/resolv.conf /experimentroot/etc/resolv.conf\n')
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                logging.info("Generating file list...")
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    if f.path.name == 'resolv.conf' and (
                            f.path.lies_under('/etc') or
                            f.path.lies_under('/run') or
                            f.path.lies_under('/var')):
                        continue
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not
                # sure why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered.
                # --skip-old-files was introduced too recently. Instead, we
                # just ignore the exit status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')
            # Memory size
            if memory is not None or args.gui:
                fp.write('  config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write('    v.memory = %d\n' % memory)
                if args.gui:
                    fp.write('    v.gui = true\n')
                fp.write('  end\n')

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot,
                                             'gui': args.gui}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory around
        target.rmtree(ignore_errors=True)
        raise
def machine_setup(target):
    """Prepare the machine and get SSH parameters from ``vagrant ssh``.

    Starts the VM if needed (``vagrant up``), parses ``vagrant ssh-config``
    output into paramiko-style connection parameters, and — in chroot mode —
    bind-mounts /dev, /proc (and the X11 socket when ``gui`` is set) inside
    /experimentroot over SSH.

    :param target: unpacker target directory containing the Vagrant machine.
    :return: dict with ``hostname``, ``port``, ``username`` and
        ``key_filename`` suitable for ``paramiko.SSHClient.connect(**info)``.
    """
    try:
        out = subprocess.check_output(['vagrant', 'ssh-config'],
                                      cwd=target.path,
                                      stderr=subprocess.PIPE)
    except subprocess.CalledProcessError:
        # ssh-config fails when the VM is down; makes sure the VM is running
        logging.info("Calling 'vagrant up'...")
        try:
            retcode = subprocess.check_call(['vagrant', 'up'],
                                            cwd=target.path)
        except OSError:
            logging.critical("vagrant executable not found")
            sys.exit(1)
        else:
            if retcode != 0:
                logging.critical("vagrant up failed with code %d", retcode)
                sys.exit(1)
        # Try again
        out = subprocess.check_output(['vagrant', 'ssh-config'],
                                      cwd=target.path)

    # Parse "Key value" lines from ssh-config output
    vagrant_info = {}
    for line in out.split(b'\n'):
        line = line.strip().split(b' ', 1)
        if len(line) != 2:
            continue
        value = line[1].decode('utf-8')
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            # Vagrant should really be escaping special characters here, but
            # it's not -- https://github.com/mitchellh/vagrant/issues/6428
            value = value[1:-1]
        vagrant_info[line[0].decode('utf-8').lower()] = value

    if 'identityfile' in vagrant_info:
        key_file = vagrant_info['identityfile']
    else:
        # Fall back on Vagrant's well-known default insecure key
        key_file = Path('~/.vagrant.d/insecure_private_key').expand_user()
    info = dict(hostname=vagrant_info.get('hostname', '127.0.0.1'),
                port=int(vagrant_info.get('port', 2222)),
                username=vagrant_info.get('user', 'vagrant'),
                key_filename=key_file)
    logging.debug("SSH parameters from Vagrant: %s@%s:%s, key=%s",
                  info['username'], info['hostname'], info['port'],
                  info['key_filename'])

    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    gui = unpacked_info['gui']

    if use_chroot:
        # Mount directories
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(IgnoreMissingKey())
        ssh.connect(**info)
        chan = ssh.get_transport().open_session()
        # Only mount /dev and /proc if they are not mounted yet (idempotent
        # across repeated runs)
        chan.exec_command(
            '/usr/bin/sudo /bin/sh -c %s' % shell_escape(
                'for i in dev proc; do '
                'if ! grep "^/experimentroot/$i$" /proc/mounts; then '
                'mount -o rbind /$i /experimentroot/$i; '
                'fi; '
                'done'))
        if chan.recv_exit_status() != 0:
            logging.critical("Couldn't mount directories in chroot")
            sys.exit(1)
        if gui:
            # Mount X11 socket
            chan = ssh.get_transport().open_session()
            chan.exec_command(
                '/usr/bin/sudo /bin/sh -c %s' % shell_escape(
                    'if [ -d /tmp/.X11-unix ]; then '
                    '[ -d /experimentroot/tmp/.X11-unix ] || '
                    'mkdir /experimentroot/tmp/.X11-unix; '
                    'mount -o bind '
                    '/tmp/.X11-unix /experimentroot/tmp/.X11-unix; '
                    'fi; exit 0'))
            if chan.recv_exit_status() != 0:
                logging.critical("Couldn't mount X11 sockets in chroot")
                sys.exit(1)
        ssh.close()

    return info
def _print_package_info(pack, info, verbosity=1):
    """Print a human-readable report about a pack file.

    :param pack: path-like object for the ``.rpz`` pack (must have
        ``.size()``).
    :param info: nested dict of pack metadata; expected keys include
        ``'pack'``, ``'meta'``, and optionally ``'runs'``,
        ``'inputs_outputs'`` and ``'unpacker_status'``.
    :param verbosity: 1 = summary only; >= 2 adds per-run details,
        input/output file details and incompatible unpackers; >= 3 adds
        path/file counts.
    """
    print("Pack file: %s" % pack)
    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))

    info_pack = info.get('pack')
    if info_pack:
        if 'total_size' in info_pack:
            print("Unpacked size: %s" % hsize(info_pack['total_size']))
        if 'total_paths' in info_pack:
            print("Total packed paths: %d" % info_pack['total_paths'])
        if verbosity >= 3:
            print("    Files: %d" % info_pack['files'])
            print("    Directories: %d" % info_pack['dirs'])
            # Rare path kinds are only shown when present
            if info_pack.get('symlinks'):
                print("    Symbolic links: %d" % info_pack['symlinks'])
            if info_pack.get('hardlinks'):
                print("    Hard links: %d" % info_pack['hardlinks'])
            if info_pack.get('others'):
                print("    Unknown (what!?): %d" % info_pack['others'])

    print("\n----- Metadata -----")
    info_meta = info['meta']
    if verbosity >= 3:
        print("Total paths: %d" % info_meta['total_paths'])
        print("Listed packed paths: %d" % info_meta['packed_paths'])
    if info_meta.get('packages'):
        print("Total software packages: %d" % info_meta['packages'])
        print("Packed software packages: %d" % info_meta['packed_packages'])
        if verbosity >= 3:
            print("Files from packed software packages: %d" %
                  info_meta['packed_packages_files'])
            print("Files from unpacked software packages: %d" %
                  info_meta['unpacked_packages_files'])
    if 'architecture' in info_meta:
        # Compare the pack's architecture against the current machine's
        print("Architecture: %s (current: %s)" %
              (info_meta['architecture'], platform.machine().lower()))
    if 'distribution' in info_meta:
        distribution = ' '.join(t for t in info_meta['distribution'] if t)
        current_distribution = [distro.id(), distro.version()]
        current_distribution = ' '.join(t for t in current_distribution if t)
        print("Distribution: %s (current: %s)" % (
              distribution, current_distribution or "(not Linux)"))

    if 'runs' in info:
        runs = info['runs']
        print("Runs (%d):" % len(runs))
        for run in runs:
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            # Single default-named run: don't bother printing the id
            if len(runs) == 1 and run['id'] == "run0":
                print("    %s" % cmdline)
            else:
                print("    %s: %s" % (run['id'], cmdline))
            if verbosity >= 2:
                print("        wd: %s" % run['workingdir'])
                # A run ends either by signal or with an exit code
                if 'signal' in run:
                    print("        signal: %d" % run['signal'])
                else:
                    print("        exitcode: %d" % run['exitcode'])
                if run.get('walltime') is not None:
                    print("        walltime: %s" % run['walltime'])

    inputs_outputs = info.get('inputs_outputs')
    if inputs_outputs:
        if verbosity < 2:
            print("Inputs/outputs files (%d): %s" % (
                  len(inputs_outputs), ", ".join(sorted(inputs_outputs))))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in sorted(iteritems(inputs_outputs)):
                t = []
                if f['read_runs']:
                    t.append("in")
                if f['write_runs']:
                    t.append("out")
                print("    %s (%s): %s" % (name, ' '.join(t), f['path']))

    unpacker_status = info.get('unpacker_status')
    if unpacker_status:
        print("\n----- Unpackers -----")
        for s, n in [(COMPAT_OK, "Compatible"),
                     (COMPAT_MAYBE, "Unknown"),
                     (COMPAT_NO, "Incompatible")]:
            # Non-compatible unpackers are only listed at verbosity >= 2
            if s != COMPAT_OK and verbosity < 2:
                continue
            if s not in unpacker_status:
                continue
            upks = unpacker_status[s]
            print("%s (%d):" % (n, len(upks)))
            for upk_name, msg in upks:
                if msg is not None:
                    print("    %s (%s)" % (upk_name, msg))
                else:
                    print("    %s" % upk_name)
def docker_run(args):
    """Runs the experiment in the container.

    Builds an argument list for the image's entrypoint (``do``/``env``/
    ``cmd``/``run`` tokens per selected run), starts a container from the
    current image with requested port mappings, then commits the container
    into a new image so filesystem changes persist.

    :param args: parsed command-line arguments; uses ``target``, ``cmdline``,
        ``run``, ``detach``, ``x11``, ``tunneled_x11``, ``x11_display``,
        ``expose_port``, ``docker_cmd`` and ``docker_option``.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check: X11 forwarding needs the forwarder threads in this
    # process, which detaching would abandon
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p', '%s:%s%s' % (port_host, port_container,
                                                proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            # Heuristic: warn when the Docker daemon looks remote, since the
            # container probably can't reach the local X display directly
            if (docker_host != local_ip and
                    docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    # Prepend X11 init commands as 'do <command>' token pairs
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug("Passing arguments to Docker image:")
        for c in cmd:
            logging.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd)
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect", since "docker run"'s own status
    # is the docker client's, not the experiment's
    out = subprocess.check_output(args.docker_cmd.split() +
                                  ['inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(args.docker_cmd.split() + ['rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s",
                      container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts the configuration from the ``.rpz`` pack into ``target``, then
    writes two files there: a ``Dockerfile`` (pulls a base image, installs
    busybox/rpzsudo, optionally installs missing packages, and untars the
    experiment data) and ``rpz_entrypoint.sh``, the shell script the image
    runs to execute the packed runs.

    :param args: parsed command-line arguments; reads ``args.pack``,
        ``args.target``, ``args.base_image``, ``args.distribution`` and
        ``args.install_pkgs``.

    Exits the process (``sys.exit(1)``) if the target exists or no package
    installer can be selected; on any other error the partially-built
    target directory is removed and the exception re-raised.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        # Use the base image requested on the command line, else pick one
        # matching the distribution recorded in the pack
        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logger.info("Using base image %s", base_image)
        logger.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logger.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     ' chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logger.error("Need to install %d packages but couldn't "
                                 "select a package installer: %s",
                                 len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(' %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write(' %s && \\\n' % installer.install_script(packages))
                logger.info("Dockerfile will install the %d software "
                            "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logger.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                # Never restore the packing machine's resolv.conf into the
                # image; the container gets its own
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logger.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # The file list is NUL-separated (matches tar's --null -T below)
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write(' cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logger.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                ' exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                ' case "$1" in\n'
                ' help)\n'
                ' echo "Image built from reprounzip-docker" >&2\n'
                ' echo "Usage: docker run <image> [cmd "word [word '
                '...]"] [run <R>]" >&2\n'
                ' echo " \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                ' echo " \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                ' echo "By default, all the runs are executed." '
                '>&2\n'
                ' echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    ' echo " {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                ' exit 0\n'
                ' ;;\n'
                ' do)\n'
                ' shift\n'
                ' $1\n'
                ' ;;\n'
                ' env)\n'
                ' shift\n'
                ' ENVVARS="$1"\n'
                ' ;;\n'
                ' cmd)\n'
                ' shift\n'
                ' COMMAND="$1"\n'
                ' ;;\n'
                ' run)\n'
                ' shift\n'
                ' case "$1" in\n')
            # Each run is matched by its id or its index ("name|number")
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    ' {name})\n'
                    ' RUNCOMMAND={cmd}\n'
                    ' RUNWD={wd}\n'
                    ' RUNENV={env}\n'
                    ' RUNUID={uid}\n'
                    ' RUNGID={gid}\n'
                    ' ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                ' *)\n'
                ' echo "RPZ: Unknown run $1" >&2\n'
                ' exit 1\n'
                ' ;;\n'
                ' esac\n'
                ' if [ -n "$COMMAND" ]; then\n'
                ' RUNCOMMAND="$COMMAND"\n'
                ' COMMAND=\n'
                ' fi\n'
                ' export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                ' /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n'
                ' ENVVARS=\n'
                ' ;;\n'
                ' *)\n'
                ' echo "RPZ: Unknown option $1" >&2\n'
                ' exit 1\n'
                ' ;;\n'
                ' esac\n'
                ' shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Remove the partially-created directory so setup can be retried
        target.rmtree(ignore_errors=True)
        raise
def directory_run(args):
    """Runs the command in the directory.

    Rebuilds each selected run's shell command so that it executes against
    the unpacked tree under ``<target>/root``: LD_LIBRARY_PATH and PATH are
    remapped into that tree, the environment is reset with ``env -i``, and
    absolute-path arguments that exist in the tree are rewritten. The joined
    command string is then executed through the shell.

    :param args: parsed command-line arguments; reads ``args.target``,
        ``args.run``, ``args.cmdline`` and ``args.x11``.
    """
    target = Path(args.target[0])
    # Validates the target (raises if it is not a 'directory' unpack)
    read_dict(target / '.reprounzip', 'directory')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    # `ldconfig -v -N` lists known library directories as lines ending in
    # ':'; indented lines are the libraries themselves and are skipped
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()
    # Point LD_LIBRARY_PATH at the same directories inside the unpacked root
    lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join(
        shell_escape(unicode_(join_root(root, d))) for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        if args.x11:
            # Forward the host's display settings into the run environment
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ)
                        if k != 'PATH')
        cmd += ' '
        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d) for d in path if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']

            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                            (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logging.warning("Rewrote command-line as: %s",
                                ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
def docker_run(args):
    """Runs the experiment in the container.

    Builds the argument list for the image's entrypoint script (``env``,
    ``cmd``, ``run`` and ``do`` directives), sets up X11 forwarding if
    requested, then invokes ``docker run``. Unless running detached, the
    container is afterwards committed to a new image (so filesystem changes
    are kept for later runs/downloads), removed, and the previous image
    untagged.

    :param args: parsed command-line arguments; reads ``args.target``,
        ``args.run``, ``args.cmdline``, ``args.detach``, ``args.x11``,
        ``args.tunneled_x11``, ``args.x11_display``, ``args.expose_port``,
        ``args.docker_cmd`` and ``args.docker_option``.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logger.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logger.debug("Running from image %s", image.decode('ascii'))
    else:
        logger.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p', '%s:%s/%s' % (port_host, port_container,
                                                 proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        # The Docker daemon may be remote (DOCKER_HOST); extract its address
        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            # Warn when the daemon looks remote: the container probably
            # can't reach the local X display directly
            if (docker_host != local_ip and
                    docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logger.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    # Build the entrypoint argument list, one ("env", "cmd", "run") group
    # per selected run
    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    # Prepend X11 initialization commands as "do" directives
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Passing arguments to Docker image:")
        for c in cmd:
            logger.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders
    # Kept in a list so the forwarder threads stay alive during the run
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logger.info("Start container %s (detached)",
                    container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logger.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logger.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd,
                                 request_tty=True)
    # The image prints out the exit status(es) itself
    if retcode != 0:
        logger.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logger.info("Committing container %s to image %s",
                container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logger.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(args.docker_cmd.split() + ['rm', container])
    if retcode != 0:
        logger.error("Error deleting container %s",
                     container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logger.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def generate(target, configfile, database):
    """Go over the trace and generate the graph file.

    Reads the trace SQLite database (processes, opened files, executed
    files) plus package ownership from the configuration, builds lists of
    vertices and edges, and writes them out as a ``<provenancedata>`` XML
    document to ``target``.

    :param target: path the XML output is written to
    :param configfile: path of the ``config.yml`` produced by the tracer
    :param database: path of the trace SQLite database

    :raises ValueError: if a generated vertex/edge is missing a required tag
    """
    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)

    # Older traces (before 0.7) don't record the is_thread column
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    vertices = []
    edges = []

    # Create user entity, that initiates the runs
    vertices.append({'ID': 'user',
                     'type': 'Agent',
                     'subtype': 'User',
                     'label': 'User'})

    run = -1

    # Read processes
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, parent, timestamp, is_thread, exitcode
        FROM processes;
        ''' if has_thread_flag else '''
        SELECT id, parent, timestamp, 0 as is_thread, exitcode
        FROM processes;
        ''')
    for r_id, r_parent, r_timestamp, r_isthread, r_exitcode in rows:
        # A process with no parent starts a new run
        if r_parent is None:
            # Create run entity
            run += 1
            vertices.append({'ID': 'run%d' % run,
                             'type': 'Activity',
                             'subtype': 'Run',
                             'label': "Run #%d" % run,
                             'date': r_timestamp})
            # User -> run
            edges.append({'ID': 'user_run%d' % run,
                          'type': 'UserRuns',
                          'label': "User runs command",
                          'sourceID': 'user',
                          'targetID': 'run%d' % run})
            # Run -> process
            edges.append({'ID': 'run_start%d' % run,
                          'type': 'RunStarts',
                          'label': "Run #%d command",
                          'sourceID': 'run%d' % run,
                          'targetID': 'process%d' % r_id})

        # Create process entity
        vertices.append({'ID': 'process%d' % r_id,
                         'type': 'Agent',
                         'subtype': 'Thread' if r_isthread else 'Process',
                         'label': 'Process #%d' % r_id,
                         'date': r_timestamp})
        # TODO: add process end time (use master branch?)

        # Add process creation activity
        if r_parent is not None:
            # Process creation activity
            vertex = {'ID': 'fork%d' % r_id,
                      'type': 'Activity',
                      'subtype': 'Fork',
                      'label': "#%d creates %s #%d" % (
                          r_parent,
                          "thread" if r_isthread else "process",
                          r_id),
                      'date': r_timestamp}
            if has_thread_flag:
                vertex['thread'] = 'true' if r_isthread else 'false'
            vertices.append(vertex)
            # Parent -> creation
            edges.append({'ID': 'fork_p_%d' % r_id,
                          'type': 'PerformsFork',
                          'label': "Performs fork",
                          'sourceID': 'process%d' % r_parent,
                          'targetID': 'fork%d' % r_id})
            # Creation -> child
            edges.append({'ID': 'fork_c_%d' % r_id,
                          'type': 'ForkCreates',
                          'label': "Fork creates",
                          'sourceID': 'fork%d' % r_id,
                          'targetID': 'process%d' % r_id})
    cur.close()

    # Map file path -> owning package, and path -> (written, read) flags
    file2package = dict((f.path.path, pkg)
                        for pkg in config.packages
                        for f in pkg.files)
    inputs_outputs = dict((f.path.path,
                           (bool(f.write_runs), bool(f.read_runs)))
                          for n, f in iteritems(config.inputs_outputs))

    # Read opened files
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT name, is_directory
        FROM opened_files
        GROUP BY name;
        ''')
    for r_name, r_directory in rows:
        # Create file entity
        vertex = {'ID': r_name,
                  'type': 'Entity',
                  'subtype': 'Directory' if r_directory else 'File',
                  'label': r_name}
        if r_name in file2package:
            vertex['package'] = file2package[r_name].name
        if r_name in inputs_outputs:
            out_, in_ = inputs_outputs[r_name]
            if in_:
                vertex['input'] = True
            if out_:
                vertex['output'] = True
        vertices.append(vertex)
    cur.close()

    # Read file opens
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, mode, process
        FROM opened_files;
        ''')
    for r_id, r_name, r_timestamp, r_mode, r_process in rows:
        # Create file access activity
        vertices.append({'ID': 'access%d' % r_id,
                         'type': 'Activity',
                         'subtype': ('FileWrites' if r_mode & FILE_WRITE
                                     else 'FileReads'),
                         'label': ("File write: %s" if r_mode & FILE_WRITE
                                   else "File read: %s") % r_name,
                         'date': r_timestamp,
                         'mode': r_mode})
        # Process -> access
        edges.append({'ID': 'proc_access%d' % r_id,
                      'type': 'PerformsFileAccess',
                      'label': "Process does file access",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'access%d' % r_id})
        # Access -> file
        edges.append({'ID': 'access_file%d' % r_id,
                      'type': 'AccessFile',
                      'label': "File access touches",
                      'sourceID': 'access%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Read executions
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, process, argv
        FROM executed_files;
        ''')
    for r_id, r_name, r_timestamp, r_process, r_argv in rows:
        # argv is stored NUL-separated, possibly with a trailing NUL
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]
        cmdline = ' '.join(shell_escape(a) for a in argv)

        # Create execution activity
        vertices.append({'ID': 'exec%d' % r_id,
                         'type': 'Activity',
                         'subtype': 'ProcessExecutes',
                         'label': "Process #%d executes file %s" % (r_process,
                                                                    r_name),
                         'date': r_timestamp,
                         'cmdline': cmdline,
                         'process': r_process,
                         'file': r_name})
        # Process -> execution
        edges.append({'ID': 'proc_exec%d' % r_id,
                      'type': 'ProcessExecution',
                      'label': "Process does exec()",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'exec%d' % r_id})
        # Execution -> file
        edges.append({'ID': 'exec_file%d' % r_id,
                      'type': 'ExecutionFile',
                      'label': "Execute file",
                      'sourceID': 'exec%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Write the file from the created lists
    with target.open('w', encoding='utf-8', newline='\n') as out:
        out.write('<?xml version="1.0"?>\n\n'
                  '<provenancedata xmlns:xsi="http://www.w3.org/2001/XMLSchema'
                  '-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n'
                  ' <vertices>\n')
        for vertex in vertices:
            if 'date' not in vertex:
                vertex['date'] = '-1'
            # Required tags are popped into `tags`; whatever remains in
            # `vertex` is emitted as <attributes>
            tags = {}
            for k in ('ID', 'type', 'label', 'date'):
                if k not in vertex:
                    # Restore popped tags so the error message shows the
                    # whole vertex
                    vertex.update(tags)
                    raise ValueError("Vertex is missing tag '%s': %r" % (
                                     k, vertex))
                tags[k] = vertex.pop(k)
            out.write(' <vertex>\n ' +
                      '\n '.join('<{k}>{v}</{k}>'.format(k=k,
                                                         v=xml_escape(v))
                                 for k, v in iteritems(tags)))
            if vertex:
                out.write('\n <attributes>\n')
                for k, v in iteritems(vertex):
                    out.write(' <attribute>\n'
                              ' <name>{k}</name>\n'
                              ' <value>{v}</value>\n'
                              ' </attribute>\n'
                              .format(k=xml_escape(k),
                                      v=xml_escape(v)))
                out.write(' </attributes>')
            out.write('\n </vertex>\n')
        out.write(' </vertices>\n'
                  ' <edges>\n')
        for edge in edges:
            for k in ('ID', 'type', 'label', 'sourceID', 'targetID'):
                if k not in edge:
                    raise ValueError("Edge is missing tag '%s': %r" % (
                                     k, edge))
            if 'value' not in edge:
                edge['value'] = ''
            out.write(' <edge>\n ' +
                      '\n '.join('<{k}>{v}</{k}>'.format(k=k,
                                                         v=xml_escape(v))
                                 for k, v in iteritems(edge)) +
                      '\n </edge>\n')
        out.write(' </edges>\n'
                  '</provenancedata>\n')

    conn.close()
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    :param args: parsed command-line arguments; reads ``args.pack``,
        ``args.target``, ``args.use_chroot``, ``args.bind_magic_dirs``,
        ``args.base_image`` and ``args.distribution``.

    Exits the process (``sys.exit(1)``) on missing/invalid arguments or if
    no package installer can be selected for the packages that must be
    installed.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    # Use the explicitly-requested box, or pick one matching the
    # distribution recorded in the pack
    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # BUGFIX: abort here; continuing would raise NameError below
            # when `installer` is used (see docker_setup_create, which
            # exits in the same situation)
            sys.exit(1)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8',
                                    newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
        # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            # Copy the files of installed (non-packed) packages into the
            # chroot
            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write(' config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write(' config.vm.provision "shell", path: "setup.sh"\n')
        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
def docker_run(args):
    """Runs the experiment in the container.

    Destroys the container left over from a previous run (if any), then
    builds one shell command per selected run (``cd`` + ``env -i`` + the
    run's command-line, wrapped in ``sudo`` to get the recorded uid) and
    executes them all in a fresh container via ``docker run``. The container
    name is stored in the unpacked-info metadata so output files can be
    downloaded from it later.

    :param args: parsed command-line arguments; reads ``args.target``,
        ``args.run`` and ``args.cmdline``.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)
    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        # Persist the metadata without the stale container name
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        # Run as the uid recorded at pack time (default 1000)
        uid = run.get('uid', 1000)
        cmd = 'sudo -u \'#%d\' sh -c %s\n' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'run', b'--name=' + container,
                               '-i', '-t', image, '/bin/sh', '-c', cmds])
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def directory_run(args):
    """Runs the command in the directory.

    Rebuilds each selected run's shell command so that it executes against
    the unpacked tree under ``<target>/root``: LD_LIBRARY_PATH and PATH are
    remapped into that tree, the environment is reset with ``env -i`` (after
    command-line overrides via fixup_environment), and absolute-path
    arguments that exist in the tree are rewritten. The joined command
    string is executed through the shell, then run metadata is updated.

    :param args: parsed command-line arguments; reads ``args.target``,
        ``args.run``, ``args.cmdline``, ``args.x11`` and the environment
        override options consumed by fixup_environment.
    """
    target = Path(args.target[0])
    unpacked_info = metadata_read(target, 'directory')
    cmdline = args.cmdline

    # Loads config
    config = load_config_file(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    root = (target / 'root').absolute()

    # Gets library paths
    # `ldconfig -v -N` lists known library directories as lines ending in
    # ':'; indented lines are the libraries themselves and are skipped
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            if len(l) < 3 or l[0] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.communicate()
    # Point LD_LIBRARY_PATH at the same directories inside the unpacked root
    lib_dirs = ('export LD_LIBRARY_PATH=%s' % ':'.join(
        shell_escape(unicode_(join_root(root, d))) for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        environ = run['environ']
        environ = fixup_environment(environ, args)
        if args.x11:
            # Forward the host's display settings into the run environment
            if 'DISPLAY' in os.environ:
                environ['DISPLAY'] = os.environ['DISPLAY']
            if 'XAUTHORITY' in os.environ:
                environ['XAUTHORITY'] = os.environ['XAUTHORITY']
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ)
                        if k != 'PATH')
        cmd += ' '
        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d) for d in path if d.root == '/']
        # Rebuild string
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']
            # Rewrites command-line arguments that are absolute filenames
            rewritten = False
            for i in irange(len(argv)):
                try:
                    p = Path(argv[i])
                except UnicodeEncodeError:
                    continue
                if p.is_absolute:
                    rp = join_root(root, p)
                    if (rp.exists() or
                            (len(rp.components) > 3 and rp.parent.exists())):
                        argv[i] = str(rp)
                        rewritten = True
            if rewritten:
                logging.warning("Rewrote command-line as: %s",
                                ' '.join(shell_escape(a) for a in argv))
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = interruptible_call(cmds, shell=True)
    stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    metadata_write(target, unpacked_info, 'directory')
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.

    Writes ``setup.sh``, the pack data, and a ``Vagrantfile`` into the
    target directory; on any error the partially-built target is removed.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    # Optional VM memory size, taken from the last --memory argument
    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)
            # BUGFIX: without this exit, execution continued and 'installer'
            # was referenced unbound below (NameError); the docker unpackers
            # exit here too
            sys.exit(1)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                fp.write(installer.update_script())
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        try:
                            rpz_pack.get_data(path)
                        except KeyError:
                            logging.info("Missing file %s", path)
                        else:
                            pathlist.append(path)
                # FIXME : for some reason we need reversed() here, I'm not
                # sure why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered.
                # --skip-old-files was introduced too recently. Instead, we
                # just ignore the exit status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write(' config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write(' config.vm.provision "shell", path: "setup.sh"\n')
            # Memory size
            if memory is not None:
                fp.write(' config.vm.provider "virtualbox" do |v|\n'
                         ' v.memory = %d\n'
                         ' end\n' % memory)
            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory around
        target.rmtree(ignore_errors=True)
        raise
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts the pack's configuration, picks (or accepts) a base image,
    copies the pack into the target directory, and writes a Dockerfile
    that installs busybox, optionally installs software packages, and
    unpacks the experiment files into the image.

    :param args: parsed command-line arguments (expects ``pack``, ``target``,
        ``base_image``, ``distribution``, ``install_pkgs``).
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    # The config member is renamed so it lands at <target>/config.yml
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        # Pick an image matching the packed runs' distribution
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w', encoding='utf-8',
                                      newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(runs[0]['architecture']),
                      target / 'busybox')
        fp.write('COPY busybox /bin/busybox\n')

        fp.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        fp.write('RUN \\\n'
                 ' chmod +x /bin/busybox && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]

        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        if packages:
            record_usage(docker_install_pkgs=True)
        else:
            record_usage(docker_install_pkgs="sudo")
        packages += [Package('sudo', None, packfiles=False)]

        if packages:
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write(' %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write(' %s && \\\n' % installer.install_script(packages))
        logging.info("Dockerfile will install the %d software packages that "
                     "were not packed", len(packages))

        # Untar
        paths = set()
        pathlist = []
        dataroot = PosixPath('DATA')
        # Adds intermediate directories, and checks for existence in the tar
        tar = tarfile.open(str(pack), 'r:*')
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            for c in f.path.components[1:]:
                path = path / c
                if path in paths:
                    continue
                paths.add(path)
                datapath = join_root(dataroot, path)
                try:
                    tar.getmember(str(datapath))
                except KeyError:
                    # Intermediate directory not in the pack; skip it
                    logging.info("Missing file %s", datapath)
                else:
                    pathlist.append(unicode_(datapath))
        tar.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        fp.write(' cd / && tar zpxf /reprozip_experiment.rpz '
                 '--numeric-owner --strip=1 %s\n' %
                 ' '.join(shell_escape(p) for p in reversed(pathlist)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
def vagrant_run(args):
    """Runs the experiment in the virtual machine.

    Assembles one shell command per selected run (chroot'ed or sudo'ed
    depending on how the VM was set up), temporarily sets the VM's
    hostname to the packed experiment's, and executes everything over
    Vagrant's SSH connection. Also reconfigures port forwarding (which
    requires a `vagrant reload`) when new ports are requested.

    :param args: parsed command-line arguments (expects ``target``, ``run``,
        ``cmdline``, ``expose_port``, ``x11``, ``x11_display``,
        ``no_stdin``, ``no_pty``).
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    ports = parse_ports(args.expose_port)

    # If the requested ports are not a subset of the ones already set on the
    # VM, we have to update the Vagrantfile and issue `vagrant reload`, which
    # will reboot the machine
    req_ports = set(ports)
    set_ports = set(unpacked_info.get('ports', []))
    if not req_ports.issubset(set_ports):
        # Build new set of forwarded ports: the ones already set + the one
        # just requested
        # The ones we request now override the previous config
        all_ports = dict((host, (guest, proto))
                         for host, guest, proto in set_ports)
        for host, guest, proto in req_ports:
            all_ports[host] = guest, proto
        unpacked_info['ports'] = sorted(
            (host, guest, proto)
            for host, (guest, proto) in iteritems(all_ports))

        write_vagrantfile(target, unpacked_info)
        logger.info("Some requested ports are not yet forwarded, running "
                    "'vagrant reload'")
        retcode = subprocess.call(['vagrant', 'reload', '--no-provision'],
                                  cwd=target.path)
        if retcode != 0:
            logger.critical("vagrant reload failed with code %d, aborting",
                            retcode)
            sys.exit(1)
        write_dict(target, unpacked_info)

    # X11 handler
    # NOTE(review): 'gui' is read without a default — assumes every unpacked
    # target's metadata contains it; verify against the setup code
    if unpacked_info['gui']:
        x11 = LocalX11Handler()
    else:
        x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        # Inside a chroot only busybox is guaranteed to exist
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        # Run under the packed experiment's uid/gid
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (userspec, shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    # X11 initialization commands run first, in the same environment
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname
    # after the run, which might cause issues if several
    # "reprounzip vagrant run" are running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def docker_run(args):
    """Runs the experiment in the container.

    Destroys the container from any previous run, starts a new one from the
    current image with the runs' commands chained under ``busybox sh -c``,
    then recovers the real exit status from ``docker inspect`` (since
    ``docker run``'s own status only reflects the docker client).

    :param args: parsed command-line arguments (expects ``target``, ``run``,
        ``cmdline``, ``x11``, ``x11_display``).
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)
    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    # Container names are stored as bytes (see make_unique_name below),
    # hence the .decode('ascii') for display
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            # Best-effort: log and continue, the new container has a
            # different name anyway
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Get the local bridge IP, used by the container to reach the host's
    # X11 forwarder
    ip_str = get_iface_addr('docker0')

    # X11 handler
    x11 = X11Handler(args.x11, ('internet', ip_str), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        # Run as the packed experiment's uid
        uid = run.get('uid', 1000)
        cmd = 'sudo -u \'#%d\' /bin/busybox sh -c %s\n' % (uid,
                                                           shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t', image,
                                  '/bin/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)
def directory_run(args):
    """Runs the command in the directory.

    Replays the selected runs against the files under ``<target>/root`` by
    exporting an LD_LIBRARY_PATH and PATH pointing inside the directory,
    then executing the chained commands with ``sh -c``.

    :param args: parsed command-line arguments (expects ``target``, ``run``,
        ``cmdline``).
    """
    target = Path(args.target[0])
    read_dict(target / '.reprounzip', 'directory')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config_file(target / 'config.yml',
                                                   True)
    selected_runs = get_runs(runs, args.run, cmdline)

    root = target / 'root'

    # Gets library paths from `ldconfig -v`: directory lines end with ':'
    # and are not indented; library lines are indented with a tab.
    lib_dirs = []
    p = subprocess.Popen(['/sbin/ldconfig', '-v', '-N'],
                         stdout=subprocess.PIPE)
    try:
        for l in p.stdout:
            # Note: l[:1] (not l[0]) so the comparison also works on
            # Python 3, where indexing bytes yields an int, never equal
            # to b' ' or b'\t'
            if len(l) < 3 or l[:1] in (b' ', b'\t'):
                continue
            if l.endswith(b':\n'):
                lib_dirs.append(Path(l[:-2]))
    finally:
        p.wait()
    lib_dirs = (
        'export LD_LIBRARY_PATH=%s' % ':'.join(
            shell_escape(unicode_(join_root(root, d)))
            for d in lib_dirs))

    cmds = [lib_dirs]
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(
            unicode_(join_root(root, Path(run['workingdir']))))
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ'])
                        if k != 'PATH')
        cmd += ' '

        # PATH
        # Get the original PATH components
        path = [PosixPath(d)
                for d in run['environ'].get('PATH', '').split(':')]
        # The same paths but in the directory
        dir_path = [join_root(root, d)
                    for d in path
                    if d.root == '/']
        # Rebuild string: directory-relocated components first, then the
        # originals as fallback
        path = ':'.join(unicode_(d) for d in dir_path + path)
        cmd += 'PATH=%s ' % shell_escape(path)

        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = run['argv']
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)
    retcode = subprocess.call(cmds, shell=True)
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)
    signals.post_run(target=target, retcode=retcode)
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.

    Extracts the pack's configuration, picks (or accepts) a base image,
    and writes into the target directory: the pack data, a Dockerfile
    (installs busybox and rpzsudo, optionally installs packages, unpacks
    the experiment files), and an ``rpz_entrypoint.sh`` script that the
    image uses as ENTRYPOINT to replay runs. On any error the
    partially-built target is removed.

    :param args: parsed command-line arguments (expects ``pack``, ``target``,
        ``base_image``, ``distribution``, ``install_pkgs``).
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            # Pick an image matching the packed runs' distribution
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     ' chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]

            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error("Need to install %d packages but couldn't "
                                  "select a package installer: %s",
                                  len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(' %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write(' %s && \\\n' % installer.install_script(packages))
                logging.info("Dockerfile will install the %d software "
                             "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the
            # tar
            logging.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                # Skip resolv.conf: the container gets its own DNS setup
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            # tar's exit status is deliberately ignored (see TAR bugs above)
            fp.write(' cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logging.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                ' exec /busybox sh /rpz_entrypoint.sh')
            # With no arguments, re-exec ourselves with every run selected
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                ' case "$1" in\n'
                ' help)\n'
                ' echo "Image built from reprounzip-docker" >&2\n'
                ' echo "Usage: docker run <image> [cmd word [word '
                '...]] [run <R>]" >&2\n'
                ' echo " \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                ' echo " \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                ' echo "By default, all the runs are executed." '
                '>&2\n'
                ' echo "The runs in this image are:" >&2\n')
            # Help text lists each run's id and command-line
            for run in runs:
                fp.write(
                    ' echo " {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                ' exit 0\n'
                ' ;;\n'
                ' do)\n'
                ' shift\n'
                ' $1\n'
                ' ;;\n'
                ' env)\n'
                ' shift\n'
                ' ENVVARS="$1"\n'
                ' ;;\n'
                ' cmd)\n'
                ' shift\n'
                ' COMMAND="$1"\n'
                ' ;;\n'
                ' run)\n'
                ' shift\n'
                ' case "$1" in\n')
            # One case arm per run, selectable by name or number
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    ' {name})\n'
                    ' RUNCOMMAND={cmd}\n'
                    ' RUNWD={wd}\n'
                    ' RUNENV={env}\n'
                    ' RUNUID={uid}\n'
                    ' RUNGID={gid}\n'
                    ' ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                ' *)\n'
                ' echo "RPZ: Unknown run $1" >&2\n'
                ' exit 1\n'
                ' ;;\n'
                ' esac\n'
                ' if [ -n "$COMMAND" ]; then\n'
                ' RUNCOMMAND="$COMMAND"\n'
                ' COMMAND=\n'
                ' fi\n'
                ' export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                ' /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND"\n'
                ' ENVVARS=\n'
                ' ;;\n'
                ' *)\n'
                ' echo "RPZ: Unknown option $1" >&2\n'
                ' exit 1\n'
                ' ;;\n'
                ' esac\n'
                ' shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        # Don't leave a half-built target directory around
        target.rmtree(ignore_errors=True)
        raise
def generate(target, configfile, database):
    """Go over the trace and generate the graph file.

    Reads processes, opened files and executions from the trace database
    and writes a Prov-Viewer-style XML document of vertices and edges to
    *target*.

    :param target: output path; opened for writing.
    :param configfile: path to the reprozip configuration file.
    :param database: path to the SQLite trace database.
    """
    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)
    # Older traces (format < 0.7) don't record the is_thread column
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    # Explicit check instead of `assert` (asserts are stripped under -O),
    # matching the configfile handling above
    if not database.is_file():
        logger.critical("Trace database %s does not exist!\n"
                        "Did you forget to run 'reprozip trace'?", database)
        sys.exit(1)
    conn = sqlite3.connect(str(database))  # connect() only accepts str
    conn.row_factory = sqlite3.Row

    vertices = []
    edges = []

    # Create user entity, that initiates the runs
    vertices.append({'ID': 'user',
                     'type': 'Agent',
                     'subtype': 'User',
                     'label': 'User'})
    run = -1

    # Read processes
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, parent, timestamp, is_thread, exitcode
        FROM processes;
        ''' if has_thread_flag else '''
        SELECT id, parent, timestamp, 0 as is_thread, exitcode
        FROM processes;
        ''')
    for r_id, r_parent, r_timestamp, r_isthread, r_exitcode in rows:
        if r_parent is None:
            # A process without a parent starts a new run
            # Create run entity
            run += 1
            vertices.append({'ID': 'run%d' % run,
                             'type': 'Activity',
                             'subtype': 'Run',
                             'label': "Run #%d" % run,
                             'date': r_timestamp})
            # User -> run
            edges.append({'ID': 'user_run%d' % run,
                          'type': 'UserRuns',
                          'label': "User runs command",
                          'sourceID': 'user',
                          'targetID': 'run%d' % run})
            # Run -> process
            edges.append({'ID': 'run_start%d' % run,
                          'type': 'RunStarts',
                          'label': "Run #%d command",
                          'sourceID': 'run%d' % run,
                          'targetID': 'process%d' % r_id})

        # Create process entity
        vertices.append({'ID': 'process%d' % r_id,
                         'type': 'Agent',
                         'subtype': 'Thread' if r_isthread else 'Process',
                         'label': 'Process #%d' % r_id,
                         'date': r_timestamp})
        # TODO: add process end time (use master branch?)

        # Add process creation activity
        if r_parent is not None:
            # Process creation activity
            vertex = {'ID': 'fork%d' % r_id,
                      'type': 'Activity',
                      'subtype': 'Fork',
                      'label': "#%d creates %s #%d" % (
                          r_parent,
                          "thread" if r_isthread else "process",
                          r_id),
                      'date': r_timestamp}
            if has_thread_flag:
                vertex['thread'] = 'true' if r_isthread else 'false'
            vertices.append(vertex)
            # Parent -> creation
            edges.append({'ID': 'fork_p_%d' % r_id,
                          'type': 'PerformsFork',
                          'label': "Performs fork",
                          'sourceID': 'process%d' % r_parent,
                          'targetID': 'fork%d' % r_id})
            # Creation -> child
            edges.append({'ID': 'fork_c_%d' % r_id,
                          'type': 'ForkCreates',
                          'label': "Fork creates",
                          'sourceID': 'fork%d' % r_id,
                          'targetID': 'process%d' % r_id})
    cur.close()

    # Maps filename -> owning package, and filename -> (written?, read?)
    file2package = dict((f.path.path, pkg)
                        for pkg in config.packages
                        for f in pkg.files)
    inputs_outputs = dict((f.path.path,
                           (bool(f.write_runs), bool(f.read_runs)))
                          for f in config.inputs_outputs.values())

    # Read opened files
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT name, is_directory
        FROM opened_files
        GROUP BY name;
        ''')
    for r_name, r_directory in rows:
        # Create file entity
        vertex = {'ID': r_name,
                  'type': 'Entity',
                  'subtype': 'Directory' if r_directory else 'File',
                  'label': r_name}
        if r_name in file2package:
            vertex['package'] = file2package[r_name].name
        if r_name in inputs_outputs:
            out_, in_ = inputs_outputs[r_name]
            if in_:
                vertex['input'] = True
            if out_:
                vertex['output'] = True
        vertices.append(vertex)
    cur.close()

    # Read file opens
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, mode, process
        FROM opened_files;
        ''')
    for r_id, r_name, r_timestamp, r_mode, r_process in rows:
        # Create file access activity
        vertices.append({'ID': 'access%d' % r_id,
                         'type': 'Activity',
                         'subtype': ('FileWrites' if r_mode & FILE_WRITE
                                     else 'FileReads'),
                         'label': ("File write: %s" if r_mode & FILE_WRITE
                                   else "File read: %s") % r_name,
                         'date': r_timestamp,
                         'mode': r_mode})
        # Process -> access
        edges.append({'ID': 'proc_access%d' % r_id,
                      'type': 'PerformsFileAccess',
                      'label': "Process does file access",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'access%d' % r_id})
        # Access -> file
        edges.append({'ID': 'access_file%d' % r_id,
                      'type': 'AccessFile',
                      'label': "File access touches",
                      'sourceID': 'access%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Read executions
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, process, argv
        FROM executed_files;
        ''')
    for r_id, r_name, r_timestamp, r_process, r_argv in rows:
        # argv is NUL-separated, possibly with a trailing NUL
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]
        cmdline = ' '.join(shell_escape(a) for a in argv)

        # Create execution activity
        vertices.append({'ID': 'exec%d' % r_id,
                         'type': 'Activity',
                         'subtype': 'ProcessExecutes',
                         'label': "Process #%d executes file %s" % (r_process,
                                                                    r_name),
                         'date': r_timestamp,
                         'cmdline': cmdline,
                         'process': r_process,
                         'file': r_name})
        # Process -> execution
        edges.append({'ID': 'proc_exec%d' % r_id,
                      'type': 'ProcessExecution',
                      'label': "Process does exec()",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'exec%d' % r_id})
        # Execution -> file
        edges.append({'ID': 'exec_file%d' % r_id,
                      'type': 'ExecutionFile',
                      'label': "Execute file",
                      'sourceID': 'exec%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Write the file from the created lists
    with target.open('w', encoding='utf-8', newline='\n') as out:
        out.write('<?xml version="1.0"?>\n\n'
                  '<provenancedata xmlns:xsi="http://www.w3.org/2001/XMLSchema'
                  '-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n'
                  ' <vertices>\n')
        for vertex in vertices:
            if 'date' not in vertex:
                vertex['date'] = '-1'
            # The mandatory tags are emitted first; everything left in the
            # dict becomes an <attribute>
            tags = {}
            for k in ('ID', 'type', 'label', 'date'):
                if k not in vertex:
                    vertex.update(tags)
                    raise ValueError("Vertex is missing tag '%s': %r" % (
                        k, vertex))
                tags[k] = vertex.pop(k)
            out.write(' <vertex>\n ' +
                      '\n '.join('<{k}>{v}</{k}>'.format(k=k,
                                                         v=xml_escape(v))
                                 for k, v in tags.items()))
            if vertex:
                out.write('\n <attributes>\n')
                for k, v in vertex.items():
                    out.write(' <attribute>\n'
                              ' <name>{k}</name>\n'
                              ' <value>{v}</value>\n'
                              ' </attribute>\n'
                              .format(k=xml_escape(k),
                                      v=xml_escape(v)))
                out.write(' </attributes>')
            out.write('\n </vertex>\n')
        out.write(' </vertices>\n'
                  ' <edges>\n')
        for edge in edges:
            for k in ('ID', 'type', 'label', 'sourceID', 'targetID'):
                if k not in edge:
                    raise ValueError("Edge is missing tag '%s': %r" % (
                        k, edge))
            if 'value' not in edge:
                edge['value'] = ''
            out.write(' <edge>\n ' +
                      '\n '.join('<{k}>{v}</{k}>'.format(k=k,
                                                         v=xml_escape(v))
                                 for k, v in edge.items()) +
                      '\n </edge>\n')
        out.write(' </edges>\n'
                  '</provenancedata>\n')

    conn.close()
def print_info(args):
    """Writes out some information about a pack file.

    Reads the pack's embedded configuration and the pack archive itself,
    then prints: pack size/path statistics, metadata (packages, paths,
    architecture/distribution vs the current machine), the recorded runs,
    input/output files, and which unpackers report compatibility.

    :param args: parsed command-line arguments; uses ``args.pack[0]``
        (path to the ``.rpz`` file) and ``args.verbosity``.
    """
    pack = Path(args.pack[0])

    # Loads config
    runs, packages, other_files = config = load_config(pack)
    inputs_outputs = config.inputs_outputs

    # Walk the data archive once, counting entries by type
    pack_total_size = 0
    pack_total_paths = 0
    pack_files = 0
    pack_dirs = 0
    pack_symlinks = 0
    pack_others = 0
    rpz_pack = RPZPack(pack)
    for m in rpz_pack.list_data():
        pack_total_size += m.size
        pack_total_paths += 1
        if m.isfile():
            pack_files += 1
        elif m.isdir():
            pack_dirs += 1
        elif m.issym():
            pack_symlinks += 1
        else:
            pack_others += 1
    rpz_pack.close()

    # Aggregate metadata: how many paths come from packed vs unpacked
    # software packages, plus the loose "other" files
    meta_total_paths = 0
    meta_packed_packages_files = 0
    meta_unpacked_packages_files = 0
    meta_packages = len(packages)
    meta_packed_packages = 0
    for package in packages:
        nb = len(package.files)
        meta_total_paths += nb
        if package.packfiles:
            meta_packed_packages_files += nb
            meta_packed_packages += 1
        else:
            meta_unpacked_packages_files += nb
    nb = len(other_files)
    meta_total_paths += nb
    meta_packed_paths = meta_packed_packages_files + nb

    if runs:
        # All runs are expected to share one architecture/distribution;
        # warn (but continue, using the first run's value) if they differ
        meta_architecture = runs[0]['architecture']
        if any(r['architecture'] != meta_architecture
               for r in runs):
            logging.warning("Runs have different architectures")
        meta_distribution = runs[0]['distribution']
        if any(r['distribution'] != meta_distribution
               for r in runs):
            logging.warning("Runs have different distributions")
        meta_distribution = ' '.join(t for t in meta_distribution if t)

    current_architecture = platform.machine().lower()
    # platform.linux_distribution() was deprecated in 3.5 and removed in
    # Python 3.8; fall back to an empty value (rendered as "(not Linux)")
    if hasattr(platform, 'linux_distribution'):
        current_distribution = platform.linux_distribution()[0:2]
    else:
        current_distribution = ('', '')
    current_distribution = ' '.join(t for t in current_distribution if t)

    print("Pack file: %s" % pack)

    print("\n----- Pack information -----")
    print("Compressed size: %s" % hsize(pack.size()))
    print("Unpacked size: %s" % hsize(pack_total_size))
    print("Total packed paths: %d" % pack_total_paths)
    if args.verbosity >= 3:
        print("    Files: %d" % pack_files)
        print("    Directories: %d" % pack_dirs)
        print("    Symbolic links: %d" % pack_symlinks)
        if pack_others:
            print("    Unknown (what!?): %d" % pack_others)

    print("\n----- Metadata -----")
    if args.verbosity >= 3:
        print("Total paths: %d" % meta_total_paths)
        print("Listed packed paths: %d" % meta_packed_paths)
    if packages:
        print("Total software packages: %d" % meta_packages)
        print("Packed software packages: %d" % meta_packed_packages)
        if args.verbosity >= 3:
            print("Files from packed software packages: %d" %
                  meta_packed_packages_files)
            print("Files from unpacked software packages: %d" %
                  meta_unpacked_packages_files)
    if runs:
        print("Architecture: %s (current: %s)" % (meta_architecture,
                                                  current_architecture))
        print("Distribution: %s (current: %s)" % (
              meta_distribution, current_distribution or "(not Linux)"))
        print("Executions (%d):" % len(runs))
        for i, run in enumerate(runs):
            cmdline = ' '.join(shell_escape(a) for a in run['argv'])
            if len(runs) > 1:
                print("    %d: %s" % (i, cmdline))
            else:
                print("    %s" % cmdline)
            if args.verbosity >= 2:
                print("        wd: %s" % run['workingdir'])
                if 'signal' in run:
                    print("        signal: %d" % run['signal'])
                else:
                    print("        exitcode: %d" % run['exitcode'])

    if inputs_outputs:
        if args.verbosity < 2:
            # FIX: was "(%d) :%s" (space before colon, none after),
            # inconsistent with the verbose branch below
            print("Inputs/outputs files (%d): %s" % (
                  len(inputs_outputs), ", ".join(inputs_outputs)))
        else:
            print("Inputs/outputs files (%d):" % len(inputs_outputs))
            for name, f in iteritems(inputs_outputs):
                t = []
                if f.read_runs:
                    t.append("in")
                if f.write_runs:
                    t.append("out")
                print("    %s (%s): %s" % (name, ' '.join(t), f.path))

    # Unpacker compatibility: ask each registered unpacker whether it can
    # handle this pack; compat may be a constant, a callable, or a
    # (status, message) pair
    print("\n----- Unpackers -----")
    unpacker_status = {}
    for name, upk in iteritems(unpackers):
        if 'test_compatibility' in upk:
            compat = upk['test_compatibility']
            if callable(compat):
                compat = compat(pack, config=config)
            if isinstance(compat, (tuple, list)):
                compat, msg = compat
            else:
                msg = None
            unpacker_status.setdefault(compat, []).append((name, msg))
        else:
            unpacker_status.setdefault(None, []).append((name, None))
    for s, n in [(COMPAT_OK, "Compatible"), (COMPAT_MAYBE, "Unknown"),
                 (COMPAT_NO, "Incompatible")]:
        # Only the "Compatible" bucket is shown at low verbosity
        if s != COMPAT_OK and args.verbosity < 2:
            continue
        if s not in unpacker_status:
            continue
        upks = unpacker_status[s]
        print("%s (%d):" % (n, len(upks)))
        for upk_name, msg in upks:
            if msg is not None:
                print("    %s (%s)" % (upk_name, msg))
            else:
                print("    %s" % upk_name)