Exemplo n.º 1
0
    def __init__(self,
                 target,
                 input_files,
                 files,
                 unpacked_info,
                 docker_cmd='docker'):
        self.unpacked_info = unpacked_info
        self.docker_cmd = docker_cmd
        config = load_config(target / 'config.yml', True)
        if config.runs:
            first_run = config.runs[0]
            self.default_ownership = (
                first_run.get('uid'),
                first_run.get('gid'),
            )
        else:
            self.default_ownership = None, None

        FileUploader.__init__(self, target, input_files, files)
Exemplo n.º 2
0
def do_vistrails(target):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.
    """
    record_usage(do_vistrails=True)
    unpacker = signals.unpacker
    dot_vistrails = Path('~/.vistrails').expand_user()

    runs, packages, other_files = load_config(target / 'config.yml',
                                              canonical=True)
    for i, run in enumerate(runs):
        module_name = write_cltools_module(run, dot_vistrails)

        # Writes VisTrails workflow
        bundle = target / 'vistrails.vt'
        logging.info("Writing VisTrails workflow %s...", bundle)
        vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
        try:
            with vtdir.open('w', 'vistrail',
                            encoding='utf-8', newline='\n') as fp:
                vistrail = VISTRAILS_TEMPLATE
                cmdline = ' '.join(shell_escape(arg)
                                   for arg in run['argv'])
                vistrail = vistrail.format(
                        date='2014-11-12 15:31:18',
                        unpacker=unpacker,
                        directory=escape_xml(str(target.absolute())),
                        cmdline=escape_xml(cmdline),
                        module_name=module_name,
                        run=i)
                fp.write(vistrail)

            with bundle.open('wb') as fp:
                z = zipfile.ZipFile(fp, 'w')
                with vtdir.in_dir():
                    for path in Path('.').recursedir():
                        z.write(str(path))
                z.close()
        finally:
            vtdir.rmtree()
Exemplo n.º 3
0
def do_vistrails(target):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.
    """
    record_usage(do_vistrails=True)
    unpacker = signals.unpacker
    dot_vistrails = Path('~/.vistrails').expand_user()

    runs, packages, other_files = load_config(target / 'config.yml',
                                              canonical=True)
    for i, run in enumerate(runs):
        module_name = write_cltools_module(run, dot_vistrails)

        # Writes VisTrails workflow
        bundle = target / 'vistrails.vt'
        logging.info("Writing VisTrails workflow %s...", bundle)
        vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
        try:
            with vtdir.open('w', 'vistrail', encoding='utf-8',
                            newline='\n') as fp:
                vistrail = VISTRAILS_TEMPLATE
                cmdline = ' '.join(shell_escape(arg) for arg in run['argv'])
                vistrail = vistrail.format(date='2014-11-12 15:31:18',
                                           unpacker=unpacker,
                                           directory=escape_xml(
                                               str(target.absolute())),
                                           cmdline=escape_xml(cmdline),
                                           module_name=module_name,
                                           run=i)
                fp.write(vistrail)

            with bundle.open('wb') as fp:
                z = zipfile.ZipFile(fp, 'w')
                with vtdir.in_dir():
                    for path in Path('.').recursedir():
                        z.write(str(path))
                z.close()
        finally:
            vtdir.rmtree()
Exemplo n.º 4
0
def generate(target, configfile, database, all_forks=False):
    """Main function for the graph subcommand.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = load_config(configfile, canonical=False)
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
        '''
        SELECT id, parent, timestamp
        FROM processes
        ORDER BY id
        ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
        '''
        SELECT name, timestamp, mode, process
        FROM opened_files
        ORDER BY id
        ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
        '''
        SELECT name, timestamp, process, argv
        FROM executed_files
        ORDER BY id
        ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the previous
            # one hasn't done anything since it was created via fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
Exemplo n.º 5
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and docker_host != 'localhost'
                    and not docker_host.startswith('127.')
                    and not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join('-R*:%(p)d:127.0.0.1:%(p)d' %
                                       {'p': port}
                                       for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).", ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid, shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call([
            'docker', 'run', b'--name=' + container, '-h', hostname, '-d', '-t'
        ] + args.docker_option + [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(
        ['docker', 'run', b'--name=' + container, '-h', hostname, '-i', '-t'] +
        args.docker_option + [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False
            or outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s", container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 6
0
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info.get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target, unpacked_info['use_chroot'])

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 7
0
def run_from_vistrails():
    setup_logging('REPROUNZIP-VISTRAILS', logging.INFO)

    cli_version = 1
    if len(sys.argv) > 1:
        try:
            cli_version = int(sys.argv[1])
        except ValueError:
            logging.info("Compatibility mode: reprounzip-vistrails didn't get "
                         "a version number")
    if cli_version != 1:
        logging.critical("Unknown interface version %d; you are probably "
                         "using a version of reprounzip-vistrails too old for "
                         "your VisTrails package. Consider upgrading.",
                         cli_version)
        sys.exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument('unpacker')
    parser.add_argument('directory')
    parser.add_argument('run')
    parser.add_argument('--input-file', action='append', default=[])
    parser.add_argument('--output-file', action='append', default=[])
    parser.add_argument('--cmdline', action='store')

    args = parser.parse_args(sys.argv[2:])

    config = load_config(Path(args.directory) / 'config.yml', canonical=True)

    python = sys.executable
    rpuz = [python, '-m', 'reprounzip.main', args.unpacker]

    os.environ['REPROUNZIP_NON_INTERACTIVE'] = 'y'

    def cmd(lst, add=None):
        if add:
            logging.info("cmd: %s %s", ' '.join(lst), add)
            string = ' '.join(shell_escape(a) for a in (rpuz + lst))
            string += ' ' + add
            subprocess.check_call(string, shell=True,
                                  cwd=args.directory)
        else:
            logging.info("cmd: %s", ' '.join(lst))
            subprocess.check_call(rpuz + lst,
                                  cwd=args.directory)

    logging.info("reprounzip-vistrails calling reprounzip; dir=%s",
                 args.directory)

    # Parses input files from the command-line
    upload_command = []
    seen_input_names = set()
    for input_file in args.input_file:
        input_name, filename = input_file.split(':', 1)
        upload_command.append('%s:%s' % (filename, input_name))
        seen_input_names.add(input_name)

    # Resets the input files that are used by this run and were not given
    for name, f in iteritems(config.inputs_outputs):
        if name not in seen_input_names and int(args.run) in f.read_runs:
            upload_command.append(':%s' % name)

    # Runs the command
    cmd(['upload', '.'] + upload_command)

    # Runs the experiment
    if args.cmdline:
        cmd(['run', '.', args.run, '--cmdline'], add=args.cmdline)
    else:
        cmd(['run', '.', args.run])

    # Gets output files
    for output_file in args.output_file:
        output_name, filename = output_file.split(':', 1)
        cmd(['download', '.',
             '%s:%s' % (output_name, filename)])
Exemplo n.º 8
0
def generate(target, configfile, database):
    """Go over the trace and generate the graph file.
    """
    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)

    has_thread_flag = config.format_version >= LooseVersion('0.7')

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    vertices = []
    edges = []

    # Create user entity, that initiates the runs
    vertices.append({'ID': 'user',
                     'type': 'Agent',
                     'subtype': 'User',
                     'label': 'User'})

    run = -1

    # Read processes
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, parent, timestamp, is_thread, exitcode
        FROM processes;
        ''' if has_thread_flag else '''
        SELECT id, parent, timestamp, 0 as is_thread, exitcode
        FROM processes;
        ''')
    for r_id, r_parent, r_timestamp, r_isthread, r_exitcode in rows:
        if r_parent is None:
            # Create run entity
            run += 1
            vertices.append({'ID': 'run%d' % run,
                             'type': 'Activity',
                             'subtype': 'Run',
                             'label': "Run #%d" % run,
                             'date': r_timestamp})
            # User -> run
            edges.append({'ID': 'user_run%d' % run,
                          'type': 'UserRuns',
                          'label': "User runs command",
                          'sourceID': 'user',
                          'targetID': 'run%d' % run})
            # Run -> process
            edges.append({'ID': 'run_start%d' % run,
                          'type': 'RunStarts',
                          'label': "Run #%d command",
                          'sourceID': 'run%d' % run,
                          'targetID': 'process%d' % r_id})

        # Create process entity
        vertices.append({'ID': 'process%d' % r_id,
                         'type': 'Agent',
                         'subtype': 'Thread' if r_isthread else 'Process',
                         'label': 'Process #%d' % r_id,
                         'date': r_timestamp})
        # TODO: add process end time (use master branch?)

        # Add process creation activity
        if r_parent is not None:
            # Process creation activity
            vertex = {'ID': 'fork%d' % r_id,
                      'type': 'Activity',
                      'subtype': 'Fork',
                      'label': "#%d creates %s #%d" % (
                          r_parent,
                          "thread" if r_isthread else "process",
                          r_id),
                      'date': r_timestamp}
            if has_thread_flag:
                vertex['thread'] = 'true' if r_isthread else 'false'
            vertices.append(vertex)

            # Parent -> creation
            edges.append({'ID': 'fork_p_%d' % r_id,
                          'type': 'PerformsFork',
                          'label': "Performs fork",
                          'sourceID': 'process%d' % r_parent,
                          'targetID': 'fork%d' % r_id})
            # Creation -> child
            edges.append({'ID': 'fork_c_%d' % r_id,
                          'type': 'ForkCreates',
                          'label': "Fork creates",
                          'sourceID': 'fork%d' % r_id,
                          'targetID': 'process%d' % r_id})
    cur.close()

    file2package = dict((f.path.path, pkg)
                        for pkg in config.packages
                        for f in pkg.files)
    inputs_outputs = dict((f.path.path, (bool(f.write_runs),
                                         bool(f.read_runs)))
                          for n, f in iteritems(config.inputs_outputs))

    # Read opened files
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT name, is_directory
        FROM opened_files
        GROUP BY name;
        ''')
    for r_name, r_directory in rows:
        # Create file entity
        vertex = {'ID': r_name,
                  'type': 'Entity',
                  'subtype': 'Directory' if r_directory else 'File',
                  'label': r_name}
        if r_name in file2package:
            vertex['package'] = file2package[r_name].name
        if r_name in inputs_outputs:
            out_, in_ = inputs_outputs[r_name]
            if in_:
                vertex['input'] = True
            if out_:
                vertex['output'] = True
        vertices.append(vertex)
    cur.close()

    # Read file opens
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, mode, process
        FROM opened_files;
        ''')
    for r_id, r_name, r_timestamp, r_mode, r_process in rows:
        # Create file access activity
        vertices.append({'ID': 'access%d' % r_id,
                         'type': 'Activity',
                         'subtype': ('FileWrites' if r_mode & FILE_WRITE
                                     else 'FileReads'),
                         'label': ("File write: %s" if r_mode & FILE_WRITE
                                   else "File read: %s") % r_name,
                         'date': r_timestamp,
                         'mode': r_mode})
        # Process -> access
        edges.append({'ID': 'proc_access%d' % r_id,
                      'type': 'PerformsFileAccess',
                      'label': "Process does file access",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'access%d' % r_id})
        # Access -> file
        edges.append({'ID': 'access_file%d' % r_id,
                      'type': 'AccessFile',
                      'label': "File access touches",
                      'sourceID': 'access%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Read executions
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, process, argv
        FROM executed_files;
        ''')
    for r_id, r_name, r_timestamp, r_process, r_argv in rows:
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]
        cmdline = ' '.join(shell_escape(a) for a in argv)

        # Create execution activity
        vertices.append({'ID': 'exec%d' % r_id,
                         'type': 'Activity',
                         'subtype': 'ProcessExecutes',
                         'label': "Process #%d executes file %s" % (r_process,
                                                                    r_name),
                         'date': r_timestamp,
                         'cmdline': cmdline,
                         'process': r_process,
                         'file': r_name})
        # Process -> execution
        edges.append({'ID': 'proc_exec%d' % r_id,
                      'type': 'ProcessExecution',
                      'label': "Process does exec()",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'exec%d' % r_id})
        # Execution -> file
        edges.append({'ID': 'exec_file%d' % r_id,
                      'type': 'ExecutionFile',
                      'label': "Execute file",
                      'sourceID': 'exec%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Write the file from the created lists
    with target.open('w', encoding='utf-8', newline='\n') as out:
        out.write('<?xml version="1.0"?>\n\n'
                  '<provenancedata xmlns:xsi="http://www.w3.org/2001/XMLSchema'
                  '-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n'
                  '  <vertices>\n')

        for vertex in vertices:
            if 'date' not in vertex:
                vertex['date'] = '-1'
            tags = {}
            for k in ('ID', 'type', 'label', 'date'):
                if k not in vertex:
                    vertex.update(tags)
                    raise ValueError("Vertex is missing tag '%s': %r" % (
                                     k, vertex))
                tags[k] = vertex.pop(k)
            out.write('    <vertex>\n      ' +
                      '\n      '.join('<{k}>{v}</{k}>'.format(k=k,
                                                              v=xml_escape(v))
                                      for k, v in iteritems(tags)))
            if vertex:
                out.write('\n      <attributes>\n')
                for k, v in iteritems(vertex):
                    out.write('        <attribute>\n'
                              '          <name>{k}</name>\n'
                              '          <value>{v}</value>\n'
                              '        </attribute>\n'
                              .format(k=xml_escape(k),
                                      v=xml_escape(v)))
                out.write('      </attributes>')
            out.write('\n    </vertex>\n')
        out.write('  </vertices>\n'
                  '  <edges>\n')
        for edge in edges:
            for k in ('ID', 'type', 'label', 'sourceID', 'targetID'):
                if k not in edge:
                    raise ValueError("Edge is missing tag '%s': %r" % (
                                     k, edge))
            if 'value' not in edge:
                edge['value'] = ''
            out.write('    <edge>\n      ' +
                      '\n      '.join('<{k}>{v}</{k}>'.format(k=k,
                                                              v=xml_escape(v))
                                      for k, v in iteritems(edge)) +
                      '\n    </edge>\n')
        out.write('  </edges>\n'
                  '</provenancedata>\n')

    conn.close()
Exemplo n.º 9
0
def run_from_vistrails():
    setup_logging('REPROUNZIP-VISTRAILS', logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('unpacker')
    parser.add_argument('directory')
    parser.add_argument('run')
    parser.add_argument('--input-file', action='append', default=[])
    parser.add_argument('--output-file', action='append', default=[])
    parser.add_argument('--cmdline', action='store')

    args = parser.parse_args()

    runs, packages, other_files = load_config(Path(args.directory) /
                                              'config.yml',
                                              canonical=True)
    run = runs[int(args.run)]

    python = sys.executable
    rpuz = [python, '-m', 'reprounzip.main', args.unpacker]

    os.environ['REPROUNZIP_NON_INTERACTIVE'] = 'y'

    def cmd(lst, add=None):
        if add:
            logging.info("cmd: %s %s", ' '.join(lst), add)
            string = ' '.join(shell_escape(a) for a in (rpuz + lst))
            string += ' ' + add
            subprocess.check_call(string, shell=True, cwd=args.directory)
        else:
            logging.info("cmd: %s", ' '.join(lst))
            subprocess.check_call(rpuz + lst, cwd=args.directory)

    logging.info("reprounzip-vistrails calling reprounzip; dir=%s",
                 args.directory)

    # Parses input files from the command-line
    upload_command = []
    seen_input_names = set()
    for input_file in args.input_file:
        input_name, filename = input_file.split(':', 1)
        upload_command.append('%s:%s' % (filename, input_name))
        seen_input_names.add(input_name)

    # Resets the input files that were not given
    for input_name in run['input_files']:
        if input_name not in seen_input_names:
            upload_command.append(':%s' % input_name)

    # Runs the command
    cmd(['upload', '.'] + upload_command)

    # Runs the experiment
    if args.cmdline:
        cmd(['run', '.', '--cmdline'], add=args.cmdline)
    else:
        cmd(['run', '.'])

    # Gets output files
    for output_file in args.output_file:
        output_name, filename = output_file.split(':', 1)
        cmd(['download', '.', '%s:%s' % (output_name, filename)])
Exemplo n.º 10
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error("Need to install %d packages but couldn't "
                                  "select a package installer: %s",
                                  len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logging.info("Dockerfile will install the %d software "
                             "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logging.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logging.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd word [word '
                '...]] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND"\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Exemplo n.º 11
0
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    ports = parse_ports(args.expose_port)

    # If the requested ports are not a subset of the ones already set on the
    # VM, we have to update the Vagrantfile and issue `vagrant reload`, which
    # will reboot the machine
    req_ports = set(ports)
    set_ports = set(unpacked_info.get('ports', []))
    if not req_ports.issubset(set_ports):
        # Build new set of forwarded ports: the ones already set + the one just
        # requested
        # The ones we request now override the previous config
        all_ports = dict(
            (host, (guest, proto)) for host, guest, proto in set_ports)
        for host, guest, proto in req_ports:
            all_ports[host] = guest, proto
        unpacked_info['ports'] = sorted(
            (host, guest, proto)
            for host, (guest, proto) in iteritems(all_ports))

        write_vagrantfile(target, unpacked_info)
        logger.info("Some requested ports are not yet forwarded, running "
                    "'vagrant reload'")
        retcode = subprocess.call(['vagrant', 'reload', '--no-provision'],
                                  cwd=target.path)
        if retcode != 0:
            logger.critical("vagrant reload failed with code %d, aborting",
                            retcode)
            sys.exit(1)
        write_dict(target, unpacked_info)

    # X11 handler
    if unpacked_info['gui']:
        x11 = LocalX11Handler()
    else:
        x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (userspec, shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = [
            'chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
            for c in x11.init_cmds
        ] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin
                       or os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive, cmds, not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 12
0
def generate(target, configfile, database, all_forks=False, graph_format='dot',
             level_pkgs='file', level_processes='thread',
             level_other_files='all',
             regex_filters=None, regex_replaces=None, aggregates=None):
    """Main function for the graph subcommand.
    """
    try:
        graph_format = {'dot': FORMAT_DOT, 'DOT': FORMAT_DOT,
                        'json': FORMAT_JSON, 'JSON': FORMAT_JSON}[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = dict((f.path, n)
                          for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks,
                                     has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [lambda path, r=re.compile(p): r.search(path) is not None
              for p in regex_filters or []]
    replace = [lambda path, r=re.compile(p): r.sub(repl, path)
               for p, repl in regex_replaces or []]

    def filefilter(path):
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict((f.path, pkg)
                            for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        other_files = set(PosixPath(*f.components[:file_depth + 1])
                          for f in other_files)
        edges = OrderedSet((prog,
                            f if f in package_map
                            else PosixPath(*f.components[:file_depth + 1]),
                            mode,
                            argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv)
                     for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
Exemplo n.º 13
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).",
                    ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/busybox env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        cmd = '/rpzsudo \'#%d\' \'#%d\' /busybox sh -c %s' % (
            uid, gid,
            shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     args.docker_option +
                                     [image, '/busybox', 'sh', '-c', cmds])
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 args.docker_option +
                                 [image, '/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(['docker', 'commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s", container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(['docker', 'rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 14
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n'
                     'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (
                             shell_escape(unicode_(f)),
                             shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
Exemplo n.º 15
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(run['environ']))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        cmd = 'sudo -u \'#%d\' sh -c %s\n' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = subprocess.call(['docker', 'run', b'--name=' + container,
                               '-i', '-t', image,
                               '/bin/sh', '-c', cmds])
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 16
0
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    use_chroot = unpacked_info['use_chroot']
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    ports = parse_ports(args.expose_port)

    # If the requested ports are not a subset of the ones already set on the
    # VM, we have to update the Vagrantfile and issue `vagrant reload`, which
    # will reboot the machine
    req_ports = set(ports)
    set_ports = set(unpacked_info.get('ports', []))
    if not req_ports.issubset(set_ports):
        # Build new set of forwarded ports: the ones already set + the one just
        # requested
        # The ones we request now override the previous config
        all_ports = dict((host, (guest, proto))
                         for host, guest, proto in set_ports)
        for host, guest, proto in req_ports:
            all_ports[host] = guest, proto
        unpacked_info['ports'] = sorted(
            (host, guest, proto)
            for host, (guest, proto) in iteritems(all_ports))

        write_vagrantfile(target, unpacked_info)
        logger.info("Some requested ports are not yet forwarded, running "
                    "'vagrant reload'")
        retcode = subprocess.call(['vagrant', 'reload', '--no-provision'],
                                  cwd=target.path)
        if retcode != 0:
            logger.critical("vagrant reload failed with code %d, aborting",
                            retcode)
            sys.exit(1)
        write_dict(target, unpacked_info)

    # X11 handler
    if unpacked_info['gui']:
        x11 = LocalX11Handler()
    else:
        x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        if use_chroot:
            cmd += '/busybox env -i '
        else:
            cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        environ = fixup_environment(environ, args)
        cmd += ' '.join('%s=%s' % (shell_escape(k), shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (
                       userspec,
                       shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = ['chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
                for c in x11.init_cmds] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds +
            '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = machine_setup(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin or
                       os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive,
                              cmds,
                              not args.no_pty,
                              x11.port_forward)
    stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 17
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target / '.reprounzip')
    cmdline = args.cmdline

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    # Destroy previous container
    if 'ran_container' in unpacked_info:
        container = unpacked_info.pop('ran_container')
        logging.info("Destroying previous container %s",
                     container.decode('ascii'))
        retcode = subprocess.call(['docker', 'rm', '-f', container])
        if retcode != 0:
            logging.error("Error deleting previous container %s",
                          container.decode('ascii'))
        write_dict(target / '.reprounzip', unpacked_info)

    # Use the initial image directly
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Get the local bridge IP
    ip_str = get_iface_addr('docker0')

    # X11 handler
    x11 = X11Handler(args.x11, ('internet', ip_str), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        cmd = 'sudo -u \'#%d\' /bin/busybox sh -c %s\n' % (uid,
                                                           shell_escape(cmd))
        cmds.append(cmd)
    cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(
                LocalForwarder(connector, port))

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(['docker', 'run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t', image,
                                  '/bin/busybox', 'sh', '-c', cmds])
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(['docker', 'inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    sys.stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Store container name (so we can download output files)
    unpacked_info['ran_container'] = container
    write_dict(target / '.reprounzip', unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 18
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs, gui=args.gui)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write(update_script)
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
                fp.write(
                    '\n'
                    'cp /etc/resolv.conf /experimentroot/etc/resolv.conf\n')
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                logging.info("Generating file list...")
                data_files = rpz_pack.data_filenames()
                for f in other_files:
                    if f.path.name == 'resolv.conf' and (
                            f.path.lies_under('/etc') or
                            f.path.lies_under('/run') or
                            f.path.lies_under('/var')):
                        continue
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        if path in data_files:
                            pathlist.append(path)
                        else:
                            logging.info("Missing file %s", path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None or args.gui:
                fp.write('  config.vm.provider "virtualbox" do |v|\n')
                if memory is not None:
                    fp.write('    v.memory = %d\n' % memory)
                if args.gui:
                    fp.write('    v.gui = true\n')
                fp.write('  end\n')

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot,
                                             'gui': args.gui}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Exemplo n.º 19
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logging.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logging.debug("Running from image %s", image.decode('ascii'))
    else:
        logging.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p',
                             '%s:%s%s' % (port_host, port_container, proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logging.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).",
                    ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug("Passing arguments to Docker image:")
        for c in cmd:
            logging.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logging.info("Start container %s (detached)",
                     container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logging.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logging.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd)
    if retcode != 0:
        logging.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Get exit status from "docker inspect"
    out = subprocess.check_output(args.docker_cmd.split() +
                                  ['inspect', container])
    outjson = json.loads(out.decode('ascii'))
    if (outjson[0]["State"]["Running"] is not False or
            outjson[0]["State"]["Paused"] is not False):
        logging.error("Invalid container state after execution:\n%s",
                      json.dumps(outjson[0]["State"]))
    retcode = outjson[0]["State"]["ExitCode"]
    stderr.write("\n*** Command finished, status: %d\n" % retcode)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logging.info("Committing container %s to image %s",
                 container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logging.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(args.docker_cmd.split() + ['rm', container])
    if retcode != 0:
        logging.error("Error deleting container %s", container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logging.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 20
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot, mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info(
                "Some packages were not packed, so we'll install and "
                "copy their files\n"
                "Packages that are missing:\n%s",
                ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error(
                "Need to install %d packages but couldn't select a "
                "package installer: %s", len(packages), e)

    target.mkdir(parents=True)

    # Writes setup script
    logging.info("Writing setup script %s...", target / 'setup.sh')
    with (target / 'setup.sh').open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('#!/bin/sh\n\nset -e\n\n')
        if packages:
            # Updates package sources
            fp.write(installer.update_script())
            fp.write('\n')
            # Installs necessary packages
            fp.write(installer.install_script(packages))
            fp.write('\n')
            # TODO : Compare package versions (painful because of sh)

        # Untar
        if use_chroot:
            fp.write('\n' 'mkdir /experimentroot; cd /experimentroot\n')
            fp.write('tar zpxf /vagrant/experiment.rpz '
                     '--numeric-owner --strip=1 DATA\n')
            if mount_bind:
                fp.write('\n'
                         'mkdir -p /experimentroot/dev\n'
                         'mount -o rbind /dev /experimentroot/dev\n'
                         'mkdir -p /experimentroot/proc\n'
                         'mount -o rbind /proc /experimentroot/proc\n')

            for pkg in packages:
                fp.write('\n# Copies files from package %s\n' % pkg.name)
                for f in pkg.files:
                    f = f.path
                    dest = join_root(PosixPath('/experimentroot'), f)
                    fp.write('mkdir -p %s\n' %
                             shell_escape(unicode_(f.parent)))
                    fp.write('cp -L %s %s\n' % (shell_escape(
                        unicode_(f)), shell_escape(unicode_(dest))))
        else:
            fp.write('\ncd /\n')
            paths = set()
            pathlist = []
            dataroot = PosixPath('DATA')
            # Adds intermediate directories, and checks for existence in the
            # tar
            tar = tarfile.open(str(pack), 'r:*')
            for f in other_files:
                path = PosixPath('/')
                for c in f.path.components[1:]:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    datapath = join_root(dataroot, path)
                    try:
                        tar.getmember(str(datapath))
                    except KeyError:
                        logging.info("Missing file %s", datapath)
                    else:
                        pathlist.append(unicode_(datapath))
            tar.close()
            # FIXME : for some reason we need reversed() here, I'm not sure
            # why. Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            # TAR bug: there is no way to make --keep-old-files not report an
            # error if an existing file is encountered. --skip-old-files was
            # introduced too recently. Instead, we just ignore the exit status
            fp.write('tar zpxf /vagrant/experiment.rpz --keep-old-files '
                     '--numeric-owner --strip=1 %s || /bin/true\n' %
                     ' '.join(shell_escape(p) for p in reversed(pathlist)))

        # Copies /bin/sh + dependencies
        if use_chroot:
            url = busybox_url(runs[0]['architecture'])
            fp.write(r'''
mkdir -p /experimentroot/bin
mkdir -p /experimentroot/usr/bin
if [ ! -e /experimentroot/bin/sh -o ! -e /experimentroot/usr/bin/env ]; then
    wget --quiet -O /experimentroot/bin/busybox {url}
    chmod +x /experimentroot/bin/busybox
fi
[ -e /experimentroot/bin/sh ] || \
    ln -s /bin/busybox /experimentroot/bin/sh
[ -e /experimentroot/usr/bin/env ] || \
    ln -s /bin/busybox /experimentroot/usr/bin/env
'''.format(url=url))

    # Copies pack
    logging.info("Copying pack file...")
    pack.copyfile(target / 'experiment.rpz')

    # Writes Vagrant file
    logging.info("Writing %s...", target / 'Vagrantfile')
    with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                       newline='\n') as fp:
        # Vagrant header and version
        fp.write('# -*- mode: ruby -*-\n'
                 '# vi: set ft=ruby\n\n'
                 'VAGRANTFILE_API_VERSION = "2"\n\n'
                 'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
        # Selects which box to install
        fp.write('  config.vm.box = "%s"\n' % box)
        # Run the setup script on the virtual machine
        fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

        fp.write('end\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {'use_chroot': use_chroot})

    signals.post_setup(target=target)
Exemplo n.º 21
0
def vagrant_run(args):
    """Runs the experiment in the virtual machine.
    """
    target = Path(args.target[0])
    use_chroot = read_dict(target / '.reprounzip').get('use_chroot', True)
    cmdline = args.cmdline

    check_vagrant_version()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    selected_runs = get_runs(runs, args.run, cmdline)

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # X11 handler
    x11 = X11Handler(args.x11, ('local', hostname), args.x11_display)

    cmds = []
    for run_number in selected_runs:
        run = runs[run_number]
        cmd = 'cd %s && ' % shell_escape(run['workingdir'])
        cmd += '/usr/bin/env -i '
        environ = x11.fix_env(run['environ'])
        cmd += ' '.join('%s=%s' % (k, shell_escape(v))
                        for k, v in iteritems(environ))
        cmd += ' '
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is None:
            argv = [run['binary']] + run['argv'][1:]
        else:
            argv = cmdline
        cmd += ' '.join(shell_escape(a) for a in argv)
        uid = run.get('uid', 1000)
        gid = run.get('gid', 1000)
        if use_chroot:
            userspec = '%s:%s' % (uid, gid)
            cmd = ('chroot --userspec=%s /experimentroot '
                   '/bin/sh -c %s' % (userspec, shell_escape(cmd)))
        else:
            cmd = 'sudo -u \'#%d\' sh -c %s' % (uid, shell_escape(cmd))
        cmds.append(cmd)
    if use_chroot:
        cmds = [
            'chroot /experimentroot /bin/sh -c %s' % shell_escape(c)
            for c in x11.init_cmds
        ] + cmds
    else:
        cmds = x11.init_cmds + cmds
    cmds = ' && '.join(cmds)
    # Sets the hostname to the original experiment's machine's
    # FIXME: not reentrant: this restores the Vagrant machine's hostname after
    # the run, which might cause issues if several "reprounzip vagrant run" are
    # running at once
    cmds = ('OLD_HOSTNAME=$(/bin/hostname); /bin/hostname %s; ' % hostname +
            cmds + '; RES=$?; /bin/hostname "$OLD_HOSTNAME"; exit $RES')
    cmds = '/usr/bin/sudo /bin/sh -c %s' % shell_escape(cmds)

    # Gets vagrant SSH parameters
    info = get_ssh_parameters(target)

    signals.pre_run(target=target)

    interactive = not (args.no_stdin
                       or os.environ.get('REPROUNZIP_NON_INTERACTIVE'))
    retcode = run_interactive(info, interactive, cmds, not args.no_pty,
                              x11.port_forward)
    sys.stderr.write("\r\n*** Command finished, status: %d\r\n" % retcode)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 22
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logger.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logger.info("Using base image %s", base_image)
        logger.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logger.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch),
                          target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch),
                          target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n'
                     '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logger.error("Need to install %d packages but couldn't "
                                 "select a package installer: %s",
                                 len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logger.info("Dockerfile will install the %d software "
                            "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            logger.info("Generating file list...")
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                if f.path.name == 'resolv.conf' and (
                        f.path.lies_under('/etc') or
                        f.path.lies_under('/run') or
                        f.path.lies_under('/var')):
                    continue
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logger.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

            # Setup entry point
            fp.write('COPY rpz_entrypoint.sh /rpz_entrypoint.sh\n'
                     'ENTRYPOINT ["/busybox", "sh", "/rpz_entrypoint.sh"]\n')

        # Write entry point script
        logger.info("Writing %s...", target / 'rpz_entrypoint.sh')
        with (target / 'rpz_entrypoint.sh').open('w', encoding='utf-8',
                                                 newline='\n') as fp:
            # The entrypoint gets some arguments from the run command
            # By default, it just does all the runs
            # "run N" executes the run with that number
            # "cmd STR" sets a replacement command-line for the next run
            # "do STR" executes a command as-is
            fp.write(
                '#!/bin/sh\n'
                '\n'
                'COMMAND=\n'
                'ENVVARS=\n'
                '\n'
                'if [ $# = 0 ]; then\n'
                '    exec /busybox sh /rpz_entrypoint.sh')
            for nb in irange(len(runs)):
                fp.write(' run %d' % nb)
            fp.write(
                '\n'
                'fi\n'
                '\n'
                'while [ $# != 0 ]; do\n'
                '    case "$1" in\n'
                '        help)\n'
                '            echo "Image built from reprounzip-docker" >&2\n'
                '            echo "Usage: docker run <image> [cmd "word [word '
                '...]"] [run <R>]" >&2\n'
                '            echo "    \\`cmd ...\\` changes the command for '
                'the next \\`run\\` option" >&2\n'
                '            echo "    \\`run <name|number>\\` runs the '
                'specified run" >&2\n'
                '            echo "By default, all the runs are executed." '
                '>&2\n'
                '            echo "The runs in this image are:" >&2\n')
            for run in runs:
                fp.write(
                    '            echo "    {name}: {cmdline}" >&2\n'.format(
                        name=run['id'],
                        cmdline=' '.join(shell_escape(a)
                                         for a in run['argv'])))
            fp.write(
                '            exit 0\n'
                '        ;;\n'
                '        do)\n'
                '            shift\n'
                '            $1\n'
                '        ;;\n'
                '        env)\n'
                '            shift\n'
                '            ENVVARS="$1"\n'
                '        ;;\n'
                '        cmd)\n'
                '            shift\n'
                '            COMMAND="$1"\n'
                '        ;;\n'
                '        run)\n'
                '            shift\n'
                '            case "$1" in\n')
            for i, run in enumerate(runs):
                cmdline = ' '.join([run['binary']] + run['argv'][1:])
                fp.write(
                    '                {name})\n'
                    '                    RUNCOMMAND={cmd}\n'
                    '                    RUNWD={wd}\n'
                    '                    RUNENV={env}\n'
                    '                    RUNUID={uid}\n'
                    '                    RUNGID={gid}\n'
                    '                ;;\n'.format(
                        name='%s|%d' % (run['id'], i),
                        cmd=shell_escape(cmdline),
                        wd=shell_escape(run['workingdir']),
                        env=shell_escape(' '.join(
                            '%s=%s' % (shell_escape(k), shell_escape(v))
                            for k, v in iteritems(run['environ']))),
                        uid=run.get('uid', 1000),
                        gid=run.get('gid', 1000)))
            fp.write(
                '                *)\n'
                '                    echo "RPZ: Unknown run $1" >&2\n'
                '                    exit 1\n'
                '                ;;\n'
                '            esac\n'
                '            if [ -n "$COMMAND" ]; then\n'
                '                RUNCOMMAND="$COMMAND"\n'
                '                COMMAND=\n'
                '            fi\n'
                '            export RUNWD; export RUNENV; export ENVVARS; '
                'export RUNCOMMAND\n'
                '            /rpzsudo "#$RUNUID" "#$RUNGID" /busybox sh -c '
                '"cd \\"\\$RUNWD\\" && /busybox env -i $RUNENV $ENVVARS '
                '$RUNCOMMAND; echo \\"*** Command finished, status: \\$?\\""\n'
                '            ENVVARS=\n'
                '        ;;\n'
                '        *)\n'
                '            echo "RPZ: Unknown option $1" >&2\n'
                '            exit 1\n'
                '        ;;\n'
                '    esac\n'
                '    shift\n'
                'done\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Exemplo n.º 23
0
def do_vistrails(target, pack=None, **kwargs):
    """Create a VisTrails workflow that runs the experiment.

    This is called from signals after an experiment has been setup by any
    unpacker.
    """
    record_usage(do_vistrails=True)

    config = load_config(target / 'config.yml', canonical=True)

    # Writes VisTrails workflow
    bundle = target / 'vistrails.vt'
    logging.info("Writing VisTrails workflow %s...", bundle)
    vtdir = Path.tempdir(prefix='reprounzip_vistrails_')
    ids = IdScope()
    try:
        with vtdir.open('w', 'vistrail',
                        encoding='utf-8', newline='\n') as fp:
            wf = Workflow(fp, ids)

            # Directory module, refering to this directory
            d = wf.add_module('%s:Directory' % rpz_id, rpz_version)
            wf.add_function(d, 'directory',
                            [(directory_sig, str(target.resolve()))])

            connect_from = d

            for i, run in enumerate(config.runs):
                inputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                if i in f.read_runs)
                outputs = sorted(n for n, f in iteritems(config.inputs_outputs)
                                 if i in f.write_runs)
                ports = itertools.chain((('input', p) for p in inputs),
                                        (('output', p) for p in outputs))

                # Run module
                r = wf.add_module('%s:Run' % rpz_id, rpz_version)
                wf.add_function(r, 'cmdline', [
                                (string_sig,
                                 ' '.join(shell_escape(arg)
                                          for arg in run['argv']))])
                wf.add_function(r, 'run_number', [(integer_sig, i)])

                # Port specs for input/output files
                for type_, name in ports:
                    wf.add_port_spec(r, name, type_, [file_pkg_mod])

                # Draw connection
                wf.connect(connect_from, experiment_sig, 'experiment',
                           r, experiment_sig, 'experiment')
                connect_from = r

            wf.close()

        with bundle.open('wb') as fp:
            z = zipfile.ZipFile(fp, 'w')
            with vtdir.in_dir():
                for path in Path('.').recursedir():
                    z.write(str(path))
            z.close()
    finally:
        vtdir.rmtree()
Exemplo n.º 24
0
def docker_run(args):
    """Runs the experiment in the container.
    """
    target = Path(args.target[0])
    unpacked_info = read_dict(target)
    cmdline = args.cmdline

    # Sanity check
    if args.detach and args.x11:
        logger.critical("Error: Can't use X11 forwarding if you're detaching")
        raise UsageError

    # Loads config
    config = load_config(target / 'config.yml', True)
    runs = config.runs

    selected_runs = get_runs(runs, args.run, cmdline)

    # Get current image name
    if 'current_image' in unpacked_info:
        image = unpacked_info['current_image']
        logger.debug("Running from image %s", image.decode('ascii'))
    else:
        logger.critical("Image doesn't exist yet, have you run setup/build?")
        sys.exit(1)

    # Name of new container
    if args.detach:
        container = make_unique_name(b'reprounzip_detached_')
    else:
        container = make_unique_name(b'reprounzip_run_')

    hostname = runs[selected_runs[0]].get('hostname', 'reprounzip')

    # Port forwarding
    port_options = []
    for port_host, port_container, proto in parse_ports(args.expose_port):
        port_options.extend(['-p',
                             '%s:%s/%s' % (port_host, port_container, proto)])

    # X11 handler
    if args.x11:
        local_ip = get_local_addr()

        docker_host = local_ip
        if os.environ.get('DOCKER_HOST'):
            m = _dockerhost_re.match(os.environ['DOCKER_HOST'])
            if m is not None:
                docker_host = m.group(1)

        if args.tunneled_x11:
            x11 = X11Handler(True, ('internet', docker_host), args.x11_display)
        else:
            x11 = X11Handler(True, ('internet', local_ip), args.x11_display)

            if (docker_host != local_ip and docker_host != 'localhost' and
                    not docker_host.startswith('127.') and
                    not docker_host.startswith('192.168.99.')):
                ssh_cmdline = ' '.join(
                    '-R*:%(p)d:127.0.0.1:%(p)d' % {'p': port}
                    for port, connector in x11.port_forward)
                logger.warning(
                    "You requested X11 forwarding but the Docker container "
                    "appears to be running remotely. It is probable that it "
                    "won't be able to connect to the local display. Creating "
                    "a remote SSH tunnel and running with --tunneled-x11 "
                    "might help (%s).",
                    ssh_cmdline)
    else:
        x11 = X11Handler(False, ('local', hostname), args.x11_display)

    cmd = []
    for run_number in selected_runs:
        run = runs[run_number]
        env_set, env_unset = x11.env_fixes(run['environ'])
        a_env_set, a_env_unset = parse_environment_args(args)
        env_set.update(a_env_set)
        env_unset.extend(a_env_unset)
        if env_set or env_unset:
            cmd.append('env')
            env = []
            for k in env_unset:
                env.append('-u')
                env.append(shell_escape(k))
            for k, v in iteritems(env_set):
                env.append('%s=%s' % (shell_escape(k), shell_escape(v)))
            cmd.append(' '.join(env))
        # FIXME : Use exec -a or something if binary != argv[0]
        if cmdline is not None:
            cmd.append('cmd')
            cmd.append(' '.join(shell_escape(a) for a in cmdline))
        cmd.append('run')
        cmd.append('%d' % run_number)
    cmd = list(chain.from_iterable([['do', shell_escape(c)]
                                    for c in x11.init_cmds] +
                                   [cmd]))
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Passing arguments to Docker image:")
        for c in cmd:
            logger.debug(c)

    signals.pre_run(target=target)

    # Creates forwarders
    forwarders = []
    for port, connector in x11.port_forward:
        forwarders.append(LocalForwarder(connector, port))

    if args.detach:
        logger.info("Start container %s (detached)",
                    container.decode('ascii'))
        retcode = interruptible_call(args.docker_cmd.split() +
                                     ['run', b'--name=' + container,
                                      '-h', hostname,
                                      '-d', '-t'] +
                                     port_options +
                                     args.docker_option +
                                     [image] + cmd)
        if retcode != 0:
            logger.critical("docker run failed with code %d", retcode)
            subprocess.call(['docker', 'rm', '-f', container])
            sys.exit(1)
        return

    # Run command in container
    logger.info("Starting container %s", container.decode('ascii'))
    retcode = interruptible_call(args.docker_cmd.split() +
                                 ['run', b'--name=' + container,
                                  '-h', hostname,
                                  '-i', '-t'] +
                                 port_options +
                                 args.docker_option +
                                 [image] + cmd,
                                 request_tty=True)

    # The image prints out the exit status(es) itself
    if retcode != 0:
        logger.critical("docker run failed with code %d", retcode)
        subprocess.call(['docker', 'rm', '-f', container])
        sys.exit(1)

    # Commit to create new image
    new_image = make_unique_name(b'reprounzip_image_')
    logger.info("Committing container %s to image %s",
                container.decode('ascii'), new_image.decode('ascii'))
    subprocess.check_call(args.docker_cmd.split() +
                          ['commit', container, new_image])

    # Update image name
    unpacked_info['current_image'] = new_image
    write_dict(target, unpacked_info)

    # Remove the container
    logger.info("Destroying container %s", container.decode('ascii'))
    retcode = subprocess.call(args.docker_cmd.split() + ['rm', container])
    if retcode != 0:
        logger.error("Error deleting container %s", container.decode('ascii'))

    # Untag previous image, unless it is the initial_image
    if image != unpacked_info['initial_image']:
        logger.info("Untagging previous image %s", image.decode('ascii'))
        subprocess.check_call(args.docker_cmd.split() + ['rmi', image])

    # Update input file status
    metadata_update_run(config, unpacked_info, selected_runs)
    write_dict(target, unpacked_info)

    signals.post_run(target=target, retcode=retcode)
Exemplo n.º 25
0
def vagrant_setup_create(args):
    """Sets up the experiment to be run in a Vagrant-built virtual machine.

    This can either build a chroot or not.

    If building a chroot, we do just like without Vagrant: we copy all the
    files and only get what's missing from the host. But we do install
    automatically the packages whose files are required.

    If not building a chroot, we install all the packages, and only unpack
    files that don't come from packages.

    In short: files from packages with packfiles=True will only be used if
    building a chroot.
    """
    if not args.pack:
        logging.critical("setup/create needs the pack filename")
        sys.exit(1)

    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)
    use_chroot = args.use_chroot
    mount_bind = args.bind_magic_dirs
    record_usage(use_chroot=use_chroot,
                 mount_bind=mount_bind)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = config = load_config(target / 'config.yml',
                                                       True)

    if not args.memory:
        memory = None
    else:
        try:
            memory = int(args.memory[-1])
        except ValueError:
            logging.critical("Invalid value for memory size: %r", args.memory)
            sys.exit(1)

    if args.base_image and args.base_image[0]:
        record_usage(vagrant_explicit_image=True)
        box = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, box = select_box(runs)
    logging.info("Using box %s", box)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    # If using chroot, we might still need to install packages to get missing
    # (not packed) files
    if use_chroot:
        packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(vagrant_install_pkgs=True)
            logging.info("Some packages were not packed, so we'll install and "
                         "copy their files\n"
                         "Packages that are missing:\n%s",
                         ' '.join(pkg.name for pkg in packages))

    if packages:
        try:
            installer = select_installer(pack, runs, target_distribution)
        except CantFindInstaller as e:
            logging.error("Need to install %d packages but couldn't select a "
                          "package installer: %s",
                          len(packages), e)

    target.mkdir(parents=True)

    try:
        # Writes setup script
        logging.info("Writing setup script %s...", target / 'setup.sh')
        with (target / 'setup.sh').open('w', encoding='utf-8',
                                        newline='\n') as fp:
            fp.write('#!/bin/sh\n\nset -e\n\n')
            if packages:
                # Updates package sources
                fp.write(installer.update_script())
                fp.write('\n')
                # Installs necessary packages
                fp.write(installer.install_script(packages))
                fp.write('\n')
                # TODO : Compare package versions (painful because of sh)

            # Untar
            if use_chroot:
                fp.write('\n'
                         'mkdir /experimentroot; cd /experimentroot\n')
                fp.write('tar zpxf /vagrant/data.tgz --numeric-owner '
                         '--strip=1 %s\n' % rpz_pack.data_prefix)
                if mount_bind:
                    fp.write('\n'
                             'mkdir -p /experimentroot/dev\n'
                             'mkdir -p /experimentroot/proc\n')

                for pkg in packages:
                    fp.write('\n# Copies files from package %s\n' % pkg.name)
                    for f in pkg.files:
                        f = f.path
                        dest = join_root(PosixPath('/experimentroot'), f)
                        fp.write('mkdir -p %s\n' %
                                 shell_escape(unicode_(f.parent)))
                        fp.write('cp -L %s %s\n' % (
                                 shell_escape(unicode_(f)),
                                 shell_escape(unicode_(dest))))
            else:
                fp.write('\ncd /\n')
                paths = set()
                pathlist = []
                # Adds intermediate directories, and checks for existence in
                # the tar
                for f in other_files:
                    path = PosixPath('/')
                    for c in rpz_pack.remove_data_prefix(f.path).components:
                        path = path / c
                        if path in paths:
                            continue
                        paths.add(path)
                        try:
                            rpz_pack.get_data(path)
                        except KeyError:
                            logging.info("Missing file %s", path)
                        else:
                            pathlist.append(path)
                # FIXME : for some reason we need reversed() here, I'm not sure
                # why. Need to read more of tar's docs.
                # TAR bug: --no-overwrite-dir removes --keep-old-files
                # TAR bug: there is no way to make --keep-old-files not report
                # an error if an existing file is encountered. --skip-old-files
                # was introduced too recently. Instead, we just ignore the exit
                # status
                with (target / 'rpz-files.list').open('wb') as lfp:
                    for p in reversed(pathlist):
                        lfp.write(join_root(rpz_pack.data_prefix, p).path)
                        lfp.write(b'\0')
                fp.write('tar zpxf /vagrant/data.tgz --keep-old-files '
                         '--numeric-owner --strip=1 '
                         '--null -T /vagrant/rpz-files.list || /bin/true\n')

            # Copies busybox
            if use_chroot:
                arch = runs[0]['architecture']
                download_file(busybox_url(arch),
                              target / 'busybox',
                              'busybox-%s' % arch)
                fp.write(r'''
cp /vagrant/busybox /experimentroot/busybox
chmod +x /experimentroot/busybox
mkdir -p /experimentroot/bin
[ -e /experimentroot/bin/sh ] || \
    ln -s /busybox /experimentroot/bin/sh
''')

        # Copies pack
        logging.info("Copying pack file...")
        rpz_pack.copy_data_tar(target / 'data.tgz')

        rpz_pack.close()

        # Writes Vagrant file
        logging.info("Writing %s...", target / 'Vagrantfile')
        with (target / 'Vagrantfile').open('w', encoding='utf-8',
                                           newline='\n') as fp:
            # Vagrant header and version
            fp.write(
                '# -*- mode: ruby -*-\n'
                '# vi: set ft=ruby\n\n'
                'VAGRANTFILE_API_VERSION = "2"\n\n'
                'Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|\n')
            # Selects which box to install
            fp.write('  config.vm.box = "%s"\n' % box)
            # Run the setup script on the virtual machine
            fp.write('  config.vm.provision "shell", path: "setup.sh"\n')

            # Memory size
            if memory is not None:
                fp.write('  config.vm.provider "virtualbox" do |v|\n'
                         '    v.memory = %d\n'
                         '  end\n' % memory)

            fp.write('end\n')

        # Meta-data for reprounzip
        write_dict(target,
                   metadata_initial_iofiles(config,
                                            {'use_chroot': use_chroot}))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Exemplo n.º 26
0
def generate(target, directory, all_forks=False):
    """Main function for the graph subcommand.
    """
    # In here, a file is any file on the filesystem. A binary is a file, that
    # gets executed. A process is a system-level task, identified by its pid
    # (pids don't get reused in the database).
    # What I call program is the couple (process, binary), so forking creates a
    # new program (with the same binary) and exec'ing creates a new program as
    # well (with the same process)
    # Because of this, fork+exec will create an intermediate program that
    # doesn't do anything (new process but still old binary). If that program
    # doesn't do anything worth showing on the graph, it will be erased, unless
    # all_forks is True (--all-forks).

    database = directory / 'trace.sqlite3'

    # Reads package ownership from the configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files, patterns = load_config(configfile,
                                                        canonical=False)
    packages = dict((f.path, pkg) for pkg in packages for f in pkg.files)

    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)

    # This is a bit weird. We need to iterate on all types of events at the
    # same time, ordering by timestamp, so we decorate-sort-undecorate
    # Decoration adds timestamp (for sorting) and tags by event type, one of
    # 'process', 'open' or 'exec'

    # Reads processes from the database
    process_cursor = conn.cursor()
    process_rows = process_cursor.execute(
            '''
            SELECT id, parent, timestamp
            FROM processes
            ORDER BY id
            ''')
    processes = {}
    all_programs = []

    # ... and opened files...
    file_cursor = conn.cursor()
    file_rows = file_cursor.execute(
            '''
            SELECT name, timestamp, mode, process
            FROM opened_files
            ORDER BY id
            ''')
    binaries = set()
    files = OrderedSet()
    edges = OrderedSet()

    # ... as well as executed files.
    exec_cursor = conn.cursor()
    exec_rows = exec_cursor.execute(
            '''
            SELECT name, timestamp, process, argv
            FROM executed_files
            ORDER BY id
            ''')

    # Loop on all event lists
    logging.info("Getting all events from database...")
    rows = heapq.merge(((r[2], 'process', r) for r in process_rows),
                       ((r[1], 'open', r) for r in file_rows),
                       ((r[1], 'exec', r) for r in exec_rows))
    for ts, event_type, data in rows:
        if event_type == 'process':
            r_id, r_parent, r_timestamp = data
            if r_parent is not None:
                parent = processes[r_parent]
                binary = parent.binary
            else:
                parent = None
                binary = None
            p = Process(r_id,
                        parent,
                        r_timestamp,
                        False,
                        binary,
                        C_INITIAL if r_parent is None else C_FORK)
            processes[r_id] = p
            all_programs.append(p)

        elif event_type == 'open':
            r_name, r_timestamp, r_mode, r_process = data
            r_name = PosixPath(r_name)
            if r_mode != FILE_WDIR:
                process = processes[r_process]
                files.add(r_name)
                edges.add((process, r_name, r_mode, None))

        elif event_type == 'exec':
            r_name, r_timestamp, r_process, r_argv = data
            r_name = PosixPath(r_name)
            process = processes[r_process]
            binaries.add(r_name)
            # Here we split this process in two "programs", unless the previous
            # one hasn't done anything since it was created via fork()
            if not all_forks and not process.acted:
                process.binary = r_name
                process.created = C_FORKEXEC
                process.acted = True
            else:
                process = Process(process.pid,
                                  process,
                                  r_timestamp,
                                  True,         # Hides exec only once
                                  r_name,
                                  C_EXEC)
                all_programs.append(process)
                processes[r_process] = process
            argv = tuple(r_argv.split('\0'))
            if not argv[-1]:
                argv = argv[:-1]
            edges.add((process, r_name, None, argv))

    process_cursor.close()
    file_cursor.close()
    conn.close()

    # Puts files in packages
    logging.info("Organizes packages...")
    package_files = {}
    other_files = []
    for f in files:
        pkg = packages.get(f)
        if pkg is not None:
            package_files.setdefault((pkg.name, pkg.version), []).append(f)
        else:
            other_files.append(f)

    # Writes DOT file
    with target.open('w', encoding='utf-8', newline='\n') as fp:
        fp.write('digraph G {\n    /* programs */\n    node [shape=box];\n')
        # Programs
        logging.info("Writing programs...")
        for program in all_programs:
            fp.write('    prog%d [label="%s (%d)"];\n' % (
                     id(program), program.binary or "-", program.pid))
            if program.parent is not None:
                reason = ''
                if program.created == C_FORK:
                    reason = "fork"
                elif program.created == C_EXEC:
                    reason = "exec"
                elif program.created == C_FORKEXEC:
                    reason = "fork+exec"
                fp.write('    prog%d -> prog%d [label="%s"];\n' % (
                         id(program.parent), id(program), reason))

        fp.write('\n    node [shape=ellipse];\n\n    /* system packages */\n')

        # Files from packages
        logging.info("Writing packages...")
        for i, ((name, version), files) in enumerate(iteritems(package_files)):
            fp.write('    subgraph cluster%d {\n        label=' % i)
            if version:
                fp.write('"%s %s";\n' % (escape(name), escape(version)))
            else:
                fp.write('"%s";\n' % escape(name))
            for f in files:
                fp.write('        "%s";\n' % escape(unicode_(f)))
            fp.write('    }\n')

        fp.write('\n    /* other files */\n')

        # Other files
        logging.info("Writing other files...")
        for f in other_files:
            fp.write('    "%s"\n' % escape(unicode_(f)))

        fp.write('\n')

        # Edges
        logging.info("Connecting edges...")
        for prog, f, mode, argv in edges:
            if mode is None:
                fp.write('    "%s" -> prog%d [color=blue, label="%s"];\n' % (
                         escape(unicode_(f)),
                         id(prog),
                         escape(' '.join(argv))))
            elif mode & FILE_WRITE:
                fp.write('    prog%d -> "%s" [color=red];\n' % (
                         id(prog), escape(unicode_(f))))
            elif mode & FILE_READ:
                fp.write('    "%s" -> prog%d [color=green];\n' % (
                         escape(unicode_(f)), id(prog)))

        fp.write('}\n')
Exemplo n.º 27
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    target.mkdir()

    try:
        # Unpacks configuration file
        rpz_pack = RPZPack(pack)
        rpz_pack.extract_config(target / 'config.yml')

        # Loads config
        runs, packages, other_files = config = load_config(
            target / 'config.yml', True)

        if args.base_image:
            record_usage(docker_explicit_base=True)
            base_image = args.base_image[0]
            if args.distribution:
                target_distribution = args.distribution[0]
            else:
                target_distribution = None
        else:
            target_distribution, base_image = select_image(runs)
        logging.info("Using base image %s", base_image)
        logging.debug("Distribution: %s", target_distribution or "unknown")

        rpz_pack.copy_data_tar(target / 'data.tgz')

        arch = runs[0]['architecture']

        # Writes Dockerfile
        logging.info("Writing %s...", target / 'Dockerfile')
        with (target / 'Dockerfile').open('w', encoding='utf-8',
                                          newline='\n') as fp:
            fp.write('FROM %s\n\n' % base_image)

            # Installs busybox
            download_file(busybox_url(arch), target / 'busybox',
                          'busybox-%s' % arch)
            fp.write('COPY busybox /busybox\n')

            # Installs rpzsudo
            download_file(sudo_url(arch), target / 'rpzsudo',
                          'rpzsudo-%s' % arch)
            fp.write('COPY rpzsudo /rpzsudo\n\n')

            fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
            fp.write('COPY rpz-files.list /rpz-files.list\n')
            fp.write('RUN \\\n' '    chmod +x /busybox /rpzsudo && \\\n')

            if args.install_pkgs:
                # Install every package through package manager
                missing_packages = []
            else:
                # Only install packages that were not packed
                missing_packages = [pkg for pkg in packages if pkg.packfiles]
                packages = [pkg for pkg in packages if not pkg.packfiles]
            if packages:
                record_usage(docker_install_pkgs=True)
                try:
                    installer = select_installer(pack, runs,
                                                 target_distribution)
                except CantFindInstaller as e:
                    logging.error(
                        "Need to install %d packages but couldn't "
                        "select a package installer: %s", len(packages), e)
                    sys.exit(1)
                # Updates package sources
                update_script = installer.update_script()
                if update_script:
                    fp.write('    %s && \\\n' % update_script)
                # Installs necessary packages
                fp.write('    %s && \\\n' % installer.install_script(packages))
                logging.info(
                    "Dockerfile will install the %d software "
                    "packages that were not packed", len(packages))
            else:
                record_usage(docker_install_pkgs=False)

            # Untar
            paths = set()
            pathlist = []
            # Add intermediate directories, and check for existence in the tar
            missing_files = chain.from_iterable(pkg.files
                                                for pkg in missing_packages)
            data_files = rpz_pack.data_filenames()
            listoffiles = list(chain(other_files, missing_files))
            for f in listoffiles:
                path = PosixPath('/')
                for c in rpz_pack.remove_data_prefix(f.path).components:
                    path = path / c
                    if path in paths:
                        continue
                    paths.add(path)
                    if path in data_files:
                        pathlist.append(path)
                    else:
                        logging.info("Missing file %s", path)
            rpz_pack.close()
            # FIXME : for some reason we need reversed() here, I'm not sure why
            # Need to read more of tar's docs.
            # TAR bug: --no-overwrite-dir removes --keep-old-files
            with (target / 'rpz-files.list').open('wb') as lfp:
                for p in reversed(pathlist):
                    lfp.write(join_root(rpz_pack.data_prefix, p).path)
                    lfp.write(b'\0')
            fp.write('    cd / && '
                     '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                     '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                     '/busybox echo "TAR reports errors, this might or might '
                     'not prevent the execution to run")\n')

        # Meta-data for reprounzip
        write_dict(target, metadata_initial_iofiles(config))

        signals.post_setup(target=target, pack=pack)
    except Exception:
        target.rmtree(ignore_errors=True)
        raise
Exemplo n.º 28
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    rpz_pack = RPZPack(pack)
    rpz_pack.extract_config(target / 'config.yml')

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    rpz_pack.copy_data_tar(target / 'data.tgz')

    arch = runs[0]['architecture']

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w',
                                      encoding='utf-8', newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(arch),
                      target / 'busybox',
                      'busybox-%s' % arch)
        fp.write('COPY busybox /busybox\n')

        # Installs rpzsudo
        download_file(sudo_url(arch),
                      target / 'rpzsudo',
                      'rpzsudo-%s' % arch)
        fp.write('COPY rpzsudo /rpzsudo\n\n')

        fp.write('COPY data.tgz /reprozip_data.tgz\n\n')
        fp.write('COPY rpz-files.list /rpz-files.list\n')
        fp.write('RUN \\\n'
                 '    chmod +x /busybox /rpzsudo && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]
        if packages:
            record_usage(docker_install_pkgs=True)
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write('    %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write('    %s && \\\n' % installer.install_script(packages))
            logging.info("Dockerfile will install the %d software packages "
                         "that were not packed", len(packages))
        else:
            record_usage(docker_install_pkgs=False)

        # Untar
        paths = set()
        pathlist = []
        # Adds intermediate directories, and checks for existence in the tar
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            for c in rpz_pack.remove_data_prefix(f.path).components:
                path = path / c
                if path in paths:
                    continue
                paths.add(path)
                try:
                    rpz_pack.get_data(path)
                except KeyError:
                    logging.info("Missing file %s", path)
                else:
                    pathlist.append(path)
        rpz_pack.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        with (target / 'rpz-files.list').open('wb') as lfp:
            for p in reversed(pathlist):
                lfp.write(join_root(rpz_pack.data_prefix, p).path)
                lfp.write(b'\0')
        fp.write('    cd / && '
                 '(tar zpxf /reprozip_data.tgz -U --recursive-unlink '
                 '--numeric-owner --strip=1 --null -T /rpz-files.list || '
                 '/busybox echo "TAR reports errors, this might or might '
                 'not prevent the execution to run")\n')

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target, pack=pack)
Exemplo n.º 29
0
def generate(target,
             configfile,
             database,
             all_forks=False,
             graph_format='dot',
             level_pkgs='file',
             level_processes='thread',
             level_other_files='all',
             regex_filters=None,
             regex_replaces=None,
             aggregates=None):
    """Main function for the graph subcommand.
    """
    try:
        graph_format = {
            'dot': FORMAT_DOT,
            'DOT': FORMAT_DOT,
            'json': FORMAT_JSON,
            'JSON': FORMAT_JSON
        }[graph_format]
    except KeyError:
        logging.critical("Unknown output format %r", graph_format)
        sys.exit(1)

    level_pkgs, level_processes, level_other_files, file_depth = \
        parse_levels(level_pkgs, level_processes, level_other_files)

    # Reads package ownership from the configuration
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    config = load_config(configfile, canonical=False)
    inputs_outputs = dict(
        (f.path, n) for n, f in iteritems(config.inputs_outputs))
    has_thread_flag = config.format_version >= LooseVersion('0.7')

    runs, files, edges = read_events(database, all_forks, has_thread_flag)

    # Label the runs
    if len(runs) != len(config.runs):
        logging.warning("Configuration file doesn't list the same number of "
                        "runs we found in the database!")
    else:
        for config_run, run in izip(config.runs, runs):
            run.name = config_run['id']

    # Apply regexes
    ignore = [
        lambda path, r=re.compile(p): r.search(path) is not None
        for p in regex_filters or []
    ]
    replace = [
        lambda path, r=re.compile(p): r.sub(repl, path)
        for p, repl in regex_replaces or []
    ]

    def filefilter(path):
        pathuni = unicode_(path)
        if any(f(pathuni) for f in ignore):
            logging.debug("IGN %s", pathuni)
            return None
        if not (replace or aggregates):
            return path
        for fi in replace:
            pathuni_ = fi(pathuni)
            if pathuni_ != pathuni:
                logging.debug("SUB %s -> %s", pathuni, pathuni_)
            pathuni = pathuni_
        for prefix in aggregates or []:
            if pathuni.startswith(prefix):
                logging.debug("AGG %s -> %s", pathuni, prefix)
                pathuni = prefix
                break
        return PosixPath(pathuni)

    files_new = set()
    for fi in files:
        fi = filefilter(fi)
        if fi is not None:
            files_new.add(fi)
    files = files_new

    edges_new = OrderedSet()
    for prog, fi, mode, argv in edges:
        fi = filefilter(fi)
        if fi is not None:
            edges_new.add((prog, fi, mode, argv))
    edges = edges_new

    # Puts files in packages
    package_map = {}
    if level_pkgs == LVL_PKG_IGNORE:
        packages = []
        other_files = files
    else:
        logging.info("Organizes packages...")
        file2package = dict(
            (f.path, pkg) for pkg in config.packages for f in pkg.files)
        packages = {}
        other_files = []
        for fi in files:
            pkg = file2package.get(fi)
            if pkg is not None:
                package = packages.get(pkg.name)
                if package is None:
                    package = Package(pkg.name, pkg.version)
                    packages[pkg.name] = package
                package.files.add(fi)
                package_map[fi] = package
            else:
                other_files.append(fi)
        packages = sorted(itervalues(packages), key=lambda pkg: pkg.name)
        for i, pkg in enumerate(packages):
            pkg.id = i

    # Filter other files
    if level_other_files == LVL_OTHER_ALL and file_depth is not None:
        other_files = set(
            PosixPath(*f.components[:file_depth + 1]) for f in other_files)
        edges = OrderedSet((prog, f if f in package_map else PosixPath(
            *f.components[:file_depth + 1]), mode, argv)
                           for prog, f, mode, argv in edges)
    else:
        if level_other_files == LVL_OTHER_IO:
            other_files = set(f for f in other_files if f in inputs_outputs)
            edges = [(prog, f, mode, argv) for prog, f, mode, argv in edges
                     if f in package_map or f in other_files]
        elif level_other_files == LVL_OTHER_NO:
            other_files = set()
            edges = [(prog, f, mode, argv) for prog, f, mode, argv in edges
                     if f in package_map]

    args = (target, runs, packages, other_files, package_map, edges,
            inputs_outputs, level_pkgs, level_processes, level_other_files)
    if graph_format == FORMAT_DOT:
        graph_dot(*args)
    elif graph_format == FORMAT_JSON:
        graph_json(*args)
    else:
        assert False
Exemplo n.º 30
0
def docker_setup_create(args):
    """Sets up the experiment to be run in a Docker-built container.
    """
    pack = Path(args.pack[0])
    target = Path(args.target[0])
    if target.exists():
        logging.critical("Target directory exists")
        sys.exit(1)

    signals.pre_setup(target=target, pack=pack)

    # Unpacks configuration file
    tar = tarfile.open(str(pack), 'r:*')
    member = tar.getmember('METADATA/config.yml')
    member.name = 'config.yml'
    tar.extract(member, str(target))
    tar.close()

    # Loads config
    runs, packages, other_files = load_config(target / 'config.yml', True)

    if args.base_image:
        record_usage(docker_explicit_base=True)
        base_image = args.base_image[0]
        if args.distribution:
            target_distribution = args.distribution[0]
        else:
            target_distribution = None
    else:
        target_distribution, base_image = select_image(runs)
    logging.info("Using base image %s", base_image)
    logging.debug("Distribution: %s", target_distribution or "unknown")

    target.mkdir(parents=True)
    pack.copyfile(target / 'experiment.rpz')

    # Writes Dockerfile
    logging.info("Writing %s...", target / 'Dockerfile')
    with (target / 'Dockerfile').open('w',
                                      encoding='utf-8', newline='\n') as fp:
        fp.write('FROM %s\n\n' % base_image)

        # Installs busybox
        download_file(busybox_url(runs[0]['architecture']),
                      target / 'busybox')
        fp.write('COPY busybox /bin/busybox\n')

        fp.write('COPY experiment.rpz /reprozip_experiment.rpz\n\n')
        fp.write('RUN \\\n'
                 '    chmod +x /bin/busybox && \\\n')

        if args.install_pkgs:
            # Install every package through package manager
            missing_packages = []
        else:
            # Only install packages that were not packed
            missing_packages = [pkg for pkg in packages if pkg.packfiles]
            packages = [pkg for pkg in packages if not pkg.packfiles]
        # FIXME : Right now, we need 'sudo' to be available (and it's not
        # necessarily in the base image)
        if packages:
            record_usage(docker_install_pkgs=True)
        else:
            record_usage(docker_install_pkgs="sudo")
        packages += [Package('sudo', None, packfiles=False)]
        if packages:
            try:
                installer = select_installer(pack, runs, target_distribution)
            except CantFindInstaller as e:
                logging.error("Need to install %d packages but couldn't "
                              "select a package installer: %s",
                              len(packages), e)
                sys.exit(1)
            # Updates package sources
            fp.write('    %s && \\\n' % installer.update_script())
            # Installs necessary packages
            fp.write('    %s && \\\n' % installer.install_script(packages))
        logging.info("Dockerfile will install the %d software packages that "
                     "were not packed", len(packages))

        # Untar
        paths = set()
        pathlist = []
        dataroot = PosixPath('DATA')
        # Adds intermediate directories, and checks for existence in the tar
        tar = tarfile.open(str(pack), 'r:*')
        missing_files = chain.from_iterable(pkg.files
                                            for pkg in missing_packages)
        for f in chain(other_files, missing_files):
            path = PosixPath('/')
            for c in f.path.components[1:]:
                path = path / c
                if path in paths:
                    continue
                paths.add(path)
                datapath = join_root(dataroot, path)
                try:
                    tar.getmember(str(datapath))
                except KeyError:
                    logging.info("Missing file %s", datapath)
                else:
                    pathlist.append(unicode_(datapath))
        tar.close()
        # FIXME : for some reason we need reversed() here, I'm not sure why.
        # Need to read more of tar's docs.
        # TAR bug: --no-overwrite-dir removes --keep-old-files
        fp.write('    cd / && tar zpxf /reprozip_experiment.rpz '
                 '--numeric-owner --strip=1 %s\n' %
                 ' '.join(shell_escape(p) for p in reversed(pathlist)))

    # Meta-data for reprounzip
    write_dict(target / '.reprounzip', {})

    signals.post_setup(target=target)
Exemplo n.º 31
0
def generate(target, configfile, database):
    """Go over the trace and generate the graph file.
    """
    # Reads package ownership from the configuration
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)

    config = load_config(configfile, canonical=False)

    has_thread_flag = config.format_version >= LooseVersion('0.7')

    assert database.is_file()
    conn = sqlite3.connect(str(database))  # connect() only accepts str
    conn.row_factory = sqlite3.Row

    vertices = []
    edges = []

    # Create user entity, that initiates the runs
    vertices.append({'ID': 'user',
                     'type': 'Agent',
                     'subtype': 'User',
                     'label': 'User'})

    run = -1

    # Read processes
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, parent, timestamp, is_thread, exitcode
        FROM processes;
        ''' if has_thread_flag else '''
        SELECT id, parent, timestamp, 0 as is_thread, exitcode
        FROM processes;
        ''')
    for r_id, r_parent, r_timestamp, r_isthread, r_exitcode in rows:
        if r_parent is None:
            # Create run entity
            run += 1
            vertices.append({'ID': 'run%d' % run,
                             'type': 'Activity',
                             'subtype': 'Run',
                             'label': "Run #%d" % run,
                             'date': r_timestamp})
            # User -> run
            edges.append({'ID': 'user_run%d' % run,
                          'type': 'UserRuns',
                          'label': "User runs command",
                          'sourceID': 'user',
                          'targetID': 'run%d' % run})
            # Run -> process
            edges.append({'ID': 'run_start%d' % run,
                          'type': 'RunStarts',
                          'label': "Run #%d command",
                          'sourceID': 'run%d' % run,
                          'targetID': 'process%d' % r_id})

        # Create process entity
        vertices.append({'ID': 'process%d' % r_id,
                         'type': 'Agent',
                         'subtype': 'Thread' if r_isthread else 'Process',
                         'label': 'Process #%d' % r_id,
                         'date': r_timestamp})
        # TODO: add process end time (use master branch?)

        # Add process creation activity
        if r_parent is not None:
            # Process creation activity
            vertex = {'ID': 'fork%d' % r_id,
                      'type': 'Activity',
                      'subtype': 'Fork',
                      'label': "#%d creates %s #%d" % (
                          r_parent,
                          "thread" if r_isthread else "process",
                          r_id),
                      'date': r_timestamp}
            if has_thread_flag:
                vertex['thread'] = 'true' if r_isthread else 'false'
            vertices.append(vertex)

            # Parent -> creation
            edges.append({'ID': 'fork_p_%d' % r_id,
                          'type': 'PerformsFork',
                          'label': "Performs fork",
                          'sourceID': 'process%d' % r_parent,
                          'targetID': 'fork%d' % r_id})
            # Creation -> child
            edges.append({'ID': 'fork_c_%d' % r_id,
                          'type': 'ForkCreates',
                          'label': "Fork creates",
                          'sourceID': 'fork%d' % r_id,
                          'targetID': 'process%d' % r_id})
    cur.close()

    file2package = dict((f.path.path, pkg)
                        for pkg in config.packages
                        for f in pkg.files)
    inputs_outputs = dict((f.path.path, (bool(f.write_runs),
                                         bool(f.read_runs)))
                          for n, f in config.inputs_outputs.items())

    # Read opened files
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT name, is_directory
        FROM opened_files
        GROUP BY name;
        ''')
    for r_name, r_directory in rows:
        # Create file entity
        vertex = {'ID': r_name,
                  'type': 'Entity',
                  'subtype': 'Directory' if r_directory else 'File',
                  'label': r_name}
        if r_name in file2package:
            vertex['package'] = file2package[r_name].name
        if r_name in inputs_outputs:
            out_, in_ = inputs_outputs[r_name]
            if in_:
                vertex['input'] = True
            if out_:
                vertex['output'] = True
        vertices.append(vertex)
    cur.close()

    # Read file opens
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, mode, process
        FROM opened_files;
        ''')
    for r_id, r_name, r_timestamp, r_mode, r_process in rows:
        # Create file access activity
        vertices.append({'ID': 'access%d' % r_id,
                         'type': 'Activity',
                         'subtype': ('FileWrites' if r_mode & FILE_WRITE
                                     else 'FileReads'),
                         'label': ("File write: %s" if r_mode & FILE_WRITE
                                   else "File read: %s") % r_name,
                         'date': r_timestamp,
                         'mode': r_mode})
        # Process -> access
        edges.append({'ID': 'proc_access%d' % r_id,
                      'type': 'PerformsFileAccess',
                      'label': "Process does file access",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'access%d' % r_id})
        # Access -> file
        edges.append({'ID': 'access_file%d' % r_id,
                      'type': 'AccessFile',
                      'label': "File access touches",
                      'sourceID': 'access%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Read executions
    cur = conn.cursor()
    rows = cur.execute(
        '''
        SELECT id, name, timestamp, process, argv
        FROM executed_files;
        ''')
    for r_id, r_name, r_timestamp, r_process, r_argv in rows:
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]
        cmdline = ' '.join(shell_escape(a) for a in argv)

        # Create execution activity
        vertices.append({'ID': 'exec%d' % r_id,
                         'type': 'Activity',
                         'subtype': 'ProcessExecutes',
                         'label': "Process #%d executes file %s" % (r_process,
                                                                    r_name),
                         'date': r_timestamp,
                         'cmdline': cmdline,
                         'process': r_process,
                         'file': r_name})
        # Process -> execution
        edges.append({'ID': 'proc_exec%d' % r_id,
                      'type': 'ProcessExecution',
                      'label': "Process does exec()",
                      'sourceID': 'process%d' % r_process,
                      'targetID': 'exec%d' % r_id})
        # Execution -> file
        edges.append({'ID': 'exec_file%d' % r_id,
                      'type': 'ExecutionFile',
                      'label': "Execute file",
                      'sourceID': 'exec%d' % r_id,
                      'targetID': r_name})
    cur.close()

    # Write the file from the created lists
    with target.open('w', encoding='utf-8', newline='\n') as out:
        out.write('<?xml version="1.0"?>\n\n'
                  '<provenancedata xmlns:xsi="http://www.w3.org/2001/XMLSchema'
                  '-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n'
                  '  <vertices>\n')

        for vertex in vertices:
            if 'date' not in vertex:
                vertex['date'] = '-1'
            tags = {}
            for k in ('ID', 'type', 'label', 'date'):
                if k not in vertex:
                    vertex.update(tags)
                    raise ValueError("Vertex is missing tag '%s': %r" % (
                                     k, vertex))
                tags[k] = vertex.pop(k)
            out.write('    <vertex>\n      ' +
                      '\n      '.join('<{k}>{v}</{k}>'.format(k=k,
                                                              v=xml_escape(v))
                                      for k, v in tags.items()))
            if vertex:
                out.write('\n      <attributes>\n')
                for k, v in vertex.items():
                    out.write('        <attribute>\n'
                              '          <name>{k}</name>\n'
                              '          <value>{v}</value>\n'
                              '        </attribute>\n'
                              .format(k=xml_escape(k),
                                      v=xml_escape(v)))
                out.write('      </attributes>')
            out.write('\n    </vertex>\n')
        out.write('  </vertices>\n'
                  '  <edges>\n')
        for edge in edges:
            for k in ('ID', 'type', 'label', 'sourceID', 'targetID'):
                if k not in edge:
                    raise ValueError("Edge is missing tag '%s': %r" % (
                                     k, edge))
            if 'value' not in edge:
                edge['value'] = ''
            out.write('    <edge>\n      ' +
                      '\n      '.join('<{k}>{v}</{k}>'.format(k=k,
                                                              v=xml_escape(v))
                                      for k, v in edge.items()) +
                      '\n    </edge>\n')
        out.write('  </edges>\n'
                  '</provenancedata>\n')

    conn.close()
Exemplo n.º 32
0
def run_from_vistrails():
    setup_logging('REPROUNZIP-VISTRAILS', logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('unpacker')
    parser.add_argument('directory')
    parser.add_argument('run')
    parser.add_argument('--input-file', action='append', default=[])
    parser.add_argument('--output-file', action='append', default=[])
    parser.add_argument('--cmdline', action='store')

    args = parser.parse_args()

    runs, packages, other_files = load_config(
            Path(args.directory) / 'config.yml',
            canonical=True)
    run = runs[int(args.run)]

    python = sys.executable
    rpuz = [python, '-m', 'reprounzip.main', args.unpacker]

    os.environ['REPROUNZIP_NON_INTERACTIVE'] = 'y'

    def cmd(lst, add=None):
        if add:
            logging.info("cmd: %s %s", ' '.join(lst), add)
            string = ' '.join(shell_escape(a) for a in (rpuz + lst))
            string += ' ' + add
            subprocess.check_call(string, shell=True,
                                  cwd=args.directory)
        else:
            logging.info("cmd: %s", ' '.join(lst))
            subprocess.check_call(rpuz + lst,
                                  cwd=args.directory)

    logging.info("reprounzip-vistrails calling reprounzip; dir=%s",
                 args.directory)

    # Parses input files from the command-line
    upload_command = []
    seen_input_names = set()
    for input_file in args.input_file:
        input_name, filename = input_file.split(':', 1)
        upload_command.append('%s:%s' % (filename, input_name))
        seen_input_names.add(input_name)

    # Resets the input files that were not given
    for input_name in run['input_files']:
        if input_name not in seen_input_names:
            upload_command.append(':%s' % input_name)

    # Runs the command
    cmd(['upload', '.'] + upload_command)

    # Runs the experiment
    if args.cmdline:
        cmd(['run', '.', '--cmdline'], add=args.cmdline)
    else:
        cmd(['run', '.'])

    # Gets output files
    for output_file in args.output_file:
        output_name, filename = output_file.split(':', 1)
        cmd(['download', '.',
             '%s:%s' % (output_name, filename)])