def canonicalize_config(runs, packages, other_files, additional_patterns, sort_packages):
    """Expands ``additional_patterns`` from the configuration file.

    Matches the patterns against the filesystem, optionally attributes the
    matched files to their owning packages, and folds them into the given
    ``packages`` / ``other_files`` collections. ``runs`` is passed through
    unchanged.
    """
    # Turn the glob patterns into concrete file entries
    extra_files = expand_patterns(additional_patterns)

    # Attribute each new file to the package that provides it, if requested
    if sort_packages:
        extra_files, extra_packages = identify_packages(extra_files)
    else:
        extra_packages = []

    # Fold the newly-found files and packages into the existing collections
    other_files, packages = merge_files(extra_files, extra_packages,
                                        other_files, packages)
    return runs, packages, other_files
def canonicalize_config(packages, other_files, additional_patterns, sort_packages):
    """Expands ``additional_patterns`` from the configuration file.

    Matches the patterns against the filesystem, optionally attributes the
    matched files to their owning packages, and folds them into the given
    ``packages`` / ``other_files`` collections.
    """
    # Turn the glob patterns into concrete file entries
    extra_files = expand_patterns(additional_patterns)

    # Attribute each new file to the package that provides it, if requested
    if sort_packages:
        extra_files, extra_packages = identify_packages(extra_files)
    else:
        extra_packages = []

    # Fold the newly-found files and packages into the existing collections
    other_files, packages = merge_files(extra_files, extra_packages,
                                        other_files, packages)
    return packages, other_files
def canonicalize_config(packages, other_files, additional_patterns, sort_packages):
    """Expands ``additional_patterns`` from the configuration file.

    When patterns are given and match files, those files are optionally
    attributed to their owning packages and combined into the given
    ``packages`` / ``other_files`` collections; otherwise the inputs are
    returned unchanged.
    """
    if additional_patterns:
        # Turn the glob patterns into concrete file entries
        matched = expand_patterns(additional_patterns)
        logger.info("Found %d files from expanding additional_patterns...",
                    len(matched))
        if matched:
            # Attribute each matched file to its package, if requested
            if sort_packages:
                matched, matched_packages = identify_packages(matched)
            else:
                matched_packages = []
            # Fold the matched files and packages into the collections
            other_files, packages = combine_files(matched, matched_packages,
                                                  other_files, packages)
    return packages, other_files
def write_configuration(directory, sort_packages, find_inputs_outputs,
                        overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads the trace database ``trace.sqlite3`` under `directory` and writes
    ``config.yml`` next to it.

    :param directory: Directory containing the trace database; the
        configuration file is written there as well.
    :param sort_packages: Whether to attribute traced files to the system
        packages that provide them.
    :param find_inputs_outputs: Whether to compute the inputs/outputs section
        of the configuration.
    :param overwrite: If False and a configuration already exists, new runs
        are appended to it instead of replacing it.
    """
    database = directory / 'trace.sqlite3'
    # Explicit check instead of `assert`: asserts are stripped under -O
    if not database.is_file():
        raise RuntimeError("Missing trace database: %s" % database)
    conn = sqlite3.connect(str(database))  # connect() only accepts str
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = [distro.id(), distro.version()]
    cur = conn.cursor()
    if overwrite or not config.exists():
        runs = []
        # This gets all the top-level processes (p.parent ISNULL) and the first
        # executed file for that process (sorting by ids, which are
        # chronological)
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.timestamp,
                   p.exit_timestamp, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL;
            ''')
    else:
        # Loads in previous config
        runs, oldpkgs, oldfiles = load_config(config,
                                              canonical=False,
                                              File=TracedFile)

        # Same query as previous block but skips the runs already recorded
        # in the previous configuration (OFFSET len(runs))
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.timestamp,
                   p.exit_timestamp, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL
            ORDER BY p.id
            LIMIT 2147483647 OFFSET ?;
            ''',
            (len(runs), ))

    for (r_name, r_argv, r_envp, r_workingdir, r_start, r_end,
            r_exitcode) in executions:
        # Decodes command-line (NUL-separated, possibly NUL-terminated)
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment (NUL-separated KEY=VALUE strings)
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        run = {'id': "run%d" % len(runs),
               'binary': r_name, 'argv': argv,
               'workingdir': str(Path(r_workingdir)),
               'architecture': platform.machine().lower(),
               'distribution': distribution,
               'hostname': platform.node(),
               'system': [platform.system(), platform.release()],
               'environ': environ,
               'uid': os.getuid(),
               'gid': os.getgid()}

        # Bit 0x0100 of the stored exit status marks death by signal; the
        # low byte then holds the signal number, else the exit code
        if r_exitcode & 0x0100:
            run['signal'] = r_exitcode & 0xFF
        else:
            run['exitcode'] = r_exitcode & 0xFF

        # exit_timestamp may be missing (process still running / not traced)
        if r_end is not None:
            run['walltime'] = (r_end - r_start) / 1.0E9  # ns to s

        runs.append(run)

    cur.close()
    conn.close()

    if find_inputs_outputs:
        inputs_outputs = compile_inputs_outputs(runs, inputs, outputs)
    else:
        inputs_outputs = {}

    save_config(config, runs, packages, files, reprozip_version,
                inputs_outputs)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
def write_configuration(directory, sort_packages, find_inputs_outputs,
                        overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads the trace database ``trace.sqlite3`` under `directory` and writes
    ``config.yml`` next to it.

    :param directory: Directory containing the trace database.
    :param sort_packages: Whether to attribute traced files to the system
        packages that provide them (via ``identify_packages``).
    :param find_inputs_outputs: Whether to compute the inputs/outputs section
        of the configuration (via ``compile_inputs_outputs``).
    :param overwrite: If False and a configuration already exists, the last
        run is appended to the previous configuration instead of rewriting it.
    """
    database = directory / 'trace.sqlite3'
    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Writes configuration file
    config = directory / 'config.yml'
    # NOTE: platform.linux_distribution() was removed in Python 3.8 --
    # presumably this code targets older interpreters; verify before porting
    distribution = platform.linux_distribution()[0:2]
    cur = conn.cursor()
    if overwrite or not config.exists():
        runs = []
        # This gets all the top-level processes (p.parent ISNULL) and the first
        # executed file for that process (sorting by ids, which are
        # chronological)
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL;
            ''')
    else:
        # Loads in previous config
        runs, oldpkgs, oldfiles = load_config(config,
                                              canonical=False,
                                              File=TracedFile)

        # Same query as previous block but only gets last process
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL
            ORDER BY p.id DESC
            LIMIT 1;
            ''')
        # Only the last run is processed, so keep only the last input and
        # output lists to match it
        inputs = inputs[-1:]
        outputs = outputs[-1:]
        # Merge newly-traced files/packages with those of the old config
        files, packages = merge_files(files, packages,
                                      oldfiles, oldpkgs)
    for r_name, r_argv, r_envp, r_workingdir, r_exitcode in executions:
        # Decodes command-line (NUL-separated, possibly NUL-terminated)
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment (NUL-separated KEY=VALUE strings)
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Bit 0x0100 of the stored exit status selects which key to emit:
        # 'signal' (killed by signal) or 'exitcode'; low byte holds the value
        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': unicode_(Path(r_workingdir)),
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF})
    cur.close()
    conn.close()

    if find_inputs_outputs:
        inputs_outputs = compile_inputs_outputs(runs, inputs, outputs)
    else:
        inputs_outputs = {}

    save_config(config, runs, packages, files, reprozip_version,
                inputs_outputs)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads the trace database ``trace.sqlite3`` under `directory` and writes
    ``config.yml`` next to it, labeling each run's input and output files.

    :param directory: Directory containing the trace database.
    :param sort_packages: Whether to attribute traced files to the system
        packages that provide them (via ``identify_packages``).
    :param overwrite: If False and a configuration already exists, the last
        run is appended to the previous configuration instead of rewriting it.
    """
    database = directory / 'trace.sqlite3'
    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Makes sure all the directories used as working directories are packed
    # (they already do if files from them are used, but empty directories do
    # not get packed inside a tar archive)
    files.update(d for d in list_directories(conn) if d.path.is_dir())

    # Writes configuration file
    config = directory / 'config.yml'
    distribution = platform.linux_distribution()[0:2]
    oldconfig = not overwrite and config.exists()
    cur = conn.cursor()
    if oldconfig:
        # Loads in previous config
        runs, oldpkgs, oldfiles, patterns = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)
        # Here, additional patterns are discarded

        # This gets the last top-level process (p.parent ISNULL) and the
        # first executed file for that process.
        # BUGFIX: the previous join condition (p.id=e.id) equated a process
        # id with an executed-file rowid, which only matched by coincidence;
        # use a subquery for the first executed file, as elsewhere.
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL
            ORDER BY p.id DESC
            LIMIT 1;
            ''')
        # Only the last run is appended; keep only the matching inputs AND
        # outputs. BUGFIX: outputs was previously not sliced, so izip below
        # paired the last run with the first run's output list.
        inputs = inputs[-1:]
        outputs = outputs[-1:]
        # Merge newly-traced files/packages with those of the old config
        files, packages = merge_files(files, packages,
                                      oldfiles, oldpkgs)
    else:
        runs = []
        # All top-level processes, with the first executed file of each
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL
            ORDER BY p.id;
            ''')

    # Each execution is paired with its run's input and output file lists
    for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
            input_files, output_files) in izip(executions, inputs, outputs):
        # Decodes command-line (NUL-separated, possibly NUL-terminated)
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment (NUL-separated KEY=VALUE strings)
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Gets files from command-line: maps each existing file argument to
        # its position in argv
        command_line_files = {}
        for i, arg in enumerate(argv):
            p = Path(r_workingdir, arg).resolve()
            if p.is_file():
                command_line_files[p] = i
        input_files_on_cmdline = sum(1 for in_file in input_files
                                     if in_file in command_line_files)
        # BUGFIX: this previously counted input_files, so multiple output
        # files on the command-line never received numbered "arg_N" labels
        output_files_on_cmdline = sum(1 for out_file in output_files
                                      if out_file in command_line_files)

        # Labels input files
        input_files_dict = {}
        for in_file in input_files:
            # If file is on the command-line
            if in_file in command_line_files:
                if input_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[in_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = in_file.unicodename
            # Make labels unique by appending a counter
            uniquelabel = label
            i = 1
            while uniquelabel in input_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            input_files_dict[uniquelabel] = str(in_file)
        # TODO : Note that right now, we keep as input files the ones that
        # don't appear on the command-line

        # Labels output files
        output_files_dict = {}
        for out_file in output_files:
            # If file is on the command-line
            if out_file in command_line_files:
                if output_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[out_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = out_file.unicodename
            # Make labels unique by appending a counter
            uniquelabel = label
            i = 1
            while uniquelabel in output_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            output_files_dict[uniquelabel] = str(out_file)
        # TODO : Note that right now, we keep as output files the ones that
        # don't appear on the command-line

        # Bit 0x0100 of the stored exit status selects which key to emit:
        # 'signal' (killed by signal) or 'exitcode'; low byte holds the value
        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': Path(r_workingdir).path,
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF,
                     'input_files': input_files_dict,
                     'output_files': output_files_dict})

    cur.close()
    conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")
def write_configuration(directory, sort_packages, overwrite=False):
    """Writes the canonical YAML configuration file.

    Reads the trace database ``trace.sqlite3`` under `directory` and writes
    ``config.yml`` next to it, labeling each run's input and output files.

    :param directory: Directory containing the trace database.
    :param sort_packages: Whether to attribute traced files to the system
        packages that provide them (via ``identify_packages``).
    :param overwrite: If False and a configuration already exists, the last
        run is appended to the previous configuration instead of rewriting it.
    """
    database = directory / 'trace.sqlite3'
    if PY3:
        # On PY3, connect() only accepts unicode
        conn = sqlite3.connect(str(database))
    else:
        conn = sqlite3.connect(database.path)
    conn.row_factory = sqlite3.Row

    # Reads info from database
    files, inputs, outputs = get_files(conn)

    # Identifies which file comes from which package
    if sort_packages:
        files, packages = identify_packages(files)
    else:
        packages = []

    # Makes sure all the directories used as working directories are packed
    # (they already do if files from them are used, but empty directories do
    # not get packed inside a tar archive)
    files.update(d for d in list_directories(conn) if d.path.is_dir())

    # Writes configuration file
    config = directory / 'config.yml'
    # NOTE: platform.linux_distribution() was removed in Python 3.8 --
    # presumably this code targets older interpreters; verify before porting
    distribution = platform.linux_distribution()[0:2]
    oldconfig = not overwrite and config.exists()
    cur = conn.cursor()
    if not oldconfig:
        runs = []
        # This gets all the top-level processes (p.parent ISNULL) and the first
        # executed file for that process (sorting by ids, which are
        # chronological)
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL;
            ''')
    else:
        # Loads in previous config
        runs, oldpkgs, oldfiles, patterns = load_config(config,
                                                        canonical=False,
                                                        File=TracedFile)
        # Here, additional patterns are discarded

        # Same query as previous block but only gets last process
        executions = cur.execute(
            '''
            SELECT e.name, e.argv, e.envp, e.workingdir, p.exitcode
            FROM processes p
            JOIN executed_files e ON e.id=(
                SELECT id FROM executed_files e2
                WHERE e2.process=p.id
                ORDER BY e2.id
                LIMIT 1
            )
            WHERE p.parent ISNULL
            ORDER BY p.id DESC
            LIMIT 1;
            ''')
        # Only the last run is appended; keep only the matching input and
        # output lists so izip pairs them correctly below
        inputs = inputs[-1:]
        outputs = outputs[-1:]
        # Merge newly-traced files/packages with those of the old config
        files, packages = merge_files(files, packages,
                                      oldfiles, oldpkgs)
    # Each execution row is paired with its run's input and output lists
    for ((r_name, r_argv, r_envp, r_workingdir, r_exitcode),
            input_files, output_files) in izip(executions, inputs, outputs):
        # Decodes command-line (NUL-separated, possibly NUL-terminated)
        argv = r_argv.split('\0')
        if not argv[-1]:
            argv = argv[:-1]

        # Decodes environment (NUL-separated KEY=VALUE strings)
        envp = r_envp.split('\0')
        if not envp[-1]:
            envp = envp[:-1]
        environ = dict(v.split('=', 1) for v in envp)

        # Gets files from command-line: maps each existing file argument to
        # its position in argv
        command_line_files = {}
        for i, arg in enumerate(argv):
            p = Path(r_workingdir, arg).resolve()
            if p.is_file():
                command_line_files[p] = i
        input_files_on_cmdline = sum(1 for in_file in input_files
                                     if in_file in command_line_files)
        output_files_on_cmdline = sum(1 for out_file in output_files
                                      if out_file in command_line_files)

        # Labels input files
        input_files_dict = {}
        for in_file in input_files:
            # If file is on the command-line
            if in_file in command_line_files:
                if input_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[in_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = in_file.unicodename
            # Make labels unique by appending a counter
            uniquelabel = label
            i = 1
            while uniquelabel in input_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            input_files_dict[uniquelabel] = str(in_file)
        # TODO : Note that right now, we keep as input files the ones that
        # don't appear on the command-line

        # Labels output files
        output_files_dict = {}
        for out_file in output_files:
            # If file is on the command-line
            if out_file in command_line_files:
                if output_files_on_cmdline > 1:
                    label = "arg_%d" % command_line_files[out_file]
                else:
                    label = "arg"
            # Else, use file's name
            else:
                label = out_file.unicodename
            # Make labels unique by appending a counter
            uniquelabel = label
            i = 1
            while uniquelabel in output_files_dict:
                i += 1
                uniquelabel = '%s_%d' % (label, i)
            output_files_dict[uniquelabel] = str(out_file)
        # TODO : Note that right now, we keep as output files the ones that
        # don't appear on the command-line

        # Bit 0x0100 of the stored exit status selects which key to emit:
        # 'signal' (killed by signal) or 'exitcode'; low byte holds the value
        runs.append({'binary': r_name, 'argv': argv,
                     'workingdir': Path(r_workingdir).path,
                     'architecture': platform.machine().lower(),
                     'distribution': distribution,
                     'hostname': platform.node(),
                     'system': [platform.system(), platform.release()],
                     'environ': environ,
                     'uid': os.getuid(),
                     'gid': os.getgid(),
                     'signal' if r_exitcode & 0x0100 else 'exitcode':
                         r_exitcode & 0xFF,
                     'input_files': input_files_dict,
                     'output_files': output_files_dict})

    cur.close()
    conn.close()

    save_config(config, runs, packages, files, reprozip_version)

    print("Configuration file written in {0!s}".format(config))
    print("Edit that file then run the packer -- "
          "use 'reprozip pack -h' for help")