def test_groupby_regex_with_combinatorial_option(tmp_path):
    """Regex grouping combined with a free (ungrouped) option yields the
    cross product of the regex groups and the option's values."""
    a0 = f"{tmp_path}/data_0.a"
    a1 = f"{tmp_path}/data_1.a"
    b0 = f"{tmp_path}/data_0.b"
    b1 = f"{tmp_path}/data_1.b"
    observed = build_combinations({
        "groupby": "regex",
        "files_x": ["y.x", "z.x"],
        "files_a": [a0, a1],
        "files_b": [b0, b1],
        "files_a_regex": r"data_(\d+).a",
        "files_b_regex": r"data_(\d+).b",
    })
    assert observed == [
        {"files_a": a0, "files_b": b0, "files_x": "y.x", "name": "0"},
        {"files_a": a0, "files_b": b0, "files_x": "z.x", "name": "0"},
        {"files_a": a1, "files_b": b1, "files_x": "y.x", "name": "1"},
        {"files_a": a1, "files_b": b1, "files_x": "z.x", "name": "1"},
    ]
def test_groupby_design_with_combinatorial_option(tmp_path):
    """Grouping by a design file combined with a free combinatorial option
    yields one combination per design row per option value, named by the
    row's label column."""
    design_file = tmp_path / "design.tsv"
    rows = [
        "label\tc_option1\tc_option2",
        "label1\tvalue1\tvalueA",
        "label2\tvalue1\tvalueB",
        "label3\tvalue2\tvalueA",
        "label4\tvalue2\tvalueB",
    ]
    design_file.write_text("\n".join(rows) + "\n")
    expected = [
        {"option1": o1, "option2": o2, "name": label, "option3": o3}
        for label, o1, o2 in [
            ("label1", "value1", "valueA"),
            ("label2", "value1", "valueB"),
            ("label3", "value2", "valueA"),
            ("label4", "value2", "valueB"),
        ]
        for o3 in ("valueX", "valueY")
    ]
    assert build_combinations({
        "groupby": "file",
        "label": "label",
        "input": design_file,
        "option1": "c_option1",
        "option2": "c_option2",
        "option3": ["valueX", "valueY"],
    }) == expected
def test_two_options():
    """Two list-valued options expand to their full cross product."""
    observed = build_combinations(
        {"option1": ["value1", "value2"], "option2": ["valueA", "valueB"]})
    expected = [
        {"option1": v1, "option2": v2}
        for v1 in ("value1", "value2")
        for v2 in ("valueA", "valueB")
    ]
    assert observed == expected
def test_groupby_regex_filters_when_data_point_missing(tmp_path):
    """Groups lacking a file in any slot are dropped: only key "0" has
    both an .a and a .b file, so key "1" does not appear."""
    a0 = f"{tmp_path}/data_0.a"
    b0 = f"{tmp_path}/data_0.b"
    b1 = f"{tmp_path}/data_1.b"
    observed = build_combinations({
        "groupby": "regex",
        "files_a": [a0],
        "files_b": [b0, b1],
        "files_a_regex": r"data_(\d+).a",
        "files_b_regex": r"data_(\d+).b",
    })
    assert observed == [{"files_a": a0, "files_b": b0, "name": "0"}]
def test_groupby_named_regex(tmp_path):
    """Named capture groups group files the same way positional ones do."""
    a0, a1 = f"{tmp_path}/data_0.a", f"{tmp_path}/data_1.a"
    b0, b1 = f"{tmp_path}/data_0.b", f"{tmp_path}/data_1.b"
    observed = build_combinations({
        "groupby": "regex",
        "files_a": [a0, a1],
        "files_b": [b0, b1],
        "files_a_regex": r"data_(?P<key1>\d+).a",
        "files_b_regex": r"data_(?P<key1>\d+).b",
    })
    assert observed == [
        {"files_a": a0, "files_b": b0, "name": "0"},
        {"files_a": a1, "files_b": b1, "name": "1"},
    ]
def main(argv=sys.argv):
    """Rename (or symlink) existing benchmark result directories to the
    directory names expected by the current configuration.

    For every existing ``*.dir/tool.info``, the stored metadata is matched
    against the (property, value) pairs of each task built from the current
    config; the candidate directory collecting the most matches wins.
    Ambiguous and unmatched old directories are skipped with a warning.

    Options: ``-n/--dry-run`` only reports the mapping; ``-l/--link``
    creates symlinks instead of renaming.

    Raises:
        ValueError: if a target directory already exists.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-n", "--dry-run", dest="dry_run", action="store_true",
        help="only show what will be done, don't do it [%default]")

    parser.add_option(
        "-l", "--link", dest="link", action="store_true",
        help="link instead of rename [%default]")

    parser.set_defaults(dry_run=False, link=False)

    (options, args) = E.start(parser, argv)

    config = P.get_parameters("benchmark.yml")

    # collect metadata of the result directories currently on disk
    old_data = []
    for old_info in glob.glob("*.dir/tool.info"):
        old_dir, _ = os.path.split(old_info)
        old_data.append((old_dir, toolkit.read_data(old_info)))

    tool_functions = workflow.build_tool_functions(map_tool_to_runner, config)

    config_files = workflow.expand_globs(config["input"])
    input_combos = workflow.build_combinations(config_files)

    # index every (property, value) pair of the new tasks by the
    # directory the task will write to
    map_property_to_dir = collections.defaultdict(list)
    new_data = []

    for toolf, input_files in itertools.product(tool_functions, input_combos):
        # create a copy of the task function and give it its unique name
        # by mangling it with the input_files
        taskf = copy.copy(toolf)
        taskf.register_input(input_files)
        # a function __name__ contains no path separators, so the previous
        # basename(join(...)) round-trip was a no-op and has been removed
        result_dir = taskf.__name__ + ".dir"
        new_data.append((result_dir, taskf))
        for _, x, y in IOTools.nested_iter(taskf.input_files):
            map_property_to_dir[(x, y)].append(result_dir)
        map_property_to_dir[("name", taskf.name)].append(result_dir)
        for x, y in list(taskf._option_dict.items()):
            map_property_to_dir[(x, y)].append(result_dir)

    # match by input_files
    options.stdout.write("\t".join(("old", "new", "matching")) + "\n")

    for old_dir, old_info in old_data:
        targets = []
        for _, x, y in IOTools.nested_iter(old_info["input_files"]):
            if (x, y) in map_property_to_dir:
                targets.extend(map_property_to_dir[(x, y)])
        for x, y in list(old_info.items()):
            try:
                targets.extend(map_property_to_dir[(x, y)])
            except TypeError:
                # unhashable values (lists/dicts) cannot be dict keys; skip
                pass

        if not targets:
            # robustness fix: max() on an empty sequence used to raise
            # ValueError here; an unmatched directory is now skipped
            E.warn("no match for {}, ignored".format(old_dir))
            continue

        counts = collections.Counter(targets)
        max_count = max(counts.values())
        max_count_items = [
            x for x, y in list(counts.items()) if y == max_count]

        if len(max_count_items) > 1:
            E.warn("multiple matches for {}, ignored".format(old_dir))
            continue

        new_dir = max_count_items[0]
        options.stdout.write(
            "\t".join(map(str, (old_dir, new_dir, max_count))) + "\n")

        # refuse to clobber an existing directory even in dry-run mode,
        # so the reported mapping is guaranteed to be applicable
        if os.path.exists(new_dir):
            raise ValueError("directory {} already exists".format(new_dir))

        if options.dry_run:
            continue

        if options.link:
            os.symlink(old_dir, new_dir)
        else:
            os.rename(old_dir, new_dir)

    E.stop()
def test_one_option():
    """A single list-valued option expands to one combination per value."""
    observed = build_combinations({"option1": ["value1", "value2"]})
    assert observed == [{"option1": "value1"}, {"option1": "value2"}]
def test_complex_values():
    """Nested container values are passed through unchanged, one
    combination per list element."""
    inner1 = {"value1": [1, 2, 3]}
    inner2 = {"value2": [4, 5, 6]}
    observed = build_combinations({"option1": [inner1, inner2]})
    assert observed == [{"option1": inner1}, {"option1": inner2}]