def get_mismatches(self, reference_metadata, target_metadata,
                   traverse_sub_workflows=False):
    """
    Compare the filtered task outputs of two metadata files and
    return the pairs that do not match.

    Only calls that appear in the outputs of BOTH metadata files are
    compared; calls present on one side only are reported on stdout
    and skipped. For every common call, the outputs are compared
    extension by extension using the comparer registered for that
    extension in `self.filetypes_to_compare`.

    Args:
        reference_metadata: Passed as-is to `Metadata(...)`; the
            reference side of the comparison.
        target_metadata: Passed as-is to `Metadata(...)`; the test
            side of the comparison.
        traverse_sub_workflows: Forwarded to `Metadata.get_outputs`.

    Returns:
        A dict mapping each mismatching call to a list of
        `[reference, target]` pairs. When the two sides have a
        different number of files for an extension, a single pair
        holding the two whole lists is recorded; otherwise each pair
        holds the two values returned by the comparer's `equals`.
        Calls whose outputs all match are not present in the dict.
    """
    # Keep only the task outputs that are files whose extension is
    # one of the file types we want to compare (e.g., only VCF files).
    filter_method = FilterBasedOnExtensions(
        self.filetypes_to_compare.keys()).filter

    # Collect the outputs of each metadata file, filtered by the
    # above-defined filter. Both results are used as mappings of
    # call -> {extension -> list of objects}.
    ref_output_files = Metadata(reference_metadata).get_outputs(
        traverse_sub_workflows, filter_method)
    test_output_files = Metadata(target_metadata).get_outputs(
        traverse_sub_workflows, filter_method)

    # Calls that exist on one side only cannot be compared.
    only_in_reference = ref_output_files.keys() - test_output_files.keys()
    only_in_test = test_output_files.keys() - ref_output_files.keys()
    if only_in_reference or only_in_test:
        print(f"\n{COLOR_BLINKING}WARNING!{COLOR_ENDC}")
        print(f"The reference and test metadata files differ "
              f"in their outputs; "
              f"{COLOR_ULINE}the differences will be skipped.{COLOR_ENDC}")
        if only_in_reference:
            print(f"\t{len(only_in_reference)}/{len(ref_output_files.keys())} "
                  f"outputs of the reference are not in the test:")
            for call in only_in_reference:
                print(f"\t\t- {call}")
        if only_in_test:
            print(f"\t{len(only_in_test)}/{len(test_output_files.keys())} "
                  f"outputs of the test are not in the reference:")
            for call in only_in_test:
                print(f"\t\t- {call}")
        # NOTE(review): the original indentation of this statement
        # was lost; it is assumed to close the warning block — confirm.
        print("\n")

    # Build the set of comparable calls without mutating the mapping
    # returned by `get_outputs`.
    common_outputs = {
        call: outputs
        for call, outputs in ref_output_files.items()
        if call not in only_in_reference
    }

    print(f"{COLOR_YELLOW}Comparing {len(common_outputs)} "
          f"files that are common between reference and test "
          f"metadata files and their respective task is executed "
          f"successfully.{COLOR_ENDC}")

    mismatches = {}
    total = len(common_outputs)
    for i, (call, ref_outputs) in enumerate(common_outputs.items(), start=1):
        print(f"Comparing\t{i}/{total}\t{call} ... ", end="")
        test_outputs = test_output_files[call]
        call_mismatches = []
        for extension, ref_objs in ref_outputs.items():
            # The test side may have no entry for this extension at
            # all; treat that as an empty list so it is recorded as
            # a mismatch instead of raising KeyError.
            test_objs = test_outputs.get(extension, [])
            if len(ref_objs) != len(test_objs):
                # Different file counts: the files cannot be paired,
                # so record the two whole lists.
                call_mismatches.append([ref_objs, test_objs])
                continue
            # Lengths are equal here, so zip pairs files by position.
            for ref_obj, test_obj in zip(ref_objs, test_objs):
                equals, ref_file, test_file = \
                    self.filetypes_to_compare[extension].equals(
                        ref_obj, test_obj)
                if not equals:
                    call_mismatches.append([ref_file, test_file])

        if call_mismatches:
            mismatches[call] = call_mismatches
            print(f"{COLOR_RED}mismatch{COLOR_ENDC}")
        else:
            print(f"{COLOR_GREEN}match{COLOR_ENDC}")
    return mismatches