def clear_product_blocks(blocks):
    """Remove blocks that also occur inside a multi-factor product.

    Block types are visited in descending order of sample count and, within
    one sample count, in descending order of pattern count.  For every block
    a ``ProductFinder`` is built for ``(blocks, samples_num, patterns_num)``
    with ``equal=True``.  Products whose ``product_list`` holds exactly one
    entry are ignored; the block is dropped when any other product contains
    an element equal to it.

    Args:
        blocks: Nested mapping ``{samples: {patterns: [block, ...]}}``.
            The per-type lists are replaced in place.

    Returns:
        The result of ``clear_empty_block_types(blocks)``.
    """
    sample_order = sorted(blocks.keys(), reverse=True)
    checked = 0
    total_blocks = make_block_stats(blocks)["total"]

    for samples_num in sample_order:
        pattern_order = sorted(blocks[samples_num].keys(), reverse=True)
        for patterns_num in pattern_order:
            survivors = []
            for block in blocks[samples_num][patterns_num]:
                checked += 1
                if checked % LOG_ITERATION == 0:
                    print("Product blocks checked {}/{}".format(
                        checked, total_blocks))

                # NOTE(review): the finder is rebuilt for every block although
                # its arguments do not change inside this loop -- confirm
                # whether it can be hoisted.
                finder = ProductFinder(blocks, samples_num, patterns_num,
                                       equal=True)
                found_in_product = False
                for product in finder.find_products():
                    if len(product.product_list) == 1:
                        # Single-factor products are skipped.
                        continue
                    if any(member == block for member in product):
                        found_in_product = True
                        break

                if not found_in_product:
                    survivors.append(block)
            blocks[samples_num][patterns_num] = survivors

    return clear_empty_block_types(blocks)
def main():
    """Generate product schemes from an ELB file and print or save them.

    Steps:
      1. Parse the command line with ``read_args`` and resolve the NCS via
         ``find_ncs``; terminate with a non-zero status if it is not found.
      2. Read the blocks with ``read_blocks``; terminate with a non-zero
         status when it returns a truthy ``result``.
      3. Ask ``ProductFinder`` for all products, simplify every scheme and
         collect its textual form together with per-type block counts.
      4. Write the text to ``args.output`` if given, otherwise print it,
         then print the statistics of the generated blocks.

    Raises:
        SystemExit: with status 1 when the NCS lookup or the block file
            read fails.
    """
    # Local import: ``exit()`` is injected by the ``site`` module for
    # interactive use and may be absent; ``sys.exit`` is always available.
    import sys

    args = read_args()
    ncs, msg = find_ncs(args.ncs, script_path)
    if not ncs:
        print(msg)
        sys.exit(1)

    result, blocks, ncs_name, deuterated = read_blocks(args.elb_file)
    if result:
        # NOTE(review): a truthy ``result`` is treated as a read failure, as
        # in the original; confirm that read_blocks prints its own message.
        sys.exit(1)

    product_finder = ProductFinder(blocks, args.samples, args.min_patterns)
    products = product_finder.find_products()
    schemes_total = sum(len(product) for product in products)
    blocks_total = {}
    print("{} products found".format(schemes_total))

    # Collect the pieces and join once instead of repeated string +=.
    chunks = [
        "[NCS = {}]\n".format(ncs_name),
        "[Deuterated = {}]\n".format(deuterated),
    ]
    for product in products:
        for scheme in product:
            scheme.simplify()
            blocks_total = add_block(blocks_total, scheme.samples,
                                     len(scheme.patterns))
            chunks.append(scheme.full_str())
            chunks.append("\n")
    output = "".join(chunks)

    stats_total = make_block_stats(blocks_total)
    if args.output:
        with open(args.output, mode="w") as f:
            f.write(output)
        print("{} blocks were written to the file '{}'".format(
            schemes_total, args.output))
    else:
        print(output)
    print("Statistics of generated blocks:")
    print(stats_total["str"])
def clear_redundant_blocks(blocks):
    """Remove blocks that are subsets of an already accepted block.

    For each sample count the pattern counts are processed from largest to
    smallest.  A block is kept only if ``block.is_subset_of(x.simplified)``
    is false for every block ``x`` accepted so far for the same sample
    count.  Kept blocks are themselves added to the accepted list.  Pattern
    counts left without blocks are removed from the inner mapping.

    Args:
        blocks: Nested mapping ``{samples: {patterns: [block, ...]}}``,
            modified in place.

    Returns:
        The result of ``clear_empty_block_types(blocks)``.
    """
    checked = 0
    total_blocks = make_block_stats(blocks)["total"]

    for samples_num in blocks:
        # Snapshot of the keys: entries may be popped while we iterate.
        pattern_order = sorted(blocks[samples_num].keys(), reverse=True)
        accepted = []
        for pattern_num in pattern_order:
            kept_here = []
            for block in blocks[samples_num][pattern_num]:
                checked += 1
                if checked % LOG_ITERATION == 0:
                    print("Redundant blocks checked {}/{}".format(
                        checked, total_blocks))

                if any(block.is_subset_of(other.simplified)
                       for other in accepted):
                    continue
                accepted.append(block)
                kept_here.append(block)

            if kept_here:
                blocks[samples_num][pattern_num] = kept_here
            else:
                blocks[samples_num].pop(pattern_num)

    return clear_empty_block_types(blocks)
def main():
    """Print block statistics for one or more files.

    Every line of every file is matched against ``Constants.elb_re``; on a
    match, regex groups 1 and 2 are converted to integers and registered
    through ``add_block`` both for the current file and for the grand total.
    One report is printed per file and, when more than one file was given,
    an additional report for all files together.  With ``--total`` only the
    total number of blocks is shown.

    Processing stops (returning ``None``) at the first path that is not an
    existing file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("files",
                        help='Specify file(s) for which to calculate stats',
                        type=str, nargs='+')
    parser.add_argument("--total", "-t",
                        help='Print only total number of blocks',
                        action="store_true")
    args = parser.parse_args()

    def render(title, stats):
        # Format one report: right-aligned title, then total or full table.
        header = "{:>25}:".format(title)
        if args.total:
            return header + " {:>7}".format(int(stats["total"]))
        return header + "\n" + stats["str"]

    blocks_total = {}
    for file in args.files:
        if not os.path.isfile(file):
            print("Error! File '{}' not found!".format(file))
            return

        blocks = {}
        with open(file, "r") as f:
            for line in f:
                result = Constants.elb_re.match(line)
                if not result:
                    continue
                first = int(result.group(1))
                second = int(result.group(2))
                blocks = add_block(blocks, first, second)
                blocks_total = add_block(blocks_total, first, second)

        print(render(file, make_block_stats(blocks)))

    stats_total = make_block_stats(blocks_total)
    if len(args.files) > 1:
        print(render("TOTAL FOR {} FILES".format(len(args.files)),
                     stats_total))
def clear_blocks(blocks, flags):
    """Run the selected cleaning passes over ``blocks`` and report progress.

    The block statistics are printed before the first pass and again after
    every pass that runs.

    Args:
        blocks: Block collection accepted by ``make_block_stats`` and the
            individual ``clear_*_blocks`` functions.
        flags: Three-item iterable ``(identical_flag, redundant_flag,
            product_flag)``; each truthy item enables the matching pass.
            Passes run in that order.

    Returns:
        The block collection returned by the last pass that ran, or
        ``blocks`` unchanged if no pass was enabled.
    """
    def show_stats(current):
        # Statistics table followed by an empty line.
        print(make_block_stats(current)["str"] + "\n")

    identical_flag, redundant_flag, product_flag = flags
    show_stats(blocks)

    if identical_flag:
        print("Clearing identical blocks...")
        blocks = clear_identical_blocks(blocks)
        print("Identical blocks cleared...")
        show_stats(blocks)

    if redundant_flag:
        print("Clearing redundant blocks...")
        blocks = clear_redundant_blocks(blocks)
        print("Redundant blocks cleared...")
        show_stats(blocks)

    if product_flag:
        print("Clearing product blocks...")
        blocks = clear_product_blocks(blocks)
        print("Product blocks cleared...")
        show_stats(blocks)

    return blocks
def clear_identical_blocks(blocks):
    """Collapse equal blocks inside every (samples, patterns) block type.

    Each list of blocks is passed through a dictionary keyed by the block
    itself, so equality and hashing of the block objects decide what counts
    as a duplicate.  Order of first occurrence is preserved; for a group of
    equal blocks the object stored is the last one encountered.  Block types
    whose list is empty are removed from the inner mapping.

    Args:
        blocks: Nested mapping ``{samples: {patterns: [block, ...]}}``,
            modified in place.

    Returns:
        The result of ``clear_empty_block_types(blocks)``.
    """
    checked = 0
    total_blocks = make_block_stats(blocks)["total"]

    for samples_num in blocks:
        # sorted() snapshots the keys, so popping below is safe.
        for pattern_num in sorted(blocks[samples_num].keys(), reverse=True):
            unique = {}
            for candidate in blocks[samples_num][pattern_num]:
                checked += 1
                if checked % LOG_ITERATION == 0:
                    print("Identical blocks checked {}/{}".format(
                        checked, total_blocks))
                unique[candidate] = candidate

            if unique:
                blocks[samples_num][pattern_num] = list(unique.values())
            else:
                blocks[samples_num].pop(pattern_num)

    return clear_empty_block_types(blocks)
def filterT_blocks(blocks, min_t_free, verbose_output):
    """Keep only blocks with at least ``min_t_free`` T-free patterns.

    ``count_typeT(block)`` is expected to return a pair
    ``(have_t, have_no_t)``; a block is kept when
    ``have_no_t >= min_t_free``.  Block types left without blocks are
    removed from the inner mapping.

    Args:
        blocks: Nested mapping ``{samples: {patterns: [block, ...]}}``,
            modified in place.
        min_t_free: Minimum ``have_no_t`` value required to keep a block.
        verbose_output: When truthy, print every block together with its
            counts and the APPEND/SKIP decision.

    Returns:
        The result of ``clear_empty_block_types(blocks)``.
    """
    checked = 0
    total_blocks = make_block_stats(blocks)["total"]
    kept_total = 0
    print(
        "Filter blocks with less than {} patterns that have no \"T\" labeling".
        format(min_t_free))

    for samples_num in blocks:
        # sorted() snapshots the keys, so popping below is safe.
        for pattern_num in sorted(blocks[samples_num].keys(), reverse=True):
            kept_here = []
            for block in blocks[samples_num][pattern_num]:
                checked += 1
                if checked % LOG_ITERATION == 0:
                    print("filterT blocks checked {}/{}".format(
                        checked, total_blocks))

                have_t, have_no_t = count_typeT(block)
                keep = have_no_t >= min_t_free
                if keep:
                    kept_here.append(block)
                    kept_total += 1

                # Build the diagnostic text only when it will be shown.
                if verbose_output:
                    verdict = "APPEND block" if keep else "SKIP block"
                    print(block.full_str())
                    print("have_t = {:2} have_no_t = {:2}, {}\n".format(
                        have_t, have_no_t, verdict))

            if kept_here:
                blocks[samples_num][pattern_num] = kept_here
            else:
                blocks[samples_num].pop(pattern_num)

    # The reported count is the number of blocks that passed the filter.
    print("Filtered {}/{} blocks by T labeling, min_t_free = {}".format(
        kept_total, total_blocks, min_t_free))
    return clear_empty_block_types(blocks)
def main():
    """Print block statistics and, optionally, SV min/max vectors.

    Every line of every file is matched against ``Constants.elb_re``; on a
    match the named groups ``samples`` and ``patterns`` are converted to
    integers and registered through ``add_block`` for the current file and
    for the grand total.  With ``--sv``, non-matching lines are additionally
    matched against ``Constants.sv_re``; the integers in its ``sv`` group
    update element-wise minimum and maximum vectors kept per sample count of
    the most recent block header in the same file.

    One report is printed per file, a combined one when several files were
    given, and finally the SV vectors when ``--sv`` is set.  Processing
    stops (returning ``None``) at the first path that is not an existing
    file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("files",
                        help='Specify file(s) for which to calculate stats',
                        type=str, nargs='+')
    parser.add_argument("--total", "-t",
                        help='Print only total number of blocks',
                        action="store_true")
    parser.add_argument("--sv", "-s",
                        help='Print SV min and max values',
                        action="store_true")
    args = parser.parse_args()

    blocks_total = {}
    sv_min = {}
    sv_max = {}
    for file in args.files:
        blocks = {}
        if not os.path.isfile(file):
            print("Error! File '{}' not found!".format(file))
            return
        output = "{:>25}:".format(file)

        # Sample count of the latest block header in THIS file.  Reset per
        # file so an SV line that precedes any header is neither attributed
        # to the previous file's block nor hits an unassigned variable.
        samples = None
        with open(file, "r") as f:
            for line in f:
                result = Constants.elb_re.match(line)
                if result:
                    samples = int(result.group('samples'))
                    patterns = int(result.group('patterns'))
                    blocks = add_block(blocks, samples, patterns)
                    blocks_total = add_block(blocks_total, samples, patterns)
                    continue

                if not args.sv or samples is None:
                    continue
                sv_re_match = Constants.sv_re.match(line)
                if not sv_re_match:
                    continue

                sv = [int(i) for i in sv_re_match.group('sv').split()]
                if samples in sv_min:
                    # zip() stops at the shorter vector, as before.
                    sv_min[samples] = [
                        min(pair) for pair in zip(sv_min[samples], sv)]
                    sv_max[samples] = [
                        max(pair) for pair in zip(sv_max[samples], sv)]
                else:
                    sv_min[samples] = sv
                    # Separate copy so min and max never share one list.
                    sv_max[samples] = list(sv)

        stats = make_block_stats(blocks)
        if args.total:
            output += " {:>7}".format(int(stats["total"]))
        else:
            output += "\n" + stats["str"]
        print(output)

    stats_total = make_block_stats(blocks_total)
    if len(args.files) > 1:
        output = "{:>25}:".format(
            "TOTAL FOR {} FILES".format(len(args.files)))
        if args.total:
            output += " {:>7}".format(int(stats_total["total"]))
        else:
            output += "\n" + stats_total["str"]
        print(output)

    if args.sv:
        print("Min and max values for SV vectors:")
        for s in sorted(sv_min):
            min_out = "SV_MIN ({} samples): ".format(s)
            max_out = "SV_MAX ({} samples): ".format(s)
            min_out += "".join("{:>2} ".format(v) for v in sv_min[s])
            max_out += "".join("{:>2} ".format(v) for v in sv_max[s])
            print(min_out)
            print(max_out)