def write_smc(filename, smc):
    """
    Write SMC items to a file.

    filename: destination handed to argweaver.open_stream (opened with
              mode "w").
    smc:      iterable of dict items; each item's 'tag' key selects the
              row that is written:
                'NAMES'  -> NAMES followed by every entry of item['names']
                'REGION' -> REGION, item['chrom'], item['start'],
                            item['end']
                'TREE'   -> TREE, item['start'], item['end'], tree text
                'SPR'    -> SPR, item['pos'], item['recomb_node'],
                            item['recomb_time'], item['coal_node'],
                            item['coal_time']
              Items carrying any other tag are skipped without error.

    The stream is always closed, including when an item is malformed or a
    write fails part-way through.
    """
    out = argweaver.open_stream(filename, "w")
    try:
        for item in smc:
            tag = item["tag"]

            if tag == "NAMES":
                util.print_row("NAMES", *item["names"], out=out)

            elif tag == "REGION":
                util.print_row("REGION",
                               item["chrom"], item["start"], item["end"],
                               out=out)

            elif tag == "TREE":
                tree = item["tree"]
                # A tree that is not already text is serialized with
                # format_tree; strings are written through unchanged.
                if not isinstance(tree, basestring):
                    tree = format_tree(tree)
                util.print_row("TREE", item["start"], item["end"], tree,
                               out=out)

            elif tag == "SPR":
                util.print_row("SPR", item["pos"],
                               item["recomb_node"], item["recomb_time"],
                               item["coal_node"], item["coal_time"],
                               out=out)
    finally:
        # Release the handle even if iteration or formatting raised.
        out.close()
def iter_arg_layout(filename):
    """
    Stream the records of an ARG layout file.

    The file is opened through argweaver.open_stream with
    compress='bgzip' and closed when iteration finishes.  Every line is
    split on tabs after trailing whitespace is removed:

      * fields 0-2 become the block ``[field0, int(field1), int(field2)]``
        (presumably chrom/start/end -- confirm against the file format);
      * the remaining fields are read as consecutive (name, value) pairs,
        with each value converted to float.

    Yields (block, leaf_layout) where leaf_layout maps name -> float.
    """
    with closing(argweaver.open_stream(filename, compress='bgzip')) as infile:
        for raw_line in infile:
            fields = raw_line.rstrip().split("\t")
            block = [fields[0], int(fields[1]), int(fields[2])]

            # Pair fields by index so a dangling name without a value
            # still fails loudly instead of being dropped.
            leaf_layout = {
                fields[k]: float(fields[k + 1])
                for k in range(3, len(fields), 2)
            }
            yield block, leaf_layout
def iter_arg_layout(filename):
    """
    Read an ARG layout file one line at a time.

    The stream comes from argweaver.open_stream(filename,
    compress='bgzip') and is closed once the generator is exhausted.
    Each line has its trailing whitespace stripped and is split on tabs.

    Yields (block, leaf_layout):
      block:       list of the first field as text followed by the second
                   and third fields as ints.
      leaf_layout: dict built from the remaining fields taken two at a
                   time, mapping the first of each pair to the float
                   value of the second.
    """
    with closing(argweaver.open_stream(filename, compress='bgzip')) as infile:
        for text in infile:
            columns = text.rstrip().split("\t")
            head = columns[0]
            block = [head, int(columns[1]), int(columns[2])]

            leaf_layout = {}
            ncolumns = len(columns)
            for name_index in range(3, ncolumns, 2):
                value_index = name_index + 1
                leaf_layout[columns[name_index]] = float(columns[value_index])

            yield block, leaf_layout
def iter_smc_file(filename, parse_trees=False, apply_spr=False, region=None):
    """
    Lazily read the records of a SMC file.

    filename:    path handed to argweaver.open_stream.
    parse_trees: when true, TREE records carry the result of parse_tree
                 instead of the raw tree text.
    apply_spr:   only consulted when parse_trees is true.  Once a tree has
                 been parsed and an SPR record has been seen, later TREE
                 records are not re-parsed: the current tree object is
                 passed to smc_apply_spr together with the latest SPR and
                 that same object is yielded again.
    region:      when given, the records of the file are filtered through
                 iter_subsmc(records, region) first; expected to be
                 (start, end).

    Each yielded record is a dict whose 'tag' selects its layout:

      {'tag': 'NAMES',  'names': list of sequence names}
      {'tag': 'REGION', 'chrom': chromosome name,
                        'start': int, 'end': int}
      {'tag': 'TREE',   'start': int, 'end': int,
                        'tree': tree text or parsed tree}
      {'tag': 'SPR',    'pos': int,
                        'recomb_node': int, 'recomb_time': float,
                        'coal_node': int,   'coal_time': float}

    A line starting with RANGE raises Exception; lines starting with any
    other unrecognised token are skipped.
    """
    if region:
        current_tree = None
        last_spr = None

        # Read the file unparsed, cut it down to the region, and only then
        # parse / update trees for the records that survive.
        for record in iter_subsmc(iter_smc_file(filename), region):
            tag = record["tag"]
            if tag == "SPR":
                last_spr = record
            elif tag == "TREE" and parse_trees:
                if (apply_spr and current_tree is not None
                        and last_spr is not None):
                    smc_apply_spr(current_tree, last_spr)
                else:
                    current_tree = parse_tree(record["tree"])
                record["tree"] = current_tree
            yield record
        return

    with closing(argweaver.open_stream(filename)) as infile:
        last_spr = None
        current_tree = None

        for raw_line in infile:
            fields = raw_line.rstrip().split("\t")
            tag = fields[0]

            if tag == "NAMES":
                record = {"tag": "NAMES", "names": fields[1:]}

            elif tag == "REGION":
                record = {
                    "tag": "REGION",
                    "chrom": fields[1],
                    "start": int(fields[2]),
                    "end": int(fields[3]),
                }

            elif tag == "RANGE":
                raise Exception("deprecated RANGE line, use REGION instead")

            elif tag == "TREE":
                tree_text = fields[3]
                if not parse_trees:
                    current_tree = tree_text
                elif (apply_spr and current_tree is not None
                        and last_spr is not None):
                    # Reuse the existing tree object rather than parsing
                    # the text on this line.
                    smc_apply_spr(current_tree, last_spr)
                else:
                    current_tree = parse_tree(tree_text)
                record = {
                    "tag": "TREE",
                    "start": int(fields[1]),
                    "end": int(fields[2]),
                    "tree": current_tree,
                }

            elif tag == "SPR":
                record = {
                    "tag": "SPR",
                    "pos": int(fields[1]),
                    "recomb_node": int(fields[2]),
                    "recomb_time": float(fields[3]),
                    "coal_node": int(fields[4]),
                    "coal_time": float(fields[5]),
                }
                # Remember it so the next TREE line can apply it.
                last_spr = record

            else:
                # Unrecognised (or blank) line: nothing to emit.
                continue

            yield record
def iter_smc_file(filename, parse_trees=False, apply_spr=False, region=None):
    """
    Generate the records stored in a SMC file.

    parse_trees: if true, the 'tree' of every TREE record is the value
                 returned by parse_tree rather than the text in the file.
    apply_spr:   if true together with parse_trees, a TREE record that
                 follows an already parsed tree and an SPR record is
                 produced by calling smc_apply_spr(tree, spr) on the
                 existing tree object instead of parsing the line's text.
    region:      if given, only the records that iter_subsmc keeps for
                 this region are produced; expected to be (start, end).

    Record layouts, keyed by 'tag':

      NAMES  -> {'tag', 'names'}
      REGION -> {'tag', 'chrom', 'start', 'end'}          (ints)
      TREE   -> {'tag', 'start', 'end', 'tree'}           (int coordinates)
      SPR    -> {'tag', 'pos', 'recomb_node', 'recomb_time',
                 'coal_node', 'coal_time'}                (ints / floats)

    Raises Exception when the file contains a RANGE line.  Lines whose
    first tab-separated token is none of the above are ignored.
    """
    if region:
        working_tree = None
        pending_spr = None

        for entry in iter_subsmc(iter_smc_file(filename), region):
            kind = entry["tag"]

            if kind == "SPR":
                pending_spr = entry
            elif kind == "TREE":
                if parse_trees:
                    reuse = (apply_spr and working_tree is not None
                             and pending_spr is not None)
                    if reuse:
                        smc_apply_spr(working_tree, pending_spr)
                    else:
                        working_tree = parse_tree(entry["tree"])
                    entry["tree"] = working_tree

            yield entry
        return

    with closing(argweaver.open_stream(filename)) as infile:
        pending_spr = None
        working_tree = None

        for text in infile:
            parts = text.rstrip().split("\t")
            kind = parts[0]

            if kind == "NAMES":
                yield {"tag": "NAMES", "names": parts[1:]}

            elif kind == "REGION":
                yield {"tag": "REGION",
                       "chrom": parts[1],
                       "start": int(parts[2]),
                       "end": int(parts[3])}

            elif kind == "RANGE":
                raise Exception("deprecated RANGE line, use REGION instead")

            elif kind == "TREE":
                newick = parts[3]

                if not parse_trees:
                    # Raw mode: hand the text straight through.
                    working_tree = newick
                elif (apply_spr and working_tree is not None
                        and pending_spr is not None):
                    smc_apply_spr(working_tree, pending_spr)
                else:
                    working_tree = parse_tree(newick)

                yield {"tag": "TREE",
                       "start": int(parts[1]),
                       "end": int(parts[2]),
                       "tree": working_tree}

            elif kind == "SPR":
                pending_spr = {"tag": "SPR",
                               "pos": int(parts[1]),
                               "recomb_node": int(parts[2]),
                               "recomb_time": float(parts[3]),
                               "coal_node": int(parts[4]),
                               "coal_time": float(parts[5])}
                yield pending_spr