Python unzip示例，tree_to_data.unzip Python示例

示例#1

0

显示文件

文件： Treemix_to_AdmixtureBayes.py 项目： Tmacme/AdmixtureBayes

def read_treemix_file2(filename_treeout,
                       filename_vertices,
                       filename_edges,
                       outgroup=None):
    """Build a tree from a treeout file, a vertices file and an edges file.

    Any of the three paths that ends in ``.gz`` is first passed to ``unzip``
    and the path it returns is read instead.

    The first line of the treeout file is consumed separately; every
    remaining line is right-stripped and handed to ``parse_admixtures2``.
    The tree itself is produced by ``make_Rtree`` from the edge lengths, the
    vertex dictionary and the parsed admixtures.

    The vertex dictionary and the resulting tree are printed to stdout as a
    side effect.

    :param filename_treeout: path to the treeout file (optionally gzipped).
    :param filename_vertices: path to the vertices file (optionally gzipped).
    :param filename_edges: path to the edges file (optionally gzipped).
    :param outgroup: if not ``None``, the tree is passed through
        ``rearrange_root(tree, outgroup)`` and printed again.
    :return: the tree returned by ``make_Rtree`` (after ``rearrange_root``
        when an outgroup is given).
    """
    # Decompress in the same order as the arguments: treeout, vertices, edges.
    filename_treeout, filename_vertices, filename_edges = [
        unzip(path) if path.endswith('.gz') else path
        for path in (filename_treeout, filename_vertices, filename_edges)
    ]

    with open(filename_treeout, 'r') as treeout:
        # The first line is read on its own so that only the lines after it
        # reach the admixture parser; its value is not used further here.
        newick_tree = treeout.readline().rstrip()
        remaining_lines = [line.rstrip() for line in treeout.readlines()]
        admixtures = parse_admixtures2(remaining_lines)

    vertices = read_vertices(filename_vertices)
    print(vertices)
    edge_lengths = get_edge_lengths2(filename_edges)

    tree = make_Rtree(edge_lengths, vertices, admixtures)
    print(pretty_string(tree))

    if outgroup is None:
        return tree

    tree = rearrange_root(tree, outgroup)
    print('after rearrangement')
    print(pretty_string(tree))
    return tree

示例#2

0

显示文件

文件： wishart_distribution_estimation.py 项目： Tmacme/AdmixtureBayes

def make_single_files(filename,
                      blocksize,
                      no_blocks,
                      prefix='',
                      verbose_level='normal'):
    """Split a data file into blocks and write each block to its own file.

    The first line of ``filename`` is treated as a header and copied to the
    top of every output file.  The remaining lines are partitioned by
    ``get_partitions(lines, blocksize)``; partition ``i`` is written to
    ``prefix + <last path component of filename> + 'boot.' + str(i)`` and
    then passed to ``gzip(..., overwrite=True)``.

    :param filename: input path; if it ends in ``.gz`` it is first passed to
        ``unzip`` and the returned path is used.
    :param blocksize: number of lines per block; ignored when ``no_blocks``
        is given.
    :param no_blocks: if not ``None``, the block size is computed as
        ``len(lines) // no_blocks``.
    :param prefix: string prepended (by plain concatenation) to each output
        file name.
    :param verbose_level: anything other than ``'silent'`` prints summary
        counts to stdout.
    :return: ``(filenames, header_fields)`` where ``filenames`` holds the
        values returned by ``gzip`` for each block and ``header_fields`` is
        the header line split on whitespace.
    :raises AssertionError: if both ``blocksize`` and ``no_blocks`` are
        ``None``.
    """
    assert (blocksize is not None) or (
        no_blocks
        is not None), 'Has to specify either block size or number of blocks'

    if filename.endswith('.gz'):
        filename = unzip(filename)
    filename_reduced = prefix + filename.split(os.sep)[-1] + 'boot.'

    with open(filename, 'r') as f:
        first_line = f.readline()
        lines = f.readlines()
    n = len(lines)

    if no_blocks is not None:
        # Explicit floor division: the block size must stay an integer even
        # where "/" means true division (Python 3 or
        # "from __future__ import division").
        blocksize = n // no_blocks
    line_sets = get_partitions(lines, blocksize)

    if verbose_level != 'silent':
        # Single-argument print calls emit the same text as the former
        # Python 2 print statements and also parse on Python 3.
        print('total SNPs= %s' % (n,))
        print('total blocksize %s' % (blocksize,))
        print('len(line_sets) %s' % (len(line_sets),))

    filenames = []
    for i, lins in enumerate(line_sets):
        new_filename = filename_reduced + str(i)
        with open(new_filename, 'w') as g:
            g.write(first_line)
            g.writelines(lins)
        gzipped_filename = gzip(new_filename, overwrite=True)
        filenames.append(gzipped_filename)
    return filenames, first_line.split()

示例#3

0

显示文件

文件： Treemix_to_AdmixtureBayes.py 项目： Tmacme/AdmixtureBayes

def read_treemix_file(filename_treeout,
                      filename_vertices,
                      filename_edges,
                      outgroup=None):
    """Build a tree from a treeout file, a vertices file and an edges file.

    Any of the three paths that ends in ``.gz`` is first passed to ``unzip``
    and the path it returns is read instead.

    The first line of the treeout file is parsed with ``parse_newick_tree``;
    the remaining lines are right-stripped and parsed with
    ``parse_admixtures``.  The key mapping returned by ``parse_newick_tree``
    is registered in a ``vertice_dictionary`` (``'AdmB'`` <-> ``'Treemix_N'``),
    which is then extended by ``match_vertices`` using the vertices file.
    Admixtures are finally added with ``add_admixtures``.

    :param filename_treeout: path to the treeout file (optionally gzipped).
    :param filename_vertices: path to the vertices file (optionally gzipped).
    :param filename_edges: path to the edges file (optionally gzipped).
    :param outgroup: if not ``None``, the tree is passed through
        ``rearrange_root(tree, outgroup)`` before being returned.
    :return: the tree produced by ``add_admixtures`` (after
        ``rearrange_root`` when an outgroup is given).
    """
    # NOTE(review): the naming-policy object is never used in this function.
    # The call is kept only in case constructing it has side effects that are
    # not visible from here -- confirm and remove.
    _naming_policy = new_node_naming_policy()

    if filename_treeout.endswith('.gz'):
        filename_treeout = unzip(filename_treeout)
    if filename_vertices.endswith('.gz'):
        filename_vertices = unzip(filename_vertices)
    if filename_edges.endswith('.gz'):
        filename_edges = unzip(filename_edges)

    with open(filename_treeout, 'r') as f:
        newick_tree = f.readline().rstrip()
        # Build a real list so the parser gets the same type on Python 2
        # and Python 3 (where map() would return a lazy iterator).
        admixtures = parse_admixtures([line.rstrip() for line in f.readlines()])

    tree, translates = parse_newick_tree(newick_tree)

    vd = vertice_dictionary()
    for adm_key, treemix_N_key in translates.items():
        vd.insert_mapping(adm_key, treemix_N_key, 'AdmB', 'Treemix_N')
    vd, adm_vertices = match_vertices(filename_vertices, vd)

    # The edges file is parsed once, with get_edge_lengths.  An earlier
    # get_edge_lengths2() call on the same file was a dead store: its result
    # was overwritten by this assignment before ever being read.
    edges = get_edge_lengths(filename_edges)

    tree = insert_children_in_tree(tree)
    tree = add_admixtures(tree, vd, adm_vertices, edges, admixtures)
    if outgroup is not None:
        tree = rearrange_root(tree, outgroup)

    return tree

示例#4

0

显示文件

文件： wishart_distribution_estimation.py 项目： Tmacme/AdmixtureBayes

def make_bootstrap_files(filename,
                         blocksize=None,
                         no_blocks=None,
                         bootstrap_samples=None,
                         prefix=''):
    """Write block-resampled copies of a data file, each one gzipped.

    The first line of ``filename`` is treated as a header and copied to the
    top of every output file.  The remaining lines are partitioned by
    ``get_partitions(lines, blocksize)``.  For each bootstrap sample, the
    indices returned by ``bootstrap_indices(len(line_sets))`` select which
    partitions are written, in that order (presumably a draw with
    replacement -- verify against ``bootstrap_indices``).

    Output file ``i`` is named
    ``os.path.join(prefix, <last path component of filename> + 'boot.') + str(i)``
    and is then passed to ``gzip(..., overwrite=True)``.

    Summary counts are always printed to stdout.

    :param filename: input path; if it ends in ``.gz`` it is first passed to
        ``unzip`` and the returned path is used.
    :param blocksize: number of lines per block; ignored when ``no_blocks``
        is given.
    :param no_blocks: if not ``None``, the block size is computed as
        ``len(lines) // no_blocks``.
    :param bootstrap_samples: number of output files; defaults to the number
        of partitions.
    :param prefix: directory (or path prefix) joined in front of the output
        file names.
    :return: ``(filenames, header_fields)`` where ``filenames`` holds the
        values returned by ``gzip`` for each sample and ``header_fields`` is
        the header line split on whitespace.
    :raises AssertionError: if both ``blocksize`` and ``no_blocks`` are
        ``None``.
    """
    assert (blocksize is not None) or (
        no_blocks
        is not None), 'Has to specify either block size or number of blocks'

    if filename.endswith('.gz'):
        filename = unzip(filename)
    filename_reduced = os.path.join(prefix,
                                    filename.split(os.sep)[-1] + 'boot.')

    with open(filename, 'r') as f:
        first_line = f.readline()
        lines = f.readlines()
    n = len(lines)

    if no_blocks is not None:
        # Explicit floor division: the block size must stay an integer even
        # where "/" means true division (Python 3 or
        # "from __future__ import division").
        blocksize = n // no_blocks
    line_sets = get_partitions(lines, blocksize)

    # Single-argument print calls emit the same text as the former Python 2
    # print statements and also parse on Python 3.
    print('total SNPs= %s' % (n,))
    print('total blocksize %s' % (blocksize,))
    print('no_blocks %s' % (no_blocks,))
    print('bootstrap_samples %s' % (bootstrap_samples,))
    print('len(line_sets) %s' % (len(line_sets),))

    if bootstrap_samples is None:
        bootstrap_samples = len(line_sets)

    filenames = []
    for sample_index in range(bootstrap_samples):
        new_filename = filename_reduced + str(sample_index)
        with open(new_filename, 'w') as g:
            g.write(first_line)
            bootstrap_inds = bootstrap_indices(len(line_sets))
            # Distinct name for the block index: the original reused ``i``
            # here and clobbered the outer sample counter.
            for block_index in bootstrap_inds:
                g.writelines(line_sets[block_index])
        gzipped_filename = gzip(new_filename, overwrite=True)
        filenames.append(gzipped_filename)
    return filenames, first_line.split()

示例#5

0

显示文件

文件： remove_populations_from_dataset.py 项目： Tmacme/AdmixtureBayes

# Tree input: restrict the tree to the requested populations and write the
# sorted population names followed by the subtree's identifier string.
if options.input_type == 'tree':
    tree = identifier_file_to_tree_clean(options.input_file)
    if options.input_add:
        # The first line of the file named by --input_add is parsed as a
        # float and used as the branch length towards the new root.
        with open(options.input_add, 'r') as add_file:
            add = float(add_file.readline())
        tree = add_outgroup(tree,
                            inner_node_name='new_node',
                            to_new_root_length=add,
                            to_outgroup_length=0,
                            outgroup_name=options.outgroup_name)

    nodes = get_leaf_keys(tree)
    assert all(population in nodes for population in options.populations), \
        'Requested population was not found in the tree'
    subtree = get_subtree(tree, options.populations)

    # Default output name: input path immediately followed by the population
    # names joined with underscores.
    if not options.output_file:
        options.output_file = options.input_file + '_'.join(
            options.populations)

    with open(options.output_file, 'w') as out_file:
        out_file.write(' '.join(sorted(options.populations)) + '\n')
        out_file.write(unique_identifier_and_branch_lengths(subtree))

# SNP input: keep only the requested population columns and gzip the result.
elif options.input_type == 'snps':
    if options.input_file.endswith('.gz'):
        options.input_file = unzip(options.input_file, overwrite=False)
    df = pd.read_csv(options.input_file, usecols=options.populations, sep=' ')

    # The default name is derived from the (possibly unzipped) input path.
    if not options.output_file:
        options.output_file = options.input_file + '_'.join(
            options.populations)

    df.to_csv(options.output_file, sep=' ', index=False)
    gzip(options.output_file, overwrite=True)

示例#6

0

显示文件

文件： construct_nodes_choices.py 项目： Tmacme/AdmixtureBayes

def read_one_line(filename):
    """Return the whitespace-separated fields of the first line of a file.

    If ``filename`` ends in ``.gz`` it is first passed to ``unzip`` and the
    path that call returns is opened instead.

    :param filename: path to a text file (optionally gzipped).
    :return: list of the tokens on the first line; an empty list when the
        file is empty or the first line is blank.
    """
    path = unzip(filename) if filename.endswith('.gz') else filename
    with open(path, 'r') as handle:
        header = handle.readline()
    return header.rstrip().split()