Exemplo n.º 1
0
def simulate_tree(sample_size, sparse_factor, gen_exp, init_keep_rate):
    """Simulate a sparse relationship matrix with a target IBD count.

    An edge list is built from the generated generations, turned into a
    boolean CSR adjacency matrix, and then a shuffled prefix of the edges
    is removed so that ``count_IBD_nonzero`` of the result lands within
    10% of ``sample_size**2 * sparse_factor``.

    Args:
        sample_size: Number of individuals; the matrix is
            ``sample_size x sample_size``.
        sparse_factor: Multiplier of ``sample_size**2`` giving the wanted
            IBD count.
        gen_exp: Passed to ``generation_size`` together with
            ``sample_size``.
        init_keep_rate: Passed to ``combine_ind_to_households``.

    Returns:
        A tuple ``(rel_matrix, sex, gen_ind)``:
        ``rel_matrix`` is the pruned boolean CSR matrix, ``sex`` is an
        array of length ``sample_size`` holding 1 for the first half of
        each generation and 0 otherwise, and ``gen_ind`` holds the index
        of the generation each individual belongs to.

    Raises:
        Exception: If ``find_number_of_edges_to_remove`` returns ``None``.
        AssertionError: If the matrix has entries on or above the diagonal,
            or if the pruned matrix misses the target IBD count by 10% or
            more.
    """
    wanted_number_of_ibds = (sample_size**2) * sparse_factor

    # Create an adjacency matrix holding one entry per generated edge.
    gens = get_generations(generation_size(sample_size, gen_exp))
    edges = combine_ind_to_households(gens, init_keep_rate)
    # Builtin ``bool`` instead of ``np.bool``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    rel_matrix = csr_matrix(
        (np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
        shape=(sample_size, sample_size),
        dtype=bool)

    # Check that every individual's parents come before them: nothing may
    # be stored on or above the diagonal.
    assert triu(rel_matrix).nnz == 0

    # Find the correct fraction of edges to remove until the IBD count
    # matches. The shuffle is in place, so the removed prefix is random.
    np.random.shuffle(edges)
    res = find_number_of_edges_to_remove(rel_matrix, edges,
                                         wanted_number_of_ibds)
    if res is None:
        raise Exception("Did not find a good enough tree")

    num_edges = int(edges.shape[0] * res)
    rel_matrix[edges[:num_edges, 0], edges[:num_edges, 1]] = False
    # Drop the entries that were just set to False from the sparse storage.
    rel_matrix.eliminate_zeros()

    # Check that the resulting matrix is valid for our criteria.
    assert np.abs(count_IBD_nonzero(rel_matrix) -
                  wanted_number_of_ibds) < 0.1 * wanted_number_of_ibds

    sex = np.zeros((sample_size))
    gen_ind = np.zeros((sample_size))
    for i, gen in enumerate(gens):
        # The first half of each generation is flagged with sex == 1.
        sex[gen[:gen.size // 2]] = 1
        gen_ind[gen] = i

    return rel_matrix, sex, gen_ind
Exemplo n.º 2
0
def count_with_removed_edges(edges_remove_part, rel_matrix, edges,
                             wanted_number_of_ibds):
    """Measure how far the IBD count is from the target after pruning.

    A copy of ``rel_matrix`` has the first ``edges_remove_part`` fraction
    of ``edges`` set to False, and ``count_IBD_nonzero`` is evaluated on
    that copy. ``rel_matrix`` itself is not modified.

    Args:
        edges_remove_part: Fraction of the rows of ``edges`` to remove.
        rel_matrix: Matrix to copy and prune.
        edges: 2-column array of (row, col) positions.
        wanted_number_of_ibds: Target value for ``count_IBD_nonzero``.

    Returns:
        The absolute difference between the counted and the wanted number
        of IBDs, when that difference is not below 10% of the target.

    Raises:
        Exception: Carrying ``edges_remove_part`` as its only argument when
            the difference is below 10% of the target. This is how a good
            enough fraction is reported back to the caller.
    """
    cutoff = int(edges.shape[0] * edges_remove_part)
    rows, cols = edges[:cutoff, 0], edges[:cutoff, 1]

    pruned = rel_matrix.copy()
    pruned[rows, cols] = False
    # NOTE(review): eliminate_zeros() is not called here, so the removed
    # positions may remain as explicitly stored zeros -- confirm that
    # count_IBD_nonzero is insensitive to them.

    tolerance = 0.1 * wanted_number_of_ibds
    distance = np.abs(count_IBD_nonzero(pruned) - wanted_number_of_ibds)
    if not distance < tolerance:
        return distance
    raise Exception(edges_remove_part)
Exemplo n.º 3
0
                        help='Gen size = gen_exp X prev gen size')
    parser.add_argument(
        '--init_keep_rate',
        dest='init_keep_rate',
        type=float,
        default=0.8,
        help='1 - number of edges to remove before iteration begins')
    parser.add_argument('--save_folder',
                        dest='save_folder',
                        type=str,
                        default='.',
                        help='which folder it should save the output to.')

    args = parser.parse_args()
    if args.sample_size <= 0:
        raise Exception("Sample size should be a positive number")
    if (args.sparsity_factor <= 0) or (args.sparsity_factor >= 1):
        raise Exception("Sparsity factor is within the range (0, 1)")
    if args.gen_exp <= 0:
        raise Exception("gen_exp is a positive number")
    if (args.init_keep_rate <= 0) or (args.init_keep_rate > 1):
        raise Exception("init_keep_rate is within the range (0, 1)")

    rel = load_sparse_csr('rel.npz')
    print(count_IBD_nonzero(rel))

    # rel, sex, gen_ind = simulate_tree(args.sample_size, args.sparsity_factor, args.gen_exp, args.init_keep_rate)
    # save_sparse_csr(os.path.join(args.save_folder, 'rel.npz'), rel)
    # np.save(os.path.join(args.save_folder, 'sex.npy'), sex)
    # np.save(os.path.join(args.save_folder, 'gen_ind.npy'), gen_ind)