Пример #1
0
def make_mutrel_from_clustrel(clustrel, clusters, check_sanity=True):
    '''
    Expand a cluster-level relation tensor into a mutation-level one.

    * `K` = `len(clusters)`
    * `M` = number of rows in the membership matrix returned by
      `util.make_membership_mat(clusters)`

    Arguments:
    `clustrel`: object whose `rels` attribute is a `KxKxNUM_MODELS` tensor
    `clusters`: sequence of `K` clusters, passed to
    `util.make_membership_mat` to get `vids` and the `MxK` membership matrix
    `check_sanity`: accepted for interface compatibility; this function does
    not currently read it

    Returns:
    a `mutrel.Mutrel` over `vids` whose `rels` is the `MxMxNUM_MODELS` tensor
    `membership . clustrel.rels[:, :, m] . membership.T` for each model `m`
    '''
    cluster_rels = clustrel.rels
    mutrel.check_posterior_sanity(cluster_rels)
    num_clusters = len(clusters)
    assert cluster_rels.shape == (num_clusters, num_clusters, NUM_MODELS)

    vids, membership = util.make_membership_mat(clusters)
    num_muts = len(membership)
    assert len(vids) == num_muts
    assert membership.shape == (num_muts, num_clusters)

    mut_rels = np.zeros((num_muts, num_muts, NUM_MODELS))
    membership_t = membership.T
    for model in range(NUM_MODELS):
        # (MxK) . (KxK) -> MxK, then (MxK) . (KxM) -> MxM.
        per_cluster = np.dot(membership, cluster_rels[:, :, model])
        mut_rels[:, :, model] = np.dot(per_cluster, membership_t)

    # The result is deliberately not passed through
    # `mutrel.check_posterior_sanity`: this function is called for each tree
    # (for methods that don't have a fixed clustering), and the check was
    # consuming >50% of the total runtime for LICHeE's output conversion.
    return mutrel.Mutrel(vids=vids, rels=mut_rels)
Пример #2
0
def make_clustrel_from_cluster_adj(cluster_adj):
    '''
    Build a binary cluster-relation tensor from a cluster adjacency matrix.

    * `K` = # of clusters (including empty first cluster)

    Arguments:
    `cluster_adj`: a `KxK` adjacency matrix, where `cluster_adj[a,b] = 1` iff
    `a = b` or `b` is a child of `a`

    Returns:
    a `mutrel.Mutrel` with vids `S1`..`SK` whose `rels` is a
    `KxKxNUM_MODELS` binary relation tensor in which every `(a, b)` pair has
    exactly one model set to 1
    '''
    num_clusters = len(cluster_adj)
    assert cluster_adj.shape == (num_clusters, num_clusters)

    ancestral = util.make_ancestral_from_adj(cluster_adj)
    # The first cluster must be ancestral to every cluster.
    assert np.all(1 == ancestral[0])
    # A cluster is not its own ancestor for the purposes of A_B: pairs in the
    # same cluster are reported as coclustered instead.
    np.fill_diagonal(ancestral, 0)

    rels = np.zeros((num_clusters, num_clusters, NUM_MODELS))
    rels[:, :, Models.cocluster] = np.eye(num_clusters)
    rels[:, :, Models.A_B] = ancestral
    rels[:, :, Models.B_A] = ancestral.T

    # Any pair not yet claimed by cocluster / A_B / B_A lies on different
    # branches.
    claimed_models = [Models.cocluster, Models.A_B, Models.B_A]
    claimed = rels[:, :, claimed_models].sum(axis=2)
    rels[claimed == 0, Models.diff_branches] = 1

    assert np.array_equal(
        np.ones((num_clusters, num_clusters)),
        np.sum(rels, axis=2),
    )
    vids = ['S%s' % num for num in range(1, num_clusters + 1)]
    result = mutrel.Mutrel(vids=vids, rels=rels)
    mutrel.check_posterior_sanity(result.rels)
    return result
Пример #3
0
def _compute_pairs(pairs,
                   variants,
                   logprior,
                   posterior,
                   evidence,
                   pbar=None,
                   parallel=1):
    '''
    Compute evidence and posterior for every variant pair, filling
    `evidence.rels` and `posterior.rels` in place.

    Arguments:
    `pairs`: iterable of `(A, B)` index pairs to compute
    `variants`: container indexable by `A` and `B`
    `logprior`: prior passed through `_complete_logprior` before use
    `posterior`, `evidence`: objects whose `rels` arrays are written at
    `[A, B]` and, for `A != B`, at `[B, A]` via `swap_A_B`
    `pbar`: optional progress bar; `pbar.update()` is called once per
    computed pair, in both the parallel and the serial path
    `parallel`: number of worker processes (capped at the number of pairs);
    a value of 0 runs everything serially in this process

    Returns:
    the tuple `(posterior, evidence)`
    '''
    logprior = _complete_logprior(logprior)
    # TODO: change ordering of pairs based on what will provide optimal
    # integration accuracy according to Quaid's advice.
    pairs = list(pairs)
    # Don't bother starting more workers than jobs.
    parallel = min(parallel, len(pairs))

    # If you set parallel = 0, we don't invoke the parallelism machinery. This
    # makes debugging easier.
    if parallel > 0:
        with concurrent.futures.ProcessPoolExecutor(
                max_workers=parallel) as ex:
            futures = [
                ex.submit(_calc_lh_and_posterior, variants[A], variants[B],
                          logprior)
                for A, B in pairs
            ]
            if pbar is not None:
                for _ in concurrent.futures.as_completed(futures):
                    pbar.update()
        # Leaving the `with` block waits for all workers, so every future is
        # done here; `result()` re-raises any exception from a worker.
        for (A, B), F in zip(pairs, futures):
            evidence.rels[A, B], posterior.rels[A, B] = F.result()
    else:
        for A, B in pairs:
            evidence.rels[A, B], posterior.rels[A, B] = _calc_lh_and_posterior(
                variants[A], variants[B], logprior)
            # Keep the progress bar moving in serial mode too, matching the
            # parallel path.
            if pbar is not None:
                pbar.update()

    # Fill in the mirrored (B, A) entry for each off-diagonal pair.
    for A, B in pairs:
        if A == B:
            continue
        evidence.rels[B, A] = swap_A_B(evidence.rels[A, B])
        posterior.rels[B, A] = swap_A_B(posterior.rels[A, B])

    mutrel.check_mutrel_sanity(evidence.rels)
    mutrel.check_posterior_sanity(posterior.rels)
    assert np.allclose(1, np.sum(posterior.rels, axis=2))

    # TODO: only calculate posterior once here, instead of computing it within
    # each worker separately for a given variant pair.
    other = _calc_posterior_full(evidence.rels, logprior)
    assert np.allclose(posterior.rels, other)
    return (posterior, evidence)
Пример #4
0
def _calc_posterior_full(evidence, logprior):
    '''
    Recompute the full posterior tensor from `evidence` and `logprior`.

    This function is currently used only to double-check the results of
    `_calc_posterior`.

    Arguments:
    `evidence`: 3D array indexed `[i, j, model]`
    `logprior`: 1D array over models, added to every `(i, j)` entry

    Returns:
    an array shaped like `evidence` that is normalized to sum to 1 along the
    model axis; diagonal entries put all their mass on `Models.cocluster`
    '''
    joint = evidence + logprior[None, None, :]

    # Force every (i, i) entry to be coclustered with itself.
    same = np.arange(len(joint))
    joint[same, same, :] = -np.inf
    joint[same, same, Models.cocluster] = 0

    # Subtract the per-pair maximum before exponentiating for numerical
    # stability; the shift cancels in the normalization below.
    peak = np.max(joint, axis=2)
    weights = np.exp(joint - peak[:, :, None])
    totals = np.sum(weights, axis=2)
    posterior = weights / totals[:, :, None]

    mutrel.check_posterior_sanity(posterior)
    return posterior
Пример #5
0
def add_garbage(posterior, garb_svids):
    '''
    Return a posterior extended with garbage variants appended at the end.

    Arguments:
    `posterior`: object with `vids` and `rels`; returned unchanged when
    `garb_svids` is empty
    `garb_svids`: IDs of the garbage variants; must not overlap
    `posterior.vids`

    Returns:
    a new mutrel from `mutrel.init_mutrel` over `posterior.vids + garb_svids`
    in which the original block of `rels` is copied from `posterior`, every
    other off-diagonal pair is `Models.garbage`, and every diagonal entry is
    `Models.cocluster`
    '''
    if len(garb_svids) == 0:
        return posterior
    assert set(posterior.vids).isdisjoint(garb_svids)

    extended = mutrel.init_mutrel(posterior.vids + garb_svids)
    num_garbage = len(garb_svids)
    rels = extended.rels

    # The order of these overwrites matters: start with everything marked as
    # garbage, then fix the diagonal, then paste the original relations over
    # the leading block.
    rels[:] = 0
    rels[:, :, Models.garbage] = 1
    same = range(len(extended.vids))
    rels[same, same, :] = 0
    rels[same, same, Models.cocluster] = 1
    original_block = slice(None, -num_garbage)
    rels[original_block, original_block, :] = posterior.rels

    mutrel.check_posterior_sanity(extended.rels)
    return extended
Пример #6
0
def compare(mutrels):
    '''
    Score every mutrel against `mutrels['truth']` and print the results.

    Arguments:
    `mutrels`: mapping from method name to a mutrel (or `None` when the
    method has no result); must contain the key `'truth'`

    Prints two comma-separated lines: the sorted method names, then the
    corresponding scores (`MISSING` for `None` entries).
    '''
    assert 'truth' in mutrels
    truth = mutrels['truth']
    M, _, num_models = truth.rels.shape
    expected_shape = (M, M, num_models)
    assert truth.rels.shape == expected_shape

    names = sorted(mutrels.keys())
    scores = {}

    for name in names:
        candidate = mutrels[name]
        if candidate is None:
            scores[name] = MISSING
            continue

        assert candidate.rels.shape == expected_shape
        assert np.array_equal(candidate.vids, truth.vids)
        mutrel.check_posterior_sanity(candidate.rels)

        score = _score_trerror(candidate.rels, truth.rels)
        scores[name] = score
        # The truth compared against itself must have zero error.
        if name == 'truth':
            assert np.isclose(0, score)

    print(*names, sep=',')
    print(*(scores[name] for name in names), sep=',')
Пример #7
0
def save_sorted_mutrel(mrel, mrelfn):
    '''
    Sort `mrel` with `mutrel.sort_mutrel_by_vids`, sanity-check it, and write
    its `rels` and `vids` arrays to `mrelfn` as a compressed `.npz` archive.
    '''
    ordered = mutrel.sort_mutrel_by_vids(mrel)
    mutrel.check_posterior_sanity(ordered.rels)
    np.savez_compressed(
        mrelfn,
        rels=ordered.rels,
        vids=ordered.vids,
    )