Example #1
def test_shuffled_edges():
    # Shuffling the order of the input edges must leave the log likelihood
    # output unchanged, and must permute the per-edge rows of the dwell and
    # em_update outputs by the same permutation.

    d = copy.deepcopy(d_original)
    original_dwell = json.loads(arbplf_dwell(json.dumps(d)))

    d = copy.deepcopy(d_original)
    original_ll = json.loads(arbplf_ll(json.dumps(d)))

    d = copy.deepcopy(d_original)
    d['site_reduction'] = {'aggregation' : 'sum'}
    original_em_update = json.loads(arbplf_em_update(json.dumps(d)))

    iter_count = 10
    for i in range(iter_count):
        d_shuffled, perm = _shuffle_edges(d_original)

        # the ll output does not have an edge column
        d = copy.deepcopy(d_shuffled)
        ll = json.loads(arbplf_ll(json.dumps(d)))
        assert_equal(ll, original_ll)

        d = copy.deepcopy(d_shuffled)
        dwell = json.loads(arbplf_dwell(json.dumps(d)))
        dwell_prime = _perm_output_edges(dwell, perm)
        dwell_prime['data'].sort()
        assert_equal(dwell_prime, original_dwell)

        d = copy.deepcopy(d_shuffled)
        d['site_reduction'] = {'aggregation' : 'sum'}
        em_update = json.loads(arbplf_em_update(json.dumps(d)))
        em_update_prime = _perm_output_edges(em_update, perm)
        em_update_prime['data'].sort()
        assert_equal(em_update_prime, original_em_update)
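The test above leans on two helpers defined elsewhere in the test module. A minimal sketch of what they might look like, assuming the dwell and em_update output tables carry the edge index in the first column of each data row (names and details are illustrative, not the project's actual code):

import copy
import random

def _shuffle_edges(d):
    # Permute the edge order, carrying the rate coefficients along;
    # return the shuffled input together with the permutation used.
    m = d['model_and_data']
    perm = list(range(len(m['edges'])))
    random.shuffle(perm)
    out = copy.deepcopy(d)
    out['model_and_data']['edges'] = [m['edges'][i] for i in perm]
    out['model_and_data']['edge_rate_coefficients'] = [
            m['edge_rate_coefficients'][i] for i in perm]
    return out, perm

def _perm_output_edges(output, perm):
    # Map the edge indices in an output table back through the permutation
    # (assumes the edge index is the first entry of each data row).
    out = copy.deepcopy(output)
    out['data'] = [[perm[row[0]]] + row[1:] for row in output['data']]
    return out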
Example #2
def test_shuffled_nodes():
    # Relabeling the nodes, consistently across the edge list and the
    # probability array, must leave every output unchanged.

    d = copy.deepcopy(d_original)
    original_dwell = json.loads(arbplf_dwell(json.dumps(d)))

    d = copy.deepcopy(d_original)
    original_ll = json.loads(arbplf_ll(json.dumps(d)))

    d = copy.deepcopy(d_original)
    d['site_reduction'] = {'aggregation' : 'sum'}
    original_em_update = json.loads(arbplf_em_update(json.dumps(d)))

    iter_count = 10
    for i in range(iter_count):
        d_shuffled = _shuffle_nodes(d_original)

        d = copy.deepcopy(d_shuffled)
        dwell = json.loads(arbplf_dwell(json.dumps(d)))
        assert_equal(dwell, original_dwell)

        d = copy.deepcopy(d_shuffled)
        ll = json.loads(arbplf_ll(json.dumps(d)))
        assert_equal(ll, original_ll)

        d = copy.deepcopy(d_shuffled)
        d['site_reduction'] = {'aggregation' : 'sum'}
        em_update = json.loads(arbplf_em_update(json.dumps(d)))
        assert_equal(em_update, original_em_update)
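test_shuffled_nodes similarly relies on a _shuffle_nodes helper. A sketch under the assumption that relabeling the nodes requires rewriting both the edge list and the per-node rows of every site in the probability array:

import copy
import random

def _shuffle_nodes(d):
    m = d['model_and_data']
    node_count = 1 + max(max(e) for e in m['edges'])
    perm = list(range(node_count))  # perm[old_label] == new_label
    random.shuffle(perm)
    out = copy.deepcopy(d)
    out['model_and_data']['edges'] = [
            [perm[a], perm[b]] for a, b in m['edges']]
    out['model_and_data']['probability_array'] = [
            [site[perm.index(i)] for i in range(node_count)]
            for site in m['probability_array']]
    return out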
Example #3
def generic_neg_ll(get_s, X):
    s_in = get_s(X)
    s_out = arbplf_ll(s_in)
    df = pd.read_json(StringIO(s_out), orient='split', precise_float=True)
    y = -df.value.values[0]
    print(y)
    return y
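generic_neg_ll is written to be specialized with functools.partial, so that an input builder get_s turns it into a plain X -> float objective. A usage sketch with a hypothetical one-edge get_s (the dict shape follows the other examples on this page):

from functools import partial
import json
import numpy as np

def get_s(X):
    # hypothetical builder: a single edge whose rate coefficient is X[0]
    return json.dumps({
        "model_and_data": {
            "edges": [[0, 1]],
            "edge_rate_coefficients": [float(X[0])],
            "rate_matrix": [[0, 1], [0, 0]],
            "probability_array": [[[1, 0], [0, 1]]]},
        "site_reduction": {"aggregation": "only"}})

neg_ll = partial(generic_neg_ll, get_s)
y = neg_ll(np.array([1.0]))  # -log(1 - exp(-1)), about 0.459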
Example #4
def myll(d):
    """
    Provides a dict -> pandas.DataFrame wrapper of the JSON arbplf_ll.
    """
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    return df
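For reference, calling myll with a dict shaped like the test inputs further down this page returns a one-row DataFrame whose value column holds the log likelihood (a sketch; arbplf_ll, pandas, and StringIO are imported as in the surrounding snippets):

d = {
    "model_and_data": {
        "edges": [[0, 1], [1, 2]],
        "edge_rate_coefficients": [1, 2],
        "rate_matrix": [[0, 1], [0, 0]],
        "probability_array": [[[1, 0], [1, 1], [1, 0]]]},
    "site_reduction": {"aggregation": "only"}}
print(myll(d).value.values[0])  # -3.0, matching test_heterogeneous_edge_rates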
Example #5
File: newt.py  Project: argriffing/phyly
def summarize(d):
    print('newton delta:')
    print(arbplf_newton_delta(json.dumps(d)))
    print('deriv:')
    print(arbplf_deriv(json.dumps(d)))
    print('ll:')
    print(arbplf_ll(json.dumps(d)))
Example #7
def myll(d):
    """
    Provides a dict -> pandas.DataFrame wrapper of the JSON arbplf_ll.
    """
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    return df
Example #8
def test_heterogeneous_edge_rates():
    # try changing one of the edge rate coefficients
    d = {
        "model_and_data": {
            "edges": [[0, 1], [1, 2]],
            "edge_rate_coefficients": [1, 2],
            "rate_matrix": [[0, 1], [0, 0]],
            "probability_array": [[[1, 0], [1, 1], [1, 0]]]
        },
        "site_reduction": {
            "aggregation": "only"
        }
    }

    actual_marginal = json.loads(arbplf_marginal(json.dumps(d)))
    assert_equal(actual_marginal, desired_marginal)

    g = copy.deepcopy(d)
    g['trans_reduction'] = dict(selection=[[0, 1], [1, 0]])
    actual_trans = json.loads(arbplf_trans(json.dumps(g)))
    assert_equal(actual_trans, desired_trans)

    actual_ll = json.loads(arbplf_ll(json.dumps(d)))
    desired_ll = {"columns": ["value"], "data": [[-3.0]]}
    assert_equal(actual_ll, desired_ll)

    actual_em_update = json.loads(arbplf_em_update(json.dumps(d)))
    assert_equal(actual_em_update, desired_em_update)

    actual_dwell = json.loads(arbplf_dwell(json.dumps(d)))
    assert_equal(actual_dwell, desired_dwell)
Example #9
def test_edges_are_not_preordered():
    # Try switching the order of the edges in the input
    # and increasing the birth rate in the rate matrix.
    d = {
        "model_and_data": {
            "edges": [[1, 2], [0, 1]],
            "edge_rate_coefficients": [1, 2],
            "rate_matrix": [[0, 2], [0, 0]],
            "probability_array": [[[1, 0], [1, 1], [1, 0]]]
        },
        "site_reduction": {
            "aggregation": "only"
        }
    }

    actual_marginal = json.loads(arbplf_marginal(json.dumps(d)))
    assert_equal(actual_marginal, desired_marginal)

    g = copy.deepcopy(d)
    g['trans_reduction'] = dict(selection=[[0, 1], [1, 0]])
    actual_trans = json.loads(arbplf_trans(json.dumps(g)))
    assert_equal(actual_trans, desired_trans)

    actual_ll = json.loads(arbplf_ll(json.dumps(d)))
    desired_ll = {"columns": ["value"], "data": [[-6.0]]}
    assert_equal(actual_ll, desired_ll)

    actual_em_update = json.loads(arbplf_em_update(json.dumps(d)))
    assert_equal(actual_em_update, desired_em_update)

    actual_dwell = json.loads(arbplf_dwell(json.dumps(d)))
    assert_equal(actual_dwell, desired_dwell)
Example #10
def test_truncated_ll():
    d = copy.deepcopy(D)
    d['model_and_data']['probability_array'][0][-1] = [0, 1]
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    actual = df.value.values[0]
    # compute the desired closed form solution
    T = sum(rates)
    desired = log(1 - exp(-T))
    # compare actual and desired result
    assert_allclose(actual, desired)
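The closed form holds because the rate matrix [[0, 1], [0, 0]] in D describes a pure-birth chain: conditioned on starting in state 0, the probability of having jumped to state 1 by total time T is 1 - exp(-T). A quick numerical check of that identity (a sketch, assuming numpy and scipy are available):

import numpy as np
from scipy.linalg import expm

T = 3.0  # stands in for sum(rates)
Q = np.array([[-1.0, 1.0], [0.0, 0.0]])  # generator with diagonal filled in
p01 = expm(Q * T)[0, 1]  # P(state 1 at time T | state 0 at time 0)
assert np.isclose(np.log(p01), np.log(1 - np.exp(-T)))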
Example #11
def test_simplified_felsenstein_fig_16_4_example():
    x = {
     "model_and_data" : {
         "edges" : [[5, 0], [5, 1], [5, 6], [6, 2], [6, 7], [7, 3], [7, 4]],
         "edge_rate_coefficients" : [0.01, 0.2, 0.15, 0.3, 0.05, 0.3, 0.02],
         "rate_matrix" : [
             [0, 3, 3, 3],
             [3, 0, 3, 3],
             [3, 3, 0, 3],
             [3, 3, 3, 0]],
         "probability_array" : [[
             [1, 0, 0, 0],
             [0, 1, 0, 0],
             [0, 1, 0, 0],
             [0, 1, 0, 0],
             [0, 0, 1, 0],
             [0.25, 0.25, 0.25, 0.25],
             [1, 1, 1, 1],
             [1, 1, 1, 1]]]},
     "site_reduction" : {
         "aggregation" : "sum"}}
    arbplf_ll(json.dumps(x))
Example #12
def objective(X):
    # 7 branch length parameters
    # 3 stationary distribution parameters
    # 5 symmetric GTR parameters
    # 1 gamma rate mixture shape parameter
    print('in:', X)
    #edge_rate_coefficients = np.exp(X[:7]).tolist()
    edge_rate_coefficients = X[:7].tolist()
    p = expit(X[7:7+3])
    q = expit(-X[7:7+3])
    root_prior = np.zeros(4)
    root_prior[0] = p[0] * p[1]
    root_prior[1] = p[0] * q[1]
    root_prior[2] = q[0] * p[2]
    root_prior[3] = q[0] * q[2]
    a, b, c, d, e = np.exp(X[7+3:7+3+5])
    rate_matrix = np.array([
        [0, a, b, c],
        [a, 0, d, e],
        [b, d, 0, 1],
        [c, e, 1, 0]])
    rate_matrix = (rate_matrix * root_prior).tolist()
    root_prior = root_prior.tolist()
    gamma_shape = np.exp(X[-1])
    rate_mixture = dict(
            prior = [1/rate_category_count] * rate_category_count,
            rates = discretized_gamma(rate_category_count, gamma_shape))
    model_and_data = dict(
            edges = [[5, 0], [5, 1], [6, 5], [6, 2], [7, 6], [7, 3], [7, 4]],
            edge_rate_coefficients = edge_rate_coefficients,
            root_prior = root_prior,
            rate_matrix = rate_matrix,
            rate_mixture = rate_mixture,
            probability_array = probability_array)
    d = dict(
        model_and_data = model_and_data,
        site_reduction = dict(aggregation='sum'))
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    log_likelihood = df.values[0, 0]
    y = -log_likelihood
    print('out:', y)
    return y
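The expit reparameterization above always produces a valid root prior, because expit(x) + expit(-x) = 1 and therefore p[0]*p[1] + p[0]*q[1] + q[0]*p[2] + q[0]*q[2] = p[0] + q[0] = 1. A quick check:

import numpy as np
from scipy.special import expit

v = np.array([0.3, -1.2, 2.0])  # arbitrary unconstrained parameters
p, q = expit(v), expit(-v)
prior = np.array([p[0]*p[1], p[0]*q[1], q[0]*p[2], q[0]*q[2]])
assert np.isclose(prior.sum(), 1.0)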
Example #13
def generic_objective(get_s, X):
    s_in = get_s(X)
    neg_ll = partial(generic_neg_ll, get_s)

    # compute negative log likelihood
    s_out = arbplf_ll(s_in)
    df = pd.read_json(StringIO(s_out), orient='split', precise_float=True)
    y = -df.value.values[0]

    # explicitly compute derivatives with respect to edge rate coefficients
    s_out = arbplf_deriv(s_in)
    df = pd.read_json(StringIO(s_out), orient='split', precise_float=True)
    d = -df.set_index('edge').value.values

    edge_count = len(_edges)
    D = np.concatenate((d, [0]*3))
    request = np.array([0]*edge_count + [1]*3)
    _finite_diffs(neg_ll, request, D, X, y)
    print(y, D)
    return y, D
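_finite_diffs is defined elsewhere in the source. Judging from the call site, it fills the requested gradient entries by finite differences while leaving the analytically computed edge derivatives alone; a hypothetical sketch (not the project's implementation):

import numpy as np

def _finite_diffs(f, request, D, X, y, h=1e-7):
    # f: scalar objective; request: 0/1 mask over parameters;
    # D: gradient vector updated in place; y: precomputed f(X).
    for i, wanted in enumerate(request):
        if wanted:
            Xh = np.array(X, dtype=float)
            Xh[i] += h
            D[i] = (f(Xh) - y) / h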
Example #14
def objective(X):
    # 7 branch length parameters
    # 3 stationary distribution parameters
    # 5 symmetric GTR parameters
    # 1 gamma rate mixture shape parameter
    print('in:', X)
    #edge_rate_coefficients = np.exp(X[:7]).tolist()
    edge_rate_coefficients = X[:7].tolist()
    p = expit(X[7:7 + 3])
    q = expit(-X[7:7 + 3])
    root_prior = np.zeros(4)
    root_prior[0] = p[0] * p[1]
    root_prior[1] = p[0] * q[1]
    root_prior[2] = q[0] * p[2]
    root_prior[3] = q[0] * q[2]
    a, b, c, d, e = np.exp(X[7 + 3:7 + 3 + 5])
    rate_matrix = np.array([[0, a, b, c], [a, 0, d, e], [b, d, 0, 1],
                            [c, e, 1, 0]])
    rate_matrix = (rate_matrix * root_prior).tolist()
    root_prior = root_prior.tolist()
    gamma_shape = np.exp(X[-1])
    rate_mixture = dict(prior=[1 / rate_category_count] * rate_category_count,
                        rates=discretized_gamma(rate_category_count,
                                                gamma_shape))
    model_and_data = dict(edges=[[5, 0], [5, 1], [6, 5], [6, 2], [7, 6],
                                 [7, 3], [7, 4]],
                          edge_rate_coefficients=edge_rate_coefficients,
                          root_prior=root_prior,
                          rate_matrix=rate_matrix,
                          rate_mixture=rate_mixture,
                          probability_array=probability_array)
    d = dict(model_and_data=model_and_data,
             site_reduction=dict(aggregation='sum'))
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    log_likelihood = df.values[0, 0]
    y = -log_likelihood
    print('out:', y)
    return y
Example #15
def main():
    xs = np.linspace(1e-5, 1, 100)
    ts = -2 * np.log(xs)
    arr = []
    for i, t in enumerate(ts):
        s = arbplf_ll(get_json_input(t))
        df = pd.read_json(StringIO(s), orient='split', precise_float=True)
        arr.append(df.value.values[0])
    lines = plt.plot(xs, arr, 'blue')
    plt.ylabel("log likelihood")
    plt.xlabel("x = exp(-0.5 t)")
    plt.savefig('out00.svg', transparent=True)

    # local optima
    for i, t in enumerate((0.1, 6.0)):
        s = arbplf_newton_refine(get_json_input(t))
        df = pd.read_json(StringIO(s), orient='split', precise_float=True)
        u = df.value.values[0]
        print('local optimum', i, ':')
        print('  initial guess:', t)
        print('  refined isolated interior local optimum:')
        print('    t = {:.16}'.format(u))
        print('    x =', np.exp(-0.5 * u))
Example #16
def myll(d):
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient="split", precise_float=True)
    return df
Example #17
def run(assumed_kappa):
    state_count = 4
    node_count = 5
    true_kappa = 4
    assumed_m, assumed_denom = get_rate_matrix(assumed_kappa)
    true_m, true_denom = get_rate_matrix(true_kappa)
    edges = [[0, 2], [0, 1], [1, 3], [1, 4]]
    assumed_coeffs = [28, 21, 12, 9]
    true_coeffs = [30, 20, 10, 10]
    # There are five nodes.
    # Three of them have unobserved states.
    # Use one site for each of the 4^3 = 64 possible observations.
    X = [-1]
    U = range(4)
    all_site_patterns = list(itertools.product(X, X, U, U, U))
    prior_array = [[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1],
                    [1, 1, 1, 1]]]
    probability_array = []
    for pattern in all_site_patterns:
        arr = []
        for i, p in enumerate(pattern):
            if p == -1:
                row = [1] * state_count
            else:
                row = [0] * state_count
                row[p] = 1
            arr.append(row)
        probability_array.append(arr)
    model_and_data = {
        "edges": edges,
        "edge_rate_coefficients": true_coeffs,
        "root_prior": "equilibrium_distribution",
        "rate_matrix": true_m,
        "rate_divisor": true_denom * 100,
        "probability_array": probability_array
    }
    d = {"model_and_data": model_and_data}
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    log_likelihoods = df.value.values
    print('log likelihood sum:', sum(log_likelihoods))

    # compute ts and tv using the likelihoods as observation weights
    weights = [math.exp(ll) for ll in log_likelihoods]
    total = sum(weights)
    weights = [w / total for w in weights]
    ts_pairs, tv_pairs = get_ts_tv_pairs()

    model_and_data = {
        "edges": edges,
        "edge_rate_coefficients": assumed_coeffs,
        "root_prior": "equilibrium_distribution",
        "rate_matrix": assumed_m,
        "rate_divisor": assumed_denom * 100,
        "probability_array": probability_array
    }
    d = {
        "model_and_data": model_and_data,
        "site_reduction": {
            "aggregation": weights
        },
        "edge_reduction": {
            "aggregation": "sum"
        },
        "trans_reduction": {
            "aggregation": "sum"
        }
    }

    d['trans_reduction']['selection'] = ts_pairs
    d['trans_reduction']['aggregation'] = [1000] * len(ts_pairs)
    d['site_reduction']['aggregation'] = "sum"
    d['model_and_data']['probability_array'] = prior_array
    s = arbplf_trans(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    print("prior ts expectation:")
    print(df.value.values[0])
    print(s)

    d['trans_reduction']['selection'] = ts_pairs
    d['trans_reduction']['aggregation'] = [1000] * len(ts_pairs)
    d['site_reduction']['aggregation'] = weights
    d['model_and_data']['probability_array'] = probability_array
    s = arbplf_trans(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    print("conditional ts expectation:")
    print(df.value.values[0])
    print(s)

    d['trans_reduction']['selection'] = tv_pairs
    d['trans_reduction']['aggregation'] = [1000] * len(tv_pairs)
    d['site_reduction']['aggregation'] = "sum"
    d['model_and_data']['probability_array'] = prior_array
    s = arbplf_trans(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    print("prior tv expectation:")
    print(df.value.values[0])
    print(s)

    d['trans_reduction']['selection'] = tv_pairs
    d['trans_reduction']['aggregation'] = [1000] * len(tv_pairs)
    d['site_reduction']['aggregation'] = weights
    d['model_and_data']['probability_array'] = probability_array
    s = arbplf_trans(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    print("conditional tv expectation:")
    print(df.value.values[0])
    print(s)
Example #18
AGAACTGCTAACTCACTACCCATGTATAACAACATGGCTTTCTCAACTTTTAAAGGATA
ACAGCTATCCATTGGTCTTAGGACCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATA
GCAATGTACACCACCATAGCCATTCTAACGCTAACCTCCCTAATTCCCCCCATTACAGCC
ACCCTTATTAACCCCAATAAAAAGAACTTATACCCGCACTACGTAAAAATGACCATTGCC
TCTACCTTTATAATCAGCCTATTTCCCACAATAATATTCATGTGCACAGACCAAGAAACC
ATTATTTCAAACTGACACTGAACTGCAACCCAAACGCTAGAACTCTCCCTAAGCTT    
"""
)

def elem(i):
    x = [0]*4
    x[i] = 1
    return x
probability_array = []
sequences = [''.join(s.split()) for s in brown_nuc_sequences]
state_map = dict(zip('TCAG', (0, 1, 2, 3)))
for column in zip(*sequences):
    rows = []
    for nt in column:
        rows.append(elem(state_map[nt]))
    # 3 internal nodes
    for i in range(3):
        rows.append([1, 1, 1, 1])
    probability_array.append(rows)
model_and_data['probability_array'] = probability_array

d = dict(model_and_data=model_and_data, site_reduction=dict(aggregation='sum'))
s = arbplf_ll(json.dumps(d))
df = pd.read_json(StringIO(s), orient='split', precise_float=True)
print(df)
Example #19
GCAATGTACACCACCATAGCCATTCTAACGCTAACCTCCCTAATTCCCCCCATTACAGCC
ACCCTTATTAACCCCAATAAAAAGAACTTATACCCGCACTACGTAAAAATGACCATTGCC
TCTACCTTTATAATCAGCCTATTTCCCACAATAATATTCATGTGCACAGACCAAGAAACC
ATTATTTCAAACTGACACTGAACTGCAACCCAAACGCTAGAACTCTCCCTAAGCTT    
""")


def elem(i):
    x = [0] * 4
    x[i] = 1
    return x


probability_array = []
sequences = [''.join(s.split()) for s in brown_nuc_sequences]
state_map = dict(zip('TCAG', (0, 1, 2, 3)))
for column in zip(*sequences):
    rows = []
    for nt in column:
        rows.append(elem(state_map[nt]))
    # 3 internal nodes
    for i in range(3):
        rows.append([1, 1, 1, 1])
    probability_array.append(rows)
model_and_data['probability_array'] = probability_array

d = dict(model_and_data=model_and_data, site_reduction=dict(aggregation='sum'))
s = arbplf_ll(json.dumps(d))
df = pd.read_json(StringIO(s), orient='split', precise_float=True)
print(df)
Example #20
def test_ok():
    arbplf_ll(json.dumps(good_input))
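good_input is a module-level fixture not shown in these snippets. Judging from the other inputs on this page it plausibly looks something like the following (an assumption, not the actual fixture):

good_input = {
    "model_and_data": {
        "edges": [[0, 1], [1, 2]],
        "edge_rate_coefficients": [1, 2],
        "rate_matrix": [[0, 1], [0, 0]],
        "probability_array": [[[1, 0], [1, 1], [1, 0]]]},
    "site_reduction": {"aggregation": "sum"}}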
Example #21
File: opt.py  Project: argriffing/phyly
def myll(d):
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    return df
Example #22
def test_ok_reduction_deleted():
    x = copy.deepcopy(good_input)
    del x['site_reduction']
    arbplf_ll(json.dumps(x))
Example #23
def run():
    state_count = 4
    edge_count = 5
    node_count = edge_count + 1

    # Define the tree used in the phyly transition mapping example.
    edges = [[4, 0], [4, 1], [5, 4], [5, 2], [5, 3]]
    inference_rates = [0.001, 0.002, 0.008, 0.01, 0.1]
    simulation_rates = [0.001 * (9 / 20), 0.002, 0.008, 0.01, 0.1]

    """
    # Define the poisson rate matrix with expected exit rate 1
    rate_divisor = 3
    rate_matrix = [
            [0, 1, 1, 1],
            [1, 0, 1, 1],
            [1, 1, 0, 1],
            [1, 1, 1, 0]]
    """
    # use a GTR rate matrix
    a, b, c, d, e, pA, pC, pG, pT = (
            1, 0.2, 0.3, 0.4, 0.4, 0.1, 0.35, 0.35, 0.2)
    rate_matrix = make_rate_matrix(a, b, c, d, e, pA, pC, pG, pT)

    # Use one site for each of the 4^4 = 256 possible observations.
    X = [-1]
    U = range(4)
    all_site_patterns = list(itertools.product(U, U, U, U, X, X))
    prior_array = [[
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]]
    probability_array = []
    for pattern in all_site_patterns:
        arr = []
        for i, p in enumerate(pattern):
            if p == -1:
                row = [1]*state_count
            else:
                row = [0]*state_count
                row[p] = 1
            arr.append(row)
        probability_array.append(arr)
    model_and_data = {
            "edges" : edges,
            "edge_rate_coefficients" : simulation_rates,
            "rate_divisor" : "equilibrium_exit_rate",
            "root_prior" : "equilibrium_distribution",
            "rate_matrix" : rate_matrix,
            "probability_array" : probability_array}
    d = {"model_and_data" : model_and_data}
    s = json.dumps(d)
    s = arbplf_ll(s)
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    log_likelihoods = df.value.values

    # compute expectations using the likelihoods as observation weights
    weights = [math.exp(ll) for ll in log_likelihoods]
    total = sum(weights)
    weights = [(20000 * w) / total for w in weights]

    model_and_data = {
            "edges" : edges,
            "edge_rate_coefficients" : inference_rates,
            "rate_divisor" : "equilibrium_exit_rate",
            "root_prior" : "equilibrium_distribution",
            "rate_matrix" : rate_matrix,
            "probability_array" : probability_array}
    d = {
            "model_and_data" : model_and_data,
            "site_reduction" : {"aggregation" : weights},
            "trans_reduction" : {"aggregation" : "sum"}}

    d['model_and_data']['probability_array'] = prior_array
    d['trans_reduction']['selection'] = [
            [i, j] for i in range(4) for j in range(4) if i != j]
    d['site_reduction'] = {"aggregation" : "sum"}
    s = arbplf_trans(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    print("prior expectation:")
    print(20000 * df.value.values)

    d['model_and_data']['probability_array'] = probability_array
    d['trans_reduction']['selection'] = [
            [i, j] for i in range(4) for j in range(4) if i != j]
    d['site_reduction'] = {"aggregation" : weights}
    s = arbplf_trans(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    print("conditional expectation:")
    print(df.value.values)
Example #24
def test_ok():
    arbplf_ll(json.dumps(good_input))
Example #25
def test_ok_reduction_empty():
    x = copy.deepcopy(good_input)
    x['site_reduction'] = {}
    arbplf_ll(json.dumps(x))
Example #26
def test_ok_reduction_selection_aggregation():
    x = copy.deepcopy(good_input)
    x['site_reduction'] = {"selection" : [0], "aggregation" : "sum"}
    arbplf_ll(json.dumps(x))
Example #27
def test_ok_reduction_avg_aggregation():
    x = copy.deepcopy(good_input)
    x['site_reduction'] = {"aggregation" : "avg"}
    arbplf_ll(json.dumps(x))