示例#1
0
def test_to_igraph():
    """Check that ``to_igraph()`` yields the same structure as ``adjacency()``.

    Builds a SuchLinkedTrees object from the ``gopher_tree``, ``lice_tree``
    and ``gl_links`` fixtures, exports it to igraph, and compares igraph's
    adjacency matrix with the SuchLinkedTrees one for shape and content.
    """
    T1 = SuchTree(gopher_tree)
    T2 = SuchTree(lice_tree)
    links = pd.read_csv(gl_links, index_col=0)

    SLT = SuchLinkedTrees(T1, T2, links)

    g = SLT.to_igraph()

    # igraph returns an unweighted adjacency matrix, so round the weighted
    # SuchLinkedTrees adjacency matrix up to an unweighted form.
    saj = numpy.ceil(SLT.adjacency())

    # igraph's Matrix class does not expose a numpy interface, so rebuild
    # the array row by row.
    iaj = numpy.array([list(row) for row in g.get_adjacency()])

    # matrices must be the same shape
    assert saj.shape == iaj.shape

    # All matrix elements must be equal. ndarray.all() replaces the
    # reduce/lambda fold, which needed functools.reduce on Python 3.
    assert (saj == iaj).all()
示例#2
0
def test_to_igraph():
    """Check that ``to_igraph()`` yields the same structure as ``adjacency()``.

    Builds a SuchLinkedTrees object from the ``gopher_tree``, ``lice_tree``
    and ``gl_links`` fixtures, exports it to igraph, and compares igraph's
    adjacency matrix with the SuchLinkedTrees one for shape and content.
    """
    T1 = SuchTree(gopher_tree)
    T2 = SuchTree(lice_tree)
    links = pd.read_csv(gl_links, index_col=0)

    SLT = SuchLinkedTrees(T1, T2, links)

    g = SLT.to_igraph()

    # igraph returns an unweighted adjacency matrix, so round the weighted
    # SuchLinkedTrees adjacency matrix up to an unweighted form.
    saj = numpy.ceil(SLT.adjacency())

    # igraph's Matrix class does not expose a numpy interface, so rebuild
    # the array row by row.
    iaj = numpy.array([list(row) for row in g.get_adjacency()])

    # matrices must be the same shape
    assert saj.shape == iaj.shape

    # All matrix elements must be equal. ndarray.all() replaces the
    # reduce/lambda fold, which needed functools.reduce on Python 3.
    assert (saj == iaj).all()
示例#3
0
def test_get_column_leafs_by_name_as_row_ids():
    """Check ``get_column_leafs(name, as_row_ids=True)`` against the links.

    For every column of a random link table, the leaf ids of the linked rows
    are translated to their positions in ``SLT.col_ids`` and compared, as a
    set, with what ``get_column_leafs`` returns for that column name.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)
    SLT = SuchLinkedTrees(T, T, links)

    # Loop-invariant work, done once instead of once per column.
    has_link = links.astype(bool)
    col_ids = list(SLT.col_ids)

    for colname in links.columns:
        column = has_link[colname]
        leafs1 = {col_ids.index(T.leafs[name])
                  for name in column[column].index}
        leafs2 = set(SLT.get_column_leafs(colname, as_row_ids=True))
        assert leafs1 == leafs2
示例#4
0
def test_get_column_leafs():
    """Check ``get_column_leafs(n)`` against a random link table.

    For each column index, the leaf ids of the rows holding a non-zero link
    must equal, as a set, the ids returned by ``get_column_leafs``.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)
    SLT = SuchLinkedTrees(T, T, links)

    # The boolean view does not change between columns; build it once.
    has_link = links.astype(bool)

    for n, colname in enumerate(links.columns):
        column = has_link[colname]
        leafs1 = {T.leafs[name] for name in column[column].index}
        leafs2 = set(SLT.get_column_leafs(n))
        assert leafs1 == leafs2
示例#5
0
def test_get_column_leafs():
    """Check ``get_column_leafs(n)`` against a random link table.

    For each column index, the leaf ids of the rows holding a non-zero link
    must equal, as a set, the ids returned by ``get_column_leafs``.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)
    SLT = SuchLinkedTrees(T, T, links)

    # The boolean view does not change between columns; build it once.
    has_link = links.astype(bool)

    for n, colname in enumerate(links.columns):
        column = has_link[colname]
        leafs1 = {T.leafs[name] for name in column[column].index}
        leafs2 = set(SLT.get_column_leafs(n))
        assert leafs1 == leafs2
示例#6
0
def test_get_column_links():
    """Check ``get_column_links(n)`` entry by entry against the link table.

    The row labels are shuffled so that the table's row order differs from
    the tree's leaf order; each value returned for column ``n`` is compared
    with the table entry looked up by the name in ``SLT.row_names``.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # The boolean view does not change between columns; build it once.
    has_link = links.astype(bool)

    for n, colname in enumerate(links.columns):
        expected = has_link[colname]
        observed = SLT.get_column_links(n)
        for m, rowname in enumerate(SLT.row_names):
            assert expected[rowname] == observed[m]
示例#7
0
def test_get_column_links():
    """Check ``get_column_links(n)`` entry by entry against the link table.

    The row labels are shuffled so that the table's row order differs from
    the tree's leaf order; each value returned for column ``n`` is compared
    with the table entry looked up by the name in ``SLT.row_names``.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # The boolean view does not change between columns; build it once.
    has_link = links.astype(bool)

    for n, colname in enumerate(links.columns):
        expected = has_link[colname]
        observed = SLT.get_column_links(n)
        for m, rowname in enumerate(SLT.row_names):
            assert expected[rowname] == observed[m]
示例#8
0
def test_get_column_leafs_by_name_as_row_ids():
    """Check ``get_column_leafs(name, as_row_ids=True)`` against the links.

    For every column of a random link table, the leaf ids of the linked rows
    are translated to their positions in ``SLT.col_ids`` and compared, as a
    set, with what ``get_column_leafs`` returns for that column name.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)
    SLT = SuchLinkedTrees(T, T, links)

    # Loop-invariant work, done once instead of once per column.
    has_link = links.astype(bool)
    col_ids = list(SLT.col_ids)

    for colname in links.columns:
        column = has_link[colname]
        leafs1 = {col_ids.index(T.leafs[name])
                  for name in column[column].index}
        leafs2 = set(SLT.get_column_leafs(colname, as_row_ids=True))
        assert leafs1 == leafs2
示例#9
0
def test_init_both_trees_by_file():
    """Check that SuchLinkedTrees accepts ``test_tree`` directly for both trees.

    A SuchTree is built only to obtain leaf names for the link table; the
    constructor under test receives ``test_tree`` itself twice.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)
    SLT = SuchLinkedTrees(test_tree, test_tree, links)
    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(SLT, SuchLinkedTrees)
示例#10
0
def test_link_identities():
    """Check that ``SLT.linklist`` names exactly the leaf pairs that were linked.

    Two four-leaf trees are written to temporary files and loaded, five
    links are set in an otherwise empty link table, and the ids in
    ``linklist`` are mapped back to leaf names for comparison.
    """
    newick_sources = (b'(A:1,(B:1,(C:1,D:1)E:1)F:1)G:1;',
                      b'((a:1,b:1)e:1,(c:1,d:1)f:1)g:1;')
    trees = []
    for newick in newick_sources:
        with tempfile.NamedTemporaryFile() as handle:
            handle.file.write(newick)
            # Close the underlying file before SuchTree reads it by name.
            handle.file.close()
            trees.append(SuchTree(handle.name))
    T1, T2 = trees

    ll = (('A', 'a'), ('B', 'c'), ('B', 'd'), ('C', 'd'), ('D', 'd'))

    empty = numpy.zeros((4, 4), dtype=int)
    links = pd.DataFrame(empty,
                         index=list(T1.leafs.keys()),
                         columns=list(T2.leafs.keys()))
    for row_name, col_name in ll:
        links.at[row_name, col_name] = 1

    SLT = SuchLinkedTrees(T1, T2, links)

    # Reverse lookups: leaf id -> leaf name for each tree.
    t1_names = {leaf_id: name for name, leaf_id in T1.leafs.items()}
    t2_names = {leaf_id: name for name, leaf_id in T2.leafs.items()}

    # Each linklist row is unpacked as (T2 leaf id, T1 leaf id).
    recovered = {(t1_names[t1_id], t2_names[t2_id])
                 for t2_id, t1_id in SLT.linklist.tolist()}

    assert set(ll) == recovered
示例#11
0
def test_subset_b():
    """Check that ``subset_b(1)`` restricts ``linklist`` to the expected links.

    The expected links are taken from the columns of the link table whose
    names correspond to ``TreeB.get_leafs(1)``; after ``subset_b(1)`` the
    (TreeB id, TreeA id) pairs in ``SLT.linklist`` must match them as a set.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # Reverse lookup: leaf id -> leaf name for TreeB.
    sfeal = {leaf_id: name for name, leaf_id in SLT.TreeB.leafs.items()}
    subset_links = links[[sfeal[leaf_id]
                          for leaf_id in SLT.TreeB.get_leafs(1)]]
    # unstack() gives a Series keyed by (column name, row name).
    l = subset_links.unstack()

    SLT.subset_b(1)

    A = {(SLT.TreeB.leafs[col_name], SLT.TreeA.leafs[row_name])
         for col_name, row_name in l[l > 0].index}
    B = {(link[0], link[1]) for link in SLT.linklist}
    assert A == B
示例#12
0
def test_subset_b():
    """Check that ``subset_b(1)`` restricts ``linklist`` to the expected links.

    The expected links are taken from the columns of the link table whose
    names correspond to ``TreeB.get_leafs(1)``; after ``subset_b(1)`` the
    (TreeB id, TreeA id) pairs in ``SLT.linklist`` must match them as a set.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # Reverse lookup: leaf id -> leaf name for TreeB.
    sfeal = {leaf_id: name for name, leaf_id in SLT.TreeB.leafs.items()}
    subset_links = links[[sfeal[leaf_id]
                          for leaf_id in SLT.TreeB.get_leafs(1)]]
    # unstack() gives a Series keyed by (column name, row name).
    l = subset_links.unstack()

    SLT.subset_b(1)

    A = {(SLT.TreeB.leafs[col_name], SLT.TreeA.leafs[row_name])
         for col_name, row_name in l[l > 0].index}
    B = {(link[0], link[1]) for link in SLT.linklist}
    assert A == B
示例#13
0
def test_row_names():
    """Check that ``SLT.row_names`` equals the link table's row labels.

    The link table is indexed by the tree's leaf names in tree order, so
    ``row_names`` is compared with ``list(T.leafs.keys())``.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)

    SLT = SuchLinkedTrees(T, T, links)
    assert SLT.row_names == list(T.leafs.keys())
示例#14
0
def test_col_ids():
    """Check that ``SLT.col_ids`` matches the tree's leaf ids, in order.

    The link table's columns are the leaf names in tree order, so the ids
    are compared pairwise with ``T.leafs.values()``.
    """
    T = SuchTree(test_tree)
    leaf_names = list(T.leafs.keys())
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=leaf_names,
                         index=leaf_names)

    SLT = SuchLinkedTrees(T, T, links)
    col_ids = SLT.col_ids
    leaf_ids = list(T.leafs.values())
    # Length is checked first because zip() would silently truncate.
    assert len(col_ids) == len(leaf_ids)
    for col_id, leaf_id in zip(col_ids, leaf_ids):
        assert col_id == leaf_id
示例#15
0
def test_linkmatrix_property():
    """Check every ``SLT.linkmatrix`` entry against the link table.

    Row labels are shuffled so the table's row order differs from the tree's
    leaf order. Each table entry, converted to bool, must equal the matrix
    entry at the matching positions in ``row_names`` and ``col_names``.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # enumerate() yields the positions directly, replacing a linear
    # .index() search for every cell of the matrix.
    for col_id, col in enumerate(SLT.col_names):
        for row_id, row in enumerate(SLT.row_names):
            expected = bool(links.at[row, col])
            assert expected == SLT.linkmatrix[row_id][col_id]
示例#16
0
def test_linklist_property():
    """Check that ``SLT.linklist`` holds exactly the non-zero links.

    Every non-zero cell of the link table is converted to a
    (TreeB leaf id, TreeA leaf id) pair and the resulting set is compared
    with the first two entries of each ``linklist`` row.
    """
    T = SuchTree(test_tree)
    row_names = list(T.leafs.keys())
    numpy.random.shuffle(row_names)
    # randint's upper bound is exclusive, so (0, 4) draws from 0..3 exactly
    # like the deprecated random_integers(0, 3).
    links = pd.DataFrame(numpy.random.randint(0, 4, size=(N, N)),
                         columns=list(T.leafs.keys()),
                         index=row_names)
    SLT = SuchLinkedTrees(T, T, links)

    # unstack() gives a Series keyed by (column name, row name).
    l = links.unstack()
    A = {(SLT.TreeB.leafs[col_name], SLT.TreeA.leafs[row_name])
         for col_name, row_name in l[l > 0].index}
    B = {(link[0], link[1]) for link in SLT.linklist}
    assert A == B
示例#17
0
def _run_jprime(arguments, failure_message):
    '''
    Run one JPrIME tool via `java -jar jprime.jar` followed by `arguments`,
    and raise JPrIMEError( failure_message ) on a non-zero exit status.
    '''
    command = ['java'] + java_ops + ['-jar', 'jprime.jar'] + arguments
    if subprocess.call(command) != 0:
        raise JPrIMEError(failure_message)


def simtree(prefix,
            birth_rate=0.3,
            death_rate=0.1,
            min_host_leafs=8,
            max_host_leafs=64,
            min_guest_leafs=4,
            max_guest_leafs=128,
            duplication_rate=0.2,
            loss_rate=0.1,
            switch_rate=0.05,
            k=2.0,
            theta=0.5):
    '''
    Simulate a host tree and a guest tree with JPrIME, link them, and write
    summary files into the directory `prefix` (created if it is missing).

    Time interval is always 1.0 units, and GuestTreeGen stops after 1000
    attempts.

    Parameters
    ----------
    prefix : output directory; every file is written as prefix + '/' + name.
    birth_rate, death_rate : passed to HostTreeGen.
    min_host_leafs, max_host_leafs : passed to HostTreeGen as -min / -max.
    min_guest_leafs, max_guest_leafs : passed to GuestTreeGen as -min / -max.
    duplication_rate, loss_rate, switch_rate : passed to GuestTreeGen.
    k, theta : passed to BranchRelaxer after the 'IIDGamma' model name.

    Files written under `prefix`
    ----------------------------
    links.csv, adjacency.png, cophylo.pdf, eigenvalues.csv, densities.txt,
    distances.txt, correlation.png, moments.csv and data.csv, in addition to
    whatever the JPrIME tools themselves produce.

    Raises
    ------
    JPrIMEError if any of the four JPrIME invocations exits non-zero.

    Returns nothing.
    '''

    max_guest_attempts = 1000

    def out(name):
        # every artifact lives directly under the prefix directory
        return prefix + '/' + name

    # make output directory
    if not exists(prefix):
        mkdir(prefix)

    # build the host tree
    _run_jprime([
        'HostTreeGen', '-bi', '-min',
        str(min_host_leafs), '-max',
        str(max_host_leafs), '1.0',
        str(birth_rate),
        str(death_rate), out('host')
    ], 'HostTreeGen failed.')

    _run_jprime([
        'BranchRelaxer', '-o', out('host.relaxed.tree'),
        out('host.pruned.tree'), 'IIDGamma',
        str(k),
        str(theta)
    ], 'BranchRelaxer failed on host tree.')

    # build the guest tree
    _run_jprime([
        'GuestTreeGen', '--max-attempts',
        str(max_guest_attempts), '-min',
        str(min_guest_leafs), '-max',
        str(max_guest_leafs), out('host.pruned.tree'),
        str(duplication_rate),
        str(loss_rate),
        str(switch_rate), out('guest')
    ], 'GuestTreeGen failed.')

    _run_jprime([
        'BranchRelaxer', '-o', out('guest.relaxed.tree'),
        out('guest.pruned.tree'), 'IIDGamma',
        str(k),
        str(theta)
    ], 'BranchRelaxer failed on guest tree.')

    # load the trees
    T1 = SuchTree(out('host.relaxed.tree'))
    T2 = SuchTree(out('guest.relaxed.tree'))

    # populate the link matrix using the leaf names
    link_matrix = zeros((T1.n_leafs, T2.n_leafs), dtype=int)

    # Materialise the key views: on Python 3 dict.keys() has no .index().
    hostnames = list(T1.leafs.keys())
    guestnames = list(T2.leafs.keys())

    for j, guest_leaf in enumerate(guestnames):
        # Guest leaf names must split on '_' into exactly two parts; the
        # second part is looked up among the host leaf names.
        _, host = guest_leaf.split('_')
        link_matrix[hostnames.index(host), j] = 1

    links = pandas.DataFrame(link_matrix, index=hostnames, columns=guestnames)
    links.to_csv(out('links.csv'))

    # initialize the SuchLinkedTrees object
    SLT = SuchLinkedTrees(T1, T2, links)

    # plot the adjacency matrix
    aj = SLT.adjacency()
    lp_plot = seaborn.heatmap(aj.T,
                              cmap='viridis',
                              vmin=0,
                              vmax=1,
                              cbar=False,
                              square=True,
                              xticklabels=False,
                              yticklabels=False)
    lp_plot.invert_yaxis()
    fig = lp_plot.get_figure()
    # NOTE(review): `size` may not be accepted by newer matplotlib savefig;
    # kept unchanged - confirm against the version in use.
    fig.savefig(out('adjacency.png'), size=6)
    fig.clf()

    # plot cophylogeny using R
    r_code = '''
    tr1 <- read.tree( "HOST_TREE" )
    tr2 <- read.tree( "GUEST_TREE" )
    links <- read.csv( "LINKS", row.names=1, stringsAsFactors = F )
    im <- graph_from_incidence_matrix( as.matrix( links ) )
    assoc <- as_edgelist( im )
    obj <- cophylo( tr1, tr2, assoc=assoc )
    pdf( "OUTFILE", width = 10, height = 12 )
    plot( obj )
    dev.off()
    '''
    # Placeholders are substituted one after another, in this order.
    r_code = r_code.replace('HOST_TREE', out('host.relaxed.tree'))
    r_code = r_code.replace('GUEST_TREE', out('guest.relaxed.tree'))
    r_code = r_code.replace('LINKS', out('links.csv'))
    r_code = r_code.replace('OUTFILE', out('cophylo.pdf'))
    robjects.r(r_code)

    # calculate spectral densities
    lambdas = SLT.spectrum()

    a_lambd = eigvalsh(SLT.TreeA.laplacian()['laplacian'])
    b_lambd = eigvalsh(SLT.TreeB.laplacian()['laplacian'])

    with open(out('eigenvalues.csv'), 'w') as f:
        f.write('graph ' + ','.join(map(str, lambdas)) + '\n')
        f.write('TreeA ' + ','.join(map(str, a_lambd)) + '\n')
        f.write('TreeB ' + ','.join(map(str, b_lambd)) + '\n')

    # Each spectrum is scaled by its largest eigenvalue before the kernel
    # density estimate is sampled on a common grid.
    bandwidth = 0.4
    X = linspace(-0.5, 1.5, 200)
    density = gaussian_kde(lambdas / max(lambdas), bw_method=bandwidth).pdf(X)
    a_dnsty = gaussian_kde(a_lambd / max(a_lambd), bw_method=bandwidth).pdf(X)
    b_dnsty = gaussian_kde(b_lambd / max(b_lambd), bw_method=bandwidth).pdf(X)

    with open(out('densities.txt'), 'w') as f:
        f.write('graph ' + ','.join(map(str, density)) + '\n')
        f.write('TreeA ' + ','.join(map(str, a_dnsty)) + '\n')
        f.write('TreeB ' + ','.join(map(str, b_dnsty)) + '\n')

    # calculate Hommola correlation
    d = SLT.linked_distances()
    r, p = pearsonr(d['TreeA'], d['TreeB'])

    with open(out('distances.txt'), 'w') as f:
        f.write('TreeA ' + ','.join(map(str, d['TreeA'])) + '\n')
        f.write('TreeB ' + ','.join(map(str, d['TreeB'])) + '\n')

    # save jointplot of patristic distances
    # NOTE(review): positional x/y and `size` may be rejected by newer
    # seaborn releases; kept unchanged - confirm against the version in use.
    jp = seaborn.jointplot(d['TreeA'], d['TreeB'], size=6)
    jp.savefig(out('correlation.png'))
    jp.fig.clf()

    # output moment data
    moments = {}
    moments['eigengap'] = lambdas[-1] - lambdas[-2]
    moments['skew'] = skew(density)
    moments['kurtosis'] = kurtosis(density)
    moments['treedist'] = pdd(a_dnsty, b_dnsty)
    moments['occupancy'] = (2.0 * SLT.n_links) \
                           / (SLT.TreeA.n_leafs + SLT.TreeB.n_leafs)
    moments['squareness'] = float(SLT.TreeA.n_leafs) / SLT.TreeB.n_leafs

    moments['r'] = r
    moments['p'] = p

    # keys() and values() iterate in the same order, so header and row align
    with open(out('moments.csv'), 'w') as f:
        f.write(','.join(moments.keys()) + '\n')
        f.write(','.join(map(str, moments.values())))

    # output simulation parameters
    data = {}
    data['prefix'] = prefix
    data['host_leafs'] = T1.n_leafs
    data['guest_leafs'] = T2.n_leafs
    data['links'] = SLT.n_links
    data['birth_rate'] = birth_rate
    data['death_rate'] = death_rate
    data['min_host_leafs'] = min_host_leafs
    data['max_host_leafs'] = max_host_leafs
    data['min_guest_leafs'] = min_guest_leafs
    data['max_guest_leafs'] = max_guest_leafs
    data['duplication_rate'] = duplication_rate
    data['loss_rate'] = loss_rate
    data['switch_rate'] = switch_rate
    data['k'] = k
    data['theta'] = theta

    with open(out('data.csv'), 'w') as f:
        f.write(','.join(data.keys()) + '\n')
        f.write(','.join(map(str, data.values())))