Exemplo n.º 1
0
# Epoch model over three leaves (a, b, c), described by two merge steps.
modelILS = build_epoch_seperated_model(
    3,
    [[0, 0, 1],    # a and b merge into '0'; c carries on as '1'
     [0, 0]],      # then '0' and '1' merge into a new '0'
    [1, estates, estates])
nstates = len(modelILS.tree_map)

names = ["bonobo", "pantro2", "hg18"]
init_ILS = (1.0e6 * u, 4.5e6 * u, i_c, i_r)

# Read every alignment. The column map returned for one sequence is handed
# to the next call, so columns first seen in an earlier file keep their
# index in later ones.
all_obs = []
for seq in seqs:
    obs, colmap = readObservations(seq, names, COL_MAP)
    COL_MAP = colmap
    all_obs.append(obs)
total_L = sum(len(observed) for observed in all_obs)

print("%.2fMbp of data in total" % (total_L / 1e6,))

# Fit the model; progress is written to standard output.
L, est = estimate_ILS(modelILS, all_obs, *init_ILS, outfile="/dev/stdout")

print('Final results:')
print("\t".join(str(value) for value in [L] + est))
Exemplo n.º 2
0
def runILSctmc(seqs, **args):
    """Build the three-leaf ILS model for *seqs* and optionally fit it.

    Required keyword arguments (all read from ``args``):
        NeRef: reference population size used to scale the split times.
        g: generation time.
        u: mutation rate.
        r: initial recombination rate.
        N1, N12, N123: population sizes, converted to initial coalescence
            rates as ``1 / (2 * g * u * N)``.
        T12, T123: times converted to initial model times as
            ``u * T / g / (2 * NeRef)`` (presumably the two split times --
            confirm against the caller).

    Optional keyword arguments:
        hook: object whose ``run(model, col_map, all_obs, coal_rates,
            recomb_rates, [0]*3, [0, t1, t2])`` method is called with the
            final parameter values.

    Side effects:
        Rebinds the module-level ``COL_MAP`` and writes a status line to
        standard error.

    Returns:
        A tuple ``(L, estimates, startvalues)``. ``startvalues`` and
        ``estimates`` are dicts keyed by ``'i_t1'``, ``'i_t2'``, ``'i_c1'``,
        ``'i_c2'``, ``'i_c3'`` and ``'i_r'``. ``L`` is the first value
        returned by ``estimate_ILS``, or ``None`` while estimation is
        disabled, in which case ``estimates`` equals ``startvalues``.

    Raises:
        KeyError: if a required keyword argument is missing.
    """
    global COL_MAP

    NeRef = args["NeRef"]
    g = args["g"]
    u = args["u"]
    i_r = args["r"]
    i_N1 = args["N1"]
    i_N2 = args["N12"]
    i_N3 = args["N123"]
    i_c1 = 1.0/(2*g*u*i_N1)
    i_c2 = 1.0/(2*g*u*i_N2)
    i_c3 = 1.0/(2*g*u*i_N3)
    i_t1 = u * args["T12"] / g / (2*NeRef)
    i_t2 = u * args["T123"] / g / (2*NeRef)

    # Explicit dict instead of eval() over local variable names.
    startvalues = {
        'i_t1': i_t1, 'i_t2': i_t2,
        'i_c1': i_c1, 'i_c2': i_c2, 'i_c3': i_c3,
        'i_r': i_r,
    }

    estates = 4
    sys.stderr.write('running with %d epoch states\n' % estates)

    modelILS = build_epoch_seperated_model(3, [[0,0,1], [0,0]], [1,estates,estates])
    names = ["'1'", "'2'", "'0'"]

    all_obs = []
    forwarders = []

    # Start from every ACGT triplet so the basic columns have fixed indices.
    COL_MAP = dict((v,i) for i,v in enumerate(product('ACGT', repeat=3)))
    for seq in seqs:
        # Each sequence yields a column map extended with any new columns
        # that were seen. Keep it, so that COL_MAP covers every index that
        # can occur in the observations and is reused for the next sequence.
        obs, COL_MAP = readObservations(seq, names, COL_MAP)
        all_obs.append(obs)

    # Parameter estimation is switched off; the initial values are passed
    # straight through as the "estimates".
    doEstimate = False

    L = None
    if doEstimate:
        for obs in all_obs:
            print('next obs:')
            ffd, foutname = tempfile.mkstemp()
            print('  temp fd/name: %s %s' % (ffd, foutname))
            try:
                with os.fdopen(ffd, 'w') as fout:
                    # Write *all* observations, space separated, on one
                    # line. The previous loop repeated the next-to-last
                    # observation and dropped the last one.
                    fout.write(" ".join(map(str, obs)))
                    fout.write("\n")
                print('  written, creating forwarder')
                f = Forwarder.fromSequence(seqFilename = foutname, alphabetSize = len(COL_MAP))
                print('  - done.')
            finally:
                # The forwarder has consumed the file (or failed); either
                # way the temporary file must not be left behind.
                os.remove(foutname)
            forwarders.append(f)

        L, fitted = estimate_ILS(modelILS, forwarders, i_t1, i_t2, i_c1, i_c2, i_c3, i_r, outfile="/dev/null")
        i_t1, i_t2, i_c1, i_c2, i_c3, i_r = fitted

    estimates = {
        'i_t1': i_t1, 'i_t2': i_t2,
        'i_c1': i_c1, 'i_c2': i_c2, 'i_c3': i_c3,
        'i_r': i_r,
    }

    if "hook" in args:
        args["hook"].run(modelILS, COL_MAP, all_obs, [i_c1, i_c2, i_c3], [i_r]*3, [0]*3, [0, i_t1, i_t2])

    return L, estimates, startvalues
Exemplo n.º 3
0
        3,
        # we start with a,b,c.
        [
            # merge a and b to '0', c continues as '1'
            [0,0,1],
            # and now '0' and '1' is merged to a new '0'
            [0,0]
        ],
        [1,estates,estates])
# Species labels looked up in each alignment, and the starting parameter
# values handed to the optimiser.
names = ["bonobo", "pantro2", "hg18"]
init_ILS = (1.0e6*u, 4.5e6*u, i_c, i_r)
nstates = len(modelILS.tree_map)

total_L = 0
all_obs = []
for seq in seqs:
    # readObservations returns the observations together with a column map
    # extended with any columns not seen before; that map is carried over
    # to the next sequence.
    obs, colmap = readObservations(seq, names, COL_MAP)
    all_obs.append(obs)
    total_L += len(obs)
    COL_MAP = colmap

print("%.2fMbp of data in total" % (total_L/1e6,))

# Run the estimation, logging to standard output.
L, est = estimate_ILS(modelILS, all_obs, *init_ILS, outfile="/dev/stdout")


print('Final results:')
print("\t".join(str(field) for field in [L] + est))