def init():
    """Validate the nucleotide-model settings on ``GC`` and build the Pyvolve model.

    Reads ``GC.nuc_kappa``, ``GC.nuc_frequencies_dictionary`` and
    ``GC.nuc_mutation_rates_dictionary``, collects the non-empty ones into a
    parameter dictionary and stores the resulting ``pyvolve.Model`` in
    ``GC.pyvolve_model``. A string ``GC.nuc_kappa`` is stripped in place; an
    empty string means "no custom kappa".

    Raises:
        AssertionError: if Pyvolve cannot be imported, if a setting has the
            wrong type or content, or if both a custom kappa and custom
            mutation rates are supplied.
    """
    try:
        global pyvolve
        import pyvolve
    except Exception:
        # Return to the starting directory before aborting.
        from os import chdir
        chdir(GC.START_DIR)
        assert False, "Error loading Pyvolve. Install with: pip3 install pyvolve"

    # config validity checks
    custom_model_params = {}
    if isinstance(GC.nuc_kappa, str):
        GC.nuc_kappa = GC.nuc_kappa.strip()
        if GC.nuc_kappa:
            custom_model_params['kappa'] = float(GC.nuc_kappa)
    else:
        custom_model_params['kappa'] = float(GC.nuc_kappa)

    assert isinstance(GC.nuc_frequencies_dictionary, dict), "Specified nuc_frequencies_dictionary is not a dictionary"
    if GC.nuc_frequencies_dictionary:
        for key in GC.nuc_frequencies_dictionary:
            # The offending key is now actually interpolated into the message.
            assert key in {'A', 'C', 'G', 'T'}, "%s is not a valid nucleotide for nuc_frequencies_dictionary. Only DNA nucleotides (A, C, G, or T)" % (key,)
        assert abs(sum(GC.nuc_frequencies_dictionary.values()) - 1) < 0.000000001, "Frequencies in nuc_frequencies_dictionary must sum to 1"
        custom_model_params['state_freqs'] = GC.nuc_frequencies_dictionary

    assert isinstance(GC.nuc_mutation_rates_dictionary, dict), "Specified nuc_mutation_rates_dictionary is not a dictionary"
    if GC.nuc_mutation_rates_dictionary:
        custom_model_params['mu'] = GC.nuc_mutation_rates_dictionary

    # The mutation rates are stored under 'mu', so that is the key to test;
    # checking for 'nuc_mutation_rates_dictionary' could never trigger.
    assert not ('kappa' in custom_model_params and 'mu' in custom_model_params), "Cannot use custom values for both nuc_kappa and nuc_mutation_rates_dictionary: only one of the two"

    # set up Pyvolve
    if custom_model_params:
        GC.pyvolve_model = pyvolve.Model("nucleotide", custom_model_params)
    else:
        GC.pyvolve_model = pyvolve.Model("nucleotide")
示例#2
0
    def init():
        """Validate the ECM codon-model settings on ``GC`` and build the Pyvolve model.

        ``GC.ecm_type`` ("restricted" / "unrestricted") is rewritten in place
        to the Pyvolve model name ("ECMrest" / "ECMunrest"). ``GC.ecm_alpha``,
        ``GC.ecm_beta`` and ``GC.ecm_omega`` are each used as a custom
        parameter unless they are empty strings (string values are stripped
        in place). ``GC.ecm_codon_frequencies_dictionary``, when non-empty,
        is validated and used as ``state_freqs``. The resulting model is
        stored in ``GC.pyvolve_model``.

        Raises:
            AssertionError: if Pyvolve cannot be imported or a setting is
                invalid.
        """
        try:
            global pyvolve
            import pyvolve
        except Exception:
            # Return to the starting directory before aborting.
            from os import chdir
            chdir(GC.START_DIR)
            assert False, "Error loading Pyvolve. Install with: pip3 install pyvolve"

        # config validity checks
        custom_model_params = {}
        GC.ecm_type = GC.ecm_type.strip()
        ecm_model_names = {'restricted': 'ECMrest', 'unrestricted': 'ECMunrest'}
        assert GC.ecm_type in ecm_model_names, 'ecm_type must be "restricted" or "unrestricted"'
        GC.ecm_type = ecm_model_names[GC.ecm_type]

        # alpha, beta and omega follow the same rule: an empty string means
        # "not customised"; anything else must convert to float.
        for param in ('alpha', 'beta', 'omega'):
            attr = 'ecm_' + param
            value = getattr(GC, attr)
            if isinstance(value, str):
                value = value.strip()
                setattr(GC, attr, value)
                if not value:
                    continue
            custom_model_params[param] = float(value)

        assert isinstance(
            GC.ecm_codon_frequencies_dictionary, dict
        ), "Specified ecm_codon_frequencies_dictionary is not a dictionary"
        if GC.ecm_codon_frequencies_dictionary:
            codons = set(GC.generate_all_kmers(3, 'ACGT'))
            codons.difference_update({'TGA', 'TAA', 'TAG'})  # remove STOP codons
            for key in GC.ecm_codon_frequencies_dictionary:
                # The offending key is now actually interpolated into the message.
                assert key in codons, "%s is not a valid codon for ecm_codon_frequencies_dictionary. Only include 3-mers of the DNA alphabet, excluding the STOP codons (TGA, TAA, and TAG)" % (key,)
            assert abs(
                sum(GC.ecm_codon_frequencies_dictionary.values()) - 1
            ) < 0.000000001, "Frequencies in ecm_codon_frequencies_dictionary must sum to 1"
            custom_model_params['state_freqs'] = GC.ecm_codon_frequencies_dictionary

        # set up Pyvolve
        if custom_model_params:
            GC.pyvolve_model = pyvolve.Model(GC.ecm_type, custom_model_params)
        else:
            GC.pyvolve_model = pyvolve.Model(GC.ecm_type)
示例#3
0
def evolve(newicks, sequence_size, scale_tree):
    """Simulate sequences along each Newick tree and merge them into one file.

    For every tree string in ``newicks`` a default nucleotide model is
    evolved over ``sequence_size`` positions, written as FASTA, converted
    with ``fasta_to_phyl`` and the intermediate FASTA file removed. The
    per-tree PHYLIP files are then concatenated (each followed by a newline)
    into ``temp_seq.phyl`` and deleted.

    Args:
        newicks: sequence of Newick tree strings.
        sequence_size: number of positions per partition.
        scale_tree: value forwarded to ``pyvolve.read_tree(scale_tree=...)``.

    Returns:
        The name of the combined output file, ``"temp_seq.phyl"``.
    """
    model = pyvolve.Model("nucleotide")
    partition = pyvolve.Partition(models=model, size=sequence_size)

    phylip_files = []
    for index, newick in enumerate(newicks):
        tree = pyvolve.read_tree(tree=newick, scale_tree=scale_tree)
        evolver = pyvolve.Evolver(tree=tree, partitions=partition)

        fasta_seqfile = "temp" + str(index) + ".fasta"
        phylip_seqfile = "temp" + str(index) + ".phyl"
        phylip_files.append(phylip_seqfile)

        evolver(seqfile=fasta_seqfile, seqfmt="fasta", ratefile=None, infofile=None)
        fasta_to_phyl(fasta_seqfile, phylip_seqfile)

        # The FASTA file is only an intermediate for the conversion.
        os.remove(fasta_seqfile)

    phyl_output = "temp_seq.phyl"
    with open(phyl_output, 'w') as outfile:
        for fname in phylip_files:
            with open(fname) as infile:
                outfile.write(infile.read())
                outfile.write("\n")
            os.remove(fname)

    return phyl_output
def get_random_tree(filename, tree_string, L, kappa):
    """Simulate sequences along ``tree_string`` and summarise them.

    A nucleotide model with the given ``kappa`` and equal state frequencies
    is evolved from a root sequence produced by ``generate_ancestor(L)``
    (which is also printed). ``filename`` is accepted but not used here.

    Returns:
        A dict with keys ``'pi'`` and ``'theta'`` holding the results of
        ``pi_value`` and ``theta_value`` on the simulated sequences.
    """
    tree = pyvolve.read_tree(tree=tree_string)

    equal_freqs = [0.25, 0.25, 0.25, 0.25]
    model = pyvolve.Model('nucleotide', {'kappa': kappa, 'state_freqs': equal_freqs})

    root_sequence = generate_ancestor(L)
    print(root_sequence)

    partition = pyvolve.Partition(models=model, root_sequence=root_sequence)
    evolver = pyvolve.Evolver(partitions=partition, tree=tree)
    evolver()

    sequences = evolver.get_sequences()
    summary = {}
    summary['pi'] = pi_value(sequences)
    summary['theta'] = theta_value(sequences)
    return summary


	
示例#5
0
def get_random_tree(L, species, scaled_tree_string, kappa, iteration):
    """Simulate one alignment along an already-scaled tree and summarise it.

    The tree is parsed and printed, a nucleotide model with the given
    ``kappa`` and equal state frequencies is evolved from a root sequence
    produced by ``generate_ancestor(L)``, and the alignment is written to
    ``simulated_alignment_<species[:-1]>_universal_<iteration + 1>.fasta``
    (no rate or info file). Progress messages are printed along the way.

    Returns:
        A ``(pi, theta)`` tuple from ``pi_value`` and ``theta_value`` applied
        to the simulated sequences.
    """
    tree = pyvolve.read_tree(tree=scaled_tree_string)
    print('read in the tree')
    pyvolve.print_tree(tree)

    model_params = {'kappa': kappa, 'state_freqs': [0.25, 0.25, 0.25, 0.25]}
    model = pyvolve.Model('nucleotide', model_params)

    root_sequence = generate_ancestor(L)
    print('generated an ancestor')

    partition = pyvolve.Partition(models=model, root_sequence=root_sequence)
    evolver = pyvolve.Evolver(partitions=partition, tree=tree)

    alignment_name = "".join([
        "simulated_alignment_",
        str(species[:-1]),
        "_universal_",
        str(iteration + 1),
        ".fasta",
    ])
    evolver(ratefile=None, infofile=None, seqfile=alignment_name)
    print('evolved the sequences')

    sequences = evolver.get_sequences()
    pi = pi_value(sequences)
    theta = theta_value(sequences)
    return pi, theta
示例#6
0
 def get_codon_model(self):
     """Build a Pyvolve "MutSel" model from the fitness values on disk.

     Reads one float per line from ``Input/codon_fitness.txt`` and passes
     the list as the model's ``fitness`` parameter.

     Raises:
         KeyError / ValueError: if ALPHA, BETA or KAPPA is missing from
             ``self.parameters`` or is not convertible to float.
     """
     # NOTE(review): ALPHA/BETA/KAPPA are parsed (so bad values still fail
     # here, as before) but are not passed to the model -- confirm whether
     # that is intended.
     codon_params = {}
     for param in ["ALPHA", "BETA", "KAPPA"]:
         codon_params[param.lower()] = float(self.parameters[param])

     # The context manager closes the file; the original left it open.
     with open("Input/codon_fitness.txt", "r") as codon_fitness_file:
         codon_fitness = [float(line) for line in codon_fitness_file]

     return pyvolve.Model("MutSel", {"fitness": codon_fitness})
示例#7
0
def simulate(f, seqfile, tree, mu_dict, length):
    """Simulate a single partition under a homogeneous mutation-selection model.

    Args:
        f: state frequencies passed as the model's ``state_freqs``.
        seqfile: output sequence file name.
        tree: either a path to a tree file or a tree string; it is first
            tried as a file and, if that fails, parsed as a string.
        mu_dict: mutation rates passed as the model's ``mu``.
        length: number of positions in the partition.
    """
    try:
        my_tree = pyvolve.read_tree(file=tree)
    except Exception:
        # Not readable as a file: treat the argument as the tree string itself.
        # Catching Exception (not a bare except) lets Ctrl-C / SystemExit through.
        my_tree = pyvolve.read_tree(tree=tree)

    model = pyvolve.Model("MutSel", {'state_freqs': f, 'mu': mu_dict})

    part = pyvolve.Partition(size=length, models=model)
    e = pyvolve.Evolver(partitions=part, tree=my_tree)
    e(seqfile=seqfile, ratefile=None, infofile=None)
示例#8
0
def get_accurate_c(L, kappa):
    """Simulate sequences (with ancestors) on a fixed tree and set up per-site bookkeeping.

    NOTE(review): the ``kappa`` argument is not used in the visible code; the
    model below is built with the literal 1.86836732388. The function body
    also appears to be cut off in this excerpt (it ends on a bare
    ``strain_names`` expression and returns nothing).
    """

    # Root sequence for the simulation; printed for inspection.
    ancestor = generate_ancestor(L)
    print(ancestor)

    # phylogeny = pyvolve.read_tree(tree = '(  (t1:0.5,t2:0.5)i1:0.5, (t3:0.5,t4:0.5)i2:0.5 ,  (t5:0.5,t6:0.5)i3:0.5, (t7:0.5,t8:0.5)i4:0.5  ) root;')
    # Hard-coded tree with tips t3-t8 and labelled internal nodes, all branch lengths 0.5.
    phylogeny = pyvolve.read_tree(
        tree=
        '(  ((t7:0.5,t8:0.5)i4:0.5,(t5:0.5,t6:0.5)i3:0.5)i1:0.5, (t3:0.5,t4:0.5)i2:0.5  ) root;'
    )

    pyvolve.print_tree(phylogeny)

    # Equal state frequencies.
    freqs = [0.25, 0.25, 0.25, 0.25]

    nuc_model = pyvolve.Model('nucleotide', {
        'kappa': 1.86836732388,
        'state_freqs': freqs
    })

    my_partition = pyvolve.Partition(models=nuc_model, root_sequence=ancestor)

    my_evolver = pyvolve.Evolver(partitions=my_partition, tree=phylogeny)
    # my_evolver()
    # Evolve and request ancestral sequences as well (write_anc / anc flags).
    my_evolver(write_anc=True)
    # strains = my_evolver.get_sequences()
    strains = my_evolver.get_sequences(anc=True)
    strain_names = list(strains.keys())  # pre-order traversal of the tree
    n = len(strain_names)

    print(strain_names)

    # One (initially empty) list per sequence name.
    c_sites = {}
    for key in strain_names:
        c_sites[key] = []

    site_counts = L * [
        None
    ]  # list of dictionaries to keep track of which nucleotides are at each convergent site; index = site; key = nucleotide, value = number of strains with that nucleotide
    strains_with_site = L * [
        None
    ]  # list of the strains that have a convergent mutation at each site; index = site
    # Replace the None placeholders with a fresh counter dict / list per site.
    for x in range(L):
        site_counts[x] = {'A': 0, 'T': 0, 'G': 0, 'C': 0}
        strains_with_site[x] = []
    # c_list_matrix = [[{} for x in range(n)] for y in range(n)] # matrix of the convergent mutation sites; the (i,j) entry is a dictionary of the convergent mutation sites between strain i and strain j; key = site, value = nucleotide

    c = 0
    # Bare expression with no effect; the rest of the function is not present here.
    strain_names
示例#9
0
    def get_nucleotide_model(self):
        """Build a Pyvolve nucleotide model from ``self.parameters``.

        Equilibrium frequencies are read from the keys ``A``, ``C``, ``G``
        and ``T``; mutation rates from the twelve two-letter keys for every
        ordered pair of distinct nucleotides (``AC``, ``AG``, ...). All
        values are converted with ``float``.

        Raises:
            AssertionError: if the four frequencies do not sum to 1 within 1e-6.
        """
        bases = ('A', 'C', 'G', 'T')
        frequencies = []
        rates = {}

        for origin in bases:
            frequencies.append(float(self.parameters[origin]))
            rates.update(
                (origin + dest, float(self.parameters[origin + dest]))
                for dest in bases
                if dest != origin
            )

        total = sum(frequencies)
        assert abs(total - 1) < 1e-6, "Equilibrium frequencies of nucleotides must sum to 1.0"
        model_params = {"mu": rates, "state_freqs": frequencies}
        return pyvolve.Model("nucleotide", model_params)
示例#10
0
def execute(tree, model, length, out, numSim):
    """Simulate ``numSim`` alignments for one model along the tree in ``tree``.

    Args:
        tree: path to the tree file.
        model: Pyvolve model name; also embedded in the output file names.
        length: partition size (converted with ``int``).
        out: output file-name prefix.
        numSim: number of simulations (converted with ``int``).

    Each alignment is written to ``<out>.<model>-<i>.fa`` and that name is
    printed before the simulation runs.
    """
    # read in model, tree, and define partition
    pyvolve_model = pyvolve.Model(model)
    pyvolve_tree = pyvolve.read_tree(file=tree)
    pyvolve_partition = pyvolve.Partition(models=pyvolve_model, size=int(length))

    # create evolver
    my_evolver = pyvolve.Evolver(tree=pyvolve_tree, partitions=pyvolve_partition)
    # NOTE(review): this call runs one extra simulation with default
    # arguments before the loop; kept for backward compatibility -- confirm
    # whether it is needed.
    my_evolver()

    print("Simulating sequences...")
    # create simulated sequences
    for i in range(int(numSim)):
        seqfile = str(out) + "." + str(model) + "-" + str(i) + ".fa"
        # Print the name that is actually written (the old message omitted the model).
        print(seqfile)
        my_evolver(seqfile=seqfile)
示例#11
0
def make_partition_model_set(vecs, kappa):
    """Return one custom Pyvolve model per fitness vector in ``vecs``.

    For each vector, ``pyvolve.MutSel_Sanity`` is called with that fitness,
    the shared ``kappa`` and a population size of 10000; the resulting
    parameters are fed to ``KimuraMutSelMatrix`` and the matrix it returns
    is stored under the ``"matrix"`` key. Models are named ``bp0``, ``bp1``,
    ... in input order.
    """
    # Phase 1: sanity-checked parameter sets.
    param_sets = []
    for fitness in vecs:
        raw_params = {"fitness": fitness, "kappa": kappa, "popsize": 10000}
        param_sets.append(pyvolve.MutSel_Sanity("mutsel", raw_params)())

    # Phase 2: one rate matrix per parameter set.
    rate_matrices = []
    for params in param_sets:
        rate_matrices.append(KimuraMutSelMatrix("mutsel", params)())

    # Phase 3: attach each matrix to its parameter set.
    for params, matrix in zip(param_sets, rate_matrices):
        params.update({"matrix": matrix})

    models = []
    for index, params in enumerate(param_sets):
        models.append(pyvolve.Model("custom", params, name=("bp%d" % index)))
    return models
示例#12
0
def generate_Q_matrix(eq_freq, omega, all_nsy_cdn_index, all_syn_cdn_index):
    """Return an ECMunrest rate matrix rescaled towards the requested ``omega``.

    A Pyvolve ``ECMunrest`` model is built with ``omega`` and unit
    ``k_ti`` / ``k_tv``; the ratio of ``get_total_Q`` over the
    nonsynonymous and synonymous index sets is computed from its matrix,
    and ``rescale_substitution_matrix`` is applied to the nonsynonymous
    entries with a scaling factor of ``omega`` divided by that ratio.
    """
    # Concatenation kept from the original although its result is unused below.
    all_cdn_index = numpy.concatenate([all_syn_cdn_index, all_nsy_cdn_index])

    # background_omega have to be readjusted.
    model_params = {'omega': omega, 'k_ti': 1, 'k_tv': 1}
    model = pyvolve.Model(
        model_type='ECMunrest',
        name='placeholder',
        parameters=model_params,
        state_freqs=eq_freq,
    )

    rate_matrix = model.matrix
    nonsyn_total = get_total_Q(rate_matrix, all_nsy_cdn_index)
    syn_total = get_total_Q(rate_matrix, all_syn_cdn_index)
    dnds = nonsyn_total / syn_total

    return rescale_substitution_matrix(
        rate_matrix,
        all_nsy_cdn_index,
        scaling_factor=omega / dnds,
    )
示例#13
0
def exampleFastaGenerator(nwkFile, fastaOutputLocation, seqLength, rate=1):
    """Simulate a nucleotide alignment along the tree in ``nwkFile``.

    Args:
        nwkFile: path to the Newick tree file.
        fastaOutputLocation: path of the sequence file to write.
        seqLength: number of positions to simulate.
        rate: value used for all six mutation rates (AC, AG, AT, CG, CT, GT).

    No rate or info file is written.
    """
    # The original derived an unused tree name with nwkFile.rindex('/'),
    # which raised ValueError for any path without a '/'. It is dropped.

    # Tree.
    phylogeny = pyvolve.read_tree(file=nwkFile)
    # Rates.
    mutationRates = {
        "AC": rate,
        "AG": rate,
        "AT": rate,
        "CG": rate,
        "CT": rate,
        "GT": rate,
    }
    # Model.
    model = pyvolve.Model("nucleotide", {"mu": mutationRates})
    partition = pyvolve.Partition(models=model, size=seqLength)
    # Evolver.
    evolver = pyvolve.Evolver(partitions=[partition], tree=phylogeny)
    evolver(seqfile=fastaOutputLocation, ratefile=None, infofile=None)
示例#14
0
def evolve_nonconvergent_partition(g):
    """Simulate the non-convergent codon sites described by ``g``.

    Prints the site range being simulated, builds a custom Pyvolve model
    from a shallow copy of ``g['background_Q']`` (with stdout/stderr
    suppressed during model construction) and evolves
    ``num_simulated_site - num_convergent_site`` positions along
    ``g['background_tree']``, writing fixed ``tmp.csubst.*`` output files
    without ancestral sequences.
    """
    num_convergent = g['num_convergent_site']
    if num_convergent == 0:
        site_start = 1
    else:
        site_start = g['num_simulated_site'] - num_convergent + 1
    site_end = g['num_simulated_site']
    message = 'Codon site {}-{}; Non-convergent codons'.format(site_start, site_end)
    print(message)

    partition_size = g['num_simulated_site'] - g['num_convergent_site']
    rate_matrix = copy.copy(g['background_Q'])
    with suppress_stdout_stderr():
        root_model = pyvolve.Model(
            model_type='custom',
            name='root',
            parameters={'matrix': rate_matrix},
        )

    background_partition = pyvolve.Partition(models=root_model, size=partition_size)
    simulator = pyvolve.Evolver(partitions=background_partition, tree=g['background_tree'])
    simulator(
        ratefile='tmp.csubst.simulate_nonconvergent_ratefile.txt',
        infofile='tmp.csubst.simulate_nonconvergent_infofile.txt',
        seqfile='tmp.csubst.simulate_nonconvergent.fa',
        write_anc=False,
    )
示例#15
0
def generateTree(tns, ntaxa, seqlen):
    """Simulate a birth-death tree and evolve nucleotide sequences along it.

    The tree (birth rate 1.0, death rate 0, ``ntaxa`` extant tips) is
    written in Newick format to ``/tmp/pyvt`` and read back by Pyvolve. A
    default nucleotide model is evolved over ``seqlen`` positions with no
    sequence file, and the resulting sequences are written through DendroPy
    to ``evolvedsequences.fasta``.

    Returns:
        The DendroPy tree that was simulated.
    """
    newick_path = '/tmp/pyvt'

    # Build the tree with DendroPy and hand it to Pyvolve via a Newick file.
    tree = dendropy.simulate.treesim.birth_death_tree(
        birth_rate=1.0,
        death_rate=0,
        taxon_namespace=tns,
        num_extant_tips=ntaxa,
    )
    tree.write(
        path=newick_path,
        schema='newick',
        suppress_rooting=True,
        suppress_internal_node_labels=True,
    )

    # Default nucleotide model over seqlen positions.
    model = pyvolve.Model("nucleotide")
    partition = pyvolve.Partition(models=model, size=seqlen)

    pyvolve_tree = pyvolve.read_tree(file=newick_path)

    # Simulate without a sequence file; the sequences are pulled from memory.
    evolver = pyvolve.Evolver(tree=pyvolve_tree, partitions=partition)
    evolver(seqfile=None)
    sequences = evolver.get_sequences()

    alignment = dendropy.DnaCharacterMatrix.from_dict(sequences, taxon_namespace=tns)
    alignment.write(path="evolvedsequences.fasta", schema="fasta")
    return tree
示例#16
0
def simulate(tree_index,length):
    """Simulate codon sequences along one of three named trees.

    Args:
        tree_index: index into ("alpha", "beta", "charlie"); the tree is
            read from ``trees/<name>.tre``.
        length: number of positions (codons) in the partition.

    Returns:
        A list of the simulated sequences, one per entry returned by
        ``Evolver.get_sequences()``.
    """
    tree_map = ["alpha","beta","charlie"]
    tree = tree_map[tree_index]
    my_tree = pyvolve.read_tree(file = "trees/"+tree+".tre")

    # MG-style codon model parameterised by dN (beta) and dS (alpha).
    parameters_alpha_beta = {"beta": 0.65, "alpha": 0.98} # Corresponds to dN/dS = 0.65 / 0.98
    my_model = pyvolve.Model("MG", parameters_alpha_beta)

    # Assign the model to a pyvolve.Partition; `length` positions are evolved.
    my_partition = pyvolve.Partition(models = my_model, size = length)

    # Evolve without writing a rate or info file.
    # NOTE(review): ratefile/infofile are also passed to the constructor, as
    # in the original; presumably only the call-time values matter -- confirm.
    my_evolver = pyvolve.Evolver(partitions = my_partition, tree = my_tree, ratefile = None, infofile = None)
    my_evolver(ratefile = None, infofile = None)

    # Extract the sequences
    return list(my_evolver.get_sequences().values())
示例#17
0
@author: david
"""
import pyvolve

"User defined params"
mut_rate = 0.005
freqs = [0.25, 0.25, 0.25, 0.25]
seq_length = 1000
kappa = 2.75

# Read in the phylogeny along which Pyvolve should simulate.
# scale_tree sets the absolute mutation rate.
my_tree = pyvolve.read_tree(file="AMR-sim.tre", scale_tree=mut_rate)
# pyvolve.print_tree(my_tree)  # Print the parsed phylogeny

# Option 1 (disabled): nucleotide substitution model with custom rates.
# custom_mu = {"AC":0.5, "AG":0.25, "AT":1.23, "CG":0.55, "CT":1.22, "GT":0.47}
# nuc_model = pyvolve.Model("nucleotide", {"mu": custom_mu, "state_freqs": freqs})

# Option 2 (active): an HKY model parameterised by kappa.
nuc_model = pyvolve.Model("nucleotide", {"kappa": kappa, "state_freqs": freqs})

# Partition that evolves seq_length positions under nuc_model.
my_partition = pyvolve.Partition(models=nuc_model, size=seq_length)
# my_partition = pyvolve.Partition(models=nuc_model, root_sequence="GATAGAAC")  # Or with a root seq

# Evolver for the single partition.
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)

# Evolve sequences with custom file names.
my_evolver(
    ratefile="AMR_ratefile.txt",
    infofile="AMR_infofile.txt",
    seqfile="AMR-seqsim.fasta",
)
示例#18
0
def main():
    """Main body of script.

    Parses the command line, reads model parameters, amino-acid preferences
    and the tree, builds one single-site partition per preference site with
    a custom codon rate matrix, simulates an alignment with Pyvolve, and
    finally rewrites the alignment file with duplicate sequences removed.
    """
    genetics = pyvolve.genetics.Genetics()
    codons = genetics.codons
    codon_dict = genetics.codon_dict
    purines = genetics.purines

    args = vars(ParseArguments().parse_args())
    print("Read the following command line arguments:")
    print("\n\t{0}".format("\n\t".join(
        ["{0} = {1}".format(key, value) for (key, value) in args.items()])))

    print("\nPerforming simulation with pyvolve version {0}".format(
        pyvolve.__version__))

    print("\nReading model params from {0}".format(args['modelparams']))
    params = ReadParams(args['modelparams'])
    for (param, paramvalue) in params.items():
        print("The value of {0} is {1}".format(param, paramvalue))

    print("\nReading preferences from {0}".format(args['prefs']))
    tup = dms_tools.file_io.ReadPreferences(args['prefs'])
    (sites, pis) = (tup[0], tup[2])
    print("\nRead amino-acid preferences for {0} sites".format(len(pis)))

    tree = pyvolve.read_tree(file=args['tree'])

    # create models for simulation
    partitions = []
    for r in sites:
        # Sites listed as diversifying get their own omega; others use 1.0.
        # The message is built with format() so that the line is valid, and
        # prints the same text, on both Python 2 and Python 3.
        if params['diversifyingsitesA'] and (int(r)
                                             in params['diversifyingsitesA']):
            omega = params['diversifyingomegaA']
            print("{0} {1}".format(r, omega))
        elif params['diversifyingsitesB'] and (
                int(r) in params['diversifyingsitesB']):
            omega = params['diversifyingomegaB']
            print("{0} {1}".format(r, omega))
        else:
            omega = 1.0
        matrix = []  # matrix[x][y] is rate of substitution from x to y
        for (xi, x) in enumerate(codons):
            row = []
            for y in codons:
                ntdiffs = [(x[j], y[j]) for j in range(3) if x[j] != y[j]]
                if len(ntdiffs) == 0:
                    assert x == y
                    row.append(
                        0)  # will later be adjusted to make row sum to zero
                elif len(ntdiffs) > 1:
                    # multi-nucleotide codon change
                    row.append(0)
                else:
                    # single nucleotide change
                    (xnt, ynt) = ntdiffs[0]
                    if (xnt in purines) == (ynt in purines):
                        # transition
                        qxy = params['kappa'] * params['phi{0}'.format(ynt)]
                    else:
                        # transversion
                        qxy = params['phi{0}'.format(ynt)]
                    (xaa, yaa) = (codon_dict[x], codon_dict[y])
                    if xaa == yaa:
                        # synonymous change
                        fxy = 1.0
                    else:
                        pix = pis[r][xaa]**params['stringencyparameter']
                        piy = pis[r][yaa]**params['stringencyparameter']
                        if abs(pix - piy) < 1e-6:
                            # Near-equal preferences: avoid the 0/0 form below.
                            fxy = omega
                        else:
                            fxy = omega * math.log(
                                piy / pix) / (1.0 - pix / piy)
                    row.append(qxy * fxy * params['scalerate'])
            assert len(row) == len(codons)
            # Diagonal entry makes the row sum to zero.
            row[xi] = -sum(row)
            matrix.append(row)
        model = pyvolve.Model("custom", {"matrix": matrix})
        partitions.append(pyvolve.Partition(models=model, size=1))

    print("\nSimulating evolution, writing to {0}...".format(
        args['simulatedalignment']))
    basename = os.path.splitext(args['simulatedalignment'])[0]
    evolver = pyvolve.Evolver(partitions=partitions, tree=tree)
    evolver(
        seqfile=args['simulatedalignment'],
        infofile='{0}_infofile.txt'.format(basename),
        ratefile='{0}_ratefile.txt'.format(basename),
    )
    print("Finished simulation")

    # Keep only the first occurrence of each distinct sequence.
    uniqueseqs = set()
    uniquealignment = []
    ninitial = 0
    for seq in Bio.SeqIO.parse(args['simulatedalignment'], 'fasta'):
        ninitial += 1
        seqstr = str(seq.seq)
        if seqstr not in uniqueseqs:
            uniqueseqs.add(seqstr)
            uniquealignment.append(seq)
    print(
        "\nAfter removing redundant sequences, we have shrunk {0} from {1} to {2} sequences"
        .format(args['simulatedalignment'], ninitial, len(uniquealignment)))
    Bio.SeqIO.write(uniquealignment, args['simulatedalignment'], 'fasta')
示例#19
0
    def get_aminoacid_model(self):
        """Return a Pyvolve model whose type is ``self.parameters['AA_MODEL']``."""
        model_name = self.parameters['AA_MODEL']
        return pyvolve.Model(model_name)
示例#20
0
 def get_codon_model(self):
     """Build the codon model named by ``self.parameters['CODON_MODEL']``.

     ALPHA, BETA and KAPPA are read from ``self.parameters``, converted to
     float and passed under lower-case keys; ``neutral_scaling`` is enabled.
     """
     codon_params = {
         name.lower(): float(self.parameters[name])
         for name in ("ALPHA", "BETA", "KAPPA")
     }
     model_type = self.parameters['CODON_MODEL']
     return pyvolve.Model(model_type, codon_params, neutral_scaling=True)
示例#21
0
# This example script demonstrates how to evolve according to custom model with custom code

import pyvolve
import numpy as np

# Load the phylogeny from a file containing a newick tree.
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# A custom model needs a rate matrix and, optionally, a custom code (the list
# of states). The matrix must be square, with one row/column per state in the
# code. The code is a list because a state may, in principle, be written with
# more than one character.
code = ["0", "1", "2"]
matrix = np.array([
    [-0.5, 0.25, 0.25],
    [0.25, -0.5, 0.25],
    [0.25, 0.25, -0.5],
])
my_model = pyvolve.Model("custom", {"matrix": matrix, "code": code})

# Evolve a single position under the custom model.
my_partition = pyvolve.Partition(models=my_model, size=1)

my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
示例#22
0
# This example script demonstrates how to evolve according to a simple codon model. All model parameters (except dN/dS!) are default: equal mutation rates and equal equilibrium frequencies.

import pyvolve

# Load the phylogeny from a file containing a newick tree.
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# A codon model is a pyvolve.Model whose first argument is either:
#   1) "GY" or "codon" for the GY94-style (codon equilibrium frequencies in the matrix)
#   2) "MG" for the MG94-style (nucleotide equilibrium frequencies in the matrix)
#
# Codon models also need a second argument: a dictionary of parameters that
# specifies dN/dS, either as "omega" (the full ratio) or as "beta" (dN) plus
# "alpha" (dS). Either dictionary below would be acceptable.
parameters_omega = {"omega": 0.65}
parameters_alpha_beta = {
    "beta": 0.65,
    "alpha": 0.98,
}  # Corresponds to dN/dS = 0.65 / 0.98
my_model = pyvolve.Model("MG", parameters_alpha_beta)

# Partition of 250 positions; for a codon alignment that is 250 codons,
# i.e. 750 nucleotide sites.
my_partition = pyvolve.Partition(models=my_model, size=250)

# Evolve!
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
示例#23
0
# This example script demonstrates how to evolve according to a simple nucleotide model. All model parameters are default: equal mutation rates and equal equilibrium frequencies (e.g. JC69 model).

import pyvolve

# Load the phylogeny from a file containing a newick tree.
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# Nucleotide model with all parameters left at their defaults.
my_model = pyvolve.Model("nucleotide")

# Partition of 250 positions evolving under that model.
my_partition = pyvolve.Partition(models=my_model, size=250)

# Evolve!
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
示例#24
0
# This example script demonstrates how to evolve according to a customized mutation-selection nucleotide model. Customizable parameters include mutation rates, and either equilibrium frequencies or fitness values.
# Note that, for MutSel models, mutation rates do not have to be symmetric, so you can provide different rates for A->C ("AC") and C->A ("CA")
import pyvolve

# Define a phylogeny, from a file containing a newick tree
my_tree = pyvolve.read_tree(file = "file_with_tree.tre")


# Below are three example customized parameter dictionaries. Note that each of these could have "fitness" rather than "state_freqs" as a key
nuc_freqs = [0.334, 0.12, 0.41, 0.136]
custom_mutation_sym = {"AC": 1.5, "AG": 2.5, "AT": 0.5, "CG": 0.8, "CT": 0.99, "GT": 1.56} # For MutSel models, if you provide only 1 pair for each mutation rate (e.g. only AC and not CA), then Pyvolve will make mutation rates symmetric
# All twelve directed pairs are given. The last key is "TG": the original repeated "TC", which silently overwrote the T->C rate and left T->G unspecified.
custom_mutation_asym = {"AC": 1.5, "CA": 0.8, "AG": 2.5, "GA": 1.2, "AT": 0.5, "TA": 1.1, "CG": 0.8, "GC": 0.9, "CT": 0.99, "TC": 2.3, "GT": 1.56, "TG": 2.56}

# Customize mutation rates using symmetric mutation rates, and specify frequencies for the MutSel model
parameters1 = {"state_freqs": nuc_freqs, "mu":custom_mutation_sym}

# Customize mutation rates using asymmetric mutation rates, and specify frequencies for the MutSel model
parameters2 = {"state_freqs": nuc_freqs, "mu":custom_mutation_asym}

# Customize mutation rates using kappa, and specify frequencies for the MutSel model
parameters3 = {"state_freqs": nuc_freqs, "kappa":4.25}

my_model = pyvolve.Model("mutsel", parameters3) # Any of the dictionaries shown above is acceptable!

# Assign the model to a pyvolve.Partition. The size argument indicates to evolve 250 positions
my_partition = pyvolve.Partition(models = my_model, size = 250)

# Evolve!
my_evolver = pyvolve.Evolver(partitions = my_partition, tree = my_tree)
my_evolver()
示例#25
0
import pyvolve

# Define a phylogeny, from a file containing a newick tree
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# Define an amino-acid model ("WAG"), as a pyvolve.Model object. For this example, we'll use default parameters, but see the example script custom_aminoacid.py for other options

# To implement rate heterogeneity, do either of these:
## 1) Custom rates: Provide a list of rate_factors when defining a Model object. These rate factors will be assigned to sites with equal probability by default. To change this, provide probabilities with the argument `rate_probs`.
## 2) Gamma rates: Provide the keyword arguments num_categories and alpha when defining a Model object. <num_categories> rates will be drawn from a gamma distribution with shape and scale parameter each equal to <alpha>. These rates will be equiprobable, unless overridden by `rate_probs`.

# Several model definitions are shown below (first argument can be a different model, as desired).

# custom rates
my_model1 = pyvolve.Model(
    "WAG", rate_factors=[0.3, 0.8, 1.5,
                         2.45])  # 25% of sites will have each factor.
my_model2 = pyvolve.Model(
    "WAG",
    rate_factors=[0.3, 0.8, 1.5, 2.45],
    rate_probs=[0.7, 0.2, 0.05, 0.05]
)  # 70% of sites evolve with rate of 0.3, 20% with a rate of 0.8, 5% with a rate of 1.5, and 5% with a rate of 2.45

# gamma rates
my_model3 = pyvolve.Model("WAG", alpha=0.6, num_categories=5)

# Assign the model to a pyvolve.Partition. The size argument indicates to evolve 250 positions
my_partition = pyvolve.Partition(models=my_model2, size=250)

# Evolve!
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
# Actually run the simulation; the Evolver was previously built but never called.
my_evolver()
    def init():
        """Validate the generic Pyvolve settings on ``GC`` and build model + frequencies.

        ``GC.pyvolve_model_type`` and ``GC.pyvolve_state_frequencies_class``
        are stripped in place. The model is stored in ``GC.pyvolve_model``
        (with ``GC.pyvolve_custom_model_parameters_dictionary`` when it is
        non-empty) and the state-frequencies object in ``GC.pyvolve_f``.

        ``GC.pyvolve_state_frequencies_parameters_dictionary`` must contain
        ``"alphabet"``; ``"restrict"`` is optional and is forwarded to
        whichever frequencies class is selected; ``"freq_dict"`` is required
        for ``CustomFrequencies``.

        Raises:
            AssertionError: if Pyvolve cannot be imported or a setting is
                invalid.
        """
        try:
            global pyvolve
            import pyvolve
        except Exception:
            # Return to the starting directory before aborting.
            from os import chdir
            chdir(GC.START_DIR)
            assert False, "Error loading Pyvolve. Install with: pip3 install pyvolve"

        # config validity checks
        GC.pyvolve_model_type = GC.pyvolve_model_type.strip()
        GC.pyvolve_state_frequencies_class = GC.pyvolve_state_frequencies_class.strip()
        freq_class = GC.pyvolve_state_frequencies_class
        assert freq_class in {
            "EqualFrequencies", "RandomFrequencies", "CustomFrequencies"
        }, 'Unsupported Pyvolve state_frequencies_class selected. Choose "EqualFrequencies", "RandomFrequencies", or "CustomFrequencies"'
        assert isinstance(
            GC.pyvolve_custom_model_parameters_dictionary, dict
        ), "Specified pyvolve_custom_model_parameters_dictionary is not a dictionary"
        freq_params = GC.pyvolve_state_frequencies_parameters_dictionary
        assert isinstance(
            freq_params, dict
        ), "Specified pyvolve_state_frequencies_parameters_dictionary is not a dictionary"
        assert "alphabet" in freq_params, 'Specified pyvolve_state_frequencies_parameters_dictionary does not contain mandatory "alphabet" key'
        assert freq_params["alphabet"] in {
            "nucleotide", "amino_acid", "codon"
        }, 'Specified pyvolve_state_frequencies_parameters_dictionary has an invalid value for "alphabet" (must be "nucleotide", "amino_acid", or "codon")'
        if freq_class == "CustomFrequencies":
            assert "freq_dict" in freq_params, 'Pyvolve CustomFrequencies class must have the "freq_dict" key in its pyvolve_state_frequencies_parameters_dictionary (and its value must be in the same format as the Pyvolve manual)'
            assert isinstance(
                freq_params["freq_dict"], dict
            ), 'Value of "freq_dict" in pyvolve_state_frequencies_parameters_dictionary is not a dictionary'

        # set up Pyvolve
        if GC.pyvolve_custom_model_parameters_dictionary:
            GC.pyvolve_model = pyvolve.Model(
                GC.pyvolve_model_type,
                GC.pyvolve_custom_model_parameters_dictionary)
        else:
            GC.pyvolve_model = pyvolve.Model(GC.pyvolve_model_type)

        # Collect the optional keyword arguments once, so every frequencies
        # class is handled uniformly. Previously "restrict" combined with
        # CustomFrequencies matched no branch and hit the generic assertion
        # at the end.
        freq_kwargs = {}
        if "restrict" in freq_params:
            freq_kwargs["restrict"] = freq_params["restrict"]
        if freq_class == "CustomFrequencies":
            freq_kwargs["freq_dict"] = freq_params["freq_dict"]

        GC.pyvolve_f = None
        if freq_class == "EqualFrequencies":
            GC.pyvolve_f = pyvolve.EqualFrequencies(
                freq_params["alphabet"], **freq_kwargs)
        elif freq_class == "RandomFrequencies":
            GC.pyvolve_f = pyvolve.RandomFrequencies(
                freq_params["alphabet"], **freq_kwargs)
        elif freq_class == "CustomFrequencies":
            GC.pyvolve_f = pyvolve.CustomFrequencies(
                freq_params["alphabet"], **freq_kwargs)
        else:
            assert False, "Invalid Pyvolve StateFrequencies class specified"
        assert GC.pyvolve_f is not None, "Something went wrong in setting up the Pyvolve StateFrequencies class"
示例#27
0
# This example script demonstrates how to evolve according to a simple amino-acid model. Customizable model parameters are default: equal equilibrium frequencies.

import pyvolve

# Define a phylogeny, from a file containing a newick tree
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# Define an amino-acid model, as a pyvolve.Model object. The first argument should be either "JTT", "WAG", "LG", "AB", "mtmam", "mtREV24", "DAYHOFF" (available empirical matrices in Pyvolve)
my_model = pyvolve.Model("LG")

# Assign the model to a pyvolve.Partition. The size argument indicates to evolve 250 positions
my_partition = pyvolve.Partition(models=my_model, size=250)

# Evolve!
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()
示例#28
0
def pyvolvePartitions(model, divselection=None):
    """Get list of `pyvolve` partitions for `model`.

    One partition of size 1 is built per site ``r`` in ``range(model.nsites)``.
    Each partition wraps a ``pyvolve.Model("custom", ...)`` whose rate matrix is
    filled in here: only codon pairs differing at exactly one nucleotide get a
    non-zero rate (``model.mu * qxy * fxy``), and each diagonal entry is set to
    minus the sum of its row.

    Args:
        `model` (`phydmslib.models.Models` object)
            The model used for the simulations. Only objects whose exact
            type is `ExpCM`, `ExpCM_empirical_phi`,
            `ExpCM_empirical_phi_divpressure` or `YNGKP_M0` are supported.
        `divselection` (`None` or 2-tuple `(divomega, divsites)`)
            Set this option if you want to simulate a subset of sites
            as under diversifying selection (e.g., an `omega` different
            than that used by `model`). In this case, `divomega` is
            the omega for this subset of sites, and `divsites` is a list
            of the sites in 1, 2, ... numbering. It is ignored for
            `ExpCM_empirical_phi_divpressure`, which always uses
            `model.omega * (1 + model.omega2 * model.deltar[r])`.

    Returns:
        `partitions` (`list` of `pyvolve.Partition` objects)
            Can be fed into `pyvolve.Evolver` to simulate evolution.

    Raises:
        AssertionError: if an entry of `divsites` is outside
            ``1 .. model.nsites``.
        ValueError: if `model` is not of a supported type (raised while
            filling the matrix for the first site).
    """
    # Function-scope import: ``scipy.zeros`` was only a deprecated alias of
    # ``numpy.zeros``, so allocate the matrix through NumPy directly.
    import numpy

    # A single Genetics instance provides all the lookup tables needed here.
    genetics = pyvolve.genetics.Genetics()
    codons = genetics.codons
    codon_dict = genetics.codon_dict
    purines = genetics.purines

    if divselection:
        (divomega, divsites) = divselection
    else:
        divsites = []

    assert all(1 <= r <= model.nsites for r in divsites)
    # Set lookup instead of scanning the list for every codon pair.
    divsite_set = set(divsites)

    # The model type never changes inside the loops, so classify it once.
    # Exact type comparison (not isinstance) is kept so the set of accepted
    # classes is unchanged.
    model_type = type(model)
    is_divpressure = (
        model_type == phydmslib.models.ExpCM_empirical_phi_divpressure)
    is_expcm = model_type in [
        phydmslib.models.ExpCM,
        phydmslib.models.ExpCM_empirical_phi,
        phydmslib.models.ExpCM_empirical_phi_divpressure
    ]
    is_yngkp = model_type == phydmslib.models.YNGKP_M0

    ncodons = len(codons)
    partitions = []
    for r in range(model.nsites):
        site_is_div = (r + 1) in divsite_set  # divsites uses 1-based numbering
        matrix = numpy.zeros((ncodons, ncodons), dtype='float')
        for (xi, x) in enumerate(codons):
            for (yi, y) in enumerate(codons):
                ntdiffs = [(x[j], y[j]) for j in range(3) if x[j] != y[j]]
                if len(ntdiffs) != 1:
                    # identical codons or multi-nucleotide changes: rate 0
                    continue
                (xnt, ynt) = ntdiffs[0]

                # qxy: mutation part; kappa applies when both nucleotides
                # are purines or both are not (i.e. a transition).
                qxy = 1.0
                if (xnt in purines) == (ynt in purines):
                    qxy *= model.kappa

                # fxy: selection part; omega applies to nonsynonymous changes.
                (xaa, yaa) = (codon_dict[x], codon_dict[y])
                fxy = 1.0
                if xaa != yaa:
                    if is_divpressure:
                        fxy *= model.omega * (
                            1 + model.omega2 * model.deltar[r])
                    elif site_is_div:
                        fxy *= divomega
                    else:
                        fxy *= model.omega

                if is_expcm:
                    qxy *= model.phi[NT_TO_INDEX[ynt]]
                    pix = model.pi[r][AA_TO_INDEX[xaa]]**model.beta
                    piy = model.pi[r][AA_TO_INDEX[yaa]]**model.beta
                    # When the two values are (almost) equal the factor is
                    # left at 1, which also avoids evaluating 0 / 0 below.
                    if abs(pix - piy) > ALMOST_ZERO:
                        fxy *= math.log(piy / pix) / (1.0 - pix / piy)
                elif is_yngkp:
                    for p in range(3):
                        qxy *= model.phi[p][NT_TO_INDEX[y[p]]]
                else:
                    raise ValueError("Can't handle model type {0}".format(
                        type(model)))
                matrix[xi][yi] = model.mu * qxy * fxy
            # The diagonal is still 0 here, so this makes the row sum to zero.
            matrix[xi][xi] = -matrix[xi].sum()

        # create model in way that captures annoying print statements in pyvolve
        old_stdout = sys.stdout
        with open(os.devnull, 'w') as devnull:
            sys.stdout = devnull
            try:
                m = pyvolve.Model("custom", {"matrix": matrix})
            finally:
                # always restore stdout, even if pyvolve raises
                sys.stdout = old_stdout
        partitions.append(pyvolve.Partition(models=m, size=1))

    return partitions
示例#29
0
def cli(gnumber,
        glist,
        gtree,
        edprob,
        gsize,
        glen_range,
        dnds,
        tau=None,
        delrate=0.0,
        from_al=None,
        protlike=False,
        no_syn=False,
        sub_rate=1.0,
        min_cons=0.0,
        outdir=""):
    """Simulate `gsize` codon alignments over a genome tree and save them.

    The genome set comes from exactly one of `gnumber`, `glist` or `gtree`
    (checked in that order). For every simulated gene the sequences are
    generated with `simulate_genomes`, optionally passed through
    `random_deletion`, then through `CtoUsimulate`, and finally written
    with `save_data`. Nothing is returned.

    Args:
        gnumber: if truthy, number of genomes; leaves are named
            ``Genome_1`` .. ``Genome_<gnumber>``.
        glist: path of a text file with one genome name per line. Blank
            lines and lines starting with ``#`` are skipped. A name written
            with a leading ``-`` or ``_`` is also added to the `no_edit`
            list handed to `CtoUsimulate`; the markers are stripped from
            the stored name.
        gtree: tree description accepted by ``Tree(...)``. Leaf names
            starting with ``_`` are added to `no_edit` and the underscores
            are stripped from the node names.
        edprob: forwarded to `CtoUsimulate`.
        gsize: number of genes to simulate.
        glen_range: pair ``(low, high)``; each gene length in codons is
            drawn with ``np.random.randint(low, high)``.
        dnds: indexable pair; ``dnds[0]`` is used as the codon model
            ``"beta"`` and ``dnds[1]`` as ``"alpha"``.
        tau: if truthy, used as the codon model ``"kappa"``.
        delrate: if truthy, `random_deletion` is applied with this rate.
        from_al: if truthy, alignment file from which codon state
            frequencies are read.
        protlike: if truthy, ``'ATG'`` is prepended to every sequence.
        no_syn: forwarded to `CtoUsimulate`.
        sub_rate: passed as ``scale_tree`` to ``pyvolve.read_tree``.
        min_cons: forwarded to `CtoUsimulate`.
        outdir: forwarded to `simulate_genomes` and `save_data`.

    Raises:
        NotImplementedError: if none of `gnumber`, `glist`, `gtree` is set.
    """
    gleaf = []
    no_edit = []
    tree = None
    if gnumber:
        gleaf = ['Genome_{}'.format(i) for i in range(1, gnumber + 1)]
    elif glist:
        with open(glist) as handle:
            # Iterate the opened handle; the previous code looped over an
            # undefined name (``Glist``) and raised NameError.
            for line in handle:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                name = line.strip('-_')
                gleaf.append(name)
                if line.startswith(('-', '_')):
                    no_edit.append(name)
    elif gtree:
        tree = Tree(gtree)
        gleaf = tree.get_leaf_names()
        # Collect the flagged names before the markers are stripped below.
        no_edit = [x.strip('_') for x in gleaf if x.startswith('_')]
        for node in tree:
            node.name = node.name.strip('_')

    else:
        raise NotImplementedError(
            "One of --gnumber, --glist and --gtree is needed !")

    if tree is None:
        # No tree supplied: build one with random branches over the names.
        tree = Tree()
        tree.populate(len(gleaf), names_library=gleaf, random_branches=True)

    param_list = {"alpha": dnds[1], "beta": dnds[0]}
    if tau:
        param_list.update({"kappa": tau})

    if from_al:  # read codons frequencies from an existing alignment
        f = pyvolve.ReadFrequencies("codon", file=from_al)
        param_list.update({'state_freqs': f.compute_frequencies()})

    phylogeny = pyvolve.read_tree(tree=tree.write(format=5),
                                  scale_tree=sub_rate)
    codon_model = pyvolve.Model("codon", param_list)
    # Per-gene results are accumulated here but not used further inside
    # this function.
    sequences = []
    edited_sequences = []
    truth_tables = []
    # add height to tree
    tree = add_height_to_tree(tree)

    for i in range(gsize):
        # gene length (in codons) is drawn uniformly, then converted to
        # nucleotides
        alen = np.random.randint(glen_range[0], glen_range[1]) * 3
        seq = simulate_genomes(codon_model, phylogeny, alen, outdir, i + 1)
        if delrate:
            seq = random_deletion(seq, tree, alen // 3, delrate)
        if protlike:
            for k in seq:
                seq[k] = 'ATG' + seq[k]
        sequences.append(seq)
        edited_seq, truth_table = CtoUsimulate(seq,
                                               tree,
                                               no_edit,
                                               edprob,
                                               no_syn=no_syn,
                                               min_cons=min_cons)
        edited_sequences.append(edited_seq)
        truth_tables.append(truth_table)
        save_data(tree, seq, edited_seq, truth_table, outdir, i + 1)
# This example script demonstrates how to evolve sequences according to a simple codon mutation-selection (MutSel) model.
# For a MutSel model, you must supply either fitness values or equilibrium frequencies. Mutation rates are left at their default (equal).

import pyvolve
import numpy as np  # imported to generate example mutation-selection model parameters

# Define a phylogeny, read from a file containing a newick tree
my_tree = pyvolve.read_tree(file="file_with_tree.tre")

# Define a mutation-selection model by passing "MutSel" as the first argument. These models require that you specify either
# a list of *fitness* values or a list of *equilibrium frequencies* in a parameters dictionary. See the user manual for more
# information! Below are examples of acceptable dictionaries.
parameters_fitness1 = {
    "fitness": np.random.uniform(-5, 5, size=61)
}  # Numpy array of length 61 defines codon fitnesses
parameters_fitness2 = {
    "fitness": np.random.uniform(-5, 5, size=20)
}  # Numpy array of length 20 defines amino-acid fitnesses, which are applied to codons such that synonymous codons have the same fitness
parameters_freqs = {
    "state_freqs": np.repeat(1. / 61, 61)
}  # Numpy array of equal frequencies, just as an example! These values must sum to 1!

my_model = pyvolve.Model(
    "MutSel", parameters_fitness1
)  # Any of the above parameter dictionaries is acceptable as the second argument!

# Assign the model to a pyvolve.Partition. The size argument says to evolve 250 codon positions
my_partition = pyvolve.Partition(models=my_model, size=250)

# Evolve! Build the Evolver from the partition and tree, then call it to run the simulation
my_evolver = pyvolve.Evolver(partitions=my_partition, tree=my_tree)
my_evolver()