# Demo driver: generate a YAML configuration under /tmp, build a SeqGen from
# /tmp/seq.yml, generate the negative and positive random sequence sets,
# embed a motif, and print both sets to stdout.
#
# print() is called with a single argument throughout, which prints the same
# text under Python 2 (parenthesised expression) and Python 3 (function call).
import Conf
import SeqGen
from Conf import *
from SeqGen import *
import generateYaml
from generateYaml import *

# Alternative configuration source (disabled):
# conf = Conf("../conf/SeqGen.yaml")

# Presumably writes /tmp/seq.yml, which is loaded on the next line --
# confirm against generateYaml.
generateYaml("/tmp")
conf = Conf("/tmp/seq.yml")

seqGen = SeqGen(conf)
seqGen.GenerateRandomSequences("negative")
seqGen.GenerateRandomSequences("positive")

# The returned motif is not used again in this script; embedMotifInSequence()
# is called without arguments.
motif = seqGen.GenerateMotif()
seqGen.embedMotifInSequence()

print("Positive Set: ")
for seq in seqGen.GetPositiveSet():
    print(seq)

print("Negative Set: ")
for seq in seqGen.GetNegativeSet():
    print(seq)
# Batch driver: create configuration files in the directory given as the
# first command-line argument, then, for every *.yml file found there,
# generate a negative and a positive random sequence set. SeqGen is pointed
# at <name>_pos.fa and <name>_neg.fa inside the same directory, where <name>
# is the configuration file's base name without its extension.
import os  # imported explicitly; os.path is used below
import sys

import SeqGen
from Conf import *
from SeqGen import *
import generateYaml
from generateYaml import *
from SeqGenUtils import *

if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("usage: <script> <directory>")

directory = sys.argv[1]
CreateConfFiles(directory)

for confFile in findFiles(directory, '*.yml'):
    conf = Conf(confFile)
    seqGen = SeqGen(conf)

    # Base name of the configuration file, extension stripped.
    filename = os.path.splitext(os.path.basename(confFile))[0]

    posFastaFile = os.path.join(directory, filename + "_pos.fa")
    seqGen.SetPosFileName(posFastaFile)
    negFastaFile = os.path.join(directory, filename + "_neg.fa")
    seqGen.SetNegFileName(negFastaFile)

    # NOTE(review): the meaning of the second argument (0 / 1) is defined by
    # SeqGen.GenerateRandomSequences and is not visible here.
    seqGen.GenerateRandomSequences("negative", 0)
    seqGen.GenerateRandomSequences("positive", 1)

    # Disabled steps, kept for reference:
    # seqGen.embedMotifInSequence()
    # print("Positive Set: ")
    # for seq in seqGen.GetPositiveSet():
    #     print(seq)
# Batch driver (negative set only): create configuration files in the
# directory given as the first command-line argument, then, for every *.yml
# file found there, generate a negative random sequence set. SeqGen is
# pointed at <name>_neg.fa inside the same directory, where <name> is the
# configuration file's base name without its extension.
import os  # imported explicitly; os.path is used below
import sys

import SeqGen
from Conf import *
from SeqGen import *
import generateYaml
from generateYaml import *
from SeqGenUtils import *

if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("usage: <script> <directory>")

directory = sys.argv[1]
CreateConfFiles(directory)

for confFile in findFiles(directory, '*.yml'):
    conf = Conf(confFile)
    seqGen = SeqGen(conf)

    # Base name of the configuration file, extension stripped.
    filename = os.path.splitext(os.path.basename(confFile))[0]

    negFastaFile = os.path.join(directory, filename + "_neg.fa")
    seqGen.SetNegFileName(negFastaFile)

    # NOTE(review): the meaning of the second argument (0) is defined by
    # SeqGen.GenerateRandomSequences and is not visible here.
    seqGen.GenerateRandomSequences("negative", 0)

    # Disabled steps, kept for reference:
    # seqGen.embedMotifInSequence()
    # print("Positive Set: ")
    # for seq in seqGen.GetPositiveSet():
    #     print(seq)
    # print("Negative Set: ")
def main(tree_filename, output_filename, single_cols_size, paired_cols_size):
    """
    Simulate sequences along a tree and write the leaves as a Stockholm file.

    tree_filename --- input newick-format tree filename
    output_filename --- output stockholm filename
    single_cols_size --- number of unpaired positions
    paired_cols_size --- number of paired positions; each one contributes
                         two alignment columns

    Each written sequence is laid out as

        [first half of the unpaired columns]
        [first member of pair 0 .. pair P-1]
        [second member of pair P-1 .. pair 0]
        [remaining unpaired columns]

    which matches the consensus structure line '.'*l1 + '('*P + ')'*P + '.'*l2.

    A summary line and the list of (edge_length, parent index) tuples are
    written to stderr; the tree plot is printed by tree.print_plot().
    """
    tree = dendropy.Tree.get_from_path(tree_filename, 'newick')

    sys.stderr.write("{0} single cols, {1} paired cols\n".format(
        single_cols_size, paired_cols_size))
    tree.print_plot()

    # Position of every node in preorder. A dict replaces repeated
    # list.index() scans; setdefault keeps the first occurrence, which is
    # what list.index() returns.
    oid_preorder = [n.oid for n in tree.preorder_node_iter()]
    index_of = {}
    for position, oid in enumerate(oid_preorder):
        index_of.setdefault(oid, position)

    # One (branch length, parent's preorder index) entry per non-root node,
    # in preorder.
    nodes = tree.preorder_node_iter()
    next(nodes)  # ignore root
    order = [(n.edge_length, index_of[n.parent_node.oid]) for n in nodes]
    sys.stderr.write("order is {0}\n".format(order))

    single_model = SingleModel(SINGLE_MODEL_FULLPATH)
    paired_model = PairedModel(PAIRED_MODEL_FULLPATH, single_model)

    # NOTE(review): the final boolean presumably selects paired-column
    # simulation -- confirm against SeqGen.main. The results are indexed by
    # preorder node position below.
    seqs_s = SeqGen.main(single_model.gtr.R, single_model.Frequency, order,
                         single_cols_size, False)
    seqs_p = SeqGen.main(paired_model.gtr.R, paired_model.Frequency, order,
                         paired_cols_size, True)

    # Unpaired columns are split around the paired region; floor division
    # keeps l1 an integer on both Python 2 and Python 3.
    l1 = single_cols_size // 2
    l2 = single_cols_size - l1

    # Only leaves are written. To inspect internal nodes while debugging,
    # iterate tree.preorder_node_iter() instead of tree.leaf_nodes().
    seqrecs = []
    for leaf in tree.leaf_nodes():
        i = index_of[leaf.oid]
        s = seqs_s[i][:l1]
        # Even offsets hold the first member of each pair, in pair order.
        for k in range(paired_cols_size):
            s += seqs_p[i][k * 2]
        # Odd offsets hold the second member; appended in reverse pair order.
        for k in range(paired_cols_size - 1, -1, -1):
            s += seqs_p[i][k * 2 + 1]
        s += seqs_s[i][l1:]
        seqrecs.append(SeqRecord(Seq(s), id=leaf.taxon.label))

    ss_cons = '.' * l1 + '(' * paired_cols_size + ')' * paired_cols_size + '.' * l2
    rf = 'x' * (single_cols_size + 2 * paired_cols_size)

    # Open the output only once everything is ready, so a failure above does
    # not leave an empty file behind.
    with open(output_filename, 'w') as f:
        MSA.write_stockholm(f, seqrecs, ss_cons, rf)