def main():
    """Tabulate average branch lengths for the children of each node.

    Steps:
        read the command-line arguments with get_args();
        extract the node of interest for every line of the tree list;
        label each child of that node as "species-like" or
        "with deletion" using tm.node_with_deletion();
        store get_ave_branch_length() of the child under that label,
        keyed by the tree file name;
        pass both dictionaries to write_table().
    """
    treedir, outgroups, treelist, species_tree, outfile = get_args()
    outgroups = mod.get_file_data(outgroups)
    species_tree = tm.read_tree(species_tree)
    # Kept from the original; the result is not used further down.
    species_nodes = tm.get_all_nodes(species_tree,
                                     tm.calculate_nnodes(species_tree))

    species_like = {}
    with_deletion = {}
    for entry in mod.get_file_data(treelist):
        treefile = entry.split()[0]
        node = extract_node(entry, treedir, outgroups)
        # Every child is compared against the second child of the node.
        reference = ete3_2_newick(node.children[1])
        found_species_like = False
        for child in node.children:
            child_newick = ete3_2_newick(child)
            # Once one child has been labelled species-like, the remaining
            # children are recorded as "with deletion" without comparison.
            if (not found_species_like
                    and not tm.node_with_deletion(reference, child_newick)):
                species_like[treefile] = get_ave_branch_length(child)
                found_species_like = True
                continue
            with_deletion[treefile] = get_ave_branch_length(child)
    write_table(outfile, species_like, with_deletion)
def main():
    """Print random first-name / family-name pairs until the user declines.

    First names come from the file "first_names": the first
    whitespace-separated token of each line, kept only when it is all upper
    case, then title-cased.  Family names come from "family_names": a line
    is used when it starts with the next expected counter value
    (1, 2, 3, ...), and the name is the last space-separated word of the
    second tab-separated column.
    """
    leading_tokens = (row.split()[0] for row in mod.get_file_data("first_names"))
    first_names = [token.title() for token in leading_tokens if token.isupper()]

    family_names = []
    expected_rank = 1
    for row in mod.get_file_data("family_names"):
        if not row.startswith(str(expected_rank)):
            continue
        family_names.append(row.split("\t")[1].split(" ")[-1])
        expected_rank += 1

    while True:
        first = random.choice(first_names)
        family = random.choice(family_names)
        print(f"\n\n{Fore.RED}" + first + " " + family
              + f"{Style.RESET_ALL}\n\n")
        # Anything other than "y" / "yes" ends the program.
        if input("again?") not in ("y", "yes"):
            exit()
def main():
    """Append to the output file every listed tree with no short branch.

    For each index i in 0..577 the name "full_<i>.rooted" is looked up in
    the lines of the node file.  When it is listed, the first line of
    <treedir>/full_<i>.rooted is passed to extract_branch_lengths(); the
    name is appended to the output file only if none of the returned
    branch lengths is below 0.01.  Trees whose file cannot be read are
    skipped.
    """
    treedir, node_file, outfile = get_args()
    nodelines = mod.get_file_data(node_file)
    for i in range(578):
        tree_name = "full_" + str(i) + ".rooted"
        if tree_name not in nodelines:
            continue
        try:
            tree_string = mod.get_file_data(treedir + "/" + tree_name)[0]
        except Exception:
            # Best effort: a missing or empty tree file is skipped.  Unlike
            # a bare ``except:``, this lets Ctrl-C and SystemExit through.
            # NOTE(review): assumes get_file_data signals failure with an
            # ordinary exception rather than by exiting; confirm.
            continue
        if any(bl < 0.01 for bl in extract_branch_lengths(tree_string)):
            continue
        # Append mode: entries from earlier runs are preserved.
        with open(outfile, "a") as out:
            out.write(tree_name + "\n")
def main():
    """Concatenate numbered phylip partitions into one phylip file.

    The first command-line value is a path prefix.  Files matching
    "<prefix>_[0-9]*" are counted, then "<prefix>_1.phy" ...
    "<prefix>_<count>.phy" are read in numeric order.  The first line of
    each file is skipped; every other line is split on runs of two or more
    whitespace characters into a name and a sequence, spaces are removed
    from the sequence, and sequences with the same name are joined.  The
    output starts with "<number of names> <length of first sequence>".
    """
    path, output = read_args(sys.argv[1:])
    # The glob only counts the partitions; they are read by index below.
    n_parts = len(glob.glob(path + "_[0-9]*"))

    seqs = {}
    for i in range(1, n_parts + 1):
        for line in mod.get_file_data(path + "_" + str(i) + ".phy")[1:]:
            fields = re.split(r'\s{2,}', line)
            seq = fields[1].replace(" ", "")
            seqs[fields[0]] = seqs.get(fields[0], "") + seq

    # ``with`` closes the file even if a write fails part-way through.
    with open(output, "w") as out:
        if seqs:
            first_seq = next(iter(seqs.values()))
            out.write(str(len(seqs)) + " " + str(len(first_seq)) + "\n")
        for key, value in seqs.items():
            out.write(key + " " + value + "\n")
def main():
    """Encrypt a fixed message and print the result joined by spaces.

    The lines of "supporters.txt" and two fixed codewords are passed to
    encrypt() together with the message.
    """
    supporter_names = mod.get_file_data("supporters.txt")
    cipher_words = encrypt("Give your word and we rise",
                           supporter_names,
                           ["Stuart", "Jacob"])
    print(" ".join(cipher_words))
def main():
    """Simulate gene trees according to gene and lineage variation.

    A base tree is made by relaxing the first line of "../timetree.nwk"
    with rate 1.  For each of 70 genes a gene rate is drawn and
    params.n_bins_per_gene relaxed trees are generated from the base tree
    with that rate; one value is also drawn per gene for the site
    variation.  Each gene's trees are written tab-separated on one line of
    "treefiles.nwk"; the drawn values go one per line into "alphas.txt".
    """
    params = variation.Settings()
    base_tree = variation.relaxed_tree(
        mod.get_file_data("../timetree.nwk")[0], 1, params.lineage_sigma2)

    trees = []
    alphas = []
    for _ in range(70):
        # Call order (gene rate, bin trees, alpha) matches the original so
        # that a seeded run draws the same values.
        rate = variation.get_rate(params.gene_alpha, params.gene_beta)
        trees.append([
            variation.relaxed_tree(base_tree, rate, params.lineage_sigma2)
            for _ in range(params.n_bins_per_gene)
        ])
        # NOTE(review): site_alpha is passed for both arguments; confirm
        # that this is intended.
        alphas.append(
            str(variation.get_rate(params.site_alpha, params.site_alpha)))

    # ``with`` closes each file even if a write raises.
    with open("treefiles.nwk", "w") as out_trees:
        for set_of_trees in trees:
            out_trees.write("\t".join(set_of_trees) + "\n")
    with open("alphas.txt", "w") as out_alpha:
        out_alpha.write("\n".join(alphas))
def read_codewords(self, filename):
    """Load the codeword table used for the encryption.

    Each line of ``filename`` is split on whitespace; the first field
    becomes a key and the second field its value in the returned dict.
    """
    rows = (entry.split() for entry in mod.get_file_data(filename))
    return {row[0]: row[1] for row in rows}
def read_frequency_table(filename):
    """Read a letter-frequency table into a dictionary.

    Each line of ``filename`` is split on whitespace; the first field is
    the key and the second field, converted to float, is the value.
    """
    table = {}
    for entry in mod.get_file_data(filename):
        symbol, frequency = entry.split()[:2][0], entry.split()[1]
        table[symbol] = float(frequency)
    return table
def main():
    """Read in trees and outgroups, write rooted trees.

    Line i of the outgroups file is used as the outgroup for
    <treedir>/full_<i>.treefile; the rooted tree is written to
    <treedir>/full_<i>.rooted.
    """
    treedir, outgroups_file = get_args()
    outgroups = mod.get_file_data(outgroups_file)
    for i, outgroup in enumerate(outgroups):
        stem = treedir + "/full_" + str(i)
        tree = ete3.Tree(stem + ".treefile", format=1)
        tree.set_outgroup(outgroup)
        # ``with`` closes the file even if serialising the tree fails.
        with open(stem + ".rooted", "w") as out:
            out.write(tree.write())
def get_rates(rates_file):
    """Get rates as a dictionary from a paml output file.

    Only lines that begin with whitespace followed by an integer, an
    integer, two non-blank tokens and another integer are used.  For those
    lines the first whitespace-separated field is the key and the fourth
    field is the value; values are kept as strings.
    """
    # Raw string: "\s" and "\S" are regex escapes, not string escapes.
    pattern = re.compile(r"^\s+[0-9]+\s+[0-9]+\s+\S+\s+\S+\s+[0-9]+")
    rates = {}
    for line in mod.get_file_data(rates_file):
        if pattern.match(line):
            fields = line.split()
            rates[fields[0]] = fields[3]
    return rates
def main():
    """Modify the base tree with 70 simulated gene rates.

    The base tree is the first line of "../timetree.nwk".  For each of 70
    genes a rate is drawn with variation.get_rate() and the tree returned
    by variation.modified_tree() is appended as one line to
    "treefiles.nwk".
    """
    base_tree = mod.get_file_data("../timetree.nwk")[0]
    params = variation.Settings()
    # Open once in append mode instead of reopening for every tree;
    # ``with`` closes the file even if a simulation step raises.
    with open("treefiles.nwk", "a") as out:
        for _ in range(70):
            rate = variation.get_rate(params.gene_alpha, params.gene_beta)
            out.write(variation.modified_tree(base_tree, rate) + "\n")
def main():
    """Extract a subtree for each line of the nodes file and write its sequences.

    The recursion limit is raised to 20000 first.  Each line of the nodes
    file is passed to tm.extract_treenode(); when that returns something
    truthy, write_seqs() is called with the subtree, the sequence
    directories, the line's index and the line itself.
    """
    sys.setrecursionlimit(20000)
    nodesfile, in_treedir, in_seqdir, out_seqdir = get_args()
    for index, node_line in enumerate(mod.get_file_data(nodesfile)):
        subtree = tm.extract_treenode(node_line, in_treedir)
        if not subtree:
            continue
        write_seqs(subtree, in_seqdir, out_seqdir, index, node_line)
        print()
def main():
    """Guess whether a ciphertext is a transposition or substitution cipher.

    The user is asked for the ciphertext file; its lines are joined into
    one string, and the letter frequencies of that string are compared
    with the table in "freq_table.tab" via is_appropriately_similar().
    """
    ciphertext = "".join(mod.get_file_data(input("Name of ciphertext file")))
    base_frequencies = read_frequency_table("freq_table.tab")
    message_freqs = letter_frequencies(ciphertext)
    looks_like_english = is_appropriately_similar(
        base_frequencies, message_freqs, 10, 2)
    if not looks_like_english:
        print("It is probably a substitution cipher")
        return
    print("It is probably a transposition cipher")
def main():
    """Write 70 copies of one relaxed gene tree to a file.

    The first line of "../timetree.nwk" is relaxed once with rate 1 and
    params.lineage_sigma2; the resulting tree is appended 70 times, one
    per line, to "treefiles.nwk".
    """
    base_tree = mod.get_file_data("../timetree.nwk")[0]
    params = variation.Settings()
    tree = variation.relaxed_tree(base_tree, 1, params.lineage_sigma2)
    # ``with`` closes the file even if a write raises.
    with open("treefiles.nwk", "a") as out:
        for _ in range(70):
            out.write(tree + "\n")
def __init__(self):
    """Set up the message, key, grid size and codeword table.

    The plaintext is the first line of "message.txt" and the codewords
    are loaded from "codewords.txt".  The plaintext is also stored split
    into words, and the key split into a list of ints.
    """
    # ---- user-editable settings: start ----
    self.plaintext = mod.get_file_data("message.txt")[0]
    self.key = "-1 3 -2 6 5 -4"
    self.nrow, self.ncol = 7, 6
    self.codewords = self.read_codewords("codewords.txt")
    # ---- user-editable settings: end ----

    # Values derived from the settings above.
    self.textlist = self.plaintext.split()
    self.keylist = [int(token) for token in self.key.split()]
def main():
    """Generate gene trees and write them to a file.

    The first line of "../timetree.nwk" is relaxed once with rate 1 and
    params.lineage_sigma2.  For each of 70 genes a rate is drawn and
    applied to that relaxed tree with variation.modified_tree(); each
    result is appended as one line to "treefiles.nwk".
    """
    base_tree = mod.get_file_data("../timetree.nwk")[0]
    params = variation.Settings()
    tree = variation.relaxed_tree(base_tree, 1, params.lineage_sigma2)
    # ``with`` closes the file even if a simulation step raises.
    with open("treefiles.nwk", "a") as out:
        for _ in range(70):
            rate = variation.get_rate(params.gene_alpha, params.gene_beta)
            out.write(variation.modified_tree(tree, rate) + "\n")
def main():
    """Write the gene-tree nodes that pass the duplication filter to a file.

    Family names are the first tab-separated column of the orthogroups
    file (first line skipped).  For each family the gene tree
    <gene_tree_dir>/<family><extention> is read; families whose tree
    cannot be read are skipped.  For every node that tm.is_duplication()
    accepts, its descendants are checked in turn: when one is species-like
    and tm.node_with_deletion() holds for it against the other, a line
    "<family>\\t<node name>" is written and the node is done.  Progress
    messages go to stderr.
    """
    (species_treefile, orthogroups_file, gene_tree_dir,
     outfile, extention) = get_args()

    # Species tree: collect its nodes once for the comparisons below.
    species_tree = tm.read_tree(species_treefile)
    sp_nnodes = tm.calculate_nnodes(species_tree)
    species_tree_nodes = tm.get_all_nodes(species_tree, sp_nnodes)

    # Gene trees: one candidate family per line after the header.
    lines = mod.get_file_data(orthogroups_file)
    candidates = [line.split("\t")[0] for line in lines[1:]]

    # The original never closed this file; ``with`` guarantees the output
    # is flushed and closed, including when an error interrupts the run.
    with open(outfile, "w") as out:
        for family_name in candidates:
            sys.stderr.write(family_name + "\n")
            try:
                tree = tm.read_tree(
                    gene_tree_dir + "/" + family_name + extention)
            except Exception:
                # Best effort: unreadable trees are reported and skipped.
                # Unlike a bare ``except:``, Ctrl-C still stops the run.
                sys.stderr.write(family_name + "not in candidates\n")
                continue
            nnodes = tm.calculate_nnodes(tree)
            sys.stderr.write("nodes = " + str(nnodes) + "\n")
            nodes = tm.get_all_nodes(tree, nnodes, [], [])
            backup = newick.dumps(tree)
            for node in nodes:
                sys.stderr.write("node = " + str(node) + "\n")
                # Kept from the original: the tree is re-parsed from the
                # backup for every node.
                tree = newick.loads(backup)
                if not tm.is_duplication(node):
                    continue
                sys.stderr.write("is duplication\n")
                sides = node.descendants
                for i, side in enumerate(sides):
                    # (i + 1) % 2 selects the other of the first two sides.
                    if (tm.is_species_like(side, species_tree_nodes)
                            and tm.node_with_deletion(
                                side, sides[(i + 1) % 2])):
                        out.write(family_name + "\t" + node.name + "\n")
                        sys.stderr.write("good node!\n\n")
                        break
def main():
    """Write the gene families that match the filter, one per line.

    The first line of the input file is skipped.  A line is accepted when
    its fifth whitespace-separated field is "Non-Terminal" and its fourth
    field, as a float, lies strictly between 0.4 and 1.0.  The first field
    of each accepted line is written once, in order of first appearance.
    """
    dups_file, outfile = get_args()
    families = []
    seen = set()  # O(1) duplicate check; ``families`` keeps the order.
    for line in mod.get_file_data(dups_file)[1:]:
        fields = line.split()
        if fields[4] != "Non-Terminal":
            continue
        if not 0.4 < float(fields[3]) < 1.0:
            continue
        family = fields[0]
        if family not in seen:
            seen.add(family)
            families.append(family)
    # ``with`` closes the file even if the write raises.
    with open(outfile, "w") as out:
        out.write("\n".join(families))
def main():
    """Concatenate every phylip file in a directory into one phylip file.

    All files matching "<directory>/*phy" are read in the order glob
    returns them.  The first line of each file is skipped; every other
    line is split on runs of two or more whitespace characters into a name
    and a sequence, spaces are removed from the sequence, and sequences
    with the same name are joined.  The output starts with
    "<number of names> <length of first sequence>".
    """
    directory, output = read_args(sys.argv[1:])
    seqs = {}
    for phy_file in glob.glob(directory + "/*phy"):
        for line in mod.get_file_data(phy_file)[1:]:
            fields = re.split(r'\s{2,}', line)
            seq = fields[1].replace(" ", "")
            seqs[fields[0]] = seqs.get(fields[0], "") + seq

    # ``with`` closes the file even if a write fails part-way through.
    with open(output, "w") as out:
        if seqs:
            first_seq = next(iter(seqs.values()))
            out.write(str(len(seqs)) + " " + str(len(first_seq)) + "\n")
        for key, value in seqs.items():
            out.write(key + " " + value + "\n")
def main():
    """Simulate gene trees with gene + lineage + site-by-lineage variation.

    The base tree is the first line of "../timetree.nwk".  For each of 70
    genes a gene rate is drawn and params.n_bins_per_gene relaxed trees
    are generated from the base tree with that rate.  Each gene's trees
    are written tab-separated on one line of "treefiles.nwk".
    """
    base_tree = mod.get_file_data("../timetree.nwk")[0]
    params = variation.Settings()
    # ``with`` closes the file even if a simulation step raises.
    with open("treefiles.nwk", "w") as out:
        for _ in range(70):
            rate = variation.get_rate(params.gene_alpha, params.gene_beta)
            # Fix: build a fresh list per gene.  The original shared one
            # list across all genes, so every line also repeated the trees
            # of all earlier genes.
            relaxed_trees = [
                variation.relaxed_tree(base_tree, rate, params.lineage_sigma2)
                for _ in range(params.n_bins_per_gene)
            ]
            out.write("\t".join(relaxed_trees) + "\n")
def main():
    """Simulate gene trees according to gene variation.

    The base tree is the first line of "../timetree.nwk".  For each of 70
    genes a rate is drawn and applied with variation.modified_tree(), and
    one value is drawn for the site variation.  Trees are written one per
    line to "treefiles.nwk" and the drawn values one per line to
    "alphas.txt".
    """
    base_tree = mod.get_file_data("../timetree.nwk")[0]
    params = variation.Settings()
    trees = []
    alphas = []
    for _ in range(70):
        # Call order (gene rate, tree, alpha) matches the original so that
        # a seeded run draws the same values.
        rate = variation.get_rate(params.gene_alpha, params.gene_beta)
        trees.append(variation.modified_tree(base_tree, rate))
        # NOTE(review): site_alpha is passed for both arguments; confirm
        # that this is intended.
        alphas.append(
            str(variation.get_rate(params.site_alpha, params.site_alpha)))

    # ``with`` closes each file even if a write raises.
    with open("treefiles.nwk", "w") as out_trees:
        out_trees.write("\n".join(trees))
    with open("alphas.txt", "w") as out_alpha:
        out_alpha.write("\n".join(alphas))
def main():
    """Decrypt the first line of "message.txt" with a user-supplied n.

    The user is asked for an integer n; the result of decrypt_null() on
    the message and n is printed.
    """
    ciphertext = mod.get_file_data("message.txt")[0]
    step = int(input("gizzus n"))
    print(decrypt_null(ciphertext, step))
def main():
    """Decrypt the first line of "message.txt" with a user-supplied n.

    The user is asked for an integer n; the result of decrypt() on the
    text and n is printed.
    """
    ciphertext = mod.get_file_data("message.txt")[0]
    step = int(input("gis n"))
    plaintext = decrypt(ciphertext, step)
    print(plaintext)