def concatenate_alns(self):
    """Concatenate all alignments in ``self.aln_all`` into one alignment.

    Every per-gene alignment is written to ``<workdir>/aln<N>.fas`` (``N``
    counts from 1 in the iteration order of ``self.aln_all``). The alignments
    are joined pairwise with ``DnaCharacterMatrix.concatenate``; the result is
    written to ``<workdir>/concat.fas`` and stored in
    ``self.concatenated_aln``.

    Raises:
        ValueError: if ``self.aln_all`` is empty, because there is nothing to
            concatenate (previously this surfaced as an UnboundLocalError).
        AssertionError: if an alignment's ``taxon_namespace`` does not compare
            equal to that of the alignments concatenated so far.
    """
    # Validate up front so an empty input fails with a meaningful message
    # instead of an unbound local at the final write.
    if not self.aln_all:
        raise ValueError("cannot concatenate alignments: aln_all is empty")

    physcraper.debug("concat alns")
    concatenated = None
    for count, gene in enumerate(self.aln_all, start=1):
        aln = self.aln_all[gene]
        aln.write(path="{}/aln{}.fas".format(self.workdir, count),
                  schema="fasta")
        if count == 1:
            # With a single gene the stored result is this very object,
            # not a concatenated copy.
            concatenated = aln
            continue
        assert concatenated.taxon_namespace == aln.taxon_namespace
        # Joined pairwise (not all at once) to keep the output identical to
        # the previous implementation.
        concatenated = DnaCharacterMatrix.concatenate([concatenated, aln])

    concatenated.write(path="{}/concat.fas".format(self.workdir),
                       schema="fasta")
    self.concatenated_aln = concatenated
tmp_dict = {} for taxon, seq in physcraper_obj.aln.items(): aln_dict[taxon.label] = seq seqlen = len(seq) #should all be the same length because the sequences are aligned for spp_name in spp_dict.keys(): try: otu = random.choice(spp_dict[spp_name]) tmp_dict[spp_name] = aln_dict[otu] except KeyError: tmp_dict[spp_name] = "-" * seqlen return tmp_dict aln1 = DnaCharacterMatrix.from_dict(arbitrary_prune_fill(spp_to_otu1, gene1)) aln2 = DnaCharacterMatrix.from_dict(arbitrary_prune_fill(spp_to_otu2, gene2), taxon_namespace = aln1.taxon_namespace) concat = DnaCharacterMatrix.concatenate([aln1,aln2]) concat.write(path="concat.fas", schema="fasta") #Open the two physcraper objects #Merge the alignments on OTT_ID? #How to force/missing data ... #Option 1: randomly select one seq from each OTT ID. #Option 2: Use all pairwise? #Option 3: force monophyly of spps? grrrrrrrrrrrrrrrr