def __init__(self, parent, base_dir, lower_bound, upper_bound):
    """Store the owner, the working directory and the length bounds, then
    build the FASTA handle, the two chimera checkers and the two taxonomy
    classifiers of this object.

    * parent: kept under two attribute names, `parent` and `fractions`.
    * base_dir: handed to AutoPaths together with `self.all_paths`.
    * lower_bound, upper_bound: stored unchanged on the instance.
    """
    # The owner is reachable under both names #
    self.parent = parent
    self.fractions = parent
    # Auto paths #
    self.base_dir = base_dir
    paths = AutoPaths(self.base_dir, self.all_paths)
    self.p = paths
    # Bounds #
    self.lower_bound, self.upper_bound = lower_bound, upper_bound
    # Reads and chimera checkers (both checkers get `self` as last argument) #
    self.reads = FASTA(paths.reads_fasta)
    self.refere = UchimeRef(paths.reads, paths.refere_dir, self)
    self.denovo = UchimeDenovo(paths.reads, paths.denovo_dir, self)
    # Classification, both built on the same reads object #
    self.rdp = SimpleRdpTaxonomy(self.reads, paths.rdp_dir)
    self.crest = SimpleCrestTaxonomy(self.reads, paths.crest_dir)
class Fraction(object):
    """One size fraction from the quality reads.

    Each instance owns a reads FASTA file, two chimera checkers (`refere`
    and `denovo`) and two taxonomy classifiers (`rdp` and `crest`), all
    laid out under `base_dir` according to `all_paths`.
    """

    all_paths = """
    /reads.fasta
    /refere/
    /denovo/
    /rdp/
    /crest/
    """

    def __init__(self, parent, base_dir, lower_bound, upper_bound):
        """Store the owner, the working directory and the length bounds,
        then build the reads handle, chimera checkers and classifiers.

        * parent: kept under two attribute names, `parent` and `fractions`.
        * base_dir: handed to AutoPaths together with `all_paths`.
        * lower_bound, upper_bound: stored unchanged; `extract` forwards
          them to `extract_length`.
        """
        # The owner is reachable under both names #
        self.parent = parent
        self.fractions = parent
        # Auto paths #
        self.base_dir = base_dir
        paths = AutoPaths(self.base_dir, self.all_paths)
        self.p = paths
        # Bounds #
        self.lower_bound, self.upper_bound = lower_bound, upper_bound
        # Reads and chimera checkers (both checkers get `self` as last argument) #
        self.reads = FASTA(paths.reads_fasta)
        self.refere = UchimeRef(paths.reads, paths.refere_dir, self)
        self.denovo = UchimeDenovo(paths.reads, paths.denovo_dir, self)
        # Classification, both built on the same reads object #
        self.rdp = SimpleRdpTaxonomy(self.reads, paths.rdp_dir)
        self.crest = SimpleCrestTaxonomy(self.reads, paths.crest_dir)

    def extract(self):
        """Ask the owner's pool to fill `self.reads` and return `self.reads`.

        The call goes to `extract_length` on
        `self.fractions.pool.quality_reads.only_used`, with the two stored
        bounds and `self.reads` as arguments.
        """
        source = self.fractions.pool.quality_reads.only_used
        source.extract_length(self.lower_bound, self.upper_bound, self.reads)
        return self.reads

    def check_chimeras(self):
        """Run `check()` on the reference checker, then on the de novo one,
        and return both as the tuple `(refere, denovo)`."""
        checkers = (self.refere, self.denovo)
        for checker in checkers:
            checker.check()
        return checkers

    def check_classification(self):
        """Run `assign()` on the RDP classifier. Returns nothing."""
        # NOTE(review): only `self.rdp` is run here; confirm whether
        # `self.crest` is meant to be assigned as well #
        classifier = self.rdp
        classifier.assign()
# Make fraction graph #
proj.graphs[-1].plot()

# Get statistics #
# Bare attribute access: the value is neither stored nor printed here #
proj.reporter.fraction_discarded

# Get clustering values #
# Collect the distinct runs in first-seen order. Unpacking straight from a
# set made it arbitrary which run became r1 and which r2. Exactly two
# distinct runs are still required (the unpacking raises otherwise). #
runs = []
for p in proj:
    run = p.run
    if run not in runs:
        runs.append(run)
r1, r2 = runs
for run in (r1, r2):
    run.parse_report_xml()
# Ratio DensityPF / DensityRaw of the 'fwd' entry, one line per run.
# The parenthesised print behaves the same on Python 2 and parses on Python 3. #
print(float(r1.report_stats['fwd']['DensityPF']) / float(r1.report_stats['fwd']['DensityRaw']))
print(float(r2.report_stats['fwd']['DensityPF']) / float(r2.report_stats['fwd']['DensityRaw']))

# Check below 400 bp sequences #
folder = DirectoryPath(illumitag.projects['evaluation'].base_dir + "below_400/")
over = FASTA(folder + "reads.fasta")


def over_iterator(reads, max_length=400):
    """Yield every read from `reads` whose length is at most `max_length`.

    The bound is inclusive, so a read of exactly `max_length` is kept.
    """
    for read in reads:
        if len(read) <= max_length:
            yield read


# Gather the short reads of every pool into one FASTA file #
over.create()
for pool in pools:
    over.add_iterator(over_iterator(pool.good_barcodes.assembled.good_primers.qual_filtered))
over.close()
over.graphs[-1].plot()

# Classify the short reads with both taxonomy tools, same file and folder #
crest = SimpleCrestTaxonomy(over, folder)
crest.assign()
crest.composition.graph.plot()
rdp = SimpleRdpTaxonomy(over, folder)
rdp.assign()
rdp.composition.graph.plot()
# Ratio DensityPF / DensityRaw of the 'fwd' entry for r2.
# The parenthesised print behaves the same on Python 2 and parses on Python 3. #
print(float(r2.report_stats['fwd']['DensityPF']) / float(r2.report_stats['fwd']['DensityRaw']))

# Check below 400 bp sequences #
folder = DirectoryPath(illumitag.projects['evaluation'].base_dir + "below_400/")
over = FASTA(folder + "reads.fasta")


def over_iterator(reads, max_length=400):
    """Yield every read from `reads` whose length is at most `max_length`.

    The bound is inclusive, so a read of exactly `max_length` is kept.
    """
    for read in reads:
        if len(read) <= max_length:
            yield read


# Gather the short reads of every pool into one FASTA file #
over.create()
for pool in pools:
    over.add_iterator(over_iterator(pool.good_barcodes.assembled.good_primers.qual_filtered))
over.close()
over.graphs[-1].plot()

# Classify the short reads with both taxonomy tools, same file and folder #
crest = SimpleCrestTaxonomy(over, folder)
crest.assign()
crest.composition.graph.plot()
rdp = SimpleRdpTaxonomy(over, folder)
rdp.assign()
rdp.composition.graph.plot()

# Check unassembled mate pairs #
unassembled = [p.good_barcodes.unassembled for p in pools]
paths = [u.flipped_reads.path for u in unassembled]
folder = DirectoryPath(illumitag.projects['evaluation'].base_dir + "unassembled_taxonomy/")
all_unassembled = FASTA(folder + 'unassembled_reads.fasta')
# Concatenate every pool's flipped reads into one file through the shell.
# The paths are interpolated unquoted, so this relies on them containing
# no spaces or shell metacharacters. #
shell_output('cat %s > %s' % (' '.join(paths), all_unassembled))
tax = SimpleRdpTaxonomy(all_unassembled, folder)
tax.assign()
tax.composition.graph.plot()

# Upload raw samples for ENA #
for pool in pools:
    pool.create_raw_samples()