def __init__(self, parent, base_dir=None, short_name=None):
    """Record the owner of the graph and work out where it is saved.

    parent:     object owning this graph; its ``p.graphs_dir`` is used
                as the output directory when *base_dir* is not given.
    base_dir:   optional output directory, prepended as-is to the name.
    short_name: optional file stem; when omitted, ``self.short_name``
                must already be available on the instance or class.
    """
    # Owner of the graph #
    self.parent = parent
    # Output directory: an explicit argument wins over the parent's #
    self.base_dir = base_dir if base_dir else self.parent.p.graphs_dir
    # Only override the short name when one was supplied #
    if short_name:
        self.short_name = short_name
    # Main output file and its CSV companion #
    self.path = FilePath(self.base_dir + self.short_name + '.pdf')
    self.csv_path = self.path.replace_extension('csv')
    # Development mode is off by default #
    self.dev_mode = False
class Graph(object):
    """Base class for a figure that is saved to disk in several formats.

    The class attributes are the defaults used by `save_plot`:
    `width` and `height` are handed to the figure's size setters,
    `bottom`, `top`, `left` and `right` are the subplot margins, and
    `formats` lists the file extensions the figure is saved under.
    """

    width = 12.0
    height = 7.0
    bottom = 0.14
    top = 0.93
    left = 0.06
    right = 0.98
    formats = ('pdf',)

    def __init__(self, parent, base_dir=None, short_name=None):
        """Record the owner of the graph and work out where it is saved.

        parent:     object owning this graph; its ``p.graphs_dir`` is
                    the output directory when *base_dir* is not given.
        base_dir:   optional output directory, prepended as-is.
        short_name: optional file stem; when omitted, ``short_name``
                    must already exist on the instance or class.
        """
        # Save parent #
        self.parent = parent
        # Base dir #
        if not base_dir:
            self.base_dir = self.parent.p.graphs_dir
        else:
            self.base_dir = base_dir
        # Short name #
        if short_name:
            self.short_name = short_name
        # Paths #
        self.path = FilePath(self.base_dir + self.short_name + '.pdf')
        self.csv_path = self.path.replace_extension('csv')
        # Extra #
        self.dev_mode = False

    def save_plot(self, fig, axes, width=None, height=None, bottom=None,
                  top=None, left=None, right=None, sep=()):
        """Size the figure, decorate it and save it once per format.

        fig, axes: the figure and axes objects to finalise.
        width, height, bottom, top, left, right: per-call overrides of
            the class defaults; ``None`` means "use the default".
        sep: container holding 'x' and/or 'y' to enable thousands
            grouping on the tick labels of that axis.
        """
        # Attributes or parameters #
        # Identity test on purpose: `!= None` would go through __ne__ and
        # could silently discard a legitimate override value.
        fig_width = width if width is not None else self.width
        fig_height = height if height is not None else self.height
        margin_bottom = bottom if bottom is not None else self.bottom
        margin_top = top if top is not None else self.top
        margin_left = left if left is not None else self.left
        margin_right = right if right is not None else self.right
        # Adjust #
        fig.set_figwidth(fig_width)
        fig.set_figheight(fig_height)
        fig.subplots_adjust(hspace=0.0, bottom=margin_bottom, top=margin_top,
                            left=margin_left, right=margin_right)
        # Data and source: stamp the figure when in development mode #
        if self.dev_mode:
            fig.text(0.99, 0.98, time.asctime(), horizontalalignment='right')
            job_name = os.environ.get('SLURM_JOB_NAME', 'Unnamed')
            user_msg = 'user: %s, job: %s' % (getpass.getuser(), job_name)
            fig.text(0.01, 0.98, user_msg, horizontalalignment='left')
        # Nice digit grouping #
        if 'x' in sep:
            separate = lambda x, pos: split_thousands(x)
            axes.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(separate))
        if 'y' in sep:
            separate = lambda y, pos: split_thousands(y)
            axes.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(separate))
        # Save it as different formats #
        for ext in self.formats:
            fig.savefig(self.path.replace_extension(ext))
def __init__(self, parent):
    """Attach to *parent* and declare the files and graph of this step.

    *parent* must expose ``tax`` and ``p.unifrac_dir``; the class is
    expected to provide ``all_paths`` for the automatic path object.
    """
    # The owner is reachable under two names #
    self.stat = self.parent = parent
    self.tax = parent.tax
    # Automatic paths rooted in the parent's unifrac directory #
    self.p = AutoPaths(self.parent.p.unifrac_dir, self.all_paths)
    # Alignments #
    self.clustalo_aligned = FASTA(self.p.clustalo_align)
    self.pynast_aligned = FASTA(self.p.pynast_align)
    self.mothur_aligned = FASTA(self.p.mothur_align)
    # Trees #
    self.raxml_tree = FilePath(self.p.raxml_tree)
    self.fasttree_tree = FilePath(self.p.fasttree_tree)
    # Distance matrix #
    self.distances_csv = CSVTable(self.p.distances_csv)
    # NMDS graph built on the distance table, with calc_distance disabled #
    self.nmds = NMDS(self, self.distances_csv, calc_distance=False)
def __init__(self, cluster):
    """Attach to *cluster* and declare the files produced by this method.

    *cluster* must expose ``samples``, ``reads`` and ``p.otus_dir``; the
    class is expected to provide ``short_name`` and ``all_paths``.
    """
    # The owner is reachable under two names #
    self.cluster = self.parent = cluster
    # Borrowed from the owner #
    self.samples = self.parent.samples
    # Everything lives in a sub-directory named after the method #
    self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # The input reads are the owner's #
    self.reads = self.parent.reads
    # Complete outputs #
    self.all_otus = FilePath(self.p.all_otus)
    self.all_centers = FASTA(self.p.all_centers)
    # Outputs placed directly in the base directory #
    self.otus = FilePath(self.base_dir + "otus.txt")
    self.centers = FASTA(self.base_dir + "centers.fasta")
    # One taxonomy object per database #
    self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod', self.p.silva)
    self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater', self.p.fw_dir)
    # The silvamod assignment is the default one #
    self.taxonomy = self.taxonomy_silva
def __init__(self, json_path, out_dir):
    """Load the sample description from a JSON file and declare its files.

    json_path: path of the JSON file describing the sample.
    out_dir:   directory under which the sample's own directory goes.

    Raises an Exception when one of the listed SFF files is missing.
    """
    # Inputs #
    self.out_dir = out_dir
    self.json_path = FilePath(json_path)
    # Read the description #
    with open(json_path) as handle:
        self.info = json.load(handle)
    # Run and project #
    self.run_num = self.info['run_num']
    self.project_short_name = self.info['project']
    self.project_long_name = self.info['project_name']
    # Sample identity #
    self.num = self.info['sample_num']
    self.short_name = self.info['sample']
    self.long_name = self.info['sample_name']
    self.name = 'run%i_sample%i' % (self.run_num, self.num)
    self.group = self.info['group']
    self.id_name = "run%03d-sample%02d" % (self.run_num, self.num)
    # Every SFF file listed must be present on disk #
    self.sff_files_info = self.info['files']
    for entry in self.sff_files_info:
        if os.path.exists(entry['path']):
            continue
        raise Exception("No file at %s" % entry['path'])
    # Directory of the sample and its automatic paths #
    self.base_dir = self.out_dir + self.id_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Link the JSON next to the results #
    self.p.info_json.link_from(self.json_path, safe=True)
    # The sample stands in for its own pool and parent #
    self.pool = self.parent = self
    # Placeholder attributes #
    self.bar_len = 0
    self.gziped = False
    self.used = True
    # Primer pattern #
    self.primer_regex = re.compile(self.info['primer'])
    # Raw reads #
    self.raw_fasta = FASTA(self.p.raw_fasta)
    self.raw_fastq = FASTQ(self.p.raw_fastq)
    # Processed reads #
    self.reads = FASTA(self.p.reads_fasta)
    self.fasta = FASTA(self.p.renamed)
    self.fastq = FASTQ(self.p.reads_fastq)
    # `renamed` aliases the FASTQ (a hack for cdhit to work) #
    self.renamed = self.fastq
    # Pre-denoised special case: the `and False` keeps it switched off #
    if self.info['predenoised'] and False:
        self.sff_files_info = []
        self.reads.link_from(self.info['predenoised'], safe=True)
def __init__(self, cluster):
    """Attach to *cluster* and declare the files produced by this method.

    *cluster* must expose ``samples`` and ``p.otus_dir``; the class is
    expected to provide ``short_name`` and ``all_paths``.
    """
    # The owner is reachable under two names #
    self.cluster = self.parent = cluster
    # Borrowed from the owner #
    self.samples = self.parent.samples
    # Everything lives in a sub-directory named after the method #
    self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # This method keeps its own reads file, in FASTQ format #
    self.reads = FASTQ(self.p.all_reads)
    # Outputs #
    self.cdhit_clusters = FilePath(self.p.clstr)
    self.cdhit_centers = FASTA(self.p.clusters_dir + "OTU")
    self.centers = FASTA(self.p.centers)
    # One taxonomy object per database #
    self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod', self.p.silva)
    self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater', self.p.fw_dir)
    # The silvamod assignment is the default one #
    self.taxonomy = self.taxonomy_silva
class UclustOTUs(OTUs):
    """Will use uclust via the QIIME wrapper to create OTU clusters from
    a given FASTA file.
    http://qiime.org/scripts/pick_otus.html"""

    short_name = 'uclust'
    title = 'UCLUST-QIIME denovo picking'

    all_paths = """
    /clusters/clusters.uc
    /clusters/qiime.log
    /clusters/all_otus.txt
    /clusters/all_centers.fasta
    /centers.fasta
    /otus.txt
    /taxonomy_silva/
    /taxonomy_fw/
    /graphs/
    """

    def __repr__(self):
        return '<%s object of %s>' % (self.__class__.__name__, self.parent)

    def __init__(self, cluster):
        """Attach to *cluster* and declare the files produced by `run`.

        *cluster* must expose ``samples``, ``reads`` and ``p.otus_dir``.
        """
        # Save parent #
        self.cluster, self.parent = cluster, cluster
        # Inherited #
        self.samples = self.parent.samples
        # Paths #
        self.base_dir = self.parent.p.otus_dir + self.short_name + '/'
        self.p = AutoPaths(self.base_dir, self.all_paths)
        # Main FASTA file #
        self.reads = self.parent.reads
        # Files #
        self.all_otus = FilePath(self.p.all_otus)
        self.all_centers = FASTA(self.p.all_centers)
        self.otus = FilePath(self.base_dir + "otus.txt")
        self.centers = FASTA(self.base_dir + "centers.fasta")
        # Taxonomy #
        self.taxonomy_silva = CrestTaxonomy(self.centers, self, 'silvamod', self.p.silva)
        self.taxonomy_fw = CrestTaxonomy(self.centers, self, 'freshwater', self.p.fw_dir)
        # Preferred one #
        self.taxonomy = self.taxonomy_silva

    def run(self):
        """Pick OTUs with `pick_otus.py`, drop the single-read OTUs and
        write the matching representative sequences.

        Sets `self.otus_to_keep` to the list of OTU names that were kept.
        """
        # Clean #
        shutil.rmtree(self.p.clusters_dir)
        # Run command #
        pick_otus = sh.Command('pick_otus.py')
        pick_otus('-m', 'uclust', '-i', self.reads, '-o', self.p.clusters_dir)
        # Move into place #
        base_name = self.p.clusters_dir + self.reads.prefix
        shutil.move(base_name + '_otus.txt', self.all_otus)
        shutil.move(base_name + '_otus.log', self.p.qiime_log)
        shutil.move(base_name + '_clusters.uc', self.p.clusters_uc)
        # Remove OTUs that are only one read #
        def filter_singletons(f):
            for line in f:
                # First field is the OTU name, the rest are its reads
                fields = line.split()
                if len(fields) > 2:
                    yield '\t'.join(fields) + '\n'
        self.otus.writelines(filter_singletons(self.all_otus))
        # Create the centers file that is missing #
        pick_rep = sh.Command('pick_rep_set.py')
        pick_rep('-i', self.all_otus, '-f', self.reads, '-o', self.all_centers)
        # Remake the centers file without the filtered OTUs #
        self.otus_to_keep = [line.split()[0] for line in self.otus]
        # The public attribute stays a list; membership tests use a set so
        # that filtering does not scan the whole list for every sequence.
        kept = frozenset(self.otus_to_keep)
        def filter_otus(f):
            for seq in f:
                if seq.id in kept:
                    yield seq
        self.centers.write(filter_otus(self.all_centers))

    @property_cached
    def cluster_counts_table(self):
        """Create the unfiltered OTU table.

        Reads `self.otus` line by line (OTU name followed by its reads),
        maps every read name back to a sample and counts the reads per
        OTU and per sample short name. Returns an integer
        `pandas.DataFrame` with one column per OTU, columns ordered with
        `natural_sort`.

        Raises IndexError when a read name matches neither naming scheme
        or when no sample matches its run and sample numbers.
        """
        # Compile the two read-name schemes once instead of per read #
        pooled_read = re.compile("run([0-9]+)_pool([0-9]+)_sample([0-9]+)_read([0-9]+)")
        unpooled_read = re.compile("run([0-9]+)_sample([0-9]+)_read([0-9]+)")
        # Remember which sample each (run, sample) pair resolved to, so the
        # linear search below runs once per sample instead of once per read.
        unpooled_samples = {}
        # Put results in a dict of dicts #
        result = defaultdict(lambda: defaultdict(int))
        # Loop #
        for line in self.otus:
            # Parse the line #
            contents = line.split()
            otu, reads = contents[0], contents[1:]
            # Parse the hits #
            for r in reads:
                nums = pooled_read.findall(r)
                if nums:
                    run_num, pool_num, sample_num, read_num = map(int, nums[0])
                    sample = illumitag.runs[run_num][pool_num-1][sample_num-1]
                else:
                    nums = unpooled_read.findall(r)
                    run_num, sample_num, read_num = map(int, nums[0])
                    key = (run_num, sample_num)
                    if key not in unpooled_samples:
                        unpooled_samples[key] = [
                            s for s in illumitag.presamples+illumitag.pyrosamples
                            if s.run_num==run_num and s.num==sample_num][0]
                    sample = unpooled_samples[key]
                # Count #
                result[otu][sample.short_name] += 1
        # Return #
        result = pandas.DataFrame(result)
        result = result.fillna(0)
        result = result.astype(int)
        # `reindex_axis` was removed from pandas; `reindex(columns=...)`
        # reorders the columns in the same way.
        result = result.reindex(columns=sorted(result.columns, key=natural_sort))
        return result
def __init__(self, json_path, out_dir):
    """Load the sample description from a JSON file and build every
    object attached to the sample.

    json_path: path of the JSON file describing the sample.
    out_dir:   directory under which the sample's own directory goes.
    """
    # Attributes #
    self.out_dir = out_dir
    self.json_path = FilePath(json_path)
    # Parse #
    with open(json_path) as handle:
        self.info = json.load(handle)
    # Basic #
    self.account = self.info['uppmax_id']
    self.run_num = self.info['run_num']
    self.run_label = self.info['run_id']
    self.project_short_name = self.info['project']
    self.project_long_name = self.info['project_name']
    self.fwd_name = self.info['forward_reads']
    self.rev_name = self.info['reverse_reads']
    # Own attributes #
    self.num = self.info['sample_num']
    self.label = self.info['sample_id']
    self.short_name = self.info['sample']
    self.long_name = self.info['sample_name']
    self.name = 'run%i_sample%i' % (self.run_num, self.num)
    self.group = self.info['group']
    self.id_name = "run%03d-sample%02d" % (self.run_num, self.num)
    self.fwd_mid = self.info['forward_mid']
    # The reverse MID used to be read from 'forward_mid' (copy-paste slip).
    # Read 'reverse_mid' instead, keeping the old value as a fallback for
    # JSON files that do not carry that key.
    self.rev_mid = self.info.get('reverse_mid', self.fwd_mid)
    # Automatic paths #
    self.base_dir = self.out_dir + self.id_name + '/'
    self.p = AutoPaths(self.base_dir, self.all_paths)
    # Special #
    self.primers = TwoPrimers(self)
    # Samples dummy: a single entry describing this very sample #
    self.info['samples'] = [{
        "name": self.short_name,
        "used": 1,
        "group": self.group,
        "dummy": 1,
        "num": self.num,
        "fwd": "",
        "rev": "",
    }]
    self.samples = Samples(self)
    self.samples.load()
    # Pool dummy: the sample stands in for its own pool and parent #
    self.pool, self.parent = self, self
    # Files #
    self.fwd_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (
        self.run_label, self.label, self.fwd_name)
    self.rev_path = home + "ILLUMITAG/INBOX/%s/%s/%s" % (
        self.run_label, self.label, self.rev_name)
    # Compression is guessed from the name of the forward file #
    self.gziped = self.fwd_path.endswith('gz')
    self.fwd = FASTQ(self.p.fwd)
    self.rev = FASTQ(self.p.rev)
    self.fastq = PairedFASTQ(self.fwd.path, self.rev.path, self)
    # Barcode length #
    self.bar_len = 0
    # Make an alias to the json #
    self.p.info_json.link_from(self.json_path, safe=True)
    # Assembly files as children #
    self.assembled = Assembled(self)
    self.unassembled = Unassembled(self)
    self.children = (self.assembled, self.unassembled)
    self.first = self.assembled
    # Graphs: one instance of every class exported by outcome_plots #
    self.graphs = [getattr(outcome_plots, cls_name)(self)
                   for cls_name in outcome_plots.__all__]
    # Runner #
    self.runner = PresampleRunner(self)
    # Final #
    self.trimmed = FASTQ(self.p.trimmed)
    self.renamed = FASTQ(self.p.renamed)
    self.fasta = FASTA(self.p.reads_fasta)
def __new__(cls, path=None, content=None, **kwargs):
    """Create the file on disk and return an instance pointing at it.

    path:    where to create the file (opened for writing, so an
             existing file is truncated); when empty, a named temporary
             file that is not deleted on close is created instead.
    content: optional data written to the new file.
    kwargs:  forwarded to `tempfile.NamedTemporaryFile`; only used when
             no *path* is given.
    """
    if path:
        handle = open(path, 'w')
    else:
        handle = tempfile.NamedTemporaryFile(delete=False, **kwargs)
    # Close the handle even when writing fails, so it is never leaked #
    try:
        if content:
            handle.write(content)
    finally:
        handle.close()
    return FilePath.__new__(cls, handle.name)