def __init__(self, input_fasta, target, output=None, binary="blastn", makeblastdb="makeblastdb", log="/dev/null"):
    """Store the search configuration and prepare the shell command templates.

    Args:
        input_fasta: path of the query file (substituted after ``-query``).
        target: path of the search target (substituted after ``-db`` and
            after ``makeblastdb -in``).
        output: path the search results are written to. When falsy, a name
            obtained from ``get_temporary_file_name()`` is used instead.
        binary: search executable to invoke.
        makeblastdb: database-building executable to invoke.
        log: file that receives stdout/stderr of both commands (appended).
    """
    self.binary = binary
    self.params = ''
    self.input = input_fasta
    self.target = target
    self.output = output
    self.makeblastdb = makeblastdb
    self.log = log
    self.outfmt = "'6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen'"

    # Run the checks before a temporary output name is requested, so that a
    # failing check does not leave one behind.
    self.binary_check()
    self.version_check()

    # `self.output` already holds the caller's value; only replace it when
    # the caller did not supply one.
    if not output:
        self.output = get_temporary_file_name()

    # The %(name)s keys match the attribute names set above; presumably the
    # templates are filled from the instance's attributes -- confirm at the
    # call sites.
    self.search_cmd_tmpl = "%(binary)s -query %(input)s -db %(target)s -out %(output)s -outfmt %(outfmt)s %(params)s >> %(log)s 2>&1"
    self.makeblastdb_cmd_tmpl = "%(makeblastdb)s -in %(target)s -dbtype nucl >> %(log)s 2>&1"

    self.results_dict = {}
def __init__(self, input_fasta, target, output=None, binary="blastn", makeblastdb="makeblastdb", log="/dev/null"):
    """Record search settings and build the command-line templates.

    Args:
        input_fasta: query file path, used as the ``-query`` argument.
        target: search target path, used as the ``-db`` argument and as the
            ``-in`` argument of the database-building command.
        output: results file path. A falsy value is replaced by the name
            returned from ``get_temporary_file_name()``.
        binary: name or path of the search executable.
        makeblastdb: name or path of the database-building executable.
        log: file to which both commands append their stdout and stderr.
    """
    self.binary = binary
    self.params = ''
    self.input = input_fasta
    self.target = target
    self.output = output
    self.makeblastdb = makeblastdb
    self.log = log
    self.outfmt = "'6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen'"

    # Checks come first: no temporary output name is generated if one fails.
    self.binary_check()
    self.version_check()

    # The caller's value was stored above, so only the fallback needs
    # handling here.
    if not output:
        self.output = get_temporary_file_name()

    # Template keys mirror the attribute names assigned above; presumably
    # they are expanded from the instance's attributes -- verify in callers.
    self.search_cmd_tmpl = "%(binary)s -query %(input)s -db %(target)s -out %(output)s -outfmt %(outfmt)s %(params)s >> %(log)s 2>&1"
    self.makeblastdb_cmd_tmpl = "%(makeblastdb)s -in %(target)s -dbtype nucl >> %(log)s 2>&1"

    self.results_dict = {}
def generate_exclusive_figures(_object):
    """Run the NMDS and heatmap R scripts separately for every mapping category.

    For each category returned by ``get_sample_mapping_dict`` a mapping file
    restricted to the usable samples is written, the percent/count matrices
    are filtered to those samples when necessary, and both R scripts are run
    once per distance metric.

    Args:
        _object: analysis object. Its ``sample_mapping``, ``samples``,
            ``tmp_directory``, ``matrix_percent_file_path``,
            ``matrix_count_file_path``, ``figures_directory``, ``project``
            and ``log_file_path`` attributes are read, and its ``logger``,
            ``progress`` and ``generate_output_destination`` members are used.

    Returns:
        dict: ``{category: {output_dir: {distance_metric: output_prefix}}}``.
        A category that is skipped because fewer than 3 samples were left in
        a filtered matrix maps to an empty dict.
    """
    import Oligotyping

    scripts_dir_path = os.path.dirname(Oligotyping.__file__)

    # Every analysis is repeated for each of these metrics, always on the
    # percent matrix.
    distance_metrics = ("canberra", "kulczynski", "jaccard", "horn", "bray")

    exclusive_figures_dict = {}
    sample_mapping_dict = get_sample_mapping_dict(_object.sample_mapping)

    for category in sample_mapping_dict:
        exclusive_figures_dict[category] = {}
        category_mapping = sample_mapping_dict[category]

        # double filter: first makes sure sample was not removed from the
        # analysis due to losing all its reads during the refinement, second
        # makes sure that sample was actually mapped to something in the
        # sample mapping file.
        # sorted() returns a real list on both Python 2 and Python 3, whereas
        # filter() returns an iterator without .sort() on Python 3.
        samples = sorted(
            sample for sample in category_mapping
            if sample in _object.samples and category_mapping[sample]
        )

        mapping_file_path = get_temporary_file_name('%s-' % category, '-mapping.txt', _object.tmp_directory)
        # The context manager closes the file even if a write fails.
        with open(mapping_file_path, 'w') as mapping_file:
            mapping_file.write('samples\t%s\n' % (category))
            for sample in samples:
                mapping_file.write('%s\t%s\n' % (sample, category_mapping[sample]))

        if samples == _object.samples:
            # Nothing was filtered out: reuse the full matrices.
            matrix_percent_path = _object.matrix_percent_file_path
            matrix_count_path = _object.matrix_count_file_path
        else:
            matrix_percent_path = get_temporary_file_name(
                '%s-' % category, '-matrix-percent.txt', _object.tmp_directory)
            matrix_count_path = get_temporary_file_name(
                '%s-' % category, '-matrix-count.txt', _object.tmp_directory)

            if store_filtered_matrix(_object.matrix_percent_file_path, matrix_percent_path, samples) < 3:
                _object.logger.info("skipping exclusive figs for '%s'; less than 3 samples were left in MP"
                                    % (category))
                continue
            if store_filtered_matrix(_object.matrix_count_file_path, matrix_count_path, samples) < 3:
                _object.logger.info("skipping exclusive figs for '%s'; less than 3 samples were left in MC"
                                    % (category))
                continue

        # ready to roll.
        _object.logger.info("exclusive figs for '%s' with %d samples; mapping: '%s', MP: '%s', MC: '%s'"
                            % (category, len(samples), mapping_file_path, matrix_percent_path, matrix_count_path))

        matrix_file = matrix_percent_path

        # NOTE(review): both command lines below are shell strings built by
        # interpolation (they rely on `>>` redirection); `category` comes
        # from the sample mapping data -- confirm that input is trusted.

        # NMDS
        analysis = 'NMDS Analysis'
        script = '../Scripts/R/metaMDS-analysis-with-metadata.R'
        output_dir = 'nmds_analysis'

        exclusive_figures_dict[category][output_dir] = {}
        target_dir = _object.generate_output_destination(
            '%s/%s/%s' % (os.path.basename(_object.figures_directory), category, output_dir),
            directory=True)

        for distance_metric in distance_metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = ('%s "%s" "%s" %s "%s" "%s" "%s" >> "%s" 2>&1' %
                        (os.path.join(scripts_dir_path, script),
                         matrix_file,
                         mapping_file_path,
                         distance_metric,
                         category,
                         _object.project,
                         output_prefix,
                         _object.log_file_path))
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info('exclusive figure: %s' % (cmd_line))
            run_command(cmd_line)
            exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

        # heatmap
        analysis = 'Heatmap Analysis'
        script = '../Scripts/R/heatmap.R'
        output_dir = 'heatmap_analysis'

        exclusive_figures_dict[category][output_dir] = {}
        target_dir = _object.generate_output_destination(
            '%s/%s/%s' % (os.path.basename(_object.figures_directory), category, output_dir),
            directory=True)

        for distance_metric in distance_metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = ('%s "%s" -m "%s" -d %s --title "%s" -o "%s" >> "%s" 2>&1' %
                        (os.path.join(scripts_dir_path, script),
                         matrix_file,
                         mapping_file_path,
                         distance_metric,
                         _object.project,
                         output_prefix,
                         _object.log_file_path))
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info('exclusive figure: %s' % (cmd_line))
            run_command(cmd_line)
            exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

    return exclusive_figures_dict
def generate_exclusive_figures(_object):
    """Produce category-specific NMDS and heatmap figures via the R scripts.

    Each category found by ``get_sample_mapping_dict`` gets its own mapping
    file limited to the usable samples; the percent and count matrices are
    filtered to those samples unless all samples are retained; then both R
    scripts are executed once per distance metric.

    Args:
        _object: analysis object. The function reads ``sample_mapping``,
            ``samples``, ``tmp_directory``, ``matrix_percent_file_path``,
            ``matrix_count_file_path``, ``figures_directory``, ``project``
            and ``log_file_path``, and uses ``logger``, ``progress`` and
            ``generate_output_destination``.

    Returns:
        dict: ``{category: {output_dir: {distance_metric: output_prefix}}}``.
        Categories skipped because fewer than 3 samples were left in a
        filtered matrix keep an empty dict.
    """
    import Oligotyping

    scripts_dir_path = os.path.dirname(Oligotyping.__file__)

    # Metrics each analysis is run with; the percent matrix is always the
    # input.
    distance_metrics = ("canberra", "kulczynski", "jaccard", "horn", "bray")

    exclusive_figures_dict = {}
    sample_mapping_dict = get_sample_mapping_dict(_object.sample_mapping)

    for category in sample_mapping_dict:
        exclusive_figures_dict[category] = {}
        category_mapping = sample_mapping_dict[category]

        # double filter: first makes sure sample was not removed from the
        # analysis due to losing all its reads during the refinement, second
        # makes sure that sample was actually mapped to something in the
        # sample mapping file.
        # Built with sorted() so the result is a list on Python 3 as well;
        # there filter() yields an iterator that has no .sort() method.
        samples = sorted(
            sample for sample in category_mapping
            if sample in _object.samples and category_mapping[sample]
        )

        mapping_file_path = get_temporary_file_name("%s-" % category, "-mapping.txt", _object.tmp_directory)
        # `with` guarantees the handle is closed if a write raises.
        with open(mapping_file_path, "w") as mapping_file:
            mapping_file.write("samples\t%s\n" % (category))
            for sample in samples:
                mapping_file.write("%s\t%s\n" % (sample, category_mapping[sample]))

        if samples == _object.samples:
            # All samples retained: the unfiltered matrices can be used.
            matrix_percent_path = _object.matrix_percent_file_path
            matrix_count_path = _object.matrix_count_file_path
        else:
            matrix_percent_path = get_temporary_file_name(
                "%s-" % category, "-matrix-percent.txt", _object.tmp_directory
            )
            matrix_count_path = get_temporary_file_name(
                "%s-" % category, "-matrix-count.txt", _object.tmp_directory
            )

            if store_filtered_matrix(_object.matrix_percent_file_path, matrix_percent_path, samples) < 3:
                _object.logger.info(
                    "skipping exclusive figs for '%s'; less than 3 samples were left in MP" % (category)
                )
                continue
            if store_filtered_matrix(_object.matrix_count_file_path, matrix_count_path, samples) < 3:
                _object.logger.info(
                    "skipping exclusive figs for '%s'; less than 3 samples were left in MC" % (category)
                )
                continue

        # ready to roll.
        _object.logger.info(
            "exclusive figs for '%s' with %d samples; mapping: '%s', MP: '%s', MC: '%s'"
            % (category, len(samples), mapping_file_path, matrix_percent_path, matrix_count_path)
        )

        matrix_file = matrix_percent_path

        # NOTE(review): the commands below are interpolated shell strings
        # (they depend on `>>` redirection); `category` originates from the
        # sample mapping data -- confirm that input is trusted.

        # NMDS
        analysis = "NMDS Analysis"
        script = "../Scripts/R/metaMDS-analysis-with-metadata.R"
        output_dir = "nmds_analysis"

        exclusive_figures_dict[category][output_dir] = {}
        target_dir = _object.generate_output_destination(
            "%s/%s/%s" % (os.path.basename(_object.figures_directory), category, output_dir), directory=True
        )

        for distance_metric in distance_metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = '%s -o "%s" -d "%s" -m "%s" --title "%s" "%s" "%s" >> "%s" 2>&1' % (
                os.path.join(scripts_dir_path, script),
                output_prefix,
                distance_metric,
                category,
                _object.project,
                matrix_file,
                mapping_file_path,
                _object.log_file_path,
            )
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info("exclusive figure: %s" % (cmd_line))
            run_command(cmd_line)
            exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

        # heatmap
        analysis = "Heatmap Analysis"
        script = "../Scripts/R/heatmap.R"
        output_dir = "heatmap_analysis"

        exclusive_figures_dict[category][output_dir] = {}
        target_dir = _object.generate_output_destination(
            "%s/%s/%s" % (os.path.basename(_object.figures_directory), category, output_dir), directory=True
        )

        for distance_metric in distance_metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = '%s "%s" -m "%s" -d %s --title "%s" -o "%s" >> "%s" 2>&1' % (
                os.path.join(scripts_dir_path, script),
                matrix_file,
                mapping_file_path,
                distance_metric,
                _object.project,
                output_prefix,
                _object.log_file_path,
            )
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info("exclusive figure: %s" % (cmd_line))
            run_command(cmd_line)
            exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

    return exclusive_figures_dict