def main(environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1):
    """Build a GEXF network file from an environment file.

    The output is written next to `environment_file`, with the file's
    extension replaced by '.gexf'.

    Parameters:
        environment_file: path handed to
            `utils.get_samples_dict_from_environment_file`.
        sample_mapping_file: optional path; when truthy it is loaded with
            `utils.get_sample_mapping_dict` and forwarded as
            `sample_mapping_dict`.
        unit_mapping_file: optional path; when truthy it is loaded with the
            same loader and forwarded as `unit_mapping_dict`.
        min_abundance: forwarded to `utils.get_oligos_sorted_by_abundance`.
        min_sum_normalized_percent: accepted for interface compatibility;
            this function does not use it.
    """
    # Local import: only this function needs it, and it keeps the block
    # independent of the module's import section.
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)

    # Only the percents are consumed below; the counts are discarded.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = None
    if sample_mapping_file:
        sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file)

    unit_mapping = None
    if unit_mapping_file:
        unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file)

    # splitext only strips an extension from the final path component, so an
    # extensionless file ("env") or a dotted directory ("run.v1/env") no
    # longer yields a truncated or empty output name.
    output_file = os.path.splitext(environment_file)[0] + '.gexf'

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping)
def main(
    environment_file, sample_mapping_file=None, unit_mapping_file=None, min_abundance=0, min_sum_normalized_percent=1
):
    """Generate a '.gexf' network file for the given environment file.

    Parameters:
        environment_file: path passed to
            `utils.get_samples_dict_from_environment_file`; the output path
            is this path with its extension swapped for ".gexf".
        sample_mapping_file: optional path loaded through
            `utils.get_sample_mapping_dict`; `None` is forwarded when falsy.
        unit_mapping_file: optional path loaded through the same loader;
            `None` is forwarded when falsy.
        min_abundance: passed to `utils.get_oligos_sorted_by_abundance`.
        min_sum_normalized_percent: kept in the signature for callers, but
            not read anywhere in this function.
    """
    # Function-scope import so the block does not rely on the file header.
    import os

    samples_dict = utils.get_samples_dict_from_environment_file(environment_file)
    oligos = utils.get_oligos_sorted_by_abundance(samples_dict, min_abundance=min_abundance)

    # The counts half of the result is not needed here.
    _, unit_percents = utils.get_unit_counts_and_percents(oligos, samples_dict)

    sample_mapping = utils.get_sample_mapping_dict(sample_mapping_file) if sample_mapping_file else None
    unit_mapping = utils.get_sample_mapping_dict(unit_mapping_file) if unit_mapping_file else None

    # Splitting on every "." mangled paths without an extension (or with a
    # dotted directory name); splitext touches only the last component.
    output_file = os.path.splitext(environment_file)[0] + ".gexf"

    utils.generate_gexf_network_file(
        oligos,
        samples_dict,
        unit_percents,
        output_file,
        sample_mapping_dict=sample_mapping,
        unit_mapping_dict=unit_mapping,
    )
def generate_exclusive_figures(_object):
    """Run the NMDS and heatmap R scripts once per sample-mapping category.

    For every category in the sample mapping, the samples that are both
    present in `_object.samples` and mapped to a truthy value are written to
    a temporary mapping file. If that sample list differs from
    `_object.samples`, filtered copies of the percent and count matrices are
    stored; a category is skipped when either filtered matrix reports fewer
    than 3 samples. Each script is then run for every distance metric.

    Returns:
        dict: category -> output directory name ('nmds_analysis' /
        'heatmap_analysis') -> distance metric -> output prefix. A skipped
        category maps to an empty dict.
    """
    import Oligotyping

    scripts_dir_path = os.path.dirname(Oligotyping.__file__)

    # Every metric is computed from the percent matrix.
    distance_metrics = ("canberra", "kulczynski", "jaccard", "horn", "bray")

    exclusive_figures_dict = {}
    sample_mapping_dict = get_sample_mapping_dict(_object.sample_mapping)

    for category in sample_mapping_dict:
        exclusive_figures_dict[category] = {}
        category_mapping = sample_mapping_dict[category]

        # double filter: first makes sure sample was not removed from the
        # analysis due to losing all its reads during the refinement, second
        # makes sure that sample was actually mapped to something in the
        # sample mapping file.
        # sorted() is used because filter() returns an iterator on Python 3,
        # which has no .sort() method.
        samples = sorted(s for s in category_mapping
                         if s in _object.samples and category_mapping[s])

        mapping_file_path = get_temporary_file_name('%s-' % category, '-mapping.txt', _object.tmp_directory)
        # The context manager closes the file even if a write fails.
        with open(mapping_file_path, 'w') as mapping_file:
            mapping_file.write('samples\t%s\n' % (category))
            for sample in samples:
                mapping_file.write('%s\t%s\n' % (sample, category_mapping[sample]))

        if samples == _object.samples:
            # Nothing was filtered out; reuse the full matrices.
            matrix_percent_path = _object.matrix_percent_file_path
            matrix_count_path = _object.matrix_count_file_path
        else:
            matrix_percent_path = get_temporary_file_name('%s-' % category, '-matrix-percent.txt', _object.tmp_directory)
            matrix_count_path = get_temporary_file_name('%s-' % category, '-matrix-count.txt', _object.tmp_directory)

            if store_filtered_matrix(_object.matrix_percent_file_path, matrix_percent_path, samples) < 3:
                _object.logger.info("skipping exclusive figs for '%s'; less than 3 samples were left in MP"
                                    % (category))
                continue

            if store_filtered_matrix(_object.matrix_count_file_path, matrix_count_path, samples) < 3:
                _object.logger.info("skipping exclusive figs for '%s'; less than 3 samples were left in MC"
                                    % (category))
                continue

        # ready to roll.
        _object.logger.info("exclusive figs for '%s' with %d samples; mapping: '%s', MP: '%s', MC: '%s'"
                            % (category, len(samples), mapping_file_path, matrix_percent_path, matrix_count_path))

        figures_dir_name = os.path.basename(_object.figures_directory)

        # NMDS
        analysis = 'NMDS Analysis'
        output_dir = 'nmds_analysis'
        script_path = os.path.join(scripts_dir_path, '../Scripts/R/metaMDS-analysis-with-metadata.R')
        exclusive_figures_dict[category][output_dir] = {}
        target_dir = _object.generate_output_destination(
            '%s/%s/%s' % (figures_dir_name, category, output_dir), directory=True)

        for distance_metric in distance_metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = ('%s "%s" "%s" %s "%s" "%s" "%s" >> "%s" 2>&1' %
                        (script_path,
                         matrix_percent_path,
                         mapping_file_path,
                         distance_metric,
                         category,
                         _object.project,
                         output_prefix,
                         _object.log_file_path))
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info('exclusive figure: %s' % (cmd_line))
            run_command(cmd_line)
            exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

        # heatmap
        analysis = 'Heatmap Analysis'
        output_dir = 'heatmap_analysis'
        script_path = os.path.join(scripts_dir_path, '../Scripts/R/heatmap.R')
        exclusive_figures_dict[category][output_dir] = {}
        target_dir = _object.generate_output_destination(
            '%s/%s/%s' % (figures_dir_name, category, output_dir), directory=True)

        for distance_metric in distance_metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = ('%s "%s" -m "%s" -d %s --title "%s" -o "%s" >> "%s" 2>&1' %
                        (script_path,
                         matrix_percent_path,
                         mapping_file_path,
                         distance_metric,
                         _object.project,
                         output_prefix,
                         _object.log_file_path))
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info('exclusive figure: %s' % (cmd_line))
            run_command(cmd_line)
            exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

    return exclusive_figures_dict
def generate_exclusive_figures(_object):
    """Produce per-category NMDS and heatmap figures via the bundled R scripts.

    For each category of the sample mapping:
      1. keep the samples that are in `_object.samples` and have a truthy
         mapping value, sorted;
      2. write them to a temporary tab-separated mapping file;
      3. if the kept samples differ from `_object.samples`, store filtered
         percent/count matrices and skip the category when either call
         reports fewer than 3 samples;
      4. run each R script once per distance metric.

    Returns:
        dict: category -> "nmds_analysis" / "heatmap_analysis" -> distance
        metric -> output prefix. Skipped categories map to an empty dict.
    """
    import Oligotyping

    scripts_dir_path = os.path.dirname(Oligotyping.__file__)

    # All metrics are fed the percent matrix.
    distance_metrics = ("canberra", "kulczynski", "jaccard", "horn", "bray")

    exclusive_figures_dict = {}
    sample_mapping_dict = get_sample_mapping_dict(_object.sample_mapping)

    for category in sample_mapping_dict:
        exclusive_figures_dict[category] = {}
        category_mapping = sample_mapping_dict[category]

        # double filter: first makes sure sample was not removed from the
        # analysis due to losing all its reads during the refinement, second
        # makes sure that sample was actually mapped to something in the
        # sample mapping file.
        # Built with sorted(): on Python 3 filter() yields an iterator, so the
        # former filter(...).sort() sequence raised AttributeError.
        samples = sorted(s for s in category_mapping if s in _object.samples and category_mapping[s])

        mapping_file_path = get_temporary_file_name("%s-" % category, "-mapping.txt", _object.tmp_directory)
        # `with` guarantees the handle is closed if a write raises.
        with open(mapping_file_path, "w") as mapping_file:
            mapping_file.write("samples\t%s\n" % (category))
            for sample in samples:
                mapping_file.write("%s\t%s\n" % (sample, category_mapping[sample]))

        if samples == _object.samples:
            # No sample was dropped; the unfiltered matrices can be used.
            matrix_percent_path = _object.matrix_percent_file_path
            matrix_count_path = _object.matrix_count_file_path
        else:
            matrix_percent_path = get_temporary_file_name(
                "%s-" % category, "-matrix-percent.txt", _object.tmp_directory
            )
            matrix_count_path = get_temporary_file_name("%s-" % category, "-matrix-count.txt", _object.tmp_directory)

            if store_filtered_matrix(_object.matrix_percent_file_path, matrix_percent_path, samples) < 3:
                _object.logger.info(
                    "skipping exclusive figs for '%s'; less than 3 samples were left in MP" % (category)
                )
                continue

            if store_filtered_matrix(_object.matrix_count_file_path, matrix_count_path, samples) < 3:
                _object.logger.info(
                    "skipping exclusive figs for '%s'; less than 3 samples were left in MC" % (category)
                )
                continue

        # ready to roll.
        _object.logger.info(
            "exclusive figs for '%s' with %d samples; mapping: '%s', MP: '%s', MC: '%s'"
            % (category, len(samples), mapping_file_path, matrix_percent_path, matrix_count_path)
        )

        figures_dir_name = os.path.basename(_object.figures_directory)

        # NMDS
        analysis = "NMDS Analysis"
        output_dir = "nmds_analysis"
        script_path = os.path.join(scripts_dir_path, "../Scripts/R/metaMDS-analysis-with-metadata.R")
        exclusive_figures_dict[category][output_dir] = {}
        target_dir = _object.generate_output_destination(
            "%s/%s/%s" % (figures_dir_name, category, output_dir), directory=True
        )

        for distance_metric in distance_metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            # NOTE(review): here -m receives the category name, whereas the
            # heatmap command below passes the mapping file path to -m.
            # Confirm against the R script's command-line interface.
            cmd_line = '%s -o "%s" -d "%s" -m "%s" --title "%s" "%s" "%s" >> "%s" 2>&1' % (
                script_path,
                output_prefix,
                distance_metric,
                category,
                _object.project,
                matrix_percent_path,
                mapping_file_path,
                _object.log_file_path,
            )
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info("exclusive figure: %s" % (cmd_line))
            run_command(cmd_line)
            exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

        # heatmap
        analysis = "Heatmap Analysis"
        output_dir = "heatmap_analysis"
        script_path = os.path.join(scripts_dir_path, "../Scripts/R/heatmap.R")
        exclusive_figures_dict[category][output_dir] = {}
        target_dir = _object.generate_output_destination(
            "%s/%s/%s" % (figures_dir_name, category, output_dir), directory=True
        )

        for distance_metric in distance_metrics:
            output_prefix = os.path.join(target_dir, distance_metric)
            cmd_line = '%s "%s" -m "%s" -d %s --title "%s" -o "%s" >> "%s" 2>&1' % (
                script_path,
                matrix_percent_path,
                mapping_file_path,
                distance_metric,
                _object.project,
                output_prefix,
                _object.log_file_path,
            )
            _object.progress.update('%s "%s" for "%s" ...' % (analysis, distance_metric, category))
            _object.logger.info("exclusive figure: %s" % (cmd_line))
            run_command(cmd_line)
            exclusive_figures_dict[category][output_dir][distance_metric] = output_prefix

    return exclusive_figures_dict