def main(cmdline=None):
    """Build STAR/RSEM analysis DAGs plus a combined analysis.

    :param cmdline: optional argument list for argparse (None means sys.argv)
    :return: 0 on success; parser.error() exits the process on bad arguments
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)

    if not validate_path_args(args):
        parser.error('Please set required parameters')
    if not (validate_library_file_existance(args) and
            validate_experiment_file_existance(args)):
        parser.error('Fix path to files')

    sep = get_seperator(args.sep)
    # Build a fresh list instead of calling args.libraries.extend(...):
    # the original mutated the list stored on the argparse Namespace, which
    # surprises any later code that re-reads args.libraries.
    library_filenames = list(args.libraries) + list(args.other_libraries)
    libraries = models.load_library_tables(library_filenames, sep)

    read1 = dict(find_fastqs(libraries, 'read_1'))
    # read_2 column is optional (single-ended libraries have no second read)
    if 'read_2' in libraries.columns:
        read2 = dict(find_fastqs(libraries, 'read_2'))
    else:
        read2 = {}

    dags = generate_star_rsem_analysis(args, libraries, read1, read2)
    generate_combined_analysis(args, dags)
    return 0
def main(cmdline=None):
    """Emit UCSC custom-track lines for each configured library.

    Depending on the --bigwig/--bam flags, prints one custom-track
    definition per line to stdout.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    # Bail out early when any library file path is wrong.
    if not validate_library_file_existance(args):
        parser.error('Fix incorrect library file names')

    library_filenames = args.libraries
    if not library_filenames:
        parser.error('Need library information table')

    libraries = load_library_tables(library_filenames, sep)

    tracks = []
    for library_id, library in libraries.iterrows():
        if args.bigwig:
            tracks.extend(
                make_bigwig_custom_tracks(library, args.web_root, args.root))
        if args.bam:
            tracks.append(
                make_bam_custom_track(library, args.web_root, args.root))

    print(os.linesep.join(tracks))
def main(cmdline=None):
    """Dump all aligned reads for one experiment's replicate BAM files.

    Prints the SAM header once (from the first file), then every read in
    SAM text form for each replicate library of --experiment-name.
    """
    parser = ArgumentParser()
    parser.add_argument('-n', '--experiment-name', required=True,
                        help='Experiment name to select')
    add_metadata_arguments(parser)
    add_debug_arguments(parser)
    args = parser.parse_args(cmdline)
    configure_logging(args)

    libraries = load_library_tables(args.libraries)
    experiments = load_experiments(args.experiments)
    replicates = experiments.loc[args.experiment_name, 'replicates']

    header_printed = False
    selected = libraries.loc[replicates]
    for index, (library_id, library) in enumerate(selected.iterrows()):
        filename = find_library_bam_file(library)
        LOGGER.info(' Reading %s %d/%d', filename, index + 1, len(replicates))
        mode = get_mode(filename, 'r')
        with pysam.AlignmentFile(filename, mode) as alignment:
            # Only the first file contributes the header; the replicates
            # are assumed to share one — TODO confirm.
            if not header_printed:
                print(str(alignment.header))
                header_printed = True
            for read in alignment:
                print(read.to_string())
def main(cmdline=None):
    """Write one quantification matrix file per experiment.

    Loads gene- or isoform-level quantifications for each experiment's
    replicates and writes them to <name>_{gene,isoform}_<quant>.{tsv,csv}.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    output_sep = get_seperator(args.output_format)
    output_extension = {"TAB": ".tsv", ",": ".csv"}[args.output_format]

    # Choose the loader and file-name infix by quantification level.
    if args.transcriptome:
        # isoforms
        load_quantifications = madqc.load_transcriptome_quantifications
        level_infix = "_isoform_"
    else:
        # genes
        load_quantifications = madqc.load_genomic_quantifications
        level_infix = "_gene_"
    quantification_extension = level_infix + args.quantification + output_extension

    for name in experiments:
        replicates = experiments[name]
        filename = name + quantification_extension
        logger.info("%s %s: %s", name, args.quantification, ",".join(replicates))
        quantifications = load_quantifications(
            replicates, libraries, args.quantification)
        quantifications.to_csv(filename, sep=output_sep)
def main(cmdline=None):
    """Convert a STAR genome BAM into sorted bedGraph and bigWig tracks.

    Derives output names from --prefix, or from the BAM basename when it
    follows the *_genome.bam convention; runs STAR's signal step, then
    bedSort and bedGraphToBigWig for each output track.

    :return: 0 on success; parser.error() exits when no prefix can be
             derived from a non-standard bam name
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    logger.debug("current directory {}".format(os.getcwd()))
    logger.debug("env: {}".format(os.environ))

    if args.prefix:
        prefix = args.prefix
    else:
        bam_extension = '_genome.bam'
        if args.bam.endswith(bam_extension):
            # strip ".bam", then the "_genome" suffix
            base = make_basename(args.bam)
            prefix = base[:-len('_genome')]
        else:
            parser.error(
                'Target prefix must be provided for non-standard bam names')

    # Stranded runs produce four tracks (plus/minus x unique/all);
    # unstranded runs produce two.
    if args.stranded:
        targets = {
            'Signal.UniqueMultiple.str1.out.bg': prefix + '_minusAll.bw',
            'Signal.Unique.str1.out.bg': prefix + '_minusUniq.bw',
            'Signal.UniqueMultiple.str2.out.bg': prefix + '_plusAll.bw',
            'Signal.Unique.str2.out.bg': prefix + '_plusUniq.bw',
        }
    else:
        targets = {
            'Signal.UniqueMultiple.str1.out.bg': prefix + '_all.bw',
            'Signal.Unique.str1.out.bg': prefix + '_uniq.bw',
        }

    star_dir = Path(args.star_dir) if args.star_dir is not None else None
    ucsc_tools_dir = Path(
        args.ucsc_tools_dir) if args.ucsc_tools_dir is not None else None

    run_star_to_bedgraph(args.bam, args.stranded, args.reference_prefix,
                         star_dir)

    chrom_info = make_chrom_info(args.bam)
    try:
        for target in targets:
            run_bedsort(target, ucsc_tools_dir)
            run_bedgraph2bigwig(target, chrom_info, targets[target],
                                ucsc_tools_dir)
    finally:
        # Always remove the temporary chrom_info file; the original only
        # unlinked it on the success path and leaked it when a UCSC tool
        # invocation raised.
        os.unlink(chrom_info)
    return 0
def main(cmdline=None):
    """Write per-experiment quantification matrices, optionally with names.

    When --add-names is given a GTF cache is required; ids in the matrix
    are then mapped to gene names before the file is written.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    output_sep = get_seperator(args.output_format)
    output_extension = {
        'TAB': '.tsv',
        ',': '.csv',
    }[args.output_format]

    annotation = None
    if args.add_names:
        # parser.error() exits, so falling through means the cache exists.
        if args.gtf_cache is None:
            parser.error('GTF-cache is needed to add names to the quantification file')
        logger.info('Loading GTF Cache %s', args.gtf_cache)
        annotation = models.load_gtf_cache(args.gtf_cache)

    # Pick loader, name-lookup, and file infix by quantification level.
    if args.transcriptome:
        # isoforms
        load_quantifications = madqc.load_transcriptome_quantifications
        lookup_ids = models.lookup_gene_name_by_transcript_id
        level_infix = '_isoform_'
    else:
        # genes
        load_quantifications = madqc.load_genomic_quantifications
        lookup_ids = models.lookup_gene_name_by_gene_id
        level_infix = '_gene_'
    quantification_extension = level_infix + args.quantification + output_extension

    for name in experiments:
        filename = name + quantification_extension
        replicates = experiments[name]
        logger.info("%s %s: %s", name, args.quantification, ','.join(replicates))
        quantifications = load_quantifications(
            replicates, libraries, args.quantification)
        if annotation is not None:
            quantifications = lookup_ids(annotation, quantifications)
        quantifications.to_csv(filename, sep=output_sep)
def main(cmdline=None):
    """Build the genes-detected plot and return it.

    Returns None when --use-experiment names an experiment that is not in
    the experiments table. When run directly as a script, also saves the
    rendered plot to args.output.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)

    experiments = load_experiments(args.experiments)
    libraries = load_library_tables(args.libraries)

    if args.use_experiment:
        try:
            experiments = experiments.loc[[args.use_experiment]]
        except KeyError:
            logger.error('{} was not found in {}'.format(
                args.use_experiment, ', '.join(list(experiments.index))))
            return None

    gene_types = args.gene_type_filter
    if gene_types:
        logger.info('Limiting to the following gene types {}'.format(','.join(
            gene_types)))
    else:
        logger.info('Using all gene types')

    # load_gene_id_list returns None when args.gene_list_filter is None
    gene_ids = load_gene_id_list(args.gene_list_filter)

    plot = GenesDetectedPlot(
        experiments,
        libraries,
        args.genome_dir,
        args.quantification,
        gene_type_filter=gene_types,
        gene_list_filter=gene_ids,
    )

    # NOTE(review): only saves when executed directly — presumably so an
    # importing server (e.g. bokeh) can render the plot itself; confirm.
    if __name__ == '__main__':
        curdoc().add_root(plot.static_layout())
        save(curdoc(), args.output, title=plot.title)

    return plot
def main(cmdline=None):
    """Load STAR quantifications for every experiment and save each one.

    When --add-names is given a GTF cache is required; the cache is handed
    to StarLoader so saved quantifications carry gene names.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    annotation = None
    if args.add_names:
        # parser.error() exits, so execution continues only with a cache.
        if args.gtf_cache is None:
            parser.error('GTF-cache is needed to add names to the quantification file')
        logger.info('Loading GTF Cache %s', args.gtf_cache)
        annotation = models.load_gtf_cache(args.gtf_cache)

    loader = StarLoader(args.strand, annotation)
    for _, experiment in experiments.iterrows():
        quantification = loader.load(experiment, libraries)
        loader.save(quantification, args.output_format)
def main(cmdline=None):
    """Build and save RSEM expression matrices for every experiment.

    One matrix is built per requested quantification column (defaulting to
    FPKM), at gene or isoform level depending on --transcriptome.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    gtf_cache = None
    if args.add_names:
        # parser.error() exits, so gtf_cache is only built with a genome dir.
        if args.genome_dir is None:
            parser.error(
                'genome-dir is needed to add names to the quantification file')
        gtf_cache = GTFCache(libraries, args.genome_dir)

    # Default to FPKM when no quantification columns were requested.
    quantification_list = args.quantification if args.quantification else ['FPKM']

    # isoform-level vs gene-level loader class
    RsemLoader = IsoformRsemLoader if args.transcriptome else GeneRsemLoader

    for quantification in quantification_list:
        logger.info('Building expression matrix for %s', quantification)
        for _, experiment in experiments.iterrows():
            loader = RsemLoader(quantification, gtf_cache)
            matrix = loader.load(experiment, libraries)
            loader.save(matrix, args.output_format)
def main(cmdline=None):
    """Generate and print a STAR/RSEM analysis DAG.

    Handles --version first, validates required arguments, locates fastqs
    for read_1 (and read_2 when the column exists), then prints the DAG.

    :return: 0 on success
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)

    if args.version:
        parser.exit(0, 'version: %s\n' % (get_git_version(),))

    if not validate_args(args):
        parser.error("Please set required parameters")

    sep = get_seperator(args.sep)
    libraries = models.load_library_tables(args.libraries, sep)

    read1 = dict(find_fastqs(libraries, 'read_1'))
    # read_2 is optional: single-ended libraries lack the column entirely
    if 'read_2' in libraries.columns:
        read2 = dict(find_fastqs(libraries, 'read_2'))
    else:
        read2 = {}

    dag = generate_star_rsem_analysis(args, libraries, read1, read2)
    print(dag)
    return 0