# Example 1
def main(cmdline=None):
    """Build STAR/RSEM analysis DAGs from library and experiment tables.

    ``cmdline`` is an optional argument list; ``None`` means ``sys.argv``.
    Returns 0 on success; exits via ``parser.error`` on invalid arguments.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    # Bail out early on missing required parameters.
    if not validate_path_args(args):
        parser.error('Please set required parameters')

    files_ok = (validate_library_file_existance(args) and
                validate_experiment_file_existance(args))
    if not files_ok:
        parser.error('Fix path to files')

    sep = get_seperator(args.sep)
    library_filenames = args.libraries
    library_filenames.extend(args.other_libraries)

    libraries = models.load_library_tables(library_filenames, sep)
    read1 = dict(find_fastqs(libraries, 'read_1'))
    # read_2 is only present for paired-end libraries.
    has_read2 = 'read_2' in libraries.columns
    read2 = dict(find_fastqs(libraries, 'read_2')) if has_read2 else {}

    dags = generate_star_rsem_analysis(args, libraries, read1, read2)
    generate_combined_analysis(args, dags)

    return 0
# Example 2
def main(cmdline=None):
    """Print UCSC custom-track lines for each library's bigwig/bam files."""
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    sep = get_seperator(args.sep)

    if not validate_library_file_existance(args):
        parser.error('Fix incorrect library file names')

    library_filenames = args.libraries
    if not library_filenames:
        parser.error('Need library information table')

    libraries = load_library_tables(library_filenames, sep)

    custom_tracks = []
    for _library_id, library in libraries.iterrows():
        # Per library: bigwig tracks first, then the bam track.
        if args.bigwig:
            tracks = make_bigwig_custom_tracks(library, args.web_root, args.root)
            custom_tracks.extend(tracks)

        if args.bam:
            track = make_bam_custom_track(library, args.web_root, args.root)
            custom_tracks.append(track)

    print(os.linesep.join(custom_tracks))
def main(cmdline=None):
    """Dump merged SAM records for all replicates of one experiment.

    Prints a single SAM header (from the first bam) followed by every
    read from every replicate's alignment file.
    """
    parser = ArgumentParser()
    parser.add_argument('-n',
                        '--experiment-name',
                        required=True,
                        help='Experiment name to select')
    add_metadata_arguments(parser)
    add_debug_arguments(parser)
    args = parser.parse_args(cmdline)

    configure_logging(args)

    libraries = load_library_tables(args.libraries)
    experiments = load_experiments(args.experiments)

    replicates = experiments.loc[args.experiment_name, 'replicates']
    total = len(replicates)
    selected = libraries.loc[replicates]

    header_printed = False
    for index, (library_id, library) in enumerate(selected.iterrows()):
        filename = find_library_bam_file(library)
        LOGGER.info('  Reading %s %d/%d', filename, index + 1, total)

        mode = get_mode(filename, 'r')
        with pysam.AlignmentFile(filename, mode) as alignment:
            # Emit only the first file's header so the output is valid SAM.
            if not header_printed:
                print(str(alignment.header))
                header_printed = True

            for read in alignment:
                print(read.to_string())
def main(cmdline=None):
    """Write one quantification matrix file per experiment.

    For each experiment, loads the per-replicate quantifications (gene- or
    transcript-level depending on ``--transcriptome``) and writes them to a
    delimited file named ``<experiment><level>_<quantification><extension>``.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    sep = get_seperator(args.sep)
    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    output_sep = get_seperator(args.output_format)
    extensions = {"TAB": ".tsv", ",": ".csv"}
    # Report unsupported formats as a usage error instead of letting a
    # bare KeyError traceback escape.
    if args.output_format not in extensions:
        parser.error('Unrecognized output format {}'.format(args.output_format))
    output_extension = extensions[args.output_format]

    if args.transcriptome:
        # isoforms
        load_quantifications = madqc.load_transcriptome_quantifications
        quantification_extension = "_isoform_" + args.quantification + output_extension
    else:
        # genes
        load_quantifications = madqc.load_genomic_quantifications
        quantification_extension = "_gene_" + args.quantification + output_extension

    for name in experiments:
        filename = name + quantification_extension
        replicates = experiments[name]
        logger.info("%s %s: %s", name, args.quantification, ",".join(replicates))
        quantifications = load_quantifications(replicates, libraries, args.quantification)
        quantifications.to_csv(filename, sep=output_sep)
def main(cmdline=None):
    """Convert a STAR genome bam into bigwig signal tracks.

    Derives the output prefix from ``--prefix`` or from the bam filename,
    runs STAR's signal-generation step, then converts each bedgraph into a
    bigwig with the UCSC tools.  Returns 0 on success; exits via
    ``parser.error`` when no usable prefix can be determined.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    logger.debug("current directory {}".format(os.getcwd()))
    logger.debug("env: {}".format(os.environ))

    if args.prefix:
        prefix = args.prefix
    else:
        bam_extension = '_genome.bam'
        if args.bam.endswith(bam_extension):
            # Strip the trailing '_genome' to recover the library prefix.
            base = make_basename(args.bam)
            prefix = base[:-len('_genome')]
        else:
            parser.error(
                'Target prefix must be provided for non-standard bam names')

    # Map STAR's bedgraph output names to the bigwig files to create.
    if args.stranded:
        targets = {
            'Signal.UniqueMultiple.str1.out.bg': prefix + '_minusAll.bw',
            'Signal.Unique.str1.out.bg': prefix + '_minusUniq.bw',
            'Signal.UniqueMultiple.str2.out.bg': prefix + '_plusAll.bw',
            'Signal.Unique.str2.out.bg': prefix + '_plusUniq.bw',
        }
    else:
        targets = {
            'Signal.UniqueMultiple.str1.out.bg': prefix + '_all.bw',
            'Signal.Unique.str1.out.bg': prefix + '_uniq.bw',
        }

    star_dir = Path(args.star_dir) if args.star_dir is not None else None
    ucsc_tools_dir = Path(
        args.ucsc_tools_dir) if args.ucsc_tools_dir is not None else None

    run_star_to_bedgraph(args.bam, args.stranded, args.reference_prefix,
                         star_dir)
    chrom_info = make_chrom_info(args.bam)
    try:
        for bedgraph, bigwig in targets.items():
            run_bedsort(bedgraph, ucsc_tools_dir)
            run_bedgraph2bigwig(bedgraph, chrom_info, bigwig,
                                ucsc_tools_dir)
    finally:
        # Remove the temporary chrom_info file even if a conversion
        # step fails, so repeated runs don't leak temp files.
        os.unlink(chrom_info)

    return 0
def main(cmdline=None):
    """Write one quantification matrix per experiment, optionally with names.

    Like the plain matrix builder, but when ``--add-names`` is set a GTF
    cache is loaded and used to attach gene names to the quantification
    rows before writing.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    sep = get_seperator(args.sep)
    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    output_sep = get_seperator(args.output_format)
    extensions = {
        'TAB': '.tsv',
        ',': '.csv',
    }
    # Report unsupported formats as a usage error instead of letting a
    # bare KeyError traceback escape.
    if args.output_format not in extensions:
        parser.error('Unrecognized output format {}'.format(args.output_format))
    output_extension = extensions[args.output_format]

    if args.add_names:
        if args.gtf_cache is None:
            parser.error('GTF-cache is needed to add names to the quantification file')
        else:
            logger.info('Loading GTF Cache %s', args.gtf_cache)
            annotation = models.load_gtf_cache(args.gtf_cache)
    else:
        annotation = None

    if args.transcriptome:
        # isoforms
        load_quantifications = madqc.load_transcriptome_quantifications
        lookup_ids = models.lookup_gene_name_by_transcript_id
        quantification_extension = '_isoform_' + args.quantification + output_extension
    else:
        # genes
        load_quantifications = madqc.load_genomic_quantifications
        lookup_ids = models.lookup_gene_name_by_gene_id
        quantification_extension = '_gene_' + args.quantification + output_extension

    for name in experiments:
        filename = name + quantification_extension
        replicates = experiments[name]
        logger.info("%s %s: %s",
                    name, args.quantification, ','.join(replicates))
        quantifications = load_quantifications(
            replicates, libraries, args.quantification)

        if annotation is not None:
            quantifications = lookup_ids(annotation, quantifications)

        quantifications.to_csv(filename, sep=output_sep)
def main(cmdline=None):
    """Build the genes-detected plot; save it only when run as a script.

    Returns the plot object, or ``None`` when ``--use-experiment`` names
    an unknown experiment.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    experiments = load_experiments(args.experiments)
    libraries = load_library_tables(args.libraries)
    if args.use_experiment:
        try:
            experiments = experiments.loc[[args.use_experiment]]
        except KeyError:
            available = ', '.join(list(experiments.index))
            logger.error('{} was not found in {}'.format(
                args.use_experiment, available))
            return None

    if args.gene_type_filter:
        logger.info('Limiting to the following gene types {}'.format(','.join(
            args.gene_type_filter)))
    else:
        logger.info('Using all gene types')

    # ids will be None if args.gene_list_filter is None
    ids = load_gene_id_list(args.gene_list_filter)

    plot = GenesDetectedPlot(
        experiments,
        libraries,
        args.genome_dir,
        args.quantification,
        gene_type_filter=args.gene_type_filter,
        gene_list_filter=ids,
    )

    # Only write the HTML output when this module is the script entry point.
    if __name__ == '__main__':
        curdoc().add_root(plot.static_layout())
        save(curdoc(), args.output, title=plot.title)

    return plot
def main(cmdline=None):
    """Load STAR quantifications for every experiment and write them out."""
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    sep = get_seperator(args.sep)
    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    annotation = None
    if args.add_names:
        # Attaching names requires the GTF cache.
        if args.gtf_cache is None:
            parser.error('GTF-cache is needed to add names to the quantification file')
        else:
            logger.info('Loading GTF Cache %s', args.gtf_cache)
            annotation = models.load_gtf_cache(args.gtf_cache)

    loader = StarLoader(args.strand, annotation)

    for _index, experiment in experiments.iterrows():
        quantification = loader.load(experiment, libraries)
        loader.save(quantification, args.output_format)
def main(cmdline=None):
    """Build RSEM expression matrices for each requested quantification."""
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    sep = get_seperator(args.sep)
    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    gtf_cache = None
    if args.add_names:
        # Attaching names requires the genome directory for the GTF cache.
        if args.genome_dir is None:
            parser.error(
                'genome-dir is needed to add names to the quantification file')
        else:
            gtf_cache = GTFCache(libraries, args.genome_dir)

    # Default to FPKM when no quantification was requested.
    quantification_list = args.quantification or ['FPKM']

    # Choose the isoform-level or gene-level loader class.
    loader_class = IsoformRsemLoader if args.transcriptome else GeneRsemLoader

    for quantification in quantification_list:
        logger.info('Building expression matrix for %s', quantification)
        for _index, experiment in experiments.iterrows():
            loader = loader_class(quantification, gtf_cache)
            matrix = loader.load(experiment, libraries)
            loader.save(matrix, args.output_format)
# Example 10
def main(cmdline=None):
    """Print the STAR/RSEM analysis DAG for the configured libraries."""
    parser = make_parser()
    args = parser.parse_args(cmdline)

    configure_logging(args)

    # --version short-circuits everything else.
    if args.version:
        parser.exit(0, 'version: %s\n' % (get_git_version(),))

    if not validate_args(args):
        parser.error("Please set required parameters")

    sep = get_seperator(args.sep)
    libraries = models.load_library_tables(args.libraries, sep)
    read1 = dict(find_fastqs(libraries, 'read_1'))
    # read_2 is only present for paired-end libraries.
    has_read2 = 'read_2' in libraries.columns
    read2 = dict(find_fastqs(libraries, 'read_2')) if has_read2 else {}

    dag = generate_star_rsem_analysis(args, libraries, read1, read2)
    print(dag)

    return 0