Exemplo n.º 1
0
def main():
    from optparse import OptionParser
    parser = OptionParser()

    ##
    ## Psi utilities
    ##
    parser.add_option("--compare-samples", dest="samples_to_compare",
                      nargs=3, default=None,
                      help="Compute comparison statistics between the two " \
                      "given samples. Expects three directories: the first is " \
                      "sample1's MISO output, the second is sample2's MISO " \
                      "output, and the third is the directory where " \
                      "results of the sample comparison will be outputted.")
    parser.add_option("--comparison-labels",
                      dest="comparison_labels",
                      nargs=2,
                      default=None,
                      help="Use these labels for the sample comparison "
                      "made by --compare-samples. "
                      "Takes two arguments: the label for sample 1 "
                      "and the label for sample 2, where sample 1 and "
                      "sample 2 correspond to the order of samples given "
                      "to --compare-samples.")
    parser.add_option("--use-compressed",
                      dest="use_compressed",
                      nargs=1,
                      default=None,
                      help="Use compressed event IDs. Takes as input a "
                      "genes_to_filenames.shelve file produced by the "
                      "index_gff script.")
    (options, args) = parser.parse_args()

    if options.samples_to_compare is None:
        greeting()

    use_compressed = None
    if options.use_compressed is not None:
        use_compressed = \
            os.path.abspath(os.path.expanduser(options.use_compressed))
        if not os.path.exists(use_compressed):
            print "Error: mapping filename from event IDs to compressed IDs %s " \
                  "is not found." %(use_compressed)
            sys.exit(1)
        else:
            print "Compression being used."

    if options.samples_to_compare is not None:
        sample1_dirname = os.path.abspath(options.samples_to_compare[0])
        sample2_dirname = os.path.abspath(options.samples_to_compare[1])
        output_dirname = os.path.abspath(options.samples_to_compare[2])
        if not os.path.isdir(output_dirname):
            print "Making comparisons directory: %s" % (output_dirname)
            misc_utils.make_dir(output_dirname)
        ht.output_samples_comparison(sample1_dirname,
                                     sample2_dirname,
                                     output_dirname,
                                     sample_labels=options.comparison_labels,
                                     use_compressed=use_compressed)
Exemplo n.º 2
0
def main():
    """
    Command-line entry point for comparing two samples.

    Parses --compare-samples (three directories), --comparison-labels
    (two labels) and --use-compressed (one filename).  greeting() is called
    when --compare-samples is absent.  If --use-compressed names a file
    that does not exist, an error is printed and the process exits with
    status 1.  When --compare-samples is given, the output directory is
    created if missing and ht.output_samples_comparison is called with the
    absolute sample and output directories.
    """
    from optparse import OptionParser
    parser = OptionParser()

    ##
    ## Psi utilities
    ##
    parser.add_option("--compare-samples", dest="samples_to_compare",
                      nargs=3, default=None,
                      help="Compute comparison statistics between the two "
                      "given samples. Expects three directories: the first is "
                      "sample1's MISO output, the second is sample2's MISO "
                      "output, and the third is the directory where "
                      "results of the sample comparison will be outputted.")
    parser.add_option("--comparison-labels", dest="comparison_labels",
                      nargs=2, default=None,
                      help="Use these labels for the sample comparison "
                      "made by --compare-samples. "
                      "Takes two arguments: the label for sample 1 "
                      "and the label for sample 2, where sample 1 and "
                      "sample 2 correspond to the order of samples given "
                      "to --compare-samples.")
    parser.add_option("--use-compressed", dest="use_compressed",
                      nargs=1, default=None,
                      help="Use compressed event IDs. Takes as input a "
                      "genes_to_filenames.shelve file produced by the "
                      "index_gff script.")
    (options, args) = parser.parse_args()

    if options.samples_to_compare is None:
        greeting()

    # Absolute path of the event-ID mapping file, or None when the
    # --use-compressed option was not supplied.
    use_compressed = None
    if options.use_compressed is not None:
        use_compressed = \
            os.path.abspath(os.path.expanduser(options.use_compressed))
        if not os.path.exists(use_compressed):
            print "Error: mapping filename from event IDs to compressed IDs %s " \
                  "is not found." % (use_compressed)
            sys.exit(1)
        else:
            print "Compression being used."

    # This block used to mix tab and space indentation, which only parsed
    # because Python 2 expands tabs to 8 columns; it is now spaces only.
    if options.samples_to_compare is not None:
        sample1_dirname = os.path.abspath(options.samples_to_compare[0])
        sample2_dirname = os.path.abspath(options.samples_to_compare[1])
        output_dirname = os.path.abspath(options.samples_to_compare[2])
        if not os.path.isdir(output_dirname):
            print "Making comparisons directory: %s" % (output_dirname)
            misc_utils.make_dir(output_dirname)
        ht.output_samples_comparison(sample1_dirname,
                                     sample2_dirname,
                                     output_dirname,
                                     sample_labels=options.comparison_labels,
                                     use_compressed=use_compressed)
Exemplo n.º 3
0
def main():
    """
    Command-line entry point: parse options and run each requested action.

    After parsing, greeting() is called when --compute-gene-psi is absent,
    the settings file is loaded through Settings.load, and the optional
    --use-compressed mapping file is resolved (exit status 1 if it does not
    exist).  The following actions then run, in this order, for every
    option that was supplied:

      * --compare-samples         -> ht.output_samples_comparison
      * --compute-genes-from-file -> run_compute_genes_from_file(options)
      * --compute-gene-psi        -> run_compute_gene_psi(options)
      * --summarize-samples       -> summarize_sampler_results
      * --view-gene               -> print the contents of an indexed gene
                                     file (exit status 1 if it holds no
                                     genes)

    The remaining options (--paired-end, --use-cluster, --chunk-jobs,
    --read-len, --overhang-len, --event-type) are not read here; they are
    presumably consumed by the run_* helpers that receive `options`.
    """
    from optparse import OptionParser
    parser = OptionParser()

    ##
    ## Main options
    ##
    parser.add_option("--compute-gene-psi", dest="compute_gene_psi",
                      nargs=4, default=None,
                      help="Compute Psi using for a given multi-isoform gene. "
                      "Expects four arguments: the first is a gene ID or set "
                      "of comma-separated (no spaces) gene IDs, "
                      "the second is a GFF indexed file with the gene "
                      "information, the third is a sorted and "
                      "indexed BAM file with reads aligned to the gene, "
                      "and the fourth is an output directory.")
    parser.add_option("--paired-end", dest="paired_end",
                      nargs=2, default=None,
                      help="Run in paired-end mode.  Takes a mean and standard "
                      "deviation for the fragment length distribution (assumed "
                      "to have discretized normal form.)")
    parser.add_option("--compute-genes-from-file", dest="compute_genes_from_file",
                      nargs=3, default=None,
                      help="Runs on a set of genes from a file. Takes as input: "
                      "(1) a two-column tab-delimited file, where column 1 is the "
                      "event ID (ID field from GFF) and the second column is "
                      "the path to the indexed GFF file for that event. "
                      "MISO will run on all the events described in the file, "
                      "(2) a sorted, indexed BAM file to run on, and (3) a "
                      "directory to output results to.")

    ##
    ## Psi utilities
    ##
    parser.add_option("--compare-samples", dest="samples_to_compare",
                      nargs=3, default=None,
                      help="Compute comparison statistics between the two "
                      "given samples. Expects three directories: the first is "
                      "sample1's MISO output, the second is sample2's MISO "
                      "output, and the third is the directory where "
                      "results of the sample comparison will be outputted.")
    parser.add_option("--comparison-labels", dest="comparison_labels",
                      nargs=2, default=None,
                      help="Use these labels for the sample comparison "
                      "made by --compare-samples. "
                      "Takes two arguments: the label for sample 1 "
                      "and the label for sample 2, where sample 1 and "
                      "sample 2 correspond to the order of samples given "
                      "to --compare-samples.")
    parser.add_option("--summarize-samples", dest="summarize_samples",
                      nargs=2, default=None,
                      help="Compute summary statistics of the given set "
                      "of samples. Expects a directory with MISO output "
                      "and a directory to output summary file to.")
    parser.add_option("--summary-label", dest="summary_label",
                      nargs=1, default=None,
                      help="Label for MISO summary file. If not given, "
                      "uses basename of MISO output directory.")
    parser.add_option("--use-cluster", action="store_true",
                      dest="use_cluster", default=False)
    # NOTE(review): the default is False although the option type is int;
    # presumably downstream code tests it for truthiness -- confirm.
    parser.add_option("--chunk-jobs", dest="chunk_jobs",
                      default=False, type="int",
                      help="Size (in number of events) of each job to "
                      "chunk events file into. Only applies when "
                      "running on cluster.")
    parser.add_option("--settings-filename", dest="settings_filename",
                      default=os.path.join(miso_settings_path,
                                           "settings",
                                           "miso_settings.txt"),
                      help="Filename specifying MISO settings.")
    parser.add_option("--read-len", dest="read_len", type="int",
                      default=None)
    parser.add_option("--overhang-len", dest="overhang_len", type="int",
                      default=None)
    parser.add_option("--event-type", dest="event_type", default=None,
                      help="Event type of two-isoform "
                      "events (e.g. 'SE', 'RI', 'A3SS', ...)")
    parser.add_option("--use-compressed", dest="use_compressed",
                      nargs=1, default=None,
                      help="Use compressed event IDs. Takes as input a "
                      "genes_to_filenames.shelve file produced by the "
                      "index_gff script.")
    ##
    ## Gene utilities
    ##
    parser.add_option("--view-gene", dest="view_gene",
                      nargs=1, default=None,
                      help="View the contents of a gene/event that has "
                      "been indexed. Takes as input an "
                      "indexed (.pickle) filename.")
    (options, args) = parser.parse_args()

    if options.compute_gene_psi is None:
        greeting()

    ##
    ## Load the settings file
    ##
    Settings.load(os.path.expanduser(options.settings_filename))

    # Absolute path of the event-ID mapping file, or None when the
    # --use-compressed option was not supplied.
    use_compressed = None
    if options.use_compressed is not None:
        use_compressed = \
            os.path.abspath(os.path.expanduser(options.use_compressed))
        if not os.path.exists(use_compressed):
            print "Error: mapping filename from event IDs to compressed IDs %s " \
                  "is not found." % (use_compressed)
            sys.exit(1)
        else:
            print "Compression being used."

    if options.samples_to_compare is not None:
        sample1_dirname = os.path.abspath(options.samples_to_compare[0])
        sample2_dirname = os.path.abspath(options.samples_to_compare[1])
        output_dirname = os.path.abspath(options.samples_to_compare[2])
        if not os.path.isdir(output_dirname):
            print "Making comparisons directory: %s" % (output_dirname)
            misc_utils.make_dir(output_dirname)
        ht.output_samples_comparison(sample1_dirname,
                                     sample2_dirname,
                                     output_dirname,
                                     sample_labels=options.comparison_labels,
                                     use_compressed=use_compressed)
    ##
    ## Main interface based on SAM files
    ##
    if options.compute_genes_from_file is not None:
        # Run on events given by file
        run_compute_genes_from_file(options)
    if options.compute_gene_psi is not None:
        run_compute_gene_psi(options)

    ##
    ## Summarizing samples
    ##
    if options.summarize_samples:
        samples_dir = \
            os.path.abspath(os.path.expanduser(options.summarize_samples[0]))
        if options.summary_label is not None:
            samples_label = options.summary_label
            print "Using summary label: %s" % (samples_label)
        else:
            # Fall back to the last path component of the samples directory.
            samples_label = \
                os.path.basename(os.path.expanduser(samples_dir))
        # The label becomes the stem of the summary filename, so it must be
        # non-empty (basename is "" when the directory is the root).
        assert len(samples_label) >= 1, \
            "Summary label is empty; pass a non-empty --summary-label."
        # Summaries always go into a 'summary' subdirectory of the
        # requested output directory.
        summary_output_dir = \
            os.path.abspath(os.path.join(os.path.expanduser(options.summarize_samples[1]),
                                         'summary'))
        if not os.path.isdir(summary_output_dir):
            os.makedirs(summary_output_dir)

        summary_filename = os.path.join(summary_output_dir,
                                        '%s.miso_summary' % (samples_label))
        summarize_sampler_results(samples_dir, summary_filename,
                                  use_compressed=use_compressed)

    if options.view_gene is not None:
        indexed_gene_filename = \
            os.path.abspath(os.path.expanduser(options.view_gene))
        print "Viewing genes in %s" % (indexed_gene_filename)
        gff_genes = gff_utils.load_indexed_gff_file(indexed_gene_filename)

        if gff_genes is None:
            print "No genes."
            sys.exit(1)

        # Each value is indexed below by 'gene_object' and 'hierarchy'.
        for gene_id, gene_info in gff_genes.iteritems():
            print "Gene %s" % (gene_id)
            gene_obj = gene_info['gene_object']
            print " - Gene object: ", gene_obj
            print "=="
            print "Isoforms: "
            for isoform in gene_obj.isoforms:
                print " - ", isoform
            print "=="
            print "mRNA IDs: "
            for mRNA_id in gene_info['hierarchy'][gene_id]['mRNAs']:
                print "%s" % (mRNA_id)
            print "=="
            print "Exons: "
            for exon in gene_obj.parts:
                print " - ", exon
Exemplo n.º 4
0
def main():
    """
    Command-line entry point: parse options and run each requested action.

    After parsing, greeting() is called when --compute-gene-psi is absent,
    the settings file is loaded through Settings.load, and the optional
    --use-compressed mapping file is resolved (exit status 1 if it does not
    exist).  The following actions then run, in this order, for every
    option that was supplied:

      * --compare-samples         -> ht.output_samples_comparison
      * --compute-genes-from-file -> run_compute_genes_from_file(options)
      * --compute-gene-psi        -> run_compute_gene_psi(options)
      * --summarize-samples       -> summarize_sampler_results
      * --view-gene               -> print the contents of an indexed gene
                                     file (exit status 1 if it holds no
                                     genes)

    The remaining options (--paired-end, --use-cluster, --chunk-jobs,
    --read-len, --overhang-len, --event-type) are not read here; they are
    presumably consumed by the run_* helpers that receive `options`.

    Indentation is spaces only: earlier revisions mixed tabs and spaces,
    which parsed only because Python 2 expands tabs to 8 columns.
    """
    from optparse import OptionParser
    parser = OptionParser()

    ##
    ## Main options
    ##
    parser.add_option("--compute-gene-psi", dest="compute_gene_psi",
                      nargs=4, default=None,
                      help="Compute Psi using for a given multi-isoform gene. "
                      "Expects four arguments: the first is a gene ID or set "
                      "of comma-separated (no spaces) gene IDs, "
                      "the second is a GFF indexed file with the gene "
                      "information, the third is a sorted and "
                      "indexed BAM file with reads aligned to the gene, "
                      "and the fourth is an output directory.")
    parser.add_option("--paired-end", dest="paired_end",
                      nargs=2, default=None,
                      help="Run in paired-end mode.  Takes a mean and standard "
                      "deviation for the fragment length distribution (assumed "
                      "to have discretized normal form.)")
    parser.add_option("--compute-genes-from-file", dest="compute_genes_from_file",
                      nargs=3, default=None,
                      help="Runs on a set of genes from a file. Takes as input: "
                      "(1) a two-column tab-delimited file, where column 1 is the "
                      "event ID (ID field from GFF) and the second column is "
                      "the path to the indexed GFF file for that event. "
                      "MISO will run on all the events described in the file, "
                      "(2) a sorted, indexed BAM file to run on, and (3) a "
                      "directory to output results to.")

    ##
    ## Psi utilities
    ##
    parser.add_option("--compare-samples", dest="samples_to_compare",
                      nargs=3, default=None,
                      help="Compute comparison statistics between the two "
                      "given samples. Expects three directories: the first is "
                      "sample1's MISO output, the second is sample2's MISO "
                      "output, and the third is the directory where "
                      "results of the sample comparison will be outputted.")
    parser.add_option("--comparison-labels", dest="comparison_labels",
                      nargs=2, default=None,
                      help="Use these labels for the sample comparison "
                      "made by --compare-samples. "
                      "Takes two arguments: the label for sample 1 "
                      "and the label for sample 2, where sample 1 and "
                      "sample 2 correspond to the order of samples given "
                      "to --compare-samples.")
    parser.add_option("--summarize-samples", dest="summarize_samples",
                      nargs=2, default=None,
                      help="Compute summary statistics of the given set "
                      "of samples. Expects a directory with MISO output "
                      "and a directory to output summary file to.")
    parser.add_option("--summary-label", dest="summary_label",
                      nargs=1, default=None,
                      help="Label for MISO summary file. If not given, "
                      "uses basename of MISO output directory.")
    parser.add_option("--use-cluster", action="store_true",
                      dest="use_cluster", default=False)
    # NOTE(review): the default is False although the option type is int;
    # presumably downstream code tests it for truthiness -- confirm.
    parser.add_option("--chunk-jobs", dest="chunk_jobs",
                      default=False, type="int",
                      help="Size (in number of events) of each job to "
                      "chunk events file into. Only applies when "
                      "running on cluster.")
    parser.add_option("--settings-filename", dest="settings_filename",
                      default=os.path.join(miso_settings_path,
                                           "settings",
                                           "miso_settings.txt"),
                      help="Filename specifying MISO settings.")
    parser.add_option("--read-len", dest="read_len", type="int",
                      default=None)
    parser.add_option("--overhang-len", dest="overhang_len", type="int",
                      default=None)
    parser.add_option("--event-type", dest="event_type", default=None,
                      help="Event type of two-isoform "
                      "events (e.g. 'SE', 'RI', 'A3SS', ...)")
    parser.add_option("--use-compressed", dest="use_compressed",
                      nargs=1, default=None,
                      help="Use compressed event IDs. Takes as input a "
                      "genes_to_filenames.shelve file produced by the "
                      "index_gff script.")
    ##
    ## Gene utilities
    ##
    parser.add_option("--view-gene", dest="view_gene",
                      nargs=1, default=None,
                      help="View the contents of a gene/event that has "
                      "been indexed. Takes as input an "
                      "indexed (.pickle) filename.")
    (options, args) = parser.parse_args()

    if options.compute_gene_psi is None:
        greeting()

    ##
    ## Load the settings file
    ##
    Settings.load(os.path.expanduser(options.settings_filename))

    # Absolute path of the event-ID mapping file, or None when the
    # --use-compressed option was not supplied.
    use_compressed = None
    if options.use_compressed is not None:
        use_compressed = \
            os.path.abspath(os.path.expanduser(options.use_compressed))
        if not os.path.exists(use_compressed):
            print "Error: mapping filename from event IDs to compressed IDs %s " \
                  "is not found." % (use_compressed)
            sys.exit(1)
        else:
            print "Compression being used."

    if options.samples_to_compare is not None:
        sample1_dirname = os.path.abspath(options.samples_to_compare[0])
        sample2_dirname = os.path.abspath(options.samples_to_compare[1])
        output_dirname = os.path.abspath(options.samples_to_compare[2])
        if not os.path.isdir(output_dirname):
            print "Making comparisons directory: %s" % (output_dirname)
            misc_utils.make_dir(output_dirname)
        ht.output_samples_comparison(sample1_dirname,
                                     sample2_dirname,
                                     output_dirname,
                                     sample_labels=options.comparison_labels,
                                     use_compressed=use_compressed)
    ##
    ## Main interface based on SAM files
    ##
    if options.compute_genes_from_file is not None:
        # Run on events given by file
        run_compute_genes_from_file(options)
    if options.compute_gene_psi is not None:
        run_compute_gene_psi(options)

    ##
    ## Summarizing samples
    ##
    if options.summarize_samples:
        samples_dir = \
            os.path.abspath(os.path.expanduser(options.summarize_samples[0]))
        if options.summary_label is not None:
            samples_label = options.summary_label
            print "Using summary label: %s" % (samples_label)
        else:
            # Fall back to the last path component of the samples directory.
            samples_label = \
                os.path.basename(os.path.expanduser(samples_dir))
        # The label becomes the stem of the summary filename, so it must be
        # non-empty (basename is "" when the directory is the root).
        assert len(samples_label) >= 1, \
            "Summary label is empty; pass a non-empty --summary-label."
        # Summaries always go into a 'summary' subdirectory of the
        # requested output directory.
        summary_output_dir = \
            os.path.abspath(os.path.join(os.path.expanduser(options.summarize_samples[1]),
                                         'summary'))
        if not os.path.isdir(summary_output_dir):
            os.makedirs(summary_output_dir)

        summary_filename = os.path.join(summary_output_dir,
                                        '%s.miso_summary' % (samples_label))
        summarize_sampler_results(samples_dir, summary_filename,
                                  use_compressed=use_compressed)

    if options.view_gene is not None:
        indexed_gene_filename = \
            os.path.abspath(os.path.expanduser(options.view_gene))
        print "Viewing genes in %s" % (indexed_gene_filename)
        gff_genes = gff_utils.load_indexed_gff_file(indexed_gene_filename)

        if gff_genes is None:
            print "No genes."
            sys.exit(1)

        # Each value is indexed below by 'gene_object' and 'hierarchy'.
        for gene_id, gene_info in gff_genes.iteritems():
            print "Gene %s" % (gene_id)
            gene_obj = gene_info['gene_object']
            print " - Gene object: ", gene_obj
            print "=="
            print "Isoforms: "
            for isoform in gene_obj.isoforms:
                print " - ", isoform
            print "=="
            print "mRNA IDs: "
            for mRNA_id in gene_info['hierarchy'][gene_id]['mRNAs']:
                print "%s" % (mRNA_id)
            print "=="
            print "Exons: "
            for exon in gene_obj.parts:
                print " - ", exon