Пример #1
0
 def output_filtered_comparisons(self, output_dir=None,
                                 sort_column="bayes_factor",
                                 columns_to_write=[#"event_name",
                                                   "gene_id",
                                                   "gene_symbol",
                                                   "sample1_posterior_mean",
                                                   "sample1_ci_low",
                                                   "sample1_ci_high",
                                                   "sample2_posterior_mean",
                                                   "sample2_ci_low",
                                                   "sample2_ci_high",
                                                   "diff",
                                                   "bayes_factor",
                                                   "isoforms",
                                                   "sample1_counts",
                                                   "sample1_assigned_counts",
                                                   "sample2_counts",
                                                   "sample2_assigned_counts",
                                                   "chrom",
                                                   "strand",
                                                   "mRNA_starts",
                                                   "mRNA_ends"]):
     """
     Output filtered comparisons table, one file per comparison.

     For every (event type, table) pair in ``self.filtered_events`` and
     every distinct label in the first level of that table's index, the
     rows selected by the label are sorted by ``sort_column`` in
     descending order and written to::

         <output_dir>/filtered_events/<event_type>/<label>/
             <label>.<event_type>.filtered.miso_bf

     Fields are separated with ``self.delimiter`` and floats are
     formatted as ``%.4f``.

     Args:
         output_dir: Base output directory. When None,
             ``self.misowrap_obj.comparisons_dir`` is used instead.
         sort_column: Column the rows are sorted by (descending).
         columns_to_write: Columns passed on to ``to_csv``. The default
             list object is shared between calls; it is only read here
             and must not be mutated.
     """
     # Identity test: None is a singleton, so "is" is the correct check.
     if output_dir is None:
         output_dir = self.misowrap_obj.comparisons_dir
     # Output each file by event type
     output_dir = os.path.join(output_dir, "filtered_events")
     # Single-argument print(...) calls emit the same text under the
     # Python 2 print statement and the Python 3 print function.
     print("Outputting filtered events...")
     print("  - Output dir: %s" % (output_dir))
     utils.make_dir(output_dir)
     for event_type, filtered_df in self.filtered_events.iteritems():
         curr_output_dir = os.path.join(output_dir, event_type)
         print("Event type: %s" % (event_type))
         # View by comparison: the first index level holds the labels
         comparison_labels = \
             utils.unique_list(filtered_df.index.get_level_values(0))
         print("Outputting %d comparisons" % (len(comparison_labels)))
         for label in comparison_labels:
             print("Comparison: %s" % (label))
             comparison_output_dir = os.path.join(curr_output_dir, label)
             utils.make_dir(comparison_output_dir)
             output_basename = \
                 "%s.%s.filtered.miso_bf" % (label, event_type)
             output_filename = os.path.join(comparison_output_dir,
                                            output_basename)
             print("Outputting to: %s" % (output_filename))
             # NOTE(review): .ix, sort_index(by=...) and to_csv(cols=...)
             # are legacy pandas spellings (newer releases use .loc,
             # sort_values(by=...) and columns=...). Confirm the pandas
             # version this project pins before changing them.
             curr_df = filtered_df.ix[label].sort_index(by=sort_column,
                                                        ascending=False)
             curr_df.to_csv(output_filename,
                            sep=self.delimiter,
                            float_format="%.4f",
                            cols=columns_to_write)
Пример #2
0
 def new_recs():
     """
     Yield every record from ``events_db`` with gene IDs filled in.

     For each group returned by ``events_db.iter_by_parent_childs()``,
     the first record is treated as the gene record. Its ``ensg_id``,
     ``refseq_id`` and ``gsymbol`` attributes are set to, in order of
     preference:

       1. the comma-joined unique values from ``event_genes_to_info``
          for this record's ID, unless the first value is "NA" or the
          list is empty;
       2. the first value already present on the record;
       3. the string "NA".

     All records of the group are then yielded in their original order.
     """
     # Attribute keys maintained on the gene record. The same key is used
     # for the membership test, the lookup and the final write, so an
     # existing value is never missed or looked up under a different name.
     id_keys = ("ensg_id", "refseq_id", "gsymbol")
     # The iterator is materialized up front, as in the original code.
     # NOTE(review): presumably so records can be modified while
     # iterating -- confirm before making this lazy.
     for gene_recs in list(events_db.iter_by_parent_childs()):
         gene_rec = gene_recs[0]
         event_id = gene_rec.id
         # Use existing IDs if present, otherwise fall back to "NA"
         values = {}
         for key in id_keys:
             if key in gene_rec.attributes:
                 values[key] = gene_rec.attributes[key][0]
             else:
                 values[key] = "NA"
         # Prefer the looked-up annotation when it carries real values
         if event_id in event_genes_to_info:
             event_info = event_genes_to_info[event_id]
             for key in id_keys:
                 candidates = utils.unique_list(event_info[key])
                 if len(candidates) > 0 and candidates[0] != "NA":
                     values[key] = ",".join(candidates)
         for key in id_keys:
             gene_rec.attributes[key] = [values[key]]
         # Yield all the gene's records
         for g in gene_recs:
             yield g
Пример #3
0
 def new_recs():
     """
     Yield every record from ``events_db`` with gene IDs filled in.

     For each group returned by ``events_db.iter_by_parent_childs()``,
     the first record is treated as the gene record. Its ``ensg_id``,
     ``refseq_id`` and ``gsymbol`` attributes are set to, in order of
     preference:

       1. the comma-joined unique values from ``event_genes_to_info``
          for this record's ID, unless the first value is "NA" or the
          list is empty;
       2. the first value already present on the record;
       3. the string "NA".

     All records of the group are then yielded in their original order.
     """
     # Attribute keys maintained on the gene record. The same key is used
     # for the membership test, the lookup and the final write, so an
     # existing value is never missed or looked up under a different name.
     id_keys = ("ensg_id", "refseq_id", "gsymbol")
     # The iterator is materialized up front, as in the original code.
     # NOTE(review): presumably so records can be modified while
     # iterating -- confirm before making this lazy.
     for gene_recs in list(events_db.iter_by_parent_childs()):
         gene_rec = gene_recs[0]
         event_id = gene_rec.id
         # Use existing IDs if present, otherwise fall back to "NA"
         values = {}
         for key in id_keys:
             if key in gene_rec.attributes:
                 values[key] = gene_rec.attributes[key][0]
             else:
                 values[key] = "NA"
         # Prefer the looked-up annotation when it carries real values
         if event_id in event_genes_to_info:
             event_info = event_genes_to_info[event_id]
             for key in id_keys:
                 candidates = utils.unique_list(event_info[key])
                 if len(candidates) > 0 and candidates[0] != "NA":
                     values[key] = ",".join(candidates)
         for key in id_keys:
             gene_rec.attributes[key] = [values[key]]
         # Yield all the gene's records
         for g in gene_recs:
             yield g
Пример #4
0
 def output_filtered_comparisons(
     self,
     output_dir=None,
     sort_column="bayes_factor",
     columns_to_write=[  #"event_name",
         "gene_id", "gene_symbol", "sample1_posterior_mean",
         "sample1_ci_low", "sample1_ci_high", "sample2_posterior_mean",
         "sample2_ci_low", "sample2_ci_high", "diff", "bayes_factor",
         "isoforms", "sample1_counts", "sample1_assigned_counts",
         "sample2_counts", "sample2_assigned_counts", "chrom", "strand",
         "mRNA_starts", "mRNA_ends"
     ]):
     """
     Output filtered comparisons table, one file per comparison.

     For every (event type, table) pair in ``self.filtered_events`` and
     every distinct label in the first level of that table's index, the
     rows selected by the label are sorted by ``sort_column`` in
     descending order and written to::

         <output_dir>/filtered_events/<event_type>/<label>/
             <label>.<event_type>.filtered.miso_bf

     Fields are separated with ``self.delimiter`` and floats are
     formatted as ``%.4f``.

     Args:
         output_dir: Base output directory. When None,
             ``self.misowrap_obj.comparisons_dir`` is used instead.
         sort_column: Column the rows are sorted by (descending).
         columns_to_write: Columns passed on to ``to_csv``. The default
             list object is shared between calls; it is only read here
             and must not be mutated.
     """
     # Identity test: None is a singleton, so "is" is the correct check.
     if output_dir is None:
         output_dir = self.misowrap_obj.comparisons_dir
     # Output each file by event type
     output_dir = os.path.join(output_dir, "filtered_events")
     # Single-argument print(...) calls emit the same text under the
     # Python 2 print statement and the Python 3 print function.
     print("Outputting filtered events...")
     print("  - Output dir: %s" % (output_dir))
     utils.make_dir(output_dir)
     for event_type, filtered_df in self.filtered_events.iteritems():
         curr_output_dir = os.path.join(output_dir, event_type)
         print("Event type: %s" % (event_type))
         # View by comparison: the first index level holds the labels
         comparison_labels = \
             utils.unique_list(filtered_df.index.get_level_values(0))
         print("Outputting %d comparisons" % (len(comparison_labels)))
         for label in comparison_labels:
             print("Comparison: %s" % (label))
             comparison_output_dir = os.path.join(curr_output_dir, label)
             utils.make_dir(comparison_output_dir)
             output_basename = \
                 "%s.%s.filtered.miso_bf" % (label, event_type)
             output_filename = os.path.join(comparison_output_dir,
                                            output_basename)
             print("Outputting to: %s" % (output_filename))
             # NOTE(review): .ix, sort_index(by=...) and to_csv(cols=...)
             # are legacy pandas spellings (newer releases use .loc,
             # sort_values(by=...) and columns=...). Confirm the pandas
             # version this project pins before changing them.
             curr_df = filtered_df.ix[label].sort_index(by=sort_column,
                                                        ascending=False)
             curr_df.to_csv(output_filename,
                            sep=self.delimiter,
                            float_format="%.4f",
                            cols=columns_to_write)