Пример #1
0
def make_chimera(cluster_pair, cluster_shelve, transcript_dict,
                 genome_tx_trees, annotation_source):
    """Build a Chimera record for one pair of 5'/3' clusters.

    Args:
        cluster_pair: pair record providing ``id5p``, ``id3p``, ``pair_id``,
            ``qnames`` and ``spanning_qnames``.
        cluster_shelve: mapping from stringified cluster id to cluster.
        transcript_dict: forwarded to ``lookup_transcripts`` and
            ``get_chimera_type``.
        genome_tx_trees: forwarded to ``lookup_transcripts`` and
            ``get_chimera_type``.
        annotation_source: forwarded to ``get_transcript_info``.

    Returns:
        A new ``Chimera`` with coordinates, strands, fragment counts,
        type/distance and sorted annotation name lists filled in.
    """
    # the shelve is keyed by the string form of each cluster id
    five = cluster_shelve[str(cluster_pair.id5p)]
    three = cluster_shelve[str(cluster_pair.id3p)]

    # transcripts associated with each side
    five_txs = lookup_transcripts(five, transcript_dict, genome_tx_trees)
    three_txs = lookup_transcripts(three, transcript_dict, genome_tx_trees)

    # classify the pair and measure its distance
    kind, dist = get_chimera_type(five, three, five_txs, three_txs,
                                  transcript_dict, genome_tx_trees)

    # transcript / gene / biotype names for each side
    five_tx_ids, five_genes, five_biotypes = get_transcript_info(
        five_txs, annotation_source)
    three_tx_ids, three_genes, three_biotypes = get_transcript_info(
        three_txs, annotation_source)

    chimera = Chimera()
    chimera.chimera_id = "CHIMERA%d" % cluster_pair.pair_id
    chimera.chimera_type = kind
    chimera.distance = dist

    # 5' side
    chimera.rname5p = five.rname
    chimera.start5p = five.start
    chimera.end5p = five.end
    chimera.strand5p = five.strand
    chimera.num_discordant_frags_5p = len(five.qnames)
    chimera.num_concordant_frags_5p = five.concordant_frags
    chimera.biotypes_5p = sorted(five_biotypes)
    chimera.genes_5p = sorted(five_genes)
    chimera.transcripts_5p = sorted(five_tx_ids)

    # 3' side
    chimera.rname3p = three.rname
    chimera.start3p = three.start
    chimera.end3p = three.end
    chimera.strand3p = three.strand
    chimera.num_discordant_frags_3p = len(three.qnames)
    chimera.num_concordant_frags_3p = three.concordant_frags
    chimera.biotypes_3p = sorted(three_biotypes)
    chimera.genes_3p = sorted(three_genes)
    chimera.transcripts_3p = sorted(three_tx_ids)

    # pair-level fragment counts; num_frags counts each name only once
    # even if it appears in both qnames and spanning_qnames
    chimera.num_discordant_frags = len(cluster_pair.qnames)
    chimera.num_spanning_frags = len(cluster_pair.spanning_qnames)
    chimera.num_frags = len(
        set(cluster_pair.qnames).union(cluster_pair.spanning_qnames))
    return chimera
Пример #2
0
def make_chimera(cluster_pair,
                 cluster_shelve,
                 transcript_dict,
                 genome_tx_trees,
                 annotation_source):
    """Assemble a Chimera object from a cluster pair.

    The 5' and 3' clusters are read from ``cluster_shelve`` using the
    string form of ``cluster_pair.id5p`` / ``cluster_pair.id3p``. Their
    transcripts come from ``lookup_transcripts``, the chimera type and
    distance from ``get_chimera_type``, and the transcript, gene and
    biotype names from ``get_transcript_info`` (which receives
    ``annotation_source``).

    Returns the populated ``Chimera``.
    """
    c5 = cluster_shelve[str(cluster_pair.id5p)]
    c3 = cluster_shelve[str(cluster_pair.id3p)]

    txs5 = lookup_transcripts(c5, transcript_dict, genome_tx_trees)
    txs3 = lookup_transcripts(c3, transcript_dict, genome_tx_trees)

    ctype, cdist = get_chimera_type(
        c5, c3, txs5, txs3, transcript_dict, genome_tx_trees)

    names5, genes5, biotypes5 = get_transcript_info(txs5, annotation_source)
    names3, genes3, biotypes3 = get_transcript_info(txs3, annotation_source)

    result = Chimera()

    # genomic interval and strand of each cluster
    result.rname5p, result.start5p, result.end5p = c5.rname, c5.start, c5.end
    result.rname3p, result.start3p, result.end3p = c3.rname, c3.start, c3.end
    result.strand5p = c5.strand
    result.strand3p = c3.strand

    result.chimera_id = "CHIMERA%d" % cluster_pair.pair_id
    result.chimera_type = ctype
    result.distance = cdist

    # fragment counts: distinct names overall, then per category and side
    discordant = cluster_pair.qnames
    spanning = cluster_pair.spanning_qnames
    result.num_frags = len(set(discordant) | set(spanning))
    result.num_discordant_frags = len(discordant)
    result.num_spanning_frags = len(spanning)
    result.num_discordant_frags_5p = len(c5.qnames)
    result.num_discordant_frags_3p = len(c3.qnames)
    result.num_concordant_frags_5p = c5.concordant_frags
    result.num_concordant_frags_3p = c3.concordant_frags

    # annotation name collections are stored as sorted lists
    sorted_fields = (
        ("biotypes_5p", biotypes5),
        ("biotypes_3p", biotypes3),
        ("genes_5p", genes5),
        ("genes_3p", genes3),
        ("transcripts_5p", names5),
        ("transcripts_3p", names3),
    )
    for attr_name, values in sorted_fields:
        setattr(result, attr_name, sorted(values))
    return result