示例#1
0
def make_chimera(cluster_pair, cluster_shelve, transcript_dict,
                 genome_tx_trees, annotation_source):
    """Assemble a Chimera record describing one 5'/3' cluster pair.

    The two clusters are fetched from ``cluster_shelve`` using the string
    form of ``cluster_pair.id5p`` and ``cluster_pair.id3p``.  Their
    transcripts come from ``lookup_transcripts``, the chimera type and
    distance from ``get_chimera_type``, and the transcript-name, gene-name
    and biotype collections from ``get_transcript_info``.

    Returns the populated ``Chimera`` object.
    """
    # fetch both clusters (keys in the shelve are strings)
    side5p = cluster_shelve[str(cluster_pair.id5p)]
    side3p = cluster_shelve[str(cluster_pair.id3p)]
    # transcripts associated with each cluster
    txs5p = lookup_transcripts(side5p, transcript_dict, genome_tx_trees)
    txs3p = lookup_transcripts(side3p, transcript_dict, genome_tx_trees)
    # classify the pair
    chimera_type, distance = get_chimera_type(
        side5p, side3p, txs5p, txs3p, transcript_dict, genome_tx_trees)
    # per-side annotation details
    tx_names_5p, gene_names_5p, biotypes_5p = get_transcript_info(
        txs5p, annotation_source)
    tx_names_3p, gene_names_3p, biotypes_3p = get_transcript_info(
        txs3p, annotation_source)

    chimera = Chimera()
    chimera.chimera_id = "CHIMERA%d" % (cluster_pair.pair_id)
    chimera.chimera_type = chimera_type
    chimera.distance = distance

    # 5' side
    chimera.rname5p = side5p.rname
    chimera.start5p = side5p.start
    chimera.end5p = side5p.end
    chimera.strand5p = side5p.strand
    chimera.num_discordant_frags_5p = len(side5p.qnames)
    chimera.num_concordant_frags_5p = side5p.concordant_frags
    chimera.biotypes_5p = sorted(biotypes_5p)
    chimera.genes_5p = sorted(gene_names_5p)
    chimera.transcripts_5p = sorted(tx_names_5p)

    # 3' side
    chimera.rname3p = side3p.rname
    chimera.start3p = side3p.start
    chimera.end3p = side3p.end
    chimera.strand3p = side3p.strand
    chimera.num_discordant_frags_3p = len(side3p.qnames)
    chimera.num_concordant_frags_3p = side3p.concordant_frags
    chimera.biotypes_3p = sorted(biotypes_3p)
    chimera.genes_3p = sorted(gene_names_3p)
    chimera.transcripts_3p = sorted(tx_names_3p)

    # fragment counts; num_frags counts each qname once even when it
    # appears in both the discordant and the spanning collections
    chimera.num_frags = len(
        set(cluster_pair.qnames).union(cluster_pair.spanning_qnames))
    chimera.num_discordant_frags = len(cluster_pair.qnames)
    chimera.num_spanning_frags = len(cluster_pair.spanning_qnames)
    return chimera
示例#2
0
def make_chimera(cluster_pair,
                 cluster_shelve,
                 transcript_dict,
                 genome_tx_trees,
                 annotation_source):
    """Create and fill in a Chimera for the given pair of clusters.

    Both clusters are read from ``cluster_shelve`` (keyed by ``str`` of the
    pair's ``id5p`` / ``id3p``).  Transcript lookup, chimera classification
    and transcript annotation are delegated to ``lookup_transcripts``,
    ``get_chimera_type`` and ``get_transcript_info`` respectively; their
    results, together with coordinates and fragment counts taken from the
    clusters and the pair, are stored as attributes on the returned object.
    """
    clust5 = cluster_shelve[str(cluster_pair.id5p)]
    clust3 = cluster_shelve[str(cluster_pair.id3p)]

    found5 = lookup_transcripts(clust5, transcript_dict, genome_tx_trees)
    found3 = lookup_transcripts(clust3, transcript_dict, genome_tx_trees)

    chimera_type, distance = get_chimera_type(clust5, clust3,
                                              found5, found3,
                                              transcript_dict,
                                              genome_tx_trees)

    tx_names_5p, gene_names_5p, biotypes_5p = \
        get_transcript_info(found5, annotation_source)
    tx_names_3p, gene_names_3p, biotypes_3p = \
        get_transcript_info(found3, annotation_source)

    # distinct fragment names across discordant and spanning evidence
    all_qnames = set(cluster_pair.qnames) | set(cluster_pair.spanning_qnames)

    result = Chimera()
    # identity and classification
    result.chimera_id = "CHIMERA%d" % (cluster_pair.pair_id)
    result.chimera_type = chimera_type
    result.distance = distance
    # genomic coordinates
    result.rname5p, result.start5p, result.end5p = \
        clust5.rname, clust5.start, clust5.end
    result.rname3p, result.start3p, result.end3p = \
        clust3.rname, clust3.start, clust3.end
    result.strand5p = clust5.strand
    result.strand3p = clust3.strand
    # supporting fragment counts
    result.num_frags = len(all_qnames)
    result.num_discordant_frags = len(cluster_pair.qnames)
    result.num_spanning_frags = len(cluster_pair.spanning_qnames)
    result.num_discordant_frags_5p = len(clust5.qnames)
    result.num_discordant_frags_3p = len(clust3.qnames)
    result.num_concordant_frags_5p = clust5.concordant_frags
    result.num_concordant_frags_3p = clust3.concordant_frags
    # annotation, stored as sorted lists
    result.biotypes_5p = sorted(biotypes_5p)
    result.biotypes_3p = sorted(biotypes_3p)
    result.genes_5p = sorted(gene_names_5p)
    result.genes_3p = sorted(gene_names_3p)
    result.transcripts_5p = sorted(tx_names_5p)
    result.transcripts_3p = sorted(tx_names_3p)
    return result
def read_pairs_to_chimera(chimera_name, tid5p, tid3p, readpairs, tid_tx_map,
                          genome_tx_trees, trim_bp):
    """Build a Chimera from read pairs linking two transcripts.

    Parameters:
        chimera_name: value stored as the chimera's ``name``.
        tid5p, tid3p: keys into ``tid_tx_map`` selecting the 5' and 3'
            transcripts.
        readpairs: collection of pairs; element 0 of each pair is handed to
            the 5' partner and element 1 to the 3' partner.  It is walked
            once per partner and also stored on the result, so it presumably
            needs to be re-iterable (list/tuple) rather than a one-shot
            iterator -- confirm against callers.
        tid_tx_map: mapping from transcript id to transcript object.
        genome_tx_trees: passed through to ``get_chimera_type``.
        trim_bp: passed through to ``ChimeraPartner.from_discordant_reads``.

    Returns:
        The populated ``Chimera`` object.

    Raises:
        KeyError: if ``tid5p`` or ``tid3p`` is missing from ``tid_tx_map``
            (when it is a plain dict).
    """
    # get gene information
    tx5p = tid_tx_map[tid5p]
    tx3p = tid_tx_map[tid3p]
    # categorize chimera type
    chimera_type, distance = get_chimera_type(tx5p, tx3p, genome_tx_trees)
    # create chimera object
    c = Chimera()
    # Lazy per-side views of the read pairs.  Generator expressions are used
    # instead of itertools.imap, which does not exist on Python 3; they are
    # equally lazy and behave the same on Python 2.
    reads5p = (pair[0] for pair in readpairs)
    reads3p = (pair[1] for pair in readpairs)
    c.partner5p = ChimeraPartner.from_discordant_reads(reads5p, tx5p, trim_bp)
    c.partner3p = ChimeraPartner.from_discordant_reads(reads3p, tx3p, trim_bp)
    c.name = chimera_name
    c.chimera_type = chimera_type
    c.distance = distance
    # raw reads
    c.encomp_read_pairs = readpairs
    return c
def read_pairs_to_chimera(chimera_name, tid5p, tid3p, readpairs,
                          tid_tx_map, genome_tx_trees, trim_bp):
    """Create a Chimera describing a 5'/3' transcript pair and its reads.

    The transcripts are looked up in ``tid_tx_map`` by ``tid5p`` and
    ``tid3p``; ``get_chimera_type`` supplies the chimera type and distance;
    each partner is built by ``ChimeraPartner.from_discordant_reads`` from
    the corresponding element of every pair in ``readpairs`` (index 0 for
    5', index 1 for 3') together with ``trim_bp``.  The original
    ``readpairs`` object is kept on the result as ``encomp_read_pairs``.

    NOTE(review): ``readpairs`` is iterated separately for each partner, so
    a one-shot iterator would presumably leave the 3' partner without
    reads -- callers should pass a re-iterable sequence; confirm.

    Returns the populated ``Chimera``.
    """
    # get gene information
    tx5p = tid_tx_map[tid5p]
    tx3p = tid_tx_map[tid3p]
    # categorize chimera type
    chimera_type, distance = get_chimera_type(tx5p, tx3p, genome_tx_trees)
    # create chimera object
    c = Chimera()
    # itertools.imap is Python 2 only; generator expressions give the same
    # lazy element-wise projection and work on both Python 2 and Python 3.
    iter5p = (readpair[0] for readpair in readpairs)
    iter3p = (readpair[1] for readpair in readpairs)
    c.partner5p = ChimeraPartner.from_discordant_reads(iter5p, tx5p, trim_bp)
    c.partner3p = ChimeraPartner.from_discordant_reads(iter3p, tx3p, trim_bp)
    c.name = chimera_name
    c.chimera_type = chimera_type
    c.distance = distance
    # raw reads
    c.encomp_read_pairs = readpairs
    return c