def populate_exon_table():
    '''
    Collects the exons of every (reference protein id, species, exon type)
    combination found in the ExonContainer and passes the resulting list to
    DBManager.update_exon_table and DBManager.update_alignment_table.
    Objects whose exact type is Exon are kept only when their viability flag
    is set; objects of any other type are always kept.
    Combinations that raise KeyError are silently skipped.
    '''
    db_manager = DBManager.Instance()
    exon_container = ExonContainer.Instance()
    # instantiated as in the original flow; not used further in this function
    data_map_container = DataMapContainer.Instance()

    proteins = FileUtilities.get_protein_list()
    all_species = FileUtilities.get_default_species_list()
    exon_types = ("ensembl", "genewise", "blastn", "tblastn", "sw_gene")

    collected = []
    for protein_entry in proteins:
        for species in all_species:
            for exon_type in exon_types:
                # the protein id is the first element of each protein list entry
                lookup_key = (protein_entry[0], species, exon_type)
                try:
                    for candidate in exon_container.get(lookup_key).get_ordered_exons():
                        # only plain Exon objects are filtered on viability
                        if type(candidate) is not Exon or candidate.viability:
                            collected.append(candidate)
                except KeyError:
                    continue

    db_manager.update_exon_table(collected)
    db_manager.update_alignment_table(collected)
Пример #2
0
def populate_SW_exon_alignments(protein_list):
    '''
    Generates the SW exon alignments for every protein in the list and records
    the outcome under SW_EXON_ALIGNMENT in the protein's .status file
    (OK when the generator reports success, PARTIAL otherwise).
    A protein is marked FAILED and skipped when ENSEMBL_EXON_RETRIEVAL or
    REF_SP_DB_FORMATTING is FAILED, or when either key is missing from the
    .status file. A protein whose SW_EXON_ALIGNMENT is already OK is skipped.
    @param protein_list: list of protein ids
    '''
    alignment_logger = Logger.Instance().get_logger('alignment')

    for protein_id in protein_list:
        try:
            # read the .status file once instead of once per looked-up key
            status = FileUtilities.read_status_file(protein_id)
            resources_failed = (status['ENSEMBL_EXON_RETRIEVAL'] == 'FAILED' or
                                status['REF_SP_DB_FORMATTING'] == 'FAILED')
        except KeyError:
            FileUtilities.update_entry_in_status_file(protein_id, 'SW_EXON_ALIGNMENT', 'FAILED')
            alignment_logger.error ("{0},Keys missing in the .status file (ENSEMBL_EXON_RETRIEVAL / REF_SP_DB_FORMATTING)".format(protein_id))
            # without this the alignment would still run and overwrite the
            # FAILED entry that was just written
            continue

        if resources_failed:
            print("ABORTING {0} SW_EXON: some resources have FAILED stats!".format(protein_id))
            FileUtilities.update_entry_in_status_file(protein_id, 'SW_EXON_ALIGNMENT', 'FAILED')
            continue

        try:
            already_aligned = status['SW_EXON_ALIGNMENT'] == 'OK'
        except KeyError:
            # no previous alignment entry: the alignment has to be generated
            already_aligned = False
        if already_aligned:
            print("SKIPPING {0} SW_EXONs: .status file -> OK!".format(protein_id))
            continue

        print("ALIGNING SW_EXON: {0}".format(protein_id))
        if Alignments.generate_SW_exon_alignments2(protein_id):
            FileUtilities.update_entry_in_status_file(protein_id, 'SW_EXON_ALIGNMENT', 'OK')
        else:
            FileUtilities.update_entry_in_status_file(protein_id, 'SW_EXON_ALIGNMENT', 'PARTIAL')
def populate_ortholog_table():
    '''
    Gathers the data map of every (reference protein id, species) pair that
    is present in the DataMapContainer; pairs raising KeyError are skipped.
    NOTE(review): the gathered list is neither returned nor handed to the
    DBManager in this function - presumably an update call is missing; confirm.
    '''
    db_manager = DBManager.Instance()
    data_map_container = DataMapContainer.Instance()

    proteins = FileUtilities.get_protein_list()
    all_species = FileUtilities.get_default_species_list()

    collected_maps = []
    for protein_entry in proteins:
        for species in all_species:
            try:
                # the protein id is the first element of each protein list entry
                collected_maps.append(data_map_container.get((protein_entry[0], species)))
            except KeyError:
                continue
def populate_gene_table():
    '''
    Gathers the data map of every (reference protein id, species) pair from
    the DataMapContainer, prints the gathered list and passes it to
    DBManager.update_gene_table. A pair that raises KeyError is reported on
    standard output and skipped.
    '''
    db_manager = DBManager.Instance()
    data_map_container = DataMapContainer.Instance()

    proteins = FileUtilities.get_protein_list()
    all_species = FileUtilities.get_default_species_list()

    collected_maps = []
    for protein_entry in proteins:
        for species in all_species:
            try:
                collected_maps.append(data_map_container.get((protein_entry[0], species)))
            except KeyError:
                print("PROTEIN_ID %s ERROR" % (protein_entry[0]))

    print(collected_maps)
    db_manager.update_gene_table(collected_maps)
Пример #5
0
def main ():
    '''
    Command line entry point: fills the containers, formats the reference
    species databases and then runs the alignment step selected by the first
    command line argument (blastn, tblastn, SW_gene, SW_exon or all).
    Prints the usage message and exits with status 1 when the mode argument
    is missing or not one of the known modes.
    '''
    usage = "Usage: {0} <blastn | tblastn | SW_gene | SW_exon | all> \n".format(sys.argv[0])

    # sys.argv always holds the script name, so a missing mode means fewer
    # than two entries; checked first so no work is done before bailing out
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)
    mode = sys.argv[1]

    referenced_species = "Homo_sapiens"
    # each entry of the raw protein list carries the protein id first
    protein_list = [protein_tuple[0] for protein_tuple in FileUtilities.get_protein_list()]

    fill_all_containers(False)

    populate_referenced_species_databases(protein_list, referenced_species)

    if mode == "blastn":
        populate_blastn_alignments(protein_list)
    elif mode == "tblastn":
        populate_tblastn_alignments(protein_list)
    elif mode == "SW_gene":
        populate_SW_gene_alignments(protein_list)
    elif mode == "SW_exon":
        populate_SW_exon_alignments(protein_list)
    elif mode == "all":
        populate_blastn_alignments(protein_list)
        populate_tblastn_alignments(protein_list)
        populate_SW_gene_alignments(protein_list)
        populate_SW_exon_alignments(protein_list)
    else:
        print(usage)
        # a bare `exit` is only a name lookup; actually terminate here
        sys.exit(1)
def fill_all_containers (load_alignments):
    '''
    Generates the directory tree for every protein and loads the protein
    configuration. If at least one protein configuration was loaded, the
    ensembl and genewise exons are loaded as well.
    @param load_alignments: when true, the blastn, tblastn, sw_gene and
                            sw_exon exons are loaded too, frames are set on
                            the coding exons, overlapping blastn / tblastn
                            alignments are removed and spurious alignments
                            are annotated
    '''
    crawler = DirectoryCrawler()

    # flatten the raw protein list and keep every second element (the
    # elements at even positions), which are the protein ids
    flattened = list(chain.from_iterable(FileUtilities.get_protein_list()))
    protein_list = flattened[0::2]
    algorithms = ["blastn", "tblastn", "sw_gene", "sw_exon"]

    for protein_id in protein_list:
        crawler.generate_directory_tree(protein_id)

    # the batch loader reports how many protein configurations were loaded
    loaded_cnt = load_protein_configuration_batch(protein_list)
    if not loaded_cnt:
        return

    for basic_type in ("ensembl", "genewise"):
        load_exon_configuration_batch(protein_list, basic_type)

    if not load_alignments:
        return

    for alignment_type in algorithms:
        load_exon_configuration_batch(protein_list, alignment_type)
    set_frames_to_coding_exons_batch (protein_list)
    remove_overlapping_alignments_batch(protein_list, ["blastn", "tblastn"])
    annotate_spurious_alignments_batch(protein_list, algorithms)
Пример #7
0
    def create_map_from_file(filepath):
        """Build a map from the description stored in a file.

        The first line of the sanitized content is read and discarded, the
        second is the car node name, the third the space-separated pet node
        names and the fourth the space-separated house node names. Every
        remaining line is "<source> <destination> <distance>" and is added
        as a connection in both directions.

        Parameters:
            filepath (str): file path to load the map from

        Returns:
            Map: map
        """
        from utilities import FileUtilities
        lines = FileUtilities.get_sanitized_content_from_file(filepath)

        lines.pop(0)  # leading line is not used when building the map
        car_name = lines.pop(0)
        pet_names = lines.pop(0).split(" ")
        house_names = lines.pop(0).split(" ")
        built_map = MapFactory.create_map_from_node_names(
            car_name, pet_names, house_names)

        # everything after the four header lines describes one connection
        for line in lines:
            fields = line.split(" ")

            source = built_map.get_node_by_name(fields[0])
            destination = built_map.get_node_by_name(fields[1])
            length = int(fields[2])

            source.add_connection_to(destination, length)
            destination.add_connection_to(source, length)

        return built_map
def populate_protein_table():
    '''
    For every (reference protein id, species) pair, looks up the data map,
    fetches the protein registered under the data map's protein_id and
    passes the gathered proteins to DBManager.update_protein_table.
    A pair that raises KeyError is reported on standard output and skipped.
    '''
    db_manager = DBManager.Instance()
    protein_container = ProteinContainer.Instance()
    data_map_container = DataMapContainer.Instance()

    protein_entries = FileUtilities.get_protein_list()
    all_species = FileUtilities.get_default_species_list()

    proteins = []
    for protein_entry in protein_entries:
        for species in all_species:
            try:
                data_map = data_map_container.get((protein_entry[0], species))
                proteins.append(protein_container.get(data_map.protein_id))
            except KeyError:
                print("PROTEIN_ID %s ERROR" % (protein_entry[0]))

    db_manager.update_protein_table(proteins)
Пример #9
0
 def get_sequence_record (self, gene_type):
     '''
     Retrieves the gene sequence record dependent on the gene type.
     The record is loaded from its fasta file on first access and cached on
     the instance (self.sequence / self.extended_sequence) afterwards.
     @param gene_type: "normal" selects the gene file; any other value
                       selects the expanded gene file
     @return: the record returned by FileUtilities.load_fasta_single_record
     '''
     if gene_type == "normal":
         try:
             return self.sequence
         except AttributeError:
             # first access: load, cache and return the record (previously
             # the freshly loaded record was cached but None was returned)
             self.sequence = FileUtilities.load_fasta_single_record(self.get_gene_file_path())
             return self.sequence

     try:
         return self.extended_sequence
     except AttributeError:
         self.extended_sequence = FileUtilities.load_fasta_single_record(self.get_expanded_gene_file_path())
         return self.extended_sequence
def load_protein_configuration(protein_id, ref_species_dict = None):
    '''
    Loads the data from a description file and creates the basic objects
    (data map, protein, gene, transcript, ensembl exons) for every known
    species listed there.
    @param protein_id: id of a single protein
    @param ref_species_dict: dictionary indexed by species name; fetched from
                             FileUtilities when not supplied
    @return: False when the status file check fails, True once the
             description file has been processed (the batch loader counts
             the calls that return True)
    '''
    if not check_status_file_no_alignment(protein_id):
        return False
    
    if ref_species_dict is None:
        ref_species_dict    = FileUtilities.get_reference_species_dictionary()
    
    logger                  = Logger.Instance()
    containers_logger       = logger.get_logger('containers')
    
    data_map_container      = DataMapContainer.Instance()
    # NOTE(review): the four containers below are obtained but nothing is
    # added to them in this function - presumably the created protein, gene,
    # transcript and exons should be registered here; confirm.
    protein_container       = ProteinContainer.Instance()
    gene_container          = GeneContainer.Instance()
    transcript_container    = TranscriptContainer.Instance()
    ens_exon_container      = EnsemblExonContainer.Instance()
    
    (known_proteins, abinitio_proteins) = DescriptionParser().parse_description_file_general_info(protein_id)
    
    for species_data in known_proteins:
        (species_name, 
         spec_protein_id, 
         spec_gene_id, 
         spec_transcript_id, 
         location_type, 
         assembly, 
         location_id, 
         seq_begin, 
         seq_end, 
         strand) = species_data
        ab_initio = False

        # data map
        data_map_key    = (protein_id, species_name)
        data_map        = DataMap(spec_protein_id, spec_transcript_id, 
                                  spec_gene_id, data_map_key, location_type, assembly,
                                  location_id, strand, seq_begin, seq_end, ab_initio)
        try:
            data_map_container.add(data_map_key, data_map)
        except (KeyError, TypeError) as e:
            # an exception without arguments must not break the error report
            detail = e.args[0] if e.args else e
            containers_logger.error("{0}, {1}, {2}, error adding to datamap".format(protein_id, species_name, detail))
        
        # everything else - protein, transcript, gene, ensembl exons
        ref_species     = ref_species_dict[species_name]
        protein         = Protein(spec_protein_id, data_map_key, ref_species)
        gene            = Gene(spec_gene_id, data_map_key, ref_species)
        transcript      = Transcript(spec_transcript_id, data_map_key, ref_species)
        
        ens_exons       = EnsemblExons(data_map_key, ref_species)
        try:
            ens_exons.load_exons()
        except Exception as e:
            detail = e.args[0] if e.args else e
            containers_logger.error("{0}, {1}, {2}, error loading exons".format(protein_id, species_name, detail))

    # report success so that load_protein_configuration_batch can count it
    return True
def produce_statistics_for_alignment (exons_key, alignment_type, reference_exons):
    '''
    Produces the straight-forward statistics for the alignment.
    For every reference exon the identities of all viable alignment exons
    are divided by the reference exon length and summed; reference exons
    without any alignment get 0.
    @param exons_key: (reference protein id, species)
    @param alignment_type: blastn, tblastn, sw_gene, sw_exon
    @param reference_exons: iterable of reference exons, in the order the
                            percentages should be reported
    @return: list of coverage values, one per reference exon, or None when
             no exons are registered for the requested alignment
    @raise ValueError: when the summed coverage of an exon exceeds 1
    '''
    (ref_protein_id, species) = exons_key

    # Every alignment type currently uses a factor of 1.
    # NOTE(review): an earlier comment stated that tblastn coverage has to be
    # multiplied by 3 (lengths in AAs, not NBs), yet the tblastn branch also
    # assigned 1 - confirm which of the two is intended.
    coverage_constant = 1.

    exon_container = ExonContainer.Instance()
    # fetched as in the original flow; not used further in this function
    reference_species_dict = FileUtilities.get_reference_species_dictionary()

    containers_logger = Logger.Instance().get_logger("containers")

    try:
        alignment_exons = exon_container.get((ref_protein_id, species, alignment_type))
    except KeyError:
        containers_logger.error ("{0},{1},{2}".format(ref_protein_id, species, alignment_type))
        return None

    perc_list = []
    for ref_exon in reference_exons:
        ref_exon_id = ref_exon.exon_id
        if ref_exon_id not in alignment_exons.alignment_exons:
            # no alignment for this reference exon
            perc_list.append(0)
            continue

        exon_coverage = 0.
        for al_exon in alignment_exons.alignment_exons[ref_exon_id]:
            if not al_exon.viability:
                continue
            exon_coverage += coverage_constant * float(al_exon.alignment_info["identities"]) / len(ref_exon.sequence)
            if exon_coverage > 1:
                print("Coverage cannot be larger than 1 (%s,%s,%s)" % (ref_protein_id, species, alignment_type))
                print("%d %d" % (len(al_exon.alignment_info["sbjct_seq"]), len(ref_exon.sequence)))
                raise ValueError ("Coverage cannot be larger than 1 (%s,%s,%s,%s,%f)" % (ref_protein_id, species, alignment_type,ref_exon_id, exon_coverage))
        perc_list.append(exon_coverage)
    return perc_list
def load_protein_configuration_batch(protein_id_list):
    '''
    Runs load_protein_configuration for every protein in the list, sharing a
    single reference species dictionary between the calls.
    @param protein_id_list: list of protein id's
    @return: number of proteins for which the loader returned True
    '''
    shared_species_dict = FileUtilities.get_reference_species_dictionary()

    # the comparison with True is kept on purpose: only an explicit True
    # (not any truthy value) counts as a loaded folder
    return sum(1 for protein_id in protein_id_list
               if load_protein_configuration(protein_id, shared_species_dict) == True)
def load_exon_configuration_batch(protein_id_list, alignment_type):
    '''
    Runs load_exon_configuration for every protein in the list for one
    particular alignment type, sharing a single reference species dictionary
    between the calls.
    @param protein_id_list: list of protein id's
    @param alignment_type: exon / alignment type handed to the loader
    @return: number of proteins for which the loader returned True
    '''
    shared_species_dict = FileUtilities.get_reference_species_dictionary()

    # the comparison with True is kept on purpose: only an explicit True
    # (not any truthy value) counts as a loaded folder
    return sum(1 for protein_id in protein_id_list
               if load_exon_configuration(protein_id, shared_species_dict, alignment_type) == True)
def load_exon_configuration (ref_protein_id, ref_species_dict, exon_type):
    '''
    Load exons of a particular type for all known species of a protein and
    register them in the ExonContainer.
    @param ref_protein_id: referent protein id
    @param ref_species_dict: dictionary indexed by species name; fetched from
                             FileUtilities when empty or None
    @param exon_type: ensembl, genewise, blastn, tblastn, sw_gene, sw_exon
    @return: False when the status file check fails, True once all known
             species have been processed (the batch loader counts the calls
             that return True)
    '''
    dc                  = DescriptionParser()
    exon_container      = ExonContainer.Instance()
    containers_logger   = Logger.Instance().get_logger('containers')
    
    # ensembl and genewise exons do not depend on the alignment steps
    if exon_type == "ensembl" or exon_type == "genewise":
        status_ok = check_status_file_no_alignment(ref_protein_id)
    else:
        status_ok = check_status_file(ref_protein_id)
    if not status_ok:
        containers_logger.info ("{0},exon_type:{1},check status file -> failed".format(ref_protein_id, exon_type))
        return False
    
    if not ref_species_dict:
        ref_species_dict = FileUtilities.get_reference_species_dictionary()

    (known_species, abinitio_species) = dc.get_separated_species(ref_protein_id)
    
    for species in known_species:
        ref_species = ref_species_dict[species]
        if exon_type == "genewise":
            # NOTE(review): nothing is loaded for genewise exons in this
            # function - confirm whether that is handled elsewhere
            continue

        if exon_type == "ensembl":
            exons = EnsemblExons ((ref_protein_id, species), ref_species)
        else:
            exons = Exons((ref_protein_id, species), ref_species, exon_type)

        # load exactly once (ensembl exons used to be loaded twice in a row)
        try:
            exon_dict = exons.load_exons()
        except Exception:
            containers_logger.error("{0},{1},{2},error loading exons".format(ref_protein_id, species, exon_type))
            continue
        if not exon_dict:
            continue
    
        if exon_type != "ensembl":
            exons.set_exon_ordinals()
        data_map_key = [ref_protein_id, species]
        exon_container.add(exon_type, data_map_key, exons)

    # report success so that load_exon_configuration_batch can count it
    return True
def populate_exon_alignment_piece_table():
    '''
    For every protein and species, walks the coding exons of the
    'Homo_sapiens' ensembl exons and gathers the 'coding' and 'insertion'
    pieces of the sw_gene alignment stored in the BestExonAlignmentContainer
    as [reference exon id, species, alignment piece] entries.
    A KeyError raised while processing a (protein, species) pair is printed
    and the pair is abandoned.
    NOTE(review): the gathered list is neither returned nor handed to the
    DBManager in this function - presumably an update call is missing; confirm.
    '''
    db_manager = DBManager.Instance()
    exon_container = ExonContainer.Instance()
    best_alignments = BestExonAlignmentContainer.Instance()

    protein_entries = FileUtilities.get_protein_list()
    all_species = FileUtilities.get_default_species_list()

    wanted_piece_types = ('coding', 'insertion')
    gathered = []
    for (ref_protein_id, exon_num) in protein_entries:
        for species in all_species:
            try:
                human_exons = exon_container.get((ref_protein_id, 'Homo_sapiens', 'ensembl'))
                for ref_exon in human_exons.get_coding_exons():
                    best = best_alignments.get(ref_exon.exon_id, species)
                    if not (best and best.sw_gene_alignment):
                        continue
                    for piece in best.sw_gene_alignment.alignment_pieces:
                        if piece.type in wanted_piece_types:
                            gathered.append([ref_exon.exon_id, species, piece])
            except KeyError as e:
                print(e)
Пример #16
0
def populate_SW_gene_alignments(protein_list):
    '''
    Generates the SW gene alignments for every protein in the list and
    records the outcome under SW_GENE_ALIGNMENT in the protein's .status file
    (OK when the generator reports success, PARTIAL otherwise).
    A protein is marked FAILED and skipped when EXP_GENE_RETRIEVAL or
    REF_SP_DB_FORMATTING is FAILED, or when either key is missing from the
    .status file. A protein whose SW_GENE_ALIGNMENT is already OK is skipped.
    @param protein_list: list of protein ids
    '''
    for protein_id in protein_list:
        try:
            # read the .status file once instead of once per looked-up key
            status = FileUtilities.read_status_file(protein_id)
            resources_failed = (status['EXP_GENE_RETRIEVAL'] == 'FAILED' or
                                status['REF_SP_DB_FORMATTING'] == 'FAILED')
        except KeyError:
            # a single incomplete .status file must not abort the whole batch
            print("ABORTING {0} SW_GENE: keys missing in the .status file (EXP_GENE_RETRIEVAL / REF_SP_DB_FORMATTING)".format(protein_id))
            FileUtilities.update_entry_in_status_file(protein_id, 'SW_GENE_ALIGNMENT', 'FAILED')
            continue

        if resources_failed:
            print("ABORTING {0} SW_GENE: some resources have FAILED stats!".format(protein_id))
            FileUtilities.update_entry_in_status_file(protein_id, 'SW_GENE_ALIGNMENT', 'FAILED')
            continue

        try:
            already_aligned = status['SW_GENE_ALIGNMENT'] == 'OK'
        except KeyError:
            # no previous alignment entry: the alignment has to be generated
            already_aligned = False
        if already_aligned:
            print("SKIPPING {0} SW_GENE: .status file -> OK!".format(protein_id))
            continue

        print("ALIGNING SW_GENE: {0}".format(protein_id))
        if Alignments.generate_SW_gene_alignments(protein_id):
            FileUtilities.update_entry_in_status_file(protein_id, 'SW_GENE_ALIGNMENT', 'OK')
        else:
            FileUtilities.update_entry_in_status_file(protein_id, 'SW_GENE_ALIGNMENT', 'PARTIAL')
def annotate_spurious_alignments(exons_key):
    '''
    Annotates all the alignments which are not in the correct order.
    Annotation means their viability variable will be set to False.
    (Supporting the assumption that all exons are in the correct, sequential order)
    @param exons_key: (reference protein id, species, alignment type), where
                      the alignment type is blastn, tblastn, sw_gene or sw_exon
    @return: updated alignment exons, None if something is wrong with
            the protein (meaning in the .status file) or if no exons are
            available for the alignment
    '''
    ref_protein_id, species, alignment_type = exons_key

    print("Annotating spurious alignments %s,%s,%s" % (ref_protein_id, species, alignment_type))

    # nothing to do for a protein whose status file check fails
    if not check_status_file(ref_protein_id):
        return None

    container = ExonContainer.Instance()
    species_to_reference = FileUtilities.get_reference_species_dictionary()
    containers_logger = Logger.Instance().get_logger("containers")

    # reference exons live under (ref_prot_id, ref_species, ensembl)
    reference_key = (ref_protein_id, species_to_reference[species], "ensembl")
    reference_exons = container.get(reference_key)

    # the exons produced by the requested alignment may be missing
    try:
        alignment_exons = container.get((ref_protein_id, species, alignment_type))
    except KeyError:
        containers_logger.error ("{0},{1},{2},No exons available for alignment".format(ref_protein_id, species, alignment_type))
        return None

    ordered_subset = _find_best_orderred_subset (alignment_exons, reference_exons)
    annotated_exons = _set_viabilities (alignment_exons, ordered_subset)

    # store the annotated exons back under the same key
    container.update(exons_key, annotated_exons)
    return annotated_exons
Пример #18
0
def populate_referenced_species_databases(protein_list, referenced_species):
    '''
    Generates the reference species database for every protein in the list
    and records the outcome under REF_SP_DB_FORMATTING in the protein's
    .status file (OK when the generator reports success, FAILED otherwise).
    Proteins failing the no-alignment status check are marked FAILED and
    skipped; proteins already marked OK are skipped.
    @param protein_list: list of protein ids
    @param referenced_species: reference species handed to the generator
    '''
    for protein_id in protein_list:
        if not check_status_file_no_alignment(protein_id):
            print("ABORTING {0} DATABASE FORMATTING: ENSEMBL_EXON_RETRIEVAL has a FAILED status!".format(protein_id))
            FileUtilities.update_entry_in_status_file(protein_id, 'REF_SP_DB_FORMATTING', 'FAILED')
            continue

        try:
            already_formatted = FileUtilities.read_status_file(protein_id)['REF_SP_DB_FORMATTING'] == 'OK'
        except KeyError:
            # no previous entry: the database has to be generated
            already_formatted = False
        if already_formatted:
            print("SKIPPING {0} DATABASE FORMATTING: .status file -> OK!".format(protein_id))
            continue

        print("DATABASE FORMATTING: {0} for reference species {1}".format(protein_id, referenced_species))
        succeeded = Alignments.generate_referenced_species_database(protein_id, referenced_species)
        outcome = 'OK' if succeeded else 'FAILED'
        FileUtilities.update_entry_in_status_file(protein_id, 'REF_SP_DB_FORMATTING', outcome)
Пример #19
0
from utilities import FileUtilities


def get_frequency(instructions):
    """Return the sum of all frequency changes, starting from zero.

    Parameters:
        instructions: iterable of values accepted by ``int`` (for example
            the strings "+3" or "-2"), one frequency change each

    Returns:
        int: resulting frequency; 0 for an empty input

    The input is only read: it is no longer emptied element by element,
    which also removes the quadratic cost of repeated ``pop(0)`` calls.
    """
    return sum(int(change) for change in instructions)


if __name__ == "__main__":
    # Read the puzzle input and print the frequency it results in.
    puzzle_lines = FileUtilities.get_sanitized_content_from_file("puzzle.in")
    resulting_frequency = get_frequency(puzzle_lines)
    print(resulting_frequency)
def remove_overlapping_alignments (exons_key):
    '''
    Marks overlapping alignment exons as not viable.
    For every reference exon, the alignment exons are visited in stored
    order while a "toplevel" span (start / stop on the subject) is tracked;
    an exon overlapping that span is marked not viable, an exon strictly
    enclosing it takes over as the new toplevel exon.
    The alignment exons are written back to the ExonContainer afterwards.
    @param exons_key: (reference protein id, species, alignment type)
    @return: None in every case (also when the status file check fails or
             when no exons are registered under exons_key)
    '''
    (ref_protein_id, 
     species, 
     alignment_type)            = exons_key
    # debug switch: set to True to trace the decisions on standard output
    printin = False
    if printin: 
        print "Removing blastn overlaps (%s,%s,%s)..." % (ref_protein_id, species, alignment_type)

    # if something is wrong with the protein, return
    if not check_status_file(ref_protein_id):
        return None
    
    exon_container              = ExonContainer.Instance()
    reference_species_dict      = FileUtilities.get_reference_species_dictionary()
    
    # load logging utilities
    logger                      = Logger.Instance()
    containers_logger           = logger.get_logger("containers")
    
    # get the reference exons: (ref_prot_id, ref_species, ensembl)
    # (not used below; the lookup is not guarded, so a KeyError raised here
    # propagates to the caller)
    reference_exons     = exon_container.get((ref_protein_id, 
                                              reference_species_dict[species], 
                                              "ensembl"))
    # try to get the exons which are the product of specified alignment
    try:
        alignment_exons = exon_container.get(exons_key)
    except KeyError:
        containers_logger.error ("{0},{1},{2}".format(ref_protein_id, species, alignment_type))
        return None
    
    for ref_exon_id in alignment_exons.alignment_exons:
        al_exons = alignment_exons.alignment_exons[ref_exon_id]
        if printin:
            print ref_exon_id
        # span covered so far for this reference exon; 0 means "not set yet"
        toplevel_start = 0
        toplevel_stop = 0
        #for al_exon in sorted(al_exons, key = lambda al_exon: al_exon.get_fitness(), reverse = True):
        for al_exon in al_exons:
            
            # the stop position is derived from the subject start and the alignment length
            exon_start = al_exon.alignment_info["sbjct_start"]
            exon_stop = exon_start + al_exon.alignment_info["length"]
            
            # if exon is already marked as not viable, just discard it
            if hasattr(al_exon, "viability"):
                if not al_exon.viability:
                    continue
                     
            
            # NOTE(review): a genuine start position of 0 would also be treated
            # as "not set yet" by this test - confirm positions never start at 0
            if not toplevel_start:
                # if toplevel locations haven't been set, set them
                toplevel_start = exon_start
                toplevel_stop = exon_stop
                toplevel_exon = al_exon
                al_exon.set_viability(True)
                if printin:
                    print "First exon: %d - %d" % (exon_start, exon_stop)
                
            elif exon_start < toplevel_start and exon_stop > toplevel_stop:
                # this exon strictly encloses the current toplevel span: it becomes
                # the new toplevel exon and the previous one is marked not viable
                toplevel_exon.set_viability(False)
                toplevel_exon = al_exon
                toplevel_start = exon_start
                toplevel_stop = exon_stop
                al_exon.set_viability(True)
                if printin:
                    print "  New toplevel: %d - %d" % (exon_start, exon_stop)
                
            else:
                # the condition below is true in any of the following cases:
                # the exon is contained within the toplevel exon
                #          |----------------------|
                #               |------|
                # or the start is to the left of the toplevel, but they are still overlapping
                #      |----------|
                # or the end is to the right of the toplevel, but they are still overlapping
                #                        |--------------|
                if (exon_start >=toplevel_start and exon_stop <= toplevel_stop) or \
                (exon_start <= toplevel_start and (exon_stop >= toplevel_start and exon_stop <= toplevel_stop)) or \
                ((exon_start >= toplevel_start and exon_start <= toplevel_stop) and exon_stop >= toplevel_stop):
                    if printin:
                        print "   Bad exon: %d - %d" % (exon_start, exon_stop)
                    al_exon.set_viability(False)
                else:
                    # no overlap: widen the tracked span so that it covers this exon
                    # (the viability of the exon itself is left untouched here)
                    if exon_start < toplevel_start:
                        toplevel_start = exon_start
                    if exon_stop > toplevel_stop:
                        toplevel_stop = exon_stop
                    if printin:
                        print "  Good exon: %d - %d" % (exon_start, exon_stop)
                        
    # store the annotated alignment exons back under the same key
    exon_container.update(exons_key, alignment_exons)