Пример #1
0
    def __init__(self, rs_id_file):
        '''
        Set up the column header, wrap ``rs_id_file`` in an ``AssignVariants``
        instance, then derive the output rows and the reference gene map
        from it (in that order: the gene map is computed from the rows).

        :param rs_id_file: str
        :return: None
        '''
        # Column names, in the same order as the fields of a written row.
        self.tsv_file_header = [
            'rs_id', 'in_ensembl', 'ensembl_gene_ids',
            'gene_symbols', 'so_term', 'distance',
        ]
        self.assigned_variants = AssignVariants(rs_id_file)
        # The two private helpers read the attributes assigned above, so
        # these calls must stay after them and in this order.
        self.output_rows = self.__process_assigned_variants()
        self.reference_gene_map = self.__get_reference_gene_map()
Пример #2
0
class WriteGeneAssignments:
    '''
    Collect variant-to-gene assignments and write them as tab-separated rows.

    Construction does all the gathering: it wraps the input in an
    ``AssignVariants`` instance, reduces each assigned variant to a compact
    row, and asks ``ExecuteEnsemblPerl`` for a reference map covering every
    gene ID that appears in those rows. ``write_output_to_file`` then only
    formats and writes.
    '''

    def __init__(self, rs_id_file):
        '''
        :param rs_id_file: str, handed unchanged to ``AssignVariants``
        :return: None
        '''
        self.assigned_variants = AssignVariants(rs_id_file)
        # Column names, in the same order as the fields of a written row.
        self.tsv_file_header = ['rs_id',
                                'in_ensembl',
                                'ensembl_gene_ids',
                                'gene_symbols',
                                'so_term',
                                'distance']
        # Order matters: the reference map is derived from the output rows.
        self.output_rows = self.__process_assigned_variants()
        self.reference_gene_map = self.__get_reference_gene_map()

    def __process_assigned_variants(self):
        '''
        Reduce every assigned-variant mapping to a five-element row
        ``[rs_id, in_ensembl, ensembl_gene_ids, so_term, distance]``.

        When the variant has VEP-associated gene IDs, those are used with the
        most severe consequence as the term and a distance of 0. Otherwise
        the nearest 5' gene and its distance are used, with a term that
        records whether the variant was a regulatory-region variant.

        :return: list
        '''
        output_rows = []
        for row_map in self.assigned_variants.get_assigned_variant_list():
            rs_id = row_map['variant_id']
            in_ensembl = row_map['is_in_ensembl?']
            if row_map['vep_associated_gene_ids']:
                so_term = row_map['most_severe_consequence']
                distance = 0
                ensembl_gene_ids = row_map['vep_associated_gene_ids']
            else:
                # Both fallback cases use the nearest 5' gene; only the
                # reported term differs.
                if row_map['most_severe_consequence'] == 'regulatory_region_variant':
                    so_term = 'nearest_gene_five_prime_end_reg'
                else:
                    so_term = 'nearest_gene_five_prime_end'
                distance = row_map['nearest_ensembl_5p_distance']
                ensembl_gene_ids = [row_map['nearest_5p_ensembl_gene_id']]
            output_rows.append(
                [rs_id, in_ensembl, ensembl_gene_ids, so_term, distance])
        return output_rows

    def __get_reference_gene_map(self):
        '''
        Look up reference information for every gene ID in ``output_rows``.

        :return: dict, as returned by
                 ``ExecuteEnsemblPerl.get_ensembl_gene_id_ref_map``
        '''
        ensembl_gene_ids = []
        for output_row in self.output_rows:
            ensembl_gene_ids.extend(output_row[2])
        # De-duplicate while keeping first-seen order (guaranteed on
        # Python 3.7+), so the lookup receives the IDs in a reproducible
        # order instead of arbitrary set order.
        ensembl_id_uniq = list(dict.fromkeys(ensembl_gene_ids))
        exec_ensembl_perl = ExecuteEnsemblPerl()
        return exec_ensembl_perl.get_ensembl_gene_id_ref_map(ensembl_id_uniq)

    def __resolve_genes(self, ensembl_gene_ids):
        '''
        Translate gene IDs into parallel lists of reference IDs and symbols.

        IDs that are absent from ``reference_gene_map`` are skipped.

        :param ensembl_gene_ids: iterable of gene IDs taken from an output row
        :return: tuple ``(reference_ids, gene_symbols)`` of two lists of str
        '''
        reference_ids = []
        gene_symbols = []
        for ensembl_gene_id in ensembl_gene_ids:
            try:
                ref_gene_map = self.reference_gene_map[ensembl_gene_id]
            except KeyError:
                continue
            gene_symbol = ref_gene_map['external_name']
            reference_id = ref_gene_map.get('ensembl_gene_id_for_reference')
            # A missing value used to reach ','.join() as None and raise
            # TypeError. Fall back to the queried ID for the reference column
            # (assumes a gene without a separate reference ID stands for
            # itself -- TODO confirm) and to an empty symbol, so both lists
            # stay position-aligned.
            if reference_id is None:
                reference_id = ensembl_gene_id
            if gene_symbol is None:
                gene_symbol = ''
            reference_ids.append(str(reference_id))
            gene_symbols.append(str(gene_symbol))
        return reference_ids, gene_symbols

    def write_output_to_file(self, path_to_output_file, write_header=False):
        '''
        Write one tab-separated line per output row to the given file.

        Each line holds: rs_id, in_ensembl, comma-joined reference gene IDs,
        comma-joined gene symbols, so_term, distance. The file is overwritten
        if it already exists.

        :param path_to_output_file: str, path of the file to (over)write
        :param write_header: bool, when True ``tsv_file_header`` is written
                             as the first line; defaults to False, which
                             keeps the original header-less output
        :return: None
        '''
        new_output_rows = []
        if write_header:
            new_output_rows.append(self.tsv_file_header)
        for output_row in self.output_rows:
            (rs_id, in_ensembl, ensembl_gene_ids, so_term, distance) = output_row
            reference_ids, gene_symbols = self.__resolve_genes(ensembl_gene_ids)
            new_output_rows.append([rs_id,
                                    in_ensembl,
                                    ','.join(reference_ids),
                                    ','.join(gene_symbols),
                                    so_term,
                                    distance])
        with open(path_to_output_file, 'w') as fho:
            for data_row in new_output_rows:
                fho.write('\t'.join([str(element) for element in data_row]) + '\n')