def __get_reference_gene_map(self):
        '''
        Collect the Ensembl gene IDs held at index 2 of every entry in
        self.output_rows, de-duplicate them and ask ExecuteEnsemblPerl for
        the reference map of the distinct IDs.

        :return: dict
        '''
        # Flatten the per-row ID collections into one list before de-duplicating.
        all_gene_ids = [gene_id
                        for row in self.output_rows
                        for gene_id in row[2]]
        distinct_gene_ids = list(set(all_gene_ids))
        return ExecuteEnsemblPerl().get_ensembl_gene_id_ref_map(distinct_gene_ids)
示例#2
0
class CollectVariantData:
    '''
    Collects Ensembl data for the rs IDs listed in a text file
    (one rs ID per line), via ExecuteEnsemblPerl and the VEP POST wrapper
    RsIdVepPost.
    '''

    def __init__(self, rs_id_file):
        '''
        :param rs_id_file: path of the text file listing the rs IDs; it is
                           also handed to ExecuteEnsemblPerl.
        :return: None
        '''
        self.rs_id_file = rs_id_file
        self.ensembl_perl = ExecuteEnsemblPerl(rs_id_file)

    def get_variants_in_ensembl_map(self):
        '''
        Uses the Perl Ensembl REST API to determine if input variants are in Ensembl.
        :return: dict
        '''
        return self.ensembl_perl.get_variant_in_ensembl_map()

    def get_nearest_gene_map(self):
        '''
        Uses the PERL Ensembl API to get the nearest gene at the 5' end for each variant.
        :return: dict
        '''
        return self.ensembl_perl.get_nearest_gene_map()

    def __chunk_list(self, input_list, chunk_size=200):
        '''
        Breaks the input list into chunks. Used to limit the number of variants
        sent to the Ensembl REST VEP POST API call. Every chunk holds
        chunk_size elements except possibly the last, which holds the
        remainder; an empty input yields nothing.
        :param input_list: iterable of elements to split up
        :param chunk_size: int
        :return: generator
        '''
        sublist = []
        for element in input_list:
            sublist.append(element)
            if len(sublist) == chunk_size:
                yield sublist
                sublist = []
        # Emit the trailing partial chunk, if any.
        if sublist:
            yield sublist

    def __read_rs_ids(self):
        '''
        Reads the rs IDs from self.rs_id_file, one per line, with surrounding
        whitespace stripped. Blank lines (including the empty entry caused by
        a trailing newline) are skipped so that no empty ID is submitted.
        :return: list
        '''
        # The with-block guarantees the file handle is closed after reading.
        with open(self.rs_id_file, 'rt') as rs_id_handle:
            stripped_lines = [line.strip() for line in rs_id_handle]
        return [rs_id for rs_id in stripped_lines if rs_id]

    def get_rest_api_vep_list(self):
        '''
        Uses the Ensembl REST VEP POST API to return a list of VEP JSONs.
        The rs IDs are submitted in chunks (see __chunk_list) and the entries
        of every response are concatenated in submission order.
        :return: list
        '''
        vep_outputs = []
        for sublist in self.__chunk_list(self.__read_rs_ids()):
            vep_post = RsIdVepPost(sublist)
            vep_outputs.extend(vep_post.get_vep_post_output())
        return vep_outputs
示例#3
0
 def __init__(self, rs_id_file):
     '''
     Store the rs ID file reference and create the ExecuteEnsemblPerl
     wrapper for that same file.

     :param rs_id_file: value kept on the instance and passed to
                        ExecuteEnsemblPerl
     :return: None
     '''
     self.rs_id_file = rs_id_file
     perl_wrapper = ExecuteEnsemblPerl(rs_id_file)
     self.ensembl_perl = perl_wrapper
        :param rank: int
        :return: dictionary
        '''
        # Sort a list of dictionaries numerically using the key "distance".
        genes = sorted(self._nearest_gene_list, key=lambda x: abs(int(x['distance'])))
        if len(genes) -1 < rank:
            return {}
        return genes[rank]
    def get_nearest_gene_map(self, rank=0):
        '''
        Fetch the gene entry at the requested rank by delegating to
        get_gene_map_for_rank. With the default rank=0 this is the nearest
        gene; rank=1 asks for the next nearest one, and so on.

        :param rank: int
        :return: dictionary
        '''
        return self.get_gene_map_for_rank(rank)
        
if __name__ == '__main__':
    # Manual smoke test: look up the nearest gene for one rs ID from the
    # bundled test list and print it.
    from execute_ensembl_perl import ExecuteEnsemblPerl
    test_file = './test_data/rs_id_list.txt'
    exec_ensembl_perl = ExecuteEnsemblPerl(test_file)
    nearest_gene_map = exec_ensembl_perl.get_nearest_gene_map()
    rs_id = 'rs1000113'
    nearest_gene5p = NearestGeneFivePrime(nearest_gene_map[rs_id])
    # A single-argument print(...) call behaves the same on Python 2 and 3.
    # The extra ' ' reproduces the separator that the former pair of print
    # statements (the first ending in a comma) placed between the two values.
    print(rs_id + ': ' + ' ' + str(nearest_gene5p.get_nearest_gene_map()))