Exemplo n.º 1
0
 def get_rsat_organism(self, kegg_organism):
     """Return the RSAT directory name to use for this organism.

     The RSAT directory listing is read through the URL cache and two
     candidate links are taken from it: the first match for
     ``self.kegg_species`` and the first match for `kegg_organism`, both
     with trailing slashes stripped. The first candidate whose NCBI code
     (as reported by ``__get_ncbi_code``) equals ``self.ncbi_code`` is
     returned, the ``self.kegg_species`` candidate being checked first.
     If no candidate matches, a warning is logged and the
     ``self.kegg_species`` candidate is returned anyway.
     """
     logging.debug('RSAT - get_directory()')
     cache_file = "/".join([self.cache_dir, 'rsat_dir.html'])
     text = util.read_url_cached("/".join([self.base_url,
                                           RsatDatabase.DIR_PATH]),
                                 cache_file)
     suggestion1 = util.best_matching_links(self.kegg_species, text)[0].rstrip('/')
     suggestion2 = util.best_matching_links(kegg_organism, text)[0].rstrip('/')

     # Identical suggestions need only a single NCBI code lookup.
     candidates = [suggestion1]
     if suggestion2 != suggestion1:
         candidates.append(suggestion2)

     # Codes are looked up for every candidate before any comparison,
     # in candidate order.
     ncbi_codes = [self.__get_ncbi_code(name) for name in candidates]
     expected_code = str(self.ncbi_code)
     for name, code in zip(candidates, ncbi_codes):
         if str(code) == expected_code:
             return name

     # logging.warn is a deprecated alias of logging.warning
     logging.warning("can't find the correct RSAT mapping !")
     return suggestion1
Exemplo n.º 2
0
 def get_rsat_organism(self, kegg_organism):
     """Return the RSAT directory name to use for this organism.

     Reads the RSAT directory listing through the URL cache, then takes
     the first matching link for ``self.kegg_species`` and the first
     matching link for `kegg_organism` (trailing slashes stripped). The
     candidate whose NCBI code, obtained from ``__get_ncbi_code``, equals
     ``self.ncbi_code`` is returned; the ``self.kegg_species`` candidate
     has priority. When neither code matches, a warning is logged and
     the ``self.kegg_species`` candidate is returned as a fallback.
     """
     logging.debug('RSAT - get_directory()')
     cache_file = "/".join([self.cache_dir, 'rsat_dir.html'])
     text = util.read_url_cached(
         "/".join([self.base_url, RsatDatabase.DIR_PATH]), cache_file)
     suggestion1 = util.best_matching_links(self.kegg_species,
                                            text)[0].rstrip('/')
     suggestion2 = util.best_matching_links(kegg_organism,
                                            text)[0].rstrip('/')

     # When both searches agree there is only one candidate to verify.
     if suggestion1 == suggestion2:
         candidates = [suggestion1]
     else:
         candidates = [suggestion1, suggestion2]

     # All lookups happen up front, in candidate order, before any
     # comparison is made.
     codes = [self.__get_ncbi_code(candidate) for candidate in candidates]
     wanted = str(self.ncbi_code)
     for candidate, code in zip(candidates, codes):
         if str(code) == wanted:
             return candidate

     # logging.warn is a deprecated alias of logging.warning
     logging.warning("can't find the correct RSAT mapping !")
     return suggestion1
Exemplo n.º 3
0
 def get_rsat_organism(self, kegg_organism):
     """Return the RSAT directory link that best matches `kegg_organism`.

     The directory listing page is read through the URL cache
     (``rsat_dir.html`` under ``self.cache_dir``); the first link returned
     by ``util.best_matching_links`` is returned with trailing slashes
     stripped.
     """
     logging.info('RSAT - get_directory()')
     cache_path = "/".join([self.cache_dir, 'rsat_dir.html'])
     listing_url = "/".join([self.base_url, RsatDatabase.DIR_PATH])
     listing_html = util.read_url_cached(listing_url, cache_path)
     best_link = util.best_matching_links(kegg_organism, listing_html)[0]
     return best_link.rstrip('/')
Exemplo n.º 4
0
 def get_rsat_organism(self, kegg_organism):
     """Look up the RSAT directory entry for `kegg_organism`.

     Fetches the RSAT directory listing via ``util.read_url_cached``
     and returns the first entry of ``util.best_matching_links`` for
     `kegg_organism`, without its trailing slashes.
     """
     logging.info('RSAT - get_directory()')
     cache_file = "/".join([self.cache_dir, 'rsat_dir.html'])
     directory_url = "/".join([self.base_url, RsatDatabase.DIR_PATH])
     page = util.read_url_cached(directory_url, cache_file)
     matches = util.best_matching_links(kegg_organism, page)
     top_match = matches[0]
     return top_match.rstrip('/')
Exemplo n.º 5
0
 def mapper_fun(kegg_organism):
     """Build the RsatSpeciesInfo for the RSAT organism matching a KEGG name.

     The first link that ``util.best_matching_links`` finds for
     `kegg_organism` in the RSAT directory listing (trailing slashes
     stripped) is used as the RSAT organism name. The result bundles the
     database handle, that name, its eukaryote flag and its taxonomy id.

     NOTE(review): an earlier description mentioned restricting to the
     genes in gene_names; no such filtering is visible in this function.
     """
     directory_html = rsatdb.get_directory()
     candidates = util.best_matching_links(kegg_organism, directory_html)
     organism = candidates[0].rstrip('/')
     eukaryote_flag = is_eukaryote(organism)
     taxonomy_id = get_taxonomy_id(organism)
     return RsatSpeciesInfo(rsatdb, organism, eukaryote_flag, taxonomy_id)
Exemplo n.º 6
0
 def mapper_fun(kegg_organism):
     """Build the RsatSpeciesInfo for the RSAT organism matching a KEGG name.

     `kegg_organism` is first translated through
     ``patches.KEGG_EXCEPTIONS`` when it has an entry there. The first
     link that ``util.best_matching_links`` finds for the resulting name
     in the RSAT directory listing (trailing slashes stripped) becomes the
     RSAT organism name. The mapping is printed to stdout, and the result
     bundles the database handle, the RSAT name, its eukaryote flag and
     its taxonomy id.

     NOTE(review): an earlier description mentioned restricting to the
     genes in gene_names; no such filtering is visible in this function.
     """
     # in many cases, the fuzzy match delivers the correct RSAT organism
     # name, but there are exceptions
     kegg_organism = patches.KEGG_EXCEPTIONS.get(kegg_organism,
                                                 kegg_organism)
     rsat_organism = util.best_matching_links(
         kegg_organism,
         rsatdb.get_directory())[0].rstrip('/')
     # Single-argument parenthesised form prints the same text under both
     # the Python 2 print statement and the Python 3 print function.
     print("mapper_fun(), kegg org = '%s', rsat org = '%s'" % (kegg_organism, rsat_organism))
     return RsatSpeciesInfo(rsatdb, rsat_organism,
                            is_eukaryote(rsat_organism),
                            get_taxonomy_id(rsat_organism))
Exemplo n.º 7
0
 def test_best_rsat_matches(self):
     """Check that best_matching_links puts 'Halobacterium_sp/' first.

     The HTML fixture is read from RSAT_LIST_FILE_PATH and searched for
     the name 'Halobacterium'.
     """
     with open(RSAT_LIST_FILE_PATH) as inputfile:
         html = inputfile.read()
     matches = util.best_matching_links('Halobacterium', html)
     # assertEquals is a deprecated alias that was removed in Python 3.12
     self.assertEqual("Halobacterium_sp/", matches[0])