def __init__(self, graph_type, are_bnodes_skolemized, data_release_version=None):
    """
    Set up the OMIA ingest and its in-memory lookup tables.

    The three arguments are forwarded unchanged to the parent constructor
    together with the fixed OMIA ingest metadata (name, title, url, logo
    and data-rights link).

    :param graph_type: forwarded to the parent constructor
    :param are_bnodes_skolemized: forwarded to the parent constructor
    :param data_release_version: forwarded to the parent constructor
    """
    super().__init__(
        graph_type=graph_type,
        are_bnodes_skolemized=are_bnodes_skolemized,
        data_release_version=data_release_version,
        name='omia',
        ingest_title='Online Mendelian Inheritance in Animals',
        ingest_url='https://omia.org',
        ingest_logo='source-omia.png',
        # ingest_desc=None,
        license_url=None,
        data_rights='http://sydney.edu.au/disclaimer.shtml',
        # file_handle=None
    )

    # one independent (initially empty) id lookup table per entity kind
    entity_kinds = ('article', 'phene', 'breed', 'taxon', 'gene')
    self.id_hash = {kind: {} for kind in entity_kinds}
    self.label_hash = {}

    # omia -> omim phene mappings
    self.omia_omim_map = {}

    # unique genes that have phenes (needed later for fetching orthology)
    self.annotated_genes = set()

    disease_test_ids = [
        'OMIA:001702', 'OMIA:001867', 'OMIA:000478',
        'OMIA:000201', 'OMIA:000810', 'OMIA:001400',
    ]
    gene_test_ids = [
        '492297', '434', '492296', '3430235', '200685834',
        '394659996', '200685845', '28713538', '291822383',
    ]
    taxon_test_ids = [
        '9691', '9685', '9606', '9615', '9913',
        '93934', '37029', '9627', '9825',
    ]
    self.test_ids = {
        'disease': disease_test_ids,
        'gene': gene_test_ids,
        'taxon': taxon_test_ids,
        # filled in while the breed table is parsed,
        # then used for lookup by breed-associations
        'breed': [],
    }

    # omia id -> molecular info, kept so a curation report can be written
    self.stored_omia_mol_gen = {}

    # NOTE(review): self-assignment kept from the original; it looks like a
    # no-op unless `graph` is a property or class-level attribute -- confirm.
    self.graph = self.graph
    self.ncbi = NCBIGene(self.graph_type, self.are_bnodes_skized)
def fetch(self, is_dl_forced=False):
    """
    Fetch this source's files plus the NCBI gene_group file.

    :param is_dl_forced: passed straight through to ``self.get_files``
    :return: None
    """
    self.get_files(is_dl_forced)

    # The full NCBIGene fetch() is not run here (that call was commented
    # out in the original); only its 'gene_group' file entry is used.
    gene_source = NCBIGene(self.graph_type, self.are_bnodes_skized)
    group_entry = gene_source.files['gene_group']

    remote_url = group_entry['url']
    local_path = '/'.join([gene_source.rawdir, group_entry['file']])
    # third argument is a literal False regardless of is_dl_forced
    self.fetch_from_url(remote_url, local_path, False)
def fetch(self, is_dl_forced=False):
    """
    Fetch this source's files plus the NCBI gene_group file, then type OMIM ids.

    After the downloads, the result of ``self.find_omim_type()`` is stored
    on ``self.omim_type``.

    :param is_dl_forced: passed straight through to ``self.get_files``
    :return: None
    """
    self.get_files(is_dl_forced)

    # The full NCBIGene fetch() is not run here (that call was commented
    # out in the original); only its 'gene_group' file entry is used.
    gene_source = NCBIGene(self.graph_type, self.are_bnodes_skized)
    group_entry = gene_source.files['gene_group']

    remote_url = group_entry['url']
    local_path = '/'.join([gene_source.rawdir, group_entry['file']])
    # third argument is a literal False regardless of is_dl_forced
    self.fetch_from_url(remote_url, local_path, False)

    # load and tag a list of OMIM IDs with types
    # (per the original comment this has the side effect of populating
    # "omim replaced" -- not visible from here, verify in find_omim_type)
    self.omim_type = self.find_omim_type()
def parse(self, limit=None):
    """
    Parse the OMIA data into the working graph in three passes.

    Order of work: scrub the input, pick the target graph (test graph when
    in test mode), process species, then classes, then associations, add
    orthologs for the annotated genes, and finally write the molgen report.

    :param limit: passed to each ``process_*`` step; when not None it is
        also logged as the number of rows being parsed
    :return: None
    """
    # Table names available to iterate - probably not all are needed:
    # Article_Breed, Article_Keyword, Article_Gene, Article_People,
    # Article_Phene, Articles, Breed, Breed_Phene, Genes_gb,
    # Group_Categories, Group_MPO, Inherit_Type, Keywords, Landmark,
    # Lida_Links, OMIA_Group, OMIA_author, Omim_Xref, People, Phene,
    # Phene_Gene, Publishers, Resources, Species_gb, Synonyms

    self.scrub()

    if limit is not None:
        logger.info("Only parsing first %d rows", limit)
    logger.info("Parsing files...")

    # test-only runs force test mode on
    if self.testOnly:
        self.testMode = True
    self.g = self.testgraph if self.testMode else self.graph

    # pass 1: species first, because the two later passes reference it
    self.process_species(limit)
    # pass 2: breeds, genes, articles, and other static stuff
    self.process_classes(limit)
    # pass 3: the association data
    self.process_associations(limit)

    # vertebrate orthology for the genes annotated with phenotypes
    gene_source = NCBIGene(self.graph_type, self.are_bnodes_skized)
    gene_source.add_orthologs_by_gene_group(self.g, self.annotated_genes)

    logger.info("Done parsing.")

    self.write_molgen_report()
def setUp(self):
    """Create an NCBIGene source flagged test-only and set the directory for it."""
    gene_source = NCBIGene('rdf_graph', True)
    self.source = gene_source
    gene_source.settestonly(True)
    self._setDirToSource()
def setUp(self):
    """
    Create an NCBIGene source flagged test-only, using the configured gene test ids.

    The ids are read from ``self._get_conf()['test_ids']['gene']`` and
    assigned before the source is switched to test-only.
    """
    gene_source = NCBIGene('rdf_graph', True)
    self.source = gene_source

    configured_ids = self._get_conf()['test_ids']
    gene_source.test_ids = configured_ids['gene']

    gene_source.settestonly(True)
    self._setDirToSource()