示例#1
0
    def __init__(self,
                 graph_type,
                 are_bnodes_skolemized,
                 data_release_version=None):
        """
        Register the OMIA ingest with the parent class and create the
        empty bookkeeping structures used while parsing.

        :param graph_type: forwarded unchanged to the parent constructor
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            constructor
        :param data_release_version: forwarded unchanged to the parent
            constructor; defaults to None
        """
        # Fixed descriptive metadata for this source.
        # ingest_desc and file_handle are deliberately not passed.
        source_metadata = {
            'name': 'omia',
            'ingest_title': 'Online Mendelian Inheritance in Animals',
            'ingest_url': 'https://omia.org',
            'ingest_logo': 'source-omia.png',
            'license_url': None,
            'data_rights': 'http://sydney.edu.au/disclaimer.shtml',
        }
        super().__init__(
            graph_type=graph_type,
            are_bnodes_skolemized=are_bnodes_skolemized,
            data_release_version=data_release_version,
            **source_metadata
        )

        # one independent, initially empty lookup table per entity kind
        entity_kinds = ('article', 'phene', 'breed', 'taxon', 'gene')
        self.id_hash = {kind: {} for kind in entity_kinds}
        self.label_hash = {}

        # holds the omia to omim phene mappings
        self.omia_omim_map = {}

        # the unique genes that have phenes (for fetching orthology)
        self.annotated_genes = set()

        disease_ids = [
            'OMIA:001702', 'OMIA:001867', 'OMIA:000478',
            'OMIA:000201', 'OMIA:000810', 'OMIA:001400',
        ]
        gene_ids = [
            '492297', '434', '492296', '3430235', '200685834',
            '394659996', '200685845', '28713538', '291822383',
        ]
        taxon_ids = [
            '9691', '9685', '9606', '9615', '9913',
            '93934', '37029', '9627', '9825',
        ]
        self.test_ids = {
            'disease': disease_ids,
            'gene': gene_ids,
            'taxon': taxon_ids,
            # filled in while the breed table is parsed,
            # for lookup by breed-associations
            'breed': [],
        }

        # map of omia ids to any molecular info,
        # kept so a report can be written for curation
        self.stored_omia_mol_gen = {}

        # NOTE(review): self-assignment; looks like a no-op unless `graph`
        # is a property or class-level attribute -- confirm before removing.
        self.graph = self.graph

        # graph_type / are_bnodes_skized are read back from self here;
        # presumably they are set by the parent constructor -- TODO confirm
        self.ncbi = NCBIGene(self.graph_type, self.are_bnodes_skized)
示例#2
0
文件: OMIA.py 项目: sgml/dipper
    def fetch(self, is_dl_forced=False):
        """
        Fetch this source's files, then fetch the NCBI 'gene_group' file.

        :param is_dl_forced: passed through to self.get_files()
        :return: None
        """
        self.get_files(is_dl_forced)

        # Only the gene_group entry of the NCBIGene source is fetched;
        # a full ncbi.fetch() is deliberately not performed.
        ncbi_gene = NCBIGene(self.graph_type, self.are_bnodes_skized)
        group_entry = ncbi_gene.files['gene_group']
        remote_url = group_entry['url']
        local_path = '/'.join((ncbi_gene.rawdir, group_entry['file']))

        # third positional argument is presumably the "force download"
        # flag -- verify against fetch_from_url's signature
        self.fetch_from_url(remote_url, local_path, False)
示例#3
0
    def fetch(self, is_dl_forced=False):
        """
        Fetch this source's files, fetch the NCBI 'gene_group' file, and
        store the result of self.find_omim_type() on self.omim_type.

        :param is_dl_forced: passed through to self.get_files()
        :return: None
        """
        self.get_files(is_dl_forced)

        # Only the gene_group entry of the NCBIGene source is fetched;
        # a full ncbi.fetch() is deliberately not performed.
        ncbi_gene = NCBIGene(self.graph_type, self.are_bnodes_skized)
        group_entry = ncbi_gene.files['gene_group']
        remote_url = group_entry['url']
        local_path = '/'.join((ncbi_gene.rawdir, group_entry['file']))

        # third positional argument is presumably the "force download"
        # flag -- verify against fetch_from_url's signature
        self.fetch_from_url(remote_url, local_path, False)

        # load and tag a list of OMIM IDs with types
        # (per the original note, this also populates "omim replaced"
        # as a side effect -- not visible from here)
        self.omim_type = self.find_omim_type()
示例#4
0
    def parse(self, limit=None):
        """
        Run the complete parse: scrub, pick the target graph, process
        species, classes and associations in that order, add orthologs
        for the annotated genes, and write the molgen report.

        :param limit: handed to each process_* step; when not None it is
            also logged as the number of rows being parsed
        :return: None
        """
        # names of tables to iterate - probably don't need all these:
        # Article_Breed, Article_Keyword, Article_Gene, Article_People,
        # Article_Phene, Articles, Breed, Breed_Phene, Genes_gb,
        # Group_Categories, Group_MPO, Inherit_Type, Keywords, Landmark,
        # Lida_Links, OMIA_Group, OMIA_author, Omim_Xref, People, Phene,
        # Phene_Gene, Publishers, Resources, Species_gb, Synonyms

        self.scrub()

        if limit is not None:
            logger.info("Only parsing first %d rows", limit)

        logger.info("Parsing files...")

        # a test-only run always switches test mode on
        if self.testOnly:
            self.testMode = True

        # write into the test graph in test mode, the real graph otherwise
        self.g = self.testgraph if self.testMode else self.graph

        # three passes over the input:
        # 1) species first, since the two other passes reference it
        self.process_species(limit)
        # 2) breeds, genes, articles, and other static stuff
        self.process_classes(limit)
        # 3) the association data
        self.process_associations(limit)

        # vertebrate orthology for the genes annotated with phenotypes
        ncbi_gene = NCBIGene(self.graph_type, self.are_bnodes_skized)
        ncbi_gene.add_orthologs_by_gene_group(self.g, self.annotated_genes)

        logger.info("Done parsing.")

        self.write_molgen_report()
示例#5
0
文件: test_ncbi.py 项目: sgml/dipper
 def setUp(self):
     """Create the NCBIGene source under test and flag it test-only."""
     ncbi_source = NCBIGene('rdf_graph', True)
     self.source = ncbi_source
     ncbi_source.settestonly(True)
     self._setDirToSource()
示例#6
0
 def setUp(self):
     """
     Create the NCBIGene source under test, give it the gene test ids
     from the test configuration, and flag it test-only.
     """
     self.source = NCBIGene('rdf_graph', True)
     # gene ids come from the 'test_ids' -> 'gene' entry of _get_conf()
     gene_test_ids = self._get_conf()['test_ids']['gene']
     self.source.test_ids = gene_test_ids
     self.source.settestonly(True)
     self._setDirToSource()