Python Source示例，dipper.sources.Source.Source Python示例

示例#1

0

显示文件

文件： Ensembl.py 项目： JervenBolleman/dipper

    def __init__(self, tax_ids=None, gene_ids=None):
        """
        Initialise the 'ensembl' source.

        :param tax_ids: taxon ids kept on the instance; when None the list
            [9606, 10090, 7955] is used instead
        :param gene_ids: NOTE(review): stored first, but then always
            replaced below by the configured gene test ids (or an empty
            list), so the passed value is effectively unused -- confirm
            this is intended
        """
        Source.__init__(self, 'ensembl')

        self.tax_ids, self.gene_ids = tax_ids, gene_ids
        self.load_bindings()

        self.dataset = Dataset(
            'ensembl', 'ENSEMBL', 'http://www.ensembl.org', None)

        # no taxa supplied by the caller: use the default list
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # gene ids come from the 'test_ids' section of the configuration
        conf = config.get_config()
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            self.gene_ids = conf['test_ids']['gene']
        else:
            self.gene_ids = []
            logger.warning("not configured with gene test ids.")

        self.properties = Feature.properties

        logger.setLevel(logging.INFO)

示例#2

0

显示文件

文件： ClinVar.py 项目： d3borah/dipper

    def __init__(self, tax_ids=None, gene_ids=None):
        """
        Initialise the 'clinvar' source.

        :param tax_ids: taxon ids, stored on the instance unchanged
        :param gene_ids: gene ids; replaced by the configured gene test ids
            when the configuration provides them
        """
        Source.__init__(self, 'clinvar')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.filter = 'taxids'
        self.load_bindings()

        self.dataset = Dataset(
            'ClinVar', 'National Center for Biotechnology Information',
            'http://www.ncbi.nlm.nih.gov/clinvar/', None,
            'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # read the configuration once instead of once per membership test
        conf = config.get_config()
        test_ids = conf['test_ids'] if 'test_ids' in conf else {}

        # Logger.warn() is a deprecated alias; Logger.warning() is used
        if 'gene' in test_ids:
            self.gene_ids = test_ids['gene']
        else:
            logger.warning("not configured with gene test ids.")

        # NOTE(review): self.disease_ids is only assigned when the config
        # provides disease test ids -- confirm a default exists elsewhere
        if 'disease' in test_ids:
            self.disease_ids = test_ids['disease']
        else:
            logger.warning("not configured with disease test ids.")

        self.properties = Feature.properties

示例#3

0

显示文件

文件： MPD.py 项目： alexgarciac/dipper

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'mpd' (Mouse Phenome Database) source.

        :param graph_type: forwarded unchanged to Source.__init__
        :param are_bnodes_skolemized: forwarded unchanged to Source.__init__
        """
        # license_url and file_handle are deliberately not passed
        Source.__init__(
            self, graph_type, are_bnodes_skolemized, 'mpd',
            ingest_title='Mouse Phenome Database',
            ingest_url='https://phenome.jax.org/',
            data_rights='https://phenome.jax.org/about/termsofuse')

        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # @N, not sure if this step is required
        self.stdevthreshold = 2

        # lookup tables, all starting out empty
        self.assayhash = dict()
        self.idlabel_hash = dict()
        # per the original notes: mean/zscore of each measure by strain+sex
        self.score_means_by_measure = dict()
        # per the original notes: mean value for each measure by strain+sex
        self.strain_scores_by_measure = dict()

示例#4

0

显示文件

文件： GeneOntology.py 项目： JervenBolleman/dipper

    def __init__(self, tax_ids=None):
        """
        Initialise the 'go' (Gene Ontology) source.

        :param tax_ids: taxon ids to filter on; when None the list
            [9606, 10090, 7955] is used
        """
        Source.__init__(self, 'go')

        # Defaults
        if tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # log the list actually in effect; the argument itself is None
            # here, which is what the message used to print
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            self.tax_ids = tax_ids
            logger.info("Filtering on the following taxa: %s", self.tax_ids)

        # update the dataset object with details about this resource
        self.dataset = Dataset(
            'go', 'GeneOntology', 'http://www.geneontology.org', None,
            "https://creativecommons.org/licenses/by/4.0/legalcode",
            'http://geneontology.org/page/use-and-license')

        # read the configuration once instead of once per membership test
        conf = config.get_config()
        # NOTE(review): self.test_ids is only assigned when the config
        # provides gene test ids -- confirm a default exists elsewhere
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            self.test_ids = conf['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

示例#5

0

显示文件

文件： CTD.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'ctd' source: dataset description, gene and disease
        test ids taken from the configuration, and the graph helpers.
        """
        Source.__init__(self, 'ctd')
        self.dataset = Dataset(
            'ctd', 'CTD', 'http://ctdbase.org', None,
            'http://ctdbase.org/about/legal.jsp')

        conf = config.get_config()

        # gene test ids, or an empty list (with a warning) when absent
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            self.test_geneids = conf['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")
            self.test_geneids = []

        # disease test ids, same fallback
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            self.test_diseaseids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_diseaseids = []

        self.g = self.graph
        self.gu = GraphUtils(curie_map.get())
        self.geno = Genotype(self.g)

示例#6

0

显示文件

文件： NCBIGene.py 项目： d3borah/dipper

    def __init__(self, tax_ids=None, gene_ids=None):
        """
        Initialise the 'ncbigene' source.

        :param tax_ids: taxon ids to filter on; when None the list
            [9606, 10090, 7955] is used
        :param gene_ids: NOTE(review): stored first, but then always
            replaced below by the configured gene test ids (or an empty
            list) -- confirm this is intended
        """
        Source.__init__(self, 'ncbigene')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.filter = 'taxids'
        self.load_bindings()

        self.dataset = Dataset(
            'ncbigene', 'National Center for Biotechnology Information',
            'http://ncbi.nih.nlm.gov/gene', None,
            'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # Defaults
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # log the list actually in effect; the argument itself is None
            # here, which is what the message used to print
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info("Filtering on the following taxa: %s", self.tax_ids)

        # read the configuration once instead of once per membership test
        conf = config.get_config()
        self.gene_ids = []
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            self.gene_ids = conf['test_ids']['gene']
        else:
            # Logger.warn() is a deprecated alias; Logger.warning() is used
            logger.warning("not configured with gene test ids.")

        self.properties = Feature.properties

示例#7

0

显示文件

文件： HPOAnnotations.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'hpoa' source: bindings, dataset description, the
        replaced-id counter and the disease test ids from the configuration.
        """
        Source.__init__(self, 'hpoa')

        self.load_bindings()

        legal_url = (
            'http://www.human-phenotype-ontology.org'
            '/contao/index.php/legal-issues.html')
        self.dataset = Dataset(
            'hpoa', 'Human Phenotype Ontology',
            'http://www.human-phenotype-ontology.org', None, legal_url)

        self.replaced_id_count = 0

        # disease test ids, or an empty list (with a warning) when absent
        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            self.test_ids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []

        # data-source specific warnings to be removed when issues are cleared
        logger.warning(
            "note that some ECO classes are missing for ICE, PCS, and ITM;"
            " using temporary mappings.")

示例#8

0

显示文件

文件： MPD.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'mpd' source: namespaces, dataset description with
        its citation, empty lookup tables and the graph helpers.
        """
        Source.__init__(self, 'mpd')

        # @N, not sure if this step is required
        self.namespaces.update(curie_map.get())
        self.stdevthreshold = 2

        self.nobnodes = True  # FIXME

        # describe this resource
        # @N: Note that there is no license as far as I can tell
        self.dataset = Dataset(
            'mpd', 'MPD', 'http://phenome.jax.org', None, None)
        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # lookup tables, all starting out empty
        self.assayhash = dict()
        self.idlabel_hash = dict()
        # per the original notes: mean/zscore of each measure by strain+sex
        self.score_means_by_measure = dict()
        # per the original notes: mean value for each measure by strain+sex
        self.strain_scores_by_measure = dict()

        self.geno = Genotype(self.graph)
        self.gu = GraphUtils(curie_map.get())

示例#9

0

显示文件

文件： MPD.py 项目： TomConlin/dipper

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'mpd' (Mouse Phenome Database) source.

        :param graph_type: forwarded unchanged to Source.__init__
        :param are_bnodes_skolemized: forwarded unchanged to Source.__init__
        """
        # license_url and file_handle are deliberately not passed
        Source.__init__(
            self, graph_type, are_bnodes_skolemized, 'mpd',
            ingest_title='Mouse Phenome Database',
            ingest_url='https://phenome.jax.org/',
            data_rights='https://phenome.jax.org/about/termsofuse')

        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # @N, not sure if this step is required
        self.stdevthreshold = 2

        # lookup tables, all starting out empty
        self.assayhash = dict()
        self.idlabel_hash = dict()
        # per the original notes: mean/zscore of each measure by strain+sex
        self.score_means_by_measure = dict()
        # per the original notes: mean value for each measure by strain+sex
        self.strain_scores_by_measure = dict()

示例#10

0

显示文件

文件： MMRRC.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'mmrrc' source with empty strain and id/label
        lookup tables, its bindings and its dataset description.
        """
        Source.__init__(self, 'mmrrc')

        # both lookup tables start out empty
        self.strain_hash = dict()
        self.id_label_hash = dict()

        self.load_bindings()

        self.dataset = Dataset(
            'mmrrc',
            'Mutant Mouse Regional Resource Centers',
            'https://www.mmrrc.org',
            None,
            'https://www.mmrrc.org/about/data_download.php')

示例#11

0

显示文件

文件： WormBase.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'wormbase' source and describe its dataset; the
        version number is left unset (None).
        """
        Source.__init__(self, 'wormbase')

        self.version_num = None

        # describe this resource
        # NO LICENSE for this resource
        policies_url = 'http://www.wormbase.org/about/policies#012'
        self.dataset = Dataset(
            'wormbase', 'WormBase', 'http://www.wormbase.org', None, None,
            policies_url)

示例#12

0

显示文件

文件： IMPC.py 项目： d3borah/dipper

    def __init__(self):
        """Initialise the 'impc' source and describe its dataset."""
        Source.__init__(self, 'impc')

        # describe this resource
        license_url = (
            'https://raw.githubusercontent.com'
            '/mpi2/PhenotypeArchive/master/LICENSE')
        self.dataset = Dataset(
            'impc', 'IMPC', 'http://www.mousephenotype.org', None,
            license_url)

        # TODO add a citation for impc dataset as a whole
        # :impc cito:citesAsAuthority PMID:24194600

示例#13

0

显示文件

文件： AnimalQTLdb.py 项目： d3borah/dipper

    def __init__(self):
        """
        Initialise the 'animalqtldb' source, describe its dataset and warn
        that the raw data carries no licence or rights statement.
        """
        Source.__init__(self, 'animalqtldb')

        # update the dataset object with details about this resource
        self.dataset = Dataset(
            'animalqtldb', 'Animal QTL db',
            'http://www.animalgenome.org/cgi-bin/QTLdb/index', None, None,
            'http://www.animalgenome.org/QTLdb/faq#23')

        # source-specific warnings.  will be cleared when resolved.
        # Logger.warn() is a deprecated alias; Logger.warning() is used
        logger.warning(
            "No licences or rights exist for the raw data from this resource.")

示例#14

0

显示文件

    def _get_curie_and_type_from_id(variant_id):
        """
        Given a variant id, our best guess at its curie and type (snp, haplotype, etc)
        'None' will be used for both curie and type  for IDs that we can't process

        # 2019-May three snp-id have  ' e' or ' a'  appended. note space.
        # examples: 'rs2440154 e-A'  and 'rs2440154 e'
        # including the suffix in the url is a web noop but breaks rdflib

        :param variant_id:
        :return:
        """
        curie = None
        variant_type = None

        # remove space before hyphens
        variant_id = re.sub(r' -', '-', variant_id).strip()
        if re.search(r' x ', variant_id) or re.search(r',', variant_id):
            # TODO deal with rs1234 x rs234... (haplotypes?)
            LOG.warning("Cannot parse variant groups of this format: %s",
                        variant_id)
        elif re.search(r';', variant_id):
            curie = ':haplotype_' + Source.hash_id(
                variant_id)  # deliberate 404
            variant_type = "haplotype"
        elif variant_id[:2] == 'rs':
            # remove whitespace from errant id, rs6194 5053-?
            curie = 'dbSNP:' + variant_id.split('-')[0].replace(' ', '')
            # curie = re.sub(r'-.*$', '', curie).strip()
            variant_type = "snp"
            # remove the alteration
        elif variant_id[:3] == 'kgp':
            # http://www.1000genomes.org/faq/what-are-kgp-identifiers
            curie = 'GWAS:' + variant_id.split('-')[0]
            variant_type = "snp"
        elif variant_id[:3] == 'chr':
            # like: chr10:106180121-G
            variant_id = re.sub(r'-?', '-N', variant_id)
            variant_id = re.sub(r' ', '', variant_id)
            # going to hate myself but ...
            # moving this from a broken base node to yet another blank node
            # It had produced this monstrocity with the embedded quote
            # :gwas--Nc-Nh-Nr-N1-N1-N--N1-N0-N2-N7-N5-N1-N1-N0-N2-N"-N?-N
            curie = Source.make_id('gwas-' + re.sub(r':', '-', variant_id),
                                   '_')
            variant_type = "snp"
        elif variant_id.strip() == '':
            pass
        else:
            LOG.warning("There's a snp id i can't manage: %s", variant_id)

        return curie, variant_type

示例#15

0

显示文件

文件： EOM.py 项目： d3borah/dipper

    def __init__(self):
        """
        Initialise the 'eom' source, describe its dataset and log an error
        when the configuration lacks the 'disco' database credentials.
        """
        Source.__init__(self, 'eom')
        self.namespaces.update(curie_map.get())

        # describe this resource
        # TODO put this into a conf file?
        self.dataset = Dataset(
            'eom', 'EOM', 'http://elementsofmorphology.nih.gov', None,
            'http://www.genome.gov/copyright.cfm',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # the 'dbauth' section must hold a 'disco' entry; report it if not
        conf = config.get_config()
        has_credentials = 'dbauth' in conf and 'disco' in conf['dbauth']
        if not has_credentials:
            logger.error("not configured with PG user/password.")

示例#16

0

显示文件

文件： SGD.py 项目： DoctorBud/dipper

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'sgd' source.

        :param graph_type: forwarded unchanged to the parent constructor
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            constructor
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'sgd')

        self.dataset = Dataset(
            'sgd', 'SGD', 'https://www.yeastgenome.org/', None, None)

        # NOTE(review): relative path -- presumably resolved against the
        # working directory; confirm
        global_terms_file = '../../translationtable/global_terms.yaml'
        self.global_terms = Source.open_and_parse_yaml(global_terms_file)

        self.apo_term_id = SGD.make_apo_map()

示例#17

0

显示文件

文件： Coriell.py 项目： d3borah/dipper

    def __init__(self):
        """
        Initialise the 'coriell' source, emit its standing data caveats and
        log an error when the configuration lacks the FTP credentials.
        """
        Source.__init__(self, 'coriell')

        self.load_bindings()

        self.dataset = Dataset(
            'coriell', 'Coriell', 'http://ccr.coriell.org/', None)

        # data-source specific warnings (will be removed when issues are
        # cleared).  Logger.warn() is a deprecated alias; Logger.warning()
        # is used instead.
        logger.warning(
            'We assume that if a species is not provided, '
            'that it is a Human-derived cell line')
        logger.warning(
            'We map all omim ids as a disease/phenotype entity, '
            'but should be fixed in the future')

        # check if config exists; if it doesn't, error out and let user know
        conf = config.get_config()
        if 'dbauth' not in conf or 'coriell' not in conf['dbauth']:
            logger.error("not configured with FTP user/password.")

示例#18

0

显示文件

文件： Orphanet.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'orphanet' source, describe its dataset and warn
        when no disease test ids are configured.
        """
        Source.__init__(self, 'orphanet')

        self.load_bindings()

        self.dataset = Dataset(
            'orphanet',
            'Orphanet',
            'http://www.orpha.net',
            None,
            'http://creativecommons.org/licenses/by-nd/3.0/',
            'http://omim.org/help/agreement')

        # only a warning is issued here; no ids are stored on the instance
        conf = config.get_config()
        has_disease_ids = 'test_ids' in conf and 'disease' in conf['test_ids']
        if not has_disease_ids:
            logger.warning("not configured with disease test ids.")

示例#19

0

显示文件

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'sgd' source.

        :param graph_type: forwarded unchanged to the parent constructor
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            constructor
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'sgd')

        self.dataset = Dataset(
            'sgd', 'SGD', 'https://www.yeastgenome.org/', None, None)

        # NOTE(review): relative path -- presumably resolved against the
        # working directory; confirm
        global_terms_file = '../../translationtable/global_terms.yaml'
        self.global_terms = Source.open_and_parse_yaml(global_terms_file)

        self.apo_term_id = SGD.make_apo_map()

示例#20

0

显示文件

文件： HGNC.py 项目： d3borah/dipper

    def __init__(self, tax_ids=None, gene_ids=None):
        """
        Initialise the 'hgnc' source.

        :param tax_ids: taxon ids, stored on the instance unchanged
        :param gene_ids: NOTE(review): stored first, but then always
            replaced below by the configured gene test ids (or an empty
            list) -- confirm this is intended
        """
        Source.__init__(self, 'hgnc')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.load_bindings()

        self.dataset = Dataset(
            'hgnc', 'HGNC', 'http://www.genenames.org', None)

        # read the configuration once instead of once per membership test
        conf = config.get_config()
        self.gene_ids = []
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            self.gene_ids = conf['test_ids']['gene']
        else:
            # Logger.warn() is a deprecated alias; Logger.warning() is used
            logger.warning("not configured with gene test ids.")

        self.properties = Feature.properties

示例#21

0

显示文件

文件： GWASCatalog.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'gwascatalog' source, describe its dataset and pick
        up the configured test ids.
        """
        Source.__init__(self, 'gwascatalog')

        self.load_bindings()

        description = (
            'The NHGRI-EBI Catalog of published genome-wide association '
            'studies')
        self.dataset = Dataset(
            'gwascatalog', 'GWAS Catalog', 'http://www.ebi.ac.uk/gwas/',
            description,
            'http://creativecommons.org/licenses/by/3.0/', None)
        # 'http://www.ebi.ac.uk/gwas/docs/about'  # TODO add this

        # the whole 'test_ids' section is kept (not only its 'gene' entry),
        # and only when it contains gene ids
        conf = config.get_config()
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            self.test_ids = conf['test_ids']
        else:
            logger.warning("not configured with gene test ids.")

示例#22

0

显示文件

文件： MyChem.py 项目： DoctorBud/dipper

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'mychem' source.

        :param graph_type: forwarded unchanged to the parent constructor
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            constructor
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'mychem')

        self.dataset = Dataset(
            'mychem', 'MYCHEM', 'https://mychem.info/', None, None)

        # NOTE(review): relative path -- presumably resolved against the
        # working directory; confirm
        global_terms_file = '../../translationtable/global_terms.yaml'
        self.global_terms = Source.open_and_parse_yaml(global_terms_file)

        # inchikeys are handed to MyChem.chunks with n=10
        all_inchikeys = MyChem.get_inchikeys()
        self.inchikeys = MyChem.chunks(l=all_inchikeys, n=10)

        self.drugbank_targets = []
        self.drugcentral_interactors = []

示例#23

0

显示文件

文件： Orphanet.py 项目： d3borah/dipper

    def __init__(self):
        """
        Initialise the 'orphanet' source, describe its dataset and warn
        when no disease test ids are configured.
        """
        Source.__init__(self, "orphanet")

        self.load_bindings()

        self.dataset = Dataset(
            "orphanet",
            "Orphanet",
            "http://www.orpha.net",
            None,
            "http://creativecommons.org/licenses/by-nd/3.0/",
            "http://omim.org/help/agreement",
        )

        # check to see if there's any ids configured in the config;
        # otherwise, warn (the configuration is read once)
        conf = config.get_config()
        if "test_ids" not in conf or "disease" not in conf["test_ids"]:
            # Logger.warn() is a deprecated alias; Logger.warning() is used
            logger.warning("not configured with disease test ids.")

示例#24

0

显示文件

文件： KEGG.py 项目： d3borah/dipper

    def __init__(self):
        """
        Initialise the 'kegg' source: dataset description, configured
        disease test ids and empty label / id-mapping tables.
        """
        Source.__init__(self, 'kegg')

        # update the dataset object with details about this resource
        self.dataset = Dataset(
            'kegg', 'KEGG', 'http://www.genome.jp/kegg/', None, None,
            'http://www.kegg.jp/kegg/legal.html')

        # check to see if there's any ids configured in the config;
        # otherwise, warn (the configuration is read once)
        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            # NOTE(review): relies on self.test_ids['disease'] already
            # existing (defined outside this method) and extends it in
            # place -- confirm it is not shared between instances
            self.test_ids['disease'] += conf['test_ids']['disease']
        else:
            # Logger.warn() is a deprecated alias; Logger.warning() is used
            logger.warning("not configured with disease test ids.")

        self.label_hash = {}
        self.omim_disease_hash = {}  # to hold the mappings of omim:kegg ids
        self.kegg_disease_hash = {}  # to hold the mappings of kegg:omim ids

示例#25

0

显示文件

文件： OMIA.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'omia' source: bindings, dataset description, empty
        lookup tables, the built-in test ids and the graph helpers.
        """
        Source.__init__(self, 'omia')

        self.load_bindings()

        self.dataset = Dataset(
            'omia',
            'Online Mendelian Inheritance in Animals',
            'http://omia.angis.org.au',
            None,
            None,
            'http://sydney.edu.au/disclaimer.shtml')

        # one empty id table per record kind
        record_kinds = ('article', 'phene', 'breed', 'taxon', 'gene')
        self.id_hash = {kind: {} for kind in record_kinds}
        self.label_hash = dict()
        self.gu = GraphUtils(curie_map.get())
        # omia to omim phene mappings
        self.omia_omim_map = dict()
        # unique genes that have phenes (for fetching orthology)
        self.annotated_genes = set()

        disease_ids = [
            'OMIA:001702', 'OMIA:001867', 'OMIA:000478', 'OMIA:000201',
            'OMIA:000810', 'OMIA:001400']
        gene_ids = [
            492297, 434, 492296, 3430235, 200685834, 394659996, 200685845,
            28713538, 291822383]
        taxon_ids = [9691, 9685, 9606, 9615, 9913, 93934, 37029, 9627, 9825]
        self.test_ids = {
            'disease': disease_ids,
            'gene': gene_ids,
            'taxon': taxon_ids,
            # to be filled in during parsing of breed table
            # for lookup by breed-associations
            'breed': [],
        }

        # map of omia ids to any molecular info,
        # used to write a report for curation
        self.stored_omia_mol_gen = dict()

        self.g = self.graph
        self.geno = Genotype(self.g)

示例#26

0

显示文件

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'mychem' source.

        :param graph_type: forwarded unchanged to the parent constructor
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            constructor
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'mychem')

        self.dataset = Dataset(
            'mychem', 'MYCHEM', 'https://mychem.info/', None, None)

        # NOTE(review): relative path -- presumably resolved against the
        # working directory; confirm
        global_terms_file = '../../translationtable/global_terms.yaml'
        self.global_terms = Source.open_and_parse_yaml(global_terms_file)

        # inchikeys are handed to MyChem.chunks with n=10
        all_inchikeys = MyChem.get_inchikeys()
        self.inchikeys = MyChem.chunks(l=all_inchikeys, n=10)

        self.drugbank_targets = []
        self.drugcentral_interactors = []

示例#27

0

显示文件

文件： MPD.py 项目： DoctorBud/dipper

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'mpd' source.

        :param graph_type: forwarded unchanged to Source.__init__
        :param are_bnodes_skolemized: forwarded unchanged to Source.__init__
        """
        Source.__init__(self, graph_type, are_bnodes_skolemized, 'mpd')

        # @N, not sure if this step is required
        self.stdevthreshold = 2

        # describe this resource
        # @N: Note that there is no license as far as I can tell
        self.dataset = Dataset(
            'mpd', 'MPD', 'http://phenome.jax.org', None, None)
        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # lookup tables, all starting out empty
        self.assayhash = dict()
        self.idlabel_hash = dict()
        # per the original notes: mean/zscore of each measure by strain+sex
        self.score_means_by_measure = dict()
        # per the original notes: mean value for each measure by strain+sex
        self.strain_scores_by_measure = dict()

示例#28

0

显示文件

文件： MPD.py 项目： putmantime/dipper

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'mpd' source.

        :param graph_type: forwarded unchanged to Source.__init__
        :param are_bnodes_skolemized: forwarded unchanged to Source.__init__
        """
        Source.__init__(self, graph_type, are_bnodes_skolemized, 'mpd')

        # @N, not sure if this step is required
        self.stdevthreshold = 2

        # describe this resource
        # @N: Note that there is no license as far as I can tell
        self.dataset = Dataset(
            'mpd', 'MPD', 'http://phenome.jax.org', None, None)
        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # lookup tables, all starting out empty
        self.assayhash = dict()
        self.idlabel_hash = dict()
        # per the original notes: mean/zscore of each measure by strain+sex
        self.score_means_by_measure = dict()
        # per the original notes: mean value for each measure by strain+sex
        self.strain_scores_by_measure = dict()

示例#29

0

显示文件

文件： Decipher.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'decipher' source: bindings, dataset description,
        disease test ids from the configuration and the graph helpers.
        """
        Source.__init__(self, 'decipher')

        self.load_bindings()

        self.dataset = Dataset(
            'decipher',
            'Development Disorder Genotype – Phenotype Database',
            'https://decipher.sanger.ac.uk/',
            None,
            'https://decipher.sanger.ac.uk/legal')

        # disease test ids, or an empty list (with a warning) when absent
        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            self.test_ids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []

        self.g = self.graph
        self.gu = GraphUtils(curie_map.get())
        self.geno = Genotype(self.g)

示例#30

0

显示文件

文件： OMIM.py 项目： d3borah/dipper

    def __init__(self):
        """
        Initialise the 'omim' source: bindings, dataset description, an
        API-key configuration check and the OMIM disease test ids.
        """
        Source.__init__(self, 'omim')

        self.load_bindings()

        self.dataset = Dataset(
            'omim', 'Online Mendelian Inheritance in Man',
            'http://www.omim.org', None, 'http://omim.org/help/agreement')

        # read the configuration once instead of once per membership test
        conf = config.get_config()

        # check if config exists; if it doesn't, error out and let user know.
        # This must be 'or': with 'and', a missing 'keys' section made the
        # second operand index conf['keys'] and raise KeyError, and a
        # present 'keys' section without 'omim' was never reported.
        if 'keys' not in conf or 'omim' not in conf['keys']:
            logger.error("not configured with API key.")

        # check to see if there's any ids configured in the config;
        # otherwise, warn
        if 'test_ids' not in conf or 'disease' not in conf['test_ids']:
            # Logger.warn() is a deprecated alias; Logger.warning() is used
            logger.warning("not configured with disease test ids.")
        else:
            # select only those test ids that are omim's, without the prefix
            # NOTE(review): relies on self.test_ids already existing
            # (defined outside this method) and extends it in place
            self.test_ids += [
                obj.replace('OMIM:', '')
                for obj in conf['test_ids']['disease']
                if obj.startswith('OMIM:')]

示例#31

0

显示文件

文件： GeneReviews.py 项目： JervenBolleman/dipper

    def __init__(self):
        """
        Initialise the 'genereviews' source: bindings, dataset description
        with its citation, book bookkeeping and the disease test ids.
        """
        Source.__init__(self, 'genereviews')

        self.load_bindings()

        self.dataset = Dataset(
            'genereviews',
            'Gene Reviews',
            'http://genereviews.org/',
            None,
            'http://www.ncbi.nlm.nih.gov/books/NBK138602/')
        self.dataset.set_citation('GeneReviews:NBK1116')

        self.gu = GraphUtils(curie_map.get())

        # book bookkeeping starts out empty
        self.all_books = dict()
        self.book_ids = set()

        # the configured disease test ids are taken as they are;
        # an empty list (with a warning) is used when there are none
        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            self.test_ids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []

示例#32

0

显示文件

文件： GWASCatalog.py 项目： lwinfree/dipper

    def _get_curie_and_type_from_id(variant_id):
        """
        Given a variant id, our best guess at its curie
        and type (snp, haplotype, etc)
        None will be used for both curie and type
        for IDs that we can't process
        :param variant_id:
        :return:
        """
        curie = None
        variant_type = None

        # remove space before hyphens
        variant_id = re.sub(r' -', '-', variant_id)
        if re.search(r' x ', variant_id) \
                or re.search(r',', variant_id):
            # TODO deal with rs1234 x rs234... (haplotypes?)
            logger.warning(
                "Cannot parse variant groups of this format: %s",
                variant_id)
        elif re.search(r';', variant_id):
            curie = ':haplotype_' + Source.hash_id(variant_id)
            variant_type = "haplotype"
        elif re.match(r'rs', variant_id):
            curie = 'dbSNP:' + variant_id.strip()
            curie = re.sub(r'-.*$', '', curie).strip()
            variant_type = "snp"
            # remove the alteration
        elif re.match(r'kgp', variant_id):
            # http://www.1000genomes.org/faq/what-are-kgp-identifiers
            curie = ':kgp-' + variant_id.strip()
            variant_type = "snp"
        elif re.match(r'chr', variant_id):
            # like: chr10:106180121-G
            #
            variant_id = re.sub(r'-?', '-N', variant_id)
            variant_id = re.sub(r' ', '', variant_id)
            curie = ':gwas-' + re.sub(
                r':', '-', variant_id.strip())
            variant_type = "snp"
        elif variant_id.strip() == '':
            pass
        else:
            logger.warning(
                "There's a snp id i can't manage: %s",
                variant_id)

        return curie, variant_type

示例#33

0

显示文件

文件： WormBase.py 项目： tegar9000/dipper-1

 def make_reagent_targeted_gene_id(gene_id, reagent_id):
     """
     Build an identifier for a gene targeted by a reagent.

     :param gene_id: gene identifier string
     :param reagent_id: reagent identifier string
     :return: result of Source.make_id on '<gene_id>-<reagent_id>'
         with the '_' prefix argument
     """
     combined = '-'.join([gene_id, reagent_id])
     return Source.make_id(combined, '_')

示例#34

0

显示文件

    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'rgd' source.

        :param graph_type: forwarded unchanged to the parent constructor
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            constructor
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'rgd')
        self.dataset = Dataset('rgd', 'RGD', 'http://rgd.mcw.edu/', None, None)

        # NOTE(review): relative path -- presumably resolved against the
        # working directory; confirm
        self.global_terms = Source.open_and_parse_yaml(
            '../../translationtable/global_terms.yaml')