Пример #1
0
 def create_ref_sources(self):
     """
     Build the reference sources kept on this object.

     ``self.entrez_ref`` is always set. ``self.ensembl_ref`` is only set when
     ``self.external_ids`` has an 'Ensembl Gene ID' entry, and that entry must
     then contain exactly one ID.

     :raises ValueError: if the 'Ensembl Gene ID' entry does not contain exactly one ID
     """
     self.entrez_ref = make_ref_source(
         self.record['entrezgene']['@source'],
         PROPS['Entrez Gene ID'],
         self.external_ids['Entrez Gene ID'],
         login=self.login)

     # the ensembl reference is optional
     if 'Ensembl Gene ID' not in self.external_ids:
         return

     ensembl_ids = self.external_ids['Ensembl Gene ID']
     if len(ensembl_ids) != 1:
         # NOTE(review): this also triggers for an empty collection, although
         # the message only mentions "more than one" -- confirm intent
         raise ValueError("more than one ensembl gene ID: {}".format(self.record['entrezgene']))

     # exactly one element at this point, so take it
     ensembl_gene_id = next(iter(ensembl_ids))
     self.ensembl_ref = make_ref_source(
         self.record['ensembl']['@source'],
         PROPS['Ensembl Gene ID'],
         ensembl_gene_id,
         login=self.login)
Пример #2
0
 def create_ref_sources(self):
     """
     Build the reference sources kept on this object.

     ``self.entrez_ref`` is always set. ``self.ensembl_ref`` is set from the
     'Reference Ensembl Gene ID' entry of ``self.external_ids`` when present;
     otherwise from the 'Ensembl Gene ID' entry, but only if that entry holds
     exactly one ID. In every other case ``self.ensembl_ref`` is left untouched.
     """
     self.entrez_ref = make_ref_source(
         self.record['entrezgene']['@source'],
         PROPS['Entrez Gene ID'],
         self.external_ids['Entrez Gene ID'],
         login=self.login)

     # preferred: an explicit reference ensembl gene ID
     if 'Reference Ensembl Gene ID' in self.external_ids:
         self.ensembl_ref = make_ref_source(
             self.record['ensembl']['@source'],
             PROPS['Ensembl Gene ID'],
             self.external_ids['Reference Ensembl Gene ID'],
             login=self.login)
         return

     # fallback: the plain ensembl gene ID, only when it is unambiguous
     if 'Ensembl Gene ID' not in self.external_ids:
         return
     if len(self.external_ids['Ensembl Gene ID']) != 1:
         return
     self.ensembl_ref = make_ref_source(
         self.record['ensembl']['@source'],
         PROPS['Ensembl Gene ID'],
         list(self.external_ids['Ensembl Gene ID'])[0],
         login=self.login)
Пример #3
0
    def create_statements(self):
        """
        Build the statements common to all proteins.

        ID statements:
          * UniProt ID (required)
          * Saccharomyces Genome Database ID (optional)
          * Ensembl Protein ID (optional, zero or more)
          * RefSeq Protein ID (optional, zero or more)
        Protein statements: instance of protein, found in taxon, encoded by.

        :return: list of statements
        """
        statements = []

        entrez_gene = self.external_ids['Entrez Gene ID']
        uniprot_ref = make_ref_source(
            self.record['uniprot']['@source'],
            PROPS['UniProt ID'],
            self.external_ids['UniProt ID'],
            login=self.login)
        entrez_ref = make_ref_source(
            self.record['entrezgene']['@source'],
            PROPS['Entrez Gene ID'],
            self.external_ids['Entrez Gene ID'],
            login=self.login)

        # --- ID statements ---
        statements.append(
            wdi_core.WDString(self.external_ids['UniProt ID'], PROPS['UniProt ID'], references=[uniprot_ref]))

        sgd_key = 'Saccharomyces Genome Database ID'
        if sgd_key in self.external_ids:
            statements.append(
                wdi_core.WDString(self.external_ids[sgd_key], PROPS[sgd_key], references=[entrez_ref]))

        # each ensembl protein ID gets a reference pointing at that same ID
        ensembl_key = 'Ensembl Protein ID'
        if ensembl_key in self.external_ids:
            for ensembl_protein in self.external_ids[ensembl_key]:
                ensembl_ref = make_ref_source(
                    self.record['ensembl']['@source'], PROPS[ensembl_key], ensembl_protein, login=self.login)
                statements.append(
                    wdi_core.WDString(ensembl_protein, PROPS[ensembl_key], references=[ensembl_ref]))

        # refseq protein IDs are referenced through the entrez gene ID;
        # a separate reference is built for every statement
        refseq_key = 'RefSeq Protein ID'
        if refseq_key in self.external_ids:
            for refseq_protein in self.external_ids[refseq_key]:
                refseq_ref = make_ref_source(
                    self.record['refseq']['@source'], PROPS['Entrez Gene ID'], entrez_gene, login=self.login)
                statements.append(
                    wdi_core.WDString(refseq_protein, PROPS[refseq_key], references=[refseq_ref]))

        # --- protein statements, all referenced to uniprot ---
        statements.extend([
            # instance of protein
            wdi_core.WDItemID("Q8054", PROPS['instance of'], references=[uniprot_ref]),
            # found in taxon
            wdi_core.WDItemID(self.organism_info['wdid'], PROPS['found in taxon'], references=[uniprot_ref]),
            # encoded by
            wdi_core.WDItemID(self.gene_wdid, PROPS['encoded by'], references=[uniprot_ref]),
        ])

        return statements
Пример #4
0
    def create_gp_statements(self):
        """
        Build strand orientation, genomic start and genomic end statements,
        each carrying a chromosome qualifier.

        Only the first entry of ``self.record['genomic_pos']['@value']`` is used.

        :return: list of three statements (strand orientation, start, end)
        """
        position = self.record['genomic_pos']['@value'][0]
        source = self.record['genomic_pos']['@source']
        id_prop = source_ref_id[source['id']]
        external_id = self.external_ids[id_prop]
        assert isinstance(external_id, str)

        position_ref = make_ref_source(source, PROPS[id_prop], external_id, login=self.login)

        # the chromosome item is looked up from the refseq ID found in the record
        chrom_item = self.refseq_qid_chrom[position['chr']]
        chrom_qualifiers = [wdi_core.WDItemID(value=chrom_item, prop_nr=PROPS['chromosome'], is_qualifier=True)]

        # strand value 1 maps to Q22809680, anything else to Q22809711
        if position['strand'] == 1:
            strand_item = 'Q22809680'
        else:
            strand_item = 'Q22809711'

        return [
            # strand orientation
            wdi_core.WDItemID(strand_item, PROPS['strand orientation'],
                              references=[position_ref], qualifiers=chrom_qualifiers),
            # genomic start and end
            wdi_core.WDString(str(int(position['start'])), PROPS['genomic start'],
                              references=[position_ref], qualifiers=chrom_qualifiers),
            wdi_core.WDString(str(int(position['end'])), PROPS['genomic end'],
                              references=[position_ref], qualifiers=chrom_qualifiers),
        ]
Пример #5
0
    def make_gene_encodes(self, write=True):
        """
        Add an "encodes" statement (pointing at the protein item) to the gene item.

        Exceptions raised while building or writing the gene item are not
        propagated: the traceback is printed and an ERROR message is logged.

        :param write: handed through to ``wdi_helpers.try_write``
        :return:
        """
        uniprot_ref = make_ref_source(
            self.record['uniprot']['@source'],
            PROPS['UniProt ID'],
            self.external_ids['UniProt ID'],
            login=self.login)

        try:
            encodes = wdi_core.WDItemID(self.protein_wdid, PROPS['encodes'], references=[uniprot_ref])
            gene_item = wdi_core.WDItemEngine(
                wd_item_id=self.gene_wdid,
                data=[encodes],
                append_value=[PROPS['encodes']],
                fast_run=fast_run,
                fast_run_base_filter={
                    PROPS['Entrez Gene ID']: '',
                    PROPS['found in taxon']: self.organism_info['wdid'],
                },
                global_ref_mode="CUSTOM",
                ref_handler=update_retrieved_if_new,
                core_props=core_props)
            wdi_helpers.try_write(
                gene_item, self.external_ids['UniProt ID'], PROPS['UniProt ID'], self.login, write=write)
        except Exception as err:
            # print the active traceback, then record the failure in the log
            traceback.print_exc()
            msg = wdi_helpers.format_msg(
                self.external_ids['UniProt ID'], PROPS['UniProt ID'], None, str(err), msg_type=type(err))
            wdi_core.WDItemEngine.log("ERROR", msg)
Пример #6
0
    def make_gene_encodes(self, write=True):
        """
        Add an "encodes" statement (pointing at the protein item) to the gene item.

        Exceptions raised while building or writing the gene item are not
        propagated: the traceback is printed and an ERROR message is logged.

        :param write: handed through to ``wdi_helpers.try_write``
        :return:
        """
        uniprot_ref = make_ref_source(
            self.record['uniprot']['@source'], PROPS['UniProt ID'],
            self.external_ids['UniProt ID'], login=self.login)

        try:
            encodes = wdi_core.WDItemID(self.protein_wdid, PROPS['encodes'], references=[uniprot_ref])
            gene_item = wdi_core.WDItemEngine(wd_item_id=self.gene_wdid, domain='genes',
                                              data=[encodes], append_value=[PROPS['encodes']])
            wdi_helpers.try_write(gene_item, self.external_ids['UniProt ID'], PROPS['UniProt ID'],
                                  self.login, write=write)
        except Exception as err:
            # print the active traceback, then record the failure in the log
            traceback.print_exc()
            msg = wdi_helpers.format_msg(self.external_ids['UniProt ID'], PROPS['UniProt ID'],
                                         None, str(err), msg_type=type(err))
            wdi_core.WDItemEngine.log("ERROR", msg)
Пример #7
0
    def create_gp_statements_chr(self):
        """
        Build strand orientation, genomic start, genomic end and chromosome
        statements for a gene located on a chromosome.

        :return: list of four statements in that order
        """
        position = self.record['genomic_pos']['@value']
        source = self.record['genomic_pos']['@source']
        id_prop = source_ref_id[source['id']]
        position_ref = make_ref_source(source, PROPS[id_prop], self.external_ids[id_prop], login=self.login)

        # chromosome qualifier; it is attached to the start and end statements only
        chrom_wdid = self.chr_num_wdid[position['chr']]
        chrom_qualifiers = [wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'], is_qualifier=True)]

        # strand value 1 maps to Q22809680, anything else to Q22809711
        if position['strand'] == 1:
            strand_item = 'Q22809680'
        else:
            strand_item = 'Q22809711'

        return [
            # strand orientation (no qualifiers)
            wdi_core.WDItemID(strand_item, PROPS['strand orientation'], references=[position_ref]),
            # genomic start and end
            wdi_core.WDString(str(int(position['start'])), PROPS['genomic start'],
                              references=[position_ref], qualifiers=chrom_qualifiers),
            wdi_core.WDString(str(int(position['end'])), PROPS['genomic end'],
                              references=[position_ref], qualifiers=chrom_qualifiers),
            # chromosome
            wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'], references=[position_ref]),
        ]
Пример #8
0
    def create_statements(self):
        """
        Build the statements for a human gene.

        Starts from the statements produced by ``Gene.create_statements`` and
        adds the HGNC ID / HGNC Gene Symbol statements (when present in
        ``self.external_ids``) plus the genomic position statements from
        ``self.do_gp_human`` (when the record has a 'genomic_pos' entry).

        :return: list of statements
        """
        # generic gene statements first
        statements = Gene.create_statements(self)
        entrez_ref = make_ref_source(
            self.record['entrezgene']['@source'],
            PROPS['Entrez Gene ID'],
            self.external_ids['Entrez Gene ID'],
            login=self.login)

        # human specific IDs, referenced to entrez
        statements.extend([
            wdi_core.WDString(self.external_ids[hgnc_key], PROPS[hgnc_key], references=[entrez_ref])
            for hgnc_key in ('HGNC ID', 'HGNC Gene Symbol')
            if hgnc_key in self.external_ids
        ])

        # gene position statements; skipped when do_gp_human returns nothing
        if 'genomic_pos' in self.record:
            position_statements = self.do_gp_human()
            if position_statements:
                statements.extend(position_statements)

        return statements
Пример #9
0
    def create_gp_statements(self):
        """
        Build strand orientation, genomic start and genomic end statements
        without a chromosome item.

        Each statement is qualified with the record's 'chr' value as a string
        under property P2249 instead of a chromosome item.

        :return: list of three statements (strand orientation, start, end)
        """
        position = self.record['genomic_pos']['@value']
        source = self.record['genomic_pos']['@source']
        id_prop = source_ref_id[source['id']]
        position_ref = make_ref_source(source, PROPS[id_prop], self.external_ids[id_prop], login=self.login)

        # qualifier holding the chromosome REFSEQ ID (not a chromosome item)
        refseq_chrom = wdi_core.WDString(value=position['chr'], prop_nr='P2249', is_qualifier=True)

        # strand value 1 maps to Q22809680, anything else to Q22809711
        if position['strand'] == 1:
            strand_item = 'Q22809680'
        else:
            strand_item = 'Q22809711'

        return [
            # strand orientation
            wdi_core.WDItemID(strand_item, PROPS['strand orientation'],
                              references=[position_ref], qualifiers=[refseq_chrom]),
            # genomic start and end
            wdi_core.WDString(str(int(position['start'])), PROPS['genomic start'],
                              references=[position_ref], qualifiers=[refseq_chrom]),
            wdi_core.WDString(str(int(position['end'])), PROPS['genomic end'],
                              references=[position_ref], qualifiers=[refseq_chrom]),
        ]
Пример #10
0
    def do_gp_human(self):
        """
        Build genomic start/end, chromosome and strand orientation statements
        for a human gene, each qualified with its genomic assembly.

        The primary position is read from ``self.record['genomic_pos']``
        (assembly item Q20966585). When the record also has a
        'genomic_pos_hg19' entry, a second set of statements is produced for
        it (assembly item Q21067546). A strand orientation or chromosome value
        that is identical for both positions is emitted once, carrying both
        assembly qualifiers.

        genes that are on an unlocalized scaffold will have no genomic position statements
        example: https://mygene.info/v3/gene/102724770
        https://www.wikidata.org/wiki/Q20970159

        :return: list of statements; empty list when the primary 'chr' value
                 is not a key of ``self.chr_num_wdid``
        """
        pos = self.record['genomic_pos']['@value']
        if pos['chr'] not in self.chr_num_wdid:
            return []

        # --- primary position ---
        pos_source = self.record['genomic_pos']['@source']
        pos_id_prop = source_ref_id[pos_source['id']]
        pos_ref = make_ref_source(pos_source, PROPS[pos_id_prop], self.external_ids[pos_id_prop],
                                  login=self.login)
        assembly = wdi_core.WDItemID("Q20966585", PROPS['genomic assembly'], is_qualifier=True)

        # qualifiers used on the start/end statements
        chrom_wdid = self.chr_num_wdid[pos['chr']]
        pos_qualifiers = [wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'], is_qualifier=True), assembly]

        # strand value 1 maps to Q22809680, anything else to Q22809711
        if pos['strand'] == 1:
            strand = 'Q22809680'
        else:
            strand = 'Q22809711'

        # --- optional hg19 position ---
        has_hg19 = 'genomic_pos_hg19' in self.record
        pos_hg19 = None
        ref_hg19 = None
        assembly_hg19 = None
        chrom_wdid_hg19 = None
        qualifiers_hg19 = None
        strand_hg19 = None
        if has_hg19:
            pos_hg19 = self.record['genomic_pos_hg19']['@value']
            source_hg19 = self.record['genomic_pos_hg19']['@source']
            id_prop_hg19 = source_ref_id[source_hg19['id']]
            ref_hg19 = make_ref_source(source_hg19, PROPS[id_prop_hg19], self.external_ids[id_prop_hg19],
                                       login=self.login)
            assembly_hg19 = wdi_core.WDItemID("Q21067546", PROPS['genomic assembly'], is_qualifier=True)
            # NOTE(review): unlike the primary position, this lookup is not
            # guarded against a 'chr' value missing from chr_num_wdid
            chrom_wdid_hg19 = self.chr_num_wdid[pos_hg19['chr']]
            qualifiers_hg19 = [
                wdi_core.WDItemID(chrom_wdid_hg19, PROPS['chromosome'], is_qualifier=True),
                assembly_hg19,
            ]
            if pos_hg19['strand'] == 1:
                strand_hg19 = 'Q22809680'
            else:
                strand_hg19 = 'Q22809711'

        statements = []

        # --- strand orientation: a single statement when both positions agree ---
        if has_hg19 and strand == strand_hg19:
            statements.append(wdi_core.WDItemID(strand, PROPS['strand orientation'],
                                                references=[pos_ref],
                                                qualifiers=[assembly, assembly_hg19]))
        elif has_hg19:
            statements.append(wdi_core.WDItemID(strand, PROPS['strand orientation'],
                                                references=[pos_ref], qualifiers=[assembly]))
            statements.append(wdi_core.WDItemID(strand_hg19, PROPS['strand orientation'],
                                                references=[ref_hg19], qualifiers=[assembly_hg19]))
        else:
            statements.append(wdi_core.WDItemID(strand, PROPS['strand orientation'],
                                                references=[pos_ref], qualifiers=[assembly]))

        # --- genomic start and end, one pair per position ---
        statements.append(wdi_core.WDString(str(int(pos['start'])), PROPS['genomic start'],
                                            references=[pos_ref], qualifiers=pos_qualifiers))
        statements.append(wdi_core.WDString(str(int(pos['end'])), PROPS['genomic end'],
                                            references=[pos_ref], qualifiers=pos_qualifiers))
        if has_hg19:
            statements.append(wdi_core.WDString(str(int(pos_hg19['start'])), PROPS['genomic start'],
                                                references=[ref_hg19], qualifiers=qualifiers_hg19))
            statements.append(wdi_core.WDString(str(int(pos_hg19['end'])), PROPS['genomic end'],
                                                references=[ref_hg19], qualifiers=qualifiers_hg19))

        # --- chromosome: a single statement when both positions agree ---
        if has_hg19 and chrom_wdid == chrom_wdid_hg19:
            statements.append(wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'],
                                                references=[pos_ref],
                                                qualifiers=[assembly, assembly_hg19]))
        elif has_hg19:
            statements.append(wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'],
                                                references=[pos_ref], qualifiers=[assembly]))
            statements.append(wdi_core.WDItemID(chrom_wdid_hg19, PROPS['chromosome'],
                                                references=[ref_hg19], qualifiers=[assembly_hg19]))
        else:
            statements.append(wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'],
                                                references=[pos_ref], qualifiers=[assembly]))

        return statements
Пример #11
0
    def create_statements(self):
        """
        Build the statements common to all genes.

        ID statements: Entrez Gene ID (required); Ensembl Gene ID, Ensembl
        Transcript IDs (only alongside an Ensembl Gene ID), RefSeq RNA IDs and
        a set of single-valued database IDs when present in
        ``self.external_ids``.
        Gene statements: instance of gene (Q7187) and found in taxon,
        referenced to ensembl when an Ensembl Gene ID exists, else to entrez.

        :return: list of statements
        """
        statements = []

        # --- required ID statement ---
        entrez_ref = make_ref_source(self.record['entrezgene']['@source'], PROPS['Entrez Gene ID'],
                                     self.external_ids['Entrez Gene ID'], login=self.login)
        ids = self.external_ids
        statements.append(
            wdi_core.WDString(ids['Entrez Gene ID'], PROPS['Entrez Gene ID'], references=[entrez_ref]))

        # --- optional ID statements ---
        ensembl_ref = None
        if 'Ensembl Gene ID' in ids:
            ensembl_ref = make_ref_source(self.record['ensembl']['@source'], PROPS['Ensembl Gene ID'],
                                          ids['Ensembl Gene ID'], login=self.login)
            statements.append(
                wdi_core.WDString(ids['Ensembl Gene ID'], PROPS['Ensembl Gene ID'], references=[ensembl_ref]))
            # no ensembl transcript ID unless ensembl gene is there also
            if 'Ensembl Transcript ID' in ids:
                statements.extend([
                    wdi_core.WDString(transcript_id, PROPS['Ensembl Transcript ID'], references=[ensembl_ref])
                    for transcript_id in ids['Ensembl Transcript ID']
                ])

        rna_key = 'RefSeq RNA ID'
        if rna_key in ids:
            statements.extend([
                wdi_core.WDString(rna_id, PROPS[rna_key], references=[entrez_ref])
                for rna_id in ids[rna_key]
            ])

        # single-valued IDs, all referenced to entrez
        single_value_keys = (
            'NCBI Locus tag', 'Saccharomyces Genome Database ID',
            'Mouse Genome Informatics ID', 'MGI Gene Symbol',
            'HomoloGene ID', 'Rat Genome Database ID', 'FlyBase Gene ID',
            'Wormbase Gene ID', 'ZFIN Gene ID',
        )
        statements.extend([
            wdi_core.WDString(ids[id_key], PROPS[id_key], references=[entrez_ref])
            for id_key in single_value_keys
            if id_key in ids
        ])

        # --- gene statements ---
        # if there is an ensembl ID, this comes from ensembl, otherwise, entrez
        gene_ref = entrez_ref if ensembl_ref is None else ensembl_ref

        # instance of 'gene'
        statements.append(wdi_core.WDItemID('Q7187', PROPS['instance of'], references=[gene_ref]))
        # found in taxon
        statements.append(
            wdi_core.WDItemID(self.organism_info['wdid'], PROPS['found in taxon'], references=[gene_ref]))

        return statements