Пример #1
0
    def create_reference(self):
        """
        Create wikidata references for interpro

        Items:
        Q3047275: InterPro

        Properties:
        stated in (P248)
        imported from (P143)
        software version (P348)
        publication date (P577)

        """
        # This same reference will be used for everything. Except for a ref to the interpro item itself
        ref_stated_in = PBB_Core.WDItemID("Q3047275",
                                          'P248',
                                          is_reference=True)
        ref_imported = PBB_Core.WDItemID("Q3047275", 'P143', is_reference=True)
        ref_version = PBB_Core.WDString(self.version,
                                        'P348',
                                        is_reference=True)
        ref_date = PBB_Core.WDTime(self.date.strftime("+%Y-%m-%dT00:00:00Z"),
                                   'P577',
                                   is_reference=True)
        ref_ipr = PBB_Core.WDString(self.id, "P2926", is_reference=True)
        self.reference = [
            ref_stated_in, ref_imported, ref_version, ref_date, ref_ipr
        ]
        for ref in self.reference:
            ref.overwrite_references = True
Пример #2
0
 def create_reference(self):
     ref_stated_in = PBB_Core.WDItemID(self.MONDO_WDID, 'P248', is_reference=True)
     ref_retrieved = PBB_Core.WDTime(self.retrieved.strftime('+%Y-%m-%dT00:00:00Z'), 'P813',
                                     is_reference=True)  # interpro ID
     #ref_archive_url = PBB_Core.WDUrl(self.ref_url, 'P1065', is_reference=True)
     #reference = [ref_stated_in, ref_retrieved, ref_archive_url]
     reference = [ref_stated_in, ref_retrieved]
     self.reference = reference
Пример #3
0
def make_ref_source(source_doc, id_prop, identifier, login=None):
    """
    Reference is made up of:
    stated_in: if the source has a release #:
        release edition
        else, stated in the source
    link to id: link to identifier in source
    retrieved: only if source has no release #
    login: must be passed if you want to be able to create new release items

    :param source_doc:
    :param id_prop:
    :param identifier:
    :return:
    """
    # source_doc = {'_id': 'uniprot', 'timestamp': '20161006'}
    # source_doc = {'_id': 'ensembl', 'release': 86, 'timestamp': '20161005'}
    source = source_doc['_id']
    if source not in source_items:
        raise ValueError(
            "Unknown source for reference creation: {}".format(source))
    if id_prop not in prop_ids:
        raise ValueError(
            "Unknown id_prop for reference creation: {}".format(id_prop))

    link_to_id = PBB_Core.WDString(value=str(identifier),
                                   prop_nr=prop_ids[id_prop],
                                   is_reference=True)

    if "release" in source_doc:
        source_doc['release'] = str(source_doc['release'])
        title = "{} Release {}".format(source_doc['_id'],
                                       source_doc['release'])
        description = "Release {} of {}".format(source_doc['release'],
                                                source_doc['_id'])
        edition_of_wdid = source_items[source_doc['_id']]
        release = PBB_Helpers.Release(
            title,
            description,
            source_doc['release'],
            edition_of_wdid=edition_of_wdid).get_or_create(login)

        stated_in = PBB_Core.WDItemID(value=release,
                                      prop_nr='P248',
                                      is_reference=True)
        reference = [stated_in, link_to_id]
    else:
        date_string = source_doc['timestamp']
        retrieved = datetime.strptime(date_string, "%Y%m%d")
        stated_in = PBB_Core.WDItemID(value=source_items[source],
                                      prop_nr='P248',
                                      is_reference=True)
        retrieved = PBB_Core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                                    prop_nr='P813',
                                    is_reference=True)
        reference = [stated_in, retrieved, link_to_id]
    return reference
Пример #4
0
def make_ref(retrieved, genome_id):
    refs = [
        PBB_Core.WDItemID(value='Q20641742', prop_nr='P248',
                          is_reference=True),  # stated in ncbi gene
        PBB_Core.WDString(value=genome_id, prop_nr='P2249',
                          is_reference=True),  # Link to Refseq Genome ID
        PBB_Core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                        prop_nr='P813',
                        is_reference=True)
    ]
    return refs
Пример #5
0
def make_reference(source, id_prop, identifier, retrieved):
    reference = [
        PBB_Core.WDItemID(value=source_items[source],
                          prop_nr='P248',
                          is_reference=True),  # stated in
        PBB_Core.WDString(value=str(identifier),
                          prop_nr=prop_ids[id_prop],
                          is_reference=True),  # Link to ID
        PBB_Core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                        prop_nr='P813',
                        is_reference=True)
    ]
    return reference
Пример #6
0
def create_protein_ipr(uniprot_id, uniprot_wdid, families, has_part,
                       release_info, login):
    """
    Create interpro relationships to one protein
    :param uniprot_id: uniprot ID of the protein to modify
    :type uniprot_id: str
    :param uniprot_wdid: wikidata ID of the protein
    :param families: list of ipr wd ids the protein is a (P279) subclass of
    :param has_part: list of ipr wd ids the protein has (P527) has part
    :return:
    """
    date = release_info['date']
    version = release_info['version']

    # create ref
    ref_stated_in = PBB_Core.WDItemID("Q3047275", 'P248', is_reference=True)
    ref_imported = PBB_Core.WDItemID("Q3047275", 'P143', is_reference=True)
    ref_version = PBB_Core.WDString(version, 'P348', is_reference=True)
    ref_date = PBB_Core.WDTime(date.strftime("+%Y-%m-%dT00:00:00Z"),
                               'P577',
                               is_reference=True)
    ref_ipr = PBB_Core.WDString(
        "http://www.ebi.ac.uk/interpro/protein/{}".format(uniprot_id),
        "P854",
        is_reference=True)
    reference = [ref_stated_in, ref_imported, ref_version, ref_date, ref_ipr]
    for ref in reference:
        ref.overwrite_references = True

    statements = []
    if families:
        for f in families:
            statements.append(
                PBB_Core.WDItemID(value=f,
                                  prop_nr='P279',
                                  references=[reference]))
    if has_part:
        for hp in has_part:
            statements.append(
                PBB_Core.WDItemID(value=hp,
                                  prop_nr='P527',
                                  references=[reference]))

    item = PBB_Core.WDItemEngine(wd_item_id=uniprot_wdid,
                                 data=statements,
                                 server=SERVER,
                                 append_value=["P279", "P527", "P361"])
    # print(item.get_wd_json_representation())
    try:
        item.write(login)
    except WDApiError as e:
        print(e)
        PBB_Core.WDItemEngine.log(
            'ERROR',
            '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
            .format(main_data_id=uniprot_id,
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id=uniprot_wdid,
                    duration=datetime.now()))
        return

    PBB_Core.WDItemEngine.log(
        'INFO',
        '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.
        format(main_data_id=uniprot_id,
               exception_type='',
               message='created protein interpro relationships: {}'.format([
                   (x.prop_nr, x.value) for x in statements
               ]),
               wd_id=uniprot_wdid,
               duration=datetime.now()))
Пример #7
0
    def __init__(self, object):
        """

        :type self: object
        """
        self.start = object["start"]
        self.entrezgene = object["entrezgene"]
        self.uniprotwikidataids = object["uniprotwikidataids"]
        gene_annotations = self.annotate_gene()
        self.genomeInfo = object["speciesInfo"][str(gene_annotations['taxid'])]
        self.content = object
        self.name = gene_annotations["name"]
        self.logincreds = object["logincreds"]
        if "_timestamp" in gene_annotations.keys():
            self.annotationstimestamp = gene_annotations["_timestamp"]
        self.wdid = object["wdid"]

        # symbol:
        self.symbol = gene_annotations["symbol"]
        print(self.symbol)
        # HGNC
        if "HGNC" in gene_annotations:
            if isinstance(gene_annotations["HGNC"], list):
                self.hgnc = gene_annotations["HGNC"]
            else:
                self.hgnc = [gene_annotations["HGNC"]]
        else:
            self.hgnc = None

        # Ensembl Gene & transcript
        if "ensembl" in gene_annotations:
            if "gene" in gene_annotations["ensembl"]:
                if isinstance(gene_annotations["ensembl"]["gene"], list):
                    self.ensembl_gene = gene_annotations["ensembl"]["gene"]
                else:
                    self.ensembl_gene = [gene_annotations["ensembl"]["gene"]]
            else:
                self.ensembl_gene = None

            if "transcript" in gene_annotations["ensembl"]:
                if isinstance(gene_annotations["ensembl"]["transcript"], list):
                    self.ensembl_transcript = gene_annotations["ensembl"]["transcript"]
                else:
                    self.ensembl_transcript = [gene_annotations["ensembl"]["transcript"]]
            else:
                self.ensembl_transcript = None
        # Homologene
        if "homologene" in gene_annotations:
            if isinstance(gene_annotations["homologene"]["id"], list):
                self.homologene = [str(i) for i in gene_annotations["homologene"]["id"]]
            else:
                self.homologene = [str(gene_annotations["homologene"]["id"])]
        else:
            self.homologene = None
        # Refseq 
        if "refseq" in gene_annotations:
            if "rna" in gene_annotations["refseq"]:
                if isinstance(gene_annotations["refseq"]["rna"], list):
                    self.refseq_rna = gene_annotations["refseq"]["rna"]
                else:
                    self.refseq_rna = [gene_annotations["refseq"]["rna"]]
            else:
                self.refseq_rna = None
        else:
            self.refseq_rna = None

            # MGI
        if "MGI" in gene_annotations:
            if isinstance(gene_annotations["MGI"], list):
                self.MGI = gene_annotations["MGI"]
            else:
                self.MGI = [gene_annotations["MGI"]]
        else:
            self.MGI = None

        self.chromosome = None
        self.startpost = None
        self.endpos = None
        if "genomic_pos" in gene_annotations:
            if isinstance(gene_annotations["genomic_pos"], list):
                self.chromosome = []
                self.startpos = []
                self.endpos = []
                for i in range(len(gene_annotations["genomic_pos"])):
                    if gene_annotations["genomic_pos"][i]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                        self.genomeInfo["name"]].keys():
                        self.chromosome.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                   gene_annotations["genomic_pos"][i]["chr"]])
                        self.startpos.append(gene_annotations["genomic_pos"][i]["start"])
                        self.endpos.append(gene_annotations["genomic_pos"][i]["end"])
            else:
                self.chromosome = []
                self.startpos = []
                self.endpos = []
                if gene_annotations["genomic_pos"]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                    self.genomeInfo["name"]].keys():
                    self.chromosome.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                               gene_annotations["genomic_pos"]["chr"]])
                    self.startpos.append(gene_annotations["genomic_pos"]["start"])
                    self.endpos.append(gene_annotations["genomic_pos"]["end"])

        self.encodes = None
        if "uniprot" in gene_annotations.keys():
            if "Swiss-Prot" in gene_annotations["uniprot"].keys():
                if isinstance(gene_annotations["uniprot"]["Swiss-Prot"], list):
                    self.encodes = []
                    for uniprot in gene_annotations["uniprot"]["Swiss-Prot"]:
                        self.encodes.append(uniprot)
                else:
                    self.encodes = [gene_annotations["uniprot"]["Swiss-Prot"]]


        self.chromosomeHg19 = None
        self.startposHg19 = None
        self.endposHg19 = None
        if "genomic_pos_hg19" in gene_annotations:
            if isinstance(gene_annotations["genomic_pos_hg19"], list):
                self.chromosomeHg19 = []
                self.startposHg19 = []
                self.endposHg19 = []
                for i in range(len(gene_annotations["genomic_pos_hg19"])):
                    if gene_annotations["genomic_pos_hg19"][i]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                        self.genomeInfo["name"]].keys():
                        self.chromosomeHg19.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                       gene_annotations["genomic_pos_hg19"][i]["chr"]])
                        self.startposHg19.append(gene_annotations["genomic_pos_hg19"][i]["start"])
                        self.endposHg19.append(gene_annotations["genomic_pos_hg19"][i]["end"])
            else:
                self.chromosomeHg19 = []
                self.startposHg19 = []
                self.endposHg19 = []
                if gene_annotations["genomic_pos_hg19"]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                    self.genomeInfo["name"]].keys():
                    self.chromosomeHg19.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                   gene_annotations["genomic_pos_hg19"]["chr"]])
                    self.startposHg19.append(gene_annotations["genomic_pos_hg19"]["start"])
                    self.endposHg19.append(gene_annotations["genomic_pos_hg19"]["end"])

        # type of Gene
        if "type_of_gene" in gene_annotations:
            self.type_of_gene = []
            if gene_annotations["type_of_gene"] == "ncRNA":
                self.type_of_gene.append("Q427087")
            if gene_annotations["type_of_gene"] == "snRNA":
                self.type_of_gene.append("Q284578")
            if gene_annotations["type_of_gene"] == "snoRNA":
                self.type_of_gene.append("Q284416")
            if gene_annotations["type_of_gene"] == "rRNA":
                self.type_of_gene.append("Q215980")
            if gene_annotations["type_of_gene"] == "tRNA":
                self.type_of_gene.append("Q201448")
            if gene_annotations["type_of_gene"] == "pseudo":
                self.type_of_gene.append("Q277338")
            if gene_annotations["type_of_gene"] == "protein-coding":
                self.type_of_gene.append("Q20747295")
        else:
            self.type_of_gene = None
        # Reference section  
        # Prepare references
        refStatedIn = PBB_Core.WDItemID(value=self.genomeInfo["release"], prop_nr='P248', is_reference=True)
        refStatedIn.overwrite_references = True
        refImported = PBB_Core.WDItemID(value='Q20641742', prop_nr='P143', is_reference=True)
        refImported.overwrite_references = True
        timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
        refRetrieved = PBB_Core.WDTime(timeStringNow, prop_nr='P813', is_reference=True)
        refRetrieved.overwrite_references = True
        gene_reference = [refStatedIn, refImported, refRetrieved]

        refStatedInEnsembl = PBB_Core.WDItemID(value= 'Q21996330', prop_nr='P248', is_reference=True)
        refStatedInEnsembl.overwrite_references = True
        refImportedEnsembl = PBB_Core.WDItemID(value='Q1344256', prop_nr='P143', is_reference=True)
        refImportedEnsembl.overwrite_references = True

        ensembl_reference = [refStatedInEnsembl, refImportedEnsembl, refRetrieved]

        genomeBuildQualifier = PBB_Core.WDItemID(value=self.genomeInfo["genome_assembly"], prop_nr='P659',
                                                 is_qualifier=True)
        genomeBuildPreviousQualifier = PBB_Core.WDItemID(value=self.genomeInfo["genome_assembly_previous"],
                                                         prop_nr='P659', is_qualifier=True)

        prep = dict()
        prep['P703'] = [PBB_Core.WDItemID(value=self.genomeInfo['wdid'], prop_nr='P703',
                                          references=[copy.deepcopy(gene_reference)])]
        if self.genomeInfo["name"] == "human":
            prep['P353'] = [
                PBB_Core.WDString(value=self.symbol, prop_nr='P353', references=[copy.deepcopy(gene_reference)])]
        prep['P351'] = [
            PBB_Core.WDString(value=str(self.entrezgene), prop_nr='P351', references=[copy.deepcopy(gene_reference)])]

        prep['P279'] = [PBB_Core.WDItemID(value='Q7187', prop_nr='P279', references=[copy.deepcopy(gene_reference)])]
        if "type_of_gene" in vars(self):
            if self.type_of_gene != None:
                for i in range(len(self.type_of_gene)):
                    prep['P279'].append(PBB_Core.WDItemID(value=self.type_of_gene[i], prop_nr='P279',
                                                          references=[copy.deepcopy(gene_reference)]))

        if "ensembl_gene" in vars(self):
            if self.ensembl_gene != None:
                prep['P594'] = []
                for ensemblg in self.ensembl_gene:
                    prep['P594'].append(
                        PBB_Core.WDString(value=ensemblg, prop_nr='P594', references=[copy.deepcopy(gene_reference)]))

        if "ensembl_transcript" in vars(self):
            if self.ensembl_transcript != None:
                prep['P704'] = []
                for ensemblt in self.ensembl_transcript:
                    prep['P704'].append(
                        PBB_Core.WDString(value=ensemblt, prop_nr='P704', references=[copy.deepcopy(gene_reference)]))

        if "encodes" in vars(self):
            if self.encodes != None:
                prep['P688'] = []
                for uniprot in self.encodes:
                    if uniprot in self.uniprotwikidataids.keys():
                        prep['P688'].append(PBB_Core.WDItemID(value=self.uniprotwikidataids[uniprot], prop_nr='P688', references=[copy.deepcopy(gene_reference)]))

        if "hgnc" in vars(self):
            if self.hgnc != None:
                prep['P354'] = []
                for hugo in self.hgnc:
                    prep['P354'].append(
                        PBB_Core.WDString(value=hugo, prop_nr='P354', references=[copy.deepcopy(gene_reference)]))

        if "homologene" in vars(self):
            if self.homologene != None:
                prep['P593'] = []
                for ortholog in self.homologene:
                    prep['P593'].append(
                        PBB_Core.WDString(value=ortholog, prop_nr='P593', references=[copy.deepcopy(gene_reference)]))

        if "refseq_rna" in vars(self):
            if self.refseq_rna != None:
                prep['P639'] = []
                for refseq in self.refseq_rna:
                    prep['P639'].append(
                        PBB_Core.WDString(value=refseq, prop_nr='P639', references=[copy.deepcopy(gene_reference)]))

        if "chromosome" in vars(self):
            prep['P1057'] = []
            if self.chromosome != None:
                for chrom in list(set(self.chromosome)):
                    prep['P1057'].append(
                        PBB_Core.WDItemID(value=chrom, prop_nr='P1057', references=[copy.deepcopy(gene_reference)]))

        if "startpos" in vars(self):
            if not 'P644' in prep.keys():
                prep['P644'] = []
            if self.startpos != None:
                for pos in self.startpos:
                    prep['P644'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P644', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildQualifier)]))
        if "endpos" in vars(self):
            if not 'P645' in prep.keys():
                prep['P645'] = []
            if self.endpos != None:
                for pos in self.endpos:
                    prep['P645'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P645', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildQualifier)]))

        if "startposHg19" in vars(self):
            if not 'P644' in prep.keys():
                prep['P644'] = []
            if self.startposHg19 != None:
                for pos in self.startposHg19:
                    prep['P644'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P644', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildPreviousQualifier)]))
        if "endposHg19" in vars(self):
            if not 'P644' in prep.keys():
                prep['P645'] = []
            if self.endposHg19 != None:
                for pos in self.endposHg19:
                    prep['P645'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P645', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildPreviousQualifier)]))

        if "MGI" in vars(self):
            prep['P671'] = []
            if self.MGI != None:
                for mgi in self.MGI:
                    prep['P671'].append(PBB_Core.WDString(value=mgi, prop_nr='P671',
                                        references=[copy.deepcopy(gene_reference)]))

        if "alias" in gene_annotations.keys():
            if isinstance(gene_annotations["alias"], list):
                self.synonyms = []
                for alias in gene_annotations["alias"]:
                    self.synonyms.append(alias)
            else:
                self.synonyms = [gene_annotations["alias"]]
            self.synonyms.append(self.symbol)
            print(self.synonyms)
        else:
            self.synonyms = None

        data2add = []
        for key in prep.keys():
            for statement in prep[key]:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)

        if self.wdid != None:
          # if self.encodes != None:
            wdPage = PBB_Core.WDItemEngine(self.wdid, item_name=self.name, data=data2add, server="www.wikidata.org",
                                           domain="genes")
            if wdPage.get_description() == "":
                wdPage.set_description(description=self.genomeInfo['name'] + ' gene', lang='en')
            if wdPage.get_description(lang='fr') == "" or wdPage.get_description(lang='fr') == "gène":
                wdPage.set_description(description="Un gène " + self.genomeInfo['fr-name'], lang='fr')
            if wdPage.get_description(lang='nl') == "" or wdPage.get_description(lang='nl') == "gen":
                wdPage.set_description(description="Een "+ self.genomeInfo['nl-name']+ " gen", lang='nl')
            if self.synonyms != None:
                wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
            print(self.wdid)
            self.wd_json_representation = wdPage.get_wd_json_representation()
            PBB_Debug.prettyPrint(self.wd_json_representation)
            PBB_Debug.prettyPrint(data2add)
            # print(self.wd_json_representation)
            wdPage.write(self.logincreds)
            print("aa")
        else:
          #if self.encodes != None:
            wdPage = PBB_Core.WDItemEngine(item_name=self.name, data=data2add, server="www.wikidata.org",
                                           domain="genes")
            if wdPage.get_description() != "":
                wdPage.set_description(description=self.genomeInfo['name'] + ' gene', lang='en')
            if wdPage.get_description(lang='fr') == "" or wdPage.get_description(lang='fr') == "gène":
                wdPage.setdescription(description="Un gène " + self.genomeInfo['fr-name'], lang='fr')
            if wdPage.get_description(lang='nl') == "" or wdPage.get_description(lang='nl') == "gen":
                wdPage.setdescription(description="Een "+ self.genomeInfo['nl-name']+ " gen", lang='nl')
            if self.synonyms != None:
                wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
            self.wd_json_representation = wdPage.get_wd_json_representation()
            PBB_Debug.prettyPrint(self.wd_json_representation)
            PBB_Debug.prettyPrint(data2add)
            # print(self.wd_json_representation)
            self.wdid = wdPage.write(self.logincreds)

        PBB_Core.WDItemEngine.log('INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                        main_data_id=str(self.entrezgene),
                        exception_type='',
                        message=f.name,
                        wd_id=self.wdid,
                        duration=time.time()-self.start
                    ))