Пример #1
0
def reference_store(source='', identifier=''):
    """
    Build the reference snaks for a statement backed by an external database.

    :param source: key naming the database; it must be present in both lookup tables below
    :param identifier: identifier of the record inside that database
    :return: list of PBB_Core reference objects (stated in, language, identifier, retrieval
        date), each flagged to overwrite existing references
    :raises KeyError: if ``source`` is missing from either lookup table
    """
    # Wikidata item used as the "stated in" (P248) value for each database.
    source_items = {
        'uniprot': 'Q905695',
        'ncbi_gene': 'Q20641742',
        'ncbi_taxonomy': 'Q13711410',
        'swiss_prot': 'Q2629752',
        'trembl': 'Q22935315',
    }
    # Wikidata property under which the identifier of each database is stored.
    prop_ids = {
        'uniprot': 'P352',
        'ncbi_gene': 'P351',
        'ncbi_taxonomy': 'P685',
        'ncbi_locus_tag': 'P2393',
    }

    stated_in_item = source_items[source]
    identifier_prop = prop_ids[source]
    retrieved_stamp = str(strftime("+%Y-%m-%dT00:00:00Z", gmtime()))

    stated_in = PBB_Core.WDItemID(value=stated_in_item, prop_nr='P248', is_reference=True)
    language = PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True)
    record_id = PBB_Core.WDString(value=identifier, prop_nr=identifier_prop, is_reference=True)
    retrieved = PBB_Core.WDTime(retrieved_stamp, prop_nr='P813', is_reference=True)

    refs = [stated_in, language, record_id, retrieved]
    for entry in refs:
        entry.overwrite_references = True
    return refs
Пример #2
0
    def make_reference(self,
                       stated_in,
                       source_element,
                       source_element_name,
                       source_element_prop,
                       date=None,
                       date_property='P813'):
        """
        Build a single reference block for a Wikidata claim.

        :param stated_in: Wikidata item id of the source (value of P248, "stated in")
        :param source_element: identifier of the record inside that source
        :param source_element_name: English title of the record (value of P1476)
        :param source_element_prop: property number under which ``source_element`` is stored
        :param date: timestamp string in the form ``+%Y-%m-%dT00:00:00Z``; when omitted the
            current UTC date is used, computed at call time
        :param date_property: property number used for ``date`` (default P813)
        :return: a list holding one list of PBB_Core reference objects, each flagged to
            overwrite existing references
        """
        # A default of time.strftime(...) in the signature would be evaluated only once,
        # when the function is defined, and would go stale in a long-running process.
        if date is None:
            date = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())

        ref = [[
            PBB_Core.WDItemID(value=stated_in,
                              prop_nr='P248',
                              is_reference=True),  # stated in
            PBB_Core.WDString(value=source_element,
                              prop_nr=source_element_prop,
                              is_reference=True),  # source element
            PBB_Core.WDItemID(value='Q1860', prop_nr='P407',
                              is_reference=True),  # language of work
            PBB_Core.WDMonolingualText(value=source_element_name,
                                       language='en',
                                       prop_nr='P1476',
                                       is_reference=True),  # title
            PBB_Core.WDTime(time=date,
                            prop_nr=date_property,
                            is_reference=True)  # date (P813 by default)
        ]]

        # this will overwrite all existing references of a WD claim value.
        for x in ref[0]:
            x.overwrite_references = True

        return ref
Пример #3
0
    def __init__(self, object):
        """
        Create or update the Wikidata item for one UBERON term and write it.

        :param object: mapping with the keys ``logincreds``, ``uberonLabel``, ``uberon``
            (the term URI), ``wikidata_id`` (mapping of UBERON id -> Wikidata item id),
            ``start`` and ``graph`` (an object exposing ``objects(subject, predicate)``;
            presumably an rdflib graph)
        """
        self.logincreds = object["logincreds"]
        self.name = object["uberonLabel"]
        self.uberon = object["uberon"]
        self.uberon_id = self.uberon.replace("http://purl.obolibrary.org/obo/UBERON_", "")
        self.wikidata_id = object["wikidata_id"]
        self.start = object["start"]
        self.graph = object["graph"]

        has_exact_synonym = URIRef("http://www.geneontology.org/formats/oboInOwl#hasExactSynonym")
        print(self.uberon_id)
        print(self.name)

        # Reference shared (as deep copies) by every statement written below.
        stated_in = PBB_Core.WDItemID(21552738, prop_nr='P248', is_reference=True)
        imported_from = PBB_Core.WDItemID(value=7876491, prop_nr='P143', is_reference=True)
        retrieved = PBB_Core.WDTime(strftime("+%Y-%m-%dT00:00:00Z", gmtime()),
                                    prop_nr='P813', is_reference=True)
        ub_reference = [stated_in, imported_from, retrieved]
        for ref in ub_reference:
            ref.overwrite_references = True

        # NOTE(review): membership is tested with the raw id but the lookup strips a
        # "UBERON:" prefix; the two only agree when the id carries no such prefix.
        if self.uberon_id in self.wikidata_id:
            self.wdid = self.wikidata_id[self.uberon_id.replace("UBERON:", "")]
        else:
            self.wdid = None

        self.synonyms = [
            str(synonym)
            for synonym in self.graph.objects(URIRef(self.uberon), has_exact_synonym)
        ]

        subclass_statement = PBB_Core.WDItemID(value='Q4936952', prop_nr='P279',
                                               references=[copy.deepcopy(ub_reference)])
        uberon_id_statement = PBB_Core.WDString(value=self.uberon_id, prop_nr='P1554',
                                                references=[copy.deepcopy(ub_reference)])
        print(self.uberon)
        uberon_url_statement = PBB_Core.WDUrl(value=self.uberon, prop_nr='P1709',
                                              references=[copy.deepcopy(ub_reference)])

        data2add = [subclass_statement, uberon_id_statement, uberon_url_statement]
        for statement in data2add:
            print(statement.prop_nr, statement.value)

        engine_options = dict(item_name=self.name, data=data2add, server="www.wikidata.org",
                              domain="anatomical_structure", append_value=['P279'])
        if self.wdid is not None:
            wdPage = PBB_Core.WDItemEngine(self.wdid, **engine_options)
        else:
            wdPage = PBB_Core.WDItemEngine(**engine_options)

        if self.synonyms:
            wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
        print(self.synonyms)
        for syn in self.synonyms:
            print(syn)
        wdPage.write(self.logincreds)
        print("======")
        # NOTE(review): this terminates the whole process after the first item has been
        # written; it looks like a debugging leftover - confirm before removing.
        sys.exit()
Пример #4
0
 def create_reference(self):
     """
     Assemble the reference snaks for a statement taken from UniProt.

     Only the "stated in" snak is flagged with ``overwrite_references``.

     :return: list of PBB_Core reference objects (stated in, UniProt id, retrieval date,
         language of work)
     """
     stated_in = PBB_Core.WDItemID(value='Q905695', prop_nr='P248', is_reference=True)
     stated_in.overwrite_references = True

     uniprot_id = PBB_Core.WDString(value=self.uniprot, prop_nr='P352', is_reference=True)

     retrieved_stamp = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())
     retrieved = PBB_Core.WDTime(time=retrieved_stamp, prop_nr='P813', is_reference=True)

     # language of work
     language = PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True)

     return [stated_in, uniprot_id, retrieved, language]
Пример #5
0
    def __init__(self, object):
        """
        Add an ortholog (P684) statement to a gene item and write it to Wikidata.

        :param object: mapping with the keys ``logincreds``, ``source`` (Wikidata id of the
            item to edit), ``ortholog`` (Wikidata id of the orthologous gene) and
            ``speciesWdID`` (``'Q5'`` for human or ``'Q83310'`` for mouse)
        :raises ValueError: if ``speciesWdID`` is not one of the supported species
        """
        self.logincreds = object["logincreds"]
        self.source = object["source"]
        self.ortholog = object["ortholog"]
        self.species = object["speciesWdID"]

        # Fail early with a clear message instead of hitting an unbound local later on.
        supported_species = ('Q5', 'Q83310')  # human, mouse
        if self.species not in supported_species:
            raise ValueError(
                "Unsupported speciesWdID {!r}; expected one of {}".format(
                    self.species, supported_species))

        # Prepare references
        refStatedInHomologeneBuild = PBB_Core.WDItemID(value='Q20976936',
                                                       prop_nr='P248',
                                                       is_reference=True)
        refImportedFromHomologen = PBB_Core.WDItemID(value='Q468215',
                                                     prop_nr='P143',
                                                     is_reference=True)
        timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
        refRetrieved = PBB_Core.WDTime(timeStringNow,
                                       prop_nr='P813',
                                       is_reference=True)
        homologene_reference = [[
            refStatedInHomologeneBuild, refImportedFromHomologen, refRetrieved
        ]]

        # Prepare the qualifier: the validated species id is the qualifier value.
        speciesQualifier = PBB_Core.WDItemID(value=self.species,
                                             prop_nr='P703',
                                             is_qualifier=True)

        # Prepare the item to add
        orthologValue = PBB_Core.WDItemID(value=self.ortholog,
                                          prop_nr='P684',
                                          references=homologene_reference,
                                          qualifiers=[speciesQualifier])

        wdPage = PBB_Core.WDItemEngine(wd_item_id=self.source,
                                       data=[orthologValue],
                                       server="www.wikidata.org",
                                       domain="genes")
        print(wdPage.wd_json_representation)
        wdPage.write(self.logincreds)
Пример #6
0
 def create_reference(self):
     """
     Assemble the reference snaks for a statement taken from the configured ontology.

     :return: list of PBB_Core reference objects (stated in ``self.ontology_ref_item``,
         imported from, retrieval date, language of work)
     """
     stated_in = PBB_Core.WDItemID(value=self.ontology_ref_item, prop_nr='P248',
                                   is_reference=True)
     imported_from = PBB_Core.WDItemID(value='Q22230760', prop_nr='P143',
                                       is_reference=True)

     retrieved_stamp = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())
     retrieved = PBB_Core.WDTime(time=retrieved_stamp, prop_nr='P813',
                                 is_reference=True)

     # language of work
     language = PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True)

     return [stated_in, imported_from, retrieved, language]
Пример #7
0
def generate_refs(iuphar_ligand):
    """
    Build the reference block for a statement sourced from an IUPHAR ligand record.

    :param iuphar_ligand: ligand identifier, stored under property P595
    :return: a list holding one list of PBB_Core reference objects
    """
    # The timestamp is labelled 'Z' (UTC), so it must be derived from gmtime();
    # strftime() without a time tuple would format the local time instead.
    retrieved_stamp = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())

    return [[
        PBB_Core.WDItemID(value='Q2793172', prop_nr='P248',
                          is_reference=True),  # stated in
        PBB_Core.WDString(value=iuphar_ligand,
                          prop_nr='P595',
                          is_reference=True),  # source element
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407',
                          is_reference=True),  # language of work
        PBB_Core.WDTime(time=retrieved_stamp,
                        prop_nr='P813',
                        is_reference=True),  # retrieved
    ]]
Пример #8
0
def generate_refs(ref_source_id):
    """
    Build the reference block for an identifier whose prefix selects the source.

    Identifiers starting with ``'C'`` are referenced via P592 / Q6120337, identifiers
    starting with ``'N'`` via P2115 / Q21008030. Any other identifier yields a reference
    holding only the language and retrieval-date snaks.

    :param ref_source_id: identifier string of the source record
    :return: a list holding one list of PBB_Core reference objects
    """
    if ref_source_id.startswith('C'):
        source_refs = [
            PBB_Core.WDItemID(value='Q6120337', prop_nr='P248', is_reference=True),  # stated in
            PBB_Core.WDString(value=ref_source_id, prop_nr='P592', is_reference=True),  # source element
        ]
    elif ref_source_id.startswith('N'):
        source_refs = [
            PBB_Core.WDItemID(value='Q21008030', prop_nr='P248', is_reference=True),  # stated in
            PBB_Core.WDString(value=ref_source_id, prop_nr='P2115', is_reference=True),  # source element
        ]
    else:
        source_refs = []

    # The timestamp is labelled 'Z' (UTC), so it must be derived from gmtime();
    # strftime() without a time tuple would format the local time instead.
    retrieved_stamp = time.strftime('+%Y-%m-%dT00:00:00Z', time.gmtime())

    common_refs = [
        PBB_Core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
        PBB_Core.WDTime(time=retrieved_stamp, prop_nr='P813', is_reference=True),  # retrieved
    ]

    return [source_refs + common_refs]
Пример #9
0
    def __init__(self, object):
        """
        Build the Wikidata statements for one Disease Ontology class and write the item.

        :param object: positional sequence holding
            [0] the Disease Ontology class element (queried through ``self.getDoValue``),
            [1] the URL of the Disease Ontology release, used as the P1065 reference value,
            [2] a mapping of Disease Ontology id -> Wikidata item id,
            [3] the login credentials handed to ``WDItemEngine.write``,
            [4] the start time (compared against ``time.time()`` for the logged duration)

        ``self.description`` is only set when the class carries a definition.
        """
        doClass = object[0]
        doVersionURL = object[1]
        self.wd_doMappings = object[2]
        self.logincreds = object[3]
        self.start = object[4]

        self.wd_do_content = doClass
        PBB_Debug.prettyPrint(self.wd_do_content)
        self.do_id = self.getDoValue(self.wd_do_content,
                                     './/oboInOwl:id')[0].text
        print(self.do_id)
        self.name = self.getDoValue(self.wd_do_content,
                                    './/rdfs:label')[0].text
        print(self.name)

        classDescription = self.getDoValue(
            self.wd_do_content,
            './/oboInOwl:hasDefinition/oboInOwl:Definition/rdfs:label')
        if len(classDescription) > 0:
            self.description = classDescription[0].text

        if self.do_id in self.wd_doMappings:
            self.wdid = "Q" + str(self.wd_doMappings[self.do_id])
        else:
            self.wdid = None

        # Deprecated ontology classes are written with rank "deprecated".
        deprecated = self.getDoValue(self.wd_do_content, './/owl:deprecated')
        if len(deprecated) > 0 and deprecated[0].text == "true":
            self.rank = "deprecated"
        else:
            self.rank = "normal"

        self.synonyms = [
            synonym.text
            for synonym in self.getDoValue(self.wd_do_content,
                                           './/oboInOwl:hasExactSynonym')
        ]

        # Parent classes as "DOID:<n>"; DOID:4 is never kept (presumably the root
        # "disease" class, which the fixed Q12136 statement below already covers).
        self.subclasses = []
        for subclass in self.getDoValue(self.wd_do_content,
                                        './/rdfs:subClassOf'):
            parts = subclass.get(
                '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource').split(
                    "DOID_")
            if len(parts) > 1 and "DOID:" + parts[1] != "DOID:4":
                self.subclasses.append("DOID:" + parts[1])

        # External references grouped by prefix: "<prefix>:<id>" -> {prefix: [id, ...]}.
        # NOTE(review): only the text between the first and second colon is stored, so
        # values that contain a colon themselves (e.g. URLs) are truncated.
        self.xrefs = dict()
        for xref in self.getDoValue(self.wd_do_content,
                                    './/oboInOwl:hasDbXref'):
            xref_parts = xref.text.split(":")
            self.xrefs.setdefault(xref_parts[0], []).append(xref_parts[1])

        # Reference attached (as a deep copy) to every statement.
        refArchiveUrl = PBB_Core.WDUrl(value=doVersionURL,
                                       prop_nr='P1065',
                                       is_reference=True)
        refStatedIn = PBB_Core.WDItemID(value=5282129,
                                        prop_nr='P248',
                                        is_reference=True)
        timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
        refRetrieved = PBB_Core.WDTime(timeStringNow,
                                       prop_nr='P813',
                                       is_reference=True)
        do_reference = [refStatedIn, refRetrieved, refArchiveUrl]
        for ref in do_reference:
            ref.overwrite_references = True

        def fresh_reference():
            # Each statement gets its own copy of the reference block.
            return [copy.deepcopy(do_reference)]

        def xref_statements(prefix, prop_nr):
            # One string statement per identifier recorded under the given xref prefix.
            return [
                PBB_Core.WDString(value=xref_id,
                                  prop_nr=prop_nr,
                                  references=fresh_reference(),
                                  rank=self.rank)
                for xref_id in self.xrefs.get(prefix, [])
            ]

        # Subclass of disease, plus every parent class that has a Wikidata mapping.
        data2add = [
            PBB_Core.WDItemID(value='Q12136',
                              prop_nr='P279',
                              references=fresh_reference(),
                              rank=self.rank)
        ]
        for subclass in self.subclasses:
            if subclass in self.wd_doMappings:
                data2add.append(
                    PBB_Core.WDItemID(value=self.wd_doMappings[subclass],
                                      prop_nr='P279',
                                      references=fresh_reference(),
                                      rank=self.rank))

        # Each Orphanet statement takes a single identifier as its value.
        data2add.extend(xref_statements("Orphanet", 'P1550'))

        # Disease Ontology id
        data2add.append(
            PBB_Core.WDString(value=self.do_id,
                              prop_nr='P699',
                              references=fresh_reference(),
                              rank=self.rank))

        for prefix, prop_nr in (("ICD10CM", 'P494'),
                                ("ICD9CM", 'P493'),
                                ("MSH", 'P486'),
                                ("NCI", 'P1748'),
                                ("OMIM", 'P492')):
            data2add.extend(xref_statements(prefix, prop_nr))

        # Derive an English Wikipedia page title from the "url" xrefs. As before, the
        # last entry decides the outcome.
        # NOTE(review): underscores are removed rather than turned into spaces, and the
        # stored url values are truncated at the first colon (see above), so this
        # probably never matches - confirm the intended behaviour.
        wikilink = None
        for url in self.xrefs.get("url", []):
            if "//en.wikipedia.org/wiki/" in url:
                wikilink = url.replace("//en.wikipedia.org/wiki/",
                                       "").replace("_", "")
            else:
                wikilink = None

        print(self.wdid)
        for statement in data2add:
            print(statement.prop_nr, statement.value)

        if self.wdid is not None:
            wdPage = PBB_Core.WDItemEngine(self.wdid,
                                           item_name=self.name,
                                           data=data2add,
                                           server="www.wikidata.org",
                                           domain="diseases",
                                           append_value=['P279'])
        else:
            wdPage = PBB_Core.WDItemEngine(item_name=self.name,
                                           data=data2add,
                                           server="www.wikidata.org",
                                           domain="diseases",
                                           append_value=['P279'])

        if wikilink is not None:
            wdPage.set_sitelink(site="enwiki", title=wikilink)
        # self.synonyms is always a list here (possibly empty).
        wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
        self.wd_json_representation = wdPage.get_wd_json_representation()
        PBB_Debug.prettyPrint(self.wd_json_representation)
        wdPage.write(self.logincreds)

        # Keep a local dump of what was written, one file per Disease Ontology id.
        if not os.path.exists('./json_dumps'):
            os.makedirs('./json_dumps')
        dump_path = './json_dumps/' + self.do_id.replace(":", "_") + '.json'
        with open(dump_path, 'w+') as f:
            pprint.pprint(self.wd_json_representation, stream=f)

        PBB_Core.WDItemEngine.log(
            'INFO',
            '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
            .format(main_data_id=self.do_id,
                    exception_type='',
                    message=dump_path,
                    wd_id=self.wdid,
                    duration=time.time() - self.start))
Пример #10
0
            for gene in genes:
                for lgene in list_genes:
                    if gene == lgene['symbol']:
                        lgene['operon'] = {'operon': operon, 'strand': strand}
        f.close()
        return list_genes


# Script setup: load the gene records, prepare the shared reference and log in.
# presumably a list of gene dicts (the loop below reads 'locus_tag', 'name' and
# 'operon' keys from each entry) - verify against combine_resources()
ops = combine_resources()
#pprint.pprint(ops)
genestot = len(ops)  # total number of gene records
count = 0  # incremented in the loop below for every gene that has an 'operon' entry
# Reference attached to the statements: an identifier stored under P698
# (presumably the PubMed ID of the source publication) plus today's UTC date
# under P813.
reference = [
    PBB_Core.WDString(value='19448609', prop_nr='P698', is_reference=True),
    PBB_Core.WDTime(str(strftime("+%Y-%m-%dT00:00:00Z", gmtime())),
                    prop_nr='P813',
                    is_reference=True)
]
# Flag every reference snak so that it overwrites existing references.
for ref in reference:
    ref.overwrite_references = True

# Credentials come from the first two command line arguments
# (presumably user name and password - confirm against PBB_login.WDLogin).
login = PBB_login.WDLogin(sys.argv[1], sys.argv[2])
for gene in ops:
    statements = []
    if 'locus_tag' in gene.keys():
        item_name = '{}    {}'.format(gene['name'], gene['locus_tag'])

        if 'operon' in gene.keys():
            count += 1
            if count > 640:
                wd_operon = los.listeria_operons[gene['operon']
Пример #11
0
    def __init__(self, object):
        # Populate variables with different values
        self.geneSymbols = object["geneSymbols"]
        self.logincreds = object["logincreds"]
        self.goTerms = object["goTerms"]
        self.version = object["results"]["bindings"][0]["upversion"]["value"]
        self.uniprot = object["results"]["bindings"][0]["uniprot"]["value"]
        self.uniprotId = object["id"]
        self.name = object["results"]["bindings"][0]["plabel"]["value"]
        self.start = object["start"]
        self.entrezWikidataIds = object["entrezWikidataIds"]

        up_in_wd = search_wd(self.name)
        self.wdid = None
        hits = []
        for result in up_in_wd["search"]:
            if result["match"]["text"] == up_in_wd["searchinfo"]["search"]:
                hits.append(result)
                print(result["match"]["text"])
        if len(hits) > 0:
            valid = []
            for hit in hits:
                hitPage = PBB_Core.WDItemEngine(item_name=hit["label"],
                                                wd_item_id=hit["id"],
                                                data=[],
                                                server="www.wikidata.org",
                                                domain="proteins")
                json_rep = hitPage.get_wd_json_representation()
                proteinClaim = False
                geneClaim = False
                speciesClaim = False
                if "P279" in json_rep["claims"].keys():
                    for it in json_rep["claims"]["P279"]:
                        if it["mainsnak"]["datavalue"]["value"][
                                "numeric-id"] == 8054:
                            proteinClaim = True
                            break
                        if it["mainsnak"]["datavalue"]["value"][
                                "numeric-id"] == 7187:
                            geneClaim = True
                            break
                        if it["mainsnak"]["datavalue"]["value"][
                                "numeric-id"] == 407355:
                            proteinClaim = True
                            break
                if "P31" in json_rep["claims"].keys():
                    for it in json_rep["claims"]["P31"]:
                        if it["mainsnak"]["datavalue"]["value"][
                                "numeric-id"] == 8047:
                            proteinClaim = True
                            break
                        if it["mainsnak"]["datavalue"]["value"][
                                "numeric-id"] == 8054:
                            proteinClaim = True
                            break
                if "P703" in json_rep["claims"].keys():
                    for it in json_rep["claims"]["P703"]:
                        if it["mainsnak"]["datavalue"]["value"][
                                "numeric-id"] == 5:
                            speciesClaim = True
                            break

                if len(json_rep["claims"]) == 0:
                    raise Exception(hit["id"] +
                                    " has an indentical label as " +
                                    self.uniprotId + ", but with no claims")
                elif ("P352" in json_rep["claims"].keys()
                      or "P705" in json_rep["claims"].keys() or proteinClaim):
                    valid.append(hit["id"])
                elif geneClaim:
                    self.wdid = None
                else:
                    raise Exception(hit["id"] + " has an identical label as " +
                                    self.uniprotId +
                                    " but with no valid protein claims")
            if len(valid) == 1:
                self.wdid = valid[0]
            elif len(valid) > 1:
                raise Exception(
                    self.uniprotId +
                    " There are multiple valid Wikidata items that might be applicable. "
                    + str(valid))

        if "gene_id" in object["results"]["bindings"][0].keys():
            self.gene_id = []
            for geneId in object["results"]["bindings"][0]["gene_id"][
                    "value"].split(";"):
                if geneId != "":
                    self.gene_id.append(geneId)

        if "ecName" in object["results"]["bindings"][0].keys():
            self.ecname = []
            self.ecname.append(
                object["results"]["bindings"][0]["ecName"]["value"])
        self.alias = []
        for syn in object["results"]["bindings"][0]["upalias"]["value"].split(
                ";"):
            if syn != "":
                self.alias.append(syn)
        if "pdbid" in object["results"]["bindings"][0].keys(
        ) and object["results"]["bindings"][0]["pdbid"]["value"] != "":
            self.pdb = []
            for pdbId in object["results"]["bindings"][0]["pdbid"][
                    "value"].split(";"):
                self.pdb.append(
                    pdbId.replace("http://rdf.wwpdb.org/pdb/",
                                  "").replace(" ", ""))
        if "refseqid" in object["results"]["bindings"][0].keys():
            self.refseq = []
            for refseqId in object["results"]["bindings"][0]["refseqid"][
                    "value"].split(";"):
                self.refseq.append(
                    refseqId.replace("http://purl.uniprot.org/refseq/",
                                     "").replace(" ", ""))
        if "ensemblp" in object["results"]["bindings"][0].keys(
        ) and object["results"]["bindings"][0]["ensemblp"]["value"] != "":
            self.ensemblp = []
            for ensP in object["results"]["bindings"][0]["ensemblp"][
                    "value"].split(";"):
                self.ensemblp.append(
                    ensP.replace("http://purl.uniprot.org/ensembl/",
                                 "").replace(" ", ""))

        # Prepare references
        refStatedIn = PBB_Core.WDItemID(value=2629752,
                                        prop_nr='P248',
                                        is_reference=True)
        refStatedIn.overwrite_references = True
        refURL = "http://www.uniprot.org/uniprot/" + self.uniprotId + ".txt?version=" + str(
            self.version)
        refReferenceURL = PBB_Core.WDUrl(value=refURL,
                                         prop_nr='P854',
                                         is_reference=True)
        refReferenceURL.overwrite_references = True
        refImported = PBB_Core.WDItemID(value=905695,
                                        prop_nr='P143',
                                        is_reference=True)
        refImported.overwrite_references = True
        timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
        refRetrieved = PBB_Core.WDTime(timeStringNow,
                                       prop_nr='P813',
                                       is_reference=True)
        refRetrieved.overwrite_references = True
        protein_reference = [[
            refStatedIn, refImported, refRetrieved, refReferenceURL
        ]]

        references = dict()
        proteinPrep = dict()
        genePrep = dict()

        # P279 = subclass of
        proteinPrep['P279'] = [
            PBB_Core.WDItemID(value="Q8054",
                              prop_nr='P279',
                              references=protein_reference)
        ]

        # P703 = found in taxon
        proteinPrep['P703'] = [
            PBB_Core.WDItemID(value="Q5",
                              prop_nr='P703',
                              references=protein_reference)
        ]

        # P352 = UniprotID
        proteinPrep['P352'] = [
            PBB_Core.WDString(value=self.uniprotId,
                              prop_nr='P352',
                              references=protein_reference)
        ]

        # P591 = ec number
        if "ecname" in vars(self):
            proteinPrep['P591'] = []
            for i in range(len(self.ecname)):
                proteinPrep['P591'].append(
                    PBB_Core.WDString(value=self.ecname[i],
                                      prop_nr='P591',
                                      references=protein_reference))

        # P638 = PDBID
        if "pdb" in vars(self) and len(self.pdb) > 0:
            proteinPrep['P638'] = []
            for i in range(len(self.pdb)):
                proteinPrep['P638'].append(
                    PBB_Core.WDString(value=self.pdb[i],
                                      prop_nr='P638',
                                      references=protein_reference))

        # P637 = Refseq Protein ID
        if "refseq" in vars(self) and len(self.refseq) > 0:
            proteinPrep['P637'] = []
            for i in range(len(self.refseq)):
                proteinPrep['P637'].append(
                    PBB_Core.WDString(value=self.refseq[i],
                                      prop_nr='P637',
                                      references=protein_reference))

        # P705 = Ensembl Protein ID
        if "ensemblp" in vars(self) and len(self.ensemblp) > 0:
            proteinPrep['P705'] = []
            for i in range(len(self.ensemblp)):
                proteinPrep['P705'].append(
                    PBB_Core.WDString(value=self.ensemblp[i],
                                      prop_nr='P705',
                                      references=protein_reference))
        """
        # P686 = Gene Ontology ID
        proteinPrep["P680"] = []
        proteinPrep["P681"] = []
        proteinPrep["P682"] = []

        for result in self.goTerms["results"]["bindings"]:

            statement = [
                    PBB_Core.WDString(value=result["go"]["value"].replace("http://purl.obolibrary.org/obo/GO_", "GO:"),
                                      prop_nr='P686', references=protein_reference)]
            goWdPage = PBB_Core.WDItemEngine(item_name=result["goLabel"]["value"], data=statement,
                                                 server="www.wikidata.org", domain="proteins")
            if goWdPage.get_description() == "":
                goWdPage.set_description("Gene Ontology term")
            js = goWdPage.get_wd_json_representation()
            goWdId = goWdPage.write(self.logincreds)

            if result["parentLabel"]["value"] == "molecular_function":
                exists = False
                for i in range(len(proteinPrep["P680"])):
                    if proteinPrep["P680"][i].value == goWdId:
                        exists = True
                if not exists:
                    proteinPrep["P680"].append(
                        PBB_Core.WDItemID(value=goWdId, prop_nr='P680', references=protein_reference))
            if result["parentLabel"]["value"] == "cellular_component":
                exists = False
                for i in range(len(proteinPrep["P681"])):
                    if proteinPrep["P681"][i].value == goWdId:
                        exists = True
                if not exists:
                    proteinPrep["P681"].append(
                        PBB_Core.WDItemID(value=goWdId, prop_nr='P681', references=protein_reference))
            if result["parentLabel"]["value"] == "biological_process":
                exists = False
                for i in range(len(proteinPrep["P682"])):
                    if proteinPrep["P682"][i].value == goWdId:
                        exists = True
                if not exists:
                    proteinPrep["P682"].append(
                        PBB_Core.WDItemID(value=goWdId, prop_nr='P682', references=protein_reference))
        """

        # P702 = Encoded by
        if "gene_id" in vars(self) and len(self.gene_id) > 0:
            proteinPrep['P702'] = []
            proteinPrep['P702'].append(
                PBB_Core.WDItemID(
                    value=self.entrezWikidataIds[self.gene_id[0].replace(
                        "http://purl.uniprot.org/geneid/",
                        "").replace(" ", "")],
                    prop_nr='P702',
                    references=protein_reference))

        proteinData2Add = []
        for key in proteinPrep.keys():
            for statement in proteinPrep[key]:
                proteinData2Add.append(statement)
                print(statement.prop_nr, statement.value)
        if self.wdid is None:
            wdProteinpage = PBB_Core.WDItemEngine(item_name=self.name,
                                                  data=proteinData2Add,
                                                  server="www.wikidata.org",
                                                  domain="proteins",
                                                  append_value=['P279'])
        else:
            wdProteinpage = PBB_Core.WDItemEngine(wd_item_id=self.wdid,
                                                  item_name=self.name,
                                                  data=proteinData2Add,
                                                  server="www.wikidata.org",
                                                  domain="proteins",
                                                  append_value=['P279'])

        if len(self.alias) > 0:
            wdProteinpage.set_aliases(aliases=self.alias,
                                      lang='en',
                                      append=True)
        if wdProteinpage.get_description() == "":
            wdProteinpage.set_description(description='human protein',
                                          lang='en')
        if wdProteinpage.get_description(lang="de") == "":
            wdProteinpage.set_description(description='humanes Protein',
                                          lang='de')
        if wdProteinpage.get_description(lang="nl") == "":
            wdProteinpage.set_description(description='menselijk eiwit',
                                          lang='nl')
        if wdProteinpage.get_description(
                lang="fr") == "" or wdProteinpage.get_description(
                    lang="fr") == "protéine":
            wdProteinpage.set_description(description='protéine humaine',
                                          lang='fr')

        self.wd_json_representation = wdProteinpage.get_wd_json_representation(
        )
        PBB_Debug.prettyPrint(self.wd_json_representation)
        wdProteinpage.write(self.logincreds)
        print(wdProteinpage.wd_item_id)
        if not os.path.exists('./json_dumps'):
            os.makedirs('./json_dumps')
        f = open('./json_dumps/' + self.uniprotId + '.json', 'w+')
        pprint.pprint(self.wd_json_representation, stream=f)
        f.close()
        PBB_Core.WDItemEngine.log(
            'INFO',
            '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
            .format(main_data_id=self.uniprotId,
                    exception_type='',
                    message=f.name,
                    wd_id=self.wdid,
                    duration=time.time() - self.start))
        print("===============")
Пример #12
0
                        str(gnsym_gemma_ids[values["Gene Symbol"]]),
                        prop_nr='P854',
                        is_reference=True)
                    refURL2 = PBB_Core.WDUrl(value=values["Web Link"],
                                             prop_nr='P854',
                                             is_reference=True)
                    refImported = PBB_Core.WDItemID(value='Q22330995',
                                                    prop_nr='P143',
                                                    is_reference=True)
                    refImported.overwrite_references = True
                    refStated = PBB_Core.WDItemID(value='Q22978334',
                                                  prop_nr='P248',
                                                  is_reference=True)
                    timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
                    refRetrieved = PBB_Core.WDTime(timeStringNow,
                                                   prop_nr='P813',
                                                   is_reference=True)
                    refRetrieved.overwrite_references = True
                    gnasscn_reference = [[
                        refURL, refURL2, refStated, refImported, refRetrieved
                    ]]
                    value = PBB_Core.WDItemID(value=disease_wdid,
                                              prop_nr="P2293",
                                              references=gnasscn_reference)

                    # Get a pointer to the Wikidata page on the gene under scrutiny
                    wd_gene_page = PBB_Core.WDItemEngine(
                        wd_item_id=values["gene_wdid"],
                        data=[value],
                        server="www.wikidata.org",
                        domain="genes")