Пример #1
0
    def create_reference(self):
        """
        Build the InterPro reference and store it on ``self.reference``.

        The reference is a list of five reference snaks, each of which gets
        ``overwrite_references = True``.

        Items:
        Q3047275: InterPro

        Properties:
        stated in (P248)
        imported from (P143)
        software version (P348), taken from ``self.version``
        publication date (P577), taken from ``self.date``
        P2926, carrying ``self.id``
        """
        # This same reference will be used for everything, except for a ref
        # to the interpro item itself.
        interpro_qid = "Q3047275"
        self.reference = [
            PBB_Core.WDItemID(interpro_qid, 'P248', is_reference=True),
            PBB_Core.WDItemID(interpro_qid, 'P143', is_reference=True),
            PBB_Core.WDString(self.version, 'P348', is_reference=True),
            PBB_Core.WDTime(self.date.strftime("+%Y-%m-%dT00:00:00Z"),
                            'P577',
                            is_reference=True),
            PBB_Core.WDString(self.id, "P2926", is_reference=True),
        ]
        for snak in self.reference:
            snak.overwrite_references = True
Пример #2
0
def make_ref_source(source_doc, id_prop, identifier, login=None):
    """
    Build the list of reference snaks for a statement taken from a source.

    The reference is made up of:
    stated in (P248): the release item if the source has a release number,
        otherwise the source item itself
    link to id: link to the identifier in the source
    retrieved (P813): only if the source has no release number

    :param source_doc: dict describing the source. Must contain ``_id`` and
        either ``release`` or ``timestamp`` (formatted ``%Y%m%d``), e.g.
        ``{'_id': 'uniprot', 'timestamp': '20161006'}`` or
        ``{'_id': 'ensembl', 'release': 86, 'timestamp': '20161005'}``.
        The dict is read only; it is not modified.
    :param id_prop: key into ``prop_ids`` selecting the identifier property
    :param identifier: identifier within the source (converted with ``str``)
    :param login: passed to ``Release.get_or_create``; must be given if you
        want to be able to create new release items
    :return: list of reference snaks
    :raises ValueError: if the source or ``id_prop`` is unknown
    """
    source = source_doc['_id']
    if source not in source_items:
        raise ValueError(
            "Unknown source for reference creation: {}".format(source))
    if id_prop not in prop_ids:
        raise ValueError(
            "Unknown id_prop for reference creation: {}".format(id_prop))

    link_to_id = PBB_Core.WDString(value=str(identifier),
                                   prop_nr=prop_ids[id_prop],
                                   is_reference=True)

    if "release" in source_doc:
        # Work on a local string instead of writing the converted value back
        # into the caller's dict.
        release_number = str(source_doc['release'])
        title = "{} Release {}".format(source, release_number)
        description = "Release {} of {}".format(release_number, source)
        release = PBB_Helpers.Release(
            title,
            description,
            release_number,
            edition_of_wdid=source_items[source]).get_or_create(login)

        stated_in = PBB_Core.WDItemID(value=release,
                                      prop_nr='P248',
                                      is_reference=True)
        return [stated_in, link_to_id]

    # No release number: cite the source item plus the retrieval date.
    retrieved_date = datetime.strptime(source_doc['timestamp'], "%Y%m%d")
    stated_in = PBB_Core.WDItemID(value=source_items[source],
                                  prop_nr='P248',
                                  is_reference=True)
    retrieved = PBB_Core.WDTime(retrieved_date.strftime('+%Y-%m-%dT00:00:00Z'),
                                prop_nr='P813',
                                is_reference=True)
    return [stated_in, retrieved, link_to_id]
Пример #3
0
 def create_reference(self):
     """Assemble the InterPro reference and store it on ``self.reference``.

     This same reference will be used for everything, except for a ref to
     the interpro item itself.
     """
     self.reference = [
         # stated in Interpro version XX.X
         PBB_Core.WDItemID(self.release_wdid, 'P248', is_reference=True),
         # interpro ID
         PBB_Core.WDString(self.id, INTERPRO, is_reference=True),
     ]
Пример #4
0
def make_ref(retrieved, genome_id):
    """
    Build the reference snaks for a chromosome statement.

    :param retrieved: object with ``strftime``, used for the retrieved date
    :param genome_id: value stored under P2249 (Refseq Genome ID)
    :return: list of three reference snaks: stated in, genome ID, retrieved
    """
    # stated in ncbi gene
    stated_in = PBB_Core.WDItemID(value='Q20641742',
                                  prop_nr='P248',
                                  is_reference=True)
    # Link to Refseq Genome ID
    genome_link = PBB_Core.WDString(value=genome_id,
                                    prop_nr='P2249',
                                    is_reference=True)
    retrieved_on = PBB_Core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                                   prop_nr='P813',
                                   is_reference=True)
    return [stated_in, genome_link, retrieved_on]
Пример #5
0
def make_reference(source, id_prop, identifier, retrieved):
    """
    Build the reference snaks for a statement taken from ``source``.

    :param source: key into ``source_items`` giving the "stated in" item
    :param id_prop: key into ``prop_ids`` giving the identifier property
    :param identifier: identifier within the source (converted with ``str``)
    :param retrieved: object with ``strftime``, used for the retrieved date
    :return: list of three reference snaks: stated in, link to ID, retrieved
    """
    # stated in
    stated_in = PBB_Core.WDItemID(value=source_items[source],
                                  prop_nr='P248',
                                  is_reference=True)
    # Link to ID
    id_link = PBB_Core.WDString(value=str(identifier),
                                prop_nr=prop_ids[id_prop],
                                is_reference=True)
    retrieved_on = PBB_Core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                                   prop_nr='P813',
                                   is_reference=True)
    return [stated_in, id_link, retrieved_on]
Пример #6
0
def create_uniprot_relationships(login, release_wdid, collection, taxon=None):
    """
    Add InterPro "subclass of" (P279) and "has part" (P527) statements to
    the protein items that already exist in Wikidata.

    :param login: login object handed to ``PBB_Helpers.try_write``
    :param release_wdid: Wikidata ID of the InterPro release item, used as
        the "stated in" (P248) value of every reference
    :param collection: collection queried with ``find``; each returned
        document must provide ``_id`` (the uniprot ID), ``subclass`` and
        ``has_part``
    :param taxon: optional taxon Wikidata ID; when given, only proteins with
        that P703 value are considered
    :raises ValueError: if the item engine reports it would create a new item
    """
    # only do uniprot proteins that are already in wikidata
    if taxon:
        uniprot2wd = PBB_Helpers.id_mapper(UNIPROT, (("P703", taxon),))
        fast_run_base_filter = {UNIPROT: "", "P703": taxon}
    else:
        uniprot2wd = PBB_Helpers.id_mapper(UNIPROT)
        fast_run_base_filter = {UNIPROT: ""}

    cursor = collection.find({'_id': {'$in': list(uniprot2wd)}}, no_cursor_timeout=True)
    # The cursor is opened with no_cursor_timeout, so close it explicitly
    # even when an exception escapes the loop.
    try:
        for doc in tqdm(cursor, total=cursor.count()):
            uniprot_id = doc['_id']

            # Without a Wikidata ID there is nothing to write to: log and
            # move on instead of indexing uniprot2wd with a missing key.
            if uniprot_id not in uniprot2wd:
                print("wdid_not_found " + uniprot_id)
                PBB_Core.WDItemEngine.log("ERROR", PBB_Helpers.format_msg(uniprot_id, UNIPROT, None, "wdid_not_found"))
                continue

            ## References
            # stated in Interpro version XX.X
            ref_stated_in = PBB_Core.WDItemID(release_wdid, 'P248', is_reference=True)
            ref_ipr = PBB_Core.WDString("http://www.ebi.ac.uk/interpro/protein/{}".format(uniprot_id), "P854",
                                        is_reference=True)
            reference = [ref_stated_in, ref_ipr]

            statements = []
            if doc['subclass']:
                for f in doc['subclass']:
                    statements.append(
                        PBB_Core.WDItemID(value=IPRTerm.ipr2wd[f], prop_nr='P279', references=[reference]))
            if doc['has_part']:
                for hp in doc['has_part']:
                    statements.append(
                        PBB_Core.WDItemID(value=IPRTerm.ipr2wd[hp], prop_nr='P527', references=[reference]))

            wd_item = PBB_Core.WDItemEngine(wd_item_id=uniprot2wd[uniprot_id], domain="proteins", data=statements,
                                            fast_run=True, fast_run_base_filter=fast_run_base_filter,
                                            append_value=["P279", "P527", "P361"])

            # The item is addressed by its ID, so the engine must never
            # decide to create a new one.
            if wd_item.create_new_item:
                raise ValueError("something bad happened")
            PBB_Helpers.try_write(wd_item, uniprot_id, INTERPRO, login, edit_summary="add/update family and/or domains")
    finally:
        cursor.close()
Пример #7
0
    def gene_item_statements():
        """
        Build the list of referenced statements to pass to the PBB_Core item
        engine.

        Reads ``record``, ``strain_info`` and ``chrom_wdid`` from the
        enclosing scope.

        :return: list of statements in this order: Entrez Gene ID, Ensembl
            Gene ID, NCBI Locus tag, subclass of, found in taxon, strand
            orientation, genomic start, genomic end, chromosome
        """
        # external IDs; also used when building the reference statements
        external_ids = {
            'entrez_gene': str(record['entrezgene']['@value']),
            'ensembl_gene': record['ensembl']['@value']['gene'],
            'locus_tag': record['locus_tag']['@value']
        }
        statements = []

        # entrez gene id
        entrez_ref = make_ref_source(record['entrezgene']['@source'],
                                     'entrez_gene',
                                     external_ids['entrez_gene'])
        entrez_statement = PBB_Core.WDString(external_ids['entrez_gene'],
                                             PROPS['Entrez Gene ID'],
                                             references=[entrez_ref])
        statements.append(entrez_statement)

        # ensembl gene id
        ensembl_ref = make_ref_source(record['ensembl']['@source'],
                                      'ensembl_gene',
                                      external_ids['ensembl_gene'])
        ensembl_statement = PBB_Core.WDString(external_ids['ensembl_gene'],
                                              PROPS['Ensembl Gene ID'],
                                              references=[ensembl_ref])
        statements.append(ensembl_statement)

        # ncbi locus tag (cited with the entrez reference)
        locus_statement = PBB_Core.WDString(external_ids['locus_tag'],
                                            PROPS['NCBI Locus tag'],
                                            references=[entrez_ref])
        statements.append(locus_statement)

        # statements with no referencable sources (made by hand, for now);
        # both are cited with the ensembl reference
        # subclass of gene
        statements.append(
            PBB_Core.WDItemID('Q7187',
                              PROPS['subclass of'],
                              references=[ensembl_ref]))
        # found in taxon
        statements.append(
            PBB_Core.WDItemID(strain_info['organism_wdid'],
                              PROPS['found in taxon'],
                              references=[ensembl_ref]))

        # genomic position: strand orientation, start, end, chromosome
        position = record['genomic_pos']['@value']
        position_source = record['genomic_pos']['@source']
        position_id_prop = source_ref_id[position_source['_id']]
        position_ref = make_ref_source(position_source, position_id_prop,
                                       external_ids[position_id_prop])

        # genome ID of the chromosome: used as qualifier on start/end and to
        # look up the chromosome item
        genome_id = strain_info['chrom_genomeid_map'][position['chr']]
        chrom_qualifier = PBB_Core.WDString(genome_id,
                                            'P2249',
                                            is_qualifier=True)  # Refseq Genome ID

        # strand orientation
        if position['strand'] == 1:
            strand_qid = 'Q22809680'
        else:
            strand_qid = 'Q22809711'
        statements.append(
            PBB_Core.WDItemID(strand_qid,
                              PROPS['strand orientation'],
                              references=[position_ref]))

        # genomic start and end
        for boundary, prop_label in (('start', 'genomic start'),
                                     ('end', 'genomic end')):
            statements.append(
                PBB_Core.WDString(str(int(position[boundary])),
                                  PROPS[prop_label],
                                  references=[position_ref],
                                  qualifiers=[chrom_qualifier]))

        # chromosome
        statements.append(
            PBB_Core.WDItemID(chrom_wdid[genome_id],
                              PROPS['chromosome'],
                              references=[position_ref]))

        return statements
Пример #8
0
def protein_item(record, strain_info, gene_qid, go_wdid_mapping, login,
                 add_pubmed):
    """
    Build the PBB_Core item for one protein record and write it.

    :param record: annotated record; the fields read here are ``name``,
        ``ensembl``, ``entrezgene``, ``refseq``, ``uniprot``, ``go``,
        ``symbol`` and ``locus_tag``
    :param strain_info: dict providing ``organism_type``, ``organism_name``
        and ``organism_wdid``
    :param gene_qid: Wikidata ID used as the value of the P702 statement
    :param go_wdid_mapping: mapping from GO id to Wikidata ID
    :param login: login object used for pubmed stub creation and the write
    :param add_pubmed: if true, a "stated in" (P248) snak is appended to the
        GO reference for every pubmed entry of the GO record
    :return: None
    """
    item_name = '{} {}'.format(record['name']['@value'],
                               record['ensembl']['@value']['protein'])
    item_description = '{} protein found in {}'.format(
        strain_info['organism_type'], strain_info['organism_name'])

    statements = []

    # external IDs; also used when building the reference statements
    external_ids = {
        'entrez_gene': str(record['entrezgene']['@value']),
        'ensembl_protein': record['ensembl']['@value']['protein'],
        'ensembl_gene': record['ensembl']['@value']['gene'],
        'refseq_protein': record['refseq']['@value']['protein'],
        'uniprot': record['uniprot']['@value']['Swiss-Prot']
    }

    # One identifier statement per source, each cited with a reference built
    # from that source: ensembl protein id, refseq protein id, uniprot id.
    source_refs = {}
    for source_key, id_prop, prop_nr in (('ensembl', 'ensembl_protein', 'P705'),
                                         ('refseq', 'refseq_protein', 'P637'),
                                         ('uniprot', 'uniprot', 'P352')):
        source_refs[source_key] = make_ref_source(
            record[source_key]['@source'], id_prop, external_ids[id_prop])
        statements.append(
            PBB_Core.WDString(external_ids[id_prop],
                              prop_nr,
                              references=[source_refs[source_key]]))
    ensembl_ref = source_refs['ensembl']

    # GO terms
    # TODO: https://www.wikidata.org/wiki/Q3460832
    preprocess_go(record)
    print(record)
    go_source = record['go']['@source']
    go_id_prop = source_ref_id[go_source['_id']]
    go_reference = make_ref_source(go_source, go_id_prop,
                                   external_ids[go_id_prop])
    for go_level, go_records in record['go']['@value'].items():
        level_wdid = go_props[go_level]
        for go_record in go_records:
            go_wdid = go_wdid_mapping[go_record['id']]
            evidence_wdid = go_evidence_codes[go_record['evidence']]
            evidence_qualifier = PBB_Core.WDItemID(value=evidence_wdid,
                                                   prop_nr='P459',
                                                   is_qualifier=True)
            # each statement gets its own copy so pubmed snaks do not leak
            # into the references of other GO statements
            statement_reference = copy.deepcopy(go_reference)
            if add_pubmed:
                statement_reference.extend(
                    PBB_Core.WDItemID(
                        PBB_Helpers.PubmedStub(pubmed).create(login),
                        'P248',
                        is_reference=True)
                    for pubmed in go_record['pubmed'])
            statements.append(
                PBB_Core.WDItemID(go_wdid,
                                  level_wdid,
                                  references=[statement_reference],
                                  qualifiers=[evidence_qualifier]))

    # statements with no referencable sources (made by hand, for now); all
    # are cited with the ensembl reference
    # subclass of protein
    statements.append(
        PBB_Core.WDItemID('Q8054', 'P279', references=[ensembl_ref]))
    # found in taxon
    statements.append(
        PBB_Core.WDItemID(strain_info['organism_wdid'],
                          'P703',
                          references=[ensembl_ref]))
    # link to the gene item (P702)
    statements.append(
        PBB_Core.WDItemID(gene_qid, 'P702', references=[ensembl_ref]))

    fast_run_filter = {'P352': '', 'P703': strain_info['organism_wdid']}
    try:
        wd_item_protein = PBB_Core.WDItemEngine(
            item_name=item_name,
            domain='proteins',
            data=statements,
            append_value=['P279'],
            fast_run=True,
            fast_run_base_filter=fast_run_filter)
        wd_item_protein.set_label(item_name)
        wd_item_protein.set_description(item_description, lang='en')
        wd_item_protein.set_aliases(
            [record['symbol']['@value'], record['locus_tag']['@value']])
    except Exception as e:
        # building the item failed: report it, log it and give up on this
        # record without writing
        print(e)
        PBB_Core.WDItemEngine.log(
            "ERROR",
            format_msg(record['entrezgene']['@value'], str(e), None,
                       ENTREZ_PROP))
        return

    try_write(wd_item_protein, record['entrezgene']['@value'], 'P351', login)
Пример #9
0
def create_protein_ipr(uniprot_id, uniprot_wdid, families, has_part,
                       release_info, login):
    """
    Create interpro relationships for one protein and write them.

    :param uniprot_id: uniprot ID of the protein to modify
    :type uniprot_id: str
    :param uniprot_wdid: wikidata ID of the protein
    :param families: list of ipr wd ids the protein is a subclass of (P279)
    :param has_part: list of ipr wd ids the protein has as part (P527)
    :param release_info: dict with ``date`` (object with ``strftime``) and
        ``version``, both used in the reference
    :param login: login object passed to ``item.write``
    :return: None
    """
    date = release_info['date']
    version = release_info['version']

    # reference shared by every statement created here
    interpro_qid = "Q3047275"
    reference = [
        PBB_Core.WDItemID(interpro_qid, 'P248', is_reference=True),
        PBB_Core.WDItemID(interpro_qid, 'P143', is_reference=True),
        PBB_Core.WDString(version, 'P348', is_reference=True),
        PBB_Core.WDTime(date.strftime("+%Y-%m-%dT00:00:00Z"),
                        'P577',
                        is_reference=True),
        PBB_Core.WDString(
            "http://www.ebi.ac.uk/interpro/protein/{}".format(uniprot_id),
            "P854",
            is_reference=True),
    ]
    for snak in reference:
        snak.overwrite_references = True

    # subclass-of statements first, then has-part statements
    statements = [
        PBB_Core.WDItemID(value=family, prop_nr='P279', references=[reference])
        for family in families or []
    ]
    statements += [
        PBB_Core.WDItemID(value=part, prop_nr='P527', references=[reference])
        for part in has_part or []
    ]

    item = PBB_Core.WDItemEngine(wd_item_id=uniprot_wdid,
                                 data=statements,
                                 server=SERVER,
                                 append_value=["P279", "P527", "P361"])

    log_template = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
    try:
        item.write(login)
    except WDApiError as e:
        # the write was rejected: report it, log it and stop
        print(e)
        PBB_Core.WDItemEngine.log(
            'ERROR',
            log_template.format(main_data_id=uniprot_id,
                                exception_type=type(e),
                                message=str(e),
                                wd_id=uniprot_wdid,
                                duration=datetime.now()))
        return

    written = [(statement.prop_nr, statement.value) for statement in statements]
    PBB_Core.WDItemEngine.log(
        'INFO',
        log_template.format(
            main_data_id=uniprot_id,
            exception_type='',
            message='created protein interpro relationships: {}'.format(written),
            wd_id=uniprot_wdid,
            duration=datetime.now()))
Пример #10
0
def make_chroms(strain_info, retrieved, login):
    """
    Create or update one Wikidata item per chromosome of a strain.

    :param strain_info: dict providing ``chrom_genomeid_map`` (chromosome
        number -> genome ID), ``organism_name``, ``organism_type`` and
        ``organism_wdid``
    :param retrieved: object with ``strftime``; passed to ``make_ref`` for
        the retrieved date of the reference
    :param login: login object passed to ``WDItemEngine.write``
    :return: dict mapping chromosome number to the Wikidata ID of its item,
        covering both items that needed no write and items that were written
        successfully; a chromosome whose write failed is left out
    """
    chrom_wdid = {}
    for chrom_num, genome_id in strain_info['chrom_genomeid_map'].items():

        item_name = '{} chromosome {}'.format(strain_info['organism_name'],
                                              chrom_num)
        item_description = '{} chromosome'.format(strain_info['organism_type'])
        print(item_name)
        print(genome_id)

        reference = make_ref(retrieved, genome_id)
        statements = [
            # subclass of chromosome
            PBB_Core.WDItemID(value='Q37748',
                              prop_nr='P279',
                              references=[reference]),
            # found in taxon
            PBB_Core.WDItemID(value=strain_info['organism_wdid'],
                              prop_nr='P703',
                              references=[reference]),
            # genome id
            PBB_Core.WDString(value=genome_id,
                              prop_nr='P2249',
                              references=[reference]),
        ]

        wd_item = PBB_Core.WDItemEngine(item_name=item_name,
                                        domain='chromosome',
                                        data=statements,
                                        append_value=['P279'],
                                        fast_run=True,
                                        fast_run_base_filter={
                                            'P703':
                                            strain_info['organism_wdid'],
                                            'P2249': ''
                                        })

        if wd_item.require_write:
            print("require write")
            wd_item.set_label(item_name)
            wd_item.set_description(item_description, lang='en')
            try:
                msg = "CREATE" if wd_item.create_new_item else "UPDATE"
                wd_item.write(login=login)
                # Record the item here as well; otherwise chromosomes that
                # were just created or updated are missing from the result.
                chrom_wdid[chrom_num] = wd_item.wd_item_id
                PBB_Core.WDItemEngine.log(
                    "INFO",
                    format_msg(genome_id,
                               msg,
                               wd_item.wd_item_id,
                               external_id_prop='P2249'))
            except Exception as e:
                # best effort: report and log the failure, then continue
                # with the next chromosome
                print(e)
                PBB_Core.WDItemEngine.log(
                    "ERROR",
                    format_msg(genome_id,
                               str(e),
                               wd_item.wd_item_id,
                               external_id_prop='P2249'))
        else:
            chrom_wdid[chrom_num] = wd_item.wd_item_id
            PBB_Core.WDItemEngine.log(
                "INFO",
                format_msg(genome_id,
                           "SKIP",
                           wd_item.wd_item_id,
                           external_id_prop='P2249'))

    return chrom_wdid
Пример #11
0
    def __init__(self, object):
        """

        :type self: object
        """
        self.start = object["start"]
        self.entrezgene = object["entrezgene"]
        self.uniprotwikidataids = object["uniprotwikidataids"]
        gene_annotations = self.annotate_gene()
        self.genomeInfo = object["speciesInfo"][str(gene_annotations['taxid'])]
        self.content = object
        self.name = gene_annotations["name"]
        self.logincreds = object["logincreds"]
        if "_timestamp" in gene_annotations.keys():
            self.annotationstimestamp = gene_annotations["_timestamp"]
        self.wdid = object["wdid"]

        # symbol:
        self.symbol = gene_annotations["symbol"]
        print(self.symbol)
        # HGNC
        if "HGNC" in gene_annotations:
            if isinstance(gene_annotations["HGNC"], list):
                self.hgnc = gene_annotations["HGNC"]
            else:
                self.hgnc = [gene_annotations["HGNC"]]
        else:
            self.hgnc = None

        # Ensembl Gene & transcript
        if "ensembl" in gene_annotations:
            if "gene" in gene_annotations["ensembl"]:
                if isinstance(gene_annotations["ensembl"]["gene"], list):
                    self.ensembl_gene = gene_annotations["ensembl"]["gene"]
                else:
                    self.ensembl_gene = [gene_annotations["ensembl"]["gene"]]
            else:
                self.ensembl_gene = None

            if "transcript" in gene_annotations["ensembl"]:
                if isinstance(gene_annotations["ensembl"]["transcript"], list):
                    self.ensembl_transcript = gene_annotations["ensembl"]["transcript"]
                else:
                    self.ensembl_transcript = [gene_annotations["ensembl"]["transcript"]]
            else:
                self.ensembl_transcript = None
        # Homologene
        if "homologene" in gene_annotations:
            if isinstance(gene_annotations["homologene"]["id"], list):
                self.homologene = [str(i) for i in gene_annotations["homologene"]["id"]]
            else:
                self.homologene = [str(gene_annotations["homologene"]["id"])]
        else:
            self.homologene = None
        # Refseq 
        if "refseq" in gene_annotations:
            if "rna" in gene_annotations["refseq"]:
                if isinstance(gene_annotations["refseq"]["rna"], list):
                    self.refseq_rna = gene_annotations["refseq"]["rna"]
                else:
                    self.refseq_rna = [gene_annotations["refseq"]["rna"]]
            else:
                self.refseq_rna = None
        else:
            self.refseq_rna = None

            # MGI
        if "MGI" in gene_annotations:
            if isinstance(gene_annotations["MGI"], list):
                self.MGI = gene_annotations["MGI"]
            else:
                self.MGI = [gene_annotations["MGI"]]
        else:
            self.MGI = None

        self.chromosome = None
        self.startpost = None
        self.endpos = None
        if "genomic_pos" in gene_annotations:
            if isinstance(gene_annotations["genomic_pos"], list):
                self.chromosome = []
                self.startpos = []
                self.endpos = []
                for i in range(len(gene_annotations["genomic_pos"])):
                    if gene_annotations["genomic_pos"][i]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                        self.genomeInfo["name"]].keys():
                        self.chromosome.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                   gene_annotations["genomic_pos"][i]["chr"]])
                        self.startpos.append(gene_annotations["genomic_pos"][i]["start"])
                        self.endpos.append(gene_annotations["genomic_pos"][i]["end"])
            else:
                self.chromosome = []
                self.startpos = []
                self.endpos = []
                if gene_annotations["genomic_pos"]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                    self.genomeInfo["name"]].keys():
                    self.chromosome.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                               gene_annotations["genomic_pos"]["chr"]])
                    self.startpos.append(gene_annotations["genomic_pos"]["start"])
                    self.endpos.append(gene_annotations["genomic_pos"]["end"])

        self.encodes = None
        if "uniprot" in gene_annotations.keys():
            if "Swiss-Prot" in gene_annotations["uniprot"].keys():
                if isinstance(gene_annotations["uniprot"]["Swiss-Prot"], list):
                    self.encodes = []
                    for uniprot in gene_annotations["uniprot"]["Swiss-Prot"]:
                        self.encodes.append(uniprot)
                else:
                    self.encodes = [gene_annotations["uniprot"]["Swiss-Prot"]]


        self.chromosomeHg19 = None
        self.startposHg19 = None
        self.endposHg19 = None
        if "genomic_pos_hg19" in gene_annotations:
            if isinstance(gene_annotations["genomic_pos_hg19"], list):
                self.chromosomeHg19 = []
                self.startposHg19 = []
                self.endposHg19 = []
                for i in range(len(gene_annotations["genomic_pos_hg19"])):
                    if gene_annotations["genomic_pos_hg19"][i]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                        self.genomeInfo["name"]].keys():
                        self.chromosomeHg19.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                       gene_annotations["genomic_pos_hg19"][i]["chr"]])
                        self.startposHg19.append(gene_annotations["genomic_pos_hg19"][i]["start"])
                        self.endposHg19.append(gene_annotations["genomic_pos_hg19"][i]["end"])
            else:
                self.chromosomeHg19 = []
                self.startposHg19 = []
                self.endposHg19 = []
                if gene_annotations["genomic_pos_hg19"]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                    self.genomeInfo["name"]].keys():
                    self.chromosomeHg19.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                   gene_annotations["genomic_pos_hg19"]["chr"]])
                    self.startposHg19.append(gene_annotations["genomic_pos_hg19"]["start"])
                    self.endposHg19.append(gene_annotations["genomic_pos_hg19"]["end"])

        # type of Gene
        if "type_of_gene" in gene_annotations:
            self.type_of_gene = []
            if gene_annotations["type_of_gene"] == "ncRNA":
                self.type_of_gene.append("Q427087")
            if gene_annotations["type_of_gene"] == "snRNA":
                self.type_of_gene.append("Q284578")
            if gene_annotations["type_of_gene"] == "snoRNA":
                self.type_of_gene.append("Q284416")
            if gene_annotations["type_of_gene"] == "rRNA":
                self.type_of_gene.append("Q215980")
            if gene_annotations["type_of_gene"] == "tRNA":
                self.type_of_gene.append("Q201448")
            if gene_annotations["type_of_gene"] == "pseudo":
                self.type_of_gene.append("Q277338")
            if gene_annotations["type_of_gene"] == "protein-coding":
                self.type_of_gene.append("Q20747295")
        else:
            self.type_of_gene = None
        # Reference section  
        # Prepare references
        refStatedIn = PBB_Core.WDItemID(value=self.genomeInfo["release"], prop_nr='P248', is_reference=True)
        refStatedIn.overwrite_references = True
        refImported = PBB_Core.WDItemID(value='Q20641742', prop_nr='P143', is_reference=True)
        refImported.overwrite_references = True
        timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
        refRetrieved = PBB_Core.WDTime(timeStringNow, prop_nr='P813', is_reference=True)
        refRetrieved.overwrite_references = True
        gene_reference = [refStatedIn, refImported, refRetrieved]

        refStatedInEnsembl = PBB_Core.WDItemID(value= 'Q21996330', prop_nr='P248', is_reference=True)
        refStatedInEnsembl.overwrite_references = True
        refImportedEnsembl = PBB_Core.WDItemID(value='Q1344256', prop_nr='P143', is_reference=True)
        refImportedEnsembl.overwrite_references = True

        ensembl_reference = [refStatedInEnsembl, refImportedEnsembl, refRetrieved]

        genomeBuildQualifier = PBB_Core.WDItemID(value=self.genomeInfo["genome_assembly"], prop_nr='P659',
                                                 is_qualifier=True)
        genomeBuildPreviousQualifier = PBB_Core.WDItemID(value=self.genomeInfo["genome_assembly_previous"],
                                                         prop_nr='P659', is_qualifier=True)

        prep = dict()
        prep['P703'] = [PBB_Core.WDItemID(value=self.genomeInfo['wdid'], prop_nr='P703',
                                          references=[copy.deepcopy(gene_reference)])]
        if self.genomeInfo["name"] == "human":
            prep['P353'] = [
                PBB_Core.WDString(value=self.symbol, prop_nr='P353', references=[copy.deepcopy(gene_reference)])]
        prep['P351'] = [
            PBB_Core.WDString(value=str(self.entrezgene), prop_nr='P351', references=[copy.deepcopy(gene_reference)])]

        prep['P279'] = [PBB_Core.WDItemID(value='Q7187', prop_nr='P279', references=[copy.deepcopy(gene_reference)])]
        if "type_of_gene" in vars(self):
            if self.type_of_gene != None:
                for i in range(len(self.type_of_gene)):
                    prep['P279'].append(PBB_Core.WDItemID(value=self.type_of_gene[i], prop_nr='P279',
                                                          references=[copy.deepcopy(gene_reference)]))

        if "ensembl_gene" in vars(self):
            if self.ensembl_gene != None:
                prep['P594'] = []
                for ensemblg in self.ensembl_gene:
                    prep['P594'].append(
                        PBB_Core.WDString(value=ensemblg, prop_nr='P594', references=[copy.deepcopy(gene_reference)]))

        if "ensembl_transcript" in vars(self):
            if self.ensembl_transcript != None:
                prep['P704'] = []
                for ensemblt in self.ensembl_transcript:
                    prep['P704'].append(
                        PBB_Core.WDString(value=ensemblt, prop_nr='P704', references=[copy.deepcopy(gene_reference)]))

        if "encodes" in vars(self):
            if self.encodes != None:
                prep['P688'] = []
                for uniprot in self.encodes:
                    if uniprot in self.uniprotwikidataids.keys():
                        prep['P688'].append(PBB_Core.WDItemID(value=self.uniprotwikidataids[uniprot], prop_nr='P688', references=[copy.deepcopy(gene_reference)]))

        if "hgnc" in vars(self):
            if self.hgnc != None:
                prep['P354'] = []
                for hugo in self.hgnc:
                    prep['P354'].append(
                        PBB_Core.WDString(value=hugo, prop_nr='P354', references=[copy.deepcopy(gene_reference)]))

        if "homologene" in vars(self):
            if self.homologene != None:
                prep['P593'] = []
                for ortholog in self.homologene:
                    prep['P593'].append(
                        PBB_Core.WDString(value=ortholog, prop_nr='P593', references=[copy.deepcopy(gene_reference)]))

        if "refseq_rna" in vars(self):
            if self.refseq_rna != None:
                prep['P639'] = []
                for refseq in self.refseq_rna:
                    prep['P639'].append(
                        PBB_Core.WDString(value=refseq, prop_nr='P639', references=[copy.deepcopy(gene_reference)]))

        if "chromosome" in vars(self):
            prep['P1057'] = []
            if self.chromosome != None:
                for chrom in list(set(self.chromosome)):
                    prep['P1057'].append(
                        PBB_Core.WDItemID(value=chrom, prop_nr='P1057', references=[copy.deepcopy(gene_reference)]))

        if "startpos" in vars(self):
            if not 'P644' in prep.keys():
                prep['P644'] = []
            if self.startpos != None:
                for pos in self.startpos:
                    prep['P644'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P644', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildQualifier)]))
        if "endpos" in vars(self):
            if not 'P645' in prep.keys():
                prep['P645'] = []
            if self.endpos != None:
                for pos in self.endpos:
                    prep['P645'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P645', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildQualifier)]))

        if "startposHg19" in vars(self):
            if not 'P644' in prep.keys():
                prep['P644'] = []
            if self.startposHg19 != None:
                for pos in self.startposHg19:
                    prep['P644'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P644', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildPreviousQualifier)]))
        if "endposHg19" in vars(self):
            if not 'P644' in prep.keys():
                prep['P645'] = []
            if self.endposHg19 != None:
                for pos in self.endposHg19:
                    prep['P645'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P645', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildPreviousQualifier)]))

        if "MGI" in vars(self):
            prep['P671'] = []
            if self.MGI != None:
                for mgi in self.MGI:
                    prep['P671'].append(PBB_Core.WDString(value=mgi, prop_nr='P671',
                                        references=[copy.deepcopy(gene_reference)]))

        if "alias" in gene_annotations.keys():
            if isinstance(gene_annotations["alias"], list):
                self.synonyms = []
                for alias in gene_annotations["alias"]:
                    self.synonyms.append(alias)
            else:
                self.synonyms = [gene_annotations["alias"]]
            self.synonyms.append(self.symbol)
            print(self.synonyms)
        else:
            self.synonyms = None

        data2add = []
        for key in prep.keys():
            for statement in prep[key]:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)

        if self.wdid != None:
          # if self.encodes != None:
            wdPage = PBB_Core.WDItemEngine(self.wdid, item_name=self.name, data=data2add, server="www.wikidata.org",
                                           domain="genes")
            if wdPage.get_description() == "":
                wdPage.set_description(description=self.genomeInfo['name'] + ' gene', lang='en')
            if wdPage.get_description(lang='fr') == "" or wdPage.get_description(lang='fr') == "gène":
                wdPage.set_description(description="Un gène " + self.genomeInfo['fr-name'], lang='fr')
            if wdPage.get_description(lang='nl') == "" or wdPage.get_description(lang='nl') == "gen":
                wdPage.set_description(description="Een "+ self.genomeInfo['nl-name']+ " gen", lang='nl')
            if self.synonyms != None:
                wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
            print(self.wdid)
            self.wd_json_representation = wdPage.get_wd_json_representation()
            PBB_Debug.prettyPrint(self.wd_json_representation)
            PBB_Debug.prettyPrint(data2add)
            # print(self.wd_json_representation)
            wdPage.write(self.logincreds)
            print("aa")
        else:
          #if self.encodes != None:
            wdPage = PBB_Core.WDItemEngine(item_name=self.name, data=data2add, server="www.wikidata.org",
                                           domain="genes")
            if wdPage.get_description() != "":
                wdPage.set_description(description=self.genomeInfo['name'] + ' gene', lang='en')
            if wdPage.get_description(lang='fr') == "" or wdPage.get_description(lang='fr') == "gène":
                wdPage.setdescription(description="Un gène " + self.genomeInfo['fr-name'], lang='fr')
            if wdPage.get_description(lang='nl') == "" or wdPage.get_description(lang='nl') == "gen":
                wdPage.setdescription(description="Een "+ self.genomeInfo['nl-name']+ " gen", lang='nl')
            if self.synonyms != None:
                wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
            self.wd_json_representation = wdPage.get_wd_json_representation()
            PBB_Debug.prettyPrint(self.wd_json_representation)
            PBB_Debug.prettyPrint(data2add)
            # print(self.wd_json_representation)
            self.wdid = wdPage.write(self.logincreds)

        PBB_Core.WDItemEngine.log('INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                        main_data_id=str(self.entrezgene),
                        exception_type='',
                        message=f.name,
                        wd_id=self.wdid,
                        duration=time.time()-self.start
                    ))