Exemplo n.º 1
0
    def protein_item_statements():
        """
        Build the referenced statements handed to the PBB_Core item engine
        for a protein item.

        Reads ``gene_record``, ``uniprot``, ``spec_strain``, ``go_props`` and
        ``go_evidence_codes`` from the enclosing scope.

        :return: list of PBB_Core.WDItemID / PBB_Core.WDString statements,
                 each carrying the UniProt reference
        """
        ref = wdo.reference_store(source='uniprot', identifier=uniprot)

        string_claims = {
            'P637': str(gene_record['refseq']['protein']),  # RefSeq protein id
            'P352': uniprot,  # UniProt id
        }
        item_claims = {
            # strain taxon QID, taken from the first row of the strain record
            'P703': [spec_strain.iloc[0]['wd_qid']],
            'P279': ['Q8054'],  # subclass of protein
        }

        evidence_prop = 'P459'  # property used for the evidence qualifier
        collected = []

        # One claim per GO annotation. Each annotation is indexed as
        # [0] GO id, [1] key into go_props, [2] key into go_evidence_codes.
        for annotation in gene_record['GOTERMS']:
            go_prop = go_props[annotation[1]]
            go_lookup = wdo.WDSparqlQueries(prop='P686', string=annotation[0])
            go_qid = go_lookup.wd_prop2qid()  # GO item located by its GO id
            try:
                evidence_qid = go_evidence_codes[annotation[2]]
                evidence = PBB_Core.WDItemID(value=evidence_qid,
                                             prop_nr=evidence_prop,
                                             is_qualifier=True)
                qualified = PBB_Core.WDItemID(value=go_qid,
                                              prop_nr=go_prop,
                                              references=[ref],
                                              qualifiers=[evidence])
                collected.append(qualified)
            except Exception:
                # Any failure while attaching the evidence qualifier falls
                # back to the same claim without a qualifier.
                unqualified = PBB_Core.WDItemID(value=go_qid,
                                                prop_nr=go_prop,
                                                references=[ref])
                collected.append(unqualified)

        # Item-valued claims: one statement per QID listed under a property.
        for prop, qids in item_claims.items():
            if not qids:
                continue
            collected.extend(
                PBB_Core.WDItemID(value=qid, prop_nr=prop, references=[ref])
                for qid in qids)

        # String-valued claims: empty values are skipped.
        for prop, text in string_claims.items():
            if not text:
                continue
            collected.append(
                PBB_Core.WDString(value=text, prop_nr=prop, references=[ref]))

        return collected
Exemplo n.º 2
0
    def gene_item_statements():
        """
        Construct the list of referenced statements to pass to the PBB_Core
        item engine for a gene item.

        Reads ``gene_record`` and ``spec_strain`` from the enclosing scope.
        Every statement carries an NCBI Gene reference built from
        ``gene_record['_id']``.

        :return: list of PBB_Core.WDItemID / PBB_Core.WDString statements
        """
        # creates reference object for WD gene item claim
        ncbi_gene_reference = wdo.reference_store(source='ncbi_gene', identifier=gene_record['_id'])

        # claims for datatype string
        WD_String_CLAIMS = {'P351': str(gene_record['_id']),
                            'P2393': gene_record['locus_tag'],
                            'P644': str(int(gene_record['genomic_pos']['start'])),
                            'P645': str(int(gene_record['genomic_pos']['end'])),
                            }
        # claims for datatype item
        WD_Item_CLAIMS = {'P703': spec_strain.iloc[0]['wd_qid'],
                          'P279': 'Q7187',
                          }

        # Convert the strand to the corresponding WD item (Forward Strand /
        # Reverse Strand). The value is normalised to text first so that both
        # the integer form (1 / -1) and the string form ('1' / '-1') match;
        # comparing the raw value against string literals alone silently
        # dropped the strand claim whenever the record held integers.
        strand = str(gene_record['genomic_pos']['strand'])
        if strand == '1':
            WD_Item_CLAIMS['P2548'] = 'Q22809680'
        elif strand == '-1':
            WD_Item_CLAIMS['P2548'] = 'Q22809711'

        statements = []
        # Wrap every claim in the matching PBB_Core datatype object.
        # WDItemID datatype
        for k, v in WD_Item_CLAIMS.items():
            statements.append(PBB_Core.WDItemID(value=v, prop_nr=k, references=[ncbi_gene_reference]))
        # WDString datatype
        for k, v in WD_String_CLAIMS.items():
            statements.append(PBB_Core.WDString(value=v, prop_nr=k, references=[ncbi_gene_reference]))
        return statements
Exemplo n.º 3
0
    def gene_item_statements():
        """
        Build the referenced statements handed to the PBB_Core item engine
        for a gene item.

        Reads ``gene_record`` and ``spec_strain`` from the enclosing scope.
        Every statement carries an NCBI Gene reference; the genomic start/end
        statements additionally carry the chromosome as a P2249 qualifier.

        :return: list of PBB_Core.WDItemID / PBB_Core.WDString statements
        """
        # reference shared by every claim on the gene item
        gene_ref = wdo.reference_store(source='ncbi_gene',
                                       identifier=gene_record['_id'])

        # plain string claims
        string_claims = {
            'P351': str(gene_record['_id']),
            'P2393': gene_record['locus_tag'],
        }
        # genomic start / end, emitted with the chromosome qualifier below
        position_claims = {
            'P644': str(int(gene_record['genomic_pos']['start'])),
            'P645': str(int(gene_record['genomic_pos']['end'])),
        }
        # item-valued claims
        item_claims = {
            'P703': spec_strain.iloc[0]['wd_qid'],
            'P279': 'Q7187',
        }

        # map the integer strand onto its WD item (Forward / Reverse Strand);
        # any other value adds no strand claim
        strand = gene_record['genomic_pos']['strand']
        for strand_code, strand_qid in ((1, 'Q22809680'), (-1, 'Q22809711')):
            if strand == strand_code:
                item_claims['P2548'] = strand_qid
                break

        chromosome_qualifier = PBB_Core.WDString(
            value=gene_record['genomic_pos']['chr'],
            prop_nr='P2249',
            is_qualifier=True)

        collected = []
        # WDItemID datatype
        collected.extend(
            PBB_Core.WDItemID(value=qid, prop_nr=prop, references=[gene_ref])
            for prop, qid in item_claims.items())
        # WDString datatype
        collected.extend(
            PBB_Core.WDString(value=text, prop_nr=prop, references=[gene_ref])
            for prop, text in string_claims.items())
        # WDString datatype, qualified with the chromosome
        collected.extend(
            PBB_Core.WDString(value=text,
                              prop_nr=prop,
                              references=[gene_ref],
                              qualifiers=[chromosome_qualifier])
            for prop, text in position_claims.items())

        return collected
Exemplo n.º 4
0
 def parse_go_terms(goclass):
     """
     Resolve one GO annotation to a Wikidata item and the property to file
     it under.

     :param goclass: indexable record; element 0 is the value searched for
                     under P686, element 1 is one of 'Function', 'Component'
                     or 'Process' (presumably one row of the UniProT TSV
                     result -- confirm against the caller)
     :return: tuple ``(qid, property)`` where ``qid`` is whatever
              ``wd_prop2qid()`` returned
     :raises KeyError: if element 1 is not one of the three known classes
     """
     class_to_property = {
         'Function': 'P680',
         'Component': 'P681',
         'Process': 'P682',
     }
     lookup = wdo.WDSparqlQueries(string=goclass[0], prop='P686')
     qid = lookup.wd_prop2qid()
     target_property = class_to_property[goclass[1]]
     return qid, target_property
Exemplo n.º 5
0
    def protein_item_statements():
        """
        Build the referenced statements handed to the PBB_Core item engine
        for a protein item.

        Reads ``gene_record``, ``uniprot`` and ``spec_strain`` from the
        enclosing scope and resolves each GO annotation via
        ``parse_go_terms``.

        :return: list of PBB_Core.WDItemID / PBB_Core.WDString statements,
                 each carrying the UniProt reference
        """
        ref = wdo.reference_store(source='uniprot', identifier=uniprot)

        # String claims. Locus tag (P2393) and EC number (P591) were
        # commented out in the original and stay disabled here.
        string_claims = {
            'P637': str(gene_record['refseq']['protein']),
            'P352': uniprot,
        }
        item_claims = {
            'P703': [spec_strain.iloc[0]['wd_qid']],
            'P279': ['Q8054'],
            'P680': [],  # molecular function
            'P681': [],  # cellular component
            'P682': [],  # biological process
        }

        # file every GO annotation under the property parse_go_terms selects
        for annotation in gene_record['GOTERMS']:
            resolved = parse_go_terms(annotation)
            go_property = resolved[1]
            go_qid = resolved[0]
            item_claims[go_property].append(go_qid)

        collected = []
        # item-valued claims: properties with an empty list add nothing
        for prop, qids in item_claims.items():
            if not qids:
                continue
            collected.extend(
                PBB_Core.WDItemID(value=qid, prop_nr=prop, references=[ref])
                for qid in qids)

        # string-valued claims: empty values are skipped
        for prop, text in string_claims.items():
            if not text:
                continue
            collected.append(
                PBB_Core.WDString(value=text, prop_nr=prop, references=[ref]))
        return collected
Exemplo n.º 6
0
def encodes(gene_record, login):
    """
    Identify the microbial gene and protein items and link them via
    encodes (P688) and encoded by (P702).

    Each write is attempted independently inside its own try/except, so a
    failure on the gene item does not prevent the protein item from being
    written. Outcomes are reported through ``PBB_Core.WDItemEngine.log``.

    :param gene_record: gene record from MGI_UNIP_MERGER(); ``'_id'`` and the
                        first value of ``'uniprot'`` are used
    :param login: login object passed through to ``WDItemEngine.write``
    :return: ``'success'`` when both items were written; otherwise ``None``
             (after printing the elapsed time)
    """

    uniprot = str(list(gene_record['uniprot'].values())[0])
    start = time.time()
    log_format = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'

    #  find gene and protein qids
    gene_qid = wdo.WDSparqlQueries(prop='P351',
                                   string=gene_record['_id']).wd_prop2qid()
    protein_qid = wdo.WDSparqlQueries(prop='P352',
                                      string=uniprot).wd_prop2qid()
    print(gene_qid, protein_qid)

    # if a gene or protein item is not found skip this one
    if gene_qid is not None and protein_qid is not None:
        print('gene {} and protein {} found'.format(gene_qid, protein_qid))
        # generate reference and claim values for each item
        ncbi_gene_reference = wdo.reference_store(
            source='ncbi_gene', identifier=gene_record['_id'])
        gene_encodes = [
            PBB_Core.WDItemID(value=protein_qid,
                              prop_nr='P688',
                              references=[ncbi_gene_reference])
        ]
        protein_encoded_by = [
            PBB_Core.WDItemID(value=gene_qid,
                              prop_nr='P702',
                              references=[ncbi_gene_reference])
        ]
        # find and write items
        success_count = 0

        # The gene item engine is built only here, inside the try. A second,
        # unguarded construction ahead of this block was redundant and let a
        # failure escape the error logging below.
        try:
            wd_encodes_item = PBB_Core.WDItemEngine(wd_item_id=gene_qid,
                                                    data=gene_encodes)
            wd_encodes_item.write(login)
            PBB_Core.WDItemEngine.log(
                'INFO',
                log_format.format(main_data_id=gene_record['_id'],
                                  exception_type='',
                                  message='encodes claim written successfully',
                                  wd_id=wd_encodes_item.wd_item_id,
                                  duration=time.time() - start))
            print('gene success')
            success_count += 1
        except Exception as e:
            # best effort: report and carry on to the protein item
            print(e)
            PBB_Core.WDItemEngine.log(
                'ERROR',
                log_format.format(main_data_id=gene_record['_id'],
                                  exception_type=type(e),
                                  message=str(e),
                                  wd_id='',
                                  duration=time.time() - start))
        try:
            wd_encoded_by_item = PBB_Core.WDItemEngine(wd_item_id=protein_qid,
                                                       data=protein_encoded_by)

            wd_encoded_by_item.write(login)
            PBB_Core.WDItemEngine.log(
                'INFO',
                log_format.format(main_data_id=uniprot,
                                  exception_type='',
                                  message='encoded by claim written successfully',
                                  wd_id=wd_encoded_by_item.wd_item_id,
                                  duration=time.time() - start))
            print('protein success')
            success_count += 1
        except Exception as e:
            print(e)
            PBB_Core.WDItemEngine.log(
                'ERROR',
                log_format.format(main_data_id=gene_record['_id'],
                                  exception_type=type(e),
                                  message=str(e),
                                  wd_id='',
                                  duration=time.time() - start))

        if success_count == 2:
            return 'success'

    end = time.time()
    print('Time elapsed:', end - start)
Exemplo n.º 7
0
def mol2qid(moldict):
    """
    Look up a Wikidata QID for every value of ``moldict`` via property P683.

    All lookups run first; results equal to the string ``'None'`` are then
    dropped.

    :param moldict: mapping whose values are the identifiers to search for
                    (the keys are not used)
    :return: list of ``wd_prop2qid()`` results, excluding ``'None'``
    """
    resolved = [
        wdo.WDSparqlQueries(prop='P683', string=identifier).wd_prop2qid()
        for identifier in moldict.values()
    ]
    return [qid for qid in resolved if qid != 'None']
def encodes(gene_record, login):
    """
    Identify the microbial gene and protein items and link them via
    encodes (P688) and encoded by (P702).

    Each write is attempted independently inside its own try/except, so a
    failure on the gene item does not prevent the protein item from being
    written. Outcomes are reported through ``PBB_Core.WDItemEngine.log``.

    :param gene_record: gene record from MGI_UNIP_MERGER(); ``'_id'`` and the
                        first value of ``'uniprot'`` are used
    :param login: login object passed through to ``WDItemEngine.write``
    :return: ``'success'`` when both items were written; otherwise ``None``
             (after printing the elapsed time)
    """
    uniprot = str(list(gene_record['uniprot'].values())[0])
    start = time.time()
    log_format = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'

    #  find gene and protein qids
    gene_qid = wdo.WDSparqlQueries(prop='P351', string=gene_record['_id']).wd_prop2qid()
    protein_qid = wdo.WDSparqlQueries(prop='P352', string=uniprot).wd_prop2qid()

    # If a gene or protein item is not found, skip this one. Other code in
    # this file filters the *string* 'None' out of wd_prop2qid() results, so
    # that sentinel is treated as "not found" alongside a real None.
    # NOTE(review): confirm the miss value against wdo.WDSparqlQueries.
    not_found = (None, 'None')
    if gene_qid not in not_found and protein_qid not in not_found:
        print('gene {} and protein {} found'.format(gene_qid, protein_qid))
        # generate reference and claim values for each item
        ncbi_gene_reference = wdo.reference_store(source='ncbi_gene', identifier=gene_record['_id'])
        gene_encodes = [PBB_Core.WDItemID(value=protein_qid, prop_nr='P688', references=[ncbi_gene_reference])]
        protein_encoded_by = [PBB_Core.WDItemID(value=gene_qid, prop_nr='P702', references=[ncbi_gene_reference])]
        # find and write items
        success_count = 0
        try:
            wd_encodes_item = PBB_Core.WDItemEngine(wd_item_id=gene_qid, data=gene_encodes)
            wd_encodes_item.write(login)

            PBB_Core.WDItemEngine.log('INFO', log_format.format(
                main_data_id=gene_record['_id'],
                exception_type='',
                message='encodes claim written successfully',
                wd_id=wd_encodes_item.wd_item_id,
                duration=time.time() - start
            ))
            print('gene success')
            success_count += 1
        except Exception as e:
            # best effort: report and carry on to the protein item
            print(e)
            PBB_Core.WDItemEngine.log('ERROR', log_format.format(
                main_data_id=gene_record['_id'],
                exception_type=type(e),
                message=str(e),
                wd_id='',
                duration=time.time() - start
            ))
        try:
            wd_encoded_by_item = PBB_Core.WDItemEngine(wd_item_id=protein_qid, data=protein_encoded_by)
            wd_encoded_by_item.write(login)
            PBB_Core.WDItemEngine.log('INFO', log_format.format(
                main_data_id=uniprot,
                exception_type='',
                message='encoded by claim written successfully',
                wd_id=wd_encoded_by_item.wd_item_id,
                duration=time.time() - start
            ))
            print('protein success')
            success_count += 1
        except Exception as e:
            print(e)
            PBB_Core.WDItemEngine.log('ERROR', log_format.format(
                main_data_id=gene_record['_id'],
                exception_type=type(e),
                message=str(e),
                wd_id='',
                duration=time.time() - start
            ))
        if success_count == 2:
            return 'success'

    end = time.time()
    print('Time elapsed:', end - start)
 def sparql_qid(taxid):
     """
     Look up the Wikidata item whose P685 value equals ``taxid``.

     :param taxid: value to search for under property P685
     :return: whatever ``wd_prop2qid()`` returns for that query
     """
     return wdo.WDSparqlQueries(string=taxid, prop='P685').wd_prop2qid()