Пример #1
0
class LookUp(object):

    def __init__(self):
        self.blw = BioLinkWrapper(Config().get_biolink_api_endpoint())
        self.mg = get_client('gene')
        self.input_object = ''
        self.meta = {
            'data_type': 'disease',
            'input_type': {
                'complexity': 'single',
                'id_type': ['MONDO', 'DO', 'OMIM'],
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC'
            },
            'taxon': 'human',
            'limit': None,
            'source': 'Monarch Biolink',
            'predicate': 'blm:gene associated with condition'
        }

    def metadata(self):
        print("""Mod O DiseaseGeneLookup metadata:""")
        pprint(self.meta)

    #
    # def load_input_object(self, input_object):
    #     input_object = self.blw.get_obj(obj_id=input_object['input'])
    #     self.input_object = {
    #         'id': input_object['id'],
    #         'label': input_object['label'],
    #         'description': input_object['description'],
    #     }
    #
    # def get_input_object_id(self):
    #     return self.input_object['id']

    def echo_input_object(self, output=None):
        if output:
            dump(self.input_object, output, indent=4, separators=(',', ': '))
        else:
            dump(self.input_object, stdout, indent=4, separators=(',', ': '))

    def disease_geneset_lookup(self, name, id):
        input_disease_label = name
        input_disease_id = id
        input_gene_set = self.blw.disease2genes(input_disease_id)
        input_gene_set = [self.blw.parse_association(input_disease_id, input_disease_label, x) for x in
                          input_gene_set['associations']]
        for input_gene in input_gene_set:
            igene_mg = self.mg.query(input_gene['hit_id'].replace('HGNC', 'hgnc'), species='human', entrezonly=True,
                                     fields='entrez,HGNC,symbol')
            input_gene.update({'input_ncbi': 'NCBIGene:{}'.format(igene_mg['hits'][0]['_id'])})
        input_genes_df = pd.DataFrame(data=input_gene_set)
        # # group duplicate ids and gather sources
        input_genes_df['sources'] = input_genes_df['sources'].str.join(', ')
        input_genes_df = input_genes_df.groupby(
            ['input_id', 'input_symbol', 'hit_id', 'hit_symbol', 'relation'])['sources'].apply(', '.join).reset_index()
        return input_genes_df
Пример #2
0
class LookUp(object):
    def __init__(self):
        self.blw = BioLinkWrapper()
        self.mg = MyGeneInfo()
        self.input_object = ''
        self.meta = {
            'data_type': 'disease',
            'input_type': {
                'complexity': 'single',
                'id_type': ['MONDO', 'DO', 'OMIM'],
            },
            'output_type': {
                'complexity': 'set',
                'id_type': 'HGNC'
            },
            'taxon': 'human',
            'limit': None,
            'source': 'Monarch Biolink',
            'predicate': 'blm:gene associated with condition'
        }
        print("""Mod O DiseaseGeneLookup metadata:""")
        pprint(self.meta)

    def load_input_object(self, input_object):
        input_object = self.blw.get_obj(obj_id=input_object['input'])
        self.input_object = {
            'id': input_object['id'],
            'label': input_object['label'],
            'description': input_object['description'],
        }
        pprint(self.input_object)

    def disease_geneset_lookup(self):
        input_disease_id = self.input_object['id']
        input_disease_label = self.input_object['label']
        input_gene_set = self.blw.disease2genes(input_disease_id)
        input_gene_set = [
            self.blw.parse_association(input_disease_id, input_disease_label,
                                       x)
            for x in input_gene_set['associations']
        ]
        # for input_gene in input_gene_set:
        #     igene_mg = self.mg.query(input_gene['hit_id'].replace('HGNC', 'hgnc'), species='human', entrezonly=True,
        #                         fields='entrez,HGNC,symbol')
        #     input_gene.update({'input_ncbi': 'NCBIGene:{}'.format(igene_mg['hits'][0]['_id'])})
        input_genes_df = pd.DataFrame(data=input_gene_set)
        # # group duplicate ids and gather sources
        input_genes_df['sources'] = input_genes_df['sources'].str.join(', ')
        input_genes_df = input_genes_df.groupby(
            ['input_id', 'input_symbol', 'hit_id', 'hit_symbol',
             'relation'])['sources'].apply(', '.join).reset_index()
        return input_genes_df