Exemplo n.º 1
0
    def test_GO_annotate_genelist2_vs_enriched(self):
        """Annotation with the enriched terms must rebuild the enrichment network.

        The gene list in ``data/genelist2.tsv`` is submitted twice: first with
        the settings from ``job_req`` (only ``paste_data`` overridden), then
        as an ``annot`` job whose custom slim consists of the terms of the
        first job whose ``q`` is below that job's ``qvalue``.  Both resulting
        networks must have identical node and edge sets.
        """
        # Context manager so the data file is closed deterministically.
        with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
            input_lines = fh.read()
        submit_url = urls.reverse('GOnet-submit-form')

        # First submission; the job is read back as the most recent one.
        c.post(submit_url, dict(job_req, paste_data=input_lines), follow=True)
        enrich_job = GOnetSubmission.objects.latest('submit_time')

        # Terms passing the job's q-value threshold become the custom slim.
        df = enrich_job.enrich_res_df
        enriched_terms = df[df['q'] < enrich_job.qvalue]['term']
        custom_annotation = '\n'.join(enriched_terms)

        annot_req = dict(
            job_req,
            paste_data=input_lines,
            analysis_type='annot',
            slim='custom',
            custom_terms=custom_annotation,
        )
        c.post(submit_url, annot_req, follow=True)
        annot_job = GOnetSubmission.objects.latest('submit_time')

        G_enrich = cyjs.cyjs2nx(json.loads(enrich_job.network))
        G_annot = cyjs.cyjs2nx(json.loads(annot_job.network))

        self.assertSetEqual(set(G_enrich.nodes), set(G_annot.nodes))
        self.assertSetEqual(set(G_enrich.edges), set(G_annot.edges))
Exemplo n.º 2
0
 def test_GO_enrichment_qval01_celcomp(self):
     """Submit genelist2 with qvalue 0.01 in the cellular_component namespace.

     The network of the most recent submission must contain each of the
     four expected ``GO:`` term nodes.
     """
     # Context manager so the data file is closed deterministically.
     with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
         input_lines = fh.read()
     req = dict(job_req, paste_data=input_lines, qvalue=0.01,
                namespace='cellular_component')
     c.post(urls.reverse('GOnet-submit-form'), req, follow=True)

     sn = GOnetSubmission.objects.latest('submit_time')
     G = cyjs.cyjs2nx(json.loads(sn.network))
     # Keep only node ids carrying the GO: prefix.
     enriched = {n for n in G.nodes() if n.startswith('GO:')}
     for term in ('GO:0000786', 'GO:0044815', 'GO:0032993', 'GO:0000785'):
         self.assertIn(term, enriched)
Exemplo n.º 3
0
    def test_resolution(self):
        """Check resolution of a mixed list of submitted identifiers.

        Verifies, for the most recent submission:

        * the ``submit_name`` column of ``parsed_data`` (index -> submitted
          name), including the placeholder ``_0000N`` indices;
        * the ``duplicate_of`` entry of the first placeholder row;
        * the notes, description and Ensembl ID columns of the id-mapping
          response;
        * the ``identified`` / ``ambiguous`` flags stored on network nodes.
        """
        # CMKBR7 is a synonym of CCR7
        # O14804 is PNR
        input_lines = '\n'.join([
            "PNR", "KIAA0457", "ELDF10", "CCR7", "AIM", "O14804", "CMKBR7",
            "Q9UNH8", "P28068", "FOO"
        ])
        c.post(urls.reverse('GOnet-submit-form'),
               dict(job_req, paste_data=input_lines),
               follow=True)
        sn = GOnetSubmission.objects.latest('submit_time')

        expected_names = {
            'O75787': 'ELDF10',
            'P32248': 'CCR7',
            'Q9NRI5': 'KIAA0457',
            'O14804': 'PNR',
            '_00000': 'O14804',
            '_00001': 'CMKBR7',
            'Q9UNH8': 'Q9UNH8',
            'P28068': 'P28068',
            'P26358': 'AIM',
            '_00002': 'FOO',
        }
        self.assertDictEqual(sn.parsed_data['submit_name'].to_dict(),
                             expected_names)
        self.assertEqual(sn.parsed_data.loc['_00000', 'duplicate_of'],
                         'O14804')

        # Test id mapping response (tab-separated, first column is the index)
        idmap_resp = c.get(
            urls.reverse('GOnet-input-idmap', args=(str(sn.id), )))
        res = pd.read_csv(io.StringIO(idmap_resp.content.decode()),
                          sep='\t',
                          index_col=0)
        self.assertEqual(res.loc['CMKBR7', 'Notes'], 'same as CCR7')
        self.assertEqual(res.loc['O14804', 'Notes'], 'same as PNR')
        self.assertEqual(res.loc['AIM', 'Description'],
                         'DNA (cytosine-5)-methyltransferase 1')
        self.assertIn('ambiguous', res.loc['PNR', 'Notes'])
        self.assertIn('not recognized', res.loc['FOO', 'Notes'])

        # Test tricky Ensembl IDs
        self.assertEqual(res.loc['P28068', 'Ensembl_ID'], 'ENSG00000242574')

        # Check graph attributes.  ``G.nodes[...]`` is used instead of the
        # ``G.node`` alias, which newer networkx releases no longer provide.
        G = cyjs.cyjs2nx(json.loads(sn.network))
        self.assertEqual(G.nodes['_00002']['data']['identified'], False)
        self.assertEqual(G.nodes['P32248']['data']['identified'], True)
        self.assertEqual(G.nodes['O14804']['data']['ambiguous'], True)
Exemplo n.º 4
0
 def test_GO_annotate_genelist1(self):
     """Annotate genelist1 with the goslim_immunol slim.

     The network of the most recent submission must contain an edge from
     ``GO:0042254`` to ``Q9HC36``.
     """
     # Context manager so the data file is closed deterministically.
     with open(pkg_file(__name__, 'data/genelist1.lst'), 'r') as fh:
         input_lines = fh.read()
     req = dict(
         job_req,
         paste_data=input_lines,
         analysis_type='annot',
         slim='goslim_immunol',
     )
     c.post(urls.reverse('GOnet-submit-form'), req, follow=True)

     sn = GOnetSubmission.objects.latest('submit_time')
     G = cyjs.cyjs2nx(json.loads(sn.network))
     self.assertTrue(G.has_edge('GO:0042254', 'Q9HC36'))
Exemplo n.º 5
0
    def test_GO_annotate_genelist2(self):
        """Annotate genelist2 against custom terms read from a data file.

        Checks the HTTP status of the submission, the GO-term predecessors of
        three gene nodes, the ``tot_gn`` attribute of node ``GO:0071300``, and
        that both the CSV and the TXT result views list ``LTK`` for
        ``GO:0032526``.
        """
        # Context managers so both data files are closed deterministically.
        with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
            input_lines = fh.read()
        with open(pkg_file(__name__, 'data/custom_annotation.txt'),
                  'r') as fh:
            custom_annotation = fh.read()
        req = dict(
            job_req,
            paste_data=input_lines,
            analysis_type='annot',
            slim='custom',
            custom_terms=custom_annotation,
        )
        resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
        self.assertEqual(resp.status_code, 200)

        sn = GOnetSubmission.objects.latest('submit_time')
        net_dict = json.loads(sn.network)
        G = cyjs.cyjs2nx(net_dict)
        expected_predecessors = (
            ('P29376', 'GO:0071300'),
            ('Q5TBA9', 'GO:0016043'),
            ('P16403', 'GO:0065003'),
        )
        for gene_id, term_id in expected_predecessors:
            self.assertListEqual(list(G.predecessors(gene_id)), [term_id])

        # Test node GO:0071300 (cellular response to retinoic acid)
        term_node = [
            node for node in net_dict['elements']['nodes']
            if node['data']['id'] == 'GO:0071300'
        ][0]
        self.assertEqual(term_node['data']['tot_gn'],
                         len(O.get_attr('GO:0071300', 'human')))

        # Test CSV response (indexed by its second column)
        csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(str(sn.id), )))
        res = pd.read_csv(io.StringIO(csv_resp.content.decode()),
                          sep=',',
                          index_col=1)
        self.assertIn('LTK', res.loc['GO:0032526', 'Genes'])

        # Test TXT response: the first line starting with the term id must
        # mention LTK; it is a failure if no such line exists.
        txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(str(sn.id), )))
        for line in io.StringIO(txt_resp.content.decode()):
            if line.strip().startswith('GO:0032526'):
                self.assertIn('LTK', line)
                break
        else:
            self.fail('no line for GO:0032526 in TXT response')
Exemplo n.º 6
0
    def test_GO_annotate_genelist2(self):
        """Annotate genelist2 with the goslim_immunol slim.

        Checks the HTTP status of the submission, one expected term->gene
        edge, that the number of gene nodes with positive / negative
        ``expr:user_supplied`` values matches the second column of the input
        file, and that the CSV and TXT result views contain ``GO:0007165``.
        """
        genelist_path = pkg_file(__name__, 'data/genelist2.tsv')
        # Context manager so the data file is closed deterministically.
        with open(genelist_path, 'r') as fh:
            input_lines = fh.read()
        input_data_df = pd.read_csv(genelist_path, sep='\t', header=None)
        req = dict(
            job_req,
            paste_data=input_lines,
            analysis_type='annot',
            slim='goslim_immunol',
        )
        resp = c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
        self.assertEqual(resp.status_code, 200)
        sn = GOnetSubmission.objects.latest('submit_time')
        net = json.loads(sn.network)
        G = cyjs.cyjs2nx(net)
        self.assertTrue(G.has_edge('GO:0007165', 'P29376'))

        # Test recognition of user-supplied contrast values
        gene_nodes = [
            node for node in net['elements']['nodes']
            if not node['data']['name'].startswith('GO:')
        ]
        user_values = [
            float(node['data']['expr:user_supplied']) for node in gene_nodes
        ]
        n_positive = sum(1 for value in user_values if value > 0)
        n_negative = sum(1 for value in user_values if value < 0)
        self.assertEqual(n_positive,
                         np.sum(input_data_df[1] > 0) - 1)  # -1 for HIST1H2AM
        self.assertEqual(n_negative, np.sum(input_data_df[1] < 0))

        # Test CSV response
        csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(str(sn.id), )))
        res_df = pd.read_csv(io.StringIO(csv_resp.content.decode()),
                             sep=',',
                             index_col=0)
        self.assertIn('GO:0007165', set(res_df['GO_term_ID']))
        self.assertEqual(res_df.index[0], 1)

        # Test TXT response: the first whitespace-separated token of every
        # line is collected as a term id.
        txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(str(sn.id), )))
        goterms = {
            line.split()[0]
            for line in io.StringIO(txt_resp.content.decode())
        }
        self.assertIn('GO:0007165', goterms)
Exemplo n.º 7
0
    def test_GO_annotate_genelist8_large(self):
        """Annotate the first 2500 lines of genelist8 with goslim_generic.

        Checks the symbol stored on node ``Q8TCT6`` and one expression value
        returned for the ``DICE-CD4 T cell (stim)`` cell type.  The TXT and
        CSV result views are requested without content assertions: the TXT
        body only has to decode and the CSV body only has to parse.
        """
        # Context manager so the data file is closed deterministically.
        with open(pkg_file(__name__, 'data/genelist8.tsv'), 'r') as fh:
            input_lines = fh.read().split('\n')
        input_str = '\n'.join(input_lines[:2500])
        req = dict(
            job_req,
            paste_data=input_str,
            analysis_type='annot',
            slim='goslim_generic',
        )
        c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
        sn = GOnetSubmission.objects.latest('submit_time')
        G = cyjs.cyjs2nx(json.loads(sn.network))
        # ``G.nodes[...]`` is used instead of the ``G.node`` alias, which
        # newer networkx releases no longer provide.
        self.assertEqual(G.nodes['Q8TCT6']['data']['nodesymbol'], 'SPPL3')

        # Check expression values (DICE-DB)
        expr_resp = c.get(
            urls.reverse('GOnet-get-expression',
                         kwargs={
                             'jobid': str(sn.id),
                             'celltype': 'DICE-CD4 T cell (stim)'
                         }))
        expr_vals = json.loads(expr_resp.content.decode())
        self.assertAlmostEqual(expr_vals['A1XBS5'], 0.09010716525639434)

        # Test TXT response (must be retrievable and decodable)
        txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(str(sn.id), )))
        txt_resp.content.decode()

        # Test CSV response (must be retrievable and parseable)
        csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(str(sn.id), )))
        pd.read_csv(io.StringIO(csv_resp.content.decode()),
                    sep=',',
                    index_col=0)
Exemplo n.º 8
0
 def test_GO_annot_mouse_genes(self):
     """Annotate genelist7 as mouse genes with the goslim_generic slim.

     Checks that ``GO:0006950`` is a predecessor of ``MGI:1351618``, the
     length of the ``xgenes`` attribute of ``GO:0048856``, and the
     identifier and description attributes stored on ``MGI:1351618``.
     """
     # Context manager so the data file is closed deterministically.
     with open(pkg_file(__name__, 'data/genelist7.txt'), 'r') as fh:
         input_lines = fh.read()
     req = dict(
         job_req,
         paste_data=input_lines,
         analysis_type='annot',
         slim='goslim_generic',
         organism='mouse',
     )
     c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
     sn = GOnetSubmission.objects.latest('submit_time')
     G = cyjs.cyjs2nx(json.loads(sn.network))

     self.assertIn('GO:0006950', G.predecessors('MGI:1351618'))
     # ``G.nodes[...]`` is used instead of the ``G.node`` alias, which
     # newer networkx releases no longer provide.
     self.assertEqual(len(G.nodes['GO:0048856']['data']['xgenes']), 35)

     gene_data = G.nodes['MGI:1351618']['data']
     self.assertEqual(gene_data['ensembl_id'], 'ENSMUSG00000014905')
     self.assertEqual(gene_data['uniprot_id'], 'Q9QYI6')
     self.assertEqual(gene_data['mgi_id'], 'MGI:1351618')
     self.assertEqual(gene_data['desc'],
                      'DnaJ heat shock protein family (Hsp40) member B9')
Exemplo n.º 9
0
    def test_GO_annot_goslim_generic(self):
        """Annotate genelist2 with goslim_generic, cellular_component only.

        Checks one expected term->gene edge, the number of ``slimterms`` on
        node ``O60282``, and that exactly one node has the symbol ``ZNF761``
        and carries four ``slimterms``.
        """
        # Context manager so the data file is closed deterministically.
        with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
            input_lines = fh.read()
        req = dict(
            job_req,
            paste_data=input_lines,
            analysis_type='annot',
            slim='goslim_generic',
            namespace='cellular_component',
        )
        c.post(urls.reverse('GOnet-submit-form'), req, follow=True)
        sn = GOnetSubmission.objects.latest('submit_time')
        G = cyjs.cyjs2nx(json.loads(sn.network))
        self.assertTrue(G.has_edge('GO:0005886',
                                   'P32248'))  # CCR7 in plasma membrane

        # ``G.nodes[...]`` is used instead of the ``G.node`` alias, which
        # newer networkx releases no longer provide.
        self.assertEqual(len(G.nodes['O60282']['data']['slimterms']), 7)

        ids = [
            n for n in G.nodes()
            if G.nodes[n]['data']['nodesymbol'] == 'ZNF761'
        ]
        self.assertEqual(len(ids), 1)
        znf_node = G.nodes[ids[0]]
        self.assertEqual(len(znf_node['data']['slimterms']), 4)
Exemplo n.º 10
0
    def test_GO_enrichment_default(self):
        """Run an enrichment job on genelist2 with the settings from job_req.

        Only ``paste_data`` is overridden.  For the most recent submission
        the test checks: the enrichment table, the CSV and TXT result views,
        selected edge and node attributes of the network JSON, expression
        values for two cell types, and the id-mapping view.
        """

        def response_buffer(response):
            # Decoded response body wrapped as a readable text stream.
            return io.StringIO(response.content.decode())

        def first_with(elements, key, value):
            # First element whose ``data[key]`` equals ``value``.
            return [el for el in elements if el['data'][key] == value][0]

        # Context manager so the data file is closed deterministically.
        with open(pkg_file(__name__, 'data/genelist2.tsv'), 'r') as fh:
            input_lines = fh.read()
        c.post(urls.reverse('GOnet-submit-form'),
               dict(job_req, paste_data=input_lines),
               follow=True)
        sn = GOnetSubmission.objects.latest('submit_time')
        job_id = str(sn.id)

        self.assertIn('GO:0006334',
                      set(sn.enrich_res_df.query('q<0.05')['term']))
        net_dict = json.loads(sn.network)
        nodes = net_dict['elements']['nodes']
        edges = net_dict['elements']['edges']
        # The network JSON must be convertible to a graph.
        cyjs.cyjs2nx(net_dict)

        # Test CSV response
        csv_resp = c.get(urls.reverse('GOnet-csv-res', args=(job_id,)))
        res = pd.read_csv(response_buffer(csv_resp), sep=',', index_col=0)
        self.assertEqual(res.index[0], 1)
        self.assertListEqual(
            list(res.columns),
            ['GO_term_ID', 'GO_term_def', 'P', 'P_FDR_adj', 'NofGenes',
             'Genes'])
        res.set_index('GO_term_ID', inplace=True)
        self.assertEqual(res.index.name, 'GO_term_ID')
        self.assertIn('GO:0006334', set(res.index))
        self.assertEqual(res.loc['GO:2000520', 'Genes'], "CCR7|HAVCR2")

        # Test TXT response: the first whitespace-separated token of every
        # line is collected as a term id.
        txt_resp = c.get(urls.reverse('GOnet-txt-res', args=(job_id,)))
        enrichterms = {
            line.split()[0] for line in response_buffer(txt_resp)
        }
        self.assertIn('GO:0006334', enrichterms)

        # Test specific terms
        edge = first_with(edges, 'name',
                          'GO:0006334 (interacts with) HIST1H2BC')
        self.assertDictEqual(
            edge['data']['specific_terms'],
            {'GO:0006334': {'refs': ['PMID:21873635',
                                     'PMID:422550', 'PMID:9119399'],
                            'specific_term_name': 'nucleosome assembly'}})

        # Test relation
        # "histone H3-K27 trimethylation" is_a "histone H3-K27 methylation"
        edge = first_with(edges, 'name',
                          'GO:0070734 (interacts with) GO:0098532')
        self.assertEqual(edge['data']['relation'], 'is_a')

        # Test node ABCB1
        node = first_with(nodes, 'nodesymbol', 'ABCB1')
        self.assertAlmostEqual(node['data']['expr:user_supplied'],
                               0.6021704444)
        self.assertEqual(node['data']['ensembl_id'], 'ENSG00000085563')

        # Test node GO:0098532 (histone H3-K27 trimethylation)
        node = first_with(nodes, 'id', 'GO:0098532')
        self.assertLess(node['data']['P'], 8.0e-07)
        # Should be rather significant
        self.assertLess(node['data']['Padj'], 0.01)
        self.assertEqual(node['data']['tot_gn'], 6)

        # Test resolved attribute
        node = first_with(nodes, 'nodesymbol', 'LPPR2')
        self.assertEqual(node['data']['uniprot_id'], 'Q96GM1')
        self.assertEqual(node['data']['ensembl_id'], 'ENSG00000105520')
        self.assertEqual(node['data']['desc'],
                         'Phospholipid phosphatase-related protein type 2')
        self.assertEqual(node['data']['primname'], 'PLPPR2')

        node = first_with(nodes, 'nodesymbol', 'LTC4S')
        self.assertEqual(node['data']['uniprot_id'], 'Q16873')
        self.assertEqual(node['data']['ensembl_id'], 'ENSG00000213316')

        # Check expression values (protein atlas)
        expr_resp = c.get(
            urls.reverse('GOnet-get-expression',
                         kwargs={'jobid': job_id,
                                 'celltype': 'HPA-adipose tissue'}))
        expr_vals = json.loads(expr_resp.content.decode())
        self.assertEqual(expr_vals['O14503'], 170.0)

        # Check expression values (DICE-DB)
        expr_resp = c.get(
            urls.reverse('GOnet-get-expression',
                         kwargs={'jobid': job_id,
                                 'celltype': 'DICE-Th1Th17'}))
        expr_vals = json.loads(expr_resp.content.decode())
        self.assertAlmostEqual(expr_vals['P10721'], 9.893615)

        # Test id mapping response (tab-separated, first column is the index)
        idmap_resp = c.get(urls.reverse('GOnet-input-idmap', args=(job_id,)))
        res = pd.read_csv(response_buffer(idmap_resp), sep='\t', index_col=0)
        self.assertEqual(res.loc['HIST1H2AM', 'Notes'], 'same as HIST1H2AG')
        self.assertEqual(res.loc['LPPR2', 'Preferred_name'], 'PLPPR2')