Exemplo n.º 1
0
if __name__ == '__main__':
    # Script entry point: for every molecule id in the 'bcrp' dataset and for
    # each of the two hub configurations (lso=True / lso=False), call
    # regress_the_loss() with a LinearRegression regressor and print the
    # returned score followed by the (coefficient, molecule id) pairs.

    # Hand-picked molecule ids. NOTE(review): not referenced anywhere in this
    # block -- the loop below runs over *all* molids of the dataset. Kept in
    # case other code or an interactive session relies on the name.
    MOLIDS = [
        's=Matsson_2009__n=Bromosulfalein',
        's=Zembruski_2011__n=103268452',
        's=Patel_2011__n=19',
        's=Ochoa-Puentes_2011__n=131273183',
        's=Jin_2006__n=Ginsenoside Rg1',
        's=Matsson_2007__n=Timolol',
    ]

    # Two hubs over the same dataset that differ only in the `lso` flag.
    # `expids` presumably selects experiment ids 0..39999 -- TODO confirm.
    hub_lso = Hub(dset_id='bcrp', lso=True, expids=range(40000))
    hub_csr = Hub(dset_id='bcrp', lso=False, expids=range(40000))

    # Molecule ids are taken from the lso=False hub and sorted so the report
    # order is deterministic; each id is processed with both hubs in turn.
    for molid, hub in product(sorted(hub_csr.mols().molids()), (hub_lso, hub_csr)):
        # print molid, hub.lso
        # rfr_the_loss(hub, molid)
        # The loop variable `molid` is deliberately rebound to the id returned
        # by regress_the_loss(); the fourth return value is not needed here.
        molid, most_influential, r2, _ = regress_the_loss(hub, molid, regressor=LinearRegression())
        # Single-argument print(...) calls behave identically as a Python 2
        # print statement and as the Python 3 print function, so the module
        # parses under both interpreters with unchanged output.
        print('%s %s %s' % (molid, hub.lso, r2))
        for infmolid, coeff in most_influential:
            print('\t %.4f %s' % (coeff, infmolid))
        print('-' * 80)


# MOLID = 'CHEMBL1951453'            # hERG
# MOLID = 'NOCAS_M43'                # mutagenicity, FAILS with BAD SMELL
# MOLID = '74-83-9'                  # mutagenicity
# MOLID = 'Bromocriptine'            # pgp-cruciani, BSEP HIT!!!
# MOLID = 'Succinylsulfathiazole'    # pgp-cruciani