示例#1
0
def read_data_sets_mol( fname, validation_rate = 0, test_rate = 0, disp = False):
	"""
	Read a CSV file and package it into train / validation / test datasets.

	The feature matrix is the column-wise concatenation of the outputs of
	jpd.pd_get_xM, jpd.pd_get_xM_MACCSkeys, jpd.pd_get_xM_molw and
	jpd.pd_get_xM_lasa (in that order). The target comes from the 'exp'
	column and is turned into a binary label: 1 when the value is greater
	than 0, otherwise 0.

	Parameters
	----------
	fname : path handed to pd.read_csv.
	validation_rate : when > 0, forwarded to XY_split to take a validation
		set out of whatever is left after the test split.
	test_rate : when > 0, forwarded to XY_split to take a test set out first.
	disp : forwarded to every DataSet_CSV that is created.

	Returns
	-------
	A plain attribute container holding `train` (always), `test` and
	`validation` (each only when its rate is > 0) and `IMAGE_PIXELS`,
	the number of feature columns.
	"""
	class DataSets(object):
		pass

	frame = pd.read_csv( fname)

	# The builder order fixes the column layout of the feature matrix.
	feature_builders = (
		jpd.pd_get_xM,
		jpd.pd_get_xM_MACCSkeys,
		jpd.pd_get_xM_molw,
		jpd.pd_get_xM_lasa,
	)
	features = np.concatenate( [build( frame) for build in feature_builders], axis = 1)

	# Binarise the measured values: this is a classification task.
	raw_targets = jpd.pd_get_yV( frame, y_id = 'exp').A1
	labels = [1 if value > 0 else 0 for value in raw_targets]

	X = np.array( features)
	Y = np.array( labels)
	assert X.shape[0] == Y.shape[0]

	bundle = DataSets()

	# The test split is taken first, then validation from the remainder.
	# XY_split is assumed to return (kept X, kept Y, held-out X, held-out Y),
	# matching how its result is unpacked here - TODO confirm.
	for split_name, rate in (('test', test_rate), ('validation', validation_rate)):
		if rate > 0:
			X, Y, X_held, Y_held = XY_split( X, Y, rate)
			setattr( bundle, split_name, DataSet_CSV( X_held, Y_held, disp = disp))

	# With both rates at zero, every row ends up in the training set.
	bundle.train = DataSet_CSV( X, Y, disp = disp)

	bundle.IMAGE_PIXELS = features.shape[1]
	return bundle
示例#2
0
def grid_BIKE2(pdr, alphas_log, y_id = 'Solubility_log_mol_l'):
	"""
	Grid-search and cross-validate a Ridge model on a similarity matrix
	extended with a molecular-weight descriptor.

	Steps: build the fingerprint matrix with jpd.pd_get_xM (radius=6,
	nBits=4096), turn it into a similarity matrix with jpyx.calc_tm_sim_M,
	append the molecular weights computed from pdr.SMILES, run
	jgrid.gs_Ridge over alphas_log, show the scores, then call jgrid.cv
	with the best alpha.

	Parameters
	----------
	pdr : table with a SMILES column and the target column named by y_id.
	alphas_log : forwarded to jgrid.gs_Ridge as alphas_log.
	y_id : name of the target column passed to jpd.pd_get_yV.

	Returns
	-------
	The grid-search object returned by jgrid.gs_Ridge.
	"""
	# Single-argument print(...) emits the same text on Python 2 and Python 3.
	print("BIKE with (A+B)+W")

	xM1 = jpd.pd_get_xM( pdr, radius=6, nBits=4096)
	yV = jpd.pd_get_yV( pdr, y_id = y_id)

	# NOTE(review): the banner says "(A+B)", yet only the pd_get_xM
	# fingerprint (A) feeds the similarity matrix; the MACCS keys (B) from
	# jpd.pd_get_xM_MACCSkeys are not part of it. Confirm whether
	# calc_tm_sim_M should instead receive the column-wise concatenation
	# of both fingerprint matrices.
	A = jpyx.calc_tm_sim_M( xM1)
	print(A.shape)

	molw_l = jchem.rdkit_molwt( pdr.SMILES.tolist())
	print(np.shape( molw_l))
	A_molw = jchem.add_new_descriptor( A, molw_l)
	print(A_molw.shape)

	gs = jgrid.gs_Ridge( A_molw, yV, alphas_log=alphas_log)
	# NOTE(review): grid_scores_ looks like the scikit-learn GridSearchCV
	# attribute that newer releases replaced with cv_results_ - confirm
	# against the installed version.
	jutil.show_gs_alpha( gs.grid_scores_)

	jgrid.cv( 'Ridge', A_molw, yV, alpha = gs.best_params_['alpha'])

	return gs
示例#3
0
def read_data_sets_mol(fname, validation_rate=0, test_rate=0, disp=False):
    """Load a CSV file and split it into classification datasets.

    Four descriptor blocks (jpd.pd_get_xM, jpd.pd_get_xM_MACCSkeys,
    jpd.pd_get_xM_molw, jpd.pd_get_xM_lasa) are joined column-wise to
    form the inputs. The 'exp' column supplies the targets, which are
    mapped to 1 when greater than 0 and to 0 otherwise.

    Args:
        fname: Path given to pd.read_csv.
        validation_rate: If > 0, passed to XY_split to separate a
            validation set from the data remaining after the test split.
        test_rate: If > 0, passed to XY_split to separate a test set
            before anything else.
        disp: Passed through to each DataSet_CSV constructor.

    Returns:
        An attribute container with ``train`` (always present), ``test``
        and ``validation`` (present only when their rate is > 0), and
        ``IMAGE_PIXELS`` set to the number of input columns.
    """
    class DataSets(object):
        pass

    table = pd.read_csv(fname)

    # Column order of the joined matrix follows the order of this list.
    descriptor_blocks = [
        jpd.pd_get_xM(table),
        jpd.pd_get_xM_MACCSkeys(table),
        jpd.pd_get_xM_molw(table),
        jpd.pd_get_xM_lasa(table),
    ]
    descriptor_matrix = np.concatenate(descriptor_blocks, axis=1)

    measured = jpd.pd_get_yV(table, y_id='exp').A1

    X = np.array(descriptor_matrix)
    # Thresholding at zero turns the task into binary classification.
    Y = np.array([1 if value > 0 else 0 for value in measured])
    assert X.shape[0] == Y.shape[0]

    result = DataSets()

    # XY_split is assumed to hand back (kept X, kept Y, held-out X,
    # held-out Y), as implied by the unpacking below - TODO confirm.
    if test_rate > 0:
        X, Y, X_test, Y_test = XY_split(X, Y, test_rate)
        result.test = DataSet_CSV(X_test, Y_test, disp=disp)

    if validation_rate > 0:
        X, Y, X_val, Y_val = XY_split(X, Y, validation_rate)
        result.validation = DataSet_CSV(X_val, Y_val, disp=disp)

    # Whatever was not held out (possibly everything) is the training set.
    result.train = DataSet_CSV(X, Y, disp=disp)

    result.IMAGE_PIXELS = descriptor_matrix.shape[1]
    return result
示例#4
0
def grid_MLR_B(pdr, alphas_log, y_id = 'Solubility_log_mol_l'):
	"""
	Grid-search and cross-validate a Ridge model on the MACCS-key matrix.

	The inputs come from jpd.pd_get_xM_MACCSkeys only; no other descriptor
	is appended. jgrid.gs_Ridge is run over alphas_log, the scores are
	shown, and jgrid.cv is then called with the best alpha.

	Parameters
	----------
	pdr : table passed to the jpd helpers; must contain the y_id column.
	alphas_log : forwarded to jgrid.gs_Ridge as alphas_log.
	y_id : name of the target column passed to jpd.pd_get_yV.

	Returns
	-------
	The grid-search object returned by jgrid.gs_Ridge.
	"""
	# Single-argument print(...) emits the same text on Python 2 and Python 3.
	print("MLR with B")

	xM = jpd.pd_get_xM_MACCSkeys( pdr)
	yV = jpd.pd_get_yV( pdr, y_id = y_id)

	gs = jgrid.gs_Ridge( xM, yV, alphas_log=alphas_log)
	# NOTE(review): grid_scores_ looks like the scikit-learn GridSearchCV
	# attribute that newer releases replaced with cv_results_ - confirm
	# against the installed version.
	jutil.show_gs_alpha( gs.grid_scores_)

	jgrid.cv( 'Ridge', xM, yV, alpha = gs.best_params_['alpha'])

	return gs