# read in command line options
(options, args) = parser.parse_args()
# required arguments
if options.fp:
    fpname = options.fp
else:
    raise RuntimeError("fingerprint name missing")
print "ML model is trained with", fpname

# read the actives
fps_act = []
for line in open(inpath + "training_actives_cleaned.dat", "r"):
    line = line.strip().split()
    # contains: [sample_id, hit, pec50, smiles]
    fp = cf.getNumpyFP(line[3], fpname, "float")
    if fp is not None:
        fps_act.append(fp)
num_actives = len(fps_act)
print "actives read and fingerprints calculated:", num_actives

# read the inactives
fps_inact = []
for line in open(inpath + "training_inactives_cleaned.dat", "r"):
    line = line.strip().split()
    # contains: [sample_id, hit, pec50, smiles]
    fp = cf.getNumpyFP(line[3], fpname, "float")
    if fp is not None:
        fps_inact.append(fp)
num_inactives = len(fps_inact)
print "inactives read and fingerprints calculated:", num_inactives
# read in command line options
(options, args) = parser.parse_args()
# required arguments
if options.fp:
    fpname = options.fp
else:
    raise RuntimeError('fingerprint name missing')
print "ML model is trained with", fpname

# read the actives
fps_act = []
for line in open(inpath+'training_actives_cleaned.dat', 'r'):
    line = line.strip().split()
    # contains: [sample_id, hit, pec50, smiles]
    fp = cf.getNumpyFP(line[3], fpname, 'float')
    if fp is not None:
        fps_act.append(fp)
num_actives = len(fps_act)
print "actives read and fingerprints calculated:", num_actives

# read the inactives
fps_inact = []
for line in open(inpath+'training_inactives_cleaned.dat', 'r'):
    line = line.strip().split()
    # contains: [sample_id, hit, pec50, smiles]
    fp = cf.getNumpyFP(line[3], fpname, 'float')
    if fp is not None:
        fps_inact.append(fp)
num_inactives = len(fps_inact)
print "inactives read and fingerprints calculated:", num_inactives
# Example no. 3
# 0
# read in command line options
(options, args) = parser.parse_args()
# required arguments
if options.fp:
    fpname = options.fp
else:
    raise RuntimeError('fingerprint name missing')
print "ML model is trained with", fpname

# read the actives
fps_act = []
for line in open(inpath + 'training_actives_cleaned.dat', 'r'):
    line = line.strip().split()
    # contains: [sample_id, hit, pec50, smiles]
    fp = cf.getNumpyFP(line[3], fpname, 'float')
    if fp is not None:
        fps_act.append(fp)
num_actives = len(fps_act)
print "actives read and fingerprints calculated:", num_actives

# read the inactives
fps_inact = []
for line in open(inpath + 'training_inactives_cleaned.dat', 'r'):
    line = line.strip().split()
    # contains: [sample_id, hit, pec50, smiles]
    fp = cf.getNumpyFP(line[3], fpname, 'float')
    if fp is not None:
        fps_inact.append(fp)
num_inactives = len(fps_inact)
print "inactives read and fingerprints calculated:", num_inactives
# Example no. 4
# 0
lr_rdk5 = cPickle.load(gzip.open(path+'../final_models/lr_rdk5_model.pkl.gz', 'r'))
rf_rdk5 = cPickle.load(gzip.open(path+'../final_models/rf_rdk5_model.pkl.gz', 'r'))
rf_morgan2 = cPickle.load(gzip.open(path+'../final_models/rf_morgan2_model.pkl.gz', 'r'))
print "rf models loaded"

# loop over commercial products
proba_lr_rdk5 = []
proba_rf_rdk5 = []
proba_rf_morgan2 = []
mols = []
for line in gzip.open(path+'commercial_cmps_cleaned.dat.gz', 'r'):
    if line[0] == "#": continue
    line = line.rstrip().split()
    # contains: [smiles, identifier]
    # RDK5
    fp = cf.getNumpyFP(line[0], 'rdk5', 'float')
    proba_lr_rdk5.append(lr_rdk5.predict_proba(fp)[0][1])
    proba_rf_rdk5.append(rf_rdk5.predict_proba(fp)[0][1])
    fp = cf.getNumpyFP(line[0], 'morgan2', 'float')
    proba_rf_morgan2.append(rf_morgan2.predict_proba(fp)[0][1])
    mols.append((line[1], line[0]))
print "probabilities calculated"

# load similarities
scores_rdk5 = cPickle.load(gzip.open(path+'scores_rdk5.pkl.gz' , 'r'))
scores_morgan2 = cPickle.load(gzip.open(path+'scores_morgan2.pkl.gz' , 'r'))
"similarities loaded"

# assign ranks
scores_lr_rdk5 = cf.assignRanks(proba_lr_rdk5, scores_rdk5)
scores_rf_rdk5 = cf.assignRanks(proba_rf_rdk5, scores_rdk5)