# read in command line options
(options, args) = parser.parse_args()
# required arguments: the fingerprint name selects which fingerprint
# cf.getNumpyFP computes for every training molecule
if options.fp:
    fpname = options.fp
else:
    raise RuntimeError("fingerprint name missing")
# single-argument print(...) gives the same output on Python 2 and 3
print("%s %s" % ("ML model is trained with", fpname))

# read the actives
fps_act = []
# 'with' closes the file even if fingerprint calculation raises
with open(inpath + "training_actives_cleaned.dat", "r") as actives_file:
    for line in actives_file:
        line = line.strip().split()
        # contains: [sample_id, hit, pec50, smiles]
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        fp = cf.getNumpyFP(line[3], fpname, "float")
        # molecules for which no fingerprint is returned (None) are left out
        if fp is not None:
            fps_act.append(fp)
num_actives = len(fps_act)
print("%s %s" % ("actives read and fingerprints calculated:", num_actives))

# read the inactives
fps_inact = []
with open(inpath + "training_inactives_cleaned.dat", "r") as inactives_file:
    for line in inactives_file:
        line = line.strip().split()
        # contains: [sample_id, hit, pec50, smiles]
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        fp = cf.getNumpyFP(line[3], fpname, "float")
        # molecules for which no fingerprint is returned (None) are left out
        if fp is not None:
            fps_inact.append(fp)
num_inactives = len(fps_inact)
print("%s %s" % ("inactives read and fingerprints calculated:", num_inactives))
# read in command line options
(options, args) = parser.parse_args()
# required arguments: the fingerprint name selects which fingerprint
# cf.getNumpyFP computes for every training molecule
if options.fp:
    fpname = options.fp
else:
    raise RuntimeError('fingerprint name missing')
# single-argument print(...) gives the same output on Python 2 and 3
print("%s %s" % ("ML model is trained with", fpname))

# read the actives
fps_act = []
# 'with' closes the file even if fingerprint calculation raises
with open(inpath+'training_actives_cleaned.dat', 'r') as actives_file:
    for line in actives_file:
        line = line.strip().split()
        # contains: [sample_id, hit, pec50, smiles]
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        fp = cf.getNumpyFP(line[3], fpname, 'float')
        # molecules for which no fingerprint is returned (None) are left out
        if fp is not None:
            fps_act.append(fp)
num_actives = len(fps_act)
print("%s %s" % ("actives read and fingerprints calculated:", num_actives))

# read the inactives
fps_inact = []
with open(inpath+'training_inactives_cleaned.dat', 'r') as inactives_file:
    for line in inactives_file:
        line = line.strip().split()
        # contains: [sample_id, hit, pec50, smiles]
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        fp = cf.getNumpyFP(line[3], fpname, 'float')
        # molecules for which no fingerprint is returned (None) are left out
        if fp is not None:
            fps_inact.append(fp)
num_inactives = len(fps_inact)
print("%s %s" % ("inactives read and fingerprints calculated:", num_inactives))
# read in command line options
(options, args) = parser.parse_args()
# required arguments: the fingerprint name selects which fingerprint
# cf.getNumpyFP computes for every training molecule
if options.fp:
    fpname = options.fp
else:
    raise RuntimeError('fingerprint name missing')
# single-argument print(...) gives the same output on Python 2 and 3
print("%s %s" % ("ML model is trained with", fpname))

# read the actives
fps_act = []
# 'with' closes the file even if fingerprint calculation raises
with open(inpath + 'training_actives_cleaned.dat', 'r') as actives_file:
    for line in actives_file:
        line = line.strip().split()
        # contains: [sample_id, hit, pec50, smiles]
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        fp = cf.getNumpyFP(line[3], fpname, 'float')
        # molecules for which no fingerprint is returned (None) are left out
        if fp is not None:
            fps_act.append(fp)
num_actives = len(fps_act)
print("%s %s" % ("actives read and fingerprints calculated:", num_actives))

# read the inactives
fps_inact = []
with open(inpath + 'training_inactives_cleaned.dat', 'r') as inactives_file:
    for line in inactives_file:
        line = line.strip().split()
        # contains: [sample_id, hit, pec50, smiles]
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        fp = cf.getNumpyFP(line[3], fpname, 'float')
        # molecules for which no fingerprint is returned (None) are left out
        if fp is not None:
            fps_inact.append(fp)
num_inactives = len(fps_inact)
print("%s %s" % ("inactives read and fingerprints calculated:", num_inactives))
def _load_gz_pickle(filename):
    """Unpickle one object from the gzip-compressed file *filename*.

    The file handle is closed before returning, also when unpickling fails.
    """
    # NOTE(review): unpickling can execute code stored in the file; these
    # paths are expected to point at locally produced, trusted files.
    with gzip.open(filename, 'r') as handle:
        return cPickle.load(handle)


# load the pickled models
lr_rdk5 = _load_gz_pickle(path+'../final_models/lr_rdk5_model.pkl.gz')
rf_rdk5 = _load_gz_pickle(path+'../final_models/rf_rdk5_model.pkl.gz')
rf_morgan2 = _load_gz_pickle(path+'../final_models/rf_morgan2_model.pkl.gz')
print("rf models loaded")

# loop over commercial products
proba_lr_rdk5 = []
proba_rf_rdk5 = []
proba_rf_morgan2 = []
mols = []
# 'with' closes the compressed file even if a prediction raises
with gzip.open(path+'commercial_cmps_cleaned.dat.gz', 'r') as cmps_file:
    for line in cmps_file:
        # skip comment/header lines
        if line[0] == "#":
            continue
        line = line.rstrip().split()
        # contains: [smiles, identifier]
        if not line:
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        # NOTE(review): the result of cf.getNumpyFP is not checked for None
        # here; skipping a compound would also have to drop its entry from
        # the preloaded similarity scores -- confirm the input is pre-filtered.
        # RDK5
        fp = cf.getNumpyFP(line[0], 'rdk5', 'float')
        # [0][1]: first (only) sample, second column of predict_proba
        proba_lr_rdk5.append(lr_rdk5.predict_proba(fp)[0][1])
        proba_rf_rdk5.append(rf_rdk5.predict_proba(fp)[0][1])
        # Morgan2
        fp = cf.getNumpyFP(line[0], 'morgan2', 'float')
        proba_rf_morgan2.append(rf_morgan2.predict_proba(fp)[0][1])
        # keep (identifier, smiles) in the same order as the probabilities
        mols.append((line[1], line[0]))
print("probabilities calculated")

# load similarities
scores_rdk5 = _load_gz_pickle(path+'scores_rdk5.pkl.gz')
scores_morgan2 = _load_gz_pickle(path+'scores_morgan2.pkl.gz')
# the original had a bare string here, so the message was never printed
print("similarities loaded")

# assign ranks
scores_lr_rdk5 = cf.assignRanks(proba_lr_rdk5, scores_rdk5)
scores_rf_rdk5 = cf.assignRanks(proba_rf_rdk5, scores_rdk5)