def main():
    """Command-line entry point: read a CSV file, clean it and store the result.

    Parses ``--data``, ``--scratch`` and ``-j/--procs``; loads the ``--data``
    file through ``read_csv``; passes the rows to ``clean_data``; and writes
    whatever that returns to ``<scratch>/molecule_data`` with both
    ``misc.save_obj`` and ``misc.save_json``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--data', action='store', help='', metavar='FILE')
    cli.add_argument('--scratch', action='store', help='', metavar="DIR",
                     default="_tmp_")
    cli.add_argument('-j', '--procs', action='store', help='', type=int,
                     metavar='int', default=0)
    args = cli.parse_args()

    # The scratch directory is used as a plain string prefix below, so it
    # has to end with a slash.
    if args.scratch[-1] != "/":
        args.scratch += "/"

    # The header row is read but not needed here.
    _header, rows = read_csv(args.data, read_header=True)
    cleaned = clean_data(rows)

    target = args.scratch + "molecule_data"
    misc.save_obj(target, cleaned)
    misc.save_json(target, cleaned)

    return
def voc2index(self):
    """Build the word -> index vocabulary of ``self.short_sentences_table``.

    Columns 1, 4 and 5 are skipped (they already hold numerical values).
    Column 0 is split on whitespace and every word is counted; each cell of
    the remaining columns is counted as one single word. The counts are
    wrapped in a ``Vocab`` whose ``stoi`` mapping is stored under
    ``self.dict_name`` with ``save_obj`` and returned.
    """
    table = self.short_sentences_table
    n_rows = table.shape[0]
    n_cols = table.shape[1]
    total_cells = n_rows * n_cols
    numeric_columns = (1, 4, 5)

    frequencies = defaultdict(int)
    for row in range(n_rows):
        for col in range(n_cols):
            # 1-based running number of the cell, used for progress output.
            visited = row * n_cols + col + 1
            if visited % 100 == 0:
                print(f'done {visited * 100 / total_cells} %')

            if col in numeric_columns:
                continue

            cell = table[row, col]
            # The name column may hold several words; every other column
            # is treated as one token.
            tokens = cell.split() if col == 0 else [str(cell)]
            for token in tokens:
                frequencies[token] += 1

    index_dict_word = Vocab(Counter(frequencies))
    save_obj(index_dict_word.stoi, self.dict_name)
    return index_dict_word.stoi
def create_indices_matrix(self, is_dict_existing):
    """Translate ``self.short_sentences_table`` into a matrix of numbers.

    :param is_dict_existing: if true, the vocabulary is loaded from
        ``self.dict_name``; otherwise a new one is built with
        ``self.voc2index()``.
    :return: the column-wise concatenation of the name word indices, item
        condition, category index, brand index, price and shipping flag.
        The same matrix is stored under ``self.final_matrix_name``.
    """
    if is_dict_existing:
        vocabulary = load_obj(self.dict_name)
    else:
        vocabulary = self.voc2index()

    table = self.short_sentences_table

    def sentence_column(col):
        # Every word of the cell is replaced by its vocabulary number.
        return np.array([[vocabulary[word] for word in cell.split()]
                         for cell in table[:, col]])

    def single_word_column(col):
        # The whole cell is looked up as one vocabulary entry.
        return np.array([[vocabulary[cell]] for cell in table[:, col]])

    def numeric_column(col):
        # Already numerical: cast to float and turn into a column vector.
        return np.expand_dims(table[:, col].astype('float'), axis=1)

    names_indices = sentence_column(0)
    item_conditions = numeric_column(1)
    category_names_indices = single_word_column(2)
    brand_names_indices = single_word_column(3)
    price = numeric_column(4)
    is_shipping = numeric_column(5)

    indices_matrix = np.concatenate(
        (names_indices, item_conditions, category_names_indices,
         brand_names_indices, price, is_shipping),
        axis=1)
    save_obj(indices_matrix, self.final_matrix_name)
    return indices_matrix
def main():
    """Command-line entry point: re-key a JSON molecule table by canonical SMILES.

    Loads the ``--json`` file, converts every key to a molecule object and
    back to SMILES (hydrogens removed), and drops entries that cannot be
    parsed, whose SMILES contains a ``.``, or whose atoms are rejected by
    ``is_mol_allowed``. The surviving entries are written to
    ``<scratch>/molecule_data`` as JSON and through ``misc.save_obj``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--scratch', action='store', help='', metavar="DIR",
                     default="_tmp_")
    cli.add_argument('--json', action='store', help='', metavar="FILE")
    cli.add_argument('-j', '--procs', action='store', help='pararallize',
                     metavar="int", default=0, type=int)
    args = cli.parse_args()

    if args.scratch[-1] != "/":
        args.scratch += "/"

    data = misc.load_json(args.json)

    def canonical_smiles(raw):
        # Returns the canonical SMILES for `raw`, or None (after printing
        # the reason) when the entry has to be skipped.
        molobj, _status = cheminfo.smiles_to_molobj(raw)
        if molobj is None:
            print("error none mol:", raw)
            return None

        smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)
        if "." in smiles:
            print("error multi mol:", smiles)
            return None

        atoms = cheminfo.molobj_to_atoms(molobj)
        if not is_mol_allowed(atoms):
            print("error heavy mol:", smiles)
            return None

        return smiles

    canonical_data = {}
    for key in list(data.keys()):
        smiles = canonical_smiles(key)
        if smiles is None:
            continue
        canonical_data[smiles] = data[key]

    target = args.scratch + "molecule_data"
    misc.save_json(target, canonical_data)
    misc.save_obj(target, canonical_data)

    return
def main():
    """Command-line entry point: collect property values per SMILES from SDF files.

    Every file given with ``--sdf`` is parsed with ``parse_ochem``; each
    molecule is converted to SMILES (hydrogens removed) and its value is
    appended to the list stored under that SMILES. The resulting
    ``{smiles: [values]}`` mapping is written to ``<scratch>/molecule_data``
    through ``misc.save_json`` and ``misc.save_obj``.

    ``-j -1`` is replaced by ``os.cpu_count()``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch', action='store', help='', metavar="DIR",
                        default="_tmp_")
    parser.add_argument('--sdf', action='store', help='', metavar="FILE",
                        nargs="+")
    parser.add_argument('-j', '--procs', action='store', help='pararallize',
                        metavar="int", default=0, type=int)
    args = parser.parse_args()

    if args.scratch[-1] != "/":
        args.scratch += "/"

    if args.procs == -1:
        args.procs = os.cpu_count()
        print("starting", args.procs, "procs")

    mol_val_dict = {}

    for sdf in args.sdf:

        print("reading", sdf)

        molobjs, values = parse_ochem(sdf, debug=True, procs=args.procs)

        for molobj, value in zip(molobjs, values):

            smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

            # Test the SMILES itself. The previous code tested the literal
            # string "smiles", which was never a key, so the list was reset
            # for every molecule and earlier values of a duplicate were lost.
            if smiles not in mol_val_dict:
                mol_val_dict[smiles] = []
            else:
                print("duplicate", smiles)

            mol_val_dict[smiles].append(value)

    keys = mol_val_dict.keys()
    print("TOTAL ITEMS", len(keys))

    misc.save_json(args.scratch + "molecule_data", mol_val_dict)
    misc.save_obj(args.scratch + "molecule_data", mol_val_dict)

    return
def create_short_sentences_database(self):
    """Assemble the short-sentence table from ``self.shorten_table``.

    Column 0 (as strings) is reduced with
    ``self.get_list_of_two_most_important_word_in_sentence`` using
    ``self.num_best_words``; columns 2 and 3 are cast to strings; columns
    1, 4 and 5 are taken unchanged. The six columns are concatenated back
    into one matrix, which is stored as ``'short_sentences_table'`` with
    ``save_obj`` and returned.
    """
    source = self.shorten_table

    def as_column(values):
        # Turn a 1-D sequence into an (n, 1) column for concatenation.
        return np.expand_dims(values, axis=1)

    shortened_names = self.get_list_of_two_most_important_word_in_sentence(
        source[:, 0].astype('str'), self.num_best_words)

    names = as_column(shortened_names)
    item_conditions = as_column(source[:, 1])
    category_names = as_column(source[:, 2].astype('str'))
    brand_names = as_column(source[:, 3].astype('str'))
    prices = as_column(source[:, 4])
    is_shipping = as_column(source[:, 5])
    # The item-description column is deliberately not used.

    columns = (names, item_conditions, category_names, brand_names, prices,
               is_shipping)
    short_sentence_table = np.concatenate(columns, axis=1)
    save_obj(short_sentence_table, 'short_sentences_table')
    return short_sentence_table
def clean_data(df, scratch):
    """Group the ``mpC`` values of ``df`` by canonical SMILES and save them.

    For every row, ``row.smiles`` is parsed into a molecule object; rows
    that cannot be parsed are reported and skipped. The value stored is
    ``row.mpC + 273.15`` (presumably Celsius to Kelvin; confirm with the
    data source). A count of every atom type seen is printed, and the
    ``{smiles: [values]}`` mapping is written to ``scratch + "molecule_data"``
    with ``misc.save_obj`` and ``misc.save_json``.

    :param df: table with ``smiles`` and ``mpC`` columns, iterated with
        ``iterrows()``.
    :param scratch: path prefix for the output files; it is concatenated
        as-is, so it should end with a path separator.
    :return: None
    """
    # The former `smiles = df.iloc[1]` was never used and raised IndexError
    # for tables with fewer than two rows, so it has been removed.
    data = {}
    atom_types = []

    for _, row in df.iterrows():

        smi = row.smiles
        value = row.mpC + 273.15

        molobj, status = cheminfo.smiles_to_molobj(smi)

        if molobj is None:
            print("error:", smi)
            continue

        # Re-generate the SMILES so equivalent inputs share one key.
        smi = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

        # Atoms
        atoms = cheminfo.molobj_to_atoms(molobj)
        atom_types += list(atoms)

        data.setdefault(smi, []).append(value)

    atom_types, counts = np.unique(atom_types, return_counts=True)

    for atom, count in zip(atom_types, counts):
        print(atom, count)

    misc.save_obj(scratch + "molecule_data", data)
    misc.save_json(scratch + "molecule_data", data)

    return
def main():
    """Command-line entry point: filter an SDF file together with its property lines.

    Molecules from ``--sdf`` and lines from ``--properties`` are read in
    lockstep. A pair is kept only if both ``molobjfilter(molobj)`` and
    ``valuefilter(line)`` are truthy. Kept molecules are written to
    ``<scratch>/structures.sdf.gz``, kept lines to
    ``<scratch>/properties.csv``, and a ``{smiles: [floats]}`` mapping
    (all whitespace-separated fields of the line but the first) is saved
    as ``<scratch>/molecules`` with ``misc.save_json`` and ``misc.save_obj``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch', action='store', help='', metavar="DIR",
                        default="_tmp_")
    parser.add_argument('--sdf', action='store', help='', metavar="FILE")
    parser.add_argument('--properties', action='store', help='',
                        metavar="FILE")
    parser.add_argument('-j', '--procs', action='store', help='pararallize',
                        metavar="int", default=0, type=int)
    args = parser.parse_args()

    if args.scratch[-1] != "/":
        args.scratch += "/"

    moledict = {}

    # Context managers close all three files even when a filter or a
    # conversion raises part-way through the loop.
    with gzip.open(args.scratch + "structures.sdf.gz", 'w') as fsdf, \
            open(args.scratch + "properties.csv", 'w') as fprop:

        molecules = cheminfo.read_sdffile(args.sdf)

        with open(args.properties, 'r') as properties:

            for molobj, line in zip(molecules, properties):

                status = molobjfilter(molobj)

                if not status:
                    continue

                status = valuefilter(line)

                if not status:
                    continue

                smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

                print(smiles)

                sdfstr = cheminfo.molobj_to_sdfstr(molobj)
                sdfstr += "$$$$\n"
                # The gzip handle is opened in binary mode, hence encode().
                fsdf.write(sdfstr.encode())
                fprop.write(line)

                values = [float(x) for x in line.split()[1:]]
                moledict[smiles] = values

    misc.save_json(args.scratch + "molecules", moledict)
    misc.save_obj(args.scratch + "molecules", moledict)

    return
def RunAll(self, total_episodes=10**3, bob=1):
    """Train one experiment per search strategy, then save and plot every group.

    Four groups are run in order: constant epsilon-greedy, exponentially
    decaying epsilon-greedy, UCB variants and Thompson sampling. After each
    ``exper.train(bob)`` the run identifier is read from the first line of
    ``<layers>L<phases>PH<resolution>R/number_rune.txt`` and the
    experiment's settings are stored under ``"run_<id>"``. Each group is
    saved with ``save_obj`` and plotted with ``ploting``; finally all runs
    are saved together as ``"all_methods"``.

    :param total_episodes: forwarded to every experiment as ``states_wasted``.
    :param bob: forwarded to ``exper.train`` and ``save_obj``; the
        ``"stds"`` plot is only drawn when it is greater than 1.
    :return: None
    """
    # Maps "run_<id>" to that run's label and settings. It was formerly
    # called `dict`, which shadowed the builtin.
    runs = {}

    def _train_and_record(exper, label):
        # Train `exper`, register its settings in `runs`, return the key.
        exper.train(bob)
        counter_file = (str(exper.layers) + "L" + str(exper.number_phases)
                        + "PH" + str(exper.resolution)
                        + "R/number_rune.txt")
        # The `with` block closes the file; no explicit close() is needed.
        with open(counter_file, "r") as f:
            run_id = f.readlines()[0]
        key = "run_" + str(run_id)
        runs[key] = {
            "label": label,
            "info": [exper.number_phases, exper.amplitude, exper.layers,
                     exper.resolution, exper.searching_method,
                     exper.guessing_rule, exper.method_guess,
                     exper.number_bobs, exper.bound_displacements,
                     exper.efficient_time, exper.ts_method],
            "info_ep": [exper.ep_method, exper.ep, exper.min_ep,
                        exper.time_tau],
            "info_ucb": [exper.ucb_method],
        }
        return key

    def _save_and_plot(keys, name, exper, *extra):
        # Save the runs listed in `keys` under `name` and plot them.
        plot_dict = filter_keys(runs, keys)
        save_obj(plot_dict, name, exper.layers, exper.number_phases,
                 exper.resolution, bob, *extra)
        ploting(plot_dict, mode="minimax")
        if bob > 1:
            ploting(plot_dict, mode="stds")

    # ---- constant epsilon-greedy ----
    method = "ep-greedy"
    fav_keys = []
    for ep in [0.01, 0.3, 1]:
        exper = training.Experiment(
            searching_method=method, layers=self.layers, ep=ep,
            resolution=self.resolution,
            bound_displacements=self.bound_displacements,
            states_wasted=total_episodes, ep_method="normal", min_ep=0.01,
            guessing_rule=self.guessing_rule,
            efficient_time=self.efficient_time)
        fav_keys.append(_train_and_record(exper, str(ep) + "-greedy "))
    _save_and_plot(fav_keys, "ep-greedy-Dolinar", exper)

    # ---- exponentially decaying epsilon-greedy ----
    fav_keys = []
    for tau in [200]:
        for min_ep in [0.01]:
            for method_guess in ["undefined"]:
                # NOTE(review): `ep` is the value left over from the loop
                # above (1). Kept as in the original; confirm it is
                # intended for the "exp-decay" schedule.
                exper = training.Experiment(
                    searching_method="ep-greedy", layers=self.layers,
                    min_ep=min_ep, time_tau=tau, ep=ep,
                    resolution=self.resolution,
                    bound_displacements=self.bound_displacements,
                    states_wasted=total_episodes, ep_method="exp-decay",
                    guessing_rule=self.guessing_rule,
                    efficient_time=self.efficient_time,
                    method_guess=method_guess)
                label = ("max(" + str(min_ep) + ", e^-t/" + str(tau)
                         + ")-greedy ")
                fav_keys.append(_train_and_record(exper, label))
    _save_and_plot(fav_keys, "exp-ep-greedy-Dolinar", exper)

    # ---- UCB variants ----
    fav_keys = []
    method = "ucb"
    for ucbm in ["ucb1", "ucb2", "ucb3"]:
        exper = training.Experiment(
            searching_method=method, layers=self.layers, ucb_method=ucbm,
            resolution=self.resolution,
            bound_displacements=self.bound_displacements,
            states_wasted=total_episodes,
            guessing_rule=self.guessing_rule,
            efficient_time=self.efficient_time)
        fav_keys.append(_train_and_record(exper, ucbm))
    # This group additionally passes total_episodes to save_obj.
    _save_and_plot(fav_keys, "ucbs-Dolinar", exper, total_episodes)

    # ---- Thompson sampling ----
    fav_keys = []
    method = "thompson-sampling"
    # for soft in [0.75, 1.25, 1]:
    for soft in [1]:
        # mode_ts can relate the q-table with the TS update, but that
        # does not seem to give any enhancement.
        for mode_ts in ["None"]:
            exper = training.Experiment(
                searching_method=method, layers=self.layers,
                resolution=self.resolution,
                bound_displacements=self.bound_displacements,
                states_wasted=total_episodes,
                guessing_rule=self.guessing_rule, soft_ts=soft,
                efficient_time=self.efficient_time, ts_method=mode_ts)
            fav_keys.append(_train_and_record(exper, str(soft) + "-TS"))
    _save_and_plot(fav_keys, "TS", exper)

    save_obj(runs, "all_methods", exper.layers, exper.number_phases,
             exper.resolution, bob)
    return
def main():
    """Command-line entry point: merge molecule databases and draw their overlap.

    The SMILES set of every ``--sdf`` file and of every ``--dict`` object is
    collected and its size printed. The ``--dict`` objects are merged into
    one ``{smiles: values}`` mapping (optionally passed through
    ``filter_dict`` with ``--filter``) that is saved as
    ``<scratch>/molecule_data``. When ``--name`` is given, a Venn diagram is
    drawn for two or three databases and the figure is saved as
    ``<scratch>/venndiagram``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--scratch', action='store', help='', metavar="DIR",
                     default="_tmp_")
    cli.add_argument('--sdf', action='store', help='', metavar="FILE",
                     nargs="+", default=[])
    cli.add_argument('--dict', action='store', help='', metavar="FILE",
                     nargs="+", default=[])
    cli.add_argument('--name', action='store', help='', metavar="STR",
                     nargs="+")
    cli.add_argument('--filename', action='store', help='', metavar="STR")
    cli.add_argument('--filter', action='store_true', help='')
    cli.add_argument('-j', '--procs', action='store', help='pararallize',
                     metavar="int", default=0, type=int)
    args = cli.parse_args()

    if args.scratch[-1] != "/":
        args.scratch += "/"

    print()

    databases_set = []
    databases_dict = []

    # Structure files contribute only their SMILES set.
    for sdf in args.sdf:
        molobjs = list(cheminfo.read_sdffile(sdf))
        smiles = {cheminfo.molobj_to_smiles(molobj, remove_hs=True)
                  for molobj in molobjs}
        databases_set.append(smiles)
        print(sdf, len(smiles))

    # Stored dictionaries contribute their keys and their content.
    for filename in args.dict:
        data = misc.load_obj(filename)
        smiles = set(data.keys())
        databases_set.append(smiles)
        databases_dict.append(data)
        print(filename, len(smiles))

    if args.scratch is not None:

        # Merge databases
        everything = {}
        for data in databases_dict:
            for key in data.keys():
                if key not in everything:
                    everything[key] = []
                everything[key] += data[key]

        if args.filter:
            everything = filter_dict(everything)

        print("n items", len(everything.keys()))

        # Save
        target = args.scratch + "molecule_data"
        misc.save_json(target, everything)
        misc.save_obj(target, everything)

    if args.name is not None:

        n_db = len(databases_set)
        if n_db == 2:
            venn2(databases_set, set_labels=args.name)
        elif n_db == 3:
            venn3(databases_set, set_labels=args.name)

        plt.savefig(args.scratch + "venndiagram")

    return
def main():
    """Command-line entry point: compute five-fold learning curves for ``fit_model``.

    Loads ``properties`` and ``structures.sdf.gz`` from the scratch
    directory. Features are loaded from ``<scratch>/repr.ols`` when
    ``<scratch>/repr.ols.pkl`` exists; otherwise they are computed with
    ``extract_features``, turned into a DataFrame with NaN replaced by 0,
    and cached. For every fold and every training size in
    ``<scratch>/n_train`` the test RMSE is computed from the signed
    differences returned by ``fit_model``. The per-size mean over folds is
    printed and the full score matrix (sizes x folds) is saved as
    ``<scratch>/score.ols``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch', action='store', help='', metavar="dir",
                        default="_tmp_")
    # type=int: without it a seed given on the command line arrives as a
    # string, which np.random.seed does not accept.
    parser.add_argument('--randomseed', action='store', help='random seed',
                        metavar="int", default=1, type=int)
    parser.add_argument('-j', '--procs', action='store', help='pararallize',
                        type=int, metavar="int", default=0)
    args = parser.parse_args()

    if args.scratch[-1] != "/":
        args.scratch += "/"

    # Not that random
    np.random.seed(args.randomseed)

    # Get properties
    properties = misc.load_npy(args.scratch + "properties")
    molobjs = cheminfo.read_sdffile(args.scratch + "structures.sdf.gz")

    # Get features
    filename = "repr.ols"
    if os.path.exists(args.scratch + filename + ".pkl"):
        features = misc.load_obj(args.scratch + filename)

    else:
        features = extract_features(properties, molobjs, procs=args.procs)
        features = pd.DataFrame(features)
        features = features.fillna(0)
        misc.save_obj(args.scratch + filename, features)

    n_items = len(features)
    X = np.arange(n_items)

    assert len(properties) == n_items

    # Train
    n_splits = 5
    n_train = misc.load_npy(args.scratch + "n_train")

    fold_five = sklearn.model_selection.KFold(n_splits=n_splits,
                                              random_state=45, shuffle=True)

    scores = []

    for i, (idxs_train, idxs_test) in enumerate(fold_five.split(X)):

        # un-ordered idxs_train; reseeded per fold so that the shuffle
        # does not depend on --randomseed
        np.random.seed(45 + i)
        np.random.shuffle(idxs_train)

        learning_curve = []

        for n in n_train:
            idxs = idxs_train[:n]

            # signed difference
            sign_diff = fit_model(features, idxs, idxs_test)

            # rmse
            diff = sign_diff**2
            rmse_test = np.sqrt(diff.mean())

            # save
            learning_curve.append(rmse_test)

        scores.append(learning_curve)

    # Transpose to (training size, fold) before averaging over folds.
    scores = np.array(scores)
    scores = scores.T

    mean_score = np.mean(scores, axis=1)
    print(mean_score)
    misc.save_npy(args.scratch + "score.ols", scores)

    return
def EnhancedQL(self, total_episodes=10**3, bob=1, plots=False):
    """Train the Q-learning variants, save every group and optionally plot them.

    Five groups are trained in order: constant epsilon-greedy,
    exponentially decaying epsilon-greedy, UCB variants, Thompson sampling
    and decaying epsilon-greedy with Thompson-sampling guessing. After each
    ``exper.train(bob)`` the run identifier is read from the first line of
    ``<layers>L<phases>PH<resolution>R/number_rune.txt`` and the
    experiment's settings are stored under ``"run_<id>"``. Each group and
    the complete collection are written with ``save_obj``.

    With ``plots`` set, two figures (``Qlearning.png`` and ``ENH-QL.png``)
    are written to ``<layers>L<phases>PH<resolution>R/figures/``. The
    plotting code looks up the fixed keys ``run_1`` .. ``run_9``, so it
    presumably expects the run counter to start from a fresh state; verify
    before relying on it.

    :param total_episodes: forwarded to every experiment as ``states_wasted``.
    :param bob: forwarded to ``exper.train`` and ``save_obj``.
    :param plots: when true, draw and save the two figures.
    :return: None
    """
    # Maps "run_<id>" to that run's label and settings. It was formerly
    # called `dict`, which shadowed the builtin.
    runs = {}

    def _train_and_record(exper, label):
        # Train `exper`, register its settings in `runs`, return the key.
        exper.train(bob)
        counter_file = (str(exper.layers) + "L" + str(exper.number_phases)
                        + "PH" + str(exper.resolution)
                        + "R/number_rune.txt")
        # The `with` block closes the file; no explicit close() is needed.
        with open(counter_file, "r") as f:
            run_id = f.readlines()[0]
        key = "run_" + str(run_id)
        runs[key] = {
            "label": label,
            "info": [exper.number_phases, exper.amplitude, exper.layers,
                     exper.resolution, exper.searching_method,
                     exper.guessing_rule, exper.method_guess,
                     exper.number_bobs, exper.bound_displacements,
                     exper.efficient_time, exper.ts_method],
            "info_ep": [exper.ep_method, exper.ep, exper.min_ep,
                        exper.time_tau],
            "info_ucb": [exper.ucb_method],
        }
        return key

    def _save_group(keys, name, exper, *extra):
        # Save the runs listed in `keys` under `name`.
        plot_dict = filter_keys(runs, keys)
        save_obj(plot_dict, name, exper.layers, exper.number_phases,
                 exper.resolution, bob, *extra)

    # ---- constant epsilon-greedy ----
    method = "ep-greedy"
    fav_keys = []
    for ep in [0.01, 0.3, 1]:
        exper = training.Experiment(
            searching_method=method, layers=self.layers, ep=ep,
            resolution=self.resolution,
            bound_displacements=self.bound_displacements,
            states_wasted=total_episodes, ep_method="normal", min_ep=0.01,
            guessing_rule=self.guessing_rule,
            efficient_time=self.efficient_time)
        fav_keys.append(_train_and_record(exper, str(ep) + "-greedy "))
    _save_group(fav_keys, "ep-greedy", exper)

    # ---- exponentially decaying epsilon-greedy ----
    fav_keys = []
    for tau in [200]:
        for min_ep in [0.01]:
            exper = training.Experiment(
                searching_method="ep-greedy", layers=self.layers,
                min_ep=min_ep, time_tau=tau, resolution=self.resolution,
                bound_displacements=self.bound_displacements,
                states_wasted=total_episodes, ep_method="exp-decay",
                guessing_rule=self.guessing_rule,
                efficient_time=self.efficient_time)
            label = ("max(" + str(min_ep) + ", e^-t/" + str(tau)
                     + ")-greedy ")
            fav_keys.append(_train_and_record(exper, label))
    _save_group(fav_keys, "exp-ep-greedy", exper)

    # ---- UCB variants ----
    fav_keys = []
    method = "ucb"
    for ucbm in ["ucb1", "ucb2", "ucb3"]:
        exper = training.Experiment(
            searching_method=method, layers=self.layers, ucb_method=ucbm,
            resolution=self.resolution,
            bound_displacements=self.bound_displacements,
            states_wasted=total_episodes,
            guessing_rule=self.guessing_rule,
            efficient_time=self.efficient_time)
        fav_keys.append(_train_and_record(exper, ucbm))
    # This group additionally passes total_episodes to save_obj.
    _save_group(fav_keys, "ucbs", exper, total_episodes)

    # ---- Thompson sampling ----
    fav_keys = []
    method = "thompson-sampling"
    for soft in [1]:
        # mode_ts can relate the q-table with the TS update, but that
        # does not seem to give any enhancement.
        for mode_ts in ["None"]:
            exper = training.Experiment(
                searching_method=method, layers=self.layers,
                resolution=self.resolution,
                bound_displacements=self.bound_displacements,
                states_wasted=total_episodes,
                guessing_rule=self.guessing_rule, soft_ts=soft,
                efficient_time=self.efficient_time, ts_method=mode_ts)
            fav_keys.append(_train_and_record(exper, str(soft) + "-TS"))
    _save_group(fav_keys, "TS", exper)

    # ---- decaying epsilon-greedy with Thompson-sampling guessing ----
    ep = "TS+0.01exp"
    fav_keys = []
    exper = training.Experiment(
        searching_method="ep-greedy", layers=self.layers, min_ep=0.01,
        time_tau=200, ep=0.01, resolution=self.resolution,
        bound_displacements=self.bound_displacements,
        states_wasted=total_episodes, ep_method="exp-decay",
        guessing_rule="None", efficient_time=self.efficient_time,
        method_guess="thompson-sampling")
    fav_keys.append(_train_and_record(exper, str(ep) + "-greedy "))
    _save_group(fav_keys, "ep-TS", exper)

    save_obj(runs, "all_methods", exper.layers, exper.number_phases,
             exper.resolution, bob)

    if not plots:
        return

    mode_log = "on"

    matplotlib.rc('font', serif='cm10')
    matplotlib.rc('text', usetex=True)
    plt.rcParams.update({'font.size': 45})

    color2 = (225 / 255, 15 / 255, 245 / 255)
    color_2l = [46 / 255, 30 / 255, 251 / 255]
    colorexp = (13 / 255, 95 / 255, 14 / 255)
    colorucb1 = (19 / 255, 115 / 255, 16 / 255)
    colorucb2 = (170 / 255, 150 / 255, 223 / 255)
    colorucb3 = (74 / 255, 90 / 255, 93 / 255)

    colors = {
        "run_1": "orange",
        "run_2": color2,
        "run_3": "brown",
        "run_4": colorexp,
        "run_5": colorucb1,
        "run_6": colorucb2,
        "run_7": colorucb3,
        "run_8": "yellow",
        "run_9": "purple",
    }
    labels = {
        "run_1": r'$\epsilon = 0.01$' + "-greedy",
        "run_2": r'$\epsilon = 0.3$' + "-greedy",
        "run_3": r'$\epsilon = 1$' + "-greedy",
        "run_4": "max(0.01, " + r'$e^{-t/\tau}$' + ")-greedy",
        "run_5": "UCB-1",
        "run_6": "UCB-2",
        "run_7": "UCB-3",
        "run_8": "TS",
        "run_9": "max(0.01, " + r'$e^{-t/\tau}$' + ")-greedy + TS",
    }

    def _plot_group(interesting, tail_fraction, figure_name, banner=None,
                    echo_runs=False):
        # Draw the two-panel figure for the runs in `interesting` and save
        # it as `figure_name`. `tail_fraction` is the share of the final
        # samples shown in the zoomed inset.
        dict_plot = {}
        print(runs.keys())
        for key in interesting:
            dict_plot[key] = runs[key]
        for key in interesting:
            # Replaces the stored plain-text label by the LaTeX one.
            dict_plot[key]["label"] = labels[key]

        plt.figure(figsize=(30, 22), dpi=150)
        ax1 = plt.subplot2grid((2, 1), (0, 0))
        ax2 = plt.subplot2grid((2, 1), (1, 0))

        axinticks = []
        first = True
        if banner is not None:
            print(banner)

        for run in dict_plot.keys():
            if echo_runs:
                print(run)
            (number_phases, amplitude, layers, resolution,
             _searching_method, _guessing_rule, _method_guess,
             _number_bobs, bound_displacements, _efficient_time,
             _ts_method) = runs[run]["info"]

            exp = Experiment(number_phases=number_phases,
                             amplitude=amplitude, layers=layers,
                             resolution=resolution,
                             bound_displacements=bound_displacements)
            exp.load_data(run)
            run_color = colors[run]

            if mode_log == "on":
                times = np.log10(exp.results[0])
            else:
                times = exp.results[0]

            if first:
                # Reference curves and the inset are created only once.
                ax1.plot(times,
                         exp.optimal_value * np.ones(len(exp.results[0])),
                         '--', linewidth=9, alpha=0.8,
                         label=r'$P_*^{(2)}$', color=color_2l)
                ax1.plot([times[0], times[-1]], [exp.homodyne_limit] * 2,
                         '--', linewidth=9, color="black",
                         label="Homodyne limit")
                ax2.plot([times[0], times[-1]], [exp.homodyne_limit] * 2,
                         '--', linewidth=9, color="black",
                         label="Homodyne limit")
                ax2.plot(times, exp.optimal_value * np.ones(len(times)),
                         '--', linewidth=9, alpha=0.6, color=color_2l)

                axins = zoomed_inset_axes(ax2, zoom=2.7, loc="lower right")

                loc1 = -int(len(exp.results[0]) * tail_fraction)
                loc2 = -1
                first = False

            ax1.plot(times, exp.results[1] / exp.results[0], linewidth=9,
                     alpha=0.9, label=runs[run]["label"], color=run_color)
            ax2.plot(times, exp.results[2], linewidth=3, alpha=.5,
                     label=runs[run]["label"], color=run_color)
            axins.plot(np.log10(exp.results[0][loc1:loc2]),
                       np.log10(exp.results[2][loc1:loc2]), '-',
                       linewidth=9, alpha=.8, color=colors[run],
                       label=runs[run]["label"])
            axins.plot(np.log10([exp.results[0][loc1],
                                 exp.results[0][loc2 - 1]]),
                       np.log10([1 - exp.opt_2l] * 2), '-.', alpha=.8,
                       linewidth=9, color=color_2l, label="Optimal 2L")
            axinticks.append(np.log10(exp.results[2][loc1]))
            ax1.fill_between(
                times,
                (exp.results[1] - exp.stds[0] / 2) / exp.results[0],
                (exp.results[1] + exp.stds[0] / 2) / exp.results[0],
                alpha=.4, color=run_color)
            ax2.fill_between(times,
                             np.log10(exp.results[2] - exp.stds[1] / 2),
                             np.log10(exp.results[2] + exp.stds[1] / 2),
                             alpha=0.4, color=run_color)

        ax1.legend()
        mark_inset(ax2, axins, loc1=1, loc2=2, fc="green", ec="0.3",
                   alpha=0.5)

        # From here on `exp` and `run` refer to the last run plotted.
        axinticks.append(exp.opt_2l)
        yticks = np.arange(np.round(min(exp.results[2]), 3),
                           1 - exp.opt_2l, .1)
        ax1.set_yticks(yticks)
        ax2.set_yticks(yticks)
        axins.set_yticks(axinticks)
        axins.set_yticklabels([str(np.round(i, 3)) for i in axinticks])

        plt.setp(axins.get_yticklabels(), size=27)
        plt.setp(axins.get_xticklabels(), visible=False)
        plt.setp(ax1.get_xticklabels(), visible=False)

        ax2.set_xticks([0, 1, 2, 3, 4, 5, np.log10(5 * 10**5)])
        ax2.set_xticklabels([
            r'$10^{0}$', r'$10^{1}$', r'$10^{2}$', r'$10^{3}$',
            r'$10^{4}$', r'$10^{5}$', r'$5 \; 10^{5}$'
        ])

        plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                            wspace=None, hspace=0.1)
        ax2.tick_params(axis='x', which='both', top='off')
        ax1.legend(loc="lower right", prop={"size": 35})
        ax2.set_xlabel("t", size=54)
        ax1.set_ylabel(r'\textbf{R}$_t$', size=54)
        ax2.set_ylabel(r'\textbf{P}$_t$', size=54)

        inf = runs[run]["info"]
        layers, phases, resolution = inf[2], inf[0], inf[3]
        plt.savefig(
            str(layers) + "L" + str(phases) + "PH" + str(resolution)
            + "R/figures/" + figure_name)

    # ####### Q-LEARNING PLOT ######
    _plot_group(["run_1", "run_2", "run_3", "run_4"], 0.7,
                "Qlearning.png", banner="ploting Q learning")

    # ####### ENH-Q-LEARNING PLOT ######
    _plot_group(["run_2", "run_9", "run_8", "run_5"], 0.35, "ENH-QL.png",
                echo_runs=True)

    return
def main():
    """Command-line entry point: compute and store molecular representations.

    Reads molecules from the SDF file given by ``--sdf``, converts them to
    atoms/coordinates, prints a short summary and then writes one
    ``repr.<name>`` file per requested representation into the scratch
    directory.  Coordinate-based representations are generated with
    ``xyzs_to_representations`` and molecule-based ones with
    ``molobjs_to_representations``; names that belong to neither group are
    silently skipped.

    Command-line options:
        --scratch DIR          output directory prefix (default ``_tmp_``)
        --sdf FILE             input SDF file
        -j / --procs INT       worker count; ``-1`` means "all CPUs"
        -r / --representations names of the representations to generate
        --conformers           parsed but currently unused (the ensemble
                               step is disabled)
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch', action='store', help='', metavar="dir", default="_tmp_")
    parser.add_argument('--conformers', action='store_true', help='')
    parser.add_argument('--sdf', action='store', help='', metavar="file")
    parser.add_argument('-j', '--procs', action='store', help='pararallize', metavar="int", default=0, type=int)
    parser.add_argument('-r', '--representations', action='store', help='', metavar="STR", nargs="+")
    args = parser.parse_args()

    # Normalise the scratch prefix.  An empty value is left untouched (files
    # then land in the working directory) instead of crashing on ``[-1]``.
    if args.scratch and not args.scratch.endswith("/"):
        args.scratch += "/"

    if args.procs == -1:
        # os.cpu_count() may return None when the count cannot be determined.
        args.procs = os.cpu_count() or 1
        print("set procs", args.procs)

    representation_names_coordbased = ("cm", "fchl18", "fchl19", "slatm", "bob")
    representation_names_molbased = ("morgan", "rdkitfp")

    if args.representations is None:
        representation_names = ["slatm", "bob", "cm", "rdkitfp", "morgan"]
    else:
        representation_names = args.representations

    # Materialise the reader output so it can be traversed more than once.
    molobjs = list(cheminfo.read_sdffile(args.sdf))

    mol_atoms, mol_coords = molobjs_to_xyzs(molobjs)

    misc.save_obj(args.scratch + "atoms", mol_atoms)

    # Distinct atom types over the whole data set (for the summary only).
    unique_atoms = np.unique(
        [atom for atoms in mol_atoms for atom in np.unique(atoms)])

    # Size of the largest molecule; passed on to the representation generator.
    max_atoms = max(len(atoms) for atoms in mol_atoms)

    n_items = len(mol_coords)

    print("total mols:", n_items)
    print("atom types:", unique_atoms)
    print("max atoms: ", max_atoms)
    print()
    print("representations:", representation_names)
    print()

    misc.save_txt(args.scratch + "n_items", n_items)

    def save_representations(name, representations):
        """Store as .npy when the result is a numpy object, else as generic object."""
        filename = args.scratch + "repr." + name
        if isinstance(representations, (np.ndarray, np.generic)):
            misc.save_npy(filename, representations)
        else:
            misc.save_obj(filename, representations)

    # Gas phase: representations built from atoms + coordinates
    for name in representation_names:
        if name not in representation_names_coordbased:
            continue

        representations = xyzs_to_representations(
            mol_atoms,
            mol_coords,
            name=name,
            scr=args.scratch,
            max_atoms=max_atoms,
            procs=args.procs)
        save_representations(name, representations)

        # Drop the reference before the next representation is allocated.
        del representations

    # Representations built directly from the molecule objects
    for name in representation_names:
        if name not in representation_names_molbased:
            continue

        representations = molobjs_to_representations(
            molobjs, name=name, procs=args.procs)
        save_representations(name, representations)

        del representations

    # NOTE: the ensemble/conformer step is disabled, so ``--conformers`` has
    # no effect.  The function returns normally instead of calling the
    # interactive-only ``quit()`` helper.
    return
def get_representations_slatm(atoms,
                              structures,
                              scr="_tmp_/",
                              mbtypes=None,
                              debug=True,
                              procs=0,
                              **kwargs):
    """Generate one SLATM representation per molecule.

    Args:
        atoms: list of molecule atoms, one entry per molecule.  Passed as a
            whole to ``qml.representations.get_slatm_mbtypes`` and entry by
            entry to ``generate_slatm``.
        structures: per-molecule structures paired element-wise with
            ``atoms`` (presumably coordinates -- confirm against the caller).
        scr: path prefix used to cache the many-body types in
            ``<scr>slatm.mbtypes``.
        mbtypes: precomputed many-body types.  When ``None`` they are loaded
            from the cache file, or generated and saved if it does not exist.
        debug: print progress messages when true.
        procs: with ``procs > 1`` the work is distributed over a process pool
            of that size; otherwise it runs serially in this process.
        **kwargs: accepted for signature compatibility and ignored.

    Returns:
        ``np.array`` built from the list of generated representations.

    Side effects:
        In the parallel branch ``OMP_NUM_THREADS`` is set to ``"1"`` in
        ``os.environ`` and is not restored afterwards.
    """

    if mbtypes is None:

        filename_mbtypes = scr + "slatm.mbtypes"

        try:
            mbtypes = misc.load_obj(filename_mbtypes)
        except FileNotFoundError:
            print("Generate slatm mbtypes")
            mbtypes = qml.representations.get_slatm_mbtypes(atoms)
            misc.save_obj(filename_mbtypes, mbtypes)

    if debug:
        print("Generate slatm representations")

    if procs > 1:
        # One OpenMP thread per worker so the processes do not oversubscribe.
        os.environ["OMP_NUM_THREADS"] = "1"

        funcname = partial(procs_representation_slatm, mbtypes=mbtypes)

        # The context manager tears the pool down once map() has returned,
        # so worker processes are not leaked.
        with Pool(processes=procs) as pool:
            replist = pool.map(funcname, zip(structures, atoms))

    else:
        replist = [
            qml.representations.generate_slatm(coord, atom, mbtypes)
            for coord, atom in zip(structures, atoms)
        ]

    return np.array(replist)