Exemplo n.º 1
0
def main():
    """Command-line entry point: read a CSV file, clean it and store the result.

    Options:
        --data FILE     file handed to ``read_csv`` (read with a header).
        --scratch DIR   output directory prefix (default ``_tmp_``).
        -j/--procs int  parsed but not used by this function.

    The cleaned data is written twice under ``<scratch>/molecule_data``,
    once through ``misc.save_obj`` and once through ``misc.save_json``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--data', help='', metavar='FILE')
    cli.add_argument('--scratch', help='', metavar="DIR", default="_tmp_")
    cli.add_argument('-j', '--procs', help='', type=int, metavar='int',
                     default=0)
    args = cli.parse_args()

    # Make sure the scratch prefix ends with a slash so that file names can
    # simply be appended to it.
    if args.scratch[-1] != "/":
        args.scratch = args.scratch + "/"

    _header, data = read_csv(args.data, read_header=True)
    data = clean_data(data)

    target = args.scratch + "molecule_data"
    misc.save_obj(target, data)
    misc.save_json(target, data)
Exemplo n.º 2
0
    def voc2index(self):
        """Build and persist the vocabulary of the short-sentences table.

        Every cell of ``self.short_sentences_table`` is visited row by row.
        Columns 1, 4 and 5 are skipped (already numerical), column 0 is split
        on whitespace into separate words, and every other column is counted
        as one word (the ``str`` of the cell). Progress is printed every 100
        visited cells.

        :return: the ``stoi`` mapping of the ``Vocab`` built from the word
                 counts; the same mapping is saved under ``self.dict_name``.
        """
        table = self.short_sentences_table
        n_rows = table.shape[0]
        n_cols = table.shape[1]
        total_cells = n_rows * n_cols
        numeric_columns = (1, 4, 5)  # already numerical values

        frequencies = defaultdict(int)
        visited = 0
        for r in range(n_rows):
            for c in range(n_cols):
                visited += 1
                if visited % 100 == 0:
                    print(f'done {visited * 100 / total_cells} %')
                if c in numeric_columns:
                    continue
                cell = table[r, c]
                # The name column may hold several words; the others hold one.
                tokens = cell.split() if c == 0 else [str(cell)]
                for token in tokens:
                    frequencies[token] += 1

        vocabulary = Vocab(Counter(frequencies))
        save_obj(vocabulary.stoi, self.dict_name)
        return vocabulary.stoi
Exemplo n.º 3
0
    def create_indices_matrix(self, is_dict_existing):
        """Turn the short-sentences table into a matrix of indices and numbers.

        :param is_dict_existing: if a dictionary already exists (from previous
            calls) it is loaded from ``self.dict_name``; otherwise a new one is
            created with ``self.voc2index()``.
        :return: the column-wise concatenation of name indices (column 0),
            item condition (1), category index (2), brand index (3), price (4)
            and shipping flag (5). The matrix is also saved under
            ``self.final_matrix_name``.
        """
        if is_dict_existing:
            word_dict = load_obj(self.dict_name)
        else:  # create a new vocabulary
            word_dict = self.voc2index()

        def encode_words(cell):
            # Replace every word in the cell with the matching vocab number.
            return [word_dict[word] for word in cell.split()]

        def encode_cell(cell):
            # Treat the whole cell as one string and look it up as such.
            return [word_dict[cell]]

        def numeric_column(col):
            # Columns that are already numerical only need a float column view.
            values = self.short_sentences_table[:, col].astype('float')
            return np.expand_dims(values, axis=1)

        # For each column of the table, replace (if needed) the words /
        # sentence with the matching index from the vocabulary.
        names_indices = np.array(
            [encode_words(t) for t in self.short_sentences_table[:, 0]])
        item_conditions = numeric_column(1)
        category_names_indices = np.array(
            [encode_cell(t) for t in self.short_sentences_table[:, 2]])
        brand_names_indices = np.array(
            [encode_cell(t) for t in self.short_sentences_table[:, 3]])
        price = numeric_column(4)
        is_shipping = numeric_column(5)

        columns = (names_indices, item_conditions, category_names_indices,
                   brand_names_indices, price, is_shipping)
        indices_matrix = np.concatenate(columns, axis=1)
        save_obj(indices_matrix, self.final_matrix_name)
        return indices_matrix
Exemplo n.º 4
0
def main():
    """Command-line entry point: re-key a JSON molecule dictionary by SMILES.

    Options:
        --scratch DIR   output directory prefix (default ``_tmp_``).
        --json FILE     input file loaded with ``misc.load_json``.
        -j/--procs int  parsed but not used by this function.

    Each key of the input is converted to a molecule object and back to a
    SMILES string (hydrogens removed). Entries are dropped, with a message,
    when the conversion fails, when the resulting SMILES contains a ``.``,
    or when ``is_mol_allowed`` rejects the atoms. The surviving entries are
    saved under ``<scratch>/molecule_data`` as JSON and via ``misc.save_obj``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--scratch', help='', metavar="DIR", default="_tmp_")
    cli.add_argument('--json', help='', metavar="FILE")
    cli.add_argument('-j', '--procs', help='pararallize', metavar="int",
                     default=0, type=int)
    args = cli.parse_args()

    # Make sure the scratch prefix ends with a slash.
    if args.scratch[-1] != "/":
        args.scratch = args.scratch + "/"

    data = misc.load_json(args.json)
    canonical_data = {}

    for key in list(data.keys()):

        molobj, _status = cheminfo.smiles_to_molobj(key)
        if molobj is None:
            print("error none mol:", key)
            continue

        smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)
        if "." in smiles:
            print("error multi mol:", smiles)
            continue

        atoms = cheminfo.molobj_to_atoms(molobj)
        allowed = is_mol_allowed(atoms)
        if not allowed:
            print("error heavy mol:", smiles)
            continue

        canonical_data[smiles] = data[key]

    target = args.scratch + "molecule_data"
    misc.save_json(target, canonical_data)
    misc.save_obj(target, canonical_data)
Exemplo n.º 5
0
def main():
    """Command-line entry point: collect values per molecule from SDF files.

    Options:
        --scratch DIR    output directory prefix (default ``_tmp_``).
        --sdf FILE ...   one or more files handed to ``parse_ochem``.
        -j/--procs int   number of processes passed to ``parse_ochem``;
                         ``-1`` is replaced by ``os.cpu_count()``.

    Every molecule is converted to a SMILES string (hydrogens removed) and
    its value is appended to the list stored under that SMILES, so repeated
    molecules accumulate all their values. The resulting dictionary is saved
    under ``<scratch>/molecule_data`` as JSON and via ``misc.save_obj``.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch', action='store', help='', metavar="DIR", default="_tmp_")
    parser.add_argument('--sdf', action='store', help='', metavar="FILE", nargs="+")
    parser.add_argument('-j', '--procs', action='store', help='pararallize', metavar="int", default=0, type=int)

    args = parser.parse_args()

    # Make sure the scratch prefix ends with a slash.
    if args.scratch[-1] != "/":
        args.scratch += "/"

    if args.procs == -1:
        args.procs = os.cpu_count()
        print("starting", args.procs, "procs")

    mol_val_dict = {}

    for sdf in args.sdf:

        print("reading", sdf)

        molobjs, values = parse_ochem(sdf, debug=True, procs=args.procs)

        for molobj, value in zip(molobjs, values):

            smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

            # Look up the actual SMILES string. The previous code tested the
            # literal "smiles", which reset the list on every molecule and
            # silently discarded the values of duplicates.
            if smiles not in mol_val_dict:
                mol_val_dict[smiles] = []
            else:
                print("duplicate", smiles)

            mol_val_dict[smiles].append(value)

    print("TOTAL ITEMS", len(mol_val_dict))

    misc.save_json(args.scratch + "molecule_data", mol_val_dict)
    misc.save_obj(args.scratch + "molecule_data", mol_val_dict)

    return
Exemplo n.º 6
0
    def create_short_sentences_database(self):
        """Create the short-sentences table from ``self.shorten_table``.

        Column 0 (names) is shortened with
        ``get_list_of_two_most_important_word_in_sentence`` using
        ``self.num_best_words``; columns 2 and 3 are cast to ``str``; columns
        1, 4 and 5 are taken unchanged. The six columns are then concatenated
        back into one matrix, which is saved as ``'short_sentences_table'``
        and returned. The item-description column (6) is deliberately unused.
        """
        table = self.shorten_table

        def as_column(values):
            # Give a 1-D column a second axis so it can be concatenated.
            return np.expand_dims(values, axis=1)

        shortened_names = self.get_list_of_two_most_important_word_in_sentence(
            table[:, 0].astype('str'), self.num_best_words)
        names = as_column(shortened_names)
        item_conditions = as_column(table[:, 1])
        category_names = as_column(table[:, 2].astype('str'))
        brand_names = as_column(table[:, 3].astype('str'))
        prices = as_column(table[:, 4])
        is_shipping = as_column(table[:, 5])

        columns = (names, item_conditions, category_names, brand_names,
                   prices, is_shipping)
        short_sentence_table = np.concatenate(columns, axis=1)
        save_obj(short_sentence_table, 'short_sentences_table')
        return short_sentence_table
Exemplo n.º 7
0
def clean_data(df, scratch):
    """Group the values of a data frame by SMILES and save them.

    For every row, ``row.smiles`` is converted to a molecule object and back
    to a SMILES string (hydrogens removed), and ``row.mpC + 273.15`` is
    appended to the list stored under that SMILES (presumably a Celsius to
    Kelvin conversion; confirm against the data source). Rows whose SMILES
    cannot be converted are reported and skipped.

    The atom types of all accepted molecules are counted and printed, and the
    dictionary is written to ``<scratch>molecule_data`` via ``misc.save_obj``
    and ``misc.save_json``.

    :param df: data frame with ``smiles`` and ``mpC`` columns.
    :param scratch: path prefix the output name is appended to.
    :return: None
    """
    # NOTE: an unused ``df.iloc[1]`` lookup used to sit here; it raised
    # IndexError for frames with fewer than two rows and has been removed.
    data = {}

    atom_types = []

    for _, row in df.iterrows():

        smi = row.smiles
        value = row.mpC + 273.15

        molobj, _status = cheminfo.smiles_to_molobj(smi)

        if molobj is None:
            print("error:", smi)
            continue

        # Use the SMILES regenerated from the molecule object as the key.
        smi = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

        # Atoms
        atoms = cheminfo.molobj_to_atoms(molobj)
        atom_types.extend(atoms)

        data.setdefault(smi, []).append(value)

    atom_types, counts = np.unique(atom_types, return_counts=True)

    for atom, count in zip(atom_types, counts):
        print(atom, count)

    misc.save_obj(scratch + "molecule_data", data)
    misc.save_json(scratch + "molecule_data", data)

    return
Exemplo n.º 8
0
def main():
    """Command-line entry point: filter molecules and their property lines.

    Options:
        --scratch DIR      output directory prefix (default ``_tmp_``).
        --sdf FILE         structure file read with ``cheminfo.read_sdffile``.
        --properties FILE  text file iterated line by line in lockstep with
                           the molecules.
        -j/--procs int     parsed but not used by this function.

    A molecule/line pair is kept only when both ``molobjfilter`` and
    ``valuefilter`` accept it. Kept molecules are written to
    ``<scratch>structures.sdf.gz`` and their lines to
    ``<scratch>properties.csv``; the floats after the first field of each
    kept line are stored per SMILES and saved under ``<scratch>molecules``
    as JSON and via ``misc.save_obj``.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch',
                        action='store',
                        help='',
                        metavar="DIR",
                        default="_tmp_")
    parser.add_argument('--sdf', action='store', help='',
                        metavar="FILE")  #, nargs="+", default=[])
    parser.add_argument('--properties',
                        action='store',
                        help='',
                        metavar="FILE")  #, nargs="+", default=[])
    parser.add_argument('-j',
                        '--procs',
                        action='store',
                        help='pararallize',
                        metavar="int",
                        default=0,
                        type=int)

    args = parser.parse_args()

    # Make sure the scratch prefix ends with a slash.
    if args.scratch[-1] != "/":
        args.scratch += "/"

    moledict = {}

    # Context managers guarantee that all three files are closed even when a
    # filter or a conversion raises part-way through the loop.
    with gzip.open(args.scratch + "structures.sdf.gz", 'w') as fsdf, \
            open(args.scratch + "properties.csv", 'w') as fprop:

        molecules = cheminfo.read_sdffile(args.sdf)

        with open(args.properties, 'r') as properties:

            for molobj, line in zip(molecules, properties):

                if not molobjfilter(molobj):
                    continue

                if not valuefilter(line):
                    continue

                smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

                print(smiles)

                sdfstr = cheminfo.molobj_to_sdfstr(molobj)
                sdfstr += "$$$$\n"
                # The gzip stream is binary, so the text must be encoded.
                fsdf.write(sdfstr.encode())
                fprop.write(line)

                # The first field of the line is skipped; the rest are values.
                values = [float(x) for x in line.split()[1:]]
                moledict[smiles] = values

    misc.save_json(args.scratch + "molecules", moledict)
    misc.save_obj(args.scratch + "molecules", moledict)

    return
Exemplo n.º 9
0
    def RunAll(self, total_episodes=10**3, bob=1):
        """Train one experiment per search strategy, then save and plot them.

        Four groups are run in order: constant epsilon-greedy
        (ep = 0.01, 0.3, 1), exponentially decaying epsilon-greedy, the three
        UCB variants, and Thompson sampling. After each group the runs of
        that group are filtered out, saved with ``save_obj`` and plotted with
        ``ploting``; at the end all runs are saved together as
        ``"all_methods"``.

        :param total_episodes: passed to every experiment as ``states_wasted``.
        :param bob: passed to ``exper.train`` and ``save_obj``; when greater
            than 1 the ``"stds"`` plot is produced as well.
        :return: None
        """
        runs = {}

        # --- epsilon-greedy with a constant epsilon ---
        method = "ep-greedy"
        fav_keys = []
        for ep in [0.01, 0.3, 1]:
            exper = training.Experiment(
                searching_method=method,
                layers=self.layers,
                ep=ep,
                resolution=self.resolution,
                bound_displacements=self.bound_displacements,
                states_wasted=total_episodes,
                ep_method="normal",
                min_ep=0.01,
                guessing_rule=self.guessing_rule,
                efficient_time=self.efficient_time)
            exper.train(bob)
            fav_keys.append(
                self._record_run(runs, exper, str(ep) + "-greedy "))
        self._save_and_plot(runs, fav_keys, "ep-greedy-Dolinar", exper, bob)

        # --- epsilon-greedy with exponential decay ---
        fav_keys = []
        for tau in [200]:
            for min_ep in [0.01]:
                for method_guess in ["undefined"]:
                    # NOTE(review): ``ep`` is still the last value of the
                    # loop above (1); kept as-is to preserve behavior.
                    exper = training.Experiment(
                        searching_method="ep-greedy",
                        layers=self.layers,
                        min_ep=min_ep,
                        time_tau=tau,
                        ep=ep,
                        resolution=self.resolution,
                        bound_displacements=self.bound_displacements,
                        states_wasted=total_episodes,
                        ep_method="exp-decay",
                        guessing_rule=self.guessing_rule,
                        efficient_time=self.efficient_time,
                        method_guess=method_guess)
                    exper.train(bob)
                    label = ("max(" + str(min_ep) + ", e^-t/" + str(tau)
                             + ")-greedy ")
                    fav_keys.append(self._record_run(runs, exper, label))
        self._save_and_plot(runs, fav_keys, "exp-ep-greedy-Dolinar", exper,
                            bob)

        # --- upper confidence bound variants ---
        fav_keys = []
        method = "ucb"
        for ucbm in ["ucb1", "ucb2", "ucb3"]:
            exper = training.Experiment(
                searching_method=method,
                layers=self.layers,
                ucb_method=ucbm,
                resolution=self.resolution,
                bound_displacements=self.bound_displacements,
                states_wasted=total_episodes,
                guessing_rule=self.guessing_rule,
                efficient_time=self.efficient_time)
            exper.train(bob)
            fav_keys.append(self._record_run(runs, exper, ucbm))
        # This group alone also passes total_episodes to save_obj.
        self._save_and_plot(runs, fav_keys, "ucbs-Dolinar", exper, bob,
                            total_episodes)

        # --- Thompson sampling ---
        fav_keys = []
        method = "thompson-sampling"
        # for soft in [0.75, 1.25,1]:
        for soft in [1]:
            # "None" is the only ts_method tried here; relating the q-table
            # to the TS update reportedly gave no improvement.
            for mode_ts in ["None"]:
                exper = training.Experiment(
                    searching_method=method,
                    layers=self.layers,
                    resolution=self.resolution,
                    bound_displacements=self.bound_displacements,
                    states_wasted=total_episodes,
                    guessing_rule=self.guessing_rule,
                    soft_ts=soft,
                    efficient_time=self.efficient_time,
                    ts_method=mode_ts)
                exper.train(bob)
                fav_keys.append(
                    self._record_run(runs, exper, str(soft) + "-TS"))
        self._save_and_plot(runs, fav_keys, "TS", exper, bob)

        save_obj(runs, "all_methods", exper.layers, exper.number_phases,
                 exper.resolution, bob)
        return

    def _record_run(self, runs, exper, label):
        """Store the description of a trained experiment in ``runs``.

        The run identifier is the first line of ``number_rune.txt`` inside
        the ``<layers>L<number_phases>PH<resolution>R`` directory.

        :return: the key (``"run_<id>"``) under which the run was stored.
        """
        counter_path = (str(exper.layers) + "L" + str(exper.number_phases)
                        + "PH" + str(exper.resolution) + "R/number_rune.txt")
        with open(counter_path, "r") as f:
            c = f.readlines()[0]

        key = "run_" + str(c)
        runs[key] = {}
        runs[key]["label"] = label
        runs[key]["info"] = [
            exper.number_phases, exper.amplitude, exper.layers,
            exper.resolution, exper.searching_method, exper.guessing_rule,
            exper.method_guess, exper.number_bobs, exper.bound_displacements,
            exper.efficient_time, exper.ts_method
        ]
        runs[key]["info_ep"] = [
            exper.ep_method, exper.ep, exper.min_ep, exper.time_tau
        ]
        runs[key]["info_ucb"] = [exper.ucb_method]
        return key

    def _save_and_plot(self, runs, fav_keys, name, exper, bob, *extra):
        """Save the runs selected by ``fav_keys`` under ``name`` and plot them.

        ``extra`` is appended to the positional arguments of ``save_obj``.
        """
        plot_dict = filter_keys(runs, fav_keys)
        save_obj(plot_dict, name, exper.layers, exper.number_phases,
                 exper.resolution, bob, *extra)
        ploting(plot_dict, mode="minimax")
        if bob > 1:
            ploting(plot_dict, mode="stds")
Exemplo n.º 10
0
def main():
    """Command-line entry point: merge molecule databases and draw a Venn diagram.

    Options:
        --scratch DIR    output directory prefix (default ``_tmp_``).
        --sdf FILE ...   structure files; only their SMILES sets are used.
        --dict FILE ...  dictionaries loaded with ``misc.load_obj``; these are
                         the ones that get merged.
        --name STR ...   set labels; when given, a Venn diagram is saved.
        --filename STR   parsed but not used by this function.
        --filter         pass the merged dictionary through ``filter_dict``.
        -j/--procs int   parsed but not used by this function.

    The merged dictionary concatenates, per key, the entries of all ``--dict``
    inputs and is saved under ``<scratch>/molecule_data`` as JSON and via
    ``misc.save_obj``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--scratch', help='', metavar="DIR", default="_tmp_")
    cli.add_argument('--sdf', help='', metavar="FILE", nargs="+", default=[])
    cli.add_argument('--dict', help='', metavar="FILE", nargs="+", default=[])
    cli.add_argument('--name', help='', metavar="STR", nargs="+")
    cli.add_argument('--filename', help='', metavar="STR")
    cli.add_argument('--filter', action='store_true', help='')
    cli.add_argument('-j', '--procs', help='pararallize', metavar="int",
                     default=0, type=int)
    args = cli.parse_args()

    # Make sure the scratch prefix ends with a slash.
    if args.scratch[-1] != "/":
        args.scratch = args.scratch + "/"

    print()
    databases_set = []
    databases_dict = []

    # SMILES sets of the structure files.
    for sdf in args.sdf:
        molobjs = list(cheminfo.read_sdffile(sdf))
        smiles = {
            cheminfo.molobj_to_smiles(molobj, remove_hs=True)
            for molobj in molobjs
        }
        databases_set.append(smiles)
        print(sdf, len(smiles))

    # SMILES sets (and full contents) of the pickled dictionaries.
    for filename in args.dict:
        data = misc.load_obj(filename)
        smiles = set(data.keys())
        databases_set.append(smiles)
        databases_dict.append(data)
        print(filename, len(smiles))

    if args.scratch is not None:

        # Merge databases
        everything = {}
        for data in databases_dict:
            for key in data.keys():
                everything.setdefault(key, []).extend(data[key])

        if args.filter:
            everything = filter_dict(everything)

        print("n items", len(everything.keys()))

        # Save
        target = args.scratch + "molecule_data"
        misc.save_json(target, everything)
        misc.save_obj(target, everything)

    if args.name is not None:

        n_db = len(databases_set)

        # Only two- and three-set diagrams are drawn; the figure is saved
        # either way.
        if n_db == 2:
            venn2(databases_set, set_labels=args.name)
        elif n_db == 3:
            venn3(databases_set, set_labels=args.name)

        plt.savefig(args.scratch + "venndiagram")
Exemplo n.º 11
0
def main():
    """Command-line entry point: compute cross-validated learning curves.

    Options:
        --scratch dir     directory prefix for inputs and outputs
                          (default ``_tmp_``).
        --randomseed int  seed for ``np.random.seed`` (default 1).
        -j/--procs int    passed to ``extract_features``.

    Properties and structures are read from the scratch directory. Features
    are loaded from ``<scratch>repr.ols`` when the pickle exists, otherwise
    they are extracted, NaN-filled with 0 and cached there. For each of five
    shuffled folds and each training size in ``<scratch>n_train``, the model
    is fitted on the first ``n`` shuffled training indices and the test RMSE
    is recorded. The mean over folds is printed and the full score matrix is
    saved to ``<scratch>score.ols``.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch',
                        action='store',
                        help='',
                        metavar="dir",
                        default="_tmp_")
    # type=int is required: without it a seed given on the command line
    # reaches np.random.seed as a string and is rejected.
    parser.add_argument('--randomseed',
                        action='store',
                        help='random seed',
                        type=int,
                        metavar="int",
                        default=1)
    parser.add_argument('-j',
                        '--procs',
                        action='store',
                        help='pararallize',
                        type=int,
                        metavar="int",
                        default=0)
    args = parser.parse_args()

    # Make sure the scratch prefix ends with a slash.
    if args.scratch[-1] != "/":
        args.scratch += "/"

    # Not that random
    np.random.seed(args.randomseed)

    # Get properties
    properties = misc.load_npy(args.scratch + "properties")
    molobjs = cheminfo.read_sdffile(args.scratch + "structures.sdf.gz")

    # Get features (cached on disk after the first extraction)
    filename = "repr.ols"
    if os.path.exists(args.scratch + filename + ".pkl"):
        features = misc.load_obj(args.scratch + filename)

    else:
        features = extract_features(properties, molobjs, procs=args.procs)
        features = pd.DataFrame(features)
        features = features.fillna(0)
        misc.save_obj(args.scratch + filename, features)

    n_items = len(features)
    X = np.arange(n_items)

    assert len(properties) == n_items

    # Train
    n_splits = 5
    n_train = misc.load_npy(args.scratch + "n_train")

    fold_five = sklearn.model_selection.KFold(n_splits=n_splits,
                                              random_state=45,
                                              shuffle=True)

    scores = []

    for i, (idxs_train, idxs_test) in enumerate(fold_five.split(X)):

        # un-ordered idxs_train, reseeded per fold so the order is repeatable
        np.random.seed(45 + i)
        np.random.shuffle(idxs_train)

        learning_curve = []

        for n in n_train:
            idxs = idxs_train[:n]

            # signed difference
            sign_diff = fit_model(features, idxs, idxs_test)

            # rmse
            diff = sign_diff**2
            rmse_test = np.sqrt(diff.mean())

            # save
            learning_curve.append(rmse_test)

        scores.append(learning_curve)

    # Rows become training sizes, columns become folds.
    scores = np.array(scores)
    scores = scores.T

    mean_score = np.mean(scores, axis=1)
    print(mean_score)
    misc.save_npy(args.scratch + "score.ols", scores)

    return
Exemplo n.º 12
0
    def EnhancedQL(self, total_episodes=10**3, bob=1, plots=False):
        dict = {}
        method = "ep-greedy"
        fav_keys = []
        for ep in [0.01, 0.3, 1]:
            exper = training.Experiment(
                searching_method=method,
                layers=self.layers,
                ep=ep,
                resolution=self.resolution,
                bound_displacements=self.bound_displacements,
                states_wasted=total_episodes,
                ep_method="normal",
                min_ep=0.01,
                guessing_rule=self.guessing_rule,
                efficient_time=self.efficient_time)
            exper.train(bob)
            with open(
                    str(exper.layers) + "L" + str(exper.number_phases) + "PH" +
                    str(exper.resolution) + "R/number_rune.txt", "r") as f:
                c = f.readlines()[0]
                f.close()
            dict["run_" + str(c)] = {}
            dict["run_" + str(c)]["label"] = str(ep) + "-greedy "
            dict["run_" + str(c)]["info"] = [
                exper.number_phases, exper.amplitude, exper.layers,
                exper.resolution, exper.searching_method, exper.guessing_rule,
                exper.method_guess, exper.number_bobs,
                exper.bound_displacements, exper.efficient_time,
                exper.ts_method
            ]
            dict["run_" + str(c)]["info_ep"] = [
                exper.ep_method, exper.ep, exper.min_ep, exper.time_tau
            ]
            dict["run_" + str(c)]["info_ucb"] = [exper.ucb_method]
            fav_keys.append("run_" + str(c))
        plot_dict = filter_keys(dict, fav_keys)
        save_obj(plot_dict, "ep-greedy", exper.layers, exper.number_phases,
                 exper.resolution, bob)
        fav_keys = []
        for tau in [200]:
            for min_ep in [0.01]:
                exper = training.Experiment(
                    searching_method="ep-greedy",
                    layers=self.layers,
                    min_ep=min_ep,
                    time_tau=tau,
                    resolution=self.resolution,
                    bound_displacements=self.bound_displacements,
                    states_wasted=total_episodes,
                    ep_method="exp-decay",
                    guessing_rule=self.guessing_rule,
                    efficient_time=self.efficient_time)
                exper.train(bob)
                with open(
                        str(exper.layers) + "L" + str(exper.number_phases) +
                        "PH" + str(exper.resolution) + "R/number_rune.txt",
                        "r") as f:
                    c = f.readlines()[0]
                    f.close()
                dict["run_" + str(c)] = {}
                dict["run_" + str(c)]["label"] = "max(" + str(
                    min_ep) + ", e^-t/" + str(tau) + ")-greedy "
                dict["run_" + str(c)]["info"] = [
                    exper.number_phases, exper.amplitude, exper.layers,
                    exper.resolution, exper.searching_method,
                    exper.guessing_rule, exper.method_guess, exper.number_bobs,
                    exper.bound_displacements, exper.efficient_time,
                    exper.ts_method
                ]
                dict["run_" + str(c)]["info_ep"] = [
                    exper.ep_method, exper.ep, exper.min_ep, exper.time_tau
                ]
                dict["run_" + str(c)]["info_ucb"] = [exper.ucb_method]
                fav_keys.append("run_" + str(c))
        plot_dict = filter_keys(dict, fav_keys)
        save_obj(plot_dict, "exp-ep-greedy", exper.layers, exper.number_phases,
                 exper.resolution, bob)
        fav_keys = []
        method = "ucb"
        for ucbm in ["ucb1", "ucb2", "ucb3"]:
            exper = training.Experiment(
                searching_method=method,
                layers=self.layers,
                ucb_method=ucbm,
                resolution=self.resolution,
                bound_displacements=self.bound_displacements,
                states_wasted=total_episodes,
                guessing_rule=self.guessing_rule,
                efficient_time=self.efficient_time)
            exper.train(bob)
            with open(
                    str(exper.layers) + "L" + str(exper.number_phases) + "PH" +
                    str(exper.resolution) + "R/number_rune.txt", "r") as f:
                c = f.readlines()[0]
                f.close()
            dict["run_" + str(c)] = {}
            dict["run_" + str(c)]["label"] = ucbm
            dict["run_" + str(c)]["info"] = [
                exper.number_phases, exper.amplitude, exper.layers,
                exper.resolution, exper.searching_method, exper.guessing_rule,
                exper.method_guess, exper.number_bobs,
                exper.bound_displacements, exper.efficient_time,
                exper.ts_method
            ]
            dict["run_" + str(c)]["info_ep"] = [
                exper.ep_method, exper.ep, exper.min_ep, exper.time_tau
            ]
            dict["run_" + str(c)]["info_ucb"] = [exper.ucb_method]
            fav_keys.append("run_" + str(c))
        plot_dict = filter_keys(dict, fav_keys)
        save_obj(plot_dict, "ucbs", exper.layers, exper.number_phases,
                 exper.resolution, bob, total_episodes)
        fav_keys = []
        method = "thompson-sampling"
        for soft in [1]:
            for mode_ts in [
                    "None"
            ]:  #This is if you want to relate the q-table with the TS-update, but it doesnt' give any enhancement (for what i see).
                exper = training.Experiment(
                    searching_method=method,
                    layers=self.layers,
                    resolution=self.resolution,
                    bound_displacements=self.bound_displacements,
                    states_wasted=total_episodes,
                    guessing_rule=self.guessing_rule,
                    soft_ts=soft,
                    efficient_time=self.efficient_time,
                    ts_method=mode_ts)
                exper.train(bob)
                with open(
                        str(exper.layers) + "L" + str(exper.number_phases) +
                        "PH" + str(exper.resolution) + "R/number_rune.txt",
                        "r") as f:
                    c = f.readlines()[0]
                    f.close()
                dict["run_" + str(c)] = {}
                dict["run_" + str(c)]["label"] = str(soft) + "-TS"
                dict["run_" + str(c)]["info"] = [
                    exper.number_phases, exper.amplitude, exper.layers,
                    exper.resolution, exper.searching_method,
                    exper.guessing_rule, exper.method_guess, exper.number_bobs,
                    exper.bound_displacements, exper.efficient_time,
                    exper.ts_method
                ]
                dict["run_" + str(c)]["info_ep"] = [
                    exper.ep_method, exper.ep, exper.min_ep, exper.time_tau
                ]
                dict["run_" + str(c)]["info_ucb"] = [exper.ucb_method]
                fav_keys.append("run_" + str(c))
        plot_dict = filter_keys(dict, fav_keys)
        save_obj(plot_dict, "TS", exper.layers, exper.number_phases,
                 exper.resolution, bob)

        method = "ep-greedy"
        ep = "TS+0.01exp"
        fav_keys = []
        exper = training.Experiment(
            searching_method="ep-greedy",
            layers=self.layers,
            min_ep=0.01,
            time_tau=200,
            ep=0.01,
            resolution=self.resolution,
            bound_displacements=self.bound_displacements,
            states_wasted=total_episodes,
            ep_method="exp-decay",
            guessing_rule="None",
            efficient_time=self.efficient_time,
            method_guess="thompson-sampling")
        exper.train(bob)
        with open(
                str(exper.layers) + "L" + str(exper.number_phases) + "PH" +
                str(exper.resolution) + "R/number_rune.txt", "r") as f:
            c = f.readlines()[0]
            f.close()
        dict["run_" + str(c)] = {}
        dict["run_" + str(c)]["label"] = str(ep) + "-greedy "
        dict["run_" + str(c)]["info"] = [
            exper.number_phases, exper.amplitude, exper.layers,
            exper.resolution, exper.searching_method, exper.guessing_rule,
            exper.method_guess, exper.number_bobs, exper.bound_displacements,
            exper.efficient_time, exper.ts_method
        ]
        dict["run_" + str(c)]["info_ep"] = [
            exper.ep_method, exper.ep, exper.min_ep, exper.time_tau
        ]
        dict["run_" + str(c)]["info_ucb"] = [exper.ucb_method]
        fav_keys.append("run_" + str(c))
        plot_dict = filter_keys(dict, fav_keys)
        save_obj(plot_dict, "ep-TS", exper.layers, exper.number_phases,
                 exper.resolution, bob)
        ###################################################################################################3###
        save_obj(dict, "all_methods", exper.layers, exper.number_phases,
                 exper.resolution, bob)
        if plots == True:

            mode_log = "on"

            # os.system("python3 trad_ql.py")
            # os.system("python3 enhanced_ql.py")
            # os.system("python3 trad_ql.py")

            matplotlib.rc('font', serif='cm10')
            matplotlib.rc('text', usetex=True)
            plt.rcParams.update({'font.size': 45})

            color1 = "purple"
            color2 = (225 / 255, 15 / 255, 245 / 255)
            color3 = (150 / 255, 22 / 255, 9 / 255)
            color_2l = [46 / 255, 30 / 255, 251 / 255]
            colorexp = (13 / 255, 95 / 255, 14 / 255)

            colorucb1 = (19 / 255, 115 / 255, 16 / 255)
            colorucb2 = (170 / 255, 150 / 255, 223 / 255)
            colorucb3 = (74 / 255, 90 / 255, 93 / 255)
            colors = {
                "run_1": "orange",
                "run_2": color2,
                "run_3": "brown",
                "run_4": colorexp,
                "run_5": colorucb1,
                "run_6": colorucb2,
                "run_7": colorucb3,
                "run_8": "yellow",
                "run_9": "purple"
            }

            labels = {
                "run_1": r'$\epsilon = 0.01$' + "-greedy",
                "run_2": r'$\epsilon = 0.3$' + "-greedy",
                "run_3": r'$\epsilon = 1$' + "-greedy",
                "run_4": "max(0.01, " + r'$e^{-t/\tau}$' + ")-greedy",
                "run_5": "UCB-1",
                "run_6": "UCB-2",
                "run_7": "UCB-3",
                "run_8": "TS",
                "run_9": "max(0.01, " + r'$e^{-t/\tau}$' + ")-greedy + TS"
            }  #

            ####### Q-LEARNING PLOT ######
            ####### Q-LEARNING PLOT ######
            ####### Q-LEARNING PLOT ######
            ####### Q-LEARNING PLOT ######

            interesting = ["run_1", "run_2", "run_3", "run_4"]
            dict_plot = {}
            print(dict.keys())
            for i in interesting:
                dict_plot[i] = dict[i]
            for run in interesting:
                dict_plot[run]["label"] = labels[run]

            plt.figure(figsize=(30, 22), dpi=150)
            ax1 = plt.subplot2grid((2, 1), (0, 0))
            ax2 = plt.subplot2grid((2, 1), (1, 0))
            axinticks = []

            name = str(dict.keys())

            once = True
            neg = "Not"

            print("ploting Q learning")
            for run in dict_plot.keys():

                number_phases, amplitude, layers, resolution, searching_method, guessing_rule, method_guess, number_bobs, bound_displacements, efficient_time, ts_method = dict[
                    run]["info"]
                exp = Experiment(number_phases=number_phases,
                                 amplitude=amplitude,
                                 layers=layers,
                                 resolution=resolution,
                                 bound_displacements=bound_displacements)
                exp.load_data(run)
                run_color = colors[run]
                if mode_log == "on":
                    times = np.log10(exp.results[0])
                else:
                    times = exp.results[0]
                if once == True:
                    ax1.plot(times,
                             exp.optimal_value * np.ones(len(exp.results[0])),
                             '--',
                             linewidth=9,
                             alpha=0.8,
                             label=r'$P_*^{(2)}$',
                             color=color_2l)
                    ax1.plot([times[0], times[-1]], [exp.homodyne_limit] * 2,
                             '--',
                             linewidth=9,
                             color="black",
                             label="Homodyne limit")

                    ax2.plot([times[0], times[-1]], [exp.homodyne_limit] * 2,
                             '--',
                             linewidth=9,
                             color="black",
                             label="Homodyne limit")
                    ax2.plot(times,
                             exp.optimal_value * np.ones(len(times)),
                             '--',
                             linewidth=9,
                             alpha=0.6,
                             color=color_2l)

                    axins = zoomed_inset_axes(ax2, zoom=2.7, loc="lower right")
                    loc1 = -int(len(exp.results[0]) * 0.7)
                    loc2 = -1
                    once = False
                ax1.plot(times,
                         exp.results[1] / exp.results[0],
                         linewidth=9,
                         alpha=0.9,
                         label=dict[run]["label"],
                         color=run_color)
                ax2.plot(times,
                         exp.results[2],
                         linewidth=3,
                         alpha=.5,
                         label=dict[run]["label"],
                         color=run_color)
                axins.plot(np.log10(exp.results[0][loc1:loc2]),
                           np.log10(exp.results[2][loc1:loc2]),
                           '-',
                           linewidth=9,
                           alpha=.8,
                           color=colors[run],
                           label=dict[run]["label"])
                axins.plot(np.log10(
                    [exp.results[0][loc1], exp.results[0][loc2 - 1]]),
                           np.log10([1 - exp.opt_2l] * 2),
                           '-.',
                           alpha=.8,
                           linewidth=9,
                           color=color_2l,
                           label="Optimal 2L")
                axinticks.append(np.log10(exp.results[2][loc1]))
                ax1.fill_between(
                    times, (exp.results[1] - exp.stds[0] / 2) / exp.results[0],
                    (exp.results[1] + exp.stds[0] / 2) / exp.results[0],
                    alpha=.4,
                    color=run_color)
                ax2.fill_between(times,
                                 np.log10(exp.results[2] - exp.stds[1] / 2),
                                 np.log10(exp.results[2] + exp.stds[1] / 2),
                                 alpha=0.4,
                                 color=run_color)
                ax1.legend()
                mark_inset(ax2,
                           axins,
                           loc1=1,
                           loc2=2,
                           fc="green",
                           ec="0.3",
                           alpha=0.5)
                axinticks.append(exp.opt_2l)
                yticks = np.arange(np.round(min(exp.results[2]), 3),
                                   1 - exp.opt_2l, .1)
                ax1.set_yticks(yticks)
                ax2.set_yticks(yticks)
                axins.set_yticks(axinticks)
                axins.set_yticklabels([str(np.round(i, 3)) for i in axinticks])
                plt.setp(axins.get_yticklabels(), size=27)
                plt.setp(axins.get_xticklabels(), visible=False)
                plt.setp(ax1.get_xticklabels(), visible=False)
                ax2.set_xticks([0, 1, 2, 3, 4, 5, np.log10(5 * 10**5)])
                ax2.set_xticklabels([
                    r'$10^{0}$', r'$10^{1}$', r'$10^{2}$', r'$10^{3}$',
                    r'$10^{4}$', r'$10^{5}$', r'$5 \; 10^{5}$'
                ])
                plt.subplots_adjust(left=None,
                                    bottom=None,
                                    right=None,
                                    top=None,
                                    wspace=None,
                                    hspace=0.1)
                ax2.tick_params(axis='x', which='both', top='off')
                ax1.legend(loc="lower right", prop={"size": 35})
                ax2.set_xlabel("t", size=54)
                ax1.set_ylabel(r'\textbf{R}$_t$', size=54)
                ax2.set_ylabel(r'\textbf{P}$_t$', size=54)

            inf = dict[run]["info"]
            layers, phases, resolution = inf[2], inf[0], inf[3]
            plt.savefig(
                str(layers) + "L" + str(phases) + "PH" + str(resolution) +
                "R/figures/Qlearning.png")

            ####### ENH-Q-LEARINIG PLOT ######
            ####### ENH-Q-LEARINIG PLOT ######
            ####### ENH-Q-LEARINIG PLOT ######
            ####### ENH-Q-LEARINIG PLOT ######

            interesting = ["run_2", "run_9", "run_8", "run_5"]
            dict_plot = {}
            print(dict.keys())
            for i in interesting:
                dict_plot[i] = dict[i]
            for run in interesting:
                dict_plot[run]["label"] = labels[run]

            plt.figure(figsize=(30, 22), dpi=150)
            ax1 = plt.subplot2grid((2, 1), (0, 0))
            ax2 = plt.subplot2grid((2, 1), (1, 0))
            axinticks = []

            name = str(dict.keys())

            once = True
            neg = "Not"

            for run in dict_plot.keys():
                print(run)
                number_phases, amplitude, layers, resolution, searching_method, guessing_rule, method_guess, number_bobs, bound_displacements, efficient_time, ts_method = dict[
                    run]["info"]
                exp = Experiment(number_phases=number_phases,
                                 amplitude=amplitude,
                                 layers=layers,
                                 resolution=resolution,
                                 bound_displacements=bound_displacements)
                exp.load_data(run)
                run_color = colors[run]
                if mode_log == "on":
                    times = np.log10(exp.results[0])
                else:
                    times = exp.results[0]
                if once == True:
                    ax1.plot(times,
                             exp.optimal_value * np.ones(len(exp.results[0])),
                             '--',
                             linewidth=9,
                             alpha=0.8,
                             label=r'$P_*^{(2)}$',
                             color=color_2l)
                    ax1.plot([times[0], times[-1]], [exp.homodyne_limit] * 2,
                             '--',
                             linewidth=9,
                             color="black",
                             label="Homodyne limit")

                    ax2.plot([times[0], times[-1]], [exp.homodyne_limit] * 2,
                             '--',
                             linewidth=9,
                             color="black",
                             label="Homodyne limit")
                    ax2.plot(times,
                             exp.optimal_value * np.ones(len(times)),
                             '--',
                             linewidth=9,
                             alpha=0.6,
                             color=color_2l)

                    axins = zoomed_inset_axes(ax2, zoom=2.7, loc="lower right")
                    loc1 = -int(len(exp.results[0]) * 0.35)
                    loc2 = -1
                    once = False
                ax1.plot(times,
                         exp.results[1] / exp.results[0],
                         linewidth=9,
                         alpha=0.9,
                         label=dict[run]["label"],
                         color=run_color)
                ax2.plot(times,
                         exp.results[2],
                         linewidth=3,
                         alpha=.5,
                         label=dict[run]["label"],
                         color=run_color)
                axins.plot(np.log10(exp.results[0][loc1:loc2]),
                           np.log10(exp.results[2][loc1:loc2]),
                           '-',
                           linewidth=9,
                           alpha=.8,
                           color=colors[run],
                           label=dict[run]["label"])
                axins.plot(np.log10(
                    [exp.results[0][loc1], exp.results[0][loc2 - 1]]),
                           np.log10([1 - exp.opt_2l] * 2),
                           '-.',
                           alpha=.8,
                           linewidth=9,
                           color=color_2l,
                           label="Optimal 2L")
                axinticks.append(np.log10(exp.results[2][loc1]))
                ax1.fill_between(
                    times, (exp.results[1] - exp.stds[0] / 2) / exp.results[0],
                    (exp.results[1] + exp.stds[0] / 2) / exp.results[0],
                    alpha=.4,
                    color=run_color)
                ax2.fill_between(times,
                                 np.log10(exp.results[2] - exp.stds[1] / 2),
                                 np.log10(exp.results[2] + exp.stds[1] / 2),
                                 alpha=0.4,
                                 color=run_color)
                ax1.legend()
                mark_inset(ax2,
                           axins,
                           loc1=1,
                           loc2=2,
                           fc="green",
                           ec="0.3",
                           alpha=0.5)
                axinticks.append(exp.opt_2l)
                yticks = np.arange(np.round(min(exp.results[2]), 3),
                                   1 - exp.opt_2l, .1)
                ax1.set_yticks(yticks)
                ax2.set_yticks(yticks)
                axins.set_yticks(axinticks)
                axins.set_yticklabels([str(np.round(i, 3)) for i in axinticks])
                plt.setp(axins.get_yticklabels(), size=27)
                plt.setp(axins.get_xticklabels(), visible=False)
                plt.setp(ax1.get_xticklabels(), visible=False)
                ax2.set_xticks([0, 1, 2, 3, 4, 5, np.log10(5 * 10**5)])
                ax2.set_xticklabels([
                    r'$10^{0}$', r'$10^{1}$', r'$10^{2}$', r'$10^{3}$',
                    r'$10^{4}$', r'$10^{5}$', r'$5 \; 10^{5}$'
                ])
                plt.subplots_adjust(left=None,
                                    bottom=None,
                                    right=None,
                                    top=None,
                                    wspace=None,
                                    hspace=0.1)
                ax2.tick_params(axis='x', which='both', top='off')
                ax1.legend(loc="lower right", prop={"size": 35})
                ax2.set_xlabel("t", size=54)
                ax1.set_ylabel(r'\textbf{R}$_t$', size=54)
                ax2.set_ylabel(r'\textbf{P}$_t$', size=54)

            inf = dict[run]["info"]
            layers, phases, resolution = inf[2], inf[0], inf[3]
            plt.savefig(
                str(layers) + "L" + str(phases) + "PH" + str(resolution) +
                "R/figures/ENH-QL.png")

        return
def main():
    """Generate and store molecular representations for an SDF file.

    Parses the command line, reads the molecules from ``--sdf``, prints a
    short summary and stores, using ``--scratch`` as path prefix:

    * ``atoms`` -- the per-molecule atom lists (``misc.save_obj``),
    * ``n_items`` -- the number of molecules (``misc.save_txt``),
    * ``repr.<name>`` -- one entry per requested representation, written
      with ``misc.save_npy`` when the result is a NumPy array and with
      ``misc.save_obj`` otherwise.

    Coordinate-based representations are generated first, then the
    molecule-based ones. Requested names that belong to neither group are
    skipped without a message.
    """

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch',
                        action='store',
                        help='',
                        metavar="dir",
                        default="_tmp_")
    parser.add_argument('--conformers', action='store_true', help='')
    parser.add_argument('--sdf', action='store', help='', metavar="file")
    parser.add_argument('-j',
                        '--procs',
                        action='store',
                        help='pararallize',
                        metavar="int",
                        default=0,
                        type=int)
    parser.add_argument('-r',
                        '--representations',
                        action='store',
                        help='',
                        metavar="STR",
                        nargs="+")

    args = parser.parse_args()

    # The scratch value is used as a plain string prefix for every output
    # name, so make sure it ends with a separator. An empty value is left
    # untouched (indexing it would raise, and "/" would point at the root).
    if args.scratch and not args.scratch.endswith("/"):
        args.scratch += "/"

    if args.procs == -1:
        # os.cpu_count() returns None when the count cannot be determined.
        args.procs = os.cpu_count() or 1
        print("set procs", args.procs)

    representation_names_coordbased = [
        "cm", "fchl18", "fchl19", "slatm", "bob"
    ]
    representation_names_molbased = ["morgan", "rdkitfp"]

    if args.representations is None:
        representation_names = ["slatm", "bob", "cm", "rdkitfp", "morgan"]
    else:
        representation_names = args.representations

    # Materialise the reader output: the molecules are used twice below.
    molobjs = list(cheminfo.read_sdffile(args.sdf))

    mol_atoms, mol_coords = molobjs_to_xyzs(molobjs)
    misc.save_obj(args.scratch + "atoms", mol_atoms)

    # Atom types occurring anywhere in the data set (sorted, no duplicates).
    unique_atoms = np.unique(
        [atom for atoms in mol_atoms for atom in np.unique(atoms)])

    # Size of the largest molecule, handed to the coordinate-based
    # representation generator as ``max_atoms``.
    max_atoms = max(len(atoms) for atoms in mol_atoms)

    n_items = len(mol_coords)

    print("total mols:", n_items)
    print("atom types:", unique_atoms)
    print("max atoms: ", max_atoms)
    print()
    print("representations:", representation_names)
    print()

    misc.save_txt(args.scratch + "n_items", n_items)

    def save_representations(name, representations):
        """Store one representation set, as npy when it is a NumPy array."""
        target = args.scratch + "repr." + name
        if isinstance(representations, (np.ndarray, np.generic)):
            misc.save_npy(target, representations)
        else:
            misc.save_obj(target, representations)

    # Gas phase: representations computed from atoms and coordinates.
    for name in representation_names:

        if name not in representation_names_coordbased:
            continue

        representations = xyzs_to_representations(mol_atoms,
                                                  mol_coords,
                                                  name=name,
                                                  scr=args.scratch,
                                                  max_atoms=max_atoms,
                                                  procs=args.procs)
        save_representations(name, representations)

        # Drop the reference before the next representation is built so
        # that two full sets are not alive at the same time.
        del representations

    # Representations computed directly from the molecule objects.
    for name in representation_names:

        if name not in representation_names_molbased:
            continue

        representations = molobjs_to_representations(molobjs,
                                                     name=name,
                                                     procs=args.procs)
        save_representations(name, representations)
        del representations

    # NOTE: --conformers is accepted but currently has no effect; ensemble
    # (conformer) representation generation is not wired in here.
    return
def get_representations_slatm(atoms,
                              structures,
                              scr="_tmp_/",
                              mbtypes=None,
                              debug=True,
                              procs=0,
                              **kwargs):
    """Generate SLATM representations for a list of molecules.

    When ``mbtypes`` is not given it is loaded from
    ``scr + "slatm.mbtypes"``; if that file does not exist it is computed
    with ``qml.representations.get_slatm_mbtypes(atoms)`` and saved to the
    same location for later calls.

    Args:
        atoms: List of molecule atoms, one entry per molecule.
        structures: List of molecule coordinates, paired element-wise with
            ``atoms``.
        scr: String prefix (including the trailing separator) under which
            the mbtypes cache file is looked up and stored.
        mbtypes: Precomputed SLATM many-body types, or ``None`` to load or
            generate them as described above.
        debug: When true, print a progress message before generating.
        procs: Number of worker processes. Values above 1 distribute the
            work over a multiprocessing pool; anything else runs serially
            in the calling process.
        **kwargs: Accepted for call compatibility and ignored.

    Returns:
        The per-molecule representations, in input order, wrapped in
        ``np.array``.
    """

    if mbtypes is None:

        filename_mbtypes = scr + "slatm.mbtypes"

        try:
            mbtypes = misc.load_obj(filename_mbtypes)
        except FileNotFoundError:
            print("Generate slatm mbtypes")
            mbtypes = qml.representations.get_slatm_mbtypes(atoms)
            misc.save_obj(filename_mbtypes, mbtypes)

    if debug:
        print("Generate slatm representations")

    if procs > 1:
        # Set OMP to a single thread, presumably so the worker processes
        # do not oversubscribe the cores. The value stays in os.environ
        # after this function returns.
        os.environ["OMP_NUM_THREADS"] = "1"

        workargs = list(zip(structures, atoms))
        funcname = partial(procs_representation_slatm, mbtypes=mbtypes)

        # The context manager shuts the workers down once map() has
        # returned, so no pool processes are left behind.
        with Pool(processes=procs) as pool:
            replist = pool.map(funcname, workargs)

    else:
        replist = [
            qml.representations.generate_slatm(coord, atom, mbtypes)
            for coord, atom in zip(structures, atoms)
        ]

    return np.array(replist)