Example #1
    def run(self):
        '''
        Run the misoKG multi-information-source optimization loop.
        '''

        if self.save_extra_files and self.overwrite:
            os.system("rm %s %s" % (self.mu_fname, self.sig_fname))

        # Error Handling
        assert self.costs is not None, "Error - You must specify costs before running!"
        assert len(self.costs) == len(
            self.IS
        ), "Error - You must specify the same number of information sources as costs!"

        if self.verbose:
            print(
                "\n-------------------------------------------------------------------------------------"
            )
            print("Beginning optimization.")
            # print("\tParallel = %s" % str(self.parallel))
            print("    Number of Information Sources = %d" % len(self.IS))
            print("    Acquisition = "),
            if self.acquisition == getNextSample_misokg:
                print("misoKG with a cost list = %s" % str(self.costs))
            elif self.acquisition == getNextSample_EI:
                print("EI")
            elif self.acquisition == getNextSample_kg:
                print("KG")
            else:
                print("Custom!")
            if self.hyperparameter_objective == MLE:
                obj_name = "MLE"
            elif self.hyperparameter_objective == MAP:
                obj_name = "MAP"
            else:
                obj_name = "Custom"
            if self.loglike == gaussian_loglike:
                loglike_name = "Gaussian"
            elif self.loglike == bonilla_loglike:
                loglike_name = "Bonilla"
            else:
                loglike_name = "Custom"
            print(
                "The Hyperparameter Objective is %s with %d starting samples."
                % (obj_name, self.n_start))
            print("The loglikelihood method is %s." % loglike_name)
            if self.dynamic_pc:
                print(
                    "Will use a dynamic pearson correlation coefficient for rho."
                )
            print("Will optimize the following parameters:")
            print("    " + ', '.join(self.theta.hp_names))
            print(
                "-------------------------------------------------------------------------------------"
            )
        # Start - TIMER
        self.t0 = time.time()

        # Step 1 - Ensure we have our historical training set.  If not, then
        # generate one.
        if self.fname_historical is None:
            self.fname_historical = "historical.dat"
            if self.numerical:
                self.sample_numerical()
            else:
                self.sample()
        else:
            self.historical = pickle.load(open(self.fname_historical, 'r'))
            if self.numerical:
                if len(self.historical[0]) != len(self.domain) + 1:
                    raise Exception(
                        "The historical data seems to be incorrect for misoKG.  Maybe the IS associated with each point was not included?"
                    )
            # Only apply the fixed-length check in the non-numerical case.
            elif len(self.historical[0]) not in [10, 17]:
                raise Exception(
                    "The historical data seems to be incorrect for misoKG.  Maybe the IS associated with each point was not included?"
                )

        # Step 2 - Generate a full list of our sample space if it has not been given
        if self.mixed_solvents and not self.numerical:
            raise Exception(
                "Mixed Solvents have not been implemented properly.")
        else:
            if self.combinations is None and not self.numerical:
                self.combinations = self.get_combos_pure_solvent()
        if self.all_X is None and not self.numerical:
            self.all_X = pal_strings.alphaToNum(
                self.combinations, solvents, mixed_halides=self.mixed_halides)
            self.all_solvent_properties = np.array(self.all_X)[:, -3:-1]
            self.all_Y = np.array([0 for i in range(len(self.all_X))])

        # Step 2.5 - Store our X and Y points
        if not self.numerical:
            self.assign_samples()
        # Store a list of samples that have been sampled at all information sources
        self.indices_overlap = list(range(len(self.sampled_X)))

        # Step 3 - Get our hyperparameters.  As we don't have initial ones,
        # don't use_theta for this instance.
        self.updateHPs(use_theta=False)

        # Step 3.5 - Initialize indices_overlap variables
        self.indices_overlap_len = len(self.indices_overlap)
        self.indices_overlap_changed = False

        # Step 4 - Update the posterior based on the historical data.
        self.updatePosterior()

        if not self.numerical:
            # Save combinations and default save actions
            if self.combos_fname is not None:
                fptr = open(self.combos_fname, 'w')
                for i, c in enumerate(self.combinations):
                    fptr.write("%d\t%s\n" % (i, c))
                fptr.close()
            self.save()

            # Step 5 - Begin the main loop
            start, stop = len(self.sampled_X), len(self.combinations)
        else:
            start, stop = len(self.sampled_X), len(self.all_X)

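        # Track the best objective sampled so far, restricted to IS0 (the
        # highest-fidelity information source).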
        best_found_in = start
        best_value = max(
            np.array(self.sampled_objectives)[self._get_info_source_map(
                self.sampled_X)[0]])
        best_index = self.sampled_indices[self.sampled_objectives.index(
            best_value)]
        best_name = self.combinations[best_index]

        # Initialize our costs based on the sampled so far
        self.total_cost = sum([self.costs[int(x[0])] for x in self.sampled_X])

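        # Recommendation = argmax of the posterior mean over IS0 points only:
        # find the max, locate it within the IS0 subset, then map back to a
        # global index into all_X.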
        best_prediction = max(
            np.array(self.mu)[self._get_info_source_map(self.all_X)[0]])
        best_prediction = list(
            np.array(self.mu)[self._get_info_source_map(
                self.all_X)[0]]).index(best_prediction)
        best_prediction = self._get_info_source_map(
            self.all_X)[0][best_prediction]
        recommendation = self.combinations[best_prediction]

        if self.save_extra_files and self.sample_fname is not None:
            fptr = open(self.sample_fname, 'a')
            for v in zip(self.sampled_names, self.sampled_objectives):
                fptr.write("%s\t%.4f\n" % v)
            fptr.close()

        # Begin the main loop
        fully_sampled = False
        recommendation_kill_flag = False
        iteration_kill_flag = False
        cost_kill_flag = False
        for index in range(start, stop):
            if self.iteration_kill_switch is not None and index >= self.iteration_kill_switch:
                iteration_kill_flag = True
                break

            # If all of IS0 has been sampled and there is no noise, exit gracefully
            if not self.noise and all([
                    i_IS0 in self.sampled_indices
                    for i_IS0 in self._get_info_source_map(self.all_X)[0]
            ]):
                fully_sampled = True
                break

            # Step 6 - acquisition Function.  Decide on next point(s) to sample.
            next_point = self.acquisition(
                self.mu,
                #self.theta.rho_matrix(self.all_X) * self.K,
                self.K,
                max(self.sampled_objectives),
                len(self.combinations),
                self.costs,
                self.all_X,
                self.sampled_indices,
                save=self.acquisition_fname)
            if next_point in self.sampled_indices:
                print(
                    "\nFAILURE!!!! SAMPLED # %s - Index = %d# POINT TWICE!\n" %
                    (self.combinations[next_point], next_point))
                print("K Diagonal = %s" %
                      ' '.join(["%f" % v for v in np.diag(self.K)]))
                print("K[%d] = %s" % (next_point, ' '.join(
                    ["%f" % v for v in self.K[next_point]])))
                print("Sampled Points = %s" % str(self.sampled_indices))
                raise Exception(
                    "Error - acquisition function grabbed an already sampled point!"
                )

            if self.verbose:
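                # -1.23 is a sentinel value shown when rho["[0, 1]"] is absent.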
                r = -1.23
                if "[0, 1]" in self.theta.rho:
                    r = self.theta.rho["[0, 1]"]
                suffix = "(iter %d) %s = %.4f, sampling %s. Recommendation = %s, Current Cost = %.2f, Rho = %.3f" % (
                    best_found_in, best_name, best_value,
                    self.combinations[next_point], recommendation,
                    self.total_cost, r)
                ppb(index, stop, prefix='Running', suffix=suffix, pad=True)
                if self.logger_fname is not None:
                    fptr = open(self.logger_fname, 'a')
                    fptr.write(suffix + "\n")
                    fptr.close()
            if self.recommendation_kill_switch is not None and recommendation == self.recommendation_kill_switch:
                recommendation_kill_flag = True
                break

            # Step 7 - Sample point(s)
            self.sampled_indices.append(next_point)
            self.sampled_names.append(self.combinations[next_point])
            if not self.numerical:
                self.sampled_X.append(
                    pal_strings.alphaToNum(
                        self.sampled_names[-1],
                        solvents,
                        mixed_halides=self.mixed_halides)[0])
                h, c, _, s, info_lvl = pal_strings.parseName(
                    self.sampled_names[-1])
                self.sampled_objectives.append(self.IS[info_lvl](h, c[0], s))
            else:
                x = self.all_X[next_point]
                self.sampled_X.append(x)
                info_lvl = int(x[0])
                self.sampled_objectives.append(self.IS[info_lvl](*x[1:]))

            if self.save_extra_files and self.sample_fname is not None:
                fptr = open(self.sample_fname, 'a')
                fptr.write(
                    "%s\t%.4f\n" %
                    (self.sampled_names[-1], self.sampled_objectives[-1]))
                fptr.close()

            # Ensure we get an array of all sampled indices that have been sampled
            # at ALL information source levels
            chk = self.sampled_X[-1][1:]
            if self.numerical:
                found = [
                    i for i, v in enumerate(self.sampled_X)
                    if all(chk == v[1:])
                ]
            else:
                found = [
                    i for i, v in enumerate(self.sampled_X) if chk == v[1:]
                ]
            # Assume we have 4 IS.  If we find 4 of chk, then we now have fully sampled chk.
            if len(found) == len(self.IS):
                for f in found:
                    if f not in self.indices_overlap:
                        self.indices_overlap.append(f)
                self.indices_overlap_changed = self.indices_overlap_len != len(
                    self.indices_overlap)
                self.indices_overlap_len = len(self.indices_overlap)

            # Step 7.5 - Maybe re-opt the hyperparameters
            # Note, we do this in a two step approach.  First, we optimize all HPs based on
            # only data points that exist at all levels of theory.  Then we optimize
            # only at the highest level of theory sampled so far (IS0).
            # Note the grouping: the "index != start" guard applies to both the
            # periodic re-opt and the initial ramp conditions.
            if index != start and ((self.reopt is not None
                                    and index % self.reopt == 0)
                                   or (self.ramp_opt is not None
                                       and index < self.ramp_opt)):
                self.updateHPs()
                # Step 8a - Update the posterior completely if we are reoptimizing the HPs
                self.updatePosterior()
            else:
                # Step 8b - Update the posterior with only the newest sampled point
                self.updatePosterior(
                    (self.sampled_indices[-1], self.sampled_objectives[-1]))

            self.save()

            # Count the cost of this iteration
            self.total_cost += self.costs[info_lvl]

            if self.cost_kill_switch is not None and self.total_cost > self.cost_kill_switch:
                cost_kill_flag = True
                break

            # Get our recommendation from max(mu) for only IS0
            best_prediction = max(
                np.array(self.mu)[self._get_info_source_map(self.all_X)[0]])
            best_prediction = list(
                np.array(self.mu)[self._get_info_source_map(
                    self.all_X)[0]]).index(best_prediction)
            best_prediction = self._get_info_source_map(
                self.all_X)[0][best_prediction]
            recommendation = self.combinations[best_prediction]

            # Get the best sampled so far
            potential_best = max(
                np.array(self.sampled_objectives)[self._get_info_source_map(
                    self.sampled_X)[0]])
            if potential_best > best_value:
                best_found_in = index
                best_value = potential_best
                best_index = self.sampled_indices[
                    self.sampled_objectives.index(best_value)]
                best_name = self.combinations[best_index]

        # END TIMER
        self.t1 = time.time()

        if self.verbose:
            print("-----------------------")
            print("PAL Optimizer has completed in %.2f s" %
                  (self.t1 - self.t0))
            if fully_sampled:
                print("Optimizer quit early as IS0 was fully sampled")
            if recommendation_kill_flag:
                print("Optimizer quit early due to recommendation of %s." %
                      self.recommendation_kill_switch)
            if iteration_kill_flag:
                print("Optimizer quit early due to exceeding %d iterations." %
                      self.iteration_kill_switch)
            if cost_kill_flag:
                print("Optimizer quit early due to exceeding %.4f cost." %
                      self.cost_kill_switch)
            print("-----------------------")
            print("Best combination: %s" % best_name)
            print("       Objective: %.4f" % best_value)
            print("       Maximized: %d" % best_found_in)
            print("-----------------------")
            print(self.theta)
            print(
                "-------------------------------------------------------------------------------------\n"
            )
Example #2
    def sample(self,
               specify=None,
               debug=False,
               MAX_LOOP=10,
               allow_reduced=False):
        '''
        Run the objective functions, in parallel, at N_samples points to
        generate historical data.  Note, these are run for EVERY information
        source.
        '''

        if debug:
            print("Collecting LHS samples...")

        if specify is None:
            counter, samples = 0, []
            while (len(samples) != self.historical_nsample
                   and counter < MAX_LOOP):

                # Grab a latin hypercube sample
                samples = doe_lhs.lhs(
                    int(self.mixed_halides) * 2 + 2, self.historical_nsample)
                # Round the LHS and figure out the samples
                solvent_ranges = [
                    i * 1.0 / len(self.S)
                    for i in range(1, len(self.solvents) + 1)
                ]
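                # solv maps a uniform draw v in (0, 1] to a solvent via
                # equal-width bins; trio one-hot encodes v into thirds,
                # e.g. trio(0.5) -> [0, 1, 0].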
                solv = lambda v: self.S[[v <= s
                                         for s in solvent_ranges].index(True)]
                trio = lambda v: [
                    int(v > (chk - 1.0 / 3.0) and v <= chk)
                    for chk in [1. / 3., 2. / 3., 1.0]
                ]

                # Grab our samples
                if self.mixed_halides:
                    halides = [sorted([s[0], s[1], s[2]]) for s in samples]
                    halides = [[trio(h) for h in hh] for hh in halides]
                    samples = [
                        h[0] + h[1] + h[2] + trio(s[3]) + [
                            self.solvents[solv(s[-1])]["density"],
                            self.solvents[solv(s[-1])]["dielectric"],
                            self.solvents[solv(s[-1])]["index"]
                        ] for h, s in zip(halides, samples)
                    ]
                else:
                    samples = [
                        trio(s[0]) + trio(s[1]) + [
                            self.solvents[solv(s[-1])]["density"],
                            self.solvents[solv(s[-1])]["dielectric"],
                            self.solvents[solv(s[-1])]["index"]
                        ] for s in samples
                    ]

                # Ensure no duplicates
                samples = sorted(samples, key=lambda x: x[-1])
                samples = [tuple(s) for s in samples]
                samples = [list(s) for s in set(samples)]

                counter += 1
        else:
            if isinstance(specify, int):
                specify = [specify]
            self.historical_nsample = len(specify)
            samples = [self.combinations[i] for i in specify]
            samples = pal_strings.alphaToNum(samples,
                                             solvents,
                                             mixed_halides=self.mixed_halides,
                                             name_has_IS=True)
            # Remove the IS label from the descriptor
            samples = [s[1:] for s in samples]

        if allow_reduced:
            print(
                "Warning - Will sample from subspace due to duplicates (%d instead of %d)."
                % (len(samples), self.historical_nsample))
            self.historical_nsample = len(samples)
        elif specify is None:
            assert counter < MAX_LOOP, "Error - Unable to sample from space without duplicates!"

        if debug:
            print("Will sample %s" % str(samples))

        # Now, run these simulations to get the sample points
        jobs = []
        for i, sample in enumerate(samples):
            if debug:
                print("Running %s..." % sample)
            s = pal_strings.parseNum(sample,
                                     self.solvents,
                                     mixed_halides=self.mixed_halides,
                                     num_has_IS=False)
            hat, cat, _, solv, _ = pal_strings.parseName(s, name_has_IS=False)
            cat = cat[0]
            if not self.mixed_halides:
                hat = hat[0]
            if debug:
                print("\tAdding %s to sample runs..." % s)

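            # Queue this sample at every information source; each IS call may
            # return a job handle (waited on below) or a float result directly.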
            for j, obj in enumerate(self.IS):
                jobs.append([[j] + copy.deepcopy(sample), obj(hat, cat, solv)])

        # Now, get results from each simulation
        samples = []
        for sample, j in jobs:
            if not isinstance(j, float):
                j.wait()
                samples.append(sample + [j.get_result()])
            # In special situations, when we are reading from a list for example, we don't need to worry
            # about a job object, and can just assign the value directly.
            else:
                samples.append(sample + [j])

            s = pal_strings.parseNum(samples[-1][:-1],
                                     self.solvents,
                                     mixed_halides=self.mixed_halides,
                                     num_has_IS=True)
            if debug:
                print("\t%s was found as %lg" % (s, samples[-1][-1]))

        # Save the sampled data
        fptr = open(self.fname_historical, "w")
        pickle.dump(samples, fptr)
        fptr.close()
        self.historical = samples

        if debug:
            print("Done Collecting Samples\n")
Example #3
sim.combinations = [
    k[1] + "Pb" + k[0] + "_" + k[2] + "_" + str(IS)
    for k in [key.split() for key in IS0.keys()]
    for IS in range(len(sim.IS))
]
combos_no_IS = [
    k[1] + "Pb" + k[0] + "_" + k[2]
    for k in [key.split() for key in IS0.keys()]
]

# Because we do this, we should also generate our own historical sample
sim.historical_nsample = len(combos_no_IS)
choices = combos_no_IS
tmp_data = pal_strings.alphaToNum(
    choices,
    solvents,
    mixed_halides=True,
    name_has_IS=False)

data = []
for IS in range(len(sim.IS)):
    for i, d in enumerate(tmp_data):
        h, c, _, s, _ = pal_strings.parseName(pal_strings.parseNum(d, solvents, mixed_halides=True, num_has_IS=False), name_has_IS=False)
        c = c[0]
        data.append([IS] + d + [sim.IS[IS](h, c, s)])

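# Split objectives by information source; the 1.8 factor appears to rescale
# IS1 relative to IS0 for comparison.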
IS0 = np.array([x[-1] for x in data if x[0] == 0])
IS1 = np.array([x[-1] * 1.8 for x in data if x[0] == 1])

IS0, IS1 = zip(*sorted(zip(IS0, IS1)))

# IS0 = IS0 / np.linalg.norm(IS0)
# IS1 = IS1 / np.linalg.norm(IS1)

# print IS0
# print IS1

import matplotlib.pyplot as plt
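
# A minimal plotting sketch (an assumed continuation; the original snippet
# ends at the import above), comparing the sorted IS0 values against the
# scaled IS1 values:
plt.plot(IS0, label="IS0")
plt.plot(IS1, label="IS1 (scaled by 1.8)")
plt.xlabel("Sample index (sorted by IS0)")
plt.ylabel("Objective")
plt.legend()
plt.show()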
Example #4
def run_misokg(run_index):

    # Store data for debugging
    IS0 = pickle.load(open("enthalpy_N1_R3_Ukcal-mol", 'r'))
    IS1 = pickle.load(open("enthalpy_N1_R2_Ukcal-mol", 'r'))

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    #sim.hyperparameter_objective = MAP
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = "enthalpy_misokg.dat"
    sim.fname_historical = None

    # Information sources, in order from expensive to cheap
    sim.IS = [
        lambda h, c, s: -1.0 * IS0[' '.join([''.join(h), c, s])],
        lambda h, c, s: -1.0 * IS1[' '.join([''.join(h), c, s])]
    ]
    sim.costs = [1.0, 0.1]

    sim.logger_fname = "data_dumps/%d_misokg.log" % run_index
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = "data_dumps/%d_misokg.dat" % run_index
    sim.mu_fname = "data_dumps/%d_mu_misokg.dat" % run_index
    sim.sig_fname = "data_dumps/%d_sig_misokg.dat" % run_index
    sim.combos_fname = "data_dumps/%d_combos_misokg.dat" % run_index
    sim.hp_fname = "data_dumps/%d_hp_misokg.dat" % run_index
    sim.acquisition_fname = "data_dumps/%d_acq_misokg.dat" % run_index
    sim.save_extra_files = True
    ########################################
    # Override the possible combinations with the reduced list of IS0
    # Because we do this, we should also generate our own historical sample
    combos_no_IS = [
        k[1] + "Pb" + k[0] + "_" + k[2]
        for k in [key.split() for key in IS0.keys()]
    ]
    sim.historical_nsample = 10
    choices = np.random.choice(combos_no_IS,
                               sim.historical_nsample,
                               replace=False)
    tmp_data = pal_strings.alphaToNum(choices,
                                      solvents,
                                      mixed_halides=True,
                                      name_has_IS=False)

    data = []
    for IS in range(len(sim.IS)):
        for i, d in enumerate(tmp_data):
            h, c, _, s, _ = pal_strings.parseName(pal_strings.parseNum(
                d, solvents, mixed_halides=True, num_has_IS=False),
                                                  name_has_IS=False)
            c = c[0]
            data.append([IS] + d + [sim.IS[IS](h, c, s)])

    sim.fname_historical = "data_dumps/%d.history" % run_index
    pickle.dump(data, open(sim.fname_historical, 'w'))
    simple_data = [d for d in data if d[0] == 0]
    pickle.dump(simple_data,
                open("data_dumps/%d_reduced.history" % run_index, 'w'))

    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 20
    sim.ramp_opt = None
    sim.parallel = False

    # Possible compositions by default
    sim.A = ["Cs", "MA", "FA"]
    sim.B = ["Pb"]
    sim.X = ["Cl", "Br", "I"]
    sim.solvents = copy.deepcopy(solvents)
    sim.S = list(set([v["name"] for k, v in sim.solvents.items()]))
    sim.mixed_halides = True
    sim.mixed_solvents = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, existing files only warn; else they error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    # MEAN: 4 * mu_alpha + mu_zeta
    # COV: sig_alpha * |X><X| + sig_beta * I_N + sig_zeta + MaternKernel(S, weights, sig_m)

    SCALE = [2.0, 4.0][int(sim.mixed_halides)]
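    # (Interpretation: SCALE matches the mean's leading coefficient; the
    # one-hot descriptor has 4 blocks with mixed halides, 2 otherwise.)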

    # _1, _2, _3 used as dummy entries
    def mean(X, Y, theta):
        mu = np.array([SCALE * theta.mu_alpha + theta.mu_zeta for _ in Y])
        return mu

    sim.mean = mean

    def cov_old(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3],
            np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X) * (A + B + C + D)
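    # cov_old (above) scales one covariance matrix elementwise by rho_matrix;
    # cov (below) instead forms a Kronecker product between an IS correlation
    # matrix Ks and the design-point covariance Kx.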

    def cov(X0, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X0)[:, :-3],
            np.array(X0)[:, :-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X0)))
        C = theta.sig_zeta
        D = mk52(np.array(X0)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        Kx = A + B + C + D

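        # Ks: IS-by-IS correlation matrix built from the rho hyperparameters.
        # np.kron(Ks, Kx) couples design points across information sources;
        # with n_IS = 2 and N design points the result is 2N x 2N.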
        Ks = np.array([
            np.array(
                [theta.rho[str(sorted([i, j]))] for j in range(theta.n_IS)])
            for i in range(theta.n_IS)
        ])
        if theta.normalize_Ks:
            Ks = Ks / np.linalg.norm(Ks)

        e = np.diag(np.array([theta.e1, theta.e2]))
        Ks = e.dot(Ks.dot(e))

        return np.kron(Ks, Kx)

    sim.cov = cov

    sim.theta.bounds = {}
    sim.theta.mu_alpha, sim.theta.bounds['mu_alpha'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_alpha, sim.theta.bounds['sig_alpha'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_beta, sim.theta.bounds['sig_beta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.mu_zeta, sim.theta.bounds['mu_zeta'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_zeta, sim.theta.bounds['sig_zeta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

    sim.theta.e1, sim.theta.bounds['e1'] = None, (1E-1, 1.0)
    sim.theta.e2, sim.theta.bounds['e2'] = None, (1E-1, 1.0)

    # NOTE! This is a reserved keyword in misoKG.  We will generate a list of
    # the same length as the information sources, and use this for scaling our IS.
    sim.theta.rho = {"[0, 0]": 1.0, "[0, 1]": 0.96, "[1, 1]": 1.0}
    sim.theta.bounds['rho [0, 0]'] = (0.1, 1.0)
    sim.theta.bounds['rho [0, 1]'] = (0.1, 1.0)
    sim.theta.bounds['rho [1, 1]'] = (0.1, 1.0)

    sim.theta.set_hp_names()

    sim.primary_rho_opt = False
    sim.update_hp_only_with_IS0 = False
    sim.update_hp_only_with_overlapped = False

    sim.theta.normalize_L = False
    sim.theta.normalize_Ks = False

    # This was a test feature that actually over-wrote rho to be PSD
    # sim.force_rho_psd = True
    sim.recommendation_kill_switch = "FAPbBrBrCl_THTO_0"

    ###################################################################################################

    # Start simulation
    sim.run()
Example #5
def run_misokg(run_index):

    # Store data for debugging
    IS0 = pickle.load(open("enthalpy_N1_R3_Ukcal-mol", 'r'))
    IS1 = pickle.load(open("enthalpy_N1_R2_Ukcal-mol", 'r'))

    # Generate the main object
    sim = Optimizer()

    # Assign simulation properties
    #sim.hyperparameter_objective = MAP
    sim.hyperparameter_objective = MLE
    ###################################################################################################
    # File names
    sim.fname_out = "enthalpy_misokg.dat"
    sim.fname_historical = None

    # Information sources, in order from expensive to cheap
    sim.IS = [
        lambda h, c, s: -1.0 * IS0[' '.join([''.join(h), c, s])],
        lambda h, c, s: -1.0 * IS1[' '.join([''.join(h), c, s])]
    ]
    sim.costs = [
        1.0,
        0.1,
    ]

    sim.logger_fname = "data_dumps/%d_misokg.log" % run_index
    if os.path.exists(sim.logger_fname):
        os.system("rm %s" % sim.logger_fname)
    os.system("touch %s" % sim.logger_fname)

    sim.obj_vs_cost_fname = "data_dumps/%d_misokg.dat" % run_index
    sim.mu_fname = "data_dumps/%d_mu_misokg.dat" % run_index
    sim.sig_fname = "data_dumps/%d_sig_misokg.dat" % run_index
    sim.combos_fname = "data_dumps/%d_combos_misokg.dat" % run_index
    sim.hp_fname = "data_dumps/%d_hp_misokg.dat" % run_index
    sim.acquisition_fname = "data_dumps/%d_acq_misokg.dat" % run_index
    sim.save_extra_files = True
    ########################################
    # Override the possible combinations with the reduced list of IS0
    # Because we do this, we should also generate our own historical sample
    combos_no_IS = [
        k[1] + "Pb" + k[0] + "_" + k[2]
        for k in [key.split() for key in IS0.keys()]
    ]
    #sim.historical_nsample = 240
    sim.historical_nsample = 10
    choices = np.random.choice(combos_no_IS,
                               sim.historical_nsample,
                               replace=False)
    tmp_data = pal_strings.alphaToNum(choices,
                                      solvents,
                                      mixed_halides=True,
                                      name_has_IS=False)

    data = []
    for IS in range(len(sim.IS)):
        for i, d in enumerate(tmp_data):
            h, c, _, s, _ = pal_strings.parseName(pal_strings.parseNum(
                d, solvents, mixed_halides=True, num_has_IS=False),
                                                  name_has_IS=False)
            c = c[0]
            data.append([IS] + d + [sim.IS[IS](h, c, s)])

    sim.fname_historical = "data_dumps/%d.history" % run_index
    pickle.dump(data, open(sim.fname_historical, 'w'))
    simple_data = [d for d in data if d[0] == 0]
    pickle.dump(simple_data,
                open("data_dumps/%d_reduced.history" % run_index, 'w'))

    ########################################

    sim.n_start = 10  # The number of starting MLE samples
    sim.reopt = 10
    sim.ramp_opt = None
    sim.parallel = False

    # Possible compositions by default
    sim.A = ["Cs", "MA", "FA"]
    sim.B = ["Pb"]
    sim.X = ["Cl", "Br", "I"]
    sim.solvents = copy.deepcopy(solvents)
    sim.S = list(set([v["name"] for k, v in sim.solvents.items()]))
    sim.mixed_halides = True
    sim.mixed_solvents = False

    # Parameters for debugging and overwriting
    sim.debug = False
    sim.verbose = True
    sim.overwrite = True  # If True, existing files only warn; else they error

    sim.acquisition = getNextSample_misokg

    # Functional forms of our mean and covariance
    # MEAN: 4 * mu_alpha + mu_zeta
    # COV: sig_alpha * |X><X| + sig_beta * I_N + sig_zeta + MaternKernel(S, weights, sig_m)

    SCALE = [2.0, 4.0][int(sim.mixed_halides)]

    # _1, _2, _3 used as dummy entries
    def mean(X, Y, theta):
        mu = np.array([SCALE * theta.mu_alpha + theta.mu_zeta for _ in Y])
        return mu

    sim.mean = mean

    def cov_old(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3],
            np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X) * (A + B + C + D)

    def cov_old2(X, Y, theta):
        A = theta.sig_alpha * np.dot(
            np.array(X)[:, 1:-3],
            np.array(X)[:, 1:-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X)))
        C = theta.sig_zeta
        D = mk52(np.array(X)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        return theta.rho_matrix(X, use_psd=True) * (A + B + C + D)

    def cov_new(X, Y, theta):
        # Get a list of all unique X, removing initial IS identifier
        X0 = []
        for x in X:
            if not any(
                [all([a == b for a, b in zip(x[1:], xchk)]) for xchk in X0]):
                X0.append(x[1:])

        A = theta.sig_alpha * np.dot(
            np.array(X0)[:, :-3],
            np.array(X0)[:, :-3].T)
        B = theta.sig_beta * np.diag(np.ones(len(X0)))
        C = theta.sig_zeta
        D = mk52(np.array(X0)[:, -3:-1], [theta.l1, theta.l2], theta.sig_m)
        Kx = A + B + C + D

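        # L holds the rho entries in lower-triangular form; Ks = L.dot(L.T)
        # below is then positive semi-definite by construction.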
        L = np.array([
            np.array([
                theta.rho[str(sorted([i, j]))] if i >= j else 0.0
                for j in range(theta.n_IS)
            ]) for i in range(theta.n_IS)
        ])
        # Normalize L so the resulting Ks does not over-scale the covariance
        L = L / np.linalg.norm(L)
        # Force it to be positive semi-definite
        Ks = L.dot(L.T)

        return np.kron(Ks, Kx)
        # NOTE: everything below this return is unreachable and kept only for
        # reference; it sketches extracting the sub-covariance for just the
        # sampled X from the full K = np.kron(Ks, Kx).
        # K = np.kron(Ks, Kx)
        #
        # indices = []
        # for l in range(theta.n_IS):
        #     for i, x in enumerate(X0):
        #         test = [l] + list(x)
        #         if any([all([a == b for a, b in zip(test, xchk)])
        #                 for xchk in X]):
        #             indices.append(l * len(X0) + i)
        #
        # return K[np.ix_(indices, indices)]

    sim.cov = cov_new

    sim.theta.bounds = {}
    sim.theta.mu_alpha, sim.theta.bounds['mu_alpha'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_alpha, sim.theta.bounds['sig_alpha'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_beta, sim.theta.bounds['sig_beta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.mu_zeta, sim.theta.bounds['mu_zeta'] = None, (
        1E-3, lambda _, Y: max(Y))
    sim.theta.sig_zeta, sim.theta.bounds['sig_zeta'] = None, (
        1E-2, lambda _, Y: 10.0 * np.var(Y))
    sim.theta.sig_m, sim.theta.bounds['sig_m'] = None, (1E-2,
                                                        lambda _, Y: np.var(Y))
    sim.theta.l1, sim.theta.bounds['l1'] = None, (1E-1, 1)
    sim.theta.l2, sim.theta.bounds['l2'] = None, (1E-1, 1)

    # NOTE! This is a reserved keyword in misoKG.  We will generate a list of
    # the same length as the information sources, and use this for scaling our IS.
    # sim.theta.rho = {"[0, 0]": 1, "[0, 1]": None, "[1, 1]": 1}
    # sim.theta.bounds['rho [0, 1]'] = (-1.0, 1.0)
    # sim.theta.bounds['rho [0, 0]'] = (1, 1)
    # sim.theta.bounds['rho [1, 1]'] = (1, 1)

    sim.theta.rho = {"[0, 0]": None, "[0, 1]": None, "[1, 1]": None}
    sim.theta.bounds['rho [0, 0]'] = (0.1, 1.0)
    sim.theta.bounds['rho [0, 1]'] = (0.1, 1.0)
    sim.theta.bounds['rho [1, 1]'] = (0.1, 1.0)

    sim.theta.set_hp_names()

    sim.primary_rho_opt = False
    #sim.update_hp_only_with_IS0 = True
    sim.update_hp_only_with_overlapped = True

    ###################################################################################################

    # Start simulation
    sim.run()