Example #1
def load_data(path, num=200):

    # Repulsive energy
    e_rep_list = np.zeros(num)
    # Overlap matrices
    overlap_list = []
    # Core Hamiltonian
    core_list = []
    # Fock matrices
    fock_list = []
    # MO energies
    e_mos_list = []
    # Converged densities
    density_list = []
    JK_list = []
    # Total energies using STO3G
    energies_3G = np.zeros(num)
    # List of Mulliken charges
    mull_list = []

    for i in range(num):
        msg.info(str(i + 1) + "/" + str(num))
        f = join(path, '{}.out'.format(i))
        (e_rep, overlap, core, fock, e_mos, density, energy, mull) = grep_sp(f)
        #e_rep_list[i] = e_rep[0]
        overlap_list.append(overlap[0].reshape(len(density[0])**2, ))
        core_list.append(core[0].reshape(len(density[0])**2, ))
        fock_list.append(fock[0].reshape(len(density[0])**2, ))
        #e_mos_list.append(e_mos[0])
        density_list.append(density[0].reshape(len(density[0])**2, ))
        #JK_list.append(sum(density[0]*core[0]))
        #energies_3G[i] = energy
        #mull_list.append(mull)

    return np.array(overlap_list), np.array(density_list)
def main():

    ############################################################################
    #         Inputs
    ############################################################################
    data_folder = "butadien/data/"
    postfix = ""#400"
    log_file = "butadien/results/classical_guess_performance_" + str(date.today()) + ".log"
    ############################################################################



    msg.print_level = 2

    msg.info("Hi. Classical guess performance for any stupid dataset",2)

    #--- fetch dataset and constants ---
    msg.info("Fetching dataset", 2)
    dataset, molecules = fetch_dataset(data_folder, postfix)
    dim = DIM
    s_raw = make_matrix_batch(dataset.inverse_input_transform(dataset.testing[0]), DIM, True)
    #---


    #--- Measuring & print ---
    with open(log_file, "a+") as f:
        f.write("##### Analysis of " + str(datetime.now()) + " #####\n")
        f.write("Datafolder: " + data_folder + "\n")
        f.write("Postfix: " + postfix + "\n")

    do_analysis(dataset, molecules, s_raw, log_file)
Example #3
def main():

    ############################################################################
    #         Inputs
    ############################################################################
    network_path = "butadien/data/networks/networkGaussians400EquidistantBroadening.npy"
    #network_path = "butadien/data/networks/networkSMatrixBigDataset.npy"
    data_folder = "butadien/data/400/"
    postfix = "400"

    log_file = "butadien/results/pretrained_" + str(date.today()) + ".log"
    ############################################################################



    msg.print_level = 2

    msg.info("Hi. Measurements for butadien",2)

    #--- fetch dataset and constants ---
    msg.info("Fetching dataset", 2)
    dataset, molecules, S = fetch_dataset(data_folder, postfix)
    dim = DIM
    s_test = make_matrix_batch(S, DIM, False)
    #---


    #--- Measuring & print ---
    with open(log_file, "a+") as f:
        f.write("##### Analysis of " + str(datetime.now()) + " #####\n")
        f.write("Network: " + network_path + "\n")
        f.write("Datafolder: " + data_folder + "\n")
        f.write("Postfix: " + postfix + "\n")

    do_analysis(network_path, dataset, molecules, s_test, log_file)
Example #4
def main():   

    S, P = np.load("butadien/data/dataset.npy")

    dataset = Dataset(S, P, split_test=0.25)

    # dim is not defined in this snippet; 26 is the value used in Example #25
    dim = 26

    trainer = Trainer(
        EluTrNNN([dim**2, 200, 100, dim**2], log_histograms=True),
        cost_function=IdempotencyPenalty(coupling=1e-6),
        optimizer=tf.train.AdamOptimizer(learning_rate=5e-3)
    )

    trainer.setup()
    network_idem, sess_idem = trainer.train(
        dataset,
        convergence_threshold=1e-5,
        #summary_save_path="butadien/log/idem"
    )
    graph_idem = trainer.graph

    with trainer.graph.as_default():
        error = trainer.cost_function.idempotency_error(network_idem)
        error_val = sess_idem.run(error, {network_idem.input_tensor: dataset.testing[0]})

    msg.info("Achieved idempotency error: " + str(error_val), 2)
Example #5
def main():   

    S, P = np.load("butadien/data/dataset.npy")

    dataset = Dataset(S, P, split_test=0.25)

    # dim is not defined in this snippet; 26 is the value used in Example #25
    dim = 26

    save_path = "butadien/scripts/log/idem"

    try:
        rmtree(save_path)
    except FileNotFoundError:
        # nothing to delete on the first run
        pass

    trainer = Trainer(
        SeluTrNNN(
            [dim**2, 700, 700, dim**2], 
            log_histograms=True
        ),
        #error_function=AbsoluteError(),
        #cost_function=RegularizedMSE(alpha=1e-7),
        cost_function=IdempotencyPenalty(
            dataset.inverse_input_transform,
            coupling=1e-5
        ),
        #optimizer=tf.train.AdamOptimizer(learning_rate=1e-3)
    )

    trainer.setup()
    network, sess = trainer.train(
        dataset,
        convergence_threshold=1e-6,
        summary_save_path=save_path,
        mini_batch_size=15
    )
    graph_idem = trainer.graph

    with trainer.graph.as_default():
        y = tf.placeholder(
                dtype="float32", 
                shape=[None, network.structure[-1]],
                name="y"
            )
        error_val = sess.run(
            AbsoluteError().function(network, y), 
            {
                network.input_tensor: dataset.testing[0],
                y: dataset.testing[1]
            }
        )
        
        error_idem = sess.run(
            trainer.cost_function.idempotency_error(network), 
            {network.input_tensor: dataset.testing[0]}
        )

    msg.info("Achieved absolute error:    {:0.3E}".format(error_val), 2)
    msg.info("Achieved idempotency error: {:0.3E}".format(error_idem), 2)
Example #6
    def measure_iterations(mf_initializer, guesses, molecules):

        iterations = []
        for i, (p, molecule) in enumerate(zip(guesses, molecules)):

            msg.info("Iteration calculation: " + str(i))

            mf = mf_initializer(molecule.get_pyscf_molecule())
            mf.kernel(dm0=p)

            iterations.append(mf.iterations)

        return iterations
Example #7
    def fetch_molecules(folder):

        files = [file for file in listdir(folder) if ".inp" in file]

        for i, file in enumerate(files):

            msg.info("Fetching: " + str(i + 1) + "/" + str(len(files)))

            mol = Molecule(*grep_molecule(join(folder, file)))

            mol.basis = "sto-3g"

            yield mol
Example #8
def main():
    msg.info("Welcome to the method benchmark.", 2)

    #--- set up an H4 chain ---
    msg.info("Setting up molecule: H_4")
    positions = [[0, 0, 0.0], [0, 0, 1.1], [0, 0, 2.2], [0, 0, 3.3]]
    mol = Molecule(['H' for i in range(4)], positions, 'H4')
    pyscf_mol = mol.get_pyscf_molecule()
    #---

    #--- test network model ---
    msg.info("Starting SCF with method: nn guess")
    mf = scf.RHF(pyscf_mol)
    mf.init_guess = "nn"
    S = mf.get_ovlp()
    dm = nn_guess(mol, S)
    mf.verbose = 4  # todo instead extract number of cycles and plot it with msg
    mf.kernel(dm)
    #---

    #--- test pyscf methods ---
    for method in ['minao', 'atom', '1e']:
        msg.info("Starting SCF with method: " + method, 1)
        mf = scf.RHF(pyscf_mol)
        mf.verbose = 4  # todo instead extract number of cycles and plot it with msg
        mf.init_guess = method
        mf.run()
Example #9
def main(species="H"):

    #--- assemble the dataset ---
    root_directory = normpath(join(dirname(realpath(__file__)), "../"))
    dataset_source_folder = join(root_directory, "dataset/")
    sources = [
        join(dataset_source_folder, directory) \
            for directory in ["GMTKN55"]
    ]

    dataset = Dataset(*assemble_batch(sources, species))
    #---

    #--- setup and train the network ---
    dim = N_BASIS[species]

    structure = [dim, 25, dim]

    network = EluTrNNN(structure)

    network, sess = train_network(network, dataset)
    #---

    save_path = join(root_directory, "tmp" + species + ".npy")
    #try:
    #--- save trained model ---
    save_object = [
        network.structure,
        network.weights_values(sess),
        network.biases_values(sess)
    ]

    np.save(save_path, save_object)
    sess.close()
    msg.info("Session closed", 1)
    #---

    #--- load and reinitialize model ---
    msg.info("Starting new session and loading the model ...", 1)
    sess = tf.Session()
    model = np.load(save_path)

    new_network = EluFixedValue(*model)
    new_network.setup()
    sess.run(tf.global_variables_initializer())

    #finally:
    if isfile(save_path):
        remove(save_path)
def fetch_molecules(folder):
    
    files = [file for file in listdir(folder) if ".inp" in file]
    
    files.sort(key=lambda x: float(x.split(".inp")[0]))

    for i, file in enumerate(files):
        
        msg.info("Fetching: " + str(i + 1) + "/" + str(len(files)))

        mol = Molecule(*grep_molecule(join(folder, file)))
        
        mol.basis = "6-31g*"
        
        yield mol
def scf_runs(molecules):

    S, P = [], []
    for i, molecule in enumerate(molecules):
        
        msg.info(str(i + 1) + "/" + str(len(molecules)))
        
        mol = molecule.get_pyscf_molecule()
        mf = hf.RHF(mol)
        mf.verbose = 1
        mf.run()
        
        S.append(mf.get_ovlp().reshape((dim**2, )))
        P.append(mf.make_rdm1().reshape((dim**2, )))

    return S, P
def fetch_molecules(folder):

    files = [file for file in listdir(folder) if ".out" in file]

    files.sort()

    for i, file in enumerate(files):

        msg.info("Fetching: " + str(i + 1) + "/" + str(len(files)))

        molecules = QChemResultsReader.read_file(folder + file)

        for molecule_values in molecules:
            mol = Molecule(*molecule_values)
            mol.basis = BASIS

            yield mol
def do_analysis(dataset, molecules, s_raw, log_file):

    #--- calculate guesses ---
    msg.info("Calculating guesses ...",2)

    p_1e = np.array([
        hf.init_guess_by_1e(mol.get_pyscf_molecule()) for mol in molecules[1]
    ])
    p_sap = np.array([
        hf.init_guess_by_atom(mol.get_pyscf_molecule()) for mol in molecules[1]
    ])
    p_minao = np.array([
        hf.init_guess_by_minao(mol.get_pyscf_molecule()) for mol in molecules[1]
    ])
    p_gwh = np.array([
        hf.init_guess_by_wolfsberg_helmholtz(mol.get_pyscf_molecule()) for mol in molecules[1]
    ])
    #--- 

    with open(log_file, "a+") as f:
        f.write("\n\n+++++ H_Core +++++\n")
    msg.info("Results H_Core: ", 1)
    measure_and_display(
        p_1e.reshape(-1, DIM**2), dataset, molecules, False, log_file, s=s_raw
    )

    with open(log_file, "a+") as f:
        f.write("\n\n+++++ SAP +++++\n")
    msg.info("Results SAP: ", 1)
    measure_and_display(
        p_sap.reshape(-1, DIM**2), dataset, molecules, False, log_file, s=s_raw
    )

    with open(log_file, "a+") as f:
        f.write("\n\n+++++ MINAO +++++\n")
    msg.info("Results MINAO: ", 1)
    measure_and_display(
        p_minao.reshape(-1, DIM**2), dataset, molecules, False, log_file, s=s_raw
    )

    with open(log_file, "a+") as f:
        f.write("\n\n+++++ GWH +++++\n")
    msg.info("Results GWH: ", 1)
    measure_and_display(
        p_gwh.reshape(-1, DIM**2), dataset, molecules, False, log_file, s=s_raw
    )
Example #14
def main():   

    S, P = np.load("butadien/data/dataset.npy")

    dataset = Dataset(S, P, split_test=0.25)

    # log_file is not defined in the original snippet; an illustrative path:
    log_file = "butadien/results/grid_search.log"

    msg.info("Starting grid search ", 2)
    with open(log_file, "w") as f:
        info  = "===============================\n"
        info += str(datetime.now()) + "\n\n"
        f.write(info)

    structures = sample_structures()
    for structure in uniquifiy(structures):
        try:
            investigate_structure(dataset, structure)
        except Exception as ex:
            msg.error("Something went wrong during investigation: " + str(ex))
Example #15
def analyze_raw_batch(P, P_ref, S, molecules):
    """Batch version of analyze raw"""
    
    n_samples = len(P)
    
    errors = []
    for i, (p, p_ref, s, mol) in enumerate(zip(P, P_ref, S, molecules)):
        msg.info(str(i+1) + " / " + str(n_samples))
        errors.append(analyze_raw(p, p_ref, s, mol))
    
    errors = np.array(errors)
    
    return (
        statistics(errors[:,0]), # abs
        statistics(errors[:,1]), # hf
        statistics(errors[:,2]), # idem
        statistics(errors[:,3]) # occ
    )
Example #16
def measure_and_display(p, dataset, molecules, is_triu, log_file, s):
    def format_results(result):
        if isinstance(result, list):
            out = list(map(
                lambda x: "{:0.5E} +- {:0.5E}".format(*x),
                result
            ))
            out = "\n".join(out)
        else:
            out =  "{:0.5E} +- {:0.5E}".format(*result)
        return out

    dim = DIM

    result = make_results_str(measure_all_quantities(
        p,
        dataset,
        molecules[1],
        N_ELECTRONS,
        mf_initializer,
        dim,
        is_triu=is_triu,
        is_dataset_triu=True,
        s=s
    ))

    result += "--- Iterations Damped ---\n" + \
        format_results(statistics(list(measure_iterations(
            mf_initializer_damping,
            make_matrix_batch(p, dim, is_triu=is_triu).astype('float64'),
            molecules[1]
        ))))

    result += "\n" + "--- Iterations DIIS ---\n" + \
        format_results(statistics(list(measure_iterations(
            mf_initializer_diis,
            make_matrix_batch(p, dim, is_triu=is_triu).astype('float64'),
            molecules[1]
        ))))

    msg.info(result, 1)
    with open(log_file, "a+") as f:
        f.write(result)
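
make_matrix_batch is used throughout these examples but never defined here. A plausible sketch, assuming row-major flattening and, for is_triu=True, upper-triangular storage of symmetric matrices (an assumption, not the project's verified implementation):

import numpy as np

def make_matrix_batch(batch, dim, is_triu):
    # Reshape a batch of flattened matrices back to (n_samples, dim, dim).
    batch = np.asarray(batch)
    if not is_triu:
        return batch.reshape(-1, dim, dim)
    # Rebuild full symmetric matrices from upper-triangular vectors.
    matrices = np.zeros((len(batch), dim, dim))
    rows, cols = np.triu_indices(dim)
    for matrix, vector in zip(matrices, batch):
        matrix[rows, cols] = vector
        matrix[cols, rows] = vector
    return matrices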
Example #17
def scf_runs(molecules):

    S, P, F = [], [], []
    for i, molecule in enumerate(molecules):

        msg.info(str(i + 1) + "/" + str(len(molecules)))

        mol = molecule.get_pyscf_molecule()
        mf = hf.RHF(mol)
        mf.verbose = 1
        mf.run()

        h = mf.get_hcore(mol)
        s = mf.get_ovlp()
        p = mf.make_rdm1()
        f = mf.get_fock(h, s, mf.get_veff(mol, p), p)

        S.append(s.reshape((dim**2, )))
        P.append(p.reshape((dim**2, )))
        F.append(f.reshape((dim**2, )))

    return S, P, F
Example #18
def measure_iterations(mf_initializer, guesses, molecules):
    """For an scf engine as returned by mf_initializer
    for a list of molecules and a list of corresponding guesses the number 
    of required iterations will be returned.
    """

    iterations = []
    for i, (p, molecule) in enumerate(zip(guesses, molecules)):

        msg.info("Iteration calculation: " + str(i))

        mf = mf_initializer(molecule.get_pyscf_molecule())

        try:
            mf.kernel(dm0=p)

            iterations.append(mf.iterations)

        except Exception as ex:
            msg.warn("SCF calculation failed: " + str(ex))

            iterations.append(mf.max_cycle)

    return iterations
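
A hedged usage sketch for measure_iterations, with the helper names (mf_initializer_diis, make_matrix_batch, statistics, DIM) assumed from Example #16:

guesses = make_matrix_batch(p, DIM, is_triu=False).astype('float64')
iterations = measure_iterations(mf_initializer_diis, guesses, molecules)
mean, std = statistics(list(iterations))
msg.info("Iterations: {:0.1f} +- {:0.1f}".format(mean, std), 1)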
Example #19
    def measure(self,
                dataset,
                molecules,
                number_of_measurements=10,
                convergence_threshold=1e-6):

        err_abs = []
        err_sym = []
        err_idem = []
        err_occ = []
        iterations = []

        s_raw = self.makeMatrixBatch(
            dataset.inverse_input_transform(dataset.testing[0]), self.dim)

        for i in range(number_of_measurements):

            msg.info("Network: " + str(i), 2)
            msg.info("train ... " + str(i), 1)

            network, sess = self.trainer.train(
                dataset, convergence_threshold=convergence_threshold)

            with self.trainer.graph.as_default():

                msg.info("calculate quantities ...", 1)

                p = network.run(sess, dataset.testing[0])
                p_batch = self.makeMatrixBatch(p, self.dim)

                err_abs.append(
                    statistics(list(self.measure_absolute_error(p, dataset))))

                err_sym.append(
                    statistics(list(self.measure_symmetry_error(p_batch))))

                err_idem.append(
                    statistics(
                        list(self.measure_idempotence_error(p_batch, s_raw))))

                err_occ.append(
                    statistics(
                        list(
                            self.measure_occupance_error(
                                p_batch, s_raw, self.n_electrons))))

                iterations.append(
                    statistics(
                        list(
                            self.measure_iterations(self.mf_initializer,
                                                    p_batch.astype('float64'),
                                                    molecules))))

        return (np.array(err_abs), np.array(err_sym), np.array(err_idem),
                np.array(err_occ), np.array(iterations))
Example #20
def main(species,
         structure,
         save_path=None,
         source=None,
         convergence_threshold=1e-7,
         learning_rate=0.0005,
         regularisation_parameter=0.01,
         mini_batch_size=0.2):

    if structure[0] != N_BASIS[species] or structure[-1] != N_BASIS[species]:
        raise ValueError(
            "Invalid structure. Bad input/output dim (should be " + \
            "{0} but was {1}/{2})!".format(
                N_BASIS[species], structure[0], structure[-1]
            )
        )

    # if mini_batch_size is a whole number, interpret it as an absolute
    # batch size (otherwise it is treated as a fraction of the training set)
    if int(mini_batch_size) == mini_batch_size:
        mini_batch_size = int(mini_batch_size)

    if source is None:
        source = ["../dataset/PyQChem/s22"]

    msg.info("Assembling dataset ...", 2)
    dataset = Dataset(*assemble_batch(source, species))

    msg.info("Training model ...", 2)
    network = EluTrNNN(structure)
    network, sess = train_network(
        network,
        dataset,
        convergence_threshold=convergence_threshold,
        learning_rate=learning_rate,
        regularisation_parameter=regularisation_parameter,
        mini_batch_size=mini_batch_size)

    if save_path is not None:
        msg.info("Storing model ...", 2)
        save_object = [
            network.structure,
            network.weights_values(sess),
            network.biases_values(sess)
        ]

        np.save(save_path, save_object)
Example #21
    def train(
            self,
            dataset,
            max_steps=100000,
            evaluation_period=200,
            mini_batch_size=0.2,
            convergence_threshold=1e-5,
            summary_save_path=None
        ):


        with self.graph.as_default():
            sess = tf.Session(graph=self.graph)

            if self.training_step is None:
                self.setup()


            #--- prep the writer ---
            if summary_save_path is not None:
                summary = tf.summary.merge_all()
                writer = tf.summary.FileWriter(summary_save_path)
                writer.add_graph(sess.graph)
            #---

            #--- train the network ---
            old_error = 1e10

            sess.run(tf.global_variables_initializer())


            msg.info("Starting network training ...", 1)        
            for step in range(max_steps):
                mini_batch = dataset.sample_minibatch(mini_batch_size)

                if step % np.ceil(evaluation_period / 10) == 0:
                    if summary_save_path is not None:
                        writer.add_summary(
                            sess.run(
                                summary, 
                                feed_dict={
                                    self.input_placeholder: mini_batch[0], 
                                    self.target_placeholder: mini_batch[1]
                                }
                            ), 
                            step
                        )

                if step % evaluation_period == 0:
                    error = sess.run(
                        self.error,
                        feed_dict={
                            self.input_placeholder: dataset.validation[0], 
                            self.target_placeholder: dataset.validation[1]
                        }
                    )

                    cost = sess.run(
                        self.cost,
                        feed_dict={
                            self.input_placeholder: dataset.validation[0], 
                            self.target_placeholder: dataset.validation[1]
                        }
                    )

                    # compare to previous error
                    diff = np.abs(error - old_error)

                    # convergence check
                    if diff < convergence_threshold:
                        msg.info(
                            "Convergence reached after " + str(step) + " steps.", 
                            1
                        )

                        break
                    else:
                        msg.info(
                            "Val. Cost: " + \
                                "{:0.3E}. Error: {:0.3E}. Diff: {:0.1E}".format(
                                cost,
                                error,
                                diff
                            )
                        )

                        old_error = error
                    

                # do training step
                sess.run(
                    self.training_step, 
                    feed_dict={
                        self.input_placeholder: mini_batch[0], 
                        self.target_placeholder: mini_batch[1]
                    }
                )
            #---

            if summary_save_path is not None:
                writer.close()


            test_error = sess.run(
                self.error,
                feed_dict={
                    self.input_placeholder: dataset.testing[0], 
                    self.target_placeholder: dataset.testing[1]
                }
            )

        self.test_error = test_error

        msg.info("Test error: {:0.5E}".format(test_error), 1)

        return self.network, sess
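
For orientation, the complete Trainer life cycle as it appears across Examples #4, #5 and #30 (arguments shortened; the values shown are illustrative):

trainer = Trainer(
    EluTrNNN([DIM**2, 200, 100, DIM**2]),
    cost_function=IdempotencyPenalty(coupling=1e-6)
)
trainer.setup()                   # builds graph, error, cost and training step
network, sess = trainer.train(dataset, convergence_threshold=1e-5)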
Example #22
def not_used():
    msg.info("Netowrk Analysis!", 2)

    #--- fetching the molecules ---
    msg.info("Fetching the molecules", 2)

    def grep_molecule(input_file):
        import re

        with open(input_file) as f:

            molecule = re.search(r"\$molecule.*\$end", f.read(), re.DOTALL)
            if molecule is None:
                raise ValueError("No molecule found in " + f.name)
            else:
                molecule = molecule.group(0)

                # cut out geometries
                geometries = molecule.splitlines()[2:-1]

        # from geometries take the species and positions
        species, positions = [], []
        for line in geometries:
            splits = line.split()
            species.append(splits[0])
            positions.append(splits[1:])

        return species, positions

    def fetch_molecules(folder):

        files = [file for file in listdir(folder) if ".inp" in file]

        for i, file in enumerate(files):

            msg.info("Fetching: " + str(i + 1) + "/" + str(len(files)))

            mol = Molecule(*grep_molecule(join(folder, file)))

            mol.basis = "sto-3g"

            yield mol

    molecules = list(fetch_molecules("butadien/data"))
    #---

    #--- do scf ---
    msg.info("Running the SCF calculations", 2)
    iterations = []
    for i, molecule in enumerate(molecules):

        mol = molecule.get_pyscf_molecule()

        msg.info("Calculating: " + str(i + 1) + "/200.")

        # assemble pyscf initial guesses
        P_1e = hf.init_guess_by_1e(mol)
        P_atom = hf.init_guess_by_atom(mol)
        P_minao = hf.init_guess_by_minao(mol)

        # nn guess
        S = hf.get_ovlp(mol).reshape(1, dim**2)
        P_NN = network.run(sess, S).reshape(dim, dim)

        iterations_molecule = []
        for guess in [P_1e, P_atom, P_minao, P_NN]:

            mf = hf.RHF(mol)
            mf.verbose = 1
            mf.kernel(dm0=guess)
            iterations_molecule.append(mf.iterations)

        iterations.append(iterations_molecule)

    iterations = np.array(iterations)
    #---

    #--- statistics ---
    fig, axes = plt.subplots(2, 2)
    axes = axes.flatten()  # 2x2 grid -> flat array, so axes[i] below works

    # todo: an array of bin edges could also be passed to np.histogram here
    for i, name in enumerate(['1e', 'atom', 'minao', 'nn']):

        hist, bins = np.histogram(iterations[:, i])
        center = (bins[:-1] + bins[1:]) / 2
        axes[i].bar(center, hist, label=name)

    plt.legend()
    plt.show()
Example #23
def main():
    # todo: this function and the training should become part of the library,
    # so that only save_path and dataset need to be specified!

    msg.info("Training a network for butadien", 2)

    msg.info("Fetching dataset ... ", 2)
    dataset = prep_dataset()

    save_path = "butadien/data/networks/networkGaussians400EquidistantBroadening.npy"


    user_input = msg.input(
        "This will overwrite the model at " + save_path + \
        ". Are you sure you want that? (y for yes)"
    )

    if user_input.upper() != "Y":
        msg.info("Aborting", 2)
        return

    msg.info("Try to fetch current model")
    try:
        
        model = np.load(save_path, encoding="latin1")
        structure, weights, biases = model[0], model[1], model[2]
        network = EluFixedValue(structure, weights, biases)
        test_error = model[3]

        user_input = msg.input(
            "Model found with test error: " + str(test_error) + \
            ". Do you want to continue to train it? (y for yes)"
        )

        if user_input.upper() != "Y":
            msg.info("Creating new network", 2)
            model = None

    except Exception:
        # no usable model on disk; fall back to a fresh network
        model = None
        
    if model is None:
        dim_triu = int(DIM * (DIM + 1) / 2)
        structure = [18, int(dim_triu * 0.75), int(dim_triu * 0.5), dim_triu, dim_triu]
        test_error = 1e10


    msg.info("Train ... ", 2)
    
    network = EluTrNNN(structure)

    train_network(dataset, network, save_path, test_error)

    
    msg.info("All done. Bye bye..", 2)
Example #24
def main(data_folder="cc2ai/", index_file=None):

    data_folder += MOLECULE + "/"

    msg.info("Fetching molecules", 2)
    molecules = list(fetch_molecules(data_folder))

    if index_file is None:
        index = np.arange(len(molecules))
        np.random.shuffle(index)
    else:
        index = np.load(index_file)

    molecules = [molecules[i] for i in index]

    msg.info("Starting SCF Calculation", 2)
    S, P, F = scf_runs(molecules)

    msg.info("Exporting Results", 2)
    msg.info("Index ...", 1)
    np.save(data_folder + "index.npy", index)

    msg.info("Exporting Results", 2)
    msg.info("S & P ...", 1)
    np.save(data_folder + "S.npy", S)
    np.save(data_folder + "P.npy", P)
    np.save(data_folder + "F.npy", F)
    msg.info("Molecules ...", 1)
    np.save(data_folder + "molecules_" + MOLECULE + "_" + BASIS + ".npy",
            molecules)

    msg.info("All Done. ", 2)
Example #25
import numpy as np
import matplotlib.pyplot as plt

from butadien.load_data import load_data
from pyscf.scf import hf

from SCFInitialGuess.utilities.usermessages import Messenger as msg
from SCFInitialGuess.utilities.dataset import Dataset, Molecule
from SCFInitialGuess.nn.networks import EluTrNNN
from SCFInitialGuess.nn.training import train_network

dim = 26
model_save_path = "butadien/model.npy"
source = "butadien/data"

msg.info("Welcome", 2)

#--- train network ---
msg.info("Training the network", 2)
dataset = Dataset(*load_data(source))

structure = [dim**2, 200, 100, dim**2]

network, sess = train_network(EluTrNNN(structure),
                              dataset,
                              evaluation_period=100,
                              mini_batch_size=20,
                              convergence_threshold=1e-6)

msg.info("Exporting model", 2)
network.export(sess, model_save_path)
Example #26
    def train(
            self,
            dataset,
            network_save_path,
            comment=None,
            old_error=1e10,
            evaluation_period=2000,
            mini_batch_size=40
        ):
        """Similaraly to the train function in the superclass, the function will
        start the training. However it will continue to train until the user
        aborts it. It will be exported after evaluation_period training 
        steps if a new minumim of error on the validation training set is reached.
        """


        with self.graph.as_default():
            sess = tf.Session(graph=self.graph)

            if self.training_step is None:
                self.setup()

            #--- train the network ---

            sess.run(tf.global_variables_initializer())

            msg.info("Starting network training ...", 1)        
            
            #Training will run until user aborts it.
            while True:


                #--- do training ---
                for step in range(evaluation_period):
                    mini_batch = dataset.sample_minibatch(mini_batch_size)

                    sess.run(
                        self.training_step, 
                        feed_dict={
                            self.input_placeholder: mini_batch[0], 
                            self.target_placeholder: mini_batch[1]
                        }
                    )
                #---
                

                #--- evaluation ---
                # calculate validation errors ...
                error = sess.run(
                    self.error,
                    feed_dict={
                        self.input_placeholder: dataset.validation[0], 
                        self.target_placeholder: dataset.validation[1]
                    }
                )

                # ... and costs.
                cost = sess.run(
                    self.cost,
                    feed_dict={
                        self.input_placeholder: dataset.validation[0], 
                        self.target_placeholder: dataset.validation[1]
                    }
                )
                

                # Check for new validation error minimum
                diff = error - old_error
                
                # if a new minimum was found notify user 
                # and save the model.
                if diff < 0:
                    message = (
                        "New Minimum found! Val. Cost: {:0.1E}. " + \
                        "Error: {:0.3E}. Diff: {:0.1E}"
                        ).format(cost, error, diff)
                    msg.info(message)

                    # export network
                    self.network.export(sess, network_save_path, error, comment)

                    # store new minimum
                    old_error = error
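
A usage sketch for this open-ended variant of train (the save path is illustrative): the loop runs until manually aborted and exports the network whenever the validation error reaches a new minimum:

trainer.train(
    dataset,
    network_save_path="butadien/data/networks/network.npy",  # illustrative path
    evaluation_period=2000,
    mini_batch_size=40
)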
def main(data_folder="butadien/data/", index_file=None):

    msg.info("Fetching molecules", 2)
    molecules = list(fetch_molecules(data_folder + "MDRuns/results"))

    if index_file is None:
        index = np.arange(len(molecules))
        np.random.shuffle(index)
    else:
        index = np.load(index_file)

    molecules = [molecules[i] for i in index]

    msg.info("Starting SCF Calculation", 2)
    S, P, F = do_scf_runs(molecules)

    msg.info("Exporting Results", 2)
    msg.info("Index ...", 1)
    np.save(data_folder + "index_Large.npy", index)

    msg.info("S & P ...", 1)
    np.save(data_folder + "S_Large.npy", np.array(S).reshape(-1, dim**2))
    np.save(data_folder + "P_Large.npy", np.array(P).reshape(-1, dim**2))
    np.save(data_folder + "F_Large.npy", np.array(F).reshape(-1, dim**2))

    msg.info("Molecules ...", 1)
    np.save(data_folder + "molecules_Large.npy", molecules)

    msg.info("All Done. ", 2)
Example #28
def train_network(
    network,
    dataset,
    sess=None,
    learning_rate=0.001,
    regularisation_parameter=0.01,
    max_steps=100000,
    evaluation_period=200,
    mini_batch_size=0.2,
    convergence_threshold=1e-5,
    summary_save_path=None
    ):
    """Train a neural Neutwork from nn.networks with the AdamOptimizer,
    to minimize the mean squared error with l2 regularisation.

    Args:
        - network <nn.networks.AbstractNeuralNetwork>: the network to be trained.
        - dataset <utilities.dataset.Dataset>: the dataset to train the net on.
        - learning_rate <float>: the learning rate to use for training w/
        AdamOptimizer
        - regularisation_parameter <float>: the factor with which the 
        regularisation is added to the total cost.
        - max_steps <int>: max number of learning steps to take if convergence 
        not met before.
        - evaluation_period <int>: period of training steps after which there
        will be a check for convergence.
        - mini_batch_size <int>: size of the minibatch that is randomly sampled
        from the training dataset in every training step.
        - convergence_threshold <float>: training convergence is reached if 
        difference in error drops below this value.
        - summary_save_path <str>: the full path to a folder in which the 
        tensorboard data will be written. If None given nothing will be exported.

    Returns:
        - the trained network
        - the session
    """

    if sess is None:
        sess = tf.Session()

    #--- set up the graph ---
    msg.info("Setting up the graph ...", 1)
    network_output = network.setup()
    x = network.input_tensor
    y = tf.placeholder(
            dtype="float32", 
            shape=[None, network.structure[-1]],
            name="y"
        )


    # cost is mse w/ l2 regularisation
    cost, mse, _ = mse_with_l2_regularisation(
        network,
        expectation_tensor=y,
        regularisation_parameter=regularisation_parameter
    )

    #optimizer and training
    with tf.name_scope("training"):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_step = optimizer.minimize(cost) 
    #---

    #--- prep the writer ---
    if summary_save_path is not None:
        msg.warn("Careful! If more than 1 network is in current graph, " + \
            "it should be cleared before merging the summary!"
        )
        summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(summary_save_path)
        writer.add_graph(sess.graph)
    #---

    #--- train the network ---
    msg.info("Starting network training ...", 1)
    old_error = 1e10

    sess.run(tf.global_variables_initializer())

    for step in range(max_steps):
        mini_batch = dataset.sample_minibatch(mini_batch_size)

        if step % np.ceil(evaluation_period / 10) == 0:
        if summary_save_path is not None:
                writer.add_summary(
                    sess.run(
                        summary, 
                        feed_dict={
                            x: mini_batch[0], 
                            y: mini_batch[1]
                        }
                    ), 
                    step
                )

        if step % evaluation_period == 0:
            error = sess.run(
                mse,
                feed_dict={x: dataset.validation[0], y: dataset.validation[1]}
            )

            # compare to previous error
            diff = np.abs(error - old_error)

            # convergence check
            if diff < convergence_threshold:
                msg.info(
                    "Convergence reached after " + str(step) + " steps.", 
                    1
                )

                break
            else:
                msg.info(
                    "Validation cost: {:0.5E}. Diff to prev.: {:0.1E}".format(
                        error,
                        diff
                    )
                )

                old_error = error
            

        # do training step
        sess.run(train_step, feed_dict={x: mini_batch[0], y: mini_batch[1]})
    #---

    if summary_save_path is not None:
        writer.close()

    test_error = sess.run(
        mse,
        feed_dict={x: dataset.testing[0], y: dataset.testing[1]}
    )
    msg.info("Test error: {:0.5E}".format(test_error), 1)


    return network, sess
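
Example #25 above shows train_network in action; condensed, the call pattern is:

network, sess = train_network(
    EluTrNNN([dim**2, 200, 100, dim**2]),
    dataset,
    evaluation_period=100,
    mini_batch_size=20,
    convergence_threshold=1e-6
)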
Example #29
def network_benchmark(
        models, 
        dataset, 
        logdir, 
        steps_report=250,
        max_training_steps=100000,
        convergence_eps=1e-7
    ):

    for model in models:

        msg.info("Investigating model " + str(model), 2)

        save_path = join(logdir, str(model))
        
        # make new session and build graph
        tf.reset_default_graph()
        sess = tf.Session()

        dim_in = model.network.structure[0]
        dim_out = model.network.structure[-1]

        f = model.network.setup()
        x = model.input_tensor
        y = tf.placeholder(tf.float32, shape=[None, dim_out])
        

        with tf.name_scope("loss"):
            # mean squared error per output element: mean_i (f(x_i) - y_i)^2 / dim_out
            error = tf.losses.mean_squared_error(y, f) / dim_out
            weight_decay = tf.contrib.layers.apply_regularization(
                tf.contrib.layers.l2_regularizer(0.001),
                model.network.weights
            )
            loss = error + weight_decay

            tf.summary.scalar("weight_decay", weight_decay)
            tf.summary.scalar("error_per_element", error)
            tf.summary.scalar("total_loss", loss)

        # define the training step
        with tf.name_scope("train"):
            train_step = model.optimizer.minimize(loss)

        summary = tf.summary.merge_all()
        #saver = tf.train.Saver()
        writer = tf.summary.FileWriter(save_path)
        writer.add_graph(sess.graph)

        msg.info("Start training ... ", 1)
        old_error = 1e13

        sess.run(tf.global_variables_initializer())

        for step in range(max_training_steps):
            batch = dataset.sample_minibatch(0.2) 

            # log progress
            if step % 50 == 0:
                writer.add_summary(sess.run(
                    summary, 
                    feed_dict={x: batch[0], y: batch[1]}
                ), step)

            # save graph and report error
            if step % steps_report == 0:
                validation_error = sess.run(
                    error, 
                    feed_dict={x: dataset.validation[0], y: dataset.validation[1]}
                ) / dim_out
                #saver.save(sess, log_dir, step)

                diff = np.abs(old_error - validation_error)
                msg.info("Error: {:0.4E}. Diff to before: {:0.4E}".format(
                    validation_error,
                    diff
                ))
                if diff < convergence_eps:
                    msg.info(
                        "Convergence reached after " + str(step) + " steps.", 1
                    )
                    break
                else:
                    old_error = validation_error
            
            if step + 1 == max_training_steps:
                msg.info("Max iterations exceeded.", 1)

            sess.run(train_step, feed_dict={x: batch[0], y: batch[1]})
            
        test_error = sess.run(
            error,
            feed_dict={x: dataset.testing[0], y: dataset.testing[1]}
        ) / dim_out
        msg.info("Test error: {:0.1E}".format(test_error))
Example #30
    def setup(self, target_graph=None):

        if target_graph is None:
            msg.info("No target graph specified for Trainer setup. " + \
                "Creating new graph ...", 1)
            self.graph = tf.Graph()
        else:
            msg.info("Appending to graph: " + str(target_graph))
            self.graph = target_graph

        
        with self.graph.as_default():
            
            msg.info("Setting up the training in the target graph ...", 1)

            # placeholder for dataset target-values
            self.target_placeholder = tf.placeholder(
                dtype="float32", 
                shape=[None, self.network.structure[-1]],
                name="y"
            )

            msg.info("network ...", 1)
            with tf.name_scope("network/"):
                network_output = self.network.setup()
                self.input_placeholder = self.network.input_tensor

            msg.info("error function ...", 1)
            with tf.name_scope("error_function/"):
                self.error = self.error_function.function(
                    self.network, 
                    self.target_placeholder
                )
                
            msg.info("cost function ...", 1)
            with tf.name_scope("cost_function/"):
                self.cost = self.cost_function.function(
                    self.network,
                    self.target_placeholder
                )

            msg.info("training step", 1)
            with tf.name_scope("training/"):
                self.training_step = self.optimizer.minimize(self.cost)

        return self.graph, self.network, self.target_placeholder