Exemplo n.º 1
0
class data_manipulation_tests(unittest.TestCase):
    """Smoke tests for salty's data aggregation and manipulation helpers.

    The two dev models below are built once, when the class body is
    executed, and are shared by the test methods through ``self``.
    """

    # Property lists handed to salty.aggregate_data.
    data = ['cpt']
    data2 = ['cpt', 'density', 'viscosity']
    # Three pairs, passed as data_ranges alongside the three data2 entries.
    data_ranges = [[200, 1000], [900, 1300], [0, 2]]
    # Values passed as the T= and P= arguments.
    T = [298.1, 298.16]
    P = [101, 102]
    # Same request twice; only devmodel1 passes scale_center=False.
    devmodel1 = salty.aggregate_data(
        data2, T=T, P=P, impute=True, data_ranges=data_ranges,
        scale_center=False)
    devmodel = salty.aggregate_data(
        data2, T=T, P=P, impute=True, data_ranges=data_ranges)

    def test_1_aggregate_data(self):
        """Aggregate the single-property request and return the result."""
        return salty.aggregate_data(self.data, T=self.T, P=self.P)

    def test_2_devmodel_to_array(self):
        """Convert the shared dev model to four arrays and return them."""
        arrays = salty.devmodel_to_array(self.devmodel, train_fraction=0.8)
        X_train, Y_train, X_test, Y_test = arrays
        return X_train, Y_train, X_test, Y_test

    def test_3_merge_duplicates(self):
        """Merge duplicates in the shared dev model, keeping descriptors."""
        return salty.merge_duplicates(self.devmodel, keep_descriptors=True)

    def test_4_assign_category(self):
        """Run assign_category on the Data of the scale_center=False model."""
        return salty.assign_category(self.devmodel1.Data)

    def test_benchmark(self):
        """Pass each numbered test, in order, to salty.Benchmark.run."""
        numbered_tests = (
            self.test_1_aggregate_data,
            self.test_2_devmodel_to_array,
            self.test_3_merge_duplicates,
            self.test_4_assign_category,
        )
        for case in numbered_tests:
            salty.Benchmark.run(case)
Exemplo n.º 2
0
 def test_gaussian_pdf(self):
     """Run adl.gaussian_pdf on aggregated cpt and density columns."""
     # grab experimental data
     temperature_window = [297, 316]  # select narrow state variable ranges
     pressure_window = [99, 102]  # MD simulation will use 101 kPa and 298 K

     # Heat capacity request; the only one that passes data_ranges.
     heat_capacity = salty.aggregate_data(
         ["cpt"], T=temperature_window, P=pressure_window,
         data_ranges=[[207, 3000]])
     density = salty.aggregate_data(
         ["density"], T=temperature_window, P=pressure_window)

     # calc KDEs from experimental data
     cpt_column = heat_capacity.Data[
         "Heat capacity at constant pressure, J/K/mol"]
     adl.gaussian_pdf(cpt_column)
     density_column = density.Data["Specific density, kg/m<SUP>3</SUP>"]
     adl.gaussian_pdf(density_column)
Exemplo n.º 3
0
    def test_calculate_minimum_distances(self):
        """Call adl.calculate_minimum_distances on merged cpt/density data."""
        temperature_window = [298.1, 298.16]  # narrow state variable ranges
        pressure_window = [101, 102]  # MD simulation: 101 kPa and 298 K

        aggregated = salty.aggregate_data(
            ["cpt", "density"], T=temperature_window, P=pressure_window)
        deduplicated = salty.merge_duplicates(aggregated)

        # Columns at positions 2 and 3 of the merged frame form the hull.
        adl.calculate_minimum_distances(deduplicated.iloc[:, 2:4], 1200, 600)
Exemplo n.º 4
0
class visualization_library_tests(unittest.TestCase):
    """Smoke test for vis.parity_plot using a one-epoch network.

    Data aggregation, the array split and the model fit all run once,
    when the class body is executed; the test reads them via ``self``.
    """

    data = ['cpt', 'density']
    T = [298.1, 298.16]
    P = [101, 102]
    devmodel = salty.aggregate_data(data, T=T, P=P)
    (X_train, Y_train, X_test, Y_test) = salty.devmodel_to_array(
        devmodel, train_fraction=0.8)

    # 75-unit ReLU layer sized to the X_train columns, dropout, then a
    # linear output layer with one unit per Y_train column.
    model = Sequential()
    model.add(
        Dense(75, activation='relu', input_dim=X_train.shape[1]))
    model.add(
        Dropout(0.25))
    model.add(
        Dense(Y_train.shape[1], activation='linear'))
    model.compile(
        optimizer="adam", loss="mean_squared_error", metrics=['mse'])
    # A single silent epoch.
    model.fit(X_train, Y_train, epochs=1, verbose=False)

    def test_1_parity_plot(self):
        """Build a parity plot from the test arrays and return it."""
        return vis.parity_plot(
            self.X_test, self.Y_test, self.model, self.devmodel)

    def test_benchmark(self):
        """Pass the parity-plot test to salty.Benchmark.run."""
        salty.Benchmark.run(self.test_1_parity_plot)
Exemplo n.º 5
0
from gains.salt_generator import generate_solvent
import salty
from random import randint

# Property list handed to generate_solvent as its second argument.
model_ID = ["cpt", "density"]

# Values passed as T= and P= when aggregating the experimental data.
T = [298.1, 298.16]
P = [101, 102]

# Aggregate the data, merge duplicates, and keep the columns at
# positions 2 and 3 as the hull.
exp_data = ["cpt", "density"]
data = salty.aggregate_data(exp_data, T=T, P=P)
merged = salty.merge_duplicates(data)
to_hull = merged.iloc[:, 2:4]

# Remaining generate_solvent inputs.
target = [0, 0]
simplex_id = 1
token_id = randint(1000, 9999)  # random integer, passed as gen_token

generate_solvent(
    target,
    model_ID,
    heavy_atom_limit=20,
    sim_bounds=[0.55, 1],
    hits=10,
    write_file=False,
    hull=to_hull,
    simplex=simplex_id,
    exp_data=data,
    verbose=0,
    gen_token=token_id,
    hull_bounds=[0, 0.1],
    inner_search=False,
    parent_cap=1,
    mutation_cap=1000,
)
Exemplo n.º 6
0
 def test_1_aggregate_data(self):
     """Aggregate self.data with self.T and self.P; return the result."""
     return salty.aggregate_data(self.data, T=self.T, P=self.P)
Exemplo n.º 7
0
def build_model_from_md(df,
                        property_to_model,
                        temperature=[298.1, 299],
                        pressure=[101, 102],
                        output_ranges=[[200, 3000]],
                        md_temperature=298.15,
                        md_pressure=101.325):
    """
    Create a new QSPR dev model from experimental data plus MD results.

    Parameters
    ----------
    df : pandas DataFrame
        salt_log data from the genetic algorithm. Must contain the
        columns 'Salt Smiles' (a "<cation>.<anion>" string) and
        'MD Calculation' (a string whose first two decimal numbers are
        read as cpt and density, in that order).
    property_to_model : list of str
        exactly one of ['cpt'], ['density'] or ['cpt', 'density']
    temperature : array, optional
        temperature bounds on experimental data to add. Default
        298.1, 299 K
    pressure : array, optional
        pressure bounds on experimental data to add. Default
        101, 102 kPa
    output_ranges : array, optional
        property bounds on experimental data to add. Default
        200, 3000 (kg/m3 or kj/molK)
    md_temperature : float, optional
        temperature used to generate the md data. Default
        298.15 K
    md_pressure : float, optional
        pressure used to generate the md data. Default
        101.325 kPa

    Returns
    -------
    new_model : salty dev_model object
    new_MD_data_index : int
        number of MD rows; they are appended after the experimental
        rows, so they are the last new_MD_data_index rows of the data

    Raises
    ------
    ValueError
        if property_to_model is not one of the supported lists

    Notes
    -----
    The default list arguments are shared between calls; they are only
    passed on to salty.aggregate_data and never modified here.
    NaNs are used for the cation/anion names of the MD rows.
    """
    cpt_col = "Heat capacity at constant pressure, J/K/mol"
    density_col = "Specific density, kg/m<SUP>3</SUP>"

    # Fail early and clearly instead of hitting an unbound local later.
    if property_to_model == ['density']:
        prop, to_drop = density_col, cpt_col
    elif property_to_model == ['cpt']:
        prop, to_drop = cpt_col, density_col
    elif property_to_model == ["cpt", "density"]:
        prop, to_drop = [cpt_col, density_col], None
    else:
        raise ValueError(
            "property_to_model must be ['cpt'], ['density'] or "
            "['cpt', 'density'], got {!r}".format(property_to_model))

    # Parse the MD results and split each salt into its two ions.
    decimal_number = re.compile(r"\d+\.\d+")
    cpt = []
    density = []
    cation_smi = []
    anion_smi = []
    for calculation, salt in zip(df["MD Calculation"], df["Salt Smiles"]):
        values = decimal_number.findall(calculation)
        cpt.append(float(values[0]))
        density.append(float(values[1]))
        ions = salt.split(".")
        cation_smi.append(ions[0])
        anion_smi.append(ions[1])

    # Descriptor names are listed one per line in data/Deslist.
    with open(join(dirname(__file__), 'data', 'Deslist'), 'r') as f:
        Deslist = [line.strip('\n\t') for line in f]
    calc = Calculator(Deslist)
    D = len(Deslist)
    n = df.shape[0]

    # Numeric part of the MD rows: cation descriptors, anion descriptors,
    # then temperature, pressure, cpt and density.
    numeric = np.zeros((n, 2 * D + 4))
    for i in range(n):
        cation = Chem.MolFromSmiles(cation_smi[i])
        anion = Chem.MolFromSmiles(anion_smi[i])
        numeric[i, :D] = calc.CalcDescriptors(cation)
        numeric[i, D:2 * D] = calc.CalcDescriptors(anion)
    numeric[:, -4] = md_temperature
    numeric[:, -3] = md_pressure
    numeric[:, -2] = cpt
    numeric[:, -1] = density

    cols = ([s + "-cation" for s in Deslist]
            + [s + "-anion" for s in Deslist]
            + ["Temperature, K", "Pressure, kPa", cpt_col, density_col])
    X = pd.DataFrame(numeric, columns=cols)
    # Add the text columns as new columns rather than writing strings
    # into float columns, which pandas no longer upcasts silently.
    X["name-anion"] = np.nan
    X["smiles-anion"] = anion_smi
    X["name-cation"] = np.nan
    X["smiles-cation"] = cation_smi  # X is the df with the new MD data
    new_MD_data_index = X.shape[0]  # plot new predictions after re-training

    devmodel = salty.aggregate_data(property_to_model,
                                    T=temperature,
                                    P=pressure,
                                    data_ranges=output_ranges,
                                    scale_center=False)
    cols = devmodel.Data.columns
    new_data = pd.concat([devmodel.Data, X])  # have to sort in future version

    if to_drop is not None:
        new_data = new_data.drop(columns=[to_drop])

    # Keep the experimental column order and renumber the rows.
    new_data = new_data[cols]
    new_data = new_data.reset_index(drop=True)

    properties = prop if to_drop is None else [prop]
    exp_data = properties + ["Temperature, K", "Pressure, kPa"]

    # Summary table: counts, ion lists and the min-max of each variable.
    unique_salts = new_data["smiles-cation"] + new_data["smiles-anion"]
    unique_cations = repr(new_data["smiles-cation"].unique())
    unique_anions = repr(new_data["smiles-anion"].unique())
    actual_data_ranges = [
        "{} - {}".format(str(new_data[name].min()),
                         str(new_data[name].max()))
        for name in exp_data
    ]
    a = np.array([
        len(unique_salts.unique()), unique_cations, unique_anions,
        len(unique_salts)
    ])
    a = np.concatenate((a, actual_data_ranges))
    cols1 = ["Unique salts", "Cations", "Anions", "Total datapoints"]
    data_summary = pd.DataFrame(a, cols1 + exp_data)

    metaDf = new_data.select_dtypes(include=["object"])
    dataDf = new_data.select_dtypes(include=[np.number])
    cols = dataDf.columns.tolist()
    instance = StandardScaler()
    n_targets = len(property_to_model)
    # Log-transform the last n_targets numeric columns
    # (assumed to be the modelled properties -- TODO confirm).
    for i in range(1, n_targets + 1):
        dataDf.iloc[:, -i] = dataDf.iloc[:, -i].apply(lambda x: log(float(x)))

    # Standardize every remaining numeric column.
    scaled_data = pd.DataFrame(
        instance.fit_transform(dataDf.iloc[:, :-n_targets]),
        columns=cols[:-n_targets])
    model_data = pd.concat(
        [scaled_data, dataDf.iloc[:, -n_targets:], metaDf],
        axis=1)  # may have to sort in future version
    mean_std_of_coeffs = pd.DataFrame([instance.mean_, instance.scale_],
                                      columns=cols[:-n_targets])
    new_model = salty.dev_model(mean_std_of_coeffs, data_summary, model_data)
    print(new_model.Data_summary)
    return new_model, new_MD_data_index
Exemplo n.º 8
0
import gains as genetic
from gains.salt_generator import generate_solvent
import salty

# Property list: used for the salty request and passed to generate_solvent.
model_ID = ["cpt", "density"]
T = [298.1, 298.16]  # select narrow state variable ranges
P = [101, 102]  # we will set MD simulation to 101 kPa and 298 K

# Aggregate the data, merge duplicates, and keep the columns at
# positions 2 and 3 as the hull.
data = salty.aggregate_data(model_ID, T=T, P=P)
merged = salty.merge_duplicates(data)
to_hull = merged.iloc[:, 2:4]

target = [800, 1200]
generate_solvent(
    target,
    model_ID,
    heavy_atom_limit=21,
    sim_bounds=[0, 1],
    hits=62,
    write_file=True,
    hull=to_hull,
)