Exemplo n.º 1
0
 def test_heavy(self):
     """The "heavy" preset must supply every key in KEYSET and build a MatPipe."""
     heavy = get_preset_config("heavy")
     for k in KEYSET:
         # assertIn names the missing key in the failure message, which
         # assertTrue(k in ...) does not.
         self.assertIn(k, heavy)
     # Constructing the pipe checks the config is accepted as keyword args.
     MatPipe(**heavy)
Exemplo n.º 2
0
 def test_caching_powerup(self):
     """A cache_src given to the "production" preset must reach the AF_KEY entry."""
     expected_src = "./somefile.json"
     config = get_preset_config("production", cache_src=expected_src)
     af_entry = config[AF_KEY]
     self.assertEqual(af_entry.cache_src, expected_src)
     # The resulting config must still be usable as MatPipe keyword args.
     MatPipe(**config)
Exemplo n.º 3
0
 def test_express(self):
     """The "express" preset must supply every key in KEYSET and build a MatPipe."""
     express = get_preset_config("express")
     for k in KEYSET:
         # assertIn names the missing key in the failure message, which
         # assertTrue(k in ...) does not.
         self.assertIn(k, express)
     # Constructing the pipe checks the config is accepted as keyword args.
     MatPipe(**express)
Exemplo n.º 4
0
 def test_express_single(self):
     """The "express_single" preset must supply every key in KEYSET and build a MatPipe."""
     express_single = get_preset_config("express_single")
     for k in KEYSET:
         # assertIn names the missing key in the failure message, which
         # assertTrue(k in ...) does not.
         self.assertIn(k, express_single)
     # Constructing the pipe checks the config is accepted as keyword args.
     MatPipe(**express_single)
Exemplo n.º 5
0
 def test_debug(self):
     """The "debug" preset must supply every key in KEYSET and build a MatPipe."""
     debug = get_preset_config("debug")
     for k in KEYSET:
         # assertIn names the missing key in the failure message, which
         # assertTrue(k in ...) does not.
         self.assertIn(k, debug)
     # Constructing the pipe checks the config is accepted as keyword args.
     MatPipe(**debug)
Exemplo n.º 6
0
 def test_debug_single(self):
     """The "debug_single" preset must supply every key in KEYSET and build a MatPipe."""
     debug_single = get_preset_config("debug_single")
     for k in KEYSET:
         # assertIn names the missing key in the failure message, which
         # assertTrue(k in ...) does not.
         self.assertIn(k, debug_single)
     # Constructing the pipe checks the config is accepted as keyword args.
     MatPipe(**debug_single)
from automatminer import MatPipe
from sklearn.model_selection import train_test_split
import sys
import pandas as pd
import numpy as np
import os
# Pickled MatPipe to load. The original script also listed
# MatPipe_predict_Ultimate_fourthtime_from_composition.p as an alternative.
filename = 'D:/FYP_files/Machine_learning/pipeline/p_files/MatPipe_predict_thirdelongation_from_composition.p'

# All work happens under the main guard so that importing this module (for
# example when a worker process re-imports the main module) does not re-read
# argv, reload the pipeline, or overwrite the CSV with an unpredicted frame.
if __name__ == '__main__':
    # Usage: <script> <composition> <output_dir>
    if len(sys.argv) < 3:
        sys.exit('usage: %s <composition> <output_dir>' % sys.argv[0])
    composition = sys.argv[1]
    rpath = sys.argv[2]

    # Single-row frame holding the composition given on the command line.
    df = pd.DataFrame({'composition': [composition]})

    pipe = MatPipe.load(filename)
    df = pipe.predict(df)

    # Write the predicted frame into the requested output directory.
    df.to_csv('%s/elongation.csv' % rpath)
Exemplo n.º 8
0
 def test_production(self):
     """The "production" preset must supply every key in KEYSET and build a MatPipe."""
     prod = get_preset_config("production")
     for k in KEYSET:
         # assertIn names the missing key in the failure message, which
         # assertTrue(k in ...) does not.
         self.assertIn(k, prod)
     # Constructing the pipe checks the config is accepted as keyword args.
     MatPipe(**prod)
# Rank the feature columns of X by the fitted model's importances,
# highest first.
importances = rf.feature_importances_
included = X.columns.values
indices = np.argsort(importances)[::-1]

# Bar chart of the ten highest-ranked features.
pf = PlotlyFig(
    title='Feature by importances',
    y_title='Importance (%)',
    fontsize=20,
    ticksize=15,
    filename='E:/importances.html',
)

top_ten = indices[0:10]
pf.bar(x=included[top_ten], y=importances[top_ten])
#----------------------------------------------------------------
#----------------------------------------------------------------
#----------------------------------------------------------------
from sklearn.dummy import DummyRegressor
from sklearn.metrics import mean_absolute_error

# Build the pipeline from the "express" preset. Per the original note, other
# presets (it mentions "heavy" and "light") can be substituted depending on
# how precise the results need to be.
pipe = MatPipe.from_preset("express")
pipe.fit(train_df, target)  # fitting can take a long time

# Predict on the held-out frame and keep a copy of the predictions on disk.
prediction_df = pipe.predict(prediction_df)
prediction_df.to_csv('C:/Users/DELL/Documents/predictionK_VRH.csv')

# Baseline to compare against: a DummyRegressor fit on the same training rows.
dr = DummyRegressor()
dr.fit(train_df["composition"], train_df[target])
dummy_test = dr.predict(test_df["composition"])

# Mean absolute error of both predictors against the true test targets.
true = test_df[target]
matpipe_test = prediction_df[target + " predicted"]
mae_dummy = mean_absolute_error(true, dummy_test)
mae_matpipe = mean_absolute_error(true, matpipe_test)
print("K_VRH Dummy MAE: {} ".format(mae_dummy))
Exemplo n.º 10
0
 def test_n_jobs_powerup(self):
     """An n_jobs given to the "production" preset must reach the AF_KEY and ML_KEY entries."""
     requested_jobs = 1
     config = get_preset_config("production", n_jobs=requested_jobs)
     self.assertEqual(config[AF_KEY].n_jobs, requested_jobs)
     ml_kwargs = config[ML_KEY].tpot_kwargs
     self.assertEqual(ml_kwargs["n_jobs"], requested_jobs)
     # The resulting config must still be usable as MatPipe keyword args.
     MatPipe(**config)
Exemplo n.º 11
0
# Assemble the structure/target table and discard incomplete rows.
df = pd.DataFrame(columns=['structure', 'K_VRH'])
df['structure'] = centro_structs
df['K_VRH'] = K_VRH
df = df.dropna()

# Keep a copy of the cleaned data on disk and show summary statistics.
df.to_csv('centro_elastic.csv')
print(df.describe())

# Hold out 10% of the rows for testing; the fixed seed keeps the split
# repeatable.
target = "K_VRH"
train_df, test_df = train_test_split(
    df, test_size=0.1, shuffle=True, random_state=1)

# The frame handed to predict() must not contain the target column.
prediction_df = test_df.drop(target, axis=1)

# Fit the "express" preset pipeline on the training rows, then predict.
pipe = MatPipe.from_preset("express")
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)

# Baseline to compare against: a DummyRegressor fit on the same training rows.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])

# Error metrics of the MatPipe predictions against the true test targets.
true = test_df[target]
matpipe_test = prediction_df[target + " predicted"]
mae_matpipe = mean_absolute_error(true, matpipe_test)
mse_matpipe = mean_squared_error(true, matpipe_test)
Exemplo n.º 12
0
# Attach the target column, then discard rows holding infinities or NaNs.
# NOTE(review): the original carried a commented-out alternative definition,
# df['diel']*df['K_VRH'] -- confirm which one is intended.
df['Mh'] = mh
df = df.replace([np.inf, -np.inf], np.nan)
df = df.dropna()

# Keep a copy of the cleaned data on disk and show summary statistics.
df.to_csv('Mh_test.csv')
print(df.describe())

# Hold out 10% of the rows for testing; the fixed seed keeps the split
# repeatable.
target = 'Mh'
train_df, test_df = train_test_split(df,
                                     test_size=0.1,
                                     shuffle=True,
                                     random_state=1)

# Drop the target *column*. A bare drop(target) works along axis 0 and would
# look for a row labelled 'Mh', raising KeyError.
prediction_df = test_df.drop(columns=[target])
print(prediction_df.columns)

from automatminer import MatPipe

# "debug" preset with 28 parallel jobs.
pipe = MatPipe.from_preset("debug", n_jobs=28)
pipe.fit(train_df, target)

prediction_df = pipe.predict(prediction_df)

from sklearn.metrics import mean_absolute_error
from sklearn.dummy import DummyRegressor

# Baseline to compare against: a DummyRegressor fit on the same training rows.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])

# True targets and MatPipe predictions, ready for scoring.
true = test_df[target]
matpipe_test = prediction_df[target + " predicted"]
Exemplo n.º 13
0
)
# Build the MatPipe components explicitly rather than from a preset.
# `learner` is defined above this excerpt.
reducer = FeatureReducer(reducers=[])
cleaner = DataCleaner(
    feature_na_method="mean",
    max_na_frac=0.01,
    na_method_fit="drop",
    na_method_transform="mean",
)
autofeaturizer = AutoFeaturizer(n_jobs=10, preset="debug")

pipe_config = {
    "learner": learner,
    "reducer": reducer,
    "cleaner": cleaner,
    "autofeaturizer": autofeaturizer,
}

pipe = MatPipe(**pipe_config)

# autoload=False: each task is loaded explicitly inside the loop below.
mb = MatbenchBenchmark(autoload=False)

for task in mb.tasks:
    task.load()
    for fold in task.folds:
        # Fit the pipe on this fold's combined train and validation data
        # (the original comment calls the learner an RF).
        df_train = task.get_train_and_val_data(fold, as_type="df")
        pipe.fit(df_train, task.metadata.target)

        # Predict on the fold's test inputs, which exclude the target, and
        # pull out the prediction column.
        df_test = task.get_test_data(fold, include_target=False, as_type="df")
        predicted_df = pipe.predict(df_test)
        predictions = predicted_df[f"{task.metadata.target} predicted"]
        # NOTE(review): nothing in this excerpt stores `predictions`; confirm
        # the loop continues past this point in the full file.
Exemplo n.º 14
0
# Two-column table: input structures and the dielectric target.
df = pd.DataFrame(columns=['structure', 'dielectric'])
df['structure'] = centro_structs
df['dielectric'] = diel

# Discard incomplete rows, save the result and print summary statistics.
df = df.dropna()
df.to_csv('centro_diel.csv')
print(df.describe())

target = "dielectric"

# 90/10 train/test split with a fixed seed.
train_df, test_df = train_test_split(
    df,
    random_state=1,
    shuffle=True,
    test_size=0.1,
)

# predict() receives the test rows without the target column.
prediction_df = test_df.drop(target, axis=1)

# "express" preset with 28 parallel jobs and a cache file given by cache_src.
pipe = MatPipe.from_preset("express", n_jobs=28, cache_src="cache_diel.json")
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)

# Baseline to compare against: a DummyRegressor fit on the same training rows.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])

# Error metrics of the MatPipe predictions against the true test targets.
true = test_df[target]
matpipe_test = prediction_df[target + " predicted"]
mae_matpipe = mean_absolute_error(true, matpipe_test)
mse_matpipe = mean_squared_error(true, matpipe_test)
Exemplo n.º 15
0
# The most basic usage of automatminer requires interacting with only one class,
# MatPipe. This class, once fit, is a complete pipeline, and is able to
# transform compositions, structures, bandstructures, and DOS into property
# predictions.

# A configured MatPipe object will featurize, clean, and learn on a dataset
# automatically, and is made of 4 classes: AutoFeaturizer, DataCleaner,
# FeatureReducer, and an ML adaptor (e.g., TPOTAdaptor). The exact operations
# MatPipe executes are based entirely on how these 4 classes are configured.

# The easiest way to get started is by passing in a preset configuration to
# MatPipe. We can do this with the get_preset_config function; here, we'll use
# the "express" config, which will provide decent results in a reasonable time
# frame (an hour or two).
express_config = get_preset_config("express")
pipe = MatPipe(**express_config)

# Download an example dataset; the goal is to predict bulk moduli (K_VRH).
from sklearn.model_selection import train_test_split
from matminer.datasets.dataset_retrieval import load_dataset
elastic_df = load_dataset("elastic_tensor_2015")
df = elastic_df[["structure", "K_VRH"]]

# 80/20 train/test split with a fixed seed so the split is repeatable.
train, test = train_test_split(
    df, test_size=0.2, shuffle=True, random_state=20190301)

# Keep the true targets aside and remove them from the frame to predict on.
test_true = test['K_VRH']
test = test.drop("K_VRH", axis=1)

# MatPipe uses an sklearn-esque BaseEstimator API for fitting pipelines and
# predicting properties. Fitting a pipe trains it to the input data; predicting
# with a pipe will output predictions.