Примеры использования Python Featurize

Язык программирования: Python

Пространство имен/Пакет: automatminer.featurize

Класс/Тип: Featurize

Примеров на hotexamples.com: 4

Найдено 4 примера использования Python Featurize. Это лучшие примеры кода на Python для automatminer.featurize.Featurize, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Featurize(4)

featurize_formula(2)

auto_featurize(1)

Пример #1

Показать файл

Файл: tricky_target.py Проект: theiman112860/automatminer

    'CoulombMatrix', 'SineCoulombMatrix', 'OrbitalFieldMatrix',
    'MinimumRelativeDistances', 'ElectronicRadialDistributionFunction'
]
# Columns of the input frame that are handed to auto_featurize() below.
FEATUREIZE_THESE_COLUMNS = ["formula", "structure"]
# When True, the target is addressed as a ('Input Data', <name>) tuple and the
# same flag is forwarded to Featurize(multiindex=...).
# NOTE(review): presumably the featurized frame then has two-level column
# labels with 'Input Data' as the top level - confirm against Featurize.
MULTIINDEX = True
if MULTIINDEX:
    TARGET = ('Input Data', TARGET)

# actual pipeline: load -> optional subsample -> featurize -> preprocess ->
# train/test split. TARGET, LIMIT, IGNORE_THESE_COLUMNS and
# EXCLUDED_FEATURIZERS are defined earlier in the file, outside this excerpt.
df_init = load_castelli_perovskites()
# Subsample only when LIMIT is truthy and smaller than the dataset; the rows
# are picked by position, without replacement. No seed is set in the visible
# lines, so the selection may differ between runs.
if LIMIT and LIMIT < len(df_init):
    df_init = df_init.iloc[np.random.choice(len(df_init), LIMIT,
                                            replace=False)]

# Configure the featurizer with the ignore/exclude lists defined above.
featzer = Featurize(ignore_cols=IGNORE_THESE_COLUMNS,
                    exclude=EXCLUDED_FEATURIZERS,
                    multiindex=MULTIINDEX,
                    drop_featurized_col=True)

# Featurize the selected input columns of the (possibly subsampled) frame.
df = featzer.auto_featurize(df_init,
                            input_cols=FEATUREIZE_THESE_COLUMNS,
                            guess_oxidstates=True)

# Preprocess the featurized frame; PreProcess is told which column is the
# target.
prep = PreProcess(target=TARGET)
df = prep.preprocess(df)

# Features are every column except TARGET; labels are the TARGET column.
# No split sizes or random_state are passed here, so train_test_split falls
# back to its own defaults.
X_train, X_test, y_train, y_test = train_test_split(df.drop(TARGET, axis=1),
                                                    df[TARGET])

# Timestamp taken right before the model is built (the model code is cut off
# in this excerpt).
print('start timing...')
start_time = time()
tpot = TPOTAutoML(mode=MODE,

Пример #2

Показать файл

Файл: feature_importance_regression.py Проект: theiman112860/automatminer

from automatminer.preprocess import PreProcess
from matminer import PlotlyFig
from scipy.stats import linregress
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# inputs
# NOTE: RS and mode are not used anywhere in the visible part of this excerpt;
# presumably they are consumed further down (e.g. as a random seed) - confirm.
target = 'gap expt'
RS = 24
mode = 'regression'
# When True, the target is addressed as an ('Input Data', <name>) tuple and
# the same flag is forwarded to Featurize(multiindex=...).
MULTIINDEX = True
if MULTIINDEX:
    target = ('Input Data', target)

# Load the dataset and set up a featurizer that excludes two featurizers by
# name.
df_init = load_expt_gap()
featzer = Featurize(exclude=['CohesiveEnergy', 'AtomicPackingEfficiency'],
                    multiindex=MULTIINDEX)

# Run all formula-based featurizers (minus the excluded ones), with
# oxidation-state guessing turned off.
df = featzer.featurize_formula(df_init,
                               featurizers='all',
                               guess_oxidstates=False)

# Preprocess the featurized frame; PreProcess is told which column is the
# target.
prep = PreProcess(target=target)
df = prep.preprocess(df)

# Show the first rows and write the preprocessed frame to 'test.csv' (a
# relative path, so it lands in the current working directory).
print(df.head())
df.to_csv('test.csv')

# Features are every column except the target; labels are the target column.
# No split sizes or random_state are passed here, so train_test_split falls
# back to its own defaults.
X_train, X_test, y_train, y_test = train_test_split(df.drop(target, axis=1),
                                                    df[target])

model = RandomForestRegressor(n_estimators=100,

Пример #3

Показать файл

Файл: benchmark_autosklearn_gfa.py Проект: theiman112860/automatminer

# Relative directories under example_data/matbench_data/autosklearn_output/.
# They are not used in the visible lines; presumably they are passed to
# auto-sklearn further down - confirm.
model_tmp_path = r'example_data/matbench_data/autosklearn_output/tmp/'
model_output_path = r'example_data/matbench_data/autosklearn_output/output/'

# CSV path for the featurized data. feature_output_path and data_name are
# defined earlier in the file, outside this excerpt.
feature_output_file = \
    os.path.join(feature_output_path,
                 "{}_all_featurized_data.csv".format(data_name))

# Reuse the featurized CSV if it already exists; otherwise featurize from
# scratch under a profiler.
if os.path.exists(feature_output_file):
    # The first CSV column is read back as the index.
    df = pd.read_csv(feature_output_file, index_col=0)
else:
    df_init = load_glass_ternary_landolt()

    # Profile the featurize + preprocess steps.
    # NOTE(review): Profile is presumably cProfile.Profile - the import is not
    # visible in this excerpt.
    prof = Profile()
    prof.enable()

    # Run all formula-based featurizers with default Featurize settings, then
    # preprocess with max_colnull=0.1.
    featzer = Featurize()
    df_feats = featzer.featurize_formula(df_init, featurizers="all")
    prep = PreProcess(max_colnull=0.1)
    df = prep.preprocess(df_feats)

    # Finish profiling and print the top 5 entries sorted by "time", with
    # directory names stripped from the file paths.
    prof.create_stats()
    print("featurize time:\n")
    pstats.Stats(prof).strip_dirs().sort_stats("time").print_stats(5)

    # Make sure the output directory exists, reporting which case applied.
    if os.path.exists(feature_output_path):
        print("output path: {} exists!".format(feature_output_path))
    else:
        os.makedirs(feature_output_path)
        print("create output path: {} successful!".format(feature_output_path))

    prof.dump_stats(

Пример #4

Показать файл

Файл: benchmark_tpot_gfa.py Проект: theiman112860/automatminer

from automatminer.featurize import Featurize
from automatminer.preprocess import PreProcess
from sklearn.model_selection import train_test_split
from time import time

# user inputs
# target: name of the label column; RS: seed passed to train_test_split below.
# timelimitmins, model_type and scoring are not used in the visible part of
# this excerpt; presumably they configure the TPOT run further down.
target = 'gfa'
RS = 29
timelimitmins = None
print('timelimitmins = ', timelimitmins)
model_type = 'classification'
scoring = 'f1'

# load and featurize: the 'phase' column is ignored by the featurizer and
# featurizer errors are tolerated (ignore_errors=True).
df_init = load_glass_ternary_landolt()
featzer = Featurize(ignore_cols=['phase'], ignore_errors=True)

df_feats = featzer.featurize_formula(df_init, featurizers='all')

# preprocessing of the data; the result is written to
# '<target>_tpot_trained_data.csv' in the current working directory.
prep = PreProcess(max_colnull=0.1)
df = prep.preprocess(df_feats)
df.to_csv('{}_tpot_trained_data.csv'.format(target))
print(df.shape)
print(df.head())

# The data must be free of missing values before it is split and trained on.
# This is an explicit check rather than an `assert`, because assert statements
# are removed when Python runs with -O and the check would silently vanish.
n_missing = int(df.isnull().sum().sum())
if n_missing:
    raise ValueError(
        'preprocessed data still contains {} missing value(s)'.format(
            n_missing))

# train/test split (development is within tpot crossvalidation)
# Features are passed as a plain array (.values); labels stay a Series.
# The 75/25 split is made reproducible through random_state=RS.
X_train, X_test, y_train, y_test = \
    train_test_split(df.drop(target, axis=1).values,
                     df[target], train_size=0.75, test_size=0.25,
                     random_state=RS)