def make_pred_data_file_from(df, zatom1_range=(3, 5)):
    """Build the hydride prediction grid, write it to CSV and return it.

    Enumerates binary compounds ``X{n}H{m}`` for every element ``X`` whose
    atomic number lies in ``range(*zatom1_range)`` (noble gases are skipped),
    with ``n`` and ``m`` running over 1..10 and the pressure over
    50, 100, ..., 500.  Each row is::

        [reduced_formula, 0.0, pressure, *get_parameters(reduced_formula)]

    where the constant ``0.0`` sits in the second column (a placeholder;
    presumably the quantity to be predicted later -- confirm against the
    caller).

    Parameters
    ----------
    df : pandas.DataFrame
        Only ``df.columns.values`` is read; those labels become the column
        labels of the generated table, so ``df`` must have exactly
        ``3 + len(get_parameters(...))`` columns.
    zatom1_range : tuple of int, optional
        Arguments forwarded to ``trange`` for the atomic number of the
        non-hydrogen element.  Defaults to ``(3, 5)`` (Z = 3 and 4); the
        original source carried ``(3, 86)`` as a commented-out alternative
        for a full scan.

    Returns
    -------
    pandas.DataFrame
        The generated table with exact duplicate rows removed.  The same
        table is also written to the module-level path ``pred_dat_f``.
    """
    tc = 0.0
    yx = []
    zatom2 = 1  # atomic number Z = 1
    atom2 = periodic_table.get_el_sp(zatom2)  # atom2 is H

    for zatom1 in trange(*zatom1_range, desc='1st loop'):
        atom1 = periodic_table.get_el_sp(zatom1)
        if atom1.is_noble_gas:
            continue
        for natom1 in trange(1, 11, desc='2nd loop'):
            for natom2 in trange(1, 11, desc='3rd loop'):
                # The formula and its descriptors depend only on the
                # stoichiometry, not on the pressure, so compute them once
                # here instead of once per pressure value.
                str_mat = str(atom1) + str(natom1) + str(atom2) + str(natom2)
                formula = Composition(str_mat).reduced_formula
                params = list(get_parameters(formula))
                for p in trange(50, 550, 50, desc='4th loop'):
                    yx.append([formula, tc, float(p)] + params)

    properties = df.columns.values
    df_test = pd.DataFrame(yx, columns=properties)
    # Different (n, m) pairs can reduce to the same formula (reduced_formula),
    # which yields duplicate rows; remove them with drop_duplicates.
    df_test = df_test.drop_duplicates()
    df_test.to_csv(pred_dat_f)
    return df_test
# --- Training table ---------------------------------------------------------
# Convert the raw (space-prefixed) columns into clean numeric columns.
df['Tc'] = df[' Tc [K]'].apply(float)
df['P'] = df[' P [GPa]'].apply(float)
# Descriptors are computed from the formula exactly as read; the formula
# column itself is stripped of surrounding whitespace only afterwards.
df['list'] = df['formula'].apply(get_parameters)
df['formula'] = df['formula'].apply(lambda x: x.strip())
# Spread the descriptor list over one column per entry: prm0, prm1, ...
# 'H3S' is only used as a sample formula to obtain the descriptor count.
for i in range(len(get_parameters('H3S'))):
    name = 'prm' + str(i)
    df[name] = df['list'].apply(lambda x: x[i])
# Drop the raw inputs and the intermediate list column.
df = df.drop([' Tc [K]', ' P [GPa]', 'list'], axis=1)
df.to_csv("tc_train.csv")
print(df.head(5))

# --- Prediction grid --------------------------------------------------------
# Rows are [reduced_formula, tc, pressure, *descriptors] for X{n}H{m} with
# Z(X) in 3..9 (noble gases skipped), n and m in 1..5 and pressure in
# 100, 200, 300, 400.  tc is a constant 0.0 placeholder.
tc = 0.0
yx = []
zatom2 = 1
atom2 = periodic_table.get_el_sp(zatom2)
for zatom1 in range(3, 10):
    atom1 = periodic_table.get_el_sp(zatom1)
    if atom1.is_noble_gas:
        continue
    for natom1 in range(1, 6):
        for natom2 in range(1, 6):
            # The formula and its descriptors do not depend on the pressure,
            # so compute them once per stoichiometry rather than once per
            # pressure value.
            str_mat = str(atom1) + str(natom1) + str(atom2) + str(natom2)
            material = Composition(str_mat)
            formula = material.reduced_formula
            params = list(get_parameters(formula))
            for ip in range(100, 500, 100):
                yx.append([formula, tc, float(ip)] + params)

# Reuse the training-table column labels so both tables share one layout.
properties = df.columns.values
df_test = pd.DataFrame(yx, columns=properties)
# Plot the training-set predictions against the training targets.
fig = yyplot(y_train, y_pred)

#%%
# Novelty detection with a One-Class SVM that reuses the kernel and gamma
# selected by the hyperparameter search.
# NOTE(review): these values were tuned for the 'model' step of the searched
# estimator; if that estimator applies preprocessing before 'model', X_train
# is not transformed the same way here -- confirm.
best_params = gscv.best_params_
clf = OneClassSVM(
    nu=0.003,
    kernel=best_params['model__kernel'],
    gamma=best_params['model__gamma'],
)
clf.fit(X_train)

y_pred = gscv.predict(X_test)      # predicted target for every test row
reliability = clf.predict(X_test)  # -1 marks an outlier

output = 'test2.csv'
data = []
# Each X_test row is read as: [0], [1] atomic numbers, [2], [3] atom counts,
# [4] the value written to the 'P' column.
for row, tc_pred, flag in zip(X_test, y_pred, reliability):
    satom1 = periodic_table.get_el_sp(int(row[0]))
    satom2 = periodic_table.get_el_sp(int(row[1]))
    natom1 = int(row[2])
    natom2 = int(row[3])
    str_mat = ''.join(str(part) for part in (satom1, natom1, satom2, natom2))
    formula = Composition(str_mat).reduced_formula
    # P and Tc are truncated to integers for the report.
    data.append((formula, int(row[4]), int(tc_pred), flag))

properties = ['formula', 'P', 'Tc', 'AD']
df = pd.DataFrame(data, columns=properties)
# Highest predicted Tc first.
df = df.sort_values('Tc', ascending=False)
df.to_csv(output, index=False)
# To export only the rows flagged as inliers, filter with df[df.AD == 1]
# before writing.
print('Predicted Tc is written in file {}'.format(output))
# NOTE(review): the next five statements look like the tail of a per-record
# loop whose header lies above this excerpt; indent them to match that loop.
# They assemble one row as [tc, *atomicNo, *natom, pressure] and append a
# copy of it to yx.
features.append(tc)
features.extend(atomicNo)
features.extend(natom)
features.append(pressure)
yx.append(features[:])

# Turn the collected rows into an array: column 0 is the target, the
# remaining columns are the inputs.
yx = np.array(yx)
y_train = yx[:, 0]
X_train = yx[:, 1:]

# Optional sanity check, disabled by default: rebuild a composition from the
# first training row and print a few intermediate values.
# NOTE(review): the extent of this block is reconstructed from collapsed
# text; the trailing for-loop is assumed to belong to it -- confirm.
ltest = False
if (ltest):
    X = X_train
    print(X[0][:4])
    atom1 = periodic_table.get_el_sp(X[0][0])
    atom2 = periodic_table.get_el_sp(X[0][1])
    str_mat = str(atom1) + str(X[0][2]) + str(atom2) + str(X[0][3])
    material = Composition(str_mat)
    print(type(material.reduced_formula))
    for i in range(1, 2):
        print(periodic_table.get_el_sp(i))

# Save the training table; the six labels imply two atomic numbers and two
# atom counts per row (presumably element 1 / element 2 -- confirm).
properties = ['Tc', 'Z1', 'Z2', 'N1', 'N2', 'P']
df = pd.DataFrame(yx, columns=properties)
df.to_csv("tc_train.csv")

# Reset the accumulators for the next table; atom2 is the element with
# atomic number 1.
tc = 0.0
yx = []
zatom2 = 1
atom2 = periodic_table.get_el_sp(zatom2)