def to_csv(self, data_all):
    if self.store:
        if isinstance(self.store, str):
            path = self.store
        else:
            path = os.getcwd()
        file_new_name = "_".join((str(self.pop), str(self.gen),
                                  str(self.mutate_prob), str(self.mate_prob),
                                  str(time.time())))
        try:
            st = Store(path)
            st.to_csv(data_all, file_new_name)
            print("store data to ", path, file_new_name)
        except (IOError, PermissionError):
            st = Store(os.getcwd())
            st.to_csv(data_all, file_new_name)
            print("store data to ", os.getcwd(), file_new_name)
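# A hedged usage sketch: GARunner is a hypothetical stand-in exposing only the
# attributes to_csv reads; the real host class is not shown in this file.
import os
import time

class GARunner:
    to_csv = to_csv  # reuse the method defined above
    def __init__(self):
        self.pop, self.gen = 500, 20
        self.mutate_prob, self.mate_prob = 0.8, 0.9
        self.store = True  # or a writable directory path as str

GARunner().to_csv({"gen1_pop0": {"score": 0.9}})
# -> writes "500_20_0.8_0.9_<time.time()>.csv" into os.getcwd()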
                    y_predict, marker='^', s=50, alpha=0.7, c='green',
                    linewidths=None, edgecolors='blue')
    ax.plot(x, y_predict, '-', ms=5, lw=2, alpha=0.7, color='green')
    # ax.plot([min(x), max(x)], [min(x), max(x)], '--', ms=5, lw=2, alpha=0.7, color='black')
    plt.xlabel(strx)
    plt.legend((l1, l2), (stry1, stry2), loc='upper left')
    plt.ylabel(stry)
    plt.show()


a = np.arange(2000, 2020)
scatter2(a, y[::-1], y_[::-1], strx='year', stry="y($10^4$T)",
         stry1='y_true($10^4$T)', stry2='y_predict($10^4$T)')

# export
print(x_frame.iloc[:, :].columns.values[ba.support_])
store.to_pkl_sk(ba.estimator_, "model")
all_import["y_predict"] = y_
store.to_csv(all_import, "predict")
data225_import = data_import.iloc[np.where(data_import['group_number'] == 225)[0]]
data221_import = data_import.iloc[np.where(data_import['group_number'] == 221)[0]]
data216_225_221import = pd.concat((data216_import, data225_import, data221_import))

list_name = data.csv.list_name
list_name = list_name.values.tolist()
list_name = [[i for i in _ if isinstance(i, str)] for _ in list_name]
# grid = itertools.product(list_name[2], list_name[12], list_name[32])

select = ['volume', 'radii covalent', 'electronegativity(martynov&batsanov)', 'electron number']
select = ['volume'] + [j + "_%i" % i for j in select[1:] for i in range(2)]

X_frame = data225_import[select]
y_frame = data225_import['exp_gap']
X = X_frame.values
y = y_frame.values

name, rep_name = getName(X_frame)
x0, x1, x2, x3, x4, x5, x6 = rep_name
expr01 = sympy.log(1 / (x1 + x2) * x0 / (x5 + x6) * x4 / x3)

results = calculateExpr(expr01, pset=None, x=X, y=y, score_method=r2_score,
                        add_coeff=True, del_no_important=False,
                        filter_warning=True, terminals=rep_name,
                        inter_add=True, iner_add=False, random_add=False)
print(select)
print(results)
store.to_csv(data216_225_221import, "plot221225216")
def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """
    Parameters
    ----------
    population
    toolbox
    cxpb
    mutpb
    ngen
    stats
    halloffame
    verbose
    pset
    store

    Returns
    -------

    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = ['gen', 'pop'] + (stats.fields if stats else [])

    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in population if not ind.fitness.valid]
    # fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    fitnesses = toolbox.parallel(iterable=population)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit[0],
        ind.expr = fit[1]
        ind.dim = fit[2]
        ind.withdim = fit[3]

    add_ind = toolbox.select_kbest_target_dim(population, K_best=0.1 * len_pop)
    if halloffame is not None:
        halloffame.update(add_ind)

    record = stats.compile(population) if stats else {}
    logbook.record(gen=0, nevals=len(population), **record)
    if verbose:
        print(logbook.stream)

    data_all = {}
    # Begin the generational process
    random.setstate(rst)
    for gen in range(1, ngen + 1):
        rst = random.getstate()
        if store:
            rst = random.getstate()
            target_dim = toolbox.select_kbest_target_dim.keywords['dim_type']
            subp = functools.partial(sub, subed=pset.rep_name_list,
                                     subs=pset.real_name_list)
            data = {
                "gen{}_pop{}".format(gen, n): {
                    "gen": gen,
                    "pop": n,
                    "score": i.fitness.values[0],
                    "expr": str(subp(i.expr)),
                    "with_dim": 1 if i.withdim else 0,
                    "dim_is_target_dim": 1 if i.dim in target_dim else 0,
                    "gen_dim": "{}{}".format(gen, 1 if i.withdim else 0),
                    "gen_target_dim": "{}{}".format(gen, 1 if i.dim in target_dim else 0),
                    "score_dim": i.fitness.values[0] if i.withdim else 0,
                    "score_target_dim": i.fitness.values[0] if i.dim in target_dim else 0,
                }
                for n, i in enumerate(population) if i is not None
            }
            data_all.update(data)
            random.setstate(rst)

        # select_gs the next generation individuals
        offspring = toolbox.select_gs(population, len_pop)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        # fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        # fitnesses = parallelize(n_jobs=3, func=toolbox.evaluate, iterable=invalid_ind, respective=False)
        fitnesses = toolbox.parallel(iterable=invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.dim = fit[2]
            ind.withdim = fit[3]

        add_ind = toolbox.select_kbest_target_dim(population, K_best=0.1 * len_pop)
        add_ind2 = toolbox.select_kbest_dimless(population, K_best=0.2 * len_pop)
        add_ind3 = toolbox.select_kbest(population, K_best=5)
        offspring += add_ind
        offspring += add_ind2
        offspring += add_ind3

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(add_ind)
            if len(halloffame.items) > 0 and halloffame.items[-1].fitness.values[0] >= 0.95:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break

        # Replace the current population by the offspring
        population[:] = offspring
        # Append the current generation statistics to the logbook
        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, nevals=len(population), **record)
        if verbose:
            print(logbook.stream)
        random.setstate(rst)

    if store:
        # write the collected per-generation data; use a fresh name so the
        # `store` flag argument is not shadowed by the Store instance
        st = Store()
        st.to_csv(data_all)
    return population, logbook
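# A hedged sketch of the toolbox surface eaSimple expects, inferred from the
# calls above. The stand-in implementations (evaluate_all, select_kbest) are
# illustrative only, not the library's own operators.
import random
from deap import base, tools

def evaluate_all(iterable):
    """Stand-in for toolbox.parallel: one (score, expr, dim, withdim) per individual."""
    return [(random.random(), "x0 + x1", None, True) for _ in iterable]

def select_kbest(pop, K_best=5, dim_type=None):
    """Stand-in k-best selector: top-K evaluated individuals by fitness."""
    scored = [i for i in pop if i.fitness.valid]
    return sorted(scored, key=lambda i: i.fitness.values[0], reverse=True)[:int(K_best)]

toolbox = base.Toolbox()
toolbox.register("mate", tools.cxTwoPoint)               # used inside varAnd
toolbox.register("mutate", tools.mutFlipBit, indpb=0.1)  # used inside varAnd
toolbox.register("select_gs", tools.selTournament, tournsize=3)
toolbox.register("parallel", evaluate_all)
# eaSimple reads select_kbest_target_dim.keywords['dim_type'], so the keyword
# must be bound at registration time (register builds a functools.partial).
toolbox.register("select_kbest_target_dim", select_kbest, dim_type="coef")
toolbox.register("select_kbest_dimless", select_kbest)
toolbox.register("select_kbest", select_kbest)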
mp = self.pareto_method()
partotimei = list(list(zip(*mp))[0])
tabletimei = np.vstack([self.resultcv_score_all_0, self.resultcv_score_all_1,
                        self.resultcv_score_all_2, self.resultcv_score_all_3,
                        self.resultcv_score_all_4, self.resultcv_score_all_5,
                        self.resultcv_score_all_6, self.resultcv_score_all_7])
parto.extend(partotimei)
table.append(tabletimei)

table = np.array(table)
means_y = np.mean(table, axis=0).T
result = pd.DataFrame(means_y)
all_mean = np.mean(means_y, axis=1).T
select_support = np.zeros(len(index_slice))
mean_parto_index = self._pareto(means_y)
select_support[mean_parto_index] = 1
result["all_mean"] = all_mean
result["parto_support"] = select_support
result['index_all_abbr'] = index_all_abbr
result['index_all_name'] = index_all_name
result['index_all'] = index_slice
result = result.sort_values(by="all_mean", ascending=False)
store.to_csv(result, "result")
# tables = table.reshape((-1, table.shape[2]), order="F").T
# store.to_csv(tables, "100_times_y")
method_all = ['SVR-set', "GPR-set", "RFR-em", "AdaBR-em", "DTR-em", "LASSO-L1", "BRR-L1"]
methods = method_pack(method_all=method_all, me="reg", gd=True)

pre_y = []
ests = []
for name, methodi in zip(method_all, methods):
    methodi.cv = 5
    methodi.scoring = "neg_root_mean_squared_error"
    gd = methodi.fit(X=x_, y=y_)
    score = gd.best_score_
    est = gd.best_estimator_
    print(name, "neg_root_mean_squared_error", score)
    score = cross_val_score(est, X=x_, y=y_, scoring="r2").mean()
    print(name, "r2", score)
    pre_yi = est.predict(x)
    pre_y.append(pre_yi)
    ests.append(est)
    store.to_pkl_pd(est, name)

pre_y.append(y)
pre_y = np.array(pre_y).T
pre_y = pd.DataFrame(pre_y)
pre_y.columns = method_all + ["real_y"]
store.to_csv(pre_y, "wrtem_result")
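# A minimal sklearn-only equivalent of one loop iteration above, since
# method_pack's internals are not shown in this file; the SVR/param_grid
# choices and the placeholder data are assumptions for illustration.
import numpy as np
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.svm import SVR

x_demo, y_demo = np.random.rand(50, 4), np.random.rand(50)  # placeholder data
gd_demo = GridSearchCV(SVR(), param_grid={"C": [1, 10]}, cv=5,
                       scoring="neg_root_mean_squared_error")
gd_demo.fit(x_demo, y_demo)
print("SVR", "neg_root_mean_squared_error", gd_demo.best_score_)
print("SVR", "r2", cross_val_score(gd_demo.best_estimator_, x_demo, y_demo,
                                   scoring="r2", cv=5).mean())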
# # all_import_title = com_data.join(ele_ratio)
all_import_title = all_import_title.join(depart_elements_table)

"""add ele density"""
select2 = ['electron number_0', 'electron number_1', 'cell volume']
x_rame = (all_import_title['electron number_0'] +
          all_import_title['electron number_1']) / all_import_title['cell volume']
all_import_title.insert(10, "electron density", x_rame)

store.to_csv(all_import_title, "all_import_title", reverse=False)

all_import = all_import_title.drop(
    ['name_number', "name", "structure", "structure_type", "space_group",
     "reference", 'material_id', 'composition', "com_0", "com_1"], axis=1)
all_import = all_import.iloc[np.where(all_import['group_number'] == 225)[0]]
all_import = all_import.drop(['group_number'], axis=1)
store.to_csv(all_import, "all_import", reverse=False)


def get_abbr():
    name = ["electron density", "cell density", 'cell volume', "component"]
def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """
    Parameters
    ----------
    population
    toolbox
    cxpb
    mutpb
    ngen
    stats
    halloffame
    verbose
    pset
    store

    Returns
    -------

    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = [] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)

    for gen in range(1, ngen + 1):
        """evaluation"""
        rst = random.getstate()
        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        fitnesses = toolbox.parallel(iterable=population)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.y_dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)

        rst = random.getstate()
        """elite"""
        add_ind = []
        add_ind1 = toolbox.select_kbest_target_dim(population, K_best=0.05 * len_pop)
        add_ind += add_ind1
        elite_size = len(add_ind)
        random.setstate(rst)

        rst = random.getstate()
        """score"""
        random.setstate(rst)

        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind1)
            if len(halloffame.items) > 0 and halloffame.items[-1].fitness.values[0] >= 0.9999:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)

        rst = random.getstate()
        """Dynamic output"""
        record = stats.compile_(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)
        if verbose:
            print(logbook.stream)
        random.setstate(rst)

        """crossover, mutate"""
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)

    if store:
        # data_all is not populated in this variant; guard the write so a
        # store=False run does not emit an empty CSV
        st = Store()
        st.to_csv(data_all)
    return population, logbook
def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
             halloffame=None, verbose=__debug__, pset=None, store=True):
    """
    Parameters
    ----------
    population
    toolbox
    cxpb
    mutpb
    ngen
    stats
    halloffame
    verbose
    pset
    store

    Returns
    -------

    """
    rst = random.getstate()
    len_pop = len(population)
    logbook = Logbook()
    logbook.header = [] + (stats.fields if stats else [])
    data_all = {}
    random.setstate(rst)

    for gen in range(1, ngen + 1):
        """evaluation"""
        rst = random.getstate()
        """score"""
        invalid_ind = [ind for ind in population if not ind.fitness.valid]
        fitnesses = toolbox.parallel(iterable=population)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit[0],
            ind.expr = fit[1]
            ind.dim = fit[2]
            ind.withdim = fit[3]
        random.setstate(rst)

        rst = random.getstate()
        """elite"""
        add_ind = []
        add_ind1 = toolbox.select_kbest_target_dim(population, K_best=0.01 * len_pop)
        add_ind2 = toolbox.select_kbest_dimless(population, K_best=0.01 * len_pop)
        add_ind3 = toolbox.select_kbest(population, K_best=5)
        add_ind += add_ind1
        add_ind += add_ind2
        add_ind += add_ind3
        elite_size = len(add_ind)
        random.setstate(rst)

        rst = random.getstate()
        """score"""
        if store:
            subp = functools.partial(sub, subed=pset.rep_name_list,
                                     subs=pset.real_name_list)
            data = {"gen{}_pop{}".format(gen, n): {"gen": gen,
                                                   "pop": n,
                                                   "score": i.fitness.values[0],
                                                   "expr": str(subp(i.expr)),
                                                   }
                    for n, i in enumerate(population) if i is not None}
            data_all.update(data)
        random.setstate(rst)

        rst = random.getstate()
        """record"""
        if halloffame is not None:
            halloffame.update(add_ind3)
            if len(halloffame.items) > 0 and halloffame.items[-1].fitness.values[0] >= 0.95:
                print(halloffame.items[-1])
                print(halloffame.items[-1].fitness.values[0])
                break
        random.setstate(rst)

        rst = random.getstate()
        """Dynamic output"""
        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, pop=len(population), **record)
        if verbose:
            print(logbook.stream)
        random.setstate(rst)

        """crossover, mutate"""
        offspring = toolbox.select_gs(population, len_pop - elite_size)
        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        rst = random.getstate()
        """re-run"""
        offspring.extend(add_ind)
        population[:] = offspring
        random.setstate(rst)

    if store:
        # write the collected per-generation data; a fresh name avoids
        # shadowing the `store` flag argument
        st = Store()
        st.to_csv(data_all)
    return population, logbook
# -*- coding: utf-8 -*-

# @Time    : 2019/12/20 15:11
# @Email   : [email protected]
# @Software: PyCharm
# @License : BSD 3-Clause

from featurebox.tools.exports import Store
from featurebox.tools.imports import Call

store = Store(r'C:\Users\Administrator\Desktop\band_gap_exp\4.symbol')
data = Call(r'C:\Users\Administrator\Desktop\band_gap_exp\4.symbol')

store.to_csv(data.filename)
cov = pd.DataFrame(corr.cov_shrink)
cov = cov.set_axis(X_frame_abbr, axis='index', inplace=False)
cov = cov.set_axis(X_frame_abbr, axis='columns', inplace=False)

fig = plt.figure()
fig.add_subplot(111)
sns.heatmap(cov, vmin=-1, vmax=1, cmap="bwr", linewidths=0.3,
            xticklabels=True, yticklabels=True, square=True,
            annot=True, annot_kws={'size': 3})
plt.show()

corr_plot(corr.cov_shrink, X_frame_abbr, left_down="fill", right_top="pie",
          threshold_right=0, front_raito=0.5)

list_name, list_abbr = name_to_name(X_frame_name, X_frame_abbr,
                                    search=corr.list_count, search_which=0,
                                    return_which=(1, 2), two_layer=True)

store.to_csv(cov, "cov")
store.to_txt(list_name, "list_name")
store.to_txt(list_abbr, "list_abbr")

# 2
select = ['volume', 'destiny', 'lattice constants a', 'lattice constants c',
          'radii covalent', 'radii ionic(shannon)',
          'distance core electron(schubert)', 'latent heat of fusion',
          'energy cohesive brewer', 'total energy',
          'charge nuclear effective(slater)', 'valence electron number',
          'electronegativity(martynov&batsanov)',
          'volume atomic(villars,daams)']  # human select

select_index, select_abbr = name_to_name(X_frame_name, X_frame_abbr,
                                         search=select, search_which=1,
                                         return_which=(0, 2), two_layer=False)

cov_select = corr.cov_shrink[select_index, :][:, select_index]
"""for element site""" com_mp = pd.Series([i.to_reduced_dict for i in composition_mp]) # com_mp = composition_mp all_import = data.csv.all_import id_structures = data.id_structures structures = id_structures vor_area = count_voronoinn(structures, mess="area") vor_dis = count_voronoinn(structures, mess="face_dist") vor = pd.DataFrame() vor.insert(0, 'vor_area0', vor_area[:, 0]) vor.insert(0, 'face_dist0', vor_dis[:, 0]) vor.insert(0, 'vor_area1', vor_area[:, 1]) vor.insert(0, 'face_dist1', vor_dis[:, 1]) data_title = all_import[[ 'name_number', "x_name", "structure", "structure_type", "space_group", "reference", 'material_id', 'composition', 'exp_gap', 'group_number' ]] data_tail = all_import.drop([ 'name_number', "x_name", "structure", "structure_type", "space_group", "reference", 'material_id', 'composition', 'exp_gap', 'group_number' ], axis=1) data_import = data_title.join( vor[["face_dist0", "vor_area0", "face_dist1", "vor_area1"]]) data_import = data_import.join(data_tail) store.to_csv(data_import, "all_import")
"""union""" index_all = [tuple(index[0]) for _ in index_all for index in _[:10]] index_all = list(set(index_all)) """get x_name and abbr""" index_all_name = name_to_name(X_frame.columns.values, search=[i for i in index_all], search_which=0, return_which=(1,), two_layer=True) index_all_name = [list(set([re.sub(r"_\d", "", j) for j in i])) for i in index_all_name] [i.sort() for i in index_all_name] index_all_abbr = name_to_name(name_init, abbr_init, search=index_all_name, search_which=1, return_which=2, two_layer=True) store.to_pkl_pd(index_all, "index_all") store.to_csv(index_all_name, "index_all_name") store.to_csv(index_all_abbr, "index_all_abbr") ugs = UGS(estimator_all, index_all, estimator_n=[2, 3], n_jobs=3) ugs.fit(X, y) # re = gs.cv_score_all(index_all) binary_distance = ugs.cal_binary_distance_all(index_all, estimator_i=3) # slice_k = gs._cv_predict_all(estimator_i=3) groups = ugs.cal_group(estimator_i=3, printing=True, print_noise=0.2, pre_binary_distance_all=binary_distance) ugs.cluster_print(binary_distance, highlight=[1, 2, 3]) # groups = ugs.cal_t_group(printing=False, pre_group=None) # ss=ugs.select_ugs(alpha=0.01) # results = gs.select_gs(alpha=0.01) # gs.cal_group(eps=0.10, estimator_i=1, printing=True, pre_binary_distance_all=slice_g, print_noise=0.1, # node_name=index_all_abbr)
                        batch_size=40, re_hall=3,
                        n_jobs=12, mate_prob=0.9, max_value=5,
                        mutate_prob=0.8, tq=False, dim_type="coef",
                        re_Tree=0, store=False, random_state=i,
                        verbose=True,
                        stats={"fitness_dim_max": ["max"],
                               "dim_is_target": ["sum"]},
                        add_coef=True, inner_add=False,
                        cal_dim=True, vector_add=True,
                        personal_map=False)
    # b = time.time()
    exps = bl.run()
    print([i.coef_expr for i in exps])
    score = exps.keys[0].values[0]
    name = group_str(exps[0], pset0, feature_name=True)
    dicts["s%s" % i] = [score, name]
    print(i)

store.to_csv(dicts, model="a+")
    return com


ele_ratio = comdict_to_df(composition_mp)

"""get structure"""
# with MPRester('Di2IZMunaeR8vr9w') as m:
#     ids = [i for i in com_data['material_id']]
#     structures = [m.get_structure_by_material_id(i) for i in ids]
#     store.to_pkl_pd(structures, "id_structures")
# id_structures = pd.read_pickle(
#     r'C:\Users\Administrator\Desktop\band_gap_exp\1.generate_data\id_structures.pkl.pd')

"""get departed element feature"""
departElementProPFeature = DepartElementFeaturizer(elem_data=select_element_table,
                                                   n_composition=2, n_jobs=4)
departElement = departElementProPFeature.fit_transform(composition_mp)

"""join"""
depart_elements_table = departElement.set_axis(com_data.index.values,
                                               axis='index', inplace=False)
ele_ratio = ele_ratio.set_axis(com_data.index.values, axis='index', inplace=False)

all_import_title = com_data.join(ele_ratio)
all_import_title = all_import_title.join(depart_elements_table)
store.to_csv(all_import_title, "all_import_title")

all_import = all_import_title.drop(
    ['name_number', "name", "structure", "structure_type", "space_group",
     "reference", 'material_id', 'composition', "com_0", "com_1"], axis=1)
store.to_csv(all_import, "all_import")
    m = MPRester(api_key)
    ids = m.query(criteria={
        # 'pretty_formula': {"$in": name_list},
        'nelements': {"$lt": 5, "$gt": 3},
        # 'spacegroup.number': {"$in": [225]},
        'nsites': {"$lt": 20},
        'formation_energy_per_atom': {"$lt": 0},
        # "elements": {"$in": ["Al", "Co", "Cr", "Cu", "Fe", 'Ni'], "$all": "O"},
        # "elements": {"$in": list(combinations(["Al", "Co", "Cr", "Cu", "Fe", 'Ni'], 5))}
    }, properties=["material_id"])
    print("number %s" % len(ids))
    return ids


if __name__ == "__main__":
    list1 = ['CsCl', 'CsBr', 'CsI', 'CsSb', 'LiF', 'KF', 'RbF', 'CsF', 'MgO',
             'CdO', 'MnO', 'VO', 'CaO', 'SrO', 'BaO', 'EuO', 'ScN', 'YN',
             'ErN', 'HoN', 'DyN', 'GdN', 'EuN', 'CeN', 'LiCl', 'TlCl', 'AgCl',
             'NaCl', 'RbCl', 'LiBr', 'TlBr', 'AgBr', 'NaBr', 'KBr', 'RbBr',
             'MgSe', 'PbSe', 'CaSe', 'SrSe', 'BaSe', 'YbSe', 'EuSe', 'SmSe',
             'PbS', 'MnS', 'CaS', 'SrS', 'BaS', 'YbS', 'EuS', 'SmS', 'LiI',
             'TlI', 'NaI', 'KI', 'RbI', 'YbAs', 'TmAs', 'DyAs', 'GdAs',
             'NdAs', 'SmAs', 'PrAs', 'SmP', 'AsTe', 'GeTe', 'SnTe', 'PbTe',
             'CaTe', 'SrTe', 'BaTe', 'YbTe', 'ErTe', 'GdTe', 'EuTe', 'SmTe',
             'LaSb', 'YbSb', 'SmSb', 'PrSb', 'NaF', 'KCl', 'CuBr', 'BeSe',
             'ZnSe', 'CdSe', 'HgSe', 'BeS', 'ZnS', 'CdS', 'AlAs', 'AlP',
             'BeTe', 'ZnTe', 'CdTe', 'HgTe', 'AlSb', 'BN', 'SiC3c', 'GaAs',
             'InAs', 'BP', 'GaP', 'InP', 'GaSb', 'InSb', 'CuCl', 'HgS',
             'CuI', 'MnTe', 'AgI', 'ZnS', 'ZnSe', 'ZnO', 'AlN', 'GaN',
             'MgTe', 'BeO', 'BN', 'InN', 'SiC', 'MnS']
    idss = get_ids(api_key="Di2IZMunaeR8vr9w", name_list=list1)
    idss1 = [i['material_id'] for i in idss]
    dff = data_fetcher("Di2IZMunaeR8vr9w", idss1, elasticity=False)
    st = Store(r"C:\Users\Administrator\Desktop")
    st.to_csv(dff, "id_structure")
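# The query above uses the legacy pymatgen MPRester, whose criteria follow
# MongoDB operator syntax ($lt, $gt, $in). A hedged variant returning extra
# standard fields in the same call (requires a valid API key; the field names
# are standard legacy-API property names):
from pymatgen.ext.matproj import MPRester

with MPRester("YOUR_API_KEY") as m2:
    docs = m2.query(criteria={'nsites': {"$lt": 20},
                              'formation_energy_per_atom': {"$lt": 0}},
                    properties=["material_id", "pretty_formula", "band_gap"])
print(len(docs), docs[0] if docs else None)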
clf = Exhaustion(estimator, n_select=n_select, muti_grade=2,
                 muti_index=[2, X.shape[1]], must_index=None,
                 n_jobs=1, refit=True).fit(X, y)

name_ = name_to_name(X_frame.columns.values,
                     search=[i[0] for i in clf.score_ex[:10]],
                     search_which=0, return_which=(1,), two_layer=True)

sc = np.array(clf.scatter)

for i in clf.score_ex[:]:
    print(i[1])
for i in name_:
    print(i)

t = clf.predict(X)
p = BasePlot()
p.scatter(y, t, strx='True $E_{gap}$', stry='Calculated $E_{gap}$')
plt.show()
p.scatter(sc[:, 0], sc[:, 1], strx='Number', stry='Score')
plt.show()

store.to_csv(sc, method_name + "".join([str(i) for i in n_select]))
store.to_pkl_pd(clf.score_ex, method_name + "".join([str(i) for i in n_select]))
param_grid3 = [{'n_estimators': [100, 200], 'learning_rate': [0.1, 0.05]}]

# 2 model
ref = RFECV(me2, cv=3)
x_ = ref.fit_transform(x, y)
gd = GridSearchCV(me2, cv=3, param_grid=param_grid2, scoring="r2", n_jobs=1)
gd.fit(x_, y)
score = gd.best_score_

# 1, 3 model
# gd = GridSearchCV(me1, cv=3, param_grid=param_grid1, scoring="r2", n_jobs=1)
# gd.fit(x, y)
# es = gd.best_estimator_
# sf = SelectFromModel(es, threshold=None, prefit=False,
#                      norm_order=1, max_features=None)
# sf.fit(x, y)
# feature = sf.get_support()
#
# gd.fit(x[:, feature], y)
# score = gd.best_score_

# other models
# exhaustive search, etc. ...

# export
# pd.to_pickle(gd, r'C:\Users\Administrator\Desktop\skk\gd_model')
# pd.read_pickle(r'C:\Users\Administrator\Desktop\skk\gd_model')
store.to_pkl_sk(gd)
store.to_csv(x)
store.to_txt(score)
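# Follow-up to the RFECV fit above: inspect which columns survived recursive
# feature elimination via standard sklearn RFECV attributes (continues from
# the `ref` fitted above).
import numpy as np
kept = np.where(ref.support_)[0]
print("kept feature indices:", kept)
print("feature ranking (1 = selected):", ref.ranking_)
print("optimal number of features:", ref.n_features_)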