def compute_stabilities_mod(self, phases_to_evaluate=None):
    """
    Calculate the stability for every Phase.

    Phases stored in ``self.phase_dict`` only get a stability computed if
    they do not have one yet (``stability is None``). Requested phases that
    are not themselves stored in ``self.phase_dict`` are then evaluated,
    either relative to the stored phase of the same name or directly
    via ``self.gclp``.

    Args:
        phases_to_evaluate ([Phase]): Included phases, if None,
            uses every Phase in ``self.phases``

    Returns:
        (None): stabilities are written to the ``stability`` attribute
            of each phase
    """
    if phases_to_evaluate is None:
        phases_to_evaluate = self.phases

    def _stability_from_gclp(phase):
        # Energy relative to the first element of the gclp result; on any
        # ordinary failure the phase is printed and NaN is used instead.
        # ``Exception`` (not a bare except) so Ctrl-C still interrupts.
        try:
            return phase.energy - self.gclp(phase.unit_comp)[0]
        except Exception:
            print(phase)
            return np.nan

    # Snapshot once: the loops below never modify phase_dict, so there is
    # no need to rebuild this list for every membership check.
    # Kept as a list (equality-based lookup) because hashing semantics of
    # the phase objects are not visible here.
    known_phases = list(self.phase_dict.values())

    # for low e phases, we only need to eval stability if it doesn't exist
    for p in tqdm(known_phases):
        if p.stability is None:
            p.stability = _stability_from_gclp(p)

    # will only do requested phases for things not in phase_dict
    for p in tqdm(phases_to_evaluate):
        if p in known_phases:
            continue
        if p.name in self.phase_dict:
            # Shift the stored phase's stability by the energy difference
            reference = self.phase_dict[p.name]
            p.stability = p.energy - reference.energy + reference.stability
        else:
            p.stability = _stability_from_gclp(p)
def compute_stabilities(self, phases, ncpus=cpu_count()):
    """
    Compute the stability of each supplied Phase, optionally in parallel.

    The phase dictionary is refreshed first via ``update_phase_dict``.
    With more than one cpu the work is mapped over a process pool and the
    results are written back onto the phases afterwards; otherwise
    ``compute_stability`` is called on each phase in turn.

    Args:
        phases ([Phase]): list of Phases for which to compute stability
        ncpus (int): number of cpus to use, i. e. processes to use

    Returns:
        ([float]) stability values for all phases
    """
    self.update_phase_dict(ncpus=ncpus)

    if ncpus > 1:
        with Pool(ncpus) as worker_pool:
            results = worker_pool.map(self.compute_stability, phases)
        # Pool doesn't always modify the phases directly,
        # so assign stability after
        for target, value in zip(phases, results):
            target.stability = value
        return results

    # Serial path: evaluate one phase at a time with a progress bar
    return [self.compute_stability(item) for item in tqdm(phases)]
def cache_download(url, path):
    """
    Quick helper function to cache a generic download from a url
    in the CAMD local data directory.

    Nothing is downloaded if the target file already exists. The payload
    is streamed to a temporary ``.part`` file and only moved to its final
    location once the transfer has completed, so a failed or interrupted
    download is never mistaken for a valid cache entry on the next call.

    Args:
        url (str): url for download
        path (str): path for download, is appended to the
            CAMD_CACHE location

    Returns:
        (None)

    Raises:
        requests.HTTPError: if the server answers with an error status
            (previously the error body would have been cached as data)
    """
    # Prep cache path
    cache_path = os.path.join(CAMD_CACHE, path)
    if os.path.isfile(cache_path):
        return

    # Make necessary dirs
    makedirs_p(os.path.split(cache_path)[0])

    partial_path = cache_path + ".part"
    block_size = 1024  # 1 Kibibyte
    try:
        # Context managers release the connection and close the
        # progress bar even if the transfer fails midway
        with requests.get(url, stream=True) as response:
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            with tqdm(total=total_size, unit='iB',
                      unit_scale=True) as progress:
                with open(partial_path, 'wb') as f:
                    for data in response.iter_content(block_size):
                        progress.update(len(data))
                        f.write(data)
        # Only a fully written file is promoted to the cache location
        os.replace(partial_path, cache_path)
    finally:
        # After a successful replace the partial file no longer exists;
        # anything still present here is an incomplete download
        if os.path.isfile(partial_path):
            os.remove(partial_path)
def predict(self, X):
    """
    Apply the committee of models to candidate space.

    Each committee member is a (scaler, model) pair; X is transformed
    with the member's scaler before the member's model predicts on it.

    Args:
        X: input values on which to predict

    Returns:
        mean of the member predictions (taken over members)
        standard deviation of the member predictions (taken over members)
    """
    # One row per committee member
    member_outputs = np.array([
        estimator.predict(member_scaler.transform(X))
        for member_scaler, estimator in tqdm(self.committee_models)
    ])
    spread = np.std(member_outputs, axis=0)
    center = np.mean(member_outputs, axis=0)
    return center, spread
def predict(self, X):
    """
    Apply the committee of models to candidate space.

    Iterates over the fitted ``self.committee_models`` directly rather
    than indexing it with ``range(self.n_members)``, so the prediction
    always uses exactly the members that exist, even if ``n_members``
    was changed after the committee was built.

    Args:
        X: input values on which to predict

    Returns:
        mean of the member predictions (taken over members)
        standard deviation of the member predictions (taken over members)
    """
    committee_predictions = []
    # Each entry holds the scaler first and the model second
    for scaler, model in tqdm(self.committee_models):
        _X = scaler.transform(X)
        committee_predictions.append(model.predict(_X))

    # Convert once and reuse for both statistics
    stacked = np.array(committee_predictions)
    stds = np.std(stacked, axis=0)
    means = np.mean(stacked, axis=0)
    return means, stds
def fit(self, X, y):
    """
    Fits the QBC committee member models.

    Every member is trained on its own random subset of the data
    (a ``training_fraction`` share of the rows), using a fresh
    StandardScaler and a clone of ``self.model``. The fitted
    [scaler, model] pairs are stored in ``self.committee_models``.
    If ``self.test_full_model`` is set, a 5-fold cross-validated mean
    absolute error of a model on the full dataset is stored in
    ``self.cv_score``.

    Args:
        X (pandas.DataFrame, np.ndarray): input X values for fitting
        y (pandas.DataFrame, np.ndarray): output y values to regress
            or fit to

    Returns:
        None
    """
    self._X, self._y = X, y

    def _take_rows(data, rows):
        # pandas objects need positional .iloc; anything else is indexed
        # as an array so that ndarray input works as documented
        if hasattr(data, "iloc"):
            return data.iloc[rows]
        return np.asarray(data)[rows]

    n_samples = len(X)
    n_train = int(self.training_fraction * n_samples)

    # All subsets are drawn before any model is trained; keeping the two
    # phases separate preserves the order in which random numbers are used.
    split_X = []
    split_y = []
    for _ in range(self.n_members):
        order = np.arange(n_samples)
        np.random.shuffle(order)
        indices = order[:n_train]
        split_X.append(_take_rows(X, indices))
        split_y.append(_take_rows(y, indices))

    self.committee_models = []
    for member_X, member_y in tqdm(list(zip(split_X, split_y))):
        scaler = StandardScaler()
        scaled_X = scaler.fit_transform(member_X)
        model = clone(self.model)
        model.fit(scaled_X, member_y)
        # Saving the scaler and model to make predictions
        self.committee_models.append([scaler, model])
    self.trained = True

    if self.test_full_model:
        # Get a CV score for an overall model with the full dataset
        full_scaler = StandardScaler()
        _X = full_scaler.fit_transform(self._X, self._y)
        full_model = clone(self.model)
        full_model.fit(_X, self._y)
        cv_score = cross_val_score(
            full_model,
            _X,
            self._y,
            cv=KFold(5, shuffle=True),
            scoring="neg_mean_absolute_error",
        )
        # Scores are negated errors; flip the sign to store a positive MAE
        self.cv_score = np.mean(cv_score) * -1
def predict(self, X):
    """
    Run every fitted committee member on the candidate space and
    summarize their predictions.

    Args:
        X (pandas.DataFrame, np.ndarray): input matrix or values
            on which to predict

    Returns:
        (np.ndarray): mean values for predictions for all
            committee members
        (np.ndarray): standard deviation values for predictions
            for all committee members
    """
    per_member = []
    for member in tqdm(self.committee_models):
        # Each member is stored as a (scaler, model) pair
        member_scaler, member_model = member
        scaled_input = member_scaler.transform(X)
        per_member.append(member_model.predict(scaled_input))

    ensemble = np.array(per_member)
    return np.mean(ensemble, axis=0), np.std(ensemble, axis=0)
def fit(self, X, y):
    """
    Train the committee of models on random subsets of the data.

    Each member gets its own StandardScaler and a new
    ``self.ml_algorithm(**self.ml_algorithm_params)`` instance, fitted on
    a ``training_fraction`` share of the rows selected through ``.iloc``.
    The [scaler, model] pairs are stored in ``self.committee_models``.
    If ``self.test_full_model`` is set, a 5-fold cross-validated mean
    absolute error on the whole dataset is stored in ``self.cv_score``.

    Args:
        X: input values for fitting (indexed with ``.iloc``)
        y: target values for fitting (indexed with ``.iloc``)

    Returns:
        None
    """
    self._X, self._y = X, y

    # Draw every member's subset first, before any training happens
    subsets = []
    for _ in range(self.n_members):
        shuffled = np.arange(len(X))
        np.random.shuffle(shuffled)
        chosen = shuffled[:int(self.training_fraction * len(X))]
        subsets.append((X.iloc[chosen], y.iloc[chosen]))

    self.committee_models = []
    for member_idx in tqdm(list(range(self.n_members))):
        subset_X, subset_y = subsets[member_idx]
        member_scaler = StandardScaler()
        scaled_subset = member_scaler.fit_transform(subset_X)
        member_model = self.ml_algorithm(**self.ml_algorithm_params)
        member_model.fit(scaled_subset, subset_y)
        # Note we're saving the scaler to use in predictions
        self.committee_models.append([member_scaler, member_model])
    self.trained = True

    if not self.test_full_model:
        return

    # Get a CV score for an overall model with present dataset
    overall_model = self.ml_algorithm(**self.ml_algorithm_params)
    overall_scaler = StandardScaler()
    _X = overall_scaler.fit_transform(self._X, self._y)
    overall_model.fit(_X, self._y)
    fold_scores = cross_val_score(
        overall_model,
        _X,
        self._y,
        cv=KFold(5, shuffle=True),
        scoring='neg_mean_absolute_error',
    )
    self.cv_score = -1 * np.mean(fold_scores)