def _run_alg(self):
    """Alternate sample collection and model training for ``self.niter`` rounds.

    The starting counters come from ``self.setup_model_state()``. A checkpoint
    is written at the end of every round; the learning-curve and cost plots
    are produced once the loop has finished.
    """
    iter_cnt, tr_losses, avg_cost = self.setup_model_state()

    while iter_cnt < self.niter:
        round_id = iter_cnt + 1

        self.collect_samples(self.nsamples)
        avg_cost.append(self.buffer.mean)

        # the final round is trained ten times longer when full_training is set
        final_round = iter_cnt == self.niter - 1
        if final_round and self.full_training:
            epochs = self.nepochs * 10
        else:
            epochs = self.nepochs
        _, epoch_losses = self.train(round_id, epochs)
        tr_losses.append(epoch_losses)

        # every tenth round (2D problems only) dump a histogram of model samples
        if round_id % 10 == 0 and self.ndim == 2:
            _, drawn_ind = self.model.sample_model(1000, self.bsize,
                                                   self.input_vectors_norm)
            hist_path = self.work_dir / get_full_name(
                name='dist', prefix='training', suffix=f'{round_id}_after')
            drawn_ind = drawn_ind.to(
                torch.device('cpu')).data.numpy().astype('int')
            values = index_to_xval(self.input_vectors, drawn_ind)
            scale = self.input_scale
            plt_hist2D(values, fpath=hist_path,
                       range=np.array([[-scale, scale], [-scale, scale]]),
                       cmap='binary')

        iter_cnt = round_id
        self.save_checkpoint(iter_cnt, tr_losses, avg_cost)

    plot_learning_with_epochs(fpath=self.work_dir / 'learning_curve.png',
                              training=tr_losses)
    plot_cost(avg_cost, fpath=self.work_dir / 'cost.png')
def setup_state(self):
    """Return the bookkeeping state the main loop starts from.

    When ``self.load`` is set, the values are read from
    ``<work_dir>/checkpoint.pickle``. Otherwise empty histories are created,
    ``self.n_init_samples`` samples are collected with ``uniform=True`` and
    the CEM model is fitted once on the selected top samples.

    Returns
    -------
    tuple
        ``(iter_cnt, avg_cost, sim_cnt_list, sample_cnt_list,
        n_sols_in_buffer, top_means)``
    """
    if self.load:
        ckpt = self.load_checkpoint(self.work_dir / 'checkpoint.pickle')
        iter_cnt = ckpt['iter_cnt']
        avg_cost = ckpt['avg_cost']
        sim_cnt_list = ckpt['sim_cnt']
        n_sols_in_buffer = ckpt['n_sols_in_buffer']
        sample_cnt_list = ckpt['sample_cnt']
        top_means = {
            'top_20': ckpt['top_20'],
            'top_40': ckpt['top_40'],
            'top_60': ckpt['top_60'],
        }
        return (iter_cnt, avg_cost, sim_cnt_list, sample_cnt_list,
                n_sols_in_buffer, top_means)

    # fresh start: empty histories plus one initial fit
    iter_cnt = 0
    avg_cost, sim_cnt_list = [], []
    sample_cnt_list, n_sols_in_buffer = [], []
    top_means = {'top_20': [], 'top_40': [], 'top_60': []}

    init_samples, init_fvals = self.collect_samples(self.n_init_samples,
                                                    uniform=True)
    self.cem.fit(self.get_top_samples(0, init_samples, init_fvals))

    if self.ndim == 2:
        # visualise what the freshly fitted distribution generates
        drawn_ind = self.cem.sample(1000)
        hist_path = self.work_dir / get_full_name(
            name='dist', prefix='training', suffix=f'0_after')
        scale = self.input_scale
        bounds = np.array([[-scale, scale], [-scale, scale]])
        plt_hist2D(index_to_xval(self.input_vectors, drawn_ind),
                   fpath=hist_path, range=bounds, cmap='binary')

    return (iter_cnt, avg_cost, sim_cnt_list, sample_cnt_list,
            n_sols_in_buffer, top_means)
def train(self, iter_cnt: int, nepochs: int, split=1.0):
    """Train on the current buffer content for ``nepochs`` epochs.

    The buffered samples are treated as a static data set on which several
    gradient epochs are taken.

    Parameters
    ----------
    iter_cnt: int
        Outer-loop iteration index; only used for log messages and file names.
    nepochs: int
        Number of epochs to run.
    split: float
        Forwarded to ``self.buffer.draw_tr_te_ds``; a value below 1 also
        evaluates the held-out part after every epoch.

    Returns
    -------
    tuple
        ``(mean_train_loss, mean_test_loss)`` when ``split < 1``, otherwise
        ``(mean_train_loss, per_epoch_train_losses)``.
    """
    xtr, xte, wtr, wte = self.buffer.draw_tr_te_ds(split=split)

    if self.ndim == 2:
        # histogram of the training data before the model is updated
        fpath = self.work_dir / get_full_name(
            name='dist', prefix='training', suffix=f'{iter_cnt}_before')
        samples = index_to_xval(self.input_vectors,
                                xtr[:, 1, :].astype('int'))
        s = self.input_scale
        _range = np.array([[-s, s], [-s, s]])
        plt_hist2D(samples, fpath=fpath, range=_range, cmap='binary')

    print('-' * 50)
    tr_loss = 0
    te_loss = 0
    tr_loss_list = []
    for epoch_id in range(nepochs):
        tr_nll = self.run_epoch(xtr, wtr, mode='train')
        tr_loss_list.append(tr_nll)
        # normalise by the number of epochs actually run; callers pass values
        # other than self.nepochs, which previously skewed the reported mean
        tr_loss += tr_nll / nepochs
        print(f'[train_{iter_cnt}] epoch {epoch_id} loss = {tr_nll}')

        if split < 1:
            te_nll = self.run_epoch(xte, wte, mode='test')
            te_loss += te_nll / nepochs
            print(f'[test_{iter_cnt}] epoch {epoch_id} loss = {te_nll}')

    if split < 1:
        return tr_loss, te_loss

    return tr_loss, tr_loss_list
def setup_model_state(self):
    """Return ``(iter_cnt, tr_losses, avg_cost)`` for the start of a run.

    With ``self.load`` the tuple is whatever ``self.load_checkpoint`` returns
    for ``<work_dir>/checkpoint.tar``. Otherwise the untrained model is used
    to collect ``self.n_init_samples`` samples, trained once on them, and a
    first checkpoint with empty histories is saved.
    """
    if self.load:
        return self.load_checkpoint(self.work_dir / 'checkpoint.tar')

    # samples come from the randomly initialised model (likely a poor start)
    self.model.eval()
    self.collect_samples(self.n_init_samples)

    # fit the initial model on what was just collected
    self.model.train()
    self.train(0, self.n_init_samples)

    if self.ndim == 2:
        _, drawn_ind = self.model.sample_model(1000, self.bsize,
                                               self.input_vectors_norm)
        hist_path = self.work_dir / get_full_name(
            name='dist', prefix='training', suffix=f'0_after')
        drawn_ind = drawn_ind.to(
            torch.device('cpu')).data.numpy().astype('int')
        values = index_to_xval(self.input_vectors, drawn_ind)
        scale = self.input_scale
        plt_hist2D(values, fpath=hist_path,
                   range=np.array([[-scale, scale], [-scale, scale]]),
                   cmap='binary')

    fresh_state = (0, [], [])
    self.save_checkpoint(*fresh_state)
    return fresh_state
def _plot_dist(self, data_indices: torch.Tensor, name, prefix, suffix):
    """Save a 2D histogram of the values addressed by ``data_indices``.

    The tensor is moved to ``self.cpu``, cast to integer indices, mapped to
    values through ``index_to_xval`` and plotted over the square
    ``[-input_scale, input_scale]`` on both axes. The output file name is
    built by ``get_full_name(name, prefix, suffix)`` inside ``self.work_dir``.
    """
    out_path = self.work_dir / get_full_name(name, prefix, suffix)
    indices = data_indices.to(self.cpu).data.numpy().astype('int')
    values = index_to_xval(self.input_vectors, indices)
    scale = self.input_scale
    plt_hist2D(values,
               fpath=out_path,
               range=np.array([[-scale, scale], [-scale, scale]]),
               cmap='binary')
def report_accuracy(self, ntimes, nsamples):
    """Print accuracy, timing and diversity statistics of the trained model.

    ``ntimes`` batches of ``nsamples`` points are drawn through
    ``self._sample_model_for_eval``. A point counts as a solution when its
    function value is ``<= self.goal`` (mode ``'le'``) or ``>= self.goal``
    (any other mode). The diversity figure is only computed for batches with
    at least ``self.ndim`` solutions. Nothing is returned.
    """
    if self.ndim == 2:
        # one extra batch, only used for the histogram of the trained policy
        xsamples, _, _ = self._sample_model_for_eval(nsamples)
        scale = self.input_scale
        bounds = np.array([[-scale, scale], [-scale, scale]])
        plt_hist2D(xsamples, range=bounds,
                   fpath=self.work_dir / get_full_name('trained_policy'),
                   cmap='binary')

    accuracy_list, times, div_list = [], [], []
    for _ in range(ntimes):
        tic = time.time()
        xsample, sample_ids, fval = self._sample_model_for_eval(nsamples)

        if self.mode == 'le':
            hits = fval <= self.goal
        else:
            hits = fval >= self.goal
        acc = hits.sum(-1) / nsamples
        pos_samples = xsample[hits]

        if len(pos_samples) >= self.ndim:
            div_list.append(get_diversity_fom(self.ndim, pos_samples))

        times.append(time.time() - tic)
        accuracy_list.append(acc)

    print(f'gen_time / sample = {1e3 * np.mean(times).astype("float") / nsamples:.3f} ms')
    print(f'accuracy_avg = {100 * np.mean(accuracy_list).astype("float"):.6f}, '
          f'accuracy_std = {100 * np.std(accuracy_list).astype("float"):.6f}, '
          f'solution diversity = {np.mean(div_list).astype("float"):.6f}')
def check_solutions(self, ntimes: int, nsamples: int) -> None:
    """Print baseline statistics of a random sampling policy.

    For ``ntimes`` batches of ``nsamples`` random points the fraction of
    points meeting the goal, the empirical variation of all points and of
    the solutions, and the diversity figure of merit are gathered and their
    averages printed. For 2D problems the random-policy histogram, the
    function surface and the solution region are plotted first.

    Parameters
    ----------
    ntimes: int
        Number of random batches to draw.
    nsamples: int
        Number of points per batch.
    """
    accuracy_rnd_list = []
    total_var_list, pos_var_list = [], []
    diversity_fom_list = []

    if self.ndim == 2:
        rnd_samples, _ = self.sample_data(self.ndim, self.input_vectors,
                                          nsamples)
        s = self.input_scale
        # both axes span [-s, s]; the y range used to be the empty [s, s]
        _range = np.array([[-s, s], [-s, s]])
        plt_hist2D(rnd_samples,
                   fpath=self.work_dir / get_full_name('random_policy'),
                   range=_range, cmap='binary')
        x, y = self.input_vectors
        plot_fn2d(x, y, self.fn, fpath=str(self.work_dir / 'fn2D.png'),
                  cmap='viridis')
        show_solution_region(x, y, self.fn, self.goal, mode=self.mode,
                             fpath=str(self.work_dir / 'dist2D.png'),
                             cmap='binary')

    vector_mat = np.stack(self.input_vectors, axis=0)
    for _ in range(ntimes):
        _, rnd_ids = self.sample_data(self.ndim, self.input_vectors_norm,
                                      nsamples)
        rnd_samples = vector_mat[np.arange(self.ndim), rnd_ids]
        total_var = compute_emprical_variation(rnd_samples)
        rnd_fval: np.ndarray = self.fn(rnd_samples)

        # 'le' means "at most the goal"; every other mode is treated as 'ge'
        if self.mode == 'le':
            satisfied = rnd_fval <= self.goal
        else:
            satisfied = rnd_fval >= self.goal

        pos_samples = rnd_samples[satisfied]
        if len(pos_samples) != 0:
            pos_var = compute_emprical_variation(pos_samples)
        else:
            # np.nan instead of the np.NAN alias, which NumPy 2.0 removed
            pos_var = np.nan
        accuracy_rnd_list.append(satisfied.sum(-1) / nsamples)

        pos_var_list.append(pos_var)
        total_var_list.append(total_var)

        if len(pos_samples) >= self.ndim:
            div = get_diversity_fom(self.ndim, pos_samples)
            diversity_fom_list.append(div)

    accuracy_rnd = np.array(accuracy_rnd_list, dtype='float32')
    print(f'accuracy_rnd_avg = {100 * np.mean(accuracy_rnd).astype("float"):.6f}, '
          f'accuracy_rnd_std = {100 * np.std(accuracy_rnd).astype("float"):.6f}')
    print(f'random policy total variation / dim = '
          f'{np.mean(total_var_list).astype("float"):.6f}')

    pos_var_arr = np.array(pos_var_list)
    valid_pos_var = pos_var_arr[~np.isnan(pos_var_arr)]
    if len(valid_pos_var) == 0:
        print('No positive solution was found with random policy')
    else:
        print(f'pos solution variation / dim ='
              f' {np.mean(valid_pos_var):.6f}')

    print(f'random policy solution diversity FOM: '
          f'{np.mean(diversity_fom_list).astype("float"):.6f}')
def _save_2d_samples(self, model, iter_cnt, nsamples, name='dist'):
    """Draw ``nsamples`` points from ``model`` and save their 2D histogram.

    The file name is built with prefix ``'training'`` and suffix
    ``'<iter_cnt + 1>_after'`` and placed in ``self.work_dir``; both axes of
    the histogram span ``[-input_scale, input_scale]``.
    """
    _, drawn_ind = model.sample_model(nsamples, self.bsize,
                                      self.input_vectors_norm)
    out_path = self.work_dir / get_full_name(
        name=name, prefix='training', suffix=f'{iter_cnt+1}_after')
    indices = drawn_ind.to(torch.device('cpu')).data.numpy().astype('int')
    values = index_to_xval(self.input_vectors, indices)
    scale = self.input_scale
    plt_hist2D(values,
               fpath=out_path,
               range=np.array([[-scale, scale], [-scale, scale]]),
               cmap='binary')
def setup_model_state(self):
    """Return the counters and histories the training loop starts from.

    With ``self.load`` everything is read from ``<work_dir>/checkpoint.tar``.
    Otherwise the histories start empty, ``self.n_init_samples`` samples are
    collected with ``uniform=True``, the buffer is pickled to
    ``init_buffer.pickle``, the model is trained for ``self.init_nepochs``
    epochs and a first checkpoint is saved.

    Returns
    -------
    tuple
        ``(iter_cnt, tr_losses, avg_cost, sim_cnt_list, sample_cnt_list,
        n_sols_in_buffer, top_means)``
    """
    if self.load:
        ckpt = self.load_checkpoint(self.work_dir / 'checkpoint.tar')
        tr_losses = ckpt['tr_losses']
        iter_cnt = ckpt['iter_cnt']
        avg_cost = ckpt['avg_cost']
        sim_cnt_list = ckpt['sim_cnt']
        n_sols_in_buffer = ckpt['n_sols_in_buffer']
        sample_cnt_list = ckpt['sample_cnt']
        top_means = {
            'top_20': ckpt['top_20'],
            'top_40': ckpt['top_40'],
            'top_60': ckpt['top_60'],
        }
        return (iter_cnt, tr_losses, avg_cost, sim_cnt_list, sample_cnt_list,
                n_sols_in_buffer, top_means)

    iter_cnt = 0
    tr_losses, avg_cost = [], []
    sim_cnt_list, sample_cnt_list, n_sols_in_buffer = [], [], []
    top_means = {'top_20': [], 'top_40': [], 'top_60': []}

    # initial data is gathered with the untrained model in eval mode
    self.model.eval()
    self.collect_samples(self.n_init_samples, uniform=True)
    write_pickle(self.work_dir / 'init_buffer.pickle',
                 dict(init_buffer=self.buffer))

    # fit the initial model
    self.model.train()
    self.train(0, self.init_nepochs)

    if self.ndim == 2:
        _, drawn_ind = self.sample_model(1000, model=self.model)
        hist_path = self.work_dir / get_full_name(
            name='dist', prefix='training', suffix=f'0_after')
        indices = drawn_ind.to(self.cpu).data.numpy().astype('int')
        values = index_to_xval(self.input_vectors, indices)
        scale = self.input_scale
        plt_hist2D(values, fpath=hist_path,
                   range=np.array([[-scale, scale], [-scale, scale]]),
                   cmap='binary')

    self.save_checkpoint(dict(
        iter_cnt=iter_cnt,
        tr_losses=tr_losses,
        avg_cost=avg_cost,
        sim_cnt=sim_cnt_list,
        n_sols_in_buffer=n_sols_in_buffer,
        sample_cnt=sample_cnt_list,
        **top_means,
    ))

    return (iter_cnt, tr_losses, avg_cost, sim_cnt_list, sample_cnt_list,
            n_sols_in_buffer, top_means)
def get_top_samples(self, iter_cnt, samples, sample_fvals):
    """Select the elite samples used for the next distribution fit.

    On-policy, the given ``samples`` are ranked by ``sample_fvals``
    (descending for mode ``'ge'``, ascending otherwise). Off-policy, the
    arguments are ignored for ranking: samples and weights are drawn from
    ``self.buffer`` and ranked by descending weight.

    The number of elites is ``self.cut_off`` for ``elite_criteria == 'optim'``.
    For ``'csp'`` it is the larger of the number of samples meeting the
    constraint (on-policy) or having weight 1 (off-policy) and
    ``min(self.cut_off, nsamples)``.

    For 2D problems the full sample set is also plotted as
    ``'<iter_cnt>_before'``.
    """
    if self.on_policy:
        nsamples = len(samples)
        order = sorted(range(nsamples),
                       key=lambda idx: sample_fvals[idx],
                       reverse=self.mode == 'ge')
        sorted_samples = samples[order]
        # number of samples that already satisfy the constraint
        if self.mode == 'le':
            feasible = sample_fvals <= self.goal
        else:
            feasible = sample_fvals >= self.goal
        top_index = feasible.sum(-1).astype('int')
    else:
        data, _, weights, _ = self.buffer.draw_tr_te_ds(
            split=1, normalize_weight=False)
        samples = data[:, 1].astype('int')
        nsamples = len(samples)
        # stable sort of row positions by descending weight
        order = sorted(range(nsamples),
                       key=lambda idx: weights[idx],
                       reverse=True)
        sorted_samples = np.stack([samples[idx] for idx in order], axis=0)
        top_index = (weights == 1).sum(-1).astype('int')

    if self.elite_criteria == 'optim':
        top_index = self.cut_off
    elif self.elite_criteria == 'csp':
        top_index = max(top_index, min(self.cut_off, nsamples))

    top_samples = sorted_samples[:top_index]

    # plot exploration
    if self.ndim == 2:
        hist_path = self.work_dir / get_full_name(
            name='dist', prefix='training', suffix=f'{iter_cnt}_before')
        scale = self.input_scale
        bounds = np.array([[-scale, scale], [-scale, scale]])
        plt_hist2D(index_to_xval(self.input_vectors, samples),
                   fpath=hist_path, range=bounds, cmap='binary')

    return top_samples
def _run_alg(self):
    """Run the sample -> select elites -> refit loop for ``self.niter`` rounds.

    The starting state comes from ``self.setup_state()``. Each round records
    bookkeeping statistics, collects ``self.nsamples`` new samples, refits
    ``self.cem`` on the top samples and writes a checkpoint. The cost curve
    and the solutions-versus-samples curve are plotted after the loop.
    """
    (iter_cnt, avg_cost, sim_cnt_list, sample_cnt_list,
     n_sols_in_buffer, top_means) = self.setup_state()

    while iter_cnt < self.niter:
        print(f'iter {iter_cnt}')
        round_id = iter_cnt + 1

        # ---- update plotting variables
        # NOTE: the counts are derived from the iteration counter and the
        # means from self.fvals, not from the buffer's own statistics
        n_evaluated = round_id * self.nsamples + self.n_init_samples
        sim_cnt_list.append(n_evaluated)
        n_sols_in_buffer.append(len(self.buffer_temp))
        sample_cnt_list.append(n_evaluated)
        for top_n in (20, 40, 60):
            top_means[f'top_{top_n}'].append(np.mean(self.fvals[:top_n]))

        samples, sample_fvals = self.collect_samples(self.nsamples)
        if self.on_policy:
            avg_cost.append(sample_fvals.mean())
        else:
            avg_cost.append(self.buffer.mean)

        self.cem.fit(self.get_top_samples(round_id, samples, sample_fvals))

        # every tenth round (2D problems only) dump a histogram of model samples
        if round_id % 10 == 0 and self.ndim == 2:
            drawn_ind = self.sample_model(1000)
            hist_path = self.work_dir / get_full_name(
                name='dist', prefix='training', suffix=f'{round_id}_after')
            scale = self.input_scale
            bounds = np.array([[-scale, scale], [-scale, scale]])
            plt_hist2D(index_to_xval(self.input_vectors, drawn_ind),
                       fpath=hist_path, range=bounds, cmap='binary')

        iter_cnt = round_id
        self.save_checkpoint(dict(
            iter_cnt=iter_cnt,
            avg_cost=avg_cost,
            sim_cnt=sim_cnt_list,
            n_sols_in_buffer=n_sols_in_buffer,
            sample_cnt=sample_cnt_list,
            **top_means,
        ))

    plot_cost(avg_cost, fpath=self.work_dir / 'cost.png')
    plot_x_y(sample_cnt_list,
             n_sols_in_buffer,
             fpath=self.work_dir / 'n_sols.png',
             xlabel='n_freq',
             ylabel=f'n_sols')
def __init__(self, spec_file: str = '', spec_dict: Optional[Mapping[str, Any]] = None,
             load: bool = False, use_time_stamp: bool = True, **kwargs) -> None:
    """
    Parameters
    ----------
    spec_file: str
        Path of a yaml spec file; takes precedence over ``spec_dict``.
    spec_dict: Dict[str, Any]
        Specs used when ``spec_file`` is empty.
        some non-obvious fields
            elite_criteria: str
                'optim': from sorted x1, ..., xn choose p-quantile
                'csp': constraint satisfaction is enough, from x1, ..., xn choose p-quantile if
                it is worse than the constraint else choose all which are better than the
                constraint
            allow_repeated: bool
                True to allow repeated samples to be added to the buffer, else all samples in
                buffer will have equal likelihood when drawn from it.
            on_policy: bool
                True to allow on_policy sample usage, meaning that we won't use samples from
                previous policies to train the current policy (samples are not drawn from
                CacheBuffer)
    load: bool
        True to reuse the directory of ``spec_file`` as working directory
        instead of creating a new one.
    use_time_stamp: bool
        True to name a new working directory after the current time.
    kwargs: Dict[str, Any]
        Accepted but not used here.

    Raises
    ------
    ValueError
        If ``elite_criteria`` is not 'optim' or 'csp', or if ``params['fn']``
        is not a registered benchmark function.
    """
    LoggingBase.__init__(self)

    if spec_file:
        specs = read_yaml(spec_file)
    else:
        specs = spec_dict

    self.specs = specs
    params = specs['params']

    if load:
        self.work_dir = Path(spec_file).parent
    else:
        suffix = params.get('suffix', '')
        prefix = params.get('prefix', '')
        if use_time_stamp:
            unique_name = time.strftime('%Y%m%d%H%M%S')
            unique_name = get_full_name(unique_name, prefix, suffix)
        else:
            unique_name = f'{prefix}' if prefix else ''
            if suffix:
                unique_name = f'{unique_name}_{suffix}' if unique_name else f'{suffix}'

        self.work_dir = Path(specs['root_dir']) / f'{unique_name}'
        write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

    self.load = load
    self.seed = params['seed']
    self.ndim = params['ndim']
    self.nsamples = params['nsamples']
    self.n_init_samples = params['n_init_samples']
    self.niter = params['niter']
    self.cut_off = params['cut_off']
    self.input_scale = params['input_scale']
    # goal has to always be positive; if not, mode, function and goal are
    # all negated further down
    self.goal = params['goal_value']
    self.mode = params['mode']

    self.allow_repeated = params.get('allow_repeated', False)
    self.elite_criteria = params.get('elite_criteria', 'optim')
    self.on_policy = params.get('on_policy', False)

    if self.elite_criteria not in ['csp', 'optim']:
        raise ValueError('invalid elite criteria: optim | csp')

    # allow repeated does not make sense when sampling is on-policy (on-policy: T -> repeat: T)
    self.allow_repeated = self.on_policy or self.allow_repeated

    eval_fn = params['fn']
    try:
        fn = registered_functions[eval_fn]
    except KeyError as err:
        raise ValueError(f'{eval_fn} is not a valid benchmark function') from err
    self.fn = fn

    if self.goal < 0:
        # f(x) >= g  <=>  -f(x) <= -g : flipping the mode and negating the
        # function is only equivalent when the goal is negated as well
        self.mode = 'le' if self.mode == 'ge' else 'ge'
        self.fn = lambda x: -fn(x)
        self.goal = -self.goal

    # hacky version of passing input vectors around
    self.input_vectors_norm = [
        np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
        for _ in range(self.ndim)
    ]
    self.input_vectors = [
        self.input_scale * vec for vec in self.input_vectors_norm
    ]

    self.cem = CEM(self.input_vectors,
                   dist_type=params['base_fn'],
                   average_coeff=params.get('average_coeff', 1),
                   gauss_sigma=params.get('gauss_sigma', None))
    self.buffer = CacheBuffer(self.mode, self.goal, self.cut_off,
                              with_frequencies=self.allow_repeated)

    self.buffer_temp = {}
    self.fvals = SortedList()
def main(specs, force_replot=False):
    """Sample several trained models and plot them in one 2D embedding.

    Parameters
    ----------
    specs: Mapping[str, Any]
        Reads 'nsamples' and 'models' (label -> model directory name), and
        optionally 'root_dir', 'prefix', 'method' ('pca' or 'tsne'), 'seed'
        and 'solution_only'.
    force_replot: bool
        True to overwrite the first candidate figure even if it exists.

    Raises
    ------
    ValueError
        If ``method`` is neither 'pca' nor 'tsne'.
    SystemExit
        If a figure already exists and these specs were plotted before.
    """
    nsamples = specs['nsamples']
    method = specs.get('method', 'pca')
    # fail fast: an unknown method used to be reported only after every model
    # had been sampled and the datasets written to disk
    if method not in ('pca', 'tsne'):
        raise ValueError(
            'invalid dimensionality reduction, valid options are {"pca"| "tsne"}'
        )

    root_dir = Path(specs.get('root_dir', ''))
    prefix = specs.get('prefix', '')
    seed = specs.get('seed', 10)
    solution_only = specs.get('solution_only', False)

    samples_list, labels_list = [], []
    label_map = {}

    work_dir = root_dir / 'model_comparison'
    datasets_path = work_dir / 'datasets'
    # create the dataset directory itself (and thereby work_dir), not only
    # its parent
    datasets_path.mkdir(exist_ok=True, parents=True)

    sol_all = 'sol' if solution_only else 'all'
    dataset_suf = f'n{nsamples}_' + sol_all
    fig_name = get_full_name('comparison', prefix, f'{method}_{sol_all}_s{seed}')

    # try reading the cache set
    # NOTE(review): pickles are assumed to be files this script wrote itself;
    # unpickling untrusted files is unsafe
    try:
        cache = read_pickle(work_dir / 'cache.pickle')
    except FileNotFoundError:
        cache = set()

    # find a unique fname based on the content of spec file
    spec_immutable = to_immutable(specs)
    for index in itertools.count():
        fig_path = work_dir / f'{fig_name}_{index}.png'
        if not fig_path.exists() or force_replot:
            break
        # the figure exists: stop if these exact specs were plotted before,
        # otherwise try the next index
        if spec_immutable in cache:
            print('nothing is new')
            # SystemExit directly; the exit() helper is only injected by site
            raise SystemExit(0)
    cache.add(spec_immutable)

    # noinspection PyUnboundLocalVariable
    fig_title = str(fig_path.stem)

    for label, (label_str, model_str) in enumerate(specs['models'].items()):
        data_path = datasets_path / f'{model_str}_{dataset_suf}.pickle'

        if data_path.exists():
            print(f'loading dataset {label}: {label_str}')
            content = read_pickle(data_path)
            samples = content['samples']
        else:
            print(f'sampling model {label} : {label_str}')
            model_path = root_dir / model_str / 'params.yaml'
            model_specs = read_yaml(model_path)
            alg_cls_str = model_specs.pop('alg_class')
            alg_cls = cast(Type[LoggingBase], import_class(alg_cls_str))
            alg = alg_cls(model_path, load=True)

            # noinspection PyUnresolvedReferences
            samples = alg.load_and_sample(nsamples, only_positive=solution_only)

            print(f'saving into {str(data_path)}')
            write_pickle(data_path, dict(samples=samples))

        # one float label per sample, equal to the model's position in specs
        labels = np.ones(shape=samples.shape[0]) * label
        label_map[label] = label_str

        samples_list.append(samples)
        labels_list.append(labels)

    samples = np.concatenate(samples_list, axis=0)
    labels = np.concatenate(labels_list, axis=0)

    scatter_kwargs = dict(fpath=fig_path, alpha=0.5, title=fig_title,
                          edgecolors='none', s=10)
    if method == 'pca':
        pca_scatter2d(samples, labels, label_map, **scatter_kwargs)
    else:
        tsne_scatter2d(samples, labels, label_map, seed=seed, **scatter_kwargs)

    # update cache
    write_pickle(work_dir / 'cache.pickle', cache)
def train(self, iter_cnt: int, nepochs: int, split=1.0):
    """Train the model (and optionally the visited-state model) for one round.

    The sampled data is treated as a static data set on which several
    gradient epochs are taken. Data comes from
    ``self._sample_model_with_weights`` when running on-policy after the
    first round, otherwise from ``self.buffer``. When ``self.model_visited``
    is set, ``self.visited_dist`` is trained first on the raw weights. The
    weights are then passed through ``self.update_weight`` before the main
    model is trained.

    Parameters
    ----------
    iter_cnt: int
        Outer-loop iteration index, used for logs, file names and to select
        the on-policy / first-round behaviour.
    nepochs: int
        Number of epochs for the main model.
    split: float
        Forwarded to ``self.buffer.draw_tr_te_ds``; a value below 1 also
        evaluates the held-out part after every epoch.
        NOTE(review): the on-policy branch defines no held-out data, so
        ``split < 1`` looks unsupported there - confirm with callers.

    Returns
    -------
    tuple
        ``(mean_train_loss, mean_test_loss)`` when ``split < 1``, otherwise
        ``(mean_train_loss, per_epoch_train_losses)``.
    """
    print('-' * 50)
    if self.on_policy and iter_cnt != 0:
        # TODO: this is a stupid implementation, but ok for now
        xtr, wtr = self._sample_model_with_weights(self.nsamples)
    else:
        xtr, xte, wtr, wte = self.buffer.draw_tr_te_ds(
            split=split, normalize_weight=False)

    if self.model_visited:
        print('Training buffer model:')
        # kept in its own variable so the caller's nepochs still governs the
        # main model below (it used to be overwritten here)
        visited_nepochs = self.init_nepochs if iter_cnt == 0 else self.nepoch_visited
        for epoch_id in range(visited_nepochs):
            tr_nll = self.run_epoch(xtr, wtr, self.visited_dist, mode='train',
                                    debug=False)
            print(f'[visit_{iter_cnt}] epoch {epoch_id} loss = {tr_nll}')
        print('Finshed training buffer model')

        if iter_cnt % 10 == 0 and self.ndim == 2:
            _, xvisited_ind = self.sample_model(1000, model=self.visited_dist)
            self._plot_dist(xvisited_ind, 'dist', 'visited', f'{iter_cnt+1}')

    wtr = self.update_weight(xtr[:, 0, :], wtr)

    if self.ndim == 2:
        # histogram of the training data before the model is updated
        fpath = self.work_dir / get_full_name(
            name='dist', prefix='training', suffix=f'{iter_cnt}_before')
        samples = index_to_xval(self.input_vectors,
                                xtr[:, 1, :].astype('int'))
        s = self.input_scale
        plt_hist2D(samples, fpath=fpath,
                   range=np.array([[-s, s], [-s, s]]), cmap='binary')

    # per epoch
    tr_loss = 0
    te_loss = 0
    tr_loss_list = []
    print(f'Training model: fref = {self.buffer.zavg}')
    for epoch_id in range(nepochs):
        tr_nll = self.run_epoch(xtr, wtr, self.model, mode='train',
                                debug=False)
        tr_loss_list.append(tr_nll)
        # average over the epochs actually run rather than self.nepochs
        tr_loss += tr_nll / nepochs
        print(f'[train_{iter_cnt}] epoch {epoch_id} loss = {tr_nll}')

        if split < 1:
            te_nll = self.run_epoch(xte, wte, self.model, mode='test')
            te_loss += te_nll / nepochs
            print(f'[test_{iter_cnt}] epoch {epoch_id} loss = {te_nll}')

    print('Finished training model.')

    if split < 1:
        return tr_loss, te_loss

    return tr_loss, tr_loss_list
def __init__(self, spec_file: str = '', spec_dict: Optional[Mapping[str, Any]] = None,
             load: bool = False, use_time_stamp: bool = True,
             init_buffer_path=None, **kwargs) -> None:
    """Read the specs, prepare the working directory and copy the parameters.

    Parameters
    ----------
    spec_file: str
        Path of a yaml spec file; takes precedence over ``spec_dict``.
    spec_dict: Dict[str, Any]
        Specs used when ``spec_file`` is empty.
    load: bool
        True to reuse the directory of ``spec_file`` as working directory
        instead of creating a new one.
    use_time_stamp: bool
        True to name a new working directory after the current time.
    init_buffer_path:
        Stored as ``self.init_buffer_paths``; not interpreted here.
    kwargs: Dict[str, Any]
        Accepted but not used here.

    Raises
    ------
    ValueError
        If a visited-state model is requested without ``nepoch_visited``, or
        if ``params['eval_fn']`` is not a registered benchmark function.
    """
    LoggingBase.__init__(self)

    specs = read_yaml(spec_file) if spec_file else spec_dict
    self.specs = specs
    params = specs['params']

    # ---- working directory
    if load:
        self.work_dir = Path(spec_file).parent
    else:
        suffix = params.get('suffix', '')
        prefix = params.get('prefix', '')
        if use_time_stamp:
            unique_name = get_full_name(time.strftime('%Y%m%d%H%M%S'),
                                        prefix, suffix)
        else:
            # join whichever of prefix / suffix is non-empty
            unique_name = '_'.join(f'{part}' for part in (prefix, suffix) if part)
        self.work_dir = Path(specs['root_dir']) / f'{unique_name}'
        write_yaml(self.work_dir / 'params.yaml', specs, mkdir=True)

    # ---- required and optional parameters
    self.load = load
    self.seed = params.get('seed', 10)
    self.ndim = params['ndim']
    self.bsize = params['batch_size']
    self.hiddens = params['hidden_list']
    self.niter = params['niter']
    self.goal = params['goal_value']
    self.mode = params['mode']
    self.viz_rate = self.niter // 10
    self.lr = params['lr']
    self.nepochs = params['nepochs']
    self.nsamples = params['nsamples']
    self.n_init_samples = params['n_init_samples']
    self.init_nepochs = params['init_nepochs']
    self.cut_off = params['cut_off']
    self.beta = params['beta']
    self.nr_mix = params['nr_mix']
    self.base_fn = params['base_fn']
    self.only_pos = params['only_positive']
    # whether to run 1000 epochs of training for the later round of iteration
    self.full_training = params['full_training_last']
    self.input_scale = params['input_scale']
    self.fixed_sigma = params.get('fixed_sigma', None)
    self.on_policy = params.get('on_policy', False)
    self.problem_type = params.get('problem_type', 'csp')
    # on-policy sampling implies that repeated samples are allowed
    self.allow_repeated = self.on_policy or params.get('allow_repeated', False)
    self.important_sampling = params.get('important_sampling', False)

    # ---- visited-state model settings
    self.visited_dist: Optional[nn.Module] = None
    self.visited_fixed_sigma = params.get('visited_fixed_sigma', None)
    self.visited_nr_mix = params.get('visited_nr_mix', None)
    self.explore_coeff = params.get('explore_coeff', None)
    self.nepoch_visited = params.get('nepoch_visited', -1)

    self.normalize_weight = params.get('normalize_weight', True)
    self.add_ent_before_norm = params.get(
        'add_entropy_before_normalization', False)
    self.weight_type = params.get('weight_type', 'ind')

    self.model_visited = self.explore_coeff is not None or self.important_sampling
    if self.model_visited and self.nepoch_visited == -1:
        raise ValueError(
            'nepoch_visited should be specified when a model is '
            'learning visited states')

    self.init_buffer_paths = init_buffer_path

    # ---- evaluation function
    eval_fn = params['eval_fn']
    try:
        self.fn = registered_functions[eval_fn]
    except KeyError:
        raise ValueError(f'{eval_fn} is not a valid benchmark function')

    # ---- devices and model placeholders
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    print(f'device: {self.device}')
    self.cpu = torch.device('cpu')
    self.model: Optional[nn.Module] = None
    self.buffer = None
    self.opt = None

    # hacky version of passing input vectors around
    self.input_vectors_norm = [
        np.linspace(start=-1.0, stop=1.0, dtype='float32', num=100)
        for _ in range(self.ndim)
    ]
    self.input_vectors = [
        self.input_scale * vec for vec in self.input_vectors_norm
    ]
    # TODO: remove this hacky way of keeping track of delta
    first_axis = self.input_vectors_norm[0]
    self.delta = first_axis[-1] - first_axis[-2]

    # keep track of lo and hi for indices
    self.params_min = np.array([0] * self.ndim)
    self.params_max = np.array([len(vec) - 1 for vec in self.input_vectors])

    self.fvals = SortedList()