def extract_from_dataset(dataset, index=None, topK=None):
    '''
    Concatenate the items yielded by ``dataset`` into a single tensor.

    Args:
        dataset : the dataset to extract from; it is iterated once and,
            when ``topK`` is None, ``len(dataset)`` sizes the progress bar
        index : which element of each yielded item to extract; leave it
            as None when every item is already a single tensor
        topK : stop after this many items; None reads the whole dataset

    Returns:
        The extracted tensors concatenated along dim 0. When only one item
        was read, that item is returned as-is (no copy).

    Raises:
        ValueError: if ``dataset`` yields no items.
    '''
    total = len(dataset) if topK is None else topK
    # Collect first and concatenate once: calling cat inside the loop copies
    # the growing result on every iteration (quadratic in the item count).
    chunks = []
    with _tqdm(total=total) as pbar:
        pbar.set_description_str("Loading images")
        for idx, data in enumerate(dataset):
            chunks.append(data if index is None else data[index])
            pbar.update(1)
            if topK is not None and topK == idx + 1:
                break
        pbar.set_description_str("Finished")
    if not chunks:
        raise ValueError("dataset yielded no items to extract")
    if len(chunks) == 1:
        return chunks[0]
    return _torch.cat(chunks, dim=0)
def test(self, test_loader=None):
    """
    Run one pass over the test set with gradients disabled.

    Args:
        test_loader: optional loader that replaces ``self._testset`` before
            the pass starts.

    Returns:
        Whatever ``self._after_test_epoch_end(epoch_outputs)`` returns, where
        ``epoch_outputs`` holds every value returned by ``self.test_step``.

    Raises:
        TypeError: if ``test_step`` returns something that is neither a dict
            nor a ``torch.Tensor``.
    """
    if test_loader is not None:
        self._testset = test_loader
    with _torch.no_grad():
        self._before_test_epoch_start()
        self.test_epoch_start()
        epoch_outputs = []
        with _tqdm(dynamic_ncols=True, total=len(self._testset)) as bar:
            bar.set_description(f"Epoch={self.trained_epochs} Testing")
            for idx, batch in enumerate(self._testset):
                batch = self._type_transfer(batch)
                # test_step is invoked once per configured optimizer
                for optimizer_idx in range(len(self._optimizers)):
                    step_out = self.test_step(batch, idx, optimizer_idx)
                    epoch_outputs.append(step_out)
                    if isinstance(step_out, dict):
                        loss = step_out['loss']
                    elif isinstance(step_out, _torch.Tensor):
                        loss = step_out
                    else:
                        # the message used to blame training_step
                        raise TypeError(
                            "what test_step return should be Tensor or dict with key=loss with and its value type is Tensor"
                        )
                    bar.set_postfix_str("loss={:.3}".format(loss.item()))
                bar.update(1)
            bar.set_description(f"Epoch={self.trained_epochs} Tested")
        self.test_epoch_end(epoch_outputs)
        return self._after_test_epoch_end(epoch_outputs)
def batch_image_generator(generator,
                          noise_z,
                          num_batch=50,
                          batch_size=256,
                          conditional=False,
                          classes=None,
                          best_size=30):
    """
    Generate ``num_batch`` batches in groups of at most ``best_size``.

    Each group is produced by one call to ``_batch_image_generator`` and the
    group outputs are concatenated along dim 0.

    Args:
        generator, noise_z, batch_size, conditional, classes: forwarded
            unchanged to ``_batch_image_generator``.
        num_batch: total number of batches to generate; must be positive.
        best_size: largest number of batches requested per helper call;
            must be positive.

    Returns:
        The concatenation (dim 0) of every group's output.

    Raises:
        ValueError: if ``num_batch`` or ``best_size`` is not positive. The
            previous implementation never terminated for such values.
    """
    if num_batch <= 0 or best_size <= 0:
        raise ValueError(
            "num_batch and best_size must be positive, got "
            f"num_batch={num_batch}, best_size={best_size}")

    chunks = []
    remaining = num_batch
    with _tqdm(total=num_batch) as pbar:
        pbar.set_description_str("Saving images")
        while remaining > 0:
            # full groups of best_size, then one smaller group for the rest
            step = min(best_size, remaining)
            chunks.append(
                _batch_image_generator(generator, noise_z, step, batch_size,
                                       conditional, classes))
            pbar.update(step)
            remaining -= step
        pbar.set_description_str("Finished")
    if len(chunks) == 1:
        return chunks[0]
    # one concatenation instead of re-copying the result after every group
    return _torch.cat(chunks, dim=0)
def get_archive_mp3s(self, archive_entries, filepath):
    """
    Download the mp3 behind every archive entry, with an overall progress bar.

    Parameters
    ----------
    archive_entries : iterable of dict
        Each entry is read for its 'start_time', 'end_time' and 'uri' keys.
    filepath : str
        Prefix prepended to '<feed_id>-<formatted end time>.mp3' to build
        the output path of each download.
    """
    start = _timer()

    stamp_format = '%m-%d-%y %H:%M'
    earliest_download = min(
        item['start_time'] for item in archive_entries).strftime(stamp_format)
    latest_download = max(
        item['start_time'] for item in archive_entries).strftime(stamp_format)

    overall = _tqdm(archive_entries,
                    desc='Overall progress',
                    leave=True,
                    dynamic_ncols=True)
    overall.write(f'Downloading {earliest_download} to {latest_download}')
    overall.write(f'Storing at {filepath}.')

    for item in overall:
        feed_id = self._parent.feed_id
        archive_uri = item['uri']
        file_date = self._format_entry_date(item['end_time'])

        # Where the downloaded .mp3 will be stored
        destination = filepath + feed_id + '-' + file_date + '.mp3'

        # Resolve the actual mp3 URL from the entry's download page
        file_url = self._parse_mp3_path(self.get_download_soup(archive_uri))

        self._fetch_mp3([destination, file_url], overall)
def _fetch_mp3(self, entry, main_progress_bar):
    """
    Download one mp3 to disk unless the target file already exists.

    Parameters
    ----------
    entry : sequence
        ``[path, url]``: output file path and the URL to download.
    main_progress_bar : object with a ``write`` method
        Used to print status messages without breaking progress bars.

    Notes
    -----
    Non-200 responses are reported and skipped, never raised. The streamed
    response and the per-file progress bar are always closed. A missing
    ``Content-Length`` header yields a bar without a total instead of a
    ``KeyError``.
    """
    path, url = entry
    file_name = url.split('/')[-1]

    if _os.path.exists(path):
        main_progress_bar.write(f'\t{file_name} already exists. Skipping.')
        return

    self._parent.throttle.throttle('file')

    # stream=True keeps the connection open until the response is closed,
    # so hold it in a context manager.
    with _requests.get(url, stream=True) as r:
        if r.status_code == 403:
            main_progress_bar.write(
                f'\tReceived 403 on {file_name}. Archive file does not '
                f' exist. Skipping.')
            return
        if r.status_code != 200:
            main_progress_bar.write(
                f'\tCould not retrieve {url} (code {r.status_code}'
                f'). Skipping.')
            return

        self._parent.throttle.got_last_file = True

        # The header is optional (e.g. chunked transfer encoding)
        content_length = r.headers.get('Content-Length')
        file_size = int(content_length) if content_length is not None else None

        with _tqdm(total=file_size,
                   desc=f'Downloading {file_name}',
                   dynamic_ncols=True) as t, open(path, 'wb') as f:
            for chunk in r:
                f.write(chunk)
                t.update(len(chunk))
def multiprocess_interpolate(input,
                             caches,
                             max_process=32,
                             size=None,
                             scale_factor=None,
                             mode='bilinear',
                             align_corners=False):
    """
    Run ``_interpolate`` over slices of ``input`` in ``max_process`` processes.

    ``input`` and ``caches`` are cut into consecutive slices of
    ``len(input) // max_process + 1`` items; slice k of both is handed to
    process k together with the interpolation options. All processes are
    started first and then joined one by one while a progress bar advances.
    Nothing is returned.
    """
    chunk = len(input) // max_process + 1
    workers = [
        _processes.Process(target=_interpolate,
                           args=(input[k * chunk:(k + 1) * chunk],
                                 caches[k * chunk:(k + 1) * chunk], size,
                                 scale_factor, mode, align_corners))
        for k in range(max_process)
    ]

    for worker in workers:
        worker.start()
    print(f"{max_process} processes have been started", file=_sys.stderr)

    with _tqdm(total=len(workers)) as pbar:
        pbar.set_description_str("Executing")
        for worker in workers:
            worker.join()
            pbar.update(1)
        pbar.set_description_str("Executed")
    print("All the images have benn interpolated in caches", file=_sys.stderr)
def tqdm(itr=None, **kwargs):
    """
    Wrap ``itr`` in a coloured progress bar when progress bars are enabled.

    A colour is always requested from ``_get_color.color()``. If
    ``threeML_config.interface.progress_bars`` is falsy, ``itr`` is returned
    untouched; otherwise it is wrapped by ``_tqdm`` with that colour and any
    extra keyword arguments.
    """
    bar_colour = _get_color.color()

    if not threeML_config.interface.progress_bars:
        return itr

    return _tqdm(itr, colour=bar_colour, **kwargs)
def thread_save_image(tensor,
                      base_dir,
                      file_type: str = "jpg",
                      normalize=True,
                      max_threads=32,
                      prefix="",
                      suffix="",
                      base_num=1,
                      placeholder="0",
                      just_length=10,
                      batch_size=2048):
    '''
    Save ``tensor`` in batches, one worker per batch.

    Parameters:
        tensor: images shape as : BxCxLxH
        base_dir: base directory of image will be saved
        file_type: save type
        normalize: forwarded to the save worker
        max_threads: target number of workers, capped at 64; when there are
            fewer images than ``batch_size * max_threads`` the batch size is
            shrunk so the work is spread over about this many workers
        prefix: file name prefix
        suffix: file name suffix
        base_num: file name is a number string and increase from base_num
        placeholder: forwarded to the save worker
            (presumably the padding character; confirm in _thread_save_image)
        just_length: the length of string
        batch_size: how many images should be saved in every process

    Raises:
        ValueError: if ``max_threads`` or ``batch_size`` is not positive.
        RuntimeError: if a batch number needs more than ``just_length`` digits.

    Only the last started worker is joined before returning, so earlier
    workers may still be writing. An empty ``tensor`` returns immediately.
    '''
    s = len(tensor)
    max_threads = min(64, max_threads)
    if max_threads < 1:
        raise ValueError(f"max_threads must be >= 1, got {max_threads}")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    if s == 0:
        # nothing to save; the old code divided by zero here
        return

    if s < batch_size * max_threads:
        # never let the batch size reach 0 when s < max_threads
        batch_size = max(1, s // max_threads)

    length = -(-s // batch_size)  # ceil(s / batch_size)

    # The previous semaphore was acquired and released by this same thread
    # around each start(), so it never limited anything and was dropped.
    thread = None
    with _tqdm(total=length) as pbar:
        pbar.set_description_str("Saving")
        for idx in range(length):
            # NOTE(review): this checks base_num + idx, while the worker is
            # given base_num + batch_size * idx; confirm which is intended.
            if len(str(base_num + idx)) > just_length:
                raise RuntimeError(
                    f"the length of {base_num + idx} > {just_length}")
            thread = _threading.Process(
                target=_thread_save_image,
                args=(tensor[idx * batch_size:(idx + 1) * batch_size],
                      base_dir, file_type, normalize, prefix, suffix,
                      base_num + batch_size * idx, placeholder, just_length))
            thread.start()
            pbar.update(1)
        thread.join()
        pbar.set_description_str("Finished")
    print("All images will be saved after a few seconds ...",
          file=_sys.stderr)
def images_normalize(tensor, max_process=32):
    """
    Normalize ``tensor`` slice-wise in worker processes, then convert it.

    ``tensor`` is cut into consecutive slices of
    ``len(tensor) // max_process + 1`` items and ``_normalize_data`` is run on
    each slice in its own process. After all processes have been joined the
    tensor is scaled by 255, offset by 0.5, clamped to [0, 255], permuted
    with ``(0, 2, 3, 1)``, moved to the CPU as uint8 and returned as a NumPy
    array.
    """
    chunk = len(tensor) // max_process + 1
    workers = [
        _processes.Process(target=_normalize_data,
                           args=(tensor[k * chunk:(k + 1) * chunk], ))
        for k in range(max_process)
    ]

    for worker in workers:
        worker.start()
    print(f"{max_process} processes have been started", file=_sys.stderr)

    with _tqdm(total=len(workers)) as pbar:
        pbar.set_description_str("Executing")
        for worker in workers:
            worker.join()
            pbar.update(1)
        pbar.set_description_str("Executed")

    scaled = tensor.mul(255).add_(0.5).clamp_(0, 255)
    channels_last = scaled.permute(0, 2, 3, 1)
    return channels_last.to('cpu', _torch.uint8).numpy()
def genFibreH5(cellSize, hkl_str, uni_hkls_idx, symHKL_loop, xyz_pf, omega,
               qgrid, od):
    """
    Wrapper: run ``_calcFibreHDF5`` for every family in ``symHKL_loop``.

    Ensures ``fibres.h5`` exists in the working directory (created when
    missing, otherwise opened read/write and closed again), then calls
    ``_calcFibreHDF5`` once per family with the dataset label
    ``'<hkl string>_<cellSize in whole degrees>'``.

    Parameters
    ----------
    cellSize : float
        Converted with ``rad2deg`` and rounded to build the label suffix.
    hkl_str : sequence of str
        Labels; those at ``uni_hkls_idx`` are used, in order.
    uni_hkls_idx : index array
        Selects the labels matching the entries of ``symHKL_loop``.
    symHKL_loop : iterable
        One item per family, forwarded to ``_calcFibreHDF5``.
    xyz_pf, omega, qgrid, od
        Forwarded unchanged to ``_calcFibreHDF5``.
    """
    h5_name = 'fibres.h5'

    # Create the file if needed; the context manager guarantees the handle
    # is released before the workers reopen it by name.
    mode = 'r+' if _os.path.exists(h5_name) else 'w'
    with _h5.File(h5_name, mode):
        pass

    hkl_loop_str = _np.array(hkl_str)[uni_hkls_idx]
    # loop-invariant suffix: cell size in whole degrees
    suffix = '_' + str(int(round(_np.rad2deg(cellSize))))

    # Wrapping the sequence itself (not enumerate) lets the bar pick up a
    # total when symHKL_loop has a length.
    for hi, hfam in enumerate(_tqdm(symHKL_loop)):
        _calcFibreHDF5(hfam, xyz_pf, omega, qgrid, od, h5_name,
                       hkl_loop_str[hi] + suffix)

    return
def eval(self, train=False):
    """
    Evaluate the model on the training or test loader and return a ratio.

    Args:
        train: when True use ``self.train_loader``; otherwise use
            ``self.test_loader``, which must not be None.

    Returns:
        ``a / s`` where ``a`` and ``s`` are the sums of the two values that
        ``self.computer_acc`` returns per batch (presumably correct count and
        sample count; confirm in ``computer_acc``).

    Raises:
        AssertionError: if ``train`` is False and ``test_loader`` is None.
        ZeroDivisionError: if the accumulated denominator is 0, e.g. for an
            empty loader.

    The model is put in eval mode for the pass and always switched back to
    train mode afterwards, even when a step raises.
    """
    loader = self.train_loader
    if not train:
        # checked before touching the model so a failure leaves its mode alone
        assert self.test_loader is not None, "test_loader is None , "\
            "please pass test_loader first : clf_eval.test_loader = test_loader"
        loader = self.test_loader

    s = 0
    a = 0
    self.model.eval()
    try:
        with _tqdm(total=len(loader)) as pbar:
            pbar.set_description("Evaluating")
            for data in loader:
                pbar.update(1)
                out, labels = self.step(batch_data=data)
                t_a, t_s = self.computer_acc(out, labels)
                a += t_a
                s += t_s
            pbar.set_description("Evaluated")
    finally:
        self.model.train()
    return a / s
def optimize(self,
             maximize: Union[str, Callable[[pd.Series], float]] = 'SQN',
             constraint: Callable[[dict], bool] = None,
             return_heatmap: bool = False,
             **kwargs) -> Union[pd.Series, Tuple[pd.Series, pd.Series]]:
    """
    Optimize strategy parameters to an optimal combination using
    parallel exhaustive search. Returns result `pd.Series` of
    the best run.

    `maximize` is a string key from the
    `backtesting.backtesting.Backtest.run`-returned results series,
    or a function that accepts this series object and returns a number;
    the higher the better. By default, the method maximizes
    Van Tharp's [System Quality Number](https://google.com/search?q=System+Quality+Number).

    `constraint` is a function that accepts a dict-like object of
    parameters (with values) and returns `True` when the combination
    is admissible to test with. By default, any parameters combination
    is considered admissible.

    If `return_heatmap` is `True`, besides returning the result
    series, an additional `pd.Series` is returned with a multiindex
    of all admissible parameter combinations, which can be further
    inspected or projected onto 2D to plot a heatmap
    (see `backtesting.lib.plot_heatmaps()`).

    Additional keyword arguments represent strategy arguments with
    list-like collections of possible values. For example, the following
    code finds and returns the "best" of the 7 admissible (of the
    9 possible) parameter combinations:

        backtest.optimize(sma1=[5, 10, 15], sma2=[10, 20, 40],
                          constraint=lambda p: p.sma1 < p.sma2)

    Raises `ValueError` when no keyword arguments are given, when a string
    `maximize` is not a key of the run results, or when no combination
    passes `constraint`; raises `TypeError` when `maximize` or `constraint`
    is neither of the accepted kinds.

    .. TODO::
        Add parameter `max_tries: Union[int, float] = None` which switches
        from exhaustive grid search to random search. See notes in the
        source.

    .. TODO::
        Improve multiprocessing/parallel execution on Windows with start
        method 'spawn'.
    """
    if not kwargs:
        raise ValueError('Need some strategy parameters to optimize')

    if isinstance(maximize, str):
        # Reuse the cached results when present; otherwise do one run just to
        # learn which keys the results series offers.
        stats = self._results if self._results is not None else self.run()
        if maximize not in stats:
            raise ValueError(
                '`maximize`, if str, must match a key in pd.Series '
                'result of backtest.run()')

        # Rebind the name to a callable; the key is captured as a default
        # argument before `maximize` is overwritten.
        def maximize(stats: pd.Series, _key=maximize):
            return stats[_key]

    elif not callable(maximize):
        raise TypeError(
            '`maximize` must be str (a field of backtest.run() result '
            'Series) or a function that accepts result Series '
            'and returns a number; the higher the better')

    if constraint is None:

        # Default: every combination is admissible
        def constraint(_):
            return True

    elif not callable(constraint):
        raise TypeError(
            "`constraint` must be a function that accepts a dict "
            "of strategy parameters and returns a bool whether "
            "the combination of parameters is admissible or not")

    def _tuple(x):
        # Wrap scalars (and strings) so every parameter is a sequence of values
        return x if isinstance(
            x, Sequence) and not isinstance(x, str) else (x, )

    class AttrDict(dict):
        # Lets `constraint` read parameters as attributes (p.sma1)
        def __getattr__(self, item):
            return self[item]

    # Cartesian product of all parameter values, filtered by `constraint`
    param_combos = tuple(
        map(
            dict,  # back to dict so it pickles
            filter(
                constraint,  # constraints applied on our fancy dict
                map(
                    AttrDict,
                    product(*(zip(repeat(k), _tuple(v))
                              for k, v in kwargs.items()))))))
    if not param_combos:
        raise ValueError('No admissible parameter combinations to test')

    if len(param_combos) > 300:
        warnings.warn('Searching best of {} configurations.'.format(
            len(param_combos)),
                      stacklevel=2)

    # One NaN-initialised slot per admissible combination, indexed by the
    # parameter values
    heatmap = pd.Series(np.nan,
                        index=pd.MultiIndex.from_tuples(
                            [p.values() for p in param_combos],
                            names=next(iter(param_combos)).keys()))

    # TODO: add parameter `max_tries:Union[int, float]=None` which switches
    # exhaustive grid search to random search. This might need to avoid
    # returning NaNs in stats on runs with no trades to differentiate those
    # from non-tested parameter combos in heatmap.

    def _batch(seq):
        # Batch size: combinations per CPU, clipped to the range [5, 300]
        n = np.clip(len(seq) // (os.cpu_count() or 1), 5, 300)
        for i in range(0, len(seq), n):
            yield seq[i:i + n]

    # Save necessary objects into "global" state; pass into concurrent executor
    # (and thus pickle) nothing but two numbers; receive nothing but numbers.
    # With start method "fork", children processes will inherit parent address space
    # in a copy-on-write manner, achieving better performance/RAM benefit.
    backtest_uuid = np.random.random()
    param_batches = list(_batch(param_combos))
    Backtest._mp_backtests[backtest_uuid] = (self, param_batches, maximize)
    try:
        # If multiprocessing start method is 'fork' (i.e. on POSIX), use
        # a pool of processes to compute results in parallel.
        # Otherwise (i.e. on Windows), sequential computation will be "faster".
        if mp.get_start_method(allow_none=False) == 'fork':
            with ProcessPoolExecutor() as executor:
                futures = [
                    executor.submit(Backtest._mp_task, backtest_uuid, i)
                    for i in range(len(param_batches))
                ]
                for future in _tqdm(as_completed(futures),
                                    total=len(futures)):
                    batch_index, values = future.result()
                    for value, params in zip(values,
                                             param_batches[batch_index]):
                        heatmap[tuple(params.values())] = value
        else:
            if os.name == 'posix':
                warnings.warn(
                    "For multiprocessing support in `Backtest.optimize()` "
                    "set multiprocessing start method to 'fork'.")

            # Sequential fallback: run each batch in this process
            for batch_index in _tqdm(range(len(param_batches))):
                _, values = Backtest._mp_task(backtest_uuid, batch_index)
                for value, params in zip(values, param_batches[batch_index]):
                    heatmap[tuple(params.values())] = value
    finally:
        # Always drop the shared entry, even when a batch raised
        del Backtest._mp_backtests[backtest_uuid]

    best_params = heatmap.idxmax()

    if pd.isnull(best_params):
        # No trade was made in any of the runs. Just make a random
        # run so we get some, if empty, results
        self.run(**param_combos[0])
    else:
        # Re-run best strategy so that the next .plot() call will render it
        self.run(**dict(zip(heatmap.index.names, best_params)))

    if return_heatmap:
        return self._results, heatmap
    return self._results
def wimv(pfs, orient_dist, iterations=12):
    """
    Perform a WIMV inversion on a fixed grid in pole-figure (PF) space.

    Requires the PF <-> OD pointer, which is generated here through
    ``orient_dist._calcPointer('wimv', pfs)``.
    # TODO: remove requirement to pre-generate odf

    Parameters
    ----------
    pfs : poleFigure object
        Experimental pole figures; ``data``, ``hkls``, ``res``, ``_numHKL``
        and ``_symHKL`` are read.
    orient_dist : orientDist object
        Provides ``bungeList``, ``pointer``, ``res``, ``cs`` and ``ss``.
    iterations : int
        Number of iterations.

    Returns
    -------
    recalc_pf : dict
        Recalculated (normalized) pole figures keyed by iteration index.
    calc_od : dict
        Calculated (normalized) orientation distributions keyed by iteration
        index; holds one more entry than ``recalc_pf``.
    """
    # calculate pointer
    orient_dist._calcPointer('wimv', pfs)

    n_od_cells = (orient_dist.bungeList.shape[0] *
                  orient_dist.bungeList.shape[1] *
                  orient_dist.bungeList.shape[2])

    calc_od = {}
    recalc_pf = {}

    numPoles = pfs._numHKL
    numHKLs = [len(fam) for fam in pfs._symHKL]

    fullPFgrid = pfs.genGrid(pfs.res, radians=True, centered=False)

    for i in _tqdm(range(iterations),
                   desc='Performing WIMV iterations',
                   position=0,
                   leave=True):

        # first iteration, skip recalc of PF
        if i == 0:  # first iteration is direct from PFs

            # NOTE(review): od_data is created once per inversion and is not
            # reset between poles, so the factors accumulate across poles.
            # This matches the original statement order; confirm it is
            # intended.
            od_data = _np.ones(n_od_cells)
            calc_od[0] = _np.zeros((od_data.shape[0], numPoles))

            for fi in range(numPoles):
                pole_pointer = orient_dist.pointer['full']['pf to od'][fi]
                n_measured = pfs.data[fi].shape[0] * pfs.data[fi].shape[1]

                for pf_cell in _np.ravel(fullPFgrid):
                    if pf_cell in pole_pointer:
                        od_cells = _np.array(pole_pointer[pf_cell])
                        ai, bi = _np.divmod(pf_cell, fullPFgrid.shape[1])

                        if pf_cell < n_measured:  # inside of measured PF range
                            od_data[od_cells.astype(int)] *= pfs.data[fi][
                                int(ai), int(bi)]

                calc_od[0][:, fi] = _np.power(od_data, (1 / numHKLs[fi]))

            # np.product was removed in NumPy 2.0; np.prod is the same function
            calc_od[0] = _np.prod(calc_od[0], axis=1)**(1 / numPoles)

            # place into OD object
            calc_od[0] = _bunge(orient_dist.res,
                                orient_dist.cs,
                                orient_dist.ss,
                                weights=calc_od[0])
            calc_od[0].normalize()

        # recalculate pole figures
        recalc_pf[i] = _np.zeros(
            (fullPFgrid.shape[0], fullPFgrid.shape[1], numPoles))

        for fi in range(numPoles):
            pole_pointer = orient_dist.pointer['full']['pf to od'][fi]

            for pf_cell in _np.ravel(fullPFgrid):
                if pf_cell in pole_pointer:  # pf_cell is defined
                    od_cells = _np.array(pole_pointer[pf_cell])
                    ai, bi = _np.divmod(pf_cell, fullPFgrid.shape[1])
                    # mean OD weight over the cells this PF cell points to
                    recalc_pf[i][int(ai), int(bi),
                                 fi] = (1 / len(od_cells)) * _np.sum(
                                     calc_od[i].weights[od_cells.astype(int)])

        recalc_pf[i] = _poleFigure(recalc_pf[i],
                                   pfs.hkls,
                                   orient_dist.cs,
                                   'recalc',
                                   resolution=5)
        recalc_pf[i].normalize()

        # compare recalculated to experimental
        RP_err = {}
        prnt_str = None

        _np.seterr(divide='ignore')

        for fi in range(numPoles):
            expLim = pfs.data[fi].shape
            RP_err[fi] = _np.abs(
                recalc_pf[i].data[fi][:expLim[0], :expLim[1]] -
                pfs.data[fi]) / recalc_pf[i].data[fi][:expLim[0], :expLim[1]]
            # division by a zero recalculated intensity gives inf; ignore it
            RP_err[fi][_np.isinf(RP_err[fi])] = 0
            RP_err[fi] = _np.sqrt(_np.mean(RP_err[fi]**2))

            if prnt_str is None:
                prnt_str = 'RP Error: {:.4f}'.format(
                    _np.round(RP_err[fi], decimals=4))
            else:
                prnt_str += ' | {:.4f}'.format(
                    _np.round(RP_err[fi], decimals=4))

        _tqdm.write(prnt_str)

        # (i+1)th inversion
        od_data = _np.ones(n_od_cells)
        calc_od[i + 1] = _np.zeros((od_data.shape[0], numPoles))

        for fi in range(numPoles):
            pole_pointer = orient_dist.pointer['full']['pf to od'][fi]
            n_measured = pfs.data[fi].shape[0] * pfs.data[fi].shape[1]

            for pf_cell in _np.ravel(fullPFgrid):
                if pf_cell in pole_pointer:
                    od_cells = _np.array(pole_pointer[pf_cell])
                    ai, bi = _np.divmod(pf_cell, fullPFgrid.shape[1])

                    if pf_cell < n_measured:  # inside of measured PF range
                        if recalc_pf[i].data[fi][int(ai), int(bi)] == 0:
                            # avoid dividing by a zero recalculated intensity
                            continue
                        else:
                            od_data[od_cells.astype(int)] *= (
                                pfs.data[fi][int(ai), int(bi)] /
                                recalc_pf[i].data[fi][int(ai), int(bi)])

            calc_od[i + 1][:, fi] = _np.power(od_data, (1 / numHKLs[fi]))

        # correction applied to the previous OD; the exponent uses 0.8
        calc_od[i + 1] = calc_od[i].weights * _np.power(
            _np.prod(calc_od[i + 1], axis=1), (0.8 / numPoles))

        # place into OD object
        calc_od[i + 1] = _bunge(orient_dist.res,
                                orient_dist.cs,
                                orient_dist.ss,
                                weights=calc_od[i + 1])
        calc_od[i + 1].normalize()

    return recalc_pf, calc_od
def e_wimv(pfs,
           orient_dist,
           tube_rad,
           tube_exp,
           rad_type,
           crystal_dict,
           iterations=12,
           ret_origOD=False):
    """
    Perform an E-WIMV inversion.

    Arbitrary PF directions are allowed; minimum entropy solution.

    Parameters
    ----------
    pfs : poleFigure object
        Experimental pole figures; ``data``, ``y``, ``hkls``, ``_normHKLs``,
        ``_numHKL`` and ``_symHKL`` are read.
    orient_dist : orientDist object
        Provides ``q_grid``, ``bungeList``, ``pointer``, ``res``, ``cs``,
        ``ss`` and the ``_calcPath`` / ``_calcPointer`` helpers.
    tube_rad : float
        Used to derive the gnomic and euclidean radii passed to
        ``_calcPath`` (presumably the tube radius in radians; confirm).
    tube_exp : float
        Forwarded to ``_calcPointer`` as ``tube_exp``.
    rad_type : str
        'nd' or 'none' (both currently use unit reflection weights). 'xrd' is
        accepted by name but not implemented.
    crystal_dict : dict
        Dictionary defining variables for reflection weight calculators in
        pyTex.diffrac. Currently unused by this function.
    iterations : int
        Maximum number of iterations; the loop stops early once the error of
        the first pole stops decreasing.
    ret_origOD : bool
        When True, ``orient_dist`` is returned as a third value.

    Returns
    -------
    recalc_pf_full : dict
        Recalculated full pole figures (normalized) keyed by iteration.
    calc_od : dict
        Calculated orientation distributions (normalized) keyed by iteration.
    orient_dist : orientDist object
        Only when ``ret_origOD`` is True.

    Raises
    ------
    NotImplementedError
        If ``rad_type`` is 'xrd'. Previously this path ran the whole setup
        and then failed with ``NameError`` on the undefined weights.
    ValueError
        If ``rad_type`` is not one of 'xrd', 'nd', 'none'.
    """
    # rotations around y (integration variable along path)
    phi = _np.linspace(0, 2 * _np.pi, 73)

    _np.seterr(divide='ignore')

    # handle reflection weights
    if rad_type == 'xrd':
        # TODO: implement this
        raise NotImplementedError(
            "reflection weights for rad_type='xrd' are not implemented")
    elif rad_type == 'nd':
        # TODO: refl_wgt = _calc_NDreflWeights(crystal_dict, pfs.refls)
        refl_wgt = _np.ones((len(pfs.hkls)))
    elif rad_type == 'none':
        refl_wgt = _np.ones((len(pfs.hkls)))  # all ones
    else:
        raise ValueError('Please specify either xrd or nd or none (all = 1)')

    # calculate 5x5 pf grid XYZ for paths
    fullPFgrid, alp, bet, xyz_pf = pfs.genGrid(res=_np.deg2rad(5),
                                               radians=True,
                                               centered=False,
                                               ret_ab=True,
                                               ret_xyz=True,
                                               offset=True)

    # use sklearn KDTree for reduction of points for query (euclidean)
    # throw q_grid into positive hemisphere (SO3) for euclidean distance
    qgrid_pos = _np.copy(orient_dist.q_grid)
    qgrid_pos[qgrid_pos[:, 0] < 0] *= -1
    tree = _KDTree(qgrid_pos)

    # gnomic rotation angle
    rad = _np.sqrt(2 * (1 - _np.cos(0.5 * tube_rad)))
    # euclidean rotation angle
    euc_rad = _np.sqrt(4 * _np.sin(0.25 * tube_rad)**2)

    # calculate arbitrary paths
    orient_dist._calcPath('arb', pfs._normHKLs, pfs.y, phi, rad, euc_rad,
                          tree)

    # search for unique hkls to save time during path calculation
    hkls_loop, uni_hkls_idx, hkls_loop_idx = _np.unique(_normalize(
        _np.array(pfs.hkls)),
                                                        axis=0,
                                                        return_inverse=True,
                                                        return_index=True)

    if len(uni_hkls_idx) < len(pfs.hkls):
        # time can be saved by only calculating paths for unique reflections
        orient_dist._calcPath('full_trun',
                              hkls_loop,
                              xyz_pf,
                              phi,
                              rad,
                              euc_rad,
                              tree,
                              hkls_loop_idx=hkls_loop_idx)
    else:
        # time can't be saved.. calculate all paths
        orient_dist._calcPath('full', pfs._normHKLs, xyz_pf, phi, rad,
                              euc_rad, tree)

    # calculate pointer
    orient_dist._calcPointer('e-wimv', pfs, tube_exp=tube_exp)

    # e-wimv iterations
    n_od_cells = (orient_dist.bungeList.shape[0] *
                  orient_dist.bungeList.shape[1] *
                  orient_dist.bungeList.shape[2])

    calc_od = {}
    recalc_pf = {}
    rel_err = {}
    recalc_pf_full = {}

    numPoles = pfs._numHKL
    numHKLs = [len(fam) for fam in pfs._symHKL]

    for i in _tqdm(range(iterations),
                   position=0,
                   desc='Performing E-WIMV iterations'):

        # first iteration, skip recalc of PF
        if i == 0:  # first iteration is direct from PFs

            calc_od[0] = _np.ones((n_od_cells, numPoles))

            for fi in range(numPoles):
                arb_pointer = orient_dist.pointer['arb']['pf to od'][fi]

                temp = _np.ones((n_od_cells, len(pfs.y[fi])))

                for yi in range(len(pfs.y[fi])):
                    # check for zero OD cells that correspond to the
                    # specified pole figure direction
                    if yi in arb_pointer:
                        od_cells = arb_pointer[yi]['cell']
                        temp[od_cells.astype(int),
                             yi] *= abs(pfs.data[fi][yi])

                # zero to 1E-5
                temp = _np.where(temp == 0, 1E-5, temp)
                # log before sum instead of product
                temp = _np.log(temp)

                try:
                    calc_od[0][:, fi] = _np.exp(
                        (_np.sum(temp, axis=1) * refl_wgt[fi]) / numHKLs[fi])
                except Exception:
                    # Best-effort diagnostics kept from the original; the
                    # column stays at its initial value of 1. Narrowed from a
                    # bare except so KeyboardInterrupt/SystemExit propagate.
                    print(temp)
                    print(refl_wgt[fi])
                    print(fi)
                    print(yi)

            # np.product was removed in NumPy 2.0; np.prod is the same function
            calc_od[0] = _np.prod(calc_od[0], axis=1)**(1 / numPoles)

            # place into OD object
            calc_od[0] = _bunge(orient_dist.res,
                                orient_dist.cs,
                                orient_dist.ss,
                                weights=calc_od[0])
            calc_od[0].normalize()

        # recalculate poles
        recalc_pf[i] = {}

        for fi in range(numPoles):
            arb_pointer = orient_dist.pointer['arb']['pf to od'][fi]

            recalc_pf[i][fi] = _np.zeros(len(pfs.y[fi]))

            for yi in range(len(pfs.y[fi])):
                if yi in arb_pointer:  # pf_cell is defined
                    od_cells = _np.array(arb_pointer[yi]['cell'])
                    # weighted mean of the OD weights along the path,
                    # scaled by 1 / (2*pi)
                    recalc_pf[i][fi][yi] = (1 / (2 * _np.pi)) * (
                        1 / sum(arb_pointer[yi]['weight'])) * _np.sum(
                            arb_pointer[yi]['weight'] *
                            calc_od[i].weights[od_cells.astype(int)])

        # compare recalculated to experimental
        prnt_str = None
        rel_err[i] = {}

        _np.seterr(divide='ignore')

        if numPoles < 5:
            iter_num = numPoles
        else:
            iter_num = 5

        for fi in range(iter_num):  # display only the first five poles' error
            rel_err[i][fi] = _np.abs(recalc_pf[i][fi] -
                                     pfs.data[fi]) / recalc_pf[i][fi]
            # division by a zero recalculated intensity gives inf; ignore it
            rel_err[i][fi][_np.isinf(rel_err[i][fi])] = 0
            rel_err[i][fi] = _np.sqrt(_np.mean(rel_err[i][fi]**2))

            if prnt_str is None:
                prnt_str = 'RP Error: {:.4f}'.format(
                    _np.round(rel_err[i][fi], decimals=4))
            else:
                prnt_str += ' | {:.4f}'.format(
                    _np.round(rel_err[i][fi], decimals=4))

        _tqdm.write(prnt_str)

        # recalculate full pole figures (5x5 grid)
        recalc_pf_full[i] = _np.zeros(
            (fullPFgrid.shape[0], fullPFgrid.shape[1], numPoles))

        for fi in range(numPoles):
            full_pointer = orient_dist.pointer['full']['pf to od'][fi]

            for yi in _np.ravel(fullPFgrid):
                if yi in full_pointer:  # pf_cell is defined
                    od_cells = _np.array(full_pointer[yi]['cell'])
                    ai, bi = _np.divmod(yi, fullPFgrid.shape[1])
                    recalc_pf_full[i][int(ai), int(bi), fi] = (
                        1 / _np.sum(full_pointer[yi]['weight'])) * _np.sum(
                            full_pointer[yi]['weight'] *
                            calc_od[i].weights[od_cells.astype(int)])

        recalc_pf_full[i] = _poleFigure(recalc_pf_full[i],
                                        pfs.hkls,
                                        orient_dist.cs,
                                        'recalc',
                                        resolution=5)
        recalc_pf_full[i].normalize()

        if i == 0:
            pass
        # terminate early, error increased
        elif rel_err[i][0] >= rel_err[i - 1][0]:
            break

        # (i+1)th inversion
        calc_od[i + 1] = _np.zeros((n_od_cells, numPoles))

        for fi in range(numPoles):
            arb_pointer = orient_dist.pointer['arb']['pf to od'][fi]

            temp = _np.ones((n_od_cells, len(pfs.y[fi])))

            for yi in range(len(pfs.y[fi])):
                # check for zero OD cells that correspond to the specified
                # pole figure direction
                if yi in arb_pointer:
                    od_cells = arb_pointer[yi]['cell']

                    if recalc_pf[i][fi][yi] == 0:
                        # avoid dividing by a zero recalculated intensity
                        continue
                    else:
                        temp[od_cells.astype(int),
                             yi] = (abs(pfs.data[fi][yi]) /
                                    recalc_pf[i][fi][yi])

            # zero to 1E-5
            temp = _np.where(temp == 0, 1E-5, temp)
            # log sum
            temp = _np.log(temp)

            calc_od[i + 1][:, fi] = _np.exp(
                (_np.sum(temp, axis=1) * refl_wgt[fi]) / numHKLs[fi])

        calc_od[i + 1] = calc_od[i].weights * _np.power(
            _np.prod(calc_od[i + 1], axis=1), (1 / numPoles))

        # place into OD object
        calc_od[i + 1] = _bunge(orient_dist.res,
                                orient_dist.cs,
                                orient_dist.ss,
                                weights=calc_od[i + 1])
        calc_od[i + 1].normalize()

    if ret_origOD:
        return recalc_pf_full, calc_od, orient_dist
    else:
        return recalc_pf_full, calc_od
def fit(self, epochs, prog_bar_refresh_rate=1, val_every_n_epoch=1):
    """
    Train for ``epochs`` epochs, validating every ``val_every_n_epoch`` epochs.

    Args:
        epochs: number of epochs to run.
        prog_bar_refresh_rate: the training bar advances every this many
            batches (forwarded to ``self._data_init``).
        val_every_n_epoch: validation runs after every this many epochs,
            provided ``self._validation_enbale`` is truthy (forwarded to
            ``self._data_init``).

    Raises:
        TypeError: if ``training_step`` or ``validation_step`` returns
            something that is neither a dict nor a ``torch.Tensor``.

    Per batch, ``training_step`` is called once per optimizer; a ``None``
    return skips the remaining optimizers for that batch. After each epoch
    the enabled LR schedulers are stepped, then the epoch-end hooks run.
    """
    self._data_init(prog_bar_refresh_rate, val_every_n_epoch)
    length = len(self._trainset)
    with _tqdm(dynamic_ncols=True, total=len(self._trainset)) as pbar:
        for i in range(epochs):
            self.training_epoch_start()
            self.on_epoch_start()
            epoch_outputs = []
            pbar.set_description_str(
                f"Epoch={1 + self.trained_epochs} step[{i + 1}/{epochs}]")

            for idx, batch in enumerate(self._trainset):
                batch = self._type_transfer(batch)
                for optimizer_idx in range(len(self._optimizers)):
                    step_out = self.training_step(batch, idx, optimizer_idx)
                    if step_out is None:
                        break
                    epoch_outputs.append(step_out)
                    if isinstance(step_out, dict):
                        loss = step_out['loss']
                    elif isinstance(step_out, _torch.Tensor):
                        loss = step_out
                    else:
                        raise TypeError(
                            "what training_step return should be Tensor or dict with key=loss with and its value type is Tensor"
                        )
                    self.optimizer_step(loss,
                                        self._optimizers[optimizer_idx],
                                        optimizer_idx=optimizer_idx)
                self.trained_steps += 1
                self._bar_show(pbar)

                if (idx + 1) % self.prog_bar_refresh_rate == 0 \
                        or idx + 1 == length:
                    if idx + 1 == length:
                        # Last batch: advance only by what is left since the
                        # previous refresh. This was `idx - 1 == length`,
                        # which is never true, so the bar overshot its total.
                        pbar.update(idx % self.prog_bar_refresh_rate + 1)
                    else:
                        pbar.update(self.prog_bar_refresh_rate)

            for optimizer_idx in range(len(self._optimizers)):
                # strict '>' so an optimizer without a scheduler is skipped
                # instead of raising IndexError
                if self._lr_scheduler_enabled and len(
                        self._lr_schedulers) > optimizer_idx:
                    self._lr_schedulers[optimizer_idx].step()

            if i < epochs - 1:
                # rewind the bar for the next epoch
                pbar.update(-length)
            self.training_epoch_end(epoch_outputs)

            if (i + 1) % self.val_every_n_epoch == 0 \
                    and self._validation_enbale:
                epoch_outputs = []
                self._before_validation_epoch_start()
                self.validation_epoch_start()
                with _torch.no_grad():
                    with _tqdm(dynamic_ncols=True,
                               total=len(self._valset)) as bar:
                        bar.set_description(
                            f"Epoch={self.trained_epochs} Validating")
                        for idx, batch in enumerate(self._valset):
                            batch = self._type_transfer(batch)
                            for optimizer_idx in range(len(
                                    self._optimizers)):
                                step_out = self.validation_step(
                                    batch, idx, optimizer_idx)
                                epoch_outputs.append(step_out)
                                if isinstance(step_out, dict):
                                    loss = step_out['loss']
                                elif isinstance(step_out, _torch.Tensor):
                                    loss = step_out
                                else:
                                    # the message used to blame test_step
                                    raise TypeError(
                                        "what validation_step return should be Tensor or dict with key=loss with and its value type is Tensor"
                                    )
                                bar.set_postfix_str("loss={:.3}".format(
                                    loss.item()))
                            bar.update(1)
                        bar.set_description(
                            f"Epoch={self.trained_epochs + 1} Validated")
                self.validation_epoch_end(epoch_outputs)
                self._after_validation_epoch_end()

            self.on_epoch_end()
            self.trained_epochs += 1
def optimize(self,
             maximize: Union[str, Callable[[pd.Series], float]] = 'SQN',
             constraint: Callable[[dict], bool] = None,
             return_heatmap: bool = False,
             **kwargs) -> Union[pd.Series, Tuple[pd.Series, pd.Series]]:
    """
    Optimize strategy parameters to an optimal combination using
    parallel exhaustive search. Returns result `pd.Series` of
    the best run.

    `maximize` is a string key from the
    `backtesting.backtesting.Backtest.run`-returned results series,
    or a function that accepts this series object and returns a number;
    the higher the better. By default, the method maximizes
    Van Tharp's [System Quality Number](https://google.com/search?q=System+Quality+Number).

    `constraint` is a function that accepts a dict-like object of
    parameters (with values) and returns `True` when the combination
    is admissible to test with. By default, any parameters combination
    is considered admissible.

    If `return_heatmap` is `True`, besides returning the result
    series, an additional `pd.Series` is returned with a multiindex
    of all admissible parameter combinations, which can be further
    inspected or projected onto 2D to plot a heatmap.

    Additional keyword arguments represent strategy arguments with
    list-like collections of possible values. For example, the following
    code finds and returns the "best" of the 7 admissible (of the
    9 possible) parameter combinations:

        backtest.optimize(sma1=[5, 10, 15], sma2=[10, 20, 40],
                          constraint=lambda p: p.sma1 < p.sma2)

    Raises `ValueError` when no keyword arguments are given, when a string
    `maximize` is not a key of the run results, or when no combination
    passes `constraint`; raises `TypeError` when `maximize` or `constraint`
    is neither of the accepted kinds.

    .. TODO::
        Add parameter `max_tries: Union[int, float] = None` which switches
        from exhaustive grid search to random search. See notes in the
        source.
    """
    if not kwargs:
        raise ValueError('Need some strategy parameters to optimize')

    if isinstance(maximize, str):
        # Reuse the cached results when present; otherwise do one run just to
        # learn which keys the results series offers.
        stats = self._results if self._results is not None else self.run()
        if maximize not in stats:
            raise ValueError(
                '`maximize`, if str, must match a key in pd.Series '
                'result of backtest.run()')

        # Rebind the name to a callable; the key is captured as a default
        # argument before `maximize` is overwritten.
        def maximize(stats: pd.Series, _key=maximize):
            return stats[_key]

    elif not callable(maximize):
        raise TypeError(
            '`maximize` must be str (a field of backtest.run() result '
            'Series) or a function that accepts result Series '
            'and returns a number; the higher the better')

    if constraint is None:

        # Default: every combination is admissible
        def constraint(_):
            return True

    elif not callable(constraint):
        raise TypeError(
            "`constraint` must be a function that accepts a dict "
            "of strategy parameters and returns a bool whether "
            "the combination of parameters is admissible or not")

    def _tuple(x):
        # Wrap scalars (and strings) so every parameter is a sequence of values
        return x if isinstance(
            x, Sequence) and not isinstance(x, str) else (x, )

    class AttrDict(dict):
        # Lets `constraint` read parameters as attributes (p.sma1)
        def __getattr__(self, item):
            return self[item]

    # Cartesian product of all parameter values, filtered by `constraint`
    param_combos = tuple(
        map(
            dict,  # back to dict so it pickles
            filter(
                constraint,  # constraints applied on our fancy dict
                map(
                    AttrDict,
                    product(*(zip(repeat(k), _tuple(v))
                              for k, v in kwargs.items()))))))
    if not param_combos:
        raise ValueError('No admissible parameter combinations to test')

    if len(param_combos) > 300:
        warnings.warn('Searching best of {} configurations.'.format(
            len(param_combos)),
                      stacklevel=2)

    # One NaN-initialised slot per admissible combination, indexed by the
    # parameter values
    heatmap = pd.Series(np.nan,
                        index=pd.MultiIndex.from_tuples(
                            [p.values() for p in param_combos],
                            names=next(iter(param_combos)).keys()))

    # TODO: add parameter `max_tries:Union[int, float]=None` which switches
    # exhaustive grid search to random search. This might need to avoid
    # returning NaNs in stats on runs with no trades to differentiate those
    # from non-tested parameter combos in heatmap.

    def _batch(seq):
        # Batch size: combinations per CPU, clipped to the range [5, 300];
        # note it is derived from param_combos, not from `seq`
        n = np.clip(len(param_combos) // (os.cpu_count() or 1), 5, 300)
        for i in range(0, len(seq), n):
            yield seq[i:i + n]

    # Each submitted task receives one batch of parameter dicts; every result
    # is an iterable of (params, stats) pairs scored here with `maximize`.
    with ProcessPoolExecutor() as executor:
        futures = [
            executor.submit(self._mp_task, params)
            for params in _batch(param_combos)
        ]
        for future in _tqdm(as_completed(futures), total=len(futures)):
            for params, stats in future.result():
                heatmap[tuple(params.values())] = maximize(stats)

    best_params = heatmap.idxmax()

    if pd.isnull(best_params):
        # No trade was made in any of the runs. Just make a random
        # run so we get some, if empty, results
        self.run(**param_combos[0])
    else:
        # Re-run best strategy so that the next .plot() call will render it
        self.run(**dict(zip(heatmap.index.names, best_params)))

    if return_heatmap:
        return self._results, heatmap
    return self._results
def build(self, start=None, end=None, days_back=None,
          chronological=False, rebuild=False):
    """
    Build archive entry data for the BroadcastifyArchive's feed_id and
    populate it as a list of dictionaries in the .entries attribute.

    Each entry dictionary has the keys 'uri', 'start_time' and 'end_time'.
    On completion, .earliest_entry and .latest_entry are set from the
    entries' end times and the archive is printed.

    Parameters
    ----------
    start : datetime.date
        The earliest date for which to populate the archive. If None, go
        from the earliest date on the calendar (inclusive).
    end : datetime.date
        The latest date for which to populate the archive. If None, go
        to the latest date on the calendar (inclusive).
    days_back : int
        The number of days before the current day to retrieve
        information for. A value of `0` retrieves only archive entries
        corresponding to the current day. Pass either days_back OR a
        valid combination of start/end dates.
    chronological : bool
        By default, start with the latest date and work backward in
        time. If True, reverse that.
    rebuild : bool
        Specifies that existing data in the `entries` list should be
        overwritten with data newly fetched from Broadcastify.

    Raises
    ------
    ValueError
        If entries already exist and `rebuild` is False, or if both
        `days_back` and `start`/`end` were passed.
    TypeError
        If `days_back` is not a non-negative integer.
    AttributeError
        If `start`/`end` fall outside the archive's date range, or if
        `start` is after `end`.
    """
    import operator as _operator  # only needed for `days_back` validation

    # Prevent the user from unintentionally erasing existing archive info
    if self.entries and not rebuild:
        raise ValueError(
            'Archive already built: Entries already exist for'
            ' this BroadcastifyArchive. To erase and rebuild,'
            ' specify `rebuild=True` when calling .build()')

    # Make sure valid arguments were passed
    ## Either start/end or days_back; not both. Compare against None so
    ## that a legitimate `days_back=0` still counts as "passed".
    if (start or end) and days_back is not None:
        raise ValueError('Expected either `days_back` OR a `start`/`end` '
                         'combination. Both were passed.')

    if days_back is not None:
        ## `days_back` must be a non-negative integer
        try:
            days_back = _operator.index(days_back)
        except TypeError:
            raise TypeError(
                '`days_back` must be a non-negative integer.') from None
        if days_back < 0:
            raise TypeError('`days_back` must be a non-negative integer.')

        # Capture the archive end date to count back from
        end = self.end_date

        # Make sure days_back is no larger than the archive date range size
        start = self.start_date
        archive_size = (end - start).days

        if days_back > archive_size:
            _warnings.warn(
                f"The number of days_back passed ({days_back}) "
                f"exceeds the size of the archive's date range ("
                f"{archive_size}). Only valid dates will be "
                f"built.")
            days_back = archive_size
    else:
        ## Check that `start` and `end` within archive's start/end dates
        ## If they weren't passed, set them to the archive's start/end dates
        out_of_range = ''

        if start:
            if start < self.start_date:
                out_of_range = (f'start date out of archive range: '
                                f'{start} < {self.start_date}\n')
            elif start > self.end_date:
                out_of_range = (f'start date out of archive range: '
                                f'{start} > {self.end_date}\n')
        else:
            start = self.start_date

        if end:
            if end > self.end_date:
                out_of_range += (f'end date out of archive range: '
                                 f'{end} > {self.end_date}')
            elif end < self.start_date:
                out_of_range += (f'end date out of archive range: '
                                 f'{end} < {self.start_date}')
        else:
            end = self.end_date

        if out_of_range:
            raise AttributeError(out_of_range)

        ## `start` cannot be > `end`
        if start > end:
            raise AttributeError(f'`start` date ({start}) cannot be after '
                                 f'`end` date ({end}).')

        # Get size of the date range
        days_back = (end - start).days

    # Adjust for exclusive end of range()
    days_back += 1

    # Build the list of dates to scrape
    date_list = sorted(
        (end - _dt.timedelta(days=x) for x in range(days_back)),
        reverse=not chronological)

    archive_entries = []

    # Spin up a browser and an ArchiveCalendar
    # Set whether to show browser UI while fetching
    print('Launching webdriver...')
    options = _Options()
    if not self.show_browser_ui:
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')

    with _webdriver.Chrome(executable_path=self.webdriver_path,
                           chrome_options=options) as browser:
        browser.get(self.archive_url)
        self.arch_cal = ArchiveCalendar(self, browser)

        # Get archive entries for each date in list
        t = _tqdm(date_list, desc='Building dates', leave=True,
                  dynamic_ncols=True)
        for date in t:
            t.set_description(f'Building {date}', refresh=True)

            self.arch_cal.go_to_date(date)

            if self.arch_cal.entries_for_date:
                archive_entries.extend(self.arch_cal.entries_for_date)

    # Empty & replace the current archive entries, storing URIs and
    # start/end times in the entries attribute
    self.entries = [
        {'uri': entry[0], 'start_time': entry[1], 'end_time': entry[2]}
        for entry in archive_entries
    ]

    # NOTE(review): if no entries were found for any date, min()/max() on
    # the empty list raise ValueError here — confirm the desired behavior.
    end_times = [entry['end_time'] for entry in self.entries]
    self.earliest_entry = min(end_times).date()
    self.latest_entry = max(end_times).date()

    print(self)