def __init__(self, image_dir, image_size, patch_size, patch_stride=None, n_refs=1):
    super(PatchSet, self).__init__()
    patch_size = make_tuple(patch_size)
    patch_stride = make_tuple(patch_stride) if patch_stride else patch_size

    self.root_dir = image_dir
    self.image_size = image_size
    self.patch_size = patch_size
    self.patch_stride = patch_stride
    self.refs = n_refs

    self.image_dirs = [p for p in self.root_dir.glob('*') if p.is_dir()]
    self.num_im_pairs = len(self.image_dirs)

    # Number of patches the image is split into along each axis
    self.num_patches_x = math.ceil((image_size[0] - patch_size[0] + 1) / patch_stride[0])
    self.num_patches_y = math.ceil((image_size[1] - patch_size[1] + 1) / patch_stride[1])
    self.num_patches = self.num_im_pairs * self.num_patches_x * self.num_patches_y

    self.transform = im2tensor
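# Worked example of the patch-count arithmetic above: for a 1200x1200 image,
# 256x256 patches, and a stride of 200,
#   num_patches_x = ceil((1200 - 256 + 1) / 200) = ceil(4.725) = 5
# so each image pair contributes 5 * 5 = 25 patches to num_patches.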
def __init__(self, image_dir, image_size, patch_size, num_cache=10, patch_stride=None, n_refs=1):
    super(PatchSet, self).__init__()
    patch_size = make_tuple(patch_size)
    patch_stride = make_tuple(patch_stride) if patch_stride else patch_size

    self.logger = get_logger()
    self.root_dir = image_dir
    self.image_size = image_size
    self.patch_size = patch_size
    self.patch_stride = patch_stride
    self.refs = n_refs

    self.image_dirs = [p for p in self.root_dir.glob('*') if p.is_dir()]
    self.num_im_pairs = len(self.image_dirs)
    # Never cache more image pairs than the dataset actually contains
    self.num_cache = min(num_cache, self.num_im_pairs)

    # Number of patches the image is split into along each axis
    self.num_patches_x = math.ceil((image_size[0] - patch_size[0] + 1) / patch_stride[0])
    self.num_patches_y = math.ceil((image_size[1] - patch_size[1] + 1) / patch_stride[1])
    self.num_patches = self.num_im_pairs * self.num_patches_x * self.num_patches_y

    self.transform = im2tensor

    # Pre-load the first num_cache image pairs into the cache
    self.now_index = -1
    self.images = []
    for i in range(self.num_cache):
        load_image_pair(self.images, self.image_dirs[i], self.refs)
    self.now_cache_index = list(range(self.num_cache))
    self.now_cache_index_amount_used = [0] * self.num_im_pairs
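# A minimal, hypothetical sketch (not part of the original class) of how a flat
# patch index could be resolved against the cache fields initialized above;
# assumes now_cache_index lists the image-pair indices currently in self.images:
def locate_patch(index, num_patches_x, num_patches_y, now_cache_index):
    pair_idx, rest = divmod(index, num_patches_x * num_patches_y)
    gx, gy = divmod(rest, num_patches_y)   # patch-grid position within the pair
    cached = pair_idx in now_cache_index   # hit: reuse already-loaded arrays
    return pair_idx, gx, gy, cached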
def minibatches(self, use_cuda=False, batch_size=128):
    if use_cuda:
        fnc = lambda x: _tensor_or_none(x, use_cuda)
    else:
        fnc = lambda x: x

    batch_generator = zip(*(minibatch(*fnc(make_tuple(attr)), batch_size=batch_size)
                            if attr is not None else iter_none()
                            for attr in (self.user_ids,
                                         self.item_ids,
                                         self.ratings,
                                         self.timestamps,
                                         self.weights,
                                         self.context_features)))

    user_features = fnc(self.user_features)
    item_features = fnc(self.item_features)

    for (uids_batch, iids_batch, ratings_batch,
         timestamps_batch, weights_batch, cf_batch) in batch_generator:
        yield InteractionsMinibatch(
            user_ids=uids_batch,
            item_ids=iids_batch,
            ratings=ratings_batch,
            timestamps=timestamps_batch,
            weights=weights_batch,
            user_features=_slice_or_none(user_features, uids_batch),
            item_features=_slice_or_none(item_features, iids_batch),
            # context features are per-interaction, so the batch itself is the slice
            context_features=cf_batch,
        )
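# Hedged usage sketch (an Interactions-like object `interactions` and a scoring
# `model` are assumptions, not names from this module):
for batch in interactions.minibatches(use_cuda=False, batch_size=256):
    scores = model(batch.user_ids, batch.item_ids)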
def make_inputs(self, indict):
    """Extract from indict exactly the inputs needed for self.back_step_fun,
    processing them through hetinput first if it is present."""
    if self.hetinput is not None:
        outputs_as_tuple = utils.make_tuple(self.hetinput(
            **{k: indict[k] for k in self.hetinput_inputs if k in indict}))
        indict.update(dict(zip(self.hetinput_outputs_order, outputs_as_tuple)))

    indict_new = {k: indict[k] for k in self.all_inputs - self.inputs_p if k in indict}
    try:
        return {**indict_new, **{k + '_p': indict[k] for k in self.inputs_p}}
    except KeyError as e:
        print(f'Missing backward variable or Markov matrix {e} for {self.back_step_fun.__name__}!')
        raise
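# Illustrative toy of the '_p' ("prime") convention handled above: inputs named
# in inputs_p are passed to the backward step with a '_p' suffix, marking
# next-period values (the names 'Va' and 'Pi' are assumptions, not fixed API):
toy_indict = {'Va': 1.0, 'Pi': 2.0, 'beta': 0.98}
toy_inputs_p = {'Va', 'Pi'}
mapped = {k + '_p': toy_indict[k] for k in toy_inputs_p}
# mapped == {'Va_p': 1.0, 'Pi_p': 2.0}; 'beta' stays unsuffixed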
def _check(self):
    if self.user_ids.max() >= self.num_users:
        raise ValueError('Maximum user id greater '
                         'than declared number of users.')
    if self.item_ids.max() >= self.num_items:
        raise ValueError('Maximum item id greater '
                         'than declared number of items.')
    if self.ratings is not None and self.ratings.size != len(self):
        raise ValueError('Number of ratings inconsistent '
                         'with number of interactions.')

    for feature in make_tuple(self.user_features):
        if feature.shape[0] != self.num_users:
            raise ValueError('Number of user features not '
                             'equal to number of users.')
    for feature in make_tuple(self.item_features):
        if feature.shape[0] != self.num_items:
            raise ValueError('Number of item features not '
                             'equal to number of items.')
    for feature in make_tuple(self.context_features):
        if feature.shape[0] != len(self):
            raise ValueError('Number of context features not '
                             'equal to number of interactions.')

    num_interactions = len(self.user_ids)
    for name, value in (('item IDs', self.item_ids),
                        ('ratings', self.ratings),
                        ('timestamps', self.timestamps),
                        ('weights', self.weights)):
        if value is None:
            continue
        if len(value) != num_interactions:
            raise ValueError('Invalid {} dimensions: length '
                             'must be equal to number of interactions'
                             .format(name))
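# Toy illustration of the shape contract _check enforces (array names are
# hypothetical): user features must stack to (num_users, ...), item features
# to (num_items, ...), and context features to (len(self), ...).
import numpy as np

num_users, num_items = 100, 50
ok_user_features = np.zeros((num_users, 16))       # passes: first dim == num_users
bad_user_features = np.zeros((num_users + 1, 16))  # would raise ValueError above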
def __init__(self, image_dir, image_size, patch_size, patch_stride=None, mode=Mode.TRAINING):
    super(PatchSet, self).__init__()
    patch_size = make_tuple(patch_size)
    patch_stride = make_tuple(patch_stride) if patch_stride else patch_size

    self.root_dir = image_dir
    self.image_size = image_size
    self.patch_size = patch_size
    self.patch_stride = patch_stride
    self.mode = mode

    self.image_dirs = [p for p in self.root_dir.iterdir() if p.is_dir()]
    self.num_im_pairs = len(self.image_dirs)

    # Number of patches the image is split into along each axis
    self.n_patch_x = math.ceil((image_size[0] - patch_size[0] + 1) / patch_stride[0])
    self.n_patch_y = math.ceil((image_size[1] - patch_size[1] + 1) / patch_stride[1])
    self.num_patch = self.num_im_pairs * self.n_patch_x * self.n_patch_y
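# Hedged usage sketch (`train_dir` as a pathlib.Path and the Mode enum are
# assumed to be defined elsewhere in the repo):
train_set = PatchSet(train_dir, image_size=(1200, 1200), patch_size=256,
                     patch_stride=200, mode=Mode.TRAINING)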
def td(self, ss, **kwargs):
    kwargs_new = {}
    for k, v in kwargs.items():
        if np.isscalar(v):
            raise ValueError(f'Keyword argument {k}={v} is scalar, should be time path.')
        kwargs_new[k] = Displace(v, ss=ss.get(k, None), name=k)

    for k in self.input_list:
        if k not in kwargs_new:
            kwargs_new[k] = Ignore(ss[k])

    return dict(zip(self.output_list, utils.make_tuple(self.f(**kwargs_new))))
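# Hedged usage sketch: pass time paths for the shocked inputs and let td fill
# the rest with steady-state values (`block` and the input name 'z' are
# illustrative assumptions):
import numpy as np

z_path = ss['z'] + 0.01 * 0.8 ** np.arange(50)  # hypothetical AR(1)-style path
td_out = block.td(ss, z=z_path)                 # dict of output time paths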
def contexts(self):
    if self.num_context_features():
        for batch in self.minibatches(batch_size=1):
            yield batch
    else:
        # Sort by user id
        sort_indices = np.argsort(self.user_ids)
        self._sort(sort_indices)

        batch_generator = zip(*(grouped_minibatch(self.user_ids, *make_tuple(attr))
                                if attr is not None else iter_none()
                                for attr in (self.user_ids,
                                             self.item_ids,
                                             self.ratings,
                                             self.timestamps,
                                             self.weights,
                                             self.context_features)))

        user_features = self.user_features
        item_features = self.item_features

        for (uids_batch, iids_batch, ratings_batch,
             timestamps_batch, weights_batch, cf_batch) in batch_generator:
            yield InteractionsMinibatch(
                user_ids=uids_batch,
                item_ids=iids_batch,
                ratings=ratings_batch,
                timestamps=timestamps_batch,
                weights=weights_batch,
                user_features=_slice_or_none(user_features, uids_batch),
                item_features=item_features,
                context_features=cf_batch,
            )
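# Hedged usage sketch: with no explicit context features, each yielded batch
# groups all interactions of a single user (`interactions` is an assumed
# Interactions-like object, as in the minibatches sketch above):
for batch in interactions.contexts():
    pass  # every id in batch.user_ids is identical within one group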
def test(self, test_dir, patch_size, test_refs, num_workers=0):
    self.model.eval()
    patch_size = utils.make_tuple(patch_size)
    utils.load_checkpoint(self.best, model=self.model)
    self.logger.info('Testing...')

    # Record the file paths in the test directory, used later to match
    # projection information to the outputs
    image_dirs = [p for p in test_dir.glob('*') if p.is_dir()]
    image_paths = [get_pair_path(d, test_refs) for d in image_dirs]

    # At prediction time the image must tile exactly into patches, so that the
    # predictions can be stitched back into a complete image afterwards
    assert self.image_size[0] % patch_size[0] == 0
    assert self.image_size[1] % patch_size[1] == 0
    rows = self.image_size[1] // patch_size[1]
    cols = self.image_size[0] // patch_size[0]
    n_blocks = rows * cols

    test_set = PatchSet(test_dir, self.image_size, patch_size, n_refs=test_refs)
    test_loader = DataLoader(test_set, batch_size=1, num_workers=num_workers)

    scaled_patch_size = tuple(i * self.resolution_scale for i in patch_size)
    scaled_image_size = tuple(i * self.resolution_scale for i in self.image_size)
    pixel_value_scale = 10000

    im_count = 0
    patches = []
    t_start = timer()
    for inputs in test_loader:
        # If the inputs include the target, drop the trailing target
        if len(inputs) % 2 == 0:
            del inputs[-1]
        name = image_paths[im_count][-1].name
        if len(patches) == 0:
            t_start = timer()
            self.logger.info(f'Predict on image {name}')

        # Predict patch by patch (each forward pass sees one block of the image)
        inputs = [im.to(self.device) for im in inputs]
        prediction = self.model(inputs)
        prediction = prediction.cpu().numpy()
        patches.append(prediction * pixel_value_scale)

        # Once all patches of an image are done, stitch them back together
        if len(patches) == n_blocks:
            result = np.empty((NUM_BANDS, *scaled_image_size), dtype=np.float32)
            block_count = 0
            for i in range(rows):
                row_start = i * scaled_patch_size[1]
                for j in range(cols):
                    col_start = j * scaled_patch_size[0]
                    result[:,
                           col_start:col_start + scaled_patch_size[0],
                           row_start:row_start + scaled_patch_size[1]] = patches[block_count]
                    block_count += 1
            patches.clear()

            # Save the predicted image, copying georeferencing from the prototype
            result = result.astype(np.int16)
            prototype = str(image_paths[im_count][1])
            utils.save_array_as_tif(result, self.test_dir / name, prototype=prototype)
            im_count += 1
            t_end = timer()
            self.logger.info(f'Time cost: {t_end - t_start}s')
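# Minimal, self-contained sketch of the stitching loop above (hypothetical
# standalone version; the NUM_BANDS value is an arbitrary assumption, but the
# raster order and axis layout match test()):
import numpy as np

NUM_BANDS, ps, rows, cols = 6, (256, 256), 2, 2
patches = [np.full((NUM_BANDS, *ps), k, dtype=np.float32) for k in range(rows * cols)]
result = np.empty((NUM_BANDS, cols * ps[0], rows * ps[1]), dtype=np.float32)
k = 0
for i in range(rows):
    for j in range(cols):
        result[:, j * ps[0]:(j + 1) * ps[0], i * ps[1]:(i + 1) * ps[1]] = patches[k]
        k += 1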
def __init__(self, back_step_fun, exogenous, policy, backward):
    """Construct HetBlock from backward iteration function.

    Parameters
    ----------
    back_step_fun : function
        backward iteration function
    exogenous : str
        name of Markov transition matrix for exogenous variable
        (only a single one allowed for simplicity; use Kronecker product for more)
    policy : str or sequence of str
        names of policy variables of endogenous, continuous state variables
        e.g. assets 'a', must be returned by function
    backward : str or sequence of str
        variables that together comprise the 'v' that we use for iterating backward;
        must appear both as outputs and as arguments

    It is assumed that every output of the function (except possibly backward),
    including policy, will be on a grid of dimension 1 + len(policy), where the
    first dimension is the exogenous variable and the remaining dimensions are
    each of the continuous policy variables, in the same order they are listed
    in 'policy'.

    The Markov transition matrix between the current and future period and the
    backward iteration variables should appear in the backward iteration
    function with '_p' subscripts ("prime") to indicate that they come from the
    next period.

    Currently, we only support up to two policy variables.
    """
    self.back_step_fun = back_step_fun

    self.all_outputs_order = utils.output_list(back_step_fun)
    all_outputs = set(self.all_outputs_order)
    self.all_inputs = set(utils.input_list(back_step_fun))

    self.exogenous = exogenous
    self.policy, self.backward = (utils.make_tuple(x) for x in (policy, backward))

    if len(self.policy) > 2:
        raise ValueError(f"More than two endogenous policies in {back_step_fun.__name__}, not yet supported")

    self.inputs_p = {self.exogenous} | set(self.backward)

    # input checking
    if self.exogenous + '_p' not in self.all_inputs:
        raise ValueError(f"Markov matrix '{self.exogenous}_p' not included as argument in {back_step_fun.__name__}")

    for pol in self.policy:
        if pol not in all_outputs:
            raise ValueError(f"Policy '{pol}' not included as output in {back_step_fun.__name__}")

    for back in self.backward:
        if back + '_p' not in self.all_inputs:
            raise ValueError(f"Backward variable '{back}_p' not included as argument in {back_step_fun.__name__}")
        if back not in all_outputs:
            raise ValueError(f"Backward variable '{back}' not included as output in {back_step_fun.__name__}")

    self.non_back_outputs = all_outputs - set(self.backward)
    for out in self.non_back_outputs:
        if out.isupper():
            raise ValueError(f"Output '{out}' is uppercase in {back_step_fun.__name__}, not allowed")

    # aggregate outputs and inputs for utils.block_sort
    self.inputs = self.all_inputs - {k + '_p' for k in self.backward}
    self.inputs.remove(exogenous + '_p')
    self.inputs.add(exogenous)
    self.outputs = {k.upper() for k in self.non_back_outputs}

    # start without a hetinput
    self.hetinput = None
    self.hetinput_inputs = set()
    self.hetinput_outputs_order = tuple()

    # 'saved' arguments start empty
    self.saved = {}
    self.prelim_saved = {}
    self.saved_shock_list = []
    self.saved_output_list = []
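# Hedged toy satisfying the naming contract enforced above (the 'Va'/'Pi'/'a'
# names follow the standard heterogeneous-agent example but are assumptions here):
import numpy as np

def toy_backstep(Va_p, Pi_p, beta):
    Wa = beta * (Pi_p @ Va_p)  # expected discounted next-period marginal value
    Va = Wa                    # backward variable: taken as '_p' input, returned as output
    a = Wa                     # policy output, lowercase
    c = Wa                     # extra lowercase output -> aggregate 'C' in self.outputs
    return Va, a, c

# block = HetBlock(toy_backstep, exogenous='Pi', policy='a', backward='Va')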
def jac(self, ss, T=None, shock_list=None, h=1E-5):
    """Assemble nested dict of Jacobians

    Parameters
    ----------
    ss : dict
        steady state values
    T : int, optional
        number of time periods for explicit T*T Jacobian
        if omitted, more efficient SimpleSparse objects returned
    shock_list : list of str, optional
        names of input variables to differentiate wrt; if omitted, assume all inputs
    h : float, optional
        radius for symmetric numerical differentiation

    Returns
    -------
    J : dict of {str: dict of {str: array(T,T)}}
        J[o][i] for output o and input i gives Jacobian of o with respect to i
        This Jacobian is a SimpleSparse object or, if T is specified, a T*T
        matrix, omitted by convention if zero
    """
    if shock_list is None:
        shock_list = self.input_list

    raw_derivatives = {o: {} for o in self.output_list}

    # initialize dict of default inputs k on which we'll evaluate simple blocks;
    # each element is an 'Ignore' object containing the ss value of input k that
    # ignores time displacement, i.e. k(3) in a simple block evaluates to just ss k
    x_ss_new = {k: Ignore(ss[k]) for k in self.input_list}

    # loop over all inputs k which we want to differentiate
    for k in shock_list:
        # detect all non-zero time displacements i with which k(i) appears in f;
        # wrap steady-state values in Reporter class (similar to Ignore but adds
        # any time displacements to a shared set), then feed into f
        reporter = Reporter(ss[k])
        x_ss_new[k] = reporter
        self.f(**x_ss_new)
        relevant_displacements = reporter.myset

        # add zero by default (Reporter can't detect it, since no explicit call k(0) is required)
        relevant_displacements.add(0)

        # evaluate derivative with respect to input at each displacement i
        for i in relevant_displacements:
            # perturb k(i) up by +h from steady state and evaluate f
            x_ss_new[k] = Perturb(ss[k], h, i)
            y_up_all = utils.make_tuple(self.f(**x_ss_new))

            # perturb k(i) down by -h from steady state and evaluate f
            x_ss_new[k] = Perturb(ss[k], -h, i)
            y_down_all = utils.make_tuple(self.f(**x_ss_new))

            # for each output o of f, if affected, store derivative in raw_derivatives[o][k][i];
            # this builds up the Jacobian raw_derivatives[o][k] of output o with respect to
            # input k, a 'sparsederiv' dict mapping time displacements i to derivatives
            for y_up, y_down, o in zip(y_up_all, y_down_all, self.output_list):
                if y_up != y_down:
                    sparsederiv = raw_derivatives[o].setdefault(k, {})
                    sparsederiv[i] = (y_up - y_down) / (2 * h)

        # replace our Perturb object for k with an Ignore object, so we can move on to other k
        x_ss_new[k] = Ignore(ss[k])

    # process raw_derivatives to return either SimpleSparse objects or (if T provided) matrices
    J = {o: {} for o in self.output_list}
    for o in self.output_list:
        for k in raw_derivatives[o].keys():
            if T is None:
                J[o][k] = SimpleSparse.from_simple_diagonals(raw_derivatives[o][k])
            else:
                J[o][k] = SimpleSparse.from_simple_diagonals(raw_derivatives[o][k]).matrix(T)

    return J
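# Hedged usage sketch (`block` and the names 'y'/'r' are illustrative assumptions):
J = block.jac(ss, T=300, shock_list=['r'])
dy_dr = J['y']['r']  # 300x300 Jacobian of output 'y' with respect to input 'r'
# each stored entry comes from the symmetric difference (y_up - y_down) / (2 * h)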