def __getitem__(self, idx):
    """Load every image of sample ``idx`` and return ``(images, label)``.

    ``self.items[idx][0]`` is iterated as a sequence of image paths; each one
    is read with ``image.imread`` using ``self._flag`` and, when a transform
    is configured, passed through ``self._transform``.  The loaded images are
    stacked along a new leading axis and the first two axes are then swapped.
    The label is produced by ``self.align_generation`` from
    ``self.items[idx][1]`` with ``padding=self._seq_len``.
    """
    entry = self.items[idx]
    frames = []
    for path in entry[0]:
        frame = image.imread(path, self._flag)
        frames.append(frame if self._transform is None else self._transform(frame))
    # Stack on axis 0, then swap axes 0 and 1.
    # presumably (T, C, H, W) -> (C, T, H, W); confirm against the transform output.
    clip = nd.transpose(nd.stack(*frames), (1, 0, 2, 3))
    return clip, self.align_generation(entry[1], padding=self._seq_len)
def default_pad_batchify_fn(data):
    """Collate data into batch, labels are padded to same shape.

    Parameters
    ----------
    data : list
        Samples of one batch.  NDArray samples are stacked directly, tuple
        samples are collated field by field (recursively), and anything else
        is treated as array-like labels whose first axis may differ in length
        between samples.

    Returns
    -------
    NDArray or list
        For labels, an array of shape ``(batch, max_len, last_dim)`` where
        rows missing from shorter samples are filled with ``-1``.
    """
    if isinstance(data[0], nd.NDArray):
        return nd.stack(*data)
    elif isinstance(data[0], tuple):
        data = zip(*data)
        return [default_pad_batchify_fn(i) for i in data]
    else:
        # Convert every sample on its own.  Calling np.asarray on the whole
        # batch builds an object array when samples have different lengths and
        # raises ValueError on NumPy >= 1.24; for equal-length samples the
        # per-sample conversion yields the same rows.
        data = [np.asarray(l) for l in data]
        # ``+ [1]`` guarantees at least one (all -1) row per sample.
        pad = max([l.shape[0] for l in data] + [1])
        buf = np.full((len(data), pad, data[0].shape[-1]), -1, dtype=data[0].dtype)
        for i, l in enumerate(data):
            buf[i][:l.shape[0], :] = l
        return nd.array(buf, dtype=data[0].dtype)
def ten_crop(src, size):
    """Crop 10 regions from an array.

    This is performed same as:
    http://chainercv.readthedocs.io/en/stable/reference/transforms.html#ten-crop

    All regions have shape ``size``.  They consist of 1 center crop and
    4 corner crops, followed by the horizontal flips of those five crops,
    in this order:

    * center crop
    * top-left crop
    * bottom-left crop
    * top-right crop
    * bottom-right crop
    * center crop (flipped horizontally)
    * top-left crop (flipped horizontally)
    * bottom-left crop (flipped horizontally)
    * top-right crop (flipped horizontally)
    * bottom-right crop (flipped horizontally)

    Parameters
    ----------
    src : mxnet.nd.NDArray
        Input image.
    size : tuple
        Tuple of length 2, as (width, height) of the cropped areas.

    Returns
    -------
    mxnet.nd.NDArray
        The cropped images with shape (10, size[1], size[0], C)

    Raises
    ------
    ValueError
        If the image is smaller than the requested crop in either dimension.
    """
    img_h, img_w, _ = src.shape
    crop_w, crop_h = size
    if img_h < crop_h or img_w < crop_w:
        raise ValueError(
            "Cannot crop area {} from image with size ({}, {})".format(str(size), img_h, img_w))

    # Row / column windows shared by the five crops.
    mid_rows = slice((img_h - crop_h) // 2, (img_h + crop_h) // 2)
    mid_cols = slice((img_w - crop_w) // 2, (img_w + crop_w) // 2)
    top_rows = slice(0, crop_h)
    bottom_rows = slice(img_h - crop_h, img_h)
    left_cols = slice(0, crop_w)
    right_cols = slice(img_w - crop_w, img_w)

    windows = [
        (mid_rows, mid_cols),       # center
        (top_rows, left_cols),      # top-left
        (bottom_rows, left_cols),   # bottom-left
        (top_rows, right_cols),     # top-right
        (bottom_rows, right_cols),  # bottom-right
    ]
    patches = [src[rows, cols, :] for rows, cols in windows]
    plain = nd.stack(*patches, axis=0)
    # Axis 2 of the stacked (5, H, W, C) array is the width axis.
    return nd.concat(plain, nd.flip(plain, axis=2), dim=0)
def default_mp_pad_batchify_fn(data):
    """Use shared memory for collating data into batch, labels are padded to same shape.

    Parameters
    ----------
    data : list
        Samples of one batch.  NDArray samples are stacked into a
        ``cpu_shared`` buffer, tuple samples are collated field by field
        (recursively), and anything else is treated as array-like labels
        whose first axis may differ in length between samples.

    Returns
    -------
    NDArray or list
        For labels, a ``cpu_shared`` array of shape
        ``(batch, max_len, last_dim)`` where rows missing from shorter
        samples are filled with ``-1``.
    """
    if isinstance(data[0], nd.NDArray):
        out = nd.empty((len(data),) + data[0].shape, dtype=data[0].dtype,
                       ctx=context.Context('cpu_shared', 0))
        return nd.stack(*data, out=out)
    elif isinstance(data[0], tuple):
        data = zip(*data)
        return [default_mp_pad_batchify_fn(i) for i in data]
    else:
        # Convert every sample on its own.  Calling np.asarray on the whole
        # batch builds an object array when samples have different lengths and
        # raises ValueError on NumPy >= 1.24; for equal-length samples the
        # per-sample conversion yields the same rows.
        data = [np.asarray(l) for l in data]
        batch_size = len(data)
        # ``+ [1]`` guarantees at least one (all -1) row per sample.
        pad = max([l.shape[0] for l in data] + [1])
        buf = np.full((batch_size, pad, data[0].shape[-1]), -1, dtype=data[0].dtype)
        for i, l in enumerate(data):
            buf[i][:l.shape[0], :] = l
        return nd.array(buf, dtype=data[0].dtype, ctx=context.Context('cpu_shared', 0))
def crop_resize_normalize(img, bbox_list, output_size):
    """Crop each box out of ``img``, resize it and normalize it.

    Parameters
    ----------
    img : array-like
        Source image; ``img.shape[0]`` and ``img.shape[1]`` are used as the
        height and width limits when clipping boxes.
    bbox_list : iterable
        Boxes whose first four entries are read as ``x0, y0, x1, y1``.
    output_size : sequence
        Target size; passed to ``image.fixed_crop`` as
        ``(output_size[1], output_size[0])``, so presumably given as
        (height, width) -- confirm against callers.

    Returns
    -------
    NDArray
        The transformed crops stacked along a new leading axis.
    """
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # The image does not change between boxes, so convert it to an NDArray
    # and read its bounds once instead of once per box.
    src = nd.array(img)
    img_h = int(img.shape[0])
    img_w = int(img.shape[1])

    output_list = []
    for bbox in bbox_list:
        # Clip the box to the image bounds.
        x0 = max(int(bbox[0]), 0)
        y0 = max(int(bbox[1]), 0)
        x1 = min(int(bbox[2]), img_w)
        y1 = min(int(bbox[3]), img_h)
        w = x1 - x0
        h = y1 - y0
        res_img = image.fixed_crop(src, x0, y0, w, h, (output_size[1], output_size[0]))
        output_list.append(transform_test(res_img))
    return nd.stack(*output_list)
def __getitem__(self, tokens):
    """Looks up embedding vectors of text tokens.

    Parameters
    ----------
    tokens : str or list of strs
        A token or a list of tokens.

    Returns
    -------
    mxnet.ndarray.NDArray:
        The embedding vector(s) of the token(s). According to numpy
        conventions, if `tokens` is a string, returns a 1-D NDArray
        (vector); if `tokens` is a list of strings, returns a 2-D NDArray
        (matrix) of shape=(len(tokens), vec_len).
    """
    single = not isinstance(tokens, (list, tuple))
    if single:
        tokens = [tokens]

    fallback = self.unknown_lookup
    # Auto-extension needs a fallback plus both extension switches.
    autoextend = fallback is not None and self.allow_extend and self.unknown_autoextend

    if fallback is not None and not autoextend:
        # Known tokens come from the table, everything else from the
        # fallback lookup; the table itself is left untouched.
        rows = []
        for token in tokens:
            if token in self.token_to_idx:
                rows.append(self.idx_to_vec[self.token_to_idx[token]])
            else:
                rows.append(fallback[token])
        vecs = nd.stack(*rows, axis=0)
    else:
        if autoextend:
            # Add the unseen tokens to the table first.
            missing = [t for t in tokens if t not in self.token_to_idx]
            self[missing] = fallback[missing]
        indices = [self._token_to_idx[token] for token in tokens]
        table = self.idx_to_vec
        vecs = nd.Embedding(nd.array(indices), table, table.shape[0], table.shape[1])

    if single:
        return vecs[0]
    return vecs
# SGD trainer over the parameter collection ``less`` (defined elsewhere in the file).
trainer = gluon.Trainer(less, 'sgd', {'learning_rate': 3})
# 25 epochs; each epoch reshuffles the data and walks over 10 equal splits.
for e in range(25):
    X, y = shuffle(X, y)
    for data, target in zip(np.split(X, 10), np.split(y, 10)):
        with mx.autograd.record():
            # Collect one penalty term per decision node, computed from the
            # gate's ``_qz_loga``, ``_temperature`` and ``_limit_lo/_limit_hi``.
            # NOTE(review): looks like a hard-concrete / L0-style gate penalty -- confirm.
            cost = []
            for decision in tree._routerlayer._children.values():
                gate = decision._gate
                cost.append(
                    nd.sigmoid(gate._qz_loga.data() - gate._temperature * nd.log(-1 * gate._limit_lo / gate._limit_hi)))
            cost = nd.sum(nd.stack(*cost))
            loss = error(tree(nd.array(data)), nd.array(target))
            # Total objective: data loss plus 0.1 times the summed gate penalty.
            loss = loss + 0.1 * cost
        loss.backward()
        trainer.step(data.shape[0], ignore_stale_grad=True)

# %%

# tree(nd.array(data))

#
# Print the gate parameters and sharpness of every decision node.
for decision in tree._routerlayer._children.values():
    gate = decision._gate
    print("keep")
    print(gate._qz_loga.data())
    print(decision._sharpness.data())
def __getitem__(self, index):
    """Read ``self.n_frame`` frames of one video and return ``(clip, label)``.

    ``index`` selects a ``(filename, label)`` entry of ``self.clip_lst``.
    Frames are resized to ``(self.scale_w, self.scale_h)``, converted from
    BGR to RGB, cropped to ``self.crop_size`` (random crop and random start
    frame when ``self.is_train``, centered otherwise) and passed through
    ``self._transform``.  The transformed frames are stacked on axis 1; if
    fewer than ``self.n_frame`` frames were read, the last frame is repeated
    to fill the clip.

    Returns ``None`` (after printing a message) when the file is missing.
    """
    filename, label = self.clip_lst[index]
    if not os.path.exists(filename):
        print("the file not exist", filename)
        return None

    nd_image_list = []
    # Retry the whole read until at least one frame was collected.
    # NOTE(review): if the video can never be decoded, or no transform is
    # configured, this loop does not terminate -- kept as in the original.
    while len(nd_image_list) == 0:
        v = cv2.VideoCapture(filename)
        try:
            width = v.get(cv2.CAP_PROP_FRAME_WIDTH)
            height = v.get(cv2.CAP_PROP_FRAME_HEIGHT)
            length = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
            assert self.crop_size <= width and self.crop_size <= height, (
                'crop_size %d exceeds frame size %dx%d of %s'
                % (self.crop_size, width, height, filename))
            if length < self.n_frame:
                # Short video: the last frame is repeated after the loop.
                logger.info("%s length %d <%d" % (filename, length, self.n_frame))

            # Choose the first frame to sample.
            if not self.is_train:
                frame_st = 0 if length <= self.n_frame else int(
                    (length - self.n_frame) // 2)
            else:
                frame_st = 0 if length <= self.n_frame else random.randrange(
                    length - self.n_frame + 1)

            # Choose the crop position inside the resized frame.
            if self.is_train:
                row_st = random.randrange(self.scale_h - self.crop_size + 1)
                col_st = random.randrange(self.scale_w - self.crop_size + 1)
            else:
                row_st = int((self.scale_h - self.crop_size) / 2)
                col_st = int((self.scale_w - self.crop_size) / 2)

            # Jump to the start frame and read consecutive frames from there.
            v.set(cv2.CAP_PROP_POS_FRAMES, frame_st)
            for frame_p in range(min(self.n_frame, length)):
                _, f = v.read()
                if f is not None:
                    f = cv2.resize(f, (self.scale_w, self.scale_h))  # HWC layout
                    f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
                    f = f[row_st:row_st + self.crop_size,
                          col_st:col_st + self.crop_size, :]
                    if self._transform:
                        nd_image_list.append(self._transform(nd.array(f)))
                else:
                    # A failed read invalidates the clip; start over.
                    nd_image_list.clear()
                    break
        finally:
            # Always free the decoder, including on retries and errors.
            v.release()

    # Stack the frames on a new axis 1.
    # presumably the transform yields CHW frames, giving CTHW -- confirm.
    current_length = len(nd_image_list)
    cthw_data = nd.stack(*nd_image_list, axis=1)
    if current_length < self.n_frame:
        # Pad with copies of the last frame up to n_frame frames.
        extra_data = nd.tile(nd_image_list[-1],
                             reps=(self.n_frame - current_length, 1, 1, 1))
        extra_data = extra_data.transpose((1, 0, 2, 3))
        cthw_data = nd.concat(cthw_data, extra_data, dim=1)
    return cthw_data, label
def _corr2d_multi_in_out(self, X, K):
    """Apply ``self._corr2d_multi_in`` to ``X`` once per entry of ``K``.

    ``K`` is iterated along its first axis; the per-entry results are
    stacked along a new leading axis.
    """
    per_output = []
    for kernel in K:
        per_output.append(self._corr2d_multi_in(X, kernel))
    return nd.stack(*per_output)
def generate_targets(self, img, boxes):
    """Build per-location classification, centerness and box targets.

    img : [H, W, 3]
    boxes : [N, 5]  (the five columns are split below as x0, y0, x1, y1, cls)

    Returns ``(cls_targets, ctr_targets, box_targets, cor_targets)``, each
    the concatenation over ``self._stages`` feature levels of the flattened
    per-level targets.
    """
    rh, rw, _ = img.shape
    # Pixel coordinate grids of the input image, both of shape [H, W].
    rx = nd.arange(0, rw).reshape((1, -1))
    ry = nd.arange(0, rh).reshape((-1, 1))
    sx = nd.tile(rx, reps=(rh, 1))
    sy = nd.tile(ry, reps=(1, rw))
    # Reorder the boxes by area, then prepend an all-zero row so that
    # index 0 acts as the "no box" entry.
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    boxes = boxes[nd.argsort(areas)]
    boxes = nd.concat(nd.zeros((1, 5)), boxes, dim=0)  # for gt assign confusion
    x0, y0, x1, y1, cls = nd.split(boxes, num_outputs=5, axis=-1, squeeze_axis=True)
    n = boxes.shape[0]
    # Distances from every pixel to the left/top/right/bottom side of
    # every box.
    # [H, W, N]
    of_l = sx.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(x0, axis=0), axis=0)
    of_t = sy.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(y0, axis=0), axis=0)
    of_r = -(sx.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(x1, axis=0), axis=0))
    of_b = -(sy.reshape(-2, 1) - nd.expand_dims(nd.expand_dims(y1, axis=0), axis=0))
    # Centerness: sqrt(|min(l,r)/max(l,r) * (min(t,b)/max(t,b) + eps)|).
    # NOTE(review): eps is added to the second ratio, not to the
    # denominators, so a zero max() still divides by zero -- confirm intent.
    # [H, W, N]
    eps = 1e-5
    ctr =(nd.minimum(of_l, of_r) / nd.maximum(of_l, of_r)) * \
        (nd.minimum(of_t, of_b) / nd.maximum(of_t, of_b) + eps)
    ctr = nd.sqrt(nd.abs(ctr))
    ctr[:, :, 0] = 0
    # [H, W, N, 4]
    offsets = nd.concat(of_l.reshape(-2, 1), of_t.reshape(-2, 1),
                        of_r.reshape(-2, 1), of_b.reshape(-2, 1), dim=-1)
    # fh = int(np.ceil(((rh + 1) / 2) // 2 / 2))
    # fw = int(np.ceil(((rw + 1) / 2) // 2 / 2))
    # Feature-map size after three successive ceil-halvings of the input.
    fh = int(np.ceil(np.ceil(np.ceil(rh / 2) / 2) / 2))
    fw = int(np.ceil(np.ceil(np.ceil(rw / 2) / 2) / 2))
    # Sizes of all stages, halving each time; reversed so the smallest
    # map comes first.
    fm_list = []
    for i in range(self._stages):
        fm_list.append((fh, fw))
        fh = int(np.ceil(fh / 2))
        fw = int(np.ceil(fw / 2))
    fm_list = fm_list[::-1]
    cls_targets = []
    ctr_targets = []
    box_targets = []
    cor_targets = []
    # The stride starts at self._stride and is halved after every stage.
    stride = self._stride
    for i in range(self._stages):
        fh, fw = fm_list[i]
        cls_target = nd.zeros((fh, fw))
        box_target = nd.zeros((fh, fw, 4))
        ctr_target = nd.zeros((fh, fw))
        # Integer (row, col) index of every cell of this feature map.
        cx = nd.arange(0, fw).reshape((1, -1))
        cy = nd.arange(0, fh).reshape((-1, 1))
        sx = nd.tile(cx, reps=(fh, 1))
        sy = nd.tile(cy, reps=(1, fw))
        syx = nd.stack(sy.reshape(-1), sx.reshape(-1)).transpose().astype('int32')
        # bugs in this type
        # bx = sxy[:, 0] * stride + nd.floor(sxy[:, 0] / 2).astype(np.int32)
        # by = sxy[:, 1] * stride + nd.floor(sxy[:, 1] / 2).astype(np.int32)
        # Image-space pixel each cell maps to (cell index times stride).
        # NOTE(review): no bounds check against the image size is visible here.
        by = syx[:, 0] * stride
        bx = syx[:, 1] * stride
        cor_targets.append(nd.stack(bx, by, axis=1))
        # [FH*FW, N, 4]
        of_byx = offsets[by, bx]
        # of_byx = nd.gather_nd(offsets, indices=byx.transpose())
        min_vr, max_vr = self._valid_range[i]
        # A location is inside a box when all four offsets are positive.
        # [FH*FW, N]
        is_in_box = nd.prod(of_byx > 0, axis=-1)
        # ...and belongs to this stage when its largest offset lies in
        # [min_vr, max_vr].
        is_valid_area = (of_byx.max(axis=-1) >= min_vr) * (of_byx.max(axis=-1) <= max_vr)
        # [FH*FW, N]
        valid_pos = nd.elemwise_mul(is_in_box, is_valid_area)
        of_valid = nd.zeros((fh, fw, n))
        of_valid[syx[:, 0], syx[:, 1], :] = valid_pos  # 1, 0
        # Index 0 is the padding row and must never count as valid.
        of_valid[:, :, 0] = 0
        # argmax over the 0/1 validity flags picks one valid box index per
        # cell, or 0 (the padding row) when no box is valid.
        # [FH, FW]
        gt_inds = nd.argmax(of_valid, axis=-1)
        # box targets
        box_target[syx[:, 0], syx[:, 1]] = boxes[gt_inds[syx[:, 0], syx[:, 1]], :4]
        box_target = box_target.reshape(-1, 4)
        # cls targets
        cls_target[syx[:, 0], syx[:, 1]] = cls[gt_inds[syx[:, 0], syx[:, 1]]]
        cls_target = cls_target.reshape(-1)
        # ctr targets
        ctr_target[syx[:, 0], syx[:, 1]] = ctr[by, bx, gt_inds[syx[:, 0], syx[:, 1]]]
        ctr_target = ctr_target.reshape(-1)
        box_targets.append(box_target)
        cls_targets.append(cls_target)
        ctr_targets.append(ctr_target)
        stride = int(stride / 2)
    # Concatenate the flattened targets of all stages.
    box_targets = nd.concat(*box_targets, dim=0)
    cls_targets = nd.concat(*cls_targets, dim=0)
    ctr_targets = nd.concat(*ctr_targets, dim=0)
    cor_targets = nd.concat(*cor_targets, dim=0)
    cor_targets = cor_targets.astype('float32')
    return cls_targets, ctr_targets, box_targets, cor_targets
def forward(self, is_train, req, in_data, out_data, aux):
    """Compute RPN anchor labels and box regression targets for a batch.

    Inputs are read from ``in_data`` in this order: rpn_cls_score,
    gt_boxes, im_info, base_anchors, feat_stride, allowed_border.
    ``out_data[0]`` receives the labels reshaped to
    ``(batch_size, A * feat_height * feat_width)`` and ``out_data[1]`` the
    box targets reshaped to ``(batch_size, 4, A * feat_height * feat_width)``.
    """
    # im_info.shape(batch_size, 3)
    rpn_cls_score = in_data[0]
    gt_boxes = in_data[1]
    im_info = in_data[2]
    base_anchors = in_data[3]
    feat_stride = in_data[4]
    allowed_border = in_data[5]
    ctx = rpn_cls_score.context
    batch_size = rpn_cls_score.shape[0]
    feat_height, feat_width = rpn_cls_score.shape[-2:]
    # A anchors per cell, K cells, N anchors in total.
    A = base_anchors.shape[0]
    K = feat_height * feat_width
    N = K * A
    # generate anchors shifts
    shift_x = (nd.arange(0, feat_width, ctx=ctx) * feat_stride). \
        broadcast_to((feat_height, feat_width)).reshape(K)
    shift_y = (nd.arange(0, feat_height, ctx=ctx) * feat_stride). \
        reshape(feat_height, 1).broadcast_to((feat_height, feat_width)).reshape(K)
    # add A anchors (1, A, 4) to cell K shifts (K, 1, 4) to get shift anchors (K, A, 4)
    # then reshape and broadcast to (batch_size, K*A, 4) shifted anchors
    shifts = nd.stack(shift_x, shift_y, shift_x, shift_y, axis=-1).reshape(K, 1, 4)
    all_anchors = (base_anchors.reshape((1, A, 4)) + shifts).reshape(1, N, 4) \
        .broadcast_to((batch_size, N, 4))
    # keep only inside anchors, set outside anchors coordinate = (-1, -1, -1, -1)
    # (im_info[:, 1] bounds x, im_info[:, 0] bounds y)
    inside_bool_mask = (all_anchors[:, :, 0] >= -allowed_border) * \
        (all_anchors[:, :, 1] >= -allowed_border) * \
        (all_anchors[:, :, 2] < (im_info[:, 1] + allowed_border).reshape(0, 1)) * \
        (all_anchors[:, :, 3] < (im_info[:, 0] + allowed_border).reshape(0, 1))
    # mask * (x + 1) - 1 keeps x where mask == 1 and yields -1 elsewhere.
    all_anchors[:] = inside_bool_mask.reshape(batch_size, -1, 1) * (all_anchors + 1) - 1
    # presumably overlaps has shape (batch, num_gt, N) -- confirm in bbox_overlaps.
    overlaps = bbox_overlaps(gt_boxes, all_anchors)
    # get max iou anchor for each gt_boxes
    gt_max_overlaps = overlaps.max(axis=2)
    gt_argmax_overlaps = overlaps.argmax(axis=2)
    # get max iou for each anchors
    max_overlaps = overlaps.max(axis=1)
    argmax_overlaps = overlaps.argmax(axis=1)
    # set positive anchor label=1, other=0
    labels = max_overlaps >= self._positive_iou_th
    # set neither positive nor negative anchor label = -1
    labels[:] = labels - ((max_overlaps > self._negative_iou_th) * (max_overlaps < self._positive_iou_th))
    # set max iou anchor for each gt_boxes label >= 1 (<=3) and ignore padded gt_box
    batch_idx = nd.arange(batch_size, ctx=ctx).reshape(-1, 1)
    labels[batch_idx, gt_argmax_overlaps] = labels[
        batch_idx, gt_argmax_overlaps] + 2 * (gt_max_overlaps > 0)
    # set outside anchor label <= -1
    # then remain label=0 is negative samples
    labels[:] = labels - 4 * (1 - inside_bool_mask)
    # clip label values to -1, 0, 1
    labels[:] = nd.clip(labels, -1, 1)
    # random choice labels
    labels_with_idx = nd.concat(labels.transpose(), nd.arange(N, ctx=ctx).reshape(-1, 1), dim=1)
    # column 0:batch_size is label, column batch_size is labels original index
    rand_labels_with_idx = nd.random.shuffle(labels_with_idx)
    # may include some bg_label if labels==1 num < num_fg
    fg_rand_labels_idx = rand_labels_with_idx[:, :batch_size].argsort(
        axis=0, is_ascend=0)[:self._rpn_fg_num]
    # use abs() to invert all label=-1, so that label=0 will at top after ascend sort
    abs_rand_labels = nd.abs(rand_labels_with_idx[:, :batch_size])
    # set fg_label=-1 to let it at top after ascend sort
    abs_rand_labels[fg_rand_labels_idx, batch_idx.transpose()] = -1
    # select rand labels idx that will be excluded
    exclude_rand_labels_idx = abs_rand_labels.argsort(
        axis=0, is_ascend=1)[self._rpn_batch_size:]
    # get original label index
    exclude_labels_idx = rand_labels_with_idx[exclude_rand_labels_idx, batch_size]
    # set exclude label = -1
    labels[batch_idx, exclude_labels_idx.transpose()] = -1
    # assign gt_boxes to anchor, anchor box_target is its max iou gt_box
    bbox_targets = nd.empty((batch_size, N, 4), ctx=ctx)
    bbox_targets[:] = bbox_transform(
        all_anchors, gt_boxes[batch_idx, argmax_overlaps, :4])
    # Reorder from (cell, anchor) ordering to anchor-major ordering.
    labels = labels.reshape((batch_size, feat_height, feat_width, A)).transpose(axes=(0, 3, 1, 2))
    labels = labels.reshape((batch_size, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (batch_size, feat_height, feat_width, A, 4)).transpose(axes=(0, 4, 3, 1, 2))
    bbox_targets = bbox_targets.reshape(
        (batch_size, 4, A * feat_height * feat_width))
    out_data[0][:] = labels
    out_data[1][:] = bbox_targets
def aggfn(inputs, dsttype):  # pylint: disable=unused-argument
    """Stack ``inputs`` on a new leading axis and reduce it with ``fn``.

    Returns ``None`` when ``inputs`` is empty.  ``dsttype`` is accepted for
    interface compatibility and ignored.
    """
    if len(inputs) != 0:
        return fn(nd.stack(*inputs, axis=0), axis=0)
    return None
print(urls[0]) print("caution,failed to download all pictures") print(result[0][1][0],result[0][1][1]) records.clear() urls.clear() docs.clear() for f_ret in result: try: if not f_ret[0].closed: f_ret[0].close() except Exception as e: print(e) continue nd_tensor_img = nd.stack(*nd_img_list,axis=0) nd_tensor_img = nd_tensor_img.as_in_context(context[0]) data = net.extract(nd_tensor_img) data = data.asnumpy() doc_types =['image']*len(records) vectors = [convert_vector_to_ascii(v) for v in data ] ret = requests.post(host + path + "add/batch", json={"docs": docs, "doc_types": doc_types, "vectors": vectors}) print(ret.json()) #for annother loop doc_types=[] vectors =[]
def corr2d_muti_in_out(X, K):
    """Apply ``corr2d_multi_in`` to ``X`` once per entry of ``K``.

    ``K`` is iterated along its first axis; the per-entry results are
    stacked along a new leading axis.
    """
    per_output = []
    for k in K:
        per_output.append(corr2d_multi_in(X, k))
    # ``*`` unpacks the list so every result is passed to nd.stack as its
    # own argument.
    return nd.stack(*per_output)
def stack_agg(inputs, dsttype):  # pylint: disable=unused-argument
    """Stack ``inputs`` along a new axis 1.

    Returns ``None`` when ``inputs`` is empty.  ``dsttype`` is accepted for
    interface compatibility and ignored.
    """
    if len(inputs) != 0:
        return nd.stack(*inputs, axis=1)
    return None
[[0, 1], [2, 3]], # R-kernel [[1, 2], [3, 4]] ]) # G-kernel print(corr2d_multi_in(X, K)) """**************************** 多输出通道 ****************************************""" def corr2d_muti_in_out(X, K): # stack黏在后面 第一次(0 1 2 3)... # 遍历的是k的0通道的值,即第一对中括号中的逗号数目 return nd.stack(*[corr2d_multi_in(X, k) for k in K]) # * 把2*2 2*2 2*2 弄成 [2*2 2*2 2*2] # [(0 1 2 3) (1 2 3 4) (2 3 4 5)] K = nd.stack(K, K + 1, K + 2) # shape 3*2*2*2 [(2*2)(2*2) (2*2)(2*2) (2*2)(2*2)] print( corr2d_muti_in_out(X, K) ) # 输出三个 相当于corr2d_multi_in(X, K) corr2d_multi_in(X, K+1) corr2d_multi_in(X, K+2) """**************************** 1*1卷积层 ****************************************""" # 核心: 每个通道数据先拉成行向量,运算,结果又拉回原形 def corr2d_multi_in_1x1(X, K): c_i, h, w = X.shape # first-channel, for eaample, R,G,B(3*3*3) X = X.reshape((c_i, h * w)) # 将每一个通道的数据转成行向量, (3*9) c_o = K.shape[0] # 输出多通道c_o K = K.reshape((c_o, c_i)) # 将对应的kernel转成行向量 (2*3) output = nd.dot(K, X) # (2*3)*(3*9)=(2*9) return output.reshape((c_o, h, w)) # 将每一层数据拉回来(2*3*3)
def __init__(self, num_inputs, num_hiddens, batch_first, drop_prob):
    """Create a bidirectional LSTM layer.

    Parameters
    ----------
    num_inputs : int
        Passed to ``rnn.LSTM`` as ``input_size``.
    num_hiddens : int
        Hidden size passed to ``rnn.LSTM``.
    batch_first : bool
        Selects the ``'NTC'`` layout when true, ``'TNC'`` otherwise.
    drop_prob : float
        Passed to ``rnn.LSTM`` as ``dropout``.
    """
    super(LSTM, self).__init__()
    self.drop_prob = drop_prob
    self.batch_first = batch_first
    self.layout = 'NTC' if batch_first else 'TNC'
    self.rnn = rnn.LSTM(num_hiddens, layout=self.layout, dropout=drop_prob,
                        bidirectional=True, input_size=num_inputs,
                        i2h_weight_initializer='Orthogonal',
                        h2h_weight_initializer='Orthogonal')


def forward(self, x, length, state=None):
    """Run the LSTM and return the masked outputs and last valid steps.

    Parameters
    ----------
    x : NDArray
        Input sequence in the layout chosen at construction time.
    length : NDArray
        Valid length of every sequence in the batch.
    state : pair of NDArray, optional
        Initial ``(hidden, cell)`` state.  When omitted the layer starts
        from its default initial state.

    Returns
    -------
    outputs : NDArray
        LSTM outputs in the layer's layout, with every step at or beyond the
        sequence's valid length set to 0.
    hidden : NDArray
        For every sequence, the output at step ``length - 1``.
    """
    # Run the recurrent layer exactly once, with or without an initial state.
    if state is None:
        outputs = self.rnn(x)
    else:
        hidden_state, cell_state = state
        outputs, _ = self.rnn(x, (hidden_state, cell_state))

    # SequenceMask masks along axis 0, so it needs time-major data.
    if self.layout == 'NTC':
        time_major = nd.transpose(outputs, (1, 0, 2))
    else:
        time_major = outputs
    time_major = nd.SequenceMask(time_major, sequence_length=length,
                                 use_sequence_length=True, value=0)
    batch_major = nd.transpose(time_major, (1, 0, 2))
    outputs = batch_major if self.layout == 'NTC' else time_major

    # Pick the last valid time step of every sequence (batch-major iteration).
    hidden = [output[i - 1] for (output, i) in zip(batch_major, length)]
    hidden = nd.stack(*hidden).squeeze()
    return outputs, hidden
def train():
    """Train the TCDCN network and evaluate it after every epoch.

    Every epoch runs one pass over the training iterator and one pass over
    the test iterator, writes per-item loss scalars to a SummaryWriter, and
    saves the parameters to ``path + 'tcdcn.pt'``.

    NOTE: this function uses Python 2 ``print`` statements.
    """
    net = TCDCN()
    epoches = 1000
    path = 'log_DP/log6/'
    # if os.path.exists(path):
    #     os.system('rm -r %s' % (path))
    sv_model = path + 'tcdcn.pt'
    sw = SummaryWriter(logdir=path)
    ttmp.set(sw)
    # ctx = [mx.gpu(i) for i in [3, 4]]
    ctx = mx.gpu(1)
    initia_Tcdcn(net, ctx)
    # net.load_parameters(sv_model, ctx=ctx)
    lr = 0.001
    # 256 samples per device when a device list is used, otherwise 512.
    batch_size = 256 * len(ctx) if type(ctx) == list else 512
    idx_span = 50
    eta_weight = 0.01
    print '~' * 10, '\npath:', path, '\n', 'learning rate:', lr, '\n', 'ctx:', ctx, '\n', '~' * 10, '\n'
    keypoint_weight = [0.5, 1.5, 1.5, 2, 1]  # "landmarks", "smile", "glasses", "gender", "pose"
    # train_data, test_data = database(batch_size)
    train_data, _, test_data, _ = HDF5_dataset(batch_size)
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
    items = ["landmarks", "smile", "glasses", "gender", "pose"]
    # j / k are the global step counters of the train / test loops.
    j, k, epoch = 0, 0, 0
    train_items, test_items = [], []
    while epoch < epoches:
        batch = 0
        ttmp.setstop(True)
        for batch in train_data:
            t = time.time()
            ttmp.setiter(j)
            batch = batch.data
            if type(ctx) == list:
                # Multi-device path: split the batch across the devices.
                data = gluon.utils.split_and_load(batch[0], ctx)
                landmark = gluon.utils.split_and_load(batch[1], ctx)
                attr = gluon.utils.split_and_load(batch[2], ctx)
                with autograd.record():
                    out = [net(X) for X in data]
                    loss_items = [
                        loss_FLD(X, Y, Z, keypoint_weight)
                        for X, Y, Z in zip(out, landmark, attr)
                    ]
                    L2_weight = L2_penalty(net, eta_weight)
                    losses = [
                        nd.sum(nd.stack(*X)) + Y[0]
                        for X, Y in zip(loss_items, L2_weight)
                    ]
                for loss in losses:
                    loss.backward()
            else:
                # Single-device path.
                data = batch[0].as_in_context(ctx)
                landmark = batch[1].as_in_context(ctx)
                attr = batch[2].as_in_context(ctx)
                with autograd.record():
                    out = net(data)
                    loss_items = loss_FLD(out, landmark, attr, keypoint_weight)
                    L2_weight = L2_penalty(net, eta_weight)
                    # Total loss: sum of all item losses plus the L2 penalty.
                    losses = nd.sum(nd.stack(*loss_items)) + L2_weight[0]
                losses.backward()
            trainer.step(batch_size)
            if type(ctx) == list:
                # Merge the per-device losses item by item.
                loss_items = [nd.sum(nd.stack(*X)) for X in zip(*loss_items)]
            for item_idx, tag in enumerate(items):
                value = nd.sum(loss_items[item_idx]).asscalar() / batch_size
                sw.add_scalar(tag, value=value, global_step=j)
            recoder(loss_items, sw, j, train_items, test_items, mode='train', span=idx_span)
            j += 1
            if j % 20 == 0:
                print epoch, 'train ok%2.5s' % (time.time() - t)
            # if j > 4: break
        ttmp.setstop(False)
        # ``batch`` still holds the last training batch here.
        ShowNet_PerEpoch(net, sw, batch[1], epoch)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        for batch in test_data:
            batch = batch.data
            t = time.time()
            if type(ctx) == list:
                data = gluon.utils.split_and_load(batch[0], ctx)
                landmark = gluon.utils.split_and_load(batch[1], ctx)
                attr = gluon.utils.split_and_load(batch[2], ctx)
                out = [net(X) for X in data]
                loss_items = [
                    loss_FLD(X, Y, Z, keypoint_weight)
                    for X, Y, Z in zip(out, landmark, attr)
                ]
                loss_items = [nd.sum(nd.stack(*X)) for X in zip(*loss_items)]
            else:
                data = batch[0].as_in_context(ctx)
                landmark = batch[1].as_in_context(ctx)
                attr = batch[2].as_in_context(ctx)
                out = net(data)
                loss_items = loss_FLD(out, landmark, attr, keypoint_weight)
            for item_idx, tag in enumerate(items):
                value = nd.sum(loss_items[item_idx]).asscalar() / batch_size
                sw.add_scalar(tag + '_test', value=value, global_step=k)
            recoder(loss_items, sw, k, train_items, test_items, mode='test', span=idx_span)
            k += 1
            if k % 20 == 0:
                print epoch, 'test ok%2.5s' % (time.time() - t)
            # if k > 142:
            #     Iface.idx["glasses"] = 0
            # if k > 198:
            #     Iface.idx["gender"] = 0
            # if k > 195:
            #     Iface.idx["smile"] = 0
            # if k > 108:
            #     Iface.idx["pose"] = 0
            # if k > 4: break
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        epoch += 1
        print 'save model'
        net.save_parameters(sv_model)
        # Rewind both iterators for the next epoch.
        train_data.reset()
        test_data.reset()
# %% root = next(iter(tree._structure.items()))[0] tree._contextify(nd.array([[3, 3]]))(root) tree._routerlayer(nd.array([[3, 3]])) root._decision._gate() # %% router_d = tree._contextify(nd.array([[3, 3]]))(root)[0] embedd_d = tree._contextify(nd.array([[3, 3]]))(root)[1] embedd = nd.stack(*[embedd_d[key] for key in sorted(embedd_d)], axis=0) router = nd.stack(*[router_d[key] for key in sorted(router_d)], axis=-1) nd.dot(router, embedd) tree._routerlayer(nd.array([[3, 3]])) nd.sigmoid(nd.array([6.0056 - 3])) # %% import matplotlib.pyplot as plt import numpy as np import sklearn.datasets from sklearn.utils import shuffle
def forward(self, word_inputs, bert, tag_inputs, arc_targets=None, rel_targets=None):
    """Run the shared encoder and every (mlp, biaffine, decoder) head.

    Training mode is selected by passing ``arc_targets``.  In training mode
    the return value is ``(-1, mean rel accuracy, mean loss)``; otherwise it
    is the list of the decoders' outputs.
    """
    is_train = arc_targets is not None
    top_recur = self.rnn(tag_inputs, word_inputs, bert)
    # 1.0 where the word id is greater than the first decoder's vocab ROOT id.
    mask = np.greater(word_inputs, self.decoders[0]._vocab.ROOT).astype(np.float32)
    if is_train:
        total_loss = []
        LF = []
        arc_score = []
        representations = []
        # First pass: per-head arc logits and arc loss.
        for mlp, unbiaffine, decoder, a, r in zip(self.mlps, self.arc_biaffines, self.decoders,
                                                  arc_targets, rel_targets):
            dep_arc, dep_rel, head_arc, head_rel = mlp(top_recur)
            arc_logits, arc_loss = unbiaffine(dep_arc, head_arc, mask, a)
            total_loss.append(arc_loss)
            arc_score.append(arc_logits)
            representations.append((dep_arc, dep_rel, head_arc, head_rel))
        scores = nd.stack(*arc_score)
        # Second pass: every decoder gets a weighted blend of all heads' arc
        # scores, normalized by the sum of its weight row.
        for (dep_arc, dep_rel, head_arc, head_rel), w, decoder, a, r in zip(representations,
                                                                            self.weights.data(),
                                                                            self.decoders,
                                                                            arc_targets,
                                                                            rel_targets):
            blend = nd.dot(w, scores).squeeze() / w.sum()
            arc_accuracy, rel_accuracy, loss = decoder(
                dep_arc, dep_rel, head_arc, head_rel, word_inputs, a, r, blend)
            total_loss.append(loss)
            LF.append(rel_accuracy)
        if self.transfer:
            # Keep only the last head's arc loss and the last decoder loss.
            total_loss = [total_loss[len(self.mlps) - 1], total_loss[-1]]
            LF = LF[:-1]
        return -1, sum(LF) / len(LF), nd.stack(*total_loss).mean()
    else:
        arc_score = []
        representations = []
        outputs = []
        for mlp, unbiaffine, decoder in zip(self.mlps, self.arc_biaffines, self.decoders):
            dep_arc, dep_rel, head_arc, head_rel = mlp(top_recur)
            arc_logits = unbiaffine(dep_arc, head_arc, mask)
            arc_score.append(arc_logits)
            representations.append((dep_arc, dep_rel, head_arc, head_rel))
        scores = nd.stack(*arc_score)
        for (dep_arc, dep_rel, head_arc, head_rel), w, decoder in zip(representations,
                                                                      self.weights.data(),
                                                                      self.decoders):
            blend = nd.dot(w, scores).squeeze() / w.sum()
            outputs.append(
                decoder(dep_arc, dep_rel, head_arc, head_rel, word_inputs, None, None, blend))
        return outputs
def extract_x(self, i, w):
    """Stack the entries of ``w`` found at ``i + win`` for every window offset.

    Offsets come from ``self.feature_windows``.  Positions that fall outside
    ``w`` contribute ``self.pad`` instead.
    """
    picked = []
    for win in self.feature_windows:
        pos = i + win
        picked.append(w[pos] if 0 <= pos < len(w) else self.pad)
    return nd.stack(*picked)
def stack(self, arrays, axis=0):
    """Stack ``arrays`` along ``axis``.

    When the result is 2-D with shape ``(len(arrays), 1)`` it is squeezed,
    as a workaround for zero-order tensors that have shape ``(1,)`` in MXNet.
    """
    stacked = nd.stack(*arrays, axis=axis)
    # Ugly fix for stacking zero-order tensors that are of shape (1, ) in MXNet
    is_column = self.ndim(stacked) == 2 and self.shape(stacked) == (len(arrays), 1)
    return stacked.squeeze() if is_column else stacked
def _get_area(bbox: mx.nd.NDArray):
    """Return the area of every box, treating negative extents as zero.

    Columns 0..3 of ``bbox`` are read as ``x0, y0, x1, y1``.
    """
    zeros = mx.nd.zeros_like(bbox[:, 0])

    def _clamped_extent(lo, hi):
        # Element-wise max(bbox[:, hi] - bbox[:, lo], 0).
        return mx.nd.max(nd.stack(bbox[:, hi] - bbox[:, lo], zeros), axis=0)

    width = _clamped_extent(0, 2)
    height = _clamped_extent(1, 3)
    return width * height
def forward(self, x, gt_boxes=None):
    """Run the detector on a batch of images.

    :param x: ndarray (B,C,H,W)
    :param gt_boxes: ground-truth boxes, passed to the sampler while training
    :return: while ``autograd.is_training()``, the tuple
        (cls_pred, box_pred, rpn_box, samples, matches, raw_rpn_score,
        raw_rpn_box, anchors); otherwise (ids, scores, bboxes) after
        decoding and NMS.
    """
    def _split_box(x, num_outputs, axis, squeeze_axis=False):
        # nd.split returns a bare array when num_outputs is 1; always
        # hand back a list.
        a = nd.split(x, axis=axis, num_outputs=num_outputs, squeeze_axis=squeeze_axis)
        if not isinstance(a, (list, tuple)):
            return [a]
        return a

    # First extract features with the base network.
    feat = self.features(x)
    # Feed the features to the RPN.
    if autograd.is_training():
        # Training path
        img = nd.zeros_like(x)
        rpn_score, rpn_box, raw_rpn_score, raw_rpn_box, anchors = self.rpn(
            feat, img)
        # Sample the RPN outputs.
        rpn_box, samples, matches = self.sampler(rpn_box, rpn_score, gt_boxes)
    else:
        # Inference path
        # output shape (B,N,4)
        _, rpn_box = self.rpn(feat, x)
    # The region proposals above are the RoIs used by the later stages.
    # rois shape = (B,self._num_sampler,4),
    num_roi = self._num_sample if autograd.is_training(
    ) else self._rpn_test_post_nms
    # Flatten the RoIs to 2-D and prepend the batch index.
    with autograd.pause():
        roi_batchid = nd.arange(0, self._max_batch, repeat=num_roi, ctx=rpn_box.context)
        rpn_roi = nd.concat(
            *[roi_batchid.reshape((-1, 1)), rpn_box.reshape((-1, 4))], dim=-1)
        rpn_roi = nd.stop_gradient(rpn_roi)
    # RoI pooling layer
    if self._roi_mode == 'pool':
        # (Batch*num_roi,channel,H,W)
        pool_feat = nd.ROIPooling(feat, rpn_roi, self._roi_size, 1 / self._stride)
    elif self._roi_mode == 'align':
        pool_feat = nd.contrib.ROIAlign(feat, rpn_roi, self._roi_size,
                                        1 / self._stride, sample_ratio=2)
    else:
        raise ValueError("Invalid roi mode: {}".format(self._roi_mode))
    top_feat = self.top_features(pool_feat)
    avg_feat = self.global_avg_pool(top_feat)
    # Class prediction and box regression
    # output shape (B*num_roi,(num_cls+1)) -> (B,N,C)
    cls_pred = self.class_predictor(avg_feat)
    # output shape (B*num_roi,(num_cls)*4) -> (B,N,C,4)
    box_pred = self.bbox_predictor(avg_feat)
    cls_pred = cls_pred.reshape(
        (self._max_batch, num_roi, self.num_class + 1))
    box_pred = box_pred.reshape(
        (self._max_batch, num_roi, self.num_class, 4))
    # Training path
    if autograd.is_training():
        return (cls_pred, box_pred, rpn_box, samples, matches, raw_rpn_score,
                raw_rpn_box, anchors)
    # Inference path
    # Remaining steps: apply the predicted classes and the predicted
    # offsets to the input RoIs.
    else:
        # Output the information for all classes directly.
        # cls_id (B,N,C) scores(B,N,C)
        cls_ids, scores = self.cls_decoder(nd.softmax(cls_pred, axis=-1))
        # Move the class axis C to dimension 1.
        # (B,N,C) -----> (B,N,C,1) -------> (B,C,N,1)
        cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        # (B,N,C) -----> (B,N,C,1) -------> (B,C,N,1)
        scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        # (B,N,C,4) -----> (B,C,N,4),
        box_pred = box_pred.transpose((0, 2, 1, 3))
        rpn_boxes = _split_box(rpn_box, num_outputs=self._max_batch, axis=0,
                               squeeze_axis=False)
        cls_ids = _split_box(cls_ids, num_outputs=self._max_batch, axis=0,
                             squeeze_axis=True)
        scores = _split_box(scores, num_outputs=self._max_batch, axis=0,
                            squeeze_axis=True)
        box_preds = _split_box(box_pred, num_outputs=self._max_batch, axis=0,
                               squeeze_axis=True)
        results = []
        # Decode and run NMS for every batch element separately.
        for cls_id, score, box_pred, rpn_box in zip(
                cls_ids, scores, box_preds, rpn_boxes):
            # box_pred(C,N,4) rpn_box(1,N,4) box (C,N,4)
            box = self.box_decoder(box_pred, self.box_to_center(rpn_box))
            # cls_id (C,N,1) score (C,N,1) box (C,N,4)
            # result (C,N,6)
            res = nd.concat(*[cls_id, score, box], dim=-1)
            # NMS (C,self.nms_topk,6)
            res = nd.contrib.box_nms(res, overlap_thresh=self.nms_thresh,
                                     valid_thresh=0.0001, topk=self.nms_topk,
                                     coord_start=2, score_index=1, id_index=0,
                                     force_suppress=True)
            # Merge the first two axes (class and box) into one.
            res = res.reshape((-3, 0))
            results.append(res)
        results = nd.stack(*results, axis=0)
        # Split the last axis into id (0), score (1) and box coords (2..5).
        ids = nd.slice_axis(results, axis=-1, begin=0, end=1)
        scores = nd.slice_axis(results, axis=-1, begin=1, end=2)
        bboxes = nd.slice_axis(results, axis=-1, begin=2, end=6)
        # Outputs are ids, scores, bboxes.
        return ids, scores, bboxes
def decode_centernet_pose(heat, wh, kps, reg=None, hm_hp=None, hp_offset=None, K=100):
    """Decode CenterNet pose outputs into per-detection rows.

    The top ``K`` peaks of ``heat`` are taken as object centers.  For each
    one the box is built from ``wh`` (and ``reg`` when given) and the
    keypoints from the regressed ``kps`` offsets.  When ``hm_hp`` is given,
    regressed keypoints are replaced by the nearest keypoint-heatmap peak
    unless that peak is rejected by the checks below.

    Returns an array concatenating, on axis 2, the 4 box coordinates, the
    score, the ``num_joints * 2`` keypoint coordinates and the class id.
    """
    batch, cat, height, width = heat.shape
    num_joints = kps.shape[1] // 2
    # perform nms on heatmaps
    heat = _nms(heat)
    scores, inds, clses, ys, xs = _topk(heat, K=K)
    kps = _tranpose_and_gather_feat(kps, inds)
    kps = nd.reshape(kps, (batch, K, num_joints * 2))
    # Keypoints are stored as offsets: add the center x to the even
    # entries and the center y to the odd entries.
    kps[:, :, ::2] += nd.reshape(xs, (batch, K, 1)).broadcast_to(
        (batch, K, num_joints))
    kps[:, :, 1::2] += nd.reshape(ys, (batch, K, 1)).broadcast_to(
        (batch, K, num_joints))
    if reg is not None:
        # Refine the centers with the regressed sub-pixel offset.
        reg = _tranpose_and_gather_feat(reg, inds)
        reg = nd.reshape(reg, (batch, K, 2))
        xs = xs.reshape((batch, K, 1)) + reg[:, :, 0:1]
        ys = ys.reshape((batch, K, 1)) + reg[:, :, 1:2]
    else:
        # Without an offset head, shift the center by half a cell.
        xs = xs.reshape((batch, K, 1)) + 0.5
        ys = ys.reshape((batch, K, 1)) + 0.5
    wh = _tranpose_and_gather_feat(wh, inds)
    wh = wh.reshape((batch, K, 2))
    clses = clses.reshape((batch, K, 1)).astype('float32')
    scores = scores.reshape((batch, K, 1))
    # Boxes as (x1, y1, x2, y2) around the center.
    bboxes = nd.concat(xs - wh[:, :, 0:1] / 2, ys - wh[:, :, 1:2] / 2,
                       xs + wh[:, :, 0:1] / 2, ys + wh[:, :, 1:2] / 2, dim=2)
    if hm_hp is not None:
        hm_hp = _nms(hm_hp)
        thresh = 0.1
        kps = kps.reshape((batch, K, num_joints, 2))
        kps = nd.swapaxes(kps, 1, 2)  # b x J x K x 2
        reg_kps = nd.expand_dims(kps, axis=3).broadcast_to(
            (batch, num_joints, K, K, 2))
        hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp, K=K)  # b x J x K
        if hp_offset is not None:
            hp_offset = _tranpose_and_gather_feat(hp_offset, hm_inds.reshape((batch, -1)))
            hp_offset = hp_offset.reshape((batch, num_joints, K, 2))
            hm_xs = hm_xs + hp_offset[:, :, :, 0]
            hm_ys = hm_ys + hp_offset[:, :, :, 1]
        else:
            hm_xs = hm_xs + 0.5
            hm_ys = hm_ys + 0.5
        # Peaks at or below the threshold get score -1 and are moved far away
        # (-10000) so they are never the nearest candidate.
        mask = (hm_score > thresh).astype('float32')
        hm_score = (1 - mask) * -1 + mask * hm_score
        hm_ys = (1 - mask) * (-10000) + mask * hm_ys
        hm_xs = (1 - mask) * (-10000) + mask * hm_xs
        hm_kps = nd.stack(hm_xs, hm_ys, axis=-1).expand_dims(axis=2).broadcast_to(
            (batch, num_joints, K, K, 2))
        # Euclidean distance between every regressed keypoint and every
        # heatmap peak of the same joint.
        dist = (((reg_kps - hm_kps)**2).sum(axis=4)**0.5)
        min_dist = dist.min(axis=3)  # b x J x K
        min_ind = nd.argmin(dist, axis=3)  # b x J x K
        # NOTE(review): this rebinds the parameter K to hm_score.shape[2].
        M, N, K = hm_score.shape[0:3]
        # Element-wise gather of the nearest peak's score.
        # NOTE(review): the gather writes into the array it reads from, so
        # later iterations may read already-overwritten entries -- confirm.
        for i in range(M):
            for j in range(N):
                for k in range(K):
                    hm_score[i, j, k] = hm_score[i, j, min_ind[i, j, k]]
        hm_score = hm_score.expand_dims(axis=-1)
        min_dist = min_dist.expand_dims(-1)
        # NOTE(review): hm_kps was broadcast to (batch, num_joints, K, K, 2)
        # above; reshaping that to (batch, num_joints, K, 2) looks
        # size-inconsistent -- verify.
        hm_kps = hm_kps.reshape((batch, num_joints, K, 2))
        for i in range(M):
            for j in range(N):
                for k in range(K):
                    hm_kps[i, j, k, 0] = hm_kps[i, j, min_ind[i, j, k], 0]
                    hm_kps[i, j, k, 1] = hm_kps[i, j, min_ind[i, j, k], 1]
        # Box sides broadcast to every joint.
        l = bboxes[:, :, 0].reshape((batch, 1, K, 1)).broadcast_to(
            (batch, num_joints, K, 1))
        t = bboxes[:, :, 1].reshape((batch, 1, K, 1)).broadcast_to(
            (batch, num_joints, K, 1))
        r = bboxes[:, :, 2].reshape((batch, 1, K, 1)).broadcast_to(
            (batch, num_joints, K, 1))
        b = bboxes[:, :, 3].reshape((batch, 1, K, 1)).broadcast_to(
            (batch, num_joints, K, 1))
        # Reject a heatmap keypoint when it lies outside the box, scores
        # below the threshold, or is further from the regressed keypoint
        # than 0.3 times the larger box side.
        mask = (hm_kps[:, :, :, 0:1] < l) + (hm_kps[:, :, :, 0:1] > r)
        mask += (hm_kps[:, :, :, 1:2] < t) + (hm_kps[:, :, :, 1:2] > b)
        mask += (hm_score < thresh)
        mask += (min_dist > (nd.maximum(b - t, r - l) * 0.3))
        mask = (mask > 0).astype('float32').broadcast_to(
            (batch, num_joints, K, 2))
        # mask == 1 keeps the regressed keypoint, mask == 0 takes the
        # heatmap keypoint.
        kps = (1 - mask) * hm_kps + mask * kps
        kps = nd.swapaxes(kps, 1, 2).reshape((batch, K, num_joints * 2))
    detections = nd.concat(bboxes, scores, kps, clses, dim=2)
    return detections
def fetch_embedding_of_sentence(sentence: str, embedding: nlp.embedding.TokenEmbedding):
    """Return the stacked embedding vectors of the tokens in ``sentence``.

    The sentence is lower-cased and split with ``nltk.word_tokenize``; every
    token is looked up in ``embedding`` and the per-token vectors are
    combined with ``nd.stack`` in token order.
    """
    tokens = nltk.word_tokenize(sentence.lower())
    return nd.stack(*(embedding[token] for token in tokens))
def run(self):
    """Roll the model out for ``self.nsteps`` steps and return the mini-batch.

    Each step feeds the current observation and recurrent state to
    ``self.model``, steps ``self.env`` with the chosen actions and records
    the experience. After the loop the model is evaluated once more on the
    final observation to obtain ``last_values``.

    Returns
    -------
    dict
        ``obs``, ``states`` (the recurrent state at rollout start),
        ``rewards``, ``mb_dones``, ``masks``, ``actions``, ``logprobs``,
        ``entropies``, ``values``, ``last_values``, ``values_np``,
        ``pi_probs``, ``pi_logs``, ``epinfos``, plus ``raw_rewards`` and
        ``raw_masks`` (flattened, env-major) for reporting.
    """
    # We initialize the lists that will contain the mini batch (mb) of experiences
    mb_obs, mb_rewards, mb_actions, mb_logprobs = [], [], [], []
    mb_entropies, mb_values, mb_dones = [], [], []
    mb_all_act_probs, mb_all_act_logits = [], []
    mb_states = self.states
    epinfos = []
    for n in range(self.nsteps):
        # Given observations, take action and value (V(s))
        # We already have self.obs because self.obs[:] = env.reset() on init
        if len(self.obs.shape) == 4:
            # if input image ==> (nenv, h,w,c) ==> (nenv, c, h,w)
            self.obs = self.obs.transpose((0, 3, 1, 2))  # / 255.0
        obs_var = self.obs.copy()
        actions, logprobs, entropy, values, states, action_probs_logits = self.model(
            nd.array(obs_var, ctx=self.device), self.states)

        # Append the experiences
        mb_obs.append(np.copy(obs_var))  # mb_obs is (steps, nenv, c, h, w)
        mb_actions.append(actions)  # size is nenv
        mb_entropies.append(entropy)  # size is nenv
        mb_logprobs.append(logprobs)  # size is nenv * num_actions
        mb_values.append(values)  # values is nenv * 1
        # The done flags recorded here are those from BEFORE this step.
        mb_dones.append(self.dones)  # self.dones is nenv * 1
        # action_probs_logits ==> (probs, logits)
        mb_all_act_probs.append(action_probs_logits[0])
        mb_all_act_logits.append(action_probs_logits[1])

        # Take actions in env and look at the results
        actions = actions.asnumpy()
        obs, rewards, dones, vinfos = self.env.step(actions)
        self.states = states
        self.dones = dones
        self.obs = obs
        mb_rewards.append(rewards)

        # collect reward info
        for info in vinfos:
            if 'episode' in info.keys():
                epinfos.append(info['episode'])

    if len(self.obs.shape) == 4:
        # if input image ==> (nenv, h,w,c) ==> (nenv, c, h,w)
        obs_var = self.obs.transpose((0, 3, 1, 2))  # / 255.0
    else:
        obs_var = self.obs.copy()
    _, _, _, last_values, _, _ = self.model(
        nd.array(obs_var, ctx=self.device), self.states)

    # need to keep track of last step to see whether or not it is terminal
    mb_obs.append(obs_var)
    mb_dones.append(self.dones)

    # Batch of steps to batch of rollouts
    mb_obs = np.asarray(mb_obs, dtype=np.float32)
    mb_actions = nd.stack(*mb_actions, axis=0)  # (nsteps, nenv)
    mb_all_act_probs = nd.stack(*mb_all_act_probs, axis=0)  # (nsteps, nenv, num_actions)
    mb_all_act_logits = nd.stack(*mb_all_act_logits, axis=0)  # (nsteps, nenv, num_actions)
    mb_values = nd.stack(*mb_values, axis=0)  # (nsteps, nenv, 1)
    mb_logprobs = nd.stack(*mb_logprobs, axis=0)  # (nsteps, nenv)
    mb_entropies = nd.stack(*mb_entropies, axis=0)  # (nsteps, nenvs)

    # Convert the reward list once and reuse it for both views.
    rewards_np = np.asarray(mb_rewards, dtype=np.float32)  # (nsteps, nenvs)
    mb_raw_rewards = rewards_np.swapaxes(1, 0)  # (nenvs, nsteps)
    mb_rewards = nd.array(rewards_np)  # (nsteps, nenvs)

    # np.bool_ is the NumPy boolean scalar type; the bare `np.bool` alias
    # was removed in NumPy 1.24 and raises AttributeError there.
    mb_dones = np.asarray(mb_dones, dtype=np.bool_)  # (nsteps + 1, nenvs)
    mb_masks = nd.array(mb_dones.astype(np.float32))  # (nsteps + 1, nenvs)
    # raw mask for reporting purpose
    mb_raw_masks = mb_dones.swapaxes(1, 0)  # (nenvs, nsteps + 1)
    mb_raw_masks = mb_raw_masks[:, :-1]  # (nenvs, nsteps)

    # convert to numpy: (nsteps, nenvs) --> (nenvs, nsteps)
    mb_vals_np = mb_values.squeeze(axis=-1).detach().asnumpy().swapaxes(1, 0)

    info = {}
    info['obs'] = mb_obs
    info['states'] = mb_states
    info['rewards'] = mb_rewards
    info['mb_dones'] = mb_dones
    info['masks'] = mb_masks
    info['actions'] = mb_actions
    info['logprobs'] = mb_logprobs
    info['entropies'] = mb_entropies
    info['values'] = mb_values
    info['last_values'] = last_values
    info['values_np'] = mb_vals_np.flatten()
    info['pi_probs'] = mb_all_act_probs
    info['pi_logs'] = mb_all_act_logits
    info['epinfos'] = epinfos
    # raw masks and raw rewards will be used to report results
    info['raw_rewards'] = mb_raw_rewards.flatten()
    info['raw_masks'] = mb_raw_masks.flatten()
    return info
# NOTE(review): the statement below is the tail of a definition whose header
# is not visible in this view (presumably corr2d_multi_in_out) — confirm.
return nd.stack(*[corr2d_multi_in(X, k) for k in K])


def corr2d_multi_in_out_1x1(X, K):
    """Apply a multi-input, multi-output 1x1 kernel as one matrix product.

    ``X`` is unpacked as ``(c_i, h, w)`` and flattened to ``(c_i, h * w)``;
    ``K`` is reshaped to ``(c_o, c_i)`` with ``c_o = K.shape[0]``. The
    product ``K . X`` is reshaped back to ``(c_o, h, w)`` and returned.
    """
    c_o = K.shape[0]
    c_i, h, w = X.shape
    X = X.reshape((c_i, -1))
    K = K.reshape((c_o, c_i))
    Y = nd.dot(K, X)
    return Y.reshape((c_o, h, w))


if __name__ == '__main__':
    # Two-channel 3x3 input and a two-channel 2x2 kernel.
    x = nd.array([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
                  [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
    k = nd.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])
    # print(corr2d(x, k))
    print(corr2d_multi_in(x, k))
    # Build three output-channel kernels by stacking k, k + 1 and k + 2.
    K = nd.stack(k, k + 1, k + 2)
    print(K.shape)
    print(corr2d_multi_in_out(x, K))
    # test corr2d_multi_in_out_1x1: with a 1x1 kernel the matrix-product
    # version should agree with the general one up to rounding.
    X = nd.random.uniform(shape=(3, 3, 3))
    K = nd.random.uniform(shape=(2, 3, 1, 1))
    Y1 = corr2d_multi_in_out_1x1(X, K)
    Y2 = corr2d_multi_in_out(X, K)
    print((Y1 - Y2).norm().asscalar() < 1e-6)
def test_stack():
    """Stacking two (SMALL_Y, LARGE_X) arrays on axis 1 inserts a size-2 axis."""
    shape = (SMALL_Y, LARGE_X)
    a = nd.array(np.ones(shape))
    b = nd.array(np.zeros(shape))
    stacked = nd.stack(a, b, axis=1)
    expected_shape = (b.shape[0], 2, LARGE_X)
    assert stacked.shape == expected_shape
# NOTE(review): this snippet starts mid-script; `mode`, `after` and `tree`
# are defined outside this view.
# Only treat `mode` as the value to prune when it occurs more than once.
hitlist = mode if after.count(mode) > 1 else None
# Print the value paired with each child of the embedding layer.
for node, value in zip(list(tree._embeddlayer._children.values()), after):
    print(value)
# Prune every child whose paired value equals `hitlist`.
for node, value in zip(list(tree._embeddlayer._children.values()), after):
    if (value == hitlist):
        tree._prune(node)

# %%
# The first key of the tree structure is used as the root node.
root = next(iter(tree._structure.items()))[0]
router_d, router_mat_d, weight_d, embedd_d = tree._contextify(
    nd.array([[1.75]]))(root)

# Stack each per-key dict into one array, in sorted key order.
router = nd.stack(*[router_d[key] for key in sorted(router_d)], axis=-1)
weight = nd.stack(*[weight_d[key] for key in sorted(weight_d)], axis=-1)
embedd = nd.stack(*[embedd_d[key] for key in sorted(embedd_d)], axis=0)
router_mat = nd.stack(*[router_mat_d[key] for key in sorted(router_mat_d)], axis=1)

# For each row, the index along axis 1 whose router value is closest to -0.5.
where = nd.argmin(nd.abs(router + 0.5), axis=1)

head = nd.concat(*[router_mat[i][k] for i, k in enumerate(where)], dim=0)

# %%
# NOTE(review): the default argument is evaluated once, at definition time;
# the body may continue beyond this view.
def traverse(node=next(iter(tree._structure.items()))[0]):
    print("box")
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])  # NOTE(review): tail of a literal that starts outside this view
K = nd.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])

corr2d_multi_in(X, K)


# In[3]:


def corr2d_multi_in_out(X, K):
    """Cross-correlate ``X`` with each kernel in ``K`` and stack the results."""
    # Iterate over axis 0 of K, cross-correlating each slice with the input X.
    # All results are merged with the stack function.
    return nd.stack(*[corr2d_multi_in(X, k) for k in K])


# In[4]:


# Build three output-channel kernels by stacking K, K + 1 and K + 2.
K = nd.stack(K, K + 1, K + 2)
K.shape


# In[5]:


corr2d_multi_in_out(X, K)


# In[6]:


# NOTE(review): this definition runs past the end of the visible snippet
# (no return statement is visible here).
def corr2d_multi_in_out_1x1(X, K):
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.reshape((c_i, h * w))
    K = K.reshape((c_o, c_i))
    Y = nd.dot(K, X)  # matrix multiplication of the fully connected layer
def corr2d_multi_in_out(X, K):
    """Cross-correlate ``X`` with every kernel in ``K`` and stack the results.

    Iterates over axis 0 of ``K``; each slice is cross-correlated with the
    input ``X`` via ``corr2d_multi_in`` and all results are merged with
    ``nd.stack``.
    """
    per_kernel = (corr2d_multi_in(X, kernel) for kernel in K)
    return nd.stack(*per_kernel)
def forward(self, batch_size, encoder_output, decoder_hidden, head_lda, y=None):
    """Decode step by step, optionally feeding target entries back as input.

    Parameters
    ----------
    batch_size
        Passed to ``self.rnn.begin_state``.
    encoder_output
        Passed to ``self.attention`` at every step.
    decoder_hidden
        Entries 0 and 1 are squeezed on axis 0 and copied into the fresh
        RNN begin state.
    head_lda
        Passed to ``self.head_attention`` at every step.
    y : optional
        Target sequence. When given, its length sets the number of steps and
        its entries may replace the decoder input (see the loop below).

    Returns
    -------
    The per-step ``self.output_layer`` outputs concatenated along dim 0.

    Raises
    ------
    ValueError
        If ``self.rnn_type`` is not ``'DLSTM'``.
    """
    output_seq = []
    if y is None:
        # No targets: start from the embedding of token id 5
        # (presumably the start-of-sequence id — confirm against the vocabulary).
        decoder_input = self.embedding(nd.array([5]))
        decoder_input = decoder_input.squeeze(axis=0)
    else:
        decoder_input = y[0]
    # NOTE(review): the source indentation was lost; this line is assumed to
    # apply to both branches above — confirm against the original file.
    decoder_input = nd.stack(decoder_input)
    if self.rnn_type != 'DLSTM':
        raise ValueError('rnn_type must be DLSTM.')
    # encoder_hidden shape [last_output, state:[directions, batch_size, units]]
    # to decoder_hidden( with two state): [last_output, state1==state, state2==zeros]
    begin_state = self.rnn.begin_state(batch_size=batch_size, ctx=self.ctx)
    # Only entries 0 and 1 are overwritten; entry 2 (read below) keeps the
    # value produced by begin_state.
    begin_state[0] = decoder_hidden[0].squeeze(axis=0)
    begin_state[1] = decoder_hidden[1].squeeze(axis=0)
    decoder_hidden = begin_state
    target_len = len(y) if y is not None else self.target_len
    for i in range(target_len):
        # print('step',i)
        head_attn_context, _ = self.head_attention(
            head_lda, decoder_hidden[1].reshape((1, self.hidden_size, -1)),
            True)
        head_attn_context = head_attn_context.squeeze(axis=0)
        attn_input = nd.concat(decoder_input, head_attn_context,
                               decoder_hidden[1], decoder_hidden[2])
        atten_context, _ = self.attention(attn_input, encoder_output)
        atten_context = atten_context.squeeze(axis=0)
        context = self.input_linear(nd.concat(decoder_input, atten_context))
        decoder_output, decoder_hidden = self.rnn(context, decoder_hidden)
        output = self.output_layer(decoder_output)
        output_seq.append(output)
        # By default the next input is the RNN output of this step.
        decoder_input = decoder_output
        if y is not None:
            if self.teaching_force:
                # The random draw is rounded to one decimal before being
                # compared with force_prob.
                if round(random.random(), 1) < self.force_prob:
                    # Always true here, since target_len == len(y) when y is given.
                    if i < len(y):
                        decoder_input = y[i]
                        decoder_input = nd.stack(decoder_input)
                    else:
                        # print(output)
                        pass
    return nd.concat(*output_seq, dim=0)
def corr2d_multi_in_out(X, K):
    """Run ``corr2d_multi_in`` on ``X`` once per kernel in ``K`` and stack the outputs."""
    # K shape: c_o * c_i * k_h * k_w
    outputs = []
    for kernel in K:
        outputs.append(corr2d_multi_in(X, kernel))
    return nd.stack(*outputs)
def forward(self, batch_size, encoder_output, decoder_hidden, head_lda, y=None):
    """Beam-search decode and return the best-scoring hypothesis.

    Keeps up to ``self._beam_size`` live hypotheses. At each step every live
    hypothesis is expanded with ``2 * self._beam_size`` candidate tokens, and
    the candidates are re-ranked by ``avg_log_prob``. Token id 5 is used both
    as the initial token and as the termination marker.

    Parameters
    ----------
    batch_size
        Passed to ``self.rnn.begin_state``.
    encoder_output
        Passed to ``self.attention`` at every expansion.
    decoder_hidden
        Entries 0 and 1 are squeezed on axis 0 and copied into the fresh
        RNN begin state.
    head_lda
        Passed to ``self.head_attention`` at every expansion.
    y : optional
        Only its length is used in the search (as the maximum number of
        steps); otherwise ``self.target_len`` is used.

    Returns
    -------
    The hypothesis with the highest ``avg_log_prob`` among the finished ones,
    or among the remaining live ones when none finished.

    Raises
    ------
    ValueError
        If ``self.rnn_type`` is not ``'DLSTM'``.
    """
    # NOTE(review): this initial decoder_input is overwritten inside the
    # search loop before it is ever read.
    if y is None:
        decoder_input = self.embedding(nd.array([5]))
        decoder_input = decoder_input.squeeze(axis=0)
    else:
        decoder_input = y[0]
    decoder_input = nd.stack(decoder_input)
    if self.rnn_type != 'DLSTM':
        raise ValueError('rnn_type must be DLSTM.')
    # encoder_hidden shape [last_output, state:[directions, batch_size, units]]
    # to decoder_hidden( with two state): [last_output, state1==state, state2==zeros]
    begin_state = self.rnn.begin_state(batch_size=batch_size, ctx=self.ctx)
    begin_state[0] = decoder_hidden[0].squeeze(axis=0)
    begin_state[1] = decoder_hidden[1].squeeze(axis=0)
    decoder_hidden = begin_state
    # Start with _beam_size identical hypotheses.
    hyps = [
        Hypothesis([nd.array([5], ctx=self.ctx)], [0.0], decoder_hidden, [])
        for _ in range(self._beam_size)
    ]
    target_len = len(y) if y is not None else self.target_len
    results = []
    step = 0
    while step < target_len and len(results) < self._beam_size:
        # print(step)
        latest_tokens = [h.latest_token for h in hyps]
        states = [h.state for h in hyps]
        # All initial hypotheses are identical, so only one is expanded at step 0.
        num_orig_hyps = 1 if step == 0 else len(hyps)
        all_hyps = []
        for i in range(num_orig_hyps):
            # get input
            decoder_hidden = states[i]
            # print('latest_tokens', latest_tokens[i])
            decoder_input = self.embedding(latest_tokens[i])
            head_attn_context, _ = self.head_attention(
                head_lda, decoder_hidden[1].reshape(
                    (1, self.hidden_size, -1)), True)
            head_attn_context = head_attn_context.squeeze(axis=0)
            attn_input = nd.concat(decoder_input, head_attn_context,
                                   decoder_hidden[1], decoder_hidden[2])
            atten_context, _ = self.attention(attn_input, encoder_output)
            atten_context = atten_context.squeeze(axis=0)
            context = self.input_linear(
                nd.concat(decoder_input, atten_context))
            decoder_output, decoder_hidden = self.rnn(
                context, decoder_hidden)
            output = self.output_layer(decoder_output)
            # Positional arguments are presumably axis=1 and
            # k=2 * beam size — confirm against the nd.topk signature.
            top_k = nd.topk(output, 1, self._beam_size * 2)[0]
            # Raw output_layer values at the selected indices; whether they
            # are log-probabilities depends on output_layer (not visible here).
            top_k_prob = [output[0][top_i] for top_i in top_k]
            h = hyps[i]
            for j in range(self._beam_size * 2):
                new_hyps = h.extend(top_k[j], top_k_prob[j],
                                    decoder_hidden, None)
                all_hyps.append(new_hyps)
        # print('len all_hyps', len(all_hyps))
        # next step hyps
        next_hyps = []
        # print('before', len(hyps),'after sort',len(sorted(hyps, key=lambda h:h.avg_log_prob, reverse=True)))
        for h in sorted(all_hyps, key=lambda h: h.avg_log_prob, reverse=True):
            # NOTE(review): the source indentation was lost; the `else` is
            # assumed to pair with the token test, so hypotheses that end
            # before _min_len steps are dropped — confirm.
            if h.latest_token == 5:
                if step >= self._min_len:
                    results.append(h)
            else:
                next_hyps.append(h)
            # Stop once either the live beam or the result list is full.
            if len(next_hyps) == self._beam_size or len(
                    results) == self._beam_size:
                break
        hyps = next_hyps
        # print('after',len(hyps))
        step += 1
    # Fall back to the live hypotheses when nothing finished.
    if len(results) == 0:
        results = hyps
    return sorted(results, key=lambda h: h.avg_log_prob, reverse=True)[0]
def corr2d_multi_in_out(X, K):
    """Stack the ``corr2d_multi_in`` results of ``X`` with each kernel in ``K``."""
    outputs = [corr2d_multi_in(X, kernel) for kernel in K]
    return nd.stack(*outputs)