def test_resize():
    """Unit test for ``transforms.Resize``.

    Checks 3D (HWC) and 4D (NHWC) float inputs against the
    ``nd.contrib.BilinearResize2D`` operator, checks that an invalid size
    raises ``MXNetError``, and checks uint8 inputs against a pure-NumPy
    bilinear reference implementation.
    """
    # Test with normal case 3D input float type
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    out_nd_3d = transforms.Resize((100, 100))(data_in_3d)
    data_in_4d_nchw = nd.moveaxis(nd.expand_dims(data_in_3d, axis=0), 3, 1)
    data_expected_3d = (nd.moveaxis(nd.contrib.BilinearResize2D(data_in_4d_nchw, 100, 100), 1, 3))[0]
    assert_almost_equal(out_nd_3d.asnumpy(), data_expected_3d.asnumpy())

    # Test with normal case 4D input float type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3))
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    data_in_4d_nchw = nd.moveaxis(data_in_4d, 3, 1)
    data_expected_4d = nd.moveaxis(nd.contrib.BilinearResize2D(data_in_4d_nchw, 100, 100), 1, 3)
    assert_almost_equal(out_nd_4d.asnumpy(), data_expected_4d.asnumpy())

    # Test invalid interp
    data_in_3d = nd.random.uniform(0, 255, (300, 300, 3))
    invalid_transform = transforms.Resize(-150, keep_ratio=False, interpolation=2)
    assertRaises(MXNetError, invalid_transform, data_in_3d)

    # Credited to Hang Zhang
    def py_bilinear_resize_nhwc(x, outputHeight, outputWidth):
        """NumPy reference: bilinear-resize an NHWC uint8 batch to
        (outputHeight, outputWidth)."""
        batch, inputHeight, inputWidth, channel = x.shape
        if outputHeight == inputHeight and outputWidth == inputWidth:
            return x
        y = np.empty([batch, outputHeight, outputWidth, channel]).astype('uint8')
        rheight = 1.0 * (inputHeight - 1) / (outputHeight - 1) if outputHeight > 1 else 0.0
        rwidth = 1.0 * (inputWidth - 1) / (outputWidth - 1) if outputWidth > 1 else 0.0
        for h2 in range(outputHeight):
            h1r = 1.0 * h2 * rheight
            h1 = int(np.floor(h1r))
            h1lambda = h1r - h1
            h1p = 1 if h1 < (inputHeight - 1) else 0
            for w2 in range(outputWidth):
                w1r = 1.0 * w2 * rwidth
                w1 = int(np.floor(w1r))
                w1lambda = w1r - w1
                # BUGFIX: the horizontal neighbour offset must be bounded by
                # the input *width*; the original compared against
                # inputHeight, which over/under-clamps non-square inputs.
                w1p = 1 if w1 < (inputWidth - 1) else 0
                for b in range(batch):
                    for c in range(channel):
                        y[b][h2][w2][c] = (1-h1lambda)*((1-w1lambda)*x[b][h1][w1][c] + \
                            w1lambda*x[b][h1][w1+w1p][c]) + \
                            h1lambda*((1-w1lambda)*x[b][h1+h1p][w1][c] + \
                            w1lambda*x[b][h1+h1p][w1+w1p][c])
        return y

    # Test with normal case 3D input int8 type
    data_in_4d = nd.random.uniform(0, 255, (1, 300, 300, 3)).astype('uint8')
    out_nd_3d = transforms.Resize((100, 100))(data_in_4d[0])
    assert_almost_equal(out_nd_3d.asnumpy(), py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100)[0], atol=1.0)

    # Test with normal case 4D input int8 type
    data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3)).astype('uint8')
    out_nd_4d = transforms.Resize((100, 100))(data_in_4d)
    assert_almost_equal(out_nd_4d.asnumpy(), py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100), atol=1.0)
def tensor_load_rgbimage(filename, ctx, size=None, scale=None, keep_asp=False):
    """Load an RGB image file and return it as a 1xCxHxW float NDArray.

    Args:
        filename: path of the image to load.
        ctx: MXNet context the resulting array is placed on.
        size: if given, resize the width to ``size`` (and the height to
            ``size`` too, unless ``keep_asp``).
        scale: if ``size`` is not given, divide both dimensions by ``scale``.
        keep_asp: with ``size``, preserve the aspect ratio when resizing.

    Returns:
        ``mx.nd.NDArray`` of shape (1, C, H, W) on ``ctx``.
    """
    image = Image.open(filename).convert('RGB')

    # Work out the target (width, height), if any resizing was requested.
    target = None
    if size is not None:
        if keep_asp:
            # Scale height proportionally to the requested width.
            target = (size, int(size * 1.0 / image.size[0] * image.size[1]))
        else:
            target = (size, size)
    elif scale is not None:
        target = (int(image.size[0] / scale), int(image.size[1] / scale))
    if target is not None:
        image = image.resize(target, Image.ANTIALIAS)

    # HWC -> CHW, then add the batch dimension.
    chw = np.array(image).transpose(2, 0, 1).astype(float)
    return F.expand_dims(mx.nd.array(chw, ctx=ctx), 0)
def K_means_Algorithm(epoch=100, point_numbers=2000, centroid_numbers=5, ctx=mx.gpu(0)):
    """Run K-means on a synthetic 2D two-blob dataset using MXNet NDArrays.

    Generates ``point_numbers`` 2D points from two Gaussians, runs ``epoch``
    assignment/update iterations with ``centroid_numbers`` centroids, prints
    the final centers, and saves before/after scatter plots as PNG files.

    NOTE(review): the ``ctx=mx.gpu(0)`` default is evaluated at import time
    and requires a GPU build — confirm callers pass a CPU context otherwise.
    """
    dataset = []
    centroid = []

    # data generation: mixture of two 2D Gaussians, chosen by a fair coin.
    for i in range(point_numbers):
        if random.random() > 0.5:
            dataset.append([np.random.normal(loc=0, scale=0.9), np.random.normal(loc=0, scale=0.9)])
        else:
            dataset.append([np.random.normal(loc=3, scale=0.5), np.random.normal(loc=0, scale=0.9)])

    # Plot the raw (unclustered) data.
    df = pd.DataFrame({"x": [d[0] for d in dataset], "y": [d[1] for d in dataset]})
    sns.lmplot("x", "y", data=df, fit_reg=False, size=10)
    plt.savefig("K means Algorithm init using mxnet.png")

    # 1-step: initialize centroids by sampling points from the dataset.
    random.shuffle(dataset)
    for i in range(centroid_numbers):
        centroid.append(random.choice(dataset))

    # using mxnet
    dataset = nd.array(dataset, ctx=ctx)
    centroid = nd.array(centroid, ctx=ctx)

    # data assignment , updating new center values
    for i in tqdm(range(epoch)):
        # 2-step: squared Euclidean distance of every point to every
        # centroid via broadcasting, then nearest-centroid assignment.
        diff = nd.subtract(nd.expand_dims(dataset, axis=0), nd.expand_dims(centroid, axis=1))
        sqr = nd.square(diff)
        distance = nd.sum(sqr, axis=2)
        clustering = nd.argmin(distance, axis=0)

        # 3-step: recompute each centroid as the mean of its assigned points.
        '''
        Because mxnet's nd.where did not return the location.
        I wrote the np.where function.
        '''
        for j in range(centroid_numbers):
            centroid[j][:] = nd.mean(nd.take(dataset, nd.array(np.reshape(np.where(np.equal(clustering.asnumpy(), j)), (-1, )), ctx=ctx), axis=0), axis=0)
        print("epoch : {}".format(i + 1))

    for i in range(centroid_numbers):
        print("{}_center : Final center_value : {}".format(i + 1, centroid.asnumpy()[i]))

    # 4 show result: scatter plot colored by final cluster assignment.
    data = {"x": [], "y": [], "cluster": []}
    for i in range(len(clustering)):
        data["x"].append(dataset[i][0].asscalar())
        data["y"].append(dataset[i][1].asscalar())
        data["cluster"].append(clustering[i].asscalar())

    df = pd.DataFrame(data)
    sns.lmplot("x", "y", data=df, fit_reg=False, size=10, hue="cluster")
    plt.savefig("K means Algorithm completed using mxnet.png")
    plt.show()
def run_demo(cuda, record, vfile):
    """Apply the 21-styles transfer network to a video file, frame by frame.

    Shows a window with the original frame (with a style thumbnail inset)
    side by side with the stylized frame; the style is rotated every 50
    frames. Press Esc to stop.

    Args:
        cuda: if truthy, run on GPU 0 (and disable cuDNN autotune).
        record: if truthy, also write the composite frames to an output
            video next to the input and copy the audio track over.
        vfile: video file name inside ``./video/``.
    """
    model = 'models/21styles.params'
    ngf = 128
    style_size = 512
    style_folder = 'images/styles/'
    mirror = False
    vDir = './video/'
    vPath = vDir + vfile
    oFile = 'output21-' + vfile
    wM, hM = 640, 480  # maximum display width/height
    if cuda:
        ctx = mx.gpu(0)
        os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
    else:
        ctx = mx.cpu(0)
    style_loader = StyleLoader(style_folder, style_size, ctx)
    style_model = Net(ngf=ngf)
    style_model.load_parameters(model, ctx=ctx)

    # Probe the input video for frame rate and dimensions.
    metadata = ffprobe(vPath)
    fps = metadata["video"]["@avg_frame_rate"]
    # print(json.dumps(metadata["video"], indent=4))
    w, h = int(metadata["video"]["@width"]), int(metadata["video"]["@height"])
    # Downscale to at most hM pixels high, keeping the width even.
    downsize = h > hM
    if downsize:
        w = 2 * int(w * hM / h / 2)
        h = hM
    # downsize = w > wM
    # if downsize :
    #     h = 2 * int(h * wM / w / 2); w = wM
    swidth = int(w / 4)    # style-thumbnail inset size: quarter frame
    sheight = int(h / 4)
    wName = vfile + ' STYLIZED VIDEO fps:' + fps + ' W:' + str(w) + ' H:' + str(h)
    if record:
        # Output frame is the original and the stylized frame side by side,
        # hence double width.
        out = FFmpegWriter(vDir + oFile,
                           inputdict={'-r': str(fps), '-s': '{}x{}'.format(2 * w, h)},
                           outputdict={'-r': str(fps), '-c:v': 'h264'})
    key, idx = 0, 0
    cv2.namedWindow(wName, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(wName, 2 * w, h)
    for img in vreader(vPath):
        idx += 1
        if downsize:
            img = cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)
        if mirror:
            img = cv2.flip(img, 1)
        cimg = img.copy()  # keep the unstyled frame for the composite
        # HWC -> 1xCxHxW NDArray for the network.
        img = np.array(img).transpose(2, 0, 1).astype(float)
        img = F.expand_dims(mx.nd.array(img, ctx=ctx), 0)
        # changing styles every 50 frames
        if idx % 50 == 1:
            style_v = style_loader.get(int(idx / 20))
            style_model.set_target(style_v)
        img = style_model(img)

        # Style thumbnail for the inset.
        simg = np.squeeze(style_v.asnumpy())
        simg = simg.transpose(1, 2, 0).astype('uint8')
        # Stylized frame back to HWC uint8.
        img = F.clip(img[0], 0, 255).asnumpy()
        img = img.transpose(1, 2, 0).astype('uint8')

        # display: paste the thumbnail into the top-left of the original,
        # then concatenate original | stylized horizontally.
        simg = cv2.resize(simg, (swidth, sheight), interpolation=cv2.INTER_CUBIC)
        cimg[0:sheight, 0:swidth, :] = simg
        img = np.concatenate((cimg, cv2.cvtColor(img, cv2.COLOR_BGR2RGB)), axis=1)
        if record:
            out.writeFrame(img)
        # NOTE(review): channel swap for cv2.imshow display — assumes the
        # composite is RGB at this point; confirm against vreader's output.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        cv2.imshow(wName, img)
        key = cv2.waitKey(1)
        if key == 27:  # Esc stops playback
            break
    if record:
        out.close()
        transferAudio(vPath, vDir, oFile)
        print("Done OK. Created Stylised Video file", vDir + oFile)
    print("fps :", fps, " W:", w, " H:", h)
    cv2.destroyAllWindows()
def compute_retrospective_loss(self, observed_arr, encoded_arr, decoded_arr, re_encoded_arr):
    '''
    Compute retrospective loss.

    Args:
        observed_arr:   `mxnet.ndarray` of observed data points.
        encoded_arr:    `mxnet.ndarray` of encoded data points.
        decoded_arr:    `mxnet.ndarray` of decoded data points.
        re_encoded_arr: `mxnet.ndarray` of re-encoded data points.

    Returns:
        `mxnet.ndarray` of the mean loss per batch (the gradient-clipped
        delta averaged over all axes except axis 0).
    '''
    # Reconstruction term: distance between the decoded output and the
    # mean-centered encoding. When the output and hidden widths differ,
    # a random subset of dimensions is drawn so shapes line up.
    if self.__output_neuron_count == self.__hidden_neuron_count:
        target_arr = nd.broadcast_sub(encoded_arr, nd.expand_dims(observed_arr.mean(axis=2), axis=2))
        summary_delta_arr = nd.sqrt(nd.power(decoded_arr - target_arr, 2))
    else:
        # For each batch, draw a samples from the Uniform distribution.
        if self.__output_neuron_count > self.__hidden_neuron_count:
            # Subsample observed/decoded dimensions down to the hidden width.
            all_dim_arr = np.arange(self.__output_neuron_count)
            np.random.shuffle(all_dim_arr)
            choiced_dim_arr = all_dim_arr[:self.__hidden_neuron_count]
            target_arr = nd.broadcast_sub(encoded_arr, nd.expand_dims(observed_arr[:, :, choiced_dim_arr].mean(axis=2), axis=2))
            summary_delta_arr = nd.sqrt(nd.power(decoded_arr[:, :, choiced_dim_arr] - target_arr, 2))
        else:
            # Subsample encoded dimensions down to the output width.
            all_dim_arr = np.arange(self.__hidden_neuron_count)
            np.random.shuffle(all_dim_arr)
            choiced_dim_arr = all_dim_arr[:self.__output_neuron_count]
            target_arr = nd.broadcast_sub(encoded_arr[:, :, choiced_dim_arr], nd.expand_dims(observed_arr.mean(axis=2), axis=2))
            summary_delta_arr = nd.sqrt(nd.power(decoded_arr - target_arr, 2))

    # Match term: per-sample distance between the final encoded and
    # re-encoded steps, accumulated along the batch axis.
    match_delta_arr = None
    for i in range(self.__batch_size):
        arr = nd.sqrt(nd.power(encoded_arr[i, -1] - re_encoded_arr[i, -1], 2))
        if match_delta_arr is None:
            match_delta_arr = nd.expand_dims(arr, axis=0)
        else:
            match_delta_arr = nd.concat(match_delta_arr, nd.expand_dims(arr, axis=0), dim=0)

    # NOTE(review): the block below is disabled legacy code for a mismatch
    # (margin) term; it is kept verbatim for reference.
    """
    other_encoded_delta_arr = None
    for i in range(self.__batch_size):
        _encoded_arr = None
        for seq in range(encoded_arr[i].shape[0] - 1):
            if _encoded_arr is None:
                _encoded_arr = nd.expand_dims(encoded_arr[i][seq], axis=0)
            else:
                _encoded_arr = nd.concat(
                    _encoded_arr,
                    nd.expand_dims(encoded_arr[i][seq], axis=0),
                    dim=0
                )
        arr = nd.nansum(
            nd.sqrt(
                nd.power(
                    nd.maximum(
                        0,
                        _encoded_arr - re_encoded_arr[i, -1].reshape(
                            1,
                            re_encoded_arr.shape[2]
                        )
                    ),
                    2
                )
            ) + self.__margin_param,
            axis=0
        )
        if other_encoded_delta_arr is None:
            other_encoded_delta_arr = nd.expand_dims(arr, axis=0)
        else:
            other_encoded_delta_arr = nd.concat(
                other_encoded_delta_arr,
                nd.expand_dims(arr, axis=0),
                dim=0
            )

    other_re_encoded_delta_arr = None
    for i in range(self.__batch_size):
        _re_encoded_arr = None
        for seq in range(re_encoded_arr[i].shape[0] - 1):
            if _re_encoded_arr is None:
                _re_encoded_arr = nd.expand_dims(re_encoded_arr[i][seq], axis=0)
            else:
                _re_encoded_arr = nd.concat(
                    _re_encoded_arr,
                    nd.expand_dims(re_encoded_arr[i][seq], axis=0),
                    dim=0
                )
        arr = nd.nansum(
            nd.sqrt(
                nd.power(
                    nd.maximum(
                        0,
                        encoded_arr[i, -1].reshape(
                            1,
                            encoded_arr.shape[2]
                        ) - _re_encoded_arr
                    ),
                    2
                )
            ) + self.__margin_param,
            axis=0
        )
        if other_re_encoded_delta_arr is None:
            other_re_encoded_delta_arr = nd.expand_dims(arr, axis=0)
        else:
            other_re_encoded_delta_arr = nd.concat(
                other_re_encoded_delta_arr,
                nd.expand_dims(arr, axis=0),
                dim=0
            )

    mismatch_delta_arr = (
        match_delta_arr - other_encoded_delta_arr
    ) + (
        match_delta_arr - other_re_encoded_delta_arr
    )

    delta_arr = summary_delta_arr + nd.expand_dims(
        self.__retrospective_lambda * match_delta_arr,
        axis=1
    ) + nd.expand_dims(
        self.__retrospective_eta * mismatch_delta_arr,
        axis=1
    )
    """
    # Combine reconstruction and (lambda-weighted) match terms.
    delta_arr = summary_delta_arr + nd.expand_dims(self.__retrospective_lambda * match_delta_arr, axis=1)
    # Global norm clipping of the delta before reducing to the loss.
    v = nd.norm(delta_arr)
    if v > self.__grad_clip_threshold:
        delta_arr = delta_arr * self.__grad_clip_threshold / v
    loss = nd.mean(delta_arr, axis=0, exclude=True)
    return loss
def generate_learned_samples(self):
    '''
    Draw and generate data.

    Yields, `self.iter_n` times, a `Tuple`:
        - `mxnet.ndarray` of observed data points in training.
        - `mxnet.ndarray` of supervised data (one-hot labels) in training.
        - `mxnet.ndarray` of observed data points in test.
        - `mxnet.ndarray` of supervised data (one-hot labels) in test.
    '''
    for _ in range(self.iter_n):
        training_batch_arr, test_batch_arr = None, None
        training_label_arr, test_label_arr = None, None
        # Build one mini-batch by sampling `self.batch_size` (directory,
        # file) pairs uniformly at random, independently for train/test.
        for batch_size in range(self.batch_size):
            # Directory index doubles as the class label (one-hot).
            dir_key = np.random.randint(low=0, high=len(self.__training_file_path_list))
            training_one_hot_arr = nd.zeros((1, len(self.__training_file_path_list)), ctx=self.__ctx)
            training_one_hot_arr[0, dir_key] = 1
            file_key = np.random.randint(low=0, high=len(self.__training_file_path_list[dir_key]))
            training_data_arr = self.__image_extractor.extract(path=self.__training_file_path_list[dir_key][file_key],)
            training_data_arr = self.pre_normalize(training_data_arr)

            # Same sampling scheme for the test split.
            test_dir_key = np.random.randint(low=0, high=len(self.__test_file_path_list))
            test_one_hot_arr = nd.zeros((1, len(self.__test_file_path_list)), ctx=self.__ctx)
            test_one_hot_arr[0, test_dir_key] = 1
            file_key = np.random.randint(low=0, high=len(self.__test_file_path_list[test_dir_key]))
            test_data_arr = self.__image_extractor.extract(path=self.__test_file_path_list[test_dir_key][file_key],)
            test_data_arr = self.pre_normalize(test_data_arr)

            # Add the batch dimension and append to the running batches.
            training_data_arr = nd.expand_dims(training_data_arr, axis=0)
            test_data_arr = nd.expand_dims(test_data_arr, axis=0)
            if training_batch_arr is not None:
                training_batch_arr = nd.concat(training_batch_arr, training_data_arr, dim=0)
            else:
                training_batch_arr = training_data_arr
            if test_batch_arr is not None:
                test_batch_arr = nd.concat(test_batch_arr, test_data_arr, dim=0)
            else:
                test_batch_arr = test_data_arr
            if training_label_arr is not None:
                training_label_arr = nd.concat(training_label_arr, training_one_hot_arr, dim=0)
            else:
                training_label_arr = training_one_hot_arr
            if test_label_arr is not None:
                test_label_arr = nd.concat(test_label_arr, test_one_hot_arr, dim=0)
            else:
                test_label_arr = test_one_hot_arr

        # Optionally corrupt the training batch (data augmentation).
        if self.__noiseable_data is not None:
            training_batch_arr = self.__noiseable_data.noise(training_batch_arr)

        yield training_batch_arr, training_label_arr, test_batch_arr, test_label_arr
def train(pool_size, epochs, train_data, val_data, ctx, netEn, netDe, netD, trainerEn, trainerDe, trainerD, lambda1, batch_size, expname, append=True, useAE=False):
    """Adversarially train a VAE (encoder ``netEn`` / decoder ``netDe``)
    against a discriminator ``netD``.

    Alternates a discriminator update (real vs. decoded samples) with a
    generator update (GAN loss + ``lambda1``-weighted VAE loss). Every 10
    epochs the networks are checkpointed, a validation pass is run, MSE
    accuracy is appended to ``<expname>_validtest.txt`` and sample
    reconstructions are saved to ``outputs/``.

    Args:
        pool_size: size of the (currently unused) image history pool.
        epochs: number of training epochs.
        train_data, val_data: MXNet data iterators yielding (input, target).
        ctx: context to train on.
        netEn, netDe, netD: encoder, decoder and discriminator networks.
        trainerEn, trainerDe, trainerD: their gluon trainers.
        lambda1: weight of the VAE loss inside the generator objective.
        batch_size: training batch size.
        expname: experiment name used for log/checkpoint file names.
        append: if True the discriminator sees input and output concatenated
            along channels.
        useAE: unused; kept for interface compatibility.

    Returns:
        list ``[loss_rec_D, loss_rec_G, loss_rec_R, acc_rec]`` of per-batch
        histories.
    """
    text_file = open(expname + "_validtest.txt", "w")
    text_file.close()
    #netGT, netDT, _, _ = set_test_network(opt.depth, ctx, opt.lr, opt.beta1,opt.ndf, opt.ngf, opt.append)
    GAN_loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()
    L1_loss = gluon.loss.L2Loss()
    image_pool = imagePool.ImagePool(pool_size)
    metric = mx.metric.CustomMetric(facc)
    metric2 = mx.metric.MSE()
    loss_rec_G = []
    loss_rec_D = []
    loss_rec_R = []
    acc_rec = []
    stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
    logging.basicConfig(level=logging.DEBUG)
    for epoch in range(epochs):
        tic = time.time()
        btic = time.time()
        train_data.reset()
        batch_iter = 0  # renamed from `iter` to avoid shadowing the builtin
        #print('learning rate : '+str(trainerD.learning_rate ))
        for batch in train_data:
            ############################
            # (1) Update D network: maximize log(D(x, y)) + log(1 - D(x, G(x, z)))
            ###########################
            real_in = batch.data[0].as_in_context(ctx)
            real_out = batch.data[1].as_in_context(ctx)
            soft_zero = 1e-10
            # Forward pass outside autograd: generate fakes for D without
            # tracking gradients through the generator.
            fake_latent = netEn(real_in)
            fake_latent = np.squeeze(fake_latent)
            mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
            mu = (mu_lv[0])
            lv = (mu_lv[1])
            KL = 0.5*nd.nansum(1+lv-mu*mu-nd.exp(lv+soft_zero))
            eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, 2048), ctx=ctx)
            # Reparameterization trick.
            z = mu + nd.exp(0.5*lv)*eps
            z = nd.expand_dims(nd.expand_dims(z, 2), 2)
            y = netDe(z)
            fake_out = y
            logloss = nd.nansum(real_in*nd.log(y+soft_zero) + (1-real_in)*nd.log(1-y+soft_zero))
            loss = -logloss-KL
            fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
            with autograd.record():
                # Train with fake image
                # Use image pooling to utilize history imagesi
                output = netD(fake_concat)
                fake_label = nd.zeros(output.shape, ctx=ctx)
                errD_fake = GAN_loss(output, fake_label)
                metric.update([fake_label, ], [output, ])
                real_concat = nd.concat(real_in, real_out, dim=1) if append else real_out
                output = netD(real_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errD_real = GAN_loss(output, real_label)
                errD = (errD_real + errD_fake) * 0.5
                errD.backward()
                metric.update([real_label, ], [output, ])
            trainerD.step(batch.data[0].shape[0])

            ############################
            # (2) Update G network: maximize log(D(x, G(x, z))) - lambda1 * L1(y, G(x, z))
            ###########################
            with autograd.record():
                fake_latent = np.squeeze(netEn(real_in))
                mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
                mu = mu_lv[0]
                lv = mu_lv[1]
                KL = 0.5*nd.nansum(1+lv-mu*mu-nd.exp(lv+soft_zero))
                eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, 2048), ctx=ctx)
                #KL = 0.5*nd.nansum(1+lv-mu*mu-nd.exp(lv+soft_zero))
                z = mu + nd.exp(0.5*lv)*eps
                z = nd.expand_dims(nd.expand_dims(z, 2), 2)
                y = netDe(z)
                fake_out = y
                # Bernoulli log-likelihood with inputs rescaled from [-1, 1]
                # to [0, 1].
                logloss = nd.nansum((real_in+1)*0.5*nd.log(0.5*(y+1)+soft_zero) + (1-0.5*(real_in+1))*nd.log(1-0.5*(y+1)+soft_zero))
                loss = -logloss-KL
                fake_concat = nd.concat(real_in, fake_out, dim=1) if append else fake_out
                output = netD(fake_concat)
                real_label = nd.ones(output.shape, ctx=ctx)
                errG = GAN_loss(output, real_label) + loss*lambda1  #L1_loss(real_out, fake_out) * lambda1
                errR = logloss  #L1_loss(real_out, fake_out)
                errG.backward()
            trainerDe.step(batch.data[0].shape[0])
            trainerEn.step(batch.data[0].shape[0])

            loss_rec_G.append(nd.mean(errG).asscalar()-nd.mean(errR).asscalar()*lambda1)
            loss_rec_D.append(nd.mean(errD).asscalar())
            loss_rec_R.append(nd.mean(errR).asscalar())
            name, acc = metric.get()
            acc_rec.append(acc)
            # Print log infomation every ten batches
            if batch_iter % 10 == 0:
                name, acc = metric.get()
                logging.info('speed: {} samples/s'.format(batch_size / (time.time() - btic)))
                #print(errD)
                logging.info('discriminator loss = %f, generator loss = %f, binary training acc = %f reconstruction error= %f at iter %d epoch %d'
                             % (nd.mean(errD).asscalar(), nd.mean(errG).asscalar(), acc, nd.mean(errR).asscalar(), batch_iter, epoch))
            batch_iter = batch_iter + 1
            btic = time.time()

        name, acc = metric.get()
        metric.reset()
        train_data.reset()
        logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc))
        logging.info('time: %f' % (time.time() - tic))

        if epoch % 10 == 0:
            # Checkpoint the three networks.
            text_file = open(expname + "_validtest.txt", "a")
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_D.params"
            netD.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_En.params"
            netEn.save_params(filename)
            filename = "checkpoints/"+expname+"_"+str(epoch)+"_De.params"
            netDe.save_params(filename)
            fake_img1 = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2 = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3 = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            fake_img4 = nd.concat(real_in[3], real_out[3], fake_out[3], dim=1)
            val_data.reset()
            text_file = open(expname + "_validtest.txt", "a")
            for vbatch in val_data:
                real_in = vbatch.data[0].as_in_context(ctx)
                real_out = vbatch.data[1].as_in_context(ctx)
                fake_latent = netEn(real_in)
                mu_lv = nd.split(fake_latent, axis=1, num_outputs=2)
                mu = mu_lv[0]
                lv = mu_lv[1]
                # BUGFIX: `batch_size/5` is a float under Python 3 and MXNet
                # rejects float entries in a shape tuple — use floor division.
                # NOTE(review): assumes the validation batch is batch_size/5
                # — confirm against the val_data iterator.
                eps = nd.random_normal(loc=0, scale=1, shape=(batch_size // 5, 2048, 1, 1), ctx=ctx)
                z = mu + nd.exp(0.5*lv)*eps
                y = netDe(z)
                fake_out = y
                KL = 0.5*nd.sum(1+lv-mu*mu-nd.exp(lv), axis=1)
                logloss = nd.sum(real_in*nd.log(y+soft_zero) + (1-real_in)*nd.log(1-y+soft_zero), axis=1)
                loss = logloss+KL
                metric2.update([fake_out, ], [real_out, ])
            _, acc2 = metric2.get()
            text_file.write("%s %s %s\n" % (str(epoch), nd.mean(errR).asscalar(), str(acc2)))
            metric2.reset()

            # Save a grid of train and validation reconstructions.
            fake_img1T = nd.concat(real_in[0], real_out[0], fake_out[0], dim=1)
            fake_img2T = nd.concat(real_in[1], real_out[1], fake_out[1], dim=1)
            fake_img3T = nd.concat(real_in[2], real_out[2], fake_out[2], dim=1)
            #fake_img4T = nd.concat(real_in[3],real_out[3], fake_out[3], dim=1)
            fake_img = nd.concat(fake_img1, fake_img2, fake_img3, fake_img1T, fake_img2T, fake_img3T, dim=2)
            visual.visualize(fake_img)
            plt.savefig('outputs/'+expname+'_'+str(epoch)+'.png')
            text_file.close()
    return([loss_rec_D, loss_rec_G, loss_rec_R, acc_rec])
def inference_g(self, observed_arr):
    '''
    Inference with generator.

    Args:
        observed_arr: `mxnet.ndarray` of observed data points.

    Returns:
        Tuple data.
        - re-parametric data.
        - encoded data points.
        - re-encoded data points.

    While training (``autograd.is_recording()``), a rescaled-range (R/S)
    Hurst-exponent estimate of a long-term observed sequence is compared
    with that of a long-term generated roll-out, and the absolute gap is
    added onto the generated output as a multi-fractal penalty.
    '''
    generated_arr, encoded_arr, re_encoded_arr = super().inference_g(observed_arr)

    if autograd.is_recording():
        limit = self.long_term_seq_len
        # Temporarily widen the noise sampler to draw a long-term sequence.
        seq_len = self.noise_sampler.seq_len
        self.noise_sampler.seq_len = limit
        long_term_observed_arr = self.noise_sampler.draw()

        # R/S analysis of the observed long-term sequence:
        # cumulative mean-adjusted sums, their range, and the std.
        observed_mean_arr = nd.expand_dims(nd.mean(long_term_observed_arr, axis=1), axis=1)
        sum_arr = None
        for seq in range(2, long_term_observed_arr.shape[1]):
            add_arr = nd.sum(long_term_observed_arr[:, :seq] - observed_mean_arr, axis=1)
            if sum_arr is None:
                sum_arr = nd.expand_dims(add_arr, axis=0)
            else:
                sum_arr = nd.concat(sum_arr, nd.expand_dims(add_arr, axis=0), dim=0)
        max_arr = nd.max(sum_arr, axis=0)
        min_arr = nd.min(sum_arr, axis=0)
        diff_arr = long_term_observed_arr - observed_mean_arr
        std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
        R_S_arr = (max_arr - min_arr) / std_arr
        len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(long_term_observed_arr.shape[1] / 2)
        # Hurst exponent estimate: log(R/S) / log(n/2).
        observed_H_arr = nd.log(R_S_arr) / len_arr
        self.noise_sampler.seq_len = seq_len

        g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
        g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
        o_min_arr = nd.expand_dims(observed_arr.min(axis=1), axis=1)
        o_max_arr = nd.expand_dims(observed_arr.max(axis=1), axis=1)

        # Roll the generator forward `limit` times, feeding each output
        # back in, min-max rescaling each step to the previous input's range.
        _observed_arr = generated_arr
        long_term_generated_arr = None
        for i in range(limit):
            generated_arr, _, _ = super().inference_g(_observed_arr)
            g_min_arr = nd.expand_dims(generated_arr.min(axis=1), axis=1)
            g_max_arr = nd.expand_dims(generated_arr.max(axis=1), axis=1)
            o_min_arr = nd.expand_dims(_observed_arr.min(axis=1), axis=1)
            o_max_arr = nd.expand_dims(_observed_arr.max(axis=1), axis=1)
            generated_arr = (generated_arr - g_min_arr) / (g_max_arr - g_min_arr)
            generated_arr = (o_max_arr - o_min_arr) * generated_arr
            generated_arr = o_min_arr + generated_arr

            # Optional conditional noise added onto the roll-out.
            if self.condition_sampler is not None:
                self.condition_sampler.output_shape = generated_arr.shape
                noise_arr = self.condition_sampler.generate()
                generated_arr += noise_arr

            if long_term_generated_arr is None:
                long_term_generated_arr = generated_arr
            else:
                long_term_generated_arr = nd.concat(long_term_generated_arr, generated_arr, dim=1)
            _observed_arr = generated_arr

        # Same R/S analysis for the generated long-term sequence.
        generated_mean_arr = nd.expand_dims(nd.mean(long_term_generated_arr, axis=1), axis=1)
        sum_arr = None
        for seq in range(2, long_term_generated_arr.shape[1]):
            add_arr = nd.sum(long_term_generated_arr[:, :seq] - generated_mean_arr, axis=1)
            if sum_arr is None:
                sum_arr = nd.expand_dims(add_arr, axis=0)
            else:
                sum_arr = nd.concat(sum_arr, nd.expand_dims(add_arr, axis=0), dim=0)
        max_arr = nd.max(sum_arr, axis=0)
        min_arr = nd.min(sum_arr, axis=0)
        diff_arr = long_term_generated_arr - generated_mean_arr
        std_arr = nd.power(nd.mean(nd.square(diff_arr), axis=1), 1/2)
        R_S_arr = (max_arr - min_arr) / std_arr
        len_arr = nd.ones_like(R_S_arr, ctx=R_S_arr.context) * np.log(long_term_generated_arr.shape[1] / 2)
        generated_H_arr = nd.log(R_S_arr) / len_arr

        # Penalty: gap between the two Hurst estimates, broadcast back
        # onto the generated output.
        multi_fractal_loss = nd.abs(generated_H_arr - observed_H_arr)
        multi_fractal_loss = nd.mean(multi_fractal_loss, axis=0, exclude=True)
        multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)
        multi_fractal_loss = nd.expand_dims(multi_fractal_loss, axis=-1)
        generated_arr = generated_arr + multi_fractal_loss

    return generated_arr, encoded_arr, re_encoded_arr
def select_action(
    self,
    possible_action_arr,
    possible_predicted_q_arr,
    possible_reward_value_arr,
    possible_next_q_arr,
    possible_meta_data_arr=None
):
    '''
    Select action by Q(state, action).

    Args:
        possible_action_arr:        Tensor of actions.
        possible_predicted_q_arr:   Tensor of Q-Values.
        possible_reward_value_arr:  Tensor of reward values.
        possible_next_q_arr:        Tensor of Q-Values in next time.
        possible_meta_data_arr:     `mxnet.ndarray.NDArray` or `np.array` of meta data of the actions.

    Returns:
        Tuple(`np.ndarray` of action., Q-Value)
    '''
    # One selected index per batch row, chosen by the policy in
    # `select_action_key`.
    key_arr = self.select_action_key(possible_action_arr, possible_predicted_q_arr)

    # Gather the meta data of the selected action per row, preserving the
    # container type (NDArray vs. numpy) of the input.
    meta_data_arr = None
    if possible_meta_data_arr is not None:
        for i in range(possible_meta_data_arr.shape[0]):
            _meta_data_arr = possible_meta_data_arr[i, key_arr[i]]
            if i == 0:
                if isinstance(_meta_data_arr, nd.NDArray) is True:
                    meta_data_arr = nd.expand_dims(_meta_data_arr, axis=0)
                else:
                    meta_data_arr = np.expand_dims(_meta_data_arr, axis=0)
            else:
                if isinstance(_meta_data_arr, nd.NDArray) is True:
                    meta_data_arr = nd.concat(
                        meta_data_arr,
                        nd.expand_dims(_meta_data_arr, axis=0),
                        dim=0
                    )
                else:
                    meta_data_arr = np.concatenate(
                        [
                            meta_data_arr,
                            np.expand_dims(_meta_data_arr, axis=0),
                        ],
                        axis=0
                    )

    # Gather the selected action, its Q-value, reward and next-step
    # Q-value per batch row and stack them along axis 0.
    action_arr = None
    predicted_q_arr = None
    reward_value_arr = None
    next_q_arr = None
    for i in range(possible_action_arr.shape[0]):
        _action_arr = possible_action_arr[i, key_arr[i]]
        _predicted_q_arr = possible_predicted_q_arr[i, key_arr[i]]
        _reward_value_arr = possible_reward_value_arr[i, key_arr[i]]
        _next_q_arr = possible_next_q_arr[i, key_arr[i]]
        if i == 0:
            action_arr = nd.expand_dims(_action_arr, axis=0)
            predicted_q_arr = nd.expand_dims(_predicted_q_arr, axis=0)
            reward_value_arr = nd.expand_dims(_reward_value_arr, axis=0)
            next_q_arr = nd.expand_dims(_next_q_arr, axis=0)
        else:
            action_arr = nd.concat(
                action_arr,
                nd.expand_dims(_action_arr, axis=0),
                dim=0
            )
            predicted_q_arr = nd.concat(
                predicted_q_arr,
                nd.expand_dims(_predicted_q_arr, axis=0),
                dim=0
            )
            reward_value_arr = nd.concat(
                reward_value_arr,
                nd.expand_dims(_reward_value_arr, axis=0),
                dim=0
            )
            next_q_arr = nd.concat(
                next_q_arr,
                nd.expand_dims(_next_q_arr, axis=0),
                dim=0
            )

    return (
        action_arr,
        predicted_q_arr,
        reward_value_arr,
        next_q_arr,
        meta_data_arr
    )
def batched_l1_dist(a, b):
    """Pairwise L1 distance between two batches of vectors.

    Inserts complementary singleton axes so that `a` and `b` broadcast
    against each other, then reduces the last axis with the 1-norm.
    """
    left = nd.expand_dims(a, axis=-2)
    right = nd.expand_dims(b, axis=-3)
    return nd.norm(left - right, ord=1, axis=-1)
def make_grid(tensor, nrow=8, padding=2, normalize=False, range=None, scale_each=False, pad_value=0):
    """Arrange a mini-batch of images into a single grid image.

    Args:
        tensor: 4D NCHW NDArray, or a 2D/3D single image, or a list of
            same-shape images.
        nrow: number of images per grid row.
        padding: pixel padding between tiles.
        normalize: if True, shift/scale values into [0, 1] using ``range``
            (or each image's own min/max).
        range: optional (min, max) tuple used for normalization.
            NOTE: shadows the builtin ``range``; kept for interface
            compatibility.
        scale_each: normalize each image separately instead of jointly.
        pad_value: fill value for the padding area.

    Returns:
        3D NDArray (C, grid_height, grid_width).
    """
    if not (is_ndarray(tensor) or (isinstance(tensor, list) and all(is_ndarray(t) for t in tensor))):
        raise TypeError('tensor or list of tensors expected, got {}'.format(type(tensor)))

    # if list of tensors, convert to a 4D mini-batch Tensor
    if isinstance(tensor, list):
        # BUGFIX: mxnet's nd.stack takes the arrays as positional *args and
        # the keyword `axis`, not `dim`; the old call raised a TypeError.
        tensor = nd.stack(*tensor, axis=0)

    if tensor.ndim == 2:  # single image H x W
        tensor = nd.expand_dims(tensor, axis=0)
    if tensor.ndim == 3:  # single image
        if tensor.shape[0] == 1:  # if single-channel, convert to 3-channel
            tensor = nd.concat(tensor, tensor, tensor, dim=0)
        tensor = nd.expand_dims(tensor, axis=0)
    if tensor.ndim == 4 and tensor.shape[1] == 1:  # single-channel images
        tensor = nd.concat(tensor, tensor, tensor, dim=1)

    if normalize is True:
        tensor = tensor.copy()  # avoid modifying tensor in-place
        if range is not None:
            assert isinstance(
                range, tuple
            ), "range has to be a tuple (min, max) if specified. min and max are numbers"

        def norm_ip(img, low, high):
            # Clamp, then shift/scale into [0, 1] in place.
            # BUGFIX: nd.clip is not in-place and its result was discarded,
            # so values were never actually clamped before rescaling.
            img[:] = nd.clip(img, low, high)
            img += (-low)
            img /= (high - low + 1e-5)
            #img.add_(-min).div_(max - min + 1e-5)

        def norm_range(t, range):
            if range is not None:
                norm_ip(t, range[0], range[1])
            else:
                norm_ip(t, float(t.min().asscalar()), float(t.max().asscalar()))

        if scale_each is True:
            for t in tensor:  # loop over mini-batch dimension
                norm_range(t, range)
        else:
            norm_range(tensor, range)

    if tensor.shape[0] == 1:
        return tensor.reshape((-3, -2))

    # make the mini-batch of images into a grid
    nmaps = tensor.shape[0]                   # number of tiles in the batch
    xmaps = min(nrow, nmaps)                  # tiles per row
    ymaps = int(math.ceil(float(nmaps) / xmaps))  # number of rows (ceil)
    height, width = int(tensor.shape[2] + padding), int(tensor.shape[3] + padding)  # per-tile cell size
    num_channels = tensor.shape[1]
    # Blank canvas: same channel count, (rows*cell_h + pad) x (cols*cell_w + pad).
    grid = nd.full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
    k = 0
    for y in irange(ymaps):
        for x in irange(xmaps):
            if k >= nmaps:
                break
            # BUGFIX: axis 1 of `grid` is vertical (height) and axis 2
            # horizontal (width); the old code indexed axis 1 by x*width and
            # axis 2 by y*height, which only worked for square images and
            # transposed the tile layout.
            grid[:,
                 y * height + padding:y * height + padding + height - padding,
                 x * width + padding:x * width + padding + width - padding] = tensor[k]
            k = k + 1
    return grid
def verify_broadcast_like_dynamic(xshp, wshp, lhs_axes, rhs_axes):
    """Check that a rewritten broadcast_like pipeline matches ``nd.broadcast_like``.

    Builds random tensors of shapes ``xshp``/``wshp``, runs the original
    ``nd.broadcast_like(x, w, lhs_axes, rhs_axes)``, then reconstructs the
    same result using only shape-static ops (tile / slice / expand_dims /
    repeat / squeeze / transpose + ``_broadcast_like``) so it can be fused
    with a dynamic batch axis, and compares the two outputs.

    Args:
        xshp: shape of the broadcast (lhs) tensor; broadcast axes must be 1.
        wshp: shape of the reference (rhs) tensor.
        lhs_axes, rhs_axes: matching axis lists, or both None for the
            same-rank full broadcast.
    """
    x_np = np.random.uniform(size=xshp)
    w_np = np.random.uniform(size=wshp)
    x = nd.array(x_np)
    w = nd.array(w_np)

    # org op
    y = nd.broadcast_like(x, w, lhs_axes=lhs_axes, rhs_axes=rhs_axes)
    print(y.shape)

    # rewrite op
    xndims, wndims = len(xshp), len(wshp)
    if lhs_axes is None or rhs_axes is None:
        assert xndims == wndims and lhs_axes is None \
            and rhs_axes is None
        z = _broadcast_like(x, w)
    else:
        lhs_axes, lndims = list(lhs_axes), len(lhs_axes)
        rhs_axes, rndims = list(rhs_axes), len(rhs_axes)
        assert lndims == rndims > 0

        # Normalize negative axes and validate them.
        lhs_axes = tuple([v+xndims if v < 0 else v for v in lhs_axes])
        assert all([0 <= v < xndims for v in list(lhs_axes)])
        rhs_axes = tuple([v+wndims if v < 0 else v for v in rhs_axes])
        assert all([0 <= v < wndims for v in list(rhs_axes)])
        # Every broadcast axis of x must currently have extent 1.
        assert all([xshp[lhs_axes[i]] == 1 for i in range(lndims)])

        batch_axes = [0]
        flg = all([batch_axis not in rhs_axes
                   for batch_axis in batch_axes])
        if flg:
            # No batch axis involved: a static tile is enough.
            cnts = {v: wshp[rhs_axes[i]]
                    for i, v in enumerate(lhs_axes)}
            reps = tuple([cnts[v] if v in lhs_axes else 1
                          for v in range(xndims)])
            z = nd.tile(x, reps=reps)
        else:
            # Batch axis participates: reshape w so _broadcast_like works.
            axis_map = {}
            for i, v in enumerate(lhs_axes):
                axis_map[v] = rhs_axes[i]
            for batch_axis in batch_axes:
                # BUGFIX: `%` previously bound only to the second string
                # literal (which has no placeholder), so a failing assert
                # raised a TypeError instead of this message.
                assert sum([1 if v == batch_axis else 0
                            for k, v in axis_map.items()]) <= 1, \
                    ("multiple broadcast on batch_axis: %s, "
                     "which is not support by dynamic shape fusion.") % batch_axis
            assert wndims < 6, \
                "slice can manipulate at most 5d"

            # reduce shape to 1 for non-broadcast dimensions
            begin = tuple([0]*wndims)
            end = tuple([wshp[v] if v in axis_map.values() else 1
                         for v in range(wndims)])
            w = nd.slice(w, begin=begin, end=end)

            # decompose k1->v, k2->v into k1->v, k2->v2
            # which make axis mappings unique
            while True:
                vs, flag, paxis_map = set(), True, axis_map
                for pk, pv in paxis_map.items():
                    if pv not in vs:
                        vs.add(pv)
                        continue
                    flag = False
                    axis_map = {k: (v+1 if v > pv or k == pk else v)
                                for k, v in axis_map.items()}
                    w = nd.expand_dims(w, axis=pv)
                    w = nd.repeat(w, axis=pv, repeats=wshp[pv])
                    wshp = wshp[:pv] + (wshp[pv],) + wshp[pv:]
                    break
                if flag:
                    break
            wndims = len(wshp)

            # trim wndims if not equal to xndims
            v = 0
            while wndims > xndims:
                while v in axis_map.values():
                    v += 1
                w = nd.squeeze(w, axis=v)
                wndims -= 1
                axis_map = {k: (nv-1 if nv > v else nv)
                            for k, nv in axis_map.items()}
            while wndims < xndims:
                w = nd.expand_dims(w, axis=wndims)
                wndims += 1

            # Permute w's axes until axis_map becomes the identity.
            axes = list(range(wndims))
            while True:
                dels = [k for k, v in axis_map.items() if k == v]
                for k in dels:
                    del axis_map[k]
                if not axis_map:
                    break
                keys = list(axis_map.keys())
                k, v = keys[0], axis_map[keys[0]]
                axes[k], axes[v] = axes[v], axes[k]
                for nk in keys:
                    nv = axis_map[nk]
                    if nv == k:
                        axis_map[nk] = v
                    elif nv == v:
                        axis_map[nk] = k
            axes = tuple(axes)
            if axes != tuple(range(wndims)):
                assert wndims < 7, \
                    "slice can manipulate at most 6d"
                w = nd.transpose(w, axes=axes)
            z = _broadcast_like(x, w)
    print(z.shape)

    # compare
    assert z.shape == y.shape
    zn, zp = get_norm(z)
    yn, yp = get_norm(y)
    rn = np.linalg.norm(zp-yp)
    print(zn, yn, rn)
def tracker(siamfc, params, frame_name_list, pos_x, pos_y, target_w, target_h, ctx=mx.cpu()):
    """Run the SiamFC tracker over a sequence of frames.

    Initializes an exemplar template from the frame at ``params.startFrame``,
    then for every later frame searches a scale pyramid of crops, updates the
    target position/scale, and records one bounding box per frame.

    Parameters
    ----------
    siamfc : object with ``net`` and ``match_templates`` (project model wrapper).
    params : tracker hyper-parameter namespace (startFrame, scaleStep, zLR, ...).
    frame_name_list : sequence of image file paths, one per frame.
    pos_x, pos_y : initial target center, 1-based (converted to 0-based below).
    target_w, target_h : initial target size in pixels.
    ctx : mxnet context to run the network on.

    Returns
    -------
    (bboxes, speed) : ``bboxes`` is an (nImgs, 4) array of
    [x, y, w, h] boxes (1-based corner convention), ``speed`` is frames/sec.
    """
    # convert 1-based input coordinates to 0-based
    pos_x = pos_x - 1
    pos_y = pos_y - 1
    # Load Video Information
    z = image.imread(frame_name_list[params.startFrame]).astype('float32')  # H W C
    # frame_sz = z.shape # H W C
    # per-channel mean used to pad crops that fall outside the frame
    avgChans = nd.mean(z, axis=[0, 1])
    nImgs = np.size(frame_name_list)
    # context margin around the target, as in the SiamFC crop definition
    context = params.contextAmount * (target_w + target_h)
    wc_z = target_w + context
    hc_z = target_h + context
    # NOTE(review): np.prod of a scalar product is a no-op here — presumably
    # kept for symmetry with an (w, h) tuple formulation; confirm.
    s_z = params.exemplarSize / 127 * np.sqrt(np.prod(wc_z * hc_z))
    s_x = params.instanceSize / 127 * np.sqrt(np.prod(wc_z * hc_z))
    # geometric scale steps centered on 1 (e.g. step**-1, step**0, step**1)
    scales = params.scaleStep ** np.linspace(np.ceil(params.numScale/2 - params.numScale),
                                             np.floor(params.numScale/2), params.numScale)
    scaledExemplar = s_z * scales
    z_crop_, _ = make_scale_pyramid(z, pos_x, pos_y, scaledExemplar, params.exemplarSize,
                                    avgChans, params, ctx=ctx)  # B H W C
    # keep only the middle (unscaled) pyramid level as the exemplar
    z_crop = z_crop_[1]
    z_crop = nd.expand_dims(z_crop, axis = 0)
    # NOTE(review): np.transpose applied to an mxnet NDArray — assumes it
    # dispatches correctly for NDArray inputs; verify.
    z_crop = np.transpose(z_crop, axes = (0, 3, 1, 2))
    z_out_val = siamfc.net(z_crop.as_in_context(ctx))
    # clamp bounds for the search/exemplar scale factors
    min_s_x = params.minSFactor * s_x
    max_s_x = params.maxSFactor * s_x
    min_s_z = params.minSFactor * s_z
    max_s_z = params.maxSFactor * s_z
    # cosine window to penalize large displacements
    window_hann_1d = np.expand_dims(np.hanning(params.responseUp * params.scoreSize), axis = 0)
    window_hann_2d = np.transpose(window_hann_1d) * window_hann_1d
    window = window_hann_2d / np.sum(window_hann_2d)
    # stores tracker's output for evaluation
    print('Frame: %d' % (params.startFrame + 1))
    bboxes = np.zeros((nImgs, 4))
    # row 0 holds the initial box; later rows are indexed by i - startFrame
    bboxes[0,:] = [pos_x + 1-target_w / 2, pos_y + 1-target_h / 2, target_w, target_h]
    t_start = time.time()
    for i in range(params.startFrame + 1, nImgs):
        print('Frame: %d' % (i + 1))
        scaledInstance = s_x * scales
        x = image.imread(frame_name_list[i]).astype('float32')  # H W C
        x_crops, pad_masks_x = make_scale_pyramid(x, pos_x, pos_y, scaledInstance,
                                                  params.instanceSize, avgChans, params, ctx=ctx)  # B H W C
        x_crops_ = np.transpose(x_crops, axes = (0, 3, 1, 2))
        x_out = siamfc.net(x_crops_.as_in_context(ctx))
        responseMaps = siamfc.match_templates(z_out_val, x_out)  # B C H W
        # pick the best response: new center and which scale won
        pos_x, pos_y, newScale = tracker_step(responseMaps, pos_x, pos_y, s_x, window, params)
        # damped scale update, clamped to [min_s_x, max_s_x]
        s_x = np.maximum(min_s_x, np.minimum(max_s_x, (1.0 - np.float64(params.scaleLR))* s_x
                                             + np.float64(params.scaleLR) * scaledInstance[newScale]))
        if params.zLR >0:
            # optionally refresh the exemplar template with an exponential
            # moving average (template update rate zLR)
            scaledExemplar = s_z * scales
            z_crop_, _ = make_scale_pyramid(x, pos_x, pos_y, scaledExemplar, params.exemplarSize,
                                            avgChans, params, ctx=ctx)  # B H W C
            z_crop = z_crop_[1]
            z_crop = nd.expand_dims(z_crop, axis = 0)
            z_crop = np.transpose(z_crop, axes = (0, 3, 1, 2))
            z_out_val_new = siamfc.net(z_crop.as_in_context(ctx))
            z_out_val = (1 - params.zLR) * z_out_val + params.zLR * z_out_val_new
        s_z = np.maximum(min_s_z, np.minimum(max_s_z, (1 - params.scaleLR) * s_z
                                             + params.scaleLR * scaledExemplar[newScale]))
        # damped target size update toward the winning scale
        scaledTarget_x, scaledTarget_y = target_w * scales, target_h * scales
        target_w = (1 - params.scaleLR) * target_w + params.scaleLR * scaledTarget_x[newScale]
        target_h = (1 - params.scaleLR) * target_h + params.scaleLR * scaledTarget_y[newScale]
        bboxes[i-params.startFrame, :] = pos_x + 1 - target_w / 2, pos_y + 1 - target_h / 2, target_w, target_h
        if params.visualization:
            show_frame(x.asnumpy(), bboxes[i-params.startFrame, :], 1)
    # NOTE(review): the "+ 1" biases the elapsed time by a second — presumably
    # a guard against division by ~0 on tiny sequences; confirm intent.
    t_elapsed = time.time() - t_start + 1
    speed = (nImgs - 1) / t_elapsed
    return bboxes, speed
def test_resize_gpu(): # Test with normal case 3D input float type data_in_3d = nd.random.uniform(0, 255, (300, 300, 3)) out_nd_3d = transforms.Resize((100, 100))(data_in_3d) data_in_4d_nchw = nd.moveaxis(nd.expand_dims(data_in_3d, axis=0), 3, 1) data_expected_3d = (nd.moveaxis( nd.contrib.BilinearResize2D(data_in_4d_nchw, height=100, width=100), 1, 3))[0] assert_almost_equal(out_nd_3d.asnumpy(), data_expected_3d.asnumpy()) # Test with normal case 4D input float type data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3)) out_nd_4d = transforms.Resize((100, 100))(data_in_4d) data_in_4d_nchw = nd.moveaxis(data_in_4d, 3, 1) data_expected_4d = nd.moveaxis( nd.contrib.BilinearResize2D(data_in_4d_nchw, height=100, width=100), 1, 3) assert_almost_equal(out_nd_4d.asnumpy(), data_expected_4d.asnumpy()) # Test invalid interp data_in_3d = nd.random.uniform(0, 255, (300, 300, 3)) invalid_transform = transforms.Resize(-150, keep_ratio=False, interpolation=2) assertRaises(MXNetError, invalid_transform, data_in_3d) # Credited to Hang Zhang def py_bilinear_resize_nhwc(x, outputHeight, outputWidth): batch, inputHeight, inputWidth, channel = x.shape if outputHeight == inputHeight and outputWidth == inputWidth: return x y = np.empty([batch, outputHeight, outputWidth, channel]).astype('uint8') rheight = 1.0 * (inputHeight - 1) / (outputHeight - 1) if outputHeight > 1 else 0.0 rwidth = 1.0 * (inputWidth - 1) / (outputWidth - 1) if outputWidth > 1 else 0.0 for h2 in range(outputHeight): h1r = 1.0 * h2 * rheight h1 = int(np.floor(h1r)) h1lambda = h1r - h1 h1p = 1 if h1 < (inputHeight - 1) else 0 for w2 in range(outputWidth): w1r = 1.0 * w2 * rwidth w1 = int(np.floor(w1r)) w1lambda = w1r - w1 w1p = 1 if w1 < (inputHeight - 1) else 0 for b in range(batch): for c in range(channel): y[b][h2][w2][c] = (1-h1lambda)*((1-w1lambda)*x[b][h1][w1][c] + \ w1lambda*x[b][h1][w1+w1p][c]) + \ h1lambda*((1-w1lambda)*x[b][h1+h1p][w1][c] + \ w1lambda*x[b][h1+h1p][w1+w1p][c]) return y # Test with normal 
case 3D input int8 type data_in_4d = nd.random.uniform(0, 255, (1, 300, 300, 3)).astype('uint8') out_nd_3d = transforms.Resize((100, 100))(data_in_4d[0]) assert_almost_equal(out_nd_3d.asnumpy(), py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100)[0], atol=1.0) # Test with normal case 4D input int8 type data_in_4d = nd.random.uniform(0, 255, (2, 300, 300, 3)).astype('uint8') out_nd_4d = transforms.Resize((100, 100))(data_in_4d) assert_almost_equal(out_nd_4d.asnumpy(), py_bilinear_resize_nhwc(data_in_4d.asnumpy(), 100, 100), atol=1.0)
def train(args):
    """Train a style-transfer network (MSG-Net style) with perceptual losses.

    Builds an ImageFolder dataloader and a StyleLoader, initializes a VGG16
    feature extractor and the style model, then optimizes content loss
    (VGG relu2 features) plus style loss (Gram matrices over all VGG levels)
    with Adam. Checkpoints periodically and saves final parameters.

    Parameters come from the ``args`` namespace: seed, cuda, image_size,
    dataset, batch_size, style_folder, style_size, ngf, resume, lr, epochs,
    content_weight, style_weight, log_interval, save_model_dir.
    """
    np.random.seed(args.seed)
    if args.cuda:
        ctx = mx.gpu(0)
    else:
        ctx = mx.cpu(0)
    # dataloader
    transform = utils.Compose([utils.Scale(args.image_size),
                               utils.CenterCrop(args.image_size),
                               utils.ToTensor(ctx),
                               ])
    train_dataset = data.ImageFolder(args.dataset, transform)
    # last_batch='discard' keeps every batch exactly batch_size wide
    train_loader = gluon.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                         last_batch='discard')
    style_loader = utils.StyleLoader(args.style_folder, args.style_size, ctx=ctx)
    print('len(style_loader):',style_loader.size())
    # models
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)
    style_model = net.Net(ngf=args.ngf)
    style_model.initialize(init=mx.initializer.MSRAPrelu(), ctx=ctx)
    if args.resume is not None:
        print('Resuming, initializing using weight from {}.'.format(args.resume))
        style_model.load_parameters(args.resume, ctx=ctx)
    print('style_model:',style_model)
    # optimizer and loss
    trainer = gluon.Trainer(style_model.collect_params(), 'adam',
                            {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for e in range(args.epochs):
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            # prepare data: one style image per batch, mean-subtracted copy
            # for VGG, preprocessed copy as the model's style target
            style_image = style_loader.get(batch_id)
            style_v = utils.subtract_imagenet_mean_preprocess_batch(style_image.copy())
            style_image = utils.preprocess_batch(style_image)

            features_style = vgg(style_v)
            gram_style = [net.gram_matrix(y) for y in features_style]

            # content target: VGG level-1 features of the (mean-subtracted) input
            xc = utils.subtract_imagenet_mean_preprocess_batch(x.copy())
            f_xc_c = vgg(xc)[1]
            with autograd.record():
                style_model.set_target(style_image)
                y = style_model(x)

                y = utils.subtract_imagenet_mean_batch(y)
                features_y = vgg(y)

                content_loss = 2 * args.content_weight * mse_loss(features_y[1], f_xc_c)

                # style loss: Gram-matrix MSE at every VGG level, broadcasting
                # the single style Gram across the batch
                style_loss = 0.
                for m in range(len(features_y)):
                    gram_y = net.gram_matrix(features_y[m])
                    _, C, _ = gram_style[m].shape
                    gram_s = F.expand_dims(gram_style[m], 0).broadcast_to((args.batch_size, 1, C, C))
                    style_loss = style_loss + 2 * args.style_weight * \
                        mse_loss(gram_y, gram_s[:n_batch, :, :])

                total_loss = content_loss + style_loss

            total_loss.backward()
            trainer.step(args.batch_size)
            # block until async ops finish so timings/losses are current
            mx.nd.waitall()

            agg_content_loss += content_loss[0]
            agg_style_loss += style_loss[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.3f}\tstyle: {:.3f}\ttotal: {:.3f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss.asnumpy()[0] / (batch_id + 1),
                    agg_style_loss.asnumpy()[0] / (batch_id + 1),
                    (agg_content_loss + agg_style_loss).asnumpy()[0] / (batch_id + 1)
                )
                print(mesg)

            if (batch_id + 1) % (4 * args.log_interval) == 0:
                # save model (periodic checkpoint)
                save_model_filename = "Epoch_" + str(e) + "iters_" + \
                    str(count) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
                    args.content_weight) + "_" + str(args.style_weight) + ".params"
                save_model_path = os.path.join(args.save_model_dir, save_model_filename)
                style_model.save_parameters(save_model_path)
                print("\nCheckpoint, trained model saved at", save_model_path)

    # save model (final)
    save_model_filename = "Final_epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
        args.content_weight) + "_" + str(args.style_weight) + ".params"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    style_model.save_parameters(save_model_path)
    print("\nDone, trained model saved at", save_model_path)
def np2nd(img_np, ctx=get_ctx()): img_nd = nd.array(img_np, ctx=ctx) img_nd = nd.swapaxes(img_nd, 1, 2) img_nd = nd.swapaxes(img_nd, 0, 1) img_nd = nd.expand_dims(img_nd, 0) return img_nd
def unsqueeze(input, dim): return nd.expand_dims(input, axis=dim)
def generate_learned_samples(self):
    '''
    Draw and generate data.

    Each epoch yields one minibatch: for every slot in the batch a random
    train CSV and a random test CSV are extracted, one-hot encoded, and a
    random window of ``seq_len`` consecutive rows is taken. Batches are
    normalized with ``pre_normalize`` and (train only) optionally noised.

    Returns:
        `Tuple` data. The shape is ...
        - `mxnet.ndarray` of observed data points in training.
        - `mxnet.ndarray` of supervised data in training.
        - `mxnet.ndarray` of observed data points in test.
        - `mxnet.ndarray` of supervised data in test.
    '''
    for epoch in range(self.epochs):
        training_batch_arr, test_batch_arr = None, None
        for i in range(self.batch_size):
            # pick one random train file and one random test file per sample
            file_key = np.random.randint(low=0, high=len(self.__train_csv_path_list))
            train_observed_arr = self.__unlabeled_csv_extractor.extract(
                self.__train_csv_path_list[file_key]
            )
            test_file_key = np.random.randint(low=0, high=len(self.__test_csv_path_list))
            test_observed_arr = self.__unlabeled_csv_extractor.extract(
                self.__test_csv_path_list[test_file_key]
            )
            # one-hot encode: shift values to start at 0 (min * -1) and index
            # an identity matrix of size (max - min + 1)
            train_observed_arr = np.identity(
                1 + int(train_observed_arr.max() + (train_observed_arr.min() * -1))
            )[
                (train_observed_arr.reshape(train_observed_arr.shape[0], -1) + (train_observed_arr.min() * -1)).astype(int)
            ]
            test_observed_arr = np.identity(
                1 + int(test_observed_arr.max() + (test_observed_arr.min() * -1))
            )[
                (test_observed_arr.reshape(test_observed_arr.shape[0], -1) + (test_observed_arr.min() * -1)).astype(int)
            ]
            # random contiguous window of seq_len rows
            # NOTE(review): assumes every extracted file has more than
            # seq_len rows, otherwise randint(high<=0) raises — confirm.
            start_row = np.random.randint(low=0, high=train_observed_arr.shape[0] - self.seq_len)
            test_start_row = np.random.randint(low=0, high=test_observed_arr.shape[0] - self.seq_len)
            train_observed_arr = train_observed_arr[start_row:start_row+self.seq_len]
            test_observed_arr = test_observed_arr[test_start_row:test_start_row+self.seq_len]
            # accumulate samples along a new leading batch axis
            if training_batch_arr is None:
                training_batch_arr = nd.expand_dims(
                    nd.ndarray.array(train_observed_arr, ctx=self.__ctx),
                    axis=0
                )
            else:
                training_batch_arr = nd.concat(
                    training_batch_arr,
                    nd.expand_dims(
                        nd.ndarray.array(train_observed_arr, ctx=self.__ctx),
                        axis=0
                    ),
                    dim=0
                )
            if test_batch_arr is None:
                test_batch_arr = nd.expand_dims(
                    nd.ndarray.array(test_observed_arr, ctx=self.__ctx),
                    axis=0
                )
            else:
                test_batch_arr = nd.concat(
                    test_batch_arr,
                    nd.expand_dims(
                        nd.ndarray.array(test_observed_arr, ctx=self.__ctx),
                        axis=0
                    ),
                    dim=0
                )
        training_batch_arr = self.pre_normalize(training_batch_arr)
        test_batch_arr = self.pre_normalize(test_batch_arr)
        # optional augmentation applied only to the training batch
        if self.__noiseable_data is not None:
            training_batch_arr = self.__noiseable_data.noise(training_batch_arr)
        # autoencoder-style: observed and supervised data are the same arrays
        yield training_batch_arr, training_batch_arr, test_batch_arr, test_batch_arr
def train(args):
    """Train a style-transfer network with perceptual (content + style) losses.

    Same pipeline as the sibling ``train`` in this file but written against
    the older gluon parameter API (``collect_params().load/save``) and a
    ``setTarget`` model method.

    NOTE(review): ``collect_params().load/save`` and camelCase ``setTarget``
    differ from the other variant's ``load_parameters``/``set_target`` —
    presumably this targets an older net.Net/mxnet version; confirm before
    unifying.
    """
    np.random.seed(args.seed)
    if args.cuda:
        ctx = mx.gpu(0)
    else:
        ctx = mx.cpu(0)
    # dataloader
    transform = utils.Compose([utils.Scale(args.image_size),
                               utils.CenterCrop(args.image_size),
                               utils.ToTensor(ctx),
                               ])
    train_dataset = data.ImageFolder(args.dataset, transform)
    # last_batch='discard' keeps every batch exactly batch_size wide
    train_loader = gluon.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                         last_batch='discard')
    style_loader = utils.StyleLoader(args.style_folder, args.style_size, ctx=ctx)
    print('len(style_loader):',style_loader.size())
    # models
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)
    style_model = net.Net(ngf=args.ngf)
    style_model.initialize(init=mx.initializer.MSRAPrelu(), ctx=ctx)
    if args.resume is not None:
        print('Resuming, initializing using weight from {}.'.format(args.resume))
        style_model.collect_params().load(args.resume, ctx=ctx)
    print('style_model:',style_model)
    # optimizer and loss
    trainer = gluon.Trainer(style_model.collect_params(), 'adam',
                            {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for e in range(args.epochs):
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            # prepare data: one style image per batch, mean-subtracted copy
            # for VGG, preprocessed copy as the model's style target
            style_image = style_loader.get(batch_id)
            style_v = utils.subtract_imagenet_mean_preprocess_batch(style_image.copy())
            style_image = utils.preprocess_batch(style_image)

            features_style = vgg(style_v)
            gram_style = [net.gram_matrix(y) for y in features_style]

            # content target: VGG level-1 features of the (mean-subtracted) input
            xc = utils.subtract_imagenet_mean_preprocess_batch(x.copy())
            f_xc_c = vgg(xc)[1]
            with autograd.record():
                style_model.setTarget(style_image)
                y = style_model(x)

                y = utils.subtract_imagenet_mean_batch(y)
                features_y = vgg(y)

                content_loss = 2 * args.content_weight * mse_loss(features_y[1], f_xc_c)

                # style loss: Gram-matrix MSE at every VGG level, broadcasting
                # the single style Gram across the batch
                style_loss = 0.
                for m in range(len(features_y)):
                    gram_y = net.gram_matrix(features_y[m])
                    _, C, _ = gram_style[m].shape
                    gram_s = F.expand_dims(gram_style[m], 0).broadcast_to((args.batch_size, 1, C, C))
                    style_loss = style_loss + 2 * args.style_weight * mse_loss(gram_y, gram_s[:n_batch, :, :])

                total_loss = content_loss + style_loss

            total_loss.backward()
            trainer.step(args.batch_size)
            # block until async ops finish so timings/losses are current
            mx.nd.waitall()

            agg_content_loss += content_loss[0]
            agg_style_loss += style_loss[0]

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.3f}\tstyle: {:.3f}\ttotal: {:.3f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss.asnumpy()[0] / (batch_id + 1),
                    agg_style_loss.asnumpy()[0] / (batch_id + 1),
                    (agg_content_loss + agg_style_loss).asnumpy()[0] / (batch_id + 1)
                )
                print(mesg)

            if (batch_id + 1) % (4 * args.log_interval) == 0:
                # save model (periodic checkpoint)
                save_model_filename = "Epoch_" + str(e) + "iters_" + str(count) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
                    args.content_weight) + "_" + str(args.style_weight) + ".params"
                save_model_path = os.path.join(args.save_model_dir, save_model_filename)
                style_model.collect_params().save(save_model_path)
                print("\nCheckpoint, trained model saved at", save_model_path)

    # save model (final)
    save_model_filename = "Final_epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
        args.content_weight) + "_" + str(args.style_weight) + ".params"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    style_model.collect_params().save(save_model_path)
    print("\nDone, trained model saved at", save_model_path)
def get_action(self, st): st = nd.expand_dims(st, axis=0) a_q = self.infer_q_mod.forward(is_train=False, data=st) a = nd.argmax_channel(a_q[0]) return a