def data_pipeline(param):
    # load data
    X_train, y_train = utils.load_data('./data/train.p')
    X_valid, y_valid = utils.load_data('./data/valid.p')
    X_test, y_test = utils.load_data('./data/test.p')

    n_train = len(X_train)
    n_test = len(X_test)
    print("Number of training examples =", n_train)
    print("Number of testing examples =", n_test)

    image_shape = X_train.shape[1:]
    print("Image data shape =", image_shape)

    n_classes = np.max(y_train) + 1
    print("Number of classes =", n_classes)

    # data augmentation
    X_train, y_train = utils.augment_data(X_train, y_train, param)
    print("Number of augmented training examples =", len(X_train))
    print("Number of validation examples =", len(X_valid))

    # pre-process
    X_train = np.array(
        [utils.pre_process(X_train[i]) for i in range(len(X_train))],
        dtype=np.float32)
    X_valid = np.array(
        [utils.pre_process(X_valid[i]) for i in range(len(X_valid))],
        dtype=np.float32)
    X_test = np.array(
        [utils.pre_process(X_test[i]) for i in range(len(X_test))],
        dtype=np.float32)

    return X_train, y_train, X_valid, y_valid, X_test, y_test
def _train_episode(self, episode):
    """ training phase """
    state = self.env.reset()
    state = pre_process(state).repeat(self.config.in_c, 1, 1)
    loss, reward_sum, done = 0, 0, False

    while not done:
        # epsilon-greedy action selection
        action = random.randint(0, self.num_actions - 1) if random.random() >= 1 - self.epsilon \
            else self._make_action(state, False)
        next_state, reward, done, _ = self.env.step(action)
        next_state = pre_process(next_state)
        next_state = torch.cat([state[:3], next_state], dim=0)
        reward_sum += self.reward_func(reward)
        self.step += 1
        self.memory.append((state, next_state, torch.LongTensor([action]),
                            torch.FloatTensor([reward]), torch.FloatTensor([done])))
        state = next_state

        if self.step >= self.config.observate_time:
            loss = self._update_param()
            self.update_step += 1
            # sync the target network every `update_target` parameter updates
            if self.update_step % self.config.update_target == 0:
                self.target.load_state_dict(self.model.state_dict())

        # linearly anneal epsilon over the first eps_step environment steps
        if self.step <= self.eps_step:
            self.epsilon -= (self.init_eps - self.final_eps) / self.eps_step

    self.reward_list.append(reward_sum)
    self.reward_mean = reward_sum if self.reward_mean is None \
        else self.reward_mean * 0.99 + reward_sum * 0.01
    if self.config.visdom:
        self.visual.update_vis_line(episode - 1, [self.reward_mean], 'train', 'append')

    log = {'Episode': episode,
           'Reward_cur': reward_sum,
           'Reward_mean': self.reward_mean,
           'Loss': loss,
           'Reward_{}'.format(self.config.display_interval): sum(self.reward_list)}
    return log
def video_upload():
    target = os.path.join(APP_ROOT, 'files/')
    clean_data(target)
    print(target)
    if not os.path.isdir(target):
        os.mkdir(target)

    print(request.files.getlist("file"))
    for upload in request.files.getlist("file"):
        print(upload)
        print("{} is the file name".format(upload.filename))
        filename = upload.filename
        ext = os.path.splitext(filename)[1]
        if ext == ".mp4":
            print("File supported, moving on...")
        else:
            return render_template("Error.html",
                                   message="The application supports only mp4 videos; "
                                           "this format is not supported.")
        destination = "".join([target, filename])
        print("Accept incoming file:", filename)
        print("Save it to:", destination)
        upload.save(destination)
        pre_process(target, destination, filename)
        generate_video(target, filename)

    return render_template("complete_video.html", value=filename)
def track(self, current_frame):
    frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
    frame_gray = frame_gray.astype(np.float32)

    # correlate the filter with the current search window
    Hi = self.Ai / self.Bi
    fi = frame_gray[self.pos[1]:self.pos[1] + self.pos[3],
                    self.pos[0]:self.pos[0] + self.pos[2]]
    fi = pre_process(fi)
    Gi = Hi * np.fft.fft2(fi)
    gi = linear_mapping(np.fft.ifft2(Gi))

    # find the position of the maximum response
    max_pos = np.unravel_index(np.argmax(gi, axis=None), gi.shape)

    # update the position
    self.pos[1] += max_pos[0] - gi.shape[0] // 2
    self.pos[0] += max_pos[1] - gi.shape[1] // 2

    # get the current patch at the updated position
    fi = frame_gray[self.pos[1]:self.pos[1] + self.pos[3],
                    self.pos[0]:self.pos[0] + self.pos[2]]
    fi = pre_process(fi)

    # online update of the filter numerator/denominator
    self.Ai = self.learning_rate * (self.G * np.conjugate(np.fft.fft2(fi))) + \
              (1 - self.learning_rate) * self.Ai
    self.Bi = self.learning_rate * (np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))) + \
              (1 - self.learning_rate) * self.Bi
    return self.pos
def main():
    env = gym.make(args.env_name)
    env.seed(500)
    torch.manual_seed(500)

    img_shape = env.observation_space.shape
    num_actions = 3
    print('image size:', img_shape)
    print('action size:', num_actions)

    net = QNet(num_actions)
    net.load_state_dict(torch.load(args.save_path + 'model.pth'))
    net.to(device)
    net.eval()

    epsilon = 0

    for e in range(5):
        done = False
        score = 0
        steps = 0  # step counter (was used below but never initialized)

        state = env.reset()
        state = pre_process(state)
        state = torch.Tensor(state).to(device)
        history = torch.stack((state, state, state, state))

        # take a few random steps to fill the frame history
        for i in range(3):
            action = env.action_space.sample()
            state, reward, done, info = env.step(action)
            state = pre_process(state)
            state = torch.Tensor(state).to(device)
            state = state.unsqueeze(0)
            history = torch.cat((state, history[:-1]), dim=0)

        while not done:
            if args.render:
                env.render()

            steps += 1
            qvalue = net(history.unsqueeze(0))
            action = get_action(0, qvalue, num_actions)

            next_state, reward, done, info = env.step(action + 1)
            next_state = pre_process(next_state)
            next_state = torch.Tensor(next_state).to(device)
            next_state = next_state.unsqueeze(0)
            next_history = torch.cat((next_state, history[:-1]), dim=0)

            score += reward
            history = next_history

        print('{} episode | score: {:.2f}'.format(e, score))
def _pre_training(self, init_frame, G):
    height, width = G.shape
    fi = cv2.resize(init_frame, (width, height))
    # pre-process img..
    fi = pre_process(fi)
    Ai = G * np.conjugate(np.fft.fft2(fi))
    Bi = np.fft.fft2(init_frame) * np.conjugate(np.fft.fft2(init_frame))
    for _ in range(self.args.num_pretrain):
        if self.args.rotate:
            fi = pre_process(random_warp(init_frame))
        else:
            fi = pre_process(init_frame)
        Ai = Ai + G * np.conjugate(np.fft.fft2(fi))
        Bi = Bi + np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))
    return Ai, Bi
def pre_train(self, training_img):
    init_frame = cv2.cvtColor(training_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)

    # Select Object to Track [x, y, width, height]
    init_gt = cv2.selectROI('initial_img', training_img, False, False)
    init_gt = np.array(init_gt).astype(np.int64)

    # Compute Gaussian Response centered on the target
    g = np.zeros(init_frame.shape[0:2]).astype(np.float32)
    g[(init_gt[1] + init_gt[3] // 2), (init_gt[0] + init_gt[2] // 2)] = 1.0
    gaussian_response = cv2.GaussianBlur(g, (0, 0), self.sigma) * init_frame

    # start to create the training set ...
    # get the goal..
    g = gaussian_response[init_gt[1]:init_gt[1] + init_gt[3],
                          init_gt[0]:init_gt[0] + init_gt[2]]
    init_frame = init_frame[init_gt[1]:init_gt[1] + init_gt[3],
                            init_gt[0]:init_gt[0] + init_gt[2]]
    self.G = np.fft.fft2(g)

    # start to do the pre-training...
    Ai = np.zeros(self.G.shape)
    Bi = np.zeros(self.G.shape)
    for _ in range(self.num_pretrain):
        fi = pre_process(init_frame)
        Ai = Ai + self.G * np.conjugate(np.fft.fft2(fi))
        Bi = Bi + np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))
    self.Ai = Ai * self.learning_rate
    self.Bi = Bi * self.learning_rate
    self.pos = init_gt.copy()
def exp_web_images():
    # parameters
    param = ExperimentParam(n_rows=32, n_cols=32, n_channels=3, n_classes=43)

    # load data
    folder = './from-web'
    X, y = load_web_images(folder, param)
    X = np.array([utils.pre_process(X[i]) for i in range(len(X))],
                 dtype=np.float32)

    # load model
    model_fname = param._model_fname
    net, sess = load_model(model_fname, param)
    preds, softmax = sess.run([net._preds, net._softmax], {
        net._X: X,
        net._is_training: False
    })
    accuracy = utils.classification_accuracy(y, preds)
    print('Accuracy on web images: ', accuracy)
    print('labels: ', y)
    print('predictions: ', preds)

    # top softmax probabilities
    topk = sess.run(tf.nn.top_k(tf.constant(softmax), k=3))
    print(topk)
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = data["steering_angle"]
        # The current throttle of the car
        throttle = data["throttle"]
        # The current speed of the car
        speed = data["speed"]
        # The current image from the center camera of the car
        imgString = data["image"]
        image = Image.open(BytesIO(base64.b64decode(imgString)))
        image_array = utils.pre_process(image)
        image_array = utils.to_keras(image_array)
        steering_angle = float(
            model.predict(image_array[None, :, :, :], batch_size=1))
        throttle = controller.update(float(speed))
        print(steering_angle, throttle)
        send_control(steering_angle, throttle)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
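# The cropping, target resolution, and color space inside `utils.pre_process` are not
# shown in this snippet. The sketch below only illustrates a typical behavioural-cloning
# preprocessing step (crop sky/hood, resize, convert to YUV); every specific value in it
# is an assumption, not taken from this project.
import cv2
import numpy as np

def pre_process_sketch(image):
    img = np.asarray(image)           # PIL image -> numpy array
    img = img[60:-25, :, :]           # assumed crop: drop sky and car hood
    img = cv2.resize(img, (200, 66))  # assumed network input size
    img = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
    return img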
def test_pre_process(self):
    tokens = nltk.word_tokenize(text)
    res = utils.pre_process(tokens)
    self.assertNotIn("the", res, msg="stopwords not excluded!!!")
    self.assertIn("student", res, msg="student in text but not in result!!!")
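# A minimal sketch of a pre_process consistent with this test -- an assumption for
# illustration, not this project's actual utils implementation: lowercase alphabetic
# tokens and drop English stopwords, so "the" disappears while content words remain.
from nltk.corpus import stopwords

def pre_process_sketch(tokens):
    stop_words = set(stopwords.words('english'))
    return [t.lower() for t in tokens if t.isalpha() and t.lower() not in stop_words]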
def main(out_video_path):
    # init camera
    print("[INFO] starting video stream...")
    vs = VideoStream(src=1).start()
    time.sleep(2.0)

    # use a VideoWriter object to save the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(out_video_path, fourcc, 20.0, (1920, 1080))

    # begin detection
    while True:
        frame = vs.read()

        # get all 6-sided polygons
        opened = pre_process(frame)
        poly_6_contours = find_poly(frame, opened, side_num=6)
        # get all circles
        circle_contours = find_circle(frame, opened)

        # draw contours
        cv2.drawContours(frame, poly_6_contours, -1, (255, 0, 0), 2)
        cv2.drawContours(frame, circle_contours, -1, (0, 255, 0), 2)

        # find momentum (contour centers)
        for cnt in poly_6_contours:
            center_x, center_y = get_momentum(cnt)
            cv2.circle(frame, (center_x, center_y), 3, 128, -1)  # draw the center point
        for cnt in circle_contours:
            center_x, center_y = get_momentum(cnt)
            cv2.circle(frame, (center_x, center_y), 3, 128, -1)  # draw the center point

        # perspective transform
        if len(poly_6_contours) >= 4:
            # enough: find the 4 largest 6-sided polygons, apply the
            # perspective transform, find the point
            pass
        else:
            # not enough
            pass

        # show the frame and write it to the output video
        cv2.imshow("frame", frame)
        out.write(frame)

        # exit on ESC
        key = cv2.waitKey(1)
        if key == 27:
            break

    out.release()
    cv2.destroyAllWindows()
    vs.stop()
def forward(self, y, ret_edge=False):
    yp, params = utils.pre_process(y, self.tkargs[1], self.eval())
    z = ST(self.A[0](yp), self.tau[0])
    for i in range(1, self.iters):
        # refresh the graph edges every edge_freq iterations
        if ((i - 1) % self.edge_freq) == 0:
            edge = self.topK(z)
        r = self.B[i](z, edge) - yp
        z = ST(z - self.A[i](r, edge), self.tau[i])
    edge = self.topK(z)
    xphat = self.D(z, edge)
    xhat = utils.post_process(xphat, params)
    if ret_edge:
        return xhat, edge
    return xhat
def train(dataset, output):
    raw_text = open(dataset).read()
    pre_text = utils.pre_process(raw_text)
    char_map = utils.map_chars_to_int(pre_text)

    params = {
        "seq_length": 80,
        "n_chars": len(pre_text),
        "n_vocab": len(char_map)
    }

    X, y = utils.prepare_dset(pre_text, char_map, params)
    params["n_patterns"] = len(X)

    X = np.reshape(X, (params["n_patterns"], params["seq_length"], 1))
    y = encode(y)

    model_params = {
        "LSTM-1": 512,
        "LSTM-2": 256,
        "Dropout-1": 0.3,
        "Dropout-2": 0.2,
        "activation": "softmax",
        "loss": "categorical_crossentropy",
        "optimizer": "adam",
        "epochs": 100,
        "batch_size": 32
    }
    model = create_model(X, y, model_params)

    filepath = os.path.join(
        output, "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5")
    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1,
                                 save_best_only=True, mode='min')
    callbacks_list = [checkpoint]

    model.compile(loss=model_params["loss"], optimizer=model_params["optimizer"])
    model.fit(X, y,
              epochs=model_params["epochs"],
              batch_size=model_params["batch_size"],
              callbacks=callbacks_list)

    with open("model-opm.json", "w") as json_file:
        json_file.write(model.to_json())
def _pre_training(self, init_frame, G):
    # G has the same size as the selected target region
    height, width = G.shape
    fi = cv2.resize(init_frame, (width, height))
    # pre-process img..
    fi = pre_process(fi)
    # np.fft.fft2 computes the 2-D Fourier transform of fi;
    # np.conjugate takes the complex conjugate of a matrix,
    # e.g. for g = np.matrix('[1+2j, 2+3j; 3-2j, 1-4j]'),
    # g.conjugate() is matrix([[1-2j, 2-3j], [3+2j, 1+4j]])
    Ai = G * np.conjugate(np.fft.fft2(fi))
    Bi = np.fft.fft2(init_frame) * np.conjugate(np.fft.fft2(init_frame))
    # apply several rigid deformations (random warps) to fi to make detection
    # more robust, accumulating the initial values of Ai and Bi
    for _ in range(self.args.num_pretrain):
        if self.args.rotate:
            fi = pre_process(random_warp(init_frame))
        else:
            fi = pre_process(init_frame)
        Ai = Ai + G * np.conjugate(np.fft.fft2(fi))
        Bi = Bi + np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))
    return Ai, Bi
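# For context, a minimal sketch of what pre_process typically does in MOSSE-style
# trackers (log transform, zero-mean/unit-variance normalization, cosine window).
# This is an assumed illustration; the actual helper used above is not reproduced here.
import numpy as np

def pre_process_sketch(img, eps=1e-5):
    img = np.log(img + 1.0)                       # compress dynamic range
    img = (img - img.mean()) / (img.std() + eps)  # normalize to zero mean, unit variance
    window = np.outer(np.hanning(img.shape[0]),   # 2-D cosine (Hanning) window
                      np.hanning(img.shape[1]))   # suppresses boundary effects in the FFT
    return img * window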
def gen_new_train(param):
    # load data
    X_train, y_train = utils.load_data('./data/train.p')
    # data augmentation
    X_train, y_train = utils.augment_data(X_train, y_train, param)
    # pre-process
    X_train = np.array(
        [utils.pre_process(X_train[i]) for i in range(len(X_train))],
        dtype=np.float32)
    # one hot
    oh_y_train = utils.one_hot_encode(y_train)
    return X_train, y_train, oh_y_train
def exp_test_data():
    # parameters
    param = ExperimentParam(n_rows=32, n_cols=32, n_channels=3, n_classes=43)

    # load data
    X_test, y_test = utils.load_data('./data/test.p')
    X_test = np.array(
        [utils.pre_process(X_test[i]) for i in range(len(X_test))],
        dtype=np.float32)

    # load model and evaluate
    model_fname = param._model_fname
    accuracy, _ = test_model(model_fname, param, X_test, y_test)
    print('Test accuracy: ', accuracy)
def test(valid_queue, model, num_samples, args, logging):
    if args.distributed:
        dist.barrier()
    nelbo_avg = utils.AvgrageMeter()
    neg_log_p_avg = utils.AvgrageMeter()
    model.eval()
    for step, x in enumerate(valid_queue):
        x = x[0] if len(x) > 1 else x
        x = x.float().cuda()

        # change bit length
        x = utils.pre_process(x, args.num_x_bits)

        with torch.no_grad():
            nelbo, log_iw = [], []
            for k in range(num_samples):
                logits, log_q, log_p, kl_all, _ = model(x)
                output = model.decoder_output(logits)
                recon_loss = utils.reconstruction_loss(output, x, crop=model.crop_output)
                balanced_kl, _, _ = utils.kl_balancer(kl_all, kl_balance=False)
                nelbo_batch = recon_loss + balanced_kl
                nelbo.append(nelbo_batch)
                log_iw.append(
                    utils.log_iw(output, x, log_q, log_p, crop=model.crop_output))

            nelbo = torch.mean(torch.stack(nelbo, dim=1))
            log_p = torch.mean(
                torch.logsumexp(torch.stack(log_iw, dim=1), dim=1) - np.log(num_samples))

        nelbo_avg.update(nelbo.data, x.size(0))
        neg_log_p_avg.update(-log_p.data, x.size(0))

    utils.average_tensor(nelbo_avg.avg, args.distributed)
    utils.average_tensor(neg_log_p_avg.avg, args.distributed)
    if args.distributed:
        # block to sync
        dist.barrier()
    logging.info('val, step: %d, NELBO: %f, neg Log p %f',
                 step, nelbo_avg.avg, neg_log_p_avg.avg)
    return neg_log_p_avg.avg, nelbo_avg.avg
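# A plausible sketch of the "change bit length" step above -- an assumption, not
# necessarily the exact utils.pre_process used here: quantize images in [0, 1]
# down to num_bits bits per channel and rescale back to [0, 1].
import torch

def pre_process_sketch(x, num_bits):
    if num_bits != 8:
        x = torch.floor(x * 255 / 2 ** (8 - num_bits))  # keep only the top num_bits bits
        x = x / (2 ** num_bits - 1)                      # rescale back to [0, 1]
    return x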
def test_document_features(self):
    tokens = nltk.word_tokenize(text)
    all_processed_words = utils.pre_process(tokens)
    all_word_freq = nltk.FreqDist(all_processed_words)
    word_features = list(all_word_freq)
    document_features = utils.wrap_document_features(word_features)
    res = document_features(text)
    for name, val in res.items():
        if val:
            s = name.replace('contains(', '').replace(')', '')
            self.assertIn(
                s, all_processed_words,
                msg="Does not contain the word feature: '{word}' from text given".format(word=s))
def run(self):
    super(EnvWorker, self).run()

    episode = 0
    steps = 0
    score = 0
    life = 5
    dead = False

    while True:
        if self.render:
            self.env.render()

        action = self.child_conn.recv()
        next_state, reward, done, info = self.env.step(action + 1)

        if life > info['ale.lives']:
            dead = True
            life = info['ale.lives']

        next_state = pre_process(next_state)
        self.history = np.moveaxis(next_state, -1, 0)

        steps += 1
        score += reward

        self.child_conn.send([deepcopy(self.history), reward, dead, done])

        if done and dead:
            # print('{} episode | score: {:2f} | steps: {}'.format(
            #     episode, score, steps))
            episode += 1
            steps = 0
            score = 0
            dead = False
            life = 5
            self.init_state()

        if dead:
            dead = False
            self.init_state()
def data_generator(df, batch_size=128):
    """
    Yields a pair (X, Y) where X and Y are both numpy arrays of length `batch_size`.
    """
    n_rows = df.shape[0]
    while True:
        # Shuffle the data frame rows after every complete cycle through the data
        df = df.sample(frac=1).reset_index(drop=True)
        for index in range(0, n_rows, batch_size):
            df_batch = df[index:index + batch_size]
            # Ignoring the last batch which is smaller than the requested batch size
            if df_batch.shape[0] == batch_size:
                X_batch = np.array([
                    pre_process(get_image(row)) for i, row in df_batch.iterrows()
                ])
                y_batch = np.array(
                    [row['angle'] for i, row in df_batch.iterrows()])
                yield X_batch, y_batch
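# Hypothetical usage of the generator above with a Keras model (all names assumed):
# model.fit_generator(data_generator(train_df, batch_size=128),
#                     steps_per_epoch=len(train_df) // 128,
#                     epochs=5)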
def show_bad_cases(test_fname, param):
    # load test data
    X_test, y_test = utils.load_data(test_fname)
    X_test_normed = np.array(
        [utils.pre_process(X_test[i]) for i in range(len(X_test))],
        dtype=np.float32)
    n_data, n_rows, n_cols, n_channels = X_test.shape
    param._n_rows = n_rows
    param._n_cols = n_cols
    param._n_channels = n_channels

    # load model
    n_classes = int(np.max(y_test) + 1)
    tf.reset_default_graph()
    net = network.TrafficSignNet(n_classes, param)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, param._model_fname)

    # test
    preds_test = sess.run(net._preds, {
        net._X: X_test_normed,
        net._is_training: False
    })
    test_accuracy = utils.classification_accuracy(y_test, preds_test)
    print('test accuracy: ', test_accuracy)
    sess.close()
    X_test_normed = None

    # show test images that are not correctly classified
    err_indices = np.where(preds_test != y_test)[0]
    utils.show_images(X_test, y_test, err_indices, n_cols=5,
                      num_images=200, preds=preds_test)
def generate(dataset, weights, json_path):
    raw_text = open(dataset).read()
    pre_text = utils.pre_process(raw_text)
    char_map = utils.map_chars_to_int(pre_text)
    int_map = utils.map_int_to_char(pre_text)

    params = {
        "seq_length": 80,
        "n_chars": len(pre_text),
        "n_vocab": len(char_map)
    }

    testX, _ = utils.prepare_dset(pre_text, char_map, params)
    params["n_patterns"] = len(testX)

    with open(json_path, 'r') as json_file:
        json_model = json_file.read()
    model = model_from_json(json_model)
    model.load_weights(weights)
    model.compile(loss="categorical_crossentropy", optimizer="adam")

    # seed the generator with a random pattern from the data set
    start = np.random.randint(0, len(testX) - 1)
    sentence = testX[start]
    output = []
    for i in range(1500):
        x = np.reshape(sentence, (1, len(sentence), 1))
        x = x / float(params["n_vocab"])
        prediction = model.predict(x, verbose=0)
        index = np.argmax(prediction)
        result = int_map[index]
        output.append(result)
        # slide the window: append the prediction and drop the first character
        sentence.append(index)
        sentence = sentence[1:len(sentence)]
    print("".join(output))
def test_pre_process(self):
    # 3 1d_bins: (0, 85, 170, 255)
    n_1d_bins = 3
    resolution = 2
    image = np.array(
        [[[50, 50, 50], [0, 25, 100]], [[0, 50, 100], [100, 150, 200]]],
        dtype=np.uint8
    )
    # note the (width, height, 1) size instead of (width, height)
    expected_luminance = np.array([[[-75], [-94]], [[-75], [26]]], dtype=int)
    expected_ab_bins = np.array([[[4], [3]], [[4], [4]]], dtype=int)

    luminance, ab_bins = pre_process(image, resolution, n_1d_bins)

    np.testing.assert_equal(luminance, expected_luminance)
    np.testing.assert_equal(ab_bins, expected_ab_bins)
    self.assertEqual(luminance.dtype, expected_luminance.dtype)
    self.assertEqual(ab_bins.dtype, expected_ab_bins.dtype)
def forward(self, x, ret_edge=False):
    if ret_edge:
        edge_list = []
    x, params = utils.pre_process(x, self.tkargs[1], self.eval())
    z = torch.cat([self.PPCONV[i](self.INCONV[i](x)) for i in range(3)], dim=1)
    hiz = self.HPF(z)
    for i in range(self.iters):
        z0 = (1 - self.alpha[i]) * z + self.beta[i] * hiz
        z = self.LPF[i](z0, ret_edge=ret_edge)
        if ret_edge:
            z, edge = z
            edge_list.append(edge)
        z = z0 + z
    z = (1 - self.alpha[-1]) * z + self.beta[-1] * hiz
    edge = self.topK(z)
    z = self.GCout(z, edge)
    x = utils.post_process(x + z, params)
    if ret_edge:
        edge_list.append(edge)
        return x, edge_list
    return x
def main(img_path, side_num):
    img = read_img(img_path)
    opened = pre_process(img)

    # find polygons or circles
    if side_num == 0:
        approx_contours = find_circle(img, opened)
    else:
        approx_contours = find_poly(img, opened, side_num=side_num)

    # find momentums (contour centers)
    momentum_img = get_blank_img(img.shape)
    cv2.drawContours(momentum_img, approx_contours, -1, (0, 0, 0), 2)
    for contour in approx_contours:
        center_x, center_y = get_momentum(contour)
        print("x,y: {},{}".format(center_x, center_y))
        cv2.circle(momentum_img, (center_x, center_y), 7, 128, -1)  # draw the center point

    # show result
    show_img(momentum_img, 'momentums')
    cv2.imwrite("./image/momentum.png", momentum_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def generator(X_train, y_train, batch_size, training=False):
    '''
    Data generator used for training the model.
    - Reads images from path
    - Preprocesses images
    - Augments training data only
    '''
    number_samples = len(y_train)
    while 1:
        # shuffle returns shuffled copies, so the result must be assigned back
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, number_samples, batch_size):
            X, y = X_train[offset:offset + batch_size], y_train[offset:offset + batch_size]
            X = [utils.open_image(x) for x in X]
            X = [utils.pre_process(x) for x in X]
            if training:
                for i, (image, label) in enumerate(zip(X, y)):
                    X[i] = utils.augment(image)
            X = np.array([keras_image.img_to_array(x) for x in X])
            yield shuffle(X, y)
def main():
    # get playlist ids for the user
    ids = utils.get_user_playlists(sp, login).id

    # get a dataframe with all tracks from the playlists
    playlist_tracks_df = pd.DataFrame()
    for i in ids:
        temp = utils.get_playlist_tracks(sp, i)
        playlist_tracks_df = pd.concat([playlist_tracks_df, temp])
    playlist_tracks_df = playlist_tracks_df.reset_index(drop=True)

    # k-means clustering flow
    preprocessor = utils.pre_process()
    playlist_df_clusters = utils.train_model(preprocessor, playlist_tracks_df, 7)
    clusters = utils.cluster_averages(playlist_df_clusters).sort_values('danceability')
    clusters['duration_minutes'] = clusters.duration_ms / 1000 / 60
    print(clusters)
    return
# -*- coding: utf-8 -*-
import tensorflow as tf
from word2vec import comment_embedding, WordModel
import gensim.models.keyedvectors as word2vec
import numpy as np
from utils import pre_process
from underthesea import word_tokenize

# sample Vietnamese shop-review comments used to exercise the preprocessing
text = ["San pham dep, minh rat thich chat vai nay!!!! 👍👍👍",
        "Làm ăn như ccccc 😡😡😡",
        "<3",
        "💥💥💥Giao hàng nhanh.👏 💥💥💥",
        "🔥 <3",
        "Shop chán vkl",
        "Nchung là ổn, giao hàng nhanh kb cho ⭐⭐⭐⭐⭐"]

text = pre_process(text)
print(text)

for i in range(len(text)):
    text[i] = word_tokenize(text[i])
print(text)
def start_tracking(self):
    time_list = []
    # get the image of the first frame... (read as gray scale image...)
    init_img = cv2.imread(self.frame_lists[0])
    init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)
    # get the init ground truth.. [x, y, width, height]
    init_gt = cv2.selectROI('demo', init_img, False, False)
    init_gt = np.array(init_gt).astype(np.int64)
    # start to draw the gaussian response...
    response_map = self._get_gauss_response(init_frame, init_gt)
    # start to create the training set ...
    # get the goal..
    g = response_map[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]
    fi = init_frame[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]
    G = np.fft.fft2(g)
    # start to do the pre-training...
    Ai, Bi = self._pre_training(fi, G)
    # start the tracking...
    i = 0
    for idx in range(len(self.frame_lists)):
        start = time.time()
        current_frame = cv2.imread(self.frame_lists[idx])
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)
        if idx == 0:
            Ai = self.args.lr * Ai
            Bi = self.args.lr * Bi
            pos = init_gt.copy()
            clip_pos = np.array([pos[0], pos[1], pos[0]+pos[2], pos[1]+pos[3]]).astype(np.int64)
        else:
            Hi = Ai / Bi
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
            Gi = Hi * np.fft.fft2(fi)
            gi = linear_mapping(np.fft.ifft2(Gi))
            # find the max pos...
            max_value = np.max(gi)
            max_pos = np.where(gi == max_value)
            dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
            dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)
            # update the position...
            pos[0] = pos[0] + dx
            pos[1] = pos[1] + dy
            # trying to get the clipped position [xmin, ymin, xmax, ymax]
            clip_pos[0] = np.clip(pos[0], 0, current_frame.shape[1])
            clip_pos[1] = np.clip(pos[1], 0, current_frame.shape[0])
            clip_pos[2] = np.clip(pos[0]+pos[2], 0, current_frame.shape[1])
            clip_pos[3] = np.clip(pos[1]+pos[3], 0, current_frame.shape[0])
            clip_pos = clip_pos.astype(np.int64)
            # get the current fi..
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
            # online update...
            Ai = self.args.lr * (G * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Ai
            Bi = self.args.lr * (np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Bi
        # visualize the tracking process...
        cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0]+pos[2], pos[1]+pos[3]), (255, 0, 0), 2)
        # out.write(current_frame)
        cv2.imshow('demo11', current_frame)
        cv2.imwrite('goog/' + str(i) + '.jpg', current_frame)
        i += 1
        cv2.waitKey(10)
        # if record... save the frames..
        # if self.args.record:
        #     frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
        #     if not os.path.exists(frame_path):
        #         os.mkdir(frame_path)
        #     cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)
        #     # out.write(current_frame)
        end = time.time()
        time_list.append(end - start)
    # out.release()  # no VideoWriter is created in this version, so there is nothing to release
    print('Frames written successfully!')
    return time_list
        selected_VGG_layer_weights, selected_VGG_layers)
    encoded = keras.models.Model(
        inputs=inputs,
        outputs=model.get_layer('encoder').get_layer('encoded').output)
    return model, encoder, decoder, lossModel, encoded


if __name__ == '__main__':
    nc = Dataset('../Data/tohoku_2020.nc', 'r')
    Z = np.flip(np.array(nc.variables['elevation']), axis=0)
    x = 96
    y = 96
    X_train, X_val, X_test = pre_process(Z, x, y)

    # build model
    inputs = Input(shape=(y, x, 1), name='encoder_inputs')
    # type:
    #   'AE'      = autoencoder
    #   'VAE'     = variational autoencoder
    #   'DFC_VAE' = deep feature consistent variational autoencoder
    model, encoder, decoder, lossModel, encoded = build_model(type='DFC_VAE')

    # hyper parameters
    epochs = 50
    batch_size = 128
    learning_rate = 1e-6
def train(train_queue, model, cnn_optimizer, grad_scalar, global_step, warmup_iters, writer, logging):
    alpha_i = utils.kl_balancer_coeff(num_scales=model.num_latent_scales,
                                      groups_per_scale=model.groups_per_scale, fun='square')
    nelbo = utils.AvgrageMeter()
    model.train()
    for step, x in enumerate(train_queue):
        x = x[0] if len(x) > 1 else x
        x = x.half().cuda()

        # change bit length
        x = utils.pre_process(x, args.num_x_bits)

        # warm-up lr
        if global_step < warmup_iters:
            lr = args.learning_rate * float(global_step) / warmup_iters
            for param_group in cnn_optimizer.param_groups:
                param_group['lr'] = lr

        # sync parameters, it may not be necessary
        if step % 100 == 0:
            utils.average_params(model.parameters(), args.distributed)

        cnn_optimizer.zero_grad()
        with autocast():
            logits, log_q, log_p, kl_all, kl_diag = model(x)
            output = model.decoder_output(logits)
            kl_coeff = utils.kl_coeff(global_step,
                                      args.kl_anneal_portion * args.num_total_iter,
                                      args.kl_const_portion * args.num_total_iter,
                                      args.kl_const_coeff)

            recon_loss = utils.reconstruction_loss(output, x, crop=model.crop_output)
            balanced_kl, kl_coeffs, kl_vals = utils.kl_balancer(
                kl_all, kl_coeff, kl_balance=True, alpha_i=alpha_i)

            nelbo_batch = recon_loss + balanced_kl
            loss = torch.mean(nelbo_batch)
            norm_loss = model.spectral_norm_parallel()
            bn_loss = model.batchnorm_loss()
            # get spectral regularization coefficient (lambda)
            if args.weight_decay_norm_anneal:
                assert args.weight_decay_norm_init > 0 and args.weight_decay_norm > 0, \
                    'init and final wdn should be positive.'
                wdn_coeff = (1. - kl_coeff) * np.log(args.weight_decay_norm_init) + \
                            kl_coeff * np.log(args.weight_decay_norm)
                wdn_coeff = np.exp(wdn_coeff)
            else:
                wdn_coeff = args.weight_decay_norm

            loss += norm_loss * wdn_coeff + bn_loss * wdn_coeff

        grad_scalar.scale(loss).backward()
        utils.average_gradients(model.parameters(), args.distributed)
        grad_scalar.step(cnn_optimizer)
        grad_scalar.update()
        nelbo.update(loss.data, 1)

        if (global_step + 1) % 100 == 0:
            if (global_step + 1) % 1000 == 0:  # reduced frequency
                n = int(np.floor(np.sqrt(x.size(0))))
                x_img = x[:n * n]
                output_img = output.mean if isinstance(
                    output, torch.distributions.bernoulli.Bernoulli) else output.sample()
                output_img = output_img[:n * n]
                x_tiled = utils.tile_image(x_img, n)
                output_tiled = utils.tile_image(output_img, n)
                in_out_tiled = torch.cat((x_tiled, output_tiled), dim=2)
                writer.add_image('reconstruction', in_out_tiled, global_step)

            # norm
            writer.add_scalar('train/norm_loss', norm_loss, global_step)
            writer.add_scalar('train/bn_loss', bn_loss, global_step)
            writer.add_scalar('train/norm_coeff', wdn_coeff, global_step)

            utils.average_tensor(nelbo.avg, args.distributed)
            logging.info('train %d %f', global_step, nelbo.avg)
            writer.add_scalar('train/nelbo_avg', nelbo.avg, global_step)
            writer.add_scalar('train/lr',
                              cnn_optimizer.state_dict()['param_groups'][0]['lr'],
                              global_step)
            writer.add_scalar('train/nelbo_iter', loss, global_step)
            writer.add_scalar('train/kl_iter', torch.mean(sum(kl_all)), global_step)
            writer.add_scalar('train/recon_iter',
                              torch.mean(utils.reconstruction_loss(output, x, crop=model.crop_output)),
                              global_step)
            writer.add_scalar('kl_coeff/coeff', kl_coeff, global_step)
            total_active = 0
            for i, kl_diag_i in enumerate(kl_diag):
                utils.average_tensor(kl_diag_i, args.distributed)
                num_active = torch.sum(kl_diag_i > 0.1).detach()
                total_active += num_active

                # kl_coeff
                writer.add_scalar('kl/active_%d' % i, num_active, global_step)
                writer.add_scalar('kl_coeff/layer_%d' % i, kl_coeffs[i], global_step)
                writer.add_scalar('kl_vals/layer_%d' % i, kl_vals[i], global_step)
            writer.add_scalar('kl/total_active', total_active, global_step)

        global_step += 1

    utils.average_tensor(nelbo.avg, args.distributed)
    return nelbo.avg, global_step
                                          shape=[None, ],
                                          name='image_lables_tensor')
    images_path_tensor_val = tf.placeholder(tf.string,
                                            shape=[None, ],
                                            name='images_path_tensor_val')

    # A hack to add validation accuracy in tensorboard
    val_accuracy = tf.placeholder(tf.double, shape=[], name='val_accuracy')

    # Training
    print('[INFO]: getting training model')
    net = Network(FLAGS)
    images_tensor = pre_process(images_path_tensor, FLAGS)
    _print_shape = tf.Print(images_tensor, [tf.shape(images_tensor)],
                            message="[INFO] current train batch shape: ",
                            first_n=1)
    with tf.control_dependencies([_print_shape]):
        train = net(images_tensor, images_label_tensor)

    # Validation
    val_image_tensor = pre_process(images_path_tensor_val, FLAGS, mode='val')
    _print_val_shape = tf.Print(val_image_tensor, [tf.shape(val_image_tensor)],
                                message="[INFO] current val batch shape: ",
                                first_n=1)
    with tf.control_dependencies([_print_val_shape]):
        val_forward_pass = net.forward_pass(val_image_tensor)

    # Add summaries
def start_tracking(self):
    # get the image of the first frame... (read as gray scale image...)
    init_img = cv2.imread(self.frame_lists[0])
    init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)
    # get the init ground truth.. [x, y, width, height]
    init_gt = cv2.selectROI('demo', init_img, False, False)
    init_gt = np.array(init_gt).astype(np.int64)
    # start to draw the gaussian response...
    response_map = self._get_gauss_response(init_frame, init_gt)
    # start to create the training set ...
    # get the goal..
    g = response_map[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]
    fi = init_frame[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]
    G = np.fft.fft2(g)
    # start to do the pre-training...
    Ai, Bi = self._pre_training(fi, G)
    # start the tracking...
    for idx in range(len(self.frame_lists)):
        current_frame = cv2.imread(self.frame_lists[idx])
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)
        if idx == 0:
            Ai = self.args.lr * Ai
            Bi = self.args.lr * Bi
            pos = init_gt.copy()
            clip_pos = np.array([pos[0], pos[1], pos[0]+pos[2], pos[1]+pos[3]]).astype(np.int64)
        else:
            Hi = Ai / Bi
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
            Gi = Hi * np.fft.fft2(fi)
            gi = linear_mapping(np.fft.ifft2(Gi))
            # find the max pos...
            max_value = np.max(gi)
            max_pos = np.where(gi == max_value)
            dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
            dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)
            # update the position...
            pos[0] = pos[0] + dx
            pos[1] = pos[1] + dy
            # trying to get the clipped position [xmin, ymin, xmax, ymax]
            clip_pos[0] = np.clip(pos[0], 0, current_frame.shape[1])
            clip_pos[1] = np.clip(pos[1], 0, current_frame.shape[0])
            clip_pos[2] = np.clip(pos[0]+pos[2], 0, current_frame.shape[1])
            clip_pos[3] = np.clip(pos[1]+pos[3], 0, current_frame.shape[0])
            clip_pos = clip_pos.astype(np.int64)
            # get the current fi..
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
            # online update...
            Ai = self.args.lr * (G * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Ai
            Bi = self.args.lr * (np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Bi
        # visualize the tracking process...
        cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0]+pos[2], pos[1]+pos[3]), (255, 0, 0), 2)
        cv2.imshow('demo', current_frame)
        cv2.waitKey(100)
        # if record... save the frames..
        if self.args.record:
            frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
            if not os.path.exists(frame_path):
                os.mkdir(frame_path)
            cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)