def validate(val_loader, model, criterion, epoch):
    model.eval()
    losses = AverageMeter()
    for i, (input_gray, input_ab, target) in enumerate(val_loader):
        if use_gpu:
            input_gray = input_gray.cuda()
            input_ab = input_ab.cuda()
            target = target.cuda()

        output_ab = model(input_gray)
        loss = criterion(output_ab, input_ab)
        losses.update(loss.item(), input_gray.size(0))

        # save every validation image: the grayscale input and the colorized output
        for j in range(len(output_ab)):
            save_path = {
                'grayscale': 'test_imgs/gray/',
                'colorized': 'test_imgs/color/'
            }
            save_name = f'img-{i * val_loader.batch_size + j}.jpg'
            to_rgb(input_gray[j].cpu(),
                   ab_input=output_ab[j].detach().cpu(),
                   save_path=save_path,
                   save_name=save_name)

        if i % 25 == 0:
            print(f'batch: {i}/{len(val_loader)}, '
                  f'loss value: {losses.value}, loss average: {losses.average}')

    print(f'Finished validation after epoch: {epoch}.')
    return losses.average
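# `AverageMeter` is used by `validate` above but not defined in this snippet.
# A minimal sketch matching the `update` / `value` / `average` names taken from
# the call sites (an assumption, not the project's verified implementation):
class AverageMeter:
    """Tracks the latest value and the running average of a metric."""

    def __init__(self):
        self.value = 0.0
        self.average = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        # `n` is the batch size, so the average is weighted per sample.
        self.value = value
        self.sum += value * n
        self.count += n
        self.average = self.sum / self.count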
def evaluate(gray, ab_input, bins, model, colors_path, temperature):
    model.eval()
    with torch.no_grad():
        bins = bins.squeeze(0)
        # Use GPU
        # if use_gpu: gray, ab_input, bins = gray.cuda(), ab_input.cuda(), bins.cuda()
        # Run model and record loss
        # add batch size 1 for single image
        output_bins = model(gray.unsqueeze(1))
        # print(annealed_mean(output_bins.squeeze(0).numpy(), 0.36))
        # print(output_bins.squeeze(0).shape)
        # remove batch size and get the max index of the predicted bin for each pixel
        color_image = to_rgb(
            gray,
            torch.from_numpy(
                deserialize_bins(
                    output_bins.squeeze(0).argmax(0),
                    temperature,
                    colors_path['mode'],
                    colors_path['mean'],
                )).float())
        return color_image
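# `annealed_mean` appears above only in a commented-out print. A sketch of the
# usual annealed mean from Zhang et al. (2016), "Colorful Image Colorization",
# which re-sharpens a per-pixel distribution over color bins with a temperature
# T (an illustration, not necessarily this project's implementation):
import numpy as np

def annealed_mean(bin_probs, temperature, eps=1e-8):
    """Sharpen per-pixel bin probabilities: p_T = p^(1/T) / sum(p^(1/T))."""
    # bin_probs: array of shape (num_bins, H, W)
    logits = np.log(bin_probs + eps) / temperature
    logits -= logits.max(axis=0, keepdims=True)  # numerical stability
    probs = np.exp(logits)
    return probs / probs.sum(axis=0, keepdims=True)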
def combine_to_image(self, images: np.ndarray, labels: np.ndarray,
                     predictions: np.ndarray) -> np.ndarray:
    """
    Concatenates the three tensors into one RGB image.

    :param images: images tensor, shape [None, nx, ny, channels]
    :param labels: labels tensor, shape [None, nx, ny, 1] for sparse or [None, nx, ny, classes] for one-hot
    :param predictions: predictions tensor, shape [None, nx, ny, classes]
    :return: image tensor, shape [None, nx, 3 x ny, 3]
    """
    if predictions.shape[-1] == 2:
        mask = predictions[..., :1]
    else:
        mask = np.argmax(predictions, axis=-1)[..., np.newaxis]

    output = np.concatenate((utils.to_rgb(images),
                             utils.to_rgb(labels[..., :1]),
                             utils.to_rgb(mask)),
                            axis=2)
    return output
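# A hedged usage sketch for `combine_to_image`, assuming a binary-segmentation
# batch of four 64x64 single-channel images; `trainer` is a hypothetical
# instance of the class that owns this method:
import numpy as np

images = np.random.rand(4, 64, 64, 1).astype(np.float32)
labels = (np.random.rand(4, 64, 64, 1) > 0.5).astype(np.float32)
predictions = np.random.rand(4, 64, 64, 2).astype(np.float32)

panel = trainer.combine_to_image(images, labels, predictions)
assert panel.shape == (4, 64, 3 * 64, 3)  # input | label | prediction, side by side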
def preview(self, select=True):
    target = None
    if select:
        target = self.list_ckpt()
    self.load(self.cfg.checkpoint_dir, target)
    # draw 63 latent vectors uniformly from [-1, 1)
    _z = np.random.uniform(-1, 1, [63, self.cfg.z_dim]).astype(np.float32)
    z = tf.placeholder(tf.float32, [None, self.cfg.z_dim])
    ret = self.sampler(z)
    imgs = self.sess.run(ret, feed_dict={z: _z})
    img = merge(imgs, get_layout(_z.shape[0]))
    img = to_rgb(img)
    plt.figure(figsize=(16, 9))
    plt.imshow(img)
    plt.show()
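# `merge` and `get_layout` are not defined in this snippet. A minimal sketch of
# the usual DCGAN-style tiling helpers, assuming `imgs` has shape [n, h, w, c]
# and `get_layout` picks a near-square (rows, cols) grid (an assumption, not
# the project's verified code; 63 samples tile as 7 x 9):
import math
import numpy as np

def get_layout(n):
    rows = int(math.floor(math.sqrt(n)))
    cols = int(math.ceil(n / rows))
    return rows, cols

def merge(imgs, layout):
    rows, cols = layout
    n, h, w, c = imgs.shape
    grid = np.zeros((rows * h, cols * w, c), dtype=imgs.dtype)
    for idx in range(n):
        r, col = divmod(idx, cols)
        grid[r * h:(r + 1) * h, col * w:(col + 1) * w] = imgs[idx]
    return grid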
def cut_foreground(image, mask):
    """
    Cut the foreground from the image using the mask supplied

    :param image: image from which to cut the foreground
    :param mask: mask of the foreground
    :return: image with only the foreground
    :raise: *IndexError* if the image has the wrong number of channels
    """
    if len(image.shape) == 2 or image.shape[2] == 1:
        # we have a greyscale image
        return image * mask
    elif len(image.shape) == 3 and image.shape[2] == 3:
        return image * utils.to_rgb(mask)
    else:
        raise IndexError(
            "image has the wrong number of channels (must have 1 or 3 channels)")
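# `utils.to_rgb` is assumed above but not shown. A minimal sketch that stacks a
# single-channel array into three identical channels, which is all
# `cut_foreground` needs in order to broadcast a mask over an RGB image (an
# assumption, not the project's actual implementation):
import numpy as np

def to_rgb(img):
    img = np.atleast_3d(img)
    if img.shape[2] == 1:
        img = np.tile(img, (1, 1, 3))  # replicate the channel three times
    return img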
def get_hog_desc(image_paths, do_prewhiten=False):
    nrof_samples = len(image_paths)
    # images = np.zeros((nrof_samples, crop_size, crop_size, 3), dtype=np.float32)
    hog_descriptors = np.zeros((nrof_samples, 2025), dtype=np.float32)
    hog = get_hog()
    for i in range(nrof_samples):
        img = cv2.imread(image_paths[i], cv2.IMREAD_COLOR)
        if img.ndim == 2:
            img = utils.to_rgb(img)
        if i % 10 == 0:
            # cv2.imshow('img', img)
            # cv2.waitKey(1)
            print('image # %d' % (i))
        # if do_prewhiten:
        #     img = utils.prewhiten(img)
        hog_descriptors[i, :] = np.squeeze(hog.compute(img))
    hog_descriptors = np.squeeze(hog_descriptors)
    print(str(nrof_samples) + ' images loaded successfully')
    cv2.destroyAllWindows()
    return hog_descriptors
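# `get_hog` is not defined in this snippet, but the descriptor buffer above is
# sized at 2025 floats. One OpenCV parameterization that yields exactly
# 2025 = 15 x 15 block positions x 9 bins is sketched below (an assumption
# about the window geometry, not the project's verified settings):
import cv2

def get_hog():
    win_size = (120, 120)   # (120 - 8) / 8 + 1 = 15 block positions per axis
    block_size = (8, 8)     # one 8x8 cell per block
    block_stride = (8, 8)
    cell_size = (8, 8)
    nbins = 9
    return cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)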
def main(self):
    dataset = []
    dataset.extend(os.listdir(self.testImagePath))
    for i, imgName in enumerate(dataset):
        try:
            self.imgName = imgName
            img = cv2.imread(self.testImagePath + '/' + imgName)
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            if gray.ndim == 2:
                img = to_rgb(gray)
            a = datetime.now()
            PLst, PLst2 = self.pnetDetector(img)
            b = datetime.now()
            if len(PLst) == 0:
                print("PNet Not Found !!!")
                continue
            RLst, RLst2 = self.rnetDetector(img, PLst, PLst2)
            c = datetime.now()
            if len(RLst) == 0:
                print("RNet Not Found !!!")
                continue
            OLst = self.onetDetector(img, RLst, RLst2)
            d = datetime.now()
            if len(OLst) == 0:
                print("ONet Not Found !!!")
                continue
            # total_seconds() avoids the wrap-around of .microseconds for
            # stages that take longer than one second
            pt = int((b - a).total_seconds() * 1000)
            rt = int((c - b).total_seconds() * 1000)
            ot = int((d - c).total_seconds() * 1000)
            print("pnet time: {0} ms, rnet time: {1} ms, onet time: {2} ms, imgName: {3}".format(
                pt, rt, ot, self.imgName))
            self.screenImgTest(OLst, self.imgName, 'OLst')
        except Exception as e:
            print("************************", str(e))
def do_face_reginition_process_from_input(self, filepath, img=None):
    with self.sess.as_default():
        with self.sess.graph.as_default():
            frame = None
            # handle the image passed in
            if img is None:
                frame = misc.imread(filepath)
            else:
                frame = img
            res = []
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            ori_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if gray.ndim == 2:
                gray = to_rgb(gray)
            img = gray[:, :, 0:3]
            bboxs = self.detectFaceBoundingBox_mtcnn(img)
            if bboxs is not None:
                # im = self.fpe.doFaceEstimater(frame)
                # start processing the detected faces
                for i, bb in enumerate(bboxs):
                    x = bb[0]
                    y = bb[1]
                    x1 = bb[2]
                    y1 = bb[3]
                    cv2.rectangle(frame, (x, y), (x1, y1), (0, 255, 0), 2)
                    reginized_name = None
                    emotion_detect = None
                    cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
                    if self.emotion_reginition_mode:
                        # print('index ================', index)
                        t = ori_img[bb[1]:bb[3], bb[0]:bb[2]]
                        emotion_detect = self.do_emotion_reginition_process(t)

                    # preprocess the face crop
                    img_list = []
                    aligned = misc.imresize(cropped,
                                            (self.image_size, self.image_size),
                                            interp='bilinear')
                    prewhitened = facenet.prewhiten(aligned)
                    img_list.append(prewhitened)
                    images = np.stack(img_list)

                    # Run forward pass to calculate embeddings
                    feed_dict = {
                        self.images_placeholder: images,
                        self.phase_train_placeholder: False
                    }
                    emb = self.sess.run(self.embeddings, feed_dict=feed_dict)
                    # embedding of the face detected in this crop
                    current_emb = emb[0, :]
                    # person_name = knn.predict([current_emb])[0]
                    # print("***********************knn classifier person name result:", person_name)

                    # recognition: compare against every cached face encoding
                    distance_list = []
                    candi_len = len(self.face_encode_list)
                    for id in range(candi_len):
                        dist = np.sqrt(
                            np.sum(
                                np.square(
                                    np.subtract(current_emb,
                                                self.face_encode_list[id]))))
                        distance_list.append(dist)
                    print("distance list", "=" * 30, distance_list)
                    X = softmax_label([distance_list])
                    max_index = np.argmax(X, axis=1)[0]
                    prob = str(round(100 * (X[0][max_index]), 2))
                    min_distance = distance_list[max_index]
                    if min_distance < self.min_face_distance:
                        reginized_name = os.path.splitext(
                            self.fname_list[max_index])[0]
                        reginized_name = reginized_name + "(" + prob + "%)"
                    else:
                        reginized_name = 'unknown'
                    print('reginized person is:', reginized_name)
                    recognition_title = reginized_name
                    if emotion_detect is not None:
                        recognition_title = reginized_name + "(" + emotion_detect + ")"
                    obj = DetectObj(i, bb, recognition_title)
                    res.append(obj)
            return res
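# `softmax_label` is not defined in this snippet. Given that the code above
# takes the argmax of its output as the best match and then checks that the
# matched distance is small, a plausible sketch is a softmax over negated
# distances (an assumption, not the verified implementation):
import numpy as np

def softmax_label(distance_rows):
    d = np.asarray(distance_rows, dtype=np.float64)
    logits = -d  # smaller distance -> larger probability
    logits -= logits.max(axis=1, keepdims=True)  # numerical stability
    e = np.exp(logits)
    return e / e.sum(axis=1, keepdims=True)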
def uniform_hist(im):
    # (the code that builds accum_i / accum_s precedes this point)
    rango_busqueda = [float(i) / len(accum_i) for i in range(len(accum_i))]

    def w_dot(r):
        # The intensity we look up is the one from the cumulative histogram.
        si = accum_s[side_by_side.search_not_exact(r[1], rango_busqueda)]
        return [r[0], si, r[2]]

    ret = im.copy()
    for i in range(ret.shape[0]):
        for j in range(ret.shape[1]):
            ret[i][j] = w_dot(ret[i][j])
    return ret


def transformacion_puntual(im, f):
    ret = im.copy()
    for i in range(ret.shape[0]):
        for j in range(ret.shape[1]):
            ret[i][j] = f(ret[i][j])
    return ret


im1 = np.asarray(Image.open(argv[1]).convert('RGB'))
im2 = utils.to_hsi(im1)
im3 = uniform_hist(im2)
# im3 = transformacion_puntual(im2, umbral05)
im4 = utils.to_rgb(im3)
side_by_side.sbys_histogram([im1, im2, im3, im4],
                            ['rgb', 'hsi', 'hsi', 'rgb'],
                            argv=argv[2] if len(argv) > 2 else None)
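# `accum_i` and `accum_s` are referenced in `uniform_hist` but their setup was
# cut off above. A hedged sketch of a normalized cumulative histogram over a
# channel in [0, 1], the usual ingredient for histogram equalization (an
# assumption, not the recovered original):
import numpy as np

def cumulative_hist(channel, bins=256):
    hist, _ = np.histogram(channel, bins=bins, range=(0.0, 1.0))
    accum = np.cumsum(hist).astype(np.float64)
    return accum / accum[-1]  # normalize so the last bin is 1.0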
if SAVED_MODEL_PATH is not None:
    model.load_state_dict(torch.load(SAVED_MODEL_PATH))
    print(SAVED_MODEL_PATH)
    print('Model loaded')

# Count number of parameters
pytorch_total_params = sum(p.numel() for p in model.parameters())
print(pytorch_total_params)

image = Image.open(IMAGE_PATH)
image = TF.resize(image, 128)
image = TF.center_crop(image, 128)
gray, image_ab, bins = load_img(image, N_BINS)
output_image = evaluate(
    gray, image_ab, bins, model, {
        'mode': 'cached_colors/second-dataset/mode_color_bins_' + str(N_BINS) + '.npy',
        'mean': 'cached_colors/second-dataset/mean_color_bins_' + str(N_BINS) + '.npy'
    }, temperature)

f, axarr = plt.subplots(len(gray), 2)
axarr[0].set_title('Ground truth')
axarr[0].imshow(to_rgb(gray, image_ab))
axarr[1].set_title('Generated')
axarr[1].imshow(output_image)
plt.pause(5)
def do_face_reginition_process(self):
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Load the model
            facenet.load_model(self.model)
            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            # open the camera capture
            cap = self.get_capture_cv2()
            pnet, rnet, onet = self.load_pronet()
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_size = 1
            index = 0
            image_save = False
            fname_list, face_encode_list = self.query_face_feature_from_db()
            knn = KNeighborsClassifier(n_neighbors=1)
            knn.fit(face_encode_list, fname_list)
            while True:
                # process input key
                k = cv2.waitKey(1)
                if k == ord('q'):
                    # press 'q' to quit
                    break
                if k == ord('s'):
                    # press 's' to save the current face crop
                    image_save = True
                    index += 1
                # capture frame-by-frame
                ret, frame = cap.read()
                print("frame----=", pickle.dumps(frame))
                # our operations on the frame start here
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                ori_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if gray.ndim == 2:
                    gray = to_rgb(gray)
                img = gray[:, :, 0:3]
                if self.face_reginition_mode == 1:
                    bboxs = self.detectFaceBoundingBox_mtcnn(
                        img, self.minsize, pnet, rnet, onet, self.threshold,
                        self.factor, self.margin, self.detect_multiple_faces)
                    if bboxs is not None:
                        im = self.fpe.doFaceEstimater(frame)
                        max_width = self.get_max_width_from_bounding_boxes(bboxs)
                        # start processing the detected faces
                        for i, bb in enumerate(bboxs):
                            x = bb[0]
                            y = bb[1]
                            x1 = bb[2]
                            y1 = bb[3]
                            # anchor point for drawing the person's name
                            text_x = int((x + x1) / 2)
                            text_font_size = font_size  # float(((abs(x - x1)) / max_width) * font_size)
                            cv2.rectangle(frame, (x, y), (x1, y1), (0, 255, 0), 2)
                            # cv2.putText(frame, 'hhy', (text_x, y), font, text_font_size, (0, 0, 255), 2)
                            reginized_name = None
                            emotion_detect = None
                            cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
                            if image_save:
                                cv2.imwrite('person_pic/hhy' + str(index) + '.jpg', cropped)
                                image_save = False
                            else:
                                if self.emotion_reginition_mode:
                                    # print('index ================', index)
                                    t = ori_img[bb[1]:bb[3], bb[0]:bb[2]]
                                    emotion_detect = self.do_emotion_reginition_process(t)
                                # preprocess the face crop
                                img_list = []
                                aligned = misc.imresize(cropped, (self.image_size, self.image_size), interp='bilinear')
                                prewhitened = facenet.prewhiten(aligned)
                                img_list.append(prewhitened)
                                images = np.stack(img_list)
                                # Run forward pass to calculate embeddings
                                feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                                emb = sess.run(embeddings, feed_dict=feed_dict)
                                # embedding of the face detected in this crop
                                current_emb = emb[0, :]
                                # person_name = knn.predict([current_emb])[0]
                                # print("***********************knn classifier person name result:", person_name)
                                # recognition: compare against every cached face encoding
                                distance_list = []
                                candi_len = len(face_encode_list)
                                for id in range(candi_len):
                                    dist = np.sqrt(
                                        np.sum(np.square(np.subtract(current_emb, face_encode_list[id]))))
                                    distance_list.append(dist)
                                print("distance list", "=" * 30, distance_list)
                                X = softmax_label([distance_list])
                                max_index = np.argmax(X, axis=1)[0]
                                prob = str(round(100 * (X[0][max_index]), 2))
                                min_distance = distance_list[max_index]
                                if min_distance < self.min_face_distance:
                                    reginized_name = os.path.splitext(fname_list[max_index])[0]
                                    reginized_name = reginized_name + "(" + prob + "%)"
                                else:
                                    reginized_name = 'unknown'
                                print('reginized person is:', reginized_name)
                                recognition_title = reginized_name
                                if emotion_detect is not None:
                                    recognition_title = reginized_name + "(" + emotion_detect + ")"
                                # display the recognized name
                                # speaker.Speak(recognition_title)
                                cv2.putText(frame, recognition_title, (text_x, y),
                                            font, text_font_size, (0, 0, 255), 2)
                    else:
                        pass
                else:
                    pass
                # display the resulting frame
                cv2.imshow('frame', frame)
            # when everything is done, release the capture
            cap.release()
            cv2.destroyAllWindows()
def main(_):
    if FLAGS.config.precrop_iters > 0 and FLAGS.config.batching:
        raise ValueError(
            "'precrop_iters' has no effect when 'batching' the dataset")
    assert FLAGS.config.down_factor > 0 and FLAGS.config.render_factor > 0

    logging.info("JAX host: %d / %d", jax.process_index(), jax.host_count())
    logging.info("JAX local devices: %r", jax.local_devices())

    platform.work_unit().set_task_status(
        f"host_id: {jax.process_index()}, host_count: {jax.host_count()}")
    platform.work_unit().create_artifact(platform.ArtifactType.DIRECTORY,
                                         FLAGS.model_dir, "model_dir")

    os.makedirs(FLAGS.model_dir, exist_ok=True)
    rng = jax.random.PRNGKey(FLAGS.seed)
    rng, rng_coarse, rng_fine, data_rng, step_rng = jax.random.split(rng, 5)
    rngs = common_utils.shard_prng_key(step_rng)

    ### Load dataset and data values
    datasets, counts, optics, render_datasets = get_dataset(
        FLAGS.data_dir, FLAGS.config, rng=data_rng,
        num_poses=FLAGS.config.num_poses)
    train_ds, val_ds, test_ds = datasets
    *_, test_items = counts
    hwf, r_hwf, near, far = optics
    render_ds, render_vdirs_ds, num_poses = render_datasets
    iter_render_ds = zip(range(num_poses), render_ds)
    iter_vdirs_ds = zip(range(num_poses), render_vdirs_ds)
    iter_test_ds = zip(range(test_items), test_ds)
    img_h, img_w, _ = hwf

    logging.info("Num poses: %d", num_poses)
    logging.info("Splits: train - %d, val - %d, test - %d", *counts)
    logging.info("Images: height %d, width %d, focal %.5f", *hwf)
    logging.info("Render: height %d, width %d, focal %.5f", *r_hwf)

    ### Init model parameters and optimizer
    initialized_ = functools.partial(initialized,
                                     model_config=FLAGS.config.model)
    pts_shape = (FLAGS.config.num_rand, FLAGS.config.num_samples, 3)
    views_shape = (FLAGS.config.num_rand, 3)
    model_coarse, params_coarse = initialized_(rng_coarse, pts_shape,
                                               views_shape)

    schedule_fn = optax.exponential_decay(
        init_value=FLAGS.config.learning_rate,
        transition_steps=FLAGS.config.lr_decay * 1000,
        decay_rate=FLAGS.config.decay_factor,
    )
    tx = optax.adam(learning_rate=schedule_fn)
    state = train_state.TrainState.create(apply_fn=(model_coarse.apply, None),
                                          params={"coarse": params_coarse},
                                          tx=tx)

    if FLAGS.config.num_importance > 0:
        pts_shape = (
            FLAGS.config.num_rand,
            FLAGS.config.num_importance + FLAGS.config.num_samples,
            3,
        )
        model_fine, params_fine = initialized_(rng_fine, pts_shape, views_shape)
        state = train_state.TrainState.create(
            apply_fn=(model_coarse.apply, model_fine.apply),
            params={
                "coarse": params_coarse,
                "fine": params_fine
            },
            tx=tx,
        )

    state = checkpoints.restore_checkpoint(FLAGS.model_dir, state)
    start_step = int(state.step)

    # cycle already seen examples if resuming from checkpoint
    # (only useful for ensuring deterministic dataset, slow for large start_step)
    # if start_step != 0:
    #     for _ in range(start_step):
    #         _ = next(train_ds)

    # parameter_overview.log_parameter_overview(state.optimizer_coarse.target)
    # if FLAGS.config.num_importance > 0:
    #     parameter_overview.log_parameter_overview(state.optimizer_fine.target)

    state = jax.device_put_replicated(state, jax.local_devices())

    ### Build "pmapped" functions for distributed training
    train_fn = functools.partial(train_step, near, far, FLAGS.config,
                                 schedule_fn)
    p_train_step = jax.pmap(
        train_fn,
        axis_name="batch",
        in_axes=(0, 0, None, 0),
        # donate_argnums=(0, 1, 2),
    )

    def render_fn(state, rays):
        step_fn = functools.partial(eval_step, FLAGS.config, near, far, state)
        return lax.map(step_fn, rays)

    p_eval_step = jax.pmap(
        render_fn,
        axis_name="batch",
        # in_axes=(0, 0, None),
        # donate_argnums=(0, 1))
    )

    # TODO: add hparams
    writer = metric_writers.create_default_writer(
        FLAGS.model_dir, just_logging=jax.process_index() > 0)
    logging.info("Starting training loop.")

    hooks = []
    profiler = periodic_actions.Profile(num_profile_steps=5,
                                        logdir=FLAGS.model_dir)
    report_progress = periodic_actions.ReportProgress(
        num_train_steps=FLAGS.config.num_steps, writer=writer)
    if jax.process_index() == 0:
        hooks += [profiler, report_progress]
    train_metrics = []
    gen_video_ = functools.partial(gen_video, FLAGS.model_dir)

    for step in range(start_step, FLAGS.config.num_steps + 1):
        is_last_step = step == FLAGS.config.num_steps

        batch = next(train_ds)
        coords = None
        if not FLAGS.config.batching:
            coords = jnp.meshgrid(jnp.arange(img_h),
                                  jnp.arange(img_w),
                                  indexing="ij")
            if step < FLAGS.config.precrop_iters:
                dH = int(img_h // 2 * FLAGS.config.precrop_frac)
                dW = int(img_w // 2 * FLAGS.config.precrop_frac)
                coords = jnp.meshgrid(
                    jnp.arange(img_h // 2 - dH, img_h // 2 + dH),
                    jnp.arange(img_w // 2 - dW, img_w // 2 + dW),
                    indexing="ij",
                )
            coords = jnp.stack(coords, axis=-1).reshape([-1, 2])

        with jax.profiler.StepTraceAnnotation("train", step_num=step):
            state, metrics = p_train_step(batch, state, coords, rngs)
        train_metrics.append(metrics)

        logging.log_first_n(logging.INFO, "Finished training step %d.", 5, step)
        _ = [h(step) for h in hooks]

        ### Write train summaries to TB
        if step % FLAGS.config.i_print == 0 or is_last_step:
            with report_progress.timed("training_metrics"):
                train_metrics = common_utils.get_metrics(train_metrics)
                train_summary = jax.tree_map(lambda x: x.mean(), train_metrics)
                summary = {f"train/{k}": v for k, v in train_summary.items()}
                writer.write_scalars(step, summary)
            train_metrics = []

        ### Eval a random validation image and plot it to TB
        if (step % FLAGS.config.i_img == 0 and step > 0) or is_last_step:
            with report_progress.timed("validation"):
                inputs = next(val_ds)
                rays, padding = prepare_render_data(inputs["rays"]._numpy())
                outputs = p_eval_step(state, rays)
                preds, preds_c, z_std = jax.tree_map(
                    lambda x: to_np(x, hwf, padding), outputs)
                loss = np.mean((preds["rgb"] - inputs["image"])**2)
                summary = {"val/loss": loss, "val/psnr": psnr_fn(loss)}
                writer.write_scalars(step, summary)

                summary = {
                    "val/rgb": to_rgb(preds["rgb"]),
                    "val/target": to_np(inputs["image"], hwf, padding),
                    "val/disp": disp_post(preds["disp"], FLAGS.config),
                    "val/acc": preds["acc"],
                }
                if FLAGS.config.num_importance > 0:
                    summary["val/rgb_c"] = to_rgb(preds_c["rgb"])
                    summary["val/disp_c"] = disp_post(preds_c["disp"],
                                                      FLAGS.config)
                    summary["val/z_std"] = z_std
                writer.write_images(step, summary)

        ### Render a video with test poses
        if step % FLAGS.config.i_video == 0 and step > 0:
            with report_progress.timed("video_render"):
                logging.info("Rendering video at step %d", step)
                rgb_list = []
                disp_list = []
                for idx, inputs in tqdm(iter_render_ds, desc="Rays render"):
                    rays, padding = prepare_render_data(inputs["rays"].numpy())
                    preds, *_ = p_eval_step(state, rays)
                    preds = jax.tree_map(lambda x: to_np(x, r_hwf, padding),
                                         preds)
                    rgb_list.append(preds["rgb"])
                    disp_list.append(preds["disp"])

                gen_video_(np.stack(rgb_list), "rgb", r_hwf, step)
                disp = np.stack(disp_list)
                gen_video_(disp_post(disp, FLAGS.config), "disp", r_hwf, step,
                           ch=1)

                if FLAGS.config.use_viewdirs:
                    rgb_list = []
                    for idx, inputs in tqdm(iter_vdirs_ds,
                                            desc="Viewdirs render"):
                        rays, padding = prepare_render_data(
                            inputs["rays"].numpy())
                        preds, *_ = p_eval_step(state, rays)
                        rgb_list.append(to_np(preds["rgb"], r_hwf, padding))
                    gen_video_(np.stack(rgb_list), "rgb_still", r_hwf, step)

        ### Save images in the test set
        if step % FLAGS.config.i_testset == 0 and step > 0:
            with report_progress.timed("test_render"):
                logging.info("Rendering test set at step %d", step)
                test_losses = []
                for idx, inputs in tqdm(iter_test_ds, desc="Test render"):
                    rays, padding = prepare_render_data(inputs["rays"].numpy())
                    preds, *_ = p_eval_step(state, rays)
                    save_test_imgs(FLAGS.model_dir, preds["rgb"], r_hwf, step,
                                   idx)

                    if FLAGS.config.render_factor == 0:
                        loss = np.mean((preds["rgb"] - inputs["image"])**2.0)
                        test_losses.append(loss)
                if FLAGS.config.render_factor == 0:
                    loss = np.mean(test_losses)
                    summary = {"test/loss": loss, "test/psnr": psnr_fn(loss)}
                    writer.write_scalars(step, summary)
            writer.flush()

        ### Save ckpt
        if step % FLAGS.config.i_weights == 0 or is_last_step:
            with report_progress.timed("checkpoint"):
                save_checkpoint(state, FLAGS.model_dir)
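# `psnr_fn` converts the MSE losses logged above into PSNR. A minimal sketch of
# the standard formula for images in [0, 1] (an assumption about this project's
# helper, though the formula itself is standard for NeRF-style evaluation):
import numpy as np

def psnr_fn(mse):
    # PSNR = -10 * log10(MSE) when the peak signal value is 1.0
    return -10.0 * np.log10(mse)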