def process_img(name, label, crop_shape, scale, random_draws,
                to_augment=True, no_rotation=True, logging=True):
    """Load, preprocess, and optionally augment one image; returns a list of arrays."""
    imgs = []
    if logging:
        print("%s [%d] Processing file %s" % (get_time(), os.getpid(), name))

    pad_value = 127
    radius = round(scale / .9)

    # Base image: rescale, sharpen, mask to the inner circle, crop, and pad.
    img = image_load(name)
    simg = scale_radius(img, radius)
    uimg = unsharp_img(simg, radius)
    suimg = subsample_inner_circle_img(uimg, radius, pad_value)
    cimg = center_crop(suimg, crop_shape)
    pimg = pad_img(cimg, (2 * scale, 2 * scale, 3), value=pad_value)
    # Blank out the top and bottom strips.
    pimg[:10, :, :] = pad_value
    pimg[-10:, :, :] = pad_value
    imgs.append(pimg)

    # Skip augmentation entirely, or skip it stochastically using the
    # per-label probability pb[label].
    if not to_augment or np.random.uniform(0, 1) > pb[label]:
        return imgs

    # Augmented copies: distort the rescaled image, then repeat the same pipeline.
    for _ in range(random_draws):
        dist_img = get_distorted_img(simg, pad_value, no_rotation)
        uimg = unsharp_img(dist_img, radius)
        suimg = subsample_inner_circle_img(uimg, radius, pad_value)
        cimg = center_crop(suimg, crop_shape)
        dimg = pad_img(cimg, (2 * scale, 2 * scale, 3), value=pad_value)
        dimg[:10, :, :] = pad_value
        dimg[-10:, :, :] = pad_value
        imgs.append(dimg)
    return imgs
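# Hypothetical driver for process_img above: fan the per-file work out over a
# worker pool (the PID in its log line suggests it is meant to run in worker
# processes). The crop shape, scale, and draw count below are placeholder values.
from multiprocessing import Pool

def preprocess_dataset(file_names, labels, crop_shape=(256, 256), scale=128,
                       random_draws=4):
    """Run process_img over all files in parallel and flatten the results."""
    jobs = [(name, label, crop_shape, scale, random_draws)
            for name, label in zip(file_names, labels)]
    with Pool() as pool:
        per_file = pool.starmap(process_img, jobs)
    # Each call returns the base crop plus any augmented draws.
    return [img for batch in per_file for img in batch]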
def __call__(self, IMG):
    # Pad the input to the network's expected size, normalize to [-1, 1],
    # and run one forward pass.
    IMG_ = pad_img(IMG)
    NMS_IDX, BBOX, TOPK_CLASS, TOPK_SCORE = self.sess.run(
        [self.nms_idx, self.bbox, self.topK_class, self.topK_score],
        feed_dict={self.inputs: IMG_[np.newaxis] / 127.5 - 1.0,
                   self.is_training: True})
    # Draw every NMS-surviving box whose score clears the threshold,
    # mapping box coordinates back to the original image scale.
    for i in NMS_IDX:
        if TOPK_SCORE[i] > 0.5:
            IMG = draw_bbox(IMG, recover_ImgAndBbox_scale(IMG, BBOX[i]),
                            CLASSES[TOPK_CLASS[i]])
            # IMG_ = draw_bbox(IMG_, np.int32(BBOX[i]), CLASSES[TOPK_CLASS[i]])
    return IMG
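# Hypothetical usage of the __call__ wrapper above. The enclosing class and its
# constructor are not shown in this file, so the name `Detector` and its
# argument are assumptions; only the call interface is taken from the code above.
import cv2

detector = Detector(checkpoint="model.ckpt")   # assumed constructor
frame_bgr = cv2.imread("street.jpg")
frame_rgb = frame_bgr[:, :, ::-1]              # the wrapper expects RGB input
annotated = detector(frame_rgb)                # boxes drawn for scores > 0.5
cv2.imwrite("street_detections.jpg", annotated[:, :, ::-1].copy())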
def save_image_with_options(img, highlight, pad, seam, rotated, savename,
                            original_height, original_width, point, savepoints):
    """Save one intermediate image, optionally highlighting the removed seam,
    padding back to the original size, and undoing the carving rotation."""
    if highlight:
        img = highlight_seam(img, seam)
    if pad:
        img = np.array(pad_img(img, original_height, original_width))
    if rotated:
        # Undo the transpose applied for vertical carving before saving.
        img = Image.fromarray(np.transpose(img, axes=(1, 0, 2)))
    else:
        img = Image.fromarray(img)
    # Zero-pad the frame index so files sort correctly, e.g. photo_007.jpg.
    base, ext = savename.rsplit('.', 1)
    img.save(base + '/' + base.split('/')[-1] + '_' +
             str(point).zfill(len(str(savepoints[-1]))) + '.' + ext)
def train(x_u, x_l, y_l, x_v, y_v, c_l, c_u, c_v, batch_size, epoch, model,
          optimizer, device, log_interv, writer, logdir, n_labels):
    model.train()

    # Accumulators over one epoch.
    loss_accum = []
    kl_accum = []
    classification_accum = []
    recons_accum = []
    h_accum = []
    accuracy_accum = []
    L_accum = []   # labeled loss
    U_accum = []   # unlabeled loss

    for train_step in range(len(x_u) // batch_size):
        optimizer.zero_grad()

        # Sample one labeled and one unlabeled minibatch.
        batch_idx_l = np.random.choice(len(x_l), batch_size, replace=False)
        batch_l = np.float32(x_l[batch_idx_l])
        batch_idx_u = np.random.choice(len(x_u), batch_size, replace=False)
        batch_u = np.float32(x_u[batch_idx_u])
        batch_labels = np.float32(y_l[batch_idx_l])

        # Optional conditioning variables.
        if c_l.shape[0] != 0:
            batch_c_l = from_np(np.float32(c_l[batch_idx_l]), device=device)
            batch_c_u = from_np(np.float32(c_u[batch_idx_u]), device=device)
        else:
            batch_c_l = None
            batch_c_u = None
        batch_u, batch_l, batch_labels = from_np(batch_u, batch_l, batch_labels,
                                                 device=device)

        ## Forward
        recon_batch_u, mu_u, logvar_u, logits_u = model(
            batch_u, [], batch_c_u, tau=tau_schedule(model.global_step))
        recon_batch_l, mu_l, logvar_l, logits_l = model(
            batch_l, batch_labels, batch_c_l, tau=tau_schedule(model.global_step))

        ## Losses (normalized by minibatch size)
        # Labeled reconstruction term.
        if n_labels <= 2:
            bce_l = f.binary_cross_entropy(recon_batch_l, batch_l,
                                           reduction='sum') / batch_size
        else:
            if model.binary_input:
                bce_l = f.cross_entropy(recon_batch_l,
                                        torch.max(batch_l, 1)[1].type(torch.int64),
                                        reduction='sum') / batch_size
            else:
                bce_l = f.cross_entropy(recon_batch_l,
                                        batch_l[:, 0].type(torch.int64),
                                        reduction='sum') / batch_size
        kl_l = -0.5 * torch.sum(1 + logvar_l - mu_l.pow(2) - logvar_l.exp()) / batch_size
        # Divide by 2 because the overall loss combines two batches (labeled + unlabeled).
        loss_l = (bce_l + kl_l * beta_schedule(model.global_step)) / 2
        L_accum.append(loss_l.item())

        # Classification term on the labeled batch.
        classification = f.cross_entropy(logits_l, torch.argmax(batch_labels, dim=1),
                                         reduction='sum') / batch_size
        loss_l += classification * alpha / 2  # in the overall loss it weighs half
        # TODO log p(y) is missing both here and in unlabeled
        # (it's constant but we need it to report ELBO)
        accuracy = float(torch.sum(
            torch.max(logits_l, 1)[1] == torch.max(batch_labels, 1)[1])) / batch_size

        # Unlabeled reconstruction term.
        if n_labels <= 2:
            bce_u = f.binary_cross_entropy(recon_batch_u, batch_u,
                                           reduction='sum') / batch_size
        else:
            if model.binary_input:
                bce_u = f.cross_entropy(recon_batch_u,
                                        torch.max(batch_u, 1)[1].type(torch.int64),
                                        reduction='sum') / batch_size
            else:
                bce_u = f.cross_entropy(recon_batch_u,
                                        batch_u[:, 0].type(torch.int64),
                                        reduction='sum') / batch_size
        kl_u = -0.5 * torch.sum(1 + logvar_u - mu_u.pow(2) - logvar_u.exp()) / batch_size
        loss_u = (bce_u + kl_u * beta_schedule(model.global_step)) / 2

        # Entropy of the classifier output on unlabeled data.
        softmax_u = torch.softmax(logits_u, dim=-1)
        h = -torch.sum(torch.mul(softmax_u, torch.log(softmax_u + 1e-12)), dim=-1).mean()
        loss_u += -h * gamma
        U_accum.append(loss_u.item())

        loss = loss_l + loss_u
        loss_accum.append(loss.item())
        kl_accum.append(kl_l.item() + kl_u.item())
        classification_accum.append(classification.item())
        accuracy_accum.append(accuracy)
        h_accum.append(h.item())
        recons_accum.append((bce_l.item() + bce_u.item()) / 2)

        ## Backward: accumulate gradients
        loss.backward()

        ## Clip gradients (each parameter is clipped separately)
        for param in model.parameters():
            if param.grad is not None:
                torch.nn.utils.clip_grad_norm_(param, grad_norm_clip)
        optimizer.step()
        model.global_step += 1

        ## Training step finished -- now write to tensorboard
        if model.global_step % log_interv == 0:
            ## Get losses over last step
            loss_step = loss_accum[-1]
            recons_step = recons_accum[-1]
            kl_step = kl_accum[-1]
            L_step = L_accum[-1]
            U_step = U_accum[-1]
            class_step = classification_accum[-1]
            accuracy_step = accuracy_accum[-1]
            h_step = h_accum[-1]
            print("epoch {}, step {} - loss: {:.4g} \trecons: {:.4g} \tKL: {:.4g} "
                  "\tclass: {:.4g} \taccuracy: {:.4g} \tcategorical entropy: {:.4g}"
                  .format(epoch, model.global_step, loss_step, recons_step, kl_step,
                          class_step, accuracy_step, h_step))

            ## Save losses
            writer.add_scalar('losses/loss', loss_step, model.global_step)
            writer.add_scalar('losses/recons', recons_step, model.global_step)
            writer.add_scalar('losses/KL', kl_step, model.global_step)
            writer.add_scalar('losses/class', class_step, model.global_step)
            writer.add_scalar('losses/accuracy', accuracy_step, model.global_step)
            writer.add_scalar('losses/L', L_step, model.global_step)
            writer.add_scalar('losses/U', U_step, model.global_step)

        ## Validation set (one pass every 2 * log_interv steps)
        if model.global_step % (log_interv * 2) == 2:
            kl_accum_val = []
            classification_accum_val = []
            recons_accum_val = []
            accuracy_accum_val = []
            model.eval()
            for val_step in range(len(x_v) // batch_size):
                batch_idx = np.random.choice(len(x_v), batch_size, replace=False)
                data_val = np.float32(x_v[batch_idx])
                labels_val = np.float32(y_v[batch_idx])
                if c_v.shape[0] != 0:
                    c_val = from_np(np.float32(c_v[batch_idx]), device=device)
                else:
                    c_val = None
                data_val, labels_val = from_np(data_val, labels_val, device=device)
                recon_batch_val, mu_val, logvar_val, logits_val = model(
                    data_val, labels_val, c_val)

                if n_labels <= 2:
                    bce_val = f.binary_cross_entropy(recon_batch_val, data_val,
                                                     reduction='sum') / batch_size
                else:
                    if model.binary_input:
                        bce_val = f.cross_entropy(
                            recon_batch_val,
                            torch.max(data_val, 1)[1].type(torch.int64),
                            reduction='sum') / batch_size
                    else:
                        bce_val = f.cross_entropy(
                            recon_batch_val, data_val[:, 0].type(torch.int64),
                            reduction='sum') / batch_size
                kl_val = -0.5 * torch.sum(1 + logvar_val - mu_val.pow(2)
                                          - logvar_val.exp()) / batch_size
                classification_val = f.cross_entropy(
                    logits_val, torch.argmax(labels_val, dim=1),
                    reduction='sum') / batch_size
                accuracy_val = float(torch.sum(
                    torch.max(logits_val, 1)[1] ==
                    torch.max(labels_val, 1)[1])) / batch_size

                kl_accum_val.append(kl_val.item())
                recons_accum_val.append(bce_val.item())
                classification_accum_val.append(classification_val.item())
                accuracy_accum_val.append(accuracy_val)
            model.train()

            ## Log validation stuff
            recons_val_mean = np.mean(recons_accum_val)
            kl_val_mean = np.mean(kl_accum_val)
            class_val_mean = np.mean(classification_accum_val)
            accuracy_val_mean = np.mean(accuracy_accum_val)
            print("Validation: rec {:.4g} KL {:.4g} clf {:.4g} acc {:.4g}".format(
                recons_val_mean, kl_val_mean, class_val_mean, accuracy_val_mean))
            writer.add_scalar('val losses/recons', recons_val_mean, model.global_step)
            writer.add_scalar('val losses/KL', kl_val_mean, model.global_step)
            writer.add_scalar('val losses/class', class_val_mean, model.global_step)
            writer.add_scalar('val losses/accuracy', accuracy_val_mean, model.global_step)

        if model.global_step % 500 == 0:
            ## Classifier output on unlabeled
            softmax_image = vutils.make_grid(softmax_u.permute(1, 0).detach())
            writer.add_image('classifier output', softmax_image, model.global_step)

            ## Save imgs
            imgs = []
            targ_size = 94
            recon_batch_u = torch.argmax(recon_batch_u, dim=1)
            if model.binary_input:
                batch_u_ = torch.argmax(batch_u, dim=1).type(torch.int64).unsqueeze(1)
            else:
                batch_u_ = batch_u.type(torch.int64)
            recon_batch_u = recon_batch_u.type(torch.int64).unsqueeze(1)
            # - One random slice per spatial dimension, input next to reconstruction
            index = np.random.randint(25, batch_u_.shape[2] - 25)
            imgs.append(pad_img(batch_u_[0:1, :, index, :, :], targ_size))
            imgs.append(pad_img(recon_batch_u[0:1, :, index, :, :], targ_size))
            index = np.random.randint(25, batch_u_.shape[3] - 25)
            imgs.append(pad_img(batch_u_[0:1, :, :, index, :], targ_size))
            imgs.append(pad_img(recon_batch_u[0:1, :, :, index, :], targ_size))
            index = np.random.randint(25, batch_u_.shape[4] - 25)
            imgs.append(pad_img(batch_u_[0:1, :, :, :, index], targ_size))
            imgs.append(pad_img(recon_batch_u[0:1, :, :, :, index], targ_size))
            # - Concatenate and make into grid so they are displayed next to each other
            imgs = torch.cat(imgs, dim=0).detach()
            imgs = vutils.make_grid(imgs, nrow=2)
            if n_labels > 2:
                imgs = to_rgb(imgs[0])
            # - Save
            writer.add_image('images/input and recons', imgs, model.global_step)

            ## Generate samples
            # - Sample one latent code per class, with one-hot labels
            with torch.no_grad():
                z = model.sample_prior(n_samples=model.y_dim)
                y = torch.arange(model.y_dim)
                y_out = torch.zeros(model.y_dim, model.y_dim)
                y_out[torch.arange(y_out.shape[0]), y] = 1
                y = y_out
                if c_l.shape[0] != 0:
                    sample_reconstruction = model.decoder(z, y, batch_c_l[0:model.y_dim])
                else:
                    sample_reconstruction = model.decoder(z, y, None)

            # - One slice per dimension, for all samples
            imgs = []
            sample_reconstruction = torch.argmax(sample_reconstruction, dim=1).unsqueeze(1)
            index = np.random.randint(25, batch_u_.shape[2] - 25)
            imgs.append(pad_img(sample_reconstruction[:, :, index, :, :], targ_size))
            index = np.random.randint(25, batch_u_.shape[3] - 25)
            imgs.append(pad_img(sample_reconstruction[:, :, :, index, :], targ_size))
            index = np.random.randint(25, batch_u_.shape[4] - 25)
            imgs.append(pad_img(sample_reconstruction[:, :, :, :, index], targ_size))
            # - Concatenate and make into grid so they are displayed next to each other
            imgs = torch.cat(imgs, dim=0).detach()
            imgs = vutils.make_grid(imgs, nrow=3)
            if n_labels > 2:
                imgs = to_rgb(imgs[0])
            # - Save
            writer.add_image('generated', imgs, model.global_step)

            # - Also dump each class-conditional sample as a NIfTI volume
            samples = sample_reconstruction.cpu().data.numpy()
            for class_label in range(model.y_dim):
                img = nib.Nifti1Image(samples[class_label, 0].astype(np.int8), np.eye(4))
                nib.save(img, join(logdir, "generated_class_%d_step_%d.nii.gz"
                                   % (class_label, model.global_step)))

    ## Save losses, avg over epoch
    writer.add_scalar('epoch losses/loss', np.mean(loss_accum), model.global_step)
    writer.add_scalar('epoch losses/recons', np.mean(recons_accum), model.global_step)
    writer.add_scalar('epoch losses/KL', np.mean(kl_accum), model.global_step)
    writer.add_scalar('epoch losses/classification', np.mean(classification_accum),
                      model.global_step)
    writer.add_scalar('epoch losses/accuracy', np.mean(accuracy_accum),
                      model.global_step)
    writer.add_scalar('epoch losses/categ_entropy', np.mean(h_accum),
                      model.global_step)
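# The loop above calls tau_schedule and beta_schedule and uses alpha, gamma, and
# grad_norm_clip, all defined elsewhere in the project. Purely as an assumed
# sketch of how such annealing schedules are commonly written (not this
# project's actual definitions; all constants are placeholders):
import numpy as np

def beta_schedule(step, warmup_steps=10000, max_beta=1.0):
    """Linearly warm the KL weight up from 0 to max_beta."""
    return min(max_beta, max_beta * step / warmup_steps)

def tau_schedule(step, tau_start=1.0, tau_min=0.5, decay=3e-5):
    """Exponentially anneal the Gumbel-Softmax temperature down to tau_min."""
    return max(tau_min, tau_start * float(np.exp(-decay * step)))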
def main():
    parser = argparse.ArgumentParser(
        description="Intelligently crop an image along one axis")
    parser.add_argument('input_file')
    parser.add_argument('-a', '--axis', required=True, choices=['x', 'y'],
                        help="Which axis to shrink the image along.")
    parser.add_argument('-p', '--pixels', type=int, required=True,
                        help="How many pixels to shrink the image by.")
    parser.add_argument('-o', '--output',
                        help="What to name the new cropped image.")
    parser.add_argument('-i', '--interval', type=int,
                        help="Save every i-th intermediate image.")
    parser.add_argument('-b', '--border', action='store_true',
                        help="Pad the cropped images back to the size of the original.")
    parser.add_argument('-s', '--show_seam', action='store_true',
                        help="Highlight the removed seam on the intermediate images.")
    args = vars(parser.parse_args())
    print(args)

    img = get_img_arr(args['input_file'])
    # Vertical carving is implemented by transposing, carving along x,
    # and transposing back at the end.
    if args['axis'] == 'y':
        img = np.transpose(img, axes=(1, 0, 2))

    if args['output'] is None:
        name = args['input_file'].rsplit('.', 1)
        args['output'] = name[0] + '_crop.' + name[1]

    savepoints = every_n(args['interval'], img.shape[1]) if args['interval'] else None

    cropped_img = resize_image(img, args['pixels'], dual_gradient_energy,
                               save_name=args['output'],
                               savepoints=savepoints,
                               rotated=args['axis'] == 'y',
                               pad=args['border'],
                               highlight=args['show_seam'])

    if args['axis'] == 'y':
        cropped_img = np.transpose(cropped_img, axes=(1, 0, 2))

    if args['border']:
        # Pad back to the original dimensions before saving.
        h, w = img.shape[:2]
        if args['axis'] == 'y':
            h, w = w, h
        cropped_img = pad_img(cropped_img, h, w)
        cropped_img.save(args['output'])
    else:
        Image.fromarray(cropped_img).save(args['output'])

    print("\nImage {0} cropped by {1} pixels along the {2}-axis and saved as {3}\n"
          .format(args['input_file'], args['pixels'], args['axis'], args['output']))
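# Hypothetical invocation of the CLI defined in main() above; the script name
# "seam_carve.py" is assumed (it is not shown in this file). Equivalent to:
#   python seam_carve.py photo.jpg -a x -p 100 -i 25 -b -s
import subprocess

subprocess.run(
    ["python", "seam_carve.py", "photo.jpg",
     "-a", "x",     # carve along the x axis
     "-p", "100",   # remove 100 pixels
     "-i", "25",    # save every 25th intermediate image
     "-b",          # pad outputs back to the original size
     "-s"],         # highlight the removed seam in intermediates
    check=True)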
                                       (255, 255, 255), 1)
            if show_fps:
                preview_frame = draw_fps(preview_frame, fps, timing)
            if not is_calibrated:
                preview_frame = draw_calib_text(preview_frame)
            if not opt.hide_rect:
                draw_rect(preview_frame)
            # OpenCV windows expect BGR, so flip the channel order for display.
            cv2.imshow('camera', preview_frame[..., ::-1])

            if out is not None:
                if not opt.no_pad:
                    out = pad_img(out, stream_img_size)
                if output_flip:
                    out = cv2.flip(out, 1)
                cv2.imshow('impersonator', out[..., ::-1])

            # FPS averaged over the last 10 frames (timings are in ms).
            fps_hist.append(tt.toc(total=True))
            if len(fps_hist) == 10:
                fps = 10 / (sum(fps_hist) / 1000)
                fps_hist = []
    except KeyboardInterrupt:
        logging.info("main: user interrupt")

    logging.info("stopping camera")
    cap.stop()
def stabilize(args):
    # args.resize = True
    print("reading images")
    # Input is either a video file or a directory of PNG frames.
    if args.img_dir[-4:] == '.mp4' or args.img_dir[-4:] == '.avi':
        from utils import vid2img_lists
        img_lists = vid2img_lists(args.img_dir)
    else:
        from utils import file2lists
        img_lists = file2lists(os.path.join(args.img_dir, 'img_lists.txt'))
        img_lists = [item_i for item_i in img_lists if item_i[-3:] == 'png']
        img_lists = sorted(img_lists)
        img_lists = [os.path.join(args.img_dir, item_i) for item_i in img_lists]
        img_lists = [cv2.imread(fn)[:, :, ::-1] for fn in img_lists]
    raw_shape = img_lists[0].shape

    # Bring frames to a size compatible with the PWC-Net pyramid levels.
    if args.resize:
        img_lists = [pad_img(img, pwc_opt.pyr_lvls) for img in img_lists]
    else:
        from utils import resize_img
        img_lists = [resize_img(img, pwc_opt.pyr_lvls) for img in img_lists]

    first_img_p = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
    mid_img_p = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
    end_img_p = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
    out_img_ts, debug_out_ts = build_model_test(first_img_p, mid_img_p, end_img_p,
                                                training=False, trainable=True)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False))
    # sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess,
                  checkpoint_path + args.lr_str + '/stab.ckpt-' + str(args.modeli))

    for it in range(args.stab_iter):
        print("--------iter {}---------".format(it))
        next_img_lists = []
        # The first `skip` frames have no earlier neighbor; copy them through.
        for k in range(args.skip):
            next_img_lists.append(img_lists[k])
        for k in range(args.skip, len(img_lists) - args.skip):
            cur_img = img_lists[k]
            first_img = img_lists[k - args.skip]
            end_img = img_lists[k + args.skip]
            cur_img_, first_img_, end_img_ = list(
                map(lambda x: np.expand_dims(x, 0), [cur_img, first_img, end_img]))
            out_img, debug_out = sess.run(
                [out_img_ts, debug_out_ts],
                feed_dict={first_img_p: first_img_,
                           mid_img_p: cur_img_,
                           end_img_p: end_img_})
            out_img = out_img.squeeze()
            out_img = np.array(out_img * 255.0).astype(np.uint8)
            next_img_lists.append(out_img)

            if args.debug:
                debug_img_lists_k = ['first_img', 'cur_img', 'end_img', 'out_img']
                debug_img_lists_v = [first_img[:, :, ::-1], cur_img[:, :, ::-1],
                                     end_img[:, :, ::-1], out_img[:, :, ::-1]]
                debug_img_lists = dict(zip(debug_img_lists_k, debug_img_lists_v))
                # write_imgs(debug_img_lists, k, args.debug_out_dir)
                [warped_first, warped_end, img_int,
                 flow_pred0, flow_pred1, flow_pred2] = debug_out
                debug_flow_lists_k = ['first2end_flow', 'end2first_flow', 'mid2int_flow']
                debug_flow_lists_v = [flow_pred0[0], flow_pred2[0], flow_pred1[0]]
                debug_flow_lists = dict(zip(debug_flow_lists_k, debug_flow_lists_v))
                write_flows(debug_flow_lists, k, args.debug_out_dir)

        # The last `skip` frames have no later neighbor; copy them through.
        for k in range(len(img_lists) - args.skip, len(img_lists)):
            next_img_lists.append(img_lists[k])
        img_lists = next_img_lists

    # Undo the padding / resizing before writing the results.
    if args.resize:
        img_lists = [unpad_img(img, raw_shape) for img in img_lists]
    else:
        from utils import back_resize_img
        img_lists = [back_resize_img(img, raw_shape) for img in img_lists]

    if args.img_dir[-4:] == '.mp4' or args.img_dir[-4:] == '.avi':
        from utils import save2vid
        save2vid(img_lists, args.out_dir, args.img_dir)
    else:
        save_img_lists(img_lists, args.out_dir)
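# Hypothetical driver for stabilize(). The attribute names mirror what the
# function reads from `args` (img_dir, out_dir, resize, skip, stab_iter,
# lr_str, modeli, debug, debug_out_dir); all values below are placeholders,
# and the project's real CLI parser is not shown in this file.
from argparse import Namespace

args = Namespace(
    img_dir="shaky_clip.mp4",   # video input (or a directory of PNG frames)
    out_dir="stabilized",
    resize=True,                # pad to the PWC-Net pyramid size instead of resizing
    skip=2,                     # temporal radius of the (first, mid, end) triplet
    stab_iter=3,                # number of stabilization passes
    lr_str="1e-4",              # checkpoint subdirectory suffix
    modeli=100000,              # checkpoint step to restore
    debug=False,
    debug_out_dir="debug",
)
stabilize(args)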