def __init__(self, query_expression, metric="iou", stack_size=7, steps=15):
    """
    Set up the post-processing visually guided search.

    :param query_expression: expression to be optimized
    :param metric: metric to be minimized, like chamfer (stored as given)
    :param stack_size: max stack size required in any program
    :param steps: max time step of any program
    """
    # The parser is built for a fixed 64x64 canvas and without an explicit
    # list of draw terminals.
    parser_options = {
        "unique_draws": None,
        "stack_size": stack_size,
        "steps": steps,
        "canvas_shape": [64, 64],
    }
    self.parser = ParseModelOutput(**parser_options)

    self.query_expression = query_expression
    # Called before `metric` / `errors` exist on the instance, exactly as in
    # the original ordering.
    self.get_graph_structure(query_expression)

    self.metric = metric
    self.errors = []
def __init__(self, labels_path, batch_size=100, train_size=10000, canvas_shape=[64, 64], max_len=13, self_training=False):
    """
    Load inferred program labels (and optionally images) and decode the
    labels into expression strings.

    :param labels_path: prefix that "labels.pt" / "images.pt" are appended to
    :param batch_size: stored on the instance
    :param train_size: stored on the instance
    :param canvas_shape: canvas shape handed to the parser; stored on the instance
    :param max_len: program length handed to the parser; stored on the instance
    :param self_training: when truthy, "images.pt" is loaded as well
    """
    loaded = torch.load(labels_path + "labels.pt", map_location=device)
    if isinstance(loaded, np.ndarray):
        # Labels may have been saved as a raw numpy array.
        loaded = torch.from_numpy(loaded).to(device)
    labels = loaded.long()

    self.self_training = self_training
    if self_training:
        self.images = torch.load(labels_path + "images.pt")

    # pad labels with a stop symbol, should be correct but need to confirm this
    # since infer_programs currently outputs len 13 labels
    self.labels = F.pad(labels, (0, 1), 'constant', 399)

    self.batch_size = batch_size
    self.train_size = train_size
    self.canvas_shape = canvas_shape
    self.max_len = max_len

    # One terminal per line; the last character of every line is dropped
    # (presumably the trailing newline -- the file must end with one).
    with open("terminals.txt", "r") as file:
        self.unique_draw = [line[:-1] for line in file.readlines()]

    self.parser = ParseModelOutput(self.unique_draw, self.max_len // 2 + 1,
                                   self.max_len, canvas_shape)
    self.expressions = self.parser.labels2exps(self.labels, self.labels.shape[1])

    # Remove the stop symbol and later part of the expression
    for position, expression in enumerate(self.expressions):
        self.expressions[position] = expression.partition("$")[0]

    self.correct_programs = []
def voxels_from_expressions(expressions: List, primitives: dict, max_len=7, downsample=None):
    """Return the final voxel representation of each expression.

    The expressions need not be valid: an expression whose program fails the
    validity check yields an all-empty voxel grid.

    :param expressions: iterable of program expressions
    :param primitives: dictionary containing shape primitives as voxel grids,
        for faster processing. In general creating all shape primitives
        on-the-fly is an expensive operation.
    :param max_len: maximum length of programs
    :param downsample: factor by which to downsample the voxel grid; falsy
        means no downsampling
    :return images: boolean voxel representation of the expressions, stacked
        along axis 0
    """
    unique_draw = sorted(primitives.keys())
    parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len, [64, 64, 64],
                              primitives=primitives)

    if downsample:
        meshgrid = np.arange(0, 64, downsample)
        xv, yv, zv = np.meshgrid(meshgrid, meshgrid, meshgrid, sparse=False, indexing='ij')

    stacks = []
    for exp in expressions:
        program = parser.sim.parse(exp)
        if validity(program, len(program), len(program) - 1):
            # Use the primitives generated before.
            parser.sim.generate_stack(program, if_primitives=True)
            stack = np.stack(parser.sim.stack_t, axis=0)[-1, 0, :, :]
        else:
            stack = np.zeros((parser.canvas_shape[0],
                              parser.canvas_shape[1],
                              parser.canvas_shape[2]))

        # Downsample valid and invalid results alike; previously the empty
        # grid kept full resolution and np.stack failed on mixed shapes.
        if downsample:
            stack = stack[xv, yv, zv]
        stacks.append(stack)

    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is equivalent.
    return np.stack(stacks, 0).astype(dtype=bool)
mean_train_loss = train_loss / (config.train_size // (config.batch_size)) print('train_loss', mean_train_loss.cpu().numpy(), epoch) print('train_reward', total_reward / (config.train_size // (config.batch_size)), epoch) end = time.time() print(f"TIME:{end - start}") CD = 0 test_losses = 0 total_reward = 0 imitate_net.eval() imitate_net.epsilon = 0 for batch_idx in range(config.test_size // config.batch_size): parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len, config.canvas_shape) with torch.no_grad(): loss = Variable(torch.zeros(1)).cuda() Rs = np.zeros((config.batch_size, 1)) labels = np.zeros((config.batch_size, max_len), dtype=np.int32) data_ = next(val_gen) one_hot_labels = prepare_input_op(labels, len(unique_draw)) one_hot_labels = Variable(torch.from_numpy(one_hot_labels)).cuda() data = Variable(torch.from_numpy(data_)).cuda() outputs, samples = imitate_net([data, one_hot_labels, max_len]) R = reinforce.generate_rewards(samples, data_, time_steps=max_len, stack_size=max_len // 2 + 1, reward=reward, power=power)
print(f'train acc: {accs}') del data, loss, loss_sum, train_loss, outputs end_time = time.time() # print(f"TIME: {end_time - start_time}") test_losses = 0 imitate_net.eval() test_reward = 0 num_correct = 0 accs = 0 for batch_idx in range(config.test_size // config.batch_size): for k in data_labels_paths.keys(): with torch.no_grad(): parser = ParseModelOutput(stack_size=(k + 1) // 2 + 1, steps=k, canvas_shape=[64, 64, 64]) # samples = next(gen_objs_iters[k][1]) # data_ = np.stack([x[0] for x in samples]) # labels = (np.stack([x[1][0] for x in samples]), # np.stack([x[1][1] for x in samples]), # np.stack([x[1][2] for x in samples]), # np.stack([x[1][3] for x in samples])) data_, labels = next(train_gen_objs[k]) oh_labels = one_hot_labels(labels).cuda() data = Variable(torch.from_numpy(data_)).cuda() test_output = imitate_net.test([data, oh_labels, k]) accs += sum(accuracy(test_output, labels)) / 4
for k in data_labels_paths.keys(): # if using multi gpu training, train and test batch size should be multiple of # number of GPU edvices. test_batch_size = config.batch_size test_gen_objs[k] = generator.get_test_data(test_batch_size, k, num_train_images=dataset_sizes[k][0], num_test_images=dataset_sizes[k][1], if_primitives=True, final_canvas=True, if_jitter=False) Target_expressions = [] Predicted_expressions = [] parser = ParseModelOutput(generator.unique_draw, max_len // 2 + 1, max_len, [64, 64, 64], primitives=generator.primitives) imitate_net.eval() Rs = 0 t1 = time.time() IOU = {} total_iou = 0 print('begin testing') for k in data_labels_paths.keys(): Rs = 0.0 for batch_idx in range(dataset_sizes[k][1] // config.batch_size): data_, labels = next(test_gen_objs[k]) data_ = data_[:, :, 0:config.top_k + 1, :, :] one_hot_labels = prepare_input_op(labels, len(generator.unique_draw)) one_hot_labels = Variable(torch.from_numpy(one_hot_labels), volatile=True).cuda() data = Variable(torch.from_numpy(data_)).cuda() labels = Variable(torch.from_numpy(labels)).cuda()
def infer_programs(imitate_net, path, self_training=False, ab=None):
    """Infer programs for the CAD train split with beam search and save them.

    For every training batch the network's beam search proposes `beam_width`
    programs per image; they are rendered and scored against the target with
    `chamfer`. The best program per image (or the `ab` best, when `ab` is
    given) is stored. Afterwards the same beam search is run on the test
    split, but only its mean best chamfer distance is printed.

    Relies on the module-level names `max_len`, `beam_width` and `device`.

    :param imitate_net: network exposing `eval()`, `epsilon` and `beam_search`
    :param path: output root; `images/`, `results/`, `labels/` and
        `labels/val/` are created beneath it
    :param self_training: when truthy, the target images are also saved to
        `labels/images.pt`
    :param ab: optional number of best beams kept per image
    :return: None; results are written to disk and printed
    """
    # Visualisation of beams is switched off; the plotting branch below only
    # runs if this flag is flipped by hand.
    save_viz = False

    config = read_config.Config("config_cad.yml")

    # Load the terminals symbols of the grammar
    with open("terminals.txt", "r") as file:
        unique_draw = file.readlines()
    # Drop the last character of each line (presumably the newline).
    for index, e in enumerate(unique_draw):
        unique_draw[index] = e[0:-1]

    # Dataset sizes are overridden here regardless of the config file.
    config.train_size = 10000
    config.test_size = 3000
    imitate_net.eval()
    imitate_net.epsilon = 0
    parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len, config.canvas_shape)
    pred_expressions = []
    # One row per kept program: `ab` rows per image when `ab` is given.
    if ab is not None:
        pred_labels = np.zeros((config.train_size * ab, max_len))
    else:
        pred_labels = np.zeros((config.train_size, max_len))
    image_path = f"{path}/images/"
    results_path = f"{path}/results/"
    labels_path = f"{path}/labels/"

    os.makedirs(os.path.dirname(image_path), exist_ok=True)
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    os.makedirs(os.path.dirname(labels_path), exist_ok=True)
    os.makedirs(os.path.dirname(labels_path + "val/"), exist_ok=True)

    generator = Generator()

    train_gen = generator.train_gen(batch_size=config.batch_size, path="data/cad/cad.h5", if_augment=False)

    # NOTE(review): `val_gen` is created but never consumed in this function.
    val_gen = generator.val_gen(batch_size=config.batch_size, path="data/cad/cad.h5", if_augment=False)

    # `Rs` (IoU) is never accumulated below, so the reported "iou" is always 0.
    Rs = 0
    CDs = 0
    Target_images = []

    start = time.time()
    # Best rendered prediction per training image; filled in but not saved.
    pred_images = np.zeros((config.train_size, 64, 64))
    for batch_idx in range(config.train_size // config.batch_size):
        with torch.no_grad():
            print(f"Inferring cad batch: {batch_idx}")
            data_ = next(train_gen)
            # All-zero labels: only used to build the one-hot input tensor.
            labels = np.zeros((config.batch_size, max_len), dtype=np.int32)
            one_hot_labels = prepare_input_op(labels, len(unique_draw))
            one_hot_labels = torch.from_numpy(one_hot_labels).to(device)
            data = torch.from_numpy(data_).to(device)

            all_beams, next_beams_prob, all_inputs = imitate_net.beam_search(
                [data[-1, :, 0, :, :], one_hot_labels], beam_width, max_len)

            beam_labels = beams_parser(all_beams, data_.shape[1], beam_width=beam_width)

            # Flatten to (batch * beam_width, max_len): the beams of image i
            # occupy rows [i * beam_width, (i + 1) * beam_width).
            beam_labels_numpy = np.zeros(
                (config.batch_size * beam_width, max_len), dtype=np.int32)
            Target_images.append(data_[-1, :, 0, :, :])
            for i in range(data_.shape[1]):
                beam_labels_numpy[i * beam_width:(i + 1) * beam_width, :] = beam_labels[i]

            # find expression from these predicted beam labels
            expressions = [""] * config.batch_size * beam_width
            for i in range(config.batch_size * beam_width):
                for j in range(max_len):
                    expressions[i] += unique_draw[beam_labels_numpy[i, j]]
            # Keep only the part before the stop symbol "$".
            for index, prog in enumerate(expressions):
                expressions[index] = prog.split("$")[0]

            pred_expressions += expressions
            predicted_images = image_from_expressions(parser, expressions)
            target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
            # Repeat each target so it lines up with its beam_width predictions.
            target_images_new = np.repeat(target_images, axis=0, repeats=beam_width)

            # An IoU-based per-beam reward used to be computed here; it is
            # disabled and only the chamfer distance is used.
            beam_CD = chamfer(target_images_new, predicted_images)

            # select best expression by chamfer distance
            if ab is None:
                best_labels = np.zeros((config.batch_size, max_len))
                for r in range(config.batch_size):
                    idx = np.argmin(beam_CD[r * beam_width:(r + 1) * beam_width])
                    best_labels[r] = beam_labels[r][idx]
                pred_labels[batch_idx * config.batch_size:batch_idx * config.batch_size + config.batch_size] = best_labels
            else:
                # Keep the `ab` beams with the smallest chamfer distance.
                best_labels = np.zeros((config.batch_size * ab, max_len))
                for r in range(config.batch_size):
                    sorted_idx = np.argsort(beam_CD[r * beam_width:(r + 1) * beam_width])[:ab]
                    best_labels[r * ab:r * ab + ab] = beam_labels[r][sorted_idx]
                pred_labels[batch_idx * config.batch_size * ab:batch_idx * config.batch_size * ab + config.batch_size * ab] = best_labels

            # Best (smallest) chamfer distance per image and its rendering.
            CD = np.zeros((config.batch_size, 1))
            for r in range(config.batch_size):
                CD[r, 0] = min(beam_CD[r * beam_width:(r + 1) * beam_width])
                pred_images[batch_idx * config.batch_size + r] = predicted_images[r * beam_width + np.argmin(
                    beam_CD[r * beam_width:(r + 1) * beam_width])]

            CDs += np.mean(CD)

            if save_viz:
                # One figure per image: the target followed by every beam.
                for j in range(0, config.batch_size):
                    f, a = plt.subplots(1, beam_width + 1, figsize=(30, 3))
                    a[0].imshow(data_[-1, j, 0, :, :], cmap="Greys_r")
                    a[0].axis("off")
                    a[0].set_title("target")
                    for i in range(1, beam_width + 1):
                        a[i].imshow(predicted_images[j * beam_width + i - 1], cmap="Greys_r")
                        a[i].set_title("{}".format(i))
                        a[i].axis("off")
                    plt.savefig(
                        image_path + "{}.png".format(batch_idx * config.batch_size + j),
                        transparent=0)
                    plt.close("all")

                # Only the first batch is ever visualised.
                save_viz = False

    print("Inferring cad average chamfer distance: {}".format(
        CDs / (config.train_size // config.batch_size)), flush=True)

    Rs = Rs / (config.train_size // config.batch_size)
    CDs = CDs / (config.train_size // config.batch_size)
    print(Rs, CDs)
    results = {"iou": Rs, "chamferdistance": CDs}

    with open(results_path + "results_beam_width_{}.org".format(beam_width), 'w') as outfile:
        json.dump(results, outfile)

    torch.save(pred_labels, labels_path + "labels.pt")
    # Saving the rendered predictions (`pred_images`) is disabled.
    if self_training:
        # Save the target images, repeated `ab` times when several programs
        # are kept per image so rows stay aligned with `pred_labels`.
        if ab is None:
            torch.save(np.concatenate(Target_images, axis=0), labels_path + "images.pt")
        else:
            torch.save(
                np.repeat(np.concatenate(Target_images, axis=0), ab, axis=0),
                labels_path + "images.pt")

    test_gen = generator.test_gen(batch_size=config.batch_size, path="data/cad/cad.h5", if_augment=False)

    # Second pass: same beam search on the test split, metrics only.
    pred_expressions = []
    Rs = 0
    CDs = 0
    Target_images = []
    for batch_idx in range(config.test_size // config.batch_size):
        with torch.no_grad():
            print(f"Inferring test cad batch: {batch_idx}")
            data_ = next(test_gen)
            labels = np.zeros((config.batch_size, max_len), dtype=np.int32)
            one_hot_labels = prepare_input_op(labels, len(unique_draw))
            one_hot_labels = torch.from_numpy(one_hot_labels).to(device)
            data = torch.from_numpy(data_).to(device)

            all_beams, next_beams_prob, all_inputs = imitate_net.beam_search(
                [data[-1, :, 0, :, :], one_hot_labels], beam_width, max_len)

            beam_labels = beams_parser(all_beams, data_.shape[1], beam_width=beam_width)

            beam_labels_numpy = np.zeros(
                (config.batch_size * beam_width, max_len), dtype=np.int32)
            Target_images.append(data_[-1, :, 0, :, :])
            for i in range(data_.shape[1]):
                beam_labels_numpy[i * beam_width:(i + 1) * beam_width, :] = beam_labels[i]

            # find expression from these predicted beam labels
            expressions = [""] * config.batch_size * beam_width
            for i in range(config.batch_size * beam_width):
                for j in range(max_len):
                    expressions[i] += unique_draw[beam_labels_numpy[i, j]]
            for index, prog in enumerate(expressions):
                expressions[index] = prog.split("$")[0]

            pred_expressions += expressions
            predicted_images = image_from_expressions(parser, expressions)
            target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
            target_images_new = np.repeat(target_images, axis=0, repeats=beam_width)

            beam_CD = chamfer(target_images_new, predicted_images)

            CD = np.zeros((config.batch_size, 1))
            for r in range(config.batch_size):
                CD[r, 0] = min(beam_CD[r * beam_width:(r + 1) * beam_width])

            CDs += np.mean(CD)

    print(f"TEST CD: {CDs / (config.test_size // config.batch_size)}")

    end = time.time()
    print(f"Inference time: {end-start}")
def train_generator(generator_net, load_path, save_path, max_epochs=None):
    """Train the generative model on inferred programs, then sample from it.

    The labels in `{load_path}/labels/labels.pt` are padded with token 399 on
    both sides (start and stop token) and used to train `generator_net` with
    early stopping on the training loss. Afterwards `inference_train_size`
    programs are sampled, saved to `{save_path}/labels.pt`, rendered and
    saved to `{save_path}/images.pt`.

    Relies on the module-level names `device`, `unique_draw`, `max_len`,
    `inference_train_size` and `generator_latent_dim`.

    :param generator_net: model exposing `loss_function` and `decode`
    :param load_path: directory containing `labels/labels.pt`
    :param save_path: output directory (created if missing)
    :param max_epochs: maximum number of epochs; 500 when None
    :return: number of epochs that were run
    """
    epochs = 500 if max_epochs is None else max_epochs
    checkpoint_path = f"{save_path}/best_gen_dict.pth"

    labels = torch.load(f"{load_path}/labels/labels.pt", map_location=device)

    # pad with a start and stop token
    labels = np.pad(labels, ((0, 0), (1, 1)), constant_values=399)
    # Normaliser for the per-token epoch statistics.
    tokens_per_epoch = len(labels) * (labels.shape[1] - 1)

    batch_size = 100

    optimizer = optim.Adam(generator_net.parameters(), lr=1e-3)

    generator_net.train()

    best_train_loss = 1e20
    patience = 20
    num_worse = 0

    # The output directory must exist before the first checkpoint is written;
    # previously it was only created after training had finished.
    os.makedirs(os.path.dirname(f"{save_path}/"), exist_ok=True)
    torch.save(generator_net.state_dict(), checkpoint_path)

    # Tracked separately so the return value is defined even for 0 epochs.
    epochs_run = 0
    for epoch in range(epochs):
        epochs_run = epoch + 1
        start = time.time()
        train_loss = 0
        ce_loss = 0
        kl_loss = 0
        acc = 0
        np.random.shuffle(labels)
        for i in range(0, len(labels), batch_size):
            batch = torch.from_numpy(labels[i:i + batch_size]).long().to(device)
            optimizer.zero_grad()
            recon_batch, mu, logvar = generator_net(batch)
            ce, kld = generator_net.loss_function(recon_batch, batch, mu, logvar)
            loss = ce + 0.1 * kld
            loss.backward()
            train_loss += loss.item() / tokens_per_epoch
            ce_loss += ce.item() / tokens_per_epoch
            kl_loss += kld.item() / tokens_per_epoch
            # Fraction of tokens whose arg-max prediction matches the target
            # (targets are the input shifted by one position).
            acc += (recon_batch.permute(1, 2, 0).max(dim=1)[1] == batch[:, 1:]).float().sum() / tokens_per_epoch
            optimizer.step()

        print(
            f"generator epoch: {epoch}, loss: {train_loss}, accuracy: {acc}, ce: {ce_loss}, kld: {kl_loss}"
        )

        if train_loss >= best_train_loss:
            num_worse += 1
        else:
            num_worse = 0
            best_train_loss = train_loss
            torch.save(generator_net.state_dict(), checkpoint_path)
        if num_worse >= patience:
            # load the best model and stop training
            generator_net.load_state_dict(torch.load(checkpoint_path))
            break

        end = time.time()
        print(f'gen epoch time {end-start}')

    # Sample programs from the prior in batches.
    train_tokens = torch.zeros((inference_train_size, max_len))
    for i in range(0, inference_train_size, batch_size):
        batch_latents = torch.randn(1, batch_size, generator_latent_dim).to(device)
        batch_tokens = generator_net.decode(batch_latents, timesteps=labels.shape[1] - 1)
        # Arg-max token per step, dropping the final step.
        batch_tokens = batch_tokens.permute(1, 0, 2).max(dim=2)[1][:, :-1]
        train_tokens[i:i + batch_size] = batch_tokens

    torch.save(train_tokens, f"{save_path}/labels.pt")

    # find expression from labels
    parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len, [64, 64])
    token_rows = train_tokens.long().tolist()
    # Join the terminals of each program and cut at the stop symbol "$".
    expressions = [
        "".join(unique_draw[token] for token in row[:max_len]).split("$")[0]
        for row in token_rows
    ]
    pred_images = image_from_expressions(parser, expressions).astype(np.float32)
    torch.save(pred_images, f"{save_path}/images.pt")

    return epochs_run
def train_inference(imitate_net, path, max_epochs=None, self_training=False, ab=None):
    """Train the inference network on stored programs with early stopping.

    Training batches come from a `WakeSleepGen` reading `{path}/`; validation
    uses the CAD validation split and the chamfer distance of the rendered
    predictions. The encoder is frozen. The weights with the best validation
    chamfer distance are kept in `{path}/best_dict.pth` and reloaded when
    early stopping triggers.

    Relies on the module-level names `device`, `max_len`,
    `inference_train_size` and `inference_test_size`.

    :param imitate_net: network to train (must expose `encoder` and `test`)
    :param path: directory holding the training data and the checkpoint
    :param max_epochs: maximum number of epochs; 1000 when None
    :param self_training: accepted for interface compatibility; the data
        generator is always created with `self_training=True`
    :param ab: optional multiplier applied to the training set size
    :return: number of epochs run when early stopping triggers, otherwise
        `epochs`
    """
    epochs = 1000 if max_epochs is None else max_epochs

    config = read_config.Config("config_synthetic.yml")

    train_size = inference_train_size if ab is None else inference_train_size * ab

    generator = WakeSleepGen(f"{path}/",
                             batch_size=config.batch_size,
                             train_size=train_size,
                             canvas_shape=config.canvas_shape,
                             max_len=max_len,
                             self_training=True)

    train_gen = generator.get_train_data()

    cad_generator = Generator()
    val_gen = cad_generator.val_gen(batch_size=config.batch_size,
                                    path="data/cad/cad.h5",
                                    if_augment=False)

    # Freeze the encoder; only the remaining parameters are optimised.
    for parameter in imitate_net.encoder.parameters():
        parameter.requires_grad = False

    optimizer = optim.Adam(
        [para for para in imitate_net.parameters() if para.requires_grad],
        weight_decay=config.weight_decay,
        lr=config.lr)

    # Constructed as before; the plateau scheduler is currently not stepped.
    reduce_plat = LearningRate(optimizer,
                               init_lr=config.lr,
                               lr_dacay_fact=0.2,
                               patience=config.patience)

    torch.save(imitate_net.state_dict(), f"{path}/best_dict.pth")

    best_test_cd = 1e20
    patience = 20
    num_worse = 0

    for epoch in range(epochs):
        start = time.time()
        train_loss = 0
        imitate_net.train()
        for batch_idx in range(train_size // (config.batch_size * config.num_traj)):
            optimizer.zero_grad()
            loss = 0
            # Accumulate gradients over `num_traj` batches per optimiser step.
            for _ in range(config.num_traj):
                data, labels = next(train_gen)
                one_hot_labels = prepare_input_op(labels, len(generator.unique_draw))
                one_hot_labels = torch.from_numpy(one_hot_labels).to(device)
                data = data.to(device)
                labels = labels.to(device)
                outputs = imitate_net([data, one_hot_labels, max_len])
                loss_k = (
                    (losses_joint(outputs, labels, time_steps=max_len + 1) /
                     (max_len + 1)) / config.num_traj)
                loss_k.backward()
                loss += float(loss_k)
                del loss_k

            optimizer.step()
            train_loss += loss
            print(f"batch {batch_idx} train loss: {loss}")

        # NOTE(review): the denominator counts batches, not optimiser steps,
        # so this is scaled by 1 / num_traj when num_traj > 1.
        mean_train_loss = train_loss / (train_size // (config.batch_size))
        print(f"epoch {epoch} mean train loss: {mean_train_loss}")

        imitate_net.eval()
        loss = 0
        metrics = {"cos": 0, "iou": 0, "cd": 0}
        CD = 0
        for batch_idx in range(inference_test_size // config.batch_size):
            parser = ParseModelOutput(generator.unique_draw, max_len // 2 + 1,
                                      max_len, config.canvas_shape)
            with torch.no_grad():
                labels = np.zeros((config.batch_size, max_len), dtype=np.int32)
                data_ = next(val_gen)
                one_hot_labels = prepare_input_op(labels, len(generator.unique_draw))
                # Use the same `device` as the training loop instead of an
                # unconditional `.cuda()`, which fails on CPU-only setups and
                # may pick a different GPU than the model lives on.
                one_hot_labels = torch.from_numpy(one_hot_labels).to(device)
                data = torch.from_numpy(data_).to(device)
                test_outputs = imitate_net.test(
                    [data[-1, :, 0, :, :], one_hot_labels, max_len])
                pred_images, _, _ = parser.get_final_canvas(
                    test_outputs,
                    if_just_expressions=False,
                    if_pred_images=True)
                target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
                CD += np.sum(chamfer(target_images, pred_images))

        metrics["cd"] = CD / inference_test_size

        # No validation loss is computed any more, so this stays 0.
        test_losses = loss
        test_loss = test_losses / (inference_test_size // (config.batch_size))

        if metrics["cd"] >= best_test_cd:
            num_worse += 1
        else:
            num_worse = 0
            best_test_cd = metrics["cd"]
            torch.save(imitate_net.state_dict(), f"{path}/best_dict.pth")
        if num_worse >= patience:
            # load the best model and stop training
            imitate_net.load_state_dict(torch.load(f"{path}/best_dict.pth"))
            return epoch + 1

        # Report the real epoch budget instead of a hard-coded "/100".
        print(
            f"Epoch {epoch}/{epochs} =>  train_loss: {mean_train_loss}, iou: {0}, cd: {metrics['cd']}, test_mse: {test_loss}, test_acc: {0}"
        )

        end = time.time()
        print(f"Inference train time {end-start}")

        del test_losses, outputs, test_outputs

    return epochs
def infer_programs(imitate_net, self_training=False, ab=None):
    """Run beam search on the CAD test split, plot the beams and score them.

    For every test image the `beam_width` predicted programs are rendered and
    compared to the target with `chamfer`; the smallest distance per image is
    averaged per batch. A figure with the target and all beams is written to
    `best_lest/` for every image.

    Relies on the module-level names `max_len`, `beam_width` and `device`.

    :param imitate_net: network exposing `eval()`, `epsilon` and `beam_search`
    :param self_training: unused
    :param ab: unused
    :return: mean over batches of the per-batch mean best chamfer distance
    """
    config = read_config.Config("config_cad.yml")

    # Load the terminal symbols of the grammar, dropping the last character
    # of every line (presumably the newline).
    with open("terminals.txt", "r") as file:
        unique_draw = [line[:-1] for line in file.readlines()]

    config.train_size = 10000
    config.test_size = 3000
    imitate_net.eval()
    imitate_net.epsilon = 0
    parser = ParseModelOutput(unique_draw, max_len // 2 + 1, max_len, config.canvas_shape)

    generator = Generator()
    test_gen = generator.test_gen(batch_size=config.batch_size,
                                  path="data/cad/cad.h5",
                                  if_augment=False)

    pred_expressions = []
    Target_images = []
    CDs = 0
    for batch_idx in range(config.test_size // config.batch_size):
        with torch.no_grad():
            print(f"Inferring test cad batch: {batch_idx}")
            data_ = next(test_gen)

            # All-zero labels, only needed to build the one-hot input.
            blank_labels = np.zeros((config.batch_size, max_len), dtype=np.int32)
            one_hot_labels = prepare_input_op(blank_labels, len(unique_draw))
            one_hot_labels = torch.from_numpy(one_hot_labels).to(device)
            data = torch.from_numpy(data_).to(device)

            all_beams, _, _ = imitate_net.beam_search(
                [data[-1, :, 0, :, :], one_hot_labels], beam_width, max_len)
            beam_labels = beams_parser(all_beams, data_.shape[1], beam_width=beam_width)

            total_beams = config.batch_size * beam_width
            beam_labels_numpy = np.zeros((total_beams, max_len), dtype=np.int32)
            Target_images.append(data_[-1, :, 0, :, :])
            # Rows [i * beam_width, (i + 1) * beam_width) hold image i's beams.
            for i in range(data_.shape[1]):
                first, last = i * beam_width, (i + 1) * beam_width
                beam_labels_numpy[first:last, :] = beam_labels[i]

            # Decode every beam into an expression, cut at the stop symbol.
            expressions = [
                "".join(unique_draw[beam_labels_numpy[row, col]]
                        for col in range(max_len)).split("$")[0]
                for row in range(total_beams)
            ]

            pred_expressions += expressions
            predicted_images = image_from_expressions(parser, expressions)
            target_images = data_[-1, :, 0, :, :].astype(dtype=bool)
            target_images_new = np.repeat(target_images, axis=0, repeats=beam_width)

            beam_CD = chamfer(target_images_new, predicted_images)

            # Smallest chamfer distance among the beams of each image.
            CD = np.zeros((config.batch_size, 1))
            for r in range(config.batch_size):
                CD[r, 0] = min(beam_CD[r * beam_width:(r + 1) * beam_width])

            CDs += np.mean(CD)

            # One figure per image: target first, then every beam.
            for j in range(0, config.batch_size):
                f, a = plt.subplots(1, beam_width + 1, figsize=(30, 3))
                target_axis = a[0]
                target_axis.imshow(data_[-1, j, 0, :, :], cmap="Greys_r")
                target_axis.axis("off")
                target_axis.set_title("target")
                for i in range(1, beam_width + 1):
                    beam_axis = a[i]
                    beam_axis.imshow(predicted_images[j * beam_width + i - 1],
                                     cmap="Greys_r")
                    beam_axis.set_title("{}".format(i))
                    beam_axis.axis("off")
                plt.savefig("best_lest/" +
                            "{}.png".format(batch_idx * config.batch_size + j),
                            transparent=0)
                plt.close("all")

    return CDs / (config.test_size // config.batch_size)
for k in data_labels_paths.keys(): # if using multi gpu training, train and test batch size should be multiple of # number of GPU edvices. test_batch_size = config.batch_size test_gen_objs[k] = generator.get_test_data( test_batch_size, k, num_train_images=dataset_sizes[k][0], num_test_images=dataset_sizes[k][1], if_primitives=True, if_jitter=False) Target_expressions = [] Predicted_expressions = [] parser = ParseModelOutput(generator.unique_draw, max_len // 2 + 1, max_len, [64, 64, 64], primitives=generator.primitives) imitate_net.eval() Rs = 0 t1 = time.time() IOU = {} total_iou = 0 for k in data_labels_paths.keys(): Rs = 0.0 for batch_idx in range(dataset_sizes[k][1] // config.batch_size): data_, labels = next(test_gen_objs[k]) data_ = data_[:, :, 0:config.top_k + 1, :, :, :] one_hot_labels = prepare_input_op(labels, len(generator.unique_draw)) one_hot_labels = Variable(torch.from_numpy(one_hot_labels), volatile=True).cuda() data = Variable(torch.from_numpy(data_)).cuda()
def infer_progams(csgnet, train_dataset, val_dataset):
    """Infer programs for both datasets and wrap the results in DataLoaders.

    Every batch is passed through `csgnet.test2`; the predicted token labels
    (with a final stop label appended) and the rendered voxel stacks are
    collected, and an IoU-style score against the input voxels is printed
    per dataset.

    Relies on the module-level names `unique_draws`, `max_len`, `primitives`,
    `device`, `config` and `_col`.

    :param csgnet: network exposing `eval()` and `test2`
    :param train_dataset: iterable of voxel batches (training split)
    :param val_dataset: iterable of voxel batches (validation split)
    :return: `(train_loader, val_loader)` over `(labels, stack)` samples
    """
    parser = ParseModelOutput(unique_draws, max_len // 2 + 1, max_len,
                              [64, 64, 64], primitives=primitives)
    csgnet.eval()

    dataset_stacks = []
    dataset_labels = []
    for name, dataset in (("train", train_dataset), ("val", val_dataset)):
        predicted_stacks = []
        predicted_labels = []
        Rs = 0.0
        count = 0
        for batch_idx, batch in enumerate(dataset):
            with torch.no_grad():
                # Report progress against the dataset being processed (this
                # used to print the training set length for both splits).
                print(f"batch {batch_idx}/{len(dataset)}")
                count += len(batch)

                # Debug visualisation is limited to the first batch of each
                # split, matching what was previously reachable.
                visualise = batch_idx == 0
                if visualise:
                    vis_voxels(batch.squeeze().numpy()[:5], "gt")
                batch = batch.to(device)

                outputs = csgnet.test2(batch, max_len)

                # Arg-max label per step, followed by a final stop label
                # (the last terminal index).
                labels = [
                    torch.max(o, 1)[1].data.cpu().numpy() for o in outputs
                ]
                labels += [np.full((len(batch), ), len(unique_draws) - 1)]

                stack, _, _ = parser.get_final_canvas(
                    outputs, if_pred_images=True, if_just_expressions=False)

                if visualise:
                    vis_voxels(stack[:5], "gen")

                # A leftover debugging `break` used to sit here; it made the
                # accumulation below unreachable, so the concatenations after
                # the loop always failed on empty lists.
                predicted_stacks.append(stack)
                predicted_labels.append(np.stack(labels).transpose())

                data_ = batch.squeeze().cpu().numpy()
                # Intersection over (union + 1); the +1 avoids division by zero.
                R = np.sum(np.logical_and(stack, data_),
                           (1, 2, 3)) / (np.sum(np.logical_or(stack, data_),
                                                (1, 2, 3)) + 1)
                Rs += np.sum(R)

        IOU = Rs / count
        print(f"IOU on ShapeNet {name}: {IOU}")
        dataset_stacks.append(np.concatenate(predicted_stacks, axis=0))
        dataset_labels.append(np.concatenate(predicted_labels, axis=0))

    train_samples = list(zip(dataset_labels[0], list(dataset_stacks[0])))
    val_samples = list(zip(dataset_labels[1], list(dataset_stacks[1])))

    train_dataset = DataLoader(train_samples,
                               batch_size=config.batch_size,
                               shuffle=True,
                               collate_fn=_col)
    val_dataset = DataLoader(val_samples,
                             batch_size=config.batch_size,
                             shuffle=False,
                             collate_fn=_col)

    return train_dataset, val_dataset
def train_model(csgnet, train_dataset, val_dataset, max_epochs=None):
    """Train `csgnet` on (labels, voxels) samples with early stopping.

    Each epoch trains on the full-sized batches of `train_dataset`
    (accumulating gradients over `config.num_traj` batches per optimiser
    step) and then evaluates loss, token accuracy and an IoU-style reward on
    `val_dataset`. Training stops once the validation loss has not improved
    for 3 consecutive epochs, and the best weights are restored.

    Relies on the module-level names `config`, `unique_draws`, `max_len` and
    `primitives`.

    :param csgnet: network exposing `forward2` and `test2`
    :param train_dataset: iterable of batches of `(labels, voxels)` pairs
    :param val_dataset: iterable of batches of `(labels, voxels)` pairs
    :param max_epochs: maximum number of epochs; 100 when None
    """
    # Local import: needed to snapshot the best weights.
    import copy

    epochs = 100 if max_epochs is None else max_epochs

    optimizer = optim.Adam(
        [para for para in csgnet.parameters() if para.requires_grad],
        weight_decay=config.weight_decay,
        lr=config.lr)

    reduce_plat = LearningRate(optimizer,
                               init_lr=config.lr,
                               lr_dacay_fact=0.2,
                               lr_decay_epoch=3,
                               patience=config.patience)

    best_state_dict = None
    patience = 3
    prev_test_loss = 1e20
    prev_test_reward = 0
    num_worse = 0

    # Honour `max_epochs` (the loop used to be hard-coded to 100 epochs).
    for epoch in range(epochs):
        train_loss = 0
        csgnet.train()
        # Number of times to accumulate gradients
        num_accums = config.num_traj
        batch_idx = 0
        count = 0
        # Gradients are cleared once per accumulation group, not per batch;
        # clearing them on every batch discarded all but the last batch.
        optimizer.zero_grad()
        for batch in train_dataset:
            labels = np.stack([x[0] for x in batch])
            data = np.stack([x[1] for x in batch])
            # Skip incomplete batches.
            if len(labels) != config.batch_size:
                continue

            one_hot_labels = prepare_input_op(labels, len(unique_draws))
            one_hot_labels = torch.from_numpy(one_hot_labels).cuda()
            data = torch.from_numpy(data).cuda().unsqueeze(-1).float()
            labels = torch.from_numpy(labels).cuda()

            # forward pass
            outputs = csgnet.forward2([data, one_hot_labels, max_len])

            loss = losses_joint(outputs, labels,
                                time_steps=max_len + 1) / num_accums
            loss.backward()
            # Every batch contributes loss / num_accums; rescaled below.
            train_loss += loss.data

            batch_idx += 1
            count += len(data)

            if batch_idx % num_accums == 0:
                # Clip the gradient to fixed value to stabilize training.
                torch.nn.utils.clip_grad_norm_(csgnet.parameters(), 20)
                optimizer.step()
                optimizer.zero_grad()

        mean_train_loss = (train_loss * num_accums) / (count // config.batch_size)
        print(f'train loss epoch {epoch}: {float(mean_train_loss)}')
        del data, loss, train_loss, outputs

        test_losses = 0
        acc = 0
        csgnet.eval()
        test_reward = 0
        batch_idx = 0
        count = 0
        for batch in val_dataset:
            labels = np.stack([x[0] for x in batch])
            data = np.stack([x[1] for x in batch])
            if len(labels) != config.batch_size:
                continue
            parser = ParseModelOutput(unique_draws,
                                      stack_size=(max_len + 1) // 2 + 1,
                                      steps=max_len,
                                      canvas_shape=[64, 64, 64],
                                      primitives=primitives)
            with torch.no_grad():
                one_hot_labels = prepare_input_op(labels, len(unique_draws))
                one_hot_labels = torch.from_numpy(one_hot_labels).cuda()
                data = torch.from_numpy(data).cuda().unsqueeze(-1).float()
                labels = torch.from_numpy(labels).cuda()

                # Teacher-forced pass: validation loss and token accuracy.
                test_output = csgnet.forward2([data, one_hot_labels, max_len])
                l = losses_joint(test_output, labels,
                                 time_steps=max_len + 1).data
                test_losses += l
                acc += float((torch.argmax(torch.stack(test_output), dim=2).permute(1, 0) == labels).float().sum()) \
                    / (labels.shape[0] * labels.shape[1])

                # Free-running pass: render the predictions and score them.
                test_output = csgnet.test2(data, max_len)
                stack, _, _ = parser.get_final_canvas(
                    test_output,
                    if_pred_images=True,
                    if_just_expressions=False)
                data_ = data.squeeze().cpu().numpy()
                # Intersection over (union + 1); the +1 avoids division by zero.
                R = np.sum(np.logical_and(stack, data_),
                           (1, 2, 3)) / (np.sum(np.logical_or(stack, data_),
                                                (1, 2, 3)) + 1)
                test_reward += np.sum(R)

            batch_idx += 1
            count += len(data)

        test_reward = test_reward / count
        test_loss = test_losses / (count // config.batch_size)
        acc = acc / (count // config.batch_size)

        if test_loss < prev_test_loss:
            prev_test_loss = test_loss
            # `state_dict()` returns references to the live parameters, so a
            # deep copy is required for the snapshot to survive more training.
            best_state_dict = copy.deepcopy(csgnet.state_dict())
            num_worse = 0
        else:
            num_worse += 1
        if num_worse >= patience:
            csgnet.load_state_dict(best_state_dict)
            break

        print(f'test loss epoch {epoch}: {float(test_loss)}')
        print(f'test IOU epoch {epoch}: {test_reward}')
        print(f'test acc epoch {epoch}: {acc}')
        if config.if_schedule:
            reduce_plat.reduce_on_plateu(-test_reward)

        del test_losses, test_output
        if test_reward > prev_test_reward:
            prev_test_reward = test_reward
class WakeSleepGen:
    """
    Endless batch generator over programs stored on disk as label sequences.

    Labels are read from ``labels_path + "labels.pt"``, padded with a stop
    symbol and decoded into expression strings. ``get_train_data`` then yields
    ``(images, labels)`` batches whose images are either rendered from those
    expressions or, when ``self_training`` is set, read from
    ``labels_path + "images.pt"``.
    """

    # Label index appended to every sequence as the stop symbol.
    STOP_SYMBOL = 399

    def __init__(self,
                 labels_path,
                 batch_size=100,
                 train_size=10000,
                 canvas_shape=None,
                 max_len=13,
                 self_training=False):
        """
        :param labels_path: prefix that "labels.pt" (and "images.pt") are
            appended to verbatim, so a directory needs its trailing separator
        :param batch_size: number of programs per yielded batch
        :param train_size: number of leading programs that are sampled from
        :param canvas_shape: shape of one rendered canvas, defaults to [64, 64]
        :param max_len: maximum program length handed to the parser
        :param self_training: if True, yield the stored images instead of
            rendering the expressions
        """
        # A list default would be shared between every instance of the class.
        if canvas_shape is None:
            canvas_shape = [64, 64]

        self.labels = torch.load(labels_path + "labels.pt", map_location=device)
        if isinstance(self.labels, np.ndarray):
            self.labels = torch.from_numpy(self.labels).to(device)
        self.labels = self.labels.long()

        self.self_training = self_training
        if self_training:
            self.images = torch.load(labels_path + "images.pt")

        # pad labels with a stop symbol, should be correct but need to confirm this
        # since infer_programs currently outputs len 13 labels
        self.labels = F.pad(self.labels, (0, 1), 'constant', self.STOP_SYMBOL)

        self.train_size = train_size
        self.max_len = max_len
        self.canvas_shape = canvas_shape
        self.batch_size = batch_size

        # Strip only the line terminator: slicing off the last character would
        # truncate the final terminal when the file has no trailing newline.
        with open("terminals.txt", "r") as file:
            self.unique_draw = [line.rstrip("\n") for line in file]

        self.parser = ParseModelOutput(self.unique_draw, self.max_len // 2 + 1,
                                       self.max_len, self.canvas_shape)
        decoded = self.parser.labels2exps(self.labels, self.labels.shape[1])
        # Remove the stop symbol and later part of the expression
        self.expressions = [exp.split("$")[0] for exp in decoded]
        self.correct_programs = []

    def get_train_data(self):
        """
        Yield ``(batch_data, batch_labels)`` tuples forever.

        Each pass over the data reshuffles the first ``train_size`` programs
        and resets ``self.correct_programs``, which collects the within-batch
        position of every program that passes ``validity``.

        Without ``self_training`` the batch images are the final canvases of
        the rendered programs as float32, with an all-zero canvas standing in
        for invalid programs. With ``self_training`` they are the stored images.

        :return: generator of (image tensor, label tensor) pairs
        """
        blank_shape = tuple(self.canvas_shape)
        while True:
            self.correct_programs = []
            ids = np.arange(self.train_size)
            np.random.shuffle(ids)
            for start in range(0, self.train_size, self.batch_size):
                batch_ids = ids[start:start + self.batch_size]
                batch_exp = [self.expressions[index] for index in batch_ids]
                batch_labels = self.labels[batch_ids]

                stacks = []
                for index, exp in enumerate(batch_exp):
                    program = self.parser.Parser.parse(exp)
                    # Check the validity of the expressions
                    is_valid = validity(program, len(program), len(program) - 1)
                    if is_valid:
                        self.correct_programs.append(index)
                    if self.self_training:
                        # Stored images are used, so nothing has to be rendered.
                        continue
                    if not is_valid:
                        stacks.append(np.zeros(blank_shape))
                        continue
                    self.parser.sim.generate_stack(program)
                    # Only the top of the stack after the last step is kept, so
                    # front-padding the history to a fixed length is unnecessary.
                    history = np.stack(self.parser.sim.stack_t, axis=0)
                    stacks.append(history[-1, 0, :, :])

                if self.self_training:
                    stacks = self.images[batch_ids]
                else:
                    stacks = np.stack(stacks, 0).astype(dtype=np.float32)

                # torch.from_numpy only accepts ndarrays; the stored images may
                # already be a tensor.
                if torch.is_tensor(stacks):
                    batch_data = stacks
                else:
                    batch_data = torch.from_numpy(stacks)
                yield (batch_data, batch_labels)
13: [370000, 1000 * proportion] } generator = MixedGenerateData(data_labels_paths={ 3: data_labels_paths[3], 5: data_labels_paths[5] }, batch_size=config.batch_size, canvas_shape=config.canvas_shape) assert len(generator.unique_draw) == 400 data_labels_paths = {3: data_labels_paths[3]} dataset_sizes = {3: dataset_sizes[3]} max_len = max(data_labels_paths.keys()) parser = ParseModelOutput(generator.unique_draw, max_len // 2 + 1, max_len, config.canvas_shape) # total size according to the test batch size. total_size = 0 config.test_size = sum(dataset_sizes[k][1] for k in dataset_sizes.keys()) for k in dataset_sizes.keys(): test_batch_size = config.batch_size total_size += (dataset_sizes[k][1] // test_batch_size) * test_batch_size over_all_CD = {} Pred_Prog = [] Targ_Prog = [] metrics = {} programs_tar = {} programs_pred = {}
import random
import numpy as np
from src.Models.models import ParseModelOutputGenData, validity, ParseModelOutput
from src.Generator.parser import Parser
import deepdish as dd
from vis_voxels import vis_voxels
from copy import deepcopy

max_ops = 10
max_len = (max_ops * 2) + 1

# The terminals are loaded before the parser is built because the parser takes
# them as its first argument.
with open("draws_cuboids.txt", "r") as file:
    unique_draws = [line.strip() for line in file]

# NOTE(review): the original call passed only (stack_size, steps, canvas_shape);
# unique_draws is supplied here to match the other ParseModelOutput call sites.
parser = ParseModelOutput(unique_draws, max_len // 2 + 1, max_len, [64, 64, 64])
other_parser = Parser()
ops = ["+", "-", "*"]


def get_voxels(exp):
    """
    Run an expression through the shared simulator and return its result.

    The expression is parsed with ``other_parser`` and executed on the
    module-level ``parser`` with ``start_scratch=False`` (presumably so the
    existing stack is kept; see ``clear_stack`` for resetting it).

    :param exp: program expression to execute
    :return: first stack entry after the last recorded step
    """
    program = other_parser.parse(exp)
    parser.sim.generate_stack(program, start_scratch=False)
    history = np.stack(parser.sim.stack_t, axis=0)
    return history[-1, 0, :, :]


def clear_stack():
    """
    Reset the shared simulator: empty its stack and restart the step history
    with a single entry holding the cleared stack's items.
    """
    parser.sim.stack_t = []
    parser.sim.stack.clear()
    parser.sim.stack_t.append(parser.sim.stack.get_items())
def optimize_expression(query_exp: str,
                        target_image: np.ndarray,
                        metric="iou",
                        stack_size=7,
                        steps=15,
                        max_iter=100):
    """
    A helper function for visually guided search. This takes the target image
    (or test image) and the predicted expression and refines the numeric
    parameters of its primitives with the Powell optimizer so that the rendered
    program minimizes the chosen metric against the target.

    :param query_exp: program expression
    :param target_image: numpy array of test image
    :param metric: metric to minimize while running the optimizer, "iou" or
        "chamfer"
    :param stack_size: max stack size of the program required
    :param steps: max number of time step present in any program
    :param max_iter: max iteration for which to run the optimizer; None stops
        on tolerance only
    :return: tuple of (optimized expression, final objective value). An
        expression that fails the validity check is returned unchanged
        together with the fixed value 16.
    """
    # a parser to parse the input expressions.
    parser = ParseModelOutput(canvas_shape=[64, 64],
                              stack_size=stack_size,
                              unique_draws=None,
                              steps=steps)
    program = parser.Parser.parse(query_exp)
    if not validity(program, len(program), len(program) - 1):
        return query_exp, 16

    # Flatten the parameters of every primitive into the optimizer's start point.
    x = [
        int(t)
        for p in program if p["value"] in ("c", "s", "t")
        for t in p["param"]
    ]

    optimizer = Optimize(query_exp,
                         metric=metric,
                         stack_size=stack_size,
                         steps=steps)
    optimizer.get_target_image(target_image)

    # Without "maxiter" the optimizer stops when the tolerance is reached.
    options = {"disp": False, "return_all": False}
    if max_iter is not None:
        options["maxiter"] = max_iter
    res = minimize(optimizer.objective,
                   x,
                   method="Powell",
                   tol=0.0001,
                   options=options)

    final_value = res.fun
    # np.int was removed from NumPy; the builtin int is the same dtype.
    params = res.x.astype(int)
    # Every third parameter gets the tighter [8, 32] range before the whole
    # vector is limited to [8, 56].
    params[2::3] = np.clip(params[2::3], 8, 32)
    params = np.clip(params, 8, 56)
    predicted_exp = optimizer.make_expression(params)
    return predicted_exp, final_value
print(f"acc: {acc}") mean_train_loss = train_loss / (config.train_size // (config.batch_size)) print(f"epoch {epoch} mean train loss: {mean_train_loss.cpu().numpy()}") imitate_net.eval() loss = Variable(torch.zeros(1)).cuda() acc = 0 metrics = {"cos": 0, "iou": 0, "cd": 0} IOU = 0 COS = 0 CD = 0 beam_CD = 0 correct_programs = 0 pred_programs = 0 for batch_idx in range(config.test_size // (config.batch_size)): parser = ParseModelOutput(generator.unique_draw, max_len // 2 + 1, max_len, config.canvas_shape) for k in dataset_sizes.keys(): with torch.no_grad(): data_, labels = next(test_gen_objs[k]) one_hot_labels = prepare_input_op(labels, len(generator.unique_draw)) one_hot_labels = Variable( torch.from_numpy(one_hot_labels)).cuda() data = Variable(torch.from_numpy(data_)).cuda() labels = Variable(torch.from_numpy(labels)).cuda() #test_outputs = imitate_net([data, one_hot_labels, k]) #loss += (losses_joint(test_outputs, labels, time_steps=k + 1) / # (k + 1)) / types_prog test_output = imitate_net.test([data, one_hot_labels, max_len]) #acc += float((torch.argmax(torch.stack(test_output), dim=2)[:k].permute(1, 0) == labels[:, :-1]).float().sum()) \ # / (len(labels) * (k+1)) / types_prog / (config.test_size // config.batch_size)
class Optimize:
    """
    Post processing visually guided search using Powell optimizer.

    The structure of the query expression (its sequence of terminals) is fixed
    at construction time; only the numeric parameters of the "c", "s" and "t"
    primitives are varied through ``objective``.
    """

    def __init__(self, query_expression, metric="iou", stack_size=7, steps=15):
        """
        Post processing visually guided search.
        :param query_expression: expression to be optimized
        :param metric: metric to be minimized, "iou" or "chamfer"
        :param stack_size: max stack size required in any program
        :param steps: max time step of any program
        """
        self.parser = ParseModelOutput(canvas_shape=[64, 64],
                                       stack_size=stack_size,
                                       unique_draws=None,
                                       steps=steps)
        self.query_expression = query_expression
        self.get_graph_structure(query_expression)
        self.metric = metric
        # Not written to by any method of this class.
        self.errors = []

    def get_target_image(self, image: np.ndarray):
        """
        Stores the target image that ``objective`` compares against.
        :param image: target image
        :return:
        """
        self.target_image = image

    def get_graph_structure(self, expression):
        """
        Records the nodes (terminals) of the program in ``self.graph_str``.
        :param expression: input query expression
        :return:
        """
        program = self.parser.Parser.parse(expression)
        self.graph_str = [p["value"] for p in program]

    def make_expression(self, x: np.ndarray):
        """
        Rebuilds an expression with the stored structure and new parameters.
        :param x: flat parameters, three consecutive values per "c", "s" or
            "t" terminal in program order
        :return: expression string
        """
        pieces = []
        index = 0
        for e in self.graph_str:
            if e in ("c", "s", "t"):
                pieces.append("{}({},{},{})".format(e, x[index], x[index + 1],
                                                    x[index + 2]))
                index += 3
            else:
                pieces.append(e)
        return "".join(pieces)

    def objective(self, x: np.ndarray):
        """
        Objective to minimize.
        :param x: input program parameters in numpy array format
        :return: negative IoU for metric "iou", chamfer distance for metric
            "chamfer"
        :raises ValueError: if the metric is neither "iou" nor "chamfer"
        """
        if self.metric not in ("iou", "chamfer"):
            # Previously this fell through and failed on an unbound local.
            raise ValueError("unknown metric: {!r}".format(self.metric))

        # np.int was removed from NumPy; the builtin int is the same dtype.
        x = np.clip(np.asarray(x).astype(int), 8, 56)
        query_exp = self.make_expression(x)
        query_image = self.parser.expression2stack([query_exp])[-1, 0, 0, :, :]

        if self.metric == "iou":
            # NOTE(review): an empty union gives nan here; presumably the
            # target is never empty - confirm against callers.
            intersection = np.sum(np.logical_and(self.target_image, query_image))
            union = np.sum(np.logical_or(self.target_image, query_image))
            return -intersection / union
        return chamfer(np.expand_dims(self.target_image, 0),
                       np.expand_dims(query_image, 0))
l.cpu().numpy(), epoch * (config.train_size // (config.batch_size * num_accums)) + batch_idx) mean_train_loss = train_loss / (config.train_size // (config.batch_size * num_accums)) log_value('train_loss', mean_train_loss.cpu().numpy(), epoch) del data, loss, loss_sum, train_loss, outputs test_losses = 0 imitate_net.eval() test_reward = 0 for batch_idx in range(config.test_size // config.batch_size): for k in data_labels_paths.keys(): parser = ParseModelOutput(generator.unique_draw, stack_size=(k + 1) // 2 + 1, steps=k, canvas_shape=[64, 64, 64], primitives=generator.primitives) data_, labels = next(test_gen_objs[k]) one_hot_labels = prepare_input_op(labels, len(generator.unique_draw)) one_hot_labels = Variable(torch.from_numpy(one_hot_labels)).cuda() data = Variable(torch.from_numpy(data_[:, :, 0:config.top_k + 1, :, :, :]), volatile=True).cuda() data = data.permute(1, 0, 2, 3, 4, 5) labels = Variable(torch.from_numpy(labels)).cuda() test_output = imitate_net([data, one_hot_labels, k])
# if using multi gpu training, train and test batch size should be multiple of # number of GPU edvices. test_batch_size = config.batch_size test_gen_objs[k] = generator.get_test_data( test_batch_size, k, num_train_images=dataset_sizes[k][0], num_test_images=dataset_sizes[k][1], if_primitives=True, if_jitter=False) Target_expressions = [] Predicted_expressions = [] parser = ParseModelOutput(generator.unique_draw, max_len // 2 + 1, max_len, [64, 64, 64], primitives=generator.primitives) imitate_net.eval() Rs = 0 t1 = time.time() IOU = {} total_iou = 0 for k in data_labels_paths.keys(): Rs = 0.0 for batch_idx in range(dataset_sizes[k][1] // config.batch_size): print(batch_idx) data_, labels = next(test_gen_objs[k]) data_ = data_[:, :, 0:config.top_k + 1, :, :, :] one_hot_labels = prepare_input_op(labels, len(generator.unique_draw)) one_hot_labels = Variable(torch.from_numpy(one_hot_labels), volatile=True).cuda()