def main(_):
    """Build a DCGAN, then train it or restore it from a checkpoint.

    Prints the parsed flags, creates the checkpoint and sample directories
    when they are missing and constructs the model inside a TensorFlow
    session. The ``mnist`` dataset is built with ``y_dim=10``; any other
    dataset additionally receives the input size, the report frequency and
    the large/small data paths. When ``FLAGS.visualize`` is set the layers
    are exported with ``to_json`` and ``visualize`` is called with option 2.

    Args:
        _: Unused positional argument.
    """
    pp.pprint(flags.FLAGS.__flags)

    # Output directories must exist before the model writes into them.
    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    with tf.Session() as sess:
        # Arguments common to both dataset variants.
        model_kwargs = dict(
            image_size=FLAGS.image_size,
            batch_size=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            is_crop=FLAGS.is_crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir,
            mid_epoch=FLAGS.mid_epoch,
        )
        if FLAGS.dataset == 'mnist':
            model_kwargs['y_dim'] = 10
        else:
            model_kwargs.update(
                input_size=FLAGS.input_size,
                report_freq=FLAGS.report_freq,
                large_data_path=FLAGS.large_data_path,
                small_data_path=FLAGS.small_data_path,
            )
        dcgan = DCGAN(sess, **model_kwargs)

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        else:
            dcgan.load(FLAGS.checkpoint_dir)

        if FLAGS.visualize:
            layers = [
                [dcgan.h0_w, dcgan.h0_b, dcgan.g_bn0],
                [dcgan.h1_w, dcgan.h1_b, dcgan.g_bn1],
                [dcgan.h2_w, dcgan.h2_b, dcgan.g_bn2],
                [dcgan.h3_w, dcgan.h3_b, dcgan.g_bn3],
                [dcgan.h4_w, dcgan.h4_b, None],
            ]
            to_json("./web/js/layers.js", *layers)

            # Visualization mode handed to `visualize`.
            OPTION = 2
            visualize(sess, dcgan, FLAGS, OPTION)
def tes_mnist(epoch):
    """Evaluate the embedding network on the test set and plot its features.

    Uses the module-level ``EbeddingNet``, ``head``, ``test_loader``,
    ``writer`` and ``name``.

    Args:
        epoch: Current epoch; forwarded to the network and the plot helpers.

    Returns:
        tuple: ``(eval_acc, eval_loss)`` where both values are sums of the
        per-batch accuracy / loss over all test batches (not averaged).
    """
    feature_list = []
    label_list = []
    eval_loss = 0
    eval_acc = 0

    # One no_grad context is enough; the original nested a second one.
    with torch.no_grad():
        EbeddingNet.eval()  # switch the model to inference mode
        for images, label in test_loader:
            images = images.cuda()
            label = label.cuda()

            feature = EbeddingNet(images, epoch)
            out, loss = head(feature, label)
            feature_list.append(feature)
            label_list.append(label)

            eval_loss += loss.item()

            _, pred = out.max(1)
            num_correct = (pred == label).sum().item()
            eval_acc += float(num_correct) / images.shape[0]

        feats = torch.cat(feature_list, 0)
        labels = torch.cat(label_list, 0)

        if name == 'CenterFace':
            visualize_center(head.center, feats, labels, epoch, writer, name)
        else:
            visualize(feats, labels, epoch, writer, name)
        # NOTE(review): source indentation was lost; the cosine plot is
        # assumed to run for every head type - confirm.
        visualize_cos(feats, labels, epoch, writer, head, name)

    return eval_acc, eval_loss
def main(_):
    """Construct a DCGAN and either train it or load a saved checkpoint.

    Prints the parsed flags and creates the checkpoint/sample directories if
    needed. When ``FLAGS.visualize`` is set, ``visualize`` is called with
    option 2.

    Args:
        _: Unused positional argument.
    """
    pp.pprint(flags.FLAGS.__flags)

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    with tf.Session() as sess:
        dcgan = DCGAN(
            sess,
            image_size=FLAGS.image_size,
            output_size=FLAGS.output_size,
            batch_size=FLAGS.batch_size,
            sample_size=FLAGS.sample_size,
        )

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        else:
            dcgan.load(FLAGS.checkpoint_dir)

        if FLAGS.visualize:
            # The to_json export of the layers to ./web/js/layers.js is
            # disabled in this variant.
            OPTION = 2
            visualize(sess, dcgan, FLAGS, OPTION)
def visualize_predicts(predicts, images_path, labels, masks, height, width, name):
    """Write one colour-coded comparison image per prediction.

    Each output stacks the test image on top of an overlay whose channels are
    (error, right, missing):

        right:   green (predicted, labelled and inside the mask)
        error:   red   (predicted but not counted as right)
        missing: blue  (labelled but not counted as right)

    Images are written to ``./logs/<name>/predicts/<index>.png`` with a
    1-based index.

    Args:
        predicts: Sequence of predicted maps, one per test image.
        images_path: Directory prefix containing ``test_images.hdf5``.
        labels: Label array; axes 1 and 2 give the padded height and width.
        masks: Mask array indexed like ``labels``.
        height: Number of overlay rows kept before stacking.
        width: Number of overlay columns kept before stacking.
        name: Run name used to build the output directory.
    """
    # Local import: only `path` and `system` are known to be in scope here.
    import os

    out_dir = './logs/' + name + '/predicts/'
    if not path.exists(out_dir):
        # makedirs also creates ./logs/<name>/ when it is missing and avoids
        # building a shell command from `name`.
        os.makedirs(out_dir)

    images = load_hdf5(images_path + 'test_images.hdf5')
    pad_height = labels.shape[1]
    pad_width = labels.shape[2]

    for i, predict in enumerate(predicts):
        label = np.squeeze(labels[i])
        right = predict * label * np.squeeze(masks[i])
        error = predict - right
        missing = label - right

        vis = np.zeros((pad_height, pad_width, 3))
        vis[:, :, 0] = error
        vis[:, :, 1] = right
        vis[:, :, 2] = missing

        # Crop the padding away and scale to 0-255 before stacking.
        vis = np.concatenate((images[i], vis[:height, :width] * 255), axis=0)
        visualize(vis, out_dir + str(i + 1) + '.png')
def main(_):
    """Build a DCGAN, train or restore it, then run the visualizer.

    Missing width flags default to the matching height flag. The ``birds``
    dataset is constructed with an explicit ``y_dim=None``; every other
    dataset omits ``y_dim``. In test mode a missing checkpoint raises.

    Args:
        _: Unused positional argument.

    Raises:
        Exception: If not training and no checkpoint could be loaded.
    """
    pp.pprint(flags.FLAGS.__flags)

    if FLAGS.input_width is None:
        FLAGS.input_width = FLAGS.input_height
    if FLAGS.output_width is None:
        FLAGS.output_width = FLAGS.output_height

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Let TensorFlow grow GPU memory on demand.
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True

    with tf.Session(config=run_config) as sess:
        # NOTE(review): z_dim is fed from FLAGS.generate_test_images -
        # confirm that this is intended.
        model_kwargs = dict(
            input_width=FLAGS.input_width,
            input_height=FLAGS.input_height,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            sample_num=FLAGS.batch_size,
            z_dim=FLAGS.generate_test_images,
            dataset_name=FLAGS.dataset,
            input_fname_pattern=FLAGS.input_fname_pattern,
            crop=FLAGS.crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir,
        )
        if FLAGS.dataset == 'birds':
            model_kwargs['y_dim'] = None
        dcgan = DCGAN(sess, **model_kwargs)

        show_all_variables()

        if FLAGS.train:
            dcgan.train(FLAGS)
        elif not dcgan.load(FLAGS.checkpoint_dir)[0]:
            raise Exception("[!] Train a model first, then run test mode")

        OPTION = 1
        visualize(sess, dcgan, FLAGS, OPTION)
def main(_):
    """Train a ``Hidden`` model and visualize the result.

    Missing width flags default to the matching height flag.

    Args:
        _: Unused positional argument.
    """
    pp.pprint(flags.FLAGS.__flags)

    if FLAGS.input_width is None:
        FLAGS.input_width = FLAGS.input_height
    if FLAGS.output_width is None:
        FLAGS.output_width = FLAGS.output_height

    # Let TensorFlow grow GPU memory on demand.
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True

    with tf.Session(config=run_config) as sess:
        HiDDen = Hidden(
            sess,
            input_width=FLAGS.input_width,
            input_height=FLAGS.input_height,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
        )

        HiDDen.train()

        OPTION = 1
        # Hand over the trained instance; the original passed the `Hidden`
        # class itself, which carries none of the instance state.
        visualize(sess, HiDDen, FLAGS, OPTION)
def main(_):
    """Train an LSGAN: ``FLAGS.d`` discriminator steps per generator step.

    Batches are converted to float32 and rescaled with
    ``(batch - 127.5) / 127.5`` before being fed to the discriminator.
    Generated images are visualized at the first step of every 100th epoch.

    Args:
        _: Unused positional argument.
    """
    loader = Loader(FLAGS.data_dir, FLAGS.data, FLAGS.batch_size)
    print("# of data: {}".format(loader.data_num))

    with tf.Session() as sess:
        lsgan = LSGAN([FLAGS.batch_size, 112, 112, 3])
        sess.run(tf.global_variables_initializer())

        for epoch in range(10000):
            loader.reset()

            for step in range(int(loader.batch_num / FLAGS.d)):
                # The original test was `epoch % 1 == 100`, which can never
                # be true, so no sample was ever written.
                if step == 0 and epoch % 100 == 0:
                    utils.visualize(sess.run(lsgan.gen_img), epoch)

                # Discriminator updates.
                for _d_step in range(FLAGS.d):
                    batch = np.asarray(loader.next_batch(), dtype=np.float32)
                    batch = (batch - 127.5) / 127.5
                    sess.run(lsgan.d_train_op, feed_dict={lsgan.X: batch})

                # Single generator update.
                sess.run(lsgan.g_train_op)
def main(_):
    """Build a flag-configured DCGAN, train or restore it, then visualize.

    Missing width flags default to the matching height flag, and the
    checkpoint/sample directories are created when absent.

    Args:
        _: Unused positional argument.

    Raises:
        Exception: If not training and no checkpoint could be loaded.
    """
    if FLAGS.input_width is None:
        FLAGS.input_width = FLAGS.input_height
    if FLAGS.output_width is None:
        FLAGS.output_width = FLAGS.output_height

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Prevent TensorFlow from allocating all of the GPU memory up front.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        dcgan = DCGAN(sess, flags=FLAGS)
        show_all_variables()

        if FLAGS.train:
            dcgan.train(FLAGS)
        elif not dcgan.load(FLAGS.checkpoint_dir)[0]:
            raise Exception("[!] Train a model first, then run test mode")

        visualize(sess, dcgan, FLAGS)
def _train_2(self, args, epoch, old_disc, disc, gen, train_loader, optimizer_d,
             train_output_dir):
    """Train ``disc`` to classify images generated from ``old_disc`` features.

    For every batch the features produced by ``old_disc`` are turned into an
    image by ``gen`` (features detached), ``disc`` classifies that image and
    is updated on the cross-entropy loss. After the update, ``disc`` is also
    run on the real feature maps to track its accuracy and loss on them.

    Args:
        args: Namespace providing ``device``, ``visualize_freq`` and ``epochs``.
        epoch: Current epoch number.
        old_disc: Model producing ``(feats, logits_cls, logits_adv)``.
        disc: Model being optimised; same output triple.
        gen: Generator mapping features to images.
        train_loader: Iterable of ``(inputs, featmaps, targets)`` batches.
        optimizer_d: Optimizer stepping ``disc``.
        train_output_dir: Path prefix for the visualization images.

    Returns:
        dict: ``Train_acc``, ``Train_acc_gen``, ``Loss_cls`` and
        ``Loss_cls_gen`` (losses averaged over the batches).
    """
    cls_criterion = nn.CrossEntropyLoss()
    _loss_cls, _loss_cls_gen = 0., 0.
    ypred, ypred_gen = [], []
    ytrue, ytrue_gen = [], []

    for i, (inputs, featmaps, targets) in enumerate(train_loader):
        inputs = inputs.to(args.device)
        featmaps = featmaps.to(args.device)
        targets = targets.to(args.device)
        # Host-side copy of the labels: np.array() cannot convert a list of
        # device tensors, which is what the original accumulated.
        batch_targets = targets.cpu().numpy().tolist()

        # Optimize the discriminator on generated images.
        optimizer_d.zero_grad()
        feats, logits_cls, logits_adv = old_disc(featmaps)
        gen_image = gen(feats.unsqueeze(2).unsqueeze(3).detach())
        feats_gen, logits_cls_gen, logits_adv_gen = disc(gen_image)
        loss_cls_gen = cls_criterion(logits_cls_gen, targets.long())
        _loss_cls_gen += loss_cls_gen.item()

        preds_gen = F.softmax(logits_cls_gen, dim=1).argmax(dim=1).cpu().numpy().tolist()
        ypred_gen.extend(preds_gen)
        ytrue_gen.extend(batch_targets)

        loss_cls_gen.backward()
        optimizer_d.step()

        # Metrics on the real feature maps (no optimisation step).
        feats, logits_cls, logits_adv = disc(featmaps)
        loss_cls = cls_criterion(logits_cls, targets.long())
        _loss_cls += loss_cls.item()
        preds = F.softmax(logits_cls, dim=1).argmax(dim=1).cpu().numpy().tolist()
        ypred.extend(preds)
        ytrue.extend(batch_targets)

    # NOTE(review): source indentation was lost; the summary below is assumed
    # to run once after the loop - confirm.
    acc = round((np.array(ypred) == np.array(ytrue)).sum() / len(ytrue), 4)
    acc_gen = round((np.array(ypred_gen) == np.array(ytrue_gen)).sum() / len(ytrue_gen), 4)

    if epoch % args.visualize_freq == 0 or epoch == args.epochs + 1:
        print("Epoch {}, Training Iteration {}".format(epoch, i))
        print("Accuracy: {}, Accuracy gen: {}".format(acc, acc_gen))
        print("Loss_cls: {}, Loss_cls_gen: {}".format(
            _loss_cls / (i + 1), _loss_cls_gen / (i + 1)))
        visualize(featmaps[0], gen_image[0],
                  out_dir=train_output_dir + str(epoch) + "_" + str(i) + ".jpg")

    return {
        "Train_acc": acc,
        "Train_acc_gen": acc_gen,
        "Loss_cls": _loss_cls / (i + 1),
        "Loss_cls_gen": _loss_cls_gen / (i + 1)
    }
def main(_):
    """Run DCGAN completion from an existing checkpoint directory.

    ``FLAGS.output_width`` defaults to ``FLAGS.output_height``. The checkpoint
    directory must already exist.

    Args:
        _: Unused positional argument.
    """
    pp.pprint(flags.FLAGS.__flags)

    if FLAGS.output_width is None:
        FLAGS.output_width = FLAGS.output_height

    assert (os.path.exists(FLAGS.checkpoint_dir))

    # Let TensorFlow grow GPU memory on demand.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        dcgan = DCGAN(
            sess,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            checkpoint_dir=FLAGS.checkpoint_dir,
            lam=FLAGS.lam,
        )

        # An explicit dcgan.load(...) call is disabled in this variant.
        dcgan.complete(FLAGS)

        show_all_variables()

        # The to_json export of the layers to ./web/js/layers.js is disabled.
        OPTION = 1
        visualize(sess, dcgan, FLAGS, OPTION)
def main(_):
    """Build a PatchGAN, train or restore it, then run the visualizer.

    Args:
        _: Unused positional argument.

    Raises:
        Exception: If not training and no checkpoint could be loaded.
    """
    pp.pprint(flags.FLAGS.__flags)

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Let TensorFlow grow GPU memory on demand.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        model_kwargs = dict(
            input_width=FLAGS.input_width,
            input_height=FLAGS.input_height,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            sample_num=FLAGS.batch_size,
            y_dim=10,
            z_dim=FLAGS.z_dim,
            checkpoint_dir=FLAGS.checkpoint_dir,
        )
        patchGan = PatchGAN(sess, **model_kwargs)

        show_all_variables()

        if FLAGS.train:
            patchGan.train(FLAGS)
        elif not patchGan.load(FLAGS.checkpoint_dir)[0]:
            raise Exception("[!] Train a model first, then run test mode")

        OPTION = 1
        visualize(sess, patchGan, FLAGS, OPTION)
def visualize(model, data_loader, data_iterator, metrics, params, num_steps, random=False):
    """Render ``num_steps`` batches with ``utils.visualize``, pausing after each.

    CONTINUE NOTE:
    https://github.com/sharkmir1/Hierarchical-Attention-Network/blob/master/utils.py
    Implement attention using code from above.

    TODO: get self.vocab and self.tag_map to view it properly, but just
    visualize it on indices for now.
        1. call visualize from eval
        2. return attn_weights from model
        3. implement vis using above code (test.py and utils.py)

    Args:
        model: Model to visualize; switched to eval mode first.
        data_loader: Forwarded to ``utils.visualize``.
        data_iterator: Iterator yielding ``(data_batch, labels_batch)``.
        metrics: Not used by this function.
        params: Not used by this function.
        num_steps: Number of batches to pull from ``data_iterator``.
        random: Not used by this function.
    """
    model.eval()

    for step in tqdm(range(num_steps)):
        data_batch, labels_batch = next(data_iterator)
        utils.visualize(model, data_batch, labels_batch, data_loader, step,
                        view_browser=True)
        # NOTE(review): source indentation was lost; the prompt is assumed to
        # pause after every batch - confirm.
        input("ENTER to continue")
def test_viz():
    """Draw the first five MPII samples with a joint-enclosing bounding box.

    The box starts from the sentinels (9999, 9999) / (0, 0) and is widened by
    every joint coordinate, so its corners never exceed 9999 on the top-left
    nor drop below 0 on the bottom-right.
    """
    data, _, dataset_info = make_mpii(Config.MPII_SOURCE, Config.MPII_DATA, "mpii")
    joint_info = dataset_info['joint_info']

    for sample in data[:5]:
        left, top = 9999, 9999
        right, bottom = 0, 0
        for x, y in sample['coords']:
            left = min(left, x)
            right = max(right, x)
            top = min(top, y)
            bottom = max(bottom, y)

        bbox = [(left, top), (right, bottom)]
        visualize(sample['path'], sample['coords'],
                  joint_info.stick_figure_edges, bbox)
def train(loss_type):
    """Train and evaluate the embedding network for one loss variant.

    Loss types:
        0 - original softmax loss
        1 - modified softmax loss
        2 - angular softmax loss

    Per-batch training accuracy and loss are appended to a text file under
    ``./result/`` and the training embeddings are plotted after every epoch.

    Args:
        loss_type: One of 0, 1 or 2.

    Returns:
        tuple: ``(test_acc, embeddings, nlabels)`` - mean test accuracy plus
        the embeddings and labels collected on the test batches.

    Raises:
        ValueError: If ``loss_type`` is not 0, 1 or 2.
    """
    result_files = {
        0: './result/oringal_softmax_result.txt',
        1: './result/modified_softmax_result.txt',
        2: './result/angular_softmax_result.txt',
    }
    if loss_type not in result_files:
        # The original only failed later, on the first write to an unbound file.
        raise ValueError('unknown loss_type: %r' % (loss_type,))

    # define input placeholder for network
    images = tf.placeholder(tf.float32, shape=[batch_size, 28, 28, 1], name='input')
    labels = tf.placeholder(tf.int64, [batch_size, ])
    global_step = tf.Variable(0, trainable=False)
    add_step_op = tf.assign_add(global_step, tf.constant(1))

    # about network
    network = model.Model(images, labels, embedding_dim, loss_type)
    accuracy = network.accuracy
    loss = network.loss

    # define optimizer and learning rate
    decay_lr = tf.train.exponential_decay(lr, global_step, 500, 0.9)
    optimizer = tf.train.AdamOptimizer(decay_lr)
    train_op = optimizer.minimize(loss)

    # Context managers close the session and the result file on every exit path.
    with tf.Session() as sess, open(result_files[loss_type], 'w') as f:
        sess.run(tf.global_variables_initializer())

        # training process
        network.loss_type = loss_type
        print('training loss type %d' % loss_type)

        for epoch in range(epochs):
            nlabels = np.zeros((train_batchs * batch_size,), dtype=np.int32)
            embeddings = np.zeros((train_batchs * batch_size, embedding_dim),
                                  dtype=np.float32)
            train_acc = 0.
            for batch in tqdm(range(train_batchs)):
                i, j = batch * batch_size, (batch + 1) * batch_size
                batch_images, batch_labels = mnist.train.next_batch(batch_size)
                feed_dict = {images: batch_images, labels: batch_labels}
                _, _, batch_loss, batch_acc, embeddings[i:j, :] = sess.run(
                    [train_op, add_step_op, loss, accuracy, network.embeddings],
                    feed_dict)
                nlabels[i:j] = batch_labels
                f.write(" ".join(map(str, [batch_acc, batch_loss])) + "\n")
                train_acc += batch_acc
            train_acc /= train_batchs
            print("epoch %2d---------------------------train accuracy:%.4f"
                  % (epoch + 1, train_acc))
            visualize(embeddings, nlabels, epoch, train_acc,
                      picname="./image/%d/%d.jpg" % (loss_type, epoch))

        # testing process
        test_acc = 0.
        embeddings = np.zeros((test_batchs * batch_size, embedding_dim),
                              dtype=np.float32)
        nlabels = np.zeros(shape=(test_batchs * batch_size,), dtype=np.int32)
        for batch in range(test_batchs):
            i, j = batch * batch_size, (batch + 1) * batch_size
            batch_images, batch_labels = mnist.test.next_batch(batch_size)
            feed_dict = {images: batch_images, labels: batch_labels}
            # Evaluation only: the original also fetched train_op here, which
            # updated the weights on test data.
            _, batch_acc, embeddings[i:j, :] = sess.run(
                [loss, accuracy, network.embeddings], feed_dict)
            nlabels[i:j] = batch_labels
            test_acc += batch_acc
        test_acc /= test_batchs
        print("test accuracy: %.4f" % test_acc)

    return test_acc, embeddings, nlabels
def evaluate(epoch, disease):
    """Encode RNA/miRNA with the model and run Cox analysis on the latent means.

    Uses the module-level ``model``, ``RNA``, ``miRNA``, ``survival`` and
    ``args``. When ``survival`` has three columns a single Cox analysis is run
    and its survival table and the latent means are written to
    ``./output_<disease>/``; otherwise one analysis is run per distinct value
    in the last ``survival`` column.

    Args:
        epoch: Current epoch; used in log lines and output file names.
        disease: Name used to build the output directory.

    Returns:
        The p-value of the last Cox analysis that was run.
    """
    try:
        os.mkdir('./output_{}'.format(disease))
    except OSError:
        # Best effort, as before (typically the directory already exists),
        # but without swallowing unrelated exceptions like the bare except did.
        pass

    model.eval()
    sub_RNA, sub_miRNA = wrapper(RNA), wrapper(miRNA)
    recon_batch, mu, logvar = model(sub_RNA, sub_miRNA)

    if args.cuda:
        hidden = mu.cpu().data.numpy()
    else:
        hidden = mu.data.numpy()

    if survival.shape[-1] == 3:
        p_value, ratio, surv_info = cox(hidden, survival, epoch)
        print('Epoch %d ====> Cox P-value: %.7f' % (epoch, p_value))
        surv_info.to_csv('./output_{}/survieinfo_{}.csv'.format(
            disease, epoch), index=False)
        np.savetxt('./output_{}/latent_{}.csv'.format(disease, epoch),
                   hidden, delimiter='\t')
    else:
        # TO-DO: for all data
        types = set(survival[:, -1].tolist())
        for type_ in types:
            indices = np.where(survival[:, -1] == type_)[0]
            # NOTE(review): cox is called with 2 arguments and unpacked into 2
            # values here, but with 3 and 3 above - confirm its signature.
            p_value, ratio = cox(hidden[indices], survival[indices])
            print('Epoch %d ====> Type: %s, Cox P-value: %.7f' %
                  (epoch, type_, p_value))
        # NOTE(review): source indentation was lost; the plot is assumed to
        # belong to the per-type branch - confirm.
        visualize(hidden, survival[:, -1], outfp='%d.png' % (epoch))

    return p_value
def main(_):
    """Build a DCGAN in per-run directories, then train it or draw samples.

    The sample, checkpoint and log directories are each suffixed with
    ``FLAGS.name`` and created when missing. When ``FLAGS.visualize`` is set
    the layers are exported with ``to_json`` and ``visualize`` is called with
    option 2.

    Args:
        _: Unused positional argument.
    """
    pp.pprint(flags.FLAGS.__flags)

    sample_dir_ = os.path.join(FLAGS.sample_dir, FLAGS.name)
    checkpoint_dir_ = os.path.join(FLAGS.checkpoint_dir, FLAGS.name)
    log_dir_ = os.path.join(FLAGS.log_dir, FLAGS.name)

    for run_dir in (checkpoint_dir_, sample_dir_, log_dir_):
        if not os.path.exists(run_dir):
            os.makedirs(run_dir)

    with tf.Session() as sess:
        if FLAGS.dataset == 'mnist':
            dcgan = DCGAN(
                sess,
                config=FLAGS,
                batch_size=FLAGS.batch_size,
                output_size=28,
                c_dim=1,
                dataset_name=FLAGS.dataset,
                is_crop=FLAGS.is_crop,
                checkpoint_dir=checkpoint_dir_,
                sample_dir=sample_dir_,
                log_dir=log_dir_,
            )
        else:
            # NOTE(review): this branch passes the un-suffixed FLAGS
            # directories and no log_dir/config - confirm that is intended.
            dcgan = DCGAN(
                sess,
                image_size=FLAGS.image_size,
                batch_size=FLAGS.batch_size,
                output_size=FLAGS.output_size,
                c_dim=FLAGS.c_dim,
                dataset_name=FLAGS.dataset,
                is_crop=FLAGS.is_crop,
                checkpoint_dir=FLAGS.checkpoint_dir,
                sample_dir=FLAGS.sample_dir,
            )

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        else:
            dcgan.sampling(FLAGS)

        if FLAGS.visualize:
            layers = [
                [dcgan.h0_w, dcgan.h0_b, dcgan.g_bn0],
                [dcgan.h1_w, dcgan.h1_b, dcgan.g_bn1],
                [dcgan.h2_w, dcgan.h2_b, dcgan.g_bn2],
                [dcgan.h3_w, dcgan.h3_b, dcgan.g_bn3],
                [dcgan.h4_w, dcgan.h4_b, None],
            ]
            to_json("./web/js/layers.js", *layers)

            # Visualization mode handed to `visualize`.
            OPTION = 2
            visualize(sess, dcgan, FLAGS, OPTION)
def evaluate(epoch):
    """Encode RNA/methylation/miRNA and run Cox analysis on the latent means.

    Uses the module-level ``model``, ``RNA``, ``methylation``, ``miRNA``,
    ``survival`` and ``args``. With a three-column ``survival`` array a single
    analysis is run and its table is written to ``./temp/``; otherwise one
    analysis is run per distinct value in the last column.

    Args:
        epoch: Current epoch; used in log lines and output file names.

    Returns:
        The p-value of the last Cox analysis that was run.
    """
    model.eval()

    sub_RNA = wrapper(RNA)
    sub_methy = wrapper(methylation)
    sub_miRNA = wrapper(miRNA)
    recon_batch, mu, logvar = model(sub_RNA, sub_methy, sub_miRNA)

    # Bring the latent means to host memory as a NumPy array.
    hidden = mu.cpu().data.numpy() if args.cuda else mu.data.numpy()

    if survival.shape[-1] == 3:
        p_value, ratio, surv_info = cox(hidden, survival)
        print('Epoch %d ====> Cox P-value: %.7f' % (epoch, p_value))
        surv_info.to_csv('./temp/survie_info_{}.csv'.format(epoch), index=False)
        return p_value

    # TO-DO: for all data
    type_column = survival[:, -1]
    for type_ in set(type_column.tolist()):
        indices = np.where(survival[:, -1] == type_)[0]
        p_value, ratio, surv_info = cox(hidden[indices], survival[indices])
        print('Epoch %d ====> Type: %s, Cox P-value: %.7f' %
              (epoch, type_, p_value))
    # NOTE(review): source indentation was lost; the plot is assumed to belong
    # to the per-type branch - confirm.
    visualize(hidden, survival[:, -1], outfp='%d.png' % (epoch))
    return p_value
def main(_):
    """Build a DCGAN, train or restore it, then export layers and visualize.

    The ``mnist`` dataset is built with ``y_dim=10``. The layer export and the
    option-2 visualization run unconditionally in this variant.

    Args:
        _: Unused positional argument.
    """
    pp.pprint(flags.FLAGS.__flags)

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    with tf.Session() as sess:
        model_kwargs = dict(
            image_size=FLAGS.image_size,
            batch_size=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            is_crop=FLAGS.is_crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
        )
        if FLAGS.dataset == 'mnist':
            model_kwargs['y_dim'] = 10
        dcgan = DCGAN(sess, **model_kwargs)

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        else:
            dcgan.load(FLAGS.checkpoint_dir)

        layers = [
            [dcgan.h0_w, dcgan.h0_b, dcgan.g_bn0],
            [dcgan.h1_w, dcgan.h1_b, dcgan.g_bn1],
            [dcgan.h2_w, dcgan.h2_b, dcgan.g_bn2],
            [dcgan.h3_w, dcgan.h3_b, dcgan.g_bn3],
            [dcgan.h4_w, dcgan.h4_b, None],
        ]
        to_json("./web/js/layers.js", *layers)

        # Visualization mode handed to `visualize`.
        OPTION = 2
        visualize(sess, dcgan, FLAGS, OPTION)
def main(path):
    """Cluster the training images with k-means and score it as a classifier.

    Each cluster is labelled with the most frequent true label among the
    training images assigned to it; test images then receive the label of the
    cluster they are predicted into. The accuracy is printed and the cluster
    centroids are visualized.

    Args:
        path: Dataset location handed to the ``utils`` loaders.
    """
    train_images, train_labels = utils.load_training_data(path)
    test_images, test_labels = utils.load_test_data(path)

    n_classes = 10
    model = KMeans(n_clusters=n_classes, n_init=1, max_iter=1)
    model.fit(train_images)

    # Which label does each cluster stand for?
    #   1. collect all data points assigned to the cluster
    #   2. look up their actual labels
    #   3. use the mode of those labels as the cluster's label
    cluster_label_dict = {}
    for cluster_num in range(n_classes):
        idx = utils.cluster_indices(cluster_num, model.labels_)
        original_labels = np.take(train_labels, idx)
        # stats.mode returns the mode as a length-1 array in older SciPy and
        # as a scalar in newer releases; atleast_1d handles both shapes.
        mode = np.atleast_1d(stats.mode(original_labels)[0])[0]
        cluster_label_dict[cluster_num] = mode

    # prediction
    predicted_cluster = model.predict(test_images)
    predicted_labels = np.vectorize(cluster_label_dict.get)(predicted_cluster)

    accuracy = utils.classification_accuracy(predicted_labels, test_labels)
    print(" K means clustering accuracy for cifar 10 = {}".format(accuracy))

    # visualise clusters
    cluster_centroids = model.cluster_centers_
    utils.visualize(cluster_centroids)
def main(_):
    """Build a DCGAN, train or restore it, print an evaluation, then visualize.

    ``mnist`` uses ``y_dim=10``, ``output_size=28`` and ``c_dim=1``; other
    datasets take the output size and channel count from the flags.

    Args:
        _: Unused positional argument.
    """
    pp.pprint(flags.FLAGS.__flags)

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    with tf.Session() as sess:
        model_kwargs = dict(
            image_size=FLAGS.image_size,
            batch_size=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            is_crop=FLAGS.is_crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir,
        )
        if FLAGS.dataset == 'mnist':
            model_kwargs.update(y_dim=10, output_size=28, c_dim=1)
        else:
            model_kwargs.update(output_size=FLAGS.output_size, c_dim=FLAGS.c_dim)
        dcgan = DCGAN(sess, **model_kwargs)

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        else:
            dcgan.load(FLAGS.checkpoint_dir)

        # NOTE(review): source indentation was lost; the evaluation is assumed
        # to run after both training and loading - confirm.
        print(dcgan.evaluate(6400))

        if FLAGS.visualize:
            # The to_json export of the layers to ./web/js/layers.js is
            # disabled in this variant.
            OPTION = 2
            visualize(sess, dcgan, FLAGS, OPTION)
def main(_):
    """Build a 7-class conditional DCGAN, train or restore it, then visualize.

    Args:
        _: Unused positional argument.

    Raises:
        Exception: If not training and no checkpoint could be loaded.
    """
    pp.pprint(flags.FLAGS.__flags)

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Let TensorFlow grow GPU memory on demand.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        model_kwargs = dict(
            input_width=FLAGS.input_width,
            input_height=FLAGS.input_height,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            sample_num=FLAGS.batch_size,
            y_dim=7,
            crop=FLAGS.crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir,
        )
        dcgan = DCGAN(sess, **model_kwargs)

        show_all_variables()

        if FLAGS.train:
            dcgan.train(FLAGS)
        elif not dcgan.load(FLAGS.checkpoint_dir)[0]:
            raise Exception("[!] Train a model first, then run test mode")

        # NOTE(review): OPTION is assigned but not passed to visualize -
        # confirm whether it should be.
        OPTION = 1
        visualize(sess, dcgan, FLAGS)
def _iterate(self, epoch, phase):
    """Run one epoch of ``phase`` and record its loss, dice and IoU.

    Losses are divided by ``self.accumlation_steps``; in the ``train`` phase
    gradients are accumulated and the optimizer steps once every
    ``self.accumlation_steps`` batches.

    Args:
        epoch: Zero-based epoch index.
        phase: Key into ``self.dataloaders`` / ``self.losses``; ``'train'``
            enables the backward pass and optimizer steps.

    Returns:
        The epoch loss (accumulated loss rescaled and divided by the number
        of batches).
    """
    meter = Meter(phase, epoch)
    start = time.strftime('%H:%M:%S')
    print("Starting epoch: {} | phase: {} | Time: {}".format(
        epoch + 1, phase, start))

    loader = self.dataloaders[phase]
    total_steps = len(loader)
    training = phase == 'train'
    running_loss = 0.0

    self.optimizer.zero_grad()
    for step, sample in enumerate(tqdm(loader)):
        images, targets = sample['image'], sample['mask']
        loss, outputs = self._forward(images, targets)
        loss /= self.accumlation_steps
        if training:
            loss.backward()
            if (step + 1) % self.accumlation_steps == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
        running_loss += loss.item()
        outputs = outputs.detach().cpu()
        meter.update(targets, outputs)

    # Undo the per-batch scaling before averaging over the batches.
    epoch_loss = (running_loss * self.accumlation_steps) / total_steps
    dice, iou = epoch_log(phase, epoch, epoch_loss, meter, start)
    # Shows the last batch of the epoch.
    visualize(sample, outputs, epoch, phase)

    self.losses[phase].append(epoch_loss)
    self.dice_scores[phase].append(dice)
    self.iou_scores[phase].append(iou)
    return epoch_loss
def main(_):
    """Build a DCGAN, train or restore it, then run the visualizer.

    Missing width flags default to the matching height flag. The ``mnist``
    dataset is built with ``y_dim=10``; other datasets omit ``y_dim``.

    Args:
        _: Unused positional argument.

    Raises:
        Exception: If not training and no checkpoint could be loaded.
    """
    # Was `pp.print`, which is not a pprint API.
    pp.pprint(flags.FLAGS.__flags)

    if FLAGS.input_width is None:
        FLAGS.input_width = FLAGS.input_height
    if FLAGS.output_width is None:
        FLAGS.output_width = FLAGS.output_height

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Let TensorFlow grow GPU memory on demand.
    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True

    with tf.Session(config=run_config) as sess:
        model_kwargs = dict(
            input_width=FLAGS.input_width,
            input_height=FLAGS.input_height,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            sample_num=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            input_fname_pattern=FLAGS.input_fname_pattern,
            crop=FLAGS.crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir,
        )
        if FLAGS.dataset == 'mnist':
            model_kwargs['y_dim'] = 10
        dcgan = DCGAN(sess, **model_kwargs)

        show_all_variables()

        if FLAGS.train:
            dcgan.train(FLAGS)
        elif not dcgan.load(FLAGS.checkpoint_dir)[0]:
            raise Exception('[!] Train a model first, then run test mode')

        OPTION = 1
        visualize(sess, dcgan, FLAGS, OPTION)
def main(_):
    """Build a DCGAN, train or restore it, then run the visualizer.

    ``mnist`` uses ``y_dim=10`` and ``c_dim=1``; other datasets take the
    channel count from ``FLAGS.c_dim`` and omit ``y_dim``.

    Args:
        _: Unused positional argument.

    Raises:
        Exception: If not training and ``dcgan.load`` returns a falsy value.
    """
    pp.pprint(flags.FLAGS.__flags)

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Let TensorFlow grow GPU memory on demand.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    with tf.Session(config=session_config) as sess:
        model_kwargs = dict(
            input_width=FLAGS.input_width,
            input_height=FLAGS.input_height,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            input_fname_pattern=FLAGS.input_fname_pattern,
            is_crop=FLAGS.is_crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir,
        )
        if FLAGS.dataset == 'mnist':
            model_kwargs.update(y_dim=10, c_dim=1)
        else:
            model_kwargs['c_dim'] = FLAGS.c_dim
        dcgan = DCGAN(sess, **model_kwargs)

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        elif not dcgan.load(FLAGS.checkpoint_dir):
            raise Exception("[!] Train a model first, then run test mode")

        # The to_json export of the layers to ./web/js/layers.js is disabled
        # in this variant.
        OPTION = 1
        visualize(sess, dcgan, FLAGS, OPTION)
def main(_):
    """Build a DCGAN on an optionally pinned GPU, then train or restore it.

    With ``FLAGS.gpu`` set, ``CUDA_VISIBLE_DEVICES`` is set to
    ``FLAGS.gpu_id`` and the session uses memory growth, a 0.4 per-process
    memory fraction and soft placement; otherwise a default session config is
    used. When ``FLAGS.visualize`` is set, ``visualize`` is called with
    option 2.

    Args:
        _: Unused positional argument.
    """
    pp.pprint(flags.FLAGS.__flags)

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # set up GPU properties:
    config = tf.ConfigProto()
    if FLAGS.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu_id)
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.4
        config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        model_kwargs = dict(
            image_size=FLAGS.image_size,
            batch_size=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            is_crop=FLAGS.is_crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir,
        )
        if FLAGS.dataset == 'mnist':
            model_kwargs.update(y_dim=10, output_size=28, c_dim=1)
        else:
            model_kwargs.update(output_size=FLAGS.output_size, c_dim=FLAGS.c_dim)
        dcgan = DCGAN(sess, **model_kwargs)

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        else:
            dcgan.load(FLAGS.checkpoint_dir)

        if FLAGS.visualize:
            # The to_json export of the layers to ./web/js/layers.js is
            # disabled in this variant.
            OPTION = 2
            visualize(sess, dcgan, FLAGS, OPTION)
def Predict(self, img_path, vis=True):
    '''
    User function: Run inference on image and visualize it. Output mask saved as output_mask.npy

    The image is copied into a fresh ``tmp_test`` working directory (any
    existing one is deleted first). Both entries of the configured image
    shape are rounded up in place to the next multiple of 32.

    Args:
        img_path (str): Relative path to the image file
        vis (bool): If True, predicted mask is displayed.

    Returns:
        None
    '''
    dirPath = "tmp_test"
    x_test_dir = dirPath + "/img_dir"
    y_test_dir = dirPath + "/gt_dir"

    # Start from an empty working directory.
    if os.path.isdir(dirPath):
        shutil.rmtree(dirPath)
    os.mkdir(dirPath)
    os.mkdir(x_test_dir)
    os.mkdir(y_test_dir)

    # shutil.copy instead of a shell `cp` string: works for paths containing
    # spaces or shell metacharacters and raises if the copy fails.
    shutil.copy(img_path, x_test_dir)
    shutil.copy(img_path, y_test_dir)

    # Round both configured dimensions up to a multiple of 32 (in place).
    image_shape = self.system_dict["params"]["image_shape"]
    for axis in (0, 1):
        remainder = image_shape[axis] % 32
        if remainder != 0:
            image_shape[axis] += 32 - remainder

    preprocess_input = sm.get_preprocessing(
        self.system_dict["params"]["backbone"])

    test_dataset = Dataset(
        x_test_dir,
        y_test_dir,
        self.system_dict["params"]["classes_dict"],
        classes_to_train=self.system_dict["params"]["classes_to_train"],
        augmentation=get_validation_augmentation(
            self.system_dict["params"]["image_shape"][0],
            self.system_dict["params"]["image_shape"][1]),
        preprocessing=get_preprocessing(preprocess_input),
    )

    # Kept from the original although the loader is not used below.
    test_dataloader = Dataloder(test_dataset, batch_size=1, shuffle=False)

    image, gt_mask = test_dataset[0]
    image = np.expand_dims(image, axis=0)
    pr_mask = self.system_dict["local"]["model"].predict(image).round()
    np.save("output_mask.npy", pr_mask)

    if vis:
        visualize(
            image=denormalize(image.squeeze()),
            pr_mask=pr_mask[..., 0].squeeze(),
        )
def _test(self, args, epoch, disc, gen, test_loader, test_output_dir):
    """Evaluate ``disc`` on real feature maps and on images from ``gen``.

    For every batch the classification loss is computed on the real feature
    maps and on the image generated from the discriminator features; with
    ``args.adv`` set, the adversarial losses are added as well. Every tenth
    batch is visualized.

    Args:
        args: Namespace providing ``device``, ``cls_w``, ``adv``, ``adv_w``
            and ``adv_type``.
        epoch: Current epoch number.
        disc: Model producing ``(feats, logits_cls, logits_adv)``.
        gen: Generator mapping features to images.
        test_loader: Iterable of ``(inputs, featmaps, targets)`` batches.
        test_output_dir: Path prefix for the visualization images.

    Returns:
        Whatever ``return_statement`` builds from the batch index, the two
        accuracies and the accumulated losses.
    """
    cls_criterion = nn.CrossEntropyLoss()
    _loss_cls_gen, _loss_adv_gen = 0., 0.
    _loss_cls, _loss_adv = 0., 0.
    _loss = 0.
    ypred, ypred_gen = [], []
    ytrue, ytrue_gen = [], []

    for i, (inputs, featmaps, targets) in enumerate(test_loader):
        inputs = inputs.to(args.device)
        featmaps = featmaps.to(args.device)
        targets = targets.to(args.device)
        # Host-side copy of the labels: np.array() cannot convert a list of
        # device tensors, which is what the original accumulated.
        batch_targets = targets.cpu().numpy().tolist()

        # Classification on the real feature maps.
        feats, logits_cls, logits_adv = disc(featmaps)
        loss_cls = cls_criterion(logits_cls, targets.long())
        loss = loss_cls
        _loss_cls += loss_cls.item()
        preds = F.softmax(logits_cls, dim=1).argmax(dim=1).cpu().numpy().tolist()
        ypred.extend(preds)
        ytrue.extend(batch_targets)

        # Classification on the image generated from the features.
        gen_image = gen(feats.unsqueeze(2).unsqueeze(3).detach())
        feats_gen, logits_cls_gen, logits_adv_gen = disc(gen_image)
        loss_cls_gen = cls_criterion(logits_cls_gen, targets.long())
        loss += args.cls_w * loss_cls_gen
        _loss_cls_gen += loss_cls_gen.item()

        if args.adv:
            loss_adv = (adversarial_loss(logits_adv, is_real=True, is_disc=True,
                                         type_=args.adv_type) +
                        adversarial_loss(logits_adv_gen, is_real=False, is_disc=True,
                                         type_=args.adv_type))
            _loss_adv += loss_adv.item()
            loss += args.adv_w * loss_adv.clone() / 2.

            # NOTE(review): source indentation was lost; the generator-side
            # adversarial loss is assumed to be gated by args.adv - confirm.
            loss_adv_gen = adversarial_loss(logits_adv_gen, is_real=True,
                                            is_disc=False, type_=args.adv_type)
            _loss_adv_gen += loss_adv_gen.item()
            loss += args.adv_w * loss_adv_gen.clone()

        preds_gen = F.softmax(logits_cls_gen, dim=1).argmax(dim=1).cpu().numpy().tolist()
        ypred_gen.extend(preds_gen)
        ytrue_gen.extend(batch_targets)

        _loss += loss.item()

        if i % 10 == 0:
            visualize(featmaps[0], gen_image[0],
                      out_dir=test_output_dir + str(epoch) + "_" + str(i) + ".jpg")

    acc = round((np.array(ypred) == np.array(ytrue)).sum() / len(ytrue), 4)
    acc_gen = round((np.array(ypred_gen) == np.array(ytrue_gen)).sum() / len(ytrue_gen), 4)

    print("Test Set Epoch {}, Training Iteration {}".format(epoch, i))
    print("Accuracy: {}, Accuracy gen: {}".format(acc, acc_gen))
    print("Loss: {}, Loss_cls: {}, Loss_cls_gen: {}"
          .format(_loss/(i+1), _loss_cls/(i+1), _loss_cls_gen/(i+1)))
    if args.adv:
        print("Loss_adv: {}, Loss_adv_gen: {}"
              .format(_loss_adv/(i+1), _loss_adv_gen/(i+1)))

    return return_statement(i, acc, acc_gen, _loss_cls, _loss_cls_gen,
                            _loss_adv, _loss_adv_gen)
def main(_):
    """Build a ``growGAN`` from the flags, then train or restore it.

    The model directory name is ``mixing_`` followed by the underscore-joined
    hyper-parameters; the four feature/spatial map flags are concatenated
    without a separator.

    Args:
        _: Unused positional argument.

    Raises:
        Exception: If not training and no checkpoint could be loaded.
    """
    pp.pprint(flags.FLAGS.__flags)

    map_tag = (FLAGS.feature_map_shrink + FLAGS.feature_map_growth +
               FLAGS.spatial_map_shrink + FLAGS.spatial_map_growth)
    name_parts = [
        str(FLAGS.lr),
        FLAGS.z_dims,
        FLAGS.epochs,
        FLAGS.g_layers,
        FLAGS.d_layers,
        FLAGS.output_dims,
        map_tag,
        FLAGS.loss,
        FLAGS.z_distr,
        FLAGS.activation,
        FLAGS.weight_init,
        str(FLAGS.batch_size),
        str(FLAGS.g_batchnorm),
        str(FLAGS.d_batchnorm),
        str(FLAGS.normalize_z),
        str(FLAGS.minibatch_std),
        str(FLAGS.use_wscale),
        str(FLAGS.use_pixnorm),
        str(FLAGS.D_loss_extra),
    ]
    model_dir = 'mixing_' + '_'.join(name_parts)

    gan = growGAN(
        z_dims=FLAGS.z_dims,
        epochs=FLAGS.epochs,
        g_layers=FLAGS.g_layers,
        d_layers=FLAGS.d_layers,
        output_dims=FLAGS.output_dims,
        useAlpha=FLAGS.useAlpha,
        useBeta=FLAGS.useBeta,
        useGamma=FLAGS.useGamma,
        useTau=FLAGS.useTau,
        feature_map_shrink=FLAGS.feature_map_shrink,
        feature_map_growth=FLAGS.feature_map_growth,
        spatial_map_shrink=FLAGS.spatial_map_shrink,
        spatial_map_growth=FLAGS.spatial_map_growth,
        stage=FLAGS.stage,
        loss=FLAGS.loss,
        z_distr=FLAGS.z_distr,
        activation=FLAGS.activation,
        weight_init=FLAGS.weight_init,
        lr=FLAGS.lr,
        beta1=FLAGS.beta1,
        beta2=FLAGS.beta2,
        epsilon=FLAGS.epsilon,
        batch_size=FLAGS.batch_size,
        sample_num=FLAGS.sample_num,
        gpu=FLAGS.gpu,
        g_batchnorm=FLAGS.g_batchnorm,
        d_batchnorm=FLAGS.d_batchnorm,
        normalize_z=FLAGS.normalize_z,
        crop=FLAGS.crop,
        trainflag=FLAGS.trainflag,
        visualize=FLAGS.visualize,
        model_dir=model_dir,
        minibatch_std=FLAGS.minibatch_std,
        use_wscale=FLAGS.use_wscale,
        use_pixnorm=FLAGS.use_pixnorm,
        D_loss_extra=FLAGS.D_loss_extra,
        G_run_avg=FLAGS.G_run_avg,
    )

    show_all_variables()

    if FLAGS.trainflag:
        gan.train()
    elif not gan.load()[0]:
        raise Exception("[!] Train a model first, then run test mode")

    if FLAGS.visualize:
        # NOTE(review): `sess` is not defined in this function; this relies
        # on a module-level session - confirm.
        visualize(sess, gan, FLAGS)
def source_only(encoder, classifier, discriminator, source_train_loader, target_train_loader, save_name):
    """Train the encoder and classifier with the source-domain labels only.

    The target loader is only used to pair batches (and therefore to bound
    the number of steps per epoch) and to compute the schedule length; its
    data never enters the loss. Every 10th epoch the models are evaluated
    through ``test.tester``. When training ends the models are saved and the
    encoder is passed to ``visualize``.
    """
    print("Source-only training")

    for epoch in range(params.epochs):
        print('Epoch : {}'.format(epoch))

        encoder = encoder.train()
        classifier = classifier.train()
        discriminator = discriminator.train()

        loss_fn = nn.CrossEntropyLoss().cuda()

        # Progress through the schedule is measured in source batches but
        # normalised by the target loader's length, as in the original.
        step_offset = epoch * len(source_train_loader)
        step_budget = params.epochs * len(target_train_loader)

        paired_batches = zip(source_train_loader, target_train_loader)
        for batch_idx, (source_data, _) in enumerate(paired_batches):
            source_image, source_label = source_data
            p = float(batch_idx + step_offset) / step_budget

            # MNIST convert to 3 channel
            source_image = torch.cat([source_image] * 3, 1)
            source_image = source_image.cuda()
            source_label = source_label.cuda()

            # NOTE(review): the optimiser is rebuilt for every batch, so its
            # momentum buffers never survive a step. Kept as-is on purpose.
            trainable = list(encoder.parameters()) + list(classifier.parameters())
            optimizer = optim.SGD(trainable, lr=0.01, momentum=0.9)
            optimizer = utils.optimizer_scheduler(optimizer=optimizer, p=p)
            optimizer.zero_grad()

            # Classification loss
            class_pred = classifier(encoder(source_image))
            class_loss = loss_fn(class_pred, source_label)
            class_loss.backward()
            optimizer.step()

            if (batch_idx + 1) % 50 == 0:
                seen = batch_idx * len(source_image)
                dataset_size = len(source_train_loader.dataset)
                percent = 100. * batch_idx / len(source_train_loader)
                print('[{}/{} ({:.0f}%)]\tClass Loss: {:.6f}'.format(
                    seen, dataset_size, percent, class_loss.item()))

        if (epoch + 1) % 10 == 0:
            # source_test_loader / target_test_loader are not parameters of
            # this function; they are resolved from the enclosing module.
            test.tester(encoder, classifier, discriminator,
                        source_test_loader, target_test_loader,
                        training_mode='source_only')

    save_model(encoder, classifier, discriminator, 'source', save_name)
    visualize(encoder, 'source', save_name)
def main(_):
    """Create the working directories, build a DCGAN and train or load it.

    Missing width flags default to the matching height flag. After training
    (or after a checkpoint has been loaded) ``visualize`` is called with
    option 0.

    Raises:
        Exception: if test mode is requested and no checkpoint can be
            loaded from ``FLAGS.checkpointDir``.
    """
    # pp.pprint(flags.FLAGS.__flags)
    if FLAGS.input_width is None:
        FLAGS.input_width = FLAGS.input_height
    if FLAGS.output_width is None:
        FLAGS.output_width = FLAGS.output_height

    for directory in (FLAGS.checkpointDir, FLAGS.buckets, FLAGS.summaryDir):
        if not tf.gfile.Exists(directory):
            tf.gfile.MakeDirs(directory)

    # PAI I/O optimisation:
    # copy the OSS files into the runtime directory.
    # Skip this step when running locally.
    # if not tf.gfile.Exists(os.path.join('cope_data', FLAGS.dataset)):
    #     tf.gfile.MakeDirs(os.path.join('cope_data', FLAGS.dataset))
    # for file_path in tf.gfile.Glob(os.path.join(FLAGS.buckets, FLAGS.dataset, '*')):
    #     tf.gfile.Copy(file_path, os.path.join('cope_data', FLAGS.dataset, os.path.basename(file_path)), overwrite=True)
    # FLAGS.buckets = './cope_data/'
    # Note: when not running on PAI the step above can be omitted;
    # keep the five lines above commented out.

    with tf.Session() as sess:
        model_kwargs = dict(
            input_width=FLAGS.input_width,
            input_height=FLAGS.input_height,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            sample_num=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            input_fname_pattern=FLAGS.input_fname_pattern,
            checkpointDir=FLAGS.checkpointDir,
            sample_dir=FLAGS.buckets,
            config=FLAGS,
        )
        dcgan = DCGAN(sess, **model_kwargs)
        # show_all_variables()

        if FLAGS.train:
            dcgan.train(config=FLAGS)
        elif not dcgan.load(FLAGS.checkpointDir)[0]:
            raise Exception("[!] Train a model first, then run test mode")

        # 0: generate 1 * batch_size images
        # 1: generate 100 * batch_size images
        # 2 - 4: generate GIFs using different modes
        option = 0
        visualize(sess, dcgan, FLAGS, option)
def test_flip_augmentation():
    """Visually check the left/right flip transform on one MPII sample.

    Loads the MPII annotations, wraps them in a ``PoseDataset`` whose only
    transform is ``FliplrWithPairs`` (debug on, no further augmentation),
    then fetches the same item ten times and draws each result.
    """
    sample_index = 25
    repetitions = 10

    records, header, dataset_info = make_mpii(source_file, data_dir)
    joint_info = dataset_info['joint_info']

    # presumably 0.5 is the flip probability -- TODO confirm against
    # FliplrWithPairs.
    flip = FliplrWithPairs(joint_info.mirror_mapping_pairs, 0.5)
    flip.debug = True
    flip.augmentation = None

    pose_dataset = PoseDataset(records, header, dataset_info, cv2_loader,
                               'mpii', transform=flip)

    for _ in range(repetitions):
        sample = pose_dataset[sample_index]
        image = to_cv(sample['img'])
        visualize(image, sample['coords'], joint_info.stick_figure_edges)
def main():
    """Command-line entry point: configure a geo search, run it, emit results.

    Parameter source, in priority order: the file given on the command line,
    the built-in defaults (``--default``), or ``params.txt``. Results are
    always written through ``write_search_results``; verbose printing, JSON
    output and word visualisation are optional.
    """
    args = get_parser().parse_args()

    if args.doc:
        print(__doc__)
        sys.exit()

    g = geosearchclass.GeoSearchClass()

    if args.filename:
        print('Using parameters from ' + str(args.filename))
        # turn parameter file into dictionary
        g.set_params_from_file(args.filename)
    elif args.default:
        print('Using default search terms')
    else:
        print('Using parameters from params.txt')
        g.set_params_from_file('params.txt')

    g.search()

    # print formatted results with extra info to terminal
    if args.verbose:
        g.print_search_results()

    if args.output:
        g.write_search_results(args.output)
    else:
        g.write_search_results()

    if args.json:
        g.json_search_results(args.json)

    if args.visualize:
        # Imported here so the module is only loaded when visualising.
        import utils
        filtered_words = utils.tokenize_and_filter(g.search_results)
        utils.visualize(filtered_words)
def main(_):
    """Build the multi-GPU DCGAN from the flags, train or load it, visualise.

    The checkpoint and sample directories are created when missing. The
    'mnist' dataset is explicitly rejected. After training (or loading a
    checkpoint) ``visualize`` is called with option 2.
    """
    pp.pprint(flags.FLAGS.__flags)

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    with tf.Session(config=session_config) as sess:
        if FLAGS.dataset == 'mnist':
            assert False

        devices = ["gpu:0", "gpu:1", "gpu:2", "gpu:3"]  # , "gpu:4"]
        # image_width is used for both spatial dimensions of image_shape.
        image_shape = [FLAGS.image_width, FLAGS.image_width, 3]
        dcgan = DCGAN(
            sess,
            image_size=FLAGS.image_size,
            batch_size=FLAGS.batch_size,
            sample_size=64,
            z_dim=8192,
            d_label_smooth=.25,
            generator_target_prob=.75 / 2.,
            out_stddev=.075,
            out_init_b=- .45,
            image_shape=image_shape,
            dataset_name=FLAGS.dataset,
            is_crop=FLAGS.is_crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir,
            generator=Generator(),
            train_func=train,
            discriminator_func=discriminator,
            build_model_func=build_model,
            config=FLAGS,
            devices=devices,
        )

        if not FLAGS.is_train:
            dcgan.load(FLAGS.checkpoint_dir)
        else:
            print("TRAINING")
            dcgan.train(FLAGS)
            print("DONE TRAINING")

        OPTION = 2
        visualize(sess, dcgan, FLAGS, OPTION)
def train_dex(
        NET,
        sgd_params,
        datasets):
    """
    Do DEX training.

    Builds momentum-SGD updates for ``NET.proto_params`` from the sum of the
    DEX cost and the activation-regularisation cost, compiles them into a
    Theano function and runs ``sgd_params['epochs']`` epochs of randomly
    sampled minibatches. The first-layer weights are written to an image
    through ``utils.visualize``.

    Parameters:
        NET: network object; this function reads ``input``, ``proto_params``,
            ``dex_layers``, ``rica_layers``, ``act_reg_cost`` and
            ``clip_params`` from it.
        sgd_params: dict with the keys 'start_rate', 'decay_rate', 'epochs',
            'batch_size', 'wt_norm_bound' and 'result_tag'.
        datasets: indexable whose ``[0][0]`` entry is the Theano shared
            variable holding the training observations.

    Returns:
        None.
    """
    initial_learning_rate = sgd_params['start_rate']
    learning_rate_decay = sgd_params['decay_rate']
    n_epochs = sgd_params['epochs']
    batch_size = sgd_params['batch_size']
    wt_norm_bound = sgd_params['wt_norm_bound']
    result_tag = sgd_params['result_tag']
    txt_file_name = "results_dex_{0}.txt".format(result_tag)
    img_file_name = "weights_dex_{0}.png".format(result_tag)

    # Get the training data and create arrays of start/end indices for
    # easy minibatch slicing
    Xtr = datasets[0][0]
    idx_range = np.arange(Xtr.get_value(borrow=True).shape[0])
    Ytr_shared = theano.shared(value=idx_range)
    Ytr = T.cast(Ytr_shared, 'int32')
    tr_samples = Xtr.get_value(borrow=True).shape[0]
    tr_batches = int(np.ceil(float(tr_samples) / batch_size))
    tr_bidx = [[i*batch_size, min(tr_samples, (i+1)*batch_size)] for i in range(tr_batches)]
    tr_bidx = T.cast(tr_bidx, 'int32')

    print("Dataset info:")
    print(" training samples: {0:d}".format(tr_samples))
    print(" samples/minibatch: {0:d}, minibatches/epoch: {1:d}".format(
        batch_size, tr_batches))

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lvector()  # index to a [mini]batch
    epoch = T.scalar()   # epoch counter
    I = T.lvector()      # keys for the training examples
    x = NET.input        # symbolic matrix for inputs to NET
    learning_rate = theano.shared(np.asarray(initial_learning_rate,
        dtype=theano.config.floatX))
    dex_weight = T.scalar(name='dex_weight', dtype=theano.config.floatX)

    # Collect the parameters to-be-optimized
    opt_params = NET.proto_params

    # Build the expressions for the cost functions
    DL = NET.dex_layers[0]
    RL_costs = [RL.rica_cost() for RL in NET.rica_layers]
    NET_rica_cost = sum([rlc[0] for rlc in RL_costs])
    NET_rica_reg_cost = sum([rlc[2] for rlc in RL_costs])
    NET_dex_cost = DL.dex_cost(index)
    NET_reg_cost = NET.act_reg_cost
    # The RICA cost is computed above but deliberately left out of the
    # optimised cost: #NET_rica_cost + NET_dex_cost + NET_reg_cost
    NET_cost = NET_dex_cost + NET_reg_cost
    # Slots 1 and 4 repeat NET_cost / NET_reg_cost, so the "rica" and
    # "rica_reg" columns of the progress print show those values.
    NET_metrics = [NET_cost, NET_cost, NET_dex_cost, NET_reg_cost,
                   NET_reg_cost]

    ############################################################################
    # Prepare momentum and gradient variables, and construct the updates that  #
    # Theano will perform on the network parameters.                           #
    ############################################################################
    NET_grads = []
    for param in opt_params:
        NET_grads.append(T.grad(NET_cost, param, disconnected_inputs='warn'))

    NET_moms = []
    for param in opt_params:
        NET_moms.append(theano.shared(np.zeros(
                param.get_value(borrow=True).shape, dtype=theano.config.floatX)))

    # Compute momentum for the current epoch
    mom = ifelse(epoch < 500,
            0.5*(1. - epoch/500.) + 0.99*(epoch/500.),
            0.99)

    # Use a "smoothed" learning rate, to ease into optimization
    gentle_rate = ifelse(epoch < 10,
            ((epoch / 10.) * learning_rate),
            learning_rate)

    # Update the step direction using a momentus update
    NET_updates = OrderedDict()
    for i in range(len(opt_params)):
        NET_updates[NET_moms[i]] = mom * NET_moms[i] + (1. - mom) * NET_grads[i]

    # ... and take a step along that direction
    for i in range(len(opt_params)):
        param = opt_params[i]
        grad_i = NET_grads[i]
        print("grad_{0:d}.owner.op: {1:s}".format(i, str(grad_i.owner.op)))
        NET_param = param - (gentle_rate * NET_updates[NET_moms[i]])
        # Clip the updated param to bound its norm (where applicable)
        if (param in NET.clip_params) and (NET.clip_params[param] == 1):
            NET_norms = T.sum(NET_param**2, axis=1, keepdims=1)
            NET_scale = T.clip(T.sqrt(wt_norm_bound / NET_norms), 0., 1.)
            NET_updates[param] = NET_param * NET_scale
        else:
            NET_updates[param] = NET_param

    # updates for dex layer
    NET_updates[DL.W] = T.inc_subtensor(T.take(DL.W, index, axis=0),
            -gentle_rate*DL.dW)
    NET_updates[DL.b] = T.inc_subtensor(T.take(DL.b, index),
            -gentle_rate*DL.db)

    # Compile theano functions for training. These return the training cost
    # and update the model parameters.
    train_NET = theano.function(inputs=[epoch, index, dex_weight],
        outputs=NET_metrics,
        updates=NET_updates,
        givens={ x: T.take(Xtr, index, axis=0) },
        on_unused_input='warn')

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch instead
    # of after each minibatch.
    set_learning_rate = theano.function(inputs=[], outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    epoch_counter = 0
    start_time = time.clock()

    # Only the header is ever written, so the file is closed again as soon
    # as that is done (the original left the handle open).
    with open(txt_file_name, 'wb') as results_file:
        results_file.write("ensemble description: ")
        results_file.write(" **TODO: Write code for this.**\n")
        results_file.flush()

    # One call up front, used to learn how many metrics come back. Note that
    # it also applies one parameter update.
    b_index = npr.randint(0, high=tr_samples, size=(batch_size,))
    train_metrics = train_NET(0, b_index, 0.0)
    while epoch_counter < n_epochs:
        ######################################################
        # Process some number of minibatches for this epoch. #
        ######################################################
        e_time = time.clock()
        epoch_counter = epoch_counter + 1
        train_metrics = [0.0 for val in train_metrics]
        for minibatch_index in range(tr_batches):
            # Compute update for some joint supervised/unsupervised minibatch
            b_index = npr.randint(0, high=tr_samples, size=(batch_size,))
            dwight = 1.0  #0.0 if (epoch_counter <= 5) else 0.1
            batch_metrics = train_NET(epoch_counter, b_index, dwight)
            train_metrics = [a+b for (a, b) in zip(train_metrics, batch_metrics)]
        train_metrics = [(val / tr_batches) for val in train_metrics]

        # Update the learning rate
        new_learning_rate = set_learning_rate()

        # Report and save progress.
        print("epoch {0:d}: total={1:.4f}, rica={2:.4f}, dex={3:.4f}, reg={4:.4f}, rica_reg:{5:.4f}".format(
                epoch_counter, train_metrics[0], train_metrics[1], train_metrics[2],
                train_metrics[3], train_metrics[4]))
        print("--time: {0:.4f}".format((time.clock() - e_time)))
        # Save first layer weights to an image locally
        # NOTE(review): placed inside the epoch loop (image refreshed every
        # epoch); the flattened source does not make the nesting explicit.
        utils.visualize(NET, 0, 0, img_file_name)
def test_gc_pair():
    """Smoke-test a generator/discriminator pair (GCPair) on MNIST.

    Loads 'data/mnist.pkl.gz', builds a GenNet generator and a PeaNet
    discriminator, couples them in a GCPair with moment matching in a random
    projection of the data space, and runs 750000 joint training steps.
    Costs are printed every 1000 steps; generator samples and discriminator
    weights are written to PNG files every 5000 steps. Both learning rates
    are multiplied by 0.7 every 100000 steps.

    Returns:
        None.
    """
    # Simple test code, to check that everything is basically functional.
    print("TESTING...")

    # Initialize a source of randomness
    rng = np.random.RandomState(1234)

    # Load some data to train/validate/test with
    dataset = 'data/mnist.pkl.gz'
    datasets = load_udm(dataset, zero_mean=False)
    Xtr = datasets[0][0]
    tr_samples = Xtr.get_value(borrow=True).shape[0]
    data_dim = Xtr.get_value(borrow=True).shape[1]
    mm_proj_dim = 250

    # Do moment matching in some transformed space
    #P = np.identity(data_dim)
    P = npr.randn(data_dim, mm_proj_dim) / np.sqrt(float(mm_proj_dim))
    P = theano.shared(value=P.astype(theano.config.floatX), name='P_proj')

    target_mean, target_cov = projected_moments(Xtr, P, ary_type='theano')
    P = P.get_value(borrow=False).astype(theano.config.floatX)

    ###########################
    # Setup generator network #
    ###########################

    # Choose some parameters for the generative network
    gn_params = {}
    gn_config = [200, 1000, 1000, 28*28]
    gn_params['mlp_config'] = gn_config
    gn_params['lam_l2a'] = 1e-3
    gn_params['vis_drop'] = 0.0
    gn_params['hid_drop'] = 0.0
    gn_params['bias_noise'] = 0.1
    gn_params['out_type'] = 'bernoulli'
    gn_params['activation'] = relu_actfun
    gn_params['init_scale'] = 4.0

    # Symbolic input matrix to generator network
    Xp_sym = T.matrix(name='Xp_sym')
    Xd_sym = T.matrix(name='Xd_sym')

    # Initialize a generator network object
    GN = GenNet(rng=rng, Xp=Xp_sym, prior_sigma=1.0, params=gn_params)

    ###############################
    # Setup discriminator network #
    ###############################

    # Set some reasonable mlp parameters
    dn_params = {}
    # Set up some proto-networks
    pc0 = [28*28, (250, 4), (250, 4), 11]
    dn_params['proto_configs'] = [pc0]
    # Set up some spawn networks
    sc0 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    #sc1 = {'proto_key': 0, 'input_noise': 0.1, 'bias_noise': 0.1, 'do_dropout': True}
    dn_params['spawn_configs'] = [sc0]
    dn_params['spawn_weights'] = [1.0]
    # Set remaining params
    dn_params['ear_type'] = 2
    dn_params['ear_lam'] = 0.0
    dn_params['lam_l2a'] = 1e-3
    dn_params['vis_drop'] = 0.2
    dn_params['hid_drop'] = 0.5

    # Initialize a network object to use as the discriminator
    DN = PeaNet(rng=rng, Xd=Xd_sym, params=dn_params)

    ########################################################################
    # Initialize the joint controller for the generator/discriminator pair #
    ########################################################################
    gcp_params = {}
    gcp_params['d_net'] = DN
    gcp_params['g_net'] = GN
    gcp_params['lam_l2d'] = 1e-2
    gcp_params['mom_mix_rate'] = 0.05
    gcp_params['mom_match_weight'] = 0.05
    gcp_params['mom_match_proj'] = P
    gcp_params['target_mean'] = target_mean
    gcp_params['target_cov'] = target_cov

    # Initialize a GCPair instance using the previously constructed generator and
    # discriminator networks.
    GCP = GCPair(rng=rng, Xd=Xd_sym, Xp=Xp_sym, d_net=DN, g_net=GN,
                 data_dim=28*28, params=gcp_params)

    gn_learn_rate = 0.02
    dn_learn_rate = 0.01
    GCP.set_gn_sgd_params(learn_rate=gn_learn_rate, momentum=0.98)
    GCP.set_dn_sgd_params(learn_rate=dn_learn_rate, momentum=0.98)
    # Init generator's mean and covariance estimates with many samples
    GCP.init_moments(10000)

    batch_idx = T.lvector('batch_idx')
    batch_sample = theano.function(inputs=[ batch_idx ],
            outputs=Xtr.take(batch_idx, axis=0))

    for i in range(750000):
        tr_idx = npr.randint(low=0,high=tr_samples,size=(100,)).astype(np.int32)
        Xn_np = GN.sample_from_prior(100)
        Xd_batch = batch_sample(tr_idx)
        Xd_batch = Xd_batch.astype(theano.config.floatX)
        Xn_batch = Xn_np.astype(theano.config.floatX)
        all_idx = np.arange(200)
        data_idx = all_idx[:100]
        noise_idx = all_idx[100:]
        # Ramp the discriminator weights linearly from ~0 to 1 over the
        # first 10000 steps.
        scale = min(1.0, float(i+1)/10000.0)
        GCP.set_disc_weights(dweight_gn=scale, dweight_dn=scale)
        outputs = GCP.train_joint(Xd_batch, Xn_batch, data_idx, noise_idx)
        mom_match_cost = 1.0 * outputs[0]
        disc_cost_gn = 1.0 * outputs[1]
        disc_cost_dn = 1.0 * outputs[2]
        # Decay both learning rates every 100000 steps. The original test
        # was written `(i+1 % 100000) == 0`; `%` binds tighter than `+`, so
        # that compared `i + 1` with 0 and the decay never ran.
        if (((i + 1) % 100000) == 0):
            gn_learn_rate = gn_learn_rate * 0.7
            dn_learn_rate = dn_learn_rate * 0.7
            GCP.set_gn_sgd_params(learn_rate=gn_learn_rate, momentum=0.98)
            GCP.set_dn_sgd_params(learn_rate=dn_learn_rate, momentum=0.98)
        if ((i % 1000) == 0):
            print("batch: {0:d}, mom_match_cost: {1:.4f}, disc_cost_gn: {2:.4f}, disc_cost_dn: {3:.4f}".format(
                i, mom_match_cost, disc_cost_gn, disc_cost_dn))
        if ((i % 5000) == 0):
            file_name = "GCP_SAMPLES_b{0:d}.png".format(i)
            Xs = GCP.sample_from_gn(200)
            utils.visualize_samples(Xs, file_name)
            file_name = "GCP_WEIGHTS_b{0:d}.png".format(i)
            utils.visualize(GCP.DN, 0, 0, file_name)

    print("TESTING COMPLETE!")
    return
kld_data = 1.0 * outputs[2] if ((i + 1) % 5000 == 0): print(" nll_data: {0:.4f}, kld_data: {1:.4f}".format( \ nll_data, kld_data)) if ((i+1) % 100000 == 0): learn_rate = learn_rate * 0.8 if (i % 1000 == 0): print("batch: {0:d}, mom_match_cost: {1:.4f}, disc_dn: {2:.6f}, disc_gn: {3:.6f}, nll: {4:.4f}, kld: {5:.4f}".format( \ i, mom_match_cost, disc_dn, disc_gn, nll_cost, kld_cost)) if (i % 5000 == 0): # draw independent samples from generative model's prior file_name = "VCG_SAMPLES_b{0:d}.png".format(i) Xs = VCG.sample_from_gn(200) utils.visualize_samples(Xs, file_name) file_name = "VCG_WEIGHTS_b{0:d}.png".format(i) utils.visualize(VCG.DN, 0, 0, file_name) # draw "markov chain" samples initiated from training data file_name = "VCG_SAMPLES_a{0:d}.png".format(i) Xd_samps = np.repeat(Xd_batch[0:10,:], 3, axis=0) sample_lists = GIP.sample_gil_from_data(Xd_samps, loop_iters=10) Xs = np.vstack(sample_lists["data samples"]) utils.visualize_samples(Xs, file_name) print("TESTING COMPLETE!") ############## # EYE BUFFER # ##############
def test_mlp(
        mlp_params,
        sgd_params,
        datasets):
    """
    Train a DEV_MLP with momentum SGD and report validation/test errors.

    Datasets should be a three-tuple in which each item is a (matrix, vector)
    pair of (inputs, labels) for (training, validation, testing).

    Parameters:
        mlp_params: dict passed to ``DEV_MLP``; the keys 'lam_l2a',
            'dev_types' and 'dev_lams' are also written to the results file.
        sgd_params: dict with the keys 'start_rate', 'decay_rate', 'epochs',
            'batch_size', 'mlp_type', 'wt_norm_bound' and 'result_tag'.
            An 'mlp_type' of 'sde' selects the SDE updates; any other value
            selects the DEV updates.
        datasets: see above; the input matrices must be Theano shared
            variables (``get_value`` is called on them).

    Side effects:
        Writes "results_<tag>.txt" (header plus one validation error count
        per epoch) and a first-layer weight image "weights_<tag>.png".

    Returns:
        None.
    """
    initial_learning_rate = sgd_params['start_rate']
    learning_rate_decay = sgd_params['decay_rate']
    n_epochs = sgd_params['epochs']
    batch_size = sgd_params['batch_size']
    mlp_type = sgd_params['mlp_type']
    wt_norm_bound = sgd_params['wt_norm_bound']
    result_tag = sgd_params['result_tag']
    txt_file_name = "results_{0}.txt".format(result_tag)
    img_file_name = "weights_{0}.png".format(result_tag)

    Xtr, Ytr = datasets[0]
    Xva, Yva = datasets[1]
    Xte, Yte = datasets[2]

    # compute number of minibatches for training, validation and testing
    # (floor division: a trailing partial batch is dropped, and the counts
    # stay integers regardless of the interpreter's `/` semantics)
    n_train_batches = Xtr.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = Xva.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = Xte.get_value(borrow=True).shape[0] // batch_size

    print("train batches: {0:d}, valid batches: {1:d}, test_batches: {2:d}".format(
        n_train_batches, n_valid_batches, n_test_batches))

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    epoch = T.scalar()   # epoch counter
    x = T.matrix('x')    # data is presented as rasterized images
    y = T.ivector('y')   # labels are presented as integer categories
    learning_rate = theano.shared(np.asarray(initial_learning_rate,
        dtype=theano.config.floatX))

    rng = np.random.RandomState(12345)

    # construct the MLP class
    NET = DEV_MLP(rng=rng, input=x, params=mlp_params)

    # Build the expressions for the cost functions. If training without SDE or
    # DEV regularization, the DEV loss/cost will be used, but the weights for
    # DEV regularization on each layer will be set to 0 before training.
    sde_cost = NET.sde_class_loss(y) + NET.sde_reg_loss
    dev_cost = 0.5 * (NET.dev_class_loss(y) + NET.dev_dev_loss) + NET.dev_reg_loss
    NET_metrics = [NET.raw_errors(y), NET.raw_class_loss(y),
                   NET.dev_dev_loss, NET.raw_reg_loss]

    ############################################################################
    # Compile testing and validation models. These models are evaluated on     #
    # batches of the same size as used in training. Trying to jam a large      #
    # validation or test set through the net may take too much memory.         #
    ############################################################################
    test_model = theano.function(inputs=[index],
            outputs=NET_metrics,
            givens={
                x: Xte[index * batch_size:(index + 1) * batch_size],
                y: T.cast(Yte[index * batch_size:(index + 1) * batch_size], 'int32')})
    #theano.printing.pydotprint(test_model, outfile="test_file.png",
    #        var_with_name_simple=True)

    validate_model = theano.function(inputs=[index],
            outputs=NET_metrics,
            givens={
                x: Xva[index * batch_size:(index + 1) * batch_size],
                y: T.cast(Yva[index * batch_size:(index + 1) * batch_size], 'int32')})
    #theano.printing.pydotprint(validate_model, outfile="validate_file.png",
    #        var_with_name_simple=True)

    ############################################################################
    # Prepare momentum and gradient variables, and construct the updates that  #
    # Theano will perform on the network parameters.                           #
    ############################################################################
    sde_grads = []
    dev_grads = []
    for param in NET.params:
        sde_grads.append(T.grad(sde_cost, param))
        dev_grads.append(T.grad(dev_cost, param))

    sde_moms = []
    dev_moms = []
    for param in NET.params:
        sde_moms.append(theano.shared(np.zeros(
                param.get_value(borrow=True).shape, dtype=theano.config.floatX)))
        dev_moms.append(theano.shared(np.zeros(
                param.get_value(borrow=True).shape, dtype=theano.config.floatX)))

    # Compute momentum for the current epoch
    mom = ifelse(epoch < 500,
            0.5*(1. - epoch/500.) + 0.99*(epoch/500.),
            0.99)

    # Use a "smoothed" learning rate, to ease into optimization
    gentle_rate = ifelse(epoch < 5,
            (epoch / 5.0) * learning_rate,
            learning_rate)

    # Update the step direction using a momentus update
    sde_updates = OrderedDict()
    dev_updates = OrderedDict()
    for i in range(len(NET.params)):
        sde_updates[sde_moms[i]] = mom * sde_moms[i] + (1. - mom) * sde_grads[i]
        dev_updates[dev_moms[i]] = mom * dev_moms[i] + (1. - mom) * dev_grads[i]

    # ... and take a step along that direction
    for i in range(len(NET.params)):
        param = NET.params[i]
        sde_param = param - (gentle_rate * sde_updates[sde_moms[i]])
        dev_param = param - (gentle_rate * dev_updates[dev_moms[i]])
        # Clip the updated param to bound its norm (where applicable)
        if (param in NET.clip_params) and (NET.clip_params[param] == 1):
            sde_norms = T.sum(sde_param**2, axis=1).reshape((sde_param.shape[0],1))
            sde_scale = T.clip(T.sqrt(wt_norm_bound / sde_norms), 0., 1.)
            sde_updates[param] = sde_param * sde_scale
            dev_norms = T.sum(dev_param**2, axis=1).reshape((dev_param.shape[0],1))
            dev_scale = T.clip(T.sqrt(wt_norm_bound / dev_norms), 0., 1.)
            dev_updates[param] = dev_param * dev_scale
        else:
            sde_updates[param] = sde_param
            dev_updates[param] = dev_param

    # Compile theano functions for training. These return the training cost
    # and update the model parameters.
    train_sde = theano.function(inputs=[epoch, index], outputs=NET_metrics,
            updates=sde_updates,
            givens={
                x: Xtr[index * batch_size:(index + 1) * batch_size],
                y: T.cast(Ytr[index * batch_size:(index + 1) * batch_size], 'int32')})

    train_dev = theano.function(inputs=[epoch, index], outputs=NET_metrics,
            updates=dev_updates,
            givens={
                x: Xtr[index * batch_size:(index + 1) * batch_size],
                y: T.cast(Ytr[index * batch_size:(index + 1) * batch_size], 'int32')})

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch instead
    # of after each minibatch.
    set_learning_rate = theano.function(inputs=[], outputs=learning_rate,
            updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    best_params = None
    best_validation_errors = np.inf
    best_iter = 0
    test_score = 0.
    epoch_counter = 0
    start_time = time.clock()

    # The results file is written once per epoch; the `with` block makes
    # sure it is closed even if training is interrupted.
    with open(txt_file_name, 'wb') as results_file:
        results_file.write("mlp_type: {0}\n".format(mlp_type))
        results_file.write("lam_l2a: {0:.4f}\n".format(mlp_params['lam_l2a']))
        results_file.write("dev_types: {0}\n".format(str(mlp_params['dev_types'])))
        results_file.write("dev_lams: {0}\n".format(str(mlp_params['dev_lams'])))
        results_file.flush()

        e_time = time.clock()
        # Priming call: it is only needed to learn how many metrics come
        # back (it also applies one parameter update). Its values are
        # discarded so they do not inflate the first epoch's averages.
        epoch_metrics = train_sde(1, 1)
        epoch_metrics = [0.0 for v in epoch_metrics]
        while epoch_counter < n_epochs:
            # Train this epoch
            epoch_counter = epoch_counter + 1
            for minibatch_index in range(n_train_batches):
                if ((epoch_counter <= 0) or (mlp_type == 'sde')):
                    batch_metrics = train_sde(epoch_counter, minibatch_index)
                else:
                    batch_metrics = train_dev(epoch_counter, minibatch_index)
                epoch_metrics = [(em + bm) for (em, bm) in zip(epoch_metrics, batch_metrics)]

            # Compute classification errors on validation set
            validation_metrics = [validate_model(i) for i in range(n_valid_batches)]
            this_validation_errors = np.sum([m[0] for m in validation_metrics])

            # Report and save progress.
            epoch_metrics = [(float(v) / float(n_train_batches)) for v in epoch_metrics]
            improved = this_validation_errors < best_validation_errors
            tag = (" **" if improved else " ")
            print("epoch {0}: t_err={1:.2f}, t_loss={2:.4f}, t_dev={3:.4f}, t_reg={4:.4f}, v_err={5:d}{6}".format(
                    epoch_counter, epoch_metrics[0], epoch_metrics[1], epoch_metrics[2], epoch_metrics[3],
                    int(this_validation_errors), tag))
            print("--time: {0:.4f}".format((time.clock() - e_time)))
            epoch_metrics = [0.0 for v in epoch_metrics]
            e_time = time.clock()
            if improved:
                # Remember the epoch of the best validation score; this was
                # never updated before, so the final report always said 0.
                best_iter = epoch_counter
            best_validation_errors = min(best_validation_errors, this_validation_errors)
            results_file.write("{0}\n".format(this_validation_errors))
            results_file.flush()

            new_learning_rate = set_learning_rate()

            # Save first layer weights to an image locally
            # NOTE(review): placed inside the epoch loop; the flattened
            # source does not make the nesting explicit.
            utils.visualize(NET, 0, img_file_name)

    end_time = time.clock()

    # Compute loss on test set
    test_metrics = [test_model(i) for i in range(n_test_batches)]
    test_score = np.sum([m[0] for m in test_metrics])
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_errors * 100., best_iter, test_score * 100.))
    sys.stderr.write(('The code for file ' +
                      os.path.split(__file__)[1] +
                      ' ran for %.2fm' % ((end_time - start_time) / 60.)) + '\n')
def run(dry_run=False):
    """Train an ECA model on MNIST and report its accuracy.

    A single 'Z' regression layer is attached to the image input 'U' and the
    label input 'Y'. With ``dry_run`` the layer is tiny, only one training
    iteration is run and the function returns right after the training loop.
    Ctrl-C during training stops the loop early; Ctrl-C during the final
    evaluation is swallowed.
    """
    data = MnistDataset(batch_size=500, testset_size=1000, normalize=False, as_one_hot=True)
    stiff_start, stiff_end, stiff_decay = (0.005, 0.005, 0.99)

    def stiff_update(s):
        # Exponential move of the stiffness towards stiff_end.
        return s * stiff_decay + (1 - stiff_decay) * stiff_end

    def tostr(pairs):
        # Render (name, value) pairs as "{name: value, ...}".
        return "{" + ", ".join(["%s: %6.2f" % (n, v) for (n, v) in pairs]) + "}"

    input_dim = data.size('trn')[0][0]
    label_dim = data.size('trn')[1][0]

    class Model(ECA):
        def structure(self):
            self.U = Input('U', input_dim)
            self.Y = Input('Y', label_dim)
            if dry_run:
                n_units = 3
                merge_op = lambda u, y: rect(u + y)
            else:
                n_units = 100
                merge_op = lambda u, y: u
            RegressionLayer('Z', n_units, (self.U, self.Y), rect,
                            min_tau=0.0, stiffx=1.0, merge_op=merge_op)

    mdl = Model()
    trn_iters = 1 if dry_run else 10000
    #mdl.Y.nonlin_est = lambda x: T.nnet.softmax(x.T).T

    print('Training {0} iterations'.format(trn_iters))
    d = data.get('trn')
    stiff = stiff_start
    weights = []
    try:
        trn_sig = mdl.new_signals(data.samples('trn'))
        trne_sig = mdl.new_signals(data.samples('trn'))
        val_sig = mdl.new_signals(data.samples('val'))
        tst_sig = mdl.new_signals(data.samples('tst'))

        for i in range(1, trn_iters + 1):
            t = time.time()
            # Update model
            trn_sig.propagate(d.samples, d.labels)
            trn_sig.adapt_layers(stiff)
            stiff = stiff_update(stiff)

            if i % 200 == 0:
                calculate_accuracy(trne_sig, val_sig, tst_sig, data)

            # Progress prints
            if i % 20 != 0:
                continue

            weights.append(mdl.first_phi()[:-10, :])
            i_str = "I %4d:" % (i)
            t_str = 't: %.2f s' % (time.time() - t)
            t = time.time()
            stiff_str = "stiff: %5.3f" % stiff

            # Several of the strings below are built but not printed; the
            # calls are kept exactly as in the original.
            var_str = " logvar:" + tostr(trn_sig.variance())
            a_str = " avg: " + tostr(trn_sig.avg_levels())
            phi_norms = mdl.phi_norms()
            phi_larg_str = " |phinL|: " + tostr(
                [(a[0], np.sum(a[1] > 1.1)) for a in phi_norms])
            phi_ones_str = " |phin1|: " + tostr(
                [(a[0], np.sum(np.isclose(a[1], 1.0, atol=0.1))) for a in phi_norms])
            phi_zero_str = " |phin0|: " + tostr(
                [(a[0], np.sum(np.isclose(a[1], 0.0, atol=0.5))) for a in phi_norms])
            phi_str = " |phi|: " + tostr(
                [(a[0], np.average(a[1])) for a in phi_norms])
            E_str = " E: " + tostr(trn_sig.energy())
            y_acc = " yacc: %.2f" % (d.accuracy(trn_sig.y_est()) * 100.)
            Y_acc = " Yacc: %.2f" % (d.accuracy(trn_sig.Y.var.get_value()) * 100.)
            Y_cost = " Yerr: %.3f" % np.mean(trn_sig.Y.energy())
            U_cost = " Uerr: %.3f" % np.mean(trn_sig.U.energy())

            print(" ".join([i_str, stiff_str, t_str, y_acc, Y_acc,
                            Y_cost, U_cost, phi_ones_str]))
            #print var_str, a_str, phi_str
    except KeyboardInterrupt:
        pass

    if dry_run:
        return

    try:
        print('Calculating final accuracy')
        calculate_accuracy(trne_sig, val_sig, tst_sig, data)
        if False:
            visualize(weights[0])
    except KeyboardInterrupt:
        pass
def _sum_batch_metrics(batch_fn, num_batches, num_metrics):
    """Sum the metric lists returned by batch_fn(b) for b in 0..num_batches-1."""
    totals = [0. for _ in range(num_metrics)]
    for b_idx in range(num_batches):
        batch_metrics = batch_fn(b_idx)
        totals = [(tot + bm) for (tot, bm) in zip(totals, batch_metrics)]
    return totals


def train_mlp(
        NET,
        mlp_params,
        sgd_params,
        datasets):
    """
    Train NET using purely supervised data.

    This trains a standard supervised MLP, using any of: no droppish stuff,
    standard dropout, or dropout ensemble variance regularization.

    Datasets should be a three-tuple, where each item is an (X, Y) pair of
    observation matrix X and class vector Y. The (X, Y) pairs will be used for
    training, validation, and testing respectively. X must provide
    get_value() (it is read to obtain the sample count).

    sgd_params keys read here: 'start_rate', 'decay_rate', 'epochs',
    'batch_size', 'mlp_type', 'wt_norm_bound', 'result_tag' and, optionally,
    'top_only' (when truthy only NET.class_params are optimized).
    mlp_params keys read here (only for the results-file header): 'lam_l2a',
    'dev_types', 'dev_lams'.

    Side effects: updates NET's parameters, writes per-epoch results to
    "results_mlp_<result_tag>.txt", calls utils.visualize with
    "weights_mlp_<result_tag>.png", and prints progress. Returns None.
    """
    initial_learning_rate = sgd_params['start_rate']
    learning_rate_decay = sgd_params['decay_rate']
    n_epochs = sgd_params['epochs']
    batch_size = sgd_params['batch_size']
    mlp_type = sgd_params['mlp_type']
    wt_norm_bound = sgd_params['wt_norm_bound']
    result_tag = sgd_params['result_tag']
    txt_file_name = "results_mlp_{0}.txt".format(result_tag)
    img_file_name = "weights_mlp_{0}.png".format(result_tag)

    ###########################################################################
    # We will use minibatches for training, as well as for computing stats    #
    # over the validation and testing sets. For Theano reasons, it will be    #
    # easiest if we set up arrays storing the start/end index of each batch   #
    # w.r.t. the relevant observation/class matrices/vectors.                 #
    ###########################################################################
    # Get the training observations and classes
    Xtr, Ytr = (datasets[0][0], T.cast(datasets[0][1], 'int32'))
    tr_samples = Xtr.get_value(borrow=True).shape[0]
    tr_batches = int(np.ceil(tr_samples / float(batch_size)))
    tr_bidx = [[i*batch_size, min(tr_samples, (i+1)*batch_size)]
               for i in range(tr_batches)]
    # The [start, end] rows are stored as a floatX shared variable and cast
    # back to int32 for use as slice bounds.
    tr_bidx = theano.shared(value=np.asarray(tr_bidx, dtype=theano.config.floatX))
    tr_bidx = T.cast(tr_bidx, 'int32')
    # Get the validation and testing observations and classes
    Xva, Yva = (datasets[1][0], T.cast(datasets[1][1], 'int32'))
    Xte, Yte = (datasets[2][0], T.cast(datasets[2][1], 'int32'))
    va_samples = Xva.get_value(borrow=True).shape[0]
    te_samples = Xte.get_value(borrow=True).shape[0]
    # Validation and test sets are always evaluated in batches of 100
    va_batches = int(np.ceil(va_samples / 100.))
    te_batches = int(np.ceil(te_samples / 100.))
    va_bidx = [[i*100, min(va_samples, (i+1)*100)] for i in range(va_batches)]
    te_bidx = [[i*100, min(te_samples, (i+1)*100)] for i in range(te_batches)]
    va_bidx = theano.shared(value=np.asarray(va_bidx, dtype=theano.config.floatX))
    te_bidx = theano.shared(value=np.asarray(te_bidx, dtype=theano.config.floatX))
    va_bidx = T.cast(va_bidx, 'int32')
    te_bidx = T.cast(te_bidx, 'int32')

    # Print some useful information about the dataset
    print("dataset info:")
    print(" training samples: {0:d}".format(tr_samples))
    print(" samples/minibatch: {0:d}, minibatches/epoch: {1:d}".format(
        batch_size, tr_batches))
    print(" validation samples: {0:d}, testing samples: {1:d}".format(
        va_samples, te_samples))

    ######################
    # build actual model #
    ######################

    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()   # index to a [mini]batch
    epoch = T.scalar()    # epoch counter
    x = NET.input         # some observations have labels
    y = T.ivector('y')    # the labels are presented as integer categories
    learning_rate = theano.shared(np.asarray(initial_learning_rate,
                                             dtype=theano.config.floatX))

    # Build the expressions for the cost functions. if training without sde or
    # dev regularization, the dev loss/cost will be used, but the weights for
    # dev regularization on each layer will be set to 0 before training.
    sde_cost = NET.sde_cost(y)
    dev_cost = NET.dev_cost(y)
    NET_metrics = [NET.class_errors(y), NET.raw_class_loss(y),
                   NET.dev_reg_loss(y), NET.raw_reg_loss]

    ############################################################################
    # Compile testing and validation models. these models are evaluated on     #
    # batches of the same size as used in training. trying to jam a large      #
    # validation or test set through the net may take too much memory.         #
    ############################################################################
    test_model = theano.function(
        inputs=[index],
        outputs=NET_metrics,
        givens={
            x: Xte[te_bidx[index, 0]:te_bidx[index, 1], :],
            y: Yte[te_bidx[index, 0]:te_bidx[index, 1]]})
    validate_model = theano.function(
        inputs=[index],
        outputs=NET_metrics,
        givens={
            x: Xva[va_bidx[index, 0]:va_bidx[index, 1], :],
            y: Yva[va_bidx[index, 0]:va_bidx[index, 1]]})

    ############################################################################
    # prepare momentum and gradient variables, and construct the updates that  #
    # theano will perform on the network parameters.                           #
    ############################################################################
    opt_params = NET.mlp_params
    if sgd_params.get('top_only'):
        opt_params = NET.class_params

    sde_grads = [T.grad(sde_cost, param) for param in opt_params]
    dev_grads = [T.grad(dev_cost, param) for param in opt_params]

    def zero_like(param):
        # Momentum accumulator with the same shape as the parameter
        return theano.shared(np.zeros(
            param.get_value(borrow=True).shape, dtype=theano.config.floatX))

    sde_moms = [zero_like(param) for param in opt_params]
    dev_moms = [zero_like(param) for param in opt_params]

    # compute momentum for the current epoch
    mom = ifelse(epoch < 500,
                 0.5*(1. - epoch/500.) + 0.99*(epoch/500.),
                 0.99)

    # use a "smoothed" learning rate, to ease into optimization
    gentle_rate = ifelse(epoch < 5,
                         (epoch / 5.) * learning_rate,
                         learning_rate)

    def clip_row_norms(param_expr):
        # Rescale each row so its sum of squares does not exceed
        # wt_norm_bound; rows already within the bound get scale 1.
        sq_norms = T.sum(param_expr**2, axis=1, keepdims=1)
        scale = T.clip(T.sqrt(wt_norm_bound / sq_norms), 0., 1.)
        return param_expr * scale

    # update the step direction using a momentus update
    sde_updates = OrderedDict()
    dev_updates = OrderedDict()
    for sde_mom, dev_mom, sde_grad, dev_grad in zip(
            sde_moms, dev_moms, sde_grads, dev_grads):
        sde_updates[sde_mom] = mom * sde_mom + (1. - mom) * sde_grad
        dev_updates[dev_mom] = mom * dev_mom + (1. - mom) * dev_grad

    # ... and take a step along that direction
    for param, sde_mom, dev_mom in zip(opt_params, sde_moms, dev_moms):
        sde_param = param - (gentle_rate * sde_updates[sde_mom])
        dev_param = param - (gentle_rate * dev_updates[dev_mom])
        # clip the updated param to bound its norm (where applicable)
        if (param in NET.clip_params) and (NET.clip_params[param] == 1):
            sde_updates[param] = clip_row_norms(sde_param)
            dev_updates[param] = clip_row_norms(dev_param)
        else:
            sde_updates[param] = sde_param
            dev_updates[param] = dev_param

    # compile theano functions for training.  these return the training cost
    # and update the model parameters.
    train_sde = theano.function(
        inputs=[epoch, index], outputs=NET_metrics,
        updates=sde_updates,
        givens={
            x: Xtr[tr_bidx[index, 0]:tr_bidx[index, 1], :],
            y: Ytr[tr_bidx[index, 0]:tr_bidx[index, 1]]})

    train_dev = theano.function(
        inputs=[epoch, index], outputs=NET_metrics,
        updates=dev_updates,
        givens={
            x: Xtr[tr_bidx[index, 0]:tr_bidx[index, 1], :],
            y: Ytr[tr_bidx[index, 0]:tr_bidx[index, 1]]})

    # theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch instead
    # of after each minibatch.
    set_learning_rate = theano.function(
        inputs=[], outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # train model #
    ###############
    print('... training')

    min_validation_error = 100.
    min_test_error = 100.
    epoch_counter = 0

    # The context manager guarantees the results file is closed, including
    # when training is interrupted by an exception.
    with open(txt_file_name, 'wb') as results_file:
        results_file.write("mlp_type: {0}\n".format(mlp_type))
        results_file.write("lam_l2a: {0:.4f}\n".format(mlp_params['lam_l2a']))
        results_file.write("dev_types: {0}\n".format(str(mlp_params['dev_types'])))
        results_file.write("dev_lams: {0}\n".format(str(mlp_params['dev_lams'])))
        results_file.flush()

        e_time = time.clock()
        # get array of epoch metrics (on a single minibatch)
        # NOTE(review): only the length of the result is used, but this call
        # also applies one sde update on minibatch 0 before the first epoch.
        # Kept as-is to preserve training behavior -- confirm it is intended.
        n_metrics = len(train_sde(1, 0))

        while epoch_counter < n_epochs:
            ######################################################
            # process some number of minibatches for this epoch. #
            ######################################################
            NET.set_bias_noise(0.1)
            epoch_counter = epoch_counter + 1
            # pick the update function used for every minibatch this epoch
            if ((epoch_counter <= 0) or (mlp_type == 'sde')):
                train_fn = train_sde
            else:
                train_fn = train_dev
            epoch_metrics = _sum_batch_metrics(
                lambda b_idx: train_fn(epoch_counter, b_idx),
                tr_batches, n_metrics)
            # Compute 'averaged' values over the minibatches: the first
            # metric is turned into a percentage of samples, the others
            # into per-batch means.
            epoch_metrics[0] = 100 * (float(epoch_metrics[0]) / tr_samples)
            epoch_metrics[1:] = [(float(v) / tr_batches) for v in epoch_metrics[1:]]
            train_error = epoch_metrics[0]
            train_loss = epoch_metrics[1]

            # update the learning rate
            set_learning_rate()

            ######################################################
            # validation, testing, and general diagnostic stuff. #
            ######################################################
            NET.set_bias_noise(0.0)
            # compute metrics on validation set
            validation_metrics = _sum_batch_metrics(
                validate_model, va_batches, n_metrics)
            validation_error = 100 * (float(validation_metrics[0]) / va_samples)
            validation_metrics[1:] = [(float(v) / va_batches)
                                      for v in validation_metrics[1:]]
            validation_loss = validation_metrics[1]

            # compute metrics on testing set (done every epoch; only reported
            # on screen when a new best validation error is found)
            tag = " "
            test_metrics = _sum_batch_metrics(test_model, te_batches, n_metrics)
            test_error = 100 * (float(test_metrics[0]) / te_samples)
            test_metrics[1:] = [(float(v) / te_batches) for v in test_metrics[1:]]
            test_loss = test_metrics[1]
            if (validation_error < min_validation_error):
                min_validation_error = validation_error
                min_test_error = test_error
                tag = ", test={0:.2f}".format(test_error)
            results_file.write("{0:.2f} {1:.2f} {2:.2f} {3:.4f} {4:.4f} {5:.4f}\n".format(
                train_error, validation_error, test_error,
                train_loss, validation_loss, test_loss))
            results_file.flush()

            # report and save progress.
            print("epoch {0:d}: t_err={1:.2f}, t_loss={2:.4f}, t_dev={3:.4f}, t_reg={4:.4f}, valid={5:.2f}{6}".format(
                epoch_counter, epoch_metrics[0], epoch_metrics[1],
                epoch_metrics[2], epoch_metrics[3],
                validation_error, tag))
            print("--time: {0:.4f}".format((time.clock() - e_time)))
            e_time = time.clock()
            # save first layer weights to an image locally
            utils.visualize(NET, 0, img_file_name)

    print("optimization complete. best validation error {0:.4f}, with test error {1:.4f}".format(
        (min_validation_error), (min_test_error)))
# --- body of the per-batch training loop -------------------------------------
# The loop header that defines `i`, `all_idx`, `Xd_batch` and `Xn_batch` lies
# outside this excerpt.

# The first 100 shuffled indices go to the data batch, the rest to the noise batch
data_idx = all_idx[:100]
noise_idx = all_idx[100:]
# Ramp the discriminator weights linearly from ~0 to 1 over the first 10000 batches
scale = min(1.0, float(i+1)/10000.0)
GCP.set_disc_weights(dweight_gn=scale, dweight_dn=scale)
outputs = GCP.train_joint(Xd_batch, Xn_batch, data_idx, noise_idx)
mom_match_cost = 1.0 * outputs[0]
disc_cost_gn = 1.0 * outputs[1]
disc_cost_dn = 1.0 * outputs[2]
# Decay both learning rates once every 100000 batches.  The parentheses around
# (i + 1) are required: `i+1 % 100000` parses as `i + (1 % 100000)`, which is
# never 0 for i >= 0, so the decay would never run.
if (((i + 1) % 100000) == 0):
    gn_learn_rate = gn_learn_rate * 0.7
    dn_learn_rate = dn_learn_rate * 0.7
    GCP.set_sgd_params(lr_gn=gn_learn_rate, lr_dn=dn_learn_rate,
                       mom_1=0.8, mom_2=0.98)
# Report costs every 1000 batches
if ((i % 1000) == 0):
    print("batch: {0:d}, mom_match_cost: {1:.4f}, disc_cost_gn: {2:.4f}, disc_cost_dn: {3:.4f}".format(
        i, mom_match_cost, disc_cost_gn, disc_cost_dn))
# Dump generator samples and first-layer weights every 5000 batches
if ((i % 5000) == 0):
    file_name = "GCP_SAMPLES_b{0:d}.png".format(i)
    Xs = GCP.sample_from_gn(200)
    utils.visualize_samples(Xs, file_name)
    file_name = "GCP_WEIGHTS_b{0:d}.png".format(i)
    utils.visualize(GCP.DN, 0, 0, file_name)
# --- end of loop body ---------------------------------------------------------

print("TESTING COMPLETE!")

##############
# EYE BUFFER #
##############
def run(dry_run=False):
    """
    Train a two-layer ECA model on MNIST samples (labels are not used).

    In the full model the second layer X2 starts disabled and is enabled at
    iteration 250.  With dry_run=True a single 3-unit layer is built and only
    one iteration is run.  When the module-level flag CONTINUOUS_VISUALIZATION
    is set, diagnostic plots are drawn during training and, if SAVE_MOVIE is
    also set, recorded to 'eca-multilayer-rec.mp4'.  A KeyboardInterrupt
    during training is swallowed.
    """
    data = MnistDataset(batch_size=100, testset_size=1000, normalize=False)
    stiff_start, stiff_end, stiff_decay = (0.05, 0.05, 0.90)
    # Moves the stiffness a fraction (1 - stiff_decay) towards stiff_end
    stiff_update = lambda s: s * stiff_decay + (1 - stiff_decay) * stiff_end
    input_dim = data.size('trn')[0][0]

    class Model(ECA):
        def structure(self):
            if dry_run:
                self.U = Input('U', input_dim)
                self.X = Layer('X', 3, self.U, rect, 1.0)
            else:
                self.U = Input('U', input_dim)
                self.X1 = Layer('X1', 20, self.U, rect, min_tau=0., stiffx=1.0)
                self.X2 = Layer('X2', 5, self.X1, rect, min_tau=0., stiffx=1.0)
                # X2 stays disabled until the training loop enables it
                self.X2.disable()

    mdl = Model()
    trn_iters = 1000 if not dry_run else 1
    print 'Training', trn_iters, 'iterations'
    d = data.get('trn')
    stiff = stiff_start
    weights = []
    try:
        trn_sig = mdl.new_signals(data.samples('trn'))
        for i in range(1, trn_iters + 1):
            t = time.time()
            # Update model
            trn_sig.propagate(d.samples, None)
            trn_sig.adapt_layers(stiff)
            # Enable second layer once the first layer has settled
            if i == 250:
                mdl.X2.enable()
            stiff = stiff_update(stiff)
            # Plot every iteration when recording a movie, otherwise every 5th
            if CONTINUOUS_VISUALIZATION and i % (1 if SAVE_MOVIE else 5) == 0:
                import matplotlib.pyplot as plt
                if SAVE_MOVIE and i == 1:
                    # Movie mode: create the figure and the ffmpeg writer once
                    import matplotlib.animation as animation
                    fig, ax = plt.subplots(2, 2)
                    fig.set_size_inches(10.5, 10.5)
                    writer = animation.writers['ffmpeg'](fps=10, bitrate=1000)
                    writer.setup(fig, 'eca-multilayer-rec.mp4', dpi=100)
                elif not SAVE_MOVIE:
                    # Interactive mode: a fresh figure for every plot
                    fig, ax = plt.subplots(2, 2)
                    fig.set_size_inches(10.5, 10.5)
                u, U, X, phi, Q, = (d.samples, trn_sig.U.var.get_value(), trn_sig.U.next.var.get_value(), mdl.first_phi(), mdl.U.next.Q.get_value())
                X2 = trn_sig.U.next.next.var.get_value()
                import matplotlib.cm as cm
                # Slices of the form [:-n:-1] below take the last n-1 entries in reverse order
                n = 50
                # Singular values only (no U/V matrices)
                svd = lambda x: np.linalg.svd(x, compute_uv=False)
                # Top-left panel: singular values of u, U, X (indexed by the
                # argsort of X's singular values) and of X2 (sorted)
                order = np.argsort(svd(X))
                pu, = ax[0, 0].plot(svd(u)[order][:-n:-1], 'o-')
                pU, = ax[0, 0].plot(svd(U)[order][:-n:-1], 'v-')
                pX, = ax[0, 0].plot(svd(X)[order][:-n:-1], 'x-')
                pX2, = ax[0, 0].plot(np.sort(svd(X2))[:-n:-1], 'x-')
                ax[0, 0].legend([pu, pU, pX, pX2], ['svd(u)', 'svd(U)', 'svd(X)', 'svd(X2)'])
                # Top-right panel: mean square of X along axis 1, norms of
                # phi.T rows, and the diagonal of Q
                XE = np.mean(np.square(X), axis=1)
                #XE = np.var(X, axis=1)
                nphi = np.linalg.norm(phi.T, axis=1)
                order = np.arange(len(XE))
                pXE, = ax[0, 1].plot(XE[order][:-n:-1], 's-')
                pphi, = ax[0, 1].plot(nphi[order][:-n:-1], '*-')
                pq, = ax[0, 1].plot(Q[order, order][:-n:-1], 'x-')
                ax[0, 1].legend([pXE, pphi, pq], ['E{X^2}', '|phi|', 'q_i'])
                ax[0, 1].set_ylim([0.2, 5])
                # Bottom-left panel: the same three quantities for layer X2
                XE2 = np.mean(np.square(X2), axis=1)
                nphi2 = np.linalg.norm(mdl.X2.phi.get_value().T, axis=1)
                order = np.arange(len(XE2))
                Q2 = mdl.X2.Q.get_value()
                pXE2, = ax[1, 0].plot(XE2[order][:-n:-1], 's-')
                pphi2, = ax[1, 0].plot(nphi2[order][:-n:-1], '*-')
                pq2, = ax[1, 0].plot(Q2[order, order][:-n:-1], 'x-')
                ax[1, 0].legend([pXE2, pphi2, pq2], ['E{X2^2}', '|phi2|', 'q2_i'])
                ax[1, 0].set_ylim([0.2, 3])
                # Bottom-right panel: first-layer weights as a greyscale image
                ax[1, 1].imshow(rearrange_for_plot(phi), cmap=cm.Greys_r)
                if SAVE_MOVIE:
                    # Record frames for iterations below 500, then finalize the file
                    if i < 500:
                        writer.grab_frame()
                        ax[0, 0].clear()
                        ax[0, 1].clear()
                        ax[1, 0].clear()
                    elif i == 500:
                        writer.finish()
                else:
                    fig = plt.gcf()
                    plt.show()

            # Progress prints
            if (i % 20 == 0):
                weights += [mdl.first_phi()[:, :]]
                if not CONTINUOUS_VISUALIZATION:
                    visualize(weights[-1])
                i_str = "I %4d:" % (i)
                t_str = 't: %.2f s' % (time.time() - t)
                t = time.time()
                stiff_str = "stiff: %5.3f" % stiff
                # Formats an iterable of (name, value) pairs as "{name: value, ...}"
                tostr = lambda t: "{" + ", ".join(["%s: %6.2f" % (n, v) for (n, v) in t]) + "}"
                # var_str, a_str, phi_larg_str, phi_zero_str and phi_str are
                # only referenced by the commented-out print below
                var_str = " logvar:" + tostr(trn_sig.variance())
                a_str = " avg: " + tostr(trn_sig.avg_levels())
                phi_norms = mdl.phi_norms()
                phi_larg_str = " |phinL|: " + tostr(map(lambda a: (a[0], np.sum(a[1] > 1.1)), phi_norms))
                phi_ones_str = " |phin1|: " + tostr(map(lambda a: (a[0], np.sum(np.isclose(a[1], 1.0, atol=0.1))), phi_norms))
                phi_zero_str = " |phin0|: " + tostr(map(lambda a: (a[0], np.sum(np.isclose(a[1], 0.0, atol=0.5))), phi_norms))
                phi_str = " |phi|: " + tostr(map(lambda a: (a[0], np.average(a[1])), phi_norms))
                E_str = " E: " + tostr(trn_sig.energy())
                u_err = ' uerr: %.5f' % trn_sig.u_err(d.samples)
                U_sqr = ' Usqr: %.5f' % np.average(np.square(trn_sig.U.var.get_value()))
                print i_str, stiff_str, t_str, E_str, phi_ones_str, u_err, U_sqr
                #print var_str, a_str, phi_str
    except KeyboardInterrupt:
        pass
    try:
        # The `and False` keeps this final visualization switched off
        if not dry_run and False:
            visualize(weights[0])
    except KeyboardInterrupt:
        pass
def run(dry_run=False): data = MnistDataset(batch_size=1000, testset_size=1000, normalize=False) stiff_start, stiff_end, stiff_decay = (0.5, 0.005, 0.99) stiff_update = lambda s: s * stiff_decay + (1 - stiff_decay) * stiff_end input_dim = data.size('trn')[0][0] class Model(ECA): def structure(self): if dry_run: self.U = Input('U', input_dim) self.X = Layer('X', 3, self.U, rect, 1.0) else: self.U = Input('U', input_dim) self.X = Layer('X1', 100, self.U, rect, min_tau=0., stiffx=1.0) mdl = Model() trn_iters = 1000 if not dry_run else 1 print 'Training', trn_iters, 'iterations' d = data.get('trn') n = mdl.U.next[0].n weights = [] try: trn_sig = mdl.new_signals(data.samples('trn')) trne_sig = mdl.new_signals(data.samples('trn')) val_sig = mdl.new_signals(data.samples('val')) tst_sig = mdl.new_signals(data.samples('tst')) # "Module model" so that it updates only certain weights at a time en = np.ones((n, d.samples.shape[1]), dtype=np.float32) for j in range(10): en[n//10 * j: n//10 * (j+1), d.labels != j] *= -0.1 trn_sig.U.next.set_modulation(en) stiff = stiff_start for i in range(1, trn_iters + 1): t = time.time() # Update model trn_sig.propagate(d.samples, None) trn_sig.adapt_layers(stiff) stiff = stiff_update(stiff) if i % 200 == 0: calculate_accuracy(trne_sig, val_sig, tst_sig, data) # Progress prints if (i % 20 == 0): weights += [mdl.first_phi()[:-10, :]] i_str = "I %4d:" % (i) t_str = 't: %.2f s' % (time.time() - t) t = time.time() stiff_str = "stiff: %5.3f" % stiff tostr = lambda t: "{" + ", ".join(["%s: %6.2f" % (n, v) for (n, v) in t]) + "}" var_str = " logvar:" + tostr(trn_sig.variance()) a_str = " avg: " + tostr(trn_sig.avg_levels()) phi_norms = mdl.phi_norms() phi_larg_str = " |phinL|: " + tostr(map(lambda a: (a[0], np.sum(a[1] > 1.1)), phi_norms)) phi_ones_str = " |phin1|: " + tostr(map(lambda a: (a[0], np.sum(np.isclose(a[1], 1.0, atol=0.1))), phi_norms)) phi_zero_str = " |phin0|: " + tostr(map(lambda a: (a[0], np.sum(np.isclose(a[1], 0.0, atol=0.5))), 
phi_norms)) phi_str = " |phi|: " + tostr(map(lambda a: (a[0], np.average(a[1])), phi_norms)) E_str = " E: " + tostr(trn_sig.energy()) print i_str, stiff_str, t_str, E_str, phi_ones_str #print var_str, a_str, phi_str except KeyboardInterrupt: pass if dry_run: return try: print 'Calculating final accuracy' calculate_accuracy(trne_sig, val_sig, tst_sig, data) if False: visualize(weights[0]) except KeyboardInterrupt: pass
def train_dae(
        NET,
        dae_layer,
        sgd_params,
        datasets):
    """
    Train some layer of NET as an autoencoder of its input source.

    Datasets should be a three-tuple used for training, validation, and
    testing, respectively. Only the first element of each item is read
    (datasets[k][0]); it is the matrix of observations and must provide
    get_value() (it is read to obtain the sample count).

    dae_layer indexes NET.dae_costs and NET.dae_params; the optimized cost
    is the sum of the two entries of NET.dae_costs[dae_layer].

    sgd_params keys read here: 'start_rate', 'decay_rate', 'epochs',
    'batch_size', 'wt_norm_bound', 'result_tag'.

    Side effects: updates NET's parameters and NET.dae_lam_l1, writes
    per-epoch losses to "results_dae_<result_tag>.txt", calls
    utils.visualize with "weights_dae_<result_tag>.png", and prints
    progress. Returns None.
    """
    initial_learning_rate = sgd_params['start_rate']
    learning_rate_decay = sgd_params['decay_rate']
    n_epochs = sgd_params['epochs']
    batch_size = sgd_params['batch_size']
    wt_norm_bound = sgd_params['wt_norm_bound']
    result_tag = sgd_params['result_tag']
    txt_file_name = "results_dae_{0}.txt".format(result_tag)
    img_file_name = "weights_dae_{0}.png".format(result_tag)

    # Get the training data and create arrays of start/end indices for
    # easy minibatch slicing
    Xtr = datasets[0][0]
    tr_samples = Xtr.get_value(borrow=True).shape[0]
    tr_batches = int(np.ceil(float(tr_samples) / batch_size))
    tr_bidx = [[i*batch_size, min(tr_samples, (i+1)*batch_size)]
               for i in range(tr_batches)]
    # Unlike the validation/test indices below, the training indices are
    # cast straight from the Python list (no shared variable in between).
    tr_bidx = T.cast(tr_bidx, 'int32')
    # Get the validation and testing sets and create arrays of start/end
    # indices for easy minibatch slicing
    Xva = datasets[1][0]
    Xte = datasets[2][0]
    va_samples = Xva.get_value(borrow=True).shape[0]
    te_samples = Xte.get_value(borrow=True).shape[0]
    # Validation and test sets are always evaluated in batches of 100
    va_batches = int(np.ceil(va_samples / 100.))
    te_batches = int(np.ceil(te_samples / 100.))
    va_bidx = [[i*100, min(va_samples, (i+1)*100)] for i in range(va_batches)]
    te_bidx = [[i*100, min(te_samples, (i+1)*100)] for i in range(te_batches)]
    va_bidx = theano.shared(value=np.asarray(va_bidx, dtype=theano.config.floatX))
    te_bidx = theano.shared(value=np.asarray(te_bidx, dtype=theano.config.floatX))
    va_bidx = T.cast(va_bidx, 'int32')
    te_bidx = T.cast(te_bidx, 'int32')

    print("Dataset info:")
    print(" training samples: {0:d}".format(tr_samples))
    print(" samples/minibatch: {0:d}, minibatches/epoch: {1:d}".format(
        batch_size, tr_batches))
    print(" validation samples: {0:d}, testing samples: {1:d}".format(
        va_samples, te_samples))

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()   # index to a [mini]batch
    epoch = T.scalar()    # epoch counter
    x = NET.input         # symbolic matrix for inputs to NET
    learning_rate = theano.shared(np.asarray(initial_learning_rate,
                                             dtype=theano.config.floatX))

    # Build the expressions for the cost functions.
    NET_cost = NET.dae_costs[dae_layer][0] + NET.dae_costs[dae_layer][1]
    NET_metrics = [NET_cost, NET.dae_costs[dae_layer][0], NET.dae_costs[dae_layer][1]]
    opt_params = NET.dae_params[dae_layer]

    ############################################################################
    # Compile testing and validation models. These models are evaluated on     #
    # batches of the same size as used in training. Trying to jam a large      #
    # validation or test set through the net may be stupid.                    #
    ############################################################################
    test_model = theano.function(
        inputs=[index],
        outputs=NET_metrics,
        givens={x: Xte[te_bidx[index, 0]:te_bidx[index, 1], :]})

    validate_model = theano.function(
        inputs=[index],
        outputs=NET_metrics,
        givens={x: Xva[va_bidx[index, 0]:va_bidx[index, 1], :]})

    ############################################################################
    # Prepare momentum and gradient variables, and construct the updates that  #
    # Theano will perform on the network parameters.                           #
    ############################################################################
    NET_grads = [T.grad(NET_cost, param) for param in opt_params]

    # One zero-initialized momentum accumulator per optimized parameter
    NET_moms = [theano.shared(np.zeros(
                    param.get_value(borrow=True).shape,
                    dtype=theano.config.floatX))
                for param in opt_params]

    # Compute momentum for the current epoch
    mom = ifelse(epoch < 500,
                 0.5*(1. - epoch/500.) + 0.99*(epoch/500.),
                 0.99)

    # Use a "smoothed" learning rate, to ease into optimization
    gentle_rate = ifelse(epoch < 20,
                         ((epoch / 20.) * learning_rate),
                         learning_rate)

    # Update the step direction using a momentus update
    NET_updates = OrderedDict()
    for NET_mom, NET_grad in zip(NET_moms, NET_grads):
        NET_updates[NET_mom] = mom * NET_mom + (1. - mom) * NET_grad

    # ... and take a step along that direction
    for param, NET_mom in zip(opt_params, NET_moms):
        NET_param = param - (gentle_rate * NET_updates[NET_mom])
        # Clip the updated param to bound its norm (where applicable):
        # each row is rescaled so its sum of squares is at most wt_norm_bound.
        if (param in NET.clip_params) and (NET.clip_params[param] == 1):
            NET_norms = T.sum(NET_param**2, axis=1, keepdims=1)
            NET_scale = T.clip(T.sqrt(wt_norm_bound / NET_norms), 0., 1.)
            NET_updates[param] = NET_param * NET_scale
        else:
            NET_updates[param] = NET_param

    # Compile theano functions for training.  These return the training cost
    # and update the model parameters.
    train_NET = theano.function(
        inputs=[epoch, index],
        outputs=NET_metrics,
        updates=NET_updates,
        givens={x: Xtr[tr_bidx[index, 0]:tr_bidx[index, 1], :]})

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch instead
    # of after each minibatch.
    set_learning_rate = theano.function(
        inputs=[], outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # test_loss keeps its last computed value on epochs where the test set
    # is not re-evaluated, so it needs an initial value.
    test_loss = 1e6
    min_validation_loss = 1e6
    epoch_counter = 0

    # The context manager guarantees the results file is closed, including
    # when training is interrupted by an exception.
    with open(txt_file_name, 'wb') as results_file:
        results_file.write("ensemble description: ")
        results_file.write(" **TODO: Write code for this.**\n")
        results_file.flush()

        # NOTE(review): only the length of the result is used, but this call
        # also applies one update on minibatch 0 before the first epoch.
        # Kept as-is to preserve training behavior -- confirm it is intended.
        n_metrics = len(train_NET(1, 0))

        while epoch_counter < n_epochs:
            ######################################################
            # Process some number of minibatches for this epoch. #
            ######################################################
            e_time = time.clock()
            # Ramp lam_l1 linearly from 0 to 0.2 over the first 10 epochs
            lam_l1 = 0.2 * min(float(epoch_counter)/10.0, 1.0)
            NET.dae_lam_l1.set_value(np.asarray([lam_l1]).astype(theano.config.floatX))
            epoch_counter = epoch_counter + 1
            train_metrics = [0. for _ in range(n_metrics)]
            for minibatch_index in range(tr_batches):
                # Compute update for one minibatch and accumulate its metrics
                batch_metrics = train_NET(epoch_counter, minibatch_index)
                train_metrics = [(em + bm) for (em, bm)
                                 in zip(train_metrics, batch_metrics)]
            train_metrics = [(val / tr_batches) for val in train_metrics]

            # Update the learning rate
            set_learning_rate()

            ######################################################
            # Validation, testing, and general diagnostic stuff. #
            ######################################################
            # Compute metrics on validation set
            validation_metrics = [validate_model(i) for i in range(va_batches)]
            validation_loss = np.mean([vm[0] for vm in validation_metrics])

            # Compute test error if new best validation error was found
            # (and additionally on every 10th epoch)
            tag = " "
            if ((validation_loss < min_validation_loss) or
                    ((epoch_counter % 10) == 0)):
                # Compute metrics on testing set
                test_metrics = [test_model(i) for i in range(te_batches)]
                test_loss = np.mean([tm[0] for tm in test_metrics])
                if (validation_loss < min_validation_loss):
                    min_validation_loss = validation_loss
                    tag = ", te_loss={0:.4f}".format(test_loss)
            results_file.write("{0:.4f} {1:.4f}\n".format(validation_loss, test_loss))
            results_file.flush()

            # Report and save progress.
            print("epoch {0:d}: tr_loss={1:.4f}, tr_recon={2:.4f}, tr_sparse={3:.4f}, va_loss={4:.4f}{5}".format(
                epoch_counter, train_metrics[0], train_metrics[1],
                train_metrics[2], validation_loss, tag))
            print("--time: {0:.4f}".format((time.clock() - e_time)))
            # Save first layer weights to an image locally
            utils.visualize(NET, 0, 0, img_file_name)
def train_ss_mlp(NET, sgd_params, datasets):
    """
    Train NET with momentum SGD on a mix of labeled and unlabeled data.

    Parameters:
      NET: network object. This function reads NET.input, NET.proto_params,
          NET.class_params, NET.clip_params and NET.act_reg_cost, and calls
          NET.spawn_class_cost, NET.spawn_reg_cost (or NET.spawn_reg_cost_alt
          when 'ear_type' is given), NET.spawn_ent_cost (when 'ent_lam' is
          given), NET.proto_class_errors and NET.proto_class_loss.
      sgd_params: dict with required keys 'start_rate', 'decay_rate',
          'epochs', 'batch_size', 'wt_norm_bound' and 'result_tag', and
          optional keys 'ear_type', 'ent_lam' and 'top_only'.
      datasets: four-item sequence of (inputs, labels) pairs, in the order
          supervised training, unsupervised training, validation, testing.
          Each inputs item must support get_value() (a Theano shared
          variable). NOTE(review): the unsupervised entry is indexed as a
          pair too (datasets[1][1] is cast to int32 labels) -- presumably it
          carries placeholder labels; confirm against the caller.

    Side effects: prints progress, writes per-epoch results to
    "results_mlp_<result_tag>.txt", and calls utils.visualize with
    "weights_mlp_<result_tag>.png" after every epoch. Returns None.
    """
    initial_learning_rate = sgd_params['start_rate']
    learning_rate_decay = sgd_params['decay_rate']
    n_epochs = sgd_params['epochs']
    batch_size = sgd_params['batch_size']
    wt_norm_bound = sgd_params['wt_norm_bound']
    result_tag = sgd_params['result_tag']
    txt_file_name = "results_mlp_{0}.txt".format(result_tag)
    img_file_name = "weights_mlp_{0}.png".format(result_tag)
    eval_bsize = 100  # minibatch size used for validation and testing

    def _batch_bounds(n_samples, bsize):
        """Return (batch count, int32 symbolic matrix of [start, end] rows)."""
        n_batches = int(np.ceil(float(n_samples) / bsize))
        bounds = [[i * bsize, min(n_samples, (i + 1) * bsize)]
                  for i in range(n_batches)]
        shared_bounds = theano.shared(
            value=np.asarray(bounds, dtype=theano.config.floatX))
        return n_batches, T.cast(shared_bounds, 'int32')

    def _sum_metrics(batch_fn, n_batches, n_metrics):
        """Sum the outputs of batch_fn(b) elementwise over all batches b."""
        totals = [0. for _ in range(n_metrics)]
        for b_idx in range(n_batches):
            totals = [(em + bm) for (em, bm) in zip(totals, batch_fn(b_idx))]
        return totals

    # Get supervised and unsupervised portions of training data, and create
    # arrays of start/end indices for easy minibatch slicing.
    Xtr_su, Ytr_su = (datasets[0][0], T.cast(datasets[0][1], 'int32'))
    Xtr_un, Ytr_un = (datasets[1][0], T.cast(datasets[1][1], 'int32'))
    su_samples = Xtr_su.get_value(borrow=True).shape[0]
    un_samples = Xtr_un.get_value(borrow=True).shape[0]
    tr_batches = 250
    # Floor division keeps the batch sizes integral on Python 2 and 3 alike;
    # they are used as slice bounds and printed with the 'd' format code.
    su_bsize = batch_size // 2
    un_bsize = batch_size - su_bsize
    su_batches, su_bidx = _batch_bounds(su_samples, su_bsize)
    un_batches, un_bidx = _batch_bounds(un_samples, un_bsize)

    # get the validation and testing sets and create arrays of start/end
    # indices for easy minibatch slicing
    Xva, Yva = (datasets[2][0], T.cast(datasets[2][1], 'int32'))
    Xte, Yte = (datasets[3][0], T.cast(datasets[3][1], 'int32'))
    va_samples = Xva.get_value(borrow=True).shape[0]
    te_samples = Xte.get_value(borrow=True).shape[0]
    va_batches, va_bidx = _batch_bounds(va_samples, eval_bsize)
    te_batches, te_bidx = _batch_bounds(te_samples, eval_bsize)

    # Print some useful information about the dataset
    print("dataset info:")
    print(" supervised samples: {0:d}, unsupervised samples: {1:d}".format(
        su_samples, un_samples))
    print(" samples/minibatch: {0:d}, minibatches/epoch: {1:d}".format(
        (su_bsize + un_bsize), tr_batches))
    print(" validation samples: {0:d}, testing samples: {1:d}".format(
        va_samples, te_samples))

    ######################
    # build actual model #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()   # index to a [mini]batch
    epoch = T.scalar()    # epoch counter
    su_idx = T.lscalar()  # symbolic batch index into supervised samples
    un_idx = T.lscalar()  # symbolic batch index into unsupervised samples
    x = NET.input         # some observations have labels
    y = T.ivector('y')    # the labels are presented as integer categories
    learning_rate = theano.shared(
        np.asarray(initial_learning_rate, dtype=theano.config.floatX))

    # build the expressions for the cost functions.
    class_cost = NET.spawn_class_cost(y)
    if 'ear_type' in sgd_params:
        ear_reg_cost = NET.spawn_reg_cost_alt(y, sgd_params['ear_type'])
    else:
        ear_reg_cost = NET.spawn_reg_cost(y)
    act_reg_cost = NET.act_reg_cost
    train_cost = class_cost + ear_reg_cost + act_reg_cost
    if 'ent_lam' in sgd_params:
        train_cost = train_cost + NET.spawn_ent_cost(sgd_params['ent_lam'], y)
    tr_outputs = [train_cost, class_cost, ear_reg_cost, act_reg_cost]
    vt_outputs = [NET.proto_class_errors(y), NET.proto_class_loss(y)]

    # compile testing and validation models. these are evaluated on small
    # fixed-size batches, since pushing a whole validation or test set
    # through the net at once may take too much memory.
    test_model = theano.function(
        inputs=[index], outputs=vt_outputs,
        givens={
            x: Xte[te_bidx[index, 0]:te_bidx[index, 1], :],
            y: Yte[te_bidx[index, 0]:te_bidx[index, 1]]})
    validate_model = theano.function(
        inputs=[index], outputs=vt_outputs,
        givens={
            x: Xva[va_bidx[index, 0]:va_bidx[index, 1], :],
            y: Yva[va_bidx[index, 0]:va_bidx[index, 1]]})

    # prepare momentum and gradient variables, and construct the updates
    # that theano will perform on the network parameters.
    opt_params = NET.proto_params
    if sgd_params.get('top_only'):
        opt_params = NET.class_params
    NET_grads = [T.grad(train_cost, param) for param in opt_params]
    NET_moms = [
        theano.shared(np.zeros(param.get_value(borrow=True).shape,
                               dtype=theano.config.floatX))
        for param in opt_params]

    # compute momentum for the current epoch: ramps linearly from 0.5 to
    # 0.99 over the first 500 epochs, then stays at 0.99
    mom = ifelse(epoch < 500,
                 0.5*(1. - epoch/500.) + 0.99*(epoch/500.),
                 0.99)
    # use a "smoothed" learning rate, to ease into optimization
    gentle_rate = ifelse(epoch < 5,
                         ((epoch / 5.) * learning_rate),
                         learning_rate)

    # update the step direction using a momentum update
    dev_updates = OrderedDict()
    for mom_var, grad in zip(NET_moms, NET_grads):
        dev_updates[mom_var] = mom * mom_var + (1. - mom) * grad
    # ... and take a step along that direction
    for param, mom_var in zip(opt_params, NET_moms):
        dev_param = param - (gentle_rate * dev_updates[mom_var])
        # clip the updated param to bound its norm (where applicable)
        if (param in NET.clip_params) and (NET.clip_params[param] == 1):
            dev_norms = T.sum(dev_param**2, axis=1, keepdims=1)
            dev_scale = T.clip(T.sqrt(wt_norm_bound / dev_norms), 0., 1.)
            dev_updates[param] = dev_param * dev_scale
        else:
            dev_updates[param] = dev_param

    # compile theano function for training. it returns the training costs
    # and updates the model parameters.
    train_dev = theano.function(
        inputs=[epoch, su_idx, un_idx], outputs=tr_outputs,
        updates=dev_updates,
        givens={
            x: T.concatenate(
                [Xtr_su[su_bidx[su_idx, 0]:su_bidx[su_idx, 1], :],
                 Xtr_un[un_bidx[un_idx, 0]:un_bidx[un_idx, 1], :]]),
            y: T.concatenate(
                [Ytr_su[su_bidx[su_idx, 0]:su_bidx[su_idx, 1]],
                 Ytr_un[un_bidx[un_idx, 0]:un_bidx[un_idx, 1]]])})

    # theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch
    # instead of after each minibatch.
    set_learning_rate = theano.function(
        inputs=[], outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # train model #
    ###############
    print('... training')

    min_validation_error = 100.
    min_test_error = 100.
    epoch_counter = 0

    # The context manager closes the results file even if training is
    # interrupted or raises.
    with open(txt_file_name, 'w') as results_file:
        results_file.write("ensemble description: ")
        results_file.write(" **TODO: Write code for this.**\n")
        results_file.flush()

        e_time = time.time()
        su_index = 0
        un_index = 0
        # Run each compiled function once to learn how many metrics it
        # returns. The train_dev call is a real update step (with epoch=0,
        # so the smoothed learning rate is zero) and is kept so the
        # optimization trajectory is unchanged.
        n_train_metrics = len(train_dev(0, 0, 0))
        n_valid_metrics = len(validate_model(0))
        n_test_metrics = len(test_model(0))

        while epoch_counter < n_epochs:
            ######################################################
            # process some number of minibatches for this epoch. #
            ######################################################
            epoch_counter = epoch_counter + 1
            train_metrics = [0. for _ in range(n_train_metrics)]
            for _ in range(tr_batches):
                batch_metrics = train_dev(epoch_counter, su_index, un_index)
                train_metrics = [(em + bm) for (em, bm)
                                 in zip(train_metrics, batch_metrics)]
                # cycle through the supervised/unsupervised batches
                su_index = (su_index + 1) if ((su_index + 1) < su_batches) else 0
                un_index = (un_index + 1) if ((un_index + 1) < un_batches) else 0
            # Compute 'averaged' values over the minibatches
            train_metrics = [(float(v) / tr_batches) for v in train_metrics]
            # update (decay) the learning rate once per epoch
            set_learning_rate()

            ######################################################
            # validation, testing, and general diagnostic stuff. #
            ######################################################
            validation_metrics = _sum_metrics(
                validate_model, va_batches, n_valid_metrics)
            validation_error = 100 * (float(validation_metrics[0]) / va_samples)
            validation_loss = float(validation_metrics[1]) / va_batches

            test_metrics = _sum_metrics(test_model, te_batches, n_test_metrics)
            test_error = 100 * (float(test_metrics[0]) / te_samples)
            test_loss = float(test_metrics[1]) / te_batches

            # remember the test error at the best validation error so far
            tag = " "
            if validation_error < min_validation_error:
                min_validation_error = validation_error
                min_test_error = test_error
                tag = ", test={0:.2f}".format(test_error)
            results_file.write(
                "{0:.2f} {1:.2f} {2:.2f} {3:.4f} {4:.4f} {5:.4f}\n".format(
                    train_metrics[2], validation_error, test_error,
                    train_metrics[1], validation_loss, test_loss))
            results_file.flush()

            # report and save progress.
            print("epoch {0:d}: t_cost={1:.2f}, t_loss={2:.4f}, t_ear={3:.4f}, t_act={6:.4f}, valid={4:.2f}{5}".format(
                epoch_counter, train_metrics[0], train_metrics[1],
                train_metrics[2], validation_error, tag, train_metrics[3]))
            print("--time: {0:.4f}".format((time.time() - e_time)))
            e_time = time.time()
            # save first layer weights to an image locally
            utils.visualize(NET, 0, 0, img_file_name)

    print("optimization complete. best validation error {0:.4f}, with test error {1:.4f}".format(
        (min_validation_error), (min_test_error)))
def main(_):
    """Build a DCGAN from the command-line FLAGS, then train or load it.

    Creates the checkpoint and sample directories when they are missing,
    opens a TensorFlow session with GPU memory growth enabled, and runs
    the visualization step once the model is trained or loaded.

    Raises:
        Exception: when not training and no checkpoint could be loaded.
    """
    pp.pprint(flags.FLAGS.__flags)

    # A missing width falls back to the matching height.
    if FLAGS.input_width is None:
        FLAGS.input_width = FLAGS.input_height
    if FLAGS.output_width is None:
        FLAGS.output_width = FLAGS.output_height

    for directory in (FLAGS.checkpoint_dir, FLAGS.sample_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True

    with tf.Session(config=run_config) as sess:
        # Arguments shared by every dataset.
        model_kwargs = dict(
            input_width=FLAGS.input_width,
            input_height=FLAGS.input_height,
            output_width=FLAGS.output_width,
            output_height=FLAGS.output_height,
            batch_size=FLAGS.batch_size,
            sample_num=FLAGS.batch_size,
            dataset_name=FLAGS.dataset,
            input_fname_pattern=FLAGS.input_fname_pattern,
            is_crop=FLAGS.is_crop,
            checkpoint_dir=FLAGS.checkpoint_dir,
            sample_dir=FLAGS.sample_dir)
        if FLAGS.dataset == 'mnist':
            # mnist is built with y_dim=10 and a fixed c_dim of 1
            model_kwargs.update(y_dim=10, c_dim=1)
        else:
            model_kwargs.update(c_dim=FLAGS.c_dim)
        dcgan = DCGAN(sess, **model_kwargs)

        show_all_variables()

        if FLAGS.is_train:
            dcgan.train(FLAGS)
        elif not dcgan.load(FLAGS.checkpoint_dir):
            raise Exception("[!] Train a model first, then run test mode")

        # The to_json export of the layers to ./web/js/layers.js is disabled.

        # Below is code for visualization
        OPTION = 1
        visualize(sess, dcgan, FLAGS, OPTION)
####################################################
machine_steering = []

print('performing inference...')
time_start = time.time()

# The earlier per-frame loop (cap.read -> img_pre_process -> model.predict
# with batch_size=1) failed when appending the prediction, so every test
# image is now predicted in one batched call instead.
############ New codes added by student ############
machine_steering = model.predict(imgs_test, batch_size=128, verbose=0)
####################################################

elapsed = time.time() - time_start
fps = frame_count / elapsed
summary = 'completed inference, total frames: {}, average fps: {} Hz'.format(
    frame_count, round(fps, 1))
print(summary)

print('performing visualization...')
utils.visualize(
    epoch_id,
    machine_steering,
    params.out_dir,
    verbose=True,
    frame_count_limit=None)
from scipy import io

############# FILE STUFF #############
trainFileMNIST = "./mnist_data/images.mat"
trainMatrix = io.loadmat(trainFileMNIST)  # Dictionary

############# GET DATA #############
_banner = 20 * "#"
print(" ".join([_banner, "Getting Data", _banner]))

# Move the index axis (axis 2) to the front so iteration yields one image
# at a time.
imageData = np.rollaxis(np.array(trainMatrix['images']), 2, 0)
dataShape = np.shape(imageData)
print(" ".join(["Image Data Shape", str(dataShape)]))

# Flatten every image into a 1-D vector.
imageDataFlat = [elem.flatten() for elem in imageData]
dataShape = np.shape(imageDataFlat)
print(" ".join(["Image Data Flat Shape", str(dataShape)]))

# Fit k-means for each cluster count and show the resulting centers.
num_clusters = [5, 10, 20]
for cluster in num_clusters:
    print(" ".join([_banner, "Num Clusters:", str(cluster), _banner]))
    KM = KMeans(cluster, max_iter=10)
    KM.fit(imageDataFlat)
    visualize(KM.cluster_centers_, cluster)
plt.grid() plt.legend(loc='best', fancybox=True, framealpha=0.5) plt.xlabel('time(10ms)') plt.ylabel('Value') plt.show() return data if __name__== '__main__': ''' matchs=filesinroot(path,"multispeed",0) multispeed_data=construct(matchs) multispeed_data=select(multispeed_data) #multispeed_data=add_simu_data(multispeed_data,multispeed_data.shape[0]/2) visualize(multispeed_data) savefile(multispeed_data,parent_path+'/dataset/multispeed_dataset'+tstr+'.pkl.gz') ''' ''' matchs=filesinroot(path,"test",0) data=dg.construct(matchs) data=dg.select(data) data=dg.add_simu_data(data,data.shape[0]/2) data=np.vstack((multispeed_data,data)) dg.visualize(data) savefile(multispeed_data,parent_path+'/dataset/dataset'+tstr+'.pkl.gz') ''' #test decay mode data=test_time_decay(path='/Users/subercui/Git/BaikeBalance/New data/nn_test/MLP_error.txt') utils.visualize(data)
def run(dry_run=False):
    """Train an ECA model on MNIST and periodically impute masked test data.

    With dry_run=True a 3-unit layer is built and trained for a single
    iteration; otherwise a 100-unit layer is trained for 1000 iterations
    and, afterwards, imputation with sampling repeats until interrupted.
    Each phase stops quietly on KeyboardInterrupt.
    """
    trn_iters = 1 if dry_run else 1000
    data = MnistDataset(batch_size=2000, testset_size=1000, normalize=False)
    stiff_start, stiff_end, stiff_decay = (0.5, 0.05, 0.90)
    input_dim = data.size('trn')[0][0]

    def stiff_update(s):
        # Exponential approach from the current stiffness towards stiff_end.
        return s * stiff_decay + (1 - stiff_decay) * stiff_end

    def tostr(pairs):
        # Render (name, value) pairs as "{name: value, ...}".
        body = ", ".join(["%s: %6.2f" % (n, v) for (n, v) in pairs])
        return "{" + body + "}"

    class Model(ECA):
        def structure(self):
            self.U = Input('U', input_dim)
            if dry_run:
                self.X = Layer('X', 3, self.U, rect, 1.0)
            else:
                self.X1 = Layer('X1', 100, self.U, lambda x: rect(x),
                                min_tau=0., stiffx=1.0)

    mdl = Model()
    print(' '.join(['Training for', str(trn_iters), 'iterations']))

    d = data.get('trn')
    k = data.samples('trn')
    stiff = stiff_start
    weights = []

    try:
        rng = np.random.RandomState(seed=0)
        trn_sig = mdl.new_signals(k)
        test_k = 25
        test_sig = mdl.new_signals(test_k)
        test_data = data.get('val').samples[:, :test_k]
        guessed = test_data.copy()

        # Hide part of the test data: either the first half of the rows, or
        # everything except a random subset (each entry kept with p=0.1).
        RECONSTRUCT_HALF = False
        if RECONSTRUCT_HALF:
            guessed[:784//2] = np.nan
        else:
            mask = rng.binomial(n=1, p=0.1, size=(784, test_k))
            guessed = np.where(mask == 1, guessed, np.nan)

        for i in range(1, trn_iters + 1):
            t = time.time()

            # Training
            trn_sig.propagate(d.samples, None)
            trn_sig.adapt_layers(stiff)
            stiff = stiff_update(stiff)

            # Progress prints, every 20th iteration only
            if i % 20 != 0:
                continue

            i_str = "I %4d:" % (i)
            t_str = 't: %.2f s' % (time.time() - t)
            stiff_str = "stiff: %5.3f" % stiff
            # Several of these summaries are computed but not printed.
            var_str = " logvar:" + tostr(trn_sig.variance())
            a_str = " avg: " + tostr(trn_sig.avg_levels())
            phi_norms = mdl.phi_norms()
            phi_larg_str = " |phinL|: " + tostr(
                [(a[0], np.sum(a[1] > 1.1)) for a in phi_norms])
            phi_ones_str = " |phin1|: " + tostr(
                [(a[0], np.sum(np.isclose(a[1], 1.0, atol=0.1)))
                 for a in phi_norms])
            phi_zero_str = " |phin0|: " + tostr(
                [(a[0], np.sum(np.isclose(a[1], 0.0, atol=0.5)))
                 for a in phi_norms])
            phi_str = " |phi|: " + tostr(
                [(a[0], np.average(a[1])) for a in phi_norms])
            E_str = " E: " + tostr(trn_sig.energy())
            u_err = ' uerr: %.5f' % trn_sig.u_err(d.samples)
            U_sqr = ' Usqr: %.5f' % np.average(
                np.square(trn_sig.U.var.get_value()))

            impute(data, test_k, mdl, test_sig, guessed, test_data,
                   sample=False)
            print(" ".join([i_str, stiff_str, t_str, E_str, phi_ones_str,
                            u_err, U_sqr]))
    except KeyboardInterrupt:
        pass

    try:
        # Try to interpret u_est as pdf and sample from it
        while not dry_run:
            guessed = impute(data, test_k, mdl, test_sig, guessed, test_data,
                             sample=True)
    except KeyboardInterrupt:
        pass

    try:
        # Weight visualization is switched off by the constant False.
        if not dry_run and False:
            visualize(weights[0])
    except KeyboardInterrupt:
        pass
def test_mlp(rng, mlp_params, sgd_params, datasets):
    """
    Build an SS_DEV_MLP and train it on mixed labeled/unlabeled data.

    Parameters:
      rng: random number generator handed to SS_DEV_MLP.
      mlp_params: dict of network parameters handed to SS_DEV_MLP. The keys
          'lam_l2a', 'dev_types' and 'dev_lams' are read here for logging,
          and 'ss_ratio' is WRITTEN into this dict (the fraction of training
          samples that are supervised) before the network is built.
      sgd_params: dict with keys 'start_rate', 'decay_rate', 'epochs',
          'batch_size', 'mlp_type', 'wt_norm_bound' and 'result_tag'. An
          'mlp_type' of 'sde' trains with the SDE cost; any other value
          trains with the DEV cost.
      datasets: four-item sequence of (inputs, labels) pairs, in the order
          supervised training, unsupervised training, validation, testing.
          Each inputs item must support get_value() (a Theano shared
          variable). NOTE(review): the unsupervised entry is indexed as a
          pair too (datasets[1][1] is cast to int32 labels) -- presumably it
          carries placeholder labels; confirm against the caller.

    Side effects: prints progress, writes per-epoch error counts to
    "results_<result_tag>.txt", and calls utils.visualize with
    "weights_<result_tag>.png" after every epoch. Returns None.
    """
    initial_learning_rate = sgd_params['start_rate']
    learning_rate_decay = sgd_params['decay_rate']
    n_epochs = sgd_params['epochs']
    batch_size = sgd_params['batch_size']
    mlp_type = sgd_params['mlp_type']
    wt_norm_bound = sgd_params['wt_norm_bound']
    result_tag = sgd_params['result_tag']
    txt_file_name = "results_{0}.txt".format(result_tag)
    img_file_name = "weights_{0}.png".format(result_tag)
    eval_bsize = 100  # minibatch size used for validation and testing

    def _batch_bounds(n_samples, bsize):
        """Return (batch count, int32 symbolic matrix of [start, end] rows)."""
        n_batches = int(np.ceil(float(n_samples) / bsize))
        bounds = [[i * bsize, min(n_samples, (i + 1) * bsize)]
                  for i in range(n_batches)]
        shared_bounds = theano.shared(
            value=np.asarray(bounds, dtype=theano.config.floatX))
        return n_batches, T.cast(shared_bounds, 'int32')

    def _count_errors(batch_fn, n_batches):
        """Sum the first metric of batch_fn(i) over all batches i."""
        return np.sum([batch_fn(i)[0] for i in range(n_batches)])

    # Get supervised and unsupervised portions of training data, and create
    # arrays of start/end indices for easy minibatch slicing.
    Xtr_su, Ytr_su = (datasets[0][0], T.cast(datasets[0][1], 'int32'))
    Xtr_un, Ytr_un = (datasets[1][0], T.cast(datasets[1][1], 'int32'))
    su_samples = Xtr_su.get_value(borrow=True).shape[0]
    un_samples = Xtr_un.get_value(borrow=True).shape[0]
    tr_batches = 250
    mlp_params['ss_ratio'] = float(su_samples) / (su_samples + un_samples)
    # Floor division keeps the batch sizes integral on Python 2 and 3 alike;
    # they are used as slice bounds and printed with the 'd' format code.
    su_bsize = batch_size // 2
    un_bsize = batch_size - su_bsize
    su_batches, su_bidx = _batch_bounds(su_samples, su_bsize)
    un_batches, un_bidx = _batch_bounds(un_samples, un_bsize)

    # Get the validation and testing sets and create arrays of start/end
    # indices for easy minibatch slicing
    Xva, Yva = (datasets[2][0], T.cast(datasets[2][1], 'int32'))
    Xte, Yte = (datasets[3][0], T.cast(datasets[3][1], 'int32'))
    va_samples = Xva.get_value(borrow=True).shape[0]
    te_samples = Xte.get_value(borrow=True).shape[0]
    va_batches, va_bidx = _batch_bounds(va_samples, eval_bsize)
    te_batches, te_bidx = _batch_bounds(te_samples, eval_bsize)

    print("Dataset info:")
    print(" supervised samples: {0:d}, unsupervised samples: {1:d}".format(
        su_samples, un_samples))
    print(" samples/minibatch: {0:d}, minibatches/epoch: {1:d}".format(
        (su_bsize + un_bsize), tr_batches))
    print(" validation samples: {0:d}, testing samples: {1:d}".format(
        va_samples, te_samples))

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()   # index to a [mini]batch
    epoch = T.scalar()    # epoch counter
    su_idx = T.lscalar()  # symbolic batch index into supervised samples
    un_idx = T.lscalar()  # symbolic batch index into unsupervised samples
    x = T.matrix('x')     # some observations have labels
    y = T.ivector('y')    # the labels are presented as integer categories
    learning_rate = theano.shared(
        np.asarray(initial_learning_rate, dtype=theano.config.floatX))

    # construct the MLP class
    NET = SS_DEV_MLP(rng=rng, input=x, params=mlp_params)

    # Build the expressions for the cost functions.
    sde_cost = NET.sde_class_loss(y) + NET.sde_reg_loss
    dev_cost = NET.dev_class_loss(y) + NET.dev_dev_loss(y) + NET.dev_reg_loss
    NET_metrics = [NET.raw_errors(y), NET.raw_class_loss(y),
                   NET.dev_dev_loss(y), NET.raw_reg_loss]

    # Compile testing and validation models. These are evaluated on small
    # fixed-size batches, since pushing a whole validation or test set
    # through the net at once may take too much memory.
    test_model = theano.function(
        inputs=[index], outputs=NET_metrics,
        givens={
            x: Xte[te_bidx[index, 0]:te_bidx[index, 1], :],
            y: Yte[te_bidx[index, 0]:te_bidx[index, 1]]})
    validate_model = theano.function(
        inputs=[index], outputs=NET_metrics,
        givens={
            x: Xva[va_bidx[index, 0]:va_bidx[index, 1], :],
            y: Yva[va_bidx[index, 0]:va_bidx[index, 1]]})

    # Compute momentum for the current epoch: ramps linearly from 0.5 to
    # 0.99 over the first 500 epochs, then stays at 0.99
    mom = ifelse(epoch < 500,
                 0.5*(1. - epoch/500.) + 0.99*(epoch/500.),
                 0.99)
    # Use a "smoothed" learning rate, to ease into optimization
    gentle_rate = ifelse(epoch < 5,
                         (epoch / 5.0) * learning_rate,
                         learning_rate)

    def _build_updates(cost):
        """Return the momentum-SGD updates that minimize the given cost."""
        grads = [T.grad(cost, param) for param in NET.params]
        moms = [
            theano.shared(np.zeros(param.get_value(borrow=True).shape,
                                   dtype=theano.config.floatX))
            for param in NET.params]
        updates = OrderedDict()
        # Update the step direction using a momentum update
        for mom_var, grad in zip(moms, grads):
            updates[mom_var] = mom * mom_var + (1. - mom) * grad
        # ... and take a step along that direction
        for param, mom_var in zip(NET.params, moms):
            new_param = param - (gentle_rate * updates[mom_var])
            # Clip the updated param to bound its norm (where applicable)
            if (param in NET.clip_params) and (NET.clip_params[param] == 1):
                norms = T.sum(new_param**2, axis=1).reshape(
                    (new_param.shape[0], 1))
                scale = T.clip(T.sqrt(wt_norm_bound / norms), 0., 1.)
                updates[param] = new_param * scale
            else:
                updates[param] = new_param
        return updates

    def _train_givens():
        """Substitute a joint supervised/unsupervised minibatch for x, y."""
        return {
            x: T.concatenate(
                [Xtr_su[su_bidx[su_idx, 0]:su_bidx[su_idx, 1], :],
                 Xtr_un[un_bidx[un_idx, 0]:un_bidx[un_idx, 1], :]]),
            y: T.concatenate(
                [Ytr_su[su_bidx[su_idx, 0]:su_bidx[su_idx, 1]],
                 Ytr_un[un_bidx[un_idx, 0]:un_bidx[un_idx, 1]]])}

    # Compile theano functions for training. These return the training
    # metrics and update the model parameters.
    train_sde = theano.function(
        inputs=[epoch, su_idx, un_idx], outputs=NET_metrics,
        updates=_build_updates(sde_cost), givens=_train_givens())
    train_dev = theano.function(
        inputs=[epoch, su_idx, un_idx], outputs=NET_metrics,
        updates=_build_updates(dev_cost), givens=_train_givens())

    # Theano function to decay the learning rate, this is separate from the
    # training function because we only want to do this once each epoch
    # instead of after each minibatch.
    set_learning_rate = theano.function(
        inputs=[], outputs=learning_rate,
        updates={learning_rate: learning_rate * learning_rate_decay})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    min_validation_errors = 1e6
    min_test_errors = 1e6
    epoch_counter = 0
    # The epoch counter is always >= 1 inside the loop, so the choice of
    # training function depends on mlp_type alone.
    train_fn = train_sde if (mlp_type == 'sde') else train_dev

    # The context manager closes the results file even if training is
    # interrupted or raises.
    with open(txt_file_name, 'w') as results_file:
        results_file.write("mlp_type: {0}\n".format(mlp_type))
        results_file.write("lam_l2a: {0:.4f}\n".format(mlp_params['lam_l2a']))
        results_file.write("dev_types: {0}\n".format(str(mlp_params['dev_types'])))
        results_file.write("dev_lams: {0}\n".format(str(mlp_params['dev_lams'])))
        results_file.flush()

        e_time = time.time()
        su_index = 0
        un_index = 0
        # One SDE step before the loop tells us how many metrics a training
        # call returns. It is a real update and is kept, together with the
        # initial validation/test passes, so the run behaves as before.
        n_metrics = len(train_sde(1, su_index, un_index))
        _count_errors(validate_model, va_batches)
        _count_errors(test_model, te_batches)

        while epoch_counter < n_epochs:
            ######################################################
            # Process some number of minibatches for this epoch. #
            ######################################################
            epoch_counter = epoch_counter + 1
            epoch_metrics = [0.0 for _ in range(n_metrics)]
            for _ in range(tr_batches):
                # Compute update for a joint supervised/unsupervised batch
                batch_metrics = train_fn(epoch_counter, su_index, un_index)
                epoch_metrics = [(em + bm) for (em, bm)
                                 in zip(epoch_metrics, batch_metrics)]
                # cycle through the supervised/unsupervised batches
                su_index = (su_index + 1) if ((su_index + 1) < su_batches) else 0
                un_index = (un_index + 1) if ((un_index + 1) < un_batches) else 0
            # Update (decay) the learning rate once per epoch
            set_learning_rate()

            ######################################################
            # Validation, testing, and general diagnostic stuff. #
            ######################################################
            validation_errors = _count_errors(validate_model, va_batches)
            # The test set is evaluated every epoch.
            test_errors = _count_errors(test_model, te_batches)
            tag = ", test={0:d}".format(int(test_errors))
            if validation_errors < min_validation_errors:
                min_validation_errors = validation_errors
                min_test_errors = test_errors
            results_file.write("{0:d} {1:d}\n".format(
                int(validation_errors), int(test_errors)))
            results_file.flush()

            # Report and save progress. The error count is normalized by
            # the number of supervised samples seen, the other metrics by
            # the number of minibatches.
            epoch_metrics[0] = float(epoch_metrics[0]) / (tr_batches * su_bsize)
            epoch_metrics[1:] = [(float(v) / tr_batches)
                                 for v in epoch_metrics[1:]]
            print("epoch {0:d}: t_err={1:.2f}, t_loss={2:.4f}, t_dev={3:.4f}, t_reg={4:.4f}, valid={5:d}{6}".format(
                epoch_counter, epoch_metrics[0], epoch_metrics[1],
                epoch_metrics[2], epoch_metrics[3],
                int(validation_errors), tag))
            print("--time: {0:.4f}".format((time.time() - e_time)))
            e_time = time.time()
            # Save first layer weights to an image locally
            utils.visualize(NET, 0, img_file_name)

        print("Optimization complete. Best validation score of {0:.4f}, with test score {1:.4f}".format(
            (min_validation_errors / 100.0), (min_test_errors / 100.0)))
        # int() keeps the 'd' format valid even when no epoch improved on
        # the initial float sentinel (e.g. n_epochs == 0).
        results_file.write("{0:d} {1:d}\n".format(
            int(min_validation_errors), int(min_test_errors)))
        results_file.flush()
import sys
import utils
from dbscan import *

if __name__ == "__main__":
    # Fail with a usage message instead of a bare IndexError when the
    # input file argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {0} <input-file>".format(sys.argv[0]))

    filename = sys.argv[1]
    points = utils.read_input(filename)
    # dbscan labels each element of points with its cluster, so the same
    # list is passed on to the plot.
    # NOTE(review): 0.5 is presumably the neighbourhood radius -- confirm
    # against dbscan's signature.
    dbscan(points, 0.5)
    utils.visualize(points)