def main():
    """Train the bidirectional LSTM model on the 'med' data set.

    Loads the training data with ``load('med')``, builds the model via
    ``BidirectionalLSTMNet().build_model()``, compiles it with RMSprop and
    binary cross-entropy, and fits it for up to 10 epochs with a 20%
    validation split.  Two callbacks are attached: a ``ModelCheckpoint``
    writing to ``./experiments/lstm_test_med.hdf5`` (``save_best_only=True``)
    and an ``EarlyStopping`` monitoring ``val_loss`` with ``patience=3``.
    """
    # Disable output buffering
    #sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
    learning_rate = 0.001

    X_train, y_train = load('med')
    # Call form of print: identical output for a single string on Python 2,
    # and valid syntax on Python 3 (the bare print statement is not).
    print('Data loaded')

    net = BidirectionalLSTMNet()
    model = net.build_model()

    checkpointer = ModelCheckpoint(filepath='./experiments/lstm_test_med.hdf5',
                                   verbose=1,
                                   save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

    rmsprop = RMSprop(lr=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=rmsprop)

    model.fit(X_train, y_train,
              batch_size=100,
              class_weight='auto',
              nb_epoch=10,
              shuffle=True,
              validation_split=0.2,
              callbacks=[checkpointer, earlystopper])
def load_patient(filepath, patient_index):
    """Load one patient's spectra and attach a label to every measurement.

    Measurements and raw labels are both restricted to the time window in
    which they overlap.  Each remaining measurement then receives the raw
    label at the index that ``find_nearest`` reports for its timestamp.  The
    mean and standard deviation of the absolute time gaps between each
    measurement and its matched label are printed.

    Args:
        filepath: Forwarded to ``data_utils.load``.
        patient_index: Forwarded to the ``data_utils`` metadata getters.

    Returns:
        Tuple ``(measurements, labels, mz)`` where ``measurements`` is the
        clipped ``spectra['time_series']``, ``labels`` holds one value per
        kept measurement, and ``mz`` is ``spectra['mz_vals']``.
    """
    handle = data_utils.load(filepath)

    # Spectra and label metadata for the requested patient.
    spectra = data_utils.get_spectra_metadata(f=handle,
                                              patient_index=patient_index)
    raw_labels, raw_label_times = data_utils.get_label_metadata(
        f=handle, patient_index=patient_index)

    measurements = spectra['time_series']
    measurement_times = spectra['measurement_times']
    mz = spectra['mz_vals']

    # Time window covered by both the labels and the measurements.
    window_end = min(max(raw_label_times), max(measurement_times))
    window_start = max(min(raw_label_times), min(measurement_times))

    # Both masks are computed before anything is sliced.
    keep_measurement = np.logical_and(measurement_times <= window_end,
                                      measurement_times >= window_start)
    keep_label = np.logical_and(raw_label_times <= window_end,
                                raw_label_times >= window_start)

    measurements = measurements[keep_measurement, :]
    measurement_times = measurement_times[keep_measurement]
    raw_labels = raw_labels[keep_label]
    raw_label_times = raw_label_times[keep_label]

    # Give each measurement the label matched by find_nearest and record
    # how far apart in time the two are.
    labels = np.zeros(measurement_times.shape[0])
    gaps = []
    for position, measured_at in enumerate(measurement_times):
        labelled_at, label_idx = find_nearest(raw_label_times, measured_at)
        labels[position] = raw_labels[label_idx]
        gaps.append(np.abs(labelled_at - measured_at))

    gaps = np.array(gaps)
    print("Average temporal discrepancy: ", np.mean(gaps))
    print("STD of temporal discrepancy: ", np.std(gaps))

    return measurements, labels, mz
val_split = val_y[tr_idx], val_y[val_idx] tr_gen, val_gen, test_gen, dummy_test_gen = get_argment_generator( train_split, val_split, train, test, BATCH_SIZE) h = model.fit_generator(tr_gen, epochs=EPOCH, steps_per_epoch=len(tr_x) // BATCH_SIZE, validation_data=val_gen, validation_steps=len(val_x) // BATCH_SIZE, callbacks=callbacks) val_pred = model.predict(dummy_test_gen) test_pred = model.predict(test_gen) val_score = accuracy_score(np.argmax(val_split[1], 1), val_pred) print(f"fold_{i} val accuracy : {val_score}") pred.append(test_pred) mean_pred = np.mean(pred, axis=0) pred_y = np.argmax(mean_pred, 1) submission = pd.Series(pred_y, name='label') submission.to_csv(os.path.join(PATH, f"submission_fold{N_FOLD}.csv"), header=True, index_label='id') train, test, y = load() train_split, val_split = data_split(train, y, N_VAL) tr_gen, val_gen, test_gen, dummy_test_gen = get_argment_generator( train_split, val_split, train, test, BATCH_SIZE) tr_x, tr_y = train_split val_x, val_y = val_split
from matplotlib import pyplot
from data_utils import load
from data_stats import CDataStatsNoPreprocess, CDataStatsTrimmed, CDataStatsFrame

# Script: for each of the first three samples, draw the 28x28 image next to
# one histogram per statistics object, save the figure as digit<N>.pdf, and
# collect the per-variant means into `output`.
x, y = load('data/mnist.csv')  # y is not used in the visible part of the script

stats = [
    CDataStatsNoPreprocess(),
    CDataStatsTrimmed(30),
    CDataStatsFrame(28, 28, 5),
]
names = ['original', 'trimmed', 'framed']
output = ''

for i in range(3):
    digit = x[i]

    pyplot.figure(figsize=(12, 12))

    # Top-left panel: the digit itself.
    pyplot.subplot(2, 2, 1)
    pyplot.imshow(digit.reshape(28, 28))
    pyplot.title('The ' + str(i + 1) + '. digit')

    # Remaining panels: one histogram per preprocessing variant.
    for j, stat in enumerate(stats):
        pyplot.subplot(2, 2, j + 2)
        stat.histogram(digit)
        pyplot.title('Histogram, ' + names[j])
        output += ''.join([
            'The mean of the ', str(i + 1), '. digit, ', names[j], ': ',
            str(stat.mean(digit)), '\n',
        ])

    pyplot.savefig('digit' + str(i + 1) + '.pdf', dpi=1000)
type=int, default=4, help="sample negative items for training") parser.add_argument("--test_num_ng", type=int, default=99, help="sample part of negative items for testing") parser.add_argument("--out", default=True, help="save model or not") parser.add_argument("--gpu", default="0", help="gpu card ID") args = parser.parse_args() os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu cudnn.benchmark = True # PREPARE DATASET train_data, test_data, user_num, item_num, train_mat = data_utils.load() # construct the train and test datasets train_dataset = data_utils.NCFData(train_data, item_num, train_mat, args.num_ng, True) test_dataset = data_utils.NCFData(test_data, item_num, train_mat, 0, False) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4) test_loader = data.DataLoader(test_dataset, batch_size=args.test_num_ng + 1, shuffle=False, num_workers=0) # CREATE MODEL
deconvolved = rl_deconv_all(image_masked_array_bilinear, psf_array_linear, iterations=20, lbd=0.1) # Figures io.imsave('original.png', scale(downscaled)) io.imsave('deconvolved.png', scale(deconvolved)) plt.figure() fig, ax = plt.subplots() ax1 = plt.subplot(2, 2, 1) ax2 = plt.subplot(2, 2, 3) ax3 = plt.subplot(2, 2, 2) ax4 = plt.subplot(2, 2, 4) im1 = ax1.imshow(downscaled) ax1.set_title('Original') im2 = ax2.imshow(output_filtered_scaled[0][:, :], vmin=1, vmax=6) ax2.set_title('Detected FWMH X (px)') im3 = ax3.imshow(deconvolved) ax3.set_title('Deconvolved RL TV') im4 = ax4.imshow(output_filtered_scaled[1][:, :], vmin=1, vmax=6) ax4.set_title('Detected FWMH Y (px)') plt.show() if __name__ == "__main__": model = load('models/model_26.pt') image = io.imread('data/fly.png') deconvolution_demo(image)
import elastic_utils
import elastic_settings
import data_utils

if __name__ == '__main__':
    result_count = 5
    result_explain = True

    # Build the index with the default (English analyzer) settings.
    elastic_utils.reindex(elastic_settings.english_analyzer(), data_utils.load())

    # Arguments shared by both demo queries.
    search_text = 'patrick stewart'
    match_type = 'best_fields'

    print("Search title, overview, cast, director:")
    plain_fields = ['title', 'overview', 'cast.name', 'directors.name']
    query = elastic_utils.build_multi_match_query(
        search_text, plain_fields, match_type, result_count, result_explain)
    results = elastic_utils.execute_search(query)
    elastic_utils.print_result_table(results, result_count)
    # Alternative (disabled) result views:
    #elastic_utils.print_explain_from_results(results)
    # elastic_utils.print_results(results)
    #elastic_utils.print_explanation(results, result_count)

    print("\nDown-boost director:")
    # Same query, with the director field weighted by 0.1.
    boosted_fields = ['title', 'overview', 'cast.name', 'directors.name^0.1']
    query = elastic_utils.build_multi_match_query(
        search_text, boosted_fields, match_type, result_count, result_explain)
    results = elastic_utils.execute_search(query)
    elastic_utils.print_result_table(results, result_count)

    print("\nReindexing with bigrams")
def reindex():
    """Rebuild the index from ``data_utils.load()`` using the English analyzer settings."""
    analyzer_settings = elastic_settings.english_analyzer()
    documents = data_utils.load()
    elastic_utils.reindex(analyzer_settings, documents)
compute_grid(output_filtered_scaled[0],downscaled) grid_z1 = load_grid(psf_array_linear.shape[0]) for i, current_psf in enumerate(psf_array_linear): log.info('Detected PSF {} with focus x {} y {}'.format(i, flattened_map[0][i],flattened_map[1][i])) image_masked_array_bilinear.append(np.multiply(grid_z1[i], downscaled)) deconvolved = rl_deconv_all(image_masked_array_bilinear, psf_array_linear, iterations=20, lbd=0.1) # Figures io.imsave('original.png', scale(downscaled)) io.imsave('deconvolved.png', scale(deconvolved)) fig, ax = plt.subplots() ax1 = plt.subplot(2, 2, 1) ax2 = plt.subplot(2, 2, 3) ax3 = plt.subplot(2, 2, 2) ax4 = plt.subplot(2, 2, 4) im1 = ax1.imshow(downscaled) ax1.set_title('Original') im2 = ax2.imshow(output_filtered_scaled[0][:,:], vmin=0.5, vmax=4) ax2.set_title('Detected FWMH X (px)') im3 = ax3.imshow(deconvolved) ax3.set_title('Deconvolved RL TV') im4 = ax4.imshow(output_filtered_scaled[1][:,:], vmin=0.5, vmax=4) ax4.set_title('Detected FWMH Y (px)') plt.show() if __name__ == "__main__": model = load('models/model_999.pt') image = io.imread('data/fly.png') deconvolution_demo(image)
def main(args):
    """Train a RealNVP flow, validating, sampling and checkpointing each epoch.

    Per epoch the function runs one pass over the training split, one pass
    over the validation split (without gradients), saves a grid of samples
    under ``./samples/<dataset>/`` and, whenever the mean validation
    log-likelihood improves, saves the model under ``./models/<dataset>/``.
    Training stops at ``args.max_epoch`` or after 100 consecutive epochs
    without improvement.

    Args:
        args: Namespace providing ``dataset``, ``batch_size``, ``base_dim``,
            ``res_blocks``, ``bottleneck``, ``skip``, ``weight_norm``,
            ``coupling_bn``, ``affine``, ``lr``, ``momentum``, ``decay``,
            ``max_epoch`` and ``sample_size``.
    """
    import os  # function-scope import: only used for the directory checks below

    # Prefer the first GPU, but fall back to the CPU rather than failing on
    # the first .to(device) when CUDA is unavailable.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # model hyperparameters
    dataset = args.dataset
    batch_size = args.batch_size
    hps = Hyperparameters(base_dim=args.base_dim,
                          res_blocks=args.res_blocks,
                          bottleneck=args.bottleneck,
                          skip=args.skip,
                          weight_norm=args.weight_norm,
                          coupling_bn=args.coupling_bn,
                          affine=args.affine)
    scale_reg = 5e-5    # L2 regularization strength

    # optimization hyperparameters
    lr = args.lr
    momentum = args.momentum
    decay = args.decay

    # prefix for images and checkpoints
    filename = ('bs%d_' % batch_size
                + 'normal_'
                + 'bd%d_' % hps.base_dim
                + 'rb%d_' % hps.res_blocks
                + 'bn%d_' % hps.bottleneck
                + 'sk%d_' % hps.skip
                + 'wn%d_' % hps.weight_norm
                + 'cb%d_' % hps.coupling_bn
                + 'af%d' % hps.affine)

    # Create the output directories up front so that the first save does not
    # fail after a whole epoch of training.
    for out_dir in ('./samples/' + dataset, './models/' + dataset):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    # load dataset
    train_split, val_split, data_info = data_utils.load(dataset)
    train_loader = torch.utils.data.DataLoader(train_split,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=2)
    val_loader = torch.utils.data.DataLoader(val_split,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=2)

    prior = distributions.Normal(   # isotropic standard normal distribution
        torch.tensor(0.).to(device), torch.tensor(1.).to(device))
    flow = realnvp.RealNVP(datainfo=data_info, prior=prior, hps=hps).to(device)
    optimizer = optim.Adamax(flow.parameters(), lr=lr,
                             betas=(momentum, decay), eps=1e-7)

    epoch = 0
    running_loss = 0.
    running_log_ll = 0.
    optimal_log_ll = float('-inf')
    early_stop = 0

    image_size = data_info.channel * data_info.size**2    # full image dimension

    def bits_per_dim(log_ll_value):
        # Single definition of the bits/dim figure reported for batches,
        # training epochs and validation epochs.
        return (-log_ll_value + np.log(256.) * image_size) \
            / (image_size * np.log(2.))

    while epoch < args.max_epoch:
        epoch += 1
        print('Epoch %d:' % epoch)

        # ---- training pass ----
        flow.train()
        for batch_idx, data in enumerate(train_loader, 1):
            optimizer.zero_grad()
            x, _ = data
            # log-determinant of Jacobian from the logit transform
            x, log_det = data_utils.logit_transform(x)
            x = x.to(device)
            log_det = log_det.to(device)

            # log-likelihood of input minibatch
            log_ll, weight_scale = flow(x)
            log_ll = (log_ll + log_det).mean()

            # add L2 regularization on scaling factors
            loss = -log_ll + scale_reg * weight_scale
            running_loss += loss.item()
            running_log_ll += log_ll.item()

            loss.backward()
            optimizer.step()

            if batch_idx % 10 == 0:
                bit_per_dim = bits_per_dim(log_ll.item())
                print('[%d/%d]\tloss: %.3f\tlog-ll: %.3f\tbits/dim: %.3f' % \
                    (batch_idx*batch_size, len(train_loader.dataset),
                        loss.item(), log_ll.item(), bit_per_dim))

        mean_loss = running_loss / batch_idx
        mean_log_ll = running_log_ll / batch_idx
        mean_bit_per_dim = bits_per_dim(mean_log_ll)
        print('===> Average train loss: %.3f' % mean_loss)
        print('===> Average train log-likelihood: %.3f' % mean_log_ll)
        print('===> Average train bit_per_dim: %.3f' % mean_bit_per_dim)
        running_loss = 0.
        running_log_ll = 0.

        # ---- validation pass and sampling (no gradients needed) ----
        flow.eval()
        with torch.no_grad():
            for batch_idx, data in enumerate(val_loader, 1):
                x, _ = data
                x, log_det = data_utils.logit_transform(x)
                x = x.to(device)
                log_det = log_det.to(device)

                # log-likelihood of input minibatch
                log_ll, weight_scale = flow(x)
                log_ll = (log_ll + log_det).mean()

                # add L2 regularization on scaling factors
                loss = -log_ll + scale_reg * weight_scale
                running_loss += loss.item()
                running_log_ll += log_ll.item()

            mean_loss = running_loss / batch_idx
            mean_log_ll = running_log_ll / batch_idx
            mean_bit_per_dim = bits_per_dim(mean_log_ll)
            print('===> Average validation loss: %.3f' % mean_loss)
            print('===> Average validation log-likelihood: %.3f' % mean_log_ll)
            print('===> Average validation bits/dim: %.3f' % mean_bit_per_dim)
            running_loss = 0.
            running_log_ll = 0.

            samples = flow.sample(args.sample_size)
            samples, _ = data_utils.logit_transform(samples, reverse=True)
            utils.save_image(
                utils.make_grid(samples),
                './samples/' + dataset + '/' + filename + '_ep%d.png' % epoch)

        # ---- checkpointing / early stopping on validation log-likelihood ----
        if mean_log_ll > optimal_log_ll:
            early_stop = 0
            optimal_log_ll = mean_log_ll
            torch.save(flow, './models/' + dataset + '/' + filename + '.model')
            print('[MODEL SAVED]')
        else:
            early_stop += 1
            if early_stop >= 100:
                break

        print('--> Early stopping %d/100 (BEST validation log-likelihood: %.3f)' \
            % (early_stop, optimal_log_ll))

    print('Training finished at epoch %d.' % epoch)
# Configuration constants. Most of their consumers are outside this excerpt;
# only batch_size is used below (it fixes the leading placeholder dimension).
batch_size = 100
num_train = 49000
num_val = 1000
num_test = 1000
dropout = 0.5
display_step = 10
reg = 0.1
drop = False
learning_rate = 0.01
first_layer = 20
second_layer = 50
verbose = True
test = True
fsize = 5

# NOTE(review): `path` is defined outside this excerpt.
Xtr, Ytr, Xte, Yte = load(path)

# Graph inputs: a batch of 32x32x3 float images, a (batch_size, 20) float
# label tensor, and a scalar float placeholder (presumably the dropout
# setting -- confirm against the code that feeds it).
i_placeholder = tf.placeholder(tf.float32, shape=[batch_size,32,32,3], name='images')
l_placeholder = tf.placeholder(tf.float32, shape=(batch_size,20,),name='labels')
drop_placeholder = tf.placeholder(tf.float32)


def conv2d(x, W):
    """Apply ``tf.nn.conv2d`` to ``x`` with filter ``W``, unit strides and 'SAME' padding."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_nxn(x, n):
    """Max-pool ``x`` with an ``n`` x ``n`` window, stride ``n`` and 'SAME' padding."""
    return tf.nn.max_pool(x, ksize=[1, n, n, 1], strides=[1, n, n, 1], padding='SAME')


# NOTE(review): only the first two statements of one_hot are visible in this
# excerpt; the rest of the body (including any return) is outside the view.
def one_hot(labels, num_classes=20):
    num_labels = labels.shape[0]
    index_offset = np.arange(num_labels) * num_classes
def train(self, check_point=None):
    """Train the critic/generator pair with a gradient penalty (WGAN-GP style loop).

    Each outer iteration runs ``config.training.critic_iter`` critic updates
    followed by one generator update.  Scalars are logged every
    ``config.training.minitor_iter`` iterations; every
    ``config.training.save_iter`` iterations a sample grid is saved, images
    are logged, and a checkpoint is written both under a numbered name and
    as ``checkpoint.pth``.

    Args:
        check_point: Optional path to a checkpoint saved by this method (a
            list ``[netD, netG, optD, optG]`` of state dicts).  If the file
            does not exist, a message is printed and training starts from
            freshly initialised networks.
    """
    self.device = 'cuda:0' if self.config.training.use_gpu else 'cpu'

    train_data, _ = data_utils.load(self.config.data.dataset)
    image_loader = data_utils.InfiniteLoader(
        dset.DataLoader(dataset=train_data,
                        batch_size=self.config.training.batch_size,
                        drop_last=True,
                        shuffle=True,
                        num_workers=2))

    # Load the checkpoint if one was requested.  `states` stays None when no
    # checkpoint is given or the file is missing, so the code below never
    # touches an unbound name.
    states = None
    if check_point is not None:
        try:
            # map_location lets a checkpoint saved on GPU be restored on CPU.
            states = torch.load(check_point, map_location=self.device)
        except FileNotFoundError:
            print("Check point is not Found...\n")

    netD = Discriminator(self.config.data.channels).to(self.device)
    netG = Generator(self.config.data.channels).to(self.device)
    if states is not None:
        netD.load_state_dict(states[0])
        netG.load_state_dict(states[1])

    optD = self.set_optimizer(netD.parameters())
    optG = self.set_optimizer(netG.parameters())
    if states is not None:
        optD.load_state_dict(states[2])
        optG.load_state_dict(states[3])

    writer = SummaryWriter(log_dir=self.config.training.log_dir)

    # Gradient seeds for backward(): +1 and -1 scalars.
    one = torch.tensor(1., dtype=torch.float).to(self.device)
    mone = (one * -1).to(self.device)

    tbar = tqdm(range(self.config.training.max_iter))
    for g_iter in tbar:
        for p in netD.parameters():
            p.requires_grad = True

        d_loss_real = 0.
        d_loss_fake = 0.
        Wasserstein_D = 0.

        # netD optimization
        for d_iter in range(self.config.training.critic_iter):
            netD.zero_grad()
            real_images, _ = next(image_loader)
            real_images = real_images.to(self.device)
            z = torch.randn([
                self.config.training.batch_size,
                self.config.model.hidden_dim, 1, 1
            ]).to(self.device)

            d_loss_real = netD(real_images)
            d_loss_real = d_loss_real.mean()
            d_loss_real.backward(mone)

            fake_images = netG(z)
            d_loss_fake = netD(fake_images)
            d_loss_fake = d_loss_fake.mean()
            d_loss_fake.backward(one)

            gradient_penalty = self.calc_gradient_penalty(
                netD, real_images, fake_images)
            gradient_penalty.backward()

            d_loss = d_loss_fake - d_loss_real + gradient_penalty
            Wasserstein_D = d_loss_real - d_loss_fake
            optD.step()

        # netG optimization (critic weights frozen for this step)
        for p in netD.parameters():
            p.requires_grad = False
        netG.zero_grad()
        z = torch.randn([
            self.config.training.batch_size,
            self.config.model.hidden_dim, 1, 1
        ]).to(self.device)
        fake_images = netG(z)
        g_loss = netD(fake_images)
        g_loss = g_loss.mean()
        g_loss.backward(mone)
        optG.step()

        if (g_iter + 1) % self.config.training.minitor_iter == 0:
            # Using Tensorboard to log training
            log = {
                'wasserstein_distance': Wasserstein_D.item(),
                'loss_D': d_loss.item(),
                'loss_G': g_loss.item(),
                'loss_D_real': d_loss_real.item(),
                'loss_G_fake': d_loss_fake.item()
            }
            info = "[{}/{}], loss_D: {:.3f}, loss_G: {:.3f}, W_distance: {:.3f}".format(
                g_iter + 1, self.config.training.max_iter, d_loss.item(),
                -g_loss.item(), Wasserstein_D.item())
            tbar.set_description(info)
            for key, value in log.items():
                writer.add_scalar(key, value, g_iter + 1)

        if (g_iter + 1) % self.config.training.save_iter == 0:
            real_images = real_images.mul(0.5).add(0.5).cpu()
            z = torch.randn([
                self.config.training.batch_size,
                self.config.model.hidden_dim, 1, 1
            ]).to(self.device)
            # Preview samples are never back-propagated through.
            with torch.no_grad():
                samples = netG(z)
            samples = samples.mul(0.5).add(0.5).cpu()
            grid = utils.make_grid(samples)
            utils.save_image(
                grid,
                os.path.join(self.args.image_path, self.config.data.dataset,
                             "iters_%d.png" % (g_iter + 1)))

            image_log = {
                'real_image': real_images,
                'generated_image': samples
            }
            for key, value in image_log.items():
                writer.add_images(key, value, g_iter + 1)

            # Numbered snapshot plus a rolling "latest" checkpoint.
            states = [
                netD.state_dict(),
                netG.state_dict(),
                optD.state_dict(),
                optG.state_dict()
            ]
            torch.save(
                states,
                os.path.join(self.config.training.check_point,
                             'checkpoint_{}.pth'.format(g_iter + 1)))
            torch.save(
                states,
                os.path.join(self.config.training.check_point,
                             'checkpoint.pth'))

    # Flush pending events and release the log file.
    writer.close()
def reindex(name, settings):
    """Rebuild the index from ``data_utils.load()`` with the given settings.

    Args:
        name: Label for the settings, used only in the progress message.
        settings: Index settings forwarded to ``elastic_utils.reindex``.
            When ``None``, ``elastic_settings.english_bigrams()`` is used,
            which is what this function previously applied unconditionally.
    """
    print("\nReindexing with settings: {}".format(name))
    # Previously `settings` was ignored and the bigram settings were always
    # applied, so the printed name could disagree with the index built.
    if settings is None:
        settings = elastic_settings.english_bigrams()
    elastic_utils.reindex(settings, data_utils.load())