def main(args):
    # Import data
    mnist = input_data.read_data_sets(args.data_dir, one_hot=True)

    model = Model(
        layers=[
            Dense(10,
                  weight_initializer=weight_initializer,
                  bias_initializer=bias_initializer,
                  input_shape=(784, ))
        ],
        optimizer=GradientDescent(learning_rate=0.5),
        loss=SoftMaxCrossEntropyWithLogits())

    # Train
    for _ in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        model.fit_batch(batch_xs, batch_ys)

    # Test trained model
    actual_labels = np.argmax(mnist.test.labels, 1)
    predictions = model.predict(mnist.test.images)
    predicted_labels = np.argmax(predictions, 1)
    accuracy = (actual_labels == predicted_labels).mean()
    print("Test accuracy: {}".format(accuracy))
def setup_train(self, model_file_path=None):
    self.model = Model(model_file_path)

    params = list(self.model.encoder.parameters()) + \
             list(self.model.section_encoder.parameters()) + \
             list(self.model.sentence_filterer.parameters()) + \
             list(self.model.decoder.parameters()) + \
             list(self.model.reduce_state.parameters()) + \
             list(self.model.section_reduce_state.parameters())
    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    self.optimizer = AdagradCustom(params, lr=initial_lr,
                                   initial_accumulator_value=config.adagrad_init_acc)

    start_iter, start_loss = 0, 0
    if model_file_path is not None:
        state = torch.load(model_file_path,
                           map_location=lambda storage, location: storage)
        start_iter = state['iter']
        start_loss = state['current_loss']

        if not config.is_coverage and not config.is_sentence_filtering:
            self.optimizer.load_state_dict(state['optimizer'])
            if use_cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

    return start_iter, start_loss
def run_seq(config, evaluator):
    for i in range(0, 21):
        print(i)
        model = Model(config, evaluator.dataset, str(i))
        # model = torch.nn.DataParallel(model)
        model.to(evaluator.device)
        evaluator.eval(model)
def __init__(self, model_file_path):
    model_name = os.path.basename(model_file_path)
    self._decode_dir = os.path.join(config.log_root, 'decode_%s' % (model_name))
    self._rouge_ref_dir = os.path.join(self._decode_dir, 'rouge_ref')
    self._rouge_dec_dir = os.path.join(self._decode_dir, 'rouge_dec_dir')
    for p in [self._decode_dir, self._rouge_ref_dir, self._rouge_dec_dir]:
        if not os.path.exists(p):
            os.mkdir(p)

    self.vocab = Vocab(config.vocab_path, config.vocab_size)
    self.batcher = Batcher(config.decode_data_path, self.vocab, mode='decode',
                           batch_size=config.beam_size, single_pass=True)
    time.sleep(15)

    self.model = Model(model_file_path, is_eval=True)
def __init__(self, model_file_path):
    self.vocab = Vocab(config.vocab_path, config.vocab_size)
    self.batcher = Batcher(config.eval_data_path, self.vocab, mode='eval',
                           batch_size=config.batch_size, single_pass=True)
    time.sleep(15)

    model_name = os.path.basename(model_file_path)
    eval_dir = os.path.join(config.log_root, 'eval_%s' % (model_name))
    if not os.path.exists(eval_dir):
        os.mkdir(eval_dir)
    self.summary_writer = tf.summary.FileWriter(eval_dir)

    self.model = Model(model_file_path, is_eval=True)
def optimaze_lstm(trial):
    model = Model(
        s_stage='ResNet',
        res_block_num=4,
        t_hidden_dim=trial.suggest_int('t_hidden_dim', 50, 500, 50),
        t_output_dim=trial.suggest_int('t_output_dim', 50, 500, 50),
    )
    score = train(model, train_loader, test_loader, DEVICE)
    return score
def search_res_block_num(train_loader, test_loader):
    print('\n=== ResNet ===')
    scores = []
    for i in range(1, 6):
        model = Model(
            s_stage='ResNet',
            res_block_num=i,
        )
        best_auc = train(model, train_loader, test_loader, DEVICE)
        scores.append(best_auc)
    for i in range(5):
        print(i + 1, scores[i])
def train(
    net: Model,
    inputs: Tensor,
    targets: Tensor,
    epochs: int = 100000,
    iterator: DataIterator = DataLoader(),
    loss: Loss = MSE(),
    optim: Optimizer = SGD(),
) -> None:
    for epoch in range(epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            predicted = net(batch.inputs)
            epoch_loss += loss.loss(predicted, batch.targets)
            grad = loss.grad(predicted, batch.targets)
            net.backward(grad)
            optim.step(net)
        print(f"Epoch: {epoch} --> Loss: {epoch_loss}")
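# --- Usage sketch (added for illustration; hypothetical, not part of the original
# snippet). It assumes Tensor is a NumPy ndarray and that the defaults imported
# with the loop (DataLoader, MSE, SGD) are acceptable; the constructor for the
# network is library-specific and therefore left as a placeholder. XOR is used
# as toy data.
import numpy as np

xor_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
xor_targets = np.array([[0], [1], [1], [0]], dtype=float)

# net = Model(...)                              # build with the library's layer API
# train(net, xor_inputs, xor_targets, epochs=5000)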
def optimaze_san(trial):
    block_num = trial.suggest_int('block_num', 1, 5)
    layer_size_hop = trial.suggest_int('layer_size_hop', 2, 5)
    kernel_size = trial.suggest_int('kernel_size', 3, 7, 2)

    layers = [3]
    kernels = [3]
    for i in range(1, block_num):
        layers.append(2 + i * layer_size_hop)
        kernels.append(kernel_size)

    model = Model(
        s_stage='SAN',
        san_layers=layers,
        san_kernels=kernels,
    )
    score = train(model, train_loader, test_loader, DEVICE)
    return score
def main(args):
    # Import data
    mnist = input_data.read_data_sets(args.data_dir, one_hot=True)

    relu = ReLU()
    model = Model(
        layers=[
            Conv2D(filter_size=(10, 10),
                   input_shape=(28, 28, 1),
                   stride=(1, 1),
                   channels=32,
                   activation=relu,
                   padding='same',
                   filter_initializer=weight_initializer,
                   bias_initializer=bias_initializer),
            MaxPool2D(pool_size=(2, 2)),
            Conv2D(filter_size=(5, 5),
                   stride=(1, 1),
                   channels=16,
                   activation=relu,
                   padding='same',
                   filter_initializer=weight_initializer,
                   bias_initializer=bias_initializer),
            MaxPool2D(pool_size=(2, 2)),
            Flatten(),
            Dense(1024,
                  weight_initializer=weight_initializer,
                  bias_initializer=bias_initializer,
                  activation=relu),
            Dense(10,
                  weight_initializer=weight_initializer,
                  bias_initializer=bias_initializer)
        ],
        optimizer=AdaGrad(learning_rate=0.001, epsilon=1e-8),
        loss=SoftMaxCrossEntropyWithLogits())

    # Train
    for _ in range(200):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        batch_xs = np.reshape(batch_xs, [-1, 28, 28, 1])
        model.fit_batch(batch_xs, batch_ys)
        if args.verbose:
            print('Batch {} loss: {}'.format(
                model.batch_number,
                model.loss.compute(model.predict(batch_xs), batch_ys)))

    # Test trained model
    actual_labels = np.argmax(mnist.test.labels, 1)
    predictions = model.predict(np.reshape(mnist.test.images, [-1, 28, 28, 1]))
    predicted_labels = np.argmax(predictions, 1)
    accuracy = (actual_labels == predicted_labels).mean()
    print("Test accuracy: {}".format(accuracy))
from nn.model import Model
from nn.layer import FC, Softmax, ReLU, Dropout
from nn.optimizer import Adam, SGD
from nn.loss import cross_entropy
from nn.metrix import accuracy

(X_train, y_train), (X_test, y_test) = load_mnist()
X_train = X_train.reshape((X_train.shape[0], -1)) / 255
X_test = X_test.reshape((X_test.shape[0], -1)) / 255

transformer = MakeOneHot()
y_train = transformer.fit_transform(y_train)
y_test = transformer.transform(y_test)

model = Model()
model.add(FC(500, input_shape=784))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(150))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(50))
model.add(ReLU())
model.add(Dropout(0.5))
model.add(FC(10))
model.add(Softmax())

model.compile(Adam(eta=0.01), cross_entropy, accuracy)
model.fit(X_train, y_train, max_iter=10, batch_size=2000)
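# Hypothetical follow-up (added for illustration): evaluate the fitted network on
# the held-out split. `model.predict` and the exact signature of the imported
# `accuracy` metric are assumptions; substitute the library's actual forward and
# metric calls if they differ.
y_pred = model.predict(X_test)
print("Test accuracy:", accuracy(y_test, y_pred))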
def MyFashMNIST_CNN():
    conv1_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 2,
        'stride': 1,
        'in_channel': 1,
        'out_channel': 32
    }
    conv2_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 2,
        'stride': 1,
        'in_channel': 32,
        'out_channel': 64
    }
    conv3_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 2,
        'stride': 1,
        'in_channel': 64,
        'out_channel': 64
    }
    pool1_params = {
        'pool_type': 'max',
        'pool_height': 2,
        'pool_width': 2,
        'stride': 2,
        'pad': 2
    }
    pool2_params = {
        'pool_type': 'max',
        'pool_height': 3,
        'pool_width': 3,
        'stride': 2,
        'pad': 2
    }
    model = Model()
    model.add(
        Conv2D(conv1_params, name='conv1', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu1'))
    # model.add(Pool2D(pool1_params, name='pooling1'))
    model.add(
        Conv2D(conv2_params, name='conv2', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu2'))
    model.add(Pool2D(pool1_params, name='pooling1'))
    model.add(Dropout(rate=0.25, name='dropout1'))
    model.add(
        Conv2D(conv3_params, name='conv3', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu3'))  # renamed from a duplicate 'relu2'
    model.add(Pool2D(pool1_params, name='pooling2'))
    model.add(Dropout(rate=0.25, name='dropout2'))
    model.add(Flatten(name='flatten'))
    model.add(
        Linear(4096, 1024, name='fclayer1', initializer=Gaussian(std=0.01)))
    model.add(ReLU(name='relu4'))  # renamed to keep layer names unique
    model.add(Dropout(rate=0.5))
    model.add(
        Linear(1024, 256, name='fclayer2', initializer=Gaussian(std=0.01)))
    model.add(Dropout(rate=0.5))
    model.add(Linear(256, 10, name='fclayer3', initializer=Gaussian(std=0.01)))
    return model
def MNISTNet():
    conv1_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 0,
        'stride': 1,
        'in_channel': 1,
        'out_channel': 6
    }
    conv2_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 0,
        'stride': 1,
        'in_channel': 6,
        'out_channel': 16
    }
    pool1_params = {
        'pool_type': 'max',
        'pool_height': 2,
        'pool_width': 2,
        'stride': 2,
        'pad': 0
    }
    pool2_params = {
        'pool_type': 'max',
        'pool_height': 3,
        'pool_width': 3,
        'stride': 2,
        'pad': 0
    }
    model = Model()
    model.add(
        Conv2D(conv1_params, name='conv1', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu1'))
    model.add(Pool2D(pool1_params, name='pooling1'))
    model.add(
        Conv2D(conv2_params, name='conv2', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu2'))
    model.add(Pool2D(pool2_params, name='pooling2'))
    # model.add(Dropout(ratio=0.25, name='dropout1'))
    model.add(Flatten(name='flatten'))
    model.add(
        Linear(400, 256, name='fclayer1', initializer=Gaussian(std=0.01)))
    model.add(ReLU(name='relu3'))
    # model.add(Dropout(ratio=0.5))
    model.add(Linear(256, 10, name='fclayer2', initializer=Gaussian(std=0.01)))
    return model
def MyModel_FashionMNIST():
    conv1_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 2,
        'stride': 1,
        'in_channel': 1,
        'out_channel': 6
    }
    conv2_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 2,
        'stride': 1,
        'in_channel': 6,
        'out_channel': 16
    }
    conv3_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 2,
        'stride': 1,
        'in_channel': 16,
        'out_channel': 16
    }
    pool1_params = {
        'pool_type': 'max',
        'pool_height': 2,
        'pool_width': 2,
        'stride': 2,
        'pad': 0
    }
    pool2_params = {
        'pool_type': 'max',
        'pool_height': 2,
        'pool_width': 2,
        'stride': 2,
        'pad': 0
    }
    model = Model()
    model.add(
        Conv2D(conv1_params, name='conv1', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu1'))
    # model.add(Pool2D(pool1_params, name='pooling1'))
    model.add(
        Conv2D(conv2_params, name='conv2', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu2'))
    model.add(Pool2D(pool2_params, name='pooling2'))
    # model.add(Conv2D(conv3_params, name='conv3',
    #                  initializer=Gaussian(std=0.001)))
    # model.add(ReLU(name='relu1'))
    # model.add(Pool2D(pool1_params, name='pooling1'))
    model.add(Flatten(name='flatten'))
    model.add(
        Linear(
            3136,  # 10816
            100,
            name='fclayer1',
            initializer=Gaussian(std=0.01)))
    model.add(ReLU(name='relu3'))
    model.add(Linear(100, 10, name='fclayer2', initializer=Gaussian(std=0.01)))
    return model
split = int(0.8 * all_data.shape[0])
x_train = all_data[:split, 1:]
x_test = all_data[split:, 1:]
y_train = all_data[:split, 0]
y_test = all_data[split:, 0]
y_train = one_hot(y_train.astype('int'))
y_test = one_hot(y_test.astype('int'))


def accuracy(y, y_hat):
    y = np.argmax(y, axis=1)
    y_hat = np.argmax(y_hat, axis=1)
    return np.mean(y == y_hat)


def relu(x):
    return np.maximum(x, 0)


model = Model()
model.add_layer(Layer(784, 10, softmax))
# model.add_layer(Layer(64, 64, relu))
# model.add_layer(Layer(64, 10, softmax))
model.compile(CrossEntropyLoss, DataLoader, accuracy,
              batches_per_epoch=x_train.shape[0] // 32 + 1,
              n_workers=50, c1=1., c2=2.)

model.fit(x_train, y_train, 100)
y_hat = model.predict(x_test)
print('Accuracy on test:', accuracy(y_test, y_hat))
def main():
    seed = 0
    fix_seed(seed)

    data = cr.Dataset(
        data_paths=cfg.data_paths,
        exp_id=cfg.exp_id,
        img_shape=cfg.img_shape,
        img_crop_size=cfg.img_crop_size,
        max_trace=cfg.max_trace_len,
    )
    x_train, x_test, y_train, y_test = data.split_training_test_data(
        test_split=.20, seed=10, for_deep=True)

    trainsets = NNDataset(x_train, y_train, DEVICE)
    testsets = NNDataset(x_test, y_test, DEVICE)
    train_loader = torch.utils.data.DataLoader(trainsets, batch_size=32)
    test_loader = torch.utils.data.DataLoader(testsets, batch_size=32)

    for model_name in MODEL_LIST:
        print(f'\n======== {model_name} ========\n')
        if model_name == 'LSTM':
            model = Model(t_stage='LSTM', device=DEVICE, t_hidden_dim=500,
                          t_output_dim=500, use_cnn_for_trace=False)
        elif model_name == 'CNN_LSTM':
            model = Model(t_stage='LSTM', device=DEVICE, t_hidden_dim=500,
                          t_output_dim=500)
        elif model_name == 'OnlyCNN':
            model = Model(s_stage='CNN', device=DEVICE, block_num=3)
        else:
            model = Model(s_stage=model_name, t_stage='LSTM', device=DEVICE,
                          pretrained=PRETRAINED, block_num=3,
                          t_hidden_dim=500, t_output_dim=500)

        if MODE == 'train':
            score, model = train(model, model_name, train_loader, test_loader,
                                 DEVICE, log_path=f'{ROOT}/out/{model_name}.txt')
            model = model.to('cpu')
            torch.save(model.state_dict(),
                       f'{ROOT}/best_models/{model_name}.pth')
        elif MODE == 'fps':
            model.eval()
            inputs = (torch.rand(1, 1, 500).to(DEVICE),
                      torch.rand(1, 1, 80, 80).to(DEVICE))
            t0 = time.time()
            for i in range(100):
                model(inputs)
            with open(f'{ROOT}/out/speed.txt', 'a') as f:
                f.write(f'{model_name}: {100 / (time.time() - t0):.04f} fps\n')
        else:
            raise ValueError
class Train(object):
    def __init__(self):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.train_data_path, self.vocab, mode='train',
                               batch_size=config.batch_size, single_pass=False)
        time.sleep(15)

        train_dir = os.path.join(config.log_root, 'train_%d' % (int(time.time())))
        if not os.path.exists(train_dir):
            os.mkdir(train_dir)

        self.model_dir = os.path.join(train_dir, 'model')
        if not os.path.exists(self.model_dir):
            os.mkdir(self.model_dir)

        self.summary_writer = tf.summary.FileWriter(train_dir)

    def save_model(self, running_avg_loss, iter):
        state = {
            'iter': iter,
            'encoder_state_dict': self.model.encoder.state_dict(),
            'section_encoder_state_dict': self.model.section_encoder.state_dict(),
            'sentence_filterer_state_dict': self.model.sentence_filterer.state_dict(),
            'decoder_state_dict': self.model.decoder.state_dict(),
            'reduce_state_dict': self.model.reduce_state.state_dict(),
            'section_reduce_state_dict': self.model.section_reduce_state.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'current_loss': running_avg_loss
        }
        model_save_path = os.path.join(self.model_dir,
                                       'model_%d_%d' % (iter, int(time.time())))
        torch.save(state, model_save_path)

    def setup_train(self, model_file_path=None):
        self.model = Model(model_file_path)

        params = list(self.model.encoder.parameters()) + \
                 list(self.model.section_encoder.parameters()) + \
                 list(self.model.sentence_filterer.parameters()) + \
                 list(self.model.decoder.parameters()) + \
                 list(self.model.reduce_state.parameters()) + \
                 list(self.model.section_reduce_state.parameters())
        initial_lr = config.lr_coverage if config.is_coverage else config.lr
        self.optimizer = AdagradCustom(params, lr=initial_lr,
                                       initial_accumulator_value=config.adagrad_init_acc)

        start_iter, start_loss = 0, 0
        if model_file_path is not None:
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']

            if not config.is_coverage and not config.is_sentence_filtering:
                self.optimizer.load_state_dict(state['optimizer'])
                if use_cuda:
                    for state in self.optimizer.state.values():
                        for k, v in state.items():
                            if torch.is_tensor(v):
                                state[k] = v.cuda()

        return start_iter, start_loss

    def train_one_batch(self, batch):
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_1, coverage, sent_lens = \
            get_input_from_batch(batch, use_cuda)
        dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
            get_output_from_batch(batch, use_cuda)

        self.optimizer.zero_grad()

        encoder_outputs, encoder_hidden, max_encoder_output = self.model.encoder(enc_batch, enc_lens)
        s_t_1 = self.model.reduce_state(encoder_hidden)

        if config.use_maxpool_init_ctx:
            c_t_1 = max_encoder_output

        gamma = None
        if config.is_sentence_filtering:
            gamma, sent_dists = self.model.sentence_filterer(encoder_outputs, sent_lens)

        section_outputs, section_hidden = self.model.section_encoder(s_t_1)
        s_t_1 = self.model.section_reduce_state(section_hidden)

        step_losses = []
        for di in range(min(max_dec_len, config.max_dec_steps)):
            y_t_1 = dec_batch[:, di]  # Teacher forcing
            final_dist, s_t_1, c_t_1, attn_dist, p_gen, coverage = \
                self.model.decoder(y_t_1, s_t_1, encoder_outputs, section_outputs,
                                   enc_padding_mask, c_t_1, extra_zeros,
                                   enc_batch_extend_vocab, coverage, gamma)
            target = target_batch[:, di]
            gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
            step_loss = -torch.log(gold_probs + config.eps)
            if config.is_coverage:
                step_coverage_loss = torch.sum(
                    torch.min(attn_dist, coverage.view(*attn_dist.shape)), 1)
                step_loss = step_loss + config.cov_loss_wt * step_coverage_loss

            step_mask = dec_padding_mask[:, di]
            step_loss = step_loss * step_mask
            step_losses.append(step_loss)

        sum_losses = torch.sum(torch.stack(step_losses, 1), 1)
        batch_avg_loss = sum_losses / dec_lens_var
        loss = torch.mean(batch_avg_loss)

        if config.is_sentence_filtering:
            sim_scores = torch.FloatTensor(batch.sim_scores)
            if use_cuda:
                sim_scores = sim_scores.cuda()
            sent_filter_loss = F.binary_cross_entropy(sent_dists, sim_scores)
            loss += config.sent_loss_wt * sent_filter_loss

        loss.backward()

        clip_grad_norm_(self.model.encoder.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.section_encoder.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.sentence_filterer.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.decoder.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.reduce_state.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.section_reduce_state.parameters(), config.max_grad_norm)

        self.optimizer.step()

        return loss.item()

    def trainIters(self, n_iters, model_file_path=None):
        iter, running_avg_loss = self.setup_train(model_file_path)
        start = time.time()
        while iter < n_iters:
            print('\rBatch %d' % iter, end="")
            batch = self.batcher.next_batch()
            loss = self.train_one_batch(batch)

            running_avg_loss = calc_running_avg_loss(loss, running_avg_loss,
                                                     self.summary_writer, iter)
            iter += 1

            if iter % 5000 == 0:
                self.summary_writer.flush()
            print_interval = 10
            if iter % print_interval == 0:
                print(' steps %d, seconds for %d batch: %.2f , loss: %f' %
                      (iter, print_interval, time.time() - start, loss))
                start = time.time()
            if iter % 1000 == 0:
                self.save_model(running_avg_loss, iter)
def MyModel_SentimentNet(word_to_idx):
    vocab_size = len(word_to_idx)
    model = Model()
    model.add(
        Linear2D(vocab_size, 200, name='embedding',
                 initializer=Gaussian(std=0.01)))
    model.add(BiRNN(in_features=200, units=50, initializer=Gaussian(std=0.01)))
    model.add(
        Linear2D(100, 50, name='linear1', initializer=Gaussian(std=0.01)))
    model.add(TemporalPooling())  # defined in layers.py
    model.add(Linear2D(50, 2, name='linear2', initializer=Gaussian(std=0.01)))

    # model.add(Linear2D(vocab_size, 200, name='embedding', initializer=Gaussian(std=0.01)))
    # model.add(GRU(in_features=200, units=50, initializer=Gaussian(std=0.01)))
    # model.add(Linear2D(50, 50, name='embedding', initializer=Gaussian(std=0.01)))
    # model.add(GRU(in_features=200, units=50, initializer=Gaussian(std=0.01)))
    # model.add(TemporalPooling())
    # model.add(Linear(200, 2, initializer=Gaussian(std=0.01)))
    return model
all_data = all_data.reshape(len(all_data), 1)
encoded_x = Onehot_encoder.fit_transform(all_data)
x_train, x_test, y_train, y_test = train_test_split(encoded_x, y,
                                                    test_size=0.15,
                                                    random_state=21)


def accuracy(y, y_hat):
    y_hat = (y_hat >= 0.5).astype('int')
    y = y.astype('int')
    return np.mean(y_hat[:, 0] == y)


model = Model()
model.add_layer(Layer(965, 10, tanh))
model.add_layer(Layer(10, 10, tanh))
model.add_layer(Layer(10, 10, tanh))
model.add_layer(Layer(10, 10, tanh))
model.add_layer(Layer(10, 1, sigmoid))
model.compile(BinaryCrossEntropyLoss, DataLoader, accuracy,
              batches_per_epoch=20, n_workers=10)

# was printing y_train.shape twice; report the test inputs as well
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
index_list, cost_list = model.fit(x_train, y_train, 500)
y_hat = model.predict(x_test)
# print(confusion_matrix(y_test, y_hat))
def step(self, net: Model) -> None:
    for param, grad in net.get_params_grad():
        # print(param.shape, grad.shape)
        param -= self.lr * grad
def main(argv):
    (opts, args) = parser.parse_args(argv)
    config = ConfigParser(opts.config)

    if torch.cuda.is_available():
        gpu_ids = np.array(config.general.gpu_ids.split(' ')).astype(int)
        device = torch.device('cuda:{0}'.format(gpu_ids[0]))
    else:
        device = torch.device('cpu')
    # torch.cuda.set_device(device)

    raw_df = pd.read_csv(config.dataset.raw_path, sep="\t")
    name_vectorizer = train_tf_idf(MIN_NAME_DF, 'name', raw_df)
    train_loader, dataset = init_dataset(config, DBType.Train, name_vectorizer, raw_df)

    current_iteration_path = os.path.join(config.general.output_path,
                                          config.general.current_iteration_file_name)
    if os.path.isfile(current_iteration_path):
        start_epoch, epoch_iteration = np.loadtxt(current_iteration_path,
                                                  delimiter=',', dtype=int)
        print('resuming from epoch %d at iteration %d' % (start_epoch, epoch_iteration))
    else:
        start_epoch, epoch_iteration = 0, 0
    tmp_start = epoch_iteration

    model = Model(config, dataset)
    # model = torch.nn.DataParallel(model)
    model.train()

    dataset_size = len(dataset)
    logger = Logger(config)
    current_step = start_epoch * dataset_size + epoch_iteration
    steps_counter = 0
    accumulated_loss = 0
    freq_loss = 0

    evaluator = Evaluator(DBType.Validation, config, name_vectorizer, raw_df)
    raw_df = None

    # if start_epoch % config.train.lr_update_freq == 0:
    #     model.update_learning_rate()
    # if len(gpu_ids) > 1:
    #     model = nn.DataParallel(model)
    model.to(device)

    freq_start_time = time.time()
    current_eval = last_eval = 99999999
    tmp_count = 0
    for epoch in range(start_epoch, config.train.num_epochs):
        epoch_start_time = time.time()
        if epoch != start_epoch:
            epoch_iteration = 0

        for i, data in enumerate(train_loader, start=epoch_iteration):
            if steps_counter % 500 == 0:
                print('{} / {}'.format(epoch_iteration, dataset_size))

            current_step += config.train.batch_size
            epoch_iteration += config.train.batch_size

            name = data['name'].to(device)
            cid = data['cid'].to(device)
            c_name = data['c_name'].to(device)
            b_name = data['b_name'].to(device)
            price = data['price'].to(device).unsqueeze(1)
            shipping = data['shipping'].to(device)
            desc = data['desc'].to(device)
            desc_len = data['desc_len'].to(device)

            loss = model(name, cid, c_name, b_name, shipping, desc, desc_len, price)
            loss = torch.mean(loss)

            model.optimizer.zero_grad()
            loss.backward()
            if config.general.clip_grads:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
            model.optimizer.step()

            accumulated_loss += loss.item()
            freq_loss += loss.item()

            if (steps_counter % config.general.print_logs_freq == 0) and steps_counter != 0:
                freq_loss = freq_loss / config.general.print_logs_freq
                print('freq_loss {}. time {}'.format(freq_loss,
                                                     time.time() - freq_start_time))
                losses_dict = {'loss': loss.item(), 'freq_loss': freq_loss}
                logger.dump_current_errors(losses_dict, current_step)
                freq_loss = 0
                freq_start_time = time.time()

            if (steps_counter % config.general.save_checkpoint_freq == 0) and steps_counter != 0:
                print('========== saving model (epoch %d, total_steps %d) ========='
                      % (epoch, current_step))
                model.save('latest')
                np.savetxt(current_iteration_path, (epoch, epoch_iteration),
                           delimiter=',', fmt='%d')

            steps_counter += 1

        print('end of epoch %d / %d \t time taken: %d sec' %
              (epoch, config.train.num_epochs, time.time() - epoch_start_time))

        accumulated_loss = accumulated_loss / (i + 1 - tmp_start)
        tmp_start = 0
        print('accumulated loss {}'.format(accumulated_loss))
        losses_dict = {'accumulated_loss': accumulated_loss}
        logger.dump_current_errors(losses_dict, current_step)
        accumulated_loss = 0

        model.save('latest')
        model.save(str(epoch))
        np.savetxt(current_iteration_path, (epoch + 1, 0), delimiter=',', fmt='%d')

        # if epoch % config.general.eval_epcohs_freq == 0:
        current_eval = evaluator.eval(model, max_iterations=config.train.max_eval_iterations)

        # if epoch % config.train.lr_update_freq == 0:
        if current_eval > last_eval:
            tmp_count += 1
            if tmp_count == 3:
                model.update_learning_rate()
                tmp_count = 0
                last_eval = current_eval
        else:
            tmp_count = 0
            last_eval = current_eval
# Initializing layers
batch_size = 32
dense1 = Dense((batch_size, dataset.x_train.shape[1]), 200)
relu1 = ReLU()
dense2 = Dense((batch_size, 200), 100)
relu2 = ReLU()
drop1 = Dropout(0.2)
dense3 = Dense((batch_size, 100), 10)
loss_f = CrossEntropy()
optimizer = Adam()

# Initializing model
model = Model(input_shape=(batch_size, dataset.x_train.shape[1]),
              layers=[dense1, relu1, dense2, relu2, drop1, dense3],
              loss_f=loss_f,
              optimizer=optimizer)

# Create hyper-param schedulers
lr_schedule = HyperParamScheduler(3e-4, 3e-3, True)
mom_schedule = HyperParamScheduler(0.8, 0.9, False)

# Train
model.fit_one_cycle(dataset.x_train, dataset.y_train, 50, lr_schedule,
                    mom_schedule, batch_size,
                    [dataset.x_val, dataset.y_val])

# Save model
save_path = pathlib.Path.home() / "Desktop" / "MNIST_model"
model.save_model_weights(save_path)
def FashionMNIST_CNN():
    conv1_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 0,
        'stride': 1,
        'in_channel': 1,
        'out_channel': 32
    }
    conv2_params = {
        'kernel_h': 3,
        'kernel_w': 3,
        'pad': 0,
        'stride': 1,
        'in_channel': 32,
        'out_channel': 64
    }
    pool1_params = {
        'pool_type': 'max',
        'pool_height': 2,
        'pool_width': 2,
        'stride': 2,
        'pad': 0
    }
    model = Model()
    model.add(
        Conv2D(conv1_params, name='conv1', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu1'))
    model.add(
        Conv2D(conv2_params, name='conv2', initializer=Gaussian(std=0.001)))
    model.add(ReLU(name='relu2'))
    model.add(Pool2D(pool1_params, name='pooling1'))
    model.add(Dropout(rate=0.25))
    model.add(Flatten(name='flatten'))
    model.add(
        Linear(9216, 128, name='fclayer1', initializer=Gaussian(std=0.01)))
    model.add(ReLU(name='relu3'))
    model.add(Dropout(rate=0.25))
    model.add(Linear(128, 10, name='fclayer2', initializer=Gaussian(std=0.01)))
    return model
class Evaluate(object):
    def __init__(self, model_file_path):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.eval_data_path, self.vocab, mode='eval',
                               batch_size=config.batch_size, single_pass=True)
        time.sleep(15)
        model_name = os.path.basename(model_file_path)

        eval_dir = os.path.join(config.log_root, 'eval_%s' % (model_name))
        if not os.path.exists(eval_dir):
            os.mkdir(eval_dir)
        self.summary_writer = tf.summary.FileWriter(eval_dir)

        self.model = Model(model_file_path, is_eval=True)

    def eval_one_batch(self, batch):
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_1, coverage = \
            get_input_from_batch(batch, use_cuda)
        dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
            get_output_from_batch(batch, use_cuda)

        encoder_outputs, encoder_hidden, max_encoder_output = self.model.encoder(
            enc_batch, enc_lens)
        s_t_1 = self.model.reduce_state(encoder_hidden)

        if config.use_maxpool_init_ctx:
            c_t_1 = max_encoder_output

        step_losses = []
        for di in range(min(max_dec_len, config.max_dec_steps)):
            y_t_1 = dec_batch[:, di]  # Teacher forcing
            final_dist, s_t_1, c_t_1, attn_dist, p_gen, coverage = self.model.decoder(
                y_t_1, s_t_1, encoder_outputs, enc_padding_mask, c_t_1,
                extra_zeros, enc_batch_extend_vocab, coverage)
            target = target_batch[:, di]
            gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
            step_loss = -torch.log(gold_probs + config.eps)
            if config.is_coverage:
                step_coverage_loss = torch.sum(torch.min(attn_dist, coverage), 1)
                step_loss = step_loss + config.cov_loss_wt * step_coverage_loss

            step_mask = dec_padding_mask[:, di]
            step_loss = step_loss * step_mask
            step_losses.append(step_loss)

        sum_step_losses = torch.sum(torch.stack(step_losses, 1), 1)
        batch_avg_loss = sum_step_losses / dec_lens_var
        loss = torch.mean(batch_avg_loss)

        return loss.item()  # was loss.data[0]; indexing a 0-dim tensor is no longer supported

    def run_eval(self):
        running_avg_loss, iter = 0, 0
        start = time.time()
        batch = self.batcher.next_batch()
        while batch is not None:
            loss = self.eval_one_batch(batch)

            # eval_one_batch already returns a Python float
            running_avg_loss = calc_running_avg_loss(loss, running_avg_loss,
                                                     self.summary_writer, iter)
            iter += 1

            if iter % 100 == 0:
                self.summary_writer.flush()
            print_interval = 1
            if iter % print_interval == 0:
                print('steps %d, seconds for %d batch: %.2f , loss: %f' %
                      (iter, print_interval, time.time() - start, running_avg_loss))
                start = time.time()
            batch = self.batcher.next_batch()
class BeamSearch(object):
    def __init__(self, model_file_path):
        model_name = os.path.basename(model_file_path)
        self._decode_dir = os.path.join(config.log_root, 'decode_%s' % (model_name))
        self._rouge_ref_dir = os.path.join(self._decode_dir, 'rouge_ref')
        self._rouge_dec_dir = os.path.join(self._decode_dir, 'rouge_dec_dir')
        for p in [self._decode_dir, self._rouge_ref_dir, self._rouge_dec_dir]:
            if not os.path.exists(p):
                os.mkdir(p)

        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.decode_data_path, self.vocab, mode='decode',
                               batch_size=config.beam_size, single_pass=True)
        time.sleep(15)

        self.model = Model(model_file_path, is_eval=True)

    def sort_beams(self, beams):
        return sorted(beams, key=lambda h: h.avg_log_prob, reverse=True)

    def decode(self):
        start = time.time()
        counter = 0
        batch = self.batcher.next_batch()
        while batch is not None:
            # Run beam search to get best Hypothesis
            with torch.no_grad():
                best_summary = self.beam_search(batch)

            # Extract the output ids from the hypothesis and convert back to words
            output_ids = [int(t) for t in best_summary.tokens[1:]]
            decoded_words = data.outputids2words(
                output_ids, self.vocab,
                (batch.art_oovs[0] if config.pointer_gen else None))

            # Remove the [STOP] token from decoded_words, if necessary
            try:
                fst_stop_idx = decoded_words.index(data.STOP_DECODING)
                decoded_words = decoded_words[:fst_stop_idx]
            except ValueError:
                decoded_words = decoded_words

            print("===============SUMMARY=============")
            print(' '.join(decoded_words))
            original_abstract_sents = batch.original_abstracts_sents[0]

            write_for_rouge(original_abstract_sents, decoded_words, counter,
                            self._rouge_ref_dir, self._rouge_dec_dir)
            counter += 1
            if counter % 1000 == 0:
                print('%d example in %d sec' % (counter, time.time() - start))
                start = time.time()
            batch = self.batcher.next_batch()

        print("Decoder has finished reading dataset for single_pass.")
        print("Now starting ROUGE eval...")
        results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
        rouge_log(results_dict, self._decode_dir)

    def beam_search(self, batch):
        # batch should have only one example
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_0, coverage_t_0, sent_lens = \
            get_input_from_batch(batch, use_cuda)

        encoder_outputs, encoder_hidden, max_encoder_output = self.model.encoder(enc_batch, enc_lens)
        s_t_0 = self.model.reduce_state(encoder_hidden)

        if config.use_maxpool_init_ctx:
            c_t_0 = max_encoder_output

        gamma = None
        if config.is_sentence_filtering:
            gamma, sent_dists = self.model.sentence_filterer(encoder_outputs, sent_lens)

        section_outputs, section_hidden = self.model.section_encoder(s_t_0)
        s_t_0 = self.model.section_reduce_state(section_hidden)

        dec_h, dec_c = s_t_0  # 1 x 2*hidden_size
        dec_h = dec_h.squeeze()
        dec_c = dec_c.squeeze()

        # decoder batch preparation, it has beam_size example initially everything is repeated
        beams = [Beam(tokens=[self.vocab.word2id(data.START_DECODING)],
                      log_probs=[0.0],
                      state=(dec_h[0], dec_c[0]),
                      context=c_t_0[0],
                      coverage=(coverage_t_0[0] if config.is_coverage else None))
                 for _ in range(config.beam_size)]
        results = []
        steps = 0
        while steps < config.max_dec_steps and len(results) < config.beam_size:
            latest_tokens = [h.latest_token for h in beams]
            latest_tokens = [t if t < self.vocab.size() else self.vocab.word2id(data.UNKNOWN_TOKEN)
                             for t in latest_tokens]
            y_t_1 = Variable(torch.LongTensor(latest_tokens))
            if use_cuda:
                y_t_1 = y_t_1.cuda()

            all_state_h = []
            all_state_c = []
            all_context = []
            for h in beams:
                state_h, state_c = h.state
                all_state_h.append(state_h)
                all_state_c.append(state_c)
                all_context.append(h.context)

            s_t_1 = (torch.stack(all_state_h, 0).unsqueeze(0),
                     torch.stack(all_state_c, 0).unsqueeze(0))
            c_t_1 = torch.stack(all_context, 0)

            coverage_t_1 = None
            if config.is_coverage:
                all_coverage = []
                for h in beams:
                    all_coverage.append(h.coverage)
                coverage_t_1 = torch.stack(all_coverage, 0)

            final_dist, s_t, c_t, attn_dist, p_gen, coverage_t = self.model.decoder(
                y_t_1, s_t_1, encoder_outputs, section_outputs, enc_padding_mask,
                c_t_1, extra_zeros, enc_batch_extend_vocab, coverage_t_1, gamma)

            topk_log_probs, topk_ids = torch.topk(final_dist, config.beam_size * 2)

            dec_h, dec_c = s_t
            dec_h = dec_h.squeeze()
            dec_c = dec_c.squeeze()

            all_beams = []
            num_orig_beams = 1 if steps == 0 else len(beams)
            for i in range(num_orig_beams):
                h = beams[i]
                state_i = (dec_h[i], dec_c[i])
                context_i = c_t[i]
                coverage_i = (coverage_t[i] if config.is_coverage else None)

                for j in range(config.beam_size * 2):  # for each of the top 2*beam_size hyps:
                    new_beam = h.extend(token=topk_ids[i, j].item(),
                                        log_prob=topk_log_probs[i, j].item(),
                                        state=state_i,
                                        context=context_i,
                                        coverage=coverage_i)
                    all_beams.append(new_beam)

            beams = []
            for h in self.sort_beams(all_beams):
                if h.latest_token == self.vocab.word2id(data.STOP_DECODING):
                    if steps >= config.min_dec_steps:
                        results.append(h)
                else:
                    beams.append(h)
                if len(beams) == config.beam_size or len(results) == config.beam_size:
                    break

            steps += 1

        if len(results) == 0:
            results = beams

        beams_sorted = self.sort_beams(results)

        return beams_sorted[0]
def loop(mode, outer_steps, inner_steps, log_steps, fig_epochs, inner_lr,
         log_mask=True, unroll_steps=None, meta_batchsize=0, sampler=None,
         epoch=1, outer_optim=None, save_path=None):
    """Args:
        meta_batchsize(int): If meta_batchsize |m| > 0, gradients for multiple
            unrollings from each episode of size |m| will be accumulated in
            sequence but updated all at once. (Which can be done in parallel
            when VRAM is large enough, but will be simulated in this code.)
            If meta_batchsize |m| = 0 (default), then the update will be
            performed after each unrolling.
    """
    assert mode in ['train', 'valid', 'test']
    assert meta_batchsize >= 0
    print(f'Start_of_{mode}.')
    if mode == 'train' and unroll_steps is None:
        raise Exception("unroll_steps has to be specified when mode='train'.")
    if mode != 'train' and unroll_steps is not None:
        raise Warning("unroll_steps has no effect when mode != 'train'.")

    train = True if mode == 'train' else False
    force_base = True
    metadata = MetaMultiDataset(split=mode)
    mask_based = 'query'
    mask_type = 5
    mask_sample = False
    mask_scale = True
    easy_ratio = 17 / 50  # 0.3
    scale_manual = 0.8  # 1.0 when lr=0.001 and 0.8 when lr=0.00125
    inner_lr *= scale_manual

    if train:
        if meta_batchsize > 0:
            # serial processing of meta-minibatch
            update_epochs = meta_batchsize
            update_steps = None
        else:
            # update at every unrolling
            update_steps = unroll_steps
            update_epochs = None
        assert (update_epochs is None) != (update_steps is None)

    # for result recording
    result_frame = ResultFrame()
    if save_path:
        writer = SummaryWriter(os.path.join(save_path, 'tfevent'))

    for i in range(1, outer_steps + 1):
        outer_loss = 0

        for j, epi in enumerate(metadata.loader(n_batches=1), 1):
            # initialize base learner
            model = Model(epi.n_classes)
            params = model.get_init_params('ours')
            epi.s = C(epi.s)
            epi.q = C(epi.q)

            # baseline parameters
            params_b0 = C(params.copy('b0'))
            params_b1 = C(params.copy('b1'))
            params_b2 = C(params.copy('b2'))

            result_dict = ResultDict()

            for k in range(1, inner_steps + 1):
                # feed support set (baseline)
                out_s_b0 = model(epi.s, params_b0, None)
                out_s_b1 = model(epi.s, params_b1, None)
                out_s_b2 = model(epi.s, params_b2, None)

                if mask_based == 'support':
                    out = out_s_b1
                elif mask_based == 'query':
                    with torch.no_grad():
                        # test on query set
                        out_q_b1 = model(epi.q, params_b1, mask=None)
                    out = out_q_b1
                else:
                    print('WARNING')

                # attach mask to get loss_s
                if mask_type == 1:
                    mask = (out.loss.exp().mean().log() - out.loss).exp()
                elif mask_type == 2:
                    mask = (out.loss.exp().mean().log() / out.loss)
                elif mask_type == 3:
                    mask = out.loss.mean() / out.loss
                elif mask_type == 4:
                    mask = out.loss.min() / out.loss
                elif mask_type == 5 or mask_type == 6:
                    mask_scale = False
                    # weight by magnitude
                    if mask_type == 5:
                        mask = [scale_manual] * 5 + [(1 - easy_ratio) * scale_manual] * 5
                    # weight by ordering
                    elif mask_type == 6:
                        if k < inner_steps * easy_ratio:
                            mask = [scale_manual] * 5 + [0.0] * 5
                        else:
                            mask = [scale_manual] * 5 + [scale_manual] * 5
                    # sampling from 0 < p < 1
                    if mask_sample:
                        mask = [np.random.binomial(1, m) for m in mask]
                    mask = C(torch.tensor(mask).float())
                else:
                    print('WARNING')

                if mask_scale:
                    mask = (mask / (mask.max() + 0.05))

                # to debug in the middle of the running process
                if sig_2.is_active():
                    import pdb
                    pdb.set_trace()

                mask = mask.unsqueeze(1)
                out_s_b1.attach_mask(mask)
                out_s_b2.attach_mask(mask)
                # lll = out_s_b0.loss * 0.5
                params_b0 = params_b0.sgd_step(out_s_b0.loss.mean(), inner_lr, 'no_grad')
                params_b1 = params_b1.sgd_step(out_s_b1.loss_masked_mean, inner_lr, 'no_grad')
                params_b2 = params_b2.sgd_step(out_s_b2.loss_scaled_mean, inner_lr, 'no_grad')

                with torch.no_grad():
                    # test on query set
                    out_q_b0 = model(epi.q, params_b0, mask=None)
                    out_q_b1 = model(epi.q, params_b1, mask=None)
                    out_q_b2 = model(epi.q, params_b2, mask=None)

                # record result
                result_dict.append(
                    outer_step=epoch * i, inner_step=k,
                    **out_s_b0.as_dict(), **out_s_b1.as_dict(), **out_s_b2.as_dict(),
                    **out_q_b0.as_dict(), **out_q_b1.as_dict(), **out_q_b2.as_dict())
            ### end of inner steps (k) ###

            # append to the dataframe
            result_frame = result_frame.append_dict(
                result_dict.index_all(-1).mean_all(-1))

            # logging
            if k % log_steps == 0:
                # print info
                msg = Printer.step_info(epoch, mode, i, outer_steps, k,
                                        inner_steps, inner_lr)
                msg += Printer.way_shot_query(epi)
                # print mask
                if not sig_1.is_active() and log_mask:
                    msg += Printer.colorized_mask(mask, fmt="3d", vis_num=20)
                # print outputs (loss, acc, etc.)
                msg += Printer.outputs([out_s_b0, out_s_b1, out_s_b2], sig_1.is_active())
                msg += Printer.outputs([out_q_b0, out_q_b1, out_q_b2], sig_1.is_active())
                print(msg)
        ### end of meta minibatch (j) ###

        # tensorboard
        if save_path and train:
            step = (epoch * (outer_steps - 1)) + i
            res = ResultFrame(result_frame[result_frame['outer_step'] == i])
            loss = res.get_best_loss().mean()
            acc = res.get_best_acc().mean()
            writer.add_scalars('Loss/train', {n: loss[n] for n in loss.index}, step)
            writer.add_scalars('Acc/train', {n: acc[n] for n in acc.index}, step)

        # dump figures
        if save_path and i % fig_epochs == 0:
            epi.s.save_fig(f'imgs/support', save_path, i)
            epi.q.save_fig(f'imgs/query', save_path, i)
            # result_dict['ours_s_mask'].save_fig(f'imgs/masks', save_path, i)
            result_dict.get_items([
                'b0_s_loss', 'b1_s_loss', 'b2_s_loss',
                'b0_q_loss', 'b1_q_loss', 'b2_q_loss'
            ]).save_csv(f'classwise/{mode}', save_path, i)

        # distinguishable episodes
        if not i == outer_steps:
            print(f'Path for saving: {save_path}')
            print(f'End_of_episode: {i}')
            import pdb
            pdb.set_trace()
    ### end of episode (i) ###

    print(f'End_of_{mode}.')
    # del metadata
    return sampler, result_frame
def main():
    tensorboard_directory = './tmp/tensorboard/001'
    tensorboard_paths = [
        r'C:\Users\parth\Documents\GitHub\Kaggle-Santander-Value-Prediction-Challenge\tmp\tensorboard\001'
    ]
    tensorboard_names = ['rmse']

    # Model Parameters
    # --------------------------------------------------------------------------
    use_dropout = False
    use_batch_norm = False

    # Dropout inputs
    # use  : to use dropout in this layer
    # rate : dropout rate
    dropout_parameters = [{
        'use': True,
        'rate': 0.5
    }, {
        'use': True,
        'rate': 0.5
    }, {
        'use': True,
        'rate': 0.5
    }, {
        'use': True,
        'rate': 0.5
    }]

    # Fully Connected Layers unit size
    fc_parameters = [{
        'units': 5000
    }, {
        'units': 5000
    }, {
        'units': 5000
    }, {
        'units': 5000
    }]
    num_dense = len(fc_parameters)

    data_shape = [None, 4990]
    batch_size = 500
    val_size = 5000
    epochs = 100000
    learning_rate = 0.001

    session = tf.Session()

    Tensorboard.make(paths=tensorboard_paths, names=tensorboard_names,
                     host='127.0.0.1', port='6006', output=True, start=False)

    dropout_parameters = []
    model = Model(sess=session,
                  data_shape=data_shape,
                  num_classes=1,
                  num_dense=2,
                  learning_rate=learning_rate,
                  use_batch_norm=use_batch_norm,
                  use_dropout=use_dropout,
                  dropout_parameters=dropout_parameters,
                  fc_parameters=fc_parameters,
                  tensorboard_directory=tensorboard_directory)

    train_data, train_labels = get_data()
    train_data, val_data, train_labels, val_labels = train_test_split(
        train_data, train_labels, test_size=0.30)
    print('> Training Data: {} {}'.format(train_data.shape, train_labels.shape))
    print('> Val Data: {} {}'.format(val_data.shape, val_labels.shape))
    # print('> Test Data: {} {}'.format(test_data.shape, test_labels.shape))

    model.train_data(data=train_data, labels=train_labels)
    model.val_data(data=val_data, labels=val_labels)
    model.train(batch_size=batch_size, epochs=epochs)
def run_once(config, evaluator):
    model = Model(config, evaluator.dataset)
    # model = torch.nn.DataParallel(model)
    model.to(evaluator.device)
    evaluator.eval(model)
def SentimentNet(word_to_idx):
    """Construct a RNN model for sentiment analysis

    # Arguments:
        word_to_idx: A dictionary giving the vocabulary. It contains V entries,
            and maps each string to a unique integer in the range [0, V).

    # Returns
        model: the constructed model
    """
    vocab_size = len(word_to_idx)
    model = Model()
    model.add(
        Linear2D(vocab_size, 200, name='embedding',
                 initializer=Gaussian(std=0.01)))
    model.add(BiRNN(in_features=200, units=50, initializer=Gaussian(std=0.01)))
    model.add(
        Linear2D(100, 32, name='linear1', initializer=Gaussian(std=0.01)))
    model.add(TemporalPooling())  # defined in layers.py
    model.add(Linear2D(32, 2, name='linear2', initializer=Gaussian(std=0.01)))
    return model
def load_model(config, n_classes=2):
    model = Model()
    X = model.add(input([config.SEQUENCE_LEN], dtype='int32', name="input"))

    if config.is_use_embedding():
        embedding = model.add(
            embeddings(X, config.WORD_COUNT, config.EMBEDDING_DIM,
                       weights=config.EMBEDDING_MATRIX,
                       input_length=config.SEQUENCE_LEN,
                       frozen=config.is_embedding_trainable()))
    else:
        embedding = model.add(
            embeddings(X, config.WORD_COUNT, config.EMBEDDING_DIM,
                       input_length=config.SEQUENCE_LEN,
                       frozen=config.is_embedding_trainable()))

    dropout_1 = model.add(dropout(embedding, config.DROPOUT_LIST[0]))

    conv_list = []
    for k_size, n_C, k_pool in zip(config.FILTER_SIZE_LIST,
                                   config.FILTERS_PER_LAYER,
                                   config.POOL_SIZE_LIST):
        c = conv1d(dropout_1, k_size, n_C, nonlin='relu')
        p = maxpool(c, k_pool)
        conv_list.append(flatten(p))

    if len(conv_list) > 1:
        conv_out = model.add(concat(conv_list))
    else:
        conv_out = model.add(conv_list[0])

    dense_1 = model.add(dense(conv_out, 150, nonlin='relu'))
    dropout_2 = model.add(dropout(dense_1, config.DROPOUT_LIST[1]))
    out = model.add(dense(dropout_2, n_classes, nonlin='softmax'))

    model.compile(optimizer='rmsprop', loss='softmax_entropy',
                  learning_rate=config.LEARNING_RATE,
                  ckpt_file=config.CKPT_PATH,
                  device=config.DEVICE)
    return model