def train_model(save_file, random_file='random.png', epochs=5):
    batch_size = 32
    x_train, y_train, x_test, y_test = prepare_data(random_file)
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    model = build_model(x_train.shape[1:])
    opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)
    model.compile(loss='categorical_crossentropy', optimizer=opt,
                  metrics=['categorical_accuracy'])
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_data=(x_test, y_test))

    loss, accuracy = model.evaluate(x_test, y_test)
    if accuracy < 0.5:
        return False
    model.save(save_file)
    print("Model has been saved to {}".format(save_file))
    return True
def preparation():
    # set model save path
    if args.if_load_from_checkpoint:
        timestamp = args.checkpoint_name
    else:
        timestamp = str(int(time.time()))
        print("create new model save path: %s" % timestamp)
    args.current_save_path = 'save/%s/' % timestamp
    args.log_file = args.current_save_path + time.strftime("log_%Y_%m_%d_%H_%M_%S.txt", time.localtime())
    print("create log file at path: %s" % args.log_file)

    if os.path.exists(args.current_save_path):
        add_log("Load checkpoint model from Path: %s" % args.current_save_path)
    else:
        os.makedirs(args.current_save_path)
        add_log("Path: %s is created" % args.current_save_path)

    # set task type
    if args.task == 'yelp':
        args.data_path = '../../data/yelp/processed_files/'
    elif args.task == 'amazon':
        args.data_path = '../../data/amazon/processed_files/'
    elif args.task == 'imagecaption':
        pass
    else:
        raise TypeError('Wrong task type!')

    # prepare data
    args.id_to_word, args.vocab_size, \
        args.train_file_list, args.train_label_list = prepare_data(
            data_path=args.data_path, max_num=args.word_dict_max_num, task_type=args.task
        )
    return
def preparation():
    # set gpu
    if torch.cuda.is_available():
        args.device = "cuda"
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids
        print("Info: You are now using GPU mode: %s , with GPU id: %s" % (args.device, args.gpu_ids))
    else:
        print("Warning: You do not have a CUDA device, so you are now running on the CPU!")

    # set task type
    if args.task == 'yelp':
        args.data_path = '../../data/yelp/processed_files/'
    elif args.task == 'amazon':
        args.data_path = '../../data/amazon/processed_files/'
    elif args.task == 'imagecaption':
        pass
    else:
        raise TypeError('Wrong task type!')

    # prepare data
    args.id_to_word, args.vocab_size, \
        args.train_file_list, args.train_label_list, \
        args.test_file_list, args.test_label_list = \
        prepare_data(data_path=args.data_path,
                     max_num=args.word_dict_max_num,
                     task_type=args.task)
    return
def preparation():
    # set model save path
    # note: the '/' joins below assume args.current_save_path is a pathlib.Path
    if not os.path.exists(args.current_save_path):
        os.mkdir(args.current_save_path)
    args.log_file = args.current_save_path / 'train_log.txt'
    args.output_file = args.current_save_path / 'eval_log.txt'
    if not args.load_prev:
        # delete last record if not loading previous model
        if os.path.exists(args.log_file):
            os.remove(args.log_file)
        if os.path.exists(args.output_file):
            os.remove(args.output_file)

    # set task type
    if args.task == 'news_china_taiwan':
        args.data_path = './data/news_china_taiwan/'
    elif args.task == 'yelp':
        args.data_path = './data/yelp/'
    elif args.task == 'political':
        args.data_path = './data/political/'
    else:
        raise TypeError('Unsupported task type!')

    # prepare data
    args.train_file_list, args.train_label_list = prepare_data(
        data_path=args.data_path, task_type=args.task)
def main():
    train_iterator, valid_iterator, test_iterator, params = prepare_data()
    (INPUT_DIM, OUTPUT_DIM, ENC_EMB_DIM, DEC_EMB_DIM,
     ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT, DEC_DROPOUT) = params
    # INPUT_DIM = len(SRC.vocab), 7855
    # OUTPUT_DIM = len(TRG.vocab), 5893
    # ENC_EMB_DIM = 256
    # DEC_EMB_DIM = 256
    # ENC_HID_DIM = 512
    # DEC_HID_DIM = 512
    # ENC_DROPOUT = 0.5
    # DEC_DROPOUT = 0.5

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
    model = Seq2Seq(enc, dec, device).to(device)
    model.apply(init_weights)
    print(f'The model has {count_parameters(model):,} trainable parameters')

    for i, batch in enumerate(train_iterator):
        print(f'ITER: {i}')
        example = batch
        print("Input Length:", example.src.shape, "[src_len, batch_size]")
        output = model.forward(example.src, example.trg)
        print(output.shape)
        print('')
        if i > 3:
            break
def fast_adapt_model(self, model):
    # run a few adaptation steps for the policy context, and continued adaptation for model fine-tuning
    inputs, targets = prepare_data(self.replay_buffer.sample(return_all_data=True))
    train_inputs, train_targets, val_inputs, val_targets = \
        split_data_into_train_val(inputs, targets, self.train_val_ratio)
    train_feed_dict = self._get_feed_dict_for_extrapolation((train_inputs, train_targets), model)
    val_feed_dict = self._get_feed_dict_for_extrapolation((val_inputs, val_targets), model)

    assert self.num_fast_adapt_steps_for_context > 0
    for fast_step in range(self.num_fast_adapt_steps_for_context):
        train_feed_dict[model.test_time_lr] = model.fast_adapt_lr if fast_step < model.fast_adapt_steps \
            else model.fast_adapt_lr / 10
        updated_context, model_loss_dict = self.sess.run(
            [model.test_time_updated_context, model.model_prior_loss_dict],
            feed_dict=train_feed_dict)
        val_model_loss_dict = self.sess.run(model.model_prior_loss_dict, feed_dict=val_feed_dict)
        self.log_finetuning_model_losses(fast_step, model_loss_dict, val_model_loss_dict)
        self.sess.run(tf.assign(model.context, updated_context))
    context_for_policy = updated_context

    for fast_step in range(self.num_fast_adapt_steps_for_context,
                           self.num_fast_adapt_steps_for_context + self.num_extra_fast_adapt_steps_for_model):
        _, model_loss_dict = self.sess.run(
            [model.test_task_train_op, model.model_prior_loss_dict],
            feed_dict=train_feed_dict)
        val_model_loss_dict = self.sess.run(model.model_prior_loss_dict, feed_dict=val_feed_dict)
        self.log_finetuning_model_losses(fast_step, model_loss_dict, val_model_loss_dict)
    return context_for_policy
def main():
    parser = argparse.ArgumentParser(description='Evaluate model')
    parser.add_argument('--model_config', default='config.json',
                        help='train config for model_weights')
    parser.add_argument('--model_weights', default='./checkpoints/en_de_final.pt',
                        help='path for weights of the model')
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    with open(os.path.join('checkpoints', args.model_config), 'rt') as f:
        model_args = argparse.Namespace()
        model_args.__dict__.update(json.load(f))
        model_args = parser.parse_args(namespace=model_args)

    train_data, valid_data, test_data, src_lang, trg_lang = prepare_data()
    model = build_model(model_args, src_lang, trg_lang,
                        len(src_lang.vocab), len(trg_lang.vocab), device)
    model.load_state_dict(torch.load(args.model_weights, map_location='cpu'))
    model.eval()
    log.info('Bleu score: %s',
             calculate_bleu(test_data, src_lang, trg_lang, model, device, max_len=100))
def mergeSort():
    # prepare data
    data = prepare_data()
    # get region data
    region = list(data.Region)
    # call sort function
    _mergeSort(region)
    return region
def compute_returns(task_idxs):
    # NOTE: `self` and `epoch` are assumed to come from the enclosing scope
    # (this reads like a nested helper defined inside a method).
    all_returns = []
    for task_id in task_idxs:
        self.sampler.reset_task(task_id)
        data = self.sampler.sample(self.num_sample_steps_for_adaptation, self.model.get_context())
        proc_data = prepare_data(data)
        updated_context = self.model.get_updated_context(proc_data[0], proc_data[1])
        all_returns.append(self.eval_single_task(
            epoch, updated_context, log_name='eval/perTask/Task_' + str(task_id)))
    return all_returns
def run_genetic_for_solver():
    """
    Prepares data and runs the genetic algorithm for the Solver user application.
    """
    solver_load = Loader(path='./data/important_wc.p')
    solver_directory = './data/traindata'
    expression_list = None
    solver_train_data = prepare_data(loader=solver_load,
                                     chosen=expression_list,
                                     directory=solver_directory,
                                     count=count_in_dir(solver_directory, 3))
    start_genetic(solver_train_data, solver_load)
def collect_data_for_metatraining(self, task_id, collect_with_updated_context=True):
    self.sampler.reset_task(task_id)
    data = self.sampler.sample(self.num_sample_steps_prior, self.model.get_context())
    self.pre_adapt_replay_buffer.add(data, task_id)
    self.replay_buffer.add(data, task_id)
    if collect_with_updated_context:
        proc_data = prepare_data(data)
        updated_context = self.model.get_updated_context(proc_data[0], proc_data[1])
        post_update_data = self.sampler.sample(self.num_sample_steps_updated_context, updated_context)
        self.replay_buffer.add(post_update_data, task_id)
def preparation(args):
    args.current_save_path = './save/{}'.format(args.name)
    if not os.path.exists(args.current_save_path):
        os.makedirs(args.current_save_path)
    print('dir:{}'.format(args.current_save_path))
    args.id_to_word, args.vocab_size, args.train_file_list, args.train_label_list = prepare_data(
        data_path=args.data_path, max_num=args.word_dict_max_num, task_type='yelp')
    return args
def fleischner_classification(dataset_path):
    print('\nNodule Fleischner Classification\n')

    # gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    # tf.config.experimental.set_memory_growth(gpu_devices[0], True)

    scans, segmentations = dt.load_data(path=dataset_path)
    images, labels = dt.process_data_3d(scans, segmentations, path=dataset_path, image_size=64)
    (train_images, train_labels), (test_images, test_labels) = dt.prepare_data(
        images, labels, should_balance=True)

    model, loaded = mdl.load_model('fleischner')
    if not loaded:
        model = mdl.create_model_fc(input=(64, 64, 64, 1), output=4)
    model.summary()

    if not loaded:
        start_time = time.perf_counter()
        history = model.fit(train_images, train_labels, batch_size=15, epochs=60,
                            validation_split=0.10)
        end_time = time.perf_counter()
        print('Total time elapsed: {}s'.format(end_time - start_time))

        plt.plot(history.history['accuracy'], label='accuracy')
        plt.plot(history.history['val_accuracy'], label='val_accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.ylim([0, 1])
        plt.legend(loc='lower right')
        plt.show()

        mdl.save_model(model, 'fleischner')

    score = model.evaluate(test_images, test_labels, verbose=0)
    print(model.metrics_names)
    print(score)
def experiment(args):
    utils.seed_everything(seed=args.seed)
    qa_model = models.QAModel(hparams=args)
    train_dl, valid_dl, test_dl = data.prepare_data(args)

    wandb_logger = WandbLogger(project='qa', entity='nlp', tags=args.tags,
                               offline=args.fast_dev_run)
    wandb_logger.watch(qa_model, log='all')
    args.logger = wandb_logger

    trainer = pl.Trainer.from_argparse_args(args)
    trainer.fit(qa_model, train_dataloader=train_dl, val_dataloaders=valid_dl)
    trainer.test(qa_model, test_dataloaders=test_dl)
def main():
    parser = argparse.ArgumentParser(
        description='demonstration of machine translation algorithm')
    parser.add_argument('--model_config', default='./checkpoints/config.json',
                        help='train config for model_weights')
    parser.add_argument('--model_weights', default='./checkpoints/en_de_final.pt',
                        help='path for weights of the model')
    args = parser.parse_args()

    seed = 42
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    with open(os.path.join(args.model_config), 'rt') as f:
        model_args = argparse.Namespace()
        model_args.__dict__.update(json.load(f))
        model_args = parser.parse_args(namespace=model_args)

    print('Loading models...')
    train_data, valid_data, test_data, src_lang, trg_lang = prepare_data()
    model = build_model(model_args, src_lang, trg_lang,
                        len(src_lang.vocab), len(trg_lang.vocab), device)
    model.load_state_dict(torch.load(args.model_weights, map_location='cpu'))
    model.eval()

    print('Evaluating 5 random sentences from the test set:')
    for _ in range(5):
        random_element = vars(test_data.examples[np.random.randint(len(test_data))])
        input_sentence = random_element['src']
        print(colored('Input sentence: \n', 'yellow'), ' '.join(input_sentence))
        translation, _ = translate_sentence(input_sentence, src_lang, trg_lang, model, device)
        # cut off <eos> token
        translation = translation[:-1]
        print(colored('GT translation: \n', 'green'), ' '.join(random_element['trg']))
        print(colored('Model translation: \n', 'green'), ' '.join(translation))
def downscale_data(opt):
    for dataset_idx in sorted(opt['dataset'].keys()):
        if not dataset_idx.startswith('all'):
            continue
        loader = create_dataloader(opt, dataset_idx=dataset_idx)

        degradation_type = opt['dataset']['degradation']['type']
        if degradation_type == 'BD':
            kernel = data_utils.create_kernel(opt)
        if degradation_type == 'Style':
            path = opt['cartoon_model']
            cartoonizer = SimpleGenerator().to(torch.device(opt['device']))
            cartoonizer.load_weights(path)
            cartoonizer.eval()

        for item in tqdm(loader, ascii=True):
            if degradation_type == 'BD':
                data = prepare_data(opt, item, kernel)
            elif degradation_type == 'BI':
                data = data_utils.BI_downsample(opt, item)
            elif degradation_type == 'Style':
                image = item['gt'][0]
                image = resize(image)
                image = image.to(torch.device(opt['device']))
                with torch.no_grad():
                    stylized_image = cartoonizer(image).unsqueeze(0)
                    stylized_image = (stylized_image + 1) * 0.5
                data = {'gt': image.unsqueeze(0), 'lr': stylized_image}

            lr_data = data['lr']
            gt_data = data['gt']
            img = lr_data.squeeze(0).squeeze(0).permute(1, 2, 0).cpu().numpy()

            path = osp.join(
                'data', opt['dataset']['common']['name'], opt['data_subset'],
                opt['dataset'][dataset_idx]['actor_name'],
                opt['data_type'] + '_' + opt['dataset']['degradation']['type'],
                opt['dataset'][dataset_idx]['segment'], 'frames')
            os.makedirs(path, exist_ok=True)
            path = osp.join(path, item['frame_key'][0])

            img = img * 255.0
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            cv2.imwrite(path, img)
def main(args):
    fetch_data(args.data_dir, args.dataset_name)
    X, y = prepare_data(os.path.join(args.data_dir, args.dataset_name, '*.csv'))

    print("Starting dask cluster ...")
    cluster = make_cluster(n_workers=args.n_workers)
    client = Client(cluster)  # attach the client to the cluster created above

    # wrapping our target regressors for further benchmarking
    sk_xgb = ModelFitter(GradientBoostingRegressor(), True)
    dask_xgb = ModelFitter(XGBRegressor())

    print("Starting benchmarking process ...")
    results = benchmark_models(X, y, [sk_xgb, dask_xgb], args.n_folds, param_grid)
    res_df = pd.DataFrame(index=["sklearn", "dask"], data=results,
                          columns=["GS time", "training time", "Metric"])
    print(res_df)
def main(_):
    # generate data
    (inputs_, inputs_valid_, inputs_test_, labels_, labels_valid_, labels_test_) = \
        prepare_data(FLAGS.time_len, FLAGS.n, FLAGS.input_size, seed=FLAGS.seed)

    # initialize model & build TF graph
    model = CudnnLSTMModel(FLAGS.input_size, FLAGS.num_layers, FLAGS.num_units,
                           FLAGS.direction, FLAGS.learning_rate, FLAGS.dropout,
                           FLAGS.seed, is_training=True)

    # training
    model.train(inputs_, inputs_valid_, labels_, labels_valid_,
                FLAGS.batch_size, FLAGS.num_epochs)

    # evaluation on the test set
    model.eval(inputs_test_, labels_test_)
def main():
    input_lang, output_lang, pairs = prepare_data('eng', 'fra', True)
    print(random.choice(pairs))

    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)

    train_iters(input_lang, output_lang, pairs, encoder, attn_decoder, 75000, print_every=5000)
    evaluate_randomly(encoder, attn_decoder, input_lang, output_lang, pairs)

    output_words, attentions = evaluate(encoder, attn_decoder, input_lang, output_lang,
                                        "je suis trop froid .")
    mat_plot(attentions.numpy())
def load_from_params(cls):
    """
    Creates a chatbot from params.py.

    Good for development, not for production.
    :return: Chatbot initialized from params.py.
    """
    import params

    # load data
    questions, answers = load_data(params.data_file_directory, params.files, params.encoding)
    bigramer = Bigramer(params.bigramer)

    # prepare data manipulators
    VOCAB_SIZE = params.vocab_size
    tokenizer = create_tokenizer(questions + answers, VOCAB_SIZE, params.unknown_token)
    tokenized_questions, tokenized_answers = tokenize_q_a(tokenizer, questions, answers)

    # prepare data
    prepared_data = prepare_data(tokenized_questions, tokenized_answers)
    max_len_questions, max_len_answers, *_ = prepared_data

    # mle_model
    reversed_tokenizer_word_dict = {
        index: word for (word, index) in tokenizer.word_index.items()
    }
    mle_model = utils.fit_mle_model(tokenized_answers, reversed_tokenizer_word_dict)

    # load model
    model_data = utils.load_keras_model(params.model)
    _, encoder_inputs, encoder_states, decoder_inputs, decoder_embedding, decoder_lstm, decoder_dense = model_data

    return cls(params.model, tokenizer, mle_model, bigramer, max_len_questions,
               max_len_answers, params.strategy)
def preparation(args):
    # set model save path
    if not args.exp_id:
        args.exp_id = str(int(time.time()))
    args.current_save_path = os.path.join(args.dump_path, args.exp_name, args.exp_id)
    if not os.path.exists(args.current_save_path):
        os.makedirs(args.current_save_path, exist_ok=True)

    local_time = time.localtime()
    args.log_file = os.path.join(
        args.current_save_path, time.strftime("log_%Y_%m_%d_%H_%M_%S.txt", local_time))
    args.output_file = os.path.join(
        args.current_save_path, time.strftime("output_%Y_%m_%d_%H_%M_%S.txt", local_time))

    add_log(args, "exp id : %s" % args.exp_id)
    add_log(args, "Path: %s is created" % args.current_save_path)
    add_log(args, "create log file at path: %s" % args.log_file)

    # prepare data
    args.id_to_word, args.vocab_size = prepare_data(args)
def train(opt): # logging logger = base_utils.get_logger('base') logger.info('{} Options {}'.format('='*20, '='*20)) base_utils.print_options(opt, logger) # create data loader train_loader = create_dataloader(opt, dataset_idx='train') # create downsampling kernels for BD degradation kernel = data_utils.create_kernel(opt) # create model model = define_model(opt) # training configs total_sample = len(train_loader.dataset) iter_per_epoch = len(train_loader) total_iter = opt['train']['total_iter'] total_epoch = int(math.ceil(total_iter / iter_per_epoch)) start_iter, iter = opt['train']['start_iter'], 0 test_freq = opt['test']['test_freq'] log_freq = opt['logger']['log_freq'] ckpt_freq = opt['logger']['ckpt_freq'] logger.info('Number of training samples: {}'.format(total_sample)) logger.info('Total epochs needed: {} for {} iterations'.format( total_epoch, total_iter)) # train for epoch in range(total_epoch): for data in train_loader: # update iter iter += 1 curr_iter = start_iter + iter if iter > total_iter: logger.info('Finish training') break # update learning rate model.update_learning_rate() # prepare data data = prepare_data(opt, data, kernel) # train for a mini-batch model.train(data) # update running log model.update_running_log() # log if log_freq > 0 and iter % log_freq == 0: # basic info msg = '[epoch: {} | iter: {}'.format(epoch, curr_iter) for lr_type, lr in model.get_current_learning_rate().items(): msg += ' | {}: {:.2e}'.format(lr_type, lr) msg += '] ' # loss info log_dict = model.get_running_log() msg += ', '.join([ '{}: {:.3e}'.format(k, v) for k, v in log_dict.items()]) logger.info(msg) # save model if ckpt_freq > 0 and iter % ckpt_freq == 0: model.save(curr_iter) # evaluate performance if test_freq > 0 and iter % test_freq == 0: # setup model index model_idx = 'G_iter{}'.format(curr_iter) # for each testset for dataset_idx in sorted(opt['dataset'].keys()): # use dataset with prefix `test` if not dataset_idx.startswith('test'): continue ds_name = opt['dataset'][dataset_idx]['name'] logger.info( 'Testing on {}: {}'.format(dataset_idx, ds_name)) # create data loader test_loader = create_dataloader(opt, dataset_idx=dataset_idx) # define metric calculator metric_calculator = MetricCalculator(opt) # infer and compute metrics for each sequence for data in test_loader: # fetch data lr_data = data['lr'][0] seq_idx = data['seq_idx'][0] frm_idx = [frm_idx[0] for frm_idx in data['frm_idx']] # infer hr_seq = model.infer(lr_data) # thwc|rgb|uint8 # save results (optional) if opt['test']['save_res']: res_dir = osp.join( opt['test']['res_dir'], ds_name, model_idx) res_seq_dir = osp.join(res_dir, seq_idx) data_utils.save_sequence( res_seq_dir, hr_seq, frm_idx, to_bgr=True) # compute metrics for the current sequence true_seq_dir = osp.join( opt['dataset'][dataset_idx]['gt_seq_dir'], seq_idx) metric_calculator.compute_sequence_metrics( seq_idx, true_seq_dir, '', pred_seq=hr_seq) # save/print metrics if opt['test'].get('save_json'): # save results to json file json_path = osp.join( opt['test']['json_dir'], '{}_avg.json'.format(ds_name)) metric_calculator.save_results( model_idx, json_path, override=True) else: # print directly metric_calculator.display_results()
if __name__ == '__main__':
    img_rows = 80    # 80*2 #436/4 #128 #224 #109
    img_cols = 112   # 112*2 #1024/4 #128 #224 #256
    color_type = 3

    gen_data = 1
    unidimensional = 0
    fit = 0
    augmentation = 0
    predicting = 0

    if gen_data:
        # Generate data
        X, y, X_val, y_val, Xa, ya, Xa_val, ya_val = d.prepare_data(
            img_rows, img_cols, color_type, False, False, True)
        '''
        del X
        del y
        '''
        del X_val
        del y_val
        del Xa
        del ya
        del Xa_val
        del ya_val

    if unidimensional:
        # Transform y to a 1D vector
        y = np.array(
def main(_): # some startup settings np.random.seed(FLAGS.random_seed) tf.set_random_seed(FLAGS.random_seed) if FLAGS.random_learning_rate: FLAGS.learning_rate = 10**np.random.uniform(-2, 0) #Check log folders and if necessary remove: summary_dir = os.path.join(os.getenv('HOME'), FLAGS.summary_dir) # summary_dir = FLAGS.summary_dir print("summary dir: {}".format(summary_dir)) #Check log folders and if necessary remove: if FLAGS.log_tag == 'testing' or FLAGS.owr: if os.path.isdir(summary_dir + FLAGS.log_tag): shutil.rmtree(summary_dir + FLAGS.log_tag, ignore_errors=False) else: if os.path.isdir(summary_dir + FLAGS.log_tag): raise NameError('Logfolder already exists, overwriting alert: ' + summary_dir + FLAGS.log_tag) os.makedirs(summary_dir + FLAGS.log_tag) # os.mkdir(summary_dir+FLAGS.log_tag) save_config(summary_dir + FLAGS.log_tag) #define the size of the network input if FLAGS.network == 'inception': state_dim = [ 1, inception.inception_v3.default_image_size, inception.inception_v3.default_image_size, 3 ] elif FLAGS.network == 'fc_control': state_dim = [1, fc_control.fc_control_v1.input_size] elif FLAGS.network == 'depth': state_dim = depth_estim.depth_estim_v1.input_size elif FLAGS.network == 'mobile': state_dim = [ 1, mobile_net.mobilenet_v1.default_image_size, mobile_net.mobilenet_v1.default_image_size, 3 ] elif FLAGS.network == 'mobile_small': state_dim = [ 1, mobile_net.mobilenet_v1.default_image_size_small, mobile_net.mobilenet_v1.default_image_size_small, 3 ] elif FLAGS.network == 'mobile_medium': state_dim = [ 1, mobile_net.mobilenet_v1.default_image_size_medium, mobile_net.mobilenet_v1.default_image_size_medium, 3 ] else: raise NameError('Network is unknown: ', FLAGS.network) action_dim = 1 #initially only turn and go straight print("Number of State Dimensions:", state_dim) print("Number of Action Dimensions:", action_dim) print("Action bound:", FLAGS.action_bound) # import pdb; pdb.set_trace() # tf.logging.set_verbosity(tf.logging.DEBUG) # inputs=random_ops.random_uniform(state_dim) # targets=random_ops.random_uniform((1,action_dim)) # depth_targets=random_ops.random_uniform((1,1,1,64)) config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = False # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9) # config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options) # config.gpu_options.allow_growth = True sess = tf.Session(config=config) model = Model(sess, state_dim, action_dim, bound=FLAGS.action_bound) writer = tf.summary.FileWriter(summary_dir + FLAGS.log_tag, sess.graph) model.writer = writer def signal_handler(signal, frame): print('You pressed Ctrl+C!') #save checkpoint? 
print('saving checkpoints') model.save(summary_dir + FLAGS.log_tag) sess.close() print('done.') sys.exit(0) signal.signal(signal.SIGINT, signal_handler) print('------------Press Ctrl+C to end the learning') def run_episode(data_type, sumvar): '''run over batches return different losses type: 'train', 'val' or 'test' ''' activation_images = [] depth_predictions = [] endpoint_activations = [] start_time = time.time() data_loading_time = 0 calculation_time = 0 start_data_time = time.time() tot_loss = [] ctr_loss = [] dep_loss = [] odo_loss = [] q_loss = [] for index, ok, batch in data.generate_batch(data_type): data_loading_time += (time.time() - start_data_time) start_calc_time = time.time() if ok: inputs = np.array([_['img'] for _ in batch]) state = [] targets = np.array([[_['ctr']] for _ in batch]) # target_depth = np.array([_['depth'] for _ in batch]).reshape((-1,55,74,FLAGS.n_frames if FLAGS.n_fc else 1)) if FLAGS.auxiliary_depth or FLAGS.rl else [] target_depth = np.array([_['depth'] for _ in batch]).reshape( (-1, 55, 74)) if FLAGS.auxiliary_depth or FLAGS.rl else [] target_odom = np.array([_['odom'] for _ in batch]).reshape( (-1, 6)) if FLAGS.auxiliary_odom else [] # target_odom = np.array([_['odom'] for _ in batch]).reshape((-1,4)) if FLAGS.auxiliary_odom else [] prev_action = np.array([_['prev_act'] for _ in batch]).reshape( (-1, 1)) if FLAGS.auxiliary_odom else [] if data_type == 'train': losses = model.backward(inputs, state, targets, depth_targets=target_depth, odom_targets=target_odom, prev_action=prev_action) elif data_type == 'val' or data_type == 'test': state, losses, aux_results = model.forward( inputs, state, auxdepth=False, auxodom=False, prev_action=prev_action, targets=targets, target_depth=target_depth, target_odom=target_odom) tot_loss.append(losses['t']) if not FLAGS.no_control and (not FLAGS.rl or FLAGS.auxiliary_ctr): ctr_loss.append(losses['c']) if FLAGS.auxiliary_depth: dep_loss.append(losses['d']) if FLAGS.auxiliary_odom: odo_loss.append(losses['o']) if FLAGS.rl: q_loss.append(losses['q']) if index == 1 and data_type == 'val': if FLAGS.plot_activations: activation_images = model.plot_activations( inputs, targets) if FLAGS.plot_depth: depth_predictions = model.plot_depth( inputs, target_depth) if FLAGS.plot_histograms: # stime = time.time() endpoint_activations = model.get_endpoint_activations( inputs) # print('plot activations: {}'.format((stime-time.time()))) calculation_time += (time.time() - start_calc_time) start_data_time = time.time() sumvar['loss_' + data_type + '_total'] = np.mean(tot_loss) if not FLAGS.no_control and (not FLAGS.rl or FLAGS.auxiliary_ctr): sumvar['loss_' + data_type + '_control'] = np.mean(ctr_loss) if FLAGS.auxiliary_depth: sumvar['loss_' + data_type + '_depth'] = np.mean(dep_loss) if FLAGS.auxiliary_odom: sumvar['loss_' + data_type + '_odom'] = np.mean(odo_loss) if FLAGS.rl: sumvar['loss_' + data_type + '_q'] = np.mean(q_loss) if len(activation_images) != 0: sumvar['conv_activations'] = activation_images if len(depth_predictions) != 0: sumvar['depth_predictions'] = depth_predictions if FLAGS.plot_histograms: for i, ep in enumerate(model.endpoints): sumvar['activations_{}'.format(ep)] = endpoint_activations[i] print( '>>{0} [{1[2]}/{1[1]}_{1[3]:02d}:{1[4]:02d}]: data {2}; calc {3}'. 
format(data_type.upper(), tuple(time.localtime()[0:5]), print_dur(data_loading_time), print_dur(calculation_time))) # print('losses: tot {0:.3g}; ctrl {1:.3g}; depth {2:.3g}; odom {2:.3g}; q {3:.3g}'.format(np.mean(tot_loss), np.mean(ctr_loss), np.mean(dep_loss), np.mean(odo_loss), np.mean(q_loss))) if data_type == 'val' or data_type == 'test': print('{}'.format(str(sumvar))) sys.stdout.flush() return sumvar data.prepare_data((state_dim[1], state_dim[2], state_dim[3])) for ep in range(FLAGS.max_episodes): print('start episode: {}'.format(ep)) # ----------- train episode sumvar = run_episode('train', {}) # ----------- validate episode # sumvar = run_episode('val', {}) sumvar = run_episode('val', sumvar) # import pdb; pdb.set_trace() # ----------- write summary try: model.summarize(sumvar) except Exception as e: print('failed to summarize {}'.format(e)) # write checkpoint every x episodes if (ep % 20 == 0 and ep != 0): print('saved checkpoint') model.save(summary_dir + FLAGS.log_tag) # ------------ test sumvar = run_episode('test', {}) # ----------- write summary try: model.summarize(sumvar) except Exception as e: print('failed to summarize {}'.format(e)) # write checkpoint every x episodes if (ep % 20 == 0 and ep != 0) or ep == (FLAGS.max_episodes - 1): print('saved checkpoint') model.save(summary_dir + FLAGS.log_tag)
            'test', args.class_name)
    else:
        print("Use predict.py to create test predictions")

    # to be safe - free memory
    del model
    torch.cuda.empty_cache()


if __name__ == "__main__":
    task_data_path = os.path.join(args.processed_data_path, 'c2_muse_topic')
    transcription_path = os.path.join(task_data_path, 'transcription_segments')

    Param = get_parameters(args)

    # create working folders
    if not os.path.exists(Param['output_dir']):
        os.makedirs(Param['output_dir'])
    if not os.path.exists(Param['cache_dir']):
        os.makedirs(Param['cache_dir'])
    if not os.path.exists(Param['best_model_dir']):
        os.makedirs(Param['best_model_dir'])

    with open(os.path.join(Param['output_dir'], 'parameter.json'), 'w') as pa:
        json.dump(Param, pa, indent=' ')

    data = prepare_data(task_data_path, transcription_path, args.class_name,
                        args.cont_emotions, args.evaluate_test, None)

    if not args.predict_test:
        main(Param, task_data_path, transcription_path)
if __name__ == '__main__':
    MODE = 'TRAIN'
    FILENAME = 'data/fra-eng/fra.txt'  # '/data/fra-eng.zip'
    NUM_EXAMPLES = 64 * 100
    BATCH_SIZE = 64
    EMBEDDING_SIZE = 256
    RNN_SIZE = 512
    NUM_EPOCHS = 25
    MODEL_SIZE = 128
    H = 8
    NUM_LAYERS = 4
    ATTENTION_FUNC = 'concat'  # can choose between 'dot', 'general' or 'concat'

    # prepare_data
    data_en, data_fr, raw_data_en, raw_data_fr, en_tokenizer, fr_tokenizer = prepare_data(
        FILENAME, NUM_EXAMPLES)
    max_length = max(len(data_en[0]), len(data_fr[0]))

    dataset = tf.data.Dataset.from_tensor_slices((data_en, data_fr))
    dataset = dataset.shuffle(len(raw_data_en)).batch(BATCH_SIZE)

    pes = []
    for i in range(max_length):
        pes.append(positional_encoding(i, MODEL_SIZE))
    pes = np.concatenate(pes, axis=0)
    pes = tf.constant(pes, dtype=tf.float32)
    print(pes.shape)
    print(data_en.shape)
    print(data_fr.shape)
def main(args): global anno, infer_y, h_pre, alpha_past, if_trainning, dictLen worddicts = load_dict(args.dictPath) dictLen = len(worddicts) worddicts_r = [None] * len(worddicts) for kk, vv in worddicts.items(): worddicts_r[vv] = kk train, train_uid_list = dataIterator( args.trainPklPath, args.trainCaptionPath, worddicts, batch_size=args.batch_size, batch_Imagesize=500000, maxlen=150, maxImagesize=500000, ) valid, valid_uid_list = dataIterator( args.validPklPath, args.validCaptionPath, worddicts, batch_size=args.batch_size, batch_Imagesize=500000, maxlen=150, maxImagesize=500000, ) print("train lenth is ", len(train)) print("valid lenth is ", len(valid)) x = tf.placeholder(tf.float32, shape=[None, None, None, 1]) y = tf.placeholder(tf.int32, shape=[None, None]) x_mask = tf.placeholder(tf.float32, shape=[None, None, None]) y_mask = tf.placeholder(tf.float32, shape=[None, None]) lr = tf.placeholder(tf.float32, shape=()) if_trainning = tf.placeholder(tf.bool, shape=()) watcher_train = Watcher_train(blocks=3, level=16, growth_rate=24, training=if_trainning) annotation, anno_mask = watcher_train.dense_net(x, x_mask) # for initilaizing validation anno = tf.placeholder( tf.float32, shape=[ None, annotation.shape.as_list()[1], annotation.shape.as_list()[2], annotation.shape.as_list()[3], ], ) infer_y = tf.placeholder(tf.int64, shape=(None, )) h_pre = tf.placeholder(tf.float32, shape=[None, 256]) alpha_past = tf.placeholder( tf.float32, shape=[ None, annotation.shape.as_list()[1], annotation.shape.as_list()[2] ], ) attender = Attender(annotation.shape.as_list()[3], 256, 512) parser = Parser(256, 256, attender, annotation.shape.as_list()[3]) wap = WAP( watcher_train, attender, parser, 256, 256, annotation.shape.as_list()[3], dictLen, if_trainning, ) hidden_state_0 = tf.tanh( tf.tensordot(tf.reduce_mean(anno, axis=[1, 2]), wap.Wa2h, axes=1) + wap.ba2h) # [batch, hidden_dim] cost = wap.get_cost(annotation, y, anno_mask, y_mask) vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) for vv in vs: if not vv.name.startswith("batch_normalization"): cost += 1e-4 * tf.reduce_sum(tf.pow(vv, 2)) p, w, h, alpha = wap.get_word(infer_y, h_pre, alpha_past, anno) optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): trainer = optimizer.minimize(cost) max_epoch = 200 config = tf.ConfigProto() config.gpu_options.allow_growth = True init = tf.global_variables_initializer() uidx = 0 cost_s = 0 dispFreq = 100 if args.dispFreq is None else args.dispFreq saveFreq = (len(train) * args.epochDispRatio if args.saveFreq is None else args.saveFreq) sampleFreq = (len(train) * args.epochSampleRatio if args.sampleFreq is None else args.sampleFreq) validFreq = (len(train) * args.epochValidRatio if args.validFreq is None else args.validFreq) history_errs = [] estop = False halfLrFlag = 0 patience = 15 if args.patience is None else args.patience lrate = args.lr logPath = "./log.txt" if args.logPath is None else args.logPath log = open(logPath, "w") log.write(str(vars(args))) log.write(str(patience)) log.write(str(lr)) saver = tf.train.Saver() with tf.Session(config=config) as sess: sess.run(init) for epoch in range(max_epoch): n_samples = 0 random.shuffle(train) for batch_x, batch_y in train: batch_x, batch_x_m, batch_y, batch_y_m = prepare_data( batch_x, batch_y) n_samples += len(batch_x) uidx += 1 cost_i, _ = sess.run( [cost, trainer], feed_dict={ x: batch_x, y: batch_y, x_mask: batch_x_m, y_mask: batch_y_m, if_trainning: True, 
lr: lrate, }, ) cost_s += cost_i if np.isnan(cost_i) or np.isinf(cost_i): print("invalid cost value detected") sys.exit(0) if np.mod(uidx, dispFreq) == 0: cost_s /= dispFreq print("Epoch ", epoch, "Update ", uidx, "Cost ", cost_s, "Lr ", lrate) log.write("Epoch " + str(epoch) + " Update " + str(uidx) + " Cost " + str(cost_s) + " Lr " + str(lrate) + "\n") log.flush() cost_s = 0 if np.mod(uidx, sampleFreq) == 0: print("Start sampling...") _t = time.time() fpp_sample = open( os.path.join(args.resultPath, str(args.resultFileName) + ".txt"), "w", ) valid_count_idx = 0 for batch_x, batch_y in valid: for xx in batch_x: xx = np.moveaxis(xx, 0, -1) xx_pad = np.zeros( (xx.shape[0], xx.shape[1], xx.shape[2]), dtype="float32") xx_pad[:, :, :] = xx / 255.0 xx_pad = xx_pad[None, :, :, :] annot = sess.run(annotation, feed_dict={ x: xx_pad, if_trainning: False }) h_state = sess.run(hidden_state_0, feed_dict={anno: annot}) sample, score = wap.get_sample( p, w, h, alpha, annot, h_state, 10, 100, False, sess, training=False, ) score = score / np.array([len(s) for s in sample]) ss = sample[score.argmin()] fpp_sample.write(valid_uid_list[valid_count_idx]) valid_count_idx = valid_count_idx + 1 if np.mod(valid_count_idx, 100) == 0: print("gen %d samples" % valid_count_idx) log.write("gen %d samples" % valid_count_idx + "\n") log.flush() for vv in ss: if vv == 0: # <eol> break fpp_sample.write(" " + worddicts_r[vv]) fpp_sample.write("\n") fpp_sample.close() print("valid set decode done") log.write("valid set decode done\n") log.flush() print("Done sampling, took" + str(time.time() - _t)) if np.mod(uidx, validFreq) == 0: print("Start validating...") _t = time.time() probs = [] for batch_x, batch_y in valid: batch_x, batch_x_m, batch_y, batch_y_m = prepare_data( batch_x, batch_y) pprobs, annot = sess.run( [cost, annotation], feed_dict={ x: batch_x, y: batch_y, x_mask: batch_x_m, y_mask: batch_y_m, if_trainning: False, }, ) probs.append(pprobs) valid_errs = np.array(probs) valid_err_cost = valid_errs.mean() wer_process( os.path.join(args.resultPath, args.resultFileName + ".txt"), args.validCaptionPath, os.path.join(args.resultPath, args.resultFileName + ".wer"), ) fpp = open( os.path.join(args.resultPath, args.resultFileName + ".wer")) stuff = fpp.readlines() fpp.close() m = re.search("WER (.*)\n", stuff[0]) valid_per = 100.0 * float(m.group(1)) m = re.search("ExpRate (.*)\n", stuff[1]) valid_sacc = 100.0 * float(m.group(1)) valid_err = valid_per history_errs.append(valid_err) if (uidx / validFreq == 0 or valid_err <= np.array(history_errs).min()): bad_counter = 0 if (uidx / validFreq != 0 and valid_err > np.array(history_errs).min()): bad_counter += 1 if bad_counter > patience: if halfLrFlag == 2: print("Early Stop!") log.write("Early Stop!\n") log.flush() estop = True break else: print("Lr decay and retrain!") log.write("Lr decay and retrain!\n") log.flush() bad_counter = 0 lrate = lrate / 10 halfLrFlag += 1 print("bad_counter" + str(bad_counter)) print("Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f" % (valid_per, valid_sacc, valid_err_cost)) log.write("Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f" % (valid_per, valid_sacc, valid_err_cost) + "\n") log.flush() print("Done validating, took" + str(time.time() - _t)) if estop: break save_path = saver.save(sess, os.path.join(args.savePath + args.saveName))
def main(args): if args.cuda and torch.cuda.is_available(): device = torch.device("cuda:0") else: device = torch.device("cpu") init_dict, train_dict, test_dict = prepare_data( args.data_loc, args.num_init, args.num_total, test_is_year=False, seed=args.seed, ) init_x, init_y, init_y_var = ( init_dict["x"].to(device), init_dict["y"].to(device), init_dict["y_var"].to(device), ) train_x, train_y, train_y_var = ( train_dict["x"].to(device), train_dict["y"].to(device), train_dict["y_var"].to(device), ) test_x, test_y, test_y_var = ( test_dict["x"].to(device), test_dict["y"].to(device), test_dict["y_var"].to(device), ) if args.model == "wiski": model = FixedNoiseOnlineSKIGP( init_x, init_y.view(-1, 1), init_y_var.view(-1, 1), GridInterpolationKernel( base_kernel=ScaleKernel( MaternKernel( ard_num_dims=2, nu=0.5, lengthscale_prior=GammaPrior(3.0, 6.0), ), outputscale_prior=GammaPrior(2.0, 0.15), ), grid_size=30, num_dims=2, grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]), ), learn_additional_noise=False, ).to(device) mll_type = lambda x, y: BatchedWoodburyMarginalLogLikelihood( x, y, clear_caches_every_iteration=True) elif args.model == "exact": model = FixedNoiseGP( init_x, init_y.view(-1, 1), init_y_var.view(-1, 1), ScaleKernel( MaternKernel( ard_num_dims=2, nu=0.5, lengthscale_prior=GammaPrior(3.0, 6.0), ), outputscale_prior=GammaPrior(2.0, 0.15), ), ).to(device) mll_type = ExactMarginalLogLikelihood mll = mll_type(model.likelihood, model) print("---- Fitting initial model ----") start = time.time() model.train() model.zero_grad() # with max_cholesky_size(args.cholesky_size), skip_logdet_forward(True), \ # use_toeplitz(args.toeplitz), max_root_decomposition_size(args.sketch_size): fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000}) end = time.time() print("Elapsed fitting time: ", end - start) print("Named parameters: ", list(model.named_parameters())) print("--- Now computing initial RMSE") model.eval() with gpytorch.settings.skip_posterior_variances(True): test_pred = model(test_x) pred_rmse = ((test_pred.mean - test_y)**2).mean().sqrt() print("---- Initial RMSE: ", pred_rmse.item()) all_outputs = [] start_ind = init_x.shape[0] end_ind = int(start_ind + args.batch_size) for step in range(args.num_steps): if step > 0 and step % 25 == 0: print("Beginning step ", step) total_time_step_start = time.time() if step > 0: print("---- Fitting model ----") start = time.time() model.train() model.zero_grad() mll = mll_type(model.likelihood, model) # with skip_logdet_forward(True), max_root_decomposition_size( # args.sketch_size # ), max_cholesky_size(args.cholesky_size), use_toeplitz( # args.toeplitz # ): fit_gpytorch_torch(mll, options={ "lr": 0.01 * (0.99**step), "maxiter": 300 }) model.zero_grad() end = time.time() print("Elapsed fitting time: ", end - start) print("Named parameters: ", list(model.named_parameters())) if not args.random: if args.model == "wiski": botorch_model = OnlineSKIBotorchModel(model=model) else: botorch_model = model # qmc_sampler = SobolQMCNormalSampler(num_samples=4) bounds = torch.stack([torch.zeros(2), torch.ones(2)]).to(device) qnipv = qNIPV( model=botorch_model, mc_points=test_x, # sampler=qmc_sampler, ) #with use_toeplitz(args.toeplitz), root_pred_var(True), fast_pred_var(True): candidates, acq_value = optimize_acqf( acq_function=qnipv, bounds=bounds, q=args.batch_size, num_restarts=1, raw_samples=10, # used for intialization heuristic options={ "batch_limit": 5, "maxiter": 200 }, ) else: candidates = torch.rand(args.batch_size, train_x.shape[-1], 
device=device, dtype=train_x.dtype) acq_value = torch.zeros(1) model.eval() _ = model(test_x[:10]) # to init caches print("---- Finished optimizing; now querying dataset ---- ") with torch.no_grad(): covar_dists = model.covar_module(candidates, train_x) nearest_points = covar_dists.evaluate().argmax(dim=-1) new_x = train_x[nearest_points] new_y = train_y[nearest_points] new_y_var = train_y_var[nearest_points] todrop = torch.tensor( [x in nearest_points for x in range(train_x.shape[0])]) train_x, train_y, train_y_var = train_x[~todrop], train_y[ ~todrop], train_y_var[~todrop] print("New train_x shape", train_x.shape) print("--- Now updating model with simulator ----") model = model.condition_on_observations(X=new_x, Y=new_y.view(-1, 1), noise=new_y_var.view( -1, 1)) print("--- Now computing updated RMSE") model.eval() # with gpytorch.settings.fast_pred_var(True), \ # detach_test_caches(True), \ # max_root_decomposition_size(args.sketch_size), \ # max_cholesky_size(args.cholesky_size), \ # use_toeplitz(args.toeplitz), root_pred_var(True): test_pred = model(test_x) pred_rmse = ((test_pred.mean.view(-1) - test_y.view(-1))**2).mean().sqrt() pred_avg_variance = test_pred.variance.mean() total_time_step_elapsed_time = time.time() - total_time_step_start step_output_list = [ total_time_step_elapsed_time, acq_value.item(), pred_rmse.item(), pred_avg_variance.item() ] print("Step RMSE: ", pred_rmse) all_outputs.append(step_output_list) start_ind = end_ind end_ind = int(end_ind + args.batch_size) output_dict = { "model_state_dict": model.cpu().state_dict(), "queried_points": { 'x': model.cpu().train_inputs[0], 'y': model.cpu().train_targets }, "results": DataFrame(all_outputs) } torch.save(output_dict, args.output)
import network as nn
import data as dt

train_inputs, train_labels, test_inputs, test_labels = dt.prepare_data()
nn.train(train_inputs, train_labels, test_inputs, test_labels)

# Run TensorBoard with the command:
#   tensorboard --logdir logs/fit
# at the prompt
def main(args):
    worddicts = load_dict(args.path + '/data/dictionary.txt')
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    train, train_uid_list = dataIterator(args.path + '/data/offline-train.pkl',
                                         args.path + '/data/train_caption.txt',
                                         worddicts,
                                         batch_size=args.batch_size,
                                         batch_Imagesize=400000,
                                         maxlen=100,
                                         maxImagesize=400000)
    valid, valid_uid_list = dataIterator(args.path + '/data/offline-test.pkl',
                                         args.path + '/data/test_caption.txt',
                                         worddicts,
                                         batch_size=args.batch_size,
                                         batch_Imagesize=400000,
                                         maxlen=100,
                                         maxImagesize=400000)
    print('train length is ', len(train))

    x = tf.placeholder(tf.float32, shape=[None, None, None, 1])
    y = tf.placeholder(tf.int32, shape=[None, None])
    x_mask = tf.placeholder(tf.float32, shape=[None, None, None])
    y_mask = tf.placeholder(tf.float32, shape=[None, None])
    lr = tf.placeholder(tf.float32, shape=())
    if_trainning = tf.placeholder(tf.bool, shape=())

    watcher_train = Watcher_train(blocks=3, level=16, growth_rate=24, training=if_trainning)
    annotation, anno_mask = watcher_train.dense_net(x, x_mask)

    # for initializing validation
    anno = tf.placeholder(tf.float32, shape=[None,
                                             annotation.shape.as_list()[1],
                                             annotation.shape.as_list()[2],
                                             annotation.shape.as_list()[3]])
    infer_y = tf.placeholder(tf.int64, shape=(None, ))
    h_pre = tf.placeholder(tf.float32, shape=[None, 256])
    alpha_past = tf.placeholder(tf.float32, shape=[None,
                                                   annotation.shape.as_list()[1],
                                                   annotation.shape.as_list()[2]])

    attender = Attender(annotation.shape.as_list()[3], 256, 512)
    parser = Parser(256, 256, attender, annotation.shape.as_list()[3])
    # keep the model under its own name so it is not shadowed by the `w` tensor returned below
    wap = WAP(watcher_train, attender, parser, 256, 256,
              annotation.shape.as_list()[3], 111, if_trainning)

    hidden_state_0 = tf.tanh(
        tf.tensordot(tf.reduce_mean(anno, axis=[1, 2]), wap.Wa2h, axes=1) +
        wap.ba2h)  # [batch, hidden_dim]

    cost = wap.get_cost(annotation, y, anno_mask, y_mask)
    vs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    for vv in vs:
        if not vv.name.startswith('batch_normalization'):
            cost += 1e-4 * tf.reduce_sum(tf.pow(vv, 2))

    p, w, h, alpha = wap.get_word(infer_y, h_pre, alpha_past, anno)

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=lr)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        trainer = optimizer.minimize(cost)

    max_epoch = 200
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    init = tf.global_variables_initializer()

    uidx = 0
    cost_s = 0
    dispFreq = 100
    saveFreq = len(train)
    sampleFreq = len(train)
    validFreq = len(train)
    history_errs = []
    estop = False
    halfLrFlag = 0
    patience = 15
    lrate = 1.0
    log = open(args.path + '/log-bs-6.txt', 'w')

    with tf.Session(config=config) as sess:
        sess.run(init)
        for epoch in range(max_epoch):
            n_samples = 0
            random.shuffle(train)
            for batch_x, batch_y in train:
                batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(batch_x, batch_y)
                n_samples += len(batch_x)
                uidx += 1
                cost_i, _ = sess.run(
                    [cost, trainer],
                    feed_dict={
                        x: batch_x,
                        y: batch_y,
                        x_mask: batch_x_m,
                        y_mask: batch_y_m,
                        if_trainning: True,
                        lr: lrate
                    })
                cost_s += cost_i

                if np.isnan(cost_i) or np.isinf(cost_i):
                    print('invalid cost value detected')
                    sys.exit(0)

                if np.mod(uidx, dispFreq) == 0:
                    cost_s /= dispFreq
                    print('Epoch ', epoch, 'Update ', uidx, 'Cost ', cost_s, 'Lr ', lrate)
                    log.write('Epoch ' + str(epoch) + ' Update ' + str(uidx) +
                              ' Cost ' + str(cost_s) + ' Lr ' + str(lrate) + '\n')
                    log.flush()
                    cost_s = 0

                if np.mod(uidx, sampleFreq) == 0:
                    fpp_sample = open(
                        args.path + '/result/valid_decode_result-bs-6.txt', 'w')
                    valid_count_idx = 0
                    for batch_x, batch_y in valid:
                        for xx in batch_x:
                            xx = np.moveaxis(xx, 0, -1)
                            xx_pad = np.zeros(
                                (xx.shape[0], xx.shape[1], xx.shape[2]), dtype='float32')
                            xx_pad[:, :, :] = xx / 255.
                            xx_pad = xx_pad[None, :, :, :]
                            annot = sess.run(annotation,
                                             feed_dict={x: xx_pad, if_trainning: False})
                            h_state = sess.run(hidden_state_0, feed_dict={anno: annot})
                            sample, score = wap.get_sample(p, w, h, alpha, annot, h_state,
                                                           10, 100, False, sess,
                                                           training=False)
                            score = score / np.array([len(s) for s in sample])
                            ss = sample[score.argmin()]
                            fpp_sample.write(valid_uid_list[valid_count_idx])
                            valid_count_idx = valid_count_idx + 1
                            if np.mod(valid_count_idx, 100) == 0:
                                print('gen %d samples' % valid_count_idx)
                                log.write('gen %d samples' % valid_count_idx + '\n')
                                log.flush()
                            for vv in ss:
                                if vv == 0:  # <eol>
                                    break
                                fpp_sample.write(' ' + worddicts_r[vv])
                            fpp_sample.write('\n')
                    fpp_sample.close()
                    print('valid set decode done')
                    log.write('valid set decode done\n')
                    log.flush()

                if np.mod(uidx, validFreq) == 0:
                    probs = []
                    for batch_x, batch_y in valid:
                        batch_x, batch_x_m, batch_y, batch_y_m = prepare_data(batch_x, batch_y)
                        pprobs, annot = sess.run(
                            [cost, annotation],
                            feed_dict={
                                x: batch_x,
                                y: batch_y,
                                x_mask: batch_x_m,
                                y_mask: batch_y_m,
                                if_trainning: False
                            })
                        probs.append(pprobs)
                    valid_errs = np.array(probs)
                    valid_err_cost = valid_errs.mean()

                    os.system('python3.4 compute-wer.py ' + args.path +
                              '/result/valid_decode_result-bs-6.txt' + ' ' + args.path +
                              '/data/test_caption.txt' + ' ' + args.path +
                              '/result/valid-bs-6.wer')

                    fpp = open(args.path + '/result/valid-bs-6.wer')
                    stuff = fpp.readlines()
                    fpp.close()
                    m = re.search('WER (.*)\n', stuff[0])
                    valid_per = 100. * float(m.group(1))
                    m = re.search('ExpRate (.*)\n', stuff[1])
                    valid_sacc = 100. * float(m.group(1))
                    valid_err = valid_per
                    history_errs.append(valid_err)

                    if uidx / validFreq == 0 or valid_err <= np.array(history_errs).min():
                        bad_counter = 0
                    if uidx / validFreq != 0 and valid_err > np.array(history_errs).min():
                        bad_counter += 1
                        if bad_counter > patience:
                            if halfLrFlag == 2:
                                print('Early Stop!')
                                log.write('Early Stop!\n')
                                log.flush()
                                estop = True
                                break
                            else:
                                print('Lr decay and retrain!')
                                log.write('Lr decay and retrain!\n')
                                log.flush()
                                bad_counter = 0
                                lrate = lrate / 10
                                halfLrFlag += 1

                    print('Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f' %
                          (valid_per, valid_sacc, valid_err_cost))
                    log.write('Valid WER: %.2f%%, ExpRate: %.2f%%, Cost: %f' %
                              (valid_per, valid_sacc, valid_err_cost) + '\n')
                    log.flush()

            if estop:
                break
import neurolab as nl
import numpy as np

from data import prepare_data

__author__ = 'pradyumnad'

train, train_target, test, test_target = prepare_data()

print('Train: ', len(train))
print('Test : ', len(test))

input = list(train[['1st serve points won Norm', '2nd serve points won Norm',
                    'Break points won Norm']].values)
target = list(train_target)
target = [[i] for i in target]
print(input)
print(target)

# Create network with 3 inputs and randomly initialized weights
net = nl.net.newff([[0, 1], [0, 1], [0, 1]], [3, 1])
net.trainf = nl.train.train_gd
print(net.ci, net.co)

# Train network
error = net.train(input, target, epochs=500, show=50, goal=0.02)

# Plot result
import pylab as pl