def save_entry(sender):
    """Build a journal entry from the sender's superview and persist it.

    Reads the date/time and form fields from the containing view, appends
    the entry to ``journal.jl``, shows a save-status alert, and marks the
    main view as 'Out of Sync' when the save succeeded.
    """
    container = sender.superview
    entry_time = get_datetime(container)
    entry = get_info(container, entry_time)
    # NOTE(review): a commented-out check on entry['dateTime'] lived here,
    # left as a stub for implementing the postprandial alerts.
    journal_file = _path + '/' + 'journal.jl'
    saved = log.save(entry, journal_file)
    save_status_alert(saved)
    if saved:
        views[0]['label2'].text = 'Out of Sync'
def log():
    """Receives a Slack channel message and passes it off to slack_log()

    Args:
        request (object): the Flask request object, including the the form-
            encoded message fields which Slack POSTs

    Returns:
        bool: True, or HTTP 500.
    """
    if config.log:
        print('log()')
    saved = logger.save(request)
    if saved:
        return jsonify({'text': ''})
    if config.log:
        print('log() failed')
    abort(500)
def main(args, logger):
    """Train or evaluate a VarmisuseModel end to end.

    Loads the model (only-test file, resumable checkpoint, pretrained
    weights, or from scratch), builds the train/dev data loaders, prints
    the run configuration, and either runs one official validation pass
    (``args.only_test``) or the full train/validate loop with optional lr
    warmup, periodic checkpoints, best-model tracking, and early stopping.

    Args:
        args: parsed command-line namespace (model files, data files,
            optimizer and loader settings).
        logger: project logger exposing ``print``, ``save``, and ``path``.

    Raises:
        IOError: in only-test mode when ``args.model_file`` does not exist.
    """
    # --------------------------------------------------------------------------
    # MODEL
    logger.print('-' * 100)
    start_epoch = 1
    if args.only_test:
        # Evaluation only: the trained model file must already exist.
        if not os.path.isfile(args.model_file):
            raise IOError('No such file: %s' % args.model_file)
        model = VarmisuseModel.load(args.model_file)
    else:
        if args.checkpoint and os.path.isfile(args.model_file + '.checkpoint'):
            # Just resume training, no modifications.
            logger.print('Found a checkpoint...')
            checkpoint_file = args.model_file + '.checkpoint'
            model, start_epoch = VarmisuseModel.load_checkpoint(
                checkpoint_file, args.cuda)
        else:
            # Training starts fresh. But the model state is either pretrained or
            # newly (randomly) initialized.
            if args.pretrained:
                logger.print('Using pretrained model...')
                model = VarmisuseModel.load(args.pretrained, args)
            else:
                logger.print('Training model from scratch...')
                model = init_from_scratch(args, logger)

        # Set up optimizer
        model.init_optimizer()
        # log the parameter details
        logger.print(
            'Trainable #parameters [encoder-decoder] {} [total] {}'.format(
                human_format(model.network.count_encoder_parameters() +
                             model.network.count_decoder_parameters()),
                human_format(model.network.count_parameters())))
        table = model.network.layer_wise_parameters()
        logger.print('Breakdown of the trainable paramters\n%s' % table)

    # Use the GPU?
    if args.cuda:
        model.cuda()

    if args.parallel:
        model.parallelize()

    # --------------------------------------------------------------------------
    # DATA ITERATORS
    # Two datasets: train and dev. If we sort by length it's faster.
    logger.print('-' * 100)
    logger.print('Make data loaders')

    dev_files = dict()
    dev_files['src'] = args.dev_src_file
    dev_files['src_tag'] = args.dev_src_tag_file
    dev_files['tgt'] = args.dev_tgt_file

    if not args.only_test:
        train_files = dict()
        train_files['src'] = args.train_src_file
        train_files['src_tag'] = args.train_src_tag_file
        train_files['tgt'] = args.train_tgt_file
        train_dataset = data_utils.VarmisuseDataset(model, args, train_files)
        if args.sort_by_len:
            train_sampler = data.SortedBatchSampler(train_dataset.lengths(),
                                                    args.batch_size,
                                                    shuffle=True)
        else:
            train_sampler = torch.utils.data.sampler.RandomSampler(
                train_dataset)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            sampler=train_sampler,
            num_workers=args.data_workers,
            collate_fn=data_utils.batchify_varmisuse,
            pin_memory=args.cuda,
            # drop_last avoids a ragged final batch under DataParallel.
            drop_last=args.parallel)

    dev_dataset = data_utils.VarmisuseDataset(model, args, dev_files)
    dev_sampler = torch.utils.data.sampler.SequentialSampler(dev_dataset)
    dev_loader = torch.utils.data.DataLoader(
        dev_dataset,
        batch_size=args.test_batch_size,
        sampler=dev_sampler,
        num_workers=args.data_workers,
        collate_fn=data_utils.batchify_varmisuse,
        pin_memory=args.cuda,
        drop_last=args.parallel)

    # -------------------------------------------------------------------------
    # PRINT CONFIG
    logger.print('-' * 100)
    logger.print('CONFIG:\n%s' %
                 json.dumps(vars(args), indent=4, sort_keys=True))

    # --------------------------------------------------------------------------
    # DO TEST
    if args.only_test:
        stats = {
            'timer': Timer(),
            'epoch': 100000,
            'best_valid': 0,
            'no_improvement': 0
        }
        validate_official(args, dev_loader, model, stats, logger, mode='test')
        logger.save(silent=True)

    # --------------------------------------------------------------------------
    # TRAIN/VALID LOOP
    else:
        logger.print('-' * 100)
        logger.print('Starting training...')
        stats = {
            'timer': Timer(),
            'epoch': start_epoch,
            'best_valid': 0,
            'no_improvement': 0
        }

        if args.optimizer in ['sgd', 'adam'] \
                and args.warmup_epochs >= start_epoch:
            logger.print(
                "Use warmup lrate for the %d epoch, from 0 up to %s." %
                (args.warmup_epochs, args.learning_rate))
            num_batches = len(train_loader.dataset) // args.batch_size
            warmup_factor = (args.learning_rate + 0.) / \
                (num_batches * args.warmup_epochs)
            stats['warmup_factor'] = warmup_factor

        for epoch in range(start_epoch, args.num_epochs + 1):
            stats['epoch'] = epoch
            if args.optimizer in ['sgd', 'adam'] \
                    and epoch > args.warmup_epochs + 1:
                # NOTE(review): the companion training script starts decay at
                # epoch > warmup_epochs (no +1) — confirm which is intended.
                model.optimizer.param_groups[0]['lr'] = \
                    model.optimizer.param_groups[0]['lr'] * args.lr_decay

            train(args, train_loader, model, stats, logger)

            if epoch % args.print_fq == 0:
                result = validate_official(args, dev_loader, model, stats,
                                           logger)
                logger.save(silent=True)
            if epoch % args.save_fq == 0:
                model.save(logger.path + '/model_epoch%d.cpt' % epoch)

            # Save best valid
            if ((epoch % args.print_fq == 0) and
                    (result[args.valid_metric] > stats['best_valid'])):
                logger.print('Best valid: %s = %.2f (epoch %d, %d updates)' %
                             (args.valid_metric, result[args.valid_metric],
                              stats['epoch'], model.updates))
                # Fix: persist best_model.cpt only when the validation metric
                # actually improved. Previously it was written on every
                # print_fq epoch (before validation), so a worse model could
                # silently overwrite the best one.
                model.save(logger.path + '/best_model.cpt')
                stats['best_valid'] = result[args.valid_metric]
                stats['no_improvement'] = 0
            else:
                stats['no_improvement'] += 1
                if stats['no_improvement'] >= args.early_stop:
                    break
def main(args, logger):
    """Train or evaluate a Code2NaturalLanguage model.

    Loads (possibly multi-dataset) train/dev examples, computes per-language
    dataset weights, builds or restores the model, constructs data loaders,
    and either runs official validation (``args.only_test``) or the full
    train/validate loop with lr warmup, decay, best-model saving, and early
    stopping.

    Args:
        args: parsed command-line namespace (data files, model files,
            optimizer and loader settings).
        logger: project logger exposing ``print``, ``save``, and ``path``.

    Raises:
        IOError: in only-test mode when ``args.model_file`` does not exist.
    """
    # --------------------------------------------------------------------------
    # DATA
    logger.print('-' * 100)
    logger.print('Load and process data files')

    train_exs = []
    if not args.only_test:
        args.dataset_weights = dict()
        # One (src, src_tag, tgt, rel_matrix) file tuple per named dataset.
        for train_src, train_src_tag, train_tgt, train_rel_matrix, dataset_name in \
                zip(args.train_src_files, args.train_src_tag_files,
                    args.train_tgt_files, args.train_rel_matrix_files,
                    args.dataset_name):
            train_files = dict()
            train_files['src'] = train_src
            train_files['src_tag'] = train_src_tag
            train_files['tgt'] = train_tgt
            train_files["rel_matrix"] = train_rel_matrix
            exs = util.load_data(args,
                                 train_files,
                                 max_examples=args.max_examples,
                                 dataset_name=dataset_name)
            lang_name = constants.DATA_LANG_MAP[dataset_name]
            # Temporarily store the raw example count; normalized below.
            args.dataset_weights[constants.LANG_ID_MAP[lang_name]] = len(exs)
            train_exs.extend(exs)

        logger.print('Num train examples = %d' % len(train_exs))
        args.num_train_examples = len(train_exs)
        # Convert per-language counts into fractions of the training set.
        for lang_id in args.dataset_weights.keys():
            weight = (1.0 * args.dataset_weights[lang_id]) / len(train_exs)
            args.dataset_weights[lang_id] = round(weight, 2)
        logger.print('Dataset weights = %s' % str(args.dataset_weights))

    dev_exs = []
    for dev_src, dev_src_tag, dev_tgt, dev_rel_matrix, dataset_name in \
            zip(args.dev_src_files, args.dev_src_tag_files,
                args.dev_tgt_files, args.dev_rel_matrix_files,
                args.dataset_name):
        dev_files = dict()
        dev_files['src'] = dev_src
        dev_files['src_tag'] = dev_src_tag
        dev_files['tgt'] = dev_tgt
        dev_files["rel_matrix"] = dev_rel_matrix
        exs = util.load_data(args,
                             dev_files,
                             max_examples=args.max_examples,
                             dataset_name=dataset_name,
                             test_split=True)
        dev_exs.extend(exs)
    logger.print('Num dev examples = %d' % len(dev_exs))

    # --------------------------------------------------------------------------
    # MODEL
    logger.print('-' * 100)
    start_epoch = 1
    if args.only_test:
        #if args.pretrained:
        #    model = Code2NaturalLanguage.load(args.pretrained)
        #else:
        # Evaluation only: the trained model file must already exist.
        if not os.path.isfile(args.model_file):
            raise IOError('No such file: %s' % args.model_file)
        model = Code2NaturalLanguage.load(args.model_file)
    else:
        if args.checkpoint and os.path.isfile(args.model_file + '.checkpoint'):
            # Just resume training, no modifications.
            logger.print('Found a checkpoint...')
            checkpoint_file = args.model_file + '.checkpoint'
            model, start_epoch = Code2NaturalLanguage.load_checkpoint(
                checkpoint_file, args.cuda)
        else:
            # Training starts fresh. But the model state is either pretrained or
            # newly (randomly) initialized.
            if args.pretrained:
                logger.print('Using pretrained model...')
                model = Code2NaturalLanguage.load(args.pretrained, args)
            else:
                logger.print('Training model from scratch...')
                model = init_from_scratch(args, train_exs, dev_exs, logger)

        # Set up optimizer
        model.init_optimizer()
        # log the parameter details
        logger.print(
            'Trainable #parameters [encoder-decoder] {} [total] {}'.format(
                human_format(model.network.count_encoder_parameters() +
                             model.network.count_decoder_parameters()),
                human_format(model.network.count_parameters())))
        table = model.network.layer_wise_parameters()
        logger.print('Breakdown of the trainable paramters\n%s' % table)

    # Use the GPU?
    if args.cuda:
        model.cuda()

    if args.parallel:
        model.parallelize()

    # --------------------------------------------------------------------------
    # DATA ITERATORS
    # Two datasets: train and dev. If we sort by length it's faster.
    logger.print('-' * 100)
    logger.print('Make data loaders')

    if not args.only_test:
        train_dataset = data.CommentDataset(train_exs, model)
        if args.sort_by_len:
            train_sampler = data.SortedBatchSampler(train_dataset.lengths(),
                                                    args.batch_size,
                                                    shuffle=True)
        else:
            train_sampler = torch.utils.data.sampler.RandomSampler(
                train_dataset)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            sampler=train_sampler,
            num_workers=args.data_workers,
            collate_fn=vector.batchify,
            pin_memory=args.cuda,
            # drop_last avoids a ragged final batch under DataParallel.
            drop_last=args.parallel)

    dev_dataset = data.CommentDataset(dev_exs, model)
    dev_sampler = torch.utils.data.sampler.SequentialSampler(dev_dataset)
    dev_loader = torch.utils.data.DataLoader(dev_dataset,
                                             batch_size=args.test_batch_size,
                                             sampler=dev_sampler,
                                             num_workers=args.data_workers,
                                             collate_fn=vector.batchify,
                                             pin_memory=args.cuda,
                                             drop_last=args.parallel)

    # -------------------------------------------------------------------------
    # PRINT CONFIG
    logger.print('-' * 100)
    logger.print('CONFIG:\n%s' %
                 json.dumps(vars(args), indent=4, sort_keys=True))

    # --------------------------------------------------------------------------
    # DO TEST
    if args.only_test:
        stats = {
            'timer': Timer(),
            'epoch': 100000,
            'best_valid': 0,
            'no_improvement': 0
        }
        validate_official(args, dev_loader, model, stats, logger, mode='test')

    # --------------------------------------------------------------------------
    # TRAIN/VALID LOOP
    else:
        logger.print('-' * 100)
        logger.print('Starting training...')
        stats = {
            'timer': Timer(),
            'epoch': start_epoch,
            'best_valid': 0,
            'no_improvement': 0
        }

        if args.optimizer in ['sgd', 'adam'] \
                and args.warmup_epochs >= start_epoch:
            logger.print(
                "Use warmup lrate for the %d epoch, from 0 up to %s." %
                (args.warmup_epochs, args.learning_rate))
            num_batches = len(train_loader.dataset) // args.batch_size
            # Per-update lr increment used during the warmup phase
            # (presumably consumed inside train() — confirm).
            warmup_factor = (args.learning_rate + 0.) / \
                (num_batches * args.warmup_epochs)
            stats['warmup_factor'] = warmup_factor

        for epoch in range(start_epoch, args.num_epochs + 1):
            stats['epoch'] = epoch
            # Multiplicative lr decay kicks in only once warmup has finished.
            if args.optimizer in ['sgd', 'adam'] \
                    and epoch > args.warmup_epochs:
                model.optimizer.param_groups[0]['lr'] = \
                    model.optimizer.param_groups[0]['lr'] * args.lr_decay

            train(args, train_loader, model, stats, logger)

            if epoch % args.print_fq == 0:
                result = validate_official(args, dev_loader, model, stats,
                                           logger)
                logger.save(silent=True)

            # Save best valid
            # (short-circuit on print_fq guards against `result` being unset)
            if ((epoch % args.print_fq == 0) and
                    (result[args.valid_metric] > stats['best_valid'])):
                logger.print('Best valid: %s = %.2f (epoch %d, %d updates)' %
                             (args.valid_metric, result[args.valid_metric],
                              stats['epoch'], model.updates))
                model.save(logger.path + '/best_model.cpt')
                stats['best_valid'] = result[args.valid_metric]
                stats['no_improvement'] = 0
            else:
                stats['no_improvement'] += 1
                if stats['no_improvement'] >= args.early_stop:
                    break
def main():
    """Run the active-learning training loop for the gpat/Camelyon task.

    Parses command-line options, loads the pickled train/test datasets,
    then repeatedly (up to 100 rounds): trains a fresh CNN on the currently
    labeled pool, evaluates on a fixed 10k test subset, and queries new
    samples to label (randomly or via committee-based active sampling)
    until the labeled pool reaches 10k, at which point the process exits.
    """
    parser = argparse.ArgumentParser(description='gpat train')
    parser.add_argument("out")
    parser.add_argument('--resume', default=None)
    parser.add_argument('--log_dir', default='runs_active')
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--iterations', default=10**5, type=int,
                        help='number of iterations to learn')
    parser.add_argument('--interval', default=100, type=int,
                        help='number of iterations to evaluate')
    parser.add_argument('--batch_size', '-b', type=int, default=64,
                        help='learning minibatch size')
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--loaderjob', type=int, default=8)
    parser.add_argument('--hed', dest='hed', action='store_true',
                        default=False)
    parser.add_argument('--from_tiff', dest='from_tiff', action='store_true',
                        default=False)
    parser.add_argument('--no-texture', dest='texture', action='store_false',
                        default=True)
    parser.add_argument('--cbp', dest='cbp', action='store_true',
                        default=False)
    parser.add_argument('--no-color_aug', dest='color_aug',
                        action='store_false', default=True)
    parser.add_argument('--model_test', default='', type=str)
    parser.add_argument('--arch', default='googlenet', choices=[
        'texturecnn', 'resnet', 'googlenet', 'vgg', 'alex', 'trained',
        'resume'
    ])
    parser.add_argument('--opt', default='adam', choices=['adam', 'momentum'])
    parser.add_argument('--train_path', default='train_extracted_dataset.pkl')
    parser.add_argument('--test_path', default='test_extracted_dataset.pkl')
    parser.add_argument('--epoch_interval', default=20, type=int)
    parser.add_argument('--active_sample_size', type=int, default=100)
    parser.add_argument('--no-every_init', dest='every_init',
                        action='store_false', default=True)
    parser.add_argument('--random_sample', action='store_true', default=False)
    parser.add_argument('--fixed_ratio', action='store_true', default=False)
    parser.add_argument('--label_init', choices=['random', 'clustering'],
                        default='clustering')
    parser.add_argument('--init_size', default=100, type=int)
    parser.add_argument('--uncertain', action='store_true', default=False)
    parser.add_argument('--uncertain_with_dropout', action='store_true',
                        default=False)
    parser.add_argument('--uncertain_strategy',
                        choices=['entropy', 'least_confident', 'margin'],
                        default='margin')
    parser.add_argument('--clustering', action='store_true', default=False)
    parser.add_argument('--kmeans_cache',
                        default='initial_clustering_result.pkl')
    parser.add_argument('--initial_label_cache',
                        default='initial_label_cache.npy')
    parser.add_argument('--query_by_committee', action='store_true',
                        default=False)
    parser.add_argument('--qbc_strategy', choices=['vote', 'average_kl'],
                        default='average_kl')
    parser.add_argument('--committee_size', default=10, type=int)
    parser.add_argument('--aug_in_inference', action='store_true',
                        default=False)
    args = parser.parse_args()

    device = args.gpu
    # log directory
    logger.init(args)

    # load data
    # NOTE(review): np.load on .pkl paths implies pickled arrays — the
    # files must come from a trusted source (pickle is unsafe on
    # untrusted input).
    train_dataset = np.load(os.path.join(dataset_path, args.train_path))
    test_dataset = np.load(os.path.join(dataset_path, args.test_path))
    num_class = 2
    image_size = 256
    crop_size = 224
    preprocess_type = args.arch if not args.hed else 'hed'

    # Evaluate on a fixed random subset of at most 10k test samples.
    perm = np.random.permutation(len(test_dataset))[:10000]
    test_dataset = [test_dataset[idx] for idx in perm]
    test = CamelyonDatasetEx(test_dataset,
                             original_size=image_size,
                             crop_size=crop_size,
                             aug=False,
                             color_aug=False,
                             preprocess_type=preprocess_type)
    test_iter = iterators.MultiprocessIterator(test,
                                               args.batch_size,
                                               repeat=False,
                                               shuffle=False)

    # Precomputed compact-bilinear-pooling features for the train pool.
    cbp_feat = np.load('train_cbp512_feat.npy')
    labeled_data, unlabeled_data, feat = initialize_labeled_dataset(
        args, train_dataset, cbp_feat)
    print('now {} labeled samples, {} unlabeled'.format(
        len(labeled_data), len(unlabeled_data)))

    # start training
    # NOTE(review): 'ProgresssReporter' (triple s) matches the external
    # class name — do not "fix" the spelling here alone.
    reporter = ProgresssReporter(args)
    for iteration in range(100):
        # model construct — a fresh model is built every round.
        if args.texture:
            model = BilinearCNN(base_cnn=args.arch,
                                pretrained_model='auto',
                                num_class=num_class,
                                texture_layer=None,
                                cbp=args.cbp,
                                cbp_size=4096)
        else:
            model = TrainableCNN(base_cnn=args.arch,
                                 pretrained_model='auto',
                                 num_class=num_class)
        # set optimizer
        optimizer = make_optimizer(model, args.opt, args.lr)
        # use gpu
        cuda.get_device_from_id(device).use()
        model.to_gpu()

        labeled_dataset = CamelyonDatasetEx(labeled_data,
                                            original_size=image_size,
                                            crop_size=crop_size,
                                            aug=True,
                                            color_aug=True,
                                            preprocess_type=preprocess_type)
        labeled_iter = iterators.MultiprocessIterator(labeled_dataset,
                                                      args.batch_size)

        # train phase
        count = 0
        train_loss = 0
        train_acc = 0
        # Train twice as long once the labeled pool is large
        # (presumably len(labeled_data[0]) is the pool size — confirm).
        epoch_interval = args.epoch_interval if len(
            labeled_data[0]) < 10000 else args.epoch_interval * 2
        anneal_epoch = int(epoch_interval * 0.8)
        while labeled_iter.epoch < epoch_interval:
            # train with labeled dataset
            batch = labeled_iter.next()
            x, t = chainer.dataset.concat_examples(batch, device=device)
            optimizer.update(model, x, t)
            reporter(labeled_iter.epoch)
            # One-shot lr anneal at 80% of the training schedule.
            if labeled_iter.is_new_epoch and labeled_iter.epoch == anneal_epoch:
                optimizer.alpha *= 0.1
            # Accumulate loss/accuracy only over the last few epochs.
            if labeled_iter.epoch > args.epoch_interval - 5:
                count += len(batch)
                train_loss += model.loss.data * len(batch)
                train_acc += model.accuracy.data * len(batch)
            # if labeled_iter.is_new_epoch:
            #     train_loss_tmp = cuda.to_cpu(train_loss) / len(labeled_iter.dataset)
            #     loss_history.append(train_loss_tmp - np.sum(loss_history))
        reporter.reset()
        logger.plot('train_loss', cuda.to_cpu(train_loss) / count)
        logger.plot('train_accuracy', cuda.to_cpu(train_acc) / count)

        # test
        print('\ntest')
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            evaluate_ex(model, test_iter, device)
        # logger
        logger.flush()

        # Stop once the labeled pool has reached the 10k budget.
        if len(labeled_data[0]) >= 10000:
            print('done')
            exit()

        # Score at most 10k randomly chosen unlabeled samples per round.
        tmp_indices = np.random.permutation(len(unlabeled_data))[:10000]
        tmp_unlabeled_data = [unlabeled_data[idx] for idx in tmp_indices]
        tmp_cbp_feat = cbp_feat[tmp_indices]
        unlabeled_dataset = CamelyonDatasetEx(
            tmp_unlabeled_data,
            original_size=image_size,
            crop_size=crop_size,
            aug=args.aug_in_inference,
            color_aug=args.aug_in_inference,
            preprocess_type=preprocess_type)
        unlabeled_iter = iterators.MultiprocessIterator(unlabeled_dataset,
                                                        args.batch_size,
                                                        repeat=False,
                                                        shuffle=False)
        # committee member x sample x class-probability predictions
        preds = np.zeros((args.committee_size, len(tmp_unlabeled_data), 2))
        # feat = np.zeros((len(unlabeled_iter.dataset), 784))
        if args.random_sample:
            tmp_query_indices = np.random.permutation(
                len(tmp_unlabeled_data))[:args.active_sample_size]
        else:
            # Multiple stochastic forward passes form the "committee"
            # (augmentation only varies when --aug_in_inference is set).
            loop_num = args.committee_size
            for loop in range(loop_num):
                count = 0
                for batch in unlabeled_iter:
                    x, t = chainer.dataset.concat_examples(batch,
                                                           device=device)
                    with chainer.no_backprop_mode():
                        y = F.softmax(model.forward(x))
                    preds[loop, count:count + len(batch)] = cuda.to_cpu(y.data)
                    count += len(batch)
                    # if loop == 0:
                    #     feat[i * batch_size: (i + 1) * batch_size] = cuda.to_cpu(x)
                unlabeled_iter.reset()
            tmp_query_indices = active_annotation(preds, tmp_cbp_feat,
                                                  opt=args)

        # active sampling
        print('active sampling: ', end='')
        if iteration % 10 == 0:
            logger.save(model,
                        [tmp_unlabeled_data[idx] for idx in tmp_query_indices])
        # Map subset indices back into the full unlabeled pool and move
        # the queried samples into the labeled pool.
        query_indices = tmp_indices[tmp_query_indices]
        labeled_data, unlabeled_data, cbp_feat = query_dataset(
            labeled_data, unlabeled_data, cbp_feat, query_indices)
        print('now {} labeled samples, {} unlabeled'.format(
            len(labeled_data), len(unlabeled_data)))
def save_entry(sender):
    """Collect the entry from the sender's superview, save it, and alert.

    Simpler variant of the journal-file handler above: delegates the
    destination to ``log.save``'s default and only reports the outcome.
    """
    container = sender.superview
    entry_time = get_datetime(container)
    entry = get_info(container, entry_time)
    saved = log.save(entry)
    save_status_alert(saved)