def main(args):
    # preprocessor = Preprocessor(args.embedding_path)
    # train, valid = preprocessor.get_train_valid_dataset(args.data_path)
    with open(args.pickle_path, 'rb') as f:
        data = pickle.load(f)
    preprocessor = data['preprocessor']
    train, valid = data['train'], data['valid']

    if args.arch == 'NSMv1':
        from torch_solver import TorchSolver
        solver = TorchSolver(preprocessor.get_word_dim(),
                             args.dim_hidden,
                             valid=valid,
                             batch_size=args.batch_size,
                             n_epochs=args.n_epochs,
                             learning_rate=args.learning_rate,
                             device=args.device,
                             decoder_use_state=args.decoder_use_state)

    # load model
    if args.load is not None:
        solver.load(args.load)

    if not args.five_fold:
        model_checkpoint = ModelCheckpoint(args.model_path, 'loss', 1, 'all')
        metrics_logger = MetricsLogger(args.log_path)
        solver.fit_dataset(train, [model_checkpoint, metrics_logger])
    else:
        from utils import MWPDataset
        problems = train._problems
        fold_indices = [int(len(problems) * 0.2) * i for i in range(6)]
        for fold in range(5):
            train = []
            for j in range(5):
                if j != fold:
                    start = fold_indices[j]
                    end = fold_indices[j + 1]
                    train += problems[start:end]

            transform = \
                PermuteStackOps(args.revert_prob, args.transpose_prob) \
                if args.permute else None
            train = MWPDataset(train, preprocessor.indices_to_embeddings)

            logging.info('Start training fold {}'.format(fold))
            model_checkpoint = ModelCheckpoint(
                '{}.fold{}'.format(args.model_path, fold), 'loss', 1, 'all')
            metrics_logger = MetricsLogger('{}.fold{}'.format(args.log_path, fold))
            solver = TorchSolver(preprocessor.get_word_dim(),
                                 args.dim_hidden,
                                 valid=valid,
                                 batch_size=args.batch_size,
                                 n_epochs=args.n_epochs,
                                 learning_rate=args.learning_rate,
                                 device=args.device)
            solver.fit_dataset(train, [model_checkpoint, metrics_logger])
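# --- Hedged illustration (not part of the original script): the five-fold branch above
# --- builds each training fold by concatenating four 20% slices of `problems`. A minimal
# --- standalone sketch of that index arithmetic, assuming a hypothetical list of 1000 items:
problems = list(range(1000))
fold_indices = [int(len(problems) * 0.2) * i for i in range(6)]  # [0, 200, 400, 600, 800, 1000]
for fold in range(5):
    train_fold = []
    for j in range(5):
        if j != fold:
            train_fold += problems[fold_indices[j]:fold_indices[j + 1]]
    # each fold trains on the other four 20% slices, i.e. 800 of the 1000 items
    print(fold, len(train_fold))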
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        valid = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    PredictorClass = BestPredictor
    predictor = PredictorClass(metrics=[Recall(at=1), Recall(at=5)],
                               **config['model_parameters'])

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), 'loss', 1, 'all')
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))

    logging.info('start training!')
    predictor.fit_dataset(train, valid, train.collate_fn,
                          [model_checkpoint, metrics_logger])
def main(args):
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    model_dir, exp_dir = os.path.split(
        args.output_dir[:-1]) if args.output_dir[-1] == '/' else os.path.split(
            args.output_dir)
    config_path = os.path.join(model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info(f'Save config file to {args.output_dir}.')
    with open(os.path.join(args.output_dir, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)

    logging.info('Loading training data...')
    with open(config['train_path'], 'rb') as f:
        train = pickle.load(f)
    train.context_padded_len = config['train_context_padded_len']
    train.option_padded_len = config['train_option_padded_len']
    train.n_negative = config['train_n_negative']

    logging.info('Loading validation data...')
    with open(config['valid_path'], 'rb') as f:
        valid = pickle.load(f)
        config['model_parameters']['valid'] = valid
    valid.context_padded_len = config['valid_context_padded_len']
    valid.option_padded_len = config['valid_option_padded_len']

    logging.info('Loading preprocessed word embedding...')
    with open(config['embedding_path'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding

    metric = Recall(at=10)
    predictor = Predictor(training=True,
                          metrics=[metric],
                          device=args.device,
                          **config['model_parameters'])
    model_checkpoint = ModelCheckpoint(
        filepath=os.path.join(args.output_dir, 'model'),
        monitor=metric.name,
        mode='max',
        all_saved=False)
    metrics_logger = MetricsLogger(
        log_dest=os.path.join(args.output_dir, 'log.json'))

    if args.load_dir is not None:
        predictor.load(args.load_dir)

    logging.info('Start training.')
    start = time.time()
    predictor.fit_dataset(data=train,
                          collate_fn=train.collate_fn,
                          callbacks=[model_checkpoint, metrics_logger],
                          output_dir=args.output_dir)
    end = time.time()
    total = end - start
    hrs, mins, secs = int(total // 3600), int((total % 3600) // 60), int(total % 60)
    logging.info('End training.')
    logging.info(f'Total time: {hrs}hrs {mins}mins {secs}secs.')
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading valid data...')
    with open(config['model']['valid'], 'rb') as f:
        config['model']['valid'] = pickle.load(f)

    logging.info('loading character vocabulary...')
    with open(config['charmap'], 'rb') as f:
        charmap = pickle.load(f)

    logging.info('loading word vocabulary...')
    with open(config['wordmap'], 'rb') as f:
        wordmap = pickle.load(f)

    config['model']['num_embeddings'] = len(charmap)
    config['model']['padding_idx'] = charmap['<PAD>']
    config['model']['vocab_size'] = len(wordmap)

    predictor = Predictor(arch=config['arch'],
                          device=args.device,
                          metrics=[Perplexity()],
                          **config['model'])

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), **config['callbacks'])
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))

    logging.info('start training!')
    predictor.fit_dataset(config['train'], model_checkpoint, metrics_logger)
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model']['valid'], 'rb') as f:
        config['model']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    predictor = Predictor(arch=config['arch'],
                          device=args.device,
                          metrics=[Recall()],
                          **config['model'])

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), **config['callbacks'])
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          model_checkpoint, metrics_logger)
def main(args):
    print(args)
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    if config['arch'] == 'ExampleNet':
        from example_predictor import ExamplePredictor
        PredictorClass = ExamplePredictor

    predictor = PredictorClass(metrics=[Recall(1), Recall(10)],
                               **config['model_parameters'])

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), 'loss', 1, 'all')
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger])
def __init__(self, env, agent, trainer_cfg) -> None:
    self.cfg = trainer_cfg
    self.env = env
    self.agent = agent
    run_cfg = {
        'agent': OmegaConf.to_container(self.agent.cfg),
        'trainer': OmegaConf.to_container(self.cfg)
    }
    self.callback_runner = CallbackRunner(
        WandBLogger('atari-Q',
                    config=run_cfg,
                    plot_every=500,
                    tags=[self.env.spec.id]),
        ModelCheckpoint(best_only=True),
        ReplayBuffer(self.cfg.replay_buffer),
        TargetModelUpdater(self.cfg.target_step),  # runs weight sync for target model
        model=self.agent.model,
        target_model=self.agent.target_model,
        env=self.env,
    )
    self.eval_env = gym.wrappers.Monitor(
        self.env,
        self.callback_runner.WandBLogger.run.dir,
        video_callable=lambda episode_id: True,
        mode='evaluation')
    self.optimizer = configure_optimizer(self.cfg, self.agent.model)
    self.criterion = configure_loss(self.cfg)
    self.train_steps = 0
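# --- Hedged sketch (assumption, not the project's code): the TargetModelUpdater callback
# --- above is annotated as "runs weight sync for target model". In a typical DQN-style
# --- setup that sync copies the online network's weights into the target network every
# --- cfg.target_step steps, roughly like this:
import copy
import torch.nn as nn

online_model = nn.Linear(4, 2)               # stand-in for agent.model
target_model = copy.deepcopy(online_model)   # stand-in for agent.target_model

def maybe_sync_target(step, every=1000):
    # copy weights from the online model into the frozen target model
    if step % every == 0:
        target_model.load_state_dict(online_model.state_dict())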
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info(f"Using cuda device: {config['cuda_ids']}")
    os.environ['CUDA_VISIBLE_DEVICES'] = config['cuda_ids']

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    if config['arch'] == 'ExampleNet':
        from example_predictor import ExamplePredictor
        PredictorClass = ExamplePredictor
    elif config['arch'] == 'RnnBaselineNet':
        from rnnbaseline_predictor import RnnBaselinePredictor
        PredictorClass = RnnBaselinePredictor
    elif config['arch'] == 'RnnAttentionNet':
        from rnnattention_predictor import RnnAttentionPredictor
        PredictorClass = RnnAttentionPredictor
    elif config['arch'] == 'RnnTransformerNet':
        from rnntransformer_predictor import RnnTransformerPredictor
        PredictorClass = RnnTransformerPredictor

    predictor = PredictorClass(
        metrics=[Recall(), Recall(1), Recall(5)],
        **config['model_parameters']
    )

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), 'loss', 1, 'all'
    )
    metrics_logger = MetricsLogger(
        os.path.join(args.model_dir, 'log.json')
    )

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger])
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)
    # print(train)  # dataset.DialogDataset object
    # word2index = embedding.word_dict
    # index2word = {v: k for k, v in word2index.items()}

    if config['arch'] == 'ExampleNet':
        # from example_predictor import ExamplePredictor
        # from rnn_predictor import RNNPredictor
        # from best_predictor import BestRNNAttPredictor
        from rnnatt_predictor import RNNAttPredictor
        # PredictorClass = ExamplePredictor
        # PredictorClass = RNNPredictor
        PredictorClass = RNNAttPredictor
        # PredictorClass = BestRNNAttPredictor

    # print("config['model_parameters']: ", config['model_parameters'])
    # it's a dict: {'valid': dataset.DialogDataset object, 'embedding': a big tensor}
    # print("**config['model_parameters']: ", **config['model_parameters'])
    predictor = PredictorClass(metrics=[Recall()], **config['model_parameters'])
    # **dict: https://stackoverflow.com/questions/21809112/what-does-tuple-and-dict-means-in-python
    # input()

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model_rnnatt_6_negative_samples_0324.pkl'),
        'loss', 1, 'all')
    metrics_logger = MetricsLogger(
        os.path.join(args.model_dir, 'log_rnnatt_6_neagtive_samples_0324.json'))

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger])
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading word dictionary...')
    with open(config['words_dict'], 'rb') as f:
        words_dict = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    logging.info('loading validation data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        valid = pickle.load(f)
        config['model_parameters']['valid'] = valid

    if args.lr_finder:
        pass
    else:
        if config['arch'] == 'Predictor':
            from predictor import Predictor
            PredictorClass = Predictor

        predictor = PredictorClass(metrics=[Accuracy()],
                                   word_dict=words_dict,
                                   **config['model_parameters'])
        metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))

        if args.load is not None:
            predictor.load(args.load)
            try:
                metrics_logger.load(int(args.load.split('.')[-1]))
            except:
                metrics_logger.load(448)

        model_checkpoint = ModelCheckpoint(
            os.path.join(args.model_dir, 'model.pkl'), 'Accuracy', 1, 'max')

        logging.info('start training!')
        predictor.fit_dataset(train, train.collate_fn,
                              [model_checkpoint, metrics_logger])
def main(args, config_path):
    logging.info('Loading configuration file from {}'.format(config_path))
    with open(config_path) as f:
        config = json.load(f)

    embedding_pkl_path = os.path.join(args.model_dir, config["embedding_pkl_path"])
    train_pkl_path = os.path.join(args.model_dir, config["train_pkl_path"])
    val_pkl_path = os.path.join(args.model_dir, config["val_pkl_path"])
    labelEncoder_path = os.path.join(args.model_dir, config["labelEncoder_path"])

    with open(embedding_pkl_path, "rb") as f:
        config["model_parameters"]["embedding"] = pickle.load(f).vectors
        logging.info("Load embedding from {}".format(embedding_pkl_path))
    with open(train_pkl_path, "rb") as f:
        train = pickle.load(f)
        logging.info("Load train from {}".format(train_pkl_path))
    with open(val_pkl_path, "rb") as f:
        config["model_parameters"]["valid"] = pickle.load(f)
        logging.info("Load val from {}".format(val_pkl_path))
    with open(labelEncoder_path, "rb") as f:
        config["model_parameters"]["labelEncoder"] = pickle.load(f)
        logging.info("Load labelEncoder from {}".format(labelEncoder_path))

    predictor = Predictor(metric=Metric(), **config["model_parameters"])

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'), 'loss', 1, 'all')
    metrics_logger = MetricsLogger(
        os.path.join(args.model_dir, 'log.json'))
    tensorboard = Tensorboard(config["tensorboard"])

    logging.info("start training!")
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger, tensorboard])
    momentum=args.momentum)

logger.info('Number of model parameters: {:,}'.format(
    sum([p.data.nelement() for p in model.parameters()])))

trainer = Trainer(model, optimizer, watch=['acc'], val_watch=['acc'])

if args.is_train:
    logger.info("Train on {} samples, validate on {} samples".format(
        len(train_loader.dataset), len(val_loader.dataset)))
    start_epoch = 0
    if args.resume:
        start_epoch = load_checkpoint(args.ckpt_dir, model, optimizer)
    trainer.train(train_loader,
                  val_loader,
                  start_epoch=start_epoch,
                  epochs=args.epochs,
                  callbacks=[
                      PlotCbk(model, args.plot_num_imgs, args.plot_freq,
                              args.use_gpu),
                      TensorBoard(model, args.log_dir),
                      ModelCheckpoint(model, optimizer, args.ckpt_dir),
                      LearningRateScheduler(
                          ReduceLROnPlateau(optimizer, 'min'), 'val_loss'),
                      EarlyStopping(model, patience=args.patience)
                  ])
else:
    logger.info("Test on {} samples".format(len(test_loader)))
    load_checkpoint(args.ckpt_dir, model, best=True)
    trainer.test(test_loader, best=args.best)
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    # logging.info('loading embedding...')
    # with open(config['model_parameters']['embeddings'], 'rb') as f:
    #     embeddings = pickle.load(f)
    #     config['model_parameters']['embeddings'] = embeddings

    logging.info('loading dev data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)
    if 'train_max_len' in config:
        train.data = list(
            filter(lambda s: s['context_len'] < config['train_max_len'],
                   train.data)
        )

    if config['arch'] == 'BiDAF':
        from bidaf_predictor import BiDAFPredictor
        PredictorClass = BiDAFPredictor
    elif config['arch'] == 'QANet':
        from qanet_predictor import QANetPredictor
        PredictorClass = QANetPredictor
    elif config['arch'] == 'BERT':
        from bert_predictor import BERTPredictor
        PredictorClass = BERTPredictor

    if config['arch'] != 'XLNet':
        predictor = PredictorClass(
            metrics=[SimpleEM(), QuACF1()],
            **config['model_parameters']
        )
    else:
        from bert_predictor import BERTPredictor
        predictor = BERTPredictor(
            metrics=[SimpleEM(), QuACF1()],
            ctx_emb='xlnet',
            **config['model_parameters']
        )

    if args.load is not None:
        predictor.load(args.load)

    model_checkpoint = ModelCheckpoint(
        os.path.join(args.model_dir, 'model.pkl'),
        'loss', 1, 'all'
    )
    metrics_logger = MetricsLogger(
        os.path.join(args.model_dir, 'log.json')
    )

    logging.info('start training!')
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger])
os.makedirs(res_dir)

# Callback for saving logs
keras_logs = os.path.join(res_dir, 'keras_logs')
if not os.path.exists(keras_logs):
    os.makedirs(keras_logs)
csv_logger = CSVLogger(os.path.join(keras_logs, "1hidden128" + '.csv'),
                       append=True, separator=';')

# Callback for saving models
keras_states = os.path.join(res_dir, 'keras_states')
if not os.path.exists(keras_states):
    os.makedirs(keras_states)
saver = ModelCheckpoint(os.path.join(keras_states,
                                     "1hidden128" + '.epoch{epoch}.state'),
                        verbose=1, period=1)

# I use StratifiedKFold to split the old retrospective dataset into 15.
# Each fifteenth is combined with the full prospective train dataset for training.
nb_split = 50
skf = StratifiedKFold(n_splits=nb_split)
i_split = 0
for _, train_index in skf.split(ancien_data_x, ancien_data_y):
    print("SPLIT NUMBER", str(i_split), "OF", str(nb_split))
    # X_train = ancien_data_x.loc[train_index]
    # y_train = ancien_data_y.loc[train_index]
    #
    # X_train = pd.concat([X_train, X_train_pros])
    # y_train = pd.concat([y_train, y_train_pros])
    #
    imY = imY.reshape(imY.shape + (1,))
    imX = imX.astype(numpy.float32)
    imY = imY.astype(numpy.float32)
    validate_imX_mat[run_index] = imX
    validate_imY_mat[run_index] = imY
    # == Note: do data normalization here to reduce memory footprint ==#
    run_index += 1

""" Data arrays are filled """

latest_epoch, latest_model = 0, None
ModelCheckpoint.remove_all_checkpoints(modelfile, weightfile)

"""
Our UNet model is defined in the module unetwork stored locally.
Using a factory class we can easily generate a custom model in very few steps:

    unet_factory.dropout = None
    unet_factory.convolution_kernel_size = 3
    unet_factory.batch_normalization = False
    unet_factory.begin(image_shape=image_shape)
    unet_factory.generateLevels(init_filter_count=32, recursive_depth=4)
    model = unet_factory.finalize(final_filter_count=1)

Here, using the specific network of Maier et al. 2018 on Deep Scatter Estimation (DSE)
"""
nnet = UNetFactory()
def __init__(self, args):
    logging.basicConfig(format='%(asctime)s | %(levelname)s | %(message)s',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S')
    logging.info('Initiating task: %s' % args.taskname)
    self.config = config(args)

    if not all([os.path.isfile(i) for i in self.config.pickle_files]):
        logging.info('Preprocessing data.....')
        if args.pick == 'neg4':
            build_processed_data(self.config.datadir,
                                 self.config.pickledir,
                                 neg_num=4)
        elif args.pick == 'last':
            build_processed_data(self.config.datadir,
                                 self.config.pickledir,
                                 last=True)
        elif args.pick == 'difemb':
            build_processed_data(self.config.datadir,
                                 self.config.pickledir,
                                 difemb=True)
        else:
            build_processed_data(self.config.datadir, self.config.pickledir)
    else:
        logging.info('Preprocessing already done.')

    with open(os.path.join(self.config.pickledir, 'embedding.pkl'), 'rb') as f:
        embedding = pickle.load(f)
    embedding = embedding.vectors
    self.embedding_dim = embedding.size(1)
    self.embedding = torch.nn.Embedding(embedding.size(0), embedding.size(1))
    self.embedding.weight = torch.nn.Parameter(embedding)

    self.Modelfunc = {
        'lin': LinearNet,
        'rnn': RnnNet,
        'att': RnnAttentionNet,
        'best': BestNet,
        'gru': GruNet,
        'last': LastNet,
    }

    if os.path.exists(self.config.outputdir):
        if args.resume == False:
            logging.info('Warning, task already exists, add --resume True, exiting')
            sys.exit(0)
        else:
            logging.info('Resuming....')
            with open(self.config.modeltype_path, 'r') as f:
                resume_type = f.read()
            self.model = self.Modelfunc[resume_type]
            logging.info('model type is %s, model to be constructed' % resume_type)
    else:
        os.mkdir(self.config.outputdir)
        with open(self.config.modeltype_path, 'w') as f:
            f.write(args.modeltype)
        self.model = self.Modelfunc[args.modeltype](self.embedding_dim)
        logging.info('model type is %s, model created' % args.modeltype)

    model_checkpoint = ModelCheckpoint(self.config.modelpath, 'loss', 1, 'all')
    metrics_logger = MetricsLogger(self.config.logpath)

    if args.resume:
        self.config.start_epoch = metrics_logger.load()
        if args.resume_epoch != -1:
            self.config.resumepath = self.config.modelpath + '.%d' % self.config.resume_epoch
        else:
            self.config.resumepath = self.config.modelpath + '.%d' % (
                self.config.start_epoch - 1)
        self.model = self.model(self.embedding_dim)
        self.model.load_state_dict(torch.load(self.config.resumepath))
        logging.info('config loaded, model constructed and loaded')

    print(self.model)
    logging.info('loading dataloaders')
    self.trainloader, self.testloader, self.validloader = make_dataloader(
        self.config.pickledir)
    self.metrics = [Recall()]
    self.callbacks = [model_checkpoint, metrics_logger]
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if self.device == 'cuda':
        self.model.to(self.device)
        self.embedding.to(self.device)
    self.criterion = torch.nn.BCEWithLogitsLoss()
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.config.lr)
def main():
    parser = argparse.ArgumentParser(description='CNN Training')
    parser.add_argument('--batch', type=int, default=48)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--data', type=str, default='/data2/RCA2017/',
                        help='directory where images are stored')
    parser.add_argument('--output', type=str, required=True)
    parser.add_argument('--skip', type=bool, default=True)
    parser.add_argument('--load_weights', type=bool, default=False)
    parser.add_argument('--load_model', type=bool, default=False)
    parser.add_argument('--cropem', type=bool, default=False)
    parser.add_argument('--maskout', type=bool, default=False)
    parser.add_argument('--numaugs', type=int, default=4)
    parser.add_argument('--verbose', type=int, default=0)
    args = parser.parse_args()

    cropem = args.cropem
    maskout = args.maskout
    numAugs = args.numaugs
    verbose = args.verbose
    imdims = [128, 128, 8, 1]

    sys.stdout.write('[*]\tSplitting train/validation/test\n')
    X_train, y_train, X_val, y_val, X_test, y_test = get_split(
        args.data, 'bbdata.npy', 1)

    ### DEFINE THE MODEL ###
    if args.load_model:
        '''Try to load the model from the weights, if not just load from the model file'''
        weight_files = [
            os.path.join(args.output, x) for x in os.listdir(args.output)
            if x.startswith("weights-") and x.endswith(".h5py")
        ]
        modelPath = max(weight_files, key=os.path.getctime)
        try:
            model = load_model(modelPath)
        except:
            print("model path does not exist, starting again: {}".format(modelPath))
        initial_epoch = int(modelPath.split('improvement-')[-1].split('-')[0])
        sys.stdout.write('[*]\tModel loaded: {}\n'.format(modelPath))
        sys.stdout.flush()
    else:
        model = Resnet3DBuilder.build_resnet_101([128, 128, 8, 1], 2)
        initial_epoch = 0

    lr = 0.0001
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=lr, decay=0.005),
                  metrics=['acc'])

    ''' SOME HYPERPARAMETERS '''
    if verbose == 2:
        model.summary()

    if not os.path.exists(os.path.abspath(args.output)):
        os.makedirs(os.path.abspath(args.output))
    plot_model(model, show_shapes=True,
               to_file=os.path.join(args.output, 'model.png'))

    logdir = os.path.join(args.output, 'logs')
    checkpoint_filepath = os.path.join(
        args.output, "weights-improvement-{epoch:02d}-{val_acc:.3f}.h5py")
    checkpoint = ModelCheckpoint(checkpoint_filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    tbCallBack = TensorBoard(log_dir=logdir,
                             histogram_freq=0,
                             write_batch_performance=True,
                             write_graph=True,
                             write_images=True)

    history = model.fit_generator(
        generator=data_generator(X_train, y_train, args.batch, imdims,
                                 numAugs=numAugs, cropem=cropem,
                                 maskout=maskout, dataroot=args.data),
        use_multiprocessing=True,
        max_queue_size=20,
        steps_per_epoch=int(len(X_train) / args.batch) * numAugs,
        validation_data=data_generator(X_val, y_val, args.batch, imdims,
                                       phase='val', numAugs=0, cropem=cropem,
                                       maskout=maskout, dataroot=args.data),
        validation_steps=int(len(X_val) / args.batch),
        epochs=args.epoch,
        verbose=1,
        callbacks=[checkpoint, tbCallBack],
        initial_epoch=initial_epoch)

    model.save(os.path.join(args.output, 'model.h5py'))

    ###### GET THE BEST VALIDATION RESULTS, LOAD THOSE WEIGHTS AND DO THE FINAL TEST ##############
    weight_files = [
        os.path.join(args.output, x) for x in os.listdir(args.output)
        if x.startswith("weights-") and x.endswith(".h5py")
    ]
    newest = max(weight_files, key=os.path.getctime)
    model.load_weights(newest)
    sys.stdout.write('[*]\tWeights loaded: {}\n'.format(newest))
    sys.stdout.flush()

    loss, acc = model.evaluate_generator(
        generator=data_generator(X_test, y_test, args.batch, imdims,
                                 phase='test', numAugs=0, cropem=cropem,
                                 maskout=maskout, dataroot=args.data),
        steps=int(len(X_test) / args.batch),
        max_queue_size=20)
    print('Best Test loss:', loss)
    print('Best Test acc:', acc)

    save_history(history, args.output)
    model_json = model.to_json()
    if not os.path.isdir(args.output):
        os.makedirs(args.output)
    with open(os.path.join(args.output, 'model.json'), 'w') as json_file:
        json_file.write(model_json)
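# --- Hedged worked example (hypothetical filename): the resume branch above recovers
# --- `initial_epoch` from ModelCheckpoint's filename template
# --- "weights-improvement-{epoch:02d}-{val_acc:.3f}.h5py" by splitting the path string:
path = "weights-improvement-07-0.912.h5py"
initial_epoch = int(path.split('improvement-')[-1].split('-')[0])
print(initial_epoch)  # -> 7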
def train_net(train, val, unsupervised, model, name):
    unsupervised_initialization = mlb.transform(
        unsupervised['tags'].str.split()).astype(np.float32)
    unsupervised_samples = unsupervised['image_name'].as_matrix()
    unsupervised_initialization = unsupervised_initialization[
        :len(unsupervised_initialization) // 2 * 3]
    unsupervised_samples = unsupervised_samples[:len(unsupervised_samples) // 2 * 3]

    transformations_train = transforms.apply_chain([
        transforms.random_fliplr(),
        transforms.random_flipud(),
        transforms.augment(),
        torchvision.transforms.ToTensor()
    ])
    transformations_val = transforms.apply_chain([
        torchvision.transforms.ToTensor()
    ])

    dset_train_unsupervised = KaggleAmazonUnsupervisedDataset(
        unsupervised_samples,
        paths.test_jpg,
        '.jpg',
        transformations_train,
        transformations_val,
        unsupervised_initialization
    )
    dset_train_supervised = KaggleAmazonJPGDataset(
        train, paths.train_jpg, transformations_train, divide=False)
    dset_train = KaggleAmazonSemiSupervisedDataset(
        dset_train_supervised, dset_train_unsupervised, None, indices=False)
    train_loader = DataLoader(dset_train,
                              batch_size=64,
                              shuffle=True,
                              num_workers=10,
                              pin_memory=True)

    dset_val = KaggleAmazonJPGDataset(val, paths.train_jpg, transformations_val,
                                      divide=False)
    val_loader = DataLoader(dset_val, batch_size=64, num_workers=10, pin_memory=True)

    ignored_params = list(map(id, chain(
        model.classifier.parameters(),
        model.layer1.parameters(),
        model.layer2.parameters(),
        model.layer3.parameters(),
        model.layer4.parameters()
    )))
    base_params = filter(lambda p: id(p) not in ignored_params, model.parameters())

    optimizer = optim.Adam([
        {'params': base_params},
        {'params': model.layer1.parameters()},
        {'params': model.layer2.parameters()},
        {'params': model.layer3.parameters()},
        {'params': model.layer4.parameters()},
        {'params': model.classifier.parameters()}
    ], lr=0, weight_decay=0.0001)

    trainer = ModuleTrainer(model)

    def schedule(current_epoch, current_lrs, **logs):
        lrs = [1e-3, 1e-4, 0.5e-4, 1e-5, 0.5e-5]
        epochs = [0, 1, 6, 8, 12]
        for lr, epoch in zip(lrs, epochs):
            if current_epoch >= epoch:
                current_lrs[5] = lr
                if current_epoch >= 2:
                    current_lrs[4] = lr * 1
                    current_lrs[3] = lr * 1
                    current_lrs[2] = lr * 1
                    current_lrs[1] = lr * 1
                    current_lrs[0] = lr * 0.1
        return current_lrs

    trainer.set_callbacks([
        ModelCheckpoint(
            paths.models,
            name,
            save_best_only=False,
            saving_strategy=lambda epoch: True
        ),
        CSVLogger(paths.logs + name),
        LearningRateScheduler(schedule),
        SemiSupervisedUpdater(trainer, dset_train_unsupervised, start_epoch=6,
                              momentum=0.25)
    ])

    trainer.compile(loss=nn.BCELoss(), optimizer=optimizer)
    trainer.fit_loader(train_loader,
                       val_loader,
                       nb_epoch=16,
                       verbose=1,
                       cuda_device=0)
model = arch.build_model()
optimizer = RMSprop(lr)  # Nadam(lr)

# # metrics = [Recall(), Precision(), dice_coef, MeanIoU(num_classes=2)]
# # model.compile(loss=dice_loss, optimizer=optimizer, metrics=metrics)
# metrics = [Recall(), Precision(), dice_coef, MeanIoU(num_classes=2), miou_coef]
# model.compile(loss=miou_loss, optimizer=optimizer, metrics=metrics)
# # model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=optimizer, metrics=metrics)

## Model
with CustomObjectScope({'dice_loss': dice_loss, 'dice_coef': dice_coef,
                        'miou_loss': miou_loss, 'miou_coef': miou_coef}):
    model = load_model(model_path)
    print("model loaded successfully")
    metrics = [Recall(), Precision(), dice_coef, MeanIoU(num_classes=2), miou_coef]
    model.compile(loss=miou_loss, optimizer=optimizer, metrics=metrics)
    print("model compiled successfully")

csv_logger = CSVLogger(f"{file_path}{model_name}_{batch_size}_{epochs}.csv", append=False)
checkpoint = ModelCheckpoint(model_path, verbose=1, save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5,
                              min_lr=1e-6, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=10,
                               restore_best_weights=False)
callbacks = [csv_logger, checkpoint, reduce_lr, early_stopping]

model.fit_generator(train_gen,
                    validation_data=valid_gen,
                    steps_per_epoch=train_steps,
                    validation_steps=valid_steps,
                    epochs=epochs,
                    callbacks=callbacks)

# !python3 resume_training.py
def main(args):
    config_path = os.path.join(args.model_dir, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    logging.info('loading embedding...')
    with open(config['model_parameters']['embedding'], 'rb') as f:
        embedding = pickle.load(f)
        config['model_parameters']['embedding'] = embedding.vectors

    logging.info('loading valid data...')
    with open(config['model_parameters']['valid'], 'rb') as f:
        config['model_parameters']['valid'] = pickle.load(f)
        # valid = pickle.load(f)

    logging.info('loading train data...')
    with open(config['train'], 'rb') as f:
        train = pickle.load(f)

    if config['arch'] == 'ExampleNet':
        # from modules import ExampleNet
        from predictors import ExamplePredictor
        PredictorClass = ExamplePredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=128,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='BCELoss',  # BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='MLP',  # inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])
    elif config['arch'] == 'RnnNet':
        from predictors import RnnPredictor
        PredictorClass = RnnPredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=512,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='FocalLoss',  # BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='Cosine',  # inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])
    elif config['arch'] == 'RnnAttentionNet':
        from predictors import RnnAttentionPredictor
        PredictorClass = RnnAttentionPredictor
        predictor = PredictorClass(
            metrics=[Recall(1), Recall(10)],
            batch_size=32,
            max_epochs=1000000,
            dropout_rate=0.2,
            learning_rate=1e-3,
            grad_accumulate_steps=1,
            loss='BCELoss',  # BCELoss, FocalLoss
            margin=0,
            threshold=None,
            similarity='MLP',  # inner_product, Cosine, MLP
            device=args.device,
            **config['model_parameters'])
    else:
        logging.warning('Unknown config["arch"] {}'.format(config['arch']))

    if args.load is not None:
        predictor.load(args.load)

    # def ModelCheckpoint(filepath, monitor='loss', verbose=0, mode='min')
    model_checkpoint = ModelCheckpoint(os.path.join(args.model_dir, 'model.pkl'),
                                       monitor='Recall@{}'.format(10),
                                       verbose=1,
                                       mode='all')
    metrics_logger = MetricsLogger(os.path.join(args.model_dir, 'log.json'))
    early_stopping = EarlyStopping(os.path.join(args.model_dir, 'model.pkl'),
                                   monitor='Recall@{}'.format(10),
                                   verbose=1,
                                   mode='max',
                                   patience=30)

    logging.info('start training!')
    # print('train', train)
    predictor.fit_dataset(train, train.collate_fn,
                          [model_checkpoint, metrics_logger, early_stopping])
def main():
    import torch
    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)

    ### Data loading
    # mnist = MNISTDataset.load('haar_mnist.pkl')
    # mnist = MNISTDataset.load('filtered_mnist.pkl')
    mnist = MNISTDataset.load()
    (Xtr, Ytr), (Xts, Yts) = mnist.get_train_test(center=True, reduce=True)
    m = 1_0
    X, Y = Xtr[:m], Ytr[:m]
    X_val, Y_val = Xtr[-10_000:], Ytr[-10_000:]

    ### Choice of encoder
    # encoder = LabelEncoder.load_encodings('js_without_0', convert_to_int=True)
    # encoder = LabelEncoder.load_encodings('mario')
    # encoder = LabelEncoder.load_encodings('ideal_mnist', convert_to_int=True)
    encoder = OneHotEncoder(Ytr)
    # encoder = AllPairsEncoder(Ytr)

    ### Choice of weak learner
    # weak_learner = WLThresholdedRidge(threshold=.5)
    # weak_learner = WLRidge
    f_gen = WeightFromBankGenerator(filter_bank=Xtr[-3000:],
                                    filters_shape=(11, 11),
                                    filter_processing=center_weight)
    filters = Filters(n_filters=3,
                      weights_generator=f_gen,
                      # locality=3,
                      maxpool_shape=(3, 3))
    # Xtr, X_val, Xts = RandomConvolution.format_data(Xtr), RandomConvolution.format_data(X_val), RandomConvolution.format_data(Xts)
    # Xtr, X_val, Xts = Xtr.to('cuda'), X_val.to('cuda'), Xts.to('cuda')
    weak_learner = RandomConvolution(filters=filters, weak_learner=Ridge)
    # weak_learner = MulticlassDecisionTree(max_n_leaves=4)
    # weak_learner = MulticlassDecisionStump
    # sorted_X, sorted_X_idx = weak_learner.sort_data(X)

    ### Callbacks
    # filename = 'haar_onehot_ds_'
    # filename = 'ideal_mnist_ds_'
    filename = 'test'
    ckpt = ModelCheckpoint(filename=filename + '_{round}.ckpt',
                           dirname='./results',
                           save_last=True)
    logger = CSVLogger(filename=filename + '_log.csv', dirname='./results/log')
    zero_risk = BreakOnZeroRiskCallback()
    tracker = BestRoundTrackerCallback(quantity='valid_acc', monitor='max')
    callbacks = [ckpt,
                 logger,
                 zero_risk,
                 # tracker,
                 ]

    ### Fitting the model
    qb = QuadBoostMHCR(weak_learner, encoder=encoder, dampening=1)
    qb.fit(X, Y,
           max_round_number=2,
           patience=10,
           X_val=X_val,
           Y_val=Y_val,
           callbacks=callbacks,
           # n_jobs=1, sorted_X=sorted_X, sorted_X_idx=sorted_X_idx,
           )
    print(f'Best round recap:\nBoosting round {qb.best_round.step_number+1:03d} | Train acc: {qb.best_round.train_acc:.3%} | Valid acc: {qb.best_round.valid_acc:.3%} | Risk: {qb.best_round.risk:.3f}')
    print(f'Test accuracy on best model: {qb.evaluate(Xts, Yts):.3%}')
    print(f'Test accuracy on last model: {qb.evaluate(Xts, Yts, mode="last"):.3%}')
val_generator = d.flow_from_directory(
    '/media/palm/data/scene_validation_20170908/ai_challenger_scene_validation_20170908/images',
    batch_size=32,
    target_size=(224, 224))

model = EfficientNetB0(include_top=False, pooling='avg')
x = model.output
x = layers.Dense(80, activation='softmax')(x)
model = models.Model(model.input, x)
model.compile(optimizer=optimizers.SGD(0.01, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['acc', f1_m])

tb = CustomTensorBoard('B0, preprocess, 224*224, batch_size 32',
                       log_dir='logs/B0-1',
                       write_graph=False)
cp = ModelCheckpoint('weights/b0.h5',
                     save_weights_only=True,
                     save_best_only=True,
                     monitor='val_acc',
                     mode='max')
print(f'\033[{np.random.randint(31, 37)}m')
model.fit_generator(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=10,
                    validation_data=val_generator,
                    callbacks=[tb, cp],
                    validation_steps=len(val_generator),
                    workers=8)