Example #1
def train(cfg):
    print('Preparing to train {} on {} data...'.format(cfg['main']['model_name'],
                                                       cfg['main']['dataset_name']))

    np.random.seed(1337)  # for reproducibility

    print('Tensorflow backend detected; Applying memory usage constraints')
    ss = K.tf.Session(config=K.tf.ConfigProto(gpu_options=K.tf.GPUOptions(allow_growth=True)))
    K.set_session(ss)
    ss.run(K.tf.global_variables_initializer())
    K.set_learning_phase(1)

    Prostate_dataset = select_dataset(cfg)
    if not cfg["validation"]["do_validation"]:
        image_gen = Prostate_dataset.create_generator()
        val_data = None
    else:
        image_gen, val_data = Prostate_dataset.create_generator_with_validation()
    model = select_model(cfg, Prostate_dataset)
    # model.summary()
    myAdam = keras.optimizers.Adam(lr=1e-4)

    model.compile(optimizer=myAdam,
                  loss={'Radboud_Branch': 'categorical_crossentropy',
                        'Karolinska_Branch': 'categorical_crossentropy'},
                  loss_weights=[0.5, 0.5],
                  metrics=['accuracy'])

    print('Created image generator from dataset {}!'.format(cfg['main']['dataset_name']))
    tf_saver = tf.train.Saver(max_to_keep=2)

    checkpoint_callback = TFCheckPointCallback(tf_saver, ss, cfg)
    modelsaver_callback = SaveKerasModel(cfg)
    learningRate_callback = SelectLRCallBack(cfg)
    tbCallBack = TensoBoardCallback(cfg)

    model.fit_generator(image_gen,
                        steps_per_epoch=Prostate_dataset.sample_number // cfg['training']['batch_size'],
                        epochs=cfg['training']['epochs'],
                        callbacks=[checkpoint_callback, learningRate_callback, modelsaver_callback, tbCallBack],
                        validation_data=val_data)
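The TFCheckPointCallback used above is a custom class that is not shown on this page. As a rough, hypothetical sketch (the class name, constructor arguments, and save path here are assumptions, not the author's actual code), a saver-based checkpoint callback in this TF1/Keras setup could look like:

import os
import keras

class TFCheckPointCallbackSketch(keras.callbacks.Callback):
    """Hypothetical sketch: save the TF1 session with tf.train.Saver after each epoch."""

    def __init__(self, saver, session, checkpoint_dir):
        super().__init__()
        self.saver = saver
        self.session = session
        self.checkpoint_dir = checkpoint_dir

    def on_epoch_end(self, epoch, logs=None):
        # tf.train.Saver.save writes a checkpoint and appends global_step to its name.
        self.saver.save(self.session,
                        os.path.join(self.checkpoint_dir, 'model.ckpt'),
                        global_step=epoch)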
Example #2
args.o2n = o2n
args.n2o = n2o

# Convert the literal string 'None' coming from the command line into a real None
for key in ['train_labels_path', 'val_labels_path']:
    if args[key] == 'None':
        args[key] = None

settings.set_settings(args)

#args.output_path = tempfile.TemporaryDirectory().name
os.makedirs(args.output_path)

train_loader, val_loader, labelled_train_loader = dataset.get_data_loaders(
    args)

model = models.select_model(args)


def SIDX(template_id):
    # Start column of a template's block: the first three columns are skipped,
    # then each template occupies seven consecutive columns.
    return 3 + (template_id - 1) * 7


def EIDX(template_id):
    # End column (exclusive) of the same seven-column block.
    return 3 + (template_id - 1) * 7 + 7


# for i in val_loader.dataset.raw_data[:,SIDX(6)]:
# 	print (i)
# print (val_loader.dataset.raw_data[:,SIDX(2)])

#my_score, max_score, similarity, rank, conditional_rank, mean, std
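A rough, hypothetical illustration of how SIDX/EIDX slice raw_data (the array shape and number of templates below are made up; the seven column names come from the comment above):

import numpy as np

num_templates = 6
# Assumed layout: three leading columns, then seven statistics per template
# (my_score, max_score, similarity, rank, conditional_rank, mean, std).
raw_data = np.random.rand(100, 3 + 7 * num_templates)

template_id = 2
block = raw_data[:, SIDX(template_id):EIDX(template_id)]  # all seven stats for template 2
my_score_col = raw_data[:, SIDX(template_id)]             # just the first statistic
print(block.shape)  # (100, 7)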
Example #3
        ss = K.tf.Session(config=K.tf.ConfigProto(gpu_options=K.tf.GPUOptions(allow_growth=True)))
        K.set_session(ss)

    # debugging parameters
    interim_testing = False

    # parameters
    dataset_name = 'mscoco'
    load_mscoco = False
    dw = 512
    dh = 512
    model_name = 'enet_unpooling'
    pw = os.path.join('models', dataset_name, 'enet_unpooling', 'weights', '{}_best.h5'.format(model_name))
    nc = datasets.load(dataset_name).num_classes()

    autoencoder = models.select_model(model_name=model_name)
    segmenter, model_name = autoencoder.build(nc=nc, w=dw, h=dh)
    segmenter.load_weights(pw)

    if load_mscoco:
        data = load_mscoco_data(segmenter=segmenter)
    else:
        txt_file = sys.argv[1]
        image_dir = os.path.dirname(txt_file)
        with open(txt_file) as fin:
            image_filenames = [os.path.join(image_dir, line.rstrip('\n')) for line in fin]
            data = load_arbitrary_data(segmenter=segmenter, image_filenames=image_filenames)

        data_gen = data['data_gen']
        if interim_testing:
            for idx, item in enumerate(data_gen):
Example #4
def main():

    # build parser and check arguments
    args = _build_parser()
    _check_args(args)

    # Setup Estimator
    '''Estimator name: 
    xgb: XGBoost Classifier
    log: Logistic Regression
    knn: KNeighbors Classifier
    rfo: RandomForest Classifier 
    ada: AdaBoost Classifier
    ext: ExtraTrees Classifier
    svc: Support Vector Classifier
    keras: Keras Neural Networks
    '''

    if args.estimator == 'all':
        estimators = ['xgb', 'lgb', 'log', 'rfo', 'ext', 'ada', 'knn', 'svc']
    else:
        estimators = [args.estimator]

    # Training neural nets with keras
    if args.train_nn:
        estimator_name = 'keras'
        print('Training %s...' % estimator_name)

        params = {
            'n_features': n_features,
            'n_classes': n_classes,
            'dropout': args.dropout,
            'hidden_unit': args.hidden_unit,
            'n_layers': args.layers,
            'optimizer': args.optimizer,
            'init': args.init,
            'batch_size': args.batch_size,
            'epochs': args.epochs,
        }
        estimator = keras_model(**params)

        train_kwargs = {
            'X_train': X_train,
            'y_train': y_train,
            'X_val': X_val,
            'y_val': y_val,
            'score_name': args.score,
            'num': args.num
        }
        _ = estimator.train(**train_kwargs)
        print('params: \n', params)

    # Training random search CV with scikit-learn models
    if args.train_random:
        for estimator_name in estimators:
            print('Training %s...' % estimator_name)

            if estimator_name != 'keras':
                seed = args.seed if args.seed is not None else np.random.randint(100)
                estimator, params = select_model(estimator_name, n_features,
                                                 n_classes, seed)

                # kwargs dict for train and predict
                train_kwargs = {
                    'estimator': estimator,
                    'params': params,
                    'X_train': X_train,
                    'y_train': y_train,
                    'X_val': X_val,
                    'y_val': y_val,
                    'n_iter': args.n_iter,
                    'score_name': args.score,
                }

                # Train model and Predict results
                best_params, best_score, val_score = random_model(
                    **train_kwargs)
                timestamp = get_timestamp()

                # Write params to file
                write_params(estimator_name, best_params, best_score,
                             val_score, timestamp, args.num)

            elif estimator_name == 'keras':

                space_params = {
                    'n_features': n_features,
                    'n_classes': n_classes,
                    'dropout': hp.uniform('dropout', .20, .80),
                    'hidden_unit': hp.quniform('hidden_unit', 10, 50, q=1),
                    'n_layers': hp.choice('n_layers', [1, 2, 3, 4]),
                    'optimizer': hp.choice('optimizer', ['adam', 'adadelta', 'sgd']),
                    'init': hp.choice('init', ['glorot_uniform', 'normal', 'uniform']),
                    'batch_size': hp.choice('batch_size', [16, 32, 64, 128]),
                    'epochs': hp.quniform('epochs', 100, 1000, q=1),
                    'score_name': args.score,
                    'num': args.num,
                }
                trials = Trials()
                best_params = fmin(random_nn,
                                   space_params,
                                   algo=tpe.suggest,
                                   max_evals=args.n_iter,
                                   trials=trials)
                print('best_params \n', best_params)
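                # Note: fmin returns hp.choice parameters as indices into their option
                # lists; hyperopt.space_eval(space_params, best_params) can map them
                # back to the actual chosen values if needed.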

    # Evaluate with ensemble method and predict result
    if args.predict:

        eva_kwargs = {
            'estimators': estimators,
            'threshold': args.threshold,
            'X_train': X_train,
            'y_train': y_train,
            'X_val': X_val,
            'y_val': y_val,
            'X_test': X_test,
            'score_name': args.score,
            'n_classes': n_classes,
        }

        # Predict with ensemble voting and write result
        prediction = ensemble(**eva_kwargs)
        if args.ensemble == 'vote':
            result = prediction.vote()
        elif args.ensemble == 'stack':
            result = prediction.stack(args.num_imp)

        timestamp = get_timestamp()
        write_result(result, label_list, timestamp)
Example #5
def main(args):

    # Store name of experiment
    exp_name = args.exp_name
    exp_name = '{}_r{}_p{}_n{}_i{}_k{}'.format(exp_name, args.rho,
                                               args.pos_reward,
                                               args.neg_reward,
                                               args.class_imbalance,
                                               args.kldiv_lambda)

    # Create a directory for the output path
    args.output_path = os.path.join(args.output_path, args.exp_name)
    os.makedirs(args.output_path, exist_ok=True)

    utils.LOG_FILE = os.path.join(args.output_path, 'log.txt')

    LEARNING_PROFILE_FILE = os.path.join(args.output_path,
                                         'learning_curve.txt')
    lpf = open(LEARNING_PROFILE_FILE, 'a')
    args.lpf = lpf
    # Set logging
    logging.basicConfig(filename=utils.LOG_FILE,
                        filemode='a',
                        format='%(levelname)s :: %(asctime)s - %(message)s',
                        level=args.log_level,
                        datefmt='%d/%m/%Y %I:%M:%S %p')
    console = logging.StreamHandler()
    console.setLevel(args.log_level)
    formatter = logging.Formatter('%(levelname)s :: %(asctime)s - %(message)s',
                                  datefmt='%d/%m/%Y %I:%M:%S %p')
    console.setFormatter(formatter)
    logging.getLogger().addHandler(console)

    logging.info(
        'Beginning code for experiment {} and storing stuff in {}'.format(
            exp_name, args.output_path))
    logging.info('Loaded arguments as \n{}'.format(str(pprint.pformat(args))))

    # Begin of main code

    train_loader, val_loader, labelled_train_loader = dataset.get_data_loaders(
        args)
    model = models.select_model(args)
    my_eval_fn = compute.get_evaluation_function(args)

    if args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              momentum=args.momentum,
                              lr=args.lr,
                              weight_decay=args.decay)
    else:
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=args.lr,
                               weight_decay=args.decay)

    checkpoint_file = os.path.join(args.output_path,
                                   '{}_checkpoint.pth'.format(exp_name))
    best_checkpoint_file = os.path.join(
        args.output_path, '{}_best_checkpoint.pth'.format(exp_name))
    logging.info('Saving checkpoints at {} and best checkpoint at : {}'.format(
        checkpoint_file, best_checkpoint_file))

    start_epoch = 0
    best_score = -9999999

    # Load checkpoint if present in input arguments
    if args.checkpoint != '':
        logging.info('Starting from checkpoint: {}'.format(args.checkpoint))
        cp = torch.load(args.checkpoint)
        start_epoch = cp['epoch'] + 1
        model.load_state_dict(cp['model'])
        # optimizer.load_state_dict(cp['optimizer']) TODO: - Why not do this?
        best_score = cp['best_score']
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
            param_group['weight_decay'] = args.decay

    num_epochs = args.num_epochs
    logging.info('Beginning train/validate cycle')

    time1 = time.time()
    if val_loader is not None:
        record, metric_idx, headers = compute.compute(start_epoch - 1,
                                                      model,
                                                      val_loader,
                                                      optimizer,
                                                      'eval',
                                                      eval_fn=my_eval_fn,
                                                      args=args)
        if (args.log_eval is not None):
            handler = open(args.log_eval, "a")
            print(','.join([
                str(round(x, 6)) if isinstance(x, float) else str(x)
                for x in record
            ]),
                  file=handler)
            handler.close()
    print("Time taken:", time.time() - time1)
    if (args.only_eval):
        logging.info('Ran only eval mode, now exiting')
        exit(0)

    # Start TRAINING
    for epoch in range(start_epoch, num_epochs):
        logging.info('Beginning epoch {}'.format(epoch))

        if labelled_train_loader is not None:
            record, metric_idx, _ = compute.compute(epoch,
                                                    model,
                                                    labelled_train_loader,
                                                    optimizer,
                                                    'train_sup',
                                                    eval_fn=my_eval_fn,
                                                    args=args)

        if train_loader is not None:
            record, metric_idx, _ = compute.compute(
                epoch,
                model,
                train_loader,
                optimizer,
                'train_un',
                eval_fn=my_eval_fn,
                args=args,
                labelled_train_loader=labelled_train_loader)

        if val_loader is not None:
            record, metric_idx, _ = compute.compute(epoch,
                                                    model,
                                                    val_loader,
                                                    None,
                                                    'eval',
                                                    eval_fn=my_eval_fn,
                                                    args=args)

        is_best = False
        logging.info('Best score: {}, This score: {}'.format(
            best_score, record[metric_idx]))

        if record[metric_idx] > best_score:
            best_score = record[metric_idx]
            is_best = True

        utils.save_checkpoint(
            {
                'epoch': epoch,
                'best_score': best_score,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'is_best': is_best
            }, epoch, is_best, checkpoint_file, best_checkpoint_file)

    args.lpf.close()
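utils.save_checkpoint is not shown in this example; a minimal, hypothetical sketch of the common PyTorch pattern it presumably follows (the shutil-based copy for the best checkpoint is an assumption):

import shutil
import torch

def save_checkpoint_sketch(state, epoch, is_best, checkpoint_file, best_checkpoint_file):
    # Hypothetical helper: always persist the latest state (epoch is already stored
    # inside the state dict); when it is the best score so far, also copy it to the
    # "best" checkpoint path.
    torch.save(state, checkpoint_file)
    if is_best:
        shutil.copyfile(checkpoint_file, best_checkpoint_file)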
Example #6
	def __init__(self, config, train_loader, val_loader, model_name):
		self.config = config
		self.train_loader = train_loader
		self.val_loader = val_loader
		self.model_name = model_name
		

		self.num_train = len(self.train_loader.dataset)
		self.num_valid = len(self.val_loader.dataset)
		self.saved_model_dir = self.config.saved_model_dir
		self.output_model_dir = self.config.output_model_dir
		self.best_model = config.best_model
		self.use_gpu = self.config.use_gpu

		if self.config.resume:
			print("LOADING SAVED MODEL")
			self.net = select_model(self.model_name, self.config.file_type)
			self.loadCheckpoint()
		else:
			print("INITIALIZING NEW MODEL")
			self.net = select_model(self.model_name, self.config.file_type)

		self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
		self.net = self.net.to(self.device)
		self.total_epochs = config.epochs
		

		if self.model_name in ("lstm_model", "cnn_lstm_model"):
			print("NLL LOSS")
			self.criterion = nn.NLLLoss()
			self.optimizer = optim.Adam(self.net.parameters(), lr=0.0001)
			loss_name = "nll_loss"
			lr = 0.0001

		else:
			self.criterion = nn.CrossEntropyLoss()
			self.optimizer = optim.SGD(self.net.parameters(),lr = self.config.lr,
										momentum = self.config.momentum, 
										weight_decay = self.config.weight_decay)

			loss_name = "crossentropy_loss"
			lr = self.config.lr

		self.num_params = sum([p.data.nelement() for p in self.net.parameters()])
		self.batch_size = self.config.batch_size
		self.train_paitence = config.train_paitence
		self.num_classes = 10
		
		if not self.config.debug:
			self.experiment = wandb.init(project="audio_classification")
			
			hyper_params = {
				"model_name": self.model_name,
				"file_type": self.config.file_type,
				"dataset": self.config.dataset,
				"batch_size": self.config.batch_size,
				"num_epochs": self.total_epochs,
				"loss_function": loss_name,
				"learning_rate": lr,
				"momentum": self.config.momentum,
				"weight_decay": self.config.weight_decay,
			}
			self.experiment.config.update(hyper_params)
			wandb.watch(self.net)

		# summary(self.net, input_size=(1, 128, 216))
		print('[*] Number of model parameters: {:,}'.format(self.num_params))
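The loadCheckpoint method referenced above is not shown on this page; a rough, hypothetical sketch of what such a method often does in PyTorch (the checkpoint path and the stored key names are assumptions, not the author's actual code):

	def loadCheckpoint(self):
		# Hypothetical: restore model weights from the best saved checkpoint.
		checkpoint_path = os.path.join(self.saved_model_dir, self.best_model)
		checkpoint = torch.load(checkpoint_path, map_location="cpu")
		self.net.load_state_dict(checkpoint["model_state_dict"])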