def setUp(self):
    """Train a tiny model on the toy Delaney dataset, then prepare predict args.

    Leaves ``self.temp_dir`` holding the checkpoint directory and
    ``self.args`` holding ready-to-use prediction arguments.
    """
    here = os.path.dirname(os.path.abspath(__file__))

    # --- training phase: defaults plus a minimal, fast configuration ---
    train_parser = ArgumentParser()
    add_train_args(train_parser)
    train_args = train_parser.parse_args([])
    train_args.data_path = os.path.join(here, 'delaney_toy.csv')
    train_args.dataset_type = 'regression'
    train_args.batch_size = 2
    train_args.hidden_size = 5
    train_args.epochs = 1
    train_args.quiet = True

    # Checkpoints go into a TemporaryDirectory kept alive on self
    # so tearDown (or GC) can clean it up.
    self.temp_dir = TemporaryDirectory()
    train_args.save_dir = self.temp_dir.name

    train_logger = create_logger(name='train', save_dir=train_args.save_dir,
                                 quiet=train_args.quiet)
    modify_train_args(train_args)
    cross_validate(train_args, train_logger)
    clear_cache()

    # --- prediction phase: point predict args at the trained checkpoint ---
    predict_parser = ArgumentParser()
    add_predict_args(predict_parser)
    predict_args = predict_parser.parse_args([])
    predict_args.batch_size = 2
    predict_args.checkpoint_dir = self.temp_dir.name
    predict_args.preds_path = NamedTemporaryFile().name
    predict_args.test_path = os.path.join(here, 'delaney_toy_smiles.csv')

    self.args = predict_args
def test_hyperopt(self):
    """Smoke test: a short hyperparameter grid search completes without error."""
    try:
        parser = ArgumentParser()
        add_train_args(parser)
        # Hyperopt-specific options, mirroring the hyperparameter optimization script.
        parser.add_argument('--num_iters', type=int, default=20,
                            help='Number of hyperparameter choices to try')
        parser.add_argument('--config_save_path', type=str,
                            help='Path to .json file where best hyperparameter settings will be written')
        parser.add_argument('--log_dir', type=str,
                            help='(Optional) Path to a directory where all results of the hyperparameter optimization will be written')
        args = parser.parse_args([])
        args.data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'delaney_toy.csv')
        args.dataset_type = 'regression'
        args.batch_size = 2
        args.hidden_size = 5
        args.epochs = 1
        args.quiet = True
        # Keep a reference to the NamedTemporaryFile so it isn't deleted
        # while grid_search writes the best config to it.
        temp_file = NamedTemporaryFile()
        args.config_save_path = temp_file.name
        args.num_iters = 3
        modify_train_args(args)
        grid_search(args)
        clear_cache()
    except Exception as e:
        # Bug fix: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt, and discarded the actual error. Catch only
        # Exception and surface the cause in the failure message.
        self.fail(f'hyperopt raised: {e!r}')
def setUp(self):
    """Build default training args for the toy Delaney regression dataset."""
    arg_parser = ArgumentParser()
    add_train_args(arg_parser)
    train_args = arg_parser.parse_args([])

    # Small/fast configuration suitable for unit tests.
    train_args.data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                        'delaney_toy.csv')
    train_args.dataset_type = 'regression'
    train_args.batch_size = 2
    train_args.hidden_size = 5
    train_args.epochs = 1
    train_args.quiet = True
    self.args = train_args

    # NOTE(review): save_dir is whatever default add_train_args provides here
    # (it is never set explicitly in this fixture) — confirm create_logger
    # accepts that default.
    self.logger = create_logger(name='train', save_dir=train_args.save_dir,
                                quiet=train_args.quiet)
    # --- tail of a training/evaluation function; its `def` lies outside this chunk ---
    # Load the saved model and score it on the held-out test set.
    model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda)
    test_smiles, test_targets = test_data.smiles(), test_data.targets()
    test_preds = predict(model, test_data, args.batch_size)
    test_scores = evaluate_predictions(test_preds, test_targets, args.num_tasks, metric_func, args.dataset_type)
    # Average across tasks, ignoring NaN scores (e.g. tasks with no labels).
    avg_test_score = np.nanmean(test_scores)
    print(f'Test {args.metric} = {avg_test_score:.4f}')
    return avg_test_score


if __name__ == "__main__":
    # Script entry point: run training over num_folds folds and report
    # the mean/std of the per-fold test scores.
    parser = ArgumentParser()
    parser.add_argument('--source_data_path', required=True)
    parser.add_argument('--src_batch_size', type=int, default=100)
    parser.add_argument('--lambda_e', type=float, default=0.1)
    add_train_args(parser)
    args = parser.parse_args()
    modify_train_args(args)

    all_test_score = np.zeros((args.num_folds, ))
    for i in range(args.num_folds):
        # Each fold trains into its own subdirectory of save_dir.
        fold_dir = os.path.join(args.save_dir, f'fold_{i}')
        makedirs(fold_dir)
        all_test_score[i] = run_training(args, fold_dir)

    mean, std = np.mean(all_test_score), np.std(all_test_score)
    print(f'{args.num_folds} fold average: {mean:.4f} +/- {std:.4f}')
def train():
    """Renders the train page and, on POST, trains a model on the selected dataset.

    Accumulates non-fatal issues in the module-level ``training_message`` and
    returns the rendered template with any warning/error text.
    """
    global training_message

    if request.method == 'GET':
        return render_template('train.html',
                               datasets=get_datasets(),
                               started=False,
                               cuda=app.config['CUDA'],
                               gpus=app.config['GPUS'])

    # Get arguments from the submitted form.
    data_name, epochs, checkpoint_name = \
        request.form['dataName'], int(request.form['epochs']), request.form['checkpointName']
    gpu = request.form.get('gpu', None)
    dataset_type = request.form.get('datasetType', 'regression')

    if not checkpoint_name.endswith('.pt'):
        checkpoint_name += '.pt'

    # Create and modify args. Bug fix: parse an empty argv list instead of the
    # live sys.argv — this handler runs inside the web server process, whose
    # own command-line flags are not chemprop training flags (the sibling
    # handler already does this correctly).
    parser = ArgumentParser()
    add_train_args(parser)
    args = parser.parse_args([])

    args.data_path = os.path.join(app.config['DATA_FOLDER'], data_name)
    args.dataset_type = dataset_type
    args.epochs = epochs

    # Validate the labels against the chosen dataset type.
    target_set, all_targets_have_labels, has_invalid_targets = get_target_set(args.data_path)

    if len(target_set) == 0:
        return render_template('train.html',
                               datasets=get_datasets(),
                               started=False,
                               cuda=app.config['CUDA'],
                               gpus=app.config['GPUS'],
                               error="No training labels provided")

    if has_invalid_targets:
        return render_template('train.html',
                               datasets=get_datasets(),
                               started=False,
                               cuda=app.config['CUDA'],
                               gpus=app.config['GPUS'],
                               error="Training data contains invalid labels")

    classification_on_regression_dataset = ((not target_set <= {0, 1})
                                            and args.dataset_type == 'classification')
    if classification_on_regression_dataset:
        return render_template('train.html',
                               datasets=get_datasets(),
                               started=False,
                               cuda=app.config['CUDA'],
                               gpus=app.config['GPUS'],
                               error='Selected classification dataset, but not all labels are 0 or 1')

    regression_on_classification_dataset = (target_set <= {0, 1}
                                            and args.dataset_type == 'regression')

    if not all_targets_have_labels:
        # TODO could have separate warning messages for each?
        training_message += 'One or more targets have no labels. \n'

    if regression_on_classification_dataset:
        training_message += 'All labels are 0 or 1; did you mean to train classification instead of regression?\n'

    if gpu is not None:
        if gpu == 'None':
            args.no_cuda = True
        else:
            args.gpu = int(gpu)

    global progress, started

    with TemporaryDirectory() as temp_dir:
        args.save_dir = temp_dir
        modify_train_args(args)

        # NOTE(review): save_dir was just set to a fresh temp dir, so this
        # check looks always-true unless modify_train_args changes save_dir —
        # confirm intent.
        if os.path.isdir(args.save_dir):
            training_message += 'Overwriting preexisting checkpoint with the same name.'

        logger = logging.getLogger('train')
        logger.setLevel(logging.DEBUG)
        logger.propagate = False
        set_logger(logger, args.save_dir, args.quiet)

        # Progress is reported from a separate process so the page can poll it.
        process = mp.Process(target=progress_bar, args=(args, progress))
        process.start()
        started = 1

        # Run training
        run_training(args, logger)
        process.join()

        # Reset globals
        started = 0
        progress = mp.Value('d', 0.0)

        # Move the trained checkpoint into the permanent checkpoint folder.
        shutil.move(os.path.join(args.save_dir, 'model_0', 'model.pt'),
                    os.path.join(app.config['CHECKPOINT_FOLDER'], checkpoint_name))

    warning = training_message if len(training_message) > 0 else None
    training_message = ""

    return render_template('train.html',
                           datasets=get_datasets(),
                           cuda=app.config['CUDA'],
                           gpus=app.config['GPUS'],
                           trained=True,
                           warning=warning)
def train():
    """Renders the train page and performs training if request method is POST."""
    global PROGRESS, TRAINING

    warnings, errors = [], []

    if request.method == 'GET':
        return render_train()

    # Get arguments
    data_name, epochs, ensemble_size, checkpoint_name = \
        request.form['dataName'], int(request.form['epochs']), \
        int(request.form['ensembleSize']), request.form['checkpointName']
    gpu = request.form.get('gpu')
    data_path = os.path.join(app.config['DATA_FOLDER'], f'{data_name}.csv')
    dataset_type = request.form.get('datasetType', 'regression')

    # Create and modify args (parse an empty argv so server flags are ignored).
    parser = ArgumentParser()
    add_train_args(parser)
    args = parser.parse_args([])

    args.data_path = data_path
    args.dataset_type = dataset_type
    args.epochs = epochs
    args.ensemble_size = ensemble_size

    # Check if regression/classification selection matches data
    data = get_data(path=data_path)
    targets = data.targets()
    unique_targets = {target for row in targets for target in row if target is not None}

    if dataset_type == 'classification' and len(unique_targets - {0, 1}) > 0:
        errors.append('Selected classification dataset but not all labels are 0 or 1. Select regression instead.')
        return render_train(warnings=warnings, errors=errors)

    if dataset_type == 'regression' and unique_targets <= {0, 1}:
        errors.append('Selected regression dataset but all labels are 0 or 1. Select classification instead.')
        return render_train(warnings=warnings, errors=errors)

    if gpu is not None:
        if gpu == 'None':
            args.no_cuda = True
        else:
            args.gpu = int(gpu)

    current_user = request.cookies.get('currentUser')

    if not current_user:
        # Use DEFAULT as current user if the client's cookie is not set.
        current_user = app.config['DEFAULT_USER_ID']

    ckpt_id, ckpt_name = db.insert_ckpt(checkpoint_name,
                                        current_user,
                                        args.dataset_type,
                                        args.epochs,
                                        args.ensemble_size,
                                        len(targets))

    with TemporaryDirectory() as temp_dir:
        args.save_dir = temp_dir
        modify_train_args(args)

        # Progress is reported from a separate process so the page can poll it.
        process = mp.Process(target=progress_bar, args=(args, PROGRESS))
        process.start()
        TRAINING = 1

        # Run training
        logger = create_logger(name='train', save_dir=args.save_dir, quiet=args.quiet)
        task_scores = run_training(args, logger)
        process.join()

        # Reset globals
        TRAINING = 0
        PROGRESS = mp.Value('d', 0.0)

        # Check if name overlap
        if checkpoint_name != ckpt_name:
            warnings.append(name_already_exists_message('Checkpoint', checkpoint_name, ckpt_name))

        # Move models. Bug fix: os.walk already yields roots rooted at
        # args.save_dir; the original os.path.join(args.save_dir, root, fname)
        # only worked because the absolute `root` made os.path.join discard
        # the first argument.
        for root, _, files in os.walk(args.save_dir):
            for fname in files:
                if fname.endswith('.pt'):
                    model_id = db.insert_model(ckpt_id)
                    save_path = os.path.join(app.config['CHECKPOINT_FOLDER'], f'{model_id}.pt')
                    shutil.move(os.path.join(root, fname), save_path)

    return render_train(trained=True,
                        metric=args.metric,
                        num_tasks=len(args.task_names),
                        task_names=args.task_names,
                        task_scores=format_float_list(task_scores),
                        mean_score=format_float(np.mean(task_scores)),
                        warnings=warnings,
                        errors=errors)
def train():
    """Renders the train page and, on POST, trains a model on the selected dataset."""
    global progress, training

    warnings, errors = [], []

    if request.method == 'GET':
        return render_train()

    # Get arguments
    data_name, epochs, checkpoint_name = \
        request.form['dataName'], int(request.form['epochs']), request.form['checkpointName']
    gpu = request.form.get('gpu')
    data_path = os.path.join(app.config['DATA_FOLDER'], data_name)
    dataset_type = request.form.get('datasetType', 'regression')

    if not checkpoint_name.endswith('.pt'):
        checkpoint_name += '.pt'

    # Create and modify args. Bug fix: parse an empty argv list instead of the
    # live sys.argv — this handler runs inside the web server process, whose
    # own command-line flags are not chemprop training flags.
    parser = ArgumentParser()
    add_train_args(parser)
    args = parser.parse_args([])

    args.data_path = data_path
    args.dataset_type = dataset_type
    args.epochs = epochs

    # Check if regression/classification selection matches data
    data = get_data(path=data_path)
    targets = data.targets()
    unique_targets = set(np.unique(targets))

    if dataset_type == 'classification' and len(unique_targets - {0, 1}) > 0:
        errors.append(
            'Selected classification dataset but not all labels are 0 or 1. Select regression instead.'
        )
        return render_train(warnings=warnings, errors=errors)

    if dataset_type == 'regression' and unique_targets <= {0, 1}:
        errors.append(
            'Selected regression dataset but all labels are 0 or 1. Select classification instead.'
        )
        return render_train(warnings=warnings, errors=errors)

    if gpu is not None:
        if gpu == 'None':
            args.no_cuda = True
        else:
            args.gpu = int(gpu)

    with TemporaryDirectory() as temp_dir:
        args.save_dir = temp_dir
        modify_train_args(args)

        logger = logging.getLogger('train')
        logger.setLevel(logging.DEBUG)
        logger.propagate = False
        set_logger(logger, args.save_dir, args.quiet)

        # Progress is reported from a separate process so the page can poll it.
        process = mp.Process(target=progress_bar, args=(args, progress))
        process.start()
        training = 1

        # Run training
        task_scores = run_training(args, logger)
        process.join()

        # Reset globals
        training = 0
        progress = mp.Value('d', 0.0)

        # Check if name overlap
        original_save_path = os.path.join(app.config['CHECKPOINT_FOLDER'], checkpoint_name)
        save_path = find_unique_path(original_save_path)
        if save_path != original_save_path:
            warnings.append(
                name_already_exists_message('Checkpoint', original_save_path, save_path))

        # Move checkpoint
        shutil.move(os.path.join(args.save_dir, 'model_0', 'model.pt'), save_path)

    return render_train(trained=True,
                        metric=args.metric,
                        num_tasks=len(args.task_names),
                        task_names=args.task_names,
                        task_scores=format_float_list(task_scores),
                        mean_score=format_float(np.mean(task_scores)),
                        warnings=warnings,
                        errors=errors)