def _test_ad(self, sess, model, hp_dict, neval):
  """Test automatic differentiation of hyperparameter gradients.

  Args:
    sess: TensorFlow session.
    model: Model object.
    hp_dict: A dictionary of hyperparameter names and initial values.
  """
  hyperparams = dict([(hp_name, model.optimizer.hyperparams[hp_name])
                      for hp_name in hp_dict.keys()])
  grads = model.optimizer.grads
  accumulators = model.optimizer.accumulators
  new_accumulators = model.optimizer.new_accumulators
  loss = model.cost

  # Build look-ahead graph.
  look_ahead_ops, grad_ops, zero_out_ops = look_ahead_grads(
      hyperparams, grads, accumulators, new_accumulators, loss,
      dtype=self._dtype)

  # Get variables to be checkpointed.
  ckpt_var_list = []
  for var_list in model.optimizer.accumulators.values():
    ckpt_var_list.extend(var_list)
  ckpt_var_list.append(model.global_step)

  # Build checkpoint ops.
  ckpt = build_checkpoint(ckpt_var_list)
  read_checkpoint_op = read_checkpoint(ckpt, ckpt_var_list)
  write_checkpoint_op = write_checkpoint(ckpt, ckpt_var_list)

  # Initialize weight parameters.
  sess.run(tf.global_variables_initializer())

  # Checkpoint weights and momentum parameters.
  sess.run(write_checkpoint_op)

  # Initialize hyperparameters.
  for hp_name, init_hp in hp_dict.items():
    model.optimizer.assign_hyperparam(sess, hp_name, init_hp)
  _, grad_hp = self.run_loss(sess, model, self._batch_size, look_ahead_ops,
                             grad_ops)

  # Check the gradient of the loss wrt. each hyperparameter against a
  # central finite-difference estimate.
  for ii, (hp_name, init_hp) in enumerate(hp_dict.items()):
    sess.run(read_checkpoint_op)
    sess.run(zero_out_ops)
    model.optimizer.assign_hyperparam(sess, hp_name, init_hp - self._eps)
    l1, _ = self.run_loss(sess, model, self._batch_size, look_ahead_ops,
                          grad_ops)
    sess.run(read_checkpoint_op)
    sess.run(zero_out_ops)
    model.optimizer.assign_hyperparam(sess, hp_name, init_hp + self._eps)
    l2, _ = self.run_loss(sess, model, self._batch_size, look_ahead_ops,
                          grad_ops)
    grad_hp_fd = (l2 - l1) / (2 * self._eps)
    model.optimizer.assign_hyperparam(sess, hp_name, init_hp)
    np.testing.assert_allclose(
        grad_hp[ii], grad_hp_fd, rtol=self._rtol, atol=self._atol)
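# The assertion above relies on the central-difference identity
#   dL/dh ~= (L(h + eps) - L(h - eps)) / (2 * eps),
# which is exactly what `grad_hp_fd` computes for each hyperparameter. A
# minimal standalone sketch of the same check (hypothetical example, not part
# of the test suite; assumes only numpy as np):
#
#   def central_difference(f, h, eps=1e-5):
#     """Two-sided finite-difference estimate of df/dh at h."""
#     return (f(h + eps) - f(h - eps)) / (2.0 * eps)
#
#   # f(h) = 3 h^2 has gradient 6 h, so the estimate at h = 2 is close to 12.
#   np.testing.assert_allclose(
#       central_difference(lambda h: 3.0 * h ** 2, 2.0), 12.0, rtol=1e-4)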
def main_raxml_runner(args, op):
  """Run ParGenes from the parsed arguments op."""
  start = time.time()
  output_dir = op.output_dir
  checkpoint_index = checkpoint.read_checkpoint(output_dir)
  if (os.path.exists(output_dir) and not op.do_continue):
    logger.info("[Error] The output directory " + output_dir +
                " already exists. Please use another output directory"
                " or run with --continue.")
    sys.exit(1)
  commons.makedirs(output_dir)
  logger.init_logger(op.output_dir)
  print_header(args)
  msas = None
  logger.timed_log("end of MSAs initializations")
  scriptdir = os.path.dirname(os.path.realpath(__file__))
  modeltest_run_path = os.path.join(output_dir, "modeltest_run")
  raxml_run_path = os.path.join(output_dir, "mlsearch_run")
  binaries_dir = os.path.join(scriptdir, "..", "pargenes_binaries")
  print("Binaries directory: " + binaries_dir)
  if (op.scheduler != "split"):
    raxml_library = os.path.join(binaries_dir, "raxml-ng")
    modeltest_library = os.path.join(binaries_dir, "modeltest-ng")
  else:
    raxml_library = os.path.join(binaries_dir, "raxml-ng-mpi.so")
    modeltest_library = os.path.join(binaries_dir, "modeltest-ng-mpi.so")
  astral_jar = os.path.join(binaries_dir, "astral.jar")
  if (len(op.raxml_binary) > 1):
    raxml_library = op.raxml_binary
  if (len(op.modeltest_binary) > 1):
    modeltest_library = op.modeltest_binary
  if (len(op.astral_jar) > 1):
    astral_jar = op.astral_jar
  astral_jar = os.path.abspath(astral_jar)
  if (checkpoint_index < 1):
    msas = commons.init_msas(op)
    raxml.run_parsing_step(msas, raxml_library, op.scheduler,
                           os.path.join(output_dir, "parse_run"), op.cores, op)
    raxml.analyse_parsed_msas(msas, op)
    checkpoint.write_checkpoint(output_dir, 1)
    logger.timed_log("end of parsing mpi-scheduler run")
  else:
    msas = raxml.load_msas(op)
  if (op.dry_run):
    logger.info("End of the dry run. Exiting")
    return 0
  logger.timed_log("end of analysing parsing results")
  if (op.use_modeltest):
    if (checkpoint_index < 2):
      modeltest.run(msas, output_dir, modeltest_library, modeltest_run_path,
                    op)
      logger.timed_log("end of modeltest mpi-scheduler run")
      modeltest.parse_modeltest_results(op.modeltest_criteria, msas,
                                        output_dir)
      logger.timed_log("end of parsing modeltest results")
      # Then recompute the binary MSA files to set the correct model, and
      # re-evaluate the MSA sizes with the new models.
      shutil.move(os.path.join(output_dir, "parse_run"),
                  os.path.join(output_dir, "old_parse_run"))
      raxml.run_parsing_step(msas, raxml_library, op.scheduler,
                             os.path.join(output_dir, "parse_run"), op.cores,
                             op)
      raxml.analyse_parsed_msas(msas, op)
      logger.timed_log("end of the second parsing step")
      checkpoint.write_checkpoint(output_dir, 2)
  if (checkpoint_index < 3):
    raxml.run(msas, op.random_starting_trees, op.parsimony_starting_trees,
              op.bootstraps, raxml_library, op.scheduler, raxml_run_path,
              op.cores, op)
    logger.timed_log("end of mlsearch mpi-scheduler run")
    checkpoint.write_checkpoint(output_dir, 3)
  if (op.random_starting_trees + op.parsimony_starting_trees > 1):
    if (checkpoint_index < 4):
      raxml.select_best_ml_tree(msas, op)
      logger.timed_log("end of selecting the best ML tree")
      checkpoint.write_checkpoint(output_dir, 4)
  if (op.bootstraps != 0):
    if (checkpoint_index < 5):
      bootstraps.concatenate_bootstraps(output_dir, min(16, op.cores))
      logger.timed_log("end of bootstraps concatenation")
      checkpoint.write_checkpoint(output_dir, 5)
    starting_trees = op.random_starting_trees + op.parsimony_starting_trees
    if (checkpoint_index < 6 and starting_trees > 0):
      bootstraps.run(msas, output_dir, raxml_library, op.scheduler,
                     os.path.join(output_dir, "supports_run"), op.cores, op)
      logger.timed_log("end of supports mpi-scheduler run")
      checkpoint.write_checkpoint(output_dir, 6)
  if (op.use_astral):
    if (checkpoint_index < 7):
      astral.run_astral_pargenes(astral_jar, op)
      checkpoint.write_checkpoint(output_dir, 7)
  all_invalid = True
  for name, msa in msas.items():
    if (msa.valid):
      all_invalid = False
  if (all_invalid):
    print("[Error] ParGenes failed to analyze all MSAs.")
    report.report_and_exit(op.output_dir, 1)
  print_stats(op)
  return 0
def online_smd(dataset_name='mnist',
               init_lr=1e-1,
               momentum=0.001,
               num_steps=20000,
               middle_decay=False,
               steps_per_update=10,
               smd=True,
               steps_look_ahead=5,
               num_meta_steps=10,
               steps_per_eval=100,
               batch_size=100,
               meta_lr=1e-2,
               print_step=False,
               effective_lr=True,
               negative_momentum=True,
               optimizer='momentum',
               stochastic=True,
               exp_folder='.'):
  """Train an MLP on MNIST (or a CNN on CIFAR-10) with online SMD.

  Args:
    dataset_name: String. Name of the dataset.
    init_lr: Float. Initial learning rate, default 0.1.
    momentum: Float. Initial momentum, default 0.001.
    num_steps: Int. Total number of steps, default 20000.
    middle_decay: Bool. Whether to apply manual learning rate decay to 1e-4
      from the midpoint, default False.
    steps_per_update: Int. Number of steps per hyperparameter update,
      default 10.
    smd: Bool. Whether to run SMD.
    steps_look_ahead: Int. Number of steps to look ahead, default 5.
    num_meta_steps: Int. Number of meta steps, default 10.
    steps_per_eval: Int. Number of training steps per evaluation, default 100.
    batch_size: Int. Mini-batch size, default 100.
    meta_lr: Float. Meta learning rate, default 1e-2.
    print_step: Bool. Whether to print loss during training, default False.
    effective_lr: Bool. Whether to re-parameterize the learning rate as
      lr / (1 - momentum), default True.
    negative_momentum: Bool. Whether to re-parameterize momentum as
      (1 - momentum), default True.
    optimizer: String. Name of the optimizer. Options: `momentum`, `adam`;
      default `momentum`.
    stochastic: Bool. Whether to do stochastic or deterministic look ahead,
      default True.
    exp_folder: String. Folder for experiment logs, default '.'.

  Returns:
    results: Results tuple object.
  """
  dataset = get_dataset(dataset_name)
  dataset_train = get_dataset(
      dataset_name)  # For evaluating training progress (full epoch).
  dataset_test = get_dataset(
      dataset_name, test=True)  # For evaluating test progress (full epoch).
  if dataset_name == 'mnist':
    input_shape = [None, 28, 28, 1]
  elif dataset_name.startswith('cifar'):
    input_shape = [None, 32, 32, 3]
  x = tf.placeholder(tf.float32, input_shape, name="x")
  y = tf.placeholder(tf.int64, [None], name="y")

  if effective_lr:
    init_lr_ = init_lr / (1.0 - momentum)
  else:
    init_lr_ = init_lr
  if negative_momentum:
    init_mom_ = 1.0 - momentum
  else:
    init_mom_ = momentum

  if dataset_name == 'mnist':
    config = get_mnist_mlp_config(
        init_lr_,
        init_mom_,
        effective_lr=effective_lr,
        negative_momentum=negative_momentum)
  elif dataset_name == 'cifar-10':
    config = get_cifar_cnn_config(
        init_lr_,
        init_mom_,
        effective_lr=effective_lr,
        negative_momentum=negative_momentum)
  else:
    raise NotImplementedError
  with tf.name_scope('Train'):
    with tf.variable_scope('Model'):
      if dataset_name == 'mnist':
        m = get_mnist_mlp_model(
            config, x, y, optimizer=optimizer, training=True)
        model = m
      elif dataset_name == 'cifar-10':
        m = get_cifar_cnn_model(
            config, x, y, optimizer=optimizer, training=True)
        model = m
  with tf.name_scope('Test'):
    with tf.variable_scope('Model', reuse=True):
      if dataset_name == 'mnist':
        mtest = get_mnist_mlp_model(config, x, y, training=False)
      elif dataset_name == 'cifar-10':
        mtest = get_cifar_cnn_model(config, x, y, training=False)

  final_lr = 1e-4
  midpoint = num_steps // 2
  if dataset_name == 'mnist':
    num_train = 60000
    num_test = 10000
  elif dataset_name.startswith('cifar'):
    num_train = 50000
    num_test = 10000
  lr_ = init_lr_
  mom_ = init_mom_
  bsize = batch_size
  steps_per_epoch = num_train // bsize
  steps_test_per_epoch = num_test // bsize
  train_xent_list = []
  train_acc_list = []
  test_xent_list = []
  test_acc_list = []
  lr_list = []
  mom_list = []
  step_list = []
  log.info(
      'Applying decay at midpoint with final learning rate = {:.3e}'.format(
          final_lr))

  if 'momentum' in optimizer:
    mom_name = 'mom'
  elif 'adam' in optimizer:
    mom_name = 'beta1'
  else:
    raise ValueError('Unknown optimizer')
  hp_dict = {'lr': init_lr}  #, mom_name: momentum}
  hp_names = hp_dict.keys()
  hyperparams = dict([(hp_name, model.optimizer.hyperparams[hp_name])
                      for hp_name in hp_names])
  grads = model.optimizer.grads
  accumulators = model.optimizer.accumulators
  new_accumulators = model.optimizer.new_accumulators
  loss = model.cost

  # Build look-ahead graph.
  look_ahead_ops, hp_grad_ops, zero_out_ops = look_ahead_grads(
      hyperparams, grads, accumulators, new_accumulators, loss)

  # Meta optimizer, use Adam on the log space.
  meta_opt = LogOptimizer(tf.train.AdamOptimizer(meta_lr))
  hp = [model.optimizer.hyperparams[hp_name] for hp_name in hp_names]
  hp_grads_dict = {
      'lr': tf.placeholder(tf.float32, [], name='lr_grad'),
      # mom_name: tf.placeholder(
      #     tf.float32, [], name='{}_grad'.format(mom_name))
  }
  hp_grads_plh = [hp_grads_dict[hp_name] for hp_name in hp_names]
  hp_grads_and_vars = list(zip(hp_grads_plh, hp))
  cgrad = {'lr': (-1e1, 1e1)}  #, mom_name: (-1e1, 1e1)}
  cval = {'lr': (1e-4, 1e1)}  #, mom_name: (1e-4, 1e0)}
  cgrad_ = [cgrad[hp_name] for hp_name in hp_names]
  cval_ = [cval[hp_name] for hp_name in hp_names]
  meta_train_op = meta_opt.apply_gradients(
      hp_grads_and_vars, clip_gradients=cgrad_, clip_values=cval_)

  var_list = tf.global_variables()
  ckpt = build_checkpoint(tf.global_variables())
  write_op = write_checkpoint(ckpt, var_list)
  read_op = read_checkpoint(ckpt, var_list)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    exp_logger = _get_exp_logger(sess, exp_folder)

    def log_hp(hp_dict):
      lr_ = hp_dict['lr']
      mom_ = hp_dict['mom']
      # Log current learning rate and momentum.
      if negative_momentum:
        exp_logger.log(ii, 'mom', 1.0 - mom_)
        exp_logger.log(ii, 'log neg mom', np.log10(mom_))
        mom__ = 1.0 - mom_
      else:
        exp_logger.log(ii, 'mom', mom_)
        exp_logger.log(ii, 'log neg mom', np.log10(1.0 - mom_))
        mom__ = mom_
      if effective_lr:
        lr__ = lr_ * (1.0 - mom__)
        eflr_ = lr_
      else:
        lr__ = lr_
        eflr_ = lr_ / (1.0 - mom__)
      exp_logger.log(ii, 'eff lr', eflr_)
      exp_logger.log(ii, 'log eff lr', np.log10(eflr_))
      exp_logger.log(ii, 'lr', lr__)
      exp_logger.log(ii, 'log lr', np.log10(lr__))
      exp_logger.flush()
      return lr__, mom__

    # Assign initial learning rate and momentum.
    m.optimizer.assign_hyperparam(sess, 'lr', lr_)
    m.optimizer.assign_hyperparam(sess, 'mom', mom_)
    train_iter = six.moves.xrange(num_steps)
    if not print_step:
      train_iter = tqdm(train_iter, ncols=0)
    for ii in train_iter:
      # Meta-optimization loop.
      if ii == 0 or ii % steps_per_update == 0:
        if ii < midpoint and smd:
          if stochastic:
            data_list = [
                dataset.next_batch(bsize)
                for step in six.moves.xrange(steps_look_ahead)
            ]
            # Take the next few batches for last-step evaluation.
            eval_data_list = [
                dataset.next_batch(bsize)
                for step in six.moves.xrange(steps_look_ahead)
            ]
          else:
            data_entry = dataset.next_batch(bsize)
            data_list = [data_entry] * steps_look_ahead
            # Use the deterministic batch for last-step evaluation.
            eval_data_list = [data_list[0]]
          sess.run(write_op)
          for ms in six.moves.xrange(num_meta_steps):
            cost, hp_dict = meta_step(sess, model, data_list, look_ahead_ops,
                                      hp_grad_ops, hp_grads_plh, meta_train_op,
                                      eval_data_list)
            sess.run(read_op)
            for hpname, hpval in hp_dict.items():
              model.optimizer.assign_hyperparam(sess, hpname, hpval)
          lr_ = hp_dict['lr']
          # mom_ = hp_dict['mom']
        else:
          hp_dict = sess.run(model.optimizer.hyperparams)
        lr_log, mom_log = log_hp(hp_dict)
        lr_list.append(lr_log)
        mom_list.append(mom_log)

      if ii == midpoint // 2:
        m.optimizer.assign_hyperparam(sess, 'mom', 1 - 0.9)
      if ii == midpoint:
        lr_before_mid = hp_dict['lr']
        tau = (num_steps - midpoint) / np.log(lr_before_mid / final_lr)
      if ii > midpoint:
        lr_ = np.exp(-(ii - midpoint) / tau) * lr_before_mid
        m.optimizer.assign_hyperparam(sess, 'lr', lr_)

      # Run regular training.
      if lr_ > 1e-6:
        # Use CBL for the first half of training.
        xd, yd = data_entry if (smd and not stochastic and
                                ii < midpoint) else dataset.next_batch(bsize)
        cost_, _ = sess.run(
            [m.cost, m.train_op], feed_dict={
                m.x: xd,
                m.y: yd
            })
        if ii < midpoint:
          sess.run(m._retrieve_ema_op)

      # Evaluate every certain number of steps.
      if ii == 0 or (ii + 1) % steps_per_eval == 0:
        test_acc = 0.0
        test_xent = 0.0
        train_acc = 0.0
        train_xent = 0.0

        # Report full-epoch training loss.
        for jj in six.moves.xrange(steps_per_epoch):
          xd, yd = dataset_train.next_batch(bsize)
          xent_, acc_ = sess.run(
              [m.cost, m.acc], feed_dict={
                  x: xd,
                  y: yd
              })
          train_xent += xent_ / float(steps_per_epoch)
          train_acc += acc_ / float(steps_per_epoch)
        step_list.append(ii + 1)
        train_xent_list.append(train_xent)
        train_acc_list.append(train_acc)
        dataset_train.reset()

        # Report full-epoch validation loss.
        for jj in six.moves.xrange(steps_test_per_epoch):
          xd, yd = dataset_test.next_batch(bsize)
          xent_, acc_ = sess.run(
              [mtest.cost, mtest.acc], feed_dict={
                  x: xd,
                  y: yd
              })
          test_xent += xent_ / float(steps_test_per_epoch)
          test_acc += acc_ / float(steps_test_per_epoch)
        test_xent_list.append(test_xent)
        test_acc_list.append(test_acc)
        dataset_test.reset()

        # Log training progress.
        exp_logger.log(ii, 'train loss', train_xent)
        exp_logger.log(ii, 'log train loss', np.log10(train_xent))
        exp_logger.log(ii, 'test loss', test_xent)
        exp_logger.log(ii, 'log test loss', np.log10(test_xent))
        exp_logger.log(ii, 'train acc', train_acc)
        exp_logger.log(ii, 'test acc', test_acc)
        exp_logger.flush()

        if print_step:
          log.info(
              ('Steps {:d} T Xent {:.3e} T Acc {:.3f} V Xent {:.3e} '
               'V Acc {:.3f} LR {:.3e}').format(ii + 1, train_xent,
                                                train_acc * 100.0, test_xent,
                                                test_acc * 100.0, lr_))

  return Results(
      step=np.array(step_list),
      train_xent=np.array(train_xent_list),
      train_acc=np.array(train_acc_list),
      test_xent=np.array(test_xent_list),
      test_acc=np.array(test_acc_list),
      lr=np.array(lr_list),
      momentum=np.array(mom_list))
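# Sketch of the post-midpoint schedule used in `online_smd` above (hypothetical
# helper, shown for illustration only; assumes `np` is numpy as in this file).
# The learning rate decays exponentially from its value at the midpoint down to
# `final_lr` at `num_steps`, since
#   lr(num_steps) = lr_mid * exp(-log(lr_mid / final_lr)) = final_lr.
def _midpoint_decay_sketch(step, midpoint, num_steps, lr_mid, final_lr=1e-4):
  """Exponentially decayed learning rate for steps past the midpoint."""
  tau = (num_steps - midpoint) / np.log(lr_mid / final_lr)
  return np.exp(-(step - midpoint) / tau) * lr_mid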
def run_offline_smd(num_steps,
                    init_lr,
                    init_decay,
                    meta_lr,
                    num_meta_steps,
                    momentum=MOMENTUM,
                    effective_lr=False,
                    negative_momentum=False,
                    pretrain_ckpt=None,
                    output_fname=None,
                    seed=0):
  """Run offline SMD experiments.

  Args:
    num_steps: Int. Number of look-ahead training steps per meta step.
    init_lr: Float. Initial learning rate.
    init_decay: Float. Initial decay constant.
    meta_lr: Float. Meta descent learning rate.
    num_meta_steps: Int. Number of meta descent steps.
    momentum: Float. Momentum.
    effective_lr: Bool. Whether to optimize in the effective LR space.
    negative_momentum: Bool. Whether to optimize in the negative momentum
      space.
    pretrain_ckpt: String or None. Path to a pretrained checkpoint to restore.
    output_fname: String or None. CSV file to which per-step results are
      written.
    seed: Int. Random seed.
  """
  bsize = BATCH_SIZE
  if output_fname is not None:
    log_folder = os.path.dirname(output_fname)
  else:
    log_folder = os.path.join('results', 'mnist', 'offline', 'smd')
    log_folder = os.path.join(log_folder, _get_run_number(log_folder))
  if not os.path.exists(log_folder):
    os.makedirs(log_folder)

  with tf.Graph().as_default(), tf.Session() as sess:
    dataset = get_dataset('mnist')
    exp_logger = _get_exp_logger(sess, log_folder)
    if effective_lr:
      init_lr_ = init_lr / float(1.0 - momentum)
    else:
      init_lr_ = init_lr
    if negative_momentum:
      init_mom_ = 1.0 - momentum
    else:
      init_mom_ = momentum
    config = get_mnist_mlp_config(
        init_lr_,
        init_mom_,
        decay=init_decay,
        effective_lr=effective_lr,
        negative_momentum=negative_momentum)
    x = tf.placeholder(tf.float32, [None, 28, 28, 1], name="x")
    y = tf.placeholder(tf.int64, [None], name="y")
    with tf.name_scope('Train'):
      with tf.variable_scope('Model'):
        model = get_mnist_mlp_model(
            config, x, y, optimizer='momentum_inv_decay', training=True)
    all_vars = tf.global_variables()
    var_to_restore = list(
        filter(lambda x: 'momentum' not in x.name.lower(), all_vars))
    var_to_restore = list(
        filter(lambda x: 'global_step' not in x.name.lower(), var_to_restore))
    var_to_restore = list(
        filter(lambda x: 'lr' not in x.name.lower(), var_to_restore))
    var_to_restore = list(
        filter(lambda x: 'mom' not in x.name.lower(), var_to_restore))
    var_to_restore = list(
        filter(lambda x: 'decay' not in x.name.lower(), var_to_restore))
    saver = tf.train.Saver(var_to_restore)
    rnd = np.random.RandomState(seed)

    hp_dict = {'lr': init_lr, 'decay': init_decay}
    hp_names = hp_dict.keys()
    hyperparams = dict([(hp_name, model.optimizer.hyperparams[hp_name])
                        for hp_name in hp_names])
    grads = model.optimizer.grads
    accumulators = model.optimizer.accumulators
    new_accumulators = model.optimizer.new_accumulators
    loss = model.cost

    # Build look-ahead graph.
    look_ahead_ops, hp_grad_ops, zero_out_ops = look_ahead_grads(
        hyperparams, grads, accumulators, new_accumulators, loss)

    # Meta optimizer, operating in the log space.
    # meta_opt = LogOptimizer(tf.train.AdamOptimizer(meta_lr))
    meta_opt = LogOptimizer(tf.train.MomentumOptimizer(meta_lr, 0.9))
    hp = [model.optimizer.hyperparams[hp_name] for hp_name in hp_names]
    hp_grads_dict = {
        'lr': tf.placeholder(tf.float32, [], name='lr_grad'),
        'decay': tf.placeholder(tf.float32, [], name='decay_grad')
    }
    hp_grads_plh = [hp_grads_dict[hp_name] for hp_name in hp_names]
    hp_grads_and_vars = list(zip(hp_grads_plh, hp))
    cgrad = {'lr': (-1e1, 1e1), 'decay': (-1e1, 1e1)}
    cval = {'lr': (1e-4, 1e1), 'decay': (1e-4, 1e3)}
    cgrad_ = [cgrad[hp_name] for hp_name in hp_names]
    cval_ = [cval[hp_name] for hp_name in hp_names]
    meta_train_op = meta_opt.apply_gradients(
        hp_grads_and_vars, clip_gradients=cgrad_, clip_values=cval_)

    if output_fname is not None:
      msg = '{} exists, please remove previous experiment data.'.format(
          output_fname)
      assert not os.path.exists(output_fname), msg
      log.info('Writing to {}'.format(output_fname))
      with open(output_fname, 'w') as f:
        f.write('Step,LR,Mom,Decay,Loss\n')

    # Initialize all variables.
    sess.run(tf.global_variables_initializer())
    var_list = tf.global_variables()
    if pretrain_ckpt is not None:
      saver.restore(sess, pretrain_ckpt)
    ckpt = build_checkpoint(var_list)
    write_op = write_checkpoint(ckpt, var_list)
    read_op = read_checkpoint(ckpt, var_list)
    sess.run(write_op)

    # Progress bar.
    it = tqdm(
        six.moves.xrange(num_meta_steps),
        ncols=0,
        desc='look_{}_ilr_{:.0e}_decay_{:.0e}'.format(num_steps, init_lr,
                                                      init_decay))
    for run in it:
      # A stochastic data list makes SMD converge faster.
      data_list = [
          dataset.next_batch(bsize) for step in six.moves.xrange(num_steps)
      ]
      eval_data_list = [
          dataset.next_batch(bsize)
          for step in six.moves.xrange(NUM_TRAIN // bsize)
      ]

      # Run meta descent step.
      cost, hp_dict = meta_step(sess, model, data_list, look_ahead_ops,
                                hp_grad_ops, hp_grads_plh, meta_train_op,
                                eval_data_list)

      # Early stop if it hits NaN.
      if np.isnan(cost):
        break

      # Restore parameters.
      sess.run(read_op)
      for hpname, hpval in hp_dict.items():
        model.optimizer.assign_hyperparam(sess, hpname, hpval)

      # Read out hyperparameters in the normal parameterization.
      if negative_momentum:
        mom = 1 - hp_dict['mom']
      else:
        mom = hp_dict['mom']
      if effective_lr:
        lr = hp_dict['lr'] * (1 - mom)
      else:
        lr = hp_dict['lr']

      # Write to logs.
      if output_fname is not None:
        with open(output_fname, 'a') as f:
          f.write('{:d},{:f},{:f},{:f},{:f}\n'.format(
              run, lr, hp_dict['mom'], hp_dict['decay'], cost))

      # Log to TensorBoard.
      exp_logger.log(run, 'lr', lr)
      exp_logger.log(run, 'decay', hp_dict['decay'])
      exp_logger.log(run, 'log loss', np.log10(cost))
      exp_logger.flush()

      # Update progress bar.
      it.set_postfix(
          lr='{:.3e}'.format(lr),
          decay='{:.3e}'.format(hp_dict['decay']),
          loss='{:.3e}'.format(cost))
    exp_logger.close()
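# Sketch of the hyperparameter re-parameterization used by both SMD drivers
# above (hypothetical helpers, shown for illustration only): internally the
# optimizer may store lr / (1 - momentum) ("effective" learning rate) and
# 1 - momentum ("negative" momentum); these convert to and from the usual
# parameterization.
def _to_internal_hparams_sketch(lr, momentum, effective_lr, negative_momentum):
  """Map (lr, momentum) to the internal (lr_, mom_) parameterization."""
  lr_ = lr / (1.0 - momentum) if effective_lr else lr
  mom_ = 1.0 - momentum if negative_momentum else momentum
  return lr_, mom_


def _from_internal_hparams_sketch(lr_, mom_, effective_lr, negative_momentum):
  """Map the internal (lr_, mom_) values back to (lr, momentum)."""
  momentum = 1.0 - mom_ if negative_momentum else mom_
  lr = lr_ * (1.0 - momentum) if effective_lr else lr_
  return lr, momentum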
def main_raxml_runner(op):
  """Run ParGenes from the parsed arguments op."""
  start = time.time()
  output_dir = op.output_dir
  checkpoint_index = checkpoint.read_checkpoint(output_dir)
  print("Checkpoint: " + str(checkpoint_index))
  if (os.path.exists(output_dir) and not op.do_continue):
    print("[Error] The output directory " + output_dir +
          " already exists. Please use another output directory"
          " or run with --continue.")
    sys.exit(1)
  commons.makedirs(output_dir)
  logs = commons.get_log_file(output_dir, "pargenes_logs")
  print("Redirecting logs to " + logs)
  sys.stdout = open(logs, "w")
  print_header()
  msas = commons.init_msas(op)
  timed_print(start, "end of MSAs initializations")
  scriptdir = os.path.dirname(os.path.realpath(__file__))
  modeltest_run_path = os.path.join(output_dir, "modeltest_run")
  raxml_run_path = os.path.join(output_dir, "mlsearch_run")
  if (op.scheduler == "onecore"):
    raxml_library = os.path.join(scriptdir, "..", "raxml-ng", "bin",
                                 "raxml-ng")
    modeltest_library = os.path.join(scriptdir, "..", "modeltest", "bin",
                                     "modeltest-ng")
  else:
    raxml_library = os.path.join(scriptdir, "..", "raxml-ng", "bin",
                                 "raxml-ng-mpi.so")
    modeltest_library = os.path.join(scriptdir, "..", "modeltest", "build",
                                     "src", "modeltest-ng-mpi.so")
  if (checkpoint_index < 1):
    raxml.run_parsing_step(msas, raxml_library, op.scheduler,
                           os.path.join(output_dir, "parse_run"), op.cores, op)
    checkpoint.write_checkpoint(output_dir, 1)
    timed_print(start, "end of parsing mpi-scheduler run")
  raxml.analyse_parsed_msas(msas, op, output_dir)
  if (op.dry_run):
    print("End of the dry run. Exiting")
    return 0
  timed_print(start, "end of analysing parsing results")
  if (op.use_modeltest):
    if (checkpoint_index < 2):
      modeltest.run(msas, output_dir, modeltest_library, modeltest_run_path,
                    op)
      timed_print(start, "end of modeltest mpi-scheduler run")
      modeltest.parse_modeltest_results(op.modeltest_criteria, msas,
                                        output_dir)
      timed_print(start, "end of parsing modeltest results")
      # Then recompute the binary MSA files to set the correct model, and
      # re-evaluate the MSA sizes with the new models.
      shutil.move(os.path.join(output_dir, "parse_run"),
                  os.path.join(output_dir, "old_parse_run"))
      raxml.run_parsing_step(msas, raxml_library, op.scheduler,
                             os.path.join(output_dir, "parse_run"), op.cores,
                             op)
      raxml.analyse_parsed_msas(msas, op, output_dir)
      timed_print(start, "end of the second parsing step")
      checkpoint.write_checkpoint(output_dir, 2)
  if (checkpoint_index < 3):
    raxml.run(msas, op.random_starting_trees, op.parsimony_starting_trees,
              op.bootstraps, raxml_library, op.scheduler, raxml_run_path,
              op.cores, op)
    timed_print(start, "end of mlsearch mpi-scheduler run")
    checkpoint.write_checkpoint(output_dir, 3)
  if (op.random_starting_trees + op.parsimony_starting_trees > 1):
    if (checkpoint_index < 4):
      raxml.select_best_ml_tree(msas, op)
      timed_print(start, "end of selecting the best ML tree")
      checkpoint.write_checkpoint(output_dir, 4)
  if (op.bootstraps != 0):
    if (checkpoint_index < 5):
      bootstraps.concatenate_bootstraps(output_dir, min(16, op.cores))
      timed_print(start, "end of bootstraps concatenation")
      checkpoint.write_checkpoint(output_dir, 5)
    if (checkpoint_index < 6):
      bootstraps.run(output_dir, raxml_library, op.scheduler,
                     os.path.join(output_dir, "supports_run"), op.cores, op)
      timed_print(start, "end of supports mpi-scheduler run")
      checkpoint.write_checkpoint(output_dir, 6)
  return 0