def train():
    """Create the worker and train job scripts and wrap them for launching.

    The node count comes from the ``train`` section of the sub config when
    ``check_sub_config()`` is truthy; otherwise a single node is assumed.
    With more than one node, each worker script additionally receives the
    ``gpu_per_node`` and ``port`` values from the sub config (``None`` when
    those keys are absent).

    The created objects are not returned; presumably the script classes do
    their work on construction -- TODO confirm.
    """
    config = load_config()  # result is not used further in this function

    has_sub_config = check_sub_config()
    if has_sub_config:
        train_section = load_sub_config()['train']
        num_node = train_section['num_node']
        gpu_per_node = train_section.get('gpu_per_node', None)
        port = train_section.get('port', None)
    else:
        num_node = 1

    num_iter = get_num_train_iter()

    # Multi-node workers take two extra positional arguments; a single-node
    # worker takes none.  gpu_per_node/port only exist when a sub config was
    # loaded, which is the only way num_node can exceed 1.
    extra_worker_args = (gpu_per_node, port) if num_node > 1 else ()
    worker_script_list = [
        WorkerJobScript(i, num_node, *extra_worker_args)
        for i in range(num_iter)
    ]

    train_script_list = [TrainJobScript(i, num_node) for i in range(num_iter)]

    if has_sub_config:
        launcher = TrainSubScript(train_script_list)
    else:
        launcher = TrainRunScript(train_script_list)
def get_arch_list():
    """Return the architecture list used for reranking.

    Reads ``config['rerank']['arch_list']`` when that key is present and
    falls back to ``['roberta_large']`` otherwise.
    """
    rerank_section = load_config()['rerank']
    if 'arch_list' not in rerank_section:
        return ['roberta_large']
    return rerank_section['arch_list']
def bea19_preproc():
    """Create the BEA19 preprocess job scripts and their sub/run wrappers.

    ``first_index`` selection:

    * no ``preprocess`` section in the config -> ``0``
    * ``preprocess`` contains ``src_dict_path`` -> ``None`` (no separate
      "first" job is created)
    * otherwise -> ``preprocess['first_index']``, defaulting to ``0``

    When ``first_index`` is not ``None``, a dedicated first script is built
    and wrapped with ``first=True``; every other iteration index goes into a
    second wrapper.  ``PreprocessSubScript`` is used when
    ``check_sub_config()`` is truthy, ``PreprocessRunScript`` otherwise.
    """
    config = load_config()
    num_iter = config['iter']

    if 'preprocess' not in config:
        first_index = 0
    elif 'src_dict_path' in config['preprocess']:
        first_index = None
    else:
        first_index = config['preprocess'].get('first_index', 0)

    has_first = first_index is not None
    if has_first:
        first_script = BEA19PreprocessJobScript(first_index, first_index)

    # With first_index None, no n is excluded and all iterations land here.
    script_list = [
        BEA19PreprocessJobScript(first_index, n)
        for n in range(num_iter)
        if n != first_index
    ]

    # The run path previously referenced the misspelled name
    # `PReprocessRunScript`; both wrappers now use the same class as the
    # `first=True` call in that branch.
    if check_sub_config():
        wrapper_cls = PreprocessSubScript
    else:
        wrapper_cls = PreprocessRunScript

    if has_first:
        wrapper_cls([first_script], first=True)
    wrapper_cls(script_list)
def get_attributes(phase):
    """Return the generation attributes for ``phase`` from the config.

    Args:
        phase: Value substituted into the ``'{}_dataset'`` key name that is
            looked up in the ``generate`` section.

    Returns:
        A tuple ``(src_lang, trg_lang, dataset, dataset_name)`` read from
        ``config['generate']``.
    """
    generate_section = load_config()['generate']
    return (
        generate_section['source_lang'],
        generate_section['target_lang'],
        generate_section['dataset'],
        generate_section['{}_dataset'.format(phase)],
    )
def mt_prepare():
    """Create one MT prepare job script per iteration and wrap them.

    The iteration count is ``config['iter']``.  The scripts are handed to
    ``PrepareSubScript`` when ``check_sub_config()`` is truthy, otherwise to
    ``PrepareRunScript``.
    """
    iteration_count = load_config()['iter']
    job_scripts = [MTPrepareJobScript(i) for i in range(iteration_count)]

    if check_sub_config():
        launcher = PrepareSubScript(job_scripts)
    else:
        launcher = PrepareRunScript(job_scripts)
def __init__(self):
    """Initialise the object and run its build steps in a fixed order.

    After the parent initialiser, the config is loaded into ``self.config``
    and ``self.path`` is set from ``self.make_path()``.  The remaining steps
    are then invoked one after another: ``prepare``, ``header``, ``make``,
    ``footer``, ``make_dir`` and ``save``.
    """
    super().__init__()

    # State that the subsequent steps may rely on.
    self.config = load_config()
    self.path = self.make_path()

    # Build steps, executed strictly in this sequence.
    self.prepare()
    self.header()
    self.make()
    self.footer()

    # Output steps; presumably these write the result to disk -- TODO confirm.
    self.make_dir()
    self.save()
def get_num_train_iter():
    """Return the number of training iterations described by the config.

    The first matching source wins:

    1. ``config['iter']`` -- used directly.
    2. ``config['data_indices']`` -- its length.
    3. ``config['train']['seed_list']`` -- its length.

    Returns:
        The iteration count.

    Raises:
        AssertionError: If none of the keys above is present.
        KeyError: If neither ``iter`` nor ``data_indices`` is present and the
            config has no ``train`` section.
    """
    config = load_config()

    # The membership test is on 'iter', so the value must be read from the
    # same key (it was previously read from 'num_iter').
    if 'iter' in config:
        return config['iter']
    if 'data_indices' in config:
        return len(config['data_indices'])
    if 'seed_list' in config['train']:
        return len(config['train']['seed_list'])

    # Raised explicitly rather than via `assert False`, which is removed
    # when Python runs with -O.
    raise AssertionError(
        "config must define 'iter', 'data_indices' or train.seed_list"
    )
def fix_prepare():
    """Create a prepare job script for every (experiment, index) pair.

    The experiment count is ``config['expt']`` and the index count is the
    length of ``config['readied']``.  The resulting list, ordered by
    experiment and then by index, is passed to ``sub`` when
    ``check_sub_config()`` is truthy and to ``run`` otherwise.
    """
    config = load_config()
    expt_count = config['expt']
    index_count = len(config['readied'])

    script_list = []
    for expt in range(expt_count):
        for index in range(index_count):
            script_list.append(FixedErgPrepareJobScript(expt, index))

    if not check_sub_config():
        run(script_list)
    else:
        sub(script_list)
def erg():
    """Create an ERG job script for every (trial, index) pair.

    The trial count is ``config['trials']`` and the index count is the
    length of ``config['readied']``.  The resulting list, ordered by trial
    and then by index, is passed to ``sub_erg`` when ``check_sub_config()``
    is truthy and to ``run_erg`` otherwise.
    """
    config = load_config()
    trial_count = config['trials']
    index_count = len(config['readied'])

    script_list = []
    for trial in range(trial_count):
        for index in range(index_count):
            script_list.append(ErgJobScript(trial, index))

    if not check_sub_config():
        run_erg(script_list)
    else:
        sub_erg(script_list)
def mt_preproc():
    """Create the MT preprocess job scripts and their sub/run wrappers.

    ``first_index`` is ``config['preprocess']['first_index']`` (default
    ``0``).  A dedicated script is built for that index and wrapped with
    ``first=True``; the scripts for all remaining iteration indices go into
    a second wrapper.  ``PreprocessSubScript`` is used when
    ``check_sub_config()`` is truthy, ``PreprocessRunScript`` otherwise.
    """
    config = load_config()
    num_iter = config['iter']
    first_index = config['preprocess'].get('first_index', 0)

    first_script = MTPreprocessJobScript(first_index, first_index)
    script_list = [
        MTPreprocessJobScript(first_index, n)
        for n in range(num_iter)
        if n != first_index
    ]

    # The run path previously referenced the misspelled name
    # `PReprocessRunScript`; both wrappers now use the same class as the
    # `first=True` call in that branch.
    if check_sub_config():
        wrapper_cls = PreprocessSubScript
    else:
        wrapper_cls = PreprocessRunScript

    wrapper_cls([first_script], first=True)
    wrapper_cls(script_list)
def fix_preproc():
    """Create a preprocess job script per (experiment, iteration, index).

    Counts are read from ``config['expt']``, ``config['iter']`` and
    ``config['indices']``.  Each script receives the experiment number, a
    trial number computed as ``expt * num_iters + i``, and the index.  The
    list is passed to ``sub`` when ``check_sub_config()`` is truthy and to
    ``run`` otherwise.
    """
    config = load_config()
    expt_count = config['expt']
    iters_per_expt = config['iter']
    index_count = config['indices']

    # Trial numbers are consecutive across experiments: experiment e owns
    # trials e * iters_per_expt .. (e + 1) * iters_per_expt - 1.
    script_list = [
        FixedErgPreprocessJobScript(expt, expt * iters_per_expt + i, index)
        for expt in range(expt_count)
        for i in range(iters_per_expt)
        for index in range(index_count)
    ]

    if not check_sub_config():
        run(script_list)
    else:
        sub(script_list)
def get_bridge_language_list():
    """Return the entries of ``config['bridges']`` as a new list."""
    bridges = load_config()['bridges']
    return list(bridges)
def get_num_groups():
    """Return the value stored under ``config['groups']``."""
    return load_config()['groups']
def ready():
    """Create one ready job script per entry of ``config['input_list']``.

    Scripts are built from the positions ``0 .. len(input_list) - 1`` and
    passed to ``generate_run`` together with ``ReadyRunScript`` and
    ``ReadySubScript``.
    """
    input_count = len(load_config()['input_list'])
    job_scripts = [ReadyJobScript(position) for position in range(input_count)]
    generate_run(job_scripts, ReadyRunScript, ReadySubScript)
def get_lambda_list():
    """Return the value stored under ``config['rerank']['lambda']``."""
    rerank_section = load_config()['rerank']
    return rerank_section['lambda']
def tokenize():
    """Create one tokenize job script per iteration.

    The iteration count is ``config['iter']``.  The scripts are passed to
    ``generate_run`` together with ``TokenizeRunScript`` and
    ``TokenizeSubScript``.
    """
    iteration_count = load_config()['iter']
    job_scripts = [TokenizeJobScript(i) for i in range(iteration_count)]
    generate_run(job_scripts, TokenizeRunScript, TokenizeSubScript)
def get_rtt_segments():
    """Return the segment numbers ``0 .. config['segments'] - 1`` as a list."""
    config = load_config()
    # list(range(...)) replaces an identity comprehension over the range.
    return list(range(config['segments']))
def get_rtt_indices():
    """Return the index numbers ``0 .. config['indices'] - 1`` as a list."""
    config = load_config()
    # list(range(...)) replaces an identity comprehension over the range.
    return list(range(config['indices']))
def get_num_translation_groups():
    """Return the value stored under ``config['translation_groups']``."""
    return load_config()['translation_groups']
def bea19_prepare():
    """Create one BEA19 prepare job script per iteration.

    The iteration count is ``config['iter']``.  The scripts are passed to
    ``generate_run`` together with ``PrepareRunScript`` and
    ``BEA19PrepareSubScript``.
    """
    iteration_count = load_config()['iter']
    job_scripts = [BEA19PrepareJobScript(i) for i in range(iteration_count)]
    generate_run(job_scripts, PrepareRunScript, BEA19PrepareSubScript)