def train():
    """Create the worker and train job scripts for every training iteration.

    When ``check_sub_config()`` is true, the node count and the optional
    ``gpu_per_node`` / ``port`` values are read from the ``'train'`` section
    of the sub config and the train scripts are handed to ``TrainSubScript``.
    Otherwise a single node is used and the train scripts are handed to
    ``TrainRunScript``.
    """
    # The loaded config is not used below; the call is kept as-is.
    config = load_config()

    if check_sub_config():
        sub_config = load_sub_config()
        train_section = sub_config['train']
        num_node = train_section['num_node']
        multi_node_args = (
            train_section.get('gpu_per_node', None),
            train_section.get('port', None),
        )
    else:
        sub_config = None
        num_node = 1
        multi_node_args = ()

    # Only multi-node workers receive the GPU count and the port.
    if num_node > 1:
        worker_args = (num_node, *multi_node_args)
    else:
        worker_args = (num_node,)

    iterations = range(get_num_train_iter())

    # The worker list is not referenced again; it stays bound until return.
    worker_scripts = [WorkerJobScript(index, *worker_args) for index in iterations]
    train_scripts = [TrainJobScript(index, num_node) for index in iterations]

    if sub_config is None:
        TrainRunScript(train_scripts)
    else:
        TrainSubScript(train_scripts)
def bea19_preproc():
    """Create the BEA19 preprocessing job scripts and wrap them.

    One ``BEA19PreprocessJobScript`` is built for each iteration in
    ``range(config['iter'])``.  Unless ``config['preprocess']`` contains
    ``'src_dict_path'``, the iteration ``first_index`` (default ``0``) gets
    its own script, which is wrapped separately with ``first=True``; all
    other iterations are wrapped together.

    ``PreprocessSubScript`` is used as the wrapper when
    ``check_sub_config()`` is true, ``PreprocessRunScript`` otherwise.
    """
    config = load_config()
    num_iter = config['iter']

    if 'preprocess' not in config:
        first_index = 0
    elif 'src_dict_path' in config['preprocess']:
        # No separate "first" job is created in this case.
        first_index = None
    else:
        first_index = config['preprocess'].get('first_index', 0)

    has_first = first_index is not None
    if has_first:
        first_script = BEA19PreprocessJobScript(first_index, first_index)

    # When first_index is None the filter below keeps every iteration.
    script_list = [
        BEA19PreprocessJobScript(first_index, n)
        for n in range(num_iter)
        if n != first_index
    ]

    if check_sub_config():
        wrapper = PreprocessSubScript
    else:
        # Both run wrappers use PreprocessRunScript; the second call was
        # previously spelled `PReprocessRunScript`, an undefined name.
        wrapper = PreprocessRunScript

    if has_first:
        wrapper([first_script], first=True)
    wrapper(script_list)
def mt_prepare():
    """Create one ``MTPrepareJobScript`` per iteration and wrap them.

    The iteration count is ``config['iter']``.  The scripts are passed to
    ``PrepareSubScript`` when ``check_sub_config()`` is true and to
    ``PrepareRunScript`` otherwise.
    """
    iteration_count = load_config()['iter']

    job_scripts = []
    for n in range(iteration_count):
        job_scripts.append(MTPrepareJobScript(n))

    wrapper = PrepareSubScript if check_sub_config() else PrepareRunScript
    wrapper(job_scripts)
def erg():
    """Create an ``ErgJobScript`` for every (trial, index) pair and dispatch.

    Trials run over ``range(config['trials'])`` and indices over
    ``range(len(config['readied']))``, with the trial as the outer loop.
    The resulting list goes to ``sub_erg`` when ``check_sub_config()`` is
    true and to ``run_erg`` otherwise.
    """
    settings = load_config()
    trial_count = settings['trials']
    index_count = len(settings['readied'])

    job_scripts = []
    for trial in range(trial_count):
        for index in range(index_count):
            job_scripts.append(ErgJobScript(trial, index))

    dispatch = sub_erg if check_sub_config() else run_erg
    dispatch(job_scripts)
def fix_prepare():
    """Create a ``FixedErgPrepareJobScript`` per (expt, index) pair and dispatch.

    Experiments run over ``range(config['expt'])`` and indices over
    ``range(len(config['readied']))``, with the experiment as the outer
    loop.  The list is passed to ``sub`` when ``check_sub_config()`` is true
    and to ``run`` otherwise.
    """
    settings = load_config()
    expt_count = settings['expt']
    index_count = len(settings['readied'])

    job_scripts = []
    for expt in range(expt_count):
        for index in range(index_count):
            job_scripts.append(FixedErgPrepareJobScript(expt, index))

    if not check_sub_config():
        run(job_scripts)
        return
    sub(job_scripts)
def mt_preproc():
    """Create the MT preprocessing job scripts and wrap them.

    The iteration ``first_index`` (``config['preprocess']['first_index']``,
    default ``0``) gets its own ``MTPreprocessJobScript``, which is wrapped
    separately with ``first=True``.  Every other iteration in
    ``range(config['iter'])`` is wrapped together in a second wrapper.

    ``PreprocessSubScript`` is used as the wrapper when
    ``check_sub_config()`` is true, ``PreprocessRunScript`` otherwise.

    ``config['preprocess']`` is indexed directly, so the key must be present.
    """
    config = load_config()
    num_iter = config['iter']
    first_index = config['preprocess'].get('first_index', 0)

    first_script = MTPreprocessJobScript(first_index, first_index)
    script_list = [
        MTPreprocessJobScript(first_index, n)
        for n in range(num_iter)
        if n != first_index
    ]

    if check_sub_config():
        wrapper = PreprocessSubScript
    else:
        # Both run wrappers use PreprocessRunScript; the second call was
        # previously spelled `PReprocessRunScript`, an undefined name.
        wrapper = PreprocessRunScript

    wrapper([first_script], first=True)
    wrapper(script_list)
def make(self):
    """Emit the commands that build ``<trial>/<index>/train.gz``.

    Calls the ``make_copy`` (sub-config mode only), ``make_config``,
    ``make_erg``, ``make_form`` and ``make_bpe`` steps, then appends a shell
    pipeline that pastes ``src.txt`` and ``trg.txt`` from
    ``${SGE_LOCALDIR}``, pipes them through ``tondi`` with the configured
    length limits, ``progress`` and ``pigz -c``, and redirects the output to
    the resolved ``train.gz`` path.

    Reads ``self.config['max_len']`` (required) and
    ``self.config.get('min_len', 1)``.
    """
    # NOTE(review): the extent of this `if` body was reconstructed from a
    # whitespace-collapsed source; only make_copy() is assumed to be
    # conditional -- confirm against the original layout.
    if check_sub_config():
        self.make_copy()
    self.make_config()
    self.make_erg()
    self.make_form()
    self.make_bpe()
    min_len = self.config.get('min_len', 1)
    max_len = self.config['max_len']
    # Each append() call contributes one line of a single multi-line shell
    # pipeline (note the trailing backslashes); presumably append() adds a
    # line to the generated script -- confirm.
    self.append('paste ${SGE_LOCALDIR}/src.txt ${SGE_LOCALDIR}/trg.txt \\')
    self.append(' | tondi --min-len {} --max-len {} \\'.format(
        min_len, max_len))
    self.append(' | progress \\')
    self.append(' | pigz -c \\')
    # The output path is resolved when this method runs.
    self.append(' > {}'.format(
        Path('{}/{}/train.gz'.format(self.trial, self.index)).resolve()))
def fix_preproc():
    """Create a ``FixedErgPreprocessJobScript`` per (expt, trial, index) and dispatch.

    For each experiment in ``range(config['expt'])`` and each iteration ``i``
    in ``range(config['iter'])``, the trial number is
    ``expt * config['iter'] + i``; one script is created for every index in
    ``range(config['indices'])``.  The list is passed to ``sub`` when
    ``check_sub_config()`` is true and to ``run`` otherwise.
    """
    settings = load_config()
    expt_count = settings['expt']
    iter_count = settings['iter']
    index_count = settings['indices']

    # Same nesting order as explicit loops: expt, then iteration, then index.
    job_scripts = [
        FixedErgPreprocessJobScript(expt, expt * iter_count + i, index)
        for expt in range(expt_count)
        for i in range(iter_count)
        for index in range(index_count)
    ]

    dispatch = sub if check_sub_config() else run
    dispatch(job_scripts)
def make(self):
    """Emit the commands that build ``source.gz`` and ``target.gz``.

    Calls the ``make_copy`` (sub-config mode only), ``make_config``,
    ``make_erg`` and ``make_form`` steps, then appends two shell pipelines
    that read ``${SGE_LOCALDIR}/formed.txt``: field 1 (``cut -f 1``) is
    compressed into ``<expt>/<index>/source.gz`` and field 2 into
    ``<expt>/<index>/target.gz``.  Both pipelines end in ``&`` and are
    followed by a ``wait`` line.
    """
    # NOTE(review): the extent of this `if` body was reconstructed from a
    # whitespace-collapsed source; only make_copy() is assumed to be
    # conditional -- confirm against the original layout.
    if check_sub_config():
        self.make_copy()
    self.make_config()
    self.make_erg()
    self.make_form()
    # Output paths are resolved when this method runs.
    source_path = Path('{}/{}/source.gz'.format(self.expt, self.index)).resolve()
    target_path = Path('{}/{}/target.gz'.format(self.expt, self.index)).resolve()
    # First pipeline: column 1 -> source.gz, run in the background.
    self.append('cat ${SGE_LOCALDIR}/formed.txt \\')
    self.append(' | cut -f 1 \\')
    self.append(' | pigz -c \\')
    self.append(' > {} &'.format(source_path))
    # Second pipeline: column 2 -> target.gz, run in the background.
    self.append('cat ${SGE_LOCALDIR}/formed.txt \\')
    self.append(' | cut -f 2 \\')
    self.append(' | pigz -c \\')
    self.append(' > {} &'.format(target_path))
    # Block until both background pipelines have finished.
    self.append('wait')
def split():
    """Create a single ``SplitJobScript`` and wrap it.

    The one-element script list is passed to ``SplitSubScript`` when
    ``check_sub_config()`` is true and to ``SplitRunScript`` otherwise.
    """
    scripts = [SplitJobScript()]
    wrapper = SplitSubScript if check_sub_config() else SplitRunScript
    wrapper(scripts)
def rtt_back():
    """Build the ``RTTBackJobScript`` list and hand it to the sub or run path.

    The list comes from ``make_script_list(RTTBackJobScript)``.  When
    ``check_sub_config()`` is true it is passed, together with
    ``RTTBackSubScript``, to ``rtt_sub_translation``; otherwise an
    ``RTTBackRunScript`` is constructed from it.
    """
    scripts = make_script_list(RTTBackJobScript)
    if not check_sub_config():
        RTTBackRunScript(scripts)
        return
    rtt_sub_translation(RTTBackSubScript, scripts)
def rtt_fore():
    """Build the ``RTTForeJobScript`` list and hand it to the sub or run path.

    The list comes from ``make_script_list(RTTForeJobScript)``.  When
    ``check_sub_config()`` is true it is passed, together with
    ``RTTForeSubScript``, to ``rtt_sub_translation``; otherwise an
    ``RTTForeRunScript`` is constructed from it.
    """
    scripts = make_script_list(RTTForeJobScript)
    if not check_sub_config():
        RTTForeRunScript(scripts)
        return
    rtt_sub_translation(RTTForeSubScript, scripts)