def generate_robust_sero_for_train():
    """Launch the "RobustSero5" job with a pointwise training generator.

    Uses a MultiWindow encoder with a source window of 512 and a total
    sequence length of 512 * 4, wraps it in RobustPointwiseTrainGenEx, and
    starts a JobRunner rooted at ``sydney_working_dir``.
    """
    window_len = 512
    seq_len = window_len * 4

    generator = RobustPointwiseTrainGenEx(
        MultiWindow(window_len, seq_len),
        seq_len,
        "desc",
    )
    # JobRunner is handed a factory; RobustWorker's first argument is
    # pre-bound to the generator, the rest is supplied by the runner.
    make_worker = partial(RobustWorker, generator)

    # NOTE(review): the literal 4 is presumably the highest job index -
    # confirm against JobRunner.
    JobRunner(sydney_working_dir, 4, "RobustSero5", make_worker).start()
def generate_robust_sero_for_prediction():
    """Launch the "RobustSeroPred4" job with the old prediction generator.

    The source window is 512 - 2 while the total sequence length stays at
    512 * 4. The encoder is wrapped in RobustPredictGenOld and the job runs
    under ``sydney_working_dir``.
    """
    seq_len = 512 * 4
    # NOTE(review): the "- 2" presumably reserves room for two special
    # tokens per window - confirm against MultiWindow.
    window_len = 512 - 2

    generator = RobustPredictGenOld(MultiWindow(window_len, seq_len), seq_len)
    # RobustWorker's first argument is pre-bound to the generator.
    make_worker = partial(RobustWorker, generator)

    JobRunner(sydney_working_dir, 4, "RobustSeroPred4", make_worker).start()
def generate_robust_sero_for_train():
    """Launch the "RobustSero_128_16" job with a pointwise training generator.

    Uses a MultiWindow encoder with a source window of 128 and a total
    sequence length of 128 * 16, wraps it in RobustPointwiseTrainGenEx, and
    starts a JobRunner rooted at ``job_man_dir``.
    """
    window_len = 128
    seq_len = window_len * 16

    generator = RobustPointwiseTrainGenEx(
        MultiWindow(window_len, seq_len),
        seq_len,
        "desc",
    )
    # RobustWorker's first argument is pre-bound to the generator.
    make_worker = partial(RobustWorker, generator)

    JobRunner(job_man_dir, 4, "RobustSero_128_16", make_worker).start()
def generate_robust_sero_for_train():
    """Launch the "RobustSero5_128_pred" job with a RobustPredictGen.

    Uses a MultiWindow encoder with a source window of 128 and a total
    sequence length of 128 * 4, and starts a JobRunner rooted at
    ``job_man_dir``.

    NOTE(review): despite the ``_for_train`` name, this builds a
    RobustPredictGen and a job whose name ends in ``_pred``; the function
    name is kept as-is so existing callers keep working.
    """
    window_len = 128
    seq_len = window_len * 4

    # NOTE(review): the meaning of the literal 100 is not visible here -
    # confirm against RobustPredictGen's signature.
    generator = RobustPredictGen(
        MultiWindow(window_len, seq_len),
        seq_len,
        100,
        "desc",
    )
    # RobustWorker's first argument is pre-bound to the generator.
    make_worker = partial(RobustWorker, generator)

    JobRunner(job_man_dir, 4, "RobustSero5_128_pred", make_worker).start()
def generate_robust_sero_for_train():
    """Launch a pointwise training job sized from the command line.

    Reads the window size from ``sys.argv[1]`` and the number of windows
    from ``sys.argv[2]``; the total sequence length is their product. The
    job is named ``RobustSero_<window_size>_<n_window>`` and is driven via
    ``JobRunner.auto_runner()`` under ``job_man_dir``.

    Raises:
        IndexError: if fewer than two command-line arguments are given.
        ValueError: if either argument is not an integer literal.
    """
    # Parse in argv order so a bad first argument is reported first.
    window_size = int(sys.argv[1])
    n_window = int(sys.argv[2])
    seq_len = window_size * n_window

    generator = RobustPointwiseTrainGenEx(
        MultiWindow(window_size, seq_len),
        seq_len,
        "desc",
    )
    # RobustWorker's first argument is pre-bound to the generator.
    make_worker = partial(RobustWorker, generator)

    job_name = "RobustSero_{}_{}".format(window_size, n_window)
    JobRunner(job_man_dir, 4, job_name, make_worker).auto_runner()
from typing import List, Dict

from data_generator.job_runner import JobRunner
from epath import job_man_dir
from tlm.data_gen.adhoc_datagen import LeadingN, MultiWindow
from tlm.data_gen.msmarco_doc_gen.gen_worker import (
    FirstPassagePairGenerator,
    MMDWorker,
    PointwiseGen,
)
from tlm.data_gen.msmarco_doc_gen.processed_resource import (
    ProcessedResource,
    ProcessedResource10doc,
    ProcessedResource50doc,
)


# Script entry point: builds first-passage pair data for the "train" split
# using a MultiWindow encoder (source window 512, total length 512 * 4) and
# runs it through JobRunner under job_man_dir.
if __name__ == "__main__":
    split = "train"
    resource = ProcessedResource(split)
    total_sequence_length = 512 * 4
    src_window_size = 512
    encoder = MultiWindow(src_window_size, total_sequence_length)
    generator = FirstPassagePairGenerator(resource, encoder, total_sequence_length)

    def factory(out_dir):
        """Create an MMDWorker over the resource's query groups for out_dir."""
        return MMDWorker(resource.query_group, generator, out_dir)

    # The job name is a fixed string. It previously went through
    # .format(split), but the template has no replacement fields, so the
    # call never changed the name; it is dropped to avoid implying that the
    # split is part of the job name.
    # NOTE(review): if per-split job names are wanted, add a "{}" field
    # deliberately - that would change the output job name.
    job_name = "MMD_pair_512_4"

    # One job per query group; the second argument is the last group index.
    runner = JobRunner(job_man_dir, len(resource.query_group) - 1, job_name, factory)
    runner.start()