Пример #1
0
def generate_robust_sero_for_train():
    """Start the "RobustSero5" generation job.

    A ``MultiWindow`` encoder (window 512, total length 512 * 4) is handed
    to ``RobustPointwiseTrainGenEx``; the generator is bound into a
    ``RobustWorker`` factory which a ``JobRunner`` rooted at
    ``sydney_working_dir`` then runs.
    """
    window_len = 512
    full_len = window_len * 4
    multi_window = MultiWindow(window_len, full_len)
    # "desc" is forwarded as-is; presumably it selects which text field is
    # encoded -- confirm against RobustPointwiseTrainGenEx.
    generator = RobustPointwiseTrainGenEx(multi_window, full_len, "desc")
    make_worker = partial(RobustWorker, generator)
    # The meaning of the second JobRunner argument (4) is defined by
    # JobRunner itself and is not visible here.
    JobRunner(sydney_working_dir, 4, "RobustSero5", make_worker).start()
Пример #2
0
def generate_robust_sero_for_prediction():
    """Start the "RobustSeroPred4" generation job.

    Uses a ``MultiWindow`` encoder with a source window of 512 - 2 and a
    total length of 512 * 4, wrapped by ``RobustPredictGenOld`` and executed
    through a ``JobRunner`` rooted at ``sydney_working_dir``.
    """
    full_len = 4 * 512
    # Two positions short of 512; presumably reserved for special tokens --
    # TODO confirm against MultiWindow.
    window_len = 512 - 2
    multi_window = MultiWindow(window_len, full_len)
    generator = RobustPredictGenOld(multi_window, full_len)
    make_worker = partial(RobustWorker, generator)
    JobRunner(sydney_working_dir, 4, "RobustSeroPred4", make_worker).start()
Пример #3
0
def generate_robust_sero_for_train():
    """Start the "RobustSero_128_16" generation job.

    Encodes with a ``MultiWindow`` of window 128 and total length 128 * 16,
    generates with ``RobustPointwiseTrainGenEx`` and runs the
    ``RobustWorker`` factory through a ``JobRunner`` rooted at
    ``job_man_dir``.
    """
    window_len = 128
    full_len = window_len * 16
    multi_window = MultiWindow(window_len, full_len)
    # "desc" is forwarded unchanged to the generator; its meaning is defined
    # there (presumably a field selector -- confirm).
    generator = RobustPointwiseTrainGenEx(multi_window, full_len, "desc")
    make_worker = partial(RobustWorker, generator)
    JobRunner(job_man_dir, 4, "RobustSero_128_16", make_worker).start()
Пример #4
0
def generate_robust_sero_for_train():
    """Start the "RobustSero5_128_pred" generation job.

    Encodes with a ``MultiWindow`` of window 128 and total length 128 * 4
    and generates with ``RobustPredictGen``.

    NOTE(review): despite the ``..._for_train`` name, this uses the predict
    generator and a ``_pred`` job name -- confirm the name is intentional.
    """
    window_len = 128
    full_len = window_len * 4
    multi_window = MultiWindow(window_len, full_len)
    # 100 and "desc" are forwarded verbatim; their semantics are defined by
    # RobustPredictGen and are not visible here.
    generator = RobustPredictGen(multi_window, full_len, 100, "desc")
    make_worker = partial(RobustWorker, generator)
    JobRunner(job_man_dir, 4, "RobustSero5_128_pred", make_worker).start()
Пример #5
0
def generate_robust_sero_for_train():
    """Run a RobustSero generation job sized from the command line.

    ``sys.argv[1]`` is the window size and ``sys.argv[2]`` the number of
    windows; both are parsed with ``int``. The total sequence length is
    their product and the job is named ``RobustSero_<window>_<n_window>``.

    Raises:
        IndexError: if fewer than two command-line arguments are given.
        ValueError: if an argument is not an integer literal.
    """
    window_len = int(sys.argv[1])
    window_count = int(sys.argv[2])
    full_len = window_len * window_count
    multi_window = MultiWindow(window_len, full_len)
    generator = RobustPointwiseTrainGenEx(multi_window, full_len, "desc")
    make_worker = partial(RobustWorker, generator)
    job_name = "RobustSero_{}_{}".format(window_len, window_count)
    # Unlike the fixed-size variants this calls auto_runner() rather than
    # start(); the difference is defined by JobRunner.
    JobRunner(job_man_dir, 4, job_name, make_worker).auto_runner()
Пример #6
0
from typing import List, Dict

from data_generator.job_runner import JobRunner
from epath import job_man_dir
from tlm.data_gen.adhoc_datagen import LeadingN, MultiWindow
from tlm.data_gen.msmarco_doc_gen.gen_worker import MMDWorker, PointwiseGen, \
    FirstPassagePairGenerator
from tlm.data_gen.msmarco_doc_gen.processed_resource import ProcessedResource, ProcessedResource10doc, \
    ProcessedResource50doc

if __name__ == "__main__":
    # Script entry point: build first-passage pair data for the "train"
    # split with a MultiWindow encoding of window 512 and total length
    # 512 * 4, then run it through a JobRunner.
    split = "train"
    resource = ProcessedResource(split)
    total_sequence_length = 512 * 4
    src_window_size = 512

    encoder = MultiWindow(src_window_size, total_sequence_length)

    generator = FirstPassagePairGenerator(resource, encoder,
                                          total_sequence_length)

    def factory(out_dir):
        """Return an MMDWorker bound to the shared resource and generator."""
        return MMDWorker(resource.query_group, generator, out_dir)

    # The job name is a plain literal: it contains no replacement fields, so
    # formatting it with `split` would be a no-op and is not done.
    # The second argument is derived from the number of query groups;
    # presumably it is the last job index -- confirm against JobRunner.
    runner = JobRunner(job_man_dir,
                       len(resource.query_group) - 1,
                       "MMD_pair_512_4", factory)
    runner.start()