示例#1
0
    def __init__(
        self,
        crp,
        feature_flow,
        feature_scorer,
        denominator_path,
        alignment_options=None,
        short_pauses=None,
        use_gpu=False,
        rtf=40,
        mem=4,  # TODO check requirements
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, flow, executable, outputs and resource requirements.

        Every argument except ``self`` is handed on unchanged, as keyword
        arguments, to ``create_config`` and ``create_flow``.

        :param crp: parameter object; ``lattice_processor_exe``,
            ``concurrent`` and ``corpus_duration`` are read from it here
        :param feature_flow: not used directly here; forwarded via kwargs
        :param feature_scorer: not used directly here; forwarded via kwargs
        :param denominator_path: not used directly here; forwarded via kwargs
        :param alignment_options: not used directly here; forwarded via kwargs
        :param short_pauses: not used directly here; forwarded via kwargs
        :param use_gpu: if truthy, one GPU is requested in ``rqmt``
        :param rtf: factor multiplied with ``crp.corpus_duration`` to obtain
            the time requirement (which is never below 0.5)
        :param mem: value stored under "mem" in ``rqmt``
        :param extra_config: not used directly here; forwarded via kwargs
        :param extra_post_config: not used directly here; forwarded via kwargs
        """
        self.set_vis_name("Accuracy Lattice")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = self.create_config(**kwargs)
        self.alignment_flow = self.create_flow(**kwargs)
        self.exe = self.select_exe(
            crp.lattice_processor_exe, "lattice-processor"
        )
        self.concurrent = crp.concurrent
        self.use_gpu = use_gpu

        self.log_file = self.log_file_output_path("create-accuracy", crp, True)

        # One lattice cache per task, task ids are 1-based.
        self.single_lattice_caches = {
            task: self.output_path("accuracy.%d" % task, cached=True)
            for task in range(1, crp.concurrent + 1)
        }
        self.lattice_bundle = self.output_path("accuracy.bundle", cached=True)
        self.lattice_path = util.MultiOutputPath(
            self,
            "accuracy.$(TASK)",
            self.single_lattice_caches,
            cached=True,
        )

        # One segmentwise alignment cache per task, same numbering.
        self.single_segmentwise_alignment_caches = {
            task: self.output_path(
                "segmentwise-alignment.%d" % task, cached=True
            )
            for task in range(1, crp.concurrent + 1)
        }
        self.segmentwise_alignment_bundle = self.output_path(
            "segmentwise-alignment.bundle", cached=True
        )
        self.segmentwise_alignment_path = util.MultiOutputPath(
            self,
            "segmentwise-alignment.$(TASK)",
            self.single_segmentwise_alignment_caches,
            cached=True,
        )

        time_rqmt = max(crp.corpus_duration * rtf / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 2,
            "gpu": 1 if self.use_gpu else 0,
            "mem": mem,
        }
示例#2
0
    def __init__(
        self,
        crp,
        feature_flow,
        label_scorer,
        alignment_options,
        word_boundaries=False,
        # NOTE(review): mutable default object shared between calls; left
        # untouched because changing the default value could alter what
        # callers (and any argument-based hashing) observe.
        align_node_options={},
        use_gpu=False,
        rtf=1.0,
        rasr_exe=None,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, flow, executable, outputs and resource requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``LabelAlignmentJob.create_config`` and
        ``LabelAlignmentJob.create_flow``.

        :param recipe.rasr.csp.CommonSprintParameters crp:
        :param feature_flow:
        :param recipe.rasr.LabelScorer label_scorer: must not be None
        :param dict[str] alignment_options:
        :param bool word_boundaries: if true, word boundary outputs are
            registered in addition to the alignment outputs
        :param dict[str] align_node_options:
        :param bool use_gpu: if true, one GPU is requested in ``rqmt``
        :param float rtf: factor multiplied with ``crp.corpus_duration`` to
            obtain the time requirement
        :param rasr_exe: executable passed to ``select_exe``; when None,
            ``crp.acoustic_model_trainer_exe`` is used instead
        :param extra_config:
        :param extra_post_config:
        """
        assert label_scorer is not None, "need label scorer for label aligner"
        self.set_vis_name("Alignment")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = LabelAlignmentJob.create_config(
            **kwargs
        )
        self.alignment_flow = LabelAlignmentJob.create_flow(**kwargs)
        self.concurrent = crp.concurrent
        self.exe = self.select_exe(
            crp.acoustic_model_trainer_exe if rasr_exe is None else rasr_exe,
            "acoustic-model-trainer",
        )
        self.use_gpu = use_gpu
        self.word_boundaries = word_boundaries

        self.out_log_file = self.log_file_output_path("alignment", crp, True)
        # One alignment cache per task, task ids are 1-based.
        self.out_single_alignment_caches = {
            task: self.output_path("alignment.cache.%d" % task, cached=True)
            for task in range(1, self.concurrent + 1)
        }
        self.out_alignment_path = util.MultiOutputPath(
            self,
            "alignment.cache.$(TASK)",
            self.out_single_alignment_caches,
            cached=True,
        )
        self.out_alignment_bundle = self.output_path(
            "alignment.cache.bundle", cached=True
        )

        if self.word_boundaries:
            self.single_word_boundary_caches = {
                task: self.output_path(
                    "word_boundary.cache.%d" % task, cached=True
                )
                for task in range(1, self.concurrent + 1)
            }
            self.word_boundary_path = util.MultiOutputPath(
                self,
                "word_boundary.cache.$(TASK)",
                self.single_word_boundary_caches,
                cached=True,
            )
            self.word_boundary_bundle = self.output_path(
                "word_boundary.cache.bundle", cached=True
            )

        time_rqmt = max(rtf * crp.corpus_duration / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 1,
            "gpu": 1 if self.use_gpu else 0,
            "mem": 2,
        }
示例#3
0
    def __init__(
        self,
        crp,
        feature_flow,
        label_tree,
        label_scorer,
        search_parameters=None,
        lm_lookahead=True,
        lookahead_options=None,
        eval_single_best=True,
        eval_best_in_lattice=True,
        use_gpu=False,
        rtf=2,
        mem=8,
        hard_rqmt=False,
        extra_config=None,
        extra_post_config=None,
        sprint_exe=None,  # allows a different executable than the default
        lm_gc_job=None,
        lm_gc_job_local=False,
        lm_gc_job_mem=2,
        # TODO set this to true later (presumably refers to this flag)
        lm_gc_job_default_search=False,
    ):
        """
        Prepare config, executable, lattice outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``LabelSyncSearchJob.create_config``.

        :param crp: parameter object; ``flf_tool_exe``, ``concurrent`` and
            ``corpus_duration`` are read from it here
        :param feature_flow: stored as ``self.feature_flow`` and forwarded
        :param use_gpu: if truthy, one GPU is requested in ``rqmt``
        :param rtf: factor multiplied with ``crp.corpus_duration`` to obtain
            the time requirement (which is never below 4.5)
        :param mem: value stored under "mem" in ``rqmt``
        :param hard_rqmt: stored as ``self.hard_rqmt``
        :param sprint_exe: executable passed to ``select_exe``; when None,
            ``crp.flf_tool_exe`` is used instead

        The remaining parameters are not used directly in this constructor;
        they only reach ``create_config`` through the forwarded kwargs.
        """
        self.set_vis_name("Label Synchronized Search")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = LabelSyncSearchJob.create_config(
            **kwargs
        )
        self.feature_flow = feature_flow
        self.exe = self.select_exe(
            crp.flf_tool_exe if sprint_exe is None else sprint_exe,
            "flf-tool",
        )
        self.concurrent = crp.concurrent
        self.use_gpu = use_gpu

        self.out_log_file = self.log_file_output_path("search", crp, True)

        # One lattice cache per task, task ids are 1-based.
        self.out_single_lattice_caches = {
            task: self.output_path("lattice.cache.%d" % task, cached=True)
            for task in range(1, crp.concurrent + 1)
        }
        self.out_lattice_bundle = self.output_path(
            "lattice.bundle", cached=True
        )
        self.out_lattice_path = util.MultiOutputPath(
            self,
            "lattice.cache.$(TASK)",
            self.out_single_lattice_caches,
            cached=True,
        )

        time_rqmt = max(crp.corpus_duration * rtf / crp.concurrent, 4.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 3,
            "gpu": 1 if self.use_gpu else 0,
            "mem": mem,
        }
        # no automatic resume with doubled rqmt
        self.hard_rqmt = hard_rqmt
示例#4
0
    def __init__(
        self,
        crp,
        feature_flow,
        feature_scorer,
        search_parameters=None,
        lm_lookahead=True,
        lookahead_options=None,
        use_gpu=False,
        rtf=30,
        mem=4,
        model_combination_config=None,
        model_combination_post_config=None,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, executable, lattice outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``create_config``.

        :param crp: parameter object; ``speech_recognizer_exe``,
            ``concurrent`` and ``corpus_duration`` are read from it here
        :param feature_flow: stored as ``self.feature_flow`` and forwarded
        :param rasr.FeatureScorer feature_scorer: checked with an assertion
        :param use_gpu: if truthy, one GPU is requested in ``rqmt``
        :param rtf: factor multiplied with ``crp.corpus_duration`` to obtain
            the time requirement (which is never below 0.5)
        :param mem: value stored under "mem" in ``rqmt``

        The remaining parameters are not used directly in this constructor;
        they only reach ``create_config`` through the forwarded kwargs.
        """
        assert isinstance(feature_scorer, rasr.FeatureScorer)

        self.set_vis_name("Raw Denominator Lattice")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = self.create_config(**kwargs)
        self.feature_flow = feature_flow
        self.exe = self.select_exe(
            crp.speech_recognizer_exe, "speech-recognizer"
        )
        self.concurrent = crp.concurrent
        self.use_gpu = use_gpu

        self.log_file = self.log_file_output_path(
            "create-raw-denominator", crp, True
        )
        # One lattice cache per task, task ids are 1-based.
        self.single_lattice_caches = {
            task: self.output_path("raw-denominator.%d" % task, cached=True)
            for task in range(1, crp.concurrent + 1)
        }
        self.lattice_bundle = self.output_path(
            "raw-denominator.bundle", cached=True
        )
        self.lattice_path = util.MultiOutputPath(
            self,
            "raw-denominator.$(TASK)",
            self.single_lattice_caches,
            cached=True,
        )

        time_rqmt = max(crp.corpus_duration * rtf / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 2,
            "gpu": 1 if self.use_gpu else 0,
            "mem": mem,
        }
    def __init__(
        self,
        crp,
        feature_flow,
        feature_scorer,
        lattice_cache,
        *,
        global_scale=1.0,
        confidence_threshold=0.75,
        weight_scale=1.0,
        ref_alignment_path=None,
        use_gpu=False,
        rtf=0.5,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, flow, executable, outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``ConfidenceBasedAlignmentJob.create_config`` and
        ``ConfidenceBasedAlignmentJob.create_flow``.

        :param crp: parameter object; ``acoustic_model_trainer_exe``,
            ``concurrent`` and ``corpus_duration`` are read from it here
        :param feature_flow: not used directly here; forwarded via kwargs
        :param rasr.FeatureScorer feature_scorer: checked with an assertion
            and stored as ``self.feature_scorer``
        :param lattice_cache: not used directly here; forwarded via kwargs
        :param global_scale: not used directly here; forwarded via kwargs
        :param confidence_threshold: not used directly here; forwarded
        :param weight_scale: not used directly here; forwarded via kwargs
        :param ref_alignment_path: not used directly here; forwarded
        :param use_gpu: if truthy, one GPU is requested in ``rqmt``
        :param rtf: factor multiplied with ``crp.corpus_duration`` to obtain
            the time requirement (which is never below 0.5)
        :param extra_config: not used directly here; forwarded via kwargs
        :param extra_post_config: not used directly here; forwarded
        """
        assert isinstance(feature_scorer, rasr.FeatureScorer)

        self.set_vis_name("Confidence-based alignment")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        (
            self.config,
            self.post_config,
        ) = ConfidenceBasedAlignmentJob.create_config(**kwargs)
        self.alignment_flow = ConfidenceBasedAlignmentJob.create_flow(**kwargs)
        self.concurrent = crp.concurrent
        self.exe = self.select_exe(
            crp.acoustic_model_trainer_exe, "acoustic-model-trainer"
        )
        self.feature_scorer = feature_scorer
        self.use_gpu = use_gpu

        self.out_log_file = self.log_file_output_path("alignment", crp, True)
        # One alignment cache per task, task ids are 1-based.
        self.out_single_alignment_caches = {
            task: self.output_path("alignment.cache.%d" % task, cached=True)
            for task in range(1, self.concurrent + 1)
        }
        self.out_alignment_path = util.MultiOutputPath(
            self,
            "alignment.cache.$(TASK)",
            self.out_single_alignment_caches,
            cached=True,
        )
        self.out_alignment_bundle = self.output_path(
            "alignment.cache.bundle", cached=True
        )

        time_rqmt = max(rtf * crp.corpus_duration / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 1,
            "gpu": 1 if self.use_gpu else 0,
            "mem": 2,
        }
示例#6
0
    def __init__(
        self,
        crp,
        feature_energy_flow,
        minimum_segment_length=0,
        maximum_segment_length=6000,
        iterations=1,
        penalty=0,
        minimum_speech_proportion=0.7,
        save_alignment=False,
        keep_accumulators=False,
        extra_merge_args=None,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Initialise the parent with merge arguments, then set up this job.

        The call arguments are forwarded as keyword arguments to
        ``LinearAlignmentJob.merge_args`` (whose result initialises the
        parent class), ``LinearAlignmentJob.create_config`` and
        ``LinearAlignmentJob.create_flow``.

        :param crp: parameter object; ``acoustic_model_trainer_exe``,
            ``concurrent`` and ``corpus_duration`` are read from it here
        :param save_alignment: stored on the instance; if truthy, alignment
            cache outputs are registered
        :param keep_accumulators: stored as ``self.keep_accumulators``

        The remaining parameters are not used directly in this constructor;
        they only reach the helpers through the forwarded kwargs.
        """
        self.set_vis_name("Linear Alignment")

        # Snapshot of the call arguments; taken before any other local name
        # is bound.
        # NOTE(review): since zero-argument super() is used below, locals()
        # may also contain the implicit `__class__` cell - confirm that the
        # helpers receiving **kwargs tolerate the extra key.
        kwargs = locals()
        kwargs.pop("self")

        super().__init__(**LinearAlignmentJob.merge_args(**kwargs))

        self.config, self.post_config = LinearAlignmentJob.create_config(
            **kwargs
        )
        self.linear_alignment_flow = LinearAlignmentJob.create_flow(**kwargs)
        self.exe = self.select_exe(
            crp.acoustic_model_trainer_exe, "acoustic-model-trainer"
        )
        self.concurrent = crp.concurrent
        self.save_alignment = save_alignment
        self.keep_accumulators = keep_accumulators

        self.out_log_file = self.log_file_output_path("accumulate", crp, True)
        if save_alignment:
            # One alignment cache per task, task ids are 1-based.
            self.single_alignment_caches = {
                task: self.output_path(
                    "alignment.cache.%d" % task, cached=True
                )
                for task in range(1, self.concurrent + 1)
            }
            self.out_alignment_path = util.MultiOutputPath(
                self,
                "alignment.cache.$(TASK)",
                self.single_alignment_caches,
                cached=True,
            )
            self.out_alignment_bundle = self.output_path(
                "alignment.cache.bundle"
            )

        time_rqmt = max(crp.corpus_duration / (20.0 * self.concurrent), 0.5)
        self.accumulate_rqmt = {
            "time": time_rqmt,
            "cpu": 1,
            "mem": 1,
        }
示例#7
0
    def __init__(
        self,
        crp,
        feature_flow,
        feature_scorer,
        alignment_options=None,
        use_gpu=False,
        rtf=10.0,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, flow, executable, lattice outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``NumeratorLatticeJob.create_config`` and
        ``NumeratorLatticeJob.create_flow``.

        :param crp: parameter object; ``acoustic_model_trainer_exe``,
            ``concurrent`` and ``corpus_duration`` are read from it here
        :param feature_flow: not used directly here; forwarded via kwargs
        :param rasr.FeatureScorer feature_scorer: checked with an assertion
            and stored as ``self.feature_scorer``
        :param alignment_options: not used directly here; forwarded
        :param use_gpu: if truthy, one GPU is requested in ``rqmt``
        :param rtf: factor multiplied with ``crp.corpus_duration`` to obtain
            the time requirement (which is never below 0.5)
        :param extra_config: not used directly here; forwarded via kwargs
        :param extra_post_config: not used directly here; forwarded
        """
        assert isinstance(feature_scorer, rasr.FeatureScorer)

        self.set_vis_name("NumeratorLattice")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = NumeratorLatticeJob.create_config(
            **kwargs
        )
        self.alignment_flow = NumeratorLatticeJob.create_flow(**kwargs)
        self.exe = self.select_exe(
            crp.acoustic_model_trainer_exe, "acoustic-model-trainer"
        )
        self.concurrent = crp.concurrent
        self.feature_scorer = feature_scorer
        self.use_gpu = use_gpu

        self.log_file = self.log_file_output_path(
            "create-numerator", crp, True
        )
        # One lattice cache per task, task ids are 1-based.
        self.single_lattice_caches = {
            task: self.output_path("numerator.%d" % task, cached=True)
            for task in range(1, self.concurrent + 1)
        }
        self.lattice_path = util.MultiOutputPath(
            self,
            "numerator.$(TASK)",
            self.single_lattice_caches,
            cached=True,
        )
        self.lattice_bundle = self.output_path("numerator.bundle", cached=True)

        time_rqmt = max(rtf * crp.corpus_duration / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 2,
            "gpu": 1 if self.use_gpu else 0,
            "mem": 2,
        }
示例#8
0
    def __init__(
        self,
        crp,
        t_matrix,
        ubm,
        features,
        alignment,
        allophones,
        dim,
        allophones_to_ignore,
        length_norm=True,
        rqmt=None,
    ):
        """
        Store the inputs and register one i-vector cache output per task.

        :param crp: (CommonRasrParameters) need for concurrency
        :param t_matrix: (HDF5File) IVectorTrainingJob.t_matrix, contains learned ubm and JFA
        :param ubm: (Path) to UBM trained with ubm.TrainWarpingFactorsSequence
        :param features: system.feature_caches['corpus']['mfcc'].hidden_paths; one feature.cache file per i-vector, good features are mfcc, plp
        :param alignment: system.alignments['corpus'][''].alternatives['task_dependent'].hidden_paths; one alignment.cache file per i-vector
        :param allophones: system.allophone_files['base']
        :param dim: (int) dimension of the i-vec, usually between 50-400
        :param allophones_to_ignore: list(string) ['laughs', 'noise', 'sil', 'inaudible', 'spn']
        :param length_norm: (bool) normalize i-vector to unit length
        :param rqmt: requirement dict; a falsy value selects the default
            ``{"time": 1, "cpu": 1, "gpu": 0, "mem": 1}``
        """
        # Plain copies of the inputs.
        self.crp = crp
        self.ubm = ubm
        self.t_matrix = t_matrix
        self.features = features
        self.alignment = alignment
        self.allophones = allophones
        self.dim = dim
        self.allophones_to_ignore = allophones_to_ignore
        self.length_norm = length_norm

        self.concurrent = crp.concurrent
        self.rqmt = rqmt or {"time": 1, "cpu": 1, "gpu": 0, "mem": 1}

        # One i-vector cache per task, task ids are 1-based.
        self.single_ivec_caches = {
            task: self.output_path("ivec.%d" % task, cached=True)
            for task in range(1, self.concurrent + 1)
        }
        self.ivec_path = util.MultiOutputPath(
            self,
            "ivec.$(TASK)",
            self.single_ivec_caches,
            cached=True,
        )
示例#9
0
    def __init__(
        self,
        crp,
        lattice_path,
        pruning_threshold=100,
        phone_coverage=0,
        nonword_phones="[*",
        max_arcs_per_second=50000,
        max_arcs_per_segment=1000000,
        output_format="flf",
        pronunciation_scale=None,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, executable, pruned lattice outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``create_config``.

        :param crp: parameter object; ``flf_tool_exe``, ``concurrent`` and
            ``corpus_duration`` are read from it here

        The remaining parameters are not used directly in this constructor;
        they only reach ``create_config`` through the forwarded kwargs.
        """
        self.set_vis_name("Lattice Pruning")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = self.create_config(**kwargs)
        self.exe = self.select_exe(crp.flf_tool_exe, "flf-tool")
        self.concurrent = crp.concurrent

        self.out_log_file = self.log_file_output_path("pruning", crp, True)
        # One pruned lattice cache per task, task ids are 1-based.
        self.out_single_lattice_caches = {
            task: self.output_path(
                "pruned_lattice.cache.%d" % task, cached=True
            )
            for task in range(1, crp.concurrent + 1)
        }
        self.out_lattice_bundle = self.output_path(
            "pruned_lattice.bundle", cached=True
        )
        self.out_lattice_path = util.MultiOutputPath(
            self,
            "pruned_lattice.cache.$(TASK)",
            self.out_single_lattice_caches,
            cached=True,
        )

        time_rqmt = max(crp.corpus_duration * 0.2 / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 1,
            "gpu": 0,
            "mem": 2.0,
        }
示例#10
0
    def __init__(
        self,
        crp,
        ubm,
        features,
        alignment,
        allophones,
        dim,
        allophones_to_ignore,
        iter=10,
        rqmt=None,
    ):
        """
        Store the inputs and register the accumulator and t-matrix outputs.

        :param crp: (CommonRasrParameters) need for concurrency
        :param ubm: (Path) to UBM trained with ubm.TrainWarpingFactorsSequence
        :param features: system.feature_caches['corpus']['mfcc'].hidden_paths; one feature.cache file per i-vector, good features are mfcc, plp
        :param alignment: system.alignments['corpus'][''].alternatives['task_dependent'].hidden_paths; one alignment.cache file per i-vector
        :param allophones: system.allophone_files['base']
        :param dim: (int) dimension of the i-vec, usually between 50-400
        :param allophones_to_ignore: list(string) ['laughs', 'noise', 'sil', 'inaudible', 'spn']
        :param iter: (int) number of em iterations during ivector training
            (the name shadows the builtin but is part of the public signature)
        :param rqmt: requirement dict; a falsy value selects the default
            ``{"time": 1, "cpu": 1, "gpu": 0, "mem": 1}``
        """
        # Plain copies of the inputs.
        self.crp = crp
        self.ubm = ubm
        self.features = features
        self.alignment = alignment
        self.allophones = allophones
        self.dim = dim
        self.allophones_to_ignore = allophones_to_ignore
        self.iter = iter

        self.concurrent = crp.concurrent
        self.rqmt = rqmt or {"time": 1, "cpu": 1, "gpu": 0, "mem": 1}

        # One accumulator cache per task, task ids are 1-based.
        self.single_accu_caches = {
            task: self.output_path("accu.%d" % task, cached=True)
            for task in range(1, self.concurrent + 1)
        }
        self.accu_path = util.MultiOutputPath(
            self,
            "accu.$(TASK)",
            self.single_accu_caches,
            cached=True,
        )
        self.t_matrix = self.output_path("t.matrix")
示例#11
0
    def __init__(
        self,
        crp,
        raw_denominator_path,
        numerator_path,
        use_gpu=False,
        rtf=1,
        mem=4,  # TODO check requirements
        search_options=None,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, executable, lattice outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``create_config``.

        :param crp: parameter object; ``lattice_processor_exe``,
            ``concurrent`` and ``corpus_duration`` are read from it here
        :param raw_denominator_path: not used directly here; forwarded
        :param numerator_path: not used directly here; forwarded via kwargs
        :param use_gpu: if truthy, one GPU is requested in ``rqmt``
        :param rtf: factor multiplied with ``crp.corpus_duration`` to obtain
            the time requirement (which is never below 0.5)
        :param mem: value stored under "mem" in ``rqmt``
        :param search_options: not used directly here; forwarded via kwargs
        :param extra_config: not used directly here; forwarded via kwargs
        :param extra_post_config: not used directly here; forwarded
        """
        self.set_vis_name("Denominator Lattice")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = self.create_config(**kwargs)
        self.exe = self.select_exe(
            crp.lattice_processor_exe, "lattice-processor"
        )
        self.concurrent = crp.concurrent
        self.use_gpu = use_gpu

        self.log_file = self.log_file_output_path(
            "create-denominator", crp, True
        )
        # One lattice cache per task, task ids are 1-based.
        self.single_lattice_caches = {
            task: self.output_path("denominator.%d" % task, cached=True)
            for task in range(1, crp.concurrent + 1)
        }
        self.lattice_bundle = self.output_path(
            "denominator.bundle", cached=True
        )
        self.lattice_path = util.MultiOutputPath(
            self,
            "denominator.$(TASK)",
            self.single_lattice_caches,
            cached=True,
        )

        time_rqmt = max(crp.corpus_duration * rtf / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 2,
            "gpu": 1 if self.use_gpu else 0,
            "mem": mem,
        }
示例#12
0
    def __init__(
        self,
        crp,
        lattice_path,
        lm_scale,
        pron_scale=1.0,
        write_cn=False,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, executable, outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``create_config``.

        :param crp: parameter object; ``flf_tool_exe``, ``concurrent`` and
            ``corpus_duration`` are read from it here
        :param lattice_path: not used directly here; forwarded via kwargs
        :param lm_scale: not used directly here; forwarded via kwargs
        :param pron_scale: not used directly here; forwarded via kwargs
        :param write_cn: stored on the instance; if truthy, the lattice
            bundle and the multi-task lattice path are registered as well
        :param extra_config: not used directly here; forwarded via kwargs
        :param extra_post_config: not used directly here; forwarded
        """
        self.set_vis_name("CN decoding")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = self.create_config(**kwargs)
        self.exe = self.select_exe(crp.flf_tool_exe, "flf-tool")
        self.concurrent = crp.concurrent
        self.write_cn = write_cn

        self.out_log_file = self.log_file_output_path("cn_decoding", crp, True)
        # The per-task caches are registered regardless of `write_cn`.
        self.out_single_lattice_caches = {
            task: self.output_path(
                "confusion_lattice.cache.%d" % task, cached=True
            )
            for task in range(1, crp.concurrent + 1)
        }
        self.out_ctm_file = self.output_path("lattice.ctm")
        if self.write_cn:
            self.out_lattice_bundle = self.output_path(
                "confusion_lattice.bundle", cached=True
            )
            self.out_lattice_path = util.MultiOutputPath(
                self,
                "confusion_lattice.cache.$(TASK)",
                self.out_single_lattice_caches,
                cached=True,
            )

        time_rqmt = max(crp.corpus_duration * 0.2 / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 1,
            "gpu": 0,
            "mem": 2.0,
        }
示例#13
0
    def __init__(
        self,
        crp,
        feature_flow,
        original_alignment,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, flow, executable, outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``DumpAlignmentJob.create_config`` and
        ``DumpAlignmentJob.create_flow``.

        :param crp: parameter object; ``acoustic_model_trainer_exe``,
            ``concurrent`` and ``corpus_duration`` are read from it here
        :param feature_flow: not used directly here; forwarded via kwargs
        :param original_alignment: not used directly here; forwarded
        :param extra_config: not used directly here; forwarded via kwargs
        :param extra_post_config: not used directly here; forwarded
        """
        self.set_vis_name("Dump Alignment")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = DumpAlignmentJob.create_config(
            **kwargs
        )
        self.dump_flow = DumpAlignmentJob.create_flow(**kwargs)
        self.exe = self.select_exe(
            crp.acoustic_model_trainer_exe, "acoustic-model-trainer"
        )
        self.concurrent = crp.concurrent

        self.out_log_file = self.log_file_output_path("dump", crp, True)
        # One alignment cache per task, task ids are 1-based.
        self.out_single_alignment_caches = {
            task: self.output_path("alignment.cache.%d" % task, cached=True)
            for task in range(1, self.concurrent + 1)
        }
        self.out_alignment_path = util.MultiOutputPath(
            self,
            "alignment.cache.$(TASK)",
            self.out_single_alignment_caches,
            cached=True,
        )
        self.out_alignment_bundle = self.output_path(
            "alignment.cache.bundle", cached=True
        )

        time_rqmt = max(crp.corpus_duration / (50.0 * crp.concurrent), 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 1,
            "mem": 1,
        }
示例#14
0
    def __init__(
        self,
        crp,
        timestamp_flow,
        *,
        samples_flow=None,
        min_length=0.5,
        timestamp_port="features",
        extract_concurrent=4,
        rtf=0.1,
        mem=2.0,
        extra_dump_config=None,
        extra_dump_post_config=None,
        extra_convert_config=None,
        extra_convert_post_config=None,
    ):
        """
        Prepare dump/convert configs and flows, outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``create_dump_config``, ``create_dump_flow``,
        ``create_convert_config`` and ``create_convert_flow``.

        :param crp: parameter object; ``feature_extraction_exe``,
            ``concurrent`` and ``corpus_duration`` are read from it here
        :param timestamp_flow: not used directly here; forwarded via kwargs
        :param samples_flow: not used directly here; forwarded via kwargs
        :param min_length: stored as ``self.min_length``
        :param timestamp_port: not used directly here; forwarded via kwargs
        :param extract_concurrent: stored on the instance; used as divisor of
            the time requirement and as "cpu" value in ``extract_pitch_rqmt``
        :param rtf: factor multiplied with ``crp.corpus_duration`` in all
            three time requirements (each never below 0.5)
        :param mem: value stored under "mem" in all three requirement dicts
        :param extra_dump_config: not used directly here; forwarded
        :param extra_dump_post_config: not used directly here; forwarded
        :param extra_convert_config: not used directly here; forwarded
        :param extra_convert_post_config: not used directly here; forwarded
        """
        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.min_length = min_length
        self.extract_concurrent = extract_concurrent
        self.dump_config, self.dump_post_config = self.create_dump_config(
            **kwargs
        )
        self.dump_flow = self.create_dump_flow(**kwargs)
        (
            self.convert_config,
            self.convert_post_config,
        ) = self.create_convert_config(**kwargs)
        self.convert_flow = self.create_convert_flow(**kwargs)

        # Fall back to the default executable when crp does not provide one.
        if crp.feature_extraction_exe is not None:
            self.exe = crp.feature_extraction_exe
        else:
            self.exe = self.default_exe("feature-extraction")
        self.concurrent = crp.concurrent

        self.out_dump_log_file = self.log_file_output_path("dump", crp, True)
        self.out_convert_log_file = self.log_file_output_path(
            "convert", crp, True
        )
        # One feature cache per task, task ids are 1-based.
        self.out_single_feature_caches = {
            task: self.output_path("tone.cache.%d" % task, cached=True)
            for task in range(1, crp.concurrent + 1)
        }
        self.out_feature_bundle = self.output_path(
            "tone.cache.bundle", cached=True
        )
        self.out_feature_path = util.MultiOutputPath(
            self,
            "tone.cache.$(TASK)",
            self.out_single_feature_caches,
            cached=True,
        )

        # Dump and convert share the same time formula; the extraction step
        # divides by `extract_concurrent` instead of `crp.concurrent`.
        per_task_time = max(crp.corpus_duration * rtf / crp.concurrent, 0.5)
        extract_time = max(
            crp.corpus_duration * rtf / self.extract_concurrent, 0.5
        )
        self.dump_rqmt = {
            "time": per_task_time,
            "cpu": 1,
            "mem": mem,
        }
        self.extract_pitch_rqmt = {
            "time": extract_time,
            "cpu": extract_concurrent,
            "mem": mem,
        }
        self.convert_rqmt = {
            "time": per_task_time,
            "cpu": 1,
            "mem": mem,
        }
示例#15
0
    def __init__(
        self,
        crp,
        feature_flow,
        feature_scorer,
        alignment_options=None,
        word_boundaries=False,
        use_gpu=False,
        rtf=1.0,
        extra_config=None,
        extra_post_config=None,
    ):
        """
        Prepare config, flow, executable, outputs and requirements.

        All arguments except ``self`` are forwarded as keyword arguments to
        ``AlignmentJob.create_config`` and ``AlignmentJob.create_flow``.

        :param rasr.crp.CommonRasrParameters crp:
        :param feature_flow:
        :param rasr.FeatureScorer feature_scorer: checked with an assertion
            and stored as ``self.feature_scorer``
        :param dict[str] alignment_options:
        :param bool word_boundaries: if true, word boundary outputs are
            registered in addition to the alignment outputs
        :param bool use_gpu: if true, one GPU is requested in ``rqmt``
        :param float rtf: factor multiplied with ``crp.corpus_duration`` to
            obtain the time requirement (which is never below 0.5)
        :param extra_config:
        :param extra_post_config:
        """
        assert isinstance(feature_scorer, rasr.FeatureScorer)

        self.set_vis_name("Alignment")

        # Snapshot of the call arguments; taken before any other local name
        # is bound so that only the parameters are contained.
        kwargs = locals()
        kwargs.pop("self")

        self.config, self.post_config = AlignmentJob.create_config(**kwargs)
        self.alignment_flow = AlignmentJob.create_flow(**kwargs)
        self.concurrent = crp.concurrent
        self.exe = self.select_exe(
            crp.acoustic_model_trainer_exe, "acoustic-model-trainer"
        )
        self.feature_scorer = feature_scorer
        self.use_gpu = use_gpu
        self.word_boundaries = word_boundaries

        self.out_log_file = self.log_file_output_path("alignment", crp, True)
        # One alignment cache per task, task ids are 1-based.
        self.out_single_alignment_caches = {
            task: self.output_path("alignment.cache.%d" % task, cached=True)
            for task in range(1, self.concurrent + 1)
        }
        self.out_alignment_path = util.MultiOutputPath(
            self,
            "alignment.cache.$(TASK)",
            self.out_single_alignment_caches,
            cached=True,
        )
        self.out_alignment_bundle = self.output_path(
            "alignment.cache.bundle", cached=True
        )

        if self.word_boundaries:
            self.out_single_word_boundary_caches = {
                task: self.output_path(
                    "word_boundary.cache.%d" % task, cached=True
                )
                for task in range(1, self.concurrent + 1)
            }
            self.out_word_boundary_path = util.MultiOutputPath(
                self,
                "word_boundary.cache.$(TASK)",
                self.out_single_word_boundary_caches,
                cached=True,
            )
            self.out_word_boundary_bundle = self.output_path(
                "word_boundary.cache.bundle", cached=True
            )

        time_rqmt = max(rtf * crp.corpus_duration / crp.concurrent, 0.5)
        self.rqmt = {
            "time": time_rqmt,
            "cpu": 1,
            "gpu": 1 if self.use_gpu else 0,
            "mem": 2,
        }