Пример #1
0
    def plp_features(self, corpus, num_deriv=2, num_features=23, **kwargs):
        """
        Create the PLP feature extraction job for ``corpus`` and register
        everything derived from it on this object.

        Registered entries (all keyed by ``corpus``):
          * ``self.jobs``: ``"plp_features"``
          * ``self.feature_caches`` / ``self.feature_bundles``: ``"plp"``
          * ``self.feature_flows``: ``"plp"``, ``"plp+deriv"``, ``"uncached_plp"``

        :param str corpus: key used to index the per-corpus registries
        :param int num_deriv: forwarded to ``features.add_derivatives``
        :param int|None num_features: if not None, the ``"plp+deriv"`` flow is
            narrowed with ``features.select_features`` to the range
            ``"0-<num_features - 1>"``
        :param kwargs: forwarded unchanged to ``features.PlpJob``
        :return: None
        """
        plp_job = features.PlpJob(self.crp[corpus], **kwargs)
        self.jobs[corpus]["plp_features"] = plp_job
        plp_job.add_alias("%s_plp_features" % corpus)

        cache_path = plp_job.out_feature_path["plp"]
        self.feature_caches[corpus]["plp"] = cache_path
        bundle_path = plp_job.out_feature_bundle["plp"]
        self.feature_bundles[corpus]["plp"] = bundle_path

        # The flow reads either the task-dependent cache or the bundle,
        # selected through the "cache_mode" flag.
        path_by_cache_mode = {
            "task_dependent": cache_path,
            "bundle": bundle_path,
        }
        feature_path = rasr.FlagDependentFlowAttribute(
            "cache_mode", path_by_cache_mode)

        cached_flow = features.basic_cache_flow(feature_path)
        corpus_flows = self.feature_flows[corpus]
        corpus_flows["plp"] = cached_flow

        deriv_flow = features.add_derivatives(cached_flow, num_deriv)
        corpus_flows["plp+deriv"] = deriv_flow
        if num_features is not None:
            selection = "0-%d" % (num_features - 1)
            corpus_flows["plp+deriv"] = features.select_features(
                deriv_flow, selection)

        corpus_flows["uncached_plp"] = plp_job.feature_flow
Пример #2
0
 def vtln_features(self, name, corpus, raw_feature_flow, warping_map,
                   **kwargs):
     """
     Create a VTLN feature job on top of an existing raw feature flow and
     register its cache, bundle and flows under the name ``"<name>+vtln"``.

     :param str name: base feature name; ``"+vtln"`` is appended to form
         the key used in all registries
     :param str corpus: key used to index the per-corpus registries
     :param rasr.FlagDependentFlowAttribute raw_feature_flow: passed on to
         ``vtln.VTLNFeaturesJob``
     :param tk.Path warping_map: passed on to ``vtln.VTLNFeaturesJob``
     :param kwargs: forwarded unchanged to ``vtln.VTLNFeaturesJob``
     :return: None
     """
     vtln_name = "%s+vtln" % name

     vtln_job = vtln.VTLNFeaturesJob(
         self.crp[corpus], raw_feature_flow, warping_map, **kwargs)
     self.jobs[corpus]["%s_features" % vtln_name] = vtln_job

     cache_path = vtln_job.out_feature_path["vtln"]
     self.feature_caches[corpus][vtln_name] = cache_path
     bundle_path = vtln_job.out_feature_bundle["vtln"]
     self.feature_bundles[corpus][vtln_name] = bundle_path

     # Cache vs. bundle is chosen through the "cache_mode" flag.
     path_by_cache_mode = {
         "task_dependent": cache_path,
         "bundle": bundle_path,
     }
     feature_path = rasr.FlagDependentFlowAttribute(
         "cache_mode", path_by_cache_mode)

     cached_flow = features.basic_cache_flow(feature_path)
     corpus_flows = self.feature_flows[corpus]
     corpus_flows[vtln_name] = cached_flow
     corpus_flows["uncached_" + vtln_name] = vtln_job.feature_flow
Пример #3
0
    def gt_features(self, corpus, prefix="", **kwargs):
        """
        Create the Gammatone feature extraction job for ``corpus`` and
        register its cache, bundle and flows under the key ``"gt"``.

        The job alias is ``"<prefix><corpus>_gt_<channels>_features"`` when
        ``kwargs["gt_options"]`` contains a ``"channels"`` entry, and
        ``"<prefix><corpus>_gt_features"`` otherwise.

        :param str corpus: key used to index the per-corpus registries
        :param str prefix: prepended to the job alias
        :param kwargs: forwarded unchanged to ``features.GammatoneJob``
        :return: None
        """
        self.jobs[corpus]["gt_features"] = f = features.GammatoneJob(
            self.crp[corpus], **kwargs)

        # ``gt_options`` may be missing or explicitly None; both mean "no
        # options". Normalising here keeps the membership test below from
        # raising TypeError on None.
        gt_options = kwargs.get("gt_options") or {}
        if "channels" in gt_options:
            f.add_alias(
                "%s%s_gt_%i_features" %
                (prefix, corpus, gt_options["channels"]))
        else:
            f.add_alias("%s%s_gt_features" % (prefix, corpus))

        self.feature_caches[corpus]["gt"] = f.out_feature_path["gt"]
        self.feature_bundles[corpus]["gt"] = f.out_feature_bundle["gt"]

        # Cache vs. bundle is chosen through the "cache_mode" flag.
        feature_path = rasr.FlagDependentFlowAttribute(
            "cache_mode",
            {
                "task_dependent": self.feature_caches[corpus]["gt"],
                "bundle": self.feature_bundles[corpus]["gt"],
            },
        )
        self.feature_flows[corpus]["gt"] = features.basic_cache_flow(
            feature_path)
        self.feature_flows[corpus]["uncached_gt"] = f.feature_flow
Пример #4
0
    def mfcc_features(self,
                      corpus,
                      num_deriv=2,
                      num_features=33,
                      prefix="",
                      **kwargs):
        """
        Create the MFCC feature extraction job for ``corpus``, register its
        cache, bundle and flows under the key ``"mfcc"``, and then call
        ``self.add_derivatives`` for both the cached and the uncached flow.

        :param str corpus: key used to index the per-corpus registries
        :param int num_deriv: forwarded to ``self.add_derivatives``
        :param int num_features: forwarded to ``self.add_derivatives``
        :param str prefix: prepended to the job alias
        :param kwargs: forwarded unchanged to ``features.MfccJob``
        :return: None
        """
        mfcc_job = features.MfccJob(self.crp[corpus], **kwargs)
        self.jobs[corpus]["mfcc_features"] = mfcc_job
        mfcc_job.add_alias("%s%s_mfcc_features" % (prefix, corpus))

        cache_path = mfcc_job.out_feature_path["mfcc"]
        self.feature_caches[corpus]["mfcc"] = cache_path
        bundle_path = mfcc_job.out_feature_bundle["mfcc"]
        self.feature_bundles[corpus]["mfcc"] = bundle_path

        # Cache vs. bundle is chosen through the "cache_mode" flag.
        path_by_cache_mode = {
            "task_dependent": cache_path,
            "bundle": bundle_path,
        }
        feature_path = rasr.FlagDependentFlowAttribute(
            "cache_mode", path_by_cache_mode)

        cached_flow = features.basic_cache_flow(feature_path)
        corpus_flows = self.feature_flows[corpus]
        corpus_flows["mfcc"] = cached_flow
        corpus_flows["uncached_mfcc"] = mfcc_job.feature_flow

        # Same derivative settings for the cached and the uncached flow.
        for flow_key in ("mfcc", "uncached_mfcc"):
            self.add_derivatives(corpus, flow_key, num_deriv, num_features)
Пример #5
0
    def fb_features(self, corpus, **kwargs):
        """
        Create the filterbank feature extraction job for ``corpus`` and
        register its cache, bundle and flows under the key ``"fb"``.

        :param str corpus: key used to index the per-corpus registries
        :param kwargs: forwarded unchanged to ``features.FilterbankJob``
        :return: None
        """
        fb_job = features.FilterbankJob(self.crp[corpus], **kwargs)
        self.jobs[corpus]["fb_features"] = fb_job
        fb_job.add_alias("%s_fb_features" % corpus)

        cache_path = fb_job.out_feature_path["fb"]
        self.feature_caches[corpus]["fb"] = cache_path
        bundle_path = fb_job.out_feature_bundle["fb"]
        self.feature_bundles[corpus]["fb"] = bundle_path

        # Cache vs. bundle is chosen through the "cache_mode" flag.
        path_by_cache_mode = {
            "task_dependent": cache_path,
            "bundle": bundle_path,
        }
        feature_path = rasr.FlagDependentFlowAttribute(
            "cache_mode", path_by_cache_mode)

        cached_flow = features.basic_cache_flow(feature_path)
        corpus_flows = self.feature_flows[corpus]
        corpus_flows["fb"] = cached_flow
        corpus_flows["uncached_fb"] = fb_job.feature_flow
Пример #6
0
    def generic_features(self,
                         corpus,
                         name,
                         feature_flow,
                         port_name="features",
                         prefix="",
                         **kwargs):
        """
        Run an arbitrary feature flow through ``features.FeatureExtractionJob``
        and register the job, its cache, bundle and flows under ``name``.

        :param str corpus: corpus identifier
        :param str name: feature identifier, like "mfcc"; used as registry key,
            as the job name and as the target of the port name mapping
        :param rasr.FlowNetwork feature_flow: definition of the RASR feature flow network
        :param str port_name: output port of the flow network, mapped to ``name``
        :param str prefix: prefix for the job alias
        :param kwargs: forwarded unchanged to ``features.FeatureExtractionJob``
        :return: None
        """
        extraction_job = features.FeatureExtractionJob(
            self.crp[corpus],
            feature_flow,
            {port_name: name},
            job_name=name,
            **kwargs)
        self.jobs[corpus][f"{name}_features"] = extraction_job
        extraction_job.add_alias(f"{prefix}{corpus}_{name}_features")

        cache_path = extraction_job.out_feature_path[name]
        self.feature_caches[corpus][name] = cache_path
        bundle_path = extraction_job.out_feature_bundle[name]
        self.feature_bundles[corpus][name] = bundle_path

        # Cache vs. bundle is chosen through the "cache_mode" flag.
        path_by_cache_mode = {
            "task_dependent": cache_path,
            "bundle": bundle_path,
        }
        feature_path = rasr.FlagDependentFlowAttribute(
            "cache_mode", path_by_cache_mode)

        cached_flow = features.basic_cache_flow(feature_path)
        corpus_flows = self.feature_flows[corpus]
        corpus_flows[name] = cached_flow
        corpus_flows[f"uncached_{name}"] = extraction_job.feature_flow
Пример #7
0
    def voiced_features(self, corpus, prefix="", **kwargs):
        """
        Create the voicedness feature extraction job for ``corpus`` and
        register its cache, bundle and flows under the key ``"voiced"``.

        The job alias is ``"<prefix><corpus>_voiced_features"``.

        :param str corpus: key used to index the per-corpus registries
        :param str prefix: prepended to the job alias
        :param kwargs: forwarded unchanged to ``features.VoicedJob``
        :return: None
        """
        self.jobs[corpus]["voiced_features"] = f = features.VoicedJob(
            self.crp[corpus], **kwargs)
        # No separator between prefix and corpus, so the default empty
        # prefix does not produce an alias with a leading underscore.
        f.add_alias("%s%s_voiced_features" % (prefix, corpus))
        self.feature_caches[corpus]["voiced"] = f.out_feature_path["voiced"]
        self.feature_bundles[corpus]["voiced"] = f.out_feature_bundle["voiced"]

        # Cache vs. bundle is chosen through the "cache_mode" flag.
        feature_path = rasr.FlagDependentFlowAttribute(
            "cache_mode",
            {
                "task_dependent": self.feature_caches[corpus]["voiced"],
                "bundle": self.feature_bundles[corpus]["voiced"],
            },
        )
        self.feature_flows[corpus]["voiced"] = features.basic_cache_flow(
            feature_path)
        self.feature_flows[corpus]["uncached_voiced"] = f.feature_flow
Пример #8
0
    def tone_features(self, corpus, timestamp_flow, prefix="", **kwargs):
        """
        Create the tone feature extraction job for ``corpus`` and register
        its cache, bundle and cached flow under the key ``"tone"``.

        :param str corpus: key used to index the per-corpus registries
        :param str timestamp_flow: key of an already registered flow in
            ``self.feature_flows[corpus]``; the flow found there is handed to
            ``features.ToneJob`` as ``timestamp_flow``
        :param str prefix: prepended to the job alias
        :param kwargs: forwarded unchanged to ``features.ToneJob``
        :return: None
        """
        # Resolve the flow name into the registered flow object.
        resolved_timestamp_flow = self.feature_flows[corpus][timestamp_flow]

        tone_job = features.ToneJob(
            self.crp[corpus], timestamp_flow=resolved_timestamp_flow, **kwargs)
        self.jobs[corpus]["tone_features"] = tone_job
        tone_job.add_alias("%s%s_tone_features" % (prefix, corpus))

        # The job's output attributes are stored directly, without indexing.
        cache_path = tone_job.out_feature_path
        self.feature_caches[corpus]["tone"] = cache_path
        bundle_path = tone_job.out_feature_bundle
        self.feature_bundles[corpus]["tone"] = bundle_path

        # Cache vs. bundle is chosen through the "cache_mode" flag.
        path_by_cache_mode = {
            "task_dependent": cache_path,
            "bundle": bundle_path,
        }
        feature_path = rasr.FlagDependentFlowAttribute(
            "cache_mode", path_by_cache_mode)
        self.feature_flows[corpus]["tone"] = features.basic_cache_flow(
            feature_path)
Пример #9
0
    def returnn_rasr_training(
        self,
        name,
        returnn_config,
        nn_train_args,
        train_corpus_key,
        cv_corpus_key,
    ):
        """
        Build a ``returnn.ReturnnRasrTrainingJob`` from the registered train
        and cross-validation input data and register its output alias.

        Train and cv data must agree on feature flow, features and
        alignments; only the train data's values are used after that check.

        :param str name: passed to ``self._add_output_alias_for_train_job``
        :param returnn.ReturnnConfig returnn_config: training config
        :param dict nn_train_args: extra keyword arguments unpacked into
            ``returnn.ReturnnRasrTrainingJob``
        :param str train_corpus_key: key into ``self.train_input_data``
        :param str cv_corpus_key: key into ``self.cv_input_data``
        :return: the created training job
        :raises AssertionError: if train and cv data disagree, or if
            ``returnn_config`` is not a ``returnn.ReturnnConfig``
        :raises NotImplementedError: if the features or alignments have a
            type this method does not handle
        """
        train_data = self.train_input_data[train_corpus_key]
        dev_data = self.cv_input_data[cv_corpus_key]

        train_crp = train_data.get_crp()
        dev_crp = dev_data.get_crp()

        assert train_data.feature_flow == dev_data.feature_flow, (
            "train and cv input data must use the same feature flow")
        assert train_data.features == dev_data.features, (
            "train and cv input data must use the same features")
        assert train_data.alignments == dev_data.alignments, (
            "train and cv input data must use the same alignments")

        if train_data.feature_flow is not None:
            # An explicit flow takes precedence over building one from caches.
            feature_flow = train_data.feature_flow
        else:
            if isinstance(train_data.features,
                          rasr.FlagDependentFlowAttribute):
                feature_path = train_data.features
            elif isinstance(train_data.features, (MultiPath, MultiOutputPath)):
                feature_path = rasr.FlagDependentFlowAttribute(
                    "cache_mode",
                    {
                        "task_dependent": train_data.features,
                    },
                )
            elif isinstance(train_data.features, tk.Path):
                feature_path = rasr.FlagDependentFlowAttribute(
                    "cache_mode",
                    {
                        "bundle": train_data.features,
                    },
                )
            else:
                raise NotImplementedError(
                    "unsupported type for features: "
                    f"{type(train_data.features).__name__}")

            feature_flow = features.basic_cache_flow(feature_path)
            if isinstance(train_data.features, tk.Path):
                # A single path only provides the "bundle" entry, so the
                # flow has to be switched to that cache mode.
                feature_flow.flags = {"cache_mode": "bundle"}

        if isinstance(train_data.alignments, rasr.FlagDependentFlowAttribute):
            # Resolve the attribute on a copy, against a throwaway network
            # whose "cache_mode" flag is set to "bundle".
            alignments = copy.deepcopy(train_data.alignments)
            net = rasr.FlowNetwork()
            net.flags = {"cache_mode": "bundle"}
            alignments = alignments.get(net)
        elif isinstance(train_data.alignments, (MultiPath, MultiOutputPath)):
            raise NotImplementedError(
                "multi-path alignments are not supported: "
                f"{type(train_data.alignments).__name__}")
        elif isinstance(train_data.alignments, tk.Path):
            alignments = train_data.alignments
        else:
            raise NotImplementedError(
                "unsupported type for alignments: "
                f"{type(train_data.alignments).__name__}")

        assert isinstance(returnn_config, returnn.ReturnnConfig), (
            "returnn_config must be a returnn.ReturnnConfig, got "
            f"{type(returnn_config).__name__}")

        train_job = returnn.ReturnnRasrTrainingJob(
            train_crp=train_crp,
            dev_crp=dev_crp,
            feature_flow=feature_flow,
            alignment=alignments,
            returnn_config=returnn_config,
            returnn_root=self.returnn_root,
            returnn_python_exe=self.returnn_python_exe,
            **nn_train_args,
        )
        self._add_output_alias_for_train_job(
            train_job=train_job,
            train_corpus_key=train_corpus_key,
            cv_corpus_key=cv_corpus_key,
            name=name,
        )

        return train_job