Exemplo n.º 1
0
    def create_files(self):
        """
        Write the RETURNN config file and register the ``rnn.sh`` run command.

        Afterwards asserts that the index file of ``self.model_checkpoint``
        is present on disk.
        """
        self.returnn_config.write(self.out_returnn_config_file.get_path())

        run_cmd = self._get_run_cmd()
        util.create_executable("rnn.sh", run_cmd)

        # The checkpoint is only validated here, after the files were written.
        index_file = tk.uncached_path(self.model_checkpoint.index_path)
        assert os.path.exists(index_file), (
            "Provided model does not exists: %s" % str(self.model_checkpoint)
        )
Exemplo n.º 2
0
    def create_files(self):
        """
        Copy the input RETURNN config to its job location and register the
        ``rnn.sh`` run command (python exe, ``rnn.py``, config path, followed
        by whatever ``get_parameter_list()`` returns).
        """
        # returnn
        config_src = tk.uncached_path(self.returnn_config_file_in)
        config_dst = tk.uncached_path(self.returnn_config_file)
        shutil.copy(config_src, config_dst)

        extra_args = self.get_parameter_list()

        python_exe = tk.uncached_path(self.returnn_python_exe)
        rnn_entry_point = os.path.join(tk.uncached_path(self.returnn_root), "rnn.py")
        base_cmd = [python_exe, rnn_entry_point, self.returnn_config_file.get_path()]

        util.create_executable("rnn.sh", base_cmd + extra_args)
Exemplo n.º 3
0
    def create_files(self):
        """
        Build the RETURNN config via ``create_returnn_config``, write it to the
        output config path and register the ``rnn.sh`` run command.

        Afterwards asserts that the index file of the model checkpoint exists.
        """
        returnn_config = self.create_returnn_config(
            model_checkpoint=self._model_checkpoint,
            returnn_config=self._returnn_config,
            log_verbosity=self._log_verbosity,
            device=self._device,
        )
        returnn_config.write(self.out_returnn_config_file.get_path())

        python_exe = tk.uncached_path(self.returnn_python_exe)
        rnn_entry_point = os.path.join(tk.uncached_path(self.returnn_root), 'rnn.py')
        run_cmd = [python_exe, rnn_entry_point, self.out_returnn_config_file.get_path()]
        util.create_executable("rnn.sh", run_cmd)

        # The checkpoint is only validated here, after the files were written.
        index_file = self._model_checkpoint.index_path.get_path()
        assert os.path.exists(index_file), (
            "Provided model does not exists: %s" % str(self._model_checkpoint)
        )
Exemplo n.º 4
0
    def run(self):
        """
        Run ``apply_bpe.py`` from the subword-nmt repository on the text file.

        The script is started with the current Python interpreter and gets the
        input text, the BPE codes and the output path as arguments; a
        ``--vocabulary`` argument is appended only when ``self.bpe_vocab`` is
        truthy. The command is also stored as ``apply_bpe.sh``.
        """
        apply_bpe_script = os.path.join(
            tk.uncached_path(self.subword_nmt_repo), "apply_bpe.py"
        )
        cmd = [sys.executable, apply_bpe_script]
        cmd.extend(["--input", self.text_file.get_path()])
        cmd.extend(["--codes", self.bpe_codes.get_path()])
        cmd.extend(["--output", self.out_bpe_text.get_path()])

        if self.bpe_vocab:
            cmd.extend(["--vocabulary", self.bpe_vocab.get_path()])

        util.create_executable("apply_bpe.sh", cmd)
        # check=True: a non-zero exit status raises CalledProcessError.
        sp.run(cmd, check=True)
Exemplo n.º 5
0
    def run(self):
        """
        Create BPE codes and the matching vocabulary with subword-nmt scripts.

        Steps:

        1. ``learn_bpe.py`` is started with ``--symbols self.bpe_size`` and
           receives the content of ``self.text_file`` on stdin; the codes are
           written to ``self.out_bpe_codes``.
        2. ``create-py-vocab.py`` is run on the text file and the codes and
           writes ``self.out_bpe_vocab``.
        3. The vocabulary file is read back and the largest value plus one is
           stored in ``self.out_vocab_size``.

        Both commands are also stored as shell scripts via
        ``util.create_executable``.

        :raises subprocess.CalledProcessError: if either script exits with a
            non-zero status
        """
        bpe_codes_cmd = [
            sys.executable,
            os.path.join(tk.uncached_path(self.subword_nmt_repo), "learn_bpe.py"),
            "--output",
            self.out_bpe_codes.get_path(),
            "--symbols",
            str(self.bpe_size),
        ]

        util.create_executable("create_bpe_codes.sh", bpe_codes_cmd)

        with util.uopen(self.text_file, "rb") as f:
            p = sp.Popen(
                bpe_codes_cmd, stdin=sp.PIPE, stdout=sys.stdout, stderr=sys.stderr
            )
            # Feed the text in fixed-size chunks so it never has to be held in
            # memory as a whole.
            while True:
                data = f.read(4096)
                if not data:
                    break
                p.stdin.write(data)
            # communicate() closes stdin (signalling EOF) and waits for exit.
            p.communicate()
            # Explicit raise instead of ``assert``: asserts are removed under
            # ``python -O``, which would let a failed run pass silently. The
            # exception type matches what ``sp.run(..., check=True)`` below
            # raises.
            if p.returncode != 0:
                raise sp.CalledProcessError(p.returncode, bpe_codes_cmd)

        bpe_vocab_cmd = [
            sys.executable,
            os.path.join(tk.uncached_path(self.subword_nmt_repo), "create-py-vocab.py"),
            "--txt",
            self.text_file.get_path(),
            "--bpe",
            self.out_bpe_codes.get_path(),
            "--unk",
            self.unk_label,
            "--out",
            self.out_bpe_vocab.get_path(),
        ]

        util.create_executable("create_bpe_vocab.sh", bpe_vocab_cmd)
        sp.run(bpe_vocab_cmd, check=True)

        with util.uopen(self.out_bpe_vocab) as f:
            # NOTE(review): eval() executes the file content as Python code.
            # The file was just written by create-py-vocab.py above, so it is
            # the job's own output; if it is a plain dict literal,
            # ast.literal_eval would be a safer replacement - confirm the
            # file format before switching.
            vocab = eval(f.read())
            num_labels = max(vocab.values()) + 1  # 0-based index
            self.out_vocab_size.set(num_labels)
Exemplo n.º 6
0
    def run(self):
        """
        Run RETURNN's ``tools/dump-dataset.py`` with statistics dumping enabled
        and derive a single mean and standard deviation from its output.

        The per-line values from ``stats.mean.txt`` and ``stats.std_dev.txt``
        are moved to the output files and then merged line by line into one
        total mean and one total standard deviation, which are stored in
        ``self.out_mean`` and ``self.out_std_dev``.
        """
        config_name = "returnn.config"
        self.returnn_config.write(config_name)

        # NOTE(review): "--endseq -1" and "--dump_stats stats" are single list
        # elements containing a space; presumably create_executable joins the
        # elements into a shell line so the shell splits them - confirm.
        dump_script = os.path.join(self.returnn_root.get(), "tools/dump-dataset.py")
        command = [self.returnn_python_exe.get(), dump_script, config_name]
        command += ["--endseq -1", "--stats", "--dump_stats stats"]

        create_executable("rnn.sh", command)
        subprocess.check_call(["./rnn.sh"])

        shutil.move("stats.mean.txt", self.out_mean_file.get_path())
        shutil.move("stats.std_dev.txt", self.out_std_dev_file.get_path())

        running_mean = 0
        running_var = 0

        with open(self.out_mean_file.get_path()) as mean_lines:
            with open(self.out_std_dev_file.get_path()) as std_dev_lines:
                # compute the total mean and std-dev in an iterative way:
                # `seen` lines are already merged, the current line is added
                # with weight 1.
                for seen, (mean_str, std_dev_str) in enumerate(
                    zip(mean_lines, std_dev_lines)
                ):
                    count = seen + 1
                    line_mean = float(mean_str)
                    line_var = float(std_dev_str.strip()) ** 2
                    # Squared distance between the running mean and the new
                    # mean, weighted by seen / count, is the between-group
                    # share of the merged variance.
                    squared_shift = (running_mean - line_mean) ** 2
                    between_term = squared_shift * seen / count
                    running_var = (
                        running_var * seen + line_var + between_term
                    ) / count
                    running_mean = (running_mean * seen + line_mean) / count

                self.out_mean.set(running_mean)
                self.out_std_dev.set(numpy.sqrt(running_var))
Exemplo n.º 7
0
    def create_files(self):
        """
        Write the RETURNN config to its output path and register the
        ``rnn.sh`` run command obtained from ``_get_run_cmd()``.
        """
        returnn_config = self.returnn_config
        returnn_config.write(self.out_returnn_config_file.get_path())

        run_cmd = self._get_run_cmd()
        util.create_executable("rnn.sh", run_cmd)