Пример #1
0
    def run(self, filepath):
        """Extract features from the transcript at ``filepath`` into a CSV file.

        The output path comes from ``self.derive_new_file_path``. Extraction
        is performed only when ``file_utils.should_run`` returns a truthy
        value; the output path is emitted in either case.

        When ``self.filler_dir`` is set and the extension-stripped base name
        of ``filepath`` is present in ``self.filler_files``, the matching
        filler transcript is loaded as well and its tokens are handed to the
        feature extractor.

        Args:
            filepath: Path of the plaintext transcript to process.
        """
        self.log(logging.INFO, "Starting %s" % (filepath))

        out_file = self.derive_new_file_path(filepath, ".csv")

        if not file_utils.should_run(filepath, out_file):
            # Nothing to compute; still pass the output path along.
            self.emit(out_file)
            return

        main_transcript = transcript.PlaintextTranscript(
            filepath=filepath,
            label=None,
            pos_tagger_path=self.pos_tagger_path)

        # Stays None unless a filler transcript is registered for this file.
        filler_tokens = None
        if self.filler_dir:
            file_id = os.path.basename(file_utils.strip_ext(filepath))
            if file_id in self.filler_files:
                filler_path = os.path.join(self.filler_dir,
                                           self.filler_files[file_id])
                filler_tokens = transcript.PlaintextTranscript(
                    filepath=filler_path,
                    label=None,
                    pos_tagger_path=self.pos_tagger_path).tokens

        self.feature_extractor.extract(
            main_transcript,
            out_csv=out_file,
            transcript_utterances_fillers=filler_tokens)

        self.log(logging.INFO, "Done %s -> %s" % (filepath, out_file))
        self.emit(out_file)
Пример #2
0
    def run(self, in_file):
        """Run the configured shell command on ``in_file`` and save its result.

        ``self.cmd`` is a command template in which ``**in_placeholder**`` is
        replaced by the absolute path of ``in_file``. On a zero exit status,
        the first capture group of ``self.regex`` found in the command's
        stderr is written to the derived ``.txt`` output file, which is then
        emitted as a one-element list.

        On a non-zero exit status, or when ``self.regex`` does not match the
        stderr text, an error is logged and nothing is emitted.

        Args:
            in_file: Path of the input file passed to the command.
        """
        self.log(logging.INFO, "Starting %s" % (in_file))

        out_file = self.derive_new_file_path(in_file, 'txt')

        if file_utils.should_run(in_file, out_file):
            # NOTE(review): the path is spliced into a string executed with
            # shell=True, so shell metacharacters in the file name are
            # interpreted by the shell. Confirm inputs are trusted or quote
            # the placeholder in the command template.
            cmd = self.cmd.replace("**in_placeholder**",
                                   os.path.abspath(in_file))

            process = subprocess.Popen(cmd,
                                       shell=True,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            _, err = process.communicate()
            res = process.returncode

            # str() of the captured stderr renders line breaks as the two
            # characters "\n"; turn them back into real newlines so the
            # regexes can work line by line.
            err_text = str(err).replace("\\n", "\n")

            if res != 0:
                error_lines = self.error_regex.findall(err_text)
                self.log(
                    logging.ERROR,
                    "Failed %s -> %s with error code %i. cmd: %s. Error message: %s"
                    % (in_file, out_file, res, cmd, "\n\t".join(error_lines)))
                return

            match = self.regex.search(err_text)
            if match is None:
                # The command succeeded but produced no recognisable result;
                # report it instead of crashing on ``None.group``.
                self.log(
                    logging.ERROR,
                    "Failed %s -> %s: no result found in command output. cmd: %s"
                    % (in_file, out_file, cmd))
                return

            with open(out_file, "w") as f:
                f.write(match.group(1))

            self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))

        self.emit([out_file])
Пример #3
0
    def run(self):
        """Compute and write the features for ``self.filepath``.

        Does nothing unless ``file_utils.should_run`` returns a truthy value
        for the input/output pair. Otherwise the file content is read into
        ``self.tokens``, the word statistics and frequency norms are
        computed, the Chinese CoreNLP step is run and parsed, and the
        features are written to ``self.out_file``.
        """
        if not file_utils.should_run(self.filepath, self.out_file):
            return

        self.features['FileID'] = self.filepath

        # The whole file content is kept as a single string.
        with open(self.filepath) as source:
            self.tokens = source.read()

        # Statistics computed from the raw text.
        self.compute_basic_word_stats()
        self.compute_word_frequency_norms()

        # CoreNLP step, followed by parsing of its output.
        self._run_chinese_corenlp(self.filepath)
        self._parse_corenlp_output()

        self.write_features(self.out_file, debug=False)
Пример #4
0
    def run(self, in_file):
        """Apply ``self.matlab_function`` to ``in_file`` and emit the output path.

        The output path is derived from ``in_file`` using ``self.out_ext``.
        The function is invoked only when ``file_utils.should_run`` returns a
        truthy value. If it raises, the failure is logged and nothing is
        emitted, so a failed conversion is neither reported as "Done" nor
        handed on.

        Args:
            in_file: Path of the input file handed to the function.
        """
        self.log(logging.INFO, "Starting %s" % (in_file))

        out_file = self.derive_new_file_path(in_file, self.out_ext)

        if file_utils.should_run(in_file, out_file):
            try:
                self.matlab_function(in_file, out_file)
            except Exception as e:
                # Broad catch is deliberate: any failure of the external
                # function is reported for this file without aborting the
                # caller.
                self.log(logging.ERROR,
                         "Failed %s -> %s: %s" % (in_file, out_file, e))
                return

            self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))

        self.emit(out_file)
Пример #5
0
    def run(self, in_file):
        """Run the templated command ``self.command`` on ``in_file``.

        ``self.command`` is a ``str.format`` template using the ``in_file``
        and ``out_file`` fields. The template is split on single spaces
        *before* the paths are substituted, so a path that itself contains
        spaces stays one argument.

        On a non-zero result from ``shell_run`` an error is logged and
        nothing is emitted; otherwise the output path is emitted.

        Args:
            in_file: Path of the input file.
        """
        self.log(logging.INFO, "Starting %s" % (in_file))

        out_file = self.derive_new_file_path(in_file, self.ext)

        if file_utils.should_run(in_file, out_file):
            # Human-readable form of the command, used for logging only.
            cmd = self.command.format(in_file=in_file, out_file=out_file)

            # Tokenise the template first, then fill in each token, so that
            # spaces inside the substituted paths do not create extra
            # arguments.
            args = [
                piece.format(in_file=in_file, out_file=out_file)
                for piece in self.command.split(" ")
            ]
            res = shell_run(args)

            if res != 0:
                self.log(
                    logging.ERROR,
                    "Failed %s -> %s with error code %i. cmd: %s" %
                    (in_file, out_file, res, cmd))
                return

            self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))

        self.emit(out_file)
Пример #6
0
    def run(self, mp3_file):
        """Decode ``mp3_file`` to a wav file with ``lame`` and emit its path.

        Files whose name does not end in ``.mp3`` are rejected with an error
        log. ``lame --decode`` is invoked only when ``file_utils.should_run``
        returns a truthy value; a non-zero result is logged and nothing is
        emitted in that case.

        Args:
            mp3_file: Path of the mp3 file to decode.
        """
        self.log(logging.INFO, "Starting %s" % (mp3_file))

        is_mp3 = mp3_file.endswith(".mp3")
        if not is_mp3:
            self.log(logging.ERROR, "Failed %s. Not mp3 file" % (mp3_file))
            return

        wav_file = self.derive_new_file_path(mp3_file, "wav")

        if file_utils.should_run(mp3_file, wav_file):
            decode_cmd = ["lame", "--decode", mp3_file, wav_file]
            exit_code = shell_run(decode_cmd)

            if exit_code == 0:
                self.log(logging.INFO, "Done %s -> %s" % (mp3_file, wav_file))
            else:
                failure = ("Failed %s -> %s with lame error code %i" %
                           (mp3_file, wav_file, exit_code))
                self.log(logging.ERROR, failure)
                return

        self.emit(wav_file)
Пример #7
0
    def run(self, in_file):
        """Run the openSMILE executable on ``in_file`` and emit the output path.

        The command line is built from ``self.opensmile_exec``, the config
        file (``-C``), the input file (``-I``), ``self.out_flag`` followed by
        the derived output path, and finally ``self.extra_flags``. It is run
        only when ``file_utils.should_run`` returns a truthy value. A
        non-zero result is logged and nothing is emitted; otherwise the
        output path is emitted as a one-element list.

        Args:
            in_file: Path of the input file.
        """
        self.log(logging.INFO, "Starting %s" % (in_file))

        out_file = self.derive_new_file_path(in_file, self.out_ext)

        if file_utils.should_run(in_file, out_file):
            base_args = [
                self.opensmile_exec,
                "-C", self.conf_file,
                "-I", in_file,
                self.out_flag, out_file,
            ]
            cmd = base_args + self.extra_flags
            exit_code = shell_run(cmd)

            if exit_code == 0:
                self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))
            else:
                failure = (
                    "Failed %s -> %s with SmileExtract error code %i. cmd: %s"
                    % (in_file, out_file, exit_code, " ".join(cmd)))
                self.log(logging.ERROR, failure)
                return

        self.emit([out_file])
Пример #8
0
    def run(self, in_file):
        """Run the praat syllable-nuclei script on ``in_file``.

        ``praat --run scripts/syllable_nuclei_v2.praat <absolute in_file>``
        is invoked through ``shell_run`` with ``stdout`` set to the derived
        ``csv`` output path, and only when ``file_utils.should_run`` returns
        a truthy value. A non-zero result is logged and nothing is emitted;
        otherwise the output path is emitted as a one-element list.

        Args:
            in_file: Path of the input file.
        """
        self.log(logging.INFO, "Starting %s" % (in_file))

        out_file = self.derive_new_file_path(in_file, 'csv')

        if file_utils.should_run(in_file, out_file):
            praat_script = 'scripts/syllable_nuclei_v2.praat'
            cmd = ['praat', '--run', praat_script, os.path.abspath(in_file)]
            exit_code = shell_run(cmd, stdout=out_file)

            if exit_code == 0:
                self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))
            else:
                failure = ("Failed %s -> %s with error code %i. cmd: %s" %
                           (in_file, out_file, exit_code, " ".join(cmd)))
                self.log(logging.ERROR, failure)
                return

        self.emit([out_file])
Пример #9
0
    def run(self, wav_file):
        """Resample ``wav_file`` to ``self.new_sr`` with ``sox`` and emit the result.

        Files whose name does not end in ``.wav`` are rejected with an error
        log. ``sox <in> --rate <new_sr> <out>`` is invoked only when
        ``file_utils.should_run`` returns a truthy value. A non-zero result
        is logged and nothing is emitted; otherwise the new wav path is
        emitted.

        Args:
            wav_file: Path of the wav file to resample.
        """
        self.log(logging.INFO, "Starting %s" % (wav_file))

        if not wav_file.endswith(".wav"):
            self.log(logging.ERROR, "Failed %s. Not wav file" % (wav_file))
            return

        new_wav_file = self.derive_new_file_path(wav_file, "wav")

        if file_utils.should_run(wav_file, new_wav_file):
            res = shell_run(
                ["sox", wav_file, "--rate",
                 str(self.new_sr), new_wav_file])

            if res != 0:
                # The tool invoked here is sox, so the message names sox.
                self.log(
                    logging.ERROR, "Failed %s -> %s with sox error code %i" %
                    (wav_file, new_wav_file, res))
                return

            self.log(logging.INFO, "Done %s -> %s" % (wav_file, new_wav_file))

        self.emit(new_wav_file)