Пример #1
0
    def init_directory(self):
        """Create the folders that will hold audio and transcriptions.

        One audio folder is created for the configured TTS, followed by one
        transcription folder per ASR nested under that TTS name.
        """
        tts_name = self.tts.getName()

        # folder for the audio files
        audio_target = os.path.join(self.audio_dir, tts_name)
        make_dir(audio_target)

        # one transcription folder per ASR
        for recognizer in self.asrs:
            transcription_target = os.path.join(self.transcription_dir,
                                                tts_name,
                                                recognizer.getName())
            make_dir(transcription_target)
Пример #2
0
 def get_outputfile_for_failed_test_case(self):
     """Return the JSON path where failed-test-case results are stored.

     The containing folder is derived from the output directory, the TTS
     name, the ASR names joined with "_", the number of iterations and the
     text batch size ("global" when no batch size is set); it is created
     before the path is returned. The file name records whether an
     estimator is in use.
     """
     asr_names = [asr.getName() for asr in self.asrs]

     if self.text_batch_size:
         batch_label = self.text_batch_size
     else:
         batch_label = 'global'

     result_dir = os.path.join(
         self.output_dir,
         "result",
         self.tts.getName(),
         "_".join(asr_names),
         f"num_iteration_{self.num_iteration}",
         f"text_batch_size_{batch_label}",
     )
     make_dir(result_dir)

     if self.estimator:
         # "/" in the estimator name would otherwise add a path component
         estimator_label = self.estimator.getName().replace('/', '-')
         experiment_name = f"with-estimator-{estimator_label}"
     else:
         experiment_name = "without-estimator"

     return os.path.join(result_dir, experiment_name + ".json")
Пример #3
0
 def saveFailedTestCases(self, processed_texts, cases):
     """Collect audio and ground-truth text of the target ASR's failed cases.

     For every text whose case for ``self.target_asr`` equals
     ``FAILED_TEST_CASE``, the ``<id>.wav`` file is copied from the TTS audio
     folder into ``<output_dir>/failed_test_cases/<tts>/<target_asr>/`` and
     the input text is written next to it as ``<id>.txt``.

     :params processed_texts: texts from which the ids and the raw texts are
         extracted with ``get_id_only`` / ``get_text_only``
     :params cases: one mapping per text, indexed by ASR name, in the same
         order as ``processed_texts``
     """
     import shutil  # local import: only this method needs it

     failed_test_case_dir = os.path.join(self.output_dir,
                                         "failed_test_cases",
                                         self.tts.getName(),
                                         self.target_asr)
     make_dir(failed_test_case_dir)
     ids = self.get_id_only(processed_texts)
     input_texts = self.get_text_only(processed_texts)
     source_audio_dir = os.path.join(self.audio_dir, self.tts.getName())
     for input_text, filename, case in zip(input_texts, ids, cases):
         if case[self.target_asr] != FAILED_TEST_CASE:
             continue

         src_audio_fpath = os.path.join(source_audio_dir, f"{filename}.wav")
         trgt_audio_fpath = os.path.join(failed_test_case_dir,
                                         f"{filename}.wav")
         # Copy in-process instead of shelling out to `cp`: an interpolated
         # shell command breaks on paths containing spaces or shell
         # metacharacters and only works where `cp` exists.
         try:
             shutil.copy(src_audio_fpath, trgt_audio_fpath)
         except OSError as err:
             # `cp` reported the failure and carried on; keep the export
             # best-effort so the ground-truth text is still written.
             print(f"Could not copy {src_audio_fpath}: {err}")

         ground_truth_file = os.path.join(failed_test_case_dir,
                                          f"{filename}.txt")
         with open(ground_truth_file, "w") as f:
             f.write(input_text)
Пример #4
0
def generate(tts_name: str, corpus_path: str, data_dir: str, execution_time_dir: str,
             max_texts: int = 3):
    """Generate audio for the first entries of a corpus and record the timings.

    For each processed entry the TTS writes the audio under
    ``<data_dir>/AUDIO_DIR`` and the elapsed time is saved to
    ``<execution_time_dir>/AUDIO_DIR/<tts_name>/<id>.txt``.

    :params tts_name: name used to build the TTS via ``create_tts_by_name``
    :params corpus_path: path handed to ``read_corpus``
    :params data_dir: root folder of the generated data
    :params execution_time_dir: root folder of the execution-time records
    :params max_texts: maximum number of corpus entries to process
        (defaults to 3, the previously hard-coded value)
    """
    tts = create_tts_by_name(tts_name)

    audio_dir = os.path.join(data_dir, AUDIO_DIR)
    execution_time_dir = os.path.join(execution_time_dir, AUDIO_DIR, tts_name)
    make_dir(execution_time_dir)

    corpus = read_corpus(corpus_path)

    # Never index past the end of a corpus shorter than the requested limit.
    # NOTE(review): assumes the corpus supports len() as well as indexing.
    num_texts = min(max_texts, len(corpus))

    for i in range(num_texts):
        c = corpus[i]
        text = c.getText()
        filename = c.getId()
        start = time.time()
        tts.generateAudio(text=text, audio_dir=audio_dir, filename=filename)
        end = time.time()
        execution_time = end - start
        fpath = os.path.join(execution_time_dir, filename + ".txt")
        save_execution_time(fpath=fpath, execution_time=execution_time)
        print(f"Generate {i}")
        if tts_name == "google":
            # random pause of 1.5-4.0 seconds between requests
            # (presumably to stay under the service's rate limit)
            random_number = float(random.randint(15, 40)) / 10.
            time.sleep(random_number)
Пример #5
0
    df = df.reset_index(drop=True)

    print("get sample: " + str(datetime.now()))
    # get sample data

    df = df.sample(frac=1, random_state=seed).reset_index(drop=True)

    N = 20000
    sample_df = get_sample_data(df, int(2 * N))

    print("preprocess data: " + str(datetime.now()))

    # text preprocessing
    data = preprocess_data(sample_df, N)

    print("write data: " + str(datetime.now()))

    # prepare folder to save the data
    directory = config["output_dir"]
    if len(config["corpus_fpath"].split("/")) > 1:
        directory = os.path.join(
            directory, "/".join(config["corpus_fpath"].split("/")[:-1]))
    make_dir(directory)

    outfile = os.path.join(config["output_dir"], config["corpus_fpath"])

    file = open(outfile, "w+")
    for s in data:
        file.write("%s\n" % s)
    file.close()
Пример #6
0
 def saveTranscription(self, transcription_dir: str, filename: str):
     """Write the current transcription to a text file.

     The file is ``<transcription_dir>/<name of this object>/<filename>.txt``;
     the folder is created first.

     :params transcription_dir: parent folder of the per-name folders
     :params filename: base name of the file, without extension
     """
     target_dir = os.path.join(transcription_dir, self.getName())
     make_dir(target_dir)

     target_path = os.path.join(target_dir, filename + ".txt")
     with open(target_path, "w+") as out:
         out.write(self.getTranscription())
Пример #7
0
    def processText(self, text: str, filename: str):
        """
        Run CrossASR on a single text
        Description: Given a sentence as input, the program will generate a
        test case. The audio is generated by the TTS when ``recompute`` is set
        or the audio file does not exist yet; every ASR recognizes the audio
        when ``recompute`` is set, and recognition is retried up to
        ``max_num_retry`` times while the stored transcription is empty.
        Execution times are written to, and read back from, text files under
        ``self.execution_time_dir``. The case of every ASR is saved with
        ``saveCase``.

        :params text: the input sentence
        :params filename: base name (without extension) shared by the audio,
            transcription, execution-time and case files of this text
        :returns case: mapping from ASR name to the case chosen by
            ``caseDeterminer``
        :returns execution time: recorded audio-generation time plus the
            recorded recognition time of every ASR
        """
        execution_time = 0.

        tts = self.getTTS()
        tts_name = tts.getName()

        directory = os.path.join(self.execution_time_dir, AUDIO_DIR, tts_name)
        make_dir(directory)
        time_for_generating_audio_fpath = os.path.join(directory,
                                                       filename + ".txt")

        audio_fpath = tts.getAudioPath(text=text,
                                       audio_dir=self.audio_dir,
                                       filename=filename)

        if self.recompute or not os.path.exists(audio_fpath):
            start_time = time.time()
            tts.generateAudio(text=text, audio_fpath=audio_fpath)
            save_execution_time(fpath=time_for_generating_audio_fpath,
                                execution_time=time.time() - start_time)

        ## add execution time for generating audio
        execution_time += get_execution_time(
            fpath=time_for_generating_audio_fpath)

        transcription_dir = os.path.join(self.transcription_dir, tts_name)

        transcriptions = {}
        for asr in self.asrs:
            asr_name = asr.getName()
            directory = os.path.join(self.execution_time_dir,
                                     TRANSCRIPTION_DIR, tts_name, asr_name)
            make_dir(directory)
            time_for_recognizing_audio_fpath = os.path.join(
                directory, filename + ".txt")

            if self.recompute:
                start_time = time.time()
                # TODO:
                # change recognize audio -> input audio instead of fpath
                transcription = asr.recognizeAudio(audio_fpath=audio_fpath)
                asr.setTranscription(transcription)
                asr.saveTranscription(transcription_dir=transcription_dir,
                                      filename=filename)
                save_execution_time(fpath=time_for_recognizing_audio_fpath,
                                    execution_time=time.time() - start_time)

            transcription = asr.loadTranscription(
                transcription_dir=transcription_dir, filename=filename)
            num_retry = 0
            while transcription == "" and num_retry < self.max_num_retry:
                start_time = time.time()
                # Hand the fresh result to the ASR before saving, exactly as
                # the recompute branch does; otherwise the retry would save
                # whatever transcription the ASR was already holding.
                transcription = asr.recognizeAudio(audio_fpath=audio_fpath)
                asr.setTranscription(transcription)
                asr.saveTranscription(transcription_dir=transcription_dir,
                                      filename=filename)
                save_execution_time(fpath=time_for_recognizing_audio_fpath,
                                    execution_time=time.time() - start_time)
                transcription = asr.loadTranscription(
                    transcription_dir=transcription_dir, filename=filename)

                if asr_name == "wit":
                    # random pause of 0.9-4.7 seconds between retries
                    # (presumably to respect the service's rate limit)
                    random_number = float(random.randint(9, 47)) / 10.
                    time.sleep(random_number)

                num_retry += 1

            transcriptions[asr_name] = preprocess_text(transcription)

            ## add execution time for recognizing audio
            execution_time += get_execution_time(
                fpath=time_for_recognizing_audio_fpath)

        cases = self.caseDeterminer(text, transcriptions)

        for asr_name, case in cases.items():
            self.saveCase(self.case_dir, tts_name, asr_name, filename,
                          str(case))

        return cases, execution_time
Пример #8
0
class CrossASR:
    """Tie one TTS to a list of ASRs and manage the folders of their outputs.

    The instance derives the audio, transcription, execution-time and case
    folders from ``output_dir``, determines a test case per ASR from the word
    error rate of its transcription, and stores those cases on disk.
    """

    def __init__(self,
                 tts: TTS,
                 asrs: "list[ASR]",
                 output_dir: str,
                 target_asr=None,
                 recompute=False,
                 num_iteration=5,
                 time_budget=3600,
                 max_num_retry=0,
                 text_batch_size=None,
                 seed=None,
                 estimator=None):
        """Store the configuration and create the output folders.

        :params tts: TTS used to generate the audio
        :params asrs: ASRs under test
        :params output_dir: root folder of everything written by this object
        :params target_asr: stored as ``self.target_asr``
        :params recompute: stored as ``self.recompute``
        :params num_iteration: used in the name of the result folder
        :params time_budget: stored as ``self.time_budget``
        :params max_num_retry: stored as ``self.max_num_retry``
        :params text_batch_size: used in the name of the result folder
            ("global" when falsy)
        :params seed: passed to ``crossasr.utils.set_seed`` unless ``None``
        :params estimator: optional; its name becomes part of the result
            file name
        """
        self.tts = tts
        self.asrs = asrs
        self.target_asr = target_asr

        # sets output_dir, audio_dir, transcription_dir, execution_time_dir
        # and case_dir in one place
        self.setOutputDir(output_dir)
        self.init_directory()

        ## TODO: make init directory for execution time and case
        self.recompute = recompute
        self.num_iteration = num_iteration
        self.time_budget = time_budget
        self.max_num_retry = max_num_retry
        self.text_batch_size = text_batch_size
        self.estimator = estimator
        self.outputfile_failed_test_case = self.get_outputfile_for_failed_test_case(
        )

        # compare against None so that 0 is accepted as a seed
        if seed is not None:
            crossasr.utils.set_seed(seed)

        ## TODO: convert print into global logging

    def init_directory(self):
        """Create the audio folder of the TTS and one transcription folder per ASR."""
        # init directory for save the audio
        make_dir(os.path.join(self.audio_dir, self.tts.getName()))

        # init directory for save the transcription
        for asr in self.asrs:
            make_dir(
                os.path.join(self.transcription_dir, self.tts.getName(),
                             asr.getName()))

    def get_outputfile_for_failed_test_case(self):
        """Create the result folder and return the path of the result JSON file.

        The folder name encodes the TTS, the ASR names joined with "_", the
        number of iterations and the text batch size; the file name encodes
        whether an estimator is used.
        """
        asrs_dir = "_".join([asr.getName() for asr in self.asrs])
        result_dir = os.path.join(
            self.output_dir, "result", self.tts.getName(), asrs_dir,
            f"num_iteration_{self.num_iteration}",
            f"text_batch_size_{self.text_batch_size if self.text_batch_size else 'global' }"
        )
        make_dir(result_dir)
        experiment_name = f"with-estimator-{self.estimator.getName().replace('/','-')}" if self.estimator else "without-estimator"
        return os.path.join(result_dir, experiment_name + ".json")

    def getTTS(self):
        """Return the configured TTS."""
        return self.tts

    def setTTS(self, tts: TTS):
        """Replace the configured TTS."""
        self.tts = tts

    def getASRS(self):
        """Return the list of configured ASRs."""
        return self.asrs

    def addASR(self, asr: ASR):
        """Append ``asr`` unless an ASR with the same name is already present."""
        if any(asr.getName() == curr_asr.getName() for curr_asr in self.asrs):
            # asr is already on the list of asrs
            return
        self.asrs.append(asr)

    def removeASR(self, asr_name: str):
        """Remove the first ASR named ``asr_name``; do nothing if there is none."""
        for i, asr in enumerate(self.asrs):
            if asr_name == asr.getName():
                # delete only on a real match: an unknown name must not
                # remove the last ASR, and an empty list must not raise
                del self.asrs[i]
                return

    def getOutputDir(self):
        """Return the root output directory given to ``setOutputDir``."""
        return self.output_dir

    def setOutputDir(self, output_dir: str):
        """Set the root output directory and re-derive every sub-folder path.

        Only the path attributes are updated; no folder is created here.
        NOTE(review): ``outputfile_failed_test_case`` is not refreshed by
        this method - confirm whether callers expect it to be.
        """
        self.output_dir = output_dir

        self.audio_dir = os.path.join(output_dir, DATA_DIR, AUDIO_DIR)
        self.transcription_dir = os.path.join(output_dir, DATA_DIR,
                                              TRANSCRIPTION_DIR)
        self.execution_time_dir = os.path.join(output_dir, EXECUTION_TIME_DIR)
        self.case_dir = os.path.join(output_dir, CASE_DIR)

    def caseDeterminer(self, text: str, transcriptions: dict):
        """Classify every transcription against the input text.

        When at least one transcription has a word error rate of 0, each
        transcription is successful (rate 0) or failed (any other rate);
        otherwise every transcription is indeterminable.

        :params text: the input text
        :params transcriptions: mapping from a key (the ASR name in
            ``processText``) to the transcription text
        :returns: mapping from the same keys to the case constants
        """
        # word error rate
        wers = {
            k: wer(text, transcription)
            for k, transcription in transcriptions.items()
        }

        is_determinable = any(rate == 0 for rate in wers.values())
        if not is_determinable:
            return {k: INDETERMINABLE_TEST_CASE for k in wers}

        return {
            k: SUCCESSFUL_TEST_CASE if rate == 0 else FAILED_TEST_CASE
            for k, rate in wers.items()
        }

    def saveCase(self, case_dir: str, tts_name: str, asr_name: str,
                 filename: str, case: str):
        """Write ``case`` to ``<case_dir>/<tts_name>/<asr_name>/<filename>.txt``."""
        case_dir = os.path.join(case_dir, tts_name, asr_name)
        make_dir(case_dir)
        fpath = os.path.join(case_dir, filename + ".txt")
        # context manager closes the file even if the write fails
        with open(fpath, "w+") as file:
            file.write(case)