def pronunciation_assessment_from_microphone():
    """Performs one-shot pronunciation assessment asynchronously with input from microphone.

    Repeatedly reads a reference text from the console, applies it to the
    recognizer, runs a single ``recognize_once_async()`` recognition and prints
    the overall scores followed by the word-level details.

    The loop ends when the user enters empty text or when standard input
    reaches end-of-file. Returns nothing; all output goes to stdout.
    """

    # Creates an instance of a speech config with specified subscription key and service region.
    # Replace with your own subscription key and service region (e.g., "westus").
    # Note: The pronunciation assessment feature is currently only available on westus, eastasia and centralindia regions.
    # And this feature is currently only available on en-US language.
    config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    reference_text = ""
    # Create the pronunciation assessment config; set grading system, granularity and whether to
    # enable miscue based on your requirement.
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(reference_text=reference_text,
                                                                   grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
                                                                   granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
                                                                   enable_miscue=True)

    recognizer = speechsdk.SpeechRecognizer(speech_config=config)
    while True:
        # Receives reference text from console input.
        print('Enter reference text you want to assess, or enter empty text to exit.')
        print('> ')

        try:
            reference_text = input()
        except EOFError:
            break

        # The prompt promises that empty text exits; honour it instead of
        # assessing speech against an empty reference.
        if not reference_text:
            break

        # The same config object is reused; only its reference text changes per round.
        pronunciation_config.reference_text = reference_text
        pronunciation_config.apply_to(recognizer)

        # Starts recognizing.
        print('Read out "{}" for pronunciation assessment ...'.format(reference_text))

        # Note: Since recognize_once() returns only a single utterance, it is suitable only for single
        # shot evaluation.
        # For long-running multi-utterance pronunciation evaluation, use start_continuous_recognition() instead.
        result = recognizer.recognize_once_async().get()

        # Check the result
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print('Recognized: {}'.format(result.text))
            print('  Pronunciation Assessment Result:')

            pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
            print('    Accuracy score: {}, Pronunciation score: {}, Completeness score : {}, FluencyScore: {}'.format(
                pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score,
                pronunciation_result.completeness_score, pronunciation_result.fluency_score
            ))
            print('  Word-level details:')
            # Words are numbered from 1 in the printed listing.
            for idx, word in enumerate(pronunciation_result.words):
                print('    {}: word: {}, accuracy score: {}, error type: {};'.format(
                    idx + 1, word.word, word.accuracy_score, word.error_type
                ))
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized")
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))
 def recognized(evt):
     print('pronunciation assessment for: {}'.format(evt.result.text))
     pronunciation_result = speechsdk.PronunciationAssessmentResult(
         evt.result)
     print(
         '    Accuracy score: {}, pronunciation score: {}, completeness score : {}, fluency score: {}'
         .format(pronunciation_result.accuracy_score,
                 pronunciation_result.pronunciation_score,
                 pronunciation_result.completeness_score,
                 pronunciation_result.fluency_score))
     nonlocal recognized_words, accuracy_scores, durations, valid_durations, start_offset, end_offset
     recognized_words += pronunciation_result.words
     accuracy_scores.append(pronunciation_result.accuracy_score)
     json_result = evt.result.properties.get(
         speechsdk.PropertyId.SpeechServiceResponse_JsonResult)
     jo = json.loads(json_result)
     nb = jo['NBest'][0]
     durations.append(sum([int(w['Duration']) for w in nb['Words']]))
     if start_offset is None:
         start_offset = nb['Words'][0]['Offset']
     end_offset = nb['Words'][-1]['Offset'] + nb['Words'][-1][
         'Duration'] + 100000
     for w, d in zip(pronunciation_result.words, nb['Words']):
         if w.error_type == 'None':
             valid_durations.append(d['Duration'] + 100000)
 def recognized(evt):
     print('pronunciation assessment for: {}'.format(evt.result.text))
     pronunciation_result = speechsdk.PronunciationAssessmentResult(evt.result)
     print('    Accuracy score: {}, Pronunciation score: {}, Completeness score : {}, FluencyScore: {}'.format(
         pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score,
         pronunciation_result.completeness_score, pronunciation_result.fluency_score
     ))
     nonlocal recognized_words
     recognized_words += pronunciation_result.words
 def recognized(evt):
     print('pronunciation assessment for: {}'.format(evt.result.text))
     pronunciation_result = speechsdk.PronunciationAssessmentResult(
         evt.result)
     print(
         '    Accuracy score: {}, pronunciation score: {}, completeness score : {}, fluency score: {}'
         .format(pronunciation_result.accuracy_score,
                 pronunciation_result.pronunciation_score,
                 pronunciation_result.completeness_score,
                 pronunciation_result.fluency_score))
     nonlocal recognized_words, accuracy_scores, fluency_scores, durations
     recognized_words += pronunciation_result.words
     accuracy_scores.append(pronunciation_result.accuracy_score)
     fluency_scores.append(pronunciation_result.fluency_score)
     json_result = evt.result.properties.get(
         speechsdk.PropertyId.SpeechServiceResponse_JsonResult)
     jo = json.loads(json_result)
     nb = jo['NBest'][0]
     durations.append(sum([int(w['Duration']) for w in nb['Words']]))
def pronunciation_assessment_from_microphone():
    """Performs one-shot pronunciation assessment asynchronously with input from microphone.

    See more information at https://aka.ms/csspeech/pa

    Repeatedly reads a reference text from the console, applies it to the
    recognizer, runs a single ``recognize_once_async()`` recognition and prints
    the overall scores followed by the word-level details.

    The loop ends when the user enters empty text or when standard input
    reaches end-of-file. Returns nothing; all output goes to stdout.
    """

    # Creates an instance of a speech config with specified subscription key and service region.
    # Replace with your own subscription key and service region (e.g., "westus").
    config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    # The pronunciation assessment service has a longer default end silence timeout (5 seconds) than normal STT,
    # as pronunciation assessment is widely used in education scenarios where kids take longer breaks while reading.
    # You can adjust the end silence timeout based on your real scenario.
    config.set_property(speechsdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, "3000")

    reference_text = ""
    # Create the pronunciation assessment config; set grading system, granularity and whether to
    # enable miscue based on your requirement.
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text=reference_text,
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
        enable_miscue=True)

    # Creates a speech recognizer, also specify the speech language
    recognizer = speechsdk.SpeechRecognizer(speech_config=config, language="en-US")
    while True:
        # Receives reference text from console input.
        print('Enter reference text you want to assess, or enter empty text to exit.')
        print('> ')

        try:
            reference_text = input()
        except EOFError:
            break

        # The prompt promises that empty text exits; honour it instead of
        # assessing speech against an empty reference.
        if not reference_text:
            break

        # The same config object is reused; only its reference text changes per round.
        pronunciation_config.reference_text = reference_text
        pronunciation_config.apply_to(recognizer)

        # Starts recognizing.
        print('Read out "{}" for pronunciation assessment ...'.format(reference_text))

        # Note: Since recognize_once() returns only a single utterance, it is suitable only for single
        # shot evaluation.
        # For long-running multi-utterance pronunciation evaluation, use start_continuous_recognition() instead.
        result = recognizer.recognize_once_async().get()

        # Check the result
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print('Recognized: {}'.format(result.text))
            print('  Pronunciation Assessment Result:')

            pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
            print('    Accuracy score: {}, Pronunciation score: {}, Completeness score : {}, FluencyScore: {}'.format(
                pronunciation_result.accuracy_score, pronunciation_result.pronunciation_score,
                pronunciation_result.completeness_score, pronunciation_result.fluency_score
            ))
            print('  Word-level details:')
            # Words are numbered from 1 in the printed listing.
            for idx, word in enumerate(pronunciation_result.words):
                print('    {}: word: {}, accuracy score: {}, error type: {};'.format(
                    idx + 1, word.word, word.accuracy_score, word.error_type
                ))
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized")
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))