示例#1
0
    def _get_output_tensors(
            self, interpreter: tf.lite.Interpreter) -> List[np.ndarray]:
        """Collects the value of every output tensor of a TFLite interpreter.

        Args:
          interpreter: a tf.lite.Interpreter object with allocated tensors.

        Returns:
          A list of numpy arrays, one per entry reported by
          `interpreter.get_output_details()`, in the same order.
        """
        results = []
        for detail in interpreter.get_output_details():
            # Each detail entry carries the tensor index used to fetch it.
            results.append(interpreter.get_tensor(detail['index']))
        return results
示例#2
0
    def _get_output_tensors(
            self, interpreter: tf.lite.Interpreter) -> List[np.ndarray]:
        """Collects the output tensors of a TFLite interpreter.

        Outputs whose reported dtype is int8 are converted to float32 as
        `(value - zero_point) * scale` whenever `_get_quant_params` yields
        a (scale, zero_point) pair for them; every other output is returned
        exactly as the interpreter provides it.

        Args:
          interpreter: a tf.lite.Interpreter object with allocated tensors.

        Returns:
          A list of numpy arrays, one per entry reported by
          `interpreter.get_output_details()`, in the same order.
        """
        results = []
        for detail in interpreter.get_output_details():
            values = interpreter.get_tensor(detail['index'])
            # Quantization parameters are only looked up for int8 outputs.
            params = (_get_quant_params(detail)
                      if detail['dtype'] == np.int8 else None)
            if params:
                scale, zero_point = params
                centered = values.astype(np.float32) - zero_point
                # Cast again: multiplying by `scale` may promote the dtype.
                values = (centered * scale).astype(np.float32)
            results.append(values)
        return results
示例#3
0
def clean_speech(audio, interpreter_1: tf.lite.Interpreter,
                 interpreter_2: tf.lite.Interpreter):
    """Enhances `audio` block by block with two stateful TFLite models.

    The signal is processed in overlapping blocks of 512 samples that
    advance by 128 samples. For every block the first model receives the
    FFT magnitude and returns a mask; the masked spectrum is transformed
    back to the time domain and passed to the second model. The blocks
    produced by the second model are combined by overlap-add.

    Both models take their recurrent state as input 1 and return the updated
    state as output 1; the state is carried from one block to the next.

    Args:
      audio: 1-D numpy array of samples. The result is written with a sample
        rate of 16000 Hz, so the input is presumably 16 kHz mono -- confirm
        against the caller.
      interpreter_1: TFLite interpreter of the first (frequency-domain)
        model. `allocate_tensors()` is called on it here.
      interpreter_2: TFLite interpreter of the second (time-domain) model.
        `allocate_tensors()` is called on it here.

    Returns:
      An `io.BytesIO` positioned at offset 0 that contains the processed
      signal encoded as a WAV file. The signal has the same length as
      `audio`; samples after the last complete block shift stay zero.

    Side effects:
      Also writes the processed signal to `out.wav` in the current working
      directory.
    """
    block_len = 512
    block_shift = 128
    sample_rate = 16000
    # load models
    interpreter_1.allocate_tensors()
    interpreter_2.allocate_tensors()

    # Get input and output tensors.
    input_details_1 = interpreter_1.get_input_details()
    output_details_1 = interpreter_1.get_output_details()

    input_details_2 = interpreter_2.get_input_details()
    output_details_2 = interpreter_2.get_output_details()
    # create states for the lstms
    states_1 = np.zeros(input_details_1[1]['shape']).astype('float32')
    states_2 = np.zeros(input_details_2[1]['shape']).astype('float32')
    # preallocate output audio
    out_file = np.zeros((len(audio)))
    # create buffer
    in_buffer = np.zeros((block_len)).astype('float32')
    out_buffer = np.zeros((block_len)).astype('float32')
    # calculate number of blocks
    num_blocks = (audio.shape[0] - (block_len - block_shift)) // block_shift
    # iterate over the number of blocks
    for idx in range(num_blocks):
        start = idx * block_shift
        stop = start + block_shift
        # shift values and write to buffer
        in_buffer[:-block_shift] = in_buffer[block_shift:]
        in_buffer[-block_shift:] = audio[start:stop]
        # calculate fft of input block
        in_block_fft = np.fft.rfft(in_buffer)
        in_mag = np.abs(in_block_fft)
        in_phase = np.angle(in_block_fft)
        # reshape magnitude to input dimensions
        in_mag = np.reshape(in_mag, (1, 1, -1)).astype('float32')
        # set tensors to the first model
        interpreter_1.set_tensor(input_details_1[1]['index'], states_1)
        interpreter_1.set_tensor(input_details_1[0]['index'], in_mag)
        # run calculation
        interpreter_1.invoke()
        # get the output of the first block
        out_mask = interpreter_1.get_tensor(output_details_1[0]['index'])
        states_1 = interpreter_1.get_tensor(output_details_1[1]['index'])
        # apply the mask to the magnitude, reuse the input phase and
        # calculate the ifft
        estimated_complex = in_mag * out_mask * np.exp(1j * in_phase)
        estimated_block = np.fft.irfft(estimated_complex)
        # reshape the time domain block
        estimated_block = np.reshape(estimated_block,
                                     (1, 1, -1)).astype('float32')
        # set tensors to the second block
        interpreter_2.set_tensor(input_details_2[1]['index'], states_2)
        interpreter_2.set_tensor(input_details_2[0]['index'], estimated_block)
        # run calculation
        interpreter_2.invoke()
        # get output tensors
        out_block = interpreter_2.get_tensor(output_details_2[0]['index'])
        states_2 = interpreter_2.get_tensor(output_details_2[1]['index'])

        # overlap-add: shift the buffer, clear the new tail, add the block
        out_buffer[:-block_shift] = out_buffer[block_shift:]
        out_buffer[-block_shift:] = 0.0
        out_buffer += np.squeeze(out_block)
        # the first block_shift samples are final; write them to the output
        out_file[start:stop] = out_buffer[:block_shift]

    # Encode the result into the in-memory buffer that is returned. The
    # format must be given explicitly because a file-like object has no
    # file extension to infer it from.
    output_bytes = io.BytesIO()
    sf.write(output_bytes, out_file, sample_rate, format='WAV')
    # Rewind so that callers can read the WAV data from the beginning.
    output_bytes.seek(0)
    # NOTE: the on-disk copy is kept for callers that read 'out.wav'; the
    # fixed file name is not safe when several calls run concurrently.
    sf.write('out.wav', out_file, sample_rate)
    return output_bytes