def input_types(self):
    """Describe the input ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``input_signal``: axes ``('B', 'T')``, element type ``AudioSignal``
          constructed with ``freq=self._sample_rate``.
        * ``length``: single axis ``'B'``, element type ``LengthsType``.
    """
    signal_port = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
    length_port = NeuralType(('B',), LengthsType())
    return {"input_signal": signal_port, "length": length_port}
def input_types(self) -> Optional[Dict[str, NeuralType]]:
    """Describe the (all optional) input ports of this module.

    The audio element type carries the preprocessor's sampling frequency when
    ``self.preprocessor`` exposes a ``_sample_rate`` attribute; otherwise a
    default-constructed ``AudioSignal`` is used.

    Returns:
        dict: port name -> ``NeuralType``. Every port is declared with
        ``optional=True``:

        * ``input_signal``: axes ``('B', 'T')``, audio element type.
        * ``input_signal_length``: axis ``'B'``, ``LengthsType``.
        * ``processed_signal``: axes ``('B', 'D', 'T')``, ``SpectrogramType``.
        * ``processed_signal_length``: axis ``'B'``, ``LengthsType``.
    """
    audio_eltype = (
        AudioSignal(freq=self.preprocessor._sample_rate)
        if hasattr(self.preprocessor, '_sample_rate')
        else AudioSignal()
    )

    ports = {}
    ports["input_signal"] = NeuralType(('B', 'T'), audio_eltype, optional=True)
    ports["input_signal_length"] = NeuralType(('B',), LengthsType(), optional=True)
    ports["processed_signal"] = NeuralType(('B', 'D', 'T'), SpectrogramType(), optional=True)
    ports["processed_signal_length"] = NeuralType(('B',), LengthsType(), optional=True)
    return ports
def output_ports(self) -> Dict[str, NeuralType]:
    """Describe the output ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``audio_signal``: axes ``('B', 'T')``, ``AudioSignal`` constructed
          with ``freq=self._sample_rate``.
        * ``a_sig_length``: axis ``'B'``, ``LengthsType``.
    """
    ports = {}
    ports["audio_signal"] = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
    ports["a_sig_length"] = NeuralType(('B',), LengthsType())
    return ports
def output_types(self):
    """Describe the output ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``audio_signal``: axes ``('B', 'T')``, ``AudioSignal`` constructed
          with ``freq=self._sample_rate``.
        * ``a_sig_length``: axis ``'B'``, ``LengthsType``.
    """
    audio_port = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
    length_port = NeuralType(('B',), LengthsType())
    return {'audio_signal': audio_port, 'a_sig_length': length_port}
def output_types(self) -> Optional[Dict[str, NeuralType]]:
    """Describe the output ports of this module.

    The audio element type carries ``self._sample_rate`` as its frequency only
    when ``self`` is not ``None`` and has a ``_sample_rate`` attribute;
    otherwise a default-constructed ``AudioSignal`` is used.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``audio_signal``: axes ``('B', 'T')``, audio element type.
        * ``a_sig_length``: axis ``'B'``, ``LengthsType``.
        * ``label``: axis ``'B'``, ``LabelsType``.
        * ``label_length``: axis ``'B'``, ``LengthsType``.
    """
    if self is not None and hasattr(self, '_sample_rate'):
        audio_eltype = AudioSignal(freq=self._sample_rate)
    else:
        audio_eltype = AudioSignal()

    ports = {}
    ports['audio_signal'] = NeuralType(('B', 'T'), audio_eltype)
    ports['a_sig_length'] = NeuralType(('B',), LengthsType())
    ports['label'] = NeuralType(('B',), LabelsType())
    ports['label_length'] = NeuralType(('B',), LengthsType())
    return ports
def test_parameterized_type_audio_sampling_frequency(self):
    """Check that ``AudioSignal`` frequency participates in type comparison.

    Two ``('B', 'T')`` audio types with different frequencies must compare as
    ``SAME_TYPE_INCOMPATIBLE_PARAMS`` in both directions, while two types with
    the same frequency must compare as ``SAME`` in both directions.
    """

    def make_audio_type(freq):
        # Same axes for every case; only the sampling frequency varies.
        return NeuralType(axes=('B', 'T'), elements_type=AudioSignal(freq))

    audio16K = make_audio_type(16000)
    audio8K = make_audio_type(8000)
    another16K = make_audio_type(16000)

    incompatible = NeuralTypeComparisonResult.SAME_TYPE_INCOMPATIBLE_PARAMS
    same = NeuralTypeComparisonResult.SAME

    # (left operand, right operand, expected result of left.compare(right))
    cases = (
        (audio8K, audio16K, incompatible),
        (audio16K, audio8K, incompatible),
        (another16K, audio16K, same),
        (audio16K, another16K, same),
    )
    for left, right, expected in cases:
        self.assertEqual(left.compare(right), expected)
def input_types(self) -> Optional[Dict[str, NeuralType]]:
    """Describe the input ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``source``: axes ``('B', 'T')``, default ``AudioSignal`` (required).
        * ``padding_mask``: axes ``('B', 'T')``, ``MaskType``, optional.
        * ``mask``: no axes given, ``BoolType`` element, optional.
        * ``features_only``: no axes given, ``BoolType`` element, optional.
    """
    ports = {}
    ports["source"] = NeuralType(('B', 'T'), AudioSignal())
    ports["padding_mask"] = NeuralType(('B', 'T'), MaskType(), optional=True)
    ports["mask"] = NeuralType(elements_type=BoolType(), optional=True)
    ports["features_only"] = NeuralType(elements_type=BoolType(), optional=True)
    return ports
def output_ports(self):
    """Describe the output ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``audio_signal``: axes ``('B', 'T')`` (batch, time), default
          ``AudioSignal`` element type.
        * ``a_sig_length``: axis ``'B'`` (batch), ``LengthsType``.
    """
    audio_port = NeuralType(('B', 'T'), AudioSignal())
    length_port = NeuralType(('B',), LengthsType())
    return {"audio_signal": audio_port, "a_sig_length": length_port}
def input_types(self):
    """Describe the input ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``input_signal``: axes ``('B', 'T')``, ``AudioSignal`` constructed
          with ``freq=self._sample_rate``.
        * ``length``: axis ``'B'``, ``LengthsType``. Note that the length
          should be expressed in samples, not seconds.
    """
    ports = {}
    ports["input_signal"] = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
    # Length is in samples, not seconds.
    ports["length"] = NeuralType(('B',), LengthsType())
    return ports
def output_ports(self):
    """Describe the output ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``audio``: axes ``('B', 'T')``, ``AudioSignal`` constructed with
          ``freq=self.sample_rate``.
        * ``audio_len``: axis ``'B'``, ``LengthsType``.
        * ``text``: axes ``('B', 'T')``, ``EmbeddedTextType``.
        * ``text_pos``: axes ``('B', 'T')``, ``MaskType``.
        * ``dur_true``: axes ``('B', 'T')``, ``LengthsType``.
    """
    return {
        'audio': NeuralType(('B', 'T'), AudioSignal(freq=self.sample_rate)),
        'audio_len': NeuralType(('B',), LengthsType()),
        'text': NeuralType(('B', 'T'), EmbeddedTextType()),
        'text_pos': NeuralType(('B', 'T'), MaskType()),
        'dur_true': NeuralType(('B', 'T'), LengthsType()),
    }
def output_ports(self):
    """Describe the output ports of this module.

    The audio element type carries ``self._sample_rate`` as its frequency only
    when ``self`` is not ``None`` and ``self._sample_rate`` is not ``None``;
    otherwise a default-constructed ``AudioSignal`` is used.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``audio_signal``: axes ``('B', 'T')`` (batch, time), audio element
          type.
        * ``a_sig_length``: axis ``'B'`` (batch), ``LengthsType``.
        * ``transcripts``: axes ``('B', 'T')`` (batch, time), ``LabelsType``.
        * ``transcript_length``: axis ``'B'`` (batch), ``LengthsType``.
    """
    if self is not None and self._sample_rate is not None:
        audio_eltype = AudioSignal(freq=self._sample_rate)
    else:
        audio_eltype = AudioSignal()

    ports = {}
    ports["audio_signal"] = NeuralType(("B", "T"), audio_eltype)
    ports["a_sig_length"] = NeuralType(("B",), LengthsType())
    ports["transcripts"] = NeuralType(("B", "T"), LabelsType())
    ports["transcript_length"] = NeuralType(("B",), LengthsType())
    return ports
def output_ports(self):
    """Describe the output ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``audio``: axes ``('B', 'T')``, ``AudioSignal`` constructed with
          ``freq=self._sample_rate``.
        * ``audio_len``: axis ``'B'``, ``LengthsType``.
        * ``text``: axes ``('B', 'T')``, ``EmbeddedTextType``.
        * ``text_mask``: axes ``('B', 'T')``, ``MaskType``.
        * ``dur``: axes ``('B', 'T')``, ``LengthsType``.
        * ``text_rep``: axes ``('B', 'T')``, ``LengthsType``.
        * ``text_rep_mask``: axes ``('B', 'T')``, ``MaskType``.
        * ``text_raw``: default-constructed ``NeuralType``.
        * ``speaker``: axis ``'B'``, ``EmbeddedTextType``, optional.
        * ``speaker_emb``: axes ``('B', 'T')``, ``EncodedRepresentation``,
          optional.
    """
    return {
        'audio': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),
        'audio_len': NeuralType(('B',), LengthsType()),
        'text': NeuralType(('B', 'T'), EmbeddedTextType()),
        'text_mask': NeuralType(('B', 'T'), MaskType()),
        'dur': NeuralType(('B', 'T'), LengthsType()),
        'text_rep': NeuralType(('B', 'T'), LengthsType()),
        'text_rep_mask': NeuralType(('B', 'T'), MaskType()),
        'text_raw': NeuralType(),
        'speaker': NeuralType(('B',), EmbeddedTextType(), optional=True),
        'speaker_emb': NeuralType(('B', 'T'), EncodedRepresentation(), optional=True),
    }
def input_types(self):
    """Describe the input ports of this module.

    Returns:
        dict: port name -> ``NeuralType``.

        * ``input_signal``: axes ``('B', 'T')`` (batch, time), ``AudioSignal``
          constructed with ``freq=self._sample_rate``.
        * ``length``: axis ``'B'`` (batch), ``LengthsType``.

    Note:
        Length is in number of samples, not seconds.
    """
    signal_port = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
    length_port = NeuralType(('B',), LengthsType())
    return {"input_signal": signal_port, "length": length_port}
def output_ports(self):
    """Describe the output ports of this module.

    Returns:
        dict: port name -> ``NeuralType``. The ports are named
        ``input_signal`` and ``length`` even though they are outputs.

        * ``input_signal``: axes ``('B', 'T')``, ``AudioSignal`` constructed
          with ``freq=self.sample_rate``.
        * ``length``: axis ``'B'``, ``LengthsType``.
    """
    ports = {}
    ports["input_signal"] = NeuralType(('B', 'T'), AudioSignal(freq=self.sample_rate))
    ports["length"] = NeuralType(('B',), LengthsType())
    return ports
def types(self) -> Dict[str, NeuralType]:
    """Describe the neural types exposed by this object.

    Returns:
        dict: name -> ``NeuralType``.

        * ``audio_signal``: axes ``("B", "T")``, ``AudioSignal`` constructed
          with ``freq=self._sample_rate``.
        * ``a_sig_length``: axis ``"B"``, ``LengthsType``.
    """
    return {
        "audio_signal": NeuralType(("B", "T"), AudioSignal(freq=self._sample_rate)),
        "a_sig_length": NeuralType(("B",), LengthsType()),
    }