def save(filepath, src, sample_rate, precision=32):
    """Saves a Tensor with audio signal to disk as a standard format like mp3, wav, etc.

    Args:
        filepath (string): path to audio file; the extension (without the
            leading dot) is handed to the writer as the file type
        src (Tensor): an input 2D Tensor of shape `[L x C]` where L is
            the number of audio frames, C is the number of channels
            (at most 2). A 1D Tensor is treated as a mono signal. The
            caller's tensor is never modified in place.
        sample_rate (int): the sample-rate of the audio to be saved
        precision (int, optional): the bit-precision of the audio to be saved

    Raises:
        OSError: if the directory part of `filepath` does not exist
        ValueError: if `src` is not 1D or 2D, or has more than 2 channels
        TypeError: if `sample_rate` or `precision` is not an int and is not
            equal to its integer conversion

    Example::

        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> torchaudio.save('foo.wav', data, sample_rate)

    """
    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))

    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals; the out-of-place
        # unsqueeze leaves the caller's tensor with its original shape
        src = src.unsqueeze(1)
    elif src.dim() != 2 or src.size(1) > 2:
        # `!= 2` also rejects 0-dim tensors, which would otherwise fail with
        # an IndexError on `src.size(1)`
        raise ValueError(
            "Expected format (L x N), N = 1 or 2, but found {}".format(
                src.size()))

    # check if sample_rate is an integer (integer-valued numbers are coerced)
    if not isinstance(sample_rate, int):
        if int(sample_rate) == sample_rate:
            sample_rate = int(sample_rate)
        else:
            raise TypeError('Sample rate should be a integer')

    # check if the bit precision is an integer (same coercion rule)
    if not isinstance(precision, int):
        if int(precision) == precision:
            precision = int(precision)
        else:
            raise TypeError('Bit precision should be a integer')

    # programs such as librosa normalize the signal, unnormalize if detected
    if src.min() >= -1.0 and src.max() <= 1.0:
        # scale factor is 2**31, i.e. the signed 32-bit sample range
        src = src * (1 << 31)
        src = src.long()

    # save data to file
    extension = os.path.splitext(filepath)[1]
    check_input(src)
    _torch_sox.write_audio_file(filepath, src, extension[1:], sample_rate, precision)
def save(filepath, src, sample_rate):
    """Saves a Tensor with audio signal to disk as a standard format like mp3, wav, etc.

    Args:
        filepath (string): path to audio file; the extension (without the
            leading dot) is handed to the writer as the file type
        src (Tensor): an input 2D Tensor of shape `[L x C]` where L is
            the number of audio frames, C is the number of channels
            (at most 2). A 1D Tensor is treated as a mono signal. The
            caller's tensor is never modified in place.
        sample_rate (int): the sample-rate of the audio to be saved

    Raises:
        OSError: if the directory part of `filepath` does not exist
        ValueError: if `src` is not 1D or 2D, or has more than 2 channels
        TypeError: if `sample_rate` is not an int and is not equal to its
            integer conversion

    Example::

        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> torchaudio.save('foo.wav', data, sample_rate)

    """
    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))

    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals; the out-of-place
        # unsqueeze leaves the caller's tensor with its original shape
        src = src.unsqueeze(1)
    elif src.dim() != 2 or src.size(1) > 2:
        # `!= 2` also rejects 0-dim tensors, which would otherwise fail with
        # an IndexError on `src.size(1)`
        raise ValueError(
            "Expected format (L x N), N = 1 or 2, but found {}".format(src.size()))

    # check if sample_rate is an integer (integer-valued numbers are coerced)
    if not isinstance(sample_rate, int):
        if int(sample_rate) == sample_rate:
            sample_rate = int(sample_rate)
        else:
            raise TypeError('Sample rate should be a integer')

    # programs such as librosa normalize the signal, unnormalize if detected
    if src.min() >= -1.0 and src.max() <= 1.0:
        # scale factor is 2**31, i.e. the signed 32-bit sample range
        src = src * (1 << 31)
        src = src.long()

    # save data to file
    extension = os.path.splitext(filepath)[1]
    check_input(src)
    _torch_sox.write_audio_file(filepath, src, extension[1:], sample_rate)
def save_encinfo(filepath, src, channels_first=True, signalinfo=None, encodinginfo=None, filetype=None):
    r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc.

    Args:
        filepath (str): Path to audio file
        src (torch.Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
            the number of audio frames, C is the number of channels. A 1D tensor is
            treated as a mono signal. The caller's tensor is never modified in place.
        channels_first (bool): Whether `src` is laid out as `[C x L]` (``True``) or
            `[L x C]` (``False``). (Default: ``True``)
        signalinfo (sox_signalinfo_t): A sox_signalinfo_t type whose ``rate`` and
            ``precision`` attributes are read here. It must be provided even though
            the parameter defaults to ``None``. ``rate`` is converted to ``float`` and
            ``precision`` to ``int`` on the passed object when they hold an
            equal-valued number of another type.
        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
            audio type cannot be automatically determined. (Default: ``None``)
        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
            automatically. Only used when `filepath` has no extension. (Default: ``None``)

    Raises:
        OSError: If the directory part of `filepath` does not exist
        TypeError: If `signalinfo` is ``None``, or its rate / precision cannot be
            converted without changing value
        ValueError: If `src` is not 1D or 2D, or has too many channels

    Example
        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> # `si` is a sox_signalinfo_t with `rate` and `precision` set
        >>> torchaudio.save_encinfo('foo.wav', data, signalinfo=si)

    """
    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))

    # signalinfo defaults to None, but its rate and precision are read below;
    # fail with an explicit message instead of an AttributeError on None
    if signalinfo is None:
        raise TypeError(
            "signalinfo is required: expected a sox_signalinfo_t with rate and "
            "precision set, but found None")

    # index of the channel dimension in src
    ch_idx = 0 if channels_first else 1

    # check that src is a CPU tensor
    check_input(src)

    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals; the out-of-place
        # unsqueeze leaves the caller's tensor with its original shape
        src = src.unsqueeze(ch_idx)
    elif src.dim() != 2 or src.size(ch_idx) > 16:
        # `!= 2` also rejects 0-dim tensors, which would otherwise fail with
        # an IndexError on `src.size(ch_idx)`.
        # NOTE(review): the check admits exactly 16 channels although the
        # message says C < 16 - confirm which bound is intended.
        raise ValueError("Expected format where C < 16, but found {}".format(
            src.size()))

    # sox stores the sample rate as a float, though practically sample rates are almost always integers
    # convert integers to floats
    if not isinstance(signalinfo.rate, float):
        if float(signalinfo.rate) == signalinfo.rate:
            signalinfo.rate = float(signalinfo.rate)
        else:
            raise TypeError('Sample rate should be a float or int')

    # check if the bit precision (i.e. bits per sample) is an integer
    if not isinstance(signalinfo.precision, int):
        if int(signalinfo.precision) == signalinfo.precision:
            signalinfo.precision = int(signalinfo.precision)
        else:
            raise TypeError('Bit precision should be an integer')

    # programs such as librosa normalize the signal, unnormalize if detected
    if src.min() >= -1.0 and src.max() <= 1.0:
        # scale factor is 2**31, i.e. the signed 32-bit sample range
        src = src * (1 << 31)
        src = src.long()

    # set filetype and allow for files with no extensions: an extension on
    # filepath takes priority over the filetype argument
    extension = os.path.splitext(filepath)[1]
    if extension:
        filetype = extension[1:]

    # transpose from C x L -> L x C
    if channels_first:
        src = src.transpose(1, 0)

    # save data to file
    src = src.contiguous()
    _torch_sox.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)