def __getitem__(self, index):
    x = self.load_sample(self.files[index])
    x = VoiceActivityDetector.from_picture_to_tensor(x)
    if self.mode == 'test':
        return x
    else:
        label_id = VoiceActivityDetector.LABEL_TO_IDX[self.labels[index]]
        return x, label_id
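A minimal usage sketch, assuming this `__getitem__` belongs to the `TrainVadDataset` instantiated in the training script further below; the .png path is a placeholder, not a file from the repository.

# Sketch only: the file path below is an illustrative placeholder.
train_ds = TrainVadDataset(['dataset/speech/0001.png'], mode='train')
x, label_id = train_ds[0]   # train/val modes return (tensor, label index)

test_ds = TrainVadDataset(['dataset/speech/0001.png'], mode='test')
x = test_ds[0]              # test mode returns only the tensor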
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('model_path', type=str, help='The path where model is stored')
    parser.add_argument('audio_path', type=str, help='The path to the audio file to be processed')
    args = parser.parse_args()

    print(f'Processing on: {VoiceActivityDetector.DEVICE}')

    # ========================================================

    detector = VoiceActivityDetector()
    detector.load(args.model_path)

    rate, signal, labels = load_labeled_audio(args.audio_path)
    signal = signal[int(0 * rate):int(30 * rate)]
    labels = labels[int(0 * rate):int(30 * rate)]

    detector.setup(rate)

    buffer_sizes = list(range(20, 151, 5))  # ms
    ratios = []
    for buffer_size in buffer_sizes:
        print(f'buffer_size = {buffer_size}')
        st = time.time()
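The body of the timing loop is truncated in this excerpt. As a hedged sketch (not the repository's code), the measurement presumably ends by relating elapsed wall-clock time to the duration of the 30-second excerpt:

# Hypothetical continuation only: record a real-time factor for this buffer size.
# The chunked detector calls that would sit between `st` and here are omitted.
elapsed = time.time() - st
ratios.append(elapsed / (len(signal) / rate))  # < 1 means faster than real time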
parser.add_argument('--net-window-size', type=float, default=0.05,
                    help='Window size of the neural network, in seconds')
parser.add_argument('--net-step-size-ratio', type=float, default=0.5,
                    help='Step size of the neural network as a fraction of its window size, in [0, 1]')
parser.add_argument('model_path', type=str, help='The path where the model will be saved')
parser.add_argument('--arc', type=str, default='cnn', help='Architecture type of the neural network')
args = parser.parse_args()

params: dict = copy.deepcopy(vars(args))
params.pop('model_path')
params.pop('arc')

detector = VoiceActivityDetector(params)

print(f'Saving model...\n{detector}')
detector.save(args.model_path)
print('Done')
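For illustration, argparse converts the dashed option names above into underscored keys, so the params dict handed to the detector would contain entries like the following (a sketch; the full argument set is not shown in this excerpt):

# Illustration only: keys follow argparse's dash-to-underscore conversion.
params = {
    'net_window_size': 0.05,     # seconds
    'net_step_size_ratio': 0.5,  # fraction of the network window
    # ... remaining CLI options from the full script ...
}
detector = VoiceActivityDetector(params)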
)
# Note: argparse's type=bool treats any non-empty string (including "False") as
# True, so the flag value is parsed explicitly.
parser.add_argument(
    '--cuda',
    type=lambda s: str(s).lower() in ('true', '1', 'yes'),
    default=True
)
args = parser.parse_args()

if args.cuda and torch.cuda.is_available():
    VoiceActivityDetector.DEVICE = torch.device('cuda')
else:
    VoiceActivityDetector.DEVICE = torch.device('cpu')
print(f'Processing on: {VoiceActivityDetector.DEVICE}')

detector = VoiceActivityDetector()
detector.load(args.model_path)

dataset_dir = Path(args.dataset_dir)
dataset_paths = sorted(list(dataset_dir.rglob('*.png')))
labels = [path.parent.name for path in dataset_paths]

X_train, X_val, y_train, y_val = train_test_split(
    dataset_paths, labels, test_size=args.val_ratio, shuffle=True
)

train_dataset = TrainVadDataset(X_train, mode='train')
val_dataset = TrainVadDataset(X_val, mode='val')
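A hedged sketch of how these datasets could be wrapped for batched training; the batch size and worker count are illustration values, not settings from the repository.

from torch.utils.data import DataLoader

# Illustration only: batch_size and num_workers are arbitrary placeholders.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)

for x, label_id in train_loader:  # matches the (tensor, label) pair from __getitem__
    x = x.to(VoiceActivityDetector.DEVICE)
    label_id = label_id.to(VoiceActivityDetector.DEVICE)
    break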
default=0.05, help='The buffer size for audio pieces, in seconds')
args = parser.parse_args()

if args.cuda and torch.cuda.is_available():
    VoiceActivityDetector.DEVICE = torch.device('cuda')
else:
    VoiceActivityDetector.DEVICE = torch.device('cpu')
print(f'Processing on: {VoiceActivityDetector.DEVICE}')

# ========================================================

detector = VoiceActivityDetector()
detector.load(args.model_path)

rate, signal, labels = load_labeled_audio(args.audio_path)
# signal = signal[int(500 * rate):int(750 * rate)]
# labels = labels[int(500 * rate):int(750 * rate)]
ts = np.linspace(0, len(signal) / rate, num=len(signal))

detector.setup(rate)

stream_buffer = StreamBuffer(rate)
buffer_size_f = int(np.ceil(rate * args.buffer_size))
signal_size_f = len(signal)
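The variables above set up chunked streaming. A minimal sketch of the chunking itself follows; the StreamBuffer and detector streaming calls are project-specific and not shown in this excerpt, so they are left as a comment.

# Illustration only: walk the signal in buffer_size_f-sample chunks.
for start_f in range(0, signal_size_f, buffer_size_f):
    chunk = signal[start_f:start_f + buffer_size_f]
    # ... hand `chunk` to stream_buffer / the detector here (project-specific API) ...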
def __getitem__(self, index):
    # Cut a fixed-width window out of the spectrogram image; `spectrogram` and
    # `sample_pxl_width` are expected to be defined in the enclosing script.
    pxl_l = self.pxl_ls[index]
    x = spectrogram[:, pxl_l:pxl_l + sample_pxl_width, :]
    x = Image.fromarray(x)
    x = VoiceActivityDetector.from_picture_to_tensor(x)
    return x
'--mat-output-path', type=str,
    help='The path to the .mat file where labels will be stored')
parser.add_argument('--device', type=str, default='cuda',
                    help='Device type for computations')
parser.add_argument(
    '--statistics-path', type=str,
    help='The path to the file where processing statistics will be stored')
args = parser.parse_args()

# ========================================================

detector = VoiceActivityDetector()
detector.load(args.model_path)

rate, signal, labels = load_labeled_audio(args.audio_path)
ts = np.linspace(0, len(signal) / rate, num=len(signal))

# ========================================================

spectrogram = build_spectrogram(
    signal, rate,
    n_filters=detector.params['n_filters'],
    window_size_s=detector.params['window_size'],
    step_size_ratio=detector.params['step_size_ratio'])

# ========================================================

net_window_size_f = int(rate * detector.params['net_window_size'])
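For a sense of scale, with an assumed 16 kHz sampling rate and the 0.05 s default window from the model-creation script, the network window length in samples would work out as follows.

# Illustrative arithmetic only (16 kHz is an assumed rate, not read from the audio):
net_window_size_f = int(16000 * 0.05)  # = 800 samples per network window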