"""Sanity-check label alignment, pick hardware, and load the serialized models.

Improvements over the original:
- `open(...)` handles passed straight into `json.load` were never closed;
  both config reads now use `with` blocks.
- the bare `except:` (which would also trap SystemExit/KeyboardInterrupt)
  is narrowed to `except Exception`.
All runtime strings and observable behavior are otherwise unchanged.
"""

# Acoustic and linguistic test sets must be index-aligned so per-sample
# labels agree across the two modalities.
assert np.array_equal(
    test_labels_acoustic, test_labels_linguistic
), "Labels for acoustic and linguistic datasets are not the same!"

"""Choosing hardware"""
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == "cuda":
    print("Using GPU. Setting default tensor type to torch.cuda.FloatTensor")
    torch.set_default_tensor_type("torch.cuda.FloatTensor")
else:
    print("Using CPU. Setting default tensor type to torch.FloatTensor")
    torch.set_default_tensor_type("torch.FloatTensor")

"""Converting model to specified hardware and format"""
# The model config JSON sits next to the .torch weights file.
with open(args.acoustic_model.replace(".torch", ".json"), "r") as cfg_file:
    acoustic_cfg_json = json.load(cfg_file)
acoustic_cfg = AcousticConfig.from_json(acoustic_cfg_json)

acoustic_model = CNN(acoustic_cfg)
acoustic_model.float().to(device)
try:
    # First attempt: load weights as saved (works when the checkpoint's
    # device matches the current one).
    acoustic_model.load_state_dict(torch.load(args.acoustic_model))
except Exception:  # narrowed from bare `except:`
    print(
        "Failed to load model from {} without device mapping. Trying to load with mapping to {}"
        .format(args.acoustic_model, device))
    acoustic_model.load_state_dict(
        torch.load(args.acoustic_model, map_location=device))

with open(args.linguistic_model.replace(".torch", ".json"), "r") as cfg_file:
    linguistic_cfg_json = json.load(cfg_file)
linguistic_cfg = LinguisticConfig.from_json(linguistic_cfg_json)
    # NOTE(review): this chunk starts INSIDE an elif branch — the `if`
    # header (presumably `args.model_type == "acoustic-lld"`) is above the
    # visible window; indentation here is reconstructed to match.
    # Randomly sample RNN hyperparameters for one search trial.
    params["n_layers"] = np.random.randint(1, 4)        # 1..3 recurrent layers
    params["hidden_dim"] = np.random.randint(10, 100)
    params["dropout"] = 0.5 + np.random.rand() * 0.4    # uniform in [0.5, 0.9)
    params["dropout2"] = 0.5 + np.random.rand() * 0.45  # uniform in [0.5, 0.95)
    params["reg_ratio"] = np.random.rand() * 0.0015     # uniform in [0, 0.0015)
    params["batch_size"] = np.random.randint(26, 256)
    params["bidirectional"] = bool(np.random.randint(0, 2))  # coin flip
    cfg = AcousticLLDConfig(**params)
    model = RNN(cfg)
elif args.model_type == "acoustic-spectrogram":
    # Spectrogram features feed a CNN; sample its hyperparameters instead.
    test_features, test_labels, val_features, val_labels, train_features, train_labels = load_spectrogram_dataset(
    )
    params["fc_size"] = np.random.randint(10, 200)
    params["dropout"] = 0.3 + np.random.rand() * 0.6  # uniform in [0.3, 0.9)
    cfg = AcousticSpectrogramConfig(**params)
    model = CNN(cfg)
else:
    raise Exception(
        "model_type parameter has to be one of [linguistic|acoustic-lld|acoustic-spectrogram]"
    )

# Print the split sizes so feature/label mismatches are easy to spot.
print(
    "Subsets sizes: test_features:{}, test_labels:{}, val_features:{}, val_labels:{}, train_features:{}, train_labels:{}"
    .format(test_features.shape[0], test_labels.shape[0],
            val_features.shape[0], val_labels.shape[0],
            train_features.shape[0], train_labels.shape[0]))

"""Converting model to specified hardware and format"""
model.float()
model = model.to(get_device())
# Parse the model type from the CLI; defaults to the linguistic model.
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model_type", type=str, default="linguistic")
args = parser.parse_args()

# Each model type pairs a config class, a dataset loader, and an architecture
# (RNN for sequence features, CNN for spectrograms).
if args.model_type == "linguistic":
    cfg = LinguisticConfig()
    test_features, test_labels, val_features, val_labels, train_features, train_labels = load_linguistic_dataset(
    )
    model = RNN(cfg)
elif args.model_type == "acoustic-lld":
    cfg = AcousticLLDConfig()
    test_features, test_labels, val_features, val_labels, train_features, train_labels = load_acoustic_features_dataset(
    )
    model = RNN(cfg)
elif args.model_type == "acoustic-spectrogram":
    cfg = AcousticSpectrogramConfig()
    test_features, test_labels, val_features, val_labels, train_features, train_labels = load_spectrogram_dataset(
    )
    model = CNN(cfg)
else:
    raise Exception(
        "model_type parameter has to be one of [linguistic|acoustic-lld|acoustic-spectrogram]"
    )

# Print the split sizes so feature/label mismatches are easy to spot.
print(
    "Subsets sizes: test_features:{}, test_labels:{}, val_features:{}, val_labels:{}, train_features:{}, train_labels:{}"
    .format(test_features.shape[0], test_labels.shape[0],
            val_features.shape[0], val_labels.shape[0],
            train_features.shape[0], train_labels.shape[0]))

"""Running training"""
# NOTE(review): this call is truncated — its remaining arguments continue
# past the end of this chunk.
run_training(model, cfg, test_features, test_labels, train_features,
             dtype=np.int16)  # NOTE(review): closes a recording call whose head is above this chunk
sd.wait()  # Wait until recording is finished
print("Recording finished")
write(TMP_FILENAME, SAMPLE_RATE, myrecording)  # Save as WAV file

# Transcribe the captured audio with the DeepSpeech model files found
# under args.deepspeech.
from deepspeech_generator import speech_to_text
transcription = speech_to_text(join(args.deepspeech, "output_graph.pbmm"),
                               join(args.deepspeech, "alphabet.txt"),
                               join(args.deepspeech, "lm.binary"),
                               join(args.deepspeech, "trie"), TMP_FILENAME)
print(transcription)

"""Converting model to specified hardware and format"""
# NOTE(review): this handle is never closed — prefer a `with` block.
acoustic_cfg_json = json.load(
    open(args.acoustic_model.replace(".torch", ".json"), "r"))
acoustic_cfg = AcousticSpectrogramConfig.from_json(acoustic_cfg_json)

acoustic_model = CNN(acoustic_cfg)
acoustic_model.float().to("cpu")
try:
    # First attempt: load weights as saved.
    acoustic_model.load_state_dict(torch.load(args.acoustic_model))
except:  # NOTE(review): bare except — should be `except Exception`
    print(
        "Failed to load model from {} without device mapping. Trying to load with mapping to {}"
        .format(args.acoustic_model, "cpu"))
    acoustic_model.load_state_dict(
        torch.load(args.acoustic_model, map_location="cpu"))
acoustic_model.eval()  # inference mode (disables dropout/batch-norm updates)