def load_model(device, model_path, use_half):
    """Load a pretrained DeepSpeech checkpoint and prepare it for inference.

    Parameters
    ----------
    device : torch.device or str
        Device the model is moved to.
    model_path : str
        Path to the serialized DeepSpeech checkpoint.
    use_half : bool
        When true, convert the model to half precision (fp16).

    Returns
    -------
    The model in eval mode on *device*, optionally in fp16.
    """
    net = DeepSpeech.load_model(model_path)
    net.eval()  # inference only: disable dropout / freeze batch-norm stats
    net = net.to(device)
    return net.half() if use_half else net
# Fragment of an LM hyperparameter tuning script: `parser`, `sys`, and the
# arguments referenced below (--lm-path, --model-path, --saved-output,
# --lm-workers) are defined earlier in the file, outside this view.

# Grid sizes for the alpha/beta sweep.
# NOTE(review): `type=float` for a candidate *count* looks odd — presumably
# these feed np.linspace/np.arange; confirm an int is not required.
parser.add_argument('--lm-num-alphas', default=45, type=float, help='Number of alpha candidates for tuning')
parser.add_argument('--lm-num-betas', default=8, type=float, help='Number of beta candidates for tuning')
# Attach the shared decoder CLI options before parsing.
parser = add_decoder_args(parser)
args = parser.parse_args()

# A language model is mandatory: tuning alpha/beta is meaningless without one.
if args.lm_path is None:
    print("error: LM must be provided for tuning")
    sys.exit(1)

model = DeepSpeech.load_model(args.model_path)
# Pre-computed acoustic-model outputs, decoded repeatedly during the sweep so
# the expensive forward pass runs only once.
saved_output = np.load(args.saved_output)

def init(beam_width, blank_index, lm_path):
    """Worker initializer: build one BeamCTCDecoder per process.

    Stored as a module-level global so worker functions (e.g. decode_dataset)
    can reach it after pool start-up.
    """
    global decoder
    decoder = BeamCTCDecoder(model.labels, lm_path=lm_path, beam_width=beam_width,
                             num_processes=args.lm_workers, blank_index=blank_index)

def decode_dataset(params):
    # params is one (alpha, beta) pair from the search grid; the body may
    # continue beyond the visible source.
    lm_alpha, lm_beta = params
"""Classify a single audio file as containing a swear word or not."""
import argparse

import numpy as np

from deepspeech.model import DeepSpeech
from deepspeech.data.data_loader import SpectrogramParser
from noswear.model import load_model

# --- CLI -------------------------------------------------------------------
# FileType('r') opens the file immediately, which doubles as an existence
# check; only the .name is used below.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('audio_file', type=argparse.FileType('r'),
                        help='File to classify')
args = arg_parser.parse_args()

# --- Models ----------------------------------------------------------------
# Pretrained acoustic model; its audio config drives spectrogram extraction.
base_model = DeepSpeech.load_model('models/librispeech_pretrained.pth')
audio_conf = DeepSpeech.get_audio_conf(base_model)
# Fix: was rebound onto the name `parser` (the ArgumentParser), shadowing it.
audio_parser = SpectrogramParser(audio_conf, normalize=True)
# Binary swear/no-swear classifier head on top of the acoustic model.
net = load_model(base_model, {'f_pickle': 'models/binary_clf.pkl'})
print(net)

# --- Inference -------------------------------------------------------------
fpath = args.audio_file.name
audio = audio_parser.parse_audio(fpath)
# Batch of one: lens holds the time dimension, X gets a leading batch axis.
X = {'lens': np.array([audio.shape[1]]), 'X': np.array(audio)[None]}
y_pred = net.predict(X)
# Fix: replaced the fragile `cond and a or b` idiom with a real conditional
# expression (the old form silently breaks if the truthy branch is falsy).
print('swear! :(' if y_pred[0] else 'noswear :)')