Example #1
def test_main():
    config_path = "./ctc_config_ph6.yaml"
    config = load_config(config_path)
    data_cfg = config['data']
    print(config)
    logger = get_logger('./test.log')
    preproc = Preprocessor(data_cfg['dev_sets']['cv'], config['preproc'], logger,  max_samples=100, start_and_end=False)
    preproc.update()
    print("preproc: \n", preproc)

    check_empty_filename(preproc)
    check_run_from_AudioDataset(preproc, data_cfg['dev_sets']['cv'])
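For orientation, a minimal sketch of the config structure this test assumes, written as the dict that load_config would return; the key names come from the accesses above and every value is a placeholder, not taken from the real ctc_config_ph6.yaml.

# Hypothetical shape of the loaded config; only keys touched by test_main are sketched.
config_sketch = {
    "data": {
        "dev_sets": {"cv": "./data/dev_commonvoice.json"},  # placeholder dataset path
    },
    "preproc": {},  # preprocessing options consumed by Preprocessor (contents not shown in the snippet)
}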
Example #2
 def __init__(self, output_dir, dataset_name, config_path=None):
     """
     Properties:
         num_examples (int): number of examples to be downloaded
         target_eq_guess (bool): if True, the target == guess criterion will filter the downloaded files
     """
     super().__init__(output_dir, dataset_name)
     config = load_config(config_path)
     lists_of_ids = [
         get_dataset_ids(data_path) for data_path in config['datasets']
     ]
     self.record_ids = [
         ids for list_of_ids in lists_of_ids for ids in list_of_ids
     ]
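The nested comprehension above flattens the per-dataset ID lists into a single flat list; a purely illustrative, self-contained equivalent using itertools.chain.from_iterable:

import itertools

# illustrative input: one list of record ids per dataset
lists_of_ids = [["rec_001", "rec_002"], ["rec_003"]]

# chain.from_iterable flattens the list of lists, matching the nested comprehension above
record_ids = list(itertools.chain.from_iterable(lists_of_ids))
assert record_ids == ["rec_001", "rec_002", "rec_003"]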
Example #3
 def __init__(self, output_dir, dataset_name, config_path=None):
     """
     Properties:
         num_examples (int): number of examples to be downloaded
         target_eq_guess (bool): if True, the target == guess criterion will filter the downloaded files
     """
     super().__init__(output_dir, dataset_name)
     config = load_config(config_path)
     self.num_examples = config['num_examples']
     self.target_eq_guess = config['target_eq_guess']
     self.check_constraints = config['check_constraints']
     self.constraints = config['constraints']
     self.days_from_today = config['days_from_today']
     self.disjoint_metadata_tsv = config['disjoint_metadata_tsv']
     self.disjoint_id_names = config['disjoint_id_names']
     self.disjoint_datasets = config['disjoint_datasets']
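A hypothetical config fragment that would satisfy this constructor; the key names are taken from the attribute assignments above and all values are placeholders.

# Placeholder values only; real configs will differ.
example_config = {
    "num_examples": 100,
    "target_eq_guess": True,
    "check_constraints": True,
    "constraints": {},                        # structure not shown in the snippet
    "days_from_today": 30,
    "disjoint_metadata_tsv": "metadata.tsv",
    "disjoint_id_names": ["record_id"],
    "disjoint_datasets": [],
}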
Example #4
def torch_to_onnx(
    model_name: str,
    num_frames: int,
    use_state_dict: bool,
    return_models: bool = False):
    """
    Args:
        model_name (str): filename of the model
        num_frames (int): number of feature frames that fixes the model's input size
        use_state_dict (bool): unused in this snippet
        return_models (bool, default False): if True, the function returns the torch and onnx model objects
    """

    torch_path, config_path, onnx_path = pytorch_onnx_paths(model_name)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
    config = load_config(config_path)
    model_cfg = config['model']

    freq_dim = 257  #freq dimension out of log_spectrogram 
    vocab_size = 39
    time_dim = num_frames
    
    model_cfg.update({'blank_idx': config['preproc']['blank_idx']})
    torch_model = CTC_model(freq_dim, vocab_size, model_cfg) 

    state_dict = load_state_dict(torch_path, device=device)

    torch_model.load_state_dict(state_dict)
    torch_model.to(device)
    print("model on cuda?: ", torch_model.is_cuda)    
    
    torch_model.eval()    

    # create the example inputs for the ONNX export
    hidden_size = config['model']['encoder']['rnn']['dim'] 
    input_tensor = generate_test_input("pytorch", model_name, time_dim, hidden_size) 

    # export the models to onnx
    torch_onnx_export(torch_model, input_tensor, onnx_path)
    print(f"Torch model sucessfully converted to Onnx at {onnx_path}")

    if return_models:
        onnx_model = onnx.load(onnx_path)
        return torch_model, onnx_model
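A hypothetical invocation of torch_to_onnx; the model name and frame count are placeholders, not values from the source.

# with return_models=True the function hands back both model objects for inspection
torch_model, onnx_model = torch_to_onnx(
    model_name="ctc_model",   # placeholder
    num_frames=46,            # placeholder
    use_state_dict=True,
    return_models=True,
)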
Example #5
def torch_to_coreml(
    model_name: str,
    num_frames: int,
    use_state_dict: bool,
    return_models: bool = False):
    """
    Args:
        model_name (str): filename of the model
        num_frames (int): number of feature frames that fixes the model's input size (unused here;
            31- and 46-frame inputs are generated below)
        use_state_dict (bool): unused in this snippet
        return_models (bool, default False): if True, the function is intended to return the torch
            and Core ML model objects (this branch is currently a no-op)
    """

    torch_path, config_path, onnx_path = pytorch_onnx_paths(model_name)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
    config = load_config(config_path)
    model_cfg = config['model']

    freq_dim = 257  #freq dimension out of log_spectrogram 
    vocab_size = 39
    
    torch_model = CTC_model(freq_dim, vocab_size, model_cfg) 

    state_dict = load_state_dict(torch_path, device=device)

    torch_model.load_state_dict(state_dict)
    torch_model.to(device)
    print("model on cuda?: ", torch_model.is_cuda)    
    
    torch_model.eval()    

    # create the tracing inputs
    hidden_size = config['model']['encoder']['rnn']['dim'] 
    x, (h_in, c_in) = generate_test_input("pytorch", model_name, 31, hidden_size) 

    traced_model = torch.jit.trace(torch_model, (x, (h_in, c_in)))

    x_46, (h_46, c_46) = generate_test_input("pytorch", model_name, 46, hidden_size)

    out_46, (h_out_46, c_out_46) = traced_model(x_46, (h_46, c_46))

    if return_models:
        pass
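The snippet stops after tracing; below is a minimal sketch of the Core ML conversion step it appears to lead toward, assuming coremltools is installed and the traced model's inputs are flattened to three tensors. Names, shapes, and the output path are placeholders, not values from the source.

import coremltools as ct

# convert the traced TorchScript model to a Core ML model
mlmodel = ct.convert(
    traced_model,
    inputs=[
        ct.TensorType(name="x", shape=x_46.shape),
        ct.TensorType(name="h_in", shape=h_46.shape),
        ct.TensorType(name="c_in", shape=c_46.shape),
    ],
)
mlmodel.save("ctc_model.mlmodel")  # placeholder output path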
Example #6
def _load_model(model_params: dict,
                device) -> Tuple[torch.nn.Module, speech.loader.Preprocessor]:
    """
    Loads the model, config, and preprocessing object and prepares the model and preproc for evaluation.
    Args:
        model_params (dict): dict containing model path, tag, and filename
        device (torch.device): torch processing device
    Returns:
        torch.nn.Module: torch model
        speech.loader.Preprocessor: preprocessing object
    """

    model_path, preproc_path, config_path = get_names(
        model_params['path'],
        tag=model_params['tag'],
        get_config=True,
        model_name=model_params['filename'])

    # load and update preproc
    preproc = read_pickle(preproc_path)
    preproc.update()

    # load and assign config
    config = load_config(config_path)
    model_cfg = config['model']
    model_cfg.update({'blank_idx': config['preproc']['blank_idx']})  # create `blank_idx` in model_cfg section

    # create model
    model = CTC_train(preproc.input_dim, preproc.vocab_size, model_cfg)

    state_dict = load_state_dict(model_path, device=device)
    model.load_state_dict(state_dict)

    model.to(device)
    # turn model and preproc to eval_mode
    model.set_eval()
    preproc.set_eval()

    return model, preproc
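A hypothetical call to _load_model; all path values are placeholders.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, preproc = _load_model(
    model_params={
        "path": "./saved_models/ctc_run",      # placeholder model directory
        "tag": "best",
        "filename": "model_state_dict.pth",
    },
    device=device,
)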
Example #7
def run_eval(
        model_path,
        dataset_json,
        batch_size=8,
        tag="best",
        model_name="model_state_dict.pth",
        device=None,
        add_filename=False,
        add_maxdecode: bool = False,
        formatted=False,
        config_path=None,
        out_file=None) -> float:
    """
    Calculates the error rate between the predictions from
    the model in model_path and the labels in dataset_json.

    Args:
        model_path (str): path to the directory that contains the model
        dataset_json (str): path to the dataset json file
        batch_size (int): number of examples to be fed into the model at once
        tag (str): string that prefixes the model_name; if "best", the "best_model" is used
        model_name (str): name of the model, likely either "model_state_dict.pth" or "model"
        device (torch.device): device that the evaluation should run on
        add_filename (bool): if true, the filename is added to each example in `save_json`
        add_maxdecode (bool): if true, the predictions using max decoding will be added in addition
            to the predictions from the ctc_decoder
        formatted (bool): if true, `format_save` will be used instead of `json_save`, where
            `format_save` produces a more human-readable output file
        config_path (str): specific path to the config file, if the one in `model_path` is not desired
        out_file (str): path where the output file will be saved

    Returns:
        (float): the computed error rate of the model on the dataset
    """

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path, preproc_path, default_config_path = get_names(model_path, tag=tag, model_name=model_name, get_config=True)
    # honor an explicitly supplied config_path; otherwise fall back to the one found next to the model
    if config_path is None:
        config_path = default_config_path
    
    # load and update preproc
    preproc = read_pickle(preproc_path)
    preproc.update()

    # load and assign config
    config = load_config(config_path)
    model_cfg = config['model']
    model_cfg.update({'blank_idx': config['preproc']['blank_idx']}) # create `blank_idx` in model_cfg section


    # create model
    model = CTC_train(
        preproc.input_dim,
        preproc.vocab_size,
        model_cfg
    )

    state_dict = load_state_dict(model_path, device=device)
    model.load_state_dict(state_dict)
    
    ldr =  loader.make_loader(
        dataset_json,
        preproc, 
        batch_size
    )
    
    model.to(device)
    model.set_eval()
    print(f"preproc train_status before set_eval: {preproc.train_status}")
    preproc.set_eval()
    preproc.use_log = False
    print(f"preproc train_status after set_eval: {preproc.train_status}")


    results = eval_loop(model, ldr, device)
    print(f"number of examples: {len(results)}")
    #results_dist = [[(preproc.decode(pred[0]), preproc.decode(pred[1]), prob)] 
    #                for example_dist in results_dist
    #                for pred, prob in example_dist]
    results = [(preproc.decode(label), preproc.decode(pred), conf)
               for label, pred, conf in results]
    # maxdecode_results = [(preproc.decode(label), preproc.decode(pred))
    #           for label, pred in results]
    cer = speech.compute_cer(results, verbose=True)

    print("PER {:.3f}".format(cer))
    
    if out_file is not None:
        compile_save(results, dataset_json, out_file, formatted, add_filename)
    
    return round(cer, 3)
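A hypothetical call to run_eval; the paths are placeholders, not values from the source.

cer = run_eval(
    model_path="./saved_models/ctc_run",   # placeholder model directory
    dataset_json="./data/dev_set.json",    # placeholder dataset json
    batch_size=8,
    tag="best",
    out_file="./eval_predictions.json",    # optional; triggers compile_save
)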
Example #8
    Args:
        dataset_paths (List[str]): a list of dataset paths (str)
    Returns:
        Dict[str, set]: a dict with the set of ids as values
    """
    data_dict = dict()

    for data_path in dataset_paths:
        # _extract_id on the data path will return the dataset name
        data_name = _extract_id(data_path)
        dataset = read_data_json(data_path)
        # set comprehension that extracts the record id from each audio path in the dataset
        id_set = {_extract_id(xmpl['audio']) for xmpl in dataset}
        data_dict.update({data_name: (id_set, len(dataset))})

    return data_dict


def _extract_id(record_path: str) -> str:
    # returns the basename of the path without the extension
    return os.path.basename(os.path.splitext(record_path)[0])
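For context, a hypothetical dataset entry of the kind read_data_json is assumed to return; the only key the set comprehension above relies on is 'audio', and every value is a placeholder. The example reuses the _extract_id helper defined above.

# one entry of the dataset list; shape inferred from the usage above (placeholder values)
example_entry = {"audio": "/data/clips/sample_00042.wav", "text": "hello world"}

# _extract_id maps the audio path to its record id
assert _extract_id(example_entry["audio"]) == "sample_00042"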


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Verify that the datasets are disjoint.")
    parser.add_argument(
        "--config", help="Path to config file containing the necessary inputs")
    args = parser.parse_args()
    config = load_config(args.config)
    verify_disjoint(config)