def main(model_name, checkpoint_name, url):
    config = Cfg.load_config_from_name(model_name)
    dataset_params = {
        'name': 'hw',
        'data_root': '/home/longhn',
        # 'train_root': '/home/fdm/Desktop/chungnph/vietocr/Annotation_2505',
        # 'val_root': '/home/fdm/Desktop/chungnph/vietocr/Annotation_2505',
        'train_annotation': f'{url}/train.txt',
        'valid_annotation': f'{url}/valid.txt'
    }

    params = {
        'print_every': 200,
        'valid_every': 10 * 200,
        'iters': 30000,
        'checkpoint': f'./checkpoint/{checkpoint_name}.pth',
        'export': f'./checkpoint/{checkpoint_name}.pth',
        'metrics': 15000,
        'batch_size': 32
    }
    dataloader_params = {'num_workers': 1}
    # config['pretrain']['cached'] = 'checkpoint/ngaycap_0204.pth'
    config['trainer'].update(params)
    config['dataset'].update(dataset_params)
    config['dataloader'].update(dataloader_params)
    config['device'] = 'cuda'
    config[
        'vocab'] = '''aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ '''
    # config['weights'] = 'checkpoint/ngaycap_0204.pth'
    print(config)
    trainer = Trainer(config, pretrained=True)
    trainer.config.save(f'train_config/{checkpoint_name}.yml')
    trainer.train()
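A minimal invocation sketch for the function above; the model name, checkpoint name, and annotation directory are placeholder values (not taken from the original project), assuming the VietOCR-style Cfg/Trainer imports used in the snippet.

# Hypothetical invocation; 'vgg_transformer', the checkpoint name and the
# annotation directory are placeholders, not the original project's values.
if __name__ == '__main__':
    main(model_name='vgg_transformer',
         checkpoint_name='hw_experiment',
         url='/home/longhn/annotations')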
Example #2
def main(args):
    config.seed = args.seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    if args.gpu_device >= 0:
        import torch
        torch.cuda.set_device(args.gpu_device)
    print("Reading...")
    g = read_hin(args.dataset)
    el = EdgeLabel(g)
    el.split(n_val=0.1, n_test=0.9 - args.train_ratio, seed=args.seed)

    if args.mode == 'DistMult':
        score = 'inner'
        p_reg = 2
        proj_name = 'distmult'
    elif args.mode == 'TransH1':
        score = 'l1'
        p_reg = 1
        proj_name = 'transh'
    elif args.mode == 'TransH2':
        score = 'l2'
        p_reg = 2
        proj_name = 'transh'
    else:
        # Guard against silently using undefined settings for an unknown mode.
        raise ValueError(f'Unknown mode: {args.mode}')

    t = Trainer(g, el, batch_size=args.batch_size, n_neighbor=args.n_nb, self_loop=True, score=score,
                proj_name=proj_name, p_reg=p_reg, l_reg=args.l_reg, degree=args.degree)
    t.run(lr=1e-4, patience=args.patience, max_steps=args.max_steps, cuda=True if args.gpu_device >= 0 else False)
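The function above documents its expected arguments only implicitly through args.* accesses. A sketch of a matching argparse setup follows; every default value is illustrative, not taken from the original script.

import argparse

# Parser reconstructed from the args.* attributes read above; defaults are assumptions.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', required=True)
parser.add_argument('--mode', choices=['DistMult', 'TransH1', 'TransH2'], default='DistMult')
parser.add_argument('--seed', type=int, default=0)
parser.add_argument('--gpu_device', type=int, default=-1)
parser.add_argument('--train_ratio', type=float, default=0.1)
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--n_nb', type=int, default=10)
parser.add_argument('--l_reg', type=float, default=0.0)
parser.add_argument('--degree', type=int, default=1)
parser.add_argument('--patience', type=int, default=10)
parser.add_argument('--max_steps', type=int, default=10000)

if __name__ == '__main__':
    main(parser.parse_args())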
Example #3
    def train(self, x_train, kb_words, y_train, x_valid=None, y_valid=None):
        trainer = Trainer(self.model,
                          self.kb_miner,
                          self.training_config,
                          checkpoint_path=self.log_dir,
                          preprocessor=self.p)
        trainer.train(x_train, kb_words, y_train, x_valid, y_valid)
Example #4
def vae_train(config):
    config['outdir'].mkdir(parents=True, exist_ok=True)

    # get device
    if config['use_gpu']:
        device = torch.device('cuda')
        # moving a tensor to GPU
        # useful on the BUT cluster to prevent someone else from grabbing the same GPU
        fake = torch.Tensor([1]).to(device)
    else:
        device = torch.device('cpu')

    dataset_class = get_dataset(config['dataset_type'])

    # compute or load mean and std of dataset
    trans = lambda x: logspec(x, **config['spectrum_conf'])
    dataset = dataset_class(config['dataset'], transform=trans)
    dataloader_meanstd = DataLoader(dataset)
    meanstd_norm = get_meanstd_norm(config['meanstd_norm_file'],
                                    dataloader_meanstd)

    # load the dataset
    trans = lambda x: meanstd_norm(logspec(x, **config['spectrum_conf']))
    dataset = dataset_class(config['dataset'], transform=trans)
    dataloader_train = DataLoader(dataset,
                                  batch_size=config['batch_size'],
                                  collate_fn=PadCollate(),
                                  shuffle=True)

    # create the model
    model = SeqVAESpeaker(**config['vae_conf']).to(device)

    # store model config
    with open(config['outdir'] / 'vae_config', 'w') as f:
        json.dump(config['vae_conf'], f, indent=2)

    # load loss function
    if config['vae_objective'] == 'elbo':
        loss = ELBOLoss(model).to(device)
    elif config['vae_objective'] == 'elbo_speakerid':
        loss = ELBOSpeakerLoss(model, config['speaker_loss_weight']).to(device)
    else:
        raise KeyError(f'Unknown objective {config["vae_objective"]}')

    # run training
    trainer = Trainer(model,
                      loss,
                      dataloader_train,
                      config['outdir'],
                      device=device,
                      **config['optimizer_conf'])
    trainer.run()
Example #5
    def __init__(self, trainX, trainY, penalty=0.00015):
        m, n = trainX.shape
        o, p = trainY.shape

        self.network = NeuralNetwork(n, n + 1, p, penalty)
        self.xMax = np.amax(trainX, axis=0)
        self.yMax = np.amax(trainY, axis=0)

        self.trainX = trainX / self.xMax
        self.trainY = trainY / self.yMax

        trainer = Trainer(self.network)
        trainer.train(self.trainX, self.trainY)
Example #6
def main(args):
    train_data, val_data, test_data, data_parser = read_dataset(args.data,
                                                                cuda=args.cuda)
    args.max_len = min(args.max_len, data_parser.output_max_len)
    input_dict, output_dict = data_parser.input_dict, data_parser.output_dict

    net = load_model(args.load_model,
                     input_dict,
                     output_dict,
                     load_last=args.load_last,
                     eval=True,
                     cuda=args.cuda)
    trainer = Trainer(net, cuda=args.cuda, batch_size=args.batch_size)

    if args.print_errors:
        print_errors(trainer, data_parser, test_data, args)

    train_acc, train_loss, train_time = get_loss_acc(trainer, data_parser,
                                                     train_data, args)
    print('%sTrain acc %.6f%s' % (bcolors.OKBLUE, train_acc, bcolors.ENDC))
    print('%sTrain Loss %.6f%s' % (bcolors.OKBLUE, train_loss, bcolors.ENDC))

    val_acc, val_loss, val_time = get_loss_acc(trainer, data_parser, val_data,
                                               args)
    print('%sEval acc %.6f%s' % (bcolors.MAGENTA, val_acc, bcolors.ENDC))
    print('%sEval Loss %.6f%s' % (bcolors.MAGENTA, val_loss, bcolors.ENDC))

    test_acc, test_loss, test_time = get_loss_acc(trainer, data_parser,
                                                  test_data, args)
    print('%sTest acc %.6f%s' % (bcolors.CYAN, test_acc, bcolors.ENDC))
    print('%sTest Loss %.6f%s' % (bcolors.CYAN, test_loss, bcolors.ENDC))

    if args.eval_time:
        print_times(train_time, val_time, test_time)
Example #7
    def fit(self,
            pairs,
            save_dir='./checkpoints',
            batch_size=64,
            teacher_forcing_ratio=1.0,
            learning_rate=0.0001,
            decoder_learning_ratio=5.0,
            n_iteration=4000,
            print_every=1,
            save_every=500,
            clip=50.0):

        tr = Trainer(self, teacher_forcing_ratio, self.loadFilename,
                     learning_rate, decoder_learning_ratio)
        tr.trainIters(pairs, save_dir, n_iteration, batch_size, print_every,
                      save_every, clip, self.corpus_name)
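A hedged usage sketch for the fit method above; chatbot stands in for an instance of whatever class defines it, and pairs for its preprocessed training data.

# Hypothetical call; 'chatbot' is an instance of the class defining fit(),
# 'pairs' its list of preprocessed (input, response) pairs.
chatbot.fit(pairs,
            save_dir='./checkpoints',
            batch_size=64,
            n_iteration=4000,
            print_every=100,
            save_every=500)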
Example #8
def load_training_model_from_factory(configs, ngpu):
    if configs['model']['type'] == 'Encoder':
        from model.trainer import Trainer
        net, optimizer = load_training_net_from_factory(configs)
        loss = load_loss_from_factory(configs)
        trainer = Trainer(net, loss, configs['op']['loss'], optimizer, ngpu)
    elif configs['model']['type'] == 'GAN':
        from model.gan_trainer import Trainer
        sr_G, sr_D, optimizerG, optimizerD = load_training_net_from_factory(
            configs)
        g_loss, d_loss = load_loss_from_factory(configs)
        trainer = Trainer(sr_G, sr_D, g_loss, d_loss, optimizerG, optimizerD,
                          ngpu)
    else:
        raise Exception("Wrong model type!")

    return trainer
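The factory above reads only configs['model']['type'] and configs['op']['loss'] directly; everything else is consumed by the nested loaders. A minimal sketch of the expected shape (keys beyond those two are assumptions).

# Sketch of a configs dict for load_training_model_from_factory(). Only
# 'model.type' and 'op.loss' are read here; further optimizer/network settings
# are assumed to be read by load_training_net_from_factory / load_loss_from_factory.
configs = {
    'model': {'type': 'Encoder'},   # or 'GAN'
    'op': {'loss': 'l1'},           # loss identifier passed through to Trainer
}
trainer = load_training_model_from_factory(configs, ngpu=1)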
Example #9
def main(args):
    # single-GPU setup / environment based on Colab
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    weights_matrix = np.load(args.weights_matrix, allow_pickle=True)  # newly saved

    if args.model == 'bi-lstm':
        model = BiLSTM(weights_matrix).to(device)
        model_path = ''

    # TODO: CNN model not implemented yet
    elif args.model == 'cnn':
        pass

    checkpoint = torch.load(args.model_path)
    state_dict = checkpoint['net']
    model.load_state_dict(state_dict=state_dict)

    cls = Trainer(args)
    cls.test(model, device)
Example #10
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data, split='train')
    valid_data_loader = config.init_obj('data_loader',
                                        module_data,
                                        split='dev')

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    #logger.info(model)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    sentence_metrics = [
        getattr(module_metric, met) for met in config['sentence_metrics']
    ]
    term_metrics = [
        getattr(module_metric, met) for met in config['term_metrics']
    ]

    # build optimizer, learning rate scheduler.
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    #optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    optimizer = config.init_obj('optimizer', transformers.optimization,
                                trainable_params)

    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)

    trainer = Trainer(model,
                      criterion,
                      sentence_metrics,
                      term_metrics,
                      optimizer,
                      config=config,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader)
    #lr_scheduler=lr_scheduler)

    trainer.train()
Example #11
def train(train_data, val_data, data_parser, args):
    input_dict, output_dict = data_parser.input_dict, data_parser.output_dict

    net = get_model(input_dict,
                    output_dict,
                    args.model,
                    args.hidden_size,
                    args.max_len,
                    args.layers,
                    args.dropout,
                    kernel_size=args.kernel_size,
                    dilate=args.dilate,
                    ignore_pad=args.ignore_pad,
                    multilinear=(not args.single_linear),
                    input_len=data_parser.input_max_len,
                    stride=args.stride,
                    attn_heads=args.attn_heads,
                    cuda=args.cuda)
    net.initialize_params(args.init_params)

    print(args)
    print()
    print(net)
    print('Number of Elements: %d' %
          (sum([x.view(-1).size(0) for x in net.parameters()])))

    trainer = Trainer(net,
                      optim_type=args.optim,
                      print_every=args.print_every,
                      cuda=args.cuda,
                      save_dir=args.save_dir,
                      save_every=args.save_every,
                      improve_wait=args.improve_wait,
                      batch_size=args.batch_size)
    if args.train_from:
        trainer.load_checkpoint(args.train_from)
        trainer.continue_training(data_parser,
                                  train_data,
                                  val_data,
                                  args.epochs,
                                  lr=args.lr,
                                  weight_decay=args.weight_decay)
    else:
        trainer.train_epochs(data_parser,
                             train_data,
                             val_data,
                             args.epochs,
                             lr=args.lr,
                             weight_decay=args.weight_decay)

    return trainer
Example #12
def main(args):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    weights_matrix = np.load(args.weights_matrix, allow_pickle=True)  # newly saved

    if args.model == 'bi-lstm':
        model = BiLSTM(weights_matrix).to(device)
        model_path = ''

    # TO DO
    elif args.model == 'cnn':
        pass

    trainer = Trainer(args)
    trainer.train(num_epochs=args.n_epochs,
                  model=model,
                  saved_dir=args.save_dir,
                  device=device,
                  criterion=torch.nn.BCELoss(),
                  optimizer=torch.optim.Adam(params=model.parameters(),
                                             lr=1e-5),
                  val_every=args.val_every)
Example #13
def load_model(trial_path):
    with open(trial_path + "/hyper.csv") as file:
        reader = csv.DictReader(file)
        for row in reader:
            hyper = dict(row)

    dataset = hyper['dataset']
    model = hyper['model']
    batch = int(hyper['batch'])
    units_conv = int(hyper['units_conv'])
    units_dense = int(hyper['units_dense'])
    num_layers = int(hyper['num_layers'])
    loss = hyper['loss']
    pooling = hyper['pooling']
    std = float(hyper['data_std'])
    mean = float(hyper['data_mean'])

    # Load model
    trainer = Trainer(dataset)
    trainer.load_data(batch=batch)
    trainer.data.std = std
    trainer.data.mean = mean
    trainer.load_model(model,
                       units_conv=units_conv,
                       units_dense=units_dense,
                       num_layers=num_layers,
                       loss=loss,
                       pooling=pooling)

    # Load best weight
    trainer.model.load_weights(trial_path + "/best_weight.hdf5")
    print("Loaded Weights from {}".format(trial_path + "/best_weight.hdf5"))

    return trainer, hyper
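A usage sketch for the loader above; the trial directory is hypothetical and is assumed to contain the hyper.csv and best_weight.hdf5 files the function reads.

# Hypothetical trial directory holding hyper.csv (one row with the columns
# parsed above) and best_weight.hdf5.
trainer, hyper = load_model("result/hiv/trial_0")
print(hyper['model'], hyper['loss'], hyper['pooling'])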
Example #14
def main() -> None:
    args = TrainArgs.get_args()
    train_dataset, dev_dataset, vectors = get_datasets(args)
    training_config = get_training_config(args)
    with open(f"{args.run_name}_config.json", "w") as config_file:
        json.dump(vars(args), config_file, indent=2)

    try:
        print(f"Attempting to load model to train from {args.run_name}.pth")
        model = t.load(f"{args.run_name}.pth").to(training_config.device)
    except IOError as e:
        print(f"Can't load model: {e}, initializing from scratch")
        model = initialize_model(args, train_dataset, vectors)

    Trainer.train_model(model,
                        train_dataset,
                        dev_dataset,
                        training_config,
                        debug=args.debug)
    dev_answers = Trainer.answer_dataset(dev_dataset, model, training_config)
    gold_answers = dev_dataset.get_gold_answers()
    qid_to_answers = {}
    for qid, model_answer in dev_answers.items():
        qid_to_answers[qid] = {
            "model_answer": model_answer,
            "gold_answer": gold_answers[qid],
        }
    with open("dev-pred.json", "w") as f:
        json.dump(dev_answers, f)
    with open("dev-pred-with-gold.json", "w") as f:
        json.dump(qid_to_answers, f)
    print("Final evaluation on dev")
    eval_results = Trainer.evaluate_on_squad_dataset(dev_dataset, model,
                                                     training_config)
    print(eval_results)

    print(f"Saving model to {args.run_name}.pth")
    t.save(model, f"{args.run_name}.pth")
Example #15
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data, split='train')
    valid_data_loader = config.init_obj('data_loader',
                                        module_data,
                                        split='dev')
    #valid_data_loader = None

    # build model architecture, then print to console
    model = config.init_obj('arch',
                            module_arch,
                            num_classes=data_loader.dataset.num_classes,
                            vocab_size=len(data_loader.dataset.tokenizer))
    #logger.info(model)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning rate scheduler. Delete every line containing lr_scheduler to disable the scheduler.
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', transformers.optimization,
                                trainable_params)

    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler,
                                   optimizer)
    trainer = Trainer(model,
                      criterion,
                      metrics,
                      optimizer,
                      config=config,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader)
    #lr_scheduler=lr_scheduler)

    trainer.train()
Example #16
def main():
    gpus = get_gpus()
    print("GPUs: {}".format(gpus))

    args = get_user_args()
    configs = parser.parse(args.config)
    params = setup(configs)

    data_loader = DataLoader()
    train_datagen = data_loader.get_datagen('train', params)
    val_datagen = data_loader.get_datagen('val', params)

    model = Model(params)
    model.create_model()
    model.inspect()

    trainer = Trainer(
        model.model,
        train_datagen,
        val_datagen,
        params,
    )

    pprint(params)
    trainer.add_callbacks(params)
    trainer.train(params)
    trainer.model.save(
        **params["trained_model"]
    )

    test_datagen = data_loader.get_datagen('test', params)
    step_size = test_datagen.n // test_datagen.batch_size
    preds = trainer.model.predict_generator(
        test_datagen, verbose=1, steps=step_size
    )
    predicted_class_indices = np.argmax(preds, axis=1)
    labels = dict((v, k) for k, v in (train_datagen.class_indices).items())
    predictions = [labels[k] for k in predicted_class_indices]
    filenames = test_datagen.filenames
    results = pd.DataFrame(
        {
            "file": filenames,
            "prediction": predictions,
            "prediction_class": predicted_class_indices,
            "cat_proba": [p[0] for p in preds],
            "no_cat_proba": [p[1] for p in preds],
            "label": test_datagen.classes,
        }
    )
    results_fp = params["results_fp"]
    results.to_csv(f"{results_fp}", index=False)
    print(results)
Example #17
def load_best_model(model_dir, model_type="predictor"):
    model_file = model_dir + "/best_model.pt"
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    if model_type == "predictor":
        predictor = Predictor(model_opt)
        model = Trainer(model_opt, predictor, model_type=model_type)
    else:
        selector = Selector(model_opt)
        model = Trainer(model_opt, selector, model_type=model_type)
    model.load(model_file)
    helper.print_config(model_opt)
    return model
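A usage sketch, assuming saved run directories that each contain a best_model.pt.

# Hypothetical model directories; each is expected to hold a best_model.pt.
predictor = load_best_model("saved_models/predictor", model_type="predictor")
selector = load_best_model("saved_models/selector", model_type="selector")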
Example #18
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True, help='see example at ')
    parser.add_argument('--checkpoint', required=False, help='your checkpoint')

    args = parser.parse_args()
    config = Cfg.load_config_from_file(args.config)

    trainer = Trainer(config)

    if args.checkpoint:
        trainer.load_checkpoint(args.checkpoint)
        
    trainer.train()
Example #19
def get_training_config(args: TrainArgs) -> Trainer.TrainingConfig:
    """
    Parse the command line args builds a TrainingConfig object
    :param args: TrainArgs object containing invocation parameters
    :returns: A well formatted TrainingConfig object that can be used
        for training the model
    """
    return Trainer.TrainingConfig(
        learning_rate=args.lr,
        weight_decay=args.weight_decay,
        max_grad_norm=args.max_grad_norm,
        ema_weight=args.ema_weight,
        num_epochs=args.num_epochs,
        batch_size=args.batch_size,
        max_question_size=args.max_question_size,
        max_context_size=args.max_context_size,
        device=get_device(args.disable_cuda),
        loader_num_workers=args.loader_num_workers,
        model_checkpoint_path=args.run_name,
    )
Example #20
def main():
    args = parse_args()
    # load model
    model_file = args.model_file
    print("Loading model from {}...".format(model_file))
    trainer = Trainer(model_file=model_file)
    opt, vocab = trainer.opt, trainer.vocab
    trainer.model.eval()
    print("Loaded.\n")

    # run
    while True:
        background, findings = get_input(opt)
        sum_words = run(background, findings, trainer, vocab, opt)
        print(blue("Predicted Impression:\n") + " ".join(sum_words))
        print("")
        inp = input("Quit (q to quit, otherwise continue)? ")
        if inp == 'q':
            break
    return
Example #21
def get_trainer(config, model):
    optimizer_params = {
        'lr': config['lr'],
        'lr_decay': config['lr_decay'],
        'weight_decay': config['weight_decay'],
        'amsgrad': config['amsgrad']
    }
    loss_params = {
        'smoothing': config['smoothing'],
        'lm_weight': config['lm_weight'],
        'cls_weight': config['cls_weight']
    }
    amp_params = {
        'opt_level': config['opt_level'],
        'loss_scale': config['loss_scale']
    }
    checkpoint_dir = config['checkpoint_dir']
    device = config['device']
    n_jobs = config['n_jobs']

    trainer = Trainer(model, optimizer_params, loss_params, amp_params,
                      checkpoint_dir, device, n_jobs)

    return trainer
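A sketch of the flat config dict this helper expects, reconstructed from the keys it reads; all values shown are illustrative, not the project's defaults.

# Illustrative values only; the keys mirror exactly what get_trainer() reads.
config = {
    'lr': 3e-4, 'lr_decay': 0.99, 'weight_decay': 1e-2, 'amsgrad': True,
    'smoothing': 0.1, 'lm_weight': 1.0, 'cls_weight': 1.0,
    'opt_level': 'O1', 'loss_scale': 'dynamic',   # amp-style settings, assumed
    'checkpoint_dir': './checkpoints', 'device': 'cuda', 'n_jobs': 4,
}
trainer = get_trainer(config, model)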
Example #22
    def test_mh_model(self):
        path_captions = ProjectPaths.text_representation_path
        path_embedding_captions = ProjectPaths.embedding_captions_path
        path_embedding_images = ProjectPaths.embedding_images_path
        text_representation = TextRepresentation(path_captions)

        features = ImageRepresentation.load_features(path_embedding_images)

        model_builder = ModelBuilder(text_representation, path_embedding_captions)

        trainer = Trainer(text_representation, features, model_builder, loss='mh', mini_batch_size=50)

        trainer.train(epochs=40)

        trainer.save(
            image_name=ProjectPaths.image_model_mh_path,
            caption_name=ProjectPaths.caption_model_mh_path,
            caption_representations_name=ProjectPaths.caption_representations_name,
            image_representations_name=ProjectPaths.image_representations_name
        )
Example #23
import sys
sys.path.append('../../')
from model.trainer import Trainer

if __name__ == "__main__":
    trainer = Trainer(None)

    target_parameters = {
        "units_conv": 128,
        "units_dense": 128,
        "pooling": "max",
        "num_layers": 2,
        "name": "target"
    }
    molecule_parameters = {
        "units_conv": 128,
        "units_dense": 128,
        "pooling": "max",
        "num_layers": 2,
        "name": "molecule"
    }

    hyperparameters = {
        "epoch": 20,
        "batch": 16,
        "fold": 10,
        "loss": "binary_crossentropy",
        "monitor": "val_roc",
        "label": "",
        "target_parameters": target_parameters,
        "molecule_parameters": molecule_parameters
Example #24
    model_dir = os.path.join(args.finetune_model, "nnet")

    # Set the random seed. The random operations may appear in data input, batch forming, etc.
    tf.set_random_seed(params.seed)
    random.seed(params.seed)
    np.random.seed(params.seed)

    dim = FeatureReader(args.train_dir).get_dim()
    if "selected_dim" in params.dict:
        dim = params.selected_dim

    with open(os.path.join(model_dir, "feature_dim"), "w") as f:
        f.write("%d\n" % dim)

    num_total_train_speakers = KaldiDataRandomQueue(args.train_dir, args.train_spklist).num_total_speakers
    tf.logging.info("There are %d speakers in the training set and the dim is %d" % (num_total_train_speakers, dim))

    min_valid_loss = ValidLoss()

    # The trainer is used to control the training process
    trainer = Trainer(params, args.finetune_model, dim, num_total_train_speakers)
    trainer.build("train")
    trainer.build("valid")

    # Load the pre-trained model and transfer to current model
    trainer.get_finetune_model(params.noload_var_list)

    trainer.train_tune_lr(args.train_dir, args.train_spklist, args.tune_period)
    trainer.close()
    tf.logging.info("Finish tuning.")
Example #25
    nnet_dir = os.path.join(args.model_dir, "nnet")

    config_json = os.path.join(args.model_dir, "nnet/config.json")
    if not os.path.isfile(config_json):
        sys.exit("Cannot find params.json in %s" % config_json)
    params = Params(config_json)

    # Change the output node if necessary
    if len(args.node) != 0:
        params.embedding_node = args.node
    tf.logging.info("Extract embedding from %s" % params.embedding_node)

    with open(os.path.join(nnet_dir, "feature_dim"), "r") as f:
        dim = int(f.readline().strip())
    trainer = Trainer(params, args.model_dir, dim, single_cpu=True)
    trainer.build("predict")

    if args.rspecifier.rsplit(".", 1)[1] == "scp":
        # The rspecifier cannot be scp
        sys.exit("The rspecifier must be ark or input pipe")

    fp_out = open_or_fd(args.wspecifier, "wb")
    for index, (key, feature) in enumerate(read_mat_ark(args.rspecifier)):
        if feature.shape[0] < args.min_chunk_size:
            tf.logging.info("[INFO] Key %s length too short, %d < %d, skip." %
                            (key, feature.shape[0], args.min_chunk_size))
            continue
        if feature.shape[0] > args.chunk_size:
            feature_array = []
            feature_length = []
Example #26
if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    args = parser.parse_args()
    params = save_codes_and_config(True, args.model, None)

    # The model directory always has a folder named nnet
    model_dir = os.path.join(args.model, "nnet")

    # Set the random seed. The random operations may appear in data input, batch forming, etc.
    tf.set_random_seed(params.seed)
    random.seed(params.seed)
    np.random.seed(params.seed)

    dim = FeatureReader(args.data_dir).get_dim()
    if "selected_dim" in params.dict:
        dim = params.selected_dim

    with open(args.data_spklist, 'r') as f:
        num_total_train_speakers = len(f.readlines())
    trainer = Trainer(params, args.model, dim, num_total_train_speakers)
    trainer.build("valid")
    valid_loss, valid_embeddings, valid_labels = trainer.insight(
        args.data_dir,
        args.data_spklist,
        batch_type=params.batch_type,
        output_embeddings=True)
    eer = compute_cos_pairwise_eer(valid_embeddings, valid_labels)
    tf.logging.info("EER: %f" % eer)
    trainer.close()
Example #27
    dim = FeatureReader(args.train_dir).get_dim()
    with open(os.path.join(model_dir, "feature_dim"), "w") as f:
        f.write("%d\n" % dim)
    num_total_train_speakers = KaldiDataRandomQueue(
        args.train_dir, args.train_spklist).num_total_speakers
    tf.logging.info(
        "There are %d speakers in the training set and the dim is %d" %
        (num_total_train_speakers, dim))

    # Load the history valid loss
    min_valid_loss = ValidLoss()
    if os.path.isfile(os.path.join(model_dir, "valid_loss")):
        min_valid_loss = load_valid_loss(os.path.join(model_dir, "valid_loss"))

    # The trainer is used to control the training process
    trainer = Trainer(params, args.model)
    trainer.build("train",
                  dim=dim,
                  loss_type=params.loss_func,
                  num_speakers=num_total_train_speakers)
    trainer.build("valid",
                  dim=dim,
                  loss_type=params.loss_func,
                  num_speakers=num_total_train_speakers)

    if "early_stop_epochs" not in params.dict:
        params.dict["early_stop_epochs"] = 10
    if "min_learning_rate" not in params.dict:
        params.dict["min_learning_rate"] = 1e-5

    for epoch in range(start_epoch, params.num_epochs):
Example #28
parser.add_argument('--cuda', type=bool, default=torch.cuda.is_available())
parser.add_argument('--cpu', action='store_true')
args = parser.parse_args()

torch.manual_seed(args.seed)
random.seed(1234)
if args.cpu:
    args.cuda = False
elif args.cuda:
    torch.cuda.manual_seed(args.seed)

# load opt
model_file = args.model_dir + '/' + args.model
print("Loading model from {}".format(model_file))
opt = torch_utils.load_config(model_file)
trainer = Trainer(opt)
trainer.load(model_file)

# load vocab
vocab_file = args.model_dir + '/vocab.pkl'
vocab = Vocab(vocab_file, load=True)
assert opt[
    'vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

char_vocab_file = args.model_dir + '/vocab_char.pkl'
char_vocab = Vocab(char_vocab_file, load=True)
assert opt[
    'char_vocab_size'] == char_vocab.size, "Char vocab size must match that in the saved model."

# load data
data_file = opt['data_dir'] + '/{}.jsonl'.format(args.dataset)
Example #29
from model.trainer import Trainer

if __name__ == "__main__":
    trainer = Trainer("hiv")

    hyperparameters = {
        "epoch": 150,
        "batch": 16,
        "fold": 10,
        "units_conv": 128,
        "units_dense": 128,
        "pooling": "max",
        "num_layers": 2,
        "loss": "binary_crossentropy",
        "monitor": "val_roc",
        "label": ""
    }

    features = {
        "use_atom_symbol": True,
        "use_degree": True,
        "use_hybridization": True,
        "use_implicit_valence": True,
        "use_partial_charge": True,
        "use_ring_size": True,
        "use_hydrogen_bonding": True,
        "use_acid_base": True,
        "use_aromaticity": True,
        "use_chirality": True,
        "use_num_hydrogen": True
    }
Example #30
# test_file = 'data/annotated_ner_data/StackOverflow/test.txt'
# dataset = reader.read_txt(train_file, -1)
# devs = reader.read_txt(dev_file, -1)
# tests = reader.read_txt(test_file, -1)

dataset = reader.read_txt(conf.train_all_file, -1)
# devs = reader.read_txt(conf.dev_file, -1)
tests = reader.read_txt(conf.test_file, -1)
print(len(dataset))

# setting for data
conf.use_iobes(dataset)
# conf.use_iobes(devs)
conf.use_iobes(tests)

conf.build_label_idx(dataset)
conf.build_word_idx(dataset, None, tests)
conf.build_emb_table()

conf.map_insts_ids(dataset)
# conf.map_insts_ids(devs)
conf.map_insts_ids(tests)

random.shuffle(dataset)

model = BertCRF(conf).to(conf.device)

trainer = Trainer(model, conf, None, tests, use_crf=True)
model = trainer.train_model(conf.num_epochs, dataset)
# torch.save(model.state_dict(), 'model/softparams.pt')