Example no. 1
def main(config, resume):
    train_logger = Logger()

    # Build model architecture
    model = get_instance(module_arch, 'arch', config)
    img_sz = config["train_loader"]["args"]["resize"]
    model.summary(input_shape=(3, img_sz, img_sz))

    # Setup data_loader instances
    train_loader = get_instance(module_data, 'train_loader', config).loader
    valid_loader = get_instance(module_data, 'valid_loader', config).loader

    # Get function handles of loss and metrics
    loss = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # Build optimizer, learning rate scheduler.
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = get_instance(torch.optim, 'optimizer', config,
                             trainable_params)
    lr_scheduler = get_instance(torch.optim.lr_scheduler, 'lr_scheduler',
                                config, optimizer)

    # Create trainer and start training
    trainer = Trainer(model,
                      loss,
                      metrics,
                      optimizer,
                      resume=resume,
                      config=config,
                      data_loader=train_loader,
                      valid_data_loader=valid_loader,
                      lr_scheduler=lr_scheduler,
                      train_logger=train_logger)
    trainer.train()
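
Example no. 1 above (and several later examples) builds every object through a get_instance helper driven by a JSON config. A minimal sketch of that pattern is shown below, assuming the common pytorch-template convention in which each config entry carries a 'type' name and an 'args' dict; the helper body and the concrete values in the config fragment are illustrative assumptions, not taken from any real project.

# Hedged sketch of the helper: look up config[name]['type'] in the given module and
# call it with config[name]['args'] plus any extra positional arguments
# (e.g. trainable_params for the optimizer).
def get_instance(module, name, config, *args):
    return getattr(module, config[name]['type'])(*args, **config[name]['args'])

# Illustrative config fragment matching the keys read in Example no. 1
# (placeholder values only):
config = {
    "arch": {"type": "ResNet18", "args": {}},
    "train_loader": {"type": "TrainLoader", "args": {"resize": 224}},
    "valid_loader": {"type": "ValidLoader", "args": {"resize": 224}},
    "loss": "cross_entropy",
    "metrics": ["accuracy"],
    "optimizer": {"type": "Adam", "args": {"lr": 1e-3}},
    "lr_scheduler": {"type": "StepLR", "args": {"step_size": 30, "gamma": 0.1}},
}
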
Example no. 2
def main(config, resume):
    train_logger = Logger()

    # setup data_loader instances
    data_loader = get_instance(module_data, 'data_loader', config)
    valid_data_loader = data_loader.split_validation()

    # build model architecture
    model = get_instance(module_arch, 'arch', config)
    print(model)

    # get function handles of loss and metrics
    loss = [getattr(module_loss, l) for l in config['loss']]
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # Build optimizer and learning rate scheduler. Delete every line containing lr_scheduler to disable the scheduler.
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = get_instance(torch.optim, 'optimizer', config, trainable_params)
    Lambda = [l for l in config['Lambda']]

    lr_scheduler = get_instance(torch.optim.lr_scheduler, 'lr_scheduler', config, optimizer)

    trainer = Trainer(model, loss, Lambda, metrics, optimizer,
                      resume=resume,
                      config=config,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler,
                      train_logger=train_logger)

    trainer.train()
Example no. 3
def main(config):
    device = torch.device('cuda')

    logger = config.get_logger('trainer')

    dataset = config.initialize('dataset', dataset_module)
    train_loader = config.initialize('data_loader', data_loader_module,
                                     dataset)
    test_loader = train_loader.get_test_loader()

    model = Model().to(device)
    logger.info(model)

    metrics = [getattr(metric_module, mtr) for mtr in config['metrics']]
    loss = getattr(F, config['loss'])

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.initialize('optimizer', torch.optim, trainable_params)
    scheduler = config.initialize('scheduler', torch.optim.lr_scheduler,
                                  optimizer)

    trainer = Trainer(model,
                      loss,
                      metrics,
                      config,
                      optimizer,
                      train_loader,
                      test_loader=test_loader,
                      scheduler=scheduler)
    trainer.train()
Example no. 4
def main():
    # Define a curriculum for the training
    curriculum = Curriculum()
    curriculum.add(Program({"n_units": 1000, "n_outs": 10, "batch_size": 64, "epoch": 1}, [Target.lte("loss", 0.19), Target.gte("accuracy", 0.941)]))
    curriculum.add(Program({"n_units": 1000, "n_outs": 10, "batch_size": 100, "epoch": 1}, [Target.lte("loss", 0.19), Target.gte("accuracy", 0.941)]))
    curriculum.add(Program({"n_units": 1000, "n_outs": 10, "batch_size": 128, "epoch": 1}, [Target.lte("loss", 0.19), Target.gte("accuracy", 0.941)]))

    # Define figures to visualize the training
    # VisualizerDefinition: x_axis = activities.key, y_axis = results.key
    visualizer = Visualizer([
        VisualizerDefinition("batch_size", "loss", "Compare loss for hyperparameter: batch_size, iteration #1"),
        VisualizerDefinition("batch_size", "accuracy", "Compare accuracy for hyperparameter: accuracy, iteration #1")
    ])

    # Defines the trainer
    trainer = Trainer(curriculum, visualizer)

    # Defines the trainer callback
    # Called at each training iteration and should call the network with the parameters defined in the activity
    def trainer_callback(activities):
        sum_loss, sum_accuracy = network(activities["n_units"], activities["n_outs"], activities["batch_size"], activities["epoch"])

        # Values that will be used in trainer and will be compared to the Targets
        return {"loss": sum_loss, "accuracy": sum_accuracy}

    trainer.train("./example/output", trainer_callback)
Example no. 5
def main(config, resume):
    train_logger = Logger()

    model = get_instance(module_arch, 'arch', config)
    img_sz = config["train_loader"]["args"]["resize"]
    if config["arch"]["type"] != "HighResolutionNet":
        model.summary(input_shape=(3, img_sz, img_sz))
    else:
        dump_input = torch.rand((1, 3, img_sz, img_sz))
        print(get_model_summary(model, dump_input, verbose=True))

    train_loader = get_instance(module_data, 'train_loader', config).loader
    valid_loader = get_instance(module_data, 'valid_loader', config).loader

    loss = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = get_instance(torch.optim, 'optimizer', config,
                             trainable_params)
    lr_scheduler = get_instance(torch.optim.lr_scheduler, 'lr_scheduler',
                                config, optimizer)

    trainer = Trainer(model,
                      loss,
                      metrics,
                      optimizer,
                      resume=resume,
                      config=config,
                      data_loader=train_loader,
                      valid_data_loader=valid_loader,
                      lr_scheduler=lr_scheduler,
                      train_logger=train_logger)
    trainer.train()
Example no. 6
def train(cfg):
    train_ds = COCODataset(cfg.train_imgs_path,
                           cfg.train_anno_path,
                           resize_size=cfg.resize_size)
    train_dl = DataLoader(train_ds,
                          batch_size=cfg.batch_size,
                          shuffle=False,
                          num_workers=cfg.num_workers,
                          collate_fn=train_ds.collate_fn)

    if cfg.eval:
        eval_ds = COCODataset(cfg.eval_imgs_path,
                              cfg.eval_anno_path,
                              resize_size=cfg.resize_size)
        eval_dl = DataLoader(eval_ds,
                             batch_size=max(cfg.batch_size // 2, 1),
                             num_workers=cfg.num_workers,
                             collate_fn=eval_ds.collate_fn)
    else:
        eval_dl = None

    model = CenterNet(cfg)
    if cfg.gpu:
        model = model.cuda()

    loss_func = Loss(cfg)

    trainer = Trainer(cfg, model, loss_func, train_dl, eval_dl)
    trainer.train()
Example no. 7
def main(config):
    #%%
    preprocessing = Preprocessing_sasrec('/content/drive/MyDrive/brunch/data')
    item_num = max(preprocessing.content2num.values())
    
    train_dataset = BrunchDataset(preprocessing.train_df, item_num)
    train_len = int(0.9*len(train_dataset))
    valid_len = len(train_dataset) - train_len
    train_dataset, val_dataset = random_split(train_dataset, [train_len, valid_len])
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )
    
    model = SASRec(item_num, config)
    model.to(config.SYSTEM.DEVICE)
    criterion = torch.nn.BCEWithLogitsLoss() # torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.TRAIN.BASE_LR, betas=(0.9, 0.98))

    trainer = Trainer(model, criterion, optimizer, train_loader, val_loader, config)
    trainer.train(config.TRAIN.EPOCH)
Example no. 8
def main(config):
    print('loading dataset')
    nlp_dataset = NPLMDataset(csv_file=config.data_path,
                              root_dir='.',
                              config=config)
    dataloader = DataLoader(
        nlp_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_fn,
    )
    model = EmbeddingModule(
        len(nlp_dataset.word_to_idx),
        config.embedding_dim,
        config.h_dim,
        config,
    ).to(config.device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    trainer = Trainer(dataloader, model, criterion, optimizer, config)
    trainer.train()
    model = trainer.model
    with open('./checkpoints/nplm_model.pkl', 'wb') as f:
        pickle.dump(model, f)
Example no. 9
def main(config):
    logger = config.get_logger('train')
    num_words = config["dataset"]["args"]["num_words"]
    num_phoneme_thr = config["dataset"]["args"]["num_phoneme_thr"]
    split = config["dataset"]["args"]["split"]
    cmu_dict_path = config["dataset"]["args"]["cmu_dict_path"]
    data_struct_path = config["dataset"]["args"]["data_struct_path"]
    p_field_path = config["dataset"]["args"]["field_vocab_paths"]["phonemes"]
    g_field_path = config["dataset"]["args"]["field_vocab_paths"]["graphemes"]
    vis_feat_dir = config["dataset"]["args"]["vis_feat_dir"]
    batch_size = config["data_loader"]["args"]["batch_size"]
    shuffle = config["data_loader"]["args"]["shuffle"]
    drop_last = config["data_loader"]["args"]["drop_last"]
    pin_memory = config["data_loader"]["args"]["pin_memory"]
    num_workers = config["data_loader"]["args"]["num_workers"]

    train_dataset = DatasetV(num_words, num_phoneme_thr, cmu_dict_path,
                             vis_feat_dir, "train", data_struct_path,
                             p_field_path, g_field_path, True)

    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=num_workers,
                                                   pin_memory=pin_memory,
                                                   shuffle=True,
                                                   drop_last=True,
                                                   collate_fn=collate_fn)

    val_dataset = DatasetV(num_words, num_phoneme_thr, cmu_dict_path,
                           vis_feat_dir, "val", data_struct_path, p_field_path,
                           g_field_path, False)

    val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=batch_size,
                                                 num_workers=num_workers,
                                                 pin_memory=pin_memory,
                                                 shuffle=False,
                                                 drop_last=True,
                                                 collate_fn=collate_fn)

    model = config.init('arch', module_arch)
    logger.info(model)
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init('optimizer', torch.optim, trainable_params)
    lr_scheduler = config.init('lr_scheduler', torch.optim.lr_scheduler,
                               optimizer)
    trainer = Trainer(
        model,
        optimizer,
        config=config,
        lr_scheduler=lr_scheduler,
        num_words=num_words,
        logger=logger,
        train_dataset=train_dataset,
        train_dataloader=train_dataloader,
        val_dataset=val_dataset,
        val_dataloader=val_dataloader,
    )

    trainer.train()
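
Example no. 9 above pulls a deeply nested set of options out of the config. The sketch below shows the config fragment those lookups imply; the key names come from the code, while every value is an illustrative placeholder.

# Illustrative config fragment for Example no. 9 (placeholder values only).
config_fragment = {
    "dataset": {
        "args": {
            "num_words": 500,
            "num_phoneme_thr": 5,
            "split": "train",
            "cmu_dict_path": "data/cmudict.dict",
            "data_struct_path": "data/data_struct.pkl",
            "field_vocab_paths": {
                "phonemes": "data/phoneme_field.pkl",
                "graphemes": "data/grapheme_field.pkl",
            },
            "vis_feat_dir": "data/visual_features/",
        }
    },
    "data_loader": {
        "args": {
            "batch_size": 32,
            "shuffle": True,
            "drop_last": True,
            "pin_memory": True,
            "num_workers": 4,
        }
    },
}
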
Example no. 10
def main(config, resume):
    logger = Logger()

    act = config['data_loader']['activate']
    if act == 0:
        # ICDAR 2019 LSVT
        data_loader = ICDAR2019DataLoaderFactory(config)
        train = data_loader.train()
        val = data_loader.val()
    elif act == 1:
        pass

    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
        [str(i) for i in config['gpus']])
    model = eval(config['arch'])(config)
    # model.summary()

    loss = eval(config['loss'])(config)
    metrics = [eval(metric) for metric in config['metrics']]

    trainer = Trainer(model,
                      loss,
                      metrics,
                      resume=resume,
                      config=config,
                      data_loader=train,
                      valid_data_loader=val,
                      train_logger=logger,
                      toolbox=Toolbox())

    trainer.train()
Example no. 11
def main(config: ConfigParser):
    # Get a logging.getLogger; the default log level is debug
    logger = config.get_logger('train')
    # Data module
    # Look up the data_loader name that was read from config.json, instantiate it, and fill it with the arguments from the JSON
    data_loader = config.init_obj('data_loader', module_data)
    valid_data_loader = data_loader.split_validation()

    # Model module
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # Loss and metrics module
    criterion = getattr(module_loss, config['loss'])
    # These entries hold functions (possibly classes); their names can be retrieved via __name__
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # Optimizer module
    # filter drops the parameters whose requires_grad is False
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)
    # Learning-rate decay policy
    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    # Train the model
    trainer = Trainer(model, criterion, metrics, optimizer,
                      config=config,
                      data_loader=data_loader,
                      valid_data_loader=valid_data_loader,
                      lr_scheduler=lr_scheduler)

    trainer.train()
Example no. 12
def main(config, resume):
    torch.manual_seed(config["seed"])
    np.random.seed(config["seed"])

    train_dataset = initialize_config(config["train_dataset"])
    train_data_loader = DataLoader(
        dataset=train_dataset,
        shuffle=config["train_dataloader"]["shuffle"],
        batch_size=config["train_dataloader"]["batch_size"],
        num_workers=config["train_dataloader"]["num_workers"])

    validation_dataset = initialize_config(config["validation_dataset"])
    valid_data_loader = DataLoader(
        dataset=validation_dataset,
        num_workers=config["validation_dataloader"]["num_workers"],
        batch_size=config["validation_dataloader"]["batch_size"])

    model = initialize_config(config["model"])

    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=config["optimizer"]["lr"],
                                 betas=(config["optimizer"]["beta1"], 0.999))

    loss_function = initialize_config(config["loss_function"])

    trainer = Trainer(config=config,
                      resume=resume,
                      model=model,
                      loss_function=loss_function,
                      optimizer=optimizer,
                      train_dataloader=train_data_loader,
                      validation_dataloader=valid_data_loader)

    trainer.train()
Example no. 13
def active_learning(config):
    from data_process import makeDataSet
    from trainer.trainer import Trainer

    logger = config.get_logger('train')
    train_set, valid_set, query_set = makeDataSet(config)

    model = makeModel(config)
    logger.info(model)

    criterion = makeLoss(config)
    metrics = makeMetrics(config)

    optimizer = makeOptimizer(config, model)
    # lr_scheduler = makeLrSchedule(config, optimizer, train_set)

    trainer = Trainer(
        model,
        criterion,
        metrics,
        optimizer,
        config=config,
        train_dataset=train_set,
        valid_dataset=valid_set,
        query_dataset=query_set,
        test_dataset=None,
        # lr_scheduler=lr_scheduler
    )

    trainer.train()
Example no. 14
def main():
    # Base Logger setup
    MyLogger.logger.setLevel(logging.INFO)
    logger = MyLogger.logger.getChild(__name__)

    args = argument_parser()
    # params from argparse
    csvpath = args.train_data
    BATCH_SIZE = args.batch_size
    EPOCHS = args.epochs
    OUTPKL = args.outpkl
    MDL_OUTDIR = args.mdl_outdir
    if not os.path.exists(MDL_OUTDIR):
        os.mkdir(MDL_OUTDIR)

    LOSS_HISTORY = {}
    MODELS = []
    # (width, depth)
    DEEP_MODEL = [(18, 1), (9, 4), (8, 5)]
    # (width)
    SHALLOW_MODEL = [128]

    training_dataset = TrainingDataset(csvpath)
    train_loader = DataLoader(training_dataset,
                              shuffle=True,
                              batch_size=BATCH_SIZE)

    # append different models
    for w in SHALLOW_MODEL:
        MODELS.append(ShallowNet(w))
    for w, d in DEEP_MODEL:
        MODELS.append(DeepNet(depth=d, width=w))

    # train EPOCHS epochs for each model in MODELS
    for model in MODELS:
        model_name = model.get_name()
        logger.info("Start training {}".format(model_name))
        trainer = Trainer(model, train_loader)
        # add model loss history into dictionary
        LOSS_HISTORY[model_name] = []

        s = time.time()
        for epoch in range(EPOCHS):
            trainer.train(epoch + 1)
            LOSS_HISTORY[model_name].append(trainer.loss)
        e = time.time()

        torch.save(model, "{}/{}.h5".format(MDL_OUTDIR, model_name))

        logger.info(
            "Finished training {}  Time elapsed: {: .3f} seconds. \n".format(
                model_name, e - s))

    logger.info("Finished training all models. Dumping loss history")

    with open(OUTPKL, 'wb') as f:
        pickle.dump(LOSS_HISTORY, f)

    logger.info("Finished dumping. Existing program...")
Example no. 15
def train(fold, params, cfg, save_model=False):
    print('\n' + '=' * 30 + f' {fold} FOLD TRAINING START!! ' + '=' * 30 +
          '\n')

    seed_everything(cfg.values.seed)
    MODEL_NAME = cfg.values.model_name
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.add_special_tokens(
        {'additional_special_tokens': ['[ENT1]', '[ENT2]']})

    df = pd.read_csv('/opt/ml/input/data/train/train_folds.tsv', delimiter=',')
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    tokenized_train = tokenized_dataset(train_df, tokenizer)
    tokenized_val = tokenized_dataset(valid_df, tokenizer)

    RE_train_dataset = RE_Dataset(tokenized_train, train_df['label'].values)
    RE_val_dataset = RE_Dataset(tokenized_val, valid_df['label'].values)

    train_loader = DataLoader(RE_train_dataset,
                              batch_size=params['batch_size'],
                              num_workers=cfg.values.train_args.num_workers,
                              pin_memory=True,
                              shuffle=True)

    valid_loader = DataLoader(RE_val_dataset,
                              batch_size=cfg.values.train_args.batch_size,
                              num_workers=cfg.values.train_args.num_workers,
                              pin_memory=True,
                              shuffle=False)

    model = MultilabeledSequenceModel(MODEL_NAME, 42, len(tokenizer),
                                      params['dropout']).to(device)

    optimizer = AdamW(params=model.parameters(), lr=params['lr'])
    loss = nn.CrossEntropyLoss()
    model_set = {'model': model, 'loss': loss, 'optimizer': optimizer}
    best_acc = -np.inf
    trainer = Trainer(model_set, device, cfg)
    early_stopping = 3
    early_stopping_counter = 0
    for epoch in range(cfg.values.train_args.num_epochs):
        train_loss = trainer.train_epoch(train_loader)
        val_acc = trainer.evaluate_epoch(valid_loader)
        print(
            f'FOLD: {fold}, EPOCH: {epoch}, TRAIN_LOSS: {train_loss:.3f}, VAL_ACC: {val_acc:.3f}'
        )
        if val_acc > best_acc:
            best_acc = val_acc
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}_.bin')
        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping:
            break
    return best_acc
Example no. 16
File: main.py Project: nonojh/GACRP
def main():
    # TODO: Implement argparse
    params = {'n_estimators': 10000, 'num_leaves': 30, 'learning_rate': 0.01}

    training_params = {'early_stop_round': 100, 'verbose': 100}

    trainer = Trainer('lightgbm', params, training_params)
    trainer.train()
Example no. 17
def main(config, resume):
    torch.manual_seed(config["seed"])
    np.random.seed(config["seed"])

    def pad_to_longest(batch):
        mixture_list = []
        clean_list = []
        names = []
        n_frames_list = []

        for mixture, clean, n_frames, name in batch:
            mixture_list.append(torch.tensor(mixture).reshape(-1, 1))
            clean_list.append(torch.tensor(clean).reshape(-1, 1))
            n_frames_list.append(n_frames)
            names.append(name)

        # seq_list = [(L_1, 1), (L_2, 1), ...]
        #   item.size() must be (L, *)
        #   return (longest_len, len(seq_list), *)
        mixture_list = pad_sequence(mixture_list).squeeze(2).permute(1, 0)
        clean_list = pad_sequence(clean_list).squeeze(2).permute(1, 0)

        return mixture_list, clean_list, n_frames_list, names

    train_dataset = initialize_config(config["train_dataset"])
    train_data_loader = DataLoader(
        shuffle=config["train_dataloader"]["shuffle"],
        dataset=train_dataset,
        batch_size=config["train_dataloader"]["batch_size"],
        num_workers=config["train_dataloader"]["num_workers"],
        collate_fn=pad_to_longest,
        drop_last=True)

    validation_dataset = initialize_config(config["validation_dataset"])
    valid_data_loader = DataLoader(
        dataset=validation_dataset,
        num_workers=config["validation_dataloader"]["num_workers"],
        batch_size=config["validation_dataloader"]["batch_size"],
        collate_fn=pad_to_longest,
        shuffle=config["validation_dataloader"]["shuffle"])

    model = initialize_config(config["model"])

    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=config["optimizer"]["lr"],
                                 betas=(config["optimizer"]["beta1"], 0.999))

    loss_function = initialize_config(config["loss_function"])

    trainer = Trainer(config=config,
                      resume=resume,
                      model=model,
                      optimizer=optimizer,
                      loss_function=loss_function,
                      train_dataloader=train_data_loader,
                      validation_dataloader=valid_data_loader)

    trainer.train()
Example no. 18
def main(config, resume):
    """
    训练脚本的入口函数
    
    Notes:
        1. 加载数据集
        2. 初始化模型
        3. 设置优化器
        4. 选择损失函数
        5. 训练脚本 run

    Args:
        config (dict): 配置项
        resume (bool): 是否加载最近一次存储的模型断点
    """
    torch.manual_seed(config["seed"])
    np.random.seed(config["seed"])

    train_dataset = TrainDataset(
        mixture_dataset=config["train_dataset"]["mixture"],
        mask_dataset=config["train_dataset"]["clean"],
        limit=config["train_dataset"]["limit"],
        offset=config["train_dataset"]["offset"],
    )
    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=config["train_dataset"]["batch_size"],
        num_workers=config["train_dataset"]["num_workers"],
        shuffle=config["train_dataset"]["shuffle"])

    valid_dataset = TestDataset(
        mixture_dataset=config["valid_dataset"]["mixture"],
        clean_dataset=config["valid_dataset"]["clean"],
        limit=config["valid_dataset"]["limit"],
        offset=config["valid_dataset"]["offset"],
    )

    valid_data_loader = DataLoader(dataset=valid_dataset)

    model = initialize_config(config["model"])

    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=config["optimizer"]["lr"])

    loss_function = initialize_config(config["loss_function"])

    trainer = Trainer(
        config=config,
        resume=resume,
        model=model,
        loss_function=loss_function,
        optim=optimizer,
        train_dl=train_data_loader,
        validation_dl=valid_data_loader,
    )

    trainer.train()
Example no. 19
def main(config):
    logger = config.get_logger("train")

    data_loader = CityscapesDataLoader(
        config["data_loader"]["args"]["data_dir"],
        config["data_loader"]["args"]["train_extra"],
        config["data_loader"]["args"]["batch_size"],
        config["data_loader"]["args"]["num_workers"],
    )

    num_classes = config["arch"]["args"]["num_classes"]
    model = DeepLabv3Plus(num_classes=num_classes)
    logger.info(
        summary(
            model,
            (3, 1024, 2048),
            col_names=("kernel_size", "output_size", "num_params"),
            depth=5,
            verbose=0,
        )
    )

    device, device_ids = prepare_device(config["n_gpu"])
    model = model.to(device)
    if len(device_ids) > 1:
        model = nn.DataParallel(model, device_ids=device_ids)

    ignore_index = config["loss"]["args"]["ignore_index"]
    criterion = nn.CrossEntropyLoss(ignore_index=ignore_index)
    metrics = SegmentationMetrics(num_classes, ignore_index)

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=config["optimizer"]["args"]["lr"],
        momentum=config["optimizer"]["args"]["momentum"],
        weight_decay=config["optimizer"]["args"]["weight_decay"],
    )
    lr_scheduler = PolynomialLRDecay(
        optimizer,
        max_decay_steps=config["lr_scheduler"]["args"]["max_decay_steps"],
        end_learning_rate=config["lr_scheduler"]["args"]["end_learning_rate"],
        power=config["lr_scheduler"]["args"]["power"],
    )

    trainer = Trainer(
        config=config,
        model=model,
        criterion=criterion,
        metrics=metrics,
        optimizer=optimizer,
        device=device,
        train_loader=data_loader.train_loader,
        val_loader=data_loader.val_loader,
        lr_scheduler=lr_scheduler,
    )
    trainer.train()
Example no. 20
def main(config):

    train_dataset = Dataset_DAVIS(config['train_data_dir'])
    train_loader = Dataloader_DAVIS(dataset=train_dataset, batch_size=1, shuffle=False, validation_split=0.0, num_workers=1, training=True)

    valid_dataset = Dataset_DAVIS(config['valid_data_dir'])
    valid_loader = Dataloader_DAVIS(dataset=valid_dataset, batch_size=1, shuffle=False, validation_split=0.0, num_workers=1, training=False)

    trainer = Trainer(train_loader=train_loader, config=config, valid_loader=valid_loader)
    trainer.test("results/test_results.jpg")
Example no. 21
def main(args):
    config = ConfigParser(args)
    cfg = config.config
    logger = get_logger(config.log_dir, "train")
    train_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    validation_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_dataset = CTImageLoader(link_label_file=cfg["train_data"],
                                  image_size=cfg["input_size"],
                                  root_folder=cfg["root_folder"],
                                  transforms=train_transform)
    validation_dataset = CTImageLoader(link_label_file=cfg["validation_data"],
                                       image_size=cfg["input_size"],
                                       root_folder=cfg["root_folder"],
                                       transforms=validation_transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cfg["train_batch_size"],
        shuffle=True,
        num_workers=cfg["workers"],
        drop_last=True)
    vali_loader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=cfg["vali_batch_size"],
        shuffle=False,
        num_workers=cfg["workers"],
        drop_last=False)

    model = resnet50(number_class=3, pretrained=True)

    criterion = getattr(loss, 'cross_entropy')
    optimizer = optim.SGD(model.parameters(),
                          lr=cfg["lr"],
                          momentum=0.9,
                          weight_decay=cfg["weight_decay"])
    metrics_name = ["accuracy"]
    trainer = Trainer(model=model,
                      criterion=criterion,
                      optimizer=optimizer,
                      train_loader=train_loader,
                      nb_epochs=config['epoch'],
                      valid_loader=vali_loader,
                      logger=logger,
                      log_dir=config.save_dir,
                      metrics_name=metrics_name,
                      resume=config['resume'],
                      save_dir=config.save_dir)
    trainer.train()
Example no. 22
def train_model_once(subject_id, i_valid_fold, config, model_state_dict=None):
    # Data loading
    data = get_dataset(subject_id, i_valid_fold,
                       config["experiment"]["dataset"], config)
    # import pickle
    # from base.base_data_loader import BaseDataLoader
    # from braindecode.datautil.splitters import split_into_train_valid_test
    # pickle_path = os.path.abspath(
    #     os.path.join(os.path.dirname(__file__), '..',
    #                  'data/bcic_iv_2a_all_9_subjects.pickle'))
    # with open(pickle_path, 'rb') as f:
    #     data = pickle.load(f)
    # data = data[0]
    # train, valid, test = split_into_train_valid_test(data, 4, 0)
    # data = BaseDataLoader(train, valid, test, 4)

    # Build model architecture
    model = get_model(data, model_state_dict, config)

    # Set iterator and metric function handle
    iterator = get_iterator(model, data, config)
    predict_label_func = get_prediction_func(config)

    # Get function handle of loss
    loss_function = get_loss(config)

    # Build optimizer, learning rate scheduler
    stop_criterion = get_stop_criterion(config)
    optimizer = get_optmizer(model, config)

    print(model)

    # Init trainer and train
    trainer = Trainer(
        data.train_set,
        data.validation_set,
        data.test_set,
        model,
        optimizer,
        iterator,
        loss_function,
        stop_criterion,
        model_constraint=MaxNormDefaultConstraint(),
        cuda=torch.cuda.is_available(),
        func_compute_pred_labels=predict_label_func,
        siamese=(config["experiment"]["type"] == "ccsa_da"),
    )
    trainer.train()

    # Save results
    log_training_results(trainer)
    file_state_dict = save_result_and_model(trainer, model, config)
    return file_state_dict
Example no. 23
def main():
    device = "cuda:0"

    parser = argparse.ArgumentParser()

    parser.add_argument("--model", default=None)
    parser.add_argument("--checkpoint", type=int, default=None)
    parser.add_argument("--dataset", default=None)
    parser.add_argument("--data_type", default="test")

    args = parser.parse_args()

    model_path = proj_dir("models", "ddff_mdff")

    loader = ModelLoader(model_path, args.model)
    model_params = loader.load("params")

    model = project.create_component("net", **model_params["net"])

    epoch = args.checkpoint

    if args.checkpoint is not None:
        print("Load from checkpoint")
        model.load_state_dict(loader.load("checkpoint", epoch=epoch)["model"])
    else:
        print("Load model")
        loader.load("model", model)

    # data_class = datatype_from_str(args.dataset)

    #data = data_class(root_dir=proj_dir("datasets"), data_type=args.data_type)

    data = project.create_component("data",
                                    name=args.dataset,
                                    root_dir=proj_dir("datasets"),
                                    data_type=args.data_type)

    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=1)

    trainer = Trainer(model, device)

    test_loss, eval_pred = trainer.test_model(data_loader)

    loader.save("eval", {"pred": eval_pred},
                epoch=epoch,
                data_name=args.dataset,
                data_type=args.data_type)

    print("Done. Loss:", test_loss)
Example no. 24
def run(args):

    print("Arguments in args:\n{}".format(pprint.pformat(vars(args))),
          flush=True)

    # load configurations
    with open(args.conf, "r") as f:
        conf = yaml.load(f, Loader=yaml.FullLoader)
    print("Arguments in yaml:\n{}".format(pprint.pformat(conf)), flush=True)

    checkpoint_dir = Path(conf['train']['checkpoint'])
    checkpoint_dir.mkdir(exist_ok=True, parents=True)

    random.seed(conf['train']['seed'])
    np.random.seed(conf['train']['seed'])
    th.cuda.manual_seed_all(conf['train']['seed'])

    # if exist, resume training
    last_checkpoint = checkpoint_dir / "last.pt.tar"
    if last_checkpoint.exists():
        print(f"Found old checkpoint: {last_checkpoint}", flush=True)
        conf['train']['resume'] = last_checkpoint.as_posix()

    # dump configurations
    with open(checkpoint_dir / "train.yaml", "w") as f:
        yaml.dump(conf, f)

    #build nnet
    nnet = model(**conf["nnet_conf"])
    # build optimizer
    optimizer = make_optimizer(nnet.parameters(), conf)
    # build dataloader
    train_loader, valid_loader = make_dataloader(conf)
    # build scheduler
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode="min",
                                  factor=conf['scheduler']['factor'],
                                  patience=conf['scheduler']['patience'],
                                  min_lr=conf['scheduler']['min_lr'],
                                  verbose=True)

    device = th.device('cuda' if conf['train']['use_cuda']
                       and th.cuda.is_available() else 'cpu')

    trainer = Trainer(nnet, optimizer, scheduler, device, conf)

    trainer.run(
        train_loader,
        valid_loader,
        num_epoches=conf['train']['epoch'],
    )
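
Example no. 24 reads its options from a YAML file. The sketch below shows the dictionary that yaml.load would need to return for the lookups above to succeed; the key names come from the code, and the values (including the contents of nnet_conf) are placeholders. Note that make_optimizer and make_dataloader read further keys not shown here.

# Illustrative structure of the loaded YAML config (placeholder values only).
conf = {
    "train": {
        "checkpoint": "exp/baseline",  # checkpoint directory
        "seed": 777,
        "use_cuda": True,
        "epoch": 100,
        # "resume" is filled in automatically when last.pt.tar exists
    },
    "scheduler": {
        "factor": 0.5,
        "patience": 2,
        "min_lr": 1e-8,
    },
    "nnet_conf": {},  # keyword arguments forwarded to the model constructor
}
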
Example no. 25
def main(config, resume):
    torch.manual_seed(config["seed"])  # For both GPU and CPU
    np.random.seed(config["seed"])
    random.seed(config["seed"])

    train_dataset = initialize_config(config["train_dataset"])
    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=config["train_dataloader"]["batch_size"],
        num_workers=config["train_dataloader"]["num_workers"],
        shuffle=config["train_dataloader"]["shuffle"],
        pin_memory=config["train_dataloader"]["pin_memory"],
        collate_fn=train_dataset.pad_batch
    )

    validation_dataset = initialize_config(config["validation_dataset"])
    valid_data_loader = DataLoader(
        dataset=validation_dataset,
        num_workers=1,
        batch_size=1
    )

    generator = initialize_config(config["generator_model"])
    discriminator = initialize_config(config["discriminator_model"])

    generator_optimizer = torch.optim.Adam(
        params=generator.parameters(),
        lr=config["optimizer"]["G_lr"],
        betas=(config["optimizer"]["beta1"], config["optimizer"]["beta2"])
    )
    discriminator_optimizer = torch.optim.Adam(
        params=discriminator.parameters(),
        lr=config["optimizer"]["D_lr"],
        betas=(config["optimizer"]["beta1"], config["optimizer"]["beta2"])
    )

    loss_function = initialize_config(config["loss_function"])

    trainer = Trainer(
        config=config,
        resume=resume,
        generator=generator,
        discriminator=discriminator,
        generator_optimizer=generator_optimizer,
        discriminator_optimizer=discriminator_optimizer,
        loss_function=loss_function,
        train_dl=train_data_loader,
        validation_dl=valid_data_loader,
    )

    trainer.train()
Example no. 26
def main(config, resume):
    """
    训练脚本的入口函数

    Args:
        config (dict): 配置项
        resume (bool): 是否加载最近一次存储的模型断点
    """
    torch.manual_seed(config["seed"])
    np.random.seed(config["seed"])

    train_dataset = initialize_config(config["train_dataset"])
    validation_dataset = initialize_config(config["validation_dataset"])
    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=config["train_dataloader"]["batch_size"],
        num_workers=config["train_dataloader"]["num_workers"],
        shuffle=config["train_dataloader"]["shuffle"])

    collate_all_data = lambda data_list: data_list
    valid_data_loader = DataLoader(
        dataset=validation_dataset,
        batch_size=config["validation_dataloader"]["batch_size"],
        num_workers=config["validation_dataloader"]["num_workers"],
        collate_fn=collate_all_data)

    generator = initialize_config(config["generator_model"])
    discriminator = initialize_config(config["discriminator_model"])

    generator_optimizer = torch.optim.Adam(params=generator.parameters(),
                                           lr=config["optimizer"]["G_lr"])
    discriminator_optimizer = torch.optim.Adam(
        params=discriminator.parameters(),
        lr=config["optimizer"]["D_lr"],
        betas=(config["optimizer"]["beta1"], 0.999))

    loss_function = initialize_config(config["loss_function"])

    trainer = Trainer(
        config=config,
        resume=resume,
        G=generator,
        D=discriminator,
        optim_G=generator_optimizer,
        optim_D=discriminator_optimizer,
        loss_function=loss_function,
        train_dl=train_data_loader,
        validation_dl=valid_data_loader,
    )

    trainer.train()
Example no. 27
def main(config, resume):
    train_logger = Logger()

    # load data
    train_dataloader = ICDARDataLoader(config).train()

    # initialize model
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join([str(i) for i in config['gpus']])
    model = Model(config)
    model.summary()

    loss = Loss()
    trainer = Trainer(model, loss, resume, config, train_dataloader, train_logger)
    trainer.train()
Example no. 28
def load_model(model_path):
    if model_path.endswith(".pth"):
        model = ConjunctionClassifier()
        trainer = Trainer(model, None, None)  # Need this for model loading
        trainer._load_model(model_path)
        return model
    elif model_path.endswith(".joblib"):
        model = load(model_path)
        assert isinstance(model, svm.SVC)
        return model
    elif model_path == "baseline":
        return None
    else:
        raise Exception("Model must be SVM or BERT-based")
Example no. 29
def main():
    args = parse_args()

    trainer = Trainer(args)
    trainer.save_config()
    trainer.train()
    trainer.save_model()
Example no. 30
def main():
    # Processing config file
    config = get_config_from_json('.\\utils\\config.json')

    # Processing data
    train_dataset, val_dataset, test_dataset, num_train_features, num_train_samples, num_val_samples, num_test_samples = getData(
        mypath, config)

    # Creating an empty model
    dense_model = DenseModel(num_train_features, config)
    load_flag = config.experiment.load

    # load model from h5 file
    if load_flag == True:
        try:
            print('Loading saved model')
            dense_model.load(".\\h5 models\\" + model_version)
            results = dense_model.model.evaluate(
                test_dataset,
                steps=int(num_test_samples / (config.model.batch_size)))
            print('test loss, test acc:', results)
        except Exception as ex:
            print(ex)
            print("Invalid model file name provided")

    # build and train and save a new model
    elif load_flag == False:
        try:
            dense_model.build_model()
            print('Create the trainer')
            trainer = Trainer(dense_model.model,
                              train_dataset,
                              val_dataset,
                              config,
                              steps_per_epoch=int(num_train_samples /
                                                  config.model.batch_size),
                              val_steps=int(num_val_samples /
                                            config.model.batch_size))
            print('Start training the model.')
            trainer.train()
            dense_model.save(".\\h5_models\\" + model_version)
        except Exception as ex:
            print(ex)
            print("Unable to create new model")
    else:
        print("Invalid load flag in config file")

    logging.info('---------Successful execution---------')
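
Example no. 30 accesses the config with attribute syntax (config.experiment.load, config.model.batch_size), so get_config_from_json has to turn the parsed JSON into attribute-access objects rather than plain dicts. One common way to write such a helper is sketched below; the helper name matches the example, but the body is an assumption, not the project's actual implementation.

import json
from types import SimpleNamespace

def get_config_from_json(json_file):
    # Parse the JSON file and turn every mapping into an attribute-access object,
    # so nested keys can be read as config.model.batch_size, config.experiment.load, etc.
    with open(json_file) as f:
        return json.load(f, object_hook=lambda d: SimpleNamespace(**d))
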
Example no. 31
# -*- coding: utf-8 -*-

import os
from trainer.trainer import Trainer


trainer = Trainer()
trainer.train(10, 1, False)
trainer.test()
Example no. 32
    def evaluate(self):
        """Evaluates the system using 10-fold cross validation, returning
        a dictionary of results keyed by classifier type."""
        trainer = Trainer(self.profiles, self.profile_type,
                          self.converter, self.network)
        training_set = trainer.generate_training_set()

        profiles = numpy.array(list(self.profiles))
        data = numpy.array(training_set.data)
        labels = numpy.array(training_set.labels)

        fold_iterator = cross_validation.StratifiedKFold(labels,
                                                         n_folds=10,
                                                         shuffle=True,
                                                         random_state=42)

        official_profile_pairs = ((x['name'], self.profile_type(x['profile'],
                                                                x['posts']))
                                  for x in self.profiles if x['label'] == 2)
        affiliate_profile_pairs = ((x['name'], self.profile_type(x['profile'],
                                                                 x['posts']))
                                   for x in self.profiles if x['label'] == 1)

        official_profiles = defaultdict(list)
        for name, profile in official_profile_pairs:
            official_profiles[name].append(profile)

        affiliate_profiles = defaultdict(list)
        for name, profile in affiliate_profile_pairs:
            affiliate_profiles[name].append(profile)

        classification_results = defaultdict(list)
        fold = 1
        for train, test in fold_iterator:
            classifiers = initialize_classifiers()

            training_data = data[train]
            training_labels = labels[train]

            test_set = itertools.compress(profiles[test], labels[test])
            company_names = set(x['name'] for x in test_set)
            print 'Test set', fold, '-', len(company_names), 'companies.'

            for classifier in classifiers:
                classifier_name = classifier['type']
                c = classifier['classifier']
                trained = c.fit(training_data, training_labels)

                system = SingleNetworkSearcher(
                    classifier=trained,
                    searchengine=self.search_engine,
                    profile_converter=self.converter,
                    network=self.network)

                number_of_workers = int(multiprocessing.cpu_count() * 0.75)
                worker_pool = ProcessingPool(number_of_workers)
                all_results = worker_pool.map(system.query, company_names)

                combined_official_results = []
                combined_affiliate_results = []
                for idx, name in enumerate(company_names):
                    official_results = official_profiles[name]
                    affiliate_results = affiliate_profiles[name]

                    results = all_results[idx]
                    classified_official = results.official
                    classified_affiliate = results.affiliate
                    classified_unrelated = results.unrelated

                    marked_official_handles = [x['profile'].handle.lower()
                                               for x in classified_official]
                    marked_affiliate_handles = [x['profile'].handle.lower()
                                                for x in classified_affiliate]
                    marked_unrelated_handles = [x['profile'].handle.lower()
                                                for x in classified_unrelated]
                    official_handles = [x.handle.lower()
                                        for x in official_results]
                    affiliate_handles = [x.handle.lower()
                                         for x in affiliate_results]

                    official_counts = MetricCalculator.count_positives(
                        actual_handles=official_handles,
                        marked_positive_handles=marked_official_handles,
                        marked_negative_handles=(marked_affiliate_handles
                                                 + marked_unrelated_handles))
                    combined_official_results.append(official_counts)

                    affiliate_counts = MetricCalculator.count_positives(
                        actual_handles=affiliate_handles,
                        marked_positive_handles=marked_affiliate_handles,
                        marked_negative_handles=(marked_unrelated_handles
                                                 + marked_official_handles))
                    combined_affiliate_results.append(affiliate_counts)

                official_metrics = MetricCalculator.fold_metrics(
                    combined_official_results)
                affiliate_metrics = MetricCalculator.fold_metrics(
                    combined_affiliate_results)

                result = {
                    'official': official_metrics,
                    'affiliate': affiliate_metrics
                }
                classification_results[classifier_name].append(result)

            fold += 1

        return classification_results
Example no. 33
    def evaluate_statistical(self):
        """Evaluates the system using 10-fold cross validation, returning
        a dictionary containing the number of correct results per-fold in
        each class."""
        trainer = Trainer(self.profiles, self.profile_type,
                          self.converter, self.network)
        training_set = trainer.generate_training_set()

        profiles = numpy.array(list(self.profiles))
        data = numpy.array(training_set.data)
        labels = numpy.array(training_set.labels)

        fold_iterator = cross_validation.StratifiedKFold(labels,
                                                         n_folds=10,
                                                         shuffle=True,
                                                         random_state=42)

        official_profile_pairs = ((x['name'], self.profile_type(x['profile'],
                                                                x['posts']))
                                  for x in self.profiles if x['label'] == 2)
        affiliate_profile_pairs = ((x['name'], self.profile_type(x['profile'],
                                                                 x['posts']))
                                   for x in self.profiles if x['label'] == 1)

        official_profiles = defaultdict(list)
        for name, profile in official_profile_pairs:
            official_profiles[name].append(profile)

        affiliate_profiles = defaultdict(list)
        for name, profile in affiliate_profile_pairs:
            affiliate_profiles[name].append(profile)

        fold = 1
        # This assumes we're just using Random Forest (i.e. one classifier)
        # Ugly hack for now.
        classification_results = {
            'official_correct': [],
            'affiliate_correct': []
        }
        for train, test in fold_iterator:
            classifiers = initialize_classifiers()

            training_data = data[train]
            training_labels = labels[train]

            test_set = itertools.compress(profiles[test], labels[test])
            company_names = set(x['name'] for x in test_set)
            print 'Test set', fold, '-', len(company_names), 'companies.'

            for classifier in classifiers:
                classifier_name = classifier['type']
                c = classifier['classifier']
                trained = c.fit(training_data, training_labels)

                system = SingleNetworkSearcher(
                    classifier=trained,
                    searchengine=self.search_engine,
                    profile_converter=self.converter,
                    network=self.network)

                number_of_workers = int(multiprocessing.cpu_count() * 0.75)
                worker_pool = ProcessingPool(number_of_workers)
                all_results = worker_pool.map(system.query, company_names)

                for idx, name in enumerate(company_names):
                    official_results = official_profiles[name]
                    affiliate_results = affiliate_profiles[name]

                    results = all_results[idx]
                    classified_official = results.official
                    classified_affiliate = results.affiliate

                    marked_official_handles = [x['profile'].handle.lower()
                                               for x in classified_official]
                    marked_affiliate_handles = [x['profile'].handle.lower()
                                                for x in classified_affiliate]

                    official_handles = [x.handle.lower()
                                        for x in official_results]
                    affiliate_handles = [x.handle.lower()
                                         for x in affiliate_results]

                    official_correct = 0
                    for handle in marked_official_handles:
                        if handle in official_handles:
                            official_correct += 1

                    affiliate_correct = 0
                    for handle in marked_affiliate_handles:
                        if handle in affiliate_handles:
                            affiliate_correct += 1

                    classification_results['official_correct'].append(official_correct)
                    classification_results['affiliate_correct'].append(affiliate_correct)

            fold += 1

        return classification_results