Example #1
def run(args):
    train_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        args.data + '/train', transform=data_transforms),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=16)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        args.data + '/val', transform=validation_data_transforms),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=16)

    model = CNNModel()
    model = nn.DataParallel(model)
    model = model.to(args.device)

    if args.checkpoint is not None:
        model.load_state_dict(torch.load(args.checkpoint))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size)

    for epoch in range(1, args.epochs + 1):
        train(epoch, model, optimizer, train_loader, args.log_interval)
        validation(epoch, model, val_loader)
        # Step the LR scheduler once per epoch, after the optimizer has stepped
        scheduler.step()
        model_file = 'model_' + str(epoch) + '.pth'
        torch.save(model.state_dict(), model_file)
    writer.close()
Example #2
def run(args):
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder('../../ssl_data_96/supervised/train',
                             transform=data_transforms),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4)  # 4 data-loading worker processes

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder('../../ssl_data_96/supervised/val',
                             transform=validation_data_transforms),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4)  # 4 data-loading worker processes

    model = CNNModel()
    model.cuda()

    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5)

    for epoch in range(1, args.epochs + 1):
        train(epoch, model, optimizer, train_loader, args.log_interval)
        validation(epoch, model, val_loader)
        # Step the LR scheduler once per epoch, after the optimizer has stepped
        scheduler.step()
        model_file = 'model_' + str(epoch) + '.pth'
        torch.save(model.state_dict(), model_file)
    writer.close()
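
Both run() variants above call train and validation helpers and a module-level writer that are defined elsewhere in the project. A minimal sketch of what they could look like, assuming a plain cross-entropy classifier and a TensorBoard SummaryWriter (only the names and call signatures come from the code above; the bodies are assumptions):

import torch
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()  # assumed module-level writer, closed at the end of run()

def train(epoch, model, optimizer, train_loader, log_interval):
    device = next(model.parameters()).device
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            step = (epoch - 1) * len(train_loader) + batch_idx
            writer.add_scalar('train/loss', loss.item(), step)

def validation(epoch, model, val_loader):
    device = next(model.parameters()).device
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            correct += (model(data).argmax(dim=1) == target).sum().item()
            total += target.size(0)
    writer.add_scalar('val/accuracy', correct / total, epoch)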
Example #3
def on_message(client, userdata, msg):
    try:
        print("Model from trainer received!")
        print('Topic: ', msg.topic)
        #print('Message: ', msg.payload)

        buff = io.BytesIO(msg.payload)  # the MQTT payload is already raw bytes

        # Create a dummy model to read weights
        model = CNNModel()
        model.load_state_dict(torch.load(buff))

        global trainer_weights
        trainer_weights.append(copy.deepcopy(model.state_dict()))

        # Wait until we get trained weights from all trainers
        if len(trainer_weights) == NUM_TRAINERS:
            update_global_weights_and_send(trainer_weights)
            trainer_weights.clear()

    except Exception:
        print("Unexpected error:", sys.exc_info())
Example #4
            # (excerpt: inner batch loop of train_model; the loop header is truncated)
            if torch.cuda.is_available():
                inputs, lbl = inputs.cuda(), lbl.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, lbl)
            loss.backward()
            optimizer.step()
            print('-[step: %d, loss: %f]' % (i + 1, loss.item()))
        scheduler.step()

    print('Finished Training')


if __name__ == '__main__':
    cnn = CNNModel()
    batch = 2000
    if torch.cuda.is_available():
        cnn.cuda()

    trainingDataset = LoadTrainingData()
    dataLoader = DataLoader(dataset=trainingDataset,
                            batch_size=batch,
                            shuffle=True,
                            num_workers=2)

    train_model(cnn, dataLoader, epoch=40, batch_size=batch)

    # save model
    torch.save(cnn.state_dict(), './trained_model.pth')
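
The loop body at the top of this example is the inside of train_model, whose opening lines were cut off. A minimal reconstruction consistent with the train_model(cnn, dataLoader, epoch=40, batch_size=batch) call above (the criterion, optimizer, and scheduler choices are assumptions):

import torch
import torch.nn as nn
import torch.optim as optim

def train_model(model, dataLoader, epoch, batch_size):
    criterion = nn.CrossEntropyLoss()                                 # assumed
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # assumed
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10)    # assumed
    for ep in range(epoch):
        for i, (inputs, lbl) in enumerate(dataLoader):
            if torch.cuda.is_available():
                inputs, lbl = inputs.cuda(), lbl.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, lbl)
            loss.backward()
            optimizer.step()
            print('-[step: %d, loss: %f]' % (i + 1, loss.item()))
        scheduler.step()
    print('Finished Training')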
Example #5
            output = model(x_batch)
            loss = loss_func(output.view(-1, output.size(-1)),
                             y_batch.view(-1))

            y_pred = torch.max(output, -1)[1]
            # Force padding positions (label 0) to 0 so they can be discounted
            y_pred = y_pred.masked_fill_((y_batch == 0), 0)
            nonzeros += (y_batch != 0).sum().item()

            total_loss += loss.item()
            # Padding positions always "match" after the fill, so subtract them
            total_acc += ((y_pred == y_batch).sum() - (y_batch == 0).sum()).item()

            total_ed += avg_ed(encode(y_pred.data.cpu().numpy()),
                               encode(y_batch.data.cpu().numpy()))

        print('Validation: loss:{:.4f}, acc:{:.4f}, ed:{:.4f}'.format(
            total_loss / (i + 1), total_acc / nonzeros,
            total_ed / y_valid.shape[0]))

        early_stop_cnt += 1
        if (total_ed / y_valid.shape[0]) < best_ed:
            early_stop_cnt = 0
            best_ed = total_ed / y_valid.shape[0]
            print('Save best model: ed={:.4f}'.format(best_ed))
            with open('model/model_best.pt', 'wb') as file:
                torch.save(model.state_dict(), file)

        # Early-stop only once the model is already reasonably good (ed < 15)
        if early_stop_cnt >= 20 and best_ed < 15:
            print('No improvement for 20 epochs. Stop training.')
            break
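
Example #5 leans on encode and avg_ed helpers defined elsewhere; ed is an edit distance between decoded label sequences. A plausible sketch of avg_ed as total Levenshtein distance over a batch (the caller divides by the number of validation samples, so a batch sum rather than a mean is assumed despite the name):

def levenshtein(a, b):
    # Single-row dynamic-programming edit distance
    dp = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        prev, dp[0] = dp[0], i
        for j, cb in enumerate(b, 1):
            prev, dp[j] = dp[j], min(dp[j] + 1, dp[j - 1] + 1,
                                     prev + (ca != cb))
    return dp[len(b)]

def avg_ed(pred_seqs, true_seqs):
    return sum(levenshtein(p, t) for p, t in zip(pred_seqs, true_seqs))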
Example #6
train_epochs = 300000
test_episode = 10
log_interval = 100
test_interval = 1000
save_interval = 1000

env = make_env('BreakoutNoFrameskip-v4', seed, num_procs)
in_ch = env.observation_space.shape[-1]
n_action = env.action_space.n
model = CNNModel(in_ch, n_action)
obs_preproc = ObsPreproc(device=device)
agent = A2CAgent(model, env, obs_preproc, device, lr, gamma, entropy_coef, value_loss_coef)

test_env = make_env('BreakoutNoFrameskip-v4', seed, 1, clip_reward=False)
test_agent = TestAgent(model, test_env, obs_preproc, device, test_episode)


for i in range(train_epochs):
    batch, log = agent.collect_batch(num_frames_per_proc)
    info = agent.update_parameters(batch)
    if i % log_interval == 0:
        print_dict({'step': i}, info, log)
    if i % test_interval == 0:
        print('=' * 20 + 'Test Agent' + '=' * 20)
        info = test_agent.evaluate()
        print_dict(info)
    if i % save_interval == 0:
        print('Save Model')
        torch.save(model.state_dict(), 'ckpt.pth')
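
print_dict is another helper that is not shown; judging by the calls above it merges any number of dicts and prints them on one line. A minimal sketch under that assumption:

def print_dict(*dicts):
    merged = {}
    for d in dicts:
        merged.update(d)
    print(' | '.join('{}: {}'.format(k, v) for k, v in merged.items()))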
Example #7
        # (excerpt: body of an MQTT on_message callback; the def line is truncated)
        print("Model received from coordinator!")
        print(msg.topic + ' ' + str(msg.payload))

    except Exception:
        print("Unexpected error:", sys.exc_info()[0])


local_mqttclient = mqtt.Client()
# Assign callbacks before connecting so the initial CONNACK triggers on_connect
local_mqttclient.on_connect = on_connect_local
local_mqttclient.on_message = on_message
local_mqttclient.connect(LOCAL_MQTT_HOST, LOCAL_MQTT_PORT, 60)

# Read test model
model = CNNModel()
model.load_state_dict(torch.load('models/mnist_cnn.pt'))
buff = io.BytesIO()
torch.save(model.state_dict(), buff)

buff.seek(0)

# Serialized model weights as raw bytes for transmission
model_str = buff.getvalue()

local_mqttclient.publish(LOCAL_MQTT_TOPIC,
                         payload=model_str,
                         qos=0,
                         retain=False)
#local_mqttclient.publish(LOCAL_MQTT_TOPIC, payload="test message", qos=0, retain=False)

local_mqttclient.loop_forever()
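
on_connect_local is referenced but not shown. With paho-mqtt, subscribing inside the connect callback keeps the subscription alive across reconnects; a minimal sketch of the callback (its body is an assumption):

def on_connect_local(client, userdata, flags, rc):
    print('Connected to local broker with result code ' + str(rc))
    # Subscribe here so the subscription is re-established after a reconnect
    client.subscribe(LOCAL_MQTT_TOPIC)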
Example #8
    train_set = SpeechDataset(filelist='data/digits/short_train.lst',
                              rootdir='data/digits',
                              n_mfcc=20)
    test_set = SpeechDataset(filelist='data/digits/short_test.lst',
                             rootdir='data/digits',
                             n_mfcc=20)
    train_dl = DataLoader(train_set,
                          batch_size=64,
                          shuffle=False,  # note: training batches are not shuffled here
                          num_workers=16,
                          pin_memory=True)
    test_dl = DataLoader(test_set,
                         batch_size=64,
                         shuffle=False,
                         num_workers=16,
                         pin_memory=True)

    device = get_default_device()
    model = CNNModel(pool_method=args.pool_method).to(device)
    fit(model, train_dl, test_dl, epochs=10, lr=0.001)
    Path("models").mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), args.model_file)
    """
    device = get_default_device()
    model = ResNetModel(pool_method=args.pool_method).to(device)
    fit(model, train_dl, test_dl, epochs=10, lr=0.001)
    Path("models").mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), args.model_file)
    """
Example #9
            label = label.to(DEVICE)

            optimizer.zero_grad()
            outputs = model(tokens, pos1, pos2)
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % PRINT_PER_STEP == PRINT_PER_STEP - 1:
                acc, precision, recall, f1_micro, f1_macro = evaluate(model, train_loader, DEVICE)
                print(' [%d, %5d] AVG-Loss: %.4f - TRAIN >>> ACC: %.4f, Precision: %.4f, Recall: %.4f, F1-micro: %.4f, F1-macro: %.4f\r' \
                    % (epoch+1, i+1, running_loss / PRINT_PER_STEP, acc, precision, recall, f1_micro, f1_macro), end='')
                running_loss = 0.0

        acc, precision, recall, f1_micro, f1_macro = evaluate(model, test_loader, DEVICE)
        print('\nTEST >>> ACC: %.4f, Precision: %.4f, Recall: %.4f, F1-micro: %.4f, F1-macro: %.4f\n' \
                % (acc, precision, recall, f1_micro, f1_macro))
        if f1_micro > best_f1_micro:
            print('Best model, storing...\n')
            torch.save(model.state_dict(), BEST_MODEL_SAVE_PATH)
            best_f1_micro = f1_micro
            waste_epoch = 0  # reset the early-stopping counter on improvement
        else:
            waste_epoch += 1

        if EARLY_STOP_EPOCH > 0:    
            if waste_epoch >= EARLY_STOP_EPOCH:
                break

    print('Training finished. Best f1-micro score: %.4f' % best_f1_micro)
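
evaluate is called on both loaders and returns five metrics. A plausible sketch using sklearn.metrics, assuming batches of (tokens, pos1, pos2, label) as in the training loop above (the averaging choices are assumptions):

import torch
from sklearn.metrics import (accuracy_score, precision_score,
                             recall_score, f1_score)

def evaluate(model, loader, device):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for tokens, pos1, pos2, label in loader:
            tokens = tokens.to(device)
            pos1, pos2 = pos1.to(device), pos2.to(device)
            outputs = model(tokens, pos1, pos2)
            y_pred.extend(outputs.argmax(dim=1).cpu().tolist())
            y_true.extend(label.tolist())
    model.train()
    return (accuracy_score(y_true, y_pred),
            precision_score(y_true, y_pred, average='macro', zero_division=0),
            recall_score(y_true, y_pred, average='macro', zero_division=0),
            f1_score(y_true, y_pred, average='micro'),
            f1_score(y_true, y_pred, average='macro'))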
Example #10
        # target = batch_y.unsqueeze(2).cuda()  # LSTM model: uncomment this
        target = batch_y.cuda()                 # LSTM model: comment this out
        data = data.cuda()
        pred = model(data)
        lossB = torch.abs(pred - target).mean()
        # lossA = -(pred * (target*2-1)).mean()
        lossC = F.cosine_similarity(pred, target)
        loss = torch.exp(-lossC).mean() + lossB
        loss.backward()
        optimizer.step()
        if step % 10 == 0 and step > 0:
            print('%d epoch\'s %d step has total loss %f, the L1 loss is %f'%(epoch, step, loss.item(), lossB.item()))
        if step % 2000 == 0 and step > 0:
            # print( torch.min(pred), torch.max(pred), torch.mean(pred))
            torch.save(model.state_dict(), './models/model_epoch'+str(epoch)+'_iter'+str(step)+'.pth')

            W_distance = 0.0
            model.eval()  # switch back with model.train() once validation ends
            # val_step avoids shadowing the outer training-loop variable `step`
            for val_step, (batch_x, batch_y) in enumerate(valid_loader):

                # data = batch_x.unsqueeze(2).cuda()  # bs, seq, 1  LSTM MODEL UNCOMMENT THIS
                data = batch_x.unsqueeze(1).cuda()
                data = (data - torch.mean(data, dim=2, keepdim=True)) / torch.std(data, dim=2, keepdim=True)
                # target = batch_y.unsqueeze(2).cuda()              LSTM MODEL UNCOMMENT THIS
                target = batch_y.cuda()
                pred = model(data)
                # for d in range(1029):
                #     test_debug = pred.squeeze().cpu().detach()
                #     print(test_debug[10][d])
                #print('debug ', pred.squeeze().cpu().detach().shape, batch_y.squeeze().cpu().detach().shape)
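
The excerpt cuts off before W_distance is accumulated. One plausible continuation, assuming the metric is the 1-D Wasserstein distance between each predicted and target sequence (scipy implements this for one-dimensional samples; the helper below is hypothetical):

from scipy.stats import wasserstein_distance

def batch_wasserstein(pred, target):
    # Sum of per-sample 1-D Wasserstein distances across the batch
    p = pred.squeeze().detach().cpu().numpy()
    t = target.squeeze().detach().cpu().numpy()
    return sum(wasserstein_distance(p[k], t[k]) for k in range(p.shape[0]))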
Example #11
def train_dann(dataset_source, dataset_target, n_epoch, batch_size, in_dim,
               h_dims, out_dim, ckpt_save_path):
    lr = 1e-3
    l_d = 0.1

    dataloader_source = torch.utils.data.DataLoader(
        dataset=dataset_source,
        batch_size=batch_size,
        shuffle=True,
    )

    dataloader_target = torch.utils.data.DataLoader(
        dataset=dataset_target,
        batch_size=batch_size,
        shuffle=True,
    )

    model = CNNModel(in_dim, h_dims, out_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    loss_class = torch.nn.CrossEntropyLoss()
    loss_domain = torch.nn.CrossEntropyLoss()

    if cuda:
        model = model.cuda()
        loss_class = loss_class.cuda()
        loss_domain = loss_domain.cuda()

    for p in model.parameters():
        p.requires_grad = True

    # training
    best_acc = 0.0
    best_ep = 0
    tr_acc_ls = []
    te_acc_ls = []
    loss_ls = []
    for epoch in range(n_epoch):
        model.train()
        len_dataloader = min(len(dataloader_source), len(dataloader_target))
        data_source_iter = iter(dataloader_source)
        data_target_iter = iter(dataloader_target)
        loss_sum = 0.0
        n_s = 0
        for i in range(len_dataloader):
            # Compute reverse layer parameter alpha
            p = float(i + epoch * len_dataloader) / n_epoch / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1

            # training model using source data
            data_s, label_s = next(data_source_iter)
            batch_size_s = len(label_s)
            n_s += batch_size_s
            domain_label = torch.zeros(batch_size_s).long()

            if cuda:
                data_s = data_s.cuda()
                label_s = label_s.cuda()
                domain_label = domain_label.cuda()

            class_output, domain_output = model(input_data=data_s, alpha=alpha)
            loss_c = loss_class(class_output, label_s)
            loss_ds = loss_domain(domain_output, domain_label)

            # training model using target data
            data_t, _ = next(data_target_iter)
            batch_size_t = len(data_t)
            domain_label = torch.ones(batch_size_t).long()

            if cuda:
                data_t = data_t.cuda()
                domain_label = domain_label.cuda()
            _, domain_output = model(input_data=data_t, alpha=alpha)
            loss_dt = loss_domain(domain_output, domain_label)

            # Compute overall loss and backprop
            loss = loss_c + l_d * (loss_dt + loss_ds)
            loss_sum += loss.item() * batch_size_s

            model.zero_grad()
            loss.backward()
            optimizer.step()

            # logger.info('epoch: {:>4}, [iter: {:>4} / all {:>4}], loss {:8.4f}, '
            #             'loss_c: {:8.4f}, loss_ds: {:8.4f}, loss_dt: {:8.4f}\n'
            #             .format(epoch, i+1, len_dataloader, loss.item(), loss_c.item(), loss_ds.item(), loss_dt.item()))

        tr_acc, tr_f1 = evaluate_dann(model, dataset_source, batch_size)
        te_acc, te_f1 = evaluate_dann(model, dataset_target, batch_size)
        tr_acc_ls.append(tr_acc)
        te_acc_ls.append(te_acc)
        loss_ls.append(loss_sum)
        # If find a better result, save the model
        if te_acc > best_acc:
            best_acc = te_acc
            best_ep = epoch
            checkpoint = {"epoch": epoch, "state_dict": model.state_dict()}
            torch.save(checkpoint, ckpt_save_path + '.ckpt')

        logger.info(
            'epoch: {:>4}, loss: {:8.4f}, train acc: {:8.4f}, train f1: {:8.4f},'
            ' eval acc: {:8.4f}, eval f1: {:8.4f}'.format(
                epoch, loss_sum, tr_acc, tr_f1, te_acc, te_f1))

    logger.info('=' * 10)
    logger.info('best epoch: {:>4}, best acc: {:8.4f}'.format(
        best_ep, best_acc))
    with open(ckpt_save_path + '.tracc', 'wb') as f:
        pickle.dump(tr_acc_ls, f)
    with open(ckpt_save_path + '.teacc', 'wb') as f:
        pickle.dump(te_acc_ls, f)
    with open(ckpt_save_path + '.loss', 'wb') as f:
        pickle.dump(loss_ls, f)
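
In DANN, the alpha passed to model(input_data=..., alpha=alpha) scales a gradient-reversal layer between the feature extractor and the domain classifier: the forward pass is the identity, while the backward pass flips (and scales) the gradient so the learned features become domain-invariant. CNNModel's internals are not shown here, so the layer below is an illustrative sketch:

import torch

class GradReverse(torch.autograd.Function):
    # Identity in the forward pass; multiplies the gradient by -alpha backward
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg() * ctx.alpha, None

# Inside the model's forward (hypothetical structure):
#     feat = self.feature_extractor(x)
#     rev = GradReverse.apply(feat, alpha)
#     domain_output = self.domain_classifier(rev)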