Example no. 1
def train(model, trainX, trainTE, trainY, valX, valTE, valY, mean, std):
    num_train = trainX.shape[0]
    min_loss = float('inf')  # best validation MAE seen so far
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 15], gamma=0.2)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=5, verbose=False,
        threshold=0.001, threshold_mode='rel', cooldown=0, min_lr=2e-6, eps=1e-8)
    
    for epoch in tqdm(range(1, args.max_epoch + 1)):
        model.train()
        train_l_sum, batch_count, start = 0.0, 0, time.time()
        # reshuffle the training samples every epoch
        permutation = np.random.permutation(num_train)
        trainX = trainX[permutation]
        # trainTE = trainTE[permutation]
        trainY = trainY[permutation]
        num_batch = math.ceil(num_train / args.batch_size)
        with tqdm(total=num_batch) as pbar:
            for batch_idx in range(num_batch):
                start_idx = batch_idx * args.batch_size
                end_idx = min(num_train, (batch_idx + 1) * args.batch_size)

                X = torch.from_numpy(trainX[start_idx : end_idx]).float().to(device)
                y = torch.from_numpy(trainY[start_idx : end_idx]).float().to(device)
                # te = torch.from_numpy(trainTE[start_idx : end_idx]).to(device)

                optimizer.zero_grad()

                y_hat = model(X)

                # the model predicts in normalized space; de-normalize before computing the loss
                loss = _compute_loss(y, y_hat * std + mean)

                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 5)
                optimizer.step()
                
                train_l_sum += loss.cpu().item()
                batch_count += 1
                pbar.update(1)
        log_string(log, 'epoch %d, lr %.6f, loss %.4f, time %.1f sec'
                   % (epoch, optimizer.param_groups[0]['lr'],
                      train_l_sum / batch_count, time.time() - start))
        # validate; res() appends the all-step average as the last entry of each metric array
        mae, rmse, mape = res(model, valX, valTE, valY, mean, std)
        lr_scheduler.step(mae[-1])  # plateau scheduler keyed on the average validation MAE
        if mae[-1] < min_loss:
            min_loss = mae[-1]
            torch.save(model, args.model_file)  # checkpoint the full module on improvement
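
The _compute_loss helper is not shown in this example. A minimal sketch, assuming it is the masked MAE commonly used in traffic forecasting, where zero labels mark missing sensor readings (the masking details are an assumption, not the original helper):

import torch

def _compute_loss(y_true, y_pred):
    # Masked MAE: ignore entries where the label is zero (assumed to be missing data).
    mask = (y_true != 0).float()
    mask = mask / torch.mean(mask)            # rescale so the loss magnitude stays comparable
    loss = torch.abs(y_pred - y_true) * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)  # guard all-zero batches
    return torch.mean(loss)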
Example no. 2
def res(model, valX, valTE, valY, mean, std):
    model.eval()  # evaluation mode: disables dropout
    num_val = valX.shape[0]
    pred = []
    label = []
    num_batch = math.ceil(num_val / args.batch_size)
    with torch.no_grad():
        for batch_idx in range(num_batch):
            start_idx = batch_idx * args.batch_size
            end_idx = min(num_val, (batch_idx + 1) * args.batch_size)

            X = torch.from_numpy(valX[start_idx : end_idx]).float().to(device)
            y = valY[start_idx : end_idx]
            # te = torch.from_numpy(valTE[start_idx : end_idx]).to(device)

            y_hat = model(X)

            # de-normalize predictions; labels are kept in their original scale
            pred.append(y_hat.cpu().numpy() * std + mean)
            label.append(y)
    
    pred = np.concatenate(pred, axis=0)
    label = np.concatenate(label, axis=0)

    maes = []
    rmses = []
    mapes = []
    wapes = []

    # per-step metrics across the 12-step prediction horizon
    for i in range(12):
        mae, rmse, mape, wape = metric(pred[:, i, :], label[:, i, :])
        maes.append(mae)
        rmses.append(rmse)
        mapes.append(mape)
        wapes.append(wape)
        log_string(log, 'step %d, mae: %.4f, rmse: %.4f, mape: %.4f, wape: %.4f'
                   % (i + 1, mae, rmse, mape, wape))
    
    mae, rmse, mape, wape = metric(pred, label)
    maes.append(mae)
    rmses.append(rmse)
    mapes.append(mape)
    wapes.append(wape)
    log_string(log, 'average, mae: %.4f, rmse: %.4f, mape: %.4f, wape: %.4f' % (mae, rmse, mape, wape))

    # wape values are logged but not returned, matching the 3-tuple that train() unpacks
    return np.stack(maes, 0), np.stack(rmses, 0), np.stack(mapes, 0)
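
The metric helper used above is also not shown. A plausible sketch under the same zero-masking assumption, returning the four values unpacked in the loop:

import numpy as np

def metric(pred, label):
    # Assumed implementation: masked errors computed over non-zero labels only.
    mask = label != 0
    diff = pred[mask] - label[mask]
    mae = np.abs(diff).mean()
    rmse = np.sqrt((diff ** 2).mean())
    mape = (np.abs(diff) / np.abs(label[mask])).mean()
    wape = np.abs(diff).sum() / np.abs(label[mask]).sum()
    return mae, rmse, mape, wape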
Example no. 3
parser.add_argument('--traffic_file',
                    default='data/PeMS.h5',
                    help='traffic file')
parser.add_argument('--SE_file',
                    default='data/SE(PeMS).txt',
                    help='spatial embedding file')
parser.add_argument('--model_file',
                    default='data/GMAN(PeMS)',
                    help='save the model to disk')
parser.add_argument('--log_file', default='data/log(PeMS)', help='log file')
args = parser.parse_args()

start = time.time()

log = open(args.log_file, 'w')
utils.log_string(log, str(args)[10:-1])  # strip the "Namespace(" wrapper from the printout

# load data
utils.log_string(log, 'loading data...')
(trainX, trainTE, trainY, valX, valTE, valY, testX, testTE, testY, SE, mean,
 std) = utils.loadData(args)
utils.log_string(log, 'trainX: %s\ttrainY: %s' % (trainX.shape, trainY.shape))
utils.log_string(log, 'valX:   %s\t\tvalY:   %s' % (valX.shape, valY.shape))
utils.log_string(log, 'testX:  %s\t\ttestY:  %s' % (testX.shape, testY.shape))
utils.log_string(log, 'data loaded!')

# train model
utils.log_string(log, 'compiling model...')
T = 24 * 60 // args.time_slot  # number of time slots per day
num_train, _, N = trainX.shape
X, TE, label, is_training = model.placeholder(args.P, args.Q, N)
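
model.placeholder builds the TF 1.x graph inputs for P history steps and Q prediction steps over N sensors. A sketch of what such a helper typically looks like (the shapes are assumptions inferred from the call site):

import tensorflow as tf

def placeholder(P, Q, N):
    # X: history window; label: prediction horizon; TE: time-of-day / day-of-week indices
    X = tf.compat.v1.placeholder(tf.float32, shape=(None, P, N), name='X')
    TE = tf.compat.v1.placeholder(tf.int32, shape=(None, P + Q, 2), name='TE')
    label = tf.compat.v1.placeholder(tf.float32, shape=(None, Q, N), name='label')
    is_training = tf.compat.v1.placeholder(tf.bool, shape=(), name='is_training')
    return X, TE, label, is_training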
Example no. 4
parser.add_argument('--traffic_file', default='**.npz',
                    help='traffic file')
parser.add_argument('--SE_file', default='**.npy',
                    help='spatial embedding file')
parser.add_argument('--model_file', default='PEMS',
                    help='save the model to disk')
parser.add_argument('--log_file', default='log(PEMS)',
                    help='log file')

args = parser.parse_args()

log = open(args.log_file, 'w')

device = torch.device("cuda:5" if torch.cuda.is_available() else "cpu")  # note: hard-coded GPU index

log_string(log, "loading data....")

trainX, trainTE, trainY, valX, valTE, valY, testX, testTE, testY, SE, mean, std = loadPEMSData(args)
SE = torch.from_numpy(SE).to(device)


log_string(log, "loading end....")

def res(model, valX, valTE, valY, mean, std):
    model.eval()  # evaluation mode: disables dropout
    num_val = valX.shape[0]
    pred = []
    label = []
    num_batch = math.ceil(num_val / args.batch_size)
    with torch.no_grad():
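
The snippet is truncated here; the loop body matches res in Example no. 2. For context, a script in this style typically finishes by calling the helpers roughly as follows (a sketch assuming the train/res signatures from Examples no. 1 and 2; the model construction is not shown):

if __name__ == '__main__':
    log_string(log, 'training begins....')
    train(model, trainX, trainTE, trainY, valX, valTE, valY, mean, std)
    log_string(log, 'testing the best checkpoint....')
    model = torch.load(args.model_file, map_location=device)  # train() saved the full module
    res(model, testX, testTE, testY, mean, std)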
Example no. 5
parser.add_argument('--traffic_file',
                    default='data/PeMS.h5',
                    help='traffic file')
parser.add_argument('--SE_file',
                    default='data/SE(PeMS).txt',
                    help='spatial embedding file')
parser.add_argument('--model_file',
                    default='data/GMAN(PeMS)',
                    help='pre-trained model')
parser.add_argument('--log_file', default='data/log(PeMS)', help='log file')
args = parser.parse_args()

start = time.time()

log = open(args.log_file, 'w')
utils.log_string(log, str(args)[10:-1])

# load data
utils.log_string(log, 'loading data...')
(trainX, trainTE, trainY, valX, valTE, valY, testX, testTE, testY, SE, mean,
 std) = utils.loadData(args)
num_train, num_val, num_test = trainX.shape[0], valX.shape[0], testX.shape[0]
utils.log_string(log, 'trainX: %s\ttrainY: %s' % (trainX.shape, trainY.shape))
utils.log_string(log, 'valX:   %s\t\tvalY:   %s' % (valX.shape, valY.shape))
utils.log_string(log, 'testX:  %s\t\ttestY:  %s' % (testX.shape, testY.shape))
utils.log_string(log, 'data loaded!')

# test model
utils.log_string(log, '**** testing model ****')
utils.log_string(log, 'loading model from %s' % args.model_file)
graph = tf.Graph()
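
The example stops right after creating the graph. In TF 1.x code of this style, testing usually proceeds by restoring the saved meta-graph into a session, roughly as follows (a sketch; the checkpoint layout is an assumption):

with graph.as_default():
    saver = tf.train.import_meta_graph(args.model_file + '.meta')
with tf.Session(graph=graph) as sess:
    saver.restore(sess, args.model_file)
    # ... look up the prediction op and evaluate it over testX in batches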
Example no. 6

def main(args):
    '''create dir'''
    experiment_dir = Path('./experiment/')
    experiment_dir.mkdir(exist_ok=True)
    checkpoints_dir = Path('./experiment/checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = Path('./experiment/logs/')
    log_dir.mkdir(exist_ok=True)

    ctx = [mxnet.gpu(gpu_id) for gpu_id in args.gpu]
    '''initialize the network'''
    net = MVRNN(cnn_arch='vgg11_bn',
                cnn_feature_length=4096,
                num_views=args.num_views,
                num_class=args.num_classes,
                pretrained=True,
                pretrained_cnn=args.pretrained_cnn,
                ctx=ctx)
    if args.checkpoint:
        net.load_parameters(args.checkpoint, ctx=ctx)
    else:
        net.initialize(init=init.MSRAPrelu(), ctx=ctx)
    net.hybridize()
    '''set grad_req to 'add' to manually aggregate gradients'''
    net.collect_params().setattr('grad_req', 'add')
    net._cnn2.collect_params().setattr('lr_mult', args.output_lr_mult)
    '''Setup loss function'''
    loss_fun = gluon.loss.SoftmaxCrossEntropyLoss(
        sparse_label=not args.label_smoothing)
    '''Loading dataset'''
    train_ds = MultiViewImageDataset(os.path.join(args.dataset_path, 'train'),
                                     args.num_views,
                                     transform=Compose([
                                         ToTensor(),
                                         Normalize(mean=(0.485, 0.456, 0.406),
                                                   std=(0.229, 0.224, 0.225))
                                     ]))
    test_ds = MultiViewImageDataset(os.path.join(args.dataset_path, 'test'),
                                    args.num_views,
                                    transform=Compose([
                                        ToTensor(),
                                        Normalize(mean=(0.485, 0.456, 0.406),
                                                  std=(0.229, 0.224, 0.225))
                                    ]))
    loader = gluon.data.DataLoader
    train_data = loader(train_ds,
                        args.batch_size,
                        shuffle=True,
                        last_batch='keep',
                        num_workers=4)
    test_data = loader(test_ds,
                       args.batch_size,
                       shuffle=False,
                       last_batch='keep',
                       num_workers=4)

    current_time = datetime.datetime.now()
    time_str = '%d-%d-%d--%d-%d-%d' % (
        current_time.year, current_time.month, current_time.day,
        current_time.hour, current_time.minute, current_time.second)
    log_filename = time_str + '.txt'
    checkpoint_name = 'checkpoint_' + time_str
    checkpoint_dir = Path(os.path.join(checkpoints_dir, checkpoint_name))
    checkpoint_dir.mkdir(exist_ok=True)

    with open(os.path.join(log_dir, log_filename), 'w') as log_out:
        kv = mxnet.kv.create('device')  # key-value store for gradient aggregation across the GPUs in ctx
        utils.log_string(log_out, sys.argv[0])
        utils.train(net, train_data, test_data, loss_fun, kv, log_out,
                    str(checkpoint_dir), args)
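
The argument parser for this main is not shown. A sketch covering only the attributes the function actually reads (types and defaults are assumptions):

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, nargs='+', default=[0])
    parser.add_argument('--num_views', type=int, default=12)
    parser.add_argument('--num_classes', type=int, default=40)
    parser.add_argument('--pretrained_cnn', default=None)
    parser.add_argument('--checkpoint', default=None)
    parser.add_argument('--output_lr_mult', type=float, default=10.0)
    parser.add_argument('--label_smoothing', action='store_true')
    parser.add_argument('--dataset_path', default='./data')
    parser.add_argument('--batch_size', type=int, default=8)
    main(parser.parse_args())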