# Example no. 1
# 0
def test_model(model, data_loader, mode, teaching_force=False, **kwargs):
    """Evaluate ``model`` on ``data_loader`` and print metrics.

    Args:
        model: trained network; moved to GPU via ``convert_to_gpu``.
        data_loader: iterable yielding ``(features, truth, covariate)`` batches.
        mode: forwarded to ``calculate_metrics`` (metric configuration).
        teaching_force: unused here; kept for interface compatibility.
        **kwargs: forwarded to ``normalized_transform`` / ``calculate_metrics``.
            ``return_attn`` (bool, default False) switches the model into a
            mode that also returns attention weights.

    Returns:
        The last batch's attention tensor when ``return_attn`` is truthy
        (``None`` if the loader was empty), otherwise the concatenated
        ``(predictions, targets)`` numpy arrays.
    """
    predictions, targets = [], []
    model = convert_to_gpu(model)
    # Bug fix: the original wrapped the already-tqdm-wrapped iterator in a
    # second tqdm, producing nested progress bars; wrap exactly once.
    return_attn = kwargs.get('return_attn', False)
    attn = None  # avoids NameError when return_attn is set but loader is empty
    with torch.no_grad():
        model.eval()
        for step, (features, truth, covariate) in tqdm(enumerate(data_loader)):
            features = convert_to_gpu(features)
            truth = convert_to_gpu(truth)
            covariate = convert_to_gpu(covariate)
            if return_attn:
                outputs, attn = model(features, covariate)
            else:
                outputs = model(features, covariate)
            # De-normalize so metrics are computed on the original scale.
            outputs, truth = normalized_transform(outputs, truth, **kwargs)
            targets.append(truth.cpu().numpy())
            predictions.append(outputs.detach().cpu().numpy())
    pre2 = np.concatenate(predictions)
    tar2 = np.concatenate(targets)
    print(pre2.shape)
    print(calculate_metrics(pre2, tar2, mode, **kwargs))
    if return_attn:
        # NOTE(review): only the LAST batch's attention is returned, matching
        # the original behavior — confirm whether all batches were intended.
        return attn
    return pre2, tar2
# Example no. 2
# 0
 def __getitem__(self, index):
     """
     Assemble one user's fully connected item graph and per-basket edge weights.

     :param index: position of the user in ``self.data_list``
     :return:  g, dgl graph, fully connected, containing N nodes, unweighted
               nodes_feature, tensor  (N, item_embedding)
               edges_weight, tensor (T, N*N), one row per input basket
               nodes, tensor (N, ), distinct item ids over the input baskets
               user_data, list, (baskets, items); the last basket is excluded
               from graph construction — presumably it is the prediction
               target; confirm against the training loop
     """
     # list of tensors: one tensor of item ids per basket for this user
     user_data = self.data_list[index]
     # nodes -> tensor,  len(nodes) = N distinct items over all but the last basket
     # may change the order of appearing items in dataset
     nodes = self.get_nodes(baskets=user_data[:-1])
     # N * item_embedding tensor: embedding lookup performed on GPU
     nodes_feature = self.item_embedding_matrix(convert_to_gpu(nodes))
     # construct graph for the user; graph nodes are re-indexed 0..N-1
     project_nodes = torch.tensor(list(range(nodes.shape[0])))
     # construct fully connected graph, containing N nodes, unweighted
     # (0, 0), (0, 1), ..., (0, N-1), (1, 0), (1, 1), ..., (1, N-1), ...
     # src -> [0, 0, 0, ... N-1, N-1, N-1, ...],  dst -> [0, 1, ..., N-1, ..., 0, 1, ..., N-1]
     src = torch.stack(
         [project_nodes for _ in range(project_nodes.shape[0])],
         dim=1).flatten().tolist()
     dst = torch.stack(
         [project_nodes for _ in range(project_nodes.shape[0])],
         dim=0).flatten().tolist()
     g = dgl.graph((src, dst), num_nodes=project_nodes.shape[0])
     # co-occurrence weights keyed by (item_id, item_id) pairs
     # NOTE(review): the `== 0.0` probe below implies this is a
     # defaultdict(float); a plain dict would raise KeyError — confirm in
     # get_edges_weight.
     edges_weight_dict = self.get_edges_weight(user_data[:-1])
     # add self-loop so every node keeps its own signal in message passing
     for node in nodes.tolist():
         if edges_weight_dict[(node, node)] == 0.0:
             edges_weight_dict[(node, node)] = 1.0
     # normalize weight: scale all weights into (0, 1] by the global maximum
     max_weight = max(edges_weight_dict.values())
     for i, j in edges_weight_dict.items():
         edges_weight_dict[i] = j / max_weight
     # get edge weight for each timestamp, shape (T, N*N); the flat N*N order
     # matches the (src, dst) edge order used to build g above
     edges_weight = []
     for basket in user_data[:-1]:
         basket = basket.tolist()
         # list containing N * N weights of elements
         edge_weight = []
         for node_1 in nodes.tolist():
             for node_2 in nodes.tolist():
                 # keep the weight only when both endpoints occur in this
                 # basket (or for the self-loop); everything else is masked
                 if (node_1 in basket and node_2 in basket) or (node_1
                                                                == node_2):
                     # each node has a self connection
                     edge_weight.append(edges_weight_dict[(node_1, node_2)])
                 else:
                     edge_weight.append(0.0)
         edges_weight.append(torch.Tensor(edge_weight))
     # tensor -> shape (T, N*N)
     edges_weight = torch.stack(edges_weight)
     return g, nodes_feature, edges_weight, nodes, user_data
# Example no. 3
# 0
def train_model(model: nn.Module,
                data_loaders: Dict[str, DataLoader],
                loss_func: callable,
                optimizer,
                model_folder: str,
                tensorboard_folder: str,
                pid: int):
    """Run the train/validate/test loop, checkpointing on best validation F1.

    Args:
        model: network to train; moved to GPU via ``convert_to_gpu``.
        data_loaders: mapping with 'train', 'validate' and 'test' DataLoaders.
        loss_func: called as ``loss_func(truth=..., predict=...)``; must
            return a scalar tensor.
        optimizer: optimizer over ``model``'s parameters.
        model_folder: directory where checkpoint ``.pkl`` files are written.
        tensorboard_folder: directory for SummaryWriter event files.
        pid: worker id displayed in the progress-bar description.

    Returns:
        The test-phase metrics dict from the epoch with the best validation
        F1-score, or ``None`` if validation never improved.
    """
    phases = ['train', 'validate', 'test']

    writer = SummaryWriter(tensorboard_folder)
    num_epochs = get_attribute('epochs')

    since = time.perf_counter()

    model = convert_to_gpu(model)
    loss_func = convert_to_gpu(loss_func)

    # Checkpoint payload; refreshed whenever validation F1 improves.
    save_dict, best_f1_score = {'model_state_dict': copy.deepcopy(model.state_dict()), 'epoch': 0}, 0

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=.5, patience=2, threshold=1e-3, min_lr=1e-6)
    test_metric = None
    try:
        for epoch in range(num_epochs):

            running_loss, running_metrics = {phase: 0.0 for phase in phases}, {phase: dict() for phase in phases}
            save_validate_this_epoch = False
            for phase in phases:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                steps, predictions, targets = 0, list(), list()
                tqdm_loader = tqdm(enumerate(data_loaders[phase]))
                for step, (g, spatial_features, temporal_features, external_features, truth_data) in tqdm_loader:

                    # Skip ragged final batches — presumably the model requires
                    # a fixed batch size; TODO confirm against the model code.
                    if list(external_features.size())[0] != get_attribute("batch_size"):
                        continue

                    # Feature-ablation switches: zero disabled groups in place.
                    if not get_attribute("use_spatial_features"):
                        torch.zero_(spatial_features)
                    if not get_attribute("use_temporal_features"):
                        torch.zero_(temporal_features)
                    if not get_attribute("use_external_features"):
                        torch.zero_(external_features)

                    features, truth_data = convert_train_truth_to_gpu(
                        [spatial_features, temporal_features, external_features], truth_data)

                    with torch.set_grad_enabled(phase == 'train'):
                        _outputs = model(g, *features)
                        outputs = torch.squeeze(_outputs)  # squeeze [batch-size, 1] to [batch-size]
                        loss = loss_func(truth=truth_data, predict=outputs)
                        if phase == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                    targets.append(truth_data.cpu().numpy())
                    with torch.no_grad():
                        predictions.append(outputs.detach().cpu().numpy())

                    # Bug fix: accumulate a detached Python float, not the loss
                    # tensor itself. Summing the tensor kept every batch's
                    # autograd graph reachable — the memory growth the old
                    # ``torch.cuda.empty_cache()`` workaround tried to hide;
                    # with the root cause fixed, that hack is removed.
                    running_loss[phase] += loss.item() * truth_data.size(0)
                    steps += truth_data.size(0)

                    tqdm_loader.set_description(
                        f'{pid:2} pid: {phase:8} epoch: {epoch:3}, {phase:8} loss: {running_loss[phase] / steps:3.6}')

                print(f'{phase} metric ...')
                _cp = np.concatenate(predictions)
                _ct = np.concatenate(targets)
                scores = evaluate(_cp, _ct)
                running_metrics[phase] = scores
                print(scores)

                # Checkpoint whenever validation F1 reaches a new best.
                if phase == 'validate' and scores['F1-SCORE'] > best_f1_score:
                    best_f1_score = scores['F1-SCORE']
                    save_validate_this_epoch = True
                    save_dict.update(model_state_dict=copy.deepcopy(model.state_dict()),
                                     epoch=epoch,
                                     optimizer_state_dict=copy.deepcopy(optimizer.state_dict()))
                    print(f"save model as {model_folder}/model_{epoch}.pkl")
                    save_model(f"{model_folder}/model_{epoch}.pkl", **save_dict)

            scheduler.step(running_loss['train'])

            # Report the test metrics of the epoch that won on validation.
            if save_validate_this_epoch:
                test_metric = running_metrics["test"].copy()

            for metric in running_metrics['train'].keys():
                writer.add_scalars(metric, {
                    f'{phase} {metric}': running_metrics[phase][metric] for phase in phases},
                                   global_step=epoch)
            writer.add_scalars('Loss', {
                f'{phase} loss': running_loss[phase] / len(data_loaders[phase].dataset) for phase in phases},
                               global_step=epoch)
    finally:
        # Always persist the best snapshot, even if training is interrupted.
        time_elapsed = time.perf_counter() - since
        print(f"cost {time_elapsed} seconds")

        save_model(f"{model_folder}/best_model.pkl", **save_dict)

    return test_metric
# Example no. 4
# 0
def train_model(model: nn.Module, train_data_loader: DataLoader,
                valid_data_loader: DataLoader, loss_func, epochs, optimizer,
                model_folder, tensorboard_folder):
    """Train with a train/validate split, checkpointing on the best mean NDCG.

    Args:
        model: nn.Module; moved to GPU via ``convert_to_gpu``
        train_data_loader: DataLoader for the training split
        valid_data_loader: DataLoader for the validation split
        loss_func: nn.Module called as ``loss_func(output, truth)``
        epochs: int, number of training epochs
        optimizer: Optimizer over ``model``'s parameters
        model_folder: str, directory where checkpoints are saved
        tensorboard_folder: str, SummaryWriter output directory
    """
    warnings.filterwarnings('ignore')

    print(model)
    print(optimizer)

    writer = SummaryWriter(tensorboard_folder)
    writer.add_text('Welcome', 'Welcome to tensorboard!')

    model = convert_to_gpu(model)
    model.train()
    loss_func = convert_to_gpu(loss_func)

    start_time = datetime.datetime.now()

    validate_max_ndcg = 0
    name_list = ["train", "validate"]

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)

    # Hoisted out of the epoch loop: the mapping never changes per epoch.
    data_loader_dic = {
        "train": train_data_loader,
        "validate": valid_data_loader
    }

    for epoch in range(epochs):
        loss_dict = {name: 0.0 for name in name_list}
        metric_dict = {name: dict() for name in name_list}

        for name in name_list:
            # training vs. validation mode (dropout / batch-norm behavior)
            if name == "train":
                model.train()
            else:
                model.eval()

            y_true = []
            y_pred = []
            total_loss = 0.0
            tqdm_loader = tqdm(data_loader_dic[name])
            for step, (g, nodes_feature, edges_weight, lengths, nodes,
                       truth_data, users_frequency) in enumerate(tqdm_loader):
                g, nodes_feature, edges_weight, lengths, nodes, truth_data, users_frequency = \
                    convert_all_data_to_gpu(g, nodes_feature, edges_weight, lengths, nodes, truth_data, users_frequency)

                with torch.set_grad_enabled(name == 'train'):
                    # (B, N) scores over items
                    output = model(g, nodes_feature, edges_weight, lengths,
                                   nodes, users_frequency)
                    loss = loss_func(output, truth_data.float())
                    # Fix: .item() replaces the deprecated .data round-trip
                    # (.cpu().data.numpy()) and detaches the scalar cleanly.
                    total_loss += loss.item()
                    if name == "train":
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                    y_pred.append(output.detach().cpu())
                    y_true.append(truth_data.detach().cpu())
                    tqdm_loader.set_description(
                        f'{name} epoch: {epoch}, {name} loss: {total_loss / (step + 1)}'
                    )

            loss_dict[name] = total_loss / (step + 1)
            y_true = torch.cat(y_true, dim=0)
            y_pred = torch.cat(y_pred, dim=0)

            print(f'{name} metric ...')
            scores = get_metric(y_true=y_true, y_pred=y_pred)
            # Present metrics in deterministic (sorted-by-key) order.
            scores = dict(sorted(scores.items()))
            print(json.dumps(scores, indent=4))
            metric_dict[name] = scores

            # save best model: checkpoint when mean NDCG@k improves
            if name == "validate":
                validate_ndcg_list = [value
                                      for key, value in metric_dict["validate"].items()
                                      if key.startswith("ndcg_")]
                validate_ndcg = np.mean(validate_ndcg_list)
                if validate_ndcg > validate_max_ndcg:
                    validate_max_ndcg = validate_ndcg
                    model_path = f"{model_folder}/model_epoch_{epoch}.pkl"
                    save_model(model, model_path)
                    print(f"model save as {model_path}")

        scheduler.step(loss_dict['train'])

        writer.add_scalars(
            'Loss', {f'{name} loss': loss_dict[name]
                     for name in name_list},
            global_step=epoch)

        for metric in metric_dict['train'].keys():
            for name in name_list:
                writer.add_scalars(f'{name} {metric}',
                                   {f'{metric}': metric_dict[name][metric]},
                                   global_step=epoch)

    end_time = datetime.datetime.now()
    print("cost %d seconds" % (end_time - start_time).seconds)
# Example no. 5
# 0
from utils.metric import evaluate
from utils.load_config import get_attribute
from train.train_main import create_model
import torch
from utils.data_container import get_data_loaders
from tqdm import tqdm
from utils.util import convert_train_truth_to_gpu
from utils.util import convert_to_gpu

if __name__ == '__main__':
    # Evaluate a saved DSTGCN checkpoint on the test split with external
    # features ablated (zeroed).
    model_path = "../saves/spatial_temporal_external/DSTGCN/model_0.pkl"
    print(f'model path -> {model_path}')
    model = create_model()
    # Fix: load the checkpoint once instead of calling torch.load twice.
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint["model_state_dict"])
    print(f'model epoch -> {checkpoint["epoch"]}')
    model = convert_to_gpu(model)
    # Fix: evaluation must run in eval mode (dropout / batch-norm frozen).
    model.eval()
    print(model)

    data_loaders = get_data_loaders(get_attribute('K_hop'),
                                    get_attribute('batch_size'))
    phase = "test"
    tqdm_loader = tqdm(enumerate(data_loaders[phase]))
    predictions, targets = list(), list()
    with torch.no_grad():
        for step, (g, spatial_features, temporal_features, external_features,
                   truth_data) in tqdm_loader:
            # Ablation: zero out external features in place.
            torch.zero_(external_features)

            features, truth_data = convert_train_truth_to_gpu(
                [spatial_features, temporal_features, external_features],
                truth_data)
            outputs = model(g, *features)
            # Fix: the original discarded outputs and never filled these lists.
            predictions.append(outputs.detach().cpu())
            targets.append(truth_data.detach().cpu())

    # Fix: actually report metrics (the `evaluate` import was unused).
    print(evaluate(torch.cat(predictions).numpy(), torch.cat(targets).numpy()))
def train_my_model(model: nn.Module, data_loader, loss_func: callable, optimizer, num_epochs, model_folder,
                tensorboard_folder: str, **kwargs):
    """Train a joint taxi/bike demand model; checkpoint on best validation RMSE.

    :param model: network to train; moved to GPU via ``convert_to_gpu``
    :param data_loader: dict of DataLoaders keyed by 'train' / 'val' / 'test'
    :param loss_func: loss callable applied to (truth_slice, output_slice)
    :param optimizer: optimizer over the model's parameters
    :param num_epochs: number of training epochs
    :param model_folder: directory where the best checkpoint is written
    :param tensorboard_folder: SummaryWriter output directory
    :param kwargs: forwarded to ``normalized_transform`` / ``calculate_metrics``
    :return: the model with the best-validation-RMSE weights loaded
    """
    phases = ['train', 'val', 'test']
    writer = SummaryWriter(tensorboard_folder)
    model = convert_to_gpu(model)
    #model = nn.DataParallel(convert_to_gpu(model), [0, 1])
    loss_func = convert_to_gpu(loss_func)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=.1, patience=8, threshold=1e-4, min_lr=1e-6)
    save_dict = {'model_state_dict': copy.deepcopy(model.state_dict()), 'epoch': 0}
    # best validation RMSE seen so far (lower is better)
    loss_global = 100000
    for epoch in range(num_epochs):
        running_loss = {phase: 0.0 for phase in phases}
        for phase in phases:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            steps, predictions, targets = 0, list(), list()
            tqdm_loaders = tqdm(enumerate(data_loader[phase]))
            for step, (features, truth, covariate) in tqdm_loaders:
                features = convert_to_gpu(features)
                truth = convert_to_gpu(truth)
                covariate = convert_to_gpu(covariate)
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(features, covariate)
                    # If the loss operates on raw values, de-normalize BEFORE
                    # computing it; otherwise de-normalize after the backward
                    # pass (below) so the logged metrics are always on the
                    # original scale. Order matters here.
                    if not get_Parameter('loss_normalized'):
                        outputs, truth = normalized_transform(outputs, truth, **kwargs)
                    # NOTE(review): assumes axis 2 packs taxi zones first
                    # (first ``taxi_size`` entries) then bike zones, and the
                    # last axis is (pickup, dropoff) — confirm against dataset.
                    taxi_pickup_loss = loss_func(truth[:, :, :get_Parameter('taxi_size'), 0], outputs[:, :, :get_Parameter('taxi_size'), 0])
                    taxi_dropoff_loss = loss_func(truth[:, :, :get_Parameter('taxi_size'), 1], outputs[:, :, :get_Parameter('taxi_size'), 1])
                    #taxi_loss = loss_func(truth[:, :, :get_Parameter('taxi_size')], outputs[:, :, :get_Parameter('taxi_size')])
                    # Hand-tuned weighting: dropoff errors weighted 1.5x pickup.
                    taxi_loss = taxi_pickup_loss + taxi_dropoff_loss*1.5
                    bike_loss = loss_func(truth[:, :, get_Parameter('taxi_size'):], outputs[:, :, get_Parameter('taxi_size'):])
                    # if epoch<=100:
                    #     loss = (2*taxi_loss + bike_loss)*100
                    # else:
                    #     loss = taxi_loss
                    #loss = taxi_loss + 30*bike_loss
                    # Hand-tuned modality blend; the x100 only rescales gradients.
                    loss = (1.5*taxi_loss + bike_loss)*100
                    #loss = loss_func(truth, outputs)
                    #loss = bike_loss
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                    if get_Parameter('loss_normalized'):
                        outputs, truth = normalized_transform(outputs, truth, **kwargs)
                targets.append(truth.cpu().numpy())
                with torch.no_grad():
                    predictions.append(outputs.cpu().numpy())
                running_loss[phase] += loss.item()
                steps += truth.size(0)

                tqdm_loaders.set_description(f'{phase} epoch:{epoch}, {phase} loss: {running_loss[phase]/steps}')

            predictions = np.concatenate(predictions)
            targets = np.concatenate(targets)

            # Metrics are computed on (sample, everything-else) flattened views.
            scores = calculate_metrics(predictions.reshape(predictions.shape[0], -1),
                                       targets.reshape(targets.shape[0], -1), mode='train', **kwargs)
            print(scores)
            writer.add_scalars(f'score/{phase}', scores, global_step=epoch)
            # Track the best snapshot by validation RMSE.
            if phase == 'val' and scores['RMSE'] < loss_global:
                loss_global = scores['RMSE']
                save_dict.update(model_state_dict=copy.deepcopy(model.state_dict()), epoch=epoch,
                                 optimizer_state_dict=copy.deepcopy(optimizer.state_dict()))

        scheduler.step(running_loss['train'])
        writer.add_scalars('Loss', {
            f'{phase} loss': running_loss[phase] for phase in phases
        }, global_step=epoch)

    save_model(f'{model_folder}/best_model.pkl', **save_dict)
    # Restore the best-validation weights before returning the model.
    model.load_state_dict(save_dict['model_state_dict'])
    return model