Example #1
def preprocess_pipeline(args):

    # [STEP 0] load the .mat files (sample-level)
    if not os.path.exists(args.path_raw):
        sys.stdout = Logger(os.path.join(args.path_raw, "log_raw.txt"))
        print(paint("[STEP 0] Loading the .mat files..."))
        load_mat(path_data=args.path_data,
                 path_raw=args.path_raw,
                 class_map=args.class_map)
    else:
        print(paint("[STEP 0] Files already loaded!"))

    # [STEP 1] partition the datasets (segment-level)
    w, s = args.window, args.stride
    if not os.path.exists(args.path_processed):
        sys.stdout = Logger(
            os.path.join(args.path_processed, f"log_{w}_{s}.txt"))
        print(
            paint(
                f"[STEP 1] Partitioning the dataset (window,stride) = ({w},{s})..."
            ))
        partition(
            path_raw=args.path_raw,
            path_processed=args.path_processed,
            window=w,
            stride=s,
            class_map=args.class_map,
        )
    else:
        print(
            paint(
                f"[STEP 1] Dataset already partitioned (window,stride) = ({w},{s})!"
            ))
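
For reference, a minimal sketch of the arguments preprocess_pipeline expects. The attribute names mirror the snippet above; the concrete paths, window/stride values, and the use of a plain argparse.Namespace are illustrative assumptions (the project obtains its args from get_args(), as in Examples #3 and #6).

# Hypothetical arguments for preprocess_pipeline -- illustrative values only.
from argparse import Namespace

args_sketch = Namespace(
    path_data="./data/dataset.mat",     # raw .mat file read by load_mat
    path_raw="./data/raw",              # sample-level .npz files are written here
    path_processed="./data/processed",  # segment-level .npz files are written here
    window=24,                          # sliding-window length (samples)
    stride=12,                          # sliding-window stride (samples)
    class_map=["class_{}".format(i) for i in range(7)],  # placeholder label names
)
# preprocess_pipeline(args_sketch)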
Example #2
    def get_info(self, n_samples=3):
        print(paint(f"[-] Information on {self.prefix} dataset:"))
        print("\t data: ", self.data.shape, self.data.dtype, type(self.data))
        print("\t target: ", self.target.shape, self.target.dtype,
              type(self.target))

        target_idx = [
            np.where(self.target == label)[0] for label in set(self.target)
        ]
        target_idx_samples = np.array([
            np.random.choice(idx, n_samples, replace=False)
            for idx in target_idx
        ]).flatten()

        for i, random_idx in enumerate(target_idx_samples):
            data, target, index = self.__getitem__(random_idx)
            if i == 0:
                print(
                    paint(
                        f"[-] Information on segment #{random_idx}/{self.len}:"
                    ))
                print("\t data: ", data.shape, data.dtype, type(data))
                print("\t target: ", target.shape, target.dtype, type(target))
                print("\t index: ", index, index.shape, index.dtype,
                      type(index))

            path_save = os.path.join(self.path_processed, "segments")
            plot_segment(
                data,
                target,
                index=index,
                prefix=self.prefix,
                path_save=path_save,
                num_class=len(target_idx),
            )
Example #3
def main():

    # get experiment arguments
    args, _, config_model = get_args()
    args.experiment = "test_models"
    config_model["experiment"] = "test_models"

    # [STEP 1] create synthetic HAR batch
    data_synthetic = torch.randn(
        (args.batch_size, args.window, args.input_dim)).cuda()

    # [STEP 2] create HAR models
    # NOTE: this test script assumes a CUDA device -- the synthetic batch above is
    # already placed on the GPU, and `model` is used unconditionally below.
    assert torch.cuda.is_available(), "[!] test_models expects a CUDA-capable GPU"
    model = create(args.model, config_model).cuda()
    torch.backends.cudnn.benchmark = True
    get_info_params(model)
    get_info_layers(model)
    model.apply(init_weights_orthogonal)

    model.eval()
    with torch.no_grad():
        print(paint("[*] Performing a forward pass with a synthetic batch..."))
        z, logits = model(data_synthetic)
        print(f"\t input: {data_synthetic.shape} {data_synthetic.dtype}")
        print(f"\t z: {z.shape} {z.dtype}")
        print(f"\t logits: {logits.shape} {logits.dtype}")
Example #4
def partition(path_raw, path_processed, window, stride, class_map):

    # read raw datasets (sample-level)
    print(f"[*] Reading raw files from {path_raw}")
    dataset_train = np.load(os.path.join(path_raw, "train.npz"))
    x_train, y_train = dataset_train["x"], dataset_train["y"]
    dataset_val = np.load(os.path.join(path_raw, "val.npz"))
    x_val, y_val = dataset_val["x"], dataset_val["y"]
    dataset_test = np.load(os.path.join(path_raw, "test.npz"))
    x_test, y_test = dataset_test["x"], dataset_test["y"]

    # apply sliding window over raw samples and generate segments
    data_train, target_train = sliding_window(x_train, y_train, window, stride)
    data_val, target_val = sliding_window(x_val, y_val, window, stride)
    data_test, target_test = sliding_window(x_test, y_test, window, stride)
    data_test_sample_wise, target_test_sample_wise = sliding_window(
        x_test, y_test, window, 1)

    # show processed datasets info (segment-level)
    print("[-] Train data : {} {}, target {} {}".format(
        data_train.shape, data_train.dtype, target_train.shape,
        target_train.dtype))
    print("[-] Valid data : {} {}, target {} {}".format(
        data_val.shape, data_val.dtype, target_val.shape, target_val.dtype))
    print("[-] Test data : {} {}, target {} {}".format(data_test.shape,
                                                       data_test.dtype,
                                                       target_test.shape,
                                                       target_test.dtype))
    print("[-] Test data sample-wise : {} {}, target sample-wise {} {}".format(
        data_test_sample_wise.shape,
        data_test_sample_wise.dtype,
        target_test_sample_wise.shape,
        target_test_sample_wise.dtype,
    ))

    # plot processed target distributions (segment-level)
    plot_pie(target_train, "train", path_processed, class_map)
    plot_pie(target_val, "val", path_processed, class_map)
    plot_pie(target_test, "test", path_processed, class_map)
    plot_pie(target_test_sample_wise, "test_sample_wise", path_processed,
             class_map)

    # save processed datasets (segment-level)
    np.savez_compressed(os.path.join(path_processed, "train.npz"),
                        data=data_train,
                        target=target_train)
    np.savez_compressed(os.path.join(path_processed, "val.npz"),
                        data=data_val,
                        target=target_val)
    np.savez_compressed(os.path.join(path_processed, "test.npz"),
                        data=data_test,
                        target=target_test)
    np.savez_compressed(
        os.path.join(path_processed, "test_sample_wise.npz"),
        data=data_test_sample_wise,
        target=target_test_sample_wise,
    )
    print("[+] Processed segment datasets successfully saved!")
    print(paint("--" * 50, "blue"))
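
partition relies on a sliding_window helper that is not included in these excerpts. Below is a minimal sketch of such a helper, assuming each segment is labeled with the label of its last sample; the project's actual implementation and labeling rule may differ.

import numpy as np

def sliding_window_sketch(x, y, window, stride):
    """Cut sample-level (n_samples, n_channels) data into (n_segments, window, n_channels) segments."""
    data, target = [], []
    for start in range(0, x.shape[0] - window + 1, stride):
        end = start + window
        data.append(x[start:end])
        target.append(y[end - 1])  # assumption: label of the final sample in the window
    return np.asarray(data), np.asarray(target)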
Example #5
def get_info_params(model):
    """
    Display a summary of trainable/frozen network parameter counts
    :param model:
    :return:
    """
    num_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    num_total = sum(p.numel() for p in model.parameters())
    print(paint(f"[-] {num_trainable}/{num_total} trainable parameters", "blue"))
Example #6
def main():

    # get experiment arguments
    args, config_dataset, config_model = get_args()

    # [STEP 0 and 1] load the .mat files (sample-level) and partition the datasets (segment-level)
    preprocess_pipeline(args)

    if args.train_mode:

        # [STEP 2] create HAR datasets
        dataset = SensorDataset(**config_dataset, prefix="train")
        dataset_val = SensorDataset(**config_dataset, prefix="val")

        # [STEP 3] create HAR models
        if torch.cuda.is_available():
            model = create(args.model, config_model).cuda()
            torch.backends.cudnn.benchmark = True
            sys.stdout = Logger(
                os.path.join(model.path_logs,
                             f"log_main_{args.experiment}.txt"))

        # show args
        print("##" * 50)
        print(paint(f"Experiment: {model.experiment}", "blue"))
        print(
            paint(
                f"[-] Using {torch.cuda.device_count()} GPU: {torch.cuda.is_available()}"
            ))
        print(args)
        get_info_params(model)
        get_info_layers(model)
        print("##" * 50)

        # [STEP 4] train HAR models
        model_train(model, dataset, dataset_val, args)

    # [STEP 5] evaluate HAR models
    dataset_test = SensorDataset(**config_dataset, prefix="test")
    if not args.train_mode:
        config_model["experiment"] = "inference"
        model = create(args.model, config_model).cuda()
    model_eval(model, dataset_test, args)
Example #7
def model_eval(model, dataset_test, args):
    print(paint("[STEP 5] Running HAR evaluation loop ..."))

    loader_test = DataLoader(dataset_test,
                             args.batch_size,
                             False,
                             pin_memory=True)

    criterion = nn.CrossEntropyLoss(reduction="mean").cuda()

    print("[-] Loading checkpoint ...")
    if args.train_mode:
        path_checkpoint = os.path.join(model.path_checkpoints,
                                       "checkpoint_best.pth")
    else:
        path_checkpoint = f"./weights/checkpoint_{args.dataset}.pth"

    checkpoint = torch.load(path_checkpoint)
    model.load_state_dict(checkpoint["model_state_dict"])
    criterion.load_state_dict(checkpoint["criterion_state_dict"])

    start_time = time.time()

    loss_test, acc_test, fm_test, fw_test = eval_one_epoch(model,
                                                           loader_test,
                                                           criterion,
                                                           -1,
                                                           logger=None,
                                                           args=args)

    print(
        paint(
            f"[-] Test loss: {loss_test:.2f}"
            f"\tacc: {acc_test:.2f}(%)\tfm: {fm_test:.2f}(%)\tfw: {fw_test:.2f}(%)"
        ))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print(paint(f"[STEP 5] Finished HAR evaluation loop (h:m:s): {elapsed}"))
Example #8
    def __init__(
        self,
        model,
        dataset,
        input_dim,
        hidden_dim,
        filter_num,
        filter_size,
        enc_num_layers,
        enc_is_bidirectional,
        dropout,
        dropout_rnn,
        dropout_cls,
        activation,
        sa_div,
        num_class,
        train_mode,
        experiment,
    ):
        super(AttendDiscriminate, self).__init__()

        self.experiment = f"train_{experiment}" if train_mode else experiment
        self.model = model
        self.dataset = dataset
        self.hidden_dim = hidden_dim
        print(paint(f"[STEP 3] Creating {self.model} HAR model ..."))

        self.fe = FeatureExtractor(
            input_dim,
            hidden_dim,
            filter_num,
            filter_size,
            enc_num_layers,
            enc_is_bidirectional,
            dropout,
            dropout_rnn,
            activation,
            sa_div,
        )

        self.dropout = nn.Dropout(dropout_cls)
        self.classifier = Classifier(hidden_dim, num_class)
        self.register_buffer("centers",
                             (torch.randn(num_class, self.hidden_dim).cuda()))

        # do not create log directories if we are only testing the models module
        if experiment != "test_models":
            if train_mode:
                makedir(self.path_checkpoints)
                makedir(self.path_logs)
            makedir(self.path_visuals)
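
The forward pass is not part of this excerpt. Example #3 unpacks the output as z, logits = model(data), so the sketch below shows one plausible shape-compatible forward, assuming the feature extractor yields a (batch, hidden_dim) feature that is L2-normalized for z and classified after dropout; the repository's actual forward may differ.

    def forward(self, x):
        # Sketch only: shapes and the normalization step are assumptions.
        feature = self.fe(x)                                    # (batch, hidden_dim)
        z = feature / torch.norm(feature, dim=1, keepdim=True)  # unit-norm embedding
        logits = self.classifier(self.dropout(feature))         # (batch, num_class)
        return z, logits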
Example #9
    def get_weights(self):

        target = self.target

        target_count = np.array(
            [np.sum(target == label) for label in set(target)])
        weight_target = 1.0 / target_count
        weight_samples = np.array([weight_target[t] for t in target])
        weight_samples = torch.from_numpy(weight_samples)
        weight_samples = weight_samples.double()

        if self.verbose:
            print(paint("[-] Target sampling weights:")),
            print(weight_target)

        return weight_samples
Example #10
    def __init__(
        self,
        dataset,
        window,
        stride,
        stride_test,
        path_processed,
        prefix,
        transform=None,
        verbose=False,
    ):

        self.dataset = dataset
        self.window = window
        self.stride = stride
        self.prefix = prefix
        self.transform = transform
        self.path_processed = path_processed
        self.verbose = verbose

        if prefix == "test" and stride_test == 1:
            self.path_dataset = os.path.join(path_processed,
                                             "test_sample_wise.npz")
        else:
            self.path_dataset = os.path.join(path_processed,
                                             "{}.npz".format(prefix))
        dataset = np.load(self.path_dataset)

        self.data = dataset["data"]
        self.target = dataset["target"]
        self.len = self.data.shape[0]
        assert self.data.shape[0] == self.target.shape[0]
        print(
            paint(
                f"[STEP 2] Creating {self.dataset} {self.prefix} HAR dataset of size {self.len} ..."
            ))

        if self.verbose:
            self.get_info()
            self.get_distribution()

        if prefix == "train":
            self.weight_samples = self.get_weights()
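
__len__ and __getitem__ are not shown on this page. Example #2 unpacks data, target, index = self.__getitem__(random_idx) and inspects .shape/.dtype on all three values, so here is a minimal sketch consistent with that interface; the dtype handling and the role of transform are assumptions.

    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        # Sketch only: conversion details are assumptions.
        data = self.data[idx]                # (window, n_channels) segment
        target = np.int64(self.target[idx])  # scalar label exposing .shape/.dtype
        if self.transform is not None:
            data = self.transform(data)
        index = np.array(idx)                # segment index, also inspected in Example #2
        return data, target, index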
Example #11
def load_mat(path_data, path_raw, class_map):

    # load .mat files
    print(f"[*] Reading data files from {path_data}")
    contents = sio.loadmat(path_data)

    if len(class_map) == 18:
        # opportunity dataset
        x_train = contents["trainingData"].astype(np.float32).T
        y_train = contents["trainingLabels"].reshape(-1).astype(np.int64) - 1
        x_val = contents["valData"].astype(np.float32).T
        y_val = contents["valLabels"].reshape(-1).astype(np.int64) - 1
        x_test = contents["testingData"].astype(np.float32).T
        y_test = contents["testingLabels"].reshape(-1).astype(np.int64) - 1

        # normalizing
        mean_train = np.mean(x_train, axis=0)
        std_train = np.std(x_train, axis=0)
        x_train = (x_train - mean_train) / std_train
        x_val = (x_val - mean_train) / std_train
        x_test = (x_test - mean_train) / std_train

    elif len(class_map) == 7:
        # hospital dataset
        x_train = contents["X_train"].astype(np.float32)
        y_train = contents["y_train"].reshape(-1).astype(np.int64)
        x_val = contents["X_valid"].astype(np.float32)
        y_val = contents["y_valid"].reshape(-1).astype(np.int64)
        x_test = contents["X_test"].astype(np.float32)
        y_test = contents["y_test"].reshape(-1).astype(np.int64)

        # normalizing
        mean_train = np.mean(x_train, axis=0)
        std_train = np.std(x_train, axis=0)
        x_train = (x_train - mean_train) / std_train
        x_val = (x_val - mean_train) / std_train
        x_test = (x_test - mean_train) / std_train

    else:
        # all other datasets
        x_train = contents["X_train"].astype(np.float32)
        y_train = contents["y_train"].reshape(-1).astype(np.int64)
        x_val = contents["X_valid"].astype(np.float32)
        y_val = contents["y_valid"].reshape(-1).astype(np.int64)
        x_test = contents["X_test"].astype(np.float32)
        y_test = contents["y_test"].reshape(-1).astype(np.int64)

    # show raw datasets info (sample-level)
    print("[-] Train data : {} {}, target {} {}".format(
        x_train.shape, x_train.dtype, y_train.shape, y_train.dtype))
    print("[-] Valid data : {} {}, target {} {}".format(
        x_val.shape, x_val.dtype, y_val.shape, y_val.dtype))
    print("[-] Test data : {} {}, target {} {}".format(x_test.shape,
                                                       x_test.dtype,
                                                       y_test.shape,
                                                       y_test.dtype))

    # plot raw target distributions (sample-level)
    plot_pie(y_train, "train", path_raw, class_map)
    plot_pie(y_val, "val", path_raw, class_map)
    plot_pie(y_test, "test", path_raw, class_map)

    # save raw datasets (sample-level)
    np.savez_compressed(os.path.join(path_raw, "train.npz"),
                        x=x_train,
                        y=y_train)
    np.savez_compressed(os.path.join(path_raw, "val.npz"), x=x_val, y=y_val)
    np.savez_compressed(os.path.join(path_raw, "test.npz"), x=x_test, y=y_test)
    print("[+] Raw sample datasets successfully saved!")
    print(paint("--" * 50, "blue"))
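
Note that load_mat selects its parsing branch purely from len(class_map). The lines below only illustrate that mechanism with placeholder label names; the real class_map contents (and whether it is a list or a dict) come from the experiment configuration and are not visible in these excerpts.

# Placeholder maps -- only their lengths matter for branch selection in load_mat.
class_map_18 = ["label_{}".format(i) for i in range(18)]  # routes to the Opportunity branch
class_map_7 = ["label_{}".format(i) for i in range(7)]    # routes to the hospital branch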
Example #12
def model_train(model, dataset, dataset_val, args):
    print(paint("[STEP 4] Running HAR training loop ..."))

    logger = SummaryWriter(log_dir=os.path.join(model.path_logs, "train"))
    logger_val = SummaryWriter(log_dir=os.path.join(model.path_logs, "val"))

    if args.weighted_sampler:
        print(paint("[-] Using weighted sampler (balanced batch)..."))
        sampler = WeightedRandomSampler(dataset.weight_samples,
                                        len(dataset.weight_samples))
        loader = DataLoader(dataset,
                            args.batch_size,
                            sampler=sampler,
                            pin_memory=True)
    else:
        loader = DataLoader(dataset, args.batch_size, True, pin_memory=True)
    loader_val = DataLoader(dataset_val,
                            args.batch_size,
                            False,
                            pin_memory=True)

    criterion = nn.CrossEntropyLoss(reduction="mean").cuda()

    params = filter(lambda p: p.requires_grad, model.parameters())

    if args.optimizer == "Adam":
        optimizer = optim.Adam(params, lr=args.lr)
    elif args.optimizer == "RMSprop":
        optimizer = optim.RMSprop(params, lr=args.lr)

    if args.lr_step > 0:
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.lr_step,
                                              gamma=args.lr_decay)

    if args.init_weights == "orthogonal":
        print(paint("[-] Initializing weights (orthogonal)..."))
        model.apply(init_weights_orthogonal)

    metric_best = 0.0
    start_time = time.time()

    for epoch in range(args.epochs):
        print("--" * 50)
        print("[-] Learning rate: ", optimizer.param_groups[0]["lr"])
        train_one_epoch(model, loader, criterion, optimizer, epoch, args)
        loss, acc, fm, fw = eval_one_epoch(model, loader, criterion, epoch,
                                           logger, args)
        loss_val, acc_val, fm_val, fw_val = eval_one_epoch(
            model, loader_val, criterion, epoch, logger_val, args)

        print(
            paint(
                f"[-] Epoch {epoch}/{args.epochs}"
                f"\tTrain loss: {loss:.2f} \tacc: {acc:.2f}(%)\tfm: {fm:.2f}(%)\tfw: {fw:.2f}(%)"
            ))

        print(
            paint(
                f"[-] Epoch {epoch}/{args.epochs}"
                f"\tVal loss: {loss_val:.2f} \tacc: {acc_val:.2f}(%)\tfm: {fm_val:.2f}(%)\tfw: {fw_val:.2f}(%)"
            ))

        checkpoint = {
            "model_state_dict": model.state_dict(),
            "optim_state_dict": optimizer.state_dict(),
            "criterion_state_dict": criterion.state_dict(),
            "random_rnd_state": random.getstate(),
            "numpy_rnd_state": np.random.get_state(),
            "torch_rnd_state": torch.get_rng_state(),
        }

        metric = fm_val
        if metric >= metric_best:
            print(
                paint(f"[*] Saving checkpoint... ({metric_best}->{metric})",
                      "blue"))
            metric_best = metric
            torch.save(
                checkpoint,
                os.path.join(model.path_checkpoints, "checkpoint_best.pth"))

        if epoch % 5 == 0:
            torch.save(
                checkpoint,
                os.path.join(model.path_checkpoints,
                             f"checkpoint_{epoch}.pth"),
            )

        if args.lr_step > 0:
            scheduler.step()

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))

    print(paint(f"[STEP 4] Finished HAR training loop (h:m:s): {elapsed}"))
    print(paint("--" * 50, "blue"))
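
train_one_epoch and eval_one_epoch are not among these excerpts. Below is a minimal sketch of a training epoch that is consistent with how Example #12 calls it and with the (z, logits) output seen in Example #3; the project's actual loop (additional losses, logging, gradient clipping, etc.) may look different.

def train_one_epoch_sketch(model, loader, criterion, optimizer, epoch, args):
    model.train()
    for data, target, _ in loader:  # SensorDataset batches: (data, target, index)
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        _, logits = model(data)     # model returns (z, logits) as in Example #3
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()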