Example #1
    def augmentation(self, img, shape):
        augset = Augment(self.args, self.mode)
        if self.args.task in ['v1', 'v2']:
            img_list = []
            for _ in range(2):  # two views: query and key
                aug_img = tf.identity(img)
                if self.args.task == 'v1':
                    aug_img = augset._augmentv1(aug_img, shape)  # MoCo v1 augmentation
                else:
                    radius = np.random.choice([3, 5])
                    aug_img = augset._augmentv2(aug_img, shape, (radius, radius))  # MoCo v2 (random blur radius)
                img_list.append(aug_img)
            return img_list
        else:
            return augset._augment_lincls(img, shape)
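For context, a minimal sketch of how a two-view method like this is typically mapped over a tf.data pipeline. The image-path list, the fixed 224x224 shape, and the loader object holding the method are illustrative assumptions, not part of the snippet:

import tensorflow as tf

def load_and_augment(path):
    # decode a JPEG, then produce the (query, key) pair for contrastive pretraining
    img = tf.io.decode_jpeg(tf.io.read_file(path), channels=3)
    query, key = loader.augmentation(img, (224, 224, 3))  # loader is hypothetical
    return query, key

dataset = (tf.data.Dataset.from_tensor_slices(image_paths)  # hypothetical path list
           .map(load_and_augment, num_parallel_calls=tf.data.AUTOTUNE)
           .batch(256))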
Example #2
    def augmentation(img, label, shape):
        augment = Augment(args, mode)
        img = augment(img, shape)

        # one-hot encoding
        label = tf.one_hot(label, args.classes)
        return img, label
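The one-hot step on its own, for reference (a depth of 10 is illustrative):

import tensorflow as tf

tf.one_hot(3, depth=10)
# -> float32 tensor [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]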
Example #3
    def __init__(self,
                 args,
                 task,
                 mode,
                 datalist,
                 batch_size,
                 num_workers=1,
                 shuffle=True):
        self.args = args
        self.task = task
        self.mode = mode
        self.datalist = datalist
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.shuffle = shuffle

        self.augset = Augment(self.args, self.mode)
        self.dataloader = self._dataloader()
Example #4
    def augmentation(self, img, shape):
        augset = Augment(self.args, self.mode)
        if self.args.task == 'pretext':
            img_dict = {}
            offset_list = []
            size_list = []
            isflip_list = []
            prob_list = [{
                'p_blur': 1.,
                'p_solar': 0.
            }, {
                'p_blur': .1,
                'p_solar': .2
            }]
            for i, view in enumerate(['view1', 'view2']):  # view1, view2
                aug_img = tf.identity(img)
                aug_img, offset, size, isflip = augset._augment_pretext(
                    aug_img, shape, **prob_list[i])
                img_dict[view] = aug_img
                offset_list.append(offset)
                size_list.append(size)
                isflip_list.append(isflip)

            A_dict = self.get_distance_A(offset_list, size_list, isflip_list)
            img_dict.update(A_dict)
            if self.mode == 'train':
                return img_dict
            else:
                return img_dict, {
                    'img': img,
                    'offset_list': offset_list,
                    'size_list': size_list,
                    'isflip_list': isflip_list
                }
        else:
            raise NotImplementedError('lincls is not implemented yet.')
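The two probability dicts make the views asymmetric: view1 is always blurred and never solarized, while view2 is rarely blurred and occasionally solarized, following the asymmetric-augmentation recipe popularized by BYOL. A hedged sketch of consuming the returned dict in train mode (the loader instance, img, and shape are assumptions):

out = loader.augmentation(img, shape)      # mode == 'train' returns only img_dict
view1, view2 = out['view1'], out['view2']  # the two augmented views
# the remaining keys, added by get_distance_A(...), describe how the two
# crops relate (offsets, sizes, horizontal flips)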
Example #5
    def __init__(self, args, logger, num_workers=1, **kwargs):
        super(BarlowTwins, self).__init__(**kwargs)
        self.args = args
        self._num_workers = num_workers
        norm = 'bn' if self._num_workers == 1 else 'syncbn'

        # preprocess
        augment = Augment(args)
        self.preprocess = tf.keras.Sequential(name='preprocess')
        self.preprocess.add(Lambda(lambda x: augment._random_color_jitter(x)))
        self.preprocess.add(Lambda(lambda x: augment._random_grayscale(x)))
        if self.args.dataset == 'imagenet':
            self.preprocess.add(
                Lambda(lambda x: augment._random_gaussian_blur(x)))
        self.preprocess.add(Lambda(lambda x: augment._random_hflip(x)))
        self.preprocess.add(Lambda(lambda x: augment._standardize(x)))

        # encoder
        DEFAULT_ARGS = {
            "use_bias": self.args.use_bias,
            "kernel_regularizer": l2(self.args.weight_decay)
        }
        FAMILY_DICT[self.args.backbone].Conv2D = _conv2d(**DEFAULT_ARGS)
        FAMILY_DICT[self.args.backbone].BatchNormalization = _batchnorm(
            norm=norm)
        FAMILY_DICT[self.args.backbone].Dense = _dense(**DEFAULT_ARGS)

        DEFAULT_ARGS.update({'norm': norm})  # for resnet18
        self.encoder = MODEL_DICT[self.args.backbone](
            include_top=False,
            weights=None,
            input_shape=(self.args.img_size, self.args.img_size, 3),
            pooling='avg',
            **DEFAULT_ARGS if self.args.backbone == 'resnet18' else {})
        DEFAULT_ARGS.pop('norm')  # for resnet18

        # projector
        num_mlp = 3
        self.projector = tf.keras.Sequential(name='projector')
        for i in range(num_mlp - 1):
            self.projector.add(
                _dense(**DEFAULT_ARGS)(self.args.proj_dim,
                                       name=f'proj_fc{i+1}'))
            self.projector.add(
                _batchnorm(norm=norm)(epsilon=1.001e-5, name=f'proj_bn{i+1}'))
            self.projector.add(Activation('relu', name=f'proj_relu{i+1}'))

        # i leaks from the loop above, so with num_mlp = 3 the final layer is proj_fc3
        self.projector.add(
            _dense(**DEFAULT_ARGS)(self.args.proj_dim, name=f'proj_fc{i+2}'))
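The snippet defines only __init__; a minimal sketch of how the three pieces could compose per view in call (the method signature and training-flag handling are assumptions, not the repository's actual code):

    def call(self, inputs, training=False):
        x = self.preprocess(inputs)               # jitter / grayscale / (blur) / flip / standardize
        h = self.encoder(x, training=training)    # backbone features, global-average pooled
        z = self.projector(h, training=training)  # embedding fed to the Barlow Twins loss
        return z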
Example #6
from typing import Tuple


def augment(in_path: str,
            anomaly_type: str,
            iterations: int,
            block_size: int = 1024) -> Tuple[np.ndarray, np.ndarray]:
    global BLOCKSIZE
    PLT_CHANNELS = list(range(12))
    a = Augment(use_path=True, path=in_path, anomaly_type=anomaly_type)
    ANOMALY = (anomaly_type, "", ".")  # anomaly type, "", out path
    ITERATIONS = iterations
    BLOCKSIZE = block_size
    ecg, ann = main(PLT_CHANNELS,
                    ITERATIONS,
                    a,
                    ANOMALY,
                    blocksize=BLOCKSIZE,
                    ret=True)
    return np.array(ecg), np.array(ann)
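A hypothetical invocation; the record path and anomaly label below are placeholders, not values the snippet defines:

ecg, ann = augment('data/record_100', anomaly_type='PVC', iterations=5)
print(ecg.shape, ann.shape)  # augmented ECG blocks and their annotations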
Example #7
def trainModel():
    # Parse args
    parser = argparse.ArgumentParser(description='Train the CNN')
    parser.add_argument('--expt_dir', default='./logs',
                        help='save dir for experiment logs')
    parser.add_argument('--train', default='./data',
                        help='path to training set')
    parser.add_argument('--val', default='./data',
                        help='path to validation set')
    parser.add_argument('--test', default='./data',
                        help='path to test set')
    parser.add_argument('--save_dir', default='./models',
                        help='path to save model')
    parser.add_argument('--arch', default='models/cnn.json',
                        help='path to model architecture')
    parser.add_argument('--model_name', default='model',
                        help='name of the model to save logs, weights')
    parser.add_argument('--lr', default=0.001, help='learning rate')
    parser.add_argument('--init', default='1', help='initialization')
    parser.add_argument('--batch_size', default=20, help='batch_size')
    args = parser.parse_args()

    # Load data
    train_path, valid_path, test_path = args.train, args.val, args.test
    logs_path = args.expt_dir
    model_path, model_name = args.save_dir, args.model_name
    model_path = os.path.join(model_path, model_name)
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    lr, batch_size, init = float(args.lr), int(args.batch_size), int(args.init)

    data = loadData(train_path, valid_path, test_path)
    train_X, train_Y, valid_X, valid_Y, test_X, test_Y = data['train']['X'], data['train']['Y'],\
                                                         data['valid']['X'], data['valid']['Y'],\
                                                         data['test']['X'], data['test']['Y']

    # Logging
    train_log_name = '{}.train.log'.format(model_name)
    valid_log_name = '{}.valid.log'.format(model_name)
    train_log = setup_logger('train-log', os.path.join(logs_path, train_log_name))
    valid_log = setup_logger('valid-log', os.path.join(logs_path, valid_log_name))

    # Train
    num_epochs = 500
    num_batches = int(float(train_X.shape[0]) / batch_size)
    steps = 0
    patience = 100
    early_stop = 0

    model = getModel(lr)
    loss_history = [np.inf]
    for epoch in range(num_epochs):
        print('Epoch {}'.format(epoch))
        steps = 0
        indices = np.arange(train_X.shape[0])
        np.random.shuffle(indices)
        train_X, train_Y = train_X[indices], train_Y[indices]
        for batch in range(num_batches):
            start, end = batch * batch_size, (batch + 1) * batch_size
            x = Augment(train_X[range(start, end)]).batch
            y = train_Y[range(start, end)]
            model.fit(x.reshape((-1, 1, 28, 28)), y, batch_size=batch_size, verbose=0)
            steps += batch_size
            if steps % train_X.shape[0] == 0 and steps != 0:
                train_loss, train_acc = model.evaluate(train_X.reshape((-1, 1, 28, 28)), train_Y)
                train_log.info('Epoch {}, Step {}, Loss: {}, Accuracy: {}, lr: {}'.format(epoch, steps, train_loss, train_acc, lr))
                valid_loss, valid_acc = model.evaluate(valid_X.reshape((-1, 1, 28, 28)), valid_Y)
                valid_log.info('Epoch {}, Step {}, Loss: {}, Accuracy: {}, lr: {}'.format(epoch, steps, valid_loss, valid_acc, lr))
                if valid_loss < min(loss_history):
                    save_path = os.path.join(model_path, 'model')
                    model.save(save_path)
                    early_stop = 0  # improvement: reset the patience counter
                else:
                    early_stop += 1  # no improvement at this evaluation
                if (early_stop >= patience):
                    print "No improvement in validation loss for " + str(patience) + " steps - stopping training!"
                    print("Optimization Finished!")
                    return 1
                loss_history.append(valid_loss)
    print("Optimization Finished!")
Example #8
def trainModel():
    # Parse args
    parser = argparse.ArgumentParser(description='Train the CNN')
    parser.add_argument('--expt_dir',
                        default='./logs',
                        help='save dir for experiment logs')
    parser.add_argument('--train',
                        default='./data',
                        help='path to training set')
    parser.add_argument('--val',
                        default='./data',
                        help='path to validation set')
    parser.add_argument('--test', default='./data', help='path to test set')
    parser.add_argument('--save_dir',
                        default='./models',
                        help='path to save model')
    parser.add_argument('--arch',
                        default='models/cnn.json',
                        help='path to model architecture')
    parser.add_argument('--model_name',
                        default='model',
                        help='name of the model to save logs, weights')
    parser.add_argument('--lr', default=0.001, help='learning rate')
    parser.add_argument('--init', default='1', help='initialization')
    parser.add_argument('--batch_size', default=20, help='batch_size')
    args = parser.parse_args()

    # Load data
    train_path, valid_path, test_path = args.train, args.val, args.test
    logs_path = args.expt_dir
    model_path, model_arch, model_name = args.save_dir, args.arch, args.model_name
    model_path = os.path.join(model_path, model_name)
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    lr, batch_size, init = float(args.lr), int(args.batch_size), int(args.init)

    data = loadData(train_path, valid_path, test_path)
    train_X, train_Y, valid_X, valid_Y, test_X, test_Y = data['train']['X'], data['train']['Y'],\
                                                         data['valid']['X'], data['valid']['Y'],\
                                                         data['test']['X'], data['test']['Y'],

    # Load architecture
    arch = loadArch(model_arch)

    # Logging
    train_log_name = '{}.train.log'.format(model_name)
    valid_log_name = '{}.valid.log'.format(model_name)
    train_log = setup_logger('train-log',
                             os.path.join(logs_path, train_log_name))
    valid_log = setup_logger('valid-log',
                             os.path.join(logs_path, valid_log_name))

    # GPU config
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)

    # Train
    num_epochs = 100
    num_batches = int(float(train_X.shape[0]) / batch_size)
    steps = 0
    patience = 50
    early_stop = 0

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as session:
        model = CNN(arch, session, logs_path, init, lr)
        loss_history = [np.inf]
        for epoch in range(num_epochs):
            print('Epoch {}'.format(epoch))
            steps = 0
            indices = np.arange(train_X.shape[0])
            np.random.shuffle(indices)
            train_X, train_Y = train_X[indices], train_Y[indices]
            for batch in range(num_batches):
                start, end = batch * batch_size, (batch + 1) * batch_size
                x = Augment(train_X[range(start, end)]).batch
                y = train_Y[range(start, end)]
                try:
                    model.step(x, y)
                except MemoryError:
                    print('Memory error in step')
                    exit()
                steps += batch_size
                if steps % train_X.shape[0] == 0 and steps != 0:
                    try:
                        train_loss, train_acc = testModel(
                            model, train_X, train_Y, batch_size)
                    except MemoryError:
                        print('Memory error in test for train')
                        exit()
                    train_log.info(
                        'Epoch {}, Step {}, Loss: {}, Accuracy: {}, lr: {}'.
                        format(epoch, steps, train_loss, train_acc, model.lr))
                    try:
                        valid_loss, valid_acc = testModel(
                            model, valid_X, valid_Y, batch_size)
                    except MemoryError:
                        print('Memory error in test for valid')
                        exit()
                    valid_log.info(
                        'Epoch {}, Step {}, Loss: {}, Accuracy: {}, lr: {}'.
                        format(epoch, steps, valid_loss, valid_acc, model.lr))
                    if valid_loss < min(loss_history):
                        save_path = os.path.join(model_path, 'model')
                        model.save(save_path)
                        early_stop = 0  # improvement: reset the patience counter
                    else:
                        early_stop += 1  # no improvement at this evaluation
                    if (early_stop >= patience):
                        print "No improvement in validation loss for " + str(
                            patience) + " steps - stopping training!"
                        print("Optimization Finished!")
                        return 1
                    loss_history.append(valid_loss)
        print("Optimization Finished!")
Example #9
AUTO = tf.data.experimental.AUTOTUNE  # assumed definition of the AUTO constant used below


class DataLoader:
    def __init__(self,
                 args,
                 task,
                 mode,
                 datalist,
                 batch_size,
                 num_workers=1,
                 shuffle=True):
        self.args = args
        self.task = task
        self.mode = mode
        self.datalist = datalist
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.shuffle = shuffle

        self.augset = Augment(self.args, self.mode)
        self.dataloader = self._dataloader()

    def __len__(self):
        return len(self.datalist)

    def fetch_dataset(self, path, y=None):
        x = tf.io.read_file(path)
        if y is not None:
            return tf.data.Dataset.from_tensors((x, y))
        return tf.data.Dataset.from_tensors(x)

    def augmentation(self, img, shape):
        if self.task == 'pretext':
            img_list = []
            for _ in range(2):  # query, key
                aug_img = tf.identity(img)
                aug_img = self.augset._augment_simsiam(aug_img, shape)
                img_list.append(aug_img)
            return img_list
        else:
            return self.augset._augment_lincls(img, shape)

    def dataset_parser(self, value, label=None):
        if self.args.dataset == 'imagenet':
            shape = tf.image.extract_jpeg_shape(value)
            img = tf.io.decode_jpeg(value, channels=3)
        elif self.args.dataset == 'cifar10':
            shape = (32, 32, 3)
            img = value

        if label is None:
            # pretext
            return self.augmentation(img, shape)
        else:
            # lincls
            inputs = self.augmentation(img, shape)
            # labels = tf.one_hot(label, self.args.classes)
            return (inputs, label)

    def _dataloader(self):
        self.imglist = self.datalist[:, 0].tolist()
        if self.task == 'pretext':
            dataset = tf.data.Dataset.from_tensor_slices(self.imglist)
        else:
            self.labellist = self.datalist[:, 1].tolist()
            dataset = tf.data.Dataset.from_tensor_slices(
                (self.imglist, self.labellist))

        dataset = dataset.repeat()
        if self.shuffle:
            dataset = dataset.shuffle(len(self.datalist))

        if self.args.dataset == 'imagenet':
            dataset = dataset.interleave(self.fetch_dataset,
                                         num_parallel_calls=AUTO)

        dataset = dataset.map(self.dataset_parser, num_parallel_calls=AUTO)
        dataset = dataset.batch(self.batch_size)
        dataset = dataset.prefetch(AUTO)
        return dataset
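A hedged wiring example; args and the contents of datalist (rows of [path, label]) are assumptions inferred from how the class reads them:

loader = DataLoader(args, task='pretext', mode='train',
                    datalist=datalist, batch_size=256)
for query, key in loader.dataloader.take(1):
    print(query.shape, key.shape)  # one augmented (query, key) batch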
Example #10
def main():
    args = parse_args()
    if args.augment.lower() == 'none':
        args.augment = None
    device = to_device(args.gpu)

    args.seed = args.seed + args.fold
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    data = load_data(args.data)
    num_features = data.num_features
    num_classes = data.num_classes

    trn_graphs, test_graphs = load_data_fold(args.data, args.fold)
    trn_loader = DataLoader(trn_graphs, batch_size=256)
    test_loader = DataLoader(test_graphs, batch_size=256)

    if args.iters == 'auto':
        args.iters = math.ceil(len(trn_graphs) / args.batch_size)
    else:
        args.iters = int(args.iters)

    model = GIN(num_features, num_classes, args.units, args.layers,
                args.dropout)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    loss_func = SoftCELoss()

    augment = Augment(trn_graphs, args.augment, aug_size=args.aug_size)

    if args.verbose > 0:
        print(' epochs\t   loss\ttrn_acc\ttest_acc')

    out_list = dict(trn_loss=[], trn_acc=[], test_loss=[], test_acc=[])
    for epoch in range(args.epochs):
        model.train()
        loss_sum = 0
        for _ in range(args.iters):
            idx = torch.randperm(len(trn_graphs))[:args.batch_size]
            data = augment(idx).to(device)
            output = model(data.x, data.edge_index, data.batch)
            loss = loss_func(output, data.y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()

        if args.schedule:
            scheduler.step(epoch)

        trn_loss = loss_sum / args.iters
        trn_acc = eval_acc(model, trn_loader, device)
        test_loss = eval_loss(model, loss_func, test_loader, device)
        test_acc = eval_acc(model, test_loader, device)

        out_list['trn_loss'].append(trn_loss)
        out_list['trn_acc'].append(trn_acc)
        out_list['test_loss'].append(test_loss)
        out_list['test_acc'].append(test_acc)

        if args.verbose > 0 and (epoch + 1) % args.verbose == 0:
            print(
                f'{epoch + 1:7d}\t{trn_loss:7.4f}\t{trn_acc:7.4f}\t{test_acc:7.4f}'
            )

    if args.print_all:
        out = {arg: getattr(args, arg) for arg in vars(args)}
        out['all'] = out_list
        print(json.dumps(out))
    else:
        print(f'Training accuracy: {out_list["trn_acc"][-1]}')
        print(f'Test accuracy: {out_list["test_acc"][-1]}')
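The 'auto' branch sizes the iteration count to roughly one pass over the training graphs; for instance (numbers illustrative):

import math

len_trn_graphs, batch_size = 1000, 256
iters = math.ceil(len_trn_graphs / batch_size)  # -> 4 optimizer steps per epoch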
Example #11
#!/usr/bin/env python

from __future__ import print_function
from augment import Augment
augment = Augment()
augment.do_grayscale()
augment.do_flip()
augment.do_resize()
augment.do_augment()
augment.do_clean()