Example #1
def train_step(model_with_loss: Loss, optimizer: Optimizer, x_batch, y_batch) -> Dict[str, tf.Tensor]:
    with tf.GradientTape(persistent=True) as tape: # type: ignore
        _ = model_with_loss(x_batch, y_batch)
        loss_value = model_with_loss.metric_values["_loss"]
        loss_mean = tf.reduce_mean(loss_value)

    metric_values = model_with_loss.metric_values
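    # per-example 0/1 error: 1.0 wherever the argmax of the cached outputs disagrees with the one-hot label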
    error = tf.cast(tf.argmax(metric_values["outputs"], axis=1) != tf.argmax(y_batch, axis=1), tf.float32)
    metric_values["error"] = error
    model_with_loss.reset()
    # compute gradients of the mean loss w.r.t. the model parameters
    grads = tape.gradient(loss_mean, model_with_loss.parameters())
    optimizer.apply_gradients(zip(grads, model_with_loss.parameters()))
    return metric_values
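
A minimal sketch of how a step function like this might be driven; train_epoch and dataset are assumed names, with model_with_loss and the optimizer taken as already constructed:

# Hypothetical driver loop for the train_step above (names are assumptions).
import tensorflow as tf

def train_epoch(model_with_loss, optimizer, dataset):
    # average the per-batch "error" metric returned by train_step over one pass of the data
    mean_error = tf.keras.metrics.Mean()
    for x_batch, y_batch in dataset:
        metrics = train_step(model_with_loss, optimizer, x_batch, y_batch)
        mean_error.update_state(metrics["error"])
    return mean_error.result()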
Example #2
def main(args):
    # get datasets
    source_train, source_test = chainer.datasets.get_svhn()
    target_train, target_test = chainer.datasets.get_mnist(ndim=3,
                                                           rgb_format=True)
    source = source_train, source_test

    # resize mnist to 32x32
    def transform(in_data):
        img, label = in_data
        img = resize(img, (32, 32))
        return img, label

    target_train = TransformDataset(target_train, transform)
    target_test = TransformDataset(target_test, transform)

    target = target_train, target_test

    # load pretrained source, or perform pretraining
    pretrained = os.path.join(args.output, args.pretrained_source)
    if not os.path.isfile(pretrained):
        source_cnn = pretrain_source_cnn(source, args)
    else:
        source_cnn = Loss(num_classes=10)
        serializers.load_npz(pretrained, source_cnn)

    # how well does this perform on target domain?
    test_pretrained_on_target(source_cnn, target, args)

    # initialize the target cnn (do not use source_cnn.copy)
    target_cnn = Loss(num_classes=10)
    # copy parameters from source cnn to target cnn
    target_cnn.copyparams(source_cnn)

    train_target_cnn(source, target, source_cnn, target_cnn, args)
Example #3
    def __init__(self, train_dl, val_dl):
        self.device = ('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.train_dl = train_dl
        self.val_dl = val_dl

        self.loss = Loss()
        self.net = UNet(1).to(self.device)
        self.net.apply(Model._init_weights)
        self.criterion = self.loss.BCEDiceLoss
        self.optim = None
        self.scheduler = None

        self._init_optim(LR, BETAS)

        self.cycles = 0
        self.hist = {'train': [], 'val': [], 'loss': []}

        utils.create_dir('./pt')
        utils.log_data_to_txt('train_log', f'\nUsing device {self.device}')
Example #4
def pretrain_source_cnn(data, args, epochs=1000):
    print(":: pretraining source encoder")
    source_cnn = Loss(num_classes=10)
    if args.device >= 0:
        source_cnn.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(source_cnn)

    train_iterator, test_iterator = data2iterator(data,
                                                  args.batchsize,
                                                  multiprocess=False)

    # train_iterator = chainer.iterators.MultiprocessIterator(data, args.batchsize, n_processes=4)

    updater = chainer.training.StandardUpdater(iterator=train_iterator,
                                               optimizer=optimizer,
                                               device=args.device)
    trainer = chainer.training.Trainer(updater, (epochs, 'epoch'),
                                       out=args.output)

    # learning rate decay
    # trainer.extend(extensions.ExponentialShift("alpha", rate=0.9, init=args.learning_rate, target=args.learning_rate*10E-5))

    trainer.extend(
        extensions.Evaluator(test_iterator, source_cnn, device=args.device))
    # trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(10, "epoch"))
    trainer.extend(extensions.snapshot_object(
        optimizer.target, "source_model_epoch_{.updater.epoch}"),
                   trigger=(epochs, "epoch"))

    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    trainer.run()

    return source_cnn
Example #5
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--size', type=str, default='512X512', help='Input size, for example 512X512. Must be multiples of 2')
    arg('--num_workers', type=int, default=4, help='Enter the number of workers')
    arg('--batch_size', type=int, default=16, help='Enter batch size')
    arg('--n_epochs', type=int, default=52, help='Enter number of epochs to run training for')
    arg('--report_each', type=int, default=10, help='Enter the span of last readings of running loss to report')
    arg('--lr', type=float, default=0.0001, help='Enter learning rate')
    arg('--fold_no', type=int, default=0, help='Enter the fold no')
    arg('--to_augment', action='store_true', help='Augmentation flag')
    args = parser.parse_args()


    local_data_path = Path('.').absolute()
    local_data_path.mkdir(exist_ok=True)
    #mention the fold path here
    train_path=local_data_path/'..'/'input'/'train'
    a=CoinDataset(train_path,to_augment=args.to_augment)
    n_classes=get_n_classes(train_path)
    print(n_classes)
    '''
    num_workers,batch_size
    '''
    def make_loader(ds_root: Path, to_augment=False, shuffle=False):
        return DataLoader(
            dataset=CoinDataset(ds_root, to_augment=to_augment),
            shuffle=shuffle,
            num_workers=args.num_workers,
            batch_size=args.batch_size,
            pin_memory=True
        )

    #creating a dataloader
    #mention the fold path here
    train_path=local_data_path/'..'/'input'/'train'
    train_loader=make_loader(train_path,to_augment=args.to_augment, shuffle=True)
    validation_path=local_data_path/'..'/'input'/'validation'
    validation_loader=make_loader(validation_path,to_augment=args.to_augment, shuffle=True)
    test_path=local_data_path/'..'/'input'/'test'
    test_loader=make_loader(test_path,to_augment=args.to_augment, shuffle=True)

    #define model, and handle gpus

    print('device is',device)
    model_name='resnet50'
    model=get_model(model_name=model_name,pretrained_status=True,n_classes=n_classes).to(device)
    if device.type=="cuda":
        #model = nn.DataParallel(model, device_ids=device_list)
        print('cuda devices',device_list)

    #define optimizer and learning_rate
    init_optimizer=lambda lr: Adam(model.parameters(), lr=lr)
    lr=args.lr
    optimizer=init_optimizer(lr)
    criterion=Loss()
    #print(model)

    report_each=args.report_each
    #model save implementation
    model_path= local_data_path/'model_checkpoints'
    model_path.mkdir(exist_ok=True)
    model_path=local_data_path/'model_checkpoints'/'{model_name}_{fold}.pt'.format(model_name=model_name,fold=args.fold_no)
    best_model_path= local_data_path/'best_model_checkpoints'
    best_model_path.mkdir(exist_ok=True)
    best_model_path=local_data_path/'best_model_checkpoints'/'{model_name}_{fold}.pt'.format(model_name=model_name,fold=args.fold_no)
    #updated fold checkpoint here
    save = lambda ep: torch.save({
        'model': model.state_dict(),
        'epoch': ep,
        'best_valid_loss': best_valid_loss
    }, str(model_path))


    best_valid_loss = float('inf')
    valid_losses = []
    test_losses=[]
    valid_accuracy = []
    test_accuracy=[]
    for epoch in range(0, args.n_epochs):

        model.train()
        tq = tqdm(total=(len(train_loader) * args.batch_size))
        tq.set_description('Epoch {}, lr {}'.format(epoch, lr))
        losses = []
        for i, (inputs,_,_, targets) in enumerate(train_loader):
            inputs=inputs.to(device)
            outputs = model(inputs)
            #start here
            _, preds = torch.max(outputs, 1)
            #end here
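            # shift targets down by one (the dataset's labels are presumably 1-based)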
            targets=targets.to(device)-1
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            batch_size = inputs.size(0)
            tq.update(batch_size)
            losses.append(loss.item())
            mean_loss = np.mean(losses[-report_each:])
            tq.set_postfix(loss='{:.5f}'.format(mean_loss))
            (batch_size * loss).backward()
            optimizer.step()
        tq.close()
        save(epoch)
        valid_metrics = validation(model, criterion, validation_loader)
        valid_loss = valid_metrics['valid_loss']
        valid_losses.append(valid_loss)
        test_metrics = test(model, criterion, test_loader)
        test_loss = test_metrics['test_loss']
        test_losses.append(test_loss)
        if valid_loss < best_valid_loss:
            print('found better val loss model')
            best_valid_loss = valid_loss
            shutil.copy(str(model_path), str(best_model_path))
Example #6
    if cfg.data == 'Structured3D':
        dataset = Structured3D(cfg.Dataset.Structured3D, 'test')
    elif cfg.data == 'NYU303':
        dataset = NYU303(cfg.Dataset.NYU303, 'test', exam=cfg.exam)
    elif cfg.data == 'CUSTOM':
        dataset = CustomDataset(cfg.Dataset.CUSTOM, 'test')
    else:
        raise NotImplementedError

    dataloader = torch.utils.data.DataLoader(dataset,
                                             num_workers=cfg.num_workers)

    # create network
    model = Detector()
    # compute loss
    criterion = Loss(cfg.Weights)

    # set data parallel
    # if cfg.num_gpus > 1 and torch.cuda.is_available():
    #     model = torch.nn.DataParallel(model)

    # reload weights
    if cfg.pretrained:
        state_dict = torch.load(cfg.pretrained,
                                map_location=torch.device('cpu'))
        model.load_state_dict(state_dict)

    # set device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    criterion.to(device)
Example #7
def main():

    yolov5l = YOLOv5_large((
        608,
        608,
        3,
    ), 80)
    loss1 = Loss((
        608,
        608,
        3,
    ), 0, 80)
    loss2 = Loss((
        608,
        608,
        3,
    ), 1, 80)
    loss3 = Loss((
        608,
        608,
        3,
    ), 2, 80)
    if exists('./checkpoints/ckpt'):
        yolov5l.load_weights('./checkpoints/ckpt/variables/variables')
    optimizer = tf.keras.optimizers.Adam(1e-4)
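    # each named model output gets a small adapter lambda: Keras calls it as (y_true, y_pred),
    # and the adapter repacks that into the [outputs, labels] list the scale-specific Loss expects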
    yolov5l.compile(optimizer=optimizer,
                    loss={
                        'output1':
                        lambda labels, outputs: loss1([outputs, labels]),
                        'output2':
                        lambda labels, outputs: loss2([outputs, labels]),
                        'output3':
                        lambda labels, outputs: loss3([outputs, labels])
                    })

    class SummaryCallback(tf.keras.callbacks.Callback):
        def __init__(self, eval_freq=100):
            super().__init__()
            self.eval_freq = eval_freq
            testset = tf.data.TFRecordDataset(testset_filenames).map(
                parse_function).repeat(-1)
            self.iter = iter(testset)
            self.train_loss = tf.keras.metrics.Mean(name='train loss',
                                                    dtype=tf.float32)
            self.log = tf.summary.create_file_writer('./checkpoints')

        def on_batch_begin(self, batch, logs=None):
            pass

        def on_batch_end(self, batch, logs=None):
            self.train_loss.update_state(logs['loss'])
            if batch % self.eval_freq == 0:
                image, bbox, labels = next(self.iter)
                image = image.numpy().astype('uint8')
                predictor = Predictor(yolov5l=yolov5l)
                boundings = predictor.predict(image)
                color_map = dict()
                for bounding in boundings:
                    if bounding[5].numpy().astype('int32') not in color_map:
                        color_map[bounding[5].numpy().astype('int32')] = tuple(
                            np.random.randint(low=0, high=256,
                                              size=(3, )).tolist())
                    clr = color_map[bounding[5].numpy().astype('int32')]
                    cv2.rectangle(image,
                                  tuple(bounding[0:2].numpy().astype('int32')),
                                  tuple(bounding[2:4].numpy().astype('int32')),
                                  clr, 1)
                    cv2.putText(
                        image,
                        predictor.getClsName(
                            bounding[5].numpy().astype('int32')),
                        tuple(bounding[0:2].numpy().astype('int32')),
                        cv2.FONT_HERSHEY_PLAIN, 1, clr, 2)
                image = tf.expand_dims(image, axis=0)
                with self.log.as_default():
                    tf.summary.scalar('train loss',
                                      self.train_loss.result(),
                                      step=optimizer.iterations)
                    tf.summary.image('detect',
                                     image[..., ::-1],
                                     step=optimizer.iterations)
                self.train_loss.reset_states()

        def on_epoch_begin(self, epoch, logs=None):
            pass

        def on_epoch_end(self, batch, logs=None):
            pass

    # load downloaded dataset
    trainset_filenames = [
        join('trainset', filename) for filename in listdir('trainset')
    ]
    testset_filenames = [
        join('testset', filename) for filename in listdir('testset')
    ]
    trainset = tf.data.TFRecordDataset(trainset_filenames).map(
        parse_function_generator(80)).shuffle(batch_size).batch(
            batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    testset = tf.data.TFRecordDataset(testset_filenames).map(
        parse_function_generator(80)).shuffle(batch_size).batch(
            batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    callbacks = [
        tf.keras.callbacks.TensorBoard(log_dir='./checkpoints'),
        tf.keras.callbacks.ModelCheckpoint(filepath='./checkpoints/ckpt',
                                           save_freq=10000),
        SummaryCallback(),
    ]
    yolov5l.fit(trainset,
                epochs=100,
                validation_data=testset,
                callbacks=callbacks)
    yolov5l.save('yolov5l.h5')
Example #8
class Model:
    def __init__(self, train_dl, val_dl):
        self.device = ('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.train_dl = train_dl
        self.val_dl = val_dl

        self.loss = Loss()
        self.net = UNet(1).to(self.device)
        self.net.apply(Model._init_weights)
        self.criterion = self.loss.BCEDiceLoss
        self.optim = None
        self.scheduler = None

        self._init_optim(LR, BETAS)

        self.cycles = 0
        self.hist = {'train': [], 'val': [], 'loss': []}

        utils.create_dir('./pt')
        utils.log_data_to_txt('train_log', f'\nUsing device {self.device}')

    def _init_optim(self, lr, betas):
        self.optim = optim.Adam(utils.filter_gradients(self.net), lr=lr)

        self.scheduler = optim.lr_scheduler.StepLR(self.optim,
                                                   step_size=100,
                                                   gamma=.75)

    def _save_models(self):
        utils.save_state_dict(self.net, 'model', './pt')
        utils.save_state_dict(self.optim, 'optim', './pt')
        utils.save_state_dict(self.scheduler, 'scheduler', './pt')

    def train(self, epochs):
        self.net.train()
        for epoch in range(epochs):
            self.net.train()
            for idx, data in enumerate(self.train_dl):
                batch_time = time.time()

                self.cycles += 1
                print(self.cycles)

                image = data['MRI'].to(self.device)
                target = data['Mask'].to(self.device)

                output = self.net(image)

                # round the raw predictions at 0.5 (on a detached numpy copy) to score training F1
                output_rounded = np.copy(output.data.cpu().numpy())
                output_rounded[np.nonzero(output_rounded < 0.5)] = 0.
                output_rounded[np.nonzero(output_rounded >= 0.5)] = 1.
                train_f1 = self.loss.F1_metric(output_rounded,
                                               target.data.cpu().numpy())

                loss = self.criterion(output, target)

                self.hist['train'].append(train_f1)
                self.hist['loss'].append(loss.item())

                self.optim.zero_grad()
                loss.backward()
                self.optim.step()
                self.scheduler.step()

                if self.cycles % 100 == 0:
                    self._save_models()
                    val_f1 = self.evaluate()
                    utils.log_data_to_txt(
                        'train_log',
                        f'\nEpoch: {epoch}/{epochs} - Batch: {idx * BATCH_SIZE}/{len(self.train_dl.dataset)}'
                        f'\nLoss: {loss.mean().item():.4f}'
                        f'\nTrain F1: {train_f1:.4f} - Val F1: {val_f1}'
                        f'\nTime taken: {time.time() - batch_time:.4f}s')

    def evaluate(self):
        self.net.eval()
        loss_v = 0

        with torch.no_grad():
            for idx, data in enumerate(self.val_dl):
                image, target = data['MRI'], data['Mask']

                image = image.to(self.device)
                target = target.to(self.device)

                outputs = self.net(image)

                out_thresh = np.copy(outputs.data.cpu().numpy())
                out_thresh[np.nonzero(out_thresh < .3)] = 0.0
                out_thresh[np.nonzero(out_thresh >= .3)] = 1.0

                loss = self.loss.F1_metric(out_thresh,
                                           target.data.cpu().numpy())
                loss_v += loss

        self.net.train()  # restore training mode for the caller's loop
        return loss_v / len(self.val_dl)

    @classmethod
    def _init_weights(cls, layer: nn.Module):
        name = layer.__class__.__name__
        if name.find('Conv') != -1 and name.find('2d') != -1:
            nn.init.normal_(layer.weight.data, .0, 2e-2)
        if name.find('BatchNorm') != -1:
            nn.init.normal_(layer.weight.data, 1.0, 2e-2)
            nn.init.constant_(layer.bias.data, .0)
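
A minimal usage sketch for this wrapper, assuming the module-level LR/BETAS/BATCH_SIZE constants and utils helpers from the original file are in scope; train_dl and val_dl are expected to yield dicts with 'MRI' and 'Mask' tensors, as the class reads them:

# Hypothetical driver (not part of the original example).
trainer = Model(train_dl, val_dl)
trainer.train(epochs=10)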
Example #9
def train_cptn():

    detector = TextDetector()
    loss = Loss()
    optimizer = tf.keras.optimizers.Adam(
        tf.keras.optimizers.schedules.ExponentialDecay(1e-5,
                                                       decay_steps=30000,
                                                       decay_rate=0.9))
    # load dataset
    trainset = tf.data.TFRecordDataset(
        join('datasets',
             'trainset.tfrecord')).repeat(-1).map(ctpn_parse_function).batch(
                 1).prefetch(tf.data.experimental.AUTOTUNE)
    # restore from existing checkpoint
    if not exists('checkpoints'): mkdir('checkpoints')
    checkpoint = tf.train.Checkpoint(model=detector.ctpn, optimizer=optimizer)
    checkpoint.restore(tf.train.latest_checkpoint('checkpoints'))
    # create log
    log = tf.summary.create_file_writer('checkpoints')
    # train model
    avg_loss = tf.keras.metrics.Mean(name="loss", dtype=tf.float32)
    for image, labels in trainset:
        if labels.shape[1] == 0:
            print("skip sample without labels")
            continue
        with tf.GradientTape() as tape:
            bbox_pred = detector.ctpn(image)
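            # the CTPN Loss is called with the prediction/label pair packed into a single list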
            l = loss([bbox_pred, labels])
        avg_loss.update_state(l)
        # write log
        if tf.equal(optimizer.iterations % 100, 0):
            with log.as_default():
                tf.summary.scalar('loss',
                                  avg_loss.result(),
                                  step=optimizer.iterations)
                # draw text detection results
                text_lines, _, _ = detector.detect(image, False)
                image = image[0, ...].numpy().astype('uint8')
                for text_line in text_lines:
                    cv2.rectangle(image,
                                  (int(text_line[0]), int(text_line[1])),
                                  (int(text_line[2]), int(text_line[3])),
                                  (0, 255, 0), 2)
                image = tf.expand_dims(image, axis=0)
                tf.summary.image('text lines',
                                 image,
                                 step=optimizer.iterations)
            print('Step #%d Loss: %.6f lr: %.6f' %
                  (optimizer.iterations, avg_loss.result(),
                   optimizer._hyper['learning_rate'](optimizer.iterations)))
            if avg_loss.result() < 0.01: break
            avg_loss.reset_states()
        grads = tape.gradient(l, detector.ctpn.trainable_variables)
        if tf.reduce_any(
            [tf.reduce_any(tf.math.is_nan(grad)) for grad in grads]) == True:
            print("NaN was detected in gradients, skip gradient apply!")
            continue
        optimizer.apply_gradients(zip(grads,
                                      detector.ctpn.trainable_variables))
        # save model
        if tf.equal(optimizer.iterations % 2000, 0):
            checkpoint.save(join('checkpoints', 'ckpt'))
    # save the network structure with weights
    if not exists('model'): mkdir('model')
    detector.ctpn.save(join('model', 'ctpn.h5'))
Example #10
def main():

    gpus = tf.config.experimental.list_physical_devices('GPU')
    [tf.config.experimental.set_memory_growth(gpu, True) for gpu in gpus]
    # yolov5l model
    yolov5l = YOLOv5_large((608, 608, 3), 80)
    loss1 = Loss((608, 608, 3), 0, 80)
    loss2 = Loss((608, 608, 3), 1, 80)
    loss3 = Loss((608, 608, 3), 2, 80)
    #optimizer = tf.keras.optimizers.Adam(tf.keras.optimizers.schedules.ExponentialDecay(1e-5, decay_steps = 110000, decay_rate = 0.99));
    optimizer = tf.keras.optimizers.Adam(1e-5)
    checkpoint = tf.train.Checkpoint(model=yolov5l, optimizer=optimizer)
    train_loss = tf.keras.metrics.Mean(name='train loss', dtype=tf.float32)
    test_loss = tf.keras.metrics.Mean(name='test loss', dtype=tf.float32)
    # load downloaded dataset
    trainset_filenames = [
        join('trainset', filename) for filename in listdir('trainset')
    ]
    testset_filenames = [
        join('testset', filename) for filename in listdir('testset')
    ]
    trainset = tf.data.TFRecordDataset(trainset_filenames).map(
        parse_function_generator(80)).repeat(-1).shuffle(batch_size).batch(
            batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    testset = tf.data.TFRecordDataset(testset_filenames).map(
        parse_function_generator(80)).repeat(-1).shuffle(batch_size).batch(
            batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    validationset = tf.data.TFRecordDataset(testset_filenames).map(
        parse_function).repeat(-1)
    trainset_iter = iter(trainset)
    testset_iter = iter(testset)
    validationset_iter = iter(validationset)
    # restore from existing checkpoint
    if not exists('checkpoints'): mkdir('checkpoints')
    checkpoint.restore(tf.train.latest_checkpoint('checkpoints'))
    # tensorboard summary
    log = tf.summary.create_file_writer('checkpoints')
    # train model
    while True:
        images, labels = next(trainset_iter)
        labels1, labels2, labels3 = labels
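        # one label tensor per detection scale; loss1/loss2/loss3 each score one scale's output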
        with tf.GradientTape() as tape:
            outputs1, outputs2, outputs3 = yolov5l(images)
            loss = loss1([outputs1, labels1]) + loss2(
                [outputs2, labels2]) + loss3([outputs3, labels3])
        # check whether the loss numberic is correct
        if tf.math.reduce_any(tf.math.is_nan(loss)) == True:
            print("NaN was detected in loss, skip the following steps!")
            continue
        grads = tape.gradient(loss, yolov5l.trainable_variables)
        # check whether the grad numerics is correct
        if tf.math.reduce_any([
                tf.math.reduce_any(tf.math.is_nan(grad)) for grad in grads
        ]) == True:
            print("NaN was detected in gradients, skip gradient apply!")
            continue
        optimizer.apply_gradients(zip(grads, yolov5l.trainable_variables))
        train_loss.update_state(loss)
        # save model
        if tf.equal(optimizer.iterations % 10000, 0):
            # save checkpoint every 10000 steps
            checkpoint.save(join('checkpoints', 'ckpt'))
            yolov5l.save('yolov5l.h5')
        if tf.equal(optimizer.iterations % 100, 0):
            # evaluate
            for i in range(10):
                images, labels = next(testset_iter)
                # images.shape = (b, h, w, 3)
                outputs = yolov5l(images)
                loss = loss1([outputs[0], labels[0]]) + loss2(
                    [outputs[1], labels[1]]) + loss3([outputs[2], labels[2]])
                test_loss.update_state(loss)
            # visualize
            image, bbox, labels = next(validationset_iter)
            # image.shape = (h, w, 3)
            image = image.numpy().astype('uint8')
            predictor = Predictor(yolov5l=yolov5l)
            boundings = predictor.predict(image)
            color_map = dict()
            for bounding in boundings:
                if bounding[5].numpy().astype('int32') not in color_map:
                    color_map[bounding[5].numpy().astype('int32')] = tuple(
                        np.random.randint(low=0, high=256,
                                          size=(3, )).tolist())
                clr = color_map[bounding[5].numpy().astype('int32')]
                cv2.rectangle(image,
                              tuple(bounding[0:2].numpy().astype('int32')),
                              tuple(bounding[2:4].numpy().astype('int32')),
                              clr, 1)
                cv2.putText(
                    image,
                    predictor.getClsName(bounding[5].numpy().astype('int32')),
                    tuple(bounding[0:2].numpy().astype('int32')),
                    cv2.FONT_HERSHEY_PLAIN, 1, clr, 2)
            image = tf.expand_dims(image, axis=0)
            # write log
            with log.as_default():
                tf.summary.scalar('train loss',
                                  train_loss.result(),
                                  step=optimizer.iterations)
                tf.summary.scalar('test loss',
                                  test_loss.result(),
                                  step=optimizer.iterations)
                tf.summary.image('detect',
                                 image[..., ::-1],
                                 step=optimizer.iterations)
            print('Step #%d Train Loss: %.6f Test Loss: %.6f' %
                  (optimizer.iterations, train_loss.result(),
                   test_loss.result()))
            # break condition
            #if train_loss.result() < 0.001: break;
            # reset
            train_loss.reset_states()
            test_loss.reset_states()
    yolov5l.save('yolov5l.h5')
Example #11
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--size',
        type=str,
        default='512X512',
        help='Input size, for example 512X512. Must be multiples of 2')
    arg('--num_workers',
        type=int,
        default=4,
        help='Enter the number of workers')
    arg('--batch_size', type=int, default=16, help='Enter batch size')
    arg('--n_epochs',
        type=int,
        default=52,
        help='Enter number of epochs to run training for')
    arg('--report_each',
        type=int,
        default=10,
        help='Enter the span of last readings of running loss to report')
    arg('--lr', type=float, default=0.0001, help='Enter learning rate')
    arg('--fold_no', type=int, default=0, help='Enter the fold no')
    arg('--to_augment', action='store_true', help='Augmentation flag')
    arg('--model_name', type=str, default='resnet18', help='enter model name')

    args = parser.parse_args()

    local_data_path = Path('.').absolute()
    local_data_path.mkdir(exist_ok=True)
    #mention the fold path here
    train_path = local_data_path / '..' / 'input' / 'train'
    a = CoinDataset(train_path, to_augment=args.to_augment)
    n_classes = get_n_classes(train_path)
    print(n_classes)
    '''

    num_workers,batch_size
    '''
    def make_loader(ds_root: Path, to_augment=False, shuffle=False):
        return DataLoader(dataset=CoinDataset(ds_root, to_augment=to_augment),
                          shuffle=shuffle,
                          num_workers=args.num_workers,
                          batch_size=args.batch_size,
                          pin_memory=True)

    #creating a dataloader
    #mention the fold path here
    train_path = local_data_path / '..' / 'input' / 'train'
    train_loader = make_loader(train_path,
                               to_augment=args.to_augment,
                               shuffle=True)
    validation_path = local_data_path / '..' / 'input' / 'validation'
    validation_loader = make_loader(validation_path,
                                    to_augment=args.to_augment,
                                    shuffle=True)
    test_path = local_data_path / '..' / 'input' / 'test'
    test_loader = make_loader(test_path,
                              to_augment=args.to_augment,
                              shuffle=True)

    #define model, and handle gpus

    print('device is', device)
    model_name = args.model_name
    model = get_model(model_name=model_name,
                      pretrained_status=True,
                      n_classes=n_classes).to(device)
    if device.type == "cuda":
        #model = nn.DataParallel(model, device_ids=device_list)
        print('cuda devices', device_list)

    #define optimizer and learning_rate
    init_optimizer = lambda lr: Adam(model.parameters(), lr=lr)
    lr = args.lr
    optimizer = init_optimizer(lr)
    criterion = Loss()
    #print(model)

    report_each = args.report_each
    #model save implementation
    model_path = local_data_path / 'model_checkpoints'
    model_path.mkdir(exist_ok=True)
    model_path = local_data_path / 'model_checkpoints' / '{model_name}_{fold}.pt'.format(
        model_name=model_name, fold=args.fold_no)
    best_model_path = local_data_path / 'best_model_checkpoints'
    best_model_path.mkdir(exist_ok=True)
    best_model_path = local_data_path / 'best_model_checkpoints' / '{model_name}_{fold}.pt'.format(
        model_name=model_name, fold=args.fold_no)
    #updated fold checkpoint here
    save = lambda ep: torch.save(
        {
            'model': model.state_dict(),
            'epoch': ep,
            'best_valid_loss': best_valid_loss
        }, str(model_path))

    best_valid_loss = float('inf')
    valid_losses = []
    test_losses = []
    valid_accuracy = []
    test_accuracy = []

    ####defining the dataframe for dumping outputs##############################
    n_trials = 1
    modes = ['train', 'validation', 'test']
    metrics = ['loss', 'accuracy']

    def get_column_name(trial, mode, metric):
        return 'trial= ' + str(trial) + ' mode=' + mode + ' metric=' + metric

    col_list = []
    for trial in range(n_trials):
        for mode in modes:
            for metric in metrics:
                col_list.append(get_column_name(trial, mode, metric))

    #create the dataframe before saving the results
    df = pd.DataFrame(0.0, index=np.arange(args.n_epochs), columns=col_list)
    ########training loop begins#################################################
    ####need to start the trial of the experiment here
    for trial in range(n_trials):
        for epoch in range(0, args.n_epochs):

            model.train()
            tq = tqdm(total=(len(train_loader) * args.batch_size))
            tq.set_description('Epoch {}, lr {}'.format(epoch, lr))
            losses = []
            for i, (inputs, _, _, targets) in enumerate(train_loader):
                inputs = inputs.to(device)
                outputs = model(inputs)
                #start here
                _, preds = torch.max(outputs, 1)
                #end here
                targets = targets.to(device) - 1
                loss = criterion(outputs, targets)
                optimizer.zero_grad()
                batch_size = inputs.size(0)
                tq.update(batch_size)
                losses.append(loss.item())
                mean_loss = np.mean(losses[-report_each:])
                tq.set_postfix(loss='{:.5f}'.format(mean_loss))
                (batch_size * loss).backward()
                optimizer.step()
            tq.close()
            save(epoch)
            #############epoch completes here, dump the data to the dataframe##############

            train_loss = np.mean(losses)
            valid_metrics = validation(model, criterion, validation_loader)
            valid_loss = valid_metrics['valid_loss']
            valid_accuracy = valid_metrics['valid_accuracy']
            valid_losses.append(valid_loss)
            test_metrics = test(model, criterion, test_loader)
            test_loss = test_metrics['test_loss']
            test_accuracy = test_metrics['test_accuracy']
            test_losses.append(test_loss)

            #update the data in the data frame
            #test accuracy not needed leave it

            df.loc[epoch, get_column_name(trial, 'train', 'loss')] = train_loss
            df.loc[epoch, get_column_name(trial, 'validation',
                                          'loss')] = valid_loss
            df.loc[epoch, get_column_name(trial, 'validation',
                                          'accuracy')] = valid_accuracy
            df.loc[epoch, get_column_name(trial, 'test', 'loss')] = test_loss
            df.loc[epoch, get_column_name(trial, 'test',
                                          'accuracy')] = test_accuracy

            #just check if correctly updated
            print('just checking if one field updated',
                  df.loc[epoch][get_column_name(trial, 'test', 'accuracy')])

            #save the incomplete dataframe till now
            df.to_csv('results.csv')
            if valid_loss < best_valid_loss:
                print('found better val loss model')
                best_valid_loss = valid_loss
                shutil.copy(str(model_path), str(best_model_path))

    #save the complete dataframe here
    df.to_csv('results.csv')
Example #12
def main():

  gpus = tf.config.experimental.list_physical_devices('GPU');
  [tf.config.experimental.set_memory_growth(gpu, True) for gpu in gpus];
  # yolov3 model
  yolov3 = YOLOv3((416,416,3), 80);
  yolov3_loss = Loss((416,416,3), 80);
  #optimizer = tf.keras.optimizers.Adam(tf.keras.optimizers.schedules.ExponentialDecay(1e-5, decay_steps = 110000, decay_rate = 0.99));
  optimizer = tf.keras.optimizers.Adam(1e-5);
  checkpoint = tf.train.Checkpoint(model = yolov3, optimizer = optimizer);
  train_loss = tf.keras.metrics.Mean(name = 'train loss', dtype = tf.float32);
  test_loss = tf.keras.metrics.Mean(name = 'test loss', dtype = tf.float32);
  # load downloaded dataset
  trainset_filenames = [join('trainset', filename) for filename in listdir('trainset')];
  testset_filenames = [join('testset', filename) for filename in listdir('testset')];
  trainset = tf.data.TFRecordDataset(trainset_filenames).map(parse_function_generator(80)).repeat(-1).shuffle(batch_size).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE);
  testset = tf.data.TFRecordDataset(testset_filenames).map(parse_function_generator(80)).repeat(-1).shuffle(batch_size).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE);
  validationset = tf.data.TFRecordDataset(testset_filenames).map(parse_function).repeat(-1);
  trainset_iter = iter(trainset);
  testset_iter = iter(testset);
  validationset_iter = iter(validationset);
  # restore from existing checkpoint
  if not exists('checkpoints'): mkdir('checkpoints');
  checkpoint.restore(tf.train.latest_checkpoint('checkpoints'));
  # tensorboard summary
  log = tf.summary.create_file_writer('checkpoints');
  # train model
  while True:
    images, labels = next(trainset_iter);
    with tf.GradientTape() as tape:
      outputs = yolov3(images);
      loss = yolov3_loss([*outputs, *labels]);
    # check whether the loss numberic is correct
    if tf.math.reduce_any(tf.math.is_nan(loss)) == True:
      print("NaN was detected in loss, skip the following steps!");
      continue;
    grads = tape.gradient(loss, yolov3.trainable_variables);
    # check whether the grad numerics is correct
    if tf.math.reduce_any([tf.math.reduce_any(tf.math.is_nan(grad)) for grad in grads]) == True:
      print("NaN was detected in gradients, skip gradient apply!");
      continue;
    optimizer.apply_gradients(zip(grads, yolov3.trainable_variables));
    train_loss.update_state(loss);
    # save model
    if tf.equal(optimizer.iterations % 10000, 0):
      # save checkpoint every 10000 steps
      checkpoint.save(join('checkpoints','ckpt'));
      yolov3.save('yolov3.h5');
    if tf.equal(optimizer.iterations % 100, 0):
      # evaluate
      for i in range(10):
        images, labels = next(testset_iter); # images.shape = (b, h, w, 3)
        outputs = yolov3(images);
        loss = yolov3_loss([*outputs, *labels]);
        test_loss.update_state(loss);
      # visualize
      image, bbox, labels = next(validationset_iter); # image.shape = (h, w, 3)
      image = image.numpy().astype('uint8');
      predictor = Predictor(yolov3 = yolov3);
      boundings = predictor.predict(image);
      color_map = dict();
      for bounding in boundings:
        if bounding[5].numpy().astype('int32') in color_map:
          clr = color_map[bounding[5].numpy().astype('int32')];
        else:
          color_map[bounding[5].numpy().astype('int32')] = tuple(np.random.randint(low = 0, high = 256, size = (3,)).tolist());
          clr = color_map[bounding[5].numpy().astype('int32')];
        cv2.rectangle(image, tuple(bounding[0:2].numpy().astype('int32')), tuple(bounding[2:4].numpy().astype('int32')), clr, 5);
      image = tf.expand_dims(image, axis = 0);
      # write log
      with log.as_default():
        tf.summary.scalar('train loss', train_loss.result(), step = optimizer.iterations);
        tf.summary.scalar('test loss', test_loss.result(), step = optimizer.iterations);
        tf.summary.image('detect', image[...,::-1], step = optimizer.iterations);
      print('Step #%d Train Loss: %.6f Test Loss: %.6f' % (optimizer.iterations, train_loss.result(), test_loss.result()));
      # break condition
      #if train_loss.result() < 0.001: break;
      # reset
      train_loss.reset_states();
      test_loss.reset_states();
  yolov3.save('yolov3.h5');
Example #13
def loss(labels, outputs):
    return Loss((416, 416, 3), 80)([outputs[0], outputs[1], outputs[2],
                                    labels[0], labels[1], labels[2]])