Example #1
def main():
    model = config.get('config', 'model')
    cachedir = utils.get_cachedir(config)
    with open(os.path.join(cachedir, 'names'), 'r') as f:
        names = [line.strip() for line in f]
    width, height = np.array(utils.get_downsampling(config)) * 13
    anchors = pd.read_csv(os.path.expanduser(os.path.expandvars(config.get(model, 'anchors'))), sep='\t').values
    func = getattr(inference, config.get(model, 'inference'))
    with tf.Session() as sess:
        image = tf.placeholder(tf.float32, [1, height, width, 3], name='image')
        func(image, len(names), len(anchors))
        tf.contrib.framework.get_or_create_global_step()
        tf.global_variables_initializer().run()
        prog = re.compile(r'[_\w\d]+\/conv(\d*)\/(weights|biases|(BatchNorm\/(gamma|beta|moving_mean|moving_variance)))$')
        # Group variables by conv-layer index; the final, unnumbered conv
        # layer matches an empty group and is keyed as -1.
        variables = [(prog.match(v.op.name).group(1), v) for v in tf.global_variables() if prog.match(v.op.name)]
        variables = sorted([[int(k) if k else -1, [v for _, v in g]] for k, g in itertools.groupby(variables, operator.itemgetter(0))], key=operator.itemgetter(0))
        assert variables[0][0] == -1
        # Renumber the unnumbered layer as the last one and move it to the end,
        # matching the order in which the Darknet weight file stores layers.
        variables[0][0] = len(variables) - 1
        variables.append(variables.pop(0))
        with tf.name_scope('assign'):
            with open(os.path.expanduser(os.path.expandvars(args.file)), 'rb') as f:
                # Darknet weight file header: four 32-bit integers
                major, minor, revision, seen = struct.unpack('4i', f.read(16))
                tf.logging.info('major=%d, minor=%d, revision=%d, seen=%d' % (major, minor, revision, seen))
                for i, layer in variables:
                    tf.logging.info('processing layer %d' % i)
                    total = 0
                    for suffix in ['biases', 'beta', 'gamma', 'moving_mean', 'moving_variance', 'weights']:
                        try:
                            v = next(filter(lambda v: v.op.name.endswith(suffix), layer))
                        except StopIteration:
                            continue
                        shape = v.get_shape().as_list()
                        cnt = np.multiply.reduce(shape)
                        total += cnt
                        tf.logging.info('%s: %s=%d' % (v.op.name, str(shape), cnt))
                        p = struct.unpack('%df' % cnt, f.read(4 * cnt))
                        if suffix == 'weights':
                            ksize1, ksize2, channels_in, channels_out = shape
                            p = np.reshape(p, [channels_out, channels_in, ksize1, ksize2]) # Darknet format
                            p = np.transpose(p, [2, 3, 1, 0]) # TensorFlow format (ksize1, ksize2, channels_in, channels_out)
                        sess.run(v.assign(p))
                    tf.logging.info('%d parameters assigned' % total)
                remaining = os.fstat(f.fileno()).st_size - f.tell()
            # `layer` still holds the final layer's variables after the loop
            transpose(sess, layer, len(anchors))
        saver = tf.train.Saver()
        logdir = utils.get_logdir(config)
        if args.delete:
            tf.logging.warn('delete logging directory: ' + logdir)
            shutil.rmtree(logdir, ignore_errors=True)
        os.makedirs(logdir, exist_ok=True)
        model_path = os.path.join(logdir, 'model.ckpt')
        tf.logging.info('save model into ' + model_path)
        saver.save(sess, model_path)
        if args.summary:
            path = os.path.join(logdir, args.logname)
            summary_writer = tf.summary.FileWriter(path)
            summary_writer.add_graph(sess.graph)
            tf.logging.info('tensorboard --logdir ' + logdir)
    if remaining > 0:
        tf.logging.warn('%d bytes remaining' % remaining)
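For reference, the checkpoint written above can be restored later by rebuilding the same graph and calling Saver.restore; a minimal sketch, reusing the names from the code above (func, names, anchors, logdir) rather than anything defined in the original script:

with tf.Session() as sess:
    image = tf.placeholder(tf.float32, [1, height, width, 3], name='image')
    func(image, len(names), len(anchors))  # rebuild the same inference graph
    saver = tf.train.Saver()
    saver.restore(sess, os.path.join(logdir, 'model.ckpt'))  # load the converted weights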
Example #2
def get_instance():
    env = utils.get_env()
    for k, v in env.__dict__.items():
        print(f'{k}: {v}')
    n_states = env.n_states
    n_actions = env.n_actions
    agent = DQN(n_states, n_actions)
    logdir = utils.get_logdir()
    return env, agent, logdir
Example #3
def get_instance(diffs):
    default_conf = utils.get_config('default_config.yaml')
    conf = deepcopy(default_conf)
    recursive_merge(conf, diffs)

    env = utils.get_env(**conf['env'])
    # for k in diffs.keys():
    #     print(k, ':', env.__getattribute__(k))

    n_states = env.n_states
    n_actions = env.n_actions
    agent = DQN(n_states, n_actions, **conf['agent'])
    # different from pa_main
    logdir = utils.get_logdir(conf, default_conf)
    return env, agent, logdir
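The recursive_merge helper used above is not defined in this example; a minimal sketch, assuming it deep-merges the override dict diffs into conf in place:

def recursive_merge(base, diffs):
    """Merge `diffs` into `base` in place, recursing into nested dicts."""
    for key, value in diffs.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            recursive_merge(base[key], value)
        else:
            base[key] = value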
Example #4
def train():
    for time in range(5):
        logx.initialize(get_logdir("../runs"),
                        tensorboard=True,
                        coolname=False)

        model.load_state_dict(
            torch.load("..\\runs\exp10\last_checkpoint_ep0.pth")
            ['state_dict'])  # warmup

        dataset_train = TrainDataset(
            '../' + cfg.root_folder +
            '/five_fold/train_kfold_{}.csv'.format(time),
            '../' + cfg.root_folder + '/train/', train_transform)
        train_loader = DataLoader(dataset_train,
                                  batch_size=cfg.bs,
                                  shuffle=True)
        test_data = TrainDataset(
            '../' + cfg.root_folder +
            '/five_fold/test_kfold_{}.csv'.format(time),
            '../' + cfg.root_folder + '/train/',
        )
        test_load = DataLoader(test_data, batch_size=cfg.bs, shuffle=False)

        # train
        for epoch in range(cfg.epoch):
            loss_epoch = 0
            total = 0
            correct = 0
            for i, (x, y) in enumerate(train_loader, 1):
                x, y = x.to(device), y.to(device)
                y_hat = model(x)
                # compute accuracy
                total += x.size(0)
                _, predict = torch.max(y_hat.data, dim=1)
                correct += (predict == y).sum().item()

                # loss
                loss = criterion(y_hat, y)
                loss_epoch += loss.item()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # periodic progress logging
                if i % 30 == 0:
                    print(
                        'epoch:%d,  enumerate:%d,  loss_avg:%f,  now_acc:%f' %
                        (epoch, i, loss_epoch / i, correct / total))

            # log epoch-level metrics
            train_loss = loss_epoch / i
            train_acc = (correct / total) * 100
            logx.metric('train', {'loss': train_loss, 'acc': train_acc}, epoch)

            # validation
            # accuracy on the held-out set
            correct = 0
            total = 0
            val_loss = 0
            with torch.no_grad():
                for i, (img, label) in enumerate(test_load, 1):
                    img, label = img.to(device), label.to(device)
                    output = model(img)
                    loss = criterion(output, label)
                    val_loss += loss.cpu().item()
                    _, predicted = torch.max(output.data, dim=1)  # (max value, index)
                    total += img.size(0)
                    correct += (predicted == label).sum().item()
            val_acc = (100 * correct / total)
            val_loss /= i
            logx.metric('val', {'loss': val_loss, 'acc': val_acc}, epoch)
            # epoch loss and other metrics
            print(
                'epoch over; train_loss:%f, val_loss:%f, train_acc=%f, val_acc:%f'
                % (train_loss, val_loss, train_acc, val_acc))
            logx.save_model({
                'state_dict': model.state_dict(),
                'epoch': epoch
            },
                            val_acc,
                            higher_better=True,
                            epoch=epoch,
                            delete_old=True)
            scheduler.step()
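The get_logdir helper passed to logx.initialize above is not shown here; a minimal sketch, assuming it only creates and returns a fresh, timestamped run directory under the given root (the naming scheme is hypothetical):

import os
import time

def get_logdir(root):
    # Hypothetical helper: create a new run directory under `root`.
    logdir = os.path.join(root, time.strftime('run_%Y%m%d_%H%M%S'))
    os.makedirs(logdir, exist_ok=True)
    return logdir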
Example #5
def run(args):
    start_epoch = 1
    best_loss = 1e+9

    # logs
    args.logdir = get_logdir(args)
    logger = get_logger(os.path.join(args.logdir, 'main.log'))
    logger.info(args)
    writer = SummaryWriter(args.logdir)

    # data
    train_set = MovingMNIST(root='./data', train=True, download=True)
    valid_set = MovingMNIST(root='./data',
                            train=False,
                            download=True,
                            split=args.test_size)
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              num_workers=args.n_workers,
                              shuffle=True)
    valid_loader = DataLoader(valid_set,
                              batch_size=args.batch_size,
                              num_workers=args.n_workers,
                              shuffle=False)

    # network
    model = models.__dict__[args.model](args=args)
    model = nn.DataParallel(model)
    args.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model.to(args.device)
    # training
    criterion = get_loss_fn(args)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)

    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch'] + 1
            best_loss = checkpoint['best/{}'.format(args.loss)]
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info('Loaded checkpoint {} (epoch {})'.format(
                args.resume, start_epoch - 1))
        else:
            raise IOError('No such file {}'.format(args.resume))

    for epoch_i in range(start_epoch, args.epochs + 1):
        training = train(train_loader,
                         model,
                         criterion,
                         optimizer,
                         logger=logger,
                         args=args)
        validation = validate(valid_loader,
                              model,
                              criterion,
                              logger=logger,
                              args=args)

        writer.add_scalar('Train/{}'.format(args.loss), training[args.loss],
                          epoch_i)
        writer.add_scalar('Valid/{}'.format(args.loss), validation[args.loss],
                          epoch_i)
        writer.add_image('Train/Predict', _get_images(training['output'],
                                                      args), epoch_i)
        writer.add_image('Train/Target', _get_images(training['target'], args),
                         epoch_i)
        writer.add_image('Valid/Predict',
                         _get_images(validation['output'], args), epoch_i)
        writer.add_image('Valid/Target', _get_images(validation['target'],
                                                     args), epoch_i)

        message = '[{}] Epoch {} Train/{} {:.4f} Valid/{} {:.4f} '
        message = message.format(
            args.expid,
            epoch_i,
            args.loss,
            training[args.loss],
            args.loss,
            validation[args.loss],
        )

        is_best = validation[args.loss] < best_loss
        if is_best:
            best_loss = validation[args.loss]
            message += '(Best)'
        save_checkpoint(
            {
                'epoch': epoch_i,
                'state_dict': model.state_dict(),
                'valid/{}'.format(args.loss): validation[args.loss],
                'best/{}'.format(args.loss): best_loss,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.logdir)

        if scheduler is not None:
            scheduler.step(epoch=epoch_i)
            logger.debug('Scheduler stepped.')
            for param_group in optimizer.param_groups:
                logger.debug(param_group['lr'])

        logger.info(message)
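The save_checkpoint helper called in the loop above is not defined in this example; a minimal sketch, assuming it writes the state to the log directory and keeps a copy of the best checkpoint (the file names are assumptions):

import os
import shutil
import torch

def save_checkpoint(state, is_best, logdir, filename='checkpoint.pth'):
    # Hypothetical helper: persist `state` and keep the best-performing copy.
    path = os.path.join(logdir, filename)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(logdir, 'best.pth'))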