Example #1
def main():
    with open("12/input.txt", encoding="UTF-8") as file:
        lines = file.read().splitlines()

    network = get_network(lines)
    pathfinder = PathFinder1(network)
    paths = pathfinder.get_distinct_paths()

    print("Part 1: ", len(paths))

    network = get_network(lines, part2=True)
    pathfinder = PathFinder2(network)
    paths = pathfinder.get_distinct_paths()

    print("Part 2: ", len(paths))
Example #2
def main(args):
    # load configuration
    config = load_config(os.path.join(args.restore, 'config.json'))

    # create autoencoder
    ae = get_network(config['hiddens'], logger=g_logger)

    # build graph
    sess, saver, _ = build_graph(ae, input_shape=[None, 784])
    restore(sess, saver, args.restore)

    test_result = os.path.join(args.result, 'test')
    # make result directory if not exists
    if not os.path.exists(test_result):
        os.makedirs(test_result)

    # use mnist for test
    mnist = tf.contrib.learn.datasets.load_dataset('mnist')

    row_col_size = 10
    cnt = 0
    for x, y in next_mnist_data(mnist, 'test', batch_size=row_col_size**2):
        x_ = sess.run(ae.x_, feed_dict={ae.x: x})
        save_mnist_images(x, test_result, cnt, suffix='original', row_col_size=row_col_size)
        save_mnist_images(x_, test_result, cnt, suffix='reconstruct', row_col_size=row_col_size)
        cnt += 1
Example #3
def autotvm_tune(network, batch_size, dtype, target, log_prefix):
    kernel_log = log_prefix + ".kernel.log"
    graph_log = log_prefix + ".graph.log"
    os.makedirs(os.path.dirname(graph_log), exist_ok=True)
    if os.path.exists(kernel_log):
        os.remove(kernel_log)
    if os.path.exists(graph_log):
        os.remove(graph_log)

    layout = "NCHW"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout)
    tuning_opt = get_tuning_option(network, batch_size, dtype, target,
                                   kernel_log)
    ops = [
        relay.op.get("nn.batch_matmul"),
        relay.op.get("nn.dense"),
        relay.op.get("nn.conv2d"),
    ]

    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=target,
                                              params=params,
                                              ops=ops)
    tune_kernels(tasks, **tuning_opt)

    if use_graph_tuner(network, batch_size, dtype, target):
        tune_graph(mod["main"], input_name, input_shape, target, kernel_log,
                   graph_log)
Example #4
def main():
    args = parser()
    cfg = Config.fromfile(args.config)
    log = Logger('./cache/log/' + args.net + '_trainlog.txt', level='info')
    log.logger.info('Preparing data')
    train_loader, val_loader = dataLoad(cfg)
    start_epoch = 0
    if args.pretrain:
        log.logger.info('Loading Pretrain Data')
    net = get_network(args).cuda()
    model_params(net, log)
    criterion = CrossEntropy().cuda()
    # criterion = MeanSquaredError().cuda()
    optimizer = optim.SGD(net.parameters(), lr=cfg.PARA.train.LR, momentum=cfg.PARA.train.momentum, weight_decay=cfg.PARA.train.wd)
    net = torch.nn.DataParallel(net, device_ids=cfg.PARA.train.device_ids)
    torch.backends.cudnn.benchmark = True
    if args.resume:
        log.logger.info('Resuming from checkpoint')
        weighted_file = os.path.join('./cache/checkpoint/'+args.net, args.epoch + 'ckpt.pth')
        checkpoint = torch.load(weighted_file)
        net.load_state_dict(checkpoint['net'])
        start_epoch = checkpoint['epoch']

    train(start_epoch, train_loader, val_loader, cfg, net, criterion, optimizer, args, log)
    log.logger.info("Training Finished, Total EPOCH=%d" % cfg.PARA.train.EPOCH)
Example #5
def plot():
    """
    Serve a plot of the network.
    """
    G = utils.get_network()
    result = {'plot': utils.plot(G)}
    return render_template('plot.html', result=result)
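A minimal sketch of how such a view might be wired up, assuming a standard Flask app and a utils module that provides get_network() and plot():

from flask import Flask, render_template
import utils

app = Flask(__name__)
# Register the view above under a hypothetical /plot URL.
app.add_url_rule('/plot', 'plot', plot)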
Example #6
    def __init__(self, config, storage, replay_buffer, state=None):
        set_all_seeds(config.seed)

        self.run_tag = config.run_tag
        self.group_tag = config.group_tag
        self.worker_id = 'learner'
        self.replay_buffer = replay_buffer
        self.storage = storage
        self.config = deepcopy(config)

        if "learner" in self.config.use_gpu_for:
            if torch.cuda.is_available():
                if self.config.learner_gpu_device_id is not None:
                    device_id = self.config.learner_gpu_device_id
                    self.device = torch.device("cuda:{}".format(device_id))
                else:
                    self.device = torch.device("cuda")
            else:
                raise RuntimeError(
                    "GPU was requested but torch.cuda.is_available() is False."
                )
        else:
            self.device = torch.device("cpu")

        self.network = get_network(config, self.device)
        self.network.to(self.device)
        self.network.train()

        self.optimizer = get_optimizer(config, self.network.parameters())
        self.lr_scheduler = get_lr_scheduler(config, self.optimizer)
        self.scalar_loss_fn, self.policy_loss_fn = get_loss_functions(config)

        self.training_step = 0
        self.losses_to_log = {'reward': 0., 'value': 0., 'policy': 0.}

        self.throughput = {
            'total_frames': 0,
            'total_games': 0,
            'training_step': 0,
            'time': {
                'ups': 0,
                'fps': 0
            }
        }

        if self.config.norm_obs:
            self.obs_min = np.array(self.config.obs_range[::2],
                                    dtype=np.float32)
            self.obs_max = np.array(self.config.obs_range[1::2],
                                    dtype=np.float32)
            self.obs_range = self.obs_max - self.obs_min

        if state is not None:
            self.load_state(state)

        Logger.__init__(self)
Example #7
def evaluate_network(test_data, targets, model_file, model_type, batch_size, extra_args=None):
    # load the model file
    model = cPickle.load(open(model_file, 'rb'))  # pickle files should be opened in binary mode
    n_train_samples, data_dim = test_data.shape
    n_classes = len(set(targets))
    if data_dim != model['in_dim'] or n_classes != model['n_classes']:
        print("This data is not compatible with this network, exiting", file=sys.stderr)
        return False
    net = get_network(x=test_data, in_dim=model['in_dim'], n_classes=model['n_classes'], model_type=model_type,
                      hidden_dim=model['hidden_dim'], extra_args=extra_args)
    net.load_from_object(model=model, careful=True)
    errors, probs = predict(test_data=test_data, true_labels=targets, batch_size=batch_size, model=net, model_file=None)
    return errors, probs
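A hypothetical invocation of the function above, assuming test_data is an (N, D) NumPy array, targets is an (N,) label vector, and the pickle was produced by the matching training code (the file name and "twoLayer" model type are placeholders):

errors, probs = evaluate_network(test_data=test_data,
                                 targets=targets,
                                 model_file="trained_models/model90.pkl",
                                 model_type="twoLayer",
                                 batch_size=128)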
Example #8
def main():
    args = parser.parse_args()
    config = vars(args)
    train_loader, val_loader, test_loader = get_data_loader(dataset_name=config['dataset'],
                                                            data_path=config['dataset_path'],
                                                            TRAIN_BATCH_SIZE=config['train_batch_size'],
                                                            VAL_BATCH_SIZE=config['val_batch_size'],
                                                            TEST_BATCH_SIZE=config['test_batch_size'])
    model = get_network(config['network'])
    model.train()
    model.cuda()
    train_reg2(model, train_loader, config)
    evaluate(model, val_loader, test_loader, config)
Example #9
def inference(config_file, image_file):
    """ Run text recognition network on an image file.
    """
    # Get config
    FLAGS = Flags(config_file).get()
    out_charset = load_charset(FLAGS.charset)
    num_classes = len(out_charset)
    net = get_network(FLAGS, out_charset)

    if FLAGS.use_rgb:
        num_channel = 3
        mode = cv2.IMREAD_COLOR
    else:
        num_channel = 1
        mode = cv2.IMREAD_GRAYSCALE

    # Input node
    image = tf.placeholder(tf.uint8,
                           shape=[None, None, num_channel],
                           name='input_node')

    # Network
    proc_image = net.preprocess_image(image, is_train=False)
    proc_image = tf.expand_dims(proc_image, axis=0)
    proc_image.set_shape(
        [None, FLAGS.resize_hw.height, FLAGS.resize_hw.width, num_channel])
    logits, sequence_length = net.get_logits(proc_image,
                                             is_train=False,
                                             label=None)
    prediction, log_prob = net.get_prediction(logits, sequence_length)
    prediction = tf.sparse_to_dense(sparse_indices=prediction.indices,
                                    sparse_values=prediction.values,
                                    output_shape=prediction.dense_shape,
                                    default_value=num_classes,
                                    name='output_node')

    # Restore
    restore_model = get_init_trained()
    sess = tf.Session()
    restore_model(sess, FLAGS.eval.model_path)

    # Run
    img = cv2.imread(image_file, mode)
    img = np.reshape(img, [img.shape[0], img.shape[1], num_channel])
    predicted = sess.run(prediction, feed_dict={image: img})
    string = get_string(predicted[0], out_charset)
    string = adjust_string(string, FLAGS.eval.lowercase,
                           FLAGS.eval.alphanumeric)
    print(string)

    return string
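A hypothetical command-line entry point for the function above; the config and image paths are placeholders:

if __name__ == '__main__':
    import sys
    # Usage sketch: python infer.py <config_file> <image_file>
    inference(sys.argv[1], sys.argv[2])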
Example #10
def do_partition(partition, device_id):
    # You have to set the right variables as global for visibility
    global net, optimizer, clr_scheduler, loss_function, fprint

    # Only the predecessor epoch's state is loaded; everything else is re-executed
    predecessor_epoch = partition[0] - 1
    if not flor.is_initialized():
        # Ray creates a new instance of the library per worker, so we have to re-init
        flor.initialize(**user_settings, predecessor_id=predecessor_epoch)

    # This line is so parallel workers don't collide
    fprint = flor.utils.fprint(['data', 'rogarcia', 'flor_output'], device_id)

    # Do the general initialization
    # The code below is copy/pasted from __main__
    # Each worker needs to initialize its own Neural Net so it's in the right GPU
    # Anything that goes on the GPU or reads from the GPU has to be initialized in each worker
    net = get_network(args, use_gpu=True)
    flor.namespace_stack.test_force(net, 'net')
    loss_function = nn.CrossEntropyLoss()
    flor.namespace_stack.test_force(loss_function, 'loss_function')
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.0,
                          weight_decay=0.0)
    flor.namespace_stack.test_force(optimizer, 'optimizer')
    clr_scheduler = CLR_Scheduler(optimizer,
                                  net_steps=(iter_per_epoch * settings.EPOCH),
                                  min_lr=args.lr,
                                  max_lr=3.0,
                                  tail_frac=0.0)
    flor.namespace_stack.test_force(clr_scheduler, 'clr_scheduler')

    # Load the end state of the predecessor so we can re-execute in the middle
    if predecessor_epoch >= 0:
        # Initialize the Previous Epoch
        train(predecessor_epoch)
        eval_training(predecessor_epoch)

    # Re-execute in the middle
    flor.SKIP = False  # THIS IS IMPORTANT, otherwise flor will SKIP
    for epoch in partition:
        # This is just good old-fashioned re-execution
        train(epoch)
        (loss, acc) = eval_training(epoch)
        fprint('Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(
            loss, acc))

    # Clear the memory for cleanliness, this step might be optional
    torch.cuda.empty_cache()
Example #11
def benchmark(network, batch_size, dtype, target, log_prefix, repeat):
    layout = "NCHW"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout)

    if use_graph_tuner(network, batch_size, dtype, target):
        log_file = log_prefix + ".graph.log"
        history_best_context = autotvm.apply_graph_best(log_file)
    else:
        log_file = log_prefix + ".kernel.log"
        history_best_context = autotvm.apply_history_best(log_file)

    assert os.path.exists(
        log_file), "The log file '%s' does not exist." % log_file
    print("Use log file %s" % log_file)

    if network in ["bert"]:
        # Build module
        with history_best_context:
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        seq_length = input_shape[0][1]
        data = np.random.uniform(size=input_shape[0])
        token_types = np.random.uniform(size=input_shape[1])
        valid_length = np.array([seq_length] * batch_size)
        module.set_input(data0=data, data1=token_types, data2=valid_length)
    else:
        # Build module
        with history_best_context:
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        data = np.random.uniform(size=input_shape)
        module.set_input(input_name, data)

    # Evaluate
    ftimer = module.module.time_evaluator("run",
                                          ctx,
                                          min_repeat_ms=500,
                                          repeat=repeat)
    return np.array(ftimer().results)
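The function returns the raw per-repeat timings in seconds. A hedged usage sketch that summarizes them (the network name, target, and log prefix are assumptions):

target = tvm.target.Target("llvm")
prof_res = benchmark("resnet-50", batch_size=1, dtype="float32",
                     target=target, log_prefix="logs/resnet-50", repeat=3)
print("Mean inference time: %.2f ms (std %.2f ms)"
      % (np.mean(prof_res) * 1000, np.std(prof_res) * 1000))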
Example #12
def main():
    random.seed(1234)
    np.random.seed(1234)
    tf.random.set_seed(1234)

    tf.keras.backend.set_floatx('float32')
    args = get_args()
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    ref_dataloader = DataLoader(args.ref_train_path, args.ref_val_path, args.ref_test_path, args.cls_num, args.input_size,
                            name="ref_dataloader", output_path=args.output_path)
    tar_dataloader = DataLoader(args.tar_train_path, args.tar_val_path, args.tar_test_path, args.cls_num, args.input_size,
                            name="tar_dataloader", output_path=args.output_path)
    network = utils.get_network(args.nntype)
    network.freeze_layers(19)
    # if args.ckpt is not None:
    #     network.load_weights(args.ckpt).expect_partial()  # expect_partial ignores training-only variables when restoring for prediction
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
    D_loss = tf.keras.losses.CategoricalCrossentropy()
    C_loss = compactnes_loss
    features_model = network.get_features_model(args.test_layer)

    trainer = TrainTestHelper(network, optimizer, D_loss, C_loss, args.lambd, training=True)
    validator = TrainTestHelper(network, optimizer, D_loss, C_loss, args.lambd, training=False)

    test_images, labels = ref_dataloader.read_batch(200, "test")
    save_predicted_results(test_images, labels, network, ref_dataloader.paths_logger["test"], D_loss, "before_training", args.output_path)

    random.seed(1234)
    np.random.seed(1234)
    tf.random.set_seed(1234)

    test_helper = TestHelper(ref_dataloader, tar_dataloader, args.templates_num, args.test_num, features_model, args.output_path)
    random.seed(1234)
    np.random.seed(1234)
    tf.random.set_seed(1234)

    train(ref_dataloader, tar_dataloader, trainer, validator, args.batchs_num, args.train_iterations, args.print_freq, test_helper, args.output_path, network)
    save_predicted_results(test_images, labels, network, ref_dataloader.paths_logger["test"], D_loss, "after_training", args.output_path)

    network.save_model(args.train_iterations, args.output_path)
Example #13
def load_model(session, model_name):
    model_path = Path(__file__).parent / 'models' / model_name
    params_path = Path(__file__).parent / 'params' / ('%s.json' % model_name)

    with open(params_path) as f:
        params = json.load(f)

    assert model_path.exists()

    inputs = tf.placeholder(tf.float32)
    network = get_network(inputs, params)
    checkpoint = tf.train.get_checkpoint_state(model_path)
    saver = tf.train.Saver()
    saver.restore(session, checkpoint.model_checkpoint_path)

    return network
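A hypothetical TF1-style usage of the loader above, assuming a checkpoint directory named "my_model" exists under models/ with a matching params/my_model.json:

with tf.Session() as sess:
    network = load_model(sess, 'my_model')
    # network can now be evaluated in this session, e.g. sess.run(network, feed_dict={...})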
Example #14
def main():
    args = parser()
    cfg = Config.fromfile(args.config)
    log = Logger('./cache/log/' + args.net + '_testlog.txt', level='info')
    log.logger.info('==> Preparing data <==')
    test_loader = dataLoad(cfg)
    log.logger.info('==> Loading model <==')
    net = get_network(args).cuda()
    net = torch.nn.DataParallel(net, device_ids=cfg.PARA.train.device_ids)
    log.logger.info("==> Waiting Test <==")
    # for epoch in range(1, cfg.PARA.train.EPOCH+1):
    epoch = 121
    checkpoint = torch.load('./cache/checkpoint/' + args.net + '/' +
                            str(epoch) + 'ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    test(net, epoch, test_loader, log, args, cfg)
Example #15
def main():
    np.random.seed(1234)
    tf.random.set_seed(1234)
    tf.keras.backend.set_floatx('float32')
    args = configurations.get_args()
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    ref_dataloader = DataLoader(args.ref_train_path,
                                args.ref_val_path,
                                args.ref_test_path,
                                args.cls_num,
                                args.input_size,
                                name="ref_dataloader",
                                output_path=args.output_path)
    tar_dataloader = DataLoader(args.tar_train_path,
                                args.tar_val_path,
                                args.tar_test_path,
                                args.cls_num,
                                args.input_size,
                                name="tar_dataloader",
                                output_path=args.output_path)
    network = utils.get_network(args.nntype)
    network.freeze_layers(args.last_frozen_layer)
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
    D_loss = tf.keras.losses.SparseCategoricalCrossentropy()
    C_loss = compactnes_loss
    trainer = TrainTestHelper(network,
                              optimizer,
                              D_loss,
                              C_loss,
                              args.lambd,
                              training=True)
    validator = TrainTestHelper(network,
                                optimizer,
                                D_loss,
                                C_loss,
                                args.lambd,
                                training=False)

    test_helper = TestHelper(ref_dataloader, tar_dataloader,
                             args.templates_num, args.test_num, network)

    train(ref_dataloader, tar_dataloader, trainer, validator, args.batchs_num,
          args.train_iterations, args.print_freq, test_helper,
          args.output_path)
Example #16
def benchmark(network, batch_size, dtype, target, log_file, repeat):
    layout = "NHWC"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout
    )

    assert os.path.exists(log_file), "The log file '%s' does not exist." % log_file
    print("Use log file %s" % log_file)

    if network in ["bert"]:
        # Build module
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)

        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        seq_length = input_shape[0][1]
        data = np.random.uniform(size=input_shape[0])
        token_types = np.random.uniform(size=input_shape[1])
        valid_length = np.array([seq_length] * batch_size)
        module.set_input(data0=data, data1=token_types, data2=valid_length)
    else:
        # Build module
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        data = np.random.uniform(size=input_shape)
        module.set_input(input_name, data)

    # Evaluate
    ftimer = module.module.time_evaluator("run", ctx, min_repeat_ms=500, repeat=repeat)
    return np.array(ftimer().results)
Example #17
def main():
    args = configurations.get_args()
    ref_labels = dataloader.read_labels_file(args.reflabelpath)
    classes_num = len(np.unique(ref_labels))
    ref_images_paths = dataloader.get_images_path(args.refpath)
    target_images_paths = get_target_images_by_classes(args.targetpath,
                                                       ["knife", "sword"])
    ref_dataloader = dataloader.Dataloader(ref_images_paths, classes_num,
                                           ref_labels)
    target_dataloader = dataloader.Dataloader(target_images_paths, classes_num)
    network = utils.get_network(args.nntype)
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
    trainer = Trainer(network, optimizer, args.lambd, compactnes_loss,
                      descriptiveness_loss)

    num_iterations = max(len(ref_images_paths) / args.batches, 1)

    train(ref_dataloader, target_dataloader, trainer, args.batches,
          num_iterations, args.epochs)
Example #18
def auto_scheduler_tune(network, batch_size, dtype, target, log_file):
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    #if os.path.exists(log_file):
    #    os.remove(log_file)

    layout = "NHWC"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout)

    n_trials = network_to_n_trials[(network, batch_size, dtype,
                                    str(target.kind))]

    if "cpu" in target.keys:
        tuning_opt = auto_scheduler.TuningOptions(
            num_measure_trials=n_trials,
            runner=auto_scheduler.LocalRunner(repeat=10,
                                              enable_cpu_cache_flush=True),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
    else:
        min_repeat_ms = 450 if network in ["bert"] else 300
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(
            repeat=1, min_repeat_ms=min_repeat_ms, timeout=10)
        tuning_opt = auto_scheduler.TuningOptions(
            num_measure_trials=n_trials,
            runner=measure_ctx.runner,
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )

    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params,
                                                       target)
    print(log_file)
    update_file(log_file, tasks)
    # NOTE: this early return skips the task-tuning loop below; remove it to actually run the tuner.
    return
    for idx, task in enumerate(tasks):
        print("========== Task %d  (workload key: %s) ==========" %
              (idx, task.workload_key))
        print(task.compute_dag)

    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tuner.tune(tuning_opt)
Example #19
def main():
    random.seed(1234)
    np.random.seed(1234)
    tf.random.set_seed(1234)

    tf.keras.backend.set_floatx('float32')
    args = get_args()
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    ref_dataloader = DataLoader(args.ref_train_path,
                                args.ref_val_path,
                                args.ref_test_path,
                                args.cls_num,
                                args.input_size,
                                name="ref_dataloader",
                                output_path=args.output_path)
    tar_dataloader = DataLoader(args.tar_train_path,
                                args.tar_val_path,
                                args.tar_test_path,
                                args.cls_num,
                                args.input_size,
                                name="tar_dataloader",
                                output_path=args.output_path)

    network = utils.get_network(args.nntype)
    network.freeze_layers(19)
    network.load_model(args.ckpt_dir)
    features_model = network.get_features_model(args.test_layer)

    test_helper = TestHelper(ref_dataloader, tar_dataloader,
                             args.templates_num, args.test_num, features_model,
                             args.output_path)

    if args.hot_map_paths is not None:
        paths = []
        with open(args.hot_map_paths, "r") as f:
            for line in f:
                paths.append(line.rstrip('\n'))
        test_helper.predict_hot_maps(paths, args.kernel_size, args.stride,
                                     args.input_size)
Example #20
def main():
    tf.keras.backend.set_floatx('float32')
    args = get_args()
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    dataloader = DataLoader(args.train_path, args.val_path, args.test_path, args.cls_num, args.input_size,
                            name="dataloader", output_path=args.output_path)
    network = utils.get_network(args.nntype)
    network.freeze_layers(19)
    optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    trainer = TrainTestHelper(network, optimizer, loss, training=True)
    validator = TrainTestHelper(network, optimizer, loss, training=False)

    test_images, labels = dataloader.read_batch(200, "test")
    save_predicted_results(test_images, labels, network, dataloader.paths_logger["test"], loss, "before_training", args.output_path)

    train(dataloader, trainer, validator, args.batchs_num, args.train_iterations, args.print_freq)
    save_predicted_results(test_images, labels, network, dataloader.paths_logger["test"], loss, "after_training", args.output_path)
Example #21
def plot():
    """
    Serve a plot of the network.
    """
    scale = int(request.args.get('scale') or '10')

    log = request.args.get('log') or 'false'
    if log.lower() in ['0', 'false', 'off', 'no']:
        log = False
    else:
        log = True

    drop = request.args.get('drop') or 'true'
    if drop.lower() in ['1', 'true', 'on', 'yes']:
        drop = True
    else:
        drop = False

    years = utils.get_years()
    G = utils.get_network(years)
    if len(G) < 1:
        return render_template('plot.html', result={})
    result = {'network_plot': utils.plot_network(G, years, scale=scale)}

    result['years_plot'] = utils.plot_bars(years,
                                           sort=True,
                                           drop=drop,
                                           log=log)

    lasts = utils.get_lasts()
    result['lasts_plot'] = utils.plot_bars(lasts, title="Current position")

    lens = utils.get_lens()
    result['lens_plot'] = utils.plot_bars(lens,
                                          title="Career length so far",
                                          lpos=0.5)

    return render_template('plot.html', result=result)
Example #22
    parser.add_argument('-eval',
                        action='store_true',
                        default=False,
                        help='evaluate only')
    parser.add_argument('-pth',
                        type=str,
                        default=None,
                        help='path to model folder')
    parser.add_argument('-ckpt',
                        type=str,
                        default=None,
                        help='path to model .pth file')

    args = parser.parse_args()

    net = get_network(args)
    # net = torchvision.models.resnet50().cuda()
    if args.distill:
        teacher_net = get_network(args)
        distill_loss = DistillationOrthogonalProjectionLoss()
    else:
        teacher_net = None
        distill_loss = None

    if args.dataset == "aircraft":
        # data preprocessing:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_list = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
Example #23
                        default=0.01,
                        help='initial learning rate')
    parser.add_argument('-act',
                        type=str,
                        default='RELU',
                        help='Activation function to use')
    parser.add_argument('-error', type=float, default=0.1, help='Error Rate')
    parser.add_argument('-resume',
                        type=str,
                        default='yes',
                        help='Resume the training')

    args = parser.parse_args()
    error = args.error
    print(args.act)
    net = get_network(args, use_gpu=args.gpu)
    #net = vgg16_bn()
    #data preprocessing:

    cifar100_training_loader = get_training_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        num_workers=args.w,
        batch_size=args.b,
        shuffle=args.s)

    cifar100_test_loader = get_test_dataloader(settings.CIFAR100_TRAIN_MEAN,
                                               settings.CIFAR100_TRAIN_STD,
                                               num_workers=args.w,
                                               batch_size=args.b,
                                               shuffle=args.s)
Example #24
def main(args):
    ###
    CHECKPOINT_PATH = 'checkpoint'
    EPOCH = 75
    MILESTONES = [50]
    TIME_NOW = datetime.now().isoformat()
    LOG_DIR = 'runs'
    DATASET = 'cifar-100'
    SAVE_EPOCH = 15
    ###

    classes = list(range(100))
    training_batches = [
        classes[i:i + args.step_classes]
        for i in range(0, len(classes), args.step_classes)
    ]

    net = get_network(args, use_gpu=True)

    checkpoint_path = os.path.join(CHECKPOINT_PATH, DATASET,
                                   str(args.step_classes),
                                   str(args.buffer_size), args.net,
                                   str(TIME_NOW))

    old_data_batch = []
    incremental_accuracy = []

    criterion = nn.CrossEntropyLoss()

    replay_dataloader = None

    replay_dataset = get_buffer_dataset(buffer_size=args.buffer_size)
    for idx, training_batch in enumerate(training_batches):
        print('Training batch: {}'.format(training_batch))
        # data preprocessing:
        training_loader = get_training_dataloader(include_list=training_batch,
                                                  num_workers=args.w,
                                                  batch_size=args.b,
                                                  shuffle=args.s)

        test_loader = get_test_dataloader(include_list=training_batch +
                                          old_data_batch,
                                          num_workers=args.w,
                                          batch_size=args.b,
                                          shuffle=args.s)

        new_test_loader = get_test_dataloader(include_list=training_batch,
                                              num_workers=args.w,
                                              batch_size=args.b,
                                              shuffle=args.s)
        if idx > 0:
            old_test_loader = get_test_dataloader(include_list=old_data_batch,
                                                  num_workers=args.w,
                                                  batch_size=args.b,
                                                  shuffle=args.s)

        if idx > 0:
            EPOCH = 30  #Monica
        if idx > len(training_batches) // 3:
            lr = 0.01
        else:
            lr = 0.1
        new_data_optimizer = optim.SGD(net.parameters(),
                                       lr=0.1,
                                       momentum=0.9,
                                       weight_decay=5e-4)
        #optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
        train_scheduler = optim.lr_scheduler.MultiStepLR(new_data_optimizer,
                                                         milestones=MILESTONES,
                                                         gamma=0.1)
        iter_per_epoch = float(len(training_loader))

        # create checkpoint folder to save model
        if not os.path.exists(checkpoint_path):
            Path(checkpoint_path).mkdir(parents=True, exist_ok=True)
        ckp_path = os.path.join(checkpoint_path,
                                '{net}-{idx}-{epoch}-{type}.pth')
        with tqdm(total=EPOCH) as pbar:
            for epoch in range(1, EPOCH):
                if epoch == EPOCH // 3 and idx > 0:
                    lr *= .1

                net.train()
                avg_learning_ratio = 0
                if idx > 0:
                    # old_dataloader = replay_manager.get_dataloader(batch_size=args.b)
                    # old_dataiter = iter(old_dataloader)
                    replay_dataloader = DataLoader(dataset=replay_dataset,
                                                   shuffle=True,
                                                   batch_size=args.b)
                    old_dataiter = iter(replay_dataloader)
                for batch_index, (images,
                                  labels) in enumerate(training_loader):
                    if idx > 0:
                        try:
                            old_images, old_labels = next(old_dataiter)
                        except StopIteration:
                            old_dataiter = iter(replay_dataloader)
                            old_images, old_labels = next(old_dataiter)

                        from PIL import Image
                        # im = Image.fromarray(old_images[0].mul_(255).permute(1, 2, 0).to('cpu', torch.uint8).numpy())
                        # im.save('sample_old.png')
                        old_images_gpu = old_images.cuda()
                        old_labels_gpu = old_labels.cuda()

                        net.zero_grad()
                        old_outputs = net(old_images_gpu)
                        old_data_loss = criterion(old_outputs, old_labels_gpu)
                        old_data_loss.backward()
                        old_data_gradient_magnitudes = []
                        # old_gradient_data = []
                        for f in net.parameters():
                            old_data_gradient_magnitudes.append(
                                f.grad.norm(2).item()**2)
                            # old_gradient_data.append(f.grad.data)

                        old_magnitude = np.sum(
                            np.asarray(old_data_gradient_magnitudes))

                    new_labels_gpu = labels.cuda()
                    new_images_gpu = images.cuda()

                    net.zero_grad()
                    outputs = net(new_images_gpu)
                    new_data_loss = criterion(outputs, new_labels_gpu)
                    new_data_loss.backward()
                    new_data_gradient_magnitudes = []
                    # new_gradient_data = []
                    for f in net.parameters():
                        new_data_gradient_magnitudes.append(
                            f.grad.norm(2).item()**2)
                        # new_gradient_data.append(f.grad.data)
                    new_magnitude = np.sum(
                        np.asarray(new_data_gradient_magnitudes))
                    if idx > 0:
                        learning_ratio = old_magnitude / new_magnitude
                        avg_learning_ratio += learning_ratio
                        if learning_ratio < .01:
                            net.zero_grad()
                            outputs = net(new_images_gpu)
                            new_data_loss = criterion(outputs, new_labels_gpu)
                            new_data_loss.backward()
                            for f in net.parameters():
                                f.data.sub_(lr * f.grad.data)
                            # print('Learning weighted new -- {}'.format(learning_ratio))
                        elif learning_ratio < .1:
                            combined_images = torch.cat([images, old_images],
                                                        axis=0)
                            combined_labels = torch.cat([labels, old_labels],
                                                        axis=0)
                            combined_images = combined_images.cuda()
                            combined_labels = combined_labels.cuda()
                            net.zero_grad()
                            outputs = net(combined_images)
                            combined_data_loss = criterion(
                                outputs, combined_labels)
                            combined_data_loss.backward()
                            for f in net.parameters():
                                f.data.sub_(lr * f.grad.data)
                            # print('Learning combined! -- {}'.format(learning_ratio))
                        else:
                            net.zero_grad()
                            old_outputs = net(old_images_gpu)
                            old_data_loss = criterion(old_outputs,
                                                      old_labels_gpu)
                            old_data_loss.backward()
                            for f in net.parameters():
                                f.data.sub_(0.1 * f.grad.data)
                            # print('Learning old! -- {}'.format(learning_ratio))
                    else:
                        new_data_optimizer.step()
                        train_scheduler.step(epoch)

                    if (epoch == 1 or epoch == EPOCH - 1) and batch_index == 0:
                        print('New Batch Magnitude is {} at epoch {}'.format(
                            new_magnitude, epoch))
                        draw_magnitudes(
                            new_data_gradient_magnitudes,
                            '_'.join(str(i) for i in training_batch),
                            checkpoint_path, '{}_{}'.format(idx, epoch))
                        if idx > 0:
                            print(
                                'Old Batch Magnitude is {} at epoch {}'.format(
                                    old_magnitude, epoch))
                            draw_magnitudes(old_data_gradient_magnitudes,
                                            'old Class', checkpoint_path,
                                            'old_{}_{}'.format(idx, epoch))
                    print('Learning magnitude ratio {}'.format(
                        avg_learning_ratio / iter_per_epoch))
                    if idx > 0:
                        print(
                            'Training Epoch: {epoch} \tNew Loss: {:0.4f}\t Old Loss: {:0.4f}'
                            .format(new_data_loss.item() / images.size(0),
                                    old_data_loss.item() / old_images.size(0),
                                    epoch=epoch))

                loss_value, acc = evaluate(net, new_test_loader, criterion)
                print('New Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.
                      format(loss_value, acc))

                if idx > 0:
                    loss_value, acc = evaluate(net, old_test_loader, criterion)
                    print(
                        'Old Test set: Average loss: {:.4f}, Accuracy: {:.4f}'.
                        format(loss_value, acc))

                loss_value, acc = evaluate(net, test_loader, criterion)
                print(
                    'Complete Test set: Average loss: {:.4f}, Accuracy: {:.4f}'
                    .format(loss_value, acc))

                if epoch == EPOCH - 1:
                    incremental_accuracy.append(acc.float())

                if not epoch % SAVE_EPOCH:
                    torch.save(
                        net.state_dict(),
                        ckp_path.format(net=args.net,
                                        idx=idx,
                                        epoch=epoch,
                                        type='regular'))

            pbar.update(1)
        torch.save(
            net.state_dict(),
            ckp_path.format(net=args.net, idx=idx, epoch=epoch, type='end'))

        # Populate Replay Buffer

        replay_dataset.append_data(training_batch)
        old_data_batch += training_batch

        replay_dataloader = DataLoader(dataset=replay_dataset,
                                       batch_size=args.b)
        loss_value, acc = evaluate(net, replay_dataloader, criterion)
        print(
            'Replay Train set: Average loss: {:.4f}, Accuracy: {:.4f}'.format(
                loss_value, acc))

    print(incremental_accuracy)
Example #25
            },
            is_best=is_best,
            checkpoint=model_dir)

if __name__ == '__main__':

    # Load the parameters from parser
    args = parser.parse_args()

    model_name = args.model
    lr = args.lr
    epochs = args.epoch
    batch_size = args.batch_size

    logging.info("Loading the training dataset...")

    # fetch train dataloader
    train_dataloader = data_loader.train_data_loader()

    logging.info("- done.")

    # Define the model and optimizer
    model = utils.get_network(args)
    optimizer = utils.get_optimizer(model_name, model, lr)

    # fetch loss function
    loss_fn = nn.CrossEntropyLoss()

    # Train the model
    logging.info("Starting training for {} epoch(s).".format(epochs))
    train(model, optimizer, loss_fn, train_dataloader)
Example #26
    parser.add_argument('--norecord', dest='record', action='store_false', help='whether to save checkpoint and events')
    parser.add_argument('--record', dest='record', action='store_true', help='whether to save checkpoint and events')
    parser.add_argument('--nomean', dest='meanweight', action='store_false', help='whether to')
    parser.set_defaults(record=False)
    parser.set_defaults(meanweight=True)
    args = parser.parse_args()
    print(args)
    print(settings.TIME_NOW)
    if args.gpu == '-1':
        use_gpu = False
        print(use_gpu)
    else:
        use_gpu = True
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    net = get_network(args.net, use_gpu=use_gpu)

    #data preprocessing:
    cifar100_training_loader = get_training_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        num_workers=args.w,
        batch_size=args.b,
        shuffle=args.s
    )
    
    cifar100_test_loader = get_test_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        num_workers=args.w,
        batch_size=args.b,
Example #27
def main(config_file):
    """ Train text recognition network
    """
    # Parse configs
    FLAGS = Flags(config_file).get()

    # Set directory, seed, logger
    model_dir = create_model_dir(FLAGS.model_dir)
    logger = get_logger(model_dir, 'train')
    best_model_dir = os.path.join(model_dir, 'best_models')
    set_seed(FLAGS.seed)

    # Print configs
    flag_strs = [
        '{}:\t{}'.format(name, value)
        for name, value in FLAGS._asdict().items()
    ]
    log_formatted(logger, '[+] Model configurations', *flag_strs)

    # Print system environments
    num_gpus = count_available_gpus()
    num_cpus = os.cpu_count()
    mem_size = virtual_memory().available // (1024**3)
    log_formatted(logger, '[+] System environments',
                  'The number of gpus : {}'.format(num_gpus),
                  'The number of cpus : {}'.format(num_cpus),
                  'Memory Size : {}G'.format(mem_size))

    # Get optimizer and network
    global_step = tf.train.get_or_create_global_step()
    optimizer, learning_rate = get_optimizer(FLAGS.train.optimizer,
                                             global_step)
    out_charset = load_charset(FLAGS.charset)
    net = get_network(FLAGS, out_charset)
    is_ctc = (net.loss_fn == 'ctc_loss')

    # Multi tower for multi-gpu training
    tower_grads = []
    tower_extra_update_ops = []
    tower_preds = []
    tower_gts = []
    tower_losses = []
    batch_size = FLAGS.train.batch_size
    tower_batch_size = batch_size // num_gpus

    val_tower_outputs = []
    eval_tower_outputs = []

    for gpu_indx in range(num_gpus):

        # Train tower
        print('[+] Build Train tower GPU:%d' % gpu_indx)
        input_device = '/gpu:%d' % gpu_indx

        tower_batch_size = tower_batch_size \
            if gpu_indx < num_gpus-1 \
            else batch_size - tower_batch_size * (num_gpus-1)

        train_loader = DatasetLodaer(
            dataset_paths=FLAGS.train.dataset_paths,
            dataset_portions=FLAGS.train.dataset_portions,
            batch_size=tower_batch_size,
            label_maxlen=FLAGS.label_maxlen,
            out_charset=out_charset,
            preprocess_image=net.preprocess_image,
            is_train=True,
            is_ctc=is_ctc,
            shuffle_and_repeat=True,
            concat_batch=True,
            input_device=input_device,
            num_cpus=num_cpus,
            num_gpus=num_gpus,
            worker_index=gpu_indx,
            use_rgb=FLAGS.use_rgb,
            seed=FLAGS.seed,
            name='train')

        tower_output = single_tower(net,
                                    gpu_indx,
                                    train_loader,
                                    out_charset,
                                    optimizer,
                                    name='train',
                                    is_train=True)
        tower_grads.append([x for x in tower_output.grads if x[0] is not None])
        tower_extra_update_ops.append(tower_output.extra_update_ops)
        tower_preds.append(tower_output.prediction)
        tower_gts.append(tower_output.text)
        tower_losses.append(tower_output.loss)

        # Print network structure
        if gpu_indx == 0:
            param_stats = tf.profiler.profile(tf.get_default_graph())
            logger.info('total_params: %d\n' % param_stats.total_parameters)

        # Valid tower
        print('[+] Build Valid tower GPU:%d' % gpu_indx)
        valid_loader = DatasetLodaer(dataset_paths=FLAGS.valid.dataset_paths,
                                     dataset_portions=None,
                                     batch_size=FLAGS.valid.batch_size //
                                     num_gpus,
                                     label_maxlen=FLAGS.label_maxlen,
                                     out_charset=out_charset,
                                     preprocess_image=net.preprocess_image,
                                     is_train=False,
                                     is_ctc=is_ctc,
                                     shuffle_and_repeat=False,
                                     concat_batch=False,
                                     input_device=input_device,
                                     num_cpus=num_cpus,
                                     num_gpus=num_gpus,
                                     worker_index=gpu_indx,
                                     use_rgb=FLAGS.use_rgb,
                                     seed=FLAGS.seed,
                                     name='valid')

        val_tower_output = single_tower(net,
                                        gpu_indx,
                                        valid_loader,
                                        out_charset,
                                        optimizer=None,
                                        name='valid',
                                        is_train=False)

        val_tower_outputs.append(
            (val_tower_output.loss, val_tower_output.prediction,
             val_tower_output.text, val_tower_output.filename,
             val_tower_output.dataset))

    # Aggregate gradients
    losses = tf.reduce_mean(tower_losses)
    grads = _average_gradients(tower_grads)

    with tf.control_dependencies(tower_extra_update_ops[-1]):
        if FLAGS.train.optimizer.grad_clip_norm is not None:
            grads, global_norm = _clip_gradients(
                grads, FLAGS.train.optimizer.grad_clip_norm)
            tf.summary.scalar('global_norm', global_norm)

        train_op = optimizer.apply_gradients(grads, global_step=global_step)

    # Define config, scaffold
    saver = tf.train.Saver()
    sess_config = get_session_config()
    scaffold = get_scaffold(saver, FLAGS.train.tune_from, 'train')
    restore_model = get_init_trained()

    # Define validation saver, summary writer
    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
    val_summary_op = tf.summary.merge(
        [s for s in summaries if 'valid' in s.name])
    val_summary_writer = {
        dataset_name:
        tf.summary.FileWriter(os.path.join(model_dir, 'valid', dataset_name))
        for dataset_name in valid_loader.dataset_names
    }
    val_summary_writer['total_valid'] = tf.summary.FileWriter(
        os.path.join(model_dir, 'valid', 'total_valid'))
    val_saver = tf.train.Saver(max_to_keep=len(valid_loader.dataset_names) + 1)
    best_val_err_rates = {}
    best_steps = {}

    # Training
    print('[+] Make Session...')

    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=model_dir,
            scaffold=scaffold,
            config=sess_config,
            save_checkpoint_steps=FLAGS.train.save_steps,
            save_checkpoint_secs=None,
            save_summaries_steps=FLAGS.train.summary_steps,
            save_summaries_secs=None,
    ) as sess:

        log_formatted(logger, 'Training started!')
        _step = 0
        train_t = 0
        start_t = time.time()

        while _step < FLAGS.train.max_num_steps \
                and not sess.should_stop():

            # Train step
            step_t = time.time()
            [step_loss, _, _step, preds, gts, lr] = sess.run([
                losses, train_op, global_step, tower_preds[0], tower_gts[0],
                learning_rate
            ])
            train_t += time.time() - step_t

            # Summary
            if _step % FLAGS.valid.steps == 0:

                # Train summary
                train_err = 0.

                for i, (p, g) in enumerate(zip(preds, gts)):
                    s = get_string(p, out_charset, is_ctc=is_ctc)
                    g = g.decode('utf8').replace(DELIMITER, '')

                    s = adjust_string(s, FLAGS.train.lowercase,
                                      FLAGS.train.alphanumeric)
                    g = adjust_string(g, FLAGS.train.lowercase,
                                      FLAGS.train.alphanumeric)
                    e = int(s != g)

                    train_err += e

                    if FLAGS.train.verbose and i < 5:
                        print('TRAIN :\t{}\t{}\t{}'.format(s, g, not bool(e)))

                train_err_rate = \
                    train_err / len(gts)

                # Valid summary
                val_cnts, val_errs, val_err_rates, _ = \
                    validate(sess,
                             _step,
                             val_tower_outputs,
                             out_charset,
                             is_ctc,
                             val_summary_op,
                             val_summary_writer,
                             val_saver,
                             best_val_err_rates,
                             best_steps,
                             best_model_dir,
                             FLAGS.valid.lowercase,
                             FLAGS.valid.alphanumeric)

                # Logging
                log_strings = ['', '-' * 28 + ' VALID_DETAIL ' + '-' * 28, '']

                for dataset in sorted(val_err_rates.keys()):
                    if dataset == 'total_valid':
                        continue

                    cnt = val_cnts[dataset]
                    err = val_errs[dataset]
                    err_rate = val_err_rates[dataset]
                    best_step = best_steps[dataset]

                    s = '%s : %.2f%%(%d/%d)\tBEST_STEP : %d' % \
                        (dataset, (1.-err_rate)*100, cnt-err, cnt, best_step)

                    log_strings.append(s)

                elapsed_t = float(time.time() - start_t) / 60
                remain_t = (elapsed_t / (_step+1)) * \
                    (FLAGS.train.max_num_steps - _step - 1)
                log_formatted(
                    logger, 'STEP : %d\tTRAIN_LOSS : %f' % (_step, step_loss),
                    'ELAPSED : %.2f min\tREMAIN : %.2f min\t'
                    'STEP_TIME: %.1f sec' %
                    (elapsed_t, remain_t, float(train_t) / (_step + 1)),
                    'TRAIN_SEQ_ERR : %f\tVALID_SEQ_ERR : %f' %
                    (train_err_rate, val_err_rates['total_valid']),
                    'BEST_STEP : %d\tBEST_VALID_SEQ_ERR : %f' %
                    (best_steps['total_valid'],
                     best_val_err_rates['total_valid']), *log_strings)

        log_formatted(logger, 'Training is completed!')
Example #28
                        default=True,
                        help='whether shuffle the dataset')
    parser.add_argument('-warm',
                        type=int,
                        default=1,
                        help='warm up training phase')
    parser.add_argument('-lr',
                        type=float,
                        default=0.1,
                        help='initial learning rate')
    args = parser.parse_args()

    if torch.cuda.is_available() and args.gpu:
        device = torch.cuda.current_device()
    else:
        device = 'cpu'  # fall back so `device` is always defined

    net = get_network(args, use_gpu=args.gpu, device=device)

    #data preprocessing:
    cifar100_training_loader = get_training_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        num_workers=args.w,
        batch_size=args.b,
        shuffle=args.s)

    cifar100_test_loader = get_test_dataloader(settings.CIFAR100_TRAIN_MEAN,
                                               settings.CIFAR100_TRAIN_STD,
                                               num_workers=args.w,
                                               batch_size=args.b,
                                               shuffle=args.s)
Example #29
def mini_batch_sgd_with_annealing(motif, train_data, labels, xTrain_data, xTrain_targets,
                                  learning_rate, L1_reg, L2_reg, epochs,
                                  batch_size,
                                  hidden_dim, model_type, model_file=None,
                                  trained_model_dir=None, verbose=True, extra_args=None):
    # Preamble #
    # determine dimensionality of data and number of classes
    n_train_samples, data_dim = train_data.shape
    n_classes = len(set(labels))

    # compute number of mini-batches for training, validation and testing
    train_set_x, train_set_y = shared_dataset(train_data, labels, True)
    xtrain_set_x, xtrain_set_y = shared_dataset(xTrain_data, xTrain_targets, True)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_xtrain_batches = xtrain_set_x.get_value(borrow=True).shape[0] / batch_size

    batch_index = T.lscalar()

    # containers to hold mini-batches
    x = T.matrix('x')
    y = T.ivector('y')

    net = get_network(x=x, in_dim=data_dim, n_classes=n_classes, hidden_dim=hidden_dim, model_type=model_type,
                      extra_args=extra_args)

    if net is False:
        return False

    # cost function
    cost = (net.negative_log_likelihood(labels=y) + L1_reg * net.L1 + (L2_reg / n_train_samples) * net.L2_sq)

    xtrain_fcn = theano.function(inputs=[batch_index],
                                 outputs=net.errors(y),
                                 givens={
                                     x: xtrain_set_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                                     y: xtrain_set_y[batch_index * batch_size: (batch_index + 1) * batch_size]
                                 })

    # gradients
    nambla_params = [T.grad(cost, param) for param in net.params]

    # the learning rate lives in a shared variable so that the annealing step at the
    # end of each epoch actually changes the rate used by the compiled training function
    dynamic_learning_rate = theano.shared(
        np.asarray(learning_rate, dtype=theano.config.floatX), name='learning_rate')

    updates = [(param, param - dynamic_learning_rate * nabla_param)
               for param, nabla_param in zip(net.params, nabla_params)]

    # main function? could make this an attribute and reduce redundant code
    train_fcn = theano.function(inputs=[batch_index],
                                outputs=cost,
                                updates=updates,
                                givens={
                                    x: train_set_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                                    y: train_set_y[batch_index * batch_size: (batch_index + 1) * batch_size]
                                })
    train_error_fcn = theano.function(inputs=[batch_index],
                                      outputs=net.errors(y),
                                      givens={
                                          x: train_set_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                                          y: train_set_y[batch_index * batch_size: (batch_index + 1) * batch_size]
                                      })

    if model_file is not None:
        net.load_from_file(file_path=model_file, careful=True)

    # do the actual training
    batch_costs = [np.inf]
    add_to_batch_costs = batch_costs.append
    xtrain_accuracies = []
    add_to_xtrain_acc = xtrain_accuracies.append
    train_accuracies = []
    add_to_train_acc = train_accuracies.append
    xtrain_costs_bin = []
    prev_xtrain_cost = 1e-10

    best_xtrain_accuracy = -np.inf
    best_model = ''
    check_frequency = max(1, epochs // 10)  # guard against modulo-by-zero when epochs < 10

    for epoch in xrange(0, epochs):
        # evaluation of training progress and summary stat collection
        if epoch % check_frequency == 0:
            # get the accuracy on the cross-train data
            xtrain_errors = [xtrain_fcn(_) for _ in xrange(n_xtrain_batches)]
            avg_xtrain_errors = np.mean(xtrain_errors)
            avg_xtrain_accuracy = 100 * (1 - avg_xtrain_errors)
            # then the training set
            train_errors = [train_error_fcn(_) for _ in xrange(n_train_batches)]
            avg_training_errors = np.mean(train_errors)
            avg_train_accuracy = 100 * (1 - avg_training_errors)
            # collect for tracking progress
            add_to_xtrain_acc(avg_xtrain_accuracy)
            add_to_train_acc(avg_train_accuracy)
            xtrain_costs_bin += xtrain_errors

            if verbose:
                print("{0}: epoch {1}, batch cost {2}, train accuracy {3}, cross-train accuracy {4}"
                      .format(motif, epoch, batch_costs[-1], avg_train_accuracy, avg_xtrain_accuracy), file=sys.stderr)

            # if we're getting better, save the model, the 'oldest' model should be the one with the highest
            # cross-train accuracy
            if avg_xtrain_accuracy >= best_xtrain_accuracy and trained_model_dir is not None:
                if not os.path.exists(trained_model_dir):
                    os.makedirs(trained_model_dir)
                # update the best accuracy and best model
                best_xtrain_accuracy = avg_xtrain_accuracy
                best_model = "{0}model{1}.pkl".format(trained_model_dir, epoch)
                net.write(best_model)

        for i in xrange(n_train_batches):
            batch_avg_cost = train_fcn(i)
            if i % max(1, n_train_batches // 10) == 0:
                add_to_batch_costs(float(batch_avg_cost))

        # annealing protocol: compare this epoch's cross-train cost with the previous one
        # and rescale the shared learning rate accordingly
        mean_xtrain_cost = np.mean([xtrain_fcn(_) for _ in xrange(n_xtrain_batches)])
        current_lr = dynamic_learning_rate.get_value()
        if mean_xtrain_cost / prev_xtrain_cost < 1.0:
            dynamic_learning_rate.set_value(np.asarray(current_lr * 0.9, dtype=theano.config.floatX))
        elif mean_xtrain_cost > prev_xtrain_cost:
            dynamic_learning_rate.set_value(np.asarray(current_lr * 1.05, dtype=theano.config.floatX))
        prev_xtrain_cost = mean_xtrain_cost

    # pickle the summary stats for the training
    summary = {
        "batch_costs": batch_costs,
        "xtrain_accuracies": xtrain_accuracies,
        "train_accuracies": train_accuracies,
        "xtrain_errors": xtrain_costs_bin,
        "best_model": best_model
    }
    if trained_model_dir is not None:
        with open("{}summary_stats.pkl".format(trained_model_dir), 'wb') as f:
            cPickle.dump(summary, f)

    return net, summary
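
The function above relies on a `shared_dataset` helper that is not part of the excerpt. A minimal sketch of what such a helper typically does, following the standard Theano tutorial pattern (an assumption about this project's implementation, not a copy of it):

import numpy as np
import theano
import theano.tensor as T

def shared_dataset(data_x, data_y, borrow=True):
    # keep the data on the device as shared variables so mini-batches can be
    # sliced inside the compiled Theano functions via `givens`
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    # labels are used as indices, so hand back an int32 view of the shared variable
    return shared_x, T.cast(shared_y, 'int32')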
Example no. 30
0
                        help='batch size for dataloader')
    parser.add_argument('-warm',
                        type=int,
                        default=1,
                        help='warm up training phase')
    parser.add_argument('-lr',
                        type=float,
                        default=0.1,
                        help='initial learning rate')
    parser.add_argument('-resume',
                        action='store_true',
                        default=False,
                        help='resume training')
    args = parser.parse_args()

    net = get_network(args)

    #data preprocessing:
    cifar100_training_loader = get_training_dataloader(
        settings.CIFAR100_TRAIN_MEAN,
        settings.CIFAR100_TRAIN_STD,
        num_workers=4,
        batch_size=args.b,
        shuffle=True)

    cifar100_test_loader = get_test_dataloader(settings.CIFAR100_TRAIN_MEAN,
                                               settings.CIFAR100_TRAIN_STD,
                                               num_workers=4,
                                               batch_size=args.b,
                                               shuffle=True)
Example no. 31
0
                              params['spatial_stride'])

logging.info('Loading validation dataset...')

validation_set = data.ImageDataset(params['validation_partitions'],
                                   params['temporal_patch_size'])

logging.info('Loading test dataset...')

test_set = data.ImageDataset(params['test_partitions'],
                             params['temporal_patch_size'])

inputs = tf.placeholder(tf.float32)
ground_truth = tf.placeholder(tf.float32)
global_step = tf.Variable(0, trainable=False, name='global_step')
network = get_network(inputs, params)
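# objective: mean-squared error between the network output and the ground truth,
# plus L2 weight decay summed over all network weights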
base_loss = tf.losses.mean_squared_error(network.outputs, ground_truth)
weight_loss = params['weight_decay'] * tf.reduce_sum(
    tf.stack([tf.nn.l2_loss(weight) for weight in network.weights]))
loss = base_loss + weight_loss

accuracy = tf.placeholder(tf.float32, shape=[])
precision = tf.placeholder(tf.float32, shape=[])
recall = tf.placeholder(tf.float32, shape=[])
f1_score = tf.placeholder(tf.float32, shape=[])

tf.summary.scalar('accuracy', accuracy)
tf.summary.scalar('precision', precision)
tf.summary.scalar('recall', recall)
tf.summary.scalar('f1_score', f1_score)
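
The excerpt stops before the optimizer and summary writer are created. Continuing from the tensors defined above, a minimal TF1-style sketch of how `loss`, `global_step`, and the scalar metric placeholders are typically wired up (the optimizer choice and the `params['learning_rate']` key are assumptions, not taken from the excerpt):

optimizer = tf.train.AdamOptimizer(params['learning_rate'])
train_op = optimizer.minimize(loss, global_step=global_step)
merged_summaries = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # the four metric placeholders are fed with values computed outside the graph
    summary = sess.run(merged_summaries,
                       feed_dict={accuracy: 0.0, precision: 0.0,
                                  recall: 0.0, f1_score: 0.0})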
Example no. 32
0
def main(
    expt,
    model_name,
    device,
    gpu_id,
    optimizer,
    arch,
    num_layers,
    n_classes,
    img_size,
    batch_size,
    test_batch_size,
    subset,
    init_w,
    ckpt_g,
    n_epochs,
    lr_clfs,
    weight_decays,
    milestones,
    gamma,
):
    device = torch_device(device, gpu_id[0])
    num_clfs = len([_ for _ in n_classes if _ > 0])
    if arch == 'resnet':
        print('Using resnet')
        Net = get_resnet(num_layers)
    else:
        print('Using {}'.format(arch))
        Net = get_network(arch, num_layers)

    net_G = define_G(cfg.num_channels[expt],
                     cfg.num_channels[expt],
                     64,
                     gpu_id=device)
    clfs = [
        Net(num_channels=cfg.num_channels[expt], num_classes=_).to(device)
        for _ in n_classes if _ > 0
    ]

    if len(gpu_id) > 1:
        net_G = nn.DataParallel(net_G, device_ids=gpu_id)
        clfs = [nn.DataParallel(clf, device_ids=gpu_id) for clf in clfs]

    assert len(clfs) == num_clfs

    print("Loading weights...\n{}".format(ckpt_g))
    net_G.load_state_dict(torch.load(ckpt_g))
    if init_w:
        print("Init weights...")
        for clf in clfs:
            clf.apply(weights_init)

    scheduler = torch.optim.lr_scheduler.MultiStepLR
    if optimizer == 'sgd':
        opt_clfs = [
            torch.optim.SGD(clf.parameters(),
                            lr=lr,
                            momentum=0.9,
                            weight_decay=weight_decays[0])
            for lr, clf in zip(lr_clfs, clfs)
        ]
    elif optimizer == 'adam':
        opt_clfs = [
            torch.optim.Adam(clf.parameters(),
                             lr=lr,
                             weight_decay=weight_decays[0])
            for lr, clf in zip(lr_clfs, clfs)
        ]
    sch_clfs = [
        scheduler(optim, milestones, gamma=gamma) for optim in opt_clfs
    ]

    assert len(opt_clfs) == num_clfs

    criterionNLL = nn.CrossEntropyLoss().to(device)

    train_loader = get_loader(expt,
                              batch_size,
                              True,
                              img_size=img_size,
                              subset=subset)
    valid_loader = get_loader(expt,
                              test_batch_size,
                              False,
                              img_size=img_size,
                              subset=subset)

    template = '{}'.format(model_name)

    loss_history = defaultdict(list)
    acc_history = defaultdict(list)
    for epoch in range(n_epochs):
        logging.info(
            "Train Epoch " +
            ' '.join(["\t Clf: {}".format(_) for _ in range(num_clfs)]))

        for iteration, (image, labels) in enumerate(train_loader, 1):
            real = image.to(device)

            with torch.no_grad():
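                # the generator net_G stays frozen (no gradients); only the classifiers are trained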
                X = net_G(real)
            ys = [_.to(device) for _ in labels]

            [opt.zero_grad() for opt in opt_clfs]
            ys_hat = [clf(X) for clf in clfs]
            loss = [criterionNLL(y_hat, y) for y_hat, y in zip(ys_hat, ys)]
            ys_hat = [_.argmax(1, keepdim=True) for _ in ys_hat]
            acc = [
                y_hat.eq(y.view_as(y_hat)).sum().item() / len(y)
                for y_hat, y in zip(ys_hat, ys)
            ]
            [l.backward() for l in loss]
            [opt.step() for opt in opt_clfs]

            iloss = [l.item() for l in loss]
            assert len(iloss) == num_clfs

            logging.info('[{}]({}/{}) '.format(
                epoch,
                iteration,
                len(train_loader),
            ) + ' '.join([
                '\t {:.4f} ({:.2f})'.format(l, a) for l, a in zip(iloss, acc)
            ]))

        loss_history['train_epoch'].append(epoch)
        acc_history['train_epoch'].append(epoch)
        for idx, (l, a) in enumerate(zip(iloss, acc)):
            loss_history['train_M_{}'.format(idx)].append(l)
            acc_history['train_M_{}'.format(idx)].append(a)

        logging.info(
            "Valid Epoch " +
            ' '.join(["\t Clf: {}".format(_) for _ in range(num_clfs)]))

        loss_m_batch = [0 for _ in range(num_clfs)]
        acc_m_batch = [0 for _ in range(num_clfs)]
        for iteration, (image, labels) in enumerate(valid_loader, 1):

            X = net_G(image.to(device))
            ys = [_.to(device) for _ in labels]

            ys_hat = [clf(X) for clf in clfs]
            loss = [criterionNLL(y_hat, y) for y_hat, y in zip(ys_hat, ys)]
            ys_hat = [_.argmax(1, keepdim=True) for _ in ys_hat]
            acc = [
                y_hat.eq(y.view_as(y_hat)).sum().item() / len(y)
                for y_hat, y in zip(ys_hat, ys)
            ]

            iloss = [l.item() for l in loss]
            for idx, (l, a) in enumerate(zip(iloss, acc)):
                loss_m_batch[idx] += l
                acc_m_batch[idx] += a

            logging.info('[{}]({}/{}) '.format(
                epoch,
                iteration,
                len(valid_loader),
            ) + ' '.join([
                '\t {:.4f} ({:.2f})'.format(l, a) for l, a in zip(iloss, acc)
            ]))

        num_samples = len(valid_loader)
        logging.info('[{}](batch) '.format(epoch, ) + ' '.join([
            '\t {:.4f} ({:.2f})'.format(l / num_samples, a / num_samples)
            for l, a in zip(loss_m_batch, acc_m_batch)
        ]))

        num_samples = len(valid_loader)
        loss_history['valid_epoch'].append(epoch)
        acc_history['valid_epoch'].append(epoch)
        for idx, (l, a) in enumerate(zip(loss_m_batch, acc_m_batch)):
            loss_history['valid_M_{}'.format(idx)].append(l / num_samples)
            acc_history['valid_M_{}'.format(idx)].append(a / num_samples)

        [sch.step() for sch in sch_clfs]

    train_loss_keys = [
        _ for _ in loss_history if 'train' in _ and 'epoch' not in _
    ]
    valid_loss_keys = [
        _ for _ in loss_history if 'valid' in _ and 'epoch' not in _
    ]
    train_acc_keys = [
        _ for _ in acc_history if 'train' in _ and 'epoch' not in _
    ]
    valid_acc_keys = [
        _ for _ in acc_history if 'valid' in _ and 'epoch' not in _
    ]

    cols = 5
    rows = len(train_loss_keys) // cols + 1
    fig = plt.figure(figsize=(7 * cols, 5 * rows))
    base = cols * 100 + rows * 10
    for idx, (tr_l, val_l) in enumerate(zip(train_loss_keys, valid_loss_keys)):
        ax = fig.add_subplot(rows, cols, idx + 1)
        ax.plot(loss_history['train_epoch'], loss_history[tr_l], 'b.:')
        ax.plot(loss_history['valid_epoch'], loss_history[val_l], 'bs-.')
        ax.set_xlabel('epochs')
        ax.set_ylabel('loss')
        ax.set_title(tr_l[6:])
        ax.grid()
        if tr_l in acc_history:
            ax2 = plt.twinx()
            ax2.plot(acc_history['train_epoch'], acc_history[tr_l], 'r.:')
            ax2.plot(acc_history['valid_epoch'], acc_history[val_l], 'rs-.')
            ax2.set_ylabel('accuracy')
    fig.subplots_adjust(wspace=0.4, hspace=0.3)
    plt_ckpt = '{}/{}/plots/{}.jpg'.format(cfg.ckpt_folder, expt, model_name)
    logging.info('Plot: {}'.format(plt_ckpt))
    plt.savefig(plt_ckpt, bbox_inches='tight', dpi=80)

    hist_ckpt = '{}/{}/history/{}.pkl'.format(cfg.ckpt_folder, expt,
                                              model_name)
    logging.info('History: {}'.format(hist_ckpt))
    pkl.dump((loss_history, acc_history), open(hist_ckpt, 'wb'))

    for idx, clf in enumerate(clfs):
        model_ckpt = '{}/{}/models/{}_clf_{}.stop'.format(
            cfg.ckpt_folder, expt, model_name, idx)
        logging.info('Model: {}'.format(model_ckpt))
        torch.save(clf.state_dict(), model_ckpt)
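
As a follow-up, a minimal sketch of reloading one of the classifier checkpoints saved above for evaluation; the path layout mirrors the training code, but treat the snippet (including the choice of `n_classes[0]`) as an assumption rather than a utility the project provides:

# rebuild the first classifier and load its saved weights
clf = Net(num_channels=cfg.num_channels[expt], num_classes=n_classes[0]).to(device)
state = torch.load('{}/{}/models/{}_clf_0.stop'.format(cfg.ckpt_folder, expt, model_name),
                   map_location=device)
# if the classifier was wrapped in nn.DataParallel during training, the keys carry a
# 'module.' prefix that has to be stripped before loading
state = {(k[len('module.'):] if k.startswith('module.') else k): v for k, v in state.items()}
clf.load_state_dict(state)
clf.eval()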