Example #1
def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train1 = create_reader(os.path.join(base_path, 'train_map.txt'),
                                  os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                  False)
    reader_test1 = create_reader(os.path.join(base_path, 'test_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                 False)
    model1 = create_convnet_cifar10_model(num_classes=10)
    train_loss1 = train_model(reader_train1,
                              reader_test1,
                              model1,
                              epoch_size=128,
                              max_epochs=1)
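
A note on the helper above: create_reader is defined in the example's own training script and is not shown here. A minimal sketch of what it typically looks like in the CNTK CIFAR-10 examples, assuming the standard ImageDeserializer pipeline and the 32x32x3 CIFAR-10 input (names, shapes, and transforms here are illustrative, not the exact helper):

import cntk.io.transforms as xforms
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs

def create_reader(map_file, mean_file, is_training):
    # scale to the CIFAR-10 input size and subtract the per-pixel dataset mean
    transforms = [xforms.scale(width=32, height=32, channels=3, interpolations='linear'),
                  xforms.mean(mean_file)]
    return MinibatchSource(
        ImageDeserializer(map_file, StreamDefs(
            features=StreamDef(field='image', transforms=transforms),
            labels=StreamDef(field='label', shape=10))),
        randomize=is_training)
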
Example #2
def test_bn_inception_cifar(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'test_map.txt')

    try:
        error = bn_inception_train_and_eval(train_data, test_data, mean_data, minibatch_size=16, epoch_size=500,
                                            max_epochs=8, restore=False, testing_parameters=(500, 16))
    finally:
        os.chdir(current_path)

    expected_error = 0.88
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example #3
def test_bn_inception_cifar(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'test_map.txt')

    try:
        error = bn_inception_train_and_eval(train_data,
                                            test_data,
                                            mean_data,
                                            minibatch_size=16,
                                            epoch_size=500,
                                            max_epochs=8,
                                            restore=False,
                                            testing_parameters=(500, 16))
    finally:
        os.chdir(current_path)

    expected_error = 0.88
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example #4
def test_inception_v3_imagenet(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_ImageNet_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'val_map.txt')

    try:
        error = inception_v3_train_and_eval(train_data,
                                            test_data,
                                            minibatch_size=8,
                                            epoch_size=200,
                                            max_epochs=4,
                                            restore=False,
                                            testing_parameters=(200, 8))
    finally:
        os.chdir(current_path)

    expected_error = 0.99
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example #5
def test_inception_v3_imagenet(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_ImageNet_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'val_map.txt')

    try:
        error = inception_v3_train_and_eval(train_data, test_data, minibatch_size=8, epoch_size=200,
                                            max_epochs=4, restore=False, testing_parameters=(200, 8))
    finally:
        os.chdir(current_path)

    expected_error = 0.99
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example #6
def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                 False)
    model = create_convnet_cifar10_model(num_classes=10)
    model.update_signature((num_channels, image_height, image_width))
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)
    train_loss, metric = train_model(reader_train,
                                     model,
                                     criterion,
                                     epoch_size=128,
                                     max_epochs=5)

    expected_loss_metric = (2.2963, 0.9062)
    assert np.allclose((train_loss, metric),
                       expected_loss_metric,
                       atol=TOLERANCE_ABSOLUTE)
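
create_criterion_function above also comes from the example's own script. A plausible sketch using CNTK's @Function decorator, pairing cross-entropy loss with classification error in the style of the ConvNet CIFAR-10 example (an assumption, not the exact helper):

import cntk as C

def create_criterion_function(model, normalize=lambda x: x):
    @C.Function
    def criterion(x, y):
        z = model(normalize(x))
        ce = C.cross_entropy_with_softmax(z, y)
        errs = C.classification_error(z, y)
        return (ce, errs)
    return criterion
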
Example #7
def prepare(cfg, use_arg_parser=True):
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(cfg.DATA.MAP_FILE_PATH, "results", datetime.now().strftime("%d-%m-%Y-%H-%M"))
    # cfg["DATA"].MAP_FILE_PATH = os.path.join(abs_path, cfg["DATA"].MAP_FILE_PATH)
    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if not running_locally:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False

    if use_arg_parser:
        parse_arguments(cfg)

    data_path = cfg["DATA"].MAP_FILE_PATH
    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}_{}.model"
                                     .format(cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage",
                                             cfg.DATA.DATASET))
    cfg['BASE_MODEL_PATH'] = os.path.join(cfg['DATA'].MAP_FILE_PATH,
                                          "pretrained-models",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].E2E_MAX_EPOCHS = 1
        cfg["CNTK"].RPN_EPOCHS = 1
        cfg["CNTK"].FRCN_EPOCHS = 1

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)

    if False and cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(cfg.TRAIN_CONV_LAYERS))
        print("Random seed      : {}".format(cfg.RND_SEED))
        print("Momentum per MB  : {}".format(cfg["CNTK"].MOMENTUM_PER_MB))
        if cfg["CNTK"].TRAIN_E2E:
            print("E2E epochs       : {}".format(cfg["CNTK"].E2E_MAX_EPOCHS))
        else:
            print("RPN lr factor    : {}".format(cfg["CNTK"].RPN_LR_FACTOR))
            print("RPN epochs       : {}".format(cfg["CNTK"].RPN_EPOCHS))
            print("FRCN lr factor   : {}".format(cfg["CNTK"].FRCN_LR_FACTOR))
            print("FRCN epochs      : {}".format(cfg["CNTK"].FRCN_EPOCHS))
Example #8
def test_binary_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                 False)
    model = create_binary_convolution_model()
    z, criterion = get_z_and_criterion(model)
    train_loss, metric = train_model(reader_train,
                                     z,
                                     criterion,
                                     epoch_size=8192,
                                     max_epochs=5)

    expected_loss_metric = (2.677057718858123, 0.6043701171875)
    assert np.allclose((train_loss, metric),
                       expected_loss_metric,
                       atol=TOLERANCE_ABSOLUTE)

    # save and load (as an illustration)
    model_path = "model.cmf"
    model.save(model_path)
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # test
    model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
    _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)

    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                False)
    test_loss, metric = evaluate(reader_test,
                                 criterion,
                                 device=eval_device,
                                 minibatch_size=1,
                                 max_samples=200)

    expected_loss_metric = (0.0, 0.695)
    assert np.allclose((test_loss, metric),
                       expected_loss_metric,
                       atol=TOLERANCE_ABSOLUTE)
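
The save/load round trip in the middle of this test is a useful pattern on its own. A self-contained sketch of the same idea with a throwaway model (hypothetical names):

import cntk as C

x = C.input_variable(2)
f = C.layers.Dense(1)(x)
f.save('tiny.cmf')                               # serialize to the CNTK model format
g = C.Function.load('tiny.cmf', device=C.cpu())  # reload, pinned to the CPU for evaluation
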
Example #9
def prepare(cfg, use_arg_parser=True):
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(abs_path, "Output")
    cfg["DATA"].MAP_FILE_PATH = os.path.join(abs_path, cfg["DATA"].MAP_FILE_PATH)
    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if running_locally:
        os.chdir(cfg["DATA"].MAP_FILE_PATH)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(os.path.join(abs_path, "Output", cfg["DATA"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["DATA"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False

    if use_arg_parser:
        parse_arguments(cfg)

    data_path = cfg["DATA"].MAP_FILE_PATH
    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        try:
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE = os.path.join(data_path,
                                                                        cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE)
        except (AttributeError, KeyError):
            print("To use precomputed proposals please specify the following parameters in your configuration:\n"
                  "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
                  "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
            exit(-1)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "fast_rcnn_eval_{}.model".format(cfg["MODEL"].BASE_MODEL))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..",
                                          "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].MAX_EPOCHS = 1

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)
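
The cfg object here mixes attribute access (cfg.MB_SIZE) with item access (cfg["DATA"]), which is characteristic of an EasyDict-style config. A minimal stand-in showing why both styles address the same entries (purely illustrative, not the actual config class):

class AttrDict(dict):
    # minimal EasyDict-style mapping: keys double as attributes
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)
    def __setattr__(self, name, value):
        self[name] = value

cfg = AttrDict(DATA=AttrDict())
cfg.MB_SIZE = 1                  # attribute style...
cfg["DATA"].MAP_FILE_PATH = "."  # ...and item style hit the same dict
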
Example #10
def prepare(cfg, use_arg_parser=True):
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(abs_path, "Output")
    cfg["DATA"].MAP_FILE_PATH = os.path.join(abs_path, cfg["DATA"].MAP_FILE_PATH)
    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if running_locally:
        os.chdir(cfg["DATA"].MAP_FILE_PATH)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(os.path.join(abs_path, "Output", cfg["DATA"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["DATA"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False

    if use_arg_parser:
        parse_arguments(cfg)

    data_path = cfg["DATA"].MAP_FILE_PATH
    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        try:
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE)
        except (AttributeError, KeyError):
            print("To use precomputed proposals please specify the following parameters in your configuration:\n"
                  "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
                  "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
            exit(-1)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "fast_rcnn_eval_{}.model".format(cfg["MODEL"].BASE_MODEL))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].MAX_EPOCHS = 1

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)
Example #11
def force_deterministic(seed):
    '''
    Force most of the computation nodes to run deterministically.

    Args:
        seed (int): set the random seed for all random ops in the graph and readers.
    '''
    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    import warnings

    warnings.warn("RNN based nodes don't run deterministically yet.", Warning)

    set_fixed_random_seed(seed)
    force_deterministic_algorithms()
Example #12
def force_deterministic(seed):
    '''
    Force most of the computation nodes to run deterministically.

    Args:
        seed (int): set the random seed for all random ops in the graph and readers.
    '''
    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    import warnings
    
    warnings.warn("pooling nodes and RNN based nodes don't run deterministically yet.", Warning)

    set_fixed_random_seed(seed)
    force_deterministic_algorithms()
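
Usage sketch for the helper above: call it once, before building readers, parameters, or any random ops, so that everything created afterwards picks up the fixed seed:

force_deterministic(1)
# readers, parameter initialization, dropout, etc. created after this point
# use the fixed seed; note the warning above about pooling/RNN nodes.
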
Example #13
def test_binary_convnet_error(device_id):

    if not native_convolve_function_registered:
        pytest.skip("Could not find {0} library. "
                    "Please check if HALIDE_PATH is configured properly "
                    "and try building {1} again"
                    .format('Cntk.BinaryConvolution-' + C.__version__.rstrip('+'),
                            'Extensibility\\BinaryConvolution'))

    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    model = create_binary_convolution_model()
    z, criterion = get_z_and_criterion(model)
    train_loss, metric = train_model(reader_train, z, criterion, epoch_size=8192, max_epochs=5)

    expected_loss_metric = (2.677057718858123, 0.6043701171875)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)

    # save and load (as an illustration)
    model_path = "model.cmf"
    model.save(model_path)
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # test
    model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
    _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)

    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    test_loss, metric = evaluate(reader_test, criterion, device=eval_device, minibatch_size=1, max_samples=200)

    expected_loss_metric = (0.0, 0.695)
    assert np.allclose((test_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
Example #14
def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    model = create_convnet_cifar10_model(num_classes=10)
    model.update_signature((num_channels, image_height, image_width))
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)
    train_loss, metric = train_model(reader_train, model, criterion, epoch_size=128, max_epochs=5)

    expected_loss_metric = (2.2963, 0.9062)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
Example #15
def test_binary_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    model = create_binary_convolution_model()
    z, criterion = get_z_and_criterion(model)
    train_loss, metric = train_model(reader_train, z, criterion, epoch_size=8192, max_epochs=5)

    expected_loss_metric = (2.677057718858123, 0.6043701171875)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)

    # save and load (as an illustration)
    model_path = "model.cmf"
    model.save(model_path)
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # test
    model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
    _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)

    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    test_loss, metric = evaluate(reader_test, criterion, device=eval_device, minibatch_size=1, max_samples=200)

    expected_loss_metric = (0.0, 0.695)
    assert np.allclose((test_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
Example #16
def prepare(cfg, use_arg_parser=True):
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(abs_path, "Output")
    cfg["DATA"].MAP_FILE_PATH = os.path.join(abs_path, cfg["DATA"].MAP_FILE_PATH)
    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if running_locally:
        os.chdir(cfg["DATA"].MAP_FILE_PATH)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(os.path.join(abs_path, "Output", cfg["DATA"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["DATA"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False

    if use_arg_parser:
        parse_arguments(cfg)

    data_path = cfg["DATA"].MAP_FILE_PATH
    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}.model"
                                     .format(cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage"))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].E2E_MAX_EPOCHS = 1
        cfg["CNTK"].RPN_EPOCHS = 1
        cfg["CNTK"].FRCN_EPOCHS = 1

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)

    if False and cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(cfg.TRAIN_CONV_LAYERS))
        print("Random seed      : {}".format(cfg.RND_SEED))
        print("Momentum per MB  : {}".format(cfg["CNTK"].MOMENTUM_PER_MB))
        if cfg["CNTK"].TRAIN_E2E:
            print("E2E epochs       : {}".format(cfg["CNTK"].E2E_MAX_EPOCHS))
        else:
            print("RPN lr factor    : {}".format(cfg["CNTK"].RPN_LR_FACTOR))
            print("RPN epochs       : {}".format(cfg["CNTK"].RPN_EPOCHS))
            print("FRCN lr factor   : {}".format(cfg["CNTK"].FRCN_LR_FACTOR))
            print("FRCN epochs      : {}".format(cfg["CNTK"].FRCN_EPOCHS))
Example #17
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import numpy as np
import os
import pytest
import sys
from cntk import load_model
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import try_set_default_device, gpu
from cntk.logging.graph import get_node_outputs
from cntk.ops.tests.ops_test_utils import cntk_device
from _cntk_py import force_deterministic_algorithms
force_deterministic_algorithms()

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(
    os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image",
                 "Detection", "FasterRCNN"))

from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()

from install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)

win35_linux34 = pytest.mark.skipif(
Example #18
def test_transfer_learning(device_id):
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU') # due to batch normalization in ResNet_18
    try_set_default_device(cntk_device(device_id))

    base_path = os.path.dirname(os.path.abspath(__file__))
    animals_path = os.path.join(base_path, *"../../../../Examples/Image/DataSets/Animals".split("/"))
    externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
    if externalData:
        extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
        model_file = os.path.join(extPath, *"PreTrainedModels/ResNet/v1/ResNet_18.model".split("/"))

        if not os.path.isfile(os.path.join(animals_path, 'Test', 'Weaver_bird.jpg')):
            # copy data from external test data location and unzip
            os.chdir(os.path.join(base_path, '..', '..', '..'))
            prepare_animals_data()
            os.chdir(base_path)
            zip_path = os.path.join(animals_path, 'Animals.zip')
            with zipfile.ZipFile(zip_path) as myzip:
                myzip.extractall(os.path.join(animals_path, '..'))

    else:
        model_file = os.path.join(base_path, *"../../../../Examples/Image/PretrainedModels/ResNet_18.model".split("/"))

    train_image_folder = os.path.join(animals_path, "Train")
    test_image_folder = os.path.join(animals_path, "Test")
    output_file = os.path.join(base_path, "tl_extended_output.txt")

    train_and_eval(model_file, train_image_folder, test_image_folder, output_file, None, testing=True)

    expected_output_file = os.path.join(base_path, "tl_extended_expected_output.txt")

    with open(output_file) as output_json:
        output_lines = output_json.readlines()
    with open(expected_output_file) as expected_output_json:
        expected_output_lines = expected_output_json.readlines()

    # handling different ordering of files
    out_dict = {}
    exp_dict = {}
    for i in range(len(output_lines)):
        output = json.loads(output_lines[i])[0]
        expected_output = json.loads(expected_output_lines[i])[0]

        out_dict[output["image"]] = output
        exp_dict[expected_output["image"]] = expected_output

    # debug output
    for k in out_dict:
        output = out_dict[k]
        expected_output = exp_dict[k]

        print("output: {}".format(output))
        print("expect: {}".format(expected_output))

    for k in out_dict:
        output = out_dict[k]
        expected_output = exp_dict[k]

        assert np.allclose(output["predictions"]["Sheep"], expected_output["predictions"]["Sheep"], atol=TOLERANCE_ABSOLUTE)
        assert np.allclose(output["predictions"]["Wolf"], expected_output["predictions"]["Wolf"], atol=TOLERANCE_ABSOLUTE)
Example #19
def set_global_vars(use_arg_parser=True):
    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E

    globalvars['fea_map_dim'] = cfg["CNTK"].FEA_MAP_DIM

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-datadir',
            '--datadir',
            help='Data directory where the ImageNet dataset is located',
            required=False,
            default=data_path)
        parser.add_argument('-outputdir',
                            '--outputdir',
                            help='Output directory for checkpoints and models',
                            required=False,
                            default=None)
        parser.add_argument('-logdir',
                            '--logdir',
                            help='Log file',
                            required=False,
                            default=None)
        parser.add_argument('-n',
                            '--num_epochs',
                            help='Total number of epochs to train',
                            type=int,
                            required=False,
                            default=cfg["CNTK"].E2E_MAX_EPOCHS)
        parser.add_argument('-m',
                            '--minibatch_size',
                            help='Minibatch size',
                            type=int,
                            required=False,
                            default=mb_size)
        parser.add_argument('-e',
                            '--epoch_size',
                            help='Epoch size',
                            type=int,
                            required=False,
                            default=epoch_size)
        parser.add_argument(
            '-q',
            '--quantized_bits',
            help='Number of quantized bits used for gradient aggregation',
            type=int,
            required=False,
            default=32)
        parser.add_argument(
            '-r',
            '--restart',
            help='Indicates whether to restart from scratch (instead of restarting from the checkpoint file by default)',
            action='store_true')
        parser.add_argument(
            '-device',
            '--device',
            type=int,
            help="Force to run the script on a specified device",
            required=False,
            default=None)
        parser.add_argument('-rpnLrFactor',
                            '--rpnLrFactor',
                            type=float,
                            help="Scale factor for rpn lr schedule",
                            required=False)
        parser.add_argument('-frcnLrFactor',
                            '--frcnLrFactor',
                            type=float,
                            help="Scale factor for frcn lr schedule",
                            required=False)
        parser.add_argument('-e2eLrFactor',
                            '--e2eLrFactor',
                            type=float,
                            help="Scale factor for e2e lr schedule",
                            required=False)
        parser.add_argument('-momentumPerMb',
                            '--momentumPerMb',
                            type=float,
                            help="momentum per minibatch",
                            required=False)
        parser.add_argument('-e2eEpochs',
                            '--e2eEpochs',
                            type=int,
                            help="number of epochs for e2e training",
                            required=False)
        parser.add_argument('-rpnEpochs',
                            '--rpnEpochs',
                            type=int,
                            help="number of epochs for rpn training",
                            required=False)
        parser.add_argument('-frcnEpochs',
                            '--frcnEpochs',
                            type=int,
                            help="number of epochs for frcn training",
                            required=False)
        parser.add_argument('-rndSeed',
                            '--rndSeed',
                            type=int,
                            help="the random seed",
                            required=False)
        parser.add_argument('-trainConv',
                            '--trainConv',
                            type=int,
                            help="whether to train conv layers",
                            required=False)
        parser.add_argument('-trainE2E',
                            '--trainE2E',
                            type=int,
                            help="whether to train e2e (otherwise 4 stage)",
                            required=False)

        args = vars(parser.parse_args())

        if args['rpnLrFactor'] is not None:
            globalvars['rpn_lr_factor'] = args['rpnLrFactor']
        if args['frcnLrFactor'] is not None:
            globalvars['frcn_lr_factor'] = args['frcnLrFactor']
        if args['e2eLrFactor'] is not None:
            globalvars['e2e_lr_factor'] = args['e2eLrFactor']
        if args['momentumPerMb'] is not None:
            globalvars['momentum_per_mb'] = args['momentumPerMb']
        if args['e2eEpochs'] is not None:
            globalvars['e2e_epochs'] = args['e2eEpochs']
        if args['rpnEpochs'] is not None:
            globalvars['rpn_epochs'] = args['rpnEpochs']
        if args['frcnEpochs'] is not None:
            globalvars['frcn_epochs'] = args['frcnEpochs']
        if args['rndSeed'] is not None:
            globalvars['rnd_seed'] = args['rndSeed']
        if args['trainConv'] is not None:
            globalvars['train_conv'] = args['trainConv'] == 1
        if args['trainE2E'] is not None:
            globalvars['train_e2e'] = args['trainE2E'] == 1

        if args['outputdir'] is not None:
            globalvars['output_path'] = args['outputdir']
        if args['logdir'] is not None:
            log_dir = args['logdir']
        if args['device'] is not None:
            # Setting one worker on GPU and one worker on CPU. Otherwise memory consumption is too high for a single GPU.
            if Communicator.rank() == 0:
                cntk.device.try_set_default_device(
                    cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())

        if args['datadir'] is not None:
            data_path = args['datadir']

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    globalvars['class_map_file'] = os.path.join(data_path,
                                                globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path,
                                                globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path,
                                               globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path,
                                                globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path,
                                               globalvars['test_roi_file'])

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])
    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))
Example #20
            t += trainer.previous_minibatch_sample_count                    # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
            #def trace_node(name):
            #    nl = [n for n in z.parameters if n.name() == name]
            #    if len(nl) > 0:
            #        print (name, np.asarray(nl[0].value))
            #trace_node('W')
            #trace_node('stabilizer_param')
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric

#############################
# main function boilerplate #
#############################

if __name__ == '__main__':
    # TODO: leave these in for now as debugging aids; remove for beta
    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    #set_computation_network_trace_level(1)  # TODO: remove debugging facilities once this all works
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    force_deterministic_algorithms()

    reader = create_reader(data_dir + "/atis.train.ctf")
    model = create_model()
    # train
    train(reader, model, max_epochs=8)
    # test (TODO)
    reader = create_reader(data_dir + "/atis.test.ctf")
    #test(reader, model_dir + "/slu.cmf")  # TODO: what is the correct pattern here?
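
The commented-out trace_node helper above would not run as written: in the Python API, name is a property on parameters, not a method. A corrected sketch (the model and parameter names are whatever your graph defines):

import numpy as np

def trace_node(model, name):
    # print the current value of the first parameter whose name matches
    matches = [p for p in model.parameters if p.name == name]
    if matches:
        print(name, np.asarray(matches[0].value))
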
Example #21
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.try_set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # to become invariant to initialization order, which is a valid change
    # BUGBUG: This currently seems to have no impact; the two BN models below should be identical in training
    force_deterministic_algorithms()

    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # change to intent classifier   --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(num_labels)
        #    ]), [0.084, 0.407364])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test(
                'replace lookahead by bidirectional model',
                Sequential([
                    Embedding(emb_dim),
                    BatchNormalization(),
                    BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                    BatchNormalization(),
                    Dense(num_labels)
                ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            #with default_options(dtype=np.float64):  # test this with double precision since single precision is too little for reproducable aggregation
            # ^^ This test requires to change the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test(
                'replace lookahead by bidirectional model, with shared BN',
                Sequential([
                    Embedding(emb_dim),
                    BNBiRecurrence(LSTM(hidden_dim),
                                   LSTM(hidden_dim),
                                   test_dual=True),
                    #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                    BatchNormalization(normalization_time_constant=-1),
                    Dense(num_labels)
                ]),
                [0.0579573500457558, 0.3214986774820327])
            # values with normalization_time_constant=-1 and double precision:
            # [0.0583178503091983, 0.3199431143304898]
            """ with normalization_time_constant=-1:
             Minibatch[   1-   1]: loss = 5.945220 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.850601 * 63, metric = 79.4% * 63
             Minibatch[   3-   3]: loss = 3.816031 * 68, metric = 57.4% * 68
             Minibatch[   4-   4]: loss = 2.213172 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.615342 * 65, metric = 40.0% * 65
             Minibatch[   6-   6]: loss = 2.360896 * 62, metric = 25.8% * 62
             Minibatch[   7-   7]: loss = 1.452822 * 58, metric = 27.6% * 58
             Minibatch[   8-   8]: loss = 0.947210 * 70, metric = 10.0% * 70
             Minibatch[   9-   9]: loss = 0.595654 * 59, metric = 10.2% * 59
             Minibatch[  10-  10]: loss = 1.515479 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
             Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
             Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
             Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
             Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
            --> 0.057818696098277916 0.3214128415043278
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 2.5% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 4.0% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 3.0% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 3.8% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.5% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.5% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 1.6% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 7.9% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
            --> 0.03159140568099053 0.0
            """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test(
                'BatchNorm global-corpus aggregation',
                Sequential([
                    Embedding(emb_dim),
                    BatchNormalization(normalization_time_constant=-1),
                    Recurrence(LSTM(hidden_dim), go_backwards=False),
                    BatchNormalization(normalization_time_constant=-1),
                    Dense(num_labels)
                ]), [0.05662627214996811, 0.2968516879905391])
            """
             Minibatch[   1-   1]: loss = 5.745576 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.684151 * 63, metric = 90.5% * 63
             Minibatch[   3-   3]: loss = 3.957423 * 68, metric = 63.2% * 68
             Minibatch[   4-   4]: loss = 2.286908 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.733978 * 65, metric = 38.5% * 65
             Minibatch[   6-   6]: loss = 2.189765 * 62, metric = 30.6% * 62
             Minibatch[   7-   7]: loss = 1.427890 * 58, metric = 25.9% * 58
             Minibatch[   8-   8]: loss = 1.501557 * 70, metric = 18.6% * 70
             Minibatch[   9-   9]: loss = 0.632599 * 59, metric = 13.6% * 59
             Minibatch[  10-  10]: loss = 1.516047 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
             Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
             Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
             Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
             Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
            --> 0.05662627214996811 0.2968516879905391
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 1.8% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 3.9% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 4.1% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 4.0% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.2% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.8% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 2.9% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 8.2% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
            --> 0.035050983248361256 0.0
            """

        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test(
                'plus BatchNorm',
                Sequential([
                    Embedding(emb_dim),
                    BatchNormalization(),
                    Recurrence(LSTM(hidden_dim), go_backwards=False),
                    BatchNormalization(),
                    Dense(num_labels)
                ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test(
                'plus lookahead',
                Sequential([
                    Embedding(emb_dim),
                    with_lookahead(),
                    BatchNormalization(),
                    Recurrence(LSTM(hidden_dim), go_backwards=False),
                    BatchNormalization(),
                    Dense(num_labels)
                ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test(
                'replace lookahead by bidirectional model',
                Sequential([
                    Embedding(emb_dim),
                    BatchNormalization(),
                    BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                    BatchNormalization(),
                    Dense(num_labels)
                ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True,
                             use_peepholes=True):
            run_model_test(
                'alternate paths',
                Sequential([
                    Embedding(emb_dim),
                    BatchNormalization(),
                    Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim + 50),
                               go_backwards=True),
                    BatchNormalization(map_rank=1),
                    Dense(num_labels)
                ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    if device_id >= 0:  # sparse FSAdagrad currently does not run on CPU  --TODO: fix this test once it does
        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_model_function()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        expected_avg = [0.09698114255561419, 0.5290531086061565]
        assert np.allclose([evaluation_avg, loss_avg],
                           expected_avg,
                           atol=TOLERANCE_ABSOLUTE)

        # test
        reader = create_reader(data_dir + "/atis.test.ctf", is_training=False)
        evaluate(reader, model)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        # TODO: update example to support tensorboard, or decide to not show it in all examples (in upcoming update of examples)
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)  #, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1)
        print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg],
                           expected_avg,
                           atol=TOLERANCE_ABSOLUTE)
Example #22
def set_global_vars(use_arg_parser=True):
    global globalvars
    global image_width
    global image_height
    global dims_input_const
    global img_pad_value
    global normalization_const
    global map_file_path
    global epoch_size
    global num_test_images
    global model_folder
    global base_model_file
    global feature_node_name
    global last_conv_node_name
    global start_train_conv_node_name
    global pool_node_name
    global last_hidden_node_name
    global roi_dim
    global prediction
    global prediction_in
    global prediction_out

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-c',
                            '--config',
                            help='Configuration file in YAML format',
                            required=False,
                            default=None)
        parser.add_argument('-t',
                            '--device_type',
                            type=str,
                            help="The type of the device (cpu|gpu)",
                            required=False,
                            default="cpu")
        parser.add_argument(
            '-d',
            '--device',
            type=int,
            help="Force to run the script on a specified device",
            required=False,
            default=None)
        parser.add_argument('-l',
                            '--list_devices',
                            action='store_true',
                            help="Lists the available devices and exits",
                            required=False,
                            default=False)
        parser.add_argument('--prediction',
                            action='store_true',
                            help="Switches to prediction mode",
                            required=False,
                            default=False)
        parser.add_argument(
            '--prediction_in',
            action='append',
            type=str,
            help="The input directory for images in prediction mode. Can be supplied multiple times.",
            required=False,
            default=list())
        parser.add_argument(
            '--prediction_out',
            action='append',
            type=str,
            help="The output directory for processed images and predictions in prediction mode. Can be supplied multiple times.",
            required=False,
            default=list())
        parser.add_argument(
            '--no_headers',
            action='store_true',
            help="Whether to suppress the header row in the ROI CSV files",
            required=False,
            default=False)
        parser.add_argument(
            '--output_width_height',
            action='store_true',
            help="Whether to output width/height instead of second x/y in the ROI CSV files",
            required=False,
            default=False)
        parser.add_argument(
            '--suppressed_labels',
            type=str,
            help="Comma-separated list of labels to suppress from being output in ROI CSV files.",
            required=False,
            default="")

        args = vars(parser.parse_args())

        # prediction mode?
        prediction = args['prediction']
        if prediction:
            prediction_in = args['prediction_in']
            if len(prediction_in) == 0:
                raise RuntimeError("No prediction input directory provided!")
            for p in prediction_in:
                if not os.path.exists(p):
                    raise RuntimeError(
                        "Prediction input directory '%s' does not exist" % p)
            prediction_out = args['prediction_out']
            if len(prediction_out) == 0:
                raise RuntimeError("No prediction output directory provided!")
            for p in prediction_out:
                if not os.path.exists(p):
                    raise RuntimeError(
                        "Prediction output directory '%s' does not exist" % p)
            if len(prediction_in) != len(prediction_out):
                raise RuntimeError(
                    "Number of input and output directories don't match: %i != %i"
                    % (len(prediction_in), len(prediction_out)))
            for i in range(len(prediction_in)):
                if prediction_in[i] == prediction_out[i]:
                    raise RuntimeError(
                        "Input and output directories #%i for prediction are the same: %s"
                        % ((i + 1), prediction_in[i]))

        if args['list_devices']:
            print("Available devices (Type - ID - description)")
            for d in cntk.device.all_devices():
                if d.type() == 0:
                    dev_type = "cpu"  # avoid shadowing the built-in 'type'
                elif d.type() == 1:
                    dev_type = "gpu"
                else:
                    dev_type = "<unknown:" + str(d.type()) + ">"
                print(dev_type + " - " + str(d.id()) + " - " + str(d))
            sys.exit(0)
        if args['config'] is not None:
            cfg_from_file(args['config'])
        if args['device'] is not None:
            if args['device_type'] == 'gpu':
                cntk.device.try_set_default_device(
                    cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())

    image_width = cfg["CNTK"].IMAGE_WIDTH
    image_height = cfg["CNTK"].IMAGE_HEIGHT

    # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
    dims_input_const = MinibatchData(
        Value(batch=np.asarray(
            [image_width, image_height, image_width, image_height,
             image_width, image_height], dtype=np.float32)),
        1, 1, False)

    # Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
    if cfg["CNTK"].BASE_MODEL == "VGG16":
        img_pad_value = [103, 116, 123]
        normalization_const = Constant([[[103]], [[116]], [[123]]])
    else:
        img_pad_value = [114, 114, 114]
        normalization_const = Constant([[[114]], [[114]], [[114]]])
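    # (This constant is typically subtracted from the image input node, e.g.
    #  cntk.minus(image_input, normalization_const), to remove the per-channel
    #  mean before the network body; the exact node wiring is an assumption.)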

    # dataset specific parameters
    map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
    globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE
    globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE
    globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE
    globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE
    globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE
    globalvars['output_path'] = cfg["CNTK"].OUTPUT_PATH
    epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
    num_test_images = cfg["CNTK"].NUM_TEST_IMAGES

    # model specific parameters
    if cfg["CNTK"].PRETRAINED_MODELS.startswith(".."):
        model_folder = os.path.join(abs_path, cfg["CNTK"].PRETRAINED_MODELS)
    else:
        model_folder = cfg["CNTK"].PRETRAINED_MODELS
    base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE)
    feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
    last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME
    start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME
    pool_node_name = cfg["CNTK"].POOL_NODE_NAME
    last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME
    roi_dim = cfg["CNTK"].ROI_DIM

    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    globalvars['class_map_file'] = os.path.join(data_path,
                                                globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path,
                                                globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path,
                                               globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path,
                                                globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path,
                                               globalvars['test_roi_file'])
    if use_arg_parser:
        # 'args' only exists when the argument parser ran above
        globalvars['headers'] = not args['no_headers']
        globalvars['output_width_height'] = args['output_width_height']
        suppressed_labels = []
        if len(args['suppressed_labels']) > 0:
            suppressed_labels = args['suppressed_labels'].split(",")
        globalvars['suppressed_labels'] = suppressed_labels

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])
    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))
def test_transfer_learning(device_id):
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    if cntk_device(device_id).type() != DeviceKind_GPU:
        # batch normalization in ResNet_18 requires GPU
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = os.path.dirname(os.path.abspath(__file__))
    animals_path = os.path.join(
        base_path, *"../../../../Examples/Image/DataSets/Animals".split("/"))
    externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
    if externalData:
        extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
        model_file = os.path.join(
            extPath, *"PreTrainedModels/ResNet/v1/ResNet_18.model".split("/"))

        if not os.path.isfile(
                os.path.join(animals_path, 'Test', 'Weaver_bird.jpg')):
            # copy data from external test data location and unzip
            os.chdir(os.path.join(base_path, '..', '..', '..'))
            prepare_animals_data()
            os.chdir(base_path)
            zip_path = os.path.join(animals_path, 'Animals.zip')
            with zipfile.ZipFile(zip_path) as myzip:
                myzip.extractall(os.path.join(animals_path, '..'))

    else:
        model_file = os.path.join(
            base_path,
            *"../../../../Examples/Image/PretrainedModels/ResNet_18.model".split("/"))

    train_image_folder = os.path.join(animals_path, "Train")
    test_image_folder = os.path.join(animals_path, "Test")
    output_file = os.path.join(base_path, "tl_extended_output.txt")

    train_and_eval(model_file,
                   train_image_folder,
                   test_image_folder,
                   output_file,
                   None,
                   testing=True)

    expected_output_file = os.path.join(base_path,
                                        "tl_extended_expected_output.txt")

    with open(output_file) as output_json:
        output_lines = output_json.readlines()
    with open(expected_output_file) as expected_output_json:
        expected_output_lines = expected_output_json.readlines()

    # handling different ordering of files
    out_dict = {}
    exp_dict = {}
    for i in range(len(output_lines)):
        output = json.loads(output_lines[i])[0]
        expected_output = json.loads(expected_output_lines[i])[0]

        out_dict[output["image"]] = output
        exp_dict[expected_output["image"]] = expected_output

    # debug output
    for k in out_dict:
        output = out_dict[k]
        expected_output = exp_dict[k]

        print("output: {}".format(output))
        print("expect: {}".format(expected_output))

    for k in out_dict:
        output = out_dict[k]
        expected_output = exp_dict[k]

        assert np.allclose(output["predictions"]["Sheep"],
                           expected_output["predictions"]["Sheep"],
                           atol=TOLERANCE_ABSOLUTE)
        assert np.allclose(output["predictions"]["Wolf"],
                           expected_output["predictions"]["Wolf"],
                           atol=TOLERANCE_ABSOLUTE)
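
# Sketch of the JSON-lines record format the comparison above relies on: each
# line holds a JSON array whose first element maps an image name to per-class
# predictions. The concrete file name and probability values below are
# illustrative assumptions.
import json
_line = '[{"image": "Weaver_bird.jpg", "predictions": {"Sheep": 0.01, "Wolf": 0.02}}]'
_record = json.loads(_line)[0]
assert _record["image"] == "Weaver_bird.jpg"
assert abs(_record["predictions"]["Sheep"] - 0.01) < 1e-9
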
def prepare(cfg, use_arg_parser=True):
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(abs_path, "Output")

    data_path = cfg["DATA"].MAP_FILE_PATH
    if load_file_from_blob(cfg["AZURE"].ACCOUNT_NAME, cfg["AZURE"].DATA,
                           cfg["DATA"].DATASET + ".zip", data_path + ".zip") is True:
        unzip_file(data_path + ".zip", data_path)

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if running_locally:
        os.chdir(cfg["DATA"].MAP_FILE_PATH)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(
                os.path.join(abs_path, "Output", cfg["DATA"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["DATA"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False

    if use_arg_parser:
        parse_arguments(cfg)

    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path,
                                              cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path,
                                              cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path,
                                             cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path,
                                              cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path,
                                             cfg["DATA"].TEST_ROI_FILE)

    cfg['OUTPUT_MODEL_NAME'] = "faster_rcnn_eval_{}_{}.model".format(
        cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage")
    cfg['MODEL_PATH'] = os.path.join(
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'], "output",
        cfg['OUTPUT_MODEL_NAME'])
    cfg['BASE_MODEL_PATH'] = os.path.join(
        os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'], "base_model")

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].E2E_MAX_EPOCHS = 1
        cfg["CNTK"].RPN_EPOCHS = 1
        cfg["CNTK"].FRCN_EPOCHS = 1

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)

    print("Downloading base model {}".format(cfg["MODEL"].BASE_MODEL_FILE))
    print("PRETRAINED_MODELS {} ".format(cfg["AZURE"].PRETRAINED_MODELS))
    print("BASE_MODEL_FILE {}".format(cfg["MODEL"].BASE_MODEL_FILE))
    cfg['BASE_MODEL_PATH'] = cfg['BASE_MODEL_PATH'] + '_' + cfg["MODEL"].BASE_MODEL_FILE
    print("BASE_MODEL_PATH {}".format(cfg['BASE_MODEL_PATH']))
    # For testing to make sure this works when we don't have any stored model in shared directory
    # os.remove(cfg['BASE_MODEL_PATH'])
    load_file_from_blob(cfg["AZURE"].ACCOUNT_NAME, cfg["AZURE"].PRETRAINED_MODELS,
                        cfg["MODEL"].BASE_MODEL_FILE, cfg['BASE_MODEL_PATH'])
    # the leading 'False' disables this debug block (left in place, presumably intentionally)
    if False and cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(cfg.TRAIN_CONV_LAYERS))
        print("Random seed      : {}".format(cfg.RND_SEED))
        print("Momentum per MB  : {}".format(cfg["CNTK"].MOMENTUM_PER_MB))
        if cfg["CNTK"].TRAIN_E2E:
            print("E2E epochs       : {}".format(cfg["CNTK"].E2E_MAX_EPOCHS))
        else:
            print("RPN lr factor    : {}".format(cfg["CNTK"].RPN_LR_FACTOR))
            print("RPN epochs       : {}".format(cfg["CNTK"].RPN_EPOCHS))
            print("FRCN lr factor   : {}".format(cfg["CNTK"].FRCN_LR_FACTOR))
            print("FRCN epochs      : {}".format(cfg["CNTK"].FRCN_EPOCHS))
Example No. 25
0
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.try_set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1) # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training
    force_deterministic_algorithms()

    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # change to intent classifier   --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(num_labels)
        #    ]), [0.084, 0.407364])


        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
          #with default_options(dtype=np.float64):  # test this with double precision since single precision is too little for reproducible aggregation
          # ^^ This test requires to change the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])
            # values with normalization_time_constant=-1 and double precision:
            # [0.0583178503091983, 0.3199431143304898]
            """ with normalization_time_constant=-1:
             Minibatch[   1-   1]: loss = 5.945220 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.850601 * 63, metric = 79.4% * 63
             Minibatch[   3-   3]: loss = 3.816031 * 68, metric = 57.4% * 68
             Minibatch[   4-   4]: loss = 2.213172 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.615342 * 65, metric = 40.0% * 65
             Minibatch[   6-   6]: loss = 2.360896 * 62, metric = 25.8% * 62
             Minibatch[   7-   7]: loss = 1.452822 * 58, metric = 27.6% * 58
             Minibatch[   8-   8]: loss = 0.947210 * 70, metric = 10.0% * 70
             Minibatch[   9-   9]: loss = 0.595654 * 59, metric = 10.2% * 59
             Minibatch[  10-  10]: loss = 1.515479 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
             Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
             Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
             Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
             Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
            --> 0.057818696098277916 0.3214128415043278
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 2.5% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 4.0% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 3.0% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 3.8% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.5% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.5% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 1.6% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 7.9% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
            --> 0.03159140568099053 0.0
            """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])
            """
             Minibatch[   1-   1]: loss = 5.745576 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.684151 * 63, metric = 90.5% * 63
             Minibatch[   3-   3]: loss = 3.957423 * 68, metric = 63.2% * 68
             Minibatch[   4-   4]: loss = 2.286908 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.733978 * 65, metric = 38.5% * 65
             Minibatch[   6-   6]: loss = 2.189765 * 62, metric = 30.6% * 62
             Minibatch[   7-   7]: loss = 1.427890 * 58, metric = 25.9% * 58
             Minibatch[   8-   8]: loss = 1.501557 * 70, metric = 18.6% * 70
             Minibatch[   9-   9]: loss = 0.632599 * 59, metric = 13.6% * 59
             Minibatch[  10-  10]: loss = 1.516047 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
             Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
             Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
             Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
             Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
            --> 0.05662627214996811 0.2968516879905391
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 1.8% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 3.9% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 4.1% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 4.0% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.2% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.8% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 2.9% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 8.2% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
            --> 0.035050983248361256 0.0
            """


        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim+50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(num_labels)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    if device_id >= 0: # sparse FSAdagrad currently does not run on CPU  --TODO: fix this test once it does
        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_model_function()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        expected_avg = [0.09698114255561419, 0.5290531086061565]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # test
        reader = create_reader(data_dir + "/atis.test.ctf", is_training=False)
        evaluate(reader, model)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        # TODO: update example to support tensorboard, or decide to not show it in all examples (in upcoming update of examples)
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1) #, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1)
        print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
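
# Hedged sketch of a bidirectional recurrence in the spirit of the
# BiRecurrence() layer used above: run one recurrence forward, one backward,
# and splice the outputs along the feature axis. The example's real helper
# may differ in detail.
import cntk
from cntk.layers import Recurrence

def bi_recurrence_sketch(fwd_cell, bwd_cell):
    fwd = Recurrence(fwd_cell)                     # left-to-right pass
    bwd = Recurrence(bwd_cell, go_backwards=True)  # right-to-left pass
    return lambda x: cntk.splice(fwd(x), bwd(x))
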
Example No. 26
0
def set_global_vars(use_arg_parser=True):
    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E


    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-datadir', '--datadir', help='Data directory where the ImageNet dataset is located',
                            required=False, default=data_path)
        parser.add_argument('-outputdir', '--outputdir', help='Output directory for checkpoints and models',
                            required=False, default=None)
        parser.add_argument('-logdir', '--logdir', help='Log file',
                            required=False, default=None)
        parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train', type=int,
                            required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS)
        parser.add_argument('-m', '--minibatch_size', help='Minibatch size', type=int,
                            required=False, default=mb_size)
        parser.add_argument('-e', '--epoch_size', help='Epoch size', type=int,
                            required=False, default=epoch_size)
        parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int,
                            required=False, default=32)
        parser.add_argument('-r', '--restart',
                            help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)',
                            action='store_true')
        parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device",
                            required=False, default=None)
        parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float, help="Scale factor for rpn lr schedule", required=False)
        parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float, help="Scale factor for frcn lr schedule", required=False)
        parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float, help="Scale factor for e2e lr schedule", required=False)
        parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float, help="momentum per minibatch", required=False)
        parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int, help="number of epochs for e2e training", required=False)
        parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int, help="number of epochs for rpn training", required=False)
        parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int, help="number of epochs for frcn training", required=False)
        parser.add_argument('-rndSeed', '--rndSeed', type=int, help="the random seed", required=False)
        parser.add_argument('-trainConv', '--trainConv', type=int, help="whether to train conv layers", required=False)
        parser.add_argument('-trainE2E', '--trainE2E', type=int, help="whether to train e2e (otherwise 4 stage)", required=False)

        args = vars(parser.parse_args())

        if args['rpnLrFactor'] is not None:
            globalvars['rpn_lr_factor'] = args['rpnLrFactor']
        if args['frcnLrFactor'] is not None:
            globalvars['frcn_lr_factor'] = args['frcnLrFactor']
        if args['e2eLrFactor'] is not None:
            globalvars['e2e_lr_factor'] = args['e2eLrFactor']
        if args['momentumPerMb'] is not None:
            globalvars['momentum_per_mb'] = args['momentumPerMb']
        if args['e2eEpochs'] is not None:
            globalvars['e2e_epochs'] = args['e2eEpochs']
        if args['rpnEpochs'] is not None:
            globalvars['rpn_epochs'] = args['rpnEpochs']
        if args['frcnEpochs'] is not None:
            globalvars['frcn_epochs'] = args['frcnEpochs']
        if args['rndSeed'] is not None:
            globalvars['rnd_seed'] = args['rndSeed']
        if args['trainConv'] is not None:
            globalvars['train_conv'] = args['trainConv'] == 1
        if args['trainE2E'] is not None:
            globalvars['train_e2e'] = args['trainE2E'] == 1

        if args['outputdir'] is not None:
            globalvars['output_path'] = args['outputdir']
        if args['logdir'] is not None:
            log_dir = args['logdir']
        if args['device'] is not None:
            # Setting one worker on GPU and one worker on CPU. Otherwise memory consumption is too high for a single GPU.
            if Communicator.rank() == 0:
                cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())

        if args['datadir'] is not None:
            data_path = args['datadir']

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file'])

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])
    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))