def train(nonlinearity, num_hidden_layers, device_id,
          minibatch_size=10, num_samples=1000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))

    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(
            trainer, i, training_progress_output_freq)

        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors

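# `train` above assumes two helpers that are not defined in this section.
# The sketches below are modeled on the CNTK tutorial utilities of the same
# names; the actual test module may define them differently.

def generate_random_data_sample(sample_size, feature_dim, num_classes):
    # Draw class ids, then features around class-dependent means, so the
    # classes are separable enough for a quick sanity-check training run.
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    X = (np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)
    X = X.astype(np.float32)
    # One-hot encode the labels.
    class_ind = [Y == class_number for class_number in range(num_classes)]
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    return X, Y

def print_training_progress(trainer, mb, frequency):
    # Return ("NA", "NA") between reporting points so callers can filter.
    training_loss, eval_error = "NA", "NA"
    if mb % frequency == 0:
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
    return mb, training_loss, eval_error
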
def test_initializer_init(device_id):
    from cntk.utils import cntk_device
    from cntk import cntk_py
    from cntk.device import set_default_device
    cntk_py.always_allow_setting_default_device()
    set_default_device(cntk_device(device_id))

    _check(uniform(scale=10), 'uniform')
    _check(gaussian(output_rank=1, filter_rank=2, scale=10), 'gaussian')
    _check(xavier(output_rank=1, filter_rank=2, scale=10), 'xavier')
    _check(glorot_uniform(output_rank=1, filter_rank=2, scale=10), 'glorot_uniform')
    _check(glorot_normal(output_rank=1, filter_rank=2, scale=10), 'glorot_normal')
    _check(he_uniform(output_rank=1, filter_rank=2, scale=10), 'he_uniform')
    _check(he_normal(output_rank=1, filter_rank=2, scale=10), 'he_normal')

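# Every test in this file receives a pytest `device_id` and maps it to a CNTK
# DeviceDescriptor via `cntk_device`, which is imported from the test
# utilities above. A minimal sketch of that mapping, assuming the usual CNTK
# test convention (negative ids select the CPU, non-negative ids a GPU);
# shown here only for illustration:

def cntk_device(device_id):
    import cntk as C
    if device_id < 0:
        return C.device.cpu()
    return C.device.gpu(device_id)
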
def test_initializer_init(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    from cntk import cntk_py
    cntk_py.always_allow_setting_default_device()
    from cntk.device import set_default_device
    set_default_device(cntk_device(device_id))

    _check(uniform(scale=1), 'uniform')
    _check(normal(scale=1, output_rank=1, filter_rank=2), 'normal')
    _check(xavier(scale=10, output_rank=1, filter_rank=2), 'xavier')
    _check(glorot_uniform(scale=10, output_rank=1, filter_rank=2), 'glorot_uniform')
    _check(glorot_normal(scale=10, output_rank=1, filter_rank=2), 'glorot_normal')
    _check(he_uniform(scale=10, output_rank=1, filter_rank=2), 'he_uniform')
    _check(he_normal(scale=10, output_rank=1, filter_rank=2), 'he_normal')

def train(nonlinearity, num_hidden_layers, device_id):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    set_default_device(cntk_device(device_id))

    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)

    mysamplesize = 64
    features, labels = generate_random_data_sample(mysamplesize,
                                                   input_dim,
                                                   num_output_classes)

    hidden_layers_dim = 50

    # Named `inp` to avoid shadowing the `input` builtin.
    inp = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)

    loss = cross_entropy_with_softmax(z, label)
    eval_error = classification_error(z, label)

    learner = sgd(z.parameters, lr_schedule)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 25
    num_samples = 2500
    num_minibatches_to_train = num_samples // minibatch_size

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(
            trainer, i, training_progress_output_freq)

        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors

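# Both `train` variants above assume a `fully_connected_classifier_net`
# model factory. A minimal sketch, modeled on the CNTK tutorial helper of
# the same name (the actual definition in the test module may differ):

def fully_connected_classifier_net(input_var, num_output_classes,
                                   hidden_layers_dim, num_hidden_layers,
                                   nonlinearity):
    import cntk as C
    with C.layers.default_options(init=C.glorot_uniform()):
        h = input_var
        for _ in range(num_hidden_layers):
            h = C.layers.Dense(hidden_layers_dim, activation=nonlinearity)(h)
        # Linear output layer; the softmax is applied inside the loss.
        return C.layers.Dense(num_output_classes, activation=None)(h)
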
def test_native_fasterrcnn_eval(tmpdir, device_id):
    from config import cfg
    cfg["CNTK"].FORCE_DETERMINISTIC = True
    cfg["CNTK"].DEBUG_OUTPUT = False
    cfg["CNTK"].VISUALIZE_RESULTS = False
    cfg["CNTK"].FAST_MODE = True
    cfg["CNTK"].MAP_FILE_PATH = grocery_path

    from FasterRCNN import set_global_vars
    set_global_vars(False)

    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')  # it runs very slowly on CPU
    try_set_default_device(cntk_device(device_id))

    # Since we do not use a reader for evaluation, we need unzipped data.
    externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
    if externalData:
        extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
        model_file = os.path.join(extPath, "PreTrainedModels", "AlexNet",
                                  "v0", "AlexNet.model")
    else:
        model_file = os.path.join(
            abs_path,
            *"../../../../Examples/Image/PretrainedModels/AlexNet.model".split("/"))

    from FasterRCNN import train_faster_rcnn_e2e, eval_faster_rcnn_mAP

    np.random.seed(seed=3)
    eval_model = train_faster_rcnn_e2e(model_file, debug_output=False)
    meanAP_python = eval_faster_rcnn_mAP(eval_model)

    cntk_py.always_allow_setting_default_device()
    try_set_default_device(cpu())

    from native_proposal_layer import clone_with_native_proposal_layer
    model_with_native_pl = clone_with_native_proposal_layer(eval_model)
    meanAP_native = eval_faster_rcnn_mAP(model_with_native_pl)

    # 0.2067 (python) vs 0.2251 (native) -- the difference stems
    # from different sorting algorithms: quicksort in python and
    # heapsort in c++ (neither is stable).
    assert abs(meanAP_python - meanAP_native) < 0.02

def test_initializer_init(device_id):
    from cntk.utils import cntk_device
    from cntk import DeviceDescriptor, cntk_py
    cntk_py.always_allow_setting_default_device()
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    _check(uniform(scale=10), 'uniform')
    _check(gaussian(output_rank=1, filter_rank=2, scale=10), 'gaussian')
    _check(xavier(output_rank=1, filter_rank=2, scale=10), 'xavier')
    _check(glorot_uniform(output_rank=1, filter_rank=2, scale=10), 'glorot_uniform')
    _check(glorot_normal(output_rank=1, filter_rank=2, scale=10), 'glorot_normal')
    _check(he_uniform(output_rank=1, filter_rank=2, scale=10), 'he_uniform')
    _check(he_normal(output_rank=1, filter_rank=2, scale=10), 'he_normal')

def test_initializer_init(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    from cntk import cntk_py
    cntk_py.always_allow_setting_default_device()
    from cntk.device import try_set_default_device
    try_set_default_device(cntk_device(device_id))

    _check(uniform(scale=1), 'uniform')
    _check(normal(scale=1, output_rank=1, filter_rank=2), 'normal')
    _check(xavier(scale=10, output_rank=1, filter_rank=2), 'xavier')
    _check(glorot_uniform(scale=10, output_rank=1, filter_rank=2), 'glorot_uniform')
    _check(glorot_normal(scale=10, output_rank=1, filter_rank=2), 'glorot_normal')
    _check(he_uniform(scale=10, output_rank=1, filter_rank=2), 'he_uniform')
    _check(he_normal(scale=10, output_rank=1, filter_rank=2), 'he_normal')
    _check(truncated_normal(stdev=10), 'truncated_gaussian')
    _check_min_max(truncated_normal(stdev=2), -4, 4, 'truncated_gaussian')

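# `_check` and `_check_min_max` are assumed helpers shared by the initializer
# tests above. A plausible sketch: create a parameter with the given
# initializer and verify the drawn values look sane (roughly zero-centered,
# non-constant, and within bounds for bounded distributions). The real
# assertions in the test module may be stricter.

def _check(init, name):
    import cntk as C
    import numpy as np
    p = C.parameter(shape=(10, 20, 5), init=init)
    assert np.allclose(np.average(p.value), 0, atol=0.1), name
    assert np.var(p.value) > 0.01, name

def _check_min_max(init, low, high, name):
    import cntk as C
    import numpy as np
    p = C.parameter(shape=(100, 100), init=init)
    assert np.min(p.value) >= low and np.max(p.value) <= high, name
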
def reenable_once_sorting_is_stable_test_native_fasterrcnn_eval(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')  # it runs very slowly on CPU
    try_set_default_device(cntk_device(device_id))

    from FasterRCNN_eval import compute_test_set_aps
    eval_model, meanAP_python, cfg = run_fasterrcnn_grocery_training(True)

    cntk_py.always_allow_setting_default_device()
    try_set_default_device(cpu())

    sys.path.append(os.path.join(abs_path, "..", "..", "..", "..",
                                 "Examples", "Extensibility", "ProposalLayer"))
    from native_proposal_layer import clone_with_native_proposal_layer
    model_with_native_pl = clone_with_native_proposal_layer(eval_model)
    eval_results = compute_test_set_aps(model_with_native_pl, cfg)
    meanAP_native = np.nanmean(list(eval_results.values()))

    # 0.2067 (python) vs 0.2251 (native) -- the difference stems
    # from different sorting algorithms: quicksort in python and
    # heapsort in c++ (neither is stable).
    print("Python: {}, native: {}".format(meanAP_python, meanAP_native))
    assert abs(meanAP_python - meanAP_native) < 0.1

def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))

    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule, minibatch_size=0)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    mem = np.zeros(num_minibatches_to_train)

    features, labels = generate_random_data_sample(minibatch_size,
                                                   input_dim,
                                                   num_output_classes)

    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This tolerance should not be
    # exceeded when run as a standalone process (simply run this file with the
    # Python executable).
    MEM_INCREASE_TOLERANCE = 10*1024

    dev = cntk_device(device_id)
    i = 0
    proc = os_process()
    while i < num_minibatches_to_train:
        mem[i] = mem_used(proc)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels}, device=dev)
        i += 1

    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase/num_minibatches_to_train
    mem_diff = mem[-1] - mem[10]

    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff/1024), int(mem_inc_fraction*100),
                          nonlinearity, mem_changes))

def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))

    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    mem = np.zeros(num_minibatches_to_train)

    features, labels = generate_random_data_sample(minibatch_size,
                                                   input_dim,
                                                   num_output_classes)

    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This tolerance should not be
    # exceeded when run as a standalone process (simply run this file with the
    # Python executable).
    MEM_INCREASE_TOLERANCE = 10*1024

    dev = cntk_device(device_id)
    i = 0
    proc = os_process()
    while i < num_minibatches_to_train:
        mem[i] = mem_used(proc)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels}, device=dev)
        i += 1

    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase/num_minibatches_to_train
    mem_diff = mem[-1] - mem[10]

    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff/1024), int(mem_inc_fraction*100),
                          nonlinearity, mem_changes))

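# `os_process` and `mem_used` are assumed helpers for the two mem_leak_check
# variants above. A minimal sketch built on psutil; the real test utilities
# may measure memory differently (e.g. a platform-specific counter).

import os
import psutil

def os_process():
    # Handle to the current process, queried repeatedly inside the loop.
    return psutil.Process(os.getpid())

def mem_used(proc):
    # Resident set size, in bytes, of the given process.
    return proc.memory_info().rss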