# NOTE(review): this snippet is truncated/redacted -- the Plot title string
# ('%s ... @ %s' % (model_name, ...)) has been fused onto the
# `host_plot = 'http://...'` assignment and the model/algorithm construction
# between them is missing. Kept verbatim; do not treat as runnable.
def run(model_name): running_on_laptop = socket.gethostname() == 'yop' X = tensor.tensor4('image_features', dtype='float32') T = tensor.matrix('targets', dtype='float32') image_border_size = 100 if running_on_laptop: host_plot = 'http://*****:*****@ %s' % (model_name, datetime.datetime.now(), socket.gethostname()), channels=[['loss', 'valid_loss_test'], ['valid_error']], after_epoch=True, server_url=host_plot), Printing(), Checkpoint('train2') ] main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions) main_loop.run()
def __init__(self):
    """Connect to the train/validation data servers and expose epoch iterators.

    Ports come from the ``ImageNet_Base`` parent (``self.port_train`` /
    ``self.port_val``); only the bound ``get_epoch_iterator`` methods are kept.
    """
    ImageNet_Base.__init__(self)
    # One ServerDataStream per split; the two differ only in their port.
    train_stream = ServerDataStream(('filenames',), False, port=self.port_train)
    self.get_epoch_train = train_stream.get_epoch_iterator
    val_stream = ServerDataStream(('filenames',), False, port=self.port_val)
    self.get_epoch_val = val_stream.get_epoch_iterator
def __init__(self, ports, config, *args, **kwargs):
    """Open one ServerDataStream per (target, dataset) port and record sizes.

    Parameters
    ----------
    ports : dict
        Mapping ``target -> {dataset_name: port}``.
    config : object
        Experiment configuration; ``config.data_server`` supplies host and
        hwm, and ``config.paths.meta_data.size.<target>.{train,valid}`` the
        dataset sizes used to derive the per-epoch iteration count.
    """
    self.config = config
    self.host = config.data_server.host
    self.hwm = config.data_server.hwm
    # open streams: one per target/dataset pair
    self.data_streams = {}
    for target, dset_ports in ports.items():  # .items() works on Py2 and Py3
        self.data_streams[target] = {}
        for dset, port in dset_ports.items():
            self.data_streams[target][dset] = ServerDataStream(
                # BUG FIX: ('raw') is just the string 'raw'; `sources` must be
                # a tuple of source names, hence the trailing comma.
                sources=('raw',),
                produces_examples=True,
                port=port,
                host=self.host,
                hwm=self.hwm
            )
    # initiate epoch iterators
    self.epoch_iterators = self._init_epoch_iterators()
    # Record dataset sizes; plain attribute access via getattr replaces the
    # original eval() string construction (same lookup, no code execution).
    self.dset_size = {}
    for target in config.target:
        sizes = getattr(self.config.paths.meta_data.size, target)
        self.dset_size[target] = {}
        self.dset_size[target]['train'] = sizes.train
        self.dset_size[target]['valid'] = sizes.valid
    # get n_iteration: total training examples across targets per batch size
    self.n_iter = sum([d['train'] for d in self.dset_size.values()])
    self.n_iter = int(self.n_iter / config.hyper_parameters.batch_size)
def fork_to_background(make_datastream, sources):
    """Serve ``make_datastream`` from a child process; return a client stream.

    Returns the connected ``ServerDataStream`` together with the ``Process``
    handle so the caller can terminate the server later.
    """
    port = get_open_port()
    worker = Process(target=on_thread, args=(make_datastream, port))
    worker.start()
    # NOTE(review): `hwm` is not defined in this scope -- presumably a
    # module-level constant; confirm against the rest of the file.
    client = ServerDataStream(sources, port=port, hwm=hwm,
                              produces_examples=False)
    return client, worker
def test_server():
    """End-to-end check: data served over ServerDataStream matches the source."""
    producer = Process(target=start_server, args=(get_stream(),))
    producer.start()
    try:
        served = ServerDataStream(('f', 't')).get_epoch_iterator()
        reference = get_stream().get_epoch_iterator()
        # Compare three batches element-wise against the local stream.
        for _, got, want in zip(range(3), served, reference):
            for pair in zip(got, want):
                assert_allclose(*pair)
        # The served epoch must end exactly where the source epoch does.
        assert_raises(StopIteration, next, served)
    finally:
        producer.terminate()
# Trains over Fuel server streams (train on the default port, valid on 5558)
# and early-stops on the validation F2 score with a patience of 5 epochs.
# NOTE(review): `loss` is first assigned mean binary cross-entropy and then
# immediately overwritten with `-f2_diff`, so only the (negated) soft F2 score
# drives the updates; the cross-entropy and the 0.5-thresholded
# `prediction_01`/`f2` are used for validation monitoring only -- confirm
# this is intentional. On every new best validation score the parameters are
# saved to <name>.npz and the per-epoch training losses pickled to <name>.pkl.
def main(name, num_epochs): train_stream = ServerDataStream(('features', 'labels'), produces_examples=False) valid_stream = ServerDataStream(('features', 'labels'), produces_examples=False, port=5558) X = tensor.ftensor4('images') y = tensor.imatrix('targets') prediction_train, prediction_test, params = get_model(X) loss = lasagne.objectives.binary_crossentropy(prediction_train, y) loss = loss.mean() prediction_01 = tensor.ge(prediction_train, numpy.float32(.5)) f2 = f2_score(prediction_01, y) f2_diff = f2_score(prediction_train, y) loss = -f2_diff updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=1e-3, momentum=0.9) train_fn = function([X, y], loss, updates=updates) valid_fn = function([X, y], f2) best_valid_score = 0 patience = 0 all_train_loss = [] iteration = 0 for epoch in range(num_epochs): f2_valid_loss = [] f2_train_loss = [] for imgs, targets in train_stream.get_epoch_iterator(): f2_train_loss.append(train_fn(imgs, targets)) iteration += 1 all_train_loss.append(f2_train_loss) train_score = -numpy.mean(numpy.asarray(f2_train_loss)) print('Iteration %d' % (iteration, )) print('train score : {0}'.format(train_score)) for imgs, targets in valid_stream.get_epoch_iterator(): f2_valid_loss.append(valid_fn(imgs, targets)) valid_score = numpy.mean(numpy.asarray(f2_valid_loss)) print('valid score : {0}'.format(valid_score)) if best_valid_score < valid_score: best_valid_score = valid_score patience = 0 param_values = [p.get_value() for p in params] numpy.savez_compressed('%s.npz' % (name, ), param_values) pickle.dump(all_train_loss, open('%s.pkl' % (name, ), 'wb')) else: patience += 1 if patience == 5: break print('patience : {0}'.format(patience)) print('\n')
# CLI entry point: parses training options and dispatches to train_net().
# With --parallel, train/valid/test are read from Fuel servers on three
# consecutive ports (--port, +1, +2); otherwise streams are built in-process
# via get_dvc(). With --mnist, data/network come from get_mnist()/net_mnist().
# NOTE(review): the source formatting was collapsed, so the exact nesting of
# the `if args.mnist` / `if args.parallel` branches cannot be recovered with
# certainty; kept verbatim.
def main(args): print(args) parser = argparse.ArgumentParser(description='train') parser.add_argument('-p', '--parallel', action='store_true') parser.add_argument('-m', '--mnist', action='store_true') parser.add_argument('--L1', type=float) parser.add_argument('--L2', type=float) parser.add_argument('-e', '--early_stopping', action='store_true') parser.add_argument('-d', '--dropout', action='store_true') parser.add_argument('-j', '--jobid') parser.add_argument('-s', '--small', action='store_true') parser.add_argument('-u', '--update', choices=["rmsprop"]) parser.add_argument('-f', '--finish', type=int) parser.add_argument('-t', '--duration', type=int) parser.add_argument('-a', '--augmentation', action='store_true') parser.add_argument('--port', default=5557, type=int) args = parser.parse_args(args) image_size = (128, 128) if args.mnist: train, test = get_mnist() net = net_mnist() else: net = net_dvc(image_size) if args.parallel: sources = ('image_features', 'targets') train = ServerDataStream(sources, True, port=args.port) valid = ServerDataStream(sources, True, port=args.port + 1) test = ServerDataStream(sources, True, port=args.port + 2) else: train, valid, test = get_dvc(image_size, shortcut=args.small, augmentation=args.augmentation) train_net(net, train, test, **vars(args))
def stream_from_file(sources, filename, *args):
    """Launch ``filename`` as a CPU-only data-server subprocess and connect.

    Returns the client ``ServerDataStream`` and the ``Popen`` handle; the
    child process is killed automatically at interpreter exit.
    """
    port = get_open_port()
    child = Popen(['python', filename, str(port)] + list(args),
                  env=dict(os.environ, THEANO_FLAGS='device=cpu'))
    stream = ServerDataStream(sources, port=port, hwm=50,
                              produces_examples=False)

    def _kill_child():
        # NOTE(review): Popen objects are always truthy, so this guard never
        # skips; kill() on an already-dead child is harmless though.
        if child:
            child.kill()

    atexit.register(_kill_child)
    return stream, child
# nose-style test case for ServerDataStream: setUp starts a server subprocess
# feeding get_stream() and connects a client stream; tearDown terminates it.
# test_pickling round-trips the stream through cPickle and raises SkipTest if
# the served values drift beyond rtol=1e-3; the second cPickle.dumps is a
# regression test for pickling an already-unpickled stream. Kept verbatim
# because the collapsed formatting makes the try/except span ambiguous.
class TestServer(object): def setUp(self): self.server_process = Process( target=start_server, args=(get_stream(),)) self.server_process.start() self.stream = ServerDataStream(('f', 't'), False) def tearDown(self): self.server_process.terminate() self.stream = None def test_server(self): server_data = self.stream.get_epoch_iterator() expected_data = get_stream().get_epoch_iterator() for _, s, e in zip(range(3), server_data, expected_data): for data in zip(s, e): assert_allclose(*data) assert_raises(StopIteration, next, server_data) def test_pickling(self): try: self.stream = cPickle.loads(cPickle.dumps(self.stream)) # regression test: pickling of an unpickled stream used it fail cPickle.dumps(self.stream) server_data = self.stream.get_epoch_iterator() expected_data = get_stream().get_epoch_iterator() for _, s, e in zip(range(3), server_data, expected_data): for data in zip(s, e): assert_allclose(*data, rtol=1e-3) except AssertionError as e: raise SkipTest("Skip test_that failed with: {}".format(e)) assert_raises(StopIteration, next, server_data) def test_value_error_on_request(self): assert_raises(ValueError, self.stream.get_data, [0, 1]) def test_close(self): self.stream.close() def test_next_epoch(self): self.stream.next_epoch() def test_reset(self): self.stream.reset()
class TestServer(object):
    """Tests for ServerDataStream backed by a live server subprocess."""

    def setUp(self):
        self.server_process = Process(
            target=start_server, args=(get_stream(),))
        self.server_process.start()
        self.stream = ServerDataStream(('f', 't'), False)

    def tearDown(self):
        self.server_process.terminate()
        self.stream = None

    def test_server(self):
        # Served batches must match the reference stream exactly.
        server_data = self.stream.get_epoch_iterator()
        expected_data = get_stream().get_epoch_iterator()
        for _, s, e in zip(range(3), server_data, expected_data):
            for data in zip(s, e):
                assert_allclose(*data)
        assert_raises(StopIteration, next, server_data)

    def test_pickling(self):
        # The stream must survive a pickle round trip and keep serving data.
        # (Removed a stray commented-out `pass` left from debugging.)
        self.stream = cPickle.loads(cPickle.dumps(self.stream))
        server_data = self.stream.get_epoch_iterator()
        expected_data = get_stream().get_epoch_iterator()
        for _, s, e in zip(range(3), server_data, expected_data):
            for data in zip(s, e):
                assert_allclose(*data, rtol=1e-5)
        assert_raises(StopIteration, next, server_data)

    def test_value_error_on_request(self):
        assert_raises(ValueError, self.stream.get_data, [0, 1])

    def test_close(self):
        self.stream.close()

    def test_next_epoch(self):
        self.stream.next_epoch()

    def test_reset(self):
        self.stream.reset()
class TestServer(object):
    """Exercises ServerDataStream against a server process started in setUp."""

    def setUp(self):
        self.server_process = Process(target=start_server,
                                      args=(get_stream(),))
        self.server_process.start()
        self.stream = ServerDataStream(('f', 't'), False)

    def tearDown(self):
        self.server_process.terminate()
        self.stream = None

    def test_server(self):
        # Three served batches must match the locally-built reference stream.
        received = self.stream.get_epoch_iterator()
        reference = get_stream().get_epoch_iterator()
        for _, got, want in zip(range(3), received, reference):
            for pair in zip(got, want):
                assert_allclose(*pair)
        assert_raises(StopIteration, next, received)

    def test_pickling(self):
        # A pickle round trip must leave the stream usable.
        self.stream = cPickle.loads(cPickle.dumps(self.stream))
        received = self.stream.get_epoch_iterator()
        reference = get_stream().get_epoch_iterator()
        for _, got, want in zip(range(3), received, reference):
            for pair in zip(got, want):
                assert_allclose(*pair, rtol=1e-5)
        assert_raises(StopIteration, next, received)

    def test_value_error_on_request(self):
        assert_raises(ValueError, self.stream.get_data, [0, 1])

    def test_close(self):
        self.stream.close()

    def test_next_epoch(self):
        self.stream.next_epoch()

    def test_reset(self):
        self.stream.reset()
def get_stream(hdf5_file, which_set, batch_size=None):
    """Build a shuffled stream for ``which_set``, optionally server-backed.

    Small files (< 14 GB) and the test split are streamed in-process; larger
    train/valid files are served from a background process and consumed
    through a ServerDataStream.

    Parameters
    ----------
    hdf5_file : str
        Path to the HDF5 file (used only to check its on-disk size).
    which_set : str
        Split name, e.g. 'train', 'valid' or 'test'.
    batch_size : int, optional
        Defaults to the full dataset size (one batch per epoch).
    """
    dataset = TrajectoryDataset(which_sets=(which_set, ))
    if batch_size is None:  # idiom fix: compare to None with `is`
        batch_size = dataset.num_examples
    data_stream = DataStream(dataset=dataset,
                             iteration_scheme=ShuffledScheme(
                                 examples=dataset.num_examples,
                                 batch_size=batch_size))
    load_in_memory = os.path.getsize(
        hdf5_file) < 14 * 10**9 or which_set == 'test'
    if not load_in_memory:
        # Fixed port per split so the client knows where to connect.
        port = 5557 if which_set == 'train' else 5558
        print(port)  # parenthesized: valid on Python 2 and Python 3
        server_process = Process(target=start_server,
                                 args=(data_stream, port, 10))
        server_process.start()
        data_stream = ServerDataStream(dataset.sources, False,
                                       host='localhost', port=port, hwm=10)
    return data_stream
# Trains a definition-aware language model with Blocks/Fuel. Overview (the
# original formatting was collapsed and lines below contain continuations,
# so the code is kept verbatim):
#  * builds words/words_mask inputs, the cost graph and perplexity monitors;
#  * optionally excludes pretrained word embeddings (`embedding_path`) and
#    the lookup-cache parameters (`cache_size`) from the trained/saved sets;
#  * optimizer is Adam (+ optional StepClipping) via GradientDescent;
#  * when `fuel_server` is set, the training stream is replaced by a
#    ServerDataStream whose port is configured later by StartFuelServer, and
#    the stream seed is random because (per the in-code note) a server
#    stream's state cannot be restored;
#  * checkpointing: either fast checkpoints (log + iteration state + params
#    only, `fast_checkpoint`) or full main-loop pickles, with TrackTheBest /
#    FinishIfNoImprovementAfter (50 * mon_freq_valid) driving early stopping
#    and FinishAfter bounding total batches.
def train_language_model(new_training_job, config, save_path, params, fast_start, fuel_server, seed): c = config if seed: fuel.config.default_seed = seed blocks.config.config.default_seed = seed data, lm, retrieval = initialize_data_and_model(config) # full main loop can be saved... main_loop_path = os.path.join(save_path, 'main_loop.tar') # or only state (log + params) which can be useful not to pickle embeddings state_path = os.path.join(save_path, 'training_state.tar') stream_path = os.path.join(save_path, 'stream.pkl') best_tar_path = os.path.join(save_path, "best_model.tar") words = tensor.ltensor3('words') words_mask = tensor.matrix('words_mask') if theano.config.compute_test_value != 'off': test_value_data = next( data.get_stream('train', batch_size=4, max_length=5).get_epoch_iterator()) words.tag.test_value = test_value_data[0] words_mask.tag.test_value = test_value_data[1] costs, updates = lm.apply(words, words_mask) cost = rename(costs.mean(), 'mean_cost') cg = Model(cost) if params: logger.debug("Load parameters from {}".format(params)) with open(params) as src: cg.set_parameter_values(load_parameters(src)) length = rename(words.shape[1], 'length') perplexity, = VariableFilter(name='perplexity')(cg) perplexities = VariableFilter(name_regex='perplexity.*')(cg) monitored_vars = [length, cost] + perplexities if c['dict_path']: num_definitions, = VariableFilter(name='num_definitions')(cg) monitored_vars.extend([num_definitions]) parameters = cg.get_parameter_dict() trained_parameters = parameters.values() saved_parameters = parameters.values() if c['embedding_path']: logger.debug("Exclude word embeddings from the trained parameters") trained_parameters = [ p for p in trained_parameters if not p == lm.get_def_embeddings_params() ] saved_parameters = [ p for p in saved_parameters if not p == lm.get_def_embeddings_params() ] if c['cache_size'] != 0: logger.debug("Enable fake recursivity for looking up embeddings") trained_parameters = [ p for p in 
trained_parameters if not p == lm.get_cache_params() ] logger.info("Cost parameters" + "\n" + pprint.pformat([ " ".join( (key, str(parameters[key].get_value().shape), 'trained' if parameters[key] in trained_parameters else 'frozen')) for key in sorted(parameters.keys()) ], width=120)) rules = [] if c['grad_clip_threshold']: rules.append(StepClipping(c['grad_clip_threshold'])) rules.append(Adam(learning_rate=c['learning_rate'], beta1=c['momentum'])) algorithm = GradientDescent(cost=cost, parameters=trained_parameters, step_rule=CompositeRule(rules)) if c['cache_size'] != 0: algorithm.add_updates(updates) train_monitored_vars = list(monitored_vars) if c['grad_clip_threshold']: train_monitored_vars.append(algorithm.total_gradient_norm) word_emb_RMS, = VariableFilter(name='word_emb_RMS')(cg) main_rnn_in_RMS, = VariableFilter(name='main_rnn_in_RMS')(cg) train_monitored_vars.extend([word_emb_RMS, main_rnn_in_RMS]) if c['monitor_parameters']: train_monitored_vars.extend(parameter_stats(parameters, algorithm)) # We use a completely random seed on purpose. With Fuel server # it's currently not possible to restore the state of the training # stream. That's why it's probably better to just have it stateless. 
stream_seed = numpy.random.randint(0, 10000000) if fuel_server else None training_stream = data.get_stream('train', batch_size=c['batch_size'], max_length=c['max_length'], seed=stream_seed) valid_stream = data.get_stream('valid', batch_size=c['batch_size_valid'], max_length=c['max_length'], seed=stream_seed) original_training_stream = training_stream if fuel_server: # the port will be configured by the StartFuelServer extension training_stream = ServerDataStream( sources=training_stream.sources, produces_examples=training_stream.produces_examples) validation = DataStreamMonitoring(monitored_vars, valid_stream, prefix="valid").set_conditions( before_first_epoch=not fast_start, on_resumption=True, every_n_batches=c['mon_freq_valid']) track_the_best = TrackTheBest(validation.record_name(perplexity), choose_best=min).set_conditions( on_resumption=True, after_epoch=True, every_n_batches=c['mon_freq_valid']) # don't save them the entire main loop to avoid pickling everything if c['fast_checkpoint']: load = (LoadNoUnpickling(state_path, load_iteration_state=True, load_log=True).set_conditions( before_training=not new_training_job)) cp_args = { 'save_main_loop': False, 'save_separately': ['log', 'iteration_state'], 'parameters': saved_parameters } checkpoint = Checkpoint(state_path, before_training=not fast_start, every_n_batches=c['save_freq_batches'], after_training=not fast_start, **cp_args) if c['checkpoint_every_n_batches']: intermediate_cp = IntermediateCheckpoint( state_path, every_n_batches=c['checkpoint_every_n_batches'], after_training=False, **cp_args) else: load = (Load(main_loop_path, load_iteration_state=True, load_log=True).set_conditions( before_training=not new_training_job)) cp_args = { 'save_separately': ['iteration_state'], 'parameters': saved_parameters } checkpoint = Checkpoint(main_loop_path, before_training=not fast_start, every_n_batches=c['save_freq_batches'], after_training=not fast_start, **cp_args) if c['checkpoint_every_n_batches']: 
intermediate_cp = IntermediateCheckpoint( main_loop_path, every_n_batches=c['checkpoint_every_n_batches'], after_training=False, **cp_args) checkpoint = checkpoint.add_condition( ['after_batch', 'after_epoch'], OnLogRecord(track_the_best.notification_name), (best_tar_path, )) extensions = [ load, StartFuelServer(original_training_stream, stream_path, before_training=fuel_server), Timing(every_n_batches=c['mon_freq_train']) ] if retrieval: extensions.append( RetrievalPrintStats(retrieval=retrieval, every_n_batches=c['mon_freq_train'], before_training=not fast_start)) extensions.extend([ TrainingDataMonitoring(train_monitored_vars, prefix="train", every_n_batches=c['mon_freq_train']), validation, track_the_best, checkpoint ]) if c['checkpoint_every_n_batches']: extensions.append(intermediate_cp) extensions.extend([ DumpTensorflowSummaries(save_path, every_n_batches=c['mon_freq_train'], after_training=True), Printing(on_resumption=True, every_n_batches=c['mon_freq_train']), FinishIfNoImprovementAfter(track_the_best.notification_name, iterations=50 * c['mon_freq_valid'], every_n_batches=c['mon_freq_valid']), FinishAfter(after_n_batches=c['n_batches']) ]) logger.info("monitored variables during training:" + "\n" + pprint.pformat(train_monitored_vars, width=120)) logger.info("monitored variables during valid:" + "\n" + pprint.pformat(monitored_vars, width=120)) main_loop = MainLoop(algorithm, training_stream, model=Model(cost), extensions=extensions) main_loop.run()
# NOTE(review): this training script is truncated/redacted -- the Plot title
# is fused onto the `host_plot = 'http://...'` assignment and the model /
# algorithm / stream construction between the hyper-parameters and the
# extensions list is missing. Kept verbatim. The readable hyper-parameters:
# six conv blocks (feature maps up to 128, first stride 2), one 1000-unit MLP
# layer, batch 64, lr 1e-3, dropout 0.4, weight noise 0.75, 150 epochs; Plot
# is optional (PLOT_AVAILABLE falls back on ImportError).
def main(feature_maps=None, mlp_hiddens=None, conv_sizes=None, pool_sizes=None, batch_size=None, num_batches=None): if feature_maps is None: feature_maps = [32, 48, 64, 96, 96, 128] if mlp_hiddens is None: mlp_hiddens = [1000] if conv_sizes is None: conv_sizes = [9, 7, 5, 3, 2, 1] if pool_sizes is None: pool_sizes = [2, 2, 2, 2, 1, 1] if batch_size is None: batch_size = 64 conv_steps=[2, 1, 1, 1, 1, 1] #same as stride image_size = (128, 128) output_size = 2 learningRate = 0.001 drop_prob = 0.4 weight_noise = 0.75 num_epochs = 150 num_batches = None host_plot='http://*****:*****@ %s' % (graph_name, datetime.datetime.now(), socket.gethostname()), channels=[['train_error_rate', 'valid_error_rate'], ['train_total_gradient_norm']], after_epoch=True, server_url=host_plot)) PLOT_AVAILABLE = True except ImportError: PLOT_AVAILABLE = False extensions.append(Checkpoint(save_to, after_epoch=True, after_training=True, save_separately=['log'])) logger.info("Building the model") model = Model(cost) ########### Loading images ##################### main_loop = MainLoop( algorithm, stream_data_train, model=model, extensions=extensions) main_loop.run()
# Script-level setup: reads hyper-parameters from `pl_params`, connects to
# the train/valid Fuel servers (validation on port + 50) and, when
# `tbptt_flag` is set, wraps both streams in SegmentSequence for truncated
# backprop through time with `seq_length`-long segments.
# NOTE(review): `pl_params`, `tbptt_flag` and SegmentSequence are defined
# elsewhere in the original file; kept verbatim.
n_batches = pl_params.n_batches seq_length = pl_params.seq_length # print config.recursion_limit floatX = theano.config.floatX experiment_name = pl_params.experiment_name stream_vars = ( 'upsampled', 'residual', ) train_stream = ServerDataStream(stream_vars, produces_examples=False, port=pl_params.port) valid_stream = ServerDataStream(stream_vars, produces_examples=False, port=pl_params.port + 50) if tbptt_flag: train_stream = SegmentSequence(train_stream, seq_length, add_flag=True) valid_stream = SegmentSequence(valid_stream, seq_length, add_flag=True) #x_tr = next(train_stream.get_epoch_iterator()) ################# # Model #################
# NOTE(review): truncated/redacted snippet -- the model, algorithm and stream
# construction between the hyper-parameters and the Plot extension is missing
# and the Plot title is fused onto the host_plot URL assignment. Kept
# verbatim. Readable hyper-parameters: LeNet-style net (20/50 feature maps,
# 5x5 convs, 3x3 pools, one 50-unit MLP layer), batch 500, lr 0.1, 10 epochs.
def main(): feature_maps = [20, 50] mlp_hiddens = [50] conv_sizes = [5, 5] pool_sizes = [3, 3] save_to = "DvC.pkl" batch_size = 500 image_size = (32, 32) output_size = 2 learningRate = 0.1 num_epochs = 10 num_batches = None host_plot = 'http://*****:*****@ %s' % ('CNN ', datetime.datetime.now(), socket.gethostname()), channels=[['valid_cost', 'valid_error_rate'], ['train_total_gradient_norm']], after_epoch=True, server_url=host_plot)) model = Model(cost) main_loop = MainLoop(algorithm, stream_data_train, model=model, extensions=extensions) main_loop.run()
# Trains the three-view (x/u/v input columns) CNN with AdaGrad updates plus
# Nesterov momentum (lasagne.updates.apply_nesterov_momentum over adagrad),
# L2 regularization, and categorical cross-entropy, reading batches from a
# Fuel ServerDataStream of ('train',) on `port`. Each batch tuple is
# (ignored, inputs, targets) and inputs are split into the three views with
# split_inputs_xuv. Validation is fully commented out, weights are re-saved
# to `save_model_file` after every epoch, and the in-code TODO notes that
# early stopping is not implemented yet. The function body spans the three
# source lines below (the last two are continuations); kept verbatim.
def train(port=55557, num_epochs=500, learning_rate=0.01, momentum=0.9, l2_penalty_scale=1e-04, batchsize=500, save_model_file='./params_file.npz', start_with_saved_params=False): print("Loading data...") # Prepare Theano variables for inputs and targets input_var_x = T.tensor4('inputs') input_var_u = T.tensor4('inputs') input_var_v = T.tensor4('inputs') target_var = T.ivector('targets') # Build the model network = build_cnn(input_var_x, input_var_u, input_var_v) print(network_repr.get_network_str( lasagne.layers.get_all_layers(network), get_network=False, incomings=True, outgoings=True)) if start_with_saved_params and os.path.isfile(save_model_file): with np.load(save_model_file) as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, param_values) # Create a loss expression for training. prediction = lasagne.layers.get_output(network) l2_penalty = lasagne.regularization.regularize_layer_params( lasagne.layers.get_all_layers(network), lasagne.regularization.l2) * l2_penalty_scale loss = categorical_crossentropy(prediction, target_var) + l2_penalty loss = loss.mean() # Create update expressions for training. params = lasagne.layers.get_all_params(network, trainable=True) print( """ //// Use AdaGrad update schedule for learning rate, see Duchi, Hazan, and Singer (2011) "Adaptive subgradient methods for online learning and stochasitic optimization." JMLR, 12:2121-2159 //// """) updates_adagrad = lasagne.updates.adagrad( loss, params, learning_rate=learning_rate, epsilon=1e-06) print( """ //// Apply Nesterov momentum using Lisa Lab's modifications. //// """) updates = lasagne.updates.apply_nesterov_momentum( updates_adagrad, params, momentum=momentum) # Create a loss expression for validation/testing. Note we do a # deterministic forward pass through the network, disabling dropout. 
test_prediction = lasagne.layers.get_output(network, deterministic=True) test_loss = categorical_crossentropy(test_prediction, target_var) + \ l2_penalty test_loss = test_loss.mean() # Also create an expression for the classification accuracy: test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX) # Compile a function performing a training step on a mini-batch (by giving # the updates dictionary) and returning the corresponding training loss: train_fn = theano.function([input_var_x, input_var_u, input_var_v, target_var], loss, updates=updates, allow_input_downcast=True) # Compile a second function computing the validation loss and accuracy: val_fn = theano.function([input_var_x, input_var_u, input_var_v, target_var], [test_loss, test_acc], allow_input_downcast=True) print("Starting training...") train_dstream = ServerDataStream(('train',), port=port, produces_examples=False) # # TODO: early stopping logic goes here... # for epoch in range(num_epochs): # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() for data in train_dstream.get_epoch_iterator(): _, inputs, targets = data[0], data[1], data[2] inputx, inputu, inputv = split_inputs_xuv(inputs) train_err += train_fn(inputx, inputu, inputv, targets) train_batches += 1 # And a full pass over the validation data: # val_err = 0 # val_acc = 0 # val_batches = 0 # for data in valid_dstream.get_epoch_iterator(): # _, inputs, targets = data[0], data[1], data[2] # inputx, inputu, inputv = split_inputs_xuv(inputs) # err, acc = val_fn(inputx, inputu, inputv, targets) # val_err += err # val_acc += acc # val_batches += 1 # Dump the current network weights to file np.savez(save_model_file, *lasagne.layers.get_all_param_values(network)) # Then we print the results for this epoch: print("Epoch {} of {} took {:.3f}s".format( epoch + 1, num_epochs, time.time() - start_time)) print(" training loss:\t\t{:.6f}".format(train_err / 
train_batches)) # print(" validation loss:\t\t{:.6f}".format(val_err / val_batches)) # print(" validation accuracy:\t\t{:.2f} %".format( # val_acc / val_batches * 100)) print("Finished {} epochs.".format(epoch + 1))
# NOTE(review): this snippet mixes a LeNet-style `_push_allocation_config`
# method (wires the conv output dims into the top MLP) with module-level
# input-variable and server-stream setup (train on 5556, valid on 5557, the
# test stream is commented out), and it is cut off mid-call -- the
# `LeNet(...)` constructor argument list is unterminated. Kept verbatim.
def _push_allocation_config(self): self.conv_sequence._push_allocation_config() conv_out_dim = self.conv_sequence.get_dim('output') self.top_mlp.activations = self.top_mlp_activations self.top_mlp.dims = [numpy.prod(conv_out_dim)] + self.top_mlp_dims #Generating input and target variables x = tensor.tensor4('image_features') y = tensor.lmatrix('targets') #Load Data stream_train = ServerDataStream(('image_features', 'targets'), False, port=5556) stream_valid = ServerDataStream(('image_features', 'targets'), False, port=5557) #stream_test = ServerDataStream(('image_features','targets'), False, port=5558) # Init an instance of the convnet convnet = LeNet(conv_activations, num_channels, image_shape, filter_sizes=filter_sizes, feature_maps=feature_maps, pooling_sizes=pooling_sizes, top_mlp_activations=mlp_activations, top_mlp_dims=mlp_hiddens + [output_size],
# Module-level graph setup: GoogLeNet-style auxiliary classifiers combined as
# cost = cost3 + 0.3*cost2 + 0.3*cost1, misclassification-rate monitoring
# (duplicated as error_rate/error_rate2 -- the in-code comment explains this
# is a plotting workaround), server streams on ports 5652/5653 (hwm=50), and
# Momentum SGD (lr 1e-4, momentum 0.9).
# NOTE(review): the snippet is cut off inside the `extensions = [...]` list;
# kept verbatim.
outf = Flattener().apply(out_soft3) predict3 = NDimensionalSoftmax().apply(outf) cost3 = CategoricalCrossEntropy().apply(y.flatten(), predict3).copy(name='cost3') cost = cost3 + 0.3 * cost2 + 0.3 * cost1 cost = cost.copy(name='cost') error = MisclassificationRate().apply(y.flatten(), predict3) #Little trick to plot the error rate in two different plots (We can't use two time the same data in the plot for a unknow reason) error_rate = error.copy(name='error_rate') error_rate2 = error.copy(name='error_rate2') cg = ComputationGraph([cost, error_rate]) ########### GET THE DATA ##################### stream_train = ServerDataStream(('image_features', 'targets'), False, port=5652, hwm=50) stream_valid = ServerDataStream(('image_features', 'targets'), False, port=5653, hwm=50) ########### DEFINE THE ALGORITHM ############# track_cost = TrackTheBest("cost", after_epoch=True, after_batch=False) algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum(learning_rate=0.0001, momentum=0.9)) extensions = [ Timing(), FinishAfter(after_n_epochs=num_epochs),
def setUp(self):
    """Launch the fixture server in a subprocess and connect a client stream."""
    self.server_process = Process(target=start_server,
                                  args=(get_stream(),))
    self.server_process.start()
    self.stream = ServerDataStream(('f', 't'), False)
@contextmanager
def timer(name):
    """Times a block of code and prints the result.

    Parameters
    ----------
    name : str
        What this block of code represents.
    """
    begin = time.time()
    yield
    end = time.time()
    print('{} took {} seconds'.format(name, end - begin))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p', '--parallel', action='store_true',
        help='run data preprocessing in a separate process')
    cli_args = parser.parse_args()
    # Either pull preprocessed data from a Fuel server or build it in-process.
    if cli_args.parallel:
        data_stream = ServerDataStream(('features', ), True)
    else:
        data_stream = create_data_stream(0.005)
    with timer('Training'):
        for _ in range(5):
            for _batch in data_stream.get_epoch_iterator():
                time.sleep(0.01)
def setUp(self):
    """Spin up the data server process and open a client stream to it."""
    self.server_process = Process(target=start_server, args=(get_stream(),))
    self.server_process.start()
    self.stream = ServerDataStream(('f', 't'), False)
# Script prologue: connects to the train (default port) and validation
# (5558) Fuel servers and builds the VGG-16 graph with a soft F2 surrogate
# loss. NOTE(review): as in the sibling training function, `loss` is first
# assigned mean binary cross-entropy and then overwritten with `-f2_diff`,
# leaving the cross-entropy (and the thresholded `f2`) for monitoring only
# -- confirm intentional. Kept verbatim (collapsed formatting).
#import sys #sys.path.append('experiments/simple_vgg/') from .vgg_16 import get_model from theano import tensor, function import lasagne from fuel.streams import ServerDataStream import numpy from utils import f2_score import pickle num_epochs = 50 train_stream = ServerDataStream(('features', 'labels'), produces_examples=False) valid_stream = ServerDataStream(('features', 'labels'), produces_examples=False, port=5558) X = tensor.ftensor4('images') y = tensor.imatrix('targets') prediction_train, prediction_test, params = get_model(X) loss = lasagne.objectives.binary_crossentropy(prediction_train, y) loss = loss.mean() prediction_01 = tensor.ge(prediction_train, numpy.float32(.5)) f2 = f2_score(prediction_01, y) f2_diff = f2_score(prediction_train, y) loss = -f2_diff
# Script-level model/stream setup for a structured-VGG model: builds the
# dropout cost from build_model(images, labels), connects image/label server
# streams (validation on port 5558, both hwm=10) and configures Adam-based
# GradientDescent (on_unused_sources='ignore'); the Blocks extension imports
# at the end are used further down in the original file. Kept verbatim.
## choose model from model.vgg_structured import build_model from blocks.algorithms import GradientDescent, Adam from blocks.graph import ComputationGraph, apply_batch_normalization, get_batch_normalization_updates, apply_dropout from blocks.model import Model from blocks.filter import VariableFilter from blocks.roles import WEIGHT, INPUT # BUILD MODEL images = tensor.ftensor4('images') labels = tensor.ftensor4('labels') cost_dropout, parameters = build_model(images, labels) # LEARN WEIGHTS train_stream = ServerDataStream(('images', 'labels'), False, hwm=10) valid_stream = ServerDataStream(('images', 'labels'), False, hwm=10, port=5558) model = Model(cost_dropout) # ALGORITHM alpha = 0.01 # learning rate of Adam algorithm = GradientDescent(cost=cost_dropout, parameters=parameters, step_rule=Adam(), on_unused_sources='ignore') # EXTENSIONS from blocks.extensions import Printing, Timing from blocks.extensions.training import TrackTheBest from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring from blocks.extensions.stopping import FinishIfNoImprovementAfter
# Tail of a stream-building helper (rotation augmentation + scale/cast to
# float32; its `def` line is outside this view) followed by a mode dispatch
# into CPU_test / GPU_run / data_server stream pairs and the model cost /
# error graph plus Scale-SGD setup.
# NOTE(review): `mode is "CPU_test"` (and the other `is` comparisons against
# string literals) test object identity, not equality; they should be `==`
# and only work by accident of CPython string interning. Kept verbatim.
stream = Random2DRotation(stream, which_sources=('image_features',)) # Data Transformation stream = ScaleAndShift(stream, 1./255, 0, which_sources=('image_features',)) stream = Cast(stream, dtype='float32', which_sources=('image_features',)) return stream if mode is "CPU_test": data_train_stream = create_data(DogsVsCats(('train',), subset=slice(0, 100))) data_valid_stream = create_data(DogsVsCats(('train',), subset=slice(100, 110))) if mode is "GPU_run": data_train_stream = create_data(DogsVsCats(('train',), subset=slice(0, 22500))) data_valid_stream = create_data(DogsVsCats(('train',), subset=slice(22500, 25000))) if mode is "data_server": data_train_stream = ServerDataStream(('image_features','targets'), False, port=5560) data_valid_stream = ServerDataStream(('image_features','targets'), False, port=5561) ### Setting up the model probs = top_mlp.apply(conv_out) cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost') error = MisclassificationRate().apply(y.flatten(), probs) error_rate = error.copy(name='error_rate') error_rate2 = error.copy(name='error_rate2') cg = ComputationGraph([cost, error_rate]) ### Gradient Descent algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=learning_rate))
# Smoke-test script: builds the ResNet-152 graph, compiles a deterministic
# prediction function, pulls one batch from the Fuel server, accumulates the
# label counts and prints predictions for the first two datapoints.
# NOTE(review): `iterator.next()` is Python-2-only; `next(iterator)` would be
# portable. Kept verbatim.
from resnet_152 import get_model as model_resnet # build model and load weights input_var = tensor.tensor4('X') _, test_prediction, _ = model_resnet(input_var) # create prediction function val_fn = theano.function([input_var], [test_prediction]) # Try for a few data points n_datapoints = 2 from fuel.streams import ServerDataStream import numpy as np train_stream = ServerDataStream(('features', 'labels'), produces_examples=False) labels_count = np.zeros((17,)) mb_count = 0 iterator = train_stream.get_epoch_iterator() data = iterator.next() labels_count += data[1].sum(axis=0) mb_count += 1 feat = np.asarray(data[0][:n_datapoints], dtype=np.float32) pred = val_fn(feat) print('Prediction for the {0} datapoints is : '.format(n_datapoints)) print(pred)
# Trains a segmentation model (binary cross-entropy loss, 0.5-threshold
# error) on server streams (valid on port 5558) in repeated 10-epoch rounds:
# the trailing `while True` reruns the main loop forever, decaying the Scale
# learning rate by 0.7 and re-saving best_weights.npz after every round --
# NOTE(review): there is no stopping condition, so the process must be killed
# externally. Checkpointing is commented out; Bokeh plots go to
# localhost:5006. Kept verbatim (collapsed formatting).
def run(get_model, model_name): train_stream = ServerDataStream( ('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10) valid_stream = ServerDataStream( ('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10, port=5558) input_var = tensor.tensor4('image_features') target_var = tensor.tensor4('image_targets') multiply_var = tensor.matrix('multiplier') multiply_var = T.addbroadcast(multiply_var, 1) test_prediction, prediction, params = get_model(input_var, target_var, multiply_var) loss = binary_crossentropy(prediction, target_var).mean() loss.name = 'loss' valid_error = T.neq((test_prediction > 0.5) * 1., target_var).mean() valid_error.name = 'error' scale = Scale(0.1) algorithm = GradientDescent( cost=loss, parameters=params, step_rule=scale, #step_rule=Adam(), on_unused_sources='ignore') host_plot = 'http://localhost:5006' extensions = [ Timing(), TrainingDataMonitoring([loss], after_epoch=True), DataStreamMonitoring(variables=[loss, valid_error], data_stream=valid_stream, prefix="valid"), Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')), channels=[['loss', 'valid_loss'], ['valid_error']], after_epoch=True, server_url=host_plot), Printing(), # Checkpoint('train'), FinishAfter(after_n_epochs=10) ] main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions) cg = ComputationGraph(test_prediction) while True: main_loop.run() scale.learning_rate.set_value( numpy.float32(scale.learning_rate.get_value() * 0.7)) numpy.savez('best_weights.npz', [param.get_value() for param in cg.shared_variables])
import sys
from vgg_16 import get_model, build_model
from theano import tensor, function, config
import lasagne
from fuel.streams import ServerDataStream
import numpy
import pickle
from config import basepath

# Stream of images to predict on, fed by an external Fuel server.
submit_stream = ServerDataStream(('features', 'image_name'),
                                 produces_examples=False)

# Symbolic input batch.
X = tensor.ftensor4('images')

# build simple vgg model
net, layers_names = build_model(X)

# Load the pretrained VGG16 weights. BUG FIX: the original opened the file in
# text mode and never closed it; pickle files must be read in binary mode and
# the handle is now released promptly by the context manager.
with open(basepath + 'vgg16.pkl', 'rb') as f_pretrained:
    model_pretrained = pickle.load(f_pretrained)
w_pretrained = model_pretrained['param values']
net['mean value'].set_value(
    model_pretrained['mean value'].astype(config.floatX))

# load weights
from lasagne.layers import set_all_param_values
with numpy.load('weights/simple_vgg_valid.npz') as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    set_all_param_values(net[layers_names[len(layers_names) - 1]],
                         param_values[0])

# create predict function
prediction_test = lasagne.layers.get_output(
    net[layers_names[len(layers_names) - 1]], deterministic=True)
def run(get_model, model_name):
    """Fine-tune the top of *get_model* on SAX-slice sequences.

    Bottom-network weights are restored from
    ``sunnybrook/best_weights.npz``; only ``params_top`` are optimised
    (Adam).  Training data is read from fuel servers (default port for
    train, 5558 for validation) and runs for 20 epochs with checkpoints
    written to ``train``.
    """
    sources = ('cases', 'image_position', 'multiplier', 'sax',
               'sax_features', 'targets')
    train_stream = ServerDataStream(sources, False, hwm=10)
    valid_stream = ServerDataStream(sources, False, hwm=10, port=5558)

    # Rank-5 float32 tensor type for the SAX feature batches.
    ftensor5 = tensor.TensorType('float32', (False, ) * 5)
    sax_features = ftensor5('sax_features')
    targets = tensor.matrix('targets')
    multiplier = tensor.matrix('multiplier')
    # Broadcast the column dim so one multiplier scales a whole example.
    multiplier = T.addbroadcast(multiplier, 1)

    (prediction, test_prediction, test_pred_mid,
     params_bottom, params_top) = get_model(sax_features, multiplier)

    # Restore the pretrained bottom network: the npz holds the shared
    # variable values as a single list under 'arr_0'.
    cg = ComputationGraph(test_pred_mid)
    pretrained = numpy.load('sunnybrook/best_weights.npz')
    for shared, stored in zip(cg.shared_variables, pretrained['arr_0']):
        shared.set_value(stored)

    # CRPS-style monitoring metric and the actual training loss.
    crps = tensor.abs_(test_prediction - targets).mean()
    loss = squared_error(prediction, targets).mean()
    loss.name = 'loss'
    crps.name = 'crps'

    # Only the top parameters are trained; the bottom stays frozen.
    algorithm = GradientDescent(cost=loss,
                                parameters=params_top,
                                step_rule=Adam(),
                                on_unused_sources='ignore')

    host_plot = 'http://localhost:5006'
    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[crps, loss],
                             data_stream=valid_stream,
                             prefix="valid"),
        Plot('%s %s %s' % (model_name, datetime.date.today(),
                           time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_crps']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('train'),
        FinishAfter(after_n_epochs=20),
    ]

    MainLoop(data_stream=train_stream,
             algorithm=algorithm,
             extensions=extensions).run()
# NOTE(review): this function was mangled by credential redaction -- the
# original plot-server URL contained a user:password pair and everything
# between the `if running_on_laptop:` branch and the tail of the
# `extensions` list (stream setup, model build, `algorithm`, `model`,
# `train_stream`) was swallowed along with it.  The code below is kept
# byte-identical; it is NOT syntactically valid as it stands and must be
# restored from version control before use.
def run(model_name, port_train, port_valid):
    # True when running on the developer laptop (hostname 'yop').
    running_on_laptop = socket.gethostname() == 'yop'
    # Symbolic input batch and targets.
    X = tensor.tensor4('image_features', dtype='float32')
    # NOTE(review): `T` here shadows any theano.tensor alias named T.
    T = tensor.matrix('targets', dtype='float32')
    image_border_size = (100, 100)
    # --- garbled segment starts here (redacted URL ate the middle of the
    # function); `channels`/`server_url` belong to a Plot(...) extension,
    # `Printing()` / `Checkpoint(...)` close the extensions list ---
    if running_on_laptop: host_plot = 'http://*****:*****@ %s' % (model_name, datetime.datetime.now(), socket.gethostname()), channels=[['loss'], ['error', 'valid_error']], after_epoch=True, server_url=host_plot), Printing(), Checkpoint('/tmp/train_bn2') ]
    # Wire everything into a Blocks MainLoop and train.
    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions, model=model)
    main_loop.run()
def predict(port, l2_penalty_scale, save_model_file='./params_file.npz',
            batchsize=500, load_in_memory=False, be_verbose=False):
    """Evaluate the saved three-view CNN on a test stream served on *port*.

    Restores weights from *save_model_file*, prints per-target accuracy
    for targets 1-5 plus overall loss/accuracy, and saves an 11x11
    confusion matrix to ``perfmat<tstamp>.npy``.

    Parameters
    ----------
    port : int
        Port of the fuel server producing ('test',) batches.
    l2_penalty_scale : float
        Weight of the L2 regularization term added to the reported loss.
    save_model_file : str
        Path to the npz file of trained parameters.
    batchsize, load_in_memory : unused here; kept for interface
        compatibility with the training entry points.
    be_verbose : bool
        If True, print every (prediction, target) pair.
    """
    print("Loading data for prediction...")
    # extract timestamp from model file - assume it is the first set of
    # numbers; otherwise just use "now"
    import re
    import time
    tstamp = str(time.time()).split('.')[0]
    m = re.search(r"[0-9]+", save_model_file)
    if m:
        tstamp = m.group(0)

    # Prepare Theano variables for inputs and targets (one 4-D tensor
    # per detector view; the shared debug name 'inputs' is historical).
    input_var_x = T.tensor4('inputs')
    input_var_u = T.tensor4('inputs')
    input_var_v = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Build the model and restore the trained parameters.
    network = build_cnn(input_var_x, input_var_u, input_var_v)
    with np.load(save_model_file) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Deterministic (dropout off) predictions and L2-regularized loss.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        lasagne.layers.get_all_layers(network),
        lasagne.regularization.l2) * l2_penalty_scale
    test_loss = categorical_crossentropy(test_prediction, target_var) + \
        l2_penalty
    test_loss = test_loss.mean()

    # Classification accuracy and hard class labels.
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    test_prediction_values = T.argmax(test_prediction, axis=1)

    # Compile the validation and prediction functions.
    val_fn = theano.function(
        [input_var_x, input_var_u, input_var_v, target_var],
        [test_loss, test_acc], allow_input_downcast=True)
    pred_fn = theano.function([input_var_x, input_var_u, input_var_v],
                              [test_prediction_values],
                              allow_input_downcast=True)

    # don't `produces_examples`, produce batches
    test_dstream = ServerDataStream(('test',), port=port,
                                    produces_examples=False)

    # Tally per-target hit counts and an 11x11 confusion matrix.
    targ_numbers = [1, 2, 3, 4, 5]
    pred_target = np.array([0, 0, 0, 0, 0])
    true_target = np.array([0, 0, 0, 0, 0])
    targs_mat = np.zeros(11 * 11).reshape(11, 11)
    for data in test_dstream.get_epoch_iterator():
        _, inputs, targets = data[0], data[1], data[2]
        inputx, inputu, inputv = split_inputs_xuv(inputs)
        pred = pred_fn(inputx, inputu, inputv)
        # BUGFIX: materialize the pairs so they survive being printed and
        # then iterated (a bare zip is a one-shot iterator on Python 3).
        pred_targ = list(zip(pred[0], targets))
        if be_verbose:
            print("(prediction, true target):", pred_targ)
            print("----------------")
        for p, t in pred_targ:
            targs_mat[t][p] += 1
            if t in targ_numbers:
                true_target[t - 1] += 1
                if p == t:
                    pred_target[p - 1] += 1

    # BUGFIX: guard the per-target accuracy against division by zero when
    # a target class never appears in the stream (report 0% instead of
    # nan/inf plus a runtime warning).
    denom = true_target.astype('float32')
    with np.errstate(divide='ignore', invalid='ignore'):
        acc_target = np.where(denom > 0, 100.0 * pred_target / denom, 0.0)

    perf_file = 'perfmat' + tstamp + '.npy'
    np.save(perf_file, targs_mat)

    # Second pass over the stream: overall test loss and accuracy.
    test_err = 0
    test_acc = 0
    test_batches = 0
    for data in test_dstream.get_epoch_iterator():
        _, inputs, targets = data[0], data[1], data[2]
        inputx, inputu, inputv = split_inputs_xuv(inputs)
        err, acc = val_fn(inputx, inputu, inputv, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
    for i, v in enumerate(acc_target):
        print(" target {} accuracy:\t\t\t{:.3f} %".format((i + 1), v))
# --- top-level experiment setup (blizzard residual model) ---
# Learning rate sampled log-uniformly: exponent in [-5, -3), so lr in
# [1e-5, 1e-3).
lr = 10**(2 * numpy.random.rand() - 5)
config.recursion_limit = 100000
floatX = theano.config.floatX
#job_id = 5557
# Job id doubles as the fuel-server port (train on job_id, valid on
# job_id + 50).
job_id = int(sys.argv[1])
# Results directory: $FUEL_DATA_PATH/../results/blizzard/
save_dir = os.environ['FUEL_DATA_PATH']
save_dir = os.path.join(save_dir, '..', 'results/', 'blizzard/')
experiment_name = 'deep_l0_{}_{}'.format(job_id, lr)
train_stream = ServerDataStream((
    'upsampled',
    'residual',
), produces_examples=False, port=job_id)
valid_stream = ServerDataStream((
    'upsampled',
    'residual',
), produces_examples=False, port=job_id + 50)

#################
# Model
#################
# Symbolic 3-D inputs matching the stream sources.
x = tensor.tensor3('upsampled')
y = tensor.tensor3('residual')
# NOTE(review): this chunk fuses a method of an unseen class (presumably
# the LeNet brick instantiated below -- confirm against the class header
# outside this view) with module-level script code; the method is
# re-indented one level on that assumption.
    def _push_allocation_config(self):
        # Push allocation config into the conv stack first so its output
        # dimension is known, then size the top MLP on the flattened
        # conv output.
        self.conv_sequence._push_allocation_config()
        conv_out_dim = self.conv_sequence.get_dim('output')
        self.top_mlp.activations = self.top_mlp_activations
        self.top_mlp.dims = [numpy.prod(conv_out_dim)] + self.top_mlp_dims


#Generating input and target variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

#Load Data
#stream_train = ServerDataStream(('image_features','targets'), False, port=5556)
#stream_valid = ServerDataStream(('image_features','targets'), False, port=5557)
stream_test = ServerDataStream(('image_features', 'targets'), False, port=5558)

# Init an instance of the convnet
convnet = LeNet(conv_activations, num_channels, image_shape,
                filter_sizes=filter_sizes,
                feature_maps=feature_maps,
                pooling_sizes=pooling_sizes,
                top_mlp_activations=mlp_activations,
                top_mlp_dims=mlp_hiddens + [output_size],
                conv_step=conv_step,
                border_mode=border_mode,
                weights_init=Uniform(width=0.2),
                biases_init=Constant(0))