def main(name, num_epochs):
    train_stream = ServerDataStream(('features', 'labels'), produces_examples=False)
    valid_stream = ServerDataStream(('features', 'labels'), produces_examples=False, port=5558)

    X = tensor.ftensor4('images')
    y = tensor.imatrix('targets')

    prediction_train, prediction_test, params = get_model(X)

    loss = lasagne.objectives.binary_crossentropy(prediction_train, y)
    loss = loss.mean()

    prediction_01 = tensor.ge(prediction_train, numpy.float32(.5))
    f2 = f2_score(prediction_01, y)
    f2_diff = f2_score(prediction_train, y)
    # the differentiable F2 objective (on the raw predictions) replaces the
    # cross-entropy loss defined above
    loss = -f2_diff

    updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=1e-3, momentum=0.9)

    train_fn = function([X, y], loss, updates=updates)
    valid_fn = function([X, y], f2)

    best_valid_score = 0
    patience = 0
    all_train_loss = []
    iteration = 0

    for epoch in range(num_epochs):
        f2_valid_loss = []
        f2_train_loss = []

        for imgs, targets in train_stream.get_epoch_iterator():
            f2_train_loss.append(train_fn(imgs, targets))
            iteration += 1

        all_train_loss.append(f2_train_loss)
        train_score = -numpy.mean(numpy.asarray(f2_train_loss))
        print('Iteration %d' % (iteration, ))
        print('train score : {0}'.format(train_score))

        for imgs, targets in valid_stream.get_epoch_iterator():
            f2_valid_loss.append(valid_fn(imgs, targets))

        valid_score = numpy.mean(numpy.asarray(f2_valid_loss))
        print('valid score : {0}'.format(valid_score))

        if best_valid_score < valid_score:
            best_valid_score = valid_score
            patience = 0
            param_values = [p.get_value() for p in params]
            numpy.savez_compressed('%s.npz' % (name, ), param_values)
            pickle.dump(all_train_loss, open('%s.pkl' % (name, ), 'wb'))
        else:
            patience += 1
            if patience == 5:
                break
        print('patience : {0}'.format(patience))
        print('\n')
def run(model_name):
    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')
    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****'  # plotting server URL (masked in the source)

    # ... (model, cost, algorithm and stream definitions are elided in the source) ...

    extensions = [
        Plot('%s %s @ %s' % (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm, extensions=extensions)
    main_loop.run()
def __init__(self):
    ImageNet_Base.__init__(self)

    data_stream_train = ServerDataStream(('filenames',), False, port=self.port_train)
    self.get_epoch_train = data_stream_train.get_epoch_iterator

    data_stream_val = ServerDataStream(('filenames',), False, port=self.port_val)
    self.get_epoch_val = data_stream_val.get_epoch_iterator
def __init__(self, ports, config, *args, **kwargs):
    """"""
    self.config = config
    self.host = config.data_server.host
    self.hwm = config.data_server.hwm

    # open streams
    self.data_streams = {}
    for target, dset_ports in ports.iteritems():
        self.data_streams[target] = {}
        for dset, port in dset_ports.iteritems():
            self.data_streams[target][dset] = ServerDataStream(
                sources=('raw',),  # note: ('raw') without the comma is a plain string, not a tuple
                produces_examples=True,
                port=port,
                host=self.host,
                hwm=self.hwm
            )

    # initiate epoch iterators
    self.epoch_iterators = self._init_epoch_iterators()

    # dataset sizes per target
    self.dset_size = {}
    for target in config.target:
        self.dset_size[target] = {}
        self.dset_size[target]['train'] = eval(
            'self.config.paths.meta_data.size.{}.train'.format(target))
        self.dset_size[target]['valid'] = eval(
            'self.config.paths.meta_data.size.{}.valid'.format(target))

    # number of iterations per epoch
    self.n_iter = sum([d['train'] for d in self.dset_size.values()])
    self.n_iter = int(self.n_iter / config.hyper_parameters.batch_size)
def fork_to_background(make_datastream, sources):
    port = get_open_port()
    proc = Process(target=on_thread, args=(make_datastream, port))
    proc.start()
    datastream = ServerDataStream(sources, port=port, hwm=hwm, produces_examples=False)
    return datastream, proc
def test_server():
    server_process = Process(target=start_server, args=(get_stream(), ))
    server_process.start()
    try:
        server_data = ServerDataStream(('f', 't')).get_epoch_iterator()
        expected_data = get_stream().get_epoch_iterator()
        for _, s, e in zip(range(3), server_data, expected_data):
            for data in zip(s, e):
                assert_allclose(*data)
        assert_raises(StopIteration, next, server_data)
    finally:
        server_process.terminate()
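# NOTE: the `start_server` and `get_stream` helpers used in the test above are not
# shown in this snippet. A minimal sketch of what they might look like, assuming a
# tiny in-memory dataset with sources 'f' and 't' (the real helpers may differ):
from collections import OrderedDict

import numpy
from fuel.datasets import IterableDataset
from fuel.streams import DataStream
from fuel.server import start_server as fuel_start_server


def get_stream():
    dataset = IterableDataset(OrderedDict([
        ('f', [numpy.arange(4, dtype='float32') + i for i in range(3)]),
        ('t', [numpy.array([i]) for i in range(3)])]))
    return DataStream(dataset)


def start_server(data_stream):
    # serve the stream over ZMQ on the default port (5557) until terminated
    fuel_start_server(data_stream)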
def main(args):
    print(args)
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('-p', '--parallel', action='store_true')
    parser.add_argument('-m', '--mnist', action='store_true')
    parser.add_argument('--L1', type=float)
    parser.add_argument('--L2', type=float)
    parser.add_argument('-e', '--early_stopping', action='store_true')
    parser.add_argument('-d', '--dropout', action='store_true')
    parser.add_argument('-j', '--jobid')
    parser.add_argument('-s', '--small', action='store_true')
    parser.add_argument('-u', '--update', choices=["rmsprop"])
    parser.add_argument('-f', '--finish', type=int)
    parser.add_argument('-t', '--duration', type=int)
    parser.add_argument('-a', '--augmentation', action='store_true')
    parser.add_argument('--port', default=5557, type=int)
    args = parser.parse_args(args)

    image_size = (128, 128)

    if args.mnist:
        train, test = get_mnist()
        net = net_mnist()
    else:
        net = net_dvc(image_size)
        if args.parallel:
            sources = ('image_features', 'targets')
            train = ServerDataStream(sources, True, port=args.port)
            valid = ServerDataStream(sources, True, port=args.port + 1)
            test = ServerDataStream(sources, True, port=args.port + 2)
        else:
            train, valid, test = get_dvc(image_size, shortcut=args.small,
                                         augmentation=args.augmentation)

    train_net(net, train, test, **vars(args))
def stream_from_file(sources, filename, *args):
    port = get_open_port()
    proc = Popen(['python', filename, str(port)] + list(args),
                 env=dict(os.environ, THEANO_FLAGS='device=cpu'))
    stream = ServerDataStream(sources, port=port, hwm=50, produces_examples=False)

    def term():
        if proc:
            proc.kill()

    atexit.register(term)
    return stream, proc
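# NOTE: `stream_from_file` assumes `filename` is a script that serves a Fuel stream
# on the port passed as its first command-line argument. A minimal sketch of such a
# script (the dataset and source names here are placeholders, not from the original
# project); the hwm matches the consumer side above:
import sys
from collections import OrderedDict

from fuel.datasets import IterableDataset
from fuel.schemes import ConstantScheme
from fuel.server import start_server
from fuel.streams import DataStream
from fuel.transformers import Batch

if __name__ == '__main__':
    port = int(sys.argv[1])
    dataset = IterableDataset(OrderedDict([('features', [[0., 1.], [2., 3.], [4., 5.]])]))
    # batch the examples so the consumer can use produces_examples=False
    stream = Batch(DataStream(dataset), ConstantScheme(2))
    start_server(stream, port=port, hwm=50)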
def get_stream(hdf5_file, which_set, batch_size=None):
    dataset = TrajectoryDataset(which_sets=(which_set, ))
    if batch_size is None:
        batch_size = dataset.num_examples
    data_stream = DataStream(dataset=dataset,
                             iteration_scheme=ShuffledScheme(
                                 examples=dataset.num_examples,
                                 batch_size=batch_size))
    # serve the stream from a separate process unless the file is small enough
    # to iterate in this process (or we are streaming the test set)
    load_in_memory = os.path.getsize(hdf5_file) < 14 * 10**9 or which_set == 'test'
    if not load_in_memory:
        port = 5557 if which_set == 'train' else 5558
        print(port)
        server_process = Process(target=start_server, args=(data_stream, port, 10))
        server_process.start()
        data_stream = ServerDataStream(dataset.sources, False,
                                       host='localhost', port=port, hwm=10)
    return data_stream
def main(feature_maps=None, mlp_hiddens=None, conv_sizes=None, pool_sizes=None,
         batch_size=None, num_batches=None):
    if feature_maps is None:
        feature_maps = [32, 48, 64, 96, 96, 128]
    if mlp_hiddens is None:
        mlp_hiddens = [1000]
    if conv_sizes is None:
        conv_sizes = [9, 7, 5, 3, 2, 1]
    if pool_sizes is None:
        pool_sizes = [2, 2, 2, 2, 1, 1]
    if batch_size is None:
        batch_size = 64
    conv_steps = [2, 1, 1, 1, 1, 1]  # same as stride
    image_size = (128, 128)
    output_size = 2
    learningRate = 0.001
    drop_prob = 0.4
    weight_noise = 0.75
    num_epochs = 150
    num_batches = None
    host_plot = 'http://*****:*****'  # plotting server URL (masked in the source)

    # ... (model construction, cost, algorithm, data streams, the first extensions
    #      and the Plot import at the top of the try block are elided in the source) ...

    try:
        extensions.append(
            Plot('%s %s @ %s' % (graph_name, datetime.datetime.now(), socket.gethostname()),
                 channels=[['train_error_rate', 'valid_error_rate'],
                           ['train_total_gradient_norm']],
                 after_epoch=True,
                 server_url=host_plot))
        PLOT_AVAILABLE = True
    except ImportError:
        PLOT_AVAILABLE = False

    extensions.append(Checkpoint(save_to, after_epoch=True, after_training=True,
                                 save_separately=['log']))

    logger.info("Building the model")
    model = Model(cost)

    ########### Loading images #####################
    main_loop = MainLoop(
        algorithm,
        stream_data_train,
        model=model,
        extensions=extensions)
    main_loop.run()
n_batches = pl_params.n_batches
seq_length = pl_params.seq_length
# print config.recursion_limit
floatX = theano.config.floatX
experiment_name = pl_params.experiment_name

stream_vars = ('upsampled', 'residual', )

train_stream = ServerDataStream(stream_vars, produces_examples=False,
                                port=pl_params.port)
valid_stream = ServerDataStream(stream_vars, produces_examples=False,
                                port=pl_params.port + 50)

if tbptt_flag:
    train_stream = SegmentSequence(train_stream, seq_length, add_flag=True)
    valid_stream = SegmentSequence(valid_stream, seq_length, add_flag=True)

# x_tr = next(train_stream.get_epoch_iterator())

#################
# Model
#################
def main():
    feature_maps = [20, 50]
    mlp_hiddens = [50]
    conv_sizes = [5, 5]
    pool_sizes = [3, 3]
    save_to = "DvC.pkl"
    batch_size = 500
    image_size = (32, 32)
    output_size = 2
    learningRate = 0.1
    num_epochs = 10
    num_batches = None
    host_plot = 'http://*****:*****'  # plotting server URL (masked in the source)

    # ... (model construction, cost, algorithm, data streams and the first
    #      extensions are elided in the source) ...

    extensions.append(
        Plot('%s %s @ %s' % ('CNN ', datetime.datetime.now(), socket.gethostname()),
             channels=[['valid_cost', 'valid_error_rate'],
                       ['train_total_gradient_norm']],
             after_epoch=True,
             server_url=host_plot))

    model = Model(cost)
    main_loop = MainLoop(algorithm, stream_data_train, model=model, extensions=extensions)
    main_loop.run()
def run(model_name, port_train, port_valid):
    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')
    image_border_size = (100, 100)

    if running_on_laptop:
        host_plot = 'http://*****:*****'  # plotting server URL (masked in the source)

    # ... (model, cost, algorithm and stream definitions are elided in the source) ...

    extensions = [
        Plot('%s %s @ %s' % (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss'], ['error', 'valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('/tmp/train_bn2')
    ]

    main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
                         extensions=extensions, model=model)
    main_loop.run()
def train(port=55557, num_epochs=500, learning_rate=0.01, momentum=0.9,
          l2_penalty_scale=1e-04, batchsize=500,
          save_model_file='./params_file.npz', start_with_saved_params=False):
    print("Loading data...")

    # Prepare Theano variables for inputs and targets
    input_var_x = T.tensor4('inputs')
    input_var_u = T.tensor4('inputs')
    input_var_v = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Build the model
    network = build_cnn(input_var_x, input_var_u, input_var_v)
    print(network_repr.get_network_str(
        lasagne.layers.get_all_layers(network),
        get_network=False, incomings=True, outgoings=True))
    if start_with_saved_params and os.path.isfile(save_model_file):
        with np.load(save_model_file) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)

    # Create a loss expression for training.
    prediction = lasagne.layers.get_output(network)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        lasagne.layers.get_all_layers(network),
        lasagne.regularization.l2) * l2_penalty_scale
    loss = categorical_crossentropy(prediction, target_var) + l2_penalty
    loss = loss.mean()

    # Create update expressions for training.
    params = lasagne.layers.get_all_params(network, trainable=True)
    print(
        """
////
Use AdaGrad update schedule for learning rate, see Duchi, Hazan, and
Singer (2011) "Adaptive subgradient methods for online learning and
stochastic optimization." JMLR, 12:2121-2159
////
        """)
    updates_adagrad = lasagne.updates.adagrad(
        loss, params, learning_rate=learning_rate, epsilon=1e-06)
    print(
        """
////
Apply Nesterov momentum using Lisa Lab's modifications.
////
        """)
    updates = lasagne.updates.apply_nesterov_momentum(
        updates_adagrad, params, momentum=momentum)

    # Create a loss expression for validation/testing. Note we do a
    # deterministic forward pass through the network, disabling dropout.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = categorical_crossentropy(test_prediction, target_var) + \
        l2_penalty
    test_loss = test_loss.mean()
    # Also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var_x, input_var_u, input_var_v, target_var],
                               loss, updates=updates,
                               allow_input_downcast=True)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var_x, input_var_u, input_var_v, target_var],
                             [test_loss, test_acc],
                             allow_input_downcast=True)

    print("Starting training...")
    train_dstream = ServerDataStream(('train',), port=port,
                                     produces_examples=False)

    #
    # TODO: early stopping logic goes here...
    #
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for data in train_dstream.get_epoch_iterator():
            _, inputs, targets = data[0], data[1], data[2]
            inputx, inputu, inputv = split_inputs_xuv(inputs)
            train_err += train_fn(inputx, inputu, inputv, targets)
            train_batches += 1

        # And a full pass over the validation data:
        # val_err = 0
        # val_acc = 0
        # val_batches = 0
        # for data in valid_dstream.get_epoch_iterator():
        #     _, inputs, targets = data[0], data[1], data[2]
        #     inputx, inputu, inputv = split_inputs_xuv(inputs)
        #     err, acc = val_fn(inputx, inputu, inputv, targets)
        #     val_err += err
        #     val_acc += acc
        #     val_batches += 1

        # Dump the current network weights to file
        np.savez(save_model_file,
                 *lasagne.layers.get_all_param_values(network))

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        # print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        # print("  validation accuracy:\t\t{:.2f} %".format(
        #     val_acc / val_batches * 100))

    print("Finished {} epochs.".format(epoch + 1))
import sys

from vgg_16 import get_model, build_model
from theano import tensor, function, config
import lasagne
from fuel.streams import ServerDataStream
import numpy
import pickle

from config import basepath

submit_stream = ServerDataStream(('features', 'image_name'), produces_examples=False)

# tensor
X = tensor.ftensor4('images')

# build simple vgg model
net, layers_names = build_model(X)

f_pretrained = open(basepath + 'vgg16.pkl')
model_pretrained = pickle.load(f_pretrained)
w_pretrained = model_pretrained['param values']
net['mean value'].set_value(model_pretrained['mean value'].astype(config.floatX))

# load weights
from lasagne.layers import set_all_param_values
with numpy.load('weights/simple_vgg_valid.npz') as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
set_all_param_values(net[layers_names[len(layers_names)-1]], param_values[0])

# create predict function
prediction_test = lasagne.layers.get_output(net[layers_names[len(layers_names)-1]],
                                            deterministic=True)
from resnet_152 import get_model as model_resnet

# build model and load weights
input_var = tensor.tensor4('X')
_, test_prediction, _ = model_resnet(input_var)

# create prediction function
val_fn = theano.function([input_var], [test_prediction])

# Try for a few data points
n_datapoints = 2

from fuel.streams import ServerDataStream
import numpy as np

train_stream = ServerDataStream(('features', 'labels'), produces_examples=False)

labels_count = np.zeros((17,))
mb_count = 0
iterator = train_stream.get_epoch_iterator()
data = next(iterator)
labels_count += data[1].sum(axis=0)
mb_count += 1

feat = np.asarray(data[0][:n_datapoints], dtype=np.float32)
pred = val_fn(feat)
print('Prediction for the {0} datapoints is : '.format(n_datapoints))
print(pred)
def setUp(self):
    self.server_process = Process(target=start_server, args=(get_stream(), ))
    self.server_process.start()
    self.stream = ServerDataStream(('f', 't'), False)
# import sys
# sys.path.append('experiments/simple_vgg/')
from .vgg_16 import get_model
from theano import tensor, function
import lasagne
from fuel.streams import ServerDataStream
import numpy
from utils import f2_score
import pickle

num_epochs = 50

train_stream = ServerDataStream(('features', 'labels'), produces_examples=False)
valid_stream = ServerDataStream(('features', 'labels'), produces_examples=False, port=5558)

X = tensor.ftensor4('images')
y = tensor.imatrix('targets')

prediction_train, prediction_test, params = get_model(X)

loss = lasagne.objectives.binary_crossentropy(prediction_train, y)
loss = loss.mean()

prediction_01 = tensor.ge(prediction_train, numpy.float32(.5))
f2 = f2_score(prediction_01, y)
f2_diff = f2_score(prediction_train, y)
loss = -f2_diff
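# NOTE: `f2_score` is imported from a local `utils` module that is not shown. A
# minimal symbolic sketch of an F-beta score with beta=2, computed over the whole
# multi-label prediction matrix; the project's actual implementation may differ,
# e.g. in how it averages over samples:
def f2_score_sketch(preds, targets, eps=1e-8):
    tp = (preds * targets).sum()
    fp = (preds * (1 - targets)).sum()
    fn = ((1 - preds) * targets).sum()
    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    # F-beta = (1 + beta^2) * P * R / (beta^2 * P + R), with beta = 2
    return 5. * precision * recall / (4. * precision + recall + eps)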
## choose model
from model.vgg_structured import build_model

from blocks.algorithms import GradientDescent, Adam
from blocks.graph import (ComputationGraph, apply_batch_normalization,
                          get_batch_normalization_updates, apply_dropout)
from blocks.model import Model
from blocks.filter import VariableFilter
from blocks.roles import WEIGHT, INPUT

# BUILD MODEL
images = tensor.ftensor4('images')
labels = tensor.ftensor4('labels')
cost_dropout, parameters = build_model(images, labels)

# LEARN WEIGHTS
train_stream = ServerDataStream(('images', 'labels'), False, hwm=10)
valid_stream = ServerDataStream(('images', 'labels'), False, hwm=10, port=5558)

model = Model(cost_dropout)

# ALGORITHM
alpha = 0.01  # learning rate of Adam
algorithm = GradientDescent(cost=cost_dropout, parameters=parameters,
                            step_rule=Adam(), on_unused_sources='ignore')

# EXTENSIONS
from blocks.extensions import Printing, Timing
from blocks.extensions.training import TrackTheBest
from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring
from blocks.extensions.stopping import FinishIfNoImprovementAfter
def train_snli_model(new_training_job, config, save_path, params, fast_start, fuel_server, seed, model='simple'): if config['exclude_top_k'] > config['num_input_words'] and config[ 'num_input_words'] > 0: raise Exception("Some words have neither word nor def embedding") c = config logger = configure_logger(name="snli_baseline_training", log_file=os.path.join(save_path, "log.txt")) if not os.path.exists(save_path): logger.info("Start a new job") os.mkdir(save_path) else: logger.info("Continue an existing job") with open(os.path.join(save_path, "cmd.txt"), "w") as f: f.write(" ".join(sys.argv)) # Make data paths nice for path in [ 'dict_path', 'embedding_def_path', 'embedding_path', 'vocab', 'vocab_def', 'vocab_text' ]: if c.get(path, ''): if not os.path.isabs(c[path]): c[path] = os.path.join(fuel.config.data_path[0], c[path]) main_loop_path = os.path.join(save_path, 'main_loop.tar') main_loop_best_val_path = os.path.join(save_path, 'main_loop_best_val.tar') stream_path = os.path.join(save_path, 'stream.pkl') # Save config to save_path json.dump(config, open(os.path.join(save_path, "config.json"), "w")) if model == 'simple': nli_model, data, used_dict, used_retrieval, _ = _initialize_simple_model_and_data( c) elif model == 'esim': nli_model, data, used_dict, used_retrieval, _ = _initialize_esim_model_and_data( c) else: raise NotImplementedError() # Compute cost s1, s2 = T.lmatrix('sentence1'), T.lmatrix('sentence2') if c['dict_path']: assert os.path.exists(c['dict_path']) s1_def_map, s2_def_map = T.lmatrix('sentence1_def_map'), T.lmatrix( 'sentence2_def_map') def_mask = T.fmatrix("def_mask") defs = T.lmatrix("defs") else: s1_def_map, s2_def_map = None, None def_mask = None defs = None s1_mask, s2_mask = T.fmatrix('sentence1_mask'), T.fmatrix('sentence2_mask') y = T.ivector('label') cg = {} for train_phase in [True, False]: # NOTE: Please don't change outputs of cg if train_phase: with batch_normalization(nli_model): pred = nli_model.apply(s1, s1_mask, s2, s2_mask, def_mask=def_mask, defs=defs, s1_def_map=s1_def_map, s2_def_map=s2_def_map, train_phase=train_phase) else: pred = nli_model.apply(s1, s1_mask, s2, s2_mask, def_mask=def_mask, defs=defs, s1_def_map=s1_def_map, s2_def_map=s2_def_map, train_phase=train_phase) cost = CategoricalCrossEntropy().apply(y.flatten(), pred) error_rate = MisclassificationRate().apply(y.flatten(), pred) cg[train_phase] = ComputationGraph([cost, error_rate]) # Weight decay (TODO: Make it less bug prone) if model == 'simple': weights_to_decay = VariableFilter( bricks=[dense for dense, relu, bn in nli_model._mlp], roles=[WEIGHT])(cg[True].variables) weight_decay = np.float32(c['l2']) * sum( (w**2).sum() for w in weights_to_decay) elif model == 'esim': weight_decay = 0.0 else: raise NotImplementedError() final_cost = cg[True].outputs[0] + weight_decay final_cost.name = 'final_cost' # Add updates for population parameters if c.get("bn", True): pop_updates = get_batch_normalization_updates(cg[True]) extra_updates = [(p, m * 0.1 + p * (1 - 0.1)) for p, m in pop_updates] else: pop_updates = [] extra_updates = [] if params: logger.debug("Load parameters from {}".format(params)) with open(params) as src: loaded_params = load_parameters(src) cg[True].set_parameter_values(loaded_params) for param, m in pop_updates: param.set_value(loaded_params[get_brick( param).get_hierarchical_name(param)]) if os.path.exists(os.path.join(save_path, "main_loop.tar")): logger.warning("Manually loading BN stats :(") with open(os.path.join(save_path, "main_loop.tar")) as src: loaded_params = 
load_parameters(src) for param, m in pop_updates: param.set_value( loaded_params[get_brick(param).get_hierarchical_name(param)]) if theano.config.compute_test_value != 'off': test_value_data = next( data.get_stream('train', batch_size=4).get_epoch_iterator()) s1.tag.test_value = test_value_data[0] s1_mask.tag.test_value = test_value_data[1] s2.tag.test_value = test_value_data[2] s2_mask.tag.test_value = test_value_data[3] y.tag.test_value = test_value_data[4] # Freeze embeddings if not c['train_emb']: frozen_params = [ p for E in nli_model.get_embeddings_lookups() for p in E.parameters ] train_params = [p for p in cg[True].parameters] assert len(set(frozen_params) & set(train_params)) > 0 else: frozen_params = [] if not c.get('train_def_emb', 1): frozen_params_def = [ p for E in nli_model.get_def_embeddings_lookups() for p in E.parameters ] train_params = [p for p in cg[True].parameters] assert len(set(frozen_params_def) & set(train_params)) > 0 frozen_params += frozen_params_def train_params = [p for p in cg[True].parameters if p not in frozen_params] train_params_keys = [ get_brick(p).get_hierarchical_name(p) for p in train_params ] # Optimizer algorithm = GradientDescent(cost=final_cost, on_unused_sources='ignore', parameters=train_params, step_rule=Adam(learning_rate=c['lr'])) algorithm.add_updates(extra_updates) m = Model(final_cost) parameters = m.get_parameter_dict() # Blocks version mismatch logger.info("Trainable parameters" + "\n" + pprint.pformat([(key, parameters[key].get_value().shape) for key in sorted(train_params_keys)], width=120)) logger.info("# of parameters {}".format( sum([ np.prod(parameters[key].get_value().shape) for key in sorted(train_params_keys) ]))) ### Monitored args ### train_monitored_vars = [final_cost] + cg[True].outputs monitored_vars = cg[False].outputs val_acc = monitored_vars[1] to_monitor_names = [ 'def_unk_ratio', 's1_merged_input_rootmean2', 's1_def_mean_rootmean2', 's1_gate_rootmean2', 's1_compose_gate_rootmean2' ] for k in to_monitor_names: train_v, valid_v = VariableFilter(name=k)( cg[True]), VariableFilter(name=k)(cg[False]) if len(train_v): logger.info("Adding {} tracking".format(k)) train_monitored_vars.append(train_v[0]) monitored_vars.append(valid_v[0]) else: logger.warning("Didnt find {} in cg".format(k)) if c['monitor_parameters']: for name in train_params_keys: param = parameters[name] num_elements = numpy.product(param.get_value().shape) norm = param.norm(2) / num_elements grad_norm = algorithm.gradients[param].norm(2) / num_elements step_norm = algorithm.steps[param].norm(2) / num_elements stats = tensor.stack(norm, grad_norm, step_norm, step_norm / grad_norm) stats.name = name + '_stats' train_monitored_vars.append(stats) regular_training_stream = data.get_stream('train', batch_size=c['batch_size'], seed=seed) if fuel_server: # the port will be configured by the StartFuelServer extension training_stream = ServerDataStream( sources=regular_training_stream.sources, hwm=100, produces_examples=regular_training_stream.produces_examples) else: training_stream = regular_training_stream ### Build extensions ### extensions = [ # Load(main_loop_path, load_iteration_state=True, load_log=True) # .set_conditions(before_training=not new_training_job), StartFuelServer(regular_training_stream, stream_path, hwm=100, script_path=os.path.join( os.path.dirname(__file__), "../bin/start_fuel_server.py"), before_training=fuel_server), Timing(every_n_batches=c['mon_freq']), ProgressBar(), RetrievalPrintStats(retrieval=used_retrieval, 
every_n_batches=c['mon_freq_valid'], before_training=not fast_start), Timestamp(), TrainingDataMonitoring(train_monitored_vars, prefix="train", every_n_batches=c['mon_freq']), ] if c['layout'] == 'snli': validation = DataStreamMonitoring(monitored_vars, data.get_stream('valid', batch_size=14, seed=seed), before_training=not fast_start, on_resumption=True, after_training=True, every_n_batches=c['mon_freq_valid'], prefix='valid') extensions.append(validation) elif c['layout'] == 'mnli': validation = DataStreamMonitoring(monitored_vars, data.get_stream('valid_matched', batch_size=14, seed=seed), every_n_batches=c['mon_freq_valid'], on_resumption=True, after_training=True, prefix='valid_matched') validation_mismatched = DataStreamMonitoring( monitored_vars, data.get_stream('valid_mismatched', batch_size=14, seed=seed), every_n_batches=c['mon_freq_valid'], before_training=not fast_start, on_resumption=True, after_training=True, prefix='valid_mismatched') extensions.extend([validation, validation_mismatched]) else: raise NotImplementedError() # Similarity trackers for embeddings if len(c.get('vocab_def', '')): retrieval_vocab = Vocabulary(c['vocab_def']) else: retrieval_vocab = data.vocab retrieval_all = Retrieval(vocab_text=retrieval_vocab, dictionary=used_dict, max_def_length=c['max_def_length'], exclude_top_k=0, max_def_per_word=c['max_def_per_word']) for name in [ 's1_word_embeddings', 's1_dict_word_embeddings', 's1_translated_word_embeddings' ]: variables = VariableFilter(name=name)(cg[False]) if len(variables): s1_emb = variables[0] logger.info("Adding similarity tracking for " + name) # A bit sloppy about downcast if "dict" in name: embedder = construct_dict_embedder(theano.function( [s1, defs, def_mask, s1_def_map], s1_emb, allow_input_downcast=True), vocab=data.vocab, retrieval=retrieval_all) extensions.append( SimilarityWordEmbeddingEval( embedder=embedder, prefix=name, every_n_batches=c['mon_freq_valid'], before_training=not fast_start)) else: embedder = construct_embedder(theano.function( [s1], s1_emb, allow_input_downcast=True), vocab=data.vocab) extensions.append( SimilarityWordEmbeddingEval( embedder=embedder, prefix=name, every_n_batches=c['mon_freq_valid'], before_training=not fast_start)) track_the_best = TrackTheBest(validation.record_name(val_acc), before_training=not fast_start, every_n_epochs=c['save_freq_epochs'], after_training=not fast_start, every_n_batches=c['mon_freq_valid'], choose_best=min) extensions.append(track_the_best) # Special care for serializing embeddings if len(c.get('embedding_path', '')) or len(c.get('embedding_def_path', '')): extensions.insert( 0, LoadNoUnpickling(main_loop_path, load_iteration_state=True, load_log=True).set_conditions( before_training=not new_training_job)) extensions.append( Checkpoint(main_loop_path, parameters=train_params + [p for p, m in pop_updates], save_main_loop=False, save_separately=['log', 'iteration_state'], before_training=not fast_start, every_n_epochs=c['save_freq_epochs'], after_training=not fast_start).add_condition( ['after_batch', 'after_epoch'], OnLogRecord(track_the_best.notification_name), (main_loop_best_val_path, ))) else: extensions.insert( 0, Load(main_loop_path, load_iteration_state=True, load_log=True).set_conditions( before_training=not new_training_job)) extensions.append( Checkpoint(main_loop_path, parameters=cg[True].parameters + [p for p, m in pop_updates], before_training=not fast_start, every_n_epochs=c['save_freq_epochs'], after_training=not fast_start).add_condition( ['after_batch', 
'after_epoch'], OnLogRecord(track_the_best.notification_name), (main_loop_best_val_path, ))) extensions.extend([ DumpCSVSummaries(save_path, every_n_batches=c['mon_freq_valid'], after_training=True), DumpTensorflowSummaries(save_path, after_epoch=True, every_n_batches=c['mon_freq_valid'], after_training=True), Printing(every_n_batches=c['mon_freq_valid']), PrintMessage(msg="save_path={}".format(save_path), every_n_batches=c['mon_freq']), FinishAfter(after_n_batches=c['n_batches']).add_condition( ['after_batch'], OnLogStatusExceed('iterations_done', c['n_batches'])) ]) logger.info(extensions) ### Run training ### if "VISDOM_SERVER" in os.environ: print("Running visdom server") ret = subprocess.Popen([ os.path.join(os.path.dirname(__file__), "../visdom_plotter.py"), "--visdom-server={}".format(os.environ['VISDOM_SERVER']), "--folder={}".format(save_path) ]) time.sleep(0.1) if ret.returncode is not None: raise Exception() atexit.register(lambda: os.kill(ret.pid, signal.SIGINT)) model = Model(cost) for p, m in pop_updates: model._parameter_dict[get_brick(p).get_hierarchical_name(p)] = p main_loop = MainLoop(algorithm, training_stream, model=model, extensions=extensions) assert os.path.exists(save_path) main_loop.run()
def train_language_model(new_training_job, config, save_path, params, fast_start, fuel_server, seed): c = config if seed: fuel.config.default_seed = seed blocks.config.config.default_seed = seed data, lm, retrieval = initialize_data_and_model(config) # full main loop can be saved... main_loop_path = os.path.join(save_path, 'main_loop.tar') # or only state (log + params) which can be useful not to pickle embeddings state_path = os.path.join(save_path, 'training_state.tar') stream_path = os.path.join(save_path, 'stream.pkl') best_tar_path = os.path.join(save_path, "best_model.tar") words = tensor.ltensor3('words') words_mask = tensor.matrix('words_mask') if theano.config.compute_test_value != 'off': test_value_data = next( data.get_stream('train', batch_size=4, max_length=5).get_epoch_iterator()) words.tag.test_value = test_value_data[0] words_mask.tag.test_value = test_value_data[1] costs, updates = lm.apply(words, words_mask) cost = rename(costs.mean(), 'mean_cost') cg = Model(cost) if params: logger.debug("Load parameters from {}".format(params)) with open(params) as src: cg.set_parameter_values(load_parameters(src)) length = rename(words.shape[1], 'length') perplexity, = VariableFilter(name='perplexity')(cg) perplexities = VariableFilter(name_regex='perplexity.*')(cg) monitored_vars = [length, cost] + perplexities if c['dict_path']: num_definitions, = VariableFilter(name='num_definitions')(cg) monitored_vars.extend([num_definitions]) parameters = cg.get_parameter_dict() trained_parameters = parameters.values() saved_parameters = parameters.values() if c['embedding_path']: logger.debug("Exclude word embeddings from the trained parameters") trained_parameters = [ p for p in trained_parameters if not p == lm.get_def_embeddings_params() ] saved_parameters = [ p for p in saved_parameters if not p == lm.get_def_embeddings_params() ] if c['cache_size'] != 0: logger.debug("Enable fake recursivity for looking up embeddings") trained_parameters = [ p for p in trained_parameters if not p == lm.get_cache_params() ] logger.info("Cost parameters" + "\n" + pprint.pformat([ " ".join( (key, str(parameters[key].get_value().shape), 'trained' if parameters[key] in trained_parameters else 'frozen')) for key in sorted(parameters.keys()) ], width=120)) rules = [] if c['grad_clip_threshold']: rules.append(StepClipping(c['grad_clip_threshold'])) rules.append(Adam(learning_rate=c['learning_rate'], beta1=c['momentum'])) algorithm = GradientDescent(cost=cost, parameters=trained_parameters, step_rule=CompositeRule(rules)) if c['cache_size'] != 0: algorithm.add_updates(updates) train_monitored_vars = list(monitored_vars) if c['grad_clip_threshold']: train_monitored_vars.append(algorithm.total_gradient_norm) word_emb_RMS, = VariableFilter(name='word_emb_RMS')(cg) main_rnn_in_RMS, = VariableFilter(name='main_rnn_in_RMS')(cg) train_monitored_vars.extend([word_emb_RMS, main_rnn_in_RMS]) if c['monitor_parameters']: train_monitored_vars.extend(parameter_stats(parameters, algorithm)) # We use a completely random seed on purpose. With Fuel server # it's currently not possible to restore the state of the training # stream. That's why it's probably better to just have it stateless. 
stream_seed = numpy.random.randint(0, 10000000) if fuel_server else None training_stream = data.get_stream('train', batch_size=c['batch_size'], max_length=c['max_length'], seed=stream_seed) valid_stream = data.get_stream('valid', batch_size=c['batch_size_valid'], max_length=c['max_length'], seed=stream_seed) original_training_stream = training_stream if fuel_server: # the port will be configured by the StartFuelServer extension training_stream = ServerDataStream( sources=training_stream.sources, produces_examples=training_stream.produces_examples) validation = DataStreamMonitoring(monitored_vars, valid_stream, prefix="valid").set_conditions( before_first_epoch=not fast_start, on_resumption=True, every_n_batches=c['mon_freq_valid']) track_the_best = TrackTheBest(validation.record_name(perplexity), choose_best=min).set_conditions( on_resumption=True, after_epoch=True, every_n_batches=c['mon_freq_valid']) # don't save them the entire main loop to avoid pickling everything if c['fast_checkpoint']: load = (LoadNoUnpickling(state_path, load_iteration_state=True, load_log=True).set_conditions( before_training=not new_training_job)) cp_args = { 'save_main_loop': False, 'save_separately': ['log', 'iteration_state'], 'parameters': saved_parameters } checkpoint = Checkpoint(state_path, before_training=not fast_start, every_n_batches=c['save_freq_batches'], after_training=not fast_start, **cp_args) if c['checkpoint_every_n_batches']: intermediate_cp = IntermediateCheckpoint( state_path, every_n_batches=c['checkpoint_every_n_batches'], after_training=False, **cp_args) else: load = (Load(main_loop_path, load_iteration_state=True, load_log=True).set_conditions( before_training=not new_training_job)) cp_args = { 'save_separately': ['iteration_state'], 'parameters': saved_parameters } checkpoint = Checkpoint(main_loop_path, before_training=not fast_start, every_n_batches=c['save_freq_batches'], after_training=not fast_start, **cp_args) if c['checkpoint_every_n_batches']: intermediate_cp = IntermediateCheckpoint( main_loop_path, every_n_batches=c['checkpoint_every_n_batches'], after_training=False, **cp_args) checkpoint = checkpoint.add_condition( ['after_batch', 'after_epoch'], OnLogRecord(track_the_best.notification_name), (best_tar_path, )) extensions = [ load, StartFuelServer(original_training_stream, stream_path, before_training=fuel_server), Timing(every_n_batches=c['mon_freq_train']) ] if retrieval: extensions.append( RetrievalPrintStats(retrieval=retrieval, every_n_batches=c['mon_freq_train'], before_training=not fast_start)) extensions.extend([ TrainingDataMonitoring(train_monitored_vars, prefix="train", every_n_batches=c['mon_freq_train']), validation, track_the_best, checkpoint ]) if c['checkpoint_every_n_batches']: extensions.append(intermediate_cp) extensions.extend([ DumpTensorflowSummaries(save_path, every_n_batches=c['mon_freq_train'], after_training=True), Printing(on_resumption=True, every_n_batches=c['mon_freq_train']), FinishIfNoImprovementAfter(track_the_best.notification_name, iterations=50 * c['mon_freq_valid'], every_n_batches=c['mon_freq_valid']), FinishAfter(after_n_batches=c['n_batches']) ]) logger.info("monitored variables during training:" + "\n" + pprint.pformat(train_monitored_vars, width=120)) logger.info("monitored variables during valid:" + "\n" + pprint.pformat(monitored_vars, width=120)) main_loop = MainLoop(algorithm, training_stream, model=Model(cost), extensions=extensions) main_loop.run()
def main(): x = T.tensor3('features') m = T.matrix('features_mask') y = T.imatrix('targets') embedding_size = 300 glove_version = "glove.6B.300d.txt" #embedding_size = 50 #glove_version = "vectors.6B.50d.txt" o = x.sum(axis=1) + m.mean() * 0 score_layer = Linear( input_dim = 300, output_dim = 1, weights_init = IsotropicGaussian(std=0.02), biases_init = Constant(0.), name="linear2") score_layer.initialize() o = score_layer.apply(o) probs = Sigmoid().apply(o) cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean() cost.name = 'cost' misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean() misclassification.name = 'misclassification' # ================= cg = ComputationGraph([cost]) params = cg.parameters algorithm = GradientDescent( cost = cg.outputs[0], params=params, step_rule = CompositeRule([ StepClipping(threshold=4), AdaM(), ]) ) # ======== print "setting up data" ports = { 'gpu0_train' : 5557, 'gpu0_test' : 5558, 'gpu1_train' : 5559, 'gpu1_test' : 5560, } #batch_size = 16 batch_size = 16 def start_server(port, which_set): fuel.server.logger.setLevel('WARN') dataset = IMDBText(which_set, sorted=True) n_train = dataset.num_examples #scheme = ShuffledScheme(examples=n_train, batch_size=batch_size) scheme = BatchwiseShuffledScheme(examples=n_train, batch_size=batch_size) stream = DataStream( dataset=dataset, iteration_scheme=scheme) print "loading glove" glove = GloveTransformer(glove_version, data_stream=stream) padded = Padding( data_stream=glove, mask_sources=('features',) ) fuel.server.start_server(padded, port=port, hwm=20) train_port = ports[theano.config.device + '_train'] train_p = Process(target=start_server, args=(train_port, 'train')) train_p.start() test_port = ports[theano.config.device + '_test'] test_p = Process(target=start_server, args=(test_port, 'test')) test_p.start() train_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=train_port) test_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=test_port) print "setting up model" n_examples = 25000 #====== model = Model(cost) extensions = [] extensions.append(EpochProgress(batch_per_epoch=n_examples // batch_size + 1)) extensions.append(TrainingDataMonitoring( [ cost, misclassification, ], prefix='train', after_epoch=True )) #extensions.append(DataStreamMonitoring( #[cost, misclassification], #data_stream=test_stream, #prefix='test', #after_epoch=True #)) extensions.append(Timing()) extensions.append(Printing()) extensions.append(Plot( theano.config.device+"_result", channels=[['train_cost']], after_epoch=True )) main_loop = MainLoop( model=model, data_stream=train_stream, algorithm=algorithm, extensions=extensions) main_loop.run()
    stream = Random2DRotation(stream, which_sources=('image_features',))
    # Data Transformation
    stream = ScaleAndShift(stream, 1./255, 0, which_sources=('image_features',))
    stream = Cast(stream, dtype='float32', which_sources=('image_features',))
    return stream

# note: compare strings with `==`; `is` tests identity, not equality
if mode == "CPU_test":
    data_train_stream = create_data(DogsVsCats(('train',), subset=slice(0, 100)))
    data_valid_stream = create_data(DogsVsCats(('train',), subset=slice(100, 110)))
if mode == "GPU_run":
    data_train_stream = create_data(DogsVsCats(('train',), subset=slice(0, 22500)))
    data_valid_stream = create_data(DogsVsCats(('train',), subset=slice(22500, 25000)))
if mode == "data_server":
    data_train_stream = ServerDataStream(('image_features', 'targets'), False, port=5560)
    data_valid_stream = ServerDataStream(('image_features', 'targets'), False, port=5561)

### Setting up the model
probs = top_mlp.apply(conv_out)
cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), probs)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')

cg = ComputationGraph([cost, error_rate])

### Gradient Descent
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Scale(learning_rate=learning_rate))
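# NOTE: in "data_server" mode the two streams above expect a separate producer
# process serving batches on ports 5560 and 5561. A minimal sketch of that producer,
# reusing the create_data() pipeline defined above (a second process would serve the
# validation slice on port 5561):
from fuel.server import start_server

if __name__ == '__main__':
    start_server(create_data(DogsVsCats(('train',), subset=slice(0, 22500))),
                 port=5560, hwm=10)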
def run(get_model, model_name):
    train_stream = ServerDataStream(
        ('cases', 'image_features', 'image_targets', 'multiplier'),
        False, hwm=10)
    valid_stream = ServerDataStream(
        ('cases', 'image_features', 'image_targets', 'multiplier'),
        False, hwm=10, port=5558)

    input_var = tensor.tensor4('image_features')
    target_var = tensor.tensor4('image_targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    test_prediction, prediction, params = get_model(input_var, target_var, multiply_var)

    loss = binary_crossentropy(prediction, target_var).mean()
    loss.name = 'loss'

    valid_error = T.neq((test_prediction > 0.5) * 1., target_var).mean()
    valid_error.name = 'error'

    scale = Scale(0.1)
    algorithm = GradientDescent(
        cost=loss,
        parameters=params,
        step_rule=scale,
        # step_rule=Adam(),
        on_unused_sources='ignore')

    host_plot = 'http://localhost:5006'

    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[loss, valid_error],
                             data_stream=valid_stream,
                             prefix="valid"),
        Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_error']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        # Checkpoint('train'),
        FinishAfter(after_n_epochs=10)
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    cg = ComputationGraph(test_prediction)

    while True:
        main_loop.run()
        scale.learning_rate.set_value(
            numpy.float32(scale.learning_rate.get_value() * 0.7))
        numpy.savez('best_weights.npz',
                    [param.get_value() for param in cg.shared_variables])
outf = Flattener().apply(out_soft3)
predict3 = NDimensionalSoftmax().apply(outf)
cost3 = CategoricalCrossEntropy().apply(y.flatten(), predict3).copy(name='cost3')

cost = cost3 + 0.3 * cost2 + 0.3 * cost1
cost = cost.copy(name='cost')

error = MisclassificationRate().apply(y.flatten(), predict3)
# Little trick to plot the error rate in two different plots (we cannot reuse
# the same quantity twice in the plots, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')

cg = ComputationGraph([cost, error_rate])

########### GET THE DATA #####################
stream_train = ServerDataStream(('image_features', 'targets'), False, port=5652, hwm=50)
stream_valid = ServerDataStream(('image_features', 'targets'), False, port=5653, hwm=50)

########### DEFINE THE ALGORITHM #############
track_cost = TrackTheBest("cost", after_epoch=True, after_batch=False)

algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Momentum(learning_rate=0.0001, momentum=0.9))

extensions = [
    Timing(),
    FinishAfter(after_n_epochs=num_epochs),
def run(get_model, model_name):
    train_stream = ServerDataStream(
        ('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'),
        False, hwm=10)
    valid_stream = ServerDataStream(
        ('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'),
        False, hwm=10, port=5558)

    ftensor5 = tensor.TensorType('float32', (False, ) * 5)
    input_var = ftensor5('sax_features')
    target_var = tensor.matrix('targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    prediction, test_prediction, test_pred_mid, params_bottom, params_top = get_model(
        input_var, multiply_var)

    # load parameters
    cg = ComputationGraph(test_pred_mid)
    params_val = numpy.load('sunnybrook/best_weights.npz')
    for p, value in zip(cg.shared_variables, params_val['arr_0']):
        p.set_value(value)

    crps = tensor.abs_(test_prediction - target_var).mean()
    loss = squared_error(prediction, target_var).mean()
    loss.name = 'loss'
    crps.name = 'crps'

    algorithm = GradientDescent(cost=loss, parameters=params_top,
                                step_rule=Adam(), on_unused_sources='ignore')

    host_plot = 'http://localhost:5006'

    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[crps, loss],
                             data_stream=valid_stream,
                             prefix="valid"),
        Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_crps']],
             after_epoch=True, server_url=host_plot),
        Printing(),
        Checkpoint('train'),
        FinishAfter(after_n_epochs=20)
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
def _push_allocation_config(self):
    self.conv_sequence._push_allocation_config()
    conv_out_dim = self.conv_sequence.get_dim('output')
    self.top_mlp.activations = self.top_mlp_activations
    self.top_mlp.dims = [numpy.prod(conv_out_dim)] + self.top_mlp_dims

# Generating input and target variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Load Data
stream_train = ServerDataStream(('image_features', 'targets'), False, port=5556)
stream_valid = ServerDataStream(('image_features', 'targets'), False, port=5557)
# stream_test = ServerDataStream(('image_features','targets'), False, port=5558)

# Init an instance of the convnet
convnet = LeNet(conv_activations, num_channels, image_shape,
                filter_sizes=filter_sizes,
                feature_maps=feature_maps,
                pooling_sizes=pooling_sizes,
                top_mlp_activations=mlp_activations,
                top_mlp_dims=mlp_hiddens + [output_size],
def predict(port, l2_penalty_scale, save_model_file='./params_file.npz', batchsize=500, load_in_memory=False, be_verbose=False): print("Loading data for prediction...") # extract timestamp from model file - assume it is the first set of numbers # otherwise just use "now" import re import time tstamp = str(time.time()).split('.')[0] m = re.search(r"[0-9]+", save_model_file) if m: tstamp = m.group(0) # Prepare Theano variables for inputs and targets input_var_x = T.tensor4('inputs') input_var_u = T.tensor4('inputs') input_var_v = T.tensor4('inputs') target_var = T.ivector('targets') # Build the model network = build_cnn(input_var_x, input_var_u, input_var_v) with np.load(save_model_file) as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values(network, param_values) # Create a loss expression for testing. test_prediction = lasagne.layers.get_output(network, deterministic=True) l2_penalty = lasagne.regularization.regularize_layer_params( lasagne.layers.get_all_layers(network), lasagne.regularization.l2) * l2_penalty_scale test_loss = categorical_crossentropy(test_prediction, target_var) + \ l2_penalty test_loss = test_loss.mean() # Also create an expression for the classification accuracy: test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var), dtype=theano.config.floatX) # Look at the classifications test_prediction_values = T.argmax(test_prediction, axis=1) # Compile a function computing the validation loss and accuracy: val_fn = theano.function([input_var_x, input_var_u, input_var_v, target_var], [test_loss, test_acc], allow_input_downcast=True) # Compute the actual predictions - also instructive is to look at # `test_prediction` as an output (array of softmax probabilities) # (but that prints a _lot_ of stuff to screen...) pred_fn = theano.function([input_var_x, input_var_u, input_var_v], [test_prediction_values], allow_input_downcast=True) # don't `produces_examples`, produce batches test_dstream = ServerDataStream(('test',), port=port, produces_examples=False) # look at some concrete predictions targ_numbers = [1, 2, 3, 4, 5] pred_target = np.array([0, 0, 0, 0, 0]) true_target = np.array([0, 0, 0, 0, 0]) targs_mat = np.zeros(11 * 11).reshape(11, 11) for data in test_dstream.get_epoch_iterator(): _, inputs, targets = data[0], data[1], data[2] inputx, inputu, inputv = split_inputs_xuv(inputs) pred = pred_fn(inputx, inputu, inputv) pred_targ = zip(pred[0], targets) if be_verbose: print("(prediction, true target):", pred_targ) print("----------------") for p, t in pred_targ: targs_mat[t][p] += 1 if t in targ_numbers: true_target[t-1] += 1 if p == t: pred_target[p-1] += 1 acc_target = 100.0 * pred_target / true_target.astype('float32') perf_file = 'perfmat' + tstamp + '.npy' np.save(perf_file, targs_mat) # compute and print the test error: test_err = 0 test_acc = 0 test_batches = 0 for data in test_dstream.get_epoch_iterator(): _, inputs, targets = data[0], data[1], data[2] inputx, inputu, inputv = split_inputs_xuv(inputs) err, acc = val_fn(inputx, inputu, inputv, targets) test_err += err test_acc += acc test_batches += 1 print("Final results:") print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches)) print(" test accuracy:\t\t{:.2f} %".format( test_acc / test_batches * 100)) for i, v in enumerate(acc_target): print(" target {} accuracy:\t\t\t{:.3f} %".format( (i + 1), acc_target[i]))
lr = 10**(2 * numpy.random.rand() - 5)

config.recursion_limit = 100000
floatX = theano.config.floatX

# job_id = 5557
job_id = int(sys.argv[1])

save_dir = os.environ['FUEL_DATA_PATH']
save_dir = os.path.join(save_dir, '..', 'results/', 'blizzard/')

experiment_name = 'deep_l0_{}_{}'.format(job_id, lr)

train_stream = ServerDataStream(('upsampled', 'residual', ),
                                produces_examples=False, port=job_id)
valid_stream = ServerDataStream(('upsampled', 'residual', ),
                                produces_examples=False, port=job_id + 50)

#################
# Model
#################
x = tensor.tensor3('upsampled')
y = tensor.tensor3('residual')
def _push_allocation_config(self):
    self.conv_sequence._push_allocation_config()
    conv_out_dim = self.conv_sequence.get_dim('output')
    self.top_mlp.activations = self.top_mlp_activations
    self.top_mlp.dims = [numpy.prod(conv_out_dim)] + self.top_mlp_dims

# Generating input and target variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

# Load Data
# stream_train = ServerDataStream(('image_features','targets'), False, port=5556)
# stream_valid = ServerDataStream(('image_features','targets'), False, port=5557)
stream_test = ServerDataStream(('image_features', 'targets'), False, port=5558)

# Init an instance of the convnet
convnet = LeNet(conv_activations, num_channels, image_shape,
                filter_sizes=filter_sizes,
                feature_maps=feature_maps,
                pooling_sizes=pooling_sizes,
                top_mlp_activations=mlp_activations,
                top_mlp_dims=mlp_hiddens + [output_size],
                conv_step=conv_step,
                border_mode=border_mode,
                weights_init=Uniform(width=0.2),
                biases_init=Constant(0))