Example No. 1
def main(name, num_epochs):
    train_stream = ServerDataStream(('features', 'labels'),
                                    produces_examples=False)

    valid_stream = ServerDataStream(('features', 'labels'),
                                    produces_examples=False,
                                    port=5558)

    X = tensor.ftensor4('images')
    y = tensor.imatrix('targets')

    prediction_train, prediction_test, params = get_model(X)

    loss = lasagne.objectives.binary_crossentropy(prediction_train, y)
    loss = loss.mean()

    prediction_01 = tensor.ge(prediction_train, numpy.float32(.5))
    f2 = f2_score(prediction_01, y)
    f2_diff = f2_score(prediction_train, y)
    loss = -f2_diff

    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=1e-3,
                                                momentum=0.9)

    train_fn = function([X, y], loss, updates=updates)
    valid_fn = function([X, y], f2)

    best_valid_score = 0
    patience = 0
    all_train_loss = []
    iteration = 0
    for epoch in range(num_epochs):
        f2_valid_loss = []
        f2_train_loss = []
        for imgs, targets in train_stream.get_epoch_iterator():
            f2_train_loss.append(train_fn(imgs, targets))
            iteration += 1
        all_train_loss.append(f2_train_loss)
        train_score = -numpy.mean(numpy.asarray(f2_train_loss))
        print('Iteration %d' % (iteration, ))
        print('train score : {0}'.format(train_score))
        for imgs, targets in valid_stream.get_epoch_iterator():
            f2_valid_loss.append(valid_fn(imgs, targets))
        valid_score = numpy.mean(numpy.asarray(f2_valid_loss))
        print('valid score : {0}'.format(valid_score))
        if best_valid_score < valid_score:
            best_valid_score = valid_score
            patience = 0
            param_values = [p.get_value() for p in params]
            numpy.savez_compressed('%s.npz' % (name, ), param_values)
            pickle.dump(all_train_loss, open('%s.pkl' % (name, ), 'wb'))
        else:
            patience += 1
            if patience == 5:
                break
        print('patience : {0}'.format(patience))
        print('\n')
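These training and validation streams assume a Fuel data server is already publishing ('features', 'labels') batches on the default port 5557 and on port 5558. A minimal sketch of that server side, assuming hypothetical get_train_stream / get_valid_stream helpers that return regular Fuel DataStreams, could look like this:

from multiprocessing import Process
from fuel.server import start_server

def serve(make_stream, port):
    # start_server blocks, so each server runs in its own process
    start_server(make_stream(), port=port, hwm=10)

if __name__ == '__main__':
    Process(target=serve, args=(get_train_stream, 5557)).start()  # hypothetical helper
    Process(target=serve, args=(get_valid_stream, 5558)).start()  # hypothetical helper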
Example No. 2
def run(model_name):

    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****@ %s' %
             (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example No. 3
    def __init__(self):

        ImageNet_Base.__init__(self)

        data_stream_train = ServerDataStream(('filenames',), False,
                                             port=self.port_train)
        self.get_epoch_train = data_stream_train.get_epoch_iterator

        data_stream_val = ServerDataStream(('filenames',), False,
                                           port=self.port_val)
        self.get_epoch_val = data_stream_val.get_epoch_iterator
Example No. 4
    def __init__(self, ports, config, *args, **kwargs):
        """"""
        self.config = config
        self.host = config.data_server.host
        self.hwm = config.data_server.hwm

        # open streams
        self.data_streams = {}
        for target, dset_ports in ports.items():
            self.data_streams[target] = {}
            for dset, port in dset_ports.items():
                self.data_streams[target][dset] = ServerDataStream(
                    sources=('raw',),  # single-element tuple; ('raw') would just be the string 'raw'
                    produces_examples=True,
                    port=port, host=self.host, hwm=self.hwm
                )

        # initiate epoch iterators
        self.epoch_iterators = self._init_epoch_iterators()

        # assign instance method
        self.dset_size = {}
        for target in config.target:
            self.dset_size[target] = {}
            self.dset_size[target]['train'] = eval(
                'self.config.paths.meta_data.size.{}.train'.format(target))
            self.dset_size[target]['valid'] = eval(
                'self.config.paths.meta_data.size.{}.valid'.format(target))

        # get n_iteration
        self.n_iter = sum([d['train'] for d in self.dset_size.values()])
        self.n_iter = int(self.n_iter / config.hyper_parameters.batch_size)
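The eval-based size lookup above works but is fragile; a sketch of an equivalent attribute walk, assuming config.paths.meta_data.size behaves like a nested namespace, would be:

def _dataset_size(config, target, split):
    # walk config.paths.meta_data.size.<target>.<split> without eval
    node = config.paths.meta_data.size
    return getattr(getattr(node, target), split)

# self.dset_size[target]['train'] = _dataset_size(self.config, target, 'train')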
Example No. 5
def fork_to_background(make_datastream, sources):
    port = get_open_port()
    proc = Process(target=on_thread, args=(make_datastream, port))
    proc.start()
    datastream = ServerDataStream(sources,
                                  port=port,
                                  hwm=hwm,
                                  produces_examples=False)
    return datastream, proc
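This example relies on get_open_port, on_thread and a module-level hwm that are not shown; a plausible sketch of those helpers (an assumption, not the original code) is:

import socket
from fuel.server import start_server

hwm = 50  # assumed module-level high-water mark

def get_open_port():
    # bind to port 0 so the OS picks a free port, then release it
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(('', 0))
    port = s.getsockname()[1]
    s.close()
    return port

def on_thread(make_datastream, port):
    # runs in the child process and blocks until terminated
    start_server(make_datastream(), port=port, hwm=hwm)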
Example No. 6
def test_server():
    server_process = Process(target=start_server, args=(get_stream(), ))
    server_process.start()
    try:
        server_data = ServerDataStream(('f', 't')).get_epoch_iterator()
        expected_data = get_stream().get_epoch_iterator()
        for _, s, e in zip(range(3), server_data, expected_data):
            for data in zip(s, e):
                assert_allclose(*data)
        assert_raises(StopIteration, next, server_data)
    finally:
        server_process.terminate()
Example No. 7
def main(args):
    print(args)
    parser = argparse.ArgumentParser(description='train')
    parser.add_argument('-p', '--parallel', action='store_true')
    parser.add_argument('-m', '--mnist', action='store_true')
    parser.add_argument('--L1', type=float)
    parser.add_argument('--L2', type=float)
    parser.add_argument('-e', '--early_stopping', action='store_true')
    parser.add_argument('-d', '--dropout', action='store_true')
    parser.add_argument('-j', '--jobid')
    parser.add_argument('-s', '--small', action='store_true')
    parser.add_argument('-u', '--update', choices=["rmsprop"])
    parser.add_argument('-f', '--finish', type=int)
    parser.add_argument('-t', '--duration', type=int)
    parser.add_argument('-a', '--augmentation', action='store_true')
    parser.add_argument('--port', default=5557, type=int)
    args = parser.parse_args(args)

    image_size = (128, 128)

    if args.mnist:
        train, test = get_mnist()
        net = net_mnist()
    else:
        net = net_dvc(image_size)
        if args.parallel:
            sources = ('image_features', 'targets')
            train = ServerDataStream(sources, True, port=args.port)
            valid = ServerDataStream(sources, True, port=args.port + 1)
            test = ServerDataStream(sources, True, port=args.port + 2)
        else:
            train, valid, test = get_dvc(image_size,
                                         shortcut=args.small,
                                         augmentation=args.augmentation)

    train_net(net, train, test, **vars(args))
Example No. 8
def stream_from_file(sources, filename, *args):
    port = get_open_port()
    proc = Popen(['python', filename, str(port)] + list(args),
                 env=dict(os.environ, THEANO_FLAGS='device=cpu'))
    stream = ServerDataStream(sources,
                              port=port,
                              hwm=50,
                              produces_examples=False)

    def term():
        if proc:
            proc.kill()

    atexit.register(term)

    return stream, proc
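The script launched by stream_from_file is expected to read the port from its first argument and serve a stream on it; one possible shape for that script (an assumption, with build_stream as a hypothetical helper) is:

import sys
from fuel.server import start_server

if __name__ == '__main__':
    port = int(sys.argv[1])
    # build_stream is a hypothetical helper turning the remaining args into a DataStream
    start_server(build_stream(*sys.argv[2:]), port=port, hwm=50)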
Example No. 9
def get_stream(hdf5_file, which_set, batch_size=None):
    dataset = TrajectoryDataset(which_sets=(which_set, ))
    if batch_size is None:
        batch_size = dataset.num_examples
    data_stream = DataStream(dataset=dataset,
                             iteration_scheme=ShuffledScheme(
                                 examples=dataset.num_examples,
                                 batch_size=batch_size))

    load_in_memory = os.path.getsize(
        hdf5_file) < 14 * 10**9 or which_set == 'test'
    if not load_in_memory:
        port = 5557 if which_set == 'train' else 5558
        print(port)
        server_process = Process(target=start_server,
                                 args=(data_stream, port, 10))
        server_process.start()
        data_stream = ServerDataStream(dataset.sources,
                                       False,
                                       host='localhost',
                                       port=port,
                                       hwm=10)

    return data_stream
Example No. 10
def main(feature_maps=None, mlp_hiddens=None,
         conv_sizes=None, pool_sizes=None, batch_size=None,
         num_batches=None):
    if feature_maps is None:
        feature_maps = [32, 48, 64, 96, 96, 128]
    if mlp_hiddens is None:
        mlp_hiddens = [1000]
    if conv_sizes is None:
        conv_sizes = [9, 7, 5, 3, 2, 1]
    if pool_sizes is None:
        pool_sizes = [2, 2, 2, 2, 1, 1]
    if batch_size is None:
        batch_size = 64
    conv_steps = [2, 1, 1, 1, 1, 1]  # same as stride
    image_size = (128, 128)
    output_size = 2
    learningRate = 0.001
    drop_prob = 0.4
    weight_noise = 0.75
    num_epochs = 150
    num_batches = None
    host_plot='http://*****:*****@ %s' % (graph_name, datetime.datetime.now(), socket.gethostname()),
                                channels=[['train_error_rate', 'valid_error_rate'],
                                 ['train_total_gradient_norm']], after_epoch=True, server_url=host_plot))
            PLOT_AVAILABLE = True
        except ImportError:
            PLOT_AVAILABLE = False
        extensions.append(Checkpoint(save_to, after_epoch=True, after_training=True, save_separately=['log']))


    logger.info("Building the model")

    model = Model(cost)

    ########### Loading images #####################
    main_loop = MainLoop(
        algorithm,
        stream_data_train,
        model=model,
        extensions=extensions)

    main_loop.run()
Example No. 11
n_batches = pl_params.n_batches
seq_length = pl_params.seq_length

# print config.recursion_limit
floatX = theano.config.floatX

experiment_name = pl_params.experiment_name

stream_vars = (
    'upsampled',
    'residual',
)

train_stream = ServerDataStream(stream_vars,
                                produces_examples=False,
                                port=pl_params.port)

valid_stream = ServerDataStream(stream_vars,
                                produces_examples=False,
                                port=pl_params.port + 50)

if tbptt_flag:
    train_stream = SegmentSequence(train_stream, seq_length, add_flag=True)
    valid_stream = SegmentSequence(valid_stream, seq_length, add_flag=True)

#x_tr = next(train_stream.get_epoch_iterator())

#################
# Model
#################
Example No. 12
def main():
    feature_maps = [20, 50]
    mlp_hiddens = [50]
    conv_sizes = [5, 5]
    pool_sizes = [3, 3]
    save_to = "DvC.pkl"
    batch_size = 500
    image_size = (32, 32)
    output_size = 2
    learningRate = 0.1
    num_epochs = 10
    num_batches = None
    host_plot = 'http://*****:*****@ %s' %
             ('CNN ', datetime.datetime.now(), socket.gethostname()),
             channels=[['valid_cost', 'valid_error_rate'],
                       ['train_total_gradient_norm']],
             after_epoch=True,
             server_url=host_plot))

    model = Model(cost)

    main_loop = MainLoop(algorithm,
                         stream_data_train,
                         model=model,
                         extensions=extensions)

    main_loop.run()
Example No. 13
def run(model_name, port_train, port_valid):

    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = (100, 100)

    if running_on_laptop:
        host_plot = 'http://*****:*****@ %s' %
             (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss'], ['error', 'valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('/tmp/train_bn2')
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions,
                         model=model)
    main_loop.run()
Example No. 14
def train(port=55557, num_epochs=500, learning_rate=0.01, momentum=0.9,
          l2_penalty_scale=1e-04, batchsize=500,
          save_model_file='./params_file.npz', start_with_saved_params=False):
    print("Loading data...")

    # Prepare Theano variables for inputs and targets
    input_var_x = T.tensor4('inputs')
    input_var_u = T.tensor4('inputs')
    input_var_v = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Build the model
    network = build_cnn(input_var_x, input_var_u, input_var_v)
    print(network_repr.get_network_str(
        lasagne.layers.get_all_layers(network),
        get_network=False, incomings=True, outgoings=True))
    if start_with_saved_params and os.path.isfile(save_model_file):
        with np.load(save_model_file) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)

    # Create a loss expression for training.
    prediction = lasagne.layers.get_output(network)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        lasagne.layers.get_all_layers(network),
        lasagne.regularization.l2) * l2_penalty_scale
    loss = categorical_crossentropy(prediction, target_var) + l2_penalty
    loss = loss.mean()

    # Create update expressions for training.
    params = lasagne.layers.get_all_params(network, trainable=True)
    print(
        """
        ////
        Use AdaGrad update schedule for learning rate, see Duchi, Hazan, and
        Singer (2011) "Adaptive subgradient methods for online learning and
        stochastic optimization." JMLR, 12:2121-2159
        ////
        """)
    updates_adagrad = lasagne.updates.adagrad(
        loss, params, learning_rate=learning_rate, epsilon=1e-06)
    print(
        """
        ////
        Apply Nesterov momentum using Lisa Lab's modifications.
        ////
        """)
    updates = lasagne.updates.apply_nesterov_momentum(
        updates_adagrad, params, momentum=momentum)

    # Create a loss expression for validation/testing. Note we do a
    # deterministic forward pass through the network, disabling dropout.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = categorical_crossentropy(test_prediction, target_var) + \
        l2_penalty
    test_loss = test_loss.mean()
    # Also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var_x, input_var_u, input_var_v,
                                target_var],
                               loss, updates=updates,
                               allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var_x, input_var_u, input_var_v,
                              target_var],
                             [test_loss, test_acc],
                             allow_input_downcast=True)

    print("Starting training...")
    train_dstream = ServerDataStream(('train',),
                                     port=port,
                                     produces_examples=False)

    #
    # TODO: early stopping logic goes here...
    #

    for epoch in range(num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for data in train_dstream.get_epoch_iterator():
            _, inputs, targets = data[0], data[1], data[2]
            inputx, inputu, inputv = split_inputs_xuv(inputs)
            train_err += train_fn(inputx, inputu, inputv, targets)
            train_batches += 1

        # And a full pass over the validation data:
        # val_err = 0
        # val_acc = 0
        # val_batches = 0
        # for data in valid_dstream.get_epoch_iterator():
        #     _, inputs, targets = data[0], data[1], data[2]
        #     inputx, inputu, inputv = split_inputs_xuv(inputs)
        #     err, acc = val_fn(inputx, inputu, inputv, targets)
        #     val_err += err
        #     val_acc += acc
        #     val_batches += 1

        # Dump the current network weights to file
        np.savez(save_model_file,
                 *lasagne.layers.get_all_param_values(network))

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        # print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        # print("  validation accuracy:\t\t{:.2f} %".format(
        #     val_acc / val_batches * 100))

    print("Finished {} epochs.".format(epoch + 1))
Example No. 15
import sys
from vgg_16 import get_model, build_model
from theano import tensor, function, config
import lasagne
from fuel.streams import ServerDataStream
import numpy
import pickle
from config import basepath

submit_stream = ServerDataStream(('features', 'image_name'), produces_examples=False)

# tensor
X = tensor.ftensor4('images')

# build simple vgg model
net, layers_names = build_model(X)
f_pretrained      = open(basepath + 'vgg16.pkl', 'rb')  # binary mode for pickle.load
model_pretrained  = pickle.load(f_pretrained)
w_pretrained      = model_pretrained['param values']
net['mean value'].set_value(model_pretrained['mean value'].astype(config.floatX))

# load weights
from lasagne.layers import set_all_param_values

with numpy.load('weights/simple_vgg_valid.npz') as f:
    param_values = [f['arr_%d' % i] for i in range(len(f.files))]

set_all_param_values(net[layers_names[-1]], param_values[0])

# create predict function
prediction_test = lasagne.layers.get_output(net[layers_names[-1]], deterministic=True)
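The snippet stops after building prediction_test; a minimal continuation sketch (an assumption, not part of the original script) would compile it and run it over the submission stream:

predict_fn = function([X], prediction_test)

for features, image_names in submit_stream.get_epoch_iterator():
    probs = predict_fn(numpy.asarray(features, dtype=numpy.float32))
    # probs holds one row of label probabilities per image in the batch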
Example No. 16
from resnet_152 import get_model as model_resnet

# build model and load weights
input_var = tensor.tensor4('X')
_, test_prediction, _ = model_resnet(input_var)

# create prediction function
val_fn          = theano.function([input_var], [test_prediction])

# Try for a few data points
n_datapoints = 2

from fuel.streams import ServerDataStream
import numpy as np

train_stream = ServerDataStream(('features', 'labels'),
                                produces_examples=False)

labels_count = np.zeros((17,))
mb_count = 0

iterator      = train_stream.get_epoch_iterator()
data          = next(iterator)  # next() works on both Python 2 and 3
labels_count += data[1].sum(axis=0)
mb_count += 1

feat            = np.asarray(data[0][:n_datapoints], dtype=np.float32)
pred            = val_fn(feat)

print('Prediction for the {0} datapoints is : '.format(n_datapoints))
print(pred)
Example No. 17
    def setUp(self):
        self.server_process = Process(target=start_server,
                                      args=(get_stream(), ))
        self.server_process.start()
        self.stream = ServerDataStream(('f', 't'), False)
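The matching tearDown is not shown; a likely counterpart (an assumption, mirroring Example No. 6) terminates the background server process:

    def tearDown(self):
        self.server_process.terminate()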
Example No. 18
#import sys
#sys.path.append('experiments/simple_vgg/')
from .vgg_16 import get_model
from theano import tensor, function
import lasagne
from fuel.streams import ServerDataStream
import numpy
from utils import f2_score
import pickle

num_epochs = 50

train_stream = ServerDataStream(('features', 'labels'),
                                produces_examples=False)

valid_stream = ServerDataStream(('features', 'labels'),
                                produces_examples=False,
                                port=5558)

X = tensor.ftensor4('images')
y = tensor.imatrix('targets')

prediction_train, prediction_test, params = get_model(X)

loss = lasagne.objectives.binary_crossentropy(prediction_train, y)
loss = loss.mean()

prediction_01 = tensor.ge(prediction_train, numpy.float32(.5))
f2 = f2_score(prediction_01, y)
f2_diff = f2_score(prediction_train, y)
loss = -f2_diff
Example No. 19
## choose model
from model.vgg_structured import build_model

from blocks.algorithms import GradientDescent, Adam
from blocks.graph import ComputationGraph, apply_batch_normalization, get_batch_normalization_updates, apply_dropout
from blocks.model import Model
from blocks.filter import VariableFilter
from blocks.roles import WEIGHT, INPUT

# BUILD MODEL
images = tensor.ftensor4('images')
labels = tensor.ftensor4('labels')
cost_dropout, parameters = build_model(images, labels)

# LEARN WEIGHTS
train_stream = ServerDataStream(('images', 'labels'), False, hwm=10)
valid_stream = ServerDataStream(('images', 'labels'), False, hwm=10, port=5558)
model = Model(cost_dropout)

# ALGORITHM
alpha = 0.01  # learning rate of Adam
algorithm = GradientDescent(cost=cost_dropout,
                            parameters=parameters,
                            step_rule=Adam(),
                            on_unused_sources='ignore')

# EXTENSIONS
from blocks.extensions import Printing, Timing
from blocks.extensions.training import TrackTheBest
from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring
from blocks.extensions.stopping import FinishIfNoImprovementAfter
Example No. 20
def train_snli_model(new_training_job,
                     config,
                     save_path,
                     params,
                     fast_start,
                     fuel_server,
                     seed,
                     model='simple'):
    if config['exclude_top_k'] > config['num_input_words'] and config[
            'num_input_words'] > 0:
        raise Exception("Some words have neither word nor def embedding")
    c = config
    logger = configure_logger(name="snli_baseline_training",
                              log_file=os.path.join(save_path, "log.txt"))
    if not os.path.exists(save_path):
        logger.info("Start a new job")
        os.mkdir(save_path)
    else:
        logger.info("Continue an existing job")
    with open(os.path.join(save_path, "cmd.txt"), "w") as f:
        f.write(" ".join(sys.argv))

    # Make data paths nice
    for path in [
            'dict_path', 'embedding_def_path', 'embedding_path', 'vocab',
            'vocab_def', 'vocab_text'
    ]:
        if c.get(path, ''):
            if not os.path.isabs(c[path]):
                c[path] = os.path.join(fuel.config.data_path[0], c[path])

    main_loop_path = os.path.join(save_path, 'main_loop.tar')
    main_loop_best_val_path = os.path.join(save_path, 'main_loop_best_val.tar')
    stream_path = os.path.join(save_path, 'stream.pkl')

    # Save config to save_path
    json.dump(config, open(os.path.join(save_path, "config.json"), "w"))

    if model == 'simple':
        nli_model, data, used_dict, used_retrieval, _ = _initialize_simple_model_and_data(
            c)
    elif model == 'esim':
        nli_model, data, used_dict, used_retrieval, _ = _initialize_esim_model_and_data(
            c)
    else:
        raise NotImplementedError()

    # Compute cost
    s1, s2 = T.lmatrix('sentence1'), T.lmatrix('sentence2')

    if c['dict_path']:
        assert os.path.exists(c['dict_path'])
        s1_def_map, s2_def_map = T.lmatrix('sentence1_def_map'), T.lmatrix(
            'sentence2_def_map')
        def_mask = T.fmatrix("def_mask")
        defs = T.lmatrix("defs")
    else:
        s1_def_map, s2_def_map = None, None
        def_mask = None
        defs = None

    s1_mask, s2_mask = T.fmatrix('sentence1_mask'), T.fmatrix('sentence2_mask')
    y = T.ivector('label')

    cg = {}
    for train_phase in [True, False]:
        # NOTE: Please don't change outputs of cg
        if train_phase:
            with batch_normalization(nli_model):
                pred = nli_model.apply(s1,
                                       s1_mask,
                                       s2,
                                       s2_mask,
                                       def_mask=def_mask,
                                       defs=defs,
                                       s1_def_map=s1_def_map,
                                       s2_def_map=s2_def_map,
                                       train_phase=train_phase)
        else:
            pred = nli_model.apply(s1,
                                   s1_mask,
                                   s2,
                                   s2_mask,
                                   def_mask=def_mask,
                                   defs=defs,
                                   s1_def_map=s1_def_map,
                                   s2_def_map=s2_def_map,
                                   train_phase=train_phase)

        cost = CategoricalCrossEntropy().apply(y.flatten(), pred)
        error_rate = MisclassificationRate().apply(y.flatten(), pred)
        cg[train_phase] = ComputationGraph([cost, error_rate])

    # Weight decay (TODO: Make it less bug prone)
    if model == 'simple':
        weights_to_decay = VariableFilter(
            bricks=[dense for dense, relu, bn in nli_model._mlp],
            roles=[WEIGHT])(cg[True].variables)
        weight_decay = np.float32(c['l2']) * sum(
            (w**2).sum() for w in weights_to_decay)
    elif model == 'esim':
        weight_decay = 0.0
    else:
        raise NotImplementedError()

    final_cost = cg[True].outputs[0] + weight_decay
    final_cost.name = 'final_cost'

    # Add updates for population parameters

    if c.get("bn", True):
        pop_updates = get_batch_normalization_updates(cg[True])
        extra_updates = [(p, m * 0.1 + p * (1 - 0.1)) for p, m in pop_updates]
    else:
        pop_updates = []
        extra_updates = []

    if params:
        logger.debug("Load parameters from {}".format(params))
        with open(params) as src:
            loaded_params = load_parameters(src)
            cg[True].set_parameter_values(loaded_params)
            for param, m in pop_updates:
                param.set_value(loaded_params[get_brick(
                    param).get_hierarchical_name(param)])

    if os.path.exists(os.path.join(save_path, "main_loop.tar")):
        logger.warning("Manually loading BN stats :(")
        with open(os.path.join(save_path, "main_loop.tar")) as src:
            loaded_params = load_parameters(src)

        for param, m in pop_updates:
            param.set_value(
                loaded_params[get_brick(param).get_hierarchical_name(param)])

    if theano.config.compute_test_value != 'off':
        test_value_data = next(
            data.get_stream('train', batch_size=4).get_epoch_iterator())
        s1.tag.test_value = test_value_data[0]
        s1_mask.tag.test_value = test_value_data[1]
        s2.tag.test_value = test_value_data[2]
        s2_mask.tag.test_value = test_value_data[3]
        y.tag.test_value = test_value_data[4]

    # Freeze embeddings
    if not c['train_emb']:
        frozen_params = [
            p for E in nli_model.get_embeddings_lookups() for p in E.parameters
        ]
        train_params = [p for p in cg[True].parameters]
        assert len(set(frozen_params) & set(train_params)) > 0
    else:
        frozen_params = []
    if not c.get('train_def_emb', 1):
        frozen_params_def = [
            p for E in nli_model.get_def_embeddings_lookups()
            for p in E.parameters
        ]
        train_params = [p for p in cg[True].parameters]
        assert len(set(frozen_params_def) & set(train_params)) > 0
        frozen_params += frozen_params_def
    train_params = [p for p in cg[True].parameters if p not in frozen_params]
    train_params_keys = [
        get_brick(p).get_hierarchical_name(p) for p in train_params
    ]

    # Optimizer
    algorithm = GradientDescent(cost=final_cost,
                                on_unused_sources='ignore',
                                parameters=train_params,
                                step_rule=Adam(learning_rate=c['lr']))
    algorithm.add_updates(extra_updates)
    m = Model(final_cost)

    parameters = m.get_parameter_dict()  # Blocks version mismatch
    logger.info("Trainable parameters" + "\n" +
                pprint.pformat([(key, parameters[key].get_value().shape)
                                for key in sorted(train_params_keys)],
                               width=120))
    logger.info("# of parameters {}".format(
        sum([
            np.prod(parameters[key].get_value().shape)
            for key in sorted(train_params_keys)
        ])))

    ### Monitored args ###
    train_monitored_vars = [final_cost] + cg[True].outputs
    monitored_vars = cg[False].outputs
    val_acc = monitored_vars[1]
    to_monitor_names = [
        'def_unk_ratio', 's1_merged_input_rootmean2', 's1_def_mean_rootmean2',
        's1_gate_rootmean2', 's1_compose_gate_rootmean2'
    ]
    for k in to_monitor_names:
        train_v, valid_v = VariableFilter(name=k)(
            cg[True]), VariableFilter(name=k)(cg[False])
        if len(train_v):
            logger.info("Adding {} tracking".format(k))
            train_monitored_vars.append(train_v[0])
            monitored_vars.append(valid_v[0])
        else:
            logger.warning("Didnt find {} in cg".format(k))

    if c['monitor_parameters']:
        for name in train_params_keys:
            param = parameters[name]
            num_elements = numpy.product(param.get_value().shape)
            norm = param.norm(2) / num_elements
            grad_norm = algorithm.gradients[param].norm(2) / num_elements
            step_norm = algorithm.steps[param].norm(2) / num_elements
            stats = tensor.stack(norm, grad_norm, step_norm,
                                 step_norm / grad_norm)
            stats.name = name + '_stats'
            train_monitored_vars.append(stats)

    regular_training_stream = data.get_stream('train',
                                              batch_size=c['batch_size'],
                                              seed=seed)

    if fuel_server:
        # the port will be configured by the StartFuelServer extension
        training_stream = ServerDataStream(
            sources=regular_training_stream.sources,
            hwm=100,
            produces_examples=regular_training_stream.produces_examples)
    else:
        training_stream = regular_training_stream

    ### Build extensions ###

    extensions = [
        # Load(main_loop_path, load_iteration_state=True, load_log=True)
        #     .set_conditions(before_training=not new_training_job),
        StartFuelServer(regular_training_stream,
                        stream_path,
                        hwm=100,
                        script_path=os.path.join(
                            os.path.dirname(__file__),
                            "../bin/start_fuel_server.py"),
                        before_training=fuel_server),
        Timing(every_n_batches=c['mon_freq']),
        ProgressBar(),
        RetrievalPrintStats(retrieval=used_retrieval,
                            every_n_batches=c['mon_freq_valid'],
                            before_training=not fast_start),
        Timestamp(),
        TrainingDataMonitoring(train_monitored_vars,
                               prefix="train",
                               every_n_batches=c['mon_freq']),
    ]

    if c['layout'] == 'snli':
        validation = DataStreamMonitoring(monitored_vars,
                                          data.get_stream('valid',
                                                          batch_size=14,
                                                          seed=seed),
                                          before_training=not fast_start,
                                          on_resumption=True,
                                          after_training=True,
                                          every_n_batches=c['mon_freq_valid'],
                                          prefix='valid')
        extensions.append(validation)
    elif c['layout'] == 'mnli':
        validation = DataStreamMonitoring(monitored_vars,
                                          data.get_stream('valid_matched',
                                                          batch_size=14,
                                                          seed=seed),
                                          every_n_batches=c['mon_freq_valid'],
                                          on_resumption=True,
                                          after_training=True,
                                          prefix='valid_matched')
        validation_mismatched = DataStreamMonitoring(
            monitored_vars,
            data.get_stream('valid_mismatched', batch_size=14, seed=seed),
            every_n_batches=c['mon_freq_valid'],
            before_training=not fast_start,
            on_resumption=True,
            after_training=True,
            prefix='valid_mismatched')
        extensions.extend([validation, validation_mismatched])
    else:
        raise NotImplementedError()

    # Similarity trackers for embeddings
    if len(c.get('vocab_def', '')):
        retrieval_vocab = Vocabulary(c['vocab_def'])
    else:
        retrieval_vocab = data.vocab

    retrieval_all = Retrieval(vocab_text=retrieval_vocab,
                              dictionary=used_dict,
                              max_def_length=c['max_def_length'],
                              exclude_top_k=0,
                              max_def_per_word=c['max_def_per_word'])

    for name in [
            's1_word_embeddings', 's1_dict_word_embeddings',
            's1_translated_word_embeddings'
    ]:
        variables = VariableFilter(name=name)(cg[False])
        if len(variables):
            s1_emb = variables[0]
            logger.info("Adding similarity tracking for " + name)
            # A bit sloppy about downcast

            if "dict" in name:
                embedder = construct_dict_embedder(theano.function(
                    [s1, defs, def_mask, s1_def_map],
                    s1_emb,
                    allow_input_downcast=True),
                                                   vocab=data.vocab,
                                                   retrieval=retrieval_all)
                extensions.append(
                    SimilarityWordEmbeddingEval(
                        embedder=embedder,
                        prefix=name,
                        every_n_batches=c['mon_freq_valid'],
                        before_training=not fast_start))
            else:
                embedder = construct_embedder(theano.function(
                    [s1], s1_emb, allow_input_downcast=True),
                                              vocab=data.vocab)
                extensions.append(
                    SimilarityWordEmbeddingEval(
                        embedder=embedder,
                        prefix=name,
                        every_n_batches=c['mon_freq_valid'],
                        before_training=not fast_start))

    track_the_best = TrackTheBest(validation.record_name(val_acc),
                                  before_training=not fast_start,
                                  every_n_epochs=c['save_freq_epochs'],
                                  after_training=not fast_start,
                                  every_n_batches=c['mon_freq_valid'],
                                  choose_best=min)
    extensions.append(track_the_best)

    # Special care for serializing embeddings
    if len(c.get('embedding_path', '')) or len(c.get('embedding_def_path',
                                                     '')):
        extensions.insert(
            0,
            LoadNoUnpickling(main_loop_path,
                             load_iteration_state=True,
                             load_log=True).set_conditions(
                                 before_training=not new_training_job))
        extensions.append(
            Checkpoint(main_loop_path,
                       parameters=train_params + [p for p, m in pop_updates],
                       save_main_loop=False,
                       save_separately=['log', 'iteration_state'],
                       before_training=not fast_start,
                       every_n_epochs=c['save_freq_epochs'],
                       after_training=not fast_start).add_condition(
                           ['after_batch', 'after_epoch'],
                           OnLogRecord(track_the_best.notification_name),
                           (main_loop_best_val_path, )))
    else:
        extensions.insert(
            0,
            Load(main_loop_path, load_iteration_state=True,
                 load_log=True).set_conditions(
                     before_training=not new_training_job))
        extensions.append(
            Checkpoint(main_loop_path,
                       parameters=cg[True].parameters +
                       [p for p, m in pop_updates],
                       before_training=not fast_start,
                       every_n_epochs=c['save_freq_epochs'],
                       after_training=not fast_start).add_condition(
                           ['after_batch', 'after_epoch'],
                           OnLogRecord(track_the_best.notification_name),
                           (main_loop_best_val_path, )))

    extensions.extend([
        DumpCSVSummaries(save_path,
                         every_n_batches=c['mon_freq_valid'],
                         after_training=True),
        DumpTensorflowSummaries(save_path,
                                after_epoch=True,
                                every_n_batches=c['mon_freq_valid'],
                                after_training=True),
        Printing(every_n_batches=c['mon_freq_valid']),
        PrintMessage(msg="save_path={}".format(save_path),
                     every_n_batches=c['mon_freq']),
        FinishAfter(after_n_batches=c['n_batches']).add_condition(
            ['after_batch'],
            OnLogStatusExceed('iterations_done', c['n_batches']))
    ])

    logger.info(extensions)

    ### Run training ###

    if "VISDOM_SERVER" in os.environ:
        print("Running visdom server")
        ret = subprocess.Popen([
            os.path.join(os.path.dirname(__file__), "../visdom_plotter.py"),
            "--visdom-server={}".format(os.environ['VISDOM_SERVER']),
            "--folder={}".format(save_path)
        ])
        time.sleep(0.1)
        if ret.returncode is not None:
            raise Exception()
        atexit.register(lambda: os.kill(ret.pid, signal.SIGINT))

    model = Model(cost)
    for p, m in pop_updates:
        model._parameter_dict[get_brick(p).get_hierarchical_name(p)] = p

    main_loop = MainLoop(algorithm,
                         training_stream,
                         model=model,
                         extensions=extensions)

    assert os.path.exists(save_path)
    main_loop.run()
Example No. 21
def train_language_model(new_training_job, config, save_path, params,
                         fast_start, fuel_server, seed):
    c = config
    if seed:
        fuel.config.default_seed = seed
        blocks.config.config.default_seed = seed

    data, lm, retrieval = initialize_data_and_model(config)

    # full main loop can be saved...
    main_loop_path = os.path.join(save_path, 'main_loop.tar')
    # or only state (log + params) which can be useful not to pickle embeddings
    state_path = os.path.join(save_path, 'training_state.tar')
    stream_path = os.path.join(save_path, 'stream.pkl')
    best_tar_path = os.path.join(save_path, "best_model.tar")

    words = tensor.ltensor3('words')
    words_mask = tensor.matrix('words_mask')
    if theano.config.compute_test_value != 'off':
        test_value_data = next(
            data.get_stream('train', batch_size=4,
                            max_length=5).get_epoch_iterator())
        words.tag.test_value = test_value_data[0]
        words_mask.tag.test_value = test_value_data[1]

    costs, updates = lm.apply(words, words_mask)
    cost = rename(costs.mean(), 'mean_cost')

    cg = Model(cost)
    if params:
        logger.debug("Load parameters from {}".format(params))
        with open(params) as src:
            cg.set_parameter_values(load_parameters(src))

    length = rename(words.shape[1], 'length')
    perplexity, = VariableFilter(name='perplexity')(cg)
    perplexities = VariableFilter(name_regex='perplexity.*')(cg)
    monitored_vars = [length, cost] + perplexities
    if c['dict_path']:
        num_definitions, = VariableFilter(name='num_definitions')(cg)
        monitored_vars.extend([num_definitions])

    parameters = cg.get_parameter_dict()
    trained_parameters = parameters.values()
    saved_parameters = parameters.values()
    if c['embedding_path']:
        logger.debug("Exclude word embeddings from the trained parameters")
        trained_parameters = [
            p for p in trained_parameters
            if not p == lm.get_def_embeddings_params()
        ]
        saved_parameters = [
            p for p in saved_parameters
            if not p == lm.get_def_embeddings_params()
        ]

    if c['cache_size'] != 0:
        logger.debug("Enable fake recursivity for looking up embeddings")
        trained_parameters = [
            p for p in trained_parameters if not p == lm.get_cache_params()
        ]

    logger.info("Cost parameters" + "\n" + pprint.pformat([
        " ".join(
            (key, str(parameters[key].get_value().shape),
             'trained' if parameters[key] in trained_parameters else 'frozen'))
        for key in sorted(parameters.keys())
    ],
                                                          width=120))

    rules = []
    if c['grad_clip_threshold']:
        rules.append(StepClipping(c['grad_clip_threshold']))
    rules.append(Adam(learning_rate=c['learning_rate'], beta1=c['momentum']))
    algorithm = GradientDescent(cost=cost,
                                parameters=trained_parameters,
                                step_rule=CompositeRule(rules))

    if c['cache_size'] != 0:
        algorithm.add_updates(updates)

    train_monitored_vars = list(monitored_vars)
    if c['grad_clip_threshold']:
        train_monitored_vars.append(algorithm.total_gradient_norm)

    word_emb_RMS, = VariableFilter(name='word_emb_RMS')(cg)
    main_rnn_in_RMS, = VariableFilter(name='main_rnn_in_RMS')(cg)
    train_monitored_vars.extend([word_emb_RMS, main_rnn_in_RMS])

    if c['monitor_parameters']:
        train_monitored_vars.extend(parameter_stats(parameters, algorithm))

    # We use a completely random seed on purpose. With Fuel server
    # it's currently not possible to restore the state of the training
    # stream. That's why it's probably better to just have it stateless.
    stream_seed = numpy.random.randint(0, 10000000) if fuel_server else None
    training_stream = data.get_stream('train',
                                      batch_size=c['batch_size'],
                                      max_length=c['max_length'],
                                      seed=stream_seed)
    valid_stream = data.get_stream('valid',
                                   batch_size=c['batch_size_valid'],
                                   max_length=c['max_length'],
                                   seed=stream_seed)
    original_training_stream = training_stream
    if fuel_server:
        # the port will be configured by the StartFuelServer extension
        training_stream = ServerDataStream(
            sources=training_stream.sources,
            produces_examples=training_stream.produces_examples)

    validation = DataStreamMonitoring(monitored_vars,
                                      valid_stream,
                                      prefix="valid").set_conditions(
                                          before_first_epoch=not fast_start,
                                          on_resumption=True,
                                          every_n_batches=c['mon_freq_valid'])
    track_the_best = TrackTheBest(validation.record_name(perplexity),
                                  choose_best=min).set_conditions(
                                      on_resumption=True,
                                      after_epoch=True,
                                      every_n_batches=c['mon_freq_valid'])

    # don't save them the entire main loop to avoid pickling everything
    if c['fast_checkpoint']:
        load = (LoadNoUnpickling(state_path,
                                 load_iteration_state=True,
                                 load_log=True).set_conditions(
                                     before_training=not new_training_job))
        cp_args = {
            'save_main_loop': False,
            'save_separately': ['log', 'iteration_state'],
            'parameters': saved_parameters
        }

        checkpoint = Checkpoint(state_path,
                                before_training=not fast_start,
                                every_n_batches=c['save_freq_batches'],
                                after_training=not fast_start,
                                **cp_args)

        if c['checkpoint_every_n_batches']:
            intermediate_cp = IntermediateCheckpoint(
                state_path,
                every_n_batches=c['checkpoint_every_n_batches'],
                after_training=False,
                **cp_args)
    else:
        load = (Load(main_loop_path, load_iteration_state=True,
                     load_log=True).set_conditions(
                         before_training=not new_training_job))
        cp_args = {
            'save_separately': ['iteration_state'],
            'parameters': saved_parameters
        }

        checkpoint = Checkpoint(main_loop_path,
                                before_training=not fast_start,
                                every_n_batches=c['save_freq_batches'],
                                after_training=not fast_start,
                                **cp_args)

        if c['checkpoint_every_n_batches']:
            intermediate_cp = IntermediateCheckpoint(
                main_loop_path,
                every_n_batches=c['checkpoint_every_n_batches'],
                after_training=False,
                **cp_args)

    checkpoint = checkpoint.add_condition(
        ['after_batch', 'after_epoch'],
        OnLogRecord(track_the_best.notification_name), (best_tar_path, ))

    extensions = [
        load,
        StartFuelServer(original_training_stream,
                        stream_path,
                        before_training=fuel_server),
        Timing(every_n_batches=c['mon_freq_train'])
    ]

    if retrieval:
        extensions.append(
            RetrievalPrintStats(retrieval=retrieval,
                                every_n_batches=c['mon_freq_train'],
                                before_training=not fast_start))

    extensions.extend([
        TrainingDataMonitoring(train_monitored_vars,
                               prefix="train",
                               every_n_batches=c['mon_freq_train']),
        validation, track_the_best, checkpoint
    ])
    if c['checkpoint_every_n_batches']:
        extensions.append(intermediate_cp)
    extensions.extend([
        DumpTensorflowSummaries(save_path,
                                every_n_batches=c['mon_freq_train'],
                                after_training=True),
        Printing(on_resumption=True, every_n_batches=c['mon_freq_train']),
        FinishIfNoImprovementAfter(track_the_best.notification_name,
                                   iterations=50 * c['mon_freq_valid'],
                                   every_n_batches=c['mon_freq_valid']),
        FinishAfter(after_n_batches=c['n_batches'])
    ])

    logger.info("monitored variables during training:" + "\n" +
                pprint.pformat(train_monitored_vars, width=120))
    logger.info("monitored variables during valid:" + "\n" +
                pprint.pformat(monitored_vars, width=120))

    main_loop = MainLoop(algorithm,
                         training_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
Example No. 22
def main():
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')

    embedding_size = 300
    glove_version = "glove.6B.300d.txt"
    #embedding_size = 50
    #glove_version = "vectors.6B.50d.txt"

    o = x.sum(axis=1) + m.mean() * 0

    score_layer = Linear(
            input_dim = 300,
            output_dim = 1,
            weights_init = IsotropicGaussian(std=0.02),
            biases_init = Constant(0.),
            name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    cost = - (y * T.log(probs) + (1-y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1-y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    # =================
    cg = ComputationGraph([cost])
    params = cg.parameters

    algorithm = GradientDescent(
            cost = cg.outputs[0],
            params=params,
            step_rule = CompositeRule([
                StepClipping(threshold=4),
                AdaM(),
                ])

            )

    # ========
    print "setting up data"
    ports = {
            'gpu0_train' : 5557,
            'gpu0_test' : 5558,
            'gpu1_train' : 5559,
            'gpu1_test' : 5560,
            }

    #batch_size = 16
    batch_size = 16
    def start_server(port, which_set):
        fuel.server.logger.setLevel('WARN')
        dataset = IMDBText(which_set, sorted=True)

        n_train = dataset.num_examples
        #scheme = ShuffledScheme(examples=n_train, batch_size=batch_size)
        scheme = BatchwiseShuffledScheme(examples=n_train, batch_size=batch_size)

        stream = DataStream(
                dataset=dataset,
                iteration_scheme=scheme)
        print "loading glove"
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(
                data_stream=glove,
                mask_sources=('features',)
                )

        fuel.server.start_server(padded, port=port, hwm=20)

    train_port = ports[theano.config.device + '_train']
    train_p = Process(target=start_server, args=(train_port, 'train'))
    train_p.start()

    test_port = ports[theano.config.device + '_test']
    test_p = Process(target=start_server, args=(test_port, 'test'))
    test_p.start()

    train_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=train_port)
    test_stream = ServerDataStream(('features', 'features_mask', 'targets'), port=test_port)

    print "setting up model"

    n_examples = 25000
    #======
    model = Model(cost)
    extensions = []
    extensions.append(EpochProgress(batch_per_epoch=n_examples // batch_size + 1))
    extensions.append(TrainingDataMonitoring(
        [
            cost,
            misclassification,
            ],
        prefix='train',
        after_epoch=True
        ))

    #extensions.append(DataStreamMonitoring(
        #[cost, misclassification],
        #data_stream=test_stream,
        #prefix='test',
        #after_epoch=True
        #))
    extensions.append(Timing())
    extensions.append(Printing())

    extensions.append(Plot(
        theano.config.device+"_result",
        channels=[['train_cost']],
        after_epoch=True
        ))


    main_loop = MainLoop(
            model=model,
            data_stream=train_stream,
            algorithm=algorithm,
            extensions=extensions)
    main_loop.run()
Example No. 23
    stream = Random2DRotation(stream, which_sources=('image_features',))

    # Data Transformation
    stream = ScaleAndShift(stream, 1./255, 0, which_sources=('image_features',))
    stream = Cast(stream, dtype='float32', which_sources=('image_features',))
    return stream


if mode == "CPU_test":
    data_train_stream = create_data(DogsVsCats(('train',), subset=slice(0, 100)))
    data_valid_stream = create_data(DogsVsCats(('train',), subset=slice(100, 110)))
if mode == "GPU_run":
    data_train_stream = create_data(DogsVsCats(('train',), subset=slice(0, 22500)))
    data_valid_stream = create_data(DogsVsCats(('train',), subset=slice(22500, 25000)))
if mode == "data_server":
    data_train_stream = ServerDataStream(('image_features','targets'), False, port=5560)
    data_valid_stream = ServerDataStream(('image_features','targets'), False, port=5561)


### Setting up the model
probs = top_mlp.apply(conv_out)

cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), probs)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])

### Gradient Descent
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Scale(learning_rate=learning_rate))
Example No. 24
def run(get_model, model_name):
    train_stream = ServerDataStream(
        ('cases', 'image_features', 'image_targets', 'multiplier'),
        False,
        hwm=10)
    valid_stream = ServerDataStream(
        ('cases', 'image_features', 'image_targets', 'multiplier'),
        False,
        hwm=10,
        port=5558)

    input_var = tensor.tensor4('image_features')
    target_var = tensor.tensor4('image_targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    test_prediction, prediction, params = get_model(input_var, target_var,
                                                    multiply_var)

    loss = binary_crossentropy(prediction, target_var).mean()

    loss.name = 'loss'

    valid_error = T.neq((test_prediction > 0.5) * 1., target_var).mean()
    valid_error.name = 'error'

    scale = Scale(0.1)
    algorithm = GradientDescent(
        cost=loss,
        parameters=params,
        step_rule=scale,
        #step_rule=Adam(),
        on_unused_sources='ignore')

    host_plot = 'http://localhost:5006'

    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[loss, valid_error],
                             data_stream=valid_stream,
                             prefix="valid"),
        Plot('%s %s %s' %
             (model_name, datetime.date.today(), time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        # Checkpoint('train'),
        FinishAfter(after_n_epochs=10)
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    cg = ComputationGraph(test_prediction)
    while True:
        main_loop.run()
        scale.learning_rate.set_value(
            numpy.float32(scale.learning_rate.get_value() * 0.7))
        numpy.savez('best_weights.npz',
                    [param.get_value() for param in cg.shared_variables])
Example No. 25
outf = Flattener().apply(out_soft3)
predict3 = NDimensionalSoftmax().apply(outf)
cost3 = CategoricalCrossEntropy().apply(y.flatten(),
                                        predict3).copy(name='cost3')

cost = cost3 + 0.3 * cost2 + 0.3 * cost1
cost = cost.copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict3)
# Little trick to plot the error rate in two different plots (we can't use the same data twice in one plot, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])

########### GET THE DATA #####################
stream_train = ServerDataStream(('image_features', 'targets'),
                                False,
                                port=5652,
                                hwm=50)
stream_valid = ServerDataStream(('image_features', 'targets'),
                                False,
                                port=5653,
                                hwm=50)

########### DEFINE THE ALGORITHM #############
track_cost = TrackTheBest("cost", after_epoch=True, after_batch=False)
algorithm = GradientDescent(cost=cost,
                            parameters=cg.parameters,
                            step_rule=Momentum(learning_rate=0.0001,
                                               momentum=0.9))
extensions = [
    Timing(),
    FinishAfter(after_n_epochs=num_epochs),
Example No. 26
0
def run(get_model, model_name):
    train_stream = ServerDataStream(('cases', 'image_position', 'multiplier',
                                     'sax', 'sax_features', 'targets'),
                                    False,
                                    hwm=10)
    valid_stream = ServerDataStream(('cases', 'image_position', 'multiplier',
                                     'sax', 'sax_features', 'targets'),
                                    False,
                                    hwm=10,
                                    port=5558)

    # 5-D float32 tensor type (no broadcastable dimensions) for the
    # 'sax_features' input
    ftensor5 = tensor.TensorType('float32', (False, ) * 5)

    input_var = ftensor5('sax_features')
    target_var = tensor.matrix('targets')
    multiply_var = tensor.matrix('multiplier')
    multiply_var = T.addbroadcast(multiply_var, 1)

    prediction, test_prediction, test_pred_mid, params_bottom, params_top = get_model(
        input_var, multiply_var)

    # load pre-trained parameters from sunnybrook/best_weights.npz into the
    # shared variables that produce test_pred_mid (only params_top is trained)
    cg = ComputationGraph(test_pred_mid)
    params_val = numpy.load('sunnybrook/best_weights.npz')

    for p, value in zip(cg.shared_variables, params_val['arr_0']):
        p.set_value(value)

    # validation metric reported as 'crps': mean absolute difference between
    # the test predictions and the targets
    crps = tensor.abs_(test_prediction - target_var).mean()

    loss = squared_error(prediction, target_var).mean()

    loss.name = 'loss'
    crps.name = 'crps'

    algorithm = GradientDescent(cost=loss,
                                parameters=params_top,
                                step_rule=Adam(),
                                on_unused_sources='ignore')

    host_plot = 'http://localhost:5006'

    extensions = [
        Timing(),
        TrainingDataMonitoring([loss], after_epoch=True),
        DataStreamMonitoring(variables=[crps, loss],
                             data_stream=valid_stream,
                             prefix="valid"),
        Plot('%s %s %s' %
             (model_name, datetime.date.today(), time.strftime('%H:%M')),
             channels=[['loss', 'valid_loss'], ['valid_crps']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('train'),
        FinishAfter(after_n_epochs=20)
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example No. 27
0
    def _push_allocation_config(self):
        self.conv_sequence._push_allocation_config()
        conv_out_dim = self.conv_sequence.get_dim('output')

        self.top_mlp.activations = self.top_mlp_activations
        self.top_mlp.dims = [numpy.prod(conv_out_dim)] + self.top_mlp_dims


#Generating input and target variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

#Load Data
stream_train = ServerDataStream(('image_features', 'targets'),
                                False,
                                port=5556)
stream_valid = ServerDataStream(('image_features', 'targets'),
                                False,
                                port=5557)
#stream_test  = ServerDataStream(('image_features','targets'), False, port=5558)

# Init an instance of the convnet
convnet = LeNet(conv_activations,
                num_channels,
                image_shape,
                filter_sizes=filter_sizes,
                feature_maps=feature_maps,
                pooling_sizes=pooling_sizes,
                top_mlp_activations=mlp_activations,
                top_mlp_dims=mlp_hiddens + [output_size],
Example No. 28
0
def predict(port, l2_penalty_scale, save_model_file='./params_file.npz',
            batchsize=500, load_in_memory=False, be_verbose=False):
    print("Loading data for prediction...")

    # extract timestamp from model file - assume it is the first set of numbers
    # otherwise just use "now"
    import re
    import time
    tstamp = str(time.time()).split('.')[0]
    m = re.search(r"[0-9]+", save_model_file)
    if m:
        tstamp = m.group(0)

    # Prepare Theano variables for inputs and targets
    # one input per view (x, u, v); distinct names make the Theano graph easier to debug
    input_var_x = T.tensor4('inputs_x')
    input_var_u = T.tensor4('inputs_u')
    input_var_v = T.tensor4('inputs_v')
    target_var = T.ivector('targets')

    # Build the model
    network = build_cnn(input_var_x, input_var_u, input_var_v)
    with np.load(save_model_file) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Create a loss expression for testing.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    l2_penalty = lasagne.regularization.regularize_layer_params(
        lasagne.layers.get_all_layers(network),
        lasagne.regularization.l2) * l2_penalty_scale
    test_loss = categorical_crossentropy(test_prediction, target_var) + \
                l2_penalty
    test_loss = test_loss.mean()
    # Also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    # Look at the classifications
    test_prediction_values = T.argmax(test_prediction, axis=1)

    # Compile a function computing the validation loss and accuracy:
    val_fn = theano.function([input_var_x, input_var_u, input_var_v,
                              target_var],
                             [test_loss, test_acc],
                             allow_input_downcast=True)
    # Compute the actual predictions - also instructive is to look at
    # `test_prediction` as an output (array of softmax probabilities)
    # (but that prints a _lot_ of stuff to screen...)
    pred_fn = theano.function([input_var_x, input_var_u, input_var_v],
                              [test_prediction_values],
                              allow_input_downcast=True)

    # produces_examples=False: the server yields whole batches, not single examples
    test_dstream = ServerDataStream(('test',),
                                    port=port,
                                    produces_examples=False)

    # look at some concrete predictions
    targ_numbers = [1, 2, 3, 4, 5]
    pred_target = np.array([0, 0, 0, 0, 0])
    true_target = np.array([0, 0, 0, 0, 0])
    # 11x11 confusion matrix: rows are true targets, columns are predictions
    targs_mat = np.zeros((11, 11))
    for data in test_dstream.get_epoch_iterator():
        _, inputs, targets = data[0], data[1], data[2]
        inputx, inputu, inputv = split_inputs_xuv(inputs)
        pred = pred_fn(inputx, inputu, inputv)
        pred_targ = list(zip(pred[0], targets))  # materialize so it prints and iterates correctly under Python 3
        if be_verbose:
            print("(prediction, true target):", pred_targ)
            print("----------------")
        for p, t in pred_targ:
            targs_mat[t][p] += 1
            if t in targ_numbers:
                true_target[t-1] += 1
                if p == t:
                    pred_target[p-1] += 1

    acc_target = 100.0 * pred_target / true_target.astype('float32')
    perf_file = 'perfmat' + tstamp + '.npy'
    np.save(perf_file, targs_mat)

    # compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for data in test_dstream.get_epoch_iterator():
        _, inputs, targets = data[0], data[1], data[2]
        inputx, inputu, inputv = split_inputs_xuv(inputs)
        err, acc = val_fn(inputx, inputu, inputv, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
    for i, v in enumerate(acc_target):
        print("   target {} accuracy:\t\t\t{:.3f} %".format(
            (i + 1), acc_target[i]))
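
# The saved 'perfmat<tstamp>.npy' file holds the 11x11 matrix filled above
# (rows are true targets, columns are predictions). A minimal sketch for
# inspecting it afterwards, assuming a file produced by some run (the name
# 'perfmat1234567890.npy' is a placeholder):
import numpy as np

targs_mat = np.load('perfmat1234567890.npy')
row_sums = targs_mat.sum(axis=1, keepdims=True)
norm = np.divide(targs_mat, row_sums,
                 out=np.zeros_like(targs_mat), where=row_sums > 0)
for t, row in enumerate(norm):
    print('true target {}: most common prediction {} ({:.1f} %)'.format(
        t, row.argmax(), 100 * row.max()))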
Example No. 29
0
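# sample the learning rate log-uniformly from [1e-5, 1e-3) for random search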
lr = 10**(2 * numpy.random.rand() - 5)

config.recursion_limit = 100000
floatX = theano.config.floatX

#job_id = 5557
job_id = int(sys.argv[1])

save_dir = os.environ['FUEL_DATA_PATH']
save_dir = os.path.join(save_dir, '..', 'results/', 'blizzard/')

experiment_name = 'deep_l0_{}_{}'.format(job_id, lr)

train_stream = ServerDataStream((
    'upsampled',
    'residual',
),
                                produces_examples=False,
                                port=job_id)

valid_stream = ServerDataStream((
    'upsampled',
    'residual',
),
                                produces_examples=False,
                                port=job_id + 50)
#################
# Model
#################

x = tensor.tensor3('upsampled')
y = tensor.tensor3('residual')
Example No. 30
0
    def _push_allocation_config(self):
        self.conv_sequence._push_allocation_config()
        conv_out_dim = self.conv_sequence.get_dim('output')

        self.top_mlp.activations = self.top_mlp_activations
        self.top_mlp.dims = [numpy.prod(conv_out_dim)] + self.top_mlp_dims


#Generating input and target variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

#Load Data
#stream_train = ServerDataStream(('image_features','targets'), False, port=5556)
#stream_valid = ServerDataStream(('image_features','targets'), False, port=5557)
stream_test = ServerDataStream(('image_features', 'targets'), False, port=5558)

# Init an instance of the convnet
convnet = LeNet(conv_activations,
                num_channels,
                image_shape,
                filter_sizes=filter_sizes,
                feature_maps=feature_maps,
                pooling_sizes=pooling_sizes,
                top_mlp_activations=mlp_activations,
                top_mlp_dims=mlp_hiddens + [output_size],
                conv_step=conv_step,
                border_mode=border_mode,
                weights_init=Uniform(width=0.2),
                biases_init=Constant(0))
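
# After instantiation the brick still has to be initialized and applied to the
# input variable before cost/error expressions like the ones earlier in this
# listing can be built. A minimal sketch, assuming the LeNet brick flattens its
# convolutional output internally and ends in a softmax over `output_size`
# classes:
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate

convnet.initialize()
probs = convnet.apply(x)
cost = CategoricalCrossEntropy().apply(y.flatten(), probs).copy(name='cost')
error_rate = MisclassificationRate().apply(y.flatten(), probs).copy(name='error_rate')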