Пример #1
0
def test_dump_and_add_to_dump():
    """Check that ``dump_and_add_to_dump`` stores the main object and extras.

    The main object ``x`` and the additional named entry ``y`` are written
    with one call, then read back separately through ``load``.
    """
    import os

    x = 3
    y = 2
    with NamedTemporaryFile(delete=False) as f:
        dump_and_add_to_dump(x, f, None, {'y': y})
    try:
        # Read through context managers so each handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(f.name, 'rb') as source:
            assert load(source) == x
        with open(f.name, 'rb') as source:
            assert load(source, 'y') == y
    finally:
        # delete=False keeps the file after the ``with`` block; remove it
        # explicitly so the test does not litter the temp directory.
        os.remove(f.name)
Пример #2
0
def test_dump_and_add_to_dump():
    """Write a main object plus one named extra and read both back.

    ``x`` is the main object and ``{'y': y}`` the extra entry handed to
    ``dump_and_add_to_dump``; each is then retrieved with ``load``.
    """
    import os

    x = 3
    y = 2
    with NamedTemporaryFile(delete=False) as f:
        dump_and_add_to_dump(x, f, None, {'y': y})
    try:
        # Each read gets its own handle, closed by the context manager
        # (the handles used to be opened inline and never closed).
        with open(f.name, 'rb') as main_source:
            assert load(main_source) == x
        with open(f.name, 'rb') as extra_source:
            assert load(extra_source, 'y') == y
    finally:
        # The temporary file was created with delete=False, so it has to
        # be removed by hand.
        os.remove(f.name)
Пример #3
0
 def load_to(self, main_loop):
     """Restore state from the file at ``self.path`` into ``main_loop``.

     The model parameters are always set; the iteration state and the log
     are only replaced when ``self.load_iteration_state`` and
     ``self.load_log`` respectively are truthy.
     """
     with open(self.path, "rb") as checkpoint:
         parameter_values = load_parameters(checkpoint)
         main_loop.model.set_parameter_values(parameter_values)
         if self.load_iteration_state:
             main_loop.iteration_state = load(
                 checkpoint, name='iteration_state')
         if self.load_log:
             main_loop.log = load(checkpoint, name='log')
Пример #4
0
def test_pickle_log():
    """Round-trip a ``TrainingLog`` through ``dump``/``load``.

    The log is dumped and loaded once untouched and once after
    ``resume()`` has been called on it.
    """
    import os

    paths = ("log1.pkl", "log2.pkl", "log3.pkl")
    try:
        log1 = TrainingLog()
        dump(log1, "log1.pkl")
        log2 = load("log1.pkl")
        dump(log2, "log2.pkl")
        load("log2.pkl")  # loading an unresumed log works
        log2.resume()
        dump(log2, "log3.pkl")
        # Original note: "loading a resumed log does not work" -- presumably
        # the failure this test guards against.
        load("log3.pkl")
    finally:
        # Remove whatever was written, even when an earlier step failed, so
        # the working directory is left clean.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)
Пример #5
0
def test_protocol0_regression():
    """Guard against the regression where protocol 0 dumps fail on load."""
    layer = Linear(5, 10)
    layer.allocate()
    stream = BytesIO()
    dump(layer, stream, parameters=list(layer.parameters), protocol=0)
    try:
        load(stream)
    except TypeError:
        # A TypeError while loading is the regression being checked for.
        assert False  # Regression
Пример #6
0
 def __init__(self, filename=None, model=None):
     """Store ``model`` directly, or rebuild one from the dump at ``filename``.

     When ``model`` is ``None`` the dump is loaded and a ``Model`` is built
     from its ``algorithm.cost``.
     """
     if model is not None:
         self.model = model
         return
     try:
         # First attempt: hand the path itself to ``load``.
         main_loop = load(filename)
         self.model = Model(main_loop.algorithm.cost)
     except AttributeError:
         # Fallback noted in the original as "newer version of blocks":
         # ``load`` is given an open binary file object instead.
         with open(filename, 'rb') as handle:
             main_loop = load(handle)
             self.model = Model(main_loop.algorithm.cost)
Пример #7
0
 def __init__(self, filename=None, model=None):
     """Use the given ``model``, or build one from the dump at ``filename``.

     Without a ``model`` the dump is loaded and wrapped as
     ``Model(<loaded>.algorithm.cost)``.
     """
     if model is not None:
         self.model = model
         return
     try:
         # Try passing the path straight to ``load`` first.
         loaded = load(filename)
         self.model = Model(loaded.algorithm.cost)
     except AttributeError:
         # "newer version of blocks" (original note): retry with an open
         # binary file object.
         with open(filename, "rb") as handle:
             loaded = load(handle)
             self.model = Model(loaded.algorithm.cost)
Пример #8
0
def test_add_to_dump():
    """Exercise ``add_to_dump`` on a tarball written by ``dump``.

    Covers appending named objects, loading them back by name, and the
    three ``ValueError`` cases asserted at the end.
    """
    # Create a simple MLP to dump; its second layer gets doubled weights.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    second_W = mlp.linear_transformations[1].W
    second_W.set_value(second_W.get_value() * 2)
    mlp2 = MLP(activations=[None, None], dims=[10, 10, 10],
               weights_init=Constant(1.), use_bias=False, name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(tmp.name, 'rb+') as archive:
        add_to_dump(mlp.children[0], archive, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], archive, 'child_1')
    with tarfile.open(tmp.name, 'r') as tarball:
        member_names = set(tarball.getnames())
        assert member_names == {'_pkl', '_parameters', 'child_0', 'child_1'}

    # Ensure that we can load any object from the tarball.
    with open(tmp.name, 'rb') as archive:
        saved_children_0 = load(archive, 'child_0')
        saved_children_1 = load(archive, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)

    # Check the error if using a reserved name.
    with open(tmp.name, 'rb+') as archive:
        assert_raises(ValueError, add_to_dump,
                      mlp.children[0], archive, '_pkl')

    # Check the error if saving an object with other parameters
    with open(tmp.name, 'rb+') as archive:
        assert_raises(ValueError, add_to_dump, mlp2, archive, 'mlp2',
                      parameters=[mlp2.children[0].W, mlp2.children[1].W])

    # Adding parameters to a dump that was written without any is also
    # expected to raise ValueError (the original comment called it a warning).
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp)
    with open(tmp.name, 'rb+') as archive:
        assert_raises(ValueError, add_to_dump, mlp2, archive, 'mlp2',
                      parameters=[mlp2.children[0].W, mlp2.children[1].W])
Пример #9
0
def test_add_to_dump():
    """Check appending objects to an existing dump with ``add_to_dump``.

    Two children of an MLP are added under their own names, loaded back,
    and the ``ValueError`` cases are asserted afterwards.
    """
    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    doubled_W = mlp.linear_transformations[1].W
    doubled_W.set_value(doubled_W.get_value() * 2)
    mlp2 = MLP(activations=[None, None],
               dims=[10, 10, 10],
               weights_init=Constant(1.),
               use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(tmp.name, 'rb+') as handle:
        add_to_dump(mlp.children[0], handle, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], handle, 'child_1')
    with tarfile.open(tmp.name, 'r') as tarball:
        names = set(tarball.getnames())
        assert names == {'_pkl', '_parameters', 'child_0', 'child_1'}

    # Ensure that we can load any object from the tarball.
    with open(tmp.name, 'rb') as handle:
        saved_children_0 = load(handle, 'child_0')
        saved_children_1 = load(handle, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)

    # Check the error if using a reserved name.
    with open(tmp.name, 'rb+') as handle:
        assert_raises(ValueError, add_to_dump,
                      mlp.children[0], handle, '_pkl')

    # Check the error if saving an object with other parameters
    with open(tmp.name, 'rb+') as handle:
        assert_raises(ValueError, add_to_dump, mlp2, handle, 'mlp2',
                      parameters=[mlp2.children[0].W, mlp2.children[1].W])

    # A dump created without parameters must reject added parameters with
    # ValueError as well (described as a warning in the original comment).
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp)
    with open(tmp.name, 'rb+') as handle:
        assert_raises(ValueError, add_to_dump, mlp2, handle, 'mlp2',
                      parameters=[mlp2.children[0].W, mlp2.children[1].W])
Пример #10
0
 def __init__(self, filename=None, model=None):
     """Keep the single top brick of ``model`` or of the dump at ``filename``.

     The one-element unpacking raises ``ValueError`` unless ``top_bricks``
     holds exactly one item.
     """
     if model is None:
         with open(filename, 'rb') as checkpoint:
             restored = load(checkpoint)
             [self.model] = restored.model.top_bricks
     else:
         [self.model] = model.top_bricks
Пример #11
0
 def __init__(self, filename=None, model=None):
     """Store the only top brick, taken from ``model`` or from a dump.

     With no ``model`` the main loop stored at ``filename`` is loaded and
     its model is used. Unpacking into a one-element target raises
     ``ValueError`` when ``top_bricks`` does not contain exactly one brick.
     """
     if model is None:
         with open(filename, 'rb') as handle:
             loaded_loop = load(handle)
             (self.model,) = loaded_loop.model.top_bricks
     else:
         (self.model,) = model.top_bricks
Пример #12
0
    def __init__(self, lr, track_var, states=None, path=None, **kwargs):
        """Set up the schedule, optionally resuming from a saved main loop.

        Parameters
        ----------
        lr
            Learning-rate holder; ``set_value`` is called on it when
            resuming.
        track_var
            Stored as ``self.track_var``.
        states : dict, optional
            Stored as ``self.states``; a new empty dict is created when
            omitted.
        path : optional
            When given, passed to ``load`` and the state of the last
            extension of the loaded main loop is copied over.
        **kwargs
            Forwarded to the parent class constructor.
        """
        self.lr = lr
        self.patience = 15  # 3
        self.counter = 0
        self.best_value = numpy.inf
        self.track_var = track_var
        # self.iteration_state = None
        self.log = None
        self.parameter_values = None
        self.algorithm_buffers = None
        self.tolerance = 1e-13
        # Build the dict per instance: a ``{}`` default in the signature
        # would be one object shared by every instance that omits ``states``.
        self.states = {} if states is None else states
        self.epsilon = -1e-5

        if path is not None:
            loaded_main_loop = load(path)
            # Hardcoded: the last extension of the loaded main loop is
            # taken as the saved schedule (presumably an instance of this
            # class -- verify against how the main loop is assembled).
            ext = loaded_main_loop.extensions[-1]
            self.lr.set_value(2. * ext.lr.get_value())
            self.log = ext.log
            self.parameter_values = ext.parameter_values
            self.best_value = ext.best_value
            self.counter = self.patience

        super(LearningRateSchedule, self).__init__(**kwargs)
Пример #13
0
Файл: gen.py Проект: grappli/pm1
def gen():
    """Sample ``seq_len`` characters from the saved generator and print them.

    The main loop is loaded from ``trainingdata.tar``, its first top brick
    is used as the generator, and each sampled class index is mapped back
    through the encoder's ``classes_``.
    """
    encoder = HDF5CharEncoder('warpeace_input.txt', 200)
    classes = encoder.encoder.classes_
    seq_len = 100

    with open('trainingdata.tar', 'rb') as f:
        model = load(f).model

    generator = model.top_bricks[0]

    # Own name for the compiled function: the original rebound ``f`` from
    # the closed file handle to this callable.
    sample_step = ComputationGraph(
        generator.generate(n_steps=1, batch_size=1,
                           iterate=True)).get_theano_function()

    # NOTE(review): the sampled state is not fed back into the generator's
    # initial state between steps (that code was commented out originally).
    chars = []
    for _ in range(seq_len):
        # The function yields (next_state, next_char, cost); only the
        # sampled character is needed here.
        _, next_char, _ = sample_step()
        chars.append(str(classes[next_char[0][0]]))
    output = ''.join(chars)

    # Parenthesised single-argument form prints identically on Python 2
    # and is valid syntax on Python 3.
    print(output)
Пример #14
0
    def __init__(self, lr, track_var, states=None, path=None, **kwargs):
        """Initialise the schedule and optionally resume from ``path``.

        Parameters
        ----------
        lr
            Learning-rate holder; its ``set_value`` is called on resume.
        track_var
            Stored as ``self.track_var``.
        states : dict, optional
            Stored as ``self.states``; defaults to a new empty dict.
        path : optional
            When not ``None``, given to ``load``; values are then copied
            from the last extension of the loaded main loop.
        **kwargs
            Passed on to the parent class constructor.
        """
        self.lr = lr
        self.patience = 15  # 3
        self.counter = 0
        self.best_value = numpy.inf
        self.track_var = track_var
        # self.iteration_state = None
        self.log = None
        self.parameter_values = None
        self.algorithm_buffers = None
        self.tolerance = 1e-13
        # Avoid a mutable default in the signature: one ``{}`` would be
        # shared by every instance created without ``states``.
        self.states = {} if states is None else states
        self.epsilon = -1e-5

        if path is not None:
            loaded_main_loop = load(path)
            # Hardcoded: state is read from the last extension of the
            # loaded main loop (presumably the saved schedule -- confirm).
            ext = loaded_main_loop.extensions[-1]
            self.lr.set_value(2. * ext.lr.get_value())
            self.log = ext.log
            self.parameter_values = ext.parameter_values
            self.best_value = ext.best_value
            self.counter = self.patience

        super(LearningRateSchedule, self).__init__(**kwargs)
Пример #15
0
def load_log(tar_file):
    """Load the object stored in ``tar_file`` and return its ``log``.

    Progress messages are printed before opening and after reading.
    """
    print("Opening .tar file")
    with open(tar_file, 'rb') as handle:
        restored = load(handle)
    training_log = restored.log
    print("Finished opening .tar file")
    return training_log
Пример #16
0
def rank_with_paired_nn(dicta, queries, model_path, model_opt_path):
    """
    Rank clean dictionary chunks for each noisy query chunk using the paired
    (clean,noisy) input neural network.

    Parameters
    ----------
    dicta : array
        Dictionary chunks, one per row.
    queries : array
        Query chunks, one per row.
    model_path : str
        Path of the dumped main loop whose model is used for scoring.
    model_opt_path : str
        Path of an ``.npz`` file holding ``train_mean`` and ``train_std``.

    Returns
    -------
    array
        For every query row, the index of the dictionary row with the
        highest score.
    """
    with open(model_path, 'rb') as fmodel:
        ml = load(fmodel)
    model_func = ml.model.get_theano_function()
    npz = np.load(model_opt_path)
    # Read the normalisation statistics once; they do not change per query.
    train_mean = npz['train_mean']
    train_std = npz['train_std']

    queries = queries.astype(np.float32)
    dicta = dicta.astype(np.float32)

    T = queries.shape[0]
    # One score row per dictionary chunk. This was ``queries.shape[0]``,
    # which makes the ``hstack`` with ``dicta`` fail (or mis-size ``sim``)
    # whenever the two inputs have different row counts.
    D = dicta.shape[0]
    sim = np.zeros((D, T))
    for t in range(T):
        # Progress dots, wrapped every 70 queries. ``sys`` itself has no
        # ``write``; the stream is ``sys.stdout``.
        sys.stdout.write('.')
        if t % 70 == 0 and t:
            sys.stdout.write('\n')
        # Pair every dictionary row with the current query row.
        qrep = repmat(queries[t, :], D, 1)
        test_x = np.hstack([dicta, qrep])
        test_x = mean_var_normalize_test(test_x, train_mean, train_std)
        sim[:, t] = model_func(test_x)

    best_chunks = np.argmax(sim, axis=0)
    return best_chunks
Пример #17
0
def create_running_graphs(classifier):
    """Build a computation graph applying the saved convnet and MLP.

    ``classifier`` is the path of a dump whose ``algorithm.cost`` is
    wrapped in a ``Model``; the ``/convnet`` and ``/mlp`` bricks are then
    applied to a fresh 4D ``features`` tensor.
    """
    try:
        main_loop = load(classifier)
        classifier_model = Model(main_loop.algorithm.cost)
    except AttributeError:
        # "newer version of blocks" (original note): pass an open binary
        # file object to ``load`` instead of the path.
        with open(classifier, 'rb') as src:
            classifier_model = Model(load(src).algorithm.cost)

    brick_selector = Selector(classifier_model.top_bricks)
    # One-element unpacking: exactly one brick must match each path.
    [convnet] = brick_selector.select('/convnet').bricks
    [mlp] = brick_selector.select('/mlp').bricks

    features = tensor.tensor4('features')
    flat_features = convnet.apply(features).flatten(ndim=2)
    y_hat = mlp.apply(flat_features)
    return ComputationGraph([y_hat])
Пример #18
0
def gen():
    """Generate ``seq_len`` characters with the saved model and print them.

    Loads the main loop from ``trainingdata.tar``, takes its first top
    brick as generator and decodes each sampled index via the encoder's
    ``classes_``.
    """
    encoder = HDF5CharEncoder('warpeace_input.txt', 200)
    classes = encoder.encoder.classes_
    seq_len = 100

    with open('trainingdata.tar', 'rb') as f:
        model = load(f).model

    generator = model.top_bricks[0]

    # Distinct name for the compiled function; the original reused ``f``,
    # shadowing the (closed) file handle.
    sample_step = ComputationGraph(
        generator.generate(n_steps=1, batch_size=1,
                           iterate=True)).get_theano_function()

    # NOTE(review): feeding the sampled state back into the generator was
    # commented out in the original, so every step starts from the same
    # initial state -- confirm this is intended.
    pieces = []
    for _ in range(seq_len):
        # Result is (next_state, next_char, cost); only next_char is used.
        _, next_char, _ = sample_step()
        pieces.append(str(classes[next_char[0][0]]))
    output = ''.join(pieces)

    # Single-argument call form: same output on Python 2, valid on Python 3.
    print(output)
Пример #19
0
def create_running_graphs(classifier):
    """Return a graph that runs the saved ``/convnet`` and ``/mlp`` bricks.

    The dump at ``classifier`` is loaded, its ``algorithm.cost`` wrapped in
    a ``Model``, and both bricks are applied to a new ``features`` tensor.
    """
    try:
        loaded = load(classifier)
        classifier_model = Model(loaded.algorithm.cost)
    except AttributeError:
        # Fallback marked "newer version of blocks" in the original: load
        # from an open binary file object.
        with open(classifier, 'rb') as src:
            classifier_model = Model(load(src).algorithm.cost)

    selector = Selector(classifier_model.top_bricks)
    # Each selection must contain exactly one brick for the unpacking.
    (convnet,) = selector.select('/convnet').bricks
    (mlp,) = selector.select('/mlp').bricks

    x = tensor.tensor4('features')
    conv_features = convnet.apply(x).flatten(ndim=2)
    return ComputationGraph([mlp.apply(conv_features)])
def maxout_vae_mnist_test(path_vae_mnist):
    """Train a ``Maxout`` classifier on the latent code of a saved VAE.

    The VAE at ``path_vae_mnist`` is loaded, its encoder output is sampled
    and fed to a new ``Maxout`` brick, which is trained on MNIST for 50
    epochs and then dumped to ``../data_mnist/maxout``.
    """
    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    # Append the position and a "maxout" suffix to every parameter name.
    for i, parameter in enumerate(cg.parameters):
        parameter.name = parameter.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y, y_hat)

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
        train_set,
        iteration_scheme=SequentialScheme(train_set.num_examples,
                                          batch_size)))

    data_stream_test = Flatten(DataStream.default_stream(
        test_set,
        iteration_scheme=SequentialScheme(test_set.num_examples,
                                          batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test,
        prefix="test")

    extensions = [
        monitor_train,
        monitor_valid,
        FinishAfter(after_n_epochs=50),
        Printing(every_n_epochs=1),
    ]

    main_loop = MainLoop(data_stream=data_stream_train,
                         algorithm=algorithm, model=Model(cost),
                         extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    # Binary mode: the dump is not text. A file object is its own context
    # manager, so no ``closing`` wrapper is needed.
    with open('../data_mnist/maxout', 'wb') as f:
        dump(maxout, f)
Пример #21
0
def test_mnist_lenet():
    """Run ``main`` for one epoch, reload the dump and continue to two."""
    with tempfile.NamedTemporaryFile() as checkpoint:
        main(checkpoint.name, 1)
        with open(checkpoint.name, "rb") as source:
            main_loop = load(source)
    # Extend the stopping condition of the reloaded loop, then resume.
    finish_after = main_loop.find_extension("FinishAfter")
    finish_after.set_conditions(after_n_epochs=2)
    main_loop.run()
    epochs_done = main_loop.log.status['epochs_done']
    assert epochs_done == 2
Пример #22
0
def test_secure_dump():
    """Check ``secure_dump`` on a picklable and an unpicklable object.

    The failed second dump targets the same path; the file must still
    load as the first object afterwards.
    """
    foo = object()
    bar = lambda: None  # flake8: noqa
    with NamedTemporaryFile(delete=False, dir=config.temp_dir) as tmp:
        secure_dump(foo, tmp.name)
    target = tmp.name
    assert_raises(PicklingError, secure_dump, bar, target)
    with open(target, 'rb') as source:
        restored = load(source)
        assert type(restored) is object
Пример #23
0
def test_secure_dump():
    """Dump a plain object, fail on a lambda, and reload the first dump.

    Both ``secure_dump`` calls target the same path; the second one is
    expected to raise ``PicklingError``.
    """
    foo = object()
    bar = lambda: None  # flake8: noqa
    with NamedTemporaryFile(delete=False) as tmp:
        secure_dump(foo, tmp.name)
    dump_path = tmp.name
    assert_raises(PicklingError, secure_dump, bar, dump_path)
    with open(dump_path, 'rb') as source:
        reloaded = load(source)
        assert type(reloaded) is object
Пример #24
0
 def load_to(self, main_loop):
     """Copy saved state from ``self.path`` into ``main_loop``.

     Parameter values are always applied. The full dump is only loaded
     when the log or the iteration state is requested.
     """
     main_loop.model.set_parameter_values(load_parameter_values(self.path))
     if not (self.load_iteration_state or self.load_log):
         return
     with open(self.path, "rb") as source:
         saved_loop = load(source)
     if self.load_log:
         main_loop.log = saved_loop.log
     if self.load_iteration_state:
         main_loop.iteration_state = saved_loop.iteration_state
Пример #25
0
def main(save_to, num_epochs, resume=False, **kwargs):
    """Create or reload a main loop and run it if epochs remain.

    With ``resume`` the loop is loaded from ``save_to``; otherwise it is
    built by ``create_main_loop``. It only runs while its recorded
    ``epochs_done`` is below ``num_epochs``.
    """
    if not resume:
        main_loop = create_main_loop(save_to, num_epochs, **kwargs)
    else:
        with open(save_to, 'rb') as source:
            main_loop = load(source)

    epochs_done = main_loop.status['epochs_done']
    if epochs_done < num_epochs:
        main_loop.run()
Пример #26
0
def test_mnist():
    """Train for one epoch via ``main``, reload the dump, continue to two."""
    with tempfile.NamedTemporaryFile() as checkpoint:
        main(checkpoint.name, 1)
        with open(checkpoint.name, "rb") as source:
            main_loop = load(source)
        # Everything below still runs while the temporary file exists.
        finish_after = main_loop.find_extension("FinishAfter")
        finish_after.set_conditions(after_n_epochs=2)
        main_loop.run()
        epochs_done = main_loop.log.status['epochs_done']
        assert epochs_done == 2
Пример #27
0
 def load_to(self, main_loop):
     """Apply the state saved at ``self.path`` to ``main_loop``.

     The parameter values are set unconditionally; the dump itself is
     only opened when ``load_log`` or ``load_iteration_state`` is set.
     """
     parameter_values = load_parameter_values(self.path)
     main_loop.model.set_parameter_values(parameter_values)
     if not (self.load_iteration_state or self.load_log):
         return
     with open(self.path, "rb") as source:
         restored_loop = load(source)
     if self.load_log:
         main_loop.log = restored_loop.log
     if self.load_iteration_state:
         main_loop.iteration_state = restored_loop.iteration_state
Пример #28
0
def main(save_to, num_epochs, resume=False, **kwargs):
    """Run a main loop until ``num_epochs`` epochs are recorded.

    The loop is reloaded from ``save_to`` when ``resume`` is truthy and
    freshly created through ``create_main_loop`` otherwise.
    """
    if not resume:
        main_loop = create_main_loop(save_to, num_epochs, **kwargs)
    else:
        with open(save_to, 'rb') as checkpoint:
            main_loop = load(checkpoint)

    # Nothing to do when the loop already reached the requested epoch count.
    already_done = main_loop.status['epochs_done']
    if already_done < num_epochs:
        main_loop.run()
def dump_hiddens(args, main_loop):
    """Copy trained parameters into ``main_loop`` and fire its dumpers.

    The trained loop is loaded from ``args.dump_hiddens``; afterwards each
    ``DumpVariables`` extension is invoked with ``"after_training"``.
    """
    # load parameters of trained model
    source_loop = load(args.dump_hiddens)
    transfer_parameters(source_loop, main_loop)
    del source_loop  # drop the reference once the parameters are copied

    dumpers = (ext for ext in main_loop.extensions
               if isinstance(ext, DumpVariables))
    for dumper in dumpers:
        dumper.do("after_training")
Пример #30
0
def test_pickle_log():
    """Round-trip a ``TrainingLog`` through ``dump``/``load``.

    The log is written and read once untouched and once after
    ``resume()``; the tar files are removed even when a step fails.
    """
    paths = ('log1.tar', 'log2.tar', 'log3.tar')
    try:
        log1 = TrainingLog()
        with open('log1.tar', 'wb') as f:
            dump(log1, f)
        with open('log1.tar', 'rb') as f:
            log2 = load(f)
        with open('log2.tar', 'wb') as f:
            dump(log2, f)
        with open('log2.tar', 'rb') as f:
            load(f)  # loading an unresumed log works
        log2.resume()
        with open('log3.tar', 'wb') as f:
            dump(log2, f)
        with open('log3.tar', 'rb') as f:
            # Original note: "loading a resumed log does not work" --
            # presumably the failure this test guards against.
            load(f)
    finally:
        # Clean up in ``finally`` so a failing step does not leave files
        # behind in the working directory.
        for path in paths:
            if os.path.exists(path):
                os.remove(path)
Пример #31
0
def dump_hiddens(args, main_loop):
    """Transfer trained parameters, then trigger every ``DumpVariables``.

    ``args.dump_hiddens`` is passed to ``load`` to obtain the trained main
    loop whose parameters are copied into ``main_loop``.
    """
    # load parameters of trained model
    trained = load(args.dump_hiddens)
    transfer_parameters(trained, main_loop)
    del trained  # the loaded loop is not needed past this point

    dump_extensions = (candidate for candidate in main_loop.extensions
                       if isinstance(candidate, DumpVariables))
    for dump_extension in dump_extensions:
        dump_extension.do("after_training")
Пример #32
0
def test_serialization():
    """Check ``dump``/``load`` of an MLP against the saved NumPy archive.

    Covers the archive keys, unpickling, parameter name loading, duplicate
    child names and the warning for ``__main__`` objects.
    """
    # Create a simple brick with two parameters
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    second_W = mlp.linear_transformations[1].W
    second_W.set_value(second_W.get_value() * 2)

    # Check the data using numpy.load
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp)
    archive = numpy.load(tmp.name)
    assert set(archive.keys()) == {'mlp-linear_0.W', 'mlp-linear_1.W',
                                   'pkl'}
    first_W_saved = archive['mlp-linear_0.W']
    assert_allclose(first_W_saved, numpy.ones((10, 10)))
    assert first_W_saved.dtype == theano.config.floatX

    # Ensure that it can be unpickled
    mlp = load(tmp.name)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that only parameters are saved as NPY files
    mlp.random_data = numpy.random.rand(10)
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp)
    archive = numpy.load(tmp.name)
    assert set(archive.keys()) == {'mlp-linear_0.W', 'mlp-linear_1.W',
                                   'pkl'}

    # Ensure that parameters can be loaded with correct names
    parameter_values = load_parameter_values(tmp.name)
    assert set(parameter_values.keys()) == {'/mlp/linear_0.W',
                                            '/mlp/linear_1.W'}

    # Ensure that duplicate names are dealt with
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp)
    archive = numpy.load(tmp.name)
    assert set(archive.keys()) == {'mlp-linear.W', 'mlp-linear.W_2', 'pkl'}

    # Ensure warnings are raised when __main__ namespace objects are dumped
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as tmp:
        with warnings.catch_warnings(record=True) as caught:
            dump(mlp, tmp)
            assert len(caught) == 1
            assert '__main__' in str(caught[-1].message)
Пример #33
0
def load_named_instance(name, path):
    """Load the hiddens stored at ``path``, using an ``.npz`` cache.

    The cache path is ``path`` with ``".pkl"`` replaced by ``".npz"``.
    When the cache cannot be read, the object at ``path`` is loaded with
    ``load`` and written to the cache with ``numpy.savez_compressed``.

    Returns a dict with the keys ``name``, ``path`` and ``hiddens``.
    """
    print(name, path)
    npz_path = path.replace(".pkl", ".npz")
    try:
        hiddens = np.load(npz_path)
    except Exception:
        # Missing or unreadable cache: rebuild it from the original dump.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        with open(path, "rb") as source:
            hiddens = load(source)
        np.savez_compressed(npz_path, **hiddens)
    return dict(name=name, path=path, hiddens=hiddens)
Пример #34
0
def test_mnist_lenet():
    """Run three batches via ``main``, reload the dump, continue to six."""
    with tempfile.NamedTemporaryFile() as checkpoint:
        main(checkpoint.name, 1, num_batches=3)
        with open(checkpoint.name, "rb") as source:
            main_loop = load(source)
    # Raise the batch limit on the reloaded loop and resume training.
    finish_after = main_loop.find_extension("FinishAfter")
    finish_after.set_conditions(after_n_batches=6)
    main_loop.run()
    status = main_loop.log.status
    assert status['epochs_done'] == 0
    assert status['iterations_done'] == 6
Пример #35
0
def test_serialization():
    """Verify the archive written by ``dump`` for a two-layer MLP.

    Checks the stored keys and values, that the dump unpickles, that
    parameter names load correctly, that duplicate names are made unique,
    and that dumping a ``__main__`` object emits exactly one warning.
    """
    # Create a simple brick with two parameters
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    doubled_W = mlp.linear_transformations[1].W
    doubled_W.set_value(doubled_W.get_value() * 2)

    # Check the data using numpy.load
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp)
    saved = numpy.load(tmp.name)
    assert set(saved.keys()) == {'mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'}
    saved_first_W = saved['mlp-linear_0.W']
    assert_allclose(saved_first_W, numpy.ones((10, 10)))
    assert saved_first_W.dtype == theano.config.floatX

    # Ensure that it can be unpickled
    mlp = load(tmp.name)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that only parameters are saved as NPY files
    mlp.random_data = numpy.random.rand(10)
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp)
    saved = numpy.load(tmp.name)
    assert set(saved.keys()) == {'mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'}

    # Ensure that parameters can be loaded with correct names
    parameter_values = load_parameter_values(tmp.name)
    assert set(parameter_values.keys()) == {'/mlp/linear_0.W',
                                            '/mlp/linear_1.W'}

    # Ensure that duplicate names are dealt with
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp)
    saved = numpy.load(tmp.name)
    assert set(saved.keys()) == {'mlp-linear.W', 'mlp-linear.W_2', 'pkl'}

    # Ensure warnings are raised when __main__ namespace objects are dumped
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as tmp:
        with warnings.catch_warnings(record=True) as recorded:
            dump(mlp, tmp)
            assert len(recorded) == 1
            assert '__main__' in str(recorded[-1].message)
Пример #36
0
def test_serialization():
    """Check ``dump``, ``load`` and ``load_parameters`` on an MLP tarball.

    Covers the ``__main__`` warning, saved parameter names and values,
    unpickling, duplicate child names, and a parameters-only dump.
    """
    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    second_W = mlp.linear_transformations[1].W
    second_W.set_value(second_W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as tmp:
        with warnings.catch_warnings(record=True) as caught:
            dump(mlp.foo, tmp)
            assert len(caught) == 1
            assert '__main__' in str(caught[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(tmp.name, 'rb') as source:
        saved_parameters = load_parameters(source)
    assert set(saved_parameters.keys()) == {'/mlp/linear_0.W',
                                            '/mlp/linear_1.W'}
    first_W_saved = saved_parameters['/mlp/linear_0.W']
    assert_allclose(first_W_saved, numpy.ones((10, 10)))
    assert first_W_saved.dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(tmp.name, 'rb') as source:
        mlp = load(source)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(tmp.name, 'rb') as source:
        saved_parameters = load_parameters(source)
    assert set(saved_parameters.keys()) == {'/mlp/linear.W',
                                            '/mlp/linear.W_2'}

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as tmp:
        dump(None, tmp, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(tmp.name, 'r') as tarball:
        assert set(tarball.getnames()) == {'_parameters'}
Пример #37
0
    def load_to(self, main_loop):
        """Apply the state saved at ``self.path`` to ``main_loop``.

        Parameter values are always set; the dump is only loaded in full
        when the log or the iteration state is requested.
        """
        # HACK (per the original author): this relies on a locally modified
        # ``set_parameter_values`` and was only meant to work for a few days
        # with a limited set of models.
        aux_param_step = load_parameter_values(self.path)
        main_loop.model.set_parameter_values(aux_param_step)

        if not (self.load_iteration_state or self.load_log):
            return
        with open(self.path, "rb") as source:
            restored_loop = load(source)
        if self.load_log:
            main_loop.log = restored_loop.log
        if self.load_iteration_state:
            main_loop.iteration_state = restored_loop.iteration_state
Пример #38
0
    def testing(self, fea2obj):
        """Apply the saved best model to the dev and test data.

        Loads the main loop from ``config['net'] + '.best.pkl'``, rebuilds
        the model with ``build_model_new(..., test=True)``, copies the saved
        parameter values into it, compiles a Theano prediction function and
        passes it to ``self.applypredict`` for the dev and the test stream.

        ``fea2obj`` is a mapping keyed by feature name; its ``'targets'``
        entry must expose ``t2idx``.
        """
        config = self._config
        dsdir = config['dsdir']
        devfile = dsdir + '/dev.txt'
        testfile = dsdir + '/test.txt'
        networkfile = config['net']
        # Fixed batch size; the configured value is deliberately not used.
        batch_size = 10000#int(config['batchsize'])
        devMentions = load_ent_ds(devfile)
        tstMentions = load_ent_ds(testfile)
        logger.info('#dev: %d #test: %d', len(devMentions), len(tstMentions))

        main_loop = load(networkfile + '.best.pkl')
        logger.info('Model loaded. Building prediction function...')
        old_model = main_loop.model
        logger.info(old_model.inputs)
        # NOTE(review): ``sources`` is not used in the rest of this method.
        sources = [inp.name for inp in old_model.inputs]
#         fea2obj = build_input_objs(sources, config)
        t2idx = fea2obj['targets'].t2idx
        # Defaults to True when 'use_mean_pred' is absent from the config.
        deterministic = str_to_bool(config['use_mean_pred']) if 'use_mean_pred' in config else True
        kl_weight = shared_floatx(0.001, 'kl_weight')
        entropy_weight= shared_floatx(0.001, 'entropy_weight')


        # Only the cost (first) and y_hat (third) outputs are used here.
        cost, _, y_hat, _, _,_,_ = build_model_new(fea2obj, len(t2idx), self._config, kl_weight, entropy_weight, deterministic=deterministic, test=True)
        model = Model(cost)
        # Transfer the trained parameter values into the rebuilt model.
        model.set_parameter_values(old_model.get_parameter_values())

        # Collect model inputs in the order of the fea2obj keys, skipping
        # every key that contains 'targets'.
        theinputs = []
        for fe in fea2obj.keys():
            if 'targets' in fe:
                continue
            for inp in model.inputs:
                if inp.name == fe:
                    theinputs.append(inp)

#         theinputs = [inp for inp in model.inputs if inp.name != 'targets']
        print "theinputs: ", theinputs
        predict = theano.function(theinputs, y_hat)

        test_stream, num_samples_test = get_comb_stream(fea2obj, 'test', batch_size, shuffle=False)
        dev_stream, num_samples_dev = get_comb_stream(fea2obj, 'dev', batch_size, shuffle=False)
        logger.info('sources: %s -- number of test/dev samples: %d/%d', test_stream.sources, num_samples_test, num_samples_dev)
        # Inverse of t2idx: index -> type.
        idx2type = {idx:t for t,idx in t2idx.iteritems()}

        logger.info('Starting to apply on dev inputs...')
        self.applypredict(theinputs, predict, dev_stream, devMentions, num_samples_dev, batch_size, os.path.join(config['exp_dir'], config['matrixdev']), idx2type)
        logger.info('...apply on dev data finished')

        logger.info('Starting to apply on test inputs...')
        self.applypredict(theinputs, predict, test_stream, tstMentions, num_samples_test, batch_size, os.path.join(config['exp_dir'], config['matrixtest']), idx2type)
        logger.info('...apply on test data finished')
Пример #39
0
def test_serialization():
    """Exercise the tar-based serialization of an MLP.

    Checks the warning for ``__main__`` objects, the saved parameters,
    unpickling of the main object, handling of duplicate names, and a dump
    that contains parameters only.
    """
    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    doubled_W = mlp.linear_transformations[1].W
    doubled_W.set_value(doubled_W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as tmp:
        with warnings.catch_warnings(record=True) as recorded:
            dump(mlp.foo, tmp)
            assert len(recorded) == 1
            assert '__main__' in str(recorded[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(tmp.name, 'rb') as handle:
        parameters = load_parameters(handle)
    assert set(parameters.keys()) == {'/mlp/linear_0.W', '/mlp/linear_1.W'}
    saved_first_W = parameters['/mlp/linear_0.W']
    assert_allclose(saved_first_W, numpy.ones((10, 10)))
    assert saved_first_W.dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(tmp.name, 'rb') as handle:
        mlp = load(handle)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as tmp:
        dump(mlp, tmp, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(tmp.name, 'rb') as handle:
        parameters = load_parameters(handle)
    assert set(parameters.keys()) == {'/mlp/linear.W', '/mlp/linear.W_2'}

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as tmp:
        dump(None, tmp, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(tmp.name, 'r') as tarball:
        assert set(tarball.getnames()) == {'_parameters'}
Пример #40
0
    def testing(self, fea2obj):
        config = self._config
        dsdir = config['dsdir']
        devfile = dsdir + '/dev.txt'
        testfile = dsdir + '/test.txt'
        networkfile = config['net']
        batch_size = 10000#int(config['batchsize'])
        devMentions = load_ent_ds(devfile)
        tstMentions = load_ent_ds(testfile)
        logger.info('#dev: %d #test: %d', len(devMentions), len(tstMentions))
        
        main_loop = load(networkfile + '.best.pkl')
        logger.info('Model loaded. Building prediction function...')
        old_model = main_loop.model
        logger.info(old_model.inputs)
        sources = [inp.name for inp in old_model.inputs]
#         fea2obj = build_input_objs(sources, config)
        t2idx = fea2obj['targets'].t2idx
        deterministic = str_to_bool(config['use_mean_pred']) if 'use_mean_pred' in config else True 
        kl_weight = shared_floatx(0.001, 'kl_weight')
        entropy_weight= shared_floatx(0.001, 'entropy_weight')
       
       
        cost, _, y_hat, _, _,_,_ = build_model_new(fea2obj, len(t2idx), self._config, kl_weight, entropy_weight, deterministic=deterministic, test=True)
        model = Model(cost)
        model.set_parameter_values(old_model.get_parameter_values())
        
        theinputs = []
        for fe in fea2obj.keys():
            if 'targets' in fe:
                continue
            for inp in model.inputs:
                if inp.name == fe:
                    theinputs.append(inp)
                    
#         theinputs = [inp for inp in model.inputs if inp.name != 'targets']
        print "theinputs: ", theinputs
        predict = theano.function(theinputs, y_hat)
        
        test_stream, num_samples_test = get_comb_stream(fea2obj, 'test', batch_size, shuffle=False)
        dev_stream, num_samples_dev = get_comb_stream(fea2obj, 'dev', batch_size, shuffle=False)
        logger.info('sources: %s -- number of test/dev samples: %d/%d', test_stream.sources, num_samples_test, num_samples_dev)
        idx2type = {idx:t for t,idx in t2idx.iteritems()}
        
        logger.info('Starting to apply on dev inputs...')
        self.applypredict(theinputs, predict, dev_stream, devMentions, num_samples_dev, batch_size, os.path.join(config['exp_dir'], config['matrixdev']), idx2type)
        logger.info('...apply on dev data finished')
        
        logger.info('Starting to apply on test inputs...')
        self.applypredict(theinputs, predict, test_stream, tstMentions, num_samples_test, batch_size, os.path.join(config['exp_dir'], config['matrixtest']), idx2type)
        logger.info('...apply on test data finished')
Пример #41
0
def load_model(path):
    """Load the training log stored in a checkpoint and print a summary.

    The ``log`` entry of the checkpoint is read and converted into a
    ``pandas.DataFrame`` with one row per log index.  Both objects are
    registered in the module-level ``logs`` and ``dfs`` dictionaries.  The
    key is ``path`` with its last four characters dropped when it ends in
    ``tar``, and ``path`` itself otherwise.

    Parameters
    ----------
    path : str
        Location of the checkpoint file.

    """
    # The checkpoint is not a text file (the code expects a ``tar`` suffix),
    # so read it in binary mode and close the handle once the log is loaded.
    with open(path, 'rb') as source:
        log = load(source, name='log')
    df = pandas.DataFrame.from_dict(log, orient='index')
    name = path[:-4] if path[-3:] == 'tar' else path
    logs[name] = log
    dfs[name] = df

    # Single-argument print calls produce the same output under the Python 2
    # print statement and the Python 3 print function.
    print(path)
    print('%s %s' % (log.status['iterations_done'], ' iterations done'))
    for key in ('best_valid_per',
                'best_valid_train_cost',
                'best_valid_mean_total_reward'):
        if key in log.status:
            print('%s %s' % (key, log.status[key]))
    if 'mean_total_reward' in dfs[name]:
        # Average of the last ten recorded values of the channel.
        print('%s %s' % ('mean_total_reward:',
                         dfs[name].mean_total_reward[-10:].mean()))
Пример #42
0
def get(name, path, channel_name):
    """Return the series of one log channel, cached next to the log file.

    The cache file name is ``path`` with ``".pkl"`` replaced by
    ``"_<channel_name>.npz"``.  If that file can be read, its ``t`` and ``v``
    arrays are returned.  Otherwise the log at ``path`` is loaded, every
    non-empty record containing ``channel_name`` is collected, and the
    result is written to the cache file before being returned.

    Parameters
    ----------
    name : str
        Label copied into the result unchanged.
    path : str
        Location of the serialized log.
    channel_name : str
        Key to extract from each log record.

    Returns
    -------
    dict
        Mapping with the keys ``name``, ``path``, ``t`` (log keys) and
        ``v`` (channel values).

    """
    print(name, path)
    npz_path = path.replace(".pkl", "_%s.npz" % channel_name)
    try:
        # The object returned by np.load keeps the archive open; copy both
        # arrays out and let the context manager close it.
        with np.load(npz_path) as data:
            t = np.array(data["t"])
            v = np.array(data["v"])
    except Exception:
        # Cache missing or unreadable: rebuild it from the log.  Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        with open(path, "rb") as source:
            log = load(source)
        tv = np.array([[step, record[channel_name]]
                       for step, record in log.items()
                       if record and channel_name in record])
        log = None  # drop the reference to the loaded log
        t = tv[:, 0]
        v = tv[:, 1]
        np.savez_compressed(npz_path, t=t, v=v)
    return dict(name=name, path=path, t=t, v=v)
Пример #43
0
def gen(num_sample=1000, num_lines=20):
    """Sample from the saved model and return the decoded text.

    The text of ``lyrics_out.txt`` is used to build a ``Corpus``, the model
    is read from ``trainingdata.tar`` and its first top brick is asked to
    generate ``num_sample`` steps for a single sequence.

    Parameters
    ----------
    num_sample : int
        Number of generation steps requested from the generator.
    num_lines : int
        Maximum number of lines of decoded text to return.

    Returns
    -------
    str
        The first ``num_lines`` lines of the decoded sample, joined by
        newlines.

    """
    # Close the corpus file as soon as its contents have been read.
    with open('lyrics_out.txt') as corpus_file:
        c = Corpus(corpus_file.read())

    with open('trainingdata.tar', 'rb') as f:
        model = load(f).model

    generator = model.top_bricks[0]

    sample = ComputationGraph(
        generator.generate(n_steps=num_sample, batch_size=1,
                           iterate=True)).get_theano_function()

    # NOTE(review): output 6 is presumably the generated symbols; column 0
    # selects the only sequence in the batch -- confirm the index.
    output_char_int = sample()[6][:, 0]

    decoded = "".join(c.decode(output_char_int))
    output = "\n".join(decoded.splitlines()[0:num_lines])
    return output
Пример #44
0
def gen(num_sample=1000,num_lines=20):
    """Generate a sample with the saved model and decode it to text.

    Builds a ``Corpus`` from the contents of ``lyrics_out.txt``, loads the
    model from ``trainingdata.tar``, and runs the ``generate`` method of its
    first top brick for ``num_sample`` steps with a batch size of one.

    Parameters
    ----------
    num_sample : int
        Number of steps to generate.
    num_lines : int
        Upper bound on the number of text lines returned.

    Returns
    -------
    str
        At most ``num_lines`` lines of decoded text, separated by newlines.

    """
    # Use a context manager so the corpus file handle is not leaked.
    with open('lyrics_out.txt') as lyrics:
        c = Corpus(lyrics.read())

    with open('trainingdata.tar', 'rb') as f:
        model = load(f).model

    generator = model.top_bricks[0]

    sample = ComputationGraph(generator.generate(
        n_steps=num_sample,
        batch_size=1,
        iterate=True
    )).get_theano_function()

    # NOTE(review): index 6 presumably picks the generated outputs among
    # the values returned by the compiled function -- confirm.
    output_char_int = sample()[6][:,0]

    text_lines = "".join(c.decode(output_char_int)).splitlines()
    output = "\n".join(text_lines[0:num_lines])
    return output
def build_classifier_grad(classifier_fn='mlp.zip', label=2):
    """
    Loads a classifier, and builds functions p(y_label|x) and
        d log p(y_label|x)/dx where x is the image

    ----------
    Parameters
    ----------
    classifier_fn : string
         Filename to load the brick containing the classifier
    label : int
         Integer determining which class (column of the classifier
         output) the probability and gradient are computed for

    -------
    Returns
    -------
    pk_prob_func : callable
         Compiled function mapping a batch ``x`` to ``y_hat[:, label]``
    pk_grad_func : callable
         Function mapping a batch ``x`` to the diagonal, over the first
         two axes, of the Jacobian of ``log(y_hat[:, label])`` w.r.t. ``x``

    FIXME: probably the case that you need to load in the relevant bricks
            modules to open the classifier file
    """
    # The serialized brick is binary data: read it in binary mode so the
    # bytes are not altered by text-mode translation.
    with open(classifier_fn, 'rb') as f:
        classifier_brick = load(f)

    x = T.tensor4('features')
    y_hat = classifier_brick.apply(x)

    # Note y_hat vectorized giving an output shaped (batches, labels),
    pk_grad = theano.gradient.jacobian(tensor.log(y_hat[:, label]), x)
    # should make this more efficient using scan.. does dy[i]/dx[j]

    pk_grad_func1 = theano.function(inputs=[x],
                                    outputs=pk_grad)

    def pk_grad_func(x):
        """
        Takes diagonal of first two terms of derivative
        """
        res = pk_grad_func1(x)
        n_s = res.shape[0]
        # Keep only res[i, i]: the gradient of sample i's output with
        # respect to sample i's own input.
        di = numpy.diag_indices(n_s)
        return res[di]

    pk_prob_func = theano.function(inputs=[x],
                                   outputs=y_hat[:, label])

    return pk_prob_func, pk_grad_func
Пример #46
0
def test_save_the_best():
    """Run a mock main loop with a conditional second checkpoint target.

    Asserts that log rows 4 and 5 record a save to both files, that row 6
    records a save to the regular file only, and that the log stored in
    the second file reports ``iterations_done == 5``.
    """
    skip_if_configuration_set('log_backend', 'sqlite',
                              "Known to be flaky with SQLite log backend.")
    with NamedTemporaryFile(dir=config.temp_dir) as dst:
        with NamedTemporaryFile(dir=config.temp_dir) as dst_best:
            track_cost = TrackTheBest("cost", after_epoch=False,
                                      after_batch=True)
            # Build the extension list in the same order as a literal would.
            extensions = [FinishAfter(after_n_epochs=1),
                          WriteCostExtension(),
                          track_cost]
            checkpoint = Checkpoint(dst.name, after_batch=True,
                                    save_separately=['log'])
            extensions.append(checkpoint.add_condition(
                ["after_batch"],
                OnLogRecord(track_cost.notification_name),
                (dst_best.name, )))
            main_loop = MockMainLoop(extensions=extensions)
            main_loop.run()

            for row in (4, 5):
                assert main_loop.log[row]['saved_to'] == (dst.name,
                                                          dst_best.name)
            assert main_loop.log[6]['saved_to'] == (dst.name, )
            with open(dst_best.name, 'rb') as src:
                best_loop = load(src)
                assert best_loop.log.status['iterations_done'] == 5
Пример #47
0
def load_log(fname):
    """Load a :class:`TrainingLog` object from disk.

    This function automatically handles various file formats that contain
    an instance of a :class:`TrainingLog`. This includes a pickled
    Log object, a pickled :class:`MainLoop` or an experiment dump (TODO).

    Parameters
    ----------
    fname : str
        Path of the file to read.

    Returns
    -------
    log : :class:`TrainingLog`
        The loaded object itself, or the ``log`` attribute of a loaded
        :class:`MainLoop`.

    Raises
    ------
    ValueError
        If the file content is neither a :class:`TrainingLog` nor a
        :class:`MainLoop`.

    """
    with change_recursion_limit(config.recursion_limit):
        with open(fname, 'rb') as f:
            from_disk = load(f)
        # TODO: Load "dumped" experiments

    if isinstance(from_disk, TrainingLog):
        log = from_disk
    elif isinstance(from_disk, MainLoop):
        log = from_disk.log
        # Drop the reference to the main loop; only its log is returned.
        del from_disk
    else:
        # Fill in the file name so the message does not show a literal '{}'.
        raise ValueError("Could not load '{}': Unrecognized content."
                         .format(fname))

    return log
Пример #48
0
def test_save_the_best():
    """Check which files each ``saved_to`` log record lists.

    A mock main loop runs with a checkpoint that always writes to one
    temporary file and, on the tracker's notification record, also to a
    second one.  Rows 4 and 5 must list both files, row 6 only the first,
    and the log stored in the second file must report five iterations done.
    """
    skip_if_configuration_set(
        'log_backend', 'sqlite',
        "Known to be flaky with SQLite log backend.")
    with NamedTemporaryFile(dir=config.temp_dir) as dst,\
            NamedTemporaryFile(dir=config.temp_dir) as dst_best:
        track_cost = TrackTheBest("cost", after_epoch=False, after_batch=True)
        stop_after_epoch = FinishAfter(after_n_epochs=1)
        write_cost = WriteCostExtension()
        # Extra target written only when the tracker's record is in the log.
        checkpoint = Checkpoint(
            dst.name, after_batch=True, save_separately=['log']
        ).add_condition(["after_batch"],
                        OnLogRecord(track_cost.notification_name),
                        (dst_best.name,))
        main_loop = MockMainLoop(
            extensions=[stop_after_epoch, write_cost, track_cost, checkpoint])
        main_loop.run()

        expected_saved_to = {
            4: (dst.name, dst_best.name),
            5: (dst.name, dst_best.name),
            6: (dst.name,),
        }
        for row in sorted(expected_saved_to):
            assert main_loop.log[row]['saved_to'] == expected_saved_to[row]
        with open(dst_best.name, 'rb') as src:
            assert load(src).log.status['iterations_done'] == 5
Пример #49
0
from blocks_extras.extensions.plot import Plot

from utils import get_stream, track_best, MainLoop, get_seed, make_wav, rescale
from model import nn_fprop
from config import config
# Load config parameters
locals().update(config)

# Set up model and prediction function
x = tensor.tensor3('inputs', dtype='float64')
y = tensor.tensor3('targets', dtype='float64')

# The saved model is binary data, so read it in binary mode.  No placeholder
# value is assigned to ``model`` first: if the file cannot be opened the
# script stops here anyway.
with open('gru_best.pkl', 'rb') as picklefile:
    model = load(picklefile)
# frame_length, hidden_size and num_layers are presumably injected into the
# module namespace from ``config`` -- confirm against the config module.
y_hat, cost, cells = nn_fprop(x, y, frame_length, hidden_size, num_layers, model)
predict_fn = theano.function([x], y_hat)

# Generate
print "generating audio..."
# Starting input taken from the dataset at position ``seed_index``.
seed = get_seed(hdf5_file, [seed_index])
# NOTE(review): 16000 is presumably the number of audio samples per second
# -- confirm.
sec = 16000
samples_to_generate = sec*secs_to_generate
# NOTE(review): relies on Python 2 integer division so that range() below
# receives an int -- confirm frame_length is an int.
num_frames_to_generate = samples_to_generate/frame_length + seq_length #don't include seed
predictions = []
prev_input = seed
for i in range(num_frames_to_generate):
    # Predict from the current input and keep every prediction.
    prediction = predict_fn(prev_input)
    predictions.append(prediction)
    # Minimum over all predictions collected so far.
    pred_min = numpy.min(predictions)
    parser.add_argument("--initial-beta", type=float, default=0)
    parser.add_argument("--cluster", action="store_true")
    parser.add_argument("--activation", choices=list(activations.keys()), default="tanh")
    parser.add_argument("--init", type=str, default="ortho")
    parser.add_argument("--continue-from")
    parser.add_argument("--permuted", action="store_true")
    args = parser.parse_args()

    #assert not (args.noise and args.summarize)
    np.random.seed(args.seed)
    blocks.config.config.default_seed = args.seed


    if args.continue_from:
        from blocks.serialization import load
        main_loop = load(args.continue_from)
        main_loop.run()
        sys.exit(0)

    graphs, extensions, updates = construct_graphs(args, nclasses, sequence_length)

    ### optimization algorithm definition
    step_rule = CompositeRule([
        StepClipping(1.),
        #Momentum(learning_rate=args.learning_rate, momentum=0.9),
        RMSProp(learning_rate=args.learning_rate, decay_rate=0.5),
    ])

    algorithm = GradientDescent(cost=graphs["training"].outputs[0],
                                parameters=graphs["training"].parameters,
                                step_rule=step_rule)
Пример #51
0
# One (variable, new value) pair per state: zero the state when
# ``start_flag`` is set, otherwise use the variable selected by the filter.
extra_updates = []
for name, var in states.items():
    reset_value = 0.0 * var
    # presumably the state's final value from the last run -- verify
    # against regex_final_value.
    final_value_filter = VariableFilter(
        theano_name_regex=regex_final_value(name))
    final_value = final_value_filter(cg.auxiliary_variables)[0]
    update = tensor.switch(start_flag, reset_value, final_value)
    extra_updates.append((var, update))


# Old values for n

load_name = "sp_and_f0_1"

from blocks.serialization import load

main_loop = load(save_dir + "pkl/best_" + load_name + ".pkl")

# Read the saved parameter values once instead of on every loop iteration,
# and precompute the saved names as they appear in the current model
# ("with_fake_attention" renamed to "att_trans").
old_values = main_loop.model.get_parameter_values()
transferable_keys = set(old_name.replace("with_fake_attention", "att_trans")
                        for old_name in old_values)

# Parameters with a saved counterpart are initialised from it; the rest are
# collected in ``new_params``.
new_params = []
for key, value in model.get_parameter_dict().items():
    if key in transferable_keys:
        value.set_value(
            old_values[key.replace("att_trans", "with_fake_attention")])
    else:
        new_params.append(value)

# Drop the loaded main loop and the copied values once transferred.
del main_loop, old_values

#################
# Monitoring vars
#################
Пример #52
0
 def load_iteration_state(self):
     """Return the object deserialized from ``self.path_to_iteration_state``."""
     source = open(self.path_to_iteration_state, "rb")
     try:
         return load(source)
     finally:
         # Always close the file, whether or not loading succeeded.
         source.close()
Пример #53
0
from blocks.serialization import load
from blocks.graph import ComputationGraph

# Mean and standard deviation stored under $FUEL_DATA_PATH/blizzard/.
data_dir = os.path.join(os.environ['FUEL_DATA_PATH'],
                        'blizzard/', 'sp_standardize.npz')
data_stats = numpy.load(data_dir)
sp_mean, sp_std = data_stats['sp_mean'], data_stats['sp_std']

# Saved main loop of the experiment, under $RESULTS_DIR/blizzard/pkl/.
save_dir = os.path.join(os.environ['RESULTS_DIR'], 'blizzard/')
experiment_name = "sp_only_0"
checkpoint_path = save_dir + "pkl/best_" + experiment_name + ".pkl"
main_loop = load(checkpoint_path)

generator = main_loop.model.get_top_bricks()[0]

steps = 2048
n_samples = 1

generated = generator.generate(n_steps=steps, batch_size=n_samples,
                               iterate=True)
sample = ComputationGraph(generated)
sample_fn = sample.get_theano_function()

# Take the second-to-last output, apply std and mean, swap the first two
# axes and keep the first entry.
outputs = (sample_fn()[-2] * sp_std + sp_mean).swapaxes(0, 1)[0]
Пример #54
0
import sys
from fuel.datasets.hdf5 import H5PYDataset
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.streams import DataStream
sys.path.append('./functions/')
from custom_transformers import Normalize, Cast
from fuel.converters.base import progress_bar

# load model
images = tensor.ftensor4('images')
images_test = tensor.ftensor4('images_test')
labels = tensor.imatrix('labels')

#parameters = load_parameters(open("./train", "rb"))
#model = Model(cost)
# Close the checkpoint file as soon as the main loop has been loaded.
with open("./train", "rb") as checkpoint:
    main_loop = load(checkpoint)
model = main_loop.model

#model.set_parameter_values(parameters)
# Compiled function applying the model's first top brick to a batch of images.
sample = theano.function([images], model.get_top_bricks()[0].apply(images))

# load data
submit_set = H5PYDataset('../data/data_1.hdf5', which_sets=('submit',))
# Visit the examples in order, 50 per batch.
batch_scheme = SequentialScheme(submit_set.num_examples, 50)
raw_stream = DataStream.default_stream(submit_set,
                                       iteration_scheme=batch_scheme)
submit_stream = Cast(Normalize(raw_stream), 'floatX')
submit_it = submit_stream.get_epoch_iterator()

# Opened here for writing; it is not closed within this section.
output = open('submission.csv', 'w')
Пример #55
0
#######################
### Load model
#######################
import theano.tensor as tensor
from blocks.serialization import load
from blocks.model import Model
from blocks.filter import VariableFilter
from blocks.bricks import Softmax
from blocks.bricks.cost import MisclassificationRate
from blocks.extensions.monitoring import DataStreamMonitoring

# Close the checkpoint file as soon as the main loop has been loaded.
with open('./exp/softmax_2_bn_noise.pkl', 'rb') as checkpoint:
    main_loop = load(checkpoint)

# Pick the monitoring extensions by prefix; with several matches the last
# one wins.
train_monitor = None
test_monitor = None
for ex in main_loop.extensions:
    if not isinstance(ex, DataStreamMonitoring):
        continue
    if ex.prefix == 'train':
        train_monitor = ex
    elif ex.prefix == 'test':
        test_monitor = ex

# Rebuild a model from the first variable monitored on the test stream.
model = Model(test_monitor._evaluator.theano_variables[0])

loss = model.outputs[0]
# NOTE(review): assumes the model has exactly two inputs, ordered
# (targets, features) -- confirm.
y, x = model.inputs
out = VariableFilter(theano_name='mlp_inference_output')(model.variables)[0]
out2 = VariableFilter(theano_name='linear_apply_output')(model.variables)[0]
pred = Softmax().apply(out2)
misclass = MisclassificationRate().apply(tensor.flatten(y, outdim=1), out)

##############
Пример #56
0
    hyperparameters_path = getattr(
        args, "hyperparameters",
        os.path.join(os.path.dirname(__file__), "defaults.yaml"))

    with open(hyperparameters_path, "rb") as f:
        hyperparameters = yaml.load(f)

    hyperparameters["n_spatial_dims"] = len(hyperparameters["patch_shape"])
    hyperparameters["hyperparameters"] = hyperparameters
    hyperparameters["name"] += "_" + hyperparameters["task_name"]
    hyperparameters[
        "checkpoint_save_path"] = hyperparameters["name"] + "_checkpoint.zip"

    checkpoint_path = None
    if args.autoresume and os.path.exists(
            hyperparameters["checkpoint_save_path"]):
        checkpoint_path = hyperparameters["checkpoint_save_path"]
    elif args.checkpoint:
        checkpoint_path = args.checkpoint
    if checkpoint_path:
        from blocks.serialization import load
        main_loop = load(checkpoint_path)
    else:
        main_loop = construct_main_loop(**hyperparameters)

    if not (args.autoresume and main_loop.log.current_row.get(
            "training_finish_requested", False)):
        print "training..."
        main_loop.run()