# NOTE: the imports below are assumptions about the old Theano-based
# RETURNN (CRNN) module layout; adjust the paths to match your checkout.
import theano
import theano.tensor as T
import Network
import EngineUtil
from NetworkHiddenLayer import DumpLayer
from Device import DummyDevice
from Updater import Updater
from GeneratingDataset import Task12AXDataset


def load(lstm_opts=None):
  """
  Builds a small net with a single LSTM layer (class given via lstm_opts),
  assigns one minibatch from Task12AXDataset, runs one Adam update step,
  and returns the collected debug data (dumped activations, initial
  parameter values, and the cost).
  """
  if not lstm_opts:
    lstm_opts = {"class": "lstm2"}
  lstm_opts = lstm_opts.copy()
  lstm_opts.update({"n_out": 10, "from": "in"})
  num_inputs = 9
  num_outputs = 2
  net_topo = {
    "in": {"class": "dump", "filename": "in"},
    "lstm": lstm_opts,
    "lstm_dump": {"class": "dump", "from": "lstm", "filename": "lstm"},
    "output": {"class": "softmax", "loss": "ce", "from": "lstm_dump"}
  }

  # The dump layers write their activations into this dict.
  collected_data = {}
  DumpLayer.global_debug_container = collected_data

  net = Network.LayerNetwork.from_json(
    json_content=net_topo,
    n_in=num_inputs,
    n_out={"classes": (num_outputs, 1)},
    train_flag=True)
  net.declare_train_params()

  # Init dataset and prepare one minibatch.
  epoch = 1
  dataset = Task12AXDataset(num_seqs=1000, seq_ordering="random", chunking="200:200")
  dataset.init_seq_order(epoch=epoch)
  batch_gen = dataset.generate_batches(
    recurrent_net=net.recurrent,
    batch_size=5000,
    max_seqs=10)
  batches = batch_gen.peek_next_n(1)

  # We need the DummyDevice for assign_dev_data.
  dev = DummyDevice()
  assign_success, _ = EngineUtil.assign_dev_data(device=dev, dataset=dataset, batches=batches)
  assert assign_success
  dev.initialize(net)
  dev.update_data()
  givens = [(net.y[k], dev.y[k]) for k in dev.used_data_keys]
  givens += [(net.j[k], dev.j[k]) for k in dev.used_data_keys]

  # Now gradients, updates and compile everything.
  gradients = {
    p: T.grad(net.get_objective(), p, known_grads=net.known_grads)
    for p in net.train_params_vars}
  updater = Updater(adam=True)
  updater.initVars(net, gradients)
  updater.setLearningRate(learning_rate=0.01)
  trainer = theano.function(
    inputs=[],
    outputs=[net.total_cost],
    givens=givens,
    updates=updater.getUpdateList(),
    on_unused_input='warn',
    name="train_and_updater")

  # Remember the parameter values before the update step.
  for p in net.train_params_vars:
    collected_data["param:%s" % p.name] = p.get_value()

  # And finally, run it.
  cost = trainer()
  collected_data["cost"] = cost
  return collected_data
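
# Hypothetical usage sketch (an assumption, not part of the original file):
# run the same setup with two different LSTM layer classes and compare the
# resulting costs. The class names are examples; any LSTM layer class known
# to the network can be plugged in. Note that parameter initialization is
# random unless seeded, so the costs will not match exactly across runs.
if __name__ == "__main__":
  data_ref = load({"class": "lstm2"})
  data_other = load({"class": "lstm_opt"})
  print("cost lstm2: %s" % data_ref["cost"])
  print("cost lstm_opt: %s" % data_other["cost"])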