def test_hdf_data_target_int32():
  from GeneratingDataset import StaticDataset
  dataset = StaticDataset(
    [{"data": numpy.array([1, 2, 3], dtype="uint8"),
      "classes": numpy.array([2147483647, 2147483646, 2147483645], dtype="int32")}],
    output_dim={"data": (255, 1), "classes": (10, 1)})
  dataset.initialize()
  dataset.init_seq_order(epoch=0)
  dataset.load_seqs(0, 1)
  orig_classes_dtype = dataset.get_data_dtype("classes")
  orig_classes_seq = dataset.get_data(0, "classes")
  assert orig_classes_seq.shape == (3,) and orig_classes_seq[0] == 2147483647
  assert orig_classes_seq.dtype == orig_classes_dtype == "int32"
  hdf_fn = _get_tmp_file(suffix=".hdf")
  hdf_writer = HDFDatasetWriter(filename=hdf_fn)
  hdf_writer.dump_from_dataset(dataset, use_progress_bar=False)
  hdf_writer.close()
  hdf_dataset = HDFDataset(files=[hdf_fn])
  hdf_dataset.initialize()
  hdf_dataset.init_seq_order(epoch=1)
  hdf_classes_dtype = hdf_dataset.get_data_dtype("classes")
  assert hdf_classes_dtype == orig_classes_dtype
  hdf_classes_shape = hdf_dataset.get_data_shape("classes")
  assert hdf_classes_shape == []
  hdf_dataset.load_seqs(0, 1)
  hdf_data_classes = hdf_dataset.get_data(0, "classes")
  assert hdf_data_classes.dtype == orig_classes_dtype
  assert all(hdf_data_classes == orig_classes_seq)
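# The test above relies on a _get_tmp_file helper that is not part of this excerpt.
# A minimal sketch of such a helper, assuming it only needs to hand out a unique
# temp filename and remove it again at interpreter exit (hypothetical; the real
# helper may differ):
import atexit
import os
import tempfile


def _get_tmp_file(suffix):
  """
  :param str suffix: e.g. ".hdf"
  :return: path of an (empty) temp file, removed again at exit
  :rtype: str
  """
  fd, fn = tempfile.mkstemp(suffix=suffix)
  os.close(fd)
  atexit.register(lambda: os.path.exists(fn) and os.remove(fn))
  return fn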
def _classify(params):
  ret = {}
  output_dim = {}
  hash = hashlib.new('ripemd160')
  hash.update(json.dumps(params))
  hash = hash.hexdigest()
  for k in params:
    try:
      params[k] = numpy.asarray(params[k], dtype='float32')
      if k != 'data':
        output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
    except Exception:
      if k != 'data' and not k in network.n_out:
        ret['error'] = 'unknown target: %s' % k
      else:
        ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(params[k]))
      break
  if not 'error' in ret:
    try:
      # Construct the dataset only inside the try block, so that invalid data is reported as an error.
      data = StaticDataset(data=[params], output_dim=output_dim)
      data.init_seq_order()
    except Exception:
      ret['error'] = "invalid data: %s" % params
    else:
      batches = data.generate_batches(recurrent_net=network.recurrent, batch_size=sys.maxint, max_seqs=1)
      if not hash in classifiers:
        classifiers[hash] = ClassificationTaskThread(network, devices, data, batches)
        classifiers[hash].json_params = params
        print >> log.v3, "classifier started:", hash
      ret['result'] = {'hash': hash}
  return ret
def classify_in_background(self):
  while True:
    requests = []
    # fetch first request
    r = yield self.classification_queue.get()
    requests.append(r)
    # grab all other waiting requests
    try:
      while True:
        requests.append(self.classification_queue.get_nowait())
    except QueueEmpty:
      pass

    output_dim = {}
    # Do dataset creation and classification.
    dataset = StaticDataset(data=[r.data for r in requests], output_dim=output_dim)
    dataset.init_seq_order()
    batches = dataset.generate_batches(recurrent_net=self.engine.network.recurrent,
                                       batch_size=self.batch_size,
                                       max_seqs=self.max_seqs)

    with (yield self.lock.acquire()):
      ctt = ForwardTaskThread(self.engine.network, self.devices, dataset, batches)
      yield ctt.join()

    try:
      for i in range(dataset.num_seqs):
        requests[i].future.set_result(ctt.result[i])
        self.classification_queue.task_done()
    except Exception as e:
      print('exception', e)
      raise
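# classify_in_background() above expects each queued item to carry its input arrays
# in .data and a future in .future which it resolves with the forward result.
# A minimal sketch of such a request object and an enqueue coroutine (meant as a
# method on the same Tornado-based server class); the names here are illustrative
# assumptions, not taken from the source:
import tornado.gen
from tornado.concurrent import Future


class ClassificationRequest(object):
  def __init__(self, data):
    self.data = data  # dict of numpy arrays, e.g. {"data": ...}
    self.future = Future()  # resolved once the batch containing this request ran


@tornado.gen.coroutine
def enqueue_classification(self, data):
  request = ClassificationRequest(data)
  yield self.classification_queue.put(request)
  result = yield request.future  # wait for classify_in_background() to process it
  raise tornado.gen.Return(result)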
def _classify(params):
  ret = {}
  output_dim = {}
  hash = hashlib.new('ripemd160')
  hash.update(json.dumps(params).encode("utf8"))  # hashlib needs bytes under Python 3
  hash = hash.hexdigest()
  for k in params:
    try:
      params[k] = numpy.asarray(params[k], dtype='float32')
      if k != 'data':
        output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
    except Exception:
      if k != 'data' and not k in network.n_out:
        ret['error'] = 'unknown target: %s' % k
      else:
        ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(params[k]))
      break
  if not 'error' in ret:
    try:
      # Construct the dataset only inside the try block, so that invalid data is reported as an error.
      data = StaticDataset(data=[params], output_dim=output_dim)
      data.init_seq_order()
    except Exception:
      ret['error'] = "invalid data: %s" % params
    else:
      batches = data.generate_batches(recurrent_net=network.recurrent, batch_size=sys.maxsize, max_seqs=1)
      if not hash in workers:
        workers[hash] = ClassificationTaskThread(network, devices, data, batches)
        workers[hash].json_params = params
        print("worker started:", hash, file=log.v3)
      ret['result'] = {'hash': hash}
  return ret
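# The 'hash' returned above identifies the background worker, so a client that wants
# to poll for the result later can recompute it locally. A sketch of that, assuming
# the client serializes the parameters exactly like the server's json.dumps(params)
# call (ripemd160 availability depends on the local OpenSSL build):
import hashlib
import json


def request_hash(params):
  h = hashlib.new('ripemd160')
  h.update(json.dumps(params).encode("utf8"))
  return h.hexdigest()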
def post(self, *args, **kwargs):
  # TODO: Make this batch over a specific time period
  params = json.loads(self.request.body)
  output_dim = {}
  ret = {}
  # first get meta data
  engine_hash = params['engine_hash']
  print('Received engine hash: ', engine_hash, file=log.v4)
  # delete unnecessary stuff so that the rest works
  del params['engine_hash']
  # load in engine and hash
  engine = _engines[engine_hash]
  network = engine.network
  devices = _devices[engine_hash]
  hash_engine = hashlib.new('ripemd160')
  hash_engine.update((json.dumps(params) + engine_hash).encode("utf8"))  # hashlib needs bytes
  hash_temp = hash_engine.hexdigest()
  # process the data
  for k in params:
    try:
      params[k] = numpy.asarray(params[k], dtype='float32')
      if k != 'data':
        output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
    except Exception:
      if k != 'data' and not k in network.n_out:
        ret['error'] = 'unknown target: %s' % k
      else:
        ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(params[k]))
      break
  if not 'error' in ret:
    try:
      data = StaticDataset(data=[params], output_dim=output_dim)
      data.init_seq_order()
    except Exception:
      ret['error'] = 'Dataset server error'
      self.write(ret)
      return  # do not write the response a second time below
    else:
      batches = data.generate_batches(recurrent_net=network.recurrent, batch_size=sys.maxsize, max_seqs=1)
      if not hash_temp in _classify_cache:
        print('Starting classification', file=log.v3)
        # if we haven't yet processed this exact request, and saved it in the cache
        _classify_cache[hash_temp] = yield self.classification_task(
          network=network, devices=devices, data=data, batches=batches)
      ret = {'result': {k: _classify_cache[hash_temp].result[k].tolist()
                        for k in _classify_cache[hash_temp].result}}
  print("Finished processing classification with ID: ", hash_temp, file=log.v4)
  self.write(ret)
def post(self, *args, **kwargs):
  # TODO: Make this batch over a specific time period
  # TODO: Write formal documentation
  url_params = self.request.arguments
  output_dim = {}
  ret = {}
  data = {}
  data_format = ''
  data_type = ''
  engine_hash = ''
  data_shape = ''
  # First get meta data from URL parameters
  try:
    engine_hash = str(url_params['engine_hash']).replace("['", '').replace("']", '')
    if 'data_format' in url_params:
      data_format = str(url_params['data_format']).replace("['", '').replace("']", '')
    if 'data_type' in url_params:
      # Possible options: https://docs.scipy.org/doc/numpy-1.10.1/user/basics.types.html
      data_type = str(url_params['data_type']).replace("['", '').replace("']", '')
    if 'data_shape' in url_params:
      data_shape = str(url_params['data_shape']).replace("['", '').replace("']", '')  # either '' or 'dim1,dim2'
  except Exception as e:
    print('Parameter formatting exception: ' + str(e), file=log.v4)
  # Apply defaults, in case we didn't get them through the header.
  if data_format == '':
    data_format = 'json'
  if data_type == '':
    data_type = 'float32'
  print('Received engine hash: ' + engine_hash + ', data formatted: ' + data_format +
        ', data type ' + data_type + ' data shape: ' + data_shape, file=log.v5)
  # Load in engine and hash
  engine = _engines[engine_hash]
  network = engine.network
  devices = _devices[engine_hash]
  hash_engine = hashlib.new('ripemd160')
  hash_engine.update((str(self.request.body) + engine_hash).encode("utf8"))  # hashlib needs bytes
  hash_temp = hash_engine.hexdigest()
  # Pre-process the data
  if data_format == 'json':
    data = json.loads(self.request.body)
    for k in data:
      try:
        data[k] = np.asarray(data[k], dtype=data_type)
        if k != 'data':
          output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
      except Exception:
        if k != 'data' and not k in network.n_out:
          ret['error'] = 'unknown target: %s' % k
        else:
          ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(data[k]))
        break
  if data_format == 'binary':
    try:
      float_array = array(self._get_type_code(data_type))
      float_array.fromstring(self.request.body)
      data['data'] = np.asarray(float_array.tolist(), dtype=data_type)
      data_shape_arr = data_shape.split(",")
      shape = (int(data_shape_arr[0]), int(data_shape_arr[1]))
      data['data'] = np.reshape(data['data'], shape)
    except Exception as e:
      print('Binary data error: ' + str(e), file=log.v4)
      ret['error'] = 'Error during binary data conversion: ' + str(e)
  # Do dataset creation and classification.
  if not 'error' in ret:
    try:
      data = StaticDataset(data=[data], output_dim=output_dim)
      data.init_seq_order()
    except Exception:
      ret['error'] = 'Dataset server error'
      self.write(ret)
      return  # do not write the response a second time below
    else:
      batches = data.generate_batches(recurrent_net=network.recurrent, batch_size=sys.maxsize, max_seqs=1)
      if not hash_temp in _classify_cache:
        print('Starting classification', file=log.v3)
        # If we haven't yet processed this exact request and saved it in the cache
        _classify_cache[hash_temp] = yield self.classification_task(
          network=network, devices=devices, data=data, batches=batches)
      ret = {'result': {k: _classify_cache[hash_temp].result[k].tolist()
                        for k in _classify_cache[hash_temp].result}}
      # Update engine usage for performance optimization
      _engine_usage[engine_hash] = datetime.datetime.now()
  print("Finished processing classification with ID: ", hash_temp, file=log.v3)
  self.write(ret)
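# A sketch of how a client could call the handler above with a raw float32 payload,
# assuming the handler is mounted at /classify on localhost:3033 and that engine_hash
# was obtained from a previous engine-loading request (URL, port and route are
# assumptions, not taken from the source):
import array

import requests

engine_hash = "<hash from the engine-loading request>"  # placeholder
seq = [[0.1, 0.2, -0.3], [0.4, 0.5, -0.6]]  # (time, feature) input, here 2 frames x 3 dims
payload = array.array('f', [v for frame in seq for v in frame]).tobytes()
response = requests.post(
  "http://localhost:3033/classify",
  params={"engine_hash": engine_hash, "data_format": "binary",
          "data_type": "float32", "data_shape": "2,3"},
  data=payload)
print(response.json())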
def test_multi_target_init():
  config = Config()
  config.update({
    "multiprocessing": False,
    "blocking": True,
    "device": "cpu",
    "num_epochs": 1,
    "num_inputs": 3,
    "num_outputs": {"t1": 4, "t2": 5},
    "learning_rate": 1.0,
  })
  config.network_topology_json = """
  {
  "fw0": {"class": "hidden", "activation": "identity", "n_out": 3},
  "out1": {"class": "softmax", "loss": "ce", "target": "t1", "from": ["fw0"]},
  "out2": {"class": "softmax", "loss": "ce", "target": "t2", "from": ["fw0"]}
  }
  """
  device = Device("cpu", config=config, blocking=True)
  assert_true(device.trainnet, "train network initialized")
  assert_true(device.testnet, "test network initialized")
  param_vars = device.trainnet.get_all_params_vars()
  print "params:", param_vars
  assert_equal(len(param_vars), 6, "W, b vars for each out, and fw")
  num_params = get_num_params(param_vars)
  assert_equal(num_params, (3 * 3 + 3) + (3 * 4 + 4) + (3 * 5 + 5), "W, b for each out, and fw")
  assert_in("fw0", device.testnet.hidden)
  assert_in("out1", device.testnet.output)
  assert_in("out2", device.testnet.output)
  assert_is(device.testnet.j["t1"], device.testnet.output["out1"].index)
  assert_true(device.updater)
  update_list = device.updater.getUpdateList()
  print "update list:"
  pprint(update_list)
  update_dict = dict(update_list)
  assert_equal(len(update_dict), len(update_list), "all params in update list only once")
  assert_in("fw0", device.trainnet.hidden)
  assert_equal(len(device.trainnet.hidden), 1)
  assert_in("W_in_data_fw0", device.trainnet.hidden["fw0"].params)
  assert_in("b_fw0", device.trainnet.hidden["fw0"].params)
  assert_equal(len(device.trainnet.hidden["fw0"].params), 2)
  assert_in("out1", device.trainnet.output)
  assert_equal(len(device.trainnet.output), 2)
  assert_in("W_in_fw0_out1", device.trainnet.output["out1"].params)
  assert_in("b_out1", device.trainnet.output["out1"].params)
  assert_equal(len(device.trainnet.output["out1"].params), 2)
  assert_in(device.trainnet.hidden["fw0"].params["W_in_data_fw0"], update_dict)
  assert_in(device.trainnet.hidden["fw0"].params["b_fw0"], update_dict)
  assert_in(device.trainnet.output["out1"].params["W_in_fw0_out1"], update_dict)
  assert_in(device.trainnet.output["out1"].params["b_out1"], update_dict)
  assert_in(device.trainnet.output["out2"].params["W_in_fw0_out2"], update_dict)
  assert_in(device.trainnet.output["out2"].params["b_out2"], update_dict)
  assert_equal(len(update_dict), 6)

  # Set net params.
  net_params = {
    "fw0": {"W_in_data_fw0": numpy.identity(3, dtype="float32"),
            "b_fw0": numpy.zeros((3,), dtype="float32")},
    "out1": {"W_in_fw0_out1": numpy.arange(0.0, 1.2, 0.1, dtype="float32").reshape((3, 4)),
             "b_out1": numpy.arange(0.0, 4, dtype="float32")},
    "out2": {"W_in_fw0_out2": numpy.arange(0.0, 1.5, 0.1, dtype="float32").reshape((3, 5)),
             "b_out2": numpy.arange(0.0, 5, dtype="float32")}
  }
  device.trainnet.set_params_by_dict(net_params)
  device.testnet.set_params_by_dict(net_params)
  # Show params.
  for p in param_vars:
    print "init %s:" % p
    pprint(p.get_value())

  # Init dataset.
  dataset = StaticDataset(data=[{
    "data": numpy.array([[0.1, 0.2, -0.3]], dtype="float32"),
    "t1": numpy.array([2]),
    "t2": numpy.array([4])
  }], output_dim=config.typed_value("num_outputs"))
  dataset.init_seq_order()
  assert_equal(dataset.is_data_sparse("data"), False)
  assert_equal(dataset.is_data_sparse("t1"), True)
  assert_equal(dataset.is_data_sparse("t2"), True)

  # Copy to device allocation.
  success = assign_dev_data_single_seq(device, dataset, 0)
  assert_true(success, "failed to allocate & assign data")

  # Check allocated data.
  assert_equal(device.targets["data"].shape, (1, 1, 3))  # input shape. (time,batch,dim)
  assert_in("t1", device.targets)
  assert_in("t2", device.targets)
  assert_equal(device.targets["t1"].shape, (1, 1))
  assert_equal(device.targets["t2"].shape, (1, 1))
  assert_equal(device.output_index["data"].shape, (1, 1))
  numpy.testing.assert_equal(device.output_index["data"], numpy.array([[1]]))
  assert_equal(device.output_index["t1"].shape, (1, 1))
  numpy.testing.assert_equal(device.output_index["t1"], numpy.array([[1]]))

  # Forward test.
  device.update_data()
  device.testnet.costs["out1"].name = "out1_cost"  # nice in the func graph
  out_i1 = device.testnet.output["out1"].index
  out_i1_nonzero = device.testnet.output["out1"].i
  nll1, pcx1 = T.nnet.crossentropy_softmax_1hot(x=device.testnet.output["out1"].y_m[out_i1_nonzero],
                                                y_idx=device.testnet.output["out1"].y_data_flat[out_i1_nonzero])
  forward_func = theano.function(
    inputs=[device.block_start, device.block_end],
    outputs=[
      device.testnet.j["t1"], out_i1, out_i1_nonzero[0], nll1, pcx1,
      device.testnet.costs["out1"],
      device.testnet.output["out1"].p_y_given_x,
      device.testnet.costs["out2"],
      device.testnet.output["out2"].p_y_given_x],
    givens=device.make_givens(device.testnet),
    no_default_updates=True,
    on_unused_input='warn',
    name="forward")
  #print "forward func:"
  #theano.printing.debugprint(forward_func)
  net_j1, out_i1_val, out_i1_nz_val, nll1_val, pcx1_val, t1_cost, t1_y, t2_cost, t2_y = forward_func(0, 1)
  print "forward results:"
  pprint(net_j1)
  pprint(out_i1_val)
  pprint(out_i1_nz_val)
  pprint(nll1_val)
  pprint(pcx1_val)
  pprint(t1_cost)
  pprint(t1_y)
  pprint(t2_cost)
  pprint(t2_y)
  assert_equal(net_j1, numpy.array([[1]]))
  assert_equal(out_i1_val, numpy.array([[1]]))
  assert_equal(out_i1_nz_val, numpy.array([0]))
  assert_almost_equal(nll1_val, numpy.array([t1_cost]))
  numpy.testing.assert_almost_equal(t1_y, pcx1_val)
  assert_almost_equal(t1_cost, 1.440189698561195, places=6)
  assert_almost_equal(t2_cost, 0.45191439593759336, places=6)
  numpy.testing.assert_almost_equal(
    t1_y, numpy.array([[0.0320586, 0.08714432, 0.23688282, 0.64391426]]), decimal=6)
  numpy.testing.assert_almost_equal(
    t2_y, numpy.array([[0.01165623, 0.03168492, 0.08612854, 0.23412166, 0.63640865]]), decimal=6)

  # One train step.
  device.set_learning_rate(config.typed_value("learning_rate"))
  device.run("train")
  output_list, outputs_format = device.result()
  assert_is_instance(output_list, list)
  assert_true(outputs_format, "for train, we should always get the format")
  outputs = Device.make_result_dict(output_list, outputs_format)
  pprint(outputs)
  assert_in("cost:out1", outputs)
  assert_greater(outputs["cost:out1"], 0)
  assert_almost_equal(outputs["cost:out1"], t1_cost)

  # Get net params.
  params = device.get_net_train_params(device.trainnet)
  references_params = {
    "W_in_data_fw0": numpy.array([[1.00055406e+00, 5.54056978e-04, 5.54056978e-04],
                                  [1.10811396e-03, 1.00110811e+00, 1.10811396e-03],
                                  [-1.66217093e-03, -1.66217093e-03, 9.98337829e-01]]),
    "b_fw0": numpy.array([0.00554057, 0.00554057, 0.00554057]),
    "W_in_fw0_out1": numpy.array([[-0.00320586, 0.09128557, 0.27631172, 0.23560857],
                                  [0.39358828, 0.48257114, 0.75262344, 0.57121715],
                                  [0.80961758, 0.9261433, 0.77106485, 1.29317428]]),
    "b_out1": numpy.array([-0.0320586, 0.91285568, 2.76311718, 2.35608574]),
    "W_in_fw0_out2": numpy.array([[-1.16562310e-03, 9.68315079e-02, 1.91387146e-01, 2.76587834e-01, 4.36359135e-01],
                                  [4.97668754e-01, 5.93663016e-01, 6.82774291e-01, 7.53175669e-01, 9.72718271e-01],
                                  [1.00349687e+00, 1.10950548e+00, 1.22583856e+00, 1.37023650e+00, 1.29092259e+00]]),
    "b_out2": numpy.array([-0.01165623, 0.96831508, 1.91387146, 2.76587834, 4.36359135])
  }
  assert_equal(len(param_vars), len(params))
  for p, v in zip(param_vars, params):
    print "%s:" % p
    pprint(v)
    assert_true(p.name)
    numpy.testing.assert_almost_equal(references_params[p.name], v, decimal=6)
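# Where the expected out1 cost of ~1.4402 in the test above comes from: fw0 is set to
# the identity, so out1 sees the raw input and its cross-entropy can be recomputed
# directly with numpy (a standalone check, not part of the original test):
import numpy

x = numpy.array([0.1, 0.2, -0.3], dtype="float32")
W = numpy.arange(0.0, 1.2, 0.1, dtype="float32").reshape((3, 4))  # W_in_fw0_out1
b = numpy.arange(0.0, 4, dtype="float32")  # b_out1
logits = x.dot(W) + b  # [-0.16, 0.84, 1.84, 2.84]
p = numpy.exp(logits) / numpy.exp(logits).sum()  # ~[0.0321, 0.0871, 0.2369, 0.6439]
print(-numpy.log(p[2]))  # target t1 = 2 -> ~1.440190, matches cost:out1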
def test_combi_auto_enc():
  config = Config()
  config.update({
    "multiprocessing": False,
    "blocking": True,
    "device": "cpu",
    "num_epochs": 1,
    "num_inputs": 3,
    "num_outputs": {"classes": 2},
    "learning_rate": 1.0,
    "network": {
      "output": {"class": "softmax", "loss": "ce", "target": "classes"},
      "auto-enc": {"class": "softmax", "loss": "sse", "dtype": "float32", "target": "data"}
    }
  })
  device = Device("cpu", config=config, blocking=True)

  # Set net params.
  def get_net_params(with_auto_enc=True):
    d = {
      "output": {"W_in_data_output": numpy.arange(0.1, 0.7, 0.1, dtype="float32").reshape((3, 2)),
                 "b_output": numpy.arange(0.0, 2, dtype="float32")}
    }
    if with_auto_enc:
      d["auto-enc"] = {"W_in_data_auto-enc": numpy.arange(0.1, 1.0, 0.1, dtype="float32").reshape((3, 3)),
                       "b_auto-enc": numpy.arange(0.0, 3, dtype="float32")}
    return d

  device.trainnet.set_params_by_dict(get_net_params())
  device.testnet.set_params_by_dict(get_net_params())
  # Show params.
  for p in device.trainnet.get_all_params_vars():
    print "init %s:" % p
    pprint(p.get_value())

  # Init dataset.
  dataset = StaticDataset(data=[{
    "data": numpy.array([[0.1, 0.2, -0.3]], dtype="float32"),
    "classes": numpy.array([1]),
  }], output_dim=config.typed_value("num_outputs"))
  dataset.init_seq_order()

  # Copy to device allocation.
  success = assign_dev_data_single_seq(device, dataset, 0)
  assert_true(success, "failed to allocate & assign data")

  # One train step.
  device.set_learning_rate(config.typed_value("learning_rate"))
  device.run("train")
  output_list, outputs_format = device.result()
  assert_is_instance(output_list, list)
  assert_true(outputs_format, "for train, we should always get the format")
  outputs = Device.make_result_dict(output_list, outputs_format)
  pprint(outputs)
  assert_in("cost:output", outputs)
  assert_in("cost:auto-enc", outputs)
  expected_cost_output = 0.3132616877555847
  assert_almost_equal(outputs["cost:output"], expected_cost_output, places=6)
  exact_cost_output = outputs["cost:output"]
  assert_almost_equal(outputs["cost:auto-enc"], 5.263200283050537, places=6)

  # Now, drop the auto-enc from the network, and redo the same thing.
  del config.typed_value("network")["auto-enc"]
  device = Device("cpu", config=config, blocking=True)
  device.trainnet.set_params_by_dict(get_net_params(with_auto_enc=False))
  device.testnet.set_params_by_dict(get_net_params(with_auto_enc=False))
  for p in device.trainnet.get_all_params_vars():
    print "second run, init %s:" % p
    pprint(p.get_value())
  dataset.init_seq_order()  # reset. probably not needed
  success = assign_dev_data_single_seq(device, dataset, 0)
  assert_true(success, "failed to allocate & assign data")
  device.set_learning_rate(config.typed_value("learning_rate"))
  device.run("train")
  output_list, outputs_format = device.result()
  assert_is_instance(output_list, list)
  assert_true(outputs_format, "for train, we should always get the format")
  outputs = Device.make_result_dict(output_list, outputs_format)
  pprint(outputs)
  assert_in("cost:output", outputs)
  assert_not_in("cost:auto-enc", outputs)
  assert_almost_equal(outputs["cost:output"], expected_cost_output, places=6)
  assert_equal(outputs["cost:output"], exact_cost_output)
def main(): rnn.init( command_line_options=sys.argv[1:], config_updates={ "task": "nop", "log": None, "device": "cpu", "allow_random_model_init": True, "debug_add_check_numerics_on_output": False}, extra_greeting="Import Blocks MT model.") assert Util.BackendEngine.is_tensorflow_selected() config = rnn.config # Load Blocks MT model params. if not config.has("blocks_mt_model"): print("Please provide the option blocks_mt_model.") sys.exit(1) blocks_mt_model_fn = config.value("blocks_mt_model", "") assert blocks_mt_model_fn assert os.path.exists(blocks_mt_model_fn) if os.path.isdir(blocks_mt_model_fn): blocks_mt_model_fn += "/params.npz" assert os.path.exists(blocks_mt_model_fn) dry_run = config.bool("dry_run", False) if dry_run: our_model_fn = None print("Dry-run, will not save model.") else: our_model_fn = config.value('model', "returnn-model") + ".imported" print("Will save Returnn model as %s." % our_model_fn) assert os.path.exists(os.path.dirname(our_model_fn) or "."), "model-dir does not exist" assert not os.path.exists(our_model_fn + Util.get_model_filename_postfix()), "model-file already exists" blocks_mt_model = numpy.load(blocks_mt_model_fn) assert isinstance(blocks_mt_model, numpy.lib.npyio.NpzFile), "did not expect type %r in file %r" % ( type(blocks_mt_model), blocks_mt_model_fn) print("Params found in Blocks model:") blocks_params = {} # type: dict[str,numpy.ndarray] blocks_params_hierarchy = {} # type: dict[str,dict[str]] blocks_total_num_params = 0 for key in sorted(blocks_mt_model.keys()): value = blocks_mt_model[key] key = key.replace("-", "/") assert key[0] == "/" key = key[1:] blocks_params[key] = value print(" %s: %s, %s" % (key, value.shape, value.dtype)) blocks_total_num_params += numpy.prod(value.shape) d = blocks_params_hierarchy for part in key.split("/"): d = d.setdefault(part, {}) print("Blocks total num params: %i" % blocks_total_num_params) # Init our network structure. from TFNetworkRecLayer import _SubnetworkRecCell _SubnetworkRecCell._debug_out = [] # enable for debugging intermediate values below ChoiceLayer._debug_out = [] # also for debug outputs of search rnn.engine.use_search_flag = True # construct the net as in search rnn.engine.init_network_from_config() print("Our network model params:") our_params = {} # type: dict[str,tf.Variable] our_total_num_params = 0 for v in rnn.engine.network.get_params_list(): key = v.name[:-2] our_params[key] = v print(" %s: %s, %s" % (key, v.shape, v.dtype.base_dtype.name)) our_total_num_params += numpy.prod(v.shape.as_list()) print("Our total num params: %i" % our_total_num_params) # Now matching... 
blocks_used_params = set() # type: set[str] our_loaded_params = set() # type: set[str] def import_var(our_var, blocks_param): """ :param tf.Variable our_var: :param str|numpy.ndarray blocks_param: """ assert isinstance(our_var, tf.Variable) if isinstance(blocks_param, str): blocks_param = load_blocks_var(blocks_param) assert isinstance(blocks_param, numpy.ndarray) assert_equal(tuple(our_var.shape.as_list()), blocks_param.shape) our_loaded_params.add(our_var.name[:-2]) our_var.load(blocks_param, session=rnn.engine.tf_session) def load_blocks_var(blocks_param_name): """ :param str blocks_param_name: :rtype: numpy.ndarray """ assert isinstance(blocks_param_name, str) assert blocks_param_name in blocks_params blocks_used_params.add(blocks_param_name) return blocks_params[blocks_param_name] enc_name = "bidirectionalencoder" enc_embed_name = "EncoderLookUp0.W" assert enc_name in blocks_params_hierarchy assert enc_embed_name in blocks_params_hierarchy[enc_name] # input embedding num_encoder_layers = max([ int(re.match(".*([0-9]+)", s).group(1)) for s in blocks_params_hierarchy[enc_name] if s.startswith("EncoderBidirectionalLSTM")]) blocks_input_dim, blocks_input_embed_dim = blocks_params["%s/%s" % (enc_name, enc_embed_name)].shape print("Blocks input dim: %i, embed dim: %i" % (blocks_input_dim, blocks_input_embed_dim)) print("Blocks num encoder layers: %i" % num_encoder_layers) expected_enc_entries = ( ["EncoderLookUp0.W"] + ["EncoderBidirectionalLSTM%i" % i for i in range(1, num_encoder_layers + 1)]) assert_equal(set(expected_enc_entries), set(blocks_params_hierarchy[enc_name].keys())) our_input_layer = find_our_input_embed_layer() assert our_input_layer.input_data.dim == blocks_input_dim assert our_input_layer.output.dim == blocks_input_embed_dim assert not our_input_layer.with_bias import_var(our_input_layer.params["W"], "%s/%s" % (enc_name, enc_embed_name)) dec_name = "decoder/sequencegenerator" dec_hierarchy_base = get_in_hierarchy(dec_name, blocks_params_hierarchy) assert_equal(set(dec_hierarchy_base.keys()), {"att_trans", "readout"}) dec_embed_name = "readout/lookupfeedbackwmt15/lookuptable.W" get_in_hierarchy(dec_embed_name, dec_hierarchy_base) # check for i in range(num_encoder_layers): # Assume standard LSTMCell. 
# i = input_gate, j = new_input, f = forget_gate, o = output_gate # lstm_matrix = self._linear1([inputs, m_prev]) # i, j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=4, axis=1) # bias (4*in), kernel (in+out,4*out), w_(f|i|o)_diag (out) # prefix: rec/rnn/lstm_cell # Blocks: gate-in, gate-forget, next-in, gate-out for direction in ("fwd", "bwd"): our_layer = get_network().layers["lstm%i_%s" % (i, direction[:2])] blocks_prefix = "bidirectionalencoder/EncoderBidirectionalLSTM%i" % (i + 1,) # (in,out*4), (out*4,) W_in, b = [load_blocks_var( "%s/%s_fork/fork_inputs.%s" % (blocks_prefix, {"bwd": "back", "fwd": "fwd"}[direction], p)) for p in ("W", "b")] W_re = load_blocks_var( "%s/bidirectionalseparateparameters/%s.W_state" % (blocks_prefix, {"fwd": "forward", "bwd": "backward"}[direction])) W = numpy.concatenate([W_in, W_re], axis=0) b = lstm_vec_blocks_to_tf(b) W = lstm_vec_blocks_to_tf(W) import_var(our_layer.params["rnn/lstm_cell/bias"], b) import_var(our_layer.params["rnn/lstm_cell/kernel"], W) import_var(our_layer.params["initial_c"], "%s/bidirectionalseparateparameters/%s.initial_cells" % (blocks_prefix, {"fwd": "forward", "bwd": "backward"}[direction])) import_var(our_layer.params["initial_h"], "%s/bidirectionalseparateparameters/%s.initial_state" % (blocks_prefix, {"fwd": "forward", "bwd": "backward"}[direction])) for s1, s2 in [("W_cell_to_in", "w_i_diag"), ("W_cell_to_forget", "w_f_diag"), ("W_cell_to_out", "w_o_diag")]: import_var(our_layer.params["rnn/lstm_cell/%s" % s2], "%s/bidirectionalseparateparameters/%s.%s" % (blocks_prefix, {"fwd": "forward", "bwd": "backward"}[direction], s1)) import_var(get_network().layers["enc_ctx"].params["W"], "decoder/sequencegenerator/att_trans/attention/encoder_state_transformer.W") import_var(get_network().layers["enc_ctx"].params["b"], "decoder/sequencegenerator/att_trans/attention/encoder_state_transformer.b") import_var(our_params["output/rec/s/initial_c"], "decoder/sequencegenerator/att_trans/lstm_decoder.initial_cells") import_var(our_params["output/rec/s/initial_h"], "decoder/sequencegenerator/att_trans/lstm_decoder.initial_state") import_var(our_params["output/rec/weight_feedback/W"], "decoder/sequencegenerator/att_trans/attention/sum_alignment_transformer.W") import_var(our_params["output/rec/target_embed/W"], "decoder/sequencegenerator/readout/lookupfeedbackwmt15/lookuptable.W") import_var(our_params["fertility/W"], "decoder/sequencegenerator/att_trans/attention/fertility_transformer.W") import_var(our_params["output/rec/energy/W"], "decoder/sequencegenerator/att_trans/attention/energy_comp/linear.W") prev_s_trans_W_states = load_blocks_var("decoder/sequencegenerator/att_trans/attention/state_trans/transform_states.W") prev_s_trans_W_cells = load_blocks_var("decoder/sequencegenerator/att_trans/attention/state_trans/transform_cells.W") prev_s_trans_W = numpy.concatenate([prev_s_trans_W_cells, prev_s_trans_W_states], axis=0) import_var(our_params["output/rec/prev_s_transformed/W"], prev_s_trans_W) import_var(our_params["output/rec/s/rec/lstm_cell/bias"], numpy.zeros(our_params["output/rec/s/rec/lstm_cell/bias"].shape)) dec_lstm_kernel_in_feedback = load_blocks_var("decoder/sequencegenerator/att_trans/feedback_to_decoder/fork_inputs.W") dec_lstm_kernel_in_ctx = load_blocks_var("decoder/sequencegenerator/att_trans/context_to_decoder/fork_inputs.W") dec_lstm_kernel_re = load_blocks_var("decoder/sequencegenerator/att_trans/lstm_decoder.W_state") dec_lstm_kernel = numpy.concatenate([dec_lstm_kernel_in_feedback, 
dec_lstm_kernel_in_ctx, dec_lstm_kernel_re], axis=0) dec_lstm_kernel = lstm_vec_blocks_to_tf(dec_lstm_kernel) import_var(our_params["output/rec/s/rec/lstm_cell/kernel"], dec_lstm_kernel) for s1, s2 in [("W_cell_to_in", "w_i_diag"), ("W_cell_to_forget", "w_f_diag"), ("W_cell_to_out", "w_o_diag")]: import_var(our_params["output/rec/s/rec/lstm_cell/%s" % s2], "decoder/sequencegenerator/att_trans/lstm_decoder.%s" % s1) readout_in_W_states = load_blocks_var("decoder/sequencegenerator/readout/merge/transform_states.W") readout_in_W_feedback = load_blocks_var("decoder/sequencegenerator/readout/merge/transform_feedback.W") readout_in_W_att = load_blocks_var("decoder/sequencegenerator/readout/merge/transform_weighted_averages.W") readout_in_W = numpy.concatenate([readout_in_W_states, readout_in_W_feedback, readout_in_W_att], axis=0) import_var(our_params["output/rec/readout_in/W"], readout_in_W) import_var(our_params["output/rec/readout_in/b"], "decoder/sequencegenerator/readout/initializablefeedforwardsequence/maxout_bias.b") import_var(our_params["output/rec/output_prob/W"], "decoder/sequencegenerator/readout/initializablefeedforwardsequence/softmax1.W") import_var(our_params["output/rec/output_prob/b"], "decoder/sequencegenerator/readout/initializablefeedforwardsequence/softmax1.b") print("Not initialized own params:") count = 0 for key, v in sorted(our_params.items()): if key in our_loaded_params: continue print(" %s: %s, %s" % (key, v.shape, v.dtype.base_dtype.name)) count += 1 if not count: print(" None.") print("Not used Blocks params:") count = 0 for key, value in sorted(blocks_params.items()): if key in blocks_used_params: continue print(" %s: %s, %s" % (key, value.shape, value.dtype)) count += 1 if not count: print(" None.") print("Done.") blocks_debug_dump_output = config.value("blocks_debug_dump_output", None) if blocks_debug_dump_output: print("Will read Blocks debug dump output from %r and compare with Returnn outputs." 
% blocks_debug_dump_output) blocks_initial_outputs = numpy.load("%s/initial_states_data.0.npz" % blocks_debug_dump_output) blocks_search_log = pickle.load(open("%s/search.log.pkl" % blocks_debug_dump_output, "rb"), encoding="bytes") blocks_search_log = {d[b"step"]: d for d in blocks_search_log} input_seq = blocks_initial_outputs["input"] beam_size, seq_len = input_seq.shape input_seq = input_seq[0] # all the same, select beam 0 assert isinstance(input_seq, numpy.ndarray) print("Debug input seq: %s" % input_seq.tolist()) from GeneratingDataset import StaticDataset dataset = StaticDataset( data=[{"data": input_seq}], output_dim={"data": get_network().extern_data.get_default_input_data().get_kwargs()}) dataset.init_seq_order(epoch=0) extract_output_dict = { "enc_src_emb": get_network().layers["source_embed"].output.get_placeholder_as_batch_major(), "encoder": get_network().layers["encoder"].output.get_placeholder_as_batch_major(), "enc_ctx": get_network().layers["enc_ctx"].output.get_placeholder_as_batch_major(), "output": get_network().layers["output"].output.get_placeholder_as_batch_major() } from TFNetworkLayer import concat_sources for i in range(num_encoder_layers): extract_output_dict["enc_layer_%i" % i] = concat_sources( [get_network().layers["lstm%i_fw" % i], get_network().layers["lstm%i_bw" % i]] ).get_placeholder_as_batch_major() extract_output_dict["enc_layer_0_fwd"] = get_network().layers["lstm0_fw"].output.get_placeholder_as_batch_major() our_output = rnn.engine.run_single( dataset=dataset, seq_idx=0, output_dict=extract_output_dict) blocks_out = blocks_initial_outputs["bidirectionalencoder_EncoderLookUp0__EncoderLookUp0_apply_output"] our_out = our_output["enc_src_emb"] print("our enc emb shape:", our_out.shape) print("Blocks enc emb shape:", blocks_out.shape) assert our_out.shape[:2] == (1, seq_len) assert blocks_out.shape[:2] == (seq_len, beam_size) assert our_out.shape[2] == blocks_out.shape[2] assert_almost_equal(our_out[0], blocks_out[:, 0], decimal=5) blocks_lstm0_out_ref = calc_lstm(blocks_out[:, 0], blocks_params) blocks_lstm0_out = blocks_initial_outputs["bidirectionalencoder_EncoderBidirectionalLSTM1_bidirectionalseparateparameters_forward__forward_apply_states"] our_lstm0_out = our_output["enc_layer_0_fwd"] assert blocks_lstm0_out.shape == (seq_len, beam_size) + blocks_lstm0_out_ref.shape assert our_lstm0_out.shape == (1, seq_len) + blocks_lstm0_out_ref.shape assert_almost_equal(blocks_lstm0_out[0, 0], blocks_lstm0_out_ref, decimal=6) print("Blocks LSTM0 frame 0 matched to ref calc.") assert_almost_equal(our_lstm0_out[0, 0], blocks_lstm0_out_ref, decimal=6) print("Our LSTM0 frame 0 matched to ref calc.") for i in range(num_encoder_layers): blocks_out = blocks_initial_outputs[ "bidirectionalencoder_EncoderBidirectionalLSTM%i_bidirectionalseparateparameters__bidirectionalseparateparameters_apply_output_0" % (i + 1,)] our_out = our_output["enc_layer_%i" % i] print("our enc layer %i shape:" % i, our_out.shape) print("Blocks enc layer %i shape:" % i, blocks_out.shape) assert our_out.shape[:2] == (1, seq_len) assert blocks_out.shape[:2] == (seq_len, beam_size) assert our_out.shape[2] == blocks_out.shape[2] assert_almost_equal(our_out[0], blocks_out[:, 0], decimal=6) print("our encoder shape:", our_output["encoder"].shape) blocks_encoder_out = blocks_initial_outputs["bidirectionalencoder__bidirectionalencoder_apply_representation"] print("Blocks encoder shape:", blocks_encoder_out.shape) assert our_output["encoder"].shape[:2] == (1, seq_len) assert 
blocks_encoder_out.shape[:2] == (seq_len, beam_size) assert our_output["encoder"].shape[2] == blocks_encoder_out.shape[2] assert_almost_equal(our_output["encoder"][0], blocks_encoder_out[:, 0], decimal=6) blocks_first_frame_outputs = numpy.load("%s/next_states.0.npz" % blocks_debug_dump_output) blocks_enc_ctx_out = blocks_first_frame_outputs["decoder_sequencegenerator_att_trans_attention__attention_preprocess_preprocessed_attended"] our_enc_ctx_out = our_output["enc_ctx"] print("Blocks enc ctx shape:", blocks_enc_ctx_out.shape) assert blocks_enc_ctx_out.shape[:2] == (seq_len, beam_size) assert our_enc_ctx_out.shape[:2] == (1, seq_len) assert blocks_enc_ctx_out.shape[2:] == our_enc_ctx_out.shape[2:] assert_almost_equal(blocks_enc_ctx_out[:, 0], our_enc_ctx_out[0], decimal=5) fertility = numpy.dot(blocks_encoder_out[:, 0], blocks_params["decoder/sequencegenerator/att_trans/attention/fertility_transformer.W"]) fertility = sigmoid(fertility) assert fertility.shape == (seq_len, 1) fertility = fertility[:, 0] assert fertility.shape == (seq_len,) our_dec_outputs = {v["step"]: v for v in _SubnetworkRecCell._debug_out} assert our_dec_outputs print("our dec frame keys:", sorted(our_dec_outputs[0].keys())) our_dec_search_outputs = {v["step"]: v for v in ChoiceLayer._debug_out} assert our_dec_search_outputs print("our dec search frame keys:", sorted(our_dec_search_outputs[0].keys())) print("Blocks search frame keys:", sorted(blocks_search_log[0].keys())) dec_lookup = blocks_params["decoder/sequencegenerator/readout/lookupfeedbackwmt15/lookuptable.W"] last_lstm_state = blocks_params["decoder/sequencegenerator/att_trans/lstm_decoder.initial_state"] last_lstm_cells = blocks_params["decoder/sequencegenerator/att_trans/lstm_decoder.initial_cells"] last_accumulated_weights = numpy.zeros((seq_len,), dtype="float32") last_output = 0 dec_seq_len = 0 for dec_step in range(100): blocks_frame_state_outputs_fn = "%s/next_states.%i.npz" % (blocks_debug_dump_output, dec_step) blocks_frame_probs_outputs_fn = "%s/logprobs.%i.npz" % (blocks_debug_dump_output, dec_step) if dec_step > 3: if not os.path.exists(blocks_frame_state_outputs_fn) or not os.path.exists(blocks_frame_probs_outputs_fn): print("Seq not ended yet but frame not found for step %i." 
% dec_step) break blocks_frame_state_outputs = numpy.load(blocks_frame_state_outputs_fn) blocks_frame_probs_outputs = numpy.load(blocks_frame_probs_outputs_fn) blocks_search_frame = blocks_search_log[dec_step] our_dec_frame_outputs = our_dec_outputs[dec_step] assert our_dec_frame_outputs["step"] == dec_step assert our_dec_frame_outputs[":i.output"].tolist() == [dec_step] our_dec_search_frame_outputs = our_dec_search_outputs[dec_step] blocks_last_lstm_state = blocks_frame_probs_outputs["decoder_sequencegenerator__sequencegenerator_generate_states"] blocks_last_lstm_cells = blocks_frame_probs_outputs["decoder_sequencegenerator__sequencegenerator_generate_cells"] assert blocks_last_lstm_state.shape == (beam_size, last_lstm_state.shape[0]) assert_almost_equal(blocks_last_lstm_state[0], last_lstm_state, decimal=5) assert_almost_equal(blocks_last_lstm_cells[0], last_lstm_cells, decimal=5) our_last_lstm_cells = our_dec_frame_outputs["prev:s.extra.state"][0] our_last_lstm_state = our_dec_frame_outputs["prev:s.extra.state"][1] assert our_last_lstm_state.shape == our_last_lstm_cells.shape == (beam_size, last_lstm_state.shape[0]) assert_almost_equal(our_last_lstm_state[0], last_lstm_state, decimal=5) assert_almost_equal(our_last_lstm_cells[0], last_lstm_cells, decimal=5) our_last_s = our_dec_frame_outputs["prev:s.output"] assert our_last_s.shape == (beam_size, last_lstm_state.shape[0]) assert_almost_equal(our_last_s[0], last_lstm_state, decimal=5) blocks_last_accum_weights = blocks_frame_probs_outputs["decoder_sequencegenerator__sequencegenerator_generate_accumulated_weights"] assert blocks_last_accum_weights.shape == (beam_size, seq_len) assert_almost_equal(blocks_last_accum_weights[0], last_accumulated_weights, decimal=5) our_last_accum_weights = our_dec_frame_outputs["prev:accum_att_weights.output"] assert our_last_accum_weights.shape == (beam_size, seq_len if dec_step > 0 else 1, 1) if dec_step > 0: assert_almost_equal(our_last_accum_weights[0, :, 0], last_accumulated_weights, decimal=4) else: assert_almost_equal(our_last_accum_weights[0, 0, 0], last_accumulated_weights.sum(), decimal=4) energy_sum = numpy.copy(blocks_enc_ctx_out[:, 0]) # (T,enc-ctx-dim) weight_feedback = numpy.dot(last_accumulated_weights[:, None], blocks_params["decoder/sequencegenerator/att_trans/attention/sum_alignment_transformer.W"]) energy_sum += weight_feedback transformed_states = numpy.dot(last_lstm_state[None, :], blocks_params["decoder/sequencegenerator/att_trans/attention/state_trans/transform_states.W"]) transformed_cells = numpy.dot(last_lstm_cells[None, :], blocks_params["decoder/sequencegenerator/att_trans/attention/state_trans/transform_cells.W"]) energy_sum += transformed_states + transformed_cells assert energy_sum.shape == (seq_len, blocks_enc_ctx_out.shape[-1]) blocks_energy_sum_tanh = blocks_frame_probs_outputs["decoder_sequencegenerator_att_trans_attention_energy_comp_tanh__tanh_apply_output"] assert blocks_energy_sum_tanh.shape == (seq_len, beam_size, energy_sum.shape[-1]) assert_almost_equal(blocks_energy_sum_tanh[:, 0], numpy.tanh(energy_sum), decimal=5) assert_equal(our_dec_frame_outputs["weight_feedback.output"].shape, (beam_size, seq_len if dec_step > 0 else 1, blocks_enc_ctx_out.shape[-1])) assert_equal(our_dec_frame_outputs["prev_s_transformed.output"].shape, (beam_size, blocks_enc_ctx_out.shape[-1])) our_energy_sum = our_dec_frame_outputs["energy_in.output"] assert our_energy_sum.shape == (beam_size, seq_len, blocks_enc_ctx_out.shape[-1]) assert_almost_equal(our_energy_sum[0], energy_sum, 
def post(self, *args, **kwargs): # TODO: Write formal documentation """ Method for handling classification via HTTP POST request. The following must be defined in the URL parameters: engine_hash (engine hash which points to which engine to use), and the data itself in the body. If using binary data, the following URL parameters must also be supplied: data_format='binary', data_shape=(<dim1,dim2>). If using a specific data type, you can supply it as the URL parameter data_type. :param args: :param kwargs: :return: Either JSON with error or JSON list of generated outputs. """ url_params = self.request.arguments output_dim = {} ret = {} data = {} data_format = '' data_type = '' engine_hash = '' data_shape = '' # First get meta data from URL parameters engine_hash = str(url_params['engine_hash']).replace("['", '').replace( "']", '') if 'data_format' in url_params: data_format = str(url_params['data_format']).replace("['", '').replace( "']", '') if 'data_type' in url_params: # Possible options: https://docs.scipy.org/doc/numpy-1.10.1/user/basics.types.html data_type = str(url_params['data_type']).replace("['", '').replace( "']", '') if 'data_shape' in url_params: data_shape = str(url_params['data_shape']).replace( "['", '').replace("']", '') # either '' or 'dim1,dim2' # Apply defaults, in case we didn't get them through the header. if data_format == '': data_format = 'json' if data_type == '': data_type = 'float32' print( 'Received engine hash: %s, data format: %s, data type: %s, data shape: %s' % (engine_hash, data_format, data_type, data_shape), file=log.v5) # Load in engine and hash engine = _engines[engine_hash] network = engine.network devices = _devices[engine_hash] hash_engine = hashlib.new('ripemd160') hash_engine.update((str(self.request.body) + engine_hash).encode("utf8")) hash_temp = hash_engine.hexdigest() # Pre-process the data if data_format == 'json': data = json.loads(self.request.body) for k in data: try: data[k] = np.asarray(data[k], dtype=data_type) if k != 'data': output_dim[k] = network.n_out[ k] # = [network.n_in,2] if k == 'data' else network.n_out[k] except Exception: if k != 'data' and k not in network.n_out: ret['error'] = 'unknown target: %s' % k else: ret['error'] = 'unable to convert %s to an array from value %s' % ( k, str(data[k])) break if data_format == 'binary': float_array = array(self._get_type_code(data_type)) try: float_array.frombytes(self.request.body) except Exception as e: print('Binary data error: %s' % str(e), file=log.v4) ret['error'] = 'Error during binary data conversion: ' + str(e) data['data'] = np.asarray(float_array.tolist(), dtype=data_type) data_shape_arr = data_shape.split(",") shape = (int(data_shape_arr[0]), int(data_shape_arr[1])) data['data'] = np.reshape(data['data'], shape) # Do dataset creation and classification. 
if 'error' not in ret: data = StaticDataset(data=[data], output_dim=output_dim) data.init_seq_order() batches = data.generate_batches(recurrent_net=network.recurrent, batch_size=sys.maxsize, max_seqs=1) if hash_temp not in _classify_cache: print('Starting classification', file=log.v3) # If we haven't yet processed this exact request and saved it in the cache _classify_cache[hash_temp] = yield self._classification_task( network=network, devices=devices, data=data, batches=batches) ret = { 'result': { k: _classify_cache[hash_temp].result[k].tolist() for k in _classify_cache[hash_temp].result } } # Update engine usage for performance optimization _engine_usage[engine_hash] = datetime.datetime.now() print("Finished processing classification with ID: ", hash_temp, file=log.v3) self.write(ret)
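# A minimal client-side sketch of calling the classification handler above. This is an
# illustration only: the host, port, route name and engine hash are assumptions, not part of
# the original code, and depend on how the server was actually configured and started.
import json
import urllib.request

def classify_via_http(data, engine_hash, host="localhost", port=3033):
  """
  :param list[list[float]] data: input features to classify (sent as the 'data' key in the JSON body)
  :param str engine_hash: hash of a loaded engine, as known to the server
  :param str host:
  :param int port:
  :return: parsed JSON response, either {'result': ...} or {'error': ...}
  :rtype: dict
  """
  url = "http://%s:%i/classify?engine_hash=%s&data_format=json&data_type=float32" % (host, port, engine_hash)
  body = json.dumps({"data": data}).encode("utf8")
  request = urllib.request.Request(url, data=body, method="POST")
  with urllib.request.urlopen(request) as response:
    return json.loads(response.read().decode("utf8"))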
def main(): rnn.init( commandLineOptions=sys.argv[1:], config_updates={ "task": "nop", "log": None, "device": "cpu", "allow_random_model_init": True, "debug_add_check_numerics_on_output": False}, extra_greeting="Import Blocks MT model.") assert Util.BackendEngine.is_tensorflow_selected() config = rnn.config # Load Blocks MT model params. if not config.has("blocks_mt_model"): print("Please provide the option blocks_mt_model.") sys.exit(1) blocks_mt_model_fn = config.value("blocks_mt_model", "") assert blocks_mt_model_fn assert os.path.exists(blocks_mt_model_fn) if os.path.isdir(blocks_mt_model_fn): blocks_mt_model_fn += "/params.npz" assert os.path.exists(blocks_mt_model_fn) dry_run = config.bool("dry_run", False) if dry_run: our_model_fn = None print("Dry-run, will not save model.") else: our_model_fn = config.value('model', "returnn-model") + ".imported" print("Will save Returnn model as %s." % our_model_fn) assert os.path.exists(os.path.dirname(our_model_fn) or "."), "model-dir does not exist" assert not os.path.exists(our_model_fn + Util.get_model_filename_postfix()), "model-file already exists" blocks_mt_model = numpy.load(blocks_mt_model_fn) assert isinstance(blocks_mt_model, numpy.lib.npyio.NpzFile), "did not expect type %r in file %r" % ( type(blocks_mt_model), blocks_mt_model_fn) print("Params found in Blocks model:") blocks_params = {} # type: dict[str,numpy.ndarray] blocks_params_hierarchy = {} # type: dict[str,dict[str]] blocks_total_num_params = 0 for key in sorted(blocks_mt_model.keys()): value = blocks_mt_model[key] key = key.replace("-", "/") assert key[0] == "/" key = key[1:] blocks_params[key] = value print(" %s: %s, %s" % (key, value.shape, value.dtype)) blocks_total_num_params += numpy.prod(value.shape) d = blocks_params_hierarchy for part in key.split("/"): d = d.setdefault(part, {}) print("Blocks total num params: %i" % blocks_total_num_params) # Init our network structure. from TFNetworkRecLayer import _SubnetworkRecCell _SubnetworkRecCell._debug_out = [] # enable for debugging intermediate values below ChoiceLayer._debug_out = [] # also for debug outputs of search rnn.engine.use_search_flag = True # construct the net as in search rnn.engine.init_network_from_config() print("Our network model params:") our_params = {} # type: dict[str,tf.Variable] our_total_num_params = 0 for v in rnn.engine.network.get_params_list(): key = v.name[:-2] our_params[key] = v print(" %s: %s, %s" % (key, v.shape, v.dtype.base_dtype.name)) our_total_num_params += numpy.prod(v.shape.as_list()) print("Our total num params: %i" % our_total_num_params) # Now matching... 
blocks_used_params = set() # type: set[str] our_loaded_params = set() # type: set[str] def import_var(our_var, blocks_param): """ :param tf.Variable our_var: :param str|numpy.ndarray blocks_param: """ assert isinstance(our_var, tf.Variable) if isinstance(blocks_param, str): blocks_param = load_blocks_var(blocks_param) assert isinstance(blocks_param, numpy.ndarray) assert_equal(tuple(our_var.shape.as_list()), blocks_param.shape) our_loaded_params.add(our_var.name[:-2]) our_var.load(blocks_param, session=rnn.engine.tf_session) def load_blocks_var(blocks_param_name): """ :param str blocks_param_name: :rtype: numpy.ndarray """ assert isinstance(blocks_param_name, str) assert blocks_param_name in blocks_params blocks_used_params.add(blocks_param_name) return blocks_params[blocks_param_name] enc_name = "bidirectionalencoder" enc_embed_name = "EncoderLookUp0.W" assert enc_name in blocks_params_hierarchy assert enc_embed_name in blocks_params_hierarchy[enc_name] # input embedding num_encoder_layers = max([ int(re.match(".*([0-9]+)", s).group(1)) for s in blocks_params_hierarchy[enc_name] if s.startswith("EncoderBidirectionalLSTM")]) blocks_input_dim, blocks_input_embed_dim = blocks_params["%s/%s" % (enc_name, enc_embed_name)].shape print("Blocks input dim: %i, embed dim: %i" % (blocks_input_dim, blocks_input_embed_dim)) print("Blocks num encoder layers: %i" % num_encoder_layers) expected_enc_entries = ( ["EncoderLookUp0.W"] + ["EncoderBidirectionalLSTM%i" % i for i in range(1, num_encoder_layers + 1)]) assert_equal(set(expected_enc_entries), set(blocks_params_hierarchy[enc_name].keys())) our_input_layer = find_our_input_embed_layer() assert our_input_layer.input_data.dim == blocks_input_dim assert our_input_layer.output.dim == blocks_input_embed_dim assert not our_input_layer.with_bias import_var(our_input_layer.params["W"], "%s/%s" % (enc_name, enc_embed_name)) dec_name = "decoder/sequencegenerator" dec_hierarchy_base = get_in_hierarchy(dec_name, blocks_params_hierarchy) assert_equal(set(dec_hierarchy_base.keys()), {"att_trans", "readout"}) dec_embed_name = "readout/lookupfeedbackwmt15/lookuptable.W" get_in_hierarchy(dec_embed_name, dec_hierarchy_base) # check for i in range(num_encoder_layers): # Assume standard LSTMCell. 
# i = input_gate, j = new_input, f = forget_gate, o = output_gate # lstm_matrix = self._linear1([inputs, m_prev]) # i, j, f, o = array_ops.split(value=lstm_matrix, num_or_size_splits=4, axis=1) # bias (4*in), kernel (in+out,4*out), w_(f|i|o)_diag (out) # prefix: rec/rnn/lstm_cell # Blocks: gate-in, gate-forget, next-in, gate-out for direction in ("fwd", "bwd"): our_layer = get_network().layers["lstm%i_%s" % (i, direction[:2])] blocks_prefix = "bidirectionalencoder/EncoderBidirectionalLSTM%i" % (i + 1,) # (in,out*4), (out*4,) W_in, b = [load_blocks_var( "%s/%s_fork/fork_inputs.%s" % (blocks_prefix, {"bwd": "back", "fwd": "fwd"}[direction], p)) for p in ("W", "b")] W_re = load_blocks_var( "%s/bidirectionalseparateparameters/%s.W_state" % (blocks_prefix, {"fwd": "forward", "bwd": "backward"}[direction])) W = numpy.concatenate([W_in, W_re], axis=0) b = lstm_vec_blocks_to_tf(b) W = lstm_vec_blocks_to_tf(W) import_var(our_layer.params["rnn/lstm_cell/bias"], b) import_var(our_layer.params["rnn/lstm_cell/kernel"], W) import_var(our_layer.params["initial_c"], "%s/bidirectionalseparateparameters/%s.initial_cells" % (blocks_prefix, {"fwd": "forward", "bwd": "backward"}[direction])) import_var(our_layer.params["initial_h"], "%s/bidirectionalseparateparameters/%s.initial_state" % (blocks_prefix, {"fwd": "forward", "bwd": "backward"}[direction])) for s1, s2 in [("W_cell_to_in", "w_i_diag"), ("W_cell_to_forget", "w_f_diag"), ("W_cell_to_out", "w_o_diag")]: import_var(our_layer.params["rnn/lstm_cell/%s" % s2], "%s/bidirectionalseparateparameters/%s.%s" % (blocks_prefix, {"fwd": "forward", "bwd": "backward"}[direction], s1)) import_var(get_network().layers["enc_ctx"].params["W"], "decoder/sequencegenerator/att_trans/attention/encoder_state_transformer.W") import_var(get_network().layers["enc_ctx"].params["b"], "decoder/sequencegenerator/att_trans/attention/encoder_state_transformer.b") import_var(our_params["output/rec/s/initial_c"], "decoder/sequencegenerator/att_trans/lstm_decoder.initial_cells") import_var(our_params["output/rec/s/initial_h"], "decoder/sequencegenerator/att_trans/lstm_decoder.initial_state") import_var(our_params["output/rec/weight_feedback/W"], "decoder/sequencegenerator/att_trans/attention/sum_alignment_transformer.W") import_var(our_params["output/rec/target_embed/W"], "decoder/sequencegenerator/readout/lookupfeedbackwmt15/lookuptable.W") import_var(our_params["fertility/W"], "decoder/sequencegenerator/att_trans/attention/fertility_transformer.W") import_var(our_params["output/rec/energy/W"], "decoder/sequencegenerator/att_trans/attention/energy_comp/linear.W") prev_s_trans_W_states = load_blocks_var("decoder/sequencegenerator/att_trans/attention/state_trans/transform_states.W") prev_s_trans_W_cells = load_blocks_var("decoder/sequencegenerator/att_trans/attention/state_trans/transform_cells.W") prev_s_trans_W = numpy.concatenate([prev_s_trans_W_cells, prev_s_trans_W_states], axis=0) import_var(our_params["output/rec/prev_s_transformed/W"], prev_s_trans_W) import_var(our_params["output/rec/s/rec/lstm_cell/bias"], numpy.zeros(our_params["output/rec/s/rec/lstm_cell/bias"].shape)) dec_lstm_kernel_in_feedback = load_blocks_var("decoder/sequencegenerator/att_trans/feedback_to_decoder/fork_inputs.W") dec_lstm_kernel_in_ctx = load_blocks_var("decoder/sequencegenerator/att_trans/context_to_decoder/fork_inputs.W") dec_lstm_kernel_re = load_blocks_var("decoder/sequencegenerator/att_trans/lstm_decoder.W_state") dec_lstm_kernel = numpy.concatenate([dec_lstm_kernel_in_feedback, 
dec_lstm_kernel_in_ctx, dec_lstm_kernel_re], axis=0) dec_lstm_kernel = lstm_vec_blocks_to_tf(dec_lstm_kernel) import_var(our_params["output/rec/s/rec/lstm_cell/kernel"], dec_lstm_kernel) for s1, s2 in [("W_cell_to_in", "w_i_diag"), ("W_cell_to_forget", "w_f_diag"), ("W_cell_to_out", "w_o_diag")]: import_var(our_params["output/rec/s/rec/lstm_cell/%s" % s2], "decoder/sequencegenerator/att_trans/lstm_decoder.%s" % s1) readout_in_W_states = load_blocks_var("decoder/sequencegenerator/readout/merge/transform_states.W") readout_in_W_feedback = load_blocks_var("decoder/sequencegenerator/readout/merge/transform_feedback.W") readout_in_W_att = load_blocks_var("decoder/sequencegenerator/readout/merge/transform_weighted_averages.W") readout_in_W = numpy.concatenate([readout_in_W_states, readout_in_W_feedback, readout_in_W_att], axis=0) import_var(our_params["output/rec/readout_in/W"], readout_in_W) import_var(our_params["output/rec/readout_in/b"], "decoder/sequencegenerator/readout/initializablefeedforwardsequence/maxout_bias.b") import_var(our_params["output/rec/output_prob/W"], "decoder/sequencegenerator/readout/initializablefeedforwardsequence/softmax1.W") import_var(our_params["output/rec/output_prob/b"], "decoder/sequencegenerator/readout/initializablefeedforwardsequence/softmax1.b") print("Not initialized own params:") count = 0 for key, v in sorted(our_params.items()): if key in our_loaded_params: continue print(" %s: %s, %s" % (key, v.shape, v.dtype.base_dtype.name)) count += 1 if not count: print(" None.") print("Not used Blocks params:") count = 0 for key, value in sorted(blocks_params.items()): if key in blocks_used_params: continue print(" %s: %s, %s" % (key, value.shape, value.dtype)) count += 1 if not count: print(" None.") print("Done.") blocks_debug_dump_output = config.value("blocks_debug_dump_output", None) if blocks_debug_dump_output: print("Will read Blocks debug dump output from %r and compare with Returnn outputs." 
% blocks_debug_dump_output) blocks_initial_outputs = numpy.load("%s/initial_states_data.0.npz" % blocks_debug_dump_output) blocks_search_log = pickle.load(open("%s/search.log.pkl" % blocks_debug_dump_output, "rb"), encoding="bytes") blocks_search_log = {d[b"step"]: d for d in blocks_search_log} input_seq = blocks_initial_outputs["input"] beam_size, seq_len = input_seq.shape input_seq = input_seq[0] # all the same, select beam 0 assert isinstance(input_seq, numpy.ndarray) print("Debug input seq: %s" % input_seq.tolist()) from GeneratingDataset import StaticDataset dataset = StaticDataset( data=[{"data": input_seq}], output_dim={"data": get_network().extern_data.get_default_input_data().get_kwargs()}) dataset.init_seq_order(epoch=0) extract_output_dict = { "enc_src_emb": get_network().layers["source_embed"].output.get_placeholder_as_batch_major(), "encoder": get_network().layers["encoder"].output.get_placeholder_as_batch_major(), "enc_ctx": get_network().layers["enc_ctx"].output.get_placeholder_as_batch_major(), "output": get_network().layers["output"].output.get_placeholder_as_batch_major() } from TFNetworkLayer import concat_sources for i in range(num_encoder_layers): extract_output_dict["enc_layer_%i" % i] = concat_sources( [get_network().layers["lstm%i_fw" % i], get_network().layers["lstm%i_bw" % i]] ).get_placeholder_as_batch_major() extract_output_dict["enc_layer_0_fwd"] = get_network().layers["lstm0_fw"].output.get_placeholder_as_batch_major() our_output = rnn.engine.run_single( dataset=dataset, seq_idx=0, output_dict=extract_output_dict) blocks_out = blocks_initial_outputs["bidirectionalencoder_EncoderLookUp0__EncoderLookUp0_apply_output"] our_out = our_output["enc_src_emb"] print("our enc emb shape:", our_out.shape) print("Blocks enc emb shape:", blocks_out.shape) assert our_out.shape[:2] == (1, seq_len) assert blocks_out.shape[:2] == (seq_len, beam_size) assert our_out.shape[2] == blocks_out.shape[2] assert_almost_equal(our_out[0], blocks_out[:, 0], decimal=5) blocks_lstm0_out_ref = calc_lstm(blocks_out[:, 0], blocks_params) blocks_lstm0_out = blocks_initial_outputs["bidirectionalencoder_EncoderBidirectionalLSTM1_bidirectionalseparateparameters_forward__forward_apply_states"] our_lstm0_out = our_output["enc_layer_0_fwd"] assert blocks_lstm0_out.shape == (seq_len, beam_size) + blocks_lstm0_out_ref.shape assert our_lstm0_out.shape == (1, seq_len) + blocks_lstm0_out_ref.shape assert_almost_equal(blocks_lstm0_out[0, 0], blocks_lstm0_out_ref, decimal=6) print("Blocks LSTM0 frame 0 matched to ref calc.") assert_almost_equal(our_lstm0_out[0, 0], blocks_lstm0_out_ref, decimal=6) print("Our LSTM0 frame 0 matched to ref calc.") for i in range(num_encoder_layers): blocks_out = blocks_initial_outputs[ "bidirectionalencoder_EncoderBidirectionalLSTM%i_bidirectionalseparateparameters__bidirectionalseparateparameters_apply_output_0" % (i + 1,)] our_out = our_output["enc_layer_%i" % i] print("our enc layer %i shape:" % i, our_out.shape) print("Blocks enc layer %i shape:" % i, blocks_out.shape) assert our_out.shape[:2] == (1, seq_len) assert blocks_out.shape[:2] == (seq_len, beam_size) assert our_out.shape[2] == blocks_out.shape[2] assert_almost_equal(our_out[0], blocks_out[:, 0], decimal=6) print("our encoder shape:", our_output["encoder"].shape) blocks_encoder_out = blocks_initial_outputs["bidirectionalencoder__bidirectionalencoder_apply_representation"] print("Blocks encoder shape:", blocks_encoder_out.shape) assert our_output["encoder"].shape[:2] == (1, seq_len) assert 
blocks_encoder_out.shape[:2] == (seq_len, beam_size) assert our_output["encoder"].shape[2] == blocks_encoder_out.shape[2] assert_almost_equal(our_output["encoder"][0], blocks_encoder_out[:, 0], decimal=6) blocks_first_frame_outputs = numpy.load("%s/next_states.0.npz" % blocks_debug_dump_output) blocks_enc_ctx_out = blocks_first_frame_outputs["decoder_sequencegenerator_att_trans_attention__attention_preprocess_preprocessed_attended"] our_enc_ctx_out = our_output["enc_ctx"] print("Blocks enc ctx shape:", blocks_enc_ctx_out.shape) assert blocks_enc_ctx_out.shape[:2] == (seq_len, beam_size) assert our_enc_ctx_out.shape[:2] == (1, seq_len) assert blocks_enc_ctx_out.shape[2:] == our_enc_ctx_out.shape[2:] assert_almost_equal(blocks_enc_ctx_out[:, 0], our_enc_ctx_out[0], decimal=5) fertility = numpy.dot(blocks_encoder_out[:, 0], blocks_params["decoder/sequencegenerator/att_trans/attention/fertility_transformer.W"]) fertility = sigmoid(fertility) assert fertility.shape == (seq_len, 1) fertility = fertility[:, 0] assert fertility.shape == (seq_len,) our_dec_outputs = {v["step"]: v for v in _SubnetworkRecCell._debug_out} assert our_dec_outputs print("our dec frame keys:", sorted(our_dec_outputs[0].keys())) our_dec_search_outputs = {v["step"]: v for v in ChoiceLayer._debug_out} assert our_dec_search_outputs print("our dec search frame keys:", sorted(our_dec_search_outputs[0].keys())) print("Blocks search frame keys:", sorted(blocks_search_log[0].keys())) dec_lookup = blocks_params["decoder/sequencegenerator/readout/lookupfeedbackwmt15/lookuptable.W"] last_lstm_state = blocks_params["decoder/sequencegenerator/att_trans/lstm_decoder.initial_state"] last_lstm_cells = blocks_params["decoder/sequencegenerator/att_trans/lstm_decoder.initial_cells"] last_accumulated_weights = numpy.zeros((seq_len,), dtype="float32") last_output = 0 dec_seq_len = 0 for dec_step in range(100): blocks_frame_state_outputs_fn = "%s/next_states.%i.npz" % (blocks_debug_dump_output, dec_step) blocks_frame_probs_outputs_fn = "%s/logprobs.%i.npz" % (blocks_debug_dump_output, dec_step) if dec_step > 3: if not os.path.exists(blocks_frame_state_outputs_fn) or not os.path.exists(blocks_frame_probs_outputs_fn): print("Seq not ended yet but frame not found for step %i." 
% dec_step) break blocks_frame_state_outputs = numpy.load(blocks_frame_state_outputs_fn) blocks_frame_probs_outputs = numpy.load(blocks_frame_probs_outputs_fn) blocks_search_frame = blocks_search_log[dec_step] our_dec_frame_outputs = our_dec_outputs[dec_step] assert our_dec_frame_outputs["step"] == dec_step assert our_dec_frame_outputs[":i.output"].tolist() == [dec_step] our_dec_search_frame_outputs = our_dec_search_outputs[dec_step] blocks_last_lstm_state = blocks_frame_probs_outputs["decoder_sequencegenerator__sequencegenerator_generate_states"] blocks_last_lstm_cells = blocks_frame_probs_outputs["decoder_sequencegenerator__sequencegenerator_generate_cells"] assert blocks_last_lstm_state.shape == (beam_size, last_lstm_state.shape[0]) assert_almost_equal(blocks_last_lstm_state[0], last_lstm_state, decimal=5) assert_almost_equal(blocks_last_lstm_cells[0], last_lstm_cells, decimal=5) our_last_lstm_cells = our_dec_frame_outputs["prev:s.extra.state"][0] our_last_lstm_state = our_dec_frame_outputs["prev:s.extra.state"][1] assert our_last_lstm_state.shape == our_last_lstm_cells.shape == (beam_size, last_lstm_state.shape[0]) assert_almost_equal(our_last_lstm_state[0], last_lstm_state, decimal=5) assert_almost_equal(our_last_lstm_cells[0], last_lstm_cells, decimal=5) our_last_s = our_dec_frame_outputs["prev:s.output"] assert our_last_s.shape == (beam_size, last_lstm_state.shape[0]) assert_almost_equal(our_last_s[0], last_lstm_state, decimal=5) blocks_last_accum_weights = blocks_frame_probs_outputs["decoder_sequencegenerator__sequencegenerator_generate_accumulated_weights"] assert blocks_last_accum_weights.shape == (beam_size, seq_len) assert_almost_equal(blocks_last_accum_weights[0], last_accumulated_weights, decimal=5) our_last_accum_weights = our_dec_frame_outputs["prev:accum_att_weights.output"] assert our_last_accum_weights.shape == (beam_size, seq_len if dec_step > 0 else 1, 1) if dec_step > 0: assert_almost_equal(our_last_accum_weights[0, :, 0], last_accumulated_weights, decimal=4) else: assert_almost_equal(our_last_accum_weights[0, 0, 0], last_accumulated_weights.sum(), decimal=4) energy_sum = numpy.copy(blocks_enc_ctx_out[:, 0]) # (T,enc-ctx-dim) weight_feedback = numpy.dot(last_accumulated_weights[:, None], blocks_params["decoder/sequencegenerator/att_trans/attention/sum_alignment_transformer.W"]) energy_sum += weight_feedback transformed_states = numpy.dot(last_lstm_state[None, :], blocks_params["decoder/sequencegenerator/att_trans/attention/state_trans/transform_states.W"]) transformed_cells = numpy.dot(last_lstm_cells[None, :], blocks_params["decoder/sequencegenerator/att_trans/attention/state_trans/transform_cells.W"]) energy_sum += transformed_states + transformed_cells assert energy_sum.shape == (seq_len, blocks_enc_ctx_out.shape[-1]) blocks_energy_sum_tanh = blocks_frame_probs_outputs["decoder_sequencegenerator_att_trans_attention_energy_comp_tanh__tanh_apply_output"] assert blocks_energy_sum_tanh.shape == (seq_len, beam_size, energy_sum.shape[-1]) assert_almost_equal(blocks_energy_sum_tanh[:, 0], numpy.tanh(energy_sum), decimal=5) assert_equal(our_dec_frame_outputs["weight_feedback.output"].shape, (beam_size, seq_len if dec_step > 0 else 1, blocks_enc_ctx_out.shape[-1])) assert_equal(our_dec_frame_outputs["prev_s_transformed.output"].shape, (beam_size, blocks_enc_ctx_out.shape[-1])) our_energy_sum = our_dec_frame_outputs["energy_in.output"] assert our_energy_sum.shape == (beam_size, seq_len, blocks_enc_ctx_out.shape[-1]) assert_almost_equal(our_energy_sum[0], energy_sum, 
decimal=4) blocks_energy = blocks_frame_probs_outputs["decoder_sequencegenerator_att_trans_attention_energy_comp__energy_comp_apply_output"] assert blocks_energy.shape == (seq_len, beam_size, 1) energy = numpy.dot(numpy.tanh(energy_sum), blocks_params["decoder/sequencegenerator/att_trans/attention/energy_comp/linear.W"]) assert energy.shape == (seq_len, 1) assert_almost_equal(blocks_energy[:, 0], energy, decimal=4) our_energy = our_dec_frame_outputs["energy.output"] assert our_energy.shape == (beam_size, seq_len, 1) assert_almost_equal(our_energy[0], energy, decimal=4) weights = softmax(energy[:, 0]) assert weights.shape == (seq_len,) our_weights = our_dec_frame_outputs["att_weights.output"] assert our_weights.shape == (beam_size, seq_len, 1) assert_almost_equal(our_weights[0, :, 0], weights, decimal=4) accumulated_weights = last_accumulated_weights + weights / (2.0 * fertility) assert accumulated_weights.shape == (seq_len,) #blocks_accumulated_weights = blocks_frame_probs_outputs["decoder_sequencegenerator_att_trans_attention__attention_take_glimpses_accumulated_weights"] #assert blocks_accumulated_weights.shape == (beam_size, seq_len) #assert_almost_equal(blocks_accumulated_weights[0], accumulated_weights, decimal=5) blocks_weights = blocks_frame_probs_outputs["decoder_sequencegenerator_att_trans_attention__attention_compute_weights_output_0"] assert blocks_weights.shape == (seq_len, beam_size) assert_almost_equal(weights, blocks_weights[:, 0], decimal=4) our_accum_weights = our_dec_frame_outputs["accum_att_weights.output"] assert our_accum_weights.shape == (beam_size, seq_len, 1) weighted_avg = (weights[:, None] * blocks_encoder_out[:, 0]).sum(axis=0) # att in our assert weighted_avg.shape == (blocks_encoder_out.shape[-1],) blocks_weighted_avg = blocks_frame_probs_outputs["decoder_sequencegenerator_att_trans_attention__attention_compute_weighted_averages_output_0"] assert blocks_weighted_avg.shape == (beam_size, blocks_encoder_out.shape[-1]) assert_almost_equal(blocks_weighted_avg[0], weighted_avg, decimal=4) our_att = our_dec_frame_outputs["att.output"] assert our_att.shape == (beam_size, blocks_encoder_out.shape[-1]) assert_almost_equal(our_att[0], weighted_avg, decimal=4) blocks_last_output = blocks_frame_probs_outputs["decoder_sequencegenerator__sequencegenerator_generate_outputs"] assert blocks_last_output.shape == (beam_size,) assert max(blocks_last_output[0], 0) == last_output last_target_embed = dec_lookup[last_output] if dec_step == 0: last_target_embed = numpy.zeros_like(last_target_embed) our_last_target_embed = our_dec_frame_outputs["prev:target_embed.output"] assert our_last_target_embed.shape == (beam_size, dec_lookup.shape[-1]) assert_almost_equal(our_last_target_embed[0], last_target_embed, decimal=4) readout_in_state = numpy.dot(last_lstm_state, blocks_params["decoder/sequencegenerator/readout/merge/transform_states.W"]) blocks_trans_state = blocks_frame_probs_outputs["decoder_sequencegenerator_readout_merge__merge_apply_states"] assert blocks_trans_state.shape == (beam_size, last_lstm_state.shape[0]) assert_almost_equal(blocks_trans_state[0], readout_in_state, decimal=4) readout_in_feedback = numpy.dot(last_target_embed, blocks_params["decoder/sequencegenerator/readout/merge/transform_feedback.W"]) blocks_trans_feedback = blocks_frame_probs_outputs["decoder_sequencegenerator_readout_merge__merge_apply_feedback"] assert blocks_trans_feedback.shape == (beam_size, readout_in_feedback.shape[0]) assert_almost_equal(blocks_trans_feedback[0], readout_in_feedback, decimal=4) 
readout_in_weighted_avg = numpy.dot(weighted_avg, blocks_params["decoder/sequencegenerator/readout/merge/transform_weighted_averages.W"]) blocks_trans_weighted_avg = blocks_frame_probs_outputs["decoder_sequencegenerator_readout_merge__merge_apply_weighted_averages"] assert blocks_trans_weighted_avg.shape == (beam_size, readout_in_weighted_avg.shape[0]) assert_almost_equal(blocks_trans_weighted_avg[0], readout_in_weighted_avg, decimal=4) readout_in = readout_in_state + readout_in_feedback + readout_in_weighted_avg blocks_readout_in = blocks_frame_probs_outputs["decoder_sequencegenerator_readout_merge__merge_apply_output"] assert blocks_readout_in.shape == (beam_size, readout_in.shape[0]) assert_almost_equal(blocks_readout_in[0], readout_in, decimal=4) readout_in += blocks_params["decoder/sequencegenerator/readout/initializablefeedforwardsequence/maxout_bias.b"] assert readout_in.shape == (blocks_params["decoder/sequencegenerator/readout/initializablefeedforwardsequence/maxout_bias.b"].shape[0],) our_readout_in = our_dec_frame_outputs["readout_in.output"] assert our_readout_in.shape == (beam_size, readout_in.shape[0]) assert_almost_equal(our_readout_in[0], readout_in, decimal=4) readout = readout_in.reshape((readout_in.shape[0] // 2, 2)).max(axis=1) our_readout = our_dec_frame_outputs["readout.output"] assert our_readout.shape == (beam_size, readout.shape[0]) assert_almost_equal(our_readout[0], readout, decimal=4) prob_logits = numpy.dot(readout, blocks_params["decoder/sequencegenerator/readout/initializablefeedforwardsequence/softmax1.W"]) + \ blocks_params["decoder/sequencegenerator/readout/initializablefeedforwardsequence/softmax1.b"] assert prob_logits.ndim == 1 blocks_prob_logits = blocks_frame_probs_outputs["decoder_sequencegenerator_readout__readout_readout_output_0"] assert blocks_prob_logits.shape == (beam_size, prob_logits.shape[0]) assert_almost_equal(blocks_prob_logits[0], prob_logits, decimal=4) output_prob = softmax(prob_logits) log_output_prob = log_softmax(prob_logits) assert_almost_equal(numpy.log(output_prob), log_output_prob, decimal=4) our_output_prob = our_dec_frame_outputs["output_prob.output"] assert our_output_prob.shape == (beam_size, output_prob.shape[0]) assert_almost_equal(our_output_prob[0], output_prob, decimal=4) blocks_nlog_prob = blocks_frame_probs_outputs["logprobs"] assert blocks_nlog_prob.shape == (beam_size, output_prob.shape[0]) assert_almost_equal(blocks_nlog_prob[0], -log_output_prob, decimal=4) assert_almost_equal(our_dec_search_frame_outputs["scores_in_orig"][0], output_prob, decimal=4) assert_almost_equal(blocks_search_frame[b'logprobs'][0], -log_output_prob, decimal=4) #for b in range(beam_size): # assert_almost_equal(-numpy.log(our_output_prob[b]), blocks_frame_probs_outputs["logprobs"][b], decimal=4) ref_output = numpy.argmax(output_prob) # Note: Don't take the readout.emit outputs. They are randomly sampled. blocks_dec_output = blocks_search_frame[b'outputs'] assert blocks_dec_output.shape == (beam_size,) our_dec_output = our_dec_frame_outputs["output.output"] assert our_dec_output.shape == (beam_size,) print("Frame %i: Ref best greedy output symbol: %i" % (dec_step, int(ref_output))) print("Blocks labels:", blocks_dec_output.tolist()) print("Our labels:", our_dec_output.tolist()) # Well, the following two could be not true if all the other beams have much better scores, # but this is unlikely. 
assert ref_output in blocks_dec_output assert ref_output in our_dec_output if dec_step == 0: # This assumes that the results are ordered by score which might not be true (see tf.nn.top_k). assert blocks_dec_output[0] == our_dec_output[0] == ref_output # We assume that the best is the same. Note that this also might not be true if there are two equally best scores. # It also assumes that it's ordered by the score which also might not be true (see tf.nn.top_k). # For the same reason, the remaining list and entries might also not perfectly match. assert our_dec_output[0] == blocks_dec_output[0] # Just follow the first beam. ref_output = blocks_dec_output[0] assert our_dec_search_frame_outputs["src_beam_idxs"].shape == (1, beam_size) assert our_dec_search_frame_outputs["scores"].shape == (1, beam_size) print("Blocks src_beam_idxs:", blocks_search_frame[b'indexes'].tolist()) print("Our src_beam_idxs:", our_dec_search_frame_outputs["src_beam_idxs"][0].tolist()) print("Blocks scores:", blocks_search_frame[b'chosen_costs'].tolist()) print("Our scores:", our_dec_search_frame_outputs["scores"][0].tolist()) if list(our_dec_search_frame_outputs["src_beam_idxs"][0]) != list(blocks_search_frame[b'indexes']): print("Warning, beams do not match.") print("Blocks scores base:", blocks_search_frame[b'scores_base'].flatten().tolist()) print("Our scores base:", our_dec_search_frame_outputs["scores_base"].flatten().tolist()) #print("Blocks score in orig top k:", sorted(blocks_search_frame[b'logprobs'].flatten())[:beam_size]) #print("Our score in orig top k:", sorted(-numpy.log(our_dec_search_frame_outputs["scores_in_orig"].flatten()))[:beam_size]) print("Blocks score in top k:", sorted((blocks_search_frame[b'logprobs'] * blocks_search_log[dec_step - 1][b'mask'][:, None]).flatten())[:beam_size]) print("Our score in top k:", sorted(-our_dec_search_frame_outputs["scores_in"].flatten())[:beam_size]) blocks_scores_combined = blocks_search_frame[b'next_costs'] our_scores_combined = our_dec_search_frame_outputs["scores_combined"] print("Blocks scores combined top k:", sorted(blocks_scores_combined.flatten())[:beam_size]) print("Our neg scores combined top k:", sorted(-our_scores_combined.flatten())[:beam_size]) #raise Exception("beams mismatch") assert our_dec_search_frame_outputs["src_beam_idxs"][0][0] == blocks_search_frame[b'indexes'][0] beam_idx = our_dec_search_frame_outputs["src_beam_idxs"][0][0] if beam_idx != 0: print("Selecting different beam: %i." % beam_idx) # Just overwrite the needed states by Blocks outputs. accumulated_weights = blocks_frame_state_outputs["decoder_sequencegenerator_att_trans_attention__attention_take_glimpses_accumulated_weights"][0] weighted_avg = blocks_frame_state_outputs["decoder_sequencegenerator__sequencegenerator_generate_weighted_averages"][0] last_lstm_state = blocks_frame_state_outputs["decoder_sequencegenerator__sequencegenerator_generate_states"][0] last_lstm_cells = blocks_frame_state_outputs["decoder_sequencegenerator__sequencegenerator_generate_cells"][0] # From now on, use blocks_frame_state_outputs instead of blocks_frame_probs_outputs because # it will have the beam reordered. 
blocks_target_emb = blocks_frame_state_outputs["decoder_sequencegenerator_fork__fork_apply_feedback_decoder_input"] assert blocks_target_emb.shape == (beam_size, dec_lookup.shape[1]) target_embed = dec_lookup[ref_output] assert target_embed.shape == (dec_lookup.shape[1],) assert_almost_equal(blocks_target_emb[0], target_embed) feedback_to_decoder = numpy.dot(target_embed, blocks_params["decoder/sequencegenerator/att_trans/feedback_to_decoder/fork_inputs.W"]) context_to_decoder = numpy.dot(weighted_avg, blocks_params["decoder/sequencegenerator/att_trans/context_to_decoder/fork_inputs.W"]) lstm_z = feedback_to_decoder + context_to_decoder assert lstm_z.shape == feedback_to_decoder.shape == context_to_decoder.shape == (last_lstm_state.shape[-1] * 4,) blocks_feedback_to_decoder = blocks_frame_state_outputs["decoder_sequencegenerator_att_trans_feedback_to_decoder__feedback_to_decoder_apply_inputs"] blocks_context_to_decoder = blocks_frame_state_outputs["decoder_sequencegenerator_att_trans_context_to_decoder__context_to_decoder_apply_inputs"] assert blocks_feedback_to_decoder.shape == blocks_context_to_decoder.shape == (beam_size, last_lstm_state.shape[-1] * 4) assert_almost_equal(blocks_feedback_to_decoder[0], feedback_to_decoder, decimal=4) assert_almost_equal(blocks_context_to_decoder[0], context_to_decoder, decimal=4) lstm_state, lstm_cells = calc_raw_lstm( lstm_z, blocks_params=blocks_params, prefix="decoder/sequencegenerator/att_trans/lstm_decoder.", last_state=last_lstm_state, last_cell=last_lstm_cells) assert lstm_state.shape == last_lstm_state.shape == lstm_cells.shape == last_lstm_cells.shape blocks_lstm_state = blocks_frame_state_outputs["decoder_sequencegenerator_att_trans_lstm_decoder__lstm_decoder_apply_states"] blocks_lstm_cells = blocks_frame_state_outputs["decoder_sequencegenerator_att_trans_lstm_decoder__lstm_decoder_apply_cells"] assert blocks_lstm_state.shape == blocks_lstm_cells.shape == (beam_size, last_lstm_state.shape[-1]) assert_almost_equal(blocks_lstm_state[0], lstm_state, decimal=4) assert_almost_equal(blocks_lstm_cells[0], lstm_cells, decimal=4) our_lstm_cells = our_dec_frame_outputs["s.extra.state"][0] our_lstm_state = our_dec_frame_outputs["s.extra.state"][1] assert our_lstm_state.shape == our_lstm_cells.shape == (beam_size, lstm_state.shape[0]) assert_almost_equal(our_lstm_state[0], lstm_state, decimal=4) assert_almost_equal(our_lstm_cells[0], lstm_cells, decimal=4) our_s = our_dec_frame_outputs["s.output"] assert our_s.shape == (beam_size, lstm_state.shape[0]) assert_almost_equal(our_s[0], lstm_state, decimal=4) last_accumulated_weights = accumulated_weights last_lstm_state = lstm_state last_lstm_cells = lstm_cells last_output = ref_output if last_output == 0: print("Sequence finished, seq len %i." % dec_step) dec_seq_len = dec_step break assert dec_seq_len > 0 print("All outputs seem to match.") else: print("blocks_debug_dump_output not specified. It will not compare the model outputs.") if dry_run: print("Dry-run, not saving model.") else: rnn.engine.save_model(our_model_fn) print("Finished importing.")
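# The parameter import above hinges on reordering the concatenated LSTM gate blocks from the
# Blocks layout (gate-in, gate-forget, next-in, gate-out, as stated in the comments above) into
# the TF LSTMCell layout (i, j, f, o) = (input gate, cell input, forget gate, output gate), as
# done by lstm_vec_blocks_to_tf. Below is a minimal stand-alone sketch of that reordering; it is
# an illustration under those assumptions, not the script's own helper.
import numpy

def reorder_lstm_gates_blocks_to_tf(x):
  """
  :param numpy.ndarray x: Blocks LSTM bias (4*n,) or kernel (in_dim, 4*n), gates concatenated on the last axis
  :return: same array with the last axis reordered into the TF LSTMCell i, j, f, o layout
  :rtype: numpy.ndarray
  """
  n = x.shape[-1] // 4
  assert n * 4 == x.shape[-1]
  # Blocks order: input gate, forget gate, cell input ("next-in"), output gate.
  gate_in, gate_forget, cell_in, gate_out = [x[..., k * n:(k + 1) * n] for k in range(4)]
  # TF order: input gate, cell input, forget gate, output gate.
  return numpy.concatenate([gate_in, cell_in, gate_forget, gate_out], axis=-1)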
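# Condensed, this is the attention recurrence that the frame-by-frame checks above verify. A
# sketch in plain numpy, assuming enc_ctx (T, ctx_dim), encoder_out (T, enc_dim), accum_weights
# and fertility (T,), prev_state/prev_cells (n,); the W_* arguments stand for the
# sum_alignment/transform_states/transform_cells/energy parameters loaded above and are named
# here only for illustration.
def attention_step(enc_ctx, encoder_out, accum_weights, fertility, prev_state, prev_cells,
                   W_feedback, W_states, W_cells, W_energy):
  """One decoder frame of the fertility-based attention, mirroring the reference computation above."""
  energy_in = enc_ctx + numpy.dot(accum_weights[:, None], W_feedback)  # (T, ctx_dim)
  energy_in += numpy.dot(prev_state[None, :], W_states) + numpy.dot(prev_cells[None, :], W_cells)
  energy = numpy.dot(numpy.tanh(energy_in), W_energy)[:, 0]  # (T,)
  weights = numpy.exp(energy - energy.max())
  weights /= weights.sum()  # softmax over encoder positions
  att = (weights[:, None] * encoder_out).sum(axis=0)  # weighted average of encoder states, (enc_dim,)
  accum_weights = accum_weights + weights / (2.0 * fertility)  # fertility-scaled accumulation
  return att, weights, accum_weights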