Example #1
File: main.py Project: orrenkt/ntm
def main(_):
    pp.pprint(flags.FLAGS.__flags)

    with tf.device('/cpu:0'), tf.Session() as sess:
        if FLAGS.task == 'copy':
            if FLAGS.is_train:
                cell, ntm = copy_train(FLAGS, sess)
            else:
                cell = NTMCell(input_dim=FLAGS.input_dim,
                               output_dim=FLAGS.output_dim,
                               controller_layer_size=FLAGS.controller_layer_size,
                               write_head_size=FLAGS.write_head_size,
                               read_head_size=FLAGS.read_head_size)
                ntm = NTM(cell, sess, 1, FLAGS.max_length,
                          test_max_length=FLAGS.test_max_length, forward_only=True)

            ntm.load(FLAGS.checkpoint_dir, 'copy')

            copy(ntm, int(FLAGS.test_max_length*1/3), sess)
            print
            copy(ntm, int(FLAGS.test_max_length*2/3), sess)
            print
            copy(ntm, int(FLAGS.test_max_length*3/3), sess)
        elif FLAGS.task == 'recall':
            pass
Example #2
def compareFixed():
    t = Tasks()
    x_test, y_test = t.sequence_type_1(100)

    add_params, mul_params = torch.load('program_memory/add.pt'), torch.load(
        'program_memory/mul.pt')
    hnm = HNM(10, 20, add_params, mul_params)
    hnm.load_state_dict(torch.load("learned_params/hnm_arch_2.pt"))

    ntm = NTM(10, 20)
    ntm.load_state_dict(torch.load("learned_params/ntm.pt"))

    lstm = LSTM(14, 256, 325, 1)
    lstm.load_state_dict(torch.load("learned_params/lstm.pt"))

    hnm_diff, lstm_diff, ntm_diff = 0, 0, 0

    for i in range(len(x_test)):
        hnm_out = hnm.recurrent_forward(x_test[i:i + 1])
        ntm_out = ntm.recurrent_forward(x_test[i:i + 1])
        lstm_out = lstm.recurrent_forward(x_test[i:i + 1])

        answer = np.argmax(y_test[i:i + 1].detach().numpy())
        hnm_diff += abs(answer - np.argmax(hnm_out.detach().numpy()))
        ntm_diff += abs(answer - np.argmax(ntm_out.detach().numpy()))
        lstm_diff += abs(answer - np.argmax(lstm_out.detach().numpy()))

    print(hnm_diff / len(y_test), ntm_diff / len(y_test),
          lstm_diff / len(y_test))
Example #3
def create_ntm(config, sess, **ntm_args):
    if config.rand_hyper:
        hyper_params = {}
        if config.is_test:
            hyper_params = load_hyperparamters(config)
        else:
            hyper_params = generate_hyperparams(config)
        print(" [*] Hyperparameters: {}".format(hyper_params))
        cell = NTMCell(input_dim=config.input_dim,
                       output_dim=config.output_dim,
                       controller_layer_size=hyper_params["c_layer"],
                       controller_dim=hyper_params["c_dim"],
                       mem_size=hyper_params["mem_size"],
                       write_head_size=config.write_head_size,
                       read_head_size=config.read_head_size,
                       is_LSTM_mode=config.is_LSTM_mode)
        scope = ntm_args.pop('scope', 'NTM-%s' % config.task)

        # Description + query + plan + answer
        min_length = (config.min_size -
                      1) + 1 + config.plan_length + (config.min_size - 1)
        max_length = int(((config.max_size * (config.max_size - 1) / 2) + 1 +
                          config.plan_length + (config.max_size - 1)))
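        # Worked example with hypothetical values (not taken from any real config):
        # with min_size=3, max_size=5, plan_length=5,
        #   min_length = (3 - 1) + 1 + 5 + (3 - 1) = 10
        #   max_length = (5 * 4 / 2) + 1 + 5 + (5 - 1) = 20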
        ntm = NTM(cell,
                  sess,
                  min_length,
                  max_length,
                  config.min_size,
                  config.max_size,
                  scope=scope,
                  **ntm_args,
                  lr=hyper_params["lr"],
                  momentum=hyper_params["momentum"],
                  decay=hyper_params["decay"],
                  beta=hyper_params["l2"])

    else:
        cell = NTMCell(input_dim=config.input_dim,
                       output_dim=config.output_dim,
                       controller_layer_size=config.controller_layer_size,
                       controller_dim=config.controller_dim,
                       write_head_size=config.write_head_size,
                       read_head_size=config.read_head_size,
                       is_LSTM_mode=config.is_LSTM_mode)
        scope = ntm_args.pop('scope', 'NTM-%s' % config.task)

        # Description + query + plan + answer
        min_length = (config.min_size -
                      1) + 1 + config.plan_length + (config.min_size - 1)
        max_length = int(((config.max_size * (config.max_size - 1) / 2) + 1 +
                          config.plan_length + (config.max_size - 1)))
        ntm = NTM(cell,
                  sess,
                  min_length,
                  max_length,
                  config.min_size,
                  config.max_size,
                  scope=scope,
                  **ntm_args)
    return cell, ntm
Example #4
    def __init__(self,
                 d_vocab,
                 d_emb,
                 d_dec,
                 max_len,
                 bos_idx,
                 num_heads=8,
                 N=64,
                 M=32,
                 seg_size=20):

        super().__init__()
        self.d_vocab = d_vocab
        self.seg_size = seg_size
        self.embs = nn.Embedding(d_vocab, d_emb)
        self.rnn = nn.GRU(d_emb, d_dec, batch_first=True)
        self.ntm_scale = nn.Parameter(torch.zeros([1, d_dec]),
                                      requires_grad=True)
        self.ntm = NTM('mem-aug',
                       embedding_size=d_dec,
                       hidden_size=d_dec,
                       memory_size=M,
                       head_num=num_heads,
                       memory_feature_size=N,
                       output_size=d_dec)
        self.init = nn.Parameter(torch.zeros(1, d_dec), requires_grad=True)
        self.bos_idx = nn.Parameter(torch.tensor([bos_idx]),
                                    requires_grad=False)
        self.out_layer = nn.Linear(d_dec, d_vocab)
        self.max_len = max_len
Example #5
def trainNTM():
    t = Tasks()
    x_train, y_train = t.sequence_type_1(2000)

    ntm = NTM(10, 20)

    ntm.train(x_train, y_train, 1, maxEpoch=25, learning_rate=0.0006)
Example #6
def predict_train(config, sess):
    """Train an NTM for the copy task given a TensorFlow session, which is a
    connection to the C++ backend"""

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # delimiter vectors marking the start and end of a sequence
    # (these appear in the figure examples in the README);
    # the same vectors are defined again in the copy_train examples below
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1
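    # e.g. with a hypothetical config.input_dim of 5:
    #   start_symbol = [1, 0, 0, 0, 0], end_symbol = [0, 1, 0, 0, 0]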

    # initialise the neural turing machine and the neural-net controller thing
    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length*3)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        # generate a sequence of random length
        seq_length = randint(config.min_length, config.max_length) * 4
        inc_seq, comp_seq = generate_predict_sequence(seq_length, config.input_dim - 2)

        # build the feed_dict that maps each input/target placeholder to its vector
        feed_dict = {input_:vec for vec, input_ in zip(inc_seq, ntm.inputs)}
        feed_dict.update(
            {true_output:vec for vec, true_output in zip(comp_seq, ntm.true_outputs)}
        )
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol
        })

        # this runs the session and returns the current training loss and the
        # global step, which is used below when naming checkpoints
        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step], feed_dict=feed_dict)

        # checkpoints are written every 100 iterations; see the restore sketch after this function
        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
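
The checkpoints written above can be restored later for evaluation. A minimal sketch, assuming the same config object and session wiring used in these examples (Example #1 shows the original load pattern, including the forward_only flag):

def copy_eval(config, sess):
    # rebuild the same graph in inference mode, then restore the saved weights
    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length, forward_only=True)
    ntm.load(config.checkpoint_dir, 'copy')  # restores the latest 'copy' checkpoint
    return cell, ntm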
Example #7
def trainNTM():
	ntm = NTM(10, 14)

	X, y = [], []
	for i in range(10):
		tempX, tempY = getData("data/observations_"+str(i*500)+".npy", "data/actions_"+str(i*500)+".npy")
		X.extend(tempX)
		y.extend(tempY)

	print(len(X), len(y))

	ntm.train(X, y, 1)
Example #8
def copy_train(config):
    sess = config.sess

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # delimiter flag for start and end
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        feed_dict = {input_: vec for vec, input_ in zip(seq, ntm.inputs)}
        feed_dict.update({
            true_output: vec
            for vec, true_output in zip(seq, ntm.true_outputs)
        })
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol
        })

        _, cost, step = sess.run([
            ntm.optims[seq_length],
            ntm.get_loss(seq_length), ntm.global_step
        ],
                                 feed_dict=feed_dict)

        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
Example #9
def copy_train(config):
    sess = config.sess

    if not os.path.isdir(config.checkpoint_dir):
        raise Exception(" [!] Directory %s not found" % config.checkpoint_dir)

    # delimiter flag for start and end
    start_symbol = np.zeros([config.input_dim], dtype=np.float32)
    start_symbol[0] = 1
    end_symbol = np.zeros([config.input_dim], dtype=np.float32)
    end_symbol[1] = 1

    cell = NTMCell(input_dim=config.input_dim,
                   output_dim=config.output_dim,
                   controller_layer_size=config.controller_layer_size,
                   write_head_size=config.write_head_size,
                   read_head_size=config.read_head_size)
    ntm = NTM(cell, sess, config.min_length, config.max_length)

    print(" [*] Initialize all variables")
    tf.initialize_all_variables().run()
    print(" [*] Initialization finished")

    start_time = time.time()
    for idx in xrange(config.epoch):
        seq_length = randint(config.min_length, config.max_length)
        seq = generate_copy_sequence(seq_length, config.input_dim - 2)

        feed_dict = {input_:vec for vec, input_ in zip(seq, ntm.inputs)}
        feed_dict.update(
            {true_output:vec for vec, true_output in zip(seq, ntm.true_outputs)}
        )
        feed_dict.update({
            ntm.start_symbol: start_symbol,
            ntm.end_symbol: end_symbol
        })

        _, cost, step = sess.run([ntm.optims[seq_length],
                                  ntm.get_loss(seq_length),
                                  ntm.global_step], feed_dict=feed_dict)

        if idx % 100 == 0:
            ntm.save(config.checkpoint_dir, 'copy', step)

        if idx % print_interval == 0:
            print("[%5d] %2d: %.2f (%.1fs)" \
                % (idx, seq_length, cost, time.time() - start_time))

    print("Training Copy task finished")
    return cell, ntm
Example #10
    def __init__(self,
                 num_inputs,
                 num_outputs,
                 controller_size,
                 controller_layers,
                 num_heads,
                 N,
                 M,
                 controller_type='lstm'):
        """Initialize an EncapsulatedNTM.

        :param num_inputs: External number of inputs.
        :param num_outputs: External number of outputs.
        :param controller_size: The size of the internal representation.
        :param controller_layers: Controller number of layers.
        :param num_heads: Number of heads.
        :param N: Number of rows in the memory bank.
        :param M: Number of cols/features in the memory bank.
        """
        super(EncapsulatedNTM, self).__init__()

        # Save args
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.controller_size = controller_size
        self.controller_layers = controller_layers
        self.num_heads = num_heads
        self.N = N
        self.M = M

        # Create the NTM components
        memory = NTMMemory(N, M)
        if controller_type == 'lstm':
            controller = LSTMController(num_inputs + M * num_heads,
                                        controller_size, controller_layers)
        else:
            controller = MLPController(num_inputs + M * num_heads,
                                       controller_size, controller_layers)

        heads = nn.ModuleList([])
        for i in range(num_heads):
            heads += [
                NTMReadHead(memory, controller_size),
                NTMWriteHead(memory, controller_size)
            ]

        self.ntm = NTM(num_inputs, num_outputs, controller, memory, heads)
        self.memory = memory
Example #11
def create_ntm(FLAGS, sess, **ntm_args):
    cell = NTMCell(
        input_dim=FLAGS.input_dim,
        output_dim=FLAGS.output_dim,
        controller_layer_size=FLAGS.controller_layer_size,
        write_head_size=FLAGS.write_head_size,
        read_head_size=FLAGS.read_head_size)
    ntm = NTM(
        cell, sess, FLAGS.min_length, FLAGS.max_length,
        test_max_length=FLAGS.test_max_length, scope='NTM-%s' % FLAGS.task, **ntm_args)
    return cell, ntm
Example #12
def create_ntm(config, sess, **ntm_args):
    cell = NTMCell(
        input_dim=config.input_dim,
        output_dim=config.output_dim,
        controller_layer_size=config.controller_layer_size,
        controller_dim=config.controller_dim,
        write_head_size=config.write_head_size,
        read_head_size=config.read_head_size)
    scope = ntm_args.pop('scope', 'NTM-%s' % config.task)
    ntm = NTM(
        cell, sess, config.min_length, config.max_length,
        test_max_length=config.test_max_length, scope=scope, **ntm_args)
    return cell, ntm
Example #13
def compare():

    obstacle, wall_cw, wall_acw = Obstacle(), WallCW(), WallACW()
    obstacle_params, wall_cw_params, wall_acw_params = torch.load(
        'program_memory/move.pt'), torch.load(
            'program_memory/cw.pt'), torch.load('program_memory/acw.pt')
    networks = [obstacle, wall_cw, wall_acw]
    params = [obstacle_params, wall_cw_params, wall_acw_params]
    hnm = HNM(10, 14, networks, params)
    hnm.load_state_dict(torch.load('learned_params/hnm.pt'))

    ntm = NTM(10, 14)
    ntm.load_state_dict(torch.load('learned_params/ntm.pt'))

    lstm = LSTM(14, 64, 3, 1)
    lstm.load_state_dict(torch.load('learned_params/lstm.pt'))

    testX, testY = getTestData()

    hnm_correct, ntm_correct, lstm_correct = 0, 0, 0
    totSamples = 0

    for i in range(0, 25):

        s = torch.from_numpy(np.array(testX[i:i + 1][0])).float().unsqueeze(0)
        s_lstm = s.view(s.size()[0], s.size()[2], -1)
        l = np.array(testY[i:i + 1][0])

        print(i)

        (hnm_read_weights, hnm_write_weights) = hnm._initialise()
        (ntm_read_weights, ntm_write_weights) = ntm._initialise()
        lstm_h = lstm.h0.expand(s_lstm.size()[0], 64)
        lstm_c = lstm.c0.expand(s_lstm.size()[0], 64)

        for j in range(s.size()[1]):

            (hnm_out, hnm_read_weights,
             hnm_write_weights) = hnm.forward(s[:, j, :], hnm_read_weights,
                                              hnm_write_weights)
            (ntm_out, ntm_read_weights,
             ntm_write_weights) = ntm.forward(s[:, j, :], ntm_read_weights,
                                              ntm_write_weights)
            lstm_h, lstm_c, lstm_out = lstm.forward(s_lstm[:, :, j], lstm_h,
                                                    lstm_c)

            if np.argmax(hnm_out.detach().numpy()) == np.argmax(l[j]):
                hnm_correct += 1
            if np.argmax(ntm_out.detach().numpy()) == np.argmax(l[j]):
                ntm_correct += 1
            if np.argmax(lstm_out.detach().numpy()) == np.argmax(l[j]):
                lstm_correct += 1

            totSamples += 1

    print(hnm_correct, ntm_correct, lstm_correct)
    print(totSamples)
Example #14
def gen_model(input_dim,
              batch_size,
              output_dim,
              n_slots=n_slots,
              m_depth=m_depth,
              controller_model=None,
              activation="sigmoid",
              read_heads=1,
              write_heads=1):

    model = Sequential()
    model.name = "NTM_-_" + controller_model.name
    model.batch_size = batch_size
    model.input_dim = input_dim
    model.output_dim = output_dim

    ntm = NTM(output_dim,
              n_slots=n_slots,
              m_depth=m_depth,
              shift_range=3,
              controller_model=controller_model,
              activation=activation,
              read_heads=read_heads,
              write_heads=write_heads,
              return_sequences=True,
              input_shape=(None, input_dim),
              batch_size=batch_size)
    model.add(ntm)

    sgd = Adam(lr=learning_rate, clipnorm=clipnorm)
    model.compile(loss='binary_crossentropy',
                  optimizer=sgd,
                  metrics=['binary_accuracy'],
                  sample_weight_mode="temporal")

    return model
Example #15
    return parser.parse_args()


if __name__ == "__main__":

    args = parse_arguments()
    writer = SummaryWriter()
    dataset = BinaySeqDataset(args)
    dataloader = DataLoader(dataset, batch_size=1,
                            shuffle=True, num_workers=4)

    model = NTM(M=args.memory_capacity,
                N=args.memory_vector_size,
                input_size=args.token_size,
                output_size=args.token_size,
                controller_out_dim=args.controller_output_dim,
                controller_hid_dim=args.controller_hidden_dim,
                )

    print(model)

    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=args.learning_rate)

    print("--------- Number of parameters -----------")
    print(model.calculate_num_params())
    print("--------- Start training -----------")

    losses = []
Example #16
class NTMAugmentedDecoder(nn.Module):
    def __init__(self,
                 d_vocab,
                 d_emb,
                 d_dec,
                 max_len,
                 bos_idx,
                 num_heads=8,
                 N=64,
                 M=32,
                 seg_size=20):

        super().__init__()
        self.d_vocab = d_vocab
        self.seg_size = seg_size
        self.embs = nn.Embedding(d_vocab, d_emb)
        self.rnn = nn.GRU(d_emb, d_dec, batch_first=True)
        self.ntm_scale = nn.Parameter(torch.zeros([1, d_dec]),
                                      requires_grad=True)
        self.ntm = NTM('mem-aug',
                       embedding_size=d_dec,
                       hidden_size=d_dec,
                       memory_size=M,
                       head_num=num_heads,
                       memory_feature_size=N,
                       output_size=d_dec)
        self.init = nn.Parameter(torch.zeros(1, d_dec), requires_grad=True)
        self.bos_idx = nn.Parameter(torch.tensor([bos_idx]),
                                    requires_grad=False)
        self.out_layer = nn.Linear(d_dec, d_vocab)
        self.max_len = max_len

    def forward(self, labels, state=None):
        """
        Run decoder on input context with optional conditioning on labels and prelabels
        :param labels: labels conditioned on at each timestep in next-prediction task (used during training)
        :param state: initial state to begin decoding with. Could be output of encoder.
        :return: If labels are provided, returns logits tensor (batch_size, num_steps, vocab_size). If labels are not provided,
        returns predictions tensor (batch_size, num_steps) using provided sampling function.
        """
        batch_size = labels.shape[0]
        self.ntm.reset(batch_size, device=labels.device)
        if state is None:
            state = self.init.expand(batch_size, -1).contiguous()  # bxh

        init = self.embs(self.bos_idx.expand(batch_size).unsqueeze(1))  # bx1xh

        # initialize ntm state, which keeps track of reads and writes
        ntm_state = torch.zeros_like(state).to(state.device)

        labels_no_last = labels[:, :-1]  # b x (t-1); the last word is not used as input
        # break input into slices, read and write between slices
        num_slices = labels.shape[1] // self.seg_size
        all_logit_slices = []

        for slice in range(num_slices):
            # grab slice of input
            labels_slice = labels_no_last[:, slice *
                                          self.seg_size:slice * self.seg_size +
                                          self.seg_size]
            in_embs = self.embs(labels_slice)  # b x (t-1) x w

            if slice == 0:  # add bos index on first iteration
                in_embs = torch.cat([init, in_embs], dim=1)  # b x t x w

            # give ntm state as input to all time steps of next slice
            # multiple ntm state by scalars before giving to RNN, so it is not used in the beginning of training
            #scaled_ntm_state = ntm_state * self.ntm_scale
            #exp_ntm_state = scaled_ntm_state.unsqueeze(1).expand([-1, in_embs.shape[1], -1])  # b x t x h
            rnn_input = in_embs  # torch.cat([in_embs, exp_ntm_state], dim=-1)
            # read slice of conversation history, with access to ntm state
            outputs, _ = self.rnn(
                rnn_input, state.unsqueeze(0))  # b x (t-1) x h OR b x t x h
            # grab last state and use it to read and write from ntm
            state = outputs[:, -1, :]
            #ntm_state = self.ntm(state)
            # predict outputs for this slice
            logits = self.out_layer(outputs)  # b x t x v
            all_logit_slices.append(logits)
        # append predictions for all slices together
        logits = torch.cat(all_logit_slices, dim=1)

        return logits

    def complete(self, x, state=None, sample_func=None):
        """
        Given tensor x containing token indices, fill in all padding token (zero) elements
        with predictions from the NTM decoder.
        :param x: (batch_size, num_steps) tensor containing token indices
        :return: tensor same shape as x, where zeros have been filled with decoder predictions
        """
        batch_size, num_steps = x.shape
        self.ntm.reset(batch_size, device=x.device)
        if state is None:
            state = self.init.expand(batch_size, -1).contiguous()  # bxh
        if sample_func is None:
            sample_func = partial(torch.argmax, dim=-1)

        ntm_state = torch.zeros_like(state).to(state.device)

        all_logits = []
        all_preds = []
        init = self.embs(self.bos_idx.expand(batch_size).unsqueeze(1))  # bx1xh
        word = init.squeeze(1)  # b x w

        for step in range(num_steps):
            # run RNN over input words
            rnn_input = word.unsqueeze(
                1)  # torch.cat([word, ntm_state], dim=-1).unsqueeze(1)
            _, state = self.rnn(rnn_input, state.unsqueeze(0))  # 1 x b x h
            state = state.squeeze(0)

            # produce prediction at each time step
            logits = self.out_layer(state)  # b x v
            all_logits.append(logits)
            pred = sample_func(logits)  # b

            # # at the end of each segment, read and write from NTM
            # if step % self.seg_size == (self.seg_size - 1):
            #     # end of each segment, read and write from NTM
            #     ntm_state = self.ntm(state)

            # here, we grab word from x if it exists, otherwise use prediction
            mask = (x[:, step] != 0).long()  # b
            word_index = x[:, step] * mask + pred * (
                1 - mask)  # use label or prediction, whichever is available
            word = self.embs(word_index)  # b x w

            all_preds.append(word_index)
        logits = torch.stack(all_logits, dim=1)
        return logits, torch.stack(all_preds, dim=1)
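
A minimal usage sketch for the decoder above, with made-up sizes (the vocabulary size, batch size and sequence length below are hypothetical, and the NTM class used inside the decoder is assumed to be importable):

import torch

decoder = NTMAugmentedDecoder(d_vocab=1000, d_emb=64, d_dec=128,
                              max_len=40, bos_idx=1)

# teacher-forced pass: next-token logits for every position
labels = torch.randint(1, 1000, (8, 40))   # batch of 8 sequences, 40 tokens each
logits = decoder(labels)                   # (8, 40, 1000)

# free-running completion: zero entries mark the positions to be filled in
partial = labels.clone()
partial[:, 20:] = 0
logits, preds = decoder.complete(partial)  # preds: (8, 40)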
Example #17
cur_dir = os.getcwd()
PATH = os.path.join(cur_dir, args.saved_model)
# PATH = os.path.join(cur_dir, 'saved_models/saved_model_copy_500000.pt')
# ntm = torch.load(PATH)

"""
For the Copy task, input_size: seq_width + 2, output_size: seq_width
For the RepeatCopy task, input_size: seq_width + 2, output_size: seq_width + 1
For the Associative task, input_size: seq_width + 2, output_size: seq_width
For the NGram task, input_size: 1, output_size: 1
For the Priority Sort task, input_size: seq_width + 1, output_size: seq_width
"""

ntm = NTM(input_size=task_params['seq_width'] + 1,
          output_size=task_params['seq_width'],
          controller_size=task_params['controller_size'],
          memory_units=task_params['memory_units'],
          memory_unit_size=task_params['memory_unit_size'],
          num_heads=task_params['num_heads'])

ntm.load_state_dict(torch.load(PATH))

# -----------------------------------------------------------------------------
# --- evaluation
# -----------------------------------------------------------------------------
ntm.reset()
data = dataset[0]  # 0 is a dummy index
input, target = data['input'], data['target']
out = torch.zeros(target.size())

# -----------------------------------------------------------------------------
# loop for other tasks
Example #18
    task_params['num_heads'], task_params['uniform'],
    task_params['random_distr'], task_params['multi_layer_controller'])

# Output directory for tensorboard
configure(args.tb_dir + "/" + saved_model_name)
"""
For the Copy task, input_size: seq_width + 2, output_size: seq_width
For the RepeatCopy task, input_size: seq_width + 2, output_size: seq_width + 1
For the Associative task, input_size: seq_width + 2, output_size: seq_width
For the NGram task, input_size: 1, output_size: 1
For the Priority Sort task, input_size: seq_width + 1, output_size: seq_width
"""
ntm = NTM(input_size=task_params['seq_width'] + 1,
          output_size=task_params['seq_width'],
          controller_size=task_params['controller_size'],
          memory_units=task_params['memory_units'],
          memory_unit_size=task_params['memory_unit_size'],
          num_heads=task_params['num_heads'],
          multi_layer_controller=task_params['multi_layer_controller'])

if args.load_model != "":
    ntm.load_state_dict(torch.load(args.load_model))

criterion = nn.BCELoss()
# As the learning rate is task specific, the argument can be moved to json file
optimizer = optim.RMSprop(ntm.parameters(),
                          lr=args.lr,
                          alpha=args.alpha,
                          momentum=args.momentum)
'''
optimizer = optim.Adam(ntm.parameters(), lr=args.lr,
Example #19
controller_input_dim, controller_output_dim = controller_shape(num_encoder_tokens, 
                                                               layer_dim, 
                                                               m_depth, 
                                                               n_slots, 
                                                               shift_range, 
                                                               read_heads, 
                                                               write_heads)



encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = NTM(layer_dim,
              n_slots=n_slots,
              m_depth=m_depth,
              shift_range=shift_range,
              controller_model=None,
              activation="sigmoid",
              read_heads = read_heads,
              write_heads = write_heads,
              return_sequences=True,
              return_state=True)
saidas = encoder(encoder_inputs)

print(saidas[1])
Example #20
def generate_target_original_plots(iteration, task_params, model_path,
                                   image_output):

    dataset = PrioritySort(task_params)
    criterion = nn.BCELoss()

    ntm = NTM(input_size=task_params['seq_width'] + 1,
              output_size=task_params['seq_width'],
              controller_size=task_params['controller_size'],
              memory_units=task_params['memory_units'],
              memory_unit_size=task_params['memory_unit_size'],
              num_heads=task_params['num_heads'],
              save_weigths=True,
              multi_layer_controller=task_params['multi_layer_controller'])

    ntm.load_state_dict(torch.load(model_path))

    # -----------------------------------------------------------------------------
    # --- evaluation
    # -----------------------------------------------------------------------------
    ntm.reset()
    data = dataset[0]  # 0 is a dummy index
    input, target = data['input'], data['target']
    out = torch.zeros(target.size())

    # -----------------------------------------------------------------------------
    # loop for other tasks
    # -----------------------------------------------------------------------------
    for i in range(input.size()[0]):
        # to maintain consistency in dimensions as torch.cat was throwing error
        in_data = torch.unsqueeze(input[i], 0)
        ntm(in_data)

    # passing zero vector as the input while generating target sequence
    in_data = torch.unsqueeze(torch.zeros(input.size()[1]), 0)
    for i in range(target.size()[0]):
        out[i] = ntm(in_data)

    loss = criterion(out, target)

    binary_output = out.clone()
    binary_output = binary_output.detach().apply_(lambda x: 0
                                                  if x < 0.5 else 1)

    # sequence prediction error is calculated in bits per sequence
    error = torch.sum(torch.abs(binary_output - target))

    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    ax2 = fig.add_subplot(212)

    ax1.set_title("Result")
    ax2.set_title("Target")

    sns.heatmap(binary_output,
                ax=ax1,
                vmin=0,
                vmax=1,
                linewidths=.5,
                cbar=False,
                square=True)
    sns.heatmap(target,
                ax=ax2,
                vmin=0,
                vmax=1,
                linewidths=.5,
                cbar=False,
                square=True)

    plt.savefig(
        image_output +
        "/priority_sort_{}_{}_{}_{}_{}_{}_{}_{}_{}_image_{}.png".format(
            task_params['seq_width'] + 1, task_params['seq_width'],
            task_params['controller_size'], task_params['memory_units'],
            task_params['memory_unit_size'], task_params['num_heads'],
            task_params['uniform'], task_params['random_distr'],
            task_params['multi_layer_controller'], iteration))

    fig = plt.figure(figsize=(15, 6))
    ax1_2 = fig.add_subplot(211)
    ax2_2 = fig.add_subplot(212)
    ax1_2.set_title("Read Weights")
    ax2_2.set_title("Write Weights")

    sns.heatmap(ntm.all_read_w, ax=ax1_2, linewidths=.01, square=True)
    sns.heatmap(ntm.all_write_w, ax=ax2_2, linewidths=.01, square=True)

    plt.tight_layout()
    plt.savefig(
        image_output +
        "/priority_sort_{}_{}_{}_{}_{}_{}_{}_{}_{}_weigths_{}.png".format(
            task_params['seq_width'] + 1, task_params['seq_width'],
            task_params['controller_size'], task_params['memory_units'],
            task_params['memory_unit_size'], task_params['num_heads'],
            task_params['uniform'], task_params['random_distr'],
            task_params['multi_layer_controller'], iteration),
        dpi=250)

    # ---logging---
    print('[*] Checkpoint Loss: %.2f\tError in bits per sequence: %.2f' %
          (loss, error))
Example #21
def get_ntm_model():
    from keras.models import Model
    import keras.backend as K

    assert permute_layer is not None
    num_read = 1
    num_write = 1
    mem_length = 40
    n_slots = 128

    model_input = Input(
        (WINDOW_LENGTH, 1) + INPUT_SHAPE,
        #batch_shape = (batch_size,) + (WINDOW_LENGTH,1) + INPUT_SHAPE
    )

    per = permute_layer(model_input)
    x = TimeDistributed(
        Conv2D(32, (8, 8), name='conv1', activation='relu',
               subsample=(4, 4)))(per)
    x = TimeDistributed(
        Conv2D(64, (4, 4), name='conv2', activation='relu',
               subsample=(2, 2)))(x)
    x = TimeDistributed(
        Conv2D(64, (3, 3), name='conv3', activation='relu',
               subsample=(1, 1)))(x)
    #x = TimeDistributed(Conv2D(64,(4,4),name='conv2',activation = 'relu',subsample = (2,2)))(x)
    #x = TimeDistributed(Conv2D(64,(3,3),name='conv3',activation = 'relu',subsample = (1,1)))(x)
    x = TimeDistributed(Flatten(name="Flatten1"))(
        x)  # (batch_size,WINDOW_LENGTH,3176)
    x_shape = K.int_shape(x)
    print('x has shape:', x_shape)
    # controller construction
    controller_inp = Input((x_shape[-1], ),
                           name="controller_input")  # (batch_size,3176)
    read_inp = Input((num_read, mem_length),
                     name="read_inp")  # (batch_size,n_read,n_write)
    read_inp_flatten = Flatten(name="read_inp_flatten")(
        read_inp)  #(batch_size,n_read * n_write)
    print('controller_inp shape:', controller_inp.shape)
    #print('read_inp_flatten shape:',K.int_shape(read_inp_flatten))
    #print('read_inp_flatten_repeat shape:',K.int_shape(read_inp_flatten_repeat))
    #hidden_int = Dense(512,activation = 'relu')(controller_inp)
    hidden = Concatenate(name="ctrl_inp_read_inp_concat")(
        [controller_inp,
         read_inp_flatten])  # (batch_size, 3176 + num_read * mem_length)
    #hidden = Dense(512,activation = 'relu')(concat)
    controller_output = Dense(nb_actions, activation='linear')(hidden)
    controller = Model([controller_inp, read_inp],
                       [controller_output, controller_inp])
    controller.summary()
    # ntm construction
    #TODO: reset the state for on_batch_end!!
    ntm_cell = NTM(
        controller,  # custom controller, should output a vector
        n_slots,
        mem_length,  # Memory config
        num_shift=3,  # shifting
        batch_size=batch_size,
        #controller_instr_output_dim = controller_instr_output_dim,
        return_sequences=False,
        is_controller_recurrent=True,
        num_read=num_read,
        num_write=num_write)(x)  # (batch_size,512)
    ntm_cell_output_shape = K.int_shape(ntm_cell)
    print('ntm_cell output:', ntm_cell_output_shape)
    ntm_cell_output_shape = ntm_cell_output_shape[1:]

    #model_output = Dense(nb_actions,activation = 'linear')(ntm_cell)

    model = Model(model_input, ntm_cell)
    model.summary()
    return model
Example #22
              n_slots = n_slots,
              m_depth = m_depth,
              controller_model = None,
              activation = "sigmoid",
              read_heads=1,
              write_heads=1):

    model = Sequential()
    model.name = "NTM_-_"+ controller_model.name 
    model.batch_size = batch_size
    model.input_dim = input_dim
    model.output_dim = output_dim

    ntm = NTM(output_dim, n_slots = n_slots, m_depth = m_depth,
              shift_range = 3,
              controller_model = controller_model,
              activation = activation,
              read_heads = read_heads,
              write_heads = write_heads,
             # return_sequences = True,
             input_shape = (None,input_dim),
             batch_size = batch_size)

    model.add(ntm)

    sgd = Adam(lr = learning_rate, clipnorm = clipnorm)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['binary_accuracy'], sample_weight_mode="temporal")

    return model

Example #23
from keras.models import Sequential
from keras.optimizers import Adam
from ntm import NeuralTuringMachine as NTM

model = Sequential()

ntm = NTM([625],
          n_slots=50,
          m_depth=20,
          shift_range=3,
          controller_model=None,
          return_sequences=True,
          input_shape=(None, 625),
          batch_size=100)
model.add(ntm)

# sgd = Adam(lr=learning_rate, clipnorm=clipnorm)
model.compile(loss='binary_crossentropy',
              optimizer='Adam',
              metrics=['binary_accuracy'],
              sample_weight_mode="temporal")
print(model.summary())
Example #24
def trainNTM():
    ntm = NTM(10, 14)

    X, y = getData()

    ntm.train(X, y, 1)
Example #25
    dataset = PrioritySort(task_params)
    input_size=task_params['seq_width']+1
    output_size=task_params['seq_width']

"""
For the Copy task, input_size: seq_width + 2, output_size: seq_width
For the RepeatCopy task, input_size: seq_width + 2, output_size: seq_width + 1
For the Associative task, input_size: seq_width + 2, output_size: seq_width
For the NGram task, input_size: 1, output_size: 1
For the Priority Sort task, input_size: seq_width + 1, output_size: seq_width
"""
has_tau=0
if args.model=='ntm':
    model = NTM(input_size= input_size,
          output_size=output_size,
          controller_size=args.lstm_size,
          memory_units=128,
          memory_unit_size=20,
          num_heads=1)#task_params['num_heads'])
elif args.model=='dnc':
    model = DNC(input_size= input_size,
          output_size=output_size,
          hidden_size=args.lstm_size,
          nr_cells=128,
          cell_size=20,
          read_heads=1)#task_params['num_heads'])
    model.init_param()
elif args.model=='sam':
    model = SAM(input_size= input_size,
          output_size=output_size,
          hidden_size=args.lstm_size,
          nr_cells=128,
Example #26
    'length': 5,
    'controller_layer_size': 1,
    'write_head_size': 1,
    'read_head_size': 1,
    'checkpoint_dir': 'checkpoint'
}

if __name__ == "__main__":
    with tf.device('/cpu:0'), tf.Session() as sess:
        cell = NTMCell(input_dim=config['input_dim'],
                       output_dim=config['output_dim'],
                       controller_layer_size=config['controller_layer_size'],
                       write_head_size=config['write_head_size'],
                       read_head_size=config['read_head_size'],
                       controller_dim=32)
        ntm = NTM(cell, sess, config['length'] * 2 + 2)

        if not os.path.isdir(config['checkpoint_dir'] + '/copy_' +
                             str(config['length'] * 2 + 2)):
            print(" [*] Initialize all variables")
            tf.global_variables_initializer().run()
            print(" [*] Initialization finished")
        else:
            ntm.load(config['checkpoint_dir'], 'copy')

        start_time = time.time()
        print('')
        for idx in range(config['epoch']):
            seq_length = np.random.randint(2, config['length'] + 1)
            X, Y, masks = build_seq_batch(seq_length, config['length'],
                                          config['input_dim'] - 2)
Example #27
class EncapsulatedNTM(nn.Module):
    def __init__(self,
                 num_inputs,
                 num_outputs,
                 controller_size,
                 controller_layers,
                 num_heads,
                 N,
                 M,
                 controller_type='lstm'):
        """Initialize an EncapsulatedNTM.
        :param num_inputs: External number of inputs.
        :param num_outputs: External number of outputs.
        :param controller_size: The size of the internal representation.
        :param controller_layers: Controller number of layers.
        :param num_heads: Number of heads.
        :param N: Number of rows in the memory bank.
        :param M: Number of cols/features in the memory bank.
        """
        super(EncapsulatedNTM, self).__init__()

        # Save args
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.controller_size = controller_size
        self.controller_layers = controller_layers
        self.num_heads = num_heads
        self.N = N
        self.M = M

        # Create the NTM components
        memory = NTMMemory(N, M)
        if controller_type == 'lstm':
            controller = LSTMController(num_inputs + M * num_heads,
                                        controller_size, controller_layers)
        else:
            controller = MLPController(num_inputs + M * num_heads,
                                       controller_size, controller_layers)

        heads = nn.ModuleList([])
        for i in range(num_heads):
            heads += [
                NTMReadHead(memory, controller_size),
                NTMWriteHead(memory, controller_size)
            ]

        self.ntm = NTM(num_inputs, num_outputs, controller, memory, heads)
        self.memory = memory

    def init_sequence(self, batch_size):
        """Initializing the state."""
        self.batch_size = batch_size
        self.memory.reset(batch_size)
        self.previous_state = self.ntm.create_new_state(batch_size)

    def forward(self, x=None):
        if x is None:
            x = Variable(torch.zeros(self.batch_size, self.num_inputs))

        o, self.previous_state = self.ntm(x, self.previous_state)
        return o, self.previous_state

    def calculate_num_params(self):
        """Returns the total number of parameters."""
        num_params = 0
        for p in self.parameters():
            num_params += p.data.view(-1).size(0)
        return num_params
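
A minimal usage sketch for EncapsulatedNTM with made-up sizes (all numbers are hypothetical; NTMMemory, the controllers, the heads and the inner NTM class are assumed to be importable from the same package):

import torch

model = EncapsulatedNTM(num_inputs=9, num_outputs=8,
                        controller_size=100, controller_layers=1,
                        num_heads=1, N=128, M=20)

batch_size, seq_len = 4, 10
x = torch.rand(seq_len, batch_size, 9)

model.init_sequence(batch_size)
for t in range(seq_len):
    out, state = model(x[t])  # out: (batch_size, num_outputs) at each timestep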
Example #28
dataset = RepeatCopyDataset(task_params)
dataset = AssociativeDataset(task_params)
dataset = NGram(task_params)
dataset = PrioritySort(task_params)
'''

"""
For the Copy task, input_size: seq_width + 2, output_size: seq_width
For the RepeatCopy task, input_size: seq_width + 2, output_size: seq_width + 1
For the Associative task, input_size: seq_width + 2, output_size: seq_width
For the NGram task, input_size: 1, output_size: 1
For the Priority Sort task, input_size: seq_width + 1, output_size: seq_width
"""
ntm = NTM(input_size=task_params['seq_width'] + 2,
          output_size=task_params['seq_width'],
          controller_size=task_params['controller_size'],
          memory_units=task_params['memory_units'],
          memory_unit_size=task_params['memory_unit_size'],
          num_heads=task_params['num_heads'])

criterion = nn.BCELoss()
# As the learning rate is task specific, the argument can be moved to json file
optimizer = optim.RMSprop(ntm.parameters(),
                          lr=args.lr,
                          alpha=args.alpha,
                          momentum=args.momentum)
'''
optimizer = optim.Adam(ntm.parameters(), lr=args.lr,
                       betas=(args.beta1, args.beta2))
'''

args.saved_model = 'saved_model_copy.pt'
Example #29
task_params['max_seq_len'] = data[d][1]
dataset = ReverseDataset(task_params)
dataset2 = ReverseDataset(task_params)
task_params['min_seq_len'] = data[d][2]
task_params['max_seq_len'] = data[d][3]
dataset3 = ReverseDataset(task_params)
#4.save model
args.saved_model = 'saved_model/' + 'saved_model_reverse_' + args.config + '.pt'
cur_dir = os.getcwd()
PATH = os.path.join(cur_dir, args.saved_model)
"""
For the Reverse task, input_size: seq_width + 2, output_size: seq_width
"""
ntm = NTM(input_size=task_params['seq_width'] + 2,
          output_size=task_params['seq_width'],
          controller_size=task_params['controller_size'],
          memory_units=task_params['memory_units'],
          memory_unit_size=task_params['memory_unit_size'],
          num_heads=task_params['num_heads'])

criterion = nn.BCELoss()
# As the learning rate is task specific, the argument can be moved to json file
# optimizer = optim.RMSprop(ntm.parameters(),
#                           lr=args.lr,
#                           alpha=args.alpha,
#                           momentum=args.momentum)
optimizer = optim.Adam(ntm.parameters(),
                       lr=args.lr,
                       betas=(args.beta1, args.beta2))

# ----------------------------------------------------------------------------
# -- basic training loop
Example #30
model = Sequential()
model.name = "NTM_-_" + None.name
model.batch_size = batch_size
model.input_dim = input_dim
model.output_dim = 1

ntm = NTM(
    1,
    n_slots=n_slots,  #n_slots: Memory width
    m_depth=m_depth,  #m_depth: Memory depth at each location
    shift_range=3,
    #shift_range: int, number of available shifts, e.g. if 3, available shifts are
    #                 (-1, 0, 1)
    controller_model=None,
    #controller_model: A keras model with required restrictions to be used as a controller.
    #                  The requirements are appropriate shape, linear activation and stateful=True if recurrent.
    #                  Default: One dense layer.
    activation="sigmoid",
    #activation: This is the activation applied to the layer output.
    #            It can be either a Keras activation or a string like "tanh", "sigmoid", "linear" etc.
    #            Default is linear.
    read_heads=1,
    write_heads=1,
    return_sequences=True,
    input_shape=(None, input_dim),
    batch_size=batch_size)

model.add(ntm)
model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=learning_rate, clipnorm=clipnorm),
              metrics=['binary_accuracy'],
              sample_weight_mode="temporal")
Example #31
def main():
    train = True
    weight_path = ""  #"model-constant-mem_4500.save"

    if train:
        # Training mode: learn the copy task from scratch.

        plt.ion()
        fig = plt.figure()

        # COPY TASK, copy a 20 length tensor of random numbers. Each section will contain 5 bits, one of which (bits 0 - 3) will be on.
        # We'll input the 20 length, then input an end token, which will be the 5th bit (or 4th if using zero-based indexing), then input another 20 length with all zeros. This signals to the NTM that it needs to start outputting the copy.
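        # Hypothetical layout per batch element (timesteps x 5 bits), matching how
        # `data` and `target` are built further down:
        #   data:   [random pattern] [end marker, last bit set] [zeros, same length as the pattern]
        #   target: [zeros]          [end marker]               [random pattern]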

        class Controller:
            def __init__(self):
                # Output size is 5, because it needs to output the copied 5 bits
                self.size = 128

                # We'll have 1 read head, which produces a single read_vector of size 10. We also need to feed in the input, which is of size 5 (for the five bits)
                # so our total input size is 15
                self.fc_1 = init_weight(15, 128)
                self.fc_2 = init_weight(128, 128)

                # This is our controller output
                self.fc_3 = init_weight(128, 128)

            def get_weights(self):
                return [self.fc_1, self.fc_2, self.fc_3]

            def forward(self, inp):
                fc1 = T.nnet.relu(T.dot(inp, self.fc_1))
                fc2 = T.nnet.relu(T.dot(fc1, self.fc_2))

                # I would ReLU the output, but I already did in the NTM implementation
                fc3 = T.dot(fc2, self.fc_3)

                return fc3

        # output size is 5, for the 5 copy bits
        ntm = NTM(controller=Controller(),
                  output_size=5,
                  memory_slots=20,
                  slot_size=10,
                  read_heads=1,
                  batch_size=10)

        data = T.tensor3()
        target = T.tensor3()

        #r = theano.shared(np.random.randn(10, 10, 20))
        r = theano.shared(1.)
        r_ = theano.shared(np.zeros([10, 10, 20])) + r

        if weight_path != '':
            print("loading weights")

            # Load weights, but just the NTM weights, not memory, since we may extend it
            checkpoint = open(weight_path, 'rb')

            all_weights = ntm.weights
            for w in all_weights:
                w.set_value(cPickle.load(checkpoint).get_value())
            checkpoint.close()

        memory_states, _, weightings, ntm_outputs = ntm.process(data, r_)

        # We average the loss across batches, so that we have a singular loss for each timestep. We then average these losses
        # ntm_outputs - target ** 2 -> ts x batchsize x bits
        #

        loss = T.sum(T.mean(T.sum(5 *
                                  (T.nnet.sigmoid(ntm_outputs) - target)**2,
                                  axis=2),
                            axis=1),
                     axis=0)

        updates = RMSprop(cost=loss, params=ntm.weights + [r], lr=1e-3)

        train = theano.function(inputs=[data, target],
                                outputs=[
                                    memory_states, weightings, weightings,
                                    ntm_outputs, loss, updates[2][1]
                                ],
                                updates=updates)

        for example in range(5000):
            # Produce the first half

            # let's feed a test example
            # ts x batchsize x bits

            end = np.zeros([1, 10, 5])
            for batch in range(10):
                end[0, batch, -1] = 1  # Make the last bit in each batch a 1

            first_half = (np.random.randn(10, 10, 5) > 0).astype(
                np.float32) * 1

            for batch in range(10):
                first_half[:, batch,
                           -1] = 0  # Make sure the last bit (end bit) of each batch is 0

            # Produce second half
            second_half = np.zeros([10, 10, 5])  # Just a bunch of zeros

            data = np.concatenate([first_half, end, second_half], axis=0)
            target = np.concatenate([second_half, end, first_half], axis=0)

            # the extra timestep accounts for the end-bit marker
            outputs = train(data, target)

            print("LOSS " + str(outputs[-2]) + ", " + str(example))

            read = outputs[2]
            read = read[:, 0, 0, :]

            write = outputs[2]
            write = write[:, 1, 0, :]

            outputs = outputs[3]
            outputs = outputs[:, 0]

            #.transpose([1, 0])

            if (example % 20 == 0 and example != 0):
                cmap = 'jet'

                fig.add_subplot(2, 2, 1)
                plt.imshow(sigmoid(outputs), cmap=cmap)
                fig.add_subplot(2, 2, 2)
                plt.imshow(target[:, 0], cmap=cmap)
                fig.add_subplot(2, 2, 3)
                plt.imshow(read, cmap=cmap)
                fig.add_subplot(2, 2, 4)
                plt.imshow(write, cmap=cmap)
                plt.pause(0.1)

            if (example % 500 == 0):
                print("SAVING WEIGHTS")
                f = open('model-constant-mem_' + str(example) + '.save', 'wb')
                for w in ntm.weights + [r]:
                    cPickle.dump(w, f, protocol=cPickle.HIGHEST_PROTOCOL)

                f.close()
            """
            fig = plt.figure()
            fig.add_subplot(2, 2, 1) 
            plt.imshow(data[:, 0, :], origin = [0, 0])
            fig.add_subplot(2, 2, 2)
            plt.imshow(target[:, 0, :], origin = [10, 0])
            plt.show()
            """
    else:
        # Test time!

        plt.ion()
        fig = plt.figure()

        # COPY TASK, we're going to see how well our Neural Turing Machine model extends to longer sequences

        class Controller:
            def __init__(self):
                # Output size is 5, because it needs to output the copied 5 bits
                self.size = 128

                # We'll have 1 read head, which produces a single read_vector of size 10. We also need to feed in the input, which is of size 5 (for the five bits)
                # so our total input size is 15
                self.fc_1 = init_weight(15, 128)
                self.fc_2 = init_weight(128, 128)

                # This is our controller output
                self.fc_3 = init_weight(128, 128)

            def get_weights(self):
                return [self.fc_1, self.fc_2, self.fc_3]

            def forward(self, inp):
                fc1 = T.nnet.relu(T.dot(inp, self.fc_1))
                fc2 = T.nnet.relu(T.dot(fc1, self.fc_2))

                # I would ReLU the output, but I already did in the NTM implementation
                fc3 = T.dot(fc2, self.fc_3)

                return fc3

        # output size is 5, for the 5 copy bits
        ntm = NTM(controller=Controller(),
                  output_size=5,
                  memory_slots=80,
                  slot_size=10,
                  read_heads=1,
                  batch_size=10)

        data = T.tensor3()

        # Load weights
        checkpoint = open(
            'pretrained-models-copy/model-constant-mem_4500.save', 'rb')

        all_weights = ntm.weights
        for w in all_weights:
            w.set_value(cPickle.load(checkpoint).get_value())

        r = theano.shared(
            np.zeros(shape=[10, 10, 80]) +
            cPickle.load(checkpoint).get_value())

        checkpoint.close()

        memory_states, _, weightings, ntm_outputs = ntm.process(data, r)

        test = theano.function(
            inputs=[data],
            outputs=[memory_states, weightings, weightings, ntm_outputs])

        for example in range(5000):
            print(r.get_value())

            # Produce the first half

            # let's feed a test example
            # ts x batchsize x bits

            end = np.zeros([1, 10, 5])
            for batch in range(10):
                end[0, batch, -1] = 1  # Make the last bit in each batch a 1

            first_half = (np.random.randn(60, 10, 5) > .7).astype(
                np.float32) * 1

            for batch in range(10):
                first_half[:, batch,
                           -1] = 0  # Make sure the last bit (end bit) of each batch is 0

            # Produce second half
            second_half = np.zeros([60, 10, 5])  # Just a bunch of zeros

            data = np.concatenate([first_half, end, second_half], axis=0)

            # the extra timestep accounts for the end-bit marker
            outputs = test(data)

            read = outputs[2]
            read = read[:, 0, 0, :]

            write = outputs[2]
            write = write[:, 1, 0, :]

            outputs = outputs[3]
            outputs = outputs[:, 0]

            #.transpose([1, 0])

            cmap = 'jet'

            fig.add_subplot(2, 2, 1)
            plt.imshow(sigmoid(outputs), cmap=cmap)
            fig.add_subplot(2, 2, 2)
            plt.imshow(data[:, 0], cmap=cmap)
            fig.add_subplot(2, 2, 3)
            plt.imshow(read, cmap=cmap)
            fig.add_subplot(2, 2, 4)
            plt.imshow(write, cmap=cmap)
            plt.pause(0.1)

            input("")
Example #32
            from_checkpoint = opt[1]
        elif opt[0] == '--iterations':
            iterations = int(opt[1])

    graph = tf.Graph()

    with graph.as_default():
        with tf.compat.v1.Session(graph=graph) as session:

            llprint("Building Computational Graph ... ")

            optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate,
                                                            momentum=momentum)

            turing_machine = NTM(RecurrentController, input_size, output_size,
                                 memory_size, word_size, read_heads,
                                 shift_range, batch_size)

            # squash the DNC output between 0 and 1
            output, _ = turing_machine.get_outputs()
            squashed_output = tf.clip_by_value(tf.sigmoid(output), 1e-6,
                                               1. - 1e-6)
            loss = binary_cross_entropy(squashed_output,
                                        turing_machine.target_output)

            summaries = []

            gradients = optimizer.compute_gradients(loss)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    summaries.append(