Exemplo n.º 1
0
 def to_file(self, fout):
     """Pickle this object's state to the open binary file `fout`.

     Writes self.iters, self.costs and self.expcosts as three consecutive
     pickles, in that order. When self.mom is positive, a fourth pickle
     follows: the list of self.vel entries, each converted with as_np,
     ordered by self.model.param_keys.
     """
     for attr_name in ('iters', 'costs', 'expcosts'):
         pickle.dump(getattr(self, attr_name), fout)

     if self.mom > 0:
         velocities = [as_np(self.vel[key]) for key in self.model.param_keys]
         pickle.dump(velocities, fout)
Exemplo n.º 2
0
def sample_continuation(s, model, order, alpha=1.0):
    """Sample one character to follow the string `s` using `model`.

    The trailing characters of `s` are mapped through char_inds, one-hot
    encoded and passed to model.cost_and_grad with no labels; the returned
    probabilities are raised to the power `alpha`, renormalized and sampled.

    Args:
        s: context string; each character used must be a key of char_inds.
        model: object exposing hps.output_size and cost_and_grad (and
            last_h when MODEL_TYPE is 'rnn').
        order: context length control for non-RNN models; the last
            `order - 1` characters of `s` are fed to the model.
        alpha: exponent applied to the probabilities before sampling.

    Returns:
        The sampled character, looked up in chars.
    """
    is_rnn = MODEL_TYPE == 'rnn'

    # The RNN is fed only the last character and resumes from model.last_h;
    # other models are fed the trailing window of the string.
    # NOTE(review): for order == 1 the slice s[-order+1:] is s[0:], i.e. the
    # whole string rather than an empty context -- confirm callers never
    # pass order == 1.
    context = s[-1:] if is_rnn else s[-order+1:]
    indices = np.array([char_inds[ch] for ch in context])
    data = one_hot(array(indices).reshape(-1, 1), model.hps.output_size)

    if is_rnn:
        _, probs = model.cost_and_grad(data, None, prev_h0=model.last_h)
        probs = np.squeeze(as_np(probs))
    else:
        data = data.reshape((-1, data.shape[2]))
        _, probs = model.cost_and_grad(data, None)
    probs = probs.ravel()

    # Larger alpha sharpens the distribution toward the most likely character
    sharpened = probs ** alpha
    probs = sharpened / sum(sharpened)

    w = np.random.choice(range(model.hps.output_size), p=probs)
    return chars[w]
Exemplo n.º 3
0
Arquivo: rnn.py Projeto: comadan/nn
    def cost_and_grad(self, data, labels, back=True, prev_h0=None):
        """Forward-propagate a batch of sequences and optionally backpropagate.

        Args:
            data: array indexed as data[:, t, :]; axis 1 is the time step
                and axis 2 is the batch index.
            labels: per-example label sequences, where labels[k][t] is the
                target row index for example k at step t, or None to run
                only the forward pass. A sequence may be shorter than T.
            back: if False, return right after the cost is computed.
            prev_h0: optional per-layer list of initial hidden states, used
                instead of the learned self.params['h0'].

        Returns:
            (None, probs) when labels is None, (cost, probs) when back is
            False, otherwise (cost, self.grads). probs is a list holding one
            softmax output per time step.

        Side effects:
            Sets self.last_h to the last-time-step hidden activations of
            every layer. When backpropagating, zeroes and then accumulates
            into self.grads in place.
        """
        hps = self.hps
        T = data.shape[1]
        bsize = data.shape[2]

        # FIXME gnumpy reallocates if try and use same parameters?
        #us = self.us[:, 0:T, 0:bsize]
        #dus = self.dus[:, 0:T, 0:bsize]
        #hs = self.hs[:, 0:T, 0:bsize]
        #dhs = self.dhs[:, 0:T, 0:bsize]
        #probs = self.probs[:, 0:T, 0:bsize]
        #dprobs = self.dprobs[:, 0:T, 0:bsize]
        #costs = self.costs[0:T, 0:bsize]

        # Per-layer, per-time-step buffers, indexed [k][t]:
        #   us  - pre-activations (inputs to self.nl)
        #   hs  - activations (self.nl(us))
        #   dus - gradients w.r.t. us
        #   dhs - gradients w.r.t. hs
        # dus and dhs must start at zero because backprop accumulates with +=.
        us = list()
        dus = list()
        hs = list()
        dhs = list()
        h0 = list()
        for k in xrange(hps.hidden_layers):
            us.append(list())
            dus.append(list())
            hs.append(list())
            dhs.append(list())
            # Placeholder; replaced below by prev_h0 or the learned h0
            h0.append(empty((hps.hidden_size, bsize)))
            for t in xrange(T):
                us[k].append(zeros((hps.hidden_size, bsize)))
                dus[k].append(zeros((hps.hidden_size, bsize)))
                hs[k].append(zeros((hps.hidden_size, bsize)))
                dhs[k].append(zeros((hps.hidden_size, bsize)))
        probs = list()
        for t in xrange(T):
            probs.append(zeros((hps.output_size, bsize)))
        # Costs live in a plain numpy array; entries never assigned stay zero
        costs = np.zeros((T, bsize))
        if prev_h0 is not None:
            h0 = prev_h0
        else:
            # Use column k of the learned h0 parameter for layer k, expanded
            # to the batch with tile
            for k in xrange(hps.hidden_layers):
                h0[k] = tile(self.params['h0'][:, k].reshape(-1, 1), bsize)
        bih = self.params['bih']
        Wih = self.params['Wih']
        Whh = self.params['Whh']
        bhh = self.params['bhh']
        Who = self.params['Who']
        bho = self.params['bho']

        # Forward prop

        for t in xrange(T):
            for k in xrange(hps.hidden_layers):
                if t == 0:
                    hprev = h0[k]
                else:
                    hprev = hs[k][t-1]

                # Feed-forward input: from the data for the first layer,
                # from the layer below otherwise
                if k == 0:
                    us[k][t] = mult(Wih, data[:, t, :]) + bih
                else:
                    us[k][t] = mult(self.params['Wh%d' % k], hs[k-1][t])

                # hps.recurrent_layer is 1-based; only that layer receives
                # the recurrent term from the previous time step
                if k == hps.recurrent_layer - 1:
                    us[k][t] += mult(Whh, hprev) + bhh
                    # Clip maximum activation
                    mask = us[k][t] < hps.max_act
                    us[k][t] = us[k][t] * mask + hps.max_act * (1 - mask)
                elif k != 0:
                    us[k][t] += self.params['bh%d' % k]

                hs[k][t] = self.nl(us[k][t])

            probs[t] = softmax(mult(Who, hs[-1][t]) + bho)

        # Remember final hidden states so a later call can pass them back
        # in as prev_h0
        self.last_h = list()
        for k in xrange(hps.hidden_layers):
            self.last_h.append(hs[k][-1])

        if labels is None:
            return None, probs

        # Cost and output-layer gradient are computed on numpy copies, then
        # the gradient is converted back with array()
        probs_neg_log = list()
        dprobs = list()
        for t in xrange(T):
            probs_neg_log.append(as_np(-1 * log(probs[t])))
            dprobs.append(as_np(probs[t].copy()))
        # Here k indexes the batch example, not the layer
        # NOTE(review): for steps t >= len(labels[k]) the cost stays zero but
        # dprobs keeps the raw probs, so those steps still feed the backward
        # pass -- confirm this is intended for shorter sequences.
        for k in xrange(bsize):
            for t in xrange(len(labels[k])):
                costs[t, k] = probs_neg_log[t][labels[k][t], k]
                dprobs[t][labels[k][t], k] -= 1
        for t in xrange(T):
            dprobs[t] = array(dprobs[t])

        # NOTE Summing costs over time
        # NOTE FIXME Dividing by T to get better sense if objective
        # is decreasing, remove for grad checking
        cost = costs.sum() / bsize / float(T)
        if not back:
            return cost, probs

        # Backprop

        # Reset the gradient buffers in place before accumulating
        for k in self.grads:
            self.grads[k][:] = 0

        # Walk time backwards so dhs[k][t] already holds the recurrent
        # contribution from step t+1 when step t is processed
        for t in reversed(xrange(T)):
            self.grads['bho'] += dprobs[t][:, :].sum(axis=-1).reshape((-1, 1)) / bsize
            self.grads['Who'] += mult(dprobs[t], hs[-1][t].T) / bsize

            for k in reversed(xrange(hps.hidden_layers)):
                # Gradient arriving from above: the output layer for the top
                # hidden layer, the next hidden layer otherwise
                if k == hps.hidden_layers - 1:
                    dhs[k][t] += mult(Who.T, dprobs[t])
                else:
                    dhs[k][t] += mult(self.params['Wh%d' % (k+1)].T, dhs[k+1][t])
                dus[k][t] += get_nl_grad(self.hps.nl, us[k][t]) * dhs[k][t]

                # NOTE(review): this also runs for the recurrent layer, whose
                # forward pass adds bhh rather than bh%d -- confirm the bh%d
                # gradient there is intended.
                if k > 0:
                    self.grads['Wh%d' % k] += mult(dus[k][t], hs[k-1][t].T) / bsize
                    self.grads['bh%d' % k] += dus[k][t].sum(axis=-1).reshape((-1, 1)) / bsize

                if k == hps.recurrent_layer - 1:
                    if t == 0:
                        hprev = h0[k]
                        # Gradient for the learned initial state, summed
                        # over the batch axis
                        self.grads['h0'][:, k] = mult(Whh.T, dus[k][t]).sum(axis=-1) / bsize
                    else:
                        hprev = hs[k][t-1]
                        # Seed the previous step's hidden gradient; the
                        # contribution from above is added with += when
                        # step t-1 is processed
                        dhs[k][t-1] = mult(Whh.T, dus[k][t])
                    self.grads['Whh'] += mult(dus[k][t], hprev.T) / bsize
                    self.grads['bhh'] += dus[k][t].sum(axis=-1).reshape((-1, 1)) / bsize

            self.grads['Wih'] += mult(dus[0][t], data[:, t, :].T) / bsize
            self.grads['bih'] += dus[0][t].sum(axis=-1).reshape((-1, 1)) / bsize

        return cost, self.grads
Exemplo n.º 4
0
    def cost_and_grad(self, data, labels, back=True, prev_h0=None):
        """Forward-propagate a batch of sequences and optionally backpropagate.

        Args:
            data: array indexed as data[:, t, :]; axis 1 is the time step
                and axis 2 is the batch index.
            labels: per-example label sequences, where labels[k][t] is the
                target row index for example k at step t, or None to run
                only the forward pass. A sequence may be shorter than T.
            back: if False, return right after the cost is computed.
            prev_h0: optional per-layer list of initial hidden states, used
                instead of the learned self.params['h0'].

        Returns:
            (None, probs) when labels is None, (cost, probs) when back is
            False, otherwise (cost, self.grads). probs is a list holding one
            softmax output per time step.

        Side effects:
            Sets self.last_h to the last-time-step hidden activations of
            every layer. When backpropagating, zeroes and then accumulates
            into self.grads in place.
        """
        hps = self.hps
        T = data.shape[1]
        bsize = data.shape[2]

        # FIXME gnumpy reallocates if try and use same parameters?
        #us = self.us[:, 0:T, 0:bsize]
        #dus = self.dus[:, 0:T, 0:bsize]
        #hs = self.hs[:, 0:T, 0:bsize]
        #dhs = self.dhs[:, 0:T, 0:bsize]
        #probs = self.probs[:, 0:T, 0:bsize]
        #dprobs = self.dprobs[:, 0:T, 0:bsize]
        #costs = self.costs[0:T, 0:bsize]

        # Per-layer, per-time-step buffers, indexed [k][t]:
        #   us  - pre-activations (inputs to self.nl)
        #   hs  - activations (self.nl(us))
        #   dus - gradients w.r.t. us
        #   dhs - gradients w.r.t. hs
        # dus and dhs must start at zero because backprop accumulates with +=.
        us = list()
        dus = list()
        hs = list()
        dhs = list()
        h0 = list()
        for k in xrange(hps.hidden_layers):
            us.append(list())
            dus.append(list())
            hs.append(list())
            dhs.append(list())
            # Placeholder; replaced below by prev_h0 or the learned h0
            h0.append(empty((hps.hidden_size, bsize)))
            for t in xrange(T):
                us[k].append(zeros((hps.hidden_size, bsize)))
                dus[k].append(zeros((hps.hidden_size, bsize)))
                hs[k].append(zeros((hps.hidden_size, bsize)))
                dhs[k].append(zeros((hps.hidden_size, bsize)))
        probs = list()
        for t in xrange(T):
            probs.append(zeros((hps.output_size, bsize)))
        # Costs live in a plain numpy array; entries never assigned stay zero
        costs = np.zeros((T, bsize))
        if prev_h0 is not None:
            h0 = prev_h0
        else:
            # Use column k of the learned h0 parameter for layer k, expanded
            # to the batch with tile
            for k in xrange(hps.hidden_layers):
                h0[k] = tile(self.params['h0'][:, k].reshape(-1, 1), bsize)
        bih = self.params['bih']
        Wih = self.params['Wih']
        Whh = self.params['Whh']
        bhh = self.params['bhh']
        Who = self.params['Who']
        bho = self.params['bho']

        # Forward prop

        for t in xrange(T):
            for k in xrange(hps.hidden_layers):
                if t == 0:
                    hprev = h0[k]
                else:
                    hprev = hs[k][t - 1]

                # Feed-forward input: from the data for the first layer,
                # from the layer below otherwise
                if k == 0:
                    us[k][t] = mult(Wih, data[:, t, :]) + bih
                else:
                    us[k][t] = mult(self.params['Wh%d' % k], hs[k - 1][t])

                # hps.recurrent_layer is 1-based; only that layer receives
                # the recurrent term from the previous time step
                if k == hps.recurrent_layer - 1:
                    us[k][t] += mult(Whh, hprev) + bhh
                    # Clip maximum activation
                    mask = us[k][t] < hps.max_act
                    us[k][t] = us[k][t] * mask + hps.max_act * (1 - mask)
                elif k != 0:
                    us[k][t] += self.params['bh%d' % k]

                hs[k][t] = self.nl(us[k][t])

            probs[t] = softmax(mult(Who, hs[-1][t]) + bho)

        # Remember final hidden states so a later call can pass them back
        # in as prev_h0
        self.last_h = list()
        for k in xrange(hps.hidden_layers):
            self.last_h.append(hs[k][-1])

        if labels is None:
            return None, probs

        # Cost and output-layer gradient are computed on numpy copies, then
        # the gradient is converted back with array()
        probs_neg_log = list()
        dprobs = list()
        for t in xrange(T):
            probs_neg_log.append(as_np(-1 * log(probs[t])))
            dprobs.append(as_np(probs[t].copy()))
        # Here k indexes the batch example, not the layer
        # NOTE(review): for steps t >= len(labels[k]) the cost stays zero but
        # dprobs keeps the raw probs, so those steps still feed the backward
        # pass -- confirm this is intended for shorter sequences.
        for k in xrange(bsize):
            for t in xrange(len(labels[k])):
                costs[t, k] = probs_neg_log[t][labels[k][t], k]
                dprobs[t][labels[k][t], k] -= 1
        for t in xrange(T):
            dprobs[t] = array(dprobs[t])

        # NOTE Summing costs over time
        # NOTE FIXME Dividing by T to get better sense if objective
        # is decreasing, remove for grad checking
        cost = costs.sum() / bsize / float(T)
        if not back:
            return cost, probs

        # Backprop

        # Reset the gradient buffers in place before accumulating
        for k in self.grads:
            self.grads[k][:] = 0

        # Walk time backwards so dhs[k][t] already holds the recurrent
        # contribution from step t+1 when step t is processed
        for t in reversed(xrange(T)):
            self.grads['bho'] += dprobs[t][:, :].sum(axis=-1).reshape(
                (-1, 1)) / bsize
            self.grads['Who'] += mult(dprobs[t], hs[-1][t].T) / bsize

            for k in reversed(xrange(hps.hidden_layers)):
                # Gradient arriving from above: the output layer for the top
                # hidden layer, the next hidden layer otherwise
                if k == hps.hidden_layers - 1:
                    dhs[k][t] += mult(Who.T, dprobs[t])
                else:
                    dhs[k][t] += mult(self.params['Wh%d' % (k + 1)].T,
                                      dhs[k + 1][t])
                dus[k][t] += get_nl_grad(self.hps.nl, us[k][t]) * dhs[k][t]

                # NOTE(review): this also runs for the recurrent layer, whose
                # forward pass adds bhh rather than bh%d -- confirm the bh%d
                # gradient there is intended.
                if k > 0:
                    self.grads['Wh%d' %
                               k] += mult(dus[k][t], hs[k - 1][t].T) / bsize
                    self.grads['bh%d' % k] += dus[k][t].sum(axis=-1).reshape(
                        (-1, 1)) / bsize

                if k == hps.recurrent_layer - 1:
                    if t == 0:
                        hprev = h0[k]
                        # Gradient for the learned initial state, summed
                        # over the batch axis
                        self.grads['h0'][:, k] = mult(
                            Whh.T, dus[k][t]).sum(axis=-1) / bsize
                    else:
                        hprev = hs[k][t - 1]
                        # Seed the previous step's hidden gradient; the
                        # contribution from above is added with += when
                        # step t-1 is processed
                        dhs[k][t - 1] = mult(Whh.T, dus[k][t])
                    self.grads['Whh'] += mult(dus[k][t], hprev.T) / bsize
                    self.grads['bhh'] += dus[k][t].sum(axis=-1).reshape(
                        (-1, 1)) / bsize

            self.grads['Wih'] += mult(dus[0][t], data[:, t, :].T) / bsize
            self.grads['bih'] += dus[0][t].sum(axis=-1).reshape(
                (-1, 1)) / bsize

        return cost, self.grads
Exemplo n.º 5
0
    def cost_and_grad(self, data, labels, back=True):
        """Run the feed-forward net on a batch and optionally backpropagate.

        Args:
            data: input batch; its last axis is the batch axis.
            labels: labels[k] is the target row index for example k, or
                None to run only the forward pass.
            back: if False, return right after the cost is computed.

        Returns:
            (None, probs) when labels is None, (cost, probs) when back is
            False, otherwise (cost, self.grads). cost is the mean negative
            log-probability of the labels; gradients are divided by the
            batch size.
        """
        hps = self.hps
        grads = self.grads
        n_hidden = hps.hidden_layers

        # The last batch of a dataset may be smaller than the nominal size
        bsize = data.shape[-1]

        p = ParamStruct(**self.params)

        # Forward pass: input layer first, then hidden layers 1..n_hidden-1
        acts = [self.nl(mult(p.Wih, data) + p.bih)]
        for layer in xrange(1, n_hidden):
            W = self.params['W%d' % layer]
            b = self.params['b%d' % layer]
            acts.append(self.nl(mult(W, acts[-1]) + b))

        probs = softmax(mult(p.Who, acts[-1]) + p.bho)

        if labels is None:
            return None, probs

        # Pick out the label entries on the numpy side, in float64
        neg_log_prob = -1 * np.log(as_np(probs))
        cost_array = np.empty(bsize, dtype=np.float64)
        for k in xrange(bsize):
            cost_array[k] = neg_log_prob[labels[k], k]
        cost = cost_array.sum() / bsize

        if not back:
            return cost, probs

        # Backward pass

        for key in self.grads:
            self.grads[key][:] = 0

        # Softmax/cross-entropy gradient: probs with 1 subtracted at each
        # label. NOTE: the original author flags that this changes probs.
        dLdy = as_np(probs)
        for k in xrange(bsize):
            dLdy[labels[k], k] -= 1
        dLdy = array(dLdy)

        grads['bho'] = dLdy.sum(axis=1).reshape((-1, 1))
        grads['Who'] = mult(dLdy, acts[-1].T)

        # Ws[j] is the weight matrix feeding layer j; Ws[n_hidden] is the
        # output matrix
        Ws = [p.Wih]
        Ws.extend(self.params['W%d' % layer] for layer in xrange(1, n_hidden))
        Ws.append(p.Who)

        # Propagate the error signal down through the hidden layers
        delta = dLdy
        for layer in reversed(xrange(1, n_hidden)):
            delta = get_nl_grad(self.hps.nl, acts[layer]) * mult(
                Ws[layer + 1].T, delta)
            grads['b%d' % layer] = delta.sum(axis=1).reshape((-1, 1))
            grads['W%d' % layer] = mult(delta, acts[layer - 1].T)

        delta = get_nl_grad(self.hps.nl, acts[0]) * mult(Ws[1].T, delta)
        grads['bih'] = delta.sum(axis=1).reshape((-1, 1))
        grads['Wih'] = mult(delta, data.T)

        # Average over the batch
        for key in self.grads:
            self.grads[key] /= bsize

        return cost, self.grads
Exemplo n.º 6
0
Arquivo: dnn.py Projeto: comadan/nn
    def cost_and_grad(self, data, labels, back=True):
        """Run the feed-forward net on a batch and optionally backpropagate.

        Args:
            data: input batch; its last axis is the batch axis.
            labels: labels[k] is the target row index for example k, or
                None to run only the forward pass.
            back: if False, return right after the cost is computed.

        Returns:
            (None, probs) when labels is None, (cost, probs) when back is
            False, otherwise (cost, self.grads). cost is the mean negative
            log-probability of the labels; gradients are divided by the
            batch size.
        """
        hps = self.hps
        grads = self.grads
        n_hidden = hps.hidden_layers

        # The last batch of a dataset may be smaller than the nominal size
        bsize = data.shape[-1]

        p = ParamStruct(**self.params)

        # Forward pass: input layer first, then hidden layers 1..n_hidden-1
        acts = [self.nl(mult(p.Wih, data) + p.bih)]
        for layer in xrange(1, n_hidden):
            W = self.params['W%d' % layer]
            b = self.params['b%d' % layer]
            acts.append(self.nl(mult(W, acts[-1]) + b))

        probs = softmax(mult(p.Who, acts[-1]) + p.bho)

        if labels is None:
            return None, probs

        # Pick out the label entries on the numpy side, in float64
        neg_log_prob = -1 * np.log(as_np(probs))
        cost_array = np.empty(bsize, dtype=np.float64)
        for k in xrange(bsize):
            cost_array[k] = neg_log_prob[labels[k], k]
        cost = cost_array.sum() / bsize

        if not back:
            return cost, probs

        # Backward pass

        for key in self.grads:
            self.grads[key][:] = 0

        # Softmax/cross-entropy gradient: probs with 1 subtracted at each
        # label. NOTE: the original author flags that this changes probs.
        dLdy = as_np(probs)
        for k in xrange(bsize):
            dLdy[labels[k], k] -= 1
        dLdy = array(dLdy)

        grads['bho'] = dLdy.sum(axis=1).reshape((-1, 1))
        grads['Who'] = mult(dLdy, acts[-1].T)

        # Ws[j] is the weight matrix feeding layer j; Ws[n_hidden] is the
        # output matrix
        Ws = [p.Wih]
        Ws.extend(self.params['W%d' % layer] for layer in xrange(1, n_hidden))
        Ws.append(p.Who)

        # Propagate the error signal down through the hidden layers
        delta = dLdy
        for layer in reversed(xrange(1, n_hidden)):
            delta = get_nl_grad(self.hps.nl, acts[layer]) * mult(
                Ws[layer + 1].T, delta)
            grads['b%d' % layer] = delta.sum(axis=1).reshape((-1, 1))
            grads['W%d' % layer] = mult(delta, acts[layer - 1].T)

        delta = get_nl_grad(self.hps.nl, acts[0]) * mult(Ws[1].T, delta)
        grads['bih'] = delta.sum(axis=1).reshape((-1, 1))
        grads['Wih'] = mult(delta, data.T)

        # Average over the batch
        for key in self.grads:
            self.grads[key] /= bsize

        return cost, self.grads
Exemplo n.º 7
0
 def to_file(self, fout):
     """Pickle the parameters to `fout`, then let self.opt append its state.

     The parameters are written as one list, converted with as_np and
     ordered by self.param_keys; self.opt.to_file(fout) writes after it.
     """
     logger.info('Saving state')
     param_arrays = [as_np(self.params[key]) for key in self.param_keys]
     pickle.dump(param_arrays, fout)
     self.opt.to_file(fout)
Exemplo n.º 8
0
    model_hps = NCLMHyperparams()
    opt_hps = OptimizerHyperparams()
    model_hps.set_from_dict(cfg)
    opt_hps.set_from_dict(cfg)
    cfg = CfgStruct(**cfg)

    # Load dataset
    #dataset = BrownCorpus(model_hps.context_size, model_hps.batch_size, subset='dev')
    dataset = CharCorpus(model_hps.context_size,
                         model_hps.batch_size,
                         subset='dev')

    # Construct network
    model = NCLM(dataset, model_hps, opt_hps, train=False, opt='nag')
    # Load parameters
    with open(pjoin(os.path.dirname(args.cfg_file), 'params.pk'), 'rb') as fin:
        model.from_file(fin)

    embeddings = as_np(model.params['C']).T
    # NOTE Normalizing
    embeddings = embeddings / np.sqrt(np.sum(np.square(embeddings),
                                             axis=1)).reshape((-1, 1))
    tree = KDTree(embeddings, leaf_size=30, metric='euclidean')
    #query = embeddings[model.dset.word_inds['king'], :]
    query = embeddings[model.dset.char_inds['e'], :]
    # PARAM
    dists, inds = tree.query(query, k=10)
    for dist, ind in zip(dists.ravel(), inds.ravel()):
        #print model.dset.words[ind], dist
        print model.dset.chars[ind], dist
Exemplo n.º 9
0
Arquivo: models.py Projeto: comadan/nn
 def to_file(self, fout):
     """Pickle the parameters to `fout`, then let self.opt append its state.

     The parameters are written as one list, converted with as_np and
     ordered by self.param_keys; self.opt.to_file(fout) writes after it.
     """
     logger.info('Saving state')
     param_arrays = [as_np(self.params[key]) for key in self.param_keys]
     pickle.dump(param_arrays, fout)
     self.opt.to_file(fout)
Exemplo n.º 10
0
Arquivo: test.py Projeto: xiamike/nn
    else:
        params_file = pjoin(os.path.dirname(args.cfg_file), "params.pk")
    logger.info("Loading params from %s" % params_file)
    with open(params_file, "rb") as fin:
        model.from_file(fin)

    likelihoods = None
    labels = None
    it = 0
    while dataset.data_left():
        cost, probs = model.run(back=False)

        if MODEL_TYPE == "rnn":
            llt = np.zeros((probs[0].shape[0], len(probs), probs[0].shape[1]))
            for t in xrange(len(probs)):
                llt[:, t, :] = as_np(probs[t])

            # Deal with sequences in batch being of different lengths
            ll = llt[:, 0 : len(model.dset.batch_labels[0]), 0].reshape((llt.shape[0], -1))
            j = 1
            for sl in model.dset.batch_labels[1:]:
                ll = np.hstack((ll, llt[:, 0 : len(sl), j].reshape(llt.shape[0], -1)))
                j += 1

            y = np.array([i for sl in model.dset.batch_labels for i in sl])
        else:
            ll = as_np(probs)
            y = as_np(model.dset.batch_labels)

        if likelihoods is None:
            likelihoods = ll
Exemplo n.º 11
0
    else:
        params_file = pjoin(os.path.dirname(args.cfg_file), 'params.pk')
    logger.info('Loading params from %s' % params_file)
    with open(params_file, 'rb') as fin:
        model.from_file(fin)

    likelihoods = None
    labels = None
    it = 0
    while dataset.data_left():
        cost, probs = model.run(back=False)

        if MODEL_TYPE == 'rnn':
            llt = np.zeros((probs[0].shape[0], len(probs), probs[0].shape[1]))
            for t in xrange(len(probs)):
                llt[:, t, :] = as_np(probs[t])

            # Deal with sequences in batch being of different lengths
            ll = llt[:, 0:len(model.dset.batch_labels[0]), 0].reshape(
                (llt.shape[0], -1))
            j = 1
            for sl in model.dset.batch_labels[1:]:
                ll = np.hstack((ll, llt[:, 0:len(sl),
                                        j].reshape(llt.shape[0], -1)))
                j += 1

            y = np.array([i for sl in model.dset.batch_labels for i in sl])
        else:
            ll = as_np(probs)
            y = as_np(model.dset.batch_labels)
Exemplo n.º 12
0
    args = parser.parse_args()

    cfg = load_config(args.cfg_file)
    model_hps = NCLMHyperparams()
    opt_hps = OptimizerHyperparams()
    model_hps.set_from_dict(cfg)
    opt_hps.set_from_dict(cfg)
    cfg = CfgStruct(**cfg)

    # Load dataset
    #dataset = BrownCorpus(model_hps.context_size, model_hps.batch_size, subset='dev')
    dataset = CharCorpus(model_hps.context_size, model_hps.batch_size, subset='dev')

    # Construct network
    model = NCLM(dataset, model_hps, opt_hps, train=False, opt='nag')
    # Load parameters
    with open(pjoin(os.path.dirname(args.cfg_file), 'params.pk'), 'rb') as fin:
        model.from_file(fin)

    embeddings = as_np(model.params['C']).T
    # NOTE Normalizing
    embeddings = embeddings / np.sqrt(np.sum(np.square(embeddings), axis=1)).reshape((-1, 1))
    tree = KDTree(embeddings, leaf_size=30, metric='euclidean')
    #query = embeddings[model.dset.word_inds['king'], :]
    query = embeddings[model.dset.char_inds['e'], :]
    # PARAM
    dists, inds = tree.query(query, k=10)
    for dist, ind in zip(dists.ravel(), inds.ravel()):
        #print model.dset.words[ind], dist
        print model.dset.chars[ind], dist
Exemplo n.º 13
0
Arquivo: mom.py Projeto: comadan/nn
 def to_file(self, fout):
     """Pickle this object's state to the open binary file `fout`.

     Writes self.iters, self.costs and self.expcosts as three consecutive
     pickles, in that order. When self.mom is positive, a fourth pickle
     follows: the list of self.vel entries, each converted with as_np,
     ordered by self.model.param_keys.
     """
     for attr_name in ('iters', 'costs', 'expcosts'):
         pickle.dump(getattr(self, attr_name), fout)

     if self.mom > 0:
         velocities = [as_np(self.vel[key]) for key in self.model.param_keys]
         pickle.dump(velocities, fout)