Example #1
0
 def setup_network(self, n_features):
     """Instantiate the character-level network chosen by ``self.net_type``.

     Supported values are 'lstm' (CharLSTM) and 'irnn' (CharIRNN); any
     other value is reported via ``error`` and no network is created.
     The accumulated-loss buffer is reset in every case.
     """
     builders = {'lstm': CharLSTM, 'irnn': CharIRNN}
     builder = builders.get(self.net_type)
     if builder is None:
         error("Unknown net_type")
     else:
         self.network = builder(self.vocab_size, self.net_hidden, self.batch_size)
     self.reset_accum_loss()
Example #2
0
 def setup_network(self, n_features):
     """Build the recurrent network (LSTM or IRNN) and clear the loss accumulator."""
     net_classes = {'lstm': CharLSTM, 'irnn': CharIRNN}
     if self.net_type in net_classes:
         net_cls = net_classes[self.net_type]
         self.network = net_cls(self.vocab_size, self.net_hidden,
                                self.batch_size)
     else:
         # Unrecognized type: report it and fall through without a network.
         error("Unknown net_type")
     self.reset_accum_loss()
Example #3
0
class RNNCharEstimator(ChainerClassifier):
    """Character-level RNN classifier on top of ChainerClassifier.

    Wraps a CharLSTM or CharIRNN network and trains it with truncated
    BPTT: the loss is accumulated for `seq_size` mini-batches, then one
    backward pass (with gradient clipping) updates the optimizer.
    """

    def __init__(self, net_type='lstm', net_hidden=100,
                       vocab_size=1000, dropout_ratio=0.0, seq_size=70, grad_clip=100.0,
                       **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden    = net_hidden     # hidden-layer size
        self.net_type      = net_type       # 'lstm' or 'irnn'
        self.vocab_size    = vocab_size     # number of distinct characters
        self.dropout_ratio = dropout_ratio  # dropout used during training
        self.seq_size      = seq_size       # BPTT truncation length (batches)
        self.grad_clip     = grad_clip      # L2 gradient-clipping threshold
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        """Create the network selected by net_type and reset the accumulated loss."""
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden, self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden, self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        """Zero the BPTT loss accumulator (on GPU when self.gpu >= 0)."""
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        """Run one training step of the underlying network and return its loss."""
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        """Predict a class index for each row of x_data, fed sequentially.

        The network state is reset to batch size 1 and each input row is
        pushed through in order; returns an array of argmax class indices.
        """
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        # Collect per-step outputs and concatenate once: repeated
        # np.concatenate in a loop is quadratic in the sequence length.
        outputs = []
        for i in range(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            outputs.append(cuda.to_cpu(y.data))
        results = np.concatenate(outputs)
        return results.argmax(1)

    def fit_update(self, loss, batch_id):
        """Accumulate loss; every seq_size batches run truncated BPTT and update."""
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:  # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        """Build one interleaved mini-batch for stateful RNN training.

        Sample j of the batch reads stream position batch_id + batch_num*j
        (mod n_samples), so each batch slot walks its own contiguous slice
        of the data across successive batch_ids.
        """
        # Floor division: '/' was Python-2 integer division and breaks
        # the modular indexing under Python 3.
        batch_num = self.n_samples // self.batch_size
        x_batch = np.array([x_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in range(self.batch_size)]).reshape(self.batch_size)
        y_batch = np.array([y_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in range(self.batch_size)])
        return x_batch, y_batch
Example #4
0
class RNNCharEstimator(ChainerClassifier):
    """Character-level RNN classifier built on ChainerClassifier.

    Trains a CharLSTM or CharIRNN with truncated BPTT: losses are
    accumulated over `seq_size` mini-batches before a single backward
    pass, gradient clip, and optimizer update.
    """

    def __init__(self,
                 net_type='lstm',
                 net_hidden=100,
                 vocab_size=1000,
                 dropout_ratio=0.0,
                 seq_size=70,
                 grad_clip=100.0,
                 **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden = net_hidden        # hidden-layer size
        self.net_type = net_type            # 'lstm' or 'irnn'
        self.vocab_size = vocab_size        # number of distinct characters
        self.dropout_ratio = dropout_ratio  # dropout used during training
        self.seq_size = seq_size            # BPTT truncation length (batches)
        self.grad_clip = grad_clip          # L2 gradient-clipping threshold
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        """Create the network selected by net_type and reset the accumulated loss."""
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        """Zero the BPTT loss accumulator (on GPU when self.gpu >= 0)."""
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        """Run one training step of the underlying network and return its loss."""
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        """Predict a class index per row of x_data, fed through sequentially."""
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        # Gather outputs first and concatenate once; concatenating inside
        # the loop would be quadratic in the sequence length.
        outputs = []
        for i in range(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            outputs.append(cuda.to_cpu(y.data))
        results = np.concatenate(outputs)
        return results.argmax(1)

    def fit_update(self, loss, batch_id):
        """Accumulate loss; every seq_size batches run truncated BPTT and update."""
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:  # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        """Build one interleaved mini-batch for stateful RNN training.

        Batch slot j reads stream position batch_id + batch_num*j
        (mod n_samples), so each slot advances through its own
        contiguous slice of the data on successive calls.
        """
        # Floor division: plain '/' was Python-2 integer division and
        # would yield a float index offset under Python 3.
        batch_num = self.n_samples // self.batch_size
        x_batch = np.array([
            x_data[(batch_id + batch_num * j) % self.n_samples]
            for j in range(self.batch_size)
        ]).reshape(self.batch_size)
        y_batch = np.array([
            y_data[(batch_id + batch_num * j) % self.n_samples]
            for j in range(self.batch_size)
        ])
        return x_batch, y_batch
Example #5
0
# Pull training hyperparameters from the parsed command line.
n_epochs = args.epochs
n_units = args.rnn_size
batchsize = args.batchsize
bprop_len = args.seq_length
grad_clip = args.grad_clip

train_data, words, vocab = load_data(args)
# Write the vocabulary with a context manager so the handle is closed
# (the original `pickle.dump(vocab, open(...))` leaked the file object).
with open('%s/vocab.bin' % args.data_dir, 'wb') as vocab_file:
    pickle.dump(vocab, vocab_file)

if len(args.init_from) > 0:
    # Resume from a checkpoint. NOTE(review): pickle.load executes
    # arbitrary code — only load checkpoints from trusted sources.
    with open(args.init_from, 'rb') as model_file:
        model = pickle.load(model_file)
else:
    if args.net == 'lstm':
        model = CharLSTM(len(vocab), n_units)
    elif args.net == 'irnn':
        model = CharIRNN(len(vocab), n_units)
    else:
        error("unknown net")

state = model.make_initial_state(n_units, batchsize=batchsize)

# Move the model to the GPU when one was requested.
if args.gpu >= 0:
    cuda.init()
    model.to_gpu()
if args.optimizer == 'rmsprop':
    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate)
elif args.optimizer == 'adam':
    optimizer = optimizers.Adam()
elif args.optimizers == 'adagrad':