Example #1
# Standard-library imports this fragment needs; TextData, CharRNN, load_crnn,
# Adagrad, setup_plot, callback and epoch_callback come from the surrounding project.
import time
from functools import partial
from os.path import basename

import matplotlib.pyplot as plt


def main(args):
    data = TextData(args.source)
    if args.checkpoint:
        rnn = load_crnn(args.checkpoint)
    else:
        rnn = CharRNN(in_out_size=data.num_classes, state_size=args.state)
    opt = Adagrad(rnn, 0.1, stateful=True, clip=5)

    setup_plot()

    sequence_pairs = list(data.get_seqs(25))
    print('Training on {}:\n'
          '- {} total chars\n'
          '- {} unique chars\n'
          '- {} sequences of length 25'.format(args.source, data.tot_chars,
                                               data.num_classes,
                                               len(sequence_pairs)))
    opt.train(sequence_pairs,
              epochs=40,
              callback=partial(callback, data=data, start=time.time()),
              callback_every=4321,
              epoch_callback=epoch_callback)

    plt.savefig('plots/{}.png'.format(basename(args.source)))
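
Every snippet on this page hands its weight updates to some Adagrad variant. As a reminder of what that optimizer actually does, here is a minimal NumPy sketch of the core per-parameter rule (the learning rate and epsilon values are illustrative, not taken from any example above):

import numpy as np

def adagrad_step(param, grad, cache, lr=0.1, eps=1e-8):
    '''One Adagrad update: divide the step by the root of the accumulated
    squared gradients, so frequently-updated coordinates take ever smaller steps.'''
    cache += grad ** 2
    param -= lr * grad / (np.sqrt(cache) + eps)
    return param, cache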
Example #2
class DependencyRNN:
    '''
    class for dependency RNN for QANTA
    '''
    def __init__(self, d, V, r, answer_idxs, embeddings=None, seed=0):
        '''
        d = dimensionality of embeddings
        V = size of vocabulary
        r = number of dependency relations
        answer_idxs = list of indices into the embeddings matrix for all the answers
        embeddings = pre-trained word embeddings
        seed = for random number generator for reproducibility
        '''
        
        self.d = d

        rnge = sqrt(6) / sqrt(201)
        rnge_we = sqrt(6) / sqrt(51)

        np.random.seed(seed)
        
        #|V| x d embedding matrix
        #(cast inside value= so We stays a shared variable; calling .astype on
        # the result of theano.shared would return a plain symbolic cast instead)
        if embeddings is None:
            self.We = theano.shared(name='embeddings',
                                    value=(np.random.rand(V, d) * 2 * rnge_we - rnge_we
                                           ).astype(theano.config.floatX))
        else:
            self.We = theano.shared(name='embeddings',
                                    value=np.asarray(embeddings, dtype=theano.config.floatX))
            
        #r x d x d tensor (matrix for each dependency relation)
        self.Wr = theano.shared(name='dependencies',
                                value=(np.random.rand(r, d, d) * 2 * rnge - rnge
                                       ).astype(theano.config.floatX))
        
        #d x d map from embedding to hidden vector
        self.Wv = theano.shared(name='Wv',
                                value=(np.random.rand(d, d) * 2 * rnge - rnge
                                       ).astype(theano.config.floatX))
        
        #d long bias vector
        self.b = theano.shared(name='b',
                               value=np.zeros(d, dtype=theano.config.floatX))
        
        self.params = [self.We, self.Wr, self.Wv, self.b]
        
        self.answer_idxs = np.array(answer_idxs, dtype=np.int32)
        #uniform weight on every answer but one; train() zeroes the correct
        #answer's entry before sampling, which makes the row sum to 1
        self.ans_probabilities = np.ones(self.answer_idxs.shape[0])/(self.answer_idxs.shape[0]-1)
        self.ans_lookup = {j:i for i,j in enumerate(self.answer_idxs)}
        self._answers = {}
        
        self.descender = Adagrad(self.params)

        def normalized_tanh(x):
            '''returns tanh(x) / ||tanh(x)||'''
            temp = T.tanh(x)
            return temp / T.sqrt((temp ** 2).sum())

        self.f = normalized_tanh


        #one scan step: accumulate the hinge term for a single wrong answer
        def helperFunction(x_z, prior_result, x_c, h_n):
            return prior_result + T.maximum(0.0, (1.0 - T.dot(x_c, h_n)) + T.dot(x_z, h_n))

        #need to calculate both the input to its parent node and the error at this step
        def recurrence(n, hidden_states, hidden_sums, cost, x, r, p, wrong_ans, corr_ans):
            '''
            function called below by scan over the nodes in the dependency parse

            n - this is the index of the current node
            hidden_states - a list of hidden_states for every node, to be updated
            hidden_sums - sum over the children of dot product of the hidden nodes and the relation matrix
            cost - the total cost so far for this tree
            x - a list of word embeddings (x[n] will access the embedding for the current word)
            r - a list of relation matrices (r[n] will access the current matrix)
            p - a list of parent node indices
            wrong_ans - a list of randomly sampled word embeddings for wrong answers
            corr_ans - the word embedding for the correct answer

            You need to calculate 3 things:
            1) The value of hidden_states[n] : h_n = f(W_v \dot x_n + b + sum_n)
            2) The updated value of hidden_sums[p[n]] : hidden_sums[p[n]] + W_r(n) \dot h_n
            3) The updated cost :
            for a single node, this is \sum_{z \in wrong_ans} max(0, 1 - x_c \dot h_n + x_z \dot h_n)

            you need to return the updates to hidden_states, hidden_sums, and cost
            (in that order)
            '''

            h_n = self.f( T.dot(self.Wv, x[n]) + self.b + hidden_sums[n] )
            newStates = T.set_subtensor( hidden_states[n], h_n )

            #accumulate this node's message into its parent's running sum,
            #as the docstring specifies: hidden_sums[p[n]] + W_r(n) . h_n
            newSum = hidden_sums[p[n]] + T.dot(r[n], h_n)
            newSums = T.set_subtensor(hidden_sums[p[n]], newSum)
            # update cost
            rr, updates = theano.scan(
                    fn=helperFunction,
                    sequences=wrong_ans,
                    outputs_info=T.as_tensor_variable(np.asarray(0, theano.config.floatX)),
                    non_sequences=[corr_ans, h_n]
                    )
            final_result = rr[-1] + cost


            return newStates, newSums, final_result

        idxs = T.ivector('idxs')
        x = self.We[idxs]

        rel_idxs = T.ivector('rel_idxs')
        r = self.Wr[rel_idxs]

        p = T.ivector('parents')

        wrong_idxs = T.ivector('wrong_idxs')
        wrong_ans = self.We[wrong_idxs]

        corr_idx = T.iscalar('corr_idx') # index of answer
        corr_ans = self.We[corr_idx]

        hidden_states = T.zeros((idxs.shape[0], d), dtype=theano.config.floatX)
        #needs to be sent_length + 1 to store final sum
        hidden_sums = T.zeros((idxs.shape[0]+1, d), dtype=theano.config.floatX)

        [h, s, cost], updates = theano.scan(fn=recurrence,
                                            sequences=T.arange(x.shape[0]),
                                            outputs_info=[hidden_states,
                                                          hidden_sums,
                                                          T.as_tensor_variable(np.asarray(0, theano.config.floatX))],
                                            non_sequences=[x, r, p, wrong_ans, corr_ans])
        final_states = h[-1]
        self.states = theano.function(inputs=[idxs, rel_idxs, p, wrong_idxs, corr_idx], outputs=final_states)

        final_cost = cost[-1] #no regularization
        gradients = T.grad(final_cost, self.params)
        self.cost_and_grad = theano.function(inputs=[idxs, rel_idxs, p, wrong_idxs, corr_idx], outputs=[final_cost] + gradients)

    def gradient_descent(self, new_gradients):
        self.descender.gradient_descent(*new_gradients)

    #batch consists of tuples of word indices, relation indices, parent indices, and an answer index
    def train(self, batch, num_wrong_ans=100):
        total_cost_and_grad = None
        total_nodes = 0.

        #accumulate cost and gradients over every tree in the batch

        for datum in batch:
            idxs, rel_idxs, p, corr_idx = datum

            #sample new wrong answers for every point (make sure not to sample the correct answer)
            self.ans_probabilities[self.ans_lookup[corr_idx]] = 0
            wrong_idxs = self.answer_idxs[np.random.choice(self.answer_idxs.shape[0],
                                                           num_wrong_ans,
                                                           False,
                                                           self.ans_probabilities)]
            self.ans_probabilities[self.ans_lookup[corr_idx]] = 1./(self.ans_probabilities.shape[0]-1)

            cost_and_grad = self.cost_and_grad(idxs, rel_idxs, p, wrong_idxs, corr_idx)
            if total_cost_and_grad is None:
                total_cost_and_grad = [0] + [np.zeros(i.shape) for i in cost_and_grad[1:]]
            for i in range(len(cost_and_grad)):
                total_cost_and_grad[i] += cost_and_grad[i]
            total_nodes += len(idxs)

        #update gradients from total_cost_and_grad[1:]
        self.gradient_descent([i/total_nodes for i in total_cost_and_grad[1:]])

        return total_cost_and_grad[0]/total_nodes

    def reset_weights(self):
        self.descender.reset_weights()

    def transform(self, batch, stop_indices=None):
        features = []
        for idxs,rel_idxs,p in batch:
            #wrong answers and the answer index do not affect the hidden
            #states, so pass typed dummies
            h = self.states(idxs, rel_idxs, p, np.zeros(0, dtype=np.int32), 0)
            x = np.zeros(self.d)
            count = 0.0
            for i,s in enumerate(h):
                if stop_indices is None or idxs[i] not in stop_indices:
                    x += s
                    count += 1
            features.append(x / count)
            
        return np.array(features)

    def save(self, filename, answers):
        '''save all the weights and hyperparameters to a file'''
        kwds = {}
        for param in self.params:
            kwds[param.name] = param.get_value()
        kwds['answer_idxs'] = self.answer_idxs
        
        with open(filename, 'wb') as f:
            np.savez(f, **kwds)

        embeddings = self.We.get_value()
        for answer in answers:
            self._answers[answer] = embeddings[answers[answer]].tolist()

        with open(filename + '.json', 'w') as f:
            json.dump(self._answers, f)

    @classmethod
    def load(cls, filename):
        '''load pre-trained weights from a file'''
        with open(filename, 'rb') as f:  #np.load needs a binary-mode handle
            npzfile = np.load(f)

            d = npzfile['embeddings'].shape[1]
            V = npzfile['embeddings'].shape[0]
            r = npzfile['dependencies'].shape[0]

            model = cls(d, V, r, npzfile['answer_idxs'])

            for param in model.params:
                param.set_value(npzfile[param.name])

        with open(filename + '.json') as f:
            model._answers = json.load(f)

        return model

    @property
    def answers(self):
        return self._answers
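
The constructor above compiles everything the instance methods need, so the class can be driven directly. A hedged usage sketch (the dimensions, indices, and the single toy tree are all made up for illustration; they are not from the original code):

import numpy as np

#hypothetical toy setup: 50-dim embeddings, 1000-word vocabulary,
#40 dependency relations, answers stored at embedding rows 1-3
rnn = DependencyRNN(d=50, V=1000, r=40, answer_idxs=[1, 2, 3])

#one dependency tree: word indices, relation indices, parent indices and the
#correct answer's embedding row; nodes are ordered so children precede their
#parent, and the root's parent points at the extra hidden_sums slot
idxs = np.array([4, 5, 6], dtype=np.int32)
rel_idxs = np.array([0, 1, 2], dtype=np.int32)
parents = np.array([2, 2, 3], dtype=np.int32)

avg_cost = rnn.train([(idxs, rel_idxs, parents, 2)], num_wrong_ans=2)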
Example #3
model.add(Dense(1, activation='sigmoid'))

plot_model(model, to_file='model_imdb.png', show_shapes=True)

results_acc = []
result_acc = []
results_loss = []
result_loss = []
test_acc_results = []
test_loss_results = []
optimizers = [
    Adam(lr=0.001, amsgrad=True),
    AAdam(lr=0.001, amsgrad=True),
    Adam(lr=0.001, amsgrad=False),
    AAdam(lr=0.001, amsgrad=False),
    Adagrad(),
    AAdagrad(),
    SGD(),
    ASGD()
]

for opt in optimizers:

    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    #reload the same starting weights so every optimizer begins from an
    #identical state (the commented line below saved them once up front)
    #model.save_weights('initial_weights_imdb.h5')
    model.load_weights('initial_weights_imdb.h5')
    initial_weights = model.get_weights()
    result_acc = []
    result_loss = []
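
The fragment breaks off just before the per-optimizer training itself. Given the accumulator lists declared above, the loop body presumably continues along these lines (a hedged sketch only: the fit arguments and the x_train/y_train-style names are assumptions, not from the original):

    #hypothetical continuation: train, record the history, evaluate
    history = model.fit(x_train, y_train,
                        epochs=20,
                        batch_size=512,
                        validation_data=(x_val, y_val))
    result_acc = history.history['acc']
    result_loss = history.history['loss']
    results_acc.append(result_acc)
    results_loss.append(result_loss)

    test_loss, test_acc = model.evaluate(x_test, y_test)
    test_loss_results.append(test_loss)
    test_acc_results.append(test_acc)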
Example #4
class DependencyRNN:
    '''
    class for dependency RNN for QANTA
    '''
    def __init__(self, d, V, r, answer_idxs, embeddings=None, seed=0):
        '''
        d = dimensionality of embeddings
        V = size of vocabulary
        r = number of dependency relations
        answer_idxs = list of indices into the embeddings matrix for all the answers
        embeddings = pre-trained word embeddings
        seed = for random number generator for reproducibility
        '''
        
        self.d = d

        rnge = sqrt(6) / sqrt(201)
        rnge_we = sqrt(6) / sqrt(51)

        np.random.seed(seed)
        
        #|V| x d embedding matrix (cast inside value= so We stays a shared variable)
        if embeddings is None:
            self.We = theano.shared(name='embeddings',
                                    value=(np.random.rand(V, d) * 2 * rnge_we - rnge_we
                                           ).astype(theano.config.floatX))
        else:
            self.We = theano.shared(name='embeddings',
                                    value=np.asarray(embeddings, dtype=theano.config.floatX))
            
        #r x d x d tensor (matrix for each dependency relation)
        self.Wr = theano.shared(name='dependencies',
                                value=(np.random.rand(r, d, d) * 2 * rnge - rnge
                                       ).astype(theano.config.floatX))
        
        #d x d map from embedding to hidden vector
        self.Wv = theano.shared(name='Wv',
                                value=(np.random.rand(d, d) * 2 * rnge - rnge
                                       ).astype(theano.config.floatX))
        
        #d long bias vector
        self.b = theano.shared(name='b',
                               value=np.zeros(d, dtype=theano.config.floatX))
        
        self.params = [self.We, self.Wr, self.Wv, self.b]
        
        self.answer_idxs = np.array(answer_idxs, dtype=np.int32)
        self.ans_probabilities = np.ones(self.answer_idxs.shape[0])/(self.answer_idxs.shape[0]-1)
        self.ans_lookup = {j:i for i,j in enumerate(self.answer_idxs)}
        self._answers = {}
        
        self.descender = Adagrad(self.params)

        def normalized_tanh(x):
            '''returns tanh(x) / ||tanh(x)||'''
            tanh_x = T.tanh(x)
            return tanh_x / T.sqrt(T.sum(tanh_x ** 2))
            
        self.f = normalized_tanh

        #need to calculate both the input to its parent node and the error at this step
        def recurrence(n, hidden_states, hidden_sums, cost, x, r, p, wrong_ans, corr_ans):
            '''
            function called below by scan over the nodes in the dependency parse
            
            n - this is the index of the current node
            hidden_states - a list of hidden_states for every node, to be updated
            hidden_sums - sum over the children of dot product of the hidden nodes and the relation matrix
            cost - the total cost so far for this tree
            x - a list of word embeddings (x[n] will access the embedding for the current word)
            r - a list of relation matrices (r[n] will access the current matrix)
            p - a list of parent node indices
            wrong_ans - a list of randomly sampled word embeddings for wrong answers
            corr_ans - the word embedding for the correct answer
            '''
            h_n = self.f(T.dot(self.Wv, x[n]) + self.b + hidden_sums[n])

            update_new = T.set_subtensor(hidden_sums[p[n]], hidden_sums[p[n]]+ T.dot(r[n], h_n))

            update_hidden_new = T.set_subtensor(hidden_states[n], h_n)

            output, updates = theano.scan(fn=lambda xz, xc, h: T.maximum(0., 1. - T.dot(xc, h) + T.dot(xz, h)),
                                          outputs_info=None, sequences=wrong_ans, non_sequences=[corr_ans, h_n])

            output_sum = T.as_tensor_variable(output.sum())

            new_cost = cost + output_sum

            return update_hidden_new, update_new, new_cost

        idxs = T.ivector('idxs')
        x = self.We[idxs]

        rel_idxs = T.ivector('rel_idxs')
        r = self.Wr[rel_idxs]

        p = T.ivector('parents')

        wrong_idxs = T.ivector('wrong_idxs')
        wrong_ans = self.We[wrong_idxs]

        corr_idx = T.iscalar('corr_idx') # index of answer
        corr_ans = self.We[corr_idx]

        hidden_states = T.zeros((idxs.shape[0], d), dtype=theano.config.floatX)
        #needs to be sent_length + 1 to store final sum
        hidden_sums = T.zeros((idxs.shape[0]+1, d), dtype=theano.config.floatX)

        [h, s, cost], updates = theano.scan(fn=recurrence,
                                            sequences=T.arange(x.shape[0]),
                                            outputs_info=[hidden_states,
                                                          hidden_sums,
                                                          T.as_tensor_variable(np.asarray(0, theano.config.floatX))],
                                            non_sequences=[x, r, p, wrong_ans, corr_ans])
        final_states = h[-1]
        self.states = theano.function(inputs=[idxs, rel_idxs, p, wrong_idxs, corr_idx], outputs=final_states)

        final_cost = cost[-1] #no regularization
        gradients = T.grad(final_cost, self.params)
        self.cost_and_grad = theano.function(inputs=[idxs, rel_idxs, p, wrong_idxs, corr_idx], outputs=[final_cost] + gradients)

    def gradient_descent(self, new_gradients):
        self.descender.gradient_descent(*new_gradients)

    #batch consists of tuples of word indices, relation indices, parent indices, and an answer index
    def train(self, batch, num_wrong_ans=100):
        total_cost_and_grad = None
        total_nodes = 0.

        #accumulate cost and gradients over every tree in the batch

        for datum in batch:
            idxs, rel_idxs, p, corr_idx = datum

            #sample new wrong answers for every point (make sure not to sample the correct answer)
            self.ans_probabilities[self.ans_lookup[corr_idx]] = 0
            wrong_idxs = self.answer_idxs[np.random.choice(self.answer_idxs.shape[0],
                                                           num_wrong_ans,
                                                           False,
                                                           self.ans_probabilities)]
            self.ans_probabilities[self.ans_lookup[corr_idx]] = 1./(self.ans_probabilities.shape[0]-1)

            cost_and_grad = self.cost_and_grad(idxs, rel_idxs, p, wrong_idxs, corr_idx)
            if total_cost_and_grad is None:
                total_cost_and_grad = [0] + [np.zeros(i.shape) for i in cost_and_grad[1:]]
            for i in range(len(cost_and_grad)):
                total_cost_and_grad[i] += cost_and_grad[i]
            total_nodes += len(idxs)

        #update gradients from total_cost_and_grad[1:]
        self.gradient_descent([i/total_nodes for i in total_cost_and_grad[1:]])

        return total_cost_and_grad[0]/total_nodes

    def reset_weights(self):
        self.descender.reset_weights()

    def transform(self, batch, stop_indices=None):
        features = []
        for idxs,rel_idxs,p in batch:
            #wrong answers and the answer index do not affect the hidden states
            h = self.states(idxs, rel_idxs, p, np.zeros(0, dtype=np.int32), 0)
            x = np.zeros(self.d)
            count = 0.0
            for i,s in enumerate(h):
                if stop_indices is None or idxs[i] not in stop_indices:
                    x += s
                    count += 1
            features.append(x / count)
            
        return np.array(features)

    def save(self, filename, answers):
        '''save all the weights and hyperparameters to a file'''
        kwds = {}
        for param in self.params:
            kwds[param.name] = param.get_value()
        kwds['answer_idxs'] = self.answer_idxs
        
        with open(filename, 'wb') as f:
            np.savez(f, **kwds)

        embeddings = self.We.get_value()
        for answer in answers:
            self._answers[answer] = embeddings[answers[answer]].tolist()

        with open(filename + '.json', 'w') as f:
            json.dump(self._answers, f)

    @classmethod
    def load(cls, filename):
        '''load pre-trained weights from a file'''
        with open(filename, 'rb') as f:  #np.load needs a binary-mode handle
            npzfile = np.load(f)

            d = npzfile['embeddings'].shape[1]
            V = npzfile['embeddings'].shape[0]
            r = npzfile['dependencies'].shape[0]

            model = cls(d, V, r, npzfile['answer_idxs'])

            for param in model.params:
                param.set_value(npzfile[param.name])

        with open(filename + '.json') as f:
            model._answers = json.load(f)

        return model

    @property
    def answers(self):
        return self._answers
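
Because this variant also carries the persistence helpers, a save/load round trip is straightforward. A minimal sketch (the file name, dimensions, and answers mapping are illustrative only):

#hypothetical round trip
rnn = DependencyRNN(d=50, V=1000, r=40, answer_idxs=[1, 2, 3])
rnn.save('qanta_weights', answers={'some_answer': 1})
restored = DependencyRNN.load('qanta_weights')
assert 'some_answer' in restored.answers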
Example #5
network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28, )))
network.add(layers.Dense(10, activation='softmax'))

results_acc = []
result_acc = []
results_loss = []
result_loss = []
test_acc_results = []
test_loss_results = []
nonzero_weights = []

optimizers = [
    GRDA(lr=.005, c=.02),
    SGD(lr=.005, nesterov=False),
    SGD(lr=.005, nesterov=True),
    Adagrad(lr=.005),
    Adam(lr=.005, amsgrad=False),
    Adam(lr=.005, amsgrad=True)
]
allcounts = np.sum([x.size for x in network.get_weights()])

for opt in optimizers:
    network.compile(optimizer=opt,
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
    #note: with these three lines commented out, each optimizer resumes from
    #the weights the previous run left behind rather than from a fresh start
    #network.save_weights('initial_weights.h5')
    #network.load_weights('initial_weights.h5')
    #initial_weights = network.get_weights()
    result_acc = []
    result_loss = []
    test_loss = []
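
The allcounts total and the nonzero_weights list suggest this experiment tracks how sparse each optimizer leaves the network (GRDA in particular is designed to drive weights to exactly zero). A hedged sketch of how that fraction might be recorded after training with each optimizer:

    #hypothetical: record the fraction of weights this optimizer left non-zero
    nonzero = np.sum([np.count_nonzero(w) for w in network.get_weights()])
    nonzero_weights.append(nonzero / allcounts)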
Example #6
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                       args.nlayers, args.dropout, args.tied,
                       not args.dense).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = None
if args.opt == 'Adam':
    optimizer = Adam_Base(model.parameters(), betas=(0.9, 0.999))
elif args.opt == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
elif args.opt == 'Adagrad':
    optimizer = torch.optim.Adagrad(model.parameters(), args.lr)
elif args.opt == 'SGD_CS':
    optimizer = SGD(model.parameters(), args.lr, momentum=0.9, nesterov=True)
elif args.opt == 'Adagrad_CS':
    optimizer = Adagrad(model.parameters(), args.lr)
elif args.opt == 'Adam_CS':
    optimizer = Adam(model.parameters(), betas=(0.9, 0.999))
elif args.opt == 'RMSprop_CS':
    optimizer = RMSprop(model.parameters())
elif args.opt == 'Adam_MNK':
    optimizer = Adam_MNK(model.parameters(), betas=(0.9, 0.999))
elif args.opt == 'Adam_MNK_Embed':
    optimizer = Adam_MNK_Embed(model.parameters(), betas=(0.9, 0.999))
elif args.opt == 'Adagrad_SM3':
    optimizer = Adagrad_SM3(model.parameters(), args.lr)
elif args.opt == 'Adagrad_SM3_II_Embed':
    optimizer = Adagrad_SM3_II_Embed(model.parameters(), args.lr)
else:
    assert args.opt == 'Adagrad_SM3_II'
    optimizer = Adagrad_SM3_II(model.parameters(), args.lr)
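
The if/elif ladder above grows by two lines for every new optimizer variant. A hedged alternative sketch that maps names to constructors instead (it assumes exactly the classes the original script already imports):

#hypothetical refactor of the ladder above
opt_table = {
    'Adam': lambda p: Adam_Base(p, betas=(0.9, 0.999)),
    'SGD': lambda p: torch.optim.SGD(p, lr=0.01),
    'Adagrad': lambda p: torch.optim.Adagrad(p, args.lr),
    'SGD_CS': lambda p: SGD(p, args.lr, momentum=0.9, nesterov=True),
    'Adagrad_CS': lambda p: Adagrad(p, args.lr),
    'Adam_CS': lambda p: Adam(p, betas=(0.9, 0.999)),
    'RMSprop_CS': lambda p: RMSprop(p),
    'Adam_MNK': lambda p: Adam_MNK(p, betas=(0.9, 0.999)),
    'Adam_MNK_Embed': lambda p: Adam_MNK_Embed(p, betas=(0.9, 0.999)),
    'Adagrad_SM3': lambda p: Adagrad_SM3(p, args.lr),
    'Adagrad_SM3_II_Embed': lambda p: Adagrad_SM3_II_Embed(p, args.lr),
    'Adagrad_SM3_II': lambda p: Adagrad_SM3_II(p, args.lr),
}
optimizer = opt_table[args.opt](model.parameters())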
Example #7
    def __init__(self, d, V, r, answer_idxs, embeddings=None, seed=0):
        '''
        d = dimensionality of embeddings
        V = size of vocabulary
        r = number of dependency relations
        answer_idxs = list of indices into the embeddings matrix for all the answers
        embeddings = pre-trained word embeddings
        seed = for random number generator for reproducibility
        '''

        self.d = d

        rnge = sqrt(6) / sqrt(201)
        rnge_we = sqrt(6) / sqrt(51)

        np.random.seed(seed)

        #|V| x d embedding matrix (cast inside value= so We stays a shared variable)
        if embeddings is None:
            self.We = theano.shared(name='embeddings',
                                    value=(np.random.rand(V, d) * 2 * rnge_we -
                                           rnge_we).astype(theano.config.floatX))
        else:
            self.We = theano.shared(name='embeddings',
                                    value=np.asarray(
                                        embeddings, dtype=theano.config.floatX))

        #r x d x d tensor (matrix for each dependency relation)
        self.Wr = theano.shared(name='dependencies',
                                value=(np.random.rand(r, d, d) * 2 * rnge -
                                       rnge).astype(theano.config.floatX))

        #d x d map from embedding to hidden vector
        self.Wv = theano.shared(name='Wv',
                                value=(np.random.rand(d, d) * 2 * rnge -
                                       rnge).astype(theano.config.floatX))

        #d long bias vector
        self.b = theano.shared(name='b',
                               value=np.zeros(d, dtype=theano.config.floatX))

        #note that We is excluded here, so the embeddings receive no gradient updates
        self.params = [self.Wr, self.Wv, self.b]

        self.answer_idxs = np.array(answer_idxs, dtype=np.int32)
        self.ans_probabilities = np.ones(
            self.answer_idxs.shape[0]) / (self.answer_idxs.shape[0] - 1)
        self.ans_lookup = {j: i for i, j in enumerate(self.answer_idxs)}
        self._answers = {}

        self.descender = Adagrad(self.params)

        def normalized_tanh(x):
            '''returns tanh(x) / ||tanh(x)||'''
            tanh = T.tanh(x)
            return tanh / T.sqrt((tanh ** 2).sum())
        self.f = normalized_tanh

        #need to calculate both the input to its parent node and the error at this step
        def recurrence(n, hidden_states, hidden_sums, cost, x, r, p, wrong_ans,
                       corr_ans):
            '''
            function called below by scan over the nodes in the dependency parse
            
            n - this is the index of the current node
            hidden_states - a list of hidden_states for every node, to be updated
            hidden_sums - sum over the children of dot product of the hidden nodes and the relation matrix
            cost - the total cost so far for this tree
            x - a list of word embeddings (x[n] will access the embedding for the current word)
            r - a list of relation matrices (r[n] will access the current matrix)
            p - a list of parent node indices
            wrong_ans - a list of randomly sampled word embeddings for wrong answers
            corr_ans - the word embedding for the correct answer

            You need to calculate 3 things:
            1) The value of hidden_states[n] : h_n = f(W_v \dot x_n + b + sum_n)
            2) The updated value of hidden_sums[p[n]] : hidden_sums[p[n]] + W_r(n) \dot h_n
            3) The updated cost :
            for a single node, this is \sum_{z \in wrong_ans} max(0, 1 - x_c \dot h_n + x_z \dot h_n)
            
            you need to return the updates to hidden_states, hidden_sums, and cost
            (in that order)
            '''
            #hidden_states[n]:
            h_n = self.f(T.dot(self.Wv, x[n]) + self.b + hidden_sums[n])

            #updated value of hidden_sums[p[n]]:
            h_p_n = T.set_subtensor(hidden_sums[p[n]],
                                    hidden_sums[p[n]] + T.dot(r[n], h_n))

            #updated cost: hinge terms summed over the sampled wrong answers
            def costupdate(z, prev, base, h_n):
                return prev + T.maximum(0, base + T.dot(z, h_n))

            base = 1 - T.dot(corr_ans, h_n)
            result, s_updates = theano.scan(fn=costupdate,
                                            sequences=wrong_ans,
                                            outputs_info=T.as_tensor_variable(
                                                np.asarray(
                                                    0, theano.config.floatX)),
                                            non_sequences=[base, h_n])
            #avoid shadowing the non-sequence x from the enclosing scope
            new_cost = result[-1] + cost

            return (T.set_subtensor(hidden_states[n], h_n), h_p_n, new_cost)

        idxs = T.ivector('idxs')
        x = self.We[idxs]

        rel_idxs = T.ivector('rel_idxs')
        r = self.Wr[rel_idxs]

        p = T.ivector('parents')

        wrong_idxs = T.ivector('wrong_idxs')
        wrong_ans = self.We[wrong_idxs]

        corr_idx = T.iscalar('corr_idx')  # index of answer
        corr_ans = self.We[corr_idx]

        hidden_states = T.zeros((idxs.shape[0], d), dtype=theano.config.floatX)

        #needs to be sent_length + 1 to store final sum
        hidden_sums = T.zeros((idxs.shape[0] + 1, d),
                              dtype=theano.config.floatX)

        [h, s, cost], updates = theano.scan(
            fn=recurrence,
            sequences=T.arange(x.shape[0]),
            outputs_info=[
                hidden_states, hidden_sums,
                T.as_tensor_variable(np.asarray(0, theano.config.floatX))
            ],
            non_sequences=[x, r, p, wrong_ans, corr_ans])
        final_states = h[-1]
        self.states = theano.function(
            inputs=[idxs, rel_idxs, p, wrong_idxs, corr_idx],
            outputs=final_states)

        final_cost = cost[-1]  #no regularization
        gradients = T.grad(final_cost, self.params)
        self.cost_and_grad = theano.function(
            inputs=[idxs, rel_idxs, p, wrong_idxs, corr_idx],
            outputs=[final_cost] + gradients)
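
For reference, the per-node hinge cost that every recurrence above implements with a scan can be written out eagerly in NumPy, which is handy when sanity-checking the symbolic versions (the toy arrays below are made up):

import numpy as np

def node_cost(h_n, corr_ans, wrong_ans):
    '''sum over z in wrong_ans of max(0, 1 - x_c . h_n + x_z . h_n)'''
    base = 1.0 - corr_ans.dot(h_n)
    return sum(max(0.0, base + x_z.dot(h_n)) for x_z in wrong_ans)

rng = np.random.RandomState(0)
h_n, corr_ans = rng.rand(5), rng.rand(5)
wrong_ans = rng.rand(3, 5)  #three sampled wrong answers
print(node_cost(h_n, corr_ans, wrong_ans))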