import NN_process
from threading import Thread
import time
import copy

# Dataset / model hyper-parameters.
NO_DATA = 4900000                       # total number of training pairs on disk
BATCH_SIZE = 100                        # pairs consumed per training step
# Floor division: BATCHES must be an int (it is passed to xrange); plain `/`
# would produce a float under Python 3 while `//` is identical on Py2 ints.
BATCHES = NO_DATA // BATCH_SIZE
EMBEDDING_DIM = 400                     # word-embedding dimensionality
WORDS = 20                              # words per input window
# Derive the shapes from the constants above so the values cannot drift.
INPUT_SHAPE = (EMBEDDING_DIM, WORDS)    # one input: EMBEDDING_DIM x WORDS
OUTPUT_SHAPE = (EMBEDDING_DIM, 1)       # one output: a single embedding column

# Build the background batch producer that streams (pair, no-pair) training
# data from disk.  NOTE(review): PairProcessor is project-local (NN_process);
# the argument semantics are inferred from the names only -- confirm there.
processor = NN_process.PairProcessor('../data/pairs/enwiki_pairs_20.txt',
                                     '../data/pairs/enwiki_no_pairs_20.txt',
                                     '../data/model/docfreq.npy',
                                     '../data/model/minimal', WORDS,
                                     EMBEDDING_DIM, BATCH_SIZE)
# Run the producer loop on a worker thread; this script is the consumer.
t = Thread(target=processor.process)
t.start()
print 'Start processor thread'

# Request the first epoch, then block until the producer signals that the
# first batch is ready.  processor.lock is presumably a threading.Condition
# (it supports wait()/notifyAll() elsewhere in this file) -- TODO confirm.
processor.new_epoch()
processor.lock.acquire()
while not processor.ready:
    processor.lock.wait()
processor.lock.release()

# Hand the buffers back: clear `ready` and set `cont` so the producer starts
# on the next batch.  NOTE(review): this fragment appears truncated here --
# the matching notifyAll()/release() is not visible in this chunk.
processor.lock.acquire()
processor.cont = True
processor.ready = False
# --- Example 2 (separate snippet; enclosing class not shown) ---
    def run(self, epochs=EPOCHS, learning_rate=LEARNING_RATE, regularization=REGULARIZATION, momentum=MOMENTUM):
        """Train with momentum SGD, double-buffering batches from a worker thread.

        A background PairProcessor thread fills numpy buffers while Theano
        trains on the previous batch; a condition variable (processor.lock,
        presumably a threading.Condition -- it supports wait()/notifyAll())
        coordinates the hand-off via the `ready`, `cont` and `stop` flags.

        :param epochs: number of passes over the training pair sets
        :param learning_rate: step size forwarded to the train function
        :param regularization: regularization weight forwarded to the train function
        :param momentum: momentum coefficient forwarded to the train function
        """
        # Project-local batch producer reading the wiki pair/no-pair sets.
        processor = NN_process.PairProcessor('../data/wiki/pairs/sets/enwiki_pairs_' + str(WORDS) + '-train.txt',
                                             '../data/wiki/pairs/sets/enwiki_no_pairs_' + str(WORDS) + '-train.txt',
                                  '../data/wiki/model/docfreq.npy', '../data/wiki/model/minimal', WORDS, EMBEDDING_DIM, BATCH_SIZE)
        # Wrap the producer's buffers in Theano shared variables.
        # borrow=False copies the data, so the producer can safely overwrite
        # its own arrays while training runs on the previous copy.
        train_x1 = theano.shared(value=processor.x1, name='train_x1', borrow=False)
        train_x2 = theano.shared(value=processor.x2, name='train_x2', borrow=False)
        train_y = theano.shared(value=processor.y, name='train_y', borrow=False)
        train_z = theano.shared(value=processor.z, name='train_z', borrow=False)

        print 'Initializing train function...'
        # Compile the momentum-SGD update function over the shared buffers.
        train = self.train_function_momentum(train_x1, train_x2, train_y, train_z)

        # Daemon thread so a hard exit does not hang on the producer.
        t = Thread(target=processor.process)
        t.daemon = True
        t.start()

        import signal
        # NOTE(review): the parameter `signal` shadows the module name inside
        # the handler, and os._exit skips all cleanup (no flush, no join) --
        # intentional here to kill the daemonized producer on Ctrl-C.
        def signal_handler(signal, frame):
            import os
            os._exit(0)
        signal.signal(signal.SIGINT, signal_handler)

        for e in xrange(epochs):
            processor.new_epoch()

            # Wait until the producer has the first batch of this epoch ready.
            processor.lock.acquire()
            while not processor.ready:
                processor.lock.wait()
            processor.lock.release()

            # Copy (borrow=False) the fresh batch into the shared variables.
            train_x1.set_value(processor.x1, borrow=False)
            train_x2.set_value(processor.x2, borrow=False)
            train_y.set_value(processor.y, borrow=False)
            train_z.set_value(processor.z, borrow=False)

            # Release the buffers back to the producer: clear `ready`, set
            # `cont`, and wake it so it prepares the next batch concurrently.
            processor.lock.acquire()
            processor.cont = True
            processor.ready = False
            processor.lock.notifyAll()
            processor.lock.release()

            for b in xrange(BATCHES):
                #c = []
                # Train on the batch currently held by the shared variables.
                cost = train(lr=learning_rate, reg=regularization, mom=momentum)
                #c.append(cost)

                print 'Training, batch %d, cost %.5f' % (b, cost)
                print repr(self.model1.W.get_value())

                # Wait for the batch the producer prepared while we trained.
                processor.lock.acquire()
                while not processor.ready:
                    processor.lock.wait()
                processor.lock.release()

                train_x1.set_value(processor.x1, borrow=False)
                train_x2.set_value(processor.x2, borrow=False)
                train_y.set_value(processor.y, borrow=False)
                train_z.set_value(processor.z, borrow=False)

                processor.lock.acquire()
                # Keep the producer running except near the epoch boundary.
                # NOTE(review): for b == BATCHES-2 neither `cont` nor `stop`
                # is set (condition is `< BATCHES-2`, not `< BATCHES-1`) --
                # looks like a possible off-by-one; confirm against
                # PairProcessor's epoch handling before changing.
                if b < BATCHES-2:
                    processor.cont = True
                    processor.ready = False
                # Very last batch of the last epoch: tell the producer to exit.
                if b == BATCHES-1 and e == epochs-1:
                    processor.stop = True
                    processor.cont = True
                processor.lock.notifyAll()
                processor.lock.release()

            #print 'Training, epoch %d, cost %.5f' % (e, numpy.mean(c))

        # Producer thread exits after `stop`; then persist the weights.
        t.join()
        self.save_me('run1.npy')