def test():
    import numpy as np
    video_shape = (16, 112, 112)
    data = DataLayer("data/tinytraindb.lmdb", video_shape, 16, verbose=True)
    data2 = DataLayer("data/tinyvaldb.lmdb", video_shape, 16, verbose=True)
    synch_data = []

    # Retrieve data synchronously as a reference for correctness
    fetcher = DataFetcher("data/tinytraindb.lmdb", video_shape, 16, dtype='float32')
    for i in range(10):
        X, y, epoch = fetcher.load_data()
        synch_data.append(X)
        #print X

    for i in range(10):
        data.load_batch()
        data2.load_batch()
        tic = time.time()
        # Do some work
        a = np.random.randn(400, 600).dot(np.random.randn(600, 400))
        #print np.linalg.norm(a)
        toc = time.time()
        print "Work took %0.6f seconds" % (toc - tic)
        assert np.linalg.norm(data.X.get_value(borrow=True) - synch_data[i]) < 1e-8
def __init__(self, db_name, video_shape, mem_batch_size, verbose=False,
             buffer_size=6):
    self.fetcher = DataFetcher(db_name, video_shape, mem_batch_size,
                               dtype=theano.config.floatX)
    self.batch_size = mem_batch_size
    self.video_shape = video_shape
    self.current_batch = 0
    self.verbose = verbose

    # Could manage with a buffer size of exactly 2, but need to change
    # the interprocess communication somewhat
    assert buffer_size > 2

    X = np.empty((mem_batch_size, 3) + video_shape, dtype=theano.config.floatX)
    y = np.empty((mem_batch_size,), dtype=theano.config.floatX)
    self.shared_data = theano.shared(X, borrow=True)
    self.shared_label = theano.shared(y, borrow=True)
    self.X = self.shared_data
    self.y = T.cast(self.shared_label, 'int32')

    # Create shared memory object for async loading
    X_shared_array_base = multiprocessing.Array(
        ctypes.c_float,
        buffer_size * self.batch_size * 3 * np.prod(video_shape))
    X_shared_array = np.ctypeslib.as_array(X_shared_array_base.get_obj())
    self.X_shared_array = X_shared_array.reshape(
        buffer_size, self.batch_size, 3, *video_shape)

    y_shared_array_base = multiprocessing.Array(
        ctypes.c_float, buffer_size * self.batch_size)
    y_shared_array = np.ctypeslib.as_array(y_shared_array_base.get_obj())
    self.y_shared_array = y_shared_array.reshape(buffer_size, self.batch_size)

    # Start up worker process
    self.queue = multiprocessing.Queue(maxsize=buffer_size - 2)
    self.worker = multiprocessing.Process(
        target=fetcher_loop,
        args=(self.fetcher, self.X_shared_array, self.y_shared_array, self.queue))
    self.worker.start()
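
# Illustrative sketch (assumption): fetcher_loop, the worker target above, is
# defined elsewhere in this module and is not shown in this section. A minimal
# producer loop consistent with the buffers created above could look like the
# following: it cycles through the ring-buffer slots, copies each fetched batch
# into shared memory, and publishes the ready slot index on the queue, whose
# bound of buffer_size - 2 keeps the producer from overwriting a slot the
# consumer may still be reading.
def _fetcher_loop_sketch(fetcher, X_shared_array, y_shared_array, queue):
    buffer_size = X_shared_array.shape[0]
    slot = 0
    while True:
        # Fetch the next in-memory batch synchronously inside the worker process
        X, y, epoch = fetcher.load_data()
        # Copy it into the shared ring-buffer slot visible to the parent process
        X_shared_array[slot][...] = X
        y_shared_array[slot][...] = y
        # Announce which slot is ready; blocks once the bounded queue is full
        queue.put(slot)
        slot = (slot + 1) % buffer_size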
def evaluate_3d_conv():
    theano.config.exception_verbosity = "high"
    theano.config.optimizer = 'None'
    rng = np.random.RandomState(234)

    TT, HH, WW = 16, 240, 320
    N = 10
    num_classes = 5
    batch_size = 1
    num_filters = 4
    num_channels = 3

    if len(sys.argv) > 1:
        fetcher = DataFetcher("data/tinyvideodb.lmdb")
        X, y = fetcher.load_data(10, (16, 240, 320))
        y /= 21
    else:
        X = np.random.randint(-127, 127, size=(N, 3, 16, 240, 320)).astype(theano.config.floatX)
        y = np.random.randint(0, num_classes, size=(N,))

    X_train = theano.shared(X.astype('float32'), borrow=True)
    y_train = theano.shared(y.astype('int32'), borrow=True)
    print y_train.get_value()

    params = []
    x = dtensor5('x')
    y = T.ivector('y')
    FT, FH, FW = 5, 5, 5

    ###########################################################################
    # CONV-RELU-POOL (Layer 1)
    ###########################################################################
    conv1 = ConvLayer(x, num_channels, num_filters, (FT, FH, FW), (TT, HH, WW),
                      batch_size, relu, layer_name="Conv1")
    params += conv1.params
    pool1 = PoolLayer(conv1.output, (2, 2, 2))

    ###########################################################################
    # CONV-RELU-POOL (Layer 2)
    ###########################################################################
    conv2 = ConvLayer(pool1.output, num_filters, num_filters, (FT, FH, FW),
                      (TT/2, HH/2, WW/2), batch_size, relu, layer_name="Conv2")
    params += conv2.params
    pool2 = PoolLayer(conv2.output, (2, 2, 2))

    ###########################################################################
    # FULLY-CONNECTED (Layer 3)
    ###########################################################################
    out_dim = num_filters * TT * HH * WW / 64
    num_hidden = 64
    fc3 = HiddenLayer(pool2.output.flatten(ndim=2), out_dim, num_hidden, relu)
    params += fc3.params

    ###########################################################################
    # SOFTMAX (Layer 4)
    ###########################################################################
    softmax = LogRegr(fc3.output, num_hidden, num_classes, relu, rng)
    params += softmax.params

    reg = 0.01
    cost = softmax.negative_log_likelihood(y) + reg * T.sum(softmax.W * softmax.W)

    # Compute the gradient of the cost with respect to theta (stored in params);
    # the resulting gradients will be stored in the list gparams
    gparams = [T.grad(cost, param) for param in params]

    learning_rate = 1e-5
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(params, gparams)
    ]

    index = T.lscalar()
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: X_train[index * batch_size: (index + 1) * batch_size],
            y: y_train[index * batch_size: (index + 1) * batch_size]
        }
    )

    for k in range(10):
        tic = time.time()
        cost = train_model(k % (N / batch_size))
        toc = time.time()
        print cost, "(%0.4f seconds)" % (toc - tic)
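
# Minimal driver (assumption): the repo's actual entry point is not shown in
# this section; running the file directly could simply kick off the small
# training benchmark above.
if __name__ == "__main__":
    evaluate_3d_conv()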