def test_param_allow_downcast_int(self): a = tensor.wvector("a") # int16 b = tensor.bvector("b") # int8 c = tensor.bscalar("c") # int8 f = pfunc( [ In(a, allow_downcast=True), In(b, allow_downcast=False), In(c, allow_downcast=None), ], (a + b + c), ) # Both values are in range. Since they're not ndarrays (but lists), # they will be converted, and their value checked. assert np.all(f([3], [6], 1) == 10) # Values are in range, but a dtype too large has explicitly been given # For performance reasons, no check of the data is explicitly performed # (It might be OK to change this in the future.) with pytest.raises(TypeError): f([3], np.array([6], dtype="int16"), 1) # Value too big for a, silently ignored assert np.all(f([2**20], np.ones(1, dtype="int8"), 1) == 2) # Value too big for b, raises TypeError with pytest.raises(TypeError): f([3], [312], 1) # Value too big for c, raises TypeError with pytest.raises(TypeError): f([3], [6], 806)
def build_2048_ann(self, nb, nh, nh2):
    '''
    Builds a network for 2048 with two rectified linear hidden layers
    and a four-unit softmax output layer.
    '''
    # nb = input nodes
    # nh = first hidden layer size
    # nh2 = second hidden layer size
    print("building")
    w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
    w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, nh2)))
    w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 4)))
    input = T.dvector('input')
    target = T.wvector('target')
    x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
    x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
    x3 = Tann.softmax(T.dot(x2, w3))
    error = T.sum(pow((target - x3), 2))
    params = [w1, w2, w3]
    gradients = T.grad(error, params)
    backprops = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]
    self.trainer = theano.function(inputs=[input, target], outputs=error,
                                   updates=backprops, allow_input_downcast=True)
    self.predictor = theano.function(inputs=[input], outputs=x3,
                                     allow_input_downcast=True)
    print("Built")
def test_allow_input_downcast_int(self): a = tensor.wvector("a") # int16 b = tensor.bvector("b") # int8 c = tensor.bscalar("c") # int8 f = pfunc([a, b, c], (a + b + c), allow_input_downcast=True) # Value too big for a, b, or c, silently ignored assert f([2**20], [1], 0) == 1 assert f([3], [312], 0) == 59 assert f([3], [1], 806) == 42 g = pfunc([a, b, c], (a + b + c), allow_input_downcast=False) # All values are in range. Since they're not ndarrays (but lists # or scalars), they will be converted, and their value checked. assert np.all(g([3], [6], 0) == 9) # Values are in range, but a dtype too large has explicitly been given # For performance reasons, no check of the data is explicitly performed # (It might be OK to change this in the future.) with pytest.raises(TypeError): g([3], np.array([6], dtype="int16"), 0) # Value too big for b, raises TypeError with pytest.raises(TypeError): g([3], [312], 0) h = pfunc([a, b, c], (a + b + c)) # Default: allow_input_downcast=None # Everything here should behave like with False assert np.all(h([3], [6], 0) == 9) with pytest.raises(TypeError): h([3], np.array([6], dtype="int16"), 0) with pytest.raises(TypeError): h([3], [312], 0)
def build_custom_ann(self, layer_list, ann_type = "rlu", nb = 784): ''' ''' layer_list = [nb] + layer_list input = T.dvector('input') target = T.wvector('target') w_list = [] x_list = [] w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[0],layer_list[1])))) if ann_type == "rlu": x_list.append(T.switch(T.dot(input,w_list[0]) > 0, T.dot(input,w_list[0]), 0)) elif ann_type == "sigmoid": x_list.append(Tann.sigmoid(T.dot(input, w_list[0]))) elif ann_type == "ht": x_list.append(T.tanh(T.dot(input, w_list[0]))) for count in range(0, len(layer_list) - 2): w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[count + 1],layer_list[count + 2])))) if ann_type=="rlu": x_list.append(T.switch(T.dot(x_list[count],w_list[count + 1]) > 0, T.dot(x_list[count], w_list[count + 1]), 0)) elif ann_type == "sigmoid": x_list.append(Tann.sigmoid(T.dot(x_list[count],w_list[count + 1]))) elif ann_type == "ht": x_list.append(T.tanh(T.dot(x_list[count],w_list[count + 1]))) w_list.append(theano.shared(np.random.uniform(low=-.1, high=.1, size=(layer_list[-1], 10)))) x_list.append(T.switch(T.dot(x_list[-1],w_list[-1]) > 0, T.dot(x_list[-1],w_list[-1]), 0)) error = T.sum(pow((target - x_list[-1]), 2)) params = w_list gradients = T.grad(error, params) backprops = [(p, p - self.lrate*g) for p,g in zip(params,gradients)] self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True) self.predictor = theano.function(inputs=[input], outputs=x_list[-1], allow_input_downcast=True)
def build_rectified_linear2_ann(self, nb, nh, nh2):
    '''
    Builds a neural network, using rectified linear units 2 as the
    activation function.
    '''
    print("Building rectified linear ann")
    w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
    w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, nh2)))
    w3 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh2, 10)))
    input = T.dvector('input')
    target = T.wvector('target')
    x1 = T.switch(T.dot(input, w1) > 0, T.dot(input, w1), 0)
    x2 = T.switch(T.dot(x1, w2) > 0, T.dot(x1, w2), 0)
    x3 = T.switch(T.dot(x2, w3) > 0, T.dot(x2, w3), 0)
    error = T.sum(pow((target - x3), 2))
    params = [w1, w2, w3]
    gradients = T.grad(error, params)
    backprops = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]
    self.trainer = theano.function(inputs=[input, target], outputs=error,
                                   updates=backprops, allow_input_downcast=True)
    self.predictor = theano.function(inputs=[input], outputs=x3,
                                     allow_input_downcast=True)
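The `T.switch(z > 0, z, 0)` expressions above hand-roll a rectified linear unit and write each dot product twice (Theano's merge optimizer normally de-duplicates identical subgraphs, so this costs readability more than speed). A behavior-equivalent sketch of one such layer using `T.maximum`, assuming the same `input` and `w1` as above:

# Equivalent ReLU layer written with T.maximum instead of T.switch.
z1 = T.dot(input, w1)
x1 = T.maximum(z1, 0)  # same result as T.switch(z1 > 0, z1, 0)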
def inputs(self): return { "call_type": tensor.bvector("call_type"), "origin_call": tensor.ivector("origin_call"), "origin_stand": tensor.bvector("origin_stand"), "taxi_id": tensor.wvector("taxi_id"), "timestamp": tensor.ivector("timestamp"), "day_type": tensor.bvector("day_type"), "missing_data": tensor.bvector("missing_data"), "latitude": tensor.matrix("latitude"), "longitude": tensor.matrix("longitude"), "destination_latitude": tensor.vector("destination_latitude"), "destination_longitude": tensor.vector("destination_longitude"), "travel_time": tensor.ivector("travel_time"), "first_k_latitude": tensor.matrix("first_k_latitude"), "first_k_longitude": tensor.matrix("first_k_longitude"), "last_k_latitude": tensor.matrix("last_k_latitude"), "last_k_longitude": tensor.matrix("last_k_longitude"), "input_time": tensor.ivector("input_time"), "week_of_year": tensor.bvector("week_of_year"), "day_of_week": tensor.bvector("day_of_week"), "qhour_of_day": tensor.bvector("qhour_of_day"), "candidate_call_type": tensor.bvector("candidate_call_type"), "candidate_origin_call": tensor.ivector("candidate_origin_call"), "candidate_origin_stand": tensor.bvector("candidate_origin_stand"), "candidate_taxi_id": tensor.wvector("candidate_taxi_id"), "candidate_timestamp": tensor.ivector("candidate_timestamp"), "candidate_day_type": tensor.bvector("candidate_day_type"), "candidate_missing_data": tensor.bvector("candidate_missing_data"), "candidate_latitude": tensor.matrix("candidate_latitude"), "candidate_longitude": tensor.matrix("candidate_longitude"), "candidate_destination_latitude": tensor.vector("candidate_destination_latitude"), "candidate_destination_longitude": tensor.vector("candidate_destination_longitude"), "candidate_travel_time": tensor.ivector("candidate_travel_time"), "candidate_first_k_latitude": tensor.matrix("candidate_first_k_latitude"), "candidate_first_k_longitude": tensor.matrix("candidate_first_k_longitude"), "candidate_last_k_latitude": tensor.matrix("candidate_last_k_latitude"), "candidate_last_k_longitude": tensor.matrix("candidate_last_k_longitude"), "candidate_input_time": tensor.ivector("candidate_input_time"), "candidate_week_of_year": tensor.bvector("candidate_week_of_year"), "candidate_day_of_week": tensor.bvector("candidate_day_of_week"), "candidate_qhour_of_day": tensor.bvector("candidate_qhour_of_day"), }
def inputs(self):
    return {'call_type': tensor.bvector('call_type'),
            'origin_call': tensor.ivector('origin_call'),
            'origin_stand': tensor.bvector('origin_stand'),
            'taxi_id': tensor.wvector('taxi_id'),
            'timestamp': tensor.ivector('timestamp'),
            'day_type': tensor.bvector('day_type'),
            'missing_data': tensor.bvector('missing_data'),
            'latitude': tensor.matrix('latitude'),
            'longitude': tensor.matrix('longitude'),
            'latitude_mask': tensor.matrix('latitude_mask'),
            'longitude_mask': tensor.matrix('longitude_mask'),
            'week_of_year': tensor.bvector('week_of_year'),
            'day_of_week': tensor.bvector('day_of_week'),
            'qhour_of_day': tensor.bvector('qhour_of_day'),
            'destination_latitude': tensor.vector('destination_latitude'),
            'destination_longitude': tensor.vector('destination_longitude')}
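These `inputs()` methods only declare symbolic placeholders, sized to the data they will hold (e.g. `wvector` for `taxi_id` implies the ids fit in int16); a model elsewhere picks the keys it needs and compiles a function over them. A hypothetical sketch of consuming such a mapping (the cost expression and key choice are illustrative, not from the original code):

# Hypothetical consumer of the inputs() dict above (illustrative only).
inp = self.inputs()
cost = ((inp['destination_latitude'] - inp['latitude'][:, -1]) ** 2).mean()
fn = theano.function([inp['latitude'], inp['destination_latitude']], cost)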
def create_models(self):
    if self.verbose:
        print("Creating Training model...")
    x = T.tensor3('x', dtype=theano.config.floatX)
    y = T.wvector('y')  # int16
    self.model.create_computational_graph(x, y)
    index = T.wscalar()  # int16
    self.train_model = theano.function(
        [index],
        [self.model.cost, self.model.error,
         self.model.negative_log_likelihood, self.model.penalty,
         self.model.sensitivity, self.model.specificity],
        updates=self.model.updates,
        givens={
            x: self.data_handler.training_data[
                index * self.mini_batch_size:
                (index + 1) * self.mini_batch_size],
            y: self.data_handler.training_labels[
                index * self.mini_batch_size:
                (index + 1) * self.mini_batch_size]
        })
    if self.verbose:
        print("Training model created.")

    if self.verbose:
        print("Creating Test model...")
    self.test_model = theano.function(
        [x, y],
        [self.model.error, self.model.sensitivity,
         self.model.specificity, self.model.fully_connected_layer_output])
    if self.verbose:
        print("Test model created.")

    self.feature_extractor = theano.function(
        [x], self.model.fully_connected_layer_output)
def build_sigmoid_ann(self, nb, nh):
    '''
    Builds a neural network, using sigmoids as the activation function.
    '''
    print("Building sigmoid ann")
    w1 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nb, nh)))
    w2 = theano.shared(np.random.uniform(low=-.1, high=.1, size=(nh, 10)))
    input = T.dvector('input')
    target = T.wvector('target')
    x1 = Tann.sigmoid(T.dot(input, w1))
    x2 = Tann.sigmoid(T.dot(x1, w2))
    error = T.sum(pow((target - x2), 2))
    params = [w1, w2]
    gradients = T.grad(error, params)
    backprop_acts = [(p, p - self.lrate * g) for p, g in zip(params, gradients)]
    self.trainer = theano.function(inputs=[input, target], outputs=error,
                                   updates=backprop_acts,
                                   allow_input_downcast=True)
    self.predictor = theano.function(inputs=[input], outputs=x2,
                                     allow_input_downcast=True)
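Once compiled, `self.trainer` performs one plain gradient-descent step per call and `self.predictor` runs a forward pass; `allow_input_downcast=True` lets float64 inputs and Python ints be cast down to the declared dtypes, including the int16 `wvector` target. A hypothetical usage sketch, with `net` standing in for the containing object and shapes matching the 784-input, 10-output network above:

# Hypothetical usage of the compiled trainer/predictor (illustrative shapes).
case = np.random.rand(784)            # one flattened 28x28 input
target = np.zeros(10, dtype='int16')  # one-hot label for the int16 wvector
target[3] = 1
err = net.trainer(case, target)       # one SGD step; returns the squared error
probs = net.predictor(case)           # forward pass through both sigmoid layers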
def test_matrixmul():
    """
    Tests for projection
    """
    rng = np.random.RandomState(222)
    dtypes = ['int16', 'int32', 'int64']
    tensor_x = [
        tensor.wmatrix(),
        tensor.imatrix(),
        tensor.lmatrix(),
        tensor.wvector(),
        tensor.ivector(),
        tensor.lvector()
    ]
    np_W, np_x = [], []
    for dtype in dtypes:
        np_W.append(rng.rand(10, np.random.randint(1, 10)))
        np_x.append(rng.randint(
            0, 10, (rng.random_integers(5), rng.random_integers(5))
        ).astype(dtype))
    for dtype in dtypes:
        np_W.append(rng.rand(10, np.random.randint(1, 10)))
        np_x.append(
            rng.randint(0, 10, (rng.random_integers(5),)).astype(dtype)
        )
    tensor_W = [sharedX(W) for W in np_W]
    matrixmul = [MatrixMul(W) for W in tensor_W]
    assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W))

    fn = [theano.function([x], mm.project(x))
          for x, mm in zip(tensor_x, matrixmul)]
    for W, x, f in zip(np_W, np_x, fn):
        W_x = W[x]
        if x.ndim == 2:
            W_x = W_x.reshape((W_x.shape[0], np.prod(W_x.shape[1:])))
        else:
            W_x = W_x.flatten()
        np.testing.assert_allclose(f(x), W_x)
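The reference computation at the end of the test shows what `MatrixMul.project` does with integer inputs: an embedding-style row lookup `W[x]`, flattened per example when `x` is a matrix. A minimal NumPy-only sketch of that reference:

# NumPy sketch of the projection the test checks against.
import numpy as np
W = np.random.rand(10, 4)                       # 10 embeddings of width 4
x = np.array([[1, 2], [3, 4]], dtype='int16')   # two examples, two indices each
W_x = W[x].reshape((x.shape[0], -1))            # shape (2, 8): concatenated rows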
def compileModel(data, nInputs, nOutputs, hiddenLayersSize=[1200, 1200],
                 dropoutRates=[0.2, 0.5, 0.5], activation='relu',
                 weightInitMode='normal', regularizer=0.0001):
    """
    Creates a symbolic model given the specified parameters using Theano

    Output: A list containing the compiled Theano training, validation
    and test functions
    """
    np.random.seed(815)

    x = T.matrix('x')
    y = T.wvector('y')
    learningRate = T.scalar('learningRate')
    regularization = T.scalar('regularization')

    # Data sets
    train_x, train_y = data[0]
    valid_x, valid_y = data[1]
    test_x, test_y = data[2]

    nnet = MLP(x, nInputs, hiddenLayersSize, nOutputs,
               dropoutRates=dropoutRates, activation=activation,
               weightInitMode=weightInitMode)

    loss = nnet.loss(y, regularization)
    error = nnet.error(y)
    gParams = T.grad(loss, nnet.params)
    weightUpdates = [(param, param - learningRate * gParam)
                     for param, gParam in zip(nnet.params, gParams)]

    batchIndicesVector = T.ivector('batchIndicesVector')

    trainF = function([batchIndicesVector, learningRate, regularization],
                      Out(sbasic.gpu_from_host(loss), borrow=True),
                      updates=weightUpdates,
                      givens={x: train_x[batchIndicesVector],
                              y: train_y[batchIndicesVector]})
    validF = function([batchIndicesVector],
                      Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)),
                          borrow=True),
                      givens={x: valid_x[batchIndicesVector],
                              y: valid_y[batchIndicesVector]})
    testF = function([batchIndicesVector],
                     Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)),
                         borrow=True),
                     givens={x: test_x[batchIndicesVector],
                             y: test_y[batchIndicesVector]})
    return [trainF, validF, testF]
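The `givens` pattern above is the standard Theano way to keep a dataset in shared (often GPU) memory and index minibatches symbolically: the compiled function receives only batch indices, and Theano substitutes the indexed shared slices for `x` and `y` at call time. A minimal self-contained sketch of the same pattern, with hypothetical toy data:

# Minimal self-contained sketch of minibatching via `givens` (toy data).
import numpy as np
import theano
import theano.tensor as T

data_x = theano.shared(np.random.rand(100, 5).astype(theano.config.floatX))
idx = T.ivector('idx')
x = T.matrix('x')
mean_fn = theano.function([idx], x.mean(), givens={x: data_x[idx]})
print(mean_fn(np.arange(10, dtype='int32')))  # mean over one 10-row minibatch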
def __init__(self, input_image_size, batchsize=None, ImageDepth=1,
             InputImageDimensions=None, bSupportVariableBatchsize=True,
             bDropoutEnabled_=False, bInputIsFlattened=False, verbose=1,
             bWeightDecay=False):
    """
    Assumes <input_image_size> == Image_width == Image_height, unless it
    is given as a tuple. <InputImageDimensions> may be 1, 2 or 3.

    <ImageDepth> is 1 by default, but change it to 3 if you use RGB
    images, 4 if you use RGB-D images etc.

    bDropoutEnabled_ must be set to True if it is to be used anywhere in
    the network! You can disable it at any time in the future (incurring
    a speed-performance loss as compared to disabling it right here).
    """
    if bSupportVariableBatchsize == True:
        batchsize = None
        self.batchsize = None
        #print("bSupportVariableBatchsize is in EXPERIMENTAL stage!")
    else:
        self.batchsize = batchsize

    if not isinstance(InputImageDimensions, list) and not isinstance(InputImageDimensions, tuple):
        if InputImageDimensions is None:
            print("assuming input dimension==1 (if wrong: specify <InputImageDimensions> or set <input_image_size> as a tuple)")
            InputImageDimensions = 1
        input_image_size = (int(input_image_size),) * InputImageDimensions

    # the labels are presented as a 1D vector of int16 (ivector is int32)
    self.y = T.wvector('y_cnn_labels')
    self.rng = numpy.random.RandomState(int(time.time()))
    self.layers = []  # will contain all layers ([0] input layer ---> [-1] output layer)
    self.autoencoderChains = []
    # stays empty UNLESS you use addOutputFunction; these layers will NOT
    # be included in self.layers!
    self.output_layers = []
    self.SGD_global_LR_output_layers_multiplicator = theano.shared(np.float32(1.0))
    self.TotalForwardPassCost = 0  # number of multiplications done
    self.verbose = verbose
    self.output = None
    #self.output_layers_params = []
    self.params = []  # filled after calling CompileOutputFunctions()
    self.bDropoutEnabled = bDropoutEnabled_

    # Reshape matrix of rasterized images of shape (batch_size, input_image_size*input_image_size)
    # to a 4D tensor, compatible with our ConvPoolLayer
    if ImageDepth == 1 and InputImageDimensions != 3:
        if bInputIsFlattened or InputImageDimensions == 1:
            # the data is presented as rasterized images (np.float32)
            self.x = T.fmatrix('x_cnn_input')
        else:
            self.x = T.ftensor4('x_cnn_input')
    else:
        if InputImageDimensions == 3:
            self.x = T.TensorType('float32', (False,) * 5, name='x_cnn_input')('x_cnn_input')
        else:
            self.x = T.ftensor4('x_cnn_input')
    #assert InputImageDimensions in [1, 2, 3], "MixedConvNN::InputImageDimensions currently unsupported"

    if InputImageDimensions == 2:
        if self.batchsize != None:
            # 1st entry is batch_size, but it is 1 for the all-pure-convolutional net
            self.layer0_input = self.x.reshape((batchsize, ImageDepth, input_image_size[0], input_image_size[1]))
        else:
            self.layer0_input = self.x
        # valid for the FIRST LAYER only; each layer has one entry called like this
        self.input_shape = (batchsize, ImageDepth, input_image_size[0], input_image_size[1])
    elif InputImageDimensions == 3:
        if self.batchsize != None:
            self.layer0_input = self.x.reshape((batchsize, input_image_size[0], ImageDepth, input_image_size[1], input_image_size[2]))
        else:
            self.layer0_input = self.x
        self.input_shape = (batchsize, input_image_size[0], ImageDepth, input_image_size[1], input_image_size[2])
    else:
        if self.batchsize != None:
            self.layer0_input = self.x.reshape((batchsize, input_image_size[0]))
        else:
            self.layer0_input = self.x
        self.input_shape = (batchsize, input_image_size[0])

    self.SGD_global_LR = theano.shared(np.float32(1e-3))
    self.SGD_momentum = theano.shared(np.float32(0.9))
    self.debug_functions = []
    self.debug_functions_conv_output = []
    self.debug_gradients_function = None
    self.debug_lgradients_function = None
    self.output_stride = 1  # for fragment-max-pooling (fast segmentation/sliding window)
    self.bWeightDecay = bWeightDecay
    self.CompileSGD = NN_Optimizers.CompileSGD
    self.CompileRPROP = NN_Optimizers.CompileRPROP
    #self.compileCG = NN_Optimizers.compileCG
    #self.CompileARP = NN_Optimizers.CompileARP
    self.CompileADADELTA = NN_Optimizers.CompileADADELTA
# ...path)  # the artificial data is set up in dlp_art_data.py
#dlp.neglogl(theta0, W, X, ZA, ZB, ZE, S, setup)

import theano
import theano.tensor as t

#f = theano.function()
thetat = t.fvector()
Wt = t.fmatrix()
Xt = t.fvector()
ZAt = t.fmatrix()
ZBt = t.fmatrix()
ZEt = t.fmatrix()
St = t.fvector()
setupt = t.wvector()
# note: `setupt` is declared above, but the concrete `setup` is what gets
# passed here, as in the original code
nlogl = dlp.neglogl(thetat, Wt, Xt, ZAt, ZBt, ZEt, St, setup)


class linreg(object):
    def __init__(self, beta, y, x):
        self.beta = beta
        self.y = y
        self.x = x

    def mu(self):
        return t.dot(self.beta, self.x.T)

    def rss(self):
        diff = (self.y - self.mu())**2
if len(valid_data[bb]) >= batch_size:
    valid_gens.append(WordLMGenerator([valid_data[bb], valid_mask[bb]], glove,
                                      sequence_length, stride_length,
                                      buckets[bb], batch_size))

#for i in range(len(train_gens)):
#    train_gen = train_gens[i]
#    for index in range(train_gen.max_index):
#        # run minibatch
#        for trainset in train_gen.get_minibatch(index):  # data, mask, label, reset
#            print(i, index)

#================Build graph================#
x = T.ftensor3('X')  # (batch_size, sequence_length, 300)
m = T.wmatrix('M')   # (batch_size, sequence_length)
r = T.wvector('r')   # (batch_size,)

x_ext = T.ftensor3('X_ext')
m_ext = T.wmatrix('M_ext')
y_ext = T.imatrix('Y_ext')
r_ext = T.wvector('r_ext')

encoder = SimpleGraph(experiment_name + '_enc', batch_size)
encoder.add_layer(LSTMRecurrentLayer(input_shape=(300,),
                                     output_shape=(512,),
                                     forget_bias_one=True,
                                     peephole=True,
                                     output_return_index=[-1],
                                     save_state_index=stride_length - 1,
                                     also_return_cell=True,
                                     precompute=False,
                                     unroll=False,
def objective_train_model(params):
    # Initialise parameters
    start = timeit.default_timer()
    print(params)
    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    rng_seed = params['rng_seed']

    # Load training data
    path = 'saved_data'
    brancharray = numpy.load(os.path.join(path, 'train/branch_arrays.npy'))
    num_features = numpy.shape(brancharray)[-1]
    train_mask = numpy.load(os.path.join(path, 'train/mask.npy')).astype(numpy.int16)
    train_label = numpy.load(os.path.join(path, 'train/padlabel.npy'))
    train_rmdoublemask = numpy.load(
        os.path.join(path, 'train/rmdoublemask.npy')).astype(numpy.int16)
    train_rmdoublemask = train_rmdoublemask.flatten()

    numpy.random.seed(rng_seed)
    rng_inst = numpy.random.RandomState(rng_seed)
    lasagne.random.set_rng(rng_inst)

    input_var = T.ftensor3('inputs')
    mask = T.wmatrix('mask')
    target_var = T.ivector('targets')
    rmdoublesmask = T.wvector('rmdoublemask')

    # Build network
    network = build_nn(input_var, mask, num_features,
                       num_lstm_layers=num_lstm_layers,
                       num_lstm_units=num_lstm_units,
                       num_dense_layers=num_dense_layers,
                       num_dense_units=num_dense_units)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss * rmdoublesmask
    loss = lasagne.objectives.aggregate(loss, mask.flatten())
    # regularisation
    l2_penalty = l2reg * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_penalty
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Adam.
    # get_all_params returns the parameters of all layers below one or more
    # given Layer instances, including the layer(s) themselves.
    parameters = lasagne.layers.get_all_params(network, trainable=True)
    my_updates = lasagne.updates.adam(loss, parameters, learning_rate=learn_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    # Note: the deterministic test_prediction is used here; the original code
    # passed the stochastic `prediction`, contradicting the comment above.
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss * rmdoublesmask
    test_loss = lasagne.objectives.aggregate(test_loss, mask.flatten())

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        inputs=[input_var, mask, rmdoublesmask, target_var],
        outputs=loss,
        updates=my_updates,
        on_unused_input='warn')

    # Compile a second function computing the validation loss and predictions:
    val_fn = theano.function([input_var, mask, rmdoublesmask, target_var],
                             [test_loss, test_prediction],
                             on_unused_input='warn')

    # We iterate over epochs:
    for epoch in range(num_epochs):
        #print("Epoch {} ".format(epoch))
        train_err = 0
        # In each epoch, we do a full pass over the training data:
        for batch in iterate_minibatches(brancharray, train_mask,
                                         train_rmdoublemask, train_label,
                                         mb_size, shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)

    # Load development data
    dev_brancharray = numpy.load(os.path.join(path, 'dev/branch_arrays.npy'))
    dev_mask = numpy.load(os.path.join(path, 'dev/mask.npy')).astype(numpy.int16)
    dev_label = numpy.load(os.path.join(path, 'dev/padlabel.npy'))
    dev_rmdoublemask = numpy.load(os.path.join(
        path, 'dev/rmdoublemask.npy')).astype(numpy.int16).flatten()
    with open(os.path.join(path, 'dev/ids.pkl'), 'rb') as handle:
        dev_ids_padarray = pickle.load(handle)

    # Get predictions for the development set
    err, val_ypred = val_fn(dev_brancharray, dev_mask, dev_rmdoublemask,
                            dev_label.flatten())
    val_ypred = numpy.argmax(val_ypred, axis=1).astype(numpy.int32)
    acv_label = dev_label.flatten()
    acv_prediction = numpy.asarray(val_ypred)
    acv_mask = dev_mask.flatten()
    clip_dev_label = [o for o, m in zip(acv_label, acv_mask) if m == 1]
    clip_dev_ids = [o for o, m in zip(dev_ids_padarray, acv_mask) if m == 1]
    clip_dev_prediction = [o for o, m in zip(acv_prediction, acv_mask) if m == 1]
    # remove repeating instances
    uniqtwid, uindices2 = numpy.unique(clip_dev_ids, return_index=True)
    uniq_dev_label = [clip_dev_label[i] for i in uindices2]
    uniq_dev_prediction = [clip_dev_prediction[i] for i in uindices2]
    uniq_dev_id = [clip_dev_ids[i] for i in uindices2]

    dev_accuracy = accuracy_score(uniq_dev_label, uniq_dev_prediction)
    mactest_P, mactest_R, mactest_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction, average='macro')
    mictest_P, mictest_R, mictest_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction, average='micro')
    test_P, test_R, test_F, _ = precision_recall_fscore_support(
        uniq_dev_label, uniq_dev_prediction)

    # to change the scoring objective you need to change 'loss'
    output = {
        'loss': 1 - dev_accuracy,
        'status': STATUS_OK,
        'Params': params,
        'Macro': {'Macro_Precision': mactest_P,
                  'Macro_Recall': mactest_R,
                  'macro_F_score': mactest_F},
        'Micro': {'Micro_Precision': mictest_P,
                  'Micro_Recall': mictest_R,
                  'micro_F_score': mictest_F},
        'Support': {'Support_Precision': test_P[0],
                    'Support_Recall': test_R[0],
                    'Support_F_score': test_F[0]},
        'Comment': {'Comment_Precision': test_P[1],
                    'Comment_Recall': test_R[1],
                    'Comment_F_score': test_F[1]},
        'Deny': {'Deny_Precision': test_P[2],
                 'Deny_Recall': test_R[2],
                 'Deny_F_score': test_F[2]},
        'Appeal': {'Appeal_Precision': test_P[3],
                   'Appeal_Recall': test_R[3],
                   'Appeal_F_score': test_F[3]},
        'attachments': {'Labels': pickle.dumps(uniq_dev_label),
                        'Predictions': pickle.dumps(uniq_dev_prediction),
                        'ID': pickle.dumps(uniq_dev_id)}
    }
    print("1-accuracy loss = ", output['loss'])
    stop = timeit.default_timer()
    print("Time: ", stop - start)
    return output
def build_custom_ann(self, layer_list, ann_type="rlu", nb=784): ''' ''' print(ann_type) layer_list = [nb] + layer_list input = T.dvector('input') target = T.wvector('target') w_list = [] x_list = [] w_list.append( theano.shared( np.random.uniform(low=-.1, high=.1, size=(layer_list[0], layer_list[1])))) if ann_type == "rlu": x_list.append( T.switch( T.dot(input, w_list[0]) > 0, T.dot(input, w_list[0]), 0)) elif ann_type == "sigmoid": x_list.append(Tann.sigmoid(T.dot(input, w_list[0]))) elif ann_type == "ht": x_list.append(T.tanh(T.dot(input, w_list[0]))) for count in range(0, len(layer_list) - 2): print("looping") w_list.append( theano.shared( np.random.uniform(low=-.1, high=.1, size=(layer_list[count + 1], layer_list[count + 2])))) if ann_type == "rlu": x_list.append( T.switch( T.dot(x_list[count], w_list[count + 1]) > 0, T.dot(x_list[count], w_list[count + 1]), 0)) elif ann_type == "sigmoid": x_list.append( Tann.sigmoid(T.dot(x_list[count], w_list[count + 1]))) elif ann_type == "ht": x_list.append(T.tanh(T.dot(x_list[count], w_list[count + 1]))) print(len(x_list)) print(len(w_list)) w_list.append( theano.shared( np.random.uniform(low=-.1, high=.1, size=(layer_list[-1], 10)))) x_list.append( T.switch( T.dot(x_list[-1], w_list[-1]) > 0, T.dot(x_list[-1], w_list[-1]), 0)) error = T.sum(pow((target - x_list[-1]), 2)) params = w_list gradients = T.grad(error, params) backprops = [(p, p - self.lrate * g) for p, g in zip(params, gradients)] self.trainer = theano.function(inputs=[input, target], outputs=error, updates=backprops, allow_input_downcast=True) self.predictor = theano.function(inputs=[input], outputs=x_list[-1], allow_input_downcast=True)
def eval_train_model(params):
    print("Retrain model on train+dev set and evaluate on testing set")
    # Initialise parameters
    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    rng_seed = params['rng_seed']

    # Load data
    path = 'saved_data'
    brancharray = numpy.load(os.path.join(path, 'train/branch_arrays.npy'))
    num_features = numpy.shape(brancharray)[-1]
    train_mask = numpy.load(os.path.join(path, 'train/mask.npy')).astype(numpy.int16)
    train_label = numpy.load(os.path.join(path, 'train/padlabel.npy'))
    train_rmdoublemask = numpy.load(
        os.path.join(path, 'train/rmdoublemask.npy')).astype(numpy.int16)
    train_rmdoublemask = train_rmdoublemask.flatten()

    numpy.random.seed(rng_seed)
    rng_inst = numpy.random.RandomState(rng_seed)
    lasagne.random.set_rng(rng_inst)

    input_var = T.ftensor3('inputs')
    mask = T.wmatrix('mask')
    target_var = T.ivector('targets')
    rmdoublesmask = T.wvector('rmdoublemask')

    # Build network
    network = build_nn(input_var, mask, num_features,
                       num_lstm_layers=num_lstm_layers,
                       num_lstm_units=num_lstm_units,
                       num_dense_layers=num_dense_layers,
                       num_dense_units=num_dense_units)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss * rmdoublesmask
    loss = lasagne.objectives.aggregate(loss, mask.flatten())
    # regularisation
    l2_penalty = l2reg * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2)
    loss = loss + l2_penalty
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step.
    # get_all_params returns the parameters of all layers below one or more
    # given Layer instances, including the layer(s) themselves.
    parameters = lasagne.layers.get_all_params(network, trainable=True)
    my_updates = lasagne.updates.adam(loss, parameters, learning_rate=learn_rate)

    # Create a prediction expression for testing. The crucial difference here
    # is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        inputs=[input_var, mask, rmdoublesmask, target_var],
        outputs=loss,
        updates=my_updates,
        on_unused_input='warn')

    # Compile a second function computing the test predictions:
    val_fn = theano.function([input_var, mask], test_prediction,
                             on_unused_input='warn')

    # Read the data
    dev_brancharray = numpy.load(os.path.join(path, 'dev/branch_arrays.npy'))
    dev_mask = numpy.load(os.path.join(path, 'dev/mask.npy')).astype(numpy.int16)
    dev_label = numpy.load(os.path.join(path, 'dev/padlabel.npy'))
    dev_rmdoublemask = numpy.load(os.path.join(
        path, 'dev/rmdoublemask.npy')).astype(numpy.int16).flatten()
    with open(os.path.join(path, 'dev/ids.pkl'), 'rb') as handle:
        dev_ids_padarray = pickle.load(handle)

    test_brancharray = numpy.load(os.path.join(path, 'test/branch_arrays.npy'))
    test_mask = numpy.load(os.path.join(path, 'test/mask.npy')).astype(numpy.int16)
    test_rmdoublemask = numpy.load(os.path.join(
        path, 'test/rmdoublemask.npy')).astype(numpy.int16).flatten()
    with open(os.path.join(path, 'test/ids.pkl'), 'rb') as handle:
        test_ids_padarray = pickle.load(handle)

    # Start training loop. We iterate over epochs:
    for epoch in range(num_epochs):
        #print("Epoch {} ".format(epoch))
        train_err = 0
        # In each epoch, we do a full pass over the training data:
        for batch in iterate_minibatches(brancharray, train_mask,
                                         train_rmdoublemask, train_label,
                                         mb_size, max_seq_len=25,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)
        # ...and over the development data:
        for batch in iterate_minibatches(dev_brancharray, dev_mask,
                                         dev_rmdoublemask, dev_label,
                                         mb_size, max_seq_len=20,
                                         shuffle=False):
            inputs, mask, rmdmask, targets = batch
            train_err += train_fn(inputs, mask, rmdmask, targets)

    # And a full pass over the test data:
    test_ypred = val_fn(test_brancharray, test_mask)
    # get class label instead of probabilities
    new_test_ypred = numpy.argmax(test_ypred, axis=1).astype(numpy.int32)

    # Take mask into account
    acv_prediction = numpy.asarray(new_test_ypred)
    acv_mask = test_mask.flatten()
    clip_dev_ids = [o for o, m in zip(test_ids_padarray, acv_mask) if m == 1]
    clip_dev_prediction = [o for o, m in zip(acv_prediction, acv_mask) if m == 1]
    # remove repeating instances
    uniqtwid, uindices2 = numpy.unique(clip_dev_ids, return_index=True)
    uniq_dev_prediction = [clip_dev_prediction[i] for i in uindices2]
    uniq_dev_id = [clip_dev_ids[i] for i in uindices2]

    output = {
        'status': STATUS_OK,
        'Params': params,
        'attachments': {
            'Predictions': pickle.dumps(uniq_dev_prediction),
            'ID': pickle.dumps(uniq_dev_id)
        }
    }
    return output
valid_gens.append(WordLMGenerator([valid_data[bb], valid_mask[bb]], glove,
                                  sequence_length, stride_length,
                                  buckets[bb], batch_size))

#for i in range(len(train_gens)):
#    train_gen = train_gens[i]
#    for index in range(train_gen.max_index):
#        # run minibatch
#        for trainset in train_gen.get_minibatch(index):  # data, mask, label, reset
#            print(i, index)

#================Build graph================#
x = T.ftensor3('X')  # (batch_size, sequence_length, 300)
m = T.wmatrix('M')   # (batch_size, sequence_length)
y = T.imatrix('Y')   # (batch_size, sequence_length)
r = T.wvector('r')   # (batch_size,)

graph = SimpleGraph(experiment_name, batch_size)
graph.add_layer(LSTMRecurrentLayer(input_shape=(300,),
                                   output_shape=(1024,),
                                   forget_bias_one=True,
                                   peephole=True,
                                   output_return_index=None,
                                   save_state_index=stride_length - 1,
                                   precompute=False,
                                   unroll=False,
                                   backward=False),
                is_start=True)
# graph.add_layer(TimeDistributedDenseLayer((1024,), (512,)))  # not much time difference, and less memory
graph.add_layer(DenseLayer((1024,), (512,)))
graph.add_layer(TimeDistributedDenseLayerSCP((512,), (glove.vocabulary,)))