def main():
    """Train a 1-D convolutional binary classifier on the ``IMDBText`` data.

    The function does three things in order:

    1. Builds the Theano/Blocks graph: three ``Conv1D`` layers with
       rectifiers (max pooling after the first), a mean over axis 1, one
       hidden ``Linear`` layer and a single sigmoid output trained with
       binary cross-entropy.
    2. Starts one Fuel server process for the 'train' set and one for the
       'test' set; each serves GloVe-transformed, padded, shuffled batches.
    3. Runs a Blocks ``MainLoop`` with progress, monitoring, timing,
       printing and plotting extensions.

    The server processes are terminated and joined when the main loop
    returns or raises, so they cannot outlive the training run.

    Raises:
        KeyError: if ``theano.config.device`` has no entry in the ``ports``
            table below (only 'gpu0' and 'gpu1' are listed).
    """
    x = T.tensor3('features')
    m = T.matrix('features_mask')
    y = T.imatrix('targets')

    # The model does not use the mask, but the data streams below deliver a
    # 'features_mask' source, so the variable is kept in the graph
    # (presumably so that it can be fed). It is multiplied by zero so that
    # it no longer shifts every input feature by the batch's mask mean.
    x = x + 0 * m.mean()

    # Alternative embedding: embedding_size = 300 with "glove.6B.300d.txt".
    embedding_size = 50
    glove_version = "vectors.6B.50d.txt"
    wstd = 0.02

    # ---- convolutional feature extractor ----
    conv1 = Conv1D(filter_length=5, num_filters=128,
                   input_dim=embedding_size,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0))
    conv1.initialize()
    o = conv1.apply(x)
    o = Rectifier(name="conv1red").apply(o)
    o = MaxPooling1D(pooling_length=5).apply(o)

    conv2 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0),
                   step=3, name="conv2")
    conv2.initialize()
    o = conv2.apply(o)
    o = Rectifier(name="conv2rec").apply(o)

    conv3 = Conv1D(filter_length=5, num_filters=128, input_dim=128,
                   weights_init=IsotropicGaussian(std=wstd),
                   biases_init=Constant(0.0),
                   step=3, name="conv3")
    conv3.initialize()
    o = conv3.apply(o)
    o = Rectifier(name="conv3rec").apply(o)

    # Collapse axis 1 (presumably the time axis -- TODO confirm against
    # Conv1D's output layout) into one 128-d vector per example.
    out = o.mean(axis=1)

    # ---- classifier head ----
    hidden = Linear(input_dim=128, output_dim=128,
                    weights_init=Uniform(std=0.01),
                    biases_init=Constant(0.))
    hidden.initialize()
    o = hidden.apply(out)
    o = Rectifier().apply(o)

    score_layer = Linear(input_dim=128, output_dim=1,
                         weights_init=IsotropicGaussian(std=wstd),
                         biases_init=Constant(0.),
                         name="linear2")
    score_layer.initialize()
    o = score_layer.apply(o)
    probs = Sigmoid().apply(o)

    # Binary cross-entropy and the 0.5-threshold error rate.
    cost = -(y * T.log(probs) + (1 - y) * T.log(1 - probs)).mean()
    cost.name = 'cost'
    misclassification = (y * (probs < 0.5) + (1 - y) * (probs > 0.5)).mean()
    misclassification.name = 'misclassification'

    # ---- optimiser ----
    cg = ComputationGraph([cost])
    algorithm = GradientDescent(
        cost=cost,
        params=cg.parameters,
        step_rule=CompositeRule([StepClipping(threshold=10), AdaM()]))

    # ---- data servers ----
    print("setting up data")
    ports = {
        'gpu0_train': 5557,
        'gpu0_test': 5558,
        'gpu1_train': 5559,
        'gpu1_test': 5560,
    }
    batch_size = 16

    def start_server(port, which_set):
        """Serve shuffled, GloVe-embedded, padded batches of `which_set`.

        Runs in a child process; ``fuel.server.start_server`` is the last
        call and is not expected to return.
        """
        fuel.server.logger.setLevel('WARN')
        dataset = IMDBText(which_set)
        stream = DataStream(
            dataset=dataset,
            iteration_scheme=ShuffledScheme(examples=dataset.num_examples,
                                            batch_size=batch_size))
        print("loading glove")
        glove = GloveTransformer(glove_version, data_stream=stream)
        padded = Padding(data_stream=glove, mask_sources=('features',))
        fuel.server.start_server(padded, port=port, hwm=20)

    # Resolve both ports before spawning anything so that an unknown device
    # name fails without leaving a server process behind.
    device = theano.config.device
    train_port = ports[device + '_train']
    test_port = ports[device + '_test']
    sources = ('features', 'features_mask', 'targets')

    servers = []
    try:
        for port, which_set in ((train_port, 'train'), (test_port, 'test')):
            server = Process(target=start_server, args=(port, which_set))
            server.start()
            servers.append(server)

        train_stream = ServerDataStream(sources, port=train_port)
        test_stream = ServerDataStream(sources, port=test_port)

        print("setting up model")
        # Hard-coded example count; used only for EpochProgress's
        # batch_per_epoch below.
        n_examples = 25000

        model = Model(cost)
        extensions = [
            EpochProgress(batch_per_epoch=n_examples // batch_size + 1),
            TrainingDataMonitoring([cost, misclassification],
                                   prefix='train',
                                   after_epoch=True),
            DataStreamMonitoring([cost, misclassification],
                                 data_stream=test_stream,
                                 prefix='test',
                                 after_epoch=True),
            Timing(),
            Printing(),
            Plot(device + "_result",
                 channels=[['test_misclassification',
                            'train_misclassification']],
                 after_epoch=True),
        ]

        main_loop = MainLoop(model=model,
                             data_stream=train_stream,
                             algorithm=algorithm,
                             extensions=extensions)
        main_loop.run()
    finally:
        # The servers loop forever; without this the non-daemonic children
        # would keep the interpreter alive after training stops.
        for server in servers:
            server.terminate()
            server.join()