Example No. 1
0
    def __init__(self, t_layer_sizes, p_layer_sizes, dropout=0):
        
        self.t_layer_sizes = t_layer_sizes
        self.p_layer_sizes = p_layer_sizes

        # From our architecture definition, size of the notewise input
        self.t_input_size = 80
        
        # time network maps from notewise input size to various hidden sizes
        self.time_model = StackedCells( self.t_input_size, celltype=LSTM, layers = t_layer_sizes)
        self.time_model.layers.append(PassthroughLayer()) # add the output layer of the time model
        
        # pitch network takes last layer of time model and state of last note, moving upward
        # and eventually ends with a two-element sigmoid layer
        
        # The extra 2 input elements are:
        #   1. a value (0 or 1) for whether the previous (half-step lower) note was chosen to be
        #      played (based on the previous note-step, starts at 0)
        #   2. a value (0 or 1) for whether the previous (half-step lower) note was chosen to be
        #      articulated (based on the previous note-step, starts at 0)
        p_input_size = t_layer_sizes[-1] + 2 
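        # Illustrative arithmetic (hypothetical sizes, not taken from this snippet):
        # with t_layer_sizes = [300, 300], p_input_size = 300 + 2 = 302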
        self.pitch_model = StackedCells( p_input_size, celltype=LSTM, layers = p_layer_sizes)
        self.pitch_model.layers.append(Layer(p_layer_sizes[-1], 2, activation = T.nnet.sigmoid))
        
        self.dropout = dropout

        self.conservativity = T.fscalar()  # symbolic float scalar controlling how conservatively notes are sampled at prediction time
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))  # random stream used for sampling notes

        self.setup_train()
        self.setup_predict()
        self.setup_slow_walk()
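A minimal usage sketch for the model above (the layer sizes, note count, and step count are assumptions, not values from the snippet; the function names come from the setup_* methods shown in full in Example No. 21 below):

    import numpy as np
    model = Model([300, 300], [100, 50], dropout=0.5)
    seed = np.zeros((78, 80), dtype=np.int8)      # (notes, notewise input features); 80 matches t_input_size
    sample = model.predict_fun(128, 1.0, seed)    # simulate 128 time steps at conservativity 1.0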
Example No. 2
0
    def __init__(self, data_manager, t_layer_sizes, p_layer_sizes, dropout=0):
        print('{:25}'.format("Initializing Model"), end='', flush=True)
        self.t_layer_sizes = t_layer_sizes
        self.p_layer_sizes = p_layer_sizes
        self.dropout = dropout

        self.data_manager = data_manager
        self.t_input_size = self.data_manager.f.feature_count
        self.output_size = self.data_manager.s.information_count

        self.time_model = StackedCells(self.t_input_size,
                                       celltype=LSTM,
                                       layers=t_layer_sizes)
        self.time_model.layers.append(PassthroughLayer())

        p_input_size = t_layer_sizes[-1] + self.output_size
        self.pitch_model = StackedCells(p_input_size,
                                        celltype=LSTM,
                                        layers=p_layer_sizes)
        self.pitch_model.layers.append(
            Layer(p_layer_sizes[-1],
                  self.output_size,
                  activation=T.nnet.sigmoid))

        self.conservativity = T.fscalar()
        self.srng = T.shared_randomstreams.RandomStreams(
            np.random.randint(0, 1024))

        self.epsilon = np.spacing(np.float32(1.0))

        print("Done")
Example No. 3
0
    def __init__(self, t_layer_sizes, p_layer_sizes, dropout=0):
        
        self.t_layer_sizes = t_layer_sizes
        self.p_layer_sizes = p_layer_sizes

        # From our architecture definition, size of the notewise input
        self.t_input_size = 80
        
        # time network maps from notewise input size to various hidden sizes
        self.time_model = StackedCells( self.t_input_size, celltype=LSTM, layers = t_layer_sizes)
        self.time_model.layers.append(PassthroughLayer())
        
        # pitch network takes last layer of time model and state of last note, moving upward
        # and eventually ends with a two-element sigmoid layer
        p_input_size = t_layer_sizes[-1] + 2
        self.pitch_model = StackedCells( p_input_size, celltype=LSTM, layers = p_layer_sizes)
        self.pitch_model.layers.append(Layer(p_layer_sizes[-1], 2, activation = T.nnet.sigmoid))
        
        self.dropout = dropout

        self.conservativity = T.fscalar()
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

        self.setup_train()
        self.setup_predict()
        self.setup_slow_walk()
Example No. 4
0
    def __init__(self, t_layer_sizes, p_layer_sizes, dropout=0):

        self.t_layer_sizes = t_layer_sizes
        self.p_layer_sizes = p_layer_sizes

        # From our architecture definition, size of the notewise input
        self.t_input_size = 80

        # time network maps from notewise input size to various hidden sizes
        self.time_model = StackedCells( self.t_input_size, celltype=LSTM, layers = t_layer_sizes)
        self.time_model.layers.append(PassthroughLayer())

        # pitch network takes last layer of time model and state of last note, moving upward
        # and eventually ends with a two-element sigmoid layer
        p_input_size = t_layer_sizes[-1] + 2
        self.pitch_model = StackedCells( p_input_size, celltype=LSTM, layers = p_layer_sizes)
        self.pitch_model.layers.append(Layer(p_layer_sizes[-1], 2, activation = T.nnet.sigmoid))

        self.dropout = dropout

        self.conservativity = T.fscalar()
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

        print "model-setup::Trace-1"
        self.setup_train()
        print "model-setup::Trace-2"
        self.setup_predict()
        print "model-setup::Trace-3"
        self.setup_slow_walk()
Example No. 5
0
 def __init__(self,
              hidden_size,
              input_size,
              vocab_size,
              stack_size=1,
              celltype=LSTM):
     # declare model
     self.model = StackedCells(input_size,
                               celltype=celltype,
                               layers=[hidden_size] * stack_size)
     # add an embedding
     self.model.layers.insert(0, Embedding(vocab_size, input_size))
     # add a classifier:
     self.model.layers.append(
         Layer(hidden_size, vocab_size, activation=softmax))
     # inputs are matrices of indices,
     # each row is a sentence, each column a timestep
     self._stop_word = theano.shared(np.int32(999999999), name="stop word")
     self.for_how_long = T.ivector()
     self.input_mat = T.imatrix()
     self.priming_word = T.iscalar()
     self.srng = T.shared_randomstreams.RandomStreams(
         np.random.randint(0, 1024))
     # create symbolic variables for prediction:
     self.predictions = self.create_prediction()
     # create symbolic variable for greedy search:
     self.greedy_predictions = self.create_prediction(greedy=True)
     # create gradient training functions:
     self.create_cost_fun()
     self.create_training_function()
     self.create_predict_function()
     # For saving state
     self.epochs = 0
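A hedged instantiation sketch for the language-model-style constructor above (the sizes are assumptions, and the enclosing class is assumed to be called Model; the attribute names match the snippet):

    model = Model(hidden_size=128, input_size=64, vocab_size=5000, stack_size=2, celltype=LSTM)
    # model.input_mat is an int32 matrix of word indices: each row a sentence, each column a timestep
    # model.predictions / model.greedy_predictions are the symbolic graphs built in __init__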
Example No. 6
0
 def __init__(self,
              hidden_size,
              input_size,
              output_size,
              stack_size=1,
              celltype=RNN,
              steps=40):
     # declare model
     self.model = StackedCells(input_size,
                               celltype=celltype,
                               layers=[hidden_size] * stack_size)
     # add a classifier:
     self.model.layers.append(
         Layer(hidden_size, output_size, activation=T.tanh))
     # inputs are matrices of indices,
     # each row is a sentence, each column a timestep
     self.steps = steps
     self.gfs = T.tensor3('gfs')  # GFS input data
     self.pm25in = T.tensor3('pm25in')  # initial PM2.5 data
     self.layerstatus = None
     self.results = None
     self.cnt = T.tensor3('cnt')
     # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
     self.predictions = self.create_prediction()
     self.create_predict_function()
     '''The steps above just build the symbolic expressions ahead of time.'''
Example No. 7
0
    def __init__(self, hidden_size, input_size, stack_size=2, celltype=LSTM):
        self.input_size = input_size
        # Modelling
        self.model = StackedCells(input_size,
                                  celltype=celltype,
                                  activation=T.tanh,
                                  layers=[hidden_size] * stack_size)

        # disable modulation of the input layer
        self.model.layers[0].in_gate2.activation = lambda x: x

        # add an output layer
        self.model.layers.append(
            Layer(hidden_size, input_size, activation=softmax))

        # Setup symbolic tensor variables that will be used in computation

        # inputs are windows of spectrum data
        self.input = T.fvector("input")
        self.prev_input = T.fvector("prev_input")

        # create symbolic variables for prediction:
        self.prediction = self.create_prediction()

        # create gradient training functions:
        self.create_cost_fun()
        self.create_training_function()
        self.create_predict_function()
Example No. 8
0
class Model(object):
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN,steps=40):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=steps
        self.gfs=T.tensor3('gfs')  # GFS input data
        self.pm25in=T.tensor3('pm25in')  # initial PM2.5 data
        self.layerstatus=None
        self.results=None
        self.cnt = T.tensor3('cnt')
        # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
        self.predictions = self.create_prediction()
        self.create_predict_function()
        '''The steps above just build the symbolic expressions ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params
        
    def create_prediction(self):  # run one prediction pass
        gfs=self.gfs
        pm25in=self.pm25in
        # initial forward pass (first step)
        self.layerstatus=self.model.forward(T.concatenate([gfs[:,0],gfs[:,1],gfs[:,2],pm25in[:,0],pm25in[:,1],self.cnt[:,:,0]],axis=1))
        #results.shape?40*1
        self.results=self.layerstatus[-1]
        if self.steps > 1:
            self.layerstatus=self.model.forward(T.concatenate([gfs[:,1],gfs[:,2],gfs[:,3],pm25in[:,1],self.results,self.cnt[:,:,1]],axis=1),self.layerstatus)
            self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)      
            # the remaining (steps - 2) forward passes
            for i in xrange(2,self.steps):
                self.layerstatus=self.model.forward(T.concatenate([gfs[:,i],gfs[:,i+1],gfs[:,i+2],T.shape_padright(self.results[:,i-2]),T.shape_padright(self.results[:,i-1]),self.cnt[:,:,i]],axis=1),self.layerstatus)
                #need T.shape_padright???
                self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
        return self.results
                      
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs,self.pm25in,self.cnt],outputs =self.predictions,allow_input_downcast=True)
                                        
    def __call__(self, gfs, pm25in, cnt):
        return self.pred_fun(gfs, pm25in, cnt)
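A brief, hypothetical usage sketch for the forecasting model above (all shapes and sizes are assumptions; the call signature matches pred_fun, which takes the gfs, pm25in and cnt tensors):

    # model = Model(hidden_size=40, input_size=input_width, output_size=1, steps=40)   # input_width is hypothetical
    # preds = model(gfs_batch, pm25_batch, cnt_batch)   # one prediction column appended per step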
Example No. 9
0
class Model(object):
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN,steps=40):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=steps
        self.gfs=T.tensor3('gfs')  # GFS input data
        self.pm25in=T.tensor3('pm25in')  # initial PM2.5 data
        self.layerstatus=None
        self.results=None
        self.cnt = T.tensor3('cnt')
        # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
        self.predictions = self.create_prediction()
        self.create_predict_function()
        '''The steps above just build the symbolic expressions ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params
        
    def create_prediction(self):  # run one prediction pass
        gfs=self.gfs
        pm25in=self.pm25in
        # initial forward pass (first step)
        self.layerstatus=self.model.forward(T.concatenate([gfs[:,0],gfs[:,1],gfs[:,2],pm25in[:,0],pm25in[:,1],self.cnt[:,:,0]],axis=1))
        #results.shape?40*1
        self.results=self.layerstatus[-1]
        if self.steps > 1:
            self.layerstatus=self.model.forward(T.concatenate([gfs[:,1],gfs[:,2],gfs[:,3],pm25in[:,1],self.results,self.cnt[:,:,1]],axis=1),self.layerstatus)
            self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)      
            # the remaining (steps - 2) forward passes
            for i in xrange(2,self.steps):
                self.layerstatus=self.model.forward(T.concatenate([gfs[:,i],gfs[:,i+1],gfs[:,i+2],T.shape_padright(self.results[:,i-2]),T.shape_padright(self.results[:,i-1]),self.cnt[:,:,i]],axis=1),self.layerstatus)
                #need T.shape_padright???
                self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
        return self.results
                      
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs,self.pm25in,self.cnt],outputs =self.predictions,allow_input_downcast=True)
                                        
    def __call__(self, gfs, pm25in, cnt):
        return self.pred_fun(gfs, pm25in, cnt)
Example No. 10
0
    def __init__(self, time_model_layer_sizes, note_model_layer_sizes):
        self.time_model = StackedCells(input_size, celltype=LSTM, layers=time_model_layer_sizes)
        self.time_model.layers.append(Router())

        note_model_input_size = time_model_layer_sizes[-1] + output_size
        self.note_model = StackedCells(note_model_input_size, celltype=LSTM, layers=note_model_layer_sizes)
        self.note_model.layers.append(Layer(note_model_layer_sizes[-1], output_size, activation=T.nnet.sigmoid))

        self.time_model_layer_sizes = time_model_layer_sizes
        self.note_model_layer_sizes = note_model_layer_sizes

        self._initialize_update_function()
        self._initialize_predict_function()
Example No. 11
0
    def __init__(self,
                 input_parts,
                 layer_sizes,
                 output_size,
                 window_size=0,
                 dropout=0,
                 mode="drop",
                 unroll_batch_num=None):
        """
        Parameters:
            input_parts: A list of InputParts
            layer_sizes: A list of the form [ (indep, per_note), ... ] where
                    indep is the number of non-shifted cells to have, and
                    per_note is the number of cells to have per window note, which shift as the
                        network moves
                    Alternately can just be [ indep, ... ]
            output_size: An integer, the width of the desired output
            dropout: How much dropout to apply.
            mode: Either "drop" or "roll". If drop, discard memory that goes out of range. If roll, roll it instead
        """

        self.input_parts = input_parts
        self.window_size = window_size

        layer_sizes = [
            x if isinstance(x, tuple) else (x, 0) for x in layer_sizes
        ]
        self.layer_sizes = layer_sizes
        self.tot_layer_sizes = [(indep + per_note * self.window_size)
                                for indep, per_note in layer_sizes]

        self.output_size = output_size
        self.dropout = dropout

        self.input_size = sum(part.PART_WIDTH for part in input_parts)

        self.cells = StackedCells(self.input_size,
                                  celltype=LSTM,
                                  activation=T.tanh,
                                  layers=self.tot_layer_sizes)
        self.cells.layers.append(
            Layer(self.tot_layer_sizes[-1],
                  self.output_size,
                  activation=lambda x: x))

        assert mode in ("drop",
                        "roll"), "Must specify either drop or roll mode"
        self.mode = mode

        self.unroll_batch_num = unroll_batch_num
Example No. 12
0
class Model(object):
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size, celltype=RNN,steps=40):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size]*stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = lambda x:x))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=steps
        self.gfs=T.tensor3('gfs')  # GFS input data
        self.pm25in=T.tensor3('pm25in')  # initial PM2.5 data
        self.layerstatus=None
        self.results=None
        # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
        self.predictions = self.create_prediction()
        self.create_predict_function()        
        
    @property
    def params(self):
        return self.model.params
        
    def create_prediction(self):  # run one prediction pass
        gfs=self.gfs
        pm25in=self.pm25in
        # initial forward pass (first step)
        gfs_x=T.concatenate([gfs[:,0],gfs[:,1],gfs[:,2]],axis=1)
        pm25in_x=T.concatenate([pm25in[:,0],pm25in[:,1]],axis=1)
        self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x],axis=1))
        self.results=self.layerstatus[-1]
        pm25next=pm25in[:,1]-self.results
        if self.steps > 1:
            for i in xrange(1,self.steps):
                gfs_x=T.concatenate([gfs_x[:,9:],gfs[:,i+2]],axis=1)
                pm25in_x=T.concatenate([pm25in_x[:,1:],pm25next],axis=1)
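                # the two lines above roll the input window: drop the oldest GFS step (the slice [:,9:]
                # implies 9 features per step) and append the newest; the PM2.5 window shifts the same way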
                self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x],axis=1),self.layerstatus)
                self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
                pm25next=pm25next-self.layerstatus[-1]                
        return self.results

    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs,self.pm25in],outputs =self.predictions,allow_input_downcast=True)
                                                              
    def __call__(self, gfs,pm25in):
        return self.pred_fun(gfs,pm25in)
Example No. 13
0
    def __init__(self, input_parts, layer_sizes, output_size, window_size=0, dropout=0, mode="drop", unroll_batch_num=None):
        """
        Parameters:
            input_parts: A list of InputParts
            layer_sizes: A list of the form [ (indep, per_note), ... ] where
                    indep is the number of non-shifted cells to have, and
                    per_note is the number of cells to have per window note, which shift as the
                        network moves
                    Alternately can just be [ indep, ... ]
            output_size: An integer, the width of the desired output
            dropout: How much dropout to apply.
            mode: Either "drop" or "roll". If drop, discard memory that goes out of range. If roll, roll it instead
        """

        self.input_parts = input_parts
        self.window_size = window_size

        layer_sizes = [x if isinstance(x,tuple) else (x,0) for x in layer_sizes]
        self.layer_sizes = layer_sizes
        self.tot_layer_sizes = [(indep + per_note*self.window_size) for indep, per_note in layer_sizes]
        
        self.output_size = output_size
        self.dropout = dropout

        self.input_size = sum(part.PART_WIDTH for part in input_parts)

        self.cells = StackedCells( self.input_size, celltype=LSTM, activation=T.tanh, layers = self.tot_layer_sizes )
        self.cells.layers.append(Layer(self.tot_layer_sizes[-1], self.output_size, activation = lambda x:x))

        assert mode in ("drop", "roll"), "Must specify either drop or roll mode"
        self.mode = mode

        self.unroll_batch_num = unroll_batch_num
Example No. 14
0
 def __init__(self, hidden_size, input_size, n_components, stack_size=1,
              celltype=LSTM):
     # declare model
     self.model = StackedCells(input_size, celltype=celltype,
                               layers=[hidden_size] * stack_size)
     # add an embedding
     self.model.layers.insert(0, Embedding(vocab_size, input_size))
     # add a classifier:
     self.model.layers.append(Layer(hidden_size, vocab_size,
                                    activation=linear))
     # inputs are matrices of indices,
     # each row is a sentence, each column a timestep
     self._stop_word   = theano.shared(np.int32(999999999), name="stop word")
     self.for_how_long = T.ivector()
     self.input_mat = T.imatrix()
     self.priming_word = T.iscalar()
     self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
     # create symbolic variables for prediction:
     self.predictions = self.create_prediction()
     # create symbolic variable for greedy search:
     self.greedy_predictions = self.create_prediction(greedy=True)
     # create gradient training functions:
     self.create_cost_fun()
     self.create_training_function()
     self.create_predict_function()
Example No. 15
0
 def __init__(self, hidden_size, input_size, output_size, stack_size, celltype=RNN,steps=40):
     # declare model
     self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size]*stack_size)
     # add a classifier:
     self.model.layers.append(Layer(hidden_size, output_size, activation = lambda x:x))
     # inputs are matrices of indices,
     # each row is a sentence, each column a timestep
     self.steps=steps
     self.gfs=T.tensor3('gfs')  # GFS input data
     self.pm25in=T.tensor3('pm25in')  # initial PM2.5 data
     self.layerstatus=None
     self.results=None
     # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
     self.predictions = self.create_prediction()
     self.create_predict_function()        
Example No. 16
0
	def __init__(self, hidden_size, input_size, vocab_size, entropy_reg = 0.001, key_entropy_reg = 0.001, stack_size=1, celltype=LSTM):

		# core layer in RNN/LSTM
		self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)

		# add an embedding
		self.model.layers.insert(0, Embedding(vocab_size, input_size))

		# add a classifier:
		self.model.layers.append(Layer(hidden_size, vocab_size, activation = softmax))

		self.entropy_reg     = entropy_reg
		self.key_entropy_reg = key_entropy_reg

		self.turing_params = Parameters()
		#init turing machine model
		self.turing_updates , self.turing_predict = turing_model.build(self.turing_params , hidden_size , vocab_size)
		self.hidden_size = hidden_size         
		# inputs are matrices of indices,
		# each row is a sentence, each column a timestep
		self._stop_word   = theano.shared(np.int32(999999999), name="stop word")
		self.for_how_long = T.ivector()
		self.mask_matrix = T.imatrix()
		self.input_mat = T.imatrix()
		self.priming_word = T.iscalar()
		self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

		# create symbolic variables for prediction:
		#change by darong #issue : what is greedy
		self.lstm_predictions = self.create_lstm_prediction()
		self.final_predictions,self.entropy,self.key_entropy = self.create_final_prediction()

		# create symbolic variable for greedy search:
		self.greedy_predictions = self.create_lstm_prediction(greedy=True)

		# create gradient training functions:
		self.create_cost_fun()#create 2 cost func(lstm final)

		self.lstm_lr = 0.01
		self.turing_lr = 0.01
		self.all_lr = 0.01
		self.create_training_function()#create 3 functions(lstm turing all)
		self.create_predict_function()#create 2 predictions(lstm final)

		# create ppl
		self.lstm_ppl = self.create_lstm_ppl()
		self.final_ppl = self.create_final_ppl()
		self.create_ppl_function()
Example No. 17
0
 def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN):
     # declare model
     self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
     # add a classifier:
     self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
     # inputs are matrices of indices,
     # each row is a sentence, each column a timestep
     self.steps=T.iscalar()
     self.gfs=T.matrix()  # GFS input data
     self.pm25in=T.matrix()  # initial PM2.5 data
     self.pm25target=T.matrix()  # prediction target
     self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
     # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
     self.predictions = self.create_prediction()
     # create gradient training functions:
     self.create_cost_fun()
     self.create_training_function()
     self.create_predict_function()
     '''The steps above just build the symbolic expressions ahead of time.'''
Example No. 18
0
 def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN,steps=40):
     # declare model
     self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
     # add a classifier:
     self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
     # inputs are matrices of indices,
     # each row is a sentence, each column a timestep
     self.steps=steps
     self.gfs=T.tensor3('gfs')  # GFS input data
     self.pm25in=T.tensor3('pm25in')  # initial PM2.5 data
     self.layerstatus=None
     self.results=None
     self.cnt = T.tensor3('cnt')
     # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
     self.predictions = self.create_prediction()
     self.create_predict_function()
     self.pm25target=T.matrix('pm25target')  # prediction target (this version changes the target dimensions)
     self.create_valid_error()
     self.create_validate_function()
     '''The steps above just build the symbolic expressions ahead of time.'''
Example No. 19
0
 def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=Layer,steps=40):
     # declare model
     self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
     # add a classifier:
     self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
     # inputs are matrices of indices,
     # each row is a sentence, each column a timestep
     self.steps=steps
     self.stepsin=T.iscalar('stepsin')
     self.x=T.tensor3('x')  # GFS input data
     self.target=T.tensor3('target')  # prediction target (this version changes the target dimensions)
     self.layerstatus=None
     self.results=None
     # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
     self.predictions = self.create_prediction()
     self.predictions2 = self.create_prediction2()
     # create gradient training functions:
     self.create_cost_fun()
     self.create_valid_error()
     self.create_training_function()
     self.create_predict_function()
     self.create_validate_function()
     '''The steps above just build the symbolic expressions ahead of time.'''
Example No. 20
0
 def __init__(self, hidden_size, input_size, output_size, celltype=Layer):
     # declare model
     self.model = StackedCells(input_size, celltype=celltype, layers =hidden_size)
     # add a classifier:
     self.regression=Layer(hidden_size[-1], output_size[0], activation = T.tanh)
     self.classifier=Layer(hidden_size[-1], output_size[1], activation = softmax)
     # inputs are matrices of indices,
     # each row is a sentence, each column a timestep
     self.steps=T.iscalar('steps')
     self.x=T.tensor3('x')  # GFS input data
     self.target0=T.tensor3('target0')  # prediction target (this version changes the target dimensions)
     self.target1=T.itensor3('target1')
     self.layerstatus=None
     self.results=None
     # create symbolic variables for prediction (one complete pass over the whole sequence; the result is `predictions`)
     self.predictions0,self.predictions1 = self.create_prediction()
     # create gradient training functions:
     #self.create_cost_fun()
     #self.create_valid_error()
     #self.create_training_function()
     self.create_predict_function()
     #self.create_validate_function()
     '''The steps above just build the symbolic expressions ahead of time.'''
Example No. 21
0
class Model(object):
    
    def __init__(self, t_layer_sizes, p_layer_sizes, dropout=0):
        
        self.t_layer_sizes = t_layer_sizes
        self.p_layer_sizes = p_layer_sizes

        # From our architecture definition, size of the notewise input
        self.t_input_size = 80
        
        # time network maps from notewise input size to various hidden sizes
        self.time_model = StackedCells( self.t_input_size, celltype=LSTM, layers = t_layer_sizes)
        self.time_model.layers.append(PassthroughLayer())
        
        # pitch network takes last layer of time model and state of last note, moving upward
        # and eventually ends with a two-element sigmoid layer
        p_input_size = t_layer_sizes[-1] + 2
        self.pitch_model = StackedCells( p_input_size, celltype=LSTM, layers = p_layer_sizes)
        self.pitch_model.layers.append(Layer(p_layer_sizes[-1], 2, activation = T.nnet.sigmoid))
        
        self.dropout = dropout

        self.conservativity = T.fscalar()
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

        self.setup_train()
        self.setup_predict()
        self.setup_slow_walk()

    @property
    def params(self):
        return self.time_model.params + self.pitch_model.params
    
    @params.setter
    def params(self, param_list):
        ntimeparams = len(self.time_model.params)
        self.time_model.params = param_list[:ntimeparams]
        self.pitch_model.params = param_list[ntimeparams:]

    @property
    def learned_config(self):
        return [self.time_model.params, self.pitch_model.params, [l.initial_hidden_state for mod in (self.time_model, self.pitch_model) for l in mod.layers if has_hidden(l)]]

    @learned_config.setter
    def learned_config(self, learned_list):
        self.time_model.params = learned_list[0]
        self.pitch_model.params = learned_list[1]
        for l, val in zip((l for mod in (self.time_model, self.pitch_model) for l in mod.layers if has_hidden(l)), learned_list[2]):
            l.initial_hidden_state.set_value(val.get_value())
    
    def setup_train(self):

        # dimensions: (batch, time, notes, input_data) with input_data as in architecture
        self.input_mat = T.btensor4()
        # dimensions: (batch, time, notes, onOrArtic) with 0:on, 1:artic
        self.output_mat = T.btensor4()
        
        self.epsilon = np.spacing(np.float32(1.0))
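        # epsilon is the float32 machine epsilon; it is added inside the log-likelihood below so the log argument never reaches zero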

        def step_time(in_data, *other):
            other = list(other)
            split = -len(self.t_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.time_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states
        
        def step_note(in_data, *other):
            other = list(other)
            split = -len(self.p_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.pitch_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states
        
        # We generate an output for each input, so it doesn't make sense to use the last output as an input.
        # Note that we assume the sentinel start value is already present
        # TEMP CHANGE: NO SENTINEL
        input_slice = self.input_mat[:,0:-1]
        n_batch, n_time, n_note, n_ipn = input_slice.shape
        
        # time_inputs is a matrix (time, batch/note, input_per_note)
        time_inputs = input_slice.transpose((1,0,2,3)).reshape((n_time,n_batch*n_note,n_ipn))
        num_time_parallel = time_inputs.shape[1]
        
        # apply dropout
        if self.dropout > 0:
            time_masks = theano_lstm.MultiDropout( [(num_time_parallel, shape) for shape in self.t_layer_sizes], self.dropout)
        else:
            time_masks = []

        time_outputs_info = [initial_state_with_taps(layer, num_time_parallel) for layer in self.time_model.layers]
        time_result, _ = theano.scan(fn=step_time, sequences=[time_inputs], non_sequences=time_masks, outputs_info=time_outputs_info)
        
        self.time_thoughts = time_result
        
        # Now time_result is a list of matrix [layer](time, batch/note, hidden_states) for each layer but we only care about 
        # the hidden state of the last layer.
        # Transpose to be (note, batch/time, hidden_states)
        last_layer = get_last_layer(time_result)
        n_hidden = last_layer.shape[2]
        time_final = get_last_layer(time_result).reshape((n_time,n_batch,n_note,n_hidden)).transpose((2,1,0,3)).reshape((n_note,n_batch*n_time,n_hidden))
        
        # note_choices_inputs represents the last chosen note. Starts with [0,0], doesn't include last note.
        # In (note, batch/time, 2) format
        # Shape of start is thus (1, N, 2), concatenated with all but last element of output_mat transformed to (x, N, 2)
        start_note_values = T.alloc(np.array(0,dtype=np.int8), 1, time_final.shape[1], 2 )
        correct_choices = self.output_mat[:,1:,0:-1,:].transpose((2,0,1,3)).reshape((n_note-1,n_batch*n_time,2))
        note_choices_inputs = T.concatenate([start_note_values, correct_choices], axis=0)
        
        # Together, this and the output from the last LSTM goes to the new LSTM, but rotated, so that the batches in
        # one direction are the steps in the other, and vice versa.
        note_inputs = T.concatenate( [time_final, note_choices_inputs], axis=2 )
        num_timebatch = note_inputs.shape[1]
        
        # apply dropout
        if self.dropout > 0:
            pitch_masks = theano_lstm.MultiDropout( [(num_timebatch, shape) for shape in self.p_layer_sizes], self.dropout)
        else:
            pitch_masks = []

        note_outputs_info = [initial_state_with_taps(layer, num_timebatch) for layer in self.pitch_model.layers]
        note_result, _ = theano.scan(fn=step_note, sequences=[note_inputs], non_sequences=pitch_masks, outputs_info=note_outputs_info)
        
        self.note_thoughts = note_result
        
        # Now note_result is a list of matrix [layer](note, batch/time, onOrArticProb) for each layer but we only care about 
        # the hidden state of the last layer.
        # Transpose to be (batch, time, note, onOrArticProb)
        note_final = get_last_layer(note_result).reshape((n_note,n_batch,n_time,2)).transpose(1,2,0,3)
        
        # The cost of the entire procedure is the negative log likelihood of the events all happening.
        # For the purposes of training, if the ouputted probability is P, then the likelihood of seeing a 1 is P, and
        # the likelihood of seeing 0 is (1-P). So the likelihood is (1-P)(1-x) + Px = 2Px - P - x + 1
        # Since they are all binary decisions, and are all probabilities given all previous decisions, we can just
        # multiply the likelihoods, or, since we are logging them, add the logs.
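        # Quick check of the formula: for x = 1 it gives 2P - P - 1 + 1 = P; for x = 0 it gives 1 - P.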
        
        # Note that we mask out the articulations for those notes that aren't played, because it doesn't matter
        # whether or not those are articulated.
        # The padright is there because self.output_mat[:,:,:,0] -> 3D tensor with shape (b,x,y), but we need a 4D
        # tensor with shape (b,x,y,1) instead
        active_notes = T.shape_padright(self.output_mat[:,1:,:,0])
        mask = T.concatenate([T.ones_like(active_notes),active_notes], axis=3)
        
        loglikelihoods = mask * T.log( 2*note_final*self.output_mat[:,1:] - note_final - self.output_mat[:,1:] + 1 + self.epsilon )
        self.cost = T.neg(T.sum(loglikelihoods))
        
        updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")
        self.update_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

        self.update_thought_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs= ensure_list(self.time_thoughts) + ensure_list(self.note_thoughts) + [self.cost],
            allow_input_downcast=True)
    
    def _predict_step_note(self, in_data_from_time, *states):
        # States is [ *hiddens, last_note_choice ]
        hiddens = list(states[:-1])
        in_data_from_prev = states[-1]
        in_data = T.concatenate([in_data_from_time, in_data_from_prev])

        # correct for dropout
        if self.dropout > 0:
            masks = [1 - self.dropout for layer in self.pitch_model.layers]
            masks[0] = None
        else:
            masks = []

        new_states = self.pitch_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
        
        # Now new_states is a per-layer set of activations.
        probabilities = get_last_layer(new_states)
        
        # Thus, probabilities is a vector of two probabilities, P(play), and P(artic | play)
        
        shouldPlay = T.cast(self.srng.uniform() < (probabilities[0] ** self.conservativity), 'int16')
        shouldArtic = T.cast(shouldPlay * (self.srng.uniform() < probabilities[1]), 'int16')
        
        chosen = T.stack(shouldPlay, shouldArtic)
#        print chosen
        
        return ensure_list(new_states) + [chosen]
#        l = map(lambda x: T.cast(x, 'float32'), ensure_list(new_states) + [chosen])
#        print l
#        return l


    def setup_predict(self):
        # In prediction mode, note steps are contained in the time steps. So the passing gets a little bit hairy.

        self.predict_seed = T.bmatrix()
        self.steps_to_simulate = T.iscalar()

        def step_time(*states):
            # States is [ *hiddens, prev_result, time]
            hiddens = list(states[:-2])
            in_data = states[-2]
            time = states[-1]

            # correct for dropout
            if self.dropout > 0:
                masks = [1 - self.dropout for layer in self.time_model.layers]
                masks[0] = None
            else:
                masks = []

            new_states = self.time_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            
            # Now new_states is a list of matrix [layer](notes, hidden_states) for each layer
            time_final = get_last_layer(new_states)
            
            start_note_values = theano.tensor.alloc(np.array(0,dtype=np.int8), 2)
            
            # This gets a little bit complicated. In the training case, we can pass in a combination of the
            # time net's activations with the known choices. But in the prediction case, those choices don't
            # exist yet. So instead of iterating over the combination, we iterate over only the activations,
            # and then combine in the previous outputs in the step. And then since we are passing outputs to
            # previous inputs, we need an additional outputs_info for the initial "previous" output of zero.
            note_outputs_info = ([ initial_state_with_taps(layer) for layer in self.pitch_model.layers ] +
                                 [ dict(initial=start_note_values, taps=[-1]) ])

            #print note_outputs_info
            #print LSTM #.initial_hidden_state
            notes_result, updates = theano.scan(fn=self._predict_step_note, sequences=[time_final], outputs_info=note_outputs_info)
            
            # Now notes_result is a list of matrix [layer/output](notes, onOrArtic)
            output = get_last_layer(notes_result)
            
            next_input = OutputFormToInputFormOp()(output, time + 1) # TODO: Fix time
            #next_input = T.cast(T.alloc(0, 3, 4),'int64')
            
            return (ensure_list(new_states) + [ next_input, time + 1, output ]), updates
        
        # start_sentinel = startSentinel()
        num_notes = self.predict_seed.shape[0]
        
        time_outputs_info = ([ initial_state_with_taps(layer, num_notes) for layer in self.time_model.layers ] +
                             [ dict(initial=self.predict_seed, taps=[-1]),
                               dict(initial=0, taps=[-1]),
                               None ])
            
        time_result, updates = theano.scan( fn=step_time, 
                                            outputs_info=time_outputs_info, 
                                            n_steps=self.steps_to_simulate )
        
        self.predict_thoughts = time_result
        
        self.predicted_output = time_result[-1]
        
        self.predict_fun = theano.function(
            inputs=[self.steps_to_simulate, self.conservativity, self.predict_seed],
            outputs=self.predicted_output,
            updates=updates,
            allow_input_downcast=True)

        self.predict_thought_fun = theano.function(
            inputs=[self.steps_to_simulate, self.conservativity, self.predict_seed],
            outputs=ensure_list(self.predict_thoughts),
            updates=updates,
            allow_input_downcast=True)

    def setup_slow_walk(self):

        self.walk_input = theano.shared(np.ones((2,2), dtype='int8'))
        self.walk_time = theano.shared(np.array(0, dtype='int64'))
        self.walk_hiddens = [theano.shared(np.ones((2,2), dtype=theano.config.floatX)) for layer in self.time_model.layers if has_hidden(layer)]
        
        # correct for dropout
        if self.dropout > 0:
            masks = [1 - self.dropout for layer in self.time_model.layers]
            masks[0] = None
        else:
            masks = []

        new_states = self.time_model.forward(self.walk_input, prev_hiddens=self.walk_hiddens, dropout=masks)

        # Now new_states is a list of matrix [layer](notes, hidden_states) for each layer
        time_final = get_last_layer(new_states)
        
        start_note_values = theano.tensor.alloc(np.array(0,dtype=np.int8), 2)
        note_outputs_info = ([ initial_state_with_taps(layer) for layer in self.pitch_model.layers ] +
                             [ dict(initial=start_note_values, taps=[-1]) ])
        
        notes_result, updates = theano.scan(fn=self._predict_step_note, sequences=[time_final], outputs_info=note_outputs_info)
        
        # Now notes_result is a list of matrix [layer/output](notes, onOrArtic)
        output = get_last_layer(notes_result)
        
        next_input = OutputFormToInputFormOp()(output, self.walk_time + 1) # TODO: Fix time
        #next_input = T.cast(T.alloc(0, 3, 4),'int64')

        slow_walk_results = (new_states[:-1] + notes_result[:-1] + [ next_input, output ])

        updates.update({
                self.walk_time: self.walk_time+1,
                self.walk_input: next_input
            })

        updates.update({hidden:newstate for hidden, newstate, layer in zip(self.walk_hiddens, new_states, self.time_model.layers) if has_hidden(layer)})

        self.slow_walk_fun = theano.function(
            inputs=[self.conservativity],
            outputs=slow_walk_results,
            updates=updates,
            allow_input_downcast=True)

    def start_slow_walk(self, seed):
        seed = np.array(seed)
        num_notes = seed.shape[0]

        self.walk_time.set_value(0)
        self.walk_input.set_value(seed)
        for layer, hidden in zip((l for l in self.time_model.layers if has_hidden(l)),self.walk_hiddens):
            hidden.set_value(np.repeat(np.reshape(layer.initial_hidden_state.get_value(), (1,-1)), num_notes, axis=0))
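A hypothetical slow-walk loop using the methods defined above (the seed shape and iteration count are assumptions; `model` is an instance of this class, with numpy imported as np):

    model.start_slow_walk(np.zeros((78, 80), dtype=np.int8))   # one row per note, 80 input features
    for _ in range(64):
        step_results = model.slow_walk_fun(1.0)                # conservativity = 1.0
        chosen = step_results[-1]                               # (notes, 2): play / articulate decisions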
Example No. 22
0
class RelativeShiftLSTMStack(object):
    """
    Manages a stack of LSTM cells with potentially a relative shift applied
    """
    def __init__(self,
                 input_parts,
                 layer_sizes,
                 output_size,
                 window_size=0,
                 dropout=0,
                 mode="drop",
                 unroll_batch_num=None):
        """
        Parameters:
            input_parts: A list of InputParts
            layer_sizes: A list of the form [ (indep, per_note), ... ] where
                    indep is the number of non-shifted cells to have, and
                    per_note is the number of cells to have per window note, which shift as the
                        network moves
                    Alternately can just be [ indep, ... ]
            output_size: An integer, the width of the desired output
            dropout: How much dropout to apply.
            mode: Either "drop" or "roll". If drop, discard memory that goes out of range. If roll, roll it instead
        """

        self.input_parts = input_parts
        self.window_size = window_size

        layer_sizes = [
            x if isinstance(x, tuple) else (x, 0) for x in layer_sizes
        ]
        self.layer_sizes = layer_sizes
        self.tot_layer_sizes = [(indep + per_note * self.window_size)
                                for indep, per_note in layer_sizes]

        self.output_size = output_size
        self.dropout = dropout

        self.input_size = sum(part.PART_WIDTH for part in input_parts)

        self.cells = StackedCells(self.input_size,
                                  celltype=LSTM,
                                  activation=T.tanh,
                                  layers=self.tot_layer_sizes)
        self.cells.layers.append(
            Layer(self.tot_layer_sizes[-1],
                  self.output_size,
                  activation=lambda x: x))

        assert mode in ("drop",
                        "roll"), "Must specify either drop or roll mode"
        self.mode = mode

        self.unroll_batch_num = unroll_batch_num

    @property
    def params(self):
        return self.cells.params + list(
            l.initial_hidden_state for l in self.cells.layers if has_hidden(l))

    @params.setter
    def params(self, paramlist):
        self.cells.params = paramlist[:len(self.cells.params)]
        for l, val in zip((l for l in self.cells.layers if has_hidden(l)),
                          paramlist[len(self.cells.params):]):
            l.initial_hidden_state.set_value(val.get_value())

    def perform_step(self, in_data, shifts, hiddens, dropout_masks=[]):
        """
        Perform a step through the LSTM network.

        in_data: A theano tensor (float32) of shape (batch, input_size)
        shifts: A theano tensor (int32) of shape (batch), giving the relative
            shifts to apply to the last hiddens
        hiddens: A list of hiddens [layer](batch, hidden_idx)
        dropout_masks: If [], apply dropout deterministically. Otherwise, should
            be a set of masks returned by get_dropout_masks, generally passed through
            a scan as a non-sequence.
        """

        # hiddens is of shape [layer](batch, hidden_idx)
        # We want to permute the hidden_idx values according to shifts,
        # which are ints of shape (batch)

        n_batch = in_data.shape[0]
        new_hiddens = []
        for layer_i, (indep, per_note) in enumerate(self.layer_sizes):
            if per_note == 0:
                # Don't bother with this layer
                new_hiddens.append(hiddens[layer_i])
                continue
            # The theano_lstm code puts [memory_cells... , old_activations...]
            # We want to slide the memory cells only.
            lstm_hsplit = self.cells.layers[layer_i].hidden_size
            indep_mem = hiddens[layer_i][:, :indep]
            per_note_mem = hiddens[layer_i][:, indep:lstm_hsplit]
            remaining_values = hiddens[layer_i][:, lstm_hsplit:]
            # per_note_mem is (batch, per_note_mem)
            separated_mem = per_note_mem.reshape(
                (n_batch, self.window_size, per_note))

            # separated_mem is (batch, note, mem)
            # [a b c ... x y z] shifted up 1   (+1) goes to  [b c ... x y z 0]
            # [a b c ... x y z] shifted down 1 (-1) goes to [0 a b c ... x y]
            def _shift_step(c_mem, c_shift):
                # c_mem is (note, mem)
                # c_shift is an int
                if self.mode == "drop":

                    def _clamp_w(x):
                        return T.maximum(0, T.minimum(x, self.window_size))

                    ins_at_front = T.zeros((_clamp_w(-c_shift), per_note))
                    ins_at_back = T.zeros((_clamp_w(c_shift), per_note))
                    take_part = c_mem[_clamp_w(c_shift):self.window_size -
                                      _clamp_w(-c_shift), :]
                    return T.concatenate(
                        [ins_at_front, take_part, ins_at_back], 0)
                elif self.mode == "roll":
                    return T.roll(c_mem, (-c_shift) % 12, axis=0)

            if self.unroll_batch_num is None:
                shifted_mem, _ = theano.map(_shift_step,
                                            [separated_mem, shifts])
            else:
                shifted_mem_parts = []
                for i in range(self.unroll_batch_num):
                    shifted_mem_parts.append(
                        _shift_step(separated_mem[i], shifts[i]))
                shifted_mem = T.stack(shifted_mem_parts)

            new_per_note_mem = shifted_mem.reshape(
                (n_batch, self.window_size * per_note))
            new_layer_hiddens = T.concatenate(
                [indep_mem, new_per_note_mem, remaining_values], 1)
            new_hiddens.append(new_layer_hiddens)

        if dropout_masks == [] or not self.dropout:
            masks = []
        else:
            masks = [None] + dropout_masks
        new_states = self.cells.forward(in_data,
                                        prev_hiddens=new_hiddens,
                                        dropout=masks)
        return new_states

    def do_preprocess_scan(self, deterministic_dropout=False, **kwargs):
        """
        Run a scan using this LSTM, preprocessing all inputs before the scan.

        Parameters:
            kwargs[k]: should be a theano tensor of shape (n_batch, n_time, ... )
                Note that "relative_position" should be a keyword argument given here if there are relative
                shifts.
            deterministic_dropout: If True, apply dropout deterministically, scaling everything. If false,
                sample dropout

        Returns:
            A theano tensor of shape (n_batch, n_time, output_size) of activations
        """

        assert len(kwargs) > 0, "Need at least one input argument!"
        n_batch, n_time = list(kwargs.values())[0].shape[:2]

        squashed_kwargs = {
            k: v.reshape([n_batch * n_time] + [x for x in v.shape[2:]])
            for k, v in kwargs.items()
        }

        full_input = T.concatenate(
            [part.generate(**squashed_kwargs) for part in self.input_parts], 1)
        adjusted_input = full_input.reshape([n_batch, n_time,
                                             self.input_size]).dimshuffle(
                                                 (1, 0, 2))

        if "relative_position" in kwargs:
            relative_position = kwargs["relative_position"]
            diff_shifts = T.extra_ops.diff(relative_position, axis=1)
            cat_shifts = T.concatenate(
                [T.zeros((n_batch, 1), 'int32'), diff_shifts], 1)
            shifts = cat_shifts.dimshuffle((1, 0))
        else:
            shifts = T.zeros((n_time, n_batch), 'int32')

        def _scan_fn(in_data, shifts, *other):
            other = list(other)
            if self.dropout and not deterministic_dropout:
                split = -len(self.tot_layer_sizes)
                hiddens = other[:split]
                masks = [None] + other[split:]
            else:
                masks = []
                hiddens = other

            return self.perform_step(in_data,
                                     shifts,
                                     hiddens,
                                     dropout_masks=masks)

        if self.dropout and not deterministic_dropout:
            dropout_masks = UpscaleMultiDropout(
                [(n_batch, shape) for shape in self.tot_layer_sizes],
                self.dropout)
        else:
            dropout_masks = []

        outputs_info = [
            initial_state_with_taps(layer, n_batch)
            for layer in self.cells.layers
        ]
        result, _ = theano.scan(fn=_scan_fn,
                                sequences=[adjusted_input, shifts],
                                non_sequences=dropout_masks,
                                outputs_info=outputs_info)

        final_out = get_last_layer(result).transpose((1, 0, 2))

        return final_out

    def prepare_sample_scan(self,
                            start_pos,
                            start_out,
                            deterministic_dropout=False,
                            **kwargs):
        """
        Prepare a sample scan

        Parameters:
            kwargs[k]: should be a theano tensor of shape (n_batch, n_time, ... )
                Note that "relative_position" should be a keyword argument given here if there are relative
                shifts, as should "timestep"
            start_pos: a theano tensor of shape (n_batch) giving the initial position passed to the
                out_to_in function
            start_out: a theano tensor of shape (n_batch, X) giving the initial "output" passed
                to the out_to_in_fn
            deterministic_dropout: If True, apply dropout deterministically, scaling everything. If false,
                sample dropout

        Returns:
            A namedtuple, where
                sequences: a list of sequences to input into scan
                non_sequences: a list of non_sequences into scan
                outputs_info: a list of outputs_info for scan
                num_taps: the number of outputs with taps for this 
                (other values): for internal use
        """
        assert len(kwargs) > 0, "Need at least one input argument!"
        n_batch, n_time = list(kwargs.values())[0].shape[:2]

        transp_kwargs = {
            k: v.dimshuffle((1, 0) + tuple(range(2, v.ndim)))
            for k, v in kwargs.items()
        }

        if self.dropout and not deterministic_dropout:
            dropout_masks = UpscaleMultiDropout(
                [(n_batch, shape) for shape in self.tot_layer_sizes],
                self.dropout)
        else:
            dropout_masks = []

        outputs_info = [{
            "initial": start_pos,
            "taps": [-1]
        }, {
            "initial": start_out,
            "taps": [-1]
        }] + [
            initial_state_with_taps(layer, n_batch)
            for layer in self.cells.layers
        ]
        sequences = list(transp_kwargs.values())
        non_sequences = dropout_masks
        num_taps = len([True for x in outputs_info if x is not None])
        return SampleScanSpec(sequences=sequences,
                              non_sequences=non_sequences,
                              outputs_info=outputs_info,
                              num_taps=num_taps,
                              kwargs_keys=list(transp_kwargs.keys()),
                              deterministic_dropout=deterministic_dropout,
                              start_pos=start_pos)

    def sample_scan_routine(self, spec, *inputs):
        """
        Start a scan routine. This is implemented as a generator, since we may need to interrupt the state in the
        middle of iteration. How to use:

        scan_rout = x.sample_scan_routine(spec, *inputs)
                - spec: The SampleScanSpec returned by prepare_sample_scan
                - *inputs: The scan inputs, in [ sequences..., taps..., non_sequences... ] order

        last_rel_pos, last_out, cur_kwargs = scan_rout.send(None)
                - last_rel_pos is a theano tensor of shape (n_batch)
                - last_out will be a theano tensor of shape (n_batch, output_size)
                - cur_kwargs[k] is a theano tensor of shape (n_batch, ...), from kwargs

        out_activations = scan_rout.send((new_pos, addtl_kwargs))
                - new_pos is a theano tensor of shape (n_batch), giving the new relative position
                - addtl_kwargs[k] is a theano tensor of shape (n_batch, ...) to be added to cur kwargs
                    Note that "relative_position" will be added automatically.

        scan_outputs = scan_rout.send(new_out)
                - new_out is a tensor of shape (n_batch, X) to be output

        scan_rout.close()

        -> scan_outputs should be returned back to scan
        """
        stuff = list(inputs)
        I = len(spec.kwargs_keys)
        kwarg_seq_vals = stuff[:I]
        cur_kwargs = {k: v for k, v in zip(spec.kwargs_keys, kwarg_seq_vals)}
        last_pos, last_out = stuff[I:I + 2]
        other = stuff[I + 2:]

        if self.dropout and not spec.deterministic_dropout:
            split = -len(self.tot_layer_sizes)
            hiddens = other[:split]
            masks = [None] + other[split:]
        else:
            masks = []
            hiddens = other

        cur_pos, addtl_kwargs = yield (last_pos, last_out, cur_kwargs)
        all_kwargs = {"relative_position": cur_pos}
        all_kwargs.update(cur_kwargs)
        all_kwargs.update(addtl_kwargs)

        shift = T.switch(T.eq(all_kwargs["timestep"], 0), 0,
                         cur_pos - last_pos)

        full_input = T.concatenate(
            [part.generate(**all_kwargs) for part in self.input_parts], 1)

        step_stuff = self.perform_step(full_input,
                                       shift,
                                       hiddens,
                                       dropout_masks=masks)
        new_hiddens = step_stuff[:-1]
        raw_output = step_stuff[-1]
        sampled_output = yield (raw_output)

        yield [cur_pos, sampled_output] + step_stuff

    def extract_sample_scan_results(self, spec, outputs):
        """
        Extract outputs from the scan results. 

        Parameters:
            spec: The SampleScanSpec returned by prepare_sample_scan
            outputs: The outputs from the scan associated with this stack

        Returns:
            positions, raw_output, sampled_output
        """
        positions = T.concatenate([
            T.shape_padright(spec.start_pos), outputs[0].transpose(
                (1, 0))[:, :-1]
        ], 1)
        sampled_output = outputs[2].transpose((1, 0, 2))
        raw_output = outputs[-1].transpose((1, 0, 2))

        return positions, raw_output, sampled_output

    def do_sample_scan(self,
                       start_pos,
                       start_out,
                       sample_fn,
                       out_to_in_fn,
                       deterministic_dropout=True,
                       **kwargs):
        """
        Run a scan using this LSTM, sampling and processing as we go.

        Parameters:
            kwargs[k]: should be a theano tensor of shape (n_batch, n_time, ... )
                Note that "relative_position" should be a keyword argument given here if there are relative
                shifts.
            start_pos: a theano tensor of shape (n_batch) giving the initial position passed to the
                out_to_in function
            start_out: a theano tensor of shape (n_batch, X) giving the initial "output" passed
                to the out_to_in_fn
            sample_fn: a function with signature
                    sample_fn(out_activations, rel_pos) -> new_out, new_rel_pos
                where
                    - rel_pos is a theano tensor of shape (n_batch)
                    - out_activations is a tensor of shape (n_batch, output_size)
                and
                    - new_out is a tensor of shape (n_batch, X) to be output
                    - new_rel_pos should be a theano tensor of shape (n_batch)
            out_to_in_fn: a function with signature
                    out_to_in_fn(rel_pos, last_out, **cur_kwargs) -> addtl_kwargs
                where 
                    - rel_pos is a theano tensor of shape (n_batch)
                    - last_out will be a theano tensor of shape (n_batch, output_size)
                    - cur_kwargs[k] is a theano tensor of shape (n_batch, ...), from kwargs
                and
                    - addtl_kwargs[k] is a theano tensor of shape (n_batch, ...) to be added to cur kwargs
                        Note that "relative_position" will be added automatically.
            deterministic_dropout: If True, apply dropout deterministically, scaling everything. If false,
                sample dropout

        Returns: positions, raw_output, sampled_output, updates
        """
        raise NotImplementedError()
        spec = self.prepare_sample_scan(start_pos, start_out, sample_fn,
                                        deterministic_dropout, **kwargs)

        def _scan_fn(*stuff):
            scan_rout = self.sample_scan_routine(spec, *stuff)
            rel_pos, last_out, cur_kwargs = scan_rout.send(None)
            addtl_kwargs = out_to_in_fn(rel_pos, last_out, **cur_kwargs)
            out_activations = scan_rout.send(addtl_kwargs)
            sampled_output, new_pos = sample_fn(out_activations, rel_pos)
            scan_outputs = scan_rout.send((sampled_output, new_pos))
            scan_rout.close()
            return scan_outputs

        result, updates = theano.scan(fn=_scan_fn,
                                      sequences=spec.sequences,
                                      non_sequences=spec.non_sequences,
                                      outputs_info=spec.outputs_info)
        positions, raw_output, sampled_output = self.extract_sample_scan_results(
            spec, result)
        return positions, raw_output, sampled_output, updates
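
For orientation, here is a minimal sketch of callbacks matching the signatures documented in do_sample_scan. The names my_sample_fn, my_out_to_in_fn, the extra keyword "last_output", and the stack instance `stack` are hypothetical, and the final call is commented out because do_sample_scan currently raises NotImplementedError:

import theano.tensor as T

def my_sample_fn(out_activations, rel_pos):
    # out_activations: (n_batch, output_size); rel_pos: (n_batch)
    new_out = T.cast(out_activations > 0.5, 'int8')  # threshold the sigmoid activations
    new_rel_pos = rel_pos + 1                        # advance the relative position by one step
    return new_out, new_rel_pos

def my_out_to_in_fn(rel_pos, last_out, **cur_kwargs):
    # "relative_position" is added automatically, so only extra keyword inputs are returned
    return {"last_output": last_out}

# positions, raw, sampled, updates = stack.do_sample_scan(
#     start_pos=start_pos, start_out=start_out,
#     sample_fn=my_sample_fn, out_to_in_fn=my_out_to_in_fn,
#     deterministic_dropout=True, timestep=timestep_tensor)
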
Exemplo n.º 23
0
class Model(object):

    def __init__(self, t_layer_sizes, p_layer_sizes, dropout=0):

        self.t_layer_sizes = t_layer_sizes
        self.p_layer_sizes = p_layer_sizes

        # From our architecture definition, size of the notewise input
        self.t_input_size = 80

        # time network maps from notewise input size to various hidden sizes
        self.time_model = StackedCells( self.t_input_size, celltype=LSTM, layers = t_layer_sizes)
        self.time_model.layers.append(PassthroughLayer())

        # pitch network takes last layer of time model and state of last note, moving upward
        # and eventually ends with a two-element sigmoid layer
        p_input_size = t_layer_sizes[-1] + 2
        self.pitch_model = StackedCells( p_input_size, celltype=LSTM, layers = p_layer_sizes)
        self.pitch_model.layers.append(Layer(p_layer_sizes[-1], 2, activation = T.nnet.sigmoid))

        self.dropout = dropout

        self.conservativity = T.fscalar()
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

        print "model-setup::Trace-1"
        self.setup_train()
        print "model-setup::Trace-2"
        self.setup_predict()
        print "model-setup::Trace-3"
        self.setup_slow_walk()


    @property
    def params(self):
        return self.time_model.params + self.pitch_model.params

    @params.setter
    def params(self, param_list):
        ntimeparams = len(self.time_model.params)
        self.time_model.params = param_list[:ntimeparams]
        self.pitch_model.params = param_list[ntimeparams:]

    @property
    def learned_config(self):
        return [self.time_model.params, self.pitch_model.params, [l.initial_hidden_state for mod in (self.time_model, self.pitch_model) for l in mod.layers if has_hidden(l)]]

    @learned_config.setter
    def learned_config(self, learned_list):
        self.time_model.params = learned_list[0]
        self.pitch_model.params = learned_list[1]
        for l, val in zip((l for mod in (self.time_model, self.pitch_model) for l in mod.layers if has_hidden(l)), learned_list[2]):
            l.initial_hidden_state.set_value(val.get_value())

    def setup_train(self):

        # dimensions: (batch, time, notes, input_data) with input_data as in architecture
        self.input_mat = T.btensor4()
        # dimensions: (batch, time, notes, onOrArtic) with 0:on, 1:artic
        self.output_mat = T.btensor4()

        self.epsilon = np.spacing(np.float32(1.0))

        print "model-setup-train::Trace-1"


        def step_time(in_data, *other):
            other = list(other)
            split = -len(self.t_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.time_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states

        def step_note(in_data, *other):
            other = list(other)
            split = -len(self.p_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.pitch_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)
            return new_states

        # We generate an output for each input, so it doesn't make sense to use the last output as an input.
        # Note that we assume the sentinel start value is already present
        # TEMP CHANGE: NO SENTINEL

        print "model-setup-train::Trace-2"

        input_slice = self.input_mat[:,0:-1]
        n_batch, n_time, n_note, n_ipn = input_slice.shape

        # time_inputs is a matrix (time, batch/note, input_per_note)
        time_inputs = input_slice.transpose((1,0,2,3)).reshape((n_time,n_batch*n_note,n_ipn))
        num_time_parallel = time_inputs.shape[1]

        # apply dropout
        if self.dropout > 0:
            time_masks = MultiDropout( [(num_time_parallel, shape) for shape in self.t_layer_sizes], self.dropout)
        else:
            time_masks = []

        print "model-setup-train::Trace-3"

        time_outputs_info = [initial_state_with_taps(layer, num_time_parallel) for layer in self.time_model.layers]
        time_result, _ = theano.scan(fn=step_time, sequences=[time_inputs], non_sequences=time_masks, outputs_info=time_outputs_info)

        print "model-setup-train::Trace-4"


        self.time_thoughts = time_result

        # Now time_result is a list of matrix [layer](time, batch/note, hidden_states) for each layer but we only care about
        # the hidden state of the last layer.
        # Transpose to be (note, batch/time, hidden_states)
        last_layer = get_last_layer(time_result)
        n_hidden = last_layer.shape[2]
        time_final = get_last_layer(time_result).reshape((n_time,n_batch,n_note,n_hidden)).transpose((2,1,0,3)).reshape((n_note,n_batch*n_time,n_hidden))
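        # For example, with n_batch=5, n_time=128, n_note=78 and a last hidden size of 300 (hypothetical numbers),
        # the scan output (128, 5*78, 300) is reshaped to (128, 5, 78, 300), transposed to (78, 5, 128, 300),
        # and flattened to (78, 5*128, 300), so the note axis becomes the axis the pitch LSTM scans over.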

        # note_choices_inputs represents the last chosen note. Starts with [0,0], doesn't include last note.
        # In (note, batch/time, 2) format
        # Shape of start is thus (1, N, 2), concatenated with all but last element of output_mat transformed to (x, N, 2)
        start_note_values = T.alloc(0, 1, time_final.shape[1], 2 )
        correct_choices = self.output_mat[:,1:,0:-1,:].transpose((2,0,1,3)).reshape((n_note-1,n_batch*n_time,2))
        note_choices_inputs = T.concatenate([start_note_values, correct_choices], axis=0)

        print "model-setup-train::Trace-5"


        # Together, this and the output from the last LSTM go to the new LSTM, but rotated, so that the batches in
        # one direction are the steps in the other, and vice versa.
        note_inputs = T.concatenate( [time_final, note_choices_inputs], axis=2 )
        num_timebatch = note_inputs.shape[1]

        # apply dropout
        if self.dropout > 0:
            pitch_masks = MultiDropout( [(num_timebatch, shape) for shape in self.p_layer_sizes], self.dropout)
        else:
            pitch_masks = []

        print "model-setup-train::Trace-6"


        note_outputs_info = [initial_state_with_taps(layer, num_timebatch) for layer in self.pitch_model.layers]
        note_result, _ = theano.scan(fn=step_note, sequences=[note_inputs], non_sequences=pitch_masks, outputs_info=note_outputs_info)

        self.note_thoughts = note_result

        # Now note_result is a list of matrix [layer](note, batch/time, onOrArticProb) for each layer but we only care about
        # the hidden state of the last layer.
        # Transpose to be (batch, time, note, onOrArticProb)
        note_final = get_last_layer(note_result).reshape((n_note,n_batch,n_time,2)).transpose(1,2,0,3)

        print "model-setup-train::Trace-7"


        # The cost of the entire procedure is the negative log likelihood of the events all happening.
        # For the purposes of training, if the output probability is P, then the likelihood of seeing a 1 is P, and
        # the likelihood of seeing 0 is (1-P). So the likelihood is (1-P)(1-x) + Px = 2Px - P - x + 1
        # Since they are all binary decisions, and are all probabilities given all previous decisions, we can just
        # multiply the likelihoods, or, since we are logging them, add the logs.

        # Note that we mask out the articulations for those notes that aren't played, because it doesn't matter
        # whether or not those are articulated.
        # The padright is there because self.output_mat[:,:,:,0] -> 3D matrix with (b,x,y), but we need a 4D tensor
        # with shape (b,x,y,1) instead
        active_notes = T.shape_padright(self.output_mat[:,1:,:,0])
        mask = T.concatenate([T.ones_like(active_notes),active_notes], axis=3)

        loglikelihoods = mask * T.log( 2*note_final*self.output_mat[:,1:] - note_final - self.output_mat[:,1:] + 1 + self.epsilon )

        print "model-setup-train::Trace-8"

        self.cost = T.neg(T.sum(loglikelihoods))

        print "model-setup-train::Trace-9"

        updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")

        print "model-setup-train::Trace-10"

        self.update_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)


        self.update_thought_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs= ensure_list(self.time_thoughts) + ensure_list(self.note_thoughts) + [self.cost],
            allow_input_downcast=True)



    def _predict_step_note(self, in_data_from_time, *states):
        # States is [ *hiddens, last_note_choice ]
        hiddens = list(states[:-1])
        in_data_from_prev = states[-1]
        in_data = T.concatenate([in_data_from_time, in_data_from_prev])

        # correct for dropout
        if self.dropout > 0:
            masks = [1 - self.dropout for layer in self.pitch_model.layers]
            masks[0] = None
        else:
            masks = []

        new_states = self.pitch_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)

        # Now new_states is a per-layer set of activations.
        probabilities = get_last_layer(new_states)

        # Thus, probabilities is a vector of two probabilities, P(play), and P(artic | play)

        # sample against a uniform draw so the comparison is a Bernoulli trial with probability P(play)**conservativity
        shouldPlay = self.srng.uniform() < (probabilities[0] ** self.conservativity)
        shouldArtic = shouldPlay * (self.srng.uniform() < probabilities[1])

        chosen = T.stack(shouldPlay, shouldArtic)

        return ensure_list(new_states) + [chosen]

    def setup_predict(self):
        # In prediction mode, note steps are contained in the time steps. So the passing gets a little bit hairy.

        self.predict_seed = T.bmatrix()
        self.steps_to_simulate = T.iscalar()

        def step_time(*states):
            # States is [ *hiddens, prev_result, time]
            hiddens = list(states[:-2])
            in_data = states[-2]
            time = states[-1]

            # correct for dropout
            if self.dropout > 0:
                masks = [1 - self.dropout for layer in self.time_model.layers]
                masks[0] = None
            else:
                masks = []

            new_states = self.time_model.forward(in_data, prev_hiddens=hiddens, dropout=masks)

            # Now new_states is a list of matrix [layer](notes, hidden_states) for each layer
            time_final = get_last_layer(new_states)

            start_note_values = theano.tensor.alloc(0, 2)

            # This gets a little bit complicated. In the training case, we can pass in a combination of the
            # time net's activations with the known choices. But in the prediction case, those choices don't
            # exist yet. So instead of iterating over the combination, we iterate over only the activations,
            # and then combine in the previous outputs in the step. And then since we are passing outputs to
            # previous inputs, we need an additional outputs_info for the initial "previous" output of zero.
            note_outputs_info = ([ initial_state_with_taps(layer) for layer in self.pitch_model.layers ] +
                                 [ dict(initial=start_note_values, taps=[-1]) ])

            notes_result, updates = theano.scan(fn=self._predict_step_note, sequences=[time_final], outputs_info=note_outputs_info)

            # Now notes_result is a list of matrix [layer/output](notes, onOrArtic)
            output = get_last_layer(notes_result)

            next_input = OutputFormToInputFormOp()(output, time + 1) # TODO: Fix time
            #next_input = T.cast(T.alloc(0, 3, 4),'int64')

            return (ensure_list(new_states) + [ next_input, time + 1, output ]), updates

        num_notes = self.predict_seed.shape[0]

        time_outputs_info = ([ initial_state_with_taps(layer, num_notes) for layer in self.time_model.layers ] +
                             [ dict(initial=self.predict_seed, taps=[-1]),
                               dict(initial=0, taps=[-1]),
                               None ])

        time_result, updates = theano.scan( fn=step_time,
                                            outputs_info=time_outputs_info,
                                            n_steps=self.steps_to_simulate )

        self.predict_thoughts = time_result

        self.predicted_output = time_result[-1]

        self.predict_fun = theano.function(
            inputs=[self.steps_to_simulate, self.conservativity, self.predict_seed],
            outputs=self.predicted_output,
            updates=updates,
            allow_input_downcast=True)

        self.predict_thought_fun = theano.function(
            inputs=[self.steps_to_simulate, self.conservativity, self.predict_seed],
            outputs=ensure_list(self.predict_thoughts),
            updates=updates,
            allow_input_downcast=True)

    def setup_slow_walk(self):

        self.walk_input = theano.shared(np.ones((2,2), dtype='int8'))
        self.walk_time = theano.shared(np.array(0, dtype='int64'))
        self.walk_hiddens = [theano.shared(np.ones((2,2), dtype=theano.config.floatX)) for layer in self.time_model.layers if has_hidden(layer)]

        # correct for dropout
        if self.dropout > 0:
            masks = [1 - self.dropout for layer in self.time_model.layers]
            masks[0] = None
        else:
            masks = []

        new_states = self.time_model.forward(self.walk_input, prev_hiddens=self.walk_hiddens, dropout=masks)

        # Now new_states is a list of matrix [layer](notes, hidden_states) for each layer
        time_final = get_last_layer(new_states)

        start_note_values = theano.tensor.alloc(0, 2)
        note_outputs_info = ([ initial_state_with_taps(layer) for layer in self.pitch_model.layers ] +
                             [ dict(initial=start_note_values, taps=[-1]) ])

        notes_result, updates = theano.scan(fn=self._predict_step_note, sequences=[time_final], outputs_info=note_outputs_info)

        # Now notes_result is a list of matrix [layer/output](notes, onOrArtic)
        output = get_last_layer(notes_result)

        next_input = OutputFormToInputFormOp()(output, self.walk_time + 1) # TODO: Fix time
        #next_input = T.cast(T.alloc(0, 3, 4),'int64')

        slow_walk_results = (new_states[:-1] + notes_result[:-1] + [ next_input, output ])

        updates.update({
                self.walk_time: self.walk_time+1,
                self.walk_input: next_input
            })

        updates.update({hidden:newstate for hidden, newstate, layer in zip(self.walk_hiddens, new_states, self.time_model.layers) if has_hidden(layer)})

        self.slow_walk_fun = theano.function(
            inputs=[self.conservativity],
            outputs=slow_walk_results,
            updates=updates,
            allow_input_downcast=True)

    def start_slow_walk(self, seed):
        seed = np.array(seed)
        num_notes = seed.shape[0]

        self.walk_time.set_value(0)
        self.walk_input.set_value(seed)
        for layer, hidden in zip((l for l in self.time_model.layers if has_hidden(l)),self.walk_hiddens):
            hidden.set_value(np.repeat(np.reshape(layer.initial_hidden_state.get_value(), (1,-1)), num_notes, axis=0))
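
A brief usage sketch for this model; the layer sizes, note count, and array contents below are hypothetical, while the shapes follow the btensor4/bmatrix declarations above:

import numpy as np

model = Model(t_layer_sizes=[300, 300], p_layer_sizes=[100, 50], dropout=0.5)

# Training batch: binary features of shape (batch, time, notes, 80) and
# targets of shape (batch, time, notes, 2) holding the play/articulate bits.
x = np.zeros((5, 128, 78, 80), dtype=np.int8)
y = np.zeros((5, 128, 78, 2), dtype=np.int8)
cost = model.update_fun(x, y)

# Generation: simulate 64 time steps from a (notes, 80) seed at conservativity 1.0.
seed = np.zeros((78, 80), dtype=np.int8)
piece = model.predict_fun(64, 1.0, seed)
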
Exemplo n.º 24
0
class Model(object):
    def __init__(self, data_manager, t_layer_sizes, p_layer_sizes, dropout=0):
        print('{:25}'.format("Initializing Model"), end='', flush=True)
        self.t_layer_sizes = t_layer_sizes
        self.p_layer_sizes = p_layer_sizes
        self.dropout = dropout

        self.data_manager = data_manager
        self.t_input_size = self.data_manager.f.feature_count
        self.output_size = self.data_manager.s.information_count

        self.time_model = StackedCells(self.t_input_size,
                                       celltype=LSTM,
                                       layers=t_layer_sizes)
        self.time_model.layers.append(PassthroughLayer())

        p_input_size = t_layer_sizes[-1] + self.output_size
        self.pitch_model = StackedCells(p_input_size,
                                        celltype=LSTM,
                                        layers=p_layer_sizes)
        self.pitch_model.layers.append(
            Layer(p_layer_sizes[-1],
                  self.output_size,
                  activation=T.nnet.sigmoid))

        self.conservativity = T.fscalar()
        self.srng = T.shared_randomstreams.RandomStreams(
            np.random.randint(0, 1024))

        self.epsilon = np.spacing(np.float32(1.0))

        print("Done")

    @property
    def params(self):
        return self.time_model.params + self.pitch_model.params

    @params.setter
    def params(self, param_list):
        ntimeparams = len(self.time_model.params)
        self.time_model.params = param_list[:ntimeparams]
        self.pitch_model.params = param_list[ntimeparams:]

    @property
    def learned_config(self):
        return [
            self.time_model.params, self.pitch_model.params,
            [
                l.initial_hidden_state
                for mod in (self.time_model, self.pitch_model)
                for l in mod.layers if has_hidden(l)
            ]
        ]

    @learned_config.setter
    def learned_config(self, learned_list):
        self.time_model.params = learned_list[0]
        self.pitch_model.params = learned_list[1]
        for l, val in zip((l for mod in (self.time_model, self.pitch_model)
                           for l in mod.layers if has_hidden(l)),
                          learned_list[2]):
            l.initial_hidden_state.set_value(val.get_value())

    def setup(self):
        self.setup_train()
        self.setup_generate()

    def loss_func(self, y_true, y_predict):
        active_notes = T.shape_padright(y_true[:, :, :, 0])
        mask = T.concatenate([
            T.ones_like(active_notes), active_notes,
            T.repeat(T.ones_like(active_notes), self.output_size - 2, -1)
        ],
                             axis=-1)
        loglikelihoods = mask * T.log(2 * y_predict * y_true - y_predict -
                                      y_true + 1 + self.epsilon)
        return T.neg(T.sum(loglikelihoods))

    def setup_train(self):
        print('{:25}'.format("Setup Train"), end='', flush=True)

        self.input_mat = T.btensor4()
        self.output_mat = T.btensor4()

        def step_time(in_data, *other):
            other = list(other)
            split = -len(self.t_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.time_model.forward(in_data,
                                                 prev_hiddens=hiddens,
                                                 dropout=masks)
            return new_states

        def step_note(in_data, *other):
            other = list(other)
            split = -len(self.p_layer_sizes) if self.dropout else len(other)
            hiddens = other[:split]
            masks = [None] + other[split:] if self.dropout else []
            new_states = self.pitch_model.forward(in_data,
                                                  prev_hiddens=hiddens,
                                                  dropout=masks)
            return new_states

        def get_dropout(layers, num_time_parallel=1):
            if self.dropout > 0:
                return theano_lstm.MultiDropout([(num_time_parallel, shape)
                                                 for shape in layers],
                                                self.dropout)
            else:
                return []

        # TIME PASS
        input_slice = self.input_mat[:, 0:-1]
        n_batch, n_time, n_note, n_ipn = input_slice.shape
        time_inputs = input_slice.transpose((1, 0, 2, 3)).reshape(
            (n_time, n_batch * n_note, n_ipn))

        time_masks = get_dropout(self.t_layer_sizes, time_inputs.shape[1])
        time_outputs_info = [
            initial_state_with_taps(layer, time_inputs.shape[1])
            for layer in self.time_model.layers
        ]
        time_result, _ = theano.scan(fn=step_time,
                                     sequences=[time_inputs],
                                     non_sequences=time_masks,
                                     outputs_info=time_outputs_info)
        self.time_thoughts = time_result

        last_layer = get_last_layer(time_result)
        n_hidden = last_layer.shape[2]
        time_final = get_last_layer(time_result).reshape(
            (n_time, n_batch, n_note, n_hidden)).transpose(
                (2, 1, 0, 3)).reshape((n_note, n_batch * n_time, n_hidden))

        # PITCH PASS
        start_note_values = T.alloc(np.array(0, dtype=np.int8), 1,
                                    time_final.shape[1], self.output_size)
        correct_choices = self.output_mat[:, 1:, 0:-1, :].transpose(
            (2, 0, 1, 3)).reshape(
                (n_note - 1, n_batch * n_time, self.output_size))
        note_choices_inputs = T.concatenate(
            [start_note_values, correct_choices], axis=0)

        note_inputs = T.concatenate([time_final, note_choices_inputs], axis=2)

        note_masks = get_dropout(self.p_layer_sizes, note_inputs.shape[1])
        note_outputs_info = [
            initial_state_with_taps(layer, note_inputs.shape[1])
            for layer in self.pitch_model.layers
        ]
        note_result, _ = theano.scan(fn=step_note,
                                     sequences=[note_inputs],
                                     non_sequences=note_masks,
                                     outputs_info=note_outputs_info)

        self.note_thoughts = note_result

        note_final = get_last_layer(note_result).reshape(
            (n_note, n_batch, n_time, self.output_size)).transpose(1, 2, 0, 3)

        self.cost = self.loss_func(self.output_mat[:, 1:], note_final)

        updates, _, _, _, _ = create_optimization_updates(self.cost,
                                                          self.params,
                                                          method="adadelta")
        self.update_fun = theano.function(
            inputs=[self.input_mat, self.output_mat],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

        print("Done")

    def _predict_step_note(self, in_data_from_time, *states):
        hiddens = list(states[:-1])
        in_data_from_prev = states[-1]
        in_data = T.concatenate([in_data_from_time, in_data_from_prev])

        if self.dropout > 0:
            masks = [1 - self.dropout for layer in self.pitch_model.layers]
            masks[0] = None
        else:
            masks = []

        new_states = self.pitch_model.forward(in_data,
                                              prev_hiddens=hiddens,
                                              dropout=masks)
        probabilities = get_last_layer(new_states)

        shouldPlay = self.srng.uniform() < (probabilities[0]**
                                            self.conservativity)
        shouldArtic = shouldPlay * (self.srng.uniform() < probabilities[1])

        chosen = T.stack(
            [T.cast(shouldPlay, 'int8'),
             T.cast(shouldArtic, 'int8')])
        return ensure_list(new_states) + [chosen]

    def setup_generate(self):
        print('{:25}'.format("Setup Generate"), end='', flush=True)

        self.generate_seed_input = T.btensor3()
        self.steps_to_simulate = T.iscalar()

        def step_time_seed(in_data, *hiddens):
            if self.dropout > 0:
                time_masks = [
                    1 - self.dropout for layer in self.time_model.layers
                ]
                time_masks[0] = None
            else:
                time_masks = []

            new_states = self.time_model.forward(in_data,
                                                 prev_hiddens=hiddens,
                                                 dropout=time_masks)
            return new_states

        time_inputs = self.generate_seed_input[0:-1]
        n_time, n_note, n_ipn = time_inputs.shape

        time_outputs_info_seed = [
            initial_state_with_taps(layer, n_note)
            for layer in self.time_model.layers
        ]
        time_result, _ = theano.scan(fn=step_time_seed,
                                     sequences=[time_inputs],
                                     outputs_info=time_outputs_info_seed)

        last_layer = get_last_layer(time_result)
        n_hidden = last_layer.shape[2]

        def step_time(*states):
            hiddens = list(states[:-2])
            in_data = states[-2]
            time = states[-1]

            if self.dropout > 0:
                masks = [1 - self.dropout for layer in self.time_model.layers]
                masks[0] = None
            else:
                masks = []

            new_states = self.time_model.forward(in_data,
                                                 prev_hiddens=hiddens,
                                                 dropout=masks)

            time_final = get_last_layer(new_states)

            start_note_values = theano.tensor.alloc(np.array(0, dtype=np.int8),
                                                    self.output_size)
            note_outputs_info = ([
                initial_state_with_taps(layer)
                for layer in self.pitch_model.layers
            ] + [dict(initial=start_note_values, taps=[-1])])

            notes_result, updates = theano.scan(fn=self._predict_step_note,
                                                sequences=[time_final],
                                                outputs_info=note_outputs_info)
            output = get_last_layer(notes_result)
            next_input = OutputFormToInputFormOp(self.data_manager)(output,
                                                                    time + 1)

            return (ensure_list(new_states) +
                    [next_input, time + 1, output]), updates

        time_outputs_info = (time_outputs_info_seed + [
            dict(initial=self.generate_seed_input[-1], taps=[-1]),
            dict(initial=n_time, taps=[-1]), None
        ])

        time_result, updates = theano.scan(fn=step_time,
                                           outputs_info=time_outputs_info,
                                           n_steps=self.steps_to_simulate)

        self.predicted_output = time_result[-1]

        self.generate_fun = theano.function(inputs=[
            self.steps_to_simulate, self.conservativity,
            self.generate_seed_input
        ],
                                            outputs=self.predicted_output,
                                            updates=updates,
                                            allow_input_downcast=True,
                                            on_unused_input='warn')

        print("Done")
Exemplo n.º 25
0
class MusicGenerator(object):
    def __init__(self, time_model_layer_sizes, note_model_layer_sizes):
        self.time_model = StackedCells(input_size, celltype=LSTM, layers=time_model_layer_sizes)
        self.time_model.layers.append(Router())

        note_model_input_size = time_model_layer_sizes[-1] + output_size
        self.note_model = StackedCells(note_model_input_size, celltype=LSTM, layers=note_model_layer_sizes)
        self.note_model.layers.append(Layer(note_model_layer_sizes[-1], output_size, activation=T.nnet.sigmoid))

        self.time_model_layer_sizes = time_model_layer_sizes
        self.note_model_layer_sizes = note_model_layer_sizes

        self._initialize_update_function()
        self._initialize_predict_function()

    @property
    def params(self):
        return self.time_model.params + self.note_model.params

    @params.setter
    def params(self, params):
        time_model_size = len(self.time_model.params)
        self.time_model.params = params[:time_model_size]
        self.note_model.params = params[time_model_size:]

    @property
    def configuration(self):
        models = [self.time_model, self.note_model]

        initial_hidden_states = []
        for model in models:
            for layer in model.layers:
                if hasattr(layer, INITIAL_HIDDEN_STATE_KEY):
                    initial_hidden_states.append(layer.initial_hidden_state)

        return [self.time_model.params, self.note_model.params, initial_hidden_states]

    @configuration.setter
    def configuration(self, configuration):
        self.time_model.params = configuration[0]
        self.note_model.params = configuration[1]

        hidden_state_layers = []
        models = [self.time_model, self.note_model]

        for model in models:
            for layer in model.layers:
                if hasattr(layer, INITIAL_HIDDEN_STATE_KEY):
                    hidden_state_layers.append(layer)

        initial_hidden_states = configuration[2]
        for layer_id in range(len(hidden_state_layers)):
            layer = hidden_state_layers[layer_id]
            state = initial_hidden_states[layer_id]
            layer.initial_hidden_state.set_value(state.get_value())

    @staticmethod
    def get_time_model_input(adjusted_input):
        batch_size, num_timesteps, num_notes, num_attributes = adjusted_input.shape

        # fold the note axis into the batch axis so the time model scans over (time, batch*notes, attributes)
        transposed_input = adjusted_input.transpose((1, 0, 2, 3))
        return transposed_input.reshape((num_timesteps, batch_size * num_notes, num_attributes))

    @staticmethod
    def get_note_model_input(adjusted_input, adjusted_output, time_model_output):
        batch_size, num_timesteps, num_notes, _ = adjusted_input.shape
        num_hidden = time_model_output.shape[2]

        reshaped_time_model_output = time_model_output.reshape((num_timesteps, batch_size, num_notes, num_hidden))
        transposed_time_model_output = reshaped_time_model_output.transpose((2, 1, 0, 3))
        adjusted_time_model_output = transposed_time_model_output.reshape((num_notes, batch_size * num_timesteps, num_hidden))

        starting_notes = T.alloc(0, 1, adjusted_time_model_output.shape[1], output_size)
        correct_choices = adjusted_output[:, :, :-1, :].transpose((2, 0, 1, 3))
        reshaped_correct_choices = correct_choices.reshape((num_notes - 1, batch_size * num_timesteps, output_size))
        adjusted_correct_choices = T.concatenate([starting_notes, reshaped_correct_choices], axis=0)

        return T.concatenate([adjusted_time_model_output, adjusted_correct_choices], axis=2)

    @staticmethod
    def get_initial_state(layer, dimensions=None):
        if not hasattr(layer, INITIAL_HIDDEN_STATE_KEY):
            return None

        return {
            'initial': layer.initial_hidden_state if dimensions is None else T.repeat(T.shape_padleft(layer.initial_hidden_state), dimensions, axis=0),
            'taps': [-1]
        }

    @staticmethod
    def get_output(step, input, outputs_info):
        result, _ = theano.scan(fn=step, sequences=[input], outputs_info=outputs_info)
        return result[-1]

    @staticmethod
    def get_prediction(adjusted_input, note_model_output):
        batch_size, num_timesteps, num_notes, _ = adjusted_input.shape

        reshaped_note_model_output = note_model_output.reshape((num_notes, batch_size, num_timesteps, output_size))
        return reshaped_note_model_output.transpose(1, 2, 0, 3)

    @staticmethod
    def get_loss(adjusted_output, prediction):
        epsilon = 1e-7

        active_notes = T.shape_padright(adjusted_output[:, :, :, 0])
        masks = T.concatenate([T.ones_like(active_notes), active_notes], axis=3)

        log_likelihoods = T.log(2 * prediction * adjusted_output - prediction - adjusted_output + 1 + epsilon)
        masked_log_likelihoods = masks * log_likelihoods

        return T.neg(T.sum(masked_log_likelihoods))

    def get_outputs_info(self, adjusted_input, layers):
        batch_size = adjusted_input.shape[1]
        return [self.get_initial_state(layer, batch_size) for layer in layers]

    def get_time_prediction_outputs_info(self, initial_note):
        initial_states = [self.get_initial_state(layer) for layer in self.note_model.layers]
        first_note = {
            'initial': initial_note,
            'taps': [-1]
        }
        return initial_states + [first_note]

    def get_prediction_outputs_info(self, num_notes, initial_note):
        initial_states = [self.get_initial_state(layer, num_notes) for layer in self.time_model.layers]
        first_note = {
            'initial': initial_note,
            'taps': [-1]
        }
        padder = {
            'initial': 0,
            'taps': [-1]
        }
        return initial_states + [first_note, padder, None]

    def _initialize_update_function(self):
        def time_step(input, *previous_hidden_state):
            return self.time_model.forward(input, prev_hiddens=previous_hidden_state)

        def note_step(input, *previous_hidden_state):
            return self.note_model.forward(input, prev_hiddens=previous_hidden_state)

        input = T.btensor4()
        adjusted_input = input[:, :-1]

        output = T.btensor4()
        adjusted_output = output[:, 1:]

        time_model_input = self.get_time_model_input(adjusted_input)
        time_model_outputs_info = self.get_outputs_info(time_model_input, self.time_model.layers)
        time_model_output = self.get_output(time_step, time_model_input, time_model_outputs_info)

        note_model_input = self.get_note_model_input(adjusted_input, adjusted_output, time_model_output)
        note_outputs_info = self.get_outputs_info(note_model_input, self.note_model.layers)
        note_model_output = self.get_output(note_step, note_model_input, note_outputs_info)

        prediction = self.get_prediction(adjusted_input, note_model_output)
        loss = self.get_loss(adjusted_output, prediction)

        updates, _, _, _, _ = create_optimization_updates(loss, self.params)

        self.update = theano.function(inputs=[input, output], outputs=loss, updates=updates, allow_input_downcast=True)

    def _initialize_predict_function(self):
        def predicted_note_step(time_model_output, *states):
            previous_note_model_input = states[-1]

            note_model_input = T.concatenate([time_model_output, previous_note_model_input])
            previous_hidden_state = list(states[:-1])
            note_model_output = self.note_model.forward(note_model_input, prev_hiddens=previous_hidden_state)
            probabilities = note_model_output[-1]

            generator = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

            is_note_played = probabilities[0] > generator.uniform()
            is_note_articulated = (probabilities[1] > generator.uniform()) * is_note_played
            prediction = T.cast(T.stack(is_note_played, is_note_articulated), 'int8')

            return note_model_output + [prediction]

        def predicted_time_step(*states):
            time_model_input = states[-2]
            previous_hidden_state = list(states[:-2])
            time_model_output = self.time_model.forward(time_model_input, prev_hiddens=previous_hidden_state)

            time_model_output_last_layer = time_model_output[-1]
            initial_note = T.alloc(0, output_size)
            note_outputs_info = self.get_time_prediction_outputs_info(initial_note)
            notes_model_output, updates = theano.scan(fn=predicted_note_step, sequences=[time_model_output_last_layer], outputs_info=note_outputs_info)

            output = notes_model_output[-1]
            time = states[-1]
            next_input = OutputTransformer()(output, time + 1)

            return (time_model_output + [next_input, time + 1, output]), updates

        length = T.iscalar()
        initial_note = T.bmatrix()

        num_notes = initial_note.shape[0]
        time_outputs_info = self.get_prediction_outputs_info(num_notes, initial_note)
        time_model_output, updates = theano.scan(fn=predicted_time_step, outputs_info=time_outputs_info, n_steps=length)
        prediction = time_model_output[-1]

        self.predict = theano.function([length, initial_note], outputs=prediction, updates=updates, allow_input_downcast=True)
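
MusicGenerator relies on module-level names defined elsewhere in its original module (input_size, output_size, Router, OutputTransformer, INITIAL_HIDDEN_STATE_KEY). A minimal instantiation sketch, assuming the sizes mirror the other examples (80 notewise input features and a play/articulate pair):

input_size = 80   # notewise feature vector, as in the other examples
output_size = 2   # (play, articulate) pair per note

generator = MusicGenerator(time_model_layer_sizes=[300, 300],
                           note_model_layer_sizes=[100, 50])

# loss = generator.update(training_input, training_output)  # both are (batch, time, notes, ...) int8 arrays
# piece = generator.predict(num_timesteps, seed_notes)       # seed_notes: (notes, input_size) int8 matrix
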
Exemplo n.º 26
0
class Model:
    """
    Simple predictive model for forecasting spectral content
    from sequence using LSTMs. Choose how many LSTMs to stack
    and what size their memory should be.
    """
    def __init__(self, hidden_size, input_size, stack_size=2, celltype=LSTM):
        self.input_size = input_size
        # Modelling
        self.model = StackedCells(input_size,
                                  celltype=celltype,
                                  activation=T.tanh,
                                  layers=[hidden_size] * stack_size)

        # disable modulation of the input layer
        self.model.layers[0].in_gate2.activation = lambda x: x

        # add an output layer
        self.model.layers.append(
            Layer(hidden_size, input_size, activation=softmax))

        # Setup symbolic tensor variables that will be used in computation

        # inputs are windows of spectrum data
        self.input = T.fvector("input")
        self.prev_input = T.fvector("prev_input")

        # create symbolic variables for prediction:
        self.prediction = self.create_prediction()

        # create gradient training functions:
        self.create_cost_fun()
        self.create_training_function()
        self.create_predict_function()

    @property
    def params(self):
        return self.model.params

    def create_prediction(self):
        result = self.model.forward(self.input)
        # softmaxes are the last layer of our network,
        # and are at the end of our results list:
        # we reorder the predictions to be:
        # 1. what row / example
        # 2. what timestep
        # 3. softmax dimension
        return result[-1]

    def create_cost_fun(self):
        # our cost function is the squared difference
        # between the input and the prediction
        diff = self.prediction - self.input
        squared_diff = diff**2
        self.cost = squared_diff.sum()

    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.input],
                                        outputs=self.prediction,
                                        allow_input_downcast=True)

    def create_training_function(self):
        updates, _, _, _, _ = create_optimization_updates(self.cost,
                                                          self.params,
                                                          method="adadelta")
        self.update_fun = theano.function(inputs=[self.input],
                                          outputs=self.cost,
                                          updates=updates,
                                          allow_input_downcast=True)

    def __call__(self, x):
        return self.pred_fun(x)
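
A short usage sketch for this spectral model; the frame length and layer sizes are hypothetical:

import numpy as np

model = Model(hidden_size=256, input_size=1024, stack_size=2)  # 1024-bin spectra, two stacked LSTMs

frame = np.random.rand(1024).astype(np.float32)
cost = model.update_fun(frame)  # one adadelta step on a single frame
next_frame = model(frame)       # forecast for the following frame
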
Exemplo n.º 27
0
class Model:
	"""
	Simple predictive model for forecasting words from
	sequence using LSTMs. Choose how many LSTMs to stack
	what size their memory should be, and how many
	words can be predicted.
	"""
	def __init__(self, hidden_size, input_size, vocab_size, stack_size=1, celltype=LSTM):

		# core layer in RNN/LSTM
		self.hidden_size = hidden_size  # kept for load(), which slices the initial hidden state by this size
		self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)

		# add an embedding
		self.model.layers.insert(0, Embedding(vocab_size, input_size))

		# add a classifier:
		self.model.layers.append(Layer(hidden_size, vocab_size, activation = softmax))

		self.turing_params = Parameters()
		#init turing machine model
		self.turing_updates , self.turing_predict = turing_model.build(self.turing_params , hidden_size , vocab_size)

		# inputs are matrices of indices,
		# each row is a sentence, each column a timestep
		self._stop_word   = theano.shared(np.int32(999999999), name="stop word")
		self.for_how_long = T.ivector()
		self.input_mat = T.imatrix()
		self.priming_word = T.iscalar()
		self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))

		# create symbolic variables for prediction:
		#change by darong #issue : what is greedy
		self.lstm_predictions = self.create_lstm_prediction()
		self.final_predictions = self.create_final_prediction()

		# create symbolic variable for greedy search:
		self.greedy_predictions = self.create_lstm_prediction(greedy=True)

		# create gradient training functions:
		self.create_cost_fun()#create 2 cost func(lstm final)

		self.lstm_lr = 0.01
		self.turing_lr = 0.01
		self.all_lr = 0.01
		self.create_training_function()#create 3 functions(lstm turing all)
		self.create_predict_function()#create 2 predictions(lstm final)

		# create ppl
		self.lstm_ppl = self.create_lstm_ppl()
		self.final_ppl = self.create_final_ppl()
		self.create_ppl_function()


	def save(self, save_file, vocab):
		pickle.dump(self.model, open(save_file, "wb")) # pickle is for lambda function, cPickle cannot
		pickle.dump(vocab, open(save_file+'.vocab', "wb")) # pickle is for lambda function, cPickle cannot
	def save_turing(self, save_file):
		self.turing_params.save(save_file + '.turing')


	def load(self, load_file, lr):
		self.model = pickle.load(open(load_file, "rb"))
		if os.path.isfile(load_file + '.turing') :
			self.turing_params.load(load_file + '.turing')			
		else :
			print "no turing model!!!! pretrain with lstm param"
			self.turing_params['W_input_hidden'] = self.model.layers[-1].params[0].get_value().T #not sure
			self.turing_params['W_read_hidden']  = self.model.layers[-1].params[0].get_value().T
			self.turing_params['b_hidden_0'] = self.model.layers[-1].params[1].get_value()
			temp = self.model.layers[1].initial_hidden_state.get_value()[self.hidden_size:]
			self.turing_params['memory_init'] = temp.reshape((1,)+temp.shape)

		# need to compile again for calculating predictions after loading lstm
		self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
		self.lstm_predictions = self.create_lstm_prediction()
		self.final_predictions = self.create_final_prediction()
		self.greedy_predictions = self.create_lstm_prediction(greedy=True)#can change to final
		self.create_cost_fun()#create 2 cost func(lstm final)
		self.lstm_lr = lr
		self.turing_lr = lr#change this
		self.all_lr = lr
		self.create_training_function()#create 3 functions(lstm turing all)
		self.create_predict_function()#create 2 predictions(lstm final)
		self.lstm_ppl = self.create_lstm_ppl()
		self.final_ppl = self.create_final_ppl()
		self.create_ppl_function()
		print "done loading model"
#		print "done compile"


	def stop_on(self, idx):
		self._stop_word.set_value(idx)
		
	@property
	def params(self):
		return self.model.params
								 
	def create_lstm_prediction(self, greedy=False):
		def step(idx, *states):
			# new hiddens are the states we need to pass to LSTMs
			# from past. Because the StackedCells also include
			# the embeddings, and those have no state, we pass
			# a "None" instead:
			new_hiddens = [None] + list(states)
			
			new_states = self.model.forward(idx, prev_hiddens = new_hiddens)
			if greedy:
				new_idxes = new_states[-1]
				new_idx   = new_idxes.argmax()
				# provide a stopping condition for greedy search:
				return ([new_idx.astype(self.priming_word.dtype)] + new_states[1:-1]), theano.scan_module.until(T.eq(new_idx,self._stop_word))
			else:
				return new_states[1:]

		# in sequence forecasting scenario we take everything
		# up to the before last step, and predict subsequent
		# steps ergo, 0 ... n - 1, hence:
		inputs = self.input_mat[:, 0:-1]
		num_examples = inputs.shape[0]
		# pass this to Theano's recurrence relation function:
		
		# choose what gets outputted at each timestep:
		if greedy:
			outputs_info = [dict(initial=self.priming_word, taps=[-1])] + [initial_state_with_taps(layer) for layer in self.model.layers[1:-1]]
			result, _ = theano.scan(fn=step,
								n_steps=200,
								outputs_info=outputs_info)
		else:
			outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
			result, _ = theano.scan(fn=step,
								sequences=[inputs.T],
								outputs_info=outputs_info)
								 
		if greedy:
			return result[0]
		# softmaxes are the last layer of our network,
		# and are at the end of our results list:
		return result[-1].transpose((2,0,1))
		# we reorder the predictions to be:
		# 1. what row / example
		# 2. what timestep
		# 3. softmax dimension

	def create_final_prediction(self, greedy=False):
		def step(idx, *states):
			# new hiddens are the states we need to pass to LSTMs
			# from past. Because the StackedCells also include
			# the embeddings, and those have no state, we pass
			# a "None" instead:
			new_hiddens = [None] + list(states)
			
			new_states = self.model.forward(idx, prev_hiddens = new_hiddens)
			if greedy:
				new_idxes = new_states[-1]
				new_idx   = new_idxes.argmax()
				# provide a stopping condition for greedy search:
				return ([new_idx.astype(self.priming_word.dtype)] + new_states[1:-1]), theano.scan_module.until(T.eq(new_idx,self._stop_word))
			else:
				return new_states[1:]

		# in sequence forecasting scenario we take everything
		# up to the before last step, and predict subsequent
		# steps ergo, 0 ... n - 1, hence:
		inputs = self.input_mat[:, 0:-1]
		num_examples = inputs.shape[0]
		# pass this to Theano's recurrence relation function:
		
		# choose what gets outputted at each timestep:
		if greedy:
			outputs_info = [dict(initial=self.priming_word, taps=[-1])] + [initial_state_with_taps(layer) for layer in self.model.layers[1:-1]]
			result, _ = theano.scan(fn=step,
								n_steps=200,
								outputs_info=outputs_info)
		else:
			outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
			result, _ = theano.scan(fn=step,
								sequences=[inputs.T],
								outputs_info=outputs_info)
								 
		if greedy:
			return result[0]
		# softmaxes are the last layer of our network,
		# and are at the end of our results list:
		hidden_size = result[-2].shape[2] // 2
		turing_result = self.turing_predict(result[-2][:,:,hidden_size:]) 
		#the last layer do transpose before compute
		return turing_result.transpose((1,0,2))
		# we reorder the predictions to be:
		# 1. what row / example
		# 2. what timestep
		# 3. softmax dimension	
								 
	def create_cost_fun (self):

		# create a cost function that
		# takes each prediction at every timestep
		# and guesses next timestep's value:
		what_to_predict = self.input_mat[:, 1:]
		# because some sentences are shorter, we
		# place masks where the sentences end:
		# (for how long is zero indexed, e.g. an example going from `[2,3)`)
		# has this value set 0 (here we substract by 1):
		for_how_long = self.for_how_long - 1
		# all sentences start at T=0:
		starting_when = T.zeros_like(self.for_how_long)
								 
		self.lstm_cost = masked_loss(self.lstm_predictions,
								what_to_predict,
								for_how_long,
								starting_when).sum()

		self.final_cost = masked_loss(self.final_predictions,
								what_to_predict,
								for_how_long,
								starting_when).sum()
		
	def create_predict_function(self):
		self.lstm_pred_fun = theano.function(
			inputs=[self.input_mat],
			outputs=self.lstm_predictions,
			allow_input_downcast=True
		)
		self.final_pred_fun = theano.function(
			inputs=[self.input_mat],
			outputs=self.final_predictions,
			allow_input_downcast=True
		)
		
		self.greedy_fun = theano.function(
			inputs=[self.priming_word],
			outputs=T.concatenate([T.shape_padleft(self.priming_word), self.greedy_predictions]),
			allow_input_downcast=True
		)
								 
	def create_training_function(self):
		updates, _, _, _, _ = create_optimization_updates(self.lstm_cost, self.params, method="SGD", lr=self.lstm_lr)
#		updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta", lr=self.lr)
		self.lstm_update_fun = theano.function(
			inputs=[self.input_mat, self.for_how_long],
			outputs=self.lstm_cost,
			updates=updates,
			allow_input_downcast=True)

		updates_turing = self.turing_updates(self.final_cost , lr=self.turing_lr)
#		updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta", lr=self.lr)
		self.turing_update_fun = theano.function(
			inputs=[self.input_mat, self.for_how_long],
			outputs=self.final_cost,
			updates=updates_turing,
			mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True),
			allow_input_downcast=True)

		all_updates_lstm, _, _, _, _ = create_optimization_updates(self.final_cost, self.params, method="SGD", lr=self.all_lr,part=True)
		all_updates_turing_temp = self.turing_updates(self.final_cost , lr=self.all_lr)
		updates_all = all_updates_lstm
		for pair in all_updates_turing_temp:
			updates_all[pair[0]] = pair[1]

		self.all_update_fun = theano.function(
			inputs=[self.input_mat, self.for_how_long],
			outputs=self.final_cost,
			updates=updates_all,
			allow_input_downcast=True)

	def create_lstm_ppl(self):

		def timestep(predictions, label, len_example, total_len_example):

			label_binary = T.gt(label[0:len_example-1], 0)
			oov_count = T.shape(label_binary)[0] - T.sum(label_binary)
			
			a = total_len_example
			return T.sum(T.log( 1./ predictions[T.arange(len_example-1), label[0:len_example-1]]) * label_binary ), oov_count


		result, _ = theano.scan(fn=timestep,
						   sequences=[ self.lstm_predictions, self.input_mat[:, 1:], self.for_how_long ],
						   non_sequences=T.sum(self.for_how_long))

		oov_count_total = T.sum(result[1])
		return T.exp(T.sum(result[0]).astype(theano.config.floatX)/(T.sum(self.for_how_long) - oov_count_total).astype(theano.config.floatX)).astype(theano.config.floatX)

	def create_final_ppl(self):

		def timestep(predictions, label, len_example, total_len_example):

			label_binary = T.gt(label[0:len_example-1], 0)
			oov_count = T.shape(label_binary)[0] - T.sum(label_binary)
			
			a = total_len_example
			return T.sum(T.log( 1./ predictions[T.arange(len_example-1), label[0:len_example-1]]) * label_binary ), oov_count


		result, _ = theano.scan(fn=timestep,
						   sequences=[ self.final_predictions, self.input_mat[:, 1:], self.for_how_long ],
						   non_sequences=T.sum(self.for_how_long))

		oov_count_total = T.sum(result[1])
		return T.exp(T.sum(result[0]).astype(theano.config.floatX)/(T.sum(self.for_how_long) - oov_count_total).astype(theano.config.floatX)).astype(theano.config.floatX)

	def create_ppl_function(self):
		self.lstm_ppl_fun = theano.function(
			inputs=[self.input_mat, self.for_how_long],
			outputs=self.lstm_ppl,
			allow_input_downcast=True)

		self.final_ppl_fun = theano.function(
			inputs=[self.input_mat, self.for_how_long],
			outputs=self.final_ppl,
			allow_input_downcast=True)
		
		
	def __call__(self, x):
		return self.final_pred_fun(x)  # return the turing-augmented predictions
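
A minimal usage sketch for the LSTM half of this model; the constructor arguments and the toy batch below are hypothetical, and the turing_model module is assumed to be importable as in the original:

import numpy as np

model = Model(hidden_size=128, input_size=64, vocab_size=1000, stack_size=1)

# Toy batch: two index sequences padded to length 5, plus their true lengths.
batch = np.array([[1, 4, 2, 3, 0],
                  [5, 2, 3, 0, 0]], dtype=np.int32)
lengths = np.array([4, 3], dtype=np.int32)

cost = model.lstm_update_fun(batch, lengths)  # one SGD step on the LSTM path
ppl = model.lstm_ppl_fun(batch, lengths)      # perplexity of the LSTM predictions
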
Exemplo n.º 28
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, celltype=Layer):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =hidden_size)
        # add a classifier:
        self.regression=Layer(hidden_size[-1], output_size[0], activation = T.tanh)
        self.classifier=Layer(hidden_size[-1], output_size[1], activation = softmax)
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=T.iscalar('steps')
        self.x=T.tensor3('x')  # input GFS data
        self.target0=T.tensor3('target0')  # regression target; this version changes the target dimensions
        self.target1=T.itensor3('target1')
        self.layerstatus=None
        self.results=None
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions0,self.predictions1 = self.create_prediction()
        # create gradient training functions:
        #self.create_cost_fun()
        #self.create_valid_error()
        #self.create_training_function()
        self.create_predict_function()
        #self.create_validate_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params+self.regression.params+self.classifier.params
        
    def create_prediction(self):  # builds one full prediction pass
        def step(idx):
            new_states=self.model.forward(idx)
            output0=self.regression.activate(new_states[-1])
            output1=self.classifier.activate(new_states[-1])
            return [output0,output1]  # all outputs are returned whether or not the model is recursive
        
        x = self.x
        num_examples = x.shape[0]
        #outputs_info =[initial_state_with_taps(layer, num_examples) for layer in self.model.layers]
        #outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
        [result0,result1], _ = theano.scan(fn=step,
                                n_steps=self.steps,
                                sequences=dict(input=x.dimshuffle((1,0,2)), taps=[-0]),
                                )
                                

        return result0.dimshuffle((1,0,2)),result1.dimshuffle((2,0,1))
        
        
    def create_cost_fun (self):
        y=self.target1[:,0,0]                                 
        self.cost = (self.predictions0 - self.target0[:,:,0:1]).norm(L=2)+100*(-T.mean(T.log(self.predictions1)[T.arange(y.shape[0]),:,y]))

    def create_valid_error(self):
        self.valid_error0=T.mean(T.abs_(self.predictions0 - self.target0[:,:,0:1]),axis=0)
        #self.valid_error1=-T.mean(T.log(self.predictions1)[T.arange(self.target1.shape[0]),:,self.target1[:,0,0]])
        self.valid_error1=T.mean(T.eq(T.argmax(self.predictions1, axis=2).dimshuffle(1,0),self.target1[:,0,0]))
                
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.x,self.steps],outputs =[self.predictions0,self.predictions1],allow_input_downcast=True)
                                 
    def create_training_function(self):
        updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, lr=0.01, method="adagrad")  # this is the gradient descent (Adagrad) update step
        self.update_fun = theano.function(
            inputs=[self.x, self.target0,self.target1,self.steps],
            outputs=self.cost,
            updates=updates,
            name='update_fun',
            profile=False,
            allow_input_downcast=True)
            
    def create_validate_function(self):
        self.valid_fun = theano.function(
            inputs=[self.x, self.target0,self.target1,self.steps],
            outputs=[self.valid_error0,self.valid_error1],
            allow_input_downcast=True
        )
        
    def __call__(self, x, steps):
        return self.pred_fun(x, steps)
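# --- Hedged usage sketch (not part of the original example) ---
# Illustrates the input/output shapes implied by create_prediction above.
# All sizes (batch=4, 10 timesteps, 15 features, hidden/output sizes) are
# made up for the sketch.
import numpy as np

def _predict_sketch():
    m = Model(hidden_size=[32, 32], input_size=15, output_size=(1, 5), celltype=Layer)
    x = np.random.randn(4, 10, 15).astype('float32')  # batch x timestep x feature
    reg, cls = m.pred_fun(x, 10)                       # steps must match x.shape[1]
    # reg has shape (4, 10, 1); cls has shape (5, 4, 10) after dimshuffle((2,0,1))
    return reg.shape, cls.shape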
Exemplo n.º 29
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, vocab_size, stack_size=1, celltype=LSTM):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add an embedding
        self.model.layers.insert(0, Embedding(vocab_size, input_size))
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, vocab_size, activation = softmax))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self._stop_word   = theano.shared(np.int32(999999999), name="stop word")
        self.for_how_long = T.ivector()
        self.input_mat = T.imatrix()
        self.priming_word = T.iscalar()
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions = self.create_prediction()
        # create symbolic variable for greedy search:
        self.greedy_predictions = self.create_prediction(greedy=True)
        # create gradient training functions:
        self.create_cost_fun()
        self.create_training_function()
        self.create_predict_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
    def stop_on(self, idx):
        self._stop_word.set_value(idx)
        
    @property
    def params(self):
        return self.model.params
        
    # NOTE: this first create_prediction is incomplete (its greedy branch is a stub)
    # and is overridden by the full definition below.
    def create_prediction(self,greedy=False):
        def step(idx,*states):
            new_hiddens=list(states)
            new_states=self.model.forward(idx,prev_hiddens = new_hiddens)
            if greedy:
                return
            else:
                return new_states  # all layer outputs are returned whether or not the model is recursive
        
        inputs = self.input_mat[:,0:-1]
        num_examples = inputs.shape[0]
        if greedy:
            return
        else:
            outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
            result, _ = theano.scan(fn=step,
                                sequences=[inputs.T],
                                outputs_info=outputs_info)
                                

        return result[-1].transpose((2,0,1))
                                 
    def create_prediction(self, greedy=False):
        def step(idx, *states):
            # new hiddens are the states we need to pass to LSTMs
            # from past. Because the StackedCells also include
            # the embeddings, and those have no state, we pass
            # a "None" instead:
            new_hiddens = [None] + list(states)
            
            new_states = self.model.forward(idx, prev_hiddens = new_hiddens)  # this is the update step; idx is the layer input
            # new_states is a list holding the latest output of every layer in the StackedCells
            if greedy:
                new_idxes = new_states[-1]  # i.e. the output of the final softmax layer
                new_idx   = new_idxes.argmax()
                # provide a stopping condition for greedy search:
                return ([new_idx.astype(self.priming_word.dtype)] + new_states[1:-1]), theano.scan_module.until(T.eq(new_idx,self._stop_word))
            else:
                return new_states[1:]  # outputs of every layer except layer 0 (the embedding)

        # in sequence forecasting scenario we take everything
        # up to the before last step, and predict subsequent
        # steps ergo, 0 ... n - 1, hence:
        inputs = self.input_mat[:, 0:-1]
        num_examples = inputs.shape[0]
        # pass this to Theano's recurrence relation function:
        
        # choose what gets outputted at each timestep:
        if greedy:
            outputs_info = [dict(initial=self.priming_word, taps=[-1])] + [initial_state_with_taps(layer) for layer in self.model.layers[1:-1]]
            result, _ = theano.scan(fn=step,
                                n_steps=200,
                                outputs_info=outputs_info)
        else:
            outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
            result, _ = theano.scan(fn=step,
                                sequences=[inputs.T],
                                outputs_info=outputs_info)
        '''Here `sequences` hands one element of `inputs` to `idx` at each step; change this part to feed several pm25-style inputs at once.'''
        '''outputs_info tells scan to feed each step's output back into fn; it is also the value supplied on the first step, when there is no previous output yet, so outputs_info implies this feedback pattern.
        Second, if there is no accumulation of results, we can set outputs_info to None. This indicates to scan that it doesn't need to pass the prior result to fn.'''
        
        '''The general order of function parameters to fn is:
            sequences (if any), prior result(s) (if needed), non-sequences (if any)
            not only taps should respect an order, but also variables, since this is how scan figures out what should be represented by what'''                                                  
        if greedy:
            return result[0]
        # softmaxes are the last layer of our network, i.e. result[-1] is the softmax layer,
        # and are at the end of our results list:
#       print "res=", result
#        print "res eval=", result[-1].eval()
        
        return result[-1].transpose((2,0,1))
        # we reorder the predictions to be:
        # 1. what row / example
        # 2. what timestep
        # 3. softmax dimension
        
        
    '''def create_prediction(self, greedy=False):
        return result[-1].transpose((2,0,1))'''
                                 
    def create_cost_fun (self):
        # create a cost function that
        # takes each prediction at every timestep
        # and guesses next timestep's value:
        what_to_predict = self.input_mat[:, 1:]  # every character of each sentence except the first: given the first character, the rest of the sentence is predicted
        # because some sentences are shorter, we
        # place masks where the sentences end:
        # (for_how_long is zero indexed, e.g. an example spanning `[2,3)`
        # has this value set to 0; here we subtract 1):
        for_how_long = self.for_how_long - 1
        # all sentences start at T=0:
        starting_when = T.zeros_like(self.for_how_long)
                                 
        '''We predict each subsequent word of the full sentence; note that predictions is evaluated only once, so one evaluation covers a whole mini-batch.'''
        self.cost = masked_loss(self.predictions,
                                what_to_predict,
                                for_how_long,
                                starting_when).sum()
        
    def create_predict_function(self):
        self.pred_fun = theano.function(
            inputs=[self.input_mat],
            outputs =self.predictions,
            allow_input_downcast=True
        )
        
        self.greedy_fun = theano.function(
            inputs=[self.priming_word],
            outputs=T.concatenate([T.shape_padleft(self.priming_word), self.greedy_predictions]),
            allow_input_downcast=True
        )
                                 
    def create_training_function(self):
        updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")  # this is the gradient descent (AdaDelta) update step
        self.update_fun = theano.function(
            inputs=[self.input_mat, self.for_how_long],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)
        
    def __call__(self, x):
        return self.pred_fun(x)
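# --- Hedged usage sketch (not part of the original example) ---
# One way to drive the greedy decoder of the class above. The sizes and token
# ids below are made up; stop_on() registers the index whose emission ends the
# scan through the until() condition in create_prediction.
def _greedy_sketch():
    m = Model(hidden_size=64, input_size=32, vocab_size=100, stack_size=2, celltype=LSTM)
    m.stop_on(1)               # treat token id 1 as the end-of-sentence marker
    sampled = m.greedy_fun(5)  # prime with token id 5; returns a vector of token ids
    return sampled             # capped at 200 steps by n_steps in create_prediction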
Exemplo n.º 30
0
class Model(object):
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN,steps=40):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = lambda x:x))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=steps
        self.gfs=T.tensor3('gfs')  # input GFS data
        self.pm25in=T.tensor3('pm25in')  # initial pm25 input frames
        self.layerstatus=None
        self.results=None
        self.cnt = T.tensor3('cnt')
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions = self.create_prediction()
        self.create_predict_function()
        self.pm25target=T.matrix('pm25target')  # prediction target; this version changes the target dimensions
        self.create_valid_error()
        self.create_validate_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params
        
    def create_prediction(self):  # builds one full prediction pass
        gfs=self.gfs
        pm25in=self.pm25in
        # initial forward pass
        gfs_x=T.concatenate([gfs[:,0],gfs[:,1],gfs[:,2]],axis=1)
        pm25in_x=T.concatenate([pm25in[:,0],pm25in[:,1]],axis=1)
        self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,0]],axis=1))
        self.results=self.layerstatus[-1]
        for i in xrange(1,7):  # the first 6 iterations (0-5) process the warm-up frames before any output; the 7th step gives the 1st real output
            gfs_x=T.concatenate([gfs_x[:,9:],gfs[:,i+2]],axis=1)
            pm25in_x=T.concatenate([pm25in_x[:,1:],pm25in[:,i+1]],axis=1)
            self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,i]],axis=1),self.layerstatus)
            self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
        if self.steps > 1:
            gfs_x=T.concatenate([gfs_x[:,9:],gfs[:,9]],axis=1)
            pm25in_x=T.concatenate([pm25in_x[:,1:],T.shape_padright(self.results[:,-1])],axis=1)
            self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,7]],axis=1),self.layerstatus)
            self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
            # forward pass for the remaining steps-2 iterations
            for i in xrange(2,self.steps):
                gfs_x=T.concatenate([gfs_x[:,9:],gfs[:,i+8]],axis=1)
                pm25in_x=T.concatenate([pm25in_x[:,1:],T.shape_padright(self.results[:,-1])],axis=1)
                self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,i+6]],axis=1),self.layerstatus)
                #need T.shape_padright???
                self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
        return self.results
                      
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs,self.pm25in,self.cnt],outputs =self.predictions,allow_input_downcast=True)
    
    def create_valid_error(self):
        self.valid_error=T.mean(T.abs_(self.predictions[:,6:46] - self.pm25target[:,6:46]),axis=0)                                   
    
    def create_validate_function(self):
        self.valid_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target,self.cnt],
            outputs=self.valid_error,
            allow_input_downcast=True
        )

    def __call__(self, gfs, pm25in, cnt):
        return self.pred_fun(gfs, pm25in, cnt)
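# --- Hedged usage sketch (not part of the original example) ---
# Shape conventions implied by create_prediction above (all numbers are
# illustrative): gfs carries 9 features per frame (gfs_x[:,9:] drops exactly one
# frame), pm25in carries 1 feature per frame, and cnt is indexed as cnt[:,:,step].
# input_size must equal 3*9 + 2*1 + cnt_features for the concatenated input.
import numpy as np

def _pm25_sketch():
    cnt_features = 4  # hypothetical width of the cnt side-input
    m = Model(hidden_size=32, input_size=27 + 2 + cnt_features,
              output_size=1, stack_size=1, celltype=RNN, steps=40)
    gfs = np.random.randn(2, 48, 9).astype('float32')       # batch x frame x feature
    pm25in = np.random.randn(2, 8, 1).astype('float32')
    cnt = np.random.randn(2, cnt_features, 46).astype('float32')
    preds = m.pred_fun(gfs, pm25in, cnt)                     # (2, 46) predicted frames
    return preds.shape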
Exemplo n.º 31
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, vocab_size, stack_size=1, celltype=LSTM):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add an embedding
        self.model.layers.insert(0, Embedding(vocab_size, input_size))
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, vocab_size, activation = softmax))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self._stop_word   = theano.shared(np.int32(999999999), name="stop word")
        self.for_how_long = T.ivector()
        self.input_mat = T.imatrix()
        self.priming_word = T.iscalar()
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
        # create symbolic variables for prediction:
        self.predictions = self.create_prediction()
        # create symbolic variable for greedy search:
        self.greedy_predictions = self.create_prediction(greedy=True)
        # create gradient training functions:
        self.create_cost_fun()
        self.create_training_function()
        self.create_predict_function()

    def stop_on(self, idx):
        self._stop_word.set_value(idx)

    @property
    def params(self):
        return self.model.params

    def create_prediction(self, greedy=False):
        def step(idx, *states):
            # new hiddens are the states we need to pass to LSTMs
            # from past. Because the StackedCells also include
            # the embeddings, and those have no state, we pass
            # a "None" instead:
            new_hiddens = [None] + list(states)

            new_states = self.model.forward(idx, prev_hiddens = new_hiddens)
            if greedy:
                new_idxes = new_states[-1]
                new_idx   = new_idxes.argmax()
                # provide a stopping condition for greedy search:
                return ([new_idx.astype(self.priming_word.dtype)] + new_states[1:-1]), theano.scan_module.until(T.eq(new_idx,self._stop_word))
            else:
                return new_states[1:]
        # in sequence forecasting scenario we take everything
        # up to the before last step, and predict subsequent
        # steps ergo, 0 ... n - 1, hence:
        inputs = self.input_mat[:, 0:-1]
        num_examples = inputs.shape[0]
        # pass this to Theano's recurrence relation function:

        # choose what gets outputted at each timestep:
        if greedy:
            outputs_info = [dict(initial=self.priming_word, taps=[-1])] + [initial_state_with_taps(layer) for layer in self.model.layers[1:-1]]
            result, _ = theano.scan(fn=step,
                                n_steps=200,
                                outputs_info=outputs_info)
        else:
            outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
            result, _ = theano.scan(fn=step,
                                sequences=[inputs.T],
                                outputs_info=outputs_info)

        if greedy:
            return result[0]
        # softmaxes are the last layer of our network,
        # and are at the end of our results list:
        return result[-1].transpose((2,0,1))
        # we reorder the predictions to be:
        # 1. what row / example
        # 2. what timestep
        # 3. softmax dimension

    def create_cost_fun (self):
        # create a cost function that
        # takes each prediction at every timestep
        # and guesses next timestep's value:
        what_to_predict = self.input_mat[:, 1:]
        # because some sentences are shorter, we
        # place masks where the sentences end:
        # (for_how_long is zero indexed, e.g. an example spanning `[2,3)`
        # has this value set to 0; here we subtract 1):
        for_how_long = self.for_how_long - 1
        # all sentences start at T=0:
        starting_when = T.zeros_like(self.for_how_long)

        self.cost = masked_loss(self.predictions,
                                what_to_predict,
                                for_how_long,
                                starting_when).sum()

    def create_predict_function(self):
        self.pred_fun = theano.function(
            inputs=[self.input_mat],
            outputs =self.predictions,
            allow_input_downcast=True
        )

        self.greedy_fun = theano.function(
            inputs=[self.priming_word],
            outputs=T.concatenate([T.shape_padleft(self.priming_word), self.greedy_predictions]),
            allow_input_downcast=True
        )

    def create_training_function(self):
        updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")
        self.update_fun = theano.function(
            inputs=[self.input_mat, self.for_how_long],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

    def __call__(self, x):
        return self.pred_fun(x)
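# --- Hedged illustration (not part of the original example) ---
# Conceptual numpy sketch of the masking behind create_cost_fun above: the
# targets are input_mat[:, 1:], so a sentence of length L contributes only its
# first L-1 prediction steps. This only shows which (example, timestep) cells
# such a mask keeps; it is not the masked_loss implementation itself.
import numpy as np

def _mask_sketch(for_how_long, max_steps):
    lengths = np.asarray(for_how_long) - 1          # same shift as in create_cost_fun
    steps = np.arange(max_steps)[None, :]           # (1, T)
    return steps < lengths[:, None]                 # (batch, T) boolean mask

# e.g. _mask_sketch([3, 5], 6) keeps 2 steps of the first example and 4 of the second.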
Exemplo n.º 32
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN,steps=40):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=steps
        self.gfs=T.matrix()  # input GFS data
        self.pm25in=T.matrix()  # initial pm25 input frames
        self.pm25target=T.matrix()  # prediction target
        self.layerstatus=None
        self.results=None
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions = self.create_prediction()
        # create gradient training functions:
        self.create_cost_fun()
        self.create_valid_error()
        self.create_training_function()
        self.create_predict_function()
        self.create_validate_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params
        
    def create_prediction(self):  # builds one full prediction pass
        gfs=self.gfs
        pm25in=self.pm25in
        # initial forward pass
        self.layerstatus=self.model.forward(T.concatenate([gfs[0],gfs[1],gfs[2],pm25in[0],pm25in[1]],axis=0))
        self.results=T.shape_padright(self.layerstatus[-1])
        if self.steps > 1:
            self.layerstatus=self.model.forward(T.concatenate([gfs[1],gfs[2],gfs[3],pm25in[1],self.results[0]],axis=0),self.layerstatus)
            self.results=T.concatenate([self.results,T.shape_padright(self.layerstatus[-1])],axis=0)      
            # forward pass for the remaining steps-2 iterations
            for i in xrange(2,self.steps):
                self.layerstatus=self.model.forward(T.concatenate([gfs[i],gfs[i+1],gfs[i+2],self.results[i-2],self.results[i-1]],axis=0),self.layerstatus)
                #need T.shape_padright???
                self.results=T.concatenate([self.results,T.shape_padright(self.layerstatus[-1])],axis=0)
        return self.results
        
    def create_cost_fun (self):                                 
        self.cost = (self.predictions - self.pm25target).norm(L=2) / self.steps

    def create_valid_error(self):
        self.valid_error=T.abs_(self.predictions - self.pm25target)
                
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs,self.pm25in],outputs =self.predictions,allow_input_downcast=True)
                                 
    def create_training_function(self):
        updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, method="adadelta")  # this is the gradient descent (AdaDelta) update step
        self.update_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target],
            outputs=self.cost,
            updates=updates,
            name='update_fun',
            profile=True,
            allow_input_downcast=True)
            
    def create_validate_function(self):
        self.valid_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target],
            outputs=self.valid_error,
            allow_input_downcast=True
        )
        
    def __call__(self, gfs,pm25in):
        return self.pred_fun(gfs,pm25in)
Exemplo n.º 33
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self,
                 hidden_size,
                 input_size,
                 vocab_size,
                 stack_size=1,
                 celltype=LSTM):
        # declare model
        self.model = StackedCells(input_size,
                                  celltype=celltype,
                                  layers=[hidden_size] * stack_size)
        # add an embedding
        self.model.layers.insert(0, Embedding(vocab_size, input_size))
        # add a classifier:
        self.model.layers.append(
            Layer(hidden_size, vocab_size, activation=softmax))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self._stop_word = theano.shared(np.int32(999999999), name="stop word")
        self.for_how_long = T.ivector()
        self.input_mat = T.imatrix()
        self.priming_word = T.iscalar()
        self.srng = T.shared_randomstreams.RandomStreams(
            np.random.randint(0, 1024))
        # create symbolic variables for prediction:
        self.predictions = self.create_prediction()
        # create symbolic variable for greedy search:
        self.greedy_predictions = self.create_prediction(greedy=True)
        # create gradient training functions:
        self.create_cost_fun()
        self.create_training_function()
        self.create_predict_function()
        # For saving state
        self.epochs = 0

    def stop_on(self, idx):
        self._stop_word.set_value(idx)

    @property
    def params(self):
        return self.model.params

    def create_prediction(self, greedy=False):
        def step(idx, *states):
            # new hiddens are the states we need to pass to LSTMs
            # from past. Because the StackedCells also include
            # the embeddings, and those have no state, we pass
            # a "None" instead:
            new_hiddens = [None] + list(states)

            new_states = self.model.forward(idx, prev_hiddens=new_hiddens)
            if greedy:
                new_idxes = new_states[-1]
                new_idx = new_idxes.argmax()
                # provide a stopping condition for greedy search:
                return ([new_idx.astype(self.priming_word.dtype)] +
                        new_states[1:-1]), theano.scan_module.until(
                            T.eq(new_idx, self._stop_word))
            else:
                return new_states[1:]

        # in sequence forecasting scenario we take everything
        # up to the before last step, and predict subsequent
        # steps ergo, 0 ... n - 1, hence:
        inputs = self.input_mat[:, 0:-1]
        num_examples = inputs.shape[0]
        # pass this to Theano's recurrence relation function:

        # choose what gets outputted at each timestep:
        if greedy:
            outputs_info = [dict(initial=self.priming_word, taps=[-1])] + [
                initial_state_with_taps(layer)
                for layer in self.model.layers[1:-1]
            ]
            result, _ = theano.scan(fn=step,
                                    n_steps=200,
                                    outputs_info=outputs_info)
        else:
            outputs_info = [
                initial_state_with_taps(layer, num_examples)
                for layer in self.model.layers[1:]
            ]
            result, _ = theano.scan(fn=step,
                                    sequences=[inputs.T],
                                    outputs_info=outputs_info)

        if greedy:
            return result[0]
        # softmaxes are the last layer of our network,
        # and are at the end of our results list:
        return result[-1].transpose((2, 0, 1))
        # we reorder the predictions to be:
        # 1. what row / example
        # 2. what timestep
        # 3. softmax dimension

    def create_cost_fun(self):
        # create a cost function that
        # takes each prediction at every timestep
        # and guesses next timestep's value:
        what_to_predict = self.input_mat[:, 1:]
        # because some sentences are shorter, we
        # place masks where the sentences end:
        # (for how long is zero indexed, e.g. an example going from `[2,3)`)
        # has this value set 0 (here we substract by 1):
        for_how_long = self.for_how_long - 1
        # all sentences start at T=0:
        starting_when = T.zeros_like(self.for_how_long)

        self.cost = masked_loss(self.predictions, what_to_predict,
                                for_how_long, starting_when).sum()

    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.input_mat],
                                        outputs=self.predictions,
                                        allow_input_downcast=True)

        self.greedy_fun = theano.function(
            inputs=[self.priming_word],
            outputs=T.concatenate(
                [T.shape_padleft(self.priming_word), self.greedy_predictions]),
            allow_input_downcast=True)

    def create_training_function(self):
        updates, _, _, _, _ = create_optimization_updates(self.cost,
                                                          self.params,
                                                          method="adadelta")
        self.update_fun = theano.function(
            inputs=[self.input_mat, self.for_how_long],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

    def clean_up_files(self, load_path):
        glob_path = "%s-*%s" % (os.path.splitext(load_path)[0],
                                os.path.splitext(load_path)[1])
        files = glob.glob(glob_path)
        oldest_age = 0
        for name in files:
            try:
                age = int(os.path.splitext(name)[0].split("-")[-1])
                if age > oldest_age:
                    oldest_age = age
            except ValueError:
                pass
        # delete on second pass
        print "Cleaning up redundant files."
        for name in files:
            try:
                age = int(os.path.splitext(name)[0].split("-")[-1])
                if age < oldest_age:
                    print ".. removing", name
                    os.remove(name)
            except (ValueError, OSError):
                pass

    def save(self, save_path, clean=False):
        path = "%s-%s%s" % (os.path.splitext(save_path)[0], self.epochs,
                            os.path.splitext(save_path)[1])
        with open(path, 'wb') as f:
            pickle.dump(self.model.params, f, protocol=pickle.HIGHEST_PROTOCOL)
        print "Saved model to", path
        if clean:
            self.clean_up_files(save_path)

    def load(self, load_path):
        glob_path = "%s-*%s" % (os.path.splitext(load_path)[0],
                                os.path.splitext(load_path)[1])
        files = glob.glob(glob_path)
        oldest_age = 0
        print glob_path, files
        for name in files:
            try:
                age = int(os.path.splitext(name)[0].split("-")[-1])
                if age > oldest_age:
                    oldest_age = age
            except ValueError:
                pass
        if oldest_age:
            path = "%s-%s%s" % (os.path.splitext(load_path)[0], oldest_age,
                                os.path.splitext(load_path)[1])
            theano.config.reoptimize_unpickled_function = False
            theano.gof.compilelock.set_lock_status(False)
            with open(path, 'rb') as f:
                print "Loading model from ", path
                self.model.params = pickle.load(f)
                self.epochs = oldest_age
        else:
            print "Sorry, there is no file I can open with that name"

    def __call__(self, x):
        return self.pred_fun(x)
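# --- Hedged usage sketch (not part of the original example) ---
# The save/load pair above writes parameter pickles named "<base>-<epochs><ext>"
# and load() restores the file with the largest epoch suffix. The sizes and the
# "checkpoints/" path are made up; the directory is assumed to exist already.
def _checkpoint_sketch():
    m = Model(hidden_size=64, input_size=32, vocab_size=100, stack_size=1, celltype=LSTM)
    m.epochs = 3
    m.save("checkpoints/charmodel.pkl", clean=True)  # writes checkpoints/charmodel-3.pkl
    m2 = Model(hidden_size=64, input_size=32, vocab_size=100, stack_size=1, celltype=LSTM)
    m2.load("checkpoints/charmodel.pkl")             # restores params; m2.epochs becomes 3
    return m2.epochs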
Exemplo n.º 34
0
class Model(object):
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=40
        self.gfs=T.matrix('gfs')  # input GFS data
        self.pm25in=T.matrix('pm25in')  # initial pm25 input frames
        self.pm25target=T.matrix('pm25target')  # prediction target
        #self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions = self.create_prediction()
        # create gradient training functions:
        self.create_cost_fun()
        self.create_valid_error()
        self.create_training_function()
        self.create_predict_function()
        self.create_validate_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params      
        
    def create_prediction(self):
        def oneStep(gfs_tm2,gfs_tm1,gfs_t,pm25_tm2,pm25_tm1,*prev_hiddens):
            input_x=T.concatenate([gfs_tm2,gfs_tm1,gfs_t,pm25_tm2,pm25_tm1],axis=0)
            new_states = self.model.forward(input_x, prev_hiddens)
            # return in shifted order: the final layer's output first, then the remaining states
            return [new_states[-1]]+new_states[:-1]
            
        result, updates = theano.scan(oneStep,
                          n_steps=self.steps,
                          sequences=[dict(input=self.gfs, taps=[-2,-1,-0])],
                          outputs_info=[dict(initial=self.pm25in, taps=[-2,-1])] + [dict(initial=layer.initial_hidden_state, taps=[-1]) for layer in self.model.layers if hasattr(layer, 'initial_hidden_state')])
        # per oneStep, result is a list of two parts: result[0] is new_states[-1], the array of final-layer outputs, and result[1] holds the earlier layers
        return result[0]
        
    def create_cost_fun (self):
        # may want to change this cost function later
        self.cost = (self.predictions - self.pm25target).norm(L=2) / self.steps
        
    def create_valid_error(self):
        self.valid_error=T.abs_(self.predictions - self.pm25target)
        
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs,self.pm25in],outputs =self.predictions,allow_input_downcast=True)
                                 
    def create_training_function(self):
        updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, method="adadelta")  # this is the gradient descent (AdaDelta) update step
        self.update_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target],
            outputs=self.cost,
            updates=updates,
            name='update_fun',
            profile=True,
            allow_input_downcast=True)
    
    def create_validate_function(self):
        self.valid_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target],
            outputs=self.valid_error,
            allow_input_downcast=True
        )
        
    def __call__(self, gfs,pm25in):
        return self.pred_fun(gfs,pm25in)
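# --- Hedged illustration (not part of the original example) ---
# Minimal theano.scan sketch of the taps wiring used in create_prediction above:
# the sequence is read at offsets [-2, -1, 0] and the recurrent output at
# offsets [-2, -1], matching the argument order oneStep receives
# (gfs_tm2, gfs_tm1, gfs_t, pm25_tm2, pm25_tm1, *hiddens). The toy recurrence
# just sums its five inputs so the wiring is easy to verify by hand.
import numpy as np
import theano
import theano.tensor as T

def _taps_sketch():
    seq = T.vector('seq')
    init = T.vector('init')  # two priming values, consumed through taps [-2, -1]
    def step(s_tm2, s_tm1, s_t, y_tm2, y_tm1):
        return s_tm2 + s_tm1 + s_t + y_tm2 + y_tm1
    out, _ = theano.scan(step,
                         sequences=[dict(input=seq, taps=[-2, -1, 0])],
                         outputs_info=[dict(initial=init, taps=[-2, -1])])
    f = theano.function([seq, init], out)
    return f(np.arange(6, dtype='float32'), np.zeros(2, dtype='float32'))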
Exemplo n.º 35
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=T.iscalar()
        self.gfs=T.matrix()  # input GFS data
        self.pm25in=T.matrix()  # initial pm25 input frames
        self.pm25target=T.matrix()  # prediction target
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions = self.create_prediction()
        # create gradient training functions:
        self.create_cost_fun()
        self.create_training_function()
        self.create_predict_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params
        
    '''def create_prediction(self):
        def oneStep(gfs_tm2,gfs_tm1,gfs_t,pm25_in,pm25_tm1,*hidden_states):
            input_x=gfs_tm2+gfs_tm1+gfs_t+pm25_in+pm25_tm1
            new_hiddens=list(hidden_states)
            layers_out = self.model.forward(input_x, prev_hiddens = new_hiddens)
            #this is the update step; here input_x and previous_hidden are meant to be iterated through outputs_info
            y_given_x=layers_out[-1]#every layer produces an output; the last one is the output layer, which emits the next pm25 frame
            hiddens=layers_out
            return [y_given_x]+hiddens
            
        #ordered by the rule described in the three lines below; the frame being predicted is time 0
        # in sequence forecasting scenario we take everything
        # up to the before last step, and predict subsequent
        # steps ergo, 0 ... n - 1, hence:
        gfs=self.gfs
        pm25in=self.pm25in
        pm250=self.pm250
        hiddens0=[initial_state_with_taps(layer,1) for layer in self.model.layers]
        #this helper already follows scan's format and adds taps=-1, so it can be passed into scan directly
        
        # pass this to Theano's recurrence relation function:
        
        # choose what gets outputted at each timestep:
        outputs_info = [dict(initial=pm250, taps=[-1])]+hiddens0
        result, _ = theano.scan(fn=oneStep,
                            sequences=[dict(input=gfs, taps=[-2,-1,0]),pm25in],
                            outputs_info=outputs_info,
                            n_steps=self.steps)
                                 

        return result[0]#the list of y_given_x from every step
        # we reorder the predictions to be:
        # 1. what row / example
        # 2. what timestep
        # 3. softmax dimension'''
        
    def create_prediction(self):
        def oneStep(gfs_tm2,gfs_tm1,gfs_t,pm25_tm2,pm25_tm1,*prev_hiddens):
            input_x=gfs_tm2+gfs_tm1+gfs_t+pm25_tm2+pm25_tm1
            new_states = self.model.forward(input_x, prev_hiddens)
            # return in shifted order: the final layer's output first, then the remaining states
            return [new_states[-1]]+new_states[:-1]
        
        gfs=self.gfs
        initial_predict=self.pm25in
            
        result, updates = theano.scan(oneStep,
                          n_steps=self.steps,
                          sequences=[dict(input=gfs, taps=[-2,-1,-0])],
                          outputs_info=[dict(initial=initial_predict, taps=[-2,-1])] + [dict(initial=layer.initial_hidden_state, taps=[-1]) for layer in self.model.layers if hasattr(layer, 'initial_hidden_state')])
        return result[0]
        
    def create_cost_fun (self):                                 
        self.cost = (self.predictions - self.pm25target).norm(L=2) / self.steps
        
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs,self.pm25in,self.steps],outputs =self.predictions,allow_input_downcast=True)
                                 
    def create_training_function(self):
        updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, method="adadelta")  # this is the gradient descent (AdaDelta) update step
        self.update_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target,self.steps],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)
        
    def __call__(self, gfs,pm25in,steps):
        return self.pred_fun(gfs,pm25in,steps)
Exemplo n.º 36
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, vocab_size, stack_size=1, celltype=LSTM):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add an embedding
        self.model.layers.insert(0, Embedding(vocab_size, input_size))
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, vocab_size, activation = softmax))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self._stop_word   = theano.shared(np.int32(999999999), name="stop word")
        self.for_how_long = T.ivector()
        self.input_mat = T.imatrix()
        self.priming_word = T.iscalar()
        self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
        # create symbolic variables for prediction:
        self.predictions = self.create_prediction()
        # create symbolic variable for greedy search:
        self.greedy_predictions = self.create_prediction(greedy=True)
        # create gradient training functions:
        self.create_cost_fun()
        self.create_training_function()
        self.create_predict_function()

    def stop_on(self, idx):
        self._stop_word.set_value(idx)

    @property
    def params(self):
        return self.model.params

    def create_prediction(self, greedy=False):
        def step(idx, *states):
            # new hiddens are the states we need to pass to LSTMs
            # from past. Because the StackedCells also include
            # the embeddings, and those have no state, we pass
            # a "None" instead:
            new_hiddens = [None] + list(states)

            new_states = self.model.forward(idx, prev_hiddens = new_hiddens)
            if greedy:
                new_idxes = new_states[-1]
                new_idx   = new_idxes.argmax()
                # provide a stopping condition for greedy search:
                return ([new_idx.astype(self.priming_word.dtype)] + new_states[1:-1]), theano.scan_module.until(T.eq(new_idx,self._stop_word))
            else:
                return new_states[1:]
        # in sequence forecasting scenario we take everything
        # up to the before last step, and predict subsequent
        # steps ergo, 0 ... n - 1, hence:
        inputs = self.input_mat[:, 0:-1]
        num_examples = inputs.shape[0]
        # pass this to Theano's recurrence relation function:

        # choose what gets outputted at each timestep:
        if greedy:
            outputs_info = [dict(initial=self.priming_word, taps=[-1])] + [initial_state_with_taps(layer) for layer in self.model.layers[1:-1]]
            result, _ = theano.scan(fn=step,
                                n_steps=200,
                                outputs_info=outputs_info)
        else:
            outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
            result, _ = theano.scan(fn=step,
                                sequences=[inputs.T],
                                outputs_info=outputs_info)

        if greedy:
            return result[0]
        # softmaxes are the last layer of our network,
        # and are at the end of our results list:
        return result[-1].transpose((2,0,1))
        # we reorder the predictions to be:
        # 1. what row / example
        # 2. what timestep
        # 3. softmax dimension

    def create_cost_fun (self):
        # create a cost function that
        # takes each prediction at every timestep
        # and guesses next timestep's value:
        what_to_predict = self.input_mat[:, 1:]
        # because some sentences are shorter, we
        # place masks where the sentences end:
        # (for_how_long is zero indexed, e.g. an example spanning `[2,3)`
        # has this value set to 0; here we subtract 1):
        for_how_long = self.for_how_long - 1
        # all sentences start at T=0:
        starting_when = T.zeros_like(self.for_how_long)

        self.cost = masked_loss(self.predictions,
                                what_to_predict,
                                for_how_long,
                                starting_when).sum()

    def create_predict_function(self):
        self.pred_fun = theano.function(
            inputs=[self.input_mat],
            outputs =self.predictions,
            allow_input_downcast=True
        )

        self.greedy_fun = theano.function(
            inputs=[self.priming_word],
            outputs=T.concatenate([T.shape_padleft(self.priming_word), self.greedy_predictions]),
            allow_input_downcast=True
        )

    def create_training_function(self):
        updates, _, _, _, _ = create_optimization_updates(self.cost, self.params, method="adadelta")
        self.update_fun = theano.function(
            inputs=[self.input_mat, self.for_how_long],
            outputs=self.cost,
            updates=updates,
            allow_input_downcast=True)

    def __call__(self, x):
        return self.pred_fun(x)
Exemplo n.º 37
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=Layer,steps=40):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=steps
        self.stepsin=T.iscalar('stepsin')
        self.x=T.tensor3('x')  # input GFS data
        self.target=T.tensor3('target')  # prediction target; this version changes the target dimensions
        self.layerstatus=None
        self.results=None
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions = self.create_prediction()
        self.predictions2 = self.create_prediction2()
        # create gradient training functions:
        self.create_cost_fun()
        self.create_valid_error()
        self.create_training_function()
        self.create_predict_function()
        self.create_validate_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params
        
    def create_prediction(self):  # builds one full prediction pass
        '''x=self.x
        #initial forward pass
        self.layerstatus=self.model.forward(x[:,0])
	#results.shape?40*1
        self.results=self.layerstatus[-1].dimshuffle((0,'x',1))
        if self.steps > 1:
            for i in xrange(1,self.steps):
                self.layerstatus=self.model.forward(x[:,i],self.layerstatus)
                #need T.shape_padright???
                self.results=T.concatenate([self.results,self.layerstatus[-1].dimshuffle((0,'x',1))],axis=1)
        return self.results'''
        
        def step(idx):
            new_states=self.model.forward(idx)
            return new_states  # all layer outputs are returned whether or not the model is recursive
        
        x = self.x
        num_examples = x.shape[0]
        #outputs_info =[initial_state_with_taps(layer, num_examples) for layer in self.model.layers]
        #outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
        result, _ = theano.scan(fn=step,
                                n_steps=self.steps,
                                sequences=dict(input=x.dimshuffle((1,0,2)), taps=[-0]),
                                )
                                

        return result[-1].dimshuffle((1,0,2))
        
    def create_prediction2(self):  # builds one prediction pass with a run-time step count
        def step(idx):
            new_states=self.model.forward(idx)
            return new_states  # all layer outputs are returned whether or not the model is recursive
        
        x = self.x
        num_examples = x.shape[0]
        #outputs_info =[initial_state_with_taps(layer, num_examples) for layer in self.model.layers]
        #outputs_info = [initial_state_with_taps(layer, num_examples) for layer in self.model.layers[1:]]
        result, _ = theano.scan(fn=step,
                                n_steps=self.stepsin,
                                sequences=dict(input=x.dimshuffle((1,0,2)), taps=[-0]),
                                )
                                

        return result[-1].dimshuffle((1,0,2))
        
    def create_cost_fun (self):                                 
        self.cost = (self.predictions - self.target[:,:,0:1]).norm(L=2)

    def create_valid_error(self):
        self.valid_error=T.mean(T.abs_(self.predictions - self.target[:,:,0:1]),axis=0)
                
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.x],outputs =self.predictions,allow_input_downcast=True)
        self.pred_fun2 = theano.function(inputs=[self.x,self.stepsin],outputs =self.predictions2,allow_input_downcast=True)
                                 
    def create_training_function(self):
        updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, method="adadelta")  # this is the gradient descent (AdaDelta) update step
        self.update_fun = theano.function(
            inputs=[self.x, self.target],
            outputs=self.cost,
            updates=updates,
            name='update_fun',
            profile=False,
            allow_input_downcast=True)
            
    def create_validate_function(self):
        self.valid_fun = theano.function(
            inputs=[self.x, self.target],
            outputs=self.valid_error,
            allow_input_downcast=True
        )
        
    def __call__(self, x):
        return self.pred_fun(x)
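# --- Hedged usage sketch (not part of the original example) ---
# The class above compiles two predictors: pred_fun always runs self.steps scan
# iterations, while pred_fun2 takes the step count at call time through the
# symbolic stepsin. All sizes below are made up for the sketch.
import numpy as np

def _two_predictors_sketch():
    m = Model(hidden_size=16, input_size=8, output_size=1,
              stack_size=1, celltype=Layer, steps=40)
    x = np.random.randn(3, 40, 8).astype('float32')  # batch x timestep x feature
    full = m.pred_fun(x)                              # always 40 steps -> (3, 40, 1)
    short = m.pred_fun2(x[:, :12], 12)                # only 12 timesteps -> (3, 12, 1)
    return full.shape, short.shape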
Exemplo n.º 38
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN,steps=40):
        # declare model
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = lambda x:x))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=steps
        self.gfs=T.tensor3('gfs')  # input GFS data
        self.pm25in=T.tensor3('pm25in')  # initial pm25 input frames
        self.pm25target=T.matrix('pm25target')  # prediction target; this version changes the target dimensions
        self.layerstatus=None
        self.results=None
        self.cnt = T.tensor3('cnt')
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions = self.create_prediction()
        # create gradient training functions:
        self.create_cost_fun()
        self.create_valid_error()
        self.create_training_function()
        self.create_predict_function()
        self.create_validate_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params
        
    def create_prediction(self):  # builds one full prediction pass
        gfs=self.gfs
        pm25in=self.pm25in
        #initial forward pass
        gfs_x=T.concatenate([gfs[:,0],gfs[:,1],gfs[:,2]],axis=1)
        pm25in_x=T.concatenate([pm25in[:,0],pm25in[:,1]],axis=1)
        self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,0]],axis=1))
        self.results=self.layerstatus[-1]
        for i in xrange(1,46):  # the first 6 iterations (0-5) process the warm-up frames before any output; the 7th step gives the 1st real output
            gfs_x=T.concatenate([gfs_x[:,9:],gfs[:,i+2]],axis=1)
            pm25in_x=T.concatenate([pm25in_x[:,1:],pm25in[:,i+1]],axis=1)
            self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,i]],axis=1),self.layerstatus)
            self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
        return self.results
        
    def create_cost_fun (self):                                 
        self.cost = (self.predictions[:,6:46] - self.pm25target[:,6:46]).norm(L=2)

    def create_valid_error(self):
        self.valid_error=T.mean(T.abs_(self.predictions[:,6:46] - self.pm25target[:,6:46]),axis=0)
                
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs,self.pm25in,self.cnt],outputs =self.predictions,allow_input_downcast=True)
                                 
    def create_training_function(self):
        updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, method="adadelta")  # this is the gradient descent (AdaDelta) update step
        self.update_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target,self.cnt],
            outputs=self.cost,
            updates=updates,
            name='update_fun',
            profile=False,
            allow_input_downcast=True)
            
    def create_validate_function(self):
        self.valid_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target,self.cnt],
            outputs=self.valid_error,
            allow_input_downcast=True
        )
        
    def __call__(self, gfs,pm25in,cnt):
        return self.pred_fun(gfs,pm25in,cnt)
Exemplo n.º 39
0
class Model:
    """
    Simple predictive model for forecasting words from
    sequence using LSTMs. Choose how many LSTMs to stack
    what size their memory should be, and how many
    words can be predicted.
    """
    def __init__(self, hidden_size, input_size, output_size, stack_size=1, celltype=RNN,steps=40):
        # declare model
        self.celltype=celltype
        self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
        # add a classifier:
        self.model.layers.append(Layer(hidden_size, output_size, activation = T.tanh))
        # inputs are matrices of indices,
        # each row is a sentence, each column a timestep
        self.steps=steps
        self.gfs=T.tensor3('gfs')  # input GFS data
        self.pm25in=T.tensor3('pm25in')  # initial pm25 input frames
        self.pm25target=T.matrix('pm25target')  # prediction target; this version changes the target dimensions
        self.layerstatus=None
        self.results=None
        self.cnt = T.tensor3('cnt')
        # create symbolic variables for prediction (one full forward pass over the whole sequence produces the prediction)
        self.predictions = self.create_prediction()
        # create gradient training functions:
        self.create_cost_fun()
        self.create_valid_error()
        self.create_training_function()
        self.create_predict_function()
        self.create_validate_function()
        '''The steps above just build the symbolic graph (write out the formulas) ahead of time.'''
        
        
    @property
    def params(self):
        return self.model.params
        
    def create_prediction(self):  # builds one full prediction pass
        gfs=self.gfs
        pm25in=self.pm25in
        #initial forward pass
        x=T.concatenate([gfs[:,0],gfs[:,1],gfs[:,2],pm25in[:,0],pm25in[:,1],self.cnt[:,:,0]],axis=1)
        if self.celltype==RNN:
            init_hiddens = [(T.repeat(T.shape_padleft(create_shared(layer.hidden_size, name="RNN.initial_hidden_state")),
                                      x.shape[0], axis=0)
                             if x.ndim > 1 else create_shared(layer.hidden_size, name="RNN.initial_hidden_state"))
                            if hasattr(layer, 'initial_hidden_state') else None
                            for layer in self.model.layers]
        if self.celltype==LSTM:
            init_hiddens = [(T.repeat(T.shape_padleft(create_shared(layer.hidden_size * 2, name="LSTM.initial_hidden_state")),
                                      x.shape[0], axis=0)
                             if x.ndim > 1 else create_shared(layer.hidden_size * 2, name="LSTM.initial_hidden_state"))
                            if hasattr(layer, 'initial_hidden_state') else None
                            for layer in self.model.layers]
        self.layerstatus=self.model.forward(x,init_hiddens)
        # results is (batch, output_size) after this first step
        self.results=self.layerstatus[-1]
        if self.steps > 1:
            self.layerstatus=self.model.forward(T.concatenate([gfs[:,1],gfs[:,2],gfs[:,3],pm25in[:,1],self.results,self.cnt[:,:,1]],axis=1),self.layerstatus)
            self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)      
            # then forward-propagate steps-2 more times
            for i in range(2,self.steps):
                self.layerstatus=self.model.forward(T.concatenate([gfs[:,i],gfs[:,i+1],gfs[:,i+2],T.shape_padright(self.results[:,i-2]),T.shape_padright(self.results[:,i-1]),self.cnt[:,:,i]],axis=1),self.layerstatus)
                # T.shape_padright lifts results[:, i-2] and results[:, i-1] from (batch,) to (batch, 1) for the concatenation
                self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
        return self.results
        
    def create_cost_fun(self):
        self.cost = (self.predictions - self.pm25target).norm(L=2)

    def create_valid_error(self):
        self.valid_error=T.mean(T.abs_(self.predictions - self.pm25target),axis=0)
                
    def create_predict_function(self):
        self.pred_fun = theano.function(inputs=[self.gfs, self.pm25in, self.cnt], outputs=self.predictions, allow_input_downcast=True)
                                 
    def create_training_function(self):
        updates, gsums, xsums, lr, max_norm = create_optimization_updates(self.cost, self.params, method="adadelta")  # this step builds the gradient-descent (Adadelta) updates
        self.update_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target,self.cnt],
            outputs=self.cost,
            updates=updates,
            name='update_fun',
            profile=False,
            allow_input_downcast=True)
            
    def create_validate_function(self):
        self.valid_fun = theano.function(
            inputs=[self.gfs,self.pm25in, self.pm25target,self.cnt],
            outputs=self.valid_error,
            allow_input_downcast=True
        )
        
    def __call__(self, gfs, pm25in, cnt):
        return self.pred_fun(gfs, pm25in, cnt)
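
A minimal usage sketch for the model above (not part of the original listing). The layer sizes, feature counts, and array shapes are assumptions inferred from create_prediction; only update_fun, pred_fun, and their input ordering come from the class itself, and the listing's own imports (theano, theano_lstm's RNN, create_shared, ...) are assumed to be in scope.

import numpy as np

# Assumed feature counts: g GFS features per hour, one PM2.5 feature, c counter features.
g, c, steps, batch = 6, 3, 40, 8
model = Model(hidden_size=64,
              input_size=3 * g + 2 * 1 + c,   # must match the concatenation built in create_prediction
              output_size=1,
              stack_size=2,
              celltype=RNN,
              steps=steps)

gfs = np.random.rand(batch, steps + 2, g)    # gfs[:, i], gfs[:, i+1], gfs[:, i+2] are used at step i
pm25in = np.random.rand(batch, 2, 1)         # the two seed PM2.5 readings
cnt = np.random.rand(batch, c, steps)        # cnt[:, :, i] is appended to the input at every step
pm25target = np.random.rand(batch, steps)    # one target value per predicted step

cost = model.update_fun(gfs, pm25in, pm25target, cnt)   # one Adadelta update; returns the L2 cost
preds = model.pred_fun(gfs, pm25in, cnt)                # (batch, steps) array of predictions
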
class RelativeShiftLSTMStack( object ):
    """
    Manages a stack of LSTM cells with potentially a relative shift applied
    """

    def __init__(self, input_parts, layer_sizes, output_size, window_size=0, dropout=0, mode="drop", unroll_batch_num=None):
        """
        Parameters:
            input_parts: A list of InputParts
            layer_sizes: A list of the form [ (indep, per_note), ... ] where
                    indep is the number of non-shifted cells to have, and
                    per_note is the number of cells to have per window note, which shift as the
                        network moves
                    Alternately can just be [ indep, ... ]
            output_size: An integer, the width of the desired output
            dropout: How much dropout to apply.
            mode: Either "drop" or "roll". If drop, discard memory that goes out of range. If roll, roll it instead
        """

        self.input_parts = input_parts
        self.window_size = window_size

        layer_sizes = [x if isinstance(x,tuple) else (x,0) for x in layer_sizes]
        self.layer_sizes = layer_sizes
        self.tot_layer_sizes = [(indep + per_note*self.window_size) for indep, per_note in layer_sizes]
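        # e.g. layer_sizes=[(200, 30)] with window_size=12 gives tot_layer_sizes=[200 + 30*12] = [560]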
        
        self.output_size = output_size
        self.dropout = dropout

        self.input_size = sum(part.PART_WIDTH for part in input_parts)

        self.cells = StackedCells( self.input_size, celltype=LSTM, activation=T.tanh, layers = self.tot_layer_sizes )
        self.cells.layers.append(Layer(self.tot_layer_sizes[-1], self.output_size, activation = lambda x:x))

        assert mode in ("drop", "roll"), "Must specify either drop or roll mode"
        self.mode = mode

        self.unroll_batch_num = unroll_batch_num

    @property
    def params(self):
        return self.cells.params + list(l.initial_hidden_state for l in self.cells.layers if has_hidden(l))

    @params.setter
    def params(self, paramlist):
        self.cells.params = paramlist[:len(self.cells.params)]
        for l, val in zip((l for l in self.cells.layers if has_hidden(l)), paramlist[len(self.cells.params):]):
            l.initial_hidden_state.set_value(val.get_value())

    def perform_step(self, in_data, shifts, hiddens, dropout_masks=[]):
        """
        Perform a step through the LSTM network.

        in_data: A theano tensor (float32) of shape (batch, input_size)
        shifts: A theano tensor (int32) of shape (batch), giving the relative
            shifts to apply to the last hiddens
        hiddens: A list of hiddens [layer](batch, hidden_idx)
        dropout_masks: If [], apply dropout deterministically. Otherwise, should
            be a set of masks returned by get_dropout_masks, generally passed through
            a scan as a non-sequence.
        """

        # hiddens is of shape [layer](batch, hidden_idx)
        # We want to permute the hidden_idx values according to shifts,
        # which are ints of shape (batch)

        n_batch = in_data.shape[0]
        new_hiddens = []
        for layer_i, (indep, per_note) in enumerate(self.layer_sizes):
            if per_note == 0:
                # Don't bother with this layer
                new_hiddens.append(hiddens[layer_i])
                continue
            # The theano_lstm code puts [memory_cells... , old_activations...]
            # We want to slide the memory cells only.
            lstm_hsplit = self.cells.layers[layer_i].hidden_size
            indep_mem = hiddens[layer_i][:,:indep]
            per_note_mem = hiddens[layer_i][:,indep:lstm_hsplit]
            remaining_values = hiddens[layer_i][:,lstm_hsplit:]
            # per_note_mem is (batch, window_size * per_note)
            separated_mem = per_note_mem.reshape((n_batch, self.window_size, per_note))
            # separated_mem is (batch, note, mem)
            # [a b c ... x y z] shifted up 1   (+1) goes to  [b c ... x y z 0]
            # [a b c ... x y z] shifted down 1 (-1) goes to [0 a b c ... x y]
            def _shift_step(c_mem, c_shift):
                # c_mem is (note, mem)
                # c_shift is an int
                if self.mode=="drop":
                    def _clamp_w(x):
                        return T.maximum(0,T.minimum(x,self.window_size))
                    ins_at_front = T.zeros((_clamp_w(-c_shift),per_note))
                    ins_at_back = T.zeros((_clamp_w(c_shift),per_note))
                    take_part = c_mem[_clamp_w(c_shift):self.window_size-_clamp_w(-c_shift),:]
                    return T.concatenate([ins_at_front, take_part, ins_at_back], 0)
                elif self.mode=="roll":
                    return T.roll(c_mem, (-c_shift)%12, axis=0)
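                    # (rolling by (-c_shift) % 12 implicitly assumes a 12-note, one-octave window)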

            if self.unroll_batch_num is None:
                shifted_mem, _ = theano.map(_shift_step, [separated_mem, shifts])
            else:
                shifted_mem_parts = []
                for i in range(self.unroll_batch_num):
                    shifted_mem_parts.append(_shift_step(separated_mem[i], shifts[i]))
                shifted_mem = T.stack(shifted_mem_parts)

            new_per_note_mem = shifted_mem.reshape((n_batch, self.window_size * per_note))
            new_layer_hiddens = T.concatenate([indep_mem, new_per_note_mem, remaining_values], 1)
            new_hiddens.append(new_layer_hiddens)

        if dropout_masks == [] or not self.dropout:
            masks = []
        else:
            masks = [None] + dropout_masks
        new_states = self.cells.forward(in_data, prev_hiddens=new_hiddens, dropout=masks)
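        # new_states has one entry per layer (including the appended output Layer);
        # the last entry is this step's output activation (used downstream as raw_output).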
        return new_states

    def do_preprocess_scan(self, deterministic_dropout=False, **kwargs):
        """
        Run a scan using this LSTM, preprocessing all inputs before the scan.

        Parameters:
            kwargs[k]: should be a theano tensor of shape (n_batch, n_time, ... )
                Note that "relative_position" should be a keyword argument given here if there are relative
                shifts.
            deterministic_dropout: If True, apply dropout deterministically, scaling everything. If false,
                sample dropout

        Returns:
            A theano tensor of shape (n_batch, n_time, output_size) of activations
        """

        assert len(kwargs)>0, "Need at least one input argument!"
        n_batch, n_time = list(kwargs.values())[0].shape[:2]

        squashed_kwargs = {
            k: v.reshape([n_batch*n_time] + [x for x in v.shape[2:]]) for k,v in kwargs.items()
        }

        full_input = T.concatenate([ part.generate(**squashed_kwargs) for part in self.input_parts ], 1)
        adjusted_input = full_input.reshape([n_batch, n_time, self.input_size]).dimshuffle((1,0,2))

        if "relative_position" in kwargs:
            relative_position = kwargs["relative_position"]
            diff_shifts = T.extra_ops.diff(relative_position, axis=1)
            cat_shifts = T.concatenate([T.zeros((n_batch, 1), 'int32'), diff_shifts], 1)
            shifts = cat_shifts.dimshuffle((1,0))
        else:
            shifts = T.zeros((n_time, n_batch), 'int32')

        def _scan_fn(in_data, shifts, *other):
            other = list(other)
            if self.dropout and not deterministic_dropout:
                split = -len(self.tot_layer_sizes)
                hiddens = other[:split]
                masks = [None] + other[split:]
            else:
                masks = []
                hiddens = other

            return self.perform_step(in_data, shifts, hiddens, dropout_masks=masks)

        if self.dropout and not deterministic_dropout:
            dropout_masks = UpscaleMultiDropout( [(n_batch, shape) for shape in self.tot_layer_sizes], self.dropout)
        else:
            dropout_masks = []

        outputs_info = [initial_state_with_taps(layer, n_batch) for layer in self.cells.layers]
        result, _ = theano.scan(fn=_scan_fn, sequences=[adjusted_input, shifts], non_sequences=dropout_masks, outputs_info=outputs_info)

        final_out = get_last_layer(result).transpose((1,0,2))

        return final_out
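
    # Hypothetical call (the keyword names are illustrative; any keys that the
    # configured InputParts accept may be passed):
    #   out = stack.do_preprocess_scan(relative_position=pos,   # (n_batch, n_time) int32
    #                                  note_features=feats)     # (n_batch, n_time, ...)
    # "out" then has shape (n_batch, n_time, output_size).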

    def prepare_sample_scan(self, start_pos, start_out, deterministic_dropout=False, **kwargs):
        """
        Prepare a sample scan

        Parameters:
            kwargs[k]: should be a theano tensor of shape (n_batch, n_time, ... )
                Note that "relative_position" should be a keyword argument given here if there are relative
                shifts, as should "timestep"
            start_pos: a theano tensor of shape (n_batch) giving the initial position passed to the
                out_to_in function
            start_out: a theano tensor of shape (n_batch, X) giving the initial "output" passed
                to the out_to_in_fn
            deterministic_dropout: If True, apply dropout deterministically, scaling everything. If false,
                sample dropout

        Returns:
            A namedtuple, where
                sequences: a list of sequences to input into scan
                non_sequences: a list of non_sequences into scan
                outputs_info: a list of outputs_info for scan
                num_taps: the number of outputs with taps for this scan
                (other values): for internal use
        """
        assert len(kwargs)>0, "Need at least one input argument!"
        n_batch, n_time = list(kwargs.values())[0].shape[:2]

        transp_kwargs = {
            k: v.dimshuffle((1,0) + tuple(range(2,v.ndim))) for k,v in kwargs.items()
        }

        if self.dropout and not deterministic_dropout:
            dropout_masks = UpscaleMultiDropout( [(n_batch, shape) for shape in self.tot_layer_sizes], self.dropout)
        else:
            dropout_masks = []

        outputs_info = [{"initial":start_pos, "taps":[-1]}, {"initial":start_out, "taps":[-1]}] + [initial_state_with_taps(layer, n_batch) for layer in self.cells.layers]
        sequences = list(transp_kwargs.values())
        non_sequences = dropout_masks
        num_taps = len([True for x in outputs_info if x is not None])
        return SampleScanSpec(sequences=sequences, non_sequences=non_sequences, outputs_info=outputs_info, num_taps=num_taps, kwargs_keys=list(transp_kwargs.keys()), deterministic_dropout=deterministic_dropout, start_pos=start_pos)


    def sample_scan_routine(self, spec, *inputs):
        """
        Start a scan routine. This is implemented as a generator, since we may need to interrupt the state in the
        middle of iteration. How to use:

        scan_rout = x.sample_scan_routine(spec, *inputs)
                - spec: The SampleScanSpec returned by prepare_sample_scan
                - *inputs: The scan inputs, in [ sequences..., taps..., non_sequences... ] order

        last_rel_pos, last_out, cur_kwargs = scan_rout.send(None)
                - last_rel_pos is a theano tensor of shape (n_batch)
                - last_out will be a theano tensor of shape (n_batch, output_size)
                - cur_kwargs[k] is a theano tensor of shape (n_batch, ...), from kwargs

        out_activations = scan_rout.send((new_pos, addtl_kwargs))
                - new_pos is a theano tensor of shape (n_batch), giving the new relative position
                - addtl_kwargs[k] is a theano tensor of shape (n_batch, ...) to be added to cur kwargs
                    Note that "relative_position" will be added automatically.

        scan_outputs = scan_rout.send(new_out)
                - new_out is a tensor of shape (n_batch, X) to be output

        scan_rout.close()

        -> scan_outputs should be returned back to scan
        """
        stuff = list(inputs)
        I = len(spec.kwargs_keys)
        kwarg_seq_vals = stuff[:I]
        cur_kwargs = {k:v for k,v in zip(spec.kwargs_keys, kwarg_seq_vals)}
        last_pos, last_out = stuff[I:I+2]
        other = stuff[I+2:]

        if self.dropout and not spec.deterministic_dropout:
            split = -len(self.tot_layer_sizes)
            hiddens = other[:split]
            masks = [None] + other[split:]
        else:
            masks = []
            hiddens = other

        cur_pos, addtl_kwargs = yield(last_pos, last_out, cur_kwargs)
        all_kwargs = {
            "relative_position": cur_pos
        }
        all_kwargs.update(cur_kwargs)
        all_kwargs.update(addtl_kwargs)

        shift = T.switch(T.eq(all_kwargs["timestep"],0), 0, cur_pos - last_pos)

        full_input = T.concatenate([ part.generate(**all_kwargs) for part in self.input_parts ], 1)

        step_stuff = self.perform_step(full_input, shift, hiddens, dropout_masks=masks)
        new_hiddens = step_stuff[:-1]
        raw_output = step_stuff[-1]
        sampled_output = yield(raw_output)

        yield [cur_pos, sampled_output] + step_stuff

    def extract_sample_scan_results(self, spec, outputs):
        """
        Extract outputs from the scan results. 

        Parameters:
            outputs: The outputs from the scan associated with this stack

        Returns:
            positions, raw_output, sampled_output
        """
        positions = T.concatenate([T.shape_padright(spec.start_pos), outputs[0].transpose((1,0))[:,:-1]], 1)
        sampled_output = outputs[1].transpose((1,0,2))
        raw_output = outputs[-1].transpose((1,0,2))

        return positions, raw_output, sampled_output


    def do_sample_scan(self, start_pos, start_out, sample_fn, out_to_in_fn, deterministic_dropout=True, **kwargs):
        """
        Run a scan using this LSTM, sampling and processing as we go.

        Parameters:
            kwargs[k]: should be a theano tensor of shape (n_batch, n_time, ... )
                Note that "relative_position" should be a keyword argument given here if there are relative
                shifts.
            start_pos: a theano tensor of shape (n_batch) giving the initial position passed to the
                out_to_in function
            start_out: a theano tensor of shape (n_batch, X) giving the initial "output" passed
                to the out_to_in_fn
            sample_fn: a function with signature
                    sample_fn(out_activations, rel_pos) -> new_out, new_rel_pos
                where
                    - rel_pos is a theano tensor of shape (n_batch)
                    - out_activations is a tensor of shape (n_batch, output_size)
                and
                    - new_out is a tensor of shape (n_batch, X) to be output
                    - new_rel_pos should be a theano tensor of shape (n_batch)
            out_to_in_fn: a function with signature
                    out_to_in_fn(rel_pos, last_out, **cur_kwargs) -> addtl_kwargs
                where 
                    - rel_pos is a theano tensor of shape (n_batch)
                    - last_out will be a theano tensor of shape (n_batch, output_size)
                    - cur_kwargs[k] is a theano tensor of shape (n_batch, ...), from kwargs
                and
                    - addtl_kwargs[k] is a theano tensor of shape (n_batch, ...) to be added to cur kwargs
                        Note that "relative_position" will be added automatically.
            deterministic_dropout: If True, apply dropout deterministically, scaling everything. If false,
                sample dropout

        Returns: positions, raw_output, sampled_output, updates
        """
        raise NotImplementedError()
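        # NOTE: everything below the raise above is unreachable; it is kept only as a rough
        # sketch of how prepare_sample_scan / sample_scan_routine / extract_sample_scan_results
        # would be wired together inside a theano.scan.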
        spec = self.prepare_sample_scan(start_pos, start_out, sample_fn, deterministic_dropout, **kwargs)

        def _scan_fn(*stuff):
            scan_rout = self.sample_scan_routine(spec, *stuff)
            rel_pos, last_out, cur_kwargs = scan_rout.send(None)
            addtl_kwargs = out_to_in_fn(rel_pos, last_out, **cur_kwargs)
            out_activations = scan_rout.send(addtl_kwargs)
            sampled_output, new_pos = sample_fn(out_activations, rel_pos)
            scan_outputs = scan_rout.send((sampled_output, new_pos))
            scan_rout.close()
            return scan_outputs

        result, updates = theano.scan(fn=_scan_fn, sequences=spec.sequences, non_sequences=spec.non_sequences, outputs_info=spec.outputs_info)
        positions, raw_output, sampled_output = self.extract_sample_scan_results(spec, result)
        return positions, raw_output, sampled_output, updates
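
For reference, the two callbacks documented in do_sample_scan might look like the following minimal sketch; the function names, the 0.5 thresholding rule, and the "last_output" keyword are illustrative assumptions, not part of the original stack.

import theano.tensor as T

def example_sample_fn(out_activations, rel_pos):
    # out_activations: (n_batch, output_size), rel_pos: (n_batch)
    # Hypothetical rule: binarise the activations at 0.5 and keep the
    # relative position unchanged.
    new_out = T.cast(out_activations > 0.5, 'int32')
    new_rel_pos = rel_pos
    return new_out, new_rel_pos

def example_out_to_in_fn(rel_pos, last_out, **cur_kwargs):
    # Hypothetical mapping from the previous sample to the extra keyword
    # inputs that the InputParts expect; "last_output" is an assumed key.
    return {"last_output": last_out}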