def __init__(self, dinput, doutput):
    # weight matrix; the extra input column holds the bias
    W = random(doutput, dinput + 1)
    W = glorotize(W)

    self.W = W
    self.dinput = dinput
    self.doutput = doutput
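# The `random` and `glorotize` helpers are defined elsewhere in the repo;
# below is a minimal sketch of what they presumably do, inferred from how
# they are called above (the exact scaling is an assumption, not the
# repo's code):
import numpy as np

def random(*shape):
    # standard-normal matrix of the requested shape
    return np.random.randn(*shape)

def glorotize(W):
    # Glorot/Xavier scaling: sqrt(6 / (fan_in + fan_out)) keeps the
    # variance of activations roughly constant across layers
    return W * np.sqrt(6.0 / (W.shape[0] + W.shape[1]))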
def __init__(self, dinput, nstates, doutput, clock_periods,
             full_recurrence=False, learn_state=True, first_layer=False):
    super(CRNN, self).__init__()

    nclocks = len(clock_periods)

    # weights; the extra column on each holds the bias
    Wi = random(nclocks * nstates, dinput + 1)
    Wh = random(nclocks * nstates, nclocks * nstates + 1)
    Wo = random(doutput, nclocks * nstates + 1)

    if learn_state:
        # random initial state, learnt during training
        # (matches the numpy variant below; previously the flag
        # was stored but ignored here)
        H_0 = random(nclocks * nstates, 1)
    else:
        H_0 = np.zeros((nclocks * nstates, 1))

    Wi = glorotize(Wi)
    Wh[:, :-1] = orthogonalize(Wh[:, :-1])
    Wo = glorotize(Wo)

    # mask to make Wh a block upper triangular matrix
    utri_mask = recurrent_mask(nclocks, nstates)
    if not full_recurrence:
        Wh[:, :-1] *= utri_mask

    # column vector to selectively activate rows based on time
    schedules = make_schedule(clock_periods, nstates)

    self.dinput = dinput
    self.nstates = nstates
    self.doutput = doutput
    self.clock_periods = clock_periods
    self.nclocks = nclocks
    self.Wi = nn.Parameter(torch.from_numpy(Wi).float())
    self.Wh = nn.Parameter(torch.from_numpy(Wh).float())
    self.Wo = nn.Parameter(torch.from_numpy(Wo).float())
    if learn_state:
        self.H_0 = nn.Parameter(torch.from_numpy(H_0).float())
    else:
        self.H_0 = torch.from_numpy(H_0).float()
    self.utri_mask = utri_mask
    self.schedules = schedules
    self.full_recurrence = full_recurrence
    self.learn_state = learn_state
    self.first_layer = first_layer
    self.H_last = None
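# A minimal usage sketch of the torch variant (hypothetical dimensions):
# three clocks with periods 1, 2 and 4 give nclocks * nstates = 48 hidden
# rows, so Wi is (48, 9), Wh is (48, 49) and Wo is (4, 49).
import torch

crnn = CRNN(dinput=8, nstates=16, doutput=4, clock_periods=[1, 2, 4])
for name, p in crnn.named_parameters():
    print(name, tuple(p.shape))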
def __init__(self, dinput, nstates, doutput, clock_periods,
             full_recurrence=False, learn_state=True, first_layer=False):
    '''
    Clockwork Recurrent Neural Network
    This follows the variant described in the paper by Koutnik et al.

    dinput:          dimension of the input (per time step)
    nstates:         number of states per module/clock
    doutput:         required dimension of the output
    clock_periods:   the periods of the clocks (order is preserved,
                     not sorted)
    full_recurrence:
        True:  every module can 'see' the hidden states of every
               other module
        False: as per the original paper - only faster modules can
               see slower modules
    learn_state:
        True:  initial state is randomly initialized and learnt
               during training
        False: start with an all-zero initial state and don't learn it
    first_layer:
        True:  this is the first layer of the network; gradients
               w.r.t. the inputs are not calculated, as they are
               useless for training - saves time
        False: gradients w.r.t. the inputs are calculated and returned
    '''
    nclocks = len(clock_periods)

    Wi = random(nclocks * nstates, dinput + 1)
    Wh = random(nclocks * nstates, nclocks * nstates + 1)
    Wo = random(doutput, nclocks * nstates + 1)

    if learn_state:
        H_0 = random(nclocks * nstates, 1)
    else:
        H_0 = np.zeros((nclocks * nstates, 1))

    # some fancy inits
    Wi = glorotize(Wi)
    Wh[:, :-1] = orthogonalize(Wh[:, :-1])
    Wo = glorotize(Wo)

    # mask to make Wh a block upper triangular matrix
    utri_mask = recurrent_mask(nclocks, nstates)
    if not full_recurrence:
        Wh[:, :-1] *= utri_mask

    # column vector to selectively activate rows based on time
    schedules = make_schedule(clock_periods, nstates)
    schedules = np.array(schedules).reshape(-1, 1)

    # store it all
    self.dinput = dinput
    self.nstates = nstates
    self.doutput = doutput
    self.clock_periods = clock_periods
    self.nclocks = nclocks
    self.Wi = Wi
    self.Wh = Wh
    self.Wo = Wo
    self.H_0 = H_0
    self.utri_mask = utri_mask
    self.schedules = schedules
    self.full_recurrence = full_recurrence
    self.learn_state = learn_state
    self.first_layer = first_layer

    self.forget()
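# Sketches of the two helpers used above, inferred from the comments in
# this file; the repo's actual implementations may differ in detail.
import numpy as np

def recurrent_mask(nclocks, nstates):
    # block upper-triangular mask: the rows of module i keep connections
    # only from modules i..nclocks-1, i.e. from itself and the slower
    # modules, as in the original CW-RNN paper
    mask = np.zeros((nclocks * nstates, nclocks * nstates))
    for i in range(nclocks):
        mask[i * nstates:(i + 1) * nstates, i * nstates:] = 1.0
    return mask

def make_schedule(clock_periods, nstates):
    # one entry per hidden row: the rows of module i carry period
    # clock_periods[i] and are active at time t iff t % period == 0
    return [p for p in clock_periods for _ in range(nstates)]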