def __init__(self, conf: ZStackingLabelConf, base_node: ZlabelNode, **kwargs):
    """Build a label node stacked on top of an existing ``base_node``.

    Args:
        conf: Configuration for this node. It is mutated in place: every
            attribute found in ``base_node.conf.__dict__`` is copied onto it
            and ``emb_size`` is then forced to ``-1``.
        base_node: The already-built node this one stacks upon; it is stored
            under the name ``"_base"`` via ``setattr_borrow``.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    # Inherit the base node's settings before the parent constructor sees them.
    conf.__dict__.update(base_node.conf.__dict__)
    # -1 presumably makes the parent skip building its own embeddings -- TODO confirm.
    conf.emb_size = -1
    super().__init__(conf, **kwargs)
    conf: ZStackingLabelConf = self.conf
    self.setattr_borrow("_base", base_node)
    # Both input paths are optional; the original check is kept disabled:
    # assert conf.use_input_emb or conf.use_input_base
    dim_emb = base_node.conf.emb_size
    dim_base = base_node.conf._csize
    # Optional affine layer sized from the base node's emb_size to conf._csize.
    self.aff_final = (
        AffineNode(None, isize=dim_emb, osize=conf._csize, no_drop=True)
        if conf.use_input_emb
        else None
    )
    # Optional [dim_base, conf._csize] parameter, glorot-initialised.
    if not conf.use_input_base:
        self.W_base = None
    else:
        self.W_base = BK.new_param([dim_base, conf._csize])
        BK.init_param(self.W_base, "glorot")
def reset_parameters(self):
    """Zero ``mixed_weights`` and, if an input mask exists, resample it.

    The mask is overwritten in place (``set_``) inside ``BK.no_grad_env()``
    with the result of ``BK.random_bernoulli`` called with a one-dimensional
    shape of ``conf._isize``, ``self.fixed_mask_rate`` and ``1.``.
    """
    BK.init_param(self.mixed_weights, "zero")  # start with all-zero mixing weights
    if self.input_mask is None:
        return  # no mask configured, nothing more to reset
    mask_shape = (self.conf._isize, )
    with BK.no_grad_env():
        self.input_mask.set_(
            BK.random_bernoulli(mask_shape, self.fixed_mask_rate, 1.)
        )
def reset_parameters(self):
    """Re-initialise the hidden and gate parameters.

    ``W_hid`` is initialised as three consecutive column blocks, each
    ``conf._isize`` wide and each given its own "glorot" init. ``W_gate``
    also gets "glorot"; the two biases ``b_hid`` and ``b_gate`` are zeroed.
    """
    block_width = self.conf._isize
    for block_idx in range(3):
        # Column bounds of the block_idx-th slice of W_hid.
        lo = block_idx * block_width
        hi = lo + block_width
        BK.init_param(self.W_hid[:, lo:hi], "glorot")
    # Remaining parameters, in the same order as before.
    for attr_name, scheme in (
        ("W_gate", "glorot"),
        ("b_hid", "zero"),
        ("b_gate", "zero"),
    ):
        BK.init_param(getattr(self, attr_name), scheme)
def reset_parameters(self):
    """Re-initialise ``W`` (and ``E`` when ``use_lrank`` is set).

    Without ``use_lrank`` only ``W`` is touched and it is zeroed. With it,
    ``E`` gets a "glorot" init (with ``lookup=True``) and ``W`` an "ortho" init.
    """
    if not self.use_lrank:
        BK.init_param(self.W, "zero")
        return
    BK.init_param(self.E, "glorot", lookup=True)
    BK.init_param(self.W, "ortho")
def reset_parameters(self):
    """Re-initialise ``w_out`` with a "glorot" init.

    The init is requested with ``lookup=True`` and scaled by
    ``conf.init_scale_out``.
    """
    out_scale = self.conf.init_scale_out
    BK.init_param(self.w_out, "glorot", lookup=True, scale=out_scale)
def reset_parameters(self):
    """Zero the ``logs2`` parameter; do nothing when it is ``None``."""
    if self.logs2 is None:
        return
    BK.init_param(self.logs2, "zero")
def reset_parameters(self):
    """Give ``W`` a "glorot" init unless ``conf.e_tie_weights`` is set.

    When ``conf.e_tie_weights`` is truthy this method leaves ``W`` untouched.
    """
    if self.conf.e_tie_weights:
        return
    BK.init_param(self.W, "glorot", lookup=True)