def _setup_weights(self):
    """
    Initializes one weight Parameter per featurizer and registers them in
    ``self.weight_tensors``; ties init and dc weights across the output
    dimension if ``self.tie_init`` / ``self.tie_dc`` are set.
    :return: None
    """
    # NOTE(review): this re-seeds the *global* torch RNG on every call,
    # which also affects all subsequent random draws elsewhere — confirm
    # this determinism is intentional.
    torch.manual_seed(42)
    # setup init
    self.weight_tensors = ParameterList()
    self.tensor_tuple = ()
    self.feature_id = []
    self.W = None
    for featurizer in self.featurizers:
        self.feature_id.append(featurizer.id)
        if featurizer.id == 'SignalInit':
            if self.tie_init:
                # expand() shares one underlying scalar across all output
                # columns, so every column uses the same (tied) weight
                signals_W = Parameter(
                    torch.randn(1).expand(1, self.output_dim))
            else:
                signals_W = Parameter(torch.randn(1, self.output_dim))
        elif featurizer.id == 'SignalDC':
            if self.tie_dc:
                # one weight per DC signal, tied across output columns
                signals_W = Parameter(
                    torch.randn(featurizer.count,
                                1).expand(-1, self.output_dim))
            else:
                signals_W = Parameter(
                    torch.randn(featurizer.count, self.output_dim))
        else:
            # default featurizers: one weight per signal, tied across
            # the output dimension
            signals_W = Parameter(
                torch.randn(featurizer.count, 1).expand(-1, self.output_dim))
        self.weight_tensors.append(signals_W)
    return
def __init__(self, env, feat_info, output_dim, bias=False):
    """Tied linear layer: one weight row (and optional bias row) per
    featurizer, broadcast over that featurizer's width.

    :param env: environment/config object, stored for later use
    :param feat_info: iterable of entries exposing ``learnable``, ``size``
        and ``init_weight`` attributes, one per featurizer
    :param output_dim: output dimension of the layer
    :param bias: whether to allocate per-featurizer bias parameters
    """
    super(TiedLinear, self).__init__()
    self.env = env
    # Init parameters
    self.in_features = 0  # fix: a feature *count* should be an int, not 0.0
    self.weight_list = ParameterList()
    if bias:
        self.bias_list = ParameterList()
    else:
        self.register_parameter('bias', None)
    self.output_dim = output_dim
    self.bias_flag = bias
    # Iterate over featurizer info list
    for feat_entry in feat_info:
        learnable = feat_entry.learnable
        feat_size = feat_entry.size
        init_weight = feat_entry.init_weight
        self.in_features += feat_size
        # Constant init value across the featurizer's width; frozen unless
        # the featurizer is marked learnable.
        feat_weight = Parameter(init_weight * torch.ones(1, feat_size),
                                requires_grad=learnable)
        if learnable:
            self.reset_parameters(feat_weight)
        self.weight_list.append(feat_weight)
        if bias:
            feat_bias = Parameter(torch.zeros(1, feat_size),
                                  requires_grad=learnable)
            if learnable:
                self.reset_parameters(feat_bias)
            self.bias_list.append(feat_bias)
def __init__(self):
    """Build the action-validity table and a frozen 0/1 mask Parameter per
    table key.

    Keys are either a previous directional action, or a ``(left_empty,
    right_empty)`` tuple describing whether the left/right stacks are empty.
    """
    super().__init__()
    # NOTE(review): ``self.valid_actions`` is never initialised here, so it
    # is presumably created by the parent class — confirm.
    directional_actions = [
        Action.LEFT_ARC, Action.NO_LEFT_ARC, Action.RIGHT_ARC,
        Action.NO_RIGHT_ARC
    ]
    directional_and_shift = directional_actions + [Action.SHIFT]
    # If the previous action was directional, the next step may keep
    # deciding arcs, or shift.
    for action in directional_actions:
        self.valid_actions[action] = directional_and_shift
    common_actions = [
        Action.PRED_GEN, Action.NO_PRED, Action.NO_LEFT_ARC,
        Action.NO_RIGHT_ARC
    ]
    # If the previous action was NO-PRED, the valid set depends on whether
    # the left/right stacks are empty:
    # 0. both non-empty
    self.valid_actions[(False, False)] = common_actions
    # 1. left empty, right non-empty
    self.valid_actions[(True, False)] = [Action.RIGHT_ARC] + common_actions
    # 2. left non-empty, right empty
    self.valid_actions[(False, True)] = [Action.LEFT_ARC] + common_actions
    # 3. both empty
    self.valid_actions[(True, True)] = [Action.SHIFT]
    self.masks = ParameterList()
    self.key_to_id = dict()
    for k, v in self.valid_actions.items():
        values = set(a.value for a in v)
        self.key_to_id[k] = len(self.masks)
        # Frozen 0/1 mask over the full action space for this key.
        self.masks.append(
            Parameter(torch.tensor(
                [1 if i in values else 0 for i in range(len(Action))]),
                      requires_grad=False))
def __init__(self, in_size, out_size, in_rank, out_rank, alpha=1, beta=0.1,
             c=1e-3, **kwargs):
    """Tensorized linear layer: per-mode input/output factor matrices, a
    dense core connecting the flattened ranks, rank-wise scale vectors
    (lambdas), and frozen hyper-parameters alpha/beta/c.

    All weight tensors are allocated uninitialised here and filled in by
    ``_initialize_weights``.
    """
    super(tensorizedlinear, self).__init__()
    self.in_size = list(in_size)
    self.out_size = list(out_size)
    self.in_rank = list(in_rank)
    self.out_rank = list(out_rank)

    # Per-mode factors: (rank x size) on the input side, (size x rank)
    # on the output side.
    in_factors = []
    for rank_i, size_i in zip(in_rank, in_size):
        in_factors.append(Parameter(torch.Tensor(rank_i, size_i)))
    self.factors_in = ParameterList(in_factors)
    out_factors = []
    for rank_o, size_o in zip(out_rank, out_size):
        out_factors.append(Parameter(torch.Tensor(size_o, rank_o)))
    self.factors_out = ParameterList(out_factors)

    # Dense core mixing all output ranks with all input ranks, plus bias.
    self.core = Parameter(torch.Tensor(np.prod(out_rank), np.prod(in_rank)))
    self.bias = Parameter(torch.Tensor(np.prod(out_size)))

    # Rank-wise scale vectors, initialised to one.
    self.lamb_in = ParameterList(
        [Parameter(torch.ones(r)) for r in in_rank])
    self.lamb_out = ParameterList(
        [Parameter(torch.ones(r)) for r in out_rank])

    # Fixed hyper-parameters registered as frozen Parameters.
    self.alpha = Parameter(torch.tensor(alpha), requires_grad=False)
    self.beta = Parameter(torch.tensor(beta), requires_grad=False)
    self.c = Parameter(torch.tensor(c), requires_grad=False)
    self._initialize_weights()
def __init__(self, base_model, h_dim, out_dim=1, device='cpu', seed=None):
    r'''
    Continual-learning wrapper: a shared task-agnostic base model plus one
    task-specific tensor per task, with loss/prediction functions chosen
    from ``out_dim``.

    :Parameters:
    base_model: torch.nn.Module: task-agnostic model
    h_dim: int: dimension of base_model output
    out_dim: output dimension of task-specific tensor \omega (dimension of loss_function input)
    device: device string the model is moved to
    seed: optional seed for the internal torch generator
    '''
    # fix: docstring is now a raw string — the previous non-raw '\omega'
    # was an invalid escape sequence (DeprecationWarning).
    super().__init__()
    self.base = base_model
    self.h_dim = h_dim
    self.out_dim = out_dim
    self.device = device
    #replay buffers for the previous tasks (includes torch.utils.data.Subsets)
    self.tasks_replay_buffers = []
    #task-specific tensors (which applied to base model outputs)
    self.tasks_omegas = ParameterList()
    if out_dim == 1:
        # computes loss
        self.loss_func = nn.BCEWithLogitsLoss()

        # predicts distribution over the classes
        def pred_func(input):
            # fix: F.sigmoid is deprecated; torch.sigmoid is the exact
            # drop-in replacement.
            pred = torch.sigmoid(input)
            return torch.stack([1. - pred, pred], dim=-1).squeeze()

        self.pred_func = pred_func
    if out_dim > 1:
        self.loss_func = nn.CrossEntropyLoss()
        self.pred_func = nn.Softmax()
    self.torch_gen = create_torch_random_gen(seed)
    self.to(self.device)
def __init__(self, size, rank, alpha=1, beta=0.2, c=1, d=1e6, e=1,
             init='unif'):
    """Bayesian Tucker decomposition container: per-mode factor matrices,
    a core tensor, rank-wise lambda vectors, a learnable precision ``tau``
    and frozen hyper-parameters alpha/beta/c/d/e.

    Factor and lambda values are filled in by ``reset_parameters``.
    """
    super(bftucker, self).__init__()
    self.size = size
    self.rank = rank
    self.dim = len(size)

    # Learnable noise precision.
    self.tau = Parameter(torch.tensor(1.0))
    # Fixed prior hyper-parameters, registered as frozen Parameters.
    self.alpha = Parameter(torch.tensor(alpha), requires_grad=False)
    self.beta = Parameter(torch.tensor(beta), requires_grad=False)
    self.c = Parameter(torch.tensor(c), requires_grad=False)
    self.d = Parameter(torch.tensor(d), requires_grad=False)
    self.e = Parameter(torch.tensor(e), requires_grad=False)

    # Rank-wise scale vectors and per-mode (size x rank) factor matrices.
    self.lamb = ParameterList([Parameter(torch.Tensor(r)) for r in rank])
    factor_params = []
    for mode_size, mode_rank in zip(size, rank):
        factor_params.append(Parameter(torch.Tensor(mode_size, mode_rank)))
    self.factors = ParameterList(factor_params)
    self.core = Parameter(torch.zeros(rank))
    self.reset_parameters(init)
def __init__(self, F, l_h, l_a, C, l_keep_prob=None):
    """Feed-forward net with per-layer dropout keep-probabilities.

    :param F: input feature dimension
    :param l_h: list of hidden layer sizes
    :param l_a: list of activation functions
    :param C: output dimension
    :param l_keep_prob: optional list of keep probabilities; defaults to
        keeping everything (all ones)
    """
    super(FFNN, self).__init__()
    sizes = [F] + l_h + [C]
    self.Ws = ParameterList([
        Parameter(torch.randn(sizes[i], sizes[i + 1]))
        for i in range(len(sizes) - 1)
    ])
    self.bs = ParameterList([Parameter(torch.zeros(h)) for h in sizes[1:]])
    self.fs = l_a
    # fix: compare against None with `is not None` (PEP 8); `!= None` can
    # misbehave for objects with a custom __eq__ (e.g. tensors/arrays).
    self.dropout_prob = (l_keep_prob if l_keep_prob is not None else
                         [1 for _ in range(len(l_a) + 1)])
def __init__(self, args_dict):
    """Two-unit recurrent integrator with a single input/output channel.

    Builds frozen encoder/decoder vectors (optionally orthonormalised),
    normalises, aligns and rescales the first encoder/decoder pair, then
    draws a 2x2 recurrent matrix rescaled to ``init_radius`` spectral
    radius. All remaining hyper-parameters are taken verbatim from
    ``args_dict`` via ``setattr``.
    """
    #Assert module specifications are consistent.
    super(TwoTwoNet, self).__init__()
    self.is_W_parametrized = False
    self.is_dale_constrained = False
    for k, v in args_dict.items():
        setattr(self, k, v)
    assert self.n_channels == 1
    if len(self.saturations) > 2:
        # fix: `saturations` was an undefined bare name here, so this
        # error path raised NameError instead of logging; use the attribute.
        logging.error(
            'ManyChannelsIntegrator.saturations should be [low, high], not {}'
            .format(self.saturations))
    std = 1. / sqrt(self.n)
    # Frozen random encoder/decoder vectors, one per channel.
    self.encoders = ParameterList([
        Parameter(tch.zeros(self.n).normal_(0, std), requires_grad=False)
        for _ in range(self.n_channels)
    ])
    self.decoders = ParameterList([
        Parameter(tch.zeros(self.n).normal_(0, std), requires_grad=False)
        for _ in range(self.n_channels)
    ])
    if self.init_vectors_type == 'random':
        pass
    elif self.init_vectors_type == 'orthonormal':
        logging.info('Orthogonalizing encoders and decoders')
        # Stack all encoders/decoders as columns, orthonormalise the
        # stack, and write the columns back.
        plop = tch.zeros(self.n, 2 * self.n_channels)
        for idx, item in enumerate(self.encoders):
            plop[:, idx] = item.data
        for idx, item in enumerate(self.decoders):
            plop[:, len(self.encoders) + idx] = item.data
        plop = orth(plop)
        for idx, item in enumerate(self.encoders):
            item.data = plop[:, idx]
        for idx, item in enumerate(self.decoders):
            item.data = plop[:, len(self.encoders) + idx]
    # Normalise the first encoder/decoder pair to unit norm.
    self.encoders[0].data = self.encoders[0].data / tch.sqrt(
        (self.encoders[0].data**2).sum())
    self.decoders[0].data = self.decoders[0].data / tch.sqrt(
        (self.decoders[0].data**2).sum())
    # Align the encoder / decoder
    self.decoders[0].data = (
        (1. - self.init_vectors_overlap) * self.decoders[0].data +
        self.init_vectors_overlap * self.encoders[0].data)
    # Rescale the io vectors
    self.decoders[0].data = self.init_vectors_scales[0] * self.decoders[
        0].data / tch.sqrt((self.decoders[0].data**2).sum())
    self.encoders[
        0].data = self.encoders[0].data * self.init_vectors_scales[1]
    # Random 2x2 recurrent matrix, rescaled to the requested spectral
    # radius. NOTE(review): tch.eig was removed in torch>=1.9; newer
    # installs need torch.linalg.eig here — confirm the pinned version.
    self.w = Parameter(tch.zeros(2, 2).normal_(0, std), requires_grad=True)
    eigs, _ = tch.eig(self.w, eigenvectors=False)
    spectral_rad = tch.sqrt((eigs**2).sum(dim=1).max()).item()
    assert spectral_rad != 0
    self.w.data = self.init_radius * self.w.data / spectral_rad
    self.device = tch.device(self.device_name)
    self.to(self.device)
    os.makedirs(self.save_folder, exist_ok=True)
    self.compute_relevant_quantities()
def __init__(self, F, l_h, l_a, C):
    """Plain feed-forward net: random-normal weights, zero biases, and the
    given list of activation functions.
    """
    super(FFNN, self).__init__()
    dims = [F] + l_h + [C]
    weight_params = []
    for fan_in, fan_out in zip(dims[:-1], dims[1:]):
        weight_params.append(Parameter(torch.randn(fan_in, fan_out)))
    self.Ws = ParameterList(weight_params)
    self.bs = ParameterList([Parameter(torch.zeros(d)) for d in dims[1:]])
    self.fs = l_a
class HM_color(nn.Module):
    """Colour Helmholtz machine built from three independent per-channel
    ``HM_bw`` models (one each for R, G, B).

    NOTE(review): ``self.rgb_models`` is a plain Python list, so the
    sub-models are NOT registered as child modules (``.to()``/``state_dict``
    will not traverse them); their parameters are exposed only through the
    manually-filled ``self.params`` ParameterList — confirm this is
    intentional.
    """

    def __init__(self, layers=None):
        # Default layer sizes correspond to a flattened 38804-pixel input.
        super(HM_color, self).__init__()
        if layers is None:
            layers = [38804, 2048, 128, 32]
        self.rgb_models = [
            HM_bw(layers),
            HM_bw(layers),
            HM_bw(layers),
        ]
        self.params = ParameterList()
        for model in self.rgb_models:
            self.params.extend(model.parameters())

    def forward(self, x):
        """
        x must have shape N x C x H x W

        Binarises the input, flattens each colour channel, and runs each
        channel through its own model; returns the per-channel outputs.
        """
        x = torch.round(x)
        flat_dim = x.shape[-1] * x.shape[-2]
        color_layers = [x[:, i].reshape(-1, flat_dim) for i in range(3)]
        outputs = [
            model.forward(layer)
            for model, layer in zip(self.rgb_models, color_layers)
        ]
        return outputs

    def loss_function(self, *fwd_outputs):
        # Total loss is the sum of the per-channel model losses.
        losses = [
            model.loss_function(*output)
            for model, output in zip(self.rgb_models, fwd_outputs)
        ]
        return sum(losses)

    def sample(self, num_samples):
        # Generate per-channel fantasies via each model's sleep phase and
        # stack them into a trailing channel dimension.
        fake_x = torch.zeros(num_samples)
        fantasies = [
            model.run_sleep(fake_x)[2][-1] for model in self.rgb_models
        ]
        return torch.stack(fantasies, dim=-1)

    def reconstruct(self, x):
        # Binarise, split channels, reconstruct each independently, and
        # stack back into a trailing channel dimension.
        x = torch.round(x)
        flat_dim = x.shape[-1] * x.shape[-2]
        color_layers = [x[:, i].reshape(-1, flat_dim) for i in range(3)]
        images = [
            model.reconstruct(layer)
            for model, layer in zip(self.rgb_models, color_layers)
        ]
        return torch.stack(images, dim=-1)
def __init__(self, dim: int, n_components: int) -> None:
    """Mixture model over ``n_components`` components: learnable mixture
    logits plus one random-normal ``log_alpha`` vector per component.
    """
    super(DMM, self).__init__()
    self._dim = dim
    self._n_components = n_components
    # Mixture weights start uniform (all-zero logits).
    self.mixture_logits = Parameter(
        torch.zeros((n_components, ), dtype=torch.float))
    # One small random log-alpha vector per component.
    self.log_alphas = ParameterList()
    for _ in range(n_components):
        component_log_alpha = Parameter(
            torch.randn(dim, dtype=torch.float) / 3)
        self.log_alphas.append(component_log_alpha)
def __init__(self,
             metadata: Metadata,
             min_embedding_size: int = 2,
             max_embedding_size: int = 50) -> None:
    """Input layer that passes numerical/binary variables through unchanged
    and maps each categorical variable through its own learned embedding.

    :param metadata: dataset metadata describing each independent variable
    :param min_embedding_size: lower bound for categorical embedding sizes
    :param max_embedding_size: upper bound for categorical embedding sizes
    :raises Exception: on an unexpected variable type
    """
    super(MultiInputLayer, self).__init__()
    self.metadata = metadata
    self.has_categorical = False
    self.output_size = 0
    # our embeddings need to be referenced like this to be considered in the parameters of this model
    self.embeddings = ParameterList()
    # this reference is for using the embeddings during the forward pass
    self.embedding_by_variable = {}
    for i, variable_metadata in enumerate(
            self.metadata.get_by_independent_variable()):
        # if it is a numerical variable
        if variable_metadata.is_binary() or variable_metadata.is_numerical(
        ):
            assert variable_metadata.get_size() == 1
            self.output_size += 1
        # if it is a categorical variable
        elif variable_metadata.is_categorical():
            variable_size = variable_metadata.get_size()
            # this is an arbitrary rule of thumb taken from several blog posts
            embedding_size = compute_embedding_size(
                variable_size, min_embedding_size, max_embedding_size)
            # the embedding is implemented manually to be able to use one hot encoding
            # PyTorch embedding only accepts as input label encoding
            embedding = Parameter(data=torch.Tensor(
                variable_size, embedding_size).normal_(),
                                  requires_grad=True)
            self.embeddings.append(embedding)
            self.embedding_by_variable[
                variable_metadata.get_name()] = embedding
            self.output_size += embedding_size
            self.has_categorical = True
        # if it is another type
        else:
            raise Exception(
                "Unexpected variable type '{}' for variable '{}'.".format(
                    variable_metadata.get_type(),
                    variable_metadata.get_name()))
def __init__(self, tensor, gradient_update="S", rank=10):
    """Variational tensor factorisation: per-dimension lists of mean and
    Cholesky-scale vectors (one rank-``rank`` pair per index along each
    tensor mode), plus a standard normal used for sampling.

    :param tensor: data object exposing ``train_vals``, ``dims`` and
        ``datatype``
    :param gradient_update: gradient-update scheme tag (stored as-is)
    :param rank: latent factor dimension
    """
    super().__init__()
    self.tensor = tensor
    self.num_train = len(tensor.train_vals)
    self.dims = tensor.dims
    self.ndim = len(self.dims)
    self.rank = rank
    self.datatype = tensor.datatype
    self.gradient_update = gradient_update
    self.means = ModuleList()
    self.chols = ModuleList()
    # fix: the enumerate index was unused — iterate the sizes directly.
    for ncol in self.dims:
        mean_list = ParameterList()
        cov_list = ParameterList()
        for _ in range(ncol):
            mean_list.append(
                Parameter(torch.randn(rank), requires_grad=True))
            # Cholesky scales start near one with small random jitter.
            cov_list.append(
                Parameter(torch.ones(rank) + 1 / 4 * torch.randn(rank),
                          requires_grad=True))
        self.means.append(mean_list)
        self.chols.append(cov_list)
    self.standard_multi_normal = MultivariateNormal(torch.zeros(rank),
                                                    torch.eye(rank))
    self.sigma = 1
    self.batch_size = 64
    self.lambd = 1 / self.batch_size
    self.round_robins_indices = [0 for _ in self.dims]
    self.k1 = 128
def __init__(self, layers=None):
    """One ``HM_bw`` model per RGB channel; their parameters are collected
    into ``self.params`` so the optimiser can see them.
    """
    super(HM_color, self).__init__()
    if layers is None:
        # Default layer sizes for a flattened 38804-pixel input.
        layers = [38804, 2048, 128, 32]
    self.rgb_models = [HM_bw(layers) for _ in range(3)]
    self.params = ParameterList()
    for channel_model in self.rgb_models:
        self.params.extend(channel_model.parameters())
def __init__(self, layers=None, scale=.1, p=None, lr=.1, lam=None):
    """Fully-connected net with Gaussian-initialised weights and biases.

    ``p``, ``lr`` and ``lam`` are stored untouched for use elsewhere
    (presumably a norm order, learning rate and regularisation weight —
    confirm against callers).
    """
    super().__init__()
    if layers is None:
        layers = [2, 100, 2]
    self.weights = ParameterList([
        Parameter(scale * torch.randn(m, n))
        for m, n in zip(layers[:-1], layers[1:])
    ])
    self.biases = ParameterList(
        [Parameter(scale * torch.randn(n)) for n in layers[1:]])
    self.p = p
    self.lr = lr
    self.lam = lam
    # NOTE(review): this attribute shadows nn.Module.train(); calling
    # model.train() on an instance will fail — consider renaming the flag.
    self.train = False
def __init__(
    self,
    mixture_size: int,
    do_layer_norm: bool = False,
    initial_scalar_parameters: List[float] = None,
    trainable: bool = True,
) -> None:
    """Learned scalar mixture over ``mixture_size`` tensors: one scalar
    weight per tensor plus a global gamma, all optionally trainable.
    """
    super().__init__()
    self.mixture_size = mixture_size
    self.do_layer_norm = do_layer_norm

    if initial_scalar_parameters is None:
        initial_scalar_parameters = [0.0] * mixture_size
    if len(initial_scalar_parameters) != mixture_size:
        raise ConfigurationError(
            "Length of initial_scalar_parameters {} differs "
            "from mixture_size {}".format(initial_scalar_parameters,
                                          mixture_size))

    mixing_weights = []
    for initial_value in initial_scalar_parameters:
        mixing_weights.append(
            Parameter(torch.FloatTensor([initial_value]),
                      requires_grad=trainable))
    self.scalar_parameters = ParameterList(mixing_weights)
    self.gamma = Parameter(torch.FloatTensor([1.0]),
                           requires_grad=trainable)
def __init__(
    self,
    mixture_size: int,
    do_layer_norm: bool = False,
    initial_scalar_parameters: List[float] = None,
    trainable: bool = True,
) -> None:
    """Scalar mixture of ``mixture_size`` tensors: per-tensor scalar
    weights plus a global gamma, all optionally trainable.
    """
    super().__init__()
    self.mixture_size = mixture_size
    self.do_layer_norm = do_layer_norm

    if initial_scalar_parameters is None:
        initial_scalar_parameters = [0.0] * mixture_size
    if len(initial_scalar_parameters) != mixture_size:
        raise ValueError(
            f"Length of `initial_scalar_parameters` {initial_scalar_parameters} differs "
            f"from `mixture_size` {mixture_size}")

    weight_params = []
    for initial_value in initial_scalar_parameters:
        weight_params.append(
            Parameter(
                torch.FloatTensor([initial_value]),
                requires_grad=trainable,
            ))
    self.scalar_parameters = ParameterList(weight_params)
    self.gamma = Parameter(torch.FloatTensor([1.0]),
                           requires_grad=trainable)
def __init__(self, mixture_size: int, trainable: bool = False) -> None:
    """
    Inits scalar mix implementation.
    ``mixture = gamma * sum(s_k * tensor_k)`` where ``s = softmax(w)``,
    with ``w`` and ``gamma`` scalar parameters.
    :param mixture_size: size of mixtures (usually the number of layers)
    :param trainable: whether the mixing weights are learned
    """
    super(ScalarMix, self).__init__()
    self.mixture_size = mixture_size

    # One zero-initialised mixing weight per layer, allocated on flair's
    # default device.
    layer_weights = []
    for _ in range(mixture_size):
        layer_weights.append(
            Parameter(
                torch.tensor([0.0], dtype=torch.float, device=flair.device),
                requires_grad=trainable,
            ))
    self.scalar_parameters = ParameterList(layer_weights)
    self.gamma = Parameter(
        torch.tensor([1.0], dtype=torch.float, device=flair.device),
        requires_grad=trainable,
    )
def __init__(self, args):
    """Parse and validate module-type/count specifications from ``args``.

    ``args.type_modules`` / ``args.num_modules`` may each be either a
    comma-separated string or an already-split list; both are normalised
    to lists here, and their lengths must agree.
    """
    #Assert module specifications are consistent.
    if not hasattr(self, 'type_modules'):
        self.type_modules = []
    if self.type_modules != []:
        # A subclass already set the module types; keep them.
        pass
    elif isinstance(args.type_modules, str):
        # fix: use isinstance instead of type(x) == type('') comparisons
        self.type_modules = args.type_modules.split(',')
    else:
        assert isinstance(args.type_modules, list)
        assert isinstance(args.type_modules[0], str)
        self.type_modules = args.type_modules
    if isinstance(args.num_modules, str):
        self.num_modules = list(map(int, args.num_modules.split(',')))
    else:
        assert isinstance(args.num_modules, list)
        assert isinstance(args.num_modules[0], int)
        self.num_modules = args.num_modules
    self.num_types = len(self.num_modules)
    assert len(self.type_modules) == self.num_types, (str(self.type_modules) + \
        ' should have '+str(self.num_types)+' elts.')
    self.tot_modules = sum(self.num_modules)
    # Small epsilon so usage counters can be divided before any updates.
    self.usage_normalization = 1e-9
    self.has_global_variable = False
    self.StructureParameters = ParameterList()
def __init__(self, out_cls=10):
    """Hierarchical Bayesian NN container: a shared ``w0`` network plus a
    growing list of per-class networks and gamma-distribution parameters.

    :param out_cls: number of output classes for each sub-network
    """
    # two prior hyper-parameters (currently unused — kept as written)
    super(Hbnn, self).__init__()
    self.prior_v = 100
    self.prior_tau_0_reciprocal = 1000
    self.num_net = 0
    self.out_cls = out_cls
    self.w0 = Net(out_cls)  # this is the network of w0
    self.hbnn = ModuleList()  # this is the network of all the classes
    # per-class gamma mean/std parameter lists, filled as classes are added
    self.mu_gamma_g = ParameterList()
    self.sigma_gamma_g = ParameterList()
    self.mu_gamma = Parameter(torch.ones(1))
    self.sigma_gamma = Parameter(torch.ones(1))
    if torch.cuda.is_available():
        self.w0 = self.w0.cuda()
def _create_candecomp_cores_unconstrained(tensor_modes, order):
    """Return a ParameterList of zero-initialised CP factor matrices, one
    ``(mode_size x order)`` Parameter per tensor mode.
    """
    return ParameterList([
        Parameter(torch.Tensor(mode_size, order).zero_())
        for mode_size in tensor_modes
    ])
def __init__(self, tau_in, tau_out, weight_init='randn', real=False, gain=1, device=torch.device('cpu'), dtype=torch.float): super(MixRepsScalar, self).__init__() # Remove extra tailing zeros in input/output type while not tau_in[-1]: tau_in.pop() if type(tau_out) is int: tau_out = [tau_out] * len(tau_in) else: while not tau_out[-1]: tau_out.pop() self.tau_in = list(tau_in) self.tau_out = list(tau_out) self.real = real self.cat_dim = -1 if real else -2 weights = init_mix_reps_weights(tau_in, tau_out, weight_init, real=real, gain=gain, device=device, dtype=dtype) self.weights = ParameterList([Parameter(weight) for weight in weights])
def __init__(self,
             mixture_size: int,
             do_layer_norm: bool = False,
             initial_scalar_parameters: List[float] = None,
             trainable: bool = True,
             dropout: float = None,
             dropout_value: float = -1e20) -> None:
    """Scalar mix with optional layer dropout: when ``dropout`` is set,
    mask/fill buffers are registered so individual layer weights can be
    replaced by ``dropout_value`` (pushed out of the softmax) at forward
    time.
    """
    super(ScalarMixWithDropout, self).__init__()
    self.mixture_size = mixture_size
    self.do_layer_norm = do_layer_norm
    self.dropout = dropout

    if initial_scalar_parameters is None:
        initial_scalar_parameters = [0.0] * mixture_size
    if len(initial_scalar_parameters) != mixture_size:
        raise ConfigurationError(
            "Length of initial_scalar_parameters {} differs "
            "from mixture_size {}".format(initial_scalar_parameters,
                                          mixture_size))

    mixing_weights = []
    for initial_value in initial_scalar_parameters:
        mixing_weights.append(
            Parameter(torch.FloatTensor([initial_value]),
                      requires_grad=trainable))
    self.scalar_parameters = ParameterList(mixing_weights)
    self.gamma = Parameter(torch.FloatTensor([1.0]),
                           requires_grad=trainable)

    if self.dropout:
        # Buffers used at forward time to blank out dropped layers.
        n_weights = len(self.scalar_parameters)
        dropout_mask = torch.zeros(n_weights)
        dropout_fill = torch.empty(n_weights).fill_(dropout_value)
        self.register_buffer("dropout_mask", dropout_mask)
        self.register_buffer("dropout_fill", dropout_fill)
def __init__(
    self,
    mixture_size: int,
    do_layer_norm: bool = False,
    initial_scalar_parameters: Optional[List[float]] = None,
    trainable: bool = True,
) -> None:
    """Scalar mixture whose weights default to the uniform value
    ``1 / mixture_size``; gamma starts at one.
    """
    super().__init__()
    self.mixture_size = mixture_size
    self.do_layer_norm = do_layer_norm

    if initial_scalar_parameters is None:
        initial_scalar_parameters = [1.0 / mixture_size] * mixture_size
    if len(initial_scalar_parameters) != mixture_size:
        raise ValueError(
            "initial_scalar_parameters & mixture_size not match.")

    self.scalar_parameters = ParameterList()
    for weight_init in initial_scalar_parameters:
        self.scalar_parameters.append(
            Parameter(
                torch.FloatTensor([weight_init]),
                requires_grad=trainable,
            ))
    self.gamma = Parameter(torch.FloatTensor([1.0]),
                           requires_grad=trainable)
def __init__(self, F, l_h, l_a, C, params=None):
    """Feed-forward net with optional extra per-activation parameters.

    :param F: input feature dimension
    :param l_h: list of hidden layer sizes
    :param l_a: list of activation functions
    :param C: output dimension
    :param params: optional list of per-activation parameter values;
        entries that are None get no parameter
    """
    super(FFNN, self).__init__()
    sizes = [F] + l_h + [C]
    self.Ws = ParameterList([
        Parameter(torch.randn(sizes[i], sizes[i + 1]))
        for i in range(len(sizes) - 1)
    ])
    self.bs = ParameterList([Parameter(torch.zeros(h)) for h in sizes[1:]])
    self.fs = l_a
    if params is None:
        self.params = [None for _ in l_a]
    else:
        # fix: test `is not None` explicitly — the previous truthiness test
        # (`if p`) silently dropped a legitimate parameter value of 0.
        self.params = [
            Parameter(torch.tensor(p)) if p is not None else None
            for p in params
        ]
    # fix: filtering with `if p` relied on single-element tensor truthiness
    # (and excluded zero-valued Parameters); filter on None instead.
    self.params_list = ParameterList(
        [p for p in self.params if p is not None])
def reset_layer_num(self):
    """Rebuild the first ELMo scalar mix with freshly zero-initialised
    weights sized to the LSTM's layer count, and reset gamma to one.
    """
    n_layers = self._elmo._elmo_lstm.num_layers
    fresh_weights = ParameterList(
        [Parameter(torch.FloatTensor([0.0])) for _ in range(n_layers)])
    scalar_mix = self._elmo._scalar_mixes[0]
    scalar_mix.scalar_parameters = fresh_weights
    scalar_mix.gamma = Parameter(torch.FloatTensor([1.0]))
def __init__(self, mixture_size: int, do_layer_norm: bool = False) -> None:
    """Minimal scalar mix: zero-initialised per-tensor weights and a
    gamma of one, all trainable.
    """
    super(ScalarMix, self).__init__()
    self.mixture_size = mixture_size
    self.do_layer_norm = do_layer_norm
    weight_params = [
        Parameter(torch.FloatTensor([0.0])) for _ in range(mixture_size)
    ]
    self.scalar_parameters = ParameterList(weight_params)
    self.gamma = Parameter(torch.FloatTensor([1.0]))
def _create_candecomp_cores(in_modes, out_modes, order):
    """Return a ParameterList of zero-initialised CP factor matrices: one
    ``(mode_size x order)`` Parameter per input mode followed by one per
    output mode.
    """
    assert len(in_modes) == len(out_modes)
    assert order > 0
    return ParameterList([
        Parameter(torch.Tensor(mode_size, order).zero_())
        for mode_size in in_modes + out_modes
    ])
def _create_tucker_params(in_modes, out_modes, ranks):
    """Create normally-initialised Tucker parameters: a core tensor of
    shape ``ranks + ranks`` and one ``(mode_size x rank)`` factor matrix
    per input/output mode.

    Returns ``(core, factors)`` with ``factors`` a ParameterList.
    """
    assert len(in_modes) == len(out_modes) == len(ranks)
    doubled_ranks = ranks + ranks  # input ranks followed by output ranks
    core = Parameter(torch.Tensor(*list(doubled_ranks)).normal_())
    factor_list = []
    for mode_size, mode_rank in zip(in_modes + out_modes, doubled_ranks):
        factor_list.append(
            Parameter(torch.Tensor(mode_size, mode_rank).normal_()))
    return core, ParameterList(factor_list)
def __init__(self, num_tensors, trainable=True):
    """Scalar mix over ``num_tensors`` tensors: weights start at zero and
    gamma at one, all optionally frozen.
    """
    super(ScalarMix, self).__init__()
    self.num_tensors = num_tensors
    mixing_weights = []
    for _ in range(num_tensors):
        mixing_weights.append(
            Parameter(torch.FloatTensor([0.0]), requires_grad=trainable))
    self.scalar_parameters = ParameterList(mixing_weights)
    self.gamma = Parameter(torch.FloatTensor([1.0]),
                           requires_grad=trainable)