import torch.nn as nn
from torch.nn import ModuleList


class FCDecoder(Decoder):
    """Implements a fully connected decoder."""

    def __init__(self, latent_dim, activations, hidden_dims, output_dim):
        """
        :param latent_dim: dimension of the latent space
        :param output_dim: dimension of the output
        :param activations: array of activation functions, one per layer
        :param hidden_dims: array of hidden dims (if different for each layer)
        """
        super(FCDecoder, self).__init__(latent_dim, output_dim)
        self.activations = activations
        self.hidden_dims = hidden_dims
        # build the network
        self.__build_layers()

    def __build_layers(self):
        # latent -> first hidden, hidden -> hidden, then hidden -> output
        self.layers = ModuleList([nn.Linear(self.latent_dim, self.hidden_dims[0])])
        self.layers.extend(
            [nn.Linear(self.hidden_dims[i], self.hidden_dims[i + 1])
             for i in range(0, len(self.hidden_dims) - 1)])
        self.layers.append(nn.Linear(self.hidden_dims[-1], self.output_dim))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = self.activations[i](layer(x))
        return x
class FCEncoder(Encoder):
    """Implements a fully connected encoder."""

    def __init__(self, input_dim, latent_dim, activations, hidden_dims):
        """
        :param input_dim: input dimension
        :param latent_dim: latent dimension
        :param activations: array of activation functions, one per layer
        :param hidden_dims: array of hidden dims
        """
        super(FCEncoder, self).__init__(input_dim, latent_dim)
        self.activations = activations
        self.hidden_dims = hidden_dims
        self.__build_layers()

    def __build_layers(self):
        # input -> first hidden, hidden -> hidden, then hidden -> latent
        self.layers = ModuleList([nn.Linear(self.input_dim, self.hidden_dims[0])])
        self.layers.extend([
            nn.Linear(self.hidden_dims[i], self.hidden_dims[i + 1])
            for i in range(0, len(self.hidden_dims) - 1)
        ])
        self.layers.append(nn.Linear(self.hidden_dims[-1], self.latent_dim))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = self.activations[i](layer(x))
        return x
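# A minimal usage sketch (hypothetical, not from the original sources): it
# assumes the Encoder/Decoder base classes above simply store the dimension
# arguments they receive. The sizes and activation choices are illustrative.
import torch

encoder = FCEncoder(input_dim=784, latent_dim=32,
                    activations=[torch.relu, torch.relu],
                    hidden_dims=[256])
decoder = FCDecoder(latent_dim=32, activations=[torch.relu, torch.sigmoid],
                    hidden_dims=[256], output_dim=784)
x = torch.randn(8, 784)
x_hat = decoder(encoder(x))  # round trip through the bottleneck, shape (8, 784)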
def __init__(self) -> None:
    super().__init__()
    # param_sz, n_layers, dist (torch.distributed or an equivalent wrapper),
    # and EltwiseMultiplicationModule are assumed to be defined at module
    # scope in the original test file.
    self.modulelist = ModuleList(
        EltwiseMultiplicationModule(weight=Parameter(
            torch.empty((param_sz,), dtype=torch.float32)))
        for _ in range(n_layers))
    # Rank 0 fills each weight so that the slice owned by `rank` holds the
    # value 2 * layer_num * rank.
    for layer_num, module in enumerate(self.modulelist):
        if dist.get_rank() == 0:
            param: Parameter = module.weight
            partition_sz = math.ceil(param.numel() / dist.get_world_size())
            offset = 0
            for rank in range(dist.get_world_size()):
                with torch.no_grad():
                    param[offset:offset + partition_sz].fill_(
                        2 * layer_num * rank)
                offset += partition_sz
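# Worked example of the partitioning arithmetic above (illustrative numbers):
# with param.numel() == 10 and dist.get_world_size() == 4,
# partition_sz = math.ceil(10 / 4) = 3, so the slices filled per rank are
# [0:3], [3:6], [6:9], and [9:12]; the last slice is clipped to the tensor's
# actual length by Python slicing semantics.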
def _get_clones(module, N):
    # Deep-copy the template module N times so each clone has its own weights.
    return ModuleList([copy.deepcopy(module) for _ in range(N)])
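# Hypothetical usage sketch of _get_clones, mirroring how torch.nn stacks
# transformer layers: deepcopy gives every clone its own parameters instead
# of sharing one set across depth.
import copy

import torch.nn as nn
from torch.nn import ModuleList

template = nn.TransformerEncoderLayer(d_model=32, nhead=4)
layers = _get_clones(template, 6)
assert layers[0] is not layers[1]  # independent modules, independent weights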
import os
import pickle as pkl

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import ModuleList


class MLP(nn.Module):
    def __init__(
        self,
        name,
        logpath=None,
        epoch=None,
        d_in=None,
        d_out=None,
        mlp_layers=None,
        shape=None,
        d_avg=None,
        n_hidden=None,
        f_act=None,
        f_out=(None, {}),
        seed=None,
        use_standard_initialization=True,
        std_weight=None,
        std_bias=None,
    ):
        super(MLP, self).__init__()
        self.name = name
        self.function_name_mappings = {
            "tanh": torch.tanh,
            "tanh2": tanh2,  # assumed to be defined elsewhere in the project
            "relu": torch.relu,
            "softplus": F.softplus,
            "exp": torch.exp,
            "None": None,
        }
        if logpath is not None:
            # load a saved model: architecture arguments must not be given
            assert d_in is None
            assert d_out is None
            assert mlp_layers is None
            assert f_act is None
            assert f_out == (None, {})
            assert seed is None
            self.load_parameters(logpath)
            self.create_network()
            self.load_weights(logpath, epoch=epoch)
        else:
            assert epoch is None
            self.d_in = d_in
            self.d_out = d_out
            if mlp_layers is not None:
                # either provide the arch spec directly via mlp_layers
                assert shape is None
                assert d_avg is None
                assert n_hidden is None
                self.arch_spec = mlp_layers
            else:
                # or compute the arch spec from shape, d_avg, and n_hidden
                assert mlp_layers is None
                self.arch_spec = self.compute_arch_spec(
                    shape=shape, d_avg=d_avg, n_hidden=n_hidden
                )
            self.f_act = self.function_name_mappings[f_act]
            self.out_trafo_fun = (
                self.function_name_mappings[f_out[0]] if f_out[0] is not None else None
            )
            self.out_trafo_params = f_out[1]
            self.seed = seed
            self.create_network()
            if not use_standard_initialization:
                self.initialize_weights(std_weight=std_weight, std_bias=std_bias)

    def create_network(self):
        # process architecture
        self.n_hidden_layers = len(self.arch_spec)
        self.is_linear = self.n_hidden_layers == 0  # no hidden layers --> linear model
        if not self.is_linear:
            assert self.f_act is not None
        # seeding
        if self.seed is not None:
            torch.manual_seed(self.seed)
        # define the network
        if self.is_linear:
            self.linears = ModuleList(
                [nn.Linear(in_features=self.d_in, out_features=self.d_out)]
            )
        else:
            self.linears = ModuleList(
                [nn.Linear(in_features=self.d_in, out_features=self.arch_spec[0])]
            )
            for i in range(1, len(self.arch_spec)):
                self.linears.append(
                    nn.Linear(
                        in_features=self.linears[-1].out_features,
                        out_features=self.arch_spec[i],
                    )
                )
            self.linears.append(
                nn.Linear(
                    in_features=self.linears[-1].out_features, out_features=self.d_out
                )
            )

    def forward(self, X, output_layer=None):
        if output_layer is None:
            output_layer = self.n_hidden_layers + 1
        assert 0 <= output_layer <= len(self.arch_spec) + 1
        Y = X
        if output_layer == 0:
            return Y
        if self.is_linear:
            Y = (
                self.linears[0](Y)
                if self.out_trafo_fun is None
                else self.out_trafo_fun(self.linears[0](Y), **self.out_trafo_params)
            )
        else:
            # do not iterate directly over self.linears, this is slow using ModuleList
            for i in range(self.n_hidden_layers):
                Y = self.f_act(self.linears[i](Y))
                if i + 1 == output_layer:
                    return Y
            Y = (
                self.linears[-1](Y)
                if self.out_trafo_fun is None
                else self.out_trafo_fun(self.linears[-1](Y), **self.out_trafo_params)
            )
        return Y

    def save(self, path, epoch):
        with open(os.path.join(path, self.name + "_parameters.pkl"), "wb") as f:
            parameters = {
                "d_in": self.d_in,
                "d_out": self.d_out,
                "arch_spec": self.arch_spec,
                "f_act": self.f_act.__name__ if self.f_act is not None else "None",
                "seed": self.seed,
                "out_trafo_fun": self.out_trafo_fun.__name__
                if self.out_trafo_fun is not None
                else "None",
                "out_trafo_params": self.out_trafo_params
                if self.out_trafo_params is not None
                else "None",
            }
            pkl.dump(parameters, f)
        if epoch is not None:
            with open(
                os.path.join(path, self.name + "_weights_{:d}".format(epoch)), "wb"
            ) as f:
                torch.save(self.state_dict(), f)
        else:
            with open(os.path.join(path, self.name + "_weights"), "wb") as f:
                torch.save(self.state_dict(), f)

    def delete_all_weight_files(self, path):
        for file in os.listdir(path):
            if file.startswith(self.name + "_weights"):
                os.remove(os.path.join(path, file))

    def load_parameters(self, path):
        with open(os.path.join(path, self.name + "_parameters.pkl"), "rb") as f:
            parameters = pkl.load(f)
        self.d_in = parameters["d_in"]
        self.d_out = parameters["d_out"]
        self.arch_spec = parameters["arch_spec"]
        self.seed = parameters["seed"]
        self.f_act = self.function_name_mappings[parameters["f_act"]]
        self.out_trafo_fun = self.function_name_mappings[parameters["out_trafo_fun"]]
        self.out_trafo_params = (
            parameters["out_trafo_params"]
            if parameters["out_trafo_params"] != "None"
            else {}
        )

    def load_weights(self, path, epoch):
        if epoch is not None:
            self.load_state_dict(
                torch.load(
                    os.path.join(path, self.name + "_weights_{:d}".format(epoch))
                )
            )
        else:
            self.load_state_dict(torch.load(os.path.join(path, self.name + "_weights")))
def __init__(self, n_syscall, n_process, args):
    """Constructor.

    Args:
        n_syscall (int): number of words in the system-call vocabulary
        n_process (int): number of words in the process-name vocabulary
        args (argparse.Namespace): arguments
    """
    super(Transformer, self).__init__()
    # Get which arguments are disabled
    self.disable_entry = args.disable_entry
    self.disable_ret = args.disable_ret
    self.disable_time = args.disable_time
    self.disable_proc = args.disable_proc
    self.disable_pid = args.disable_pid
    self.disable_tid = args.disable_tid
    self.disable_order = args.disable_order
    # Get arguments
    self.emb_sys = args.emb_sys
    self.emb_proc = args.emb_proc
    self.emb_pid = args.emb_pid
    self.emb_tid = args.emb_tid
    self.emb_order = args.emb_order
    self.emb_time = args.emb_time
    # Compute the embedding size
    self.emb_dim = self.emb_sys
    if not self.disable_proc:
        self.emb_dim += self.emb_proc
    if not self.disable_pid:
        self.emb_dim += self.emb_pid
    if not self.disable_tid:
        self.emb_dim += self.emb_tid
    if not self.disable_order:
        self.emb_dim += self.emb_order
    if not self.disable_time:
        self.emb_dim += self.emb_time
    # Embeddings
    self.embedding_call = nn.Embedding(n_syscall, self.emb_sys, padding_idx=0)
    if not self.disable_entry:
        self.embedding_entry = nn.Embedding(3, self.emb_sys, padding_idx=0)
    if not self.disable_ret:
        self.embedding_ret = nn.Embedding(3, self.emb_sys, padding_idx=0)
    if not self.disable_proc:
        self.embedding_proc = nn.Embedding(n_process, self.emb_proc, padding_idx=0)
    self.dropout = nn.Dropout(args.dropout)
    # Encoder
    encoder_layer = nn.TransformerEncoderLayer(
        self.emb_dim, args.heads, args.hiddens, args.dropout)
    self.encoder_layers = ModuleList(
        [copy.deepcopy(encoder_layer) for _ in range(args.layers)])
    # Decoder
    self.decoder = nn.Linear(self.emb_dim, n_syscall)
    self.init_weights()
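# Worked example of the embedding-size arithmetic above (illustrative values):
# with emb_sys=64, emb_proc=16, emb_pid=8, emb_tid=8, emb_order=16,
# emb_time=16 and no feature disabled,
#     emb_dim = 64 + 16 + 8 + 8 + 16 + 16 = 128,
# which must be divisible by args.heads for nn.TransformerEncoderLayer.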
def __init__(self, decoder_layer, num_layers, norm=None):
    super(TransformerDecoder, self).__init__()
    self.layers = ModuleList(
        [copy.deepcopy(decoder_layer) for _ in range(num_layers)])
    self.num_layers = num_layers
    self.norm = norm
def _get_clones(module, N):
    return ModuleList([copy.deepcopy(module) for i in range(N)])


def _get_activation_fn(activation):
    # as in torch.nn.modules.transformer
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu

    raise RuntimeError(
        "activation should be relu/gelu, not {}".format(activation))
def _get_clones(module, N):
    # Does `module` here represent the current state of our whole model? It should...!
    return ModuleList([copy.deepcopy(module) for i in range(N)])
class SSI_DDI(nn.Module):
    def __init__(self, in_features, hidd_dim, kge_dim, rel_total,
                 heads_out_feat_params, blocks_params):
        super().__init__()
        self.in_features = in_features
        self.hidd_dim = hidd_dim
        self.rel_total = rel_total
        self.kge_dim = kge_dim
        self.n_blocks = len(blocks_params)

        self.initial_norm = LayerNorm(self.in_features)
        self.blocks = []
        self.net_norms = ModuleList()
        for i, (head_out_feats, n_heads) in enumerate(
                zip(heads_out_feat_params, blocks_params)):
            block = SSI_DDI_Block(n_heads, in_features, head_out_feats,
                                  final_out_feats=self.hidd_dim)
            self.add_module(f"block{i}", block)
            self.blocks.append(block)
            self.net_norms.append(LayerNorm(head_out_feats * n_heads))
            in_features = head_out_feats * n_heads
        self.co_attention = CoAttentionLayer(self.kge_dim)
        self.KGE = RESCAL(self.rel_total, self.kge_dim)

    def forward(self, triples):
        h_data, t_data, rels = triples
        h_data.x = self.initial_norm(h_data.x, h_data.batch)
        t_data.x = self.initial_norm(t_data.x, t_data.batch)

        repr_h = []
        repr_t = []
        for i, block in enumerate(self.blocks):
            out1, out2 = block(h_data), block(t_data)
            h_data = out1[0]
            t_data = out2[0]
            r_h = out1[1]
            r_t = out2[1]
            repr_h.append(r_h)
            repr_t.append(r_t)
            h_data.x = F.elu(self.net_norms[i](h_data.x, h_data.batch))
            t_data.x = F.elu(self.net_norms[i](t_data.x, t_data.batch))

        repr_h = torch.stack(repr_h, dim=-2)
        repr_t = torch.stack(repr_t, dim=-2)

        kge_heads = repr_h
        kge_tails = repr_t
        attentions = self.co_attention(kge_heads, kge_tails)
        # attentions = None
        scores = self.KGE(kge_heads, kge_tails, rels, attentions)
        return scores
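# Why add_module is used above: self.blocks is a plain Python list, so its
# entries are not registered as submodules automatically; add_module (or
# storing them in a ModuleList) is what makes their parameters show up in
# .parameters() and .state_dict(). A minimal sketch of the same pattern,
# independent of the SSI_DDI classes:
import torch.nn as nn

class Stack(nn.Module):
    def __init__(self):
        super().__init__()
        self.blocks = []                         # plain list: not registered
        for i in range(3):
            block = nn.Linear(8, 8)
            self.add_module(f"block{i}", block)  # registers block's parameters
            self.blocks.append(block)

assert len(list(Stack().parameters())) == 6      # 3 weights + 3 biases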
def __init__(self, d_model: int, num_layers: int, num_heads: int):
    super(TransformerDecoder, self).__init__()
    self.layers = ModuleList([
        TransformerDecoderLayer(d_model, num_heads) for _ in range(num_layers)
    ])
def _get_clones(module, n):
    return ModuleList([copy.deepcopy(module) for _ in range(n)])