def _create_layers(self):
    """Build the categorical encoding layer and the autoregressive flow stack.

    Reads ``model_params["categ_encoding"]["num_dimensions"]`` into
    ``self.latent_dim``, registers the LSTM network factory under
    ``flow_config["model_func"]`` of the encoding config, creates the
    encoding layer, and stores ``[encoding_layer] + flows`` as an
    ``nn.ModuleList`` in ``self.flow_layers``.
    """
    encoding_params = self.model_params["categ_encoding"]
    self.latent_dim = encoding_params["num_dimensions"]

    def build_lstm(c_out):
        # Hyperparameters are looked up each time a network is instantiated,
        # not once when this factory is defined.
        return AutoregressiveLSTMModel(
            c_in=self.latent_dim,
            c_out=c_out,
            max_seq_len=self.max_seq_len,
            num_layers=self.model_params["coupling_hidden_layers"],
            hidden_size=self.model_params["coupling_hidden_size"],
            dp_rate=self.model_params["coupling_dropout"],
            input_dp_rate=self.model_params["coupling_input_dropout"],
        )

    # The encoding receives the same network factory through its flow config.
    encoding_params["flow_config"]["model_func"] = build_lstm
    self.encoding_layer = create_encoding(
        self.model_params["categ_encoding"],
        dataset_class=self.dataset_class,
        vocab_size=self.vocab_size,
        vocab=self.vocab,
    )

    flow_count = self.model_params["coupling_num_flows"]
    stack = []
    for flow_index in range(flow_count):
        stack.append(ActNormFlow(self.latent_dim))
        # The very first flow has no invertible convolution in front of its coupling.
        if flow_index > 0:
            stack.append(InvertibleConv(self.latent_dim))
        stack.append(
            AutoregressiveMixtureCDFCoupling(
                c_in=self.latent_dim,
                model_func=build_lstm,
                block_type="LSTM model",
                num_mixtures=self.model_params["coupling_num_mixtures"],
            )
        )

    self.flow_layers = nn.ModuleList([self.encoding_layer, *stack])
def _create_node_flow_layers(self):
    """Assemble and return the list of flow layers operating on node embeddings.

    Hyperparameters come from ``self.model_params`` via ``get_param_val``
    (defaults: 8 flows, hidden size 384, 4 hidden layers, 16 mixtures,
    mask ratio 0.5, dropout 0.0).

    Returns:
        A plain Python list: ``num_flows`` repetitions of
        ``[ActNormFlow, InvertibleConv, MixtureCDFCoupling]`` followed by
        one closing ``ActNormFlow``.
    """
    cfg = self.model_params
    num_flows = get_param_val(cfg, "coupling_num_flows", default_val=8)
    hidden_size = get_param_val(cfg, "coupling_hidden_size", default_val=384)
    hidden_layers = get_param_val(cfg, "coupling_hidden_layers", default_val=4)
    num_mixtures = get_param_val(cfg, "coupling_num_mixtures", default_val=16)
    mask_ratio = get_param_val(cfg, "coupling_mask_ratio", default_val=0.5)
    dropout = get_param_val(cfg, "coupling_dropout", default_val=0.0)

    # One channel mask instance is shared by every coupling layer.
    channel_mask = CouplingLayer.create_channel_mask(self.embed_dim, ratio=mask_ratio)

    def build_gnn(c_out):
        # Factory handed to each coupling layer as its ``model_func``.
        return RGCNNet(
            c_in=self.embed_dim,
            c_out=c_out,
            num_edges=1,
            num_layers=hidden_layers,
            hidden_size=hidden_size,
            dp_rate=dropout,
            rgc_layer_fun=RelationGraphAttention,
        )

    flow_stack = []
    for _ in range(num_flows):
        flow_stack.append(ActNormFlow(self.embed_dim))
        flow_stack.append(InvertibleConv(self.embed_dim))
        flow_stack.append(
            MixtureCDFCoupling(
                c_in=self.embed_dim,
                mask=channel_mask,
                model_func=build_gnn,
                block_type="GraphAttentionNet",
                num_mixtures=num_mixtures,
                regularizer_max=3.5,  # To ensure an accurate reversibility
                regularizer_factor=2,
            )
        )

    # Closing activation normalization after the last coupling layer.
    flow_stack.append(ActNormFlow(c_in=self.embed_dim))
    return flow_stack
def _create_layers(self):
    """Build the categorical encoding layer and the Transformer coupling flows.

    Sets ``self.latent_dim`` from the encoding config, registers the
    Transformer network factory and the block type ``"Transformer"`` in the
    encoding's ``flow_config``, creates the encoding layer, and stores
    ``[encoding_layer] + flows`` as an ``nn.ModuleList`` in
    ``self.flow_layers``.
    """
    encoding_params = self.model_params["categ_encoding"]
    self.latent_dim = encoding_params["num_dimensions"]

    def build_transformer(c_out):
        # Hyperparameters are looked up each time a network is instantiated.
        return CouplingTransformerNet(
            c_in=self.latent_dim,
            c_out=c_out,
            num_layers=self.model_params["coupling_hidden_layers"],
            hidden_size=self.model_params["coupling_hidden_size"],
        )

    flow_config = encoding_params["flow_config"]
    flow_config["model_func"] = build_transformer
    flow_config["block_type"] = "Transformer"

    self.encoding_layer = create_encoding(
        self.model_params["categ_encoding"],
        dataset_class=self.dataset_class,
        vocab_size=self.vocab_size,
    )

    flow_count = self.model_params["coupling_num_flows"]

    if self.latent_dim > 1:
        # Several latent channels: every flow shares one channel mask.
        base_mask = CouplingLayer.create_channel_mask(
            self.latent_dim, ratio=self.model_params["coupling_mask_ratio"]
        )

        def mask_for(flow_index):
            return base_mask
    else:
        # Single latent channel: use a chess mask and invert it on odd flows.
        base_mask = CouplingLayer.create_chess_mask()

        def mask_for(flow_index):
            if flow_index % 2 == 0:
                return base_mask
            return 1 - base_mask

    stack = []
    for flow_index in range(flow_count):
        stack.append(ActNormFlow(self.latent_dim))
        stack.append(InvertibleConv(self.latent_dim))
        stack.append(
            MixtureCDFCoupling(
                c_in=self.latent_dim,
                mask=mask_for(flow_index),
                model_func=build_transformer,
                block_type="Transformer",
                num_mixtures=self.model_params["coupling_num_mixtures"],
            )
        )

    self.flow_layers = nn.ModuleList([self.encoding_layer, *stack])
def _create_flows(num_dims, embed_dims, config):
    """Create the flow layers built from mixture-CDF coupling layers.

    Args:
        num_dims: Channel count passed as ``c_in`` to every flow layer.
        embed_dims: Input size of the single linear layer that feeds each
            ``ExtActNormFlow``.
        config: Parameter dictionary read through ``get_param_val``.
            ``"model_func"`` is mandatory (``allow_default=False``);
            ``"num_flows"`` (0), ``"block_type"`` (None) and
            ``"num_mixtures"`` (8) fall back to the listed defaults.

    Returns:
        ``nn.ModuleList`` holding a single ``ExtActNormFlow`` when
        ``num_flows == 0``, otherwise ``num_flows`` repetitions of
        ``[ExtActNormFlow, InvertibleConv, MixtureCDFCoupling]``.
    """
    num_flows = get_param_val(config, "num_flows", 0)
    model_func = get_param_val(config, "model_func", allow_default=False)
    block_type = get_param_val(config, "block_type", None)
    num_mixtures = get_param_val(config, "num_mixtures", 8)

    def make_actnorm():
        # For the activation normalization, an embedding is mapped to scaling
        # and bias (hence 2 * num_dims outputs) with a single layer.
        scale_bias_net = SimpleLinearLayer(
            c_in=embed_dims, c_out=2 * num_dims, data_init=True
        )
        return ExtActNormFlow(c_in=num_dims, net=scale_bias_net)

    if num_dims > 1:
        # Several channels: one channel mask shared by all coupling layers.
        base_mask = CouplingLayer.create_channel_mask(c_in=num_dims)

        def pick_mask(flow_index):
            return base_mask
    else:
        # Single channel: chess mask, inverted on every odd flow index.
        base_mask = CouplingLayer.create_chess_mask()

        def pick_mask(flow_index):
            if flow_index % 2 == 0:
                return base_mask
            return 1 - base_mask

    if num_flows == 0:
        # Num_flows == 0 => mixture model
        return nn.ModuleList([make_actnorm()])

    assembled = []
    for flow_index in range(num_flows):
        assembled.append(make_actnorm())
        assembled.append(InvertibleConv(c_in=num_dims))
        assembled.append(
            MixtureCDFCoupling(
                c_in=num_dims,
                mask=pick_mask(flow_index),
                block_type=block_type,
                model_func=model_func,
                num_mixtures=num_mixtures,
            )
        )
    return nn.ModuleList(assembled)
def _create_flows(num_dims, embed_dims, config):
    """Create the flow layers built from plain (non-mixture) coupling layers.

    Args:
        num_dims: Channel count passed as ``c_in`` to every flow layer.
        embed_dims: Size of the external embedding; used as ``c_in`` of the
            activation-normalization layer and as ``ext_input_dims`` of the
            coupling network.
        config: Parameter dictionary read through ``get_param_val`` with
            defaults ``"num_flows"`` = 0, ``"hidden_layers"`` = 2 and
            ``"hidden_size"`` = 256.

    Returns:
        ``nn.ModuleList`` holding a single ``ExtActNormFlow`` when
        ``num_flows == 0`` or ``num_dims == 1``, otherwise ``num_flows``
        repetitions of ``[ExtActNormFlow, InvertibleConv, CouplingLayer]``.
    """
    num_flows = get_param_val(config, "num_flows", 0)
    num_hidden_layers = get_param_val(config, "hidden_layers", 2)
    hidden_size = get_param_val(config, "hidden_size", 256)

    def build_coupling_net(c_out):
        # We apply a linear net in the coupling layers for linear flows.
        return LinearNet(
            c_in=num_dims,
            c_out=c_out,
            num_layers=num_hidden_layers,
            hidden_size=hidden_size,
            ext_input_dims=embed_dims,
        )

    def make_actnorm():
        # For the activation normalization, an embedding is mapped to scaling
        # and bias (hence 2 * num_dims outputs) with a single layer.
        scale_bias_net = SimpleLinearLayer(
            c_in=embed_dims, c_out=2 * num_dims, data_init=True
        )
        return ExtActNormFlow(c_in=num_dims, net=scale_bias_net)

    def make_coupling():
        # We do not use mixture coupling layers here as we need the inverse
        # to be differentiable as well. A new channel mask is created for
        # every coupling layer.
        return CouplingLayer(
            c_in=num_dims,
            mask=CouplingLayer.create_channel_mask(c_in=num_dims),
            block_type="LinearNet",
            model_func=build_coupling_net,
        )

    # Num_flows == 0 => mixture model, num_dims == 1 => coupling layers have no effect
    if num_flows == 0 or num_dims == 1:
        return nn.ModuleList([make_actnorm()])

    assembled = []
    for _ in range(num_flows):
        assembled.append(make_actnorm())
        assembled.append(InvertibleConv(c_in=num_dims))
        assembled.append(make_coupling())
    return nn.ModuleList(assembled)
def _create_step_flows(self):
    """Create the flow stacks for the three generation steps.

    Hyperparameters are read from ``self.model_params`` via ``get_param_val``.
    ``coupling_num_flows`` (default ``"4,6,6"``) and ``coupling_hidden_layers``
    (default ``4``) are per-step settings. Each may be given as a
    comma-separated string with one entry per step, as a single-value string,
    or as a single non-string value; a single value is reused for all three
    steps.

    Side effects:
        Sets ``self.step1_flows``, ``self.step2_flows`` and
        ``self.step3_flows``, each an ``nn.ModuleList``.

        * Step 1 operates on nodes only:
          ``[ActNormFlow, InvertibleConv, MixtureCDFCoupling]`` per flow,
          with an ``RGCNNet`` as coupling network.
        * Steps 2 and 3 operate on nodes and edges jointly:
          ``[NodeEdgeFlowWrapper(ActNorm), NodeEdgeFlowWrapper(InvertibleConv),
          NodeEdgeCoupling]`` per flow, with an ``EdgeGNN`` as coupling network.
          Step 2 uses ``Edge2NodeAttnLayer`` and step 3 uses
          ``Edge2NodeQKVAttnLayer`` for the edge-to-node update.
    """
    num_steps = 3

    def per_step(value):
        # Normalize a per-step hyperparameter into a list indexed by step.
        # Strings are split on commas; a single value (string or not) is
        # repeated for every step. This keeps "coupling_num_flows" and
        # "coupling_hidden_layers" consistent in the formats they accept.
        if isinstance(value, str):
            entries = [int(part) for part in value.split(",")]
            return entries if len(entries) > 1 else entries * num_steps
        return [value] * num_steps

    ## Get hyperparameters from model_params dictionary
    hidden_size_nodes = get_param_val(self.model_params, "coupling_hidden_size_nodes", default_val=64)
    hidden_size_edges = get_param_val(self.model_params, "coupling_hidden_size_edges", default_val=16)
    num_flows = per_step(
        get_param_val(self.model_params, "coupling_num_flows", default_val="4,6,6"))
    hidden_layers = per_step(
        get_param_val(self.model_params, "coupling_hidden_layers", default_val=4))
    num_mixtures_nodes = get_param_val(self.model_params, "coupling_num_mixtures_nodes", default_val=16)
    num_mixtures_edges = get_param_val(self.model_params, "coupling_num_mixtures_edges", default_val=16)
    mask_ratio = get_param_val(self.model_params, "coupling_mask_ratio", default_val=0.5)
    dropout = get_param_val(self.model_params, "coupling_dropout", default_val=0.0)

    #----------------#
    #- Step 1 flows -#
    #----------------#

    # NOTE(review): presumably a mask over the encoding_dim_nodes channels whose
    # split is governed by mask_ratio - confirm in CouplingLayer.create_channel_mask.
    coupling_mask_nodes = CouplingLayer.create_channel_mask(
        self.encoding_dim_nodes, ratio=mask_ratio)

    # Network factory for the node-only coupling layers of step 1.
    step1_model_func = lambda c_out: RGCNNet(
        c_in=self.encoding_dim_nodes,
        c_out=c_out,
        num_edges=self.num_edge_types,
        num_layers=hidden_layers[0],
        hidden_size=hidden_size_nodes,
        max_neighbours=self.dataset_class.num_max_neighbours(),
        dp_rate=dropout,
        rgc_layer_fun=RelationGraphConv)

    step1_flows = []
    for _ in range(num_flows[0]):
        step1_flows += [
            ActNormFlow(self.encoding_dim_nodes),
            InvertibleConv(self.encoding_dim_nodes),
            MixtureCDFCoupling(
                c_in=self.encoding_dim_nodes,
                mask=coupling_mask_nodes,
                model_func=step1_model_func,
                block_type="RelationGraphConv",
                num_mixtures=num_mixtures_nodes,
                regularizer_max=3.5,  # To ensure an accurate reversibility
                regularizer_factor=2)
        ]
    self.step1_flows = nn.ModuleList(step1_flows)

    #------------------#
    #- Step 2+3 flows -#
    #------------------#

    coupling_mask_edges = CouplingLayer.create_channel_mask(
        self.encoding_dim_edges, ratio=mask_ratio)

    # Definition of the Edge-GNN network
    def edge2node_layer_func(step_idx):
        # Step index 1 uses the plain attention layer; any other index
        # (here: 2) uses the query-key-value attention layer.
        if step_idx == 1:
            return lambda: Edge2NodeAttnLayer(
                hidden_size_nodes=hidden_size_nodes,
                hidden_size_edges=hidden_size_edges,
                skip_config=2)
        else:
            return lambda: Edge2NodeQKVAttnLayer(
                hidden_size_nodes=hidden_size_nodes,
                hidden_size_edges=hidden_size_edges,
                skip_config=2)

    node2edge_layer_func = lambda: Node2EdgePlainLayer(
        hidden_size_nodes=hidden_size_nodes,
        hidden_size_edges=hidden_size_edges,
        skip_config=2)

    def edge_gnn_layer_func(step_idx):
        return lambda: EdgeGNNLayer(
            edge2node_layer_func=edge2node_layer_func(step_idx),
            node2edge_layer_func=node2edge_layer_func)

    def get_model_func(step_idx):
        # Returning the lambda from a function binds step_idx per call.
        return lambda c_out_nodes, c_out_edges: EdgeGNN(
            c_in_nodes=self.encoding_dim_nodes,
            c_in_edges=self.encoding_dim_edges,
            c_out_nodes=c_out_nodes,
            c_out_edges=c_out_edges,
            edge_gnn_layer_func=edge_gnn_layer_func(step_idx),
            max_neighbours=self.dataset_class.num_max_neighbours(),
            num_layers=hidden_layers[step_idx])

    # Activation normalization and invertible 1x1 convolution need to be
    # applied on both nodes and edges independently.
    # The "NodeEdgeFlowWrapper" handles the forward pass for such flows.
    actnorm_layer = lambda: NodeEdgeFlowWrapper(
        node_flow=ActNormFlow(c_in=self.encoding_dim_nodes),
        edge_flow=ActNormFlow(c_in=self.encoding_dim_edges))
    permut_layer = lambda: NodeEdgeFlowWrapper(
        node_flow=InvertibleConv(c_in=self.encoding_dim_nodes),
        edge_flow=InvertibleConv(c_in=self.encoding_dim_edges))
    coupling_layer = lambda step_idx: NodeEdgeCoupling(
        c_in_nodes=self.encoding_dim_nodes,
        c_in_edges=self.encoding_dim_edges,
        mask_nodes=coupling_mask_nodes,
        mask_edges=coupling_mask_edges,
        num_mixtures_nodes=num_mixtures_nodes,
        num_mixtures_edges=num_mixtures_edges,
        model_func=get_model_func(step_idx),
        regularizer_max=3.5,  # To ensure an accurate reversibility
        regularizer_factor=2)

    step2_flows = []
    for _ in range(num_flows[1]):
        step2_flows += [
            actnorm_layer(),
            permut_layer(),
            # Step 2: EdgeGNN with the Edge2NodeAttnLayer edge-to-node update.
            coupling_layer(step_idx=1)
        ]
    self.step2_flows = nn.ModuleList(step2_flows)

    step3_flows = []
    for _ in range(num_flows[2]):
        step3_flows += [
            actnorm_layer(),
            permut_layer(),
            # Step 3: EdgeGNN with the Edge2NodeQKVAttnLayer edge-to-node update.
            coupling_layer(step_idx=2)
        ]
    self.step3_flows = nn.ModuleList(step3_flows)