    return BatchNorm1d(num_features=TEST_SETTINGS["out_features"])


def bn_layer2():
    return BatchNorm1d(num_features=TEST_SETTINGS["out_features2"])


INPUT_SHAPE = (TEST_SETTINGS["batch"], TEST_SETTINGS["in_features"])

TEST_PROBLEMS = {}
for lin_name, lin_cls in LINEARS.items():
    TEST_PROBLEMS["{}-bn-regression".format(lin_name)] = make_regression_problem(
        INPUT_SHAPE,
        single_linear_layer(TEST_SETTINGS, lin_cls, activation_cls=None) +
        [BatchNorm1d(TEST_SETTINGS["out_features"])])

    TEST_PROBLEMS["{}-bn-classification".format(lin_name)] = make_classification_problem(
        INPUT_SHAPE,
        single_linear_layer(TEST_SETTINGS, lin_cls, activation_cls=None) +
        [bn_layer1()])

    TEST_PROBLEMS["{}-bn-2layer-classification".format(lin_name)] = make_classification_problem(
        INPUT_SHAPE,
        two_linear_layers(TEST_SETTINGS, lin_cls, activation_cls=None) +
        [bn_layer2()])
def __init__(self, input_dim, output_dim,
             n_d=8, n_a=8,
             n_steps=3, gamma=1.3,
             n_independent=2, n_shared=2, epsilon=1e-15,
             virtual_batch_size=128, momentum=0.02,
             mask_type="sparsemax"):
    """
    Defines main part of the TabNet network without the embedding layers.

    Parameters
    ----------
    input_dim : int
        Number of features
    output_dim : int or list of int for multi task classification
        Dimension of network output
        examples : 1 for regression, 2 for binary classification, etc.
    n_d : int
        Dimension of the prediction layer (usually between 4 and 64)
    n_a : int
        Dimension of the attention layer (usually between 4 and 64)
    n_steps : int
        Number of successive steps in the network (usually between 3 and 10)
    gamma : float
        Float above 1, scaling factor for attention updates (usually between 1.0 and 2.0)
    n_independent : int
        Number of independent GLU layers in each GLU block (default 2)
    n_shared : int
        Number of shared GLU layers in each GLU block (default 2)
    epsilon : float
        Avoid log(0), this should be kept very low
    virtual_batch_size : int
        Batch size for Ghost Batch Normalization
    momentum : float
        Float value between 0 and 1 which will be used for momentum in all batch norms
    mask_type : str
        Either "sparsemax" or "entmax" : this is the masking function to use
    """
    super(TabNetNoEmbeddings, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.is_multi_task = isinstance(output_dim, list)
    self.n_d = n_d
    self.n_a = n_a
    self.n_steps = n_steps
    self.gamma = gamma
    self.epsilon = epsilon
    self.n_independent = n_independent
    self.n_shared = n_shared
    self.virtual_batch_size = virtual_batch_size
    self.mask_type = mask_type
    self.initial_bn = BatchNorm1d(self.input_dim, momentum=0.01)

    if self.n_shared > 0:
        shared_feat_transform = torch.nn.ModuleList()
        for i in range(self.n_shared):
            if i == 0:
                shared_feat_transform.append(
                    Linear(self.input_dim, 2 * (n_d + n_a), bias=False))
            else:
                shared_feat_transform.append(
                    Linear(n_d + n_a, 2 * (n_d + n_a), bias=False))
    else:
        shared_feat_transform = None

    self.initial_splitter = FeatTransformer(
        self.input_dim, n_d + n_a, shared_feat_transform,
        n_glu_independent=self.n_independent,
        virtual_batch_size=self.virtual_batch_size,
        momentum=momentum)

    self.feat_transformers = torch.nn.ModuleList()
    self.att_transformers = torch.nn.ModuleList()

    for step in range(n_steps):
        transformer = FeatTransformer(
            self.input_dim, n_d + n_a, shared_feat_transform,
            n_glu_independent=self.n_independent,
            virtual_batch_size=self.virtual_batch_size,
            momentum=momentum)
        attention = AttentiveTransformer(
            n_a, self.input_dim,
            virtual_batch_size=self.virtual_batch_size,
            momentum=momentum,
            mask_type=self.mask_type)
        self.feat_transformers.append(transformer)
        self.att_transformers.append(attention)

    if self.is_multi_task:
        self.multi_task_mappings = torch.nn.ModuleList()
        for task_dim in output_dim:
            task_mapping = Linear(n_d, task_dim, bias=False)
            initialize_non_glu(task_mapping, n_d, task_dim)
            self.multi_task_mappings.append(task_mapping)
    else:
        self.final_mapping = Linear(n_d, output_dim, bias=False)
        initialize_non_glu(self.final_mapping, n_d, output_dim)
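# The `momentum` argument above feeds every batch norm in the network.
# In PyTorch, BatchNorm1d updates its running statistics as
#   running = (1 - momentum) * running + momentum * batch_stat,
# so a small value such as the 0.01 used for `initial_bn` makes the running
# estimates move slowly. A minimal, self-contained check of that convention
# (the tensor values here are illustrative, not taken from TabNet):
import torch
from torch.nn import BatchNorm1d

bn = BatchNorm1d(4, momentum=0.01)
x = torch.randn(128, 4)
bn.train()
_ = bn(x)
expected = 0.99 * torch.zeros(4) + 0.01 * x.mean(dim=0)
assert torch.allclose(bn.running_mean, expected, atol=1e-6)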
def __init__(self, model: str, in_channels: int, out_channels: int,
             hidden_channels: int, num_relations: int, num_layers: int,
             heads: int = 4, dropout: float = 0.5):
    super().__init__()
    self.save_hyperparameters()
    self.model = model.lower()
    self.num_relations = num_relations
    self.dropout = dropout

    self.convs = ModuleList()
    self.norms = ModuleList()
    self.skips = ModuleList()

    if self.model == 'rgat':
        self.convs.append(
            ModuleList([
                GATConv(in_channels, hidden_channels // heads, heads,
                        add_self_loops=False) for _ in range(num_relations)
            ]))

        for _ in range(num_layers - 1):
            self.convs.append(
                ModuleList([
                    GATConv(hidden_channels, hidden_channels // heads, heads,
                            add_self_loops=False)
                    for _ in range(num_relations)
                ]))

    elif self.model == 'rgraphsage':
        self.convs.append(
            ModuleList([
                SAGEConv(in_channels, hidden_channels, root_weight=False)
                for _ in range(num_relations)
            ]))

        for _ in range(num_layers - 1):
            self.convs.append(
                ModuleList([
                    SAGEConv(hidden_channels, hidden_channels,
                             root_weight=False)
                    for _ in range(num_relations)
                ]))

    for _ in range(num_layers):
        self.norms.append(BatchNorm1d(hidden_channels))

    self.skips.append(Linear(in_channels, hidden_channels))
    for _ in range(num_layers - 1):
        self.skips.append(Linear(hidden_channels, hidden_channels))

    self.mlp = Sequential(
        Linear(hidden_channels, hidden_channels),
        BatchNorm1d(hidden_channels),
        ReLU(inplace=True),
        Dropout(p=self.dropout),
        Linear(hidden_channels, out_channels),
    )

    self.acc = Accuracy()
from itertools import product

import pytest
import torch
import torch.nn.functional as F
from torch.nn import BatchNorm1d, ReLU

from torch_geometric.nn import LayerNorm
from torch_geometric.nn.models import GAT, GCN, GIN, PNA, GraphSAGE

out_dims = [None, 8]
dropouts = [0.0, 0.5]
acts = [None, torch.relu_, F.elu, ReLU()]
norms = [None, BatchNorm1d(16), LayerNorm(16)]
jks = ['last', 'cat', 'max', 'lstm']


@pytest.mark.parametrize('out_dim,dropout,act,norm,jk',
                         product(out_dims, dropouts, acts, norms, jks))
def test_gcn(out_dim, dropout, act, norm, jk):
    x = torch.randn(3, 8)
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    out_channels = 16 if out_dim is None else out_dim

    model = GCN(8, 16, num_layers=2, out_channels=out_dim, dropout=dropout,
                act=act, norm=norm,
def __init__(self, vocab_size, embedding_size, cnn_sizes, output_size,
             kernel_size, pooling_len=1, pooling='avg', dilatation_rate=1,
             activation='ReLU', b_norm=False, dropout=0.0, padding_idx=None):
    super(Cnn1dEncoder, self).__init__()
    self.__params = locals()
    activation_cls = get_activation(activation)
    if not isinstance(pooling_len, (list, int)):
        raise TypeError("pooling_len should be of type int or int list")
    if pooling not in ['avg', 'max']:
        raise ValueError("the pooling type must be either 'max' or 'avg'")
    if len(cnn_sizes) <= 0:
        raise ValueError(
            "There should be at least one convolution layer (cnn_sizes should be non-empty).")

    if isinstance(pooling_len, int):
        pooling_len = [pooling_len] * len(cnn_sizes)
    if isinstance(kernel_size, int):
        kernel_size = [kernel_size] * len(cnn_sizes)
    if pooling == 'avg':
        pool1d = AvgPool1d
    else:
        pool1d = MaxPool1d

    # network construction
    embedding = Embedding(vocab_size, embedding_size, padding_idx=padding_idx)
    layers = [Transpose(1, 2)]
    in_channels = [embedding_size] + cnn_sizes[:-1]
    for i, (in_channel, out_channel, ksize, l_pool) in \
            enumerate(zip(in_channels, cnn_sizes, kernel_size, pooling_len)):
        pad = ((dilatation_rate ** i) * (ksize - 1) + 1) // 2
        layers.append(
            Conv1d(in_channel, out_channel, padding=pad,
                   kernel_size=ksize, dilation=dilatation_rate ** i))
        if b_norm:
            layers.append(BatchNorm1d(out_channel))
        layers.append(activation_cls)
        layers.append(Dropout(dropout))
        if l_pool > 1:
            layers.append(pool1d(l_pool))
    layers.append(Transpose(1, 2))

    self.locals_extractor = Sequential(embedding, *layers)
    self.global_pooler = SelfAttention(cnn_sizes[-1], output_size, pooling=pooling)
    self.__g_output_dim = output_size
    self.__l_output_dim = cnn_sizes[-1]
def __init__(self, input_dim, output_dim, activate, bn_decay):
    super(ResidualFC, self).__init__()
    self.seq = Sequential(Linear(input_dim, output_dim),
                          BatchNorm1d(output_dim, momentum=bn_decay),
                          activate())
def __init__(
    self,
    n_fft=4096,
    n_hop=1024,
    input_is_spectrogram=True,
    hidden_size=512,
    nb_channels=2,
    sample_rate=44100,
    nb_layers=3,
    input_mean=None,
    input_scale=None,
    max_bin=None,
    unidirectional=False,
    power=1,
):
    """
    Input: (nb_samples, nb_channels, nb_timesteps)
        or (nb_frames, nb_samples, nb_channels, nb_bins)
    Output: Power/Mag Spectrogram
        (nb_frames, nb_samples, nb_channels, nb_bins)
    """
    super(OpenUnmix, self).__init__()

    self.nb_output_bins = n_fft // 2 + 1
    if max_bin:
        self.nb_bins = max_bin
    else:
        self.nb_bins = self.nb_output_bins

    self.hidden_size = hidden_size

    self.stft = STFT(n_fft=n_fft, n_hop=n_hop)
    self.spec = Spectrogram(power=power, mono=(nb_channels == 1))
    self.register_buffer('sample_rate', torch.tensor(sample_rate))

    if input_is_spectrogram:
        self.transform = NoOp()
    else:
        self.transform = nn.Sequential(self.stft, self.spec)

    self.fc1 = Linear(self.nb_bins * nb_channels * 2, hidden_size, bias=False)
    self.bn1 = BatchNorm1d(hidden_size)

    if unidirectional:
        lstm_hidden_size = hidden_size
    else:
        lstm_hidden_size = hidden_size // 2

    self.lstm = LSTM(
        input_size=hidden_size,
        hidden_size=lstm_hidden_size,
        num_layers=nb_layers,
        bidirectional=not unidirectional,
        batch_first=False,
        dropout=0.4,
    )

    self.fc2 = Linear(in_features=hidden_size * 2,
                      out_features=hidden_size,
                      bias=False)
    self.bn2 = BatchNorm1d(hidden_size)

    self.fc3 = Linear(in_features=hidden_size,
                      out_features=self.nb_output_bins * nb_channels,
                      bias=False)
    self.bn3 = BatchNorm1d(self.nb_output_bins * nb_channels)

    if input_mean is not None:
        mix_mean = torch.from_numpy(-input_mean[0][:self.nb_bins]).float()
        mix_filtered_mean = torch.from_numpy(
            -input_mean[1][:self.nb_bins]).float()
    else:
        mix_mean = torch.zeros(self.nb_bins)
        mix_filtered_mean = torch.zeros(self.nb_bins)

    if input_scale is not None:
        mix_scale = torch.from_numpy(
            1.0 / input_scale[0][:self.nb_bins]).float()
        mix_filtered_scale = torch.from_numpy(
            1.0 / input_scale[1][:self.nb_bins]).float()
    else:
        mix_scale = torch.ones(self.nb_bins)
        mix_filtered_scale = torch.ones(self.nb_bins)

    self.mix_mean = Parameter(mix_mean)
    self.mix_scale = Parameter(mix_scale)
    self.mix_filtered_mean = Parameter(mix_filtered_mean)
    self.mix_filtered_scale = Parameter(mix_filtered_scale)

    self.output_scale = Parameter(torch.ones(self.nb_output_bins).float())
    self.output_mean = Parameter(torch.ones(self.nb_output_bins).float())
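# BatchNorm1d modules such as `bn1` above normalize over the feature dimension
# of a 2-D (batch, features) tensor. In sequence models like this, the usual
# trick is to fold the time axis into the batch axis before the Linear/BN pair
# and unfold it afterwards. A minimal sketch of that pattern (shapes and sizes
# are illustrative only; this is not the module's actual forward):
import torch
from torch.nn import Linear, BatchNorm1d

nb_frames, nb_samples, nb_channels, nb_bins, hidden_size = 10, 4, 2, 16, 8
fc1 = Linear(nb_channels * nb_bins, hidden_size, bias=False)
bn1 = BatchNorm1d(hidden_size)

x = torch.randn(nb_frames, nb_samples, nb_channels, nb_bins)
x = x.reshape(nb_frames * nb_samples, nb_channels * nb_bins)  # fold time into batch
x = bn1(fc1(x))                                               # normalize per hidden unit
x = x.reshape(nb_frames, nb_samples, hidden_size)             # unfold again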
def __init__(
    self, out_h, out_w, feat_dim, blocks_args=None, global_params=None
):
    super().__init__()
    assert isinstance(blocks_args, list), "blocks_args should be a list"
    assert len(blocks_args) > 0, "block args must be greater than 0"
    self._global_params = global_params
    self._blocks_args = blocks_args

    # Batch norm parameters
    bn_mom = 1 - self._global_params.batch_norm_momentum
    bn_eps = self._global_params.batch_norm_epsilon

    # Get stem static or dynamic convolution depending on image size
    image_size = global_params.image_size
    Conv2d = get_same_padding_conv2d(image_size=image_size)

    # Stem
    in_channels = 3  # rgb
    out_channels = round_filters(32, self._global_params)  # number of output channels
    # self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
    self._conv_stem = Conv2d(
        in_channels, out_channels, kernel_size=3, stride=1, bias=False
    )
    self._bn0 = nn.BatchNorm2d(
        num_features=out_channels, momentum=bn_mom, eps=bn_eps
    )
    image_size = calculate_output_image_size(image_size, 2)

    # Build blocks
    self._blocks = nn.ModuleList([])
    for block_args in self._blocks_args:
        # Update block input and output filters based on depth multiplier.
        block_args = block_args._replace(
            input_filters=round_filters(
                block_args.input_filters, self._global_params
            ),
            output_filters=round_filters(
                block_args.output_filters, self._global_params
            ),
            num_repeat=round_repeats(
                block_args.num_repeat, self._global_params
            ),
        )

        # The first block needs to take care of stride and filter size increase.
        self._blocks.append(
            MBConvBlock(block_args, self._global_params, image_size=image_size)
        )
        image_size = calculate_output_image_size(image_size, block_args.stride)
        if block_args.num_repeat > 1:  # modify block_args to keep same output size
            block_args = block_args._replace(
                input_filters=block_args.output_filters, stride=1
            )
        for _ in range(block_args.num_repeat - 1):
            self._blocks.append(
                MBConvBlock(block_args, self._global_params, image_size=image_size)
            )
            # image_size = calculate_output_image_size(image_size, block_args.stride)  # stride = 1

    # Head
    in_channels = block_args.output_filters  # output of final block
    out_channels = round_filters(1280, self._global_params)
    # out_channels = round_filters(512, self._global_params)
    Conv2d = get_same_padding_conv2d(image_size=image_size)
    self._conv_head = Conv2d(
        in_channels, out_channels, kernel_size=1, bias=False
    )
    self._bn1 = nn.BatchNorm2d(
        num_features=out_channels, momentum=bn_mom, eps=bn_eps
    )

    # Final linear layer
    self._avg_pooling = nn.AdaptiveAvgPool2d(1)
    self._dropout = nn.Dropout(self._global_params.dropout_rate)
    self._fc = nn.Linear(out_channels, self._global_params.num_classes)
    self._swish = MemoryEfficientSwish()

    self.output_layer = Sequential(
        BatchNorm2d(1280),
        # BatchNorm2d(512),
        Dropout(self._global_params.dropout_rate),
        Flatten(),
        Linear(1280 * out_h * out_w, feat_dim),
        # Linear(512 * out_h * out_w, feat_dim),
        BatchNorm1d(feat_dim),
    )
def __init__(self, args):
    super(GIN, self).__init__()
    self.args = args
    self.num_layer = int(self.args["num_layers"])
    assert self.num_layer > 2, "Number of layers in GIN should not be less than 3"

    missing_keys = list(
        set([
            "features_num",
            "num_class",
            "num_graph_features",
            "num_layers",
            "hidden",
            "dropout",
            "act",
            "mlp_layers",
            "eps",
        ]) - set(self.args.keys()))
    if len(missing_keys) > 0:
        raise Exception("Missing keys: %s." % ",".join(missing_keys))
    if not self.num_layer == len(self.args["hidden"]) + 1:
        LOGGER.warn(
            "Warning: layer size does not match the length of hidden units")
    self.num_graph_features = self.args["num_graph_features"]

    if self.args["act"] == "leaky_relu":
        act = LeakyReLU()
    elif self.args["act"] == "relu":
        act = ReLU()
    elif self.args["act"] == "elu":
        act = ELU()
    elif self.args["act"] == "tanh":
        act = Tanh()
    else:
        act = ReLU()

    train_eps = True if self.args["eps"] == "True" else False

    self.convs = torch.nn.ModuleList()
    self.bns = torch.nn.ModuleList()

    nn = [Linear(self.args["features_num"], self.args["hidden"][0])]
    for _ in range(self.args["mlp_layers"] - 1):
        nn.append(act)
        nn.append(Linear(self.args["hidden"][0], self.args["hidden"][0]))
        # nn.append(BatchNorm1d(self.args['hidden'][0]))
    self.convs.append(GINConv(Sequential(*nn), train_eps=train_eps))
    self.bns.append(BatchNorm1d(self.args["hidden"][0]))

    for i in range(self.num_layer - 3):
        nn = [Linear(self.args["hidden"][i], self.args["hidden"][i + 1])]
        for _ in range(self.args["mlp_layers"] - 1):
            nn.append(act)
            nn.append(
                Linear(self.args["hidden"][i + 1], self.args["hidden"][i + 1]))
            # nn.append(BatchNorm1d(self.args['hidden'][i+1]))
        self.convs.append(GINConv(Sequential(*nn), train_eps=train_eps))
        self.bns.append(BatchNorm1d(self.args["hidden"][i + 1]))

    self.fc1 = Linear(
        self.args["hidden"][self.num_layer - 3] + self.num_graph_features,
        self.args["hidden"][self.num_layer - 2],
    )
    self.fc2 = Linear(self.args["hidden"][self.num_layer - 2],
                      self.args["num_class"])
def __init__(self):
    super(Decoder2, self).__init__()

    kernel_size = 3
    stride = 2
    padding = self.same_padding(kernel_size)

    self.dense1 = Sequential(
        Linear(parameter.latent_dim, flat_dim),
        BatchNorm1d(flat_dim),
        ReLU(),
        Reshape(*orig_dim)
    )

    # inception 1
    self.inc1_1 = Sequential(
        Conv2d(orig_dim[0], 16, kernel_size=1, stride=1),
        ReLU(),
    )
    self.inc1_2 = Sequential(
        Conv2d(orig_dim[0], 16, kernel_size=kernel_size, stride=1,
               padding=padding),
        ReLU(),
    )
    self.conv1 = Sequential(
        ConvTranspose2d(32, 32, kernel_size=kernel_size, stride=stride,
                        padding=padding, output_padding=padding),
        ReLU(),
    )

    # inception 2
    self.inc2_1 = Sequential(
        Conv2d(32, 8, kernel_size=1, stride=1),
        ReLU(),
    )
    self.inc2_2 = Sequential(
        Conv2d(32, 8, kernel_size=kernel_size, stride=1, padding=padding),
        ReLU(),
    )
    self.conv2 = Sequential(
        ConvTranspose2d(16, 16, kernel_size=kernel_size, stride=stride,
                        padding=padding, output_padding=padding),
        ReLU(),
    )

    # inception 3
    self.inc3_1 = Sequential(
        Conv2d(16, 4, kernel_size=1, stride=1),
        ReLU(),
    )
    self.inc3_2 = Sequential(
        Conv2d(16, 4, kernel_size=kernel_size, stride=1, padding=padding),
        ReLU(),
    )
    self.conv3 = Sequential(
        ConvTranspose2d(8, 8, kernel_size=kernel_size, stride=stride,
                        padding=padding, output_padding=padding),
        BatchNorm2d(8),
        ReLU(),
    )

    # inception 4
    self.inc4_1 = Sequential(
        Conv2d(8, 2, kernel_size=1, stride=1),
        ReLU(),
    )
    self.inc4_2 = Sequential(
        Conv2d(8, 2, kernel_size=kernel_size, stride=1, padding=padding),
        ReLU(),
    )
    self.conv4 = Sequential(
        ConvTranspose2d(4, 4, kernel_size=kernel_size, stride=stride,
                        padding=padding, output_padding=padding),
        Sigmoid(),
    )

    self.set_optimizer(parameter.optimizer, lr=parameter.learning_rate,
                       betas=parameter.betas)
def __init__(
    self,
    n_fft=4096,
    n_hop=1024,
    input_is_spectrogram=False,
    hidden_size=512,
    nb_channels=2,
    sample_rate=44100,
    nb_layers=3,
    input_mean=None,
    input_scale=None,
    max_bin=None,
    unidirectional=False,
    power=1,
):
    """
    Input: (nb_samples, nb_channels, nb_timesteps)
        or (nb_frames, nb_samples, nb_channels, nb_bins)
    Output: Power/Mag Spectrogram
        (nb_frames, nb_samples, nb_channels, nb_bins)
    """
    super(OpenUnmix, self).__init__()

    self.nb_output_bins = n_fft // 2 + 1
    if max_bin:
        self.nb_bins = max_bin
    else:
        self.nb_bins = self.nb_output_bins

    self.hidden_size = hidden_size

    self.stft = STFT(n_fft=n_fft, n_hop=n_hop)
    self.spec = Spectrogram(power=power, mono=(nb_channels == 1))
    # self.mel_scale = torchaudio.transforms.MelScale()
    # self.fb = Fb()  # returns fb, stft
    # self.spec2 = MelSpectrogram(power=power, mono=(nb_channels == 1))
    self.register_buffer('sample_rate', torch.tensor(sample_rate))

    if input_is_spectrogram:
        self.transform = NoOp()
    else:
        self.transform = nn.Sequential(self.stft, self.spec)
        # self.transform2 = nn.Sequential(self.stft, self.spec2)

    self.fc1 = Linear(self.nb_bins * nb_channels, hidden_size, bias=False)
    self.bn1 = BatchNorm1d(hidden_size)

    if unidirectional:
        lstm_hidden_size = hidden_size
    else:
        lstm_hidden_size = hidden_size  # // 2 ; RNN hidden_size = 512 by default

    # use RNN to replace LSTM, keep 3x layers
    self.lstm = LSTM(
        input_size=hidden_size,
        hidden_size=lstm_hidden_size,
        num_layers=nb_layers,
        # bidirectional=not unidirectional,  # default: not bidirectional
        batch_first=False,
        dropout=0.4,
    )

    self.fc2 = Linear(
        in_features=hidden_size * 2,  # baseline: *2, Mel model: *3
        out_features=hidden_size,
        bias=False)
    self.bn2 = BatchNorm1d(hidden_size)

    self.fc3 = Linear(in_features=hidden_size,
                      out_features=self.nb_output_bins * nb_channels,
                      bias=False)
    self.bn3 = BatchNorm1d(self.nb_output_bins * nb_channels)

    if input_mean is not None:
        input_mean = torch.from_numpy(-input_mean[:self.nb_bins]).float()
    else:
        input_mean = torch.zeros(self.nb_bins)

    if input_scale is not None:
        input_scale = torch.from_numpy(1.0 / input_scale[:self.nb_bins]).float()
    else:
        input_scale = torch.ones(self.nb_bins)

    self.input_mean = Parameter(input_mean)
    self.input_scale = Parameter(input_scale)

    self.output_scale = Parameter(torch.ones(self.nb_output_bins).float())
    self.output_mean = Parameter(torch.ones(self.nb_output_bins).float())
def __init__(self, num_node_features=1, num_edge_features=3, num_state_features=0,
             num_node_hidden_channels=128, num_node_interaction_channels=128,
             num_interactions=1, num_edge_gaussians=None, num_node_embeddings=120,
             cutoff=10.0, out_size=1, readout='add', mean=None, std=None, norm=False,
             atom_ref=None, simple_z=True, interactions=None, readout_layer=None,
             add_state=False, **kwargs):
    """
    Args:
        num_node_features: (int) input number of node features (atom features).
        num_edge_features: (int) input number of bond features.
            If ``num_edge_gaussians`` is given, this parameter is ignored.
        num_state_features: (int) input number of state features.
        num_node_embeddings: (int) number of embeddings, used to generate the initial
            embedding matrix that stands in for the node features.
        num_node_hidden_channels: (int) number of hidden channels for node features.
        num_node_interaction_channels: (int) number of interaction channels for node features.
        num_interactions: (int) number of conv (interaction) layers.
        num_edge_gaussians: (int) number of Gaussian smearing functions for the radius.
            Deprecated; keep this consistent with your bond data.
        cutoff: (float) cutoff radius for computing neighbor bonds.
        readout: (str) node merging method, such as "add", "mean", "max".
        mean: (float) mean
        std: (float) std
        norm: (bool) False or True
        atom_ref: (torch.tensor of shape (120, 1)) per-atom reference properties.
            For example, if the target y is the volume of a compound, atom_ref could hold
            the atomic volumes of all atoms (H, H, He, Li, ...). You could copy the first
            entry to make sure the `H` index starts from 1.
        simple_z: (bool, str) whether to use just "z" or to use "x" for the calculation.
        interactions: (Callable) torch module for interactions.
            Dynamically: pass the torch module via the ``interactions`` parameter.
            Statically: re-define ``get_interactions_layer`` and keep this parameter None.
            The forward input is (h, edge_index, edge_weight, edge_attr, data=data).
        readout_layer: (Callable) torch module for readout.
            Dynamically: pass the torch module via the ``readout_layer`` parameter.
            Statically: re-define ``get_readout_layer`` and keep this parameter None.
            The forward input is (out,).
        add_state: (bool) add the state attribute before output.
        out_size: (int) output size; 1 for regression, and for classification it should
            be set to the number of classes.
    """
    super(BaseCrystalModel, self).__init__()

    self.interaction_kwargs = {}
    for k, v in kwargs.items():
        if "interaction_kwargs_" in k:
            self.interaction_kwargs[k.replace("interaction_kwargs_", "")] = v

    self.readout_kwargs = {}
    for k, v in kwargs.items():
        if "readout_kwargs_" in k:
            self.readout_kwargs[k.replace("readout_kwargs_", "")] = v

    # initial definitions
    if num_edge_gaussians is None:
        num_edge_gaussians = num_edge_features

    assert readout in ['add', 'sum', 'min', 'mean', "max"]

    self.num_node_hidden_channels = num_node_hidden_channels
    self.num_state_features = num_state_features
    self.num_node_interaction_channels = num_node_interaction_channels
    self.num_interactions = num_interactions
    self.num_edge_gaussians = num_edge_gaussians
    self.cutoff = cutoff
    self.readout = readout
    self.mean = mean
    self.std = std
    self.scale = None
    self.simple_z = simple_z
    self.interactions = interactions
    self.readout_layer = readout_layer
    self.out_size = out_size
    self.norm = norm

    # Embed atomic properties for later use.
    # (Do not embed too many; it slows things down and is rarely needed.)
    atomic_mass = torch.from_numpy(ase_data.atomic_masses)  # atomic masses
    covalent_radii = torch.from_numpy(ase_data.covalent_radii)  # covalent radii
    self.register_buffer('atomic_mass', atomic_mass)
    self.register_buffer('atomic_radii', covalent_radii)
    # Buffers must be registered to take effect; merely assigning a tensor to a
    # Module attribute does not turn it into a buffer, so it would not be visible
    # to state_dict(), buffers(), or named_buffers().

    # Define the inputs:
    # use atomic properties, or use an Embedding to generate (learnable) data;
    # use bond properties, or use an Embedding to generate (learnable) data.
    if num_node_embeddings < 120:
        print("By default num_node_embeddings >= 120. If you want to simplify the network "
              "(and do not need it to generalize to other elements), num_node_embeddings can "
              "be smaller, but it must be larger than the number of element types in your data.")
        # This is the number of atom species; usually leave it unchanged.
        # In general, networks that use embeddings generalize poorly to elements
        # not present in the training set.

    if simple_z is True:
        if num_node_features != 0:
            warnings.warn(
                "simple_z only accepts num_node_features == 0; your self-defined "
                "'x' data is not used, only the element number Z",
                UserWarning)
        self.embedding_e = Embedding(num_node_embeddings, num_node_hidden_channels)
        # self.embedding_l = Linear(2, 2)  # not used
        # self.embedding_l2 = Linear(2, 2)  # not used
    elif self.simple_z == "no_embed":
        # self.embedding_e = Linear(2, 2)
        self.embedding_l = Linear(num_node_features, num_node_hidden_channels)
        self.embedding_l2 = Linear(num_node_hidden_channels, num_node_hidden_channels)
    else:
        assert num_node_features > 0, \
            "The `num_node_features` must be the same size with `x` feature."
        self.embedding_e = Embedding(num_node_embeddings, num_node_hidden_channels)
        self.embedding_l = Linear(num_node_features, num_node_hidden_channels)
        self.embedding_l2 = Linear(num_node_hidden_channels, num_node_hidden_channels)

    self.bn = BatchNorm1d(num_node_hidden_channels)

    # Interaction layers: require a custom get_interactions_layer
    if interactions is None:
        self.get_interactions_layer()
    elif isinstance(interactions, ModuleList):
        self.get_res_interactions_layer(interactions)
    elif isinstance(interactions, Module):
        self.interactions = interactions
    else:
        raise NotImplementedError("please implement get_interactions_layer function, "
                                  "or pass interactions parameters.")

    # Readout layer: requires a custom get_readout_layer
    if readout_layer is None:
        self.get_readout_layer()
    elif isinstance(readout_layer, Module):
        self.readout_layer = readout_layer
    else:
        raise NotImplementedError("please implement get_readout_layer function, "
                                  "or pass readout_layer parameters.")

    self.register_buffer('initial_atom_ref', atom_ref)

    if atom_ref is None:
        self.atom_ref = atom_ref
    elif isinstance(atom_ref, Tensor) and atom_ref.shape[0] == 120:
        self.atom_ref = lambda x: atom_ref[x]
    elif atom_ref == "embed":
        self.atom_ref = Embedding(120, 1)
        self.atom_ref.weight.data.copy_(atom_ref)
        # whether the per-atom property is added at the end
    else:
        self.atom_ref = atom_ref

    self.add_state = add_state
    if self.add_state:
        self.dp = LayerNorm(self.num_state_features)
        self.ads = Linear(self.num_state_features, 1)
        self.ads2 = Linear(1, self.out_size)

    self.reset_parameters()
def __init__(self, num_classes=10, weight_bit_width=None, act_bit_width=None,
             in_bit_width=None, in_ch=3):
    super(CNV, self).__init__()

    weight_quant_type = get_quant_type(weight_bit_width)
    act_quant_type = get_quant_type(act_bit_width)
    in_quant_type = get_quant_type(in_bit_width)
    stats_op = get_stats_op(weight_quant_type)
    max_in_val = 1 - 2 ** (-7)  # for Q1.7 input format

    self.conv_features = ModuleList()
    self.linear_features = ModuleList()

    self.conv_features.append(
        QuantHardTanh(bit_width=in_bit_width,
                      quant_type=in_quant_type,
                      max_val=max_in_val,
                      restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                      scaling_impl_type=ScalingImplType.CONST))

    for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
        self.conv_features.append(
            get_quant_conv2d(in_ch=in_ch,
                             out_ch=out_ch,
                             bit_width=weight_bit_width,
                             quant_type=weight_quant_type,
                             stats_op=stats_op))
        in_ch = out_ch
        self.conv_features.append(BatchNorm2d(in_ch, eps=1e-4))
        self.conv_features.append(get_act_quant(act_bit_width, act_quant_type))
        if is_pool_enabled:
            self.conv_features.append(MaxPool2d(kernel_size=2))

    for in_features, out_features in INTERMEDIATE_FC_FEATURES:
        self.linear_features.append(
            get_quant_linear(
                in_features=in_features,
                out_features=out_features,
                per_out_ch_scaling=INTERMEDIATE_FC_PER_OUT_CH_SCALING,
                bit_width=weight_bit_width,
                quant_type=weight_quant_type,
                stats_op=stats_op))
        self.linear_features.append(BatchNorm1d(out_features, eps=1e-4))
        self.linear_features.append(get_act_quant(act_bit_width, act_quant_type))

    self.linear_features.append(
        get_quant_linear(in_features=LAST_FC_IN_FEATURES,
                         out_features=num_classes,
                         per_out_ch_scaling=LAST_FC_PER_OUT_CH_SCALING,
                         bit_width=weight_bit_width,
                         quant_type=weight_quant_type,
                         stats_op=stats_op))
    self.linear_features.append(LayerNorm())

    for m in self.modules():
        if isinstance(m, QuantConv2d) or isinstance(m, QuantLinear):
            torch.nn.init.uniform_(m.weight.data, -1, 1)
def bn_layer2():
    return BatchNorm1d(num_features=TEST_SETTINGS["out_features2"])
def __init__(self, in_c):
    super(GNAP, self).__init__()
    self.bn1 = BatchNorm2d(in_c, affine=False)
    self.pool = nn.AdaptiveAvgPool2d((1, 1))
    self.bn2 = BatchNorm1d(in_c, affine=False)
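# A minimal sketch of how the three GNAP modules above could be chained on an
# (N, C, H, W) feature map: non-affine 2-D batch norm, global average pooling,
# flatten to (N, C), then a non-affine 1-D batch norm on the pooled embedding.
# GNAP is usually described as norm-aware pooling, so any feature re-weighting
# step is omitted here; this only illustrates the shapes involved.
import torch
from torch import nn
from torch.nn import BatchNorm1d, BatchNorm2d

in_c = 512
bn1 = BatchNorm2d(in_c, affine=False)
pool = nn.AdaptiveAvgPool2d((1, 1))
bn2 = BatchNorm1d(in_c, affine=False)

x = torch.randn(8, in_c, 7, 7)   # conv feature map
x = bn1(x)                       # normalize per channel over N, H, W
x = pool(x).flatten(1)           # global average pool -> (8, 512)
emb = bn2(x)                     # normalize the pooled embedding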
def __init__(self, num_features):
    super().__init__()
    self.bn = BatchNorm1d(num_features)
    self.bn.weight.data.fill_(1)
def __init__(self,
             hidden_channels=20,
             inchannels=15,
             edgechannels=2,
             heads=5,
             num_graph_convs=5,
             embedding_layers=2,
             num_fc=5,
             fc_channels=15,
             positional_encoding=True,
             pos_embedding_dropout=0.1,
             fc_dropout=0.5,
             edge_dropout=0.1,
             recurrent=True,
             parameter_efficient=True,
             attention_channels=None,
             principal_neighbourhood_aggregation=False,
             deg=None,
             aggr='add'):
    if num_graph_convs < 1:
        raise ValueError("need at least one graph convolution")
    num_graph_convs = int(num_graph_convs)

    super().__init__()
    torch.manual_seed(12345)

    self.edge_dropout = edge_dropout

    # number of input chip features
    self.inchannels = inchannels

    # whether to apply positional encoding to nodes
    self.positional_encoding = positional_encoding
    if positional_encoding:
        self.posencoder = PositionalEncoding(hidden_channels,
                                             dropout=pos_embedding_dropout,
                                             identical_sizes=True)

    # initial embedding layers
    embedding = []
    embedding.append(
        Sequential(BatchNorm1d(inchannels), ReLU(), Dropout(p=fc_dropout),
                   Linear(inchannels, hidden_channels)))
    for idx in torch.arange(embedding_layers - 1):
        embedding.append(
            Sequential(BatchNorm1d(hidden_channels), ReLU(),
                       Dropout(p=fc_dropout),
                       Linear(hidden_channels, hidden_channels)))
    self.embedding = ModuleList(embedding)

    # graph convolution layers
    # encoding layer
    enc = Deep_GATE_Conv(node_in_channels=hidden_channels,
                         node_out_channels=hidden_channels,
                         edge_in_channels=edgechannels,
                         edge_out_channels=edgechannels,
                         heads=heads,
                         dropout=fc_dropout,
                         attention_channels=attention_channels,
                         parameter_efficient=parameter_efficient,
                         principal_neighbourhood_aggregation=principal_neighbourhood_aggregation,
                         deg=deg,
                         aggr=aggr)
    gconv = [enc]

    # encoder/decoder layer
    if recurrent:
        encdec = Deep_GATE_Conv(node_in_channels=hidden_channels,
                                node_out_channels=hidden_channels,
                                edge_in_channels=edgechannels,
                                edge_out_channels=edgechannels,
                                heads=heads,
                                dropout=fc_dropout,
                                attention_channels=attention_channels,
                                parameter_efficient=parameter_efficient,
                                principal_neighbourhood_aggregation=principal_neighbourhood_aggregation,
                                deg=deg,
                                aggr=aggr)
        for idx in np.arange(num_graph_convs - 1):
            gconv.append(encdec)
    else:
        for idx in np.arange(num_graph_convs - 1):
            enc = Deep_GATE_Conv(node_in_channels=hidden_channels,
                                 node_out_channels=hidden_channels,
                                 edge_in_channels=edgechannels,
                                 edge_out_channels=edgechannels,
                                 heads=heads,
                                 dropout=fc_dropout,
                                 attention_channels=attention_channels,
                                 parameter_efficient=parameter_efficient,
                                 principal_neighbourhood_aggregation=principal_neighbourhood_aggregation,
                                 deg=deg,
                                 aggr=aggr)
            gconv.append(enc)
    self.gconv = Sequential(*gconv)

    # fully connected channels
    fc_channels = [fc_channels] * num_fc
    fc_channels = [hidden_channels] + fc_channels

    lin = []
    for idx in torch.arange(num_fc):
        lin.append(BatchNorm1d(fc_channels[idx]))
        lin.append(torch.nn.ReLU())
        lin.append(torch.nn.Dropout(p=fc_dropout))
        lin.append(Linear(fc_channels[idx], fc_channels[idx + 1]))
    self.lin = Sequential(*lin)
def __init__(self, keep, embedding_size, input_size=(112, 112)):
    super(MobileFaceNet_y2, self).__init__()
    ave_pool_size = get_shuffle_ave_pooling_size(input_size[0], input_size[1])

    self.conv1 = Conv_block(3, keep[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1))

    i = 0
    c1, c2, c3 = [], [], []
    for _ in range(2):
        c1.append((keep[i], keep[i + 1]))
        c2.append((keep[i + 1], keep[i + 2]))
        c3.append((keep[i + 2], keep[i + 3]))
        i += 3
    self.conv2_dw = Residual(c1, c2, c3, num_block=2, groups=keep[i - 2 * 3],
                             kernel=(3, 3), stride=(1, 1), padding=(1, 1))

    c1, c2, c3 = [], [], []
    for _ in range(1):
        c1.append((keep[i], keep[i + 1]))
        c2.append((keep[i + 1], keep[i + 2]))
        c3.append((keep[i + 2], keep[i + 3]))
        i += 3
    self.conv_23 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2),
                              padding=(1, 1), groups=keep[i - 3])

    c1, c2, c3 = [], [], []
    for _ in range(8):
        c1.append((keep[i], keep[i + 1]))
        c2.append((keep[i + 1], keep[i + 2]))
        c3.append((keep[i + 2], keep[i + 3]))
        i += 3
    self.conv_3 = Residual(c1, c2, c3, num_block=8, groups=keep[i - 8 * 3],
                           kernel=(3, 3), stride=(1, 1), padding=(1, 1))

    c1, c2, c3 = [], [], []
    for _ in range(1):
        c1.append((keep[i], keep[i + 1]))
        c2.append((keep[i + 1], keep[i + 2]))
        c3.append((keep[i + 2], keep[i + 3]))
        i += 3
    self.conv_34 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2),
                              padding=(1, 1), groups=keep[i - 3])

    c1, c2, c3 = [], [], []
    for _ in range(16):
        c1.append((keep[i], keep[i + 1]))
        c2.append((keep[i + 1], keep[i + 2]))
        c3.append((keep[i + 2], keep[i + 3]))
        i += 3
    self.conv_4 = Residual(c1, c2, c3, num_block=16, groups=keep[i - 16 * 3],
                           kernel=(3, 3), stride=(1, 1), padding=(1, 1))

    c1, c2, c3 = [], [], []
    for _ in range(1):
        c1.append((keep[i], keep[i + 1]))
        c2.append((keep[i + 1], keep[i + 2]))
        c3.append((keep[i + 2], keep[i + 3]))
        i += 3
    self.conv_45 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2),
                              padding=(1, 1), groups=keep[i - 3])

    c1, c2, c3 = [], [], []
    for _ in range(4):
        c1.append((keep[i], keep[i + 1]))
        c2.append((keep[i + 1], keep[i + 2]))
        c3.append((keep[i + 2], keep[i + 3]))
        i += 3
    self.conv_5 = Residual(c1, c2, c3, num_block=4, groups=keep[i - 4 * 3],
                           kernel=(3, 3), stride=(1, 1), padding=(1, 1))

    self.conv_6_sep = Conv_block(keep[i], keep[i + 1], kernel=(1, 1),
                                 stride=(1, 1), padding=(0, 0))
    self.conv_6_dw = Linear_block(keep[i + 1], keep[i + 2], groups=keep[i + 1],
                                  kernel=(int(ave_pool_size[0]), int(ave_pool_size[1])),
                                  stride=(1, 1), padding=(0, 0))
    self.conv_6_flatten = Flatten()
    self.linear = Linear(512, embedding_size, bias=False)
    self.bn = BatchNorm1d(embedding_size)
    self.l2 = L2Norm()
def __init__(self, input_size, block, layers, radix=1, groups=1,
             bottleneck_width=64, dilated=False, dilation=1, deep_stem=False,
             stem_width=64, avg_down=False, rectified_conv=False,
             rectify_avg=False, avd=False, avd_first=False, final_drop=0.0,
             dropblock_prob=0, last_gamma=False, norm_layer=nn.BatchNorm2d):
    self.cardinality = groups
    self.bottleneck_width = bottleneck_width
    # ResNet-D params
    self.inplanes = stem_width * 2 if deep_stem else 64
    self.avg_down = avg_down
    self.last_gamma = last_gamma
    # ResNeSt params
    self.radix = radix
    self.avd = avd
    self.avd_first = avd_first

    super(ResNet, self).__init__()
    self.rectified_conv = rectified_conv
    self.rectify_avg = rectify_avg
    if rectified_conv:
        from rfconv import RFConv2d
        conv_layer = RFConv2d
    else:
        conv_layer = nn.Conv2d
    conv_kwargs = {'average_mode': rectify_avg} if rectified_conv else {}

    if deep_stem:
        self.conv1 = nn.Sequential(
            conv_layer(3, stem_width, kernel_size=3, stride=2, padding=1,
                       bias=False, **conv_kwargs),
            norm_layer(stem_width),
            nn.PReLU(stem_width),
            conv_layer(stem_width, stem_width, kernel_size=3, stride=1,
                       padding=1, bias=False, **conv_kwargs),
            norm_layer(stem_width),
            nn.PReLU(stem_width),
            conv_layer(stem_width, stem_width * 2, kernel_size=3, stride=1,
                       padding=1, bias=False, **conv_kwargs),
        )
    else:
        self.conv1 = conv_layer(3, 64, kernel_size=7, stride=2, padding=3,
                                bias=False, **conv_kwargs)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.PReLU(self.inplanes)
    # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0],
                                   norm_layer=norm_layer, is_first=False)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   norm_layer=norm_layer)
    if dilated or dilation == 4:
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
                                       dilation=2, norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
                                       dilation=4, norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
    elif dilation == 2:
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilation=1, norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
                                       dilation=2, norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
    else:
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       norm_layer=norm_layer,
                                       dropblock_prob=dropblock_prob)
    # self.avgpool = GlobalAvgPool2d()
    # self.drop = nn.Dropout(final_drop) if final_drop > 0.0 else None
    # self.fc = nn.Linear(512 * block.expansion, num_classes)
    self.fc = Sequential(BatchNorm2d(512 * block.expansion),
                         Dropout(0.4),
                         Flatten(),
                         Linear(512 * block.expansion * 7 * 7, 512),
                         BatchNorm1d(512, affine=False))

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, norm_layer):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, input_dim, output_dim, n_d=8, n_a=8, n_steps=3, gamma=1.3,
             cat_idxs=[], cat_dims=[], cat_emb_dim=1, n_independent=2,
             n_shared=2, epsilon=1e-15, virtual_batch_size=128, momentum=0.02,
             device_name='auto'):
    """
    Defines TabNet network

    Parameters
    ----------
    - input_dim : int
        Initial number of features
    - output_dim : int
        Dimension of network output
        examples : 1 for regression, 2 for binary classification, etc.
    - n_d : int
        Dimension of the prediction layer (usually between 4 and 64)
    - n_a : int
        Dimension of the attention layer (usually between 4 and 64)
    - n_steps : int
        Number of successive steps in the network (usually between 3 and 10)
    - gamma : float
        Float above 1, scaling factor for attention updates (usually between 1.0 and 2.0)
    - cat_idxs : list of int
        Index of each categorical column in the dataset
    - cat_dims : list of int
        Number of categories in each categorical column
    - cat_emb_dim : int or list of int
        Size of the embedding of categorical features
        if int, all categorical features will have the same embedding size
        if list of int, every corresponding feature will have a specific size
    - momentum : float
        Float value between 0 and 1 which will be used for momentum in all batch norms
    - n_independent : int
        Number of independent GLU layers in each GLU block (default 2)
    - n_shared : int
        Number of shared GLU layers in each GLU block (default 2)
    - epsilon : float
        Avoid log(0), this should be kept very low
    """
    super(TabNet, self).__init__()
    self.cat_idxs = cat_idxs or []
    self.cat_dims = cat_dims or []
    self.cat_emb_dim = cat_emb_dim

    self.input_dim = input_dim
    self.output_dim = output_dim
    self.n_d = n_d
    self.n_a = n_a
    self.n_steps = n_steps
    self.gamma = gamma
    self.epsilon = epsilon
    self.n_independent = n_independent
    self.n_shared = n_shared
    self.virtual_batch_size = virtual_batch_size

    if type(cat_emb_dim) == int:
        self.cat_emb_dims = [cat_emb_dim] * len(self.cat_idxs)
    else:
        # check that all embeddings are provided
        assert len(cat_emb_dim) == len(cat_dims)
        self.cat_emb_dims = cat_emb_dim
    self.embeddings = torch.nn.ModuleList()
    for cat_dim, emb_dim in zip(self.cat_dims, self.cat_emb_dims):
        self.embeddings.append(torch.nn.Embedding(cat_dim, emb_dim))

    # record continuous indices
    self.continuous_idx = torch.ones(self.input_dim, dtype=torch.bool)
    self.continuous_idx[self.cat_idxs] = 0

    if isinstance(cat_emb_dim, int):
        self.post_embed_dim = self.input_dim + (cat_emb_dim - 1) * len(self.cat_idxs)
    else:
        self.post_embed_dim = self.input_dim + np.sum(cat_emb_dim) - len(cat_emb_dim)
    self.post_embed_dim = int(self.post_embed_dim)

    self.tabnet = TabNetNoEmbeddings(self.post_embed_dim, output_dim, n_d, n_a,
                                     n_steps, gamma, n_independent, n_shared,
                                     epsilon, virtual_batch_size, momentum)
    self.initial_bn = BatchNorm1d(self.post_embed_dim, momentum=0.01)

    # Defining device
    if device_name == 'auto':
        if torch.cuda.is_available():
            device_name = 'cuda'
        else:
            device_name = 'cpu'
    self.device = torch.device(device_name)
    self.to(self.device)
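# Worked example of the post-embedding width computed above (values are
# illustrative, not from the source): with input_dim=10 raw columns, two
# categorical columns cat_idxs=[2, 5] and per-column embedding sizes
# cat_emb_dim=[3, 4], each categorical column is replaced by its embedding, so
#   post_embed_dim = 10 + (3 + 4) - 2 = 15,
# matching input_dim + sum(cat_emb_dim) - len(cat_emb_dim).
# With a single int, e.g. cat_emb_dim=3, the same count is
#   post_embed_dim = 10 + (3 - 1) * 2 = 14.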
def __init__(self, feature_number, num_propagation_steps, filters, activation,
             num_classes, pooling_method='mean', aggr='mean'):
    super(SingleConvMeshNet, self).__init__()
    curr_size = feature_number
    inplace = False
    self._pooling_method = pooling_method

    if activation == 'ReLU':
        self._activation = ReLU
        self._act = F.relu
    elif activation == 'LeakyReLU':
        self._activation = LeakyReLU
        self._act = F.leaky_relu
    else:
        raise NotImplementedError(f"{activation} is not implemented")

    self.left_geo_cnns = []
    self.right_geo_cnns = []
    self.pooling_cnns = []
    self.squeeze_cnns = []

    self._graph_levels = len(filters)

    for level in range(len(filters)):
        if level < len(filters) - 1:
            if level == 0:
                # First level needs translation invariant version of edge conv
                left_geo = [
                    get_gcn_filter(curr_size, filters[level], self._activation,
                                   aggregation=aggr, module=EdgeConvTransInv)
                ]
            else:
                left_geo = [
                    get_gcn_filter(2 * curr_size, filters[level],
                                   self._activation, aggregation=aggr)
                ]

            for _ in range(num_propagation_steps - 1):
                left_geo.append(
                    get_gcn_filter(2 * filters[level], filters[level],
                                   self._activation, aggregation=aggr))

            # DECODER branch of U-NET
            curr_size = filters[level] + filters[level + 1]

            right_geo = [
                get_gcn_filter(2 * curr_size, filters[level],
                               self._activation, aggregation=aggr)
            ]
            for _ in range(num_propagation_steps - 1):
                right_geo.append(
                    get_gcn_filter(2 * filters[level], filters[level],
                                   self._activation, aggregation=aggr))

            self.right_geo_cnns.append(torch.nn.ModuleList(right_geo))
            curr_size = filters[level]
        else:
            left_geo = []
            for _ in range(num_propagation_steps):
                left_geo.append(
                    get_gcn_filter(2 * filters[level], filters[level],
                                   self._activation, aggregation=aggr))

        self.left_geo_cnns.append(torch.nn.ModuleList(left_geo))

    self.final_convs = [
        Seq(Lin(filters[0], filters[0] // 2),
            BatchNorm1d(filters[0] // 2),
            self._activation(inplace=inplace),
            Lin(filters[0] // 2, num_classes))
    ]

    self.left_geo_cnns = torch.nn.ModuleList(self.left_geo_cnns)
    self.right_geo_cnns = torch.nn.ModuleList(self.right_geo_cnns)
    self.final_convs = torch.nn.ModuleList(self.final_convs)
def __init__(self, useIntraGCN=True, useInterGCN=True, useRandomMatrix=False,
             useAllOneMatrix=False, useCov=False, useCluster=False):
    super(Backbone_VGG, self).__init__()

    self.layer1 = Sequential(Conv2d(3, 64, kernel_size=3, padding=1),
                             BatchNorm2d(64), ReLU(inplace=True),
                             Conv2d(64, 64, kernel_size=3, padding=1),
                             BatchNorm2d(64), ReLU(inplace=True),
                             MaxPool2d(kernel_size=2, stride=2))
    self.layer2 = Sequential(Conv2d(64, 128, kernel_size=3, padding=1),
                             BatchNorm2d(128), ReLU(inplace=True),
                             Conv2d(128, 128, kernel_size=3, padding=1),
                             BatchNorm2d(128), ReLU(inplace=True),
                             MaxPool2d(kernel_size=2, stride=2))
    self.layer3 = Sequential(Conv2d(128, 256, kernel_size=3, padding=1),
                             BatchNorm2d(256), ReLU(inplace=True),
                             Conv2d(256, 256, kernel_size=3, padding=1),
                             BatchNorm2d(256), ReLU(inplace=True),
                             Conv2d(256, 256, kernel_size=3, padding=1),
                             BatchNorm2d(256), ReLU(inplace=True),
                             MaxPool2d(kernel_size=2, stride=2))
    self.layer4 = Sequential(Conv2d(256, 512, kernel_size=3, padding=1),
                             BatchNorm2d(512), ReLU(inplace=True),
                             Conv2d(512, 512, kernel_size=3, padding=1),
                             BatchNorm2d(512), ReLU(inplace=True),
                             Conv2d(512, 512, kernel_size=3, padding=1),
                             BatchNorm2d(512), ReLU(inplace=True),
                             Conv2d(512, 512, kernel_size=3, padding=1),
                             BatchNorm2d(512), ReLU(inplace=True),
                             Conv2d(512, 512, kernel_size=3, padding=1),
                             BatchNorm2d(512), ReLU(inplace=True),
                             Conv2d(512, 512, kernel_size=3, padding=1),
                             BatchNorm2d(512), ReLU(inplace=True),
                             MaxPool2d(kernel_size=2, stride=2))

    self.output_layer = Sequential(
        nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3, 3),
                  stride=(1, 1), padding=(1, 1)),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d((1, 1)))

    self.Crop_Net = nn.ModuleList([
        Sequential(
            Conv2d(128, 256, kernel_size=3, padding=1), BatchNorm2d(256),
            ReLU(inplace=True),
            Conv2d(256, 256, kernel_size=3, padding=1), BatchNorm2d(256),
            ReLU(inplace=True),
            Conv2d(256, 512, kernel_size=3, padding=1), BatchNorm2d(512),
            ReLU(inplace=True),
            Conv2d(512, 512, kernel_size=3, padding=1), BatchNorm2d(512),
            ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=64, kernel_size=(3, 3),
                      stride=(1, 1), padding=(1, 1)),
            nn.ReLU()) for i in range(5)
    ])

    self.fc = nn.Linear(64 + 320, 7)
    self.loc_fc = nn.Linear(320, 7)

    self.GAP = nn.AdaptiveAvgPool2d((1, 1))

    # self.GCN = GCN(64, 128, 64)
    self.GCN = GCNwithIntraAndInterMatrix(64, 128, 64,
                                          useIntraGCN=useIntraGCN,
                                          useInterGCN=useInterGCN,
                                          useRandomMatrix=useRandomMatrix,
                                          useAllOneMatrix=useAllOneMatrix)

    self.SourceMean = (CountMeanAndCovOfFeature(64 + 320) if useCov else
                       CountMeanOfFeature(64 + 320)) if not useCluster else \
        CountMeanOfFeatureInCluster(64 + 320)
    self.TargetMean = (CountMeanAndCovOfFeature(64 + 320) if useCov else
                       CountMeanOfFeature(64 + 320)) if not useCluster else \
        CountMeanOfFeatureInCluster(64 + 320)
    self.SourceBN = BatchNorm1d(64 + 320)
    self.TargetBN = BatchNorm1d(64 + 320)
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True,
             track_running_stats=True):
    super(NaiveComplexBatchNorm1D, self).__init__()
    self.bn_r = BatchNorm1d(num_features, eps, momentum, affine,
                            track_running_stats)
    self.bn_i = BatchNorm1d(num_features, eps, momentum, affine,
                            track_running_stats)
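# The "naive" complex batch norm above simply keeps one BatchNorm1d for the real
# part and one for the imaginary part. A minimal sketch of the corresponding
# forward idea (how the two parts are carried around is an assumption; the
# source may stack them along a dimension or pass them as separate tensors):
import torch
from torch.nn import BatchNorm1d

num_features = 32
bn_r = BatchNorm1d(num_features)
bn_i = BatchNorm1d(num_features)

x_real = torch.randn(16, num_features)            # real part of the activations
x_imag = torch.randn(16, num_features)            # imaginary part of the activations
out_real, out_imag = bn_r(x_real), bn_i(x_imag)   # normalize each part independently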
def __init__(self, config, num_classes=10):
    super(CNV, self).__init__()
    self.config = config

    weight_quant_type = get_quant_type(config.weight_bit_width)
    act_quant_type = get_quant_type(config.activation_bit_width)
    in_quant_type = get_quant_type(config.input_bit_width)
    stats_op = get_stats_op(weight_quant_type)

    self.preproc_features = ModuleList()
    self.conv_features = ModuleList()
    self.linear_features = ModuleList()

    if config.colortrans:
        self.preproc_features.append(ColorSpaceTransformation(3, 3))

    if config.preproc_mode == 'trained_dithering':
        self.preproc_features.append(
            TrainedDithering(config.input_bit_width, 3, 3))
    elif config.preproc_mode == 'fixed_dithering':
        self.preproc_features.append(
            FixedDithering(config.input_bit_width, 3))
    elif config.preproc_mode == 'quant':
        self.preproc_features.append(Quantization(config.input_bit_width))

    in_ch = 3
    for i, out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
        self.conv_features.append(
            get_quant_conv2d(in_ch=in_ch,
                             out_ch=out_ch,
                             bit_width=config.weight_bit_width,
                             quant_type=weight_quant_type,
                             stats_op=stats_op))
        in_ch = out_ch
        if is_pool_enabled:
            self.conv_features.append(MaxPool2d(kernel_size=2))
        if i == 5:
            self.conv_features.append(Sequential())
        self.conv_features.append(BatchNorm2d(in_ch))
        self.conv_features.append(
            get_act_quant(config.activation_bit_width, act_quant_type))

    for in_features, out_features in INTERMEDIATE_FC_FEATURES:
        self.linear_features.append(
            get_quant_linear(
                in_features=in_features,
                out_features=out_features,
                per_out_ch_scaling=INTERMEDIATE_FC_PER_OUT_CH_SCALING,
                bit_width=config.weight_bit_width,
                quant_type=weight_quant_type,
                stats_op=stats_op))
        self.linear_features.append(BatchNorm1d(out_features))
        self.linear_features.append(
            get_act_quant(config.activation_bit_width, act_quant_type))

    self.classifier = get_quant_linear(
        in_features=LAST_FC_IN_FEATURES,
        out_features=num_classes,
        per_out_ch_scaling=LAST_FC_PER_OUT_CH_SCALING,
        bit_width=config.weight_bit_width,
        quant_type=weight_quant_type,
        stats_op=stats_op)
def __init__(self, input_dim, virtual_batch_size=128, momentum=0.01):
    super(GBN, self).__init__()
    self.input_dim = input_dim
    self.virtual_batch_size = virtual_batch_size
    self.bn = BatchNorm1d(self.input_dim, momentum=momentum)
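# Ghost Batch Normalization (GBN) wraps a single BatchNorm1d but applies it to
# "virtual" sub-batches rather than the full batch. A minimal sketch of that
# idea as a standalone function (it mirrors the usual GBN formulation; it is
# not copied from the module above):
import math
import torch
from torch.nn import BatchNorm1d

def ghost_batch_norm(x, bn, virtual_batch_size=128):
    # Split the batch into ceil(N / virtual_batch_size) roughly equal chunks,
    # normalize each chunk with the shared BatchNorm1d, then concatenate.
    chunks = x.chunk(math.ceil(x.shape[0] / virtual_batch_size), dim=0)
    return torch.cat([bn(chunk) for chunk in chunks], dim=0)

bn = BatchNorm1d(8, momentum=0.01)
out = ghost_batch_norm(torch.randn(300, 8), bn)  # 300 rows -> 3 virtual batches of 100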
def __init__(self, feat_dim, hidden_dim, num_feat_layers=1, num_conv_layers=3,
             num_fc_layers=2, xg_dim=None, gfn=False, collapse=False,
             residual=False, global_pool="sum", dropout=0, edge_norm=True):
    super(ResGCN, self).__init__()
    assert num_feat_layers == 1, "more feat layers are not now supported"
    self.conv_residual = residual
    self.fc_residual = False  # no skip-connections for fc layers.
    self.collapse = collapse
    assert "sum" in global_pool or "mean" in global_pool, global_pool
    if "sum" in global_pool:
        self.global_pool = global_add_pool
    else:
        self.global_pool = global_mean_pool
    self.dropout = dropout
    GConv = partial(ResGCNConv, edge_norm=edge_norm, gfn=gfn)

    if xg_dim is not None:  # Utilize graph level features.
        self.use_xg = True
        self.bn1_xg = BatchNorm1d(xg_dim)
        self.lin1_xg = Linear(xg_dim, hidden_dim)
        self.bn2_xg = BatchNorm1d(hidden_dim)
        self.lin2_xg = Linear(hidden_dim, hidden_dim)
    else:
        self.use_xg = False

    if collapse:
        self.bn_feat = BatchNorm1d(feat_dim)
        self.bns_fc = torch.nn.ModuleList()
        self.lins = torch.nn.ModuleList()
        if "gating" in global_pool:
            self.gating = torch.nn.Sequential(Linear(feat_dim, feat_dim),
                                              torch.nn.ReLU(),
                                              Linear(feat_dim, 1),
                                              torch.nn.Sigmoid())
        else:
            self.gating = None
        hidden_in = feat_dim  # the collapsed variant feeds raw features into the fc stack
        for i in range(num_fc_layers - 1):
            self.bns_fc.append(BatchNorm1d(hidden_in))
            self.lins.append(Linear(hidden_in, hidden_dim))
            hidden_in = hidden_dim
    else:
        self.bn_feat = BatchNorm1d(feat_dim)
        feat_gfn = True  # set true so GCNConv is feat transform
        self.conv_feat = ResGCNConv(feat_dim, hidden_dim, gfn=feat_gfn)
        if "gating" in global_pool:
            self.gating = torch.nn.Sequential(Linear(hidden_dim, hidden_dim),
                                              torch.nn.ReLU(),
                                              Linear(hidden_dim, 1),
                                              torch.nn.Sigmoid())
        else:
            self.gating = None
        self.bns_conv = torch.nn.ModuleList()
        self.convs = torch.nn.ModuleList()
        for i in range(num_conv_layers):
            self.bns_conv.append(BatchNorm1d(hidden_dim))
            self.convs.append(GConv(hidden_dim, hidden_dim))
        self.bn_hidden = BatchNorm1d(hidden_dim)
        self.bns_fc = torch.nn.ModuleList()
        self.lins = torch.nn.ModuleList()
        for i in range(num_fc_layers - 1):
            self.bns_fc.append(BatchNorm1d(hidden_dim))
            self.lins.append(Linear(hidden_dim, hidden_dim))

    # BN initialization.
    for m in self.modules():
        if isinstance(m, (torch.nn.BatchNorm1d)):
            torch.nn.init.constant_(m.weight, 1)
            torch.nn.init.constant_(m.bias, 0.0001)
def __init__(self, i, o):
    super(Residual, self).__init__()
    self.fc = Linear(i, o)
    self.bn = BatchNorm1d(o)
    self.relu = ReLU()
def __init__(self, cfg, model_name):
    super(PolicyFullyConnectedMessagePassing, self).__init__()
    self.model_name = model_name
    self.n_passes = cfg['n_passes']
    self.use_value_critic = cfg['use_value_critic']
    self.use_batch_norm = cfg['use_batch_norm']
    self.logit_normalizer = cfg['logit_normalizer']

    if cfg['use_batch_norm']:
        self.edge_embedding_model = Seq(
            Lin(cfg['edge_feature_dim'], cfg['edge_embedding_dim']),
            LeakyReLU(),
            BatchNorm1d(cfg['edge_embedding_dim']))
        self.node_embedding_model = Seq(
            Lin(cfg['node_feature_dim'], cfg['node_embedding_dim']),
            LeakyReLU(),
            BatchNorm1d(cfg['node_embedding_dim']))
    else:
        self.edge_embedding_model = Seq(
            Lin(cfg['edge_feature_dim'], cfg['edge_embedding_dim']),
            LeakyReLU())
        self.node_embedding_model = Seq(
            Lin(cfg['node_feature_dim'], cfg['node_embedding_dim']),
            LeakyReLU())

    self.global_embedding_model = Seq(
        Lin(cfg['global_feature_dim'], cfg['global_embedding_dim']),
        LeakyReLU())

    # assume that after embedding the edges, nodes and globals have a new length
    mp_dict = [
        MetaLayer(
            EdgeModel(n_edge_features=cfg['edge_embedding_dim'],
                      n_node_features=cfg['node_embedding_dim'],
                      n_global_features=cfg['global_embedding_dim'],
                      n_hiddens=cfg['edge_hidden_dim'],
                      n_targets=cfg['edge_target_dim'],
                      use_batch_norm=cfg['use_batch_norm']),
            NodeModel(n_edge_features=cfg['edge_embedding_dim'],
                      n_node_features=cfg['node_embedding_dim'],
                      n_global_features=cfg['global_embedding_dim'],
                      n_hiddens=cfg['node_hidden_dim'],
                      n_targets=cfg['node_target_dim'],
                      use_batch_norm=cfg['use_batch_norm']),
            GlobalModel(n_node_features=cfg['node_embedding_dim'],
                        n_global_features=cfg['global_embedding_dim'],
                        n_hiddens=cfg['global_hidden_dim'],
                        n_targets=cfg['global_target_dim'],
                        use_batch_norm=cfg['use_batch_norm']))
    ]

    for i in range(1, self.n_passes):
        mp_dict.append(
            MetaLayer(
                EdgeModel(n_edge_features=cfg['edge_target_dim'],
                          n_node_features=cfg['node_target_dim'],
                          n_global_features=cfg['global_target_dim'],
                          n_hiddens=cfg['edge_hidden_dim'],
                          n_targets=cfg['edge_target_dim']),
                NodeModel(n_edge_features=cfg['edge_target_dim'],
                          n_node_features=cfg['node_target_dim'],
                          n_global_features=cfg['global_target_dim'],
                          n_hiddens=cfg['node_hidden_dim'],
                          n_targets=cfg['node_target_dim']),
                GlobalModel(n_node_features=cfg['node_target_dim'],
                            n_global_features=cfg['global_target_dim'],
                            n_hiddens=cfg['global_hidden_dim'],
                            n_targets=cfg['global_target_dim'])))

    self.message_passing = torch.nn.ModuleList(mp_dict)
    self.node_decoder_model = Lin(cfg['node_target_dim'], cfg['node_dim_out'])

    if self.use_value_critic:
        self.value_model = Seq(
            Lin(cfg['global_target_dim'], cfg['value_embedding_dim']),
            LeakyReLU(),
            Lin(cfg['value_embedding_dim'], 1))
def __init__(self, embedding_size):
    super(MobileFaceNet, self).__init__()
    self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
    self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1),
                               padding=(1, 1), groups=64)
    self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2),
                              padding=(1, 1), groups=128)
    self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3),
                           stride=(1, 1), padding=(1, 1))
    self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2),
                              padding=(1, 1), groups=256)
    self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3),
                           stride=(1, 1), padding=(1, 1))
    self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2),
                              padding=(1, 1), groups=512)
    self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3),
                           stride=(1, 1), padding=(1, 1))
    self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1),
                                 padding=(0, 0))
    self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7, 7),
                                  stride=(1, 1), padding=(0, 0))
    self.conv_6_flatten = Flatten()
    self.linear = Linear(512, embedding_size, bias=False)
    self.bn = BatchNorm1d(embedding_size)
def make_conv2d_model(input_shape, output_shape, params):
    batch_norm = params.get('use_batch_norm', False)
    layers = ModuleList()
    out_filters = params['initial_kernel_number']
    output = torch.zeros((2, ) + input_shape)

    if params['n_conv_layers'] > 0:
        for ii in range(params['n_conv_layers']):
            if ii == 0:
                shp = output.shape[2:]
                kernel_size = params['input_kernel_size']
                kernel_size = (min(kernel_size[0], shp[0]),
                               min(kernel_size[1], shp[1]))
                layers.append(Conv2d(input_shape[0], out_filters, kernel_size))
                output = layers[-1].forward(output)
                layers.append(_activation[params['activation']]())
                output = layers[-1].forward(output)
                if batch_norm:
                    layers.append(BatchNorm2d(out_filters))
                    output = layers[-1].forward(output)
            else:
                in_filters = out_filters
                if params['conv_dim_change'] == 'double':
                    out_filters = out_filters * 2
                elif params['conv_dim_change'] == 'halve-first':
                    if ii == 0:
                        out_filters = out_filters // 2
                elif params['conv_dim_change'] == 'halve-last':
                    if ii == params['n_conv_layers'] - 2:
                        out_filters = out_filters // 2
                shp = output.shape[2:]
                kernel_size = params['conv_kernel_size']
                kernel_size = (min(kernel_size[0], shp[0]),
                               min(kernel_size[1], shp[1]))
                layers.append(Conv2d(in_filters, out_filters, kernel_size))
                output = layers[-1].forward(output)
                layers.append(_activation[params['activation']]())
                output = layers[-1].forward(output)
                if batch_norm:
                    layers.append(BatchNorm2d(out_filters))
                    output = layers[-1].forward(output)

            if params['pool_size'] is not None:
                shp = output.shape[2:]
                pool_size = params['pool_size']
                pool_size = (min(pool_size[0], shp[0]), min(pool_size[1], shp[1]))
                layers.append(MaxPool2d(pool_size))
                output = layers[-1].forward(output)

    lin_layers = ModuleList()
    input_dim = np.prod(output.shape[1:])
    current_dim = params['dense_dim']

    if params['n_dense_layers'] == 0:
        return layers, None

    if params['n_dense_layers'] > 1:
        for ii in range(params['n_dense_layers'] - 1):
            if ii == 0:
                lin_layers.append(Linear(input_dim, current_dim))
            else:
                lin_layers.append(Linear(current_dim, current_dim))
            layers.append(_activation[params['activation']]())
            if batch_norm:
                lin_layers.append(BatchNorm1d(current_dim))
            if params['dense_dim_change'] != 'none':
                raise NotImplementedError
    else:
        current_dim = input_dim

    lin_layers.append(Linear(current_dim, output_shape))
    return layers, lin_layers
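# A sketch of how `make_conv2d_model` might be called, with the params dict built
# from the keys the function reads above. The concrete values are illustrative
# assumptions, not defaults from the source:
example_params = {
    'use_batch_norm': True,
    'initial_kernel_number': 16,
    'n_conv_layers': 3,
    'input_kernel_size': (5, 5),
    'conv_kernel_size': (3, 3),
    'conv_dim_change': 'double',
    'pool_size': (2, 2),
    'activation': 'relu',          # must be a key of the module's _activation mapping
    'n_dense_layers': 2,
    'dense_dim': 64,
    'dense_dim_change': 'none',
}
conv_layers, dense_layers = make_conv2d_model(input_shape=(1, 28, 28),
                                              output_shape=10,
                                              params=example_params)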