def __init__(self, embed_size=15, num_filters=(25, 50, 75, 100, 125, 150),
             ngram_filter_sizes=(1, 2, 3, 4, 5, 6), conv_layer_activation='tanh',
             num_highway=1, highway_layer_activation='relu',
             highway_bias=HighwayBias(nonlinear_transform_bias=0.0,
                                      transform_gate_bias=-2.0),
             output_size=None, **kwargs):
    super(ConvolutionalEncoder, self).__init__(**kwargs)
    self._embed_size = embed_size
    self._num_filters = num_filters
    self._ngram_filter_sizes = ngram_filter_sizes
    self._num_highway = num_highway
    self._output_size = output_size
    with self.name_scope():
        self._convs = gluon.contrib.nn.HybridConcurrent()
        maxpool_output_size = 0
        with self._convs.name_scope():
            for num_filter, ngram_size in zip(self._num_filters,
                                              self._ngram_filter_sizes):
                seq = nn.HybridSequential()
                seq.add(nn.Conv1D(in_channels=self._embed_size,
                                  channels=num_filter,
                                  kernel_size=ngram_size,
                                  use_bias=True))
                seq.add(gluon.nn.HybridLambda(lambda F, x: F.max(x, axis=2)))
                seq.add(nn.Activation(conv_layer_activation))
                self._convs.add(seq)
                maxpool_output_size += num_filter
        if self._num_highway:
            self._highways = Highway(maxpool_output_size,
                                     self._num_highway,
                                     activation=highway_layer_activation,
                                     highway_bias=highway_bias)
        else:
            self._highways = None
        if self._output_size:
            self._projection = nn.Dense(in_units=maxpool_output_size,
                                        units=self._output_size,
                                        use_bias=True)
        else:
            self._projection = None
            self._output_size = maxpool_output_size
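# Added example (not from the snippet above): a minimal standalone check of
# nn.Conv1D shapes in the default NCW layout, assuming a vanilla MXNet setup.
import mxnet as mx
from mxnet.gluon import nn

conv = nn.Conv1D(channels=8, kernel_size=3, strides=1, padding=0, dilation=1,
                 in_channels=4)
conv.initialize()
x = mx.nd.random.normal(shape=(2, 4, 10))  # (batch, channels, width)
y = conv(x)
# W_out = floor((W + 2*padding - dilation*(kernel_size - 1) - 1) / strides) + 1 = 8
assert y.shape == (2, 8, 8)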
def __init__(self, channels, in_channels=0, **kwargs):
    super(FeatureConv, self).__init__(**kwargs)
    self.body = nn.HybridSequential(prefix='')
    self.body.add(nn.Conv1D(channels, kernel_size=1, strides=1,
                            in_channels=in_channels, use_bias=False,
                            weight_initializer=mx.init.Xavier(rnd_type='gaussian',
                                                              factor_type="in",
                                                              magnitude=2)))
    # `bn_momentum` is expected to be defined at module level in the source file
    self.body.add(nn.BatchNorm(momentum=bn_momentum))
    self.body.add(nn.Activation('relu'))
def __init__(self, vocab, embed_size, kernel_sizes, num_channels, **kwargs):
    super(TextCNN, self).__init__(**kwargs)
    self.embedding = nn.Embedding(len(vocab), embed_size)
    # An embedding layer that does not participate in training (kept frozen)
    self.constant_embedding = nn.Embedding(len(vocab), embed_size)
    self.dropout = nn.Dropout(0.5)
    self.decoder = nn.Dense(2)
    # The max-over-time pooling layer has no weights, so one instance can be shared
    self.pool = nn.GlobalMaxPool1D()
    self.convs = nn.Sequential()
    # Create multiple one-dimensional convolutional layers,
    # e.g. num_channels = (100, 100, 100) and kernel_sizes = (3, 4, 5)
    for c, k in zip(num_channels, kernel_sizes):
        self.convs.add(nn.Conv1D(c, k, activation='relu'))
def __init__(self, opt):
    super(CNNText, self).__init__()
    self.opt = opt
    with self.name_scope():
        self.drop = nn.Dropout(opt.drop)
        # self.encoder = nn.Embedding(input_dim=opt.vocab_size, output_dim=opt.embed_dim)
        self.conv_block = HybrideConcurrent(concat_dim=1)
        for i, ngram_filter in enumerate(opt.ngram_filters):
            net = nn.HybridSequential(prefix='filter' + str(i))
            net.add(nn.Conv1D(opt.num_hidden, ngram_filter))
            # net.add(nn.BatchNorm())
            net.add(nn.Activation('relu'))
            net.add(nn.MaxPool1D(opt.seq_len - ngram_filter + 1))
            self.conv_block.add(net)
def test_conv():
    layers1d = [
        nn.Conv1D(16, 3, in_channels=4),
        nn.Conv1D(16, 3, groups=2, in_channels=4),
        nn.Conv1D(16, 3, strides=3, groups=2, in_channels=4),
    ]
    for layer in layers1d:
        check_layer_forward(layer, (1, 4, 10))

    layers2d = [
        nn.Conv2D(16, (3, 4), in_channels=4),
        nn.Conv2D(16, (5, 4), in_channels=4),
        nn.Conv2D(16, (3, 4), groups=2, in_channels=4),
        nn.Conv2D(16, (3, 4), strides=4, in_channels=4),
        nn.Conv2D(16, (3, 4), dilation=4, in_channels=4),
        nn.Conv2D(16, (3, 4), padding=4, in_channels=4),
    ]
    for layer in layers2d:
        check_layer_forward(layer, (1, 4, 20, 20))

    layers3d = [
        nn.Conv3D(16, (1, 8, 4), in_channels=4, activation='relu'),
        nn.Conv3D(16, (5, 4, 3), in_channels=4),
        nn.Conv3D(16, (3, 3, 3), groups=2, in_channels=4),
        nn.Conv3D(16, 4, strides=4, in_channels=4),
        nn.Conv3D(16, (3, 3, 3), padding=4, in_channels=4),
    ]
    for layer in layers3d:
        check_layer_forward(layer, (1, 4, 10, 10, 10))

    layer = nn.Conv2D(16, (3, 3), layout='NHWC', in_channels=4)
    # check_layer_forward(layer, (1, 10, 10, 4))
    layer = nn.Conv3D(16, (3, 3, 3), layout='NDHWC', in_channels=4)
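# Hedged sketch of a check_layer_forward-style helper (the real helper comes from
# MXNet's test utilities and is not shown here): run a forward/backward pass
# imperatively and again after hybridizing, then compare the outputs.
import mxnet as mx

def check_layer_forward_sketch(layer, dshape):
    layer.initialize()
    x = mx.nd.ones(shape=dshape)
    x.attach_grad()
    with mx.autograd.record():
        out = layer(x)
    out.backward()
    np_out = out.asnumpy()
    layer.hybridize()
    with mx.autograd.record():
        out = layer(x)
    out.backward()
    mx.test_utils.assert_almost_equal(np_out, out.asnumpy(), rtol=1e-5, atol=1e-6)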
def CNN():
    net = nn.Sequential()
    with net.name_scope():
        net.add(
            nn.Conv1D(channels=32, kernel_size=33),
            nn.BatchNorm(axis=1),
            nn.Activation('relu'),
            nn.MaxPool1D(pool_size=13),
            nn.Flatten(),
            nn.Dense(33, activation='relu'),
            nn.Dropout(0.2),
            nn.Dense(1, activation='sigmoid'),
        )
    return net
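# Hedged usage sketch (not part of the original): the Sequential above consumes
# channel-first sequences; the input channel count and width below are illustrative.
import mxnet as mx

net = CNN()
net.initialize()
x = mx.nd.random.uniform(shape=(8, 4, 200))  # (batch, in_channels, width)
# 200 -conv(k=33)-> 168 -pool(13)-> 12, flatten, Dense(33), Dense(1)
print(net(x).shape)  # (8, 1)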
def __init__(self, vocab, embed_size, kernel_sizes, num_channels, **kwargs):
    super(TextCNN, self).__init__(**kwargs)
    self.embedding = nn.Embedding(len(vocab), embed_size)
    # The embedding layer does not participate in training
    self.constant_embedding = nn.Embedding(len(vocab), embed_size)
    self.dropout = nn.Dropout(0.5)
    self.decoder = nn.Dense(2)
    # The max-over-time pooling layer has no weight, so it can share an
    # instance
    self.pool = nn.GlobalMaxPool1D()
    # Create multiple one-dimensional convolutional layers
    self.convs = nn.Sequential()
    for c, k in zip(num_channels, kernel_sizes):
        self.convs.add(nn.Conv1D(c, k, activation='relu'))
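# Hedged sketch of the forward pass that usually accompanies this TextCNN
# (d2l-style wiring; the method below is assumed, not copied from the source).
import mxnet as mx

def forward(self, inputs):
    # Concatenate trainable and frozen embeddings along the feature axis:
    # shape (batch, seq_len, 2 * embed_size)
    embeddings = mx.nd.concat(self.embedding(inputs),
                              self.constant_embedding(inputs), dim=2)
    # Conv1D expects (batch, channels, width), so move features to the channel axis
    embeddings = embeddings.transpose((0, 2, 1))
    # Each branch: Conv1D -> GlobalMaxPool1D -> flatten, then concatenate branches
    encoding = mx.nd.concat(
        *[self.pool(conv(embeddings)).flatten() for conv in self.convs], dim=1)
    return self.decoder(self.dropout(encoding))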
def __init__(self):
    super(OriginDQN, self).__init__()
    with self.name_scope():
        self.net = nn.Sequential()
        self.net.add(
            nn.Conv1D(channels=64, kernel_size=2, strides=1, activation='relu'),
            # nn.Conv2D(channels=64, kernel_size=2, strides=1, activation='relu'),
            # nn.Conv2D(channels=64, kernel_size=4, strides=2, activation='relu'),
            # nn.Conv2D(channels=64, kernel_size=3, strides=1, activation='relu'),
            nn.Flatten())
        self.fully_connected = nn.Dense(512, activation='relu')
        self.value = nn.Dense(8)
def __init__(self, in_channels, out_channels, kernel_size, strides, padding,
             dilation=1, groups=1, use_bias=False, use_bn=True, bn_epsilon=1e-5,
             bn_use_global_stats=False, bn_cudnn_off=False,
             activation=(lambda: nn.Activation("relu")), dropout_rate=0.0,
             **kwargs):
    super(DwsConvBlock1d, self).__init__(**kwargs)
    self.activate = (activation is not None)
    self.use_bn = use_bn
    self.use_dropout = (dropout_rate != 0.0)
    self.use_channel_shuffle = (groups > 1)
    with self.name_scope():
        self.dw_conv = nn.Conv1D(channels=in_channels,
                                 kernel_size=kernel_size,
                                 strides=strides,
                                 padding=padding,
                                 dilation=dilation,
                                 groups=in_channels,
                                 use_bias=use_bias,
                                 in_channels=in_channels)
        self.pw_conv = conv1d1(in_channels=in_channels,
                               out_channels=out_channels,
                               groups=groups,
                               use_bias=use_bias)
        if self.use_channel_shuffle:
            self.shuffle = ChannelShuffle(channels=out_channels, groups=groups)
        if self.use_bn:
            self.bn = BatchNormExtra(in_channels=out_channels,
                                     epsilon=bn_epsilon,
                                     use_global_stats=bn_use_global_stats,
                                     cudnn_off=bn_cudnn_off)
        if self.activate:
            self.activ = activation()
        if self.use_dropout:
            self.dropout = nn.Dropout(rate=dropout_rate)
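# Hedged illustration (not in the original): why the depthwise/pointwise split
# above is cheap. A depthwise conv (groups == in_channels) plus a 1x1 pointwise
# conv needs far fewer weights than a dense Conv1D over the same receptive field.
c_in, c_out, k = 64, 128, 9              # illustrative sizes
full_conv = c_in * c_out * k             # dense Conv1D weight count
dws_conv = c_in * 1 * k + c_in * c_out   # depthwise + pointwise weight count
print(full_conv, dws_conv)               # 73728 vs 8768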
def __init__(self, mu=256, n_residue=32, n_skip=512, dilation_depth=10, n_repeat=5):
    # mu: audio quantization size
    # n_residue: residue channels
    # n_skip: skip channels
    # dilation_depth & n_repeat: dilation layer setup
    super(WaveNet, self).__init__()
    self.dilation_depth = dilation_depth
    self.dilations = [2**i for i in range(dilation_depth)] * n_repeat
    with self.name_scope():
        self.one_hot = One_Hot(mu)
        self.from_input = nn.Conv1D(in_channels=mu, channels=n_residue, kernel_size=1)
        self.conv_sigmoid = nn.Sequential()
        self.conv_tanh = nn.Sequential()
        self.skip_scale = nn.Sequential()
        self.residue_scale = nn.Sequential()
        for d in self.dilations:
            self.conv_sigmoid.add(
                nn.Conv1D(in_channels=n_residue, channels=n_residue,
                          kernel_size=2, dilation=d))
            self.conv_tanh.add(
                nn.Conv1D(in_channels=n_residue, channels=n_residue,
                          kernel_size=2, dilation=d))
            self.skip_scale.add(
                nn.Conv1D(in_channels=n_residue, channels=n_skip,
                          kernel_size=1, dilation=d))
            self.residue_scale.add(
                nn.Conv1D(in_channels=n_residue, channels=n_residue,
                          kernel_size=1, dilation=d))
        self.conv_post_1 = nn.Conv1D(in_channels=n_skip, channels=n_skip, kernel_size=1)
        self.conv_post_2 = nn.Conv1D(in_channels=n_skip, channels=mu, kernel_size=1)
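# Hedged helper (not in the original): receptive field of the dilated stack above.
# Each kernel_size=2 convolution with dilation d adds d samples of left context.
def wavenet_receptive_field(dilation_depth=10, n_repeat=5, kernel_size=2):
    dilations = [2 ** i for i in range(dilation_depth)] * n_repeat
    return sum(d * (kernel_size - 1) for d in dilations) + 1

print(wavenet_receptive_field())  # 5116 samples for the defaults above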
def __init__(self, num_series, conv_hid, gru_hid, skip_gru_hid, skip, ar_window):
    super(LSTNet, self).__init__()
    kernel_size = 6
    dropout_rate = 0.2
    self.skip = skip
    self.ar_window = ar_window
    with self.name_scope():
        self.conv = nn.Conv1D(conv_hid, kernel_size=kernel_size,
                              layout='NCW', activation='relu')
        self.dropout = nn.Dropout(dropout_rate)
        self.gru = rnn.GRU(gru_hid, layout='TNC')
        self.skip_gru = rnn.GRU(skip_gru_hid, layout='TNC')
        self.fc = nn.Dense(num_series)
        self.ar_fc = nn.Dense(1)
def __init__(
    self,
    channels: int,
    kernel_size: int,
    activation: str = "tanh",
    **kwargs,
):
    super(CausalConv1D, self).__init__(**kwargs)
    self.kernel_size = kernel_size
    self.channels = channels
    with self.name_scope():
        self.net = nn.Conv1D(
            channels,
            kernel_size,
            use_bias=False,
            activation=activation,
            weight_initializer=init.Xavier(),
        )
def __init__(self, num_series, conv_hid, gru_hid, skip_gru_hid, skip, ar_window):
    super(LSTNet, self).__init__()
    kernel_size = 6       # in this case, looks at a 6-hour data window
    dropout_rate = 0.2    # for regularization
    self.skip = skip      # determines the seasonality/cycles
    self.ar_window = ar_window
    with self.name_scope():
        # define specific layers for the model
        self.conv = nn.Conv1D(conv_hid, kernel_size=kernel_size,
                              layout='NCW', activation='relu')
        self.dropout = nn.Dropout(dropout_rate)
        self.gru = rnn.GRU(gru_hid, layout='TNC')
        self.skip_gru = rnn.GRU(skip_gru_hid, layout='TNC')
        self.fc = nn.Dense(num_series)
        self.ar_fc = nn.Dense(1)
def SequentialTextCNN(config):
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Embedding(input_dim=config['vocab_size'],
                             output_dim=config['embedding_dim']))
        # Conv1D expects NCW, so move the embedding axis to the channel position
        net.add(nn.Lambda(lambda x: x.transpose((0, 2, 1))))
        net.add(nn.Conv1D(channels=config['feature_map'],
                          kernel_size=config['kernel_size'][0],
                          strides=1))
        net.add(nn.BatchNorm(axis=1))
        net.add(nn.Activation('relu'))
        net.add(nn.GlobalMaxPool1D())
        net.add(nn.Dropout(rate=config['dropout_rate']))
        net.add(nn.Dense(units=2))
    return net
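# Hedged usage sketch (config keys taken from the constructor above; the values
# below are illustrative only).
import mxnet as mx

config = {
    'vocab_size': 10000,
    'embedding_dim': 128,
    'feature_map': 100,
    'kernel_size': [3],
    'dropout_rate': 0.5,
}
net = SequentialTextCNN(config)
net.initialize()
out = net(mx.nd.zeros((16, 50)))  # (batch, seq_len) of token ids
print(out.shape)                  # (16, 2)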
def __init__(self, embedding_size, field_num, layer_size, ctx, **kwargs):
    super(CIN, self).__init__(**kwargs)
    self.embedding_size = embedding_size
    self.field_num = field_num
    self.ctx = ctx
    self.layer_size = layer_size
    self.field_nums = [self.field_num]
    self.conv_list = []
    for idx, size in enumerate(self.layer_size):
        self.conv_list.append(
            nn.Conv1D(channels=size,
                      kernel_size=1,
                      strides=1,
                      padding=0,
                      activation='relu',
                      in_channels=self.field_nums[0] * self.field_nums[-1]))
        self.field_nums.append(size)
        self.register_child(self.conv_list[idx])
def __init__(self, k, d, out_channels, in_channels=1, dropout=0.4, prefix=None):
    super(TemporalBlock, self).__init__(prefix=prefix)
    with self.name_scope():
        self.in_channels = in_channels
        self.kernel_size = k
        self.dilation = d
        self.out_channels = out_channels
        self.net = gluon.nn.Sequential()
        self.conv1 = gluon.nn.Conv1D(out_channels, k, in_channels=in_channels,
                                     dilation=d, padding=d * (k - 1))
        self.cut1 = CutRight(d * (k - 1))
        self.bn1 = gluon.nn.BatchNorm()
        self.relu1 = gluon.nn.Activation('relu')
        self.dropout1 = gluon.nn.Dropout(dropout)
        self.conv2 = gluon.nn.Conv1D(out_channels, k, in_channels=out_channels,
                                     dilation=d, padding=d * (k - 1))
        self.cut2 = CutRight(d * (k - 1))
        self.bn2 = gluon.nn.BatchNorm()
        self.relu2 = gluon.nn.Activation('relu')
        self.dropout2 = gluon.nn.Dropout(dropout)
        layers = [
            self.conv1, self.cut1, self.bn1, self.relu1, self.dropout1,
            self.conv2, self.cut2, self.bn2, self.relu2, self.dropout2
        ]
        for layer in layers:
            self.net.add(layer)
        self.relu = nn.Activation('relu')
        self.downsample = nn.Conv1D(
            out_channels, 1) if in_channels != out_channels else None
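# Hedged sketch of the residual wiring typically used with the block above
# (the actual forward method is not shown in this snippet).
def forward(self, x):
    out = self.net(x)                                            # two causal conv stacks
    res = x if self.downsample is None else self.downsample(x)   # match channel count
    return self.relu(out + res)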
def __init__(
    self,
    channels: int,
    kernel_size: Union[int, Tuple[int], List[int]],
    dilation: Union[int, Tuple[int], List[int]] = 1,
    activation: Optional[str] = None,
    **kwargs,
):
    super(CausalConv1D, self).__init__(**kwargs)
    self.dilation = _get_int(dilation)
    self.kernel_size = _get_int(kernel_size)
    self.padding = self.dilation * (self.kernel_size - 1)
    self.conv1d = nn.Conv1D(
        channels=channels,
        kernel_size=self.kernel_size,
        dilation=self.dilation,
        padding=self.padding,
        activation=activation,
        **kwargs,
    )
def build(self):
    with self.name_scope():
        self.conv = nn.Conv1D(channels=5, kernel_size=3)
        # self.mp = nn.MaxPool1D(pool_size=2)
        self.flatten = nn.Flatten()
        self.dense0 = nn.Dense(128, activation='tanh')
        self.dense1 = nn.Dense(64, activation='tanh')
        self.dense2 = nn.Dense(32, activation='tanh')
        self.dense3 = nn.Dense(16, activation='tanh')
        if self.bn:
            print(self.bn)
            self.bn0 = nn.BatchNorm(axis=1)
            self.bn1 = nn.BatchNorm(axis=1)
            self.bn2 = nn.BatchNorm(axis=1)
            self.bn3 = nn.BatchNorm(axis=1)
        if self.dropout is not None:
            self.dropout0 = nn.Dropout(self.dropout[0])
            self.dropout1 = nn.Dropout(self.dropout[1])
            self.dropout2 = nn.Dropout(self.dropout[2])
            self.dropout3 = nn.Dropout(self.dropout[3])
        self.output = nn.Dense(1)
def __init__(
    self,
    channels: int,
    kernel_size: int,
    dilation: int = 1,
    activation: Optional[str] = None,
    **kwargs,
):
    super(CausalConv1D, self).__init__(**kwargs)
    self.dilation = dilation
    self.kernel_size = kernel_size
    self.padding = dilation * (kernel_size - 1)
    self.conv1d = nn.Conv1D(
        channels=channels,
        kernel_size=kernel_size,
        dilation=dilation,
        padding=self.padding,
        activation=activation,
        **kwargs,
    )
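# Hedged sketch (assumed, not from the source) of the trimming step that makes the
# padded convolution above causal: symmetric padding of dilation*(kernel_size-1)
# yields an output that is `padding` samples too long, so the trailing samples are
# dropped and out[..., t] depends only on x[..., :t + 1].
def hybrid_forward(self, F, x):
    out = self.conv1d(x)  # width = W + padding
    if self.padding > 0:
        out = F.slice_axis(out, axis=-1, begin=0, end=-self.padding)
    return out            # width = W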
def __init__(self, filters=512, kernel_size=5, strides=1, padding=None, *args, **kwargs):
    super(ConvBlock, self).__init__(*args, **kwargs)
    if padding is None:
        padding = (kernel_size - 1) // 2
    with self.name_scope():
        self._stages = nn.HybridSequential()
        self._stages.add(nn.Conv1D(channels=filters,
                                   kernel_size=kernel_size,
                                   strides=strides,
                                   padding=padding))
        self._stages.add(nn.BatchNorm())
        self._stages.add(nn.Activation('relu'))
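# Hedged shape check (not in the original): with the default kernel_size=5, the
# derived padding (5 - 1) // 2 = 2 keeps the width unchanged at stride 1. The
# internal stack is called directly because the block's forward is not shown here.
import mxnet as mx

blk = ConvBlock(filters=512, kernel_size=5)
blk.initialize()
print(blk._stages(mx.nd.zeros((1, 64, 80))).shape)  # (1, 512, 80)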
def __init__(self, vocab, embedding_size, ngram_kernel_sizes, nums_channels,
             num_outputs, **kwargs):
    super(TextCNN, self).__init__(**kwargs)
    self.ngram_kernel_sizes = ngram_kernel_sizes
    self.embedding_static = nn.Embedding(len(vocab), embedding_size)
    self.embedding_non_static = nn.Embedding(len(vocab), embedding_size)
    for i in range(len(ngram_kernel_sizes)):
        # One-dimensional convolutional layer.
        conv = nn.Conv1D(nums_channels[i],
                         kernel_size=ngram_kernel_sizes[i],
                         strides=1,
                         activation='relu')
        # Max-over-time pooling layer.
        bn = nn.BatchNorm()
        pool = nn.GlobalMaxPool1D()
        # Register self.conv_{i} as the i-th conv.
        setattr(self, f'conv_{i}', conv)
        setattr(self, f'bn_{i}', bn)
        # Register self.pool_{i} as the i-th pool.
        setattr(self, f'pool_{i}', pool)
    self.dropout = nn.Dropout(0.5)
    self.decoder = nn.Dense(num_outputs)
def __init__(self, input_shape, filter_list, num_filters, dropout=0.,
             Attention=True, **kwargs):
    super(TemporalConvNet, self).__init__(**kwargs)
    self.c = input_shape[1]  # input: n, c, w
    print("Input_feature_shape: ", input_shape)
    with self.name_scope():
        self._convNet = nn.HybridSequential()
        for i, kernel_size in enumerate(filter_list):
            dilation = 2**i
            padding = (kernel_size - 1) * dilation
            nb_filter = num_filters[i]
            self._convNet.add(
                nn.Conv1D(channels=nb_filter, kernel_size=kernel_size,
                          strides=1, padding=padding, dilation=dilation,
                          layout='NCW', activation='relu'),
                nn.BatchNorm(axis=2),
                # nn.Conv1D(channels=nb_filter, kernel_size=kernel_size, strides=1,
                #           padding=padding, dilation=dilation, layout='NCW',
                #           activation='relu'),
                # nn.BatchNorm(axis=2),
                Chomp1d(padding),
                nn.Dropout(dropout),
                Self_Attn1D(nb_filter, 'relu'),
            )
        # print("Attention: ", Attention)
        # if Attention:
        #     self._convNet.add(Self_Attn1D(nb_filter, 'relu'))
        # convNet: (n, nb_filter, w)
        # self._downsample = nn.Conv1D(channels=self.c, kernel_size=1)
        self._downsample = None
def conv1d1(in_channels, out_channels, strides=1, groups=1, use_bias=False):
    """
    1-dim kernel version of the 1D convolution layer.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    strides : int, default 1
        Strides of the convolution.
    groups : int, default 1
        Number of groups.
    use_bias : bool, default False
        Whether the layer uses a bias vector.
    """
    return nn.Conv1D(channels=out_channels,
                     kernel_size=1,
                     strides=strides,
                     groups=groups,
                     use_bias=use_bias,
                     in_channels=in_channels)
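# Hedged usage example (not in the original): a 1x1 (pointwise) convolution from
# conv1d1 only mixes channels, so the sequence width is preserved.
import mxnet as mx

pw = conv1d1(in_channels=16, out_channels=32)
pw.initialize()
y = pw(mx.nd.zeros((4, 16, 100)))  # (batch, in_channels, width)
assert y.shape == (4, 32, 100)     # channels change, width does not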
def __init__(self, ctx=mx.cpu(), warmup=5, runs=25, inputs=None):
    # Set the default Inputs
    default_parameters = {
        "data": (32, 3, 256),
        "data_initializer": nd.normal,
        "channels": 64,
        "kernel_size": 3,
        "strides": 1,
        "padding": 1,
        "dilation": 1,
        "layout": "NCW",
        "activation": None,
        "run_backward": True,
        "dtype": "float32",
    }
    super().__init__(ctx=ctx, warmup=warmup, runs=runs,
                     default_parameters=default_parameters,
                     custom_parameters=inputs)
    self.data = get_mx_ndarray(ctx=self.ctx,
                               in_tensor=self.inputs["data"],
                               dtype=self.inputs["dtype"],
                               initializer=self.inputs["data_initializer"],
                               attach_grad=self.inputs["run_backward"])
    self.block = nn.Conv1D(channels=self.inputs["channels"],
                           kernel_size=self.inputs["kernel_size"],
                           strides=self.inputs["strides"],
                           padding=self.inputs["padding"],
                           dilation=self.inputs["dilation"],
                           layout=self.inputs["layout"],
                           activation=self.inputs["activation"])
    self.block.initialize(ctx=self.ctx)
def __init__(self, K, channels, in_channels=0, with_bn=True, activation='relu',
             pooling='average', cpu_mode=False):
    """EdgeConv

    Args:
        K: int, number of neighbors
        in_channels: # of input channels
        channels: tuple of output channels
        pooling: pooling method ('max' or 'average')
    Inputs:
        points: (N, C_p, P)
        features: (N, C_0, P)
    Returns:
        transformed points: (N, C_out, P), C_out = channels[-1]
    """
    super(EdgeConv, self).__init__()
    self.K = K
    self.pooling = pooling
    if self.pooling not in ('max', 'average'):
        raise RuntimeError('Pooling method should be "max" or "average"')
    with self.name_scope():
        self.batch_distance_matrix = BatchDistanceMatrix()
        self.knn = NearestNeighborsFromIndices(K, cpu_mode=cpu_mode)
        self.convs = []
        self.bns = []
        self.acts = []
        for idx, C in enumerate(channels):
            self.convs.append(
                nn.Conv2D(channels=C, kernel_size=(1, 1), strides=(1, 1),
                          use_bias=False if with_bn else True,
                          in_channels=2 * in_channels if idx == 0 else channels[idx - 1],
                          weight_initializer=mx.init.Xavier(rnd_type='gaussian',
                                                            factor_type="in",
                                                            magnitude=2)))
            self.register_child(self.convs[-1], 'conv_%d' % idx)
            self.bns.append(nn.BatchNorm() if with_bn else None)
            self.register_child(self.bns[-1], 'bn_%d' % idx)
            self.acts.append(nn.Activation(activation) if activation else None)
            self.register_child(self.acts[-1], 'act_%d' % idx)
        if channels[-1] == in_channels:
            self.sc_conv = None
        else:
            self.sc_conv = nn.Conv1D(channels=channels[-1], kernel_size=1, strides=1,
                                     use_bias=False if with_bn else True,
                                     in_channels=in_channels,
                                     weight_initializer=mx.init.Xavier(rnd_type='gaussian',
                                                                       factor_type="in",
                                                                       magnitude=2))
            self.sc_bn = nn.BatchNorm() if with_bn else None
            self.sc_act = nn.Activation(activation) if activation else None
def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
             padding=None, dilation=1, bias=True, init_gain='linear'):
    super(ConvNorm, self).__init__()
    self._init = {'weight_initializer': CustomXavier(init_gain)}
    if padding is None:
        assert kernel_size % 2 == 1
        padding = dilation * (kernel_size - 1) // 2
    self._conv = nn.Conv1D(out_channels,
                           in_channels=in_channels,
                           kernel_size=kernel_size,
                           strides=stride,
                           padding=padding,
                           dilation=dilation,
                           use_bias=bias,
                           **self._init)
def conv1d(channels, kernel_size, in_channels, use_bias=True, **kwargs):
    """
    Conv1D with better default initialization.
    """
    n = in_channels
    kernel_size = (kernel_size if isinstance(kernel_size, list) else [kernel_size])
    for k in kernel_size:
        n *= k
    stdv = 1.0 / math.sqrt(n)
    winit = mx.initializer.Uniform(stdv)
    if use_bias:
        binit = mx.initializer.Uniform(stdv)
    else:
        binit = "zeros"
    return nn.Conv1D(
        channels=channels,
        kernel_size=kernel_size,
        in_channels=in_channels,
        use_bias=use_bias,
        weight_initializer=winit,
        bias_initializer=binit,
        **kwargs,
    )
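# Hedged example (not in the original): for in_channels=64 and kernel_size=3 the
# fan-in is n = 64 * 3 = 192, so weights (and bias) are drawn from
# U(-1/sqrt(192), 1/sqrt(192)), roughly +/- 0.072, similar in spirit to
# PyTorch's default Conv1d initialization.
layer = conv1d(channels=128, kernel_size=3, in_channels=64)
layer.initialize()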
def __init__(self, classes=7, **kwargs):
    super(GluonCrepe, self).__init__(**kwargs)
    self.NUM_FILTERS = 256  # number of convolutional filters per convolutional layer
    self.NUM_OUTPUTS = classes  # number of classes
    self.FULLY_CONNECTED = 1024  # number of units in the fully connected dense layer
    self.features = nn.HybridSequential()
    with self.name_scope():
        self.features.add(
            nn.Conv1D(channels=self.NUM_FILTERS, kernel_size=7, activation='relu'),
            nn.MaxPool1D(pool_size=3, strides=3),
            nn.Conv1D(channels=self.NUM_FILTERS, kernel_size=7, activation='relu'),
            nn.MaxPool1D(pool_size=3, strides=3),
            nn.Conv1D(channels=self.NUM_FILTERS, kernel_size=3, activation='relu'),
            nn.Conv1D(channels=self.NUM_FILTERS, kernel_size=3, activation='relu'),
            nn.Conv1D(channels=self.NUM_FILTERS, kernel_size=3, activation='relu'),
            nn.Conv1D(channels=self.NUM_FILTERS, kernel_size=3, activation='relu'),
            nn.MaxPool1D(pool_size=3, strides=3),
            nn.Flatten(),
            nn.Dense(self.FULLY_CONNECTED, activation='relu'),
            nn.Dense(self.FULLY_CONNECTED, activation='relu'),
        )
        self.output = nn.Dense(self.NUM_OUTPUTS)
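# Hedged helper (not in the original) tracing the sequence width through the
# feature stack above, assuming the classic Crepe input length of 1014 one-hot
# encoded characters.
def crepe_output_width(w=1014):
    for k, pooled in [(7, True), (7, True), (3, False), (3, False), (3, False), (3, True)]:
        w = w - (k - 1)   # valid convolution
        if pooled:
            w = w // 3    # MaxPool1D(pool_size=3, strides=3)
    return w

print(crepe_output_width())  # 34 -> 34 * 256 filters = 8704 flattened features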
def __init__(self, feature_dict, args, ctx, task, **kwargs):
    """{"sparse": [SingleFeat], "dense": [SingleFeat]}"""
    super(xDeepFM, self).__init__(**kwargs)
    # ??
    util.mkdir_if_not_exist(args.SAVE_PARAMS_PATH_PREFIX)
    # self.feature_sizes = args.FEATURE_SIZE
    self.field_size = args.FIELD_NUM
    self.feature_dict = feature_dict
    print('field_size:')
    print(self.field_size)
    if args.TASK == 'finish':
        self.embedding_size = args.FINISH_EMBEDDING_SIZE
        self.batch_size = args.FINISH_BATCH_SIZE
    else:
        self.embedding_size = args.LIKE_EMBEDDING_SIZE
        self.batch_size = args.LIKE_BATCH_SIZE
    self.config_name = args.CONFIG_NAME
    # self.dropout_prob = args.DROPOUT_PROB
    self.task = task
    # self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
    if args.LOSS == 'l2loss':
        self.loss = gloss.L2Loss()
    else:
        self.loss = gloss.SigmoidBinaryCrossEntropyLoss()
    self.ctx = ctx
    self.embedding_dict = OrderedDict()
    self.dense_dict = OrderedDict()
    with self.name_scope():
        if self.task == 'finish':
            self.layer_list = [np.int(x) for x in args.FINISH_LAYER]
            self.dropout = args.FINISH_DROPOUT_PROB
        else:
            self.layer_list = [np.int(x) for x in args.LIKE_LAYER]
            self.dropout = args.LIKE_DROPOUT_PROB
        # self.params.get('v', shape=(self.field_size, self.embedding_size))
        self.dnn_out = nn.Dense(1, use_bias=False)
        self.register_child(self.dnn_out)
        for feat in feature_dict['sparse']:
            self.embedding_dict[feat.feat_name] = nn.Embedding(
                feat.feat_num, self.embedding_size)
        for feat in feature_dict['dense']:
            self.dense_dict[feat.feat_name] = nn.Dense(self.embedding_size)
        for emb_k, emb_v in self.embedding_dict.items():
            self.register_child(emb_v)
        for den_k, den_v in self.dense_dict.items():
            self.register_child(den_v)
        self.linear_logit_dense = nn.Dense(1, use_bias=False)
        self.register_child(self.linear_logit_dense)
        self.linear_logit_embedding_bn = nn.BatchNorm()
        self.register_child(self.linear_logit_embedding_bn)
        self.dense_list = []
        self.dropout_list = []
        self.bn_list = []
        self.activation_list = []
        for i in range(len(self.layer_list)):
            self.dense_list.append(nn.Dense(self.layer_list[i]))
            self.dropout_list.append(nn.Dropout(self.dropout))
            self.bn_list.append(nn.BatchNorm())
            self.activation_list.append(nn.Activation('relu'))
            self.register_child(self.dense_list[i])
            self.register_child(self.dropout_list[i])
            self.register_child(self.bn_list[i])
            self.register_child(self.activation_list[i])
        # if True:
        print('true')
        self.layer_size = [np.int(x) for x in args.CONV1D_LAYER]
        # self.cin_net = CIN(self.embedding_size, self.field_size, (128, 64), self.ctx)
        # print('oo')
        # self.cin_net.initialize()
        # print('uu')
        # self.register_child(self.cin_net)
        self.cin_dense = nn.Dense(1)
        self.register_child(self.cin_dense)
        self.cin_bn = nn.BatchNorm()
        self.register_child(self.cin_bn)
        self.field_nums = [self.field_size]
        self.conv_list = []
        for idx, size in enumerate(self.layer_size):
            self.conv_list.append(
                nn.Conv1D(channels=size, kernel_size=1, strides=1, padding=0,
                          activation='relu',
                          in_channels=self.field_nums[0] * self.field_nums[-1],
                          weight_initializer=init.Uniform()))
            self.field_nums.append(size)
            self.register_child(self.conv_list[idx])
def _make_layer(self, stage_index, block, planes, blocks, strides=1, dilation=1,
                pre_dilation=1, avg_down=False, norm_layer=None, last_gamma=False,
                dropblock_prob=0, input_size=224, use_splat=False, avd=False):
    downsample = None
    if strides != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.HybridSequential(prefix='down%d_' % stage_index)
        with downsample.name_scope():
            if avg_down:
                if pre_dilation == 1:
                    downsample.add(nn.AvgPool1D(pool_size=strides, strides=strides,
                                                ceil_mode=True,
                                                count_include_pad=False))
                elif strides == 1:
                    downsample.add(nn.AvgPool1D(pool_size=1, strides=1,
                                                ceil_mode=True,
                                                count_include_pad=False))
                else:
                    downsample.add(nn.AvgPool1D(pool_size=pre_dilation * strides,
                                                strides=strides, padding=1,
                                                ceil_mode=True,
                                                count_include_pad=False))
                downsample.add(nn.Conv1D(channels=planes * block.expansion,
                                         kernel_size=1, strides=1, use_bias=False,
                                         in_channels=self.inplanes))
                downsample.add(norm_layer(in_channels=planes * block.expansion,
                                          **self.norm_kwargs))
            else:
                downsample.add(nn.Conv1D(channels=planes * block.expansion,
                                         kernel_size=1, strides=strides,
                                         use_bias=False,
                                         in_channels=self.inplanes))
                downsample.add(norm_layer(in_channels=planes * block.expansion,
                                          **self.norm_kwargs))

    layers = nn.HybridSequential(prefix='layers%d_' % stage_index)
    with layers.name_scope():
        if dilation in (1, 2):
            layers.add(block(planes, cardinality=self.cardinality,
                             bottleneck_width=self.bottleneck_width,
                             strides=strides, dilation=pre_dilation,
                             downsample=downsample, previous_dilation=dilation,
                             norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                             last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                             input_size=input_size, use_splat=use_splat, avd=avd,
                             avd_first=self.avd_first, radix=self.radix,
                             in_channels=self.inplanes,
                             split_drop_ratio=self.split_drop_ratio))
        elif dilation == 4:
            layers.add(block(planes, cardinality=self.cardinality,
                             bottleneck_width=self.bottleneck_width,
                             strides=strides, dilation=pre_dilation,
                             downsample=downsample, previous_dilation=dilation,
                             norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                             last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                             input_size=input_size, use_splat=use_splat, avd=avd,
                             avd_first=self.avd_first, radix=self.radix,
                             in_channels=self.inplanes,
                             split_drop_ratio=self.split_drop_ratio))
        else:
            raise RuntimeError(
                "=> unknown dilation size: {}".format(dilation))

        input_size = _update_input_size(input_size, strides)
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.add(block(planes, cardinality=self.cardinality,
                             bottleneck_width=self.bottleneck_width,
                             dilation=dilation, previous_dilation=dilation,
                             norm_layer=norm_layer, norm_kwargs=self.norm_kwargs,
                             last_gamma=last_gamma, dropblock_prob=dropblock_prob,
                             input_size=input_size, use_splat=use_splat, avd=avd,
                             avd_first=self.avd_first, radix=self.radix,
                             in_channels=self.inplanes,
                             split_drop_ratio=self.split_drop_ratio))

    return layers