import torch.nn as nn
# Fp32GroupNorm, Fp32LayerNorm and TransposeLast ship with fairseq (fairseq.modules)
from fairseq.modules import Fp32GroupNorm, Fp32LayerNorm, TransposeLast


def block(n_in, n_out, k, stride, is_layer_norm=False, is_group_norm=False, conv_bias=False):
    # Builds one convolutional feature-extractor block: Conv1d -> Dropout ->
    # (optional normalization) -> GELU. Note that `dropout` and `dim` are read
    # from the enclosing scope, not from this function's arguments.
    def make_conv():
        conv = nn.Conv1d(n_in, n_out, k, stride=stride, bias=conv_bias)
        nn.init.kaiming_normal_(conv.weight)
        return conv

    assert (
        is_layer_norm and is_group_norm
    ) == False, "layer norm and group norm are exclusive"

    if is_layer_norm:
        # layer norm acts on the last dimension, so transpose (B, C, T) -> (B, T, C)
        # around the norm to normalize over channels
        return nn.Sequential(
            make_conv(),
            nn.Dropout(p=dropout),
            nn.Sequential(
                TransposeLast(),
                Fp32LayerNorm(dim, elementwise_affine=True),
                TransposeLast(),
            ),
            nn.GELU(),
        )
    elif is_group_norm:
        return nn.Sequential(
            make_conv(),
            nn.Dropout(p=dropout),
            Fp32GroupNorm(dim, dim, affine=True),
            nn.GELU(),
        )
    else:
        return nn.Sequential(make_conv(), nn.Dropout(p=dropout), nn.GELU())
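# --- A minimal usage sketch (not part of the original snippet). It assumes the
# `block` helper above, a wav2vec-style layer spec of (dim, kernel, stride)
# tuples, and module-level `dropout`/`dim` bindings, since `block` reads those
# names from its enclosing scope. All sizes below are illustrative.
import torch

dropout = 0.1  # picked up by `block` through the module scope in this sketch
conv_spec = [(512, 10, 5), (512, 3, 2), (512, 3, 2)]  # (dim, kernel, stride)

layers = []
in_d = 1  # raw waveform has a single input channel
for i, (dim, k, stride) in enumerate(conv_spec):
    # group-normalize only the first block; later blocks stay un-normalized here
    layers.append(block(in_d, dim, k, stride, is_group_norm=(i == 0)))
    in_d = dim

feature_extractor = nn.Sequential(*layers)
features = feature_extractor(torch.randn(2, 1, 16000))  # -> (2, 512, frames)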
def make_aggregator():
    # Builds the context aggregator that sits on top of the feature extractor.
    # `args`, `embed` and `activation` are read from the enclosing scope.
    if args.aggregator == "cnn":
        # the conv spec is stored as a string and parsed here
        agg_layers = eval(args.conv_aggregator_layers)
        agg_dim = agg_layers[-1][0]
        feature_aggregator = ConvAggegator(
            conv_layers=agg_layers,
            embed=embed,
            dropout=args.dropout,
            skip_connections=args.skip_connections_agg,
            residual_scale=args.residual_scale,
            non_affine_group_norm=args.non_affine_group_norm,
            conv_bias=not args.no_conv_bias,
            zero_pad=args.agg_zero_pad,
            activation=activation,
        )
    elif args.aggregator == "gru":
        agg_dim = args.gru_dim
        feature_aggregator = nn.Sequential(
            TransposeLast(),
            nn.GRU(
                input_size=embed,
                hidden_size=agg_dim,
                num_layers=1,
                dropout=args.dropout,
            ),
            # the GRU returns (output, hidden); deconstruct_idx=0 keeps only the output
            TransposeLast(deconstruct_idx=0),
        )
    else:
        raise Exception("unknown aggregator type " + args.aggregator)

    return feature_aggregator, agg_dim
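# --- A minimal sketch driving `make_aggregator` with the "gru" aggregator (not
# part of the original snippet). The argparse.Namespace fields, `embed`, and
# `activation` values are illustrative assumptions; the function reads all
# three names from its enclosing scope.
import argparse
import torch

embed = 512                      # feature dimension coming out of the extractor
activation = nn.ReLU()           # only consulted by the "cnn" branch
args = argparse.Namespace(aggregator="gru", gru_dim=512, dropout=0.0)

feature_aggregator, agg_dim = make_aggregator()
z = torch.randn(2, embed, 100)   # (batch, channels, timesteps)
c = feature_aggregator(z)        # -> (2, agg_dim, 100)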
def norm_block(is_layer_norm, dim, affine=True):
    # Layer norm operates over the last dimension, so wrap it in transposes to
    # normalize the channel dimension of (B, C, T) inputs; otherwise use a
    # single-group group norm, which normalizes over channels and time jointly.
    if is_layer_norm:
        mod = nn.Sequential(
            TransposeLast(),
            Fp32LayerNorm(dim, elementwise_affine=affine),
            TransposeLast(),
        )
    else:
        mod = Fp32GroupNorm(1, dim, affine=affine)

    return mod
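# --- A quick sketch exercising `norm_block` (not part of the original
# snippet); the tensor sizes are arbitrary.
import torch

ln = norm_block(is_layer_norm=True, dim=256)    # LayerNorm over the channel dim
gn = norm_block(is_layer_norm=False, dim=256)   # single-group GroupNorm

x = torch.randn(4, 256, 50)                     # (batch, channels, timesteps)
print(ln(x).shape, gn(x).shape)                 # both preserve (4, 256, 50)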