Example #1
    def __init__(self, output_dim, embedding_dim, hidden_dim, dropout_rate, n_layers,
            bos, eos, pad, ls_weight, labeldist):
        super(LM, self).__init__()

        self.bos, self.eos, self.pad = bos, eos, pad
        self.embedding = torch.nn.Embedding(output_dim, embedding_dim, padding_idx=pad)
        self.LSTM = torch.nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, batch_first=True, 
                dropout=dropout_rate if n_layers > 1 else 0)

        # re-init
        weight_init(self.LSTM)

        self.output_layer = torch.nn.Linear(hidden_dim, output_dim)
        self.dropout_layer = torch.nn.Dropout(p=dropout_rate)

        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.dropout_rate = dropout_rate
        self.n_layers = n_layers

        # label smoothing hyperparameters
        self.ls_weight = ls_weight
        self.labeldist = labeldist
        if labeldist is not None:
            self.vlabeldist = cc(torch.from_numpy(np.array(labeldist, dtype=np.float32)))
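The `weight_init` call on the LSTM above refers to a helper this snippet does not show. A minimal sketch of what such a re-initializer could look like, assuming the common convention of orthogonal recurrent weights, Xavier input weights, and zero biases (an assumption, not this repo's actual code):

import torch

def weight_init(lstm):
    # Hypothetical re-initializer consistent with the call site above.
    for name, param in lstm.named_parameters():
        if 'weight_hh' in name:    # hidden-to-hidden (recurrent) weights
            torch.nn.init.orthogonal_(param.data)
        elif 'weight_ih' in name:  # input-to-hidden weights
            torch.nn.init.xavier_uniform_(param.data)
        elif 'bias' in name:
            param.data.zero_()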
Example #2
def init_params(model, args):
    # - reinitialize all parameters according to default initialization
    model.apply(utils.weight_reset)
    # - initialize parameters according to chosen custom initialization (if requested)
    if hasattr(args, 'init_weight') and args.init_weight != "standard":
        utils.weight_init(model, strategy="xavier_normal")
    if hasattr(args, 'init_bias') and args.init_bias != "standard":
        utils.bias_init(model, strategy="constant", value=0.01)
    # - use pre-trained weights (either for full model or just in conv-layers)?
    if utils.checkattr(args, "pre_convE") and hasattr(
            model, 'depth') and model.depth > 0:
        load_name = model.convE.name if (
            not hasattr(args, 'convE_ltag')
            or args.convE_ltag == "none") else "{}-{}".format(
                model.convE.name, args.convE_ltag)
        utils.load_checkpoint(model.convE,
                              model_dir=args.m_dir,
                              name=load_name)
    if utils.checkattr(args, "pre_convD") and hasattr(
            model, 'convD') and model.depth > 0:
        utils.load_checkpoint(model.convD, model_dir=args.m_dir)
    return model


##-------------------------------------------------------------------------------------------------------------------##
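The `utils.weight_init(model, strategy=...)` and `utils.bias_init(model, strategy=..., value=...)` helpers are not shown in this snippet. A plausible sketch of strategy-dispatching initializers matching these call sites (names and supported strategies are assumptions):

import torch.nn as nn

def weight_init(model, strategy="xavier_normal"):
    # Hypothetical: apply the chosen scheme to all Linear/Conv weights.
    for m in model.modules():
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            if strategy == "xavier_normal":
                nn.init.xavier_normal_(m.weight)

def bias_init(model, strategy="constant", value=0.01):
    # Hypothetical: set all Linear/Conv biases to a constant.
    if strategy != "constant":
        return
    for m in model.modules():
        if isinstance(m, (nn.Linear, nn.Conv2d)) and m.bias is not None:
            nn.init.constant_(m.bias, value)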
Example #3
    def weights_init(self,
                     init_list,
                     vae_list,
                     flow_list=None,
                     pretrained=None,
                     filters_list=None,
                     logvar=-10.):
        self.apply(
            utils.weight_init(module=nn.Conv2d, initf=nn.init.xavier_normal_))
        self.apply(
            utils.weight_init(module=nn.Linear, initf=nn.init.xavier_normal_))
        self.apply(
            utils.weight_init(module=bayes.LogScaleConv2d,
                              initf=utils.const_init(logvar)))
        self.apply(
            utils.weight_init(module=bayes.LogScaleLinear,
                              initf=utils.const_init(logvar)))

        if len(init_list) > 0 and init_list[0] == 'pretrained':
            assert len(init_list) == 1
            w_pretrained = torch.load(pretrained)
            for k, v in w_pretrained.items():
                if k in self.state_dict():
                    self.state_dict()[k].data.copy_(v)
                else:
                    tokens = k.split('.')
                    self.state_dict()['.'.join(tokens[:2] + ['mean'] +
                                               tokens[-1:])].data.copy_(v)
            return

        convs = [self.features.conv1, self.features.conv2]
        for i, m in enumerate(convs):
            init = init_list[i] if i < len(init_list) else 'xavier'
            w = m.mean.weight if isinstance(m, bayes._Bayes) else m.weight
            if init == 'vae':
                vae_path = vae_list[i]
                vae = utils.load_vae(vae_path, device=self.device)
                z = torch.randn(
                    w.size(0) * w.size(1), vae.encoder.z_dim, 1,
                    1).to(vae.device)
                x = vae.decode(z)[0]
                w.data = x.reshape(w.shape)
            elif init == 'flow':
                flow_path = flow_list[i]
                flow = utils.load_flow(flow_path, device=self.device)
                utils.flow_init(flow)(w)
            elif init == 'xavier':
                pass
            elif init == 'filters':
                filters = np.load(filters_list[i])
                N = np.prod(w.shape[:2])
                filters = filters[np.random.permutation(len(filters))[:N]]
                w.data = torch.from_numpy(filters.reshape(*w.shape)).to(
                    self.device)
            else:
                raise NotImplementedError
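Unlike the previous examples, `utils.weight_init` here is a factory: called with a module type and an init function, it returns a callable suitable for `self.apply`. A minimal sketch of such a factory and its `const_init` companion (assumptions about the helpers, not their actual source):

def weight_init(module, initf):
    # Hypothetical factory: returns a function for nn.Module.apply that
    # runs `initf` on the weight of every submodule of type `module`.
    def init_fn(m):
        if isinstance(m, module) and getattr(m, 'weight', None) is not None:
            initf(m.weight)
    return init_fn

def const_init(value):
    # Hypothetical companion: fills a tensor in-place with a constant,
    # e.g. the fixed logvar used for the log-scale layers above.
    def initf(w):
        w.data.fill_(value)
    return initf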
Example #4
    def finetune_from(self, path):
        weight_init(self)
        weights = torch.load(path, map_location='cpu')
        load_state = weights.state_dict()
        own_state = self.state_dict()
        for name, param in load_state.items():
            if name not in own_state:
                continue
            if 'head' in name:
                continue
            own_state[name].copy_(param)

    def __init__(self, args):
        super(FiLM, self).__init__()
        # CNN
        self.cnn = CNN(args)
        # FiLM Generator
        self.film_generator = FiLM_Generator(args)
        # FiLM-ed Network
        self.filmed_network = FiLMed_Network(args)
        # weight initialization
        weight_init(self.modules())
        # model device cfg
        self.to(args.device)

    def __init__(
        self,
        hidden_dims=[30, 30],
        input_dim=4,
        output_dim=1,
        grad_clip=5,
        reg=0.01,
        dropout=0.7,
        beta=0.05,
        nu=0.05,
    ):
        """
        Create a simple multi-layer perceptron network with a number of hidden
        layers.
        
        Input:
            hidden_dims : A tuple or list of hidden dimensions
            input_dim   : Dimensions of the input (x)
            output_dim  : Dimensions of the output/prediction (y_hat)
            grad_clip   : Clips the gradient for stability in gradient descent.
                Either False (disable gradient clipping) or the maximum
                absolute value allowed for the gradient
            reg         : L2 regularization scale (==0.0 for no regularization)
            dropout     : The probability of keeping each neuron in dropout
            beta        : (β) Adaption gain (or learning rate)
            nu          : (ν) E-mod gain
        """
        self.n_hidden = len(hidden_dims)
        self.M = output_dim
        self.D = input_dim
        self.grad_clip = grad_clip
        self.params = {}
        self.reg = reg  # Strength of L2 regularization
        self.dropout = dropout  # The `keep` probability for dropout
        self.beta = beta
        self.nu = nu

        # For each layer get the input and output dimensions
        input_dims = [input_dim] + hidden_dims  # By default: [4, 30, 30]
        output_dims = hidden_dims + [output_dim]  # By default: [30, 30, 1]

        # Create all of the initial weights and biases, save to dictionary (aka hashtable)
        for l in range(self.n_hidden):
            i, o = input_dims[l], output_dims[l]
            self.params["w" + str(l)] = weight_init(i, o)
            self.params["b" + str(l)] = bias_init(o)
        self.params["w_out"] = weight_init(input_dims[-1], output_dim)
        self.params["b_out"] = bias_init(output_dim)

        # This part allows for data whitening at the output layer
        self.whiten_mu = np.zeros((output_dim))
        self.whiten_sigma = np.ones((output_dim))
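The `weight_init(i, o)` and `bias_init(o)` helpers used by this plain-numpy MLP are not shown. A common choice consistent with the call sites, sketched with Xavier/Glorot scaling (the scaling rule is an assumption):

import numpy as np

def weight_init(fan_in, fan_out):
    # Hypothetical Xavier/Glorot-style init for a dense layer.
    scale = np.sqrt(2.0 / (fan_in + fan_out))
    return np.random.randn(fan_in, fan_out) * scale

def bias_init(fan_out):
    # Hypothetical: biases start at zero.
    return np.zeros(fan_out)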
Example #7
    def __init__(self, num_classes: int):
        super().__init__()

        self.num_classes = num_classes
        self.base_net = MobileNetV1(pretrained=True).model
        self.source_layer_indexes = [12, 14]
        self.extras = ModuleList([
            Sequential(
                Conv2d(in_channels=1024, out_channels=256, kernel_size=1),
                ReLU(),
                Conv2d(in_channels=256,
                       out_channels=512,
                       kernel_size=3,
                       stride=2,
                       padding=1), ReLU()),
            Sequential(
                Conv2d(in_channels=512, out_channels=128, kernel_size=1),
                ReLU(),
                Conv2d(in_channels=128,
                       out_channels=256,
                       kernel_size=3,
                       stride=2,
                       padding=1), ReLU()),
            Sequential(
                Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                ReLU(),
                Conv2d(in_channels=128,
                       out_channels=256,
                       kernel_size=3,
                       stride=2,
                       padding=1), ReLU()),
            Sequential(
                Conv2d(in_channels=256, out_channels=128, kernel_size=1),
                ReLU(),
                Conv2d(in_channels=128,
                       out_channels=256,
                       kernel_size=3,
                       stride=2,
                       padding=1), ReLU())
        ])
        weight_init(self.extras)

        out_channels = 256
        self.fpn = FPN([512, 1024, 512, 256, 256, 256], out_channels)

        out_channels_list = [out_channels] * 6
        self.head = SSDHead(
            num_classes, out_channels_list, self.sizes,
            AnchorCellCreator(self.aspect_ratios, smin=0.2, smax=0.95))
Example #8
    def __init__(self, args):
        super(RN, self).__init__()
        self.args = args
        self.cnn = CNN(args)  # (N,C,H,W)
        self.pos = self.get_positional_encoding(args)  # (1,2,H,W)
        self.rn_g = RN_G(args)
        self.rn_f = RN_F(args)
        cls_ch = args.rn_f_chs.split(",")[-1]
        cls_ch = int(cls_ch[:-1]) if cls_ch[-1].lower() == "d" else int(cls_ch)
        self.classifier = nn.Linear(cls_ch, args.num_cat)
        if args.rn_extension:
            self.ref_finder = RefFinder(args)

        weight_init(self.modules())
        self.to(args.device)
Example #9
    def finetune_from(self, path):
        weight_init(self)
        weights = torch.load(path, map_location='cpu')
        load_state = weights.state_dict()
        own_state = self.state_dict()
        for name, param in load_state.items():
            if name not in own_state:
                continue
            if 'head' in name:
                continue
            own_state[name].copy_(param)

# model = Resnet50_FPN_SSD(2)
# x = torch.zeros((1, 3, 300, 300))
#
# o = model(x)
Example #10
def init_params(model, args):
    # - reinitialize all parameters according to default initialization
    model.apply(utils.weight_reset)
    # - initialize parameters according to chosen custom initialization (if requested)
    if hasattr(args, 'init_weight') and args.init_weight != "standard":
        utils.weight_init(model, strategy="xavier_normal")
    if hasattr(args, 'init_bias') and args.init_bias != "standard":
        utils.bias_init(model, strategy="constant", value=0.01)
    # - use pre-trained weights in conv-layers
    load_name = "{}-e100".format(model.convE.name)
    utils.load_checkpoint(model.convE,
                          model_dir='./conv_layers',
                          name=load_name)
    # - freeze weights of conv-layers?
    if utils.checkattr(args, "bir"):
        for param in model.convE.parameters():
            param.requires_grad = False
    return model
    def __init__(self,
                 img_channel,
                 base_channel,
                 wn,
                 bn,
                 conv_dcp,
                 ca,
                 upscale_factor,
                 num_blocks,
                 wa_rate=1,
                 residual_rescale=1):
        """
        init the model described in my paper.
        :param img_channel: input image channels, RGB, YCbCr: 3, grey: 1.
        :param base_channel: base feature channels in the network, int.
                             if wa_rate=1, every feature map in residual blocks should have 'base_channel' channels.
        :param wn: if use weight normalization, bool.
        :param bn: if use batch normalization, bool.
        :param conv_dcp: if use asymmetric decomposed convolution layer in residual blocks, bool.
        :param ca: if use channel attention mechanism in residual blocks, bool.
        :param upscale_factor: int, can be any positive integer number.
        :param num_blocks: how many residual blocks in the network.
        :param wa_rate: widen activation layer rate, float, wa_rate>=1. if wa_rate=1, this mechanism is disabled.
        :param residual_rescale: rescale the activation value of residual blocks. Disabled by default.
        """
        nn.Module.__init__(self)

        channel_lo = round(base_channel / wa_rate)
        channel_hi = round(base_channel * wa_rate)
        if channel_lo * 1.2 < img_channel * upscale_factor * upscale_factor:
            raise Warning(
                'wide activation rate does not fit the upscale factor, or too few base channels.'
            )

        self.base_feat_extract = conv_base(img_channel, channel_lo, wn, 3, 1,
                                           1)
        block_list = [
            res_block(channel_lo, channel_hi, wn, bn, conv_dcp, ca,
                      residual_rescale) for _ in range(num_blocks)
        ]
        self.res_blocks = nn.Sequential(*block_list)
        self.upsampler = upsampler(channel_lo, img_channel, upscale_factor, wn)

        weight_init(self)
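Following the docstring above, a hypothetical instantiation (`SRNet` is a placeholder; the snippet never shows the class name):

# channel_lo = round(32 / 2) = 16, channel_hi = round(32 * 2) = 64;
# 16 * 1.2 = 19.2 is not < 3 * 2 * 2 = 12, so no warning for x2 upscaling.
model = SRNet(img_channel=3, base_channel=32, wn=True, bn=False,
              conv_dcp=False, ca=True, upscale_factor=2, num_blocks=8,
              wa_rate=2, residual_rescale=1)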
Example #12
    def __init__(self, opt):
        super(MUNIT, self).__init__()

        # generators and discriminators
        self.gen_a = Generator(opt.ngf, opt.style_dim, opt.mlp_dim)
        self.gen_b = Generator(opt.ngf, opt.style_dim, opt.mlp_dim)
        self.dis_a = Discriminator(opt.ndf)
        self.dis_b = Discriminator(opt.ndf)
        # random style code
        self.s_a = torch.randn(opt.display_size,
                               opt.style_dim,
                               1,
                               1,
                               requires_grad=True).cuda()
        self.s_b = torch.randn(opt.display_size,
                               opt.style_dim,
                               1,
                               1,
                               requires_grad=True).cuda()

        # optimizers
        dis_params = list(self.dis_a.parameters()) + list(
            self.dis_b.parameters())
        gen_params = list(self.gen_a.parameters()) + list(
            self.gen_b.parameters())
        self.dis_opt = torch.optim.Adam(dis_params,
                                        lr=opt.lr,
                                        betas=(opt.beta1, 0.999),
                                        weight_decay=opt.weight_decay)
        self.gen_opt = torch.optim.Adam(gen_params,
                                        lr=opt.lr,
                                        betas=(opt.beta1, 0.999),
                                        weight_decay=opt.weight_decay)

        # network weight initialization
        self.apply(weight_init('kaiming'))
        self.dis_a.apply(weight_init('gaussian'))
        self.dis_b.apply(weight_init('gaussian'))
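Here `weight_init` is again a factory, keyed by a mode string and passed to `apply`. A sketch in the spirit of MUNIT's initializer (the exact modes and constants are assumptions):

import torch.nn as nn

def weight_init(init_type='kaiming'):
    # Hypothetical mode-keyed factory for nn.Module.apply.
    def init_fn(m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            if init_type == 'kaiming':
                nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
            elif init_type == 'gaussian':
                nn.init.normal_(m.weight, 0.0, 0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0.0)
    return init_fn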
Example #13
    dis = models.ResNet32Discriminator(N_CHANNEL, 1, N_FILTERS_D, BATCH_NORM_D)
elif MODEL == "dcgan":
    gen = models.DCGAN32Generator(N_LATENT,
                                  N_CHANNEL,
                                  N_FILTERS_G,
                                  batchnorm=BATCH_NORM_G)
    dis = models.DCGAN32Discriminator(N_CHANNEL,
                                      1,
                                      N_FILTERS_D,
                                      batchnorm=BATCH_NORM_D)

if CUDA:
    gen = gen.cuda(0)
    dis = dis.cuda(0)

gen.apply(lambda x: utils.weight_init(x, mode='normal'))
dis.apply(lambda x: utils.weight_init(x, mode='normal'))

if ALGORITHM == 'Adam':
    import torch.optim as optim
    dis_optimizer = optim.Adam(dis.parameters(),
                               lr=LEARNING_RATE_D,
                               betas=(BETA_1, BETA_2))
    gen_optimizer = optim.Adam(gen.parameters(),
                               lr=LEARNING_RATE_G,
                               betas=(BETA_1, BETA_2))
elif ALGORITHM == 'ExtraAdam':
    import optim  # project-local optim module providing ExtraAdam
    dis_optimizer = optim.ExtraAdam(dis.parameters(),
                                    lr=LEARNING_RATE_D,
                                    betas=(BETA_1, BETA_2))
Example #14
test_iter = data.Iterator(dataset=test_data, batch_size=BATCH_SIZE, sort=False)

# build model
from text_classify.model import RNN, WordAVGModel, TextCNN
from text_classify.transformer import Transformer
embedding_size = (TEXT.vocab.vectors.shape[1]
                  if USE_PRE_TRAIN_MODEL else EMBEDDING_SIZE)

# model = RNN(input_size=len(TEXT.vocab), embedding_size=embedding_size, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, output_size=len(LABEL.vocab))
# model = TextCNN(input_size=len(TEXT.vocab), embedding_size=embedding_size, output_size=len(LABEL.vocab), pooling_method='avg')
model = WordAVGModel(vocab_size=len(TEXT.vocab),
                     embedding_dim=embedding_size,
                     output_dim=len(LABEL.vocab))
# model = Transformer(input_size=len(TEXT.vocab), d_model=embedding_size, num_head=4, d_ff=HIDDEN_SIZE, output_size=len(LABEL.vocab), pad=TEXT.vocab.stoi['<pad>'], use_mask=True)

utils.weight_init(model)
if USE_PRE_TRAIN_MODEL:
    model.embedding.weight.data.copy_(TEXT.vocab.vectors)
model.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)
if TRAIN:
    for epoch in range(1, 1 + EPOCH_SIZE):
        torch.cuda.empty_cache()
        train_loss = []
        valid_loss = []
        valid_acc = 0
        model.train()
        for batch in tqdm(train_iter):
            model.zero_grad()

    qvalue_node = nengo.Node(size_in=4)

    # define neurons to encode state representations
    state = nengo.Ensemble(n_neurons=n_neurons, dimensions=25,
                           intercepts=nengo.dists.Choice([0.15]), radius=2)

    # define neurons that compute the learning signal
    learn_signal = nengo.Ensemble(n_neurons=1000, dimensions=4)

    # connect the sensor to state ensemble
    nengo.Connection(sensor_node, state, synapse=None)
    reward_probe = nengo.Probe(reward_node, synapse=fast_tau)

    # connect state representation to environment interface
    q_conn = nengo.Connection(state.neurons, update_node,
                              transform=weight_init(shape=(n_actions, n_neurons)),
                              learning_rule_type=nengo.PES(1e-3, pre_tau=slow_tau),
                              synapse=fast_tau)

    # connect update node to error signal ensemble w/ fast, slow conns to compute prediction error
    nengo.Connection(update_node[0:n_actions], learn_signal, transform=-1, synapse=slow_tau)
    nengo.Connection(update_node[n_actions:2 * n_actions], learn_signal, transform=1, synapse=fast_tau)

    # connect the learning signal to the learning rule
    nengo.Connection(learn_signal, q_conn.learning_rule, transform=-1, synapse=fast_tau)

    # for plotting and visualization purposes
    nengo.Connection(update_node[2 * n_actions:], qvalue_node, synapse=fast_tau)

with nengo.Simulator(model) as sim:
    sim.run(10)
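In the Nengo fragment, `weight_init(shape=(n_actions, n_neurons))` supplies the initial transform for the learned connection. With a PES learning rule the decoders are commonly started at zero so the error signal shapes them from scratch; a sketch of such a helper (assumed, not shown in the snippet):

import numpy as np

def weight_init(shape):
    # Hypothetical: zero-initialize the learned Q-value decoders so the
    # PES rule builds them up from the error signal alone.
    return np.zeros(shape)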