def train(print_result=True):
    """Train a LinearNet under DataParallel and record the per-step loss."""
    train_data_list1 = []
    train_data_list2 = []
    # 1. initialize parallel environment
    dist.init_parallel_env()

    # 2. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # 3. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)
    assert len(loss) == 1
    if print_result:
        train_data_list1.append(loss.numpy())
        assert len(train_data_list1)

    loss.backward()

    adam.step()
    adam.clear_grad()
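These train() examples assume a small LinearNet model and a process launcher that are not shown on this page. A minimal sketch, following the shapes used above (10 input features, 1 output) and launching two workers with paddle.distributed.spawn; the exact layer sizes are an assumption, not part of the original snippets:

import paddle.nn as nn
import paddle.distributed as dist


class LinearNet(nn.Layer):
    def __init__(self):
        super().__init__()
        self._linear1 = nn.Linear(10, 10)
        self._linear2 = nn.Linear(10, 1)

    def forward(self, x):
        return self._linear2(self._linear1(x))


if __name__ == '__main__':
    # start two worker processes, each running train()
    dist.spawn(train, nprocs=2)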
Example No. 2
def train():
    # 1. enable dynamic mode
    paddle.disable_static()

    # 2. initialize parallel environment
    dist.init_parallel_env()

    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # 4. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)

    # scale_loss and apply_collective_grads are only required on Paddle < 2.0;
    # from 2.0 on, loss.backward() performs the gradient all-reduce itself
    loss = dp_layer.scale_loss(loss)
    loss.backward()
    dp_layer.apply_collective_grads()

    adam.step()
    adam.clear_grad()
Example No. 3
    def __init__(self, reduction='mean', loss_weight=1.0):
        # return early (leaving the loss unconfigured) when loss_weight is not positive
        if loss_weight <= 0:
            return None
        self._l2_loss = nn.MSELoss(reduction)
        self.loss_weight = loss_weight
        self.reduction = reduction
Example No. 4
def train(print_result=False):
    # 1. initialize parallel environment
    dist.init_parallel_env()

    # 2. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # 3. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)

    if print_result is True:
        print("Rank:", int(os.getenv("PADDLE_TRAINER_ID")))

    loss.backward()
    adam.step()
    adam.clear_grad()

    return int(os.getenv("PADDLE_TRAINER_ID"))
Example No. 5
def train(print_result=False):
    # 1. enable dynamic mode
    paddle.disable_static()
    
    # 2. initialize parallel environment
    dist.init_parallel_env()

    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(
        learning_rate=0.001, parameters=dp_layer.parameters())

    # 4. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)
    
    if print_result is True:
        print("loss:", loss.numpy())
    
    loss.backward()

    adam.step()
    adam.clear_grad()
Example No. 6
def train(print_result=True):
    # 1. enable dynamic mode
    # device = paddle.set_device('gpu')
    # paddle.disable_static(device)

    # 2. initialize parallel environment
    dist.init_parallel_env()

    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    dataset = FakeDataset()
    # loader = paddle.io.DataLoader(dataset, batch_size=2, places=device, num_workers=2)
    loader = paddle.io.DataLoader(dataset, batch_size=2, num_workers=2)
    # 4. run layer
    for inputs, labels in loader:
        # inputs = paddle.randn([10, 10], 'float32')
        outputs = dp_layer(inputs)
        # labels = paddle.randn([10, 1], 'float32')
        loss = loss_fn(outputs, labels)

        if print_result is True:
            print("loss:", loss.numpy())

        # loss = dp_layer.scale_loss(loss)
        loss.backward()
        # dp_layer.apply_collective_grads()

        adam.step()
        adam.clear_grad()
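FakeDataset is not defined in this snippet; a minimal stand-in built on paddle.io.Dataset, producing the same 10-feature inputs and 1-value labels as the other examples (the sample count is arbitrary):

import numpy as np
import paddle


class FakeDataset(paddle.io.Dataset):
    """Random (feature, label) pairs shaped like the snippets above."""

    def __init__(self, num_samples=100):
        super().__init__()
        self.num_samples = num_samples

    def __getitem__(self, idx):
        feature = np.random.randn(10).astype('float32')
        label = np.random.randn(1).astype('float32')
        return feature, label

    def __len__(self):
        return self.num_samples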
Example No. 7
    def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
        """ Initialize the GANLoss class.

        Parameters:
            gan_mode (str) - - the type of GAN objective. It currently supports vanilla, lsgan, and wgangp.
            target_real_label (float) - - label for a real image
            target_fake_label (float) - - label for a fake image

        Note: Do not use sigmoid as the last layer of Discriminator.
        LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss.
        """
        super(GANLoss, self).__init__()
        # self.register_buffer('real_label', torch.tensor(target_real_label))
        # self.register_buffer('fake_label', torch.tensor(target_fake_label))
        self.real_label = paddle.fluid.dygraph.to_variable(
            np.array(target_real_label))
        self.fake_label = paddle.fluid.dygraph.to_variable(
            np.array(target_fake_label))
        # self.real_label.stop_gradients = True
        # self.fake_label.stop_gradients = True

        self.gan_mode = gan_mode
        if gan_mode == 'lsgan':
            self.loss = nn.MSELoss()
        elif gan_mode == 'vanilla':
            self.loss = nn.BCELoss()  #nn.BCEWithLogitsLoss()
        elif gan_mode in ['wgangp']:
            self.loss = None
        else:
            raise NotImplementedError('gan mode %s not implemented' % gan_mode)
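The class above only selects a criterion; how it is applied is not shown here. A hedged sketch of how such a criterion is typically used (not the original code): expand the real/fake label to the prediction's shape for lsgan/vanilla, and use the raw critic output for wgangp:

import paddle


def gan_loss_forward(criterion, gan_mode, prediction, target_is_real,
                     real_label=1.0, fake_label=0.0):
    if gan_mode in ('lsgan', 'vanilla'):
        value = real_label if target_is_real else fake_label
        target = paddle.full_like(prediction, value)
        return criterion(prediction, target)
    # wgangp: the critic's mean output is the loss signal
    return -prediction.mean() if target_is_real else prediction.mean()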
Example No. 8
    def __init__(self,
                 gan_mode,
                 target_real_label=1.0,
                 target_fake_label=0.0,
                 loss_weight=1.0):
        """ Initialize the GANLoss class.

        Args:
            gan_mode (str): the type of GAN objective. It currently supports vanilla, lsgan, and wgangp.
            target_real_label (float): label for a real image
            target_fake_label (float): label for a fake image

        Note: Do not use sigmoid as the last layer of Discriminator.
        LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss.
        """
        super(GANLoss, self).__init__()
        # return early when loss_weight is not positive
        if loss_weight <= 0:
            return None

        self.target_real_label = target_real_label
        self.target_fake_label = target_fake_label
        self.loss_weight = loss_weight

        self.gan_mode = gan_mode
        if gan_mode == 'lsgan':
            self.loss = nn.MSELoss()
        elif gan_mode == 'vanilla':
            self.loss = nn.BCEWithLogitsLoss()
        elif gan_mode in ['wgan', 'wgangp', 'hinge', 'logistic']:
            self.loss = None
        else:
            raise NotImplementedError('gan mode %s not implemented' % gan_mode)
Example No. 9
    def __init__(self, mode="l2", **kargs):
        super().__init__()
        assert mode in ["l1", "l2", "smooth_l1"]
        if mode == "l1":
            self.loss_func = nn.L1Loss(**kargs)
        elif mode == "l2":
            self.loss_func = nn.MSELoss(**kargs)
        elif mode == "smooth_l1":
            self.loss_func = nn.SmoothL1Loss(**kargs)
Example No. 10
    def __init__(self, use_target_weight=True):
        """
        KeyPointMSELoss layer

        Args:
            use_target_weight (bool): whether to use target weight
        """
        super(KeyPointMSELoss, self).__init__()
        self.criterion = nn.MSELoss(reduction='mean')
        self.use_target_weight = use_target_weight
Example No. 11
    def __init__(self,
                 use_target_weight=True,
                 loss_scale=0.5,
                 key=None,
                 weight=1.0):
        super().__init__()
        self.criterion = nn.MSELoss(reduction='mean')
        self.use_target_weight = use_target_weight
        self.loss_scale = loss_scale
        self.key = key
        self.weight = weight
Example No. 12
def train():
    # 1. initialize parallel environment
    dist.init_parallel_env()
    set_seed(2021)
    # 2. create data parallel layer & optimizer
    layer = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer = paddle.DataParallel(layer)
    else:
        dp_layer = layer

    layer2 = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer2 = paddle.DataParallel(layer2)
    else:
        dp_layer2 = layer2

    dp_layer2.set_state_dict(dp_layer.state_dict())

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    adam2 = opt.Adam(learning_rate=0.001, parameters=dp_layer2.parameters())
    # 3. run layer

    print("Start")
    for i in range(10):
        batch_size = 10
        shard = int(batch_size / dist.get_world_size())
        start_no = shard * dist.get_rank()
        end_no = start_no + shard
        inputs = paddle.randn([10, 10], 'float32')[start_no:end_no]
        outputs = dp_layer(inputs)
        labels = paddle.randn([10, 1], 'float32')[start_no:end_no]
        loss = loss_fn(outputs, labels)
        if dist.get_rank() == 0:
            print("Loss1", loss.numpy()[0])
            print(dp_layer.parameters())
        loss.backward()
        adam.step()
        adam.clear_grad()

        outputs = dp_layer2(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        if dist.get_rank() == 0:
            print("Loss2", loss.numpy()[0])
            print(dp_layer2.parameters())
        adam2.step()
        adam2.clear_grad()
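set_seed is not shown; a minimal version, assuming it simply fixes the Paddle, NumPy, and Python random generators:

import random

import numpy as np
import paddle


def set_seed(seed):
    paddle.seed(seed)
    np.random.seed(seed)
    random.seed(seed)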
Example No. 13
def train():
    """bergin train"""
    arr1 = []
    arr2 = []
    dist.init_parallel_env()
    set_seed(2021)
    layer = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer = paddle.DataParallel(layer)
    else:
        dp_layer = layer

    layer2 = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer2 = paddle.DataParallel(layer2)
    else:
        dp_layer2 = layer2

    dp_layer2.set_state_dict(dp_layer.state_dict())

    loss_fn = nn.MSELoss()
    adam = opt.Adam(
        learning_rate=0.001, parameters=dp_layer.parameters())

    adam2 = opt.Adam(
        learning_rate=0.001, parameters=dp_layer2.parameters())

    for i in range(2):
        batch_size = 10
        shard = int(batch_size / dist.get_world_size())
        start_no = shard * dist.get_rank()
        end_no = start_no + shard
        inputs = paddle.randn([10, 10], 'float32')[start_no:end_no]
        outputs = dp_layer(inputs)
        labels = paddle.randn([10, 1], 'float32')[start_no:end_no]
        loss = loss_fn(outputs, labels)
        if dist.get_rank() == 0:
            arr1.append(loss.numpy()[0])
        loss.backward()
        adam.step()
        adam.clear_grad()

        outputs = dp_layer2(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        if dist.get_rank() == 0:
            arr2.append(loss.numpy()[0])
        adam2.step()
        adam2.clear_grad()
    check_data(arr1, arr2)
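check_data is also left undefined. Since dp_layer2 starts from dp_layer's state dict and sees the same batches, a plausible check (an assumption, not the original helper) is that the two loss curves agree:

import numpy as np


def check_data(arr1, arr2):
    # both models start from identical weights and see identical data,
    # so their per-step losses should match within floating-point tolerance
    np.testing.assert_allclose(arr1, arr2, rtol=1e-5, atol=1e-6)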
Example No. 14
def pack_models(path):
    model = Model()
    loss = nn.MSELoss()
    adam = paddle.optimizer.Adam(parameters=model.parameters())

    train_data = paddle.text.datasets.UCIHousing(mode="train")

    loader = paddle.io.DataLoader(train_data,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  drop_last=True,
                                  num_workers=2)

    train(model, loader, loss, adam)

    PaddlePaddleModelArtifact("model").pack(model).save(path)
Example No. 15
    def __init__(self, cfg):
        """Initialize the CycleGAN class.

        Parameters:
            opt (config)-- stores all the experiment flags; needs to be a subclass of Dict
        """
        super(UGATITModel, self).__init__(cfg)

        # define networks (both Generators and discriminators)
        # The naming is different from those used in the paper.
        self.nets['genA2B'] = build_generator(cfg.model.generator)
        self.nets['genB2A'] = build_generator(cfg.model.generator)
        init_weights(self.nets['genA2B'])
        init_weights(self.nets['genB2A'])

        if self.is_train:
            # define discriminators
            self.nets['disGA'] = build_discriminator(cfg.model.discriminator_g)
            self.nets['disGB'] = build_discriminator(cfg.model.discriminator_g)
            self.nets['disLA'] = build_discriminator(cfg.model.discriminator_l)
            self.nets['disLB'] = build_discriminator(cfg.model.discriminator_l)
            init_weights(self.nets['disGA'])
            init_weights(self.nets['disGB'])
            init_weights(self.nets['disLA'])
            init_weights(self.nets['disLB'])

        if self.is_train:
            # define loss functions
            self.BCE_loss = nn.BCEWithLogitsLoss()
            self.L1_loss = nn.L1Loss()
            self.MSE_loss = nn.MSELoss()

            self.build_lr_scheduler()
            self.optimizers['optimizer_G'] = build_optimizer(
                cfg.optimizer,
                self.lr_scheduler,
                parameter_list=self.nets['genA2B'].parameters() +
                self.nets['genB2A'].parameters())
            self.optimizers['optimizer_D'] = build_optimizer(
                cfg.optimizer,
                self.lr_scheduler,
                parameter_list=self.nets['disGA'].parameters() +
                self.nets['disGB'].parameters() +
                self.nets['disLA'].parameters() +
                self.nets['disLB'].parameters())
            self.Rho_clipper = RhoClipper(0, 1)
Example No. 16
    def validation_step(self, batch: int, batch_idx: int) -> dict:
        '''
        One step for validation, which should be called as forward computation.

        Args:
            batch(list[paddle.Tensor]): The one batch data, which contains images and labels.
            batch_idx(int): The index of batch.

        Returns:
            results(dict) : The model outputs, such as metrics.
        '''
        mse_loss = nn.MSELoss()
        N, C, H, W = batch[0].shape
        batch[1] = batch[1][0].unsqueeze(0)
        self.setTarget(batch[1])

        y = self(batch[0])
        xc = paddle.to_tensor(batch[0].numpy().copy())
        y = utils.subtract_imagenet_mean_batch(y)
        xc = utils.subtract_imagenet_mean_batch(xc)
        features_y = self.getFeature(y)
        features_xc = self.getFeature(xc)
        f_xc_c = paddle.to_tensor(features_xc[1].numpy(), stop_gradient=True)
        content_loss = mse_loss(features_y[1], f_xc_c)

        batch[1] = utils.subtract_imagenet_mean_batch(batch[1])
        features_style = self.getFeature(batch[1])
        gram_style = [utils.gram_matrix(y) for y in features_style]
        style_loss = 0.
        for m in range(len(features_y)):
            gram_y = utils.gram_matrix(features_y[m])
            gram_s = paddle.to_tensor(
                np.tile(gram_style[m].numpy(), (N, 1, 1, 1)))
            style_loss += mse_loss(gram_y, gram_s[:N, :, :])

        loss = content_loss + style_loss

        return {
            'loss': loss,
            'metrics': {
                'content gap': content_loss,
                'style gap': style_loss
            }
        }
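The style term relies on utils.gram_matrix, which is not shown. The standard Gram-matrix computation for an (N, C, H, W) feature map, which the helper is assumed to implement, is:

import paddle


def gram_matrix(y):
    # Gram matrix of an (N, C, H, W) feature map, normalised by C * H * W
    n, c, h, w = y.shape
    features = y.reshape([n, c, h * w])
    features_t = features.transpose([0, 2, 1])
    return paddle.bmm(features, features_t) / (c * h * w)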
Example No. 17
    def test_dygraph_single(self):
        paddle.disable_static()
        fleet.init(is_collective=True)

        layer = LinearNet()
        loss_fn = nn.MSELoss()
        adam = paddle.optimizer.Adam(learning_rate=0.001,
                                     parameters=layer.parameters())

        adam = fleet.distributed_optimizer(adam)
        dp_layer = fleet.distributed_model(layer)
        for step in range(2):
            inputs = paddle.randn([10, 10], 'float32')
            outputs = dp_layer(inputs)
            labels = paddle.randn([10, 1], 'float32')
            loss = loss_fn(outputs, labels)
            loss.backward()
            adam.step()
            adam.clear_grad()
Example No. 18
    def __init__(self,
                 balance_loss=True,
                 main_loss_type='DiceLoss',
                 negative_ratio=3,
                 return_origin=False,
                 eps=1e-6,
                 **kwargs):
        """
               The BalanceLoss for Differentiable Binarization text detection
               args:
                   balance_loss (bool): whether balance loss or not, default is True
                   main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
                       'Euclidean','BCELoss', 'MaskL1Loss'], default is  'DiceLoss'.
                   negative_ratio (int|float): float, default is 3.
                   return_origin (bool): whether return unbalanced loss or not, default is False.
                   eps (float): default is 1e-6.
               """
        super(BalanceLoss, self).__init__()
        self.balance_loss = balance_loss
        self.main_loss_type = main_loss_type
        self.negative_ratio = negative_ratio
        self.return_origin = return_origin
        self.eps = eps

        if self.main_loss_type == "CrossEntropy":
            self.loss = nn.CrossEntropyLoss()
        elif self.main_loss_type == "Euclidean":
            self.loss = nn.MSELoss()
        elif self.main_loss_type == "DiceLoss":
            self.loss = DiceLoss(self.eps)
        elif self.main_loss_type == "BCELoss":
            self.loss = BCELoss(reduction='none')
        elif self.main_loss_type == "MaskL1Loss":
            self.loss = MaskL1Loss(self.eps)
        else:
            loss_type = [
                'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss',
                'MaskL1Loss'
            ]
            raise Exception(
                "main_loss_type in BalanceLoss() can only be one of {}".format(
                    loss_type))
Example No. 19
        def run_double_hook_in_model(data,
                                     label,
                                     hook=None,
                                     register=False,
                                     remove=False):
            for device in self.devices:
                paddle.seed(self.seed)
                paddle.set_device(device)

                net = SimpleNet(self.in_size, self.out_size)
                loss_fn = nn.MSELoss()

                data = paddle.to_tensor(data)
                label = paddle.to_tensor(label)

                ret1, out = net(data, hook, register, remove)
                loss = loss_fn(out, label)
                loss.backward()

                return (ret1.grad.numpy(), net.linear1.weight.grad.numpy(),
                        net.linear1.bias.grad.numpy())
Example No. 20
def train_paddle_model() -> "LinearModel":
    set_random_seed(SEED)
    model = LinearModel()
    loss = nn.MSELoss()
    adam = paddle.optimizer.Adam(parameters=model.parameters())

    train_data = paddle.text.datasets.UCIHousing(mode="train")

    loader = paddle.io.DataLoader(
        train_data, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=2
    )

    model.train()
    for _ in range(EPOCH_NUM):
        for _, (feature, label) in enumerate(loader()):
            out = model(feature)
            loss_fn = loss(out, label)
            loss_fn.backward()
            adam.step()
            adam.clear_grad()
    return model
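LinearModel and the training constants are defined elsewhere. A minimal stand-in consistent with the UCIHousing data (13 features per sample); the constant values below are assumptions:

import paddle
import paddle.nn as nn

SEED = 42        # assumed value
BATCH_SIZE = 8   # assumed value
EPOCH_NUM = 5    # assumed value


def set_random_seed(seed):
    paddle.seed(seed)


class LinearModel(nn.Layer):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(13, 1)  # UCIHousing samples have 13 features

    def forward(self, x):
        return self.fc(x)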
Example No. 21
    def __init__(self, conf, info_graph, soc_graph, user_feature, item_feautre):
        super(DiffNet, self).__init__()

        self.conf = conf
        self.user_feature = paddle.to_tensor(user_feature)
        self.item_feature = paddle.to_tensor(item_feautre)

        # the user-item interactions form the information graph => info_graph
        self.infomation_gcn_layer = CustomGCNConv(self.conf['gnn_dim'], self.conf['gnn_dim'], info_graph)
        # the user-user relations form the social graph => soc_graph
        self.social_gcn_layer = CustomGCNConv(self.conf['gnn_dim'], self.conf['gnn_dim'], soc_graph)

        self.user_embedding = nn.Embedding(self.conf['num_users'], self.conf['gnn_dim'], sparse=True)
        self.item_embedding = nn.Embedding(self.conf['num_items'], self.conf['gnn_dim'], sparse=True)

        # initialize user_embedding and item_embedding from \mathcal{N}(\mu, \sigma^2)
        # self.user_embedding.weight.set_value(0.1 * np.random.randn(self.conf['num_users'], self.conf['gnn_dim']))
        # self.item_embedding.weight.set_value(0.1 * np.randn(self.conf['num_items'], self.conf['gnn_dim']))

        self.reduce_dim_layer = nn.Linear(self.conf['review_feature_dim'], self.conf['gnn_dim'])

        self.mse_loss = nn.MSELoss()
Example No. 22
def train():
    """train"""
    # 1. initialize parallel environment
    dist.init_parallel_env()

    # 2. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(
        learning_rate=0.001, parameters=dp_layer.parameters())

    # 3. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)

    loss.backward()

    adam.step()
    adam.clear_grad()
    assert len(loss) == 1
Example No. 23
def train():
    # 1. initialize parallel environment (cpu & gpu)
    dist.init_parallel_env()

    # 2. set cpu place
    paddle.set_device('cpu')

    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # 4. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)

    loss.backward()

    adam.step()
    adam.clear_grad()
Example No. 24
    def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
        """ Initialize the GANLoss class.

        Parameters:
            gan_mode (str) - - the type of GAN objective. It currently supports vanilla, lsgan, and wgangp.
            target_real_label (float) - - label for a real image
            target_fake_label (float) - - label for a fake image

        Note: Do not use sigmoid as the last layer of Discriminator.
        LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss.
        """
        super(GANLoss, self).__init__()
        self.target_real_label = target_real_label
        self.target_fake_label = target_fake_label

        self.gan_mode = gan_mode
        if gan_mode == 'lsgan':
            self.loss = nn.MSELoss()
        elif gan_mode == 'vanilla':
            self.loss = BCEWithLogitsLoss()
        elif gan_mode in ['wgangp']:
            self.loss = None
        else:
            raise NotImplementedError('gan mode %s not implemented' % gan_mode)
Example No. 25
# from collections import OrderedDict
# net = nn.Sequential(OrderedDict([
#     ('linear', nn.Linear(num_inputs, 1))
# ]))

print(net)

for param in net.parameters():
    print(param)

# 3.3.4 Initialize model parameters
# Set the global parameter initializer
fluid.set_global_initializer(initializer.Uniform(), initializer.Constant())

# 3.3.5 Define the loss function
loss = nn.MSELoss()

# 3.3.6 Define the optimization algorithm
optimizer = optim.SGD(learning_rate=0.03, parameters=net.parameters())
print(optimizer)

# Set different learning rates for different sub-networks (to be revised)
# optimizer = optim.SGD([
#     {'params': net._sub_layers1.parameters()},
#     {'params': net._sub_layers2.parameters(), 'lr': 0.01}
# ], learning_rate=0.03)

# for param_group in optimizer.param_groups:
#     param_group['lr'] *= 0.1

# 3.3.7 Train the model
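A training loop in the same style, assuming data_iter is the DataLoader built earlier in the chapter, might look like this (a sketch, not the book's exact code):

num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        l = loss(net(X), y)     # forward pass and MSE loss
        l.backward()            # backpropagate
        optimizer.step()        # update parameters
        optimizer.clear_grad()
    print('epoch %d, loss %f' % (epoch, float(l)))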
Example No. 26
def main(args):
    """
    Model training for one epoch and return the average loss and model evaluating to monitor pcc.
    """
    paddle.set_device('gpu:{}'.format(args.device) if args.use_cuda else 'cpu')

    logging.info('Load data ...')
    dataset = InMemoryDataset(npz_data_path=args.data_path)

    train_ds = Dataset(dataset[1])
    test_ds = Dataset(dataset[0])
    train_loader = train_ds.get_data_loader(batch_size=args.batch_size,
                                            collate_fn=collate_fn)
    test_loader = test_ds.get_data_loader(batch_size=args.batch_size,
                                          collate_fn=collate_fn)

    logging.info("Data loaded.")

    model = CDRModel(args)

    optim = Adam(learning_rate=args.lr, parameters=model.parameters())
    criterion = nn.MSELoss()

    global_step = 0
    best_pcc = 0.0
    os.makedirs(args.output_path, exist_ok=True)
    best_model = os.path.join(args.output_path, 'best_model.pdparams')

    for epoch in range(1, args.epoch_num + 1):
        model.train()
        for idx, batch_data in enumerate(train_loader):
            graphs, mut, gexpr, met, label = batch_data
            g = pgl.Graph.batch(graphs).tensor()
            mut = paddle.to_tensor(mut)
            gexpr = paddle.to_tensor(gexpr)
            met = paddle.to_tensor(met)
            label = paddle.to_tensor(label)

            pred = model([g, mut, gexpr, met])
            train_loss = paddle.pow(criterion(pred[:, 0], label)[0], 0.5)
            train_loss.backward()
            train_pcc = pearsonr(pred[:, 0].numpy(), label.numpy())[0]
            optim.step()
            optim.clear_grad()

            global_step += 1
            if global_step % 500 == 0:
                message = "train: epoch %d | step %d | " % (epoch, global_step)
                message += "loss %.6f | pcc %.4f" % (train_loss, train_pcc)
                log.info(message)

        result = evaluate(model, test_loader, criterion)
        message = "eval: epoch %d | step %d " % (epoch, global_step)
        for key, value in result.items():
            message += "| %s %.6f" % (key, value)
        log.info(message)

        if best_pcc < result['pcc']:
            best_pcc = result['pcc']
            paddle.save(model.state_dict(), best_model)

    log.info("best evaluating accuracy: %.6f" % best_pcc)
Example No. 27

if __name__ == "__main__":
    trainset = pd.DataFrame({
        'weight': [133., 160, 152, 120],
        'height': [65., 72, 70, 60],
        'label': [0, 1, 1, 0]
    })
    trainset = GetDataset(trainset)
    trainset.__getitem__(0)  # smoke-test a single sample
    train_loader = DataLoader(trainset, batch_size=4)

    lr = 0.5
    epochs = 2000
    loss_fn = nn.MSELoss()

    model = ConNet()
    model.train()  # switch to training mode
    optimizer = paddle.optimizer.SGD(parameters=model.parameters(),
                                     learning_rate=lr)  # optimizer

    for epoch in range(epochs):
        for i, data in enumerate(train_loader, 0):
            X, y = data
            y_pred = model(X)
            loss = loss_fn(y_pred, y)

            # backpropagate and update parameters
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()

            if epoch % 100 == 99:
                print("epoch: %d/%d - loss is: %.6f" %
                      (epoch + 1, epochs, float(loss)))
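GetDataset and ConNet come from elsewhere in the script; a minimal sketch matching how they are used above (two float features per row, one float label, a single linear layer):

import paddle
import paddle.nn as nn


class GetDataset(paddle.io.Dataset):
    def __init__(self, df):
        super().__init__()
        self.features = df[['weight', 'height']].values.astype('float32')
        self.labels = df[['label']].values.astype('float32')

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

    def __len__(self):
        return len(self.labels)


class ConNet(nn.Layer):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(2, 1)

    def forward(self, x):
        return self.fc(x)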
Example No. 28
def do_train(args):
    train_data_loader, dev_data_loader = create_distill_loader(
        args.task_name,
        model_name=args.model_name,
        vocab_path=args.vocab_path,
        batch_size=args.batch_size,
        max_seq_length=args.max_seq_length,
        n_iter=args.n_iter)

    emb_tensor = load_embedding(
        args.vocab_path) if args.use_pretrained_emb else None

    model = BiLSTM(args.emb_dim, args.hidden_size, args.vocab_size,
                   args.output_dim, args.padding_idx, args.num_layers,
                   args.dropout_prob, args.init_scale, emb_tensor)

    if args.optimizer == 'adadelta':
        optimizer = paddle.optimizer.Adadelta(learning_rate=args.lr,
                                              rho=0.95,
                                              parameters=model.parameters())
    else:
        optimizer = paddle.optimizer.Adam(learning_rate=args.lr,
                                          parameters=model.parameters())

    ce_loss = nn.CrossEntropyLoss()
    mse_loss = nn.MSELoss()
    klloss = nn.KLDivLoss()

    metric_class = TASK_CLASSES[args.task_name][1]
    metric = metric_class()

    teacher = TeacherModel(model_name=args.model_name,
                           param_path=args.teacher_path)

    print("Start to distill student model.")

    global_step = 0
    tic_train = time.time()
    for epoch in range(args.max_epoch):
        model.train()
        for i, batch in enumerate(train_data_loader):
            if args.task_name == 'qqp':
                bert_input_ids, bert_segment_ids, student_input_ids_1, seq_len_1, student_input_ids_2, seq_len_2, labels = batch
            else:
                bert_input_ids, bert_segment_ids, student_input_ids, seq_len, labels = batch

            # Calculate teacher model's forward.
            with paddle.no_grad():
                teacher_logits = teacher.model(bert_input_ids,
                                               bert_segment_ids)

            # Calculate student model's forward.
            if args.task_name == 'qqp':
                logits = model(student_input_ids_1, seq_len_1,
                               student_input_ids_2, seq_len_2)
            else:
                logits = model(student_input_ids, seq_len)

            loss = args.alpha * ce_loss(logits, labels) + (
                1 - args.alpha) * mse_loss(logits, teacher_logits)

            loss.backward()
            optimizer.step()
            optimizer.clear_grad()

            if i % args.log_freq == 0:
                print(
                    "global step %d, epoch: %d, batch: %d, loss: %f, speed: %.4f step/s"
                    % (global_step, epoch, i, loss, args.log_freq /
                       (time.time() - tic_train)))
                tic_eval = time.time()
                acc = evaluate(args.task_name, model, metric, dev_data_loader)
                print("eval done total : %s s" % (time.time() - tic_eval))
                tic_train = time.time()
            global_step += 1
Example No. 29
    def __init__(self):
        super(LandmarkLoss, self).__init__(name_scope='LandmarkLoss')
        self.square_loss = nn.MSELoss(reduction='none')
        self.keep_ratio = 1.0
Example No. 30
def do_train(args):
    device = paddle.set_device(args.device)
    train_data_loader, dev_data_loader = create_distill_loader(
        args.task_name,
        model_name=args.model_name,
        vocab_path=args.vocab_path,
        batch_size=args.batch_size,
        max_seq_length=args.max_seq_length,
        n_iter=args.n_iter,
        whole_word_mask=args.whole_word_mask,
        seed=args.seed)

    model = BiLSTM(args.emb_dim, args.hidden_size, args.vocab_size,
                   args.output_dim, args.vocab_path, args.padding_idx,
                   args.num_layers, args.dropout_prob, args.init_scale,
                   args.embedding_name)

    if args.optimizer == 'adadelta':
        optimizer = paddle.optimizer.Adadelta(learning_rate=args.lr,
                                              rho=0.95,
                                              parameters=model.parameters())
    else:
        optimizer = paddle.optimizer.Adam(learning_rate=args.lr,
                                          parameters=model.parameters())

    ce_loss = nn.CrossEntropyLoss()
    mse_loss = nn.MSELoss()
    klloss = nn.KLDivLoss()

    metric_class = TASK_CLASSES[args.task_name][1]
    metric = metric_class()

    teacher = TeacherModel(model_name=args.model_name,
                           param_path=args.teacher_path)

    print("Start to distill student model.")

    if args.init_from_ckpt:
        model.set_state_dict(paddle.load(args.init_from_ckpt + ".pdparams"))
        optimizer.set_state_dict(paddle.load(args.init_from_ckpt + ".pdopt"))
        print("Loaded checkpoint from %s" % args.init_from_ckpt)

    global_step = 0
    tic_train = time.time()
    for epoch in range(args.max_epoch):
        model.train()
        for i, batch in enumerate(train_data_loader):
            global_step += 1
            if args.task_name == 'qqp':
                bert_input_ids, bert_segment_ids, student_input_ids_1, seq_len_1, student_input_ids_2, seq_len_2, labels = batch
            else:
                bert_input_ids, bert_segment_ids, student_input_ids, seq_len, labels = batch

            # Calculate teacher model's forward.
            with paddle.no_grad():
                teacher_logits = teacher.model(bert_input_ids,
                                               bert_segment_ids)

            # Calculate student model's forward.
            if args.task_name == 'qqp':
                logits = model(student_input_ids_1, seq_len_1,
                               student_input_ids_2, seq_len_2)
            else:
                logits = model(student_input_ids, seq_len)

            loss = args.alpha * ce_loss(logits, labels) + (
                1 - args.alpha) * mse_loss(logits, teacher_logits)

            loss.backward()
            optimizer.step()
            optimizer.clear_grad()

            if global_step % args.log_freq == 0:
                print(
                    "global step %d, epoch: %d, batch: %d, loss: %f, speed: %.4f step/s"
                    % (global_step, epoch, i, loss, args.log_freq /
                       (time.time() - tic_train)))
                tic_eval = time.time()
                acc = evaluate(args.task_name, model, metric, dev_data_loader)
                print("eval done total : %s s" % (time.time() - tic_eval))
                tic_train = time.time()

            if global_step % args.save_steps == 0:
                paddle.save(
                    model.state_dict(),
                    os.path.join(args.output_dir,
                                 "step_" + str(global_step) + ".pdparams"))
                paddle.save(
                    optimizer.state_dict(),
                    os.path.join(args.output_dir,
                                 "step_" + str(global_step) + ".pdopt"))