示例#1
0
    def load_checkpoint(self, cfg, checkpoint_path, resume=None):
        """Restore network state from the checkpoint files under a path prefix.

        Generator state is read from ``checkpoint_path + '_net_G'``. When
        resuming outside inference mode, discriminator and discriminator
        optimizer state are additionally read from
        ``checkpoint_path + '_net_D'``.

        Args:
            cfg: Not used by this method.
            checkpoint_path (str): Checkpoint prefix. The epoch and iteration
                are parsed from its 4th-last and 2nd-last ``'_'``-separated
                fields.
            resume (bool, optional): When truthy, restore the discriminator
                and its optimizer as well; otherwise only the generator
                weights are applied. ``None`` is treated as ``False``.

        Returns:
            tuple: ``(current_epoch, current_iteration)``; ``(0, 0)`` when
            ``checkpoint_path + '_net_G.pdparams'`` does not exist.
        """
        if not os.path.exists(checkpoint_path + '_net_G.pdparams'):
            print("No checkpoint found.")
            return 0, 0
        if resume is None:
            resume = False

        # The optimizer half of the generator checkpoint is loaded but not
        # applied: the ``self.opt_G.set_dict(opt_G_dict)`` call was already
        # commented out in this method.
        net_G_dict, _ = dg.load_dygraph(checkpoint_path + '_net_G')

        fields = checkpoint_path.split('_')
        current_epoch, current_iteration = int(fields[-4]), int(fields[-2])

        self.net_G.set_dict(net_G_dict)
        if resume:
            # Discriminator state is only touched outside inference mode.
            # Previously this branch referenced net_D_dict / opt_D_dict even
            # when they had never been loaded, raising NameError.
            if not self.is_inference:
                net_D_dict, opt_D_dict = dg.load_dygraph(checkpoint_path +
                                                         '_net_D')
                self.net_D.set_dict(net_D_dict)
                self.opt_D.set_dict(opt_D_dict)
            print("Load from: {}".format(checkpoint_path))
        else:
            print("Load generator weights only.")

        print("Done with loading the checkpoint.")
        return current_epoch, current_iteration
def build():
    """Create an InceptionV4 network and load pretrained weights into it.

    Returns:
        The InceptionV4 instance after ``set_dict`` has been called with the
        parameters loaded from the hard-coded backbone path.
    """
    weights_path = '/home/aistudio/vid2vid/model/backbones/inceptionv4'
    network = InceptionV4()
    pretrained, _ = dg.load_dygraph(weights_path)
    network.set_dict(pretrained)
    print("load pretrained inception v4 models from path " + weights_path)
    return network
示例#3
0
def build_hand_model():
    """Create a HandPose network and load its pretrained parameters.

    Returns:
        The HandPose instance after ``load_dict`` with the loaded parameters.
    """
    weights_file = (
        '/home/aistudio/openpose/pretrained_models/pose_hand_21_102000.pdparams'
    )
    network = HandPose()
    params, _ = dg.load_dygraph(weights_file)
    network.load_dict(params)
    return network
示例#4
0
def build_face_model():
    """Create a FacePose network and load its pretrained parameters.

    Returns:
        The FacePose instance after ``load_dict`` with the loaded parameters.
    """
    weights_file = (
        '/home/aistudio/openpose/pretrained_models/pose_face_70_iter_116000.pdparams'
    )
    network = FacePose()
    params, _ = dg.load_dygraph(weights_file)
    network.load_dict(params)
    return network
示例#5
0
def extract_and_convert(input_dir, output_dir):
    """Convert the Paddle ERNIE parameters in ``input_dir`` to ``ernie.ckpt``.

    Steps performed:
      1. read ``ernie_config.json`` from ``input_dir``;
      2. copy ``vocab.txt`` to ``output_dir``;
      3. load the dygraph parameters stored under the ``params`` prefix;
      4. rename every parameter that has an entry in the map returned by
         ``build_params_map`` (others are skipped), transposing those whose
         name contains one of the markers listed below;
      5. write the list of ``{'name', 'data'}`` records with
         ``save_checkpoint`` to ``output_dir/ernie.ckpt``.

    Args:
        input_dir (str): directory holding ``ernie_config.json``,
            ``vocab.txt`` and the ``params`` checkpoint.
        output_dir (str): destination directory; created when missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Use a context manager so the config file handle is closed promptly.
    config_path = os.path.join(input_dir, 'ernie_config.json')
    with open(config_path, 'rt', encoding='utf-8') as config_file:
        config = json.load(config_file)

    print('=' * 20 + 'save vocab file' + '=' * 20)
    shutil.copyfile(os.path.join(input_dir, 'vocab.txt'),
                    os.path.join(output_dir, 'vocab.txt'))

    print('=' * 20 + 'extract weights' + '=' * 20)
    weight_map = build_params_map(attention_num=config['num_hidden_layers'])
    with fluid.dygraph.guard():
        paddle_paddle_params, _ = D.load_dygraph(
            os.path.join(input_dir, 'params'))

    # Parameters whose name contains any of these substrings are transposed.
    transposed_markers = (
        'w_0',
        'post_att_layer_norm_scale',
        'post_ffn_layer_norm_scale',
        'cls_out_w',
    )
    state_dict = []
    for weight_name, weight_value in paddle_paddle_params.items():
        if weight_name not in weight_map:
            continue
        if any(marker in weight_name for marker in transposed_markers):
            weight_value = weight_value.transpose()
        target_name = weight_map[weight_name]
        state_dict.append({
            'name': target_name,
            'data': Tensor(weight_value),
        })
        print(weight_name, '->', target_name, weight_value.shape)
    save_checkpoint(state_dict, os.path.join(output_dir, "ernie.ckpt"))
示例#6
0
    def _initialize(self, line=4, word=7):
        """
        initialize with the necessary elements
        """
        if line not in [4, 8]:
            raise ValueError("The line could only be 4 or 8.")
        if word not in [5, 7]:
            raise ValueError("The word could only be 5 or 7.")

        self.line = line
        assets_path = os.path.join(self.directory, "assets")
        gen_checkpoint_path = os.path.join(
            assets_path, "ernie_gen_acrostic_poetry_L%sW%s" % (line, word))
        ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json')
        with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file:
            ernie_cfg = dict(json.loads(ernie_cfg_file.read()))
        ernie_vocab_path = os.path.join(assets_path, 'vocab.txt')
        with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file:
            ernie_vocab = {
                j.strip().split('\t')[0]: i
                for i, j in enumerate(ernie_vocab_file.readlines())
            }

        with fluid.dygraph.guard(fluid.CPUPlace()):
            with fluid.unique_name.guard():
                self.model = ErnieModelForGeneration(ernie_cfg)
                finetuned_states, _ = D.load_dygraph(gen_checkpoint_path)
                self.model.set_dict(finetuned_states)

        self.tokenizer = ErnieTokenizer(ernie_vocab)
        self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()}
        self.rev_dict[self.tokenizer.pad_id] = ''  # replace [PAD]
        self.rev_dict[self.tokenizer.unk_id] = ''  # replace [PAD]
        self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i])
示例#7
0
def extract_and_convert(input_dir, output_dir):
    """Convert the Paddle ERNIE weights in ``input_dir`` to a torch state dict.

    Steps performed:
      1. read ``ernie_config.json``, set ``layer_norm_eps`` to 1e-5, copy
         ``sent_type_vocab_size`` to ``type_vocab_size`` when present, set
         ``intermediate_size`` to four times ``hidden_size`` and write the
         result to ``output_dir/config.json``;
      2. copy ``vocab.txt`` to ``output_dir``;
      3. load the dygraph parameters stored under the ``saved_weights``
         prefix, rename each through the map returned by
         ``build_params_map`` and save them with ``torch.save`` as
         ``output_dir/pytorch_model.bin``.

    Args:
        input_dir (str): directory holding ``ernie_config.json``,
            ``vocab.txt`` and the ``saved_weights`` checkpoint.
        output_dir (str): destination directory; created when missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    print('=' * 20 + 'save config file' + '=' * 20)
    # Context managers guarantee both files are closed (and the written
    # config flushed) instead of relying on garbage collection.
    with open(os.path.join(input_dir, 'ernie_config.json'),
              'rt',
              encoding='utf-8') as config_in:
        config = json.load(config_in)
    config['layer_norm_eps'] = 1e-5
    if 'sent_type_vocab_size' in config:
        config['type_vocab_size'] = config['sent_type_vocab_size']
    config['intermediate_size'] = 4 * config['hidden_size']
    with open(os.path.join(output_dir, 'config.json'),
              'wt',
              encoding='utf-8') as config_out:
        json.dump(config, config_out, indent=4)

    print('=' * 20 + 'save vocab file' + '=' * 20)
    shutil.copyfile(os.path.join(input_dir, 'vocab.txt'),
                    os.path.join(output_dir, 'vocab.txt'))

    print('=' * 20 + 'extract weights' + '=' * 20)
    state_dict = collections.OrderedDict()
    weight_map = build_params_map(attention_num=config['num_hidden_layers'])
    with fluid.dygraph.guard():
        paddle_paddle_params, _ = D.load_dygraph(
            os.path.join(input_dir, 'saved_weights'))

    # Substrings that mark a 'weight' parameter as needing a transpose.
    transposed_scopes = ('encoder_stack', 'pooler', 'mlm.')
    for weight_name, weight_value in paddle_paddle_params.items():
        if 'weight' in weight_name and any(
                scope in weight_name for scope in transposed_scopes):
            weight_value = weight_value.transpose()
        # NOTE(review): a parameter without an entry in weight_map is not
        # skipped here; the lookup below is expected to fail for it.
        target_name = weight_map[weight_name]
        state_dict[target_name] = torch.FloatTensor(weight_value)
        print(weight_name, '->', target_name, weight_value.shape)
    torch.save(state_dict, os.path.join(output_dir, "pytorch_model.bin"))
def main():
    """Load a saved checkpoint and run ``validation`` on the validation list.

    Everything runs inside a dygraph guard bound to ``CUDAPlace(0)``.
    """
    device = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(device):
        network = Transformer(
            image_size=512,
            num_classes=15,
            hidden_unit_num=1024,
            layer_num=2,
            head_num=16,
            dropout=0.8,
            decoder_name='PUP',
            hyber=True,
            visualable=False,
        )
        transform = Transform(512)
        val_samples = Dataloader(
            '/home/aistudio/dataset',
            '/home/aistudio/dataset/val_list.txt',
            transform=transform,
            shuffle=True,
        )
        val_loader = fluid.io.DataLoader.from_generator(
            capacity=1, use_multiprocess=False)
        val_loader.set_sample_generator(val_samples,
                                        batch_size=1,
                                        places=device)

        weights, _ = load_dygraph(
            "./output/SETR-NotZero-Epoch-2-Loss-0.161517-MIOU-0.325002")
        network.load_dict(weights)
        network.eval()

        # Disabled inference path, kept for reference:
        # result = get_infer_data("/home/aistudio/dataset/infer")
        # infer_load  = Load_infer('/home/aistudio/dataset', result, transform=preprocess, shuffle=False)
        # loader_infer= fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)
        # loader_infer.set_sample_generator(infer_load, batch_size=1, places=Place)
        # process_image(model, loader_infer, result)
        validation(val_loader, network, 15)
示例#9
0
    def _initialize(self):
        """
        initialize with the necessary elements
        """
        assets_path = os.path.join(self.directory, "assets")
        gen_checkpoint_path = os.path.join(assets_path, "ernie_gen")
        ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json')
        with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file:
            ernie_cfg = dict(json.loads(ernie_cfg_file.read()))
        ernie_vocab_path = os.path.join(assets_path, 'vocab.txt')
        with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file:
            ernie_vocab = {
                j.strip().split('\t')[0]: i
                for i, j in enumerate(ernie_vocab_file.readlines())
            }

        with fluid.dygraph.guard(fluid.CPUPlace()):
            with fluid.unique_name.guard():
                self.model = ErnieModelForGeneration(ernie_cfg)
                finetuned_states, _ = D.load_dygraph(gen_checkpoint_path)
                self.model.set_dict(finetuned_states)

        self.tokenizer = ErnieTokenizer(ernie_vocab)
        self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()}
        self.rev_dict[self.tokenizer.pad_id] = ''  # replace [PAD]
        self.rev_dict[self.tokenizer.unk_id] = ''  # replace [PAD]
        self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i])
示例#10
0
def load_parameters(model,
                    optimizer=None,
                    checkpoint_dir=None,
                    iteration=None,
                    checkpoint_path=None):
    """Load model (and optionally optimizer) state from a checkpoint on disk.

    Args:
        model (obj): model whose parameters are overwritten.
        optimizer (obj, optional): optimizer to restore; it is only touched
            when the checkpoint yields a non-empty optimizer state.
            Defaults to None.
        checkpoint_dir (str, optional): directory that holds ``step-<N>``
            checkpoints. Used only when ``checkpoint_path`` is None.
        iteration (int, optional): step to load from ``checkpoint_dir``; when
            None the value returned by ``_load_latest_checkpoint`` is used.
            Defaults to None.
        checkpoint_path (str, optional): explicit checkpoint prefix. Takes
            precedence over ``checkpoint_dir``; the iteration is then parsed
            from the text after the last ``-`` of its basename.
            Defaults to None.

    Returns:
        iteration (int): iteration of the loaded checkpoint, or 0 (without
            loading anything) when the resolved iteration in
            ``checkpoint_dir`` is 0.

    Raises:
        ValueError: if both ``checkpoint_dir`` and ``checkpoint_path`` are
            None.
    """
    if checkpoint_path is None and checkpoint_dir is None:
        raise ValueError(
            "At least one of 'checkpoint_dir' and 'checkpoint_path' should be specified!"
        )

    if checkpoint_path is not None:
        step_text = os.path.basename(checkpoint_path).split("-")[-1]
        iteration = int(step_text)
    else:
        if iteration is None:
            iteration = _load_latest_checkpoint(checkpoint_dir)
        if iteration == 0:
            return iteration
        checkpoint_path = os.path.join(checkpoint_dir,
                                       "step-{}".format(iteration))

    rank = dg.parallel.Env().local_rank
    loaded_params, loaded_opt_state = dg.load_dygraph(checkpoint_path)
    current_params = model.state_dict()

    # Cast loaded arrays to the dtype of the live parameters, for
    # mixed-precision training/inference.
    for name, array in loaded_params.items():
        if name not in current_params:
            continue
        if convert_np_dtype(array.dtype) != current_params[name].dtype:
            loaded_params[name] = array.astype(
                current_params[name].numpy().dtype)

    model.set_dict(loaded_params)
    print("[checkpoint] Rank {}: loaded model from {}.pdparams".format(
        rank, checkpoint_path))

    if optimizer and loaded_opt_state:
        optimizer.set_dict(loaded_opt_state)
        print("[checkpoint] Rank {}: loaded optimizer state from {}.pdopt".
              format(rank, checkpoint_path))

    return iteration
示例#11
0
def load_checkpoint(step, model_path):
    """Load a checkpoint and strip the ``_layers.`` prefix from its keys.

    Args:
        step: checkpoint name joined onto ``model_path``.
        model_path: directory containing the checkpoint.

    Returns:
        tuple: ``(new_state_dict, opti_dict)`` where ``new_state_dict`` is an
        OrderedDict of the loaded parameters with a leading ``_layers.``
        removed from each key that has it, and ``opti_dict`` is the second
        value returned by ``dg.load_dygraph``.
    """
    prefix = '_layers.'
    loaded, opti_dict = dg.load_dygraph(os.path.join(model_path, step))
    renamed = OrderedDict()
    for key in loaded:
        clean_key = key[len(prefix):] if key.startswith(prefix) else key
        renamed[clean_key] = loaded[key]
    return renamed, opti_dict
示例#12
0
def load_model(init_model, model_path):
    """Load parameters from ``model_path`` into ``init_model`` when available.

    Args:
        init_model: model object receiving the parameters via ``set_dict``.
        model_path: checkpoint path without the ``.pdparams`` suffix.

    When ``model_path + ".pdparams"`` does not exist, only a log message is
    emitted and the model is left untouched.
    """
    params_file = model_path + ".pdparams"
    if not os.path.exists(params_file):
        logging.info("cannot find model file: {}".format(params_file))
        return

    logging.info("load model from {}".format(model_path))
    began_at = time.time()
    loaded, _ = D.load_dygraph(model_path)
    init_model.set_dict(loaded)
    logging.info("cost time: %.4fs" % (time.time() - began_at))
示例#13
0
def load_D(path='data/anime-biggan-256px-run39-607250.discriminator'):
    """Build the discriminator, load its weights and store it in the cache.

    Dygraph mode is enabled on the CUDA device given by
    ``fluid.dygraph.ParallelEnv().dev_id`` before the network is created.
    The resulting network is assigned to ``model_cache.D``.

    Args:
        path: checkpoint path passed to ``dg.load_dygraph``.
    """
    device_id = fluid.dygraph.ParallelEnv().dev_id
    fluid.enable_dygraph(fluid.CUDAPlace(device_id))

    network = Discriminator(
        n_class=1000,
        chn=96,
        blocks_with_attention="B2",
        resolution=256,
    )
    weights = dg.load_dygraph(path)[0]
    network.set_dict(weights)
    model_cache.D = network
示例#14
0
def load_G(path='data/anime-biggan-256px-run39-607250.generator'):
    """Build the generator, load its weights and store it in the cache.

    Dygraph mode is enabled on the CUDA device given by
    ``fluid.dygraph.ParallelEnv().dev_id`` before the network is created.
    The resulting network is assigned to ``model_cache.G``.

    Args:
        path: checkpoint path passed to ``dg.load_dygraph``.
    """
    device_id = fluid.dygraph.ParallelEnv().dev_id
    fluid.enable_dygraph(fluid.CUDAPlace(device_id))

    network = Generator(
        code_dim=140,
        n_class=1000,
        chn=96,
        blocks_with_attention="B5",
        resolution=256,
    )
    weights = dg.load_dygraph(path)[0]
    network.set_dict(weights)
    model_cache.G = network
示例#15
0
def load_model(init_model, model_path):
    """Load trained parameters into a paddle dygraph model structure.

    [in] init_model: the already-constructed model structure
         model_path: str, model location (without the .pdparams suffix)
    """
    weights_file = model_path + ".pdparams"
    found = os.path.exists(weights_file)
    if found:
        logging.info("load model from {}".format(model_path))
        tic = time.time()
        state, _ = D.load_dygraph(model_path)
        init_model.set_dict(state)
        elapsed = time.time() - tic
        logging.info("cost time: %.4fs" % elapsed)
    else:
        # Missing file is only logged; the model keeps its current weights.
        logging.info("cannot find model file: {}".format(weights_file))
示例#16
0
def load_wavenet(model, path):
    """Load a wavenet checkpoint into ``model.encoder`` and ``model.teacher``.

    Every key of the loaded state has its first dot-separated component
    removed. Keys starting with ``encoder.`` go to the encoder; all remaining
    keys go to the teacher.

    Args:
        model: object exposing ``encoder`` and ``teacher`` sub-models.
        path: checkpoint path passed to ``dg.load_dygraph``.
    """
    wavenet_state, _ = dg.load_dygraph(path)
    encoder_state, teacher_state = OrderedDict(), OrderedDict()
    for full_name, value in wavenet_state.items():
        # Anything that is not an encoder key is expected to start with
        # "decoder." and is routed to the teacher.
        if full_name.startswith("encoder."):
            destination = encoder_state
        else:
            destination = teacher_state
        destination[full_name.split('.', 1)[1]] = value

    model.encoder.set_dict(encoder_state)
    model.teacher.set_dict(teacher_state)
    print("loaded the encoder part and teacher part from wavenet model.")
示例#17
0
def build_body_model(body_points=25):
    """Create a body-pose network for the given keypoint count and load it.

    Args:
        body_points: 25, 18 or 15; selects the network class and the
            pretrained parameter file.

    Returns:
        The selected network after ``load_dict`` with the loaded parameters.

    Raises:
        ValueError: if ``body_points`` is not 25, 18 or 15.
    """
    if body_points not in (25, 18, 15):
        raise ValueError()

    if body_points == 25:
        network = BodyPose25()
        weights_file = '/home/aistudio/openpose/pretrained_models/pose_body_25_iter_584000.pdparams'
    elif body_points == 18:
        network = BodyPose18()
        weights_file = '/home/aistudio/openpose/pretrained_models/pose_body_18_iter_440000.pdparams'
    else:
        network = BodyPose15()
        weights_file = '/home/aistudio/openpose/pretrained_models/pose_body_15_iter_160000.pdparams'

    params, _ = dg.load_dygraph(weights_file)
    network.load_dict(params)
    return network
示例#18
0
    def from_pretrained(cls,
                        pretrain_dir_or_url,
                        force_download=False,
                        **kwargs):
        """Build a model from a pretrained directory or a known resource name.

        Args:
            pretrain_dir_or_url: local directory, or a key of
                ``cls.resource_map``. The key is only resolved remotely when
                no local path of that name exists.
            force_download: forwarded to ``_fetch_from_remote``.
            **kwargs: ``name`` is popped and passed to the constructor as the
                name prefix; the remaining entries override values read from
                ``ernie_config.json``.

        Returns:
            The constructed model after ``set_dict`` with the loaded weights.

        Raises:
            ValueError: if the directory, ``ernie_config.json`` or
                ``saved_weights.pdparams`` cannot be found.
        """
        is_known_resource = pretrain_dir_or_url in cls.resource_map
        if not Path(pretrain_dir_or_url).exists() and is_known_resource:
            url = cls.resource_map[pretrain_dir_or_url]
            logger.info('get pretrain dir from %s' % url)
            pretrain_dir = Path(_fetch_from_remote(url, force_download))
        else:
            logger.info('pretrain dir %s not in %s, read from local' %
                        (pretrain_dir_or_url, repr(cls.resource_map)))
            pretrain_dir = Path(pretrain_dir_or_url)

        if not pretrain_dir.exists():
            raise ValueError('pretrain dir not found: %s' % pretrain_dir)
        state_dict_path = pretrain_dir / 'saved_weights'
        config_path = pretrain_dir / 'ernie_config.json'

        if not config_path.exists():
            raise ValueError('config path not found: %s' % config_path)
        name_prefix = kwargs.pop('name', None)
        # Read the config through a context manager (no leaked handle) and
        # with an explicit encoding rather than the locale default.
        with config_path.open(encoding='utf-8') as config_file:
            cfg_dict = dict(json.load(config_file), **kwargs)
        model = cls(cfg_dict, name=name_prefix)

        logger.info('loading pretrained model from %s' % pretrain_dir)

        # Loading from the program state stored under 'params' is disabled:
        # F.io.load_program_state was noted as buggy inside dygraph.guard.
        if not state_dict_path.with_suffix('.pdparams').exists():
            raise ValueError('weight file not found in pretrain dir: %s' %
                             pretrain_dir)

        m, _ = D.load_dygraph(state_dict_path.as_posix())
        # Parameters absent from the checkpoint keep the freshly constructed
        # model's value so that set_dict receives a complete state dict.
        for k, v in model.state_dict().items():
            if k not in m:
                logger.warning(
                    'param:%s not set in pretrained model, skip' % k)
                m[k] = v
        model.set_dict(m)
        return model
示例#19
0
    def from_pretrained(cls,
                        pretrain_dir_or_url,
                        force_download=False,
                        **kwargs):
        """Build a model from a pretrained directory or a known resource name.

        Args:
            pretrain_dir_or_url: a key of ``cls.resource_map`` (fetched via
                ``_fetch_from_remote``) or a local directory.
            force_download: forwarded to ``_fetch_from_remote``.
            **kwargs: ``name`` is popped and passed to the constructor as the
                name prefix; the remaining entries override values read from
                ``ernie_config.json``.

        Returns:
            The constructed model after ``set_dict`` with the loaded weights.

        Raises:
            ValueError: if the directory, ``ernie_config.json`` or
                ``saved_weights.pdparams`` cannot be found.
        """
        if pretrain_dir_or_url in cls.resource_map:
            url = cls.resource_map[pretrain_dir_or_url]
            log.info('get pretrain dir from %s' % url)
            pretrain_dir = _fetch_from_remote(url, force_download)
        else:
            log.info('pretrain dir %s not in %s, read from local' %
                     (pretrain_dir_or_url, repr(cls.resource_map)))
            pretrain_dir = pretrain_dir_or_url

        if not os.path.exists(pretrain_dir):
            raise ValueError('pretrain dir not found: %s' % pretrain_dir)
        state_dict_path = os.path.join(pretrain_dir, 'saved_weights')
        config_path = os.path.join(pretrain_dir, 'ernie_config.json')

        if not os.path.exists(config_path):
            raise ValueError('config path not found: %s' % config_path)
        name_prefix = kwargs.pop('name', None)
        # Read the config through a context manager (no leaked handle) and
        # with an explicit encoding rather than the locale default.
        with open(config_path, encoding='utf-8') as config_file:
            cfg_dict = dict(json.load(config_file), **kwargs)
        model = cls(cfg_dict, name=name_prefix)

        log.info('loading pretrained model from %s' % pretrain_dir)

        # Loading from the program state stored under 'params' is disabled:
        # F.io.load_program_state was noted as buggy inside dygraph.guard.
        if not os.path.exists(state_dict_path + '.pdparams'):
            raise ValueError('weight file not found in pretrain dir: %s' %
                             pretrain_dir)

        m, _ = D.load_dygraph(state_dict_path)
        # Parameters absent from the checkpoint keep the freshly constructed
        # model's value so that set_dict receives a complete state dict.
        for k, v in model.state_dict().items():
            if k not in m:
                log.warning('param:%s not set in pretrained model, skip' % k)
                m[k] = v  # FIXME: no need to do this in the future
        model.set_dict(m)
        return model
                                a = L.argmax(logits, -1) == label
                                acc.append(a.numpy())
                            model.train()
                        log.debug('acc %.5f' % np.concatenate(acc).mean())
            if args.save_dir is not None:
                F.save_dygraph(model.state_dict(), args.save_dir)
        else:
            feature_column = propeller.data.FeatureColumns([
                propeller.data.TextColumn('seg_a',
                                          unk_id=tokenizer.unk_id,
                                          vocab_dict=tokenizer.vocab,
                                          tokenizer=tokenizer.tokenize),
            ])

            assert args.save_dir is not None
            sd, _ = FD.load_dygraph(args.save_dir)
            model.set_dict(sd)
            model.eval()

            def map_fn(seg_a):
                seg_a, _ = tokenizer.truncate(seg_a, [],
                                              seqlen=args.max_seqlen)
                sentence, segments = tokenizer.build_for_ernie(seg_a, [])
                return sentence, segments

            predict_ds = feature_column.build_dataset_from_stdin('predict') \
                                           .map(map_fn) \
                                           .padded_batch(args.bsz)
            shapes = ([-1, args.max_seqlen], [-1, args.max_seqlen])
            types = ('int64', 'int64')
            predict_ds.data_shapes = shapes
示例#21
0
             if args.use_data_parallel else
             fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace())
    with dg.guard(place) as g:
        pyreader = fluid.io.PyReader(capacity=10, return_list=True)
        pyreader.decorate_batch_generator(data_loader, place)

        model = make_deepvoice3_from_hparams(hparams)
        optimizer, clipper = make_optimizer_from_hparams(hparams)
        print("Log event path: {}".format(tensorboard_dir))
        writer = SummaryWriter(tensorboard_dir) if local_rank == 0 else None
        criterion = make_loss_from_hparams(hparams)

        # loading saved model
        if args.train_postnet_only or args.train_seq2seq_only:
            assert args.checkpoint is not None, \
                "you must train part of the model from a trained whole model"
        if args.train_postnet_only:
            assert hparams.use_decoder_state_for_postnet_input is False, \
                "when training only the postnet, there is no decoder states"

        if args.checkpoint is not None:
            model_dict, optimizer_dict = dg.load_dygraph(args.checkpoint)

        if args.use_data_parallel:
            strategy = dg.parallel.prepare_context()
            model = MyDataParallel(model, strategy)

        train_model(model, pyreader, criterion, optimizer, clipper, writer,
                    args, hparams)
    print("Done!")
示例#22
0
        print("  {}: {}".format(k, v))

    # Load preset if specified
    if preset is not None:
        with io.open(preset) as f:
            hparams.parse_json(f.read())
    # Override hyper parameters
    hparams.parse(args.hparams)
    assert hparams.name == "deepvoice3"

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    with dg.guard(place):
        # Model
        model = make_deepvoice3_from_hparams(hparams)
        dry_run(model)
        model_dict, _ = dg.load_dygraph(args.checkpoint)
        model.set_dict(model_dict)

        checkpoint_name = splitext(basename(checkpoint_path))[0]

        model.seq2seq.decoder.max_decoder_steps = max_decoder_steps

        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        with io.open(text_list_file_path, "rt", encoding="utf-8") as f:
            lines = f.readlines()
            for idx, line in enumerate(lines):
                text = line[:-1]
                words = nltk.word_tokenize(text)
                waveform, alignment, _, _ = tts(model,
                                                text,
示例#23
0
 # print(np.percentile([len(row[0]) for row in dev_features], [0, 50, 95, 99, 100]))
 # to batch
 print('start training...')
 bst_f1, global_step = 0, 0
 args.max_steps = (len(train_features) // args.bsz + 1) * args.epochs
 try:
     place = F.CUDAPlace(0)
 except:
     place = F.CPUPlace()
 with FD.guard(place):
     if 'ernie' in args.from_pretrained:
         model = ErnieModelForSequenceClassification.from_pretrained(
             args.from_pretrained, num_labels=2, name='')
         if args.init_checkpoint is not None:
             print('loading checkpoint from %s' % args.init_checkpoint)
             sd, _ = FD.load_dygraph(args.init_checkpoint)
             model.set_dict(sd)
     elif 'wwm' in args.from_pretrained:
         config = json.load(
             open(os.path.join(args.from_pretrained, 'ernie_config.json'),
                  'rt',
                  encoding='utf-8'))
         config['num_labels'] = 2
         model = ErnieModelForSequenceClassification(config)
         # print(model)
         print('loading checkpoint from %s' % 'chinese_roberta_wwm_pp')
         sd, _ = FD.load_dygraph('%s/roberta_wwm.pdparams' %
                                 args.from_pretrained)
         for k, v in model.state_dict().items():
             if k not in sd:
                 print('param:%s not set in pretrained model, skip' % k)
示例#24
0
    # print(np.percentile([len(row[0]) for row in train_features], [0, 50, 95, 99, 100]))
    # print(np.percentile([len(row[0]) for row in dev_features], [0, 50, 95, 99, 100]))
    # to batch
    try:
        place = F.CUDAPlace(0)
    except:
        place = F.CPUPlace()
    with FD.guard(place):
        if 'wwm' in args.from_pretrained:
            config = json.load(open(os.path.join(args.from_pretrained, 'ernie_config.json'), 'rt', encoding='utf-8'))
            config['num_labels'] = 2
            model = ErnieModelForSequenceClassification(config)
            # print(model)
            print('loading checkpoint from %s' % 'chinese_roberta_wwm_pp')
            sd, _ = FD.load_dygraph('%s/roberta_wwm.pdparams' % args.from_pretrained)
            for k, v in model.state_dict().items():
                if k not in sd:
                    print('param:%s not set in pretrained model, skip' % k)
                    sd[k] = v # FIXME: no need to do this in the future
            model.set_dict(sd)
        else:       
            model = ErnieModelForSequenceClassification.from_pretrained(args.from_pretrained, num_labels=2, name='')
            if args.init_checkpoint is not None:
                print('loading checkpoint from %s' % args.init_checkpoint)
                sd, _ = FD.load_dygraph(args.init_checkpoint)
                model.set_dict(sd)

        test_batch_data = batchify(test_features, args.bsz, args.max_seqlen)
        if args.debug:
            print(len(test_batch_data))
示例#25
0
                        default=None,
                        help='inference model output directory')
    parser.add_argument('--init_checkpoint', type=str, default=None)
    parser.add_argument('--save_dir',
                        type=str,
                        default=None,
                        help='model output directory')
    parser.add_argument('--wd',
                        type=float,
                        default=0.01,
                        help='weight decay, aka L2 regularizer')

    args = parser.parse_args()

    place = F.CUDAPlace(D.parallel.Env().dev_id)
    D.guard(place).__enter__()

    ernie = ErnieModelForGeneration.from_pretrained(args.from_pretrained)
    tokenizer = ErnieTokenizer.from_pretrained(args.from_pretrained,
                                               mask_token=None)
    rev_dict = {v: k for k, v in tokenizer.vocab.items()}
    rev_dict[tokenizer.pad_id] = ''  # replace [PAD]
    rev_dict[tokenizer.unk_id] = ''  # replace [PAD]

    if args.init_checkpoint is not None:
        log.info('loading checkpoint from %s' % args.init_checkpoint)
        sd, _ = D.load_dygraph(args.init_checkpoint)
        ernie.set_dict(sd)

    seq2seq(ernie, tokenizer, args)
示例#26
0
    for epoch in range(EPOCH):
        for step, (ids_student, ids, sids,
                   labels) in enumerate(train_ds.start(place)):
            loss, logits = teacher_model(ids, labels=labels)
            loss.backward()
            if step % 10 == 0:
                print('[step %03d] teacher train loss %.5f lr %.3e' %
                      (step, loss.numpy(), opt.current_step_lr()))
            opt.minimize(loss, grad_clip=g_clip)
            teacher_model.clear_gradients()
            if step % 100 == 0:
                f1 = evaluate_teacher(teacher_model, dev_ds)
                print('teacher f1: %.5f' % f1)
    D.save_dygraph(teacher_model.state_dict(), './teacher_model')
else:
    state_dict, _ = D.load_dygraph('./teacher_model')
    teacher_model.set_dict(state_dict)
    f1 = evaluate_teacher(teacher_model, dev_ds)
    print('teacher f1: %.5f' % f1)

# Hyperparameters used for fine-tuning the student model.
# NOTE: EPOCH is rebound here; the teacher training loop earlier in this
# snippet also reads EPOCH.
SEQLEN = 256
BATCH = 100
EPOCH = 10
LR = 1e-4


def evaluate_student(model, dataset):
    all_pred, all_label = [], []
    with D.base._switch_tracer_mode_guard_(is_train=False):
        model.eval()
示例#27
0
    def finetune(
            self,
            train_path,
            dev_path=None,
            save_dir="ernie_gen_result",
            init_ckpt_path=None,
            use_gpu=True,
            max_steps=500,
            batch_size=8,
            max_encode_len=50,
            max_decode_len=50,
            learning_rate=5e-5,
            warmup_proportion=0.1,
            weight_decay=0.1,
            noise_prob=0,
            label_smooth=0,
            beam_width=5,
            length_penalty=1.0,
            log_interval=100,
            save_interval=200,
    ):
        """
        Finetune the model with the specified dataset.

        Args:
            train_path(str): the train dataset path.
            dev_path(str): the dev dataset path.
            save_dir(str): the model params and dev dataset predict result save path.
            init_ckpt_path(str): incremental training load path.
            use_gpu(bool): use gpu or not.
            max_steps(int): max training steps.
            batch_size(int): the batch size.
            max_encode_len(int): the max encode length.
            max_decode_len(int): the max decode length.
            learning_rate(float): the learning rate.
            warmup_proportion(float): the warmup proportion.
            weight_decay(float): the weight decay magnitude.
            noise_prob(float): the noise probability. see the ernie gen paper for details.
            label_smooth(float): the label smooth magnitude.
            beam_width(int): the beam size during evaluating the dev dataset.
            length_penalty(float): the length penalty during evaluating the dev dataset.
            log_interval(int): the log interval.
            save_interval(int): the save interval. dev set will be evaluated after saving.

        Return:
            result(dict): A Dictionary of shape::
                {
                    last_save_path(str|None): last model save path
                        ("<prefix>.pdparams"), or None if nothing was saved
                        (empty ``save_dir`` or no batch was trained).
                    last_ppl(float|None): last model ppl, or None if no
                        batch was trained.
                }
        """
        self.max_encode_len = max_encode_len
        self.max_decode_len = max_decode_len
        self.noise_prob = noise_prob

        place = F.CUDAPlace(0) if use_gpu else F.CPUPlace()

        with F.dygraph.guard(place):
            if init_ckpt_path is not None:
                logger.info('loading checkpoint from %s' % init_ckpt_path)
                sd, _ = D.load_dygraph(init_ckpt_path)
                self.model.set_dict(sd)

            feature_column = propeller.data.FeatureColumns([
                propeller.data.LabelColumn('id'),
                propeller.data.TextColumn(
                    'src',
                    unk_id=self.tokenizer.unk_id,
                    vocab_dict=self.tokenizer.vocab,
                    tokenizer=self.tokenizer.tokenize),
                propeller.data.TextColumn(
                    'tgt',
                    unk_id=self.tokenizer.unk_id,
                    vocab_dict=self.tokenizer.vocab,
                    tokenizer=self.tokenizer.tokenize),
            ])

            # Both datasets yield 11 int64 fields per batch: seven 2-D fields,
            # three 3-D fields and one 1-D field (see the shapes set below).
            batch_shapes = [[None, None]] * 7 + [[None, None, None]] * 3 + [[None]]
            batch_types = ['int64'] * 11

            train_ds = feature_column.build_dataset(
                'train',
                data_file=train_path,
                shuffle=False,
                repeat=True,
                use_gz=False) \
                .map(self._map_fn) \
                .shuffle(10000) \
                .padded_batch(batch_size) \
                .map(self._after_padding)
            train_ds.data_shapes = list(batch_shapes)
            train_ds.data_types = list(batch_types)

            # Always bind dev_ds so the helper below never touches an
            # unassigned name when no dev set is supplied.
            dev_ds = None
            if dev_path:
                dev_ds = feature_column.build_dataset(
                    'dev',
                    data_file=dev_path,
                    shuffle=False,
                    repeat=False,
                    use_gz=False) \
                    .map(self._map_fn) \
                    .padded_batch(1) \
                    .map(self._after_padding)
                dev_ds.data_shapes = list(batch_shapes)
                dev_ds.data_types = list(batch_types)

            def _save_and_evaluate(cur_step, cur_ppl):
                # Save the model under save_dir and, when a dev set was given,
                # write its predictions next to the checkpoint.
                # Returns the checkpoint path prefix (without extension).
                ckpt_path = os.path.join(
                    save_dir, "step_%s_ppl_%.5f" % (cur_step, cur_ppl))
                logger.info("save the model in %s" % ckpt_path)
                F.save_dygraph(self.model.state_dict(), ckpt_path)

                if dev_path:
                    logger.info('evaluating...')
                    res = self._evaluate(dev_ds, place, beam_width,
                                         length_penalty)
                    output_path = ckpt_path + ".txt"
                    logger.info(
                        'save the predict result in %s' % output_path)
                    with open(output_path, 'w') as fout:
                        fout.write('\n'.join(res))
                return ckpt_path

            vocab_size, _ = self.model.word_emb.weight.shape
            g_clip = F.clip.GradientClipByGlobalNorm(1.0)
            opt = AdamW(
                learning_rate=LinearDecay(learning_rate,
                                          int(warmup_proportion * max_steps),
                                          max_steps),
                parameter_list=self.model.parameters(),
                weight_decay=weight_decay,
                grad_clip=g_clip)

            # These stay None until at least one batch has been trained /
            # one checkpoint has been written.
            loss = None
            save_path = None
            ppl = None

            if save_dir and not os.path.exists(save_dir):
                os.makedirs(save_dir)

            for step, data in enumerate(train_ds.start(place)):
                (example_id, src_ids, src_sids, src_pids, tgt_ids, tgt_sids,
                 tgt_pids, attn_ids, mask_src_2_src, mask_tgt_2_srctgt,
                 mask_attn_2_srctgtattn, tgt_labels) = data

                # Pass 1: run the source sequence alone and keep its caches.
                _, __, info = self.model(
                    src_ids,
                    sent_ids=src_sids,
                    pos_ids=src_pids,
                    attn_bias=mask_src_2_src,
                    encode_only=True)
                cached_k, cached_v = info['caches']

                # Pass 2: run the target sequence on top of the source caches.
                _, __, info = self.model(
                    tgt_ids,
                    sent_ids=tgt_sids,
                    pos_ids=tgt_pids,
                    attn_bias=mask_tgt_2_srctgt,
                    past_cache=(cached_k, cached_v),
                    encode_only=True)
                cached_k2, cached_v2 = info['caches']

                # Join the source and target caches along axis 1 for pass 3.
                past_cache_k = [
                    L.concat([k, k2], 1) for k, k2 in zip(cached_k, cached_k2)
                ]
                past_cache_v = [
                    L.concat([v, v2], 1) for v, v2 in zip(cached_v, cached_v2)
                ]

                if label_smooth > 0.:
                    tgt_labels = L.label_smooth(
                        F.one_hot(tgt_labels, vocab_size), epsilon=label_smooth)

                # Pass 3: compute the loss at the [MASK] positions of attn_ids.
                loss, _, __ = self.model(
                    attn_ids,
                    sent_ids=tgt_sids,
                    pos_ids=tgt_pids,
                    attn_bias=mask_attn_2_srctgtattn,
                    past_cache=(past_cache_k, past_cache_v),
                    tgt_labels=tgt_labels,
                    tgt_pos=L.where(attn_ids == self.tokenizer.vocab['[MASK]']))

                loss.backward()
                opt.minimize(loss)
                self.model.clear_gradients()

                if step % log_interval == 0:
                    loss_np = loss.numpy()
                    ppl = np.exp(loss_np)
                    logger.info(
                        '[step %d / %d]train loss %.5f, ppl %.5f, elr %.3e' %
                        (step, max_steps, loss_np, ppl, opt.current_step_lr()))
                if save_dir and step % save_interval == 0 and step > 0:
                    ppl = np.exp(loss.numpy())
                    save_path = _save_and_evaluate(step, ppl)

                # NOTE(review): the check runs after the step has executed and
                # uses `>`, so steps 0..max_steps+1 are trained. Kept as-is so
                # the final checkpoint name does not change.
                if step > max_steps:
                    break

            # `is not None` rather than truthiness: whether a step ran must not
            # depend on how the framework converts a loss tensor to bool.
            if loss is not None:
                loss_np = loss.numpy()
                ppl = np.exp(loss_np)
                logger.info('[final step %d]train loss %.5f, ppl %.5f, elr %.3e'
                            % (step, loss_np, ppl, opt.current_step_lr()))
                if save_dir:
                    save_path = _save_and_evaluate(step, ppl)

            # Report None instead of "None.pdparams" / a TypeError on ppl[0]
            # when nothing was saved or no batch was trained.
            result = {
                "last_save_path": ("%s.pdparams" % save_path)
                if save_path is not None else None,
                "last_ppl": ppl[0] if ppl is not None else None,
            }

            return result