# ---- Example 1 ----
                'learning_rate_0')
            propeller.summary.scalar('lr', scheduled_lr)
            predictions = [
                logits,
            ]
            train_hooks = [lr_step_hook]

    return propeller.ModelSpec(loss=loss,
                               mode=mode,
                               metrics=metrics,
                               predictions=predictions,
                               train_hooks=train_hooks)


if __name__ == '__main__':
    parser = propeller.ArgumentParser('DAN model with Paddle')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--data_dir', type=str, required=True)
    parser.add_argument('--from_pretrained', type=str, required=True)
    parser.add_argument('--warm_start_from', type=str)
    parser.add_argument('--epoch', type=int, default=3)
    parser.add_argument('--use_amp', action='store_true')

    args = parser.parse_args()

    P.enable_static()

    if not os.path.exists(args.from_pretrained):
        raise ValueError('--from_pretrained not found: %s' %
                         args.from_pretrained)
# ---- Example 2 ----
            weight_decay=self.config.weight_decay,
            scheduler="linear_warmup_decay",
        )
        propeller.summary.scalar('lr', scheduled_lr)

    def metrics(self, predictions, labels):
        """Build evaluation metrics for one-hot predictions/labels.

        Both tensors are collapsed to class ids via argmax over the last
        class axis before being fed to the accuracy metric.
        """
        pred_ids = L.argmax(predictions, axis=1)
        label_ids = L.argmax(labels, axis=1)
        return {'acc': propeller.metrics.Acc(label_ids, pred_ids)}


if __name__ == '__main__':
    parser = propeller.ArgumentParser('distill model with ERNIE')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--teacher_vocab_file', type=str, required=True)
    parser.add_argument('--teacher_max_seqlen', type=int, default=128)
    parser.add_argument('--data_dir', type=str)
    parser.add_argument('--server_batch_size', type=int, default=64)
    parser.add_argument('--num_coroutine', type=int, default=1)
    parser.add_argument('--teacher_host', type=str, required=True)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)

    teacher_vocab = {
        j.strip().split(b'\t')[0].decode('utf8'): i
        for i, j in enumerate(open(args.teacher_vocab_file, 'rb'))
# ---- Example 3 ----
            startup_prog=F.default_startup_program(),
            weight_decay=self.config.weight_decay,
            scheduler="linear_warmup_decay",)
        propeller.summary.scalar('lr', scheduled_lr)


    def metrics(self, predictions, labels):
        """Return the metric dict ({'acc': Acc}) for this model.

        predictions and labels arrive as one-hot/logit tensors; take the
        argmax along axis 1 to get integer class ids for the Acc metric.
        """
        predicted_classes = L.argmax(predictions, axis=1)
        true_classes = L.argmax(labels, axis=1)
        accuracy = propeller.metrics.Acc(true_classes, predicted_classes)
        return {'acc': accuracy}

if __name__ == '__main__':
    parser = propeller.ArgumentParser('Distill model with Paddle')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--unsupervise_data_dir', type=str, required=True)
    parser.add_argument('--data_dir', type=str)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)

    vocab = {j.strip().split(b'\t')[0].decode('utf8'): i for i, j in enumerate(open(args.vocab_file, 'rb'))}
    unk_id = vocab['[UNK]']

    char_tokenizer = utils.data.CharTokenizer(vocab.keys())
    space_tokenizer = utils.data.SpaceTokenizer(vocab.keys())

    supervise_feature_column = propeller.data.FeatureColumns([
import paddle.fluid.dygraph as FD
import paddle.fluid.layers as L

from propeller import log
import propeller.paddle as propeller

# Turn on verbose output: set DEBUG on both the propeller logger and the
# root logger so messages from all libraries are emitted.
log.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)

#from model.bert import BertConfig, BertModelLayer
from ernie.modeling_ernie import ErnieModel, ErnieModelForSequenceClassification
from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
from ernie.optimization import AdamW, LinearDecay

if __name__ == '__main__':
    parser = propeller.ArgumentParser('classify model with ERNIE')
    parser.add_argument('--from_pretrained',
                        type=str,
                        required=True,
                        help='pretrained model directory or tag')
    parser.add_argument(
        '--max_seqlen',
        type=int,
        default=128,
        help='max sentence length, should not greater than 512')
    parser.add_argument('--bsz', type=int, default=32, help='batchsize')
    parser.add_argument('--data_dir',
                        type=str,
                        required=True,
                        help='data directory includes train / develop data')
    parser.add_argument(
# ---- Example 5 ----
        return ds

    # Post-batch hook: adds a trailing dimension to every padded feature
    # tensor (applied via .map(after) after padded_batch below).
    def after(*features):
        return utils.data.expand_dims(*features)

    dataset = propeller.data.Dataset.from_generator_func(stdin_gen)
    dataset = read_bio_data(dataset)
    dataset = reseg_token_label(dataset)
    dataset = convert_to_ids(dataset)
    dataset = dataset.padded_batch(batch_size).map(after)
    dataset.name = name
    return dataset


if __name__ == '__main__':
    parser = propeller.ArgumentParser('NER model with ERNIE')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--data_dir', type=str, required=True)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--use_sentence_piece_vocab', action='store_true')
    parser.add_argument('--warm_start_from', type=str)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)

    vocab = {
        j.strip().split('\t')[0]: i
        for i, j in enumerate(open(args.vocab_file, 'r', encoding='utf8'))
    }
    tokenizer = utils.data.CharTokenizer(
# ---- Example 6 ----
        scheduled_lr, _ = optimization(
            loss=loss,
            warmup_steps=int(self.run_config.max_steps *
                             self.hparam['warmup_proportion']),
            num_train_steps=self.run_config.max_steps,
            learning_rate=self.hparam['learning_rate'],
            train_program=F.default_main_program(),
            startup_prog=F.default_startup_program(),
            weight_decay=self.hparam['weight_decay'],
            scheduler="linear_warmup_decay",
        )
        propeller.summary.scalar('lr', scheduled_lr)


if __name__ == '__main__':
    parser = propeller.ArgumentParser('ranker model with ERNIE')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--predict_model', type=str, default=None)
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--data_dir', type=str, required=True)
    parser.add_argument('--warm_start_from', type=str)
    parser.add_argument('--sentence_piece_model', type=str, default=None)
    parser.add_argument('--word_dict', type=str, default=None)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)

    vocab = {
        j.strip().split(b'\t')[0].decode('utf8'): i
        for i, j in enumerate(open(args.vocab_file, 'rb'))