Example #1
def init_model(args, field, logger, world_size, device):
    logger.info(f'Initializing {args.model}')
    # Look up the model class named by args.model inside the models module.
    Model = getattr(models, args.model)
    model = Model(field, args)
    params = get_trainable_params(model)
    num_param = count_params(params)
    logger.info(f'{args.model} has {num_param:,} trainable parameters')

    model.to(device)
    if world_size > 1:
        # Wrap the model for multi-process training when several workers are running.
        logger.info(f'Wrapping model for distributed')
        model = DistributedDataParallel(model)

    model.params = params
    return model
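The getattr lookup above resolves whatever class name is passed as args.model inside the project's models module, so architectures can be chosen from the command line without editing this function. Below is a minimal, self-contained sketch of the same pattern; the tiny_models module, SmallNet class, and the two helper functions are illustrative stand-ins, not decaNLP's actual code.

import types
import torch.nn as nn

# Stand-in "models" module with one registered architecture (hypothetical).
tiny_models = types.ModuleType('tiny_models')

class SmallNet(nn.Module):
    def __init__(self, dim=8):
        super().__init__()
        self.proj = nn.Linear(dim, dim)

tiny_models.SmallNet = SmallNet

def get_trainable_params(model):
    # Same idea as decaNLP's helper: keep only parameters that require gradients.
    return [p for p in model.parameters() if p.requires_grad]

def count_params(params):
    return sum(p.numel() for p in params)

model_name = 'SmallNet'                   # would normally come from args.model
Model = getattr(tiny_models, model_name)  # resolve the class by name
model = Model()
print(f'{model_name} has {count_params(get_trainable_params(model)):,} trainable parameters')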
Example #2
File: train.py  Project: shaogx/decaNLP
def init_model(args, field, logger, world_size, device):
    logger.info(f'Initializing {args.model}')
    Model = getattr(models, args.model) 
    model = Model(field, args)
    params = get_trainable_params(model) 
    num_param = count_params(params)
    logger.info(f'{args.model} has {num_param:,} trainable parameters')

    model.to(device)
    if world_size > 1: 
        logger.info(f'Wrapping model for distributed')
        model = DistributedDataParallel(model)

    model.params = params
    return model
Example #3
def init_model(world_size):

    model = MultitaskQuestionAnsweringNetwork()
    if os.path.isfile('model.pth'):
        print('load pretrained model')
        model.load_state_dict(torch.load('model.pth'))
    else:
        print('new model')
    params = get_trainable_params(model)
    num_param = count_params(params)
    print(f'model has {num_param:,} parameters')
    if world_size > 1:
        print(f'Wrapping model for distributed')
        model = DistributedDataParallel(model)
    model.params = params
    return model
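Example #3 resumes from a serialized state dict whenever model.pth already exists. The save/load round trip it relies on is standard PyTorch; here is a minimal sketch of that round trip, with a toy module and the same placeholder file name:

import os
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
# Save only the weights (the state dict), which is what load_state_dict expects.
torch.save(model.state_dict(), 'model.pth')

restored = nn.Linear(4, 2)
if os.path.isfile('model.pth'):
    restored.load_state_dict(torch.load('model.pth'))
    print('load pretrained model')
else:
    print('new model')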
Example #4
File: train.py  Project: arfu2016/decaNLP
def init_model(args, field, logger, world_size):
    logger.info(f'Initializing {args.model}')
    Model = getattr(models, args.model) 
    model = Model(field, args)
    # initialize the model
    params = get_trainable_params(model) 
    num_param = count_params(params)
    # count the number of model parameters
    logger.info(f'{args.model} has {num_param:,} parameters')

    if args.gpus[0] > -1:
        model.cuda()
        # GPU usage is configured here: if the flag is -1 or lower, skip the GPU and use the CPU only
    if world_size > 1: 
        logger.info(f'Wrapping model for distributed')
        model = DistributedDataParallel(model)

    model.params = params
    return model
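Example #4 calls model.cuda() only when the first entry of args.gpus is non-negative, whereas Examples #1 and #2 receive an explicit device and call model.to(device). A short sketch of the same gating logic in the device-based style; the gpus list here is a stand-in for the parsed command-line argument:

import torch
import torch.nn as nn

gpus = [-1]  # e.g. parsed from the command line; -1 (or lower) means CPU only

use_gpu = gpus[0] > -1 and torch.cuda.is_available()
device = torch.device('cuda', gpus[0]) if use_gpu else torch.device('cpu')
model = nn.Linear(4, 2).to(device)
print(f'model placed on {device}')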
Example #5
    def __init__(self, field, args):
        super().__init__()
        self.field = field
        self.args = args
        self.pad_idx = self.field.vocab.stoi[self.field.pad_token]
        def dp(args):
            return args.dropout_ratio if args.rnn_layers > 1 else 0.

        if self.args.glove_and_char:
        
            self.encoder_embeddings = Embedding(field, args.dimension, 
                dropout=args.dropout_ratio, project=not args.cove)
    
            if self.args.cove or self.args.intermediate_cove:
                self.cove = MTLSTM(model_cache=args.embeddings, layer0=args.intermediate_cove, layer1=args.cove)
                cove_params = get_trainable_params(self.cove) 
                for p in cove_params:
                    p.requires_grad = False
                cove_dim = int(args.intermediate_cove) * 600 + int(args.cove) * 600 + 400 # the last 400 is for GloVe and char n-gram embeddings
                self.project_cove = Feedforward(cove_dim, args.dimension)

        if -1 not in self.args.elmo:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file, weight_file, 3, dropout=0.0, do_layer_norm=False)
            elmo_params = get_trainable_params(self.elmo)
            for p in elmo_params:
                p.requires_grad = False
            elmo_dim = 1024 * len(self.args.elmo)
            self.project_elmo = Feedforward(elmo_dim, args.dimension)
            if self.args.glove_and_char:
                self.project_embeddings = Feedforward(2 * args.dimension, args.dimension, dropout=0.0)
        
        self.decoder_embeddings = Embedding(field, args.dimension, 
            dropout=args.dropout_ratio, project=True)
    
        self.bilstm_before_coattention = PackedLSTM(args.dimension,  args.dimension,
            batch_first=True, bidirectional=True, num_layers=1, dropout=0)
        self.coattention = CoattentiveLayer(args.dimension, dropout=0.3)
        dim = 2*args.dimension + args.dimension + args.dimension

        self.context_bilstm_after_coattention = PackedLSTM(dim, args.dimension,
            batch_first=True, dropout=dp(args), bidirectional=True, 
            num_layers=args.rnn_layers)
        self.self_attentive_encoder_context = TransformerEncoder(args.dimension, args.transformer_heads, args.transformer_hidden, args.transformer_layers, args.dropout_ratio)
        self.bilstm_context = PackedLSTM(args.dimension, args.dimension,
            batch_first=True, dropout=dp(args), bidirectional=True, 
            num_layers=args.rnn_layers)

        self.question_bilstm_after_coattention = PackedLSTM(dim, args.dimension,
            batch_first=True, dropout=dp(args), bidirectional=True, 
            num_layers=args.rnn_layers)
        self.self_attentive_encoder_question = TransformerEncoder(args.dimension, args.transformer_heads, args.transformer_hidden, args.transformer_layers, args.dropout_ratio)
        self.bilstm_question = PackedLSTM(args.dimension, args.dimension,
            batch_first=True, dropout=dp(args), bidirectional=True, 
            num_layers=args.rnn_layers)

        self.self_attentive_decoder = TransformerDecoder(args.dimension, args.transformer_heads, args.transformer_hidden, args.transformer_layers, args.dropout_ratio)
        self.dual_ptr_rnn_decoder = DualPtrRNNDecoder(args.dimension, args.dimension,
            dropout=args.dropout_ratio, num_layers=args.rnn_layers)

        self.generative_vocab_size = min(len(field.vocab), args.max_generative_vocab)
        self.out = nn.Linear(args.dimension, self.generative_vocab_size)

        self.dropout = nn.Dropout(0.4)
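In this constructor the pretrained CoVe and ELMo sub-modules are built, their parameters are collected with get_trainable_params, and requires_grad is switched off so those weights stay frozen while the rest of the network trains. A minimal sketch of that freezing pattern; the plain embedding layer stands in for MTLSTM or Elmo:

import torch.nn as nn

pretrained = nn.Embedding(1000, 300)   # stand-in for the CoVe / ELMo module
for p in pretrained.parameters():
    p.requires_grad = False            # exclude from gradient updates

trainable = [p for p in pretrained.parameters() if p.requires_grad]
print(f'{len(trainable)} trainable tensors after freezing')  # prints 0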
Example #6
    def __init__(self, field, args):
        super().__init__()
        self.field = field
        self.args = args
        self.pad_idx = self.field.vocab.stoi[self.field.pad_token]

        def dp(args):
            return args.dropout_ratio if args.rnn_layers > 1 else 0.

        self.encoder_embeddings = Embedding(field,
                                            args.dimension,
                                            dropout=args.dropout_ratio,
                                            project=not args.cove)
        self.decoder_embeddings = Embedding(field,
                                            args.dimension,
                                            dropout=args.dropout_ratio,
                                            project=True)

        if self.args.cove or self.args.intermediate_cove:
            self.cove = MTLSTM(model_cache=args.embeddings,
                               layer0=args.intermediate_cove,
                               layer1=args.cove)
            cove_params = get_trainable_params(self.cove)
            for p in cove_params:
                p.requires_grad = False
            cove_dim = int(args.intermediate_cove) * 600 + int(
                args.cove
            ) * 600 + 400  # the last 400 is for GloVe and char n-gram embeddings
            self.project_cove = Feedforward(cove_dim, args.dimension)

        self.bilstm_before_coattention = PackedLSTM(args.dimension,
                                                    args.dimension,
                                                    batch_first=True,
                                                    bidirectional=True,
                                                    num_layers=1,
                                                    dropout=0)
        self.coattention = CoattentiveLayer(args.dimension, dropout=0.3)
        dim = 2 * args.dimension + args.dimension + args.dimension

        self.context_bilstm_after_coattention = PackedLSTM(
            dim,
            args.dimension,
            batch_first=True,
            dropout=dp(args),
            bidirectional=True,
            num_layers=args.rnn_layers)
        self.self_attentive_encoder_context = TransformerEncoder(
            args.dimension, args.transformer_heads, args.transformer_hidden,
            args.transformer_layers, args.dropout_ratio)
        self.bilstm_context = PackedLSTM(args.dimension,
                                         args.dimension,
                                         batch_first=True,
                                         dropout=dp(args),
                                         bidirectional=True,
                                         num_layers=args.rnn_layers)

        self.question_bilstm_after_coattention = PackedLSTM(
            dim,
            args.dimension,
            batch_first=True,
            dropout=dp(args),
            bidirectional=True,
            num_layers=args.rnn_layers)
        self.self_attentive_encoder_question = TransformerEncoder(
            args.dimension, args.transformer_heads, args.transformer_hidden,
            args.transformer_layers, args.dropout_ratio)
        self.bilstm_question = PackedLSTM(args.dimension,
                                          args.dimension,
                                          batch_first=True,
                                          dropout=dp(args),
                                          bidirectional=True,
                                          num_layers=args.rnn_layers)

        self.self_attentive_decoder = TransformerDecoder(
            args.dimension, args.transformer_heads, args.transformer_hidden,
            args.transformer_layers, args.dropout_ratio)
        self.dual_ptr_rnn_decoder = DualPtrRNNDecoder(
            args.dimension,
            args.dimension,
            dropout=args.dropout_ratio,
            num_layers=args.rnn_layers)

        self.generative_vocab_size = min(len(field.vocab),
                                         args.max_generative_vocab)
        self.out = nn.Linear(args.dimension, self.generative_vocab_size)

        self.dropout = nn.Dropout(0.4)
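Both constructors route their LSTM dropout through the small dp(args) helper, which returns the configured ratio only when the LSTMs are stacked: PyTorch applies recurrent dropout between layers, so with a single layer it has no effect and nn.LSTM warns if a non-zero value is passed anyway. A standalone sketch of the same rule, with illustrative values for dropout_ratio and rnn_layers:

import torch.nn as nn

dropout_ratio, rnn_layers = 0.2, 1

def dp(dropout_ratio, rnn_layers):
    # Inter-layer dropout is meaningless for a single-layer LSTM, so disable it.
    return dropout_ratio if rnn_layers > 1 else 0.

lstm = nn.LSTM(200, 200, batch_first=True, bidirectional=True,
               num_layers=rnn_layers, dropout=dp(dropout_ratio, rnn_layers))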