import os

import torch
from torch import nn
from torch.nn.parallel import DistributedDataParallel

# NOTE: models, MultitaskQuestionAnsweringNetwork, and the layer classes
# (Embedding, MTLSTM, Feedforward, PackedLSTM, CoattentiveLayer,
# TransformerEncoder, TransformerDecoder, DualPtrRNNDecoder, Elmo) are
# repo-local and assumed importable from the surrounding codebase.


def init_model(args, field, logger, world_size, device):
    logger.info(f'Initializing {args.model}')
    Model = getattr(models, args.model)  # look up the model class by name
    model = Model(field, args)
    params = get_trainable_params(model)
    num_param = count_params(params)
    logger.info(f'{args.model} has {num_param:,} trainable parameters')
    model.to(device)
    if world_size > 1:
        logger.info('Wrapping model for distributed')
        model = DistributedDataParallel(model)
    model.params = params
    return model
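
# The init_model variants here all lean on get_trainable_params and
# count_params from elsewhere in the codebase. A minimal sketch of what they
# presumably do, consistent with how they are called here (the exact bodies
# are an assumption):

def get_trainable_params(model):
    # keep only parameters that will actually receive gradients
    return [p for p in model.parameters() if p.requires_grad]

def count_params(params):
    # total number of scalar weights across the parameter tensors
    return sum(p.numel() for p in params)

# quick sanity check on a toy module: 4*2 weights + 2 biases = 10
assert count_params(get_trainable_params(nn.Linear(4, 2))) == 10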
def init_model(world_size):
    model = MultitaskQuestionAnsweringNetwork()
    if os.path.isfile('model.pth'):
        print('loading pretrained model')
        model.load_state_dict(torch.load('model.pth'))
    else:
        print('initializing a new model')
    params = get_trainable_params(model)
    num_param = count_params(params)
    print(f'model has {num_param:,} parameters')
    if world_size > 1:
        print('Wrapping model for distributed')
        model = DistributedDataParallel(model)
    model.params = params
    return model
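
# One caveat with this variant: if model.pth was saved from a
# DistributedDataParallel-wrapped model, every key in its state dict carries a
# 'module.' prefix that a bare module will reject. A hedged sketch of the
# usual workaround (the checkpoint layout is an assumption):

def load_checkpoint(model, path='model.pth'):
    state = torch.load(path, map_location='cpu')
    # strip the DDP prefix, if present, before loading into an unwrapped model
    state = {k[len('module.'):] if k.startswith('module.') else k: v
             for k, v in state.items()}
    model.load_state_dict(state)
    return model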
def init_model(args, field, logger, world_size):
    logger.info(f'Initializing {args.model}')
    Model = getattr(models, args.model)
    model = Model(field, args)  # instantiate the model
    params = get_trainable_params(model)
    num_param = count_params(params)  # count the model's trainable parameters
    logger.info(f'{args.model} has {num_param:,} parameters')
    if args.gpus[0] > -1:
        # GPU switch: a first GPU id of -1 (or lower) keeps everything on the CPU
        model.cuda()
    if world_size > 1:
        logger.info('Wrapping model for distributed')
        model = DistributedDataParallel(model)
    model.params = params
    return model
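
# In all three variants the trainable-parameter list is stashed on the
# (possibly DDP-wrapped) model before returning, presumably so the optimizer
# can be built downstream without reaching through the wrapper and without
# picking up frozen CoVe/ELMo weights. A hypothetical call site for the
# variant above; the flag values, learning rate, and the field object are
# assumptions:

from argparse import Namespace
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('train')

args = Namespace(model='MultitaskQuestionAnsweringNetwork', gpus=[-1])  # -1: stay on CPU
model = init_model(args, field, logger, world_size=1)  # field: the data-pipeline vocab object, defined elsewhere
opt = torch.optim.Adam(model.params, lr=1e-3)  # assumed learning rate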
def __init__(self, field, args):
    super().__init__()
    self.field = field
    self.args = args
    self.pad_idx = self.field.vocab.stoi[self.field.pad_token]

    def dp(args):
        # inter-layer dropout only applies to stacked LSTMs
        return args.dropout_ratio if args.rnn_layers > 1 else 0.

    if self.args.glove_and_char:
        self.encoder_embeddings = Embedding(field, args.dimension,
                                            dropout=args.dropout_ratio, project=not args.cove)
        if self.args.cove or self.args.intermediate_cove:
            self.cove = MTLSTM(model_cache=args.embeddings,
                               layer0=args.intermediate_cove, layer1=args.cove)
            cove_params = get_trainable_params(self.cove)
            for p in cove_params:
                p.requires_grad = False  # CoVe weights stay frozen
            # the last 400 is for GloVe and char n-gram embeddings
            cove_dim = int(args.intermediate_cove) * 600 + int(args.cove) * 600 + 400
            self.project_cove = Feedforward(cove_dim, args.dimension)

    if -1 not in self.args.elmo:
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.elmo = Elmo(options_file, weight_file, 3, dropout=0.0, do_layer_norm=False)
        elmo_params = get_trainable_params(self.elmo)
        for p in elmo_params:
            p.requires_grad = False  # ELMo weights stay frozen too
        elmo_dim = 1024 * len(self.args.elmo)  # 1024 dims per requested ELMo layer
        self.project_elmo = Feedforward(elmo_dim, args.dimension)
        if self.args.glove_and_char:
            self.project_embeddings = Feedforward(2 * args.dimension, args.dimension, dropout=0.0)

    self.decoder_embeddings = Embedding(field, args.dimension,
                                        dropout=args.dropout_ratio, project=True)

    self.bilstm_before_coattention = PackedLSTM(args.dimension, args.dimension,
                                                batch_first=True, bidirectional=True,
                                                num_layers=1, dropout=0)
    self.coattention = CoattentiveLayer(args.dimension, dropout=0.3)
    dim = 2 * args.dimension + args.dimension + args.dimension

    self.context_bilstm_after_coattention = PackedLSTM(dim, args.dimension,
                                                       batch_first=True, dropout=dp(args),
                                                       bidirectional=True,
                                                       num_layers=args.rnn_layers)
    self.self_attentive_encoder_context = TransformerEncoder(args.dimension,
                                                             args.transformer_heads,
                                                             args.transformer_hidden,
                                                             args.transformer_layers,
                                                             args.dropout_ratio)
    self.bilstm_context = PackedLSTM(args.dimension, args.dimension,
                                     batch_first=True, dropout=dp(args),
                                     bidirectional=True, num_layers=args.rnn_layers)

    self.question_bilstm_after_coattention = PackedLSTM(dim, args.dimension,
                                                        batch_first=True, dropout=dp(args),
                                                        bidirectional=True,
                                                        num_layers=args.rnn_layers)
    self.self_attentive_encoder_question = TransformerEncoder(args.dimension,
                                                              args.transformer_heads,
                                                              args.transformer_hidden,
                                                              args.transformer_layers,
                                                              args.dropout_ratio)
    self.bilstm_question = PackedLSTM(args.dimension, args.dimension,
                                      batch_first=True, dropout=dp(args),
                                      bidirectional=True, num_layers=args.rnn_layers)

    self.self_attentive_decoder = TransformerDecoder(args.dimension, args.transformer_heads,
                                                     args.transformer_hidden,
                                                     args.transformer_layers,
                                                     args.dropout_ratio)
    self.dual_ptr_rnn_decoder = DualPtrRNNDecoder(args.dimension, args.dimension,
                                                  dropout=args.dropout_ratio,
                                                  num_layers=args.rnn_layers)

    self.generative_vocab_size = min(len(field.vocab), args.max_generative_vocab)
    self.out = nn.Linear(args.dimension, self.generative_vocab_size)
    self.dropout = nn.Dropout(0.4)
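
# Unpacking the cove_dim arithmetic above: each enabled MT-LSTM (CoVe) layer
# contributes 600 dimensions, and the trailing 400 covers the concatenated
# GloVe (300-d) and character n-gram (100-d) vectors, so with both CoVe layers
# enabled, cove_dim = 600 + 600 + 400 = 1600. The 300/100 split is the usual
# decaNLP embedding setup and is an assumption here.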
def __init__(self, field, args):
    super().__init__()
    self.field = field
    self.args = args
    self.pad_idx = self.field.vocab.stoi[self.field.pad_token]

    def dp(args):
        # inter-layer dropout only applies to stacked LSTMs
        return args.dropout_ratio if args.rnn_layers > 1 else 0.

    self.encoder_embeddings = Embedding(field, args.dimension,
                                        dropout=args.dropout_ratio, project=not args.cove)
    self.decoder_embeddings = Embedding(field, args.dimension,
                                        dropout=args.dropout_ratio, project=True)

    if self.args.cove or self.args.intermediate_cove:
        self.cove = MTLSTM(model_cache=args.embeddings,
                           layer0=args.intermediate_cove, layer1=args.cove)
        cove_params = get_trainable_params(self.cove)
        for p in cove_params:
            p.requires_grad = False  # CoVe weights stay frozen
        # the last 400 is for GloVe and char n-gram embeddings
        cove_dim = int(args.intermediate_cove) * 600 + int(args.cove) * 600 + 400
        self.project_cove = Feedforward(cove_dim, args.dimension)

    self.bilstm_before_coattention = PackedLSTM(args.dimension, args.dimension,
                                                batch_first=True, bidirectional=True,
                                                num_layers=1, dropout=0)
    self.coattention = CoattentiveLayer(args.dimension, dropout=0.3)
    dim = 2 * args.dimension + args.dimension + args.dimension

    self.context_bilstm_after_coattention = PackedLSTM(dim, args.dimension,
                                                       batch_first=True, dropout=dp(args),
                                                       bidirectional=True,
                                                       num_layers=args.rnn_layers)
    self.self_attentive_encoder_context = TransformerEncoder(args.dimension,
                                                             args.transformer_heads,
                                                             args.transformer_hidden,
                                                             args.transformer_layers,
                                                             args.dropout_ratio)
    self.bilstm_context = PackedLSTM(args.dimension, args.dimension,
                                     batch_first=True, dropout=dp(args),
                                     bidirectional=True, num_layers=args.rnn_layers)

    self.question_bilstm_after_coattention = PackedLSTM(dim, args.dimension,
                                                        batch_first=True, dropout=dp(args),
                                                        bidirectional=True,
                                                        num_layers=args.rnn_layers)
    self.self_attentive_encoder_question = TransformerEncoder(args.dimension,
                                                              args.transformer_heads,
                                                              args.transformer_hidden,
                                                              args.transformer_layers,
                                                              args.dropout_ratio)
    self.bilstm_question = PackedLSTM(args.dimension, args.dimension,
                                      batch_first=True, dropout=dp(args),
                                      bidirectional=True, num_layers=args.rnn_layers)

    self.self_attentive_decoder = TransformerDecoder(args.dimension, args.transformer_heads,
                                                     args.transformer_hidden,
                                                     args.transformer_layers,
                                                     args.dropout_ratio)
    self.dual_ptr_rnn_decoder = DualPtrRNNDecoder(args.dimension, args.dimension,
                                                  dropout=args.dropout_ratio,
                                                  num_layers=args.rnn_layers)

    self.generative_vocab_size = min(len(field.vocab), args.max_generative_vocab)
    self.out = nn.Linear(args.dimension, self.generative_vocab_size)
    self.dropout = nn.Dropout(0.4)
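
# Why both __init__ variants define dp(): PyTorch only applies LSTM dropout
# between stacked layers, so passing dropout > 0 to a single-layer LSTM has no
# effect and triggers a warning. A runnable demonstration:

lstm_warns = nn.LSTM(8, 8, num_layers=1, dropout=0.5, batch_first=True)   # warns, dropout unused
lstm_active = nn.LSTM(8, 8, num_layers=2, dropout=0.5, batch_first=True)  # dropout between layers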