def write(filename, filename2, rows):
    options_file = 'biomed_elmo_options.json'
    weight_file = 'biomed_elmo_weights.hdf5'
    elmo = Elmo(options_file, weight_file, 1, dropout=0)
    elmo = elmo.to("cuda")
    df = pd.read_csv(filename, nrows=rows)
    x = df["TEXT"].values
    y = df[['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
            'Airspace Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
            'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
            'Pleural Other', 'Fracture', 'Support Devices']].values
    for i in range(rows):
        print(str(i) + " of " + str(rows))
        text = x[i]
        # Tokenize and right-pad each report to MAXLEN tokens
        # (`tokenizer` and MAXLEN are module-level names)
        token_list = []
        for word in tokenizer(text):
            token_list.append(word)
        for n in range(MAXLEN - len(token_list)):
            token_list.append('PAD')
        token_list = np.array([token_list])
        character_ids = batch_to_ids(token_list)
        character_ids = character_ids.to("cuda")
        word_emb = elmo(character_ids)['elmo_representations'][0]
        character_ids = character_ids.to("cpu")  # .to() returns a new tensor; reassign
        word_emb = word_emb.data.cpu().numpy()
        label = y[i]
        # dtype=object: the embedding matrix and label vector have different shapes
        batch = np.array([word_emb, label], dtype=object)
        final_filename = "./Data/" + filename2 + "_instance_" + str(i)
        np.save(final_filename, batch)
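# How `write` might be driven. The module-level `tokenizer` and MAXLEN it
# relies on are not shown in the snippet, so both definitions below are
# placeholders, and the CSV name is hypothetical; the biomed ELMo
# options/weights files are assumed to exist locally.
MAXLEN = 300

def tokenizer(text):
    # naive whitespace tokenization as a stand-in for the real tokenizer
    return text.split()

# Embed the first 100 rows of a CheXpert-style CSV and write
# ./Data/train_instance_0.npy ... ./Data/train_instance_99.npy
write('train_reports.csv', 'train', rows=100)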
def __init__(self, weight, vocab_size, embedding_dim,
             rnn_size_in=(1024 + 300, 1024 + 300),
             rnn_size_out=(300, 300),
             max_l=150, max_span_l=50, mlp_d=300,
             num_of_class=3, drop_r=0.5, activation_type='relu',
             use_extra_lex_feature=True):
    super(Model, self).__init__()
    self.glove_embd_layer = Embedding(vocab_size, embedding_dim,
                                      weight=weight, padding_index=0)

    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    num_of_elmo = 1

    self.max_l = max_l
    self.elmo_embd_layer = Elmo(options_file, weight_file, num_of_elmo, dropout=0)
    self.esim_layer = ESIM_SENT_WISE(rnn_size_in, rnn_size_out, max_l, max_span_l,
                                     mlp_d, num_of_class, drop_r, activation_type)
    self.use_extra_lex_feature = use_extra_lex_feature
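# Why rnn_size_in defaults to (1024 + 300, ...): token representations here
# are the concatenation of 1024-d ELMo vectors and 300-d GloVe vectors. A
# shape-only sketch with random stand-in tensors:
import torch

batch_size, seq_len = 8, 150
elmo_repr = torch.randn(batch_size, seq_len, 1024)
glove_repr = torch.randn(batch_size, seq_len, 300)
combined = torch.cat([elmo_repr, glove_repr], dim=-1)
assert combined.shape == (8, 150, 1324)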
def get_elmo(options_file: str,
             weight_file: str,
             num_output_representations: int,
             requires_grad: bool = False,
             do_layer_norm: bool = False,
             dropout: float = 0.5,
             vocab_to_cache: List[str] = None,
             keep_sentence_boundaries: bool = False):
    from allennlp.modules import Elmo

    key = (options_file, weight_file)
    old_elmo = global_elmo_cache.get(key)
    if old_elmo:
        # Reuse the heavyweight biLM from the cached instance; passing
        # `module` makes Elmo skip loading the options/weights files.
        # noinspection PyProtectedMember
        module = old_elmo._elmo_lstm
        options_file = None
        weight_file = None
    else:
        module = None
    ret = Elmo(options_file=options_file,
               weight_file=weight_file,
               num_output_representations=num_output_representations,
               requires_grad=requires_grad,
               do_layer_norm=do_layer_norm,
               dropout=dropout,
               vocab_to_cache=vocab_to_cache,
               keep_sentence_boundaries=keep_sentence_boundaries,
               module=module)
    if not old_elmo:
        global_elmo_cache[key] = ret
    return ret
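# Sketch of the caching behaviour, assuming `global_elmo_cache` is a
# module-level dict keyed by (options_file, weight_file): the second call
# reuses the expensive biLM and only builds fresh scalar-mix parameters.
global_elmo_cache = {}

opts = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
wts = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

first = get_elmo(opts, wts, num_output_representations=1)
second = get_elmo(opts, wts, num_output_representations=2)
assert first._elmo_lstm is second._elmo_lstm  # shared biLM weights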
def __init__(self, device, elmo_options, elmo_weights, elmo_size=None):
    super().__init__()
    self.device = device
    self.elmo_options = elmo_options
    self.elmo_weights = elmo_weights
    self.elmo_size = elmo_size
    self.elmo = Elmo(self.elmo_options, self.elmo_weights, 2, dropout=0)
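# Standalone illustration of num_output_representations=2, as requested
# above: the forward pass returns two independently learned scalar mixes of
# the same biLM layers. The file names are assumed local checkpoint paths.
from allennlp.modules.elmo import Elmo, batch_to_ids

elmo = Elmo('elmo_options.json', 'elmo_weights.hdf5', 2, dropout=0)
out = elmo(batch_to_ids([['A', 'short', 'sentence', '.']]))
rep_a, rep_b = out['elmo_representations']   # each: [1, 4, 1024]
mask = out['mask']                           # [1, 4]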
class ElmoTokenEmbedderWrapper(TokenEmbedder):
    """
    Wraps the Elmo call so that the parameters are saved correctly.
    Forwards all calls to Elmo.
    """

    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 do_layer_norm: bool = False,
                 dropout: float = 0.5,
                 requires_grad: bool = False,
                 projection_dim: int = None,
                 num_output_representations: int = 1) -> None:
        super(ElmoTokenEmbedderWrapper, self).__init__()

        # other arguments can be passed in when needed
        self._elmo = Elmo(options_file=options_file,
                          weight_file=weight_file,
                          num_output_representations=num_output_representations,
                          dropout=dropout)

    def get_output_dim(self):
        return self._elmo.get_output_dim()

    def forward(self, inputs: torch.Tensor) -> Dict[str, torch.Tensor]:  # pylint: disable=arguments-differ
        return self._elmo(inputs)

    # this is also deferred to elmo
    @classmethod
    def from_params(cls, params: Params) -> 'ElmoTokenEmbedderWrapper':
        # A classmethod has no `self`: build an instance first, then attach
        # the Elmo module constructed from the params.
        instance = cls.__new__(cls)
        TokenEmbedder.__init__(instance)
        instance._elmo = Elmo.from_params(params)
        return instance
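# Usage sketch for the wrapper above, assuming a local options/weights pair
# (any ELMo checkpoint works); batch_to_ids converts tokens to character ids.
from allennlp.modules.elmo import batch_to_ids

embedder = ElmoTokenEmbedderWrapper(options_file='elmo_options.json',
                                    weight_file='elmo_weights.hdf5',
                                    dropout=0.0)
inputs = batch_to_ids([['Hello', 'world', '!']])       # [1, 3, 50]
output = embedder(inputs)['elmo_representations'][0]   # [1, 3, output_dim]
assert output.size(-1) == embedder.get_output_dim()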
def __init__(self, weight, vocab_size, embedding_dim,
             rnn_size_in=(1024 + 300,),
             rnn_size_out=(1024,),
             max_l=600, mlp_d=1024,
             num_of_class=3, drop_r=0.5, activation_type='relu'):
    super(Model, self).__init__()
    self.glove_embd_layer = Embedding(vocab_size, embedding_dim,
                                      weight=weight, padding_index=0)

    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    num_of_elmo = 1

    self.max_l = max_l
    self.elmo_embd_layer = Elmo(options_file, weight_file, num_of_elmo, dropout=0)
    self.esim_layer = FastMaxout(rnn_size_in, rnn_size_out, max_l, mlp_d,
                                 num_of_class, drop_r, activation_type)
def __init__(self, config):
    super(SeqMetaModel, self).__init__()
    self.base_path = config['base_path']
    self.learner_lr = config.get('learner_lr', 1e-3)
    self.output_lr = config.get('output_lr', 0.1)

    if 'seq' in config['learner_model']:
        self.learner = RNNSequenceModel(config['learner_params'])
    elif 'mlp' in config['learner_model']:
        self.learner = MLPModel(config['learner_params'])
    elif 'bert' in config['learner_model']:
        self.learner = BERTSequenceModel(config['learner_params'])

    self.proto_maml = config.get('proto_maml', False)
    self.fomaml = config.get('fomaml', False)
    self.vectors = config.get('vectors', 'glove')

    if self.vectors == 'elmo':
        self.elmo = Elmo(options_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json",
                         weight_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5",
                         num_output_representations=1,
                         dropout=0,
                         requires_grad=False)
    elif self.vectors == 'glove':
        self.glove = torchtext.vocab.GloVe(name='840B', dim=300)
    elif self.vectors == 'bert':
        self.bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

    self.learner_loss = {}
    for task in config['learner_params']['num_outputs']:
        self.learner_loss[task] = nn.CrossEntropyLoss(ignore_index=-1)

    self.output_layer_weight = None
    self.output_layer_bias = None

    if config.get('trained_learner', False):
        self.learner.load_state_dict(torch.load(
            os.path.join(self.base_path, 'saved_models', config['trained_learner'])
        ))
        logger.info('Loaded trained learner model {}'.format(config['trained_learner']))

    self.device = torch.device(config.get('device', 'cpu'))
    self.to(self.device)

    if self.proto_maml:
        logger.info('Initialization of output layer weights as per prototypical networks turned on')

    params = [p for p in self.learner.parameters() if p.requires_grad]
    self.learner_optimizer = optim.SGD(params, lr=self.learner_lr)
def from_params(  # type: ignore
        cls,
        vocab: Vocabulary,
        params: Params) -> "BiattentiveClassificationNetwork":
    embedder_params = params.pop("text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab=vocab, params=embedder_params)
    embedding_dropout = params.pop("embedding_dropout")
    pre_encode_feedforward = FeedForward.from_params(params.pop("pre_encode_feedforward"))
    encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
    integrator = Seq2SeqEncoder.from_params(params.pop("integrator"))
    integrator_dropout = params.pop("integrator_dropout")

    output_layer_params = params.pop("output_layer")
    if "activations" in output_layer_params:
        output_layer = FeedForward.from_params(output_layer_params)
    else:
        output_layer = Maxout.from_params(output_layer_params)

    elmo = params.pop("elmo", None)
    if elmo is not None:
        elmo = Elmo.from_params(elmo)
    use_input_elmo = params.pop_bool("use_input_elmo", False)
    use_integrator_output_elmo = params.pop_bool("use_integrator_output_elmo", False)

    initializer = InitializerApplicator.from_params(params.pop("initializer", []))
    regularizer = RegularizerApplicator.from_params(params.pop("regularizer", []))
    params.assert_empty(cls.__name__)

    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               embedding_dropout=embedding_dropout,
               pre_encode_feedforward=pre_encode_feedforward,
               encoder=encoder,
               integrator=integrator,
               integrator_dropout=integrator_dropout,
               output_layer=output_layer,
               elmo=elmo,
               use_input_elmo=use_input_elmo,
               use_integrator_output_elmo=use_integrator_output_elmo,
               initializer=initializer,
               regularizer=regularizer)
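# Illustrative (untested) parameter block matching the keys popped above,
# given a `vocab` built from the dataset. Dimensions follow the usual BCN
# pattern (integrator input = 3x the encoder output) but every value is a
# placeholder, not a released configuration.
from allennlp.common import Params

params = Params({
    'text_field_embedder': {'tokens': {'type': 'embedding', 'embedding_dim': 300}},
    'embedding_dropout': 0.25,
    'pre_encode_feedforward': {'input_dim': 300, 'num_layers': 1,
                               'hidden_dims': [300], 'activations': ['relu']},
    'encoder': {'type': 'lstm', 'input_size': 300, 'hidden_size': 300, 'bidirectional': True},
    'integrator': {'type': 'lstm', 'input_size': 1800, 'hidden_size': 300, 'bidirectional': True},
    'integrator_dropout': 0.1,
    'output_layer': {'input_dim': 2400, 'num_layers': 3, 'output_dims': [1200, 600, 3],
                     'pool_sizes': 4, 'dropout': [0.2, 0.3, 0.0]},
})
model = BiattentiveClassificationNetwork.from_params(vocab=vocab, params=params)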
def __init__(self, config):
    super(SeqBaselineModel, self).__init__()
    self.base_path = config['base_path']
    self.early_stopping = config['early_stopping']
    self.learner_lr = config.get('learner_lr', 1e-3)
    self.weight_decay = config.get('meta_weight_decay', 0.0)

    if 'seq' in config['learner_model']:
        self.learner = RNNSequenceModel(config['learner_params'])
    elif 'mlp' in config['learner_model']:
        self.learner = MLPModel(config['learner_params'])

    self.vectors = config.get('vectors', 'glove')

    if self.vectors == 'elmo':
        self.elmo = Elmo(options_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json",
                         weight_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5",
                         num_output_representations=1,
                         dropout=0,
                         requires_grad=False)
    elif self.vectors == 'glove':
        self.glove = torchtext.vocab.GloVe(name='840B', dim=300)

    self.learner_loss = {}
    for task in config['learner_params']['num_outputs']:
        self.learner_loss[task] = nn.CrossEntropyLoss(ignore_index=-1)

    self.output_layer = None

    if config.get('trained_baseline', None):
        self.learner.load_state_dict(torch.load(
            os.path.join(self.base_path, 'saved_models', config['trained_baseline'])
        ))
        logger.info('Loaded trained baseline model {}'.format(config['trained_baseline']))

    self.device = torch.device(config.get('device', 'cpu'))
    self.to(self.device)
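# Illustrative config for the constructor above; every key and value is a
# placeholder ('learner_params' in particular also needs whatever
# model-specific keys RNNSequenceModel expects).
config = {
    'base_path': '.',
    'early_stopping': 3,
    'learner_lr': 1e-3,
    'meta_weight_decay': 0.0,
    'learner_model': 'seq',
    'learner_params': {'num_outputs': {'ner': 9, 'pos': 17}},
    'vectors': 'glove',
    'device': 'cpu',
}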
def from_params(cls, vocab, params):  # type: ignore
    # pylint: disable=arguments-differ
    embedder_params = params.pop(u"text_field_embedder")
    text_field_embedder = TextFieldEmbedder.from_params(vocab=vocab, params=embedder_params)
    embedding_dropout = params.pop(u"embedding_dropout")
    pre_encode_feedforward = FeedForward.from_params(params.pop(u"pre_encode_feedforward"))
    encoder = Seq2SeqEncoder.from_params(params.pop(u"encoder"))
    integrator = Seq2SeqEncoder.from_params(params.pop(u"integrator"))
    integrator_dropout = params.pop(u"integrator_dropout")

    output_layer_params = params.pop(u"output_layer")
    if u"activations" in output_layer_params:
        output_layer = FeedForward.from_params(output_layer_params)
    else:
        output_layer = Maxout.from_params(output_layer_params)

    elmo = params.pop(u"elmo", None)
    if elmo is not None:
        elmo = Elmo.from_params(elmo)
    use_input_elmo = params.pop_bool(u"use_input_elmo", False)
    use_integrator_output_elmo = params.pop_bool(u"use_integrator_output_elmo", False)

    initializer = InitializerApplicator.from_params(params.pop(u'initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop(u'regularizer', []))
    params.assert_empty(cls.__name__)

    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               embedding_dropout=embedding_dropout,
               pre_encode_feedforward=pre_encode_feedforward,
               encoder=encoder,
               integrator=integrator,
               integrator_dropout=integrator_dropout,
               output_layer=output_layer,
               elmo=elmo,
               use_input_elmo=use_input_elmo,
               use_integrator_output_elmo=use_integrator_output_elmo,
               initializer=initializer,
               regularizer=regularizer)
class NearestNeighborClassifier():

    def __init__(self, config):
        self.vectors = config.get('vectors', 'elmo')
        self.device = torch.device(config.get('device', 'cpu'))

        if self.vectors == 'elmo':
            self.elmo = Elmo(options_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json",
                             weight_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5",
                             num_output_representations=1,
                             dropout=0,
                             requires_grad=False)
            self.elmo.to(self.device)
        elif self.vectors == 'glove':
            self.glove = torchtext.vocab.GloVe(name='840B', dim=300)
        elif self.vectors == 'bert':
            self.bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
            self.bert = BertModel.from_pretrained('bert-base-cased')
            self.bert.to(self.device)

        logger.info('Nearest neighbor classifier instantiated')

    def vectorize(self, batch_x, batch_len, batch_y):
        with torch.no_grad():
            if self.vectors == 'elmo':
                char_ids = batch_to_ids(batch_x)
                char_ids = char_ids.to(self.device)
                batch_x = self.elmo(char_ids)['elmo_representations'][0]
            elif self.vectors == 'glove':
                max_batch_len = max(batch_len)
                vec_batch_x = torch.ones((len(batch_x), max_batch_len, 300))
                for i, sent in enumerate(batch_x):
                    sent_emb = self.glove.get_vecs_by_tokens(sent, lower_case_backup=True)
                    vec_batch_x[i, :len(sent_emb)] = sent_emb
                batch_x = vec_batch_x.to(self.device)
            elif self.vectors == 'bert':
                max_batch_len = max(batch_len) + 2
                input_ids = torch.zeros((len(batch_x), max_batch_len)).long()
                for i, sent in enumerate(batch_x):
                    sent_token_ids = self.bert_tokenizer.encode(sent, add_special_tokens=True)
                    input_ids[i, :len(sent_token_ids)] = torch.tensor(sent_token_ids)
                batch_x = input_ids.to(self.device)
                attention_mask = (batch_x.detach() != 0).float()
                batch_x, _ = self.bert(batch_x, attention_mask=attention_mask)
                batch_x = batch_x[:, 1:-1, :]
            batch_len = torch.tensor(batch_len).to(self.device)
            batch_y = torch.tensor(batch_y).to(self.device)
        return batch_x, batch_len, batch_y

    def training(self, train_episodes, val_episodes):
        return 0

    def testing(self, test_episodes):
        episode_accuracies, episode_precisions, episode_recalls, episode_f1s = [], [], [], []
        for episode_id, episode in enumerate(test_episodes):
            batch_x, batch_len, batch_y = next(iter(episode.support_loader))
            support_repr, _, support_labels = self.vectorize(batch_x, batch_len, batch_y)
            support_repr = support_repr.reshape(support_repr.shape[0] * support_repr.shape[1], -1)
            support_labels = support_labels.view(-1)
            support_repr = support_repr[support_labels != -1].cpu().numpy()
            support_labels = support_labels[support_labels != -1].cpu().numpy()

            batch_x, batch_len, batch_y = next(iter(episode.query_loader))
            query_repr, _, true_labels = self.vectorize(batch_x, batch_len, batch_y)
            query_repr = query_repr.reshape(query_repr.shape[0] * query_repr.shape[1], -1)
            true_labels = true_labels.view(-1)
            query_repr = query_repr[true_labels != -1].cpu().numpy()
            true_labels = true_labels[true_labels != -1].cpu().numpy()

            # 1-NN in cosine distance: each query token takes the label of
            # its nearest support token
            dist = cdist(query_repr, support_repr, metric='cosine')
            nearest_neighbor = np.argmin(dist, axis=1)
            predictions = support_labels[nearest_neighbor]

            accuracy = metrics.accuracy_score(true_labels, predictions)
            precision = metrics.precision_score(true_labels, predictions, average='macro')
            recall = metrics.recall_score(true_labels, predictions, average='macro')
            f1_score = metrics.f1_score(true_labels, predictions, average='macro')

            logger.info('Episode {}/{}, task {} [query set]: Accuracy = {:.5f}, precision = {:.5f}, '
                        'recall = {:.5f}, F1 score = {:.5f}'.format(episode_id + 1, len(test_episodes),
                                                                    episode.task_id, accuracy,
                                                                    precision, recall, f1_score))

            episode_accuracies.append(accuracy)
            episode_precisions.append(precision)
            episode_recalls.append(recall)
            episode_f1s.append(f1_score)

        logger.info('Avg meta-testing metrics: Accuracy = {:.5f}, precision = {:.5f}, recall = {:.5f}, '
                    'F1 score = {:.5f}'.format(np.mean(episode_accuracies), np.mean(episode_precisions),
                                               np.mean(episode_recalls), np.mean(episode_f1s)))
        return np.mean(episode_f1s)
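# Toy illustration of the cosine 1-NN rule used in `testing` above: each
# query vector takes the label of its nearest support vector.
import numpy as np
from scipy.spatial.distance import cdist

support = np.array([[1.0, 0.0], [0.0, 1.0]])
support_labels = np.array([0, 1])
queries = np.array([[0.9, 0.1], [0.2, 0.8]])
predictions = support_labels[np.argmin(cdist(queries, support, metric='cosine'), axis=1)]
assert predictions.tolist() == [0, 1]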
def __init__(self, config, pre_trained_embeds=None):
    super(Document_Classifier, self).__init__()

    self.lstm_dim = config['lstm_dim']
    self.model_name = config['model_name']
    self.embed_name = config['embed_name']
    self.fc_dim = config['fc_dim']
    self.num_classes = config['n_classes']
    # GloVe vectors are 300-d; ELMo outputs are twice the biLM hidden size
    self.embed_dim = config['embed_dim'] if config['embed_dim'] == 300 else 2 * config['embed_dim']
    self.batch_size = config['batch_size']
    self.num_kernels = config["kernel_num"]
    self.kernel_sizes = [int(k) for k in config["kernel_sizes"].split(',')]
    self.mode = 'single' if not config['parallel_computing'] else 'multi'

    # Choose the right embedding method based on embed_dim given
    if config['embed_dim'] == 300:
        self.vocab_size = config['vocab_size']
        self.embedding = nn.Embedding(self.vocab_size, self.embed_dim)
        self.embedding.weight.data.copy_(pre_trained_embeds)
        self.embedding.requires_grad = False
    elif config['embed_dim'] == 128:
        # Small
        self.options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
        self.weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"
        self.elmo = Elmo(options_file=self.options_file, weight_file=self.weight_file,
                         num_output_representations=1, requires_grad=False)
    elif config['embed_dim'] == 256:
        # Medium
        self.options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json"
        self.weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"
        self.elmo = Elmo(options_file=self.options_file, weight_file=self.weight_file,
                         num_output_representations=1, requires_grad=False)
    elif config['embed_dim'] == 512:
        # Highest
        self.options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json"
        self.weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5"
        self.elmo = Elmo(options_file=self.options_file, weight_file=self.weight_file,
                         num_output_representations=1, requires_grad=False)

    if self.model_name == 'bilstm':
        self.encoder = BiLSTM(config)
        self.fc_inp_dim = 2 * config['lstm_dim']
    elif self.model_name == 'bilstm_pool':
        self.encoder = BiLSTM(config, max_pool=True)
        self.fc_inp_dim = 2 * config['lstm_dim']
    elif self.model_name == 'bilstm_reg':
        self.encoder = BiLSTM_reg(config)
        self.fc_inp_dim = config['lstm_dim']
    elif self.model_name == 'han':
        self.encoder = HAN(config)
        self.fc_inp_dim = 2 * config['sent_lstm_dim']
    elif self.model_name == 'cnn':
        self.encoder = Kim_CNN(config)
        self.fc_inp_dim = self.num_kernels * len(self.kernel_sizes)

    if config['embed_name'] in ['dbert', 'roberta']:
        MODEL_CLASSES = {
            "dbert": (DistilBertConfig, DistilBertPreTrainedModel, DistilBertTokenizerFast),
            "roberta": (RobertaConfig, RobertaModel, RobertaTokenizerFast),
        }
        config_class, _, _ = MODEL_CLASSES[config['embed_name']]
        self.encoder = Transformer_model(config, config_class)
        self.fc_inp_dim = 768 if 'base' in config['model_name'] else 1024

    self.classifier = nn.Sequential(nn.Dropout(config["dropout"]),
                                    nn.Linear(self.fc_inp_dim, self.fc_dim),
                                    nn.ReLU(),
                                    nn.Linear(self.fc_dim, self.num_classes))
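# Illustrative config for the ELMo-medium + BiLSTM variant of the classifier
# above; all values are placeholders (the BiLSTM encoder reads its own keys
# too). embed_dim selects the embedding: 300 -> GloVe, 128/256/512 ->
# small/medium/5.5B ELMo respectively.
config = {
    'lstm_dim': 256,
    'model_name': 'bilstm',
    'embed_name': 'elmo',
    'embed_dim': 256,
    'fc_dim': 128,
    'n_classes': 2,
    'batch_size': 32,
    'kernel_num': 100,
    'kernel_sizes': '3,4,5',
    'parallel_computing': False,
    'dropout': 0.4,
}
model = Document_Classifier(config)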
class ELMoEmbedding(nn.Module):
    r"""
    Contextual word embeddings using word-level bidirectional LM :cite:`peters-etal-2018-deep`.

    Args:
        model (str):
            The name of the pretrained ELMo registered in `OPTION` and `WEIGHT`. Default: ``'original_5b'``.
        bos_eos (tuple[bool]):
            A tuple of two boolean values indicating whether to keep start/end boundaries of sentence outputs.
            Default: ``(True, True)``.
        n_out (int):
            The requested size of the embeddings. If 0, uses the default size of ELMo outputs. Default: 0.
        dropout (float):
            The dropout ratio for the ELMo layer. Default: 0.5.
        requires_grad (bool):
            If ``True``, the model parameters will be updated together with the downstream task. Default: ``False``.
    """

    OPTION = {
        'small': 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json',  # noqa
        'medium': 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json',  # noqa
        'original': 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json',  # noqa
        'original_5b': 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json',  # noqa
    }
    WEIGHT = {
        'small': 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5',  # noqa
        'medium': 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5',  # noqa
        'original': 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5',  # noqa
        'original_5b': 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5',  # noqa
    }

    def __init__(self, model='original_5b', bos_eos=(True, True), n_out=0,
                 dropout=0.5, requires_grad=False):
        super().__init__()

        from allennlp.modules import Elmo

        self.elmo = Elmo(options_file=self.OPTION[model],
                         weight_file=self.WEIGHT[model],
                         num_output_representations=1,
                         dropout=dropout,
                         requires_grad=requires_grad,
                         keep_sentence_boundaries=True)

        self.model = model
        self.bos_eos = bos_eos
        self.hidden_size = self.elmo.get_output_dim()
        self.n_out = n_out or self.hidden_size
        self.dropout = dropout
        self.requires_grad = requires_grad

        self.projection = nn.Linear(self.hidden_size, self.n_out, False) if self.hidden_size != n_out else nn.Identity()

    def __repr__(self):
        s = f"{self.model}, n_out={self.n_out}"
        if self.dropout > 0:
            s += f", dropout={self.dropout}"
        if self.requires_grad:
            s += f", requires_grad={self.requires_grad}"
        return f"{self.__class__.__name__}({s})"

    def forward(self, chars):
        r"""
        Args:
            chars (~torch.Tensor): ``[batch_size, seq_len, fix_len]``.

        Returns:
            ~torch.Tensor:
                ELMo embeddings of shape ``[batch_size, seq_len, n_out]``.
        """

        x = self.projection(self.elmo(chars)['elmo_representations'][0])
        if not self.bos_eos[0]:
            x = x[:, 1:]
        if not self.bos_eos[1]:
            x = x[:, :-1]
        return x
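# Usage sketch for ELMoEmbedding, using the lightweight 'small' checkpoint to
# keep the download manageable; n_out=100 exercises the projection branch.
from allennlp.modules.elmo import batch_to_ids

embed = ELMoEmbedding(model='small', n_out=100, dropout=0)
chars = batch_to_ids([['The', 'cat', 'sat']])
out = embed(chars)
# keep_sentence_boundaries=True and bos_eos=(True, True), so the three tokens
# come back with BOS/EOS positions kept: out.shape == (1, 5, 100)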
class SeqPrototypicalNetwork(nn.Module):

    def __init__(self, config):
        super(SeqPrototypicalNetwork, self).__init__()
        self.base_path = config['base_path']
        self.early_stopping = config['early_stopping']
        self.lr = config.get('meta_lr', 1e-3)
        self.weight_decay = config.get('meta_weight_decay', 0.0)

        if 'seq' in config['learner_model']:
            self.learner = RNNSequenceModel(config['learner_params'])
        elif 'mlp' in config['learner_model']:
            self.learner = MLPModel(config['learner_params'])
        elif 'bert' in config['learner_model']:
            self.learner = BERTSequenceModel(config['learner_params'])

        self.num_outputs = config['learner_params']['num_outputs']
        self.vectors = config.get('vectors', 'glove')

        if self.vectors == 'elmo':
            self.elmo = Elmo(options_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json",
                             weight_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5",
                             num_output_representations=1,
                             dropout=0,
                             requires_grad=False)
        elif self.vectors == 'glove':
            self.glove = torchtext.vocab.GloVe(name='840B', dim=300)
        elif self.vectors == 'bert':
            self.bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

        self.loss_fn = {}
        for task in config['learner_params']['num_outputs']:
            self.loss_fn[task] = nn.CrossEntropyLoss(ignore_index=-1)

        if config.get('trained_learner', False):
            self.learner.load_state_dict(torch.load(
                os.path.join(self.base_path, 'saved_models', config['trained_learner'])
            ))
            logger.info('Loaded trained learner model {}'.format(config['trained_learner']))

        self.device = torch.device(config.get('device', 'cpu'))
        self.to(self.device)
        if self.vectors == 'elmo':
            self.elmo.to(self.device)

        self.initialize_optimizer_scheduler()

    def initialize_optimizer_scheduler(self):
        learner_params = [p for p in self.learner.parameters() if p.requires_grad]
        if isinstance(self.learner, BERTSequenceModel):
            self.optimizer = AdamW(learner_params, lr=self.lr, weight_decay=self.weight_decay)
            self.lr_scheduler = get_constant_schedule_with_warmup(self.optimizer, num_warmup_steps=100)
        else:
            self.optimizer = optim.Adam(learner_params, lr=self.lr, weight_decay=self.weight_decay)
            self.lr_scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=500, gamma=0.5)

    def vectorize(self, batch_x, batch_len, batch_y):
        with torch.no_grad():
            if self.vectors == 'elmo':
                char_ids = batch_to_ids(batch_x)
                char_ids = char_ids.to(self.device)
                batch_x = self.elmo(char_ids)['elmo_representations'][0]
            elif self.vectors == 'glove':
                max_batch_len = max(batch_len)
                vec_batch_x = torch.ones((len(batch_x), max_batch_len, 300))
                for i, sent in enumerate(batch_x):
                    sent_emb = self.glove.get_vecs_by_tokens(sent, lower_case_backup=True)
                    vec_batch_x[i, :len(sent_emb)] = sent_emb
                batch_x = vec_batch_x.to(self.device)
            elif self.vectors == 'bert':
                max_batch_len = max(batch_len) + 2
                input_ids = torch.zeros((len(batch_x), max_batch_len)).long()
                for i, sent in enumerate(batch_x):
                    sent_token_ids = self.bert_tokenizer.encode(sent, add_special_tokens=True)
                    input_ids[i, :len(sent_token_ids)] = torch.tensor(sent_token_ids)
                batch_x = input_ids.to(self.device)
            batch_len = torch.tensor(batch_len).to(self.device)
            batch_y = torch.tensor(batch_y).to(self.device)
        return batch_x, batch_len, batch_y

    def forward(self, episodes, updates=1, testing=False):
        query_losses, query_accuracies, query_precisions, query_recalls, query_f1s = [], [], [], [], []
        n_episodes = len(episodes)

        for episode_id, episode in enumerate(episodes):
            batch_x, batch_len, batch_y = next(iter(episode.support_loader))
            batch_x, batch_len, batch_y = self.vectorize(batch_x, batch_len, batch_y)

            self.train()
            support_repr, support_label = [], []

            batch_x_repr = self.learner(batch_x, batch_len)
            support_repr.append(batch_x_repr)
            support_label.append(batch_y)

            prototypes = self._build_prototypes(support_repr, support_label, episode.n_classes)

            # Run on query
            query_loss = 0.0
            all_predictions, all_labels = [], []

            for module in self.learner.modules():
                if isinstance(module, nn.Dropout):
                    module.eval()

            for n_batch, (batch_x, batch_len, batch_y) in enumerate(episode.query_loader):
                batch_x, batch_len, batch_y = self.vectorize(batch_x, batch_len, batch_y)
                batch_x_repr = self.learner(batch_x, batch_len)
                output = self._normalized_distances(prototypes, batch_x_repr)
                output = output.view(output.size()[0] * output.size()[1], -1)
                batch_y = batch_y.view(-1)
                loss = self.loss_fn[episode.base_task](output, batch_y)
                query_loss += loss.item()

                if not testing:
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    self.lr_scheduler.step()

                relevant_indices = torch.nonzero(batch_y != -1).view(-1).detach()
                all_predictions.extend(make_prediction(output[relevant_indices]).cpu())
                all_labels.extend(batch_y[relevant_indices].cpu())

            query_loss /= n_batch + 1

            # Calculate metrics
            accuracy, precision, recall, f1_score = utils.calculate_metrics(all_predictions,
                                                                            all_labels, binary=False)

            logger.info('Episode {}/{}, task {} [query set]: Loss = {:.5f}, accuracy = {:.5f}, precision = {:.5f}, '
                        'recall = {:.5f}, F1 score = {:.5f}'.format(episode_id + 1, n_episodes, episode.task_id,
                                                                    query_loss, accuracy, precision,
                                                                    recall, f1_score))

            query_losses.append(query_loss)
            query_accuracies.append(accuracy)
            query_precisions.append(precision)
            query_recalls.append(recall)
            query_f1s.append(f1_score)

        return query_losses, query_accuracies, query_precisions, query_recalls, query_f1s

    def _build_prototypes(self, data_repr, data_label, num_outputs):
        n_dim = data_repr[0].shape[2]
        data_repr = torch.cat(tuple([x.view(-1, n_dim) for x in data_repr]), dim=0)
        data_label = torch.cat(tuple([y.view(-1) for y in data_label]), dim=0)

        prototypes = torch.zeros((num_outputs, n_dim), device=self.device)

        for c in range(num_outputs):
            idx = torch.nonzero(data_label == c).view(-1)
            if idx.nelement() != 0:
                prototypes[c] = torch.mean(data_repr[idx], dim=0)

        return prototypes

    def _normalized_distances(self, prototypes, q):
        d = torch.stack(tuple([q.sub(p).pow(2).sum(dim=-1) for p in prototypes]), dim=-1)
        return d.neg()
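# Toy illustration of the prototypical scoring implemented by
# _build_prototypes/_normalized_distances above: prototypes are per-class
# means of support embeddings, and logits are negative squared Euclidean
# distances to each prototype.
import torch

support = torch.tensor([[0., 0.], [2., 2.], [4., 4.]])
labels = torch.tensor([0, 0, 1])
prototypes = torch.stack([support[labels == c].mean(dim=0) for c in range(2)])  # [[1., 1.], [4., 4.]]
query = torch.tensor([[0.5, 0.5]])
logits = -torch.stack([(query - p).pow(2).sum(dim=-1) for p in prototypes], dim=-1)
assert logits.argmax(dim=-1).item() == 0  # nearest to the class-0 prototype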