def glove_linear_cnn(
    glove_path: Union[Path, TextIO],
    glove_dim: int,
    num_classes: int,
    vocab: Vocabulary,
    filters: List[int],
    out_channels: int,
    freeze: bool = True,
    saved_glove_file: Optional[Path] = None,
    dropout: float = 0,
) -> LinearCnn:
    """Build a LinearCnn whose embedding layer is initialised from GloVe vectors."""
    # Load (or restore from cache) the GloVe weight matrix for this vocabulary.
    weights = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )
    return LinearCnn(
        embedding=nn.Embedding.from_pretrained(weights, freeze=freeze),
        num_classes=num_classes,
        filters=filters,
        out_channels=out_channels,
        dropout=dropout,
    )
def test_synthetic_glove() -> None:
    """Check the GloVe loader against a small synthetic embedding file."""
    vocab = data.Vocabulary(["test", "new"])
    # glove_str is a module-level fixture holding a tiny 3-dim GloVe file.
    tensor = glove.load_glove(StringIO(glove_str), vocab, 3)
    expected_shape = (vocab.vocab_size(), 3)
    assert tensor.shape == expected_shape
def glove_bc_lstm(
    glove_path: Union[Path, TextIO],
    glove_dim: int,
    num_classes: int,
    vocab: Vocabulary,
    filters: List[int],
    out_channels: int,
    freeze: bool = True,
    saved_glove_file: Optional[Path] = None,
    rnn_hidden_size: int = 100,
    rnn_num_layers: int = 1,
    bidirectional: bool = False,
    rnn_dropout: float = 0,
) -> BcLstm:
    """
    Build a bcLSTM whose embedding layer is initialised from GloVe vectors.

    Both the utterance-level and dialogue-level RNNs are LSTMs.
    """
    weights = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )
    embedding = nn.Embedding.from_pretrained(weights, freeze=freeze)
    # The utterance LSTM consumes the concatenated CNN feature maps.
    utterance_lstm = nn.LSTM(
        input_size=out_channels * len(filters),
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=bidirectional,
        dropout=rnn_dropout,
        batch_first=True,
    )
    # A bidirectional utterance LSTM emits twice the hidden size per step.
    num_directions = 2 if bidirectional else 1
    dialogue_lstm = nn.LSTM(
        input_size=num_directions * utterance_lstm.hidden_size,
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        # Dialogue-level context flows forward in time only, so this stays
        # unidirectional [1].
        bidirectional=False,
        dropout=rnn_dropout,
        batch_first=True,
    )
    return BcLstm(
        embedding=embedding,
        num_classes=num_classes,
        dialogue_rnn=dialogue_lstm,
        utterance_rnn=utterance_lstm,
        filters=filters,
        out_channels=out_channels,
    )
def test_real_glove() -> None:
    """Check the GloVe loader against the real 50-dim GloVe file on disk."""
    glove_dim = 50
    dataset = MeldLinearTextDataset(Path('./data/dev/metadata.csv'))
    glove_file = Path(f'./data/glove/glove.6B.{glove_dim}d.txt')
    tensor = glove.load_glove(glove_file, dataset.vocab, glove_dim)
    # One row per vocabulary entry, one column per embedding dimension.
    assert tensor.shape == (dataset.vocab.vocab_size(), glove_dim)
def glove_contextual_lstm(
    glove_path: Union[Path, TextIO],
    glove_dim: int,
    num_classes: int,
    vocab: Vocabulary,
    freeze: bool = True,
    saved_glove_file: Optional[Path] = None,
    rnn_hidden_size: int = 100,
    rnn_num_layers: int = 1,
    bidirectional: bool = False,
    rnn_dropout: float = 0,
) -> ContextualRnn:
    """
    Return ContextualRnn with embedding layer initialised with GloVe.

    Both RNNs used (utterance and dialogue) are LSTMs.
    """
    glove = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )
    embedding = nn.Embedding.from_pretrained(glove, freeze=freeze)
    utterance_lstm = nn.LSTM(
        input_size=embedding.embedding_dim,
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=bidirectional,
        dropout=rnn_dropout,
        batch_first=True,
    )
    # BUGFIX: a bidirectional utterance LSTM outputs 2 * hidden_size features
    # per step, so the dialogue LSTM must accept that width (previously this
    # was just hidden_size, which breaks when bidirectional=True). Mirrors the
    # same computation in glove_bc_lstm. With the default bidirectional=False
    # behaviour is unchanged.
    dialogue_input = (1 + bidirectional) * utterance_lstm.hidden_size
    dialogue_lstm = nn.LSTM(
        input_size=dialogue_input,
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=False,  # this doesn't make sense to be bidirectional [1]
        dropout=rnn_dropout,
        batch_first=True,
    )
    return ContextualRnn(
        embedding=embedding,
        num_classes=num_classes,
        dialogue_rnn=dialogue_lstm,
        utterance_rnn=utterance_lstm,
    )
def glove_linear_cnn_lstm(
    glove_path: Union[Path, TextIO],
    glove_dim: int,
    num_classes: int,
    vocab: Vocabulary,
    filters: List[int],
    out_channels: int,
    rnn_hidden_size: int = 100,
    rnn_num_layers: int = 1,
    bidirectional: bool = False,
    rnn_dropout: float = 0,
    freeze: bool = True,
    saved_glove_file: Optional[Path] = None,
) -> LinearCnnRnn:
    """Build a LinearCnnRnn whose embedding layer is initialised from GloVe vectors."""
    weights = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )
    # The LSTM consumes the concatenated CNN feature maps per utterance.
    recurrent = nn.LSTM(
        input_size=out_channels * len(filters),
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=bidirectional,
        dropout=rnn_dropout,
        batch_first=True,
    )
    return LinearCnnRnn(
        embedding=nn.Embedding.from_pretrained(weights, freeze=freeze),
        num_classes=num_classes,
        rnn=recurrent,
        filters=filters,
        out_channels=out_channels,
    )
def glove_simple(
    glove_path: Union[Path, TextIO],
    glove_dim: int,
    num_classes: int,
    vocab: Vocabulary,
    freeze: bool = True,
    saved_glove_file: Optional[Path] = None,
) -> Simple:
    """Build a Simple classifier whose embedding layer is initialised from GloVe vectors."""
    weights = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )
    return Simple(
        embedding=nn.Embedding.from_pretrained(weights, freeze=freeze),
        num_classes=num_classes,
    )
def glove_linear_lstm(
    glove_path: Union[Path, TextIO],
    glove_dim: int,
    num_classes: int,
    vocab: Vocabulary,
    freeze: bool = True,
    saved_glove_file: Optional[Path] = None,
    rnn_hidden_size: int = 100,
    rnn_num_layers: int = 1,
    bidirectional: bool = False,
    rnn_dropout: float = 0,
) -> LinearRnn:
    """
    Build an RNN classifier whose embedding layer is initialised from GloVe vectors.

    The RNN is an LSTM.
    """
    weights = load_glove(
        input_file=glove_path,
        glove_dim=glove_dim,
        vocab=vocab,
        saved_glove_file=saved_glove_file,
    )
    embedding = nn.Embedding.from_pretrained(weights, freeze=freeze)
    # The LSTM consumes raw GloVe embeddings, one token per time step.
    recurrent = nn.LSTM(
        input_size=embedding.embedding_dim,
        hidden_size=rnn_hidden_size,
        num_layers=rnn_num_layers,
        bidirectional=bidirectional,
        dropout=rnn_dropout,
        batch_first=True,
    )
    return LinearRnn(
        embedding=embedding,
        num_classes=num_classes,
        rnn=recurrent,
    )