# type: ignore
import abc
import os
import tempfile
import unittest

from src import logging, preprocessing
from src.text_encoder import (
    SubWordTextEncoder,
    TextEncoder,
    WordNoFilterTextEncoder,
    WordTextEncoder,
)

logger = logging.create_logger(__name__)

# Small corpus wrapped with start/end tokens; shared fixture for encoder tests.
CORPUS = preprocessing.add_start_end_token(
    ["a against", "battle", "pandemy covid-19 not easy"])
A_VOCAB_SIZE = 258
ANY_TEXT_FILE = "data/train.lang1"
SAMPLE = "a pandemy"


class TextEncoderTest(abc.ABC):
    """Shared round-trip checks for TextEncoder implementations.

    Concrete subclasses must provide ``create_encoder()`` and
    ``load_encoder(file_name)``; they presumably also derive from
    ``unittest.TestCase``, since this mixin calls ``assertEqual``
    — confirm against the subclasses.
    """

    def test_can_save_load(self):
        """Saving an encoder and reloading it must preserve vocab_size."""
        encoder = self.create_encoder()
        # Use a unique temp directory rather than a fixed (and typo'd)
        # "/tmp/text_ecoder" path: portable, and parallel test runs can
        # no longer clobber each other's saved files.
        file_name = os.path.join(tempfile.mkdtemp(), "text_encoder")
        encoder.save_to_file(file_name)
        loaded_encoder = self.load_encoder(file_name)
        self.assertEqual(loaded_encoder.vocab_size, encoder.vocab_size)
# NOTE(review): this line is a second, newline-stripped file (a DemiBERT
# model wrapping the project's transformer Encoder). The class is TRUNCATED
# here — `def call(self, x, training=False):` has no body in this chunk —
# so the code is left byte-identical; restore from the original file before
# editing. Presumably `call` encodes `x` (using `_create_padding_mask`) and
# applies `self.dense` — confirm against the full source.
import tensorflow as tf from tensorflow.keras import layers from src.logging import create_logger from src.model import base from src.model.transformer import Encoder, _create_padding_mask NAME = "demi-bert" logger = create_logger(__name__) class DemiBERT(base.Model): """D E M I B E R T.""" def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, max_pe, rate): """Initialize D E M I B E R T.""" super().__init__(f"{NAME}") self.encoder = Encoder( num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, input_vocab_size=vocab_size, maximum_position_encoding=max_pe, rate=rate, ) self.dense = layers.Dense(d_model) self._embedding_size = d_model def call(self, x: tf.Tensor, training=False):