def __init__(self, preprocessor, device, word_attention, sentence_attention,
             final_layer_dim, final_layer_dropout):
    super().__init__()
    self.preprocessor = preprocessor

    # Hierarchical attention: a word-level encoder feeds a sentence-level
    # encoder; both modules are built through the registry.
    self.word_attention = registry.instantiate(
        callable=registry.lookup("word_attention", word_attention["name"]),
        config=word_attention,
        unused_keys=("name",),
        device=device,
        preprocessor=preprocessor.preprocessor)
    self.sentence_attention = registry.instantiate(
        callable=registry.lookup("sentence_attention", sentence_attention["name"]),
        config=sentence_attention,
        unused_keys=("name",),
        device=device,
    )

    # Classification head: hidden projection -> ReLU -> dropout -> class logits.
    self.mlp = nn.Sequential(
        nn.Linear(self.sentence_attention.recurrent_size, final_layer_dim),
        nn.ReLU(),
        nn.Dropout(final_layer_dropout),
        nn.Linear(final_layer_dim, self.preprocessor.get_num_classes()))
    self.loss = nn.CrossEntropyLoss(reduction="mean").to(device)
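# A hedged sketch of the "model" config section that could drive the
# constructor above; the registry names and dimensions below are illustrative
# assumptions, not values confirmed by this codebase.
EXAMPLE_MODEL_CONFIG = {
    "name": "han",                                             # hypothetical key
    "word_attention": {"name": "word_attention_rnn"},          # hypothetical key
    "sentence_attention": {"name": "sentence_attention_rnn"},  # hypothetical key
    "final_layer_dim": 128,
    "final_layer_dropout": 0.3,
}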
def __init__(self, config):
    self.config = config
    if torch.cuda.is_available():
        self.device = torch.device("cuda")
    else:
        self.device = torch.device("cpu")
    torch.set_num_threads(1)

    # 0. Construct classes dictionary mapping
    self.classes = registry.instantiate(
        callable=registry.lookup("classes", config["classes"]),
        config=config["classes"],
        unused_keys=("name",),
    )
    self.label_to_name = self.classes.get_classes_dict()

    # 1. Construct preprocessors
    self.model_preprocessor = registry.instantiate(
        callable=registry.lookup("model", config["model"]).Preprocessor,
        config=config["model"],
        unused_keys=("model", "name", "sentence_attention", "word_attention",
                     "final_layer_dim", "final_layer_dropout"),
    )
    self.model_preprocessor.load()

    # Invert the label -> id map so predicted ids can be turned back into labels.
    self.id_to_label = {
        value: key
        for key, value in self.model_preprocessor.label_to_id_map().items()
    }
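# A minimal illustration of the label-map inversion above; the label names
# are assumptions for illustration only, not taken from any real dataset.
label_to_id = {"sports": 0, "politics": 1}
id_to_label = {value: key for key, value in label_to_id.items()}
assert id_to_label == {0: "sports", 1: "politics"}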
def __init__(self, save_path, min_freq, max_count, word_emb, nlp,
             max_sent_length, max_doc_length):
    # Word embedder and NLP pipeline (tokenization / sentence splitting)
    # come from the registry.
    self.word_emb: Embedder = registry.instantiate(
        registry.lookup("word_emb", word_emb["name"]),
        word_emb,
        unused_keys=("name",),
    )
    self.nlp: NLP = registry.instantiate(
        registry.lookup("nlp", nlp["name"]),
        nlp,
        unused_keys=("name",),
    )
    self.max_doc_length = max_doc_length
    self.max_sent_length = max_sent_length

    # Artifact locations under save_path.
    self.data_dir = os.path.join(save_path, "tokenized_data")
    self.texts = collections.defaultdict(list)
    self.vocab_builder = vocab.VocabBuilder(min_freq, max_count)
    self.vocab_path = os.path.join(save_path, "han_vocab.json")
    self.vocab_word_freq_path = os.path.join(save_path, "han_word_freq.json")
    self.classes_path = os.path.join(save_path, "classes.json")
    self.dataset_sizes_path = os.path.join(save_path, "dataset_sizes.json")

    # Populated later, during preprocessing / load().
    self.vocab = None
    self.label_to_id = {}
    self.dataset_sizes = {}
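# A hedged example of keyword arguments this preprocessor might be built
# with; the path, registry names, and limits are illustrative assumptions.
EXAMPLE_PREPROC_KWARGS = dict(
    save_path="data/han",            # hypothetical output directory
    min_freq=2,                      # drop tokens seen fewer than 2 times
    max_count=50000,                 # cap the vocabulary size
    word_emb={"name": "glove"},      # hypothetical "word_emb" registry key
    nlp={"name": "spacy"},           # hypothetical "nlp" registry key
    max_sent_length=50,              # truncate sentences to 50 tokens
    max_doc_length=30,               # truncate documents to 30 sentences
)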
def __init__(self, preprocessor):
    super().__init__()
    self.preprocessor: abstract_preprocessor.AbstractPreproc = registry.instantiate(
        callable=registry.lookup("preprocessor", preprocessor["name"]),
        config=preprocessor,
        unused_keys=("name", "final_layer_dim", "final_layer_dropout"))
def __init__(self, config):
    self.config = config
    self.model_preprocessor = registry.instantiate(
        callable=registry.lookup("model", config["model"]).Preprocessor,
        config=config["model"],
        unused_keys=("sentence_attention", "word_attention", "name",
                     "final_layer_dim", "final_layer_dropout"),
    )
def __init__(self, logger, config):
    if torch.cuda.is_available():
        self.device = torch.device("cuda")
    else:
        self.device = torch.device("cpu")
    self.logger = logger
    self.train_config = registry.instantiate(TrainConfig, config["train"])

    # Separate random streams so data shuffling, model-dependent randomness,
    # and parameter initialization are independently reproducible.
    self.data_random = random_state.RandomContext(self.train_config.data_seed)
    self.model_random = random_state.RandomContext(self.train_config.model_seed)
    self.init_random = random_state.RandomContext(self.train_config.init_seed)

    with self.init_random:
        # Load preprocessors
        self.model_preprocessor = registry.instantiate(
            callable=registry.lookup("model", config["model"]).Preprocessor,
            config=config["model"],
            unused_keys=("model", "name", "sentence_attention",
                         "word_attention", "final_layer_dim",
                         "final_layer_dropout"),
        )
        self.model_preprocessor.load()

        # Construct model
        self.model = registry.construct(
            kind="model",
            config=config["model"],
            unused_keys=("preprocessor",),
            preprocessor=self.model_preprocessor,
            device=self.device,
        )
        self.model.to(self.device)
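# A hedged sketch of the top-level config the trainer above consumes. Only
# the seed fields referenced in the constructor are shown for "train"; a real
# TrainConfig likely has more fields. The "model" section reuses the earlier
# EXAMPLE_MODEL_CONFIG sketch and is likewise an assumption.
EXAMPLE_TRAIN_CONFIG = {
    "train": {"data_seed": 0, "model_seed": 1, "init_seed": 2},
    "model": EXAMPLE_MODEL_CONFIG,
}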