Exemplo n.º 1
0
    def __init__(self, preprocessor, device, word_attention,
                 sentence_attention, final_layer_dim, final_layer_dropout):
        """Build the attention sub-modules, the MLP head and the loss.

        Args:
            preprocessor: Object exposing a ``preprocessor`` attribute
                (forwarded to the word-attention constructor) and a
                ``get_num_classes()`` method (sizes the output layer).
            device: Forwarded to both attention constructors; the loss
                module is also moved onto it.
            word_attention: Config mapping; its ``"name"`` entry is used
                to look up the "word_attention" entry in the registry.
            sentence_attention: Config mapping; its ``"name"`` entry is
                used to look up the "sentence_attention" entry. The
                constructed object must expose ``recurrent_size``.
            final_layer_dim: Width of the hidden layer of the MLP head.
            final_layer_dropout: Value handed to ``nn.Dropout`` after the
                hidden ReLU.
        """
        super().__init__()
        self.preprocessor = preprocessor

        # Resolve each implementation first, then instantiate it from its
        # own config ("name" is only the registry key, hence unused).
        word_attention_factory = registry.lookup("word_attention",
                                                 word_attention["name"])
        self.word_attention = registry.instantiate(
            callable=word_attention_factory,
            config=word_attention,
            unused_keys=("name", ),
            device=device,
            preprocessor=preprocessor.preprocessor)

        sentence_attention_factory = registry.lookup(
            "sentence_attention", sentence_attention["name"])
        self.sentence_attention = registry.instantiate(
            callable=sentence_attention_factory,
            config=sentence_attention,
            unused_keys=("name", ),
            device=device,
        )

        # Classifier head: hidden projection -> ReLU -> dropout -> logits.
        # Layers are created in this exact order so that parameter
        # initialisation consumes the RNG the same way every time.
        hidden_layer = torch.nn.Linear(self.sentence_attention.recurrent_size,
                                       final_layer_dim)
        activation = nn.ReLU()
        dropout = nn.Dropout(final_layer_dropout)
        output_layer = torch.nn.Linear(final_layer_dim,
                                       self.preprocessor.get_num_classes())
        self.mlp = nn.Sequential(hidden_layer, activation, dropout,
                                 output_layer)

        criterion = nn.CrossEntropyLoss(reduction="mean")
        self.loss = criterion.to(device)
Exemplo n.º 2
0
    def __init__(self, config):
        """Prepare the device, class dictionary and model preprocessor.

        Args:
            config: Mapping with at least a ``"classes"`` and a ``"model"``
                entry; each is handed to the registry both for lookup and
                as the construction config.
        """
        self.config = config

        # Prefer the GPU; on CPU restrict torch to a single thread.
        cuda_available = torch.cuda.is_available()
        self.device = torch.device("cuda" if cuda_available else "cpu")
        if not cuda_available:
            torch.set_num_threads(1)

        # 0. Construct classes dictionary mapping
        classes_config = config["classes"]
        classes_factory = registry.lookup("classes", classes_config)
        self.classes = registry.instantiate(
            callable=classes_factory,
            config=classes_config,
            unused_keys=("name", ),
        )

        self.label_to_name = self.classes.get_classes_dict()

        # 1. Construct preprocessors
        model_config = config["model"]
        preprocessor_factory = registry.lookup("model",
                                               model_config).Preprocessor
        self.model_preprocessor = registry.instantiate(
            callable=preprocessor_factory,
            config=model_config,
            unused_keys=("model", "name", "sentence_attention",
                         "word_attention", "final_layer_dim",
                         "final_layer_dropout"),
        )
        self.model_preprocessor.load()

        # Invert the preprocessor's label -> id map for decoding ids.
        label_to_id = self.model_preprocessor.label_to_id_map()
        self.id_to_label = {
            label_id: label
            for label, label_id in label_to_id.items()
        }
    def __init__(self, save_path, min_freq, max_count, word_emb, nlp,
                 max_sent_length, max_doc_length):
        """Set up the embedder, NLP pipeline, vocab builder and file paths.

        Args:
            save_path: Directory under which every artefact path below is
                placed.
            min_freq: First argument given to ``vocab.VocabBuilder``.
            max_count: Second argument given to ``vocab.VocabBuilder``.
            word_emb: Config mapping; its ``"name"`` entry is used to look
                up the "word_emb" entry in the registry.
            nlp: Config mapping; its ``"name"`` entry is used to look up
                the "nlp" entry in the registry.
            max_sent_length: Stored as ``self.max_sent_length``.
            max_doc_length: Stored as ``self.max_doc_length``.
        """

        def _in_save_dir(filename):
            # Every artefact of this preprocessor lives under save_path.
            return os.path.join(save_path, filename)

        word_emb_factory = registry.lookup("word_emb", word_emb["name"])
        self.word_emb: Embedder = registry.instantiate(
            word_emb_factory,
            word_emb,
            unused_keys=("name", ),
        )
        nlp_factory = registry.lookup("nlp", nlp["name"])
        self.nlp: NLP = registry.instantiate(
            nlp_factory,
            nlp,
            unused_keys=("name", ),
        )

        self.max_doc_length = max_doc_length
        self.max_sent_length = max_sent_length

        self.data_dir = _in_save_dir("tokenized_data")
        # Starts empty; a missing key yields a fresh list on first access.
        self.texts = collections.defaultdict(list)

        self.vocab_builder = vocab.VocabBuilder(min_freq, max_count)
        self.vocab_path = _in_save_dir("han_vocab.json")
        self.vocab_word_freq_path = _in_save_dir("han_word_freq.json")
        self.classes_path = _in_save_dir("classes.json")
        self.dataset_sizes_path = _in_save_dir("dataset_sizes.json")

        # Not populated by the constructor.
        self.vocab = None
        self.label_to_id = {}
        self.dataset_sizes = {}
Exemplo n.º 4
0
        def __init__(self, preprocessor):
            """Instantiate the wrapped preprocessor from its config.

            Args:
                preprocessor: Config mapping; its ``"name"`` entry is used
                    to look up the "preprocessor" entry in the registry,
                    and the mapping itself is the construction config.
            """
            super().__init__()

            preprocessor_factory = registry.lookup("preprocessor",
                                                   preprocessor["name"])
            # These keys are passed to the registry as unused_keys.
            ignored_keys = ("name", "final_layer_dim", "final_layer_dropout")
            self.preprocessor: abstract_preprocessor.AbstractPreproc = registry.instantiate(
                callable=preprocessor_factory,
                config=preprocessor,
                unused_keys=ignored_keys)
Exemplo n.º 5
0
 def __init__(self, config):
     """Store the config and instantiate the model's preprocessor.

     Args:
         config: Mapping whose ``"model"`` entry is used both to look up
             the model in the registry and as the construction config of
             that model's ``Preprocessor``.
     """
     self.config = config

     model_config = config["model"]
     preprocessor_factory = registry.lookup("model", model_config).Preprocessor
     # These keys are passed to the registry as unused_keys.
     ignored_keys = ("sentence_attention", "word_attention", "name",
                     "final_layer_dim", "final_layer_dropout")
     self.model_preprocessor = registry.instantiate(
         callable=preprocessor_factory,
         config=model_config,
         unused_keys=ignored_keys,
     )
Exemplo n.º 6
0
    def __init__(self, logger, config):
        """Set up device, random contexts, preprocessor and model.

        Args:
            logger: Stored as ``self.logger``.
            config: Mapping with a ``"train"`` entry (used to build a
                ``TrainConfig``) and a ``"model"`` entry (used for both
                the preprocessor and the model).
        """
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)

        self.logger = logger
        self.train_config = registry.instantiate(TrainConfig, config["train"])

        # One random context per seed declared in the train config.
        train_config = self.train_config
        self.data_random = random_state.RandomContext(train_config.data_seed)
        self.model_random = random_state.RandomContext(
            train_config.model_seed)
        self.init_random = random_state.RandomContext(train_config.init_seed)

        # Preprocessor loading and model construction both run inside the
        # init random context.
        with self.init_random:
            model_config = config["model"]

            # Load preprocessors
            preprocessor_factory = registry.lookup(
                "model", model_config).Preprocessor
            self.model_preprocessor = registry.instantiate(
                callable=preprocessor_factory,
                config=model_config,
                unused_keys=("model", "name", "sentence_attention",
                             "word_attention", "final_layer_dim",
                             "final_layer_dropout"),
            )
            self.model_preprocessor.load()

            # Construct model
            self.model = registry.construct(
                kind="model",
                config=model_config,
                unused_keys=("preprocessor", ),
                preprocessor=self.model_preprocessor,
                device=self.device,
            )
            self.model.to(self.device)