def test_load_config():
    """Simple sanity check for loading feature config."""
    # Check that default is loaded
    defaults = get_config()
    assert defaults["featurization"]["textual"]["window_feature"]["size"] == 3
    assert defaults["learning"]["LSTM"]["emb_dim"] == 100
    assert defaults["learning"]["LSTM"]["bias"] is False

    # Check that file is loaded if present
    settings = get_config(os.path.dirname(__file__))
    assert settings["featurization"]["textual"]["window_feature"]["size"] == 8
    assert settings["learning"]["LSTM"]["bias"] is False

    # Check that defaults are used for unspecified settings
    assert (
        settings["featurization"]["tabular"]["unary_features"]["get_head_ngrams"]["max"]
        == 2
    )
def test_load_config(caplog):
    """Simple sanity check for loading feature config."""
    caplog.set_level(logging.INFO)

    # Check that default is loaded
    defaults = get_config()
    assert defaults["featurization"]["content"]["window_feature"]["size"] == 3
    assert defaults["learning"]["LSTM"]["emb_dim"] == 100
    assert defaults["learning"]["LSTM"]["host_device"] == "CPU"

    # Check that file is loaded if present
    settings = get_config(os.path.dirname(__file__))
    assert settings["featurization"]["content"]["window_feature"]["size"] == 8
    assert settings["learning"]["LSTM"]["host_device"] == "GPU"

    # Check that defaults are used for unspecified settings
    assert (
        settings["featurization"]["table"]["unary_features"]["get_head_ngrams"]["max"]
        == 2
    )
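# A minimal usage sketch of the behavior the two tests above exercise: calling
# get_config() with no argument returns the packaged defaults, while passing a
# directory merges any local config file found there (a .fonduer-config.yaml in
# a standard Fonduer setup) over those defaults. The asserted value assumes the
# stock default config.
import os

from fonduer.utils.config import get_config

defaults = get_config()
assert defaults["learning"]["LSTM"]["emb_dim"] == 100  # packaged default

# Keys set in the local config file win; everything else falls back to defaults.
merged = get_config(os.path.dirname(os.path.abspath(__file__)))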
def _update_settings(self, X):
    """
    Update the model arguments.

    :param X: The input data of the model.
    :type X: list of (candidate, features) pairs
    """
    self.logger.info("Load default parameters for Logistic Regression")
    config = get_config()["learning"]["LogisticRegression"]
    for key in config.keys():
        if key not in self.settings:
            self.settings[key] = config[key]
    self.settings["input_dim"] = X[1].shape[1]
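# The defaulting loop in _update_settings is equivalent to a plain dict merge
# in which user-supplied settings take precedence over the library defaults.
# A self-contained illustration (the keys below are made up for the example):
defaults = {"lr": 0.01, "l2": 0.0, "bias": False}
user_settings = {"bias": True}

merged = {**defaults, **user_settings}
assert merged == {"lr": 0.01, "l2": 0.0, "bias": True}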
def _update_settings(self, X):
    """
    Update the model arguments.

    :param X: The input data of the model.
    :type X: list of (candidate, features) pairs
    """
    self.logger.info("Load default parameters for LSTM")
    config = get_config()["learning"]["LSTM"]
    for key in config.keys():
        if key not in self.settings:
            self.settings[key] = config[key]
    self.settings["relation_arity"] = len(X[0][0])
    self.settings["input_dim"] = X[1].shape[1] + len(X[0][0]) * self.settings[
        "hidden_dim"
    ] * (2 if self.settings["bidirectional"] else 1)
def _update_kwargs(self, X, **model_kwargs):
    """
    Update the model arguments.

    :param X: The input data of the model.
    :param model_kwargs: The arguments of the model.
    """
    self.logger.info("Load default parameters for LSTM")
    settings = get_config()["learning"]["LSTM"]
    for key in settings.keys():
        if key not in model_kwargs:
            model_kwargs[key] = settings[key]
    model_kwargs["relation_arity"] = len(X[0][0])
    model_kwargs["input_dim"] = X[0][1].shape[1] + len(X[0][0]) * model_kwargs[
        "hidden_dim"
    ] * (2 if model_kwargs["bidirectional"] else 1)
    return model_kwargs
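# A worked example of the input_dim arithmetic used above (all numbers are
# illustrative): for a binary relation, each of the two mentions runs through
# an LSTM whose output is hidden_dim wide, doubled when bidirectional, and the
# concatenation of those outputs is appended to the sparse feature vector.
arity = 2
feature_dim = 1000        # X[0][1].shape[1]: width of the feature matrix
hidden_dim = 100
bidirectional = True

input_dim = feature_dim + arity * hidden_dim * (2 if bidirectional else 1)
assert input_dim == 1400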
    compile_relation_feature_generator,
)
from fonduer.candidates.models.span_mention import TemporarySpanMention
from fonduer.features.feature_libs.tree_structs import corenlp_to_xmltree
from fonduer.utils.config import get_config
from fonduer.utils.data_model_utils import get_left_ngrams, get_right_ngrams
from fonduer.utils.utils import get_as_dict, tokens_to_ngrams

DEF_VALUE = 1

unary_ddlib_feats = {}
unary_word_feats = {}
unary_tdl_feats = {}
binary_tdl_feats = {}

settings = get_config()


def get_content_feats(candidates):
    candidates = candidates if isinstance(candidates, list) else [candidates]
    for candidate in candidates:
        args = tuple([m.context for m in candidate.get_mentions()])
        if not (isinstance(args[0], TemporarySpanMention)):
            raise ValueError(
                f"Accepts Span-type arguments, {type(candidate)}-type found."
            )

        # Unary candidates
        if len(args) == 1:
            span = args[0]
            if span.sentence.is_lingual():
                get_tdl_feats = compile_entity_feature_generator()
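# A hedged sketch of how a feature generator like get_content_feats is
# typically consumed. The fragment above is cut off, so the assumption here
# (consistent with Fonduer's other feature libraries, but not shown in this
# excerpt) is that it is a generator yielding (candidate_id, feature_name,
# value) triples.
def collect_feats(candidates):
    # Accumulate (feature_name, value) pairs per candidate id.
    feats = {}
    for cid, name, value in get_content_feats(candidates):
        feats.setdefault(cid, []).append((name, value))
    return feats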
def create_task(
    task_names: Union[str, List[str]],
    n_arities: Union[int, List[int]],
    n_features: int,
    n_classes: Union[int, List[int]],
    emb_layer: Optional[EmbeddingModule],
    model: str = "LSTM",
    mode: str = "MTL",
) -> List[EmmentalTask]:
    """Create task from relation(s).

    :param task_names: Relation name(s). If str, a single relation; if
        List[str], multiple relations.
    :param n_arities: The arity of each relation.
    :param n_features: The multimodal feature set size.
    :param n_classes: Number of classes for each task. (Only classification
        tasks are supported now.)
    :param emb_layer: The embedding layer for LSTM. Not needed for the
        LogisticRegression model.
    :param model: Model name (available models: "LSTM", "LogisticRegression"),
        defaults to "LSTM".
    :param mode: Learning mode (available modes: "STL", "MTL"),
        defaults to "MTL".
    """
    if model not in ["LSTM", "LogisticRegression"]:
        raise ValueError(
            f"Unrecognized model {model}. Only support {['LSTM', 'LogisticRegression']}"
        )

    if mode not in ["STL", "MTL"]:
        raise ValueError(f"Unrecognized mode {mode}. Only support {['STL', 'MTL']}")

    config = get_config()["learning"][model]
    logger.info(f"{model} model config: {config}")

    if not isinstance(task_names, list):
        task_names = [task_names]
    if not isinstance(n_arities, list):
        n_arities = [n_arities]
    if not isinstance(n_classes, list):
        n_classes = [n_classes]

    tasks = []

    for task_name, n_arity, n_class in zip(task_names, n_arities, n_classes):
        if mode == "MTL":
            feature_module_name = "shared_feature"
        else:
            feature_module_name = f"{task_name}_feature"

        if model == "LSTM":
            module_pool = nn.ModuleDict(
                {
                    "emb": emb_layer,
                    feature_module_name: SparseLinear(
                        n_features + 1, config["hidden_dim"], bias=config["bias"]
                    ),
                }
            )
            for i in range(n_arity):
                module_pool.update(
                    {
                        f"{task_name}_lstm{i}": RNN(
                            num_classes=0,
                            emb_size=emb_layer.dim,
                            lstm_hidden=config["hidden_dim"],
                            attention=config["attention"],
                            dropout=config["dropout"],
                            bidirectional=config["bidirectional"],
                        )
                    }
                )
            module_pool.update(
                {
                    f"{task_name}_pred_head": ConcatLinear(
                        [f"{task_name}_lstm{i}" for i in range(n_arity)]
                        + [feature_module_name],
                        config["hidden_dim"] * (2 * n_arity + 1)
                        if config["bidirectional"]
                        else config["hidden_dim"] * (n_arity + 1),
                        n_class,
                    )
                }
            )

            task_flow = []
            task_flow += [
                {
                    "name": f"{task_name}_emb{i}",
                    "module": "emb",
                    "inputs": [("_input_", f"m{i}")],
                }
                for i in range(n_arity)
            ]
            task_flow += [
                {
                    "name": f"{task_name}_lstm{i}",
                    "module": f"{task_name}_lstm{i}",
                    "inputs": [(f"{task_name}_emb{i}", 0), ("_input_", f"m{i}_mask")],
                }
                for i in range(n_arity)
            ]
            task_flow += [
                {
                    "name": feature_module_name,
                    "module": feature_module_name,
                    "inputs": [
                        ("_input_", "feature_index"),
                        ("_input_", "feature_weight"),
                    ],
                }
            ]
            task_flow += [
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": None,
                }
            ]
        elif model == "LogisticRegression":
            module_pool = nn.ModuleDict(
                {
                    feature_module_name: SparseLinear(
                        n_features + 1, config["hidden_dim"], bias=config["bias"]
                    ),
                    f"{task_name}_pred_head": ConcatLinear(
                        [feature_module_name], config["hidden_dim"], n_class
                    ),
                }
            )
            task_flow = [
                {
                    "name": feature_module_name,
                    "module": feature_module_name,
                    "inputs": [
                        ("_input_", "feature_index"),
                        ("_input_", "feature_weight"),
                    ],
                },
                {
                    "name": f"{task_name}_pred_head",
                    "module": f"{task_name}_pred_head",
                    "inputs": None,
                },
            ]
        else:
            raise ValueError(f"Unrecognized model {model}.")

        tasks.append(
            EmmentalTask(
                name=task_name,
                module_pool=module_pool,
                task_flow=task_flow,
                loss_func=partial(loss, f"{task_name}_pred_head"),
                output_func=partial(output, f"{task_name}_pred_head"),
                scorer=Scorer(metrics=["accuracy", "precision", "recall", "f1"]),
            )
        )

    return tasks
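# A minimal sketch of wiring create_task into a trainable Emmental model. The
# LogisticRegression head is used so that no embedding layer is required; the
# task name and feature count below are illustrative placeholders, not values
# from the source.
import emmental
from emmental.model import EmmentalModel

emmental.init()  # set up Emmental's global config and logging

tasks = create_task(
    task_names="relation_task",  # hypothetical relation name
    n_arities=2,                 # binary relation
    n_features=1000,             # placeholder for the real feature-matrix width
    n_classes=2,
    emb_layer=None,              # not needed for LogisticRegression
    model="LogisticRegression",
)
model = EmmentalModel(name="fonduer", tasks=tasks)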