Exemplo n.º 1
0
    def __init__(
        self,
        save_dir: str,
        corpus: Corpus,
        activation_names: ActivationNames,
        activations_dir: Optional[str] = None,
        test_activations_dir: Optional[str] = None,
        test_corpus: Optional[Corpus] = None,
        model: Optional[LanguageModel] = None,
        selection_func: SelectFunc = lambda sen_id, pos, example: True,
    ) -> None:
        """Prepare the output directory, activations, data loader and classifier.

        Parameters
        ----------
        save_dir : str
            Directory stored on ``self.save_dir``; created (including any
            missing parent directories) if it does not exist yet.
        corpus : Corpus
            Corpus forwarded to ``_extract_activations`` and ``DataLoader``.
        activation_names : ActivationNames
            Stored on ``self.activation_names`` and forwarded to
            ``_extract_activations``.
        activations_dir : str, optional
            Forwarded to ``_extract_activations``; the value it returns is
            the one handed to ``DataLoader``.
        test_activations_dir : str, optional
            Same treatment as ``activations_dir``, for the test split.
        test_corpus : Corpus, optional
            Forwarded to ``_extract_activations`` and ``DataLoader``.
        model : LanguageModel, optional
            Forwarded to ``_extract_activations``.
            NOTE(review): presumably only needed when activations still have
            to be extracted -- confirm against ``_extract_activations``.
        selection_func : SelectFunc, optional
            Callable taking ``(sen_id, pos, example)``; the default accepts
            everything. Forwarded to both the extraction step and the
            ``DataLoader``.
        """
        self.save_dir = save_dir
        # makedirs + exist_ok handles nested paths and avoids the
        # check-then-create race of `if not exists: mkdir`.
        os.makedirs(save_dir, exist_ok=True)

        # The helper returns the (train, test) activation directories that
        # the data loader should read from.
        activations_dir, test_activations_dir = self._extract_activations(
            save_dir,
            corpus,
            activation_names,
            selection_func,
            activations_dir,
            test_activations_dir,
            test_corpus,
            model,
        )

        self.activation_names = activation_names
        self.data_loader = DataLoader(
            activations_dir,
            corpus,
            test_activations_dir=test_activations_dir,
            test_corpus=test_corpus,
            selection_func=selection_func,
        )
        self.classifier = LogRegCV()
Exemplo n.º 2
0
    def setUpClass(cls) -> None:
        """Dump dummy activations to ACTIVATIONS_DIR and attach a DataLoader.

        After this runs, ``cls.data_loader`` reads from ``ACTIVATIONS_DIR``
        and ``cls.num_labels`` mirrors the loader's ``data_len``.
        """
        # Make sure the target directory is present before writing to it.
        dir_missing = not os.path.exists(ACTIVATIONS_DIR)
        if dir_missing:
            os.makedirs(ACTIVATIONS_DIR)

        # Write the dummy data that the loader will read back below.
        dummy_settings = {
            "num_sentences": NUM_TEST_SENTENCES,
            "activations_dim": ACTIVATIONS_DIM,
            "max_tokens": 5,
            "activations_dir": ACTIVATIONS_DIR,
            "activations_name": ACTIVATIONS_NAME,
            "num_classes": 2,
        }
        create_and_dump_dummy_activations(**dummy_settings)

        loader = DataLoader(activations_dir=ACTIVATIONS_DIR)
        cls.data_loader = loader
        cls.num_labels = loader.data_len
Exemplo n.º 3
0
    def __init__(
        self,
        save_dir: str,
        corpus: Corpus,
        activation_names: ActivationNames,
        activations_dir: Optional[str] = None,
        test_activations_dir: Optional[str] = None,
        test_corpus: Optional[Corpus] = None,
        model: Optional[LanguageModel] = None,
        train_selection_func: SelectionFunc = lambda sen_id, pos, example: True,
        test_selection_func: Optional[SelectionFunc] = None,
        control_task: Optional[ControlTask] = None,
        classifier_type: str = "logreg_torch",
        save_logits: bool = False,
        verbose: int = 0,
    ) -> None:
        """Validate settings, prepare activations and build the data loader.

        Parameters
        ----------
        save_dir : str
            Directory stored on ``self.save_dir``; created (including any
            missing parents) if it does not exist yet.
        corpus : Corpus
            Corpus forwarded to ``_extract_activations`` and ``DataLoader``.
        activation_names : ActivationNames
            Stored on ``self.activation_names`` and forwarded to
            ``_extract_activations``.
        activations_dir : str, optional
            Forwarded to ``_extract_activations``; the value it returns is
            the one handed to ``DataLoader``.
        test_activations_dir : str, optional
            Same treatment as ``activations_dir``, for the test split.
        test_corpus : Corpus, optional
            Forwarded to ``_extract_activations`` and ``DataLoader``.
        model : LanguageModel, optional
            Forwarded to ``_extract_activations``.
        train_selection_func : SelectionFunc, optional
            Callable taking ``(sen_id, pos, example)``; the default accepts
            everything. Forwarded to the extraction step and the loader.
        test_selection_func : SelectionFunc, optional
            Forwarded to the extraction step and the loader.
        control_task : ControlTask, optional
            Forwarded to ``DataLoader``.
        classifier_type : str, optional
            Either ``"logreg_torch"`` or ``"logreg_sklearn"``.
        save_logits : bool, optional
            Stored on ``self.save_logits``.
        verbose : int, optional
            Stored on ``self.verbose``.

        Raises
        ------
        AssertionError
            If ``classifier_type`` is not one of the supported values.
        """
        # Validate before any directory creation or activation extraction so
        # that a bad argument fails fast and without side effects. The check
        # is raised explicitly (rather than via `assert`) so it still runs
        # under `python -O`; AssertionError is kept for backward
        # compatibility with existing callers.
        if classifier_type not in ("logreg_torch", "logreg_sklearn"):
            raise AssertionError(
                "Classifier type not understood, should be either "
                "`logreg_torch` or `logreg_sklearn`"
            )

        self.save_dir = save_dir
        # exist_ok avoids the check-then-create race of `if not exists`.
        os.makedirs(save_dir, exist_ok=True)

        # Must exist before _extract_activations runs, as in the original
        # statement order. NOTE(review): presumably populated by the
        # extraction step -- confirm against _extract_activations.
        self.remove_callbacks = []
        activations_dir, test_activations_dir = self._extract_activations(
            save_dir,
            corpus,
            activation_names,
            train_selection_func,
            activations_dir,
            test_activations_dir,
            test_corpus,
            test_selection_func,
            model,
        )

        self.activation_names = activation_names
        self.data_dict: DataDict = {}
        self.data_loader = DataLoader(
            activations_dir,
            corpus,
            test_activations_dir=test_activations_dir,
            test_corpus=test_corpus,
            train_selection_func=train_selection_func,
            test_selection_func=test_selection_func,
            control_task=control_task,
        )
        self.classifier_type = classifier_type
        self.save_logits = save_logits
        self.verbose = verbose
Exemplo n.º 4
0
    def setUpClass(cls) -> None:
        """Dump dummy activations to ACTIVATIONS_DIR and attach a DataLoader.

        ``cls.num_labels`` receives the value returned by
        ``create_and_dump_dummy_activations``; ``cls.data_loader`` is built
        from ``ACTIVATIONS_DIR`` and the corpus imported from the
        ``corpus.tsv`` file inside that directory.
        """
        # Make sure the target directory is present before writing to it.
        dir_missing = not os.path.exists(ACTIVATIONS_DIR)
        if dir_missing:
            os.makedirs(ACTIVATIONS_DIR)

        # Write the dummy data that the loader will read back below.
        dummy_settings = {
            "num_sentences": NUM_TEST_SENTENCES,
            "activations_dim": ACTIVATIONS_DIM,
            "max_sen_len": 5,
            "activations_dir": ACTIVATIONS_DIR,
            "activations_name": ACTIVATIONS_NAME,
            "num_classes": 2,
        }
        cls.num_labels = create_and_dump_dummy_activations(**dummy_settings)

        dummy_corpus = import_corpus(f"{ACTIVATIONS_DIR}/corpus.tsv")
        cls.data_loader = DataLoader(ACTIVATIONS_DIR, dummy_corpus)
Exemplo n.º 5
0
    def __init__(self,
                 corpus: Corpus,
                 activations_dir: str,
                 activation_names: List[ActivationName],
                 save_dir: str,
                 classifier_type: str,
                 calc_class_weights: bool = False) -> None:
        """Store the configuration, build the data loader and reset the classifier.

        Parameters
        ----------
        corpus : Corpus
            Corpus forwarded to ``DataLoader``.
        activations_dir : str
            Directory forwarded to ``DataLoader``.
        activation_names : List[ActivationName]
            Stored on ``self.activation_names``.
        save_dir : str
            Directory stored on ``self.save_dir``; created (including any
            missing parent directories) if it does not exist yet.
        classifier_type : str
            Stored on ``self.classifier_type``.
            NOTE(review): presumably consumed by ``_reset_classifier`` --
            confirm which values it accepts.
        calc_class_weights : bool, optional
            Stored on ``self.calc_class_weights``.
        """
        self.activation_names: List[ActivationName] = activation_names
        self.save_dir = save_dir
        # makedirs + exist_ok handles nested paths and avoids the
        # check-then-create race of `if not exists: mkdir`.
        os.makedirs(save_dir, exist_ok=True)

        # TODO: Allow own classifier here (should adhere to some base functions, such as .fit())
        self.classifier_type = classifier_type
        self.calc_class_weights = calc_class_weights

        self.data_loader = DataLoader(activations_dir, corpus)
        # Nested results container: a missing top-level key yields a new dict.
        self.results: ResultsDict = defaultdict(dict)

        # Called last, after all configuration attributes above are set.
        self._reset_classifier()