def test_load_option_for_titles_only(self):
    """Check that ``load(..., titles_only=True)`` yields titles as texts.

    ``codecs.open`` is patched with an in-memory stub that serves two
    BibTeX entries, so no file is touched.  The test expects the file to
    be opened exactly once as UTF-8 text, the texts to be the two titles
    (no abstracts), the labels to be ``1``/``0`` for the entries marked
    ``inserir = {true}``/``{false}``, and the years to be integers.
    """
    bibtex_content = ''' @ARTICLE{I[2], inserir = {true}, title = {abobrinha1}, year = {2008}, abstract = {abobrinha abstract}, } @INPROCEEDINGS{E[2], inserir = {false}, title = {umbrela}, year = {2020}, abstract = {another abstract}, } '''

    # Stand-in for the object returned by ``codecs.open``: it can be read
    # directly and also entered/exited as a context manager.
    file_stub = Mock()
    file_stub.__enter__ = Mock(return_value=file_stub)
    # ``__exit__`` has to return a falsy value.  A bare ``Mock()`` returns
    # another Mock, which is truthy and would make a ``with`` statement
    # swallow any exception raised inside its body.
    file_stub.__exit__ = Mock(return_value=False)
    file_stub.read = Mock(return_value=bibtex_content)

    with patch.object(codecs, 'open', return_value=file_stub) as cod:
        X, y, years = load(['pepino.bib'], titles_only=True)

    cod.assert_called_once_with('pepino.bib', 'r', encoding='utf-8')
    self.assertEqual(X, ['abobrinha1', 'umbrela'])
    self.assertEqual(y, [1, 0])
    self.assertEqual(years, [2008, 2020])
def test_load_2_bibfiles(self):
    """Check that ``load`` concatenates the entries of several files.

    ``codecs.open`` is patched to hand out one in-memory stub per call,
    in order.  The test expects both paths to be opened as UTF-8 text and
    the returned texts (``title\\nabstract``), labels and years to list
    the entries of the first file followed by those of the second.
    """

    def make_file_stub(content):
        # Stand-in for the object returned by ``codecs.open``: readable
        # and usable as a context manager.
        stub = Mock()
        stub.__enter__ = Mock(return_value=stub)
        # ``__exit__`` has to return a falsy value.  A bare ``Mock()``
        # returns another Mock, which is truthy and would make a ``with``
        # statement swallow any exception raised inside its body.
        stub.__exit__ = Mock(return_value=False)
        stub.read = Mock(return_value=content)
        return stub

    bibtex_content_1 = ''' @ARTICLE{I[2], inserir = {true}, title = {abobrinha1}, year = {2008}, abstract = {abobrinha abstract}, } @INPROCEEDINGS{E[2], inserir = {false}, title = {umbrela}, year = {2020}, abstract = {another abstract}, } '''
    bibtex_content_2 = ''' @ARTICLE{I[2], inserir = {false}, title = {uva}, year = {2020}, abstract = {contos da uva}, } @INPROCEEDINGS{E[2], inserir = {false}, title = {note}, year = {2021}, abstract = {not cool}, } '''

    file_stub_1 = make_file_stub(bibtex_content_1)
    file_stub_2 = make_file_stub(bibtex_content_2)
    files = [file_stub_1, file_stub_2]

    # ``side_effect`` with a list returns the stubs one per call, in order.
    with patch.object(codecs, 'open', side_effect=files) as cod:
        X, y, years = load(['pepino.bib', 'abacaxi.bib'])

    cod.assert_has_calls([
        call('pepino.bib', 'r', encoding='utf-8'),
        call('abacaxi.bib', 'r', encoding='utf-8'),
    ])
    self.assertEqual(X, [
        'abobrinha1\nabobrinha abstract',
        'umbrela\nanother abstract',
        'uva\ncontos da uva',
        'note\nnot cool',
    ])
    self.assertEqual(y, [1, 0, 0, 0])
    self.assertEqual(years, [2008, 2020, 2020, 2021])
def test_load_bibitem(self):
    """Check that ``load`` turns a single BibTeX entry into one sample.

    ``codecs.open`` is patched with an in-memory stub serving one entry
    marked ``inserir = {true}``.  The test expects the file to be opened
    once as UTF-8 text, the text to be ``title\\nabstract``, the label to
    be ``1`` and the year to be the integer ``2008``.
    """
    bibtex_content = ''' @ARTICLE{I[1], inserir = {true}, title = {abobrinha1}, year = {2008}, abstract = {abobrinha abstract}, } '''

    # Stand-in for the object returned by ``codecs.open``: it can be read
    # directly and also entered/exited as a context manager.
    file_stub = Mock()
    file_stub.__enter__ = Mock(return_value=file_stub)
    # ``__exit__`` has to return a falsy value.  A bare ``Mock()`` returns
    # another Mock, which is truthy and would make a ``with`` statement
    # swallow any exception raised inside its body.
    file_stub.__exit__ = Mock(return_value=False)
    file_stub.read = Mock(return_value=bibtex_content)

    with patch.object(codecs, 'open', return_value=file_stub) as cod:
        X, y, years = load(['abobrinha.bib'])

    cod.assert_called_once_with('abobrinha.bib', 'r', encoding='utf-8')
    self.assertEqual(X, ['abobrinha1\nabobrinha abstract'])
    self.assertEqual(y, [1])
    self.assertEqual(years, [2008])
print('sixth argument missing: padding sequence (for embeddings only!)') sys.exit(1) if (len(sys.argv) < 7): print( 'seventh argument missing: extrator (tfidf,embeddings_glove,embeddings_se)' ) sys.exit(1) _, theme, classifier_name, ngram_range, titles, maxlen, extractor = sys.argv titles = True if titles == 'true' else False embedding_dim = 200 embedding_file = './embeddings/glove.6B.200d.txt' if classifier_name == 'embeddings_glove' or extractor == 'embeddings_glove' else './embeddings/SO_vectors_200.bin' slr_files = get_slr_files(theme) X, y, years = load(slr_files, titles_only=titles) kfold = YearsSplit(n_split=3, years=years) result = {'fscore': [], 'threashold': [], 'missed': [], 'excluded': []} X = np.array(X) y = np.array(y) for train_index, test_index in kfold.split(X, y): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] if classifier_name[:9] != 'embedding': classifier, classifier_params = get_classifier(classifier_name) extractor_class, selector_f, selector_params = get_extractor( extractor, ngram_range, embedding_file) classifier_params.update(selector_params)