def test_true_true_code_bytes(abspath_mock, bpe_learner_mock, dataset_mock):
    """Check dispatch of ``learn-bpe 1000 -p <path> --bytes --word-end``.

    Verifies that the dataset is created with the expected prep and BPE
    configs, and that the BPE learner is run with that dataset, ``1000`` and
    the same BPE config.

    NOTE(review): the three mocks are presumably injected by patch decorators
    or fixtures outside this view -- confirm.
    """
    # given
    abspath_mock.return_value = PATH_TO_DATASET_STUB
    dataset_mock.create = Mock(spec=dataset_mock, return_value=dataset_mock)
    cli_args = [
        'learn-bpe', '1000',
        '-p', PATH_TO_DATASET_STUB,
        '--bytes',
        '--word-end',
    ]

    # when
    parse_and_run(cli_args)

    # then
    expected_prep = PrepConfig({
        PrepParam.EN_ONLY: 'u',
        PrepParam.COM: '0',
        PrepParam.STR: 'E',
        PrepParam.SPLIT: 'F',
        PrepParam.TABS_NEWLINES: 's',
        PrepParam.CASE: 'u',
    })
    expected_bpe = BpeConfig({
        BpeParam.CASE: 'yes',
        BpeParam.WORD_END: True,
        BpeParam.BASE: 'code',
        BpeParam.UNICODE: 'bytes',
    })
    dataset_mock.create.assert_called_with(
        PATH_TO_DATASET_STUB, expected_prep, None, None, expected_bpe
    )
    bpe_learner_mock.run.assert_called_with(dataset_mock, 1000, expected_bpe)
def test_xx0Fxx_max_str_length():
    """Expect ``DocoptExit`` when ``--no-str`` and ``--full-strings`` are passed together."""
    cli_args = ['nosplit', 'str', '-e', 'java']
    cli_args += ['--no-spaces', '--no-str', '--no-com', '--full-strings']

    with pytest.raises(DocoptExit):
        parse_and_run(cli_args)
def test_learn_bpe_codes():
    """Run ``learn-bpe`` twice on the test corpus, then apply BPE through the API.

    On macOS (``platform.system() == 'Darwin'``) the test is skipped via
    ``pytest.skip`` so the run is reported as skipped instead of silently
    passing after printing a message.
    """
    if platform.system() == 'Darwin':
        pytest.skip('Skipping the test on OSx.')

    # Two learn-bpe invocations that differ only in the numeric argument.
    for count in ('100', '150'):
        parse_and_run(['learn-bpe', count, '-p', PATH_TO_TEST_CORPUS, '-e', 'java'])

    api.bpe(
        path=PATH_TO_TEST_CORPUS,
        bpe_codes_id='test-corpus-130',
        extensions="java",
        output_path=TEST_OUTPUT,
    )
def test_xxxFsx(api_mock):
    """Check the prep config passed to ``api.text.preprocess`` for ``nosplit --full-strings``.

    NOTE(review): ``api_mock`` is presumably injected by a patch decorator or
    fixture outside this view -- confirm.
    """
    parse_and_run(['nosplit', 'str', '-e', 'java', '--full-strings'])

    expected_params = {
        PrepParam.EN_ONLY: 'u',
        PrepParam.COM: 'c',
        PrepParam.STR: '1',
        PrepParam.SPLIT: 'F',
        PrepParam.TABS_NEWLINES: 's',
        PrepParam.CASE: 'u',
    }
    expected_config = PrepConfig(expected_params)
    api_mock.text.preprocess.assert_called_with(
        "str", expected_config, None, extension="java"
    )
def test_all_short_config_options(api_mock):
    """Check the prep config produced by the combined short flags ``-0lSCU``.

    NOTE(review): ``api_mock`` is presumably injected by a patch decorator or
    fixture outside this view -- confirm.
    """
    parse_and_run(['basic', 'str', '-e', 'java', '-0lSCU'])

    expected_params = {
        PrepParam.EN_ONLY: 'U',
        PrepParam.COM: '0',
        PrepParam.STR: '0',
        PrepParam.SPLIT: '1',
        PrepParam.TABS_NEWLINES: '0',
        PrepParam.CASE: 'l',
    }
    expected_config = PrepConfig(expected_params)
    api_mock.text.preprocess.assert_called_with(
        "str", expected_config, None, extension="java"
    )
def test_xxx1xu(api_mock):
    """Check the prep config passed to ``api.text.preprocess`` for ``basic --no-spaces``.

    NOTE(review): ``api_mock`` is presumably injected by a patch decorator or
    fixture outside this view -- confirm.
    """
    parse_and_run(['basic', 'str', '-e', 'java', '--no-spaces'])

    expected_params = {
        PrepParam.EN_ONLY: 'u',
        PrepParam.COM: 'c',
        PrepParam.STR: '1',
        PrepParam.SPLIT: '1',
        PrepParam.TABS_NEWLINES: '0',
        PrepParam.CASE: 'u',
    }
    expected_config = PrepConfig(expected_params)
    api_mock.text.preprocess.assert_called_with(
        "str", expected_config, None, extension="java"
    )
def test_path_short(api_mock):
    """Check that ``-p <path>`` routes to ``api.corpus.preprocess_corpus``.

    The call is expected to receive the dataset stub path, the expected prep
    config, ``None``, and the keyword arguments ``calc_vocab=False``,
    ``extensions=None`` and ``output_path=None``.

    NOTE(review): ``api_mock`` is presumably injected by a patch decorator or
    fixture outside this view -- confirm.
    """
    parse_and_run(['nosplit', '-p', PATH_TO_DATASET_STUB, '--no-spaces'])

    expected_params = {
        PrepParam.EN_ONLY: 'u',
        PrepParam.COM: 'c',
        PrepParam.STR: '1',
        PrepParam.SPLIT: '0',
        PrepParam.TABS_NEWLINES: '0',
        PrepParam.CASE: 'u',
    }
    expected_config = PrepConfig(expected_params)
    api_mock.corpus.preprocess_corpus.assert_called_with(
        PATH_TO_DATASET_STUB,
        expected_config,
        None,
        calc_vocab=False,
        extensions=None,
        output_path=None,
    )
def test_output_with_text():
    """Expect ``DocoptExit`` when ``-o <path>`` is combined with a text argument."""
    argv = ['nosplit', 'str', '-o', PATH_TO_OUTPUT_STUB, '--no-spaces']

    # The exception info is not inspected, so it is not bound to a name.
    with pytest.raises(DocoptExit):
        parse_and_run(argv)
def test_xxA8xx():
    """Expect ``DocoptExit`` for ``chars`` with ``--no-str`` and ``--max-str-length=10``."""
    cli_args = ['chars', 'str', '-e', 'java']
    cli_args += ['--no-str', '--max-str-length=10']

    with pytest.raises(DocoptExit):
        parse_and_run(cli_args)
def test_xx0xxx_with_max_str_length():
    """Expect ``DocoptExit`` for ``basic`` with ``--no-str`` and ``--max-str-length=10``."""
    cli_args = ['basic', 'str', '-e', 'java']
    cli_args += ['--no-str', '--max-str-length=10']

    with pytest.raises(DocoptExit):
        parse_and_run(cli_args)
def test_xxA1xx(api_mock):
    """Expect ``DocoptExit`` for ``basic`` with ``--no-str`` and ``--max-str-length=10``.

    NOTE(review): ``api_mock`` is never used in the body; it is presumably
    supplied by a patch decorator or fixture outside this view -- confirm.
    """
    cli_args = ['basic', 'str', '-e', 'java']
    cli_args += ['--no-str', '--max-str-length=10']

    with pytest.raises(DocoptExit):
        parse_and_run(cli_args)
def test_xxx0x1():
    """Expect ``DocoptExit`` for ``nosplit`` with ``--no-spaces`` and ``--no-case``."""
    argv = ['nosplit', 'str', '-e', 'java', '--no-spaces', '--no-case']

    # The exception info is not inspected, so it is not bound to a name.
    with pytest.raises(DocoptExit):
        parse_and_run(argv)
def main():
    """Run ``parse_and_run`` on the process's command-line arguments."""
    # Drop the first element of sys.argv (the program name).
    cli_args = sys.argv[1:]
    parse_and_run(cli_args)