示例#1
0
文件: tagger.py 项目: Roger/quepy
def get_tagger():
    """
    Return a tagging function chosen from the app settings.

    The backend is selected through the module-level `settings` object:
    Freeling when `settings.USE_FREELING` is true, the NLTK tagger otherwise.
    The backend module is imported inside the selected branch, so only the
    chosen backend gets imported.

    The returned value is a function that receives a unicode string and returns
    a list of `Word` instances.
    """
    if settings.USE_FREELING:
        from quepy.freeling import run_freeling

        def tagger_function(text):
            return run_freeling(text, settings.FREELING_CMD)
    else:
        from quepy.nltktagger import run_nltktagger

        def tagger_function(text):
            return run_nltktagger(text, settings.NLTK_DATA_PATH)

    def wrapper(string):
        assert_valid_encoding(string)
        # Materialize the result: it is traversed below to check the tags, so
        # a lazy iterable (e.g. a generator) would otherwise reach the caller
        # already exhausted instead of as the documented list.
        words = list(tagger_function(string))
        for word in words:
            # Unknown tags are only reported; the words are returned anyway.
            if word.pos not in PENN_TAGSET:
                logger.warning("Tagger emmited a non-penn " "POS tag {!r}".format(word.pos))
        return words

    return wrapper
示例#2
0
    def test_run_freeling(self):
        """
        Check that `run_freeling` runs a command and parses its output.

        `sysutils.ExecutionContext` and `freeling._parse_freeling_output` are
        temporarily replaced by fakes that only record that they were used.
        The originals are restored even when the call or an assertion fails,
        so the fakes never leak into other tests.
        """

        class FakeStringIO(StringIO):
            # Stand-in for a temporary file; presumably `run_freeling` reads
            # the file's `name` -- verify against the implementation.
            name = "some_name"

        class FakeExecutionCtx(object):
            # Class-level flag so the test can check it without holding the
            # instance created inside `run_freeling`.
            runcmd_called = False

            def __init__(self, *args, **kwargs):
                pass

            def runcmd(self, cmd, stdin=None):
                self.cmd = cmd
                FakeExecutionCtx.runcmd_called = True
                return (StringIO(), StringIO())

            def tmpfile(self, name):
                return FakeStringIO()

        class FakeFunction(object):
            """Callable that records whether and how it was called."""

            def __init__(self):
                self.called = False
                self.args = None
                self.kwargs = None

            def __call__(self, *args, **kwargs):
                self.called = True
                self.args = args
                self.kwargs = kwargs

        bkp_ctx = sysutils.ExecutionContext
        bkp_parse_output = freeling._parse_freeling_output
        fake_parse_output = FakeFunction()

        try:
            sysutils.ExecutionContext = FakeExecutionCtx
            freeling._parse_freeling_output = fake_parse_output

            freeling.run_freeling(u"who is Tom Cruise?", FREELING_CMD)

            self.assertTrue(FakeExecutionCtx.runcmd_called)
            self.assertTrue(fake_parse_output.called)
        finally:
            # Always undo the monkeypatching, whatever happened above.
            sysutils.ExecutionContext = bkp_ctx
            freeling._parse_freeling_output = bkp_parse_output
示例#3
0
    def test_run_freeling(self):
        """
        Check that `run_freeling` runs a command and parses its output.

        `sysutils.ExecutionContext` and `freeling._parse_freeling_output` are
        temporarily replaced by fakes that only record that they were used.
        The originals are restored even when the call or an assertion fails,
        so the fakes never leak into other tests.
        """

        class FakeStringIO(StringIO):
            # Stand-in for a temporary file; presumably `run_freeling` reads
            # the file's `name` -- verify against the implementation.
            name = "some_name"

        class FakeExecutionCtx(object):
            # Class-level flag so the test can check it without holding the
            # instance created inside `run_freeling`.
            runcmd_called = False

            def __init__(self, *args, **kwargs):
                pass

            def runcmd(self, cmd, stdin=None):
                self.cmd = cmd
                FakeExecutionCtx.runcmd_called = True
                return (StringIO(), StringIO())

            def tmpfile(self, name):
                return FakeStringIO()

        class FakeFunction(object):
            """Callable that records whether and how it was called."""

            def __init__(self):
                self.called = False
                self.args = None
                self.kwargs = None

            def __call__(self, *args, **kwargs):
                self.called = True
                self.args = args
                self.kwargs = kwargs

        bkp_ctx = sysutils.ExecutionContext
        bkp_parse_output = freeling._parse_freeling_output
        fake_parse_output = FakeFunction()

        try:
            sysutils.ExecutionContext = FakeExecutionCtx
            freeling._parse_freeling_output = fake_parse_output

            freeling.run_freeling(u"who is Tom Cruise?", FREELING_CMD)

            self.assertTrue(FakeExecutionCtx.runcmd_called)
            self.assertTrue(fake_parse_output.called)
        finally:
            # Always undo the monkeypatching, whatever happened above.
            sysutils.ExecutionContext = bkp_ctx
            freeling._parse_freeling_output = bkp_parse_output
示例#4
0
    def test_real_run(self):
        """
        Tag a sample question through `freeling.run_freeling` (no fakes) and
        check the type and POS tag of every returned word.
        """
        # Expected POS tag for each token of the sample question.
        pos_by_token = {
            u"who": u"WP",
            u"is": u"VBZ",
            u"Tom Cruise": u"NNP",
            u"?": u".",
        }

        tagged = list(freeling.run_freeling(u"who is Tom Cruise?", FREELING_CMD))

        for item in tagged:
            self.assertIsInstance(item, freeling.Word)
            self.assertEqual(item.pos, pos_by_token[item.token])
示例#5
0
    def test_real_run(self):
        """
        Tag a sample question through `freeling.run_freeling` (no fakes) and
        check the type and POS tag of every returned word.
        """
        # Expected POS tag for each token of the sample question.
        pos_by_token = {
            u"who": u"WP",
            u"is": u"VBZ",
            u"Tom Cruise": u"NNP",
            u"?": u".",
        }

        tagged = list(freeling.run_freeling(u"who is Tom Cruise?", FREELING_CMD))

        for item in tagged:
            self.assertIsInstance(item, freeling.Word)
            self.assertEqual(item.pos, pos_by_token[item.token])
示例#6
0
def get_tagger():
    """
    Return a tagging function chosen from the app settings.

    The backend is selected through the module-level `settings` object:
    Freeling when `settings.USE_FREELING` is true, the NLTK tagger otherwise.
    The backend module is imported inside the selected branch, so only the
    chosen backend gets imported.

    The returned value is a function that receives a unicode string and returns
    a list of `Word` instances.
    """
    if settings.USE_FREELING:
        from quepy.freeling import run_freeling

        def tagger_function(text):
            return run_freeling(text, settings.FREELING_CMD)
    else:
        from quepy.nltktagger import run_nltktagger

        def tagger_function(text):
            return run_nltktagger(text, settings.NLTK_DATA_PATH)

    def wrapper(string):
        assert_valid_encoding(string)
        # Materialize the result: it is traversed below to check the tags, so
        # a lazy iterable (e.g. a generator) would otherwise reach the caller
        # already exhausted instead of as the documented list.
        words = list(tagger_function(string))
        for word in words:
            # Unknown tags are only reported; the words are returned anyway.
            if word.pos not in PENN_TAGSET:
                logger.warning("Tagger emmited a non-penn "
                               "POS tag {!r}".format(word.pos))
        return words

    return wrapper