Пример #1
0
    def test_annotate(self):
        """Send a Communication to a NoopAnnotator service and check the reply.

        The service runs inside a ``SubprocessAnnotatorServiceWrapper``
        context on ``localhost`` at a port obtained from ``find_port()``.
        The test calls ``annotate`` over a framed transport with the
        compact protocol and asserts that the id, UUID string, metadata
        tool and metadata timestamp of the result equal those of the input.
        """
        impl = NoopAnnotator()
        host = 'localhost'
        port = find_port()
        timeout = 5

        comm_id = '1-2-3-4'
        comm = create_comm(comm_id)

        # Read the expected values up front, before the request is sent.
        comm_uuid_uuidString = comm.uuid.uuidString
        comm_metadata_tool = comm.metadata.tool
        comm_metadata_timestamp = comm.metadata.timestamp

        with SubprocessAnnotatorServiceWrapper(impl,
                                               host,
                                               port,
                                               timeout=timeout):
            transport = TSocket.TSocket(host, port)
            transport = TTransport.TFramedTransport(transport)
            protocol = TCompactProtocol.TCompactProtocol(transport)

            cli = Annotator.Client(protocol)
            transport.open()
            try:
                res = cli.annotate(comm)
            finally:
                # Close the connection even when the RPC raises, so a
                # failing call does not leave the socket open.
                transport.close()

            self.assertEqual(res.id, comm_id)
            self.assertEqual(res.uuid.uuidString, comm_uuid_uuidString)
            self.assertEqual(res.metadata.tool, comm_metadata_tool)
            self.assertEqual(res.metadata.timestamp, comm_metadata_timestamp)
Пример #2
0
    def __enter__(self):
        """Open a connection and return an ``Annotator.Client`` bound to it.

        The socket, transport and protocol are all built through
        ``factory`` from ``self.host`` and ``self.port``.  The transport is
        kept on ``self.transport`` (presumably so ``__exit__`` can close
        it; that method is not visible here).
        """
        self.transport = factory.createTransport(
            factory.createSocket(self.host, self.port))
        client = Annotator.Client(factory.createProtocol(self.transport))

        # Connect only after the client has been fully wired up.
        self.transport.open()
        return client
Пример #3
0
    def __init__(self):
        """Open one ``Annotator.Client`` per annotation service.

        Connects to the hosts ``sentence.splitter``, ``word.tokenizer``,
        ``pos.tagger`` and ``ne.chunker`` on port 9090, each through a
        framed transport with the compact protocol.  The clients are
        stored, in that order, as ``self.sentence_splitter``,
        ``self.word_tokenizer``, ``self.pos_tagger`` and
        ``self.ne_chunker``.

        If building or opening any connection raises, the transports that
        were already opened are closed before the exception is re-raised.
        """
        services = []
        opened = []
        try:
            for service in [
                    "sentence.splitter", "word.tokenizer", "pos.tagger",
                    "ne.chunker"
            ]:

                transport = TTransport.TFramedTransport(
                    TSocket.TSocket(host=service, port=9090))

                protocol = TCompactProtocol.TCompactProtocol(transport)

                client = Annotator.Client(protocol)

                transport.open()
                opened.append(transport)

                services.append(client)
        except Exception:
            # Do not leak the connections established before the failure.
            for transport in opened:
                transport.close()
            raise

        self.sentence_splitter, self.word_tokenizer, self.pos_tagger, self.ne_chunker = services
Пример #4
0
    def test_get_metadata(self):
        """Check that a NoopAnnotator service reports its metadata tool name.

        The service runs inside a ``SubprocessAnnotatorServiceWrapper``
        context on ``localhost`` at a port obtained from ``find_port()``.
        The test calls ``getMetadata`` over a framed transport with the
        compact protocol and asserts that the returned ``tool`` equals
        ``NoopAnnotator.METADATA_TOOL``.
        """
        impl = NoopAnnotator()
        host = 'localhost'
        port = find_port()
        timeout = 5

        with SubprocessAnnotatorServiceWrapper(impl,
                                               host,
                                               port,
                                               timeout=timeout):
            transport = TSocket.TSocket(host, port)
            transport = TTransport.TFramedTransport(transport)
            protocol = TCompactProtocol.TCompactProtocol(transport)

            cli = Annotator.Client(protocol)
            transport.open()
            try:
                metadata = cli.getMetadata()
            finally:
                # Close the connection even when the RPC raises, so a
                # failing call does not leave the socket open.
                transport.close()

            self.assertEqual(NoopAnnotator.METADATA_TOOL, metadata.tool)
Пример #5
0
    def index():
        """Annotate the submitted text and render the entities found in it.

        Reads the ``text`` form field, wraps it in a ``Communication`` with
        a single ``paragraph`` section spanning the whole text, sends it to
        the annotator at ``options.annotator_host`` /
        ``options.annotator_port`` and returns an HTML page made of the
        input form, the submitted text as a heading, and one
        ``<br>type canonicalName`` line per entity of every entity set in
        the annotated result.

        A missing ``text`` field is treated as an empty string.  All
        user-supplied and service-supplied text is HTML-escaped before it
        is placed in the page, and the connection to the annotator is
        closed once the call has finished.
        """
        # Function-scope import so the block brings its own dependency;
        # Python 2 has no ``html`` module, so fall back to ``cgi.escape``.
        try:
            from html import escape
        except ImportError:
            from cgi import escape

        # ``get`` yields None when the field is absent; len(None) would fail.
        text = request.forms.get('text') or ""
        transport = TTransport.TFramedTransport(
            TSocket.TSocket(options.annotator_host, options.annotator_port))
        protocol = TCompactProtocol.TCompactProtocol(transport)
        client = Annotator.Client(protocol)
        augf = AnalyticUUIDGeneratorFactory()
        aug = augf.create()
        c = Communication(
            id="",
            text=text,
            uuid=aug.next(),
            type="user-supplied input",
            metadata=AnnotationMetadata(timestamp=int(time.time()),
                                        tool="stdin"),
            sectionList=[
                Section(uuid=aug.next(),
                        sentenceList=[],
                        kind="paragraph",
                        textSpan=TextSpan(start=0, ending=len(text)))
            ],
            entitySetList=[],
            entityMentionSetList=[],
        )

        transport.open()
        try:
            new_c = client.annotate(c)
        finally:
            # One connection is opened per request; always release it.
            transport.close()

        form = '''<form action="/" method="post">
        Enter or paste some text: <input name="text" type="text" />
        <input value="Submit" type="submit" />
        </form>
        '''
        # Escape everything that originates outside this function so that
        # markup in the input or in entity names cannot inject HTML.
        parts = ["<h3>%s</h3>" % escape(text, quote=True)]
        # NOTE(review): assumes the service may leave entitySetList unset
        # (None); an empty list is used in that case -- confirm.
        for es in new_c.entitySetList or []:
            parts.append("\n".join(
                "<br>%s" % escape("%s %s" % (e.type, e.canonicalName),
                                  quote=True)
                for e in es.entityList
            ))
        return form + "\n".join(parts)
Пример #6
0
                sentence.tokenization = Tokenization(uuid = aug.next(),
                                                     kind = TokenizationKind.TOKEN_LIST,
                                                     tokenList = TokenList(tokenList=[]),
                                                     tokenTaggingList = [],
                                                     metadata = AnnotationMetadata(timestamp=int(time.time()), tool="nltk"))
                                                     
                for i, token in enumerate(nltk.word_tokenize(text)):
                    logging.info("Found token %s", token)
                    sentence.tokenization.tokenList.tokenList.append(Token(tokenIndex=i, text=token))
        return communication
    
if __name__ == "__main__":
    # Script entry point: serve a CommunicationHandler as an Annotator
    # service on the port given with -p/--port (default 9090).

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p", "--port",
        dest="port",
        type=int,
        default=9090,
    )
    options = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    handler = CommunicationHandler()
    processor = Annotator.Processor(handler)
    transport = TSocket.TServerSocket(port=options.port)

    # Input and output both use the compact protocol.
    ipfactory = TCompactProtocol.TCompactProtocolFactory()
    opfactory = TCompactProtocol.TCompactProtocolFactory()

    server = TNonblockingServer.TNonblockingServer(
        processor,
        transport,
        ipfactory,
        opfactory,
    )
    logging.info('Starting the server...')
    server.serve()
Пример #7
0
    # Interactive client: read lines of text from the terminal, send each
    # one to the Annotator service at -H/--host and -p/--port, and print
    # the annotated result.  A blank line or end of input ends the session.
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--port", dest="port", type=int, default=9090)
    parser.add_argument("-H", "--host", dest="host", default="localhost")
    options = parser.parse_args()

    # Make socket
    transport = TSocket.TSocket(options.host, options.port)

    # Buffering is critical. Raw sockets are very slow
    transport = TTransport.TBufferedTransport(transport)

    # Wrap in a protocol
    protocol = TCompactProtocol.TCompactProtocol(transport)

    # Create a client to use the protocol encoder
    client = Annotator.Client(protocol)

    # Connect!
    transport.open()

    try:
        while True:
            try:
                s = raw_input("Write some text > ")
            except EOFError:
                # End of input (e.g. Ctrl-D or a closed pipe) ends the
                # session the same way a blank line does.
                break
            if re.match(r"^\s*$", s):
                break

            augf = AnalyticUUIDGeneratorFactory()
            aug = augf.create()
            c = Communication(id="", text=s, uuid=aug.next(), type="tweet", metadata=AnnotationMetadata(timestamp=0, tool="stdin"), lidList=[])

            new_c = client.annotate(c)
            # Parenthesised single argument prints identically under the
            # Python 2 print statement.
            print(new_c)
    finally:
        # Release the connection on every exit path, including errors
        # raised by the RPC.
        transport.close()
Пример #8
0
 def __init__(self, implementation):
     """Wrap *implementation* in an ``Annotator.Processor``.

     The resulting processor is stored on ``self.processor``.
     """
     processor = Annotator.Processor(implementation)
     self.processor = processor