def test_annotate(self):
    """Check that annotate() over Thrift returns the communication intact.

    A NoopAnnotator is served through SubprocessAnnotatorServiceWrapper and
    called over a framed transport with the compact protocol.  The id, UUID
    string, and metadata tool/timestamp of the result must equal the values
    captured from the input communication before the call.
    """
    impl = NoopAnnotator()
    host = 'localhost'
    port = find_port()
    timeout = 5

    comm_id = '1-2-3-4'
    comm = create_comm(comm_id)

    # Snapshot the expected values before the communication is sent.
    comm_uuid_uuidString = comm.uuid.uuidString
    comm_metadata_tool = comm.metadata.tool
    comm_metadata_timestamp = comm.metadata.timestamp

    with SubprocessAnnotatorServiceWrapper(impl, host, port,
                                           timeout=timeout):
        transport = TSocket.TSocket(host, port)
        transport = TTransport.TFramedTransport(transport)
        protocol = TCompactProtocol.TCompactProtocol(transport)

        cli = Annotator.Client(protocol)
        transport.open()
        try:
            res = cli.annotate(comm)
        finally:
            # Release the socket even when the remote call raises, so a
            # failing test does not leave a connection open.
            transport.close()

        self.assertEqual(res.id, comm_id)
        self.assertEqual(res.uuid.uuidString, comm_uuid_uuidString)
        self.assertEqual(res.metadata.tool, comm_metadata_tool)
        self.assertEqual(res.metadata.timestamp, comm_metadata_timestamp)
def __enter__(self):
    """Open a connection to ``self.host``/``self.port`` and return a client.

    The socket, transport and protocol are all built through ``factory``.
    The transport is stored on ``self.transport`` and opened before the
    ``Annotator.Client`` is returned.
    """
    self.transport = factory.createTransport(
        factory.createSocket(self.host, self.port))
    client = Annotator.Client(factory.createProtocol(self.transport))
    self.transport.open()
    return client
def __init__(self):
    """Connect to the four annotator services and keep one client for each.

    A framed, compact-protocol connection is opened to port 9090 on each of
    the hosts ``sentence.splitter``, ``word.tokenizer``, ``pos.tagger`` and
    ``ne.chunker``.  The resulting clients are stored, in that order, on
    ``self.sentence_splitter``, ``self.word_tokenizer``, ``self.pos_tagger``
    and ``self.ne_chunker``.

    Raises:
        Whatever ``transport.open()`` raises when a service cannot be
        reached.  Transports opened before the failure are closed first.
    """
    service_hosts = (
        "sentence.splitter",
        "word.tokenizer",
        "pos.tagger",
        "ne.chunker",
    )
    services = []
    opened_transports = []
    try:
        for service in service_hosts:
            transport = TTransport.TFramedTransport(
                TSocket.TSocket(host=service, port=9090))
            protocol = TCompactProtocol.TCompactProtocol(transport)
            client = Annotator.Client(protocol)
            transport.open()
            opened_transports.append(transport)
            services.append(client)
    except Exception:
        # Do not leak the connections that were already established when a
        # later service turns out to be unreachable.
        for transport in opened_transports:
            transport.close()
        raise

    (self.sentence_splitter,
     self.word_tokenizer,
     self.pos_tagger,
     self.ne_chunker) = services
def test_get_metadata(self):
    """Check that getMetadata() over Thrift reports the NoopAnnotator tool.

    A NoopAnnotator is served through SubprocessAnnotatorServiceWrapper and
    queried over a framed transport with the compact protocol.  The returned
    metadata's ``tool`` must equal ``NoopAnnotator.METADATA_TOOL``.
    """
    impl = NoopAnnotator()
    host = 'localhost'
    port = find_port()
    timeout = 5

    with SubprocessAnnotatorServiceWrapper(impl, host, port,
                                           timeout=timeout):
        transport = TSocket.TSocket(host, port)
        transport = TTransport.TFramedTransport(transport)
        protocol = TCompactProtocol.TCompactProtocol(transport)

        cli = Annotator.Client(protocol)
        transport.open()
        try:
            metadata = cli.getMetadata()
        finally:
            # Release the socket even when the remote call raises, so a
            # failing test does not leave a connection open.
            transport.close()

        self.assertEqual(NoopAnnotator.METADATA_TOOL, metadata.tool)
def index():
    """Annotate the submitted ``text`` form field and render the entities.

    The text is wrapped in a single-section Communication, sent to the
    annotator at ``options.annotator_host``:``options.annotator_port``, and
    the page returned consists of the input form, the submitted text as a
    heading, and one ``type canonicalName`` line per entity of every entity
    set in the annotated communication.

    Returns:
        The HTML page as a string.
    """
    # Function-scope import so the file's import block is untouched:
    # html.escape on Python 3, cgi.escape on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    def _esc(value):
        # Format first so non-string values (e.g. None) are rendered the
        # same way "%s" rendered them before, then neutralise HTML markup.
        return escape("%s" % value)

    text = request.forms.get('text')
    transport = TTransport.TFramedTransport(
        TSocket.TSocket(options.annotator_host, options.annotator_port))
    protocol = TCompactProtocol.TCompactProtocol(transport)
    client = Annotator.Client(protocol)

    augf = AnalyticUUIDGeneratorFactory()
    aug = augf.create()
    c = Communication(
        id="",
        text=text,
        uuid=aug.next(),
        type="user-supplied input",
        metadata=AnnotationMetadata(timestamp=int(time.time()),
                                    tool="stdin"),
        sectionList=[
            Section(uuid=aug.next(),
                    sentenceList=[],
                    kind="paragraph",
                    textSpan=TextSpan(start=0, ending=len(text)))
        ],
        entitySetList=[],
        entityMentionSetList=[],
    )

    transport.open()
    try:
        new_c = client.annotate(c)
    finally:
        # One connection is opened per request; close it so the handler
        # does not leak a socket on every call.
        transport.close()

    form = (
        '<form action="/" method="post"> '
        'Enter or paste some text: <input name="text" type="text" /> '
        '<input value="Submit" type="submit" /> '
        '</form> '
    )

    # The text comes straight from the request and the entity fields come
    # from the annotator, so both are escaped before being placed in HTML.
    heading = "<h3>%s</h3>" % _esc(text)
    entity_blocks = [
        "\n".join(
            "<br>%s %s" % (_esc(e.type), _esc(e.canonicalName))
            for e in es.entityList
        )
        for es in new_c.entitySetList
    ]
    return form + "\n".join([heading] + entity_blocks)
sentence.tokenization = Tokenization(uuid = aug.next(), kind = TokenizationKind.TOKEN_LIST, tokenList = TokenList(tokenList=[]), tokenTaggingList = [], metadata = AnnotationMetadata(timestamp=int(time.time()), tool="nltk")) for i, token in enumerate(nltk.word_tokenize(text)): logging.info("Found token %s", token) sentence.tokenization.tokenList.tokenList.append(Token(tokenIndex=i, text=token)) return communication if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("-p", "--port", dest="port", type=int, default=9090) options = parser.parse_args() logging.basicConfig(level=logging.INFO) handler = CommunicationHandler() processor = Annotator.Processor(handler) transport = TSocket.TServerSocket(port=options.port) ipfactory = TCompactProtocol.TCompactProtocolFactory() opfactory = TCompactProtocol.TCompactProtocolFactory() server = TNonblockingServer.TNonblockingServer(processor, transport, ipfactory, opfactory) logging.info('Starting the server...') server.serve()
# Interactive client: read lines from the terminal, send each one to the
# annotator service as a Communication, and print the annotated result.
# A blank line (or end of input) ends the session.
parser = argparse.ArgumentParser()
parser.add_argument("-p", "--port", dest="port", type=int, default=9090)
parser.add_argument("-H", "--host", dest="host", default="localhost")
options = parser.parse_args()

# Make socket
transport = TSocket.TSocket(options.host, options.port)

# Buffering is critical. Raw sockets are very slow
transport = TTransport.TBufferedTransport(transport)

# Wrap in a protocol
protocol = TCompactProtocol.TCompactProtocol(transport)

# Create a client to use the protocol encoder
client = Annotator.Client(protocol)

# Connect!
transport.open()
try:
    while True:
        try:
            s = raw_input("Write some text > ")
        except EOFError:
            # End of input (e.g. Ctrl-D) ends the session like a blank line
            # instead of crashing with a traceback.
            break
        if re.match(r"^\s*$", s):
            break
        augf = AnalyticUUIDGeneratorFactory()
        aug = augf.create()
        c = Communication(id="",
                          text=s,
                          uuid=aug.next(),
                          type="tweet",
                          metadata=AnnotationMetadata(timestamp=0,
                                                      tool="stdin"),
                          lidList=[])
        new_c = client.annotate(c)
        print(new_c)
finally:
    # Always release the connection, whether the loop ended normally or a
    # call to the service failed.
    transport.close()
def __init__(self, implementation):
    """Wrap ``implementation`` in an ``Annotator.Processor``.

    The processor is stored on ``self.processor``.
    """
    processor = Annotator.Processor(implementation)
    self.processor = processor