def setUp(self):
    """Build the fixtures: mocked news source, mocked state, EasySRL service.

    Sets `self.mocked_src`, `self.state` and `self.svc`.
    """
    self.mocked_src = MockedNewsSource()

    root = logging.getLogger('marbles')
    # Loggers are process-wide, so adding a handler on every call would grow
    # the handler list each time this fixture runs. One NullHandler is enough.
    if not any(isinstance(h, logging.NullHandler) for h in root.handlers):
        root.addHandler(logging.NullHandler())

    logger = logging.getLogger(__name__)
    self.state = MockedState(logger)
    self.svc = grpc.CcgParserService(daemon='easysrl')
def _write_encoded_lines(path, encoded_lines):
    """Write already-encoded byte strings to `path`, separated by newlines."""
    # Binary mode: the payload is built with b'\n'.join, and a text-mode
    # handle rejects bytes on Python 3.
    with open(path, 'wb') as fd:
        fd.write(b'\n'.join(encoded_lines))


def make_derivations(daemon):
    """Parse the CCGbank RAW files with a gRPC parser and save the results.

    Reads the module-level names `projdir` (project root directory) and
    `idsrch` (compiled pattern exposing an `id` group for RAW file names).

    For each file in <projdir>/data/ldc/ccgbank_1_1/data/RAW whose name
    matches `idsrch`, every line is sent to the parser. Results go to
    <projdir>/data/ldc/<daemon>/ccgbank/:

    * ccg_derivation<id>.txt - one derivation per input line; a line that
      failed to parse is replaced by a '# FAILED: ...' comment so that line
      numbers stay aligned with the input.
    * ccg_failed<id>.txt - the input lines that failed (only written when
      there is at least one).

    Args:
        daemon: Name passed to `grpc.CcgParserService`; also used as the
            output sub-directory name.
    """
    esrlpath = os.path.join(projdir, 'data', 'ldc', daemon, 'ccgbank')
    if not os.path.exists(esrlpath):
        os.makedirs(esrlpath)
    ldcpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data', 'RAW')

    progress = 0
    failed_total = 0
    svc = grpc.CcgParserService(daemon)
    try:
        # Everything after the service is created runs under the finally
        # clause so the service is shut down even if opening the client or
        # listing the directory fails.
        stub = svc.open_client()
        for fname in os.listdir(ldcpath):
            # Check the name before touching the file so that entries which
            # are not RAW files are never opened.
            m = idsrch.match(fname)
            if m is None:
                continue
            with open(os.path.join(ldcpath, fname), 'r') as fd:
                lines = fd.readlines()

            derivations = []
            failed_parse = []
            for ln in lines:
                # Parse with the daemon via gRPC. Any failure is recorded
                # and processing continues with the next line.
                try:
                    ccg = grpc.ccg_parse(stub, ln)
                    derivations.append(safe_utf8_encode(ccg.replace('\n', '')))
                except Exception:
                    failed_parse.append(safe_utf8_encode(ln.strip()))
                    # Add comment so line numbers match id's
                    derivations.append(
                        safe_utf8_encode('# FAILED: ' + ln.strip()))
                # NOTE(review): progress is ticked once per input line here;
                # confirm against the original layout.
                progress = print_progress(progress, 10)

            file_id = m.group('id')
            if derivations:
                _write_encoded_lines(
                    os.path.join(esrlpath, 'ccg_derivation%s.txt' % file_id),
                    derivations)
            failed_total += len(failed_parse)
            if failed_parse:
                _write_encoded_lines(
                    os.path.join(esrlpath, 'ccg_failed%s.txt' % file_id),
                    failed_parse)
    finally:
        print_progress(progress, 10, done=True)
        svc.shutdown()

    if failed_total != 0:
        print('THERE WERE %d PARSE FAILURES' % failed_total)
def on_start(self, workdir):
    """Launch the gRPC CCG parser service and create the news-queue reader.

    Sets `self.grpc_daemon` and `self.parsers`.

    Args:
        workdir: Forwarded to `grpc.CcgParserService` as `workdir`.
    """
    # The reader depends on the parser service, so bring that up first.
    self.grpc_daemon = grpc.CcgParserService(
        self.grpc_daemon_name,
        workdir=workdir,
        extra_args=self.extra_args,
        jarfile=self.jar_file)

    # Each reader thread would need its own resources (S3, SQS etc); only
    # one reader is created here.
    # NOTE(review): news_queue_name, ccg_queue_name and state are not defined
    # in this method - they are resolved from an outer scope.
    parser_stub = self.grpc_daemon.open_client()
    resources = AwsNewsQueueReaderResources(
        parser_stub, news_queue_name, ccg_queue_name)
    reader = AwsNewsQueueReader(resources, state, CO_NO_WIKI_SEARCH)
    self.parsers = [reader]
def setUp(self):
    """Configure 'marbles' logging and open a client to the neuralccg service.

    Sets `self.logger`, `self.svc` and `self.stub`. When the module-level
    flag `DPRINT_ON` is true, log messages are also printed to the console.
    """
    console_name = 'marbles-test-console'

    self.logger = logging.getLogger('marbles')
    self.logger.setLevel(logging.DEBUG)
    if DPRINT_ON:
        # Loggers are process-wide. Adding a fresh StreamHandler on every
        # call would print each record once per earlier call, so the handler
        # is tagged with a name and attached only if not already present.
        already_attached = any(
            h.get_name() == console_name for h in self.logger.handlers)
        if not already_attached:
            console_handler = logging.StreamHandler()
            console_handler.set_name(console_name)
            console_handler.setLevel(logging.DEBUG)
            self.logger.addHandler(console_handler)

    # Load gRPC service
    self.svc = grpc.CcgParserService('neuralccg')
    self.stub = self.svc.open_client()
def on_start(self, workdir):
    """Launch the CCG parser service, then start the InfoX gRPC server.

    Sets `self.grpc_daemon` and `self.server`. The server listens on
    `self.port` using an insecure port.

    Args:
        workdir: Forwarded to `gsvc.CcgParserService` as `workdir`.
    """
    # The InfoX handler needs a parser client, so the parser service is
    # brought up first. Debug is enabled whenever we are not daemonized.
    self.grpc_daemon = gsvc.CcgParserService(
        self.grpc_daemon_name,
        workdir=workdir,
        extra_args=self.extra_args,
        jarfile=self.jar_file,
        debug=not self.state.daemonize)

    # Wire the InfoX request handler to the parser client.
    parser_stub = self.grpc_daemon.open_client()
    svc_handler = InfoxService(parser_stub, self.state)

    # Serve InfoX requests from a pool of 10 worker threads.
    worker_pool = futures.ThreadPoolExecutor(max_workers=10)
    self.server = grpc.server(worker_pool)
    infox_service_pb2.add_InfoxServiceServicer_to_server(
        svc_handler, self.server)
    self.server.add_insecure_port('[::]:%d' % self.port)
    self.server.start()
def setUp(self):
    """Create the 'easysrl' parser service and open a client stub to it.

    Sets `self.svc` (the service) and `self.stub` (its client).
    """
    service = grpc.CcgParserService('easysrl')
    # Publish the service before opening the client so it is reachable from
    # the instance even if open_client() raises.
    self.svc = service
    self.stub = service.open_client()
with open(out_file, 'w') as fd: for txt in lines: print(txt) fd.write(txt) fd.write('\n') SVCLIST = ['neuralccg'] if __name__ == '__main__': idsrch = re.compile(r'[^.]+\.(?P<id>\d+)\.raw') estub = None nstub = None if 'easysrl' in SVCLIST: esvc = grpc.CcgParserService('easysrl') estub = esvc.open_client() if 'neuralccg' in SVCLIST: nsvc = grpc.CcgParserService('neuralccg') nstub = nsvc.open_client() try: allfiles = [] autopath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data', 'AUTO') rawpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data', 'RAW') mappath = os.path.join(projdir, 'data', 'ldc', 'mapping') outpath = os.path.join(projdir, 'data', 'ldc', 'compare') if not os.path.exists(outpath): os.makedirs(outpath)
#from marbles.ie.parse import parse_ccg_derivation from marbles.ie.drt.common import SHOW_LINEAR from marbles.ie.utils.text import preprocess_sentence titleRe = options.title or r'^\s*[A-Z][-A-Z\s\.]*$' wordsep = options.wordsep or '-' outfile = options.outfile or None daemon = options.daemon or 'easysrl' if len(args) == 0: die('missing filename') if daemon not in ['easysrl', 'neuralccg']: die('daemon must be easysrl or neuralccg') svc = grpc.CcgParserService(daemon) stub = svc.open_client() try: sessionId = grpc.DEFAULT_SESSION if options.ofmt is not None: if options.ofmt not in [ 'ccgbank', 'html', 'logic', 'extended', 'drs' ]: die('bad output format %s, must be ccgbank|html|logic|extended' % options.ofmt) # Create a session to match output format, default is CCGBANK if options.ofmt != 'ccgbank' and options.ofmt != 'drs': sessionId = grpc.create_session(stub, options.ofmt) titleSrch = re.compile(titleRe)
def setUp(self):
    """Create the 'neuralccg' parser service and open a client stub to it.

    Sets `self.svc` (the service) and `self.stub` (its client).
    """
    service = grpc.CcgParserService('neuralccg')
    # Publish the service before opening the client so it is reachable from
    # the instance even if open_client() raises.
    self.svc = service
    self.stub = service.open_client()