# run(self): worker entry point that replays Thrift-serialized events to a server.
#
# What the visible statements do:
#   * Opens a TSocket to self.server_ip / int(self.server_port), wraps it in a
#     TFramedTransport, and builds a ThriftNeloEventServer.Client over a
#     TBinaryProtocol.
#   * While stop_flag is true: opens `file_name` through TFileObjectTransport +
#     TBufferedTransport, resets stop_flag to False, and reads ThriftNeloEvent
#     records one after another with evt.read(...).
#   * For every (prjName, logCnt) in prj_dict: if logCnt_dict already has the
#     project and its counter is below int(logCnt), the event is stamped with
#     projectName and sendTime (milliseconds from time.time()), sent with
#     client.ackedAppend, the result is passed to tps_remember and added to
#     err_local, the counter is incremented, and stop_flag is set to True.
#     If the project has no counter yet, the event is sent the same way and
#     the counter is initialised to 1.
#   * A TException during a send is printed together with the project name.
#   * EOFError from the read loop closes buffered_transport.
#   * Finally work_numbers is decremented between work_mutex.acquire() and
#     work_mutex.release(), and the socket is closed.
#
# NOTE(review): the first send for a project (the `else` branch) does not
#   refresh evt.sendTime, unlike the counted branch -- confirm this is intended.
# NOTE(review): this definition is stored on a single physical line, so the
#   original nesting is not recoverable from here. In particular it is unclear
#   whether the `stop_flag = True` after buffered_transport.close() belongs to
#   the EOFError handler or follows the while loop -- TODO confirm.
# NOTE(review): the outer `try:` has no visible except/finally clause; the
#   definition looks truncated. The lock release and socket.close() are not
#   visibly protected against exceptions.
# file_name, prj_dict, logCnt_dict and tps_remember are not defined in this
#   block; presumably module-level names -- verify.
def run(self): global work_mutex global work_numbers err_local = 0 try: socket = TSocket(self.server_ip, int(self.server_port)) transport = TFramedTransport(socket) transport.open() protocol = TBinaryProtocol.TBinaryProtocol(transport) client = ThriftNeloEventServer.Client(protocol) stop_flag = True while stop_flag: #read thrift from file f = file(file_name, 'r') fd_transport = TFileObjectTransport(f) buffered_transport = TBufferedTransport(fd_transport) binary_protocol = TBinaryProtocol.TBinaryProtocol( buffered_transport) fd_transport.open() stop_flag = False try: evt = ThriftNeloEvent() while True: evt.read(binary_protocol) #send the log to each project name for prjName, logCnt in prj_dict.items(): try: if logCnt_dict.has_key(prjName): if int(logCnt_dict[prjName]) < int(logCnt): evt.projectName = prjName evt.sendTime = int(time.time() * 1000) err = client.ackedAppend(evt) tps_remember(err) err_local += err logCnt_dict[ prjName] = logCnt_dict[prjName] + 1 stop_flag = True else: evt.projectName = prjName err = client.ackedAppend(evt) tps_remember(err) err_local += err logCnt_dict[prjName] = 1 stop_flag = True except TException, msg: print msg, prjName except EOFError, msg: buffered_transport.close() #close the transport stop_flag = True work_mutex.acquire() work_numbers -= 1 work_mutex.release() socket.close()
# run(self): worker entry point that replays Thrift-serialized events to a server.
#
# Visible behaviour, in order:
#   1. Connect: TSocket(self.server_ip, int(self.server_port)) wrapped in a
#      TFramedTransport; a ThriftNeloEventServer.Client is created over a
#      TBinaryProtocol on that transport.
#   2. Replay loop (`while stop_flag`): `file_name` is opened and wrapped in
#      TFileObjectTransport -> TBufferedTransport -> TBinaryProtocol; stop_flag
#      is cleared; ThriftNeloEvent records are read repeatedly via evt.read().
#   3. Fan-out: for each (prjName, logCnt) in prj_dict the event is sent with
#      client.ackedAppend when the project has no entry in logCnt_dict yet, or
#      when its counter is still below int(logCnt). Each send passes the result
#      to tps_remember, adds it to err_local, bumps the counter in logCnt_dict
#      and sets stop_flag back to True.
#   4. Errors: TException from a send is printed with the project name;
#      EOFError from the read loop closes buffered_transport.
#   5. Shutdown: work_numbers is decremented between work_mutex.acquire() and
#      work_mutex.release(), then socket.close() is called.
#
# NOTE(review): evt.sendTime is only assigned in the branch where the project
#   already has a counter; the first send per project reuses whatever sendTime
#   the event already carries -- confirm this is intended.
# NOTE(review): the whole definition sits on one physical line, so statement
#   nesting cannot be determined from here (e.g. whether the `stop_flag = True`
#   following buffered_transport.close() is inside the EOFError handler) --
#   TODO confirm against the properly indented original.
# NOTE(review): the outer `try:` has no visible except/finally; the definition
#   appears to be cut off.
# file_name, prj_dict, logCnt_dict and tps_remember come from outside this
#   block; presumably module-level -- verify.
def run(self): global work_mutex global work_numbers err_local = 0 try: socket = TSocket(self.server_ip, int(self.server_port)) transport = TFramedTransport(socket) transport.open() protocol = TBinaryProtocol.TBinaryProtocol(transport) client = ThriftNeloEventServer.Client(protocol) stop_flag = True while stop_flag: #read thrift from file f = file(file_name, 'r') fd_transport = TFileObjectTransport(f) buffered_transport = TBufferedTransport(fd_transport) binary_protocol = TBinaryProtocol.TBinaryProtocol(buffered_transport) fd_transport.open() stop_flag = False try: evt = ThriftNeloEvent() while True: evt.read(binary_protocol) #send the log to each project name for prjName, logCnt in prj_dict.items(): try: if logCnt_dict.has_key(prjName): if int(logCnt_dict[prjName]) < int(logCnt): evt.projectName = prjName evt.sendTime = int(time.time() * 1000) err = client.ackedAppend(evt) tps_remember(err) err_local += err logCnt_dict[prjName] = logCnt_dict[prjName] + 1 stop_flag = True else: evt.projectName = prjName err = client.ackedAppend(evt) tps_remember(err) err_local += err logCnt_dict[prjName] = 1 stop_flag = True except TException, msg: print msg, prjName except EOFError,msg: buffered_transport.close() #close the transport stop_flag = True work_mutex.acquire() work_numbers -= 1 work_mutex.release() socket.close()
def get_mention_from_wikilink_thrift_file(fn):
    """Collect mention contexts from one wikilink Thrift shard, grouped by URL.

    The shard is read from ``args.thrift_data_dir + '/%03d' % fn`` as a
    sequence of ``WikiLinkItem`` records. For every mention that has a
    context, the mention's wiki URL is passed through ``simplify_wiki_url``,
    replaced by ``redirect[hash(url)]`` when such an entry exists, and kept
    only if the resulting URL is in ``POOL``.

    Args:
        fn: shard number; formatted as a zero-padded three-digit file name.

    Returns:
        A ``defaultdict`` mapping URL -> list of ``[left, middle, right]``
        context triples. Its ``default_factory`` is reset to ``None`` before
        returning, so looking up a missing URL raises ``KeyError`` instead of
        silently inserting an empty list.
    """
    out_val = defaultdict(list)
    shard_path = args.thrift_data_dir + '/%03d' % fn

    # Binary mode: the shard is a Thrift binary stream, not text. The ``with``
    # block guarantees the handle is closed even if decoding fails part-way.
    with open(shard_path, 'rb') as f:
        p = TBinaryProtocol.TBinaryProtocolAccelerated(TFileObjectTransport(f))
        pp = WikiLinkItem()
        while True:
            try:
                pp.read(p)
            except EOFError:
                # End of shard.
                break
            for m in pp.mentions:
                c = m.context
                if c is None:
                    continue
                url = simplify_wiki_url(m.wiki_url)
                # Follow url redirect.
                try:
                    url = redirect[hash(url)]
                except KeyError:
                    pass
                if url in POOL:
                    out_val[url].append([c.left, c.middle, c.right])

    # Progress line: shard number, distinct URLs, total contexts collected.
    print("%s %s %s" % (fn, len(out_val),
                        sum(len(e) for e in out_val.values())))
    out_val.default_factory = None  # FINALIZE out_val
    return out_val
def _process_socket(self, client, address):
    """A greenlet for handling a single client.

    Wraps the file object returned by ``client.makefile()`` in a
    ``TFileObjectTransport``, builds input/output transports and protocols
    from the configured factories, and calls ``self.processor.process`` in a
    loop until an exception ends it.

    Args:
        client: the connected client socket; only ``makefile()`` is used.
        address: the peer address (not used in this method).
    """
    client = TFileObjectTransport(client.makefile())
    itrans = self.inputTransportFactory.getTransport(client)
    otrans = self.outputTransportFactory.getTransport(client)
    iprot = self.inputProtocolFactory.getProtocol(itrans)
    oprot = self.outputProtocolFactory.getProtocol(otrans)

    try:
        while True:
            self.processor.process(iprot, oprot)
    except EOFError:
        # End of the input stream is treated as a normal disconnect.
        pass
    except Exception:
        self.log.exception("caught exception while processing thrift request")
    finally:
        # Close in ``finally`` so the transports are also released when the
        # loop is left by something outside the Exception hierarchy (for
        # example the greenlet being killed, or KeyboardInterrupt).
        try:
            itrans.close()
        finally:
            otrans.close()
def _process_socket(self, client, address):
    """A greenlet for handling a single client.

    The client socket's ``makefile()`` object is wrapped in a
    ``TFileObjectTransport``; transports and protocols are obtained from the
    instance's input/output factories, and ``self.processor.process`` is
    invoked repeatedly until an exception terminates the loop.

    Args:
        client: the connected client socket; only ``makefile()`` is used.
        address: the peer address (not used in this method).
    """
    client = TFileObjectTransport(client.makefile())
    itrans = self.inputTransportFactory.getTransport(client)
    otrans = self.outputTransportFactory.getTransport(client)
    iprot = self.inputProtocolFactory.getProtocol(itrans)
    oprot = self.outputProtocolFactory.getProtocol(otrans)

    try:
        while True:
            self.processor.process(iprot, oprot)
    except EOFError:
        # End of the input stream: leave the loop quietly.
        pass
    except Exception:
        self.log.exception(
            "caught exception while processing thrift request")
    finally:
        # Cleanup must run on every exit path, including exceptions that do
        # not derive from Exception (e.g. a killed greenlet), otherwise the
        # transports stay open.
        try:
            itrans.close()
        finally:
            otrans.close()
# _handle_request(self, listener_name, sock, addr): serve Thrift calls arriving
# on one accepted socket.
#
# What the visible statements do:
#   * Wraps sock.makefile() in a TFileObjectTransport and derives input/output
#     transports from self.tfactory and protocols from self.pfactory.
#   * Loops: reads the message header (name, type, seqid) with
#     iprot.readMessageBegin() and records request_start = time.time().
#   * Creates and starts Timeout(self.cfg.timeout, Timeout) for the request.
#   * If `name` is not a key of self.wsgi._processMap: skips the argument
#     struct, writes a TApplicationException(UNKNOWN_METHOD) reply, flushes it,
#     and raises ThriftFuncNotFound. Otherwise calls
#     self.wsgi._processMap[name](self.wsgi, seqid, iprot, oprot).
#   * ThriftFuncNotFound: logs the error plus an access entry tagged
#     "FUNC_NOT_FOUND" with the elapsed time, then leaves the loop.
#   * Timeout: logs the error plus an access entry tagged "TIMEOUT" with the
#     elapsed time, then leaves the loop.
#
# NOTE(review): `listener_name` is not used in the visible part of the method.
# NOTE(review): the local name `type` shadows the builtin.
# NOTE(review): no timeout_con.cancel() and no transport close() are visible
#   in this excerpt -- TODO confirm they exist in the remainder.
# NOTE(review): the outer `try:` has no visible except/finally and the
#   definition is stored on one physical line, so it appears truncated and the
#   original nesting cannot be confirmed from here.
def _handle_request(self, listener_name, sock, addr): client = TFileObjectTransport(sock.makefile()) itrans = self.tfactory.getTransport(client) otrans = self.tfactory.getTransport(client) iprot = self.pfactory.getProtocol(itrans) oprot = self.pfactory.getProtocol(otrans) try: while True: (name, type, seqid) = iprot.readMessageBegin() request_start = time.time() try: timeout_con = Timeout(self.cfg.timeout, Timeout) timeout_con.start() if name not in self.wsgi._processMap: iprot.skip(TType.STRUCT) iprot.readMessageEnd() x = TApplicationException( TApplicationException.UNKNOWN_METHOD, "Unknown function %s" % (name)) oprot.writeMessageBegin( name, TMessageType.EXCEPTION, seqid) x.write(oprot) oprot.writeMessageEnd() oprot.trans.flush() raise ThriftFuncNotFound else: self.wsgi._processMap[name](self.wsgi, seqid, iprot, oprot) except ThriftFuncNotFound, ex: self.log.error("Unknown function %s" % (name)) self.log.access( addr, name, "FUNC_NOT_FOUND", time.time() - request_start) break except Timeout, ex: self.log.error("A greenlet process timeout.") self.log.access( addr, name, "TIMEOUT", time.time() - request_start) break
def read_thrift(file_obj, ttype):
    """Read a thrift structure from the given fo.

    Args:
        file_obj: seekable binary file object positioned at the start of a
            compact-protocol encoded struct.
        ttype: zero-argument callable (normally the generated Thrift class)
            producing the object whose ``read(protocol)`` decodes the struct.

    Returns:
        The decoded ``ttype`` instance. On return ``file_obj`` is positioned
        immediately after the bytes that were consumed by the decode.
    """
    from thrift.transport.TTransport import TFileObjectTransport, TBufferedTransport

    # set up the protocol chain
    ft = TFileObjectTransport(file_obj)
    bufsize = 2 ** 16
    # for accelerated reading ensure that we wrap this so that the CReadable
    # transport can be used.
    bt = TBufferedTransport(ft, bufsize)
    pin = TCompactProtocol(bt)

    # read out type
    obj = ttype()
    obj.read(pin)

    # The buffered transport reads ahead, so file_obj is now past the end of
    # the struct. The bytes still unread in the transport's current buffer are
    # exactly the tail of what was pulled from file_obj, so rewinding by that
    # amount lands on the first unconsumed byte. This does not depend on how
    # many refills happened or on whether the last one was a short read.
    read_buf = bt.cstringio_buf
    unread = len(read_buf.getvalue()) - read_buf.tell()
    file_obj.seek(file_obj.tell() - unread)
    return obj
# Counts, over wikilink Thrift shards 001..109, how many mentions with a
# context point at each wiki URL, and pickles the resulting dict.
arg_parser.add_argument('--out_fn', default='data/wiki_link_url_counts.pkl', type=str)
args = arg_parser.parse_args()

# The generated Thrift classes are imported from a directory given on the
# command line, so it has to be put on sys.path before the import below.
import sys
sys.path.append(args.thrift_class_dir)
from edu.umass.cs.iesl.wikilink.expanded.data.constants import WikiLinkItem

# A single record object is reused for every read.
pp = WikiLinkItem()

from collections import defaultdict
out_val = defaultdict(int)

for fn in xrange(1, 110):
    # Binary mode: the shards are Thrift binary streams, not text.
    with open(args.thrift_data_dir + '/%03d' % fn, 'rb') as f:
        p = TBinaryProtocol.TBinaryProtocolAccelerated(TFileObjectTransport(f))
        # Progress: shard number and number of distinct URLs seen so far.
        print("%s %s" % (fn, len(out_val)))
        while True:
            try:
                pp.read(p)
            except EOFError:
                # End of this shard.
                break
            for m in pp.mentions:
                c = m.context
                if c is not None:
                    url = m.wiki_url
                    out_val[url] += 1

import cPickle as pickle
with open(args.out_fn, 'wb') as out_f:
    # Store a plain dict (not the defaultdict); -1 selects the highest
    # pickle protocol available.
    pickle.dump(dict(out_val), out_f, -1)