def main():
    """Count records in settings.BIG_FILE matching settings.TARGET_USERNAME.

    The file is split into BIGFILE_MP_CHUNKS byte ranges; one producer
    process per chunk runs an opener/cat/grep/writer pipeline that pushes
    matching records onto a shared queue, and the parent drains the queue
    after every producer has joined.
    """
    sfile = settings.BIG_FILE
    fsize = os.path.getsize(sfile)

    # size_chunks() only needs the handle to compute byte offsets, so the
    # handle can be closed again right away.
    with open(sfile, "r") as fh:
        chunks = size_chunks(fh, fsize, num_chunks=settings.BIGFILE_MP_CHUNKS)

    # NOTE(review): Queue must be a process-safe queue (multiprocessing.Queue)
    # for the child processes' puts to be visible here -- confirm the
    # file-level import.
    q = Queue()
    pattern = re.compile(settings.TARGET_USERNAME)

    # One producer process per chunk.  (The original also opened one extra
    # file handle per chunk that no pipeline ever used; those handles have
    # been removed as pure resource waste.)
    producers = []
    for chunk in chunks:
        o = opener(cat(chunk, grep(pattern, writer(q))))
        t = multiprocessing.Process(target=sender, args=(o,))
        t.daemon = True
        producers.append(t)

    for p in producers:
        p.start()
    for p in producers:
        p.join()

    # All producers are done -- add the sentinel so the drain loop terminates.
    q.put(None)

    recsmatch = 0
    print("Before queue comp")
    while True:
        x = q.get()
        if x is None:  # sentinel reached: queue fully drained
            break
        recsmatch += 1
    print("After queue comp")
    print("recsmatch={r} chunks={c}".format(r=recsmatch, c=settings.BIGFILE_MP_CHUNKS))
def main():
    """Match TARGET_USERNAME records in BIG_FILE, one process per chunk.

    Producers push matching records onto a shared queue; after they all
    join, a sentinel is enqueued and the parent counts what was produced.
    """
    source = settings.BIG_FILE
    total_bytes = os.path.getsize(source)

    # The handle is only needed while size_chunks() computes byte offsets.
    with open(source, "r") as handle:
        chunk_list = size_chunks(handle, total_bytes,
                                 num_chunks=settings.BIGFILE_MP_CHUNKS)

    q = Queue()
    pattern = re.compile(settings.TARGET_USERNAME)

    workers = []
    open_handles = []
    for piece in chunk_list:
        open_handles.append(open(source, "r"))
        pipeline = opener(cat(piece, grep(pattern, writer(q))))
        proc = multiprocessing.Process(target=sender, args=(pipeline,))
        proc.daemon = True
        workers.append(proc)

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    q.put(None)  # sentinel: nothing more will be produced

    for handle in open_handles:
        handle.close()

    recsmatch = 0
    print("Before queue comp")
    for _ in iter(q.get, None):  # drain until the sentinel compares equal
        recsmatch += 1
    print("After queue comp")
    print("recsmatch={r} chunks={c}".format(r=recsmatch,
                                            c=settings.BIGFILE_MP_CHUNKS))
def main():
    """Gevent variant: count TARGET_USERNAME matches in BIG_FILE by chunk."""
    # NOTE(review): in the mangled one-line source, the usage message and
    # sys.exit(1) appeared first with no def header or guard visible;
    # presumably they sat under an argv-count check that was lost when the
    # file was collapsed -- reconstructed here, confirm against history.
    if len(sys.argv) != 1:
        print("usage: %prog")
        sys.exit(1)

    sfile = settings.BIG_FILE
    fsize = os.path.getsize(sfile)
    with open(sfile, "r") as fh:
        chunks = size_chunks(fh, fsize,
                             num_chunks=settings.BIGFILE_GEVENT_CHUNKS)

    pattern = re.compile(settings.TARGET_USERNAME)

    # maxsize=0 makes the queue act like a channel: each put blocks until a
    # get retrieves the data, so it works like a CSP channel.
    q = gevent.queue.Queue(maxsize=0)

    # consumer
    con = gevent.spawn(count_matches, q)

    # producers
    # NOTE(review): these pipelines never receive ``q`` (writer() is called
    # with no queue) nor a chunk/handle, the opened fhandles go unused, and
    # no sentinel put / con.join() survives uncommented, so the consumer
    # can never report -- this looks unfinished; compare with the
    # multiprocessing variant before relying on the result.
    fhandles = [open(sfile, "r")
                for i in xrange(0, settings.BIGFILE_GEVENT_CHUNKS)]
    jobs = [gevent.spawn(opener, cat(grep(pattern, writer())))
            for i in xrange(0, settings.BIGFILE_GEVENT_CHUNKS)]
    gevent.joinall(jobs, timeout=10)

    for f in fhandles:
        f.close()
def main():
    """Gevent variant: spawn one greenlet per chunk of BIG_FILE."""
    # NOTE(review): this collapsed fragment began at the getsize() call --
    # the def header and the ``sfile`` assignment were lost in the mangled
    # formatting and are reconstructed here from the sibling variant;
    # confirm against version history.
    sfile = settings.BIG_FILE
    fsize = os.path.getsize(sfile)
    with open(sfile, "r") as fh:
        chunks = size_chunks(fh, fsize,
                             num_chunks=settings.BIGFILE_GEVENT_CHUNKS)

    pattern = re.compile(settings.TARGET_USERNAME)

    # maxsize=0 turns the queue into a channel: puts block until a matching
    # get, so producer and consumer proceed in lockstep (CSP-style).
    q = gevent.queue.Queue(maxsize=0)

    # consumer greenlet
    con = gevent.spawn(count_matches, q)

    # producer greenlets
    # NOTE(review): writer() is given no queue and cat() no chunk, the
    # opened fhandles are never used by the pipelines, and neither a
    # sentinel put nor con.join() survives uncommented -- the consumer can
    # never finish counting; this looks unfinished.
    fhandles = [
        open(sfile, "r")
        for i in xrange(0, settings.BIGFILE_GEVENT_CHUNKS)
    ]
    jobs = [
        gevent.spawn(opener, cat(grep(pattern, writer())))
        for i in xrange(0, settings.BIGFILE_GEVENT_CHUNKS)
    ]
    gevent.joinall(jobs, timeout=10)

    for f in fhandles:
        f.close()