class PoolStreamData(StreamData):
    def __init__(self, feat_path, feature_maker, label_path=None, fold_in_cv=None):
        self.pool = PriorityQueue(100000)
        super(PoolStreamData, self).__init__(feat_path, feature_maker, label_path, fold_in_cv)
        self.fill_pool()

    def fill_pool(self):
        # Keep drawing from the underlying stream until the pool is full
        # or the stream is exhausted.
        while not self.pool.full():
            try:
                ins = super(PoolStreamData, self).next()
                # Insert with a random priority: draining the heap then
                # yields the instances in an approximately shuffled order.
                self.pool.put((random.random(), ins))
            except StopIteration:
                break

    def rewind(self):
        self.pool = PriorityQueue(100000)
        super(PoolStreamData, self).rewind()
        self.fill_pool()

    def next(self):
        try:
            (_, ins) = self.pool.get(False)
        except Empty:
            raise StopIteration
        self.fill_pool()
        return ins
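# Minimal standalone sketch (not part of the class above) of the
# random-priority trick PoolStreamData relies on: filling a PriorityQueue
# with (random.random(), item) pairs and draining it returns the items in
# an approximately shuffled order. The function name is made up for
# illustration.
from Queue import PriorityQueue
import random

def shuffled(items):
    pool = PriorityQueue(len(items))
    for item in items:
        pool.put((random.random(), item))  # random key decides heap order
    out = []
    while not pool.empty():
        out.append(pool.get()[1])
    return out

# e.g. shuffled(range(10)) -> some permutation of 0..9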
class Client(object):
    def __init__(self, conn, addr):
        self.conn = conn
        self.addr = addr
        self.thread = None
        # Bounded queue of pending requests; extra requests are dropped.
        self.requests = PriorityQueue(10)
        self.alive = True

    def sendto(self, msg):
        self.conn.send(msg)

    def recvfrom(self, bits):
        res = self.conn.recv(bits)
        return res

    def close(self):
        self.conn.close()

    def updateThread(self, thread):
        self.thread = thread

    def getThread(self):
        return self.thread

    def getAddr(self):
        return self.addr

    def addRequest(self, request):
        if not self.requests.full():
            self.requests.put(request)

    def getRequest(self):
        if not self.empty():
            return self.requests.get()
        return None

    def empty(self):
        return self.requests.empty()

    def updateLife(self, boolean):
        self.alive = boolean

    def is_alive(self):
        return self.alive
def search_image(filepath, codebook, tfidf, control="with_tfidf"):
    print_detail = True
    k, _ = codebook.shape
    if print_detail:
        print "-------------------------------------------------------------------------------"
    bow = get_bow(filepath, codebook)
    if print_detail:
        print "-------------------------------------------------------------------------------"
        print "tfidf matrix shape:"
        print tfidf.shape
        print "-------------------------------------------------------------------------------"
        print "Bag of Word of: ", filepath
        print bow
        print "-------------------------------------------------------------------------------"
    _, l = tfidf.shape
    control = "no_tfidf"  # debug override: forces the no-tfidf branch below
    if control == "with_tfidf":
        idi = np.zeros((1, l))
        for i in range(k):
            if bow[i] != 0:
                idi = np.add(idi, tfidf[i])
        rank = [(i, j) for (i, j) in zip(range(l), idi.tolist()[0])]
    else:
        bow = [float(i) / sum(bow) for i in bow.tolist()]
        rank = np.dot(np.asarray(bow), tfidf)
        rank = [(i, j) for (i, j) in zip(range(l), rank.tolist())]
    # Keep the 50 most similar images in a bounded min-heap: evict the
    # current minimum whenever a better-scoring image shows up.
    q = PriorityQueue(50)
    for (x, y) in rank:
        if not q.full():
            q.put(Image(y, x))
        elif y > q.queue[0].similarity():
            q.get()
            q.put(Image(y, x))
    result = []
    while not q.empty():
        result.append(q.get())
    # images_data_path = "/Users/minhuigu/FoodAdvisor/app/outputs/images_data.txt"
    # images_folder = "/Users/minhuigu/Desktop/"
    # img_list = []
    # json_content = open(images_data_path).read()
    # for each in json.loads(json_content):
    #     img_list.append(images_folder + each['relpath'])
    #
    # for a in [i.id() for i in result][::-1]:
    #     print img_list[a]
    if print_detail:
        print "Best rank images: "
        print [i.id() for i in result][::-1]
        print "-------------------------------------------------------------------------------"
    # The heap drains in increasing similarity, so reverse for
    # decreasing order of similarity (the original used [::], a no-op).
    return [i.id() for i in result][::-1]
def kMostFreqHeapThreePass(nums, k):
    freq = {}
    heap = PriorityQueue(k)
    # Pass 1: count occurrences.
    for i in nums:
        freq[i] = freq.get(i, 0) + 1
    # Pass 2: keep the k highest counts in a bounded min-heap.
    for key, val in freq.iteritems():
        if not heap.full():
            heap.put((val, key))
        elif heap.queue[0][0] < val:
            heap.get()
            heap.put((val, key))
    # Pass 3: drain the heap (ascending frequency) and reverse for
    # most-frequent-first. (The original read heap.queue directly, but the
    # underlying heap list is not sorted, so it could return a wrong order.)
    result = []
    while not heap.empty():
        result.append(heap.get()[1])
    return result[::-1]
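# Quick check of kMostFreqHeapThreePass (input made up for illustration):
# 1 occurs three times, 2 twice, 3 once, so with k=2 the two most frequent
# elements come back most-frequent-first and 3 is dropped by the heap.
print kMostFreqHeapThreePass([1, 1, 1, 2, 2, 3], 2)  # expected: [1, 2]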
class CrawlerPQueue(PriorityQueue):
    # Wraps an unbounded PriorityQueue (a maxsize of -1 means no size
    # limit). Note the base initializer is never called, so only the
    # methods delegated below are safe to use.
    def __init__(self):
        self.pq = PriorityQueue(-1)

    def qsize(self):
        return self.pq.qsize()

    def empty(self):
        return self.pq.empty()

    def full(self):
        return self.pq.full()

    def put(self, item):
        self.pq.put(item)

    def get(self):
        return self.pq.get()
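# Hypothetical usage of CrawlerPQueue: items compare by their first tuple
# element, so the lowest priority number comes out first. The URLs below
# are made up for illustration.
frontier = CrawlerPQueue()
frontier.put((2, 'http://example.com/page'))
frontier.put((1, 'http://example.com/'))
print frontier.get()   # (1, 'http://example.com/') -- smallest priority first
print frontier.qsize() # 1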
from Queue import Queue, LifoQueue, PriorityQueue

q = Queue(maxsize=5)
lq = LifoQueue(maxsize=6)
pq = PriorityQueue(maxsize=5)
for i in range(5):
    q.put(i)
    lq.put(i)
    pq.put(i)

print "FIFO queue: %s, is empty: %s, size: %s, is full: %s" % (q.queue, q.empty(), q.qsize(), q.full())
print "LIFO queue: %s, is empty: %s, size: %s, is full: %s" % (lq.queue, lq.empty(), lq.qsize(), lq.full())
print "Priority queue: %s, is empty: %s, size: %s, is full: %s" % (pq.queue, pq.empty(), pq.qsize(), pq.full())

print q.get(), lq.get(), pq.get()

print "FIFO queue: %s, is empty: %s, size: %s, is full: %s" % (q.queue, q.empty(), q.qsize(), q.full())
print "LIFO queue: %s, is empty: %s, size: %s, is full: %s" % (lq.queue, lq.empty(), lq.qsize(), lq.full())
print "Priority queue: %s, is empty: %s, size: %s, is full: %s" % (pq.queue, pq.empty(), pq.qsize(), pq.full())
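# For reference, under Python 2 the script above should print roughly:
#   FIFO queue: deque([0, 1, 2, 3, 4]), is empty: False, size: 5, is full: True
#   LIFO queue: [0, 1, 2, 3, 4], is empty: False, size: 5, is full: False
#   Priority queue: [0, 1, 2, 3, 4], is empty: False, size: 5, is full: True
#   0 4 0
#   FIFO queue: deque([1, 2, 3, 4]), is empty: False, size: 4, is full: False
#   LIFO queue: [0, 1, 2, 3], is empty: False, size: 4, is full: False
#   Priority queue: [1, 3, 2, 4], is empty: False, size: 4, is full: False
# Note the LIFO queue is never full (maxsize=6), and the priority queue's
# internal list is a binary heap, not a sorted list.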
def find_match(d, song_id):
    real_song_id = song_id
    find_landmark_num = 0
    match_hash_num = 0
    hash_num = 0
    result_dic = defaultdict(lambda: defaultdict(lambda: 0))
    final_id = -1
    final_delta_t = -1
    max_match_num = 0
    r = getRedis(host=redisServerIp, dbnum=dbnum)
    for h in next_time_hash(d):
        hash_num = hash_num + 1
        start_time, hash_value = parse_starttime_hash(h)
        result_str_arr = r.smembers(hash_value)
        if result_str_arr and len(result_str_arr) > 0:
            # fixed: the original incremented from max_match_num by mistake
            match_hash_num = match_hash_num + 1
            for str_value in result_str_arr:
                find_landmark_num = find_landmark_num + 1
                song_id, start_time_song = parse_id_starttime(str_value)
                # if song_id == '1101':
                #     continue
                delta_t = start_time_song - start_time
                result_dic[song_id][delta_t] += 1
                # Smooth over +/-1 frame jitter in the time offset.
                hash_count = (
                    result_dic[song_id][delta_t]
                    + result_dic[song_id].get(delta_t - 1, 0)
                    + result_dic[song_id].get(delta_t + 1, 0)
                )
                if hash_count > max_match_num:
                    max_match_num = hash_count
                    final_id = song_id
                    final_delta_t = delta_t
                    # The original comment questioned this early exit; it stops
                    # scanning this hash's remaining candidates once a new
                    # maximum is found.
                    break
    top25 = 0
    second_max = 0
    second_id = 0
    real_song_hash_match = -1
    real_song_hash_match_time = -1
    song_below_20_num = 0
    # hash_thresh = 100
    #
    # for (s_id, song_value) in result_dic.iteritems():
    #     if len(song_value) < hash_thresh:
    #         song_below_20_num += 1
    print "song_below_20_num : ", song_below_20_num
    print "total_song_num : ", len(result_dic)
    top25_song_id_queue = PriorityQueue(25)
    for (s_id, song_value) in result_dic.iteritems():
        name_printed = False
        # Best (jitter-smoothed) matched-hash count for this song.
        max_matched_hash_number_per_song = 0
        for (dt, num) in song_value.iteritems():
            num = song_value.get(dt, 0) + song_value.get(dt - 1, 0) + song_value.get(dt + 1, 0)
            if max_matched_hash_number_per_song < num:
                max_matched_hash_number_per_song = num
            if int(real_song_id) == int(s_id):
                if real_song_hash_match < num:
                    real_song_hash_match = num
                    real_song_hash_match_time = dt
            if num > second_max and s_id != final_id:
                second_max = num
                second_id = s_id
            if num > max_match_num - max_match_num * 0.25:
                if not name_printed:
                    if s_id == final_id:
                        print "matched song"
                    print "song id: ", s_id
                    name_printed = True
                top25 = top25 + 1
                print ("delta t: ", dt, "hashnum: ", num)
        # Keep the 25 songs with the highest matched-hash counts.
        if top25_song_id_queue.full():
            top25_song_id_queue.get()
        top25_song_id_queue.put((max_matched_hash_number_per_song, s_id), False)
    # while not top25_song_id_queue.empty():
    #     _, song_id = top25_song_id_queue.get_nowait()
    #     filename = str(final_id) + "_" + str(song_id) + ".txt"
    #     if song_id == final_id:
    #         filename = str(final_id) + "_" + str(song_id) + "_final.txt"
    #     output_file = open(filename, 'w')
    #     song_result = result_dic[song_id]
    #     for delta_t, match_time in song_result.iteritems():
    #         output_file.write(str(match_time) + ',' + str(delta_t) + '\n')
    #     output_file.close()
    name_redis = getRedis()
    song_name = name_redis.get("song_id:" + str(final_id))
    print song_name
    print "find landmark num ", find_landmark_num
    print "highest match hash num", max_match_num
    # print "match hash num ", match_hash_num
    print "total hash num: ", hash_num
    is_match = "f"
    if str(real_song_id) == str(final_id):
        is_match = "t"
    ret = {
        "_is_match": is_match,
        "id": str(final_id),
        "delta_t": str(final_delta_t * 0.032),
        "real_song_hash_match": str(real_song_hash_match),
        "real_song_hash_match_time": str(real_song_hash_match_time),
        "real_song_id": real_song_id,
        "second_max_num": str(second_max),
        "second_id": str(second_id),
        "top25_num": str(top25),
        "total_hash_num": str(hash_num),
        "match_hash_num": str(max_match_num),
        # 'id_song_hash_time': str(max_match_num),
        "song_name": song_name.decode("utf-8").encode("utf-8"),
    }
    print json.dumps(ret)
    return ret
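# Minimal sketch of the time-offset voting idea behind find_match, with a
# made-up in-memory inverted index standing in for Redis: every query hash
# votes for (song_id, song_time - query_time), and the pair with the most
# votes identifies the matching song and its alignment. Names are chosen
# for illustration only.
from collections import defaultdict

def best_match(query_hashes, index):
    # query_hashes: iterable of (query_time, hash_value)
    # index: dict mapping hash_value -> list of (song_id, song_time)
    votes = defaultdict(int)
    for q_time, h in query_hashes:
        for s_id, s_time in index.get(h, ()):
            votes[(s_id, s_time - q_time)] += 1
    if not votes:
        return None
    (s_id, delta_t), count = max(votes.iteritems(), key=lambda kv: kv[1])
    return s_id, delta_t, count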
for i, it in enumerate(q):
    pool.put((it[-1], i))
    vmx, imx = max((vmx, imx), (it[-1], i))
    it.pop()
done = False
while not done:
    assert not pool.empty()
    vmn, imn = pool.get()
    if max(vmn[0], vmx[0]) > min(vmn[1], vmx[1]):
        #print 'Drop!'
        if len(q[imn]) == 0:
            done = True
            break
        vmn = q[imn][-1]
        q[imn].pop()
        assert not pool.full()
        pool.put((vmn, imn))
        vmx, imx = max((vmx, imx), (vmn, imn))
    else:
        #print 'Serv!'
        if min(vmn[1], vmx[1]) > 0:
            res += 1
        pool = PriorityQueue(n)
        vmx, imx = (-1, -1), 0
        for i, it in enumerate(q):
            if len(it) == 0:
                done = True
                break
            assert not pool.full()
            pool.put((it[-1], i))
            vmx, imx = max((vmx, imx), (it[-1], i))
class MonkServer(object):
    EXIT_WAIT_TIME = 3
    MAX_QUEUE_SIZE = 100000
    MAINTAIN_INTERVAL = 10000
    POLL_INTERVAL = 0.1
    EXECUTE_INTERVAL = 0.1

    def __init__(self, serverName='', config=None):
        if not config:
            self.ready = False
            return
        self.pq = PriorityQueue(self.MAX_QUEUE_SIZE)
        self.serverName = serverName
        self.lastMaintenance = now()
        self.ioLoop = tornado.ioloop.IOLoop.instance()
        self.httpServer = None
        self.port = 8888
        self.webApps = []
        self.brokers = self.init_brokers(config)
        if platform.system() == 'Windows':
            win32api.SetConsoleCtrlHandler(self._sig_handler, 1)
        else:
            signal.signal(signal.SIGINT, self._sig_handler)
            signal.signal(signal.SIGTERM, self._sig_handler)
        self.ready = True

    def _sig_handler(self, sig, frame):
        logger.warning('Caught signal : {}'.format(sig))
        self.ioLoop.add_callback(self._onexit)

    def _onexit(self):
        logger.info('stopping the server {}'.format(self.serverName))
        if self.httpServer:
            self.httpServer.stop()
        logger.info('exit in {} seconds'.format(self.EXIT_WAIT_TIME))
        #deadline = now() + self.EXIT_WAIT_TIME
        logger.info('onexit')
        self.onexit()
        logger.info('stopping ioloop')
        self.ioLoop.stop()
        for broker in self.brokers:
            logger.info('closing broker {}'.format(broker))
            broker.close()
        logger.info('stopping monkapi')
        monkapi.exits()
        #def stop_loop():
        #    logger.info('stopping loop')
        #    nowt = now()
        #    if nowt < deadline and (self.ioLoop._callbacks or self.ioLoop._timeouts):
        #        self.ioLoop.add_timeout(nowt + 1, stop_loop)
        #    else:
        #        self.ioLoop.stop()
        #        for broker in self.brokers:
        #            logger.info('closing broker')
        #            broker.close()
        #        logger.info('exiting monkapi')
        #        monkapi.exits()
        #stop_loop()
        logger.info('exited')

    def _maintain(self):
        self.maintain()
        self.ioLoop.add_timeout(now() + self.MAINTAIN_INTERVAL, self._maintain)

    def _poll(self):
        if self.pq.full():
            logger.debug('queue is full')
            self.ioLoop.add_timeout(now() + self.POLL_INTERVAL, self._poll)
        else:
            ready = filter(None, (broker.is_consumer_ready() for broker in self.brokers))
            if not ready:
                self._onexit()
                return
            taskScripts = filter(None, (broker.consume_one() for broker in self.brokers))
            for tscript in taskScripts:
                t = taskFactory.create(tscript)
                if t:
                    # Tasks are ordered by their priority field.
                    self.pq.put((t.priority, t), block=False)
            if taskScripts:
                #logger.debug('processing next task')
                self.ioLoop.add_callback(self._poll)
            else:
                #logger.debug('waiting on the polling')
                self.ioLoop.add_timeout(now() + self.POLL_INTERVAL, self._poll)

    def _execute(self):
        if self.pq.queue:
            try:
                priority, task = self.pq.get()
                task.act()
                logger.debug('executing {}'.format(task.name))
            except Exception as e:
                logger.debug(e.message)
                logger.debug(traceback.format_exc())
            finally:
                self.ioLoop.add_callback(self._execute)
        else:
            logger.debug('waiting for tasks {}'.format(now()))
            self.ioLoop.add_timeout(now() + self.EXECUTE_INTERVAL, self._execute)

    def add_application(self, regx, handler):
        self.webApps.append((regx, handler))

    def init_brokers(self, argvs):
        raise Exception('not implemented yet')

    def maintain(self):
        pass

    def onexit(self):
        pass

    def run(self):
        if not self.ready:
            logger.info('server {} is not initialized properly'.format(self.serverName))
            return
        self.ioLoop.add_timeout(now() + self.MAINTAIN_INTERVAL, self._maintain)
        self.ioLoop.add_timeout(now() + self.POLL_INTERVAL, self._poll)
        self.ioLoop.add_timeout(now() + self.EXECUTE_INTERVAL, self._execute)
        if self.webApps:
            # fail immediately if the http server can not run
            application = tornado.web.Application(self.webApps)
            self.httpServer = tornado.httpserver.HTTPServer(application)
            self.httpServer.listen(self.port)
        logger.info('{} is running'.format(self.serverName))
        self.ioLoop.start()
        logger.info('{} is exiting'.format(self.serverName))
def train():
    reader = DataReader(os.path.join(FLAGS.data_path, FLAGS.data_set), FLAGS.embedding_bag_size)
    train_data = reader.train_dataset
    eval_data = reader.dev_dataset
    iterator = tf.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)
    batch_data = iterator.get_next()
    start = batch_data['start']
    path = batch_data['path']
    end = batch_data['end']
    score = batch_data['score']
    original_features = batch_data['original_features']
    train_init_op = iterator.make_initializer(train_data)
    eval_init_op = iterator.make_initializer(eval_data)

    with tf.variable_scope("code2vec_model"):
        opt = Option(reader)
        train_model = Code2VecModel(start, path, end, score, original_features, opt)
        train_op = utils.get_optimizer(FLAGS.optimizer, FLAGS.learning_rate).minimize(train_model.loss)

    with tf.variable_scope('code2vec_model', reuse=True):
        eval_opt = Option(reader, training=False)
        eval_model = Code2VecModel(start, path, end, score, original_features, eval_opt)

    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    with tf.Session(config=session_conf) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # Size-3 heap of negated eval losses: tracks the three best
        # (lowest) validation losses seen so far.
        min_eval_loss = PriorityQueue(maxsize=3)
        stable_min_loss = 0
        for i in range(1000):
            start_time = time.time()
            train_loss, train_acc = evaluate(sess, train_model, batch_data, train_init_op, train_op)
            eval_loss, eval_acc = evaluate(sess, eval_model, batch_data, eval_init_op)
            eval_reg_loss, eval_reg_acc = evaluate(sess, train_model, batch_data, eval_init_op)
            if not min_eval_loss.full():
                min_eval_loss.put(-eval_loss)
                stable_min_loss = 0
            else:
                k = min_eval_loss.get()
                if k >= -eval_loss:
                    # no improvement over the worst of the three best losses
                    stable_min_loss += 1
                else:
                    stable_min_loss = 0
                min_eval_loss.put(max(k, -eval_loss))
            if opt.classification > 0:
                tf.logging.info(
                    'Epoch %2d: train-loss: %.5f (acc=%.2f), val-loss: %.5f (acc=%.2f), min-loss: %.5f, cost: %.4f s'
                    % (i + 1, train_loss, train_acc, eval_loss, eval_acc,
                       float(-np.mean(min_eval_loss.queue)), time.time() - start_time))
            else:
                tf.logging.info(
                    'Epoch %2d: train-loss: %.5f, val-reg: %.5f, val-loss: %.5f, min-loss: %.5f, cost: %.4f s, '
                    'attention_orthogonal_penalty: %.4f, fusion_penalty: %.4f, encoding_weight_L2: %.4f'
                    % (i + 1, train_loss, eval_reg_loss, eval_loss,
                       float(-np.mean(min_eval_loss.queue)), time.time() - start_time,
                       train_model.regularizations['attention_orthogonal_penalty'].eval(),
                       train_model.regularizations['fusion_penalty'].eval(),
                       train_model.regularizations['encoding_weight_L2'].eval()))
            # Stop once the best losses have been stable for 5 epochs
            # (and at least 200 epochs have run).
            if stable_min_loss >= 5 and i >= 200:
                break
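# Minimal standalone sketch of the early-stopping bookkeeping used in
# train(): a size-3 PriorityQueue of negated losses keeps the three best
# (lowest) validation losses; the heap minimum is the worst of those
# three, and a patience counter grows while new losses fail to beat it.
# The loss sequence below is made up for illustration.
from Queue import PriorityQueue

best = PriorityQueue(maxsize=3)
stable = 0
for loss in [0.9, 0.8, 0.7, 0.75, 0.72, 0.71, 0.74, 0.73, 0.76]:
    if not best.full():
        best.put(-loss)
        stable = 0
    else:
        worst_of_best = best.get()
        # -loss must beat the worst kept loss to reset the counter
        if worst_of_best >= -loss:
            stable += 1
        else:
            stable = 0
        best.put(max(worst_of_best, -loss))
print stable  # should print 3: the last three losses did not improve the top-3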