def test_pool_exhaustion():
    pool = ConnectionPool(size=1, **connection_kwargs)

    def run():
        with assert_raises(NoConnectionsAvailable):
            with pool.connection(timeout=.1) as connection:
                connection.tables()

    with pool.connection():
        # At this point the only connection is assigned to this thread,
        # so another thread cannot obtain a connection at this point.
        t = threading.Thread(target=run)
        t.start()
        t.join()
def _get_job_rule(pool: happybase.ConnectionPool, job_name) -> crawler.CrawlJobCore:
    '''Fetch the crawl_job_core (crawl rule) stored in HBase.'''
    with pool.connection() as conn:
        try:
            conn: happybase.Connection
            table = conn.table(job_name)
            row = table.row(rule_row_key, columns=[rule_col])
            rule = row[bytes(rule_col, encoding="utf-8")].decode("utf-8")
            # _json_str = row.values
            # print(rule)
            common.print_info("get crawl rule: {}".format(rule))
            crawl_job_core = crawler.CrawlJobCore.loads(rule)  # TODO: the key handling is still slightly off
            return crawl_job_core
        except Exception as e:
            common.print_exception(e)
            return None
        finally:
            conn.close()  # close the connection
def predict_from_image_batch(self, mnist_batch, index):
    t0 = time.time()
    connection_pool = ConnectionPool(size=self.CONNECTION_POOL_SIZE,
                                     host=HBaseManager.HOST,
                                     port=HBaseManager.PORT)
    hbase_manager = HBaseManager(connection_pool)
    process_pool = Pool(self.POOL_SIZE)
    n = len(mnist_batch)
    indexs = list(range(n))
    extract_process = process_pool.starmap_async(self.extract_keys,
                                                 zip(mnist_batch, indexs))
    extracted_keys = extract_process.get()
    predict_hash_args = zip(extracted_keys, indexs)
    predictions = [
        self.predict_hash_values(keys, hbase_manager, i)
        for keys, i in predict_hash_args
    ]
    process_pool.close()
    t1 = time.time()
    print("Mnist Batch {} predicted in: {} Seconds, For Node: {}".format(
        str(index), str(t1 - t0), self.__str__()))
    return predictions
def _get_job_result(pool: happybase.ConnectionPool, crawl_job_name) -> list:
    '''Fetch the crawl results for a job.'''
    with pool.connection() as conn:
        try:
            conn: happybase.Connection
            table = conn.table(crawl_job_name)
            result_list = []
            for key, value in table.scan(include_timestamp=True):
                tmp = {}
                tmp['url'] = key.decode("utf-8")
                # tmp['result'] = {ele.decode("utf-8"): value[ele].decode("utf-8") for ele in value}
                tmp['result'] = {
                    ele.decode("utf-8"): (value[ele][0].decode("utf-8"), value[ele][1])
                    for ele in value
                }
                result_list.append(tmp)
            return result_list
        except Exception as e:
            common.print_exception(e)
            return None
        finally:
            conn.close()  # close the connection
def _save_results(pool: happybase.ConnectionPool, crawl_job_core, url, result_list) -> bool:
    '''Save crawl results to HBase.

    If result_list is empty, do nothing.
    '''
    if not result_list:
        return False
    core = crawl_job_core
    with pool.connection() as conn:
        try:
            conn: happybase.Connection
            table = conn.table(core.name)
            row_key = url
            table.put(row_key, {
                results_col_pattern(i): ele
                for i, ele in enumerate(result_list)
            })
            return True
        except Exception as e:
            common.print_exception(e)
            return False
        finally:
            conn.close()  # close the connection
def train_batch(self, mnist_batch, index):
    '''
    :type mnist_batch: list of tuple
    :rtype: None
    '''
    t0 = time.time()
    connection_pool = ConnectionPool(size=self.CONNECTION_POOL_SIZE,
                                     host=HBaseManager.HOST,
                                     port=HBaseManager.PORT)
    hbase_manager = HBaseManager(connection_pool)
    process_pool = Pool(self.POOL_SIZE)
    thread_pool = ThreadPool(self.POOL_SIZE)
    n = len(mnist_batch)
    numbers, mnist_images = MnistHelper.extract_numbers_images(mnist_batch)
    mnist_images = [mnist_obs[MnistModel.PREDICTOR_INDEX] for mnist_obs in mnist_batch]
    indexs = list(range(n))
    extract_process = process_pool.starmap_async(self.extract_keys,
                                                 zip(mnist_images, indexs))
    extracted_keys = extract_process.get()
    store_hash_args = zip(extracted_keys, numbers, indexs)
    [self.store_hash_values(k, n, hbase_manager, i) for k, n, i in store_hash_args]
    process_pool.close()
    thread_pool.close()
    t1 = time.time()
    print("Time taken to train batch {} : {} Seconds".format(str(index), str(t1 - t0)))
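# Stand-alone sketch of the pattern shared by train_batch and
# predict_from_image_batch above: hashing fans out to a multiprocessing Pool
# while HBase access goes through one shared happybase ConnectionPool. The
# host, port, table name and column family below are assumptions for
# illustration, not the project's real configuration.
import hashlib
from multiprocessing import Pool

import happybase


def extract_keys(image_bytes, index):
    # Stand-in for the model's per-image key extraction step.
    return [hashlib.md5(image_bytes).hexdigest()], index


def store_keys(pool, keys, label):
    # Borrow a pooled connection only for the duration of the write.
    with pool.connection() as conn:
        table = conn.table('mnist_hashes')  # assumed table with family 'cf'
        for key in keys:
            table.put(key.encode(), {b'cf:label': str(label).encode()})


if __name__ == '__main__':
    hbase_pool = happybase.ConnectionPool(size=4, host='localhost', port=9090)
    images = [b'\x00' * 784, b'\x01' * 784]  # toy stand-ins for flattened MNIST images
    labels = [3, 7]
    with Pool(4) as workers:
        extracted = workers.starmap(extract_keys, zip(images, range(len(images))))
    for (keys, _), label in zip(extracted, labels):
        store_keys(hbase_pool, keys, label)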
def test_connection_pool():
    from thriftpy2.thrift import TException

    def run():
        name = threading.current_thread().name
        print("Thread %s starting" % name)

        def inner_function():
            # Nested connection requests must return the same connection
            with pool.connection() as another_connection:
                assert connection is another_connection

                # Fake an exception once in a while
                if random.random() < .25:
                    print("Introducing random failure")
                    connection.transport.close()
                    raise TException("Fake transport exception")

        for i in range(50):
            with pool.connection() as connection:
                connection.tables()

                try:
                    inner_function()
                except TException:
                    # This error should have been picked up by the
                    # connection pool, and the connection should have
                    # been replaced by a fresh one
                    pass

                connection.tables()

        print("Thread %s done" % name)

    N_THREADS = 10
    pool = ConnectionPool(size=3, **connection_kwargs)
    threads = [threading.Thread(target=run) for i in range(N_THREADS)]
    for t in threads:
        t.start()

    while threads:
        for t in threads:
            t.join(timeout=.1)

        # filter out finished threads
        threads = [t for t in threads if t.is_alive()]
        print("%d threads still alive" % len(threads))
def _get_job_list(pool: happybase.ConnectionPool) -> list:
    '''Get the list of job names stored in HBase.'''
    with pool.connection() as conn:
        try:
            conn: happybase.Connection
            table_list = conn.tables()
            return table_list
        except Exception as e:
            common.print_exception(e)
            return None
        finally:
            conn.close()  # close the connection
def _set_job_rule(pool: happybase.ConnectionPool, crawl_job_core) -> bool:
    '''Update the crawl rule.'''
    core = crawl_job_core
    with pool.connection() as conn:
        try:
            conn: happybase.Connection
            table = conn.table(core.name)
            table.put(rule_row_key, {
                rule_col: core.dumps(),
            })
            return True
        except Exception as e:
            common.print_exception(e)
            return False
        finally:
            conn.close()  # close the connection
def _save_job(pool: happybase.ConnectionPool, crawl_job_core) -> bool:
    '''Save a crawl_job_core (crawl rule) to HBase.'''
    core = crawl_job_core
    with pool.connection() as conn:
        try:
            conn: happybase.Connection
            conn.create_table(name=core.name, families={
                rule_col: dict(max_versions=rule_max_version),
                results_family: dict(max_versions=results_max_version),
            })
            table = conn.table(core.name)
            table.put(rule_row_key, {
                rule_col: core.dumps(),
            })
            return True
        except Exception as e:
            common.print_exception(e)
            return False
        finally:
            conn.close()  # close the connection
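# Minimal usage sketch for the helpers above: each crawl job maps to one HBase
# table, and the pool handles borrow-and-release. The Thrift host and the job
# name "demo_job" are assumptions for illustration only.
def _demo_read_path():
    pool = happybase.ConnectionPool(size=3, host='localhost')
    for table_name in _get_job_list(pool) or []:
        common.print_info("known job table: {}".format(table_name))
    rule = _get_job_rule(pool, "demo_job")       # None if the job/table does not exist
    results = _get_job_result(pool, "demo_job")  # list of {'url': ..., 'result': {...}}
    return rule, results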
def test_connection_pool():
    def run():
        name = threading.current_thread().name
        print("Thread %s starting" % name)

        def inner_function():
            # Nested connection requests must return the same connection
            with pool.connection() as another_connection:
                assert connection is another_connection

        for i in range(100):
            with pool.connection() as connection:
                connection.tables()

                # Fake an exception once in a while
                if random.random() < .001:
                    connection._tainted = True

                inner_function()

        print("Thread %s done" % name)

    N_THREADS = 50
    pool = ConnectionPool(size=3, **connection_kwargs)
    threads = [threading.Thread(target=run) for i in range(N_THREADS)]
    for t in threads:
        t.start()

    while threads:
        for t in threads:
            t.join(timeout=.1)

        # filter out finished threads
        threads = [t for t in threads if t.is_alive()]
        print("%d threads still alive" % len(threads))
def test_connection_pool_construction():
    with assert_raises(TypeError):
        ConnectionPool(size='abc')

    with assert_raises(ValueError):
        ConnectionPool(size=0)
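# Companion sketch for the pool tests above, using only the happybase API the
# tests exercise: size must be a positive integer, and connections are
# borrowed through a context manager. The host and the timeout are
# illustrative values.
import happybase

pool = happybase.ConnectionPool(size=3, host='localhost')  # size=0 raises ValueError, size='abc' raises TypeError
with pool.connection(timeout=0.5) as conn:
    print(conn.tables())  # the connection goes back to the pool when the block exits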
def setup(self):
    HBaseManager(ConnectionPool(size=1,
                                host=HBaseManager.HOST,
                                port=HBaseManager.PORT)).create_table(
        table_name=self.TABLE_NAME, delete=True)
from django.http import HttpResponse
from happybase import Connection, ConnectionPool
from collections import Counter
import random
import itertools
import pandas as pd
import numpy as np
from django.shortcuts import render_to_response
from desktop.lib.django_util import render
import datetime, json
from os.path import abspath, split, join
import strategy

# conn = Connection('192.168.2.41')
pool = ConnectionPool(size=5, host='192.168.2.41')

'''
user_t = conn.table('haodou')
keys_t = conn.table('tag_search_keys')
id2tokens_t = conn.table('id2tokens')  # id-to-tag mapping table
favs_t = conn.table('favs_by_time')  # key: time, column: token, value: [w1, w2, w3, ...]
user_tags_t = conn.table('haodou_user_tags')  # user tag store
goods_t = conn.table('haodou_goods')
td_w_t = conn.table('td_w')  # tag_date_weight
ud_w_t = conn.table('ud_w')  # user_date_weight
'''

current_tag = ''

country_index = [u'中国', u'加拿大', u'美国', u'日本', u'澳大利亚']

token2tag = {