def make_query(query):
    """Run ``query`` against the ``wifi`` repo of the ``livinglab`` repo base.

    A fresh ``DataHub`` client is created on every call with the password
    grant, using the module-level ``client_id``, ``client_secret``,
    ``username`` and ``password``.

    Returns whatever ``DataHub.query`` returns.
    """
    credentials = {
        'client_id': client_id,
        'client_secret': client_secret,
        'grant_type': 'password',
        'username': username,
        'password': password,
    }
    hub = DataHub(**credentials)
    return hub.query(repo_base='livinglab', repo='wifi', query=query)
def __init__(self):
    """Create the DataHub client for the configured environment.

    The dev settings in ``conf_aliyun_datahub`` are loaded first and are
    replaced by the product settings when ``conf.dev_or_product == 2``.
    Also sets the cursor type (``CursorType.LATEST``) and the read limit
    (30) used by this object.
    """

    def _settings(env):
        # (access_id, access_key, endpoint, project) for "dev" or "product".
        return (
            conf_aliyun_datahub['%s_access_id' % env],
            conf_aliyun_datahub['%s_access_key' % env],
            conf_aliyun_datahub['%s_endpoint' % env],
            conf_aliyun_datahub['%s_project' % env],
        )

    access_id, access_key, endpoint, self.project_name = _settings('dev')
    if conf.dev_or_product == 2:
        logger.debug("product ! ")
        access_id, access_key, endpoint, self.project_name = _settings('product')

    # enable_pb is left at the client's default; a variant passing
    # enable_pb=True existed only as commented-out code.
    self.datahub = DataHub(access_id, access_key, endpoint)
    self.cursor_type = CursorType.LATEST
    self.get_limit_num = 30
    logger.debug(self.to_string() + "__init__()")
def __init__(self, host, port):
    """Set up a buffered Thrift transport and a DataHub client for host:port.

    On success ``self.transport`` holds the buffered transport and
    ``self.client`` the ``DataHub.Client`` speaking the binary protocol over
    it. A ``Thrift.TException`` during setup is printed and swallowed, as
    before; in that case whichever attribute was not reached stays ``None``.

    :param host: server host passed to ``TSocket.TSocket``.
    :param port: server port passed to ``TSocket.TSocket``.
    """
    # Define both attributes up front so a failed setup leaves them as None
    # rather than missing (which surfaced later as an AttributeError).
    self.transport = None
    self.client = None
    try:
        transport = TSocket.TSocket(host, port)
        self.transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = DataHub.Client(protocol)
    except Thrift.TException as tex:
        # "except ... as" and print(...) with one argument behave the same on
        # Python 2.6+ and Python 3.
        print('%s' % (tex.message))
def create_tables():
    """Create the tables that leak records are inserted into.

    Issues one ``create table if not exists`` statement per table against
    the ``natural_gas`` repo under ``REPO_BASE``. All four tables share the
    same column layout, so a single DDL template is used.
    """
    dh = DataHub(client_id=secret.client_id,
                 client_secret=secret.client_secret,
                 grant_type='password',
                 username=secret.username,
                 password=secret.password)

    ddl_template = """
        create table if not exists natural_gas.%s(
        id integer,
        formatted_address text,
        lat double precision,
        lng double precision,
        location_type text,
        record_date date,
        grade integer,
        PRIMARY KEY (id));"""

    # Same creation order as the original hand-written statements.
    table_names = (
        'ngrid_repaired_2015',
        'ngrid_unrepaired_2015',
        'ngrid_unrepaired_2014',
        'ngrid_repaired_2014',
    )
    for table_name in table_names:
        dh.query(REPO_BASE, 'natural_gas', ddl_template % table_name)
def datahub_import(thread_info):
    """Upload the text files of one job to a DataHub topic.

    ``thread_info`` is read positionally:

    * ``[0]``, ``[1]`` -- passed to ``enum_file`` to list the input files;
    * ``[2]``, ``[3]`` -- identify the project and topic (presumably project
      then topic; the client calls take them in differing orders -- confirm
      against the DataHub client in use);
    * ``[4]`` -- field separator used to split each line;
    * ``[5]`` -- forwarded unchanged to ``put_records``.

    Each line becomes one ``TupleRecord``; records are assigned to shards
    round-robin and flushed in batches of 10000 and once more at the end of
    every file. Lines that fail to convert are skipped. Any other error is
    printed as a traceback and ends the job.
    """
    try:
        # Connect to DataHub.
        dh = DataHub()
        # Block until every shard is ready.
        dh.wait_shards_ready(thread_info[2], thread_info[3])
        # Fetch the topic (for its record schema) and the shard list.
        topic = dh.get_topic(thread_info[3], thread_info[2])
        shards = dh.list_shards(thread_info[2], thread_info[3])

        # Walk every file found for this job.
        for data_file in enum_file(thread_info[0], thread_info[1]):
            # Read the whole file; "with" closes it even if reading fails.
            with open(data_file, 'rb') as data_:
                content = data_.readlines()

            # Start every file with an empty batch so records already
            # flushed for the previous file are never sent twice.
            records = []
            # Shard / batch counter.
            i = 0
            for line in content:
                try:
                    # NOTE(review): line[:-2] assumes each line ends with a
                    # two-character terminator (CRLF) -- confirm for the
                    # input files.
                    values = line[:-2].split(thread_info[4])
                    record = TupleRecord(schema=topic.record_schema,
                                         values=values)
                    # Spread records over the shards round-robin.
                    record.shard_id = shards[i % len(shards)].shard_id
                    records.append(record)
                    i += 1
                    # Flush once 10000 records have accumulated.
                    if 10000 <= i:
                        put_records(dh, thread_info[2], thread_info[3],
                                    records, thread_info[5], data_file)
                        records = []
                        i = 0
                except Exception:
                    # Best effort: skip this line. Unlike a bare "except:",
                    # KeyboardInterrupt / SystemExit are not swallowed.
                    continue

            # Write what is left of this file; put_records records failures.
            put_records(dh, thread_info[2], thread_info[3], records,
                        thread_info[5], data_file)
            # TODO: follow-up handling once a file has been processed.

        print('%s上传完毕' % thread_info[0])
    except Exception:
        traceback.print_exc()
# KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import sys import traceback from datahub import DataHub from datahub.exceptions import DatahubException, ResourceExistException from datahub.models import RecordType, FieldType, RecordSchema, TupleRecord access_id = '******* your access id *******' access_key = '******* your access key *******' endpoint = '******* your endpoint *******' dh = DataHub(access_id, access_key, endpoint, read_timeout=10) project_name = 'tuple_record_test' topic_name = 'tuple_record_test' shard_count = 3 life_cycle = 7 record_type = RecordType.TUPLE record_schema = RecordSchema.from_lists([ 'bigint_field', 'string_field', 'double_field', 'bool_field', 'time_field' ], [ FieldType.BIGINT, FieldType.STRING, FieldType.DOUBLE, FieldType.BOOLEAN, FieldType.TIMESTAMP ]) try: dh.create_project(project_name, 'comment') print("create project success!")
project_name = configer.get('datahub', 'project_name', 'meter_project_test')
topic_name = configer.get('datahub', 'topic_name', 'meter_topic_test')

# NOTE(review): this banner prints the access key in clear text.
print("=======================================")
print("access_id: %s" % access_id)
print("access_key: %s" % access_key)
print("endpoint: %s" % endpoint)
print("project_name: %s" % project_name)
print("topic_name: %s" % topic_name)
print("=======================================\n\n")

if not access_id or not access_key or not endpoint:
    print("access_id and access_key and endpoint must be set!")
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)

# Delete the topics meter_topic_test_<pi>_<ti> of the projects
# meter_project_test_<pi> for pi in 1..9 and ti in 1..99. A failing delete
# is reported and the loop moves on to the next topic.
# print(...) with a single argument and a plain "except Exception:" run
# unchanged on Python 2 and Python 3.
try:
    for pi in range(1, 10):
        project_name = "meter_project_test_%d" % pi
        for ti in range(1, 100):
            topic_name = "meter_topic_test_%d_%d" % (pi, ti)
            try:
                dh.delete_topic(topic_name, project_name)
                print("delete topic %s success!" % topic_name)
            except Exception:
                print("delete %s failed!" % topic_name)
                print(traceback.format_exc())
    print("=======================================\n\n")
except Exception:
    print(traceback.format_exc())
project_name = configer.get('datahub', 'project_name', 'pydatahub_project_test') topic_name = configer.get('datahub', 'topic_name', 'pydatahub_tuple_topic_test') print "=======================================" print "access_id: %s" % access_id print "access_key: %s" % access_key print "endpoint: %s" % endpoint print "project_name: %s" % project_name print "topic_name: %s" % topic_name print "=======================================\n\n" if not access_id or not access_key or not endpoint: print "access_id and access_key and endpoint must be set!" sys.exit(-1) dh = DataHub(access_id, access_key, endpoint) try: topic = dh.get_topic(topic_name, project_name) print "get topic suc! topic=%s" % str(topic) if topic.record_type != RecordType.TUPLE: print "topic type illegal!" sys.exit(-1) print "=======================================\n\n" cursor = dh.get_cursor(project_name, topic_name, CursorType.OLDEST, '0') while True: (record_list, record_num, next_cursor) = dh.get_records(topic, '0', cursor, 10) for record in record_list: print record if 0 == record_num:
from thrift import Thrift from thrift.protocol import TBinaryProtocol from thrift.transport import THttpClient from thrift.transport import TTransport ''' @author: anant bhardwaj @date: Oct 11, 2013 Sample Python client for DataHub ''' try: transport = THttpClient.THttpClient('http://datahub.csail.mit.edu/service') transport = TTransport.TBufferedTransport(transport) protocol = TBinaryProtocol.TBinaryProtocol(transport) client = DataHub.Client(protocol) print "Version: %s" % (client.get_version()) # open connection con_params = ConnectionParams(user='******', password='******') con = client.open_connection(con_params=con_params) # execute a query res = client.execute_sql( con=con, query='create table if not exists domo.terms (term text)', query_params=None) res = client.execute_sql(con=con, query="insert into domo.terms values('VC Firm')",
print("access_key:%s" % args.access_key) print("endpoint:%s" % args.endpoint) print("project:%s" % args.project) print("topic:%s" % args.topic) print("retry_times:%d" % args.retry_times) print("conn_timeout:%d" % args.conn_timeout) print("read_timeout:%d" % args.read_timeout) print("batch record num:%d" % args.batch) print("round num:%d" % args.round) print("stream:%s" % args.stream) print("protobuf:%s" % args.protobuf) print("=======================================\n\n") dh = DataHub(args.access_id, args.access_key, args.endpoint, retry_times=args.retry_times, conn_timeout=args.conn_timeout, read_timeout=args.read_timeout) # project = Project(name=args.project, comment='perf project for python sdk') # dh.create_project(project) # print "create project %s success!" % args.project # print "=======================================\n\n" topic_result = dh.get_topic(args.project, args.topic) print("get topic %s success! detail:\n%s" % (args.topic, str(topic_result))) print("=======================================\n\n") cursor_result = dh.get_cursor(args.project, args.topic, '0', CursorType.OLDEST) print("get topic %s oldest cursor success! detail:\n%s" %
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import json import sys sys.path.append('./') from httmock import HTTMock from datahub import DataHub from datahub.exceptions import InvalidParameterException, ResourceNotFoundException, ResourceExistException from datahub.models import RecordSchema, FieldType, RecordType from .unittest_util import gen_mock_api dh = DataHub('access_id', 'access_key', 'http://endpoint') class TestTopic: def test_list_topic_success(self): project_name = 'success' def check(request): assert request.method == 'GET' assert request.url == 'http://endpoint/projects/success/topics' with HTTMock(gen_mock_api(check)): result = dh.list_topic(project_name) print(result) assert 'topic_name_1' in result.topic_names
class Address(object):
    """Unprocessed and processed information about a single gas leak.

    Instances are built from a raw record, geocoded with
    ``get_details_for_address`` and then written to DataHub with
    ``insert_into_datahub``. The geocoded attributes
    (``formatted_address``, ``lat``, ``lng``, ``location_type``) exist only
    after a geocoding call that returned status ``OK`` with results.
    """

    # Class-level DataHub connector shared by every instance.
    datahub = DataHub(client_id=secret.client_id,
                      client_secret=secret.client_secret,
                      grant_type='password',
                      username=secret.username,
                      password=secret.password)

    def __init__(self, primary_key, addr, town, intersection,
                 date_classified, date_repaired, grade, repo, table,
                 national_grid=False):
        """Store the raw record.

        :param primary_key: row id; converted with ``int``.
        :param addr: street address text.
        :param town: town name; for National Grid records it is mapped
            through ``NATIONAL_GRID_NAMES`` (upper-cased, stripped).
        :param intersection: cross street or ``None``; National Grid values
            get an ``'and '`` prefix.
        :param date_classified: date string or empty/``None``.
        :param date_repaired: date string or empty/``None``.
        :param grade: leak grade; anything ``int`` cannot convert becomes 0.
        :param repo: DataHub repo the row is inserted into.
        :param table: table within ``repo``.
        :param national_grid: whether the record comes from National Grid.
        """
        super(Address, self).__init__()
        self.primary_key = int(primary_key)
        self.addr = addr

        self.town = town
        if national_grid:
            self.town = NATIONAL_GRID_NAMES.get(town.upper().strip(), town)

        self.intersection = intersection
        if intersection is None:
            self.intersection = ''
        elif national_grid:
            self.intersection = 'and ' + intersection

        self.date_classified = None
        if date_classified:
            self.date_classified = parser.parse(date_classified)

        # The original guard tested date_classified here, so a repair date
        # was dropped whenever the classification date was empty.
        self.date_repaired = None
        if date_repaired:
            self.date_repaired = parser.parse(date_repaired)

        # Sometimes something strange is passed in for grade.
        try:
            self.grade = int(grade)
        except Exception:
            self.grade = 0

        # The repo and table this row will be inserted into.
        self.repo = repo
        self.table = table

    def get_details_for_address(self):
        """Geocode the address with the Google Maps geocoding API.

        Builds the lookup string from ``addr``, ``town`` and (when ``addr``
        does not start with a digit) ``intersection``. On status ``OK`` with
        at least one result, sets ``formatted_address``, ``lat``, ``lng``
        and ``location_type`` from the first result. ``ZERO_RESULTS`` and
        ``INVALID_REQUEST`` are reported and skipped; other statuses are
        ignored.

        :raises Exception: when the API reports ``OVER_QUERY_LIMIT``.
        """
        if self.addr[0].isdigit():
            addr_str = "%s %s, MA" % (self.addr, self.town)
        else:
            addr_str = "%s %s %s, MA" % (self.addr, self.intersection,
                                         self.town)
        addr_str = urllib.quote(addr_str)

        URI = ('https://maps.googleapis.com/maps/api/geocode/json?'
               'address=%s&key=%s' % (addr_str, secret.GOOGLE_API_KEY))
        res = requests.get(URI)
        content = json.loads(res.content)
        status = content['status']

        if status == 'OVER_QUERY_LIMIT':
            raise Exception('API is over query limit')
        elif (status == 'ZERO_RESULTS') or (status == 'INVALID_REQUEST'):
            print('%s is invalid. Skipping' % addr_str)
        elif status == 'OK' and len(content.get('results', [])) > 0:
            # Only the first result is used.
            result = content['results'][0]
            self.formatted_address = result['formatted_address']
            self.lat = result['geometry']['location']['lat']
            self.lng = result['geometry']['location']['lng']
            self.location_type = result['geometry']['location_type']

    def get_query_values(self):
        """Return the ``(...)`` values tuple of the insert statement.

        Kept separate from ``get_query_insert`` so several rows can be
        combined into one batch insert. The record date is the
        classification date when present, otherwise the repair date.
        Requires the geocoded attributes to be set.
        """
        date = None
        if self.date_classified:
            date = self.date_classified.date()
        elif self.date_repaired:
            date = self.date_repaired.date()

        # NOTE(review): json.dumps adds double quotes but does not escape
        # single quotes, so a value containing ' still breaks the statement;
        # a parameterized query would be safer if the client supports one.
        formatted_address = json.dumps(self.formatted_address)
        location_type = json.dumps(self.location_type)

        values = ("(%d, '%s', %f, %f, '%s',"
                  "to_date('%s', 'YYYY-MM-DD'), %d) "
                  % (self.primary_key, formatted_address, self.lat,
                     self.lng, location_type, str(date), self.grade))
        return values

    def get_query_insert(self):
        """Return the ``INSERT INTO <repo>.<table> (columns)`` prefix."""
        insert = ("INSERT INTO %s.%s "
                  "(id, formatted_address, lat, lng, "
                  "location_type, record_date, grade) "
                  % (self.repo, self.table))
        return insert

    def insert_into_datahub(self):
        """Insert this address into its table in DataHub.

        A failing query is reported on stdout and swallowed so that one bad
        row does not stop the caller.
        """
        query = self.get_query_insert() + " values " + self.get_query_values()
        try:
            self.__class__.datahub.query(REPO_BASE, self.repo, query)
        except Exception as err:
            print("Failed to insert row %d into %s.%s in datahub" %
                  (self.primary_key, self.repo, self.table))
            # The original printed the unassigned query result here, which
            # raised inside the handler; report the error instead.
            print(err)
            print('----')
import sys
import time
import traceback

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Turn off urllib3's InsecureRequestWarning output.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

from datahub import DataHub
from datahub.exceptions import ResourceExistException, DatahubException
from datahub.models import FieldType, RecordSchema, TupleRecord, BlobRecord, CursorType, RecordType

# Credentials are left blank here and must be filled in before running.
access_id = ''
access_key = ''
endpoint = 'https://dh-cn-beijing.aliyuncs.com'

dh = DataHub(access_id, access_key, endpoint)

# ===================== create project =====================
project_name = 'caict_smart_sync'
comment = 'smart_sync'

# An already existing project is not an error; any other failure prints the
# traceback and exits with status -1.
try:
    dh.create_project(project_name, comment)
    print("create project success!")
    print("=======================================\n\n")
except ResourceExistException:
    print("project already exist!")
    print("=======================================\n\n")
except Exception as e:
    print(traceback.format_exc())
    sys.exit(-1)
# under the License. import json import os from httmock import HTTMock, urlmatch, response from datahub import DataHub from datahub.exceptions import ResourceNotFoundException, InvalidOperationException, \ InvalidParameterException, LimitExceededException from datahub.models import RecordSchema, FieldType, BlobRecord, TupleRecord _TESTS_PATH = os.path.abspath(os.path.dirname(__file__)) _FIXTURE_PATH = os.path.join(_TESTS_PATH, '../fixtures') dh = DataHub('access_id', 'access_key', 'http://endpoint', enable_pb=False) dh2 = DataHub('access_id', 'access_key', 'http://endpoint', enable_pb=True) @urlmatch(netloc=r'(.*\.)?endpoint') def datahub_api_mock(url, request): path = url.path.replace('/', '.')[1:] res_file = os.path.join(_FIXTURE_PATH, '%s.json' % path) status_code = 200 content = { } headers = { 'Content-Type': 'application/json', 'x-datahub-request-id': 0 } try:
# specific language governing permissions and limitations # under the License. import sys import time import traceback from datahub import DataHub from datahub.exceptions import ResourceExistException, DatahubException from datahub.models import FieldType, RecordSchema, TupleRecord, BlobRecord, CursorType, RecordType access_id = '' access_key = '' endpoint = '' dh = DataHub(access_id, access_key, endpoint) # ===================== 创建project ===================== project_name = 'project' comment = 'comment' try: dh.create_project(project_name, comment) print("create project success!") print("=======================================\n\n") except ResourceExistException: print("project already exist!") print("=======================================\n\n") except Exception as e: print(traceback.format_exc()) sys.exit(-1)
def _split_tag_id(tag_id):
    """Split ``<company>_<protocol>.<system>.<tag>`` into its four parts."""
    id_list = tag_id.split('.')
    head = id_list[0].split('_')
    return head[0], head[1], id_list[1], id_list[2]


def handler(event, context):
    """Forward the values of one IoT event to a DataHub tuple topic.

    :param event: JSON text with a ``timestamp`` and a list of ``values``;
        every value carries ``id`` (for example ``SE433_OPC.S01.AISA0101``),
        ``v``, ``q`` and ``t``. A sample is shown below the function.
    :param context: invocation context; not used.
    :returns: the string ``'success'``.

    One record is written per value, each to a randomly chosen shard. An
    event without values returns immediately without contacting DataHub
    (the original failed with a NameError in that case).
    """
    evt = json.loads(event)
    timestamp = evt['timestamp']
    values = evt['values']
    if not values:
        return 'success'

    ACCESS_ID = 'XXXXX'
    ACCESS_KEY = 'XXXXX'
    ENDPOINT = 'http://dh-cn-XXXXX.aliyun-inc.com'
    dh = DataHub(ACCESS_ID, ACCESS_KEY, ENDPOINT)

    PROJECT_NAME = 'veolia_d4b_poc'
    TOPIC_NAME = 'extract_result_table'

    # ===================== put tuple records =====================
    # Block until every shard is ready.
    dh.wait_shards_ready(PROJECT_NAME, TOPIC_NAME)

    topic = dh.get_topic(PROJECT_NAME, TOPIC_NAME)
    shards = dh.list_shard(PROJECT_NAME, TOPIC_NAME).shards

    records = []
    for value in values:
        # id sample: SE433_OPC.S01.AISA0101
        company_code, protocol_name, system_code, tagname = \
            _split_tag_id(value['id'])
        v = value['v']
        # The quality flag is written as the text 'true' / 'false'.
        q = 'true' if value['q'] else 'false'
        t = value['t']

        rec = TupleRecord(schema=topic.record_schema)
        rec.values = [
            timestamp, company_code, protocol_name, system_code, tagname,
            v, q, t
        ]
        rec.shard_id = random.choice(shards).shard_id
        records.append(rec)

    failed_indexs = dh.put_records(PROJECT_NAME, TOPIC_NAME, records)
    # As before, shard_id in this message is the last record's shard.
    print("[print9] put tuple %d records, shard_id = %s, failed list: %s" %
          (len(records), records[-1].shard_id, failed_indexs))
    # If failed_indexs is not empty, the failed records should be retried.
    return 'success'


# Sample event:
# {
#     "timestamp":1521698375065,
#     "values":[
#         {
#             "id":"SE433_OPC.S01.IW1440",
#             "v":206,
#             "q":true,
#             "t":1521698358299
#         },
#         {
#             "id":"SESE433_OPC433.S01.LCV1414_ACT",
#             "v":42,
#             "q":true,
#             "t":1521698358222
#         },
#         {
#             "id":"SE433_OPC.S01.LT1430A",
#             "v":22,
#             "q":true,
#             "t":1521698358235
#         },
#         …
#     ]
# }
db=db_name, charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor) if dev_or_product == 1: access_id = conf_aliyun_datahub['dev_access_id'] access_key = conf_aliyun_datahub['dev_access_key'] endpoint = conf_aliyun_datahub['dev_endpoint'] project_name = conf_aliyun_datahub['dev_project'] elif dev_or_product == 2: access_id = conf_aliyun_datahub['product_access_id'] access_key = conf_aliyun_datahub['product_access_key'] endpoint = conf_aliyun_datahub['product_endpoint'] project_name = conf_aliyun_datahub['product_project'] datahub = DataHub(access_id, access_key, endpoint) topic_name = "t_spread" datahub.wait_shards_ready(project_name, topic_name) topic = datahub.get_topic(project_name, topic_name) shards_result = datahub.list_shard(project_name, topic_name) shards = shards_result.shards ''' sql_select = "SELECT f_ex_id,f_bid,f_ask,f_ts FROM t_ticker_crrent where f_symbol=\"BTC/USDT\";" cursor_reaed = conn_read.cursor() cursor_reaed.execute(sql_select) rows = cursor_reaed.fetchall() logger.info(rows) ''' def handler(event, context):
class datahub():
    """Wrapper around an Aliyun DataHub client bound to one project.

    The project and credentials come from ``conf_aliyun_datahub``; the
    product settings are used when ``conf.dev_or_product == 2``, the dev
    settings otherwise.
    """

    def __init__(self):
        """Create the DataHub client and set the read defaults."""
        access_id = conf_aliyun_datahub['dev_access_id']
        access_key = conf_aliyun_datahub['dev_access_key']
        endpoint = conf_aliyun_datahub['dev_endpoint']
        self.project_name = conf_aliyun_datahub['dev_project']
        if conf.dev_or_product == 2:
            logger.debug("product ! ")
            access_id = conf_aliyun_datahub['product_access_id']
            access_key = conf_aliyun_datahub['product_access_key']
            endpoint = conf_aliyun_datahub['product_endpoint']
            self.project_name = conf_aliyun_datahub['product_project']
        # enable_pb is left at the client's default.
        self.datahub = DataHub(access_id, access_key, endpoint)
        # Readers start from the latest cursor and fetch up to 30 records
        # per call (see run_get_topic).
        self.cursor_type = CursorType.LATEST
        self.get_limit_num = 30
        logger.debug(self.to_string() + "__init__()")

    def to_string(self):
        """Return the ``datahub[<project>] `` prefix used in log messages."""
        return "datahub[{0}] ".format(self.project_name)

    def create_project(self):
        """Create the project; an already existing project is not an error.

        Any other exception is logged and re-raised.
        """
        try:
            self.datahub.create_project(self.project_name, self.project_name)
            logger.debug(self.to_string() +
                         "create_project({0})".format(self.project_name))
        except ResourceExistException:
            logger.debug(self.to_string() + "project already exist!")
        except Exception:
            logger.info(traceback.format_exc())
            raise

    def create_all_topic(self):
        """Create every tuple topic listed in ``conf_aliyun_datahub['topics']``.

        Each entry supplies ``shard_count``, ``life_cycle`` and a
        three-element ``record_schema`` passed to
        ``RecordSchema.from_lists``. Existing topics are skipped; any other
        exception is logged and re-raised.
        """
        for topic_name, spec in conf_aliyun_datahub['topics'].items():
            shard_count = spec['shard_count']
            life_cycle = spec['life_cycle']
            schema_lists = spec['record_schema']
            record_schema = RecordSchema.from_lists(
                schema_lists[0], schema_lists[1], schema_lists[2])
            try:
                # The topic name doubles as the topic comment.
                self.datahub.create_tuple_topic(
                    self.project_name, topic_name, shard_count, life_cycle,
                    record_schema, topic_name)
                logger.debug(self.to_string() +
                             "create_tuple_topic({0}, {1})".format(
                                 self.project_name, topic_name))
            except ResourceExistException:
                logger.debug(
                    self.to_string() +
                    "create_tuple_topic({0}, {1}) ResourceExistException".
                    format(self.project_name, topic_name))
            except Exception:
                logger.info(traceback.format_exc())
                raise

    def get_topic(self, topic_name):
        """Return ``(topic, shards)`` for a tuple topic.

        Blocks until all shards of the topic are ready.

        :raises Exception: when the topic is not a TUPLE topic.
        """
        self.datahub.wait_shards_ready(self.project_name, topic_name)
        topic = self.datahub.get_topic(self.project_name, topic_name)
        if topic.record_type != RecordType.TUPLE:
            raise Exception(
                self.to_string() +
                "get_topic({0}) topic.record_type != RecordType.TUPLE".format(
                    topic_name))
        shards_result = self.datahub.list_shard(self.project_name, topic_name)
        shards = shards_result.shards
        return (topic, shards)

    def pub_topic(self, topic_name, records):
        """Write ``records`` to ``topic_name``, retrying the failed ones.

        Does nothing for an empty or ``None`` ``records``. Records reported
        as failed are re-sent up to four times; what still fails after that
        is logged and dropped.
        """
        if not records:
            return
        pending = records
        result = self.datahub.put_records(self.project_name, topic_name,
                                          pending)
        retries = 0
        while result.failed_record_count > 0:
            logger.debug(self.to_string() +
                         "pub_topic() put failed = {0}".format(result))
            # failed_records describes the failures (index, error code,
            # message); the records to re-send are looked up by index in
            # the batch that was just written.
            pending = [pending[failed.index]
                       for failed in result.failed_records]
            result = self.datahub.put_records(self.project_name, topic_name,
                                              pending)
            retries += 1
            if retries > 3:
                break
        if result.failed_record_count > 0:
            logger.error(self.to_string() +
                         "pub_topic({0}) giving up on {1} records".format(
                             topic_name, result.failed_record_count))

    async def pub_topic_once(self, ex_id, topic_name, func, *args, **kwargs):
        """Fetch records with ``func`` once and publish them.

        ``func(ex_id, topic, shards, *args, **kwargs)`` is awaited and its
        result passed to ``pub_topic``. A failed attempt is followed by a
        10 second pause and a retry; the method gives up after 11 failed
        attempts. Task cancellation is propagated.
        """
        topic, shards = self.get_topic(topic_name)
        attempts = 0
        while True:
            try:
                records = await func(ex_id, topic, shards, *args, **kwargs)
                logger.debug(self.to_string() +
                             "pub_topic_once({0}, {1}) len(records) = {2}".
                             format(ex_id, topic_name, len(records)))
                self.pub_topic(topic_name, records)
                return
            except asyncio.CancelledError:
                # Never swallow cancellation of the surrounding task.
                raise
            except (ccxt.RequestTimeout, ccxt.DDoSProtection):
                await asyncio.sleep(10)
            except Exception:
                logger.error(
                    self.to_string() +
                    "pub_topic_once({0}, {1})".format(ex_id, topic_name))
                logger.error(traceback.format_exc())
                await asyncio.sleep(10)
            attempts += 1
            if attempts > 10:
                return

    async def run_pub_topic(self, ex_id, topic_name, func, *args, **kwargs):
        """Fetch records with ``func`` and publish them, forever.

        Errors are logged (timeouts and DDoS protection silently) and,
        except for DataHub errors, followed by a 10 second pause before the
        next round. Task cancellation, KeyboardInterrupt and SystemExit are
        propagated instead of being swallowed by the loop.
        """
        topic, shards = self.get_topic(topic_name)
        while True:
            try:
                records = await func(ex_id, topic, shards, *args, **kwargs)
                self.pub_topic(topic_name, records)
            except asyncio.CancelledError:
                raise
            except DatahubException:
                logger.error(traceback.format_exc())
            except (ccxt.RequestTimeout, ccxt.DDoSProtection):
                await asyncio.sleep(10)
            except (ccxt.AuthenticationError, ccxt.ExchangeNotAvailable,
                    ccxt.ExchangeError, ccxt.NetworkError):
                logger.error(traceback.format_exc())
                await asyncio.sleep(10)
            except Exception:
                logger.info(traceback.format_exc())
                await asyncio.sleep(10)

    # Example of what get_tuple_records returns (kept from the original
    # notes):
    #
    # get_result = {
    #     'NextCursor': '30005b54925e000000000002cd180001',
    #     'RecordCount': 1,
    #     'StartSeq': 183576,
    #     'Records': [{
    #         'Data': ['okex', 'DENT/BTC', '15', '1532268900000', '4.6e-07',
    #                  '4.6e-07', '4.6e-07', '4.6e-07', '4541051.5',
    #                  '1532269148'],
    #         'Sequence': 183576,
    #         'SystemTime': 1532269150134
    #     }]
    # }
    #
    # get_result.records = [TupleRecord {
    #     Values {
    #         *name*        *type*      *value*
    #         f_ex_id       string      okex
    #         f_symbol      string      KEY/ETH
    #         f_timeframe   bigint      1
    #         f_ts          bigint      1532269740000
    #         f_o           double      2.578e-05
    #         f_h           double      2.578e-05
    #         f_l           double      2.578e-05
    #         f_c           double      2.578e-05
    #         f_v           double      0.0
    #         f_ts_update   timestamp   1532269853
    #     }
    # }]

    def run_get_topic(self, topic_name, func, *args, **kwargs):
        """Poll every shard of ``topic_name`` forever and hand records to ``func``.

        A cursor of type ``self.cursor_type`` is opened per shard. Each
        round reads up to ``self.get_limit_num`` records per shard and
        calls ``func(records, *args, **kwargs)`` when any were returned;
        an empty read is followed by a 0.1 second pause. Errors are logged
        and polling continues; KeyboardInterrupt and SystemExit propagate.
        """
        logger.debug(
            self.to_string() +
            "run_get_topic({0},{1})".format(self.project_name, topic_name))
        topic, shards = self.get_topic(topic_name)
        shard_id_cursor = dict()
        for shard in shards:
            shard_id_cursor[shard.shard_id] = self.datahub.get_cursor(
                self.project_name, topic_name, shard.shard_id,
                self.cursor_type).cursor
        while True:
            for shard_id, cursor in shard_id_cursor.items():
                try:
                    get_result = self.datahub.get_tuple_records(
                        self.project_name, topic_name, shard_id,
                        topic.record_schema, cursor, self.get_limit_num)
                    # Only existing keys are reassigned, which is safe while
                    # iterating over the dict.
                    shard_id_cursor[shard_id] = get_result.next_cursor
                    if get_result.record_count > 0:
                        func(get_result.records, *args, **kwargs)
                    else:
                        time.sleep(0.1)
                except DatahubException as e:
                    logger.warning(
                        self.to_string() +
                        "run_get_topic({0},{1}) DatahubException={2}".
                        format(self.project_name, topic_name, e))
                except Exception as e:
                    logger.warning(
                        self.to_string() +
                        "run_get_topic({0},{1}) Exception={2}".format(
                            self.project_name, topic_name, e))
from oauth2_provider.views import ApplicationUpdate

from inventory.models import App, Annotation
from account.utils import grant_app_permission
from core.db.manager import DataHubManager
from core.db.rlsmanager import RowLevelSecurityManager
from core.db.rls_permissions import RLSPermissionsParser
from datahub import DataHub
from datahub.account import AccountService
from service.handler import DataHubHandler
from utils import post_or_get

'''
Datahub Web Handler
'''

# One handler instance backs both Thrift processors.
handler = DataHubHandler()
core_processor = DataHub.Processor(handler)
account_processor = AccountService.Processor(handler)


def home(request):
    """Redirect to the user's browser page, or to the site index.

    The browser page is used when the request's user has a non-empty
    username; everyone else is sent to ``www:index``.
    """
    username = request.user.get_username()
    if not username:
        return HttpResponseRedirect(reverse('www:index'))
    return HttpResponseRedirect(reverse('browser-user', args=(username, )))


def about(request):
    """Redirect to the site index."""
    target = reverse('www:index')
    return HttpResponseRedirect(target)
# specific language governing permissions and limitations # under the License. import sys import time import traceback from datahub import DataHub from datahub.exceptions import DatahubException, ResourceExistException from datahub.models import RecordType, CursorType access_id = '******* your access id *******' access_key = '******* your access key *******' endpoint = '******* your endpoint *******' dh = DataHub(access_id, access_key, endpoint) project_name = 'tuple_record_test' topic_name = 'tuple_record_test' try: dh.create_project(project_name, 'comment') print("create project success!") print("=======================================\n\n") except ResourceExistException as e: print("project already exist!") print("=======================================\n\n") except Exception: print(traceback.format_exc()) sys.exit(-1)
# Connection settings are read from the [datahub] section of datahub.ini.
configer = Configer('datahub.ini')
access_id = configer.get('datahub', 'access_id', '')
access_key = configer.get('datahub', 'access_key', '')
endpoint = configer.get('datahub', 'endpoint', '')
project_name = configer.get('datahub', 'project_name', 'pydatahub_project_test')
topic_name = configer.get('datahub', 'topic_name', 'pydatahub_tuple_topic_test')

# NOTE(review): this banner prints the access key in clear text.
print("=======================================")
print("access_id: %s" % access_id)
print("access_key: %s" % access_key)
print("endpoint: %s" % endpoint)
print("project_name: %s" % project_name)
print("topic_name: %s" % topic_name)
print("=======================================\n\n")

if not access_id or not access_key or not endpoint:
    print("access_id and access_key and endpoint must be set!")
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)

# Merge shards '1' and '2' of the topic and print the shards returned.
# print(...) with a single argument and a plain "except Exception:" run
# unchanged on Python 2 and Python 3.
try:
    shards = dh.merge_shard(project_name, topic_name, '1', '2')
    for shard in shards:
        print(shard)
    print("=======================================\n\n")
except Exception:
    print(traceback.format_exc())
    sys.exit(-1)
# Echo the parsed command-line settings.
# NOTE(review): this prints the access key in clear text.
# print(...) with a single argument runs unchanged on Python 2 and Python 3.
print("access_id:%s" % args.access_id)
print("access_key:%s" % args.access_key)
print("endpoint:%s" % args.endpoint)
print("project:%s" % args.project)
print("topic:%s" % args.topic)
print("retry_times:%d" % args.retry_times)
print("conn_timeout:%d" % args.conn_timeout)
print("read_timeout:%d" % args.read_timeout)
print("batch record num:%d" % args.batch)
print("round num:%d" % args.round)
print("stream:%s" % args.stream)
print("=======================================\n\n")

dh = DataHub(args.access_id, args.access_key, args.endpoint,
             retry_times=args.retry_times,
             conn_timeout=args.conn_timeout,
             read_timeout=args.read_timeout)

#project = Project(name=args.project, comment='perf project for python sdk')
#dh.create_project(project)
#print "create project %s success!" % args.project
#print "=======================================\n\n"

topic = dh.get_topic(args.topic, args.project)
print("get topic %s success! detail:\n%s" % (args.topic, topic))
print("=======================================\n\n")

# Oldest cursor of shard '0'.
cursor = dh.get_cursor(args.project, args.topic, CursorType.OLDEST, '0')
print("get topic %s oldest cursor success! detail:\n%s" % (args.topic, cursor))
print("=======================================\n\n")
import sys

from datahub import DataHub
from datahub.exceptions import ResourceNotFoundException, InvalidParameterException, DatahubException, \
    InvalidOperationException, OffsetResetException
from datahub.models import CursorType, OffsetWithSession

# Connection settings and subscription target; all are blank placeholders
# here and must be filled in before running.
endpoint = ''
access_id = ''
access_key = ''
project_name = ''
topic_name = ''
sub_id = ''
shard_id = '0'
shards = [shard_id]

dh = DataHub(access_id, access_key, endpoint)

# Open the subscription session and fetch the stored offsets for `shards`.
# sys.exit() is used instead of the bare exit() helper: exit() is injected by
# the `site` module for interactive use and is not guaranteed to exist
# (e.g. under `python -S` or in embedded interpreters).
try:
    offset_result = dh.init_and_get_subscription_offset(project_name, topic_name, sub_id, shards)
    offsets = offset_result.offsets
except ResourceNotFoundException as e:
    print(e)
    sys.exit(-1)
except InvalidParameterException as e:
    print(e)
    sys.exit(-1)
except DatahubException as e:
    print(e)
    sys.exit(-1)
    # or retry

# Offset entry for the single shard this script works with.
offset = offsets.get(shard_id)
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import sys
import traceback

from datahub import DataHub
from datahub.exceptions import DatahubException, ResourceExistException
from datahub.models import RecordType, BlobRecord

# Names and sizing used by this blob-record example.
project_name = 'blob_record_test'
topic_name = 'blob_record_test'
shard_count = 3
life_cycle = 7

# Placeholder credentials; replace before running.
access_id = '******* your access id *******'
access_key = '******* your access key *******'
endpoint = '******* your endpoint *******'

dh = DataHub(access_id, access_key, endpoint)

# Banner printed after each step's status line.
_RULE = "=======================================\n\n"

# Create the project; an already-existing project is reported, not fatal.
try:
    dh.create_project(project_name, 'comment')
    print("create project success!")
    print(_RULE)
except ResourceExistException:
    print("project already exist!")
    print(_RULE)
except Exception:
    print(traceback.format_exc())
# Example script: fetch and print the metering info of shard '1' of a topic.
#
# Written so that it parses under both Python 2.6+ and Python 3: each print
# call takes exactly one argument, which renders identically in both.

# Connection settings come from datahub.ini.
# NOTE(review): the third argument to configer.get() is presumably the
# fallback used when the option is missing -- confirm against Configer.
configer = Configer('datahub.ini')
access_id = configer.get('datahub', 'access_id', '')
access_key = configer.get('datahub', 'access_key', '')
endpoint = configer.get('datahub', 'endpoint', '')
project_name = configer.get('datahub', 'project_name', 'pydatahub_project_test')
topic_name = configer.get('datahub', 'topic_name', 'pydatahub_tuple_topic_test')

# Echo the effective configuration.
# NOTE(review): this writes the access key to stdout verbatim.
print("=======================================")
print("access_id: %s" % access_id)
print("access_key: %s" % access_key)
print("endpoint: %s" % endpoint)
print("project_name: %s" % project_name)
print("topic_name: %s" % topic_name)
print("=======================================\n\n")

# Nothing can be done without credentials and an endpoint.
if not access_id or not access_key or not endpoint:
    print("access_id and access_key and endpoint must be set!")
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)

try:
    meteringinfo = dh.get_meteringinfo(project_name, topic_name, '1')
    print(meteringinfo)
    print("=======================================\n\n")
except Exception:
    # Top-level boundary of the script: report the full traceback and fail.
    print(traceback.format_exc())
    sys.exit(-1)
project_name = configer.get('datahub', 'project_name', 'pydatahub_project_test') topic_name = configer.get('datahub', 'topic_name', 'pydatahub_blob_topic_test') print "=======================================" print "access_id: %s" % access_id print "access_key: %s" % access_key print "endpoint: %s" % endpoint print "project_name: %s" % project_name print "topic_name: %s" % topic_name print "=======================================\n\n" if not access_id or not access_key or not endpoint: print "access_id and access_key and endpoint must be set!" sys.exit(-1) dh = DataHub(access_id, access_key, endpoint) topic = Topic(name=topic_name) topic.project_name = project_name topic.shard_count = 3 topic.life_cycle = 7 topic.record_type = RecordType.BLOB try: dh.create_topic(topic) print "create topic success!" print "=======================================\n\n" except ObjectAlreadyExistException, e: print "topic already exist!" print "=======================================\n\n" except Exception, e:
# Echo the command-line settings, build the client (including the `stream`
# option), and prepare the payload and an empty record schema.
#
# Each print call takes a single pre-formatted argument so the script parses
# (and prints the same text) under both Python 2 and Python 3.
# NOTE(review): the access key is written to stdout verbatim.
print("access_key:%s" % args.access_key)
print("endpoint:%s" % args.endpoint)
print("project:%s" % args.project)
print("topic:%s" % args.topic)
print("retry_times:%d" % args.retry_times)
print("conn_timeout:%d" % args.conn_timeout)
print("read_timeout:%d" % args.read_timeout)
print("batch record num:%d" % args.batch)
print("round num:%d" % args.round)
print("stream:%s" % args.stream)
print("=======================================\n\n")

dh = DataHub(args.access_id, args.access_key, args.endpoint,
             stream=args.stream,
             retry_times=args.retry_times,
             conn_timeout=args.conn_timeout,
             read_timeout=args.read_timeout)

#project = Project(name=args.project, comment='perf project for python sdk')
#dh.create_project(project)
#print "create project %s success!" % args.project
#print "=======================================\n\n"

# Payload defaults to the single character 'a'; when a file is given on the
# command line its whole content is used instead.
data = 'a'
if args.file:
    with open(args.file, 'r') as f:
        data = f.read()

record_schema = RecordSchema()
# record_schema.add_field(Field('bigint_field', FieldType.BIGINT))
'pydatahub_project_test') topic_name = configer.get('datahub', 'topic_name', 'pydatahub_blob_topic_test') print "=======================================" print "access_id: %s" % access_id print "access_key: %s" % access_key print "endpoint: %s" % endpoint print "project_name: %s" % project_name print "topic_name: %s" % topic_name print "=======================================\n\n" if not access_id or not access_key or not endpoint: print "access_id and access_key and endpoint must be set!" sys.exit(-1) dh = DataHub(access_id, access_key, endpoint) topic = Topic(name=topic_name) topic.project_name = project_name topic.shard_count = 3 topic.life_cycle = 7 topic.record_type = RecordType.BLOB try: dh.create_topic(topic) print "create topic success!" print "=======================================\n\n" except ObjectAlreadyExistException, e: print "topic already exist!" print "=======================================\n\n" except Exception, e:
# Load the credentials from the datahub.ini that sits one directory above
# current_path.
configer.read(os.path.join(current_path, '../datahub.ini'))
access_id = configer.get('datahub', 'access_id')
access_key = configer.get('datahub', 'access_key')
endpoint = configer.get('datahub', 'endpoint')

# Echo the loaded settings.
# NOTE(review): this writes the access key to stdout verbatim.
print("=======================================")
print("access_id: %s" % access_id)
print("access_key: %s" % access_key)
print("endpoint: %s" % endpoint)
print("=======================================\n\n")

# All three settings are required; stop with a non-zero status otherwise.
if not access_id or not access_key or not endpoint:
    print("[access_id, access_key, endpoint] must be set in datahub.ini!")
    sys.exit(-1)

dh = DataHub(access_id, access_key, endpoint)


def clean_topic(datahub_client, project_name, force=False):
    """Delete every topic that ``list_topic(project_name)`` reports.

    :param datahub_client: client object used for the list/delete calls.
    :param project_name: project whose topics are removed.
    :param force: when true, ``clean_subscription`` is called for each topic
        before that topic is deleted.
    """
    topic_names = datahub_client.list_topic(project_name).topic_names
    for topic_name in topic_names:
        if force:
            clean_subscription(datahub_client, project_name, topic_name)
        datahub_client.delete_topic(project_name, topic_name)


def clean_project(datahub_client, force=False):
    """Walk every project that ``list_project()`` reports.

    :param datahub_client: client object used for the list calls.
    :param force: when true, ``clean_topic`` is called for each project.

    NOTE(review): ``force`` is not forwarded to ``clean_topic``, so that call
    runs with its default ``force=False`` and does not clean subscriptions --
    confirm this is intended.
    """
    project_names = datahub_client.list_project().project_names
    for project_name in project_names:
        if force:
            clean_topic(datahub_client, project_name)
from thrift.protocol import TBinaryProtocol from thrift.transport import THttpClient from thrift.transport import TTransport ''' @author: anant bhardwaj @date: Oct 11, 2013 Sample Python client for DataHub Account Creation ''' try: datahub_transport = THttpClient.THttpClient( 'http://datahub.csail.mit.edu/service') datahub_transport = TTransport.TBufferedTransport(datahub_transport) datahub_protocol = TBinaryProtocol.TBinaryProtocol(datahub_transport) datahub_client = DataHub.Client(datahub_protocol) account_transport = THttpClient.THttpClient( 'http://datahub.csail.mit.edu/service/account') account_transport = TTransport.TBufferedTransport(account_transport) account_protocol = TBinaryProtocol.TBinaryProtocol(account_transport) account_client = AccountService.Client(account_protocol) print "Version: %s" % (datahub_client.get_version()) try: print account_client.remove_account( username="******", app_id="confer", app_token="d089b3ed-1d82-4eae-934a-859d7070d364") except Exception, e: