Пример #1
0
 def puts(self, rowkey, columnFamilies, values):
     """Write several rows to ``self.dbname`` with one ``mutateRows`` call.

     Each element of ``values`` is one row. The j-th cell of a row is stored
     in column ``columnFamilies[j] + ':0'``; ``str`` cells are written as-is
     and ``int`` cells are passed through ``encode`` first.

     :param rowkey: one row key shared by every row, or a list holding one
         row key per element of ``values``
     :param columnFamilies: column family names, indexed like the cells of
         a row
     :param values: 2-dimension list, one inner list per row
     :return: ``True`` on success, ``False`` when one of the handled HBase
         errors was raised
     :raises TypeError: if a cell is neither ``str`` nor ``int``
     """
     # A list supplies one key per row; a scalar key is repeated for every
     # row. Both cases must bind ``rowKeys`` before the loop uses it.
     if isinstance(rowkey, list):
         rowKeys = rowkey
     else:
         rowKeys = [rowkey] * len(values)

     mutationsBatch = []
     try:
         for i, value in enumerate(values):
             mutations = []
             for j, column in enumerate(value):
                 if isinstance(column, str):
                     cell = column
                 elif isinstance(column, int):
                     cell = encode(column)
                 else:
                     # Fail loudly instead of re-sending the previous
                     # cell's mutation for an unsupported type.
                     raise TypeError(
                         'unsupported cell type %s at row %d, column %d' %
                         (type(column).__name__, i, j))
                 mutations.append(
                     Hbase.Mutation(column=columnFamilies[j] + ':' + '0',
                                    value=cell))
             mutationsBatch.append(
                 Hbase.BatchMutation(row=rowKeys[i], mutations=mutations))
         self.client.mutateRows(self.dbname, mutationsBatch)
         return True
     except (Hbase.IOError, Hbase.TException, Hbase.TApplicationException,
             Hbase.IllegalArgument) as e:
         logInfo('puts')
         logInfo(e)
         print(e)
     return False
Пример #2
0
def creat_table(table_name="l_test_table"):
    """Create an HBase table and print the resulting list of table names.

    The table gets two column families, ``person:`` and ``content:``, each
    keeping at most 2 versions. Uses the module-level ``transport`` and
    ``client`` objects.

    :param table_name: name of the table to create
    """
    transport.open()
    try:
        families = [
            Hbase.ColumnDescriptor(name='person:', maxVersions=2),
            Hbase.ColumnDescriptor(name='content:', maxVersions=2),
        ]
        client.createTable(table_name, families)
        # Single-argument call form prints the same on Python 2 and 3.
        print(client.getTableNames())
    finally:
        # Close the connection even when one of the RPCs raises.
        transport.close()
Пример #3
0
    def write_hbase(data, table_name, ip, server_port):
        """
        Write records into HBase through a Thrift connection.

        Every record becomes one row: the row key is the hex MD5 digest of
        ``item['url']`` and ``item['img_type']`` is stored in the
        ``info:img_type`` column. All rows are sent with a single
        ``mutateRows`` call.

        :param data: iterable of dicts; each one must provide ``'url'`` and
            ``'img_type'`` as ``str`` values
        :param table_name: name of the target table (``str`` or ``bytes``)
        :param ip: host of the target Thrift server
        :param server_port: port of the target Thrift server
        """

        if not isinstance(table_name, bytes):
            table_name = bytes(table_name, encoding='utf-8')

        # Establish the Thrift connection.
        transport = TSocket.TSocket(ip, server_port)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)
        transport.open()

        try:
            result = []
            for item in data:
                img_type = bytes(item['img_type'], encoding='utf-8')
                row_key = bytes(hashlib.md5(item['url'].encode()).hexdigest(),
                                encoding='utf-8')
                mutations = [Mutation(column=b'info:img_type', value=img_type)]
                result.append(
                    Hbase.BatchMutation(row=row_key, mutations=mutations))

            client.mutateRows(table_name, result, None)
        finally:
            # Release the socket even if building or sending the batch fails.
            transport.close()
Пример #4
0
    def puts(self, rowKeys, values, qualifier='1'):
        """ Put several rows; `qualifier` is incremented after every row.

        The j-th cell of a row is stored in column
        ``self.columnFamilies[j] + ':' + qualifier``. ``str`` cells are
        written as-is and ``int`` cells are passed through ``encode``.

        :param rowKeys: a single row key shared by every row, or a list with
            one row key per element of `values`
        :param values: a 2-dimension list, one element is e.g.
            [name, sex, age]
        :param qualifier: column qualifier of the first row, a string
            holding an integer
        :raises TypeError: if a cell is neither ``str`` nor ``int``

        Usage::

        >>> HBaseTest().puts('test', [['lee', 'f', '27'], ['clark', 'm', 27], ['dan', 'f', '27']])

        """
        mutationsBatch = []
        if not isinstance(rowKeys, list):
            rowKeys = [rowKeys] * len(values)

        for i, value in enumerate(values):
            mutations = []
            for j, column in enumerate(value):
                if isinstance(column, str):
                    cell = column
                elif isinstance(column, int):
                    cell = encode(column)
                else:
                    # Fail loudly instead of re-sending the previous cell's
                    # mutation for an unsupported type.
                    raise TypeError(
                        'unsupported cell type %s at row %d, column %d' %
                        (type(column).__name__, i, j))
                mutations.append(
                    Hbase.Mutation(column=self.columnFamilies[j] + ':' +
                                   qualifier,
                                   value=cell))

            # The next row is written under the next qualifier.
            qualifier = str(int(qualifier) + 1)
            mutationsBatch.append(
                Hbase.BatchMutation(row=rowKeys[i], mutations=mutations))
        self.client.mutateRows(self.table, mutationsBatch, {})
Пример #5
0
    def puts(self, rowKeys, qualifier, values):
        """ Put several single-cell rows into ``self.table``.

        Every value is written to column
        ``self.columnFamilies[0] + ':' + qualifier`` of its own row.
        ``str`` values are UTF-8 encoded; ``int`` values are passed through
        ``encode``.

        :param rowKeys: a single row key shared by every row, or a list with
            one row key per element of `values`; keys must be ``str``
            because they are UTF-8 encoded before sending
        :param qualifier: column qualifier (``str``)
        :param values: list of ``str`` or ``int`` values, one per row
        :raises TypeError: if a value is neither ``str`` nor ``int``

        Usage::

        >>> HBaseTest('table').puts(rowKeys=['1', '2', '3'], qualifier="name", values=[1, 2, 3])

        """

        mutationsBatch = []
        if not isinstance(rowKeys, list):
            rowKeys = [rowKeys] * len(values)

        for i, value in enumerate(values):
            if isinstance(value, str):
                cell = value.encode('utf-8')
            elif isinstance(value, int):
                cell = encode(value)
            else:
                # Fail loudly instead of re-sending the previous row's
                # mutation for an unsupported type.
                raise TypeError('unsupported value type %s at row %d' %
                                (type(value).__name__, i))
            column = (self.columnFamilies[0] + ':' +
                      qualifier).encode('utf-8')
            mutationsBatch.append(
                Hbase.BatchMutation(row=rowKeys[i].encode('utf-8'),
                                    mutations=[
                                        Hbase.Mutation(column=column,
                                                       value=cell)
                                    ]))
        self.client.mutateRows(self.table, mutationsBatch, {})
Пример #6
0
    def push(self, table):
        """
        Scan a whole MongoDB collection and write every document into HBase.

        Each document becomes one HBase row whose key is
        ``md5(_id)[0:10] + ':' + _id``. Every field except ``_id`` is
        stringified and stored in column ``data:<field>``. Progress is
        logged to ``./full_sync.log`` every 100000 documents.

        :param table: source name in ``<database>.<collection>`` form; the
            collection name is also used as the HBase table name
        :return: None
        """

        handle = RotatingFileHandler('./full_sync.log', maxBytes=50 * 1024 * 1024, backupCount=3)
        handle.setFormatter(logging.Formatter(
            '%(asctime)s %(name)-12s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'))

        logger = logging.getLogger(table)
        logger.addHandler(handle)
        logger.setLevel(logging.INFO)
        logger.info('开始推送 ' + table + ' !')

        name_parts = table.split('.')
        db_name = name_parts[0]
        table_name = name_parts[1]
        # Encode the HBase table name once instead of once per document.
        hbase_table = bytes(table_name, encoding="utf8")

        # hbase.client.keyvalue.maxsize defaults to 10M; larger values are
        # replaced by the text "None".
        max_value_size = 10 * 1024 * 1024
        oversize_placeholder = bytes(str(None), encoding="utf8")

        client = pymongo.MongoClient(MONGODB_HOST, MONGODB_PORT, unicode_decode_error_handler='ignore')
        try:
            admin = client['admin']
            admin.authenticate(USER, PASSWORD)

            transport = TSocket.TSocket(THRIFT_IP, THRIFT_PORT)
            transport = TTransport.TBufferedTransport(transport)
            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            thrift_client = Hbase.Client(protocol)
            transport.open()
            try:
                count = 0
                cursor = client[db_name][table_name].find().sort('$natural', pymongo.ASCENDING)
                for record in cursor:
                    count += 1
                    mutations = []
                    # Row key is md5(_id)[0:10]:_id
                    _id = str(record['_id'])
                    row_key = bytes(
                        hashlib.md5(bytes(_id, encoding="utf-8")).hexdigest()[0:10] + ':' + _id,
                        encoding="utf-8")
                    for item in record:
                        if item == '_id':
                            continue
                        key = bytes('data:' + item, encoding="utf8")
                        var = bytes(str(record[item]), encoding="utf8")
                        if len(var) >= max_value_size:
                            var = oversize_placeholder
                        mutations.append(Hbase.Mutation(column=key, value=var))

                    thrift_client.mutateRow(hbase_table, row_key, mutations, {})

                    if count % 100000 == 0:
                        # Integer division keeps the progress figure free of
                        # a trailing ".0".
                        progress = table + ' 已经读出 ' + str(count // 10000) + ' 万条数据'
                        if 'create_time' in record:
                            progress = progress + '    ' + str(record['create_time'])
                        logger.info(progress)
            finally:
                # Release the Thrift socket even when the sync aborts.
                transport.close()
        finally:
            client.close()
0
 def open_spider(self, spider):
     """Hand the two column-family descriptors to the HBase helper.

     The ``self.cf_basic`` family keeps one version; the ``self.cf_price``
     family keeps one version with a time-to-live of 365 days expressed in
     seconds. Both are passed, as a tuple, to
     ``self.hbase.create_table_if_not_exists``.

     :param spider: not used by this method
     """
     price_ttl_seconds = 365 * 24 * 60 * 60
     basic_family = Hbase.ColumnDescriptor(name=self.cf_basic, maxVersions=1)
     price_family = Hbase.ColumnDescriptor(name=self.cf_price,
                                           maxVersions=1,
                                           timeToLive=price_ttl_seconds)
     self.hbase.create_table_if_not_exists((basic_family, price_family))
Пример #8
0
 def put(self, rowKey, qualifier='0', *args):
     """Write one row to ``self.table`` with a single ``mutateRow`` call.

     The j-th positional value is stored in column
     ``self.columnFamilies[j] + ':' + qualifier``. ``str`` values are
     written as-is and ``int`` values are passed through ``encode``.

     :param rowKey: key of the row to write
     :param qualifier: column qualifier shared by every cell
     :param args: cell values, one per column family
     :raises TypeError: if a value is neither ``str`` nor ``int``
     """
     mutations = []
     for j, column in enumerate(args):
         if isinstance(column, str):
             cell = column
         elif isinstance(column, int):
             cell = encode(column)
         else:
             raise TypeError('unsupported cell type %s at position %d' %
                             (type(column).__name__, j))
         # Every supported cell is collected, not only the int ones.
         mutations.append(
             Hbase.Mutation(column=self.columnFamilies[j] + ':' + qualifier,
                            value=cell))
     # Send the row once, after all cells are collected; with no values
     # there is nothing to send.
     if mutations:
         self.client.mutateRow(self.table, rowKey, mutations, {})
Пример #9
0
    def write_hbase(self, data):
        """
        Apply one insert or delete operation to HBase.

        The row key is produced by ``self.generate_rowkey(data['_id'])``.

        :param data: one HBase operation, for example
        {
            # 'i' inserts, 'd' deletes (any other value does nothing)
            'op': 'i',
            # HBase table to write to
            'table_name': 'hb_charts',
            # id of the record
            '_id': '121314125_img2',
            # value of every field to write
            'columns': {
                'title' : 'This is a title'
            }
        }
        :return: None
        """

        operation = data['op']
        hbase_table = bytes(data['table_name'], "utf-8")
        hbase_row = bytes(self.generate_rowkey(data['_id']), "utf-8")
        if 'columns' in data:
            fields = data['columns']
        else:
            fields = []

        if operation == 'd':
            self.client.deleteAllRow(hbase_table, hbase_row, {})
            self.logger.debug(str(QUEUE.qsize()) + ' 删除到 HBase ' + str(data))
            return

        if operation != 'i':
            return

        # hbase.client.keyvalue.maxsize defaults to 10M; a value at or above
        # that size is replaced by the text "None".
        size_limit = 10 * 1024 * 1024
        mutations = []
        for name in fields:
            if name == '_id':
                continue
            column = bytes(self.cf + ':' + name, encoding="utf8")
            payload = bytes(str(fields[name]), encoding="utf8")
            oversized = not len(payload) < size_limit
            if oversized:
                payload = bytes(str(None), encoding="utf8")
            mutations.append(Hbase.Mutation(column=column, value=payload))
            if oversized:
                self.logger.warning(self.getName() + ' ' +
                                    data['table_name'] + ' 的 _id为 ' +
                                    data['_id'] + ' 的数据的 ' + str(name) +
                                    ' 字段的值大小超过了' +
                                    ' HBase 默认规定的键值10M限制,先已经置 None 替代该值')

        self.client.mutateRow(hbase_table, hbase_row, mutations, {})
        self.logger.debug(str(QUEUE.qsize()) + ' 插入到 HBase ' + str(data))
Пример #10
0
    def write_data_to_hbase(data, col_names, table_name, ip, server_port):
        """
        Push the rows of an iterator into HBase in batches of 1000.

        The docstring of the original describes this as the worker called
        from mapPartitions, with the RDD partition passed in as an iterator.
        Rows are buffered; every time the buffer holds 1000 rows it is sent
        with ``mutateRows`` and emptied, and the remainder is sent at the
        end.

        :param data: iterator of dict-like rows; each row must contain
            ``"rowKey"`` and may contain any of the requested columns
        :param col_names: names of the columns to push
        :param table_name: name of the target table (``str`` or ``bytes``)
        :param ip: host of the target Thrift server
        :param server_port: port of the target Thrift server
        :return: an empty list (nothing is ever added to it)
        """
        print("start putDataAsPartition")
        if not isinstance(table_name, bytes):
            table_name = bytes(table_name, encoding='utf-8')
        col_names = HBaseUtils().str_list_to_bytes_list(col_names)
        # Decode every column name once instead of twice per row.
        columns = [(str(col_name, encoding='utf-8'), col_name)
                   for col_name in col_names]

        # Establish the HBase connection.
        transport = TSocket.TSocket(ip, server_port)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)
        transport.open()

        return_data = []
        try:
            result = []
            for line in data:
                # Build the BatchMutation for this row from the columns it
                # actually contains.
                mutations_ = [
                    Mutation(column=raw_name,
                             value=bytes(line[text_name], encoding='utf-8'))
                    for text_name, raw_name in columns
                    if text_name in line
                ]
                result.append(
                    Hbase.BatchMutation(row=bytes(line["rowKey"],
                                                  encoding='utf-8'),
                                        mutations=mutations_))
                # Push to HBase once per 1000 buffered rows.
                if len(result) >= 1000:
                    client.mutateRows(table_name, result, None)
                    result = []

            # Push whatever is left in the buffer.
            if result:
                client.mutateRows(table_name, result, None)
        finally:
            # Release the socket even when a push fails.
            transport.close()
        return return_data
Пример #11
0
    def __connect(self):
        """
        Open a Thrift connection to the first reachable HBase node.

        Nodes in ``self.nodes`` (``"host:port"`` strings) are tried in
        order, each up to ``self.rety`` times with a socket timeout of
        ``self.timeout``. On success ``self.transport`` and ``self.client``
        refer to the open connection.

        :raises Exception: when the last node also fails
        """
        last_index = len(self.nodes) - 1
        for index, node in enumerate(self.nodes):
            try:
                host, port = node.split(':')
                for i in range(0, self.rety):
                    try:
                        transport = TSocket.TSocket(host, port)
                        transport.setTimeout(self.timeout)
                        self.transport = TTransport.TBufferedTransport(
                            transport)
                        self.client = Hbase.Client(
                            TBinaryProtocol.TBinaryProtocol(self.transport))
                        self.transport.open()
                        break
                    except Exception:
                        # Give up on this node once the attempts are used up.
                        if i + 1 >= self.rety:
                            raise Exception('cannot connect hbase, info: %s' %
                                            traceback.format_exc())

                break
            except Exception:
                # Fall through to the next node; only the failure of the
                # last node is reported to the caller.
                if index >= last_index:
                    raise
Пример #12
0
def main(args):
    """Print the first rows of an HBase table.

    Reads the HBase host through ``getConfiguration('host.properties')``,
    connects to port 9090, publishes the client as the module-level
    ``client`` and prints the rows returned by ``getRowsLimit``.

    :param args: command-line arguments; ``args[1]`` is the table name and
        the optional ``args[2]`` is the number of rows (default 10)
    """
    global client

    if len(args) < 2:
        print("TableScan.py tableName No[10]")
        sys.exit(1)

    table_name = args[1]
    NO = int(args[2]) if len(args) >= 3 else 10

    getConfiguration('host.properties')

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        ret = getRowsLimit(table_name, NO)
        printRowsResult(ret)
    finally:
        # Release the connection whether or not the scan succeeded.
        transport.close()
Пример #13
0
    def __init__(self, columnn_family='data'):
        """
        Set up the file logger and the HBase Thrift client.

        :param columnn_family: column family used when writing to HBase
        """

        super(HBaseSync, self).__init__()

        log_format = '%(asctime)s %(name)-12s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'
        file_handler = RotatingFileHandler('./hbase_sync.log',
                                           maxBytes=50 * 1024 * 1024,
                                           backupCount=3)
        file_handler.setFormatter(logging.Formatter(log_format))

        self.logger = logging.getLogger('HBaseSync')
        self.logger.addHandler(file_handler)
        # self.logger.setLevel(logging.INFO)

        buffered = TTransport.TBufferedTransport(
            TSocket.TSocket(THRIFT_IP, THRIFT_PORT))
        self.client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(buffered))
        buffered.open()

        self.cf = columnn_family
Пример #14
0
    def __init__(self,
                 table='test',
                 columnFamilies=None,
                 host='localhost',
                 port=9090):
        """Connect to the HBase Thrift server and register column families.

        :param table: name of the table this object works on
        :param columnFamilies: column families handed to
            ``set_column_families``; defaults to
            ``['indexData:', 'result']``
        :param host: Thrift server host; when left at ``'localhost'`` the
            value of ``Utools().HOST_HBASE`` is tried first
        :param port: Thrift server port
        """
        # Build the default per call so instances never share one list.
        if columnFamilies is None:
            columnFamilies = ['indexData:', 'result']

        if host == 'localhost':
            try:
                host = Utools().HOST_HBASE
            except Exception:
                print('use the default host(hbase):"localhost"')
                host = 'localhost'

        self.table = table
        self.port = port

        # Connect to HBase Thrift server
        socket = TSocket.TSocket(host, port)
        socket.setTimeout(1000 * 10)
        self.transport = TTransport.TBufferedTransport(socket)
        self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)

        # Create and open the client connection
        self.client = Hbase.Client(self.protocol)
        self.transport.open()

        # set type and field of column families
        self.set_column_families(columnFamilies)
        self._build_column_families()
Пример #15
0
    def scanner(self, numRows=100, startRow=None, stopRow=None):
        """Scan ``self.table`` and return every row as a dict.

        Each returned dict has the row key under ``'row'`` and, for every
        qualifier, a dict mapping column family to value. Values of
        families whose entry in ``self.columnFamiliesType`` is ``int`` are
        passed through ``decode``; ``str`` families are returned as-is.

        :param numRows: number of rows fetched per ``scannerGetList`` call
        :param startRow: first argument given to ``Hbase.TScan``
        :param stopRow: second argument given to ``Hbase.TScan``
        :return: list of row dicts
        """
        scan = Hbase.TScan(startRow, stopRow)
        scannerId = self.client.scannerOpenWithScan(self.table, scan, {})

        ret = []
        try:
            rowList = self.client.scannerGetList(scannerId, numRows)
            while rowList:
                for r in rowList:
                    rd = {'row': r.row}
                    for k, v in r.columns.items():
                        # Split on the first ':' only, so a qualifier may
                        # itself contain ':'.
                        cf, qualifier = k.split(':', 1)
                        if qualifier not in rd:
                            rd[qualifier] = {}

                        idx = self.columnFamilies.index(cf)
                        if self.columnFamiliesType[idx] == str:
                            rd[qualifier].update({cf: v.value})
                        elif self.columnFamiliesType[idx] == int:
                            rd[qualifier].update({cf: decode(v.value)})

                    ret.append(rd)

                rowList = self.client.scannerGetList(scannerId, numRows)
        finally:
            # Release the server-side scanner even when decoding fails.
            self.client.scannerClose(scannerId)
        return ret
Пример #16
0
    def _connect(self):
        """Connect ``self.storage`` to an HBase Thrift server.

        When the config defines ``HBASE_STORAGE_SERVER_HOSTS`` the host is
        picked from that list by
        ``(server port + self.hbase_server_offset) % len(hosts)``;
        otherwise ``HBASE_STORAGE_SERVER_HOST`` is used. On failure the
        error is logged and ``self.hbase_server_offset`` is incremented, so
        a later call picks the next host from the list.
        """
        config = self.context.config
        if hasattr(config, 'HBASE_STORAGE_SERVER_HOSTS'):
            hosts = config.HBASE_STORAGE_SERVER_HOSTS
            host = hosts[(self.context.server.port +
                          self.hbase_server_offset) % len(hosts)]
        else:
            host = config.HBASE_STORAGE_SERVER_HOST
        port = config.HBASE_STORAGE_SERVER_PORT

        socket = TSocket(host=host, port=port)
        # Setting the timeout is best effort; a failure here must not
        # prevent the connection attempt.
        try:
            timeout = 5
            socket.setTimeout(timeout * 1000)
        except Exception:
            pass

        try:
            transport = TBufferedTransport(socket)
            transport.open()
            protocol = TBinaryProtocol.TBinaryProtocol(transport)
            self.storage = Hbase.Client(protocol)
            logger.info("Connected to HBase server " + host + ":" +
                        str(port))
        except Exception:
            logger.error("Error connecting to HBase server " + host + ":" +
                         str(port))
            self.hbase_server_offset = self.hbase_server_offset + 1
Пример #17
0
 def __init__(self):
     """Open a buffered Thrift transport to a fixed host and port.

     Stores the host, port, transport, protocol and HBase client on the
     instance.
     """
     host, port = "193.169.100.33", 2181
     self.host = host
     self.port = port

     buffered = TBufferedTransport(TSocket(host, port))
     self.transport = buffered
     buffered.open()

     binary_protocol = TBinaryProtocol.TBinaryProtocol(buffered)
     self.protocol = binary_protocol
     self.client = Hbase.Client(binary_protocol)
Пример #18
0
 def post(self):
     """Look up one row of ``tabledata`` and return its columns as a dict.

     The row key is ``str(task_id + user_id * 1000 + 1000000)``, built from
     the ``task_id`` and ``user_id`` form fields.

     :return: dict mapping each column name, with its first 5 characters
         removed, to the cell value as text; ``None`` when the row does
         not exist
     """
     task_id = request.form['task_id']
     user_id = request.form['user_id']
     print(task_id, user_id, "===============")
     transport = TSocket.TSocket('172.16.100.200', 9090)
     transport = TTransport.TBufferedTransport(transport)
     protocol = TBinaryProtocol.TBinaryProtocol(transport)
     client = Hbase.Client(protocol)
     transport.open()
     try:
         tableName = 'tabledata'
         res_uid = int(user_id) * 1000
         res_tid = int(task_id)
         res_num = res_tid + res_uid
         rowKey = str(res_num + 1000000)
         print(rowKey)
         result = client.getRow(tableName, rowKey, None)
     finally:
         # One connection is opened per request, so always release it.
         transport.close()

     la = {}
     if result:
         for (k, v) in result[0].columns.items():
             # The column name is left-justified to 20 characters, so
             # splitting on " :" separates the padded name from the value.
             kk = str("%-20s:%s" % (k, v.value))
             ll = kk.split(" :")
             la[ll[0][5:]] = str(ll[1])
         return la
Пример #19
0
    def run(self):
        """Time random single-row reads from the ``users`` table.

        Performs ``int(sys.argv[2])`` calls to ``get_row`` with random keys
        between 0 and 1999, then prints the total and the average elapsed
        time. The average is reported as 0.0 when no reads were requested.
        """
        iterations = int(sys.argv[2])

        # Address and port of the Thrift server.
        transport = TSocket.TSocket('localhost', 6666)
        # Socket timeout in milliseconds.
        transport.setTimeout(5000)
        # Transport layer (TFramedTransport or TBufferedTransport).
        trans = TTransport.TBufferedTransport(transport)
        # Wire protocol.
        protocol = TBinaryProtocol.TBinaryProtocol(trans)
        client = Hbase.Client(protocol)
        # Open the connection.
        trans.open()

        total = 0.0
        try:
            for _ in range(iterations):
                key = str(random.randint(0, 1999))

                beg = time.time()
                get_row('users', key, client)
                end = time.time()
                total += end - beg
        finally:
            # Release the connection even when a read fails.
            trans.close()

        avg = total / iterations if iterations else 0.0
        # %-formatting prints the same text on Python 2 and 3.
        print('total: %s' % total)
        print('avg: %s' % avg)
Пример #20
0
    def find_row(self, table_name, column_family, column, column_value):
        """
        Yield the HBase rows whose column equals a given value.

        Opens a Thrift connection to ``self.master_ip:self.port``, scans
        the table with a ``SingleColumnValueFilter`` and yields one dict
        per row mapping column name to cell value. The scanner and the
        connection are released when the generator finishes or is closed.

        :param table_name: table to scan (``str``)
        :param column_family: column family of the filtered column
        :param column: qualifier of the filtered column
        :param column_value: value the column must equal
        :return: generator of ``{column name: value}`` dicts
        """
        transport = TSocket.TSocket(self.master_ip, self.port)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)
        transport.open()

        try:
            scan = TScan()
            # NOTE(review): the arguments are interpolated into the filter
            # expression unescaped; confirm callers never pass quotes.
            scan.filterString = bytes("SingleColumnValueFilter('{cf}', '{col}', {opt}, 'binary:{val}', true, true)".format(
                cf=column_family, col=column, opt="=", val=column_value), encoding='utf-8')
            scanner = client.scannerOpenWithScan(bytes(table_name, encoding='utf-8'), scan, None)

            try:
                while True:
                    r = client.scannerGet(scanner)
                    if not r:
                        break
                    yield {name: cell.value for name, cell in r[0].columns.items()}
            finally:
                client.scannerClose(scanner)
        finally:
            # Runs on exhaustion, on error and when the consumer stops early.
            transport.close()
Пример #21
0
def main(args):
    """Print the HBase rows whose keys are listed in a file.

    The file name must start with ``<orgId>_<subOrgId>_``; rows are read
    from table ``<orgId>_<subOrgId>_master_<orgId>``. Only the first key
    is looked up unless ``-all`` is given.

    :param args: command-line arguments; ``args[1]`` is the file of row
        keys (one per line) and the optional ``args[2]`` may be ``-all``
    """
    global client

    if len(args) < 2:
        print("%s <verified file> -all" % (args[0]))
        sys.exit(1)

    filename = args[1]
    opt_all = len(args) > 2 and args[2] == "-all"

    filenamearray = filename.split("_")
    orgId = filenamearray[0]
    subOrgId = filenamearray[1]

    getConfiguration('host.properties')

    transport = TBufferedTransport(TSocket(hbaseHost, 9090))
    transport.open()
    try:
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = Hbase.Client(protocol)

        tablename = "%s_%s_master_%s" % (orgId, subOrgId, orgId)

        with open(filename, "r") as key_file:
            for line in key_file:
                row_key = line.strip()
                row = client.getRow(tablename, row_key)
                print(row_key)
                printRow(row)
                print("")
                if not opt_all:
                    break
    finally:
        # Release the connection even when a lookup fails.
        transport.close()
Пример #22
0
    def get_statuses(self, uid):
        """Yield the statuses of one user as plain dicts.

        Scans ``self.cfg['table_status']`` between ``pack_mid(uid, 0)`` and
        ``pack_mid(uid, 0x7fffffffffffffff)``. Each row is decoded with
        ``load_status``; rows without a status are skipped. The ``batches``
        attribute is removed from the status (and from the repost, stored
        under ``'retweeted_status'``) before it is yielded.

        :param uid: user id used to build the scan range
        :return: generator of status dicts
        """
        key_beg = pack_mid(uid, 0)
        key_end = pack_mid(uid, 0x7fffffffffffffff)
        scan = Hbase.TScan(startRow=key_beg, stopRow=key_end)

        client = self._get_client()
        scanner = client.scannerOpenWithScan(self.cfg['table_status'], scan,
                                             None)

        try:
            # The batch size grows by one with every fetch.
            batch_size = 0
            while True:
                batch_size += 1
                row_list = client.scannerGetList(scanner, batch_size)
                if not row_list:
                    break

                for row in row_list:
                    (status, repost) = load_status(row.columns)
                    if status is None:
                        # Nothing to report for this row; never re-yield
                        # the previous row's dict.
                        continue
                    status.__dict__.pop('batches')
                    ret = {}
                    ret.update(status.__dict__)
                    if repost is not None:
                        repost.__dict__.pop('batches')
                        ret['retweeted_status'] = repost.__dict__
                    yield ret
        finally:
            # Runs on exhaustion, on error and when the consumer stops early.
            client.scannerClose(scanner)
Пример #23
0
 def __init__(self):
     """Open a Thrift connection to ``student62:9090``.

     Stores the table name ``database_test``, the open buffered transport,
     the binary protocol and the HBase client on the instance.
     """
     self.tableName = 'database_test'

     raw_socket = TSocket.TSocket('student62', 9090)
     self.transport = TTransport.TBufferedTransport(raw_socket)
     self.transport.open()

     binary_protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
     self.protocol = binary_protocol
     self.client = Hbase.Client(binary_protocol)
Пример #24
0
    def __init__(self, host='', port=9090):
        """Create the HBase Thrift client and open its transport.

        :param host: Thrift server host
        :param port: Thrift server port
        """
        self.transport = TTransport.TBufferedTransport(
            TSocket.TSocket(host=host, port=port))
        self.client = Hbase.Client(
            TBinaryProtocol.TBinaryProtocol(self.transport))
        self.transport.open()
Пример #25
0
def write_to_hbase(result):
    """Write one record to the ``film22`` HBase table.

    Every key of ``result`` becomes column ``f:<key>`` with the value
    converted by ``to_byte``. The row key is ``to_md5(result["n"])``.
    A connection to ``127.0.0.1:9090`` is opened for the write and closed
    afterwards.

    :param result: dict of field name to value; must contain ``"n"``
    """
    socket = TSocket.TSocket('127.0.0.1', 9090)
    socket.setTimeout(5000)
    transport = TTransport.TBufferedTransport(socket)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)

    # Build the mutations and the row key before connecting, so bad input
    # never leaves a socket open.
    mutations = [
        Mutation(column=("f:" + x).encode('utf-8'), value=to_byte(result[x]))
        for x in result
    ]
    row_key = to_md5(result["n"]).encode('utf-8')

    transport.open()
    try:
        client.mutateRow("film22".encode('utf-8'), row_key, mutations, None)
    finally:
        # This function runs once per record; release the socket each time.
        transport.close()

    print(result)
    print("录入完成")
Пример #26
0
def filter_data(x):
    """Delete obsolete rows from the ``hb_charts`` HBase table.

    A row is deleted when its ``state`` is ``'3'``, ``'4'`` or ``'5'``.
    When ``QUERY_MONGODB`` is true it is also deleted when MongoDB has no
    document whose ``_id`` equals the last ``:``-separated part of the row
    key. A failed delete is ignored so the remaining rows are still
    processed.

    :param x: iterable of rows providing ``'key'`` and ``'state'``
    """
    hbase_host = '10.27.71.108'
    hbase_port = 9099
    transport = TTransport.TBufferedTransport(
        TSocket.TSocket(hbase_host, hbase_port))
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    transport.open()

    try:
        user = '******'
        password = '******'
        mongo_host = 'dds-bp1d09d4b278ceb41.mongodb.rds.aliyuncs.com'
        mongo_port = 3717
        db_name = 'cr_data'
        table = 'hb_charts'

        mongo_client = MongoClient(mongo_host, mongo_port)
        try:
            db = mongo_client[db_name]
            db.authenticate(user, password)
            collection = db[table]

            for row in x:
                rowkey = row['key'].split(':')[-1]
                state = row['state']
                # MongoDB is only queried when the state alone does not
                # already decide the deletion.
                obsolete = state in ['3', '4', '5'] or (
                    QUERY_MONGODB
                    and collection.find({'_id': rowkey}).count() == 0)
                if not obsolete:
                    continue
                try:
                    client.deleteAllRow(b'hb_charts',
                                        bytes(row['key'], 'utf-8'),
                                        attributes=None)
                except Exception:
                    # Best effort: one failed delete must not stop the rest.
                    pass
        finally:
            mongo_client.close()
    finally:
        transport.close()
Пример #27
0
def main():
    """Collect user ids from the ``userrelation`` table into a file.

    Scans the ``follower`` column of every row. The first 10 characters
    of each row key and every ``:``-separated id of the ``follower:`` cell
    are added to the module-level ``idlist``, which is then written, one
    id per line, to ``/home/mapred/sinaid/id<unix time>``.
    """
    hbasetransport = TSocket.TSocket("192.168.1.163", 9090)
    hbasetransport = TTransport.TBufferedTransport(hbasetransport)
    hbaseprotocol = TBinaryProtocol.TBinaryProtocol(hbasetransport)
    hbaseclient = Hbase.Client(hbaseprotocol)
    hbasetransport.open()

    try:
        scanner_id = hbaseclient.scannerOpen('userrelation', '', ['follower'])
        try:
            while True:
                li = hbaseclient.scannerGet(scanner_id)
                if not li:
                    break
                for item in li:
                    idlist.add(item.row[0:10])
                    ids = item.columns['follower:'].value
                    for i in ids.split(':'):
                        idlist.add(i)
        finally:
            hbaseclient.scannerClose(scanner_id)
    finally:
        # Release the connection even when the scan fails.
        hbasetransport.close()

    # int() gives the same text as Python 2's long() and exists on Python 3.
    time_str = str(int(time.time()))
    fname = '/home/mapred/sinaid/' + 'id' + time_str
    with open(fname, 'w') as f:
        for i in idlist:
            f.write(i + '\n')
Пример #28
0
def setup_thrift_transport(host):
  """Open a buffered Thrift connection to ``host`` on port 9090.

  :param host: host of the HBase Thrift server
  :return: tuple ``(client, transport)``; the transport is already open
  """
  buffered = TTransport.TBufferedTransport(TSocket.TSocket(host, 9090))
  hbase_client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(buffered))
  buffered.open()
  return hbase_client, buffered
Пример #29
0
def display_3D_frame(request):
    """Render ``molview.html`` with 100 rows read from the ``drug`` table.

    For row keys ``'1'`` to ``'100'`` the ``label`` and ``data`` cells are
    fetched from HBase and passed to the template as ``smiles_list``, a
    list of ``(label, data, row_key)`` tuples.

    :param request: the incoming request, forwarded to ``render``
    :return: the response produced by ``render``
    """
    # Connect to HBase Thrift server
    host = 'ai-master.sh.intel.com'
    port = 9090
    transport = TTransport.TBufferedTransport(TSocket.TSocket(host, port))
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    transport.open()

    try:
        smiles_list = []
        # row key starts from 1
        for i in range(1, 101):
            row_key = str(i)
            row_label = client.get('drug', row_key, 'label')
            row_data = client.get('drug', row_key, 'data')
            smiles_list.append(
                (row_label[0].value, row_data[0].value, row_key))
    finally:
        # Release the connection even when a lookup fails.
        transport.close()

    context_var = {
        'smiles_list': smiles_list,
    }

    return render(request, 'molview.html', context=context_var)
Пример #30
0
 def __create_table(self, table):
     """Create ``table`` with one column family per configured name.

     A ``ColumnDescriptor`` is built for every entry of
     ``self.columnFamilies`` and the list is passed to ``createTable``.

     :param table: name of the table to create
     """
     descriptors = [
         Hbase.ColumnDescriptor(name=family_name)
         for family_name in self.columnFamilies
     ]
     self.client.createTable(table, descriptors)