Exemplo n.º 1
0
    def scanfilter(self, mmsi, table, query):
        """Scan ``table`` for one vessel's recent rows and return decoded fields.

        The scan is bounded by row keys of the form ``<mmsi><epoch seconds>``,
        starting 8,640,000 seconds (100 days) before now and stopping at now,
        and is further restricted by the filter string ``query``.

        :param mmsi: value placed at the front of the start/stop row keys.
        :param table: name of the HBase table to scan.
        :param query: filter string passed to ``Table.scan(filter=...)``.
        :return: one list per scanned row holding the UTF-8 decoded values of
            the requested columns, in the order they are requested below.
        """
        wanted = [
            "info:mmsi", "info:source", "motion:cog", "motion:latitude",
            "motion:longitude", "motion:rot", "motion:sog", "motion:time",
            "motion:trueHeading",
        ]
        hbase_table = happybase.Table(table, self.getHbaseConnection())

        now = int(time.time())
        earliest = now - 8640000
        scanned = hbase_table.scan(
            filter=query,
            columns=wanted,
            row_start="{0}{1}".format(mmsi, earliest),
            row_stop="{0}{1}".format(mmsi, now),
        )
        # Cell keys are looked up as bytes; a row missing any requested column
        # makes the lookup fail, so the error propagates to the caller.
        return [
            [cells[name.encode()].decode('utf8') for name in wanted]
            for _row_key, cells in scanned
        ]
Exemplo n.º 2
0
    def __init__(self,
                 hbase: str,
                 table: str,
                 filter: str,
                 request: list = None,
                 batch_size: int = 128,
                 train_mode: bool = True,
                 field=FIELD):
        """Store the HBase connection, table handle and query settings.

        :param hbase: host passed to ``happybase.Connection``.
        :param table: name of the table to wrap.
        :param filter: filter value kept on the instance as ``self.filter``.
        :param request: optional list (of days, per the original note); a
            falsy value is replaced by a new empty list.
        :param batch_size: accepted but not used in this method.
        :param train_mode: accepted but not used in this method.
        :param field: object providing ``copy()``; the instance keeps a copy.
        :raises AssertionError: if the resulting ``request`` is not a list.
        """
        # autoconnect=False is passed, so opening the connection is left to
        # later code (presumably before the first table access).
        conn = happybase.Connection(hbase, autoconnect=False)
        self.connection = conn
        self.table = happybase.Table(table, conn)
        self.filter = filter
        self.request = request if request else []

        assert isinstance(self.request, list), "request must be list!"
        # Copy on purpose: the instance must not share the object passed in
        # (by default the module-level FIELD).
        self.field = field.copy()
Exemplo n.º 3
0
def connect_to_hbase(hosts, table_name):
    """Connect to a randomly chosen host and return a handle on ``table_name``.

    The table is created with one default column family ``cf`` if it does
    not exist yet. The connection is left open for the returned table to use;
    closing it is up to the caller.

    :param hosts: non-empty sequence of host names to pick from.
    :param table_name: table name, as ``str`` or ``bytes``.
    :return: ``happybase.Table`` bound to the opened connection.
    """
    connection = happybase.Connection(random.choice(hosts))
    connection.open()
    try:
        # tables() yields bytes names on Python 3, so a str name has to be
        # compared in encoded form or an existing table is never detected.
        wanted = (table_name if isinstance(table_name, bytes)
                  else table_name.encode('utf-8'))
        existing = connection.tables()
        if table_name not in existing and wanted not in existing:
            connection.create_table(table_name, {'cf': dict()})
    except Exception:
        # Do not leave the socket open when the check or the creation fails.
        connection.close()
        raise
    return happybase.Table(table_name, connection)
Exemplo n.º 4
0
def create_htable(table_name):
    """Connect to one of the hadoop2-11..13 hosts and return ``table_name``.

    The table is created with one default column family ``cf`` if it does
    not exist yet. The connection stays open for the returned table.

    :param table_name: table name, as ``str`` or ``bytes``.
    :return: ``happybase.Table`` bound to the opened connection.
    """
    # range() instead of the Python 2-only xrange(); the result is the same.
    HOSTS = ["hadoop2-%02d.yandex.ru" % i for i in range(11, 14)]
    conn = happybase.Connection(random.choice(HOSTS))
    conn.open()
    try:
        # tables() yields bytes names on Python 3, so a str name has to be
        # compared in encoded form or an existing table is never detected.
        wanted = (table_name if isinstance(table_name, bytes)
                  else table_name.encode('utf-8'))
        existing = conn.tables()
        if table_name not in existing and wanted not in existing:
            conn.create_table(table_name, {'cf': dict()})
    except Exception:
        # Do not leave the socket open when the check or the creation fails.
        conn.close()
        raise
    return happybase.Table(table_name, conn)
Exemplo n.º 5
0
    def scan_table(self, table, row_start, row_stop, row_prefix):
        """Scan one page of rows and queue a profile update for each user.

        For every row with a non-empty ``i:phone`` a ``(phone, properties)``
        tuple is appended to ``self.l`` and ``self.row_stop`` is set to that
        row's key. Rows with an empty phone only increment
        ``self.zero_count``.

        :param table: name of the HBase table to scan.
        :param row_start: first row key of the page; stored as
            ``self.row_start``.
        :param row_stop: row key at which the scan stops.
        :param row_prefix: optional row-key prefix passed to the scan.
        :return: ``0`` when ``self.row_stop`` equals ``self.row_start`` after
            the scan or fewer than a full page of rows was read, else ``1``.
        """
        page_size = 1000

        def as_text(raw):
            # Cells arrive as bytes; str() on bytes gives the "b'...'" repr
            # on Python 3, which defeats the empty-phone check below.
            return raw.decode('utf-8') if isinstance(raw, bytes) else str(raw)

        self.row_start = row_start

        conn = self.get_hbase_connection()
        count = 0
        try:
            t = happybase.Table(table, conn)
            scan = t.scan(row_start=row_start,
                          row_stop=row_stop,
                          row_prefix=row_prefix,
                          limit=page_size)

            for key, value in scan:
                count += 1
                row = dict(value)

                # Record the user login event.
                distinct_id = as_text(row[b'i:phone'])
                if distinct_id == '':
                    self.zero_count += 1
                    continue

                self.recourd_count += 1

                # Grade entries are "_"-separated; the first and last pieces
                # are dropped, and fields 1 and 2 of each ":"-separated entry
                # are summed.
                corr = 0
                num = 0
                for entry in as_text(row[b'i:grade']).split("_")[1:-1]:
                    parts = entry.split(":")
                    corr += int(parts[1])
                    num += int(parts[2])

                accuracy = 0.0 if num == 0 else corr / num

                properties = {
                    'HuaTuOnline_exercises': float(row[b'i:exerciseNum']),
                    'HuaTuOnline_prediction_score':
                    float(row[b'i:predictScore']),
                    'HuaTuOnline_accuracy': accuracy,
                }

                self.l.append((distinct_id, properties))
                self.row_stop = key
        finally:
            # Close on every path, including the early "no progress" return
            # and any exception raised while reading or parsing a row.
            conn.close()

        if self.row_stop == self.row_start:
            return 0
        return 0 if count < page_size else 1
Exemplo n.º 6
0
def connect(table_name):
    """Open a connection to a random host from HOSTS and return the table.

    :param table_name: name of the table to wrap.
    :return: ``happybase.Table`` bound to the opened connection; the
        connection is not closed here.
    """
    chosen_host = random.choice(HOSTS)
    connection = happybase.Connection(chosen_host)

    logging.debug("Connecting to HBase Thrift Server on %s", chosen_host)
    connection.open()

    logging.debug("Using table %s", table_name)
    table = happybase.Table(table_name, connection)
    return table
Exemplo n.º 7
0
 def getUserComment_user(self, user_id):
     """Return the comments of ``user_id`` from ``comment_local``.

     Row keys in ``comment_local`` are written as ``<user_id>_<movie_id>``,
     so the rows are selected by the ``<user_id>_`` key prefix. A substring
     match on ``<user_id>_`` would also return rows of other users whose id
     merely ends with the same digits.

     :param user_id: id of the user whose comments are wanted.
     :return: result of ``self.revert_user`` applied to at most 1000
         ``(row key, data)`` pairs.
     """
     c = happybase.Connection(host=hbase_host, port=hbase_port)
     c.open()
     try:
         comment_table = happybase.Table('comment_local', c)
         # The prefix is encoded because happybase expects bytes row keys.
         key_prefix = (str(user_id) + "_").encode('utf-8')
         rows = list(comment_table.scan(row_prefix=key_prefix, limit=1000))
         return self.revert_user(rows)
     finally:
         # Close even when the scan or the conversion raises.
         c.close()
Exemplo n.º 8
0
 def batchPut(self, table):
     '''
     Create a batch writer for bulk inserts.

     :param table: name of the table the batch writes to
     :return: batch object created with ``batch_size=10``
     '''
     # NOTE(review): the batch is handed back after the ``with`` block has
     # exited, i.e. after the pooled connection lease ended -- confirm that
     # callers may still use it safely.
     with pool.connection() as pooled_conn:
         return happybase.Table(table, pooled_conn).batch(batch_size=10)
Exemplo n.º 9
0
 def querySingleLine(self, table, rowkey):
     '''
     Fetch a single row.

     :param table: table name
     :param rowkey: row key
     :return: whatever ``Table.row`` returns for ``rowkey``
     '''
     with pool.connection() as pooled_conn:
         row_data = happybase.Table(table, pooled_conn).row(rowkey)
     return row_data
Exemplo n.º 10
0
 def queryMultilLines(self, table, list):
     '''
     Fetch several rows at once and return them as a dict.

     :param table: table name
     :param list: row keys to fetch (the name shadows the builtin, but it
         is part of the public signature)
     :return: ``dict`` built from the pairs returned by ``Table.rows``
     '''
     with pool.connection() as pooled_conn:
         fetched = happybase.Table(table, pooled_conn).rows(list)
         return dict(fetched)
Exemplo n.º 11
0
 def singleDelete(self, table, rowkey):
     '''
     Delete a single row.

     :param table: table name
     :param rowkey: key of the row to delete
     :return: None
     '''
     with pool.connection() as pooled_conn:
         target = happybase.Table(table, pooled_conn)
         target.delete(rowkey)
Exemplo n.º 12
0
def ucf(user_id):
    """Return the recommendation string stored for ``user_id``.

    Reads the row keyed by ``str(user_id)`` from the ``recommend`` table and
    returns its last cell value decoded as UTF-8.

    :param user_id: id of the user; converted with ``str`` for the row key.
    :return: decoded cell value, or ``''`` when the row has no cells.
    """
    connection = happybase.Connection(host=hbase_host, port=hbase_port)
    connection.open()
    try:
        cells = happybase.Table('recommend', connection).row(str(user_id))
    finally:
        # Close even if the lookup raises.
        connection.close()

    movie_id = ''
    # Every value is decoded and the last one wins, as before.
    for value in cells.values():
        movie_id = value.decode('utf-8')
    return movie_id
Exemplo n.º 13
0
def getRow(tableName, rowkey):
    """Fetch one row and return it converted with ``changeEncode``.

    :param tableName: name of the table to read.
    :param rowkey: key of the row to fetch.
    :return: the empty/``None`` result unchanged when nothing was found,
        otherwise ``changeEncode(row)``.
    """
    connect = getConn()
    connect.open()
    try:
        row = happybase.Table(tableName, connect).row(row=rowkey)
    finally:
        # Close even if the lookup raises.
        connect.close()
    # ``not row`` already covers None as well as an empty result.
    if not row:
        return row
    return changeEncode(row)
Exemplo n.º 14
0
 def deleteDetailColumns(self, table, rowkey, detailColumns):
     '''
     Delete the data of selected columns within a column family.

     :param table: table name
     :param rowkey: key of the row to modify
     :param detailColumns: columns passed to ``Table.delete(columns=...)``
     :return: None
     '''
     with pool.connection() as pooled_conn:
         target = happybase.Table(table, pooled_conn)
         target.delete(rowkey, columns=detailColumns)
Exemplo n.º 15
0
 def deleteColumns(self, table, rowkey, columns):
     '''
     Delete the data of several column families.

     :param table: table name
     :param rowkey: key of the row to modify
     :param columns: columns passed to ``Table.delete(columns=...)``
     :return: None
     '''
     with pool.connection() as pooled_conn:
         target = happybase.Table(table, pooled_conn)
         target.delete(rowkey, columns=columns)
Exemplo n.º 16
0
def getRows(tableName, rowkeys):
    """Fetch several rows and return each converted with ``changeEncode``.

    :param tableName: name of the table to read.
    :param rowkeys: keys of the rows to fetch.
    :return: list with ``changeEncode(row)`` for every entry returned by
        ``Table.rows``.
    """
    connect = getConn()
    connect.open()
    try:
        rows = happybase.Table(tableName, connect).rows(rows=rowkeys)
    finally:
        # Close even if the lookup raises.
        connect.close()
    return [changeEncode(row) for row in rows]
Exemplo n.º 17
0
 def singlePut(self, table, rowkey, data):
     '''
     Insert a single row.

     :param table: table name
     :param rowkey: key of the row to write
     :param data: cell data passed to ``Table.put(data=...)``
     :return: None
     '''
     with pool.connection() as pooled_conn:
         target = happybase.Table(table, pooled_conn)
         target.put(rowkey, data=data)
Exemplo n.º 18
0
 def scanTable(self, table):
     """Print the cells of up to 10 rows matching a fixed ``info:mmsi`` filter.

     Each printed line holds the row key, the column name and the value,
     all decoded as UTF-8.

     :param table: name of the HBase table to scan.
     """
     mmsi_filter = (
         "SingleColumnValueFilter('info', 'mmsi', =, 'substring:100704002')")
     hbase_table = happybase.Table(table, self.getHbaseConnection())
     for row_key, cells in hbase_table.scan(limit=10, filter=mmsi_filter):
         for column, raw in cells.items():
             print(row_key.decode('utf8'), column.decode('utf8'),
                   raw.decode('utf8'))
Exemplo n.º 19
0
 def batchDelete(self, table, rowkeys):
     '''
     Delete several rows through one batch.

     :param table: table name
     :param rowkeys: iterable of row keys to delete
     :return: None
     '''
     with pool.connection() as pooled_conn:
         target = happybase.Table(table, pooled_conn)
         with target.batch() as pending:
             for key in rowkeys:
                 pending.delete(key)
def connect():
    """Connect to a random host from HOSTS and return a handle on TABLE.

    TABLE is created with one default column family ``cf`` if it does not
    exist yet. The connection stays open for the returned table.

    :return: ``happybase.Table`` for TABLE bound to the opened connection.
    """
    host = random.choice(HOSTS)
    conn = happybase.Connection(host)

    logging.debug("Connecting to HBase Thrift Server on %s", host)
    conn.open()

    try:
        # tables() yields bytes names on Python 3, so a str TABLE has to be
        # compared in encoded form or an existing table is never detected.
        wanted = TABLE if isinstance(TABLE, bytes) else TABLE.encode('utf-8')
        existing = conn.tables()
        if TABLE not in existing and wanted not in existing:
            # Create a table with column family `cf` with default settings.
            conn.create_table(TABLE, {"cf": dict()})
            logging.debug("Created table %s", TABLE)
        else:
            logging.debug("Using table %s", TABLE)
    except Exception:
        # Do not leave the socket open when the check or the creation fails.
        conn.close()
        raise
    return happybase.Table(TABLE, conn)
Exemplo n.º 21
0
 def getComments_movie(self, movie_id):
     """Return up to 10 comments whose row key contains ``_<movie_id>``.

     At most 1000 rows are read from the ``comment`` table, passed through
     ``self.revert`` and truncated to the first 10 entries.

     :param movie_id: id of the movie; converted with ``str`` for the filter.
     :return: first 10 items of ``self.revert(rows)``, or ``[]`` when the
         scan fails.
     """
     # NOTE(review): a substring match on "_<id>" also matches keys holding
     # a longer id that starts with the same digits -- confirm the key layout
     # of ``comment`` before tightening the filter.
     c = happybase.Connection(host=hbase_host, port=hbase_port)
     c.open()
     try:
         comment_table = happybase.Table('comment', c)
         query_str = "RowFilter (=, 'substring:_" + str(movie_id) + "')"
         try:
             result = list(comment_table.scan(filter=query_str, limit=1000))
         except Exception:
             # Best effort, as before: a failed scan yields no comments.
             return []
         return self.revert(result)[0:10]
     finally:
         # Close on every path, including a failure inside self.revert.
         c.close()
Exemplo n.º 22
0
 def addUserComment(self, movie_id, user_id, content, star, reviewtime):
     """Store a user's comment on a movie unless one already exists.

     The row is written to ``comment_local`` under the key
     ``<user_id>_<movie_id>`` with the columns ``region:content``,
     ``region:star`` and ``region:reviewtime``.

     :param movie_id: id of the commented movie.
     :param user_id: id of the commenting user.
     :param content: comment text.
     :param star: rating value.
     :param reviewtime: time of the review.
     :return: ``False`` when the user already commented on the movie;
         ``None`` after a successful write.
     """
     # Check first: hasUserComment opens its own connection, so none needs to
     # be held open (or can leak) while it runs.
     if self.hasUserComment(movie_id, user_id):
         return False

     c = happybase.Connection(host=hbase_host, port=hbase_port)
     c.open()
     try:
         comment_table = happybase.Table('comment_local', c)
         comment_table.put(
             str(user_id) + "_" + str(movie_id), {
                 "region:content": str(content),
                 "region:star": str(star),
                 "region:reviewtime": str(reviewtime)
             })
     finally:
         # Close even if the write raises.
         c.close()
Exemplo n.º 23
0
 def scanTable(self, table, row_start=None, row_stop=None, row_prefix=None):
     '''
     Scan a table and print every ``(key, value)`` pair.

     :param table: table name
     :param row_start: row key to start from
     :param row_stop: row key to stop at
     :param row_prefix: row-key prefix passed to the scan
     :return: None
     '''
     with pool.connection() as pooled_conn:
         rows = happybase.Table(table, pooled_conn).scan(
             row_start=row_start,
             row_stop=row_stop,
             row_prefix=row_prefix)
         for entry in rows:
             key, value = entry
             print(key, value)
Exemplo n.º 24
0
 def getSimilar(self, movie_id):
     """Return the ``MovieInfo`` objects listed as similar to ``movie_id``.

     The row keyed by ``str(movie_id)`` in ``movie_sim_1`` holds a
     comma-separated list of movie ids; each id is loaded through
     ``MovieInfo.objects.get``.

     :param movie_id: id of the reference movie.
     :return: list of ``MovieInfo`` objects, empty when nothing is stored.
     """
     c = happybase.Connection(host=hbase_host, port=hbase_port)
     c.open()
     try:
         cells = happybase.Table('movie_sim_1', c).row(str(movie_id))
     finally:
         # Release the connection before the ORM lookups, so a failing
         # lookup cannot leave it open.
         c.close()

     movie_id_str = ''
     # Every value is decoded and the last one wins, as before.
     for value in cells.values():
         movie_id_str = value.decode('utf-8')
     if not movie_id_str:
         return []
     return [
         MovieInfo.objects.get(id=int(similar_id))
         for similar_id in movie_id_str.split(',')
     ]
Exemplo n.º 25
0
    def __init__(self,
                 host: str,
                 table: str,
                 filter: str,
                 request: list = None,
                 field=FIELD):
        """Store the HBase connection, table handle and query settings.

        :param host: host passed to ``happybase.Connection``.
        :param table: name of the table to wrap.
        :param filter: filter value kept on the instance as ``self.filter``.
        :param request: optional list; a falsy value is replaced by a new
            empty list.
        :param field: object providing ``copy()``; the instance keeps a copy.
        :raises AssertionError: if the resulting ``request`` is not a list.
        """
        # autoconnect=False is passed, so opening the connection is left to
        # later code; the timeout value is 30 * 1000 (presumably
        # milliseconds -- confirm against the happybase documentation).
        conn = happybase.Connection(host,
                                    autoconnect=False,
                                    timeout=30 * 1000)
        self.connection = conn
        self.table = happybase.Table(table, conn)
        self.filter = filter
        self.request = request if request else []

        assert isinstance(self.request, list), "request must be list!"

        # Copy so the instance does not share the object passed in.
        self.field = field.copy()
Exemplo n.º 26
0
 def hasUserComment(self, movie_id, user_id):
     """Tell whether ``user_id`` already commented on ``movie_id``.

     Looks in ``comment_local`` for a row whose key is exactly
     ``<user_id>_<movie_id>``.

     :param movie_id: id of the movie.
     :param user_id: id of the user.
     :return: ``True`` when such a row exists; ``False`` when it does not or
         when the scan fails.
     """
     c = happybase.Connection(host=hbase_host, port=hbase_port)
     c.open()
     try:
         comment_table = happybase.Table('comment_local', c)
         query_str = "RowFilter (=, 'binary:" + str(user_id) + "_" + str(
             movie_id) + "')"
         try:
             result = list(comment_table.scan(filter=query_str, limit=1000))
         except Exception:
             # Best effort, as before: a failed scan counts as "no comment".
             return False
         return bool(result)
     finally:
         # One close for every path instead of one per branch.
         c.close()
Exemplo n.º 27
0
def get_batch_yesterday(date=None):
    """Return the batch-analysis rows for the 7 days before ``date`` as JSON.

    Rows are scanned from ``001#001#<date - 7 days>`` up to
    ``001#001#<date>``. Each cell is first read as a big-endian double and
    rounded to 2 decimals; cells that cannot be unpacked that way are
    decoded as UTF-8 text.

    :param date: day in ``yyyyMMdd`` form; when falsy, the ``date`` query
        argument of the current request is used.
    :return: ``jsonify(items=[(row key, {column: value}), ...])`` on success,
        the string ``"ERROR"`` when reading from HBase fails.
    :raises ValueError: if the date does not match ``%Y%m%d``.
    :raises TypeError: if no date is available at all.
    """
    import struct  # local import: only struct.error is needed below

    if not date:
        date = request.args.get('date')
    app.logger.info(date)

    # Parse before connecting, so a bad date cannot leak an open connection.
    datetimeDate = datetime.datetime.strptime(date, "%Y%m%d")
    pastWeek = (datetimeDate - datetime.timedelta(days=7)).strftime("%Y%m%d")
    yesterday = datetimeDate.strftime("%Y%m%d")
    # arguments formatted to yyyyMMdd
    row_start = '001#001#{}'.format(pastWeek)
    row_end = '001#001#{}'.format(yesterday)

    conn = happybase.Connection(HBASE_HOST, port=HBASE_PORT)
    conn.open()
    try:
        table = happybase.Table(BATCH_ANALYSIS_TABLE_NAME, conn)

        rows = []
        for key, data in table.scan(row_start=row_start.encode('utf-8'),
                                    row_stop=row_end.encode('utf-8')):
            rowDataDict = {}
            for columnName, raw in data.items():
                try:
                    # Java Bytes class converts Double to IEEE-754
                    val = round(unpack(b'>d', raw)[0], 2)
                except struct.error:
                    # Not an 8-byte double: the value is a UTF-8 string.
                    val = raw.decode('utf-8')
                rowDataDict[columnName.decode('utf-8')] = val
            rows.append((key.decode('utf-8'), rowDataDict))
        app.logger.info('Retrieved data from HBase succesfully')
        return jsonify(items=rows)
    except Exception:
        # logger.exception keeps the traceback that the bare except dropped.
        app.logger.exception(
            "Table {} doesn't have row {}. Check with the hbase shell that you're retrieving the correct data."
            .format(BATCH_ANALYSIS_TABLE_NAME, row_start))
        return "ERROR"
    finally:
        conn.close()
0
    def __init__(self,
                 hbase: str,
                 table: str,
                 filter: str,
                 request: list = None,
                 batch_size: int = 128,
                 field=FIELD):
        """Store the HBase connection, table handle and query settings.

        :param hbase: host passed to ``happybase.Connection``.
        :param table: name of the table to wrap.
        :param filter: filter value kept on the instance as ``self.filter``.
        :param request: optional list (of days, per the original note); a
            falsy value is replaced by a new empty list.
        :param batch_size: accepted but not used in this method.
        :param field: object providing ``copy()``; the instance keeps a copy.
        :raises AssertionError: if the resulting ``request`` is not a list.
        """
        # autoconnect=False is passed, so opening the connection is left to
        # later code.
        conn = happybase.Connection(hbase, autoconnect=False)
        self.connection = conn
        self.table = happybase.Table(table, conn)
        self.filter = filter
        self.request = request if request else []

        assert isinstance(self.request, list), "request must be list!"
        # Copy on purpose: the instance must not share the object passed in.
        self.field = field.copy()
Exemplo n.º 29
0
def get_averages_past24sim():
    """Return the running averages of the last simulated day as JSON.

    Streaming averages are stored under row keys based on the actual time
    (``001#001#<yyyyMMddHHmm>``). The scan covers the keys from
    ``SIM_DAY_IN_MIN`` minutes ago up to now. Every cell is read as a
    big-endian double and rounded to 2 decimals.

    :return: ``jsonify(items=[(row key, {column: value}), ...])`` on success,
        otherwise the error message string.
    """
    # yyyyMMddHHmm
    date = datetime.datetime.now()
    pastSimDay = (
        date -
        datetime.timedelta(minutes=SIM_DAY_IN_MIN)).strftime("%Y%m%d%H%M")
    simToday = date.strftime("%Y%m%d%H%M")

    conn = happybase.Connection(HBASE_HOST, port=HBASE_PORT)
    conn.open()
    try:
        table = happybase.Table(RUNNING_AVG_TABLE_NAME, conn)
        row_start = '001#001#{}'.format(pastSimDay)
        row_end = '001#001#{}'.format(simToday)
        rows = []

        for key, data in table.scan(row_start=row_start.encode('utf-8'),
                                    row_stop=row_end.encode('utf-8')):
            rowDataDict = {}
            for columnName, raw in data.items():
                # Java Bytes class converts Double to IEEE-754
                rowDataDict[columnName.decode('utf-8')] = round(
                    unpack(b'>d', raw)[0], 2)
            rows.append((key.decode('utf-8'), rowDataDict))
        app.logger.info('Retrieved data from HBase succesfully')
        return jsonify(items=rows)
    except Exception:
        # Name the table that was actually scanned; the message used to
        # point at the batch-analysis table.
        errorMsg = "Table {} doesn't have row {}. Check with the hbase shell that you're retrieving the correct data.".format(
            RUNNING_AVG_TABLE_NAME, pastSimDay)
        app.logger.error(errorMsg)
        return errorMsg
    finally:
        conn.close()
})
# Internet behaviour table: internetBehaviorTable
# Drop the table when it already exists, then recreate it with one
# default-configured column family per behaviour category.
if b'internetBehaviorTable' in connection.tables():
    connection.delete_table('internetBehaviorTable', disable=True)
connection.create_table(
    'internetBehaviorTable',
    {
        family: dict()
        for family in ('news', 'communications', 'entertainment',
                       'domersticServices', 'busApp', 'toolUse', 'date')
    })

# Table handles
basicFeaturesTable = happybase.Table('basicFeaturesTable', connection)
socialAttributesTable = happybase.Table('socialAttributesTable', connection)
consumptionCharacteristicsTable = happybase.Table(
    'consumptionCharacteristicsTable', connection)
internetBehaviorTable = happybase.Table('internetBehaviorTable', connection)

# One batch writer per table, each created with batch_size=1000
basicFeaturesTableBat = basicFeaturesTable.batch(batch_size=1000)
socialAttributesTableBat = socialAttributesTable.batch(batch_size=1000)
consumptionCharacteristicsTableBat = consumptionCharacteristicsTable.batch(
    batch_size=1000)
internetBehaviorTableBat = internetBehaviorTable.batch(batch_size=1000)
"""基础属性表 basicFeaturesTable"""
data = pd.read_csv("data/basicFeaturesData.csv")

for i in range(data.shape[0]):
    basicFeaturesTableBat.put(
        "%s" % data.loc[i, 'key'], {