Exemplo n.º 1
0
def gen_post_put(post_item):
    """
    Build the put used to insert a post into the post database.

    Keys of ``post_item`` are mapped to columns as follows:

    * keys starting with ``_`` are skipped;
    * keys starting with ``picture`` go to the ``picture`` family: one
      ``<key>_num`` column holding the number of entries, then one
      ``<key><index>`` column per entry;
    * every other key becomes one column in the ``info`` family.

    All qualifiers and values are stringified and encoded to bytes.

    :param post_item: mapping of post fields; must contain ``post_id`` and
        ``author_id``.
    :return: tuple ``(row_key, put)`` where ``put`` is the ``TPut`` built
        for ``row_key``.
    """
    info_family = b'info'
    picture_family = b'picture'

    post_id = post_item['post_id']
    author_id = post_item['author_id']

    # The row key combines the author's code with the post's code, author first.
    author_code = MD5Utils.md5_code(author_id)
    post_code = MD5Utils.md5_code(post_id)
    row_key = gen_row_key(author_code, post_code)

    columns = []
    for field in post_item:
        if field.startswith('_'):
            # Underscore-prefixed keys are not stored.
            continue

        if field.startswith('picture'):
            pictures = post_item[field]
            # Store the count first, then each entry under an indexed qualifier.
            count_qualifier = (field + '_num').encode()
            columns.append(
                TColumnValue(picture_family, count_qualifier,
                             str(len(pictures)).encode()))
            for index, picture in enumerate(pictures):
                indexed_qualifier = (field + str(index)).encode()
                columns.append(
                    TColumnValue(picture_family, indexed_qualifier,
                                 str(picture).encode()))
            continue

        columns.append(
            TColumnValue(info_family, field.encode(),
                         str(post_item[field]).encode()))

    return row_key, TPut(row_key, columns)
Exemplo n.º 2
0
def gen_fans_put(fans_item):
    """
    Build the put used to insert a fan/follow relation.

    Every key of ``fans_item`` becomes one column in the ``info`` family,
    with the value stringified and encoded to bytes.

    :param fans_item: mapping of relation fields; must contain ``fans`` and
        ``follow``.
    :return: tuple ``(row_key, put)`` where ``put`` is the ``TPut`` built
        for ``row_key``.
    """
    info_fml = b'info'

    fans = fans_item['fans']
    follow = fans_item['follow']

    # The row key combines the code of the fan with the code of the followed
    # user, fan first.
    row_key = gen_row_key(MD5Utils.md5_code(fans), MD5Utils.md5_code(follow))

    # Values must be read from fans_item, the mapping being iterated.
    column_values = [
        TColumnValue(info_fml, key.encode(), str(fans_item[key]).encode())
        for key in fans_item
    ]

    put = TPut(row_key, column_values)

    return row_key, put
Exemplo n.º 3
0
def gen_start_spider_info():
    """
    Build the put that records when the spider started.

    The current time is taken from ``time.time()`` and stored in the
    ``time:start_time`` column; the spider name is stored in
    ``spider_name:name``.

    :return: tuple ``(row_key, put)`` where ``put`` is the ``TPut`` built
        for ``row_key``.
    """
    started_at = time.time()
    name = 'bbs_sohu'

    name_code = MD5Utils.md5_code(name)
    # Subtract the timestamp from a large constant so that the newest rows
    # come first (presumably relies on rows being sorted by key ascending).
    inverted_time = str(10**12 - started_at)
    row_key = gen_row_key(name_code, inverted_time)

    columns = [
        TColumnValue(b'spider_name', b'name', name.encode()),
        TColumnValue(b'time', b'start_time', str(started_at).encode()),
    ]

    return row_key, TPut(row_key, columns)
Exemplo n.º 4
0
def gen_author_put(author_item):
    """
    Build the put used to insert an author into the author database.

    Every key of ``author_item`` that does not start with ``_`` becomes one
    column in the ``info`` family, with the value stringified and encoded
    to bytes.

    :param author_item: mapping of author fields; must contain
        ``author_id``.
    :return: tuple ``(row_key, put)`` where ``row_key`` is the encoded
        result of ``MD5Utils.md5_code(author_id)`` and ``put`` is the
        ``TPut`` built for it.
    """
    info_fml = b'info'

    author_id = author_item['author_id']

    row_key = MD5Utils.md5_code(author_id).encode()

    # .encode() applies to the stringified value, not to the TColumnValue.
    column_values = [
        TColumnValue(info_fml, key.encode(), str(author_item[key]).encode())
        for key in author_item
        if not key.startswith('_')
    ]

    put = TPut(row_key, column_values)

    return row_key, put