示例#1
0
def standard_water():
    """
    Collect all "water" posts.

    A post is treated as a water post when its review count is less than
    or equal to 5.

    :return: a list with the ``post_home`` value of every water post.
    """
    # Fetch both columns in a single query so that each review count is
    # paired with the post_home of the very same row.  Zipping the results
    # of two independent queries only lines up if the database happens to
    # return the rows in the same order both times.
    rows = database.select_all('postn', 'review_num', 'post_home')
    return [row[1] for row in rows if int(row[0]) <= 5]
示例#2
0
def statistic_review():
    """
    Build a histogram of the ``review_num`` column of the ``postn`` table.

    Each bucket is half-open: "0-5" holds counts in [0, 5), "5-50" holds
    [5, 50), and so on.  Any value that fits no bounded bucket (1000 and
    above, but also negative numbers) is counted under "1K+".

    :return: dict mapping bucket label to the number of posts in it.
    """
    # (exclusive upper bound, label), in ascending order.
    bounded = (
        (5, "0-5"),
        (50, "5-50"),
        (100, "50-100"),
        (500, "0.1K-0.5K"),
        (1000, "0.5K-1K"),
    )
    quality = {label: 0 for _, label in bounded}
    quality["1K+"] = 0

    for row in database.select_all("postn", "review_num"):
        count = int(row[0])
        label = "1K+"
        if count >= 0:
            # The first bound that exceeds the count identifies the bucket.
            for upper, name in bounded:
                if count < upper:
                    label = name
                    break
        quality[label] += 1
    return quality
示例#3
0
def statistic_barage():
    """
    Build a histogram of the ``barage`` column of the ``pure_users`` table.

    NOTE(review): ``barage`` is presumably the account age in years --
    confirm against the crawler that fills the column.

    Values in [0, 1) go to "onedown", [1, 2) to "one", ... and [12, 13) to
    "twelve".  Every other number (13 and above, or negative) goes to
    "twelveup".  Values that ``float()`` rejects with ValueError are
    skipped.

    :return: dict mapping bucket label to the number of users in it.
    """
    # Position i holds the label for values whose integer part is i.
    labels = (
        "onedown", "one", "two", "three", "four", "five", "six",
        "seven", "eight", "nine", "ten", "eleven", "twelve",
    )
    age = {label: 0 for label in labels}
    age["twelveup"] = 0

    for row in database.select_all('pure_users', 'barage'):
        try:
            years = float(row[0])
        except ValueError:
            # Not a number: leave it out of the statistic.
            continue
        if 0 <= years < 13:
            age[labels[int(years)]] += 1
        else:
            age["twelveup"] += 1

    return age
示例#4
0
def get_detail_member():
    """
    Fetch detailed member information and store it in ``member_detail``.

    For every ``homepage`` in the ``sortmember`` table the page is
    downloaded and parsed, and the author/post info is extracted.  The
    result is inserted unless the extractor returned 0.

    :return: None
    """
    for row in database.select_all('sortmember', 'homepage'):
        print(row)
        page = index.get_bs(index.get_response(row[0]))
        info = index.author_post_info(page)
        # The extractor returns 0 when there is nothing to store.
        if info != 0:
            database.insert_sql_member_detail('member_detail', info)
示例#5
0
def statistic_client():
    """
    Count posts per client type from the ``client_type`` column of ``postn``.

    :return: tuple ``(android, apple, unknown)`` where ``unknown`` counts
             every value that is neither 'android' nor 'apple'.
    """
    kinds = [row[0] for row in database.select_all('postn', 'client_type')]
    android = kinds.count('android')
    apple = kinds.count('apple')
    # Whatever is left over belongs to neither known client.
    unknown = len(kinds) - android - apple
    return android, apple, unknown
示例#6
0
def zombie_member():
    """
    Count "zombie" members.

    Every ``homepage`` in the ``sortmember`` table is downloaded and
    parsed; a member is counted as a zombie when the author/post info
    extractor returns 0 for that page.

    :return: the number of zombie members.
    """
    zombies = 0
    for row in database.select_all('sortmember', 'homepage'):
        print(row)
        page = index.get_bs(index.get_response(row[0]))
        if index.author_post_info(page) == 0:
            zombies += 1
    return zombies
示例#7
0
def statistic_sex():
    """
    Count users by the ``sex`` column of the ``pure_users`` table.

    A value whose string form is exactly 'female' counts as a girl; every
    other value counts as a boy.  No separate "unknown" count is returned.

    :return: tuple ``(boys, girls)``.
    """
    values = []
    for row in database.select_all('pure_users', 'sex'):
        # Every field of the row is inspected.
        values.extend(str(field) for field in row)
    girls = values.count('female')
    boys = len(values) - girls
    return boys, girls
示例#8
0
def barage_wrong():
    """
    Find ``pure_users`` rows whose ``barage`` value cannot be valid.

    A value is considered wrong when it is not a number at all, or when it
    is larger than the number of years elapsed since 2003-11-25.
    NOTE(review): that date is presumably when the platform went live, and
    ``barage`` presumably an account age in years -- confirm.

    The computed upper bound is printed as a side effect.

    :return: list of the ``id`` values of the offending rows.
    """
    rows = database.select_all("pure_users", 'id', 'barage')

    elapsed = datetime.now() - datetime(2003, 11, 25)
    # Read the day count from the timedelta itself.  Slicing the first four
    # characters of str(timedelta) only works while the span has exactly
    # four digits of days and silently truncates from 10000 days onward.
    maxyear = elapsed.days / 365
    print(maxyear)

    wrongage = []
    for row in rows:
        try:
            if float(row[1]) > maxyear:
                wrongage.append(row[0])
        except (TypeError, ValueError):
            # ValueError: text that is not a number.
            # TypeError: a NULL column (None), which is not a valid age either.
            wrongage.append(row[0])
    return wrongage
示例#9
0
def save_detail_post():
    """
    Parse every post home page and store the extracted data in ``postn``.

    For each ``homepage`` row of the ``source1`` table the page is
    downloaded, the post info is extracted, the URL is prepended to it and
    the result is inserted.  Pages whose parsing or insertion raises
    AttributeError or KeyError are skipped.

    :return: None
    """
    for row in database.select_all('source1', 'homepage'):
        home_url = ''.join(row)
        print(home_url)
        # The download is outside the try block: its errors are not swallowed.
        response = index.get_response(home_url)

        try:
            soup = index.get_bs(response)
            post = index.post_info(soup)
            post.insert(0, home_url)
            print(post)
            database.insert_sql_post('postn', post)
        except (AttributeError, KeyError):
            # Move on to the next page.
            continue
示例#10
0
def statistic_post():
    """
    Grade user activity into seven levels by number of posts.

    Reads the ``post_number`` column of ``pure_users``.  A value ending in
    '万' (ten thousand) is a decimal multiple of 10000; any other value is
    parsed as an integer.  Values that cannot be parsed, including the
    empty string, are skipped.

    Buckets are half-open ("0-20" is [0, 20), ...).  Anything that fits no
    bounded bucket is counted under "10K+".

    :return: dict mapping activity bucket label to the number of users.
    """
    posts = database.select_all('pure_users', 'post_number')
    active = {
        "0-20": 0,
        "20-100": 0,
        "100-500": 0,
        "0.5K-1K": 0,
        "1K-5K": 0,
        "5K-10K": 0,
        "10K+": 0
    }

    for post in posts:
        raw = post[0]
        try:
            # Slice instead of indexing: raw[-1] raises IndexError on an
            # empty string, whereas raw[-1:] is simply '' and the value
            # then fails int() with the ValueError handled below.
            if raw[-1:] == '万':
                post_int = float(raw[:-1]) * 10000
            else:
                post_int = int(raw)
        except ValueError:
            # Unparseable value: leave it out of the statistic.
            continue

        if 0 <= post_int < 20:
            active["0-20"] += 1
        elif 20 <= post_int < 100:
            active["20-100"] += 1
        elif 100 <= post_int < 500:
            active["100-500"] += 1
        elif 500 <= post_int < 1000:
            active["0.5K-1K"] += 1
        elif 1000 <= post_int < 5000:
            active["1K-5K"] += 1
        elif 5000 <= post_int < 10000:
            active["5K-10K"] += 1
        else:
            active["10K+"] += 1
    return active
示例#11
0
# !/usr/bin/env python3
# -*- coding: utf-8 -*-

from biye.analysis.simple import an_time
from biye.spider.operate import dict_to_json as tr
from biye.spider.operate import database

# Count the posts made in each hour and export the result.
abc = database.select_all('postn', 'str_date', 'str_time')
hour = an_time.count_post_hour(abc)
tr.dict_to_json(hour, name='../../show/data/hour.json')

# Count the posts made in each month, per year, and export the results.
# All files go to the same ../../show/data/ directory; the 2016 file used
# to be written to '../../data/show/', with the two directories transposed.
month_2017, month_2016, month_2015 = an_time.count_post_month(abc)
tr.dict_to_json(month_2017, name='../../show/data/month_2017.json')
tr.dict_to_json(month_2016, name='../../show/data/month_2016.json')
tr.dict_to_json(month_2015, name='../../show/data/month_2015.json')
示例#12
0
# !/usr/bin/env python3
# -*- coding: utf-8 -*-

from biye.spider.operate import database
import pymysql

# Copy every row of `users` whose au_id is not yet in `pure_users`.

# Read au_id and id in one query so both values come from the same row.
users = database.select_all('users', 'au_id', 'id')

# au_id values already present in pure_users, as a set for O(1) lookups.
allin = database.select_all('pure_users', 'au_id')
existing = {row[0] for row in allin}

# One connection for the whole run instead of one per row; the finally
# clause closes it even when a query fails.
conn = pymysql.connect(host='localhost',
                       user='******',
                       db='tieba',
                       charset='utf8mb4')
try:
    with conn.cursor() as cursor:
        for au_id, data_id in users:
            print(data_id)
            if au_id in existing:
                print('exist')
                continue

            print('not exist!')
            # Let the driver bind the id rather than formatting it into
            # the SQL text.
            cursor.execute(
                'select au_id, nickname, sex, barage, post_number, member '
                'from users where id=%s',
                (data_id,))
            data = cursor.fetchone()
            database.insert_sql_users('pure_users', data)
            # Remember the insert so that a repeated au_id later in this
            # run is not inserted a second time.
            existing.add(au_id)
finally:
    conn.close()