Exemplo n.º 1
0
def get_wb_by_mid(mid):
    """
    Look up a single weibo record by its weibo id.

    :param mid: weibo id, compared against ``WeiboData.weibo_id``
    :return: first matching ``WeiboData`` row; presumably ``None`` when nothing
        matches (SQLAlchemy ``first()`` semantics)
    """
    weibo_query = db_session.query(WeiboData)
    matches = weibo_query.filter(WeiboData.weibo_id == mid)
    return matches.first()
Exemplo n.º 2
0
def get_pic_by_url(url, url_hash):
    """
    Tell whether a picture with the given hash and url is already stored.

    The row is looked up by ``url_hash``; its stored ``pic_url`` is then compared
    with ``url``, so a row sharing the hash but holding another url does not count.

    :param url: picture url compared with the stored ``pic_url``
    :param url_hash: value matched against ``WeiboPic.url_hash``
    :return: True only when a row is found and its ``pic_url`` equals ``url``,
        otherwise False
    """
    pic = db_session.query(WeiboPic).filter(WeiboPic.url_hash == url_hash).first()
    if not pic:
        return False
    if pic.pic_url == url:
        return True
    return False
Exemplo n.º 3
0
def get_repost_by_rid(rid):
    """
    Fetch the information of a reposted weibo by the repost's weibo id.

    :param rid: id of the repost, compared against ``WeiboRepost.weibo_id``
    :return: first matching ``WeiboRepost`` row; presumably ``None`` when nothing
        matches (SQLAlchemy ``first()`` semantics)
    """
    repost_query = db_session.query(WeiboRepost)
    by_rid = repost_query.filter(WeiboRepost.weibo_id == rid)
    return by_rid.first()
Exemplo n.º 4
0
def freeze_account(name, rs):
    """
    Update the ``enable`` status of the login account called ``name``.

    :param name: account name, matched against ``LoginInfo.name``
    :param rs: new status; 0 means banned, 1 means normal, 2 means the account
        name and password do not match
    :return: None
    """
    account = db_session.query(LoginInfo).filter(LoginInfo.name == name).first()
    # ``first()`` yields None when no row matches; there is nothing to update
    # in that case, so return instead of failing on ``None.enable``.
    if account is None:
        return
    account.enable = rs
    db_session.commit()
Exemplo n.º 5
0
def freeze_account(name, rs):
    """
    Update the ``enable`` status of the login account called ``name``.

    :param name: login account, matched against ``LoginInfo.name``
    :param rs: new status; 0 stands for banned, 1 stands for normal, 2 stands for
        an invalid name or password
    :return: None
    """
    account = db_session.query(LoginInfo).filter(LoginInfo.name == name).first()
    # ``first()`` yields None when no row matches; there is nothing to update
    # in that case, so return instead of failing on ``None.enable``.
    if account is None:
        return
    account.enable = rs
    db_session.commit()
Exemplo n.º 6
0
def set_seed_crawled(uid, result):
    """
    Record the crawl result for a seed user.

    A missing seed is inserted with ``result``. An existing seed is updated only
    while its ``is_crawled`` is still 0; a seed that already carries a result is
    left untouched (and, importantly, is not inserted a second time).

    :param uid: user id that is crawled
    :param result: crawling result, 1 stands for succeed, 2 stands for fail
    :return: None
    """
    seed = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()

    if seed is None:
        # Only create a row when the uid is genuinely unknown; otherwise an
        # already-crawled seed would get a duplicate row for the same uid.
        db_session.add(SeedIds(uid=uid, is_crawled=result))
    elif seed.is_crawled == 0:
        seed.is_crawled = result
    db_session.commit()
Exemplo n.º 7
0
def set_seed_crawled(uid, result):
    """
    Record the crawl result for a seed user.

    A seed that does not exist yet is inserted with ``result``; an existing seed
    is only updated while its ``is_crawled`` is still 0.

    :param uid: user id that is crawled
    :param result: crawling result
    :return: None
    """
    existing = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()
    if not existing:
        db_session.add(SeedIds(uid=uid, is_crawled=result))
    elif existing.is_crawled == 0:
        existing.is_crawled = result
    db_session.commit()
Exemplo n.º 8
0
def set_seed_crawled(uid, result):
    """
    Store the crawl result for a seed user; this table serves the user-crawling logic.

    An existing seed always has its ``is_crawled`` overwritten; a missing seed
    is inserted with the given result.

    :param uid: id of the crawled user
    :param result: crawl result
    :return: None
    """
    existing = db_session.query(SeedIds).filter(SeedIds.uid == uid).first()
    if not existing:
        db_session.add(SeedIds(uid=uid, is_crawled=result))
    else:
        existing.is_crawled = result
    db_session.commit()
Exemplo n.º 9
0
def set_crawled(id, result):
    """
    Record the crawl result for a user; this table serves the user-crawling logic.

    An existing user is only updated while its ``is_crawled`` is still 0; a
    missing user is inserted with the given result.

    :param id: id of the crawled user, matched against ``User.id``
    :param result: crawl result stored in ``is_crawled``
    :return: None
    """
    existing = db_session.query(User).filter(User.id == id).first()
    if not existing:
        db_session.add(User(id=id, is_crawled=result))
    elif existing.is_crawled == 0:
        existing.is_crawled = result
    db_session.commit()
Exemplo n.º 10
0
def get_ids_by_home_flag_random(status=0, num=100):
    """
    Fetch up to ``num`` seed rows with the given ``home_crawled`` value, randomly ordered.

    :param status: value compared against ``SeedIds.home_crawled`` (default 0)
    :param num: maximum number of rows to return (default 100)
    :return: list of ``SeedIds`` rows ordered by ``func.random()``
    """
    candidates = db_session.query(SeedIds).filter(SeedIds.home_crawled == status)
    shuffled = candidates.order_by(func.random())
    return shuffled.limit(num).all()
Exemplo n.º 11
0
def get_search_keywords():
    """
    Return every ``KeyWords`` row matching the raw SQL condition ``enable=1``.

    :return: list of ``KeyWords`` rows
    """
    enabled_only = text('enable=1')
    keyword_query = db_session.query(KeyWords)
    return keyword_query.filter(enabled_only).all()
Exemplo n.º 12
0
def get_login_info():
    """
    Return name, password and enable flag of every login account with ``enable=1``.

    :return: list of result rows carrying ``name``, ``password`` and ``enable``
    """
    columns = (LoginInfo.name, LoginInfo.password, LoginInfo.enable)
    enabled_only = text('enable=1')
    return db_session.query(*columns).filter(enabled_only).all()
Exemplo n.º 13
0
def get_user_by_uid(uid):
    """
    Look up a user by uid.

    :param uid: value compared against ``User.uid``
    :return: first matching ``User`` row; presumably ``None`` when nothing matches
    """
    user_query = db_session.query(User)
    by_uid = user_query.filter(User.uid == uid)
    return by_uid.first()
Exemplo n.º 14
0
def get_by_alias(alias):
    """
    Look up a user by alias.

    :param alias: value compared against ``User.alias``
    :return: first matching ``User`` row; presumably ``None`` when nothing matches
    """
    user_query = db_session.query(User)
    by_alias = user_query.filter(User.alias == alias)
    return by_alias.first()
Exemplo n.º 15
0
def get_home_ids():
    """
    Get the ids of all users whose home pages still need to be crawled.

    Selection uses the raw SQL condition ``home_crawled=0``.

    :return: list of result rows, each carrying only the ``uid`` column
    """
    not_crawled = text('home_crawled=0')
    uid_query = db_session.query(SeedIds.uid)
    return uid_query.filter(not_crawled).all()
Exemplo n.º 16
0
def get_weibo_repost_not_crawled():
    """
    List the weibos matching the raw SQL condition ``repost_crawled=0``.

    :return: list of result rows carrying ``weibo_id`` and ``uid``
    """
    pending = text('repost_crawled=0')
    id_query = db_session.query(WeiboData.weibo_id, WeiboData.uid)
    return id_query.filter(pending).all()
Exemplo n.º 17
0
def get_weibo_comment_not_crawled():
    """
    List the weibo ids matching the raw SQL condition ``comment_crawled=0``.

    :return: list of result rows, each carrying only the ``weibo_id`` column
    """
    pending = text('comment_crawled=0')
    id_query = db_session.query(WeiboData.weibo_id)
    return id_query.filter(pending).all()
Exemplo n.º 18
0
def get_wb_by_mid(mid):
    """
    Look up a single weibo record by its weibo id.

    :param mid: value compared against ``WeiboData.weibo_id``
    :return: first matching ``WeiboData`` row; presumably ``None`` when nothing matches
    """
    weibo_query = db_session.query(WeiboData)
    by_mid = weibo_query.filter(WeiboData.weibo_id == mid)
    return by_mid.first()
Exemplo n.º 19
0
def get_seed_ids():
    """
    List the seed uids matching the raw SQL condition ``is_crawled=0``.

    :return: list of result rows, each carrying only the ``uid`` column
    """
    not_crawled = text('is_crawled=0')
    uid_query = db_session.query(SeedIds.uid)
    return uid_query.filter(not_crawled).all()
Exemplo n.º 20
0
def get_user_by_uid(uid):
    """
    Fetch the user whose ``uid`` equals the given value.

    :param uid: value compared against ``User.uid``
    :return: first matching ``User`` row; presumably ``None`` when nothing matches
    """
    matching_users = db_session.query(User).filter(User.uid == uid)
    return matching_users.first()
Exemplo n.º 21
0
def get_zhihu_answer_comment_not_crawled():
    """
    List Zhihu comment ids matching ``comment_crawled=0``, ordered by ``id`` descending.

    :return: list of result rows, each carrying only the ``comment_id`` column
    """
    pending = db_session.query(ZhihuComment.comment_id).filter(text('comment_crawled=0'))
    by_id_desc = pending.order_by(desc(ZhihuComment.id))
    return by_id_desc.all()
Exemplo n.º 22
0
def get_comment_by_id(cid):
    """
    Look up a Zhihu comment by its comment id.

    :param cid: value compared against ``ZhihuComment.comment_id``
    :return: first matching ``ZhihuComment`` row; presumably ``None`` when nothing matches
    """
    comment_query = db_session.query(ZhihuComment)
    by_cid = comment_query.filter(ZhihuComment.comment_id == cid)
    return by_cid.first()
Exemplo n.º 23
0
def get_seed():
    """
    Get all seeds still to be crawled, selected by the raw SQL condition ``status=0``.

    :return: list of ``SeedIds`` rows (full rows, not only the ids)
    """
    to_crawl = text('status=0')
    seed_query = db_session.query(SeedIds)
    return seed_query.filter(to_crawl).all()
Exemplo n.º 24
0
def get_repost_by_rid(rid):
    """
    Look up a repost by its weibo id.

    :param rid: value compared against ``WeiboRepost.weibo_id``
    :return: first matching ``WeiboRepost`` row; presumably ``None`` when nothing matches
    """
    repost_query = db_session.query(WeiboRepost)
    by_rid = repost_query.filter(WeiboRepost.weibo_id == rid)
    return by_rid.first()
Exemplo n.º 25
0
def get_by_id(id):
    """
    Look up a user by id.

    :param id: value compared against ``User.id``
    :return: first matching ``User`` row; presumably ``None`` when nothing matches
    """
    user_query = db_session.query(User)
    by_id = user_query.filter(User.id == id)
    return by_id.first()
Exemplo n.º 26
0
def freeze_account(name):
    """
    Disable the login account called ``name`` by setting its ``enable`` to 0.

    :param name: account name, matched against ``LoginInfo.name``
    :return: None
    """
    account = db_session.query(LoginInfo).filter(
        LoginInfo.name == name).first()
    # ``first()`` yields None when no row matches; there is nothing to disable
    # in that case, so return instead of failing on ``None.enable``.
    if account is None:
        return
    account.enable = 0
    db_session.commit()
Exemplo n.º 27
0
def get_user_by_nickname(nickname):
    """
    Look up a user by nickname.

    :param nickname: value compared against ``User.nickname``
    :return: first matching ``User`` row; presumably ``None`` when nothing matches
    """
    user_query = db_session.query(User)
    by_nickname = user_query.filter(User.nickname == nickname)
    return by_nickname.first()
Exemplo n.º 28
0
def get_seed_ids():
    """
    Get the ids of all users still to be crawled (raw SQL condition ``is_crawled=0``).

    :return: list of result rows, each carrying only the ``uid`` column
    """
    uid_query = db_session.query(SeedIds.uid)
    pending = uid_query.filter(text('is_crawled=0'))
    return pending.all()
Exemplo n.º 29
0
def get_is_monitored():
    """
    Get all users that need to be monitored (raw SQL condition ``is_monitored=1``).

    :return: list of ``User`` rows
    """
    monitored = text('is_monitored=1')
    user_query = db_session.query(User)
    return user_query.filter(monitored).all()
Exemplo n.º 30
0
def get_seed_by_id(uid):
    """
    Look up a seed by user id.

    :param uid: value compared against ``SeedIds.uid``
    :return: first matching ``SeedIds`` row; presumably ``None`` when nothing matches
    """
    seed_query = db_session.query(SeedIds)
    by_uid = seed_query.filter(SeedIds.uid == uid)
    return by_uid.first()
Exemplo n.º 31
0
def get_home_ids():
    """
    Get the ids of all users whose home pages need to be crawled.

    Rows are selected with the raw SQL condition ``home_crawled=0``.

    :return: list of result rows, each carrying only the ``uid`` column
    """
    uid_query = db_session.query(SeedIds.uid)
    pending = uid_query.filter(text('home_crawled=0'))
    return pending.all()
Exemplo n.º 32
0
def get_seed_by_id(uid):
    """
    Fetch the seed whose ``uid`` equals the given value.

    :param uid: value compared against ``SeedIds.uid``
    :return: first matching ``SeedIds`` row; presumably ``None`` when nothing matches
    """
    matching_seeds = db_session.query(SeedIds).filter(SeedIds.uid == uid)
    return matching_seeds.first()
Exemplo n.º 33
0
def set_useless_keyword(keyword):
    """
    Disable a search keyword by setting its ``enable`` to 0.

    :param keyword: keyword text, matched against ``KeyWords.keyword``
    :return: None
    """
    search_word = db_session.query(KeyWords).filter(
        KeyWords.keyword == keyword).first()
    # ``first()`` yields None when no row matches; there is nothing to disable
    # in that case, so return instead of failing on ``None.enable``.
    if search_word is None:
        return
    search_word.enable = 0
    db_session.commit()
Exemplo n.º 34
0
def get_seed_ids():
    """
    Get the ids of all users whose profile information needs to be crawled.

    Rows are selected with the raw SQL condition ``is_crawled=0``.

    :return: list of result rows, each carrying only the ``uid`` column
    """
    pending_filter = text('is_crawled=0')
    return db_session.query(SeedIds.uid).filter(pending_filter).all()
Exemplo n.º 35
0
def get_home_ids(status=0, num=100):
    """
    Get up to ``num`` seeds whose ``home_crawled`` equals ``status``.

    :param status: value compared against ``SeedIds.home_crawled`` (default 0)
    :param num: maximum number of rows to return (default 100)
    :return: list of ``SeedIds`` rows (full rows, not only the ids)
    """
    by_status = db_session.query(SeedIds).filter(SeedIds.home_crawled == status)
    capped = by_status.limit(num)
    return capped.all()
Exemplo n.º 36
0
def get_by_url(url):
    """
    Look up a WeChat record by url.

    :param url: value compared against ``WeChatData.url``
    :return: first matching ``WeChatData`` row; presumably ``None`` when nothing matches
    """
    wechat_query = db_session.query(WeChatData)
    by_url = wechat_query.filter(WeChatData.url == url)
    return by_url.first()
Exemplo n.º 37
0
def get_weibo_comment_not_crawled():
    """
    List the ids of weibos matching the raw SQL condition ``comment_crawled=0``.

    :return: list of result rows, each carrying only the ``weibo_id`` column
    """
    id_query = db_session.query(WeiboData.weibo_id)
    pending = id_query.filter(text('comment_crawled=0'))
    return pending.all()
Exemplo n.º 38
0
def get_comment_by_id(cid):
    """
    Look up a weibo comment by its comment id.

    :param cid: value compared against ``WeiboComment.comment_id``
    :return: first matching ``WeiboComment`` row; presumably ``None`` when nothing matches
    """
    comment_query = db_session.query(WeiboComment)
    by_cid = comment_query.filter(WeiboComment.comment_id == cid)
    return by_cid.first()
Exemplo n.º 39
0
def get_seed_ids():
    """
    Get the ids of all users to be crawled, i.e. seeds matching ``is_crawled=0``.

    :return: list of result rows, each carrying only the ``uid`` column
    """
    uncrawled = db_session.query(SeedIds.uid).filter(text('is_crawled=0'))
    return uncrawled.all()
Exemplo n.º 40
0
def get_home_ids():
    """
    Get the ids of all users whose home pages need to be crawled.

    Rows are selected with the raw SQL condition ``home_crawled=0``.

    :return: list of result rows, each carrying only the ``uid`` column
    """
    pending_filter = text('home_crawled=0')
    return db_session.query(SeedIds.uid).filter(pending_filter).all()