Пример #1
0
def main():
    """Command-line entry point that dispatches to one ``SocialWeiboAPI`` method.

    Parses ``-f/--function`` (required, one of the four names in ``choices``),
    the optional ``-m/--mid`` and ``-i/--ids`` values, and any extra keyword
    arguments packed into ``--others`` as ``key1//value1//key2//value2``.
    Every argument that was actually supplied is forwarded as a keyword
    argument to the selected method; arguments left at ``None`` are omitted.

    A malformed ``--others`` value (a key without a value) is rejected through
    ``parser.error``, which prints the usage message and exits with status 2,
    exactly like any other invalid command-line argument.

    Any other exception raised while parsing or calling the API is printed
    and not re-raised (best-effort command-line behaviour).
    """
    choices = ['statuses_show_batch_biz','get_comment_by_since_id','get_attitude_by_since_id','get_repost_by_since_id']
    try:
        parser = argparse.ArgumentParser()
        parser.description = 'Command line interface of Weibo'
        parser.add_argument("-f", "--function", required=True, choices=choices,help="Choose one function")
        parser.add_argument("-m","--mid",help="Weibo post ID")
        parser.add_argument("-i","--ids",help="Weibo post IDs")
        parser.add_argument("--others",
                            help="Any optional argument not mentioned above, example: --others=key1//value1//key2//value2")
        opt = vars(parser.parse_args())

        # Always take ``others`` out so the raw string is never forwarded to
        # the API method as a keyword argument of its own.
        others = opt.pop('others', None)
        if others:
            tokens = others.split('//')
            if len(tokens) % 2:
                # zip() would silently drop the dangling key; fail loudly instead.
                parser.error(
                    "--others must contain key//value pairs "
                    "(key1//value1//key2//value2), got: {!r}".format(others))
            # Even positions are keys, odd positions are their values.
            opt.update(zip(tokens[0::2], tokens[1::2]))

        # Remove params whose value is None so the API method's defaults apply.
        opt = {key: value for key, value in opt.items() if value is not None}
        func = opt.pop('function')

        weibo = SocialWeiboAPI()
        getattr(weibo, func)(**opt)

    except Exception as e:
        # SystemExit raised by argparse is not an Exception and passes through.
        print(e)
Пример #2
0
def main(day_back):
    """Collect recently created posts from three tables and crawl their attitudes.

    Queries ``(pid, since_id)`` from ``WeiboSearchLimitedLastAttitude``,
    ``WeiboKolLastAttitude`` and ``WeiboMentionLastAttitude`` for rows whose
    ``created_at`` is at or after ``weibo.getStrTime(day_back)``, merges the
    three result sets, keeps one entry per post id (the one with the highest
    ``since_id``) and passes the list to ``weibo.doParallel('attitude', ...)``.
    Nothing is dispatched when no post is found.

    Args:
        day_back: value passed unchanged to ``weibo.getStrTime`` to compute
            the start time (presumably a day offset such as ``-7``; confirm
            against ``SocialWeiboAPI.getStrTime``).

    NOTE(review): the original header comment read "Get the last 2000 comments
    for each post at most"; this function dispatches 'attitude' work and no
    such limit is visible here. Confirm where the limit is applied, if at all.
    """
    # Both objects are created before the ``try`` so that a failure here
    # propagates as-is instead of being masked by an UnboundLocalError raised
    # from ``session.close()`` in the ``finally`` clause.
    weibo = SocialWeiboAPI()
    session = weibo.createSession()
    try:
        startTime = weibo.getStrTime(day_back)

        # Same query against the search-limited, KOL and mention tables,
        # merged in that order.
        attitudePostList = []
        for model in (WeiboSearchLimitedLastAttitude,
                      WeiboKolLastAttitude,
                      WeiboMentionLastAttitude):
            rows = session.query(model.pid, model.since_id)\
                .filter(model.created_at >= startTime)\
                .all()
            attitudePostList.extend(
                {'id': pid, 'since_id': since_id} for pid, since_id in rows)

        if attitudePostList:
            # Sorting by since_id descending and keeping the first duplicate
            # retains, for every post id, the entry with the largest since_id.
            df = pd.DataFrame(attitudePostList)
            df = df.sort_values(by='since_id', ascending=False)
            df = df.drop_duplicates(subset='id', keep='first')
            attitudePostList_dedup = df.to_dict('records')
            weibo.doParallel('attitude', attitudePostList_dedup)
    finally:
        session.close()
Пример #3
0
from SocialAPI.SocialAPI.WeiboAPI import SocialWeiboAPI
import datetime
from SocialAPI.Model import MasterWeiboSearch

if __name__ == '__main__':
    # Script entry point: load the active search queries, then run a limited
    # status search for each query over four six-hour windows of a single day.

    weibo = SocialWeiboAPI()
    session = weibo.createSession()
    # Fetch (id, search_query) for every MasterWeiboSearch row with status == 1.
    brands = session.query(MasterWeiboSearch.id,MasterWeiboSearch.search_query)\
        .filter(MasterWeiboSearch.status==1)\
        .all()
    brand_queries = brands  # whole (id, search_query) rows, not only the ids
    # The rows were fully loaded by .all(), so the session is released here.
    session.close()

    # Reference day: the current local time shifted three days into the past.
    # Only its date part matters; the time of day is overwritten below.
    start_date_time = datetime.datetime.now() + datetime.timedelta(days=-3)
    for q in brand_queries:
        # i = 0..3 yields the windows 00:00:00-05:59:59, 06:00:00-11:59:59,
        # 12:00:00-17:59:59 and 18:00:00-23:59:59 of the reference day.
        for i in range(4):
            temptime = start_date_time.replace(hour=i * 6, minute=0, second=0)
            start_time = temptime.strftime('%Y-%m-%d %H:%M:%S')

            temptime = start_date_time.replace(hour=(i * 6) + 5,
                                               minute=59,
                                               second=59)
            end_time = temptime.strftime('%Y-%m-%d %H:%M:%S')

            weibo.search_statuses_limited(start_time,
                                          end_time,
Пример #4
0
from SocialAPI.SocialAPI.WeiboAPI import SocialWeiboAPI
from SocialAPI.Helper import Helper
import asyncio
import uvloop
from SocialAPI.Model import MasterUid

if __name__ == '__main__':
    # Script entry point: request user counts for every MasterUid row flagged
    # with crawl_master == 1 and crawl_user_growth == 1, sending the uids in
    # comma-joined batches of 100 that run concurrently on a uvloop event loop.

    # NOTE(review): rootPath is not used in the visible code; the call is kept
    # in case Helper() / getRootPath() has side effects. Confirm and remove.
    rootPath = Helper().getRootPath()

    weibo = SocialWeiboAPI()

    session = weibo.createSession()
    try:
        uids = session.query(MasterUid.uid)\
            .filter(MasterUid.crawl_master==1,MasterUid.crawl_user_growth==1)\
            .all()
        uidList = [str(uid[0]) for uid in uids]

        # One comma-separated string per batch of at most 100 uids.
        uidGroup = [
            ','.join(uidList[i:i + 100]) for i in range(0, len(uidList), 100)
        ]

        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
        loop = asyncio.new_event_loop()
        try:
            tasks = [
                asyncio.ensure_future(weibo.get_users_count_batch(uid_batch),
                                      loop=loop)
                for uid_batch in uidGroup
            ]
            # asyncio.wait() raises ValueError on an empty collection, so skip
            # it when the query returned no uid at all.
            if tasks:
                loop.run_until_complete(asyncio.wait(tasks))
            # Calling .result() re-raises an exception raised inside a task.
            result = [task.result() for task in tasks]
        finally:
            # Close the loop even when a batch failed.
            loop.close()
    finally:
        # The original script never released the database session.
        session.close()
Пример #5
0
from SocialAPI.SocialAPI.WeiboAPI import SocialWeiboAPI
import asyncio
import uvloop
from datetime import datetime
from SocialAPI.Model import MasterUid


if __name__ == '__main__':
    # Script entry point: call get_user_show_batch_other for every MasterUid
    # row with crawl_master == 1, sending the uids in comma-joined batches of
    # 50 that run concurrently on a uvloop event loop.

    weibo = SocialWeiboAPI()
    session = weibo.createSession()
    try:
        uids = session.query(MasterUid.uid).filter(MasterUid.crawl_master==1).all()
        uidList = [str(uid[0]) for uid in uids]
        # One comma-separated string per batch of at most 50 uids.
        uidGroup = [','.join(uidList[i:i + 50]) for i in range(0, len(uidList), 50)]

        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
        loop = asyncio.new_event_loop()
        try:
            tasks = [
                asyncio.ensure_future(weibo.get_user_show_batch_other(uid_batch),
                                      loop=loop)
                for uid_batch in uidGroup
            ]
            # asyncio.wait() raises ValueError on an empty collection, so skip
            # it when the query returned no uid at all.
            if tasks:
                loop.run_until_complete(asyncio.wait(tasks))
            # Calling .result() re-raises an exception raised inside a task.
            result = [task.result() for task in tasks]
        finally:
            # Close the loop even when a batch failed.
            loop.close()
    finally:
        # Release the session on every exit path, not only on success.
        session.close()





Пример #6
0
from SocialAPI.SocialAPI.WeiboAPI import SocialWeiboAPI
from SocialAPI.Model import Kol, WeiboLastMentionedPost
import pandas as pd
import numpy as np
import threading

if __name__ == '__main__':
    # Script entry point: load the (uid, since_id) pair of every
    # WeiboLastMentionedPost row, then release the database session.
    weibo = SocialWeiboAPI()
    session = weibo.createSession()
    uid_list = session.query(WeiboLastMentionedPost.uid,
                             WeiboLastMentionedPost.since_id).all()
    session.close()
    # NOTE(review): the triple-quoted string that starts on the next line wraps
    # code that is not executed (a MongoDB aggregation over weibo_post_mention
    # merged with a df_uid_list that is not defined in the visible code).
    # Its closing quotes are not visible in this excerpt -- confirm whether
    # the disabled code can simply be deleted.
    """
    client = weibo.client
    db = client.weibo
    mention_table = db.weibo_post_mention

    # get the latest mention post as the starting point
    pipeline = [
        {'$group': {'_id': '$uid_mentioned', 'since_id': {'$max': '$id'}, 'count': {'$sum': 1}}}
    ]
    mention_list = list(mention_table.aggregate(pipeline))
    if mention_list:
        df_mention_list = pd.DataFrame(mention_list)
        df_mention_list['_id'] = pd.to_numeric(df_mention_list['_id'])
        df = df_uid_list.merge(df_mention_list,left_on='uid',right_on='_id',how='left')
        df['since_id'] = df['since_id'].replace(np.nan, 0)

    else:
        df = df_uid_list
        df['since_id'] = 0
Пример #7
0
import pandas as pd
from SocialAPI.SocialAPI.WeiboAPI import SocialWeiboAPI
from SocialAPI.Helper import Helper
from SocialAPI.Crawler import WeiBoCrawler
from SocialAPI.Model import Kol
import time
from datetime import datetime
from pymongo import MongoClient

if __name__ == '__main__':
    # Script entry point: gather the credentials of every crawlable KOL and
    # get handles on the collections used below.
    myHelper = Helper()

    weibo = SocialWeiboAPI()
    session = weibo.createSession()
    # weibo.client is presumably a pymongo MongoClient (TODO confirm); the
    # two handles below are collections of its `weibo` database.
    client = weibo.client
    db = client.weibo
    crawlTable = db.weibo_post_crawl
    postTable = db.weibo_user_post

    crawlDict = {}
    # Start of the time window as returned by getStrTime(-30), plus the same
    # instant converted by getTimeStamp (presumably 30 days back -- confirm).
    startTime = weibo.getStrTime(-30)
    startTimeStamp = weibo.getTimeStamp(startTime)
    userDict = {}
    # (uid, username, pw) of every Kol row with status == 1 and crawl_status == 1.
    userInfo = session.query(Kol.uid,Kol.username,Kol.pw).filter(Kol.status == 1, Kol.crawl_status==1).all()

    # Debugging aid: restrict the run to a single KOL.
    #userInfo = session.query(Kol.uid,Kol.username,Kol.pw).filter(Kol.uid==2036201132).all()

    # Index the credentials by uid: userDict[uid] == (username, pw).
    for user in userInfo:
        userDict[user[0]] = (user[1],user[2])

    for uid in userDict.keys():
Пример #8
0
from SocialAPI.SocialAPI.WeiboAPI import SocialWeiboAPI
from SocialAPI.Model import WeiboSearchLimitedLastRepost, WeiboKolLastRepost

if __name__ == '__main__':
    # Script entry point: collect (pid, since_id) of the posts tracked in
    # WeiboSearchLimitedLastRepost and WeiboKolLastRepost whose created_at is
    # at or after weibo.getStrTime(-7), and hand them to
    # weibo.doParallel('repost', ...).

    # Both objects are created before the ``try`` so that a failure here
    # propagates as-is instead of being masked by a NameError raised from
    # ``session.close()`` in the ``finally`` clause.
    weibo = SocialWeiboAPI()
    session = weibo.createSession()
    try:
        startTime = weibo.getStrTime(-7)
        # NOTE(review): startTimeStamp is not used in the visible code; the
        # call is kept unchanged in case it has side effects -- confirm.
        startTimeStamp = weibo.getTimeStamp(startTime)

        # Same query against the search-limited table, then the KOL table;
        # results are concatenated in that order without de-duplication.
        repostPostList = []
        for model in (WeiboSearchLimitedLastRepost, WeiboKolLastRepost):
            pids = session.query(model.pid, model.since_id) \
                .filter(model.created_at >= startTime) \
                .all()
            repostPostList += [{'id': _[0], 'since_id': _[1]} for _ in pids]

        weibo.doParallel('repost', repostPostList)
    finally:
        session.close()
Пример #9
0
from SocialAPI.SocialAPI.WeiboAPI import SocialWeiboAPI
from pyspark.sql import SQLContext, SparkSession
from pyspark.sql.types import *
import logging
import urllib

if __name__ == '__main__':
    # Script entry point: configure logging, read the MongoDB and MySQL
    # credentials from the SocialWeiboAPI configuration and build the two
    # connection URIs (MongoDB collection and MySQL JDBC endpoint).

    try:
        logging.basicConfig(format='%(asctime)s %(levelname)s  %(message)s',
                            level=logging.INFO,
                            datefmt='%Y/%m/%d %H:%M:%S')
        weibo = SocialWeiboAPI()
        mongod_db = 'weibo'
        mongo_table = 'weibo_search_statuses_history_result_merge'
        # Percent-encode the credentials so that characters such as '@', ':'
        # or '/' cannot break the structure of the MongoDB URI.
        # NOTE(review): only `import urllib` is visible at the top of this
        # script; `urllib.parse` resolves only if some other import has
        # already loaded that submodule -- consider `import urllib.parse`.
        mongo_username = urllib.parse.quote_plus(
            weibo.cfp.get('mongodb_weibo', 'user'))
        mongo_pw = urllib.parse.quote_plus(
            weibo.cfp.get('mongodb_weibo', 'pwd'))
        # The same host is used for both the MongoDB and the MySQL URI.
        host = '127.0.0.1'
        mysql_username = weibo.cfp.get('mysql', 'user')
        mysql_pw = weibo.cfp.get('mysql', 'password')
        mysql_db = weibo.cfp.get('mysql', 'db')
        mysql_table = 'weibo_search_history'

        # mongodb://<user>:<password>@<host>/<database>.<collection>
        mongo_uri = "mongodb://{}:{}@{}/{}.{}".format(mongo_username, mongo_pw,
                                                      host, mongod_db,
                                                      mongo_table)
        # JDBC URL on port 3306 with UTF-8 enabled for the connection.
        mysql_uri = "jdbc:mysql://{}:3306/{}?useUnicode=true&characterEncoding=utf8".format(
            host, mysql_db)