예제 #1
0
 def get_weibo_token(self, appkey, appsecret, url, username, password):
     """Run the Weibo OAuth2 flow and install the token on ``self.weibo_client``.

     Creates an ``APIClient`` for the application, obtains an authorization
     code through ``WeiboLogin(...).get_code()``, exchanges that code for an
     access token and sets the token (with its expiry) on the client.

     Args:
         appkey: application key passed to ``APIClient`` and ``WeiboLogin``.
         appsecret: application secret passed to ``APIClient``.
         url: redirect URI passed to ``APIClient`` and ``WeiboLogin``.
         username: account name passed to ``WeiboLogin``.
         password: account password passed to ``WeiboLogin``.
     """
     logging.info("preparing weibo OAuth2:")
     # Logger arguments are passed lazily so formatting only happens when
     # the record is actually emitted.
     logging.info("appkey: %s username: %s", appkey, username)
     self.weibo_client = APIClient(app_key=appkey,
                                   app_secret=appsecret,
                                   redirect_uri=url)
     code = WeiboLogin(username, password, appkey, url).get_code()
     logging.info("code: %s", code)
     r = self.weibo_client.request_access_token(code)
     self.weibo_client.set_access_token(r.access_token, r.expires_in)
     # NOTE(review): the access token is written to the log in clear text;
     # consider masking or dropping this line outside of debugging.
     logging.info("token: %s", r.access_token)
예제 #2
0
    def __init__(self, search_key, user_name=USER_NAME, passwd=PASSWD):
        """Start a PhantomJS driver, log in to Sina Weibo and store the keyword.

        Args:
            search_key: search keyword; surrounding whitespace is stripped
                before it is stored on ``self.sk``.
            user_name: account name handed to ``WeiboLogin``.
            passwd: account password handed to ``WeiboLogin``.

        Raises:
            SystemExit: with status 1 when ``WeiboLogin.login()`` reports
                failure.
        """
        self.driver = webdriver.PhantomJS()
        # WeiboLogin is the interface used for authorization.
        self.wl = WeiboLogin(user_name, passwd, self.driver)

        if not self.wl.login():
            logging.error('login failed')
            try:
                # Shut the headless browser down so it is not left running
                # after the process exits.
                self.driver.quit()
            finally:
                # Exit with status 1 even if the driver cleanup itself fails.
                sys.exit(1)

        logging.info('login successfully')
        self.sk = search_key.strip()
예제 #3
0
    def __init__(self,
                 name="*****@*****.**",
                 password="******",
                 uid="09424248189",
                 *args,
                 **kwargs):
        """Configure the crawler and obtain the Sina Weibo cookies.

        Cookies are read from ``weibocookie.json`` when that file exists.
        Otherwise a login is performed through ``WeiboLogin`` and a fixed
        set of cookies from the resulting session is stored on
        ``self.cookie`` and written to that file.

        Args:
            name: account name, used only when no cookie file exists.
            password: account password, used only when no cookie file exists.
            uid: user id stored on ``self.uid``.
            *args: forwarded to the parent initialiser.
            **kwargs: forwarded to the parent initialiser.
        """
        super(UserInfoCrawl, self).__init__(*args, **kwargs)
        self.uid = uid
        self.url_base = "http://weibo.cn"
        self.start_urls = ["http://weibo.com"]
        self.allowed_domains = ["weibo.com", "weibo.cn"]
        # Do not crawl the account's own weibo posts.
        self.first_flag_info = True
        # The account's own profile is handled differently from other accounts.
        self.first_flag_home = True

        cookie_file = "weibocookie.json"
        if os.path.exists(cookie_file):
            # A cookie file is already present: load it and skip the login.
            with open(cookie_file, "r") as cached:
                self.cookie = json.load(cached)
            return

        self.weibo = WeiboLogin()
        self.session = self.weibo.login(name, password)
        jar = requests.utils.dict_from_cookiejar(self.session.cookies)

        # Keep only these Sina Weibo cookies; a missing one raises KeyError.
        wanted = ('ALF', 'sso_info', 'SUB', 'SUBP', 'SUE', 'SUHB', 'SUP',
                  'SUS')
        self.cookie = {key: jar[key] for key in wanted}
        with open(cookie_file, "w") as out:
            json.dump(self.cookie, out)
예제 #4
0
#coding:utf-8
import urllib2
import post_encode
from weibo_login import WeiboLogin
import get_weibo
if __name__ == '__main__':
    # Log in first. The result is only reported; the fetch below runs
    # whether or not the login succeeded.
    Login = WeiboLogin('17089368196', 'tttt5555')
    login_result = Login.login()
    # Kept as an equality test so non-bool return values behave as before.
    if login_result == True:
        print("登录成功")
    # Looping over the `page` query parameter would crawl several pages.
    search_url = "http://s.weibo.com/weibo/%25E5%2591%25A8%25E6%2589%25AC%25E9%259D%2592&page=3"
    response = urllib2.urlopen(search_url)
    html = response.read()
    # Hand the page to the function that parses the HTML content.
    get_weibo.write_all_info(html)
예제 #5
0
import requests
import post_encode
from weibo_login import WeiboLogin
import get_weibo
import re
from lxml import html
from lxml import etree
import string
import random
import time
import os
import sys
# Python 2 idiom: reload sys, then switch the default encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# Log in; success is only reported, the script continues either way.
Login = WeiboLogin('*****@*****.**', 'dan5493')
login_result = Login.login()
# Kept as an equality test so non-bool return values behave as before.
if login_result == True:
    print("登录成功")

urls_1, names_1 = get_weibo.get_url()
length = len(urls_1)
for i, raw_url in enumerate(urls_1):
    urls = []
    # Strip literal backslash-n sequences, then runs of three spaces,
    # from the name that belongs to this URL.
    names_3 = re.sub('   ', '', re.sub('\\\\n', '', names_1[i]))
    new_path = get_weibo.path(names_3.decode('unicode_escape'))
    # Remove backslashes from the link and prefix the site root.
    url_2 = 'http://s.weibo.com' + re.sub('\\\\', '', raw_url)
    # First page of each topic.
    url_3 = re.sub('Refer=top', 'page=1.html', url_2)
    print(url_3)
    # Pause for a random 3-10 seconds between iterations.
    time.sleep(random.randint(3, 10))
예제 #6
0
from weibo import APIClient, APIError
from weibo_login import WeiboLogin, WeiboLoginError

# NOTE(review): the application secret and the account credentials below are
# hard-coded in the script; consider loading them from the environment.
APP_KEY = "3226611318"
APP_SECRET = "4f94b19d1d30c6bce2505e69d22cd62e"
CALLBACK_URL = "https://api.weibo.com/oauth2/default.html"

print("start login...")

client = APIClient(app_key=APP_KEY,
                   app_secret=APP_SECRET,
                   redirect_uri=CALLBACK_URL)

# Obtain the OAuth2 authorization code; a failed login ends the script with
# exit status 1.
try:
    code = WeiboLogin("*****@*****.**", "s2013h1cfr", APP_KEY,
                      CALLBACK_URL).get_code()
except WeiboLoginError as e:
    print("Login Fail [%s]: %s" % (e.error_code, e.error))
    # Raise SystemExit directly: the `exit` helper is injected by the `site`
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(1)

print("code: %s" % code)

# Exchange the authorization code for an access token.
r = client.request_access_token(code)

access_token = r.access_token
expires_in = r.expires_in

print("token: %s" % access_token)
print("expires in %s" % expires_in)

# Install the token on the client for subsequent calls.
client.set_access_token(access_token, expires_in)
예제 #7
0
파일: start.py 프로젝트: Echo-Ws/WBspider
# -*- coding:utf-8 -*-
import sys
from weibo_login import WeiboLogin
from spider.SearchSpider import SearchSpider
from spider.RCSpider import RCSpider
import requests
import time
import csv
from logconfig import LogConfig
logger = LogConfig.get_logger()
# Python 2 idiom: reload sys, then switch the default encoding to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")

# Log in and keep the value returned by login_un() (the session, per the
# original comment). The previous throwaway `requests.session()` assignment
# was overwritten immediately and has been removed.
w = WeiboLogin()
s_login = w.login_un()
logger.info("login has finished")
# Short pause after the login before the script continues.
time.sleep(1.5)

Search_urls = [
    # 搜索关键词语:杨洋 傅园慧 可以运行
    "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E6%9D%A8%E6%B4%8B+%E5%82%85%E5%9B%AD%E6%85%A7&type=all&queryVal=%E6%9D%A8%E6%B4%8B+%E5%82%85%E5%9B%AD%E6%85%A7&luicode=20000174&title=%E6%9D%A8%E6%B4%8B+%E5%82%85%E5%9B%AD%E6%85%A7&v_p=11&ext=&fid=100103type%3D1%26q%3D%E6%9D%A8%E6%B4%8B+%E5%82%85%E5%9B%AD%E6%85%A7&uicode=10000011&next_cursor=&page=",
    # 搜索关键词语:傅园慧 可以运行
    "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7&type=all&queryVal=%E5%82%85%E5%9B%AD%E6%85%A7&luicode=10000011&lfid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7&title=%E5%82%85%E5%9B%AD%E6%85%A7&v_p=11&ext=&fid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7&uicode=10000011&next_cursor=&page=",
    # 搜索关键词语:洪荒少女 可以运行
    "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&type=all&queryVal=%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&title=%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&v_p=11&ext=&fid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&uicode=10000011&next_cursor=&page=",
    # 搜索关键词语:洪荒少女傅园慧 可以运行
    "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&type=all&queryVal=%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&luicode=10000011&lfid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&title=%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&v_p=11&ext=&fid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&uicode=10000011&next_cursor=&page=",
    # 搜索关键词语:傅园慧表情包 可以运行
    "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7%E8%A1%A8%E6%83%85%E5%8C%85&type=all&queryVal=%E5%82%85%E5%9B%AD%E6%85%A7%E8%A1%A8%E6%83%85%E5%8C%85&luicode=10000011&lfid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&title=%E5%82%85%E5%9B%AD%E6%85%A7%E8%A1%A8%E6%83%85%E5%8C%85&v_p=11&ext=&fid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7%E8%A1%A8%E6%83%85%E5%8C%85&uicode=10000011&next_cursor=&page=",
#coding:utf-8
import urllib2
import post_encode
import time
import os
from weibo_login import WeiboLogin
import get_weibo
if __name__ == '__main__':
    #Login = WeiboLogin('17089368196', 'tttt5555')
    Login = WeiboLogin('用户名', '密码')
    if Login.login() == True:
        print "登录成功"
    rnd = long((time.time()) * 1000)
    #可以根据page来循环以便达到爬取多页的目的
    init_url = "http://weibo.com/aj/v6/mblog/info/big?ajwvr=6&id=3917356680052180&max_id=3917762218921722"
    url = init_url + "&page=1&_rnd=" + str(rnd)
    html = urllib2.urlopen(url)
    #调用解析html内容的函数
    forward_html = urllib2.urlopen(
        "http://weibo.com/1336593085/D7hwE0dzC?type=repost#_rnd1449550776472"
    ).read()
    #print forward_html
    origin_uid = get_weibo.get_origin_weibo(forward_html)
    (uid, origin_uid2_no, uid2_no, time_no, time_no2, total_forward,
     total_page, current_page) = get_weibo.get_forward(html, forward_html)
    #循环抓取多页
    print total_page
    uid = []
    origin_uid2 = []
    uid2 = []
    forward_time = []