def get_weibo_token(self, appkey, appsecret, url, username, password):
    """Run the Weibo OAuth2 flow and install the access token on the client.

    Creates an ``APIClient`` stored as ``self.weibo_client``, obtains an
    authorization code via ``WeiboLogin(...).get_code()``, exchanges the
    code for an access token and sets that token (with its expiry) on the
    client.

    Args:
        appkey: Application key passed to ``APIClient`` and ``WeiboLogin``.
        appsecret: Application secret passed to ``APIClient``.
        url: Redirect URI passed to ``APIClient`` and ``WeiboLogin``.
        username: Account name passed to ``WeiboLogin``.
        password: Account password passed to ``WeiboLogin``.
    """
    # Lazy %-style arguments: formatting only happens if the record is
    # emitted, and a tuple-valued argument cannot break the format string.
    logging.info("preparing weibo OAuth2:")
    logging.info("appkey: %s username: %s", appkey, username)

    self.weibo_client = APIClient(
        app_key=appkey,
        app_secret=appsecret,
        redirect_uri=url,
    )

    code = WeiboLogin(username, password, appkey, url).get_code()
    logging.info("code: %s", code)

    token = self.weibo_client.request_access_token(code)
    self.weibo_client.set_access_token(token.access_token, token.expires_in)
    # NOTE(review): the access token is written to the log in clear text.
    logging.info("token: %s", token.access_token)
def __init__(self, search_key, user_name=USER_NAME, passwd=PASSWD):
    """Log in to Sina Weibo through PhantomJS and remember the search key.

    Args:
        search_key: Search keyword; stored in ``self.sk`` with surrounding
            whitespace stripped.
        user_name: Account name handed to ``WeiboLogin``.
        passwd: Account password handed to ``WeiboLogin``.

    Raises:
        SystemExit: With status 1 when ``WeiboLogin.login()`` reports failure.
    """
    self.driver = webdriver.PhantomJS()
    # WeiboLogin is the interface for authorization; it receives the driver.
    self.wl = WeiboLogin(user_name, passwd, self.driver)

    if not self.wl.login():
        logging.error('login failed')
        # Shut the browser down before exiting so that no PhantomJS process
        # is left behind; exit even if quitting the driver itself fails.
        try:
            self.driver.quit()
        finally:
            sys.exit(1)

    logging.info('login successfully')
    self.sk = search_key.strip()
def __init__(self, name="*****@*****.**", password="******", uid="09424248189", *args, **kwargs):
    """Set up the crawl targets and obtain the Weibo cookie.

    The cookie is read from ``weibocookie.json`` when that file exists.
    Otherwise a login is performed through ``WeiboLogin``, the needed cookie
    values are taken from the resulting session, and they are saved to that
    file.

    Args:
        name: Account name used for the login when no cookie file exists.
        password: Account password used for the login when no cookie file
            exists.
        uid: Stored as ``self.uid``.
        *args: Forwarded to the parent constructor.
        **kwargs: Forwarded to the parent constructor.
    """
    super(UserInfoCrawl, self).__init__(*args, **kwargs)
    self.uid = uid
    self.start_urls = ["http://weibo.com"]
    self.allowed_domains = ["weibo.com", "weibo.cn"]
    self.url_base = "http://weibo.cn"
    self.first_flag_info = True  # do not crawl our own weibo
    self.first_flag_home = True  # our own profile is handled differently from other accounts

    cookie_file = "weibocookie.json"
    if os.path.exists(cookie_file):
        with open(cookie_file, "r") as f:
            self.cookie = json.load(f)
        return

    # No saved cookie: log in and extract the cookie values from the session.
    self.weibo = WeiboLogin()
    self.session = self.weibo.login(name, password)
    jar = requests.utils.dict_from_cookiejar(self.session.cookies)

    # Set sina weibo cookie
    wanted_keys = ('ALF', 'sso_info', 'SUB', 'SUBP', 'SUE', 'SUHB', 'SUP', 'SUS')
    self.cookie = {key: jar[key] for key in wanted_keys}

    with open(cookie_file, "w") as f:
        json.dump(self.cookie, f)
#coding:utf-8
# Script: log in to Weibo, download one search-result page and pass its HTML
# to get_weibo.write_all_info.
import urllib2
import post_encode
from weibo_login import WeiboLogin
import get_weibo

if __name__ == '__main__':
    login_client = WeiboLogin('17089368196', 'tttt5555')
    logged_in = login_client.login()
    if logged_in == True:
        print("登录成功")

    # NOTE(review): the original indentation was lost; the download below is
    # assumed to run whether or not the login succeeded - confirm.
    # The ``page`` value can be varied in a loop to crawl several pages.
    search_url = "http://s.weibo.com/weibo/%25E5%2591%25A8%25E6%2589%25AC%25E9%259D%2592&page=3"
    response = urllib2.urlopen(search_url)
    html = response.read()

    # Call the function that parses the HTML content.
    get_weibo.write_all_info(html)
# Script: log in to Weibo, fetch the topic URLs and names from get_weibo, and
# build the first-page URL of every topic, pausing a random time per topic.
import requests
import post_encode
from weibo_login import WeiboLogin
import get_weibo
import re
from lxml import html
from lxml import etree
import string
import random
import time
import os
import sys

reload(sys)
sys.setdefaultencoding('utf-8')

Login = WeiboLogin('*****@*****.**', 'dan5493')
if Login.login() == True:
    print("登录成功")

# NOTE(review): the original indentation was lost; the code below is assumed
# to run at module level regardless of the login result - confirm.
urls_1, names_1 = get_weibo.get_url()
length = len(urls_1)
for i in range(length):
    urls = []

    # Drop literal backslash-n sequences and spaces from the topic name.
    names_3 = names_1[i].replace('\\n', '').replace(' ', '')
    new_path = get_weibo.path(names_3.decode('unicode_escape'))

    # Strip backslashes from the relative link, make it absolute, and point
    # it at the first page of the topic.
    url_3 = ('http://s.weibo.com' + urls_1[i].replace('\\', '')).replace(
        'Refer=top', 'page=1.html')
    print(url_3)

    # Pause between 3 and 10 seconds before the next topic.
    time.sleep(random.randint(3, 10))
# Script: obtain a Weibo OAuth2 authorization code through WeiboLogin,
# exchange it for an access token and install the token on the API client.
import sys

from weibo import APIClient, APIError
from weibo_login import WeiboLogin, WeiboLoginError

# NOTE(review): the app credentials and the account password are hard-coded
# in this file; consider loading them from configuration kept out of source
# control.
APP_KEY = "3226611318"
APP_SECRET = "4f94b19d1d30c6bce2505e69d22cd62e"
CALLBACK_URL = "https://api.weibo.com/oauth2/default.html"

print("start login...")
client = APIClient(app_key=APP_KEY, app_secret=APP_SECRET, redirect_uri=CALLBACK_URL)

code = ''
try:
    code = WeiboLogin("*****@*****.**", "s2013h1cfr", APP_KEY, CALLBACK_URL).get_code()
except WeiboLoginError as e:
    print("Login Fail [%s]: %s" % (e.error_code, e.error))
    # Use sys.exit rather than the exit() helper: the latter is injected by
    # the site module for interactive use and is not guaranteed to exist.
    sys.exit(1)

print("code: %s" % code)

# Exchange the authorization code for an access token.
r = client.request_access_token(code)
access_token = r.access_token
expires_in = r.expires_in
print("token: %s" % access_token)
print("expires in %s" % expires_in)

client.set_access_token(access_token, expires_in)
# -*- coding:utf-8 -*- import sys from weibo_login import WeiboLogin from spider.SearchSpider import SearchSpider from spider.RCSpider import RCSpider import requests import time import csv from logconfig import LogConfig logger = LogConfig.get_logger() reload(sys) sys.setdefaultencoding("utf-8") # 登录,保存Session s_login = requests.session() w = WeiboLogin() s_login = w.login_un() logger.info("login has finished") time.sleep(1.5) Search_urls = [ # 搜索关键词语:杨洋 傅园慧 可以运行 "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E6%9D%A8%E6%B4%8B+%E5%82%85%E5%9B%AD%E6%85%A7&type=all&queryVal=%E6%9D%A8%E6%B4%8B+%E5%82%85%E5%9B%AD%E6%85%A7&luicode=20000174&title=%E6%9D%A8%E6%B4%8B+%E5%82%85%E5%9B%AD%E6%85%A7&v_p=11&ext=&fid=100103type%3D1%26q%3D%E6%9D%A8%E6%B4%8B+%E5%82%85%E5%9B%AD%E6%85%A7&uicode=10000011&next_cursor=&page=", # 搜索关键词语:傅园慧 可以运行 "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7&type=all&queryVal=%E5%82%85%E5%9B%AD%E6%85%A7&luicode=10000011&lfid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7&title=%E5%82%85%E5%9B%AD%E6%85%A7&v_p=11&ext=&fid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7&uicode=10000011&next_cursor=&page=", # 搜索关键词语:洪荒少女 可以运行 "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&type=all&queryVal=%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&title=%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&v_p=11&ext=&fid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&uicode=10000011&next_cursor=&page=", # 搜索关键词语:洪荒少女傅园慧 可以运行 
"http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&type=all&queryVal=%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&luicode=10000011&lfid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3&title=%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&v_p=11&ext=&fid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&uicode=10000011&next_cursor=&page=", # 搜索关键词语:傅园慧表情包 可以运行 "http://m.weibo.cn/page/pageJson?containerid=&containerid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7%E8%A1%A8%E6%83%85%E5%8C%85&type=all&queryVal=%E5%82%85%E5%9B%AD%E6%85%A7%E8%A1%A8%E6%83%85%E5%8C%85&luicode=10000011&lfid=100103type%3D1%26q%3D%E6%B4%AA%E8%8D%92%E5%B0%91%E5%A5%B3%E5%82%85%E5%9B%AD%E6%85%A7&title=%E5%82%85%E5%9B%AD%E6%85%A7%E8%A1%A8%E6%83%85%E5%8C%85&v_p=11&ext=&fid=100103type%3D1%26q%3D%E5%82%85%E5%9B%AD%E6%85%A7%E8%A1%A8%E6%83%85%E5%8C%85&uicode=10000011&next_cursor=&page=",
#coding:utf-8
# Script: log in to Weibo, request the repost-info endpoint and a repost page,
# and hand both to get_weibo to extract the forwarding data.
import urllib2
import post_encode
import time
import os
from weibo_login import WeiboLogin
import get_weibo

if __name__ == '__main__':
    Login = WeiboLogin('用户名', '密码')
    if Login.login() == True:
        print("登录成功")

    # NOTE(review): the original indentation was lost; everything below is
    # assumed to run whether or not the login succeeded - confirm.
    # Millisecond timestamp appended to the request as the ``_rnd`` parameter.
    rnd = long(time.time() * 1000)

    # The ``page`` value can be varied in a loop to crawl several pages.
    init_url = "http://weibo.com/aj/v6/mblog/info/big?ajwvr=6&id=3917356680052180&max_id=3917762218921722"
    url = "{0}&page=1&_rnd={1}".format(init_url, rnd)
    # The response object itself (not its body) is passed on below.
    html = urllib2.urlopen(url)

    # Call the functions that parse the HTML content.
    forward_page_url = "http://weibo.com/1336593085/D7hwE0dzC?type=repost#_rnd1449550776472"
    forward_html = urllib2.urlopen(forward_page_url).read()
    origin_uid = get_weibo.get_origin_weibo(forward_html)
    uid, origin_uid2_no, uid2_no, time_no, time_no2, total_forward, total_page, current_page = \
        get_weibo.get_forward(html, forward_html)

    # Crawl multiple pages in a loop.
    print(total_page)
    uid, origin_uid2, uid2, forward_time = [], [], [], []