def redo_login(self, login_url):
    '''
    Third login step: request ``login_url`` and persist the cookie jar.

    The request is sent with the SSO login page as its Referer. Afterwards
    the cookie jar held in ``self.cj`` is saved to ``self.cookiefile``.
    Exceptions are logged and never propagated to the caller.

    :param login_url: URL to request for this login step.
    :return: True when the request and the cookie save finished without an
             exception, False otherwise. (The flag used to be computed and
             then dropped, so callers always received None.)
    '''
    try:
        headers = self.__get_headers()
        headers['Referer'] = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)'
        req = self.pack_request(login_url, headers)
        # The response body is never read here, so release the connection
        # immediately instead of leaving it to the garbage collector.
        urllib2.urlopen(req).close()
        # Save the cookie!
        # NOTE(review): the two positional True flags are presumably
        # ignore_discard / ignore_expires of a cookielib file jar - confirm.
        self.cj.save(self.cookiefile, True, True)
        logInfo('login success')
        return True
    except Exception as e:  # "as" form is valid on Python 2.6+ and Python 3
        logError(e)
        exc_info = sys.exc_info()
        logError('redo_login %s happened on line %d' %
                 (exc_info[1], exc_info[2].tb_lineno))
        return False
def run():
    '''
    Log in to Sina Weibo and, if the login is valid, start a search worker.

    Account, temp directory and proxy settings are read from ``syscontext``
    with built-in fallbacks. Nothing is returned.
    '''
    # Simulated login.
    # NOTE(review): the fallback account and password are hard-coded in the
    # source; consider requiring them from configuration instead.
    username = syscontext.user.get('un', 'wwang1969@126')
    password = syscontext.user.get('pw', 'w196988')
    file_path = syscontext.config.get('temp', './temp')
    # Company network: traffic has to go through the proxy.
    httpproxy = syscontext.config.get('httpproxy', 'http://web-proxy.oa.com:8080')

    sina = LoginSinaWeibo(soft_path=file_path, proxy=httpproxy)

    if not sina.check_cookie(username, password, file_path):
        logInfo('sina weibo login failure, check username/password!')
        return

    logInfo('sina weibo login sucess!')

    # Both bounds share today's date prefix; the suffixes 0 and 23 are
    # presumably the first and last hour of the day - confirm against
    # SearchWeiboThread.
    day_prefix = time.strftime("%Y-%m-%d-")
    SearchWeiboThread(1, day_prefix + '0', day_prefix + '23', sina).start()
def saveToDB(self):
    '''
    Write the collected results to the database (SQLite3, per the original
    author's note).

    Opens a connection on ``weiboDB``, makes sure the ``Weibo`` table exists,
    inserts one row per entry of ``self.weibolist`` and closes the connection
    again, also when an insert fails. Exceptions raised by the database layer
    propagate to the caller.
    '''
    weiboDB.connect()
    try:
        weiboDB.create_tables([Weibo], safe=True)
        # "or []" keeps the original tolerance for an empty or None list.
        for weiboData in self.weibolist or []:
            Weibo.create(mid=weiboData.mid,
                         name=weiboData.name,
                         userurl=weiboData.userurl,
                         content=weiboData.content,
                         weibourl=weiboData.weibourl)
    finally:
        # Without this the connection stayed open after every call, so a
        # later connect() either leaked it or failed on an already-open
        # connection.
        weiboDB.close()
    logInfo('save over~')
def analyze(self):
    '''
    Segment the text of every collected weibo into words and log the tokens.

    The original note also mentions sentiment analysis; this method only
    performs the word segmentation and logs the result joined by "|".
    '''
    if not self.weibolist:
        return
    for weibo in self.weibolist:
        text = weibo.content
        # Posts that mention this marker are left out of the segmentation.
        if u'手机QQ浏览器' in text:
            continue
        tokens = jieba.cut(text)
        logInfo('|'.join(tokens))
def redo_login(self, login_url):
    '''
    Third login step: request ``login_url`` and persist the cookie jar.

    The request is sent with the SSO login page as its Referer. Afterwards
    the cookie jar held in ``self.cj`` is saved to ``self.cookiefile``.
    Exceptions are logged and never propagated to the caller.

    :param login_url: URL to request for this login step.
    :return: True when the request and the cookie save finished without an
             exception, False otherwise. (The flag used to be computed and
             then dropped, so callers always received None.)
    '''
    try:
        headers = self.__get_headers()
        headers['Referer'] = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)'
        req = self.pack_request(login_url, headers)
        # The response body is never read here, so release the connection
        # immediately instead of leaving it to the garbage collector.
        urllib2.urlopen(req).close()
        # Save the cookie!
        # NOTE(review): the two positional True flags are presumably
        # ignore_discard / ignore_expires of a cookielib file jar - confirm.
        self.cj.save(self.cookiefile, True, True)
        logInfo('login success')
        return True
    except Exception as e:  # "as" form is valid on Python 2.6+ and Python 3
        logError(e)
        exc_info = sys.exc_info()
        logError('redo_login %s happened on line %d' %
                 (exc_info[1], exc_info[2].tb_lineno))
        return False
def check_cookie(self, un, pw, softPath):
    '''
    Check the local cookie file and log in again when it cannot be used.

    ``self.cookiefile`` is set to ``<softPath>/cookie.dat``. When that file
    is missing, or does not contain the text ``Set-Cookie``, ``self.login``
    is called with the given credentials. If no login was needed, or the
    login result is truthy, the outcome of ``self.valid_cookie()`` is
    returned.

    :param un: user name passed to ``self.login``.
    :param pw: password passed to ``self.login``.
    :param softPath: directory that holds ``cookie.dat``.
    :return: result of ``self.valid_cookie()``, or False when the login
             result was falsy.
    '''
    self.cookiefile = os.path.join(softPath, 'cookie.dat')
    loginFalg = True

    if os.path.exists(self.cookiefile):
        logInfo('cookie dat exist.')
        # Read inside "with" so the handle is closed before login() runs;
        # the login flow may write to this same file.
        with open(self.cookiefile, 'r') as cookie_fp:
            has_cookie = 'Set-Cookie' in cookie_fp.read()
        if not has_cookie:
            logInfo('but does not contain a valid cookie.')
            loginFalg = self.login(un, pw)
    else:
        logInfo('cookie dat not exist.')
        loginFalg = self.login(un, pw)

    if loginFalg:
        return self.valid_cookie()
    return False
def fetch(self, content):
    '''
    Extract weibo entries from a search result page and save them.

    Steps:
      1. Collect the JSON payloads passed to ``STK.pageletM.view(...)``
         inside ``<script>`` tags of ``content``.
      2. Parse the first payload that mentions ``"pl_weibo_direct"`` and
         read its ``html`` field.
      3. For every ``div`` whose ``action-type`` is ``feed_list_item``,
         build a ``WeiboBean`` and append it to ``self.weibolist``.
      4. Log the size of ``self.weibolist`` and call ``self.saveToDB()``.

    :param content: page source to scan.
    '''
    pattern = re.compile(
        r'<script>STK && STK\.pageletM && STK\.pageletM\.view\((.*)\).*?</script>'
    )
    for payload in pattern.findall(content):
        if '"pl_weibo_direct"' not in payload:
            continue
        htmlDoc = json.loads(payload)['html']
        # NOTE(review): no parser is named, so the result depends on which
        # parser BeautifulSoup picks in the running environment.
        soup = BeautifulSoup(htmlDoc)
        for item in soup.find_all('div', {'action-type': 'feed_list_item'}):
            mid = item.get('mid')
            body = item.find('div', {'class': 'feed_content wbcon'})
            # One item with unexpected markup used to abort the whole page
            # with AttributeError/KeyError; skip just that item instead.
            if mid is None or body is None or body.a is None or body.p is None:
                logError('fetch: feed item skipped, unexpected markup')
                continue
            weibo = WeiboBean()
            weibo.mid = mid
            weibo.name = body.a['nick-name']
            weibo.userurl = body.a['href']
            weibo.content = body.p.get_text()
            weibo.weibourl = weibo.userurl + '/' + weibomid.midToStr(weibo.mid)
            # Drop only a whole "/u/" path segment. Replacing the bare
            # substring "/u" also mangled profile paths that merely start
            # with the letter u (".../uniqlo/..." became "...niqlo/...").
            weibo.weibourl = weibo.weibourl.replace('/u/', '/')
            self.weibolist.append(weibo)
        # Only the first matching payload is processed.
        break
    logInfo('weibolist size = ' + str(len(self.weibolist)))
    self.saveToDB()
return False elif '您的帐号存在异常' in html and '解除限制' in html: msg = u'账号被限制.' logError(msg) self.clear_cookiedat(self.cookiefile) return False elif "$CONFIG['islogin'] = '******'" in html: msg = u'登录失败.' logError(msg) self.clear_cookiedat(self.cookiefile) return False elif "$CONFIG['islogin']='******'" in html: msg = 'cookie success.' logInfo(msg) self.cj.save(self.cookiefile, True, True) return True else: msg = u'登录失败.' logError(msg) self.clear_cookiedat(self.cookiefile) return False def get_response_content(self, url, headers = {}, data = None): ''' 获取响应数据 '''
return False elif '您的帐号存在异常' in html and '解除限制' in html: msg = u'账号被限制.' logError(msg) self.clear_cookiedat(self.cookiefile) return False elif "$CONFIG['islogin'] = '******'" in html: msg = u'登录失败.' logError(msg) self.clear_cookiedat(self.cookiefile) return False elif "$CONFIG['islogin']='******'" in html: msg = 'cookie success.' logInfo(msg) self.cj.save(self.cookiefile, True, True) return True else: msg = u'登录失败.' logError(msg) self.clear_cookiedat(self.cookiefile) return False def get_response_content(self, url, headers={}, data=None): ''' 获取响应数据 ''' content = ''
def run():
    '''
    Log in to Sina Weibo and, if the login is valid, start a search worker.

    Account, temp directory and proxy settings are read from ``syscontext``
    with built-in fallbacks. Nothing is returned.
    '''
    # Simulated login.
    # NOTE(review): the fallback account and password are hard-coded in the
    # source; consider requiring them from configuration instead.
    username = syscontext.user.get('un', 'wwang1969@126')
    password = syscontext.user.get('pw', 'w196988')
    file_path = syscontext.config.get('temp', './temp')
    # Company network: traffic has to go through the proxy.
    httpproxy = syscontext.config.get('httpproxy', 'http://web-proxy.oa.com:8080')

    sina = LoginSinaWeibo(soft_path=file_path, proxy=httpproxy)

    if not sina.check_cookie(username, password, file_path):
        logInfo('sina weibo login failure, check username/password!')
        return

    logInfo('sina weibo login sucess!')

    # Both bounds share today's date prefix; the suffixes 0 and 23 are
    # presumably the first and last hour of the day - confirm against
    # SearchWeiboThread.
    day_prefix = time.strftime("%Y-%m-%d-")
    SearchWeiboThread(1, day_prefix + '0', day_prefix + '23', sina).start()


if __name__ == '__main__':
    # Script entry point: log a greeting, then run the login/search flow.
    logInfo('hello')
    run()