def user_scrape(D, token, stime=""):
    """Scrape one day of follower statistics and write them to a CSV file.

    The user-analysis page is fetched with ``D.get`` and a single
    ``date: "..."`` record is cut out of its source: the record for
    ``stime`` when given, otherwise the record that is directly followed by
    the closing ``] } ]`` of the embedded data (presumably the most recent
    one -- confirm against the page). For the extracted date, the
    ``new_user`` figure of every follower source is then requested
    separately. A header row and one data row are written to
    ``user_info.csv`` through ``UnicodeWriter(..., "gbk")``.

    Args:
        D: Downloader object; only its ``get(url)`` method is used.
        token: Session token interpolated into every request URL.
        stime: Date string to look up. It is inserted into a regular
            expression unescaped, so it must not contain regex
            metacharacters.

    Returns:
        The statistics date extracted from the page.

    Raises:
        AssertionError: If no statistics date could be extracted. The header
            row has already been written at that point.
    """
    FIELDS = [
        u"统计时间",
        u"新关注人数",
        u"取消关注人数",
        u"净增关注人数",
        u"累积关注人数",
        u"公众号搜索",
        u"二维码扫描",
        u"图文页右上角菜单",
        u"名片分享",
        u"其他",
    ]
    writer = UnicodeWriter("user_info.csv", "gbk")
    writer.writerow(FIELDS)

    url_total = "https://mp.weixin.qq.com/misc/useranalysis?&token=%s&lang=zh_CN"
    html = D.get(url_total % token)
    if not stime:
        m = common_re(r'\{\s*(date:\s"[^\}]+)\}\s*\]\s*\}\s*\]', html)
    else:
        m = common_re(r'\{\s*(date:\s"%s"[^\}]+)\}' % stime, html)

    # Column name -> pattern that pulls its value out of the matched record.
    record_patterns = (
        (u"统计时间", r'date:\s"([^"]+)"'),
        (u"新关注人数", r"new_user:\s([^\s]+)\s"),
        (u"取消关注人数", r"cancel_user:\s([^,]+),"),
        (u"净增关注人数", r"netgain_user:\s([^,]+),"),
        (u"累积关注人数", r"cumulate_user:\s([^,]+),"),
    )
    bag = {}
    for field, pattern in record_patterns:
        bag[field] = common_re(pattern, m) if m else ""

    stat_date = bag[u"统计时间"]
    if not stat_date:
        # Raised explicitly (instead of a bare ``assert``) so the check is
        # not stripped when Python runs with -O; the exception type is kept.
        raise AssertionError("statistics date not found in user analysis page")

    # ``source`` query value -> CSV column that receives its new_user count.
    sources = (
        ("1", u"公众号搜索"),
        ("30", u"二维码扫描"),
        ("43", u"图文页右上角菜单"),
        ("17", u"名片分享"),
        ("0", u"其他"),
    )
    for source_id, field in sources:
        gain_url = (
            "https://mp.weixin.qq.com/misc/useranalysis?&begin_date=%s&end_date=%s&source=%s&token=%s&lang=zh_CN&f=json&ajax=1"
            % (stat_date, stat_date, source_id, token)
        )
        gain_html = D.get(gain_url)
        bag[field] = common_re(r'"new_user":([^,]+),', gain_html)

    # A real list rather than a generator: csv-style writers on Python 2
    # require a sequence for writerow().
    writer.writerow([bag.get(field) for field in FIELDS])
    return stat_date
def get_last_trade_date():
    """Return the date string scraped from the eastmoney LHB page.

    The page is downloaded and the ``value`` attribute that follows a
    ``"readonly"`` marker is extracted. When nothing is extracted, an error
    message is printed and an empty string is returned.
    """
    page = download().get('http://data.eastmoney.com/stock/lhb.html')
    trade_date = common_re(r'"readonly"\svalue\="([^"]+)"', page)
    if not trade_date:
        print('Error: Can not get last trade date!')
        return ''
    return trade_date
def main():
    """Log in via SSO, hand the session over to the HR site and query clock-in data.

    Steps, in order:
      1. Create a cookie-enabled downloader and call ``login``; stop if it fails.
      2. Fetch the HR SSO servlet page and forward its hidden ``IASID`` /
         ``TimeStamp`` / ``Authenticator`` values to the SSO login page.
      3. Post the values from the SSO response back to the HR servlet.
      4. Open the clock-in page, then post a ``loadData`` RPC request built
         by ``get_post_xml(month, year)``.
      5. Print ``Hello world!`` when the response reports success, otherwise
         print the raw response.

    Relies on the module-level names ``userid``, ``passwd``,
    ``dynamic_passwd``, ``month`` and ``year``.
    """
    client = download(is_cookie=True)
    if not login(client, userid, passwd, dynamic_passwd):
        # Without an authenticated session there is nothing more to do.
        return

    hr_url = 'https://hr.guosen.com.cn/sso/SsoHrssServlet'
    hr_page = client.get(hr_url)
    sso_request = {
        'IASID': common_re(r'"IASID"[^>]+value="([^"]+)"', hr_page),
        'TimeStamp': common_re(r'"TimeStamp"[^>]+value="([^"]+)"', hr_page),
        'Authenticator': common_re(r'"Authenticator"[^>]+value="([^"]+)"', hr_page),
        'ReturnURL': 'https://hr.guosen.com.cn/sso/SsoHrssServlet',
    }
    sso_page = client.post('https://sso.guosen.com.cn/login.aspx', sso_request)

    hr_request = {
        'IASID': common_re(r'"IASID"[^>]+value="([^"]+)"', sso_page),
        'Result': '0',
        'TimeStamp': common_re(r'"TimeStamp"[^>]+value="([^"]+)"', sso_page),
        'UserAccount': userid,
        'ErrorDescription': '',
        'Authenticator': common_re(r'"Authenticator"[^>]+value="([^"]+)"', sso_page),
        'IASUserAccount': userid,
    }
    client.post(hr_url, hr_request)

    # Visit the clock-in page before issuing the RPC call against it.
    client.get('https://hr.guosen.com.cn/hrss/ta/Clockin.jsp?_funcode=E0020902')

    rpc_request = {
        '__type': 'loadData',
        '__viewInstanceId': 'nc.bs.hrss.ta.Clockin~nc.bs.hrss.ta.ClockinViewModel',
        '__xml': get_post_xml(month, year),
    }
    response = client.post(
        'https://hr.guosen.com.cn/hrss/dorado/smartweb2.RPC.d?__rpc=true',
        rpc_request,
    )
    if 'result succeed="true"' not in response:
        print(response)
    else:
        print('Hello world!')
def login(D, username, passwd):
    """Log in to mp.weixin.qq.com and return the session token.

    The password is sent as its MD5 hex digest. The token is extracted from
    the login response and used to open the home page; the login counts as
    successful only when that page contains the expected verification marker.

    Args:
        D: Downloader object providing ``post(url, data=...)`` and ``get(url)``.
        username: Account name.
        passwd: Plain-text password (hashed before it is sent).

    Returns:
        The token on success, ``False`` otherwise.
    """
    credentials = {
        'username': username,
        'pwd': hashlib.md5(passwd).hexdigest(),
        'imgcode': '',
        'f': 'json',
    }
    response = D.post('https://mp.weixin.qq.com/cgi-bin/login', data=credentials)
    token = common_re(r'token=([^"]+)"', response)

    home_page = D.get(
        'https://mp.weixin.qq.com/cgi-bin/home?t=home/index&lang=zh_CN&token=%s' % token
    )
    if 'success">已认证' not in home_page:
        print('Login fail.')
        return False
    print('Login succefully!')
    return token
def login(D, userid, passwd, dynamic_passwd):
    """Sign in to the Guosen SSO portal with password plus dynamic code.

    Loads the login form, echoes its hidden ASP.NET fields back together with
    the credentials, then forwards the ``IASID`` / ``TimeStamp`` /
    ``Authenticator`` values from the response to ``Default.aspx``.

    Args:
        D: Downloader object providing ``get(url)`` and ``post(url, data)``.
        userid: Account name.
        passwd: Account password.
        dynamic_passwd: One-time dynamic password.

    Returns:
        ``True`` when the final page shows the welcome text and a non-empty
        current-user span, ``False`` otherwise.
    """
    login_url = 'https://sso.guosen.com.cn/Login.aspx'
    form_page = D.get(login_url)
    form_fields = {
        '__LASTFOCUS': '',
        '__VIEWSTATE': common_re(r'id="__VIEWSTATE"\s*value="([^"]+)"', form_page),
        '__EVENTTARGET': common_re(r'id="__EVENTTARGET"\s*value="([^"]+)"', form_page),
        '__EVENTARGUMENT': common_re(r'id="__EVENTARGUMENT"\s*value="([^"]+)"', form_page),
        '__EVENTVALIDATION': common_re(r'id="__EVENTVALIDATION"\s*value="([^"]+)"', form_page),
        'hidReturnUrl': 'Default.aspx',
        'hidIASID': '000',
        'txtUserName': userid,
        'chkRememberAccount': 'on',
        'txtPassword': passwd,
        'chkEnableDynamicCode': 'on',
        'txtDynamicPassword': dynamic_passwd,
        'hidSms': '0',
        'txtValidateCode': '',
        'btnLogin.x': '0',
        'btnLogin.y': '0',
    }
    auth_page = D.post(login_url, form_fields)

    sso_fields = {
        'IASID': common_re(r'"IASID"[^>]+value="([^"]+)"', auth_page),
        'Result': '0',
        'TimeStamp': common_re(r'"TimeStamp"[^>]+value="([^"]+)"', auth_page),
        'UserAccount': userid,
        'ErrorDescription': '',
        'Authenticator': common_re(r'"Authenticator"[^>]+value="([^"]+)"', auth_page),
        'IASUserAccount': userid,
    }
    landing_page = D.post('https://sso.guosen.com.cn/Default.aspx', sso_fields)

    # Fail when the welcome text is missing or the current-user span is empty.
    if ('您好!欢迎访问单点系统' not in landing_page
            or '<span id="lblCurrentUser"></span>' in landing_page):
        print('Login fail...')
        return False
    print('Login sucessfully!')
    return True
def login(D, userid, passwd, dynamic_passwd):
    """Authenticate against the Guosen single-sign-on site.

    Two posts are made: the login form (hidden ASP.NET state fields plus the
    user's credentials and dynamic code) and then the hand-over form to
    ``Default.aspx`` built from the values found in the first response.

    Args:
        D: Downloader object providing ``get(url)`` and ``post(url, data)``.
        userid: Account name.
        passwd: Account password.
        dynamic_passwd: One-time dynamic password.

    Returns:
        ``True`` if the resulting page contains the welcome text and does not
        contain an empty ``lblCurrentUser`` span; ``False`` otherwise.
    """
    url = 'https://sso.guosen.com.cn/Login.aspx'
    page = D.get(url)

    # Step 1: submit the login form, echoing back the hidden state fields.
    payload = {
        '__LASTFOCUS': '',
        '__VIEWSTATE': common_re(r'id="__VIEWSTATE"\s*value="([^"]+)"', page),
        '__EVENTTARGET': common_re(r'id="__EVENTTARGET"\s*value="([^"]+)"', page),
        '__EVENTARGUMENT': common_re(r'id="__EVENTARGUMENT"\s*value="([^"]+)"', page),
        '__EVENTVALIDATION': common_re(r'id="__EVENTVALIDATION"\s*value="([^"]+)"', page),
        'hidReturnUrl': 'Default.aspx',
        'hidIASID': '000',
        'txtUserName': userid,
        'chkRememberAccount': 'on',
        'txtPassword': passwd,
        'chkEnableDynamicCode': 'on',
        'txtDynamicPassword': dynamic_passwd,
        'hidSms': '0',
        'txtValidateCode': '',
        'btnLogin.x': '0',
        'btnLogin.y': '0',
    }
    page = D.post(url, payload)

    # Step 2: pass the values issued by the first response on to Default.aspx.
    handover = {
        'IASID': common_re(r'"IASID"[^>]+value="([^"]+)"', page),
        'Result': '0',
        'TimeStamp': common_re(r'"TimeStamp"[^>]+value="([^"]+)"', page),
        'UserAccount': userid,
        'ErrorDescription': '',
        'Authenticator': common_re(r'"Authenticator"[^>]+value="([^"]+)"', page),
        'IASUserAccount': userid,
    }
    page = D.post('https://sso.guosen.com.cn/Default.aspx', handover)

    welcomed = '您好!欢迎访问单点系统' in page
    user_shown = '<span id="lblCurrentUser"></span>' not in page
    succeeded = welcomed and user_shown
    if succeeded:
        print('Login sucessfully!')
    else:
        print('Login fail...')
    return succeeded