def get_student_info(id, name, idcard):
    """Query the school score site for one student and return the result.

    The site is chosen from the third character of the student number.
    After an initial request to the site root, up to five attempts are
    made at the sequence: download the captcha image, recognise it with
    ``ocr.recognize``, submit name / ID-card number / captcha, then fetch
    the score page identified by the token taken from the reply.

    Args:
        id: Student number; converted with ``str``. Its third character
            must be a digit, otherwise ``IndexError`` / ``ValueError``
            propagates before any request is made.
        name: Value sent as the ``xsxm`` form field.
        idcard: Value sent as the ``xssfzh`` form field.

    Returns:
        Whatever ``htmlToJson.htmlToJson`` returns for the score page on
        success, otherwise a dict ``{'error': True, 'msg': <text>}``.
    """
    student_id = str(id)
    if int(student_id[2]) > 3:
        base_url = 'http://xxjw.hnust.cn/xxjw'
    else:
        base_url = 'http://kdjw.hnust.cn/kdjw'
    # if int(id[2])>3: base_url = 'http://localhost:2167/xxjw'
    # else : base_url = 'http://localhost:2169/kdjw'

    # These URLs do not change between attempts, so build them once.
    url_img = base_url + '/verifycode.servlet'
    url_submit = base_url + '/xscjcx_check.jsp'
    url_scope_prefix = base_url + '/xscjcx.jsp?yzbh='

    # The context manager closes the session's connections on every
    # return path.
    with requests.Session() as opener:
        try:
            # Initial request to the site root on the shared session.
            opener.get(base_url, timeout=1)
        except requests.RequestException:
            return {'error': True, 'msg': "7.服务器网络故障,可能查询的人太多了,亲再等等"}

        for _ in range(5):
            try:
                img = opener.get(url_img, timeout=1).content
            except requests.RequestException:
                continue
            code = ocr.recognize(img)
            payload = {
                'xsxm': name,
                'xssfzh': idcard,
                'yzm': code,
            }
            try:
                res = opener.post(url_submit, payload, timeout=1).text
            except requests.RequestException:
                continue

            if 'yzm_guoq' in res:
                return {'error': True, 'msg': "1.验证码过期"}
            if 'yzm_cuowu' in res:
                # Captcha was rejected: try again with a new image.
                continue
            if 'notQueryXs' in res:
                return {'error': True, 'msg': "3.未找到您输入信息的学生"}
            if 'systemError' in res:
                return {'error': True, 'msg': "4.您访问的功能出现错误"}

            # The token for the score page is the 32 characters that end
            # four characters before the end of the reply.
            url_scope = url_scope_prefix + res[-36:-4]
            try:
                html = opener.get(url_scope, timeout=1).text
            except requests.RequestException:
                continue
            # A page without any 10-digit run is treated as empty.
            if not re.search(r'\d{10}', html):
                return {'error': True, 'msg': "5.登陆成功了,但是似乎没有你的信息哦"}
            return htmlToJson.htmlToJson(html)

    return {'error': True, 'msg': "8.服务器网络故障,可能查询的人太多了,亲再等等"}
def parse(self, response):
    """Convert a downloaded page with ``htmlToJson`` and store the result.

    Conversion and storage are best-effort: any ordinary exception raised
    by ``htmlToJson.htmlToJson`` or ``MYSCOPE_DB.save`` is logged with its
    traceback and then suppressed, so this method never raises for a bad
    page. ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.

    Args:
        response: Object exposing ``body_as_unicode()`` and ``url``.

    Returns:
        None.
    """
    # Imported locally so the block does not depend on a file-level import.
    import logging

    html = response.body_as_unicode()
    try:
        js = htmlToJson.htmlToJson(html)
        MYSCOPE_DB.save(js)
    except Exception:
        logging.getLogger(__name__).exception(
            'could not convert or save page %s', response.url)
def parse(self, response):
    """Convert a downloaded page with ``htmlToJson`` and store the result.

    Conversion and storage are best-effort: any ordinary exception raised
    by ``htmlToJson.htmlToJson`` or ``MYSCOPE_DB.save`` is logged with its
    traceback and then suppressed, so this method never raises for a bad
    page. ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.

    Args:
        response: Object exposing ``body_as_unicode()`` and ``url``.

    Returns:
        None.
    """
    # Imported locally so the block does not depend on a file-level import.
    import logging

    html = response.body_as_unicode()
    try:
        js = htmlToJson.htmlToJson(html)
        MYSCOPE_DB.save(js)
    except Exception:
        logging.getLogger(__name__).exception(
            'could not convert or save page %s', response.url)
def get_student_info(id, name, idcard):
    """Query the school score site for one student and return the result.

    The site is chosen from the third character of the student number.
    After an initial request to the site root, up to five attempts are
    made at the sequence: download the captcha image, recognise it with
    ``ocr.recognize``, submit name / ID-card number / captcha, then fetch
    the score page identified by the token taken from the reply.

    Args:
        id: Student number; converted with ``str``. Its third character
            must be a digit, otherwise ``IndexError`` / ``ValueError``
            propagates before any request is made.
        name: Value sent as the ``xsxm`` form field.
        idcard: Value sent as the ``xssfzh`` form field.

    Returns:
        Whatever ``htmlToJson.htmlToJson`` returns for the score page on
        success, otherwise a dict ``{'error': True, 'msg': <text>}``.
    """
    student_id = str(id)
    if int(student_id[2]) > 3:
        base_url = 'http://xxjw.hnust.cn/xxjw'
    else:
        base_url = 'http://kdjw.hnust.cn/kdjw'
    # if int(id[2])>3: base_url = 'http://localhost:2167/xxjw'
    # else : base_url = 'http://localhost:2169/kdjw'

    # These URLs do not change between attempts, so build them once.
    url_img = base_url + '/verifycode.servlet'
    url_submit = base_url + '/xscjcx_check.jsp'
    url_scope_prefix = base_url + '/xscjcx.jsp?yzbh='

    # The context manager closes the session's connections on every
    # return path.
    with requests.Session() as opener:
        try:
            # Initial request to the site root on the shared session.
            opener.get(base_url, timeout=1)
        except requests.RequestException:
            return {'error': True, 'msg': "7.服务器网络故障,可能查询的人太多了,亲再等等"}

        for _ in range(5):
            try:
                img = opener.get(url_img, timeout=1).content
            except requests.RequestException:
                continue
            code = ocr.recognize(img)
            payload = {
                'xsxm': name,
                'xssfzh': idcard,
                'yzm': code,
            }
            try:
                res = opener.post(url_submit, payload, timeout=1).text
            except requests.RequestException:
                continue

            if 'yzm_guoq' in res:
                return {'error': True, 'msg': "1.验证码过期"}
            if 'yzm_cuowu' in res:
                # Captcha was rejected: try again with a new image.
                continue
            if 'notQueryXs' in res:
                return {'error': True, 'msg': "3.未找到您输入信息的学生"}
            if 'systemError' in res:
                return {'error': True, 'msg': "4.您访问的功能出现错误"}

            # The token for the score page is the 32 characters that end
            # four characters before the end of the reply.
            url_scope = url_scope_prefix + res[-36:-4]
            try:
                html = opener.get(url_scope, timeout=1).text
            except requests.RequestException:
                continue
            # A page without any 10-digit run is treated as empty.
            if not re.search(r'\d{10}', html):
                return {'error': True, 'msg': "5.登陆成功了,但是似乎没有你的信息哦"}
            return htmlToJson.htmlToJson(html)

    return {'error': True, 'msg': "8.服务器网络故障,可能查询的人太多了,亲再等等"}
if not dic : return "{'status':false}" # if (datetime.datetime.now()-dic['datetime']).total_seconds() > 24*3600 : return getScoreByWeb(id) dic['from'] = 'mongodb' dic['datetime'] = dic['datetime'].isoformat() return json.dumps(dic,ensure_ascii=False,indent=None,encoding='UTF8') def getScoreByWeb(id): id=str(id) # if id[2]=='5': url = 'http://211.67.208.67/xxjw/xscjcx.jsp?yzbh=' # else: url = 'http://211.67.208.69/kdjw/xscjcx.jsp?yzbh=' if id[2]=='5' or id[2]=='6': url = 'http://127.0.0.1:2222/xxjw/xscjcx.jsp?yzbh=' else: url = 'http://127.0.0.1:2222/kdjw/xscjcx.jsp?yzbh=' try: html=urllib2.urlopen(url+str(id),timeout=5).read() if len(html)<100 : return u"{'error':true,'msg':'内网服务器脱机'}" except Exception,e: return u"{'error':true,'msg':'服务器加载数据失败'}" dic = htmlToJson.htmlToJson(html) MYSCOPE_DB.save(dic) del dic['_id'] dic['from'] = 'web' dic['datetime'] = dic['datetime'].isoformat() return json.dumps(dic,ensure_ascii=False,indent=None,encoding='UTF8') if __name__ == 'main': print s.encode('gbk','ignore')