def __init__(self):
    """Start with empty query state and an HTTP client for the SPARQL endpoint."""
    # Results of the most recent retrieval.
    self.qids = []
    self.entity_count = 0

    # Query texts and the log file name; all empty until a retrieval runs.
    self.sparql_query = self.count_query = self.log_fname = ''

    # Endpoint address, the last responses received from it, and the
    # client object constructed with that address.
    self.__sparql_endpoint_url = 'https://query.wikidata.org/sparql'
    self.__sparql_response = self.__count_response = None
    self.__http_request_sparql = HTTPRequest(self.__sparql_endpoint_url)
def __init__(self, usrname='0', usrpswd='0', user_type=u"学生", display=True):
    """
    Remember the account settings and create the HTTP client.

    :param usrname: account name, stored as ``self.usrname``
    :param usrpswd: account password, stored as ``self.usrpswd``
    :param user_type: account type label, stored as ``self.user_type``
    :param display: display flag, stored as ``self.display``
    """
    self.usrname, self.usrpswd = usrname, usrpswd
    self.user_type, self.display = user_type, display
    # Client object used for all subsequent requests.
    self.http_request = HTTPRequest()
def parse_host_from_header(data):
    """
    Parse the target host from the ``host`` header of a request.

    Accepted header forms are ``name``, ``name:port``, ``[ipv6]`` and
    ``[ipv6]:port``. The port defaults to 80 when it is absent or empty.
    For a bracketed IPv6 literal the brackets are removed from the
    returned address.

    :param data: request data, passed unchanged to ``HTTPRequest``
    :return: (address, port)
    :raises ValueError: if the port part is present but not an integer
    """
    request = HTTPRequest(data)
    host = request.headers['host'].strip()

    if host.startswith('['):
        # Bracketed IPv6 literal: the address itself contains colons, so
        # only a colon that follows the closing bracket introduces a port.
        remote_addr, _, tail = host[1:].partition(']')
        port_text = tail[1:] if tail.startswith(':') else ''
    else:
        pieces = host.split(':')
        remote_addr = pieces[0]
        port_text = pieces[1] if len(pieces) > 1 else ''

    # An empty port ("example.com:") falls back to the default as well.
    remote_port = int(port_text) if port_text else 80
    return remote_addr, remote_port
def parse_headers(path):
    """
    Dump the headers of the request stored at *path* to ``<path>.json``.

    The file content is handed to ``HTTPRequest`` and its ``headers`` are
    converted to a dict. Nothing is written when that dict is empty.

    :param path: file to read; also the prefix of the output file name
    """
    with open(path) as source:
        raw_request = source.read()

    header_map = dict(HTTPRequest(raw_request).headers)
    if not header_map:
        return

    with open(path + ".json", "w") as sink:
        json.dump(header_map, sink, indent=4)
def parse_headers(path):
    """
    Parse the request stored at *path* and return its headers as a dict.

    As a side effect, non-empty headers are also written to
    ``<path>.json``.

    :param path: file to read; also the prefix of the output file name
    :return: dict built from the ``headers`` of the parsed request
    """
    with open(path) as request_file:
        parsed = HTTPRequest(request_file.read())
    headers = dict(parsed.headers)

    # TODO: returning the headers and writing a file from the same
    # function mixes two responsibilities; the original author flagged
    # this design as needing a fix.
    if headers:
        json_path = path + ".json"
        with open(json_path, "w") as json_file:
            json.dump(headers, json_file, indent=4)
    return headers
class Qids:
    """
    Retrieve, store and save QIDs selected by a SPARQL query.

    QIDs are unique entity identifiers in the Wikidata knowledge base.
    QIDs of some popular entities:
        1. Cristiano Ronaldo - Q11571
        2. COVID 19 - Q84263196
        3. Germany - Q183
    """

    # Matches a Wikidata entity URI and captures its QID. The dots are
    # escaped and at least one digit is required, so a URI cut off right
    # after the "Q" is not reported as a QID.
    _ENTITY_URI_RE = re.compile(r"http://www\.wikidata\.org/entity/(Q[0-9]+)")

    def __init__(self):
        """Start with empty results and an HTTP client for the endpoint."""
        self.qids = []
        self.entity_count = 0
        self.sparql_query = ''
        self.count_query = ''
        self.log_fname = ''
        self.__sparql_endpoint_url = 'https://query.wikidata.org/sparql'
        self.__sparql_response = None
        self.__count_response = None
        self.__http_request_sparql = HTTPRequest(self.__sparql_endpoint_url)

    def retrieve_qids(self, sparql_fname, log_fname):
        """
        Run the SPARQL query stored in a file and collect the returned QIDs.

        The query is read from ``sparql_fname`` and submitted to the
        Wikidata SPARQL endpoint at https://query.wikidata.org/sparql. All
        QIDs found in the response are stored in ``self.qids``.

        The endpoint does not always return every entity the query
        selects, frequently because of timeouts. A count query is
        therefore derived from the SPARQL query and submitted as well; its
        result is stored in ``self.entity_count`` (-1 if it could not be
        extracted).

        :param sparql_fname: file containing the SPARQL query text
        :param log_fname: file name forwarded to the HTTP client and to
            ``log`` for logging purposes
        """
        with open(sparql_fname, 'r') as f:
            self.sparql_query = f.read()
        self.log_fname = log_fname

        # Send the query to the endpoint; raw=True asks the client for the
        # raw HTTP response.
        self.__sparql_response = self.__http_request_sparql.get(
            url=self.__sparql_endpoint_url,
            params={'query': self.sparql_query},
            headers={'Accept': 'application/sparql-results+json'},
            exc_fname=log_fname,  # for logging purposes
            timeout=300,
            raw=True,
        )
        self.qids = self.__extract_qids()

        # Derive the count query by replacing the first "?item" of the
        # SPARQL query with a COUNT aggregate.
        self.count_query = re.sub(
            r'\?item', '(COUNT(?item) AS ?count)', self.sparql_query, count=1
        )

        # Requested with raw=False; __extract_count indexes the result
        # like a parsed JSON mapping.
        self.__count_response = self.__http_request_sparql.get(
            url=self.__sparql_endpoint_url,
            params={'query': self.count_query},
            headers={'Accept': 'application/sparql-results+json'},
            exc_fname=log_fname,
            raw=False,
        )
        self.entity_count = self.__extract_count()

    def save(self, qids_fname):
        """Pickle the retrieved QIDs (``self.qids``) to ``qids_fname``."""
        with open(qids_fname, 'wb') as f:
            pickle.dump(self.qids, f)

    def __extract_qids(self):
        """
        Extract the QIDs from the SPARQL response.

        The response is first read as JSON. If that fails for any
        reason, the QIDs are scraped from the response text instead.

        :return: list of QID strings such as ``'Q183'``
        """
        try:
            bindings = self.__sparql_response.json()['results']['bindings']
            return [
                binding['item']['value'].split('/')[-1] for binding in bindings
            ]
        except Exception:
            # Timeouts are frequent and garble the response, so this stays
            # a best-effort fallback. Catching Exception rather than
            # everything lets KeyboardInterrupt and SystemExit propagate.
            return self._ENTITY_URI_RE.findall(self.__sparql_response.text)

    def __extract_count(self):
        """
        Extract the entity count from the count query response.

        :return: the count as an int, or -1 if it could not be read (the
            failure is passed to ``log`` together with the response)
        """
        try:
            bindings = self.__count_response['results']['bindings']
            entity_count = int(bindings[0]['count']['value'])
        except Exception as e:
            log(e, self.__count_response, self.log_fname)
            entity_count = -1
        return entity_count
class Score:
    """
    Command-line client for a school academic-affairs website.

    Logs in with an account and scrapes the personal-info, score and
    elective pages. Results are printed as tables when ``display`` is
    truthy.
    """

    def __init__(self, usrname='0', usrpswd='0', user_type=u"学生", display=True):
        """
        :param usrname: account name, submitted as ``txtUser`` on login
        :param usrpswd: password, submitted as ``txtPassword`` on login
        :param user_type: account type, submitted as ``rbLx`` on login
        :param display: when truthy, query methods print their results
        """
        self.usrname = usrname
        self.usrpswd = usrpswd
        self.user_type = user_type
        self.display = display
        self.http_request = HTTPRequest()

    def login(self):
        """
        Simulate a login to the academic-affairs system.

        Posts the login form and classifies the response text with
        ``validate_login``.

        :return: login state as a dict with ``'status'`` (bool) and
            ``'info'`` (message); ``'status'`` is True only when the
            response contains the ``pageRedirect`` marker
        """
        login_url = 'http://219.242.68.34/Login.aspx'
        form_data = {
            "ToolkitScriptManager1": "ToolkitScriptManager1|btnLogin",
            "ToolkitScriptManager1_HiddenField": "",
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": (
                "/wEPDwULLTEzMzI5MDg5NTdkZLijHQ2AP8RlCsoyMA5wGxkp7N/Eu"
                "+tNRGRIjssUUJzA"
            ),
            "__VIEWSTATEGENERATOR": "C2EE9ABB",
            "__EVENTVALIDATION": (
                "/wEdAAkh8ZU3Gnyvn5kXEKbCJUIRDFTzKcXJqLg+OeJ6QAEa2nY2+"
                "Mc6SrnAqio3oCKbxYah3xa5rLP/Bn9R8gnPYyv9Y/jNR+V3Y2V63r"
                "/uGJ6o3e8y1uAcWHw+joLTxl+mj+XJMG57d9TuD5oRZdFmOwSdT3X"
                "UHy040LsOHhWYhzfBA6KeKEbp39eHc9mbdvkCgxBZmNzqpPs6SHyh"
                "2ECtT46MMRcPc39Fn2KuRL/PbaoRvw=="
            ),
            "txtUser": self.usrname,
            "txtPassword": self.usrpswd,
            "rbLx": self.user_type,
            "__ASYNCPOST": "true",
            "btnLogin": "******",
        }
        response = self.http_request.post(login_url, data=form_data).text
        return validate_login(
            response,
            validator={
                "pageRedirect": {'status': True, 'info': "登录成功"},
                # A rejected password is a failed login. Reporting it as a
                # success would let cli() open the menu without a valid
                # session.
                u"密码不正确": {'status': False, 'info': "密码错误"},
            },
            default={'status': False, 'info': "登录失败"},
        )

    def get_info(self):
        """
        Fetch the student's personal information through the login session.

        :return: dict of student information; keys carry an ``a.``..``e.``
            prefix that fixes the column order when printed
        """
        ifo_url = 'http://219.242.68.33/xuesheng/xsxx.aspx'
        soup = Soup(self.http_request.session, ifo_url)
        data = {}
        data['a.姓名'] = soup.find(id="ctl00_ContentPlaceHolder1_lblXm").text
        data['b.身份证号'] = soup.find(id="ctl00_ContentPlaceHolder1_lblSfz").text
        data['c.学号'] = soup.find(id="ctl00_ContentPlaceHolder1_lblXh").text
        data['d.班级'] = soup.find(id="ctl00_ContentPlaceHolder1_className").text
        data['e.院系'] = soup.find(id="ctl00_ContentPlaceHolder1_collegeName").text
        # Truthiness check, consistent with get_score() and elective().
        if self.display:
            ordered_keys = sorted(data)
            # Drop the two-character ordering prefix from the titles.
            tabletitle = [key[2:] for key in ordered_keys]
            cont = [data[key] for key in ordered_keys]
            table_print(tabletitle, cont)
        return data

    def get_score(self):
        """
        Fetch the score table through the login session.

        The text of every ``td`` cell is read in strides of ten, one
        stride per course row.

        :return: list of rows ``[index, course, score, unit, year, term,
            nature]``
        """
        score_url = 'http://219.242.68.33/xuesheng/cjcx.aspx'
        soup = Soup(self.http_request.session, score_url)
        all_scoreifo = [item.text.strip() for item in soup.find_all('td')]
        indexs = all_scoreifo[0::10]
        years = all_scoreifo[2::10]
        terms = all_scoreifo[3::10]
        units = all_scoreifo[5::10]
        natures = all_scoreifo[7::10]
        courses = all_scoreifo[8::10]
        # A score cell may hold several lines; show them on one line.
        scores = [' / '.join(item.split('\n')) for item in all_scoreifo[9::10]]
        average = soup.find(id="ctl00_ContentPlaceHolder1_lblpjcj").text
        total = soup.find(id="ctl00_ContentPlaceHolder1_lblKcms").text
        credit = soup.find(id="ctl00_ContentPlaceHolder1_lblXfs").text

        tabletitle = ['序号', '课程', '成绩', '学分', '学年', '学期', '性质']
        conts = [
            [index, course.strip(), score.replace('\n', ''), unit, year, term, nature]
            for index, year, term, unit, nature, course, score
            in zip(indexs, years, terms, units, natures, courses, scores)
        ]
        if self.display:
            table_print(tabletitle, conts)
            table_print(['平均成绩', '课程门数', '已获得学分'],
                        [[average, total, credit]])
        return conts

    def elective(self):
        """
        Fetch the elective course listing.

        The text of every ``td`` cell is read in strides of five, one
        stride per row.

        :return: list of rows ``[index, course group, course name,
            teacher]``
        """
        eleurl = 'http://219.242.68.33/xuesheng/xsxk.aspx'
        form_data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": (
                "/wEPDwULLTE1NDU0NjAxMDUPZBYCZg9kFgICAw9kFgICAQ9kFgIC"
                "Aw8QDxYGHg1EYXRhVGV4dEZpZWxkBQRrenNtHg5EYXRhVmFsdWVG"
                "aWVsZAUDa3poHgtfIURhdGFCb3VuZGdkEBUdFzE1LTE256ys5LqM"
                "5a2m5pyf5YWs6YCJFzE1LTE256ys5LiA5a2m5pyf5YWs6YCJFzE0"
                "LTE156ys5LqM5a2m5pyf5YWs6YCJFzE0LTE156ys5LiA5a2m5pyf"
                "5YWs6YCJFzEzLTE056ys5LqM5a2m5pyf5YWs6YCJFzEzLTE056ys"
                "5LiA5a2m5pyf5YWs6YCJGeiLseivree7vOWQiOaKgOiDveWfueWF"
                "uzEXMTItMTPnrKzkuozlrabmnJ/lhazpgIkZ6Iux6K+t57u85ZCI"
                "5oqA6IO95Z+55YW7MRcxMi0xM+esrOS4gOWtpuacn+WFrOmAiRcx"
                "MS0xMuesrOS6jOWtpuacn+WFrOmAiRcxMS0xMuesrOS4gOWtpuac"
                "n+WFrOmAiRcxMC0xMeesrOS6jOWtpuacn+WFrOmAiRcxMC0xMees"
                "rOS4gOWtpuacn+WFrOmAiRcwOS0xMOesrOS6jOWtpuacn+WFrOmA"
                "iRcwOS0xMOesrOS4gOWtpuacn+WFrOmAiRcwOC0wOeesrOS6jOWt"
                "puacn+WFrOmAiRcwOC0wOeesrOS4gOWtpuacn+WFrOmAiRcwNy0w"
                "OOesrOS6jOWtpuacn+WFrOmAiRcwNy0wOOesrOS4gOWtpuacn+WF"
                "rOmAiRcwNi0wN+esrOS6jOWtpuacn+WFrOmAiRcwNi0wN+esrOS4"
                "gOWtpuacn+WFrOmAiRcwNS0wNuesrOS6jOWtpuacn+WFrOmAiRcw"
                "NS0wNuesrOS4gOWtpuacn+WFrOmAiRcwNC0wNeesrOS6jOWtpuac"
                "n+WFrOmAiRcwNC0wNeesrOS4gOWtpuacn+WFrOmAiRcwMy0wNOes"
                "rOS6jOWtpuacn+WFrOmAiRcwMy0wNOesrOS4gOWtpuacn+WFrOmA"
                "iRcwMi0wM+esrOS6jOWtpuacn+WFrOmAiRUdAzMyMQMzMTgDMzE0"
                "AzMxMwMzMDIDMjQzAzI0MgMyNDEDMjQwAzIzOQMyMzgDMjM3AzIz"
                "NgMyMzUDMjM0AzIzMwMyMzIDMjMxAzIzMAMyMjkDMjI4AzIyNwMy"
                "MjYDMjE2AzIxNQMyMTQDMjEzAzIxMgMyMTAUKwMdZ2dnZ2dnZ2dn"
                "Z2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2cWAWZkZBgWDkNmM5ksFZPYJS+C"
                "Xe3IihlDoFim1X/o3cfNS5fN"
            ),
            "__VIEWSTATEGENERATOR": "E7E695A4",
            "ctl00$ContentPlaceHolder1$drplKcz": '321',
            "ctl00$ContentPlaceHolder1$btnYxkc": "查 看",
        }
        ss = self.http_request.post(eleurl, data=form_data)
        soup = BeautifulSoup(ss.text, 'lxml')
        all_item = [item.text for item in soup.find_all('td')]

        tabletitle = ['序号', '课程组', '课程名称', '任课教师']
        conts = []
        for index, group, detail in zip(all_item[1::5], all_item[2::5],
                                        all_item[4::5]):
            # The detail cell holds the course name followed by the teacher.
            fields = detail.split()
            conts.append([index, group[4:].strip(), fields[0], fields[1]])
        if self.display:
            table_print(tabletitle, conts)
        # Returned for parity with get_score(); previously the rows could
        # only be printed.
        return conts

    def cli(self):
        """
        Run the interactive menu: ask for credentials, log in, then
        dispatch menu choices until the user switches account or quits.

        On a failed login the reason is printed and the user may retry or
        quit with ``q``.
        """
        prompt = '''
        +===========================+
        |   [0]查成绩               |
        |   [1]个人信息             |
        |   [2]选修课               |
        |   [3]登录其他账号         |
        |   [4]清除历史记录         |
        |   [5]安全退出             |
        +===========================+
        >>> '''
        self.usrname = rinput('学号: ')
        self.usrpswd = rinput('密码: 00000000\b\b\b\b\b\b\b\b')
        status = self.login()
        if status['status']:
            choice = True
            choice_dict = {
                '0': self.get_score,
                '1': self.get_info,
                '2': self.elective,
                '3': self.cli,
                '4': clear,
                '5': quit,
            }
            while choice is True:
                # Slice instead of indexing so that empty input is treated
                # as an invalid choice rather than raising IndexError.
                usr_choice = rinput('\r' + prompt).strip()[:1]
                os.system('clear')
                if usr_choice in choice_dict:
                    choice_dict.get(usr_choice)()
                    # Choices 3 and 5 end this menu loop.
                    choice = usr_choice not in "35"
                else:
                    print('Input incorrect..again!')
        else:
            print(status['info'])
            cho = rinput('Any key to continue, [q] to quit.')
            if cho == 'q':
                quit()
            else:
                self.cli()
class Score:
    """
    Command-line client for a school academic-affairs website.

    Logs in with an account and scrapes the personal-info, score and
    elective pages. Results are printed as tables when ``display`` is
    truthy.
    """

    def __init__(self, usrname='0', usrpswd='0', user_type=u"学生", display=True):
        """
        :param usrname: account name, submitted as ``txtUser`` on login
        :param usrpswd: password, submitted as ``txtPassword`` on login
        :param user_type: account type, submitted as ``rbLx`` on login
        :param display: when truthy, query methods print their results
        """
        self.usrname = usrname
        self.usrpswd = usrpswd
        self.user_type = user_type
        self.display = display
        self.http_request = HTTPRequest()

    def login(self):
        """
        Simulate a login to the academic-affairs system.

        Posts the login form and classifies the response text with
        ``validate_login``.

        :return: login state as a dict with ``'status'`` (bool) and
            ``'info'`` (message); ``'status'`` is True only when the
            response contains the ``pageRedirect`` marker
        """
        login_url = 'http://219.242.68.34/Login.aspx'
        form_data = {
            "ToolkitScriptManager1": "ToolkitScriptManager1|btnLogin",
            "ToolkitScriptManager1_HiddenField": "",
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": (
                "/wEPDwULLTEzMzI5MDg5NTdkZLijHQ2AP8RlCsoyMA5wGxkp7N/Eu"
                "+tNRGRIjssUUJzA"
            ),
            "__VIEWSTATEGENERATOR": "C2EE9ABB",
            "__EVENTVALIDATION": (
                "/wEdAAkh8ZU3Gnyvn5kXEKbCJUIRDFTzKcXJqLg+OeJ6QAEa2nY2+"
                "Mc6SrnAqio3oCKbxYah3xa5rLP/Bn9R8gnPYyv9Y/jNR+V3Y2V63r"
                "/uGJ6o3e8y1uAcWHw+joLTxl+mj+XJMG57d9TuD5oRZdFmOwSdT3X"
                "UHy040LsOHhWYhzfBA6KeKEbp39eHc9mbdvkCgxBZmNzqpPs6SHyh"
                "2ECtT46MMRcPc39Fn2KuRL/PbaoRvw=="
            ),
            "txtUser": self.usrname,
            "txtPassword": self.usrpswd,
            "rbLx": self.user_type,
            "__ASYNCPOST": "true",
            "btnLogin": "******",
        }
        response = self.http_request.post(login_url, data=form_data).text
        return validate_login(
            response,
            validator={
                "pageRedirect": {'status': True, 'info': "登录成功"},
                # A rejected password is a failed login. Reporting it as a
                # success would let cli() open the menu without a valid
                # session.
                u"密码不正确": {'status': False, 'info': "密码错误"},
            },
            default={'status': False, 'info': "登录失败"},
        )

    def get_info(self):
        """
        Fetch the student's personal information through the login session.

        :return: dict of student information; keys carry an ``a.``..``e.``
            prefix that fixes the column order when printed
        """
        ifo_url = 'http://219.242.68.33/xuesheng/xsxx.aspx'
        soup = Soup(self.http_request.session, ifo_url)
        data = {}
        data['a.姓名'] = soup.find(id="ctl00_ContentPlaceHolder1_lblXm").text
        data['b.身份证号'] = soup.find(id="ctl00_ContentPlaceHolder1_lblSfz").text
        data['c.学号'] = soup.find(id="ctl00_ContentPlaceHolder1_lblXh").text
        data['d.班级'] = soup.find(id="ctl00_ContentPlaceHolder1_className").text
        data['e.院系'] = soup.find(id="ctl00_ContentPlaceHolder1_collegeName").text
        # Truthiness check, consistent with get_score() and elective().
        if self.display:
            ordered_keys = sorted(data)
            # Drop the two-character ordering prefix from the titles.
            tabletitle = [key[2:] for key in ordered_keys]
            cont = [data[key] for key in ordered_keys]
            table_print(tabletitle, cont)
        return data

    def get_score(self):
        """
        Fetch the score table through the login session.

        The text of every ``td`` cell is read in strides of ten, one
        stride per course row.

        :return: list of rows ``[index, course, score, unit, year, term,
            nature]``
        """
        score_url = 'http://219.242.68.33/xuesheng/cjcx.aspx'
        soup = Soup(self.http_request.session, score_url)
        all_scoreifo = [item.text.strip() for item in soup.find_all('td')]
        indexs = all_scoreifo[0::10]
        years = all_scoreifo[2::10]
        terms = all_scoreifo[3::10]
        units = all_scoreifo[5::10]
        natures = all_scoreifo[7::10]
        courses = all_scoreifo[8::10]
        # A score cell may hold several lines; show them on one line.
        scores = [' / '.join(item.split('\n')) for item in all_scoreifo[9::10]]
        average = soup.find(id="ctl00_ContentPlaceHolder1_lblpjcj").text
        total = soup.find(id="ctl00_ContentPlaceHolder1_lblKcms").text
        credit = soup.find(id="ctl00_ContentPlaceHolder1_lblXfs").text

        tabletitle = ['序号', '课程', '成绩', '学分', '学年', '学期', '性质']
        conts = [
            [index, course.strip(), score.replace('\n', ''), unit, year, term, nature]
            for index, year, term, unit, nature, course, score
            in zip(indexs, years, terms, units, natures, courses, scores)
        ]
        if self.display:
            table_print(tabletitle, conts)
            table_print(['平均成绩', '课程门数', '已获得学分'],
                        [[average, total, credit]])
        return conts

    def elective(self):
        """
        Fetch the elective course listing.

        The text of every ``td`` cell is read in strides of five, one
        stride per row.

        :return: list of rows ``[index, course group, course name,
            teacher]``
        """
        eleurl = 'http://219.242.68.33/xuesheng/xsxk.aspx'
        form_data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": (
                "/wEPDwULLTE1NDU0NjAxMDUPZBYCZg9kFgICAw9kFgICAQ9kFgIC"
                "Aw8QDxYGHg1EYXRhVGV4dEZpZWxkBQRrenNtHg5EYXRhVmFsdWVG"
                "aWVsZAUDa3poHgtfIURhdGFCb3VuZGdkEBUdFzE1LTE256ys5LqM"
                "5a2m5pyf5YWs6YCJFzE1LTE256ys5LiA5a2m5pyf5YWs6YCJFzE0"
                "LTE156ys5LqM5a2m5pyf5YWs6YCJFzE0LTE156ys5LiA5a2m5pyf"
                "5YWs6YCJFzEzLTE056ys5LqM5a2m5pyf5YWs6YCJFzEzLTE056ys"
                "5LiA5a2m5pyf5YWs6YCJGeiLseivree7vOWQiOaKgOiDveWfueWF"
                "uzEXMTItMTPnrKzkuozlrabmnJ/lhazpgIkZ6Iux6K+t57u85ZCI"
                "5oqA6IO95Z+55YW7MRcxMi0xM+esrOS4gOWtpuacn+WFrOmAiRcx"
                "MS0xMuesrOS6jOWtpuacn+WFrOmAiRcxMS0xMuesrOS4gOWtpuac"
                "n+WFrOmAiRcxMC0xMeesrOS6jOWtpuacn+WFrOmAiRcxMC0xMees"
                "rOS4gOWtpuacn+WFrOmAiRcwOS0xMOesrOS6jOWtpuacn+WFrOmA"
                "iRcwOS0xMOesrOS4gOWtpuacn+WFrOmAiRcwOC0wOeesrOS6jOWt"
                "puacn+WFrOmAiRcwOC0wOeesrOS4gOWtpuacn+WFrOmAiRcwNy0w"
                "OOesrOS6jOWtpuacn+WFrOmAiRcwNy0wOOesrOS4gOWtpuacn+WF"
                "rOmAiRcwNi0wN+esrOS6jOWtpuacn+WFrOmAiRcwNi0wN+esrOS4"
                "gOWtpuacn+WFrOmAiRcwNS0wNuesrOS6jOWtpuacn+WFrOmAiRcw"
                "NS0wNuesrOS4gOWtpuacn+WFrOmAiRcwNC0wNeesrOS6jOWtpuac"
                "n+WFrOmAiRcwNC0wNeesrOS4gOWtpuacn+WFrOmAiRcwMy0wNOes"
                "rOS6jOWtpuacn+WFrOmAiRcwMy0wNOesrOS4gOWtpuacn+WFrOmA"
                "iRcwMi0wM+esrOS6jOWtpuacn+WFrOmAiRUdAzMyMQMzMTgDMzE0"
                "AzMxMwMzMDIDMjQzAzI0MgMyNDEDMjQwAzIzOQMyMzgDMjM3AzIz"
                "NgMyMzUDMjM0AzIzMwMyMzIDMjMxAzIzMAMyMjkDMjI4AzIyNwMy"
                "MjYDMjE2AzIxNQMyMTQDMjEzAzIxMgMyMTAUKwMdZ2dnZ2dnZ2dn"
                "Z2dnZ2dnZ2dnZ2dnZ2dnZ2dnZ2cWAWZkZBgWDkNmM5ksFZPYJS+C"
                "Xe3IihlDoFim1X/o3cfNS5fN"
            ),
            "__VIEWSTATEGENERATOR": "E7E695A4",
            "ctl00$ContentPlaceHolder1$drplKcz": '321',
            "ctl00$ContentPlaceHolder1$btnYxkc": "查 看",
        }
        ss = self.http_request.post(eleurl, data=form_data)
        soup = BeautifulSoup(ss.text, 'lxml')
        all_item = [item.text for item in soup.find_all('td')]

        tabletitle = ['序号', '课程组', '课程名称', '任课教师']
        conts = []
        for index, group, detail in zip(all_item[1::5], all_item[2::5],
                                        all_item[4::5]):
            # The detail cell holds the course name followed by the teacher.
            fields = detail.split()
            conts.append([index, group[4:].strip(), fields[0], fields[1]])
        if self.display:
            table_print(tabletitle, conts)
        # Returned for parity with get_score(); previously the rows could
        # only be printed.
        return conts

    def cli(self):
        """
        Run the interactive menu: ask for credentials, log in, then
        dispatch menu choices until the user switches account or quits.

        On a failed login the reason is printed and the user may retry or
        quit with ``q``.
        """
        prompt = '''
        +===========================+
        |   [0]查成绩               |
        |   [1]个人信息             |
        |   [2]选修课               |
        |   [3]登录其他账号         |
        |   [4]清除历史记录         |
        |   [5]安全退出             |
        +===========================+
        >>> '''
        self.usrname = rinput('学号: ')
        self.usrpswd = rinput('密码: 00000000\b\b\b\b\b\b\b\b')
        status = self.login()
        if status['status']:
            choice = True
            choice_dict = {
                '0': self.get_score,
                '1': self.get_info,
                '2': self.elective,
                '3': self.cli,
                '4': clear,
                '5': quit,
            }
            while choice is True:
                # Slice instead of indexing so that empty input is treated
                # as an invalid choice rather than raising IndexError.
                usr_choice = rinput('\r' + prompt).strip()[:1]
                os.system('clear')
                if usr_choice in choice_dict:
                    choice_dict.get(usr_choice)()
                    # Choices 3 and 5 end this menu loop.
                    choice = usr_choice not in "35"
                else:
                    print('Input incorrect..again!')
        else:
            print(status['info'])
            cho = rinput('Any key to continue, [q] to quit.')
            if cho == 'q':
                quit()
            else:
                self.cli()