def get_study_record_begin(self):
    """Open a study record for every class-detail entry.

    For each entry under ``self.user_class_info['class'][...]['class_detail']``
    a POST is sent to the ``UploadStudyRecordBegin`` endpoint. When the
    response status is 200, the ``data`` field of the JSON body is stored on
    the entry as ``studyId`` (together with a ``studyTime`` timestamp) unless
    those keys are already present.

    Returns:
        list: the ``data`` value of every response that came back with
        HTTP status 200, in request order.
    """
    endpoint = "https://degree.qingshuxuetang.com/hngd/Student/UploadStudyRecordBegin"
    # The timestamp query parameter is computed once and reused for all calls.
    params = {"_t": int(time.time() * 1000)}
    request_headers = self.get_headers_template()
    request_headers['Cookie'] = json_utils.json_parse_to_equivalent(
        self.user_info['cookies'])

    study_ids = []
    classes = self.user_class_info['class']
    for class_key in classes:
        course = classes[class_key]
        print('[system] get the detail video code... and current class: '
              + course['name'])
        for entry in course['class_detail']:
            body = {
                "classId": entry['classId'],
                "contentId": entry['contentId'],
                "contentType": 11,
                "courseId": course['courseId'],
                "periodId": entry['periodId'],
                "position": 0,
                "schoolId": entry['schoolId'],
            }
            response = requests.request(
                "POST", endpoint, data=json.dumps(body),
                headers=request_headers, params=params)
            if response.status_code != 200:
                continue
            study_id = json.loads(response.content)['data']
            study_ids.append(study_id)
            # setdefault: an id/timestamp already on the entry is kept.
            entry.setdefault('studyId', study_id)
            entry.setdefault('studyTime', time.time())
    return study_ids
def spider_class_home_work(self):
    """Scrape the homework list of every class into ``self.document``.

    For each class in ``self.document['task']['class']`` the page at its
    ``homework_url`` is fetched, and every ``.exercise-body`` element becomes
    one dict appended to that class's ``'test'`` list (which is reset to an
    empty list first). Each dict carries the exercise id, title, status, the
    derived exercise URLs and the ``studentId`` read from the exercise
    paper's ``#form1`` inputs (empty string when no such input is found).
    """
    request_headers = self.get_headers_template()
    request_headers['Cookie'] = json_utils.json_parse_to_equivalent(
        self.document['cookies'])

    student_base = 'https://degree.qingshuxuetang.com/hngd/Student/'
    paper_base = student_base + 'ExercisePaper?'
    classes = self.document['task']['class']

    for class_key, course in classes.items():
        classes[class_key]['test'] = []
        listing = pq(requests.get(course['homework_url'],
                                  headers=request_headers).text)
        for exercise in listing.find('.exercise-body').items():
            link = re.findall('ExercisePaper(.*?)"', exercise.html(), re.S)[0]
            exercise_id = re.findall('exerciseId=(.*?)&', link, re.S)[0]

            basic_params = ('courseId=' + course['courseId']
                            + '&exerciseId=' + exercise_id
                            + '&teachPlanId=' + course['teachPlanId']
                            + '&periodId=' + course['periodId'])
            action_prefix = (paper_base + 'courseId=' + course['courseId']
                             + '&exerciseId=' + exercise_id)
            test_url = paper_base + basic_params
            re_test_url = paper_base + basic_params + '&isRetest=yes'
            answer_url = student_base + 'ViewExerciseAnswer?' + basic_params
            save_url = action_prefix + '&action=save'
            submit_url = action_prefix + '&action=submit'

            # The student id is only available on the exercise paper itself;
            # when several inputs are named 'studentId' the last one wins.
            student_id = ''
            paper = pq(requests.get(test_url, headers=request_headers).text)
            for field in paper.find('#form1 input'):
                if field.name == 'studentId':
                    student_id = field.value

            classes[class_key]['test'].append({
                'studentId': student_id,
                'exerciseId': exercise_id,
                'title': exercise.find('.title').text(),
                'status': exercise.find('.exercise-status').text(),
                'test_url': test_url,
                're_test_url': re_test_url,
                'answer_url': answer_url,
                'save_url': save_url,
                'submit_url': submit_url,
            })
def upload_study_video_record(self, study_max_time):
    """Keep posting study-progress records until every video is done.

    Every video of every enabled class in ``self.document['task']['class']``
    is visited once per round. A video whose elapsed time
    (``time.time() - detail['studyTime']``) has reached ``study_max_time`` is
    counted as finished; for any other video a POST is sent to the
    ``UploadStudyRecordContinue`` endpoint. Rounds repeat, 60 seconds apart,
    until all videos counted at start-up are finished in the same round.

    Args:
        study_max_time: number of seconds after which a video counts as
            finished.

    Returns:
        None.
    """
    url = "https://degree.qingshuxuetang.com/hngd/Student/UploadStudyRecordContinue"
    querystring = {"_t": int(time.time() * 1000)}

    # Total number of videos that have to reach study_max_time.
    class_count = 0
    for v in self.document['task']['class'].values():
        if v['enable']:
            class_count += len(v['video'])

    print('[system] begin study video...')
    finish_count = 0
    while finish_count < class_count:
        # Recount from zero each round. Carrying the counter over would count
        # an already-finished video again on every later round and end the
        # loop while other videos are still below study_max_time.
        finish_count = 0
        for v in self.document['task']['class'].values():
            if not v['enable']:
                continue
            for detail in v['video']:
                study_time = time.time() - detail['studyTime']
                if study_time >= study_max_time:
                    finish_count += 1
                    print('[finish] major: ' + v['name'] + ', video: '
                          + str(detail['studyId']) + ', study time: '
                          + str(study_time) + '\'s')
                    continue
                payload = {
                    'recordId': detail['studyId'],
                    'position': '103'
                }
                # Headers are rebuilt per request so the current cookies in
                # self.document are always the ones sent.
                headers = self.get_headers_template()
                headers['Cookie'] = json_utils.json_parse_to_equivalent(
                    self.document['cookies'])
                response = requests.request(
                    "POST", url, data=json.dumps(payload),
                    headers=headers, params=querystring)
                print('[studying] study time: ' + str(study_time) + '\'s'
                      + ', major: ' + v['name'] + ', class: '
                      + str(detail['studyId']) + ', result: ' + response.text)
        if finish_count < class_count:
            print('[system] 本轮学习时间已经全部提交, 等待下一轮继续...')
            # Rest 60s before the next round.
            time.sleep(60)
def spider_home_work_answer(self):
    """Scrape the standard answers of every homework into ``self.document``.

    For each homework dict in every class's ``'test'`` list, the page at its
    ``answer_url`` is requested and an ``answer`` dict is built from it:

    * every input found under ``.test-heading`` contributes ``name -> value``;
    * for each ``.test`` element, the text between ``标准答案:`` and ``解`` is
      stored under ``<prefix>.answer``, where ``<prefix>`` is the name of the
      element's first heading input up to its last ``.``;
    * ``studentId`` is copied from the homework dict.

    Existing keys are never overwritten (``setdefault``). The result is
    stored as ``home_work['answer']``.

    Raises:
        IndexError: when a ``.test`` element has no heading input or its
            text contains no standard-answer marker.
    """
    headers = self.get_headers_template()
    headers['Cookie'] = json_utils.json_parse_to_equivalent(
        self.document['cookies'])
    headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'

    for v in self.document['task']['class'].values():
        for home_work in v['test']:
            response = requests.post(home_work['answer_url'], headers=headers)
            document = pq(response.text)
            answer = {}
            for item in document.items('.test'):
                heading_inputs = item.find('.test-heading input')
                for field in heading_inputs:
                    answer.setdefault(field.name, field.value)

                # Swap the last dotted component of the first input's name
                # for 'answer' (a name without a dot yields '.answer').
                content = heading_inputs[0].name
                answer_key = content.rpartition('.')[0] + '.answer'

                answer_detail = re.findall(
                    '标准答案:(.*?)解', item.text(), re.S)[0].replace('\n', '')

                # An answer containing any ASCII letter is split into single
                # characters; any other answer is kept as the raw string.
                # NOTE(review): presumably the letters are option labels of a
                # choice question, one list entry per option -- confirm
                # against the submit form.
                if any('A' <= ch <= 'Z' or 'a' <= ch <= 'z'
                       for ch in answer_detail):
                    answer_detail = list(answer_detail)

                answer.setdefault(answer_key, answer_detail)
            answer.setdefault('studentId', home_work['studentId'])
            home_work['answer'] = answer
def submit_home_work(self):
    """Submit the collected answers of every homework.

    Each homework dict in every class's ``'test'`` list is POSTed to its
    ``submit_url`` with its ``answer`` dict as form data and a fresh ``_t``
    millisecond timestamp as query parameter; the response text is printed.
    """
    request_headers = self.get_headers_template()
    request_headers['Cookie'] = json_utils.json_parse_to_equivalent(
        self.document['cookies'])
    request_headers['Content-Type'] = (
        'application/x-www-form-urlencoded; charset=UTF-8')

    for course in self.document['task']['class'].values():
        # A class with an empty homework list simply contributes nothing.
        for home_work in course['test']:
            params = {"_t": int(time.time() * 1000)}
            response = requests.post(
                home_work['submit_url'],
                headers=request_headers,
                data=home_work['answer'],
                params=params)
            print(''.join([
                '[system]',
                'submit home work; current major is: ',
                course['name'],
                ', and the home work is: ',
                home_work['title'],
                ', and result: ',
                response.text,
            ]))
def spider_student_info(self):
    """Scrape the student's profile and store it in ``self.document``.

    The ``UserInfoBasic`` page is loaded in the browser and parsed with
    regular expressions: the argument of ``Behavior.init(...)`` provides the
    school/user fields, the argument of ``KF5SupportBoxAPI.identify(...)``
    provides name and account number, and the ``#userEditForm`` element
    provides gender, id number and address. Every key of the ``data`` object
    returned by the ``Svc/UserInfo`` endpoint is then merged on top.

    Returns:
        dict: the assembled info, also stored as ``self.document['student']``.

    Raises:
        IndexError: when one of the expected patterns is not on the page.
    """
    self.browser.get(
        'https://degree.qingshuxuetang.com/hngd/Student/UserInfoBasic')
    # Take one snapshot so all patterns are matched against the same markup.
    student_info_html = self.browser.page_source

    # Patterns are raw strings: '\.' and '\(' are invalid escapes otherwise.
    user_info_script = re.findall(r'Behavior\.init\((.*?)\);',
                                  student_info_html, re.S)[0]
    user_info = {
        'college': re.findall(r"college.*?'(.*?)',",
                              user_info_script, re.S)[0],
        'schoolId': re.findall(r'schoolId:.(.*?),',
                               user_info_script, re.S)[0],
        'userId': re.findall(r'userId:.(.*?),',
                             user_info_script, re.S)[0],
        'userRole': re.findall(r'userRole:.(.*?),',
                               user_info_script, re.S)[0],
        'userSchools': re.findall(r"userSchools:.'(.*?)',",
                                  user_info_script, re.S)[0],
        'userSchoolType': re.findall(r"userSchoolType:.'(.*?)',",
                                     user_info_script, re.S)[0],
        'schoolType': re.findall(r"schoolType:.'(.*?)',",
                                 user_info_script, re.S)[0],
        'promoteId': ''
    }

    identify_info = re.findall(
        r'window.KF5SupportBoxAPI.identify\((.*?)\);',
        student_info_html, re.S)[0]
    user_info['name'] = re.findall(r'"name".:."(.*?)",',
                                   identify_info, re.S)[0]
    user_info['number'] = re.findall(r"'用户帐号.*value.*'(.*?)'},",
                                     identify_info, re.S)[0]

    info_pq = pq(student_info_html).find('#userEditForm')
    form_html = info_pq.html()
    user_info['gender'] = re.findall(r'性别.*?class.*?>(.*?)</div>',
                                     form_html, re.S)[0]
    user_info['id_no'] = re.findall(r'身份证.*?class.*?>(.*?)</div>',
                                    form_html, re.S)[0]
    user_info['address'] = info_pq.find('input').attr('value')

    # Merge the service response last so its keys override scraped values.
    headers = self.get_headers_template()
    headers['Cookie'] = json_utils.json_parse_to_equivalent(
        self.document['cookies'])
    response = requests.get(
        'https://degree.qingshuxuetang.com/hngd/Student/Svc/UserInfo',
        headers=headers)
    user_info_data = json.loads(response.text)['data']
    for info in user_info_data:
        user_info[info] = user_info_data[info]

    self.document['student'] = user_info
    return self.document['student']
def login(self, username, password):
    """Log in through the browser and collect cookies and user info.

    The home page is opened, the ``uname`` / ``pwd`` inputs are filled and
    ``loginBtn`` is clicked. Browser cookies are then polled once per second
    until at least five distinct cookie names have been seen. User fields
    are parsed from the page source and merged with the ``data`` object
    returned by the ``Svc/UserInfo`` endpoint.

    Args:
        username: text typed into the ``uname`` input.
        password: text typed into the ``pwd`` input.

    Returns:
        dict: ``{'cookies': ..., 'user_info': ...}``, also stored as
        ``self.user_info``.

    Raises:
        IndexError: when one of the expected patterns is not on the page.
    """
    self.browser.get('https://degree.qingshuxuetang.com/hngd/Home')

    username_input = self.wait.until(
        EC.presence_of_element_located((By.ID, 'uname'))
    )
    username_input.clear()
    username_input.send_keys(username)

    password_input = self.wait.until(
        EC.presence_of_element_located((By.ID, 'pwd'))
    )
    password_input.clear()
    password_input.send_keys(password)

    login_button = self.wait.until(
        EC.presence_of_element_located((By.ID, 'loginBtn'))
    )
    login_button.click()

    # Wait for the cookies. NOTE: there is no timeout -- this loops for as
    # long as the browser exposes fewer than five distinct cookie names.
    # setdefault keeps the first value seen for each cookie name.
    cookies = {}
    while len(cookies) < 5:
        for cookie in self.browser.get_cookies():
            cookies.setdefault(cookie['name'], cookie['value'])
        if len(cookies) < 5:
            time.sleep(1)

    # Take one snapshot so all patterns are matched against the same markup.
    page_source = self.browser.page_source

    # Patterns are raw strings: '\.' and '\(' are invalid escapes otherwise.
    user_info_script = re.findall(r'Behavior\.init\((.*?)\);',
                                  page_source, re.S)[0]
    user_info = {
        'college': re.findall(r"college.*?'(.*?)',",
                              user_info_script, re.S)[0],
        'schoolId': re.findall(r'schoolId:.(.*?),',
                               user_info_script, re.S)[0],
        'userId': re.findall(r'userId:.(.*?),',
                             user_info_script, re.S)[0],
        'userRole': re.findall(r'userRole:.(.*?),',
                               user_info_script, re.S)[0],
        'userSchools': re.findall(r"userSchools:.'(.*?)',",
                                  user_info_script, re.S)[0],
        'userSchoolType': re.findall(r"userSchoolType:.'(.*?)',",
                                     user_info_script, re.S)[0],
        'schoolType': re.findall(r"schoolType:.'(.*?)',",
                                 user_info_script, re.S)[0],
        'promoteId': ''
    }

    identify_info = re.findall(
        r'window.KF5SupportBoxAPI.identify\((.*?)\);',
        page_source, re.S)[0]
    user_info['name'] = re.findall(r'"name".:."(.*?)",',
                                   identify_info, re.S)[0]
    user_info['number'] = re.findall(r"'用户帐号.*value.*'(.*?)'},",
                                     identify_info, re.S)[0]

    # Merge the service response last so its keys override scraped values.
    headers = self.get_headers_template()
    headers['Cookie'] = json_utils.json_parse_to_equivalent(cookies)
    response = requests.get(
        'https://degree.qingshuxuetang.com/hngd/Student/Svc/UserInfo',
        headers=headers)
    user_info_data = json.loads(response.text)['data']
    for info in user_info_data:
        user_info[info] = user_info_data[info]

    self.user_info = {
        'cookies': cookies,
        'user_info': user_info
    }
    return self.user_info