def get_sections_for_spider(course_code, term, section_code):
    """Find the section of a course matching a term and a section code.

    The course is looked up by ``course_code`` and its sections are then
    searched in order for the first one whose ``term`` and ``section_code``
    both equal the given values.

    Returns a ``Ret``:
      * wrapping the matching section as ``body`` on success;
      * carrying the error of the course lookup or the section lookup when
        either of them does not report ``Error.OK``;
      * carrying ``Error.SECTION_NOT_EXIST`` when no section matches.
    """
    course_ret = Course.get_course_by_code(course_code)
    if course_ret.error is not Error.OK:
        return Ret(course_ret.error)

    sections_ret = Section.get_sections_by_course(course_ret.body)
    if sections_ret.error is not Error.OK:
        return Ret(sections_ret.error)

    # First section that agrees on both term and section code, if any.
    found = next(
        (candidate for candidate in sections_ret.body
         if candidate.term == term
         and candidate.section_code == section_code),
        None,
    )
    if found is None:
        return Ret(Error.SECTION_NOT_EXIST)
    return Ret(body=found)
def parse_course_detail(self, response):
    """Parse a course detail page, store the course and request its sections.

    The page is scraped for the course title (split into code and name),
    career, units, grading basis, add/drop consent, enrollment requirement
    and description. If ``Course.get_course_by_code`` does not report
    ``Error.OK`` a new ``CourseItem`` is created and saved; otherwise the
    existing course is updated in place and its components and assessments
    are deleted.

    Yields a single ``scrapy.FormRequest`` built from the page's ``win0``
    form, with ``self.get_class_sections`` as callback. The request's
    ``meta`` carries:
      * ``'item'``: the saved course, or ``None`` when saving a new course
        failed (the failure is logged instead of being silently dropped);
      * ``'components'``: the component strings scraped from the page.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Replace literal <br /> tags with newlines before running the queries.
    response = response.replace(
        body=response.body.replace(b'<br />', b'\n'))

    def first_text(xpath):
        # Missing nodes are stored as an empty string rather than None.
        return response.xpath(xpath).extract_first() or ''

    name = response.xpath(
        "//span[@class='PALEVEL0SECONDARY']/text()").extract_first()
    if name and " - " in name:
        # Split on the first separator only, so a course name that itself
        # contains " - " is kept whole instead of being truncated.
        course_code, course_name = name.split(" - ", 1)
    else:
        # Title missing or without a separator: use it for both fields.
        course_code = course_name = name
    print(course_code)

    career = first_text(
        "//label[@for='SSR_CRSE_OFF_VW_ACAD_CAREER$0']/../../following-sibling::tr[1]/td/span[@class='PSDROPDOWNLIST_DISPONLY']/text()"
    )
    try:
        units = float(
            response.xpath(
                "//label[@for='DERIVED_CRSECAT_UNITS_RANGE$0']/../../following-sibling::tr[1]/td/span[@class='PSEDITBOX_DISPONLY']/text()"
            ).extract_first() or 0)
    except ValueError:
        # Non-numeric units text falls back to 0.
        units = 0
    grading_basis = first_text(
        "//label[@for='SSR_CRSE_OFF_VW_GRADING_BASIS$0']/../following-sibling::td[1]/span/text()"
    )
    add_consent = first_text(
        "//label[@for='SSR_CRSE_OFF_VW_CONSENT$0']/../following-sibling::td[1]/span/text()"
    )
    drop_consent = first_text(
        "//label[@for='SSR_CRSE_OFF_VW_SSR_DROP_CONSENT$0']/../following-sibling::td[1]/span/text()"
    )
    enroll_requirement = first_text(
        "//label[@for='SSR_CRSE_OFF_VW_RQRMNT_GROUP$0']/../../following-sibling::tr[1]//span/text()"
    )
    description = first_text(
        "//td[text()='Description']/../following-sibling::tr[1]//span/text()"
    )

    # One ordered field list shared by the create and the update branch.
    course_fields = (
        ('course_code', course_code),
        ('course_name', course_name),
        ('career', career),
        ('units', units),
        ('grading_basis', grading_basis),
        ('add_consent', add_consent),
        ('drop_consent', drop_consent),
        ('enroll_requirement', enroll_requirement),
        ('description', description),
    )

    saved_item = None
    ret = Course.get_course_by_code(course_code)
    if ret.error is not Error.OK:
        # The course does not exist yet: create it.
        course_item = CourseItem()
        for field, value in course_fields:
            course_item[field] = value
        try:
            saved_item = course_item.save()
        except Exception:
            # Best effort: keep crawling, but leave a trace of the failure.
            logger.exception("Failed to save new course %r", course_code)
    else:
        # The course already exists: refresh its info.
        course_item = ret.body
        for field, value in course_fields:
            setattr(course_item, field, value)
        course_item.save()
        saved_item = course_item
        # Delete all the assessments and components of the existing course.
        # NOTE(review): the original indentation was lost; these calls are
        # assumed to belong to the update branch only - confirm.
        Component.delete_component_by_course(course_item)
        Assessment.delete_assessment_by_course(course_item)

    # Save the components for later use.
    components = response.xpath(
        "//label[@for='SR_LBL_WRK_CRSE_COMPONENT_LBL$0']/../../preceding-sibling::tr[1]//span[@class='PSEDITBOX_DISPONLY']/text()"
    ).extract()

    request = scrapy.FormRequest.from_response(
        response=response,
        formname='win0',
        formdata={'ICAction': 'DERIVED_SAA_CRS_SSR_PB_GO'},
        callback=self.get_class_sections)
    # Reset before the sections request; presumably read by later callbacks.
    self.term_counter = 0
    request.meta['item'] = saved_item
    request.meta['components'] = components
    yield request