def parse(self, response):
    """Store one course from the page's embedded ``js-schema`` JSON.

    The text of the ``<script class="js-schema">`` element is decoded as
    JSON and the entry at index 1 of its ``@graph`` list supplies the
    title, description and cover image.  Language, level and all
    enrolment/rating figures are fixed placeholder values.

    Args:
        response: Scraped page exposing ``.css()`` and ``.url``.
    """
    raw_schema = response.css(
        'script[class="js-schema"]::text').extract_first()
    schema = json.loads(raw_schema)

    # Only the second node of the graph is consulted.
    node = schema['@graph'][1]

    # NOTE(review): the label here is 'All level' while the JSON-LD callback
    # tagged source='coursera' uses 'All Levels' -- confirm which is intended.
    fields = dict(
        course_id=uuid.uuid4(),
        course_title=node['name'],
        course_description=node['description'],
        language='English',
        level='All level',
        student_enrolled=0,
        ratings=0,
        overall_rating=0,
        course_url=response.url,
        cover_image=node['image']['url'],
        source='edx',
    )
    Course.create(**fields)
def parse(self, response):
    """Scrape a course page with CSS selectors and store it as a ``Course``.

    Title, skill level, thumbnail, description, rating figures and the
    enrolment count are read from the page markup.  Optional pieces that
    are absent fall back to neutral defaults (empty string, ``0.0`` or
    ``0``) so that the course is still stored instead of the callback
    crashing on ``None``.

    Args:
        response: Scraped page exposing ``.css()`` and ``.url``.

    Raises:
        AttributeError: If the page has no ``h1.course-title`` text; a page
            without a title is left to fail loudly.
        ValueError: If rating or enrolment text is present but not numeric
            in the expected layout.
    """
    course_title = response.css(
        'h1.course-title::text').extract_first().strip()

    skill_level = response.css(
        'div.right-middle>ul.list>li.list-item:nth-child(3)>span.list-right::text'
    ).extract_first()

    # Read the URL from the src attribute itself.  Slicing the serialized
    # <img> tag at its first double quote only works when src is the tag's
    # sole attribute.
    course_thumbnail = response.css(
        'div.placeholder__thumbnail-container.play-button-trigger>img::attr(src)'
    ).extract_first(default='').strip()

    # The whole div#desc element is serialized, then everything matched by
    # the module-level ``cleanr`` pattern is removed (presumably HTML tags --
    # the pattern is defined outside this block).
    course_description = response.css('div#desc').extract_first(default='')
    course_description = re.sub(cleanr, '', course_description).replace(
        "\n\n", '').replace("\xa0", ' ').strip()

    # The rating text is split on newlines: the first piece is the average
    # score, the last piece is the "(N ratings)" count.
    overall_rating = 0.0
    ratings = 0.0
    rating_text = (response.css(
        'span.rate-count>span.tooltip-container::text').extract_first()
        or '').strip()
    if rating_text:
        rating = [x.strip() for x in rating_text.split("\n")]
        overall_rating = float(rating[0].replace(',', ''))
        ratings = float(rating[-1].replace('ratings)', '').replace(
            '(', '').replace(',', '').strip())

    # The third matched element carries the "N students enrolled" text.
    student_enrolled = 0
    enrolled_information = response.css(
        'span.rating-and-enrolled__element').extract()
    if len(enrolled_information) > 2:
        enrolled_text = re.sub(
            cleanr, '',
            enrolled_information[2].replace('students enrolled',
                                            '').strip()).strip()
        student_enrolled = int(enrolled_text.replace(',', ''))

    # NOTE(review): unlike the two JSON-LD parse callbacks, no ``source`` is
    # passed here; presumably the model supplies a default -- confirm.
    Course.create(course_id=uuid.uuid4(),
                  course_title=course_title,
                  course_description=course_description,
                  language='English',
                  level=skill_level,
                  student_enrolled=student_enrolled,
                  ratings=ratings,
                  overall_rating=overall_rating,
                  course_url=response.url,
                  cover_image=course_thumbnail)
def parse(self, response):
    """Store one course from the page's ``application/ld+json`` metadata.

    The text of the ``<script type="application/ld+json">`` element is
    decoded as JSON; its ``name``, ``description`` and
    ``thumbnail.contentUrl`` entries become the title, description and
    cover image.  Language, level and all enrolment/rating figures are
    fixed placeholder values.

    Args:
        response: Scraped page exposing ``.css()`` and ``.url``.
    """
    ld_json = response.css(
        'script[type="application/ld+json"]::text').extract_first()
    meta = json.loads(ld_json)

    record = {
        'course_id': uuid.uuid4(),
        'course_title': meta['name'],
        'course_description': meta['description'],
        'language': 'English',
        # Placeholder; a scraped value from
        # td.td-data[data-reactid="153"] is currently disabled.
        'level': 'All Levels',
        'student_enrolled': 0,
        # Placeholder; a scraped count from span[data-reactid="383"]
        # is currently disabled.
        'ratings': 0,
        # Placeholder; no overall rating is scraped.
        'overall_rating': 0,
        'course_url': response.url,
        'cover_image': meta['thumbnail']['contentUrl'],
        'source': 'coursera',
    }
    Course.create(**record)