示例#1
0
    def parse(self, response):
        """Store the course described by the page's ``js-schema`` script tag.

        The text of the ``<script class="js-schema">`` element is parsed as
        JSON, and the second entry of its ``@graph`` list supplies the title,
        description and cover image of a new ``Course`` row.  Language,
        level, enrolment and rating fields are filled with fixed values.

        Args:
            response: Response object for the course page; it must provide
                ``css()`` and ``url``.

        Returns:
            None.  When the schema script is absent the page is skipped with
            a warning instead of failing inside ``json.loads``.

        Raises:
            json.JSONDecodeError: The script text is not valid JSON.
            KeyError, IndexError: The JSON lacks the ``@graph[1]`` entries
                read below.
        """
        # Local import: the file's import block is not visible from here.
        import logging

        raw_schema = response.css(
            'script[class="js-schema"]::text').extract_first()
        if raw_schema is None:
            # extract_first() gives None when nothing matches, and
            # json.loads(None) would only raise an uninformative TypeError.
            logging.getLogger(__name__).warning(
                'No js-schema script found on %s; skipping page',
                response.url)
            return

        # Every scraped field comes from the same @graph entry, so look it up
        # once.  NOTE(review): index 1 is presumably the course node --
        # confirm against a live page.
        course_node = json.loads(raw_schema)['@graph'][1]

        Course.create(
            course_id=uuid.uuid4(),
            course_title=course_node['name'],
            course_description=course_node['description'],
            language='English',
            level='All level',
            student_enrolled=0,
            ratings=0,
            overall_rating=0,
            course_url=response.url,
            cover_image=course_node['image']['url'],
            source='edx')
示例#2
0
    def parse(self, response):
        """Scrape a course page's HTML and store it as a ``Course`` row.

        Title, rating figures, skill level, thumbnail, description and the
        enrolment count are pulled out with CSS selectors, cleaned up and
        passed to ``Course.create``.

        Args:
            response: Response object for the course page; it must provide
                ``css()`` and ``url``.

        Returns:
            None.  When any element that the parsing below dereferences is
            missing, the page is skipped with a warning instead of raising
            ``AttributeError``/``TypeError``/``IndexError`` part-way through.

        Raises:
            ValueError: A rating or enrolment string is present but is not
                numeric after clean-up.
        """
        # Local import: the file's import block is not visible from here.
        import logging

        # Collect every raw extraction first so they can be validated together.
        raw_title = response.css('h1.course-title::text').extract_first()
        enrolled_information = response.css(
            'span.rating-and-enrolled__element').extract()
        raw_rating = response.css(
            'span.rate-count>span.tooltip-container::text').extract_first()
        # skill_level is passed through unchanged, so None is tolerated here.
        skill_level = response.css(
            'div.right-middle>ul.list>li.list-item:nth-child(3)>span.list-right::text'
        ).extract_first()
        thumbnail_html = response.css(
            'div.placeholder__thumbnail-container.play-button-trigger>img'
        ).extract_first()
        description_html = response.css('div#desc').extract_first()

        missing = [
            label for label, value in (
                ('title', raw_title),
                ('rating', raw_rating),
                ('thumbnail', thumbnail_html),
                ('description', description_html),
            ) if value is None
        ]
        # The enrolment count is read from the third matching element.
        if len(enrolled_information) < 3:
            missing.append('enrolment count')
        if missing:
            logging.getLogger(__name__).warning(
                'Skipping %s: missing %s', response.url, ', '.join(missing))
            return

        course_title = raw_title.strip()

        # The rating text spans several lines: the first holds the average
        # score and the last holds the "(N ratings)" count.
        rating = [x.strip() for x in raw_rating.strip().split("\n")]

        # Keep what follows the first double quote of the serialised <img>
        # tag and drop the closing '">'.
        # NOTE(review): this yields a clean URL only when the URL is the
        # tag's sole quoted attribute; selecting `img::attr(src)` would be
        # sturdier -- confirm the markup before switching.
        course_thumbnail = thumbnail_html[thumbnail_html.find('"') +
                                          1:].replace('">', '').strip()

        # `cleanr` is a pattern defined elsewhere in the module; its matches
        # are removed from the serialised description element.
        course_description = re.sub(cleanr, '', description_html).replace(
            "\n\n", '').replace("  ", ' ').strip()

        overall_rating = float(rating[0].replace(',', ''))
        ratings = float(rating[-1].replace('ratings)', '').replace(
            '(', '').replace(',', '').strip())

        student_enrolled = re.sub(
            cleanr, '',
            enrolled_information[2].replace('students enrolled',
                                            '').strip()).strip()
        student_enrolled = int(student_enrolled.replace(',', ''))

        # NOTE(review): no `source` keyword is passed here, although the edX
        # and Coursera parsers pass one -- confirm Course has a default.
        Course.create(course_id=uuid.uuid4(),
                      course_title=course_title,
                      course_description=course_description,
                      language='English',
                      level=skill_level,
                      student_enrolled=student_enrolled,
                      ratings=ratings,
                      overall_rating=overall_rating,
                      course_url=response.url,
                      cover_image=course_thumbnail)
示例#3
0
    def parse(self, response):
        """Store the course described by the page's JSON-LD script tag.

        The text of the first ``<script type="application/ld+json">`` element
        is parsed as JSON; its ``name``, ``description`` and
        ``thumbnail.contentUrl`` entries populate a new ``Course`` row.
        Language, level, enrolment and rating fields are filled with fixed
        values.

        Args:
            response: Response object for the course page; it must provide
                ``css()`` and ``url``.

        Returns:
            None.  When the JSON-LD script is absent the page is skipped with
            a warning instead of failing inside ``json.loads``.

        Raises:
            json.JSONDecodeError: The script text is not valid JSON.
            KeyError: The JSON lacks one of the keys read below.
        """
        # Local import: the file's import block is not visible from here.
        import logging

        raw_json_ld = response.css(
            'script[type="application/ld+json"]::text').extract_first()
        if raw_json_ld is None:
            # extract_first() gives None when nothing matches, and
            # json.loads(None) would only raise an uninformative TypeError.
            logging.getLogger(__name__).warning(
                'No ld+json script found on %s; skipping page', response.url)
            return

        data = json.loads(raw_json_ld)

        # level, ratings and overall_rating are fixed placeholders: the
        # original code carried only commented-out, data-reactid based
        # selectors for scraping them.
        Course.create(
            course_id=uuid.uuid4(),
            course_title=data['name'],
            course_description=data['description'],
            language='English',
            level='All Levels',
            student_enrolled=0,
            ratings=0,
            overall_rating=0,
            course_url=response.url,
            cover_image=data['thumbnail']['contentUrl'],
            source='coursera')