Пример #1
0
    def scrape(self):
        """Yield a dict for every course listed as taught in ``self.semester``.

        The course listing at ``www.ntnu.no/web/studier/emnesok`` is fetched
        and every anchor whose href contains ``/studier/emner/`` is followed.
        The ``.infoBox`` elements of each course page are parsed into a
        nested dict of the form ``{box title: {key: [values]}}``.

        A course is skipped when:

        * no course code can be extracted from its link,
        * ``ntnu.valid_course_code`` or ``self.should_proccess_course``
          rejects the code,
        * the page has no 'Undervisning' / 'Undervises' entry, or
        * the label expected for the semester is not among those values.

        Yields:
            dict with the keys 'code', 'name', 'version', 'points' and 'url'.

        Raises:
            KeyError: if a course page passes the checks above but lacks
                'Fakta om emnet' / 'Versjon' or 'Studiepoeng'.
        """
        # Fall semesters are looked up under their own year; every other
        # semester type is looked up under the preceding year.
        if self.semester.type == Semester.FALL:
            year = self.semester.year
        else:
            year = self.semester.year - 1

        # Label that has to appear under 'Undervises'. It does not change
        # per course, so build it once. None disables the check.
        if self.semester.type == Semester.FALL:
            wanted = u'HØST %s' % year
        elif self.semester.type == Semester.SPRING:
            wanted = u'VÅR %s' % year
        else:
            wanted = None

        code_re = re.compile('/studier/emner/([^/]+)/', re.I|re.L)

        url = 'http://www.ntnu.no/web/studier/emnesok'
        query = {
            'p_p_lifecycle': '2',
            'p_p_id': 'courselistportlet_WAR_courselistportlet_INSTANCE_m8nT',
            '_courselistportlet_WAR_courselistportlet_INSTANCE_m8nT_year': year}

        courses_root = fetch.html(url, query=query, verbose=True)
        for a in courses_root.cssselect('a[href*="/studier/emner/"]'):
            course_url = a.attrib['href']

            # The selector only requires the substring '/studier/emner/',
            # while the pattern also needs a '<code>/' segment after it.
            # Skip links that do not carry one instead of crashing on None.
            match = code_re.search(course_url)
            if match is None:
                continue
            code = match.group(1)
            name = a.text_content()

            if not ntnu.valid_course_code(code):
                continue
            elif not self.should_proccess_course(code):
                continue

            # Only quote codes that will actually be fetched.
            quoted_code = urllib.quote(code.encode('utf-8'))

            title = None
            data = {}
            root = fetch.html(
                'http://www.ntnu.no/studier/emner/%s/%s' % (quoted_code, year))

            # Construct dict out of info boxes. ``title`` is deliberately not
            # reset per box: entries are filed under the most recent <h3>
            # seen (or None when none has been seen yet).
            for box in root.cssselect('.infoBox'):
                for child in box.getchildren():
                    if child.tag == 'h3':
                        title = child.text_content()
                        continue

                    # Text before the first sub-element, followed by the
                    # text trailing each sub-element.
                    parts = [child.text or '']
                    parts.extend(sub.tail or '' for sub in child.getchildren())

                    for part in parts:
                        if ':' not in part:
                            continue
                        key, value = part.split(':', 1)
                        key = key.strip(u' \n\xa0')
                        value = value.strip(u' \n\xa0')
                        data.setdefault(title, {}).setdefault(key, []).append(value)

            try:
                semesters = data['Undervisning']['Undervises']
            except KeyError:
                continue

            if wanted is not None and wanted not in semesters:
                continue

            yield {'code': code,
                   'name': name,
                   'version': int(data['Fakta om emnet']['Versjon'][0]),
                   'points': float(data['Fakta om emnet']['Studiepoeng'][0]),
                   'url': course_url}
Пример #2
0
def fetch_courses(semester, prefix=None):
    """Yield the ``fetch_course`` result for each course taught in ``semester``.

    The course list is read from ``BASE + '/course/-'``. An entry is dropped
    when its code is invalid, does not start with ``prefix`` (if given), has
    no detail record, falls outside the record's taught-year range, or has a
    version code that differs from the detail record's.

    Args:
        semester: object exposing ``year``, ``type``, ``FALL`` and ``SPRING``.
        prefix: optional course-code prefix to restrict the result to.

    Yields:
        The truthy value returned by ``fetch_course`` for each match.
    """
    listing = fetch.json(BASE + '/course/-')['course']
    for entry in listing:
        code = entry['code']

        if not ntnu.valid_course_code(code):
            logging.warning('Skipped invalid course name: %s', code)
            continue

        # TODO: shouldn't reimplement should_proccess_course
        if prefix and not code.startswith(prefix):
            continue

        details = fetch_course(code)
        if not details:
            continue

        # Checked in order: not yet taught, no longer taught, and a version
        # mismatch between the list entry and the detail record.
        if (semester.year < details['taughtFromYear'] or
                (details['lastYearTaught'] and
                 semester.year > details['lastYearTaught']) or
                details['versionCode'] != entry['versionCode']):
            continue

        taught_now = (
            (semester.type == semester.FALL and details['taughtInAutumn']) or
            (semester.type == semester.SPRING and details['taughtInSpring']))
        if taught_now:
            yield details
Пример #3
0
Файл: xml.py Проект: mydos/plan
    def scrape(self):
        """Yield one dict per usable row of the exam plan XML.

        The document at ``/eksamen/plan/<prefix>/dato.XML`` is fetched and
        each ``//dato/dato_row`` element is inspected. A row is skipped when
        its 'vurdstatuskode' is not 'ORD', its course code is invalid, the
        course is not stored for ``self.semester``, the course is rejected by
        ``self.should_proccess_course``, or it has no 'vurderingsformkode'.

        Nothing is yielded when ``fetch.xml`` returns None.

        Yields:
            dict with the keys 'course', 'exam_date', 'exam_time',
            'combination', 'handout_date', 'handout_time', 'type' and
            'duration'.
        """
        prefix = ntnu.prefix(self.semester, template='{year}{letter}')
        url = 'http://www.ntnu.no/eksamen/plan/%s/dato.XML' % prefix

        # Index this semester's courses by code for constant-time lookup.
        courses = Course.objects.filter(semester=self.semester)
        courses = {c.code: c for c in courses}

        root = fetch.xml(url)
        if root is None:
            return

        for row in root.xpath('//dato/dato_row'):
            course_code = get(row, 'emnekode')
            status_code = get(row, 'vurdstatuskode')

            if status_code != 'ORD':
                continue
            elif not ntnu.valid_course_code(course_code):
                logging.warning("Invalid course code: %s", course_code)
                continue
            elif course_code not in courses:
                logging.debug("Unknown course %s.", course_code)
                continue
            elif not self.should_proccess_course(course_code):
                # Previously referenced the undefined name ``code``, which
                # raised NameError for every known course.
                continue

            combination = get(row, 'vurdkombkode')
            duration = get(row, 'varighettimer')
            exam_date = get(row, 'dato_eksamen')
            exam_time = get(row, 'klokkeslett_fremmote_tid')
            handin_date = get(row, 'dato_innlevering')
            handin_time = get(row, 'klokkeslett_innlevering')
            handout_date = get(row, 'dato_uttak')
            handout_time = get(row, 'klokkeslett_uttak')
            type_code = get(row, 'vurderingsformkode')
            type_name = get(row, 'vurderingskombinasjon_vurdkombnavn_bokmal')

            if not type_code:
                logging.warning('Missing exam type for %s', course_code)
                continue

            # A hand-in date/time, when present, takes precedence over the
            # exam date/time.
            yield {
                'course': courses[course_code],
                'exam_date': utils.parse_date(handin_date or exam_date),
                'exam_time': utils.parse_time(handin_time or exam_time),
                'combination': combination,
                'handout_date': utils.parse_date(handout_date),
                'handout_time': utils.parse_time(handout_time),
                'type': self.exam_type(type_code, type_name),
                'duration': duration
            }
Пример #4
0
    def scrape(self):
        """Yield one dict per usable row of the exam plan XML.

        Fetches ``/eksamen/plan/<prefix>/dato.XML`` and walks its
        ``//dato/dato_row`` elements. Rows are dropped unless their
        'vurdstatuskode' is 'ORD', their course code is valid, the course is
        stored for ``self.semester``, ``self.should_proccess_course`` accepts
        it, and a 'vurderingsformkode' is present.

        The generator ends immediately when ``fetch.xml`` returns None.

        Yields:
            dict with the keys 'course', 'exam_date', 'exam_time',
            'combination', 'handout_date', 'handout_time', 'type' and
            'duration'.
        """
        prefix = ntnu.prefix(self.semester, template='{year}{letter}')
        url = 'http://www.ntnu.no/eksamen/plan/%s/dato.XML' % prefix

        # Map course code -> Course for the semester being scraped.
        courses = Course.objects.filter(semester=self.semester)
        courses = {c.code: c for c in courses}

        root = fetch.xml(url)
        if root is None:
            return

        for row in root.xpath('//dato/dato_row'):
            course_code = get(row, 'emnekode')
            status_code = get(row, 'vurdstatuskode')

            if status_code != 'ORD':
                continue
            elif not ntnu.valid_course_code(course_code):
                logging.warning("Invalid course code: %s", course_code)
                continue
            elif course_code not in courses:
                logging.debug("Unknown course %s.", course_code)
                continue
            elif not self.should_proccess_course(course_code):
                # The filter must receive ``course_code``; the earlier name
                # ``code`` was never defined in this method.
                continue

            combination = get(row, 'vurdkombkode')
            duration = get(row, 'varighettimer')
            exam_date = get(row, 'dato_eksamen')
            exam_time = get(row, 'klokkeslett_fremmote_tid')
            handin_date = get(row, 'dato_innlevering')
            handin_time = get(row, 'klokkeslett_innlevering')
            handout_date = get(row, 'dato_uttak')
            handout_time = get(row, 'klokkeslett_uttak')
            type_code = get(row, 'vurderingsformkode')
            type_name = get(row, 'vurderingskombinasjon_vurdkombnavn_bokmal')

            if not type_code:
                logging.warning('Missing exam type for %s', course_code)
                continue

            # Hand-in date/time wins over exam date/time when both exist.
            yield {'course': courses[course_code],
                   'exam_date': utils.parse_date(handin_date or exam_date),
                   'exam_time': utils.parse_time(handin_time or exam_time),
                   'combination': combination,
                   'handout_date': utils.parse_date(handout_date),
                   'handout_time': utils.parse_time(handout_time),
                   'type': self.exam_type(type_code, type_name),
                   'duration': duration}
Пример #5
0
def fetch_courses(semester):
    """Yield the ``fetch_course`` result for each course taught in ``semester``.

    The course list is read from ``http://www.ime.ntnu.no/api/course/-``.
    Entries with an invalid code, without a detail record, or whose detail
    record's taught-year range excludes ``semester.year`` are dropped.

    Args:
        semester: object exposing ``year``, ``type``, ``FALL`` and ``SPRING``.

    Yields:
        The truthy value returned by ``fetch_course`` for each match.
    """
    listing = fetch.json('http://www.ime.ntnu.no/api/course/-')['course']
    for entry in listing:
        code = entry['code']

        if not ntnu.valid_course_code(code):
            logging.warning('Skipped invalid course name: %s', code)
            continue

        details = fetch_course(code)
        if not details:
            continue

        # Checked in order: not yet taught, then no longer taught.
        if (semester.year < details['taughtFromYear'] or
                (details['lastYearTaught'] and
                 semester.year > details['lastYearTaught'])):
            continue

        taught_now = (
            (semester.type == semester.FALL and details['taughtInAutumn']) or
            (semester.type == semester.SPRING and details['taughtInSpring']))
        if taught_now:
            yield details
Пример #6
0
    def scrape(self):
        """Yield a dict for every course listed as taught in ``self.semester``.

        Fetches the course listing at ``www.ntnu.no/web/studier/emnesok``,
        follows each anchor whose href contains ``/studier/emner/`` and
        turns the ``.infoBox`` elements of the course page into a nested
        dict of the form ``{box title: {key: [values]}}``.

        A course is skipped when no code can be extracted from its link,
        when ``ntnu.valid_course_code`` or ``self.should_proccess_course``
        rejects the code, when the page has no 'Undervisning' / 'Undervises'
        entry, or when the label expected for the semester is not listed
        there.

        Yields:
            dict with the keys 'code', 'name', 'version', 'points' and 'url'.

        Raises:
            KeyError: if a course page passes the checks above but lacks
                'Fakta om emnet' / 'Versjon' or 'Studiepoeng'.
        """
        # Fall semesters use their own year for the lookup; every other
        # semester type uses the preceding year.
        if self.semester.type == Semester.FALL:
            year = self.semester.year
        else:
            year = self.semester.year - 1

        # The label required under 'Undervises' is the same for every
        # course, so compute it once. None disables the check.
        if self.semester.type == Semester.FALL:
            wanted = u'HØST %s' % year
        elif self.semester.type == Semester.SPRING:
            wanted = u'VÅR %s' % year
        else:
            wanted = None

        code_re = re.compile('/studier/emner/([^/]+)/', re.I | re.L)

        url = 'http://www.ntnu.no/web/studier/emnesok'
        query = {
            'p_p_lifecycle': '2',
            'p_p_id': 'courselistportlet_WAR_courselistportlet_INSTANCE_m8nT',
            '_courselistportlet_WAR_courselistportlet_INSTANCE_m8nT_year': year
        }

        courses_root = fetch.html(url, query=query, verbose=True)
        for a in courses_root.cssselect('a[href*="/studier/emner/"]'):
            course_url = a.attrib['href']

            # The CSS selector is looser than the pattern (it does not need
            # a '<code>/' segment), so a non-matching link is skipped rather
            # than allowed to fail on ``None.group``.
            match = code_re.search(course_url)
            if match is None:
                continue
            code = match.group(1)
            name = a.text_content()

            if not ntnu.valid_course_code(code):
                continue
            elif not self.should_proccess_course(code):
                continue

            # Only quote codes that will actually be fetched.
            quoted_code = urllib.quote(code.encode('utf-8'))

            title = None
            data = {}
            root = fetch.html('http://www.ntnu.no/studier/emner/%s/%s' %
                              (quoted_code, year))

            # Construct dict out of info boxes. ``title`` carries over
            # between boxes: entries go under the most recent <h3> seen (or
            # None when none has been seen yet).
            for box in root.cssselect('.infoBox'):
                for child in box.getchildren():
                    if child.tag == 'h3':
                        title = child.text_content()
                        continue

                    # Leading text of the element plus the text trailing
                    # each of its sub-elements.
                    parts = [child.text or '']
                    parts.extend(sub.tail or '' for sub in child.getchildren())

                    for part in parts:
                        if ':' not in part:
                            continue
                        key, value = part.split(':', 1)
                        key = key.strip(u' \n\xa0')
                        value = value.strip(u' \n\xa0')
                        section = data.setdefault(title, {})
                        section.setdefault(key, []).append(value)

            try:
                semesters = data['Undervisning']['Undervises']
            except KeyError:
                continue

            if wanted is not None and wanted not in semesters:
                continue

            yield {
                'code': code,
                'name': name,
                'version': int(data['Fakta om emnet']['Versjon'][0]),
                'points': float(data['Fakta om emnet']['Studiepoeng'][0]),
                'url': course_url
            }