Пример #1
0
def get_all_ctecs(subject, caesar_scraper=None):
    """Scrape every CTEC listed for ``subject`` and save those matching one course.

    For each course the scraper lists, every CTEC is downloaded and looked up
    in the ``courses`` collection by term, catalog number, subject and section.
    When that lookup is ambiguous it is narrowed by instructor name.  A CTEC is
    saved to ``ctecs`` under the matching course's ``_id`` only when exactly
    one course matches; otherwise an error is logged and nothing is saved.

    Args:
        subject: Subject code handed to the scraper; also compared against the
            first token of each CTEC's ``subj`` field.
        caesar_scraper: Scraper to use.  A new ``CaesarScraper`` is created
            when omitted.
    """
    logging.debug('Starting %s', subject)
    if caesar_scraper is None:
        caesar_scraper = CaesarScraper()

    for i, current_class_title in caesar_scraper.get_courses(subject):
        logging.debug("Starting %s %s %s", subject, current_class_title, i)
        # Depends only on the course title, so compute it before any CTEC is
        # fetched: the download-failure log below needs it even when the very
        # first CTEC of the course cannot be downloaded.
        current_catalog_num = current_class_title.split(":")[0]

        for j, quarter in caesar_scraper.get_ctecs(subject, i):
            ctec = caesar_scraper.get_ctec(subject, j)
            if ctec == {}:
                logging.error("Could not download %s %s %s %s %s",
                              subject, current_catalog_num, quarter, i, j)
                continue

            # First token of the CTEC title is split on "-": the first two
            # parts form the catalog number, the third is the section.
            class_code_parts = ctec['class_title'].split()[0].split("-")
            original_catalog_num = "-".join(class_code_parts[0:2])

            ctec['current_class_title'] = current_class_title

            section = class_code_parts[2]
            subj = ctec['subj'].split()[0]

            course_filter = {
                "term": quarter,
                "catalog_num": original_catalog_num,
                "subject": subj,
                "section": section,
            }
            courses_query = courses.find(course_filter)
            # Cache the count so each decision below does not re-query.
            match_count = courses_query.count()

            if match_count > 1:
                # Ambiguous: narrow by instructor, allowing anything between
                # the whitespace-separated parts of the name.
                # NOTE(review): the name is used as a raw regex; a name with
                # regex metacharacters would change the pattern - confirm
                # whether escaping is wanted.
                instructor_filter = dict(course_filter)
                instructor_filter["instructor.name"] = {
                    "$regex": ".*".join(ctec['instructor'].split())
                }
                courses_query = courses.find(instructor_filter)
                match_count = courses_query.count()
                if match_count > 1:
                    logging.error(
                        "%s too many courses found for %s %s %s %s %s",
                        j, quarter, original_catalog_num, subj, section,
                        ctec['instructor'])
                elif match_count == 0:
                    logging.error(
                        "%s no courses found for %s %s %s %s %s",
                        j, quarter, original_catalog_num, subj, section,
                        ctec['instructor'])
            elif match_count == 0:
                logging.error("%s no courses found for %s %s %s %s",
                              j, quarter, original_catalog_num, subj, section)

            if match_count == 1:
                course = list(courses_query)[0]
                ctec['_id'] = course['_id']
                ctecs.save(ctec)
                logging.debug("Saved %s %s %s %s %s",
                              subject, current_catalog_num, quarter, i, j)

            # BUG, if the course starts with 300, CTEC thinks its part of the graduate school
            # for some subjects such as EECS
            if original_catalog_num.startswith("3") or subj != subject:
                # sometimes after getting a single ctec, we need to get the courses AND ctecs again
                # because it routes back to the original search page
                for action in ("NW_CT_PB_SRCH_ACAD_CAREER",
                               "NW_CT_PB_SRCH_SUBJECT"):
                    caesar_scraper.post_doc(
                        caesar_scraper.CTEC_URL,
                        data={
                            "ICAction": action,
                            "NW_CT_PB_SRCH_ACAD_CAREER": "UGRD",
                            "NW_CT_PB_SRCH_SUBJECT": subject,
                            "NW_CT_PB_SRCH_NW_CTEC_SRCH_CHOIC$4$": "C",
                        })
                caesar_scraper.get_courses(subject)
                caesar_scraper.get_ctecs(subject, i)
            elif subj in ("AAL", "AF_AM_ST"):
                # sometimes after getting a single ctec, we need to get ctecs again
                # because it routes back to the list of courses page
                caesar_scraper.get_ctecs(subject, i)

        # after getting all the ctecs for a single course, we need to get_courses again
        caesar_scraper.get_courses(subject)
    logging.debug('Finished %s', subject)
Пример #2
0
def get_all_ctecs(subject, caesar_scraper=None):
    """Scrape every CTEC listed for ``subject`` and save those matching one course.

    For each course the scraper lists, every CTEC is downloaded and looked up
    in the ``courses`` collection by term, catalog number, subject and section.
    When that lookup is ambiguous it is narrowed by instructor name.  A CTEC is
    saved to ``ctecs`` under the matching course's ``_id`` only when exactly
    one course matches; otherwise an error is logged and nothing is saved.

    Args:
        subject: Subject code handed to the scraper; also compared against the
            first token of each CTEC's ``subj`` field.
        caesar_scraper: Scraper to use.  A new ``CaesarScraper`` is created
            when omitted.
    """
    logging.debug('Starting %s', subject)
    if caesar_scraper is None:
        caesar_scraper = CaesarScraper()

    for i, current_class_title in caesar_scraper.get_courses(subject):
        logging.debug("Starting %s %s %s", subject, current_class_title, i)
        # Depends only on the course title, so compute it before any CTEC is
        # fetched: the download-failure log below needs it even when the very
        # first CTEC of the course cannot be downloaded.
        current_catalog_num = current_class_title.split(":")[0]

        for j, quarter in caesar_scraper.get_ctecs(subject, i):
            ctec = caesar_scraper.get_ctec(subject, j)
            if ctec == {}:
                logging.error("Could not download %s %s %s %s %s",
                              subject, current_catalog_num, quarter, i, j)
                continue

            # First token of the CTEC title is split on "-": the first two
            # parts form the catalog number, the third is the section.
            class_code_parts = ctec['class_title'].split()[0].split("-")
            original_catalog_num = "-".join(class_code_parts[0:2])

            ctec['current_class_title'] = current_class_title

            section = class_code_parts[2]
            subj = ctec['subj'].split()[0]

            course_filter = {
                "term": quarter,
                "catalog_num": original_catalog_num,
                "subject": subj,
                "section": section,
            }
            courses_query = courses.find(course_filter)
            # Cache the count so each decision below does not re-query.
            match_count = courses_query.count()

            if match_count > 1:
                # Ambiguous: narrow by instructor, allowing anything between
                # the whitespace-separated parts of the name.
                # NOTE(review): the name is used as a raw regex; a name with
                # regex metacharacters would change the pattern - confirm
                # whether escaping is wanted.
                instructor_filter = dict(course_filter)
                instructor_filter["instructor.name"] = {
                    "$regex": ".*".join(ctec['instructor'].split())
                }
                courses_query = courses.find(instructor_filter)
                match_count = courses_query.count()
                if match_count > 1:
                    logging.error(
                        "%s too many courses found for %s %s %s %s %s",
                        j, quarter, original_catalog_num, subj, section,
                        ctec['instructor'])
                elif match_count == 0:
                    logging.error(
                        "%s no courses found for %s %s %s %s %s",
                        j, quarter, original_catalog_num, subj, section,
                        ctec['instructor'])
            elif match_count == 0:
                logging.error("%s no courses found for %s %s %s %s",
                              j, quarter, original_catalog_num, subj, section)

            if match_count == 1:
                course = list(courses_query)[0]
                ctec['_id'] = course['_id']
                ctecs.save(ctec)
                logging.debug("Saved %s %s %s %s %s",
                              subject, current_catalog_num, quarter, i, j)

            # BUG, if the course starts with 300, CTEC thinks its part of the graduate school
            # for some subjects such as EECS
            if original_catalog_num.startswith("3") or subj != subject:
                # sometimes after getting a single ctec, we need to get the courses AND ctecs again
                # because it routes back to the original search page
                for action in ("NW_CT_PB_SRCH_ACAD_CAREER",
                               "NW_CT_PB_SRCH_SUBJECT"):
                    caesar_scraper.post_doc(
                        caesar_scraper.CTEC_URL,
                        data={
                            "ICAction": action,
                            "NW_CT_PB_SRCH_ACAD_CAREER": "UGRD",
                            "NW_CT_PB_SRCH_SUBJECT": subject,
                            "NW_CT_PB_SRCH_NW_CTEC_SRCH_CHOIC$4$": "C",
                        })
                caesar_scraper.get_courses(subject)
                caesar_scraper.get_ctecs(subject, i)
            elif subj in ("AAL", "AF_AM_ST"):
                # sometimes after getting a single ctec, we need to get ctecs again
                # because it routes back to the list of courses page
                caesar_scraper.get_ctecs(subject, i)

        # after getting all the ctecs for a single course, we need to get_courses again
        caesar_scraper.get_courses(subject)
    logging.debug('Finished %s', subject)
Пример #3
0
from models import courses
import csv

# Columns of the exported CSV, in output order.
course_fieldnames = [
    "id", "term", "year", "quarter", "course_id", "class_num", "school",
    "subject", "catalog_num", "section", "title", "instructor", "start_time",
    "end_time", "meeting_days"
]
fieldnames = course_fieldnames

# Export every course whose term is "2015 Spring" to courses.csv.
with open("courses.csv", "w") as f:
    # extrasaction="ignore" drops document keys that are not CSV columns, so
    # no manual filtering (and no Python 2-only dict.iteritems()) is needed.
    writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
    writer.writeheader()
    for course in courses.find({"term": "2015 Spring"}):
        course = dict(course)
        course['id'] = course.pop('_id')
        # The term string is split on whitespace into year and quarter.
        term_parts = course['term'].split()
        course['year'] = term_parts[0]
        course['quarter'] = term_parts[1]
        # Flatten the nested instructor document to just the name.
        course['instructor'] = course['instructor']['name']
        writer.writerow(course)
Пример #4
0
from models import courses, ctecs

# Print the term, instructor name and CTEC essay statements for every WCAS
# ECON 281-0 course whose instructor name matches "Walker".
for course in courses.find({
        "school": "WCAS",
        "subject": "ECON",
        "catalog_num": "281-0",
        "instructor.name": {
            "$regex": ".*Walker.*"
        }
}):

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    print("%s %s" % (course['term'], course['instructor']['name']))
    # Look up the CTEC document that shares this course's _id; may be absent.
    ctec = ctecs.find_one({"_id": course['_id']})
    if ctec:
        # The essay text is split on "/" and each piece printed on its own line.
        for statement in ctec['essay'].split("/"):
            print(statement)
    print("-------------------------")
Пример #5
0
from models import courses, ctecs

# Print the term, instructor name and CTEC essay statements for every WCAS
# ECON 281-0 course whose instructor name matches "Walker".
for course in courses.find({
        "school": "WCAS",
        "subject": "ECON",
        "catalog_num": "281-0",
        "instructor.name": {"$regex" : ".*Walker.*"}
}):

    # Single-argument print(...) produces the same output on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    print("%s %s" % (course['term'], course['instructor']['name']))
    # Look up the CTEC document that shares this course's _id; may be absent.
    ctec = ctecs.find_one({"_id": course['_id']})
    if ctec:
        # The essay text is split on "/" and each piece printed on its own line.
        for statement in ctec['essay'].split("/"):
            print(statement)
    print("-------------------------")