Exemplo n.º 1
0
def scrape():
    """Refresh stored course data from the registrar's course-offerings page.

    Calls ``clean()``, downloads the search-results page for ``TERM`` and
    walks every ``<tr>`` after the first one. For each row whose status is
    not "Cancelled", the ``Entry`` keyed by course number and section is
    fetched or created. When the entry is new, or its enrollment or closed
    flag differs from the scraped values, the entry is saved, the matching
    ``Course`` gets its code and title rewritten and saved, and
    ``updateCourse`` is called for it. Finally the first ``CoursesList``
    object is re-cached.

    Processing is best effort per row: a row that raises (too few cells,
    non-numeric enrollment, missing link, database error, ...) is logged
    and skipped so that one bad row does not abort the whole scrape.
    """
    # Function-scope import: the file's own import block is not visible here.
    import logging

    logger = logging.getLogger(__name__)

    clean()

    # Gets the main page of all classes
    url = "https://registrar.princeton.edu/course-offerings/search_results.xml?term={}".format(
        TERM)

    # Close the HTTP response explicitly instead of leaking the connection.
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)

    # Iterates through all courses; the first row is skipped
    # (presumably the table header -- confirm against the page markup).
    for row in soup.find_all('tr')[1:]:
        try:
            fields = row.find_all('td')

            # Read the status cell once; it drives both the skip and the flag.
            status = fields[10].get_text().strip()
            if status == "Cancelled":
                continue

            # NOTE(review): the course number is taken as characters 28-33 of
            # the link's href; this depends on the exact URL layout -- confirm.
            courseNumber = fields[1].a['href'][28:34]
            section = fields[4].get_text().strip()
            enroll = int(fields[8].get_text().strip())
            # NOTE(review): the original author was unsure whether the cell
            # can read "Closed Closed" instead of "Closed" -- verify.
            closed = status == "Closed"

            entry, created = Entry.objects.get_or_create(
                courseNumber=courseNumber, section=section)
            if created or enroll != entry.totalEnroll or closed != entry.totalClosed:
                entry.totalEnroll = enroll
                entry.totalClosed = closed
                entry.save()

                course, _course_created = Course.objects.get_or_create(
                    number=courseNumber)
                # Cross-listed codes are separated by '\n \n' in the cell text;
                # join them with ' / ' and collapse double spaces.
                course.code = ' / '.join([
                    code.strip().replace("  ", " ")
                    for code in fields[1].text.split('\n \n')
                ])
                course.title = fields[2].text.strip()
                course.save()
                updateCourse(course)
        except Exception:
            # Keep the best-effort behaviour, but leave a traceback behind
            # instead of silently discarding the failure.
            logger.exception("Failed to process course row; skipping")

    # Avoid shadowing the builtin ``list``.
    courses_list = CoursesList.objects.all()[0]
    courses_list.cache()
Exemplo n.º 2
0
import sys,os
sys.path.insert(0,os.path.abspath("/srv/tigerapps"))
import settings
from django.core.management import setup_environ
setup_environ(settings)

from pounce.models import Course, Class, Subscription, Entry, CoursesList
import pounce.log as log
from update import updateCourse, clean


# Refresh every stored course, then rebuild the cached courses list.
log.log('Running updateall.py')

clean()

for course in Course.objects.all():
    try:
        updateCourse(course)
    except Exception as exc:
        # Best effort: a single failing course must not stop the remaining
        # courses from updating or prevent the cache refresh below.
        log.log('Error updating course %s: %s' % (course.title, exc))

# Named ``courses_list`` so the builtin ``list`` is not shadowed.
courses_list = CoursesList.objects.all()[0]
courses_list.cache()
Exemplo n.º 3
0
import sys, os
sys.path.insert(0, os.path.abspath("/srv/tigerapps"))
import settings
from django.core.management import setup_environ
setup_environ(settings)

from pounce.models import Course, Class, Subscription, Entry, CoursesList
import pounce.log as log
from update import updateCourse, clean

# Refresh every stored course, then rebuild the cached courses list.
log.log('Running updateall.py')

clean()

for course in Course.objects.all():
    try:
        updateCourse(course)
    except Exception as exc:
        # Best effort: report the failure (including its cause) and carry on
        # with the remaining courses. The parenthesised single-argument form
        # prints the same text on Python 2 and also parses on Python 3.
        print("Error updating course %s: %s" % (course.title, exc))

# Named ``courses_list`` so the builtin ``list`` is not shadowed.
courses_list = CoursesList.objects.all()[0]
courses_list.cache()