def parse_department_page(url, q):
    """Scrape a department listing and attach every course on it to ``q``.

    The page is fetched through ``get_quarter(url, "<q.quarter> <q.year>")``
    and parsed with BeautifulSoup.  Each element with class ``resultrow``
    yields one course:

    * the text node following the link inside the ``name`` element is the
      label; its first space-separated token is the department and its
      second token, cut at the first ``/``, is the code (whatever follows
      the ``/`` is dropped -- presumably a section suffix, TODO confirm);
    * the string of the ``two`` element is the course name.

    For every row the matching ``Course`` (same name, department and code)
    is fetched, or created if it does not exist yet, and ``q`` is added to
    its ``quarter_set``.

    Args:
        url: Address of the department page.
        q: Quarter object; ``q.quarter`` and ``q.year`` select the term to
            fetch and ``q`` itself is linked to each course.

    Raises:
        AttributeError: If a row lacks the expected ``name`` link / text
            node or the ``two`` string.
        IndexError: If a row label does not contain a space.
    """
    html = get_quarter(url, q.quarter + " " + str(q.year))
    soup = BeautifulSoup(html, "lxml")

    # Parse the whole page before touching the database so that a malformed
    # row aborts the run before anything has been written.
    listings = []
    for row in soup(class_="resultrow"):
        label = row.find(class_="name").a.next_sibling.strip()
        title = row.find(class_="two").string.strip()
        tokens = label.split(" ")
        department = tokens[0]
        code = tokens[1].split("/")[0]
        listings.append((department, code, title))

    for department, code, title in listings:
        # get_or_create replaces the former filter()-then-get() pair: one
        # lookup instead of two, and a single code path for new and
        # existing courses.
        course, _created = Course.objects.get_or_create(
            name=title, department=department, code=code)
        course.quarter_set.add(q)
def scrapecrosslistings(course):
    """Record the cross-listings shown on the detail page of ``course``.

    The detail page is requested via ``get_quarter`` for the quarter in
    ``course.quarter_set`` with the highest ``index`` (the last quarter the
    course was offered, going by the variable name).  Every link inside the
    first ``<p>`` of the element with id ``tabs-1`` is read as
    ``"<department> <code>"``.  The first ``Course`` matching that
    department and code is added to ``course.cross_listings``; when none
    exists, one is created with the name of ``course`` and linked to that
    same quarter first.

    Args:
        course: Course whose cross-listings should be scraped.  It must
            already belong to at least one quarter.

    Returns:
        None in every case.

    Raises:
        IndexError: If ``course`` has no quarters, or a link text does not
            contain a space.
        AttributeError: If a link has no single string as its content.
    """
    lastqoffered = course.quarter_set.extra(order_by=["-index"])[0]
    html = get_quarter(
        "https://classes.uchicago.edu/courseDetail.php?courseName=" + str(course),
        str(lastqoffered))
    soup = BeautifulSoup(html, "lxml")

    # A page without the "tabs-1" container, or without a paragraph inside
    # it, simply has no cross-listings to record.
    tab = soup.find(id="tabs-1")
    crossp = tab.find("p") if tab is not None else None
    if crossp is None:
        return None

    for link in crossp.find_all("a"):
        coursename = link.string.split(" ")
        department = coursename[0]
        code = coursename[1]

        try:
            lastcourseversion = Course.objects.filter(
                department=department, code=code)[0]
        except IndexError:
            # Only "no such course yet" is handled here; database errors
            # and interrupts propagate instead of being swallowed.
            lastcourseversion = Course(
                department=department, code=code, name=course.name)
            lastcourseversion.save()
            lastcourseversion.quarter_set.add(lastqoffered)
            # Parenthesised single argument: prints the same text under
            # both the Python 2 print statement and the print function.
            print("added " + str(lastcourseversion))
        course.cross_listings.add(lastcourseversion)

    return None
# Example #3
# 0
def parse_department_page(url, q):
    """Scrape the department page at ``url`` and add its new courses to ``q``.

    The page is downloaded with ``urllib2.urlopen`` and parsed with
    BeautifulSoup.  Every element with class ``resultrow`` contributes a
    ``[label, name]`` pair: the label is the text following the link inside
    the ``name`` element, the name is the string of the ``two`` element.

    For each pair, a ``Course`` is created, saved and added to ``q.courses``
    only when no course with that name, department and code exists yet.

    NOTE(review): in the code visible here, courses that already exist are
    not added to ``q.courses`` -- confirm that this is intended.
    """
    # NOTE(review): the response object is never explicitly closed.
    response = urllib2.urlopen(url)
    html = response.read()
    soup = BeautifulSoup(html, "lxml")
    classes = []

    # Calling the soup object is BeautifulSoup shorthand for find_all().
    rows =soup(class_="resultrow")

    for row in rows:
        classes.append([
            # Text node that follows the <a> inside the "name" element.
            row.find(class_="name").a.next_sibling.strip(),
            # Course name taken from the "two" element.
            row.find(class_="two").string.strip()
        ])

    for cl in classes:
        # cl[0] is the label: its first space-separated token is the
        # department, the second token up to the first "/" is the code.
        # cl[1] is the course name.
        if not Course.objects.filter(name=cl[1], department=(cl[0].split(" "))[0],
                   code=(((cl[0].split(" "))[1]).split("/"))[0]):
            c = Course(name=cl[1], department=(cl[0].split(" "))[0],
                       code=(((cl[0].split(" "))[1]).split("/"))[0]  )
            c.save()
            q.courses.add(c)