/
get_dept_acronyms.py
36 lines (30 loc) · 1016 Bytes
/
get_dept_acronyms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from bs4 import BeautifulSoup
from string import ascii_lowercase
import requests
import platform
import itertools
import json
def is_python_3():
    """Return True when the running interpreter's major version is 3."""
    # python_version_tuple() yields (major, minor, patchlevel) as strings.
    version_parts = platform.python_version_tuple()
    return version_parts[0] == '3'
# Fail fast on an unsupported interpreter. The function has to be *called*:
# asserting the bare function object is always truthy and checks nothing.
assert is_python_3(), "This script requires Python 3"
def get_html(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            server would block the script indefinitely.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    # The HTTP status is not inspected here: whatever body the server sends
    # back, including an error page, is returned to the caller as-is.
    return requests.get(url, timeout=timeout).text
def extract_depts(parsed_html):
    """Pull department acronyms out of a parsed course-listing page.

    Looks up the element with id ``subjects_DataList``, walks its ``<tr>``
    rows, and for each row takes the text of the first ``<a>`` up to the
    first ``-`` (whitespace-stripped) as the acronym.

    Args:
        parsed_html: Parsed document exposing ``find(id=...)``; in this
            script a ``BeautifulSoup`` object.

    Returns:
        A list of acronym strings in row order. Empty when the listing
        table is absent or contains no usable rows.
    """
    try:
        rows = parsed_html.find(id="subjects_DataList").find_all("tr")
    except AttributeError:
        # find() returned None (no such table on the page), so there is
        # nothing to extract.
        rows = []

    depts = []
    for row in rows:
        link = row.find('a')
        # Rows without a link (e.g. headers/spacers), or whose link has no
        # single text child (``.string`` is None), carry no acronym. Skip
        # them instead of letting one odd row abort the whole scrape.
        label = link.string if link is not None else None
        if label is None:
            continue
        depts.append(label.split("-")[0].strip())
    return depts
def get_start_with_letter(l):
    """Return the department acronyms listed for one starting letter.

    Args:
        l: Letter substituted into the ``u_letter`` query parameter of the
            UCSD course-listing URL.

    Returns:
        The list produced by ``extract_depts`` for that letter's page.
    """
    url = "http://courses.ucsd.edu/default.aspx?u_letter=%s" % l
    html = get_html(url)
    # Name the parser explicitly. With no argument BeautifulSoup emits a
    # warning and picks whichever parser happens to be installed, so the
    # parse tree could differ from one machine to the next. "html.parser"
    # ships with Python and is therefore always available.
    return extract_depts(BeautifulSoup(html, "html.parser"))
def main():
    """Scrape the acronyms for every letter a-z and print them as JSON.

    The output is a single JSON object with one key, ``depts``, holding the
    acronyms in alphabetical-letter order of the pages they came from.
    """
    per_letter = map(get_start_with_letter, ascii_lowercase)
    # Each page yields its own list; join them into one flat list.
    acronyms = list(itertools.chain.from_iterable(per_letter))
    payload = {'depts': acronyms}
    print(json.dumps(payload))
# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()