示例#1
0
def download_catalog_year(college_pages, auth):
    """Fetch catalog-year data for every major of every college.

    For each college in ``COLLEGES`` and each entry of
    ``college_pages[college]``, requests ``SOURCE_URL`` with the query
    parameters from ``URL_PARAMS`` plus ``changeMajor='Next'``,
    ``call='5'``, the college and the major's name. The response is then
    recorded on the major's entry:

    * if the page contains any ``option`` elements, ``'year'`` is set to
      the list produced by applying ``_.strip_tags`` to each of them;
    * otherwise ``'MajorFile'`` is set to the text found between
      ``name=MajorFile value=`` and the next ``>``, and ``'year'`` to the
      result of the nested ``_.js_match`` calls below.

    Args:
        college_pages: Mapping from college to a sequence of dict-like
            major entries, each having a ``'name'`` key. The entries are
            updated in place.
        auth: Object whose ``get(url)`` returns a response exposing a
            ``content`` attribute that supports ``.decode()``
            (presumably a requests session -- confirm against caller).

    Returns:
        The same ``college_pages`` object that was passed in.

    Raises:
        IndexError: If a page without ``option`` elements does not
            contain the ``name=MajorFile value=`` marker.
    """
    # Shallow copy so the extra keys set here do not end up in URL_PARAMS.
    parameters = copy.copy(URL_PARAMS)
    parameters['changeMajor'] = 'Next'
    parameters['call'] = '5'

    for college in COLLEGES:
        parameters['college'] = college
        print('Downloading College', college)
        # Each `major` is the entry stored in college_pages[college], so
        # assigning keys on it updates college_pages in place.
        for major in college_pages[college]:
            print('    Getting Degree', major['name'])
            parameters['major'] = major['name']

            url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
            export_page = auth.get(url).content
            # NOTE(review): no parser is named, so bs4 chooses one itself;
            # consider pinning it once the expected parser is confirmed.
            soup = bs4.BeautifulSoup(export_page)

            options = soup.select('option')
            if options:
                # Several catalog years are offered: keep all of them.
                major['year'] = [_.strip_tags(option) for option in options]
                continue

            # No year selector on the page: pull the values out of the
            # raw markup instead.
            decoded = export_page.decode()
            major['MajorFile'] = decoded.split(
                'name=MajorFile value=')[1].split('>')[0]

            # NOTE(review): the first pattern is a character class followed
            # by four digits, not a literal '<input ...' prefix, and it is
            # applied to the undecoded `export_page` -- confirm both are
            # intended. Raw strings keep the patterns byte-identical while
            # avoiding the invalid '\d' escape in a normal string literal.
            first_match = _.js_match(
                export_page,
                r'/[<input type=hidden name=year year="](\d{4})/')
            major['year'] = _.js_match(first_match, r'/\d{4}/')

    return college_pages
示例#2
0
def download_catalog_year(college_pages, auth):
    """Download the catalog-year details of each major and store them.

    Walks every college in ``COLLEGES`` and every entry of
    ``college_pages[college]``, requesting ``SOURCE_URL`` with the
    ``URL_PARAMS`` query parameters extended by ``changeMajor='Next'``,
    ``call='5'``, the college and the major's name. Depending on the
    response, the major's entry receives:

    * ``'year'`` -- the list of ``_.strip_tags`` results for each
      ``option`` element, when the page has any; or
    * ``'MajorFile'`` -- the text between ``name=MajorFile value=`` and
      the following ``>``, together with ``'year'`` -- the result of the
      two chained ``_.js_match`` calls.

    Args:
        college_pages: Mapping from college to a sequence of dict-like
            major entries with a ``'name'`` key; modified in place.
        auth: Object providing ``get(url)`` whose result has a
            ``content`` attribute supporting ``.decode()`` (presumably a
            requests session -- confirm against caller).

    Returns:
        ``college_pages`` itself, after the updates.

    Raises:
        IndexError: If a page with no ``option`` elements lacks the
            ``name=MajorFile value=`` marker.
    """
    # Work on a shallow copy so URL_PARAMS does not gain the keys set here.
    parameters = copy.copy(URL_PARAMS)
    parameters['changeMajor'] = 'Next'
    parameters['call'] = '5'

    for college in COLLEGES:
        parameters['college'] = college
        print('Downloading College', college)
        # `major` is the entry held by college_pages[college]; writing to
        # it is what updates college_pages.
        for major in college_pages[college]:
            print('    Getting Degree', major['name'])
            parameters['major'] = major['name']

            url = SOURCE_URL + '?' + urllib.parse.urlencode(parameters)
            export_page = auth.get(url).content
            # NOTE(review): parser is left for bs4 to pick; pin it once
            # the expected parser is confirmed.
            soup = bs4.BeautifulSoup(export_page)

            options = soup.select('option')
            if options:
                # The page offers a choice of years: record every option.
                major['year'] = [_.strip_tags(option) for option in options]
            else:
                # No selector present: read the values from the raw markup.
                decoded = export_page.decode()
                after_marker = decoded.split('name=MajorFile value=')[1]
                major['MajorFile'] = after_marker.split('>')[0]

                # NOTE(review): the first pattern is a character class
                # plus four digits rather than a literal prefix, and it
                # runs on the undecoded `export_page` -- confirm intent.
                # Raw strings leave the patterns unchanged but avoid the
                # invalid '\d' escape of a normal string literal.
                first_match = _.js_match(
                    export_page,
                    r'/[<input type=hidden name=year year="](\d{4})/')
                major['year'] = _.js_match(first_match, r'/\d{4}/')

    return college_pages
示例#3
0
def test_js_match(case, expected):
    """Check that ``_.js_match`` called with the unpacked ``case`` equals ``expected``."""
    actual = _.js_match(*case)
    assert actual == expected