def get_sitting_urls(assembly_id, div_id, sessionurl):
    # Collect the javascript search calls embedded in the session page links.
    root = get.webpage(get.htmltree(sessionurl))
    js_calls = [parse_js_call(j) for j in root.xpath('.//a/@href')]
    params = match_name_codes(js_calls, filter='mainsearch2', type='sessions')

    nsittings = len(params)
    params['j'] = str(nsittings)

    # Build one search URL per sitting, reusing the extracted session numbers.
    urls = []
    for i in range(nsittings):
        params['SES_NUM'] = params['SES_NUM%s' % i]
        url = '%s&%s' % (sessionurl, urlencode(params))
        # TODO: generalize me
        url = url.replace('con_search2', 'con_search3')
        urls.append({'session_name': params['SES_NUM'], 'url': url})
    return urls
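
# NOTE: parse_js_call is defined elsewhere in this module and is not shown in
# this excerpt. The sketch below is only an inferred illustration of what it
# appears to do given the call sites here (split a "javascript:func('a','b')"
# href into the function name and its string arguments); it is not the actual
# implementation, and the name parse_js_call_sketch is hypothetical.
import re

def parse_js_call_sketch(href):
    """Inferred sketch: return (function_name, [args]) for a javascript: href."""
    match = re.match(r"javascript:(\w+)\((.*)\)", href)
    if not match:
        return (None, [])
    name, arglist = match.groups()
    args = [a.strip().strip("'\"") for a in arglist.split(',') if a.strip()]
    return (name, args)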
def get_session_urls(assembly_id, div_id, listurl):

    def searchform(root, num=''):
        return root.xpath('.//form[@name="searchform%s"]/@action' % num)[0]

    # Collect the javascript search calls embedded in the committee list links.
    root = get.webpage(get.htmltree(listurl))
    js_calls = [parse_js_call(j) for j in root.xpath('.//a/@href')]
    params = match_name_codes(js_calls, filter='mainsearch', type='committees')

    # Each committee contributes a name and a code, hence two params per entry.
    nsessions = len(params) // 2
    params['i'] = str(nsessions)
    params['div'] = str(div_id)
    params['DAE_NUM'] = str(assembly_id)

    # Build one search URL per committee from the form action and the params.
    urls = []
    for i in range(nsessions):
        params['COMM_NAME'] = params['COMM_NAME%s' % i]
        params['COMM_CODE'] = params['COMM_CODE%s' % i]
        urls.append({
            'committee': params['COMM_NAME'],
            'url': '%s/content/%s?%s' % (BASEURL, searchform(root)[:-2], urlencode(params)),
        })
    return urls
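
# NOTE: match_name_codes is also defined elsewhere in this module. The sketch
# below is a rough, inferred illustration of the shape of its return value as
# suggested by the two call sites above (numbered COMM_NAME*/COMM_CODE* keys
# for committees, SES_NUM* keys for sessions); the exact matching rules and
# argument layout are assumptions, and the _sketch name is hypothetical.
def match_name_codes_sketch(js_calls, filter='mainsearch', type='committees'):
    """Inferred sketch: turn matching js calls into numbered query params."""
    params = {}
    matched = [args for name, args in js_calls if name == filter]
    for i, args in enumerate(matched):
        if type == 'committees':
            # Two values per committee link: a display name and a code.
            params['COMM_NAME%s' % i] = args[0]
            params['COMM_CODE%s' % i] = args[1]
        elif type == 'sessions':
            # One value per session link: the session number.
            params['SES_NUM%s' % i] = args[0]
    return params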
def get_doc_ids(assembly_id, div_id, sittingurl):
    root = get.webpage(get.htmltree(sittingurl))
    js_calls = [parse_js_call(j) for j in root.xpath('.//a/@href')]
    return [{'sitting_name': c[1][0], 'docid': c[1][1]}
            for c in js_calls if c[0] == 'mainsearch4']
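
# A hedged usage sketch: the three functions above appear to chain from the
# committee list, to sessions, to sittings, to document ids. LIST_URL and the
# example assembly_id/div_id values are placeholders, not part of this excerpt.
if __name__ == '__main__':
    assembly_id, div_id = 19, 1           # example values, not from this module
    LIST_URL = 'http://example.com/list'  # placeholder entry-point URL
    for session in get_session_urls(assembly_id, div_id, LIST_URL):
        for sitting in get_sitting_urls(assembly_id, div_id, session['url']):
            for doc in get_doc_ids(assembly_id, div_id, sitting['url']):
                print(session['committee'], sitting['session_name'], doc['docid'])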