Exemplos de year_from_session em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: fiftystates.scrape.ore.utils

Método / Função: year_from_session

Exemplos em hotexamples.com: 2

year_from_session em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de fiftystates.scrape.ore.utils.year_from_session em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Relacionados

genesis_seqs

LayoutSite

start

run

is_valid_ipv4

Ui_MeshToolsPlugin

getLog

NorthAmericanPlantDistributionMap

get_mod

msg

Relacionados em outras linguagens

Proyecto (PHP)

CroutonThrift\NamedCounter (PHP)

SystemUserDomainContext (C#)

JsonSchemaResolver (C#)

PointXYZ (C++)

BLE_HCI_DATA_HANDLE (C++)

Ms2Ss (Go)

Repository (Go)

WiyReportConfiguration (Java)

StringUtility (Java)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: legislators.py Projeto: jsoma/openstates

def scrape(self, chamber, session):
    """Scrape the legislators of one chamber and save each of them.

    Two sources are combined positionally: a CSV feed with contact details
    and an HTML page from which names, parties and district numbers are
    extracted.

    Args:
        chamber: 'upper' selects the senate URLs; any other value selects
            the house URLs.
        session: session identifier; it is passed to year_from_session()
            and forwarded to every Legislator that is created.

    Raises:
        NoDataForPeriod: if year_from_session(session) is not 2010.
    """
    # Function-scope imports (standard library only) keep this block
    # self-contained.
    from collections import OrderedDict
    from contextlib import closing

    if year_from_session(session) != 2010:
        raise NoDataForPeriod(session)

    if chamber == 'upper':
        url_piece = 'senate'
        url_piece2 = 'senator'
    else:
        url_piece = 'house'
        url_piece2 = 'representative'

    # Read the CSV eagerly so the HTTP response can be closed immediately
    # instead of staying open for the rest of the scrape.
    with closing(urllib.urlopen(chambers_url(url_piece))) as leg_csv:
        leg_rows = list(csv.reader(leg_csv))

    with self.urlopen(legs_url(url_piece2)) as leg_page_html:
        leg_page = lxml.html.fromstring(leg_page_html)

        # An ordered mapping keeps the names in page order (a plain dict has
        # arbitrary order on Python 2, which scrambled the zip() below) while
        # still collapsing links that are seen more than once.
        names = OrderedDict()
        for fe in leg_page.cssselect('font'):
            for ne in fe.cssselect('a'):
                link_text = ne.text_content()
                # Stop scanning this <font> at the first link whose text
                # mentions the chamber title.
                if 'Senator' in link_text or 'Representative' in link_text:
                    break
                # NOTE(review): assumes the link text looks like
                # "<name>-<party>"; a link without '-' raises IndexError.
                name_and_party_list = link_text.split('-')
                names[name_and_party_list[0]] = name_and_party_list[1]

        district_matches = re.findall("District: ([0-9]+)",
                                      leg_page.text_content())

    # CSV columns:
    # Title,First Name,Last Name,Capitol Address,Capitol Phone,
    # District Address,District Phone,Session Email
    # NOTE(review): rows, districts and names are matched purely by position;
    # if the CSV starts with a header row it is paired with the first
    # legislator -- confirm against the live feed.
    for row, district, (name, party) in zip(leg_rows, district_matches,
                                            names.items()):
        # NOTE(review): 'district_adress' looks like a typo for
        # 'district_address'; kept unchanged because it is the key under
        # which the value is stored.
        leg = Legislator(session, chamber, district, name,
                         row[1], row[2], "", party,
                         capitol_address=row[3], capitol_phone=row[4],
                         district_adress=row[5], district_phone=row[6],
                         session_email=row[7])
        self.save_legislator(leg)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: bills.py Projeto: acmewebservices/openstates

def scrape(self, chamber, session): bills_link = bills_url() bills_sessions_pages = [] with self.urlopen(bills_link) as bills_page_html: bills_page = lxml.html.fromstring(bills_page_html) for element, attribute, link, pos in bills_page.iterlinks(): match = re.search("..(/measures[0-9]{2}s?.html)", link) if match != None: bills_sessions_pages.append(base_url() + match.group(1)) year = year_from_session(session) shortened_year = int(year) % 100 if shortened_year == 00: return pages_for_year = [] for bsp in bills_sessions_pages: if str(shortened_year) in bsp: pages_for_year.append(bsp) measure_pages = [] bill_pages_directory = [] for pfy in pages_for_year: with self.urlopen(pfy) as year_bills_page_html: year_bills_page = lxml.html.fromstring(year_bills_page_html) for element, attribute, link, pos in year_bills_page.iterlinks(): if chamber == 'upper': link_part = 'senmh' else: link_part = 'hsemh' regex = "([0-9]{2}(reg|ss[0-9]))/pubs/" + link_part + ".(html|txt)" match = re.search(regex, link) if match != None: measure_pages.append(base_url() + match.group(0)) bill_pages_directory.append(base_url() + match.group(1) + "/measures/main.html") bill_pages = [] for bp in bill_pages_directory: with self.urlopen(bp) as bills_page_html: bills_page = lxml.html.fromstring(bills_page_html) for element, attribute, link, pos in bills_page.iterlinks(): if re.search(' +.html +', link)!= None: continue base_link = bp.rstrip('main.html') if chamber == 'upper': if link[0] == 's': bill_pages.append(base_link + link.translate(None, '\n')) else: if link[0] == 'h': bill_pages.append(base_link + link.translate(None, '\n')) # Remove unnecesary link bill_pages.pop(0) bills_dict = {} for bp in bill_pages: with self.urlopen(bp) as bills_page_html: bills_page = lxml.html.fromstring(bills_page_html) bills = bills_page.cssselect('a') for b in bills: bill_description = b.text_content() title, sep, version = bill_description.partition('-') splitted_title = title.split() bill_number = 
splitted_title[-1] splitted_title.pop(-1) initials = '' for t in splitted_title: initials += t[0] key = initials + ' ' + bill_number.lstrip('0') link = b.iterlinks().next()[2] try: bills_dict[key] except KeyError: bills_dict[key] = [] bills_dict[key].append((version, base_link + link)) if chamber == 'upper': markers = ('SB', 'SR', 'SJR', 'SJM', 'SCR', 'SM') else: markers = ('HB', 'HR', 'HJR', 'HJM', 'HCR', 'JM') bill_info = {} for mp in measure_pages: with self.urlopen(mp) as measure_page_html: measure_page = lxml.html.fromstring(measure_page_html) measures = measure_page.text_content() lines = measures.split('\n') raw_date = '' action_party = '' key = '' text = '' actions = [] first_bill = True for line in lines: date_match = re.search('([0-9]{1,2}-[0-9]{1,2})(\((S|H)\))? ', line) marker_in_line = False for marker in markers: if marker in line[0:2]: marker_in_line = True break if marker_in_line: if not first_bill: value = bill_info[key] date = dt.datetime.strptime(raw_date + '-' + year, '%m-%d-%Y') actions.append((date, action_party, self.clean_space(text))) value.append(actions) actions = [] else: first_bill = False new_bill = True regex = marker + ' +[0-9]{1,4}' key_match = re.search(regex, line) if key_match == None: print line print regex key = self.clean_space(key_match.group(0)) text = line.split(key)[1] elif date_match != None: if new_bill: bill_info[key] = [self.clean_space(text)] print self.clean_space(text) print new_bill = False else: date = dt.datetime.strptime(raw_date + '-' + year, '%m-%d-%Y') actions.append((date, action_party, self.clean_space(text))) raw_date = date_match.group(1) action_party = date_match.group(2) text = line.split(date_match.group(0))[1] elif line.isspace(): continue elif '---' in line: continue else: text = text + ' ' + line