def extract_seance(element):
    """Describe one public-sitting discussion found in *element*.

    Returns a dict with three keys:

    * ``'url'``  -- ``element.a['href']`` joined onto ``AN_BASE_URL``, or
      ``None`` when ``element.a`` is falsy.
    * ``'date'`` -- the result of ``extract_datetime`` applied to the text
      that follows the first ``'séance du '`` marker in ``element.text``.
    * ``'type'`` -- always ``LegislativeActType.DISCUSSION_SEANCE_PUBLIQUE``.

    Raises ``IndexError`` if ``element.text`` does not contain the
    ``'séance du '`` marker.
    """
    link = element.a
    if link:
        url = urljoin(AN_BASE_URL, link['href'])
    else:
        url = None

    # split()[1] is the segment right after the first marker (and before a
    # second one, should the marker appear twice).
    date_text = element.text.split('séance du ')[1]

    return {
        'url': url,
        'date': extract_datetime(date_text),
        'type': LegislativeActType.DISCUSSION_SEANCE_PUBLIQUE,
    }
def extract_data(self):
    """Return the accelerated-procedure act as a one-item list.

    Returns ``None`` when ``self.elements`` is empty (or otherwise falsy).
    Otherwise returns ``[{'type': ..., 'date': ...}]`` where ``'date'`` is
    ``extract_datetime`` applied to the first `` le <date>`` occurrence in
    the first element's text, or ``None`` if no such occurrence exists.
    """
    nodes = self.elements
    if not nodes:
        return None

    # Only the first " le <day> <month> <year>" occurrence matters.
    found = re.search(r' le (\d+\s?\w* \w+ \d{4})', nodes[0].text, re.I | re.UNICODE)

    act = {'type': LegislativeActType.PROCEDURE_ACCELEREE, 'date': None}
    if found:
        act['date'] = extract_datetime(found.group(1))
    return [act]
def extract_data(self):
    """Return the decision act as a one-item list.

    Returns ``None`` when ``self.elements`` is empty (or otherwise falsy),
    instead of failing with ``IndexError`` on ``self.elements[0]``.

    Otherwise returns a list holding one dict with the keys:

    * ``'type'``   -- always ``LegislativeActType.DECISION``.
    * ``'status'`` -- ``DecisionStatus.ADOPTE``, ``MODIFIE`` or ``REJETE``
      depending on which of ``'adopté'``, ``'modifié'``, ``'rejeté'`` is
      found first (in that priority order) in the first element's text;
      ``None`` when none is present. The check is case-sensitive.
    * ``'date'``   -- ``extract_datetime`` applied to the first
      `` le <date>`` occurrence in that text, or ``None`` if absent.
    * ``'url'``    -- the first element's ``a['href']`` joined onto
      ``AN_BASE_URL``, or ``None`` when the element has no truthy ``a``.
    """
    if not self.elements:
        return None

    node = self.elements[0]
    # Read the text once; every check below works on the same string.
    text = node.text

    status = None
    if 'adopté' in text:
        status = DecisionStatus.ADOPTE
    elif 'modifié' in text:
        status = DecisionStatus.MODIFIE
    elif 'rejeté' in text:
        status = DecisionStatus.REJETE

    matched_dates = re.findall(r' le (\d+\s?\w* \w+ \d{4})', text, re.I | re.UNICODE)
    link = node.a

    return [{
        'type': LegislativeActType.DECISION,
        'status': status,
        'date': extract_datetime(matched_dates[0]) if matched_dates else None,
        'url': urljoin(AN_BASE_URL, link['href']) if link else None,
    }]
def extract_date(self):
    """Return the filing date found in the first element's text.

    Looks for `` déposé le <day> <month> <year>`` (or ``déposée``) in
    ``self.elements[0].text`` and passes the first captured date string to
    ``extract_datetime``.

    Returns ``None`` when ``self.elements`` is empty (or otherwise falsy),
    instead of failing with ``IndexError``, and also when the text holds no
    matching date.
    """
    if not self.elements:
        return None

    matched_dates = re.findall(r' déposée? le (\d+ \w+ \d{4})', self.elements[0].text, re.UNICODE)
    return extract_datetime(matched_dates[0]) if matched_dates else None
def test_extract_date():
    """Check ``extract_datetime`` on French date strings.

    Covers a date with hours and minutes, two dates prefixed by a weekday
    name, and a date with an hour but no minutes.
    """
    cases = (
        ('15 mai 2013 à 14 heures 30', datetime(2013, 5, 15, 14, 30)),
        ('lundi 17 juin 2013', datetime(2013, 6, 17)),
        ('mercredi 11 septembre 2013', datetime(2013, 9, 11)),
        ('24 mars 2015 à 17 heures', datetime(2015, 3, 24, 17, 0)),
    )
    for raw, expected in cases:
        assert extract_datetime(raw) == expected