def process(API_DIRECTORY, url):
    """Build one legislative dossier ("dosleg") from its URL: download,
    sanity-check, parse its texts and format the result for the frontend.

    API_DIRECTORY -- directory where downloaded and generated files live.
    url -- dossier URL to process.

    Flags read from sys.argv:
      --enable-cache      enable the HTTP requests cache
      --only-promulgated  skip dossiers without a JO link
      --quiet             suppress most console output
    """
    only_promulgated = '--only-promulgated' in sys.argv
    verbose = '--quiet' not in sys.argv
    # Cache is opt-in via the flag; clearer than the previous double
    # negative (disable_cache = '--enable-cache' not in sys.argv).
    if '--enable-cache' in sys.argv:
        enable_requests_cache()
    with log_print(io.StringIO()) as log:
        try:
            if verbose:
                print('======')
                print(url)
            dos, an_dos, senat_dos = download_merged_dos(url, log=log, verbose=verbose)
            if not dos:
                return
            if verbose:
                print(' title:', dos.get('long_title'))
            find_anomalies([dos], verbose=verbose)
            if not dos.get('url_jo') and only_promulgated:
                if verbose:
                    print(' ----- passed: no JO link')
                return
            if not verbose:
                # In quiet mode, only print the header for dossiers that
                # are actually processed (skipped ones stay silent).
                print()
                print('======')
                print(url)
            debug_file(an_dos, 'debug_an_dos.json')
            debug_file(senat_dos, 'debug_senat_dos.json')
            debug_file(dos, 'debug_dos.json')
            # download the groupes in case they are not there yet
            download_groupes(API_DIRECTORY)
            # Add potential common name from Legifrance's "Lois dites"
            common_laws = download_lois_dites(API_DIRECTORY)
            # Hoist the cid lookup instead of repeating the subscript.
            cid = dos.get('legifrance_cidTexte')
            if cid in common_laws and common_laws[cid].lower() not in dos['short_title'].lower():
                dos['loi_dite'] = common_laws[cid]
            print(' [] parse the texts')
            dos_with_texts = parse_doslegs_texts.process(dos)
            print(' [] format data for the frontend')
            format_data_for_frontend.process(dos_with_texts, API_DIRECTORY, log=log)
        except KeyboardInterrupt:
            # Never turn a Ctrl-C into an error-log dump.
            raise
        except Exception as e:
            # dump log for each failed doslegs in logs/
            dump_error_log(url, e, API_DIRECTORY, log)
            # Bare raise preserves the original traceback cleanly.
            raise
def process(API_DIRECTORY, url):
    """Fetch, check and build one legislative dossier.

    Returns the merged dossier dict on success; returns None when the
    dossier is skipped (--only-promulgated and no JO link).  On failure,
    dumps a per-dossier error log before leaving.
    """
    skip_unpromulgated = '--only-promulgated' in sys.argv
    console_quiet = '--quiet' in sys.argv
    if '--enable-cache' in sys.argv:
        enable_requests_cache()

    # Stays None if the download fails; the except block reads it to
    # decide which log directory to use.
    dos = None
    with log_print(only_log=console_quiet) as log:
        try:
            print('======')
            print(url)

            # download the AN open data or just retrieve the last stored version
            an_opendata = download_AN_opendata(API_DIRECTORY)
            dos, _an_dos, _senat_dos = download_merged_dos(url, an_opendata, log=log)
            if not dos:
                raise Exception('Nothing found at %s' % url)

            find_anomalies([dos])

            if skip_unpromulgated and not dos.get('url_jo'):
                print(' ----- passed: no JO link')
                return

            print(' title:', dos.get('long_title'))
            debug_file(dos, 'dos.json')

            # download the groupes in case they are not there yet
            download_groupes(API_DIRECTORY)

            # Add potential common name from Legifrance's "Lois dites"
            common_laws = download_lois_dites(API_DIRECTORY)
            cid = dos.get('legifrance_cidTexte')
            if cid in common_laws and common_laws[cid].lower() not in dos['short_title'].lower():
                dos['loi_dite'] = common_laws[cid]

            print(' [] parse the texts')
            parsed = parse_doslegs_texts.process(dos)

            print(' [] format data for the frontend')
            format_data_for_frontend.process(parsed, API_DIRECTORY, log=log)
            return dos
        except KeyboardInterrupt as e:
            # bypass the error log dump when doing Ctrl-C
            raise e
        except Exception as e:
            print(*traceback.format_tb(e.__traceback__), e, sep='', file=log)
            # dump log for each failed doslegs in logs/
            logdir = 'logs-encours' if dos and not dos.get('url_jo') else 'logs'
            dump_error_log(url, e, API_DIRECTORY, logdir, log)
def test_dosleg_regressions():
    """Regression-test the dossier parser against verified fixtures.

    For each directory under resources/verified_dosleg/, parse input.html
    and compare it (as canonical JSON) to output.json; also check the
    comparison scores against lawfactory/anpy/legipy reference files when
    they are present.  All files are now opened with context managers —
    the previous version leaked every file handle it opened.
    """
    enable_requests_cache()
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DIR = join(BASE_DIR, 'resources/verified_dosleg/')
    for test_dir in os.listdir(DIR):
        path = join(DIR, test_dir)
        print('## try', path)
        with open(join(path, 'input.html')) as f:
            data = parse(f.read())
        with open(join(path, 'output.json')) as f:
            output = json.load(f)
        # Canonical serialization so dict ordering can't cause a false diff.
        data_json = json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True)
        output_json = json.dumps(output, ensure_ascii=False, indent=2, sort_keys=True)
        if data_json != output_json:
            # Print the diff before failing so the mismatch is visible.
            diff = difflib.unified_diff(data_json.split('\n'), output_json.split('\n'))
            for line in diff:
                print(line)
        assert data_json == output_json
        if os.path.exists(join(path, 'lawfactory.json')):
            with open(join(path, 'lawfactory.json')) as f:
                proc = json.load(f)
            score_ok, score_nok = compare(proc, output)
            with open(join(path, 'lawfactory_scores')) as f:
                last_score_ok, last_score_nok = [int(x) for x in f.read().split('\n') if x]
            assert score_ok == last_score_ok
            assert score_nok == last_score_nok
        if os.path.exists(join(path, 'anpy.json')):
            with open(join(path, 'anpy.json')) as f:
                anpy = json.load(f)
            with open(join(path, 'anpy_scores')) as f:
                assert compare_anpy(anpy, output) == f.read()
        if os.path.exists(join(path, 'legipy.json')):
            with open(join(path, 'legipy.json')) as f:
                legipy = json.load(f)
            score_ok, score_nok = compare_legipy(legipy, output)
            with open(join(path, 'legipy_scores')) as f:
                last_score_ok, last_score_nok = [int(x) for x in f.read().split('\n') if x]
            assert score_ok == last_score_ok
            assert score_nok == last_score_nok
def test_dosleg_smoketest():
    """Smoke test: the parser must not crash on any recent dossier page.

    Feeds every file under resources/recents_dosleg/ to parse(); only an
    exception fails the test.  Fixes two issues from the old version:
    the file handle was never closed, and the loop variable shadowed the
    (removed-in-py3 but still conventional) builtin name `file`.
    """
    enable_requests_cache()
    base_dir = os.path.dirname(os.path.abspath(__file__))
    resources_dir = os.path.join(base_dir, 'resources/recents_dosleg/')
    for filename in os.listdir(resources_dir):
        path = os.path.join(resources_dir, filename)
        print('## try', path)
        with open(path) as f:
            parse(f.read())
def process(API_DIRECTORY, url):
    """Process a single legislative dossier ("dosleg") from its URL.

    Downloads the merged AN/Senat dossier, checks it for anomalies,
    enriches it with common-law names, parses its texts and formats the
    result for the frontend.  Returns the merged dossier dict on
    success, None when the dossier is skipped (--only-promulgated with
    no JO link yet).

    Flags read from sys.argv:
      --enable-cache      enable the HTTP requests cache
      --only-promulgated  skip dossiers without a JO link
      --quiet             write to the log only, keep the console quiet
    """
    only_promulgated = '--only-promulgated' in sys.argv
    quiet = '--quiet' in sys.argv
    if '--enable-cache' in sys.argv:
        enable_requests_cache()
    # dos stays None if the download below fails; the except handler
    # reads it to choose the error-log directory.
    dos = None
    with log_print(only_log=quiet) as log:
        try:
            print('======')
            print(url)
            # download the AN open data or just retrieve the last stored version
            opendata_an = download_AN_opendata(API_DIRECTORY)
            dos, an_dos, senat_dos = download_merged_dos(url, opendata_an, log=log)
            if not dos:
                raise Exception('Nothing found at %s' % url)
            find_anomalies([dos])
            if not dos.get('url_jo') and only_promulgated:
                print(' ----- passed: no JO link')
                return
            print(' title:', dos.get('long_title'))
            debug_file(dos, 'dos.json')
            # download the groupes in case they are not there yet
            download_groupes(API_DIRECTORY)
            # Add potential common name from Legifrance's "Lois dites"
            # (only when it is not already part of the short title).
            common_laws = download_lois_dites(API_DIRECTORY)
            if dos.get('legifrance_cidTexte') in common_laws and common_laws[dos['legifrance_cidTexte']].lower() not in dos['short_title'].lower():
                dos['loi_dite'] = common_laws[dos['legifrance_cidTexte']]
            print(' [] parse the texts')
            dos_with_texts = parse_doslegs_texts.process(dos)
            print(' [] format data for the frontend')
            format_data_for_frontend.process(dos_with_texts, API_DIRECTORY, log=log)
            return dos
        except KeyboardInterrupt as e:
            # bypass the error log dump when doing Ctrl-C
            raise e
        except Exception as e:
            print(*traceback.format_tb(e.__traceback__), e, sep='', file=log)
            # dump log for each failed doslegs in logs/
            logdir = 'logs'
            if dos and not dos.get('url_jo'):
                # dossier still in progress: keep its failures apart
                logdir = 'logs-encours'
            dump_error_log(url, e, API_DIRECTORY, logdir, log)
def test_dosleg_regressions():
    """Check parser output against the verified_dosleg fixtures.

    Each fixture directory provides input.html and the expected
    output.json; optional lawfactory/anpy/legipy files pin comparison
    scores.  Every file is opened via a `with` block — the previous
    version never closed any of the handles it opened.
    """
    enable_requests_cache()
    base_dir = os.path.dirname(os.path.abspath(__file__))
    fixtures_dir = join(base_dir, 'resources/verified_dosleg/')
    for test_dir in os.listdir(fixtures_dir):
        path = join(fixtures_dir, test_dir)
        print('## try', path)
        with open(join(path, 'input.html')) as f:
            data = parse(f.read())
        with open(join(path, 'output.json')) as f:
            output = json.load(f)
        # Serialize both sides canonically (sorted keys) before comparing.
        data_json = json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True)
        output_json = json.dumps(output, ensure_ascii=False, indent=2, sort_keys=True)
        if data_json != output_json:
            # Show a unified diff so the failing assert is diagnosable.
            for diff_line in difflib.unified_diff(data_json.split('\n'), output_json.split('\n')):
                print(diff_line)
        assert data_json == output_json
        if os.path.exists(join(path, 'lawfactory.json')):
            with open(join(path, 'lawfactory.json')) as f:
                proc = json.load(f)
            score_ok, score_nok = compare(proc, output)
            with open(join(path, 'lawfactory_scores')) as f:
                last_score_ok, last_score_nok = [int(x) for x in f.read().split('\n') if x]
            assert score_ok == last_score_ok
            assert score_nok == last_score_nok
        if os.path.exists(join(path, 'anpy.json')):
            with open(join(path, 'anpy.json')) as f:
                anpy = json.load(f)
            with open(join(path, 'anpy_scores')) as f:
                assert compare_anpy(anpy, output) == f.read()
        if os.path.exists(join(path, 'legipy.json')):
            with open(join(path, 'legipy.json')) as f:
                legipy = json.load(f)
            score_ok, score_nok = compare_legipy(legipy, output)
            with open(join(path, 'legipy_scores')) as f:
                last_score_ok, last_score_nok = [int(x) for x in f.read().split('\n') if x]
            assert score_ok == last_score_ok
            assert score_nok == last_score_nok
print("ERROR: could not find visa in decision CC", url, file=sys.stderr) return None decision_txt = decision_src.split('<a name=\'visa\' id="visa"></a>')[1] if not re_delibere.search(decision_txt): print("ERROR: could not find siège in décision CC", url, file=sys.stderr) return None decision_txt = clean_delib(decision_txt) return strip_text(decision_txt) def get_decision_length(url): decision_txt = extract_full_decision(url) if not decision_txt: return -1 return len(decision_txt) if __name__ == "__main__": enable_requests_cache() if len(sys.argv) == 2: with open(sys.argv[1]) as f: for url in f.readlines(): url = url.strip() print(url, ':', get_decision_length(url)) else: print(extract_full_decision(sys.argv[1]))
# Matches the closing block of a CC decision ("Jugé/Délibéré par le
# Conseil constitutionnel ...") through to the end of the text (re.S
# makes '.' match newlines, so everything after the marker is captured).
re_delibere = re.compile(r"<p>\s*(Jug|Délibér)é par le Conseil constitutionnel .*$", re.S)

# HTML anchor marking the start of the "visa" section of a decision
# page; hoisted to a constant instead of being duplicated inline.
VISA_ANCHOR = '<a name=\'visa\' id="visa"></a>'


def clean_delib(text):
    """Return *text* with the trailing 'délibéré' block removed."""
    # Proper def instead of a lambda assignment (PEP 8, E731).
    return re_delibere.sub("", text)


def extract_full_decision(url):
    """Download a Conseil constitutionnel decision and return its body.

    Returns the cleaned, stripped decision text (everything between the
    visa anchor and the final 'délibéré' block), or None when the page
    does not have the expected layout.
    """
    decision_src = download(url).text
    if VISA_ANCHOR not in decision_src:
        print("ERROR: could not find visa in decision CC", url, file=sys.stderr)
        return None
    decision_txt = decision_src.split(VISA_ANCHOR)[1]
    if not re_delibere.search(decision_txt):
        print("ERROR: could not find siège in décision CC", url, file=sys.stderr)
        return None
    decision_txt = clean_delib(decision_txt)
    return strip_text(decision_txt)


def get_decision_length(url):
    """Length in characters of the decision at *url*, or -1 on failure."""
    decision_txt = extract_full_decision(url)
    if not decision_txt:
        return -1
    return len(decision_txt)


if __name__ == "__main__":
    enable_requests_cache()
    if len(sys.argv) == 2:
        # Argument is a file listing one decision URL per line.
        with open(sys.argv[1]) as f:
            for line in f:  # iterate lazily instead of readlines()
                url = line.strip()
                print(url, ':', get_decision_length(url))
    else:
        print(extract_full_decision(sys.argv[1]))