def process(API_DIRECTORY, url):
    """Build one legislative dossier ("dosleg") from its URL: download,
    sanity-check, parse its texts and format the result for the frontend.

    API_DIRECTORY -- directory where downloaded and generated files live.
    url -- dossier URL to process.

    Flags read from sys.argv:
      --enable-cache      enable the HTTP requests cache
      --only-promulgated  skip dossiers without a JO link
      --quiet             suppress most console output
    """
    only_promulgated = '--only-promulgated' in sys.argv
    verbose = '--quiet' not in sys.argv
    # Cache is opt-in via the flag; clearer than the previous double
    # negative (disable_cache = '--enable-cache' not in sys.argv).
    if '--enable-cache' in sys.argv:
        enable_requests_cache()
    with log_print(io.StringIO()) as log:
        try:
            if verbose:
                print('======')
                print(url)
            dos, an_dos, senat_dos = download_merged_dos(url, log=log, verbose=verbose)
            if not dos:
                return
            if verbose:
                print(' title:', dos.get('long_title'))
            find_anomalies([dos], verbose=verbose)
            if not dos.get('url_jo') and only_promulgated:
                if verbose:
                    print(' ----- passed: no JO link')
                return
            if not verbose:
                # In quiet mode, only print the header for dossiers that
                # are actually processed (skipped ones stay silent).
                print()
                print('======')
                print(url)
            debug_file(an_dos, 'debug_an_dos.json')
            debug_file(senat_dos, 'debug_senat_dos.json')
            debug_file(dos, 'debug_dos.json')
            # download the groupes in case they are not there yet
            download_groupes(API_DIRECTORY)
            # Add potential common name from Legifrance's "Lois dites"
            common_laws = download_lois_dites(API_DIRECTORY)
            # Hoist the cid lookup instead of repeating the subscript.
            cid = dos.get('legifrance_cidTexte')
            if cid in common_laws and common_laws[cid].lower() not in dos['short_title'].lower():
                dos['loi_dite'] = common_laws[cid]
            print(' [] parse the texts')
            dos_with_texts = parse_doslegs_texts.process(dos)
            print(' [] format data for the frontend')
            format_data_for_frontend.process(dos_with_texts, API_DIRECTORY, log=log)
        except KeyboardInterrupt:
            # Never turn a Ctrl-C into an error-log dump.
            raise
        except Exception as e:
            # dump log for each failed doslegs in logs/
            dump_error_log(url, e, API_DIRECTORY, log)
            # Bare raise preserves the original traceback cleanly.
            raise
def process(API_DIRECTORY, url):
    """Fetch, check and build one legislative dossier.

    Returns the merged dossier dict on success; returns None when the
    dossier is skipped (--only-promulgated and no JO link).  On failure,
    dumps a per-dossier error log before leaving.
    """
    skip_unpromulgated = '--only-promulgated' in sys.argv
    console_quiet = '--quiet' in sys.argv
    if '--enable-cache' in sys.argv:
        enable_requests_cache()

    # Stays None if the download fails; the except block reads it to
    # decide which log directory to use.
    dos = None
    with log_print(only_log=console_quiet) as log:
        try:
            print('======')
            print(url)

            # download the AN open data or just retrieve the last stored version
            an_opendata = download_AN_opendata(API_DIRECTORY)
            dos, _an_dos, _senat_dos = download_merged_dos(url, an_opendata, log=log)
            if not dos:
                raise Exception('Nothing found at %s' % url)

            find_anomalies([dos])

            if skip_unpromulgated and not dos.get('url_jo'):
                print(' ----- passed: no JO link')
                return

            print(' title:', dos.get('long_title'))
            debug_file(dos, 'dos.json')

            # download the groupes in case they are not there yet
            download_groupes(API_DIRECTORY)

            # Add potential common name from Legifrance's "Lois dites"
            common_laws = download_lois_dites(API_DIRECTORY)
            cid = dos.get('legifrance_cidTexte')
            if cid in common_laws and common_laws[cid].lower() not in dos['short_title'].lower():
                dos['loi_dite'] = common_laws[cid]

            print(' [] parse the texts')
            parsed = parse_doslegs_texts.process(dos)

            print(' [] format data for the frontend')
            format_data_for_frontend.process(parsed, API_DIRECTORY, log=log)
            return dos
        except KeyboardInterrupt as e:
            # bypass the error log dump when doing Ctrl-C
            raise e
        except Exception as e:
            print(*traceback.format_tb(e.__traceback__), e, sep='', file=log)
            # dump log for each failed doslegs in logs/
            logdir = 'logs-encours' if dos and not dos.get('url_jo') else 'logs'
            dump_error_log(url, e, API_DIRECTORY, logdir, log)
def test_dosleg_regressions():
    """Regression-test the dossier parser against verified fixtures.

    For each directory under resources/verified_dosleg/, parse input.html
    and compare it (as canonical JSON) to output.json; also check the
    comparison scores against lawfactory/anpy/legipy reference files when
    they are present.  All files are now opened with context managers —
    the previous version leaked every file handle it opened.
    """
    enable_requests_cache()
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    DIR = join(BASE_DIR, 'resources/verified_dosleg/')
    for test_dir in os.listdir(DIR):
        path = join(DIR, test_dir)
        print('## try', path)
        with open(join(path, 'input.html')) as f:
            data = parse(f.read())
        with open(join(path, 'output.json')) as f:
            output = json.load(f)
        # Canonical serialization so dict ordering can't cause a false diff.
        data_json = json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True)
        output_json = json.dumps(output, ensure_ascii=False, indent=2, sort_keys=True)
        if data_json != output_json:
            # Print the diff before failing so the mismatch is visible.
            diff = difflib.unified_diff(data_json.split('\n'), output_json.split('\n'))
            for line in diff:
                print(line)
        assert data_json == output_json
        if os.path.exists(join(path, 'lawfactory.json')):
            with open(join(path, 'lawfactory.json')) as f:
                proc = json.load(f)
            score_ok, score_nok = compare(proc, output)
            with open(join(path, 'lawfactory_scores')) as f:
                last_score_ok, last_score_nok = [int(x) for x in f.read().split('\n') if x]
            assert score_ok == last_score_ok
            assert score_nok == last_score_nok
        if os.path.exists(join(path, 'anpy.json')):
            with open(join(path, 'anpy.json')) as f:
                anpy = json.load(f)
            with open(join(path, 'anpy_scores')) as f:
                assert compare_anpy(anpy, output) == f.read()
        if os.path.exists(join(path, 'legipy.json')):
            with open(join(path, 'legipy.json')) as f:
                legipy = json.load(f)
            score_ok, score_nok = compare_legipy(legipy, output)
            with open(join(path, 'legipy_scores')) as f:
                last_score_ok, last_score_nok = [int(x) for x in f.read().split('\n') if x]
            assert score_ok == last_score_ok
            assert score_nok == last_score_nok
def test_dosleg_smoketest():
    """Smoke test: the parser must not crash on any recent dossier page.

    Feeds every file under resources/recents_dosleg/ to parse(); only an
    exception fails the test.  Fixes two issues from the old version:
    the file handle was never closed, and the loop variable shadowed the
    (removed-in-py3 but still conventional) builtin name `file`.
    """
    enable_requests_cache()
    base_dir = os.path.dirname(os.path.abspath(__file__))
    resources_dir = os.path.join(base_dir, 'resources/recents_dosleg/')
    for filename in os.listdir(resources_dir):
        path = os.path.join(resources_dir, filename)
        print('## try', path)
        with open(path) as f:
            parse(f.read())
def process(API_DIRECTORY, url):
    """Process a single legislative dossier ("dosleg") from its URL.

    Downloads the merged AN/Senat dossier, checks it for anomalies,
    enriches it with common-law names, parses its texts and formats the
    result for the frontend.  Returns the merged dossier dict on
    success, None when the dossier is skipped (--only-promulgated with
    no JO link yet).

    Flags read from sys.argv:
      --enable-cache      enable the HTTP requests cache
      --only-promulgated  skip dossiers without a JO link
      --quiet             write to the log only, keep the console quiet
    """
    only_promulgated = '--only-promulgated' in sys.argv
    quiet = '--quiet' in sys.argv
    if '--enable-cache' in sys.argv:
        enable_requests_cache()
    # dos stays None if the download below fails; the except handler
    # reads it to choose the error-log directory.
    dos = None
    with log_print(only_log=quiet) as log:
        try:
            print('======')
            print(url)
            # download the AN open data or just retrieve the last stored version
            opendata_an = download_AN_opendata(API_DIRECTORY)
            dos, an_dos, senat_dos = download_merged_dos(url, opendata_an, log=log)
            if not dos:
                raise Exception('Nothing found at %s' % url)
            find_anomalies([dos])
            if not dos.get('url_jo') and only_promulgated:
                print(' ----- passed: no JO link')
                return
            print(' title:', dos.get('long_title'))
            debug_file(dos, 'dos.json')
            # download the groupes in case they are not there yet
            download_groupes(API_DIRECTORY)
            # Add potential common name from Legifrance's "Lois dites"
            # (only when it is not already part of the short title).
            common_laws = download_lois_dites(API_DIRECTORY)
            if dos.get('legifrance_cidTexte') in common_laws and common_laws[dos['legifrance_cidTexte']].lower() not in dos['short_title'].lower():
                dos['loi_dite'] = common_laws[dos['legifrance_cidTexte']]
            print(' [] parse the texts')
            dos_with_texts = parse_doslegs_texts.process(dos)
            print(' [] format data for the frontend')
            format_data_for_frontend.process(dos_with_texts, API_DIRECTORY, log=log)
            return dos
        except KeyboardInterrupt as e:
            # bypass the error log dump when doing Ctrl-C
            raise e
        except Exception as e:
            print(*traceback.format_tb(e.__traceback__), e, sep='', file=log)
            # dump log for each failed doslegs in logs/
            logdir = 'logs'
            if dos and not dos.get('url_jo'):
                # dossier still in progress: keep its failures apart
                logdir = 'logs-encours'
            dump_error_log(url, e, API_DIRECTORY, logdir, log)
def test_dosleg_regressions():
    """Check parser output against the verified_dosleg fixtures.

    Each fixture directory provides input.html and the expected
    output.json; optional lawfactory/anpy/legipy files pin comparison
    scores.  Every file is opened via a `with` block — the previous
    version never closed any of the handles it opened.
    """
    enable_requests_cache()
    base_dir = os.path.dirname(os.path.abspath(__file__))
    fixtures_dir = join(base_dir, 'resources/verified_dosleg/')
    for test_dir in os.listdir(fixtures_dir):
        path = join(fixtures_dir, test_dir)
        print('## try', path)
        with open(join(path, 'input.html')) as f:
            data = parse(f.read())
        with open(join(path, 'output.json')) as f:
            output = json.load(f)
        # Serialize both sides canonically (sorted keys) before comparing.
        data_json = json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True)
        output_json = json.dumps(output, ensure_ascii=False, indent=2, sort_keys=True)
        if data_json != output_json:
            # Show a unified diff so the failing assert is diagnosable.
            for diff_line in difflib.unified_diff(data_json.split('\n'), output_json.split('\n')):
                print(diff_line)
        assert data_json == output_json
        if os.path.exists(join(path, 'lawfactory.json')):
            with open(join(path, 'lawfactory.json')) as f:
                proc = json.load(f)
            score_ok, score_nok = compare(proc, output)
            with open(join(path, 'lawfactory_scores')) as f:
                last_score_ok, last_score_nok = [int(x) for x in f.read().split('\n') if x]
            assert score_ok == last_score_ok
            assert score_nok == last_score_nok
        if os.path.exists(join(path, 'anpy.json')):
            with open(join(path, 'anpy.json')) as f:
                anpy = json.load(f)
            with open(join(path, 'anpy_scores')) as f:
                assert compare_anpy(anpy, output) == f.read()
        if os.path.exists(join(path, 'legipy.json')):
            with open(join(path, 'legipy.json')) as f:
                legipy = json.load(f)
            score_ok, score_nok = compare_legipy(legipy, output)
            with open(join(path, 'legipy_scores')) as f:
                last_score_ok, last_score_nok = [int(x) for x in f.read().split('\n') if x]
            assert score_ok == last_score_ok
            assert score_nok == last_score_nok
print("ERROR: could not find visa in decision CC", url, file=sys.stderr) return None decision_txt = decision_src.split('<a name=\'visa\' id="visa"></a>')[1] if not re_delibere.search(decision_txt): print("ERROR: could not find siège in décision CC", url, file=sys.stderr) return None decision_txt = clean_delib(decision_txt) return strip_text(decision_txt) def get_decision_length(url): decision_txt = extract_full_decision(url) if not decision_txt: return -1 return len(decision_txt) if __name__ == "__main__": enable_requests_cache() if len(sys.argv) == 2: with open(sys.argv[1]) as f: for url in f.readlines(): url = url.strip() print(url, ':', get_decision_length(url)) else: print(extract_full_decision(sys.argv[1]))
# Matches the closing block of a CC decision ("Jugé/Délibéré par le
# Conseil constitutionnel ...") through to the end of the text (re.S
# makes '.' match newlines, so everything after the marker is captured).
re_delibere = re.compile(r"<p>\s*(Jug|Délibér)é par le Conseil constitutionnel .*$", re.S)

# HTML anchor marking the start of the "visa" section of a decision
# page; hoisted to a constant instead of being duplicated inline.
VISA_ANCHOR = '<a name=\'visa\' id="visa"></a>'


def clean_delib(text):
    """Return *text* with the trailing 'délibéré' block removed."""
    # Proper def instead of a lambda assignment (PEP 8, E731).
    return re_delibere.sub("", text)


def extract_full_decision(url):
    """Download a Conseil constitutionnel decision and return its body.

    Returns the cleaned, stripped decision text (everything between the
    visa anchor and the final 'délibéré' block), or None when the page
    does not have the expected layout.
    """
    decision_src = download(url).text
    if VISA_ANCHOR not in decision_src:
        print("ERROR: could not find visa in decision CC", url, file=sys.stderr)
        return None
    decision_txt = decision_src.split(VISA_ANCHOR)[1]
    if not re_delibere.search(decision_txt):
        print("ERROR: could not find siège in décision CC", url, file=sys.stderr)
        return None
    decision_txt = clean_delib(decision_txt)
    return strip_text(decision_txt)


def get_decision_length(url):
    """Length in characters of the decision at *url*, or -1 on failure."""
    decision_txt = extract_full_decision(url)
    if not decision_txt:
        return -1
    return len(decision_txt)


if __name__ == "__main__":
    enable_requests_cache()
    if len(sys.argv) == 2:
        # Argument is a file listing one decision URL per line.
        with open(sys.argv[1]) as f:
            for line in f:  # iterate lazily instead of readlines()
                url = line.strip()
                print(url, ':', get_decision_length(url))
    else:
        print(extract_full_decision(sys.argv[1]))