Exemplo n.º 1
0
async def download_file(filename, url, session: aiohttp.ClientSession):
    """Stream the response body of `url` into a local `<filename>.pdf` file.

    The target directory is resolved through `get_path_from_root(FILES)` and
    is created on demand.

    Args:
        filename: Base name of the file to write; `.pdf` is appended.
        url: Address fetched with `session.get`.
        session: Open aiohttp client session used for the request.
    """
    file_directory = get_path_from_root(FILES)
    path = get_path_from_root(f'{filename}.pdf', file_directory)

    # makedirs(exist_ok=True) also creates missing parent directories and
    # does not fail when the directory already exists.
    os.makedirs(file_directory, exist_ok=True)

    # NOTE(review): the HTTP status is not checked, so an error page would be
    # stored as a .pdf as well - confirm whether callers rely on that.
    async with session.get(url) as response:
        async with aiofiles.open(path, mode='wb') as file:
            # Write chunk by chunk so the whole body is never held in memory.
            async for data, _ in response.content.iter_chunks():
                await file.write(data)
    logger.debug(f'saved new report: {filename}')
Exemplo n.º 2
0
async def extract_data():
    """Analyse every downloaded report and persist the results.

    Returns:
        None when the download directory does not exist; otherwise the list
        produced by `asyncio.gather` over `save_individual_reports` and
        `save_complete_report`.
    """
    download_directory_path = get_path_from_root(FILES)
    report_directory_path = get_path_from_root(INDIVIDUAL_REPORTS)

    # Nothing was downloaded, so there is nothing to analyse.
    if not os.path.exists(download_directory_path):
        return

    analysis = await analyse_all_reports(download_directory_path)
    # Only exactly one report is singular ("0 reports", "1 report", ...).
    logger.info(f'analysed {len(analysis)} '
                f'report{"" if len(analysis) == 1 else "s"}')
    # Both writers only read `analysis`, so they can run concurrently.
    return await asyncio.gather(
        save_individual_reports(analysis, report_directory_path),
        save_complete_report(analysis, DATA))
async def save_individual_reports(data, directory):
    """Write one CSV and one JSON file per analysed report into `directory`.

    Args:
        data: Iterable of `(timestamp, analysis)` pairs. `analysis` is
            indexed and iterated as a sequence of dicts; the keys of the
            first dict become the CSV header.
            NOTE(review): an empty `analysis` raises IndexError - confirm
            that callers never produce one.
        directory: Folder receiving `<timestamp>.csv` and `<timestamp>.json`;
            created when missing.
    """
    os.makedirs(directory, exist_ok=True)
    for timestamp, analysis in data:
        # Derive the rows before opening the file so a malformed analysis
        # does not leave an empty CSV behind.
        fieldnames = list(analysis[0].keys())
        csv_countries = [list(country.values()) for country in analysis]

        csv_file_path = get_path_from_root(f'{timestamp}.csv', directory)
        # newline='' lets the csv writer control line endings itself;
        # without it Windows output ends up with '\r\r\n'.
        async with aiofiles.open(csv_file_path, mode='w', newline='') as file:
            writer = aiocsv.AsyncWriter(file)
            await writer.writerow(fieldnames)
            await writer.writerows(csv_countries)

        json_file_path = get_path_from_root(f'{timestamp}.json', directory)
        async with aiofiles.open(json_file_path, mode='w') as file:
            await file.write(json.dumps(analysis, indent=2))
Exemplo n.º 4
0
def get_missing_reports(scraped_results):
    """Return the scraped entries that have no matching local report file.

    An entry counts as present when its `'date'` value equals the part of a
    file name before the first dot, for any file in the individual-reports
    directory. When that directory does not exist, `scraped_results` is
    returned unchanged.
    """
    local_directory = get_path_from_root(INDIVIDUAL_REPORTS)
    if not os.path.isdir(local_directory):
        return scraped_results

    # Collect the file stems (text before the first dot) already on disk.
    known_stems = set()
    for entry in os.listdir(local_directory):
        known_stems.add(entry.split('.')[0])

    pending = {}
    for key, val in scraped_results.items():
        if val['date'] not in known_stems:
            pending[key] = val
    return pending
Exemplo n.º 5
0
def main(debug):
    """Configure logging, then download, analyse and clean up the reports.

    Args:
        debug: When truthy the `custom_level` logging default is 'DEBUG',
            otherwise 'INFO'.

    Returns:
        0 on success or when interrupted from the keyboard, 1 when a
        RuntimeError is raised along the way.
    """
    # Silence chatty third-party loggers.
    logging.getLogger('pdfminer').setLevel(logging.WARNING)
    logging.getLogger('urllib3').setLevel(logging.WARNING)
    logging.getLogger('asyncio').setLevel(logging.WARNING)
    logging_defaults = {'custom_level': 'DEBUG' if debug else 'INFO'}
    logging.config.fileConfig(fname=get_path_from_root(LOGGING_CONFIG),
                              defaults=logging_defaults,
                              disable_existing_loggers=False)

    try:
        # asyncio.get_event_loop() without a current loop is deprecated and
        # raises RuntimeError on recent Python versions, which the handler
        # below would silently turn into exit code 1. Create the loop
        # explicitly instead.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(get_reports())
            loop.run_until_complete(extract_data())
        finally:
            # Release the loop even when one of the steps failed.
            loop.close()
        remove_downloaded_files()
        return 0
    except KeyboardInterrupt:
        return 0
    except RuntimeError:
        # Record why the run is reported as failed instead of hiding it.
        logging.getLogger(__name__).exception('aborted by a runtime error')
        return 1
Exemplo n.º 6
0
async def save_complete_json_report(data, filename):
    """Merge freshly analysed `data` into the cumulative `<filename>.json`.

    A missing or blank file is written via `write_to_json`. Otherwise the
    stored entries whose key matches a date in `data` are dropped, the
    result of `parse_data_for_json(data)` is merged in, and the file is
    rewritten in place with sorted keys.
    """
    json_file_path = get_path_from_root(f'{filename}.json')
    analysed_dates = [entry[0] for entry in data]

    if not os.path.exists(json_file_path):
        logger.info(f'{json_file_path} doesn\'t exist, creating new one')
        await write_to_json(data, json_file_path)
        return

    async with aiofiles.open(json_file_path, mode='r+') as handle:
        raw = await handle.read()
        if not raw.strip():
            logger.debug(f'{json_file_path} is empty, '
                         f'writing from scratch')
            await write_to_json(data, json_file_path)
            return

        # Keep only stored entries that were not re-analysed this run.
        merged = {}
        for date, stored in json.loads(raw).items():
            if date not in analysed_dates:
                merged[date] = stored
        merged.update(parse_data_for_json(data))

        # Rewind and clear the file before writing the merged document.
        await handle.seek(0)
        await handle.truncate()
        await handle.write(json.dumps(merged, sort_keys=True, indent=2))
Exemplo n.º 7
0
async def save_complete_csv_report(data, filename):
    """Merge freshly analysed `data` into the cumulative `<filename>.csv`.

    A missing or blank file is written via `write_to_csv`. Otherwise the
    stored rows whose first cell matches a date in `data` are dropped, the
    rows from `parse_data_for_csv(data)` are added, and the file is
    rewritten in place, sorted by first cell, below the original header.
    """
    # A set gives constant-time membership tests while filtering rows.
    analysed_dates = {el[0] for el in data}
    csv_file_path = get_path_from_root(f'{filename}.csv')
    if not os.path.exists(csv_file_path):
        logger.info(f'{csv_file_path} doesn\'t exist, creating new one')
        await write_to_csv(data, csv_file_path)
    else:
        # newline='' lets the csv writer control line endings itself;
        # without it Windows output ends up with '\r\r\n'.
        async with aiofiles.open(csv_file_path, mode='r+', newline='') as f:
            content = await f.read()
            if not content.strip():
                logger.debug(f'{csv_file_path} is empty, writing from scratch')
                await write_to_csv(data, csv_file_path)
            else:
                # Split off the header first so the date filter can never
                # discard it or promote a data row into its place.
                fieldnames, *saved_rows = [
                    line for line in csv.reader(content.splitlines())
                    if line
                ]
                saved_data = [
                    row for row in saved_rows
                    if row[0] not in analysed_dates
                ]
                complete_data = [*saved_data, *parse_data_for_csv(data)]
                complete_data = sorted(complete_data, key=lambda el: el[0])
                # Rewind and clear the file before writing the merged rows.
                await f.seek(0)
                await f.truncate()
                writer = aiocsv.AsyncWriter(f)
                await writer.writerow(fieldnames)
                await writer.writerows(complete_data)
Exemplo n.º 8
0
def remove_downloaded_files(folder=DOWNLOAD_FOLDER):
    """Delete every entry listed in `folder`, then the folder itself.

    Does nothing when `folder` does not exist. Entry paths are resolved
    with `get_path_from_root(entry, folder)`.
    """
    if os.path.exists(folder):
        for entry in os.listdir(folder):
            entry_path = get_path_from_root(entry, folder)
            os.remove(entry_path)
        # rmdir only succeeds on an empty directory, hence the loop above.
        os.rmdir(folder)
Exemplo n.º 9
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

from risikogebiete_api.utils import get_path_from_root
from risikogebiete_api.constants import FILES

# Default folder that remove_downloaded_files() empties and removes.
DOWNLOAD_FOLDER = get_path_from_root(FILES)


def remove_downloaded_files(folder=DOWNLOAD_FOLDER):
    """Remove the download folder together with the files it lists.

    Returns immediately when `folder` is absent. Each listed name is turned
    into a path via `get_path_from_root(name, folder)` before removal.
    """
    folder_missing = not os.path.exists(folder)
    if folder_missing:
        return

    for name in os.listdir(folder):
        target = get_path_from_root(name, folder)
        os.remove(target)

    # The directory must be empty by now for rmdir to succeed.
    os.rmdir(folder)
Exemplo n.º 10
0
async def analyse_all_reports(directory):
    """Run `analyse_report` on every entry of `directory` concurrently.

    Returns the list produced by `asyncio.gather`, ordered like
    `os.listdir(directory)`.
    """
    pending = []
    for name in os.listdir(directory):
        report_path = get_path_from_root(name, directory)
        pending.append(analyse_report(report_path))
    return await asyncio.gather(*pending)