def get_layers(tempdir,
               minio_edge_access,
               minio_edge_secret,
               layers=LAYER_FILES,
               minio_lake_access=None,
               minio_lake_secret=None):
    """Yield every requested layer after copying it from Minio into ``tempdir``.

    :param tempdir: directory the layer files are written into
    :param minio_edge_access: access key used for EDGE-classified layers
    :param minio_edge_secret: secret used for EDGE-classified layers
    :param layers: iterable of
        ``(layer, classification, bucket, minio_prefix)`` tuples
    :param minio_lake_access: access key used for all non-EDGE layers
    :param minio_lake_secret: secret used for all non-EDGE layers
    :return: generator of ``(layer, local_path, layer_gdf)`` tuples
    """
    edge_credentials = (minio_edge_access, minio_edge_secret)
    lake_credentials = (minio_lake_access, minio_lake_secret)

    for layer, layer_classification, layer_bucket, layer_minio_prefix in layers:
        local_path = os.path.join(tempdir, layer)

        # Only EDGE layers use the edge credentials; anything else falls
        # back to the lake pair.
        is_edge = layer_classification is minio_utils.DataClassification.EDGE
        access_key, secret_key = (edge_credentials
                                  if is_edge else lake_credentials)

        minio_utils.minio_to_file(
            filename=local_path,
            minio_filename_override=layer_minio_prefix + layer,
            minio_bucket=layer_bucket,
            minio_key=access_key,
            minio_secret=secret_key,
            data_classification=layer_classification,
        )

        # GeoJSON files are parsed by geopandas, every other file as JSON.
        if local_path.endswith(".geojson"):
            layer_gdf = geopandas.read_file(local_path)
        else:
            layer_gdf = pandas.read_json(local_path)

        yield layer, local_path, layer_gdf
示例#2
0
def get_data(minio_key, minio_access, minio_secret):
    """Read a CSV object from Minio into a pandas DataFrame.

    :param minio_key: object name, appended to ``DATA_RESTRICTED_PREFIX``
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: DataFrame parsed from the fetched CSV
    """
    with tempfile.NamedTemporaryFile() as scratch_file:
        scratch_path = scratch_file.name
        remote_path = DATA_RESTRICTED_PREFIX + minio_key

        minio_utils.minio_to_file(
            filename=scratch_path,
            minio_filename_override=remote_path,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )

        # Parse before the context manager removes the scratch file.
        return pandas.read_csv(scratch_path)
示例#3
0
def get_layers(directorate_file_prefix, tempdir, minio_access, minio_secret):
    """Yield each service request map layer, plus its metadata when present.

    :param directorate_file_prefix: prefix put in front of choropleth layer
        filenames (joined with ``_``)
    :param tempdir: directory the layer and metadata files are written into
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: generator of ``(filename, (local_path, layer_gdf, is_choropleth,
        layer_metadata, layer_props))`` tuples
    """

    def _download(local_file, minio_file):
        # Both the layer and its metadata come from the same bucket with the
        # same credentials, so the call is shared.
        minio_utils.minio_to_file(
            filename=local_file,
            minio_filename_override=minio_file,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )

    for layer_filename, layer_props in LAYER_PROPERTIES_TUPLES:
        is_choropleth = (
            layer_filename in
            service_request_map_layers_to_minio.CHOROPLETH_LAYERS)

        # Choropleth layers are stored per directorate prefix; all other
        # layers are shared and keep their plain filename.
        if is_choropleth:
            directorate_layer_filename = (
                f'{directorate_file_prefix}_{layer_filename}')
        else:
            directorate_layer_filename = layer_filename

        local_path = os.path.join(tempdir, directorate_layer_filename)
        layer_minio_path = (
            f"{service_request_map_layers_to_minio.SERVICE_REQUEST_MAP_PREFIX}"
            f"{directorate_layer_filename}")
        _download(local_path, layer_minio_path)

        layer_gdf = geopandas.read_file(local_path)

        # The last entry of the layer properties flags whether a sidecar
        # ".json" metadata file exists for this layer.
        layer_metadata = {}
        *_, has_metadata = layer_props
        if has_metadata:
            filename_stem, _ = os.path.splitext(directorate_layer_filename)
            metadata_filename = filename_stem + ".json"
            metadata_local_path = os.path.join(tempdir, metadata_filename)
            metadata_minio_path = (
                f"{service_request_map_layers_to_minio.SERVICE_REQUEST_MAP_PREFIX}"
                f"{metadata_filename}")

            _download(metadata_local_path, metadata_minio_path)
            with open(metadata_local_path, "r") as metadata_file:
                layer_metadata = json.load(metadata_file)

        layer_details = (local_path, layer_gdf, is_choropleth,
                         layer_metadata, layer_props)
        yield directorate_layer_filename, layer_details
def get_subdistrict_populations(minio_access, minio_secret):
    """Load the subdistrict population CSV, indexed by subdistrict.

    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: DataFrame indexed by
        ``epi_map_case_layers_to_minio.SUBDISTRICT_COL``
    """
    with tempfile.NamedTemporaryFile() as scratch_file:
        remote_path = (epi_map_case_layers_to_minio.DATA_RESTRICTED_PREFIX +
                       SUBDISTRICT_POP_FILE)
        minio_utils.minio_to_file(
            filename=scratch_file.name,
            minio_filename_override=remote_path,
            minio_bucket=epi_map_case_layers_to_minio.MINIO_COVID_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=epi_map_case_layers_to_minio.EDGE_MINIO_CLASSIFICATION,
        )

        raw_population_df = pandas.read_csv(scratch_file.name)
        population_df = raw_population_df.set_index(
            epi_map_case_layers_to_minio.SUBDISTRICT_COL)

    return population_df
示例#5
0
def minio_to_df(minio_filename_override,
                minio_bucket,
                data_classification,
                reader="csv"):
    """Fetch a file from Minio and load it into a pandas DataFrame.

    :param minio_filename_override: (str) minio override string (prefix and
        file name)
    :param minio_bucket: (str) minio bucket name
    :param data_classification: minio classification passed through to
        ``minio_utils.minio_to_file``
    :param reader: (str) ``"csv"`` or ``"parquet"``; selects the pandas
        reader used on the fetched file
    :return: pandas dataframe
    :raises SystemExit: with code -1 when ``reader`` is unsupported or when
        ``minio_utils.minio_to_file`` returns a falsy result
    """
    logging.debug("Pulling data from Minio bucket...")
    if reader == "csv":
        file_reader = pd.read_csv
    elif reader == "parquet":
        file_reader = pd.read_parquet
    else:
        logging.error("reader is not 'csv' or 'parquet'")
        sys.exit(-1)

    with tempfile.NamedTemporaryFile() as temp_data_file:
        result = minio_utils.minio_to_file(
            filename=temp_data_file.name,
            minio_filename_override=minio_filename_override,
            minio_bucket=minio_bucket,
            data_classification=data_classification,
        )
        if not result:
            # Fatal: log at error level so the reason for the exit is visible
            # even when debug logging is switched off.
            logging.error("Could not get data from minio bucket")
            sys.exit(-1)

        logging.debug(
            f"Reading in raw data from '{temp_data_file.name}'...")
        # Parse while the temporary file still exists.
        return file_reader(temp_data_file.name)
def minio_csv_to_df(minio_filename_override, minio_bucket, minio_key,
                    minio_secret, data_classification):
    """
    Pull a csv file from minio into a pandas dataframe.

    :param minio_filename_override: (str) minio override string (prefix and file name)
    :param minio_bucket: (str) minio bucket name
    :param minio_key: (str) the minio access key
    :param minio_secret: (str) the minio key secret
    :param data_classification: minio classification (edge | lake)
    :return: pandas dataframe
    :raises SystemExit: with code -1 when ``minio_utils.minio_to_file``
        returns a falsy result
    """
    logging.debug("Pulling data from Minio bucket...")
    with tempfile.NamedTemporaryFile() as temp_data_file:
        result = minio_utils.minio_to_file(
            filename=temp_data_file.name,
            minio_filename_override=minio_filename_override,
            minio_bucket=minio_bucket,
            minio_key=minio_key,
            minio_secret=minio_secret,
            data_classification=data_classification,
        )
        if not result:
            # Fatal: log at error level so the reason for the exit is visible
            # even when debug logging is switched off.
            logging.error("Could not get data from minio bucket")
            sys.exit(-1)

        logging.debug(
            f"Reading in raw data from '{temp_data_file.name}'...")
        # Parse while the temporary file still exists.
        return pd.read_csv(temp_data_file.name)
def get_plot_df(minio_key, minio_access, minio_secret):
    """Read the HR org unit status .csv file from Minio.

    :param minio_key: full Minio object path of the csv (used without any
        prefix)
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: DataFrame parsed from the fetched CSV
    """
    with tempfile.NamedTemporaryFile() as scratch_file:
        scratch_path = scratch_file.name
        minio_utils.minio_to_file(
            filename=scratch_path,
            minio_filename_override=minio_key,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )
        # Parse before the context manager removes the scratch file.
        return pd.read_csv(scratch_path)
def get_case_data(minio_access, minio_secret):
    """Load the provincial case data CSV with its date columns parsed.

    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: DataFrame whose diagnosis, admission and death date columns
        have been passed through ``pandas.to_datetime``
    """
    with tempfile.NamedTemporaryFile() as scratch_file:
        minio_utils.minio_to_file(
            filename=scratch_file.name,
            minio_filename_override=DATA_RESTRICTED_PREFIX + PROV_CASE_DATA_FILENAME,
            minio_bucket=MINIO_COVID_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=EDGE_MINIO_CLASSIFICATION,
        )

        case_data_df = pandas.read_csv(scratch_file.name,
                                       encoding=PROV_CASE_FILE_ENCODING)

    # The frame is fully in memory now, so the conversion no longer needs
    # the temporary file.
    date_columns = (DATE_DIAGNOSIS_COL, DATE_ADMITTED_COL, DATE_DEATH_COL)
    for date_col in date_columns:
        case_data_df[date_col] = pandas.to_datetime(case_data_df[date_col])

    return case_data_df
示例#9
0
def get_mobile_data(minio_access, minio_secret):
    """Load the mobile metrics CSV, indexed by timestamp and polygon id.

    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: DataFrame with a sorted
        ``(TIMESTAMP_COL, HOURLY_METRIC_POLYGON_ID)`` index
    """
    with tempfile.NamedTemporaryFile() as scratch_file:
        minio_utils.minio_to_file(
            filename=scratch_file.name,
            minio_filename_override=CT_MOBILE_METRICS,
            minio_bucket=MINIO_COVID_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_EDGE_CLASSIFICATION,
        )

        raw_df = pandas.read_csv(scratch_file.name)

    # The timestamp column is the only one that needs datetime conversion.
    raw_df[TIMESTAMP_COL] = pandas.to_datetime(raw_df[TIMESTAMP_COL])

    index_columns = [TIMESTAMP_COL, HOURLY_METRIC_POLYGON_ID]
    mobile_data_df = raw_df.set_index(index_columns).sort_index()

    return mobile_data_df
示例#10
0
def _fetch_layer(tempdir, layer_filename_prefix, layer_suffix, apply_prefix,
                 has_metadata, minio_path_prefix, minio_access, minio_secret):
    """Fetch one layer, and optionally its metadata, from Minio.

    :param tempdir: directory the fetched files are written into
    :param layer_filename_prefix: prefix joined to ``layer_suffix`` with
        ``_`` when ``apply_prefix`` is truthy
    :param layer_suffix: base layer filename
    :param apply_prefix: whether the prefixed filename should be used
    :param has_metadata: whether a sidecar ``.json`` metadata file should
        also be fetched and parsed
    :param minio_path_prefix: Minio path prefix put in front of the filenames
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: ``(local_path, layer_gdf, layer_metadata)``; the metadata is an
        empty dict when ``has_metadata`` is falsy
    """

    def _pull(local_file, minio_file):
        # The layer and its metadata share bucket, credentials and
        # classification, so the call is shared.
        minio_utils.minio_to_file(
            filename=local_file,
            minio_filename_override=minio_file,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )

    if apply_prefix:
        layer_filename = f"{layer_filename_prefix}_{layer_suffix}"
    else:
        layer_filename = layer_suffix

    local_path = os.path.join(tempdir, layer_filename)

    layer_minio_path = f"{minio_path_prefix}{layer_filename}"
    logging.debug(layer_minio_path)
    _pull(local_path, layer_minio_path)

    layer_gdf = geopandas.read_file(local_path)

    if not has_metadata:
        return local_path, layer_gdf, {}

    # The metadata sits next to the layer under the same name with a
    # ".json" extension.
    filename_stem, _ = os.path.splitext(layer_filename)
    metadata_filename = filename_stem + ".json"
    metadata_local_path = os.path.join(tempdir, metadata_filename)
    metadata_minio_path = f"{minio_path_prefix}{metadata_filename}"

    _pull(metadata_local_path, metadata_minio_path)
    with open(metadata_local_path, "r") as metadata_file:
        layer_metadata = json.load(metadata_file)

    return local_path, layer_gdf, layer_metadata
def get_data(minio_key, minio_access, minio_secret):
    """Load a CSV from Minio, parse its date column and fill missing labels.

    :param minio_key: object name, appended to ``DATA_RESTRICTED_PREFIX``
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: DataFrame with ``DATE_COL`` converted by ``pandas.to_datetime``
        and missing values in ``SOCIAL_NETWORK_COL`` and ``CATEGORY_COL``
        replaced by ``NA_VALUE``
    """
    with tempfile.NamedTemporaryFile() as temp_datafile:
        minio_utils.minio_to_file(
            filename=temp_datafile.name,
            minio_filename_override=DATA_RESTRICTED_PREFIX + minio_key,
            minio_bucket=MINIO_BUCKET,
            minio_key=minio_access,
            minio_secret=minio_secret,
            data_classification=MINIO_CLASSIFICATION,
        )

        data_df = pandas.read_csv(temp_datafile.name)

    data_df[DATE_COL] = pandas.to_datetime(data_df[DATE_COL])

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on a column selection: that is chained assignment, which does not
    # update data_df under pandas Copy-on-Write.
    for na_col in (SOCIAL_NETWORK_COL, CATEGORY_COL):
        data_df[na_col] = data_df[na_col].fillna(NA_VALUE)

    logging.debug(f"data_df.columns=\n{data_df.columns}")
    logging.debug(
        f"data_df.dtypes=\n{pprint.pformat(data_df.dtypes.to_dict())}")

    return data_df
def get_data_df(filename, minio_access, minio_secret):
    """Fetch a CSV from Minio and load it into a pandas DataFrame.

    :param filename: Minio object path, passed as ``minio_filename_override``
    :param minio_access: Minio access key
    :param minio_secret: Minio secret key
    :return: DataFrame parsed from the fetched CSV
    :raises AssertionError: when ``minio_utils.minio_to_file`` returns a
        falsy result
    """
    with tempfile.NamedTemporaryFile() as temp_data_file:
        logging.debug("Pulling data from Minio bucket...")
        result = minio_utils.minio_to_file(temp_data_file.name,
                                           BUCKET,
                                           minio_access,
                                           minio_secret,
                                           CLASSIFICATION,
                                           minio_filename_override=filename)
        # Explicit check rather than a bare `assert`, which is removed under
        # `python -O`. The exception type is kept for existing callers.
        if not result:
            raise AssertionError(
                f"Could not get '{filename}' from minio bucket")

        logging.debug(f"Reading in raw data from '{temp_data_file.name}'...")
        # Read by path, like the other loaders in this file, so the result
        # does not depend on the already-open handle still referring to the
        # data the download wrote.
        data_df = pandas.read_csv(temp_data_file.name)

    return data_df
示例#13
0
def minio_csv_to_df(minio_filename_override, minio_bucket, minio_key, minio_secret):
    """Pull an ISO-8859-1 encoded csv file from minio into a pandas dataframe.

    :param minio_filename_override: (str) minio override string (prefix and file name)
    :param minio_bucket: (str) minio bucket name
    :param minio_key: (str) the minio access key
    :param minio_secret: (str) the minio key secret
    :return: pandas dataframe
    :raises SystemExit: with code -1 when ``minio_utils.minio_to_file``
        returns a falsy result
    """
    logging.debug("Pulling data from Minio bucket...")
    with tempfile.NamedTemporaryFile() as temp_data_file:
        result = minio_utils.minio_to_file(filename=temp_data_file.name,
                                           minio_filename_override=minio_filename_override,
                                           minio_bucket=minio_bucket,
                                           minio_key=minio_key,
                                           minio_secret=minio_secret,
                                           data_classification=MINIO_CLASSIFICATION,
                                           )
        if not result:
            # Fatal: log at error level so the reason for the exit is visible
            # even when debug logging is switched off.
            logging.error("Could not get data from minio bucket")
            sys.exit(-1)

        logging.debug(f"Reading in raw data from '{temp_data_file.name}'...")
        # Read by path rather than through the open handle, so the result
        # does not depend on the handle still referring to the data the
        # download wrote.
        df = pd.read_csv(temp_data_file.name, engine='c', encoding='ISO-8859-1')
        return df
    logging.info("Fetch[ed] current totals from dfs")

    # get plot files
    logging.info("Fetch[ing] email plot files")
    attachments_file_paths_dict = defaultdict(str)
    attachment_zip = []
    with tempfile.TemporaryDirectory() as temp_dir:
        for plot_level in [
                TOP_LEVEL, DEPARTMENT, SUBDISTRICT, STAFF_TYPE, RISK
        ]:
            plot_file = f"{OUTFILE_PREFIX}_{plot_level}.png"
            minio_filename_override = f"{VACC_PLOT_PREFIX}{plot_file}"
            tmp_file_name = str(pathlib.Path(temp_dir, plot_file))
            minio_result = minio_utils.minio_to_file(
                filename=tmp_file_name,
                minio_filename_override=minio_filename_override,
                minio_bucket=COVID_BUCKET,
                data_classification=EDGE_CLASSIFICATION,
            )
            if not minio_result:
                logging.debug(f"Could not get data from minio bucket")
                sys.exit(-1)

            attachments_file_paths_dict[plot_level] = plot_file
            attachment_zip.append((plot_file, tmp_file_name))

        logging.info("Fetch[ed] email plot files")

        # set email params
        logging.info("Load[ing] email template")
        email_template_path = os.path.join(RESOURCES_PATH,
                                           EMAIL_TEMPLATE_FILENAME)