Пример #1
0
def path_prediction(run_id: str, dataset: data_api.Dataset) -> str:
    """ Build the path of the stored prediction parquet for a run/dataset

    The per-run directory DIR_PREDICTIONS/{run_id} is created through
    io.create_directory before the path is returned.

    Args:
        run_id: identifier of the run, used for the directory and filename
        dataset: dataset whose .name goes into the filename
    Returns:
        path: DIR_PREDICTIONS/{run_id}/predictions_{run_id}_{name}.parquet
    """
    run_directory = os.path.join(DIR_PREDICTIONS, run_id)
    io.create_directory(run_directory)
    filename = f"predictions_{run_id}_{dataset.name}.parquet"
    return os.path.join(run_directory, filename)
Пример #2
0
def fetch_prediction_competition_data(
    fnames_want: Optional[List[str]] = None,
) -> Dict[str, str]:
    """ Fetch and unpack the prediction competition data

    If every wanted file is already present in DIR_STORAGE nothing is
    downloaded. Otherwise the zip archive is fetched into a temporary
    directory, unpacked there, and the wanted files are moved into
    DIR_STORAGE.

    Args:
        fnames_want: basenames of the files to keep from the archive,
            defaults to ["cm.csv", "pgm.csv"] when None or empty
    Returns:
        data: dict mapping each wanted basename to its path in DIR_STORAGE
    Raises:
        RuntimeError: if a download was needed and any wanted file was
            not among the files moved out of the archive
    """

    archive_name = "views_pred_comp_data_20200324.zip"
    url = f"https://views.pcr.uu.se/download/datasets/{archive_name}"

    if not fnames_want:
        fnames_want = ["cm.csv", "pgm.csv"]

    # Files land directly in DIR_STORAGE, not in a subdirectory of it.
    dir_destination = DIR_STORAGE
    paths_want = []
    for wanted_name in fnames_want:
        paths_want.append(os.path.join(dir_destination, wanted_name))

    io.create_directory(dir_destination)

    data = {os.path.basename(wanted): wanted for wanted in paths_want}

    already_present = all(os.path.isfile(wanted) for wanted in paths_want)
    if already_present:
        log.info("Files already where we need them")
        return data

    log.info(f"Fetching {fnames_want} from {url}")
    paths_moved: List[str] = []
    with tempfile.TemporaryDirectory() as workdir:
        archive_path = os.path.join(workdir, archive_name)
        io.fetch_url_to_file(url=url, path=archive_path)
        unpacked = io.unpack_zipfile(archive_path, destination=workdir)
        for unpacked_path in unpacked:
            basename = os.path.basename(unpacked_path)
            if basename not in fnames_want:
                continue
            target = os.path.join(dir_destination, basename)
            io.move_file(path_from=unpacked_path, path_to=target)
            paths_moved.append(target)

    # Only files moved during this call count as found.
    paths_missing = [
        wanted for wanted in paths_want if wanted not in paths_moved
    ]
    if paths_missing:
        raise RuntimeError(f"Missing paths {paths_missing}")

    return data
Пример #3
0
def get_path_tar(name: str) -> str:
    """ Build the path DIR_FETCHES/{name}_{YYYYMMDD}.tar.xz for today

    DIR_FETCHES is passed to io.create_directory before the path is built.

    Args:
        name: prefix of the tarfile name
    Returns:
        path: path to the tarfile, stamped with today's local date
    """
    io.create_directory(DIR_FETCHES)
    datestamp = f"{date.today():%Y%m%d}"
    tar_name = f"{name}_{datestamp}.tar.xz"
    return os.path.join(DIR_FETCHES, tar_name)
Пример #4
0
""" Get the prediction competition data from the ViEWS website """
from typing import List, Optional, Dict
import tempfile
import logging
import os
from datetime import date

from views.apps.data import api
from views.utils import io
from views.config import DIR_STORAGE
from views.specs.data import DATASETS

# getLogger() without a name returns the root logger.
log = logging.getLogger()

# The "upload" subdirectory of DIR_STORAGE. io.create_directory is called
# on it at import time, as a module-level side effect.
DIR_UPLOAD = os.path.join(DIR_STORAGE, "upload")
io.create_directory(DIR_UPLOAD)


def fetch_prediction_competition_data(
    fnames_want: Optional[List[str]] = None, ) -> Dict[str, str]:
    """ Fetch and unpack the prediction competition data"""

    fname_zip = "views_pred_comp_data_20200427.zip"
    url = f"https://views.pcr.uu.se/download/datasets/{fname_zip}"

    if not fnames_want:
        fnames_want = ["cm.csv", "pgm.csv"]

    dir_destination = os.path.join(DIR_STORAGE, "prediction_competition")
    paths_want = [
        os.path.join(dir_destination, fname) for fname in fnames_want
Пример #5
0
import joblib  # type: ignore
import numpy as np  # type: ignore
import pandas as pd  # type: ignore

from views.apps.evaluation import lib as evallib
from views.apps.transforms import lib as translib
from views.apps.ensemble import run_ebma
from views.utils import data as datautils, misc as miscutils, io
from views import config
from . import calibration

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# The "models" subdirectory of config.DIR_STORAGE. At import time it is
# passed to io.create_directory if it is not already a directory.
DIR_STORAGE_MODELS = os.path.join(config.DIR_STORAGE, "models")
if not os.path.isdir(DIR_STORAGE_MODELS):
    io.create_directory(DIR_STORAGE_MODELS)


# pylint: disable= too-few-public-methods
class NoEstimator:
    """ The default to EstimatorCollection estimator when no estimator
    is passed in. Perhaps the user is expecting to load it from a
    joblib file but that file is missing.
    """
    def __init__(self):
        pass

    # pylint: disable= no-self-use
    def predict(self, X):
        """ Inform user that they didn't pass an estimator """
        raise RuntimeError("NoEstimator's can't do anything. "
Пример #6
0
 def make_path_runfile(name: str):
     """ Return the path DIR_STORAGE/runfiles/{name}.sh

     The runfiles directory is passed to io.create_directory first. Only
     the path is returned, the .sh file itself is not written here.

     Args:
         name: basename of the runfile, without the .sh extension
     Returns:
         path: path to the runfile
     """
     runfiles_directory = os.path.join(DIR_STORAGE, "runfiles")
     io.create_directory(runfiles_directory)
     script_name = f"{name}.sh"
     return os.path.join(runfiles_directory, script_name)
Пример #7
0
    def _make_runfile_str(
        command: str,
        project: str,
        jobtype: str,
        cores: int,
        time: str,
        name: str,
    ):
        """ Create a slurm runfile string

        The template templates/runfile_{jobtype}.txt under THIS_DIR is
        read and its placeholders are filled in with string.Template.
        The slurm log directory DIR_STORAGE/logs/slurm is created as a
        side effect.

        Args:
            project: slurm project id
            jobtype: "core" or "node"
            cores: number of cores, only substituted for "core" jobs
            time: time like "8:00:00" for 8 hours
            name: job name, make it unique
            command: the command to run
        Returns:
            runfile: A string of a slurm runfile
        Raises:
            TypeError: if jobtype is not "core" or "node"
            KeyError: from Template.substitute if the template uses a
                placeholder that is missing from the mapping

        """

        # Validate before anything else: jobtype is interpolated into the
        # template filename below, so an invalid value must fail here with
        # the intended error, before any file is opened or dir created.
        if jobtype not in ("core", "node"):
            raise TypeError("jobtype must be core or node!")

        path_template = os.path.join(THIS_DIR, "templates",
                                     f"runfile_{jobtype}.txt")
        with open(path_template, "r") as f:
            template = string.Template(f.read())

        dir_logs = os.path.join(DIR_STORAGE, "logs", "slurm")
        io.create_directory(dir_logs)
        log_location = os.path.join(dir_logs, f"{name}.log")

        # Placeholders shared by both job types.
        mapping: dict = {
            "PROJECT_ID": project,
            "JOBTYPE": jobtype,
            "TIME": time,
            "NAME": name,
            "LOGFILE_LOCATION": log_location,
            "COMMAND": command,
        }
        # Don't have N_CORES for node jobs.
        if jobtype == "core":
            mapping["N_CORES"] = cores

        return template.substitute(mapping)