def path_prediction(run_id: str, dataset: data_api.Dataset) -> str:
    """Return the path to a stored prediction df, creating its run directory.

    Args:
        run_id: Identifier of the run; predictions live in a per-run subdir.
        dataset: Dataset whose name is embedded in the parquet filename.
    Returns:
        Path to predictions_{run_id}_{dataset.name}.parquet under the run dir.
    """
    # Each run gets its own subdirectory of DIR_PREDICTIONS.
    dir_run = os.path.join(DIR_PREDICTIONS, run_id)
    io.create_directory(dir_run)
    fname = f"predictions_{run_id}_{dataset.name}.parquet"
    return os.path.join(dir_run, fname)
def fetch_prediction_competition_data(
    fnames_want: Optional[List[str]] = None,
) -> Dict[str, str]:
    """Fetch and unpack the prediction competition data.

    Downloads the competition zip from the ViEWS website into a temp dir,
    unpacks it, and moves the wanted files into DIR_STORAGE. Skips the
    download entirely if all wanted files are already present.

    Args:
        fnames_want: Filenames to extract from the archive.
            Defaults to ["cm.csv", "pgm.csv"].
    Returns:
        Mapping of filename -> local path for each wanted file.
    Raises:
        RuntimeError: If a wanted file is missing after unpacking.
    """
    fname_zip = "views_pred_comp_data_20200324.zip"
    url = f"https://views.pcr.uu.se/download/datasets/{fname_zip}"

    if not fnames_want:
        fnames_want = ["cm.csv", "pgm.csv"]

    dir_destination = DIR_STORAGE
    paths_want = [
        os.path.join(dir_destination, fname) for fname in fnames_want
    ]
    io.create_directory(dir_destination)

    # Generator avoids building a throwaway list just to test membership.
    if all(os.path.isfile(path) for path in paths_want):
        log.info("Files already where we need them")
    else:
        log.info(f"Fetching {fnames_want} from {url}")
        # Temp dir keeps the zip and its extracted contents out of storage;
        # it is removed automatically when the with-block exits.
        with tempfile.TemporaryDirectory() as tempdir:
            path_zip = os.path.join(tempdir, fname_zip)
            io.fetch_url_to_file(url=url, path=path_zip)
            paths_unzipped = io.unpack_zipfile(path_zip, destination=tempdir)
            paths_destination: List[str] = []
            for path in paths_unzipped:
                fname = os.path.basename(path)
                if fname in fnames_want:
                    path_destination = os.path.join(dir_destination, fname)
                    io.move_file(path_from=path, path_to=path_destination)
                    paths_destination.append(path_destination)
            paths_missing = [
                path for path in paths_want if path not in paths_destination
            ]
            if paths_missing:
                raise RuntimeError(f"Missing paths {paths_missing}")

    return {os.path.basename(path): path for path in paths_want}
def get_path_tar(name: str) -> str:
    """Return a path to a tarfile timestamped for today.

    Ensures DIR_FETCHES exists before building the path.
    """
    io.create_directory(DIR_FETCHES)
    stamp = date.today().strftime("%Y%m%d")
    fname = f"{name}_{stamp}.tar.xz"
    return os.path.join(DIR_FETCHES, fname)
""" Get the prediction competition data from the ViEWS website """ from typing import List, Optional, Dict import tempfile import logging import os from datetime import date from views.apps.data import api from views.utils import io from views.config import DIR_STORAGE from views.specs.data import DATASETS log = logging.getLogger() DIR_UPLOAD = os.path.join(DIR_STORAGE, "upload") io.create_directory(DIR_UPLOAD) def fetch_prediction_competition_data( fnames_want: Optional[List[str]] = None, ) -> Dict[str, str]: """ Fetch and unpack the prediction competition data""" fname_zip = "views_pred_comp_data_20200427.zip" url = f"https://views.pcr.uu.se/download/datasets/{fname_zip}" if not fnames_want: fnames_want = ["cm.csv", "pgm.csv"] dir_destination = os.path.join(DIR_STORAGE, "prediction_competition") paths_want = [ os.path.join(dir_destination, fname) for fname in fnames_want
import joblib # type: ignore import numpy as np # type: ignore import pandas as pd # type: ignore from views.apps.evaluation import lib as evallib from views.apps.transforms import lib as translib from views.apps.ensemble import run_ebma from views.utils import data as datautils, misc as miscutils, io from views import config from . import calibration log = logging.getLogger(__name__) DIR_STORAGE_MODELS = os.path.join(config.DIR_STORAGE, "models") if not os.path.isdir(DIR_STORAGE_MODELS): io.create_directory(DIR_STORAGE_MODELS) # pylint: disable= too-few-public-methods class NoEstimator: """ The default to EstimatorCollection estimator when no estimator is passed in. Perhaps the user is expecting to load it from a joblib file but that file is missing. """ def __init__(self): pass # pylint: disable= no-self-use def predict(self, X): """ Inform user that they didn't pass an estimator """ raise RuntimeError("NoEstimator's can't do anything. "
def make_path_runfile(name: str) -> str:
    """Return path to a runfile DIR_STORAGE/runfiles/{name}.sh, creating dir.

    Args:
        name: Basename of the runfile, without extension.
    Returns:
        Full path to {name}.sh inside the (now existing) runfiles dir.
    """
    # -> str annotation added for consistency with the other path helpers.
    dir_runfiles = os.path.join(DIR_STORAGE, "runfiles")
    io.create_directory(dir_runfiles)
    return os.path.join(dir_runfiles, f"{name}.sh")
def _make_runfile_str(
    command: str,
    project: str,
    jobtype: str,
    cores: int,
    time: str,
    name: str,
) -> str:
    """Create a slurm runfile string.

    Args:
        command: The command to run.
        project: Slurm project id.
        jobtype: "core" or "node".
        cores: Number of cores; only used for "core" jobs.
        time: Time like "8:00:00" for 8 hours.
        name: Job name, make it unique.
    Returns:
        runfile: A string of a slurm runfile.
    Raises:
        TypeError: If jobtype is not "core" or "node".
    """
    # Validate jobtype BEFORE touching the filesystem: the original opened
    # templates/runfile_{jobtype}.txt first, so a bad jobtype surfaced as a
    # FileNotFoundError instead of the intended TypeError.
    if jobtype not in ("core", "node"):
        raise TypeError("jobtype must be core or node!")

    path_template = os.path.join(
        THIS_DIR, "templates", f"runfile_{jobtype}.txt"
    )
    with open(path_template, "r") as f:
        template = string.Template(f.read())

    # Slurm job logs go to DIR_STORAGE/logs/slurm/{name}.log.
    dir_logs = os.path.join(DIR_STORAGE, "logs", "slurm")
    io.create_directory(dir_logs)
    log_location = os.path.join(dir_logs, f"{name}.log")

    # Shared placeholders; N_CORES is templated only for "core" jobs since
    # node jobs take a whole node and their template has no N_CORES slot.
    mapping = {
        "PROJECT_ID": project,
        "JOBTYPE": jobtype,
        "TIME": time,
        "NAME": name,
        "LOGFILE_LOCATION": log_location,
        "COMMAND": command,
    }
    if jobtype == "core":
        mapping["N_CORES"] = cores

    return template.substitute(mapping)