Пример #1
0
        def executeTask(t):
            """Build the keyword arguments for ``self.func``, run it, and record the task folder.

            Each parameter of ``self.sig`` is resolved from its annotation:

            * ``Model`` subclasses: the supplied argument's ``wrap(config, exp)``
              result, or the parsed config itself when no argument was given.
            * ``DataSet`` subclasses: the dataset returned by
              ``config.get_dataset`` for the supplied argument.
            * anything else: the supplied argument, if one was given.

            The function is invoked with the task folder as the working directory,
            and that folder is stored on ``t.results`` afterwards.
            """
            config = exp.parse_config()
            context.projectPath = project.path

            actualArgs = {}
            for p, par in self.sig.parameters.items():
                annotation = par.annotation
                if issubclass(annotation, musket_core.model.Model):
                    # Without an explicit argument the parsed config is passed as is.
                    actualArgs[p] = (args[p].wrap(config, exp)
                                     if p in args else config)
                elif issubclass(annotation, musket_core.datasets.DataSet):
                    dataset = config.get_dataset(args[p])
                    # Only label the dataset when it carries neither name attribute.
                    if not (hasattr(dataset, "name")
                            or hasattr(dataset, "origName")):
                        dataset.name = args[p]
                    actualArgs[p] = dataset
                elif p in args:
                    actualArgs[p] = args[p]

            # The task folder sits next to the config file and is named after the task.
            taskFolder = os.path.join(os.path.dirname(config.path), self.name)
            utils.ensure(taskFolder)
            os.chdir(taskFolder)

            self.func(**actualArgs)
            t.results = taskFolder
Пример #2
0
    def calculate(self) -> DataSet:
        """Return predictions for the target dataset, reusing a stored file when present.

        The dataset is ``self.srcDataset`` when set. Otherwise it is resolved
        from ``self.name``: ``"holdout"`` and ``"validation"`` map to the
        corresponding config calls, any other string goes through
        ``cfg.get_dataset``.

        When the config asks for separate predictions and more than one fold
        (or, failing that, more than one stage) is requested, each one is
        predicted on its own and the results are blended.

        Raises:
            ValueError: if no dataset could be resolved.
        """
        ds = self.srcDataset
        nm = self.name
        if ds is not None:
            nm = ds.name
        elif isinstance(self.name, str):
            if self.name == "holdout":
                ds = self.cfg.holdout()
            elif self.name == "validation":
                ds = self.cfg.validation(None, self.fold)
            else:
                ds = self.cfg.get_dataset(self.name)

        if ds is None:
            raise ValueError("No dataset has been specified for prediction")

        predictions_dir = constructPredictionsDirPath(self.cfg.directory())
        ensure(predictions_dir)
        suffix = str(self.stage) + str(self.fold)
        path = f"{predictions_dir}/{nm}{suffix}.npy"

        if os.path.exists(path):
            return self.cfg.load_writeable_dataset(ds, path)

        def blend(parts):
            # Merge the partial predictions, write them to `path`, then reload.
            vls = self.cfg.createPredictionsBlend(parts)
            wd = self.cfg.create_writeable_dataset(vls, path)
            for item in tqdm(vls, "Blending"):
                wd.append(item.prediction)
            wd.commit()
            return self.cfg.load_writeable_dataset(ds, path)

        if self.cfg.separatePredictions:
            # Folds take precedence over stages when both are multi-valued.
            if isinstance(self.fold, list) and len(self.fold) > 1:
                return blend([
                    Prediction(self.cfg, v, self.stage, self.name,
                               self.srcDataset).calculate() for v in self.fold
                ])
            if isinstance(self.stage, list) and len(self.stage) > 1:
                return blend([
                    Prediction(self.cfg, self.fold, v, self.name,
                               self.srcDataset).calculate() for v in self.stage
                ])

        if not self.cfg.needsSessionForPrediction:
            return self.cfg.predict_all_to_dataset(ds, self.fold, self.stage,
                                                   -1, None, False, path)

        try:
            with self.create_session().as_default():
                value = self.cfg.predict_all_to_dataset(
                    ds, self.fold, self.stage, -1, None, False, path)
        finally:
            # Always clear the backend session, even when prediction fails.
            K.clear_session()
        return value
Пример #3
0
    def __init__(self, project: musket_projects.Project):
        """Set up the task for ``project``; no process is attached yet.

        The report directory is passed to ``musket_utils.ensure`` (presumably
        creating it when missing; confirm against ``musket_utils``).
        """
        tasks.Task.__init__(self)

        self.process = None
        self.project = project

        # report_dir() is evaluated only after the attributes above are set.
        report_directory = self.report_dir()
        musket_utils.ensure(report_directory)
Пример #4
0
    def apply(self, all=False):
        """Expand this experiment into one sub-experiment per seed, when configured.

        Returns ``[self]`` when hyperparameters are defined or the config has
        no ``"num_seeds"`` entry. Otherwise returns one ``Experiment`` per seed
        folder ``<path>/<index>``. Unless ``all`` is truthy, seed folders whose
        experiment is already completed are skipped.
        """
        if self.hyperparameters() is not None:
            return [self]

        m = self.config()
        if "num_seeds" not in m:
            return [self]

        expanded = []
        for index in range(m["num_seeds"]):
            seed_dir = self.path + "/" + str(index)
            ensure(seed_dir)
            if not all and Experiment(seed_dir).isCompleted():
                continue

            # Drawn on every iteration, even when no config is dumped below.
            split_seed = random.randint(0, 100000)
            # With `all` set, an existing concrete config is left untouched.
            if not (all and os.path.exists(
                    constructConfigYamlConcretePath(seed_dir))):
                self.dumpTo(seed_dir, {"testSplitSeed": split_seed},
                            ["num_seeds"])

            child = Experiment(seed_dir, self.allowResume)
            child.gpus = self.gpus
            child.onlyReports = self.onlyReports
            child.launchTasks = self.launchTasks
            child.project = self.project
            expanded.append(child)
        return expanded
Пример #5
0
def collect_results(project_id, experiment=None):
    """Copy a project's result files into a staging folder and archive them.

    The staging folder is ``<results_folder>/zip`` and is recreated from
    scratch on every call. Each path returned by ``project_results`` is taken
    relative to the workspace folder; files that do not exist are skipped.
    Finally ``<staging>/<project_id>`` is handed to ``utils.archive`` with
    ``<results_folder>/project`` as the target.

    Args:
        project_id: identifier of the project whose results are collected.
        experiment: optional experiment selector forwarded to ``project_results``.
    """
    workspace_dir = workspace_folder()
    files = project_results(project_id, experiment)
    results_folder = project_results_folder(project_id)

    staging = os.path.join(results_folder, "zip")

    # Start from an empty staging folder so stale files never get archived.
    if os.path.exists(staging):
        shutil.rmtree(staging)

    utils.ensure(staging)

    for item in files:
        src = os.path.join(workspace_dir, item)

        # Logged once per candidate (the original printed it twice for copied files).
        print("collecting: " + src)

        if not os.path.exists(src):
            continue

        dst = os.path.join(staging, item)

        # utils.ensure is called on existing folders elsewhere in this code base,
        # so no separate existence check is needed here.
        utils.ensure(os.path.dirname(dst))

        shutil.copy(src, dst)

    utils.archive(os.path.join(staging, project_id), os.path.join(results_folder, "project"))
def load_item(url, dest):
    """Load the resource described by ``url`` into ``dest``.

    ``url`` is parsed with ``parse_url``; a loader built from the parsed
    description is then asked to load its ``"url"`` entry into ``dest``.
    """
    utils.ensure(dest)

    descriptor = parse_url(url)
    build_loader(descriptor).load(descriptor["url"], dest)
Пример #7
0
    def __init__(self, project: musket_projects.Project, experiment=None):
        """Set up the task for ``project`` and an optional ``experiment``.

        No process is attached yet. The report directory is passed to
        ``musket_utils.ensure`` (presumably creating it when missing; confirm
        against ``musket_utils``).
        """
        tasks.Task.__init__(self)

        self.experiment = experiment
        self.process = None
        self.project = project

        # report_dir() is evaluated only after the attributes above are set.
        report_directory = self.report_dir()
        musket_utils.ensure(report_directory)
Пример #8
0
def get_cache_dir():
    """Return the cache directory path.

    ``CACHE_DIR`` wins when it is set. Otherwise the cache lives in
    ``.cache/`` under the current project path, falling back to the current
    working directory when no project path is available.
    """
    if CACHE_DIR is not None:
        return CACHE_DIR

    base = context.get_current_project_path()
    cache_dir = os.path.join(os.getcwd() if base is None else base, ".cache/")
    utils.ensure(cache_dir)
    return cache_dir
Пример #9
0
def copytree(src, dst):
    """Recursively copy the contents of ``src`` into ``dst``.

    Sub-directories are handled by recursion. A file already present at the
    destination is removed before the copy.
    """
    utils.ensure(dst)

    for entry in listdir(src):
        source_path = os.path.join(src, entry)
        target_path = os.path.join(dst, entry)

        if os.path.isdir(source_path):
            copytree(source_path, target_path)
            continue

        if os.path.exists(target_path):
            os.remove(target_path)

        shutil.copy(source_path, target_path)
Пример #10
0
 def report(self):
     """Render an HTML report for every existing fold/stage log.

     The ``templates/logs.html`` file next to this module is the template.
     For each experiment returned by ``self.apply()`` and each fold/stage pair
     whose log file exists, the log text replaces ``${metrics}`` and the result
     is saved as ``reports/report-<fold>.<stage>.html`` under that experiment.
     """
     cf = self.parse_config()
     template_path = os.path.join(os.path.dirname(__file__), "templates",
                                  "logs.html")
     template = load_string(template_path)
     for e in self.apply():
         for i in range(cf.folds_count):
             for j in range(len(cf.stages)):
                 log_file = e.log_path(i, j)
                 if not os.path.exists(log_file):
                     continue
                 m = load_string(log_file)
                 reports_dir = os.path.join(e.path, "reports")
                 ensure(reports_dir)
                 rp = os.path.join(reports_dir, f"report-{i}.{j}.html")
                 save_string(rp, template.replace("${metrics}", m))
Пример #11
0
 def perform(self, server, reporter: ProgressMonitor):
     """Dump the model's predictions for ``self.datasetName`` to CSV.

     Files are written into a ``predictions`` folder next to the experiment
     config. Returns the predictions file path, or
     ``"<predictions>::::<ground truth>"`` when ``self.exportGroundTruth`` is
     set and a second file is dumped with ``encode_y=True``.
     """
     exp: Experiment = server.experiment(self.experimentPath)
     ms = ModelSpec(**self.spec)
     cf = exp.parse_config()
     wrappedModel = ms.wrap(cf, exp)
     predictions = wrappedModel.predictions(self.datasetName)

     # File names carry the stages and folds the wrapped model covers.
     ps = str(wrappedModel.stages) + "." + str(wrappedModel.folds)
     parentPath = os.path.join(os.path.dirname(cf.path), "predictions")
     ensure(parentPath)

     p1 = os.path.join(parentPath, self.datasetName + ps + "-pr.csv")
     predictions.dump(p1)
     if not self.exportGroundTruth:
         return p1

     p2 = os.path.join(parentPath, self.datasetName + ps + "-gt.csv")
     predictions.dump(p2, encode_y=True)
     return p1 + "::::" + p2
def mark_loaded(root, url):
    """Record ``url`` in ``<root>/.metadata/downloaded_deps.yaml``.

    The YAML file holds a ``dependencies`` list. A file that is missing,
    unreadable, empty, or lacks a usable ``dependencies`` list is treated as
    "no dependencies recorded yet" rather than an error.

    Args:
        root: folder that contains (or will contain) the ``.metadata`` folder.
        url: dependency URL to append to the list.
    """
    fullPath = os.path.join(root, ".metadata")

    utils.ensure(fullPath)

    fullPath = os.path.join(fullPath, "downloaded_deps.yaml")

    try:
        loaded_yaml = load_yaml(fullPath)
    except Exception:
        # Best effort: any load failure starts a fresh record. Unlike a bare
        # `except:`, KeyboardInterrupt and SystemExit are not swallowed.
        loaded_yaml = None

    # An empty file or unexpected top-level content gives no usable mapping.
    if not isinstance(loaded_yaml, dict):
        loaded_yaml = {}

    deps = loaded_yaml.get("dependencies")
    if not isinstance(deps, list):
        deps = []
        loaded_yaml["dependencies"] = deps

    deps.append(url)

    utils.save_yaml(fullPath, loaded_yaml)
Пример #13
0
    def do_POST(self):
        """Handle a POST request; ``/zipfile`` uploads are unpacked and picked up.

        A 200 response is sent before any processing. For paths containing
        ``/zipfile`` the uploaded archive is streamed into a freshly emptied
        temp folder, unpacked there and deleted, then ``pickup_project`` runs.
        All of that happens under the task manager's lock.
        """
        self.send_response(200)
        self.end_headers()

        if "/zipfile" not in self.path:
            return

        with self.server.task_manager.lock:
            destination = utils.temp_folder()

            # Start from an empty folder; removal errors are ignored.
            shutil.rmtree(destination, True)
            musket_utils.ensure(destination)

            zip_path = utils.stream_to_zip(self.rfile, self.headers, "file",
                                           destination)

            shutil.unpack_archive(zip_path, destination)
            os.remove(zip_path)

            self.pickup_project()
Пример #14
0
def get_results(host, project, task_id):
    """Download the project's result delta from ``host`` and merge it into ``project``.

    The archive is fetched from ``<host>/download_delta``, unpacked into
    ``~/.musket_core/delta_zip_download`` (recreated on every call), and every
    file reported by ``delta_files`` is copied to the same relative path under
    ``project``, replacing an existing file.

    Args:
        host: base URL of the server.
        project: local project path; its base name is sent as ``project_id``.
        task_id: not used by this function; kept for interface compatibility.
    """
    url = host + "/download_delta?project_id=" + os.path.basename(project)

    response = requests.get(url, stream=True)

    # The header may be absent (e.g. chunked transfer); tqdm accepts total=None.
    content_length = response.headers.get("Content-Length")
    size = int(content_length) if content_length is not None else None

    destination = os.path.expanduser("~/.musket_core/delta_zip_download")

    if os.path.exists(destination):
        shutil.rmtree(destination)

    utils.ensure(destination)

    zip_name = os.path.join(destination, "project")
    archive_path = zip_name + ".zip"

    # The context manager closes the progress bar even if the download fails.
    with open(archive_path, "wb") as f, tqdm.tqdm(total=size) as pbar:
        for item in response.iter_content(1024):
            f.write(item)

            # Advance by the bytes actually received; the last chunk may be short.
            pbar.update(len(item))

    shutil.unpack_archive(archive_path, os.path.dirname(zip_name), "zip")

    os.remove(archive_path)

    for item in delta_files(destination):
        rel_path = os.path.relpath(item, destination)

        src = os.path.join(destination, rel_path)
        dst = os.path.join(project, rel_path)

        utils.ensure(os.path.dirname(dst))

        if os.path.exists(dst):
            os.remove(dst)

        shutil.copy(src, dst)
Пример #15
0
    def __init__(self, root, id_set=None, parameters=None):
        """Bind the object to the folder ``<root>/<id_set[0]>_<id_set[1]>``.

        Args:
            root: parent folder.
            id_set: sequence whose first two items form the folder name. It must
                hold at least two strings; the empty default raises
                ``IndexError``, as before.
            parameters: parameter mapping stored on the instance. A fresh dict is
                created when omitted, so instances no longer share one default.
        """
        # Mutable defaults are created per call instead of once at definition time.
        id_set = [] if id_set is None else id_set
        self.root = os.path.join(root, id_set[0] + "_" + id_set[1])

        self.parameters = {} if parameters is None else parameters

        ensure(self.root)
Пример #16
0
 def create(self, d: datasets.DataSet, path):
     """Build a ``Visualizer`` for dataset ``d`` that uses ``self.func`` and ``path``."""
     utils.ensure(path)
     visualizer = visualization.Visualizer(self.func, path, d)
     return visualizer
Пример #17
0
 def _attach_visualizer(self, visualizer,
                        dataset) -> visualization.Visualizer:
     """Create ``visualizer`` for ``dataset`` in its own folder.

     The folder is ``<self.path>/visualizations/<visualizer.name>/<dataset.name>``.
     """
     target_dir = os.path.join(self.path, "visualizations", visualizer.name,
                               dataset.name)
     utils.ensure(target_dir)
     return visualizer.create(dataset, target_dir)
Пример #18
0
 def get_visualizer(self, name, datasetName) -> visualization.Visualizer:
     """Create the visualizer registered as ``name`` for the dataset ``datasetName``.

     Output goes to ``<self.path>/visualizations/<name>/<datasetName>``.
     """
     target_dir = os.path.join(self.path, "visualizations", name, datasetName)
     utils.ensure(target_dir)
     factory = self.element(name)
     dataset = self.get_dataset(datasetName)
     return factory.create(dataset, target_dir)
Пример #19
0
def to_dataset(src, experiments, name, data=False):
    """Publish a project, or only its ``data`` folder, as a Kaggle dataset.

    The content is staged under ``~/.musket_core/proj_to_dataset`` (recreated
    on every call). Kaggle metadata is initialised there, with ``name`` used
    as both slug and title. A new version is created when the dataset id is
    already among the user's datasets, otherwise a new dataset is created.

    Args:
        src: project root folder.
        experiments: experiment selection forwarded to ``utils.collect_project``.
        name: dataset slug and title.
        data: when true, only ``<src>/data`` is uploaded instead of the project.

    Returns ``None``; returns early with a printed message when the Kaggle
    client cannot be imported.
    """
    try:
        from kaggle.api.kaggle_api_extended import KaggleApi
        from kaggle.api_client import ApiClient
    except Exception:
        # Per the message below, the import can fail for a missing package or
        # missing credentials. KeyboardInterrupt/SystemExit are not swallowed.
        print("Kaggle not found or user credentials not provided.")

        return

    api = KaggleApi(ApiClient())

    api.authenticate()

    dest = os.path.expanduser("~/.musket_core/proj_to_dataset")

    if os.path.exists(dest):
        shutil.rmtree(dest, ignore_errors=True)

    utils.ensure(dest)

    def reject_zip(path):
        # Same contract as the original lambda: throw for zips, else return ().
        if path.lower().endswith(".zip"):
            return utils.throw("zip files not allowed!")
        return ()

    visit_tree(src, reject_zip)

    if data:
        src = os.path.join(src, "data")
        dest = os.path.join(dest, "data")

        # Fixed: the original passed the undefined name `dst` (NameError).
        shutil.copytree(src, dest)
    else:
        utils.collect_project(src, dest, True, False, experiments)

    api.dataset_initialize(dest)

    metapath = os.path.join(dest, "dataset-metadata.json")

    with open(metapath, "r") as f:
        metadata = f.read()

    # Fill in the placeholders left by dataset_initialize.
    metadata = metadata.replace("INSERT_SLUG_HERE",
                                name).replace("INSERT_TITLE_HERE", name)

    with open(metapath, "w") as f:
        f.write(metadata)

    dataset_id = json.loads(metadata)["id"]

    # Collect all pages of the user's datasets that match `name`.
    sets = []

    page = 1

    resp = api.dataset_list(mine=True, search=name, page=page)

    while len(resp):
        sets += resp

        page += 1

        resp = api.dataset_list(mine=True, search=name, page=page)

    if dataset_id in [str(item) for item in sets]:
        api.dataset_create_version(dest,
                                   delete_old_versions=True,
                                   convert_to_csv=False,
                                   version_notes="new version",
                                   dir_mode="zip")
    else:
        api.dataset_create_new(dest, convert_to_csv=False, dir_mode="zip")
Пример #20
0
    def __init__(self, task_manager):
        """Remember the task manager and set the server workspace root.

        The root is ``~/.musket_core/server_workspace`` with the user's home
        expanded; it is passed to ``musket_utils.ensure``.
        """
        self.task_manager: tasks.TaskManager = task_manager

        workspace_root = os.path.expanduser("~/.musket_core/server_workspace")
        self.root = workspace_root

        musket_utils.ensure(workspace_root)