def __init__(self, info_path):
    """Plate object returned by Meadiator.

    Parses the plate info file and exposes plate-level metadata
    (elements, deposition method, anneal conditions) plus the paths of
    associated runs, experiments, and analyses.

    :param info_path: Path to the plate info metadata file.
    :return:
    """
    # defaultdict(str): missing metadata keys read as "" instead of raising.
    tmpd = defaultdict(str, parse_meta(info_path))
    self.path = info_path
    self.plate_id = tmpd["plate_id"]
    self.date = tmpd["created_at"]
    self.lineage = tmpd["lineage"]
    desc = tmpd["description"]
    # Element symbols are embedded in the description, e.g.
    # "Material library with FeNi deposited by ..."
    desc_elstring = desc.replace("Material library with ", "")
    if desc_elstring != "":
        desc_elstring = desc_elstring.split()[0]
    # Insert commas at lower->upper boundaries ("FeNi" -> "Fe,Ni") so the
    # concatenated element symbols can be split apart.
    desc_elstring = re.sub("([A-Za-z])([A-Z])", "\\1,\\2", desc_elstring)
    desc_els = desc_elstring.split(",")
    # NOTE: str.split always returns a list, so the former
    # `isinstance(desc_els, str)` fallback was unreachable and is removed.
    self.elements = list(set(desc_els))
    if desc != "":
        self.deposition_method = desc.split("deposited by ")[-1].split()[0]
    else:
        self.deposition_method = ""
    self.substrate = tmpd["substrate"]
    self.map = tmpd["screening_map_id"]
    # e.g. "... annealed at 400C ..." -> 400.0
    if "annealed at" in desc:
        self.anneal_temp = float(
            desc.split("annealed at ")[-1].split()[0].replace("C", "").strip())
    else:
        self.anneal_temp = 0
    if "to add" in desc:
        self.anneal_type = desc.split("to add ")[-1].split()[0]
    else:
        self.anneal_type = ""
    if " on " in desc:
        self.anneal_date = desc.split(" on ")[-1].strip()
    else:
        self.anneal_date = ""
    # Object cross-links; populated later by the loader.
    self.run_dict = {}
    self.exp_dict = {}
    self.ana_dict = {}
    self.runs = []
    if "runs" in tmpd.keys():
        if isinstance(tmpd["runs"], dict):
            for rund in tmpd["runs"].values():
                self.runs.append(rund["path"])
    self.experiments = []
    if "experiments" in tmpd.keys():
        if isinstance(tmpd["experiments"], dict):
            for expd in tmpd["experiments"].values():
                self.experiments.append(expd["path"])
    self.analyses = []
    if "analyses" in tmpd.keys():
        if isinstance(tmpd["analyses"], dict):
            for anad in tmpd["analyses"].values():
                self.analyses.append(anad["path"])
    del tmpd  # free the parsed metadata
def __init__(self, run_path):
    """Run object returned by Meadiator.

    Parses the run metadata file and organizes per-technique file
    groups, technique parameters, and remaining root-level metadata.

    :param run_path: Path to the run metadata file.
    :return:
    """
    # defaultdict(str): absent keys read as "" instead of raising.
    meta = defaultdict(str, parse_meta(run_path))
    self.path = run_path  # meta["file_path"]
    self.date = ""
    self.type = meta["experiment_type"]
    self.plate_id = meta["plate_id"]
    self.machine = ""
    self.elements = []
    self.anneal_temp = 0
    self.anneal_type = ""
    self.file_count = 0
    common_keys = ["plate_id", "experiment_type"]
    # Keys like "files_technique__<tech>" hold the file-group dicts.
    file_keys = [k for k in meta.keys() if "files_technique__" in k]
    self.files = {}
    for fkey in file_keys:
        tech = fkey.split("__")[-1]
        tech_groups = {}
        for group, group_val in meta[fkey].items():
            if not isinstance(group_val, dict):
                continue
            fdict = make_file_dict(group_val)
            for entry in fdict.values():
                # Record which archive each file came from.
                entry.update(source_zip=self.path)
            tech_groups[group] = fdict
        self.files[tech] = tech_groups
    param_keys = [
        k for k in meta.keys() if "params__" in k or k == "parameters"
    ]
    self.techs = list(self.files.keys())
    self.tech_params = {}
    collected = []
    for pkey in param_keys:
        self.tech_params[pkey.split("__")[-1]] = meta[pkey]
        collected.extend(meta[pkey].keys())
    self.tech_param_groups = list(self.tech_params.keys())
    self.tech_param_keys = list(set(collected))
    # Everything not already consumed above is kept as root metadata.
    skip = common_keys + file_keys + param_keys
    other_keys = [k for k in meta.keys() if k not in skip]
    self.root_params = {k: meta[k] for k in other_keys}
    self.root_keys = other_keys
    del meta  # free the parsed metadata
def get_info(self, plate_id, return_dict=False):
    """Return the metadata file path (or parsed metadata) for a plate.

    Looks for "<plate_id>.zip" first, then "<plate_id>.info", inside
    the plate's directory under ``self.plate_dir``.

    :param plate_id: Integer plate_id.
    :param return_dict: When True, parse the file and return the
        metadata dict; when False (default), return the file path.
    :return: Absolute path to the zip/info file as a string ("" when
        neither exists), or the parsed metadata dict if return_dict.
    """
    zip_path = pjoin(self.plate_dir, str(plate_id), f"{plate_id}.zip")
    info_path = pjoin(dirname(zip_path), f"{plate_id}.info")
    # Prefer the zip archive over the bare .info file.
    if exists(zip_path):
        file_path = zip_path
    elif exists(info_path):
        file_path = info_path
    else:
        file_path = ""
    return parse_meta(file_path) if return_dict else file_path
def __init__(self, exp_path):
    """Experiment object returned by Meadiator.

    Parses the experiment metadata file and merges the per-run
    technique file groups into a single self.files mapping.

    :param exp_path: Path to the experiment metadata file.
    :return:
    """
    # defaultdict(str): absent keys read as "" instead of raising.
    meta = defaultdict(str, parse_meta(exp_path))
    # Release root is everything before the "experiment" path segment.
    base_dir = exp_path.strip("\\/").split("experiment")[0]
    self.path = exp_path  # meta["file_path"]
    self.date = ""
    # Experiment type is the name of the containing directory.
    self.type = basename(dirname(self.path))
    self.plate_id = meta["plate_ids"]
    if isinstance(self.plate_id, str):
        # "1234, 5678" -> [1234, 5678]
        self.plate_id = [int(p.strip()) for p in self.plate_id.split(",")]
    self.elements = []
    self.anneal_temp = 0
    self.anneal_type = ""
    # One "run__*" key per run belonging to this experiment.
    run_keys = [k for k in meta.keys() if k.startswith("run__")]
    self.run_count = len(run_keys)
    self.run_paths = [meta[k]["run_path"] for k in run_keys]
    self.files = {}
    for rkey in run_keys:
        run_meta = meta[rkey]
        # Files of an experiment live in the run's archive, not the
        # experiment's own path; invariant per run, so computed once.
        run_zip = pjoin(base_dir, "run", *run_meta["run_path"].split("/"))
        for fkey in run_meta.keys():
            if "files_technique__" not in fkey:
                continue
            tech = fkey.split("__")[-1]
            tech_groups = self.files.setdefault(tech, {})
            for group, group_val in run_meta[fkey].items():
                merged = tech_groups.setdefault(group, {})
                if not isinstance(group_val, dict):
                    continue
                fdict = make_file_dict(group_val)
                for entry in fdict.values():
                    entry.update(source_zip=run_zip)
                merged.update(fdict)
    self.techs = list(self.files.keys())
    del meta  # free the parsed metadata
    self.runs = []  # linked later by the loader
def load_objects(self, overwrite_cache=False):
    """Load MEAD objects from file paths.

    Builds self.meadia from the release file tree (or loads a cached
    pickle when one exists), cross-links run/exp/ana objects to their
    plates, propagates plate-level metadata onto them, and writes the
    cache back to disk.

    :param overwrite_cache: When True, rebuild the object dictionary
        even if a cached pickle exists.
    :return:
    """
    objects_pck = f"{basename(self.base_dir)}_objects.bz2.pck"
    if exists(objects_pck) and not overwrite_cache:
        self.meadia = pickle.load(bz2.BZ2File(objects_pck, "r"))
        print(
            f"found existing objects dictionary in {pjoin(getcwd(), objects_pck)}"
        )
        return
    self.log_entry("loading plate objects")
    self.meadia["plate"] = {}
    for plate_path in self.files["plate"]:
        # "1234-el.info" -> 1234
        plate_key = int(basename(plate_path).split(".")[0].split("-")[0])
        self.meadia["plate"][plate_key] = Plate(plate_path)
    for key, key_dir in self.object_tups:
        self.log_entry(f"loading {key} objects")
        for file_path in self.files[key]:
            # First path segment under key_dir is the object type.
            obj_type = file_path.replace(key_dir,
                                         "").strip("\\/").split(sep)[0]
            if obj_type not in self.meadia[key].keys():
                self.meadia[key][obj_type] = {}
            obj_key = basename(file_path)
            try:
                meadia_obj = self.object_dict[key](file_path)
                self.meadia[key][obj_type][obj_key] = meadia_obj
                pid = meadia_obj.plate_id
                if isinstance(pid, str):
                    # BUGFIX: a single-id string without a comma used to
                    # leave `pids` unbound (NameError, swallowed below as
                    # a load error). split(",") handles both cases.
                    pids = [int(x.strip()) for x in pid.split(",")]
                elif isinstance(pid, list):
                    pids = [int(x) for x in pid]
                else:
                    pids = [pid]
                for p in pids:
                    if p in self.meadia["plate"].keys():
                        # Plate keeps run_dict/exp_dict/ana_dict; pick the
                        # one matching this object category (no-op for any
                        # other key).
                        obj_dict = getattr(self.meadia["plate"][p],
                                           f"{key}_dict", None)
                        if obj_dict is not None:
                            obj_dict.setdefault(obj_type,
                                                {})[obj_key] = meadia_obj
                    else:
                        self.meadia["load_errors"].append(
                            (file_path, f"plate {p} not in release"))
            except Exception as e:
                self.meadia["load_errors"].append((file_path, str(e)))
    num_errors = len(self.meadia["load_errors"])
    self.log_entry(
        f"{num_errors} files were not loaded due to read errors")
    # Propagate plate-level metadata onto linked objects.
    in_info_no_release = 0
    for plate_path in self.files["plate"]:
        in_info_no_release += self._propagate_plate_meta(plate_path)
    if in_info_no_release > 0:
        self.log_entry(
            f"{in_info_no_release} runs/exps/anas are present in plate info files but were not included in the release"
        )
    # if len(self.load_errors) == 0:
    self.gen_links()
    pickle.dump(self.meadia, bz2.BZ2File(objects_pck, "w"))
    self.log_entry(
        f"wrote object dictionary to {pjoin(getcwd(), objects_pck)}")

def _propagate_plate_meta(self, plate_path):
    """Copy plate metadata (elements, anneal info, dates) onto the
    run/exp/ana objects referenced by one plate's info file.

    :param plate_path: Path to a plate info file.
    :return: Number of objects listed in the plate info but absent
        from the release.
    """
    plate_meta = parse_meta(plate_path)
    if "plate_id" not in plate_meta.keys():
        self.log_entry(f"{plate_path} does not contain a 'plate_id' key")
        return 0
    # Renamed from `id` to avoid shadowing the builtin.
    plate_id = plate_meta["plate_id"]
    plate_obj = self.meadia["plate"][plate_id]
    missing = 0
    for block in ["runs", "experiments", "analyses"]:
        blk = block[:3]  # "runs" -> "run", etc.
        if block not in plate_meta.keys():
            continue
        if not isinstance(plate_meta[block], dict):
            continue
        for blkd in plate_meta[block].values():
            otype = blkd["path"].split("/")[1]
            okey = blkd["path"].split("/")[-1]
            if otype not in self.meadia[blk].keys():
                print(
                    f"{otype} not found in {blk} info for plate {plate_id}"
                )
                continue
            if okey not in self.meadia[blk][otype].keys():
                missing += 1
                self.meadia["load_errors"].append(
                    f"{otype} {blk} {okey} in plate {plate_id} info but not in release"
                )
                continue
            # Update date, elements, anneal_temp, anneal_type.
            obj = self.meadia[blk][otype][okey]
            obj.elements = plate_obj.elements
            # BUGFIX: anneal_temp was assigned twice in the original.
            obj.anneal_temp = plate_obj.anneal_temp
            obj.anneal_type = plate_obj.anneal_type
            obj.date = blkd["created_at"]
            if blk == "run":
                # Update machine and file_count.
                if "machine" in blkd.keys():
                    obj.machine = blkd["machine"]
                if "description" in blkd.keys():
                    # BUGFIX: file_count is initialized as int 0 in Run;
                    # store the parsed count as int, not the raw substring
                    # of "... containing N files ...".
                    obj.file_count = int(
                        blkd["description"].split("containing ")[1].split(
                            " files")[0])
            elif blk == "exp":
                # Link run objects onto the experiment.
                if "run_paths" in blkd.keys():
                    obj.runs = [
                        self.meadia["run"][otype][basename(p)]
                        for p in blkd["run_paths"]
                    ]
            elif blk == "ana":
                # Link the source experiment onto the analysis.
                if "experiment_path" in blkd.keys():
                    obj.experiment = self.meadia["exp"][otype][basename(
                        blkd["experiment_path"])]
    return missing
def __init__(self, ana_path):
    """Analysis object returned by Meadiator.

    Parses an analysis metadata file and collects per-analysis
    metadata (name, description, version, parameters, technique) plus
    the produced file groups.

    :param ana_path: Path to the analysis metadata file.
    :return:
    """
    # defaultdict(str): absent keys read as "" instead of raising.
    tmpd = defaultdict(str, parse_meta(ana_path))
    self.path = ana_path  # tmpd["file_path"]
    self.date = ""
    # Analysis type is the name of the containing directory.
    self.type = basename(dirname(self.path))
    self.plate_id = tmpd["plate_ids"]
    if isinstance(self.plate_id, str):
        # "1234, 5678" -> [1234, 5678]
        self.plate_id = [int(x.strip()) for x in self.plate_id.split(",")]
    self.elements = []
    self.anneal_temp = 0
    self.anneal_type = ""
    self.experiment_path = tmpd["experiment_path"]
    self.analyses = {}
    self.analysis_names = []
    self.files = {}
    # One "ana__*" key per analysis step in the metadata.
    ana_keys = [k for k in tmpd.keys() if k.startswith("ana__")]
    self.ana_count = len(ana_keys)
    for ana_key in ana_keys:
        self.analysis_names.append(tmpd[ana_key]["name"])
        self.analyses[ana_key] = {"name": tmpd[ana_key]["name"]}
        if "description" in tmpd[ana_key].keys():
            self.analyses[ana_key]["description"] = tmpd[ana_key][
                "description"]
        if "analysis_fcn_version" in tmpd[ana_key].keys():
            self.analyses[ana_key]["version"] = tmpd[ana_key][
                "analysis_fcn_version"]
        self.analyses[ana_key]["parameters"] = {}
        if "parameters" in tmpd[ana_key].keys():
            self.analyses[ana_key]["parameters"].update(
                tmpd[ana_key]["parameters"])
        file_keys = [k for k in tmpd[ana_key].keys() if "files_" in k]
        # Determine the technique label for this analysis's files.
        if "technique" in tmpd[ana_key].keys():
            file_tech = tmpd[ana_key]["technique"]
        elif "analysis_general_type" in tmpd[ana_key].keys():
            if tmpd[ana_key]["analysis_general_type"] == "process_fom":
                # Follow the "select_ana" chain back to the source
                # analysis that carries a "technique" entry.
                # NOTE(review): a circular select_ana chain would loop
                # forever here — assumed not to occur in release data.
                sourced = tmpd[tmpd[ana_key]["parameters"]["select_ana"]]
                while "technique" not in sourced.keys():
                    sourced = tmpd[sourced["parameters"]["select_ana"]]
                file_tech = sourced["technique"]
            else:
                file_tech = "no_technique"
        else:
            file_tech = "no_technique"
        self.analyses[ana_key]["technique"] = file_tech
        for key in file_keys:
            if file_tech not in self.files.keys():
                self.files[file_tech] = {}
            for file_group in tmpd[ana_key][key].keys():
                if file_group not in self.files[file_tech].keys():
                    self.files[file_tech][file_group] = {}
                if isinstance(tmpd[ana_key][key][file_group],
                              dict):
                    file_dict = make_file_dict(
                        tmpd[ana_key][key][file_group])
                    for v in file_dict.values():
                        # Record which archive each file came from.
                        v.update(source_zip=self.path)
                    self.files[file_tech][file_group].update(file_dict)
                else:
                    continue
    self.techs = list(self.files.keys())
    del tmpd  # free the parsed metadata
    self.experiment = None  # linked later by the loader