def filter_and_group_tasks(self, tasks: List[Dict]) -> Iterator[List[Dict]]:
    """
    Keep the tasks of an allowed type and yield them grouped by structure.

    Args:
        tasks: task documents as dicts; every task that passes the filter
            must provide ``task["output"]["structure"]``

    Yields:
        one list of task dicts per group returned by ``group_structures``
    """
    # Fall back to the module-level defaults when no types are configured
    permitted = self.allowed_task_types or _TASK_TYPES

    def _is_permitted(doc):
        # `in` test of each permitted type against the value from task_type()
        return any(
            kind in task_type(doc.get("orig_inputs", {})) for kind in permitted
        )

    kept = list(filter(_is_permitted, tasks))

    indexed = []
    for position, doc in enumerate(kept):
        structure = Structure.from_dict(doc["output"]["structure"])
        # Remember which entry of `kept` this structure came from
        structure.index = position
        indexed.append(structure)

    tolerances = {
        "ltol": self.ltol,
        "stol": self.stol,
        "angle_tol": self.angle_tol,
        "symprec": self.symprec,
    }
    for cluster in group_structures(indexed, **tolerances):
        # Map each grouped structure back to its originating task
        yield [kept[member.index] for member in cluster]
def filter_and_group_tasks(self, tasks: List[TaskDocument]) -> Iterator[List[Dict]]:
    """
    Keep the tasks whose task type is allowed by the settings and yield
    them grouped by their output structure.

    Args:
        tasks: task documents exposing ``task_type`` and ``output.structure``

    Yields:
        one list of tasks per group returned by ``group_structures``
    """
    candidates = []
    for doc in tasks:
        for allowed in self.settings.VASP_ALLOWED_VASP_TYPES:
            # Identity comparison, exactly as the filter is defined
            if allowed is doc.task_type:
                candidates.append(doc)
                break

    def _tagged_structure(position, doc):
        # Stamp the structure with its position in `candidates` so the
        # owning task can be recovered after grouping
        structure = doc.output.structure
        structure.index = position
        return structure

    tagged = [
        _tagged_structure(position, doc)
        for position, doc in enumerate(candidates)
    ]

    clusters = group_structures(
        tagged,
        ltol=self.settings.LTOL,
        stol=self.settings.STOL,
        angle_tol=self.settings.ANGLE_TOL,
        symprec=self.settings.SYMPREC,
    )

    for cluster in clusters:
        yield [candidates[member.index] for member in cluster]
def match(self, snls, mat):
    """
    Finds the SNLs that are grouped together with a given materials doc

    Args:
        snls ([dict]): the snls list
        mat (dict): a materials doc; its "structure", "initial_structures"
            and "material_id" entries are read

    Returns:
        list of StructureNL objects that ended up in a group containing at
        least one Structure built from the materials doc
    """
    m_strucs = [Structure.from_dict(mat["structure"])] + [
        Structure.from_dict(init_struc) for init_struc in mat["initial_structures"]
    ]
    snl_strucs = [StructureNL.from_dict(snl) for snl in snls]

    groups = group_structures(
        m_strucs + snl_strucs,
        ltol=self.settings.LTOL,
        stol=self.settings.STOL,
        angle_tol=self.settings.ANGLE_TOL,
    )

    # A group only counts as a match when it holds one of the material's
    # own Structure objects
    matched_groups = [
        group
        for group in groups
        if any(isinstance(struc, Structure) for struc in group)
    ]

    # The outer loop must come first in a nested comprehension: iterating
    # `group` before it is bound raises NameError (or reads a stale global)
    matched_snls = [
        struc
        for group in matched_groups
        for struc in group
        if isinstance(struc, StructureNL)
    ]

    self.logger.debug(f"Found {len(matched_snls)} SNLs for {mat['material_id']}")
    return matched_snls
def load_canonical_structures(ctx, full_name, formula): from emmet.core.vasp.calc_types import task_type # TODO import error collection = ctx.obj["COLLECTIONS"][full_name] if formula not in canonical_structures[full_name]: canonical_structures[full_name][formula] = {} structures = defaultdict(list) if "tasks" in full_name: query = {"formula_pretty": formula} query.update(SETTINGS.task_base_query) projection = {"input.structure": 1, "task_id": 1, "orig_inputs": 1} tasks = collection.find(query, projection) for task in tasks: task_label = task_type(task["orig_inputs"], include_calc_type=False) if task_label == "Structure Optimization": s = load_structure(task["input"]["structure"]) s.id = task["task_id"] structures[get_sg(s)].append(s) elif "snl" in full_name: query = {"$or": [{k: formula} for k in SETTINGS.aggregation_keys]} query.update(SETTINGS.exclude) query.update(SETTINGS.base_query) for group in aggregate_by_formula(collection, query): for dct in group["structures"]: s = load_structure(dct) s.id = dct["snl_id"] if "snl_id" in dct else dct["task_id"] structures[get_sg(s)].append(s) if structures: for sg, slist in structures.items(): canonical_structures[full_name][formula][sg] = [ g[0] for g in group_structures(slist) ] total = sum([ len(x) for x in canonical_structures[full_name][formula].values() ]) logger.debug( f"{total} canonical structure(s) for {formula} in {full_name}")
def filter_and_group_tasks(self, tasks: List[TaskDocument]) -> Iterator[List[Dict]]:
    """
    Keep the tasks of an allowed task type, group them by output structure,
    and split every group along its sandbox sets.

    Args:
        tasks: task documents exposing ``task_type``, ``output.structure``
            and ``sandboxes``

    Yields:
        for each structure group and each sandbox set returned by
        ``maximal_spanning_non_intersecting_subsets``, the tasks of that
        group sharing at least one sandbox with the set
    """

    def _is_allowed(doc):
        # Identity comparison, exactly as the filter is defined
        return any(kind is doc.task_type for kind in self._allowed_task_types)

    eligible = list(filter(_is_allowed, tasks))

    tagged = []
    for position, doc in enumerate(eligible):
        structure = doc.output.structure
        # Record the task's position so it can be looked up after grouping
        structure.index = position
        tagged.append(structure)

    clusters = group_structures(
        tagged,
        ltol=self.ltol,
        stol=self.stol,
        angle_tol=self.angle_tol,
        symprec=self.symprec,
    )

    for cluster in clusters:
        members = [eligible[entry.index] for entry in cluster]
        distinct_sandbox_sets = {frozenset(doc.sandboxes) for doc in members}
        for sbx_set in maximal_spanning_non_intersecting_subsets(
            distinct_sandbox_sets
        ):
            # A task belongs to this split when it shares any sandbox with it
            yield [
                doc
                for doc in members
                if not set(doc.sandboxes).isdisjoint(sbx_set)
            ]
def structures_match(s1, s2):
    """Return True when ``group_structures`` puts ``s1`` and ``s2`` in exactly one group."""
    groups = list(group_structures([s1, s2]))
    return len(groups) == 1