def get_keys(self):
    """
    Gets the doc keys to process

    Keys reported by ``source_keys_updated`` for the materials, thermo and
    every aux store (each compared against the website store) are pooled,
    then restricted to keys that exist in the main materials collection
    under ``self.query``.

    Returns:
        set of keys to process
    """
    # Keys present in the main materials collection for this builder's query
    valid_keys = set(
        self.materials.distinct(self.materials.key, criteria=self.query)
    )

    pending = set(
        source_keys_updated(
            source=self.materials, target=self.website, query=self.query
        )
    )
    pending.update(source_keys_updated(source=self.thermo, target=self.website))

    # Aux docs that have been updated since they were last processed
    for aux_store in self.aux:
        fresh = source_keys_updated(source=aux_store, target=self.website)
        self.logger.info(
            "Only considering {} new keys for {}".format(
                len(fresh), aux_store.collection_name
            )
        )
        pending.update(fresh)

    # Ensure all keys are present in main materials collection
    return pending & valid_keys
def get_items(self):
    """
    Yield the source documents this builder should process.

    When ``self.incremental`` is set, only keys reported by
    ``source_keys_updated`` (source vs. target, limited by ``self.query``)
    are considered; otherwise every distinct source key matching
    ``self.query`` is used. Documents are fetched ``self.chunk_size`` keys
    at a time. ``self.total`` is set to the number of keys.

    Returns:
        generator of source documents
    """
    self.logger.info("Starting {} Builder".format(self.__class__.__name__))
    self.ensure_indexes()

    if self.incremental:
        keys = source_keys_updated(
            source=self.source, target=self.target, query=self.query
        )
    else:
        keys = self.source.distinct(self.source.key, self.query)

    self.logger.info("Processing {} items".format(len(keys)))

    if self.projection:
        # Always fetch the key and last-updated fields alongside the
        # requested projection; the set removes duplicates.
        projection = list(
            set(self.projection + [self.source.key, self.source.lu_field])
        )
    else:
        projection = None

    self.total = len(keys)
    for chunk in grouper(keys, self.chunk_size, None):
        # Drop None entries (presumably the fill value grouper pads the last
        # chunk with). An explicit identity test replaces
        # filter(None.__ne__, ...), which relied on NotImplemented being
        # truthy -- deprecated since Python 3.9, a TypeError in 3.14.
        chunk_keys = [key for key in chunk if key is not None]
        # The query result is fully materialised before yielding, as in the
        # original implementation.
        docs = list(
            self.source.query(
                criteria={self.source.key: {"$in": chunk_keys}},
                properties=projection,
            )
        )
        for doc in docs:
            yield doc
def get_items(self):
    """
    Gets all materials that need new substrates

    Returns:
        generator of dicts with the keys "structure", "task_id" and
        "elastic_tensor" (None when no elasticity doc or tensor is found)
    """
    self.logger.info("Substrate Builder Started")

    self.logger.info("Setting up indicies")
    self.ensure_indicies()

    in_scope = set(
        self.materials.distinct(self.materials.key, criteria=self.query)
    )
    from_materials = source_keys_updated(
        source=self.materials, target=self.substrates, query=self.query
    )
    from_elasticity = source_keys_updated(
        source=self.elasticity, target=self.substrates
    )

    # Restrict the pooled keys to materials within our scope
    candidates = set(from_elasticity + from_materials)
    todo = candidates & in_scope

    self.logger.info(
        "Updating all substrate calculations for {} materials".format(len(todo))
    )

    for key in todo:
        elastic_doc = self.elasticity.query_one(
            criteria={self.elasticity.key: key}
        )
        if elastic_doc:
            tensor = elastic_doc.get("elasticity", {}).get("elastic_tensor", None)
        else:
            tensor = None

        material = self.materials.query_one(
            criteria={self.materials.key: key},
            properties=["structure", self.materials.key],
        )

        yield {
            "structure": material["structure"],
            "task_id": material[self.materials.key],
            "elastic_tensor": tensor,
        }
def updated_keys(self, target, criteria=None):
    """
    Returns keys for docs that are newer in the target store in comparison
    with this store when comparing the last updated field (lu_field)

    Args:
        target (Store): store to look for updated documents
        criteria (dict): mongo query to limit scope

    Returns:
        list of keys that have been updated in target store
    """
    # Index the key and last-updated fields on this store before comparing
    for field in (self.key, self.lu_field):
        self.ensure_index(field)

    # The store passed in acts as the "source"; this store is the "target"
    return source_keys_updated(source=target, target=self, query=criteria)
def get_items(self):
    """
    Yield the items built from each group of updated source documents.

    The keys reported by ``source_keys_updated`` (source vs. target, limited
    by ``self.query``) select the source documents. Those documents are
    fetched with a projection derived from ``self.grouping_properties()``,
    grouped by ``self.docs_to_groups`` and expanded into items by
    ``self.group_to_items``.

    ``self.total`` is set to the number of groups, multiplied by
    ``self.n_items_per_group`` when that attribute is an int >= 1.

    Returns:
        generator of items
    """
    keys = source_keys_updated(self.source, self.target, query=self.query)
    # Every other caller treats source_keys_updated's result as a collection
    # of key values, not a filter, so build a proper "$in" criteria dict
    # instead of handing the keys to query() as-is.
    criteria = {self.source.key: {"$in": list(keys)}}

    # grouping_properties() yields either plain field names or
    # (field, include) pairs; evaluate it once and reuse the result.
    grouping = list(self.grouping_properties())
    if all(isinstance(entry, str) for entry in grouping):
        properties = {entry: 1 for entry in grouping}
        if "_id" not in properties:
            properties["_id"] = 0
    else:
        properties = {entry: include for entry, include in grouping}

    groups = self.docs_to_groups(
        self.source.query(criteria=criteria, properties=properties)
    )

    self.total = len(groups)
    n = getattr(self, "n_items_per_group", None)
    if isinstance(n, int) and n >= 1:
        self.total *= n

    for group in groups:
        yield from self.group_to_items(group)
def get_items(self):
    """
    Gets all materials that need a new DOS

    Only materials that have a uniform bandstructure task and that
    ``source_keys_updated`` reports (materials vs. boltztrap_dos) are
    considered. When a uniform bandstructure task id is present, the matching
    bandstructure doc is attached under "bandstructure_uniform".

    Returns:
        generator of materials to calculate DOS
    """
    self.logger.info("BoltzTrap Dos Builder Started")

    # All relevant materials that have been updated since boltztrap was last
    # run and a uniform bandstructure exists
    q = dict(self.query or {})
    q.update({"bandstructure.uniform_task": {"$exists": 1}})

    # Pass the narrowed query: the original built `q` but then queried with
    # self.query, so the uniform-bandstructure restriction was never applied.
    mats = set(
        source_keys_updated(
            source=self.materials, target=self.boltztrap_dos, query=q
        )
    )

    self.logger.info(
        "Found {} new materials for calculating boltztrap dos".format(len(mats))
    )

    for m in mats:
        # A single document is needed here (it is read with .get and mutated
        # below), so use query_one rather than query. Keyword arguments keep
        # criteria/properties unambiguous.
        mat = self.materials.query_one(
            criteria={self.materials.key: m},
            properties=[self.materials.key, "structure", "bandstructure"],
        )
        if mat is None:
            # Key no longer resolves to a document; nothing to build from.
            continue

        # If a bandstructure uniform task exists
        bs_task_id = mat.get("bandstructure", {}).get("uniform_task", None)
        if bs_task_id:
            bs_dict = self.bandstructures.query_one(
                criteria={self.bandstructures.key: bs_task_id}
            )
            mat["bandstructure_uniform"] = bs_dict

        yield mat