def __process_molecule_design_pools(self):
    """
    Assigns a molecule design pool to every pool registration item.

    Registration items are split into two groups:

     * items referencing at least one newly created molecule design
       (a design contained in ``self.return_value['molecule_designs']``);
       these are grouped by the key built with ``__make_new_mdpri_key``;
     * items referencing only existing designs; these are grouped by
       member hash and looked up through the molecule design pool
       root aggregate with a single filter.

    Items whose member hash matches a pool from the aggregate get that
    pool assigned. For all remaining groups a new
    :class:`MoleculeDesignPool` is created, added to the aggregate and
    assigned to every item in the group.

    If an item already carries a pool that conflicts with the lookup
    result, an error is recorded with ``add_error`` and the item is left
    unchanged.

    The list of newly created pools is stored under
    ``self.return_value['molecule_design_pools']``; the key is only set
    when at least one new pool was created.
    """
    self.add_debug('Processing molecule design pools.')
    md_pool_agg = get_root_aggregate(IMoleculeDesignPool)
    mdpri_hash_map = {}
    hash_func = MoleculeDesignPool.make_member_hash
    new_mdpri_map = {}
    new_mds = self.return_value['molecule_designs']
    for mdpri in self.registration_items:
        # By definition, any mdpri that contains one or more new designs
        # must be new. We must treat this as a special case because
        # building member hash values with the new designs does not work
        # reliably since they may not have been flushed yet.
        mds = [mdri.molecule_design
               for mdri in mdpri.molecule_design_registration_items]
        if any(md in new_mds for md in mds):
            # We use the *structure* as key for the new pools map here
            # as this is always available (unlike the design IDs, which
            # may not have been generated at this point).
            key = self.__make_new_mdpri_key(mdpri)
            new_mdpri_map.setdefault(key, []).append(mdpri)
        else:
            # For pools that consist only of existing designs, we build
            # a map with member hashes as keys so we can query with a
            # single DB call.
            mdpri_hash_map.setdefault(hash_func(mds), []).append(mdpri)
    existing_mdp_map = {}
    if mdpri_hash_map:
        md_pool_agg.filter = cntd(member_hash=list(mdpri_hash_map))
        existing_mdp_map = dict((mdp.member_hash, mdp)
                                for mdp in md_pool_agg.iterator())
        # Update existing molecule design pool registration items.
        for hash_val, mdp in existing_mdp_map.items():
            for mdpri in mdpri_hash_map[hash_val]:
                if mdpri.molecule_design_pool is not None \
                   and mdp.id != mdpri.molecule_design_pool.id:
                    msg = 'The molecule design pool ID (%s) specified ' \
                          'in the sample data does not match the ID ' \
                          'of the pool that retrieved for the design ' \
                          'structure information associated with it.'
                    # Interpolate the offending ID so that the message
                    # does not show a raw "%s" placeholder.
                    self.add_error(msg % mdpri.molecule_design_pool.id)
                    continue
                mdpri.molecule_design_pool = mdp
    # Determine non-existing molecule design pool registration items and
    # build up a map (this makes sure the same design is registered at
    # most once).
    new_mdp_hashes = set(mdpri_hash_map).difference(existing_mdp_map)
    for new_mdp_hash in new_mdp_hashes:
        for mdpri in mdpri_hash_map[new_mdp_hash]:
            if mdpri.molecule_design_pool is not None:
                # This is a case where we supplied both design pool ID
                # *and* structure information in the data file and the
                # two do not match (i.e., the structures were not found).
                msg = 'The molecule design pool ID (%s) specified in the ' \
                      'sample data does not match the design structure ' \
                      'information associated with it.'
                self.add_error(msg % mdpri.molecule_design_pool.id)
                continue
            key = self.__make_new_mdpri_key(mdpri)
            new_mdpri_map.setdefault(key, []).append(mdpri)
    if new_mdpri_map:
        new_md_pools = []
        for mdpris in new_mdpri_map.values():
            # We use the first mdp registration item to create a
            # new pool and update all with the latter.
            md_pool = MoleculeDesignPool(
                set(mdri.molecule_design
                    for mdri in
                    mdpris[0].molecule_design_registration_items))
            md_pool_agg.add(md_pool)
            new_md_pools.append(md_pool)
            for mdpri in mdpris:
                mdpri.molecule_design_pool = md_pool
        self.return_value['molecule_design_pools'] = new_md_pools