def test_from_dict(self):
    """Round-trip a TransformedStructure from its dict representation.

    Loads a serialized transformation history from the JSON fixture,
    injects extra ``other_parameters``, rebuilds the object via
    ``from_dict`` and verifies that both further parameter mutation and
    an appended transformation behave as expected.
    """
    # Use a context manager so the fixture's file handle is closed
    # deterministically (the previous json.load(open(...)) leaked it).
    with open(os.path.join(test_dir, "transformations.json")) as f:
        d = json.load(f)
    d["other_parameters"] = {"tags": ["test"]}
    ts = TransformedStructure.from_dict(d)
    ts.other_parameters["author"] = "Will"
    ts.append_transformation(SubstitutionTransformation({"Fe": "Mn"}))
    self.assertEqual("MnPO4", ts.final_structure.composition.reduced_formula)
    self.assertEqual(ts.other_parameters, {"author": "Will", "tags": ["test"]})
def test_from_dict(self):
    """Round-trip a TransformedStructure from its dict representation.

    Loads a serialized transformation history from the JSON test file,
    injects extra ``other_parameters``, rebuilds the object via
    ``from_dict`` and verifies that both further parameter mutation and
    an appended transformation behave as expected.
    """
    # Use a context manager so the fixture's file handle is closed
    # deterministically (the previous json.load(open(...)) leaked it).
    with open(os.path.join(PymatgenTest.TEST_FILES_DIR, "transformations.json")) as f:
        d = json.load(f)
    d["other_parameters"] = {"tags": ["test"]}
    ts = TransformedStructure.from_dict(d)
    ts.other_parameters["author"] = "Will"
    ts.append_transformation(SubstitutionTransformation({"Fe": "Mn"}))
    self.assertEqual("MnPO4", ts.final_structure.composition.reduced_formula)
    self.assertEqual(ts.other_parameters, {"author": "Will", "tags": ["test"]})
def test_from_dict(self):
    """Round-trip a TransformedStructure from its dict representation.

    Loads a serialized transformation history from the JSON fixture,
    injects extra ``other_parameters``, rebuilds the object via
    ``from_dict`` and verifies that both ``set_parameter`` and an
    appended transformation behave as expected.
    """
    # Use a context manager so the fixture's file handle is closed
    # deterministically (the previous json.load(open(...)) leaked it).
    with open(os.path.join(test_dir, 'transformations.json')) as f:
        d = json.load(f)
    d['other_parameters'] = {'tags': ['test']}
    ts = TransformedStructure.from_dict(d)
    ts.set_parameter('author', 'Will')
    ts.append_transformation(SubstitutionTransformation({"Fe": "Mn"}))
    self.assertEqual("MnPO4", ts.final_structure.composition.reduced_formula)
    self.assertEqual(ts.other_parameters, {'author': 'Will', 'tags': ['test']})
def test_from_dict(self):
    """Round-trip a TransformedStructure from its dict representation.

    Loads a serialized transformation history from the JSON fixture,
    injects extra ``other_parameters``, rebuilds the object via
    ``from_dict`` and verifies that both further parameter mutation and
    an appended transformation behave as expected.
    """
    # Use a context manager so the fixture's file handle is closed
    # deterministically (the previous json.load(open(...)) leaked it).
    with open(os.path.join(test_dir, 'transformations.json')) as f:
        d = json.load(f)
    d['other_parameters'] = {'tags': ['test']}
    ts = TransformedStructure.from_dict(d)
    ts.other_parameters['author'] = 'Will'
    ts.append_transformation(SubstitutionTransformation({"Fe": "Mn"}))
    self.assertEqual("MnPO4", ts.final_structure.composition.reduced_formula)
    self.assertEqual(ts.other_parameters, {'author': 'Will', 'tags': ['test']})
def prep(ctx, archive, authors):
    """prep structures from an archive for submission

    Reads structures from a ``.tar.gz``/``.tgz``, ``.bson.gz`` or ``.zip``
    archive (up to ``NMAX``, optionally skipping already-scanned source ids),
    de-duplicates each against the canonical structures of every configured
    collection, and builds StructureNL documents for the non-duplicates.
    With --run, the SNLs are inserted into the DB; otherwise this is a dry run
    that only logs what would happen.
    """
    # Pull run configuration off the click context object.
    run = ctx.obj["RUN"]
    collections = ctx.obj["COLLECTIONS"]
    snl_collection = ctx.obj["CLIENT"].db.snls
    handler = ctx.obj["MONGO_HANDLER"]
    nmax = ctx.obj["NMAX"]
    skip = ctx.obj["SKIP"]
    # TODO no_dupe_check flag
    # Derive the tag and (possibly double) extension from the archive name,
    # e.g. "mytag.tar.gz" -> tag "mytag", ext "tar.gz".
    fname, ext = os.path.splitext(os.path.basename(archive))
    tag, sec_ext = fname.rsplit(".", 1) if "." in fname else [fname, ""]
    logger.info(click.style(f"tag: {tag}", fg="cyan"))
    if sec_ext:
        ext = "".join([sec_ext, ext])
    exts = ["tar.gz", ".tgz", "bson.gz", ".zip"]
    if ext not in exts:
        raise EmmetCliError(
            f"{ext} not supported (yet)! Please use one of {exts}.")
    meta = {"authors": [Author.parse_author(a) for a in authors]}
    # NOTE(review): meta is built with only an "authors" key here, so this
    # lookup always yields "" — confirm whether references were meant to be
    # collected from somewhere.
    references = meta.get("references", "").strip()
    # Source ids already processed for this tag (used with --skip).
    source_ids_scanned = handler.collection.distinct("source_id", {"tags": tag})
    # TODO add archive of StructureNL files
    input_structures, source_total = [], None
    if ext == "bson.gz":
        # NOTE(review): archive handles (gzip/Zip/tar below) are never
        # explicitly closed — consider context managers.
        input_bson = gzip.open(archive)
        source_total = count_file_documents(input_bson)
        for doc in bson.decode_file_iter(input_bson):
            if len(input_structures) >= nmax:
                break
            if skip and doc["db_id"] in source_ids_scanned:
                continue
            # All element symbols occurring anywhere in the structure.
            elements = set([
                specie["element"] for site in doc["structure"]["sites"]
                for specie in site["species"]
            ])
            # Reject structures containing unsupported labels; the for/else
            # only appends when no skip label was found.
            for l in SETTINGS.skip_labels:
                if l in elements:
                    logger.log(
                        logging.ERROR if run else logging.INFO,
                        f'Skip structure {doc["db_id"]}: unsupported element {l}!',
                        extra={
                            "tags": [tag],
                            "source_id": doc["db_id"]
                        },
                    )
                    break
            else:
                s = TransformedStructure.from_dict(doc["structure"])
                s.source_id = doc["db_id"]
                input_structures.append(s)
    elif ext == ".zip":
        input_zip = ZipFile(archive)
        namelist = input_zip.namelist()
        source_total = len(namelist)
        # NOTE: reuses (shadows) the outer `fname` variable.
        for fname in namelist:
            if len(input_structures) >= nmax:
                break
            if skip and fname in source_ids_scanned:
                continue
            contents = input_zip.read(fname)
            fmt = get_format(fname)
            s = Structure.from_str(contents, fmt=fmt)
            s.source_id = fname
            input_structures.append(s)
    else:
        # tar.gz / .tgz archive.
        tar = tarfile.open(archive, "r:gz")
        members = tar.getmembers()
        source_total = len(members)
        for member in members:
            # Skip hidden files (e.g. macOS ._* resource forks).
            if os.path.basename(member.name).startswith("."):
                continue
            if len(input_structures) >= nmax:
                break
            fname = member.name.lower()
            if skip and fname in source_ids_scanned:
                continue
            f = tar.extractfile(member)
            if f:  # extractfile returns None for non-file members
                contents = f.read().decode("utf-8")
                fmt = get_format(fname)
                s = Structure.from_str(contents, fmt=fmt)
                s.source_id = fname
                input_structures.append(s)
    total = len(input_structures)
    logger.info(
        f"{total} of {source_total} structure(s) loaded "
        f"({len(source_ids_scanned)} unique structures already scanned).")
    save_logs(ctx)
    snls, index = [], None
    for istruct in input_structures:
        # number of log messages equals number of structures processed if --run
        # only logger.warning goes to DB if --run
        # Flush buffered SNLs whenever the mongo log handler's buffer is full.
        if run and len(handler.buffer) >= handler.buffer_size:
            insert_snls(ctx, snls)
        # Work on the final (post-transformation) structure, stripped of
        # oxidation states and reduced to its primitive cell.
        struct = (istruct.final_structure if isinstance(
            istruct, TransformedStructure) else istruct)
        struct.remove_oxidation_states()
        struct = struct.get_primitive_structure()
        formula = struct.composition.reduced_formula
        sg = get_sg(struct)
        if not (struct.is_ordered and struct.is_valid()):
            logger.log(
                logging.WARNING if run else logging.INFO,
                f"Skip structure {istruct.source_id}: disordered or invalid!",
                extra={
                    "formula": formula,
                    "spacegroup": sg,
                    "tags": [tag],
                    "source_id": istruct.source_id,
                },
            )
            continue
        for full_name, coll in collections.items():
            # load canonical structures in collection for current formula and
            # duplicate-check them against current structure
            load_canonical_structures(ctx, full_name, formula)
            for canonical_structure in canonical_structures[full_name][
                    formula].get(sg, []):
                if structures_match(struct, canonical_structure):
                    logger.log(
                        logging.WARNING if run else logging.INFO,
                        f"Duplicate for {istruct.source_id} ({formula}/{sg}): {canonical_structure.id}",
                        extra={
                            "formula": formula,
                            "spacegroup": sg,
                            "tags": [tag],
                            "source_id": istruct.source_id,
                            "duplicate_dbname": full_name,
                            "duplicate_id": canonical_structure.id,
                        },
                    )
                    break
            else:
                continue  # no duplicate found -> continue to next collection
            break  # duplicate found
        else:  # no duplicates in any collection
            prefix = snl_collection.database.name
            if index is None:  # get start index for SNL id
                snl_ids = snl_collection.distinct("snl_id")
                index = max(
                    [int(snl_id[len(prefix) + 1:]) for snl_id in snl_ids])
            # NOTE(review): collapsed source makes the indent ambiguous; this
            # placement increments the counter per SNL so each gets a unique
            # id of the form "<dbname>-<n>" — confirm against upstream.
            index += 1
            snl_id = "{}-{}".format(prefix, index)
            kwargs = {"references": references, "projects": [tag]}
            if isinstance(istruct, TransformedStructure):
                snl = istruct.to_snl(meta["authors"], **kwargs)
            else:
                snl = StructureNL(istruct, meta["authors"], **kwargs)
            snl_dct = snl.as_dict()
            snl_dct.update(get_meta_from_structure(struct))
            snl_dct["snl_id"] = snl_id
            snls.append(snl_dct)
            logger.log(
                logging.WARNING if run else logging.INFO,
                f"SNL {snl_id} created for {istruct.source_id} ({formula}/{sg})",
                extra={
                    "formula": formula,
                    "spacegroup": sg,
                    "tags": [tag],
                    "source_id": istruct.source_id,
                },
            )
    # final save
    if run:
        insert_snls(ctx, snls)
# Visualize a structure from a CIF, POSCAR or serialized-TransformedStructure
# JSON file using StructureVis.
# NOTE(review): relies on `args` (parsed CLI namespace) and on project helpers
# (file_open_zip_aware, Poscar, CifParser, StructureVis) defined elsewhere.
excluded_bonding_elements = args.exclude_bonding[0].split(',') if args.exclude_bonding else []
file_format = args.format
filename = args.input_file[0]
s = None
# Infer the format from the file name, overriding the user-supplied format.
if filename.endswith(".cif"):
    file_format = "cif"
elif filename.startswith("POSCAR"):
    file_format = "poscar"
# Raw string: '\.' is an invalid string escape (SyntaxWarning on modern
# Python); behavior is unchanged since '\.' passed through unmodified.
elif re.search(r'\.json', filename):
    file_format = 'mpjson'
if file_format == 'poscar':
    p = Poscar.from_file(filename)
    s = p.struct
elif file_format == 'cif':
    r = CifParser(filename)
    # False -> do not reduce to the primitive cell.
    s = r.get_structures(False)[0]
else:
    # mpjson: a serialized TransformedStructure; visualize its final structure.
    # Close the handle deterministically instead of leaking it.
    f = file_open_zip_aware(filename)
    try:
        d = json.load(f)
    finally:
        f.close()
    ts = TransformedStructure.from_dict(d)
    s = ts.final_structure
if s:
    vis = StructureVis(excluded_bonding_elements=excluded_bonding_elements)
    vis.set_structure(s)
    vis.show()