def test_pad_positions(null_position_structure):
    """Check that `pad_positions` replaces None coordinates with a padding float value.

    The first call operates on positions containing None and must report that
    padding took place; feeding its output back in must be reported as a no-op.
    """
    raw_positions = null_position_structure.attributes.cartesian_site_positions

    # First pass: the None values must be gone and the padding flag must be set.
    positions, padded_position = pad_positions(raw_positions)
    assert all(value is not None for vector in positions for value in vector)
    assert padded_position

    # Second pass: nothing is left to pad, so the flag must be falsy.
    positions, padded_position = pad_positions(positions)
    assert all(value is not None for vector in positions for value in vector)
    assert not padded_position
def get_jarvis_atoms(optimade_structure: OptimadeStructure) -> Atoms:
    """
    Get jarvis Atoms from OPTIMADE structure

    NOTE: Cannot handle partial occupancies

    :param optimade_structure: OPTIMADE structure
    :return: jarvis.core.Atoms (`None`, after a warning, if jarvis-tools is unavailable)
    :raises ConversionError: if the structure is flagged with the "disorder" structure feature
    """
    if globals().get("Atoms", None) is None:
        warn(JARVIS_NOT_FOUND)
        return None

    attributes = optimade_structure.attributes

    # Cannot handle partial occupancies
    if "disorder" in attributes.structure_features:
        raise ConversionError(
            "jarvis-tools cannot handle structures with partial occupancies."
        )

    # Convert null/None values to the padding float value
    cartesian_site_positions, _ = pad_positions(attributes.cartesian_site_positions)

    return Atoms(
        lattice_mat=attributes.lattice_vectors,
        # One entry per *site*, in the same order as the coordinates.
        # Listing `attributes.species` instead yields one entry per distinct
        # species, which disagrees with `coords` as soon as a species occupies
        # more than one site.
        elements=list(attributes.species_at_sites),
        coords=cartesian_site_positions,
        cartesian=True,
    )
def get_aiida_structure_data(optimade_structure: OptimadeStructure) -> StructureData:
    """
    Get AiiDA StructureData from OPTIMADE structure

    Every OPTIMADE species becomes an AiiDA `Kind` (vacancies are dropped from
    the symbol list) and every site becomes an AiiDA `Site`.

    :param optimade_structure: OPTIMADE structure
    :return: StructureData (`None`, after a warning, if AiiDA is unavailable)
    """
    if globals().get("StructureData") is None:
        warn(AIIDA_NOT_FOUND)
        return None

    attributes = optimade_structure.attributes

    # Convert null/None values to float("nan")
    lattice_vectors, adjust_cell = pad_cell(attributes.lattice_vectors)
    structure = StructureData(cell=lattice_vectors)

    # Add Kinds
    for kind in attributes.species:
        # NOTE: The non-chemical element identifier "X" is identical to how AiiDA handles this,
        # so it will be treated the same as any other true chemical identifier.
        # "vacancy" entries are skipped. This is how AiiDA handles vacancies;
        # to not include them, while keeping the concentration in a site less than 1.
        occupants = [
            (chemical_symbol, kind.concentration[position])
            for position, chemical_symbol in enumerate(kind.chemical_symbols)
            if chemical_symbol != "vacancy"
        ]
        symbols = [chemical_symbol for chemical_symbol, _ in occupants]
        weights = [weight for _, weight in occupants]

        # AiiDA needs a definition for the mass, and for it to be > 0
        # mass is OPTIONAL for OPTIMADE structures
        mass = kind.mass or 1

        new_kind = Kind(symbols=symbols, weights=weights, mass=mass, name=kind.name)
        structure.append_kind(new_kind)

    # Convert null/None values to float("nan")
    cartesian_site_positions, _ = pad_positions(attributes.cartesian_site_positions)

    # Add Sites
    # range() to ensure 1-to-1 between kind and site
    for site_index in range(attributes.nsites):
        new_site = Site(
            kind_name=attributes.species_at_sites[site_index],
            position=cartesian_site_positions[site_index],
        )
        structure.append_site(new_site)

    if adjust_cell:
        # The cell contained padded values: let AiiDA adjust it, telling it
        # which dimensions are periodic.
        periodic = [bool(dim.value) for dim in attributes.dimension_types]
        structure._adjust_default_cell(pbc=periodic)

    return structure
def _get_molecule(optimade_structure: OptimadeStructure) -> Molecule:
    """Build a pymatgen Molecule from the sites of an OPTIMADE structure."""
    attributes = optimade_structure.attributes

    # Replace null/None coordinates by the padding value before handing them on.
    padded_positions = pad_positions(attributes.cartesian_site_positions)[0]

    site_species = _pymatgen_species(
        nsites=attributes.nsites,
        species=attributes.species,
        species_at_sites=attributes.species_at_sites,
    )

    return Molecule(species=site_species, coords=padded_positions)
def _get_structure(optimade_structure: OptimadeStructure) -> Structure:
    """Build a pymatgen Structure (lattice + cartesian sites) from an OPTIMADE structure."""
    attributes = optimade_structure.attributes

    # Replace null/None coordinates by the padding value before handing them on.
    padded_positions = pad_positions(attributes.cartesian_site_positions)[0]

    site_species = _pymatgen_species(
        nsites=attributes.nsites,
        species=attributes.species,
        species_at_sites=attributes.species_at_sites,
    )

    return Structure(
        lattice=attributes.lattice_vectors,
        species=site_species,
        coords=padded_positions,
        coords_are_cartesian=True,
    )
def get_pdbx_mmcif(  # pylint: disable=too-many-locals
    optimade_structure: OptimadeStructure,
) -> str:  # pragma: no cover
    """
    Write Protein Data Bank (PDB) structure in the PDBx/mmCIF format from OPTIMADE structure

    Inspired by `ase.io.proteindatabank:write_proteindatabank()` in the ASE package,
    as well as `ase.io.cif:write_cif()`.

    Fractional coordinates are written if the structure already carries
    `fractional_site_positions`, or if all three dimensions are periodic (in
    which case they are derived from the cartesian positions). Otherwise the
    cartesian coordinates are written.

    The passed structure is not modified: derived fractional coordinates are
    kept local instead of being attached to `optimade_structure.attributes`.

    :param optimade_structure: OPTIMADE structure
    :return: str (`None`, after a warning, if NumPy is unavailable)
    """
    if globals().get("np", None) is None:
        warn(NUMPY_NOT_FOUND)
        return None

    entry_id = f"{optimade_structure.type}{optimade_structure.id}"
    attributes = optimade_structure.attributes

    # Each header line must be its own '#' comment line; otherwise the
    # `data_` block header that follows would end up inside the comment.
    parts = [
        "#\n"
        "# Created from an OPTIMADE structure.\n"
        "#\n"
        "# See https://www.optimade.org and/or\n"
        "# https://github.com/Materials-Consortia/OPTIMADE for more information.\n"
        "#\n"
        "# CIF 2.0 format, specifically mmCIF (PDBx).\n"
        "# See http://mmcif.wwpdb.org for more information.\n"
        "#\n",
        f"data_{entry_id}\n_entry.id {entry_id}\n#\n",
    ]

    # NOTE: This is a bit ahead of its time, since this OPTIMADE property is part of an open PR.
    # See https://github.com/Materials-Consortia/OPTIMADE/pull/206
    # A missing attribute and an explicit None are treated alike.
    fractional_site_positions = getattr(attributes, "fractional_site_positions", None)

    # Do this only if there's three non-zero lattice vectors
    if all(attributes.dimension_types):
        a_vector, b_vector, c_vector, alpha, beta, gamma = cell_to_cellpar(
            attributes.lattice_vectors
        )

        parts.append(
            f"_cell.entry_id {entry_id}\n"
            f"_cell.length_a {a_vector:g}\n"
            f"_cell.length_b {b_vector:g}\n"
            f"_cell.length_c {c_vector:g}\n"
            f"_cell.angle_alpha {alpha:g}\n"
            f"_cell.angle_beta {beta:g}\n"
            f"_cell.angle_gamma {gamma:g}\n"
            "_cell.Z_PDB 1\n#\n"
        )
        parts.append(
            f"_symmetry.entry_id {entry_id}\n"
            "_symmetry.space_group_name_H-M 'P 1'\n"
            "_symmetry.Int_Tables_number 1\n#\n"
        )

        # Since some structure viewers are having issues with cartesian coordinates,
        # we calculate the fractional coordinates if this is a 3D structure and we have
        # all the necessary information.
        if fractional_site_positions is None:
            padded_cartesian, _ = pad_positions(attributes.cartesian_site_positions)
            fractional_site_positions = fractional_coordinates(
                cell=attributes.lattice_vectors, cartesian_positions=padded_cartesian
            )

    # TODO: The following lines are perhaps needed to create a "valid" PDBx/mmCIF file.
    # However, at the same time, the information here is "default" and will for all
    # structures "at this moment in time" be the same. I.e., no information is gained
    # by adding this now. If it is found that they indeed are needed to create a "valid"
    # PDBx/mmCIF file, they should be included in the output.
    # cif += (
    #     "loop_\n"
    #     "_struct_asym.id\n"
    #     "_struct_asym.entity_id\n"
    #     "A 1\n#\n"  # At this point, not using this feature.
    # )
    # cif += (
    #     "loop_\n"
    #     "_chem_comp.id\n"
    #     "X\n#\n"  # At this point, not using this feature.
    # )
    # cif += (
    #     "loop_\n"
    #     "_entity.id\n"
    #     "1\n#\n"  # At this point, not using this feature.
    # )

    if fractional_site_positions is not None:
        coord_type = "fract"
        sites, _ = pad_positions(fractional_site_positions)
    else:
        coord_type = "Cartn"
        sites, _ = pad_positions(attributes.cartesian_site_positions)

    parts.append(
        "loop_\n"
        "_atom_site.group_PDB\n"  # Always "ATOM"
        "_atom_site.id\n"  # number (1-counting)
        "_atom_site.type_symbol\n"  # species.chemical_symbols
        "_atom_site.label_atom_id\n"  # species.chemical_symbols symbol + number
        # For these next keys, see the TODO above.
        # "_atom_site.label_asym_id\n"  # Will be set to "A" _struct_asym.id above
        # "_atom_site.label_comp_id\n"  # Will be set to "X" _chem_comp.id above
        # "_atom_site.label_entity_id\n"  # Will be set to "1" _entity.id above
        # "_atom_site.label_seq_id\n"
        "_atom_site.occupancy\n"  # species.concentration
        f"_atom_site.{coord_type}_x\n"  # site position
        f"_atom_site.{coord_type}_y\n"  # site position
        f"_atom_site.{coord_type}_z\n"  # site position
        "_atom_site.thermal_displace_type\n"  # Set to 'Biso'
        "_atom_site.B_iso_or_equiv\n"  # Set to 1.0:f
    )

    species: Dict[str, OptimadeStructureSpecies] = {
        entry.name: entry for entry in attributes.species
    }

    for site_number in range(attributes.nsites):
        species_name = attributes.species_at_sites[site_number]
        site = sites[site_number]
        current_species = species[species_name]
        chemical_symbols = current_species.chemical_symbols

        # A species holding a single element (optionally sharing the site with
        # a vacancy) is labelled after the species; a genuinely mixed species
        # gets one label per chemical symbol.
        symbol_count = len(chemical_symbols)
        single_element = symbol_count == 1 or (
            symbol_count == 2 and "vacancy" in chemical_symbols
        )

        for index, symbol in enumerate(chemical_symbols):
            if symbol == "vacancy":
                continue

            if single_element:
                label = f"{species_name.upper()}{site_number + 1}"
            else:
                label = f"{symbol.upper()}{index + 1}"

            parts.append(
                f"ATOM {site_number + 1:5d} {symbol} {label:8} "
                f"{current_species.concentration[index]:6.4f} {site[0]:8.5f} "
                f"{site[1]:8.5f} {site[2]:8.5f} {'Biso':4} {'1.000':6}\n"
            )

    return "".join(parts)
def get_pdb(  # pylint: disable=too-many-locals
    optimade_structure: OptimadeStructure,
) -> str:
    """
    Write Protein Data Bank (PDB) structure in the old PDB format from OPTIMADE structure

    Inspired by `ase.io.proteindatabank.write_proteindatabank()` in the ASE package.

    The old PDB format is column based: every field has to start at a fixed
    column. The record templates below therefore contain significant padding
    and fixed field widths that must not be "tidied up".

    If all three dimensions are periodic, CRYST1 and SCALEn records are
    written and the site positions are rotated into the frame of the cell
    rebuilt from the cell parameters.

    :param optimade_structure: OPTIMADE structure
    :return: str (`None`, after a warning, if NumPy is unavailable)
    """
    if globals().get("np", None) is None:
        warn(NUMPY_NOT_FOUND)
        return None

    records = []
    attributes = optimade_structure.attributes

    rotation = None
    if all(attributes.dimension_types):
        currentcell = np.asarray(attributes.lattice_vectors)
        cellpar = cell_to_cellpar(currentcell)
        exportedcell = cellpar_to_cell(cellpar)
        # Maps coordinates given relative to `currentcell` onto `exportedcell`.
        rotation = np.linalg.solve(currentcell, exportedcell)

        # Setting Z-value = 1 and using P1 since we have all atoms defined explicitly
        z_value = 1
        spacegroup = "P 1"
        # CRYST1 columns: a 7-15, b 16-24, c 25-33 (9 wide each),
        # alpha/beta/gamma 34-54 (7 wide each), space group 56-66, Z 67-70.
        records.append(
            f"CRYST1{cellpar[0]:9.3f}{cellpar[1]:9.3f}{cellpar[2]:9.3f}"
            f"{cellpar[3]:7.2f}{cellpar[4]:7.2f}{cellpar[5]:7.2f} "
            f"{spacegroup:11s}{z_value:4d}\n"
        )

        # SCALEn columns: matrix row 11-40 (3 x 10 wide), translation 46-55.
        for i, vector in enumerate(scaled_cell(currentcell)):
            records.append(
                f"SCALE{i + 1}    "
                f"{vector[0]:10.6f}{vector[1]:10.6f}{vector[2]:10.6f}"
                f"     {0:10.5f}\n"
            )

    # There is a limit of 5 digit numbers in this field.
    pdb_maxnum = 100000
    bfactor = 1.0

    records.append("MODEL     1\n")

    species: Dict[str, OptimadeStructureSpecies] = {
        entry.name: entry for entry in attributes.species
    }

    # Convert null/None values to the padding float value
    cartesian_site_positions, _ = pad_positions(attributes.cartesian_site_positions)
    sites = np.asarray(cartesian_site_positions)

    if rotation is not None:
        sites = sites.dot(rotation)

    for site_number in range(attributes.nsites):
        species_name = attributes.species_at_sites[site_number]
        site = sites[site_number]
        current_species = species[species_name]
        chemical_symbols = current_species.chemical_symbols

        # A species holding a single element (optionally sharing the site with
        # a vacancy) is labelled after the species; a genuinely mixed species
        # gets one label per chemical symbol.
        symbol_count = len(chemical_symbols)
        single_element = symbol_count == 1 or (
            symbol_count == 2 and "vacancy" in chemical_symbols
        )

        for index, symbol in enumerate(chemical_symbols):
            if symbol == "vacancy":
                continue

            label = species_name if single_element else f"{symbol}{index + 1}"

            # ATOM columns: serial 7-11, atom name 13-16, residue name 18-20,
            # residue number 23-26, x/y/z 31-54, occupancy 55-60,
            # temperature factor 61-66, element 77-78.
            records.append(
                f"ATOM  {site_number % pdb_maxnum:5d} {label:4} MOL     1    "
                f"{site[0]:8.3f}{site[1]:8.3f}{site[2]:8.3f}"
                f"{current_species.concentration[index]:6.2f}"
                f"{bfactor:6.2f}          {symbol.upper():2}  \n"
            )

    records.append("ENDMDL\n")
    return "".join(records)
def get_cif(  # pylint: disable=too-many-locals,too-many-branches
    optimade_structure: OptimadeStructure,
) -> str:
    """
    Get CIF file as string from OPTIMADE structure

    Based on `ase.io.cif:write_cif()`.

    Fractional coordinates are written if the structure already carries
    `fractional_site_positions`, or if all three dimensions are periodic (in
    which case they are derived from the cartesian positions). Otherwise the
    cartesian coordinates are written.

    The passed structure is not modified: derived fractional coordinates are
    kept local instead of being attached to `optimade_structure.attributes`.

    :param optimade_structure: OPTIMADE structure
    :return: str (`None`, after a warning, if NumPy is unavailable)
    """
    # NumPy is needed for calculations
    if globals().get("np", None) is None:
        warn(NUMPY_NOT_FOUND)
        return None

    attributes = optimade_structure.attributes

    # Each header line must be its own '#' comment line; otherwise the
    # `data_` block header that follows would end up inside the comment.
    parts = [
        "#\n"
        "# Created from an OPTIMADE structure.\n"
        "#\n"
        "# See https://www.optimade.org and/or\n"
        "# https://github.com/Materials-Consortia/OPTIMADE for more information.\n"
        "#\n",
        f"data_{optimade_structure.id}\n\n",
    ]

    # NOTE: This is a bit ahead of its time, since this OPTIMADE property is part of an open PR.
    # See https://github.com/Materials-Consortia/OPTIMADE/pull/206
    # A missing attribute and an explicit None are treated alike.
    fractional_site_positions = getattr(attributes, "fractional_site_positions", None)

    # Do this only if there's three non-zero lattice vectors
    # NOTE: This also negates handling of lattice_vectors with null/None values
    if all(attributes.dimension_types):
        a_vector, b_vector, c_vector, alpha, beta, gamma = cell_to_cellpar(
            attributes.lattice_vectors
        )

        parts.append(
            f"_cell_length_a {a_vector:g}\n"
            f"_cell_length_b {b_vector:g}\n"
            f"_cell_length_c {c_vector:g}\n"
            f"_cell_angle_alpha {alpha:g}\n"
            f"_cell_angle_beta {beta:g}\n"
            f"_cell_angle_gamma {gamma:g}\n\n"
        )
        parts.append(
            "_symmetry_space_group_name_H-M 'P 1'\n"
            "_symmetry_int_tables_number 1\n\n"
            "loop_\n"
            " _symmetry_equiv_pos_as_xyz\n"
            " 'x, y, z'\n\n"
        )

        # Since some structure viewers are having issues with cartesian coordinates,
        # we calculate the fractional coordinates if this is a 3D structure and we have
        # all the necessary information.
        if fractional_site_positions is None:
            padded_cartesian, _ = pad_positions(attributes.cartesian_site_positions)
            fractional_site_positions = fractional_coordinates(
                cell=attributes.lattice_vectors, cartesian_positions=padded_cartesian
            )

    if fractional_site_positions is not None:
        coord_type = "fract"
        sites, _ = pad_positions(fractional_site_positions)
    else:
        coord_type = "Cartn"
        sites, _ = pad_positions(attributes.cartesian_site_positions)

    parts.append(
        "loop_\n"
        " _atom_site_type_symbol\n"  # species.chemical_symbols
        " _atom_site_label\n"  # chemical symbol + running count of that symbol
        " _atom_site_occupancy\n"  # species.concentration
        f" _atom_site_{coord_type}_x\n"  # site position
        f" _atom_site_{coord_type}_y\n"  # site position
        f" _atom_site_{coord_type}_z\n"  # site position
        " _atom_site_thermal_displace_type\n"  # Set to 'Biso'
        " _atom_site_B_iso_or_equiv\n"  # Set to 1.0:f
    )

    species: Dict[str, OptimadeStructureSpecies] = {
        entry.name: entry for entry in attributes.species
    }

    # Running count per chemical symbol, used to build unique atom labels.
    symbol_occurences = {}
    for site_number in range(attributes.nsites):
        species_name = attributes.species_at_sites[site_number]
        site = sites[site_number]
        current_species = species[species_name]

        for index, symbol in enumerate(current_species.chemical_symbols):
            if symbol == "vacancy":
                continue

            symbol_occurences[symbol] = symbol_occurences.get(symbol, 0) + 1
            label = f"{symbol}{symbol_occurences[symbol]}"

            parts.append(
                f" {symbol} {label} {current_species.concentration[index]:6.4f} {site[0]:8.5f} "
                f"{site[1]:8.5f} {site[2]:8.5f} {'Biso':4} {'1.000':6}\n"
            )

    return "".join(parts)