Source code for storq.vasp.getters

import os
import re
from pathlib import Path
from xml.etree import ElementTree

import numpy as np

import storq.vasp.readers as readers
from ase.calculators.calculator import Calculator
from storq.tools import uniquify
from storq.vasp.state_engine import StateEngine


[docs]class Getters:
[docs] def get_num_bands(self): """ Return the number of bands. NBANDS is read from an OUTCAR if it exists, otherwise the default value used by vasp is computed and returned. """ if self.outcar.is_file(): with open(self.outcar) as f: line = next((l for l in f if "NBANDS" in l), None) line = line.split("NBANDS=") nbands = int(line[1].strip()[0]) else: # this is the actual default VASP uses nelect = self.get_num_valence() natoms = len(self.atoms) nbands = int( max(round(nelect + 2) / 2 + max(natoms // 2, 3), int(0.6 * nelect)) ) if self.parameters.get("npar", None): nbands = ((nbands + npar - 1) // npar) * npar
return nbands
[docs] def get_ibz_k_points(self, cartesian=True): """Return the IBZ k-point list. Uses vasprun.xml and returns them in cartesian coordinates. set cartesian=False to get them in reciprocal coordinates. """ with StateEngine(self): with open(self.xml) as f: tree = ElementTree.parse(f) # each weight is in a <v>w</v> element in this varray kpts = np.array( [ [float(y) for y in x.text.split()] for x in tree.find("kpoints/varray[@name='kpointlist']") ] ) if cartesian: kpts = np.dot(kpts, np.linalg.inv(self.atoms.cell).T)
return kpts
[docs] def get_occupation_numbers(self, kpt=0, spin=0): """Read occupation_numbers for KPT and spin. Read from vasprun.xml. This may be fractional occupation. For non-spin-polarized calculations you may need to multiply by 2. Returns an np.array. """ with StateEngine(self): with open(self.xml) as f: tree = ElementTree.parse(f) path = "/".join( [ "calculation", "eigenvalues", "array", "set", "set[@comment='spin {}']".format(spin + 1), "set[@comment='kpoint {}']".format(kpt + 1), ] ) # these are all in elements like this. # <r> -3.8965 1.0000 </r>
return np.array([float(x.text.split()[1]) for x in tree.find(path)])
[docs] def get_k_point_weights(self): """Return the k-point weights.""" with StateEngine(self): with open(self.xml, "r") as f: tree = ElementTree.parse(f) # each weight is in a <v>w</v> element in this varray return np.array( [ float(x.text) for x in tree.find("kpoints/varray[@name='weights']") ]
)
[docs] def get_num_spins(self): """Returns number of spins. 1 if not spin-polarized 2 if spin-polarized """ if "ispin" in self.parameters: return self.parameters["ispin"] else:
return 1
[docs] def get_eigenvalues(self, kpt=0, spin=0): """Return array of eigenvalues for kpt and spin.""" with StateEngine(self): with open(self.xml) as f: tree = ElementTree.parse(f) path = "/".join( [ "calculation", "eigenvalues", "array", "set", "set[@comment='spin {}']", "set[@comment='kpoint {}']", ] ) path = path.format(spin + 1, kpt + 1) # Vasp seems to start at 1 not 0 fields = tree.find(path)
return np.array([float(x.text.split()[0]) for x in fields])
[docs] def get_fermi_level(self): """Method that reads Fermi energy from OUTCAR file""" with StateEngine(self): E_f = None for line in open(self.outcar, "r"): if line.rfind("E-fermi") > -1: E_f = float(line.split()[2])
return E_f
[docs] def get_ados(self, atom_index, orbital, spin=1, efermi=None): """Return Atom projected DOS for atom index, orbital and spin. Parameters ---------- atom_index : int pass orbital: string can be any of 's', 'p', 'd' spin : int pass efermi : float the fermi energy returns ------- energies : np.ndarray pass ados : np.ndarray pass """ with StateEngine(self): with open(self.xml) as f: tree = ElementTree.parse(f) path = "/".join( [ "calculation", "dos", "partial", "array", "set", 'set[@comment="ion {}"]', 'set[@comment="spin {}"]', "r", ] ) us = [k[1] for k in sorted([[j, i] for i, j in enumerate(self.resort)])] path = path.format(us.index(atom_index) + 1, spin) results = [[float(x) for x in el.text.split()] for el in tree.findall(path)] if efermi is None: efermi = self.get_fermi_level() else: efermi = 0.0 energy = np.array([x[0] for x in results]) - efermi ados = np.array([x["spd".index(orbital) + 1] for x in results])
return [energy, ados]
[docs] def get_elapsed_time(self): """Return elapsed calculation time in seconds from the OUTCAR file.""" import re with StateEngine(self): regexp = re.compile("Elapsed time \(sec\):\s*(?P<time>[0-9]*\.[0-9]*)") with open(self.xml) as f: lines = f.readlines() # fragile but fast. m = re.search(regexp, lines[-8]) time = m.groupdict().get("time", None) if time is not None: return float(time) else:
return None
[docs] def get_num_valence_potcar(self, filename=None): """Get the default number of valence electrons. Parameters ---------- filename : str name of the POTCAR file in which to look Returns ------- list number of electorns for each species in filename """ if filename is None: filename = self.potcar else: filename = Path(filename) if not filename.is_file(): self.write_input() nelect = [] lines = open(filename).readlines() for n, line in enumerate(lines): if line.find("TITEL") != -1: symbol = line.split("=")[1].split()[1].split("_")[0].strip() valence = float( lines[n + 4].split(";")[1].split("=")[1].split()[0].strip() ) nelect.append((symbol, valence))
return nelect
[docs] def get_num_valence(self): """Return the number of valence electrons for the atoms. Calculated from the POTCAR file. """ default_electrons = self.get_num_valence_potcar() d = {} for s, n in default_electrons: d[s] = n atoms = self.get_atoms() nelectrons = 0 for atom in atoms: nelectrons += d[atom.symbol]
return nelectrons
[docs] def get_volumetric_data(self, filename=None, **kwargs): """Read filename to read the volumetric data in it. Supported filenames are CHG, CHGCAR, and LOCPOT. """ from storq.vasp.charge_density import VaspChargeDensity with StateEngine(self): if filename is None: filename = self.directory.joinpath("CHG") else: filename = Path(filename) atoms = self.get_atoms() vd = VaspChargeDensity(filename) data = np.array(vd.chg) n0, n1, n2 = data[0].shape X, Y, Z = np.mgrid[ 0.0 : 1.0 : n0 * 1j, 0.0 : 1.0 : n1 * 1j, 0.0 : 1.0 : n2 * 1j ] C = np.column_stack([X.ravel(), Y.ravel(), Z.ravel()]) uc = atoms.get_cell() real = np.dot(C, uc) # now convert arrays back to unitcell shape x = np.reshape(real[:, 0], (n0, n1, n2)) y = np.reshape(real[:, 1], (n0, n1, n2)) z = np.reshape(real[:, 2], (n0, n1, n2))
return (x, y, z, data)
[docs] def get_charge_density(self, spin=0, filename=None): """Returns x, y, and z coordinate and charge density arrays. Supported file formats: CHG, CHGCAR Parameters ---------- spin : int pass Returns ------- x, y, z : np.ndarrays charge density arrays """ with StateEngine(self): if not self.parameters.get("lcharg", False): raise Exception("CHG was not written. Set lcharg=True") if filename is None: filename = self.directory.joinpath("CHG") else: filename = Path(filename) x, y, z, data = self.get_volumetric_data(filename=filename)
return x, y, z, data[spin]
[docs] def get_local_potential(self): """Returns x, y, z, and local potential arrays We multiply the data by the volume because we are reusing the charge density code which divides by volume. """ with StateEngine(self): x, y, z, data = self.get_volumetric_data( filename=self.directory.joinpath("LOCPOT") ) atoms = self.get_atoms()
return x, y, z, data[0] * atoms.get_volume()
[docs] def get_elf(self): """Returns x, y, z and electron localization function arrays.""" assert self.parameters.get("lelf", None) is True, "lelf is not set to True!" with StateEngine(self): fname = self.directory.joinpath("ELFCAR") x, y, z, data = self.get_volumetric_data(filename=fname) atoms = self.get_atoms()
return x, y, z, data[0] * atoms.get_volume()
[docs] def get_electron_density_center(self, spin=0, scaled=True): """Returns center of electron density. If scaled, use scaled coordinates, otherwise use cartesian coordinates. """ with StateEngine(self): atoms = self.get_atoms() x, y, z, cd = self.get_charge_density(spin) n0, n1, n2 = cd.shape nelements = n0 * n1 * n2 voxel_volume = atoms.get_volume() / nelements total_electron_charge = cd.sum() * voxel_volume electron_density_center = np.array( [(cd * x).sum(), (cd * y).sum(), (cd * z).sum()] ) electron_density_center *= voxel_volume electron_density_center /= total_electron_charge if scaled: uc = atoms.get_cell() return np.dot(np.linalg.inv(uc.T), electron_density_center.T).T else:
return electron_density_center
[docs] def get_dipole_vector(self): """Tries to return the dipole vector of the unit cell in atomic units. Returns None when CHG file is empty/not-present. """ from storq.vasp.readers import read_zval with StateEngine(self): try: x, y, z, cd = self.get_charge_density() except (IOError, IndexError): # IOError: no CHG file, function called outside context manager # IndexError: Empty CHG file, Vasp run with lcharg=False return None n0, n1, n2 = cd.shape nelements = n0 * n1 * n2 voxel_volume = self.atoms.get_volume() / nelements total_electron_charge = -(cd.sum()) * voxel_volume electron_density_center = np.array( [(cd * x).sum(), (cd * y).sum(), (cd * z).sum()] ) electron_density_center *= voxel_volume electron_density_center /= total_electron_charge electron_dipole_moment = electron_density_center * total_electron_charge electron_dipole_moment *= -1.0 # make dictionary for ease of use potcars = self.get_potcars() zval = {} for symbol, path in potcars.items(): zval[symbol] = read_zval(path) ion_charge_center = np.array([0.0, 0.0, 0.0]) total_ion_charge = 0.0 for atom in self.atoms: Z = zval[atom.symbol] total_ion_charge += Z pos = atom.position ion_charge_center += Z * pos ion_charge_center /= total_ion_charge ion_dipole_moment = ion_charge_center * total_ion_charge dipole_vector = ion_dipole_moment + electron_dipole_moment
return dipole_vector
[docs] def get_dipole_moment(self, atoms=None): """Return dipole_moment. dipole_moment = ((dipole_vector**2).sum())**0.5/Debye """ from ase.units import Debye with StateEngine(self): dv = self.get_dipole_vector(atoms)
return ((dv ** 2).sum()) ** 0.5 / Debye
[docs] def get_potcars(self): potpaw_dir = Path(self.conf["vasp_potentials"]).joinpath( f"potpaw_{self.parameters['pp']}" ) symbols_uniq = uniquify( np.asarray(self.atoms.get_chemical_symbols())[self.isort] ) setups = self.parameters.get("setups", {}) potcars = {} for symb in symbols_uniq: suffix = setups.get(symb, "") potcars[symb] = potpaw_dir.joinpath(symb + suffix).joinpath("POTCAR")
return potcars
[docs] def get_memory(self): """ Retrieves the recommended memory from the OUTCAR in GB. """ with open(self.outcar) as f: for line in f: if "memory" in line: required_mem = ( float(line.split()[-2]) / 1e6 ) # memory estimate in GB
return required_mem
[docs] def get_orbital_occupations(self): """Read occuations from OUTCAR. Returns a numpy array of [[s, p, d tot]] for each atom. You probably need to have used LORBIT=11 for this function to work. """ with StateEngine(self): # this finds the last entry of occupations. Sometimes, this is # printed multiple times in the OUTCAR. with open(self.outcar, "r") as f: lines = f.readlines() start = None for i, line in enumerate(lines): if line.startswith(" total charge "): start = i if not i: raise Exception("Occupations not found") atoms = self.get_atoms() occupations = [] for j in range(len(atoms)): line = lines[start + 4 + j] fields = line.split() s, p, d, tot = [float(x) for x in fields[1:]] occupations.append(np.array((s, p, d, tot)))
return np.array(occupations)
[docs] def get_num_ionic_steps(self): """Get the number of ionic steps from the OUTCAR file. Returns ------- int The number of ionic steps. """ with StateEngine(self):
return readers.read_num_ionic_steps(self.outcar) def get_composition(self, basis=None): """ Acquire the chemical composition of an atoms object Parameters ---------- basis: string allows the user to define the basis element for determining the composition. If a basis element is specified, the composition of that element is returned. Returns ------- dict atoms and their compositions dictionary sorted by atomic number """ atoms = self.get_atoms() symbols = atoms.get_chemical_symbols() # Collect compositions S = {} for symbol in set(symbols): S[symbol] = float(symbols.count(symbol)) / len(atoms) if basis: if basis in S.keys(): return S[basis] else: return 0.0 else: return S
[docs] def get_homo_lumo(self): """ Calculate VBM and CBM.""" with StateEngine(self): homo = -1.0e15 lumo = 1.0e15 kpts = self.get_ibz_k_points() nkpts = len(kpts) for ikpt in range(nkpts): for energy, occupation in zip( self.get_eigenvalues(ikpt), self.get_occupation_numbers(ikpt) ): if occupation >= 1.0 and energy > homo: homo = energy if occupation < 1.0 and energy < lumo: lumo = energy
return homo, lumo
[docs] def get_composition(self, basis=None): """ Acquire the chemical composition of an atoms object Returns ------- dict atoms and their compositions dictionary sorted by atomic number Parameters ---------- basis: string allows the user to define the basis element for determining the composition. If a basis element is specified, the composition of that element is returned. """ atoms = self.atoms symbols = atoms.get_chemical_symbols() # Collect compositions S = {} for symbol in set(symbols): S[symbol] = float(symbols.count(symbol)) / len(atoms) if basis: if basis in S.keys(): return S[basis] else: return 0.0 else:
return S
[docs] def get_charges(self, atoms=None): """ Returns a list of cached charges from a previous call to bader(). Useful for storing the charges to a database. """ if atoms is None: atoms = self.atoms if hasattr(self, "_calculated_charges"): return self._calculated_charges else:
return None
[docs] def get_program_info(self): """Return data about the vasp that was used for the calculation.""" info = {} with open(self.xml) as f: tree = ElementTree.parse(f) info["program"] = tree.find("generator/i[@name='program']").text info["version"] = tree.find("generator/i[@name='version']").text info["subversion"] = tree.find("generator/i[@name='subversion']").text info["rundate"] = tree.find("generator/i[@name='date']").text info["runtime"] = tree.find("generator/i[@name='time']").text
return info def _get_xml_parameter(par): """An auxillary function that enables convenient extraction of parameter values from a vasprun.xml file with proper type handling. """ def to_bool(b): if b == "T": return True else: return False to_type = {"int": int, "logical": to_bool, "string": str, "float": float} text = par.text if text is None: text = "" # Float parameters do not have a 'type' attrib var_type = to_type[par.attrib.get("type", "float")] if par.tag == "v": return map(var_type, text.split()) else: return var_type(text.strip())
[docs] def get_density_of_states(self, index=-1): """Parse vasprun.xml file and extract the density of states. Parameters ---------- f : fileobj input file index : int index of frame(s) to return Returns ------- generator dictionary, each entry of corresponds to another spin channel and comprises a tuple of two lists (energies and density of states) """ with StateEngine(self): import xml.etree.ElementTree as ET tree = ET.iterparse(self.xml, events=["start", "end"]) calculation = [] try: for event, elem in tree: if event == "start" and elem.tag == "calculation": calculation.append(elem) except ET.ParseError as parse_error: if calculation[-1].find("energy") is None: calculation = calculation[:-1] if not calculation: return None if calculation: if isinstance(index, int): steps = [calculation[index]] else: steps = calculation[index] else: steps = [] for step in steps: spinblocks = step.findall("dos/total/array/set/set") if spinblocks is not None: dos = {} for i, spb in enumerate(spinblocks): values = spb.findall("r") energies = np.zeros(len(values)) densities = np.zeros(len(values)) for j, val in enumerate(values): val = val.text.split() energies[j] = float(val[0]) densities[j] = float(val[1]) if len(spinblocks) == 1: densities *= 2.0 tag = spb.attrib["comment"] dos[tag] = (energies, densities) return dos else:
return None
[docs] def get_core_levels(self): """Parse an OUTCAR file and extract either the average (electrostatic) potential at core or the core state eigenenergies. Parameters ---------- f : fileobj input file Returns ------- list the list is either comprised of floats representing the average electrostatic vasp_potentials at the ionic cores or dictionaries containing the Kohn-Sham eigen energies of the core states; if neither information is found the method returns a None value """ with StateEngine(self): core_levels = None core_vasp_potentials = None with open(self.xml, "r") as f: data = f.readlines() for n, line in enumerate(data): if "average (electrostatic) potential at core" in line: core_vasp_potentials = [] nl = 1 while len(data[n + 2 + nl]) > 0: block = data[n + 2 + nl].replace("-", " -") block = np.array(block.split()).reshape(-1, 2) if len(block) < 1: break for i, pot in block: core_vasp_potentials.append(float(pot)) nl += 1 return core_vasp_potentials if "the core state eigenenergies are" in line: core_levels_atom = {} core_levels = [] nl = -1 nat = 0 while True: if "E-fermi" in line: break if len(data[n + 2 + nl].split()) % 2 == 1: if len(core_levels_atom) > 0: nat += 1 core_levels.append(core_levels_atom) core_levels_atom = {} off = 1 else: off = 0 block = np.array(data[n + 2 + nl].split()[off:]).reshape(-1, 2) if len(block) < 1 and "E-fermi" in data[n + 2 + nl + 1]: break for el in block: core_levels_atom[el[0]] = float(el[1]) nl += 1 core_levels.append(core_levels_atom) return core_levels
return None
[docs] def get_elastic_moduli(self): """Returns the total elastic moduli in GPa. (i.e. the rigid ion and contributions from relaxation) from the OUTCAR file. you must run with IBRION=6 and ISIF>= 3 for this output to exist. There are also contributions from ionic relaxation ELASTIC MODULI CONTR FROM IONIC RELAXATION (kBar) and the rigid moduli SYMMETRIZED ELASTIC MODULI (kBar) For now these are not returned. """ assert self.parameters.get("ibrion", 0) == 6 assert self.parameters.get("isif", 0) >= 3 with StateEngine(self): with open(self.outcar) as f: lines = f.readlines() TEM = [] for i, line in enumerate(lines): if line.startswith(" TOTAL ELASTIC MODULI (kBar)"): j = i + 3 data = lines[j : j + 6] break for line in data: # each line looks like this: # XX 2803.5081 1622.6085 1622.6085 0.0000 0.0000 0.0000 TEM += [[float(x) for x in line.split()[1:]]]
return np.array(TEM) * 0.1 # (convert kbar to GPa)
[docs] def get_linear_response_properties(self): """Parse an OUTCAR file and extract properties that are obtained from linear response calculations. This includes elastic and dielectric tensors, Born effective charges as well as normal vasp_modes. Parameters ---------- f : fileobj input file Returns ------- dictionary The properties are returned in the form of a dictionary, which contains entries for all properties that were succesfully parsed from the input file. """ with StateEngine(self): properties = {} with open(self.xml, "r") as f: data = f.readlines() for n, line in enumerate(data): if "NIONS" in line: natoms = int(line.split()[-1]) if "TOTAL ELASTIC MODULI" in line: c = np.zeros((6, 6)) for i in range(6): for j in range(6): c[i][j] = float(data[n + 3 + i].split()[1 + j]) properties["elastic-constants"] = c if "THz" in line and "2PiTHz" in line: keystr = "phonon-frequencies" if keystr not in properties: properties[keystr] = [] flds = line.split() vasp_mode = {} properties[keystr].append(vasp_mode) if flds[1] == "f": vasp_mode["frequency"] = float(flds[3]) elif flds[1] == "f/i=": vasp_mode["frequency"] = -float(flds[2]) vasp_mode["positions"] = [] vasp_mode["eigenvector"] = [] for iatom in range(natoms): flds = data[n + iatom + 2].replace("-", " -").split() vasp_mode["positions"].append( np.array([float(flds[0]), float(flds[1]), float(flds[2])]) ) vasp_mode["eigenvector"].append( np.array([float(flds[3]), float(flds[4]), float(flds[5])]) ) if ( "MACROSCOPIC STATIC DIELECTRIC TENSOR" in line and "(including local field effects in DFT)" in line ): keystr = "dielectric-tensor-electronic" if keystr in properties: continue dielec = [] for i in range(3): flds = data[n + 2 + i].split() row = [] for j in range(3): row.append(float(flds[j])) dielec.append(row) properties[keystr] = dielec if "MACROSCOPIC STATIC DIELECTRIC TENSOR IONIC CONTRIBUTION" in line: keystr = "dielectric-tensor-ionic" if keystr in properties: continue dielec = [] for i in range(3): flds = data[n + 2 + i].split() row = [] for j in range(3): row.append(float(flds[j])) dielec.append(row) properties[keystr] = dielec if "BORN EFFECTIVE CHARGES (in e, cummulative output)" in line: keystr = "born-effective-charges" if keystr in properties: continue charges = [] k = 0 while "ion" in data[n + k * 4 + 2]: tensor = [] for i in range(3): flds = data[n + k * 4 + 3 + i].split() row = [] for j in range(3): row.append(float(flds[1 + j])) tensor.append(row) charges.append(tensor) k += 1 properties[keystr] = charges if "REAL DIELECTRIC FUNCTION" in line: keystr = "dielectric-tensor-electronic-optics-real" if keystr in properties: continue dielec = [] k = 0 while True: k += 1 flds = data[n + 2 + k].split() if len(flds) == 0: break if len(flds) != 7: continue row = [] for val in flds: row.append(float(val)) dielec.append(row) properties[keystr] = dielec if "IMAGINARY DIELECTRIC FUNCTION" in line: keystr = "dielectric-tensor-electronic-optics-imag" if keystr in properties: continue dielec = [] k = 0 while True: k += 1 flds = data[n + 2 + k].split() if len(flds) == 0: break if len(flds) != 7: continue row = [] for val in flds: row.append(float(val)) dielec.append(row) properties[keystr] = dielec
return properties