Source code for galore.formats

###############################################################################
#                                                                             #
# GALORE: Gaussian and Lorentzian broadening for simulated spectra            #
#                                                                             #
# Developed by Adam J. Jackson (2016) at University College London            #
#                                                                             #
###############################################################################
#                                                                             #
# This file is part of Galore. Galore is free software: you can redistribute  #
# it and/or modify it under the terms of the GNU General Public License as    #
# published by the Free Software Foundation, either version 3 of the License, #
# or (at your option) any later version.  This program is distributed in the  #
# hope that it will be useful, but WITHOUT ANY WARRANTY; without even the     #
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    #
# See the GNU General Public License for more details.  You should have       #
# received a copy of the GNU General Public License along with this program.  #
# If not, see <http://www.gnu.org/licenses/>.                                 #
#                                                                             #
###############################################################################
import os
import csv
import re
import sys
from collections import OrderedDict
import numpy as np


[docs]def is_gpw(filename): """Determine whether file is GPAW calculation by checking extension""" return filename.split('.')[-1] == 'gpw'
[docs]def is_doscar(filename): """Determine whether file is a DOSCAR by checking fourth line""" # This doesn't break when the file is 3 lines or less; f.readline() just # starts returning empty strings which also fail the test. with open(filename, 'r') as f: for i in range(3): f.readline() if f.readline().strip() == 'CAR': return True else: return False
[docs]def is_vasp_raman(filename): """Determine if file is raman-sc/vasp_raman.py data by checking header""" with open(filename, 'r') as f: line = f.readline() return line.strip() == '# mode freq(cm-1) alpha beta2 activity'
[docs]def is_csv(filename): """Determine whether file is CSV by checking extension""" return filename.split('.')[-1] == 'csv'
[docs]def is_xml(filename): """Determine whether file is XML by checking extension""" if filename.split('.')[-1] == 'gz': return filename.split('.')[-2] == 'xml' else: return filename.split('.')[-1] == 'xml'
[docs]def is_complete_dos(pdos): """Determine whether the object is a pymatgen CompleteDos object""" densities_fn = getattr(pdos, "get_densities", None) return callable(densities_fn)
[docs]def write_txt(x_values, y_values, filename="galore_output.txt", header=None): """Write output to a simple space-delimited file Args: x_values (iterable): Values to print in first column y_value (iterable): Values to print in second column filename (str): Path to output file, including extension. If None, write to standard output instead. header (str): Additional line to prepend to file. If None, no header is used. """ rows = zip(x_values, y_values) _write_txt_rows(rows, filename=filename, header=header)
def _write_txt_rows(rows, filename=None, header=None): """Write rows of data to space-separated text output Args: rows (iterable): Rows to write. Rows should be a list of values. filename (str or None): Filename for text output. If None, write to standard output instead. header (iterable or None): Optionally add another row to the top of the file. Useful if rows is a generator you don't want to mess with. """ def _format_line(row): return ' '.join(('{0:10.6e}'.format(x) for x in row)) + '\n' lines = map(_format_line, rows) if filename is not None: with open(filename, 'w') as f: if header is not None: f.write(header + '\n') f.writelines(lines) else: if header is not None: print(header) for line in lines: print(line, end='') def _write_csv_rows(rows, filename=None, header=None): """Write rows of data to output in CSV format Args: rows (iterable): Rows to write. Rows should be a list of values. filename (str or None): Filename for CSV output. If None, write to standard output instead. header (iterable or None): Optionally add another row to the top of the file. Useful if rows is a generator you don't want to mess with. """ def _write_csv(rows, f, header): writer = csv.writer(f, lineterminator=os.linesep) if header is not None: writer.writerow(header) writer.writerows(rows) if filename is None: _write_csv(rows, sys.stdout, header=header) else: with open(filename, 'w') as f: _write_csv(rows, f, header=header)
[docs]def write_csv(x_values, y_values, filename="galore_output.csv", header=None): """Write output to a simple space-delimited file Args: x_values (iterable): Values to print in first column y_value (iterable): Values to print in second column filename (str): Path to output file, including extension. If None, write to standard output instead. header (iterable): Additional line to prepend to file. If None, no header is used. """ rows = zip(x_values, y_values) _write_csv_rows(rows, filename=filename, header=header)
[docs]def write_pdos(pdos_data, filename=None, filetype="txt", flipx=False): """Write PDOS or XPS data to CSV file Args: pdos_data (dict): Data for pdos plot in format:: {'el1': {'energy': values, 's': values, 'p': values ...}, 'el2': {'energy': values, 's': values, ...}, ...} where DOS values are 1D numpy arrays. For deterministic output, use ordered dictionaries! filename (str or None): Filename for output. If None, write to stdout filetype (str): Format for output; "csv" or "txt. flipx (bool): Negate the x-axis (i.e. energy) values to make binding energies """ header = ['energy'] cols = [list(pdos_data.values())[0]['energy']] if flipx: cols[0] = -cols[0] for el, orbitals in pdos_data.items(): for orbital, values in orbitals.items(): if orbital.lower() != 'energy': header += ['_'.join((el, orbital))] cols.append(values) data = np.array(cols).T total = data[:, 1:].sum(axis=1) data = np.insert(data, 1, total, axis=1) header.insert(1, 'total') if filetype == 'csv': _write_csv_rows(data, filename=filename, header=header) elif filetype == 'txt': header = ' ' + ' '.join(('{0:12s}'.format(x) for x in header)) _write_txt_rows(data, filename=filename, header=header) else: raise ValueError('filetype "{0}" not recognised. Use "txt" or "csv".')
[docs]def read_csv(filename): """Read a txt file containing frequencies and intensities If input file contains three columns, the first column is ignored. (It is presumed to be a vibrational mode index.) Args: filename (str): Path to data file Returns: n x 2 Numpy array of frequencies and intensities """ return read_txt(filename, delimiter=',')
[docs]def read_txt(filename, delimiter=None): """Read a txt file containing frequencies and intensities If input file contains three columns, the first column is ignored. (It is presumed to be a vibrational mode index.) Args: filename (str): Path to data file Returns: n x 2 Numpy array of frequencies and intensities """ xy_data = np.genfromtxt(filename, comments='#', delimiter=delimiter) columns = np.shape(xy_data)[1] if columns == 2: return xy_data elif columns == 3: return xy_data[:, 1:] elif columns < 2: raise Exception("Not sure how to interpret {0}: " "not enough columns.".format(filename)) else: raise Exception("Not sure how to interpret {0}: " "too many columns.".format(filename))
[docs]def read_pdos_txt(filename, abs_values=True): """Read a text file containing projected density-of-states (PDOS) data The first row should be a header identifying the orbitals, e.g. "# Energy s p d f". The following rows contain the corresponding energy and DOS values. Spin channels indicated by (up) or (down) suffixes will be combined. Args: filename (str): Path to file for import abs_values (bool, optional): Convert intensity values to absolute numbers. This is primarily for compatibility with spin-polarised .dat files from Sumo. Set to False if negative values in spectrum are resonable. Returns: data (np.ndarray): Numpy structured array with named columns corresponding to input data format. """ data = np.genfromtxt(filename, names=True) if abs_values: for col in data.dtype.names[1:]: data[col] = np.abs(data[col]) # Get a list of orbitals that have 'up' and 'down' variants spin_pairs = [] for col in data.dtype.names: if re.match('.+up', col): orbital = re.match('(.+)up', col).groups()[0] if orbital + 'down' in data.dtype.names: spin_pairs.append(orbital) if len(spin_pairs) == 0: return data else: # Sum up/down channel pairs into their respective up channels for orbital in spin_pairs: data[orbital + 'up'] += data[orbital + 'down'] # Rename spin-up channels column_names = list(data.dtype.names) for orbital in spin_pairs: column_names[column_names.index(orbital + 'up')] = orbital data.dtype.names = tuple(column_names) # Exclude spin-down channels from returned data spin_down_orbs = [orb + 'down' for orb in spin_pairs] return data[[col for col in data.dtype.names if col not in spin_down_orbs]]
[docs]def read_doscar(filename="DOSCAR"): """Read an x, y series of frequencies and DOS from a VASP DOSCAR file Args: filename (str): Path to DOSCAR file Returns: data (2-tuple): Tuple containing x values and y values as lists """ with open(filename, 'r') as f: # Scroll to line 6 which contains NEDOS for i in range(5): f.readline() nedos = int(f.readline().split()[2]) # Get number of fields and infer number of spin channels first_dos_line = f.readline().split() spin_channels = (len(first_dos_line) - 1) / 2 if spin_channels == 1: def _tdos_from_line(line): return (float(line[0]), float(line[1])) elif spin_channels == 2: def _tdos_from_line(line): line = [float(x) for x in line] return (line[0], line[1] + line[2]) else: raise Exception("Too many columns in DOSCAR") dos_pairs = ( [_tdos_from_line(first_dos_line)] + [_tdos_from_line(f.readline().split()) for i in range(nedos - 1)]) return np.array(dos_pairs)
[docs]def read_vasprun(filename='vasprun.xml'): """Read a VASP vasprun.xml file to obtain the density of states Pymatgen must be present on the system to use this method Args: filename (str): Path to vasprun.xml file Returns: data (pymatgen.electronic_structure.dos.Dos): A pymatgen Dos object """ try: from pymatgen.io.vasp.outputs import Vasprun except ImportError as e: e.msg = "pymatgen package neccessary to load vasprun files" raise vr = Vasprun(filename) band = vr.get_band_structure() dos = vr.complete_dos if band.is_metal(): zero_point = vr.efermi else: zero_point = band.get_vbm()['energy'] # Shift the energies so that the vbm is at 0 eV, also taking into account # any gaussian broadening dos.energies -= zero_point if vr.parameters['ISMEAR'] == 0 or vr.parameters['ISMEAR'] == -1: dos.energies -= vr.parameters['SIGMA'] return dos
[docs]def read_gpaw_totaldos(filename, npts=50001, width=1e-3, ref='vbm'): """Read total DOS from GPAW with minimal broadening This requires GPAW to be installed and on your PYTHONPATH! Args: filename (str): Path to GPAW calculation file. This should be a .gpw file generated with ``calc.write('myfilename.gpw')``. npts (int): Number of DOS samples width (float): Gaussian broadening parameter applied by GPAW. Default is minimal so that broadening is dominated by choices in Galore. Beware that there is a strong interaction between this parameter and npts; with a small npts and small width, many energy levels will be missed from the DOS! ref (str): Reference energy for DOS. 'vbm' or 'efermi' are accepted for the valence-band maximum or the Fermi energy, respectively. VBM is determined from calculation eigenvalues and not DOS values. If set to None, raw values are used. Returns: data (np.ndarray): 2D array of energy and DOS values """ from gpaw import GPAW calc = GPAW(filename) if ref is None: ref_energy = 0 elif ref.lower() == 'vbm': ref_energy, _ = calc.get_homo_lumo() elif ref.lower() == 'efermi': ref_energy = calc.get_fermi_level() energies, dos = calc.get_dos(npts=npts, width=width) return np.array(list(zip(energies - ref_energy, dos)))
[docs]def read_gpaw_pdos(filename, npts=50001, width=1e-3, ref='vbm'): """Read orbital-projected DOS from GPAW with minimal broadening. This requires GPAW to be installed and on your PYTHONPATH! Args: filename (str): Path to GPAW calculation file. This should be a .gpw file generated with ``calc.write('myfilename.gpw')``. npts (int): Number of DOS samples width (float): Gaussian broadening parameter applied by GPAW. Default is minimal so that broadening is dominated by choices in Galore. Beware that there is a strong interaction between this parameter and npts; with a small npts and small width, many energy levels will be missed from the DOS! ref (str): Reference energy for DOS. 'vbm' or 'efermi' are accepted for the valence-band maximum or the Fermi energy, respectively. VBM is determined from calculation eigenvalues and not DOS values. If set to None, raw values are used. Returns: pdos_data (OrderedDict): PDOS data formatted as nestled OrderedDict of: {element: {'energy': energies, 's': densities, 'p' ... } """ from gpaw import GPAW calc = GPAW(filename) if ref is None: ref_energy = 0 elif ref.lower() == 'vbm': ref_energy, _ = calc.get_homo_lumo() elif ref.lower() == 'efermi': ref_energy = calc.get_fermi_level() # Set up the structure of elements and orbitals. # Repeated elements will leave a single entry in the dict proto_orbitals = OrderedDict((('energy', np.zeros(npts)), ('s', np.zeros(npts)), ('p', np.zeros(npts)), ('d', np.zeros(npts)), ('f', np.zeros(npts)))) pdos_data = OrderedDict([(atom.symbol, proto_orbitals.copy()) for atom in calc.atoms]) # Read orbital DOS, adding to collected PDOS for that element/orbital for atom in calc.atoms: for orbital in 'spdf': energies, dos = calc.get_orbital_ldos(atom.index, angular=orbital, npts=npts, width=width) pdos_data[atom.symbol][orbital] += dos pdos_data[atom.symbol]['energy'] = energies - ref_energy # Set any zero arrays to None so they can be easily skipped over # This should get rid of unused orbitals; if GPAW put some density in those # orbitals then we should keep that evidence rather than discard it. for element, orbitals in pdos_data.items(): for orbital, dos in orbitals.items(): if orbital != 'energy' and max(dos) == 0.: pdos_data[element][orbital] = None return pdos_data
[docs]def read_vasprun_totaldos(filename='vasprun.xml'): """Read an x, y series of energies and DOS from a VASP vasprun.xml file Pymatgen must be present on the system to use this method Args: filename (str): Path to vasprun.xml file Returns: data (np.ndarray): 2D array of energy and DOS values """ dos = read_vasprun(filename) from pymatgen.electronic_structure.core import Spin # sum spin up and spin down channels densities = dos.densities[Spin.up] if len(dos.densities) > 1: densities += dos.densities[Spin.down] return np.array(list(zip(dos.energies, densities)))
[docs]def read_vasprun_pdos(filename='vasprun.xml'): """Read a vasprun.xml containing projected density-of-states (PDOS) data Pymatgen must be present on the system to use this method Args: filename (str or CompleteDos): Path to vasprun.xml file or pymatgen CompleteDos object. Returns: pdos_data (np.ndarray): PDOS data formatted as nestled OrderedDict of: {element: {'energy': energies, 's': densities, 'p' ... } """ if isinstance(filename, str): dos = read_vasprun(filename) else: # filename is actually a pre-loaded CompleteDos dos = filename from pymatgen.electronic_structure.core import Spin, OrbitalType pdos_data = OrderedDict() for element in dos.structure.symbol_set: pdos_data[element] = OrderedDict([('energy', dos.energies)]) pdos = dos.get_element_spd_dos(element) for orb in sorted([orb.value for orb in pdos.keys()]): # this way we can ensure the orbitals remain in the correct order orbital = OrbitalType(orb) # sum spin up and spin down channels densities = pdos[orbital].densities[Spin.up] if len(dos.densities) > 1: densities += pdos[orbital].densities[Spin.down] pdos_data[element][orbital.name] = densities return pdos_data
[docs]def read_vasp_raman(filename="vasp_raman.dat"): """Read output file from Vasp raman simulation Args: filename (str): Path to formatted data file generated by https://github.com/raman-sc/VASP - Raman intensities are computed by following vibrational modes and calculating polarisability. The generated output file is named *vasp_raman.dat* but can be renamed if desired. The format is five space-separated columns, headed by ``# mode freq(cm-1) alpha beta2 activity``. Returns: 2-D np.array: Only the columns corresponding to frequency and activity are retained. """ data = np.genfromtxt(filename) return data[:, [1, -1]]