Source code for galore.formats

###############################################################################
#                                                                             #
# GALORE: Gaussian and Lorentzian broadening for simulated spectra            #
#                                                                             #
# Developed by Adam J. Jackson (2016) at University College London            #
#                                                                             #
###############################################################################
#                                                                             #
# This file is part of Galore. Galore is free software: you can redistribute  #
# it and/or modify it under the terms of the GNU General Public License as    #
# published by the Free Software Foundation, either version 3 of the License, #
# or (at your option) any later version.  This program is distributed in the  #
# hope that it will be useful, but WITHOUT ANY WARRANTY; without even the     #
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    #
# See the GNU General Public License for more details.  You should have       #
# received a copy of the GNU General Public License along with this program.  #
# If not, see <http://www.gnu.org/licenses/>.                                 #
#                                                                             #
###############################################################################
import os
import csv
import re
import sys
from collections import OrderedDict
import numpy as np


[docs]def is_gpw(filename):
    """Determine whether file is GPAW calculation by checking extension"""
    return filename.split('.')[-1] == 'gpw'


[docs]def is_doscar(filename):
    """Determine whether file is a DOSCAR by checking fourth line"""

    # This doesn't break when the file is 3 lines or less; f.readline() just
    # starts returning empty strings which also fail the test.
    with open(filename, 'r') as f:
        for i in range(3):
            f.readline()
        if f.readline().strip() == 'CAR':
            return True
        else:
            return False


[docs]def is_vasp_raman(filename):
    """Determine if file is raman-sc/vasp_raman.py data by checking header"""
    with open(filename, 'r') as f:
        line = f.readline()

    return line.strip() == '# mode    freq(cm-1)    alpha    beta2    activity'


[docs]def is_csv(filename):
    """Determine whether file is CSV by checking extension"""
    return filename.split('.')[-1] == 'csv'


[docs]def is_xml(filename):
    """Determine whether file is XML by checking extension"""
    if filename.split('.')[-1] == 'gz':
        return filename.split('.')[-2] == 'xml'
    else:
        return filename.split('.')[-1] == 'xml'


[docs]def is_complete_dos(pdos):
    """Determine whether the object is a pymatgen CompleteDos object"""
    densities_fn = getattr(pdos, "get_densities", None)
    return callable(densities_fn)


[docs]def write_txt(x_values, y_values, filename="galore_output.txt", header=None):
    """Write output to a simple space-delimited file

    Args:
        x_values (iterable): Values to print in first column
        y_value (iterable): Values to print in second column
        filename (str): Path to output file, including extension. If None,
            write to standard output instead.
        header (str): Additional line to prepend to file. If None, no header
            is used.

        """

    rows = zip(x_values, y_values)
    _write_txt_rows(rows, filename=filename, header=header)


def _write_txt_rows(rows, filename=None, header=None):
    """Write rows of data to space-separated text output

    Args:
        rows (iterable): Rows to write. Rows should be a list of values.
        filename (str or None): Filename for text output. If None, write to
            standard output instead.
        header (iterable or None): Optionally add another row to the top of the
            file. Useful if rows is a generator you don't want to mess with.

    """

    def _format_line(row):
        return ' '.join(('{0:10.6e}'.format(x) for x in row)) + '\n'

    lines = map(_format_line, rows)

    if filename is not None:
        with open(filename, 'w') as f:
            if header is not None:
                f.write(header + '\n')
            f.writelines(lines)

    else:
        if header is not None:
            print(header)
        for line in lines:
            print(line, end='')


def _write_csv_rows(rows, filename=None, header=None):
    """Write rows of data to output in CSV format

    Args:
        rows (iterable): Rows to write. Rows should be a list of values.
        filename (str or None): Filename for CSV output. If None, write to
            standard output instead.
        header (iterable or None): Optionally add another row to the top of the
            file. Useful if rows is a generator you don't want to mess with.

    """

    def _write_csv(rows, f, header):
        writer = csv.writer(f, lineterminator=os.linesep)
        if header is not None:
            writer.writerow(header)
        writer.writerows(rows)

    if filename is None:
        _write_csv(rows, sys.stdout, header=header)

    else:
        with open(filename, 'w') as f:
            _write_csv(rows, f, header=header)


[docs]def write_csv(x_values, y_values, filename="galore_output.csv", header=None):
    """Write output to a simple space-delimited file

    Args:
        x_values (iterable): Values to print in first column
        y_value (iterable): Values to print in second column
        filename (str): Path to output file, including extension. If None,
            write to standard output instead.
        header (iterable): Additional line to prepend to file. If None,
            no header is used.

        """

    rows = zip(x_values, y_values)
    _write_csv_rows(rows, filename=filename, header=header)


[docs]def write_pdos(pdos_data, filename=None, filetype="txt", flipx=False):
    """Write PDOS or XPS data to CSV file

    Args:
        pdos_data (dict): Data for pdos plot in format::

                {'el1': {'energy': values, 's': values, 'p': values ...},
                 'el2': {'energy': values, 's': values, ...}, ...}

             where DOS values are 1D numpy arrays. For deterministic output,
             use ordered dictionaries!
        filename (str or None): Filename for output. If None, write to stdout
        filetype (str): Format for output; "csv" or "txt.
        flipx (bool): Negate the x-axis (i.e. energy) values to make binding
            energies
    """

    header = ['energy']
    cols = [list(pdos_data.values())[0]['energy']]
    if flipx:
        cols[0] = -cols[0]

    for el, orbitals in pdos_data.items():
        for orbital, values in orbitals.items():
            if orbital.lower() != 'energy':
                header += ['_'.join((el, orbital))]
                cols.append(values)

    data = np.array(cols).T

    total = data[:, 1:].sum(axis=1)
    data = np.insert(data, 1, total, axis=1)
    header.insert(1, 'total')

    if filetype == 'csv':
        _write_csv_rows(data, filename=filename, header=header)
    elif filetype == 'txt':
        header = ' ' + ' '.join(('{0:12s}'.format(x) for x in header))
        _write_txt_rows(data, filename=filename, header=header)
    else:
        raise ValueError('filetype "{0}" not recognised. Use "txt" or "csv".')


[docs]def read_csv(filename):
    """Read a txt file containing frequencies and intensities

    If input file contains three columns, the first column is ignored. (It is
    presumed to be a vibrational mode index.)

    Args:
        filename (str): Path to data file

    Returns:
        n x 2 Numpy array of frequencies and intensities

    """
    return read_txt(filename, delimiter=',')


[docs]def read_txt(filename, delimiter=None):
    """Read a txt file containing frequencies and intensities

    If input file contains three columns, the first column is ignored. (It is
    presumed to be a vibrational mode index.)

    Args:
        filename (str): Path to data file

    Returns:
        n x 2 Numpy array of frequencies and intensities

    """
    xy_data = np.genfromtxt(filename, comments='#', delimiter=delimiter)

    columns = np.shape(xy_data)[1]

    if columns == 2:
        return xy_data
    elif columns == 3:
        return xy_data[:, 1:]
    elif columns < 2:
        raise Exception("Not sure how to interpret {0}: "
                        "not enough columns.".format(filename))
    else:
        raise Exception("Not sure how to interpret {0}: "
                        "too many columns.".format(filename))


[docs]def read_pdos_txt(filename, abs_values=True):
    """Read a text file containing projected density-of-states (PDOS) data

    The first row should be a header identifying the orbitals, e.g.
    "# Energy s p d f". The following rows contain the corresponding energy and
    DOS values. Spin channels indicated by (up) or (down) suffixes will be
    combined.

    Args:
        filename (str): Path to file for import
        abs_values (bool, optional):
            Convert intensity values to absolute numbers. This is primarily for
            compatibility with spin-polarised .dat files from Sumo. Set to
            False if negative values in spectrum are resonable.

    Returns:
        data (np.ndarray): Numpy structured array with named columns
            corresponding to input data format.
    """
    data = np.genfromtxt(filename, names=True)

    if abs_values:
        for col in data.dtype.names[1:]:
            data[col] = np.abs(data[col])

    # Get a list of orbitals that have 'up' and 'down' variants
    spin_pairs = []
    for col in data.dtype.names:
        if re.match('.+up', col):
            orbital = re.match('(.+)up', col).groups()[0]
            if orbital + 'down' in data.dtype.names:
                spin_pairs.append(orbital)

    if len(spin_pairs) == 0:
        return data

    else:
        # Sum up/down channel pairs into their respective up channels
        for orbital in spin_pairs:
            data[orbital + 'up'] += data[orbital + 'down']

        # Rename spin-up channels
        column_names = list(data.dtype.names)
        for orbital in spin_pairs:
            column_names[column_names.index(orbital + 'up')] = orbital
        data.dtype.names = tuple(column_names)

        # Exclude spin-down channels from returned data
        spin_down_orbs = [orb + 'down' for orb in spin_pairs]
        return data[[col for col in data.dtype.names
                     if col not in spin_down_orbs]]


[docs]def read_doscar(filename="DOSCAR"):
    """Read an x, y series of frequencies and DOS from a VASP DOSCAR file

    Args:
        filename (str): Path to DOSCAR file

    Returns:
        data (2-tuple): Tuple containing x values and y values as lists
    """
    with open(filename, 'r') as f:
        # Scroll to line 6 which contains NEDOS
        for i in range(5):
            f.readline()
        nedos = int(f.readline().split()[2])

        # Get number of fields and infer number of spin channels
        first_dos_line = f.readline().split()
        spin_channels = (len(first_dos_line) - 1) / 2
        if spin_channels == 1:

            def _tdos_from_line(line):
                return (float(line[0]), float(line[1]))
        elif spin_channels == 2:

            def _tdos_from_line(line):
                line = [float(x) for x in line]
                return (line[0], line[1] + line[2])
        else:
            raise Exception("Too many columns in DOSCAR")

        dos_pairs = (
            [_tdos_from_line(first_dos_line)] +
            [_tdos_from_line(f.readline().split()) for i in range(nedos - 1)])

        return np.array(dos_pairs)


[docs]def read_vasprun(filename='vasprun.xml'):
    """Read a VASP vasprun.xml file to obtain the density of states

    Pymatgen must be present on the system to use this method

    Args:
        filename (str): Path to vasprun.xml file

    Returns:
        data (pymatgen.electronic_structure.dos.Dos): A pymatgen Dos object
    """
    try:
        from pymatgen.io.vasp.outputs import Vasprun
    except ImportError as e:
        e.msg = "pymatgen package neccessary to load vasprun files"
        raise

    vr = Vasprun(filename)
    band = vr.get_band_structure()
    dos = vr.complete_dos

    if band.is_metal():
        zero_point = vr.efermi
    else:
        zero_point = band.get_vbm()['energy']

    # Shift the energies so that the vbm is at 0 eV, also taking into account
    # any gaussian broadening
    dos.energies -= zero_point
    if vr.parameters['ISMEAR'] == 0 or vr.parameters['ISMEAR'] == -1:
        dos.energies -= vr.parameters['SIGMA']

    return dos


[docs]def read_gpaw_totaldos(filename, npts=50001, width=1e-3, ref='vbm'):
    """Read total DOS from GPAW with minimal broadening

    This requires GPAW to be installed and on your PYTHONPATH!

    Args:
        filename (str): Path to GPAW calculation file. This should be a .gpw
            file generated with ``calc.write('myfilename.gpw')``.
        npts (int): Number of DOS samples
        width (float): Gaussian broadening parameter applied by GPAW. Default
            is minimal so that broadening is dominated by choices in Galore.
            Beware that there is a strong interaction between this parameter
            and npts; with a small npts and small width, many energy levels
            will be missed from the DOS!
        ref (str): Reference energy for DOS. 'vbm' or 'efermi' are accepted for
            the valence-band maximum or the Fermi energy, respectively. VBM is
            determined from calculation eigenvalues and not DOS values. If set
            to None, raw values are used.
    Returns:
        data (np.ndarray): 2D array of energy and DOS values
    """
    from gpaw import GPAW
    calc = GPAW(filename)

    if ref is None:
        ref_energy = 0
    elif ref.lower() == 'vbm':
        ref_energy, _ = calc.get_homo_lumo()
    elif ref.lower() == 'efermi':
        ref_energy = calc.get_fermi_level()

    energies, dos = calc.get_dos(npts=npts, width=width)
    return np.array(list(zip(energies - ref_energy, dos)))


[docs]def read_gpaw_pdos(filename, npts=50001, width=1e-3, ref='vbm'):
    """Read orbital-projected DOS from GPAW with minimal broadening.

    This requires GPAW to be installed and on your PYTHONPATH!

    Args:
        filename (str): Path to GPAW calculation file. This should be a .gpw
            file generated with ``calc.write('myfilename.gpw')``.
        npts (int): Number of DOS samples
        width (float): Gaussian broadening parameter applied by GPAW. Default
            is minimal so that broadening is dominated by choices in Galore.
            Beware that there is a strong interaction between this parameter
            and npts; with a small npts and small width, many energy levels
            will be missed from the DOS!
        ref (str): Reference energy for DOS. 'vbm' or 'efermi' are accepted for
            the valence-band maximum or the Fermi energy, respectively. VBM is
            determined from calculation eigenvalues and not DOS values. If set
            to None, raw values are used.

    Returns:
        pdos_data (OrderedDict): PDOS data formatted as nestled OrderedDict of:
            {element: {'energy': energies, 's': densities, 'p' ... }
    """
    from gpaw import GPAW
    calc = GPAW(filename)

    if ref is None:
        ref_energy = 0
    elif ref.lower() == 'vbm':
        ref_energy, _ = calc.get_homo_lumo()
    elif ref.lower() == 'efermi':
        ref_energy = calc.get_fermi_level()

    # Set up the structure of elements and orbitals.
    # Repeated elements will leave a single entry in the dict
    proto_orbitals = OrderedDict((('energy', np.zeros(npts)),
                                  ('s', np.zeros(npts)),
                                  ('p', np.zeros(npts)),
                                  ('d', np.zeros(npts)),
                                  ('f', np.zeros(npts))))
    pdos_data = OrderedDict([(atom.symbol, proto_orbitals.copy())
                             for atom in calc.atoms])

    # Read orbital DOS, adding to collected PDOS for that element/orbital
    for atom in calc.atoms:
        for orbital in 'spdf':
            energies, dos = calc.get_orbital_ldos(atom.index, angular=orbital,
                                                  npts=npts, width=width)
            pdos_data[atom.symbol][orbital] += dos

        pdos_data[atom.symbol]['energy'] = energies - ref_energy

    # Set any zero arrays to None so they can be easily skipped over
    # This should get rid of unused orbitals; if GPAW put some density in those
    # orbitals then we should keep that evidence rather than discard it.
    for element, orbitals in pdos_data.items():
        for orbital, dos in orbitals.items():
            if orbital != 'energy' and max(dos) == 0.:
                pdos_data[element][orbital] = None

    return pdos_data


[docs]def read_vasprun_totaldos(filename='vasprun.xml'):
    """Read an x, y series of energies and DOS from a VASP vasprun.xml file

    Pymatgen must be present on the system to use this method

    Args:
        filename (str): Path to vasprun.xml file

    Returns:
        data (np.ndarray): 2D array of energy and DOS values
    """
    dos = read_vasprun(filename)

    from pymatgen.electronic_structure.core import Spin

    # sum spin up and spin down channels
    densities = dos.densities[Spin.up]
    if len(dos.densities) > 1:
        densities += dos.densities[Spin.down]

    return np.array(list(zip(dos.energies, densities)))


[docs]def read_vasprun_pdos(filename='vasprun.xml'):
    """Read a vasprun.xml containing projected density-of-states (PDOS) data

    Pymatgen must be present on the system to use this method

    Args:
        filename (str or CompleteDos):
            Path to vasprun.xml file or pymatgen CompleteDos object.

    Returns:
        pdos_data (np.ndarray): PDOS data formatted as nestled OrderedDict of:
            {element: {'energy': energies, 's': densities, 'p' ... }
    """
    if isinstance(filename, str):
        dos = read_vasprun(filename)
    else:
        # filename is actually a pre-loaded CompleteDos
        dos = filename

    from pymatgen.electronic_structure.core import Spin, OrbitalType

    pdos_data = OrderedDict()
    for element in dos.structure.symbol_set:
        pdos_data[element] = OrderedDict([('energy', dos.energies)])
        pdos = dos.get_element_spd_dos(element)
        for orb in sorted([orb.value for orb in pdos.keys()]):
            # this way we can ensure the orbitals remain in the correct order
            orbital = OrbitalType(orb)
            # sum spin up and spin down channels
            densities = pdos[orbital].densities[Spin.up]
            if len(dos.densities) > 1:
                densities += pdos[orbital].densities[Spin.down]
            pdos_data[element][orbital.name] = densities

    return pdos_data


[docs]def read_vasp_raman(filename="vasp_raman.dat"):
    """Read output file from Vasp raman simulation

    Args:
        filename (str): Path to formatted data file generated by
            https://github.com/raman-sc/VASP - Raman intensities are computed
            by following vibrational modes and calculating polarisability. The
            generated output file is named *vasp_raman.dat* but can be renamed
            if desired. The format is five space-separated columns, headed by
            ``# mode    freq(cm-1)    alpha    beta2    activity``.

    Returns:
        2-D np.array:
            Only the columns corresponding to frequency and activity are
            retained.
    """

    data = np.genfromtxt(filename)
    return data[:, [1, -1]]