Source code for galore

###############################################################################
#                                                                             #
# GALORE: Gaussian and Lorentzian broadening for simulated spectra            #
#                                                                             #
# Developed by Scanlon Materials Theory Group at University College London    #
# (c) 2017                                                                    #
#                                                                             #
###############################################################################
#                                                                             #
# This file is part of Galore. Galore is free software: you can redistribute  #
# it and/or modify it under the terms of the GNU General Public License as    #
# published by the Free Software Foundation, either version 3 of the License, #
# or (at your option) any later version.  This program is distributed in the  #
# hope that it will be useful, but WITHOUT ANY WARRANTY; without even the     #
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    #
# See the GNU General Public License for more details.  You should have       #
# received a copy of the GNU General Public License along with this program.  #
# If not, see <http://www.gnu.org/licenses/>.                                 #
#                                                                             #
###############################################################################

from __future__ import print_function

from collections import OrderedDict
from collections.abc import Sequence
import logging
from math import sqrt, log
import os.path

import numpy as np
from scipy.interpolate import interp1d

import galore.formats
from galore.cross_sections import get_cross_sections, cross_sections_info


[docs]def auto_limits(data_1d, padding=0.05):
    """Return limiting values outside data range

    Args:
        data_1d (iterable): Data to obtain range from
        padding (float): Scale factor for padding relative to data range

    Returns:
        (2-tuple) (xmin, xmax)

    """
    xmin, xmax = min(data_1d), max(data_1d)
    auto_xmax = xmax + padding * (xmax - xmin)
    auto_xmin = xmin - padding * (xmax - xmin)
    return auto_xmin, auto_xmax


[docs]def process_1d_data(input=['vasprun.xml'],
                    gaussian=None, lorentzian=None,
                    sampling=1e-2,
                    xmin=None, xmax=None,
                    spikes=False,
                    **kwargs):
    """Read 1D data series from files, process for output

    Args:
        input (str or 1-list):
            Input data file. Pass as either a string or a list containing one
            string
        **kwargs:
            See main command reference

    Returns:
        2-tuple (np.ndarray, np.ndarray):
            Resampled x-values and corresponding broadened data as 1D numpy
            arrays

    """

    if 'flipx' in kwargs and kwargs['flipx']:
        raise Exception("x-flip not currently implemented in 1D mode.")

    if type(input) == str:
        pass
    elif len(input) > 1:
        raise ValueError("Simple DOS only uses one input file, "
                         "not list: {0}".format(input))
    else:
        input = input[0]

    if not os.path.exists(input):
        raise Exception(
            "Input file {0} does not exist!".format(input))
    if galore.formats.is_xml(input):
        xy_data = galore.formats.read_vasprun_totaldos(input)
    elif galore.formats.is_gpw(input):
        xy_data = galore.formats.read_gpaw_totaldos(input)
    elif galore.formats.is_doscar(input):
        xy_data = galore.formats.read_doscar(input)
    elif galore.formats.is_vasp_raman(input):
        xy_data = galore.formats.read_vasp_raman(input)
    elif galore.formats.is_csv(input):
        xy_data = galore.formats.read_csv(input)
    else:
        xy_data = galore.formats.read_txt(input)

    # Add 5% to data range if not specified
    auto_xmin, auto_xmax = auto_limits(xy_data[:, 0], padding=0.05)
    if xmax is None:
        xmax = auto_xmax
    if xmin is None:
        xmin = auto_xmin

    d = sampling
    x_values = np.arange(xmin, xmax, d)
    data_1d = galore.xy_to_1d(xy_data, x_values, spikes=spikes)

    broadened_data = data_1d.copy()
    if lorentzian:
        broadened_data = galore.broaden(
            broadened_data, d=d, dist='lorentzian', width=lorentzian)

    if gaussian:
        broadened_data = galore.broaden(
            broadened_data, d=d, dist='gaussian', width=gaussian)

    return (x_values, broadened_data)


[docs]def process_pdos(input=['vasprun.xml'],
                 gaussian=None, lorentzian=None,
                 weighting=None, sampling=1e-2,
                 xmin=None, xmax=None, flipx=False, **kwargs):
    """Read PDOS from files, process for output

    Args:
        input (list or str):
            Files for processing. Vasp output or space-separated files with
                XXX_EL_YYY.EXT filename pattern where EL is the element label.
                We recommend SYSTEM_EL_dos.dat. Alternatively, a
                `pymatgen.electronic_structure.dos.CompleteDos` can be
                provided. Spin channels indicated by an (up) or (down) suffix
                in file header will be combined for each orbital type.
        **kwargs:
            See main command reference

    Returns:
        dict:
            Weighted and resampled orbital data in format::

                {'el1': {'energy': values, 's': values, 'p': values ...},
                 'el2': {'energy': values, 's': values, ...}, ...}

    """
    if isinstance(input, str) or not isinstance(input, Sequence):
        input = [input]

    # Read files into dict, check for consistency
    energy_label = None
    pdos_data = OrderedDict()
    for pdos_file in input:
        if (galore.formats.is_complete_dos(pdos_file)
                or galore.formats.is_xml(pdos_file)):
            pdos_data = galore.formats.read_vasprun_pdos(pdos_file)
            kwargs['units'] = 'eV'
            break

        elif galore.formats.is_gpw(pdos_file):
            pdos_data = galore.formats.read_gpaw_pdos(pdos_file)
            kwargs['units'] = 'eV'
            break

        if not os.path.exists(pdos_file):
            raise Exception("Input file {0} does not "
                            "exist!".format(input))

        basename = os.path.basename(pdos_file)
        try:
            element = basename.split("_")[-2]
        except IndexError:
            raise Exception("Couldn't guess element name from filename. "
                            "Please format filename as XXX_EL_YYY.EXT"
                            "Where EL is the element label, and XXX, YYY "
                            "and EXT are labels of your choice. We recommend"
                            "SYSTEM_EL_dos.dat")

        data = galore.formats.read_pdos_txt(pdos_file)

        if energy_label is None:
            energy_label = data.dtype.names[0]
        else:
            try:
                assert data.dtype.names[0] == energy_label
            except AssertionError as error:
                error.args += ("Energy labels are not consistent "
                               "between input files",)
                raise

        orbital_labels = data.dtype.names[1:]
        pdos_data[element] = OrderedDict([('energy', data[energy_label])])
        pdos_data[element].update(OrderedDict((orbital, data[orbital])
                                  for orbital in orbital_labels))

    # Work out sampling details; 5% pad added to data if no limits specified
    # In x-flip mode, the user specifies these as binding energies so values
    # are reversed while treating DOS data.
    d = sampling
    limits = (auto_limits(data['energy'], padding=0.05)
              for (element, data) in pdos_data.items())
    xmins, xmaxes = zip(*limits)

    if xmax is None:
        xmax = max(xmaxes)

    if xmin is None:
        xmin = min(xmins)

    if flipx:
        xmin, xmax = -xmax, -xmin

    x_values = np.arange(xmin, xmax, d)

    # Resample data into new dictionary
    pdos_plotting_data = OrderedDict()
    for element, el_data in pdos_data.items():
        pdos_plotting_data[element] = OrderedDict([('energy', x_values)])
        for orbital, orb_data in el_data.items():
            if orbital == 'energy':
                continue
            elif orb_data is None:
                continue

            xy_data = np.column_stack([el_data['energy'], orb_data])

            pdos_resampled = galore.xy_to_1d(xy_data, x_values)
            broadened_data = pdos_resampled.copy()

            if lorentzian:
                broadened_data = galore.broaden(broadened_data, d=d,
                                                dist='lorentzian',
                                                width=lorentzian)

            if gaussian:
                broadened_data = galore.broaden(broadened_data, d=d,
                                                dist='gaussian',
                                                width=gaussian)

            pdos_plotting_data[element][orbital] = broadened_data

    if weighting:
        cross_sections = galore.get_cross_sections(weighting,
                                                   elements=pdos_data.keys())
        pdos_plotting_data = galore.apply_orbital_weights(
            pdos_plotting_data, cross_sections)

    return pdos_plotting_data


[docs]def xy_to_1d(xy, x_values, spikes=False):
    """Convert a set of x,y coordinates to 1D array

    Data is resampled to a given sequence of regularly-spaced x-values. By
    default linear interpolation is used between neighbouring points, which is
    suitable for resampling distribution data.

    Where the data consists of a set of discrete energy levels, it should be
    resampled to a series of "spikes". y-values are placed on the nearest
    x-value by subtracting d/2 and rounding up. d is determined by examining
    the first two elements of x_values.

    Args:
        xy: (ndarray) 2D numpy array of x, y values
        x_values: (iterable) An evenly-spaced x-value mesh
    Returns:
        (np.array): re-sampled y values corresponding to x_values
    """

    x_values = np.array(x_values)
    n_x_values = x_values.size
    d = x_values[1] - x_values[0]

    # Structured arrays are allowed, in which case first field is x,
    # second is y. A bit of hackery is needed to slice these interchangeably.
    if xy.dtype.names is None:
        x_field, y_field = (Ellipsis, 0), (Ellipsis, 1)
    else:
        x_field, y_field = xy.dtype.names

    if spikes:
        spikes = np.zeros(n_x_values)
        spike_locations = x_values.searchsorted(xy[x_field] - (0.5 * d))

        for location, value in zip(spike_locations, xy[y_field]):
            if location == 0 or location == n_x_values:
                pass
            else:
                spikes[location] += value

        return spikes

    else:
        y_func = interp1d(xy[x_field], xy[y_field],
                          assume_sorted=False,
                          bounds_error=False, fill_value=0)
        return y_func(x_values)


[docs]def delta(f1, f2, w=1):
    """Compare two frequencies, return 1 if close"""
    if abs(f1 - f2) <= 0.5 * w:
        return 1
    else:
        return 0


[docs]def lorentzian(f, f0=0, fwhm=1):
    """Lorentzian function with height 1 centered on f0.

    Args:
        f (np.array): 1D array of x-values (e.g. frequencies)
        f0 (float): Origin of function
        fwhm (float): full-width half-maximum (FWHM);
            i.e. the width of the function at half its maximum value.

    """
    return 0.5 * fwhm / (np.pi * ((f - f0)**2 + (0.5 * fwhm)**2))


[docs]def gaussian(f, f0=0, fwhm=1):
    """Gaussian function with height 1 centered on f0

       f (np.array): 1D array of x-values (e.g. frequencies)
       f0 (float): Origin of function
       fwhm (float): full-width half-maximum (FWHM); i.e. the width of the
       function at half its maximum value.

    """
    c = fwhm / (2 * sqrt(2 * log(2)))
    return np.exp(-np.power(f - f0, 2) / (2 * c**2))


[docs]def broaden(data, dist='lorentz', width=2, pad=False, d=1):
    """Given a 1d data set, use convolution to apply a broadening function

    Args:
        data (np.array): 1D array of data points to broaden
        dist (str): Type of distribution used for broadening. Currently only
            "Lorentz" is supported.
        width (float): Width parameter for broadening function. Determines the
            full-width at half-maximum (FWHM) of the broadening function.
        pad (float): Distance sampled on each side of broadening function.
        d (float): x-axis distance associated with each sample in 1D data

    """

    if not pad:
        pad = width * 20

    if dist.lower() in ('lorentz', 'lorentzian'):
        fwhm = width
        broadening = lorentzian(np.arange(-pad, pad, d), f0=0, fwhm=fwhm)
    elif dist.lower() in ('gauss', 'gaussian'):
        fwhm = width
        broadening = gaussian(np.arange(-pad, pad, d), f0=0, fwhm=fwhm)
    else:
        raise Exception('Broadening distribution '
                        ' "{0}" not known.'.format(dist))

    pad_points = int(pad / d)
    broadened_data = np.convolve(broadening, data)
    broadened_data = broadened_data[pad_points:len(data) + pad_points]

    return broadened_data


[docs]def apply_orbital_weights(pdos_data, cross_sections):
    """Weight orbital intensities by cross-section for photoemission simulation

    Args:
        pdos_data (dict): DOS data in format::

                {'el1': {'energy': values, 's': values, 'p': values ...},
                 'el2': {'energy': values, 's': values, ...}, ...}

             where DOS values are 1D numpy arrays. Orbital labels must match
             cross_sections data. It is recommended to use
             collections.OrderedDict instead of regular dictionaries, to ensure
             consistent output.

             In addition, the fields "citation", "link" and "energy" are
             recognised and logged as "INFO".

        cross_sections (dict): Weightings in format::

                {'el1': {'1s': x1, '2s': x2, '2p': x3 ...},
                 'el2': {'3s': y1, '3p': y2 ...}, ...}

             The labels should correspond to the headers in the input data. It
             is fine not so specify the level (e.g. use 's', 'p', etc.) as is
             done in the sample data; however, this means that all levels are
             being treated equally and hence probably the core levels will be
             weighted incorrectly. It is possible to set the cross-section of
             undesired orbitals (e.g. projection onto d-orbital for early
             elements) to None; in this case the orbital will be dropped from
             the returned data set.

    Returns:
        weighted_pdos_data (dict): Weighted data in same format as input
    """

    if type(cross_sections) != dict:
        raise TypeError('Cross-section data should be a dictionary. Try using '
                        'galore.get_cross_sections for a suitable data set.')

    logging.info("Applying cross-section weighting values to PDOS:")

    cross_sections_info(cross_sections, logging=logging)

    logging.info("  Orbital cross-section weights per electron:")

    weighted_pdos_data = OrderedDict()
    for el, orbitals in pdos_data.items():
        if 'warning' in cross_sections[el]:
            logging.warning("  {0}: {1}".format(
                el, cross_sections[el]['warning']))
        weighted_orbitals = OrderedDict()
        for orbital, data in orbitals.items():
            if orbital == 'energy':
                weighted_orbitals.update({'energy': data})
            else:
                try:
                    cs = cross_sections[el][orbital]
                except KeyError:
                    logging.warning("Could not find cross-section data for " +
                                    "element {0}, ".format(el) +
                                    "orbital {0}. ".format(orbital) +
                                    "Skipping this orbital.")
                    cs = None

                if cs is None:
                    pass
                else:
                    logging.info("   {0} {1}: {2:.3e}".format(el, orbital, cs))
                    weighted_orbitals.update({orbital: data * cs})

        weighted_pdos_data.update({el: weighted_orbitals})

    return weighted_pdos_data