gitlab | materialsmodeling.org
Source code for icet.tools.structure_mapping

from typing import List, Tuple

import numpy as np

from ase import Atoms
from ase.build import make_supercell
from ase.geometry import get_distances
from scipy.optimize import linear_sum_assignment
from icet.input_output.logging_tools import logger
import scipy.linalg


def calculate_strain_tensor(A: np.ndarray,
                            B: np.ndarray) -> np.ndarray:
    """Calculates the strain tensor for mapping a cell A onto cell B. The
    strain calculated is the Biot strain tensor and is rotationally invariant.

    Parameters
    ----------
    A
        Reference cell (row-major format).
    B
        Target cell (row-major format).
    """
    assert A.shape == (3, 3)
    assert B.shape == (3, 3)

    # Calculate deformation gradient (F) in column-major format
    F = np.linalg.solve(A, B).T

    # Calculate right stretch tensor (U)
    _, U = scipy.linalg.polar(F)

    # return Biot strain tensor
    return U - np.eye(3)



[docs]
def map_structure_to_reference(structure: Atoms,
                               reference: Atoms,
                               inert_species: List[str] = None,
                               tol_positions: float = 1e-4,
                               suppress_warnings: bool = False,
                               assume_no_cell_relaxation: bool = False) \
        -> Tuple[Atoms, dict]:
    """Maps a structure onto a reference structure.
    This is often desirable when, for example, a structure has been
    relaxed using DFT, and one wants to use it as a training structure
    in a cluster expansion.

    The function returns a tuple comprising the ideal supercell most
    closely matching the input structure and a dictionary with
    supplementary information concerning the mapping. The latter
    includes for example the largest deviation of any position in the
    input structure from its reference position (``drmax``), the average
    deviation of the positions in the input structure from the
    reference positions (``dravg``), and the strain tensor for the input
    structure relative to the reference structure (``strain_tensor``).

    The returned Atoms object provides further supplemental information via
    custom per-atom arrays including the atomic displacements
    (``Displacement``, ``Displacement_Magnitude``),the distances to the
    three closest sites (``Minimum_Distances``), as well as a mapping
    between the indices of the returned Atoms object and those of the input
    structure (``IndexMapping``).

    Note
    ----
    The input and reference structures should adhere to the same crystallographic setting.
    In other words, they should not be rotated or translated with respect to each other.

    Parameters
    ----------
    structure
        Input structure, typically a relaxed structure.
    reference
        Reference structure, which can but need not be the primitive structure.
    inert_species
        List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never
        substituted for a vacancy. The number of inert sites is used to rescale
        the volume of the input structure to match the reference structure.
    tol_positions
        Tolerance factor applied when scanning for overlapping positions in
        Ångstrom (forwarded to :func:`ase.build.make_supercell`).
    suppress_warnings
        if ``True`` print no warnings of large strain or relaxation distances.
    assume_no_cell_relaxation
        If ``False`` volume and cell metric of the input structure are rescaled
        to match the reference structure. This can be unnecessary (and
        counterproductive) for some structures, e.g., with many vacancies.

        Note
        ----
        When setting this parameter to ``False`` the reference cell metric
        must be obtainable via an *integer* transformation matrix from the
        reference cell metric. In other words the input structure should not
        involve relaxations of the volume or the cell metric.

    Example
    -------
    The following code snippet illustrates the general usage. It first creates
    a primitive FCC cell, which is latter used as reference structure. To
    emulate a relaxed structure obtained from, e.g., a density functional
    theory calculation, the code then creates a 4x4x4 conventional FCC
    supercell, which is populated with two different atom types, has distorted
    cell vectors, and random displacements to the atoms. Finally, the present
    function is used to map the structure back the ideal lattice::

        >>> from ase.build import bulk
        >>> reference = bulk('Au', a=4.09)
        >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4)
        >>> structure.symbols = 10 * ['Ag'] + (len(structure) - 10) * ['Au']
        >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True)
        >>> structure.rattle(0.1)
        >>> mapped_structure, info = map_structure_to_reference(structure, reference)
    """

    # Obtain supercell of reference structure that is compatible
    # with relaxed structure
    reference_supercell, P = _get_reference_supercell(
        structure, reference, inert_species=inert_species,
        tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation)

    # Calculate strain tensor
    strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell)

    # Symmetric matrix has real eigenvalues
    eigenvalues, _ = np.linalg.eigh(strain_tensor)
    volumetric_strain = sum(eigenvalues)

    # Rescale the relaxed atoms object
    structure_scaled = structure.copy()
    structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True)

    # Match positions
    mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled,
                                                               reference_supercell)

    if warning:
        warnings = [warning]
    else:
        warnings = []

    if not suppress_warnings:
        s = 'Consider excluding this structure when training a cluster expansion.'
        if assume_no_cell_relaxation:
            trigger_levels = {'volumetric_strain': 1e-3,
                              'eigenvalue_diff': 1e-3}
        else:
            trigger_levels = {'volumetric_strain': 0.25,
                              'eigenvalue_diff': 0.1}
        if abs(volumetric_strain) > trigger_levels['volumetric_strain']:
            warnings.append('high_volumetric_strain')
            logger.warning('High volumetric strain ({:.2f} %). {}'.format(
                100 * volumetric_strain, s))
        if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']:
            warnings.append('high_anisotropic_strain')
            logger.warning('High anisotropic strain (the difference between '
                           'largest and smallest eigenvalues of strain tensor is '
                           '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s))
        if drmax > 1.0:
            warnings.append('large_maximum_relaxation_distance')
            logger.warning('Large maximum relaxation distance '
                           '({:.5f} Angstrom). {}'.format(drmax, s))
        if dravg > 0.5:
            warnings.append('large_average_relaxation_distance')
            logger.warning('Large average relaxation distance '
                           '({:.5f} Angstrom). {}'.format(dravg, s))

    # Populate dictionary with supplementary information
    info = {'drmax': drmax,
            'dravg': dravg,
            'strain_tensor': strain_tensor,
            'volumetric_strain': volumetric_strain,
            'strain_tensor_eigenvalues': eigenvalues,
            'warnings': warnings,
            'transformation_matrix': P}

    return mapped_structure, info



def _get_reference_supercell(structure: Atoms,
                             reference: Atoms,
                             inert_species: List[str] = None,
                             tol_positions: float = 1e-5,
                             assume_no_cell_relaxation: bool = False) -> Tuple[Atoms, np.ndarray]:
    """
    Returns a tuple comprising a supercell of the reference structure that is compatible with the
    cell metric of the input structure as well as the transformation matrix that maps from the
    primitive to the supercell structure.

    Parameters
    ----------
    structure
        Input structure, typically a relaxed structure.
    reference
        Reference structure, which can but need not be the primitive structure.
    inert_species
        List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never
        substituted for a vacancy. The number of inert sites is used to rescale
        of the input structure to match the reference structure.
    tol_positions
        Tolerance factor applied when scanning for overlapping positions in
        Ångstrom (forwarded to :func:`ase.build.make_supercell`).
    assume_no_cell_relaxation
        If ``False`` volume and cell metric of the input structure are rescaled
        to match the reference structure. This can be unnecessary (and
        counterproductive) for some structures, e.g., with many vacancies.

        Note
        ----
        When setting this parameter to ``False``, the input structure
        must be obtainable via an *integer* transformation matrix from the
        reference cell metric. In other words the input structure should not
        involve relaxations of the volume or the cell metric.

    Raises
    ------
    ValueError
        If the boundary conditions of the reference and the input structure
        do not match.
    ValueError
        If the transformation matrix deviates too strongly from the nearest
        integer matrix.
    ValueError
        If :attr:`assume_no_cell_relaxation` is ``True`` but the input structure is
        not obtainable via an integer transformation from the reference
        cell metric.
    """

    if not np.all(reference.pbc == structure.pbc):
        msg = 'The boundary conditions of reference and input structures do not match.'
        msg += '\n  reference: ' + str(reference.pbc)
        msg += '\n  input: ' + str(structure.pbc)
        raise ValueError(msg)

    # Step 1:
    # rescale cell metric of relaxed cell to match volume per atom of reference cell
    if not assume_no_cell_relaxation:
        if inert_species is None:
            n_ref = len(reference)
            n_rlx = len(structure)
        else:
            n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species])
            n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species])
        vol_scale = reference.get_volume() / n_ref
        vol_scale /= structure.get_volume() / n_rlx
        scaled_structure_cell = structure.cell * vol_scale ** (1 / 3)
    else:
        scaled_structure_cell = structure.cell

    # Step 2:
    # get transformation matrix
    P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell))

    # reduce the (real) transformation matrix to the nearest integer one
    P = np.around(P)

    # Step 3:
    # generate supercell of reference structure
    reference_supercell = make_supercell(reference, P, tol=tol_positions)

    return reference_supercell, P


def _match_positions(structure: Atoms, reference: Atoms) -> Tuple[Atoms, float, float]:
    """Matches the atoms in the input :attr:`structure` to the sites in the
    :attr:`reference` structure. The function returns tuple the first element of which
    is a copy of the :attr:`reference` structure, in which the chemical species are
    assigned to comply with the :attr:`structure`. The second and third element
    of the tuple represent the maximum and average distance between relaxed and
    reference sites.

    Parameters
    ----------
    structure
        Structure with relaxed positions.
    reference
        Structure with idealized positions.

    Raises
    ------
    ValueError
        If the cell metrics of the two input structures do not match.
    ValueError
        If the periodic boundary conditions of the two input structures do not match.
    ValueError
        If the input structure contains more atoms than the reference structure.
    """

    if not np.all(reference.pbc == structure.pbc):
        msg = 'The boundary conditions of reference and relaxed structures do not match.'
        msg += '\n  reference: ' + str(reference.pbc)
        msg += '\n  relaxed: ' + str(structure.pbc)
        raise ValueError(msg)

    if len(structure) > len(reference):
        msg = 'The relaxed structure contains more atoms than the reference structure.'
        msg += '\n  reference: ' + str(len(reference))
        msg += '\n  relaxed: ' + str(len(structure))
        raise ValueError(msg)

    if not np.all(np.isclose(reference.cell, structure.cell)):
        msg = 'The cell metrics of reference and relaxed structures do not match.'
        msg += '\n  reference: ' + str(reference.cell)
        msg += '\n  relaxed: ' + str(structure.cell)
        raise ValueError(msg)

    # compute distances between reference and relaxed positions
    _, dists = get_distances(reference.positions, structure.positions,
                             cell=reference.cell, pbc=reference.pbc)
    # pad matrix with zeros to obtain square matrix
    n, m = dists.shape
    cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))),
                         mode='constant', constant_values=0)
    # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm
    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    # compile new configuration with supplementary information
    mapped = reference.copy()
    displacement_magnitudes = []
    displacements = []
    minimum_distances = []
    n_dist_max = min(len(mapped), 3)
    warning = None
    for i, j in zip(row_ind, col_ind):
        atom = mapped[i]
        if j >= len(structure):
            # vacant site in reference structure
            atom.symbol = 'X'
            displacement_magnitudes.append(None)
            displacements.append(3 * [None])
            minimum_distances.append(n_dist_max * [None])
        else:
            atom.symbol = structure[j].symbol
            dvecs, drs = get_distances([structure[j].position],
                                       [reference[i].position],
                                       cell=reference.cell, pbc=reference.pbc)
            displacement_magnitudes.append(drs[0][0])
            displacements.append(dvecs[0][0])
            # distances to the next three available sites
            minimum_distances.append(sorted(dists[:, j])[:n_dist_max])
            if len(structure) > 1:
                if drs[0][0] > min(dists[:, j]) + 1e-6:
                    logger.warning('An atom was mapped to a site that was further '
                                   'away than the closest site (that site was already '
                                   'occupied by another atom).')
                    warning = 'possible_ambiguity_in_mapping'
                elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]:
                    logger.warning('An atom was approximately equally far from its '
                                   'two closest sites.')
                    warning = 'possible_ambiguity_in_mapping'

    displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64)
    mapped.new_array('Displacement', displacements, float, shape=(3, ))
    mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float)
    mapped.new_array('Minimum_Distances', minimum_distances,
                     float, shape=(n_dist_max,))
    mapped.new_array('IndexMapping', col_ind, int)

    drmax = np.nanmax(displacement_magnitudes)
    dravg = np.nanmean(displacement_magnitudes)

    return mapped, drmax, dravg, warning