Source code for icet.tools.structure_mapping

from typing import List, Tuple

import numpy as np

from ase import Atoms
from ase.build import make_supercell
from ase.geometry import get_distances
from scipy.optimize import linear_sum_assignment
from icet.input_output.logging_tools import logger
import scipy.linalg


def calculate_strain_tensor(A: np.ndarray,
                            B: np.ndarray) -> np.ndarray:
    """Calculates the strain tensor for mapping a cell A onto cell B. The
    strain calculated is the Biot strain tensor and is rotationally invariant.

    Parameters
    ----------
    A
        reference cell (row-major format)
    B
        target cell (row-major format)

    Returns
    -------
    strain_tensor
        Biot strain tensor (symmetric matrix)
    """
    assert A.shape == (3, 3)
    assert B.shape == (3, 3)

    # Calculate deformation gradient (F) in column-major format
    F = np.linalg.solve(A, B).T

    # Calculate right stretch tensor (U)
    _, U = scipy.linalg.polar(F)

    # return Biot strain tensor
    return U - np.eye(3)


[docs]def map_structure_to_reference(structure: Atoms, reference: Atoms, inert_species: List[str] = None, tol_positions: float = 1e-4, suppress_warnings: bool = False, assume_no_cell_relaxation: bool = False) \ -> Tuple[Atoms, dict]: """Maps a structure onto a reference structure. This is often desirable when, for example, a structure has been relaxed using DFT, and one wants to use it as a training structure in a cluster expansion. The function returns a tuple comprising the ideal supercell most closely matching the input structure and a dictionary with supplementary information concerning the mapping. The latter includes for example the largest deviation of any position in the input structure from its reference position (`drmax`), the average deviation of the positions in the input structure from the reference positions (`dravg`), and the strain tensor for the input structure relative to the reference structure (`strain_tensor`). The returned Atoms object provides further supplemental information via custom per-atom arrays including the atomic displacements (`Displacement`, `Displacement_Magnitude`),the distances to the three closest sites (`Minimum_Distances`), as well as a mapping between the indices of the returned Atoms object and those of the input structure ('IndexMapping'). Note ---- The input and reference structures should adhere to the same crystallographic setting. In other words, they should not be rotated or translated with respect to each other. Parameters ---------- structure input structure, typically a relaxed structure reference reference structure, which can but need not be the primitive structure inert_species list of chemical symbols (e.g., ``['Au', 'Pd']``) that are never substituted for a vacancy; the number of inert sites is used to rescale the volume of the input structure to match the reference structure. tol_positions tolerance factor applied when scanning for overlapping positions in Angstrom (forwarded to :func:`ase.build.make_supercell`) suppress_warnings if True, print no warnings of large strain or relaxation distances assume_no_cell_relaxation if False volume and cell metric of the input structure are rescaled to match the reference structure; this can be unnecessary (and counterproductive) for some structures, e.g., with many vacancies **Note**: When setting this parameter to False the reference cell metric must be obtainable via an *integer* transformation matrix from the reference cell metric. In other words the input structure should not involve relaxations of the volume or the cell metric. Example ------- The following code snippet illustrates the general usage. It first creates a primitive FCC cell, which is latter used as reference structure. To emulate a relaxed structure obtained from, e.g., a density functional theory calculation, the code then creates a 4x4x4 conventional FCC supercell, which is populated with two different atom types, has distorted cell vectors, and random displacements to the atoms. Finally, the present function is used to map the structure back the ideal lattice:: >>> from ase.build import bulk >>> reference = bulk('Au', a=4.09) >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4) >>> structure.set_chemical_symbols(10 * ['Ag'] + (len(structure) - 10) * ['Au']) >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True) >>> structure.rattle(0.1) >>> mapped_structure, info = map_structure_to_reference(structure, reference) """ # Obtain supercell of reference structure that is compatible # with relaxed structure reference_supercell, P = _get_reference_supercell( structure, reference, inert_species=inert_species, tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation) # Calculate strain tensor strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell) # Symmetric matrix has real eigenvalues eigenvalues, _ = np.linalg.eigh(strain_tensor) volumetric_strain = sum(eigenvalues) # Rescale the relaxed atoms object structure_scaled = structure.copy() structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True) # Match positions mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled, reference_supercell) if warning: warnings = [warning] else: warnings = [] if not suppress_warnings: s = 'Consider excluding this structure when training a cluster expansion.' if assume_no_cell_relaxation: trigger_levels = {'volumetric_strain': 1e-3, 'eigenvalue_diff': 1e-3} else: trigger_levels = {'volumetric_strain': 0.25, 'eigenvalue_diff': 0.1} if abs(volumetric_strain) > trigger_levels['volumetric_strain']: warnings.append('high_volumetric_strain') logger.warning('High volumetric strain ({:.2f} %). {}'.format( 100 * volumetric_strain, s)) if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']: warnings.append('high_anisotropic_strain') logger.warning('High anisotropic strain (the difference between ' 'largest and smallest eigenvalues of strain tensor is ' '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s)) if drmax > 1.0: warnings.append('large_maximum_relaxation_distance') logger.warning('Large maximum relaxation distance ' '({:.5f} Angstrom). {}'.format(drmax, s)) if dravg > 0.5: warnings.append('large_average_relaxation_distance') logger.warning('Large average relaxation distance ' '({:.5f} Angstrom). {}'.format(dravg, s)) # Populate dictionary with supplementary information info = {'drmax': drmax, 'dravg': dravg, 'strain_tensor': strain_tensor, 'volumetric_strain': volumetric_strain, 'strain_tensor_eigenvalues': eigenvalues, 'warnings': warnings, 'transformation_matrix': P} return mapped_structure, info
def _get_reference_supercell(structure: Atoms, reference: Atoms, inert_species: List[str] = None, tol_positions: float = 1e-5, assume_no_cell_relaxation: bool = False) -> Tuple[Atoms, np.ndarray]: """ Returns a tuple comprising a supercell of the reference structure that is compatible with the cell metric of the input structure as well as the transformation matrix that maps from the primitive to the supercell structure. Parameters ---------- structure input structure, typically a relaxed structure reference reference structure, which can but need not be the primitive structure inert_species list of chemical symbols (e.g., ``['Au', 'Pd']``) that are never substituted for a vacancy; the number of inert sites is used to rescale of the input structure to match the reference structure. tol_positions tolerance factor applied when scanning for overlapping positions in Angstrom (forwarded to :func:`ase.build.make_supercell`) assume_no_cell_relaxation if False volume and cell metric of the input structure are rescaled to match the reference structure; this can be unnecessary (and counterproductive) for some structures, e.g., with many vacancies **Note**: When setting this parameter to False, the input structure must be obtainable via an *integer* transformation matrix from the reference cell metric. In other words the input structure should not involve relaxations of the volume or the cell metric. Raises ------ ValueError if the boundary conditions of the reference and the input structure do not match ValueError if the transformation matrix deviates too strongly from the nearest integer matrix ValueError if assume_no_cell_relaxation is True but the input structure is not obtainable via an integer transformation from the reference cell metric """ if not np.all(reference.pbc == structure.pbc): msg = 'The boundary conditions of reference and input structures do not match.' msg += '\n reference: ' + str(reference.pbc) msg += '\n input: ' + str(structure.pbc) raise ValueError(msg) # Step 1: # rescale cell metric of relaxed cell to match volume per atom of reference cell if not assume_no_cell_relaxation: if inert_species is None: n_ref = len(reference) n_rlx = len(structure) else: n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species]) n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species]) vol_scale = reference.get_volume() / n_ref vol_scale /= structure.get_volume() / n_rlx scaled_structure_cell = structure.cell * vol_scale ** (1 / 3) else: scaled_structure_cell = structure.cell # Step 2: # get transformation matrix P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell)) # reduce the (real) transformation matrix to the nearest integer one P = np.around(P) # Step 3: # generate supercell of reference structure reference_supercell = make_supercell(reference, P, tol=tol_positions) return reference_supercell, P def _match_positions(structure: Atoms, reference: Atoms) -> Tuple[Atoms, float, float]: """Matches the atoms in the input `structure` to the sites in the `reference` structure. The function returns tuple the first element of which is a copy of the `reference` structure, in which the chemical species are assigned to comply with the `structure`. The second and third element of the tuple represent the maximum and average distance between relaxed and reference sites. Parameters ---------- structure structure with relaxed positions reference structure with idealized positions Raises ------ ValueError if the cell metrics of the two input structures do not match ValueError if the periodic boundary conditions of the two input structures do not match ValueError if the input structure contains more atoms than the reference structure """ if not np.all(reference.pbc == structure.pbc): msg = 'The boundary conditions of reference and relaxed structures do not match.' msg += '\n reference: ' + str(reference.pbc) msg += '\n relaxed: ' + str(structure.pbc) raise ValueError(msg) if len(structure) > len(reference): msg = 'The relaxed structure contains more atoms than the reference structure.' msg += '\n reference: ' + str(len(reference)) msg += '\n relaxed: ' + str(len(structure)) raise ValueError(msg) if not np.all(np.isclose(reference.cell, structure.cell)): msg = 'The cell metrics of reference and relaxed structures do not match.' msg += '\n reference: ' + str(reference.cell) msg += '\n relaxed: ' + str(structure.cell) raise ValueError(msg) # compute distances between reference and relaxed positions _, dists = get_distances(reference.positions, structure.positions, cell=reference.cell, pbc=reference.pbc) # pad matrix with zeros to obtain square matrix n, m = dists.shape cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))), mode='constant', constant_values=0) # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm row_ind, col_ind = linear_sum_assignment(cost_matrix) # compile new configuration with supplementary information mapped = reference.copy() displacement_magnitudes = [] displacements = [] minimum_distances = [] n_dist_max = min(len(mapped), 3) warning = None for i, j in zip(row_ind, col_ind): atom = mapped[i] if j >= len(structure): # vacant site in reference structure atom.symbol = 'X' displacement_magnitudes.append(None) displacements.append(3 * [None]) minimum_distances.append(n_dist_max * [None]) else: atom.symbol = structure[j].symbol dvecs, drs = get_distances([structure[j].position], [reference[i].position], cell=reference.cell, pbc=reference.pbc) displacement_magnitudes.append(drs[0][0]) displacements.append(dvecs[0][0]) # distances to the next three available sites minimum_distances.append(sorted(dists[:, j])[:n_dist_max]) if len(structure) > 1: if drs[0][0] > min(dists[:, j]) + 1e-6: logger.warning('An atom was mapped to a site that was further ' 'away than the closest site (that site was already ' 'occupied by another atom).') warning = 'possible_ambiguity_in_mapping' elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]: logger.warning('An atom was approximately equally far from its ' 'two closest sites.') warning = 'possible_ambiguity_in_mapping' displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64) mapped.new_array('Displacement', displacements, float, shape=(3, )) mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float) mapped.new_array('Minimum_Distances', minimum_distances, float, shape=(n_dist_max,)) mapped.new_array('IndexMapping', col_ind, int) drmax = np.nanmax(displacement_magnitudes) dravg = np.nanmean(displacement_magnitudes) return mapped, drmax, dravg, warning