Coverage for icet/tools/structure_mapping.py: 100%
117 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-12-26 04:12 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-12-26 04:12 +0000
1from typing import List, Tuple
3import numpy as np
5from ase import Atoms
6from ase.build import make_supercell
7from ase.geometry import get_distances
8from scipy.optimize import linear_sum_assignment
9from icet.input_output.logging_tools import logger
10import scipy.linalg
13def calculate_strain_tensor(A: np.ndarray,
14 B: np.ndarray) -> np.ndarray:
15 """Calculates the strain tensor for mapping a cell A onto cell B. The
16 strain calculated is the Biot strain tensor and is rotationally invariant.
18 Parameters
19 ----------
20 A
21 Reference cell (row-major format).
22 B
23 Target cell (row-major format).
24 """
25 assert A.shape == (3, 3)
26 assert B.shape == (3, 3)
28 # Calculate deformation gradient (F) in column-major format
29 F = np.linalg.solve(A, B).T
31 # Calculate right stretch tensor (U)
32 _, U = scipy.linalg.polar(F)
34 # return Biot strain tensor
35 return U - np.eye(3)
38def map_structure_to_reference(structure: Atoms,
39 reference: Atoms,
40 inert_species: List[str] = None,
41 tol_positions: float = 1e-4,
42 suppress_warnings: bool = False,
43 assume_no_cell_relaxation: bool = False) \
44 -> Tuple[Atoms, dict]:
45 """Maps a structure onto a reference structure.
46 This is often desirable when, for example, a structure has been
47 relaxed using DFT, and one wants to use it as a training structure
48 in a cluster expansion.
50 The function returns a tuple comprising the ideal supercell most
51 closely matching the input structure and a dictionary with
52 supplementary information concerning the mapping. The latter
53 includes for example the largest deviation of any position in the
54 input structure from its reference position (``drmax``), the average
55 deviation of the positions in the input structure from the
56 reference positions (``dravg``), and the strain tensor for the input
57 structure relative to the reference structure (``strain_tensor``).
59 The returned Atoms object provides further supplemental information via
60 custom per-atom arrays including the atomic displacements
61 (``Displacement``, ``Displacement_Magnitude``),the distances to the
62 three closest sites (``Minimum_Distances``), as well as a mapping
63 between the indices of the returned Atoms object and those of the input
64 structure (``IndexMapping``).
66 Note
67 ----
68 The input and reference structures should adhere to the same crystallographic setting.
69 In other words, they should not be rotated or translated with respect to each other.
71 Parameters
72 ----------
73 structure
74 Input structure, typically a relaxed structure.
75 reference
76 Reference structure, which can but need not be the primitive structure.
77 inert_species
78 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never
79 substituted for a vacancy. The number of inert sites is used to rescale
80 the volume of the input structure to match the reference structure.
81 tol_positions
82 Tolerance factor applied when scanning for overlapping positions in
83 Ångstrom (forwarded to :func:`ase.build.make_supercell`).
84 suppress_warnings
85 if ``True`` print no warnings of large strain or relaxation distances.
86 assume_no_cell_relaxation
87 If ``False`` volume and cell metric of the input structure are rescaled
88 to match the reference structure. This can be unnecessary (and
89 counterproductive) for some structures, e.g., with many vacancies.
91 Note
92 ----
93 When setting this parameter to ``False`` the reference cell metric
94 must be obtainable via an *integer* transformation matrix from the
95 reference cell metric. In other words the input structure should not
96 involve relaxations of the volume or the cell metric.
98 Example
99 -------
100 The following code snippet illustrates the general usage. It first creates
101 a primitive FCC cell, which is latter used as reference structure. To
102 emulate a relaxed structure obtained from, e.g., a density functional
103 theory calculation, the code then creates a 4x4x4 conventional FCC
104 supercell, which is populated with two different atom types, has distorted
105 cell vectors, and random displacements to the atoms. Finally, the present
106 function is used to map the structure back the ideal lattice::
108 >>> from ase.build import bulk
109 >>> reference = bulk('Au', a=4.09)
110 >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4)
111 >>> structure.set_chemical_symbols(10 * ['Ag'] + (len(structure) - 10) * ['Au'])
112 >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True)
113 >>> structure.rattle(0.1)
114 >>> mapped_structure, info = map_structure_to_reference(structure, reference)
115 """
117 # Obtain supercell of reference structure that is compatible
118 # with relaxed structure
119 reference_supercell, P = _get_reference_supercell(
120 structure, reference, inert_species=inert_species,
121 tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation)
123 # Calculate strain tensor
124 strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell)
126 # Symmetric matrix has real eigenvalues
127 eigenvalues, _ = np.linalg.eigh(strain_tensor)
128 volumetric_strain = sum(eigenvalues)
130 # Rescale the relaxed atoms object
131 structure_scaled = structure.copy()
132 structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True)
134 # Match positions
135 mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled,
136 reference_supercell)
138 if warning:
139 warnings = [warning]
140 else:
141 warnings = []
143 if not suppress_warnings:
144 s = 'Consider excluding this structure when training a cluster expansion.'
145 if assume_no_cell_relaxation:
146 trigger_levels = {'volumetric_strain': 1e-3,
147 'eigenvalue_diff': 1e-3}
148 else:
149 trigger_levels = {'volumetric_strain': 0.25,
150 'eigenvalue_diff': 0.1}
151 if abs(volumetric_strain) > trigger_levels['volumetric_strain']:
152 warnings.append('high_volumetric_strain')
153 logger.warning('High volumetric strain ({:.2f} %). {}'.format(
154 100 * volumetric_strain, s))
155 if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']:
156 warnings.append('high_anisotropic_strain')
157 logger.warning('High anisotropic strain (the difference between '
158 'largest and smallest eigenvalues of strain tensor is '
159 '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s))
160 if drmax > 1.0:
161 warnings.append('large_maximum_relaxation_distance')
162 logger.warning('Large maximum relaxation distance '
163 '({:.5f} Angstrom). {}'.format(drmax, s))
164 if dravg > 0.5:
165 warnings.append('large_average_relaxation_distance')
166 logger.warning('Large average relaxation distance '
167 '({:.5f} Angstrom). {}'.format(dravg, s))
169 # Populate dictionary with supplementary information
170 info = {'drmax': drmax,
171 'dravg': dravg,
172 'strain_tensor': strain_tensor,
173 'volumetric_strain': volumetric_strain,
174 'strain_tensor_eigenvalues': eigenvalues,
175 'warnings': warnings,
176 'transformation_matrix': P}
178 return mapped_structure, info
181def _get_reference_supercell(structure: Atoms,
182 reference: Atoms,
183 inert_species: List[str] = None,
184 tol_positions: float = 1e-5,
185 assume_no_cell_relaxation: bool = False) -> Tuple[Atoms, np.ndarray]:
186 """
187 Returns a tuple comprising a supercell of the reference structure that is compatible with the
188 cell metric of the input structure as well as the transformation matrix that maps from the
189 primitive to the supercell structure.
191 Parameters
192 ----------
193 structure
194 Input structure, typically a relaxed structure.
195 reference
196 Reference structure, which can but need not be the primitive structure.
197 inert_species
198 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never
199 substituted for a vacancy. The number of inert sites is used to rescale
200 of the input structure to match the reference structure.
201 tol_positions
202 Tolerance factor applied when scanning for overlapping positions in
203 Ångstrom (forwarded to :func:`ase.build.make_supercell`).
204 assume_no_cell_relaxation
205 If ``False`` volume and cell metric of the input structure are rescaled
206 to match the reference structure. This can be unnecessary (and
207 counterproductive) for some structures, e.g., with many vacancies.
209 Note
210 ----
211 When setting this parameter to ``False``, the input structure
212 must be obtainable via an *integer* transformation matrix from the
213 reference cell metric. In other words the input structure should not
214 involve relaxations of the volume or the cell metric.
216 Raises
217 ------
218 ValueError
219 If the boundary conditions of the reference and the input structure
220 do not match.
221 ValueError
222 If the transformation matrix deviates too strongly from the nearest
223 integer matrix.
224 ValueError
225 If :attr:`assume_no_cell_relaxation` is ``True`` but the input structure is
226 not obtainable via an integer transformation from the reference
227 cell metric.
228 """
230 if not np.all(reference.pbc == structure.pbc):
231 msg = 'The boundary conditions of reference and input structures do not match.'
232 msg += '\n reference: ' + str(reference.pbc)
233 msg += '\n input: ' + str(structure.pbc)
234 raise ValueError(msg)
236 # Step 1:
237 # rescale cell metric of relaxed cell to match volume per atom of reference cell
238 if not assume_no_cell_relaxation:
239 if inert_species is None:
240 n_ref = len(reference)
241 n_rlx = len(structure)
242 else:
243 n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species])
244 n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species])
245 vol_scale = reference.get_volume() / n_ref
246 vol_scale /= structure.get_volume() / n_rlx
247 scaled_structure_cell = structure.cell * vol_scale ** (1 / 3)
248 else:
249 scaled_structure_cell = structure.cell
251 # Step 2:
252 # get transformation matrix
253 P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell))
255 # reduce the (real) transformation matrix to the nearest integer one
256 P = np.around(P)
258 # Step 3:
259 # generate supercell of reference structure
260 reference_supercell = make_supercell(reference, P, tol=tol_positions)
262 return reference_supercell, P
265def _match_positions(structure: Atoms, reference: Atoms) -> Tuple[Atoms, float, float]:
266 """Matches the atoms in the input :attr:`structure` to the sites in the
267 :attr:`reference` structure. The function returns tuple the first element of which
268 is a copy of the :attr:`reference` structure, in which the chemical species are
269 assigned to comply with the :attr:`structure`. The second and third element
270 of the tuple represent the maximum and average distance between relaxed and
271 reference sites.
273 Parameters
274 ----------
275 structure
276 Structure with relaxed positions.
277 reference
278 Structure with idealized positions.
280 Raises
281 ------
282 ValueError
283 If the cell metrics of the two input structures do not match.
284 ValueError
285 If the periodic boundary conditions of the two input structures do not match.
286 ValueError
287 If the input structure contains more atoms than the reference structure.
288 """
290 if not np.all(reference.pbc == structure.pbc):
291 msg = 'The boundary conditions of reference and relaxed structures do not match.'
292 msg += '\n reference: ' + str(reference.pbc)
293 msg += '\n relaxed: ' + str(structure.pbc)
294 raise ValueError(msg)
296 if len(structure) > len(reference):
297 msg = 'The relaxed structure contains more atoms than the reference structure.'
298 msg += '\n reference: ' + str(len(reference))
299 msg += '\n relaxed: ' + str(len(structure))
300 raise ValueError(msg)
302 if not np.all(np.isclose(reference.cell, structure.cell)):
303 msg = 'The cell metrics of reference and relaxed structures do not match.'
304 msg += '\n reference: ' + str(reference.cell)
305 msg += '\n relaxed: ' + str(structure.cell)
306 raise ValueError(msg)
308 # compute distances between reference and relaxed positions
309 _, dists = get_distances(reference.positions, structure.positions,
310 cell=reference.cell, pbc=reference.pbc)
311 # pad matrix with zeros to obtain square matrix
312 n, m = dists.shape
313 cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))),
314 mode='constant', constant_values=0)
315 # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm
316 row_ind, col_ind = linear_sum_assignment(cost_matrix)
318 # compile new configuration with supplementary information
319 mapped = reference.copy()
320 displacement_magnitudes = []
321 displacements = []
322 minimum_distances = []
323 n_dist_max = min(len(mapped), 3)
324 warning = None
325 for i, j in zip(row_ind, col_ind):
326 atom = mapped[i]
327 if j >= len(structure):
328 # vacant site in reference structure
329 atom.symbol = 'X'
330 displacement_magnitudes.append(None)
331 displacements.append(3 * [None])
332 minimum_distances.append(n_dist_max * [None])
333 else:
334 atom.symbol = structure[j].symbol
335 dvecs, drs = get_distances([structure[j].position],
336 [reference[i].position],
337 cell=reference.cell, pbc=reference.pbc)
338 displacement_magnitudes.append(drs[0][0])
339 displacements.append(dvecs[0][0])
340 # distances to the next three available sites
341 minimum_distances.append(sorted(dists[:, j])[:n_dist_max])
342 if len(structure) > 1:
343 if drs[0][0] > min(dists[:, j]) + 1e-6:
344 logger.warning('An atom was mapped to a site that was further '
345 'away than the closest site (that site was already '
346 'occupied by another atom).')
347 warning = 'possible_ambiguity_in_mapping'
348 elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]:
349 logger.warning('An atom was approximately equally far from its '
350 'two closest sites.')
351 warning = 'possible_ambiguity_in_mapping'
353 displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64)
354 mapped.new_array('Displacement', displacements, float, shape=(3, ))
355 mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float)
356 mapped.new_array('Minimum_Distances', minimum_distances,
357 float, shape=(n_dist_max,))
358 mapped.new_array('IndexMapping', col_ind, int)
360 drmax = np.nanmax(displacement_magnitudes)
361 dravg = np.nanmean(displacement_magnitudes)
363 return mapped, drmax, dravg, warning