Coverage for icet/tools/structure_mapping.py: 100%
116 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-09-14 04:08 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-09-14 04:08 +0000
1import numpy as np
3from ase import Atoms
4from ase.build import make_supercell
5from ase.geometry import get_distances
6from scipy.optimize import linear_sum_assignment
7from icet.input_output.logging_tools import logger
8import scipy.linalg
11def calculate_strain_tensor(A: np.ndarray,
12 B: np.ndarray) -> np.ndarray:
13 """Calculates the strain tensor for mapping a cell A onto cell B. The
14 strain calculated is the Biot strain tensor and is rotationally invariant.
16 Parameters
17 ----------
18 A
19 Reference cell (row-major format).
20 B
21 Target cell (row-major format).
22 """
23 assert A.shape == (3, 3)
24 assert B.shape == (3, 3)
26 # Calculate deformation gradient (F) in column-major format
27 F = np.linalg.solve(A, B).T
29 # Calculate right stretch tensor (U)
30 _, U = scipy.linalg.polar(F)
32 # return Biot strain tensor
33 return U - np.eye(3)
36def map_structure_to_reference(structure: Atoms,
37 reference: Atoms,
38 inert_species: list[str] = None,
39 tol_positions: float = 1e-4,
40 suppress_warnings: bool = False,
41 assume_no_cell_relaxation: bool = False) \
42 -> tuple[Atoms, dict]:
43 """Maps a structure onto a reference structure.
44 This is often desirable when, for example, a structure has been
45 relaxed using DFT, and one wants to use it as a training structure
46 in a cluster expansion.
48 The function returns a tuple comprising the ideal supercell most
49 closely matching the input structure and a dictionary with
50 supplementary information concerning the mapping. The latter
51 includes for example the largest deviation of any position in the
52 input structure from its reference position (``drmax``), the average
53 deviation of the positions in the input structure from the
54 reference positions (``dravg``), and the strain tensor for the input
55 structure relative to the reference structure (``strain_tensor``).
57 The returned Atoms object provides further supplemental information via
58 custom per-atom arrays including the atomic displacements
59 (``Displacement``, ``Displacement_Magnitude``),the distances to the
60 three closest sites (``Minimum_Distances``), as well as a mapping
61 between the indices of the returned Atoms object and those of the input
62 structure (``IndexMapping``).
64 Note
65 ----
66 The input and reference structures should adhere to the same crystallographic setting.
67 In other words, they should not be rotated or translated with respect to each other.
69 Parameters
70 ----------
71 structure
72 Input structure, typically a relaxed structure.
73 reference
74 Reference structure, which can but need not be the primitive structure.
75 inert_species
76 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never
77 substituted for a vacancy. The number of inert sites is used to rescale
78 the volume of the input structure to match the reference structure.
79 tol_positions
80 Tolerance factor applied when scanning for overlapping positions in
81 Ångstrom (forwarded to :func:`ase.build.make_supercell`).
82 suppress_warnings
83 if ``True`` print no warnings of large strain or relaxation distances.
84 assume_no_cell_relaxation
85 If ``False`` volume and cell metric of the input structure are rescaled
86 to match the reference structure. This can be unnecessary (and
87 counterproductive) for some structures, e.g., with many vacancies.
89 Note
90 ----
91 When setting this parameter to ``False`` the reference cell metric
92 must be obtainable via an *integer* transformation matrix from the
93 reference cell metric. In other words the input structure should not
94 involve relaxations of the volume or the cell metric.
96 Example
97 -------
98 The following code snippet illustrates the general usage. It first creates
99 a primitive FCC cell, which is latter used as reference structure. To
100 emulate a relaxed structure obtained from, e.g., a density functional
101 theory calculation, the code then creates a 4x4x4 conventional FCC
102 supercell, which is populated with two different atom types, has distorted
103 cell vectors, and random displacements to the atoms. Finally, the present
104 function is used to map the structure back the ideal lattice::
106 >>> from ase.build import bulk
107 >>> reference = bulk('Au', a=4.09)
108 >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4)
109 >>> structure.symbols = 10 * ['Ag'] + (len(structure) - 10) * ['Au']
110 >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True)
111 >>> structure.rattle(0.1)
112 >>> mapped_structure, info = map_structure_to_reference(structure, reference)
113 """
115 # Obtain supercell of reference structure that is compatible
116 # with relaxed structure
117 reference_supercell, P = _get_reference_supercell(
118 structure, reference, inert_species=inert_species,
119 tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation)
121 # Calculate strain tensor
122 strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell)
124 # Symmetric matrix has real eigenvalues
125 eigenvalues, _ = np.linalg.eigh(strain_tensor)
126 volumetric_strain = sum(eigenvalues)
128 # Rescale the relaxed atoms object
129 structure_scaled = structure.copy()
130 structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True)
132 # Match positions
133 mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled,
134 reference_supercell)
136 if warning:
137 warnings = [warning]
138 else:
139 warnings = []
141 if not suppress_warnings:
142 s = 'Consider excluding this structure when training a cluster expansion.'
143 if assume_no_cell_relaxation:
144 trigger_levels = {'volumetric_strain': 1e-3,
145 'eigenvalue_diff': 1e-3}
146 else:
147 trigger_levels = {'volumetric_strain': 0.25,
148 'eigenvalue_diff': 0.1}
149 if abs(volumetric_strain) > trigger_levels['volumetric_strain']:
150 warnings.append('high_volumetric_strain')
151 logger.warning('High volumetric strain ({:.2f} %). {}'.format(
152 100 * volumetric_strain, s))
153 if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']:
154 warnings.append('high_anisotropic_strain')
155 logger.warning('High anisotropic strain (the difference between '
156 'largest and smallest eigenvalues of strain tensor is '
157 '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s))
158 if drmax > 1.0:
159 warnings.append('large_maximum_relaxation_distance')
160 logger.warning('Large maximum relaxation distance '
161 '({:.5f} Angstrom). {}'.format(drmax, s))
162 if dravg > 0.5:
163 warnings.append('large_average_relaxation_distance')
164 logger.warning('Large average relaxation distance '
165 '({:.5f} Angstrom). {}'.format(dravg, s))
167 # Populate dictionary with supplementary information
168 info = {'drmax': drmax,
169 'dravg': dravg,
170 'strain_tensor': strain_tensor,
171 'volumetric_strain': volumetric_strain,
172 'strain_tensor_eigenvalues': eigenvalues,
173 'warnings': warnings,
174 'transformation_matrix': P}
176 return mapped_structure, info
179def _get_reference_supercell(structure: Atoms,
180 reference: Atoms,
181 inert_species: list[str] = None,
182 tol_positions: float = 1e-5,
183 assume_no_cell_relaxation: bool = False) -> tuple[Atoms, np.ndarray]:
184 """
185 Returns a tuple comprising a supercell of the reference structure that is compatible with the
186 cell metric of the input structure as well as the transformation matrix that maps from the
187 primitive to the supercell structure.
189 Parameters
190 ----------
191 structure
192 Input structure, typically a relaxed structure.
193 reference
194 Reference structure, which can but need not be the primitive structure.
195 inert_species
196 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never
197 substituted for a vacancy. The number of inert sites is used to rescale
198 of the input structure to match the reference structure.
199 tol_positions
200 Tolerance factor applied when scanning for overlapping positions in
201 Ångstrom (forwarded to :func:`ase.build.make_supercell`).
202 assume_no_cell_relaxation
203 If ``False`` volume and cell metric of the input structure are rescaled
204 to match the reference structure. This can be unnecessary (and
205 counterproductive) for some structures, e.g., with many vacancies.
207 Note
208 ----
209 When setting this parameter to ``False``, the input structure
210 must be obtainable via an *integer* transformation matrix from the
211 reference cell metric. In other words the input structure should not
212 involve relaxations of the volume or the cell metric.
214 Raises
215 ------
216 ValueError
217 If the boundary conditions of the reference and the input structure
218 do not match.
219 ValueError
220 If the transformation matrix deviates too strongly from the nearest
221 integer matrix.
222 ValueError
223 If :attr:`assume_no_cell_relaxation` is ``True`` but the input structure is
224 not obtainable via an integer transformation from the reference
225 cell metric.
226 """
228 if not np.all(reference.pbc == structure.pbc):
229 msg = 'The boundary conditions of reference and input structures do not match.'
230 msg += '\n reference: ' + str(reference.pbc)
231 msg += '\n input: ' + str(structure.pbc)
232 raise ValueError(msg)
234 # Step 1:
235 # rescale cell metric of relaxed cell to match volume per atom of reference cell
236 if not assume_no_cell_relaxation:
237 if inert_species is None:
238 n_ref = len(reference)
239 n_rlx = len(structure)
240 else:
241 n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species])
242 n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species])
243 vol_scale = reference.get_volume() / n_ref
244 vol_scale /= structure.get_volume() / n_rlx
245 scaled_structure_cell = structure.cell * vol_scale ** (1 / 3)
246 else:
247 scaled_structure_cell = structure.cell
249 # Step 2:
250 # get transformation matrix
251 P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell))
253 # reduce the (real) transformation matrix to the nearest integer one
254 P = np.around(P)
256 # Step 3:
257 # generate supercell of reference structure
258 reference_supercell = make_supercell(reference, P, tol=tol_positions)
260 return reference_supercell, P
263def _match_positions(structure: Atoms, reference: Atoms) -> tuple[Atoms, float, float]:
264 """Matches the atoms in the input :attr:`structure` to the sites in the
265 :attr:`reference` structure. The function returns tuple the first element of which
266 is a copy of the :attr:`reference` structure, in which the chemical species are
267 assigned to comply with the :attr:`structure`. The second and third element
268 of the tuple represent the maximum and average distance between relaxed and
269 reference sites.
271 Parameters
272 ----------
273 structure
274 Structure with relaxed positions.
275 reference
276 Structure with idealized positions.
278 Raises
279 ------
280 ValueError
281 If the cell metrics of the two input structures do not match.
282 ValueError
283 If the periodic boundary conditions of the two input structures do not match.
284 ValueError
285 If the input structure contains more atoms than the reference structure.
286 """
288 if not np.all(reference.pbc == structure.pbc):
289 msg = 'The boundary conditions of reference and relaxed structures do not match.'
290 msg += '\n reference: ' + str(reference.pbc)
291 msg += '\n relaxed: ' + str(structure.pbc)
292 raise ValueError(msg)
294 if len(structure) > len(reference):
295 msg = 'The relaxed structure contains more atoms than the reference structure.'
296 msg += '\n reference: ' + str(len(reference))
297 msg += '\n relaxed: ' + str(len(structure))
298 raise ValueError(msg)
300 if not np.all(np.isclose(reference.cell, structure.cell)):
301 msg = 'The cell metrics of reference and relaxed structures do not match.'
302 msg += '\n reference: ' + str(reference.cell)
303 msg += '\n relaxed: ' + str(structure.cell)
304 raise ValueError(msg)
306 # compute distances between reference and relaxed positions
307 _, dists = get_distances(reference.positions, structure.positions,
308 cell=reference.cell, pbc=reference.pbc)
309 # pad matrix with zeros to obtain square matrix
310 n, m = dists.shape
311 cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))),
312 mode='constant', constant_values=0)
313 # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm
314 row_ind, col_ind = linear_sum_assignment(cost_matrix)
316 # compile new configuration with supplementary information
317 mapped = reference.copy()
318 displacement_magnitudes = []
319 displacements = []
320 minimum_distances = []
321 n_dist_max = min(len(mapped), 3)
322 warning = None
323 for i, j in zip(row_ind, col_ind):
324 atom = mapped[i]
325 if j >= len(structure):
326 # vacant site in reference structure
327 atom.symbol = 'X'
328 displacement_magnitudes.append(None)
329 displacements.append(3 * [None])
330 minimum_distances.append(n_dist_max * [None])
331 else:
332 atom.symbol = structure[j].symbol
333 dvecs, drs = get_distances([structure[j].position],
334 [reference[i].position],
335 cell=reference.cell, pbc=reference.pbc)
336 displacement_magnitudes.append(drs[0][0])
337 displacements.append(dvecs[0][0])
338 # distances to the next three available sites
339 minimum_distances.append(sorted(dists[:, j])[:n_dist_max])
340 if len(structure) > 1:
341 if drs[0][0] > min(dists[:, j]) + 1e-6:
342 logger.warning('An atom was mapped to a site that was further '
343 'away than the closest site (that site was already '
344 'occupied by another atom).')
345 warning = 'possible_ambiguity_in_mapping'
346 elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]:
347 logger.warning('An atom was approximately equally far from its '
348 'two closest sites.')
349 warning = 'possible_ambiguity_in_mapping'
351 displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64)
352 mapped.new_array('Displacement', displacements, float, shape=(3, ))
353 mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float)
354 mapped.new_array('Minimum_Distances', minimum_distances,
355 float, shape=(n_dist_max,))
356 mapped.new_array('IndexMapping', col_ind, int)
358 drmax = np.nanmax(displacement_magnitudes)
359 dravg = np.nanmean(displacement_magnitudes)
361 return mapped, drmax, dravg, warning