Coverage for icet/tools/structure_mapping.py: 100%

116 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-09-14 04:08 +0000

1import numpy as np 

2 

3from ase import Atoms 

4from ase.build import make_supercell 

5from ase.geometry import get_distances 

6from scipy.optimize import linear_sum_assignment 

7from icet.input_output.logging_tools import logger 

8import scipy.linalg 

9 

10 

11def calculate_strain_tensor(A: np.ndarray, 

12 B: np.ndarray) -> np.ndarray: 

13 """Calculates the strain tensor for mapping a cell A onto cell B. The 

14 strain calculated is the Biot strain tensor and is rotationally invariant. 

15 

16 Parameters 

17 ---------- 

18 A 

19 Reference cell (row-major format). 

20 B 

21 Target cell (row-major format). 

22 """ 

23 assert A.shape == (3, 3) 

24 assert B.shape == (3, 3) 

25 

26 # Calculate deformation gradient (F) in column-major format 

27 F = np.linalg.solve(A, B).T 

28 

29 # Calculate right stretch tensor (U) 

30 _, U = scipy.linalg.polar(F) 

31 

32 # return Biot strain tensor 

33 return U - np.eye(3) 

34 

35 

36def map_structure_to_reference(structure: Atoms, 

37 reference: Atoms, 

38 inert_species: list[str] = None, 

39 tol_positions: float = 1e-4, 

40 suppress_warnings: bool = False, 

41 assume_no_cell_relaxation: bool = False) \ 

42 -> tuple[Atoms, dict]: 

43 """Maps a structure onto a reference structure. 

44 This is often desirable when, for example, a structure has been 

45 relaxed using DFT, and one wants to use it as a training structure 

46 in a cluster expansion. 

47 

48 The function returns a tuple comprising the ideal supercell most 

49 closely matching the input structure and a dictionary with 

50 supplementary information concerning the mapping. The latter 

51 includes for example the largest deviation of any position in the 

52 input structure from its reference position (``drmax``), the average 

53 deviation of the positions in the input structure from the 

54 reference positions (``dravg``), and the strain tensor for the input 

55 structure relative to the reference structure (``strain_tensor``). 

56 

57 The returned Atoms object provides further supplemental information via 

58 custom per-atom arrays including the atomic displacements 

59 (``Displacement``, ``Displacement_Magnitude``),the distances to the 

60 three closest sites (``Minimum_Distances``), as well as a mapping 

61 between the indices of the returned Atoms object and those of the input 

62 structure (``IndexMapping``). 

63 

64 Note 

65 ---- 

66 The input and reference structures should adhere to the same crystallographic setting. 

67 In other words, they should not be rotated or translated with respect to each other. 

68 

69 Parameters 

70 ---------- 

71 structure 

72 Input structure, typically a relaxed structure. 

73 reference 

74 Reference structure, which can but need not be the primitive structure. 

75 inert_species 

76 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never 

77 substituted for a vacancy. The number of inert sites is used to rescale 

78 the volume of the input structure to match the reference structure. 

79 tol_positions 

80 Tolerance factor applied when scanning for overlapping positions in 

81 Ångstrom (forwarded to :func:`ase.build.make_supercell`). 

82 suppress_warnings 

83 if ``True`` print no warnings of large strain or relaxation distances. 

84 assume_no_cell_relaxation 

85 If ``False`` volume and cell metric of the input structure are rescaled 

86 to match the reference structure. This can be unnecessary (and 

87 counterproductive) for some structures, e.g., with many vacancies. 

88 

89 Note 

90 ---- 

91 When setting this parameter to ``False`` the reference cell metric 

92 must be obtainable via an *integer* transformation matrix from the 

93 reference cell metric. In other words the input structure should not 

94 involve relaxations of the volume or the cell metric. 

95 

96 Example 

97 ------- 

98 The following code snippet illustrates the general usage. It first creates 

99 a primitive FCC cell, which is latter used as reference structure. To 

100 emulate a relaxed structure obtained from, e.g., a density functional 

101 theory calculation, the code then creates a 4x4x4 conventional FCC 

102 supercell, which is populated with two different atom types, has distorted 

103 cell vectors, and random displacements to the atoms. Finally, the present 

104 function is used to map the structure back the ideal lattice:: 

105 

106 >>> from ase.build import bulk 

107 >>> reference = bulk('Au', a=4.09) 

108 >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4) 

109 >>> structure.symbols = 10 * ['Ag'] + (len(structure) - 10) * ['Au'] 

110 >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True) 

111 >>> structure.rattle(0.1) 

112 >>> mapped_structure, info = map_structure_to_reference(structure, reference) 

113 """ 

114 

115 # Obtain supercell of reference structure that is compatible 

116 # with relaxed structure 

117 reference_supercell, P = _get_reference_supercell( 

118 structure, reference, inert_species=inert_species, 

119 tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation) 

120 

121 # Calculate strain tensor 

122 strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell) 

123 

124 # Symmetric matrix has real eigenvalues 

125 eigenvalues, _ = np.linalg.eigh(strain_tensor) 

126 volumetric_strain = sum(eigenvalues) 

127 

128 # Rescale the relaxed atoms object 

129 structure_scaled = structure.copy() 

130 structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True) 

131 

132 # Match positions 

133 mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled, 

134 reference_supercell) 

135 

136 if warning: 

137 warnings = [warning] 

138 else: 

139 warnings = [] 

140 

141 if not suppress_warnings: 

142 s = 'Consider excluding this structure when training a cluster expansion.' 

143 if assume_no_cell_relaxation: 

144 trigger_levels = {'volumetric_strain': 1e-3, 

145 'eigenvalue_diff': 1e-3} 

146 else: 

147 trigger_levels = {'volumetric_strain': 0.25, 

148 'eigenvalue_diff': 0.1} 

149 if abs(volumetric_strain) > trigger_levels['volumetric_strain']: 

150 warnings.append('high_volumetric_strain') 

151 logger.warning('High volumetric strain ({:.2f} %). {}'.format( 

152 100 * volumetric_strain, s)) 

153 if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']: 

154 warnings.append('high_anisotropic_strain') 

155 logger.warning('High anisotropic strain (the difference between ' 

156 'largest and smallest eigenvalues of strain tensor is ' 

157 '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s)) 

158 if drmax > 1.0: 

159 warnings.append('large_maximum_relaxation_distance') 

160 logger.warning('Large maximum relaxation distance ' 

161 '({:.5f} Angstrom). {}'.format(drmax, s)) 

162 if dravg > 0.5: 

163 warnings.append('large_average_relaxation_distance') 

164 logger.warning('Large average relaxation distance ' 

165 '({:.5f} Angstrom). {}'.format(dravg, s)) 

166 

167 # Populate dictionary with supplementary information 

168 info = {'drmax': drmax, 

169 'dravg': dravg, 

170 'strain_tensor': strain_tensor, 

171 'volumetric_strain': volumetric_strain, 

172 'strain_tensor_eigenvalues': eigenvalues, 

173 'warnings': warnings, 

174 'transformation_matrix': P} 

175 

176 return mapped_structure, info 

177 

178 

179def _get_reference_supercell(structure: Atoms, 

180 reference: Atoms, 

181 inert_species: list[str] = None, 

182 tol_positions: float = 1e-5, 

183 assume_no_cell_relaxation: bool = False) -> tuple[Atoms, np.ndarray]: 

184 """ 

185 Returns a tuple comprising a supercell of the reference structure that is compatible with the 

186 cell metric of the input structure as well as the transformation matrix that maps from the 

187 primitive to the supercell structure. 

188 

189 Parameters 

190 ---------- 

191 structure 

192 Input structure, typically a relaxed structure. 

193 reference 

194 Reference structure, which can but need not be the primitive structure. 

195 inert_species 

196 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never 

197 substituted for a vacancy. The number of inert sites is used to rescale 

198 of the input structure to match the reference structure. 

199 tol_positions 

200 Tolerance factor applied when scanning for overlapping positions in 

201 Ångstrom (forwarded to :func:`ase.build.make_supercell`). 

202 assume_no_cell_relaxation 

203 If ``False`` volume and cell metric of the input structure are rescaled 

204 to match the reference structure. This can be unnecessary (and 

205 counterproductive) for some structures, e.g., with many vacancies. 

206 

207 Note 

208 ---- 

209 When setting this parameter to ``False``, the input structure 

210 must be obtainable via an *integer* transformation matrix from the 

211 reference cell metric. In other words the input structure should not 

212 involve relaxations of the volume or the cell metric. 

213 

214 Raises 

215 ------ 

216 ValueError 

217 If the boundary conditions of the reference and the input structure 

218 do not match. 

219 ValueError 

220 If the transformation matrix deviates too strongly from the nearest 

221 integer matrix. 

222 ValueError 

223 If :attr:`assume_no_cell_relaxation` is ``True`` but the input structure is 

224 not obtainable via an integer transformation from the reference 

225 cell metric. 

226 """ 

227 

228 if not np.all(reference.pbc == structure.pbc): 

229 msg = 'The boundary conditions of reference and input structures do not match.' 

230 msg += '\n reference: ' + str(reference.pbc) 

231 msg += '\n input: ' + str(structure.pbc) 

232 raise ValueError(msg) 

233 

234 # Step 1: 

235 # rescale cell metric of relaxed cell to match volume per atom of reference cell 

236 if not assume_no_cell_relaxation: 

237 if inert_species is None: 

238 n_ref = len(reference) 

239 n_rlx = len(structure) 

240 else: 

241 n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species]) 

242 n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species]) 

243 vol_scale = reference.get_volume() / n_ref 

244 vol_scale /= structure.get_volume() / n_rlx 

245 scaled_structure_cell = structure.cell * vol_scale ** (1 / 3) 

246 else: 

247 scaled_structure_cell = structure.cell 

248 

249 # Step 2: 

250 # get transformation matrix 

251 P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell)) 

252 

253 # reduce the (real) transformation matrix to the nearest integer one 

254 P = np.around(P) 

255 

256 # Step 3: 

257 # generate supercell of reference structure 

258 reference_supercell = make_supercell(reference, P, tol=tol_positions) 

259 

260 return reference_supercell, P 

261 

262 

263def _match_positions(structure: Atoms, reference: Atoms) -> tuple[Atoms, float, float]: 

264 """Matches the atoms in the input :attr:`structure` to the sites in the 

265 :attr:`reference` structure. The function returns tuple the first element of which 

266 is a copy of the :attr:`reference` structure, in which the chemical species are 

267 assigned to comply with the :attr:`structure`. The second and third element 

268 of the tuple represent the maximum and average distance between relaxed and 

269 reference sites. 

270 

271 Parameters 

272 ---------- 

273 structure 

274 Structure with relaxed positions. 

275 reference 

276 Structure with idealized positions. 

277 

278 Raises 

279 ------ 

280 ValueError 

281 If the cell metrics of the two input structures do not match. 

282 ValueError 

283 If the periodic boundary conditions of the two input structures do not match. 

284 ValueError 

285 If the input structure contains more atoms than the reference structure. 

286 """ 

287 

288 if not np.all(reference.pbc == structure.pbc): 

289 msg = 'The boundary conditions of reference and relaxed structures do not match.' 

290 msg += '\n reference: ' + str(reference.pbc) 

291 msg += '\n relaxed: ' + str(structure.pbc) 

292 raise ValueError(msg) 

293 

294 if len(structure) > len(reference): 

295 msg = 'The relaxed structure contains more atoms than the reference structure.' 

296 msg += '\n reference: ' + str(len(reference)) 

297 msg += '\n relaxed: ' + str(len(structure)) 

298 raise ValueError(msg) 

299 

300 if not np.all(np.isclose(reference.cell, structure.cell)): 

301 msg = 'The cell metrics of reference and relaxed structures do not match.' 

302 msg += '\n reference: ' + str(reference.cell) 

303 msg += '\n relaxed: ' + str(structure.cell) 

304 raise ValueError(msg) 

305 

306 # compute distances between reference and relaxed positions 

307 _, dists = get_distances(reference.positions, structure.positions, 

308 cell=reference.cell, pbc=reference.pbc) 

309 # pad matrix with zeros to obtain square matrix 

310 n, m = dists.shape 

311 cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))), 

312 mode='constant', constant_values=0) 

313 # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm 

314 row_ind, col_ind = linear_sum_assignment(cost_matrix) 

315 

316 # compile new configuration with supplementary information 

317 mapped = reference.copy() 

318 displacement_magnitudes = [] 

319 displacements = [] 

320 minimum_distances = [] 

321 n_dist_max = min(len(mapped), 3) 

322 warning = None 

323 for i, j in zip(row_ind, col_ind): 

324 atom = mapped[i] 

325 if j >= len(structure): 

326 # vacant site in reference structure 

327 atom.symbol = 'X' 

328 displacement_magnitudes.append(None) 

329 displacements.append(3 * [None]) 

330 minimum_distances.append(n_dist_max * [None]) 

331 else: 

332 atom.symbol = structure[j].symbol 

333 dvecs, drs = get_distances([structure[j].position], 

334 [reference[i].position], 

335 cell=reference.cell, pbc=reference.pbc) 

336 displacement_magnitudes.append(drs[0][0]) 

337 displacements.append(dvecs[0][0]) 

338 # distances to the next three available sites 

339 minimum_distances.append(sorted(dists[:, j])[:n_dist_max]) 

340 if len(structure) > 1: 

341 if drs[0][0] > min(dists[:, j]) + 1e-6: 

342 logger.warning('An atom was mapped to a site that was further ' 

343 'away than the closest site (that site was already ' 

344 'occupied by another atom).') 

345 warning = 'possible_ambiguity_in_mapping' 

346 elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]: 

347 logger.warning('An atom was approximately equally far from its ' 

348 'two closest sites.') 

349 warning = 'possible_ambiguity_in_mapping' 

350 

351 displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64) 

352 mapped.new_array('Displacement', displacements, float, shape=(3, )) 

353 mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float) 

354 mapped.new_array('Minimum_Distances', minimum_distances, 

355 float, shape=(n_dist_max,)) 

356 mapped.new_array('IndexMapping', col_ind, int) 

357 

358 drmax = np.nanmax(displacement_magnitudes) 

359 dravg = np.nanmean(displacement_magnitudes) 

360 

361 return mapped, drmax, dravg, warning