Coverage for icet/tools/structure_mapping.py: 100%

117 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-12-26 04:12 +0000

1from typing import List, Tuple 

2 

3import numpy as np 

4 

5from ase import Atoms 

6from ase.build import make_supercell 

7from ase.geometry import get_distances 

8from scipy.optimize import linear_sum_assignment 

9from icet.input_output.logging_tools import logger 

10import scipy.linalg 

11 

12 

13def calculate_strain_tensor(A: np.ndarray, 

14 B: np.ndarray) -> np.ndarray: 

15 """Calculates the strain tensor for mapping a cell A onto cell B. The 

16 strain calculated is the Biot strain tensor and is rotationally invariant. 

17 

18 Parameters 

19 ---------- 

20 A 

21 Reference cell (row-major format). 

22 B 

23 Target cell (row-major format). 

24 """ 

25 assert A.shape == (3, 3) 

26 assert B.shape == (3, 3) 

27 

28 # Calculate deformation gradient (F) in column-major format 

29 F = np.linalg.solve(A, B).T 

30 

31 # Calculate right stretch tensor (U) 

32 _, U = scipy.linalg.polar(F) 

33 

34 # return Biot strain tensor 

35 return U - np.eye(3) 

36 

37 

38def map_structure_to_reference(structure: Atoms, 

39 reference: Atoms, 

40 inert_species: List[str] = None, 

41 tol_positions: float = 1e-4, 

42 suppress_warnings: bool = False, 

43 assume_no_cell_relaxation: bool = False) \ 

44 -> Tuple[Atoms, dict]: 

45 """Maps a structure onto a reference structure. 

46 This is often desirable when, for example, a structure has been 

47 relaxed using DFT, and one wants to use it as a training structure 

48 in a cluster expansion. 

49 

50 The function returns a tuple comprising the ideal supercell most 

51 closely matching the input structure and a dictionary with 

52 supplementary information concerning the mapping. The latter 

53 includes for example the largest deviation of any position in the 

54 input structure from its reference position (``drmax``), the average 

55 deviation of the positions in the input structure from the 

56 reference positions (``dravg``), and the strain tensor for the input 

57 structure relative to the reference structure (``strain_tensor``). 

58 

59 The returned Atoms object provides further supplemental information via 

60 custom per-atom arrays including the atomic displacements 

61 (``Displacement``, ``Displacement_Magnitude``),the distances to the 

62 three closest sites (``Minimum_Distances``), as well as a mapping 

63 between the indices of the returned Atoms object and those of the input 

64 structure (``IndexMapping``). 

65 

66 Note 

67 ---- 

68 The input and reference structures should adhere to the same crystallographic setting. 

69 In other words, they should not be rotated or translated with respect to each other. 

70 

71 Parameters 

72 ---------- 

73 structure 

74 Input structure, typically a relaxed structure. 

75 reference 

76 Reference structure, which can but need not be the primitive structure. 

77 inert_species 

78 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never 

79 substituted for a vacancy. The number of inert sites is used to rescale 

80 the volume of the input structure to match the reference structure. 

81 tol_positions 

82 Tolerance factor applied when scanning for overlapping positions in 

83 Ångstrom (forwarded to :func:`ase.build.make_supercell`). 

84 suppress_warnings 

85 if ``True`` print no warnings of large strain or relaxation distances. 

86 assume_no_cell_relaxation 

87 If ``False`` volume and cell metric of the input structure are rescaled 

88 to match the reference structure. This can be unnecessary (and 

89 counterproductive) for some structures, e.g., with many vacancies. 

90 

91 Note 

92 ---- 

93 When setting this parameter to ``False`` the reference cell metric 

94 must be obtainable via an *integer* transformation matrix from the 

95 reference cell metric. In other words the input structure should not 

96 involve relaxations of the volume or the cell metric. 

97 

98 Example 

99 ------- 

100 The following code snippet illustrates the general usage. It first creates 

101 a primitive FCC cell, which is latter used as reference structure. To 

102 emulate a relaxed structure obtained from, e.g., a density functional 

103 theory calculation, the code then creates a 4x4x4 conventional FCC 

104 supercell, which is populated with two different atom types, has distorted 

105 cell vectors, and random displacements to the atoms. Finally, the present 

106 function is used to map the structure back the ideal lattice:: 

107 

108 >>> from ase.build import bulk 

109 >>> reference = bulk('Au', a=4.09) 

110 >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4) 

111 >>> structure.set_chemical_symbols(10 * ['Ag'] + (len(structure) - 10) * ['Au']) 

112 >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True) 

113 >>> structure.rattle(0.1) 

114 >>> mapped_structure, info = map_structure_to_reference(structure, reference) 

115 """ 

116 

117 # Obtain supercell of reference structure that is compatible 

118 # with relaxed structure 

119 reference_supercell, P = _get_reference_supercell( 

120 structure, reference, inert_species=inert_species, 

121 tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation) 

122 

123 # Calculate strain tensor 

124 strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell) 

125 

126 # Symmetric matrix has real eigenvalues 

127 eigenvalues, _ = np.linalg.eigh(strain_tensor) 

128 volumetric_strain = sum(eigenvalues) 

129 

130 # Rescale the relaxed atoms object 

131 structure_scaled = structure.copy() 

132 structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True) 

133 

134 # Match positions 

135 mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled, 

136 reference_supercell) 

137 

138 if warning: 

139 warnings = [warning] 

140 else: 

141 warnings = [] 

142 

143 if not suppress_warnings: 

144 s = 'Consider excluding this structure when training a cluster expansion.' 

145 if assume_no_cell_relaxation: 

146 trigger_levels = {'volumetric_strain': 1e-3, 

147 'eigenvalue_diff': 1e-3} 

148 else: 

149 trigger_levels = {'volumetric_strain': 0.25, 

150 'eigenvalue_diff': 0.1} 

151 if abs(volumetric_strain) > trigger_levels['volumetric_strain']: 

152 warnings.append('high_volumetric_strain') 

153 logger.warning('High volumetric strain ({:.2f} %). {}'.format( 

154 100 * volumetric_strain, s)) 

155 if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']: 

156 warnings.append('high_anisotropic_strain') 

157 logger.warning('High anisotropic strain (the difference between ' 

158 'largest and smallest eigenvalues of strain tensor is ' 

159 '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s)) 

160 if drmax > 1.0: 

161 warnings.append('large_maximum_relaxation_distance') 

162 logger.warning('Large maximum relaxation distance ' 

163 '({:.5f} Angstrom). {}'.format(drmax, s)) 

164 if dravg > 0.5: 

165 warnings.append('large_average_relaxation_distance') 

166 logger.warning('Large average relaxation distance ' 

167 '({:.5f} Angstrom). {}'.format(dravg, s)) 

168 

169 # Populate dictionary with supplementary information 

170 info = {'drmax': drmax, 

171 'dravg': dravg, 

172 'strain_tensor': strain_tensor, 

173 'volumetric_strain': volumetric_strain, 

174 'strain_tensor_eigenvalues': eigenvalues, 

175 'warnings': warnings, 

176 'transformation_matrix': P} 

177 

178 return mapped_structure, info 

179 

180 

181def _get_reference_supercell(structure: Atoms, 

182 reference: Atoms, 

183 inert_species: List[str] = None, 

184 tol_positions: float = 1e-5, 

185 assume_no_cell_relaxation: bool = False) -> Tuple[Atoms, np.ndarray]: 

186 """ 

187 Returns a tuple comprising a supercell of the reference structure that is compatible with the 

188 cell metric of the input structure as well as the transformation matrix that maps from the 

189 primitive to the supercell structure. 

190 

191 Parameters 

192 ---------- 

193 structure 

194 Input structure, typically a relaxed structure. 

195 reference 

196 Reference structure, which can but need not be the primitive structure. 

197 inert_species 

198 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never 

199 substituted for a vacancy. The number of inert sites is used to rescale 

200 of the input structure to match the reference structure. 

201 tol_positions 

202 Tolerance factor applied when scanning for overlapping positions in 

203 Ångstrom (forwarded to :func:`ase.build.make_supercell`). 

204 assume_no_cell_relaxation 

205 If ``False`` volume and cell metric of the input structure are rescaled 

206 to match the reference structure. This can be unnecessary (and 

207 counterproductive) for some structures, e.g., with many vacancies. 

208 

209 Note 

210 ---- 

211 When setting this parameter to ``False``, the input structure 

212 must be obtainable via an *integer* transformation matrix from the 

213 reference cell metric. In other words the input structure should not 

214 involve relaxations of the volume or the cell metric. 

215 

216 Raises 

217 ------ 

218 ValueError 

219 If the boundary conditions of the reference and the input structure 

220 do not match. 

221 ValueError 

222 If the transformation matrix deviates too strongly from the nearest 

223 integer matrix. 

224 ValueError 

225 If :attr:`assume_no_cell_relaxation` is ``True`` but the input structure is 

226 not obtainable via an integer transformation from the reference 

227 cell metric. 

228 """ 

229 

230 if not np.all(reference.pbc == structure.pbc): 

231 msg = 'The boundary conditions of reference and input structures do not match.' 

232 msg += '\n reference: ' + str(reference.pbc) 

233 msg += '\n input: ' + str(structure.pbc) 

234 raise ValueError(msg) 

235 

236 # Step 1: 

237 # rescale cell metric of relaxed cell to match volume per atom of reference cell 

238 if not assume_no_cell_relaxation: 

239 if inert_species is None: 

240 n_ref = len(reference) 

241 n_rlx = len(structure) 

242 else: 

243 n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species]) 

244 n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species]) 

245 vol_scale = reference.get_volume() / n_ref 

246 vol_scale /= structure.get_volume() / n_rlx 

247 scaled_structure_cell = structure.cell * vol_scale ** (1 / 3) 

248 else: 

249 scaled_structure_cell = structure.cell 

250 

251 # Step 2: 

252 # get transformation matrix 

253 P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell)) 

254 

255 # reduce the (real) transformation matrix to the nearest integer one 

256 P = np.around(P) 

257 

258 # Step 3: 

259 # generate supercell of reference structure 

260 reference_supercell = make_supercell(reference, P, tol=tol_positions) 

261 

262 return reference_supercell, P 

263 

264 

265def _match_positions(structure: Atoms, reference: Atoms) -> Tuple[Atoms, float, float]: 

266 """Matches the atoms in the input :attr:`structure` to the sites in the 

267 :attr:`reference` structure. The function returns tuple the first element of which 

268 is a copy of the :attr:`reference` structure, in which the chemical species are 

269 assigned to comply with the :attr:`structure`. The second and third element 

270 of the tuple represent the maximum and average distance between relaxed and 

271 reference sites. 

272 

273 Parameters 

274 ---------- 

275 structure 

276 Structure with relaxed positions. 

277 reference 

278 Structure with idealized positions. 

279 

280 Raises 

281 ------ 

282 ValueError 

283 If the cell metrics of the two input structures do not match. 

284 ValueError 

285 If the periodic boundary conditions of the two input structures do not match. 

286 ValueError 

287 If the input structure contains more atoms than the reference structure. 

288 """ 

289 

290 if not np.all(reference.pbc == structure.pbc): 

291 msg = 'The boundary conditions of reference and relaxed structures do not match.' 

292 msg += '\n reference: ' + str(reference.pbc) 

293 msg += '\n relaxed: ' + str(structure.pbc) 

294 raise ValueError(msg) 

295 

296 if len(structure) > len(reference): 

297 msg = 'The relaxed structure contains more atoms than the reference structure.' 

298 msg += '\n reference: ' + str(len(reference)) 

299 msg += '\n relaxed: ' + str(len(structure)) 

300 raise ValueError(msg) 

301 

302 if not np.all(np.isclose(reference.cell, structure.cell)): 

303 msg = 'The cell metrics of reference and relaxed structures do not match.' 

304 msg += '\n reference: ' + str(reference.cell) 

305 msg += '\n relaxed: ' + str(structure.cell) 

306 raise ValueError(msg) 

307 

308 # compute distances between reference and relaxed positions 

309 _, dists = get_distances(reference.positions, structure.positions, 

310 cell=reference.cell, pbc=reference.pbc) 

311 # pad matrix with zeros to obtain square matrix 

312 n, m = dists.shape 

313 cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))), 

314 mode='constant', constant_values=0) 

315 # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm 

316 row_ind, col_ind = linear_sum_assignment(cost_matrix) 

317 

318 # compile new configuration with supplementary information 

319 mapped = reference.copy() 

320 displacement_magnitudes = [] 

321 displacements = [] 

322 minimum_distances = [] 

323 n_dist_max = min(len(mapped), 3) 

324 warning = None 

325 for i, j in zip(row_ind, col_ind): 

326 atom = mapped[i] 

327 if j >= len(structure): 

328 # vacant site in reference structure 

329 atom.symbol = 'X' 

330 displacement_magnitudes.append(None) 

331 displacements.append(3 * [None]) 

332 minimum_distances.append(n_dist_max * [None]) 

333 else: 

334 atom.symbol = structure[j].symbol 

335 dvecs, drs = get_distances([structure[j].position], 

336 [reference[i].position], 

337 cell=reference.cell, pbc=reference.pbc) 

338 displacement_magnitudes.append(drs[0][0]) 

339 displacements.append(dvecs[0][0]) 

340 # distances to the next three available sites 

341 minimum_distances.append(sorted(dists[:, j])[:n_dist_max]) 

342 if len(structure) > 1: 

343 if drs[0][0] > min(dists[:, j]) + 1e-6: 

344 logger.warning('An atom was mapped to a site that was further ' 

345 'away than the closest site (that site was already ' 

346 'occupied by another atom).') 

347 warning = 'possible_ambiguity_in_mapping' 

348 elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]: 

349 logger.warning('An atom was approximately equally far from its ' 

350 'two closest sites.') 

351 warning = 'possible_ambiguity_in_mapping' 

352 

353 displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64) 

354 mapped.new_array('Displacement', displacements, float, shape=(3, )) 

355 mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float) 

356 mapped.new_array('Minimum_Distances', minimum_distances, 

357 float, shape=(n_dist_max,)) 

358 mapped.new_array('IndexMapping', col_ind, int) 

359 

360 drmax = np.nanmax(displacement_magnitudes) 

361 dravg = np.nanmean(displacement_magnitudes) 

362 

363 return mapped, drmax, dravg, warning