Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from typing import List, Tuple 

2 

3import numpy as np 

4 

5from ase import Atoms 

6from ase.build import make_supercell 

7from ase.geometry import get_distances 

8from scipy.optimize import linear_sum_assignment 

9from icet.input_output.logging_tools import logger 

10import scipy.linalg 

11 

12 

13def calculate_strain_tensor(A: np.ndarray, 

14 B: np.ndarray) -> np.ndarray: 

15 """Calculates the strain tensor for mapping a cell A onto cell B. The 

16 strain calculated is the Biot strain tensor and is rotationally invariant. 

17 

18 Parameters 

19 ---------- 

20 A 

21 reference cell (row-major format) 

22 B 

23 target cell (row-major format) 

24 

25 Returns 

26 ------- 

27 strain_tensor 

28 Biot strain tensor (symmetric matrix) 

29 """ 

30 assert A.shape == (3, 3) 

31 assert B.shape == (3, 3) 

32 

33 # Calculate deformation gradient (F) in column-major format 

34 F = np.linalg.solve(A, B).T 

35 

36 # Calculate right stretch tensor (U) 

37 _, U = scipy.linalg.polar(F) 

38 

39 # return Biot strain tensor 

40 return U - np.eye(3) 

41 

42 

43def map_structure_to_reference(structure: Atoms, 

44 reference: Atoms, 

45 inert_species: List[str] = None, 

46 tol_positions: float = 1e-4, 

47 suppress_warnings: bool = False, 

48 assume_no_cell_relaxation: bool = False) \ 

49 -> Tuple[Atoms, dict]: 

50 """Maps a structure onto a reference structure. 

51 This is often desirable when, for example, a structure has been 

52 relaxed using DFT, and one wants to use it as a training structure 

53 in a cluster expansion. 

54 

55 The function returns a tuple comprising the ideal supercell most 

56 closely matching the input structure and a dictionary with 

57 supplementary information concerning the mapping. The latter 

58 includes for example the largest deviation of any position in the 

59 input structure from its reference position (`drmax`), the average 

60 deviation of the positions in the input structure from the 

61 reference positions (`dravg`), and the strain tensor for the input 

62 structure relative to the reference structure (`strain_tensor`). 

63 

64 The Atoms object that provide further supplemental information via 

65 custom per-atom arrays including the atomic displacements 

66 (`Displacement`, `Displacement_Magnitude`) as well as the 

67 distances to the three closest sites (`Minimum_Distances`). 

68 

69 Parameters 

70 ---------- 

71 structure 

72 input structure, typically a relaxed structure 

73 reference 

74 reference structure, which can but need not be the primitive structure 

75 inert_species 

76 list of chemical symbols (e.g., ``['Au', 'Pd']``) that are never 

77 substituted for a vacancy; the number of inert sites is used to rescale 

78 the volume of the input structure to match the reference structure. 

79 tol_positions 

80 tolerance factor applied when scanning for overlapping positions in 

81 Angstrom (forwarded to :func:`ase.build.make_supercell`) 

82 suppress_warnings 

83 if True, print no warnings of large strain or relaxation distances 

84 assume_no_cell_relaxation 

85 if False volume and cell metric of the input structure are rescaled 

86 to match the reference structure; this can be unnecessary (and 

87 counterproductive) for some structures, e.g., with many vacancies 

88 

89 **Note**: When setting this parameter to False the reference cell metric 

90 must be obtainable via an *integer* transformation matrix from the 

91 reference cell metric. In other words the input structure should not 

92 involve relaxations of the volume or the cell metric. 

93 

94 Example 

95 ------- 

96 The following code snippet illustrates the general usage. It first creates 

97 a primitive FCC cell, which is latter used as reference structure. To 

98 emulate a relaxed structure obtained from, e.g., a density functional 

99 theory calculation, the code then creates a 4x4x4 conventional FCC 

100 supercell, which is populated with two different atom types, has distorted 

101 cell vectors, and random displacements to the atoms. Finally, the present 

102 function is used to map the structure back the ideal lattice:: 

103 

104 >>> from ase.build import bulk 

105 >>> reference = bulk('Au', a=4.09) 

106 >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4) 

107 >>> structure.set_chemical_symbols(10 * ['Ag'] + (len(structure) - 10) * ['Au']) 

108 >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True) 

109 >>> structure.rattle(0.1) 

110 >>> mapped_structure, info = map_structure_to_reference(structure, reference) 

111 """ 

112 

113 # Obtain supercell of reference structure that is compatible 

114 # with relaxed structure 

115 reference_supercell = _get_reference_supercell( 

116 structure, reference, inert_species=inert_species, 

117 tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation) 

118 

119 # Calculate strain tensor 

120 strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell) 

121 

122 # Symmetric matrix has real eigenvalues 

123 eigenvalues, _ = np.linalg.eigh(strain_tensor) 

124 volumetric_strain = sum(eigenvalues) 

125 

126 # Rescale the relaxed atoms object 

127 structure_scaled = structure.copy() 

128 structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True) 

129 

130 # Match positions 

131 mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled, 

132 reference_supercell) 

133 

134 if warning: 

135 warnings = [warning] 

136 else: 

137 warnings = [] 

138 

139 if not suppress_warnings: 

140 s = 'Consider excluding this structure when training a cluster expansion.' 

141 if assume_no_cell_relaxation: 

142 trigger_levels = {'volumetric_strain': 1e-3, 

143 'eigenvalue_diff': 1e-3} 

144 else: 

145 trigger_levels = {'volumetric_strain': 0.25, 

146 'eigenvalue_diff': 0.1} 

147 if abs(volumetric_strain) > trigger_levels['volumetric_strain']: 

148 warnings.append('high_volumetric_strain') 

149 logger.warning('High volumetric strain ({:.2f} %). {}'.format( 

150 100 * volumetric_strain, s)) 

151 if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']: 

152 warnings.append('high_anisotropic_strain') 

153 logger.warning('High anisotropic strain (the difference between ' 

154 'largest and smallest eigenvalues of strain tensor is ' 

155 '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s)) 

156 if drmax > 1.0: 

157 warnings.append('large_maximum_relaxation_distance') 

158 logger.warning('Large maximum relaxation distance ' 

159 '({:.5f} Angstrom). {}'.format(drmax, s)) 

160 if dravg > 0.5: 

161 warnings.append('large_average_relaxation_distance') 

162 logger.warning('Large average relaxation distance ' 

163 '({:.5f} Angstrom). {}'.format(dravg, s)) 

164 

165 # Populate dictionary with supplementary information 

166 info = {'drmax': drmax, 

167 'dravg': dravg, 

168 'strain_tensor': strain_tensor, 

169 'volumetric_strain': volumetric_strain, 

170 'strain_tensor_eigenvalues': eigenvalues, 

171 'warnings': warnings} 

172 

173 return mapped_structure, info 

174 

175 

176def _get_reference_supercell(structure: Atoms, 

177 reference: Atoms, 

178 inert_species: List[str] = None, 

179 tol_positions: float = 1e-5, 

180 assume_no_cell_relaxation: bool = False) -> Atoms: 

181 """ 

182 Returns a supercell of the reference structure that is compatible with the 

183 cell metric of the input structure. 

184 

185 Parameters 

186 ---------- 

187 structure 

188 input structure, typically a relaxed structure 

189 reference 

190 reference structure, which can but need not be the primitive structure 

191 inert_species 

192 list of chemical symbols (e.g., ``['Au', 'Pd']``) that are never 

193 substituted for a vacancy; the number of inert sites is used to rescale 

194 of the input structure to match the reference structure. 

195 tol_positions 

196 tolerance factor applied when scanning for overlapping positions in 

197 Angstrom (forwarded to :func:`ase.build.make_supercell`) 

198 assume_no_cell_relaxation 

199 if False volume and cell metric of the input structure are rescaled 

200 to match the reference structure; this can be unnecessary (and 

201 counterproductive) for some structures, e.g., with many vacancies 

202 

203 **Note**: When setting this parameter to False, the input structure 

204 must be obtainable via an *integer* transformation matrix from the 

205 reference cell metric. In other words the input structure should not 

206 involve relaxations of the volume or the cell metric. 

207 

208 Raises 

209 ------ 

210 ValueError 

211 if the boundary conditions of the reference and the input structure 

212 do not match 

213 ValueError 

214 if the transformation matrix deviates too strongly from the nearest 

215 integer matrix 

216 ValueError 

217 if assume_no_cell_relaxation is True but the input structure is 

218 not obtainable via an integer transformation from the reference 

219 cell metric 

220 """ 

221 

222 if not np.all(reference.pbc == structure.pbc): 

223 msg = 'The boundary conditions of reference and input structures do not match.' 

224 msg += '\n reference: ' + str(reference.pbc) 

225 msg += '\n input: ' + str(structure.pbc) 

226 raise ValueError(msg) 

227 

228 # Step 1: 

229 # rescale cell metric of relaxed cell to match volume per atom of reference cell 

230 if not assume_no_cell_relaxation: 

231 if inert_species is None: 

232 n_ref = len(reference) 

233 n_rlx = len(structure) 

234 else: 

235 n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species]) 

236 n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species]) 

237 vol_scale = reference.get_volume() / n_ref 

238 vol_scale /= structure.get_volume() / n_rlx 

239 scaled_structure_cell = structure.cell * vol_scale ** (1 / 3) 

240 else: 

241 scaled_structure_cell = structure.cell 

242 

243 # Step 2: 

244 # get transformation matrix 

245 P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell)) 

246 

247 # reduce the (real) transformation matrix to the nearest integer one 

248 P = np.around(P) 

249 

250 # Step 3: 

251 # generate supercell of reference structure 

252 reference_supercell = make_supercell(reference, P, tol=tol_positions) 

253 

254 return reference_supercell 

255 

256 

257def _match_positions(structure: Atoms, reference: Atoms) -> Tuple[Atoms, float, float]: 

258 """Matches the atoms in the input `structure` to the sites in the 

259 `reference` structure. The function returns tuple the first element of which 

260 is a copy of the `reference` structure, in which the chemical species are 

261 assigned to comply with the `structure`. The second and third element 

262 of the tuple represent the maximum and average distance between relaxed and 

263 reference sites. 

264 

265 Parameters 

266 ---------- 

267 structure 

268 structure with relaxed positions 

269 reference 

270 structure with idealized positions 

271 

272 Raises 

273 ------ 

274 ValueError 

275 if the cell metrics of the two input structures do not match 

276 ValueError 

277 if the periodic boundary conditions of the two input structures do not match 

278 ValueError 

279 if the input structure contains more atoms than the reference structure 

280 """ 

281 

282 if not np.all(reference.pbc == structure.pbc): 

283 msg = 'The boundary conditions of reference and relaxed structures do not match.' 

284 msg += '\n reference: ' + str(reference.pbc) 

285 msg += '\n relaxed: ' + str(structure.pbc) 

286 raise ValueError(msg) 

287 

288 if len(structure) > len(reference): 

289 msg = 'The relaxed structure contains more atoms than the reference structure.' 

290 msg += '\n reference: ' + str(len(reference)) 

291 msg += '\n relaxed: ' + str(len(structure)) 

292 raise ValueError(msg) 

293 

294 if not np.all(np.isclose(reference.cell, structure.cell)): 

295 msg = 'The cell metrics of reference and relaxed structures do not match.' 

296 msg += '\n reference: ' + str(reference.cell) 

297 msg += '\n relaxed: ' + str(structure.cell) 

298 raise ValueError(msg) 

299 

300 # compute distances between reference and relaxed positions 

301 _, dists = get_distances(reference.positions, structure.positions, 

302 cell=reference.cell, pbc=reference.pbc) 

303 # pad matrix with zeros to obtain square matrix 

304 n, m = dists.shape 

305 cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))), 

306 mode='constant', constant_values=0) 

307 # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm 

308 row_ind, col_ind = linear_sum_assignment(cost_matrix) 

309 

310 # compile new configuration with supplementary information 

311 mapped = reference.copy() 

312 displacement_magnitudes = [] 

313 displacements = [] 

314 minimum_distances = [] 

315 n_dist_max = min(len(mapped), 3) 

316 warning = None 

317 for i, j in zip(row_ind, col_ind): 

318 atom = mapped[i] 

319 if j >= len(structure): 

320 # vacant site in reference structure 

321 atom.symbol = 'X' 

322 displacement_magnitudes.append(None) 

323 displacements.append(3 * [None]) 

324 minimum_distances.append(n_dist_max * [None]) 

325 else: 

326 atom.symbol = structure[j].symbol 

327 dvecs, drs = get_distances([structure[j].position], 

328 [reference[i].position], 

329 cell=reference.cell, pbc=reference.pbc) 

330 displacement_magnitudes.append(drs[0][0]) 

331 displacements.append(dvecs[0][0]) 

332 # distances to the next three available sites 

333 minimum_distances.append(sorted(dists[:, j])[:n_dist_max]) 

334 if len(structure) > 1: 

335 if drs[0][0] > min(dists[:, j]) + 1e-6: 

336 logger.warning('An atom was mapped to a site that was further ' 

337 'away than the closest site (that site was already ' 

338 'occupied by another atom).') 

339 warning = 'possible_ambiguity_in_mapping' 

340 elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]: 

341 logger.warning('An atom was approximately equally far from its ' 

342 'two closest sites.') 

343 warning = 'possible_ambiguity_in_mapping' 

344 

345 displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64) 

346 mapped.new_array('Displacement', displacements, float, shape=(3, )) 

347 mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float) 

348 mapped.new_array('Minimum_Distances', minimum_distances, 

349 float, shape=(n_dist_max,)) 

350 

351 drmax = np.nanmax(displacement_magnitudes) 

352 dravg = np.nanmean(displacement_magnitudes) 

353 

354 return mapped, drmax, dravg, warning