Coverage for icet/tools/structure

1from typing import List, Tuple

3import numpy as np

5from ase import Atoms

6from ase.build import make_supercell

7from ase.geometry import get_distances

8from scipy.optimize import linear_sum_assignment

9from icet.input_output.logging_tools import logger

10import scipy.linalg

13def calculate_strain_tensor(A: np.ndarray,

14 B: np.ndarray) -> np.ndarray:

15 """Calculates the strain tensor for mapping a cell A onto cell B. The

16 strain calculated is the Biot strain tensor and is rotationally invariant.

18 Parameters

19 ----------

20 A

21 Reference cell (row-major format).

22 B

23 Target cell (row-major format).

24 """

25 assert A.shape == (3, 3)

26 assert B.shape == (3, 3)

28 # Calculate deformation gradient (F) in column-major format

29 F = np.linalg.solve(A, B).T

31 # Calculate right stretch tensor (U)

32 _, U = scipy.linalg.polar(F)

34 # return Biot strain tensor

35 return U - np.eye(3)

38def map_structure_to_reference(structure: Atoms,

39 reference: Atoms,

40 inert_species: List[str] = None,

41 tol_positions: float = 1e-4,

42 suppress_warnings: bool = False,

43 assume_no_cell_relaxation: bool = False) \

44 -> Tuple[Atoms, dict]:

45 """Maps a structure onto a reference structure.

46 This is often desirable when, for example, a structure has been

47 relaxed using DFT, and one wants to use it as a training structure

48 in a cluster expansion.

50 The function returns a tuple comprising the ideal supercell most

51 closely matching the input structure and a dictionary with

52 supplementary information concerning the mapping. The latter

53 includes for example the largest deviation of any position in the

54 input structure from its reference position (``drmax``), the average

55 deviation of the positions in the input structure from the

56 reference positions (``dravg``), and the strain tensor for the input

57 structure relative to the reference structure (``strain_tensor``).

59 The returned Atoms object provides further supplemental information via

60 custom per-atom arrays including the atomic displacements

61 (``Displacement``, ``Displacement_Magnitude``),the distances to the

62 three closest sites (``Minimum_Distances``), as well as a mapping

63 between the indices of the returned Atoms object and those of the input

64 structure (``IndexMapping``).

66 Note

67 ----

68 The input and reference structures should adhere to the same crystallographic setting.

69 In other words, they should not be rotated or translated with respect to each other.

71 Parameters

72 ----------

73 structure

74 Input structure, typically a relaxed structure.

75 reference

76 Reference structure, which can but need not be the primitive structure.

77 inert_species

78 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never

79 substituted for a vacancy. The number of inert sites is used to rescale

80 the volume of the input structure to match the reference structure.

81 tol_positions

82 Tolerance factor applied when scanning for overlapping positions in

83 Ångstrom (forwarded to :func:`ase.build.make_supercell`).

84 suppress_warnings

85 if ``True`` print no warnings of large strain or relaxation distances.

86 assume_no_cell_relaxation

87 If ``False`` volume and cell metric of the input structure are rescaled

88 to match the reference structure. This can be unnecessary (and

89 counterproductive) for some structures, e.g., with many vacancies.

91 Note

92 ----

93 When setting this parameter to ``False`` the reference cell metric

94 must be obtainable via an *integer* transformation matrix from the

95 reference cell metric. In other words the input structure should not

96 involve relaxations of the volume or the cell metric.

98 Example

99 -------

100 The following code snippet illustrates the general usage. It first creates

101 a primitive FCC cell, which is latter used as reference structure. To

102 emulate a relaxed structure obtained from, e.g., a density functional

103 theory calculation, the code then creates a 4x4x4 conventional FCC

104 supercell, which is populated with two different atom types, has distorted

105 cell vectors, and random displacements to the atoms. Finally, the present

106 function is used to map the structure back the ideal lattice::

107

108 >>> from ase.build import bulk

109 >>> reference = bulk('Au', a=4.09)

110 >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4)

111 >>> structure.set_chemical_symbols(10 * ['Ag'] + (len(structure) - 10) * ['Au'])

112 >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True)

113 >>> structure.rattle(0.1)

114 >>> mapped_structure, info = map_structure_to_reference(structure, reference)

115 """

116

117 # Obtain supercell of reference structure that is compatible

118 # with relaxed structure

119 reference_supercell, P = _get_reference_supercell(

120 structure, reference, inert_species=inert_species,

121 tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation)

122

123 # Calculate strain tensor

124 strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell)

125

126 # Symmetric matrix has real eigenvalues

127 eigenvalues, _ = np.linalg.eigh(strain_tensor)

128 volumetric_strain = sum(eigenvalues)

129

130 # Rescale the relaxed atoms object

131 structure_scaled = structure.copy()

132 structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True)

133

134 # Match positions

135 mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled,

136 reference_supercell)

137

138 if warning:

139 warnings = [warning]

140 else:

141 warnings = []

142

143 if not suppress_warnings:

144 s = 'Consider excluding this structure when training a cluster expansion.'

145 if assume_no_cell_relaxation:

146 trigger_levels = {'volumetric_strain': 1e-3,

147 'eigenvalue_diff': 1e-3}

148 else:

149 trigger_levels = {'volumetric_strain': 0.25,

150 'eigenvalue_diff': 0.1}

151 if abs(volumetric_strain) > trigger_levels['volumetric_strain']:

152 warnings.append('high_volumetric_strain')

153 logger.warning('High volumetric strain ({:.2f} %). {}'.format(

154 100 * volumetric_strain, s))

155 if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']:

156 warnings.append('high_anisotropic_strain')

157 logger.warning('High anisotropic strain (the difference between '

158 'largest and smallest eigenvalues of strain tensor is '

159 '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s))

160 if drmax > 1.0:

161 warnings.append('large_maximum_relaxation_distance')

162 logger.warning('Large maximum relaxation distance '

163 '({:.5f} Angstrom). {}'.format(drmax, s))

164 if dravg > 0.5:

165 warnings.append('large_average_relaxation_distance')

166 logger.warning('Large average relaxation distance '

167 '({:.5f} Angstrom). {}'.format(dravg, s))

168

169 # Populate dictionary with supplementary information

170 info = {'drmax': drmax,

171 'dravg': dravg,

172 'strain_tensor': strain_tensor,

173 'volumetric_strain': volumetric_strain,

174 'strain_tensor_eigenvalues': eigenvalues,

175 'warnings': warnings,

176 'transformation_matrix': P}

177

178 return mapped_structure, info

179

180

181def _get_reference_supercell(structure: Atoms,

182 reference: Atoms,

183 inert_species: List[str] = None,

184 tol_positions: float = 1e-5,

185 assume_no_cell_relaxation: bool = False) -> Tuple[Atoms, np.ndarray]:

186 """

187 Returns a tuple comprising a supercell of the reference structure that is compatible with the

188 cell metric of the input structure as well as the transformation matrix that maps from the

189 primitive to the supercell structure.

190

191 Parameters

192 ----------

193 structure

194 Input structure, typically a relaxed structure.

195 reference

196 Reference structure, which can but need not be the primitive structure.

197 inert_species

198 List of chemical symbols (e.g., ``['Au', 'Pd']``) that are never

199 substituted for a vacancy. The number of inert sites is used to rescale

200 of the input structure to match the reference structure.

201 tol_positions

202 Tolerance factor applied when scanning for overlapping positions in

203 Ångstrom (forwarded to :func:`ase.build.make_supercell`).

204 assume_no_cell_relaxation

205 If ``False`` volume and cell metric of the input structure are rescaled

206 to match the reference structure. This can be unnecessary (and

207 counterproductive) for some structures, e.g., with many vacancies.

208

209 Note

210 ----

211 When setting this parameter to ``False``, the input structure

212 must be obtainable via an *integer* transformation matrix from the

213 reference cell metric. In other words the input structure should not

214 involve relaxations of the volume or the cell metric.

215

216 Raises

217 ------

218 ValueError

219 If the boundary conditions of the reference and the input structure

220 do not match.

221 ValueError

222 If the transformation matrix deviates too strongly from the nearest

223 integer matrix.

224 ValueError

225 If :attr:`assume_no_cell_relaxation` is ``True`` but the input structure is

226 not obtainable via an integer transformation from the reference

227 cell metric.

228 """

229

230 if not np.all(reference.pbc == structure.pbc):

231 msg = 'The boundary conditions of reference and input structures do not match.'

232 msg += '\n reference: ' + str(reference.pbc)

233 msg += '\n input: ' + str(structure.pbc)

234 raise ValueError(msg)

235

236 # Step 1:

237 # rescale cell metric of relaxed cell to match volume per atom of reference cell

238 if not assume_no_cell_relaxation:

239 if inert_species is None:

240 n_ref = len(reference)

241 n_rlx = len(structure)

242 else:

243 n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species])

244 n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species])

245 vol_scale = reference.get_volume() / n_ref

246 vol_scale /= structure.get_volume() / n_rlx

247 scaled_structure_cell = structure.cell * vol_scale ** (1 / 3)

248 else:

249 scaled_structure_cell = structure.cell

250

251 # Step 2:

252 # get transformation matrix

253 P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell))

254

255 # reduce the (real) transformation matrix to the nearest integer one

256 P = np.around(P)

257

258 # Step 3:

259 # generate supercell of reference structure

260 reference_supercell = make_supercell(reference, P, tol=tol_positions)

261

262 return reference_supercell, P

263

264

265def _match_positions(structure: Atoms, reference: Atoms) -> Tuple[Atoms, float, float]:

266 """Matches the atoms in the input :attr:`structure` to the sites in the

267 :attr:`reference` structure. The function returns tuple the first element of which

268 is a copy of the :attr:`reference` structure, in which the chemical species are

269 assigned to comply with the :attr:`structure`. The second and third element

270 of the tuple represent the maximum and average distance between relaxed and

271 reference sites.

272

273 Parameters

274 ----------

275 structure

276 Structure with relaxed positions.

277 reference

278 Structure with idealized positions.

279

280 Raises

281 ------

282 ValueError

283 If the cell metrics of the two input structures do not match.

284 ValueError

285 If the periodic boundary conditions of the two input structures do not match.

286 ValueError

287 If the input structure contains more atoms than the reference structure.

288 """

289

290 if not np.all(reference.pbc == structure.pbc):

291 msg = 'The boundary conditions of reference and relaxed structures do not match.'

292 msg += '\n reference: ' + str(reference.pbc)

293 msg += '\n relaxed: ' + str(structure.pbc)

294 raise ValueError(msg)

295

296 if len(structure) > len(reference):

297 msg = 'The relaxed structure contains more atoms than the reference structure.'

298 msg += '\n reference: ' + str(len(reference))

299 msg += '\n relaxed: ' + str(len(structure))

300 raise ValueError(msg)

301

302 if not np.all(np.isclose(reference.cell, structure.cell)):

303 msg = 'The cell metrics of reference and relaxed structures do not match.'

304 msg += '\n reference: ' + str(reference.cell)

305 msg += '\n relaxed: ' + str(structure.cell)

306 raise ValueError(msg)

307

308 # compute distances between reference and relaxed positions

309 _, dists = get_distances(reference.positions, structure.positions,

310 cell=reference.cell, pbc=reference.pbc)

311 # pad matrix with zeros to obtain square matrix

312 n, m = dists.shape

313 cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))),

314 mode='constant', constant_values=0)

315 # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm

316 row_ind, col_ind = linear_sum_assignment(cost_matrix)

317

318 # compile new configuration with supplementary information

319 mapped = reference.copy()

320 displacement_magnitudes = []

321 displacements = []

322 minimum_distances = []

323 n_dist_max = min(len(mapped), 3)

324 warning = None

325 for i, j in zip(row_ind, col_ind):

326 atom = mapped[i]

327 if j >= len(structure):

328 # vacant site in reference structure

329 atom.symbol = 'X'

330 displacement_magnitudes.append(None)

331 displacements.append(3 * [None])

332 minimum_distances.append(n_dist_max * [None])

333 else:

334 atom.symbol = structure[j].symbol

335 dvecs, drs = get_distances([structure[j].position],

336 [reference[i].position],

337 cell=reference.cell, pbc=reference.pbc)

338 displacement_magnitudes.append(drs[0][0])

339 displacements.append(dvecs[0][0])

340 # distances to the next three available sites

341 minimum_distances.append(sorted(dists[:, j])[:n_dist_max])

342 if len(structure) > 1:

343 if drs[0][0] > min(dists[:, j]) + 1e-6:

344 logger.warning('An atom was mapped to a site that was further '

345 'away than the closest site (that site was already '

346 'occupied by another atom).')

347 warning = 'possible_ambiguity_in_mapping'

348 elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]:

349 logger.warning('An atom was approximately equally far from its '

350 'two closest sites.')

351 warning = 'possible_ambiguity_in_mapping'

352

353 displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64)

354 mapped.new_array('Displacement', displacements, float, shape=(3, ))

355 mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float)

356 mapped.new_array('Minimum_Distances', minimum_distances,

357 float, shape=(n_dist_max,))

358 mapped.new_array('IndexMapping', col_ind, int)

359

360 drmax = np.nanmax(displacement_magnitudes)

361 dravg = np.nanmean(displacement_magnitudes)

362

363 return mapped, drmax, dravg, warning

Coverage for icet/tools/structure_mapping.py: 100%

117 statements