r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from typing import List, Tuple

3import numpy as np

5from ase import Atoms

6from ase.build import make_supercell

7from ase.geometry import get_distances

8from scipy.optimize import linear_sum_assignment

9from icet.input_output.logging_tools import logger

10import scipy.linalg

13def calculate_strain_tensor(A: np.ndarray,

14 B: np.ndarray) -> np.ndarray:

15 """Calculates the strain tensor for mapping a cell A onto cell B. The

16 strain calculated is the Biot strain tensor and is rotationally invariant.

18 Parameters

19 ----------

20 A

21 reference cell (row-major format)

22 B

23 target cell (row-major format)

25 Returns

26 -------

27 strain_tensor

28 Biot strain tensor (symmetric matrix)

29 """

30 assert A.shape == (3, 3)

31 assert B.shape == (3, 3)

33 # Calculate deformation gradient (F) in column-major format

34 F = np.linalg.solve(A, B).T

36 # Calculate right stretch tensor (U)

37 _, U = scipy.linalg.polar(F)

39 # return Biot strain tensor

40 return U - np.eye(3)

43def map_structure_to_reference(structure: Atoms,

44 reference: Atoms,

45 inert_species: List[str] = None,

46 tol_positions: float = 1e-4,

47 suppress_warnings: bool = False,

48 assume_no_cell_relaxation: bool = False) \

49 -> Tuple[Atoms, dict]:

50 """Maps a structure onto a reference structure.

51 This is often desirable when, for example, a structure has been

52 relaxed using DFT, and one wants to use it as a training structure

53 in a cluster expansion.

55 The function returns a tuple comprising the ideal supercell most

56 closely matching the input structure and a dictionary with

57 supplementary information concerning the mapping. The latter

58 includes for example the largest deviation of any position in the

59 input structure from its reference position (`drmax`), the average

60 deviation of the positions in the input structure from the

61 reference positions (`dravg`), and the strain tensor for the input

62 structure relative to the reference structure (`strain_tensor`).

64 The Atoms object that provide further supplemental information via

65 custom per-atom arrays including the atomic displacements

66 (`Displacement`, `Displacement_Magnitude`) as well as the

67 distances to the three closest sites (`Minimum_Distances`).

69 Parameters

70 ----------

71 structure

72 input structure, typically a relaxed structure

73 reference

74 reference structure, which can but need not be the primitive structure

75 inert_species

76 list of chemical symbols (e.g., ``['Au', 'Pd']``) that are never

77 substituted for a vacancy; the number of inert sites is used to rescale

78 the volume of the input structure to match the reference structure.

79 tol_positions

80 tolerance factor applied when scanning for overlapping positions in

81 Angstrom (forwarded to :func:`ase.build.make_supercell`)

82 suppress_warnings

83 if True, print no warnings of large strain or relaxation distances

84 assume_no_cell_relaxation

85 if False volume and cell metric of the input structure are rescaled

86 to match the reference structure; this can be unnecessary (and

87 counterproductive) for some structures, e.g., with many vacancies

89 **Note**: When setting this parameter to False the reference cell metric

90 must be obtainable via an *integer* transformation matrix from the

91 reference cell metric. In other words the input structure should not

92 involve relaxations of the volume or the cell metric.

94 Example

95 -------

96 The following code snippet illustrates the general usage. It first creates

97 a primitive FCC cell, which is latter used as reference structure. To

98 emulate a relaxed structure obtained from, e.g., a density functional

99 theory calculation, the code then creates a 4x4x4 conventional FCC

100 supercell, which is populated with two different atom types, has distorted

101 cell vectors, and random displacements to the atoms. Finally, the present

102 function is used to map the structure back the ideal lattice::

104 >>> from ase.build import bulk

105 >>> reference = bulk('Au', a=4.09)

106 >>> structure = bulk('Au', cubic=True, a=4.09).repeat(4)

107 >>> structure.set_chemical_symbols(10 * ['Ag'] + (len(structure) - 10) * ['Au'])

108 >>> structure.set_cell(structure.cell * 1.02, scale_atoms=True)

109 >>> structure.rattle(0.1)

110 >>> mapped_structure, info = map_structure_to_reference(structure, reference)

111 """

113 # Obtain supercell of reference structure that is compatible

114 # with relaxed structure

115 reference_supercell = _get_reference_supercell(

116 structure, reference, inert_species=inert_species,

117 tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation)

119 # Calculate strain tensor

120 strain_tensor = calculate_strain_tensor(reference_supercell.cell, structure.cell)

122 # Symmetric matrix has real eigenvalues

123 eigenvalues, _ = np.linalg.eigh(strain_tensor)

124 volumetric_strain = sum(eigenvalues)

126 # Rescale the relaxed atoms object

127 structure_scaled = structure.copy()

128 structure_scaled.set_cell(reference_supercell.cell, scale_atoms=True)

130 # Match positions

131 mapped_structure, drmax, dravg, warning = _match_positions(structure_scaled,

132 reference_supercell)

134 if warning:

135 warnings = [warning]

136 else:

137 warnings = []

139 if not suppress_warnings:

140 s = 'Consider excluding this structure when training a cluster expansion.'

141 if assume_no_cell_relaxation:

142 trigger_levels = {'volumetric_strain': 1e-3,

143 'eigenvalue_diff': 1e-3}

144 else:

145 trigger_levels = {'volumetric_strain': 0.25,

146 'eigenvalue_diff': 0.1}

147 if abs(volumetric_strain) > trigger_levels['volumetric_strain']:

148 warnings.append('high_volumetric_strain')

149 logger.warning('High volumetric strain ({:.2f} %). {}'.format(

150 100 * volumetric_strain, s))

151 if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']:

152 warnings.append('high_anisotropic_strain')

153 logger.warning('High anisotropic strain (the difference between '

154 'largest and smallest eigenvalues of strain tensor is '

155 '{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s))

156 if drmax > 1.0:

157 warnings.append('large_maximum_relaxation_distance')

158 logger.warning('Large maximum relaxation distance '

159 '({:.5f} Angstrom). {}'.format(drmax, s))

160 if dravg > 0.5:

161 warnings.append('large_average_relaxation_distance')

162 logger.warning('Large average relaxation distance '

163 '({:.5f} Angstrom). {}'.format(dravg, s))

165 # Populate dictionary with supplementary information

166 info = {'drmax': drmax,

167 'dravg': dravg,

168 'strain_tensor': strain_tensor,

169 'volumetric_strain': volumetric_strain,

170 'strain_tensor_eigenvalues': eigenvalues,

171 'warnings': warnings}

173 return mapped_structure, info

176def _get_reference_supercell(structure: Atoms,

177 reference: Atoms,

178 inert_species: List[str] = None,

179 tol_positions: float = 1e-5,

180 assume_no_cell_relaxation: bool = False) -> Atoms:

181 """

182 Returns a supercell of the reference structure that is compatible with the

183 cell metric of the input structure.

185 Parameters

186 ----------

187 structure

188 input structure, typically a relaxed structure

189 reference

190 reference structure, which can but need not be the primitive structure

191 inert_species

192 list of chemical symbols (e.g., ``['Au', 'Pd']``) that are never

193 substituted for a vacancy; the number of inert sites is used to rescale

194 of the input structure to match the reference structure.

195 tol_positions

196 tolerance factor applied when scanning for overlapping positions in

197 Angstrom (forwarded to :func:`ase.build.make_supercell`)

198 assume_no_cell_relaxation

199 if False volume and cell metric of the input structure are rescaled

200 to match the reference structure; this can be unnecessary (and

201 counterproductive) for some structures, e.g., with many vacancies

203 **Note**: When setting this parameter to False, the input structure

204 must be obtainable via an *integer* transformation matrix from the

205 reference cell metric. In other words the input structure should not

206 involve relaxations of the volume or the cell metric.

208 Raises

209 ------

210 ValueError

211 if the boundary conditions of the reference and the input structure

212 do not match

213 ValueError

214 if the transformation matrix deviates too strongly from the nearest

215 integer matrix

216 ValueError

217 if assume_no_cell_relaxation is True but the input structure is

218 not obtainable via an integer transformation from the reference

219 cell metric

220 """

222 if not np.all(reference.pbc == structure.pbc):

223 msg = 'The boundary conditions of reference and input structures do not match.'

224 msg += '\n reference: ' + str(reference.pbc)

225 msg += '\n input: ' + str(structure.pbc)

226 raise ValueError(msg)

228 # Step 1:

229 # rescale cell metric of relaxed cell to match volume per atom of reference cell

230 if not assume_no_cell_relaxation:

231 if inert_species is None:

232 n_ref = len(reference)

233 n_rlx = len(structure)

234 else:

235 n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species])

236 n_rlx = sum([structure.get_chemical_symbols().count(s) for s in inert_species])

237 vol_scale = reference.get_volume() / n_ref

238 vol_scale /= structure.get_volume() / n_rlx

239 scaled_structure_cell = structure.cell * vol_scale ** (1 / 3)

240 else:

241 scaled_structure_cell = structure.cell

243 # Step 2:

244 # get transformation matrix

245 P = np.dot(scaled_structure_cell, np.linalg.inv(reference.cell))

247 # reduce the (real) transformation matrix to the nearest integer one

248 P = np.around(P)

250 # Step 3:

251 # generate supercell of reference structure

252 reference_supercell = make_supercell(reference, P, tol=tol_positions)

254 return reference_supercell

257def _match_positions(structure: Atoms, reference: Atoms) -> Tuple[Atoms, float, float]:

258 """Matches the atoms in the input `structure` to the sites in the

259 `reference` structure. The function returns tuple the first element of which

260 is a copy of the `reference` structure, in which the chemical species are

261 assigned to comply with the `structure`. The second and third element

262 of the tuple represent the maximum and average distance between relaxed and

263 reference sites.

265 Parameters

266 ----------

267 structure

268 structure with relaxed positions

269 reference

270 structure with idealized positions

272 Raises

273 ------

274 ValueError

275 if the cell metrics of the two input structures do not match

276 ValueError

277 if the periodic boundary conditions of the two input structures do not match

278 ValueError

279 if the input structure contains more atoms than the reference structure

280 """

282 if not np.all(reference.pbc == structure.pbc):

283 msg = 'The boundary conditions of reference and relaxed structures do not match.'

284 msg += '\n reference: ' + str(reference.pbc)

285 msg += '\n relaxed: ' + str(structure.pbc)

286 raise ValueError(msg)

288 if len(structure) > len(reference):

289 msg = 'The relaxed structure contains more atoms than the reference structure.'

290 msg += '\n reference: ' + str(len(reference))

291 msg += '\n relaxed: ' + str(len(structure))

292 raise ValueError(msg)

294 if not np.all(np.isclose(reference.cell, structure.cell)):

295 msg = 'The cell metrics of reference and relaxed structures do not match.'

296 msg += '\n reference: ' + str(reference.cell)

297 msg += '\n relaxed: ' + str(structure.cell)

298 raise ValueError(msg)

300 # compute distances between reference and relaxed positions

301 _, dists = get_distances(reference.positions, structure.positions,

302 cell=reference.cell, pbc=reference.pbc)

303 # pad matrix with zeros to obtain square matrix

304 n, m = dists.shape

305 cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))),

306 mode='constant', constant_values=0)

307 # find optimal mapping using Kuhn-Munkres (Hungarian) algorithm

308 row_ind, col_ind = linear_sum_assignment(cost_matrix)

310 # compile new configuration with supplementary information

311 mapped = reference.copy()

312 displacement_magnitudes = []

313 displacements = []

314 minimum_distances = []

315 n_dist_max = min(len(mapped), 3)

316 warning = None

317 for i, j in zip(row_ind, col_ind):

318 atom = mapped[i]

319 if j >= len(structure):

320 # vacant site in reference structure

321 atom.symbol = 'X'

322 displacement_magnitudes.append(None)

323 displacements.append(3 * [None])

324 minimum_distances.append(n_dist_max * [None])

325 else:

326 atom.symbol = structure[j].symbol

327 dvecs, drs = get_distances([structure[j].position],

328 [reference[i].position],

329 cell=reference.cell, pbc=reference.pbc)

330 displacement_magnitudes.append(drs[0][0])

331 displacements.append(dvecs[0][0])

332 # distances to the next three available sites

333 minimum_distances.append(sorted(dists[:, j])[:n_dist_max])

334 if len(structure) > 1:

335 if drs[0][0] > min(dists[:, j]) + 1e-6:

336 logger.warning('An atom was mapped to a site that was further '

337 'away than the closest site (that site was already '

338 'occupied by another atom).')

339 warning = 'possible_ambiguity_in_mapping'

340 elif minimum_distances[-1][0] > 0.9 * minimum_distances[-1][1]:

341 logger.warning('An atom was approximately equally far from its '

342 'two closest sites.')

343 warning = 'possible_ambiguity_in_mapping'

345 displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64)

346 mapped.new_array('Displacement', displacements, float, shape=(3, ))

347 mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float)

348 mapped.new_array('Minimum_Distances', minimum_distances,

349 float, shape=(n_dist_max,))

351 drmax = np.nanmax(displacement_magnitudes)

352 dravg = np.nanmean(displacement_magnitudes)

354 return mapped, drmax, dravg, warning