Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

from typing import List, Tuple 

 

import numpy as np 

 

from ase import Atoms 

from ase.build import make_supercell 

from ase.geometry import get_distances 

from scipy.optimize import linear_sum_assignment 

from icet.io.logging import logger 

 

 

def map_structure_to_reference(relaxed: Atoms, 

reference: Atoms, 

inert_species: List[str] = None, 

tol_positions: float = 1e-4, 

suppress_warnings: bool = False, 

assume_no_cell_relaxation: bool = False) \ 

-> Tuple[Atoms, dict]: 

"""Maps a relaxed structure onto a reference structure. 

The function returns a tuple comprising the ideal supercell most 

closely matching the relaxed structure and a dictionary with 

supplementary information concerning the mapping. The latter 

includes for example the largest deviation of any position in the 

relaxed structure from its reference position (`drmax`), the average 

deviation of the positions in the relaxed structure from the 

reference positions (`dravg`), and the strain tensor for the relaxed 

structure relative to the reference structure (`strain_tensor`). 

 

The Atoms object that provide further supplemental information via 

custom per-atom arrays including the atomic displacements 

(`Displacement`, `Displacement_Magnitude`) as well as the 

distances to the three closest sites (`Minimum_Distances`). 

 

Parameters 

---------- 

relaxed 

relaxed input structure 

reference 

reference structure, which can but need not be the primitive structure 

inert_species 

list of chemical symbols (e.g., ``['Au', 'Pd']``) that are never 

substituted for a vacancy; the number of inert sites is used to rescale 

the volume of the relaxed structure to match the reference structure. 

tol_positions 

tolerance factor applied when scanning for overlapping positions in 

Angstrom (forwarded to :func:`ase.build.make_supercell`) 

suppress_warnings 

if True, print no warnings of large strain or relaxation distances 

assume_no_cell_relaxation 

if False volume and cell metric of the relaxed structure are rescaled 

to match the reference structure; this can be unnecessary (and 

counterproductive) for some structures, e.g., with many vacancies 

 

**Note**: When setting this parameter to False the reference cell metric 

must be obtainable via an *integer* transformation matrix from the 

reference cell metric. In other words the relaxed structure should not 

involve relaxations of the volume or the cell metric. 

 

Example 

------- 

The following code snippet illustrates the general usage. It first creates 

a primitive FCC cell, which is latter used as reference structure. To 

emulate a relaxed structure obtained from, e.g., a density functional 

theory calculation, the code then creates a 4x4x4 conventional FCC 

supercell, which is populated with two different atom types, has distorted 

cell vectors, and random displacements to the atoms. Finally, the present 

function is used to map the structure back the ideal lattice:: 

 

from icet.tools import map_structure_to_reference 

from ase.build import bulk 

reference = bulk('Au', a=4.09) 

structure = bulk('Au', cubic=True, a=4.09).repeat(4) 

structure.set_chemical_symbols(10 * ['Ag'] + (len(structure) - 10) * ['Au']) 

structure.set_cell(structure.cell * 1.02, scale_atoms=True) 

structure.rattle(0.1) 

mapped_structure, info = map_structure_to_reference(structure, reference) 

 

""" 

 

# Obtain supercell of reference structure that is compatible 

# with relaxed structure 

reference_supercell = _get_reference_supercell( 

relaxed, reference, inert_species=inert_species, 

tol_positions=tol_positions, assume_no_cell_relaxation=assume_no_cell_relaxation) 

 

# Compute strain 

epsilon = np.dot(relaxed.cell.T - reference_supercell.cell.T, 

np.linalg.inv(reference_supercell.cell.T)) 

strain_tensor = 0.5 * (epsilon + epsilon.T) 

eigenvalues = np.linalg.eig(strain_tensor)[0] 

volumetric_strain = sum(eigenvalues) 

 

# Rescale the relaxed atoms object 

relaxed_scaled = relaxed.copy() 

relaxed_scaled.set_cell(reference_supercell.cell, scale_atoms=True) 

 

# Match positions 

mapped_structure, drmax, dravg = _match_positions(relaxed_scaled, reference_supercell) 

 

if not suppress_warnings: 

s = 'Consider excluding this structure when training a cluster expansion.' 

if assume_no_cell_relaxation: 

trigger_levels = {'volumetric_strain': 1e-3, 

'eigenvalue_diff': 1e-3} 

else: 

trigger_levels = {'volumetric_strain': 0.25, 

'eigenvalue_diff': 0.1} 

if abs(volumetric_strain) > trigger_levels['volumetric_strain']: 

logger.warning('High volumetric strain ({:.2f} %). {}'.format( 

100 * volumetric_strain, s)) 

if max(eigenvalues) - min(eigenvalues) > trigger_levels['eigenvalue_diff']: 

logger.warning('High anisotropic strain (the difference between ' 

'largest and smallest eigenvalues of strain tensor is ' 

'{:.5f}). {}'.format(max(eigenvalues) - min(eigenvalues), s)) 

if drmax > 1.0: 

logger.warning('Large maximum relaxation distance ' 

'({:.5f} Angstrom). {}'.format(drmax, s)) 

if dravg > 0.5: 

logger.warning('Large average relaxation distance ' 

'({:.5f} Angstrom). {}'.format(dravg, s)) 

 

# Populate dictionary with supplementary information 

info = {'drmax': drmax, 

'dravg': dravg, 

'strain_tensor': strain_tensor, 

'volumetric_strain': volumetric_strain, 

'strain_tensor_eigenvalues': eigenvalues} 

 

return mapped_structure, info 

 

 

def _get_reference_supercell(relaxed: Atoms, 

reference: Atoms, 

inert_species: List[str] = None, 

tol_positions: float = 1e-5, 

assume_no_cell_relaxation: bool = False) -> Atoms: 

""" 

Returns a supercell of the reference structure that is compatible with the 

cell metric of the relaxed structure. 

 

Parameters 

---------- 

relaxed 

relaxed input structure 

reference 

reference structure, which can but need not be the primitive structure 

inert_species 

list of chemical symbols (e.g., ``['Au', 'Pd']``) that are never 

substituted for a vacancy; the number of inert sites is used to rescale 

of the relaxed structure to match the reference structure. 

tol_positions 

tolerance factor applied when scanning for overlapping positions in 

Angstrom (forwarded to :func:`ase.build.make_supercell`) 

assume_no_cell_relaxation 

if False volume and cell metric of the relaxed structure are rescaled 

to match the reference structure; this can be unnecessary (and 

counterproductive) for some structures, e.g., with many vacancies 

 

**Note**: When setting this parameter to False relaxed structure 

must be obtainable via an *integer* transformation matrix from the 

reference cell metric. In other words the relaxed structure should not 

involve relaxations of the volume or the cell metric. 

 

Raises 

------ 

ValueError 

if the boundary conditions of the reference and the relaxed structure 

do not match 

ValueError 

if the transformation matrix deviates too strongly from the nearest 

integer matrix 

ValueError 

if assume_no_cell_relaxation is True but the relaxed structure is 

not obtainable via an integer transformation from the reference 

cell metric 

""" 

 

if not np.all(reference.pbc == relaxed.pbc): 

msg = 'The boundary conditions of reference and relaxed structures do not match.' 

msg += '\n reference: ' + str(reference.pbc) 

msg += '\n relaxed: ' + str(relaxed.pbc) 

raise ValueError(msg) 

 

# Step 1: 

# rescale cell metric of relaxed cell to match volume per atom of reference cell 

if not assume_no_cell_relaxation: 

if inert_species is None: 

n_ref = len(reference) 

n_rlx = len(relaxed) 

else: 

n_ref = sum([reference.get_chemical_symbols().count(s) for s in inert_species]) 

n_rlx = sum([relaxed.get_chemical_symbols().count(s) for s in inert_species]) 

vol_scale = reference.get_volume() / n_ref 

vol_scale /= relaxed.get_volume() / n_rlx 

scaled_relaxed_cell = relaxed.cell * vol_scale ** (1 / 3) 

else: 

scaled_relaxed_cell = relaxed.cell 

 

# Step 2: 

# get transformation matrix 

P = np.dot(scaled_relaxed_cell, np.linalg.inv(reference.cell)) 

 

# reduce the (real) transformation matrix to the nearest integer one 

P = np.around(P) 

 

# Step 3: 

# generate supercell of reference structure 

reference_supercell = make_supercell(reference, P, tol=tol_positions) 

 

return reference_supercell 

 

 

def _match_positions(relaxed: Atoms, reference: Atoms) -> Tuple[Atoms, float, float]: 

"""Matches the atoms in the `relaxed` structure to the sites in the 

`reference` structure. The function returns tuple the first element of which 

is a copy of the `reference` structure, in which the chemical species are 

assigned to comply with the `relaxed` structure. The second and third element 

of the tuple represent the maximum and average distance between relaxed and 

reference sites. 

 

Parameters 

---------- 

relaxed 

structure with relaxed positions 

reference 

structure with idealized positions 

 

Raises 

------ 

ValueError 

if the cell metrics of the two input structures do not match 

ValueError 

if the periodic boundary conditions of the two input structures do not match 

ValueError 

if the relaxed structure contains more atoms than the reference structure 

""" 

 

if not np.all(reference.pbc == relaxed.pbc): 

msg = 'The boundary conditions of reference and relaxed structures do not match.' 

msg += '\n reference: ' + str(reference.pbc) 

msg += '\n relaxed: ' + str(relaxed.pbc) 

raise ValueError(msg) 

 

if len(relaxed) > len(reference): 

msg = 'The relaxed structure contains more atoms than the reference structure.' 

msg += '\n reference: ' + str(len(reference)) 

msg += '\n relaxed: ' + str(len(relaxed)) 

raise ValueError(msg) 

 

if not np.all(np.isclose(reference.cell, relaxed.cell)): 

msg = 'The cell metrics of reference and relaxed structures do not match.' 

msg += '\n reference: ' + str(reference.cell) 

msg += '\n relaxed: ' + str(relaxed.cell) 

raise ValueError(msg) 

 

# compute distances between reference and relaxed positions 

_, dists = get_distances(reference.positions, relaxed.positions, 

cell=reference.cell, pbc=reference.pbc) 

# pad matrix with zeros to obtain square matrix 

n, m = dists.shape 

cost_matrix = np.pad(dists, ((0, 0), (0, abs(n - m))), 

mode='constant', constant_values=0) 

# find optimal mapping using Kuhn-Munkres (Hungarian) algorithm 

row_ind, col_ind = linear_sum_assignment(cost_matrix) 

 

# compile new configuration with supplementary information 

mapped = reference.copy() 

displacement_magnitudes = [] 

displacements = [] 

minimum_distances = [] 

for i, j in zip(row_ind, col_ind): 

atom = mapped[i] 

if j >= len(relaxed): 

# vacant site in reference structure 

atom.symbol = 'X' 

displacement_magnitudes.append(None) 

displacements.append(3 * [None]) 

minimum_distances.append(3 * [None]) 

else: 

atom.symbol = relaxed[j].symbol 

dvecs, drs = get_distances([relaxed[j].position], 

[reference[i].position], 

cell=reference.cell, pbc=reference.pbc) 

displacement_magnitudes.append(drs[0][0]) 

displacements.append(dvecs[0][0]) 

# distances to the next three available sites 

minimum_distances.append(sorted(dists[:, j])[:3]) 

 

displacement_magnitudes = np.array(displacement_magnitudes, dtype=np.float64) 

mapped.new_array('Displacement', displacements, float, (3, )) 

mapped.new_array('Displacement_Magnitude', displacement_magnitudes, float) 

mapped.new_array('Minimum_Distances', minimum_distances, 

float, (min(3, len(mapped)), )) 

 

drmax = np.nanmax(displacement_magnitudes) 

dravg = np.nanmean(displacement_magnitudes) 

 

return mapped, drmax, dravg