Coverage for icet/tools/structure_generation.py: 99%

171 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-12-26 04:12 +0000

1import itertools 

2from typing import List, Dict, Tuple, Union 

3import numpy as np 

4from mchammer.ensembles import TargetClusterVectorAnnealing 

5from mchammer.calculators import (TargetVectorCalculator, 

6 compare_cluster_vectors) 

7from icet.tools import (enumerate_structures, 

8 enumerate_supercells) 

9from icet import ClusterSpace 

10from icet.input_output.logging_tools import logger 

11from ase import Atoms 

12from ase.data import chemical_symbols as periodic_table 

13 

14 

15def generate_target_structure_from_supercells(cluster_space: ClusterSpace, 

16 supercells: List[Atoms], 

17 target_concentrations: dict, 

18 target_cluster_vector: List[float], 

19 T_start: float = 5.0, 

20 T_stop: float = 0.001, 

21 n_steps: int = None, 

22 optimality_weight: float = 1.0, 

23 random_seed: int = None, 

24 random_start: bool = True, 

25 tol: float = 1e-5) -> Atoms: 

26 """ 

27 Given a :attr:`cluster_space` and a :attr:`target_cluster_vector` and one 

28 or more :attr:`supercells`, generate a structure that as closely as 

29 possible matches that cluster vector. 

30 

31 Internally the function uses a simulated annealing algorithm and the 

32 difference between two cluster vectors is calculated with the 

33 measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 

34 (2013) [WalTiwJon13]_ (for more information, see 

35 :class:`mchammer.calculators.TargetVectorCalculator`). 

36 

37 Parameters 

38 ---------- 

39 cluster_space 

40 A cluster space defining the lattice to be occupied. 

41 supercells 

42 List of one or more supercells among which an optimal 

43 structure will be searched for. 

44 target_concentrations 

45 Concentration of each species in the target structure, per 

46 sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a 

47 single sublattice Au-Pd structure, or 

48 ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` 

49 for a system with two sublattices. 

50 The symbols defining sublattices ('A', 'B' etc) can be 

51 found by printing the :attr:`cluster_space`. 

52 target_cluster_vector 

53 Cluster vector that the generated structure should match as 

54 closely as possible. 

55 T_start 

56 Artificial temperature at which the simulated annealing starts. 

57 T_stop 

58 Artifical temperature at which the simulated annealing stops. 

59 n_steps 

60 Total number of Monte Carlo steps in the simulation. 

61 optimality_weight 

62 Controls weighting :math:`L` of perfect correlations, see 

63 :class:`mchammer.calculators.TargetVectorCalculator`. 

64 random_seed 

65 Seed for the random number generator used in the Monte Carlo simulation 

66 and used for initializing the occupation of the supercells if 

67 random_start is ``True``. 

68 random_start 

69 Randomly occupy starting structure, can be disabled 

70 if the user prefers to pass an initial structure. 

71 tol 

72 Numerical tolerance. 

73 """ 

74 target_concentrations = _validate_concentrations(target_concentrations, cluster_space) 

75 

76 calculators = [] 

77 

78 # Loop over all supercells and intialize 

79 # them with a "random" occupation of symbols that 

80 # fulfill the target concentrations 

81 valid_supercells = [] 

82 warning_issued = False 

83 for supercell in supercells: 

84 supercell_copy = supercell.copy() 

85 if random_start: 85 ↛ 96line 85 didn't jump to line 96, because the condition on line 85 was never false

86 try: 

87 occupy_structure_randomly(supercell_copy, cluster_space, 

88 target_concentrations, 

89 random_seed) 

90 except ValueError: 

91 if not warning_issued: 

92 logger.warning('At least one supercell was not commensurate with the specified ' 

93 'target concentrations.') 

94 warning_issued = True 

95 continue 

96 valid_supercells.append(supercell_copy) 

97 calculators.append(TargetVectorCalculator(supercell_copy, cluster_space, 

98 target_cluster_vector, 

99 optimality_weight=optimality_weight, 

100 optimality_tol=tol)) 

101 

102 if len(valid_supercells) == 0: 

103 raise ValueError('No supercells that may host the specified ' 

104 'target_concentrations were supplied.') 

105 

106 ens = TargetClusterVectorAnnealing(structure=valid_supercells, calculators=calculators, 

107 T_start=T_start, T_stop=T_stop, 

108 random_seed=random_seed) 

109 return ens.generate_structure(number_of_trial_steps=n_steps) 

110 

111 

112def generate_target_structure(cluster_space: ClusterSpace, 

113 max_size: int, 

114 target_concentrations: dict, 

115 target_cluster_vector: List[float], 

116 include_smaller_cells: bool = True, 

117 pbc: Union[Tuple[bool, bool, bool], Tuple[int, int, int]] = None, 

118 T_start: float = 5.0, 

119 T_stop: float = 0.001, 

120 n_steps: int = None, 

121 optimality_weight: float = 1.0, 

122 random_seed: int = None, 

123 tol: float = 1e-5) -> Atoms: 

124 """ 

125 Given a :attr:`cluster_space` and a :attr:`target_cluster_vector`, generate 

126 a structure that as closely as possible matches that cluster vector. 

127 The search is performed among all inequivalent supercells shapes up 

128 to a certain size. 

129 

130 Internally the function uses a simulated annealing algorithm and the 

131 difference between two cluster vectors is calculated with the 

132 measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 

133 (2013) [WalTiwJon13]_ (for more information, see 

134 :class:`mchammer.calculators.TargetVectorCalculator`). 

135 

136 Parameters 

137 ---------- 

138 cluster_space 

139 Cluster space defining the lattice to be occupied. 

140 max_size 

141 Maximum supercell size. 

142 target_concentrations 

143 Concentration of each species in the target structure, per 

144 sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a 

145 single sublattice Au-Pd structure, or 

146 ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` 

147 for a system with two sublattices. 

148 The symbols defining sublattices ('A', 'B' etc) can be 

149 found by printing the :attr:`cluster_space`. 

150 target_cluster_vector 

151 Cluster vector that the generated structure should match as 

152 closely as possible. 

153 include_smaller_cells 

154 If ``True``, search among all supercell sizes including 

155 :attr:`max_size`, else search only among those exactly matching 

156 :attr:`max_size` 

157 pbc 

158 Periodic boundary conditions for each direction, e.g., 

159 ``(True, True, False)``. The axes are defined by 

160 the cell of :attr:`cluster_space.primitive_structure``. 

161 Default is periodic boundary in all directions. 

162 T_start 

163 Artificial temperature at which the simulated annealing starts. 

164 T_stop 

165 Artifical temperature at which the simulated annealing stops. 

166 n_steps 

167 Total number of Monte Carlo steps in the simulation. 

168 optimality_weight 

169 Controls weighting :math:`L` of perfect correlations, see 

170 :class:`mchammer.calculators.TargetVectorCalculator`. 

171 random_seed 

172 Seed for the random number generator used in the Monte Carlo simulation. 

173 tol 

174 Numerical tolerance. 

175 """ 

176 target_concentrations = _validate_concentrations(target_concentrations, cluster_space) 

177 

178 if pbc is None: 

179 pbc = (True, True, True) 

180 

181 prim = cluster_space.primitive_structure 

182 prim.set_pbc(pbc) 

183 

184 supercells = [] 

185 if include_smaller_cells: 

186 sizes = list(range(1, max_size + 1)) 

187 else: 

188 sizes = [max_size] 

189 

190 for size in sizes: 

191 # For efficiency, make a first check that the current size is 

192 # commensurate with all concentrations (example: {'Au': 0.5, 

193 # 'Pd': 0.5} would not be commensurate with a supercell with 3 

194 # atoms). 

195 supercell = cluster_space.primitive_structure.repeat((size, 1, 1)) 

196 if not _concentrations_fit_structure(structure=supercell, 

197 cluster_space=cluster_space, 

198 concentrations=target_concentrations): 

199 continue 

200 

201 # Loop over all inequivalent supercells and intialize 

202 # them with a "random" occupation of symbols that 

203 # fulfill the target concentrations 

204 for supercell in enumerate_supercells(prim, [size]): 

205 supercell.set_pbc(True) 

206 supercells.append(supercell) 

207 

208 return generate_target_structure_from_supercells(cluster_space=cluster_space, 

209 supercells=supercells, 

210 target_concentrations=target_concentrations, 

211 target_cluster_vector=target_cluster_vector, 

212 T_start=T_start, 

213 T_stop=T_stop, 

214 n_steps=n_steps, 

215 optimality_weight=optimality_weight, 

216 random_seed=random_seed, 

217 tol=tol) 

218 

219 

220def generate_sqs_from_supercells(cluster_space: ClusterSpace, 

221 supercells: List[Atoms], 

222 target_concentrations: dict, 

223 T_start: float = 5.0, 

224 T_stop: float = 0.001, 

225 n_steps: int = None, 

226 optimality_weight: float = 1.0, 

227 random_seed: int = None, 

228 random_start: bool = True, 

229 tol: float = 1e-5) -> Atoms: 

230 """ 

231 Given a :attr:`cluster_space`` and one or more :attr:`supercells`, generate 

232 a special quasirandom structure (SQS), i.e., a structure that for 

233 the provided supercells size provides the best possible 

234 approximation to a random alloy [ZunWeiFer90]_. 

235 

236 In the present case, this means that the generated structure will 

237 have a cluster vector that as closely as possible matches the 

238 cluster vector of an infintely large randomly occupied supercell. 

239 Internally the function uses a simulated annealing algorithm and the 

240 difference between two cluster vectors is calculated with the 

241 measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 

242 (2013) [WalTiwJon13]_ (for more information, see 

243 :class:`mchammer.calculators.TargetVectorCalculator`). 

244 

245 Parameters 

246 ---------- 

247 cluster_space 

248 Cluster space defining the lattice to be occupied. 

249 supercells 

250 List of one or more supercells among which an optimal 

251 structure will be searched for. 

252 target_concentrations 

253 Concentration of each species in the target structure, per 

254 sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a 

255 single sublattice Au-Pd structure, or 

256 ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` 

257 for a system with two sublattices. 

258 The symbols defining sublattices ('A', 'B' etc) can be 

259 found by printing the :attr:`cluster_space`. 

260 T_start 

261 Artificial temperature at which the simulated annealing starts. 

262 T_stop 

263 Artifical temperature at which the simulated annealing stops. 

264 n_steps 

265 Total number of Monte Carlo steps in the simulation. 

266 optimality_weight 

267 Controls weighting :math:`L` of perfect correlations, see 

268 :class:`mchammer.calculators.TargetVectorCalculator`. 

269 random_seed 

270 Seed for the random number generator used in the Monte Carlo simulation 

271 and used for initializing the occupation of the supercells if 

272 random_start is ``True``. 

273 random_start 

274 Randomly occupy starting structure, can be disabled 

275 if the user prefers to pass an initial structure. 

276 tol 

277 Numerical tolerance. 

278 """ 

279 

280 sqs_vector = _get_sqs_cluster_vector(cluster_space=cluster_space, 

281 target_concentrations=target_concentrations) 

282 return generate_target_structure_from_supercells(cluster_space=cluster_space, 

283 supercells=supercells, 

284 target_concentrations=target_concentrations, 

285 target_cluster_vector=sqs_vector, 

286 T_start=T_start, T_stop=T_stop, 

287 n_steps=n_steps, 

288 optimality_weight=optimality_weight, 

289 random_seed=random_seed, 

290 random_start=random_start, 

291 tol=tol) 

292 

293 

294def generate_sqs(cluster_space: ClusterSpace, 

295 max_size: int, 

296 target_concentrations: dict, 

297 include_smaller_cells: bool = True, 

298 pbc: Union[Tuple[bool, bool, bool], Tuple[int, int, int]] = None, 

299 T_start: float = 5.0, 

300 T_stop: float = 0.001, 

301 n_steps: int = None, 

302 optimality_weight: float = 1.0, 

303 random_seed: int = None, 

304 tol: float = 1e-5) -> Atoms: 

305 """ 

306 Given a :attr:`cluster_space`, generate a special quasirandom structure 

307 (SQS), i.e., a structure that for a given supercell size provides 

308 the best possible approximation to a random alloy [ZunWeiFer90]_. 

309 

310 In the present case, this means that the generated structure will 

311 have a cluster vector that as closely as possible matches the 

312 cluster vector of an infintely large randomly occupied supercell. 

313 Internally the function uses a simulated annealing algorithm and the 

314 difference between two cluster vectors is calculated with the 

315 measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 

316 (2013) [WalTiwJon13]_ (for more information, see 

317 :class:`mchammer.calculators.TargetVectorCalculator`). 

318 

319 Parameters 

320 ---------- 

321 cluster_space 

322 Cluster space defining the lattice to be occupied. 

323 max_size 

324 Maximum supercell size. 

325 target_concentrations 

326 Concentration of each species in the target structure, per 

327 sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a 

328 single sublattice Au-Pd structure, or 

329 ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` 

330 for a system with two sublattices. 

331 The symbols defining sublattices ('A', 'B' etc) can be 

332 found by printing the :attr:`cluster_space`. 

333 include_smaller_cells 

334 If ``True``, search among all supercell sizes including 

335 :attr:`max_size`, else search only among those exactly matching 

336 :attr:`max_size` 

337 pbc 

338 Periodic boundary conditions for each direction, e.g., 

339 ``(True, True, False)``. The axes are defined by 

340 the cell of ``cluster_space.primitive_structure``. 

341 Default is periodic boundary in all directions. 

342 T_start 

343 Artificial temperature at which the simulated annealing starts. 

344 T_stop 

345 Artifical temperature at which the simulated annealing stops. 

346 n_steps 

347 Total number of Monte Carlo steps in the simulation. 

348 optimality_weight 

349 Controls weighting :math:`L` of perfect correlations, see 

350 :class:`mchammer.calculators.TargetVectorCalculator`. 

351 random_seed 

352 Seed for the random number generator used in the 

353 Monte Carlo simulation. 

354 tol 

355 Numerical tolerance. 

356 """ 

357 

358 sqs_vector = _get_sqs_cluster_vector(cluster_space=cluster_space, 

359 target_concentrations=target_concentrations) 

360 return generate_target_structure(cluster_space=cluster_space, 

361 max_size=max_size, 

362 target_concentrations=target_concentrations, 

363 target_cluster_vector=sqs_vector, 

364 include_smaller_cells=include_smaller_cells, 

365 pbc=pbc, 

366 T_start=T_start, T_stop=T_stop, 

367 n_steps=n_steps, 

368 optimality_weight=optimality_weight, 

369 random_seed=random_seed, 

370 tol=tol) 

371 

372 

373def generate_sqs_by_enumeration(cluster_space: ClusterSpace, 

374 max_size: int, 

375 target_concentrations: dict, 

376 include_smaller_cells: bool = True, 

377 pbc: Union[Tuple[bool, bool, bool], Tuple[int, int, int]] = None, 

378 optimality_weight: float = 1.0, 

379 tol: float = 1e-5) -> Atoms: 

380 """ 

381 Given a :attr:`cluster_space`, generate a special quasirandom structure 

382 (SQS), i.e., a structure that for a given supercell size provides 

383 the best possible approximation to a random alloy [ZunWeiFer90]_. 

384 

385 In the present case, this means that the generated structure will 

386 have a cluster vector that as closely as possible matches the 

387 cluster vector of an infintely large randomly occupied supercell. 

388 Internally the function uses a simulated annealing algorithm and the 

389 difference between two cluster vectors is calculated with the 

390 measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 

391 (2013) [WalTiwJon13]_ (for more information, see 

392 :class:`mchammer.calculators.TargetVectorCalculator`). 

393 

394 This functions generates SQS cells by exhaustive enumeration, which 

395 means that the generated SQS cell is guaranteed to be optimal with 

396 regard to the specified measure and cell size. 

397 

398 Parameters 

399 ---------- 

400 cluster_space 

401 Cluster space defining the lattice to be occupied. 

402 max_size 

403 Maximum supercell size. 

404 target_concentrations 

405 Concentration of each species in the target structure, per 

406 sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a 

407 single sublattice Au-Pd structure, or 

408 ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` 

409 for a system with two sublattices. 

410 The symbols defining sublattices ('A', 'B' etc) can be 

411 found by printing the :attr:`cluster_space`. 

412 include_smaller_cells 

413 if ``True`` search among all supercell sizes including 

414 :attr:`max_size`, else search only among those exactly matching 

415 :attr:`max_size`. 

416 pbc 

417 Periodic boundary conditions for each direction, e.g., 

418 ``(True, True, False)``. The axes are defined by 

419 the cell of ``cluster_space.primitive_structure``. 

420 Default is periodic boundary in all directions. 

421 optimality_weight 

422 Controls weighting :math:`L` of perfect correlations, see 

423 :class:`mchammer.calculators.TargetVectorCalculator`. 

424 tol 

425 Numerical tolerance. 

426 """ 

427 target_concentrations = _validate_concentrations(target_concentrations, cluster_space) 

428 sqs_vector = _get_sqs_cluster_vector(cluster_space=cluster_space, 

429 target_concentrations=target_concentrations) 

430 # Translate concentrations to the format required for concentration 

431 # restricted enumeration 

432 cr: Dict[str, tuple] = {} 

433 sublattices = cluster_space.get_sublattices(cluster_space.primitive_structure) 

434 for sl in sublattices: 

435 mult_factor = len(sl.indices) / len(cluster_space.primitive_structure) 

436 if sl.symbol in target_concentrations: 

437 sl_conc = target_concentrations[sl.symbol] 

438 else: 

439 sl_conc = {sl.chemical_symbols[0]: 1.0} 

440 for species, value in sl_conc.items(): 

441 c = value * mult_factor 

442 if species in cr: 

443 cr[species] = (cr[species][0] + c, 

444 cr[species][1] + c) 

445 else: 

446 cr[species] = (c, c) 

447 

448 # Check to be sure... 

449 c_sum = sum(c[0] for c in cr.values()) 

450 assert abs(c_sum - 1) < tol # Should never happen, but... 

451 

452 as_list = cluster_space.as_list 

453 best_score = 1e9 

454 

455 if include_smaller_cells: 

456 sizes = list(range(1, max_size + 1)) 

457 else: 

458 sizes = [max_size] 

459 

460 # Prepare primitive structure with the right boundary conditions 

461 prim = cluster_space.primitive_structure 

462 if pbc is None: 

463 pbc = (True, True, True) 

464 prim.set_pbc(pbc) 

465 

466 # Enumerate and calculate score for each structuer 

467 for structure in enumerate_structures(prim, 

468 sizes, 

469 cluster_space.chemical_symbols, 

470 concentration_restrictions=cr): 

471 cv = cluster_space.get_cluster_vector(structure) 

472 score = compare_cluster_vectors(cv_1=cv, cv_2=sqs_vector, 

473 as_list=as_list, 

474 optimality_weight=optimality_weight, 

475 tol=tol) 

476 

477 if score < best_score: 

478 best_score = score 

479 best_structure = structure 

480 return best_structure 

481 

482 

483def occupy_structure_randomly(structure: Atoms, cluster_space: ClusterSpace, 

484 target_concentrations: dict, 

485 random_seed: int = None) -> None: 

486 """ 

487 Occupy a structure with quasirandom order but fulfilling 

488 :attr:`target_concentrations`. 

489 

490 Parameters 

491 ---------- 

492 structure 

493 ASE Atoms object that will be occupied randomly. 

494 cluster_space 

495 Cluster space (needed as it carries information about sublattices). 

496 target_concentrations 

497 Concentration of each species in the target structure, per 

498 sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a 

499 single sublattice Au-Pd structure, or 

500 ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` 

501 for a system with two sublattices. 

502 The symbols defining sublattices ('A', 'B' etc) can be 

503 found by printing the :attr:`cluster_space`. 

504 random_seed 

505 Seed for the random number generator. 

506 """ 

507 rng = np.random.default_rng(random_seed) 

508 target_concentrations = _validate_concentrations(cluster_space=cluster_space, 

509 concentrations=target_concentrations) 

510 

511 if not _concentrations_fit_structure(structure, cluster_space, target_concentrations): 

512 raise ValueError('Structure with {} atoms cannot accomodate ' 

513 'target concentrations {}'.format(len(structure), 

514 target_concentrations)) 

515 

516 symbols_all = [''] * len(structure) 

517 for sl in cluster_space.get_sublattices(structure): 

518 symbols: List[str] = [] # chemical_symbols in one sublattice 

519 chemical_symbols = sl.chemical_symbols 

520 if len(chemical_symbols) == 1: 

521 symbols += [chemical_symbols[0]] * len(sl.indices) 

522 else: 

523 sl_conc = target_concentrations[sl.symbol] 

524 for chemical_symbol in sl.chemical_symbols: 

525 n_symbol = int(round(len(sl.indices) * sl_conc[chemical_symbol])) 

526 symbols += [chemical_symbol] * n_symbol 

527 

528 # Should not happen but you never know 

529 assert len(symbols) == len(sl.indices) 

530 

531 # Shuffle to introduce randomness 

532 rng.shuffle(symbols) 

533 

534 # Assign symbols to the right indices 

535 for symbol, lattice_site in zip(symbols, sl.indices): 

536 symbols_all[lattice_site] = symbol 

537 

538 assert symbols_all.count('') == 0 

539 structure.set_chemical_symbols(symbols_all) 

540 

541 

542def _validate_concentrations(concentrations: dict, 

543 cluster_space: ClusterSpace, 

544 tol: float = 1e-5) -> dict: 

545 """ 

546 Validates concentration specification against a cluster space 

547 (raises `ValueError` if they do not match). 

548 

549 Parameters 

550 ---------- 

551 concentrations 

552 Concentration specification. 

553 cluster_space 

554 Cluster space to check against. 

555 tol 

556 Numerical tolerance. 

557 

558 Returns 

559 ------- 

560 An adapted version of concentrations, which is always a dictionary 

561 even if there is only one sublattice. 

562 """ 

563 sls = cluster_space.get_sublattices(cluster_space.primitive_structure) 

564 

565 if not isinstance(list(concentrations.values())[0], dict): 

566 concentrations = {'A': concentrations} 

567 

568 # Ensure concentrations sum to 1 at each sublattice 

569 for sl_conc in concentrations.values(): 

570 conc_sum = sum(list(sl_conc.values())) 

571 if abs(conc_sum - 1.0) > tol: 

572 raise ValueError('Concentrations must sum up ' 

573 'to 1 for each sublattice (not {})'.format(conc_sum)) 

574 

575 # Symbols need to match on each sublattice 

576 for sl in sls: 

577 if sl.symbol not in concentrations: 

578 if len(sl.chemical_symbols) > 1: 

579 raise ValueError('A sublattice ({}: {}) is missing in ' 

580 'target_concentrations'.format(sl.symbol, 

581 list(sl.chemical_symbols))) 

582 else: 

583 sl_conc = concentrations[sl.symbol] 

584 if tuple(sorted(sl.chemical_symbols)) != tuple(sorted(list(sl_conc.keys()))): 

585 raise ValueError('Chemical symbols on a sublattice ({}: {}) are ' 

586 'not the same as those in the specified ' 

587 'concentrations {}'.format(sl.symbol, list(sl.chemical_symbols), 

588 list(sl_conc.keys()))) 

589 

590 return concentrations 

591 

592 

593def _concentrations_fit_structure(structure: Atoms, 

594 cluster_space: ClusterSpace, 

595 concentrations: Dict[str, Dict[str, float]], 

596 tol: float = 1e-5) -> bool: 

597 """ 

598 Check if specified concentrations are commensurate with a 

599 certain supercell (including sublattices). 

600 

601 Parameters 

602 ---------- 

603 structure 

604 Atomic configuration to be checked. 

605 cluster_space 

606 Corresponding cluster space. 

607 concentrations 

608 Which concentrations, per sublattice, e.g., ``{'A': {'Ag': 0.3, 'Au': 0.7}}``. 

609 tol 

610 Numerical tolerance. 

611 """ 

612 # Check that concentrations are OK in each sublattice 

613 for sublattice in cluster_space.get_sublattices(structure): 

614 if sublattice.symbol in concentrations: 

615 sl_conc = concentrations[sublattice.symbol] 

616 for conc in sl_conc.values(): 

617 n_symbol = conc * len(sublattice.indices) 

618 if abs(int(round(n_symbol)) - n_symbol) > tol: 

619 return False 

620 return True 

621 

622 

623def _get_sqs_cluster_vector(cluster_space: ClusterSpace, 

624 target_concentrations: Dict[str, Dict[str, float]]) -> np.ndarray: 

625 """ 

626 Get the SQS vector for a certain cluster space and certain 

627 concentration. Here SQS vector refers to the cluster vector of an 

628 infintely large supercell with random occupation. 

629 

630 Parameters 

631 ---------- 

632 cluster_space 

633 The kind of lattice to be occupied. 

634 target_concentrations 

635 Concentration of each species in the target structure, 

636 per sublattice (for example `{'A': {'Ag': 0.5, 'Pd': 0.5}}`). 

637 """ 

638 target_concentrations = _validate_concentrations(concentrations=target_concentrations, 

639 cluster_space=cluster_space) 

640 

641 sublattice_to_index = {letter: index for index, 

642 letter in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ')} 

643 all_sublattices = cluster_space.get_sublattices( 

644 cluster_space.primitive_structure) 

645 

646 # Make a map from chemical symbol to integer, later on used 

647 # for evaluating cluster functions. 

648 # Internally, icet sorts species according to atomic numbers. 

649 # Also check that each symbol only occurs in one sublattice. 

650 symbol_to_integer_map = {} 

651 found_species: List[str] = [] 

652 for sublattice in all_sublattices: 

653 if len(sublattice.chemical_symbols) < 2: 

654 continue 

655 atomic_numbers = [periodic_table.index(sym) for sym in sublattice.chemical_symbols] 

656 for i, species in enumerate(sorted(atomic_numbers)): 

657 found_species.append(species) 

658 symbol_to_integer_map[periodic_table[species]] = i 

659 

660 # Target concentrations refer to all atoms, but probabilities only 

661 # to the sublattice. 

662 probabilities = {} 

663 for sl_conc in target_concentrations.values(): 

664 if len(sl_conc) == 1: 

665 continue 

666 for symbol in sl_conc.keys(): 

667 probabilities[symbol] = sl_conc[symbol] 

668 

669 # For every orbit, calculate average cluster function 

670 cv = [1.0] 

671 for orbit in cluster_space.as_list: 

672 if orbit['order'] < 1: 

673 continue 

674 

675 # What sublattices are there in this orbit? 

676 sublattices = [all_sublattices[sublattice_to_index[letter]] 

677 for letter in orbit['sublattices']] 

678 

679 # What chemical symbols do these sublattices refer to? 

680 symbol_groups = [sublattice.chemical_symbols for sublattice in sublattices] 

681 

682 # How many allowed species in each of those sublattices? 

683 nbr_of_allowed_species = [len(symbol_group) 

684 for symbol_group in symbol_groups] 

685 

686 # Calculate contribution from every possible combination of 

687 # symbols weighted with their probability 

688 cluster_product_average = 0 

689 for symbols in itertools.product(*symbol_groups): 

690 cluster_product = 1 

691 for i, symbol in enumerate(symbols): 

692 mc_vector_component = orbit['multicomponent_vector'][i] 

693 species_i = symbol_to_integer_map[symbol] 

694 prod = cluster_space.evaluate_cluster_function(nbr_of_allowed_species[i], 

695 mc_vector_component, 

696 species_i) 

697 cluster_product *= probabilities[symbol] * prod 

698 cluster_product_average += cluster_product 

699 cv.append(cluster_product_average) 

700 return np.array(cv)