Coverage for mchammer/ensembles/hybrid_ensemble.py: 95%
142 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-09-14 04:08 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-09-14 04:08 +0000
1import random
2from typing import Any
4from math import isclose
6from ase import Atoms
7from ase.units import kB
8from ase.data import atomic_numbers, chemical_symbols
10from .. import DataContainer
11from ..calculators.base_calculator import BaseCalculator
12from .thermodynamic_base_ensemble import ThermodynamicBaseEnsemble
13from .vcsgc_ensemble import get_phis
14from .semi_grand_canonical_ensemble import get_chemical_potentials
17class HybridEnsemble(ThermodynamicBaseEnsemble):
18 r"""
19 Instances of this class allows one to combine multiple ensembles.
20 In particular, a dictionary should be provided for each ensemble,
21 which must include the type (:attr:`ensemble`) as well as the index of
22 the sublattice (:attr:`sublattice_index`). In addition, it is possible
23 to provide lists of allowed symbols (:attr:`allowed_symbols`) and site
24 indices (:attr:`allowed_sites`) for the trial steps. Here the allowed
25 symbols must represent a subset of the elements that can occupy the sites on
26 the specified sublattice. Note that additional arguments are
27 required for the SGC and VCSGC ensembles, namely chemical
28 potentials (:attr:`chemical_potentials`) for the former and constraint
29 parameters (:attr:`phis` and :attr:`kappa`) for the latter. For more detailed
30 information regarding the different ensembles, please see
31 :class:`CanonicalEnsemble <mchammer.ensembles.CanonicalEnsemble>`,
32 :class:`SemiGrandCanonicalEnsemble
33 <mchammer.ensembles.SemiGrandCanonicalEnsemble>`, and
34 :class:`VCSGCEnsemble <mchammer.ensembles.VCSGCEnsemble>`.
36 This class is particularly useful for effectively sampling complex
37 multi-component systems with several active sublattices, in which
38 case different ensembles can be defined for each of the latter.
39 The fact that it is possible to set the allowed chemical symbols
40 means that one can vary the concentrations of a few selected
41 species, with the help of a VCSGC or semi-grand canonical
42 ensemble, while still allowing swaps between any two sites, using
43 a canonical ensemble (see also the below example). It is advisable
44 to carefully consider how to define the ensemble probabilities. By
45 default the ensembles are weighted by the sizes of the
46 corresponding sublattices, which should give suitable
47 probabilities in most cases. As is shown in the example below, it
48 might be prudent to provide different values if allowed symbols
49 are provided as well as for cases where there are several
50 ensembles that are active on different sublattices.
52 Parameters
53 ----------
55 structure
56 Atomic configuration to be used in the Monte Carlo simulation;
57 also defines the initial occupation vector.
58 calculator
59 Calculator to be used for calculating the potential changes
60 that enter the evaluation of the Metropolis criterion.
61 temperature
62 Temperature :math:`T` in appropriate units, commonly Kelvin.
63 ensemble_specs
64 A list of dictionaries, which should contain the following items:
66 * :attr:`ensemble`: Can be either ``'vcsgc'``, ``'semi-grand'``
67 or ``'canonical'``, lowercase and uppercase letters or any
68 combination thereof are accepted (required).
69 * :attr:`sublattice_index`: Index for the sublattice of
70 interest (required).
71 * :attr:`allowed_symbols`: List of allowed chemical symbols.
72 Default: read from :class:`ClusterSpace`.
73 * :attr:`allowed_sites`: List of allowed sites.
74 Default: all sites.
75 * :attr:`chemical_potentials`: Dictionary of chemical
76 potentials for each species :math:`\mu_i`.
77 The key denotes the species, the value
78 specifies the chemical potential in units that are
79 consistent with the underlying cluster expansion.
80 Only applicable and required for SGC ensembles.
81 * :attr:`phis`: Dictionary with average constraint parameters
82 :math:`\phi_i`. The key denotes the species. For a
83 :math:`N`-component sublattice, there should be :math:`N - 1`
84 different :math:`\phi_i` (referred to as :math:`\bar{\phi}` in [SadErh12]_).
85 Only applicable and required for VCSGC ensembles.
86 * :attr:`kappa`: Parameter that constrains the variance of the
87 concentration (referred to as :math:`\bar{\kappa}` in [SadErh12]_).
88 Only applicable and required for VCSGC ensembles.
89 probabilities
90 Probabilities for choosing a
91 particular ensemble with the same length as ensemble specs.
92 If left unspecified the probabilties are scaled to match the
93 sizes of the associated sublattices.
94 boltzmann_constant
95 Boltzmann constant :math:`k_B` in appropriate units, i.e.,
96 units that are consistent with the underlying cluster
97 expansion and the temperature units. Default: eV/K.
98 user_tag
99 Human-readable tag for ensemble. Default: ``None``.
100 random_seed
101 Seed for the random number generator used in the Monte Carlo simulation.
102 dc_filename
103 Name of file the data container associated with the
104 ensemble will be written to. If the file
105 exists it will be read, the data container will be appended,
106 and the file will be updated/overwritten.
107 data_container_write_period
108 Period in units of seconds at which the data container is
109 written to file. Writing periodically to file provides both
110 a way to examine the progress of the simulation and to
111 back up the data. Default: 600 s.
112 ensemble_data_write_interval
113 Interval at which data is written to the data container.
114 This includes for example the current value of the
115 calculator (i.e., usually the energy) as well as ensembles
116 specific fields such as temperature or the number of atoms
117 of different species.
118 Default: Number of sites in the :attr:`structure`.
119 trajectory_write_interval
120 Interval at which the current occupation vector of the
121 atomic configuration is written to the data container.
122 Default: Number of sites in the :attr:`structure`.
124 Example
125 -------
126 The following snippet illustrates how to carry out a simple Monte Carlo
127 simulation using a combination of one canonical and one VCSGC ensemble.
128 Specifically, the concentration of one species (Au) is kept constant
129 while the others (Ag and Pd) are varied, while swaps are still allowed.
130 Here, the parameters of the cluster expansion are set to emulate a simple
131 Ising model in order to obtain an example that can be run without
132 modification. In practice, one should of course use a proper cluster
133 expansion::
135 >>> from ase.build import bulk
136 >>> from icet import ClusterExpansion, ClusterSpace
137 >>> from mchammer.calculators import ClusterExpansionCalculator
139 >>> # prepare cluster expansion
140 >>> # the setup emulates a second nearest-neighbor (NN) Ising model
141 >>> # (zerolet and singlet ECIs are zero; only first and second neighbor
142 >>> # pairs are included)
143 >>> prim = bulk('Au')
144 >>> cs = ClusterSpace(prim, cutoffs=[4.3],
145 ... chemical_symbols=['Ag', 'Au', 'Pd'])
146 >>> ce = ClusterExpansion(
147 ... cs, [0, 0, 0, 0.1, 0.1, 0.1, -0.02, -0.02, -0.02])
149 >>> # define structure object
150 >>> structure = prim.repeat(3)
151 >>> for i, atom in enumerate(structure):
152 >>> if i % 2 == 0:
153 >>> atom.symbol = 'Ag'
154 >>> elif i % 3 == 0:
155 >>> atom.symbol = 'Pd'
157 >>> # the default probabilities for this case would be [0.5, 0.5], but
158 >>> # since the VCSGC ensemble only performs flips on a subset of all
159 >>> # sites on the sublattice, namely those originally occupied by Ag
160 >>> # and Pd atoms, specific values will be provided
161 >>> weights = [len(structure),
162 ... len([s for s in structure.symbols if s != 'Au'])]
163 >>> norm = sum(weights)
164 >>> probabilities = [w / norm for w in weights]
166 >>> # set up and run MC simulation
167 >>> calc = ClusterExpansionCalculator(structure, ce)
168 >>> ensemble_specs = [
169 ... {'ensemble': 'canonical', 'sublattice_index': 0},
170 ... {'ensemble': 'vcsgc', 'sublattice_index': 0,
171 ... 'phis': {'Ag': -0.2}, 'kappa': 200,
172 ... 'allowed_symbols':['Ag', 'Pd']}]
173 >>> mc = HybridEnsemble(structure=structure, calculator=calc,
174 ... ensemble_specs=ensemble_specs,
175 ... temperature=600, probabilities=probabilities,
176 ... dc_filename='myrun_hybrid.dc')
177 >>> mc.run(100) # carry out 100 trial steps
178 """
180 def __init__(self,
181 structure: Atoms,
182 calculator: BaseCalculator,
183 temperature: float,
184 ensemble_specs: list[dict],
185 probabilities: list[float] = None,
186 boltzmann_constant: float = kB,
187 user_tag: str = None,
188 random_seed: int = None,
189 dc_filename: str = None,
190 data_container: str = None,
191 data_container_write_period: float = 600,
192 ensemble_data_write_interval: int = None,
193 trajectory_write_interval: int = None) -> None:
195 # define available ensembles
196 self._ensemble_trial_steps = dict([
197 ('canonical', self.do_canonical_swap),
198 ('semi-grand', self.do_sgc_flip),
199 ('vcsgc', self.do_vcsgc_flip),
200 ])
202 self._ensemble_parameters = dict(temperature=temperature)
204 self._trial_steps_per_ensemble = {'ensemble_{}'.format(i): 0 for i in
205 range(len(ensemble_specs))}
207 # process the list of ensembles and parameters
208 self._process_ensemble_specs(ensemble_specs)
210 super().__init__(
211 structure=structure,
212 calculator=calculator,
213 user_tag=user_tag,
214 random_seed=random_seed,
215 boltzmann_constant=boltzmann_constant,
216 dc_filename=dc_filename,
217 data_container=data_container,
218 data_container_class=DataContainer,
219 data_container_write_period=data_container_write_period,
220 ensemble_data_write_interval=ensemble_data_write_interval,
221 trajectory_write_interval=trajectory_write_interval)
223 # postprocess the list of ensembles and parameters
224 self._postprocess_ensemble_args()
226 # set the probabilities
227 self._process_probabilities(probabilities)
229 @property
230 def temperature(self) -> float:
231 """ Current temperature. """
232 return self._ensemble_parameters['temperature']
234 @property
235 def probabilities(self) -> list[float]:
236 """ Ensemble propabilities. """
237 return self._probabilities
239 @property
240 def trial_steps_per_ensemble(self) -> dict[str, int]:
241 """ Number of Monte Carlo trial steps for each ensemble. """
242 return self._trial_steps_per_ensemble
244 def _process_ensemble_specs(self, ensemble_specs: list[dict]) -> None:
245 r"""Process the list of ensembles and parameters.
247 Parameters
248 ----------
249 ensemble_specs: list[dict]
250 A list of dictionaries, which should contain the following items:
251 * 'ensemble', which could be either 'vcsgc'; 'semi-grand' or 'canonical', lowercase
252 and upercase letters or any combination thereof are accepted
253 * 'sublattice_index', index for the sublattice of interest
254 * 'allowed_symbols', list of allowed chemical symbols
255 * 'allowed_sites', list of indices of allowed sites
256 * 'chemical_potentials', a dictionary of chemical potentials for each species
257 :math:`\mu_i`; the key denotes the species, the value specifies the chemical
258 potential in units that are consistent with the underlying cluster expansion
259 * 'phis ', dictionary with average constraint parameters :math:`\phi_i`; the key
260 denotes the species; for a N-component sublattice, there should be N-1
261 different `\phi_i`
262 * 'kappa', parameter that constrains the variance of the concentration
263 """
265 ensemble_args = []
267 for ind, ensemble_spec in enumerate(ensemble_specs):
269 ensemble_arg: dict[str, Any] = {}
270 tag = f'ensemble_{ind}'
271 ensemble_arg['tag'] = tag
273 # check the ensemble name
274 if 'ensemble' not in ensemble_spec:
275 raise ValueError(
276 f"The dictionary {ensemble_spec} lacks the required key 'ensemble'")
277 ensemble = ensemble_spec['ensemble'].lower()
278 if ensemble not in self._ensemble_trial_steps.keys():
279 msg = ['Ensemble not available']
280 msg += ['Please choose one of the following:']
281 for key in self._ensemble_trial_steps.keys():
282 msg += [' * ' + key]
283 raise ValueError('\n'.join(msg))
284 ensemble_arg['ensemble'] = ensemble
285 self._ensemble_parameters[tag] = ensemble
287 # check that all required keys, and no unknown keys, are present
288 keys = ['ensemble', 'sublattice_index', 'allowed_symbols', 'allowed_sites']
289 if ensemble == 'semi-grand':
290 keys = ['chemical_potentials'] + keys
291 elif ensemble == 'vcsgc':
292 keys = ['phis', 'kappa'] + keys
293 for key in keys[:-2]:
294 if key not in ensemble_spec:
295 raise ValueError(f"The dictionary {ensemble_spec} lacks the key '{key}',"
296 f' which is required for {ensemble} ensembles')
297 for key in ensemble_spec.keys():
298 if key not in keys:
299 raise ValueError(f"Unknown key '{key}', for a {ensemble} ensemble,"
300 f' in the dictionary {ensemble_spec}')
302 # record the sublattice index
303 ensemble_arg['sublattice_index'] = ensemble_spec['sublattice_index']
305 # process chemical potentials
306 if 'chemical_potentials' in ensemble_spec:
307 chemical_potentials = get_chemical_potentials(ensemble_spec['chemical_potentials'])
308 ensemble_arg['chemical_potentials'] = chemical_potentials
309 for atnum, chempot in chemical_potentials.items():
310 mu_sym = '{}_mu_{}'.format(tag, chemical_symbols[atnum])
311 self._ensemble_parameters[mu_sym] = chempot
313 # process phis
314 if 'phis' in ensemble_spec:
315 phis = get_phis(ensemble_spec['phis'])
316 ensemble_arg['phis'] = phis
317 for sym, phi in phis.items():
318 if isinstance(sym, str): 318 ↛ 319line 318 didn't jump to line 319 because the condition on line 318 was never true
319 chemical_symbol = sym
320 else:
321 chemical_symbol = chemical_symbols[sym]
322 phi_sym = '{}_phi_{}'.format(tag, chemical_symbol)
323 self._ensemble_parameters[phi_sym] = phi
325 # process kappa
326 if 'kappa' in ensemble_spec:
327 ensemble_arg['kappa'] = ensemble_spec['kappa']
328 self._ensemble_parameters['{}_kappa'.format(tag)] = ensemble_spec['kappa']
330 # record the allowed chemical symbols
331 if 'allowed_symbols' in ensemble_spec:
332 ensemble_arg['allowed_symbols'] = ensemble_spec['allowed_symbols']
334 # record the allowed sites
335 if 'allowed_sites' in ensemble_spec: 335 ↛ 336line 335 didn't jump to line 336 because the condition on line 335 was never true
336 ensemble_arg['allowed_sites'] = ensemble_spec['allowed_sites']
338 ensemble_args.append(ensemble_arg)
340 self._ensemble_args = ensemble_args
342 def _postprocess_ensemble_args(self):
343 """Process the list of dictionaries with ensemble specific parameters.
344 """
346 for i in range(len(self._ensemble_args)):
348 # check the sublattice index
349 self._check_sublattice_index(self._ensemble_args[i]['sublattice_index'])
351 # extract the allowed species
352 if 'allowed_symbols' in self._ensemble_args[i]:
353 self._ensemble_args[i]['allowed_species'] =\
354 self._extract_allowed_species(self._ensemble_args[i]['allowed_symbols'],
355 self._ensemble_args[i]['sublattice_index'])
356 del self._ensemble_args[i]['allowed_symbols']
357 else:
358 self._ensemble_args[i]['allowed_species'] = None
360 # handle lack of allowed sites
361 if 'allowed_sites' not in self._ensemble_args[i]: 361 ↛ 364line 361 didn't jump to line 364 because the condition on line 361 was always true
362 self._ensemble_args[i]['allowed_sites'] = None
364 if self._ensemble_args[i]['ensemble'] == 'vcsgc':
365 # Check that each sublattice has N - 1 phis
366 count_specified_elements = 0
367 if self._ensemble_args[i]['allowed_species'] is None: 367 ↛ 371line 367 didn't jump to line 371 because the condition on line 367 was always true
368 allowed_species =\
369 self.sublattices[self._ensemble_args[i]['sublattice_index']].atomic_numbers
370 else:
371 allowed_species = self._ensemble_args[i]['allowed_species']
372 for number in allowed_species:
373 if number in self._ensemble_args[i]['phis'].keys():
374 count_specified_elements += 1
375 if count_specified_elements != len(allowed_species) - 1:
376 raise ValueError('phis must be set for N - 1 elements on a sublattice with'
377 ' N elements')
379 def _check_sublattice_index(self, sublattice_index: int):
380 """Check the :attr:`sublattice_index` item in the :attr:`ensemble_spec` dictionary.
382 Parameters
383 ----------
384 sublattice_index
385 Specific sublattice to consider provided as as an index or a symbol.
386 """
388 if not isinstance(sublattice_index, int):
389 raise TypeError("'sublattice_index' must be an integer, not"
390 f' {format(type(sublattice_index))}')
392 # check that the sublattice exists
393 if sublattice_index not in range(len(self.sublattices)):
394 raise ValueError('There is no sublattice with index {}'.format(sublattice_index))
396 # check that the sublattice is active
397 if len(self.sublattices[sublattice_index].chemical_symbols) == 1:
398 raise ValueError('The sublattice {} is inactive'.format(sublattice_index))
400 def _extract_allowed_species(
401 self,
402 allowed_symbols: list[str],
403 sublattice_index: int,
404 ) -> list[int]:
405 """Check and extract the allowed species from the
406 :attr:`allowed_symbols` in the :attr:`ensemble_spec`
407 dictionary.
409 Parameters
410 ----------
411 allowed_symbols
412 List of allowed chemical symbols.
413 sublattice_index
414 Index for the relevant sublattice.
416 """
418 if not isinstance(allowed_symbols, list) or not all(
419 [isinstance(i, str) for i in allowed_symbols]):
420 raise TypeError(
421 f"'allowed_symbols' must be a list[str], not {type(allowed_symbols)}")
422 for symbol in allowed_symbols:
423 if symbol not in self.sublattices[sublattice_index].chemical_symbols:
424 raise ValueError('The species {} is not allowed on sublattice'
425 ' {}'.format(symbol, sublattice_index))
427 return [atomic_numbers[s] for s in allowed_symbols]
429 def _process_probabilities(self, probabilities: list[float]):
430 """Process the list of probabilities.
432 Parameters
433 ----------
434 probabilities
435 Probabilities for choosing a particular ensemble with the same
436 length as :attr:`self._ensemble_args`.
437 """
439 if probabilities is None:
440 # use the sizes of the associated sublattices when calculating the ensemble
441 # probabilities
442 weights = [len(self.sublattices[ensemble_arg['sublattice_index']].indices) for
443 ensemble_arg in self._ensemble_args]
444 norm = sum(weights)
445 probabilities = [w / norm for w in weights]
446 else:
447 if len(probabilities) != len(self._ensemble_args):
448 raise ValueError('The number of probabilities must be match the number of'
449 ' ensembles')
451 if not isclose(sum(probabilities), 1.0):
452 raise ValueError('The sum of all probabilities must be equal to 1')
454 self._probabilities = probabilities
456 def _do_trial_step(self):
457 """ Carries out one Monte Carlo trial step. """
458 # randomly pick an ensemble
459 ensemble_arg = random.choices(self._ensemble_args, weights=self._probabilities)[0]
461 # count number of trial steps for each ensemble
462 self._trial_steps_per_ensemble[ensemble_arg['tag']] += 1
464 if ensemble_arg['ensemble'] == 'canonical' and not self.configuration.is_swap_possible( 464 ↛ 466line 464 didn't jump to line 466 because the condition on line 464 was never true
465 ensemble_arg['sublattice_index'], ensemble_arg['allowed_species']):
466 return 0
467 else:
468 arguments = {key: val for key, val in ensemble_arg.items() if key not in
469 ['ensemble', 'tag']}
470 return self._ensemble_trial_steps[ensemble_arg['ensemble']](**arguments)
472 def _get_ensemble_data(self) -> dict:
473 """
474 Returns a dict with the default data of the ensemble. This includes
475 atom counts and free energy derivative.
476 """
477 data = super()._get_ensemble_data()
479 ensemble_types = [e['ensemble'] for e in self._ensemble_args]
481 # free energy derivative
482 if 'vcsgc' in ensemble_types: 482 ↛ 490line 482 didn't jump to line 490 because the condition on line 482 was always true
483 for ensemble_arg in self._ensemble_args:
484 if 'vcsgc' == ensemble_arg['ensemble']:
485 data.update(self._get_vcsgc_free_energy_derivatives(
486 ensemble_arg['phis'], ensemble_arg['kappa'],
487 ensemble_arg['sublattice_index']))
489 # species counts
490 if any([e in ensemble_types for e in ['vcsgc', 'semi-grand']]): 490 ↛ 493line 490 didn't jump to line 493 because the condition on line 490 was always true
491 data.update(self._get_species_counts())
493 return data