Coverage for mchammer/data_containers/wang_landau_data_container.py: 99%
235 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-09-14 04:08 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-09-14 04:08 +0000
1"""Definition of the Wang-Landau data container class."""
3from warnings import warn
4from collections import Counter, OrderedDict
5from typing import Any, BinaryIO, Optional, TextIO, Union
7import numpy as np
8import pandas as pd
10from ase.units import kB
11from ase import Atoms
12from pandas import DataFrame, concat as pd_concat
14from icet import ClusterSpace
15from .base_data_container import BaseDataContainer
18class WangLandauDataContainer(BaseDataContainer):
19 """
20 Data container for storing information concerned with :ref:`Wang-Landau
21 simulation <wang_landau_ensemble>` performed with :program:`mchammer`.
23 Parameters
24 ----------
25 structure
26 Reference atomic structure associated with the data container.
27 ensemble_parameters
28 Parameters associated with the underlying ensemble.
29 metadata
30 Metadata associated with the data container.
31 """
33 def _update_last_state(self,
34 last_step: int,
35 occupations: list[int],
36 accepted_trials: int,
37 random_state: tuple,
38 fill_factor: float,
39 fill_factor_history: dict[int, float],
40 entropy_history: dict[int, dict[int, float]],
41 histogram=dict[int, int],
42 entropy=dict[int, float]):
43 """Updates last state of the Wang-Landau simulation.
45 Parameters
46 ----------
47 last_step
48 Last trial step.
49 occupations
50 Occupation vector observed during the last trial step.
51 accepted_trial
52 Number of current accepted trial steps.
53 random_state
54 Tuple representing the last state of the random generator.
55 fill_factor
56 Fill factor of Wang-Landau algorithm.
57 fill_factor_history
58 Evolution of the fill factor of Wang-Landau algorithm (key=MC
59 trial step, value=fill factor).
60 entropy_history
61 Evolution of the (relative) entropy accumulated during Wang-Landau
62 simulation (key=MC trial step, value=(key=bin, value=entropy)).
63 histogram
64 Histogram of states visited during Wang-Landau simulation.
65 entropy
66 (Relative) entropy accumulated during Wang-Landau simulation.
67 """
68 super()._update_last_state(
69 last_step=last_step,
70 occupations=occupations,
71 accepted_trials=accepted_trials,
72 random_state=random_state)
73 self._last_state['fill_factor'] = fill_factor
74 self._last_state['fill_factor_history'] = fill_factor_history
75 self._last_state['entropy_history'] = entropy_history
76 self._last_state['histogram'] = histogram
77 self._last_state['entropy'] = entropy
79 @property
80 def fill_factor(self) -> float:
81 """ Final value of the fill factor in the Wang-Landau algorithm. """
82 return float(self._last_state['fill_factor'])
84 @property
85 def fill_factor_history(self) -> DataFrame:
86 """ Evolution of the fill factor in the Wang-Landau algorithm. """
87 return DataFrame({'mctrial': list(self._last_state['fill_factor_history'].keys()),
88 'fill_factor': list(self._last_state['fill_factor_history'].values())})
90 def get(self,
91 *tags: str,
92 fill_factor_limit: float = None) \
93 -> Union[np.ndarray, list[Atoms], tuple[np.ndarray, list[Atoms]]]:
94 """Returns the accumulated data for the requested observables,
95 including configurations stored in the data container. The latter
96 can be achieved by including ``'trajectory'`` as one of the tags.
98 Parameters
99 ----------
100 tags
101 Names of the requested properties.
102 fill_factor_limit
103 Return data recorded up to the point when the specified fill
104 factor limit was reached, or ``None`` if the entropy history is
105 empty or the last fill factor is above the limit; otherwise
106 return all data.
108 Raises
109 ------
110 ValueError
111 If :attr:`tags` is empty.
112 ValueError
113 If observables are requested that are not in the data container.
115 Examples
116 --------
117 Below the :func:`get` method is
118 illustrated but first we require a data container.
120 >>> from ase import Atoms
121 >>> from icet import ClusterExpansion, ClusterSpace
122 >>> from mchammer.calculators import ClusterExpansionCalculator
123 >>> from mchammer.ensembles import WangLandauEnsemble
125 >>> # prepare cluster expansion
126 >>> prim = Atoms('Au', positions=[[0, 0, 0]], cell=[1, 1, 10], pbc=True)
127 >>> cs = ClusterSpace(prim, cutoffs=[1.1], chemical_symbols=['Ag', 'Au'])
128 >>> ce = ClusterExpansion(cs, [0, 0, 2])
130 >>> # prepare initial configuration
131 >>> structure = prim.repeat((4, 4, 1))
132 >>> for k in range(8):
133 ... structure[k].symbol = 'Ag'
135 >>> # set up and run Wang-Landau simulation
136 >>> calculator = ClusterExpansionCalculator(structure, ce)
137 >>> mc = WangLandauEnsemble(structure=structure,
138 ... calculator=calculator,
139 ... energy_spacing=1,
140 ... dc_filename='ising_2d_run.dc',
141 ... fill_factor_limit=0.3)
142 >>> # N.B.: in practice one requires more steps
143 >>> mc.run(number_of_trial_steps=len(structure) * 3000)
145 We can now access the data container by reading it from file
146 by using the :func:`read` method. For the purpose of this
147 example, however, we access the data container associated with
148 the ensemble directly.
150 >>> dc = mc.data_container
152 The following lines illustrate how to use the :func:`get` method
153 for extracting data from the data container.
155 >>> # obtain all values of the potential represented by
156 >>> # the cluster expansion and the MC trial step along the
157 >>> # trajectory
158 >>> import matplotlib.pyplot as plt
159 >>> s, p = dc.get('mctrial', 'potential')
160 >>> _ = plt.plot(s, p)
162 >>> # as above but this time only included data recorded up to
163 >>> # the point when the fill factor reached below 0.6
164 >>> s, p = dc.get('mctrial', 'potential', fill_factor_limit=0.6)
165 >>> _ = plt.plot(s, p)
166 >>> plt.show(block=False)
168 >>> # obtain configurations along the trajectory along with
169 >>> # their potential
170 >>> p, confs = dc.get('potential', 'trajectory')
172 """
174 if len(tags) == 0: 174 ↛ 175line 174 didn't jump to line 175 because the condition on line 174 was never true
175 raise TypeError('Missing tags argument')
177 local_tags = ['occupations' if tag == 'trajectory' else tag for tag in tags]
179 for tag in local_tags:
180 if tag in 'mctrial':
181 continue
182 if tag not in self.observables: 182 ↛ 183line 182 didn't jump to line 183 because the condition on line 182 was never true
183 raise ValueError('No observable named {} in data container'.format(tag))
185 # collect data
186 mctrials = [row_dict['mctrial'] for row_dict in self._data_list]
187 data = pd.DataFrame.from_records(self._data_list, index=mctrials, columns=local_tags)
188 if fill_factor_limit is not None:
189 # only include data for fill factors up to the limit
190 df_ffh = self.fill_factor_history.astype(
191 {'mctrial': np.int64, 'fill_factor': np.float64})
192 mctrial_last = df_ffh.loc[
193 df_ffh.fill_factor <= fill_factor_limit].mctrial.min()
194 data = data.loc[data.index <= mctrial_last]
195 data.dropna(inplace=True)
197 # handling of trajectory
198 def occupation_to_atoms(occupation):
199 structure = self.structure.copy()
200 structure.numbers = occupation
201 return structure
203 data_list = []
204 for tag in local_tags:
205 if tag == 'occupations':
206 traj = [occupation_to_atoms(o) for o in data['occupations']]
207 data_list.append(traj)
208 else:
209 data_list.append(data[tag].values)
211 if len(data_list) > 1:
212 return tuple(data_list)
213 else:
214 return data_list[0]
216 def get_entropy(self, fill_factor_limit: float = None) -> DataFrame:
217 """Returns the (relative) entropy from this data container accumulated
218 during a :ref:`Wang-Landau simulation <wang_landau_ensemble>`. Returns
219 ``None`` if the data container does not contain the required
220 information.
222 Parameters
223 ----------
224 fill_factor_limit
225 Return the entropy recorded up to the point when the specified fill
226 factor limit was reached, or ``None`` if the entropy history is
227 empty or the last fill factor is above the limit. Otherwise
228 return the entropy for the last state.
229 """
231 if 'entropy' not in self._last_state:
232 warn('There is no entropy information in the data container.')
233 return None
234 entropy = self._last_state['entropy']
235 if fill_factor_limit is not None:
236 if 'entropy_history' not in self._last_state or \
237 len(self._last_state['entropy_history']) == 0:
238 warn('The entropy history is empty.')
239 return None
240 if self._last_state['fill_factor'] > fill_factor_limit:
241 warn('The last fill factor {} is higher than the limit'
242 ' {}.'.format(self.fill_factor, fill_factor_limit))
243 return None
244 for step, fill_factor in self._last_state['fill_factor_history'].items(): 244 ↛ 250line 244 didn't jump to line 250 because the loop on line 244 didn't complete
245 if fill_factor <= fill_factor_limit:
246 entropy = self._last_state['entropy_history'][step]
247 break
249 # compile entropy into DataFrame
250 energy_spacing = self.ensemble_parameters['energy_spacing']
251 df = DataFrame(data={'energy': energy_spacing * np.array(list(entropy.keys())),
252 'entropy': np.array(list(entropy.values()))},
253 index=list(entropy.keys()))
254 # shift entropy for numerical stability
255 df['entropy'] -= np.min(df['entropy'])
257 return df
259 def get_histogram(self) -> DataFrame:
260 """Returns the histogram from this data container accumulated since the
261 last update of the fill factor. Returns ``None`` if the data container
262 does not contain the required information.
263 """
265 if 'histogram' not in self._last_state:
266 return None
268 # compile histogram into DataFrame
269 histogram = self._last_state['histogram']
270 energy_spacing = self.ensemble_parameters['energy_spacing']
271 df = DataFrame(data={'energy': energy_spacing * np.array(list(histogram.keys())),
272 'histogram': np.array(list(histogram.values()))},
273 index=list(histogram.keys()))
275 return df
277 @classmethod
278 # todo: cls and the return should be type hinted as BaseDataContainer.
279 # Unfortunately, this requires from __future__ import annotations, which
280 # in turn requires Python 3.8.
281 def read(cls, infile: Union[str, BinaryIO, TextIO], old_format: bool = False):
282 """Reads data container from file.
284 Parameters
285 ----------
286 infile
287 file from which to read
288 old_format
289 If true use old json format to read runtime data; default to false
291 Raises
292 ------
293 FileNotFoundError
294 if file is not found (str)
295 ValueError
296 if file is of incorrect type (not a tarball)
297 """
298 dc = super(WangLandauDataContainer, cls).read(infile=infile, old_format=old_format)
300 for tag, value in dc._last_state.items():
301 if tag in ['histogram', 'entropy', 'fill_factor_history', 'entropy_history']:
302 # the following accounts for the fact that the keys of dicts
303 # are converted to str when writing to json and have to
304 # converted back into numerical values
305 dc._last_state[tag] = {}
306 for key, val in value.items():
307 if isinstance(val, dict):
308 val = {int(k): v for k, v in val.items()}
309 dc._last_state[tag][int(key)] = val
311 return dc
314def get_density_of_states_wl(dcs: Union[WangLandauDataContainer,
315 dict[Any, WangLandauDataContainer]],
316 fill_factor_limit: float = None) \
317 -> tuple[DataFrame, dict]:
318 """Returns a pandas DataFrame with the total density of states from a
319 :ref:`Wang-Landau simulation <wang_landau_ensemble>`. If a dict of data
320 containers is provided the function also returns a dictionary that
321 contains the standard deviation between the entropy of neighboring data
322 containers in the overlap region. These errors should be small compared to
323 the variation of the entropy across each bin.
325 The function can handle both a single data container and a dict thereof.
326 In the latter case the data containers must cover a contiguous energy
327 range and must at least partially overlap.
329 Parameters
330 ----------
331 dcs
332 Data container(s), from which to extract the density of states.
333 fill_factor_limit
334 Calculate the density of states using the entropy recorded up to the
335 point when the specified fill factor limit was reached. Otherwise
336 return the density of states for the last state.
338 Raises
339 ------
340 TypeError
341 If :attr:`dcs` does not correspond to not a single (dictionary) of data
342 container(s) from which the entropy can retrieved.
343 ValueError
344 If the data container does not contain entropy information.
345 ValueError
346 If a fill factor limit has been provided and the data container either
347 does not contain information about the entropy history or if the last
348 fill factor is higher than the specified limit.
349 ValueError
350 If multiple data containers are provided and there are inconsistencies
351 with regard to basic simulation parameters such as system size or
352 energy spacing.
353 ValueError
354 If multiple data containers are provided and there is at least
355 one energy region without overlap.
356 """
358 # preparations
359 if isinstance(dcs, WangLandauDataContainer):
360 # fetch raw entropy data from data container
361 df = dcs.get_entropy(fill_factor_limit)
362 if df is None:
363 raise ValueError('Entropy information could not be retrieved from'
364 ' the data container {}.'.format(dcs))
365 errors = None
366 if len(dcs.fill_factor_history) == 0 or dcs.fill_factor > 1e-4:
367 warn('The data container appears to contain data from an'
368 ' underconverged Wang-Landau simulation.')
370 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):
371 # minimal consistency checks
372 tags = list(dcs.keys())
373 tagref = tags[0]
374 dcref = dcs[tagref]
375 for tag in tags:
376 dc = dcs[tag]
377 if len(dc.structure) != len(dcref.structure):
378 raise ValueError('Number of atoms differs between data containers ({}: {}, {}: {})'
379 .format(tagref, dcref.ensemble_parameters['n_atoms'],
380 tag, dc.ensemble_parameters['n_atoms']))
381 for param in ['energy_spacing', 'trial_move']:
382 if dc.ensemble_parameters[param] != dcref.ensemble_parameters[param]:
383 raise ValueError('{} differs between data containers ({}: {}, {}: {})'
384 .format(param,
385 tagref, dcref.ensemble_parameters['n_atoms'],
386 tag, dc.ensemble_parameters['n_atoms']))
387 if len(dc.fill_factor_history) == 0 or dc.fill_factor > 1e-4:
388 warn('Data container {} appears to contain data from an'
389 ' underconverged Wang-Landau simulation.'.format(tag))
391 # fetch raw entropy data from data containers
392 entropies = {}
393 for tag, dc in dcs.items():
394 entropies[tag] = dc.get_entropy(fill_factor_limit)
395 if entropies[tag] is None:
396 raise ValueError('Entropy information could not be retrieved'
397 ' from the data container {}.'.format(dc))
399 # sort entropies by energy
400 entropies = OrderedDict(sorted(entropies.items(), key=lambda row: row[1].energy.iloc[0]))
402 # line up entropy data
403 errors = {}
404 tags = list(entropies.keys())
405 for tag1, tag2 in zip(tags[:-1], tags[1:]):
406 df1 = entropies[tag1]
407 df2 = entropies[tag2]
408 if all(df2.energy.isin(df1.energy)):
409 warn('Window {} is a subset of {}'.format(tag2, tag1))
410 left_lim = np.min(df2.energy)
411 right_lim = np.max(df1.energy)
412 if left_lim >= right_lim:
413 raise ValueError('No overlap in the energy range {}...{}.\n'
414 .format(right_lim, left_lim) +
415 ' The closest data containers have tags "{}" and "{}".'
416 .format(tag1, tag2))
417 df1_ = df1[(df1.energy >= left_lim) & (df1.energy <= right_lim)]
418 df2_ = df2[(df2.energy >= left_lim) & (df2.energy <= right_lim)]
419 offset = (df2_.entropy - df1_.entropy).mean()
420 errors['{}-{}'.format(tag1, tag2)] = (df2_.entropy - df1_.entropy).std()
421 entropies[tag2].entropy = entropies[tag2].entropy - offset
423 # compile entropy over the entire energy range
424 data: dict[float, float] = {}
425 indices = {}
426 counts = Counter()
427 for df in entropies.values():
428 for index, en, ent in zip(df.index, df.energy, df.entropy):
429 data[en] = data.get(en, 0) + ent
430 counts[en] += 1
431 indices[en] = index
432 for en in data:
433 data[en] = data[en] / counts[en]
435 # center entropy to prevent possible numerical issues
436 entmin = np.min(list(data.values()))
437 df = DataFrame(data={'energy': np.array(list(data.keys())),
438 'entropy': np.array(np.array(list(data.values()))) - entmin},
439 index=list(indices.values()))
440 else:
441 raise TypeError('dcs ({}) must be a data container with entropy data'
442 ' or be a list of data containers'
443 .format(type(dcs)))
445 # density of states
446 S_max = df.entropy.max()
447 df['density'] = np.exp(df.entropy - S_max) / np.sum(np.exp(df.entropy - S_max))
449 return df, errors
452def _extract_filter_data(dc: BaseDataContainer,
453 columns_to_keep: list[str],
454 fill_factor_limit: float = None) -> DataFrame:
455 """ Extract data from a data container and filter the content.
457 Parameters
458 ----------
459 dc
460 Data container from which to extract the data.
461 columns_to_keep
462 list of requested properties.
463 fill_factor_limit
464 Only include data recorded up to the point when the specified fill
465 factor limit was reached when computing averages. Otherwise include
466 all data.
467 """
469 df = dc.data
470 if fill_factor_limit is not None:
471 # only include data for fill factors up to the limit
472 df_ffh = dc.fill_factor_history.astype(
473 {'mctrial': np.int64, 'fill_factor': np.float64})
474 mctrial_last = df_ffh.loc[
475 df_ffh.fill_factor <= fill_factor_limit].mctrial.min()
476 df = df.loc[df.mctrial <= mctrial_last]
478 return df.filter(columns_to_keep)
481def get_average_observables_wl(dcs: Union[WangLandauDataContainer,
482 dict[Any, WangLandauDataContainer]],
483 temperatures: list[float],
484 observables: list[str] = None,
485 boltzmann_constant: float = kB,
486 fill_factor_limit: float = None) -> DataFrame:
487 """Returns the average and the standard deviation of the energy from a
488 :ref:`Wang-Landau simulation <wang_landau_ensemble>` for the temperatures
489 specified. If the :attr:`observables` keyword argument is specified
490 the function will also return the mean and standard deviation of the
491 specified observables.
493 Parameters
494 ----------
495 dcs
496 Data container(s) from which to extract density of states
497 as well as observables.
498 temperatures
499 Temperatures at which to compute the averages.
500 observables
501 Observables for which to compute averages; the observables
502 must refer to fields in the data container.
503 boltzmann_constant
504 Boltzmann constant :math:`k_B` in appropriate
505 units, i.e., units that are consistent
506 with the underlying cluster expansion
507 and the temperature units [default: eV/K].
508 fill_factor_limit
509 Use data recorded up to the point when the specified fill factor limit
510 was reached when computing averages. Otherwise use data for the last
511 state.
513 Raises
514 ------
515 ValueError
516 If the data container(s) do(es) not contain entropy data
517 from Wang-Landau simulation.
518 ValueError
519 If data container(s) do(es) not contain requested observable.
520 """
522 def check_observables(dc: WangLandauDataContainer, observables: Optional[list[str]]) -> None:
523 """ Helper function that checks that observables are available in data frame. """
524 if observables is None:
525 return
526 for obs in observables:
527 if obs not in dc.data.columns:
528 raise ValueError('Observable ({}) not in data container.\n'
529 'Available observables: {}'.format(obs, dc.data.columns))
531 # preparation of observables
532 columns_to_keep = ['potential', 'density']
533 if observables is not None:
534 columns_to_keep.extend(observables)
536 # check that observables are available in data container
537 # and prepare comprehensive data frame with relevant information
538 if isinstance(dcs, WangLandauDataContainer):
539 check_observables(dcs, observables)
540 df_combined = _extract_filter_data(dcs, columns_to_keep, fill_factor_limit)
541 dcref = dcs
542 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):
543 dfs = []
544 for dc in dcs.values():
545 check_observables(dc, observables)
546 dfs.append(_extract_filter_data(dc, columns_to_keep, fill_factor_limit))
547 df_combined = pd_concat([df for df in dfs], ignore_index=True)
548 dcref = list(dcs.values())[0]
549 else:
550 raise TypeError('dcs ({}) must be a data container with entropy data'
551 ' or be a list of data containers'
552 .format(type(dcs)))
554 # fetch entropy and density of states from data container(s)
555 df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit)
557 # compute density for each row in data container if observable averages
558 # are to be computed
559 if observables is not None:
560 energy_spacing = dcref.ensemble_parameters['energy_spacing']
561 # NOTE: we rely on the indices of the df_density DataFrame to
562 # correspond to the energy scale! This is expected to be handled in
563 # the get_density_of_states function.
564 bins = list(np.array(np.round(df_combined.potential / energy_spacing), dtype=int))
565 data_density = [dens / bins.count(k) for k, dens in df_density.density[bins].items()]
567 enref = np.min(df_density.energy)
568 averages = []
569 for temperature in temperatures:
571 # mean and standard deviation of energy
572 boltz = np.exp(- (df_density.energy - enref) / temperature / boltzmann_constant)
573 sumint = np.sum(df_density.density * boltz)
574 en_mean = np.sum(df_density.energy * df_density.density * boltz) / sumint
575 en_std = np.sum(df_density.energy ** 2 * df_density.density * boltz) / sumint
576 en_std = np.sqrt(en_std - en_mean ** 2)
577 record = {'temperature': temperature,
578 'potential_mean': en_mean,
579 'potential_std': en_std}
581 # mean and standard deviation of other observables
582 if observables is not None:
583 boltz = np.exp(- (df_combined.potential - enref) / temperature / boltzmann_constant)
584 sumint = np.sum(data_density * boltz)
585 for obs in observables:
586 obs_mean = np.sum(data_density * boltz * df_combined[obs]) / sumint
587 obs_std = np.sum(data_density * boltz * df_combined[obs] ** 2) / sumint
588 obs_std = np.sqrt(obs_std - obs_mean ** 2)
589 record['{}_mean'.format(obs)] = obs_mean
590 record['{}_std'.format(obs)] = obs_std
592 averages.append(record)
594 return DataFrame.from_dict(averages)
597def get_average_cluster_vectors_wl(dcs: Union[WangLandauDataContainer, dict],
598 cluster_space: ClusterSpace,
599 temperatures: list[float],
600 boltzmann_constant: float = kB,
601 fill_factor_limit: float = None) -> DataFrame:
602 """Returns the average cluster vectors from a :ref:`Wang-Landau simulation
603 <wang_landau_ensemble>` for the temperatures specified.
605 Parameters
606 ----------
607 dcs
608 Data container(s), from which to extract density of states
609 as well as observables.
610 cluster_space
611 Cluster space to use for calculation of cluster vectors.
612 temperatures
613 Temperatures at which to compute the averages.
614 boltzmann_constant
615 Boltzmann constant :math:`k_B` in appropriate
616 units, i.e., units that are consistent
617 with the underlying cluster expansion
618 and the temperature units [default: eV/K].
619 fill_factor_limit
620 Use data recorded up to the point when the specified fill factor limit
621 was reached when computing the average cluster vectors. Otherwise use
622 data for the last state.
624 Raises
625 ------
626 ValueError
627 If the data container(s) do(es) not contain entropy data
628 from Wang-Landau simulation.
629 """
631 # fetch potential and structures
632 if isinstance(dcs, WangLandauDataContainer):
633 potential, trajectory = dcs.get('potential', 'trajectory',
634 fill_factor_limit=fill_factor_limit)
635 energy_spacing = dcs.ensemble_parameters['energy_spacing']
636 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):
637 potential, trajectory = [], []
638 for dc in dcs.values():
639 p, t = dc.get('potential', 'trajectory', fill_factor_limit=fill_factor_limit)
640 potential.extend(p)
641 trajectory.extend(t)
642 energy_spacing = list(dcs.values())[0].ensemble_parameters['energy_spacing']
643 potential = np.array(potential)
644 else:
645 raise TypeError('dcs ({}) must be a data container with entropy data'
646 ' or be a list of data containers'
647 .format(type(dcs)))
649 # fetch entropy and density of states from data container(s)
650 df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit)
652 # compute weighted density and cluster vector for each bin in energy
653 # range; the weighted density is the total density divided by the number
654 # of structures that fall in the respective bin
655 # NOTE: the following code relies on the indices of the df_density
656 # DataFrame to correspond to the energy scale. This is expected to be
657 # handled in the get_density_of_states function.
658 cvs = []
659 weighted_density = []
660 bins = list(np.array(np.round(potential / energy_spacing), dtype=int))
661 for k, structure in zip(bins, trajectory):
662 cvs.append(cluster_space.get_cluster_vector(structure))
663 weighted_density.append(df_density.density[k] / bins.count(k))
665 # compute mean and standard deviation (std) of temperature weighted
666 # cluster vector
667 averages = []
668 enref = np.min(potential)
669 for temperature in temperatures:
670 boltz = np.exp(- (potential - enref) / temperature / boltzmann_constant)
671 sumint = np.sum(weighted_density * boltz)
672 cv_mean = np.array([np.sum(weighted_density * boltz * cv) / sumint
673 for cv in np.transpose(cvs)])
674 cv_std = np.array([np.sum(weighted_density * boltz * cv ** 2) / sumint
675 for cv in np.transpose(cvs)])
676 cv_std = np.sqrt(cv_std - cv_mean ** 2)
677 record = {'temperature': temperature,
678 'cv_mean': cv_mean,
679 'cv_std': cv_std}
680 averages.append(record)
682 return DataFrame.from_dict(averages)