Coverage for mchammer/data_containers/wang_landau_data_container.py: 99%

235 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-09-14 04:08 +0000

1"""Definition of the Wang-Landau data container class.""" 

2 

3from warnings import warn 

4from collections import Counter, OrderedDict 

5from typing import Any, BinaryIO, Optional, TextIO, Union 

6 

7import numpy as np 

8import pandas as pd 

9 

10from ase.units import kB 

11from ase import Atoms 

12from pandas import DataFrame, concat as pd_concat 

13 

14from icet import ClusterSpace 

15from .base_data_container import BaseDataContainer 

16 

17 

18class WangLandauDataContainer(BaseDataContainer): 

19 """ 

20 Data container for storing information concerned with :ref:`Wang-Landau 

21 simulation <wang_landau_ensemble>` performed with :program:`mchammer`. 

22 

23 Parameters 

24 ---------- 

25 structure 

26 Reference atomic structure associated with the data container. 

27 ensemble_parameters 

28 Parameters associated with the underlying ensemble. 

29 metadata 

30 Metadata associated with the data container. 

31 """ 

32 

33 def _update_last_state(self, 

34 last_step: int, 

35 occupations: list[int], 

36 accepted_trials: int, 

37 random_state: tuple, 

38 fill_factor: float, 

39 fill_factor_history: dict[int, float], 

40 entropy_history: dict[int, dict[int, float]], 

41 histogram=dict[int, int], 

42 entropy=dict[int, float]): 

43 """Updates last state of the Wang-Landau simulation. 

44 

45 Parameters 

46 ---------- 

47 last_step 

48 Last trial step. 

49 occupations 

50 Occupation vector observed during the last trial step. 

51 accepted_trial 

52 Number of current accepted trial steps. 

53 random_state 

54 Tuple representing the last state of the random generator. 

55 fill_factor 

56 Fill factor of Wang-Landau algorithm. 

57 fill_factor_history 

58 Evolution of the fill factor of Wang-Landau algorithm (key=MC 

59 trial step, value=fill factor). 

60 entropy_history 

61 Evolution of the (relative) entropy accumulated during Wang-Landau 

62 simulation (key=MC trial step, value=(key=bin, value=entropy)). 

63 histogram 

64 Histogram of states visited during Wang-Landau simulation. 

65 entropy 

66 (Relative) entropy accumulated during Wang-Landau simulation. 

67 """ 

68 super()._update_last_state( 

69 last_step=last_step, 

70 occupations=occupations, 

71 accepted_trials=accepted_trials, 

72 random_state=random_state) 

73 self._last_state['fill_factor'] = fill_factor 

74 self._last_state['fill_factor_history'] = fill_factor_history 

75 self._last_state['entropy_history'] = entropy_history 

76 self._last_state['histogram'] = histogram 

77 self._last_state['entropy'] = entropy 

78 

79 @property 

80 def fill_factor(self) -> float: 

81 """ Final value of the fill factor in the Wang-Landau algorithm. """ 

82 return float(self._last_state['fill_factor']) 

83 

84 @property 

85 def fill_factor_history(self) -> DataFrame: 

86 """ Evolution of the fill factor in the Wang-Landau algorithm. """ 

87 return DataFrame({'mctrial': list(self._last_state['fill_factor_history'].keys()), 

88 'fill_factor': list(self._last_state['fill_factor_history'].values())}) 

89 

90 def get(self, 

91 *tags: str, 

92 fill_factor_limit: float = None) \ 

93 -> Union[np.ndarray, list[Atoms], tuple[np.ndarray, list[Atoms]]]: 

94 """Returns the accumulated data for the requested observables, 

95 including configurations stored in the data container. The latter 

96 can be achieved by including ``'trajectory'`` as one of the tags. 

97 

98 Parameters 

99 ---------- 

100 tags 

101 Names of the requested properties. 

102 fill_factor_limit 

103 Return data recorded up to the point when the specified fill 

104 factor limit was reached, or ``None`` if the entropy history is 

105 empty or the last fill factor is above the limit; otherwise 

106 return all data. 

107 

108 Raises 

109 ------ 

110 ValueError 

111 If :attr:`tags` is empty. 

112 ValueError 

113 If observables are requested that are not in the data container. 

114 

115 Examples 

116 -------- 

117 Below the :func:`get` method is 

118 illustrated but first we require a data container. 

119 

120 >>> from ase import Atoms 

121 >>> from icet import ClusterExpansion, ClusterSpace 

122 >>> from mchammer.calculators import ClusterExpansionCalculator 

123 >>> from mchammer.ensembles import WangLandauEnsemble 

124 

125 >>> # prepare cluster expansion 

126 >>> prim = Atoms('Au', positions=[[0, 0, 0]], cell=[1, 1, 10], pbc=True) 

127 >>> cs = ClusterSpace(prim, cutoffs=[1.1], chemical_symbols=['Ag', 'Au']) 

128 >>> ce = ClusterExpansion(cs, [0, 0, 2]) 

129 

130 >>> # prepare initial configuration 

131 >>> structure = prim.repeat((4, 4, 1)) 

132 >>> for k in range(8): 

133 ... structure[k].symbol = 'Ag' 

134 

135 >>> # set up and run Wang-Landau simulation 

136 >>> calculator = ClusterExpansionCalculator(structure, ce) 

137 >>> mc = WangLandauEnsemble(structure=structure, 

138 ... calculator=calculator, 

139 ... energy_spacing=1, 

140 ... dc_filename='ising_2d_run.dc', 

141 ... fill_factor_limit=0.3) 

142 >>> # N.B.: in practice one requires more steps 

143 >>> mc.run(number_of_trial_steps=len(structure) * 3000) 

144 

145 We can now access the data container by reading it from file 

146 by using the :func:`read` method. For the purpose of this 

147 example, however, we access the data container associated with 

148 the ensemble directly. 

149 

150 >>> dc = mc.data_container 

151 

152 The following lines illustrate how to use the :func:`get` method 

153 for extracting data from the data container. 

154 

155 >>> # obtain all values of the potential represented by 

156 >>> # the cluster expansion and the MC trial step along the 

157 >>> # trajectory 

158 >>> import matplotlib.pyplot as plt 

159 >>> s, p = dc.get('mctrial', 'potential') 

160 >>> _ = plt.plot(s, p) 

161 

162 >>> # as above but this time only included data recorded up to 

163 >>> # the point when the fill factor reached below 0.6 

164 >>> s, p = dc.get('mctrial', 'potential', fill_factor_limit=0.6) 

165 >>> _ = plt.plot(s, p) 

166 >>> plt.show(block=False) 

167 

168 >>> # obtain configurations along the trajectory along with 

169 >>> # their potential 

170 >>> p, confs = dc.get('potential', 'trajectory') 

171 

172 """ 

173 

174 if len(tags) == 0: 174 ↛ 175line 174 didn't jump to line 175 because the condition on line 174 was never true

175 raise TypeError('Missing tags argument') 

176 

177 local_tags = ['occupations' if tag == 'trajectory' else tag for tag in tags] 

178 

179 for tag in local_tags: 

180 if tag in 'mctrial': 

181 continue 

182 if tag not in self.observables: 182 ↛ 183line 182 didn't jump to line 183 because the condition on line 182 was never true

183 raise ValueError('No observable named {} in data container'.format(tag)) 

184 

185 # collect data 

186 mctrials = [row_dict['mctrial'] for row_dict in self._data_list] 

187 data = pd.DataFrame.from_records(self._data_list, index=mctrials, columns=local_tags) 

188 if fill_factor_limit is not None: 

189 # only include data for fill factors up to the limit 

190 df_ffh = self.fill_factor_history.astype( 

191 {'mctrial': np.int64, 'fill_factor': np.float64}) 

192 mctrial_last = df_ffh.loc[ 

193 df_ffh.fill_factor <= fill_factor_limit].mctrial.min() 

194 data = data.loc[data.index <= mctrial_last] 

195 data.dropna(inplace=True) 

196 

197 # handling of trajectory 

198 def occupation_to_atoms(occupation): 

199 structure = self.structure.copy() 

200 structure.numbers = occupation 

201 return structure 

202 

203 data_list = [] 

204 for tag in local_tags: 

205 if tag == 'occupations': 

206 traj = [occupation_to_atoms(o) for o in data['occupations']] 

207 data_list.append(traj) 

208 else: 

209 data_list.append(data[tag].values) 

210 

211 if len(data_list) > 1: 

212 return tuple(data_list) 

213 else: 

214 return data_list[0] 

215 

216 def get_entropy(self, fill_factor_limit: float = None) -> DataFrame: 

217 """Returns the (relative) entropy from this data container accumulated 

218 during a :ref:`Wang-Landau simulation <wang_landau_ensemble>`. Returns 

219 ``None`` if the data container does not contain the required 

220 information. 

221 

222 Parameters 

223 ---------- 

224 fill_factor_limit 

225 Return the entropy recorded up to the point when the specified fill 

226 factor limit was reached, or ``None`` if the entropy history is 

227 empty or the last fill factor is above the limit. Otherwise 

228 return the entropy for the last state. 

229 """ 

230 

231 if 'entropy' not in self._last_state: 

232 warn('There is no entropy information in the data container.') 

233 return None 

234 entropy = self._last_state['entropy'] 

235 if fill_factor_limit is not None: 

236 if 'entropy_history' not in self._last_state or \ 

237 len(self._last_state['entropy_history']) == 0: 

238 warn('The entropy history is empty.') 

239 return None 

240 if self._last_state['fill_factor'] > fill_factor_limit: 

241 warn('The last fill factor {} is higher than the limit' 

242 ' {}.'.format(self.fill_factor, fill_factor_limit)) 

243 return None 

244 for step, fill_factor in self._last_state['fill_factor_history'].items(): 244 ↛ 250line 244 didn't jump to line 250 because the loop on line 244 didn't complete

245 if fill_factor <= fill_factor_limit: 

246 entropy = self._last_state['entropy_history'][step] 

247 break 

248 

249 # compile entropy into DataFrame 

250 energy_spacing = self.ensemble_parameters['energy_spacing'] 

251 df = DataFrame(data={'energy': energy_spacing * np.array(list(entropy.keys())), 

252 'entropy': np.array(list(entropy.values()))}, 

253 index=list(entropy.keys())) 

254 # shift entropy for numerical stability 

255 df['entropy'] -= np.min(df['entropy']) 

256 

257 return df 

258 

259 def get_histogram(self) -> DataFrame: 

260 """Returns the histogram from this data container accumulated since the 

261 last update of the fill factor. Returns ``None`` if the data container 

262 does not contain the required information. 

263 """ 

264 

265 if 'histogram' not in self._last_state: 

266 return None 

267 

268 # compile histogram into DataFrame 

269 histogram = self._last_state['histogram'] 

270 energy_spacing = self.ensemble_parameters['energy_spacing'] 

271 df = DataFrame(data={'energy': energy_spacing * np.array(list(histogram.keys())), 

272 'histogram': np.array(list(histogram.values()))}, 

273 index=list(histogram.keys())) 

274 

275 return df 

276 

277 @classmethod 

278 # todo: cls and the return should be type hinted as BaseDataContainer. 

279 # Unfortunately, this requires from __future__ import annotations, which 

280 # in turn requires Python 3.8. 

281 def read(cls, infile: Union[str, BinaryIO, TextIO], old_format: bool = False): 

282 """Reads data container from file. 

283 

284 Parameters 

285 ---------- 

286 infile 

287 file from which to read 

288 old_format 

289 If true use old json format to read runtime data; default to false 

290 

291 Raises 

292 ------ 

293 FileNotFoundError 

294 if file is not found (str) 

295 ValueError 

296 if file is of incorrect type (not a tarball) 

297 """ 

298 dc = super(WangLandauDataContainer, cls).read(infile=infile, old_format=old_format) 

299 

300 for tag, value in dc._last_state.items(): 

301 if tag in ['histogram', 'entropy', 'fill_factor_history', 'entropy_history']: 

302 # the following accounts for the fact that the keys of dicts 

303 # are converted to str when writing to json and have to 

304 # converted back into numerical values 

305 dc._last_state[tag] = {} 

306 for key, val in value.items(): 

307 if isinstance(val, dict): 

308 val = {int(k): v for k, v in val.items()} 

309 dc._last_state[tag][int(key)] = val 

310 

311 return dc 

312 

313 

314def get_density_of_states_wl(dcs: Union[WangLandauDataContainer, 

315 dict[Any, WangLandauDataContainer]], 

316 fill_factor_limit: float = None) \ 

317 -> tuple[DataFrame, dict]: 

318 """Returns a pandas DataFrame with the total density of states from a 

319 :ref:`Wang-Landau simulation <wang_landau_ensemble>`. If a dict of data 

320 containers is provided the function also returns a dictionary that 

321 contains the standard deviation between the entropy of neighboring data 

322 containers in the overlap region. These errors should be small compared to 

323 the variation of the entropy across each bin. 

324 

325 The function can handle both a single data container and a dict thereof. 

326 In the latter case the data containers must cover a contiguous energy 

327 range and must at least partially overlap. 

328 

329 Parameters 

330 ---------- 

331 dcs 

332 Data container(s), from which to extract the density of states. 

333 fill_factor_limit 

334 Calculate the density of states using the entropy recorded up to the 

335 point when the specified fill factor limit was reached. Otherwise 

336 return the density of states for the last state. 

337 

338 Raises 

339 ------ 

340 TypeError 

341 If :attr:`dcs` does not correspond to not a single (dictionary) of data 

342 container(s) from which the entropy can retrieved. 

343 ValueError 

344 If the data container does not contain entropy information. 

345 ValueError 

346 If a fill factor limit has been provided and the data container either 

347 does not contain information about the entropy history or if the last 

348 fill factor is higher than the specified limit. 

349 ValueError 

350 If multiple data containers are provided and there are inconsistencies 

351 with regard to basic simulation parameters such as system size or 

352 energy spacing. 

353 ValueError 

354 If multiple data containers are provided and there is at least 

355 one energy region without overlap. 

356 """ 

357 

358 # preparations 

359 if isinstance(dcs, WangLandauDataContainer): 

360 # fetch raw entropy data from data container 

361 df = dcs.get_entropy(fill_factor_limit) 

362 if df is None: 

363 raise ValueError('Entropy information could not be retrieved from' 

364 ' the data container {}.'.format(dcs)) 

365 errors = None 

366 if len(dcs.fill_factor_history) == 0 or dcs.fill_factor > 1e-4: 

367 warn('The data container appears to contain data from an' 

368 ' underconverged Wang-Landau simulation.') 

369 

370 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer): 

371 # minimal consistency checks 

372 tags = list(dcs.keys()) 

373 tagref = tags[0] 

374 dcref = dcs[tagref] 

375 for tag in tags: 

376 dc = dcs[tag] 

377 if len(dc.structure) != len(dcref.structure): 

378 raise ValueError('Number of atoms differs between data containers ({}: {}, {}: {})' 

379 .format(tagref, dcref.ensemble_parameters['n_atoms'], 

380 tag, dc.ensemble_parameters['n_atoms'])) 

381 for param in ['energy_spacing', 'trial_move']: 

382 if dc.ensemble_parameters[param] != dcref.ensemble_parameters[param]: 

383 raise ValueError('{} differs between data containers ({}: {}, {}: {})' 

384 .format(param, 

385 tagref, dcref.ensemble_parameters['n_atoms'], 

386 tag, dc.ensemble_parameters['n_atoms'])) 

387 if len(dc.fill_factor_history) == 0 or dc.fill_factor > 1e-4: 

388 warn('Data container {} appears to contain data from an' 

389 ' underconverged Wang-Landau simulation.'.format(tag)) 

390 

391 # fetch raw entropy data from data containers 

392 entropies = {} 

393 for tag, dc in dcs.items(): 

394 entropies[tag] = dc.get_entropy(fill_factor_limit) 

395 if entropies[tag] is None: 

396 raise ValueError('Entropy information could not be retrieved' 

397 ' from the data container {}.'.format(dc)) 

398 

399 # sort entropies by energy 

400 entropies = OrderedDict(sorted(entropies.items(), key=lambda row: row[1].energy.iloc[0])) 

401 

402 # line up entropy data 

403 errors = {} 

404 tags = list(entropies.keys()) 

405 for tag1, tag2 in zip(tags[:-1], tags[1:]): 

406 df1 = entropies[tag1] 

407 df2 = entropies[tag2] 

408 if all(df2.energy.isin(df1.energy)): 

409 warn('Window {} is a subset of {}'.format(tag2, tag1)) 

410 left_lim = np.min(df2.energy) 

411 right_lim = np.max(df1.energy) 

412 if left_lim >= right_lim: 

413 raise ValueError('No overlap in the energy range {}...{}.\n' 

414 .format(right_lim, left_lim) + 

415 ' The closest data containers have tags "{}" and "{}".' 

416 .format(tag1, tag2)) 

417 df1_ = df1[(df1.energy >= left_lim) & (df1.energy <= right_lim)] 

418 df2_ = df2[(df2.energy >= left_lim) & (df2.energy <= right_lim)] 

419 offset = (df2_.entropy - df1_.entropy).mean() 

420 errors['{}-{}'.format(tag1, tag2)] = (df2_.entropy - df1_.entropy).std() 

421 entropies[tag2].entropy = entropies[tag2].entropy - offset 

422 

423 # compile entropy over the entire energy range 

424 data: dict[float, float] = {} 

425 indices = {} 

426 counts = Counter() 

427 for df in entropies.values(): 

428 for index, en, ent in zip(df.index, df.energy, df.entropy): 

429 data[en] = data.get(en, 0) + ent 

430 counts[en] += 1 

431 indices[en] = index 

432 for en in data: 

433 data[en] = data[en] / counts[en] 

434 

435 # center entropy to prevent possible numerical issues 

436 entmin = np.min(list(data.values())) 

437 df = DataFrame(data={'energy': np.array(list(data.keys())), 

438 'entropy': np.array(np.array(list(data.values()))) - entmin}, 

439 index=list(indices.values())) 

440 else: 

441 raise TypeError('dcs ({}) must be a data container with entropy data' 

442 ' or be a list of data containers' 

443 .format(type(dcs))) 

444 

445 # density of states 

446 S_max = df.entropy.max() 

447 df['density'] = np.exp(df.entropy - S_max) / np.sum(np.exp(df.entropy - S_max)) 

448 

449 return df, errors 

450 

451 

452def _extract_filter_data(dc: BaseDataContainer, 

453 columns_to_keep: list[str], 

454 fill_factor_limit: float = None) -> DataFrame: 

455 """ Extract data from a data container and filter the content. 

456 

457 Parameters 

458 ---------- 

459 dc 

460 Data container from which to extract the data. 

461 columns_to_keep 

462 list of requested properties. 

463 fill_factor_limit 

464 Only include data recorded up to the point when the specified fill 

465 factor limit was reached when computing averages. Otherwise include 

466 all data. 

467 """ 

468 

469 df = dc.data 

470 if fill_factor_limit is not None: 

471 # only include data for fill factors up to the limit 

472 df_ffh = dc.fill_factor_history.astype( 

473 {'mctrial': np.int64, 'fill_factor': np.float64}) 

474 mctrial_last = df_ffh.loc[ 

475 df_ffh.fill_factor <= fill_factor_limit].mctrial.min() 

476 df = df.loc[df.mctrial <= mctrial_last] 

477 

478 return df.filter(columns_to_keep) 

479 

480 

481def get_average_observables_wl(dcs: Union[WangLandauDataContainer, 

482 dict[Any, WangLandauDataContainer]], 

483 temperatures: list[float], 

484 observables: list[str] = None, 

485 boltzmann_constant: float = kB, 

486 fill_factor_limit: float = None) -> DataFrame: 

487 """Returns the average and the standard deviation of the energy from a 

488 :ref:`Wang-Landau simulation <wang_landau_ensemble>` for the temperatures 

489 specified. If the :attr:`observables` keyword argument is specified 

490 the function will also return the mean and standard deviation of the 

491 specified observables. 

492 

493 Parameters 

494 ---------- 

495 dcs 

496 Data container(s) from which to extract density of states 

497 as well as observables. 

498 temperatures 

499 Temperatures at which to compute the averages. 

500 observables 

501 Observables for which to compute averages; the observables 

502 must refer to fields in the data container. 

503 boltzmann_constant 

504 Boltzmann constant :math:`k_B` in appropriate 

505 units, i.e., units that are consistent 

506 with the underlying cluster expansion 

507 and the temperature units [default: eV/K]. 

508 fill_factor_limit 

509 Use data recorded up to the point when the specified fill factor limit 

510 was reached when computing averages. Otherwise use data for the last 

511 state. 

512 

513 Raises 

514 ------ 

515 ValueError 

516 If the data container(s) do(es) not contain entropy data 

517 from Wang-Landau simulation. 

518 ValueError 

519 If data container(s) do(es) not contain requested observable. 

520 """ 

521 

522 def check_observables(dc: WangLandauDataContainer, observables: Optional[list[str]]) -> None: 

523 """ Helper function that checks that observables are available in data frame. """ 

524 if observables is None: 

525 return 

526 for obs in observables: 

527 if obs not in dc.data.columns: 

528 raise ValueError('Observable ({}) not in data container.\n' 

529 'Available observables: {}'.format(obs, dc.data.columns)) 

530 

531 # preparation of observables 

532 columns_to_keep = ['potential', 'density'] 

533 if observables is not None: 

534 columns_to_keep.extend(observables) 

535 

536 # check that observables are available in data container 

537 # and prepare comprehensive data frame with relevant information 

538 if isinstance(dcs, WangLandauDataContainer): 

539 check_observables(dcs, observables) 

540 df_combined = _extract_filter_data(dcs, columns_to_keep, fill_factor_limit) 

541 dcref = dcs 

542 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer): 

543 dfs = [] 

544 for dc in dcs.values(): 

545 check_observables(dc, observables) 

546 dfs.append(_extract_filter_data(dc, columns_to_keep, fill_factor_limit)) 

547 df_combined = pd_concat([df for df in dfs], ignore_index=True) 

548 dcref = list(dcs.values())[0] 

549 else: 

550 raise TypeError('dcs ({}) must be a data container with entropy data' 

551 ' or be a list of data containers' 

552 .format(type(dcs))) 

553 

554 # fetch entropy and density of states from data container(s) 

555 df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit) 

556 

557 # compute density for each row in data container if observable averages 

558 # are to be computed 

559 if observables is not None: 

560 energy_spacing = dcref.ensemble_parameters['energy_spacing'] 

561 # NOTE: we rely on the indices of the df_density DataFrame to 

562 # correspond to the energy scale! This is expected to be handled in 

563 # the get_density_of_states function. 

564 bins = list(np.array(np.round(df_combined.potential / energy_spacing), dtype=int)) 

565 data_density = [dens / bins.count(k) for k, dens in df_density.density[bins].items()] 

566 

567 enref = np.min(df_density.energy) 

568 averages = [] 

569 for temperature in temperatures: 

570 

571 # mean and standard deviation of energy 

572 boltz = np.exp(- (df_density.energy - enref) / temperature / boltzmann_constant) 

573 sumint = np.sum(df_density.density * boltz) 

574 en_mean = np.sum(df_density.energy * df_density.density * boltz) / sumint 

575 en_std = np.sum(df_density.energy ** 2 * df_density.density * boltz) / sumint 

576 en_std = np.sqrt(en_std - en_mean ** 2) 

577 record = {'temperature': temperature, 

578 'potential_mean': en_mean, 

579 'potential_std': en_std} 

580 

581 # mean and standard deviation of other observables 

582 if observables is not None: 

583 boltz = np.exp(- (df_combined.potential - enref) / temperature / boltzmann_constant) 

584 sumint = np.sum(data_density * boltz) 

585 for obs in observables: 

586 obs_mean = np.sum(data_density * boltz * df_combined[obs]) / sumint 

587 obs_std = np.sum(data_density * boltz * df_combined[obs] ** 2) / sumint 

588 obs_std = np.sqrt(obs_std - obs_mean ** 2) 

589 record['{}_mean'.format(obs)] = obs_mean 

590 record['{}_std'.format(obs)] = obs_std 

591 

592 averages.append(record) 

593 

594 return DataFrame.from_dict(averages) 

595 

596 

597def get_average_cluster_vectors_wl(dcs: Union[WangLandauDataContainer, dict], 

598 cluster_space: ClusterSpace, 

599 temperatures: list[float], 

600 boltzmann_constant: float = kB, 

601 fill_factor_limit: float = None) -> DataFrame: 

602 """Returns the average cluster vectors from a :ref:`Wang-Landau simulation 

603 <wang_landau_ensemble>` for the temperatures specified. 

604 

605 Parameters 

606 ---------- 

607 dcs 

608 Data container(s), from which to extract density of states 

609 as well as observables. 

610 cluster_space 

611 Cluster space to use for calculation of cluster vectors. 

612 temperatures 

613 Temperatures at which to compute the averages. 

614 boltzmann_constant 

615 Boltzmann constant :math:`k_B` in appropriate 

616 units, i.e., units that are consistent 

617 with the underlying cluster expansion 

618 and the temperature units [default: eV/K]. 

619 fill_factor_limit 

620 Use data recorded up to the point when the specified fill factor limit 

621 was reached when computing the average cluster vectors. Otherwise use 

622 data for the last state. 

623 

624 Raises 

625 ------ 

626 ValueError 

627 If the data container(s) do(es) not contain entropy data 

628 from Wang-Landau simulation. 

629 """ 

630 

631 # fetch potential and structures 

632 if isinstance(dcs, WangLandauDataContainer): 

633 potential, trajectory = dcs.get('potential', 'trajectory', 

634 fill_factor_limit=fill_factor_limit) 

635 energy_spacing = dcs.ensemble_parameters['energy_spacing'] 

636 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer): 

637 potential, trajectory = [], [] 

638 for dc in dcs.values(): 

639 p, t = dc.get('potential', 'trajectory', fill_factor_limit=fill_factor_limit) 

640 potential.extend(p) 

641 trajectory.extend(t) 

642 energy_spacing = list(dcs.values())[0].ensemble_parameters['energy_spacing'] 

643 potential = np.array(potential) 

644 else: 

645 raise TypeError('dcs ({}) must be a data container with entropy data' 

646 ' or be a list of data containers' 

647 .format(type(dcs))) 

648 

649 # fetch entropy and density of states from data container(s) 

650 df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit) 

651 

652 # compute weighted density and cluster vector for each bin in energy 

653 # range; the weighted density is the total density divided by the number 

654 # of structures that fall in the respective bin 

655 # NOTE: the following code relies on the indices of the df_density 

656 # DataFrame to correspond to the energy scale. This is expected to be 

657 # handled in the get_density_of_states function. 

658 cvs = [] 

659 weighted_density = [] 

660 bins = list(np.array(np.round(potential / energy_spacing), dtype=int)) 

661 for k, structure in zip(bins, trajectory): 

662 cvs.append(cluster_space.get_cluster_vector(structure)) 

663 weighted_density.append(df_density.density[k] / bins.count(k)) 

664 

665 # compute mean and standard deviation (std) of temperature weighted 

666 # cluster vector 

667 averages = [] 

668 enref = np.min(potential) 

669 for temperature in temperatures: 

670 boltz = np.exp(- (potential - enref) / temperature / boltzmann_constant) 

671 sumint = np.sum(weighted_density * boltz) 

672 cv_mean = np.array([np.sum(weighted_density * boltz * cv) / sumint 

673 for cv in np.transpose(cvs)]) 

674 cv_std = np.array([np.sum(weighted_density * boltz * cv ** 2) / sumint 

675 for cv in np.transpose(cvs)]) 

676 cv_std = np.sqrt(cv_std - cv_mean ** 2) 

677 record = {'temperature': temperature, 

678 'cv_mean': cv_mean, 

679 'cv_std': cv_std} 

680 averages.append(record) 

681 

682 return DataFrame.from_dict(averages)