Coverage for mchammer/data_containers/wang_landau_data_container.py: 99%

235 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-06 04:14 +0000

1"""Definition of the Wang-Landau data container class.""" 

2 

3from warnings import warn 

4from collections import Counter, OrderedDict 

5from typing import Any, BinaryIO, Dict, List, Optional, TextIO, Tuple, Union 

6 

7import numpy as np 

8import pandas as pd 

9 

10from ase.units import kB 

11from ase import Atoms 

12from pandas import DataFrame, concat as pd_concat 

13 

14from icet import ClusterSpace 

15from .base_data_container import BaseDataContainer 

16 

17 

18class WangLandauDataContainer(BaseDataContainer): 

19 """ 

20 Data container for storing information concerned with :ref:`Wang-Landau 

21 simulation <wang_landau_ensemble>` performed with :program:`mchammer`. 

22 

23 Parameters 

24 ---------- 

25 structure 

26 Reference atomic structure associated with the data container. 

27 ensemble_parameters 

28 Parameters associated with the underlying ensemble. 

29 metadata 

30 Metadata associated with the data container. 

31 """ 

32 

33 def _update_last_state(self, 

34 last_step: int, 

35 occupations: List[int], 

36 accepted_trials: int, 

37 random_state: tuple, 

38 fill_factor: float, 

39 fill_factor_history: Dict[int, float], 

40 entropy_history: Dict[int, Dict[int, float]], 

41 histogram=Dict[int, int], 

42 entropy=Dict[int, float]): 

43 """Updates last state of the Wang-Landau simulation. 

44 

45 Parameters 

46 ---------- 

47 last_step 

48 Last trial step. 

49 occupations 

50 Occupation vector observed during the last trial step. 

51 accepted_trial 

52 Number of current accepted trial steps. 

53 random_state 

54 Tuple representing the last state of the random generator. 

55 fill_factor 

56 Fill factor of Wang-Landau algorithm. 

57 fill_factor_history 

58 Evolution of the fill factor of Wang-Landau algorithm (key=MC 

59 trial step, value=fill factor). 

60 entropy_history 

61 Evolution of the (relative) entropy accumulated during Wang-Landau 

62 simulation (key=MC trial step, value=(key=bin, value=entropy)). 

63 histogram 

64 Histogram of states visited during Wang-Landau simulation. 

65 entropy 

66 (Relative) entropy accumulated during Wang-Landau simulation. 

67 """ 

68 super()._update_last_state( 

69 last_step=last_step, 

70 occupations=occupations, 

71 accepted_trials=accepted_trials, 

72 random_state=random_state) 

73 self._last_state['fill_factor'] = fill_factor 

74 self._last_state['fill_factor_history'] = fill_factor_history 

75 self._last_state['entropy_history'] = entropy_history 

76 self._last_state['histogram'] = histogram 

77 self._last_state['entropy'] = entropy 

78 

79 @property 

80 def fill_factor(self) -> float: 

81 """ Final value of the fill factor in the Wang-Landau algorithm. """ 

82 return float(self._last_state['fill_factor']) 

83 

84 @property 

85 def fill_factor_history(self) -> DataFrame: 

86 """ Evolution of the fill factor in the Wang-Landau algorithm. """ 

87 return DataFrame({'mctrial': list(self._last_state['fill_factor_history'].keys()), 

88 'fill_factor': list(self._last_state['fill_factor_history'].values())}) 

89 

90 def get(self, 

91 *tags: str, 

92 fill_factor_limit: float = None) \ 

93 -> Union[np.ndarray, List[Atoms], Tuple[np.ndarray, List[Atoms]]]: 

94 """Returns the accumulated data for the requested observables, 

95 including configurations stored in the data container. The latter 

96 can be achieved by including ``'trajectory'`` as one of the tags. 

97 

98 Parameters 

99 ---------- 

100 tags 

101 Names of the requested properties. 

102 fill_factor_limit 

103 Return data recorded up to the point when the specified fill 

104 factor limit was reached, or ``None`` if the entropy history is 

105 empty or the last fill factor is above the limit; otherwise 

106 return all data. 

107 

108 Raises 

109 ------ 

110 ValueError 

111 If :attr:`tags` is empty. 

112 ValueError 

113 If observables are requested that are not in the data container. 

114 

115 Examples 

116 -------- 

117 Below the :func:`get` method is 

118 illustrated but first we require a data container. 

119 

120 >>> from ase import Atoms 

121 >>> from icet import ClusterExpansion, ClusterSpace 

122 >>> from mchammer.calculators import ClusterExpansionCalculator 

123 >>> from mchammer.ensembles import WangLandauEnsemble 

124 

125 >>> # prepare cluster expansion 

126 >>> prim = Atoms('Au', positions=[[0, 0, 0]], cell=[1, 1, 10], pbc=True) 

127 >>> cs = ClusterSpace(prim, cutoffs=[1.1], chemical_symbols=['Ag', 'Au']) 

128 >>> ce = ClusterExpansion(cs, [0, 0, 2]) 

129 

130 >>> # prepare initial configuration 

131 >>> structure = prim.repeat((4, 4, 1)) 

132 >>> for k in range(8): 

133 ... structure[k].symbol = 'Ag' 

134 

135 >>> # set up and run Wang-Landau simulation 

136 >>> calculator = ClusterExpansionCalculator(structure, ce) 

137 >>> mc = WangLandauEnsemble(structure=structure, 

138 ... calculator=calculator, 

139 ... energy_spacing=1, 

140 ... dc_filename='ising_2d_run.dc', 

141 ... fill_factor_limit=0.3) 

142 >>> mc.run(number_of_trial_steps=len(structure)*3000) # in practice one requires more steps 

143 

144 We can now access the data container by reading it from file 

145 by using the :func:`read` method. For the purpose of this 

146 example, however, we access the data container associated with 

147 the ensemble directly. 

148 

149 >>> dc = mc.data_container 

150 

151 The following lines illustrate how to use the :func:`get` method 

152 for extracting data from the data container. 

153 

154 >>> # obtain all values of the potential represented by 

155 >>> # the cluster expansion and the MC trial step along the 

156 >>> # trajectory 

157 >>> import matplotlib.pyplot as plt 

158 >>> s, p = dc.get('mctrial', 'potential') 

159 >>> _ = plt.plot(s, p) 

160 

161 >>> # as above but this time only included data recorded up to 

162 >>> # the point when the fill factor reached below 0.6 

163 >>> s, p = dc.get('mctrial', 'potential', fill_factor_limit=0.6) 

164 >>> _ = plt.plot(s, p) 

165 >>> plt.show(block=False) 

166 

167 >>> # obtain configurations along the trajectory along with 

168 >>> # their potential 

169 >>> p, confs = dc.get('potential', 'trajectory') 

170 

171 """ 

172 

173 if len(tags) == 0: 173 ↛ 174line 173 didn't jump to line 174, because the condition on line 173 was never true

174 raise TypeError('Missing tags argument') 

175 

176 local_tags = ['occupations' if tag == 'trajectory' else tag for tag in tags] 

177 

178 for tag in local_tags: 

179 if tag in 'mctrial': 

180 continue 

181 if tag not in self.observables: 181 ↛ 182line 181 didn't jump to line 182, because the condition on line 181 was never true

182 raise ValueError('No observable named {} in data container'.format(tag)) 

183 

184 # collect data 

185 mctrials = [row_dict['mctrial'] for row_dict in self._data_list] 

186 data = pd.DataFrame.from_records(self._data_list, index=mctrials, columns=local_tags) 

187 if fill_factor_limit is not None: 

188 # only include data for fill factors up to the limit 

189 df_ffh = self.fill_factor_history.astype( 

190 {'mctrial': np.int64, 'fill_factor': np.float64}) 

191 mctrial_last = df_ffh.loc[ 

192 df_ffh.fill_factor <= fill_factor_limit].mctrial.min() 

193 data = data.loc[data.index <= mctrial_last] 

194 data.dropna(inplace=True) 

195 

196 # handling of trajectory 

197 def occupation_to_atoms(occupation): 

198 structure = self.structure.copy() 

199 structure.numbers = occupation 

200 return structure 

201 

202 data_list = [] 

203 for tag in local_tags: 

204 if tag == 'occupations': 

205 traj = [occupation_to_atoms(o) for o in data['occupations']] 

206 data_list.append(traj) 

207 else: 

208 data_list.append(data[tag].values) 

209 

210 if len(data_list) > 1: 

211 return tuple(data_list) 

212 else: 

213 return data_list[0] 

214 

215 def get_entropy(self, fill_factor_limit: float = None) -> DataFrame: 

216 """Returns the (relative) entropy from this data container accumulated 

217 during a :ref:`Wang-Landau simulation <wang_landau_ensemble>`. Returns 

218 ``None`` if the data container does not contain the required 

219 information. 

220 

221 Parameters 

222 ---------- 

223 fill_factor_limit 

224 Return the entropy recorded up to the point when the specified fill 

225 factor limit was reached, or ``None`` if the entropy history is 

226 empty or the last fill factor is above the limit. Otherwise 

227 return the entropy for the last state. 

228 """ 

229 

230 if 'entropy' not in self._last_state: 

231 warn('There is no entropy information in the data container.') 

232 return None 

233 entropy = self._last_state['entropy'] 

234 if fill_factor_limit is not None: 

235 if 'entropy_history' not in self._last_state or \ 

236 len(self._last_state['entropy_history']) == 0: 

237 warn('The entropy history is empty.') 

238 return None 

239 if self._last_state['fill_factor'] > fill_factor_limit: 

240 warn('The last fill factor {} is higher than the limit' 

241 ' {}.'.format(self.fill_factor, fill_factor_limit)) 

242 return None 

243 for step, fill_factor in self._last_state['fill_factor_history'].items(): 243 ↛ 249line 243 didn't jump to line 249, because the loop on line 243 didn't complete

244 if fill_factor <= fill_factor_limit: 

245 entropy = self._last_state['entropy_history'][step] 

246 break 

247 

248 # compile entropy into DataFrame 

249 energy_spacing = self.ensemble_parameters['energy_spacing'] 

250 df = DataFrame(data={'energy': energy_spacing * np.array(list(entropy.keys())), 

251 'entropy': np.array(list(entropy.values()))}, 

252 index=list(entropy.keys())) 

253 # shift entropy for numerical stability 

254 df['entropy'] -= np.min(df['entropy']) 

255 

256 return df 

257 

258 def get_histogram(self) -> DataFrame: 

259 """Returns the histogram from this data container accumulated since the 

260 last update of the fill factor. Returns ``None`` if the data container 

261 does not contain the required information. 

262 """ 

263 

264 if 'histogram' not in self._last_state: 

265 return None 

266 

267 # compile histogram into DataFrame 

268 histogram = self._last_state['histogram'] 

269 energy_spacing = self.ensemble_parameters['energy_spacing'] 

270 df = DataFrame(data={'energy': energy_spacing * np.array(list(histogram.keys())), 

271 'histogram': np.array(list(histogram.values()))}, 

272 index=list(histogram.keys())) 

273 

274 return df 

275 

276 @classmethod 

277 # todo: cls and the return should be type hinted as BaseDataContainer. 

278 # Unfortunately, this requires from __future__ import annotations, which 

279 # in turn requires Python 3.8. 

280 def read(cls, infile: Union[str, BinaryIO, TextIO], old_format: bool = False): 

281 """Reads data container from file. 

282 

283 Parameters 

284 ---------- 

285 infile 

286 file from which to read 

287 old_format 

288 If true use old json format to read runtime data; default to false 

289 

290 Raises 

291 ------ 

292 FileNotFoundError 

293 if file is not found (str) 

294 ValueError 

295 if file is of incorrect type (not a tarball) 

296 """ 

297 dc = super(WangLandauDataContainer, cls).read(infile=infile, old_format=old_format) 

298 

299 for tag, value in dc._last_state.items(): 

300 if tag in ['histogram', 'entropy', 'fill_factor_history', 'entropy_history']: 

301 # the following accounts for the fact that the keys of dicts 

302 # are converted to str when writing to json and have to 

303 # converted back into numerical values 

304 dc._last_state[tag] = {} 

305 for key, val in value.items(): 

306 if isinstance(val, dict): 

307 val = {int(k): v for k, v in val.items()} 

308 dc._last_state[tag][int(key)] = val 

309 

310 return dc 

311 

312 

313def get_density_of_states_wl(dcs: Union[WangLandauDataContainer, 

314 Dict[Any, WangLandauDataContainer]], 

315 fill_factor_limit: float = None) \ 

316 -> Tuple[DataFrame, dict]: 

317 """Returns a pandas DataFrame with the total density of states from a 

318 :ref:`Wang-Landau simulation <wang_landau_ensemble>`. If a dict of data 

319 containers is provided the function also returns a dictionary that 

320 contains the standard deviation between the entropy of neighboring data 

321 containers in the overlap region. These errors should be small compared to 

322 the variation of the entropy across each bin. 

323 

324 The function can handle both a single data container and a dict thereof. 

325 In the latter case the data containers must cover a contiguous energy 

326 range and must at least partially overlap. 

327 

328 Parameters 

329 ---------- 

330 dcs 

331 Data container(s), from which to extract the density of states. 

332 fill_factor_limit 

333 Calculate the density of states using the entropy recorded up to the 

334 point when the specified fill factor limit was reached. Otherwise 

335 return the density of states for the last state. 

336 

337 Raises 

338 ------ 

339 TypeError 

340 If :attr:`dcs` does not correspond to not a single (dictionary) of data 

341 container(s) from which the entropy can retrieved. 

342 ValueError 

343 If the data container does not contain entropy information. 

344 ValueError 

345 If a fill factor limit has been provided and the data container either 

346 does not contain information about the entropy history or if the last 

347 fill factor is higher than the specified limit. 

348 ValueError 

349 If multiple data containers are provided and there are inconsistencies 

350 with regard to basic simulation parameters such as system size or 

351 energy spacing. 

352 ValueError 

353 If multiple data containers are provided and there is at least 

354 one energy region without overlap. 

355 """ 

356 

357 # preparations 

358 if isinstance(dcs, WangLandauDataContainer): 

359 # fetch raw entropy data from data container 

360 df = dcs.get_entropy(fill_factor_limit) 

361 if df is None: 

362 raise ValueError('Entropy information could not be retrieved from' 

363 ' the data container {}.'.format(dcs)) 

364 errors = None 

365 if len(dcs.fill_factor_history) == 0 or dcs.fill_factor > 1e-4: 

366 warn('The data container appears to contain data from an' 

367 ' underconverged Wang-Landau simulation.') 

368 

369 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer): 

370 # minimal consistency checks 

371 tags = list(dcs.keys()) 

372 tagref = tags[0] 

373 dcref = dcs[tagref] 

374 for tag in tags: 

375 dc = dcs[tag] 

376 if len(dc.structure) != len(dcref.structure): 

377 raise ValueError('Number of atoms differs between data containers ({}: {}, {}: {})' 

378 .format(tagref, dcref.ensemble_parameters['n_atoms'], 

379 tag, dc.ensemble_parameters['n_atoms'])) 

380 for param in ['energy_spacing', 'trial_move']: 

381 if dc.ensemble_parameters[param] != dcref.ensemble_parameters[param]: 

382 raise ValueError('{} differs between data containers ({}: {}, {}: {})' 

383 .format(param, 

384 tagref, dcref.ensemble_parameters['n_atoms'], 

385 tag, dc.ensemble_parameters['n_atoms'])) 

386 if len(dc.fill_factor_history) == 0 or dc.fill_factor > 1e-4: 

387 warn('Data container {} appears to contain data from an' 

388 ' underconverged Wang-Landau simulation.'.format(tag)) 

389 

390 # fetch raw entropy data from data containers 

391 entropies = {} 

392 for tag, dc in dcs.items(): 

393 entropies[tag] = dc.get_entropy(fill_factor_limit) 

394 if entropies[tag] is None: 

395 raise ValueError('Entropy information could not be retrieved' 

396 ' from the data container {}.'.format(dc)) 

397 

398 # sort entropies by energy 

399 entropies = OrderedDict(sorted(entropies.items(), key=lambda row: row[1].energy.iloc[0])) 

400 

401 # line up entropy data 

402 errors = {} 

403 tags = list(entropies.keys()) 

404 for tag1, tag2 in zip(tags[:-1], tags[1:]): 

405 df1 = entropies[tag1] 

406 df2 = entropies[tag2] 

407 if all(df2.energy.isin(df1.energy)): 

408 warn('Window {} is a subset of {}'.format(tag2, tag1)) 

409 left_lim = np.min(df2.energy) 

410 right_lim = np.max(df1.energy) 

411 if left_lim >= right_lim: 

412 raise ValueError('No overlap in the energy range {}...{}.\n' 

413 .format(right_lim, left_lim) + 

414 ' The closest data containers have tags "{}" and "{}".' 

415 .format(tag1, tag2)) 

416 df1_ = df1[(df1.energy >= left_lim) & (df1.energy <= right_lim)] 

417 df2_ = df2[(df2.energy >= left_lim) & (df2.energy <= right_lim)] 

418 offset = (df2_.entropy - df1_.entropy).mean() 

419 errors['{}-{}'.format(tag1, tag2)] = (df2_.entropy - df1_.entropy).std() 

420 entropies[tag2].entropy = entropies[tag2].entropy - offset 

421 

422 # compile entropy over the entire energy range 

423 data: Dict[float, float] = {} 

424 indices = {} 

425 counts = Counter() 

426 for df in entropies.values(): 

427 for index, en, ent in zip(df.index, df.energy, df.entropy): 

428 data[en] = data.get(en, 0) + ent 

429 counts[en] += 1 

430 indices[en] = index 

431 for en in data: 

432 data[en] = data[en] / counts[en] 

433 

434 # center entropy to prevent possible numerical issues 

435 entmin = np.min(list(data.values())) 

436 df = DataFrame(data={'energy': np.array(list(data.keys())), 

437 'entropy': np.array(np.array(list(data.values()))) - entmin}, 

438 index=list(indices.values())) 

439 else: 

440 raise TypeError('dcs ({}) must be a data container with entropy data' 

441 ' or be a list of data containers' 

442 .format(type(dcs))) 

443 

444 # density of states 

445 S_max = df.entropy.max() 

446 df['density'] = np.exp(df.entropy - S_max) / np.sum(np.exp(df.entropy - S_max)) 

447 

448 return df, errors 

449 

450 

451def _extract_filter_data(dc: BaseDataContainer, 

452 columns_to_keep: List[str], 

453 fill_factor_limit: float = None) -> DataFrame: 

454 """ Extract data from a data container and filter the content. 

455 

456 Parameters 

457 ---------- 

458 dc 

459 Data container from which to extract the data. 

460 columns_to_keep 

461 List of requested properties. 

462 fill_factor_limit 

463 Only include data recorded up to the point when the specified fill 

464 factor limit was reached when computing averages. Otherwise include 

465 all data. 

466 """ 

467 

468 df = dc.data 

469 if fill_factor_limit is not None: 

470 # only include data for fill factors up to the limit 

471 df_ffh = dc.fill_factor_history.astype( 

472 {'mctrial': np.int64, 'fill_factor': np.float64}) 

473 mctrial_last = df_ffh.loc[ 

474 df_ffh.fill_factor <= fill_factor_limit].mctrial.min() 

475 df = df.loc[df.mctrial <= mctrial_last] 

476 

477 return df.filter(columns_to_keep) 

478 

479 

480def get_average_observables_wl(dcs: Union[WangLandauDataContainer, 

481 Dict[Any, WangLandauDataContainer]], 

482 temperatures: List[float], 

483 observables: List[str] = None, 

484 boltzmann_constant: float = kB, 

485 fill_factor_limit: float = None) -> DataFrame: 

486 """Returns the average and the standard deviation of the energy from a 

487 :ref:`Wang-Landau simulation <wang_landau_ensemble>` for the temperatures 

488 specified. If the :attr:`observables` keyword argument is specified 

489 the function will also return the mean and standard deviation of the 

490 specified observables. 

491 

492 Parameters 

493 ---------- 

494 dcs 

495 Data container(s) from which to extract density of states 

496 as well as observables. 

497 temperatures 

498 Temperatures at which to compute the averages. 

499 observables 

500 Observables for which to compute averages; the observables 

501 must refer to fields in the data container. 

502 boltzmann_constant 

503 Boltzmann constant :math:`k_B` in appropriate 

504 units, i.e., units that are consistent 

505 with the underlying cluster expansion 

506 and the temperature units [default: eV/K]. 

507 fill_factor_limit 

508 Use data recorded up to the point when the specified fill factor limit 

509 was reached when computing averages. Otherwise use data for the last 

510 state. 

511 

512 Raises 

513 ------ 

514 ValueError 

515 If the data container(s) do(es) not contain entropy data 

516 from Wang-Landau simulation. 

517 ValueError 

518 If data container(s) do(es) not contain requested observable. 

519 """ 

520 

521 def check_observables(dc: WangLandauDataContainer, observables: Optional[List[str]]) -> None: 

522 """ Helper function that checks that observables are available in data frame. """ 

523 if observables is None: 

524 return 

525 for obs in observables: 

526 if obs not in dc.data.columns: 

527 raise ValueError('Observable ({}) not in data container.\n' 

528 'Available observables: {}'.format(obs, dc.data.columns)) 

529 

530 # preparation of observables 

531 columns_to_keep = ['potential', 'density'] 

532 if observables is not None: 

533 columns_to_keep.extend(observables) 

534 

535 # check that observables are available in data container 

536 # and prepare comprehensive data frame with relevant information 

537 if isinstance(dcs, WangLandauDataContainer): 

538 check_observables(dcs, observables) 

539 df_combined = _extract_filter_data(dcs, columns_to_keep, fill_factor_limit) 

540 dcref = dcs 

541 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer): 

542 dfs = [] 

543 for dc in dcs.values(): 

544 check_observables(dc, observables) 

545 dfs.append(_extract_filter_data(dc, columns_to_keep, fill_factor_limit)) 

546 df_combined = pd_concat([df for df in dfs], ignore_index=True) 

547 dcref = list(dcs.values())[0] 

548 else: 

549 raise TypeError('dcs ({}) must be a data container with entropy data' 

550 ' or be a list of data containers' 

551 .format(type(dcs))) 

552 

553 # fetch entropy and density of states from data container(s) 

554 df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit) 

555 

556 # compute density for each row in data container if observable averages 

557 # are to be computed 

558 if observables is not None: 

559 energy_spacing = dcref.ensemble_parameters['energy_spacing'] 

560 # NOTE: we rely on the indices of the df_density DataFrame to 

561 # correspond to the energy scale! This is expected to be handled in 

562 # the get_density_of_states function. 

563 bins = list(np.array(np.round(df_combined.potential / energy_spacing), dtype=int)) 

564 data_density = [dens / bins.count(k) for k, dens in df_density.density[bins].items()] 

565 

566 enref = np.min(df_density.energy) 

567 averages = [] 

568 for temperature in temperatures: 

569 

570 # mean and standard deviation of energy 

571 boltz = np.exp(- (df_density.energy - enref) / temperature / boltzmann_constant) 

572 sumint = np.sum(df_density.density * boltz) 

573 en_mean = np.sum(df_density.energy * df_density.density * boltz) / sumint 

574 en_std = np.sum(df_density.energy ** 2 * df_density.density * boltz) / sumint 

575 en_std = np.sqrt(en_std - en_mean ** 2) 

576 record = {'temperature': temperature, 

577 'potential_mean': en_mean, 

578 'potential_std': en_std} 

579 

580 # mean and standard deviation of other observables 

581 if observables is not None: 

582 boltz = np.exp(- (df_combined.potential - enref) / temperature / boltzmann_constant) 

583 sumint = np.sum(data_density * boltz) 

584 for obs in observables: 

585 obs_mean = np.sum(data_density * boltz * df_combined[obs]) / sumint 

586 obs_std = np.sum(data_density * boltz * df_combined[obs] ** 2) / sumint 

587 obs_std = np.sqrt(obs_std - obs_mean ** 2) 

588 record['{}_mean'.format(obs)] = obs_mean 

589 record['{}_std'.format(obs)] = obs_std 

590 

591 averages.append(record) 

592 

593 return DataFrame.from_dict(averages) 

594 

595 

596def get_average_cluster_vectors_wl(dcs: Union[WangLandauDataContainer, dict], 

597 cluster_space: ClusterSpace, 

598 temperatures: List[float], 

599 boltzmann_constant: float = kB, 

600 fill_factor_limit: float = None) -> DataFrame: 

601 """Returns the average cluster vectors from a :ref:`Wang-Landau simulation 

602 <wang_landau_ensemble>` for the temperatures specified. 

603 

604 Parameters 

605 ---------- 

606 dcs 

607 Data container(s), from which to extract density of states 

608 as well as observables. 

609 cluster_space 

610 Cluster space to use for calculation of cluster vectors. 

611 temperatures 

612 Temperatures at which to compute the averages. 

613 boltzmann_constant 

614 Boltzmann constant :math:`k_B` in appropriate 

615 units, i.e., units that are consistent 

616 with the underlying cluster expansion 

617 and the temperature units [default: eV/K]. 

618 fill_factor_limit 

619 Use data recorded up to the point when the specified fill factor limit 

620 was reached when computing the average cluster vectors. Otherwise use 

621 data for the last state. 

622 

623 Raises 

624 ------ 

625 ValueError 

626 If the data container(s) do(es) not contain entropy data 

627 from Wang-Landau simulation. 

628 """ 

629 

630 # fetch potential and structures 

631 if isinstance(dcs, WangLandauDataContainer): 

632 potential, trajectory = dcs.get('potential', 'trajectory', 

633 fill_factor_limit=fill_factor_limit) 

634 energy_spacing = dcs.ensemble_parameters['energy_spacing'] 

635 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer): 

636 potential, trajectory = [], [] 

637 for dc in dcs.values(): 

638 p, t = dc.get('potential', 'trajectory', fill_factor_limit=fill_factor_limit) 

639 potential.extend(p) 

640 trajectory.extend(t) 

641 energy_spacing = list(dcs.values())[0].ensemble_parameters['energy_spacing'] 

642 potential = np.array(potential) 

643 else: 

644 raise TypeError('dcs ({}) must be a data container with entropy data' 

645 ' or be a list of data containers' 

646 .format(type(dcs))) 

647 

648 # fetch entropy and density of states from data container(s) 

649 df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit) 

650 

651 # compute weighted density and cluster vector for each bin in energy 

652 # range; the weighted density is the total density divided by the number 

653 # of structures that fall in the respective bin 

654 # NOTE: the following code relies on the indices of the df_density 

655 # DataFrame to correspond to the energy scale. This is expected to be 

656 # handled in the get_density_of_states function. 

657 cvs = [] 

658 weighted_density = [] 

659 bins = list(np.array(np.round(potential / energy_spacing), dtype=int)) 

660 for k, structure in zip(bins, trajectory): 

661 cvs.append(cluster_space.get_cluster_vector(structure)) 

662 weighted_density.append(df_density.density[k] / bins.count(k)) 

663 

664 # compute mean and standard deviation (std) of temperature weighted 

665 # cluster vector 

666 averages = [] 

667 enref = np.min(potential) 

668 for temperature in temperatures: 

669 boltz = np.exp(- (potential - enref) / temperature / boltzmann_constant) 

670 sumint = np.sum(weighted_density * boltz) 

671 cv_mean = np.array([np.sum(weighted_density * boltz * cv) / sumint 

672 for cv in np.transpose(cvs)]) 

673 cv_std = np.array([np.sum(weighted_density * boltz * cv ** 2) / sumint 

674 for cv in np.transpose(cvs)]) 

675 cv_std = np.sqrt(cv_std - cv_mean ** 2) 

676 record = {'temperature': temperature, 

677 'cv_mean': cv_mean, 

678 'cv_std': cv_std} 

679 averages.append(record) 

680 

681 return DataFrame.from_dict(averages)