Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Definition of the Wang-Landau data container class.""" 

2 

3from warnings import warn 

4from collections import Counter, OrderedDict 

5from typing import Any, BinaryIO, Counter as CounterType, Dict, List, Optional, TextIO, Tuple, Union 

6 

7import numpy as np 

8import pandas as pd 

9 

10from ase.units import kB 

11from ase import Atoms 

12from pandas import DataFrame, concat as pd_concat 

13 

14from icet import ClusterSpace 

15from .base_data_container import BaseDataContainer 

16 

17 

class WangLandauDataContainer(BaseDataContainer):
    """
    Data container for storing information concerned with :ref:`Wang-Landau
    simulation <wang_landau_ensemble>` performed with mchammer.

    Parameters
    ----------
    structure : ase.Atoms
        reference atomic structure associated with the data container

    ensemble_parameters : dict
        parameters associated with the underlying ensemble

    metadata : dict
        metadata associated with the data container
    """

    def _update_last_state(self,
                           last_step: int,
                           occupations: List[int],
                           accepted_trials: int,
                           random_state: tuple,
                           fill_factor: float,
                           fill_factor_history: Dict[int, float],
                           entropy_history: Dict[int, Dict[int, float]],
                           histogram: Dict[int, int],
                           entropy: Dict[int, float]):
        """Updates last state of the Wang-Landau simulation.

        Parameters
        ----------
        last_step
            last trial step
        occupations
            occupation vector observed during the last trial step
        accepted_trials
            number of current accepted trial steps
        random_state
            tuple representing the last state of the random generator
        fill_factor
            fill factor of Wang-Landau algorithm
        fill_factor_history
            evolution of the fill factor of Wang-Landau algorithm (key=MC
            trial step, value=fill factor)
        entropy_history
            evolution of the (relative) entropy accumulated during Wang-Landau
            simulation (key=MC trial step, value=(key=bin, value=entropy))
        histogram
            histogram of states visited during Wang-Landau simulation
        entropy
            (relative) entropy accumulated during Wang-Landau simulation
        """
        super()._update_last_state(
            last_step=last_step,
            occupations=occupations,
            accepted_trials=accepted_trials,
            random_state=random_state)
        self._last_state['fill_factor'] = fill_factor
        self._last_state['fill_factor_history'] = fill_factor_history
        self._last_state['entropy_history'] = entropy_history
        self._last_state['histogram'] = histogram
        self._last_state['entropy'] = entropy

    @property
    def fill_factor(self) -> float:
        """ final value of the fill factor in the Wang-Landau algorithm """
        return float(self._last_state['fill_factor'])

    @property
    def fill_factor_history(self) -> DataFrame:
        """ evolution of the fill factor in the Wang-Landau algorithm """
        return DataFrame({'mctrial': list(self._last_state['fill_factor_history'].keys()),
                          'fill_factor': list(self._last_state['fill_factor_history'].values())})

    def get(self,
            *tags: str,
            fill_factor_limit: Optional[float] = None) \
            -> Union[np.ndarray, List[Atoms], Tuple[np.ndarray, List[Atoms]]]:
        """Returns the accumulated data for the requested observables,
        including configurations stored in the data container. The latter
        can be achieved by including 'trajectory' as one of the tags.

        Parameters
        ----------
        tags
            names of the requested properties
        fill_factor_limit
            return data recorded up to the point when the specified fill
            factor limit was reached; if ``None`` (default) return all data

        Raises
        ------
        TypeError
            if tags is empty
        ValueError
            if observables are requested that are not in data container

        Examples
        --------
        Below the `get` method is illustrated but first we require a data container.

        >>> from ase import Atoms
        >>> from icet import ClusterExpansion, ClusterSpace
        >>> from mchammer.calculators import ClusterExpansionCalculator
        >>> from mchammer.ensembles import WangLandauEnsemble

        >>> # prepare cluster expansion
        >>> prim = Atoms('Au', positions=[[0, 0, 0]], cell=[1, 1, 10], pbc=True)
        >>> cs = ClusterSpace(prim, cutoffs=[1.1], chemical_symbols=['Ag', 'Au'])
        >>> ce = ClusterExpansion(cs, [0, 0, 2])

        >>> # prepare initial configuration
        >>> structure = prim.repeat((4, 4, 1))
        >>> for k in range(8):
        ...     structure[k].symbol = 'Ag'

        >>> # set up and run Wang-Landau simulation
        >>> calculator = ClusterExpansionCalculator(structure, ce)
        >>> mc = WangLandauEnsemble(structure=structure,
        ...                         calculator=calculator,
        ...                         energy_spacing=1,
        ...                         dc_filename='ising_2d_run.dc',
        ...                         fill_factor_limit=0.3)
        >>> mc.run(number_of_trial_steps=len(structure)*3000)  # in practice one requires more steps

        We can now access the data container by reading it from file by using
        the `read` method. For the purpose of this example, however, we access
        the data container associated with the ensemble directly.

        >>> dc = mc.data_container

        The following lines illustrate how to use the `get` method
        for extracting data from the data container.

        >>> # obtain all values of the potential represented by
        >>> # the cluster expansion and the MC trial step along the
        >>> # trajectory
        >>> import matplotlib.pyplot as plt
        >>> s, p = dc.get('mctrial', 'potential')
        >>> _ = plt.plot(s, p)

        >>> # as above but this time only included data recorded up to
        >>> # the point when the fill factor reached below 0.6
        >>> s, p = dc.get('mctrial', 'potential', fill_factor_limit=0.6)
        >>> _ = plt.plot(s, p)
        >>> plt.show()

        >>> # obtain configurations along the trajectory along with
        >>> # their potential
        >>> p, confs = dc.get('potential', 'trajectory')
        """

        if len(tags) == 0:
            raise TypeError('Missing tags argument')

        # trajectories are stored internally as occupation vectors
        local_tags = ['occupations' if tag == 'trajectory' else tag for tag in tags]

        for tag in local_tags:
            # mctrial is always available (it is used as the index below);
            # note: previously this read `tag in 'mctrial'`, which silently
            # skipped validation for any substring of 'mctrial' (e.g. 'mc')
            if tag == 'mctrial':
                continue
            if tag not in self.observables:
                raise ValueError('No observable named {} in data container'.format(tag))

        # collect data
        mctrials = [row_dict['mctrial'] for row_dict in self._data_list]
        data = pd.DataFrame.from_records(self._data_list, index=mctrials, columns=local_tags)
        if fill_factor_limit is not None:
            # only include data for fill factors up to the limit
            df_ffh = self.fill_factor_history.astype(
                {'mctrial': np.int64, 'fill_factor': np.float64})
            mctrial_last = df_ffh.loc[
                df_ffh.fill_factor <= fill_factor_limit].mctrial.min()
            data = data.loc[data.index <= mctrial_last]
        data.dropna(inplace=True)

        # handling of trajectory
        def occupation_to_atoms(occupation):
            # reconstruct an Atoms object from an occupation vector using
            # the reference structure stored in the data container
            structure = self.structure.copy()
            structure.numbers = occupation
            return structure

        data_list = []
        for tag in local_tags:
            if tag == 'occupations':
                traj = [occupation_to_atoms(o) for o in data['occupations']]
                data_list.append(traj)
            else:
                data_list.append(data[tag].values)

        if len(data_list) > 1:
            return tuple(data_list)
        else:
            return data_list[0]

    def get_entropy(self, fill_factor_limit: Optional[float] = None) -> Optional[DataFrame]:
        """Returns the (relative) entropy from this data container accumulated
        during a :ref:`Wang-Landau simulation <wang_landau_ensemble>`. Returns
        ``None`` if the data container does not contain the required
        information.

        Parameters
        ----------
        fill_factor_limit
            return the entropy recorded up to the point when the specified fill
            factor limit was reached, or ``None`` if the entropy history is
            empty or the last fill factor is above the limit; otherwise
            return the entropy for the last state
        """

        if 'entropy' not in self._last_state:
            warn('There is no entropy information in the data container.')
            return None
        entropy = self._last_state['entropy']
        if fill_factor_limit is not None:
            if 'entropy_history' not in self._last_state or \
                    len(self._last_state['entropy_history']) == 0:
                warn('The entropy history is empty.')
                return None
            if self._last_state['fill_factor'] > fill_factor_limit:
                warn('The last fill factor {} is higher than the limit'
                     ' {}.'.format(self.fill_factor, fill_factor_limit))
                return None
            # pick the entropy recorded at the first step at which the
            # fill factor dropped to (or below) the requested limit
            for step, fill_factor in self._last_state['fill_factor_history'].items():
                if fill_factor <= fill_factor_limit:
                    entropy = self._last_state['entropy_history'][step]
                    break

        # compile entropy into DataFrame
        energy_spacing = self.ensemble_parameters['energy_spacing']
        df = DataFrame(data={'energy': energy_spacing * np.array(list(entropy.keys())),
                             'entropy': np.array(list(entropy.values()))},
                       index=list(entropy.keys()))
        # shift entropy for numerical stability
        df['entropy'] -= np.min(df['entropy'])

        return df

    def get_histogram(self) -> Optional[DataFrame]:
        """Returns the histogram from this data container accumulated since the
        last update of the fill factor. Returns ``None`` if the data container
        does not contain the required information.
        """

        if 'histogram' not in self._last_state:
            return None

        # compile histogram into DataFrame
        histogram = self._last_state['histogram']
        energy_spacing = self.ensemble_parameters['energy_spacing']
        df = DataFrame(data={'energy': energy_spacing * np.array(list(histogram.keys())),
                             'histogram': np.array(list(histogram.values()))},
                       index=list(histogram.keys()))

        return df

    @classmethod
    # todo: cls and the return should be type hinted as BaseDataContainer.
    # Unfortunately, this requires from __future__ import annotations, which
    # in turn requires Python 3.8.
    def read(cls, infile: Union[str, BinaryIO, TextIO], old_format: bool = False):
        """Reads data container from file.

        Parameters
        ----------
        infile
            file from which to read
        old_format
            If true use old json format to read runtime data; default to false

        Raises
        ------
        FileNotFoundError
            if file is not found (str)
        ValueError
            if file is of incorrect type (not a tarball)
        """
        dc = super(WangLandauDataContainer, cls).read(infile=infile, old_format=old_format)

        for tag, value in dc._last_state.items():
            if tag in ['histogram', 'entropy', 'fill_factor_history', 'entropy_history']:
                # the following accounts for the fact that the keys of dicts
                # are converted to str when writing to json and have to be
                # converted back into numerical values
                dc._last_state[tag] = {}
                for key, val in value.items():
                    if isinstance(val, dict):
                        val = {int(k): v for k, v in val.items()}
                    dc._last_state[tag][int(key)] = val

        return dc

310 

311 

def get_density_of_states_wl(dcs: Union[WangLandauDataContainer,
                                        Dict[Any, WangLandauDataContainer]],
                             fill_factor_limit: Optional[float] = None) \
        -> Tuple[DataFrame, dict]:
    """Returns a pandas DataFrame with the total density of states from a
    :ref:`Wang-Landau simulation <wang_landau_ensemble>`. If a dict of data
    containers is provided the function also returns a dictionary that
    contains the standard deviation between the entropy of neighboring data
    containers in the overlap region. These errors should be small compared to
    the variation of the entropy across each bin.

    The function can handle both a single data container and a dict thereof.
    In the latter case the data containers must cover a contiguous energy
    range and must at least partially overlap.

    Parameters
    ----------
    dcs
        data container(s), from which to extract the density of states
    fill_factor_limit
        calculate the density of states using the entropy recorded up to the
        point when the specified fill factor limit was reached; otherwise
        return the density of states for the last state

    Raises
    ------
    TypeError
        if dcs does not correspond to not a single (dictionary) of data
        container(s) from which the entropy can retrieved
    ValueError
        if the data container does not contain entropy information
    ValueError
        if a fill factor limit has been provided and the data container either
        does not contain information about the entropy history or if the last
        fill factor is higher than the specified limit
    ValueError
        if multiple data containers are provided and there are inconsistencies
        with regard to basic simulation parameters such as system size or
        energy spacing
    ValueError
        if multiple data containers are provided and there is at least
        one energy region without overlap
    """

    # preparations
    if isinstance(dcs, WangLandauDataContainer):
        # fetch raw entropy data from data container
        df = dcs.get_entropy(fill_factor_limit)
        if df is None:
            raise ValueError('Entropy information could not be retrieved from'
                             ' the data container {}.'.format(dcs))
        errors = None
        if len(dcs.fill_factor_history) == 0 or dcs.fill_factor > 1e-4:
            warn('The data container appears to contain data from an'
                 ' underconverged Wang-Landau simulation.')

    elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):
        # minimal consistency checks
        tags = list(dcs.keys())
        tagref = tags[0]
        dcref = dcs[tagref]
        for tag in tags:
            dc = dcs[tag]
            if len(dc.structure) != len(dcref.structure):
                raise ValueError('Number of atoms differs between data containers ({}: {}, {}: {})'
                                 .format(tagref, dcref.ensemble_parameters['n_atoms'],
                                         tag, dc.ensemble_parameters['n_atoms']))
            for param in ['energy_spacing', 'trial_move']:
                if dc.ensemble_parameters[param] != dcref.ensemble_parameters[param]:
                    # report the values of the mismatching parameter
                    # (previously this erroneously printed 'n_atoms')
                    raise ValueError('{} differs between data containers ({}: {}, {}: {})'
                                     .format(param,
                                             tagref, dcref.ensemble_parameters[param],
                                             tag, dc.ensemble_parameters[param]))
            if len(dc.fill_factor_history) == 0 or dc.fill_factor > 1e-4:
                warn('Data container {} appears to contain data from an'
                     ' underconverged Wang-Landau simulation.'.format(tag))

        # fetch raw entropy data from data containers
        entropies = {}
        for tag, dc in dcs.items():
            entropies[tag] = dc.get_entropy(fill_factor_limit)
            if entropies[tag] is None:
                raise ValueError('Entropy information could not be retrieved'
                                 ' from the data container {}.'.format(dc))

        # sort entropies by energy
        entropies = OrderedDict(sorted(entropies.items(), key=lambda row: row[1].energy.iloc[0]))

        # line up entropy data: shift each window so that it agrees with its
        # left neighbor in the overlap region, and record the residual spread
        errors = {}
        tags = list(entropies.keys())
        for tag1, tag2 in zip(tags[:-1], tags[1:]):
            df1 = entropies[tag1]
            df2 = entropies[tag2]
            if all(df2.energy.isin(df1.energy)):
                warn('Window {} is a subset of {}'.format(tag2, tag1))
            left_lim = np.min(df2.energy)
            right_lim = np.max(df1.energy)
            if left_lim >= right_lim:
                raise ValueError('No overlap in the energy range {}...{}.\n'
                                 .format(right_lim, left_lim) +
                                 ' The closest data containers have tags "{}" and "{}".'
                                 .format(tag1, tag2))
            df1_ = df1[(df1.energy >= left_lim) & (df1.energy <= right_lim)]
            df2_ = df2[(df2.energy >= left_lim) & (df2.energy <= right_lim)]
            offset = (df2_.entropy - df1_.entropy).mean()
            errors['{}-{}'.format(tag1, tag2)] = (df2_.entropy - df1_.entropy).std()
            entropies[tag2].entropy = entropies[tag2].entropy - offset

        # compile entropy over the entire energy range, averaging bins that
        # appear in more than one window
        data = {}  # type: Dict[float, float]
        indices = {}
        counts = Counter()  # type: CounterType[float]
        for df in entropies.values():
            for index, en, ent in zip(df.index, df.energy, df.entropy):
                data[en] = data.get(en, 0) + ent
                counts[en] += 1
                indices[en] = index
        for en in data:
            data[en] = data[en] / counts[en]

        # center entropy to prevent possible numerical issues
        entmin = np.min(list(data.values()))
        df = DataFrame(data={'energy': np.array(list(data.keys())),
                             'entropy': np.array(list(data.values())) - entmin},
                       index=list(indices.values()))
    else:
        raise TypeError('dcs ({}) must be a data container with entropy data'
                        ' or be a dict of data containers'
                        .format(type(dcs)))

    # density of states; the maximum entropy is subtracted before
    # exponentiation for numerical stability
    S_max = df.entropy.max()
    df['density'] = np.exp(df.entropy - S_max) / np.sum(np.exp(df.entropy - S_max))

    return df, errors

448 

449 

def _extract_filter_data(dc: BaseDataContainer,
                         columns_to_keep: List[str],
                         fill_factor_limit: float = None) -> DataFrame:
    """ Extract data from a data container and filter the content.

    Parameters
    ----------
    dc
        data container, from which to extract the data
    columns_to_keep
        list of requested properties
    fill_factor_limit
        only include data recorded up to the point when the specified fill
        factor limit was reached when computing averages; otherwise include
        all data
    """

    frame = dc.data
    if fill_factor_limit is not None:
        # Find the first MC trial step at which the fill factor dropped to
        # (or below) the limit and truncate the data at that step.
        history = dc.fill_factor_history.astype(
            {'mctrial': np.int64, 'fill_factor': np.float64})
        converged = history[history.fill_factor <= fill_factor_limit]
        cutoff = converged.mctrial.min()
        frame = frame[frame.mctrial <= cutoff]

    return frame.filter(columns_to_keep)

477 

478 

def get_average_observables_wl(dcs: Union[WangLandauDataContainer,
                                          Dict[Any, WangLandauDataContainer]],
                               temperatures: List[float],
                               observables: List[str] = None,
                               boltzmann_constant: float = kB,
                               fill_factor_limit: float = None) -> DataFrame:
    """Returns the average and the standard deviation of the energy from a
    :ref:`Wang-Landau simulation <wang_landau_ensemble>` for the temperatures
    specified. If the ``observables`` keyword argument is specified
    the function will also return the mean and standard deviation of the
    specified observables.

    Parameters
    ----------
    dcs
        data container(s), from which to extract density of states
        as well as observables
    temperatures
        temperatures, at which to compute the averages
    observables
        observables, for which to compute averages; the observables
        must refer to fields in the data container
    boltzmann_constant
        Boltzmann constant :math:`k_B` in appropriate
        units, i.e. units that are consistent
        with the underlying cluster expansion
        and the temperature units [default: eV/K]
    fill_factor_limit
        use data recorded up to the point when the specified fill factor limit
        was reached when computing averages; otherwise use data for the last
        state

    Raises
    ------
    ValueError
        if the data container(s) do(es) not contain entropy data
        from Wang-Landau simulation
    ValueError
        if data container(s) do(es) not contain requested observable
    """

    def check_observables(dc: WangLandauDataContainer, observables: Optional[List[str]]) -> None:
        """ Helper function that checks that observables are available in data frame. """
        if observables is None:
            return
        for obs in observables:
            if obs not in dc.data.columns:
                raise ValueError('Observable ({}) not in data container.\n'
                                 'Available observables: {}'.format(obs, dc.data.columns))

    # preparation of observables
    columns_to_keep = ['potential', 'density']
    if observables is not None:
        columns_to_keep.extend(observables)

    # check that observables are available in data container
    # and prepare comprehensive data frame with relevant information
    if isinstance(dcs, WangLandauDataContainer):
        check_observables(dcs, observables)
        df_combined = _extract_filter_data(dcs, columns_to_keep, fill_factor_limit)
        dcref = dcs
    elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):
        dfs = []
        for dc in dcs.values():
            check_observables(dc, observables)
            dfs.append(_extract_filter_data(dc, columns_to_keep, fill_factor_limit))
        df_combined = pd_concat(dfs, ignore_index=True)
        dcref = list(dcs.values())[0]
    else:
        raise TypeError('dcs ({}) must be a data container with entropy data'
                        ' or be a dict of data containers'
                        .format(type(dcs)))

    # fetch entropy and density of states from data container(s)
    df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit)

    # compute density for each row in data container if observable averages
    # are to be computed
    if observables is not None:
        energy_spacing = dcref.ensemble_parameters['energy_spacing']
        # NOTE: we rely on the indices of the df_density DataFrame to
        # correspond to the energy scale! This is expected to be handled in
        # the get_density_of_states function.
        bins = list(np.array(np.round(df_combined.potential / energy_spacing), dtype=int))
        # precompute bin occupation counts; calling bins.count(k) per entry
        # would be O(n^2)
        bin_counts = Counter(bins)
        data_density = [dens / bin_counts[k] for k, dens in df_density.density[bins].items()]

    enref = np.min(df_density.energy)
    averages = []
    for temperature in temperatures:

        # mean and standard deviation of energy; energies are referenced to
        # the minimum to avoid overflow in the Boltzmann factor
        boltz = np.exp(- (df_density.energy - enref) / temperature / boltzmann_constant)
        sumint = np.sum(df_density.density * boltz)
        en_mean = np.sum(df_density.energy * df_density.density * boltz) / sumint
        en_std = np.sum(df_density.energy ** 2 * df_density.density * boltz) / sumint
        en_std = np.sqrt(en_std - en_mean ** 2)
        record = {'temperature': temperature,
                  'potential_mean': en_mean,
                  'potential_std': en_std}

        # mean and standard deviation of other observables
        if observables is not None:
            boltz = np.exp(- (df_combined.potential - enref) / temperature / boltzmann_constant)
            sumint = np.sum(data_density * boltz)
            for obs in observables:
                obs_mean = np.sum(data_density * boltz * df_combined[obs]) / sumint
                obs_std = np.sum(data_density * boltz * df_combined[obs] ** 2) / sumint
                obs_std = np.sqrt(obs_std - obs_mean ** 2)
                record['{}_mean'.format(obs)] = obs_mean
                record['{}_std'.format(obs)] = obs_std

        averages.append(record)

    return DataFrame.from_dict(averages)

593 

594 

def get_average_cluster_vectors_wl(dcs: Union[WangLandauDataContainer, dict],
                                   cluster_space: ClusterSpace,
                                   temperatures: List[float],
                                   boltzmann_constant: float = kB,
                                   fill_factor_limit: float = None) -> DataFrame:
    """Returns the average cluster vectors from a :ref:`Wang-Landau simulation
    <wang_landau_ensemble>` for the temperatures specified.

    Parameters
    ----------
    dcs
        data container(s), from which to extract density of states
        as well as observables
    cluster_space
        cluster space to use for calculation of cluster vectors
    temperatures
        temperatures, at which to compute the averages
    boltzmann_constant
        Boltzmann constant :math:`k_B` in appropriate
        units, i.e. units that are consistent
        with the underlying cluster expansion
        and the temperature units [default: eV/K]
    fill_factor_limit
        use data recorded up to the point when the specified fill factor limit
        was reached when computing the average cluster vectors; otherwise use
        data for the last state

    Raises
    ------
    ValueError
        if the data container(s) do(es) not contain entropy data
        from Wang-Landau simulation
    """

    # fetch potential and structures
    if isinstance(dcs, WangLandauDataContainer):
        potential, trajectory = dcs.get('potential', 'trajectory',
                                        fill_factor_limit=fill_factor_limit)
        energy_spacing = dcs.ensemble_parameters['energy_spacing']
    elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):
        potential, trajectory = [], []
        for dc in dcs.values():
            p, t = dc.get('potential', 'trajectory', fill_factor_limit=fill_factor_limit)
            potential.extend(p)
            trajectory.extend(t)
        energy_spacing = list(dcs.values())[0].ensemble_parameters['energy_spacing']
        potential = np.array(potential)
    else:
        raise TypeError('dcs ({}) must be a data container with entropy data'
                        ' or be a dict of data containers'
                        .format(type(dcs)))

    # fetch entropy and density of states from data container(s)
    df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit)

    # compute weighted density and cluster vector for each bin in energy
    # range; the weighted density is the total density divided by the number
    # of structures that fall in the respective bin
    # NOTE: the following code relies on the indices of the df_density
    # DataFrame to correspond to the energy scale. This is expected to be
    # handled in the get_density_of_states function.
    cvs = []
    weighted_density = []
    bins = list(np.array(np.round(potential / energy_spacing), dtype=int))
    # precompute bin occupation counts; calling bins.count(k) inside the
    # loop would be O(n^2)
    bin_counts = Counter(bins)
    for k, structure in zip(bins, trajectory):
        cvs.append(cluster_space.get_cluster_vector(structure))
        weighted_density.append(df_density.density[k] / bin_counts[k])

    # compute mean and standard deviation (std) of temperature weighted
    # cluster vector; energies are referenced to the minimum to avoid
    # overflow in the Boltzmann factor
    averages = []
    enref = np.min(potential)
    for temperature in temperatures:
        boltz = np.exp(- (potential - enref) / temperature / boltzmann_constant)
        sumint = np.sum(weighted_density * boltz)
        cv_mean = np.array([np.sum(weighted_density * boltz * cv) / sumint
                            for cv in np.transpose(cvs)])
        cv_std = np.array([np.sum(weighted_density * boltz * cv ** 2) / sumint
                           for cv in np.transpose(cvs)])
        cv_std = np.sqrt(cv_std - cv_mean ** 2)
        record = {'temperature': temperature,
                  'cv_mean': cv_mean,
                  'cv_std': cv_std}
        averages.append(record)

    return DataFrame.from_dict(averages)