Coverage for mchammer/data_containers/wang_landau_data

1"""Definition of the Wang-Landau data container class."""

3from warnings import warn

4from collections import Counter, OrderedDict

5from typing import Any, BinaryIO, Dict, List, Optional, TextIO, Tuple, Union

7import numpy as np

8import pandas as pd

10from ase.units import kB

11from ase import Atoms

12from pandas import DataFrame, concat as pd_concat

14from icet import ClusterSpace

15from .base_data_container import BaseDataContainer

18class WangLandauDataContainer(BaseDataContainer):

19 """

20 Data container for storing information concerned with :ref:`Wang-Landau

21 simulation <wang_landau_ensemble>` performed with :program:`mchammer`.

23 Parameters

24 ----------

25 structure

26 Reference atomic structure associated with the data container.

27 ensemble_parameters

28 Parameters associated with the underlying ensemble.

29 metadata

30 Metadata associated with the data container.

31 """

33 def _update_last_state(self,

34 last_step: int,

35 occupations: List[int],

36 accepted_trials: int,

37 random_state: tuple,

38 fill_factor: float,

39 fill_factor_history: Dict[int, float],

40 entropy_history: Dict[int, Dict[int, float]],

41 histogram=Dict[int, int],

42 entropy=Dict[int, float]):

43 """Updates last state of the Wang-Landau simulation.

45 Parameters

46 ----------

47 last_step

48 Last trial step.

49 occupations

50 Occupation vector observed during the last trial step.

51 accepted_trial

52 Number of current accepted trial steps.

53 random_state

54 Tuple representing the last state of the random generator.

55 fill_factor

56 Fill factor of Wang-Landau algorithm.

57 fill_factor_history

58 Evolution of the fill factor of Wang-Landau algorithm (key=MC

59 trial step, value=fill factor).

60 entropy_history

61 Evolution of the (relative) entropy accumulated during Wang-Landau

62 simulation (key=MC trial step, value=(key=bin, value=entropy)).

63 histogram

64 Histogram of states visited during Wang-Landau simulation.

65 entropy

66 (Relative) entropy accumulated during Wang-Landau simulation.

67 """

68 super()._update_last_state(

69 last_step=last_step,

70 occupations=occupations,

71 accepted_trials=accepted_trials,

72 random_state=random_state)

73 self._last_state['fill_factor'] = fill_factor

74 self._last_state['fill_factor_history'] = fill_factor_history

75 self._last_state['entropy_history'] = entropy_history

76 self._last_state['histogram'] = histogram

77 self._last_state['entropy'] = entropy

79 @property

80 def fill_factor(self) -> float:

81 """ Final value of the fill factor in the Wang-Landau algorithm. """

82 return float(self._last_state['fill_factor'])

84 @property

85 def fill_factor_history(self) -> DataFrame:

86 """ Evolution of the fill factor in the Wang-Landau algorithm. """

87 return DataFrame({'mctrial': list(self._last_state['fill_factor_history'].keys()),

88 'fill_factor': list(self._last_state['fill_factor_history'].values())})

90 def get(self,

91 *tags: str,

92 fill_factor_limit: float = None) \

93 -> Union[np.ndarray, List[Atoms], Tuple[np.ndarray, List[Atoms]]]:

94 """Returns the accumulated data for the requested observables,

95 including configurations stored in the data container. The latter

96 can be achieved by including ``'trajectory'`` as one of the tags.

98 Parameters

99 ----------

100 tags

101 Names of the requested properties.

102 fill_factor_limit

103 Return data recorded up to the point when the specified fill

104 factor limit was reached, or ``None`` if the entropy history is

105 empty or the last fill factor is above the limit; otherwise

106 return all data.

107

108 Raises

109 ------

110 ValueError

111 If :attr:`tags` is empty.

112 ValueError

113 If observables are requested that are not in the data container.

114

115 Examples

116 --------

117 Below the :func:`get` method is

118 illustrated but first we require a data container.

119

120 >>> from ase import Atoms

121 >>> from icet import ClusterExpansion, ClusterSpace

122 >>> from mchammer.calculators import ClusterExpansionCalculator

123 >>> from mchammer.ensembles import WangLandauEnsemble

124

125 >>> # prepare cluster expansion

126 >>> prim = Atoms('Au', positions=[[0, 0, 0]], cell=[1, 1, 10], pbc=True)

127 >>> cs = ClusterSpace(prim, cutoffs=[1.1], chemical_symbols=['Ag', 'Au'])

128 >>> ce = ClusterExpansion(cs, [0, 0, 2])

129

130 >>> # prepare initial configuration

131 >>> structure = prim.repeat((4, 4, 1))

132 >>> for k in range(8):

133 ... structure[k].symbol = 'Ag'

134

135 >>> # set up and run Wang-Landau simulation

136 >>> calculator = ClusterExpansionCalculator(structure, ce)

137 >>> mc = WangLandauEnsemble(structure=structure,

138 ... calculator=calculator,

139 ... energy_spacing=1,

140 ... dc_filename='ising_2d_run.dc',

141 ... fill_factor_limit=0.3)

142 >>> mc.run(number_of_trial_steps=len(structure)*3000) # in practice one requires more steps

143

144 We can now access the data container by reading it from file

145 by using the :func:`read` method. For the purpose of this

146 example, however, we access the data container associated with

147 the ensemble directly.

148

149 >>> dc = mc.data_container

150

151 The following lines illustrate how to use the :func:`get` method

152 for extracting data from the data container.

153

154 >>> # obtain all values of the potential represented by

155 >>> # the cluster expansion and the MC trial step along the

156 >>> # trajectory

157 >>> import matplotlib.pyplot as plt

158 >>> s, p = dc.get('mctrial', 'potential')

159 >>> _ = plt.plot(s, p)

160

161 >>> # as above but this time only included data recorded up to

162 >>> # the point when the fill factor reached below 0.6

163 >>> s, p = dc.get('mctrial', 'potential', fill_factor_limit=0.6)

164 >>> _ = plt.plot(s, p)

165 >>> plt.show(block=False)

166

167 >>> # obtain configurations along the trajectory along with

168 >>> # their potential

169 >>> p, confs = dc.get('potential', 'trajectory')

170

171 """

172

173 if len(tags) == 0: 173 ↛ 174line 173 didn't jump to line 174, because the condition on line 173 was never true

174 raise TypeError('Missing tags argument')

175

176 local_tags = ['occupations' if tag == 'trajectory' else tag for tag in tags]

177

178 for tag in local_tags:

179 if tag in 'mctrial':

180 continue

181 if tag not in self.observables: 181 ↛ 182line 181 didn't jump to line 182, because the condition on line 181 was never true

182 raise ValueError('No observable named {} in data container'.format(tag))

183

184 # collect data

185 mctrials = [row_dict['mctrial'] for row_dict in self._data_list]

186 data = pd.DataFrame.from_records(self._data_list, index=mctrials, columns=local_tags)

187 if fill_factor_limit is not None:

188 # only include data for fill factors up to the limit

189 df_ffh = self.fill_factor_history.astype(

190 {'mctrial': np.int64, 'fill_factor': np.float64})

191 mctrial_last = df_ffh.loc[

192 df_ffh.fill_factor <= fill_factor_limit].mctrial.min()

193 data = data.loc[data.index <= mctrial_last]

194 data.dropna(inplace=True)

195

196 # handling of trajectory

197 def occupation_to_atoms(occupation):

198 structure = self.structure.copy()

199 structure.numbers = occupation

200 return structure

201

202 data_list = []

203 for tag in local_tags:

204 if tag == 'occupations':

205 traj = [occupation_to_atoms(o) for o in data['occupations']]

206 data_list.append(traj)

207 else:

208 data_list.append(data[tag].values)

209

210 if len(data_list) > 1:

211 return tuple(data_list)

212 else:

213 return data_list[0]

214

215 def get_entropy(self, fill_factor_limit: float = None) -> DataFrame:

216 """Returns the (relative) entropy from this data container accumulated

217 during a :ref:`Wang-Landau simulation <wang_landau_ensemble>`. Returns

218 ``None`` if the data container does not contain the required

219 information.

220

221 Parameters

222 ----------

223 fill_factor_limit

224 Return the entropy recorded up to the point when the specified fill

225 factor limit was reached, or ``None`` if the entropy history is

226 empty or the last fill factor is above the limit. Otherwise

227 return the entropy for the last state.

228 """

229

230 if 'entropy' not in self._last_state:

231 warn('There is no entropy information in the data container.')

232 return None

233 entropy = self._last_state['entropy']

234 if fill_factor_limit is not None:

235 if 'entropy_history' not in self._last_state or \

236 len(self._last_state['entropy_history']) == 0:

237 warn('The entropy history is empty.')

238 return None

239 if self._last_state['fill_factor'] > fill_factor_limit:

240 warn('The last fill factor {} is higher than the limit'

241 ' {}.'.format(self.fill_factor, fill_factor_limit))

242 return None

243 for step, fill_factor in self._last_state['fill_factor_history'].items(): 243 ↛ 249line 243 didn't jump to line 249, because the loop on line 243 didn't complete

244 if fill_factor <= fill_factor_limit:

245 entropy = self._last_state['entropy_history'][step]

246 break

247

248 # compile entropy into DataFrame

249 energy_spacing = self.ensemble_parameters['energy_spacing']

250 df = DataFrame(data={'energy': energy_spacing * np.array(list(entropy.keys())),

251 'entropy': np.array(list(entropy.values()))},

252 index=list(entropy.keys()))

253 # shift entropy for numerical stability

254 df['entropy'] -= np.min(df['entropy'])

255

256 return df

257

258 def get_histogram(self) -> DataFrame:

259 """Returns the histogram from this data container accumulated since the

260 last update of the fill factor. Returns ``None`` if the data container

261 does not contain the required information.

262 """

263

264 if 'histogram' not in self._last_state:

265 return None

266

267 # compile histogram into DataFrame

268 histogram = self._last_state['histogram']

269 energy_spacing = self.ensemble_parameters['energy_spacing']

270 df = DataFrame(data={'energy': energy_spacing * np.array(list(histogram.keys())),

271 'histogram': np.array(list(histogram.values()))},

272 index=list(histogram.keys()))

273

274 return df

275

276 @classmethod

277 # todo: cls and the return should be type hinted as BaseDataContainer.

278 # Unfortunately, this requires from __future__ import annotations, which

279 # in turn requires Python 3.8.

280 def read(cls, infile: Union[str, BinaryIO, TextIO], old_format: bool = False):

281 """Reads data container from file.

282

283 Parameters

284 ----------

285 infile

286 file from which to read

287 old_format

288 If true use old json format to read runtime data; default to false

289

290 Raises

291 ------

292 FileNotFoundError

293 if file is not found (str)

294 ValueError

295 if file is of incorrect type (not a tarball)

296 """

297 dc = super(WangLandauDataContainer, cls).read(infile=infile, old_format=old_format)

298

299 for tag, value in dc._last_state.items():

300 if tag in ['histogram', 'entropy', 'fill_factor_history', 'entropy_history']:

301 # the following accounts for the fact that the keys of dicts

302 # are converted to str when writing to json and have to

303 # converted back into numerical values

304 dc._last_state[tag] = {}

305 for key, val in value.items():

306 if isinstance(val, dict):

307 val = {int(k): v for k, v in val.items()}

308 dc._last_state[tag][int(key)] = val

309

310 return dc

311

312

313def get_density_of_states_wl(dcs: Union[WangLandauDataContainer,

314 Dict[Any, WangLandauDataContainer]],

315 fill_factor_limit: float = None) \

316 -> Tuple[DataFrame, dict]:

317 """Returns a pandas DataFrame with the total density of states from a

318 :ref:`Wang-Landau simulation <wang_landau_ensemble>`. If a dict of data

319 containers is provided the function also returns a dictionary that

320 contains the standard deviation between the entropy of neighboring data

321 containers in the overlap region. These errors should be small compared to

322 the variation of the entropy across each bin.

323

324 The function can handle both a single data container and a dict thereof.

325 In the latter case the data containers must cover a contiguous energy

326 range and must at least partially overlap.

327

328 Parameters

329 ----------

330 dcs

331 Data container(s), from which to extract the density of states.

332 fill_factor_limit

333 Calculate the density of states using the entropy recorded up to the

334 point when the specified fill factor limit was reached. Otherwise

335 return the density of states for the last state.

336

337 Raises

338 ------

339 TypeError

340 If :attr:`dcs` does not correspond to not a single (dictionary) of data

341 container(s) from which the entropy can retrieved.

342 ValueError

343 If the data container does not contain entropy information.

344 ValueError

345 If a fill factor limit has been provided and the data container either

346 does not contain information about the entropy history or if the last

347 fill factor is higher than the specified limit.

348 ValueError

349 If multiple data containers are provided and there are inconsistencies

350 with regard to basic simulation parameters such as system size or

351 energy spacing.

352 ValueError

353 If multiple data containers are provided and there is at least

354 one energy region without overlap.

355 """

356

357 # preparations

358 if isinstance(dcs, WangLandauDataContainer):

359 # fetch raw entropy data from data container

360 df = dcs.get_entropy(fill_factor_limit)

361 if df is None:

362 raise ValueError('Entropy information could not be retrieved from'

363 ' the data container {}.'.format(dcs))

364 errors = None

365 if len(dcs.fill_factor_history) == 0 or dcs.fill_factor > 1e-4:

366 warn('The data container appears to contain data from an'

367 ' underconverged Wang-Landau simulation.')

368

369 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):

370 # minimal consistency checks

371 tags = list(dcs.keys())

372 tagref = tags[0]

373 dcref = dcs[tagref]

374 for tag in tags:

375 dc = dcs[tag]

376 if len(dc.structure) != len(dcref.structure):

377 raise ValueError('Number of atoms differs between data containers ({}: {}, {}: {})'

378 .format(tagref, dcref.ensemble_parameters['n_atoms'],

379 tag, dc.ensemble_parameters['n_atoms']))

380 for param in ['energy_spacing', 'trial_move']:

381 if dc.ensemble_parameters[param] != dcref.ensemble_parameters[param]:

382 raise ValueError('{} differs between data containers ({}: {}, {}: {})'

383 .format(param,

384 tagref, dcref.ensemble_parameters['n_atoms'],

385 tag, dc.ensemble_parameters['n_atoms']))

386 if len(dc.fill_factor_history) == 0 or dc.fill_factor > 1e-4:

387 warn('Data container {} appears to contain data from an'

388 ' underconverged Wang-Landau simulation.'.format(tag))

389

390 # fetch raw entropy data from data containers

391 entropies = {}

392 for tag, dc in dcs.items():

393 entropies[tag] = dc.get_entropy(fill_factor_limit)

394 if entropies[tag] is None:

395 raise ValueError('Entropy information could not be retrieved'

396 ' from the data container {}.'.format(dc))

397

398 # sort entropies by energy

399 entropies = OrderedDict(sorted(entropies.items(), key=lambda row: row[1].energy.iloc[0]))

400

401 # line up entropy data

402 errors = {}

403 tags = list(entropies.keys())

404 for tag1, tag2 in zip(tags[:-1], tags[1:]):

405 df1 = entropies[tag1]

406 df2 = entropies[tag2]

407 if all(df2.energy.isin(df1.energy)):

408 warn('Window {} is a subset of {}'.format(tag2, tag1))

409 left_lim = np.min(df2.energy)

410 right_lim = np.max(df1.energy)

411 if left_lim >= right_lim:

412 raise ValueError('No overlap in the energy range {}...{}.\n'

413 .format(right_lim, left_lim) +

414 ' The closest data containers have tags "{}" and "{}".'

415 .format(tag1, tag2))

416 df1_ = df1[(df1.energy >= left_lim) & (df1.energy <= right_lim)]

417 df2_ = df2[(df2.energy >= left_lim) & (df2.energy <= right_lim)]

418 offset = (df2_.entropy - df1_.entropy).mean()

419 errors['{}-{}'.format(tag1, tag2)] = (df2_.entropy - df1_.entropy).std()

420 entropies[tag2].entropy = entropies[tag2].entropy - offset

421

422 # compile entropy over the entire energy range

423 data: Dict[float, float] = {}

424 indices = {}

425 counts = Counter()

426 for df in entropies.values():

427 for index, en, ent in zip(df.index, df.energy, df.entropy):

428 data[en] = data.get(en, 0) + ent

429 counts[en] += 1

430 indices[en] = index

431 for en in data:

432 data[en] = data[en] / counts[en]

433

434 # center entropy to prevent possible numerical issues

435 entmin = np.min(list(data.values()))

436 df = DataFrame(data={'energy': np.array(list(data.keys())),

437 'entropy': np.array(np.array(list(data.values()))) - entmin},

438 index=list(indices.values()))

439 else:

440 raise TypeError('dcs ({}) must be a data container with entropy data'

441 ' or be a list of data containers'

442 .format(type(dcs)))

443

444 # density of states

445 S_max = df.entropy.max()

446 df['density'] = np.exp(df.entropy - S_max) / np.sum(np.exp(df.entropy - S_max))

447

448 return df, errors

449

450

451def _extract_filter_data(dc: BaseDataContainer,

452 columns_to_keep: List[str],

453 fill_factor_limit: float = None) -> DataFrame:

454 """ Extract data from a data container and filter the content.

455

456 Parameters

457 ----------

458 dc

459 Data container from which to extract the data.

460 columns_to_keep

461 List of requested properties.

462 fill_factor_limit

463 Only include data recorded up to the point when the specified fill

464 factor limit was reached when computing averages. Otherwise include

465 all data.

466 """

467

468 df = dc.data

469 if fill_factor_limit is not None:

470 # only include data for fill factors up to the limit

471 df_ffh = dc.fill_factor_history.astype(

472 {'mctrial': np.int64, 'fill_factor': np.float64})

473 mctrial_last = df_ffh.loc[

474 df_ffh.fill_factor <= fill_factor_limit].mctrial.min()

475 df = df.loc[df.mctrial <= mctrial_last]

476

477 return df.filter(columns_to_keep)

478

479

480def get_average_observables_wl(dcs: Union[WangLandauDataContainer,

481 Dict[Any, WangLandauDataContainer]],

482 temperatures: List[float],

483 observables: List[str] = None,

484 boltzmann_constant: float = kB,

485 fill_factor_limit: float = None) -> DataFrame:

486 """Returns the average and the standard deviation of the energy from a

487 :ref:`Wang-Landau simulation <wang_landau_ensemble>` for the temperatures

488 specified. If the :attr:`observables` keyword argument is specified

489 the function will also return the mean and standard deviation of the

490 specified observables.

491

492 Parameters

493 ----------

494 dcs

495 Data container(s) from which to extract density of states

496 as well as observables.

497 temperatures

498 Temperatures at which to compute the averages.

499 observables

500 Observables for which to compute averages; the observables

501 must refer to fields in the data container.

502 boltzmann_constant

503 Boltzmann constant :math:`k_B` in appropriate

504 units, i.e., units that are consistent

505 with the underlying cluster expansion

506 and the temperature units [default: eV/K].

507 fill_factor_limit

508 Use data recorded up to the point when the specified fill factor limit

509 was reached when computing averages. Otherwise use data for the last

510 state.

511

512 Raises

513 ------

514 ValueError

515 If the data container(s) do(es) not contain entropy data

516 from Wang-Landau simulation.

517 ValueError

518 If data container(s) do(es) not contain requested observable.

519 """

520

521 def check_observables(dc: WangLandauDataContainer, observables: Optional[List[str]]) -> None:

522 """ Helper function that checks that observables are available in data frame. """

523 if observables is None:

524 return

525 for obs in observables:

526 if obs not in dc.data.columns:

527 raise ValueError('Observable ({}) not in data container.\n'

528 'Available observables: {}'.format(obs, dc.data.columns))

529

530 # preparation of observables

531 columns_to_keep = ['potential', 'density']

532 if observables is not None:

533 columns_to_keep.extend(observables)

534

535 # check that observables are available in data container

536 # and prepare comprehensive data frame with relevant information

537 if isinstance(dcs, WangLandauDataContainer):

538 check_observables(dcs, observables)

539 df_combined = _extract_filter_data(dcs, columns_to_keep, fill_factor_limit)

540 dcref = dcs

541 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):

542 dfs = []

543 for dc in dcs.values():

544 check_observables(dc, observables)

545 dfs.append(_extract_filter_data(dc, columns_to_keep, fill_factor_limit))

546 df_combined = pd_concat([df for df in dfs], ignore_index=True)

547 dcref = list(dcs.values())[0]

548 else:

549 raise TypeError('dcs ({}) must be a data container with entropy data'

550 ' or be a list of data containers'

551 .format(type(dcs)))

552

553 # fetch entropy and density of states from data container(s)

554 df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit)

555

556 # compute density for each row in data container if observable averages

557 # are to be computed

558 if observables is not None:

559 energy_spacing = dcref.ensemble_parameters['energy_spacing']

560 # NOTE: we rely on the indices of the df_density DataFrame to

561 # correspond to the energy scale! This is expected to be handled in

562 # the get_density_of_states function.

563 bins = list(np.array(np.round(df_combined.potential / energy_spacing), dtype=int))

564 data_density = [dens / bins.count(k) for k, dens in df_density.density[bins].items()]

565

566 enref = np.min(df_density.energy)

567 averages = []

568 for temperature in temperatures:

569

570 # mean and standard deviation of energy

571 boltz = np.exp(- (df_density.energy - enref) / temperature / boltzmann_constant)

572 sumint = np.sum(df_density.density * boltz)

573 en_mean = np.sum(df_density.energy * df_density.density * boltz) / sumint

574 en_std = np.sum(df_density.energy ** 2 * df_density.density * boltz) / sumint

575 en_std = np.sqrt(en_std - en_mean ** 2)

576 record = {'temperature': temperature,

577 'potential_mean': en_mean,

578 'potential_std': en_std}

579

580 # mean and standard deviation of other observables

581 if observables is not None:

582 boltz = np.exp(- (df_combined.potential - enref) / temperature / boltzmann_constant)

583 sumint = np.sum(data_density * boltz)

584 for obs in observables:

585 obs_mean = np.sum(data_density * boltz * df_combined[obs]) / sumint

586 obs_std = np.sum(data_density * boltz * df_combined[obs] ** 2) / sumint

587 obs_std = np.sqrt(obs_std - obs_mean ** 2)

588 record['{}_mean'.format(obs)] = obs_mean

589 record['{}_std'.format(obs)] = obs_std

590

591 averages.append(record)

592

593 return DataFrame.from_dict(averages)

594

595

596def get_average_cluster_vectors_wl(dcs: Union[WangLandauDataContainer, dict],

597 cluster_space: ClusterSpace,

598 temperatures: List[float],

599 boltzmann_constant: float = kB,

600 fill_factor_limit: float = None) -> DataFrame:

601 """Returns the average cluster vectors from a :ref:`Wang-Landau simulation

602 <wang_landau_ensemble>` for the temperatures specified.

603

604 Parameters

605 ----------

606 dcs

607 Data container(s), from which to extract density of states

608 as well as observables.

609 cluster_space

610 Cluster space to use for calculation of cluster vectors.

611 temperatures

612 Temperatures at which to compute the averages.

613 boltzmann_constant

614 Boltzmann constant :math:`k_B` in appropriate

615 units, i.e., units that are consistent

616 with the underlying cluster expansion

617 and the temperature units [default: eV/K].

618 fill_factor_limit

619 Use data recorded up to the point when the specified fill factor limit

620 was reached when computing the average cluster vectors. Otherwise use

621 data for the last state.

622

623 Raises

624 ------

625 ValueError

626 If the data container(s) do(es) not contain entropy data

627 from Wang-Landau simulation.

628 """

629

630 # fetch potential and structures

631 if isinstance(dcs, WangLandauDataContainer):

632 potential, trajectory = dcs.get('potential', 'trajectory',

633 fill_factor_limit=fill_factor_limit)

634 energy_spacing = dcs.ensemble_parameters['energy_spacing']

635 elif isinstance(dcs, dict) and isinstance(dcs[next(iter(dcs))], WangLandauDataContainer):

636 potential, trajectory = [], []

637 for dc in dcs.values():

638 p, t = dc.get('potential', 'trajectory', fill_factor_limit=fill_factor_limit)

639 potential.extend(p)

640 trajectory.extend(t)

641 energy_spacing = list(dcs.values())[0].ensemble_parameters['energy_spacing']

642 potential = np.array(potential)

643 else:

644 raise TypeError('dcs ({}) must be a data container with entropy data'

645 ' or be a list of data containers'

646 .format(type(dcs)))

647

648 # fetch entropy and density of states from data container(s)

649 df_density, _ = get_density_of_states_wl(dcs, fill_factor_limit)

650

651 # compute weighted density and cluster vector for each bin in energy

652 # range; the weighted density is the total density divided by the number

653 # of structures that fall in the respective bin

654 # NOTE: the following code relies on the indices of the df_density

655 # DataFrame to correspond to the energy scale. This is expected to be

656 # handled in the get_density_of_states function.

657 cvs = []

658 weighted_density = []

659 bins = list(np.array(np.round(potential / energy_spacing), dtype=int))

660 for k, structure in zip(bins, trajectory):

661 cvs.append(cluster_space.get_cluster_vector(structure))

662 weighted_density.append(df_density.density[k] / bins.count(k))

663

664 # compute mean and standard deviation (std) of temperature weighted

665 # cluster vector

666 averages = []

667 enref = np.min(potential)

668 for temperature in temperatures:

669 boltz = np.exp(- (potential - enref) / temperature / boltzmann_constant)

670 sumint = np.sum(weighted_density * boltz)

671 cv_mean = np.array([np.sum(weighted_density * boltz * cv) / sumint

672 for cv in np.transpose(cvs)])

673 cv_std = np.array([np.sum(weighted_density * boltz * cv ** 2) / sumint

674 for cv in np.transpose(cvs)])

675 cv_std = np.sqrt(cv_std - cv_mean ** 2)

676 record = {'temperature': temperature,

677 'cv_mean': cv_mean,

678 'cv_std': cv_std}

679 averages.append(record)

680

681 return DataFrame.from_dict(averages)

Coverage for mchammer/data_containers/wang_landau_data_container.py: 99%

235 statements