# Source code for icet.tools.structure_generation

```
import itertools
import random
from typing import List
import numpy as np
from mchammer.ensembles import TargetClusterVectorAnnealing
from mchammer.calculators import (TargetVectorCalculator,
compare_cluster_vectors)
from icet.tools import (enumerate_structures,
enumerate_supercells)
from icet import ClusterSpace
from ase import Atoms
from ase.data import chemical_symbols as periodic_table
[docs]def generate_target_structure(cluster_space: ClusterSpace,
max_size: int,
target_concentrations: dict,
target_cluster_vector: List[float],
include_smaller_cells: bool = True,
T_start: float = 5.0,
T_stop: float = 0.001,
n_steps: float = None,
optimality_weight: float = 1.0,
random_seed: int = None,
tol: float = 1e-5) -> Atoms:
"""
Given a ``cluster_space`` and a ``target_cluster_vector``, generate
a structure that as closely as possible matches that cluster vector.
The search is performed among all inequivalent supercells shapes up
to a certain size.
Internally the function uses a simulated annealing algorithm and the
difference between two cluster vectors is calculated with the
measure suggested by A. van de Walle et al. in Calphad **42**, 13-18
(2013) [WalTiwJon13]_ (for more information, see
:class:`mchammer.calculators.TargetVectorCalculator`).
Parameters
----------
cluster_space
a cluster space defining the lattice to be occupied
max_size
maximum supercell size
target_concentrations
concentration of each species in the target structure (for
example ``{'Ag': 0.5, 'Pd': 0.5}``
Concentrations are always expressed with respect to all atoms in
the supercell, which implies that the sum of all concentrations
should always be 1. In the case of multiple sublattices, a valid
specification would thus be ``{'Au': 0.25, 'Pd': 0.25, 'H': 0.1,
'V': 0.4}``.
target_cluster_vector
cluster vector that the generated structure should match as
closely as possible
include_smaller_cells
if True, search among all supercell sizes including
``max_size``, else search only among those exactly matching
``max_size``
T_start
artificial temperature at which the simulated annealing starts
T_stop
artifical temperature at which the simulated annealing stops
n_steps
total number of Monte Carlo steps in the simulation
optimality_weight
controls weighting :math:`L` of perfect correlations, see
:class:`mchammer.calculators.TargetVectorCalculator`
random_seed
seed for the random number generator used in the
Monte Carlo simulation
tol
Numerical tolerance
"""
_validate_concentrations(target_concentrations, cluster_space)
supercells = []
calculators = []
if include_smaller_cells:
sizes = range(1, max_size + 1)
else:
sizes = [max_size]
for size in sizes:
# For efficiency, make a first check that the current size is
# commensurate with all concentrations (example: {'Au': 0.5,
# 'Pd': 0.5} would not be commensurate with a supercell with 3
# atoms). This step does *not* ensure that the concentrations
# are commensurate with all sublattices (that is checked
# internally later on).
natoms = size * len(cluster_space.primitive_structure)
if not _concentrations_fit_atom_count(natoms, target_concentrations):
continue
# Loop over all inequivalent supercells and intialize
# them with a "random" occupation of symbols that
# fulfill the target concentrations
for supercell in enumerate_supercells(cluster_space.primitive_structure,
[size]):
occupy_structure_randomly(supercell, cluster_space,
target_concentrations)
supercells.append(supercell)
calculators.append(TargetVectorCalculator(supercell, cluster_space,
target_cluster_vector,
optimality_weight=optimality_weight,
optimality_tol=tol))
ens = TargetClusterVectorAnnealing(structure=supercells, calculators=calculators,
T_start=T_start, T_stop=T_stop,
random_seed=random_seed)
return ens.generate_structure(number_of_trial_steps=n_steps)
[docs]def generate_sqs(cluster_space: ClusterSpace,
max_size: int,
target_concentrations: dict,
include_smaller_cells: bool = True,
T_start: float = 5.0,
T_stop: float = 0.001,
n_steps: float = None,
optimality_weight: float = 1.0,
random_seed: int = None,
tol: float = 1e-5) -> Atoms:
"""
Given a ``cluster_space``, generate a special quasirandom structure
(SQS), i.e., a structure that for a given supercell size provides
the best possible approximation to a random alloy [ZunWeiFer90]_.
In the present case, this means that the generated structure will
have a cluster vector that as closely as possible matches the
cluster vector of an infintely large randomly occupated supercell.
Internally the function uses a simulated annealing algorithm and the
difference between two cluster vectors is calculated with the
measure suggested by A. van de Walle et al. in Calphad **42**, 13-18
(2013) [WalTiwJon13]_ (for more information, see
:class:`mchammer.calculators.TargetVectorCalculator`).
Parameters
----------
cluster_space
a cluster space defining the lattice to be occupated
max_size
maximum supercell size
target_concentrations
concentration of each species in the target structure (for
example ``{'Ag': 0.5, 'Pd': 0.5}``
Concentrations are always expressed with respect to all atoms in
the supercell, which implies that the sum of all concentrations
should always be 1. In the case of multiple sublattices, a valid
specification would thus be ``{'Au': 0.25, 'Pd': 0.25, 'H': 0.1,
'V': 0.4}``.
include_smaller_cells
if True, search among all supercell sizes including
``max_size``, else search only among those exactly matching
``max_size``
T_start
artificial temperature at which the simulated annealing starts
T_stop
artifical temperature at which the simulated annealing stops
n_steps
total number of Monte Carlo steps in the simulation
optimality_weight
controls weighting :math:`L` of perfect correlations, see
:class:`mchammer.calculators.TargetVectorCalculator`
random_seed
seed for the random number generator used in the
Monte Carlo simulation
tol
Numerical tolerance
"""
sqs_vector = _get_sqs_cluster_vector(cluster_space=cluster_space,
target_concentrations=target_concentrations)
return generate_target_structure(cluster_space=cluster_space,
max_size=max_size,
target_concentrations=target_concentrations,
target_cluster_vector=sqs_vector,
include_smaller_cells=include_smaller_cells,
T_start=T_start, T_stop=T_stop,
n_steps=n_steps,
optimality_weight=optimality_weight,
random_seed=random_seed,
tol=tol)
[docs]def generate_sqs_by_enumeration(cluster_space: ClusterSpace,
max_size: int,
target_concentrations: dict,
include_smaller_cells: bool = True,
optimality_weight: float = 1.0,
tol: float = 1e-5) -> Atoms:
"""
Given a ``cluster_space``, generate a special quasirandom structure
(SQS), i.e., a structure that for a given supercell size provides
the best possible approximation to a random alloy [ZunWeiFer90]_.
In the present case, this means that the generated structure will
have a cluster vector that as closely as possible matches the
cluster vector of an infintely large randomly occupied supercell.
Internally the function uses a simulated annealing algorithm and the
difference between two cluster vectors is calculated with the
measure suggested by A. van de Walle et al. in Calphad **42**, 13-18
(2013) [WalTiwJon13]_ (for more information, see
:class:`mchammer.calculators.TargetVectorCalculator`).
This functions generates SQS cells by exhaustive enumeration, which
means that the generated SQS cell is guaranteed to be optimal with
regard to the specified measure and cell size.
Parameters
----------
cluster_space
a cluster space defining the lattice to be occupied
max_size
maximum supercell size
target_concentrations
concentration of each species in the target structure (for
example ``{'Ag': 0.5, 'Pd': 0.5}``
Concentrations are always expressed with respect to all atoms in
the supercell, which implies that the sum of all concentrations
should always be 1. In the case of multiple sublattices, a valid
specification would thus be ``{'Au': 0.25, 'Pd': 0.25, 'H': 0.1,
'V': 0.4}``.
include_smaller_cells
if True, search among all supercell sizes including
``max_size``, else search only among those exactly matching
``max_size``
optimality_weight
controls weighting :math:`L` of perfect correlations, see
:class:`mchammer.calculators.TargetVectorCalculator`
tol
Numerical tolerance
"""
_validate_concentrations(target_concentrations, cluster_space)
sqs_vector = _get_sqs_cluster_vector(cluster_space=cluster_space,
target_concentrations=target_concentrations)
# Translate concentrations to the format required for concentration
# restricted enumeration
cr = {key: (value, value) for key, value in target_concentrations.items()}
orbit_data = cluster_space.orbit_data
best_score = 1e9
if include_smaller_cells:
sizes = range(1, max_size + 1)
else:
sizes = [max_size]
# Enumerate and calculate score for each structuer
for structure in enumerate_structures(cluster_space.primitive_structure,
sizes,
cluster_space.chemical_symbols,
concentration_restrictions=cr):
cv = cluster_space.get_cluster_vector(structure)
score = compare_cluster_vectors(cv_1=cv, cv_2=sqs_vector,
orbit_data=orbit_data,
optimality_weight=optimality_weight,
tol=tol)
if score < best_score:
best_score = score
best_structure = structure
return best_structure
[docs]def occupy_structure_randomly(structure: Atoms, cluster_space: ClusterSpace,
target_concentrations: dict):
"""
Occupy a structure with quasirandom order but fulfilling
``target_concentrations``.
Parameters
----------
structure
ASE Atoms object that will be occupied randomly
cluster_space
cluster space (needed as it carries information about sublattices)
target_concentrations
concentration of each species in the target structure (for
example ``{'Ag': 0.5, 'Pd': 0.5}``
Concentrations are always expressed with respect to all atoms in
the supercell, which implies that the sum of all concentrations
should always be 1. In the case of multiple sublattices, a valid
specification would thus be ``{'Au': 0.25, 'Pd': 0.25, 'H': 0.1,
'V': 0.4}``.
"""
if not _concentrations_fit_structure(structure, cluster_space, target_concentrations):
raise ValueError('Structure with {} atoms cannot accomodate '
'target concentrations {}'.format(len(structure),
target_concentrations))
# symbols_all will hold chemical_symbols of all sublattices
symbols_all = [''] * len(structure)
for sublattice in cluster_space.get_sublattices(structure):
symbols = [] # chemical_symbols in one sublattice
for chemical_symbol in sublattice.chemical_symbols:
n_symbol = int(round(len(structure) * target_concentrations[chemical_symbol]))
symbols += [chemical_symbol] * n_symbol
# Should not happen but you never know
assert len(symbols) == len(sublattice.indices)
# Shuffle to introduce randomness
random.shuffle(symbols)
# Assign symbols to the right indices
for symbol, lattice_site in zip(symbols, sublattice.indices):
symbols_all[lattice_site] = symbol
assert symbols_all.count('') == 0
structure.set_chemical_symbols(symbols_all)
def _validate_concentrations(concentrations: dict,
cluster_space: ClusterSpace,
tol: float = 1e-5):
"""
Validates concentration specification against a cluster space
(raises `ValueError` if they do not match).
Parameters
----------
concentrations
concentration specification
cluster_space
cluster space to check against
tol
Numerical tolerance
"""
# Concentrations should sum to 1
conc_sum = sum(list(concentrations.values()))
if abs(conc_sum - 1.0) > tol:
raise ValueError('Concentrations must sum up '
'to 1 (not {})'.format(conc_sum))
# Symbols need to match
# Flatten chemical symbols from cluster space
chemical_symbols = set(sym for syms in cluster_space.chemical_symbols for sym in syms)
if chemical_symbols != set(concentrations.keys()):
raise ValueError('Chemical symbols in cluster space ({}) are '
'not the same as those in the specified '
'concentrations ({})'.format(set(chemical_symbols),
set(concentrations.keys())))
# Concentrations for each sublattice need to sum up to
# its proportion of the total structure
for sublattice in cluster_space.get_sublattices(cluster_space.primitive_structure):
part = len(sublattice.indices) / len(cluster_space.primitive_structure)
conc = sum([concentrations[sym]
for sym in sublattice.chemical_symbols])
if abs(conc - part) > tol:
raise ValueError('Concentrations must be specified such that '
'concentrations per element and per sublattice '
'match, sublattice with species {} contains a '
'fraction {:.2f} of all atoms, but the sum of '
'the concentrations of those symbols is '
'{:.2f}.'.format(sublattice.chemical_symbols,
part,
conc))
def _concentrations_fit_atom_count(atom_count: int,
concentrations: dict,
tol: float = 1e-5) -> bool:
"""
Check if specified concentrations are commensurate with a
certain number of atoms.
Parameters
----------
atom_count
how many atoms?
concentrations
which concentrations, e.g., ``{'Ag': 0.3, 'Au': 0.7}``
tol
numerical tolerance
"""
for conc in concentrations.values():
n_symbol = conc * atom_count
if abs(int(round(n_symbol)) - n_symbol) > tol:
return False
return True
def _concentrations_fit_structure(structure: Atoms,
cluster_space: ClusterSpace,
concentrations: dict,
tol: float = 1e-5) -> bool:
"""
Check if specified concentrations are commensurate with a
certain supercell (including sublattices)
Parameters
----------
structure
atomic configuration to be checked
cluster_space
corresponding cluster space
concentrations
which concentrations, e.g., ``{'Ag': 0.3, 'Au': 0.7}``
tol
numerical tolerance
"""
# Check that concentrations are OK in each sublattice
for sublattice in cluster_space.get_sublattices(structure):
# Since concentrations are specified relative to all atoms,
# we must rescale when we check against the size of a sublattice
scaling = len(structure) / len(sublattice.indices)
concentrations_sub = {elem: scaling * concentrations[elem]
for elem in sublattice.chemical_symbols}
if not _concentrations_fit_atom_count(len(sublattice.indices),
concentrations_sub,
tol=tol):
return False
return True
def _get_sqs_cluster_vector(cluster_space: ClusterSpace,
target_concentrations: dict) -> np.ndarray:
"""
Get the SQS vector for a certain cluster space and certain
concentration. Here SQS vector refers to the cluster vector of an
infintely large supercell with random occupation.
Parameters
----------
cluster_space
the kind of lattice to be occupied
target_concentrations
concentration of each species in the target structure (for
example `{'Ag': 0.5, 'Pd': 0.5}`)
Concentrations are always expressed with respect to all atoms in
the supercell, which implies that the sum of all concentrations
should always be 1. In the case of multiple sublattices, a valid
specification would thus be `{'Au': 0.25, 'Pd': 0.25, 'H': 0.1,
'V': 0.4}`.
"""
_validate_concentrations(target_concentrations, cluster_space)
sublattice_to_index = {letter: index for index,
letter in enumerate('ABCDEFGHIJKLMNOPQRSTVUWWXYZ')}
all_sublattices = cluster_space.get_sublattices(
cluster_space.primitive_structure)
# Make a map from chemical symbol to integer, later on used
# for evaluating cluster functions.
# Internally, icet sorts species according to atomic numbers.
# Also check that each symbol only occurs in one sublattice.
symbol_to_integer_map = {}
found_species = []
for sublattice in all_sublattices:
syms = sublattice.chemical_symbols
atomic_numbers = [periodic_table.index(sym) for sym in syms]
for i, species in enumerate(sorted(atomic_numbers)):
if species in found_species:
raise ValueError('Each chemical symbol can only occur on one '
'sublattice, {} occurred more than '
'once.'.format(periodic_table[species]))
found_species.append(species)
symbol_to_integer_map[periodic_table[species]] = i
# Target concentrations refer to all atoms, but probabilities only
# to the sublattice.
probabilities = {}
for sublattice in all_sublattices:
mult_factor = len(cluster_space.primitive_structure) / \
len(sublattice.indices)
for symbol in sublattice.chemical_symbols:
probabilities[symbol] = target_concentrations[symbol] * mult_factor
# For every orbit, calculate average cluster function
cv = [1.0]
for orbit in cluster_space.orbit_data:
if orbit['order'] < 1:
continue
# What sublattices are there in this orbit?
sublattices = [all_sublattices[sublattice_to_index[letter]]
for letter in orbit['sublattices'].split('-')]
# What chemical symbols do these sublattices refer to?
symbol_groups = [
sublattice.chemical_symbols for sublattice in sublattices]
# How many allowed species in each of those sublattices?
nbr_of_allowed_species = [len(symbol_group)
for symbol_group in symbol_groups]
# Calculate contribtion from every possible combination of
# symbols weighted with their probability
cluster_product_average = 0
for symbols in itertools.product(*symbol_groups):
cluster_product = 1
for i, symbol in enumerate(symbols):
mc_vector_component = orbit['multi_component_vector'][i]
species_i = symbol_to_integer_map[symbol]
prod = cluster_space.evaluate_cluster_function(nbr_of_allowed_species[i],
mc_vector_component,
species_i)
cluster_product *= probabilities[symbol] * prod
cluster_product_average += cluster_product
cv.append(cluster_product_average)
return np.array(cv)
```