Source code for icet.tools.structure_generation

import itertools
import random
from typing import List, Dict, Tuple, Union
import numpy as np
from mchammer.ensembles import TargetClusterVectorAnnealing
from mchammer.calculators import (TargetVectorCalculator,
                                  compare_cluster_vectors)
from icet.tools import (enumerate_structures,
                        enumerate_supercells)
from icet import ClusterSpace
from icet.input_output.logging_tools import logger
from ase import Atoms
from ase.data import chemical_symbols as periodic_table


[docs]def generate_target_structure_from_supercells(cluster_space: ClusterSpace, supercells: List[Atoms], target_concentrations: dict, target_cluster_vector: List[float], T_start: float = 5.0, T_stop: float = 0.001, n_steps: int = None, optimality_weight: float = 1.0, random_seed: int = None, tol: float = 1e-5) -> Atoms: """ Given a ``cluster_space`` and a ``target_cluster_vector`` and one or more ``supercells``, generate a structure that as closely as possible matches that cluster vector. Internally the function uses a simulated annealing algorithm and the difference between two cluster vectors is calculated with the measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 (2013) [WalTiwJon13]_ (for more information, see :class:`mchammer.calculators.TargetVectorCalculator`). Parameters ---------- cluster_space a cluster space defining the lattice to be occupied supercells list of one or more supercells among which an optimal structure will be searched for target_concentrations concentration of each species in the target structure, per sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a single sublattice Au-Pd structure, or ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` for a system with two sublattices. The symbols defining sublattices ('A', 'B' etc) can be found by printing the `cluster_space` target_cluster_vector cluster vector that the generated structure should match as closely as possible T_start artificial temperature at which the simulated annealing starts T_stop artifical temperature at which the simulated annealing stops n_steps total number of Monte Carlo steps in the simulation optimality_weight controls weighting :math:`L` of perfect correlations, see :class:`mchammer.calculators.TargetVectorCalculator` random_seed seed for the random number generator used in the Monte Carlo simulation tol Numerical tolerance """ target_concentrations = _validate_concentrations(target_concentrations, cluster_space) calculators = [] # Loop over all supercells and intialize # them with a "random" occupation of symbols that # fulfill the target concentrations valid_supercells = [] warning_issued = False for supercell in supercells: supercell_copy = supercell.copy() try: occupy_structure_randomly(supercell_copy, cluster_space, target_concentrations) except ValueError: if not warning_issued: logger.warning('At least one supercell was not commensurate with the specified ' 'target concentrations.') warning_issued = True continue valid_supercells.append(supercell_copy) calculators.append(TargetVectorCalculator(supercell_copy, cluster_space, target_cluster_vector, optimality_weight=optimality_weight, optimality_tol=tol)) if len(valid_supercells) == 0: raise ValueError('No supercells that may host the specified ' 'target_concentrations were supplied.') ens = TargetClusterVectorAnnealing(structure=valid_supercells, calculators=calculators, T_start=T_start, T_stop=T_stop, random_seed=random_seed) return ens.generate_structure(number_of_trial_steps=n_steps)
[docs]def generate_target_structure(cluster_space: ClusterSpace, max_size: int, target_concentrations: dict, target_cluster_vector: List[float], include_smaller_cells: bool = True, pbc: Union[Tuple[bool, bool, bool], Tuple[int, int, int]] = None, T_start: float = 5.0, T_stop: float = 0.001, n_steps: int = None, optimality_weight: float = 1.0, random_seed: int = None, tol: float = 1e-5) -> Atoms: """ Given a ``cluster_space`` and a ``target_cluster_vector``, generate a structure that as closely as possible matches that cluster vector. The search is performed among all inequivalent supercells shapes up to a certain size. Internally the function uses a simulated annealing algorithm and the difference between two cluster vectors is calculated with the measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 (2013) [WalTiwJon13]_ (for more information, see :class:`mchammer.calculators.TargetVectorCalculator`). Parameters ---------- cluster_space a cluster space defining the lattice to be occupied max_size maximum supercell size target_concentrations concentration of each species in the target structure, per sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a single sublattice Au-Pd structure, or ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` for a system with two sublattices. The symbols defining sublattices ('A', 'B' etc) can be found by printing the `cluster_space` target_cluster_vector cluster vector that the generated structure should match as closely as possible include_smaller_cells if True, search among all supercell sizes including ``max_size``, else search only among those exactly matching ``max_size`` pbc Periodic boundary conditions for each direction, e.g., ``(True, True, False)``. The axes are defined by the cell of ``cluster_space.primitive_structure``. Default is periodic boundary in all directions. T_start artificial temperature at which the simulated annealing starts T_stop artifical temperature at which the simulated annealing stops n_steps total number of Monte Carlo steps in the simulation optimality_weight controls weighting :math:`L` of perfect correlations, see :class:`mchammer.calculators.TargetVectorCalculator` random_seed seed for the random number generator used in the Monte Carlo simulation tol Numerical tolerance """ target_concentrations = _validate_concentrations(target_concentrations, cluster_space) if pbc is None: pbc = (True, True, True) prim = cluster_space.primitive_structure prim.set_pbc(pbc) supercells = [] if include_smaller_cells: sizes = list(range(1, max_size + 1)) else: sizes = [max_size] for size in sizes: # For efficiency, make a first check that the current size is # commensurate with all concentrations (example: {'Au': 0.5, # 'Pd': 0.5} would not be commensurate with a supercell with 3 # atoms). supercell = cluster_space.primitive_structure.repeat((size, 1, 1)) if not _concentrations_fit_structure(structure=supercell, cluster_space=cluster_space, concentrations=target_concentrations): continue # Loop over all inequivalent supercells and intialize # them with a "random" occupation of symbols that # fulfill the target concentrations for supercell in enumerate_supercells(prim, [size]): supercell.set_pbc(True) supercells.append(supercell) return generate_target_structure_from_supercells(cluster_space=cluster_space, supercells=supercells, target_concentrations=target_concentrations, target_cluster_vector=target_cluster_vector, T_start=T_start, T_stop=T_stop, n_steps=n_steps, optimality_weight=optimality_weight, random_seed=random_seed, tol=tol)
[docs]def generate_sqs_from_supercells(cluster_space: ClusterSpace, supercells: List[Atoms], target_concentrations: dict, T_start: float = 5.0, T_stop: float = 0.001, n_steps: int = None, optimality_weight: float = 1.0, random_seed: int = None, tol: float = 1e-5) -> Atoms: """ Given a ``cluster_space`` and one or more ``supercells``, generate a special quasirandom structure (SQS), i.e., a structure that for the provided supercells size provides the best possible approximation to a random alloy [ZunWeiFer90]_. In the present case, this means that the generated structure will have a cluster vector that as closely as possible matches the cluster vector of an infintely large randomly occupated supercell. Internally the function uses a simulated annealing algorithm and the difference between two cluster vectors is calculated with the measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 (2013) [WalTiwJon13]_ (for more information, see :class:`mchammer.calculators.TargetVectorCalculator`). Parameters ---------- cluster_space a cluster space defining the lattice to be occupated supercells list of one or more supercells among which an optimal structure will be searched for target_concentrations concentration of each species in the target structure, per sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a single sublattice Au-Pd structure, or ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` for a system with two sublattices. The symbols defining sublattices ('A', 'B' etc) can be found by printing the `cluster_space` T_start artificial temperature at which the simulated annealing starts T_stop artifical temperature at which the simulated annealing stops n_steps total number of Monte Carlo steps in the simulation optimality_weight controls weighting :math:`L` of perfect correlations, see :class:`mchammer.calculators.TargetVectorCalculator` random_seed seed for the random number generator used in the Monte Carlo simulation tol Numerical tolerance """ sqs_vector = _get_sqs_cluster_vector(cluster_space=cluster_space, target_concentrations=target_concentrations) return generate_target_structure_from_supercells(cluster_space=cluster_space, supercells=supercells, target_concentrations=target_concentrations, target_cluster_vector=sqs_vector, T_start=T_start, T_stop=T_stop, n_steps=n_steps, optimality_weight=optimality_weight, random_seed=random_seed, tol=tol)
[docs]def generate_sqs(cluster_space: ClusterSpace, max_size: int, target_concentrations: dict, include_smaller_cells: bool = True, pbc: Union[Tuple[bool, bool, bool], Tuple[int, int, int]] = None, T_start: float = 5.0, T_stop: float = 0.001, n_steps: int = None, optimality_weight: float = 1.0, random_seed: int = None, tol: float = 1e-5) -> Atoms: """ Given a ``cluster_space``, generate a special quasirandom structure (SQS), i.e., a structure that for a given supercell size provides the best possible approximation to a random alloy [ZunWeiFer90]_. In the present case, this means that the generated structure will have a cluster vector that as closely as possible matches the cluster vector of an infintely large randomly occupated supercell. Internally the function uses a simulated annealing algorithm and the difference between two cluster vectors is calculated with the measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 (2013) [WalTiwJon13]_ (for more information, see :class:`mchammer.calculators.TargetVectorCalculator`). Parameters ---------- cluster_space a cluster space defining the lattice to be occupated max_size maximum supercell size target_concentrations concentration of each species in the target structure, per sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a single sublattice Au-Pd structure, or ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` for a system with two sublattices. The symbols defining sublattices ('A', 'B' etc) can be found by printing the `cluster_space` include_smaller_cells if True, search among all supercell sizes including ``max_size``, else search only among those exactly matching ``max_size`` pbc Periodic boundary conditions for each direction, e.g., ``(True, True, False)``. The axes are defined by the cell of ``cluster_space.primitive_structure``. Default is periodic boundary in all directions. T_start artificial temperature at which the simulated annealing starts T_stop artifical temperature at which the simulated annealing stops n_steps total number of Monte Carlo steps in the simulation optimality_weight controls weighting :math:`L` of perfect correlations, see :class:`mchammer.calculators.TargetVectorCalculator` random_seed seed for the random number generator used in the Monte Carlo simulation tol Numerical tolerance """ sqs_vector = _get_sqs_cluster_vector(cluster_space=cluster_space, target_concentrations=target_concentrations) return generate_target_structure(cluster_space=cluster_space, max_size=max_size, target_concentrations=target_concentrations, target_cluster_vector=sqs_vector, include_smaller_cells=include_smaller_cells, pbc=pbc, T_start=T_start, T_stop=T_stop, n_steps=n_steps, optimality_weight=optimality_weight, random_seed=random_seed, tol=tol)
[docs]def generate_sqs_by_enumeration(cluster_space: ClusterSpace, max_size: int, target_concentrations: dict, include_smaller_cells: bool = True, pbc: Union[Tuple[bool, bool, bool], Tuple[int, int, int]] = None, optimality_weight: float = 1.0, tol: float = 1e-5) -> Atoms: """ Given a ``cluster_space``, generate a special quasirandom structure (SQS), i.e., a structure that for a given supercell size provides the best possible approximation to a random alloy [ZunWeiFer90]_. In the present case, this means that the generated structure will have a cluster vector that as closely as possible matches the cluster vector of an infintely large randomly occupied supercell. Internally the function uses a simulated annealing algorithm and the difference between two cluster vectors is calculated with the measure suggested by A. van de Walle et al. in Calphad **42**, 13-18 (2013) [WalTiwJon13]_ (for more information, see :class:`mchammer.calculators.TargetVectorCalculator`). This functions generates SQS cells by exhaustive enumeration, which means that the generated SQS cell is guaranteed to be optimal with regard to the specified measure and cell size. Parameters ---------- cluster_space a cluster space defining the lattice to be occupied max_size maximum supercell size target_concentrations concentration of each species in the target structure, per sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a single sublattice Au-Pd structure, or ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` for a system with two sublattices. The symbols defining sublattices ('A', 'B' etc) can be found by printing the `cluster_space` include_smaller_cells if True, search among all supercell sizes including ``max_size``, else search only among those exactly matching ``max_size`` pbc Periodic boundary conditions for each direction, e.g., ``(True, True, False)``. The axes are defined by the cell of ``cluster_space.primitive_structure``. Default is periodic boundary in all directions. optimality_weight controls weighting :math:`L` of perfect correlations, see :class:`mchammer.calculators.TargetVectorCalculator` tol Numerical tolerance """ target_concentrations = _validate_concentrations(target_concentrations, cluster_space) sqs_vector = _get_sqs_cluster_vector(cluster_space=cluster_space, target_concentrations=target_concentrations) # Translate concentrations to the format required for concentration # restricted enumeration cr = {} # type: Dict[str, tuple] sublattices = cluster_space.get_sublattices(cluster_space.primitive_structure) for sl in sublattices: mult_factor = len(sl.indices) / len(cluster_space.primitive_structure) if sl.symbol in target_concentrations: sl_conc = target_concentrations[sl.symbol] else: sl_conc = {sl.chemical_symbols[0]: 1.0} for species, value in sl_conc.items(): c = value * mult_factor if species in cr: cr[species] = (cr[species][0] + c, cr[species][1] + c) else: cr[species] = (c, c) # Check to be sure... c_sum = sum(c[0] for c in cr.values()) assert abs(c_sum - 1) < tol # Should never happen, but... orbit_data = cluster_space.orbit_data best_score = 1e9 if include_smaller_cells: sizes = list(range(1, max_size + 1)) else: sizes = [max_size] # Prepare primitive structure with the right boundary conditions prim = cluster_space.primitive_structure if pbc is None: pbc = (True, True, True) prim.set_pbc(pbc) # Enumerate and calculate score for each structuer for structure in enumerate_structures(prim, sizes, cluster_space.chemical_symbols, concentration_restrictions=cr): cv = cluster_space.get_cluster_vector(structure) score = compare_cluster_vectors(cv_1=cv, cv_2=sqs_vector, orbit_data=orbit_data, optimality_weight=optimality_weight, tol=tol) if score < best_score: best_score = score best_structure = structure return best_structure
[docs]def occupy_structure_randomly(structure: Atoms, cluster_space: ClusterSpace, target_concentrations: dict) -> None: """ Occupy a structure with quasirandom order but fulfilling ``target_concentrations``. Parameters ---------- structure ASE Atoms object that will be occupied randomly cluster_space cluster space (needed as it carries information about sublattices) target_concentrations concentration of each species in the target structure, per sublattice (for example ``{'Au': 0.5, 'Pd': 0.5}`` for a single sublattice Au-Pd structure, or ``{'A': {'Au': 0.5, 'Pd': 0.5}, 'B': {'H': 0.25, 'X': 0.75}}`` for a system with two sublattices. The symbols defining sublattices ('A', 'B' etc) can be found by printing the `cluster_space` """ target_concentrations = _validate_concentrations(cluster_space=cluster_space, concentrations=target_concentrations) if not _concentrations_fit_structure(structure, cluster_space, target_concentrations): raise ValueError('Structure with {} atoms cannot accomodate ' 'target concentrations {}'.format(len(structure), target_concentrations)) symbols_all = [''] * len(structure) for sl in cluster_space.get_sublattices(structure): symbols = [] # type: List[str] # chemical_symbols in one sublattice chemical_symbols = sl.chemical_symbols if len(chemical_symbols) == 1: symbols += [chemical_symbols[0]] * len(sl.indices) else: sl_conc = target_concentrations[sl.symbol] for chemical_symbol in sl.chemical_symbols: n_symbol = int(round(len(sl.indices) * sl_conc[chemical_symbol])) symbols += [chemical_symbol] * n_symbol # Should not happen but you never know assert len(symbols) == len(sl.indices) # Shuffle to introduce randomness random.shuffle(symbols) # Assign symbols to the right indices for symbol, lattice_site in zip(symbols, sl.indices): symbols_all[lattice_site] = symbol assert symbols_all.count('') == 0 structure.set_chemical_symbols(symbols_all)
def _validate_concentrations(concentrations: dict, cluster_space: ClusterSpace, tol: float = 1e-5) -> dict: """ Validates concentration specification against a cluster space (raises `ValueError` if they do not match). Parameters ---------- concentrations concentration specification cluster_space cluster space to check against tol Numerical tolerance Returns ------- target concentrations An adapted version of concentrations, which is always a dictionary even if there is only one sublattice """ sls = cluster_space.get_sublattices(cluster_space.primitive_structure) if not isinstance(list(concentrations.values())[0], dict): concentrations = {'A': concentrations} # Ensure concentrations sum to 1 at each sublattice for sl_conc in concentrations.values(): conc_sum = sum(list(sl_conc.values())) if abs(conc_sum - 1.0) > tol: raise ValueError('Concentrations must sum up ' 'to 1 for each sublattice (not {})'.format(conc_sum)) # Symbols need to match on each sublattice for sl in sls: if sl.symbol not in concentrations: if len(sl.chemical_symbols) > 1: raise ValueError('A sublattice ({}: {}) is missing in ' 'target_concentrations'.format(sl.symbol, list(sl.chemical_symbols))) else: sl_conc = concentrations[sl.symbol] if tuple(sorted(sl.chemical_symbols)) != tuple(sorted(list(sl_conc.keys()))): raise ValueError('Chemical symbols on a sublattice ({}: {}) are ' 'not the same as those in the specified ' 'concentrations {}'.format(sl.symbol, list(sl.chemical_symbols), list(sl_conc.keys()))) return concentrations def _concentrations_fit_structure(structure: Atoms, cluster_space: ClusterSpace, concentrations: Dict[str, Dict[str, float]], tol: float = 1e-5) -> bool: """ Check if specified concentrations are commensurate with a certain supercell (including sublattices) Parameters ---------- structure atomic configuration to be checked cluster_space corresponding cluster space concentrations which concentrations, per sublattice, e.g., ``{'A': {'Ag': 0.3, 'Au': 0.7}}`` tol numerical tolerance """ # Check that concentrations are OK in each sublattice for sublattice in cluster_space.get_sublattices(structure): if sublattice.symbol in concentrations: sl_conc = concentrations[sublattice.symbol] for conc in sl_conc.values(): n_symbol = conc * len(sublattice.indices) if abs(int(round(n_symbol)) - n_symbol) > tol: return False return True def _get_sqs_cluster_vector(cluster_space: ClusterSpace, target_concentrations: Dict[str, Dict[str, float]]) -> np.ndarray: """ Get the SQS vector for a certain cluster space and certain concentration. Here SQS vector refers to the cluster vector of an infintely large supercell with random occupation. Parameters ---------- cluster_space the kind of lattice to be occupied target_concentrations concentration of each species in the target structure, per sublattice (for example `{'A': {'Ag': 0.5, 'Pd': 0.5}}`) """ target_concentrations = _validate_concentrations(concentrations=target_concentrations, cluster_space=cluster_space) sublattice_to_index = {letter: index for index, letter in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ')} all_sublattices = cluster_space.get_sublattices( cluster_space.primitive_structure) # Make a map from chemical symbol to integer, later on used # for evaluating cluster functions. # Internally, icet sorts species according to atomic numbers. # Also check that each symbol only occurs in one sublattice. symbol_to_integer_map = {} found_species = [] # type: List[str] for sublattice in all_sublattices: if len(sublattice.chemical_symbols) < 2: continue atomic_numbers = [periodic_table.index(sym) for sym in sublattice.chemical_symbols] for i, species in enumerate(sorted(atomic_numbers)): found_species.append(species) symbol_to_integer_map[periodic_table[species]] = i # Target concentrations refer to all atoms, but probabilities only # to the sublattice. probabilities = {} for sl_conc in target_concentrations.values(): if len(sl_conc) == 1: continue for symbol in sl_conc.keys(): probabilities[symbol] = sl_conc[symbol] # For every orbit, calculate average cluster function cv = [1.0] for orbit in cluster_space.orbit_data: if orbit['order'] < 1: continue # What sublattices are there in this orbit? sublattices = [all_sublattices[sublattice_to_index[letter]] for letter in orbit['sublattices'].split('-')] # What chemical symbols do these sublattices refer to? symbol_groups = [sublattice.chemical_symbols for sublattice in sublattices] # How many allowed species in each of those sublattices? nbr_of_allowed_species = [len(symbol_group) for symbol_group in symbol_groups] # Calculate contribution from every possible combination of # symbols weighted with their probability cluster_product_average = 0 for symbols in itertools.product(*symbol_groups): cluster_product = 1 for i, symbol in enumerate(symbols): mc_vector_component = orbit['multi_component_vector'][i] species_i = symbol_to_integer_map[symbol] prod = cluster_space.evaluate_cluster_function(nbr_of_allowed_species[i], mc_vector_component, species_i) cluster_product *= probabilities[symbol] * prod cluster_product_average += cluster_product cv.append(cluster_product_average) return np.array(cv)