Coverage for icet/tools/training_set_generation.py: 89%
56 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-12-26 04:12 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-12-26 04:12 +0000
1from typing import Callable, List, Tuple, Union
3import numpy as np
4from ase import Atoms
5from icet import ClusterSpace, StructureContainer
6from icet.input_output.logging_tools import logger
7from mchammer.ensembles.canonical_annealing import available_cooling_functions
10logger = logger.getChild('training_set_generation')
13def _get_fit_matrix(structure_container: StructureContainer,
14 new_inds: np.ndarray,
15 n_base_structures: int) -> np.ndarray:
16 """
17 Get the current fit matrix.
19 Parameters
20 ----------
21 structure_container
22 A structure container.
23 new_inds
24 The part of the structure container that contains the
25 new structures to be added.
26 n_base_structures
27 Number of structures in the base pool.
28 """
29 base_inds = np.array(range(n_base_structures), dtype=int)
30 inds = np.append(base_inds, n_base_structures + new_inds)
31 fit_matrix, _ = structure_container.get_fit_data(inds)
32 return fit_matrix
35def _do_swap(inds: np.ndarray,
36 n_structures_to_add: int,
37 n_mcmc_structures: int) -> np.ndarray:
38 """
39 Update indices to be used as training data.
41 Parameters
42 ----------
43 inds
44 The current indicies that are used.
45 n_structures_to_add
46 Total number of structures to add to the current base structures.
47 n_mcmc_structures
48 The size of the pool of potential candidate structures.
49 """
50 # Get index to swap out
51 _inds = inds.copy()
52 swap_out = np.random.choice(range(inds.size))
54 # Get index from the pool that are not currently in inds
55 inds_pool = np.array([range(n_mcmc_structures)])
56 inds_pool = np.setdiff1d(inds_pool, inds, assume_unique=True)
58 # Get index of structure to swap in
59 swap_in = np.random.choice(inds_pool)
61 # Do the swap
62 _inds[swap_out] = swap_in
63 return _inds
66def structure_selection_annealing(
67 cluster_space: ClusterSpace,
68 monte_carlo_structures: List[Atoms],
69 n_structures_to_add: int,
70 n_steps: int,
71 base_structures: List[Atoms] = None,
72 cooling_start: float = 5,
73 cooling_stop: float = 0.001,
74 cooling_function: Union[str, Callable] = 'exponential',
75 initial_indices: List[int] = None) \
76 -> Tuple[List[int], List[float]]:
77 """Given a cluster space, a base pool of structures, and a new pool
78 of structures, this function uses a Monte Carlo inspired annealing
79 method to find a good structure pool for training.
81 Returns
82 -------
83 A tuple comprising the indices of the optimal structures in
84 the :attr:`monte_carlo_structures` pool and a list of accepted
85 metric values.
87 Parameters
88 ----------
89 cluster_space
90 A cluster space defining the lattice to be occupied.
91 monte_carlo_structures
92 A list of candidate training structures.
93 n_structures_to_add
94 How many of the structures in the :attr:`monte_carlo_structures`
95 pool that should be kept for training.
96 n_steps
97 Number of steps in the annealing algorithm.
98 base_structures
99 A list of structures that is already in your training pool;
100 can be ``None`` if you do not have any structures yet.
101 cooling_start
102 Initial value of the :attr:`cooling_function`.
103 cooling_stop
104 Last value of the :attr:`cooling_function`.
105 cooling_function
106 Artificial number that rescales the difference between the
107 metric value between two iterations. Available options are
108 ``'linear'`` and ``'exponential'``.
109 initial_indices
110 Picks out the starting structure from the
111 :attr:`monte_carlo_structures` pool. Can be used if you want
112 to continue from an old run for example.
114 Example
115 -------
117 The following snippet demonstrates the use of this function for
118 generating an optimized structure pool. Here, we first set up a
119 pool of candidate structures by randomly occupying a FCC supercell
120 with Au and Pd::
122 >>> from ase.build import bulk
123 >>> from icet.tools.structure_generation import occupy_structure_randomly
125 >>> prim = bulk('Au', a=4.0)
126 >>> cs = ClusterSpace(prim, [6.0], [['Au', 'Pd']])
127 >>> structure_pool = []
128 >>> for _ in range(500):
129 >>> # Create random supercell.
130 >>> supercell = np.random.randint(1, 4, size=3)
131 >>> structure = prim.repeat(supercell)
132 >>>
133 >>> # Randomize concentrations in the supercell
134 >>> n_atoms = len(structure)
135 >>> n_Au = np.random.randint(0, n_atoms)
136 >>> n_Pd = n_atoms - n_Au
137 >>> concentration = {'Au': n_Au / n_atoms, 'Pd': n_Pd / n_atoms}
138 >>>
139 >>> # Occupy the structure randomly and store it.
140 >>> occupy_structure_randomly(structure, cs, concentration)
141 >>> structure_pool.append(structure)
142 >>> start_inds = [f for f in range(10)]
144 Now we can use the :func:`structure_selection_annealing` function to find an
145 optimized structure pool::
147 >>> inds, cond = structure_selection_annealing(cs,
148 >>> structure_pool,
149 >>> n_structures_to_add=10,
150 >>> n_steps=100)
151 >>> training_structures = [structure_pool[ind] for ind in inds]
152 >>> print(training_structures)
154 """
155 if base_structures is None:
156 base_structures = []
158 # set up cooling function
159 if isinstance(cooling_function, str): 159 ↛ 164line 159 didn't jump to line 164, because the condition on line 159 was never false
160 available = sorted(available_cooling_functions.keys())
161 if cooling_function not in available: 161 ↛ 162line 161 didn't jump to line 162, because the condition on line 161 was never true
162 raise ValueError(f'Select from the available cooling_functions: {available}')
163 _cooling_function = available_cooling_functions[cooling_function]
164 elif callable(cooling_function):
165 _cooling_function = cooling_function
166 else:
167 raise TypeError('cooling_function must be either str or a function')
169 # set up cluster vectors
170 structure_container = StructureContainer(cluster_space)
171 for structure in base_structures:
172 structure_container.add_structure(structure, properties={'energy': 0})
173 for structure in monte_carlo_structures:
174 structure_container.add_structure(structure, properties={'energy': 0})
176 # get number of structures in monte_carlo_structures
177 n_mcmc_structures = len(monte_carlo_structures)
178 n_base_structures = len(base_structures)
180 # randomly chose starting structure unless user want specific indices
181 if not initial_indices:
182 inds = np.random.choice(range(len(monte_carlo_structures)), size=n_structures_to_add,
183 replace=False)
184 else:
185 inds = np.array(initial_indices)
187 # get initial fitting_matrix, A in Ax = y
188 fit_matrix = _get_fit_matrix(structure_container, inds, n_base_structures)
190 # get metric of fitting_matrix
191 cond = np.linalg.cond(fit_matrix)
192 cond_traj = [cond]
193 for n in range(n_steps):
194 # get current artificial cooling
195 T = _cooling_function(n, cooling_start, cooling_stop, n_steps)
197 # do swaps from pool
198 new_inds = _do_swap(inds, n_structures_to_add, n_mcmc_structures)
199 new_fit_matrix = _get_fit_matrix(structure_container, new_inds, n_base_structures)
201 # get new metric
202 cond_new = np.linalg.cond(new_fit_matrix)
203 if (cond - cond_new) / T > np.log(np.random.uniform()):
204 # if accepted update data
205 cond = cond_new
206 inds = new_inds
207 cond_traj.append(cond)
209 if n % 100 == 0:
210 logger.info(f'step {n:6d}, T {T:8.5f} , current condition number {cond:8.5f}')
212 return inds, cond_traj