Coverage for icet/tools/training_set_generation.py: 89%

56 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-06 04:14 +0000

1from typing import Callable, List, Tuple, Union 

2 

3import numpy as np 

4from ase import Atoms 

5from icet import ClusterSpace, StructureContainer 

6from icet.input_output.logging_tools import logger 

7from mchammer.ensembles.canonical_annealing import available_cooling_functions 

8 

9 

10logger = logger.getChild('training_set_generation') 

11 

12 

13def _get_fit_matrix(structure_container: StructureContainer, 

14 new_inds: np.ndarray, 

15 n_base_structures: int) -> np.ndarray: 

16 """ 

17 Get the current fit matrix. 

18 

19 Parameters 

20 ---------- 

21 structure_container 

22 A structure container. 

23 new_inds 

24 The part of the structure container that contains the 

25 new structures to be added. 

26 n_base_structures 

27 Number of structures in the base pool. 

28 """ 

29 base_inds = np.array(range(n_base_structures), dtype=int) 

30 inds = np.append(base_inds, n_base_structures + new_inds) 

31 fit_matrix, _ = structure_container.get_fit_data(inds) 

32 return fit_matrix 

33 

34 

35def _do_swap(inds: np.ndarray, 

36 n_structures_to_add: int, 

37 n_mcmc_structures: int) -> np.ndarray: 

38 """ 

39 Update indices to be used as training data. 

40 

41 Parameters 

42 ---------- 

43 inds 

44 The current indicies that are used. 

45 n_structures_to_add 

46 Total number of structures to add to the current base structures. 

47 n_mcmc_structures 

48 The size of the pool of potential candidate structures. 

49 """ 

50 # Get index to swap out 

51 _inds = inds.copy() 

52 swap_out = np.random.choice(range(inds.size)) 

53 

54 # Get index from the pool that are not currently in inds 

55 inds_pool = np.array([range(n_mcmc_structures)]) 

56 inds_pool = np.setdiff1d(inds_pool, inds, assume_unique=True) 

57 

58 # Get index of structure to swap in 

59 swap_in = np.random.choice(inds_pool) 

60 

61 # Do the swap 

62 _inds[swap_out] = swap_in 

63 return _inds 

64 

65 

66def structure_selection_annealing( 

67 cluster_space: ClusterSpace, 

68 monte_carlo_structures: List[Atoms], 

69 n_structures_to_add: int, 

70 n_steps: int, 

71 base_structures: List[Atoms] = None, 

72 cooling_start: float = 5, 

73 cooling_stop: float = 0.001, 

74 cooling_function: Union[str, Callable] = 'exponential', 

75 initial_indices: List[int] = None) \ 

76 -> Tuple[List[int], List[float]]: 

77 """Given a cluster space, a base pool of structures, and a new pool 

78 of structures, this function uses a Monte Carlo inspired annealing 

79 method to find a good structure pool for training. 

80 

81 Returns 

82 ------- 

83 A tuple comprising the indices of the optimal structures in 

84 the :attr:`monte_carlo_structures` pool and a list of accepted 

85 metric values. 

86 

87 Parameters 

88 ---------- 

89 cluster_space 

90 A cluster space defining the lattice to be occupied. 

91 monte_carlo_structures 

92 A list of candidate training structures. 

93 n_structures_to_add 

94 How many of the structures in the :attr:`monte_carlo_structures` 

95 pool that should be kept for training. 

96 n_steps 

97 Number of steps in the annealing algorithm. 

98 base_structures 

99 A list of structures that is already in your training pool; 

100 can be ``None`` if you do not have any structures yet. 

101 cooling_start 

102 Initial value of the :attr:`cooling_function`. 

103 cooling_stop 

104 Last value of the :attr:`cooling_function`. 

105 cooling_function 

106 Artificial number that rescales the difference between the 

107 metric value between two iterations. Available options are 

108 ``'linear'`` and ``'exponential'``. 

109 initial_indices 

110 Picks out the starting structure from the 

111 :attr:`monte_carlo_structures` pool. Can be used if you want 

112 to continue from an old run for example. 

113 

114 Example 

115 ------- 

116 

117 The following snippet demonstrates the use of this function for 

118 generating an optimized structure pool. Here, we first set up a 

119 pool of candidate structures by randomly occupying a FCC supercell 

120 with Au and Pd:: 

121 

122 >>> from ase.build import bulk 

123 >>> from icet.tools.structure_generation import occupy_structure_randomly 

124 

125 >>> prim = bulk('Au', a=4.0) 

126 >>> cs = ClusterSpace(prim, [6.0], [['Au', 'Pd']]) 

127 >>> structure_pool = [] 

128 >>> for _ in range(500): 

129 >>> # Create random supercell. 

130 >>> supercell = np.random.randint(1, 4, size=3) 

131 >>> structure = prim.repeat(supercell) 

132 >>> 

133 >>> # Randomize concentrations in the supercell 

134 >>> n_atoms = len(structure) 

135 >>> n_Au = np.random.randint(0, n_atoms) 

136 >>> n_Pd = n_atoms - n_Au 

137 >>> concentration = {'Au': n_Au / n_atoms, 'Pd': n_Pd / n_atoms} 

138 >>> 

139 >>> # Occupy the structure randomly and store it. 

140 >>> occupy_structure_randomly(structure, cs, concentration) 

141 >>> structure_pool.append(structure) 

142 >>> start_inds = [f for f in range(10)] 

143 

144 Now we can use the :func:`structure_selection_annealing` function to find an 

145 optimized structure pool:: 

146 

147 >>> inds, cond = structure_selection_annealing(cs, 

148 >>> structure_pool, 

149 >>> n_structures_to_add=10, 

150 >>> n_steps=100) 

151 >>> training_structures = [structure_pool[ind] for ind in inds] 

152 >>> print(training_structures) 

153 

154 """ 

155 if base_structures is None: 

156 base_structures = [] 

157 

158 # set up cooling function 

159 if isinstance(cooling_function, str): 159 ↛ 164line 159 didn't jump to line 164, because the condition on line 159 was never false

160 available = sorted(available_cooling_functions.keys()) 

161 if cooling_function not in available: 161 ↛ 162line 161 didn't jump to line 162, because the condition on line 161 was never true

162 raise ValueError(f'Select from the available cooling_functions: {available}') 

163 _cooling_function = available_cooling_functions[cooling_function] 

164 elif callable(cooling_function): 

165 _cooling_function = cooling_function 

166 else: 

167 raise TypeError('cooling_function must be either str or a function') 

168 

169 # set up cluster vectors 

170 structure_container = StructureContainer(cluster_space) 

171 for structure in base_structures: 

172 structure_container.add_structure(structure, properties={'energy': 0}) 

173 for structure in monte_carlo_structures: 

174 structure_container.add_structure(structure, properties={'energy': 0}) 

175 

176 # get number of structures in monte_carlo_structures 

177 n_mcmc_structures = len(monte_carlo_structures) 

178 n_base_structures = len(base_structures) 

179 

180 # randomly chose starting structure unless user want specific indices 

181 if not initial_indices: 

182 inds = np.random.choice(range(len(monte_carlo_structures)), size=n_structures_to_add, 

183 replace=False) 

184 else: 

185 inds = np.array(initial_indices) 

186 

187 # get initial fitting_matrix, A in Ax = y 

188 fit_matrix = _get_fit_matrix(structure_container, inds, n_base_structures) 

189 

190 # get metric of fitting_matrix 

191 cond = np.linalg.cond(fit_matrix) 

192 cond_traj = [cond] 

193 for n in range(n_steps): 

194 # get current artificial cooling 

195 T = _cooling_function(n, cooling_start, cooling_stop, n_steps) 

196 

197 # do swaps from pool 

198 new_inds = _do_swap(inds, n_structures_to_add, n_mcmc_structures) 

199 new_fit_matrix = _get_fit_matrix(structure_container, new_inds, n_base_structures) 

200 

201 # get new metric 

202 cond_new = np.linalg.cond(new_fit_matrix) 

203 if (cond - cond_new) / T > np.log(np.random.uniform()): 

204 # if accepted update data 

205 cond = cond_new 

206 inds = new_inds 

207 cond_traj.append(cond) 

208 

209 if n % 100 == 0: 

210 logger.info(f'step {n:6d}, T {T:8.5f} , current condition number {cond:8.5f}') 

211 

212 return inds, cond_traj