Coverage for icet/tools/structure_enumeration_support/labeling_generation.py: 100%

81 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-06 04:14 +0000

1import itertools 

2from collections import OrderedDict 

3from typing import List, Tuple, Dict 

4 

5 

6class LabelingGenerator(): 

7 """ 

8 Object used to generate all possible permutations of species on a given 

9 set of sites. If concentration restrictions are not specified, the 

10 generation will be a simple itertools.product loop. 

11 

12 If concentrations are specified, the approach is a bit more elaborate. 

13 Take as an example a system with four sites, where Pd and Au are allowed 

14 on two and H and V on the others. `iter_species` will then look something 

15 like this: `[(0, 1), (0, 1), (2, 3), (2, 3)]`. The algorithm proceeds as 

16 follows: 

17 

18 (1) Identify all unique species of `iter_species`, hereafter called 

19 `site_groups`. In this example they will be `(0, 1)` and `(2, 3)`. These 

20 unqiue species are saved in `site_groups`. 

21 

22 (2) For each unique `site_group` and a given cell size, construct all 

23 (combinations of the species in the site group. If the cell size is 2, 

24 i.e. a supercell twice as big as the primitive cell, the possible 

25 (combinations for the `(0, 1)`-`site_group` will be `(0, 0, 0, 0)`, `(0, 

26 0, 0, 1)`, `(0, 0, 1, 1)` and `(1, 1, 1, 1)`. Order does not matter and 

27 concentration restrictions are not respected at this point. 

28 

29 (3) Construct all "products" of the combinations of the unique site 

30 groups. In our example, the first ones are `[(0, 0, 0, 0), (2, 2, 2, 

31 2)]`, `[(0, 0, 0, 1), (2, 2, 2, 2)]` etc. Products that do not respect the 

32 concentration restrictions are discarded at this point. 

33 

34 (4) For each "product", construct the permutations for each constituent 

35 `site_group`. For `[(0, 0, 0, 1), (2, 2, 2, 2)]` these would be 

36 `[(0, 0, 0, 1), (2, 2, 2, 2)]`, `[(0, 0, 1, 0), (2, 2, 2, 2)]`, 

37 `[(0, 1, 0, 0), (2, 2, 2, 2)]` and `[(1, 0, 0, 0), (2, 2, 2, 2)]`. 

38 If the second group was not all 2, there would of course be many more 

39 such permutations. 

40 

41 (5) Sort the result according to the expectations of the structure 

42 enumeration (if we call the site group (0, 1) "A" and site group 

43 (2, 3) "B", the labeling should be AABBAABBAABB etc, i.e., one cell 

44 at a time). In the first permutation of (4), the resulting labeling 

45 will thus be `(0, 0, 2, 2, 0, 1, 2, 2)`. 

46 

47 Parameters 

48 ---------- 

49 iter_species 

50 list of tuples of ints, each element in the list representing a site, 

51 and each tuple containing the allowed species on that site 

52 concentrations 

53 keys represent species (integers), values specify concentration ranges 

54 as tuples of two floats 

55 

56 Attribtues 

57 ---------- 

58 site_groups : OrderedDict 

59 keys are unique iter_species, the values are SiteGroup objects 

60 corresponding to that iter_species 

61 """ 

62 

63 def __init__(self, iter_species: List[Tuple[int]], concentrations: Dict) -> None: 

64 self.iter_species = iter_species 

65 self.concentrations = concentrations 

66 

67 if self.concentrations: 

68 self.site_groups = OrderedDict() 

69 count = 0 

70 for iter_species_key in iter_species: 

71 if iter_species_key in self.site_groups.keys(): 

72 self.site_groups[iter_species_key].multiplicity += 1 

73 else: 

74 self.site_groups[iter_species_key] = SiteGroup( 

75 iter_species_key, count) 

76 count += 1 

77 

78 def yield_labelings(self, ncells: int) -> Tuple[int]: 

79 """ 

80 Yield labelings that comply with the concentration restrictions and, 

81 with `ncells` primitive cells. 

82 

83 Parameters 

84 ---------- 

85 ncells 

86 Size of supercell 

87 

88 Yields 

89 ------ 

90 Labelings 

91 """ 

92 

93 if self.concentrations: 

94 for site_group in self.site_groups.values(): 

95 site_group.compute_all_combinations(ncells) 

96 for product in self.yield_products(ncells): 

97 for labeling in self.yield_permutations(product, 0): 

98 yield self.sort_labeling(labeling, ncells) 

99 else: 

100 for labeling in itertools.product(*self.iter_species * ncells): 

101 yield labeling 

102 

103 def yield_products(self, ncells: int) -> Tuple[Tuple[int]]: 

104 """ 

105 Yield combinations (or rather products in the itertools terminology) 

106 of decorated site group combinations that comply with the concentration 

107 restrictions. 

108 

109 Parameters 

110 ---------- 

111 ncells 

112 Size of supercell 

113 

114 Returns 

115 ------- 

116 all unique combinations (products) of unordered `iter_species` 

117 combinations, 

118 """ 

119 natoms = len(self.iter_species) * ncells 

120 combinations = [sg.combinations for sg in self.site_groups.values()] 

121 for product in itertools.product(*combinations): 

122 counts = {species: 0 for species in self.concentrations.keys()} 

123 for species_group in product: 

124 for species in self.concentrations.keys(): 

125 counts[species] += species_group.count(species) 

126 conc_restr_violation = False 

127 for species, conc_range in self.concentrations.items(): 

128 if counts[species] / natoms < conc_range[0] - 1e-9 or \ 

129 counts[species] / natoms > conc_range[1] + 1e-9: 

130 conc_restr_violation = True 

131 break 

132 if not conc_restr_violation: 

133 yield product 

134 

135 def yield_unique_permutations(self, unique_species: Dict, permutation: List[int], 

136 position: int) -> Tuple[int]: 

137 """ 

138 Recursively generate all _unique_ permutations of a set of species 

139 with given multiplicities. The algorithm is inspired by the one given 

140 at https://stackoverflow.com/a/6285203/6729551 

141 

142 Parameters 

143 ---------- 

144 unique_species 

145 keys represent species,values the corresponding multiplicity 

146 permutation 

147 permutation in process; should have length equal to the sum of all 

148 multiplicities 

149 position 

150 position currently processed; should be the index of the last 

151 species of `permutation` upon initialization 

152 

153 Yields 

154 ------ 

155 permutation where each species occurs according to the multiplicity 

156 specified in `unique_species` 

157 """ 

158 if position < 0: 

159 # Finish recursion 

160 yield tuple(permutation) 

161 else: 

162 for species, occurrences in unique_species.items(): 

163 # Add if the multiplicity allows 

164 if occurrences > 0: 

165 permutation[position] = species 

166 unique_species[species] -= 1 

167 for perm in self.yield_unique_permutations(unique_species, 

168 permutation, 

169 position - 1): 

170 yield perm 

171 unique_species[species] += 1 

172 

173 def yield_permutations(self, product: Tuple[Tuple[int]], position: int) -> List[Tuple[int]]: 

174 """ 

175 Recursively generate all combinations of unique permutations of the 

176 tuples in `product`. 

177 

178 Parameters 

179 ---------- 

180 product 

181 species allowed for each site group 

182 position 

183 keeps track of the position where recursion occurs; set to 0 upon 

184 initialization. 

185 

186 Yields 

187 ------ 

188 unique combinations of unique permutations, ordered by site group 

189 """ 

190 unique_species = {species: product[position].count(species) 

191 for species in set(product[position])} 

192 natoms = len(product[position]) 

193 for permutation in self.yield_unique_permutations(unique_species, 

194 [0] * natoms, 

195 natoms - 1): 

196 if position == len(product) - 1: 

197 yield [permutation] 

198 else: 

199 for permutation_rest in self.yield_permutations(product, 

200 position + 1): 

201 yield [permutation] + permutation_rest 

202 

203 def sort_labeling(self, labeling: List[Tuple[int]], ncells: int) -> Tuple[int]: 

204 """ 

205 The elements in labeling are now given in site groups. We want just 

206 one labeling, ordered by site group, but one primitive cell at a time. 

207 So if iter_species given upon initialization was 

208 `[(0, 1), (2), (0, 1)]` and `ncells=2`, the current labeling has two 

209 tuples, the first corresponding to the `(0, 1)` site group, with 8 

210 elements in total, and the second corresponding to the `(2)` site group 

211 and with 2 elements in total. Now, we reorder it so that the elements 

212 are ordered according to `[(0, 1), (2), (0, 1), (0, 1), (2), (0, 1)]`. 

213 

214 Parameters 

215 ---------- 

216 labeling 

217 As given by yield_permutations 

218 ncells 

219 Size of supercell 

220 

221 Returns 

222 ------- 

223 Labeling properly sorted, ready to be given to the enumeration code 

224 """ 

225 sorted_labeling = [0] * len(self.iter_species * ncells) 

226 count = 0 

227 for _ in range(ncells): 

228 for iter_species in self.iter_species: 

229 

230 # Find the site group corresponding to the iter species 

231 site_group = self.site_groups[iter_species] 

232 

233 # Find the right element by checking (1) where the 

234 # proper site_group is in the unsorted labeling and 

235 # (2) which element is next in turn 

236 sorted_labeling[count] = labeling[ 

237 site_group.position][site_group.next_to_add] 

238 count += 1 

239 site_group.next_to_add += 1 

240 

241 # Reset site group counters 

242 for site_group in self.site_groups.values(): 

243 assert site_group.next_to_add == len(labeling[site_group.position]) 

244 site_group.next_to_add = 0 

245 return tuple(sorted_labeling) 

246 

247 

248class SiteGroup(): 

249 """ 

250 Keeps track of a group of sites that have the same allowed species. 

251 That is a site group could correspond to all sites on which species 0 and 

252 1 are allowed, and the number of such sites is stored in the `multiplicity` 

253 attribute.. 

254 

255 Parameters 

256 ---------- 

257 iter_species 

258 allowed species on these sites 

259 position 

260 helps to keep track of when the first group occurred; the first 

261 site group encountered will have position = 0, the next 1 etc. 

262 

263 Attributes 

264 ---------- 

265 multiplicity : int 

266 multiplicity if the group, i.e., how many sites that have this 

267 iter_species 

268 next_to_add : int 

269 helper attribute that keeps track of which site in this group is the 

270 next to be added when the elements of a labeling are to be sorted 

271 """ 

272 

273 def __init__(self, iter_species: Tuple[int], position: int) -> None: 

274 self.iter_species = iter_species 

275 self.position = position 

276 self.multiplicity = 1 

277 self.next_to_add = 0 # Will keep count when reordering elements 

278 

279 def compute_all_combinations(self, ncells: int) -> None: 

280 """ 

281 Compute all combinations (without considering order) of the elements 

282 in the group. 

283 

284 Parameters 

285 ---------- 

286 ncells : int 

287 Size of supercell 

288 """ 

289 self.combinations = [] 

290 natoms = self.multiplicity * ncells 

291 for combination in \ 

292 itertools.combinations_with_replacement(self.iter_species, 

293 natoms): 

294 self.combinations.append(combination)