Coverage for icet/tools/structure_enumeration_support/labeling_generation.py: 100%

80 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-09-14 04:08 +0000

1import itertools 

2from collections import OrderedDict 

3 

4 

5class LabelingGenerator(): 

6 """ 

7 Object used to generate all possible permutations of species on a given 

8 set of sites. If concentration restrictions are not specified, the 

9 generation will be a simple itertools.product loop. 

10 

11 If concentrations are specified, the approach is a bit more elaborate. 

12 Take as an example a system with four sites, where Pd and Au are allowed 

13 on two and H and V on the others. `iter_species` will then look something 

14 like this: `[(0, 1), (0, 1), (2, 3), (2, 3)]`. The algorithm proceeds as 

15 follows: 

16 

17 (1) Identify all unique species of `iter_species`, hereafter called 

18 `site_groups`. In this example they will be `(0, 1)` and `(2, 3)`. These 

19 unqiue species are saved in `site_groups`. 

20 

21 (2) For each unique `site_group` and a given cell size, construct all 

22 (combinations of the species in the site group. If the cell size is 2, 

23 i.e. a supercell twice as big as the primitive cell, the possible 

24 (combinations for the `(0, 1)`-`site_group` will be `(0, 0, 0, 0)`, `(0, 

25 0, 0, 1)`, `(0, 0, 1, 1)` and `(1, 1, 1, 1)`. Order does not matter and 

26 concentration restrictions are not respected at this point. 

27 

28 (3) Construct all "products" of the combinations of the unique site 

29 groups. In our example, the first ones are `[(0, 0, 0, 0), (2, 2, 2, 

30 2)]`, `[(0, 0, 0, 1), (2, 2, 2, 2)]` etc. Products that do not respect the 

31 concentration restrictions are discarded at this point. 

32 

33 (4) For each "product", construct the permutations for each constituent 

34 `site_group`. For `[(0, 0, 0, 1), (2, 2, 2, 2)]` these would be 

35 `[(0, 0, 0, 1), (2, 2, 2, 2)]`, `[(0, 0, 1, 0), (2, 2, 2, 2)]`, 

36 `[(0, 1, 0, 0), (2, 2, 2, 2)]` and `[(1, 0, 0, 0), (2, 2, 2, 2)]`. 

37 If the second group was not all 2, there would of course be many more 

38 such permutations. 

39 

40 (5) Sort the result according to the expectations of the structure 

41 enumeration (if we call the site group (0, 1) "A" and site group 

42 (2, 3) "B", the labeling should be AABBAABBAABB etc, i.e., one cell 

43 at a time). In the first permutation of (4), the resulting labeling 

44 will thus be `(0, 0, 2, 2, 0, 1, 2, 2)`. 

45 

46 Parameters 

47 ---------- 

48 iter_species 

49 list of tuples of ints, each element in the list representing a site, 

50 and each tuple containing the allowed species on that site 

51 concentrations 

52 keys represent species (integers), values specify concentration ranges 

53 as tuples of two floats 

54 

55 Attribtues 

56 ---------- 

57 site_groups : OrderedDict 

58 keys are unique iter_species, the values are SiteGroup objects 

59 corresponding to that iter_species 

60 """ 

61 

62 def __init__(self, iter_species: list[tuple[int]], concentrations: dict) -> None: 

63 self.iter_species = iter_species 

64 self.concentrations = concentrations 

65 

66 if self.concentrations: 

67 self.site_groups = OrderedDict() 

68 count = 0 

69 for iter_species_key in iter_species: 

70 if iter_species_key in self.site_groups.keys(): 

71 self.site_groups[iter_species_key].multiplicity += 1 

72 else: 

73 self.site_groups[iter_species_key] = SiteGroup( 

74 iter_species_key, count) 

75 count += 1 

76 

77 def yield_labelings(self, ncells: int) -> tuple[int]: 

78 """ 

79 Yield labelings that comply with the concentration restrictions and, 

80 with `ncells` primitive cells. 

81 

82 Parameters 

83 ---------- 

84 ncells 

85 Size of supercell 

86 

87 Yields 

88 ------ 

89 Labelings 

90 """ 

91 

92 if self.concentrations: 

93 for site_group in self.site_groups.values(): 

94 site_group.compute_all_combinations(ncells) 

95 for product in self.yield_products(ncells): 

96 for labeling in self.yield_permutations(product, 0): 

97 yield self.sort_labeling(labeling, ncells) 

98 else: 

99 for labeling in itertools.product(*self.iter_species * ncells): 

100 yield labeling 

101 

102 def yield_products(self, ncells: int) -> tuple[tuple[int]]: 

103 """ 

104 Yield combinations (or rather products in the itertools terminology) 

105 of decorated site group combinations that comply with the concentration 

106 restrictions. 

107 

108 Parameters 

109 ---------- 

110 ncells 

111 Size of supercell 

112 

113 Returns 

114 ------- 

115 all unique combinations (products) of unordered `iter_species` 

116 combinations, 

117 """ 

118 natoms = len(self.iter_species) * ncells 

119 combinations = [sg.combinations for sg in self.site_groups.values()] 

120 for product in itertools.product(*combinations): 

121 counts = {species: 0 for species in self.concentrations.keys()} 

122 for species_group in product: 

123 for species in self.concentrations.keys(): 

124 counts[species] += species_group.count(species) 

125 conc_restr_violation = False 

126 for species, conc_range in self.concentrations.items(): 

127 if counts[species] / natoms < conc_range[0] - 1e-9 or \ 

128 counts[species] / natoms > conc_range[1] + 1e-9: 

129 conc_restr_violation = True 

130 break 

131 if not conc_restr_violation: 

132 yield product 

133 

134 def yield_unique_permutations(self, unique_species: dict, permutation: list[int], 

135 position: int) -> tuple[int]: 

136 """ 

137 Recursively generate all _unique_ permutations of a set of species 

138 with given multiplicities. The algorithm is inspired by the one given 

139 at https://stackoverflow.com/a/6285203/6729551 

140 

141 Parameters 

142 ---------- 

143 unique_species 

144 keys represent species,values the corresponding multiplicity 

145 permutation 

146 permutation in process; should have length equal to the sum of all 

147 multiplicities 

148 position 

149 position currently processed; should be the index of the last 

150 species of `permutation` upon initialization 

151 

152 Yields 

153 ------ 

154 permutation where each species occurs according to the multiplicity 

155 specified in `unique_species` 

156 """ 

157 if position < 0: 

158 # Finish recursion 

159 yield tuple(permutation) 

160 else: 

161 for species, occurrences in unique_species.items(): 

162 # Add if the multiplicity allows 

163 if occurrences > 0: 

164 permutation[position] = species 

165 unique_species[species] -= 1 

166 for perm in self.yield_unique_permutations(unique_species, 

167 permutation, 

168 position - 1): 

169 yield perm 

170 unique_species[species] += 1 

171 

172 def yield_permutations(self, product: tuple[tuple[int]], position: int) -> list[tuple[int]]: 

173 """ 

174 Recursively generate all combinations of unique permutations of the 

175 tuples in `product`. 

176 

177 Parameters 

178 ---------- 

179 product 

180 species allowed for each site group 

181 position 

182 keeps track of the position where recursion occurs; set to 0 upon 

183 initialization. 

184 

185 Yields 

186 ------ 

187 unique combinations of unique permutations, ordered by site group 

188 """ 

189 unique_species = {species: product[position].count(species) 

190 for species in set(product[position])} 

191 natoms = len(product[position]) 

192 for permutation in self.yield_unique_permutations(unique_species, 

193 [0] * natoms, 

194 natoms - 1): 

195 if position == len(product) - 1: 

196 yield [permutation] 

197 else: 

198 for permutation_rest in self.yield_permutations(product, 

199 position + 1): 

200 yield [permutation] + permutation_rest 

201 

202 def sort_labeling(self, labeling: list[tuple[int]], ncells: int) -> tuple[int]: 

203 """ 

204 The elements in labeling are now given in site groups. We want just 

205 one labeling, ordered by site group, but one primitive cell at a time. 

206 So if iter_species given upon initialization was 

207 `[(0, 1), (2), (0, 1)]` and `ncells=2`, the current labeling has two 

208 tuples, the first corresponding to the `(0, 1)` site group, with 8 

209 elements in total, and the second corresponding to the `(2)` site group 

210 and with 2 elements in total. Now, we reorder it so that the elements 

211 are ordered according to `[(0, 1), (2), (0, 1), (0, 1), (2), (0, 1)]`. 

212 

213 Parameters 

214 ---------- 

215 labeling 

216 As given by yield_permutations 

217 ncells 

218 Size of supercell 

219 

220 Returns 

221 ------- 

222 Labeling properly sorted, ready to be given to the enumeration code 

223 """ 

224 sorted_labeling = [0] * len(self.iter_species * ncells) 

225 count = 0 

226 for _ in range(ncells): 

227 for iter_species in self.iter_species: 

228 

229 # Find the site group corresponding to the iter species 

230 site_group = self.site_groups[iter_species] 

231 

232 # Find the right element by checking (1) where the 

233 # proper site_group is in the unsorted labeling and 

234 # (2) which element is next in turn 

235 sorted_labeling[count] = labeling[ 

236 site_group.position][site_group.next_to_add] 

237 count += 1 

238 site_group.next_to_add += 1 

239 

240 # Reset site group counters 

241 for site_group in self.site_groups.values(): 

242 assert site_group.next_to_add == len(labeling[site_group.position]) 

243 site_group.next_to_add = 0 

244 return tuple(sorted_labeling) 

245 

246 

247class SiteGroup(): 

248 """ 

249 Keeps track of a group of sites that have the same allowed species. 

250 That is a site group could correspond to all sites on which species 0 and 

251 1 are allowed, and the number of such sites is stored in the `multiplicity` 

252 attribute.. 

253 

254 Parameters 

255 ---------- 

256 iter_species 

257 allowed species on these sites 

258 position 

259 helps to keep track of when the first group occurred; the first 

260 site group encountered will have position = 0, the next 1 etc. 

261 

262 Attributes 

263 ---------- 

264 multiplicity : int 

265 multiplicity if the group, i.e., how many sites that have this 

266 iter_species 

267 next_to_add : int 

268 helper attribute that keeps track of which site in this group is the 

269 next to be added when the elements of a labeling are to be sorted 

270 """ 

271 

272 def __init__(self, iter_species: tuple[int], position: int) -> None: 

273 self.iter_species = iter_species 

274 self.position = position 

275 self.multiplicity = 1 

276 self.next_to_add = 0 # Will keep count when reordering elements 

277 

278 def compute_all_combinations(self, ncells: int) -> None: 

279 """ 

280 Compute all combinations (without considering order) of the elements 

281 in the group. 

282 

283 Parameters 

284 ---------- 

285 ncells : int 

286 Size of supercell 

287 """ 

288 self.combinations = [] 

289 natoms = self.multiplicity * ncells 

290 for combination in \ 

291 itertools.combinations_with_replacement(self.iter_species, 

292 natoms): 

293 self.combinations.append(combination)