Coverage for icet/tools/structure_enumeration_support/labeling_generation.py: 100%
80 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-09-14 04:08 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-09-14 04:08 +0000
1import itertools
2from collections import OrderedDict
5class LabelingGenerator():
6 """
7 Object used to generate all possible permutations of species on a given
8 set of sites. If concentration restrictions are not specified, the
9 generation will be a simple itertools.product loop.
11 If concentrations are specified, the approach is a bit more elaborate.
12 Take as an example a system with four sites, where Pd and Au are allowed
13 on two and H and V on the others. `iter_species` will then look something
14 like this: `[(0, 1), (0, 1), (2, 3), (2, 3)]`. The algorithm proceeds as
15 follows:
17 (1) Identify all unique species of `iter_species`, hereafter called
18 `site_groups`. In this example they will be `(0, 1)` and `(2, 3)`. These
19 unqiue species are saved in `site_groups`.
21 (2) For each unique `site_group` and a given cell size, construct all
22 (combinations of the species in the site group. If the cell size is 2,
23 i.e. a supercell twice as big as the primitive cell, the possible
24 (combinations for the `(0, 1)`-`site_group` will be `(0, 0, 0, 0)`, `(0,
25 0, 0, 1)`, `(0, 0, 1, 1)` and `(1, 1, 1, 1)`. Order does not matter and
26 concentration restrictions are not respected at this point.
28 (3) Construct all "products" of the combinations of the unique site
29 groups. In our example, the first ones are `[(0, 0, 0, 0), (2, 2, 2,
30 2)]`, `[(0, 0, 0, 1), (2, 2, 2, 2)]` etc. Products that do not respect the
31 concentration restrictions are discarded at this point.
33 (4) For each "product", construct the permutations for each constituent
34 `site_group`. For `[(0, 0, 0, 1), (2, 2, 2, 2)]` these would be
35 `[(0, 0, 0, 1), (2, 2, 2, 2)]`, `[(0, 0, 1, 0), (2, 2, 2, 2)]`,
36 `[(0, 1, 0, 0), (2, 2, 2, 2)]` and `[(1, 0, 0, 0), (2, 2, 2, 2)]`.
37 If the second group was not all 2, there would of course be many more
38 such permutations.
40 (5) Sort the result according to the expectations of the structure
41 enumeration (if we call the site group (0, 1) "A" and site group
42 (2, 3) "B", the labeling should be AABBAABBAABB etc, i.e., one cell
43 at a time). In the first permutation of (4), the resulting labeling
44 will thus be `(0, 0, 2, 2, 0, 1, 2, 2)`.
46 Parameters
47 ----------
48 iter_species
49 list of tuples of ints, each element in the list representing a site,
50 and each tuple containing the allowed species on that site
51 concentrations
52 keys represent species (integers), values specify concentration ranges
53 as tuples of two floats
55 Attribtues
56 ----------
57 site_groups : OrderedDict
58 keys are unique iter_species, the values are SiteGroup objects
59 corresponding to that iter_species
60 """
62 def __init__(self, iter_species: list[tuple[int]], concentrations: dict) -> None:
63 self.iter_species = iter_species
64 self.concentrations = concentrations
66 if self.concentrations:
67 self.site_groups = OrderedDict()
68 count = 0
69 for iter_species_key in iter_species:
70 if iter_species_key in self.site_groups.keys():
71 self.site_groups[iter_species_key].multiplicity += 1
72 else:
73 self.site_groups[iter_species_key] = SiteGroup(
74 iter_species_key, count)
75 count += 1
77 def yield_labelings(self, ncells: int) -> tuple[int]:
78 """
79 Yield labelings that comply with the concentration restrictions and,
80 with `ncells` primitive cells.
82 Parameters
83 ----------
84 ncells
85 Size of supercell
87 Yields
88 ------
89 Labelings
90 """
92 if self.concentrations:
93 for site_group in self.site_groups.values():
94 site_group.compute_all_combinations(ncells)
95 for product in self.yield_products(ncells):
96 for labeling in self.yield_permutations(product, 0):
97 yield self.sort_labeling(labeling, ncells)
98 else:
99 for labeling in itertools.product(*self.iter_species * ncells):
100 yield labeling
102 def yield_products(self, ncells: int) -> tuple[tuple[int]]:
103 """
104 Yield combinations (or rather products in the itertools terminology)
105 of decorated site group combinations that comply with the concentration
106 restrictions.
108 Parameters
109 ----------
110 ncells
111 Size of supercell
113 Returns
114 -------
115 all unique combinations (products) of unordered `iter_species`
116 combinations,
117 """
118 natoms = len(self.iter_species) * ncells
119 combinations = [sg.combinations for sg in self.site_groups.values()]
120 for product in itertools.product(*combinations):
121 counts = {species: 0 for species in self.concentrations.keys()}
122 for species_group in product:
123 for species in self.concentrations.keys():
124 counts[species] += species_group.count(species)
125 conc_restr_violation = False
126 for species, conc_range in self.concentrations.items():
127 if counts[species] / natoms < conc_range[0] - 1e-9 or \
128 counts[species] / natoms > conc_range[1] + 1e-9:
129 conc_restr_violation = True
130 break
131 if not conc_restr_violation:
132 yield product
134 def yield_unique_permutations(self, unique_species: dict, permutation: list[int],
135 position: int) -> tuple[int]:
136 """
137 Recursively generate all _unique_ permutations of a set of species
138 with given multiplicities. The algorithm is inspired by the one given
139 at https://stackoverflow.com/a/6285203/6729551
141 Parameters
142 ----------
143 unique_species
144 keys represent species,values the corresponding multiplicity
145 permutation
146 permutation in process; should have length equal to the sum of all
147 multiplicities
148 position
149 position currently processed; should be the index of the last
150 species of `permutation` upon initialization
152 Yields
153 ------
154 permutation where each species occurs according to the multiplicity
155 specified in `unique_species`
156 """
157 if position < 0:
158 # Finish recursion
159 yield tuple(permutation)
160 else:
161 for species, occurrences in unique_species.items():
162 # Add if the multiplicity allows
163 if occurrences > 0:
164 permutation[position] = species
165 unique_species[species] -= 1
166 for perm in self.yield_unique_permutations(unique_species,
167 permutation,
168 position - 1):
169 yield perm
170 unique_species[species] += 1
172 def yield_permutations(self, product: tuple[tuple[int]], position: int) -> list[tuple[int]]:
173 """
174 Recursively generate all combinations of unique permutations of the
175 tuples in `product`.
177 Parameters
178 ----------
179 product
180 species allowed for each site group
181 position
182 keeps track of the position where recursion occurs; set to 0 upon
183 initialization.
185 Yields
186 ------
187 unique combinations of unique permutations, ordered by site group
188 """
189 unique_species = {species: product[position].count(species)
190 for species in set(product[position])}
191 natoms = len(product[position])
192 for permutation in self.yield_unique_permutations(unique_species,
193 [0] * natoms,
194 natoms - 1):
195 if position == len(product) - 1:
196 yield [permutation]
197 else:
198 for permutation_rest in self.yield_permutations(product,
199 position + 1):
200 yield [permutation] + permutation_rest
202 def sort_labeling(self, labeling: list[tuple[int]], ncells: int) -> tuple[int]:
203 """
204 The elements in labeling are now given in site groups. We want just
205 one labeling, ordered by site group, but one primitive cell at a time.
206 So if iter_species given upon initialization was
207 `[(0, 1), (2), (0, 1)]` and `ncells=2`, the current labeling has two
208 tuples, the first corresponding to the `(0, 1)` site group, with 8
209 elements in total, and the second corresponding to the `(2)` site group
210 and with 2 elements in total. Now, we reorder it so that the elements
211 are ordered according to `[(0, 1), (2), (0, 1), (0, 1), (2), (0, 1)]`.
213 Parameters
214 ----------
215 labeling
216 As given by yield_permutations
217 ncells
218 Size of supercell
220 Returns
221 -------
222 Labeling properly sorted, ready to be given to the enumeration code
223 """
224 sorted_labeling = [0] * len(self.iter_species * ncells)
225 count = 0
226 for _ in range(ncells):
227 for iter_species in self.iter_species:
229 # Find the site group corresponding to the iter species
230 site_group = self.site_groups[iter_species]
232 # Find the right element by checking (1) where the
233 # proper site_group is in the unsorted labeling and
234 # (2) which element is next in turn
235 sorted_labeling[count] = labeling[
236 site_group.position][site_group.next_to_add]
237 count += 1
238 site_group.next_to_add += 1
240 # Reset site group counters
241 for site_group in self.site_groups.values():
242 assert site_group.next_to_add == len(labeling[site_group.position])
243 site_group.next_to_add = 0
244 return tuple(sorted_labeling)
247class SiteGroup():
248 """
249 Keeps track of a group of sites that have the same allowed species.
250 That is a site group could correspond to all sites on which species 0 and
251 1 are allowed, and the number of such sites is stored in the `multiplicity`
252 attribute..
254 Parameters
255 ----------
256 iter_species
257 allowed species on these sites
258 position
259 helps to keep track of when the first group occurred; the first
260 site group encountered will have position = 0, the next 1 etc.
262 Attributes
263 ----------
264 multiplicity : int
265 multiplicity if the group, i.e., how many sites that have this
266 iter_species
267 next_to_add : int
268 helper attribute that keeps track of which site in this group is the
269 next to be added when the elements of a labeling are to be sorted
270 """
272 def __init__(self, iter_species: tuple[int], position: int) -> None:
273 self.iter_species = iter_species
274 self.position = position
275 self.multiplicity = 1
276 self.next_to_add = 0 # Will keep count when reordering elements
278 def compute_all_combinations(self, ncells: int) -> None:
279 """
280 Compute all combinations (without considering order) of the elements
281 in the group.
283 Parameters
284 ----------
285 ncells : int
286 Size of supercell
287 """
288 self.combinations = []
289 natoms = self.multiplicity * ncells
290 for combination in \
291 itertools.combinations_with_replacement(self.iter_species,
292 natoms):
293 self.combinations.append(combination)