Coverage for icet/tools/structure_enumeration_support/labeling_generation.py: 100%
81 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-12-26 04:12 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-12-26 04:12 +0000
1import itertools
2from collections import OrderedDict
3from typing import List, Tuple, Dict
6class LabelingGenerator():
7 """
8 Object used to generate all possible permutations of species on a given
9 set of sites. If concentration restrictions are not specified, the
10 generation will be a simple itertools.product loop.
12 If concentrations are specified, the approach is a bit more elaborate.
13 Take as an example a system with four sites, where Pd and Au are allowed
14 on two and H and V on the others. `iter_species` will then look something
15 like this: `[(0, 1), (0, 1), (2, 3), (2, 3)]`. The algorithm proceeds as
16 follows:
18 (1) Identify all unique species of `iter_species`, hereafter called
19 `site_groups`. In this example they will be `(0, 1)` and `(2, 3)`. These
20 unqiue species are saved in `site_groups`.
22 (2) For each unique `site_group` and a given cell size, construct all
23 (combinations of the species in the site group. If the cell size is 2,
24 i.e. a supercell twice as big as the primitive cell, the possible
25 (combinations for the `(0, 1)`-`site_group` will be `(0, 0, 0, 0)`, `(0,
26 0, 0, 1)`, `(0, 0, 1, 1)` and `(1, 1, 1, 1)`. Order does not matter and
27 concentration restrictions are not respected at this point.
29 (3) Construct all "products" of the combinations of the unique site
30 groups. In our example, the first ones are `[(0, 0, 0, 0), (2, 2, 2,
31 2)]`, `[(0, 0, 0, 1), (2, 2, 2, 2)]` etc. Products that do not respect the
32 concentration restrictions are discarded at this point.
34 (4) For each "product", construct the permutations for each constituent
35 `site_group`. For `[(0, 0, 0, 1), (2, 2, 2, 2)]` these would be
36 `[(0, 0, 0, 1), (2, 2, 2, 2)]`, `[(0, 0, 1, 0), (2, 2, 2, 2)]`,
37 `[(0, 1, 0, 0), (2, 2, 2, 2)]` and `[(1, 0, 0, 0), (2, 2, 2, 2)]`.
38 If the second group was not all 2, there would of course be many more
39 such permutations.
41 (5) Sort the result according to the expectations of the structure
42 enumeration (if we call the site group (0, 1) "A" and site group
43 (2, 3) "B", the labeling should be AABBAABBAABB etc, i.e., one cell
44 at a time). In the first permutation of (4), the resulting labeling
45 will thus be `(0, 0, 2, 2, 0, 1, 2, 2)`.
47 Parameters
48 ----------
49 iter_species
50 list of tuples of ints, each element in the list representing a site,
51 and each tuple containing the allowed species on that site
52 concentrations
53 keys represent species (integers), values specify concentration ranges
54 as tuples of two floats
56 Attribtues
57 ----------
58 site_groups : OrderedDict
59 keys are unique iter_species, the values are SiteGroup objects
60 corresponding to that iter_species
61 """
63 def __init__(self, iter_species: List[Tuple[int]], concentrations: Dict) -> None:
64 self.iter_species = iter_species
65 self.concentrations = concentrations
67 if self.concentrations:
68 self.site_groups = OrderedDict()
69 count = 0
70 for iter_species_key in iter_species:
71 if iter_species_key in self.site_groups.keys():
72 self.site_groups[iter_species_key].multiplicity += 1
73 else:
74 self.site_groups[iter_species_key] = SiteGroup(
75 iter_species_key, count)
76 count += 1
78 def yield_labelings(self, ncells: int) -> Tuple[int]:
79 """
80 Yield labelings that comply with the concentration restrictions and,
81 with `ncells` primitive cells.
83 Parameters
84 ----------
85 ncells
86 Size of supercell
88 Yields
89 ------
90 Labelings
91 """
93 if self.concentrations:
94 for site_group in self.site_groups.values():
95 site_group.compute_all_combinations(ncells)
96 for product in self.yield_products(ncells):
97 for labeling in self.yield_permutations(product, 0):
98 yield self.sort_labeling(labeling, ncells)
99 else:
100 for labeling in itertools.product(*self.iter_species * ncells):
101 yield labeling
103 def yield_products(self, ncells: int) -> Tuple[Tuple[int]]:
104 """
105 Yield combinations (or rather products in the itertools terminology)
106 of decorated site group combinations that comply with the concentration
107 restrictions.
109 Parameters
110 ----------
111 ncells
112 Size of supercell
114 Returns
115 -------
116 all unique combinations (products) of unordered `iter_species`
117 combinations,
118 """
119 natoms = len(self.iter_species) * ncells
120 combinations = [sg.combinations for sg in self.site_groups.values()]
121 for product in itertools.product(*combinations):
122 counts = {species: 0 for species in self.concentrations.keys()}
123 for species_group in product:
124 for species in self.concentrations.keys():
125 counts[species] += species_group.count(species)
126 conc_restr_violation = False
127 for species, conc_range in self.concentrations.items():
128 if counts[species] / natoms < conc_range[0] - 1e-9 or \
129 counts[species] / natoms > conc_range[1] + 1e-9:
130 conc_restr_violation = True
131 break
132 if not conc_restr_violation:
133 yield product
135 def yield_unique_permutations(self, unique_species: Dict, permutation: List[int],
136 position: int) -> Tuple[int]:
137 """
138 Recursively generate all _unique_ permutations of a set of species
139 with given multiplicities. The algorithm is inspired by the one given
140 at https://stackoverflow.com/a/6285203/6729551
142 Parameters
143 ----------
144 unique_species
145 keys represent species,values the corresponding multiplicity
146 permutation
147 permutation in process; should have length equal to the sum of all
148 multiplicities
149 position
150 position currently processed; should be the index of the last
151 species of `permutation` upon initialization
153 Yields
154 ------
155 permutation where each species occurs according to the multiplicity
156 specified in `unique_species`
157 """
158 if position < 0:
159 # Finish recursion
160 yield tuple(permutation)
161 else:
162 for species, occurrences in unique_species.items():
163 # Add if the multiplicity allows
164 if occurrences > 0:
165 permutation[position] = species
166 unique_species[species] -= 1
167 for perm in self.yield_unique_permutations(unique_species,
168 permutation,
169 position - 1):
170 yield perm
171 unique_species[species] += 1
173 def yield_permutations(self, product: Tuple[Tuple[int]], position: int) -> List[Tuple[int]]:
174 """
175 Recursively generate all combinations of unique permutations of the
176 tuples in `product`.
178 Parameters
179 ----------
180 product
181 species allowed for each site group
182 position
183 keeps track of the position where recursion occurs; set to 0 upon
184 initialization.
186 Yields
187 ------
188 unique combinations of unique permutations, ordered by site group
189 """
190 unique_species = {species: product[position].count(species)
191 for species in set(product[position])}
192 natoms = len(product[position])
193 for permutation in self.yield_unique_permutations(unique_species,
194 [0] * natoms,
195 natoms - 1):
196 if position == len(product) - 1:
197 yield [permutation]
198 else:
199 for permutation_rest in self.yield_permutations(product,
200 position + 1):
201 yield [permutation] + permutation_rest
203 def sort_labeling(self, labeling: List[Tuple[int]], ncells: int) -> Tuple[int]:
204 """
205 The elements in labeling are now given in site groups. We want just
206 one labeling, ordered by site group, but one primitive cell at a time.
207 So if iter_species given upon initialization was
208 `[(0, 1), (2), (0, 1)]` and `ncells=2`, the current labeling has two
209 tuples, the first corresponding to the `(0, 1)` site group, with 8
210 elements in total, and the second corresponding to the `(2)` site group
211 and with 2 elements in total. Now, we reorder it so that the elements
212 are ordered according to `[(0, 1), (2), (0, 1), (0, 1), (2), (0, 1)]`.
214 Parameters
215 ----------
216 labeling
217 As given by yield_permutations
218 ncells
219 Size of supercell
221 Returns
222 -------
223 Labeling properly sorted, ready to be given to the enumeration code
224 """
225 sorted_labeling = [0] * len(self.iter_species * ncells)
226 count = 0
227 for _ in range(ncells):
228 for iter_species in self.iter_species:
230 # Find the site group corresponding to the iter species
231 site_group = self.site_groups[iter_species]
233 # Find the right element by checking (1) where the
234 # proper site_group is in the unsorted labeling and
235 # (2) which element is next in turn
236 sorted_labeling[count] = labeling[
237 site_group.position][site_group.next_to_add]
238 count += 1
239 site_group.next_to_add += 1
241 # Reset site group counters
242 for site_group in self.site_groups.values():
243 assert site_group.next_to_add == len(labeling[site_group.position])
244 site_group.next_to_add = 0
245 return tuple(sorted_labeling)
248class SiteGroup():
249 """
250 Keeps track of a group of sites that have the same allowed species.
251 That is a site group could correspond to all sites on which species 0 and
252 1 are allowed, and the number of such sites is stored in the `multiplicity`
253 attribute..
255 Parameters
256 ----------
257 iter_species
258 allowed species on these sites
259 position
260 helps to keep track of when the first group occurred; the first
261 site group encountered will have position = 0, the next 1 etc.
263 Attributes
264 ----------
265 multiplicity : int
266 multiplicity if the group, i.e., how many sites that have this
267 iter_species
268 next_to_add : int
269 helper attribute that keeps track of which site in this group is the
270 next to be added when the elements of a labeling are to be sorted
271 """
273 def __init__(self, iter_species: Tuple[int], position: int) -> None:
274 self.iter_species = iter_species
275 self.position = position
276 self.multiplicity = 1
277 self.next_to_add = 0 # Will keep count when reordering elements
279 def compute_all_combinations(self, ncells: int) -> None:
280 """
281 Compute all combinations (without considering order) of the elements
282 in the group.
284 Parameters
285 ----------
286 ncells : int
287 Size of supercell
288 """
289 self.combinations = []
290 natoms = self.multiplicity * ncells
291 for combination in \
292 itertools.combinations_with_replacement(self.iter_species,
293 natoms):
294 self.combinations.append(combination)