Coverage for icet/tools/convex_hull.py: 97%
95 statements
« prev ^ index » next coverage.py v7.5.0, created at 2024-12-26 04:12 +0000
« prev ^ index » next coverage.py v7.5.0, created at 2024-12-26 04:12 +0000
1"""
2This module provides the ConvexHull class.
3"""
5import itertools
6import numpy as np
7from typing import List, Sized, Union
8from scipy.interpolate import griddata
9from scipy.spatial import ConvexHull as ConvexHullSciPy
10from scipy.spatial import QhullError
13class ConvexHull:
14 """This class provides functionality for extracting the convex hull
15 of the (free) energy of mixing. It is based on the `convex hull
16 calculator in SciPy
17 <http://docs.scipy.org/doc/scipy-dev/reference/\
18generated/scipy.spatial.ConvexHull.html>`_.
20 Parameters
21 ----------
22 concentrations
23 Concentrations for each structure listed as ``[[c1, c2], [c1, c2],
24 ...]``; for binaries, in which case there is only one independent
25 concentration, the format ``[c1, c2, c3, ...]`` works as well.
26 energies
27 Energy (or energy of mixing) for each structure.
29 Attributes
30 ----------
31 concentrations : np.ndarray
32 Concentrations of the structures on the convex hull.
33 energies : np.ndarray
34 Energies of the structures on the convex hull.
35 dimensions : int
36 Number of independent concentrations needed to specify a point in
37 concentration space (1 for binaries, 2 for ternaries and so on).
38 structures : List[int]
39 Indices of structures that constitute the convex hull (indices are
40 defined by the order of their concentrations and energies are fed when
41 initializing the :class:`ConvexHull` object).
43 Examples
44 --------
45 A :class:`ConvexHull` object is easily initialized by providing lists of
46 concentrations and energies::
48 >>> data = {'concentration': [0, 0.2, 0.2, 0.3, 0.4, 0.5, 0.8, 1.0],
49 ... 'mixing_energy': [0.1, -0.2, -0.1, -0.2, 0.2, -0.4, -0.2, -0.1]}
50 >>> hull = ConvexHull(data['concentration'], data['mixing_energy'])
52 Now one can for example access the points along the convex hull directly::
54 >>> for c, e in zip(hull.concentrations, hull.energies):
55 ... print(c, e)
56 0.0 0.1
57 0.2 -0.2
58 0.5 -0.4
59 1.0 -0.1
61 or plot the convex hull along with the original data using e.g., matplotlib::
63 >>> import matplotlib.pyplot as plt
64 >>> plt.scatter(data['concentration'], data['mixing_energy'], color='darkred')
65 >>> plt.plot(hull.concentrations, hull.energies)
66 >>> plt.show(block=False)
68 It is also possible to extract structures at or close to the convex hull::
70 >>> low_energy_structures = hull.extract_low_energy_structures(
71 ... data['concentration'], data['mixing_energy'],
72 ... energy_tolerance=0.005)
74 A complete example can be found in the :ref:`basic tutorial
75 <tutorial_enumerate_structures>`.
76 """
78 def __init__(self,
79 concentrations: Union[List[float], List[List[float]]],
80 energies: List[float]) -> None:
81 assert len(concentrations) == len(energies)
82 # Prepare data in format suitable for SciPy-ConvexHull
83 concentrations = np.array(concentrations)
84 energies = np.array(energies)
85 points = np.column_stack((concentrations, energies))
86 self.dimensions = len(points[0]) - 1
88 # Construct convex hull
89 hull = ConvexHullSciPy(points)
91 # Collect convex hull points in handy arrays
92 concentrations = []
93 energies = []
94 for vertex in hull.vertices:
95 if self.dimensions == 1:
96 concentrations.append(points[vertex][0])
97 else:
98 concentrations.append(points[vertex][0:-1])
99 energies.append(points[vertex][-1])
100 concentrations = np.array(concentrations)
101 energies = np.array(energies)
103 structures = hull.vertices
104 # If there is just one independent concentration, we'd better sort
105 # according to it
106 if self.dimensions == 1:
107 ces = list(zip(*sorted(zip(concentrations, energies, structures))))
108 self.concentrations = np.array(ces[0])
109 self.energies = np.array(ces[1])
110 self.structures = np.array(ces[2])
111 else:
112 self.concentrations = concentrations
113 self.energies = energies
114 self.structures = structures
116 # Remove points that are above the "pure components plane"
117 self._remove_points_above_tie_plane()
119 def __str__(self) -> str:
120 width = 40
121 s = []
122 s += ['{s:=^{n}}'.format(s=' Convex Hull ', n=width)]
123 s += [' {:24} : {}'.format('dimensions', self.dimensions)]
124 s += [' {:24} : {}'.format('number of points', len(self.concentrations))]
125 s += [' {:24} : {}'.format('smallest concentration', np.min(self.concentrations))]
126 s += [' {:24} : {}'.format('largest concentration', np.max(self.concentrations))]
127 s += [''.center(width, '=')]
128 return '\n'.join(s)
130 def _repr_html_(self) -> str:
131 """ HTML representation. Used, e.g., in jupyter notebooks. """
132 s = ['<h4>Convex Hull</h4>']
133 s += ['<table border="1" class="dataframe">']
134 s += ['<tbody>']
135 s += [f'<tr><td style="text-align: left;">Dimensions</td><td>{self.dimensions}</td></tr>']
136 s += ['<tr><td style="text-align: left;">Number of points</td>'
137 f'<td>{len(self.concentrations)}</td></tr>']
138 s += ['<tr><td style="text-align: left;">Smallest concentration</td>'
139 f'<td>{np.min(self.concentrations)}</td></tr>']
140 s += ['<tr><td style="text-align: left;">Largest concentration</td>'
141 f'<td>{np.max(self.concentrations)}</td></tr>']
142 s += ['</tbody>']
143 s += ['</table>']
144 return ''.join(s)
146 def _remove_points_above_tie_plane(self, tol: float = 1e-6) -> None:
147 """
148 Remove all points on the convex hull that correspond to maximum rather
149 than minimum energy.
151 Parameters
152 ----------
153 tol
154 Tolerance for what energy constitutes a lower one.
155 """
157 # Identify the "complex concentration hull", i.e. the extremal
158 # concentrations. In the simplest case, these should simply be the
159 # pure components.
160 if self.dimensions == 1:
161 # Then the ConvexHullScipy function doesn't work, so we just pick
162 # the indices of the lowest and highest concentrations.
163 vertices = []
164 vertices.append(np.argmin(self.concentrations))
165 vertices.append(np.argmax(self.concentrations))
166 vertices = np.array(vertices)
167 else:
168 concentration_hull = ConvexHullSciPy(self.concentrations)
169 vertices = concentration_hull.vertices
171 # Remove all points of the convex energy hull that have an energy that
172 # is higher than what would be gotten with pure components at the same
173 # concentration. These points are mathematically on the convex hull,
174 # but in the physically uninteresting upper part, i.e. they maximize
175 # rather than minimize energy.
176 to_delete = []
177 for i, concentration in enumerate(self.concentrations):
178 # The points on the convex concentration hull should always be
179 # included, so skip them.
180 if i in vertices:
181 continue
183 # The energy obtained as a linear combination of concentrations on
184 # the convex hull is the "z coordinate" of the position on a
185 # (hyper)plane in the (number of independent concentrations +
186 # 1)-dimensional (N-D) space. This plane is spanned by N points.
187 # If there are more vertices on the convex hull, we need to loop
188 # over all combinations of N vertices.
189 for plane in itertools.combinations(vertices,
190 min(len(vertices),
191 self.dimensions + 1)):
192 # Calculate energy that would be gotten with pure components
193 # with ascribed concentration.
194 energy_pure = griddata(self.concentrations[np.array(plane)],
195 self.energies[np.array(plane)],
196 concentration,
197 method='linear')
199 # Prepare to delete if the energy was lowered. `griddata` gives
200 # NaN if the concentration is outside the triangle formed by
201 # the three vertices. The result of the below comparison is
202 # then False, which is what we want.
203 if energy_pure < self.energies[i] - tol:
204 to_delete.append(i)
205 break
207 # Finally remove all points
208 self.concentrations = np.delete(self.concentrations, to_delete, 0)
209 self.energies = np.delete(self.energies, to_delete, 0)
210 self.structures = list(np.delete(self.structures, to_delete, 0))
212 def get_energy_at_convex_hull(self, target_concentrations:
213 Union[List[float],
214 List[List[float]]]) -> np.ndarray:
215 """Returns the energy of the convex hull at specified concentrations.
216 If any concentration is outside the allowed range, NaN is
217 returned.
219 Parameters
220 ----------
221 target_concentrations
222 Concentrations at target points.
224 If there is one independent concentration, a list of
225 floats is sufficient. Otherwise, the concentrations ought
226 to be provided as a list of lists, such as ``[[0.1, 0.2],
227 [0.3, 0.1], ...]``.
228 """
229 if self.dimensions > 1 and isinstance(target_concentrations[0], Sized):
230 assert len(target_concentrations[0]) == self.dimensions
232 # Loop over all complexes of N+1 points to make sure that the lowest
233 # energy plane is used in the end. This is needed in two dimensions
234 # but in higher.
235 hull_candidate_energies = []
236 for plane in itertools.combinations(range(len(self.energies)),
237 min(len(self.energies),
238 self.dimensions + 1)):
239 try:
240 plane_energies = griddata(self.concentrations[list(plane)],
241 self.energies[list(plane)],
242 np.array(target_concentrations),
243 method='linear')
244 except QhullError:
245 # If the points lie on a line, the convex hull will fail, but
246 # we do not need to care about these "planes" anyway
247 continue
248 hull_candidate_energies.append(plane_energies)
250 # Pick out the lowest energies found
251 hull_energies = np.nanmin(hull_candidate_energies, axis=0)
252 return hull_energies
254 def extract_low_energy_structures(self, concentrations:
255 Union[List[float],
256 List[List[float]]],
257 energies: List[float],
258 energy_tolerance: float) -> List[int]:
259 """Returns the indices of energies that lie within a certain
260 tolerance of the convex hull.
262 Parameters
263 ----------
264 concentrations
265 Concentrations of candidate structures.
267 If there is one independent concentration, a list of
268 floats is sufficient. Otherwise, the concentrations must
269 be provided as a list of lists, such as ``[[0.1, 0.2],
270 [0.3, 0.1], ...]``.
271 energies
272 Energies of candidate structures.
273 energy_tolerance
274 Include structures with an energy that is at most this far
275 from the convex hull.
276 """
277 # Convert to numpy arrays, can be necessary if, for example,
278 # they are Pandas Series with "gaps"
279 concentrations = np.array(concentrations)
280 energies = np.array(energies)
282 n_points = len(concentrations)
283 if len(energies) != n_points: 283 ↛ 284line 283 didn't jump to line 284, because the condition on line 283 was never true
284 raise ValueError('concentrations and energies must have '
285 'the same length')
287 # Calculate energy at convex hull for specified concentrations
288 hull_energies = self.get_energy_at_convex_hull(concentrations)
290 # Extract those that are close enough
291 close_to_hull = [i for i in range(n_points)
292 if energies[i] <= hull_energies[i] + energy_tolerance]
294 return close_to_hull