Coverage for icet/tools/convex_hull.py: 97%

95 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2024-05-06 04:14 +0000

1""" 

2This module provides the ConvexHull class. 

3""" 

4 

5import itertools 

6import numpy as np 

7from typing import List, Sized, Union 

8from scipy.interpolate import griddata 

9from scipy.spatial import ConvexHull as ConvexHullSciPy 

10from scipy.spatial import QhullError 

11 

12 

13class ConvexHull: 

14 """This class provides functionality for extracting the convex hull 

15 of the (free) energy of mixing. It is based on the `convex hull 

16 calculator in SciPy 

17 <http://docs.scipy.org/doc/scipy-dev/reference/\ 

18generated/scipy.spatial.ConvexHull.html>`_. 

19 

20 Parameters 

21 ---------- 

22 concentrations 

23 Concentrations for each structure listed as ``[[c1, c2], [c1, c2], 

24 ...]``; for binaries, in which case there is only one independent 

25 concentration, the format ``[c1, c2, c3, ...]`` works as well. 

26 energies 

27 Energy (or energy of mixing) for each structure. 

28 

29 Attributes 

30 ---------- 

31 concentrations : np.ndarray 

32 Concentrations of the structures on the convex hull. 

33 energies : np.ndarray 

34 Energies of the structures on the convex hull. 

35 dimensions : int 

36 Number of independent concentrations needed to specify a point in 

37 concentration space (1 for binaries, 2 for ternaries and so on). 

38 structures : List[int] 

39 Indices of structures that constitute the convex hull (indices are 

40 defined by the order of their concentrations and energies are fed when 

41 initializing the :class:`ConvexHull` object). 

42 

43 Examples 

44 -------- 

45 A :class:`ConvexHull` object is easily initialized by providing lists of 

46 concentrations and energies:: 

47 

48 >>> data = {'concentration': [0, 0.2, 0.2, 0.3, 0.4, 0.5, 0.8, 1.0], 

49 ... 'mixing_energy': [0.1, -0.2, -0.1, -0.2, 0.2, -0.4, -0.2, -0.1]} 

50 >>> hull = ConvexHull(data['concentration'], data['mixing_energy']) 

51 

52 Now one can for example access the points along the convex hull directly:: 

53 

54 >>> for c, e in zip(hull.concentrations, hull.energies): 

55 ... print(c, e) 

56 0.0 0.1 

57 0.2 -0.2 

58 0.5 -0.4 

59 1.0 -0.1 

60 

61 or plot the convex hull along with the original data using e.g., matplotlib:: 

62 

63 >>> import matplotlib.pyplot as plt 

64 >>> plt.scatter(data['concentration'], data['mixing_energy'], color='darkred') 

65 >>> plt.plot(hull.concentrations, hull.energies) 

66 >>> plt.show(block=False) 

67 

68 It is also possible to extract structures at or close to the convex hull:: 

69 

70 >>> low_energy_structures = hull.extract_low_energy_structures( 

71 ... data['concentration'], data['mixing_energy'], 

72 ... energy_tolerance=0.005) 

73 

74 A complete example can be found in the :ref:`basic tutorial 

75 <tutorial_enumerate_structures>`. 

76 """ 

77 

78 def __init__(self, 

79 concentrations: Union[List[float], List[List[float]]], 

80 energies: List[float]) -> None: 

81 assert len(concentrations) == len(energies) 

82 # Prepare data in format suitable for SciPy-ConvexHull 

83 concentrations = np.array(concentrations) 

84 energies = np.array(energies) 

85 points = np.column_stack((concentrations, energies)) 

86 self.dimensions = len(points[0]) - 1 

87 

88 # Construct convex hull 

89 hull = ConvexHullSciPy(points) 

90 

91 # Collect convex hull points in handy arrays 

92 concentrations = [] 

93 energies = [] 

94 for vertex in hull.vertices: 

95 if self.dimensions == 1: 

96 concentrations.append(points[vertex][0]) 

97 else: 

98 concentrations.append(points[vertex][0:-1]) 

99 energies.append(points[vertex][-1]) 

100 concentrations = np.array(concentrations) 

101 energies = np.array(energies) 

102 

103 structures = hull.vertices 

104 # If there is just one independent concentration, we'd better sort 

105 # according to it 

106 if self.dimensions == 1: 

107 ces = list(zip(*sorted(zip(concentrations, energies, structures)))) 

108 self.concentrations = np.array(ces[0]) 

109 self.energies = np.array(ces[1]) 

110 self.structures = np.array(ces[2]) 

111 else: 

112 self.concentrations = concentrations 

113 self.energies = energies 

114 self.structures = structures 

115 

116 # Remove points that are above the "pure components plane" 

117 self._remove_points_above_tie_plane() 

118 

119 def __str__(self) -> str: 

120 width = 40 

121 s = [] 

122 s += ['{s:=^{n}}'.format(s=' Convex Hull ', n=width)] 

123 s += [' {:24} : {}'.format('dimensions', self.dimensions)] 

124 s += [' {:24} : {}'.format('number of points', len(self.concentrations))] 

125 s += [' {:24} : {}'.format('smallest concentration', np.min(self.concentrations))] 

126 s += [' {:24} : {}'.format('largest concentration', np.max(self.concentrations))] 

127 s += [''.center(width, '=')] 

128 return '\n'.join(s) 

129 

130 def _repr_html_(self) -> str: 

131 """ HTML representation. Used, e.g., in jupyter notebooks. """ 

132 s = ['<h4>Convex Hull</h4>'] 

133 s += ['<table border="1" class="dataframe">'] 

134 s += ['<tbody>'] 

135 s += [f'<tr><td style="text-align: left;">Dimensions</td><td>{self.dimensions}</td></tr>'] 

136 s += ['<tr><td style="text-align: left;">Number of points</td>' 

137 f'<td>{len(self.concentrations)}</td></tr>'] 

138 s += ['<tr><td style="text-align: left;">Smallest concentration</td>' 

139 f'<td>{np.min(self.concentrations)}</td></tr>'] 

140 s += ['<tr><td style="text-align: left;">Largest concentration</td>' 

141 f'<td>{np.max(self.concentrations)}</td></tr>'] 

142 s += ['</tbody>'] 

143 s += ['</table>'] 

144 return ''.join(s) 

145 

146 def _remove_points_above_tie_plane(self, tol: float = 1e-6) -> None: 

147 """ 

148 Remove all points on the convex hull that correspond to maximum rather 

149 than minimum energy. 

150 

151 Parameters 

152 ---------- 

153 tol 

154 Tolerance for what energy constitutes a lower one. 

155 """ 

156 

157 # Identify the "complex concentration hull", i.e. the extremal 

158 # concentrations. In the simplest case, these should simply be the 

159 # pure components. 

160 if self.dimensions == 1: 

161 # Then the ConvexHullScipy function doesn't work, so we just pick 

162 # the indices of the lowest and highest concentrations. 

163 vertices = [] 

164 vertices.append(np.argmin(self.concentrations)) 

165 vertices.append(np.argmax(self.concentrations)) 

166 vertices = np.array(vertices) 

167 else: 

168 concentration_hull = ConvexHullSciPy(self.concentrations) 

169 vertices = concentration_hull.vertices 

170 

171 # Remove all points of the convex energy hull that have an energy that 

172 # is higher than what would be gotten with pure components at the same 

173 # concentration. These points are mathematically on the convex hull, 

174 # but in the physically uninteresting upper part, i.e. they maximize 

175 # rather than minimize energy. 

176 to_delete = [] 

177 for i, concentration in enumerate(self.concentrations): 

178 # The points on the convex concentration hull should always be 

179 # included, so skip them. 

180 if i in vertices: 

181 continue 

182 

183 # The energy obtained as a linear combination of concentrations on 

184 # the convex hull is the "z coordinate" of the position on a 

185 # (hyper)plane in the (number of independent concentrations + 

186 # 1)-dimensional (N-D) space. This plane is spanned by N points. 

187 # If there are more vertices on the convex hull, we need to loop 

188 # over all combinations of N vertices. 

189 for plane in itertools.combinations(vertices, 

190 min(len(vertices), 

191 self.dimensions + 1)): 

192 # Calculate energy that would be gotten with pure components 

193 # with ascribed concentration. 

194 energy_pure = griddata(self.concentrations[np.array(plane)], 

195 self.energies[np.array(plane)], 

196 concentration, 

197 method='linear') 

198 

199 # Prepare to delete if the energy was lowered. `griddata` gives 

200 # NaN if the concentration is outside the triangle formed by 

201 # the three vertices. The result of the below comparison is 

202 # then False, which is what we want. 

203 if energy_pure < self.energies[i] - tol: 

204 to_delete.append(i) 

205 break 

206 

207 # Finally remove all points 

208 self.concentrations = np.delete(self.concentrations, to_delete, 0) 

209 self.energies = np.delete(self.energies, to_delete, 0) 

210 self.structures = list(np.delete(self.structures, to_delete, 0)) 

211 

212 def get_energy_at_convex_hull(self, target_concentrations: 

213 Union[List[float], 

214 List[List[float]]]) -> np.ndarray: 

215 """Returns the energy of the convex hull at specified concentrations. 

216 If any concentration is outside the allowed range, NaN is 

217 returned. 

218 

219 Parameters 

220 ---------- 

221 target_concentrations 

222 Concentrations at target points. 

223 

224 If there is one independent concentration, a list of 

225 floats is sufficient. Otherwise, the concentrations ought 

226 to be provided as a list of lists, such as ``[[0.1, 0.2], 

227 [0.3, 0.1], ...]``. 

228 """ 

229 if self.dimensions > 1 and isinstance(target_concentrations[0], Sized): 

230 assert len(target_concentrations[0]) == self.dimensions 

231 

232 # Loop over all complexes of N+1 points to make sure that the lowest 

233 # energy plane is used in the end. This is needed in two dimensions 

234 # but in higher. 

235 hull_candidate_energies = [] 

236 for plane in itertools.combinations(range(len(self.energies)), 

237 min(len(self.energies), 

238 self.dimensions + 1)): 

239 try: 

240 plane_energies = griddata(self.concentrations[list(plane)], 

241 self.energies[list(plane)], 

242 np.array(target_concentrations), 

243 method='linear') 

244 except QhullError: 

245 # If the points lie on a line, the convex hull will fail, but 

246 # we do not need to care about these "planes" anyway 

247 continue 

248 hull_candidate_energies.append(plane_energies) 

249 

250 # Pick out the lowest energies found 

251 hull_energies = np.nanmin(hull_candidate_energies, axis=0) 

252 return hull_energies 

253 

254 def extract_low_energy_structures(self, concentrations: 

255 Union[List[float], 

256 List[List[float]]], 

257 energies: List[float], 

258 energy_tolerance: float) -> List[int]: 

259 """Returns the indices of energies that lie within a certain 

260 tolerance of the convex hull. 

261 

262 Parameters 

263 ---------- 

264 concentrations 

265 Concentrations of candidate structures. 

266 

267 If there is one independent concentration, a list of 

268 floats is sufficient. Otherwise, the concentrations must 

269 be provided as a list of lists, such as ``[[0.1, 0.2], 

270 [0.3, 0.1], ...]``. 

271 energies 

272 Energies of candidate structures. 

273 energy_tolerance 

274 Include structures with an energy that is at most this far 

275 from the convex hull. 

276 """ 

277 # Convert to numpy arrays, can be necessary if, for example, 

278 # they are Pandas Series with "gaps" 

279 concentrations = np.array(concentrations) 

280 energies = np.array(energies) 

281 

282 n_points = len(concentrations) 

283 if len(energies) != n_points: 283 ↛ 284line 283 didn't jump to line 284, because the condition on line 283 was never true

284 raise ValueError('concentrations and energies must have ' 

285 'the same length') 

286 

287 # Calculate energy at convex hull for specified concentrations 

288 hull_energies = self.get_energy_at_convex_hull(concentrations) 

289 

290 # Extract those that are close enough 

291 close_to_hull = [i for i in range(n_points) 

292 if energies[i] <= hull_energies[i] + energy_tolerance] 

293 

294 return close_to_hull