Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

""" 

This module provides the ClusterExpansion class. 

""" 

 

import pandas as pd 

import numpy as np 

import pickle 

import tempfile 

import tarfile 

import re 

 

from icet import ClusterSpace 

from icet.core.structure import Structure 

from typing import List, Union 

from ase import Atoms 

 

 

class ClusterExpansion:
    """Cluster expansions are obtained by combining a cluster space with
    a set of effective cluster interactions (ECIs). Instances of this
    class allow one to predict the property of interest for a given
    structure.

    Attributes
    ----------
    cluster_space : icet.ClusterSpace
        cluster space that was used for constructing the cluster expansion
    parameters : np.ndarray
        effective cluster interactions (ECIs)

    Example
    -------
    The following snippet illustrates the initialization and usage of
    a ClusterExpansion object. Here, the ECIs are taken to be a list
    of ones. Usually, they would be obtained by training with
    respect to a set of reference data::

        from ase.build import bulk
        from icet import ClusterSpace, ClusterExpansion

        # create cluster expansion
        prim = bulk('Au')
        cs = ClusterSpace(prim, cutoffs=[7.0, 5.0],
                          chemical_symbols=[['Au', 'Pd']])
        ecis = 14 * [1.0]
        ce = ClusterExpansion(cs, ecis)

        # make prediction for supercell
        sc = prim.repeat(3)
        for k in [1, 4, 7]:
            sc[k].symbol = 'Pd'
        print(ce.predict(sc))
    """

    def __init__(self, cluster_space: 'ClusterSpace',
                 parameters: Union[List[float], np.ndarray]) -> None:
        """
        Initializes a ClusterExpansion object.

        Parameters
        ----------
        cluster_space
            cluster space to be used for constructing the cluster expansion
        parameters
            effective cluster interactions (ECIs); any sequence of numbers
            is accepted and stored as a numpy array

        Raises
        ------
        ValueError
            if cluster space and parameters differ in length
        """
        if len(cluster_space) != len(parameters):
            raise ValueError('cluster_space ({}) and parameters ({}) must have'
                             ' the same length'.format(len(cluster_space), len(parameters)))
        # work on a private copy so that later pruning does not affect the caller's object
        self._cluster_space = cluster_space.copy()
        # lists, tuples and other sequences are converted; arrays are stored as passed
        if not isinstance(parameters, np.ndarray):
            parameters = np.array(parameters)
        self._parameters = parameters
        # snapshot of the parameters as given at construction time
        self._original_parameters = parameters.copy()

    def predict(self, structure: 'Union[Atoms, Structure]') -> float:
        """
        Predicts the property of interest (e.g., the energy) for the input
        structure using the cluster expansion.

        Parameters
        ----------
        structure
            atomic configuration

        Returns
        -------
        float
            property value predicted by the cluster expansion
        """
        cluster_vector = self._cluster_space.get_cluster_vector(structure)
        # the prediction is the dot product of cluster vector and ECIs
        return np.dot(cluster_vector, self.parameters)

    @property
    def parameters_as_dataframe(self) -> pd.DataFrame:
        """ dataframe containing orbit data and ECIs """
        rows = self._cluster_space.orbit_data
        # NOTE(review): the 'eci' key is written into the row objects handed
        # out by `orbit_data`; presumably these are fresh on every access - confirm
        for row, eci in zip(rows, self.parameters):
            row['eci'] = eci
        return pd.DataFrame(rows)

    @property
    def orders(self) -> List[int]:
        """ orders included in cluster expansion """
        # one order per cutoff plus two more (orders are counted from zero)
        return list(range(len(self._cluster_space.cutoffs) + 2))

    @property
    def cluster_space(self) -> 'ClusterSpace':
        """ cluster space on which cluster expansion is based """
        return self._cluster_space.copy()

    @property
    def parameters(self) -> np.ndarray:
        """ effective cluster interactions (ECIs) """
        return self._parameters

    def plot_parameters(self, orders=None):
        """
        Plots the ECIs as a function of radius, one series per order.

        Parameters
        ----------
        orders
            orders to include in the plot; by default all orders are plotted
        """
        if orders is None:
            orders = self.orders
        df = self.parameters_as_dataframe

        # matplotlib is imported here so that it is only required for plotting
        import matplotlib.pyplot as plt
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.axhline(y=0.0, c='k', lw=1)
        for order in orders:
            df_order = df.loc[df['order'] == order]
            ax.plot(df_order.radius, df_order.eci, 'o', ms=8, label='order {}'.format(order))
        ax.legend(loc='best')
        ax.set_xlabel('Radius')
        ax.set_ylabel('ECI')
        plt.show()

    def __len__(self) -> int:
        """ number of parameters (ECIs) """
        return len(self._parameters)

    def _get_string_representation(self, print_threshold: Union[int, None] = None,
                                   print_minimum: int = 10) -> str:
        """
        String representation of the cluster expansion.

        Parameters
        ----------
        print_threshold
            if the number of parameters exceeds this number the middle part
            of the table is replaced by dots
        print_minimum
            number of lines kept at the top and the bottom of the table if
            `print_threshold` is exceeded

        Returns
        -------
        str
            the cluster space table extended by an ECI column
        """
        cluster_space_repr = self._cluster_space._get_string_representation(
            print_threshold, print_minimum).split('\n')

        # rescale width to make room for the additional ECI column
        eci_col_width = max(len('{:9.3g}'.format(max(self._parameters, key=abs))), len('ECI'))
        width = len(cluster_space_repr[0]) + len(' | ') + eci_col_width

        s = []  # type: List[str]
        s += ['{s:=^{n}}'.format(s=' Cluster Expansion ', n=width)]
        # carry over the "key: value" header lines of the cluster space
        s += [t for t in cluster_space_repr if re.search(':', t)]

        # additional information about the number of nonzero ECIs
        df = self.parameters_as_dataframe
        orders = self.orders
        nzp_by_order = [np.count_nonzero(df[df.order == order].eci) for order in orders]
        assert sum(nzp_by_order) == np.count_nonzero(self.parameters)
        s += [' total number of nonzero parameters: {}'.format(sum(nzp_by_order))]
        line = ' number of nonzero parameters by order: '
        for order, nzp in zip(orders, nzp_by_order):
            line += '{}= {} '.format(order, nzp)
        s += [line]

        # table header
        s += [''.center(width, '-')]
        t = [t for t in cluster_space_repr if 'index' in t]
        t += ['{s:^{n}}'.format(s='ECI', n=eci_col_width)]
        s += [' | '.join(t)]
        s += [''.center(width, '-')]

        # table body
        index = 0
        while index < len(self):
            # skip the middle of long tables and mark the gap with dots
            if (print_threshold is not None and
                    len(self) > print_threshold and
                    index >= print_minimum and
                    index <= len(self) - print_minimum):
                index = len(self) - print_minimum
                s += [' ...']
            # pick the cluster space row that starts with this index
            pattern = r'^{:4}'.format(index)
            t = [t for t in cluster_space_repr if re.match(pattern, t)]
            eci_value = '{:9.3g}'.format(self._parameters[index])
            t += ['{s:^{n}}'.format(s=eci_value, n=eci_col_width)]
            s += [' | '.join(t)]
            index += 1
        s += [''.center(width, '=')]

        return '\n'.join(s)

    def __repr__(self) -> str:
        """ string representation """
        return self._get_string_representation(print_threshold=50)

    def print_overview(self,
                       print_threshold: Union[int, None] = None,
                       print_minimum: int = 10) -> None:
        """
        Print an overview of the cluster expansion in terms of the orbits (order,
        radius, multiplicity, corresponding ECI etc).

        Parameters
        ----------
        print_threshold
            if the number of orbits exceeds this number print dots
        print_minimum
            number of lines printed from the top and the bottom of the orbit
            list if `print_threshold` is exceeded
        """
        print(self._get_string_representation(print_threshold=print_threshold,
                                              print_minimum=print_minimum))

    def prune(self, indices: Union[List[int], None] = None, tol: float = 0):
        """
        Removes orbits from the cluster expansion (CE), for which the effective
        cluster interactions (ECIs; parameters) are zero or close to zero.
        This commonly reduces the computational cost for evaluating the CE and
        is therefore recommended prior to using it in production. If the method
        is called without arguments orbits will be pruned, for which the ECIs
        are strictly zero. Less restrictive pruning can be achieved by setting
        the `tol` keyword.

        Parameters
        ----------
        indices
            indices to parameters to remove in the cluster expansion.
        tol
            orbits for which the absolute ECIs is/are within this
            value will be pruned

        Raises
        ------
        ValueError
            if `indices` contains 0 (the zerolet may not be pruned)
        """

        # find orbit indices to be removed
        if indices is None:
            indices = [i for i, param in enumerate(self.parameters)
                       if np.abs(param) <= tol and i > 0]
        df = self.parameters_as_dataframe
        indices = list(set(indices))

        if 0 in indices:
            raise ValueError('Orbit index cannot be 0 since the zerolet may not be pruned.')

        # explicit integer dtype so that an empty selection does not turn
        # into a float array when used as an index
        orbit_candidates_for_removal = df.orbit_index[np.array(indices, dtype=int)].tolist()
        all_orbit_indices = df.orbit_index.tolist()
        safe_to_remove_orbits, safe_to_remove_params = [], []
        for oi in set(orbit_candidates_for_removal):
            if oi == -1:
                continue
            orbit_count = all_orbit_indices.count(oi)
            oi_remove_count = orbit_candidates_for_removal.count(oi)
            # an orbit is only dropped if all parameters that belong to it
            # are marked for removal
            if orbit_count <= oi_remove_count:
                safe_to_remove_orbits.append(oi)
                safe_to_remove_params += df.index[df['orbit_index'] == oi].tolist()

        # prune cluster space
        self._cluster_space._prune_orbit_list(indices=safe_to_remove_orbits)
        self._parameters = self._parameters[np.setdiff1d(
            np.arange(len(self._parameters)), safe_to_remove_params)]
        assert len(self._parameters) == len(self._cluster_space)

    def write(self, filename: str):
        """
        Writes ClusterExpansion object to file.

        The file is a tar archive with two members: 'cluster_space' (as
        written by the cluster space itself) and 'items' (a pickled dict
        that holds the parameters).

        Parameters
        ---------
        filename
            name of file to which to write
        """
        items = dict()
        items['parameters'] = self.parameters

        with tarfile.open(name=filename, mode='w') as tar_file:
            # the cluster space can only write to a named file, hence the
            # detour via a named temporary file
            with tempfile.NamedTemporaryFile() as cs_file:
                self._cluster_space.write(cs_file.name)
                tar_file.add(cs_file.name, arcname='cluster_space')

            # write items
            with tempfile.TemporaryFile() as temp_file:
                pickle.dump(items, temp_file)
                # rewind so that the archive reads the pickle from the start
                temp_file.seek(0)
                tar_info = tar_file.gettarinfo(arcname='items', fileobj=temp_file)
                tar_file.addfile(tar_info, temp_file)

    @staticmethod
    def read(filename: str) -> 'ClusterExpansion':
        """
        Reads ClusterExpansion object from file.

        Parameters
        ---------
        filename
            file from which to read

        Returns
        -------
        ClusterExpansion
            cluster expansion restored from the archive
        """
        with tarfile.open(name=filename, mode='r') as tar_file:
            with tempfile.NamedTemporaryFile() as cs_file:
                cs_file.write(tar_file.extractfile('cluster_space').read())
                # make sure the data is on disk before it is read back by name
                cs_file.flush()
                cs = ClusterSpace.read(cs_file.name)
            # SECURITY: unpickling executes arbitrary code; only read files
            # that originate from a trusted source
            items = pickle.load(tar_file.extractfile('items'))

        parameters = items['parameters']
        ce = ClusterExpansion(cs, parameters)

        assert list(parameters) == list(ce.parameters)
        return ce