Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2BaseOptimizer serves as base for all optimizers. 

3""" 

4 

5import numpy as np 

6from abc import ABC, abstractmethod 

7from typing import Any, Dict, Tuple, Union 

8from .fit_methods import available_fit_methods 

9from .oi import _write_pickle 

10 

11 

class BaseOptimizer(ABC):
    """BaseOptimizer class.

    Serves as base class for all Optimizers solving the linear
    :math:`\\boldsymbol{X}\\boldsymbol{a} = \\boldsymbol{y}` problem.

    Parameters
    ----------
    fit_data : tuple(numpy.ndarray, numpy.ndarray)
        the first element of the tuple represents the `NxM`-dimensional
        fit matrix `A` whereas the second element represents the
        vector of `N`-dimensional target values `y`; here `N` (=rows of
        `A`, elements of `y`) equals the number of target values and
        `M` (=columns of `A`) equals the number of parameters
    fit_method : str
        method to be used for training; possible choices are
        "least-squares", "lasso", "elasticnet", "bayesian-ridge", "ardr",
        "rfe", "split-bregman"
    standardize : bool
        if True the fit matrix and target values are standardized before
        fitting, meaning columns in the fit matrix and the target values
        are rescaled to have a standard deviation of 1.0.
    check_condition : bool
        if True the condition number will be checked
        (this can be slightly more time consuming for larger
        matrices)
    seed : int
        seed for pseudo random number generator
    """

    def __init__(self,
                 fit_data: Tuple[np.ndarray, np.ndarray],
                 fit_method: str,
                 standardize: bool = True,
                 check_condition: bool = True,
                 seed: int = 42):
        """
        Validates the input and stores it on the instance.

        Attributes
        ----------
        _A : numpy.ndarray
            fit matrix (N, M)
        _y : numpy.ndarray
            target values (N)

        Raises
        ------
        ValueError
            if `fit_method` is not among the available fit methods, if
            the number of rows of the fit matrix differs from the number
            of target values, or if the fit matrix is not two-dimensional
        TypeError
            if `fit_data` is None
        """

        if fit_method not in available_fit_methods:
            raise ValueError('Unknown fit_method: {}'.format(fit_method))

        if fit_data is None:
            raise TypeError('Invalid fit data; Fit data can not be None')
        if fit_data[0].shape[0] != fit_data[1].shape[0]:
            raise ValueError('Invalid fit data; shapes of fit matrix'
                             ' and target vector do not match')
        if len(fit_data[0].shape) != 2:
            raise ValueError('Invalid fit matrix; must have two dimensions')

        self._A, self._y = fit_data
        self._n_rows = self._A.shape[0]
        self._n_cols = self._A.shape[1]
        self._fit_method = fit_method
        # NOTE: the attribute name is misspelled; it is kept as is because
        # code outside of this class may access it directly.
        self._standarize = standardize
        self._check_condition = check_condition
        self._seed = seed
        # 'parameters' stays None until a fit has stored a result here
        self._fit_results = {'parameters': None}

    def compute_rmse(self, A: np.ndarray, y: np.ndarray) -> float:
        """
        Returns the root mean squared error (RMSE) using
        :math:`\\boldsymbol{A}`, :math:`\\boldsymbol{y}`, and the vector of
        fitted parameters :math:`\\boldsymbol{x}`, corresponding to
        :math:`\\|\\boldsymbol{A}\\boldsymbol{x}-\\boldsymbol{y}\\|_2
        / \\sqrt{N}`.

        Parameters
        ----------
        A
            fit matrix (`N,M` array) where `N` (=rows of `A`, elements
            of `y`) equals the number of target values and `M`
            (=columns of `A`) equals the number of parameters
            (=elements of `x`)
        y
            vector of target values
        """
        y_predicted = self.predict(A)
        delta_y = y_predicted - y
        rmse = np.sqrt(np.mean(delta_y**2))
        return rmse

    def predict(self, A: np.ndarray) -> Union[np.ndarray, float]:
        """
        Predicts data given an input matrix :math:`\\boldsymbol{A}`,
        i.e., :math:`\\boldsymbol{A}\\boldsymbol{x}`, where
        :math:`\\boldsymbol{x}` is the vector of the fitted parameters.
        The method returns the vector of predicted values or a float
        if a single row is provided as input.

        Parameters
        ----------
        A
            fit matrix where `N` (=rows of `A`, elements of `y`) equals the
            number of target values and `M` (=columns of `A`) equals the
            number of parameters
        """
        return np.dot(A, self.parameters)

    def get_contributions(self, A: np.ndarray) -> np.ndarray:
        """
        Returns, for each element of the parameter vector, the absolute
        contribution to the predicted values averaged over the rows
        of `A`.

        Parameters
        ----------
        A
            fit matrix where `N` (=rows of `A`, elements of `y`) equals the
            number of target values and `M` (=columns of `A`) equals the
            number of parameters
        """
        return np.mean(np.abs(np.multiply(A, self.parameters)), axis=0)

    @abstractmethod
    def train(self) -> None:
        """ carries out the training; to be implemented by subclasses """
        pass

    @property
    def summary(self) -> Dict[str, Any]:
        """ comprehensive information about the optimizer """
        target_values_std = np.std(self._y)

        info = dict()
        info['seed'] = self.seed
        info['fit_method'] = self.fit_method
        info['standardize'] = self.standardize
        info['n_target_values'] = self.n_target_values
        info['n_parameters'] = self.n_parameters
        info['n_nonzero_parameters'] = self.n_nonzero_parameters
        info['parameters_norm'] = self.parameters_norm
        info['target_values_std'] = target_values_std
        # entries of the fit results take precedence over the generic ones
        return {**info, **self._fit_results}

    def write_summary(self, fname: str):
        """ Writes summary dict to file """
        _write_pickle(fname, self.summary)

    def __str__(self) -> str:
        width = 54
        # build the summary once; every access to the property assembles
        # a new dict and recomputes the statistics it contains
        summary = self.summary
        s = []
        s.append(' {} '.format(self.__class__.__name__).center(width, '='))
        for key in sorted(summary):
            value = summary[key]
            # entries of any other type (e.g., arrays, None) are skipped
            if isinstance(value, (str, int, np.integer)):
                s.append('{:30} : {}'.format(key, value))
            elif isinstance(value, float):
                s.append('{:30} : {:.7g}'.format(key, value))
        s.append(''.center(width, '='))
        return '\n'.join(s)

    def __repr__(self) -> str:
        # the closing parenthesis was missing from the original format string
        return 'BaseOptimizer((A, y), {}, {})'.format(
            self.fit_method, self.seed)

    @property
    def fit_method(self) -> str:
        """ fit method """
        return self._fit_method

    @property
    def parameters(self) -> Union[np.ndarray, None]:
        """ copy of parameter vector; None if no parameters are stored """
        if self._fit_results['parameters'] is None:
            return None
        else:
            return self._fit_results['parameters'].copy()

    @property
    def parameters_norm(self) -> Union[float, None]:
        """ the norm of the parameters; None if no parameters are stored """
        # fetch once since each access to the property creates a copy
        parameters = self.parameters
        if parameters is None:
            return None
        else:
            return np.linalg.norm(parameters)

    @property
    def n_nonzero_parameters(self) -> Union[int, None]:
        """ number of non-zero parameters; None if no parameters are
        stored """
        # fetch once since each access to the property creates a copy
        parameters = self.parameters
        if parameters is None:
            return None
        else:
            return np.count_nonzero(parameters)

    @property
    def n_target_values(self) -> int:
        """ number of target values (=rows in `A` matrix) """
        return self._n_rows

    @property
    def n_parameters(self) -> int:
        """ number of parameters (=columns in `A` matrix) """
        return self._n_cols

    @property
    def standardize(self) -> bool:
        """ if True standardize the fit matrix before fitting """
        return self._standarize

    @property
    def seed(self) -> int:
        """ seed used to initialize pseudo random number generator """
        return self._seed