r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

"""
BaseOptimizer serves as base for all optimizers.
"""

from abc import ABC, abstractmethod
from typing import Any, Dict, Optional, Tuple, Union

import numpy as np

from .fit_methods import available_fit_methods
from .oi import _write_pickle

class BaseOptimizer(ABC):
    """BaseOptimizer class.

    Serves as base class for all Optimizers solving the linear
    :math:`\\boldsymbol{A}\\boldsymbol{x} = \\boldsymbol{y}` problem.

    Parameters
    ----------
    fit_data : tuple(numpy.ndarray, numpy.ndarray)
        the first element of the tuple represents the `NxM`-dimensional
        fit matrix `A` whereas the second element represents the
        vector of `N`-dimensional target values `y`; here `N` (=rows of
        `A`, elements of `y`) equals the number of target values and
        `M` (=columns of `A`) equals the number of parameters
    fit_method : str
        method to be used for training; possible choices are
        "least-squares", "lasso", "elasticnet", "bayesian-ridge", "ardr",
        "rfe", "split-bregman"
    standardize : bool
        if True the fit matrix and target values are standardized before
        fitting, meaning columns in the fit matrix and the target values
        are rescaled to have a standard deviation of 1.0.
    check_condition : bool
        if True the condition number will be checked
        (this can be slightly more time consuming for larger
        matrices)
    seed : int
        seed for pseudo random number generator
    """

    def __init__(self,
                 fit_data: Tuple[np.ndarray, np.ndarray],
                 fit_method: str,
                 standardize: bool = True,
                 check_condition: bool = True,
                 seed: int = 42):
        """
        Attributes
        ----------
        _A : numpy.ndarray
            fit matrix (N, M)
        _y : numpy.ndarray
            target values (N)

        Raises
        ------
        ValueError
            if `fit_method` is not among the available fit methods, if
            the fit matrix is not two-dimensional, or if the number of
            rows of the fit matrix differs from the number of target
            values
        TypeError
            if `fit_data` is None
        """

        if fit_method not in available_fit_methods:
            raise ValueError('Unknown fit_method: {}'.format(fit_method))

        if fit_data is None:
            raise TypeError('Invalid fit data; Fit data can not be None')
        # Check the dimensionality before indexing into the shape so that a
        # malformed fit matrix is reported as a ValueError rather than
        # failing on the shape lookup below.
        if len(fit_data[0].shape) != 2:
            raise ValueError('Invalid fit matrix; must have two dimensions')
        if fit_data[0].shape[0] != fit_data[1].shape[0]:
            raise ValueError('Invalid fit data; shapes of fit matrix'
                             ' and target vector do not match')

        self._A, self._y = fit_data
        self._n_rows = self._A.shape[0]
        self._n_cols = self._A.shape[1]
        self._fit_method = fit_method
        # NOTE: the attribute name (sic) is kept as is since code outside
        # this class may access it.
        self._standarize = standardize
        self._check_condition = check_condition
        self._seed = seed
        self._fit_results = {'parameters': None}

    def compute_rmse(self, A: np.ndarray, y: np.ndarray) -> float:
        """
        Returns the root mean squared error (RMSE) using
        :math:`\\boldsymbol{A}`, :math:`\\boldsymbol{y}`, and the vector of
        fitted parameters :math:`\\boldsymbol{x}`, i.e., the square root
        of the mean of the squared elements of
        :math:`\\boldsymbol{A}\\boldsymbol{x}-\\boldsymbol{y}`.

        Parameters
        ----------
        A
            fit matrix (`N,M` array) where `N` (=rows of `A`, elements
            of `y`) equals the number of target values and `M`
            (=columns of `A`) equals the number of parameters
            (=elements of `x`)
        y
            vector of target values
        """
        y_predicted = self.predict(A)
        delta_y = y_predicted - y
        rmse = np.sqrt(np.mean(delta_y**2))
        return rmse

    def predict(self, A: np.ndarray) -> Union[np.ndarray, float]:
        """
        Predicts data given an input matrix :math:`\\boldsymbol{A}`,
        i.e., :math:`\\boldsymbol{A}\\boldsymbol{x}`, where
        :math:`\\boldsymbol{x}` is the vector of the fitted parameters.
        The method returns the vector of predicted values or a float
        if a single row is provided as input.

        Parameters
        ----------
        A
            fit matrix where `N` (=rows of `A`, elements of `y`) equals the
            number of target values and `M` (=columns of `A`) equals the
            number of parameters
        """
        return np.dot(A, self.parameters)

    def get_contributions(self, A: np.ndarray) -> np.ndarray:
        """
        Returns, for each element of the parameter vector, the absolute
        value of its contribution to the predicted values averaged over
        the rows of `A`.

        Parameters
        ----------
        A
            fit matrix where `N` (=rows of `A`, elements of `y`) equals the
            number of target values and `M` (=columns of `A`) equals the
            number of parameters
        """
        return np.mean(np.abs(np.multiply(A, self.parameters)), axis=0)

    @abstractmethod
    def train(self) -> None:
        """ trains the optimizer; to be implemented by subclasses """
        pass

    @property
    def summary(self) -> Dict[str, Any]:
        """ comprehensive information about the optimizer """
        target_values_std = np.std(self._y)

        info = dict()
        info['seed'] = self.seed
        info['fit_method'] = self.fit_method
        info['standardize'] = self.standardize
        info['n_target_values'] = self.n_target_values
        info['n_parameters'] = self.n_parameters
        info['n_nonzero_parameters'] = self.n_nonzero_parameters
        info['parameters_norm'] = self.parameters_norm
        info['target_values_std'] = target_values_std
        # entries of the fit results take precedence over the generic info
        return {**info, **self._fit_results}

    def write_summary(self, fname: str) -> None:
        """ Writes summary dict to file """
        _write_pickle(fname, self.summary)

    def __str__(self) -> str:
        width = 54
        # Build the summary once; the property assembles a new dict (and
        # recomputes the standard deviation) on every access.
        summary = self.summary
        s = []
        s.append(' {} '.format(self.__class__.__name__).center(width, '='))
        for key in sorted(summary):
            value = summary[key]
            # values of other types (e.g., arrays, None) are not printed
            if isinstance(value, (str, int, np.integer)):
                s.append('{:30} : {}'.format(key, value))
            elif isinstance(value, (float, np.floating)):
                s.append('{:30} : {:.7g}'.format(key, value))
        s.append(''.center(width, '='))
        return '\n'.join(s)

    def __repr__(self) -> str:
        return 'BaseOptimizer((A, y), {}, {})'.format(
            self.fit_method, self.seed)

    @property
    def fit_method(self) -> str:
        """ fit method """
        return self._fit_method

    @property
    def parameters(self) -> Optional[np.ndarray]:
        """ copy of parameter vector; None if no parameters are set """
        if self._fit_results['parameters'] is None:
            return None
        else:
            return self._fit_results['parameters'].copy()

    @property
    def parameters_norm(self) -> Optional[float]:
        """ the norm of the parameters; None if no parameters are set """
        if self.parameters is None:
            return None
        else:
            return np.linalg.norm(self.parameters)

    @property
    def n_nonzero_parameters(self) -> Optional[int]:
        """ number of non-zero parameters; None if no parameters are set """
        if self.parameters is None:
            return None
        else:
            return np.count_nonzero(self.parameters)

    @property
    def n_target_values(self) -> int:
        """ number of target values (=rows in `A` matrix) """
        return self._n_rows

    @property
    def n_parameters(self) -> int:
        """ number of parameters (=columns in `A` matrix) """
        return self._n_cols

    @property
    def standardize(self) -> bool:
        """ if True standardize the fit matrix before fitting """
        return self._standarize

    @property
    def seed(self) -> int:
        """ seed used to initialize pseudo random number generator """
        return self._seed

218 return self._seed