r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from typing import Optional

3import numpy as np

4import pandas as pd

5import scipy

8def analyze_data(data: np.ndarray, max_lag: int = None) -> dict:

9 """ Carries out an extensive analysis of the data series.

11 Parameters

12 ----------

13 data

14 data series to compute autocorrelation function for

15 max_lag

16 maximum lag between two data points, used for computing autocorrelation

18 Returns

19 -------

20 dict

21 calculated properties of the data including, mean, standard deviation,

22 correlation length and a 95% error estimate.

23 """

24 summary = dict(mean=data.mean(),

25 std=data.std())

26 acf = get_autocorrelation_function(data, max_lag)

27 correlation_length = _estimate_correlation_length_from_acf(acf)

28 if correlation_length is not None:

29 error_estimate = _estimate_error(data, correlation_length, confidence=0.95)

30 summary['correlation_length'] = correlation_length

31 summary['error_estimate'] = error_estimate

32 else:

33 summary['correlation_length'] = np.nan

34 summary['error_estimate'] = np.nan

35 return summary

38def get_autocorrelation_function(data: np.ndarray, max_lag: int = None) -> np.ndarray:

39 """ Returns autocorrelation function.

41 The autocorrelation function is computed using pandas.Series.autocorr

42 <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.autocorr.html>_.

44 Parameters

45 ----------

46 data

47 data series to compute autocorrelation function for

48 max_lag

49 maximum lag between two data points

51 Returns

52 -------

53 calculated autocorrelation function

54 """

55 if max_lag is None: 55 ↛ 57line 55 didn't jump to line 57, because the condition on line 55 was never false

56 max_lag = len(data) - 1

57 if 1 > max_lag >= len(data): 57 ↛ 58line 57 didn't jump to line 58, because the condition on line 57 was never true

58 raise ValueError('max_lag should be between 1 and len(data)-1.')

59 series = pd.Series(data)

60 acf = [series.autocorr(lag) for lag in range(0, max_lag)]

61 return np.array(acf)

64def get_correlation_length(data: np.ndarray) -> Optional[int]:

65 """ Returns estimate of the correlation length of data.

67 The correlation length is taken as the first point where the

68 autocorrelation functions is less than :math:\\exp(-2). If the

69 correlation function never drops below :math:\\exp(-2) np.nan is

70 returned.

72 If the correlation length cannot be computed since the ACF is

73 unconverged the function returns None.

75 Parameters

76 ----------

77 data

78 data series for which to the compute autocorrelation function

80 Returns

81 -------

82 correlation length

83 """

85 acf = get_autocorrelation_function(data)

86 correlation_length = _estimate_correlation_length_from_acf(acf)

87 if correlation_length is None: 87 ↛ 88line 87 didn't jump to line 88, because the condition on line 87 was never true

88 return None

89 return correlation_length

92def get_error_estimate(data: np.ndarray, confidence: float = 0.95) -> Optional[float]:

93 """Returns estimate of standard error :math:\\mathrm{error}

94 with confidence interval.

96 .. math::

98 \\mathrm{error} = t_\\mathrm{factor} * \\mathrm{std}(\\mathrm{data}) / \\sqrt{N_s}

100 where :math:t_{factor} is the factor corresponding to the confidence

101 interval and :math:N_s is the number of independent measurements

102 (with correlation taken into account).

104 If the correlation length cannot be computed since the ACF is

105 unconverged the function returns None.

107 Parameters

108 ----------

109 data

110 data series for which to estimate the error

112 Returns

113 -------

114 error estimate

116 """

117 correlation_length = get_correlation_length(data)

118 if correlation_length is None: 118 ↛ 119line 118 didn't jump to line 119, because the condition on line 118 was never true

119 return None

120 error_estimate = _estimate_error(data, correlation_length, confidence)

121 return error_estimate

124def _estimate_correlation_length_from_acf(acf: np.ndarray) -> Optional[int]:

125 """Estimates correlation length from acf. Returns None if the ACF is

126 uncoverged."""

127 for i, a in enumerate(acf):

128 if a < np.exp(-2):

129 return i

130 return None # np.nan

133def _estimate_error(data: np.ndarray,

134 correlation_length: int,

135 confidence: float) -> float:

136 """ Estimates error using correlation length. """

137 t_factor = scipy.stats.t.ppf((1 + confidence) / 2, len(data)-1) # type: float

138 error = t_factor * np.std(data) / np.sqrt(len(data) / correlation_length) # type: float

139 return error