Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from typing import Optional 

2 

3import numpy as np 

4import pandas as pd 

5import scipy 

6 

7 

8def analyze_data(data: np.ndarray, max_lag: int = None) -> dict: 

9 """ Carries out an extensive analysis of the data series. 

10 

11 Parameters 

12 ---------- 

13 data 

14 data series to compute autocorrelation function for 

15 max_lag 

16 maximum lag between two data points, used for computing autocorrelation 

17 

18 Returns 

19 ------- 

20 dict 

21 calculated properties of the data including, mean, standard deviation, 

22 correlation length and a 95% error estimate. 

23 """ 

24 summary = dict(mean=data.mean(), 

25 std=data.std()) 

26 acf = get_autocorrelation_function(data, max_lag) 

27 correlation_length = _estimate_correlation_length_from_acf(acf) 

28 if correlation_length is not None: 

29 error_estimate = _estimate_error(data, correlation_length, confidence=0.95) 

30 summary['correlation_length'] = correlation_length 

31 summary['error_estimate'] = error_estimate 

32 else: 

33 summary['correlation_length'] = np.nan 

34 summary['error_estimate'] = np.nan 

35 return summary 

36 

37 

38def get_autocorrelation_function(data: np.ndarray, max_lag: int = None) -> np.ndarray: 

39 """ Returns autocorrelation function. 

40 

41 The autocorrelation function is computed using `pandas.Series.autocorr 

42 <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.autocorr.html>`_. 

43 

44 Parameters 

45 ---------- 

46 data 

47 data series to compute autocorrelation function for 

48 max_lag 

49 maximum lag between two data points 

50 

51 Returns 

52 ------- 

53 calculated autocorrelation function 

54 """ 

55 if max_lag is None: 55 ↛ 57line 55 didn't jump to line 57, because the condition on line 55 was never false

56 max_lag = len(data) - 1 

57 if 1 > max_lag >= len(data): 57 ↛ 58line 57 didn't jump to line 58, because the condition on line 57 was never true

58 raise ValueError('max_lag should be between 1 and len(data)-1.') 

59 series = pd.Series(data) 

60 acf = [series.autocorr(lag) for lag in range(0, max_lag)] 

61 return np.array(acf) 

62 

63 

64def get_correlation_length(data: np.ndarray) -> Optional[int]: 

65 """ Returns estimate of the correlation length of data. 

66 

67 The correlation length is taken as the first point where the 

68 autocorrelation functions is less than :math:`\\exp(-2)`. If the 

69 correlation function never drops below :math:`\\exp(-2)` ``np.nan`` is 

70 returned. 

71 

72 If the correlation length cannot be computed since the ACF is 

73 unconverged the function returns `None`. 

74 

75 Parameters 

76 ---------- 

77 data 

78 data series for which to the compute autocorrelation function 

79 

80 Returns 

81 ------- 

82 correlation length 

83 """ 

84 

85 acf = get_autocorrelation_function(data) 

86 correlation_length = _estimate_correlation_length_from_acf(acf) 

87 if correlation_length is None: 87 ↛ 88line 87 didn't jump to line 88, because the condition on line 87 was never true

88 return None 

89 return correlation_length 

90 

91 

92def get_error_estimate(data: np.ndarray, confidence: float = 0.95) -> Optional[float]: 

93 """Returns estimate of standard error :math:`\\mathrm{error}` 

94 with confidence interval. 

95 

96 .. math:: 

97 

98 \\mathrm{error} = t_\\mathrm{factor} * \\mathrm{std}(\\mathrm{data}) / \\sqrt{N_s} 

99 

100 where :math:`t_{factor}` is the factor corresponding to the confidence 

101 interval and :math:`N_s` is the number of independent measurements 

102 (with correlation taken into account). 

103 

104 If the correlation length cannot be computed since the ACF is 

105 unconverged the function returns `None`. 

106 

107 Parameters 

108 ---------- 

109 data 

110 data series for which to estimate the error 

111 

112 Returns 

113 ------- 

114 error estimate 

115 

116 """ 

117 correlation_length = get_correlation_length(data) 

118 if correlation_length is None: 118 ↛ 119line 118 didn't jump to line 119, because the condition on line 118 was never true

119 return None 

120 error_estimate = _estimate_error(data, correlation_length, confidence) 

121 return error_estimate 

122 

123 

124def _estimate_correlation_length_from_acf(acf: np.ndarray) -> Optional[int]: 

125 """Estimates correlation length from acf. Returns None if the ACF is 

126 uncoverged.""" 

127 for i, a in enumerate(acf): 

128 if a < np.exp(-2): 

129 return i 

130 return None # np.nan 

131 

132 

133def _estimate_error(data: np.ndarray, 

134 correlation_length: int, 

135 confidence: float) -> float: 

136 """ Estimates error using correlation length. """ 

137 t_factor = scipy.stats.t.ppf((1 + confidence) / 2, len(data)-1) # type: float 

138 error = t_factor * np.std(data) / np.sqrt(len(data) / correlation_length) # type: float 

139 return error