Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

import pandas as pd 

import numpy as np 

import scipy 

 

 

def analyze_data(data: np.ndarray, max_lag: int = None) -> dict:
    """ Summarizes a data series with plain and correlation-aware statistics.

    Parameters
    ----------
    data
        data series to analyze
    max_lag
        maximum lag between two data points; forwarded unchanged to
        get_autocorrelation_function when computing the autocorrelation

    Returns
    -------
    dict
        properties of the data stored under the keys ``mean``, ``std``,
        ``correlation_length`` and ``error_estimate``, where the error
        estimate is computed at a confidence level of 0.95.
    """
    # The correlation length is derived from the autocorrelation function
    # and is in turn needed for the error estimate.
    correlation_length = _estimate_correlation_length_from_acf(
        get_autocorrelation_function(data, max_lag))
    error_estimate = _estimate_error(
        data, correlation_length, confidence=0.95)

    return {
        'mean': data.mean(),
        'std': data.std(),
        'correlation_length': correlation_length,
        'error_estimate': error_estimate,
    }

 

 

def get_autocorrelation_function(data: np.ndarray, max_lag: int = None) -> np.ndarray:
    """ Returns autocorrelation function.

    The autocorrelation function is computed using pandas.Series.autocorr
    and evaluated for the lags 0, 1, ..., max_lag - 1.

    Parameters
    ----------
    data
        data series to compute autocorrelation function for
    max_lag
        maximum lag between two data points; if given it must satisfy
        1 <= max_lag <= len(data) - 1. If None, len(data) - 1 is used.

    Returns
    -------
    calculated autocorrelation function as an array with max_lag elements

    Raises
    ------
    ValueError
        if an explicitly given max_lag is outside the range
        [1, len(data) - 1]
    """
    if max_lag is None:
        # The derived default is deliberately not validated so that very
        # short series keep yielding an empty result instead of raising.
        max_lag = len(data) - 1
    elif not 1 <= max_lag < len(data):
        # The previous check ``1 > max_lag >= len(data)`` was a chained
        # comparison (an implicit "and") and could therefore never trigger.
        raise ValueError('max_lag should be between 1 and len(data)-1.')

    series = pd.Series(data)
    acf = [series.autocorr(lag) for lag in range(0, max_lag)]
    return np.array(acf)

 

 

def get_correlation_length(data: np.ndarray) -> int:
    """ Returns estimate of the correlation length of data.

    The correlation length is taken as the first lag at which the
    autocorrelation function is less than exp(-2).

    If the autocorrelation function never goes below exp(-2) then np.nan
    is returned.

    Parameters
    ----------
    data
        data series to compute the correlation length for

    Returns
    -------
    correlation length
    """
    # The autocorrelation function is computed with the default maximum lag.
    return _estimate_correlation_length_from_acf(
        get_autocorrelation_function(data))

 

 

def get_error_estimate(data: np.ndarray, confidence: float = 0.95) -> float:
    """ Returns estimate of standard error with confidence interval.

    The estimate is given by

        error = t_factor * std(data) / sqrt(Ns)

    where t_factor is the factor corresponding to the confidence interval
    and Ns is the number of independent measurements (with correlation
    taken into account).

    Parameters
    ----------
    data
        data series to estimate error for
    confidence
        confidence level used when determining t_factor

    Returns
    -------
    error estimate
    """
    # The correlation length of the series determines how many of the
    # measurements are counted as independent.
    return _estimate_error(data, get_correlation_length(data), confidence)

 

 

def _estimate_correlation_length_from_acf(acf: np.ndarray) -> int:
    """ Estimates the correlation length from an autocorrelation function.

    Returns the index of the first entry of acf that is less than exp(-2),
    or np.nan if there is no such entry.
    """
    threshold = np.exp(-2)
    below_threshold = np.nonzero(acf < threshold)[0]
    if len(below_threshold) > 0:
        return below_threshold[0]
    return np.nan

 

 

def _estimate_error(data: np.ndarray, correlation_length: int,
                    confidence: float) -> float:
    """ Estimates the error of the mean using the correlation length.

    The estimate is

        error = t_factor * std(data) / sqrt(len(data) / correlation_length)

    where t_factor is the two-sided Student t quantile for the requested
    confidence with len(data) - 1 degrees of freedom.

    Parameters
    ----------
    data
        data series to estimate the error for
    correlation_length
        correlation length of the data series; if it is np.nan the
        returned error is np.nan as well
    confidence
        confidence level of the estimate, e.g., 0.95

    Returns
    -------
    error estimate
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is available as an attribute on every
    # SciPy version.
    from scipy import stats

    t_factor = stats.t.ppf((1 + confidence) / 2, len(data) - 1)
    # Number of effectively independent samples in the series.
    n_independent = len(data) / correlation_length
    error = t_factor * np.std(data) / np.sqrt(n_independent)
    return error