 Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

 # Statistical analysis helpers for (possibly autocorrelated) data series.
from typing import Optional

import numpy as np
import pandas as pd
import scipy
import scipy.stats  # explicit: `import scipy` alone does not guarantee scipy.stats


def analyze_data(data: np.ndarray, max_lag: Optional[int] = None) -> dict:
    """ Carries out an extensive analysis of the data series.

    Parameters
    ----------
    data
        data series to analyze
    max_lag
        upper bound on the lag used when computing the autocorrelation
        function; defaults to ``len(data) - 1``

    Returns
    -------
    dict
        calculated properties of the data with the keys ``mean``, ``std``,
        ``correlation_length`` and ``error_estimate`` (95% confidence).

    Raises
    ------
    ValueError
        if `max_lag` is not between 1 and ``len(data) - 1``
    """
    # Accept any array-like so that .mean()/.std() below are available.
    data = np.asarray(data)
    acf = get_autocorrelation_function(data, max_lag)
    correlation_length = _estimate_correlation_length_from_acf(acf)
    error_estimate = _estimate_error(data, correlation_length, confidence=0.95)
    summary = dict(mean=data.mean(),
                   std=data.std(),
                   correlation_length=correlation_length,
                   error_estimate=error_estimate)
    return summary


def get_autocorrelation_function(data: np.ndarray,
                                 max_lag: Optional[int] = None) -> np.ndarray:
    """ Returns autocorrelation function.

    The autocorrelation function is computed using pandas.Series.autocorr

    Parameters
    ----------
    data
        data series to compute autocorrelation function for
    max_lag
        upper bound on the lag between two data points; the function is
        evaluated for the lags ``0, 1, ..., max_lag - 1``. Defaults to
        ``len(data) - 1``.

    Returns
    -------
    calculated autocorrelation function, one value per lag

    Raises
    ------
    ValueError
        if `max_lag` is not between 1 and ``len(data) - 1``
    """
    n_points = len(data)
    if max_lag is None:
        max_lag = n_points - 1
    # NOTE: must be `or`; a chained `1 > max_lag >= len(data)` can never be
    # true and would let every invalid value through.
    if max_lag < 1 or max_lag >= n_points:
        raise ValueError('max_lag should be between 1 and len(data)-1.')
    series = pd.Series(data)
    acf = [series.autocorr(lag) for lag in range(max_lag)]
    return np.array(acf)


def get_correlation_length(data: np.ndarray) -> int:
    """ Returns estimate of the correlation length of data.

    The correlation length is taken as the first point where the
    autocorrelation function is less than exp(-2).

    If the autocorrelation function never goes below exp(-2) then np.nan
    is returned.

    Parameters
    ----------
    data
        data series to compute autocorrelation function for

    Returns
    -------
    correlation length
    """
    acf = get_autocorrelation_function(data)
    correlation_length = _estimate_correlation_length_from_acf(acf)
    return correlation_length


def get_error_estimate(data: np.ndarray, confidence: float = 0.95) -> float:
    """ Returns estimate of standard error with confidence interval.

    error = t_factor * std(data) / sqrt(Ns)
    where t_factor is the factor corresponding to the confidence interval
    and Ns is the number of independent measurements (with correlation
    taken into account), i.e. len(data) / correlation_length.

    Parameters
    ----------
    data
        data series to estimate error for
    confidence
        confidence level of the interval, e.g. 0.95 for 95%

    Returns
    -------
    error estimate (np.nan if no correlation length could be determined)
    """
    correlation_length = get_correlation_length(data)
    error_estimate = _estimate_error(data, correlation_length, confidence)
    return error_estimate


def _estimate_correlation_length_from_acf(acf: np.ndarray) -> int:
    """ Estimates correlation length from acf.

    Returns the index (lag) of the first entry of `acf` that is smaller
    than exp(-2), or np.nan if there is no such entry.
    """
    # np.where(cond) would return a 1-tuple of index arrays; flatnonzero
    # yields the index array itself so that emptiness can be tested.
    below_threshold = np.flatnonzero(np.asarray(acf) < np.exp(-2))
    if below_threshold.size == 0:
        return np.nan
    return int(below_threshold[0])


def _estimate_error(data: np.ndarray, correlation_length: int,
                    confidence: float) -> float:
    """ Estimates error using correlation length.

    Uses the two-sided Student t quantile for `confidence` with
    len(data) - 1 degrees of freedom and treats
    len(data) / correlation_length as the number of independent samples.
    """
    t_factor = scipy.stats.t.ppf((1 + confidence) / 2, len(data) - 1)
    error = t_factor * np.std(data) / np.sqrt(len(data) / correlation_length)
    return error