utils
# ************************************************************************************************************************* #
# UTC Header #
# :::::::::::::::::::: ::: ::: ::::::::::: :::::::: #
# utils.py :::::::::::::::::::: :+: :+: :+: :+: :+: #
# ::::::::::::::+++#####+++ +:+ +:+ +:+ +:+ #
# By: branlyst and ismailkad < > ::+++##############+++ +:+ +:+ +:+ +:+ #
# +++##############+++:::: +#+ +:+ +#+ +#+ #
# +++##+++:::::::::::::: +#+ +:+ +#+ +#+ #
# :::::::::::::::::::: +#+ +#+ +#+ +#+ #
# :::::::::::::::::::: #+# #+# #+# #+# #+# #
# Update: 2022/06/08 15:23:32 by branlyst and ismai :::::::::::::::::::: ######## ### ######## .fr #
# #
# ************************************************************************************************************************* #

import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller


def adf_test(series, title="", verbose=False):
    """
    Run an Augmented Dickey-Fuller test on a time series.

    Missing values are dropped before the test is run. The series is
    reported as stationary when the test's p-value is at most 0.05.

    Args:
        series: pandas Series to test; its ``name`` is used in the
            verbose messages.
        title: optional label printed in the verbose report header.
        verbose: when equal to True, print the ADF report and the
            conclusion drawn from it.

    Returns:
        True if the null hypothesis (unit root) is rejected, else False.
    """
    # .dropna() handles differenced data, whose first rows are NaN
    adf_output = adfuller(series.dropna(), autolag="AIC")

    report = pd.Series(
        adf_output[0:4],
        index=["ADF test statistic", "p-value", "# lags used", "# observations"],
    )
    for level, threshold in adf_output[4].items():
        report[f"critical value ({level})"] = threshold

    # Only an argument that compares equal to True enables printing
    chatty = verbose == True
    if chatty:
        print(f"Augmented Dickey-Fuller Test: {title}")
        # .to_string() removes the trailing "dtype: float64" line
        print(report.to_string())

    stationary = bool(adf_output[1] <= 0.05)
    if chatty:
        if stationary:
            print(f"Strong evidence against the null hypothesis for {series.name}")
            print("Reject the null hypothesis")
            print("Data has no unit root and is stationary")
        else:
            print(f"Weak evidence against the null hypothesis for {series.name}")
            print("Fail to reject the null hypothesis")
            print("Data has a unit root and is non-stationary")
    return stationary


def is_stationary(ts):
    """
    Tell whether a Series, or every column of a DataFrame, is stationary.

    Each series is checked with ``adf_test``. Any other input type prints
    "Wrong input" and yields False.

    Returns:
        A boolean; True for a DataFrame only if all its columns pass.
    """
    if isinstance(ts, pd.DataFrame):
        # all() stops at the first column that fails the test
        return all(adf_test(ts[column]) for column in ts.columns)
    if isinstance(ts, pd.Series):
        return adf_test(ts)
    print("Wrong input")
    return False


def stationary_dataframe(dataframe, verbose=False):
    """
    Difference a dataframe until ``is_stationary`` accepts it.

    Args:
        dataframe: data to make stationary.
        verbose: when truthy, print how many differencing passes ran.

    Returns:
        A tuple ``(df, diff)``: the (possibly differenced) data and the
        number of differencing passes that were applied.
    """
    current = dataframe
    n_diffs = 0
    while True:
        if is_stationary(current):
            break
        # Each pass loses the first row, which diff() leaves as NaN
        current = current.diff().dropna()
        n_diffs += 1
    if verbose:
        print("Number of times dataframe got differed: ", n_diffs)
    return current, n_diffs


def symmetrize(df):
    """
    Build a symmetric matrix from a square matrix of pairwise values.

    Every off-diagonal pair (i, j) / (j, i) is replaced by
    ``1 - max(df[i, j], df[j, i])`` and every diagonal entry is set to 1.
    The input is never modified; a new array of the same dtype is
    returned. NaN in either entry of a pair propagates to both outputs.

    Args:
        df: square numpy array, or an object exposing ``to_numpy()``
            (e.g. a pandas DataFrame).

    Returns:
        The symmetrized numpy array, or 0 (after printing a message)
        when the input is not square.
    """
    matrix = df if isinstance(df, np.ndarray) else df.to_numpy()
    n_row, n_col = matrix.shape

    if n_row != n_col:
        print("Please use a square matrix")
        return 0

    # Work on a private, writable copy: to_numpy() may hand back a view
    # (possibly read-only) of the caller's data.
    result = matrix.copy()
    # Slice assignment keeps the original dtype, as item assignment did
    result[...] = 1 - np.maximum(matrix, matrix.T)
    np.fill_diagonal(result, 1)
    return result
def
adf_test(series, title='', verbose=False)
def adf_test(series, title="", verbose=False):
    """
    Run an Augmented Dickey-Fuller test on a time series.

    Missing values are dropped before the test is run. The series is
    reported as stationary when the test's p-value is at most 0.05.

    Args:
        series: pandas Series to test; its ``name`` is used in the
            verbose messages.
        title: optional label printed in the verbose report header.
        verbose: when equal to True, print the ADF report and the
            conclusion drawn from it.

    Returns:
        True if the null hypothesis (unit root) is rejected, else False.
    """
    # .dropna() handles differenced data, whose first rows are NaN
    adf_output = adfuller(series.dropna(), autolag="AIC")

    report = pd.Series(
        adf_output[0:4],
        index=["ADF test statistic", "p-value", "# lags used", "# observations"],
    )
    for level, threshold in adf_output[4].items():
        report[f"critical value ({level})"] = threshold

    # Only an argument that compares equal to True enables printing
    chatty = verbose == True
    if chatty:
        print(f"Augmented Dickey-Fuller Test: {title}")
        # .to_string() removes the trailing "dtype: float64" line
        print(report.to_string())

    stationary = bool(adf_output[1] <= 0.05)
    if chatty:
        if stationary:
            print(f"Strong evidence against the null hypothesis for {series.name}")
            print("Reject the null hypothesis")
            print("Data has no unit root and is stationary")
        else:
            print(f"Weak evidence against the null hypothesis for {series.name}")
            print("Fail to reject the null hypothesis")
            print("Data has a unit root and is non-stationary")
    return stationary
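Pass in a time series and an optional title; returns True if the ADF test finds the series stationary (p-value <= 0.05) and False otherwise, printing an ADF report when verbose is True.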
def
is_stationary(ts)
def is_stationary(ts):
    """
    Tell whether a Series, or every column of a DataFrame, is stationary.

    Each series is checked with ``adf_test``. Any other input type prints
    "Wrong input" and yields False.

    Returns:
        A boolean; True for a DataFrame only if all its columns pass.
    """
    if isinstance(ts, pd.DataFrame):
        # all() stops at the first column that fails the test
        return all(adf_test(ts[column]) for column in ts.columns)
    if isinstance(ts, pd.Series):
        return adf_test(ts)
    print("Wrong input")
    return False
Check for stationarity of the time series composing a dataframe, or of a single series; returns a boolean.
def
stationary_dataframe(dataframe, verbose=False)
def stationary_dataframe(dataframe, verbose=False):
    """
    Difference a dataframe until ``is_stationary`` accepts it.

    Args:
        dataframe: data to make stationary.
        verbose: when truthy, print how many differencing passes ran.

    Returns:
        A tuple ``(df, diff)``: the (possibly differenced) data and the
        number of differencing passes that were applied.
    """
    current = dataframe
    n_diffs = 0
    while True:
        if is_stationary(current):
            break
        # Each pass loses the first row, which diff() leaves as NaN
        current = current.diff().dropna()
        n_diffs += 1
    if verbose:
        print("Number of times dataframe got differed: ", n_diffs)
    return current, n_diffs
Pass in a dataframe; checks each series for stationarity with the ADF test and, as long as any series fails, differences the whole dataframe. Returns the resulting dataframe, in which every series is stationary, together with the number of differencing passes applied.
def
symmetrize(df)
def symmetrize(df):
    """
    Build a symmetric matrix from a square matrix of pairwise values.

    Every off-diagonal pair (i, j) / (j, i) is replaced by
    ``1 - max(df[i, j], df[j, i])`` and every diagonal entry is set to 1.
    The input is never modified; a new array of the same dtype is
    returned. NaN in either entry of a pair propagates to both outputs.

    Args:
        df: square numpy array, or an object exposing ``to_numpy()``
            (e.g. a pandas DataFrame).

    Returns:
        The symmetrized numpy array, or 0 (after printing a message)
        when the input is not square.
    """
    matrix = df if isinstance(df, np.ndarray) else df.to_numpy()
    n_row, n_col = matrix.shape

    if n_row != n_col:
        print("Please use a square matrix")
        return 0

    # Work on a private, writable copy: to_numpy() may hand back a view
    # (possibly read-only) of the caller's data.
    result = matrix.copy()
    # Slice assignment keeps the original dtype, as item assignment did
    result[...] = 1 - np.maximum(matrix, matrix.T)
    np.fill_diagonal(result, 1)
    return result