utils

View Source

 1# ************************************************************************************************************************* #
 2#   UTC Header                                                                                                              #
 3#                                                         ::::::::::::::::::::       :::    ::: :::::::::::  ::::::::       #
 4#      utils.py                                           ::::::::::::::::::::       :+:    :+:     :+:     :+:    :+:      #
 5#                                                         ::::::::::::::+++#####+++  +:+    +:+     +:+     +:+             #
 6#      By: branlyst and ismailkad < >                     ::+++##############+++     +:+    +:+     +:+     +:+             #
 7#                                                     +++##############+++::::       +#+    +:+     +#+     +#+             #
 8#                                                       +++##+++::::::::::::::       +#+    +:+     +#+     +#+             #
 9#                                                         ::::::::::::::::::::       +#+    +#+     +#+     +#+             #
10#                                                         ::::::::::::::::::::       #+#    #+#     #+#     #+#    #+#      #
11#      Update: 2022/06/08 15:23:32 by branlyst and ismai  ::::::::::::::::::::        ########      ###      ######## .fr   #
12#                                                                                                                           #
13# ************************************************************************************************************************* #
14
15import pandas as pd
16import numpy as np
17from statsmodels.tsa.stattools import adfuller
18
19
def adf_test(series, title="", verbose=False):
    """
    Run an Augmented Dickey-Fuller test on a time series.

    series:  pandas Series to test; missing values are dropped first.
    title:   optional label shown in the report header when verbose is set.
    verbose: when truthy, print the ADF report and its interpretation.

    Returns True when the p-value is <= 0.05 (the unit-root null hypothesis is
    rejected and the series is treated as stationary), False otherwise.
    """
    # .dropna() handles differenced data
    result = adfuller(series.dropna(), autolag="AIC")
    stationary = bool(result[1] <= 0.05)  # result[1] is the p-value

    if verbose:
        # The report is only needed for printing, so build it only here.
        labels = ["ADF test statistic", "p-value", "# lags used", "# observations"]
        out = pd.Series(result[0:4], index=labels)
        for key, val in result[4].items():
            out[f"critical value ({key})"] = val
        print(f"Augmented Dickey-Fuller Test: {title}")
        print(out.to_string())  # .to_string() removes the line "dtype: float64"

        if stationary:
            print(f"Strong evidence against the null hypothesis for {series.name}")
            print("Reject the null hypothesis")
            print("Data has no unit root and is stationary")
        else:
            print(f"Weak evidence against the null hypothesis for {series.name}")
            print("Fail to reject the null hypothesis")
            print("Data has a unit root and is non-stationary")

    return stationary
46
47
def is_stationary(ts):
    """
    Tell whether a series, or every column of a dataframe, passes the ADF
    stationarity test.

    Any other kind of input prints "Wrong input" and yields False.
    """
    if isinstance(ts, pd.Series):
        return adf_test(ts)

    if not isinstance(ts, pd.DataFrame):
        print("Wrong input")
        return False

    # all() stops at the first column that fails the test
    return all(adf_test(ts[column]) for column in ts.columns)
64
65
def stationary_dataframe(dataframe, verbose=False):
    """
    Difference a dataframe until all of its series pass the stationarity check.

    Returns a tuple: the (possibly differenced) dataframe and the number of
    differencing passes that were applied to it.
    """
    passes = 0
    current = dataframe
    while True:
        if is_stationary(current):
            break
        # one pass: first differences, then drop the rows left with NaN
        current = current.diff().dropna()
        passes += 1

    if verbose:
        print("Number of times dataframe got differed: ", passes)
    return current, passes
79
80
def symmetrize(df):
    """
    Build a symmetric matrix whose entry (i, j) is 1 - max(df[i, j], df[j, i]).

    df: square two-dimensional pandas DataFrame, numpy array or nested sequence.

    Returns a new numpy array with ones on the diagonal; the input is never
    modified. If the matrix is not square, prints a message and returns 0.
    NaN in either of two mirrored entries propagates to the result.
    """
    matrix = df.to_numpy() if hasattr(df, "to_numpy") else np.asarray(df)
    n_row, n_col = matrix.shape

    if n_row != n_col:
        print("Please use a square matrix")
        return 0

    # The arithmetic allocates a fresh array, so the caller's data is untouched.
    result = 1 - np.maximum(matrix, matrix.T)
    # The diagonal is treated as 0 before the transform, so it always ends up 1.
    np.fill_diagonal(result, 1)
    return result

def adf_test(series, title='', verbose=False) View Source

def adf_test(series, title="", verbose=False):
    """
    Run an Augmented Dickey-Fuller test on a time series.

    series:  pandas Series to test; missing values are dropped first.
    title:   optional label shown in the report header when verbose is set.
    verbose: when truthy, print the ADF report and its interpretation.

    Returns True when the p-value is <= 0.05 (the unit-root null hypothesis is
    rejected and the series is treated as stationary), False otherwise.
    """
    # .dropna() handles differenced data
    result = adfuller(series.dropna(), autolag="AIC")
    stationary = bool(result[1] <= 0.05)  # result[1] is the p-value

    if verbose:
        # The report is only needed for printing, so build it only here.
        labels = ["ADF test statistic", "p-value", "# lags used", "# observations"]
        out = pd.Series(result[0:4], index=labels)
        for key, val in result[4].items():
            out[f"critical value ({key})"] = val
        print(f"Augmented Dickey-Fuller Test: {title}")
        print(out.to_string())  # .to_string() removes the line "dtype: float64"

        if stationary:
            print(f"Strong evidence against the null hypothesis for {series.name}")
            print("Reject the null hypothesis")
            print("Data has no unit root and is stationary")
        else:
            print(f"Weak evidence against the null hypothesis for {series.name}")
            print("Fail to reject the null hypothesis")
            print("Data has a unit root and is non-stationary")

    return stationary

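Runs an Augmented Dickey-Fuller test on a time series (the optional title labels the report printed in verbose mode) and returns True if the series is stationary (p-value <= 0.05), False otherwise.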

def is_stationary(ts) View Source

def is_stationary(ts):
    """
    Tell whether a series, or every column of a dataframe, passes the ADF
    stationarity test.

    Any other kind of input prints "Wrong input" and yields False.
    """
    if isinstance(ts, pd.Series):
        return adf_test(ts)

    if not isinstance(ts, pd.DataFrame):
        print("Wrong input")
        return False

    # all() stops at the first column that fails the test
    return all(adf_test(ts[column]) for column in ts.columns)

Checks whether a series, or every column of a dataframe, is stationary. Returns a boolean; any other input prints "Wrong input" and returns False.

def stationary_dataframe(dataframe, verbose=False) View Source

def stationary_dataframe(dataframe, verbose=False):
    """
    Difference a dataframe until all of its series pass the stationarity check.

    Returns a tuple: the (possibly differenced) dataframe and the number of
    differencing passes that were applied to it.
    """
    passes = 0
    current = dataframe
    while True:
        if is_stationary(current):
            break
        # one pass: first differences, then drop the rows left with NaN
        current = current.diff().dropna()
        passes += 1

    if verbose:
        print("Number of times dataframe got differed: ", passes)
    return current, passes

Pass in a dataframe: each series is checked for stationarity with the ADF test, and the whole dataframe is differenced until every series passes. Returns the resulting dataframe together with the number of differencing passes applied.

def symmetrize(df) View Source

def symmetrize(df):
    """
    Build a symmetric matrix whose entry (i, j) is 1 - max(df[i, j], df[j, i]).

    df: square two-dimensional pandas DataFrame, numpy array or nested sequence.

    Returns a new numpy array with ones on the diagonal; the input is never
    modified. If the matrix is not square, prints a message and returns 0.
    NaN in either of two mirrored entries propagates to the result.
    """
    matrix = df.to_numpy() if hasattr(df, "to_numpy") else np.asarray(df)
    n_row, n_col = matrix.shape

    if n_row != n_col:
        print("Please use a square matrix")
        return 0

    # The arithmetic allocates a fresh array, so the caller's data is untouched.
    result = 1 - np.maximum(matrix, matrix.T)
    # The diagonal is treated as 0 before the transform, so it always ends up 1.
    np.fill_diagonal(result, 1)
    return result