src.FeatureSelectionMethods.PearsonCorrelation

 1# ************************************************************************************************************************* #
 2#   UTC Header                                                                                                              #
 3#                                                         ::::::::::::::::::::       :::    ::: :::::::::::  ::::::::       #
 4#      PearsonCorrelation.py                              ::::::::::::::::::::       :+:    :+:     :+:     :+:    :+:      #
 5#                                                         ::::::::::::::+++#####+++  +:+    +:+     +:+     +:+             #
 6#      By: branlyst and ismailkad < >                     ::+++##############+++     +:+    +:+     +:+     +:+             #
 7#                                                     +++##############+++::::       +#+    +:+     +#+     +#+             #
 8#                                                       +++##+++::::::::::::::       +#+    +:+     +#+     +#+             #
 9#                                                         ::::::::::::::::::::       +#+    +#+     +#+     +#+             #
10#                                                         ::::::::::::::::::::       #+#    #+#     #+#     #+#    #+#      #
11#      Update: 2022/06/16 17:42:24 by branlyst and ismai  ::::::::::::::::::::        ########      ###      ######## .fr   #
12#                                                                                                                           #
13# ************************************************************************************************************************* #
14
15from src.FeatureSelectionMethods.TemplateMethod import TemplateMethod
16
17
class PearsonCorrelation(TemplateMethod):
    """
    Feature selection method that ranks the columns of a dataframe by the
    absolute value of their correlation with each target column, using the
    correlation matrix returned by ``dataframe.corr()``.
    """

    def __init__(self):
        """Initialise the TemplateMethod base with the identifier "PearsonCorrelation"."""
        super().__init__("PearsonCorrelation")

    def select(self, dataframe, target_columns, number_of_target_to_keep=1):
        """
        Score every column against the targets and keep the best ranked ones.

        The results are stored on the instance; nothing is returned:
          - ``self._score``: absolute correlations, restricted to the target columns
          - ``self._selected_features``: dict mapping each target column to the list
            of index labels with the highest score for that target

        Args:
            dataframe (DataFrame): data used to apply the feature selection
            target_columns (str[]): names of the target columns
            number_of_target_to_keep (int | None): upper bound of the slice taken
                from each ranking. None leaves the slice unbounded, so every row
                of the ranking is kept.
        """
        correlation_matrix = dataframe.corr()
        self._score = abs(correlation_matrix[target_columns])

        # NOTE(review): the target columns are also rows of the correlation
        # matrix, so each target presumably ranks first for itself - confirm
        # that callers expect this.
        self._selected_features = {}
        for target in target_columns:
            ranking = self._score.sort_values(by=target, ascending=False)
            top_rows = ranking[:number_of_target_to_keep]
            self._selected_features[target] = list(top_rows.index)
class PearsonCorrelation(src.FeatureSelectionMethods.TemplateMethod.TemplateMethod):
class PearsonCorrelation(TemplateMethod):
    """
    Feature selection method that ranks the columns of a dataframe by the
    absolute value of their correlation with each target column, using the
    correlation matrix returned by ``dataframe.corr()``.
    """

    def __init__(self):
        """Initialise the TemplateMethod base with the identifier "PearsonCorrelation"."""
        super().__init__("PearsonCorrelation")

    def select(self, dataframe, target_columns, number_of_target_to_keep=1):
        """
        Score every column against the targets and keep the best ranked ones.

        The results are stored on the instance; nothing is returned:
          - ``self._score``: absolute correlations, restricted to the target columns
          - ``self._selected_features``: dict mapping each target column to the list
            of index labels with the highest score for that target

        Args:
            dataframe (DataFrame): data used to apply the feature selection
            target_columns (str[]): names of the target columns
            number_of_target_to_keep (int | None): upper bound of the slice taken
                from each ranking. None leaves the slice unbounded, so every row
                of the ranking is kept.
        """
        correlation_matrix = dataframe.corr()
        self._score = abs(correlation_matrix[target_columns])

        # NOTE(review): the target columns are also rows of the correlation
        # matrix, so each target presumably ranks first for itself - confirm
        # that callers expect this.
        self._selected_features = {}
        for target in target_columns:
            ranking = self._score.sort_values(by=target, ascending=False)
            top_rows = ranking[:number_of_target_to_keep]
            self._selected_features[target] = list(top_rows.index)

PearsonCorrelation is a TemplateMethod subclass that implements Pearson correlation feature selection.

PearsonCorrelation()
24    def __init__(self):
25        TemplateMethod.__init__(self, "PearsonCorrelation")
def select(self, dataframe, target_columns, number_of_target_to_keep=1)
27    def select(self, dataframe, target_columns, number_of_target_to_keep=1):
28        target_correlation = dataframe.corr()[target_columns]
29        self._score = abs(target_correlation)
30
31        self._selected_features = dict()
32        for target_column in target_columns:
33            self._selected_features[target_column] = list(
34                self._score.sort_values(by=target_column, ascending=False)[
35                    :number_of_target_to_keep
36                ].index
37            )

Select abstract method. Must be implemented.

Args
  • dataframe (DataFrame) : dataframe containing the data used to apply the feature selection, with one column per feature and one row per entry
  • target_columns (str[]) : array of the target column names used to apply the feature selection
  • number_of_target_to_keep (int | None) : number of targets to keep when selecting features. If None, the algorithm will try to find the best compromise