src.FeatureSelectionMethods.PearsonCorrelation
# ************************************************************************************************************************* #
# UTC Header #
# :::::::::::::::::::: ::: ::: ::::::::::: :::::::: #
# PearsonCorrelation.py :::::::::::::::::::: :+: :+: :+: :+: :+: #
# ::::::::::::::+++#####+++ +:+ +:+ +:+ +:+ #
# By: branlyst and ismailkad < > ::+++##############+++ +:+ +:+ +:+ +:+ #
# +++##############+++:::: +#+ +:+ +#+ +#+ #
# +++##+++:::::::::::::: +#+ +:+ +#+ +#+ #
# :::::::::::::::::::: +#+ +#+ +#+ +#+ #
# :::::::::::::::::::: #+# #+# #+# #+# #+# #
# Update: 2022/06/16 17:42:24 by branlyst and ismai :::::::::::::::::::: ######## ### ######## .fr #
# #
# ************************************************************************************************************************* #

from src.FeatureSelectionMethods.TemplateMethod import TemplateMethod


class PearsonCorrelation(TemplateMethod):
    """
    Feature selection method that ranks features by the absolute value of
    their Pearson correlation with each target column.
    """

    def __init__(self):
        super().__init__("PearsonCorrelation")

    def select(self, dataframe, target_columns, number_of_target_to_keep=1):
        """
        Rank the features against every target and keep the best ones.

        Results are stored on the instance; nothing is returned:
        - self._score : absolute correlation of every correlated column
          (rows) with every target (columns)
        - self._selected_features : dict mapping each target name to the list
          of its most correlated feature names, strongest first

        Args
        - dataframe (DataFrame) : data used to compute the correlations,
          target columns included
        - target_columns (str[] | str) : target column name(s)
        - number_of_target_to_keep (int | None) : number of features kept for
          each target. None keeps every candidate feature
        """
        # Accept a single column name as well as a collection of names.
        if isinstance(target_columns, str):
            target_columns = [target_columns]
        else:
            target_columns = list(target_columns)

        self._score = abs(dataframe.corr()[target_columns])

        # A target correlates perfectly with itself (|r| == 1), so without
        # this filter every target would select itself as its best feature.
        # Targets are therefore never candidates; self._score keeps them.
        candidate_scores = self._score.drop(index=target_columns, errors="ignore")

        self._selected_features = {
            target_column: list(
                candidate_scores.sort_values(by=target_column, ascending=False)[
                    :number_of_target_to_keep
                ].index
            )
            for target_column in target_columns
        }
class PearsonCorrelation(TemplateMethod):
    """
    Feature selection method that ranks features by the absolute value of
    their Pearson correlation with each target column.
    """

    def __init__(self):
        super().__init__("PearsonCorrelation")

    def select(self, dataframe, target_columns, number_of_target_to_keep=1):
        """
        Rank the features against every target and keep the best ones.

        Results are stored on the instance; nothing is returned:
        - self._score : absolute correlation of every correlated column
          (rows) with every target (columns)
        - self._selected_features : dict mapping each target name to the list
          of its most correlated feature names, strongest first

        Args
        - dataframe (DataFrame) : data used to compute the correlations,
          target columns included
        - target_columns (str[] | str) : target column name(s)
        - number_of_target_to_keep (int | None) : number of features kept for
          each target. None keeps every candidate feature
        """
        # Accept a single column name as well as a collection of names.
        if isinstance(target_columns, str):
            target_columns = [target_columns]
        else:
            target_columns = list(target_columns)

        self._score = abs(dataframe.corr()[target_columns])

        # A target correlates perfectly with itself (|r| == 1), so without
        # this filter every target would select itself as its best feature.
        # Targets are therefore never candidates; self._score keeps them.
        candidate_scores = self._score.drop(index=target_columns, errors="ignore")

        self._selected_features = {
            target_column: list(
                candidate_scores.sort_values(by=target_column, ascending=False)[
                    :number_of_target_to_keep
                ].index
            )
            for target_column in target_columns
        }
PearsonCorrelation is a class that implements TemplateMethod to provide feature selection based on the Pearson correlation.
def
select(self, dataframe, target_columns, number_of_target_to_keep=1)
def select(self, dataframe, target_columns, number_of_target_to_keep=1):
    """
    Rank the features against every target and keep the best ones.

    Results are stored on the instance; nothing is returned:
    - self._score : absolute correlation of every correlated column
      (rows) with every target (columns)
    - self._selected_features : dict mapping each target name to the list
      of its most correlated feature names, strongest first

    Args
    - dataframe (DataFrame) : data used to compute the correlations,
      target columns included
    - target_columns (str[] | str) : target column name(s)
    - number_of_target_to_keep (int | None) : number of features kept for
      each target. None keeps every candidate feature
    """
    # Accept a single column name as well as a collection of names.
    if isinstance(target_columns, str):
        target_columns = [target_columns]
    else:
        target_columns = list(target_columns)

    self._score = abs(dataframe.corr()[target_columns])

    # A target correlates perfectly with itself (|r| == 1), so without
    # this filter every target would select itself as its best feature.
    # Targets are therefore never candidates; self._score keeps them.
    candidate_scores = self._score.drop(index=target_columns, errors="ignore")

    self._selected_features = {
        target_column: list(
            candidate_scores.sort_values(by=target_column, ascending=False)[
                :number_of_target_to_keep
            ].index
        )
        for target_column in target_columns
    }
Select abstract method. Must be implemented.
Args
- dataframe (DataFrame) : dataframe containing the data used to apply the feature selection, with one column per feature and one row per entry
- target_columns (str[]) : array of the target column names used to apply the feature selection
- number_of_target_to_keep (int | None) : number of features to keep for each target. If None, the algorithm will try to find the best compromise