Source code for prepropy.scaler

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import MinMaxScaler


def _is_numeric_column(column):
    """Return True if every value in *column* is, or parses as, a number."""
    if pd.api.types.is_numeric_dtype(column):
        return True
    # Only text-like columns get a parsing attempt; other dtypes (datetimes,
    # categoricals, ...) are not treated as numeric features.
    if not (
        pd.api.types.is_object_dtype(column)
        or pd.api.types.is_string_dtype(column)
    ):
        return False
    # Unparseable or missing entries become NaN under errors="coerce".
    return bool(pd.to_numeric(column, errors="coerce").notna().all())


def scaler(
    X_train, X_Valid, X_test, scale_features, scaler_type="StandardScaler"
):
    """
    Scale the selected numerical features of three data splits.

    The scaler is fitted on ``X_train[scale_features]`` only and then applied
    to the training, validation and test frames. The inputs are not modified;
    scaled copies are returned.

    Parameters
    --------
    X_train : pandas.core.frame.DataFrame
        The training data; used to fit the scaler.
    X_Valid : pandas.core.frame.DataFrame
        The validation data.
    X_test : pandas.core.frame.DataFrame
        The test data.
    scale_features : list of strings
        The names of the numerical columns to be scaled. Each must be present
        in all three frames.
    scaler_type : string
        The type of scaling to perform on the numerical columns. One of
        "StandardScaler", "MinMaxScaler" or "MaxAbsScaler".

    Returns
    --------
    dict
        dict with keys "X_train", "X_Valid" and "X_test", each holding a copy
        of the corresponding input with ``scale_features`` scaled.

    Raises
    --------
    KeyError
        If ``scaler_type`` is not one of the supported scaler names.
    TypeError
        If any of the three data inputs is not a pandas DataFrame.
    ValueError
        If any data input or ``scale_features`` is empty, or if a feature
        column contains values that are not numeric.

    Examples
    --------
    >>> scaler(X_train, X_Valid, X_test, scale_features, scaler_type="MaxAbsScaler")  # noqa: E501
    """
    # Error Checking
    if scaler_type not in ("StandardScaler", "MinMaxScaler", "MaxAbsScaler"):
        raise KeyError(
            'Please use scaler "StandardScaler", "MinMaxScaler", "MaxAbsScaler"'  # noqa: E501
        )

    # Insertion order doubles as the validation order and the output keys.
    frames = {"X_train": X_train, "X_Valid": X_Valid, "X_test": X_test}

    if not all(isinstance(frame, pd.DataFrame) for frame in frames.values()):
        raise TypeError("Input data must be a Pandas Dataframe")

    if (
        any(frame.empty for frame in frames.values())
        or len(scale_features) == 0
    ):
        raise ValueError("Inputs cannot be empty")

    # Checked by dtype/parsing rather than via the ``.str`` accessor, which
    # raises AttributeError on columns that are already int or float.
    for feature in scale_features:
        for frame in frames.values():
            if not _is_numeric_column(frame[feature]):
                raise ValueError("Features should have only numeric values")

    # Scaling Instance
    scaler_classes = {
        "StandardScaler": StandardScaler,
        "MinMaxScaler": MinMaxScaler,
        "MaxAbsScaler": MaxAbsScaler,
    }
    scaler_instance = scaler_classes[scaler_type]()

    # Fit on the training split only, then apply the same transform to all
    # three splits.
    scaler_instance.fit(X_train[scale_features])

    scaled_data = {}
    for name, frame in frames.items():
        scaled_frame = frame.copy()
        scaled_frame[scale_features] = scaler_instance.transform(
            frame[scale_features]
        )
        scaled_data[name] = scaled_frame

    return scaled_data