# License: Apache-2.0
from ._base_scaler import _BaseScaler
from ..util import util
from gators import DataFrame, Series
class StandardScaler(_BaseScaler):
    """Scale each column by setting the mean to 0 and the standard deviation to 1.

    Parameters
    ----------
    inplace : bool, default True.
        If True, perform the scaling in-place.
        If False, create new columns.

    Examples
    --------
    Imports and initialization:

    >>> from gators.scalers import StandardScaler
    >>> obj = StandardScaler()

    The `fit`, `transform`, and `fit_transform` methods accept:

    * `dask` dataframes:

    >>> import dask.dataframe as dd
    >>> import pandas as pd
    >>> X = dd.from_pandas(pd.DataFrame({'A': [1, 2, 3], 'B': [-0.1, 0.2, 0.3]}), npartitions=1)

    * `koalas` dataframes:

    >>> import databricks.koalas as ks
    >>> X = ks.DataFrame({'A': [1, 2, 3], 'B': [-0.1, 0.2, 0.3]})

    * and `pandas` dataframes:

    >>> import pandas as pd
    >>> X = pd.DataFrame({'A': [1, 2, 3], 'B': [-0.1, 0.2, 0.3]})

    The result is a transformed dataframe belonging to the same dataframe library.

    >>> obj.fit_transform(X)
         A         B
    0 -1.0 -1.120897
    1  0.0  0.320256
    2  1.0  0.800641

    >>> X = pd.DataFrame({'A': [1, 2, 3], 'B': [-0.1, 0.2, 0.3]})
    >>> _ = obj.fit(X)
    >>> obj.transform_numpy(X.to_numpy())
    array([[-1.        , -1.12089708],
           [ 0.        ,  0.32025631],
           [ 1.        ,  0.80064077]])
    """

    def __init__(self, inplace: bool = True):
        _BaseScaler.__init__(self, inplace=inplace)

    def fit(self, X: DataFrame, y: Series = None) -> "StandardScaler":
        """Fit the transformer on the dataframe X.

        The per-column mean and the inverse of the per-column standard
        deviation are computed on the numerical columns of X and stored
        both as dictionaries (``X_offset``, ``X_scale``) and as NumPy
        arrays (``X_offset_np``, ``X_scale_np``).

        Columns whose standard deviation is exactly zero (constant
        columns) get a scale factor of 1 instead of infinity, so that
        scaling them does not produce NaN or infinite values.

        Parameters
        ----------
        X : DataFrame.
            Input dataframe.
        y : Series, default None.
            Target values. Not used by this method.

        Returns
        -------
        self : 'StandardScaler'
            Instance of itself.
        """
        self.check_dataframe(X)
        self.columns = util.get_numerical_columns(X)
        self.idx_columns = util.get_idx_columns(X, self.columns)
        self.column_names = self.get_column_names(self.inplace, self.columns, "scale")

        # Library-specific helper used to bring the aggregated statistics
        # back as pandas objects (presumably pandas Series indexed by
        # column name -- they are later converted with `.to_dict()`).
        func = util.get_function(X)

        self.X_offset = func.to_pandas(X[self.columns].mean()).astype(float)

        std = func.to_pandas(X[self.columns].std())
        # A constant column has a zero standard deviation; inverting it
        # would give an infinite scale factor and NaN values once applied
        # to (x - mean) == 0. Use a neutral deviation of 1 for those
        # columns. NaN deviations compare unequal to 0 and are left as is.
        std = std.where(std != 0, 1.0)
        self.X_scale = (1.0 / std).astype(float)

        # NumPy copies of the statistics, taken before the Series are
        # replaced by plain dictionaries below.
        self.X_offset_np = util.get_function(self.X_offset).to_numpy(self.X_offset)
        self.X_scale_np = util.get_function(self.X_scale).to_numpy(self.X_scale)
        self.X_offset = self.X_offset.to_dict()
        self.X_scale = self.X_scale.to_dict()
        return self