Here's another way of getting the z-score, using a custom function:
In [6]: import pandas as pd; import numpy as np
In [7]: np.random.seed(0) # Fixes the random seed
In [8]: df = pd.DataFrame(np.random.randn(5,3), columns=["randomA", "randomB","randomC"])
In [9]: df # watch output of dataframe
Out[9]:
randomA randomB randomC
0 1.764052 0.400157 0.978738
1 2.240893 1.867558 -0.977278
2 0.950088 -0.151357 -0.103219
3 0.410599 0.144044 1.454274
4 0.761038 0.121675 0.443863
## Create a custom function to compute the z-score
def z_score(df):
    """Return the column-wise z-scores of ``df`` as a new DataFrame.

    Each column is centered on its own mean and divided by its population
    standard deviation (``ddof=0``). The result's columns are the input's
    column names with ``"_zscore"`` appended.

    The input DataFrame is not modified: its column labels are left as they
    were, so the function can be called repeatedly on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be standardized. Select the columns of
        interest before calling.

    Returns
    -------
    pandas.DataFrame
        Same index as ``df``, with ``"_zscore"``-suffixed column labels.
        A column with zero spread divides 0 by 0 and comes back as NaN.
    """
    standardized = (df - df.mean()) / df.std(ddof=0)
    # Relabel only the result; assigning to df.columns would rename the
    # caller's DataFrame as a hidden side effect.
    return standardized.add_suffix("_zscore")
## Make sure you filter or select the columns of interest before passing the DataFrame to the function
In [11]: z_score(df) # compute Zscore
Out[11]:
randomA_zscore randomB_zscore randomC_zscore
0 0.798350 -0.106335 0.731041
1 1.505002 1.939828 -1.577295
2 -0.407899 -0.875374 -0.545799
3 -1.207392 -0.463464 1.292230
4 -0.688061 -0.494655 0.099824
In [12]: from scipy.stats import zscore
In [13]: df.apply(zscore) # (Credit: Manuel)
Out[13]:
randomA randomB randomC
0 0.798350 -0.106335 0.731041
1 1.505002 1.939828 -1.577295
2 -0.407899 -0.875374 -0.545799
3 -1.207392 -0.463464 1.292230
4 -0.688061 -0.494655 0.099824