1#Python, pandas
2#Count missing values for each column of the dataframe df
3
4df.isnull().sum()
5
1# Six(6) ways to handle NaN values
2
3# 1. Drop/delete any rows with NaN values
4df.dropna(axis = 0) #row is axis = 0
5# 2. Drop/delete any columns with NaN values
6df.dropna(axis = 1) #column is axis = 1
7# 3. Replace all NaN values with 0
8df.fillna(0)
9# 4. Replace NaN values with the previous value in the column, Fill Forward
10df.fillna(method = 'ffill', axis = 0) # use axis = 1 to fill along each row instead; newer pandas: df.ffill(axis = 0)
11# 5. Replace NaN values with the next value in the column, Fill Backward
12df.fillna(method = 'backfill', axis = 0) # use axis = 1 to fill along each row instead; newer pandas: df.bfill(axis = 0)
13# 6. Replace NaN values by linear interpolation between the neighbouring values in the column
14df.interpolate(method = 'linear', axis = 0) # use axis = 1 to interpolate along each row instead
15
16#NB: 1. For the last three options, depending on the method and axis, NaN
17# values in the first row, last row, first column or last column may be left
18# unchanged. 2. Remember to include inplace = True if you want the original
19# dataframe to be modified; otherwise the method returns a new dataframe and
20# df itself is left unchanged. E.g.
21df.dropna(axis = 0, inplace = True)
1In [27]: df
2Out[27]:
3 A B C
41 NaN -2.027325 1.533582
52 NaN NaN 0.461821
63 -0.788073 NaN NaN
74 -0.916080 -0.612343 NaN
85 -0.887858 1.033826 NaN
9
10In [28]: df.isnull().sum() # Returns the number of NaN values in each column.
11Out[28]:
12A 2
13B 2
14C 3
15
16In [29]: df.isnull().sum().sum() # Returns the total number of NaN values in the dataframe
17Out[29]:
187