1df['ext price'].value_counts(bins=4, sort=False)
2
3#res
4(55603.988000000005, 87998.212] 5
5(87998.212, 120263.375] 12
6(120263.375, 152528.538] 2
7(152528.538, 184793.7] 1
8Name: ext price, dtype: int64
9
1pd.interval_range(start=0, freq=10000, end=200000, closed='left')
2
3#res
4IntervalIndex([[0, 10000), [10000, 20000), [20000, 30000), [30000, 40000), [40000, 50000) ... [150000, 160000),
5[160000, 170000), [170000, 180000), [180000, 190000), [190000, 200000)],
6 closed='left',
7 dtype='interval[int64]')
8
1interval_range = pd.interval_range(start=0, freq=10000, end=200000)
2df['cut_ex2'] = pd.cut(df['ext price'], bins=interval_range, labels=[1,2,3])
3df.head()
4
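5#There is a downside to using interval_range: you cannot define custom labels. pd.cut ignores the labels argument when bins is an IntervalIndex, so the result keeps the interval labels.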
1pd.cut(df['ext price'], bins=4).value_counts() #all bins have the same width
2
3#res
4(87998.212, 120263.375] 12 #the number of observations differs from bin to bin
5(55603.989, 87998.212] 5
6(120263.375, 152528.538] 2
7(152528.538, 184793.7] 1
8Name: ext price, dtype: int64
9
10#If you want the items distributed equally across your bins, use qcut. If you want to define your own numeric bin ranges, use cut.
1pd.cut(df['ext price'], bins=4)
2#res
30 (55603.989, 87998.212]
41 (87998.212, 120263.375]
52 (55603.989, 87998.212]
63 (120263.375, 152528.538]
74 (87998.212, 120263.375]
8....
914 (87998.212, 120263.375]
1015 (120263.375, 152528.538]
1116 (87998.212, 120263.375]
1217 (87998.212, 120263.375]
1318 (87998.212, 120263.375]
1419 (87998.212, 120263.375]
15Name: ext price, dtype: category
16Categories (4, interval[float64]): [(55603.989, 87998.212] < (87998.212, 120263.375] < (120263.375, 152528.538] < (152528.538, 184793.7]]
1\
2df['quantile_ex_4'] = pd.qcut(df['ext price'],
3 q=[0, .2, .4, .6, .8, 1], #quintile edges (5 bins); the bin widths will vary
4 labels=False, #return integer bin indicators instead of interval labels
5 precision=0)
6df.head()
7#all bins will have roughly the same number of observations