# Sample data for the analysis below: five item lists, one per row.
# 'Onion' is listed twice in the last row.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import OnehotTransactions
# Encode the item lists into an array and wrap it in a DataFrame whose column
# labels are taken from the fitted encoder (oht.columns_).
# NOTE(review): OnehotTransactions is believed to be deprecated in newer
# mlxtend releases in favour of TransactionEncoder -- confirm against the
# installed version before upgrading.
oht = OnehotTransactions()
oht_ary = oht.fit(dataset).transform(dataset)
df = pd.DataFrame(oht_ary, columns=oht.columns_)
print(df)

# Mine itemsets using a 0.6 support threshold; use_colnames=True makes the
# result show item names (see the sample output at the end of the file).
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
print(frequent_itemsets)

# Rules filtered on the "confidence" metric with threshold 0.7. The result is
# captured and printed: as a bare expression statement in a script it would
# be computed and then silently thrown away.
confidence_rules = association_rules(
    frequent_itemsets, metric="confidence", min_threshold=0.7
)
print(confidence_rules)

# Rules filtered on the "lift" metric with threshold 1.2.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)
print(rules)
"""
Below is the output of the frequent_itemsets and rules prints
support itemsets
0 0.8 [Eggs]
1 1.0 [Kidney Beans]
2 0.6 [Milk]
3 0.6 [Onion]
4 0.6 [Yogurt]
5 0.8 [Eggs, Kidney Beans]
6 0.6 [Eggs, Onion]
7 0.6 [Kidney Beans, Milk]
8 0.6 [Kidney Beans, Onion]
9 0.6 [Kidney Beans, Yogurt]
10 0.6 [Eggs, Kidney Beans, Onion]
antecedants consequents support confidence lift
0 (Kidney Beans, Onion) (Eggs) 0.6 1.00 1.25
1 (Kidney Beans, Eggs) (Onion) 0.8 0.75 1.25
2 (Onion) (Kidney Beans, Eggs) 0.6 1.00 1.25
3 (Eggs) (Kidney Beans, Onion) 0.8 0.75 1.25
4 (Onion) (Eggs) 0.6 1.00 1.25
5 (Eggs) (Onion) 0.8 0.75 1.25
"""