Generate frequent itemsets using Apriori Algorithm in python and also generate association
rules for any market basket data.
import pandas as pd
[2]: import numpy as np
[3]: import matplotlib.pyplot as plt
[4]: pip install mlxtend
Requirement already satisfied: mlxtend in /usr/local/lib/python3.10/distpackages (0.22.0)
Requirement already satisfied: scipy>=1.2.1 in /usr/local/lib/python3.10/distpackages (from mlxtend) (1.11.4)
Requirement already satisfied: numpy>=1.16.2 in /usr/local/lib/python3.10/distpackages (from mlxtend) (1.25.2)
Requirement already satisfied: pandas>=0.24.2 in /usr/local/lib/python3.10/distpackages (from mlxtend) (1.5.3)
Requirement already satisfied: scikit-learn>=1.0.2 in
/usr/local/lib/python3.10/dist-packages (from mlxtend) (1.2.2)
Requirement already satisfied: matplotlib>=3.0.0 in
/usr/local/lib/python3.10/dist-packages (from mlxtend) (3.7.1)
Requirement already satisfied: joblib>=0.13.2 in /usr/local/lib/python3.10/distpackages (from mlxtend) (1.3.2)
Requirement already satisfied: setuptools in /usr/local/lib/python3.10/distpackages (from mlxtend) (67.7.2)
Requirement already satisfied: contourpy>=1.0.1 in
/usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->mlxtend)
(1.2.0)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/distpackages (from matplotlib>=3.0.0->mlxtend) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in
/usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->mlxtend)
(4.49.0)
Requirement already satisfied: kiwisolver>=1.0.1 in
/usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->mlxtend)
(1.4.5)
1
Requirement already satisfied: packaging>=20.0 in
/usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->mlxtend) (23.2)
Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/distpackages (from matplotlib>=3.0.0->mlxtend) (9.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in
/usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->mlxtend)
(3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in
/usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->mlxtend)
(2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/distpackages (from pandas>=0.24.2->mlxtend) (2023.4)
Requirement already satisfied: threadpoolctl>=2.0.0 in
/usr/local/lib/python3.10/dist-packages (from scikit-learn>=1.0.2->mlxtend)
(3.3.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/distpackages (from python-dateutil>=2.7->matplotlib>=3.0.0->mlxtend) (1.16.0)
[5]: from mlxtend.frequent_patterns import apriori, association_rules
[6]: df = pd.read_csv('retail.csv')
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[7]: df.head()
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[7]: 0 1 2 3 4 5 6
0 Bread Wine Eggs Meat Cheese Pencil Diaper
1 Bread Cheese Meat Diaper Wine Milk Pencil
2 Cheese Meat Eggs Milk Wine NaN NaN
3 Cheese Meat Eggs Milk Wine NaN NaN
4 Meat Pencil Wine NaN NaN NaN NaN
[8]: items = set()
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
2
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[9]: for col in df:
items.update(df[col].unique())
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[10]: print(items)
{'Bread', 'Bagel', 'Meat', 'Wine', 'Milk', 'Pencil', 'Cheese', 'Eggs', 'Diaper',
nan}
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[11]: itemset = set(items)
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[12]: enncoded_vals = []
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[16]: for index, row in df.iterrows():
rowset = set(row)
3
labels = {}
uncommons = list(itemset - rowset)
commons = list(itemset.intersection(rowset))
for uc in uncommons:
labels[uc] = 0
for c in commons:
labels[c] = 1
enncoded_vals.append(labels)
enncoded_vals[0]
ohe_df = pd.DataFrame(enncoded_vals)
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[17]: ohe_df
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[17]: Milk NaN Bagel Bread Meat Wine Pencil Cheese Eggs Diaper
0 0 0 0 1 1 1 1 1 1 1
1 1 0 0 1 1 1 1 1 0 1
2 1 1 0 0 1 1 0 1 1 0
3 1 1 0 0 1 1 0 1 1 0
4 0 1 0 0 1 1 1 0 0 0
.. … … … … … … … … … …
310 0 1 0 1 0 0 0 1 1 0
311 1 1 0 0 1 0 1 0 0 0
312 0 0 0 1 1 1 1 1 1 1
313 0 1 0 0 1 0 0 1 0 0
314 0 1 1 1 1 1 0 0 1 0
[315 rows x 10 columns]
[18]: freq_items = apriori(ohe_df,min_support=0.2,use_colnames=True)
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
4
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
/usr/local/lib/python3.10/distpackages/mlxtend/frequent_patterns/fpcommon.py:110: DeprecationWarning:
DataFrames with non-bool types result in worse computationalperformance and
their support might be discontinued in the future.Please use a DataFrame with
bool type
warnings.warn(
[19]: freq_items
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[19]: support itemsets
0 0.501587 (Milk)
1 0.869841 (nan)
2 0.425397 (Bagel)
3 0.504762 (Bread)
4 0.476190 (Meat)
5 0.438095 (Wine)
6 0.361905 (Pencil)
7 0.501587 (Cheese)
8 0.438095 (Eggs)
9 0.406349 (Diaper)
10 0.409524 (Milk, nan)
11 0.225397 (Milk, Bagel)
12 0.279365 (Milk, Bread)
13 0.244444 (Meat, Milk)
14 0.219048 (Wine, Milk)
15 0.304762 (Cheese, Milk)
16 0.244444 (Eggs, Milk)
17 0.336508 (nan, Bagel)
18 0.396825 (Bread, nan)
19 0.368254 (Meat, nan)
20 0.317460 (Wine, nan)
21 0.266667 (Pencil, nan)
22 0.393651 (Cheese, nan)
23 0.336508 (Eggs, nan)
24 0.317460 (nan, Diaper)
25 0.279365 (Bread, Bagel)
26 0.206349 (Meat, Bread)
27 0.244444 (Wine, Bread)
5
28 0.200000 (Pencil, Bread)
29 0.238095 (Cheese, Bread)
30 0.231746 (Bread, Diaper)
31 0.250794 (Meat, Wine)
32 0.323810 (Meat, Cheese)
33 0.266667 (Meat, Eggs)
34 0.200000 (Wine, Pencil)
35 0.269841 (Wine, Cheese)
36 0.241270 (Wine, Eggs)
37 0.234921 (Wine, Diaper)
38 0.200000 (Cheese, Pencil)
39 0.298413 (Cheese, Eggs)
40 0.200000 (Cheese, Diaper)
41 0.234921 (Cheese, Milk, nan)
42 0.203175 (Meat, Cheese, Milk)
43 0.212698 (Bread, nan, Bagel)
44 0.234921 (Meat, Cheese, nan)
45 0.219048 (Cheese, Eggs, nan)
46 0.215873 (Meat, Cheese, Eggs)
[22]: freq_items = apriori(ohe_df,min_support=0.2)
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
/usr/local/lib/python3.10/distpackages/mlxtend/frequent_patterns/fpcommon.py:110: DeprecationWarning:
DataFrames with non-bool types result in worse computationalperformance and
their support might be discontinued in the future.Please use a DataFrame with
bool type
warnings.warn(
[23]: freq_items
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[23]: support itemsets
0 0.501587 (0)
1 0.869841 (1)
6
2 0.425397 (2)
3 0.504762 (3)
4 0.476190 (4)
5 0.438095 (5)
6 0.361905 (6)
7 0.501587 (7)
8 0.438095 (8)
9 0.406349 (9)
10 0.409524 (0, 1)
11 0.225397 (0, 2)
12 0.279365 (0, 3)
13 0.244444 (0, 4)
14 0.219048 (0, 5)
15 0.304762 (0, 7)
16 0.244444 (0, 8)
17 0.336508 (1, 2)
18 0.396825 (1, 3)
19 0.368254 (1, 4)
20 0.317460 (1, 5)
21 0.266667 (1, 6)
22 0.393651 (1, 7)
23 0.336508 (8, 1)
24 0.317460 (1, 9)
25 0.279365 (2, 3)
26 0.206349 (3, 4)
27 0.244444 (3, 5)
28 0.200000 (3, 6)
29 0.238095 (3, 7)
30 0.231746 (9, 3)
31 0.250794 (4, 5)
32 0.323810 (4, 7)
33 0.266667 (8, 4)
34 0.200000 (5, 6)
35 0.269841 (5, 7)
36 0.241270 (8, 5)
37 0.234921 (9, 5)
38 0.200000 (6, 7)
39 0.298413 (8, 7)
40 0.200000 (9, 7)
41 0.234921 (0, 1, 7)
42 0.203175 (0, 4, 7)
43 0.212698 (1, 2, 3)
44 0.234921 (1, 4, 7)
45 0.219048 (8, 1, 7)
46 0.215873 (8, 4, 7)
[20]: rules = association_rules(freq_items,metric = "confidence" , min_threshold=0.6)
7
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[21]: rules
/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283:
DeprecationWarning: `should_run_async` will not call `transform_cell`
automatically in the future. Please pass the result to `transformed_cell`
argument and any exception that happen during thetransform in
`preprocessing_exc_tuple` in IPython 7.17 and above.
and should_run_async(code)
[21]: antecedents consequents antecedent support consequent support \
0 (Milk) (nan) 0.501587 0.869841
1 (Cheese) (Milk) 0.501587 0.501587
2 (Milk) (Cheese) 0.501587 0.501587
3 (Bagel) (nan) 0.425397 0.869841
4 (Bread) (nan) 0.504762 0.869841
5 (Meat) (nan) 0.476190 0.869841
6 (Wine) (nan) 0.438095 0.869841
7 (Pencil) (nan) 0.361905 0.869841
8 (Cheese) (nan) 0.501587 0.869841
9 (Eggs) (nan) 0.438095 0.869841
10 (Diaper) (nan) 0.406349 0.869841
11 (Bagel) (Bread) 0.425397 0.504762
12 (Meat) (Cheese) 0.476190 0.501587
13 (Cheese) (Meat) 0.501587 0.476190
14 (Eggs) (Meat) 0.438095 0.476190
15 (Wine) (Cheese) 0.438095 0.501587
16 (Eggs) (Cheese) 0.438095 0.501587
17 (Cheese, Milk) (nan) 0.304762 0.869841
18 (Meat, Cheese) (Milk) 0.323810 0.501587
19 (Meat, Milk) (Cheese) 0.244444 0.501587
20 (Cheese, Milk) (Meat) 0.304762 0.476190
21 (Bread, Bagel) (nan) 0.279365 0.869841
22 (nan, Bagel) (Bread) 0.336508 0.504762
23 (Meat, Cheese) (nan) 0.323810 0.869841
24 (Meat, nan) (Cheese) 0.368254 0.501587
25 (Cheese, Eggs) (nan) 0.298413 0.869841
26 (Eggs, nan) (Cheese) 0.336508 0.501587
27 (Meat, Cheese) (Eggs) 0.323810 0.438095
28 (Meat, Eggs) (Cheese) 0.266667 0.501587
29 (Cheese, Eggs) (Meat) 0.298413 0.476190
8
support confidence lift leverage conviction zhangs_metric
0 0.409524 0.816456 0.938626 -0.026778 0.709141 -0.115976
1 0.304762 0.607595 1.211344 0.053172 1.270148 0.350053
2 0.304762 0.607595 1.211344 0.053172 1.270148 0.350053
3 0.336508 0.791045 0.909413 -0.033520 0.622902 -0.147743
4 0.396825 0.786164 0.903801 -0.042237 0.608683 -0.176903
5 0.368254 0.773333 0.889051 -0.045956 0.574230 -0.192405
6 0.317460 0.724638 0.833069 -0.063613 0.472682 -0.262869
7 0.266667 0.736842 0.847100 -0.048133 0.494603 -0.220499
8 0.393651 0.784810 0.902245 -0.042651 0.604855 -0.178565
9 0.336508 0.768116 0.883053 -0.044565 0.561310 -0.190735
10 0.317460 0.781250 0.898152 -0.035999 0.595011 -0.160381
11 0.279365 0.656716 1.301042 0.064641 1.442650 0.402687
12 0.323810 0.680000 1.355696 0.084958 1.557540 0.500891
13 0.323810 0.645570 1.355696 0.084958 1.477891 0.526414
14 0.266667 0.608696 1.278261 0.058050 1.338624 0.387409
15 0.269841 0.615942 1.227986 0.050098 1.297754 0.330409
16 0.298413 0.681159 1.358008 0.078670 1.563203 0.469167
17 0.234921 0.770833 0.886177 -0.030174 0.567965 -0.155938
18 0.203175 0.627451 1.250931 0.040756 1.337845 0.296655
19 0.203175 0.831169 1.657077 0.080564 2.952137 0.524816
20 0.203175 0.666667 1.400000 0.058050 1.571429 0.410959
21 0.212698 0.761364 0.875290 -0.030305 0.545427 -0.165075
22 0.212698 0.632075 1.252225 0.042842 1.346032 0.303578
23 0.234921 0.725490 0.834049 -0.046742 0.474150 -0.227353
24 0.234921 0.637931 1.271825 0.050209 1.376568 0.338313
25 0.219048 0.734043 0.843881 -0.040524 0.489397 -0.208666
26 0.219048 0.650943 1.297767 0.050260 1.427885 0.345815
27 0.215873 0.666667 1.521739 0.074014 1.685714 0.507042
28 0.215873 0.809524 1.613924 0.082116 2.616667 0.518717
29 0.215873 0.723404 1.519149 0.073772 1.893773 0.487091