Editable Interpretable Models

Load the libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from interpret.glassbox import ExplainableBoostingClassifier
from json import load
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
from interpret import show
import gamchanger as gc
# Register interpret's InlineProvider as the visualization provider.
# NOTE(review): presumably this makes `show(...)` render inside the notebook
# rather than through a separate dashboard -- confirm against interpret docs.
set_visualize_provider(InlineProvider())

Load the data

# The remote CSV has no header row, so the column labels are supplied
# explicitly (in file order) at read time.
ADULT_DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
ADULT_COLUMNS = [
    "Age",
    "WorkClass",
    "fnlwgt",
    "Education",
    "EducationNum",
    "MaritalStatus",
    "Occupation",
    "Relationship",
    "Race",
    "Gender",
    "CapitalGain",
    "CapitalLoss",
    "HoursPerWeek",
    "NativeCountry",
    "Income",
]

df = pd.read_csv(ADULT_DATA_URL, header=None, names=ADULT_COLUMNS)

Explore the data

# Preview the first rows to sanity-check the column labels assigned above.
df.head()

	Age	WorkClass	fnlwgt	Education	EducationNum	MaritalStatus	Occupation	Relationship	Race	Gender	CapitalGain	CapitalLoss	HoursPerWeek	NativeCountry	Income
0	39	State-gov	77516	Bachelors	13	Never-married	Adm-clerical	Not-in-family	White	Male	2174	0	40	United-States	<=50K
1	50	Self-emp-not-inc	83311	Bachelors	13	Married-civ-spouse	Exec-managerial	Husband	White	Male	0	0	13	United-States	<=50K
2	38	Private	215646	HS-grad	9	Divorced	Handlers-cleaners	Not-in-family	White	Male	0	0	40	United-States	<=50K
3	53	Private	234721	11th	7	Married-civ-spouse	Handlers-cleaners	Husband	Black	Male	0	0	40	United-States	<=50K
4	28	Private	338409	Bachelors	13	Married-civ-spouse	Prof-specialty	Wife	Black	Female	0	0	40	Cuba	<=50K

# (rows, columns) of the loaded frame.
df.shape

(32561, 15)

# Count NaN entries per column.
# NOTE(review): this only counts values pandas parsed as NaN; if the raw file
# marks missing values with a placeholder string (e.g. "?"), they would not be
# counted here -- confirm against the dataset description.
df.isna().sum()

Age              0
WorkClass        0
fnlwgt           0
Education        0
EducationNum     0
MaritalStatus    0
Occupation       0
Relationship     0
Race             0
Gender           0
CapitalGain      0
CapitalLoss      0
HoursPerWeek     0
NativeCountry    0
Income           0
dtype: int64

# Fixed seed shared by the split and the model so runs are repeatable.
seed = 163

# The last column is the target; every column before it is a feature.
label = df.columns[-1]
train_cols = df.columns[:-1]

X = df.loc[:, train_cols]
y = df.loc[:, label]

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)

Train the model

# Build the classifier with the same seed used for the train/test split.
ebm = ExplainableBoostingClassifier(random_state=seed)
# Fit on the training split; as the last expression of the cell, the returned
# estimator's repr is what gets displayed below.
ebm.fit(X_train,y_train)

ExplainableBoostingClassifier(feature_names=['Age', 'WorkClass', 'fnlwgt',
                                             'Education', 'EducationNum',
                                             'MaritalStatus', 'Occupation',
                                             'Relationship', 'Race', 'Gender',
                                             'CapitalGain', 'CapitalLoss',
                                             'HoursPerWeek', 'NativeCountry',
                                             'Relationship x HoursPerWeek',
                                             'Age x Relationship',
                                             'EducationNum x Occupation',
                                             'EducationNum x MaritalStatus',
                                             'Age x HoursPerWeek',
                                             'MaritalSta...
                              feature_types=['continuous', 'categorical',
                                             'continuous', 'categorical',
                                             'continuous', 'categorical',
                                             'categorical', 'categorical',
                                             'categorical', 'categorical',
                                             'continuous', 'continuous',
                                             'continuous', 'categorical',
                                             'interaction', 'interaction',
                                             'interaction', 'interaction',
                                             'interaction', 'interaction',
                                             'interaction', 'interaction',
                                             'interaction', 'interaction'],
                              random_state=163)

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

# Names of the terms held by the fitted model. In the recorded output these are
# the 14 input columns followed by pairwise "A x B" interaction terms.
ebm.feature_names

['Age',
 'WorkClass',
 'fnlwgt',
 'Education',
 'EducationNum',
 'MaritalStatus',
 'Occupation',
 'Relationship',
 'Race',
 'Gender',
 'CapitalGain',
 'CapitalLoss',
 'HoursPerWeek',
 'NativeCountry',
 'Relationship x HoursPerWeek',
 'Age x Relationship',
 'EducationNum x Occupation',
 'EducationNum x MaritalStatus',
 'Age x HoursPerWeek',
 'MaritalStatus x HoursPerWeek',
 'WorkClass x CapitalLoss',
 'Age x CapitalLoss',
 'Occupation x Relationship',
 'fnlwgt x HoursPerWeek']

Global explanations


# Build the model-wide explanation object, then render it with interpret's `show`.
ebm_global = ebm.explain_global()
show(ebm_global)

Local explanations

# Explain the first five held-out rows. `.iloc` makes the slice explicitly
# positional: `y_test` keeps the original integer row labels after the split,
# and `series[i:j]` on an integer index is deprecated (it raises a
# FutureWarning and is slated to become label-based).
ebm_local = ebm.explain_local(X_test.iloc[:5], y_test.iloc[:5])

/tmp/ipykernel_312818/1167488945.py:1: FutureWarning:

The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.

# Render the per-row explanations built for the selected test rows.
show(ebm_local)

Edit the model to match expert expectations

# Launch gamchanger's visualization for the fitted model, passing the held-out
# split. Per the section heading, this is where the model is edited by hand.
# NOTE(review): the edited model is presumably exported from this widget to the
# `.gamchanger` file loaded further down -- confirm the export step.
gc.visualize(ebm=ebm,x_test=X_test,y_test=y_test)

Load the model and check if the changes took place

# Read the saved edit file as JSON. The `with` block closes the file handle
# deterministically instead of leaving it to the garbage collector.
# NOTE(review): the path is absolute and machine-specific; point it at your own
# exported `.gamchanger` file before running.
with open('/home/thulasiram/personal/going_deep_and_wide/Interpretable_ml/interpret_ai/interpret_ai/data/modified_model.gamchanger','r') as gamchanger_file:
    gc_dict = load(gamchanger_file)

# Combine the originally fitted model with the loaded edit data to obtain the
# edited model.
new_ebm = gc.get_edited_model(ebm,gc_dict)

# Visualize the edited model on the same held-out split to check that the
# changes took effect.
gc.visualize(ebm=new_ebm,x_test=X_test,y_test=y_test)