Editable Interpretable Models

Load the libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from interpret.glassbox import ExplainableBoostingClassifier
from json import load
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
from interpret import show
import gamchanger as gc
# Select interpret's inline visualization provider for later `show(...)` calls.
# NOTE(review): presumably this embeds the dashboards directly in the notebook
# output rather than serving them separately — confirm against interpret docs.
set_visualize_provider(InlineProvider())

Load the data

# The UCI Adult file ships without a header row, so the column names are
# supplied here, in file order, and handed straight to the CSV reader.
adult_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
adult_columns = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
]
df = pd.read_csv(adult_url, header=None, names=adult_columns)

Explore the data

# Preview the first five rows to sanity-check the assigned column names.
df.head()
Age WorkClass fnlwgt Education EducationNum MaritalStatus Occupation Relationship Race Gender CapitalGain CapitalLoss HoursPerWeek NativeCountry Income
0 39 State-gov 77516 Bachelors 13 Never-married Adm-clerical Not-in-family White Male 2174 0 40 United-States <=50K
1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse Exec-managerial Husband White Male 0 0 13 United-States <=50K
2 38 Private 215646 HS-grad 9 Divorced Handlers-cleaners Not-in-family White Male 0 0 40 United-States <=50K
3 53 Private 234721 11th 7 Married-civ-spouse Handlers-cleaners Husband Black Male 0 0 40 United-States <=50K
4 28 Private 338409 Bachelors 13 Married-civ-spouse Prof-specialty Wife Black Female 0 0 40 Cuba <=50K
# (rows, columns) of the loaded frame.
df.shape
(32561, 15)
# Count NaN entries per column.
# NOTE(review): this only detects real NaN values; if the raw file marks
# missing entries with a placeholder string they will not be counted — confirm.
df.isna().sum()
Age              0
WorkClass        0
fnlwgt           0
Education        0
EducationNum     0
MaritalStatus    0
Occupation       0
Relationship     0
Race             0
Gender           0
CapitalGain      0
CapitalLoss      0
HoursPerWeek     0
NativeCountry    0
Income           0
dtype: int64
# The last column is the prediction target; every column before it is a feature.
label = df.columns[-1]
train_cols = df.columns[:-1]
X, y = df[train_cols], df[label]

# Fixed seed so the 80/20 split (and the model fit that reuses it) is repeatable.
seed = 163
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)

Train the model

# Create the Explainable Boosting classifier, reusing `seed` from the
# train/test split as its random_state.
ebm = ExplainableBoostingClassifier(random_state=seed)
# Left as a bare trailing expression so the notebook displays the fitted estimator.
ebm.fit(X_train,y_train)
ExplainableBoostingClassifier(feature_names=['Age', 'WorkClass', 'fnlwgt',
                                             'Education', 'EducationNum',
                                             'MaritalStatus', 'Occupation',
                                             'Relationship', 'Race', 'Gender',
                                             'CapitalGain', 'CapitalLoss',
                                             'HoursPerWeek', 'NativeCountry',
                                             'Relationship x HoursPerWeek',
                                             'Age x Relationship',
                                             'EducationNum x Occupation',
                                             'EducationNum x MaritalStatus',
                                             'Age x HoursPerWeek',
                                             'MaritalSta...
                              feature_types=['continuous', 'categorical',
                                             'continuous', 'categorical',
                                             'continuous', 'categorical',
                                             'categorical', 'categorical',
                                             'categorical', 'categorical',
                                             'continuous', 'continuous',
                                             'continuous', 'categorical',
                                             'interaction', 'interaction',
                                             'interaction', 'interaction',
                                             'interaction', 'interaction',
                                             'interaction', 'interaction',
                                             'interaction', 'interaction'],
                              random_state=163)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# List the fitted model's terms: the 14 input columns followed by the
# "A x B" pairwise interaction terms (see the output below).
ebm.feature_names
['Age',
 'WorkClass',
 'fnlwgt',
 'Education',
 'EducationNum',
 'MaritalStatus',
 'Occupation',
 'Relationship',
 'Race',
 'Gender',
 'CapitalGain',
 'CapitalLoss',
 'HoursPerWeek',
 'NativeCountry',
 'Relationship x HoursPerWeek',
 'Age x Relationship',
 'EducationNum x Occupation',
 'EducationNum x MaritalStatus',
 'Age x HoursPerWeek',
 'MaritalStatus x HoursPerWeek',
 'WorkClass x CapitalLoss',
 'Age x CapitalLoss',
 'Occupation x Relationship',
 'fnlwgt x HoursPerWeek']

Global explanations


# Build the model-level (global) explanation object and render it.
ebm_global = ebm.explain_global()
show(ebm_global)

Local explanations

# Explain the model's predictions for the first five held-out rows.
# Both slices use positional `.iloc`: after the shuffled split the integer
# index is no longer 0..n-1, and plain `series[i:j]` on an integer-dtype index
# is deprecated (pandas FutureWarning) and slated to become label-based, at
# which point it would no longer select the first five rows.
ebm_local = ebm.explain_local(X_test.iloc[:5], y_test.iloc[:5])
show(ebm_local)

Edit the model to match expert expectations

# Open the GAM Changer view on the fitted EBM, passing the held-out split.
# NOTE(review): presumably edits made in this view are exported to a
# `.gamchanger` file by the user — confirm against the gamchanger docs.
gc.visualize(ebm=ebm,x_test=X_test,y_test=y_test)

Load the edited model and check that the changes took place

# Location of the edit history exported from GAM Changer.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
gamchanger_path = '/home/thulasiram/personal/going_deep_and_wide/Interpretable_ml/interpret_ai/interpret_ai/data/modified_model.gamchanger'

# Read the JSON inside a context manager so the file handle is always closed
# (the previous bare `open(...)` was never closed).
with open(gamchanger_path, 'r', encoding='utf-8') as gc_file:
    gc_dict = load(gc_file)

# Apply the recorded edits to the original EBM, then visualize the edited
# model on the same held-out split to verify the changes took effect.
new_ebm = gc.get_edited_model(ebm,gc_dict)
gc.visualize(ebm=new_ebm,x_test=X_test,y_test=y_test)