import pandas as pd
from sklearn.model_selection import train_test_split
from interpret.glassbox import ExplainableBoostingClassifier
from json import load
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
from interpret import show
import gamchanger as gc
# Register InlineProvider as the visualization provider used by `show`.
set_visualize_provider(InlineProvider())

# Editable Interpretable Models
Load the libraries
Load the data
# Download the UCI Adult data set. header=None tells pandas not to consume the
# first record as column names.
# NOTE(review): the raw file may separate fields with ", ", leaving a leading
# space in string columns -- confirm before matching on category labels.
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    header=None,
)

# Name the 15 columns by position; the last one ("Income") is used as the
# prediction target further down.
df.columns = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income",
]

# Explore the data
df.head()

| | Age | WorkClass | fnlwgt | Education | EducationNum | MaritalStatus | Occupation | Relationship | Race | Gender | CapitalGain | CapitalLoss | HoursPerWeek | NativeCountry | Income |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 39 | State-gov | 77516 | Bachelors | 13 | Never-married | Adm-clerical | Not-in-family | White | Male | 2174 | 0 | 40 | United-States | <=50K |
| 1 | 50 | Self-emp-not-inc | 83311 | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 13 | United-States | <=50K |
| 2 | 38 | Private | 215646 | HS-grad | 9 | Divorced | Handlers-cleaners | Not-in-family | White | Male | 0 | 0 | 40 | United-States | <=50K |
| 3 | 53 | Private | 234721 | 11th | 7 | Married-civ-spouse | Handlers-cleaners | Husband | Black | Male | 0 | 0 | 40 | United-States | <=50K |
| 4 | 28 | Private | 338409 | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Wife | Black | Female | 0 | 0 | 40 | Cuba | <=50K |
df.shape
(32561, 15)
df.isna().sum()
Age 0
WorkClass 0
fnlwgt 0
Education 0
EducationNum 0
MaritalStatus 0
Occupation 0
Relationship 0
Race 0
Gender 0
CapitalGain 0
CapitalLoss 0
HoursPerWeek 0
NativeCountry 0
Income 0
dtype: int64
# Every column except the last is a feature; the last column is the target.
train_cols = df.columns[0:-1]
label = df.columns[-1]
X = df[train_cols]
y = df[label]

# One seed drives both the train/test split and the model below.
seed = 163

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=seed
)

# Train the model
ebm = ExplainableBoostingClassifier(random_state=seed)
ebm.fit(X_train,y_train)
ExplainableBoostingClassifier(feature_names=['Age', 'WorkClass', 'fnlwgt',
'Education', 'EducationNum',
'MaritalStatus', 'Occupation',
'Relationship', 'Race', 'Gender',
'CapitalGain', 'CapitalLoss',
'HoursPerWeek', 'NativeCountry',
'Relationship x HoursPerWeek',
'Age x Relationship',
'EducationNum x Occupation',
'EducationNum x MaritalStatus',
'Age x HoursPerWeek',
'MaritalSta...
feature_types=['continuous', 'categorical',
'continuous', 'categorical',
'continuous', 'categorical',
'categorical', 'categorical',
'categorical', 'categorical',
'continuous', 'continuous',
'continuous', 'categorical',
'interaction', 'interaction',
'interaction', 'interaction',
'interaction', 'interaction',
'interaction', 'interaction',
'interaction', 'interaction'],
random_state=163)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
ExplainableBoostingClassifier(feature_names=['Age', 'WorkClass', 'fnlwgt',
'Education', 'EducationNum',
'MaritalStatus', 'Occupation',
'Relationship', 'Race', 'Gender',
'CapitalGain', 'CapitalLoss',
'HoursPerWeek', 'NativeCountry',
'Relationship x HoursPerWeek',
'Age x Relationship',
'EducationNum x Occupation',
'EducationNum x MaritalStatus',
'Age x HoursPerWeek',
'MaritalSta...
feature_types=['continuous', 'categorical',
'continuous', 'categorical',
'continuous', 'categorical',
'categorical', 'categorical',
'categorical', 'categorical',
'continuous', 'continuous',
'continuous', 'categorical',
'interaction', 'interaction',
'interaction', 'interaction',
'interaction', 'interaction',
'interaction', 'interaction',
'interaction', 'interaction'],
random_state=163)
ebm.feature_names
['Age',
'WorkClass',
'fnlwgt',
'Education',
'EducationNum',
'MaritalStatus',
'Occupation',
'Relationship',
'Race',
'Gender',
'CapitalGain',
'CapitalLoss',
'HoursPerWeek',
'NativeCountry',
'Relationship x HoursPerWeek',
'Age x Relationship',
'EducationNum x Occupation',
'EducationNum x MaritalStatus',
'Age x HoursPerWeek',
'MaritalStatus x HoursPerWeek',
'WorkClass x CapitalLoss',
'Age x CapitalLoss',
'Occupation x Relationship',
'fnlwgt x HoursPerWeek']
Global explanations
# Build the model-wide explanation object and hand it to `show` for rendering.
ebm_global = ebm.explain_global()
show(ebm_global)

# Local Explanations
# Explain the first five test rows. `.iloc` keeps the slice explicitly
# positional: plain `y_test[:5]` on an integer-indexed Series emits a pandas
# FutureWarning because that form is slated to become label-based.
ebm_local = ebm.explain_local(X_test.iloc[:5], y_test.iloc[:5])
show(ebm_local)

# Edit the model to match expert expectations
# Open the GAM Changer view on the trained model and the test split.
gc.visualize(ebm=ebm, x_test=X_test, y_test=y_test)

# Load the model and check if the changes took place
# NOTE(review): machine-specific absolute path; presumably the edit file saved
# from the GAM Changer UI -- confirm and consider making it configurable.
gamchanger_path = '/home/thulasiram/personal/going_deep_and_wide/Interpretable_ml/interpret_ai/interpret_ai/data/modified_model.gamchanger'

# Use a context manager so the file handle is closed once the JSON is parsed.
with open(gamchanger_path, 'r') as gamchanger_file:
    gc_dict = load(gamchanger_file)

# Apply the loaded edits to the trained model and visualize the result.
new_ebm = gc.get_edited_model(ebm, gc_dict)
gc.visualize(ebm=new_ebm, x_test=X_test, y_test=y_test)