import pandas as pd
from sklearn.model_selection import train_test_split
from interpret.glassbox import ExplainableBoostingClassifier
from json import load
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
from interpret import show
import gamchanger as gc
# Register an InlineProvider instance as interpret's visualization provider.
set_visualize_provider(InlineProvider())
Editable Interpretable Models
Load the libraries
Load the data
# Read the Adult data file straight from the UCI repository. header=None
# tells pandas not to treat the first row as column names, so the names
# are assigned explicitly below.
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    header=None,
)
df.columns = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income"
]
Explore the data
# Preview the first rows of the frame (head() defaults to 5 rows).
df.head()
| | Age | WorkClass | fnlwgt | Education | EducationNum | MaritalStatus | Occupation | Relationship | Race | Gender | CapitalGain | CapitalLoss | HoursPerWeek | NativeCountry | Income |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 39 | State-gov | 77516 | Bachelors | 13 | Never-married | Adm-clerical | Not-in-family | White | Male | 2174 | 0 | 40 | United-States | <=50K |
1 | 50 | Self-emp-not-inc | 83311 | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 13 | United-States | <=50K |
2 | 38 | Private | 215646 | HS-grad | 9 | Divorced | Handlers-cleaners | Not-in-family | White | Male | 0 | 0 | 40 | United-States | <=50K |
3 | 53 | Private | 234721 | 11th | 7 | Married-civ-spouse | Handlers-cleaners | Husband | Black | Male | 0 | 0 | 40 | United-States | <=50K |
4 | 28 | Private | 338409 | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Wife | Black | Female | 0 | 0 | 40 | Cuba | <=50K |
# (number of rows, number of columns) of the loaded frame.
df.shape
(32561, 15)
# Count missing (NaN) values per column.
df.isna().sum()
Age 0
WorkClass 0
fnlwgt 0
Education 0
EducationNum 0
MaritalStatus 0
Occupation 0
Relationship 0
Race 0
Gender 0
CapitalGain 0
CapitalLoss 0
HoursPerWeek 0
NativeCountry 0
Income 0
dtype: int64
# Every column except the last is a model input; the last column is the label.
train_cols = df.columns[0:-1]
label = df.columns[-1]
X = df[train_cols]
y = df[label]

# One seed is reused for the split and for the model so runs are repeatable.
seed = 163
# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=seed
)
Train the model
# Build the EBM with the shared seed and fit it on the training split.
ebm = ExplainableBoostingClassifier(random_state=seed)
ebm.fit(X_train, y_train)
ExplainableBoostingClassifier(feature_names=['Age', 'WorkClass', 'fnlwgt', 'Education', 'EducationNum', 'MaritalStatus', 'Occupation', 'Relationship', 'Race', 'Gender', 'CapitalGain', 'CapitalLoss', 'HoursPerWeek', 'NativeCountry', 'Relationship x HoursPerWeek', 'Age x Relationship', 'EducationNum x Occupation', 'EducationNum x MaritalStatus', 'Age x HoursPerWeek', 'MaritalSta... feature_types=['continuous', 'categorical', 'continuous', 'categorical', 'continuous', 'categorical', 'categorical', 'categorical', 'categorical', 'categorical', 'continuous', 'continuous', 'continuous', 'categorical', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction'], random_state=163)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
ExplainableBoostingClassifier(feature_names=['Age', 'WorkClass', 'fnlwgt', 'Education', 'EducationNum', 'MaritalStatus', 'Occupation', 'Relationship', 'Race', 'Gender', 'CapitalGain', 'CapitalLoss', 'HoursPerWeek', 'NativeCountry', 'Relationship x HoursPerWeek', 'Age x Relationship', 'EducationNum x Occupation', 'EducationNum x MaritalStatus', 'Age x HoursPerWeek', 'MaritalSta... feature_types=['continuous', 'categorical', 'continuous', 'categorical', 'continuous', 'categorical', 'categorical', 'categorical', 'categorical', 'categorical', 'continuous', 'continuous', 'continuous', 'categorical', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction', 'interaction'], random_state=163)
# List the fitted model's term names: the input columns plus the
# "A x B" interaction terms.
ebm.feature_names
['Age',
'WorkClass',
'fnlwgt',
'Education',
'EducationNum',
'MaritalStatus',
'Occupation',
'Relationship',
'Race',
'Gender',
'CapitalGain',
'CapitalLoss',
'HoursPerWeek',
'NativeCountry',
'Relationship x HoursPerWeek',
'Age x Relationship',
'EducationNum x Occupation',
'EducationNum x MaritalStatus',
'Age x HoursPerWeek',
'MaritalStatus x HoursPerWeek',
'WorkClass x CapitalLoss',
'Age x CapitalLoss',
'Occupation x Relationship',
'fnlwgt x HoursPerWeek']
Global explanations
# Compute the model-wide explanation and hand it to interpret's show().
ebm_global = ebm.explain_global()
show(ebm_global)
Local Explanations
# Explain the first five test rows. .iloc makes the slice explicitly
# positional: y_test keeps its integer row labels from df, and plain
# `series[i:j]` slicing on an integer index is deprecated in pandas.
ebm_local = ebm.explain_local(X_test.iloc[:5], y_test.iloc[:5])
/tmp/ipykernel_312818/1167488945.py:1: FutureWarning:
The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. To get the future behavior, use `series.loc[i:j]`.
# Pass the local explanations to interpret's show().
show(ebm_local)
Edit the model to match expert expectations
# Pass the trained EBM and the held-out test split to gamchanger's visualize().
gc.visualize(ebm=ebm, x_test=X_test, y_test=y_test)
Load the model and check if the changes took place
# Parse the saved .gamchanger file as JSON. The context manager closes the
# file handle as soon as parsing is done (the bare open() never closed it).
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running elsewhere.
with open('/home/thulasiram/personal/going_deep_and_wide/Interpretable_ml/interpret_ai/interpret_ai/data/modified_model.gamchanger','r') as gc_file:
    gc_dict = load(gc_file)

# Build the edited model from the original EBM and the loaded dict.
new_ebm = gc.get_edited_model(ebm, gc_dict)

# Visualize the edited model against the same test split to check the changes.
gc.visualize(ebm=new_ebm, x_test=X_test, y_test=y_test)