-
Notifications
You must be signed in to change notification settings - Fork 140
/
Copy pathtest_treeinterpreter.py
127 lines (101 loc) · 5.05 KB
/
test_treeinterpreter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
test_treeinterpreter
----------------------------------
Tests for `treeinterpreter` module.
"""
import numpy as np
import unittest
from sklearn.datasets import load_boston, load_iris
from sklearn.ensemble import (RandomForestRegressor, RandomForestClassifier,
ExtraTreesClassifier, ExtraTreesRegressor,)
from sklearn.tree import (DecisionTreeClassifier, DecisionTreeRegressor,
ExtraTreeClassifier, ExtraTreeRegressor,)
from treeinterpreter import treeinterpreter
class TestTreeinterpreter(unittest.TestCase):
    """Tests for treeinterpreter.predict.

    For every supported sklearn model type this suite checks the core
    contract: predict() returns (prediction, bias, contributions) such
    that the prediction equals the model's own output and decomposes
    exactly into bias + sum of per-feature contributions.
    """

    def setUp(self):
        # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and
        # removed in 1.2 — migrate to fetch_california_housing or a
        # synthetic regression dataset to keep these tests runnable.
        self.boston = load_boston()
        self.iris = load_iris()
        # Seeded RNG so the iris shuffles (and thus the class mix in each
        # train/test half) are reproducible; the previous unseeded global
        # np.random.shuffle made the classifier tests flaky.
        self.rng = np.random.RandomState(0)

    def _check_contributions(self, base_prediction, pred, bias, contrib):
        # The interpreter must reproduce the model's own predictions, and
        # bias + per-feature contributions must sum back to them exactly
        # (up to floating-point tolerance).
        self.assertTrue(np.allclose(base_prediction, pred))
        self.assertTrue(np.allclose(pred, bias + np.sum(contrib, axis=1)))

    def _check_joint_contributions(self, base_prediction, pred, bias, contribs):
        # With joint_contribution=True the contributions come back as one
        # dict per sample (keyed by feature-index tuples); their values
        # plus the bias must sum to the model's prediction.
        self.assertTrue(np.allclose(base_prediction, pred))
        self.assertTrue(np.allclose(
            base_prediction,
            np.array([sum(contrib.values()) for contrib in contribs]) + bias))

    def test_tree_regressor(self):
        for TreeRegressor in (DecisionTreeRegressor, ExtraTreeRegressor):
            X = self.boston.data
            Y = self.boston.target
            half = len(X) // 2
            # random_state pins split tie-breaking (and ExtraTree's random
            # thresholds) so the fitted tree is deterministic.
            dt = TreeRegressor(random_state=0)
            dt.fit(X[:half], Y[:half])
            # Batch prediction on the held-out half.
            testX = X[half:]
            pred, bias, contrib = treeinterpreter.predict(dt, testX)
            self._check_contributions(dt.predict(testX), pred, bias, contrib)
            # Single-row input, to catch shape-handling regressions.
            testX = X[-1:]
            pred, bias, contrib = treeinterpreter.predict(dt, testX)
            self._check_contributions(dt.predict(testX), pred, bias, contrib)

    def test_tree_classifier(self):
        for TreeClassifier in (DecisionTreeClassifier, ExtraTreeClassifier):
            X = self.iris.data
            Y = self.iris.target
            half = len(X) // 2
            dt = TreeClassifier(random_state=0)
            dt.fit(X[:half], Y[:half])
            # Single sample; classifier contract is checked against
            # predict_proba (per-class probabilities), not predict.
            testX = X[half:half + 1]
            pred, bias, contrib = treeinterpreter.predict(dt, testX)
            self._check_contributions(dt.predict_proba(testX), pred, bias, contrib)

    def test_forest_regressor(self):
        for ForestRegressor in (RandomForestRegressor, ExtraTreesRegressor):
            X = self.boston.data
            Y = self.boston.target
            half = len(X) // 2
            dt = ForestRegressor(n_estimators=10, random_state=0)
            dt.fit(X[:half], Y[:half])
            testX = X[half:]
            pred, bias, contrib = treeinterpreter.predict(dt, testX)
            self._check_contributions(dt.predict(testX), pred, bias, contrib)

    def test_forest_regressor_joint(self):
        for ForestRegressor in (RandomForestRegressor, ExtraTreesRegressor):
            X = self.boston.data
            Y = self.boston.target
            half = len(X) // 2
            dt = ForestRegressor(n_estimators=10, random_state=0)
            dt.fit(X[:half], Y[:half])
            testX = X[half:]
            pred, bias, contribs = treeinterpreter.predict(
                dt, testX, joint_contribution=True)
            self._check_joint_contributions(dt.predict(testX), pred, bias, contribs)

    def test_forest_classifier(self):
        for ForestClassifier in (RandomForestClassifier, ExtraTreesClassifier):
            # Shuffle so both train and test halves contain all classes
            # (iris targets are sorted by class in the raw dataset).
            idx = np.arange(len(self.iris.data))
            self.rng.shuffle(idx)
            X = self.iris.data[idx]
            Y = self.iris.target[idx]
            half = len(X) // 2
            dt = ForestClassifier(max_depth=3, random_state=0)
            dt.fit(X[:half], Y[:half])
            testX = X[half:]
            pred, bias, contrib = treeinterpreter.predict(dt, testX)
            self._check_contributions(dt.predict_proba(testX), pred, bias, contrib)

    def test_forest_classifier_joint(self):
        for ForestClassifier in (RandomForestClassifier, ExtraTreesClassifier):
            idx = np.arange(len(self.iris.data))
            self.rng.shuffle(idx)
            X = self.iris.data[idx]
            Y = self.iris.target[idx]
            half = len(X) // 2
            dt = ForestClassifier(max_depth=3, random_state=0)
            dt.fit(X[:half], Y[:half])
            testX = X[half:]
            pred, bias, contribs = treeinterpreter.predict(
                dt, testX, joint_contribution=True)
            self._check_joint_contributions(
                dt.predict_proba(testX), pred, bias, contribs)

    def tearDown(self):
        pass
# Allow running this test module directly (python test_treeinterpreter.py)
# in addition to discovery via a test runner.
if __name__ == '__main__':
    unittest.main()