Case 3. When a tree is convenient in a real product
Implementation with RubixML
In credit scoring, accuracy is not the only thing that matters; transparency matters too. A decision must be understandable: explainable to a risk manager, a customer, and sometimes a regulator. In this case, we train a decision tree that outputs "approve" or "reject" based on numeric features, and then add a simple human-readable explanation that can be shown to users.
Code example:
<?php
use Rubix\ML\Classifiers\ClassificationTree;
use Rubix\ML\Datasets\Labeled;
// Seed PHP's Mersenne Twister generator with a fixed value so that repeated
// runs draw the same pseudo-random sequence. One call is sufficient: since
// PHP 7.1, srand() is an alias of mt_srand() and rand() shares its generator
// with mt_rand(), so a second srand(42) would only repeat the same seeding.
// NOTE(review): this only makes training reproducible to the extent the
// library draws from this global generator — confirm if exact repeatability
// of the fitted tree is required.
mt_srand(42);
/*
 * Training features: one applicant per row, nine numeric values per row.
 *
 * Column order (left to right):
 *   income, loan, dti, history, late, cards, age, job, home
 *
 * NOTE(review): the column names are abbreviations; presumably dti is a
 * debt-to-income ratio, history/job are durations, late/cards are counts and
 * home is a 0/1 flag — confirm units and meanings against the data source.
 *
 * Row N here corresponds to entry N of the label list used for training.
 */
$samples = [
    [5000, 10000,  0.2,  5, 0, 2, 35,  5, 1],
    [3000, 15000,  0.6,  2, 3, 4, 28,  2, 0],
    [8000, 20000, 0.25,  8, 0, 3, 45, 10, 1],
    [2500,  5000,  0.5,  1, 2, 1, 23,  1, 0],
    [6000, 12000,  0.3,  6, 1, 2, 38,  7, 1],
    [4000, 18000,  0.7,  3, 4, 5, 30,  3, 0],
    [7000,  9000, 0.18,  9, 0, 2, 41,  9, 1],
    [3200, 11000, 0.55,  2, 1, 3, 29,  2, 0],
    [5200, 16000, 0.42,  4, 2, 2, 36,  6, 1],
    [2800, 14000, 0.65,  2, 3, 4, 26,  2, 0],
    [9000, 25000, 0.33, 10, 0, 4, 48, 12, 1],
    [4200,  8000, 0.38,  4, 0, 1, 33,  4, 0],
    [3500, 22000, 0.75,  3, 5, 5, 31,  3, 0],
    [6500, 15000, 0.28,  7, 1, 2, 39,  8, 1],
    [2700,  6000, 0.49,  1, 2, 1, 24,  1, 0],
    [5600, 13000, 0.31,  6, 1, 2, 37,  7, 1],
    [3800, 17000, 0.68,  3, 4, 4, 30,  3, 0],
    [4800,  9000, 0.22,  5, 0, 1, 27,  6, 1],
    [7500, 14000, 0.62,  4, 4, 3, 44,  9, 1],
    [2900,  7000, 0.28,  6, 0, 2, 25,  2, 0],
    [6200, 10000, 0.58,  2, 3, 4, 34,  5, 1],
];
/*
 * Target decisions, one per training row and in the same order as the
 * feature rows: entry N is the outcome for sample N. Laid out seven per line
 * (3 x 7 = 21 entries) purely for compactness.
 */
$labels = [
    'approve', 'reject', 'approve', 'reject', 'approve', 'reject', 'approve',
    'reject', 'approve', 'reject', 'approve', 'approve', 'reject', 'approve',
    'reject', 'approve', 'reject', 'approve', 'reject', 'approve', 'reject',
];
// Pair each feature row with its decision label in a single labelled dataset.
$dataset = new Labeled($samples, $labels);

// Configure the classifier through named arguments so the two growth limits
// stay self-describing at the call site (see the Rubix ML documentation for
// the exact semantics of maxHeight and maxLeafSize).
$tree = new ClassificationTree(maxHeight: 10, maxLeafSize: 2);

// Fit the tree on the labelled examples.
$tree->train($dataset);