Polynomial Regression with PHP
Polynomial Regression with PHP-ML
Polynomial regression is an extension of linear regression for cases where the relationship between variables is non-linear.
Polynomial regression transforms input variables to higher powers (e.g., $x^2, x^3$) but remains a
linear model with respect to its parameters, making it suitable for more complex patterns.
In polynomial regression, we aim to model a non-linear relationship by transforming the input variable $x$ to
include higher powers. The model equation for a polynomial regression of degree $d$ is:
$y = \beta_0 + \beta_1 x + \beta_2 x^2 + \beta_3 x^3 + \dots + \beta_d x^d + \epsilon$
In this example we model PRICE as a function of RM (the average number of rooms per dwelling).
Dataset
CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,25.0
0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,22.6
0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,33.4
0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
0.02985,0.0,2.18,0,0.458,6.430,58.7,6.0622,3,222,18.7,394.12,5.21,28.7
0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.60,12.43,20.6
0.14455,12.5,7.87,0,0.524,6.172,96.1,5.9505,5,311,15.2,396.90,19.15,22.9
0.21124,12.5,7.87,0,0.524,5.631,100.0,6.0821,5,311,15.2,386.63,29.93,16.9
0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.10,18.9
0.22489,12.5,7.87,0,0.524,6.377,94.3,6.3467,5,311,15.2,392.52,20.45,21.6
0.11747,12.5,7.87,0,0.524,6.009,82.9,6.2267,5,311,15.2,396.90,13.27,18.9
0.09378,12.5,7.87,0,0.524,5.889,39.0,5.4509,5,311,15.2,390.50,15.71,21.7
0.62976,0.0,8.14,0,0.538,5.949,61.8,4.7075,4,307,21.0,396.90,8.26,20.4
0.63796,0.0,8.14,0,0.538,6.096,84.5,4.4619,4,307,21.0,380.02,10.26,21.2
0.62739,0.0,8.14,0,0.538,5.834,56.5,4.4986,4,307,21.0,395.62,8.47,19.9
0.41238,0.0,8.14,0,0.538,5.989,61.8,4.7075,4,307,21.0,396.90,10.62,22.2
0.36894,22.0,5.86,0,0.431,8.259,8.4,8.9067,7,330,19.1,396.90,3.54,37.7
0.37578,22.0,5.86,0,0.431,8.183,7.5,8.9067,7,330,19.1,396.90,3.54,37.3
0.21719,22.0,5.86,0,0.431,7.853,33.2,8.9067,7,330,19.1,396.90,3.54,40.1
0.19133,22.0,5.86,0,0.431,7.255,92.2,8.9067,7,330,19.1,393.63,6.48,37.2
0.33983,22.0,5.86,0,0.431,6.383,67.7,7.8265,7,330,19.1,396.90,9.69,25.7
0.19657,22.0,5.86,0,0.431,6.816,40.5,8.3248,7,330,19.1,392.90,5.37,31.6
0.16439,22.0,5.86,0,0.431,7.420,71.9,8.3248,7,330,19.1,396.90,4.21,38.7
0.19073,22.0,5.86,0,0.431,7.685,17.7,8.3248,7,330,19.1,396.90,3.01,38.1
Example of use:
<?php
use Phpml\Dataset\CsvDataset;
use Phpml\Regression\LeastSquares;
use Phpml\Metric\Regression;
use Phpml\Preprocessing\Normalizer;
use Phpml\Math\Matrix;
try {
    // Load the raw data from CSV: the first 13 columns are treated as
    // features, the following column as the target, and the first row is
    // skipped as a heading row.
    $dataset = new CsvDataset(__DIR__ . '/data/boston_housing.csv', 13, true);

    // Keep only the 6th column (index 5 since arrays are zero-based) as the
    // single input feature, cast to float.
    $samples = array_map(static function (array $row): array {
        return [(float) $row[5]];
    }, $dataset->getSamples());

    // Convert targets to float values (prices in thousands)
    $targets = array_map(static function ($target): float {
        return (float) $target;
    }, $dataset->getTargets());

    // Validation checks. These must run BEFORE the statistics below:
    // min()/max() on an empty array and the division by count() would
    // otherwise fail first (with Error subclasses on PHP 8 that the
    // `catch (Exception)` below does not handle), hiding the clear message.
    if (empty($samples) || empty($targets)) {
        throw new InvalidArgumentException('Empty training data provided');
    }
    if (count($samples) !== count($targets)) {
        throw new InvalidArgumentException("Number of samples doesn't match number of targets");
    }

    // Calculate dataset statistics
    $rooms = array_column($samples, 0);
    $stats = [
        'min_rooms' => min($rooms),
        'max_rooms' => max($rooms),
        'avg_rooms' => array_sum($rooms) / count($rooms),
        'sample_count' => count($rooms),
    ];

    // Display dataset statistics
    echo "\nDataset Statistics:";
    echo "\n-----------------\n";
    printf("Number of samples: %d\n", $stats['sample_count']);
    printf("Average rooms: %.2f\n", $stats['avg_rooms']);
    printf("Room range: %.1f - %.1f\n", $stats['min_rooms'], $stats['max_rooms']);

    // Polynomial expander: maps a one-feature sample [x] to [x, x^2, x^3].
    // Defined once so that training and prediction use exactly the same
    // feature transformation.
    $expandPolynomial = static function (array $sample): array {
        $x = $sample[0];

        return [
            $x,      // original feature
            $x ** 2, // squared feature
            $x ** 3, // cubed feature
        ];
    };

    // Create regression model
    $regression = new LeastSquares();

    // Train the model on the original, squared and cubed features
    echo "\nTraining model...\n";
    $trainFeatures = array_map($expandPolynomial, $samples);
    $regression->train($trainFeatures, $targets);

    // Make predictions
    echo "\nPredicting house prices...\n";

    // Prepare test samples
    $testSamples = [
        [5.5], // Small house
        [6.0], // Medium house
        [8.0], // Large house
        [$stats['min_rooms'] + ($stats['max_rooms'] - $stats['min_rooms']) / 2], // Middle
        [$stats['min_rooms']], // Smallest in dataset
        [$stats['max_rooms']], // Largest in dataset
    ];

    // Apply the same polynomial expansion to the test samples
    $testFeatures = array_map($expandPolynomial, $testSamples);
    $predictions = $regression->predict($testFeatures);

    // Display results
    echo "\nPrice Predictions:";
    echo "\n-----------------\n";

    // array_map(null, ...) zips each test sample with its prediction.
    foreach (array_map(null, $testSamples, $predictions) as [$testSample, $predictedPrice]) {
        printf(
            "A house with %.1f rooms is predicted to cost $%s\n",
            $testSample[0],
            // Targets are in thousands, so scale back to whole currency units.
            number_format($predictedPrice * 1000, 2)
        );
    }
} catch (Exception $e) {
    // Report any failure (validation errors above, or exceptions raised by
    // the library calls) and exit with a non-zero status.
    echo "Error: " . $e->getMessage() . "\n";
    exit(1);
}