Big Data Techniques in PHP

Dataset Generator

Generators provide a memory-efficient way to iterate over large datasets by yielding values one at a time.

 
<?php

// Create the batch reader. The constructor argument is the optional buffer
// size (4096 here); omit it to fall back to the class default.
// NOTE(review): DatasetGenerator is defined outside this snippet — confirm the
// unit of the buffer size against the class definition.
$generator = new DatasetGenerator(4096);

// Path to the large input file, resolved relative to this script's directory.
$filename = __DIR__ . '/large_data.json';

// Walk the file in batches of 100 items; each iteration hands back one batch.
foreach ($generator->processBatchedData($filename, 100) as $batch) {
    // Per-batch work goes here (e.g., save to a database or further transform data).
    foreach ($batch as $item) {
        // Example of handling each item in the batch: print it as one JSON line.
        echo 'Processing item: ' . json_encode($item) . PHP_EOL;

        // Add your specific logic here, such as inserting data into a database
        // or validating items.
    }
}
Result: Memory: 0.008 MB; Time running: < 0.001 sec.
Processing item: {"id":1,"name":"John Doe","email":"john.doe@example.com","status":"active"}
Processing item: {"id":2,"name":"Jane Smith","email":"jane.smith@example.com","status":"inactive"}
Processing item: {"id":3,"name":"Alice Johnson","email":"alice.johnson@example.com","status":"active"}
Processing item: {"id":4,"name":"Bob Brown","email":"bob.brown@example.com","status":"pending"}
Processing item: {"id":5,"name":"Charlie White","email":"charlie.white@example.com","status":"active"}
Processing item: {"id":6,"name":"Diana Green","email":"diana.green@example.com","status":"suspended"}
Processing item: {"id":7,"name":"Emily Black","email":"emily.black@example.com","status":"active"}
Processing item: {"id":8,"name":"Frank Harris","email":"frank.harris@example.com","status":"inactive"}
Processing item: {"id":9,"name":"Grace Lee","email":"grace.lee@example.com","status":"pending"}
Processing item: {"id":10,"name":"Henry Walker","email":"henry.walker@example.com","status":"suspended"}