Detailed analysis of data_explorer.php with recommendations
The current data_explorer.php file handles a large dataset (38,594 records) reasonably well but has significant performance, security, and maintainability issues. This document outlines specific improvements ranked by priority and implementation complexity.
htmlspecialchars() for output escapingImpact: High | Complexity: Low | Time: 2-3 hours
// Add at the top of the file
function validateInput($input, $type = 'string', $maxLength = 255) {
if ($input === null || $input === '') {
return '';
}
switch($type) {
case 'int':
$value = filter_var($input, FILTER_VALIDATE_INT);
return $value !== false ? $value : 0;
case 'float':
$value = filter_var($input, FILTER_VALIDATE_FLOAT);
return $value !== false ? $value : 0.0;
case 'string':
default:
$value = trim($input);
if (strlen($value) > $maxLength) {
$value = substr($value, 0, $maxLength);
}
return htmlspecialchars($value, ENT_QUOTES, 'UTF-8');
}
}
// Replace all $_GET usage with:
$country = validateInput($_GET['country'] ?? '');
$pillar = validateInput($_GET['pillar'] ?? '');
$cluster = validateInput($_GET['cluster'] ?? '');
$hierarchyLevel = validateInput($_GET['hierarchy_level'] ?? '', 'int');
$search = validateInput($_GET['search'] ?? '', 'string', 500);
$minScore = validateInput($_GET['min_score'] ?? '', 'float');
$maxScore = validateInput($_GET['max_score'] ?? '', 'float');
$perPage = in_array((int)($_GET['per_page'] ?? 50), [25, 50, 100, 250]) ? (int)$_GET['per_page'] : 50;
$page = max(1, validateInput($_GET['page'] ?? 1, 'int'));
Impact: High | Complexity: Medium | Time: 4-6 hours
// Add caching functionality
class SimpleCache {
private $cacheDir = 'cache/';
private $defaultTTL = 3600; // 1 hour
public function __construct() {
if (!is_dir($this->cacheDir)) {
mkdir($this->cacheDir, 0755, true);
}
}
public function get($key) {
$filename = $this->cacheDir . md5($key) . '.cache';
if (!file_exists($filename)) {
return null;
}
$data = unserialize(file_get_contents($filename));
if ($data['expires'] < time()) {
unlink($filename);
return null;
}
return $data['value'];
}
public function set($key, $value, $ttl = null) {
$ttl = $ttl ?? $this->defaultTTL;
$filename = $this->cacheDir . md5($key) . '.cache';
$data = [
'value' => $value,
'expires' => time() + $ttl
];
file_put_contents($filename, serialize($data));
}
}
// Usage in main code:
$cache = new SimpleCache();
$cacheKey = 'gdb_data_' . filemtime($csvFile);
$cachedData = $cache->get($cacheKey);
if ($cachedData === null) {
// Load and process CSV as before
$cache->set($cacheKey, [
'data' => $data,
'countries' => $countries,
'pillars' => $pillars,
'clusters' => $clusters,
'totalRecords' => $totalRecords
]);
} else {
extract($cachedData);
}
Impact: Medium | Complexity: Low | Time: 2-3 hours
// Add comprehensive error handling
function logError($message, $context = []) {
$logFile = 'logs/error_log.txt';
$timestamp = date('Y-m-d H:i:s');
$contextStr = !empty($context) ? json_encode($context) : '';
$logEntry = "[$timestamp] $message $contextStr\n";
if (!is_dir('logs')) {
mkdir('logs', 0755, true);
}
file_put_contents($logFile, $logEntry, FILE_APPEND | LOCK_EX);
}
// Wrap CSV processing in try-catch
try {
if (!file_exists($csvFile)) {
throw new Exception("CSV file not found: $csvFile");
}
if (!is_readable($csvFile)) {
throw new Exception("CSV file is not readable: $csvFile");
}
// CSV processing code here
} catch (Exception $e) {
logError("CSV Processing Error: " . $e->getMessage(), [
'file' => $csvFile,
'user_ip' => $_SERVER['REMOTE_ADDR'] ?? 'unknown'
]);
// Show user-friendly error
$errorMessage = "Unable to load data. Please try again later.";
}
Impact: Medium | Complexity: Low | Time: 1-2 hours
// Add rate limiting for export functionality
function checkRateLimit($action, $limit = 5, $window = 300) { // 5 requests per 5 minutes
$ip = $_SERVER['REMOTE_ADDR'] ?? 'unknown';
$key = "rate_limit_{$action}_{$ip}";
$cacheFile = "cache/{$key}.json";
if (file_exists($cacheFile)) {
$data = json_decode(file_get_contents($cacheFile), true);
$requests = array_filter($data['requests'], function($time) use ($window) {
return $time > (time() - $window);
});
if (count($requests) >= $limit) {
return false;
}
$requests[] = time();
file_put_contents($cacheFile, json_encode(['requests' => $requests]));
} else {
file_put_contents($cacheFile, json_encode(['requests' => [time()]]));
}
return true;
}
// Use before export
if (isset($_GET['export']) && $_GET['export'] === 'csv') {
if (!checkRateLimit('export')) {
die('Rate limit exceeded. Please wait before requesting another export.');
}
// Export code here
}
Impact: High | Complexity: Medium | Time: 6-8 hours
Files to Create:
data_explorer_styles.css - All CSS stylesdata_explorer_scripts.js - All JavaScript functions data_explorer_config.php - Configuration settingsdata_explorer_functions.php - All PHP functionsImpact: Medium | Complexity: Medium | Time: 4-6 hours
// Improved search with multiple criteria
function enhancedSearch($data, $searchTerm, $searchFields = []) {
if (empty($searchTerm)) {
return $data;
}
$searchTerms = explode(' ', strtolower($searchTerm));
$defaultFields = ['question_text', 'response', 'indicator_name', 'action_area'];
$fieldsToSearch = empty($searchFields) ? $defaultFields : $searchFields;
return array_filter($data, function($record) use ($searchTerms, $fieldsToSearch) {
$matchCount = 0;
foreach ($searchTerms as $term) {
$termFound = false;
foreach ($fieldsToSearch as $field) {
if (isset($record[$field]) &&
strpos(strtolower($record[$field]), $term) !== false) {
$termFound = true;
break;
}
}
if ($termFound) {
$matchCount++;
}
}
// All search terms must be found
return $matchCount === count($searchTerms);
});
}
Impact: Medium | Complexity: High | Time: 8-10 hours
// Add advanced filtering capabilities
function applyAdvancedFilters($data, $filters) {
return array_filter($data, function($record) use ($filters) {
// Date range filtering
if (!empty($filters['date_from']) || !empty($filters['date_to'])) {
$recordDate = strtotime($record['created_at'] ?? '');
$dateFrom = !empty($filters['date_from']) ? strtotime($filters['date_from']) : 0;
$dateTo = !empty($filters['date_to']) ? strtotime($filters['date_to']) : time();
if ($recordDate < $dateFrom || $recordDate > $dateTo) {
return false;
}
}
// Multiple selection filters
if (!empty($filters['countries']) && is_array($filters['countries'])) {
if (!in_array($record['country'], $filters['countries'])) {
return false;
}
}
// Score percentile filtering
if (!empty($filters['score_percentile'])) {
// Implementation for percentile-based filtering
}
return true;
});
}
Impact: Very High | Complexity: High | Time: 2-3 weeks
See mysql_migration_plan.md for detailed implementation.
Impact: High | Complexity: High | Time: 1-2 weeks
// Create RESTful API endpoints
class DataExplorerAPI {
public function handleRequest() {
$method = $_SERVER['REQUEST_METHOD'];
$path = parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH);
switch ($method) {
case 'GET':
return $this->handleGet($path);
case 'POST':
return $this->handlePost($path);
default:
return $this->jsonResponse(['error' => 'Method not allowed'], 405);
}
}
private function handleGet($path) {
switch ($path) {
case '/api/data':
return $this->getData();
case '/api/filters':
return $this->getFilters();
case '/api/export':
return $this->exportData();
default:
return $this->jsonResponse(['error' => 'Not found'], 404);
}
}
private function jsonResponse($data, $status = 200) {
http_response_code($status);
header('Content-Type: application/json');
echo json_encode($data);
exit;
}
}
Impact: High | Complexity: Medium | Time: 1-2 weeks
// Add Chart.js integration for visualizations
function createScoreDistributionChart(data) {
const ctx = document.getElementById('scoreChart').getContext('2d');
const scoreRanges = [
{ label: '0-20', count: 0 },
{ label: '21-40', count: 0 },
{ label: '41-60', count: 0 },
{ label: '61-80', count: 0 },
{ label: '81-100', count: 0 }
];
data.forEach(record => {
const score = parseFloat(record.score);
if (score <= 20) scoreRanges[0].count++;
else if (score <= 40) scoreRanges[1].count++;
else if (score <= 60) scoreRanges[2].count++;
else if (score <= 80) scoreRanges[3].count++;
else scoreRanges[4].count++;
});
new Chart(ctx, {
type: 'bar',
data: {
labels: scoreRanges.map(r => r.label),
datasets: [{
label: 'Score Distribution',
data: scoreRanges.map(r => r.count),
backgroundColor: 'rgba(102, 126, 234, 0.6)'
}]
},
options: {
responsive: true,
plugins: {
title: {
display: true,
text: 'Score Distribution'
}
}
}
});
}
The current data_explorer.php file is functional but has significant room for improvement. The immediate fixes (Priority 1) will provide substantial performance and security benefits with minimal development time. The MySQL migration (Priority 3) will provide the most significant long-term benefits for scalability and performance.
Recommended Action: Implement Priority 1 improvements immediately, then evaluate whether the performance gains are sufficient or if MySQL migration is necessary based on user load and requirements.