<?php
// Parse eplusgo_wilayah.sql to extract city/regency names and provinces into a JSON file.
// Usage:
//   php backend_api/tools/sql_extract_cities.php

// Emit one diagnostic message on the standard error stream, followed by the platform newline.
function stderr($m)
{
  $line = $m.PHP_EOL;
  fputs(STDERR, $line);
}

// Input dump and output JSON locations, resolved relative to this script's directory.
$sqlPath = __DIR__ . '/../database/eplusgo_wilayah.sql';
$outPath = __DIR__ . '/../database/indonesia_cities_names.json';

if (!file_exists($sqlPath)){
  stderr('SQL file not found: '.$sqlPath);
  exit(1);
}

$sql = file_get_contents($sqlPath);
if ($sql === false){ stderr('Failed to read SQL.'); exit(1); }

/**
 * Collect the value tuples of every INSERT statement matched by $pattern.
 *
 * $pattern must capture the text between VALUES and the terminating ';' in group 1.
 * Dump tools may split one table across several INSERT statements, so every match
 * is read rather than only the first one.
 *
 * Returns null when no statement matches (or the regex fails); otherwise a list of
 * rows, each row being the list of raw column strings (still quoted, untrimmed).
 * Columns are split on commas, so a quoted value containing a comma is not supported.
 */
function extractInsertRows($sql, $pattern){
  if (!preg_match_all($pattern, $sql, $matches)) return null;
  $rows = [];
  foreach ($matches[1] as $vals){
    // Strip the outermost parentheses, then split "(..),(..)" into individual tuples.
    $tuples = preg_split('/\),\s*\(/', trim($vals, "() \n\r\t"));
    foreach ($tuples as $tuple){
      $rows[] = preg_split('/,\s*/', $tuple);
    }
  }
  return $rows;
}

// Extract provinces mapping: prov_id -> prov_name
$provMap = [];
$provRows = extractInsertRows($sql, '/INSERT INTO `provinces` \(`prov_id`, `prov_name`[^\)]*\) VALUES\s*(.+?);/is');
if ($provRows === null){
  // Best effort: keep going; without provinces every city is skipped below.
  stderr('Failed to locate provinces INSERT block.');
} else {
  foreach ($provRows as $cols){
    if (count($cols) < 2) continue;
    $id = intval(trim($cols[0]));
    // value like 'ACEH'
    $name = trim(trim($cols[1]), "'\"");
    // Normalize casing to Title Case for consistency
    $provMap[$id] = titleCase($name);
  }
}

// Extract cities: city_id, city_name, prov_id
$out = [];
$cityRows = extractInsertRows($sql, '/INSERT INTO `cities` \(`city_id`, `city_name`, `prov_id`\) VALUES\s*(.+?);/is');
if ($cityRows === null){
  stderr('Failed to locate cities INSERT block.');
} else {
  foreach ($cityRows as $cols){
    if (count($cols) < 3) continue;
    // city_id ($cols[0]) is unused
    $rawName = trim(trim($cols[1]), "'\"");
    $provId = intval(trim($cols[2]));
    $provName = $provMap[$provId] ?? '';
    $name = titleCase(cleanName($rawName));
    // Skip rows with no usable name or an unknown province id.
    if ($name === '' || $provName === '') continue;
    $out[] = [
      'name' => $name,
      'province' => $provName,
      'type' => '',
    ];
  }
}

// Dedup by name|province (case-insensitive), keeping the first occurrence
$seen = [];
$deduped = [];
foreach ($out as $r){
  $k = strtolower($r['name'].'|'.$r['province']);
  if (isset($seen[$k])) continue;
  $seen[$k] = true;
  $deduped[] = $r;
}

// Sort by province, then by name (byte-wise comparison)
usort($deduped, function($a, $b){
  $c = strcmp($a['province'], $b['province']);
  if ($c !== 0) return $c;
  return strcmp($a['name'], $b['name']);
});

// json_encode returns false on failure (e.g. invalid UTF-8); do not write an empty file and report success.
$json = json_encode($deduped, JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES);
if ($json === false){
  stderr('Failed to encode JSON: '.json_last_error_msg());
  exit(1);
}
if (file_put_contents($outPath, $json) === false){
  stderr('Failed to write output: '.$outPath);
  exit(1);
}
echo 'Extracted '.count($deduped).' city/regency names to '.$outPath.PHP_EOL;
exit(0);

/**
 * Convert a name to Title Case while keeping the acronyms "DKI" and "DI" upper-case.
 *
 * The input is lower-cased first, then the first ASCII letter of every word is
 * upper-cased. "Dki" and "Di" are restored to upper-case only when they stand as
 * whole words, so names that merely begin with "Di" (e.g. "Digoel") are untouched.
 *
 * @param string $s Raw name, typically all upper-case (e.g. "DKI JAKARTA").
 * @return string Title-cased name (e.g. "DKI Jakarta").
 */
function titleCase($s){
  $s = strtolower($s);
  // The pattern only matches ASCII a-z, so the byte-wise ucfirst() is sufficient here.
  $s = preg_replace_callback('/\b[a-z][a-z]*\b/u', function($m){ return ucfirst($m[0]); }, $s);
  // Fix common acronyms, anchored on word boundaries to avoid rewriting prefixes of longer words.
  $s = preg_replace_callback('/\b(?:Dki|Di)\b/u', function($m){ return strtoupper($m[0]); }, $s);
  return $s;
}

/**
 * Normalize spacing in a raw city/regency name.
 *
 * Every slash ends up surrounded by exactly one space on each side, runs of
 * whitespace are collapsed to a single space, and leading/trailing whitespace is
 * removed. Parenthetical text is left as-is.
 *
 * @param string $s Raw name as read from the SQL dump.
 * @return string Name with normalized spacing (e.g. "A/B" and "A  /  B" both give "A / B").
 */
function cleanName($s){
  // Normalize slashes first, absorbing any spacing already around them,
  // so that already-spaced slashes do not end up with doubled spaces.
  $s = preg_replace('/\s*\/\s*/', ' / ', $s);
  // Collapse whitespace runs (including those between adjacent slashes).
  $s = preg_replace('/\s+/', ' ', $s);
  return trim($s);
}
?>
