import csv
from collections import defaultdict

# 1. Charger les codes postaux (latin-1 -> UTF-8)
postal_codes = {}
with open("019HexaSmal.csv", "r", encoding="latin-1") as f:
    reader = csv.reader(f, delimiter=";")
    for row in reader:
        if len(row) < 3:
            continue
        insee_code = row[0].strip()
        postal_code = row[2].strip()
        postal_codes[insee_code] = postal_code

# 2. Charger les coordonnées (latin-1 -> UTF-8)
geo_data = defaultdict(dict)
with open("FR.txt", "r", encoding="latin-1") as f:
    reader = csv.reader(f, delimiter="\t")
    for row in reader:
        if len(row) < 14:
            continue
        insee_code = row[13]
        if not insee_code.isdigit() or len(insee_code) != 5:
            continue
        try:
            latitude = float(row[4]) if row[4] else None
            longitude = float(row[5]) if row[5] else None
            if latitude and longitude:
                geo_data[insee_code] = {
                    'latitude': latitude,
                    'longitude': longitude
                }
        except (ValueError, IndexError):
            continue

# 3. Mappage des régions
region_mapping = {
    '01': 'FRA-ARA', '03': 'FRA-ARA', '07': 'FRA-ARA', '15': 'FRA-ARA', '26': 'FRA-ARA',
    '21': 'FRA-BFC', '25': 'FRA-BFC', '39': 'FRA-BFC', '58': 'FRA-BFC', '70': 'FRA-BFC',
    '22': 'FRA-BRE', '29': 'FRA-BRE', '35': 'FRA-BRE', '56': 'FRA-BRE',
    '44': 'FRA-PDL', '49': 'FRA-PDL', '53': 'FRA-PDL', '72': 'FRA-PDL', '85': 'FRA-PDL',
    '18': 'FRA-CVL', '28': 'FRA-CVL', '36': 'FRA-CVL', '37': 'FRA-CVL', '41': 'FRA-CVL', '45': 'FRA-CVL',
    '08': 'FRA-GES', '10': 'FRA-GES', '51': 'FRA-GES', '54': 'FRA-GES', '55': 'FRA-GES',
    '14': 'FRA-NOR', '27': 'FRA-NOR', '50': 'FRA-NOR', '61': 'FRA-NOR', '76': 'FRA-NOR',
    '75': 'FRA-IDF', '77': 'FRA-IDF', '78': 'FRA-IDF', '91': 'FRA-IDF', '92': 'FRA-IDF', '93': 'FRA-IDF', '94': 'FRA-IDF', '95': 'FRA-IDF',
    '09': 'FRA-OCC', '11': 'FRA-OCC', '12': 'FRA-OCC', '30': 'FRA-OCC', '31': 'FRA-OCC', '32': 'FRA-OCC', '34': 'FRA-OCC', '46': 'FRA-OCC', '48': 'FRA-OCC', '65': 'FRA-OCC', '66': 'FRA-OCC', '81': 'FRA-OCC', '82': 'FRA-OCC',
    '16': 'FRA-NAQ', '17': 'FRA-NAQ', '19': 'FRA-NAQ', '23': 'FRA-NAQ', '24': 'FRA-NAQ', '33': 'FRA-NAQ', '40': 'FRA-NAQ', '47': 'FRA-NAQ', '64': 'FRA-NAQ', '79': 'FRA-NAQ', '86': 'FRA-NAQ', '87': 'FRA-NAQ',
    '02': 'FRA-HDF', '59': 'FRA-HDF', '60': 'FRA-HDF', '62': 'FRA-HDF', '80': 'FRA-HDF',
    '2A': 'FRA-COR', '2B': 'FRA-COR',
    '971': 'FRA-GUA', '972': 'FRA-MTQ', '973': 'FRA-GUF', '974': 'FRA-REU', '976': 'FRA-MAY'
}

# 4. Générer le fichier SQL (avec conversion explicite des noms)
input_file = "communes_cog.csv"
output_file = "/var/www/html/villes_finales.sql"
missing_coords_file = "/var/www/html/communes_sans_coords.txt"

with open(input_file, "r", encoding="latin-1") as infile, \
     open(output_file, "w", encoding="utf-8") as outfile, \
     open(missing_coords_file, "w", encoding="utf-8") as missing_file:

    reader = csv.DictReader(infile)
    missing_count = 0

    for row in reader:
        if row["TYPECOM"] == "COM":
            insee_code = row["COM"].zfill(5)
            # Conversion du nom en UTF-8 et échappement des apostrophes
            city_name = row["NCCENR"].encode("latin-1").decode("utf-8", errors="replace").replace("'", "''")
            department_code = row["DEP"]
            region_code = region_mapping.get(department_code, "FRA-IDF")

            # Code postal
            postal_code = f"'{postal_codes.get(insee_code, '')}'" if insee_code in postal_codes else "NULL"

            # Coordonnées
            geo_info = geo_data.get(insee_code, {})
            latitude = geo_info.get('latitude', "NULL")
            longitude = geo_info.get('longitude', "NULL")

            if latitude == "NULL" or longitude == "NULL":
                missing_file.write(f"{insee_code};{city_name}\n")
                missing_count += 1

            sql = f"""
            INSERT INTO cities (
                insee_code, city_name, postal_code, latitude, longitude, department_code, region_code
            ) VALUES (
                '{insee_code}', '{city_name}', {postal_code}, {latitude}, {longitude}, '{department_code}', '{region_code}'
            ) ON DUPLICATE KEY UPDATE
                city_name = VALUES(city_name),
                postal_code = VALUES(postal_code),
                latitude = VALUES(latitude),
                longitude = VALUES(longitude),
                department_code = VALUES(department_code),
                region_code = VALUES(region_code);
            """
            outfile.write(sql + "\n")

print(f"Fichier SQL généré : {output_file}")
print(f"Rapport des communes sans coordonnées : {missing_coords_file} ({missing_count} entrées)")
