import pandas as pd

# 1. Lire le fichier DOM-TOM
df_dom_tom = pd.read_excel("dom_tom_coords.xlsx")  # ou pd.read_csv("dom_tom_coords.csv")

# 2. Créer le dictionnaire des coordonnées
geo_data = {}
for _, row in df_dom_tom.iterrows():
    insee_code = str(row["Code INSEE"]).strip()  # Ex: "97617"
    geo_data[insee_code] = {
        'latitude': row["Latitude"],
        'longitude': row["Longitude"]
    }

# 3. Générer les requêtes SQL
output_file = "updates_coordinates.sql"
missing_report = "insee_codes_manquants.txt"

found_count = 0
missing_count = 0

with open("cities_sql.txt", "r", encoding="utf-8") as infile, \
     open(output_file, "w", encoding="utf-8") as outfile, \
     open(missing_report, "w", encoding="utf-8") as report:

    for line in infile:
        parts = line.strip().strip("(),").split(",")
        if len(parts) < 3:
            continue
        insee_code = parts[1].strip().strip("'")
        city_name = parts[2].strip().strip("'")

        if insee_code in geo_data:
            lat = geo_data[insee_code]['latitude']
            lon = geo_data[insee_code]['longitude']
            sql = f"""
            UPDATE cities
            SET latitude = {lat}, longitude = {lon}
            WHERE insee_code = '{insee_code}';
            """
            outfile.write(sql + "\n")
            found_count += 1
        else:
            report.write(f"{insee_code};{city_name}\n")
            missing_count += 1

print(f"Fichier SQL généré : {output_file} ({found_count} requêtes)")
print(f"Rapport des codes manquants : {missing_report} ({missing_count} entrées)")
