"""Core functions to interact with OGR data sources."""

from pathlib import Path
from typing import Optional, Union

from pyogrio._env import GDALEnv
from pyogrio.util import (
    _mask_to_wkb,
    _preprocess_options_key_value,
    get_vsi_path_or_buffer,
)

# Import the compiled extension modules and run the one-time GDAL setup while
# the GDALEnv context manager is active.
# NOTE(review): presumably GDALEnv prepares the environment needed to load the
# GDAL libraries -- confirm in pyogrio._env.
with GDALEnv():
    from pyogrio._err import _register_error_handler
    from pyogrio._io import ogr_list_layers, ogr_read_bounds, ogr_read_info
    from pyogrio._ogr import (
        _get_drivers_for_path,
        _register_drivers,
        get_gdal_config_option as _get_gdal_config_option,
        get_gdal_data_path as _get_gdal_data_path,
        get_gdal_geos_version,
        get_gdal_version,
        get_gdal_version_string,
        init_gdal_data as _init_gdal_data,
        init_proj_data as _init_proj_data,
        ogr_list_drivers,
        set_gdal_config_options as _set_gdal_config_options,
    )
    from pyogrio._vsi import (
        ogr_vsi_listtree,
        ogr_vsi_rmtree,
        ogr_vsi_unlink,
    )

    # Import-time initialization: these calls run once, when this module is
    # first imported.
    # NOTE(review): the call order is presumably significant (data paths before
    # driver registration) -- confirm before reordering.
    _init_gdal_data()
    _init_proj_data()
    _register_drivers()
    _register_error_handler()

    # Store the values returned by the version helpers as module attributes.
    __gdal_version__ = get_gdal_version()
    __gdal_version_string__ = get_gdal_version_string()
    __gdal_geos_version__ = get_gdal_geos_version()


def list_drivers(read=False, write=False):
    """Return the drivers available in GDAL, optionally filtered by capability.

    Parameters
    ----------
    read : bool, optional (default: False)
        If True, keep only drivers that are known to support reading.
    write : bool, optional (default: False)
        If True, keep only drivers that are known to support writing.

    Returns
    -------
    dict
        Mapping of driver name to file mode capabilities: ``"r"``: read,
        ``"w"``: write.  Drivers that are available but whose support is
        unknown are marked with ``"?"``.

    """
    available = ogr_list_drivers()

    # Without any filter the mapping is handed back untouched.
    if not read and not write:
        return available

    # A capability string starting with "r" means readable; one ending with "w"
    # means writable.  Each test only applies when its flag was requested.
    return {
        name: mode
        for name, mode in available.items()
        if (not read or mode.startswith("r")) and (not write or mode.endswith("w"))
    }


def detect_write_driver(path):
    """Infer the write-capable driver for a path from its extension or prefix.

    Only drivers that support write capabilities are considered.

    Parameters
    ----------
    path : str
        Data source path.

    Returns
    -------
    str
        Name of the single driver detected for ``path``.

    Raises
    ------
    ValueError
        If no driver, or more than one driver, matches ``path``.

    """
    candidates = _get_drivers_for_path(path)
    count = len(candidates)

    # Exactly one match is the only unambiguous outcome.
    if count == 1:
        return candidates[0]

    if count == 0:
        raise ValueError(
            f"Could not infer driver from path: {path}; please specify driver "
            "explicitly"
        )

    # Several drivers claim this path; the caller has to choose one explicitly.
    names = ", ".join(candidates)
    raise ValueError(
        f"Could not infer driver from path: {path}; multiple drivers are "
        f"available for that extension: {names}.  Please "
        "specify driver explicitly."
    )


def list_layers(path_or_buffer, /):
    """Return the layers contained in an OGR data source.

    Both spatial and nonspatial layers are included.

    Parameters
    ----------
    path_or_buffer : str, pathlib.Path, bytes, or file-like
        A dataset path or URI, raw buffer, or file-like object with a read method.

    Returns
    -------
    ndarray shape (2, n)
        Array of pairs of [<layer name>, <layer geometry type>].
        The geometry type is `None` for nonspatial layers.

    """
    # Normalize the input into the form the low-level OGR function accepts.
    source = get_vsi_path_or_buffer(path_or_buffer)
    return ogr_list_layers(source)


def read_bounds(
    path_or_buffer,
    /,
    layer=None,
    skip_features=0,
    max_features=None,
    where=None,
    bbox=None,
    mask=None,
):
    """Read the bounding box of every feature in a layer.

    Useful for spatial indexing and partitioning without loading all features
    into memory.  It is roughly 2-3x faster than reading the full geometry and
    attributes of a dataset.

    Parameters
    ----------
    path_or_buffer : str, pathlib.Path, bytes, or file-like
        A dataset path or URI, raw buffer, or file-like object with a read method.
    layer : int or str, optional (default: first layer)
        An integer is interpreted as the index of the layer within the data
        source; a string must match the name of a layer in the data source.
        Defaults to the first layer in the data source.
    skip_features : int, optional (default: 0)
        Number of features to skip from the beginning of the file before
        returning features.  Must be less than the total number of features in
        the file.
    max_features : int, optional (default: None)
        Number of features to read from the file.  Must be less than the total
        number of features in the file minus ``skip_features`` (if used).
    where : str, optional (default: None)
        Where clause to filter features in the layer by attribute values.  Uses
        a restricted form of SQL WHERE clause, defined here:
        http://ogdi.sourceforge.net/prop/6.2.CapabilitiesMetadata.html
        Examples: ``"ISO_A3 = 'CAN'"``, ``"POP_EST > 10000000 AND POP_EST < 100000000"``
    bbox : tuple of (xmin, ymin, xmax, ymax), optional (default: None)
        If present, only records whose geometry intersects this box are
        returned.  Must be in the same CRS as the dataset.  If GEOS is present
        and used by GDAL, only geometries that intersect this bbox will be
        returned; if GEOS is not available or not used by GDAL, all geometries
        with bounding boxes that intersect this bbox will be returned.
    mask : Shapely geometry, optional (default: None)
        If present, only records whose geometry intersects this geometry are
        returned.  Must be in the same CRS as the dataset.  If GEOS is present
        and used by GDAL, only geometries that intersect this geometry will be
        returned; if GEOS is not available or not used by GDAL, all geometries
        with bounding boxes that intersect the bounding box of this geometry
        will be returned.  Requires Shapely >= 2.0.
        Cannot be combined with ``bbox`` keyword.

    Returns
    -------
    tuple of (fids, bounds)
        fids are global IDs read from the FID field of the dataset
        bounds are ndarray of shape(4, n) containing ``xmin``, ``ymin``, ``xmax``,
        ``ymax``

    """
    source = get_vsi_path_or_buffer(path_or_buffer)

    # ``None`` (or any other falsy value) is forwarded to the reader as 0.
    feature_limit = max_features if max_features else 0

    # The reader receives the mask geometry in WKB form.
    mask_wkb = _mask_to_wkb(mask)

    return ogr_read_bounds(
        source,
        layer=layer,
        skip_features=skip_features,
        max_features=feature_limit,
        where=where,
        bbox=bbox,
        mask=mask_wkb,
    )


def read_info(
    path_or_buffer,
    /,
    layer=None,
    encoding=None,
    force_feature_count=False,
    force_total_bounds=False,
    **kwargs,
):
    """Read metadata describing a layer of an OGR data source.

    ``crs``, ``geometry`` and ``total_bounds`` will be ``None`` and ``features``
    will be 0 for a nonspatial layer.

    ``features`` will be -1 if counting features is an expensive operation for
    this driver.  Use ``force_feature_count`` to compute it anyway.

    ``total_bounds`` is the 2-dimensional extent of all features within the
    dataset: (xmin, ymin, xmax, ymax).  It will be None if computing it is an
    expensive operation for this driver or if the data source is nonspatial.
    Use ``force_total_bounds`` to compute it anyway.

    ``fid_column`` is the name of the FID field in the data source, if the FID
    is physically stored (e.g. in GPKG).  If the FID is just a sequence,
    ``fid_column`` will be "" (e.g. ESRI Shapefile).

    ``geometry_name`` is the name of the field where the main geometry is stored
    in the data source, if the field name can be customized (e.g. in GPKG).  If
    no custom name is supported, ``geometry_name`` will be "" (e.g. ESRI
    Shapefile).

    ``encoding`` will be ``UTF-8`` if either the native encoding is likely to be
    ``UTF-8`` or GDAL can automatically convert from the detected native
    encoding to ``UTF-8``.

    Parameters
    ----------
    path_or_buffer : str, pathlib.Path, bytes, or file-like
        A dataset path or URI, raw buffer, or file-like object with a read method.
    layer : str or int, optional
        Name or index of layer in data source.  Reads the first layer by default.
    encoding : str, optional (default: None)
        If present, will be used as the encoding for reading string values from
        the data source, unless encoding can be inferred directly from the data
        source.
    force_feature_count : bool, optional (default: False)
        True if the feature count should be computed even if it is expensive.
    force_total_bounds : bool, optional (default: False)
        True if the total bounds should be computed even if it is expensive.
    **kwargs
        Additional driver-specific dataset open options passed to OGR.  Invalid
        options will trigger a warning.

    Returns
    -------
    dict
        A dictionary with the following keys::

            {
                "layer_name": "<layer name>",
                "crs": "<crs>",
                "fields": <ndarray of field names>,
                "dtypes": <ndarray of field dtypes>,
                "encoding": "<encoding>",
                "fid_column": "<fid column name or "">",
                "geometry_name": "<geometry column name or "">",
                "geometry_type": "<geometry type>",
                "features": <feature count or -1>,
                "total_bounds": <tuple with total bounds or None>,
                "driver": "<driver>",
                "capabilities": "<dict of driver capabilities>",
                "dataset_metadata": "<dict of dataset metadata or None>",
                "layer_metadata": "<dict of layer metadata or None>"
            }

    """
    # Open options are only preprocessed when some were actually supplied.
    if kwargs:
        dataset_kwargs = _preprocess_options_key_value(kwargs)
    else:
        dataset_kwargs = {}

    source = get_vsi_path_or_buffer(path_or_buffer)

    return ogr_read_info(
        source,
        layer=layer,
        encoding=encoding,
        force_feature_count=force_feature_count,
        force_total_bounds=force_total_bounds,
        dataset_kwargs=dataset_kwargs,
    )


def set_gdal_config_options(options):
    """Apply GDAL configuration options.

    The available options are listed here:
    https://trac.osgeo.org/gdal/wiki/ConfigOptions

    Invalid option names do not raise an error.

    The options apply to the entire session, not to individual function calls.

    Parameters
    ----------
    options : dict
        Mapping of option name / value pairs for GDAL configuration options.
        ``True`` / ``False`` are normalized to ``'ON'`` / ``'OFF'``.  Passing
        ``None`` as the value of an option clears a previously set value.

    """
    # Thin public wrapper: all work happens in the compiled helper.
    _set_gdal_config_options(options)


def get_gdal_config_option(name):
    """Return the current value of a GDAL configuration option.

    Parameters
    ----------
    name : str
        Name of the option to retrieve.

    Returns
    -------
    value of the option or None if not set
        ``'ON'`` / ``'OFF'`` are normalized to ``True`` / ``False``.

    """
    # Thin public wrapper around the compiled helper.
    value = _get_gdal_config_option(name)
    return value


def get_gdal_data_path():
    """Return the directory from which GDAL reads its data files.

    Returns
    -------
    str, or None if data directory was not found

    """
    # Thin public wrapper around the compiled helper.
    data_path = _get_gdal_data_path()
    return data_path


def vsi_listtree(path: Union[str, Path], pattern: Optional[str] = None):
    """List the contents of a VSI directory recursively.

    The directories/files returned can be filtered with an fnmatch pattern.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the VSI directory to be listed.
    pattern : str, optional
        Pattern to filter results, in fnmatch format.

    Returns
    -------
    The value returned by the low-level listing function for ``path``.

    """
    # pathlib.Path objects are passed on as forward-slash strings.
    vsi_path = path.as_posix() if isinstance(path, Path) else path

    return ogr_vsi_listtree(vsi_path, pattern=pattern)


def vsi_rmtree(path: Union[str, Path]):
    """Remove a VSI directory recursively.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the VSI directory to be removed.

    """
    # pathlib.Path objects are passed on as forward-slash strings.
    vsi_path = path.as_posix() if isinstance(path, Path) else path

    ogr_vsi_rmtree(vsi_path)


def vsi_unlink(path: Union[str, Path]):
    """Remove a single VSI file.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the VSI file to be removed.

    """
    # pathlib.Path objects are passed on as forward-slash strings.
    vsi_path = path.as_posix() if isinstance(path, Path) else path

    ogr_vsi_unlink(vsi_path)
