#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import warnings
from typing import TYPE_CHECKING, Any, Literal

from typing_extensions import TypeAlias, overload

import paddle
from paddle import _C_ops
from paddle.framework import (
    in_dynamic_mode,
    in_dynamic_or_pir_mode,
)
from paddle.utils.decorator_utils import (
    param_two_alias,
    param_two_alias_one_default,
    use_first_signature,
)

from ..base.data_feeder import check_type, check_variable_and_dtype
from ..common_ops_import import Variable
from ..framework import LayerHelper, convert_np_dtype_to_dtype_, core
from .manipulation import cast
from .math import _get_reduce_axis_with_tensor

if TYPE_CHECKING:
    from collections.abc import Sequence

    from paddle import Tensor
    from paddle._typing import DTypeLike

_Interpolation: TypeAlias = Literal[
    'linear', 'higher', 'lower', 'midpoint', 'nearest'
]
__all__ = []


@param_two_alias(["x", "input"], ["axis", "dim"])
def mean(
    x: Tensor,
    axis: int | Sequence[int] | None = None,
    keepdim: bool = False,
    name: str | None = None,
    *,
    dtype: DTypeLike | None = None,
    out: Tensor | None = None,
) -> Tensor:
    """
    Computes the mean of the input tensor's elements along ``axis``.

    Args:
        x (Tensor): The input Tensor with data type bool, bfloat16, float16, float32,
            float64, int32, int64, complex64, complex128.
            alias: ``input``
        axis (int|list|tuple|None, optional): The axis along which to perform mean
            calculations. ``axis`` should be int, list(int) or tuple(int). If
            ``axis`` is a list/tuple of dimension(s), mean is calculated along
            all element(s) of ``axis`` . ``axis`` or element(s) of ``axis``
            should be in range [-D, D), where D is the dimensions of ``x`` . If
            ``axis`` or element(s) of ``axis`` is less than 0, it works the
            same way as :math:`axis + D` . If ``axis`` is None, mean is
            calculated over all elements of ``x``. Default is None.
            alias: ``dim``
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str|None, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.
        dtype (str): The desired data type of returned tensor. Default: None.
        out(Tensor|None, optional): The output tensor. Default: None.

    Returns:
        Tensor, results of average along ``axis`` of ``x``, with the same data
        type as ``x``.

    Examples:
        .. code-block:: pycon

            >>> import paddle

            >>> x = paddle.to_tensor(
            ...     [
            ...         [
            ...             [1.0, 2.0, 3.0, 4.0],
            ...             [5.0, 6.0, 7.0, 8.0],
            ...             [9.0, 10.0, 11.0, 12.0],
            ...         ],
            ...         [
            ...             [13.0, 14.0, 15.0, 16.0],
            ...             [17.0, 18.0, 19.0, 20.0],
            ...             [21.0, 22.0, 23.0, 24.0],
            ...         ],
            ...     ]
            ... )
            >>> out1 = paddle.mean(x)
            >>> print(out1.numpy())
            12.5
            >>> out2 = paddle.mean(x, axis=-1)
            >>> print(out2.numpy())
            [[ 2.5  6.5 10.5]
             [14.5 18.5 22.5]]
            >>> out3 = paddle.mean(x, axis=-1, keepdim=True)
            >>> print(out3.numpy())
            [[[ 2.5]
              [ 6.5]
              [10.5]]
             [[14.5]
              [18.5]
              [22.5]]]
            >>> out4 = paddle.mean(x, axis=[0, 2])
            >>> print(out4.numpy())
            [ 8.5 12.5 16.5]
            >>> out5 = paddle.mean(x, dtype='float64')
            >>> out5
            Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True,
                12.50000000)
    """
    if dtype is not None:
        if not isinstance(dtype, (core.VarDesc.VarType, core.DataType)):
            dtype = convert_np_dtype_to_dtype_(dtype)
        if x.dtype != dtype:
            x = cast(x, dtype)

    if in_dynamic_or_pir_mode():
        return _C_ops.mean(x, axis, keepdim, out=out)
    else:
        reduce_all, axis = _get_reduce_axis_with_tensor(axis, x)
        check_variable_and_dtype(
            x,
            'x/input',
            [
                'bool',
                'uint16',
                'float16',
                'float32',
                'float64',
                'int32',
                'int64',
                'complex64',
                'complex128',
            ],
            'mean/reduce_mean',
        )
        check_type(
            axis, 'axis/dim', (int, list, tuple, Variable), 'mean/reduce_mean'
        )
        if isinstance(axis, (list, tuple)):
            for item in axis:
                check_type(
                    item,
                    'elements of axis/dim',
                    (int, Variable),
                    'mean/reduce_mean',
                )

        helper = LayerHelper('mean', **locals())

        attrs = {'dim': axis, 'keep_dim': keepdim, 'reduce_all': reduce_all}
        out_tensor = helper.create_variable_for_type_inference(x.dtype)
        helper.append_op(
            type='reduce_mean',
            inputs={'X': x},
            outputs={'Out': out_tensor},
            attrs=attrs,
        )
        return out_tensor


@param_two_alias(["x", "input"], ["axis", "dim"])
def var(
    x: Tensor,
    axis: int | Sequence[int] | None = None,
    unbiased: bool | None = None,
    keepdim: bool = False,
    name: str | None = None,
    *,
    correction: float = 1,
    out: Tensor | None = None,
) -> Tensor:
    """
    Computes the variance of ``x`` along ``axis`` .

    .. note::
        Alias Support: The parameter name ``input`` can be used as an alias for ``x``, and ``dim`` can be used as an alias for ``axis``.
        For example, ``var(input=tensor_x, dim=1, ...)`` is equivalent to ``var(x=tensor_x, axis=1, ...)``.

    Args:
        x (Tensor): The input Tensor with data type float16, float32, float64.
            alias: ``input``.
        axis (int|list|tuple|None, optional): The axis along which to perform variance calculations. ``axis`` should be int, list(int) or tuple(int).
            alias: ``dim``.

            - If ``axis`` is a list/tuple of dimension(s), variance is calculated along all element(s) of ``axis`` . ``axis`` or element(s) of ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            - If ``axis`` or element(s) of ``axis`` is less than 0, it works the same way as :math:`axis + D` .
            - If ``axis`` is None, variance is calculated over all elements of ``x``. Default is None.

        unbiased (bool, optional): Whether to use the unbiased estimation. If ``unbiased`` is True, the divisor used in the computation is :math:`N - 1`, where :math:`N` represents the number of elements along ``axis`` , otherwise the divisor is :math:`N`. Default is True.
        keep_dim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the input unless keep_dim is true. Default is False.
        name (str|None, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
        correction (int|float, optional): Difference between the sample size and sample degrees of freedom.
            Defaults to 1 (Bessel's correction). If unbiased is specified, this parameter is ignored.
        out (Tensor|None, optional): Output tensor. Default is None.

    Returns:
        Tensor, results of variance along ``axis`` of ``x``, with the same data type as ``x``.

    Examples:
        .. code-block:: pycon

            >>> import paddle

            >>> x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
            >>> out1 = paddle.var(x)
            >>> print(out1.numpy())
            2.6666667
            >>> out2 = paddle.var(x, axis=1)
            >>> print(out2.numpy())
            [1.         4.3333335]
    """
    if unbiased is not None and correction != 1:
        raise ValueError("Only one of unbiased and correction may be given")

    if unbiased is not None:
        actual_correction = 1.0 if unbiased else 0.0
    else:
        actual_correction = float(correction)

    if paddle.is_compiled_with_cuda() and in_dynamic_or_pir_mode():
        return _C_ops.var(
            x,
            axis if axis is not None else [],
            keepdim,
            unbiased,
            actual_correction,
            out=out,
        )

    if not in_dynamic_mode():
        check_variable_and_dtype(
            x, 'x', ['float16', 'float32', 'float64'], 'var'
        )

    u = mean(x, axis, True, name)
    dtype = paddle.float32 if x.dtype == paddle.float16 else x.dtype
    out_tensor = paddle.sum(
        paddle.pow((x - u), 2), axis, keepdim=keepdim, name=name, dtype=dtype
    )

    n = paddle.cast(paddle.numel(x), "int64") / paddle.cast(
        paddle.numel(out_tensor), "int64"
    )
    n = n.astype(dtype)

    if actual_correction != 0:
        corrected_n = n - actual_correction
        corrected_n = paddle.maximum(
            corrected_n, paddle.zeros_like(corrected_n)
        )
        if paddle.in_dynamic_mode() and paddle.any(corrected_n <= 0):
            warnings.warn("Degrees of freedom is <= 0.", stacklevel=2)
    else:
        corrected_n = n

    corrected_n.stop_gradient = True
    out_tensor /= corrected_n

    def _replace_nan(out):
        indices = paddle.arange(out.numel(), dtype='int64')
        out_nan = paddle.index_fill(
            out.flatten(), indices, 0, float('nan')
        ).reshape(out.shape)
        return out_nan

    if 0 in x.shape:
        out_tensor = _replace_nan(out_tensor)
    if len(x.shape) == 0 and actual_correction == 0:
        out_tensor = paddle.to_tensor(0, stop_gradient=out_tensor.stop_gradient)

    if out_tensor.dtype != x.dtype:
        result = out_tensor.astype(x.dtype)
    else:
        result = out_tensor

    if out is not None:
        paddle.assign(result, out)
        return out

    return result


@overload
def std(
    x: Tensor,
    axis: int | Sequence[int] | None = None,
    unbiased: bool | None = None,
    keepdim: bool = False,
    name: str | None = None,
    *,
    correction: float = 1,
    out: Tensor | None = None,
) -> Tensor: ...


@overload
def std(
    input: Tensor,
    dim: int | Sequence[int] | None = None,
    *,
    correction: float = 1,
    keepdim: bool = False,
    out: Tensor | None = None,
) -> Tensor: ...


@use_first_signature
def std(*args: Any, **kwargs: Any) -> Tensor:
    """
    Computes the standard-deviation of ``x`` along ``axis`` .

    .. note::
        Alias Support:
        1. The parameter name ``input`` can be used as an alias for ``x``.
        2. The parameter name ``dim`` can be used as an alias for ``axis``.

    Args:
        x (Tensor): The input Tensor with data type float16, float32, float64.
        axis (int|list|tuple|None, optional): The axis along which to perform
            standard-deviation calculations. ``axis`` should be int, list(int)
            or tuple(int). If ``axis`` is a list/tuple of dimension(s),
            standard-deviation is calculated along all element(s) of ``axis`` .
            ``axis`` or element(s) of ``axis`` should be in range [-D, D),
            where D is the dimensions of ``x`` . If ``axis`` or element(s) of
            ``axis`` is less than 0, it works the same way as :math:`axis + D` .
            If ``axis`` is None, standard-deviation is calculated over all
            elements of ``x``. Default is None.
        unbiased (bool, optional): Whether to use the unbiased estimation. If
            ``unbiased`` is True, the standard-deviation is calculated via the
            unbiased estimator. If ``unbiased`` is True,  the divisor used in
            the computation is :math:`N - 1`, where :math:`N` represents the
            number of elements along ``axis`` , otherwise the divisor is
            :math:`N`. Default is True.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        name (str|None, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.
        correction (int|float, optional): Difference between the sample size and sample degrees of freedom.
            Defaults to 1 (Bessel's correction). If unbiased is specified, this parameter is ignored.
        out (Tensor|None, optional): Output tensor. Default is None.

    Returns:
        Tensor, results of standard-deviation along ``axis`` of ``x``, with the
        same data type as ``x``.

    Examples:
        .. code-block:: pycon

            >>> import paddle

            >>> x = paddle.to_tensor([[1.0, 2.0, 3.0], [1.0, 4.0, 5.0]])
            >>> out1 = paddle.std(x)
            >>> print(out1.numpy())
            1.6329932

            >>> out2 = paddle.std(x, unbiased=False)
            >>> print(out2.numpy())
            1.490712

            >>> out3 = paddle.std(x, axis=1)
            >>> print(out3.numpy())
            [1.       2.081666]

            >>> out4 = paddle.std(x=x, keepdim=True, correction=1.5)
            >>> print(out4.numpy())
            [[1.721326]]

            >>> out5 = paddle.std(input=x, dim=[0, 1])
            >>> print(out5.numpy())
            1.6329932

    """
    if paddle.is_compiled_with_cuda() and in_dynamic_or_pir_mode():
        x = args[0] if len(args) > 0 else kwargs.get('x', kwargs.get('input'))
        axis = (
            args[1]
            if len(args) > 1
            else kwargs.get('axis', kwargs.get('dim', None))
        )
        unbiased = args[2] if len(args) > 2 else kwargs.get('unbiased', None)
        keepdim = args[3] if len(args) > 3 else kwargs.get('keepdim', False)
        correction = kwargs.get('correction', 1.0)
        out = kwargs.get('out', None)

        axis = axis if axis is not None else []
        if unbiased is not None:
            correction = 1.0 if unbiased else 0.0
        else:
            correction = float(correction)
        return _C_ops.std(x, axis, keepdim, unbiased, correction, out=out)

    variance = var(*args, **kwargs)
    if 'out' in kwargs:
        return paddle.sqrt(variance, out=kwargs['out'])
    return paddle.sqrt(variance)


def numel(x: Tensor, name: str | None = None) -> Tensor:
    """
    Returns the number of elements for a tensor, which is a 0-D int64 Tensor with shape [].

    Args:
        x (Tensor): The input Tensor, it's data type can be bool, float16, float32, float64, uint8, int8, int32, int64, complex64, complex128.
        name (str|None, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: The number of elements for the input Tensor, whose shape is [].

    Examples:
        .. code-block:: pycon

            >>> import paddle

            >>> x = paddle.full(shape=[4, 5, 7], fill_value=0, dtype='int32')
            >>> numel = paddle.numel(x)
            >>> print(numel.numpy())
            140


    """
    if in_dynamic_or_pir_mode():
        return _C_ops.numel(x)
    else:
        if not isinstance(x, Variable):
            raise TypeError("x must be a Tensor in numel")
        helper = LayerHelper('numel', **locals())
        out = helper.create_variable_for_type_inference(
            dtype=core.VarDesc.VarType.INT64
        )
        helper.append_op(type='size', inputs={'Input': x}, outputs={'Out': out})
        return out


@overload
def nanmedian(
    x: Tensor,
    axis: int,
    keepdim: bool = ...,
    mode: Literal['min'] = ...,
    name: str | None = ...,
) -> tuple[Tensor, Tensor]: ...


@overload
def nanmedian(
    x: Tensor,
    axis: int | Sequence[int] | None = ...,
    keepdim: bool = ...,
    mode: Literal['avg', 'min'] = ...,
    name: str | None = ...,
) -> Tensor: ...


def nanmedian(
    x,
    axis=None,
    keepdim=False,
    mode='avg',
    name=None,
):
    r"""
    Compute the median along the specified axis, while ignoring NaNs.

    If the valid count of elements is a even number,
    the average value of both elements in the middle is calculated as the median.

    Args:
        x (Tensor): The input Tensor, it's data type can be int32, int64, float16, bfloat16, float32, float64.
        axis (None|int|list|tuple, optional):
            The axis along which to perform median calculations ``axis`` should be int or list of int.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        mode (str, optional): Whether to use mean or min operation to calculate
            the nanmedian values when the input tensor has an even number of non-NaN elements
            along the dimension ``axis``. Support 'avg' and 'min'. Default is 'avg'.
        name (str|None, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor or tuple of Tensor. If ``mode`` == 'min' and ``axis`` is int, the result
        will be a tuple of two tensors (nanmedian value and nanmedian index). Otherwise,
        only nanmedian value will be returned.

    Examples:
        .. code-block:: pycon

            >>> import paddle
            >>> x = paddle.to_tensor(
            ...     [
            ...         [float('nan'), 2.0, 3.0],
            ...         [0.0, 1.0, 2.0],
            ...     ]
            ... )

            >>> y1 = x.nanmedian()
            >>> print(y1.numpy())
            2.0

            >>> y2 = x.nanmedian(0)
            >>> print(y2.numpy())
            [0.  1.5 2.5]

            >>> y3 = x.nanmedian(0, keepdim=True)
            >>> print(y3.numpy())
            [[0.  1.5 2.5]]

            >>> y4 = x.nanmedian((0, 1))
            >>> print(y4.numpy())
            2.0

            >>> y5 = x.nanmedian(mode='min')
            >>> print(y5.numpy())
            2.0

            >>> y6, y6_index = x.nanmedian(0, mode='min')
            >>> print(y6.numpy())
            [0. 1. 2.]
            >>> print(y6_index.numpy())
            [1 1 1]

            >>> y7, y7_index = x.nanmedian(1, mode='min')
            >>> print(y7.numpy())
            [2. 1.]
            >>> print(y7_index.numpy())
            [1 1]

            >>> y8 = x.nanmedian((0, 1), mode='min')
            >>> print(y8.numpy())
            2.0
    """
    if not isinstance(x, (Variable, paddle.pir.Value)):
        raise TypeError("In median, the input x should be a Tensor.")

    if isinstance(axis, (list, tuple)) and len(axis) == 0:
        raise ValueError("Axis list should not be empty.")

    if mode not in ('avg', 'min'):
        raise ValueError(f"Mode {mode} is not supported. Must be avg or min.")

    need_index = (axis is not None) and (not isinstance(axis, (list, tuple)))
    if axis is None:
        axis = []
    elif isinstance(axis, tuple):
        axis = list(axis)
    elif isinstance(axis, int):
        axis = [axis]

    if in_dynamic_or_pir_mode():
        out, indices = _C_ops.nanmedian(x, axis, keepdim, mode)
        indices.stop_gradient = True
    else:
        check_variable_and_dtype(
            x,
            'X',
            ['int32', 'int64', 'float16', 'float32', 'float64', 'uint16'],
            'nanmedian',
        )

        helper = LayerHelper('nanmedian', **locals())
        attrs = {'axis': axis, 'keepdim': keepdim, 'mode': mode}
        out = helper.create_variable_for_type_inference(x.dtype)
        indices = helper.create_variable_for_type_inference(paddle.int64)
        helper.append_op(
            type='nanmedian',
            inputs={'X': x},
            outputs={'Out': out, 'MedianIndex': indices},
            attrs=attrs,
        )
        indices.stop_gradient = True
    if mode == 'min' and need_index:
        return out, indices
    else:
        return out


@overload
def median(
    x: Tensor,
    axis: int = ...,
    keepdim: bool = ...,
    mode: Literal['min'] = ...,
    name: str | None = ...,
    *,
    out: tuple[Tensor, Tensor] | None = ...,
) -> tuple[Tensor, Tensor]: ...


@overload
def median(
    x: Tensor,
    axis: int | None = ...,
    keepdim: bool = ...,
    mode: Literal['avg', 'min'] = ...,
    name: str | None = ...,
) -> Tensor: ...


@param_two_alias_one_default(["x", "input"], ["axis", "dim"], ["mode", 'min'])
def median(
    x,
    axis=None,
    keepdim=False,
    mode='avg',
    name=None,
    *,
    out=None,
):
    """
    Compute the median along the specified axis.

    .. note::
        Alias Support: The parameter name ``input`` can be used as an alias for ``x``, and ``dim`` can be used as an alias for ``axis``.
        When an alias replacement occurs, the default parameter for mode setting is min instead of avg.
        For example, ``median(input=tensor_x, dim=1, ...)`` is equivalent to ``median(x=tensor_x, axis=1, ...)``.

    Args:
        x (Tensor): The input Tensor, it's data type can be bfloat16, float16, float32, float64, int32, int64.
            alias: ``input``.
        axis (int|None, optional): The axis along which to perform median calculations ``axis`` should be int.
            alias: ``dim``.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is None, median is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        mode (str, optional): Whether to use mean or min operation to calculate
            the median values when the input tensor has an even number of elements
            in the dimension ``axis``. Support 'avg' and 'min'. Default is 'avg'.
            When an alias replacement occurs, the default parameter for mode setting is min instead of avg.
        name (str|None, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor or tuple of Tensor.
        If ``mode`` == 'avg', the result will be the tensor of median values;
        If ``mode`` == 'min' and ``axis`` is None, the result will be the tensor of median values;
        If ``mode`` == 'min' and ``axis`` is not None, the result will be a tuple of two tensors
        containing median values and their indices.

        When ``mode`` == 'avg', if data type of ``x`` is float64, data type of median values will be float64,
        otherwise data type of median values will be float32.
        When ``mode`` == 'min', the data type of median values will be the same as ``x``. The data type of
        indices will be int64.

    Examples:
        .. code-block:: pycon

            >>> import paddle
            >>> import numpy as np

            >>> x = paddle.arange(12).reshape([3, 4])
            >>> print(x)
            Tensor(shape=[3, 4], dtype=int64, place=Place(cpu), stop_gradient=True,
            [[0 , 1 , 2 , 3 ],
             [4 , 5 , 6 , 7 ],
             [8 , 9 , 10, 11]])

            >>> y1 = paddle.median(x)
            >>> print(y1)
            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
            5.50000000)

            >>> y2 = paddle.median(x, axis=0)
            >>> print(y2)
            Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
            [4., 5., 6., 7.])

            >>> y3 = paddle.median(x, axis=1)
            >>> print(y3)
            Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
            [1.50000000, 5.50000000, 9.50000000])

            >>> y4 = paddle.median(x, axis=0, keepdim=True)
            >>> print(y4)
            Tensor(shape=[1, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[4., 5., 6., 7.]])

            >>> y5 = paddle.median(x, mode='min')
            >>> print(y5)
            Tensor(shape=[], dtype=int64, place=Place(cpu), stop_gradient=True,
            5)

            >>> median_value, median_indices = paddle.median(x, axis=1, mode='min')
            >>> print(median_value)
            Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True,
            [1, 5, 9])
            >>> print(median_indices)
            Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True,
            [1, 1, 1])

            >>> # cases containing nan values
            >>> x = paddle.to_tensor(
            ...     np.array(
            ...         [
            ...             [1, float('nan'), 3, float('nan')],
            ...             [1, 2, 3, 4],
            ...             [float('nan'), 1, 2, 3],
            ...         ]
            ...     )
            ... )

            >>> y6 = paddle.median(x, axis=-1, keepdim=True)
            >>> print(y6)
            Tensor(shape=[3, 1], dtype=float64, place=Place(cpu), stop_gradient=True,
            [[nan       ],
             [2.50000000],
             [nan       ]])

            >>> median_value, median_indices = paddle.median(x, axis=1, keepdim=True, mode='min')
            >>> print(median_value)
            Tensor(shape=[3, 1], dtype=float64, place=Place(cpu), stop_gradient=True,
            [[nan],
             [2. ],
             [nan]])
            >>> print(median_indices)
            Tensor(shape=[3, 1], dtype=int64, place=Place(cpu), stop_gradient=True,
            [[1],
             [1],
             [0]])
    """
    if not isinstance(x, (Variable, paddle.pir.Value)):
        raise TypeError("In median, the input x should be a Tensor.")

    if isinstance(axis, (list, tuple)) and len(axis) == 0:
        raise ValueError("Axis list should not be empty.")
    dims = len(x.shape)
    if dims == 0:
        assert axis in [
            -1,
            0,
            None,
        ], 'when input 0-D, axis can only be [-1, 0] or default None'
    elif axis is not None:
        if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
            raise ValueError(
                "In median, axis should be none or an integer in range [-rank(x), rank(x))."
            )

    if mode not in ('avg', 'min'):
        raise ValueError(f"Mode {mode} is not supported. Must be avg or min.")
    need_idx = axis is not None
    if axis is None:
        is_flatten = True

    if axis is None:
        axis = []
    elif isinstance(axis, int):
        axis = [axis]

    if mode == "avg" and not x.dtype == paddle.float64:
        x = x.astype(paddle.float32)

    values, indices = _C_ops.median(x, axis, keepdim, mode, out=out)
    indices.stop_gradient = True

    if mode == 'min' and need_idx:
        return values, indices
    else:
        return values


def _compute_quantile(
    x: Tensor,
    q: float | Sequence[float] | Tensor | None,
    axis: int | list[int] | None = None,
    keepdim: bool = False,
    interpolation: _Interpolation = "linear",
    ignore_nan: bool = False,
    out: Tensor | None = None,
) -> Tensor:
    """
    Compute the quantile of the input along the specified axis.

    Args:
        x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
        q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
            a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
            If q is a 0-D Tensor, it will be treated as an integer or float.
        axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is a list, quantile is calculated over all elements of given axes.
            If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        interpolation (str, optional): The interpolation method to use
            when the desired quantile falls between two data points. Must be one of linear, higher,
            lower, midpoint and nearest. Default is linear.
        ignore_nan: (bool, optional): Whether to ignore NaN of input Tensor.
            If ``ignore_nan`` is True, it will calculate nanquantile.
            Otherwise it will calculate quantile. Default is False.
        out (Tensor|None, optional): The output tensor. Default: None.

    Returns:
        Tensor, results of quantile along ``axis`` of ``x``.
        In order to obtain higher precision, data type of results will be float64.
    """
    # Validate x
    if not isinstance(x, (Variable, paddle.pir.Value)):
        raise TypeError("input x should be a Tensor.")

    # Validate q
    if isinstance(q, (int, float)):
        q = [q]
    elif isinstance(q, (list, tuple)):
        if len(q) <= 0:
            raise ValueError("q should not be empty")
    elif isinstance(q, (Variable, paddle.pir.Value)):
        if len(q.shape) > 1:
            raise ValueError("q should be a 0-D tensor or a 1-D tensor")
        if len(q.shape) == 0:
            q = [q]
    else:
        raise TypeError(
            "Type of q should be int, float, list or tuple, or tensor"
        )
    for q_num in q:
        # we do not validate tensor q in static mode
        if not in_dynamic_mode() and isinstance(
            q_num, (Variable, paddle.pir.Value)
        ):
            break
        if q_num < 0 or q_num > 1:
            raise ValueError("q should be in range [0, 1]")

    if interpolation not in [
        "linear",
        "lower",
        "higher",
        "nearest",
        "midpoint",
    ]:
        raise ValueError(
            f"interpolation must be one of 'linear', 'lower', 'higher', 'nearest' or 'midpoint', but got {interpolation}"
        )
    # Validate axis
    dims = len(x.shape)
    out_shape = list(x.shape)
    if axis is None:
        x = paddle.flatten(x)
        axis = 0
        out_shape = [1] * dims
    else:
        if isinstance(axis, list):
            axis_src, axis_dst = [], []
            for axis_single in axis:
                if not isinstance(axis_single, int) or not (
                    axis_single < dims and axis_single >= -dims
                ):
                    raise ValueError(
                        "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))."
                    )
                if axis_single < 0:
                    axis_single = axis_single + dims
                axis_src.append(axis_single)
                out_shape[axis_single] = 1

            axis_dst = list(range(-len(axis), 0))
            x = paddle.moveaxis(x, axis_src, axis_dst)
            if len(axis_dst) == 0:
                x = paddle.flatten(x)
                axis = 0
            else:
                x = paddle.flatten(x, axis_dst[0], axis_dst[-1])
                axis = axis_dst[0]
        else:
            if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
                raise ValueError(
                    "Axis should be None, int, or a list, element should in range [-rank(x), rank(x))."
                )
            if axis < 0:
                axis += dims
            out_shape[axis] = 1

    mask = x.isnan()
    valid_counts = mask.logical_not().sum(axis=axis, keepdim=True)

    indices = []

    for q_num in q:
        if in_dynamic_or_pir_mode():
            q_num = paddle.to_tensor(q_num, dtype=x.dtype)
        if ignore_nan:
            indices.append(q_num * (valid_counts - 1))
        else:
            index = q_num * (valid_counts - 1)
            last_index = x.shape[axis] - 1
            nums = paddle.full_like(index, fill_value=last_index)
            index = paddle.where(mask.any(axis=axis, keepdim=True), nums, index)
            indices.append(index)

    sorted_tensor = paddle.sort(x, axis)

    def _compute_index(index):
        if interpolation == "nearest":
            idx = paddle.round(index).astype(paddle.int32)
            return paddle.take_along_axis(sorted_tensor, idx, axis=axis)

        indices_below = paddle.floor(index).astype(paddle.int32)
        if interpolation != "higher":
            # avoid unnecessary compute
            tensor_below = paddle.take_along_axis(
                sorted_tensor, indices_below, axis=axis
            )
        if interpolation == "lower":
            return tensor_below

        indices_upper = paddle.ceil(index).astype(paddle.int32)
        tensor_upper = paddle.take_along_axis(
            sorted_tensor, indices_upper, axis=axis
        )
        if interpolation == "higher":
            return tensor_upper

        if interpolation == "midpoint":
            return (
                tensor_upper.astype(x.dtype) + tensor_below.astype(x.dtype)
            ) / 2

        weights = (index - indices_below.astype(index.dtype)).astype(x.dtype)
        # "linear"
        return paddle.lerp(
            tensor_below.astype(x.dtype),
            tensor_upper.astype(x.dtype),
            weights,
        )

    outputs = []

    # TODO(chenjianye): replace the for-loop to directly take elements.
    for index in indices:
        ret = _compute_index(index)
        if not keepdim:
            ret = paddle.squeeze(ret, axis=axis)
        else:
            ret = ret.reshape(out_shape)
        outputs.append(ret)

    if len(outputs) > 1:
        outputs = paddle.stack(outputs, 0)
    else:
        outputs = outputs[0]

    if out is not None:
        paddle.assign(outputs, out)
        return out
    return outputs


@param_two_alias(["x", "input"], ["axis", "dim"])
def quantile(
    x: Tensor,
    q: float | Sequence[float] | Tensor,
    axis: int | list[int] | None = None,
    keepdim: bool = False,
    interpolation: _Interpolation = "linear",
    name: str | None = None,
    *,
    out: Tensor | None = None,
) -> Tensor:
    """
    Compute the quantile of the input along the specified axis.
    If any values in a reduced row are NaN, then the quantiles for that reduction will be NaN.

    Args:
        x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
        q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
            a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
            If q is a 0-D Tensor, it will be treated as an integer or float.
        axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is a list, quantile is calculated over all elements of given axes.
            If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        interpolation (str, optional): The interpolation method to use
            when the desired quantile falls between two data points. Must be one of linear, higher,
            lower, midpoint and nearest. Default is linear.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.
        out (Tensor|None, optional): The output tensor. Default: None.


    Returns:
        Tensor, results of quantile along ``axis`` of ``x``.

    Examples:
        .. code-block:: pycon

            >>> import paddle

            >>> y = paddle.arange(0, 8, dtype="float32").reshape([4, 2])
            >>> print(y)
            Tensor(shape=[4, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[0., 1.],
             [2., 3.],
             [4., 5.],
             [6., 7.]])

            >>> y1 = paddle.quantile(y, q=0.5, axis=[0, 1])
            >>> print(y1)
            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
            3.50000000)

            >>> y2 = paddle.quantile(y, q=0.5, axis=1)
            >>> print(y2)
            Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
            [0.50000000, 2.50000000, 4.50000000, 6.50000000])

            >>> y3 = paddle.quantile(y, q=[0.3, 0.5], axis=0)
            >>> print(y3)
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[1.80000007, 2.80000019],
             [3.        , 4.        ]])

            >>> y[0, 0] = float("nan")
            >>> y4 = paddle.quantile(y, q=0.8, axis=1, keepdim=True)
            >>> print(y4)
            Tensor(shape=[4, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[nan       ],
             [2.79999995],
             [4.80000019],
             [6.80000019]])

    """
    return _compute_quantile(
        x,
        q,
        axis=axis,
        keepdim=keepdim,
        interpolation=interpolation,
        ignore_nan=False,
        out=out,
    )


@param_two_alias(["x", "input"], ["axis", "dim"])
def nanquantile(
    x: Tensor,
    q: float | Sequence[float] | Tensor,
    axis: list[int] | int | None = None,
    keepdim: bool = False,
    interpolation: _Interpolation = "linear",
    *,
    out: Tensor | None = None,
) -> Tensor:
    """
    Compute the quantile of the input as if NaN values in input did not exist.
    If all values in a reduced row are NaN, then the quantiles for that reduction will be NaN.

    Args:
        x (Tensor): The input Tensor, it's data type can be float32, float64, int32, int64.
            Alias: ``input``.
        q (int|float|list|Tensor): The q for calculate quantile, which should be in range [0, 1]. If q is a list or
            a 1-D Tensor, each element of q will be calculated and the first dimension of output is same to the number of ``q`` .
            If q is a 0-D Tensor, it will be treated as an integer or float.
        axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
            ``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is a list, quantile is calculated over all elements of given axes.
            If ``axis`` is None, quantile is calculated over all elements of ``x``. Default is None.
            Alias: ``dim``.
        keepdim (bool, optional): Whether to reserve the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor is the same as ``x`` except in the reduced
            dimensions(it is of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed in ``axis`` . Default is False.
        interpolation (str, optional): The interpolation method to use
            when the desired quantile falls between two data points. Must be one of linear, higher,
            lower, midpoint and nearest. Default is linear.

    Keyword Args:
        out (Tensor|None, optional): The output tensor. Default: None.

    Returns:
        Tensor, results of quantile along ``axis`` of ``x``.

    Examples:
        .. code-block:: pycon

            >>> import paddle

            >>> x = paddle.to_tensor(
            ...     [
            ...         [0, 1, 2, 3, 4],
            ...         [5, 6, 7, 8, 9],
            ...     ],
            ...     dtype="float32",
            ... )
            >>> x[0, 0] = float("nan")

            >>> y1 = paddle.nanquantile(x, q=0.5, axis=[0, 1])
            >>> print(y1)
            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
            5.)

            >>> y2 = paddle.nanquantile(x, q=0.5, axis=1)
            >>> print(y2)
            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True,
            [2.50000000, 7.        ])

            >>> y3 = paddle.nanquantile(x, q=[0.3, 0.5], axis=0)
            >>> print(y3)
            Tensor(shape=[2, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[5.        , 2.50000000, 3.50000000, 4.50000000, 5.50000000],
             [5.        , 3.50000000, 4.50000000, 5.50000000, 6.50000000]])

            >>> y4 = paddle.nanquantile(x, q=0.8, axis=1, keepdim=True)
            >>> print(y4)
            Tensor(shape=[2, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[3.40000010],
             [8.19999981]])

            >>> nan = paddle.full(shape=[2, 3], fill_value=float("nan"))
            >>> y5 = paddle.nanquantile(nan, q=0.8, axis=1, keepdim=True)
            >>> print(y5)
            Tensor(shape=[2, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
            [[nan],
             [nan]])

    """
    return _compute_quantile(
        x,
        q,
        axis=axis,
        keepdim=keepdim,
        interpolation=interpolation,
        ignore_nan=True,
        out=out,
    )