# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import numpy as np

import paddle

from ..base.framework import in_dygraph_mode, in_pir_mode
from .initializer.constant import Constant
from .initializer.dirac import Dirac
from .initializer.initializer import calculate_gain  # noqa: F401
from .initializer.kaiming import KaimingNormal, KaimingUniform
from .initializer.normal import Normal, TruncatedNormal
from .initializer.orthogonal import Orthogonal
from .initializer.uniform import Uniform
from .initializer.xavier import XavierNormal, XavierUniform


def _calculate_fan_in_and_fan_out(var: paddle.Tensor) -> tuple[int, int]:
    """Estimate the fan_in and fan_out of a layer parameter from its shape.

    The values cannot be determined perfectly from a shape alone; the
    estimate is intended to be correct for matrix multiply weights and
    for convolution kernels.

    Args:
        var: variable whose ``shape`` is used to derive fan_in and fan_out.

    Returns:
        tuple ``(fan_in, fan_out)``.
    """
    dims = var.shape
    # A missing or empty shape is handled as rank 0.
    rank = len(dims) if dims else 0

    if rank == 0:
        return (1, 1)
    if rank == 1:
        # Single axis: the same extent serves as both fans.
        extent = dims[0]
        return (extent, extent)
    if rank == 2:
        # Simple matrix multiply: axis 0 is fan_in, axis 1 is fan_out.
        return (dims[0], dims[1])

    # Otherwise assume a convolutional kernel. In PaddlePaddle the kernel
    # shape is [num_filters, num_filter_channels, ...], where the remaining
    # dimensions are the filter size.
    kernel_volume = np.prod(dims[2:])
    return (int(dims[1] * kernel_volume), int(dims[0] * kernel_volume))


def kaiming_uniform_(
    tensor: paddle.Tensor,
    a: float = 0,
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
) -> paddle.Tensor | None:
    """Initialize a tensor inplace with the Kaiming uniform method.

    Builds a ``KaimingUniform`` initializer from the arguments and applies
    it to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        a (float, optional): The negative slope of the rectifier used after this
            layer; forwarded to the initializer as ``negative_slope``. Defaults to 0.
        mode (str, optional): Mode to compute the fan. Choose from ["fan_in", "fan_out"].
            When set to 'fan_in', the fan_in parameter is used for initialization.
            When set to 'fan_out', the out_features of trainable Tensor will be used.
            Default is 'fan_in'.
        nonlinearity (str, optional): Nonlinearity method name. Defaults to "leaky_relu".

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return KaimingUniform(
        mode=mode, nonlinearity=nonlinearity, negative_slope=a
    )(tensor)


def kaiming_normal_(
    tensor: paddle.Tensor,
    a: float = 0,
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
) -> paddle.Tensor | None:
    """Initialize a tensor inplace with the Kaiming normal method.

    Builds a ``KaimingNormal`` initializer from the arguments and applies
    it to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        a (float, optional): The negative slope of the rectifier used after this
            layer; forwarded to the initializer as ``negative_slope``. Defaults to 0.
        mode (str, optional): Mode to compute the fan. Choose from ["fan_in", "fan_out"].
            When set to 'fan_in', the fan_in parameter is used for initialization.
            When set to 'fan_out', the out_features of trainable Tensor will be used.
            Default is 'fan_in'.
        nonlinearity (str, optional): Nonlinearity method name. Defaults to "leaky_relu".

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    initializer = KaimingNormal(
        mode=mode,
        nonlinearity=nonlinearity,
        negative_slope=a,
    )
    return initializer(tensor)


def xavier_uniform_(
    tensor: paddle.Tensor,
    gain: float = 1.0,
    fan_in: float | None = None,
    fan_out: float | None = None,
) -> paddle.Tensor | None:
    """Initialize a tensor inplace with the Xavier uniform method.

    Builds a ``XavierUniform`` initializer from the arguments and applies
    it to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        gain (float, optional): Scaling factor. Default is 1.0.
        fan_in (float|None, optional): fan_in for Xavier initialization, which is
            inferred from the Tensor. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization, which is
            inferred from the Tensor. Default is None.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return XavierUniform(fan_in=fan_in, fan_out=fan_out, gain=gain)(tensor)


def xavier_normal_(
    tensor: paddle.Tensor,
    gain: float = 1.0,
    fan_in: float | None = None,
    fan_out: float | None = None,
) -> paddle.Tensor | None:
    """Initialize a tensor inplace with the Xavier normal method.

    Builds a ``XavierNormal`` initializer from the arguments and applies
    it to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        gain (float, optional): Scaling factor. Default is 1.0.
        fan_in (float|None, optional): fan_in for Xavier initialization, which is
            inferred from the Tensor. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization, which is
            inferred from the Tensor. Default is None.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    initializer = XavierNormal(fan_in=fan_in, fan_out=fan_out, gain=gain)
    return initializer(tensor)


def uniform_(
    tensor: paddle.Tensor,
    a: float = 0.0,
    b: float = 1.0,
) -> paddle.Tensor | None:
    """Initialize a tensor inplace with the uniform method.

    Builds a ``Uniform`` initializer with ``low=a`` and ``high=b`` and
    applies it to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        a (float, optional): Lower boundary of the uniform distribution. Default is 0.0.
        b (float, optional): Upper boundary of the uniform distribution. Default is 1.0.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return Uniform(high=b, low=a)(tensor)


def normal_(
    tensor: paddle.Tensor,
    mean: float = 0.0,
    std: float = 1.0,
) -> paddle.Tensor | None:
    """Initialize a tensor inplace with the normal method.

    Builds a ``Normal`` initializer from the arguments and applies it to
    ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        mean (float|complex, optional): mean of the normal distribution. Default is 0.0.
        std (float, optional): standard deviation of the normal distribution. Default is 1.0.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return Normal(std=std, mean=mean)(tensor)


def trunc_normal_(
    tensor: paddle.Tensor,
    mean: float = 0.0,
    std: float = 1.0,
    a: float = -2.0,
    b: float = 2.0,
) -> paddle.Tensor | None:
    """Initialize a tensor inplace with the truncated normal method.

    Builds a ``TruncatedNormal`` initializer from the arguments and applies
    it to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        mean (float|complex, optional): mean of the normal distribution. Default is 0.0.
        std (float, optional): standard deviation of the normal distribution. Default is 1.0.
        a (float, optional): The minimum cutoff value. Default is -2.0.
        b (float, optional): The maximum cutoff value. Default is 2.0.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    initializer = TruncatedNormal(a=a, b=b, mean=mean, std=std)
    return initializer(tensor)


def constant_(
    tensor: paddle.Tensor,
    val: float,
) -> paddle.Tensor | None:
    """Initialize a tensor inplace with the constant method.

    Builds a ``Constant`` initializer with ``value=val`` and applies it to
    ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        val (float32|float64): constant value to initialize the parameter.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return Constant(value=val)(tensor)


def ones_(
    tensor: paddle.Tensor,
) -> paddle.Tensor | None:
    """Fill the input Tensor with the scalar value 1.

    Applies a ``Constant`` initializer with ``value=1.0`` to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return Constant(value=1.0)(tensor)


def zeros_(
    tensor: paddle.Tensor,
) -> paddle.Tensor | None:
    """Fill the input Tensor with the scalar value 0.

    Applies a ``Constant`` initializer with ``value=0.0`` to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return Constant(value=0.0)(tensor)


def dirac_(
    tensor: paddle.Tensor,
    groups: int = 1,
) -> paddle.Tensor | None:
    """Initialize the 3D/4D/5D Tensor with Dirac delta function.

    Builds a ``Dirac`` initializer with the given ``groups`` and applies it
    to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        groups (int|None, optional): 0-dimension of the Tensor will be divided by groups,
            each group has the same value. Default: 1.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return Dirac(groups=groups)(tensor)


def eye_(
    tensor: paddle.Tensor,
) -> paddle.Tensor | None:
    """Fill the 2-dimensional input Tensor with the identity matrix.

    Args:
        tensor (Tensor): Paddle Tensor; its shape must have exactly two dimensions.

    Returns:
        Tensor|None: ``None`` in dygraph mode, where the identity matrix
        built by ``paddle.eye`` is handed to ``tensor`` through
        ``_share_underline_tensor_to``; in PIR mode, the newly created
        identity Tensor.

    Raises:
        AssertionError: If ``tensor`` is not 2-dimensional.
        NotImplementedError: If running in neither dygraph mode nor PIR mode.
    """
    ndim = len(tensor.shape)
    if ndim != 2:
        raise AssertionError(
            f"Only support 2 dimensional tensor, but got {ndim}."
        )

    # Reject unsupported execution modes before building anything.
    dygraph = in_dygraph_mode()
    if not dygraph and not in_pir_mode():
        raise NotImplementedError(
            'Only support run in dygraph mode or PIR mode.'
        )

    identity = paddle.eye(tensor.shape[0], tensor.shape[1], dtype=tensor.dtype)
    if dygraph:
        # Dygraph: pass the result on to ``tensor``; nothing is returned.
        identity._share_underline_tensor_to(tensor)
        return None
    # PIR: the caller receives the new identity tensor.
    return identity


def orthogonal_(
    tensor: paddle.Tensor,
    gain: float = 1,
) -> paddle.Tensor | None:
    """Fill the input Tensor with a (semi) orthogonal matrix.

    Builds an ``Orthogonal`` initializer with the given ``gain`` and applies
    it to ``tensor``.

    Args:
        tensor (Tensor): Paddle Tensor to initialize.
        gain (float, optional): The multiplication coefficient for initialized tensor. Default: 1.

    Returns:
        Tensor|None: Whatever the initializer returns when called on ``tensor``.
    """
    return Orthogonal(gain=gain)(tensor)