2020-07-11 23:26:24 +08:00
|
|
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
|
|
|
|
#
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
2024-07-10 00:06:32 +08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
2022-11-29 18:50:04 +08:00
|
|
|
import logging
|
2024-07-10 00:06:32 +08:00
|
|
|
from typing import TYPE_CHECKING, Any, Literal
|
2022-11-29 18:50:04 +08:00
|
|
|
|
2022-08-12 18:37:08 +08:00
|
|
|
import paddle
|
2024-05-16 14:47:10 +08:00
|
|
|
from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode
|
2023-09-07 17:26:19 +08:00
|
|
|
from paddle.base.data_feeder import check_variable_and_dtype
|
|
|
|
|
from paddle.base.framework import _create_tensor
|
|
|
|
|
from paddle.base.log_helper import get_logger
|
2022-11-29 18:50:04 +08:00
|
|
|
from paddle.framework import ParamAttr, core
|
|
|
|
|
from paddle.nn import functional as F
|
|
|
|
|
from paddle.nn.initializer import Constant
|
2022-09-27 11:54:11 +08:00
|
|
|
from paddle.nn.quant.lsq import FakeQuantActLSQPlus, FakeQuantWeightLSQPlus
|
2022-11-29 18:50:04 +08:00
|
|
|
from paddle.utils import unique_name
|
2020-07-11 23:26:24 +08:00
|
|
|
|
2023-11-07 11:14:06 +08:00
|
|
|
from ..layer.layers import Layer
|
|
|
|
|
|
2024-07-10 00:06:32 +08:00
|
|
|
if TYPE_CHECKING:
|
|
|
|
|
from typing_extensions import Never, TypeAlias
|
|
|
|
|
|
|
|
|
|
from paddle import Tensor
|
|
|
|
|
from paddle._typing import DTypeLike, Size2
|
|
|
|
|
|
|
|
|
|
_QuantType: TypeAlias = Literal[
|
|
|
|
|
'abs_max',
|
|
|
|
|
'moving_average_abs_max',
|
|
|
|
|
'channel_wise_abs_max',
|
|
|
|
|
'lsq_weight',
|
|
|
|
|
'channel_wise_lsq_weight',
|
|
|
|
|
'lsq_act',
|
|
|
|
|
]
|
|
|
|
|
|
2020-07-11 23:26:24 +08:00
|
|
|
# Public API of this module: fake-quant layers, quantized wrappers for common
# layers, and the output-scale helpers. (Some listed names are defined later
# in the file.)
__all__ = [
    'FakeQuantAbsMax',
    'FakeQuantMovingAverageAbsMax',
    'FakeQuantChannelWiseAbsMax',
    'QuantizedConv2D',
    'QuantizedConv2DTranspose',
    'QuantizedLinear',
    'MovingAverageAbsMaxScale',
    'MAOutputScaleLayer',
    'FakeQuantMAOutputScaleLayer',
    'QuantStub',
    'QuantizedRowParallelLinear',
    'QuantizedColumnParallelLinear',
    'QuantizedMatmul',
]
|
|
|
|
|
|
2022-10-23 20:01:27 +08:00
|
|
|
# Module-level logger at INFO level with a timestamped format, created via
# paddle.base.log_helper.get_logger.
_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
|
2020-07-11 23:26:24 +08:00
|
|
|
|
2021-06-09 10:23:34 +08:00
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class FakeQuantAbsMax(Layer):
    r"""
    FakeQuantAbsMax layer does the abs_max quant and then dequant.
    Its computational formula is described as below:

    :math:`scale = max(abs(X))`
    :math:`range = 2^{bit\_length - 1} - 1`
    :math:`Out = round(X / scale * range) * scale / range`
    """

    def __init__(
        self,
        name: str | None = None,
        quant_bits: int = 8,
        dtype: DTypeLike = 'float32',
        quant_on_weight: bool = False,
        reduce_type: Literal['max'] | None = None,
    ) -> None:
        """
        Args:
            name: Optional prefix for the generated scale variable name;
                falls back to ``'quant_dequant.scale'``.
            quant_bits: Bit length passed to the fake-quantize op.
            dtype: Data type of the scale parameter/tensor.
            quant_on_weight: When True, the scale is created eagerly as a
                non-trainable parameter (so it is part of the state dict);
                otherwise it is created on the fly in ``forward``.
            reduce_type: When ``'max'``, the scale is all-reduced with MAX
                across distributed workers before quantization.
        """
        super().__init__()
        self._quant_bits = quant_bits
        self._name = name
        self._reduce_type = reduce_type
        # Fix: honor the `dtype` argument instead of silently relying on the
        # default `self._dtype` set by the Layer base class; this matches
        # FakeQuantChannelWiseAbsMax, which assigns `self._dtype = dtype`.
        # Backward compatible: the default value is 'float32' either way.
        self._dtype = dtype
        scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale'
        self._scale_name = unique_name.generate(scale_prefix)
        if quant_on_weight:
            scale_attr = ParamAttr(
                name=self._scale_name,
                initializer=Constant(0.001),
                trainable=False,
            )
            self._scale = self.create_parameter(
                shape=[1], attr=scale_attr, dtype=self._dtype
            )
            self._scale.stop_gradient = True
        else:
            self._scale = None

    def forward(self, input: Tensor) -> Tensor:
        if in_dynamic_mode():
            # Pre-create the output buffer the C op result is copied into.
            quant_out = _create_tensor(
                type=input.type,
                name=f"{input.name}.quantized.dequantized",
                shape=input.shape,
                dtype=input.dtype,
                persistable=False,
            )
            out_scale = self._scale
            if self._reduce_type == "max":
                # Synchronize the scale across workers so every rank
                # quantizes with the same (largest) scale.
                paddle.distributed.all_reduce(
                    out_scale, op=paddle.distributed.ReduceOp.MAX
                )

            # Fix: use an explicit `is None` check instead of tensor
            # truthiness. The previous `if not out_scale:` would wrongly
            # discard and recreate the stored scale parameter whenever its
            # value happened to be exactly 0 (and matches the check used in
            # FakeQuantChannelWiseAbsMax).
            if out_scale is None:
                out_scale = _create_tensor(
                    type=core.VarDesc.VarType.DENSE_TENSOR,
                    name=self._scale_name,
                    shape=[1],
                    dtype=self._dtype,
                    persistable=False,
                )
                out_scale.stop_gradient = True
            (
                out1,
                out2,
            ) = _C_ops.fake_quantize_dequantize_abs_max(
                input, self._quant_bits, 1
            )
            # The C op returns fresh tensors; copy them in place into the
            # pre-created buffers so `quant_out`/`out_scale` stay the
            # externally visible handles.
            _C_ops.assign_out_(out1, quant_out)
            _C_ops.assign_out_(out2, out_scale)
            return quant_out

        # Static-graph path: append the equivalent fake-quant op.
        check_variable_and_dtype(input, 'input', ['float32'], "FakeQuantAbsMax")
        attrs = {'bit_length': self._quant_bits}
        inputs = {"X": [input]}
        quant_out = self._helper.create_variable(
            name=f"{input.name}.quantized.dequantized",
            dtype=input.dtype,
            type=core.VarDesc.VarType.DENSE_TENSOR,
            persistable=False,
            stop_gradient=False,
        )
        out_scale = self._scale
        if out_scale is None:
            out_scale = self._helper.create_variable(
                name=self._scale_name,
                dtype=self._dtype,
                type=core.VarDesc.VarType.DENSE_TENSOR,
                persistable=False,
                stop_gradient=True,
            )
        outputs = {"Out": [quant_out], "OutScale": [out_scale]}

        self._helper.append_op(
            type="fake_quantize_dequantize_abs_max",
            inputs=inputs,
            outputs=outputs,
            attrs=attrs,
        )

        return quant_out
|
|
|
|
|
|
|
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class FakeQuantMovingAverageAbsMax(Layer):
    r"""
    FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant.
    Its computational formula is described as below:

    :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
    :math:`range = 2^{bit\_length - 1} - 1`
    :math:`Out = round(X / scale * range) * scale / range`
    """

    def __init__(
        self,
        name: str | None = None,
        moving_rate: float = 0.9,
        quant_bits: int = 8,
        dtype: DTypeLike = 'float32',
        reduce_type: Literal['max'] | None = None,
    ) -> None:
        # name: optional prefix for the generated parameter names.
        # moving_rate: EMA decay used in the scale update formula above.
        # quant_bits: bit length passed to the fake-quantize op.
        # dtype: data type of the scale/state/accum parameters.
        # reduce_type: when 'max', the scale is all-reduced (MAX) across
        #   distributed workers before quantization.
        super().__init__()
        self._moving_rate = moving_rate
        self._quant_bits = quant_bits
        self._reduce_type = reduce_type
        # Running scale estimate; non-trainable, persisted with the model.
        scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale'
        scale_attr = ParamAttr(
            name=unique_name.generate(scale_prefix),
            initializer=Constant(0.001),
            trainable=False,
        )
        self._scale = self.create_parameter(
            shape=[1], attr=scale_attr, dtype=dtype
        )
        self._scale.stop_gradient = True

        # Moving-average state counter (denominator term of the formula).
        state_prefix = f"{name}.state" if name else 'quant_dequant.state'
        state_attr = ParamAttr(
            name=unique_name.generate(state_prefix),
            initializer=Constant(1),
            trainable=False,
        )
        self._state = self.create_parameter(
            shape=[1], attr=state_attr, dtype=dtype
        )
        self._state.stop_gradient = True

        # Moving-average accumulator (numerator term of the formula).
        accum_prefix = f"{name}.accum" if name else 'quant_dequant.accum'
        accum_attr = ParamAttr(
            name=unique_name.generate(accum_prefix),
            initializer=Constant(1),
            trainable=False,
        )
        self._accum = self.create_parameter(
            shape=[1], attr=accum_attr, dtype=dtype
        )
        self._accum.stop_gradient = True

    def forward(self, input: Tensor) -> Tensor:
        if in_dynamic_mode():
            # NOTE(review): `attrs` is not consumed by the _C_ops call
            # below (which takes positional arguments) — appears vestigial
            # from the old _legacy_C_ops calling convention.
            attrs = (
                'moving_rate',
                self._moving_rate,
                'bit_length',
                self._quant_bits,
                'is_test',
                not self.training,
            )
            # Pre-created buffer the quantize-dequantize result is copied into.
            quant_out = _create_tensor(
                type=input.type,
                name=f"{input.name}.quantized.dequantized",
                shape=input.shape,
                dtype=input.dtype,
                persistable=False,
            )
            if self._reduce_type == "max":
                # Synchronize the scale across workers so every rank
                # quantizes with the same (largest) scale.
                paddle.distributed.all_reduce(
                    self._scale, op=paddle.distributed.ReduceOp.MAX
                )

            # The moving-average statistics are only updated while training.
            state = self._state if self.training else None
            accum = self._accum if self.training else None

            (
                out1,
                out2,
                out3,
                out4,
            ) = _C_ops.fake_quantize_dequantize_moving_average_abs_max(
                input,
                self._scale,
                accum,
                state,
                self._moving_rate,
                self._quant_bits,
                not self.training,
                1,
            )
            # Copy the op outputs back into the persistent buffers in place.
            _C_ops.assign_out_(out1, quant_out)
            if out2._is_initialized():
                _C_ops.assign_out_(out2, self._scale)
            # NOTE(review): `if state:` / `if accum:` rely on tensor
            # truthiness of the shape-[1] parameters (both start at 1 and
            # only grow), effectively an `is not None` check — confirm.
            if state:
                _C_ops.assign_out_(out3, state)
            if accum:
                _C_ops.assign_out_(out4, accum)
            return quant_out

        # Static-graph path: append the equivalent fake-quant op.
        check_variable_and_dtype(
            input, 'input', ['float32'], "FakeQuantMovingAverageAbsMax"
        )
        attrs = {
            'moving_rate': self._moving_rate,
            'bit_length': self._quant_bits,
            'is_test': not self.training,
        }
        inputs = {"X": [input], "InScale": [self._scale]}
        quant_out = self._helper.create_variable(
            name=f"{input.name}.quantized.dequantized",
            dtype=input.dtype,
            type=core.VarDesc.VarType.DENSE_TENSOR,
            persistable=False,
            stop_gradient=False,
        )
        outputs = {"Out": [quant_out], "OutScale": [self._scale]}

        # State/accum are only wired into the op graph when training, so
        # inference graphs do not mutate the moving-average statistics.
        if self.training:
            inputs['InState'] = [self._state]
            inputs['InAccum'] = [self._accum]
            outputs['OutState'] = [self._state]
            outputs['OutAccum'] = [self._accum]

        self._helper.append_op(
            type="fake_quantize_dequantize_moving_average_abs_max",
            inputs=inputs,
            outputs=outputs,
            attrs=attrs,
        )

        return quant_out
|
|
|
|
|
|
|
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class FakeQuantChannelWiseAbsMax(Layer):
    """
    FakeQuantChannelWiseAbsMax layer does per-channel abs_max
    quantize-dequantize along ``quant_axis``. It is only applicable to
    weights (see the assertion in ``__init__``).
    """

    def __init__(
        self,
        name: str | None = None,
        channel_num: int | None = None,
        quant_bits: int = 8,
        quant_axis: int = 0,
        dtype: DTypeLike = 'float32',
        quant_on_weight: bool = False,
        reduce_type: Literal['max'] | None = None,
    ) -> None:
        # name: optional prefix for the generated scale variable name.
        # channel_num: number of channels along `quant_axis`; sizes the
        #   per-channel scale parameter.
        # quant_bits: bit length passed to the fake-quantize op.
        # quant_axis: axis along which per-channel scales are computed.
        # quant_on_weight: must be True (channel-wise is weight-only).
        # reduce_type: when 'max', the scales are all-reduced (MAX) across
        #   distributed workers before quantization.
        # NOTE(review): `assert` is stripped under `python -O`; kept as-is
        # because callers may rely on AssertionError being raised.
        assert quant_on_weight, (
            "Channel_wise only can be used on weight quantization."
        )
        super().__init__()
        self._quant_bits = quant_bits
        self._quant_axis = quant_axis
        self._dtype = dtype
        self._name = name
        self._channel_num = channel_num
        self._reduce_type = reduce_type
        scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale'
        self._scale_name = unique_name.generate(scale_prefix)
        # Always True given the assertion above; kept for symmetry with the
        # other fake-quant layers.
        if quant_on_weight:
            scale_attr = ParamAttr(
                name=self._scale_name,
                initializer=Constant(0.0),
                trainable=False,
            )
            self._scale = self.create_parameter(
                shape=[self._channel_num], attr=scale_attr, dtype=self._dtype
            )
            self._scale.stop_gradient = True
        else:
            self._scale = None

    def forward(self, input: Tensor) -> Tensor:
        if in_dynamic_mode():
            # NOTE(review): `attrs` is not consumed by the _C_ops call
            # below (which takes positional arguments) — appears vestigial.
            attrs = (
                'bit_length',
                self._quant_bits,
                'quant_axis',
                self._quant_axis,
            )
            # Pre-created buffer the quantize-dequantize result is copied into.
            quant_out = _create_tensor(
                type=input.type,
                name=f"{input.name}.quantized.dequantized",
                shape=input.shape,
                dtype=input.dtype,
                persistable=False,
            )

            out_scale = self._scale
            if self._reduce_type == "max":
                # Synchronize the per-channel scales across workers.
                paddle.distributed.all_reduce(
                    out_scale, op=paddle.distributed.ReduceOp.MAX
                )
            if out_scale is None:
                out_scale = _create_tensor(
                    type=core.VarDesc.VarType.DENSE_TENSOR,
                    name=self._scale_name,
                    shape=[self._channel_num],
                    dtype=self._dtype,
                    persistable=False,
                )
                out_scale.stop_gradient = True

            (
                out,
                scale,
            ) = _C_ops.fake_channel_wise_quantize_dequantize_abs_max(
                input, self._quant_bits, 1, self._quant_axis
            )
            # Copy the op outputs back into the persistent buffers in place.
            _C_ops.assign_out_(out, quant_out)
            _C_ops.assign_out_(scale, out_scale)
            return quant_out

        # Static-graph path: append the equivalent fake-quant op.
        check_variable_and_dtype(
            input, 'input', ['float32'], "FakeQuantChannelWiseAbsMax"
        )
        attrs = {
            'bit_length': self._quant_bits,
            'round_type': 1,
            'quant_axis': self._quant_axis,
        }
        inputs = {"X": [input]}
        quant_out = self._helper.create_variable(
            name=f"{input.name}.quantized.dequantized",
            dtype=input.dtype,
            type=core.VarDesc.VarType.DENSE_TENSOR,
            persistable=False,
            stop_gradient=False,
        )
        out_scale = self._scale
        # NOTE(review): unlike the dynamic branch's `is None`, this uses
        # truthiness; for a static Variable this behaves like `is None` —
        # confirm before changing.
        if not out_scale:
            out_scale = self._helper.create_variable(
                name=self._scale_name,
                dtype=self._dtype,
                type=core.VarDesc.VarType.DENSE_TENSOR,
                persistable=False,
                stop_gradient=True,
            )
        outputs = {"Out": [quant_out], "OutScale": [out_scale]}

        self._helper.append_op(
            type="fake_channel_wise_quantize_dequantize_abs_max",
            inputs=inputs,
            outputs=outputs,
            attrs=attrs,
        )

        return quant_out
|
|
|
|
|
|
|
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class MovingAverageAbsMaxScale(Layer):
    def __init__(
        self,
        name: str | None = None,
        moving_rate: float = 0.9,
        dtype: DTypeLike = 'float32',
        reduce_type: Literal['max'] | None = None,
    ) -> None:
        r"""
        MovingAverageMaxScale layer is used to calculating the output quantization
        scale of Layer. Its computational formula is described as below:

        :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
        :math:`Out = X`
        """
        # Unlike the fake-quant layers, this one only observes the input to
        # track a moving-average scale; the input itself passes through.
        super().__init__()
        self._moving_rate = moving_rate
        self._reduce_type = reduce_type
        # Running output-scale estimate; non-trainable, persisted.
        scale_prefix = f'{name}.scale' if name else 'outscale.scale'
        scale_name = unique_name.generate(scale_prefix)
        scale_attr = ParamAttr(
            name=scale_name, initializer=Constant(0), trainable=False
        )
        self._scale = self.create_parameter(
            shape=[1], attr=scale_attr, dtype=dtype
        )
        self._scale.stop_gradient = True

        # Moving-average state counter (denominator term of the formula).
        state_prefix = f"{name}.state" if name else 'outscale.state'
        state_attr = ParamAttr(
            name=unique_name.generate(state_prefix),
            initializer=Constant(0),
            trainable=False,
        )
        self._state = self.create_parameter(
            shape=[1], attr=state_attr, dtype=dtype
        )
        self._state.stop_gradient = True

        # Moving-average accumulator (numerator term of the formula).
        accum_prefix = f"{name}.accum" if name else 'outscale.accum'
        accum_attr = ParamAttr(
            name=unique_name.generate(accum_prefix),
            initializer=Constant(0),
            trainable=False,
        )
        self._accum = self.create_parameter(
            shape=[1], attr=accum_attr, dtype=dtype
        )
        self._accum.stop_gradient = True

    def forward(self, input: Tensor) -> Tensor:
        if in_dynamic_mode():
            # Attribute list for the legacy op (name/value pairs, unpacked
            # positionally below via *attrs).
            attrs = (
                'moving_rate',
                self._moving_rate,
                'is_test',
                not self.training,
            )

            # Output buffer; the op copies the input through to it.
            quant_out = _create_tensor(
                type=input.type,
                name=f"{input.name}.tmp",
                shape=input.shape,
                dtype=input.dtype,
                persistable=False,
            )
            if self._reduce_type == "max":
                # Synchronize the scale across distributed workers.
                paddle.distributed.all_reduce(
                    self._scale, op=paddle.distributed.ReduceOp.MAX
                )

            # The moving-average statistics are only updated while training.
            state = self._state if self.training else None
            accum = self._accum if self.training else None

            out, _, _, _ = _legacy_C_ops.moving_average_abs_max_scale(
                input,
                accum,
                state,
                quant_out,
                self._scale,
                state,
                accum,
                *attrs,
            )
            return out

        # Static-graph path: append the equivalent op.
        check_variable_and_dtype(
            input, 'input', ['float32', 'float64'], 'MovingAverageAbsMaxScale'
        )

        attrs = {'moving_rate': self._moving_rate, 'is_test': not self.training}
        inputs = {"X": [input]}
        quant_out = self._helper.create_variable(
            name=f"{input.name}.tmp",
            dtype=input.dtype,
            type=core.VarDesc.VarType.DENSE_TENSOR,
            persistable=False,
            stop_gradient=False,
        )
        outputs = {"Out": [quant_out], "OutScale": [self._scale]}

        # State/accum are only wired into the op graph when training, so
        # inference graphs do not mutate the moving-average statistics.
        if self.training:
            inputs['InState'] = [self._state]
            inputs['InAccum'] = [self._accum]
            outputs['OutState'] = [self._state]
            outputs['OutAccum'] = [self._accum]

        self._helper.append_op(
            type="moving_average_abs_max_scale",
            inputs=inputs,
            outputs=outputs,
            attrs=attrs,
        )

        return quant_out
|
|
|
|
|
|
|
|
|
|
|
2021-07-05 10:26:24 +08:00
|
|
|
# Public alias: QuantStub marks a quantization entry point and is implemented
# by MovingAverageAbsMaxScale (pass-through that records an output scale).
QuantStub = MovingAverageAbsMaxScale
|
2020-07-11 23:26:24 +08:00
|
|
|
|
|
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class QuantizedConv2D(Layer):
    """
    The computational logic of QuantizedConv2D is the same with Conv2D.
    The only difference is that its inputs are all fake quantized.
    """

    weight: Tensor
    bias: Tensor

    def __init__(
        self,
        layer: Layer,
        weight_bits: int = 8,
        activation_bits: int = 8,
        moving_rate: float = 0.9,
        weight_quantize_type: _QuantType = 'abs_max',
        activation_quantize_type: _QuantType = 'abs_max',
        weight_pre_layer: Layer | None = None,
        act_pre_layer: Layer | None = None,
        weight_quant_layer: Layer | None = None,
        act_quant_layer: Layer | None = None,
    ) -> None:
        # layer: the Conv2D instance to wrap; its configuration and
        #   parameters are adopted (weight/bias are shared, not copied).
        # weight_bits / activation_bits: bit widths for the two fake quants.
        # moving_rate: EMA decay forwarded to moving-average quantizers.
        # *_quantize_type: which fake-quant implementation to build via
        #   _get_fake_quant_type when no custom *_quant_layer is given.
        # *_pre_layer / *_quant_layer: optional layer *classes* (note: they
        #   are called with no arguments to instantiate).
        super().__init__()
        # For Conv2D
        self._groups = layer._groups
        self._stride = layer._stride
        self._padding = layer._padding
        self._padding_mode = layer._padding_mode
        if self._padding_mode != 'zeros':
            self._reversed_padding_repeated_twice = (
                layer._reversed_padding_repeated_twice
            )
        self._dilation = layer._dilation
        self._data_format = layer._data_format
        self.weight = layer.weight
        self.bias = layer.bias

        # For FakeQuant
        # Conv2D weights are quantized per output channel (axis 0).
        self._conv2d_quant_axis = 0
        if weight_quant_layer is not None:
            self._fake_quant_weight = weight_quant_layer()
        else:
            self._fake_quant_weight = _get_fake_quant_type(
                weight_quantize_type,
                name=self.weight.name,
                moving_rate=moving_rate,
                quant_bits=weight_bits,
                dtype=self._dtype,
                quant_on_weight=True,
                channel_num=self.weight.shape[self._conv2d_quant_axis],
                quant_axis=self._conv2d_quant_axis,
            )
        if act_quant_layer is not None:
            self._fake_quant_input = act_quant_layer()
        else:
            self._fake_quant_input = _get_fake_quant_type(
                activation_quantize_type,
                name=layer.full_name(),
                moving_rate=moving_rate,
                quant_bits=activation_bits,
                dtype=self._dtype,
                quant_on_weight=False,
            )

        # Optional preprocessing layers applied before fake quantization.
        self._act_preprocess = (
            act_pre_layer() if act_pre_layer is not None else None
        )
        self._weight_preprocess = (
            weight_pre_layer() if weight_pre_layer is not None else None
        )

    def forward(self, input: Tensor) -> Tensor:
        # Fake-quantize the activation (optionally preprocessed first).
        if self._act_preprocess is not None:
            input = self._act_preprocess(input)
        quant_input = self._fake_quant_input(input)

        # Fake-quantize the weight (optionally preprocessed first).
        weight = self.weight
        if self._weight_preprocess is not None:
            weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)

        # Non-zero padding modes are realized with an explicit F.pad, after
        # which the conv itself must use zero padding.
        if self._padding_mode != 'zeros':
            quant_input = F.pad(
                quant_input,
                self._reversed_padding_repeated_twice,
                mode=self._padding_mode,
                data_format=self._data_format,
            )
            # NOTE(review): this mutates instance state inside forward();
            # after the first call, padding is handled exclusively by the
            # F.pad above on every subsequent call.
            self._padding = 0

        return F.conv2d(
            quant_input,
            quant_weight,
            bias=self.bias,
            padding=self._padding,
            stride=self._stride,
            dilation=self._dilation,
            groups=self._groups,
            data_format=self._data_format,
        )
|
2020-07-11 23:26:24 +08:00
|
|
|
|
|
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class QuantizedConv2DTranspose(Layer):
|
2021-08-18 14:18:23 +08:00
|
|
|
"""
|
[Docs]fix math api en docs issue (#47448)
* fix_docx_stanh
* fix einsum api en docs issue
* fix model api en docs issue
* for codestyle
* fix_einsum.py_einsum, test=document_fix
* fix_model.py_Model, test=ducument_fix
* fix_creation.py_meshgrid, test=document_fix
* fix_linalg.py_slogdet, test=document_fix
* fix_loss.py_SoftMarginLoss_CrossEntropyLoss_NLLLoss_BCELoss, test=document_fix
* norm.py_SyncBatchNorm, test=document-fix
* norm.py_SyncBatchNorm, test=document_fix
* norm.py_SyncBatchNorm, test=document_fix
* list18-30, test=document_fix
* refix_list1-15, test=document_fix
* deletefiles, test=document_fix
* fixedapi_pre-commit, test=document_fix
* fix_list31-45, test=document_fix
* list111, test=document_fix
* some_fix, test=document_fix
* some_fix, test=document_fix
* somefix, test=document_fix
* somefix, test=document_fix
* refix, test=document_fix
* refix, test=document_fix
* refix, test=document_fix
* refix, test=document_fix
* rerfix, test=document_fix
Co-authored-by: Ligoml <limengliu@tiaozhan.com>
2022-11-22 17:31:35 +08:00
|
|
|
|
2021-08-18 14:18:23 +08:00
|
|
|
The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose.
|
|
|
|
|
The only difference is that its inputs are all fake quantized.
|
2022-09-14 21:56:19 +08:00
|
|
|
|
2021-08-18 14:18:23 +08:00
|
|
|
Examples:
|
[CodeStyle][Xdoctest][8,12,15,16,18-20,22-26,29-32,34-36,38-40,42,43,45-48,50,51,60-65,75,80,82,83,85-87,89-94,99-141,143,145,147-167,169-187,207-220,257,258,260-275,277-313,315-325][API Compatibility] Update shape output format in documentation examples (#76574)
---------
Co-authored-by: SigureMo <sigure.qaq@gmail.com>
2025-11-26 10:31:46 +08:00
|
|
|
.. code-block:: pycon
|
2023-08-03 11:03:39 +08:00
|
|
|
|
|
|
|
|
>>> import paddle
|
|
|
|
|
>>> import paddle.nn as nn
|
[CodeStyle][Xdoctest][8,12,15,16,18-20,22-26,29-32,34-36,38-40,42,43,45-48,50,51,60-65,75,80,82,83,85-87,89-94,99-141,143,145,147-167,169-187,207-220,257,258,260-275,277-313,315-325][API Compatibility] Update shape output format in documentation examples (#76574)
---------
Co-authored-by: SigureMo <sigure.qaq@gmail.com>
2025-11-26 10:31:46 +08:00
|
|
|
>>> from paddle.nn.quant.quant_layers import (
|
|
|
|
|
... QuantizedConv2DTranspose,
|
|
|
|
|
... )
|
2023-08-03 11:03:39 +08:00
|
|
|
|
[CodeStyle][Xdoctest][8,12,15,16,18-20,22-26,29-32,34-36,38-40,42,43,45-48,50,51,60-65,75,80,82,83,85-87,89-94,99-141,143,145,147-167,169-187,207-220,257,258,260-275,277-313,315-325][API Compatibility] Update shape output format in documentation examples (#76574)
---------
Co-authored-by: SigureMo <sigure.qaq@gmail.com>
2025-11-26 10:31:46 +08:00
|
|
|
>>> x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0)
|
2023-08-03 11:03:39 +08:00
|
|
|
>>> conv = nn.Conv2DTranspose(4, 6, (3, 3))
|
|
|
|
|
>>> conv_quantized = QuantizedConv2DTranspose(conv)
|
|
|
|
|
>>> y_quantized = conv_quantized(x_var)
|
|
|
|
|
>>> y_var = conv(x_var)
|
|
|
|
|
>>> print(y_var.shape)
|
[CodeStyle][Xdoctest][8,12,15,16,18-20,22-26,29-32,34-36,38-40,42,43,45-48,50,51,60-65,75,80,82,83,85-87,89-94,99-141,143,145,147-167,169-187,207-220,257,258,260-275,277-313,315-325][API Compatibility] Update shape output format in documentation examples (#76574)
---------
Co-authored-by: SigureMo <sigure.qaq@gmail.com>
2025-11-26 10:31:46 +08:00
|
|
|
paddle.Size([2, 6, 10, 10])
|
2023-08-03 11:03:39 +08:00
|
|
|
>>> print(y_quantized.shape)
|
[CodeStyle][Xdoctest][8,12,15,16,18-20,22-26,29-32,34-36,38-40,42,43,45-48,50,51,60-65,75,80,82,83,85-87,89-94,99-141,143,145,147-167,169-187,207-220,257,258,260-275,277-313,315-325][API Compatibility] Update shape output format in documentation examples (#76574)
---------
Co-authored-by: SigureMo <sigure.qaq@gmail.com>
2025-11-26 10:31:46 +08:00
|
|
|
paddle.Size([2, 6, 10, 10])
|
[Docs]fix math api en docs issue (#47448)
* fix_docx_stanh
* fix einsum api en docs issue
* fix model api en docs issue
* for codestyle
* fix_einsum.py_einsum, test=document_fix
* fix_model.py_Model, test=ducument_fix
* fix_creation.py_meshgrid, test=document_fix
* fix_linalg.py_slogdet, test=document_fix
* fix_loss.py_SoftMarginLoss_CrossEntropyLoss_NLLLoss_BCELoss, test=document_fix
* norm.py_SyncBatchNorm, test=document-fix
* norm.py_SyncBatchNorm, test=document_fix
* norm.py_SyncBatchNorm, test=document_fix
* list18-30, test=document_fix
* refix_list1-15, test=document_fix
* deletefiles, test=document_fix
* fixedapi_pre-commit, test=document_fix
* fix_list31-45, test=document_fix
* list111, test=document_fix
* some_fix, test=document_fix
* some_fix, test=document_fix
* somefix, test=document_fix
* somefix, test=document_fix
* refix, test=document_fix
* refix, test=document_fix
* refix, test=document_fix
* refix, test=document_fix
* rerfix, test=document_fix
Co-authored-by: Ligoml <limengliu@tiaozhan.com>
2022-11-22 17:31:35 +08:00
|
|
|
|
2021-08-18 14:18:23 +08:00
|
|
|
"""
|
|
|
|
|
|
2024-07-10 00:06:32 +08:00
|
|
|
weight: Tensor
|
|
|
|
|
bias: Tensor
|
|
|
|
|
|
2022-10-23 20:01:27 +08:00
|
|
|
def __init__(
    self,
    layer: Layer,
    weight_bits: int = 8,
    activation_bits: int = 8,
    moving_rate: float = 0.9,
    weight_quantize_type: _QuantType = 'abs_max',
    activation_quantize_type: _QuantType = 'abs_max',
    weight_pre_layer: Layer | None = None,
    act_pre_layer: Layer | None = None,
    weight_quant_layer: Layer | None = None,
    act_quant_layer: Layer | None = None,
) -> None:
    r"""
    Constructor.

    The arguments are the same as ImperativeQuantAware.
    Wraps a float ``Conv2DTranspose`` layer: shares its parameters and
    hyper-parameters, and installs fake-quant observers for the weight
    and the input activations.
    """
    super().__init__()
    # For Conv2DTranspose: mirror the wrapped layer's configuration and
    # share its trainable parameters (no copies are made).
    self._groups = layer._groups
    self._stride = layer._stride
    self._padding = layer._padding
    self._output_padding = layer.output_padding
    self._dilation = layer._dilation
    self._data_format = layer._data_format
    self.weight = layer.weight
    self.bias = layer.bias
    # For FakeQuant: per-channel weight quantization runs along axis 1 of
    # the conv-transpose kernel.
    self._conv2d_transpose_quant_axis = 1
    if weight_quant_layer is not None:
        # A user-supplied quant layer class overrides the built-in ones.
        self._fake_quant_weight = weight_quant_layer()
    else:
        self._fake_quant_weight = _get_fake_quant_type(
            weight_quantize_type,
            name=self.weight.name,
            moving_rate=moving_rate,
            quant_bits=weight_bits,
            dtype=self._dtype,
            quant_on_weight=True,
            channel_num=self.weight.shape[
                self._conv2d_transpose_quant_axis
            ],
            quant_axis=self._conv2d_transpose_quant_axis,
        )
    if act_quant_layer is not None:
        self._fake_quant_input = act_quant_layer()
    else:
        self._fake_quant_input = _get_fake_quant_type(
            activation_quantize_type,
            name=layer.full_name(),
            moving_rate=moving_rate,
            quant_bits=activation_bits,
            dtype=self._dtype,
            quant_on_weight=False,
        )

    # Optional user hooks applied to the activation/weight *before* they
    # are fake-quantized (None means no preprocessing).
    self._act_preprocess = (
        act_pre_layer() if act_pre_layer is not None else None
    )
    self._weight_preprocess = (
        weight_pre_layer() if weight_pre_layer is not None else None
    )
|
2021-08-18 14:18:23 +08:00
|
|
|
|
2024-07-10 00:06:32 +08:00
|
|
|
def forward(
    self, input: Tensor, output_size: Size2 | None = None
) -> Tensor:
    """Run a transposed 2D convolution on fake-quantized inputs.

    Both the activation and the weight are passed through their optional
    preprocess hooks and then through the fake-quant observers before
    ``F.conv2d_transpose`` is invoked.
    """
    x = input if self._act_preprocess is None else self._act_preprocess(input)
    q_input = self._fake_quant_input(x)

    w = (
        self.weight
        if self._weight_preprocess is None
        else self._weight_preprocess(self.weight)
    )
    q_weight = self._fake_quant_weight(w)

    # When an explicit output_size is supplied, output_padding must be 0;
    # otherwise use the value recorded from the wrapped layer.
    out_padding = self._output_padding if output_size is None else 0

    return F.conv2d_transpose(
        q_input,
        q_weight,
        bias=self.bias,
        padding=self._padding,
        output_padding=out_padding,
        stride=self._stride,
        dilation=self._dilation,
        groups=self._groups,
        output_size=output_size,
        data_format=self._data_format,
    )
|
2021-08-18 14:18:23 +08:00
|
|
|
|
|
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class QuantizedLinear(Layer):
    """
    The computational logic of QuantizedLinear is the same with Linear.
    The only difference is that its inputs are all fake quantized.
    """

    weight: Tensor
    bias: Tensor
    name: str

    def __init__(
        self,
        layer: Layer,
        weight_bits: int = 8,
        activation_bits: int = 8,
        moving_rate: float = 0.9,
        weight_quantize_type: _QuantType = 'abs_max',
        activation_quantize_type: _QuantType = 'abs_max',
        weight_pre_layer: Layer | None = None,
        act_pre_layer: Layer | None = None,
        weight_quant_layer: Layer | None = None,
        act_quant_layer: Layer | None = None,
    ) -> None:
        super().__init__()
        # For Linear: share the wrapped float layer's parameters and name.
        self.weight = layer.weight
        self.bias = layer.bias
        self.name = layer.name
        # For FakeQuant: per-channel quantization along axis 1 of the
        # weight matrix.
        self._linear_quant_axis = 1

        if weight_quant_layer is None:
            self._fake_quant_weight = _get_fake_quant_type(
                weight_quantize_type,
                name=self.weight.name,
                moving_rate=moving_rate,
                quant_bits=weight_bits,
                dtype=self._dtype,
                quant_on_weight=True,
                channel_num=self.weight.shape[self._linear_quant_axis],
                quant_axis=self._linear_quant_axis,
                quant_linear=True,
            )
        else:
            # A user-supplied quant layer class overrides the built-in ones.
            self._fake_quant_weight = weight_quant_layer()

        if act_quant_layer is None:
            self._fake_quant_input = _get_fake_quant_type(
                activation_quantize_type,
                name=layer.full_name(),
                moving_rate=moving_rate,
                quant_bits=activation_bits,
                dtype=self._dtype,
                quant_on_weight=False,
            )
        else:
            self._fake_quant_input = act_quant_layer()

        # Optional user hooks applied before fake-quantization.
        self._act_preprocess = (
            None if act_pre_layer is None else act_pre_layer()
        )
        self._weight_preprocess = (
            None if weight_pre_layer is None else weight_pre_layer()
        )

    def forward(self, input: Tensor) -> Tensor:
        """Fake-quantize the input and weight, then apply ``F.linear``."""
        x = (
            input
            if self._act_preprocess is None
            else self._act_preprocess(input)
        )
        q_x = self._fake_quant_input(x)

        w = (
            self.weight
            if self._weight_preprocess is None
            else self._weight_preprocess(self.weight)
        )
        q_w = self._fake_quant_weight(w)

        return F.linear(x=q_x, weight=q_w, bias=self.bias, name=self.name)
|
2020-10-14 21:43:15 +08:00
|
|
|
|
|
|
|
|
|
2022-08-12 18:37:08 +08:00
|
|
|
class QuantizedColumnParallelLinear(Layer):
    """
    Fake-quantized counterpart of ``ColumnParallelLinear``: computes the
    same column-parallel affine transform, but the input activations and
    the weight are fake quantized first.

    Custom ``weight_quant_layer`` / ``act_quant_layer`` classes are not
    supported (asserted ``None`` below).
    """

    weight: Tensor
    bias: Tensor
    name: str
    is_mp: bool
    model_parallel_group: paddle.distributed.collective.Group
    gather_output: bool

    def __init__(
        self,
        layer: Layer,
        weight_bits: int = 8,
        activation_bits: int = 8,
        moving_rate: float = 0.9,
        weight_quantize_type: _QuantType = 'abs_max',
        activation_quantize_type: _QuantType = 'abs_max',
        weight_pre_layer: Layer | None = None,
        act_pre_layer: Layer | None = None,
        weight_quant_layer: None = None,
        act_quant_layer: None = None,
    ) -> None:
        super().__init__()
        assert weight_quant_layer is None, (
            "When quantizing ColumnParallelLinear, weight_quant_layer should be None."
        )
        assert act_quant_layer is None, (
            "When quantizing ColumnParallelLinear, act_quant_layer should be None."
        )

        # Share the wrapped layer's parameters, name, and parallel config.
        self.weight = layer.weight
        self.bias = layer.bias
        self.name = layer._name
        # For FakeQuant: per-channel quantization along axis 1 of the
        # weight matrix.
        self._linear_quant_axis = 1

        self.is_mp = layer.is_mp
        self.model_parallel_group = layer.model_parallel_group
        self.gather_output = layer.gather_output

        self._fake_quant_weight = _get_fake_quant_type(
            weight_quantize_type,
            name=self.weight.name,
            moving_rate=moving_rate,
            quant_bits=weight_bits,
            dtype=self._dtype,
            quant_on_weight=True,
            channel_num=self.weight.shape[self._linear_quant_axis],
            quant_axis=self._linear_quant_axis,
            # 'max'-reduce the weight scale across ranks when distributed,
            # so every rank quantizes with the same scale.
            reduce_type=(
                'max' if paddle.distributed.get_world_size() > 1 else None
            ),
        )

        self._fake_quant_input = _get_fake_quant_type(
            activation_quantize_type,
            name=layer.full_name(),
            moving_rate=moving_rate,
            quant_bits=activation_bits,
            dtype=self._dtype,
            quant_on_weight=False,
            # NOTE(review): the activation scale is kept rank-local here
            # (unlike the weight scale) — presumably because the input is
            # replicated across the column-parallel group; confirm.
            reduce_type=None,
        )

        # Optional user hooks applied before fake-quantization.
        self._act_preprocess = (
            act_pre_layer() if act_pre_layer is not None else None
        )
        self._weight_preprocess = (
            weight_pre_layer() if weight_pre_layer is not None else None
        )

    def forward(self, input: Tensor) -> Tensor:
        """Identity-broadcast the input, fake-quantize both operands,
        run the local linear, then optionally gather the column shards."""
        if self.is_mp:
            input_parallel = paddle.distributed.collective._c_identity(
                input, group=self.model_parallel_group
            )
        else:
            input_parallel = input

        if self._act_preprocess is not None:
            input_parallel = self._act_preprocess(input_parallel)
        quant_input = self._fake_quant_input(input_parallel)

        weight = self.weight
        if self._weight_preprocess is not None:
            weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)

        output_parallel = F.linear(
            x=quant_input, weight=quant_weight, bias=self.bias, name=self.name
        )

        # Concatenate the per-rank column shards when the caller asked for
        # the full output.
        if self.gather_output and self.is_mp:
            output = paddle.distributed.collective._c_concat(
                output_parallel, group=self.model_parallel_group
            )
        else:
            output = output_parallel
        return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class QuantizedRowParallelLinear(Layer):
    """
    Fake-quantized counterpart of ``RowParallelLinear``: computes the same
    row-parallel affine transform, but the input activations and the
    weight are fake quantized first.

    Custom ``weight_quant_layer`` / ``act_quant_layer`` classes are not
    supported (asserted ``None`` below), because the quantization scales
    are 'max'-reduced across the model-parallel group.
    """

    weight: Tensor
    bias: Tensor
    name: str
    is_mp: bool
    model_parallel_group: paddle.distributed.collective.Group
    # Corrected annotation: this class reads ``input_is_parallel`` from the
    # wrapped layer; it never has a ``gather_output`` attribute.
    input_is_parallel: bool

    def __init__(
        self,
        layer: Layer,
        weight_bits: int = 8,
        activation_bits: int = 8,
        moving_rate: float = 0.9,
        weight_quantize_type: _QuantType = 'abs_max',
        activation_quantize_type: _QuantType = 'abs_max',
        weight_pre_layer: Layer | None = None,
        act_pre_layer: Layer | None = None,
        weight_quant_layer: None = None,
        act_quant_layer: None = None,
    ) -> None:
        super().__init__()
        assert weight_quant_layer is None, (
            "When quantizing RowParallelLinear, weight_quant_layer should be None."
        )
        assert act_quant_layer is None, (
            "When quantizing RowParallelLinear, act_quant_layer should be None."
        )

        # For Linear: share the wrapped layer's parameters and name.
        self.weight = layer.weight
        self.bias = layer.bias
        self.name = layer._name
        # For FakeQuant: per-channel quantization along axis 1 of the
        # weight matrix.
        self._linear_quant_axis = 1

        self.input_is_parallel = layer.input_is_parallel
        self.is_mp = layer.is_mp
        self.model_parallel_group = layer.model_parallel_group

        self._fake_quant_weight = _get_fake_quant_type(
            weight_quantize_type,
            name=self.weight.name,
            moving_rate=moving_rate,
            quant_bits=weight_bits,
            dtype=self._dtype,
            quant_on_weight=True,
            channel_num=self.weight.shape[self._linear_quant_axis],
            quant_axis=self._linear_quant_axis,
            # Keep the weight scale identical on every rank.
            reduce_type=(
                'max' if paddle.distributed.get_world_size() > 1 else None
            ),
        )

        self._fake_quant_input = _get_fake_quant_type(
            activation_quantize_type,
            name=layer.full_name(),
            moving_rate=moving_rate,
            quant_bits=activation_bits,
            dtype=self._dtype,
            quant_on_weight=False,
            # The input is sharded across ranks (see _c_split in forward),
            # so the activation scale is also 'max'-reduced.
            reduce_type=(
                'max' if paddle.distributed.get_world_size() > 1 else None
            ),
        )

        # Optional user hooks applied before fake-quantization.
        self._act_preprocess = (
            act_pre_layer() if act_pre_layer is not None else None
        )
        self._weight_preprocess = (
            weight_pre_layer() if weight_pre_layer is not None else None
        )

    def forward(self, input: Tensor) -> Tensor:
        """Split the input (if needed), fake-quantize both operands, run
        the local linear, all-reduce the partial sums, then add the bias."""
        if self.input_is_parallel or (not self.is_mp):
            input_parallel = input
        else:
            # split last dim
            input_parallel = paddle.distributed.collective._c_split(
                input, group=self.model_parallel_group
            )

        if self._act_preprocess is not None:
            input_parallel = self._act_preprocess(input_parallel)
        quant_input = self._fake_quant_input(input_parallel)

        weight = self.weight
        if self._weight_preprocess is not None:
            weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)

        # The bias is added only after the cross-rank reduction, so the
        # local matmul runs without it.
        output_parallel = F.linear(
            x=quant_input, weight=quant_weight, name=self.name
        )
        if self.is_mp:
            output_ = paddle.distributed.collective._mp_allreduce(
                output_parallel,
                group=self.model_parallel_group,
                use_calc_stream=True,
                use_model_parallel=True,
            )
        else:
            output_ = output_parallel
        output = output_ + self.bias if self.bias is not None else output_
        return output
|
|
|
|
|
|
|
|
|
|
|
2022-12-08 19:06:09 +08:00
|
|
|
class QuantizedMatmul(Layer):
    """
    The computational logic of QuantizedMatmul is the same with Matmul.
    The only difference is that its inputs are all fake quantized.
    """

    def __init__(
        self,
        layer: Layer | None = None,
        weight_bits: int = 8,
        activation_bits: int = 8,
        moving_rate: float = 0.9,
        weight_quantize_type: _QuantType = 'abs_max',
        activation_quantize_type: _QuantType = 'abs_max',
        weight_pre_layer: Layer | None = None,
        act_pre_layer: Layer | None = None,
        weight_quant_layer: Layer | None = None,
        act_quant_layer: Layer | None = None,
    ) -> None:
        super().__init__()

        # For FakeQuant: both matmul operands are treated as activations,
        # each with its own observer instance.
        if act_quant_layer is not None:
            self._fake_quant_x = act_quant_layer()
            self._fake_quant_y = act_quant_layer()
        else:

            def _make_observer():
                # Build one activation fake-quant observer.
                return _get_fake_quant_type(
                    activation_quantize_type,
                    moving_rate=moving_rate,
                    quant_bits=activation_bits,
                    quant_on_weight=False,
                )

            self._fake_quant_x = _make_observer()
            self._fake_quant_y = _make_observer()

        # Optional per-operand preprocess hooks (separate instances).
        self._act_preprocess_x = (
            None if act_pre_layer is None else act_pre_layer()
        )
        self._act_preprocess_y = (
            None if act_pre_layer is None else act_pre_layer()
        )

    def forward(
        self,
        x: Tensor,
        y: Tensor,
        transpose_x: bool = False,
        transpose_y: bool = False,
        name: str | None = None,
    ) -> Tensor:
        """Fake-quantize both operands, then delegate to ``paddle.matmul``."""
        if self._act_preprocess_x is not None:
            x = self._act_preprocess_x(x)
        q_x = self._fake_quant_x(x)

        if self._act_preprocess_y is not None:
            y = self._act_preprocess_y(y)
        q_y = self._fake_quant_y(y)

        return paddle.matmul(q_x, q_y, transpose_x, transpose_y, name)
|
|
|
|
|
|
|
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class MAOutputScaleLayer(Layer):
    """
    Add MovingAverageMaxScale layer to the behind of the input layer.
    Calculate the scale (moving average abs max) for the output of the input layer.
    """

    def __init__(
        self,
        layer: Layer | None = None,
        moving_rate: float = 0.9,
        name: str | None = None,
        dtype: DTypeLike = 'float32',
        reduce_type: Literal['max'] | None = None,
    ) -> None:
        r"""
        Construct
        """
        super().__init__()
        self._layer = layer
        # Default the observer's name to the wrapped layer's full name.
        scale_name = layer.full_name() if name is None else name
        self._ma_output_scale = MovingAverageAbsMaxScale(
            scale_name, moving_rate, dtype, reduce_type
        )

    def forward(self, *inputs: Any, **kwargs: Any) -> Tensor:
        """Run the wrapped layer and record its output's abs-max scale."""
        out = self._layer(*inputs, **kwargs)
        # TODO (jc): support the ops of several outputs
        # Multi-output results pass through unobserved.
        return (
            out
            if isinstance(out, (list, tuple, dict))
            else self._ma_output_scale(out)
        )
|
2021-03-26 19:15:56 +08:00
|
|
|
|
2021-06-09 10:23:34 +08:00
|
|
|
|
2022-02-22 15:07:11 +08:00
|
|
|
class FakeQuantMAOutputScaleLayer(Layer):
    """
    Add FakeQuantMovingAverageAbsMax layer to the behind of the input layer.
    """

    def __init__(
        self,
        layer: Layer,
        weight_bits: int = 8,
        activation_bits: int = 8,
        moving_rate: float = 0.9,
        name: str | None = None,
        reduce_type: Literal['max'] | None = None,
        *args: Never,
        **kwargs: Never,
    ) -> None:
        super().__init__()
        self._layer = layer
        # Observe the wrapped layer's output with a moving-average abs-max
        # fake-quant layer (named after the layer unless overridden).
        self._fake_quant_output = _get_fake_quant_type(
            'moving_average_abs_max',
            name=layer.full_name() if name is None else name,
            moving_rate=moving_rate,
            quant_bits=activation_bits,
            dtype=self._dtype,
            quant_on_weight=False,
            reduce_type=reduce_type,
        )

    def forward(self, *inputs: Any, **kwargs: Any) -> Tensor:
        """Run the wrapped layer, then fake-quantize its (single) output."""
        out = self._layer(*inputs, **kwargs)
        # TODO (jc): support the ops of several outputs
        # Multi-element results pass through unquantized.
        if isinstance(out, (list, tuple)) and len(out) > 1:
            return out
        return self._fake_quant_output(out)
|
2021-07-05 10:26:24 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_fake_quant_type(quant_type, **kwargs):
|
|
|
|
|
call_args = {
|
|
|
|
|
"name": kwargs.get("name", None),
|
|
|
|
|
"quant_bits": kwargs.get("quant_bits", 8),
|
2022-08-12 18:37:08 +08:00
|
|
|
"dtype": kwargs.get("dtype", "float32"),
|
2022-10-23 20:01:27 +08:00
|
|
|
"reduce_type": kwargs.get("reduce_type", None),
|
2021-07-05 10:26:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if quant_type == 'abs_max':
|
|
|
|
|
call_args["quant_on_weight"] = kwargs.get("quant_on_weight", False)
|
|
|
|
|
elif quant_type == 'moving_average_abs_max':
|
|
|
|
|
call_args["moving_rate"] = kwargs.get("moving_rate", 0.9)
|
|
|
|
|
elif quant_type == 'channel_wise_abs_max':
|
|
|
|
|
call_args["quant_on_weight"] = kwargs.get("quant_on_weight", False)
|
|
|
|
|
call_args["channel_num"] = kwargs.get("channel_num", None)
|
|
|
|
|
call_args["quant_axis"] = kwargs.get("quant_axis", 0)
|
|
|
|
|
assert call_args["channel_num"] is not None, (
|
|
|
|
|
"You need to input channel_num"
|
2022-10-23 20:01:27 +08:00
|
|
|
"when you use channel_wise_abs_max strategy."
|
|
|
|
|
)
|
2022-09-27 11:54:11 +08:00
|
|
|
elif quant_type == 'lsq_weight':
|
2024-02-19 16:36:13 +08:00
|
|
|
call_args["all_positive"] = kwargs.get("all_positive", False)
|
2022-09-27 11:54:11 +08:00
|
|
|
call_args["per_channel"] = False
|
|
|
|
|
call_args["channel_num"] = 1
|
|
|
|
|
call_args["quant_linear"] = kwargs.get("quant_linear", False)
|
|
|
|
|
elif quant_type == 'channel_wise_lsq_weight':
|
|
|
|
|
quant_type = 'lsq_weight'
|
2024-02-19 16:36:13 +08:00
|
|
|
call_args["all_positive"] = kwargs.get("all_positive", False)
|
2022-09-27 11:54:11 +08:00
|
|
|
call_args["per_channel"] = True
|
|
|
|
|
call_args["channel_num"] = kwargs.get("channel_num", None)
|
|
|
|
|
call_args["quant_linear"] = kwargs.get("quant_linear", False)
|
|
|
|
|
assert call_args["channel_num"] is not None, (
|
|
|
|
|
"You need to input channel_num"
|
2022-10-23 20:01:27 +08:00
|
|
|
"when you use channel_wise_abs_max strategy."
|
|
|
|
|
)
|
2022-09-27 11:54:11 +08:00
|
|
|
elif quant_type == 'lsq_act':
|
2024-02-19 16:36:13 +08:00
|
|
|
call_args["all_positive"] = kwargs.get("all_positive", False)
|
2022-09-27 11:54:11 +08:00
|
|
|
call_args["symmetric"] = kwargs.get("symmetric", True)
|
2021-07-05 10:26:24 +08:00
|
|
|
fake_quant_map = {
|
|
|
|
|
'abs_max': FakeQuantAbsMax,
|
|
|
|
|
'moving_average_abs_max': FakeQuantMovingAverageAbsMax,
|
2022-09-27 11:54:11 +08:00
|
|
|
'channel_wise_abs_max': FakeQuantChannelWiseAbsMax,
|
|
|
|
|
'lsq_weight': FakeQuantWeightLSQPlus,
|
2022-10-23 20:01:27 +08:00
|
|
|
'lsq_act': FakeQuantActLSQPlus,
|
2021-07-05 10:26:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return fake_quant_map[quant_type](**call_args)
|