#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from abc import ABC, abstractmethod
from dataclasses import dataclass


class ReadLimit:
    """
    Specifies limits on how much data to read from a streaming source when
    determining the latest offset.

    As of Spark 4.2.0, only built-in implementations of :class:`ReadLimit` are supported. Please
    refer to the following classes for the supported types:

    - :class:`ReadAllAvailable`
    - :class:`ReadMinRows`
    - :class:`ReadMaxRows`
    - :class:`ReadMaxFiles`
    - :class:`ReadMaxBytes`
    """


@dataclass
class ReadAllAvailable(ReadLimit):
    """
    A :class:`ReadLimit` that indicates to read all available data, regardless of the given source
    options.
    """


@dataclass
class ReadMinRows(ReadLimit):
    """
    A :class:`ReadLimit` that indicates to read minimum N rows. If there is less than N rows
    available for read, the source should skip producing a new offset to read and wait until more
    data arrives.

    Note that the semantic does not work properly with Trigger.AvailableNow since the source
    may end up waiting forever for more data to arrive. It is the source's responsibility to
    handle this case properly.
    """

    min_rows: int


@dataclass
class ReadMaxRows(ReadLimit):
    """
    A :class:`ReadLimit` that indicates to read maximum N rows. The source should not read more
    than N rows when determining the latest offset.
    """

    max_rows: int


@dataclass
class ReadMaxFiles(ReadLimit):
    """
    A :class:`ReadLimit` that indicates to read maximum N files. The source should not read more
    than N files when determining the latest offset.
    """

    max_files: int


@dataclass
class ReadMaxBytes(ReadLimit):
    """
    A :class:`ReadLimit` that indicates to read maximum N bytes. The source should not read more
    than N bytes when determining the latest offset.
    """

    max_bytes: int


class SupportsTriggerAvailableNow(ABC):
    """
    A mixin interface for streaming sources that support Trigger.AvailableNow. This interface can
    be added to both :class:`DataSourceStreamReader` and :class:`SimpleDataSourceStreamReader`.
    """

    @abstractmethod
    def prepareForTriggerAvailableNow(self) -> None:
        """
        This will be called at the beginning of streaming queries with Trigger.AvailableNow, to let
        the source record the offset for the current latest data at the time (a.k.a the target
        offset for the query). The source must behave as if there is no new data coming in after
        the target offset, i.e., the source must not return an offset higher than the target offset
        when :meth:`DataSourceStreamReader.latestOffset()` is called.

        The source can extend the semantic of "current latest data" based on its own logic, but the
        extended semantic must not violate the expectation that the source will not read any data
        which is added later than the time this method has called.

        Note that it is the source's responsibility to ensure that calling
        :meth:`DataSourceStreamReader.latestOffset()` or :meth:`SimpleDataSourceStreamReader.read()`
        after calling this method will eventually reach the target offset, and finally returns the
        same offset as given start parameter, to indicate that there is no more data to read. This
        includes the case where the query is restarted and the source is asked to read from the
        offset being journaled in previous run - source should take care of exceptional cases like
        new partition has added during the restart, etc, to ensure that the query run will be
        completed at some point.
        """
        pass