#!/usr/bin/env python3

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# To build the source distribution, either:
#   cd python
#   python packaging/client/setup.py sdist
# or:
#   cd python/packaging/client
#   python setup.py sdist

import glob
import os
import sys
from pathlib import Path
from shutil import copyfile, move

from setuptools import setup

if (
    # When packaging, the current working directory is this 'client' directory
    # (as with pip install -e python/packaging/client).
    os.getcwd() == str(Path(__file__).parent.absolute())
    and str(Path(__file__).parent.name) == "client"
):
    # This branch runs for:
    # - pip install -e python/packaging/client
    #   (pip moves the current working directory to 'client')
    # - cd python/packaging/client; python setup.py sdist
    #
    # It does not run for:
    # - python packaging/client/setup.py sdist
    #
    # Move to spark/python.
    os.chdir(Path(__file__).parent.parent.parent.absolute())

try:
    # Defines __version__.
    exec(open("pyspark/version.py").read())
except IOError:
    print(
        "Failed to load PySpark version file for packaging. You must be in Spark's python dir.",
        file=sys.stderr,
    )
    sys.exit(-1)
VERSION = __version__  # noqa

# Check whether we are under the Spark path, in which case we need to build the symlink farm.
# This is important because we only want to build the symlink farm while under Spark; otherwise we
# want to use the existing one. And if the symlink farm already exists while we are under Spark
# (e.g. a partially built sdist), we should error and have the user sort it out.
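# The check below is a heuristic: a source checkout is identified by
# SparkContext.scala under ../core, and a binary release by the ../RELEASE
# marker file together with exactly one spark*core*.jar under ../jars.
# Anything else is treated as "not under Spark".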
in_spark = os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or (
    os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1
)

test_packages = []
if "SPARK_TESTING" in os.environ:
    test_packages = [
        "pyspark.errors.tests.connect",
        "pyspark.tests",  # for Memory profiler parity tests
        "pyspark.resource.tests",
        "pyspark.sql.tests",
        "pyspark.sql.tests.arrow",
        "pyspark.sql.tests.connect",
        "pyspark.sql.tests.connect.arrow",
        "pyspark.sql.tests.connect.streaming",
        "pyspark.sql.tests.connect.client",
        "pyspark.sql.tests.connect.pandas",
        "pyspark.sql.tests.connect.shell",
        "pyspark.sql.tests.pandas",
        "pyspark.sql.tests.pandas.helper",
        "pyspark.sql.tests.plot",
        "pyspark.sql.tests.streaming",
        "pyspark.ml.tests",
        "pyspark.ml.tests.connect",
        "pyspark.pandas.tests",
        "pyspark.pandas.tests.computation",
        "pyspark.pandas.tests.data_type_ops",
        "pyspark.pandas.tests.diff_frames_ops",
        "pyspark.pandas.tests.frame",
        "pyspark.pandas.tests.groupby",
        "pyspark.pandas.tests.indexes",
        "pyspark.pandas.tests.io",
        "pyspark.pandas.tests.plot",
        "pyspark.pandas.tests.resample",
        "pyspark.pandas.tests.reshape",
        "pyspark.pandas.tests.series",
        "pyspark.pandas.tests.window",
        "pyspark.pandas.tests.connect",
        "pyspark.pandas.tests.connect.computation",
        "pyspark.pandas.tests.connect.data_type_ops",
        "pyspark.pandas.tests.connect.diff_frames_ops",
        "pyspark.pandas.tests.connect.frame",
        "pyspark.pandas.tests.connect.groupby",
        "pyspark.pandas.tests.connect.indexes",
        "pyspark.pandas.tests.connect.io",
        "pyspark.pandas.tests.connect.plot",
        "pyspark.pandas.tests.connect.resample",
        "pyspark.pandas.tests.connect.reshape",
        "pyspark.pandas.tests.connect.series",
        "pyspark.pandas.tests.connect.window",
        "pyspark.pipelines.tests",
        "pyspark.logger.tests",
        "pyspark.logger.tests.connect",
    ]

try:
    if in_spark:
        # !!HACK ALERT!!
        # 1. `setup.py` has to be located in the same directory as the package.
        #    Therefore, we copy this file and place it in the `spark/python` directory,
        #    and remove it at the end.
        # 2. We rename `lib` to `lib.back` so MANIFEST.in does not pick `py4j` up,
        #    and rename it back at the end.
        move("lib", "lib.back")
        copyfile("packaging/client/setup.py", "setup.py")
        copyfile("packaging/client/setup.cfg", "setup.cfg")
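
    # After the staging above (when in_spark), spark/python contains, roughly:
    #   setup.py   <- copy of packaging/client/setup.py (this file)
    #   setup.cfg  <- copy of packaging/client/setup.cfg
    #   lib.back/  <- renamed from lib/ so MANIFEST.in does not pick py4j up
    # The `finally` block at the bottom undoes all three steps.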

    # If you are changing the versions here, please also change
    # ./python/pyspark/sql/pandas/utils.py.
    # For Arrow, you should also check ./pom.xml and ensure there are no breaking changes in the
    # binary format protocol with the Java version; see ARROW_HOME/format/* for specifications.
    # Also don't forget to update python/docs/source/getting_started/install.rst,
    # python/packaging/classic/setup.py, and python/packaging/connect/setup.py.
    _minimum_pandas_version = "2.2.0"
    _minimum_numpy_version = "1.21"
    _minimum_pyarrow_version = "18.0.0"
    _minimum_grpc_version = "1.76.0"
    _minimum_googleapis_common_protos_version = "1.71.0"
    _minimum_pyyaml_version = "3.11"
    _minimum_zstandard_version = "0.25.0"

    with open("README.md") as f:
        long_description = f.read()

    connect_packages = [
        "pyspark",
        "pyspark.cloudpickle",
        "pyspark.mllib",
        "pyspark.mllib.linalg",
        "pyspark.mllib.stat",
        "pyspark.ml",
        "pyspark.ml.connect",
        "pyspark.ml.linalg",
        "pyspark.ml.param",
        "pyspark.ml.torch",
        "pyspark.ml.deepspeed",
        "pyspark.sql",
        "pyspark.sql.avro",
        "pyspark.sql.connect",
        "pyspark.sql.connect.avro",
        "pyspark.sql.connect.client",
        "pyspark.sql.connect.functions",
        "pyspark.sql.connect.proto",
        "pyspark.sql.connect.protobuf",
        "pyspark.sql.connect.resource",
        "pyspark.sql.connect.shell",
        "pyspark.sql.connect.streaming",
        "pyspark.sql.connect.streaming.worker",
        "pyspark.sql.functions",
        "pyspark.sql.pandas",
        "pyspark.sql.plot",
        "pyspark.sql.protobuf",
        "pyspark.sql.streaming",
        "pyspark.sql.streaming.proto",
        "pyspark.sql.worker",
        "pyspark.streaming",
        "pyspark.pandas",
        "pyspark.pandas.data_type_ops",
        "pyspark.pandas.indexes",
        "pyspark.pandas.missing",
        "pyspark.pandas.plot",
        "pyspark.pandas.spark",
        "pyspark.pandas.typedef",
        "pyspark.pandas.usage_logging",
        "pyspark.pipelines",
        "pyspark.testing",
        "pyspark.resource",
        "pyspark.errors",
        "pyspark.errors.exceptions",
        "pyspark.logger",
    ]

    setup(
        name="pyspark-client",
        version=VERSION,
        description="Python Spark Connect client for Apache Spark",
        long_description=long_description,
        long_description_content_type="text/markdown",
        author="Spark Developers",
        author_email="dev@spark.apache.org",
        url="https://github.com/apache/spark/tree/master/python",
        packages=connect_packages + test_packages,
        include_package_data=True,
        license="Apache-2.0",
        # Don't forget to update python/docs/source/getting_started/install.rst
        # if you're updating the versions or dependencies.
        install_requires=[
            "pandas>=%s" % _minimum_pandas_version,
            "pyarrow>=%s" % _minimum_pyarrow_version,
            "grpcio>=%s" % _minimum_grpc_version,
            "grpcio-status>=%s" % _minimum_grpc_version,
            "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version,
            "zstandard>=%s" % _minimum_zstandard_version,
            "numpy>=%s" % _minimum_numpy_version,
            "pyyaml>=%s" % _minimum_pyyaml_version,
        ],
        python_requires=">=3.10",
        classifiers=[
            "Development Status :: 5 - Production/Stable",
            "Programming Language :: Python :: 3.10",
            "Programming Language :: Python :: 3.11",
            "Programming Language :: Python :: 3.12",
            "Programming Language :: Python :: 3.13",
            "Programming Language :: Python :: 3.14",
            "Programming Language :: Python :: Implementation :: CPython",
            "Typing :: Typed",
        ],
    )
finally:
    if in_spark:
        move("lib.back", "lib")
        os.remove("setup.py")
        os.remove("setup.cfg")
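
# A minimal way to exercise the built client, as an illustrative sketch (not
# part of the build; assumes a Spark Connect server is already listening on the
# default localhost:15002):
#
#   pip install dist/pyspark_client-*.tar.gz
#
#   from pyspark.sql import SparkSession
#   spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
#   print(spark.range(3).collect())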