# syntax=docker/dockerfile:1.4
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT.
#
# PYTHON_LTO: Controls whether Python is built with Link-Time Optimization (LTO).
#
# Link-Time Optimization uses MD5 checksums during the compilation process to verify
# object files and intermediate representations. In FIPS-compliant environments, MD5
# is blocked as it's not an approved cryptographic algorithm (see FIPS 140-2/140-3).
# This can cause Python builds with LTO to fail when FIPS mode is enabled.
#
# When building FIPS-compliant images, set this to "false" to disable LTO:
#   docker build --build-arg PYTHON_LTO="false" ...
#
# Default: "true" (LTO enabled for better performance)
#
# Related: https://github.com/apache/airflow/issues/58337
ARG PYTHON_LTO="true"

ARG BASE_IMAGE="debian:bookworm-slim"

##############################################################################################
# This is the script image where we keep all inlined bash scripts needed in other segments
# We use BASE_IMAGE to make sure that the scripts are different for different platforms.
##############################################################################################
FROM ${BASE_IMAGE} as scripts
##############################################################################################
# Please DO NOT modify the inlined scripts manually. The content of those files will be
# replaced by prek automatically from the "scripts/docker/" folder.
# This is done in order to avoid problems with caching and file permissions and in order to
# make the PROD Dockerfile standalone
##############################################################################################

# The content below is automatically copied from scripts/docker/install_os_dependencies.sh
COPY <<"EOF" /install_os_dependencies.sh
#!/usr/bin/env bash
set -euo pipefail

# Installs OS-level dependencies for one of three image flavours passed as $1:
# "runtime" (slim prod image), "dev" (build image) or "ci" (dev + golang).
if [[ "$#" != 1 ]]; then
    echo
    echo "ERROR! There should be 'runtime', 'ci' or 'dev' parameter passed as argument.".
    echo
    exit 1
fi

AIRFLOW_PYTHON_VERSION=${AIRFLOW_PYTHON_VERSION:-3.10.18}
PYTHON_LTO=${PYTHON_LTO:-true}
GOLANG_MAJOR_MINOR_VERSION=${GOLANG_MAJOR_MINOR_VERSION:-1.24.4}
COSIGN_VERSION=${COSIGN_VERSION:-3.0.5}

if [[ "${1}" == "runtime" ]]; then
    INSTALLATION_TYPE="RUNTIME"
elif [[ "${1}" == "dev" ]]; then
    INSTALLATION_TYPE="DEV"
elif [[ "${1}" == "ci" ]]; then
    INSTALLATION_TYPE="CI"
else
    echo
    echo "ERROR! Wrong argument. Passed ${1} and it should be one of 'runtime', 'ci' or 'dev'.".
    echo
    exit 1
fi

# Populates and exports DEV_APT_DEPS (build-time apt packages) unless already provided.
function get_dev_apt_deps() {
    if [[ "${DEV_APT_DEPS=}" == "" ]]; then
        DEV_APT_DEPS="\
apt-transport-https \
apt-utils \
build-essential \
dirmngr \
freetds-bin \
freetds-dev \
git \
graphviz \
graphviz-dev \
krb5-user \
lcov \
ldap-utils \
libbluetooth-dev \
libbz2-dev \
libc6-dev \
libdb-dev \
libev-dev \
libev4 \
libffi-dev \
libgdbm-compat-dev \
libgdbm-dev \
libgeos-dev \
libkrb5-dev \
libldap2-dev \
libleveldb-dev \
libleveldb1d \
liblzma-dev \
libncurses5-dev \
libreadline6-dev \
libsasl2-2 \
libsasl2-dev \
libsasl2-modules \
libsqlite3-dev \
libssl-dev \
libxmlsec1 \
libxmlsec1-dev \
libzstd-dev \
locales \
lsb-release \
lzma \
lzma-dev \
openssh-client \
openssl \
pkg-config \
pkgconf \
sasl2-bin \
sqlite3 \
sudo \
tk-dev \
unixodbc \
unixodbc-dev \
uuid-dev \
wget \
xz-utils \
zlib1g-dev \
"
        export DEV_APT_DEPS
    fi
}

# Populates and exports RUNTIME_APT_DEPS (runtime apt packages) unless already provided.
function get_runtime_apt_deps() {
    local debian_version
    local debian_version_apt_deps
    # Get debian version without installing lsb_release
    # shellcheck disable=SC1091
    debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";)
    echo
    echo "DEBIAN CODENAME: ${debian_version}"
    echo
    debian_version_apt_deps="\
libffi8 \
libldap-2.5-0 \
libssl3 \
netcat-openbsd\
"
    echo
    echo "APPLIED INSTALLATION CONFIGURATION FOR DEBIAN VERSION: ${debian_version}"
    echo
    if [[ "${RUNTIME_APT_DEPS=}" == "" ]]; then
        RUNTIME_APT_DEPS="\
${debian_version_apt_deps} \
apt-transport-https \
apt-utils \
curl \
dumb-init \
freetds-bin \
git \
gnupg \
iputils-ping \
krb5-user \
ldap-utils \
libev4 \
libgeos-dev \
libsasl2-2 \
libsasl2-modules \
libxmlsec1 \
locales \
lsb-release \
openssh-client \
rsync \
sasl2-bin \
sqlite3 \
sudo \
unixodbc \
wget\
"
        export RUNTIME_APT_DEPS
    fi
}

# Installs docker-ce-cli from the official Docker apt repository.
function install_docker_cli() {
    apt-get update
    # FIX(review): -y added — without it apt-get prompts for confirmation when extra
    # dependencies are pulled in and aborts in a non-interactive docker build.
    apt-get install -y ca-certificates curl
    install -m 0755 -d /etc/apt/keyrings
    curl -fsSL https://download.docker.com/linux/debian/gpg -o /etc/apt/keyrings/docker.asc
    chmod a+r /etc/apt/keyrings/docker.asc
    # shellcheck disable=SC1091
    echo \
        "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/debian \
        $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
        tee /etc/apt/sources.list.d/docker.list > /dev/null
    apt-get update
    apt-get install -y --no-install-recommends docker-ce-cli
}

# Installs the dev apt dependencies, honoring DEV_APT_COMMAND / ADDITIONAL_DEV_APT_COMMAND hooks.
function install_debian_dev_dependencies() {
    apt-get update
    apt-get install -yqq --no-install-recommends apt-utils >/dev/null 2>&1
    apt-get install -y --no-install-recommends wget curl gnupg2 lsb-release ca-certificates
    # shellcheck disable=SC2086
    export ${ADDITIONAL_DEV_APT_ENV?}
    if [[ ${DEV_APT_COMMAND} != "" ]]; then
        bash -o pipefail -o errexit -o nounset -o nolog -c "${DEV_APT_COMMAND}"
    fi
    if [[ ${ADDITIONAL_DEV_APT_COMMAND} != "" ]]; then
        bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_DEV_APT_COMMAND}"
    fi
    apt-get update
    local debian_version
    local debian_version_apt_deps
    # Get debian version without installing lsb_release
    # shellcheck disable=SC1091
    debian_version=$(. /etc/os-release; printf '%s\n' "$VERSION_CODENAME";)
    echo
    echo "DEBIAN CODENAME: ${debian_version}"
    echo
    # shellcheck disable=SC2086
    apt-get install -y --no-install-recommends ${DEV_APT_DEPS}
}

# Installs user-provided extra dev apt packages, if any.
function install_additional_dev_dependencies() {
    if [[ "${ADDITIONAL_DEV_APT_DEPS=}" != "" ]]; then
        # shellcheck disable=SC2086
        apt-get install -y --no-install-recommends ${ADDITIONAL_DEV_APT_DEPS}
    fi
}

function link_python() {
    # link python binaries to /usr/local/bin and /usr/python/bin with and without 3 suffix
    # Links in /usr/local/bin are needed for tools that expect python to be there
    # Links in /usr/python/bin are needed for tools that are detecting home of python installation including
    # lib/site-packages. The /usr/python/bin should be first in PATH in order to help with the last part.
    for dst in pip3 python3 python3-config; do
        src="$(echo "${dst}" | tr -d 3)"
        echo "Linking ${dst} in /usr/local/bin and /usr/python/bin"
        ln -sv "/usr/python/bin/${dst}" "/usr/local/bin/${dst}"
        for dir in /usr/local/bin /usr/python/bin; do
            if [[ ! -e "${dir}/${src}" ]]; then
                echo "Creating ${src} - > ${dst} link in ${dir}"
                ln -sv "${dir}/${dst}" "${dir}/${src}"
            fi
        done
    done
    # Expose the freshly built shared libraries in /usr/local/lib and refresh ld cache.
    for dst in /usr/python/lib/*
    do
        src="/usr/local/lib/$(basename "${dst}")"
        if [[ -e "${src}" ]]; then
            rm -rf "${src}"
        fi
        echo "Linking ${dst} to ${src}"
        ln -sv "${dst}" "${src}"
    done
    ldconfig
}

# Installs the runtime apt dependencies, honoring RUNTIME_APT_COMMAND hooks, and cleans apt caches.
function install_debian_runtime_dependencies() {
    apt-get update
    apt-get install --no-install-recommends -yqq apt-utils >/dev/null 2>&1
    apt-get install -y --no-install-recommends wget curl gnupg2 lsb-release ca-certificates
    # shellcheck disable=SC2086
    export ${ADDITIONAL_RUNTIME_APT_ENV?}
    if [[ "${RUNTIME_APT_COMMAND}" != "" ]]; then
        bash -o pipefail -o errexit -o nounset -o nolog -c "${RUNTIME_APT_COMMAND}"
    fi
    if [[ "${ADDITIONAL_RUNTIME_APT_COMMAND}" != "" ]]; then
        bash -o pipefail -o errexit -o nounset -o nolog -c "${ADDITIONAL_RUNTIME_APT_COMMAND}"
    fi
    apt-get update
    # shellcheck disable=SC2086
    apt-get install -y --no-install-recommends ${RUNTIME_APT_DEPS} ${ADDITIONAL_RUNTIME_APT_DEPS}
    apt-get autoremove -yqq --purge
    apt-get clean
    link_python
    rm -rf /var/lib/apt/lists/* /var/log/*
}

# Downloads the cosign binary for the current architecture and verifies its sha256 checksum.
function install_cosign() {
    local arch
    arch="$(dpkg --print-architecture)"
    declare -A cosign_sha256s=(
        # https://github.com/sigstore/cosign/releases/download/v${COSIGN_VERSION}/cosign_checksums.txt
        [amd64]="db15cc99e6e4837daabab023742aaddc3841ce57f193d11b7c3e06c8003642b2"
        [arm64]="d098f3168ae4b3aa70b4ca78947329b953272b487727d1722cb3cb098a1a20ab"
    )
    # FIX(review): ':-' default added — under 'set -u' a missing associative-array key
    # aborts with "unbound variable" before the friendly error below could ever run.
    local cosign_sha256="${cosign_sha256s[${arch}]:-}"
    if [[ -z "${cosign_sha256}" ]]; then
        echo "Unsupported architecture for cosign: ${arch}"
        exit 1
    fi
    curl -fsSL \
        "https://github.com/sigstore/cosign/releases/download/v${COSIGN_VERSION}/cosign-linux-${arch}" \
        -o /tmp/cosign
    echo "${cosign_sha256}  /tmp/cosign" | sha256sum --check
    chmod +x /tmp/cosign
}

# Builds CPython ${AIRFLOW_PYTHON_VERSION} from source into /usr/python after verifying
# the tarball (sigstore for >= 3.11 per PEP 761, PGP for 3.10).
function install_python() {
    # If system python (3.11 in bookworm) is installed (via automatic installation of some dependencies for example), we need
    # to fail and make sure that it is not there, because there can be strange interactions if we install
    # newer version and system libraries are installed, because
    # when you create a virtualenv part of the shared libraries of Python can be taken from the system
    # Installation leading to weird errors when you want to install some modules - for example when you install ssl:
    # /usr/python/lib/python3.11/lib-dynload/_ssl.cpython-311-aarch64-linux-gnu.so: undefined symbol: _PyModule_Add
    # NOTE(review): dpkg -l pads the "ii" status with two spaces ("ii  libpython...");
    # the single-space pattern below may never match — verify against real dpkg output.
    if dpkg -l | grep '^ii' | grep '^ii libpython' >/dev/null; then
        echo
        echo "ERROR! System python is installed by one of the previous steps"
        echo
        echo "Please make sure that no python packages are installed by default. Displaying the reason why libpython3.11 is installed:"
        echo
        apt-get install -yqq aptitude >/dev/null
        aptitude why libpython3.11
        echo
        exit 1
    else
        echo
        echo "GOOD! System python is not installed - OK"
        echo
    fi
    wget -O python.tar.xz "https://www.python.org/ftp/python/${AIRFLOW_PYTHON_VERSION%%[a-z]*}/Python-${AIRFLOW_PYTHON_VERSION}.tar.xz"
    local major_minor_version
    major_minor_version="${AIRFLOW_PYTHON_VERSION%.*}"
    local major minor
    major="${major_minor_version%.*}"
    minor="${major_minor_version#*.}"
    echo "Verifying Python ${AIRFLOW_PYTHON_VERSION} (${major_minor_version})"
    if [[ "${major}" -gt 3 ]] || [[ "${major}" -eq 3 && "${minor}" -ge 11 ]]; then
        # Sigstore verification for Python >= 3.11 (PEP 761)
        declare -A sigstore_identities=(
            # https://peps.python.org/pep-0664/#release-manager-and-crew
            [3.11]="pablogsal@python.org"
            # https://peps.python.org/pep-0693/#release-manager-and-crew
            [3.12]="thomas@python.org"
            # https://peps.python.org/pep-0719/#release-manager-and-crew
            [3.13]="thomas@python.org"
            # https://peps.python.org/pep-0745/#release-manager-and-crew
            [3.14]="hugo@python.org"
        )
        declare -A sigstore_issuers=(
            [3.11]="https://accounts.google.com"
            [3.12]="https://accounts.google.com"
            [3.13]="https://accounts.google.com"
            [3.14]="https://github.com/login/oauth"
        )
        wget -O python.tar.xz.sigstore \
            "https://www.python.org/ftp/python/${AIRFLOW_PYTHON_VERSION%%[a-z]*}/Python-${AIRFLOW_PYTHON_VERSION}.tar.xz.sigstore"
        install_cosign
        local identity="${sigstore_identities[${major_minor_version}]}"
        local issuer="${sigstore_issuers[${major_minor_version}]}"
        /tmp/cosign verify-blob \
            --bundle python.tar.xz.sigstore \
            --certificate-identity "${identity}" \
            --certificate-oidc-issuer "${issuer}" \
            python.tar.xz
        rm -f python.tar.xz.sigstore /tmp/cosign
    else
        # PGP verification for Python 3.10
        declare -A keys=(
            # gpg: key 64E628F8D684696D: public key "Pablo Galindo Salgado " imported
            # https://peps.python.org/pep-0619/#release-manager-and-crew
            [3.10]="A035C8C19219BA821ECEA86B64E628F8D684696D"
        )
        wget -O python.tar.xz.asc \
            "https://www.python.org/ftp/python/${AIRFLOW_PYTHON_VERSION%%[a-z]*}/Python-${AIRFLOW_PYTHON_VERSION}.tar.xz.asc"
        GNUPGHOME="$(mktemp -d)"; export GNUPGHOME
        local gpg_key="${keys[${major_minor_version}]}"
        echo "Using GPG key ${gpg_key}"
        gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys "${gpg_key}"
        gpg --batch --verify python.tar.xz.asc python.tar.xz
        gpgconf --kill all
        rm -rf "${GNUPGHOME}" python.tar.xz.asc
    fi
    mkdir -p /usr/src/python
    tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz
    rm python.tar.xz
    cd /usr/src/python
    arch="$(dpkg --print-architecture)"; arch="${arch##*-}"
    gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"
    EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)"
    EXTRA_CFLAGS="${EXTRA_CFLAGS:-} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer";
    LDFLAGS="$(dpkg-buildflags --get LDFLAGS)"
    LDFLAGS="${LDFLAGS:--Wl},--strip-all"
    # Link-Time Optimization (LTO) uses MD5 checksums for object file verification during
    # compilation. In FIPS mode, MD5 is blocked as a non-approved algorithm, causing builds
    # to fail. The PYTHON_LTO variable allows disabling LTO for FIPS-compliant builds.
    # See: https://github.com/apache/airflow/issues/58337
    local lto_option=""
    if [[ "${PYTHON_LTO:-true}" == "true" ]]; then
        lto_option="--with-lto"
    fi
    local build_log
    build_log=$(mktemp)
    echo "Building Python ${AIRFLOW_PYTHON_VERSION} from source..."
    if ! ( ./configure --enable-optimizations --prefix=/usr/python/ --with-ensurepip --build="$gnuArch" \
            --enable-loadable-sqlite-extensions --enable-option-checking=fatal \
            --enable-shared ${lto_option} && \
        make -s -j "$(nproc)" "EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \
            "LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" python && \
        make -s -j "$(nproc)" install ) > "${build_log}" 2>&1; then
        echo
        echo "ERROR! Python build failed. Build output:"
        echo
        cat "${build_log}"
        rm -f "${build_log}"
        exit 1
    fi
    rm -f "${build_log}"
    cd /
    rm -rf /usr/src/python
    # Drop test suites and static libraries to slim down the installation.
    find /usr/python -depth \
        \( \
            \( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \
            -o \( -type f -a \( -name 'libpython*.a' \) \) \
        \) -exec rm -rf '{}' +
    link_python
}

# Installs the Go toolchain into /usr/local/go (CI images only).
function install_golang() {
    curl "https://dl.google.com/go/go${GOLANG_MAJOR_MINOR_VERSION}.linux-$(dpkg --print-architecture).tar.gz" -o "go${GOLANG_MAJOR_MINOR_VERSION}.linux.tar.gz"
    rm -rf /usr/local/go && tar -C /usr/local -xzf go"${GOLANG_MAJOR_MINOR_VERSION}".linux.tar.gz
}

# Purges auto-removable packages and apt/log caches to keep the layer small.
function apt_clean() {
    apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false
    rm -rf /var/lib/apt/lists/* /var/log/*
}

if [[ "${INSTALLATION_TYPE}" == "RUNTIME" ]]; then
    get_runtime_apt_deps
    install_debian_runtime_dependencies
    install_docker_cli
    apt_clean
else
    get_dev_apt_deps
    install_debian_dev_dependencies
    install_python
    install_additional_dev_dependencies
    if [[ "${INSTALLATION_TYPE}" == "CI" ]]; then
        install_golang
    fi
    install_docker_cli
    apt_clean
fi
EOF

# The content below is automatically copied from scripts/docker/install_mysql.sh
COPY <<"EOF" /install_mysql.sh
#!/usr/bin/env bash
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" set -euo pipefail common::get_colors declare -a packages readonly MARIADB_LTS_VERSION="10.11" : "${INSTALL_MYSQL_CLIENT:?Should be true or false}" : "${INSTALL_MYSQL_CLIENT_TYPE:-mariadb}" if [[ "${INSTALL_MYSQL_CLIENT}" != "true" && "${INSTALL_MYSQL_CLIENT}" != "false" ]]; then echo echo "${COLOR_RED}INSTALL_MYSQL_CLIENT must be either true or false${COLOR_RESET}" echo exit 1 fi if [[ "${INSTALL_MYSQL_CLIENT_TYPE}" != "mysql" && "${INSTALL_MYSQL_CLIENT_TYPE}" != "mariadb" ]]; then echo echo "${COLOR_RED}INSTALL_MYSQL_CLIENT_TYPE must be either mysql or mariadb${COLOR_RESET}" echo exit 1 fi if [[ "${INSTALL_MYSQL_CLIENT_TYPE}" == "mysql" ]]; then echo echo "${COLOR_RED}The 'mysql' client type is not supported any more. Use 'mariadb' instead.${COLOR_RESET}" echo echo "The MySQL drivers are wrongly packaged and released by Oracle with an expiration date on their GPG keys," echo "which causes builds to fail after the expiration date. MariaDB client is protocol-compatible with MySQL client." echo "" echo "Every two years the MySQL packages fail and Oracle team is always surprised and struggling" echo "with fixes and re-signing the packages which lasts few days" echo "See https://bugs.mysql.com/bug.php?id=113432 for more details." echo "As a community we are not able to support this broken packaging practice from Oracle" echo "Feel free however to install MySQL drivers on your own as extension of the image." echo exit 1 fi retry() { local retries=3 local count=0 # adding delay of 10 seconds local delay=10 until "$@"; do exit_code=$? count=$((count + 1)) if [[ $count -lt $retries ]]; then echo "Command failed. Attempt $count/$retries. Retrying in ${delay}s..." sleep $delay else echo "Command failed after $retries attempts." 
return $exit_code fi done } install_mariadb_client() { # List of compatible package Oracle MySQL -> MariaDB: # `mysql-client` -> `mariadb-client` or `mariadb-client-compat` (11+) # `libmysqlclientXX` (where XX is a number) -> `libmariadb3-compat` # `libmysqlclient-dev` -> `libmariadb-dev-compat` # # Different naming against Debian repo which we used before # that some of packages might contains `-compat` suffix, Debian repo -> MariaDB repo: # `libmariadb-dev` -> `libmariadb-dev-compat` # `mariadb-client-core` -> `mariadb-client` or `mariadb-client-compat` (11+) if [[ "${1}" == "dev" ]]; then packages=("libmariadb-dev-compat" "mariadb-client") elif [[ "${1}" == "prod" ]]; then packages=("libmariadb3-compat" "mariadb-client") else echo echo "${COLOR_RED}Specify either prod or dev${COLOR_RESET}" echo exit 1 fi common::import_trusted_gpg "0xF1656F24C74CD1D8" "mariadb" echo echo "${COLOR_BLUE}Installing MariaDB client version ${MARIADB_LTS_VERSION}: ${1}${COLOR_RESET}" echo "${COLOR_YELLOW}MariaDB client protocol-compatible with MySQL client.${COLOR_RESET}" echo echo "deb [arch=amd64,arm64] https://archive.mariadb.org/mariadb-${MARIADB_LTS_VERSION}/repo/debian/ $(lsb_release -cs) main" > \ /etc/apt/sources.list.d/mariadb.list # Make sure that dependencies from MariaDB repo are preferred over Debian dependencies printf "Package: *\nPin: release o=MariaDB\nPin-Priority: 999\n" > /etc/apt/preferences.d/mariadb retry apt-get update retry apt-get install --no-install-recommends -y "${packages[@]}" apt-get autoremove -yqq --purge apt-get clean && rm -rf /var/lib/apt/lists/* } if [[ ${INSTALL_MYSQL_CLIENT:="true"} == "true" ]]; then install_mariadb_client "${@}" fi EOF # The content below is automatically copied from scripts/docker/install_mssql.sh COPY <<"EOF" /install_mssql.sh #!/usr/bin/env bash . 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" set -euo pipefail common::get_colors declare -a packages : "${INSTALL_MSSQL_CLIENT:?Should be true or false}" function install_mssql_client() { # Install MsSQL client from Microsoft repositories if [[ ${INSTALL_MSSQL_CLIENT:="true"} != "true" ]]; then echo echo "${COLOR_BLUE}Skip installing mssql client${COLOR_RESET}" echo return fi packages=("msodbcsql18") common::import_trusted_gpg "EB3E94ADBE1229CF" "microsoft" echo echo "${COLOR_BLUE}Installing mssql client${COLOR_RESET}" echo echo "deb [arch=amd64,arm64] https://packages.microsoft.com/debian/$(lsb_release -rs)/prod $(lsb_release -cs) main" > \ /etc/apt/sources.list.d/mssql-release.list && mkdir -p /opt/microsoft/msodbcsql18 && touch /opt/microsoft/msodbcsql18/ACCEPT_EULA && apt-get update -yqq && apt-get upgrade -yqq && apt-get -yqq install --no-install-recommends "${packages[@]}" && apt-get autoremove -yqq --purge && apt-get clean && rm -rf /var/lib/apt/lists/* } install_mssql_client "${@}" EOF # The content below is automatically copied from scripts/docker/install_postgres.sh COPY <<"EOF" /install_postgres.sh #!/usr/bin/env bash . 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" set -euo pipefail common::get_colors declare -a packages : "${INSTALL_POSTGRES_CLIENT:?Should be true or false}" install_postgres_client() { echo echo "${COLOR_BLUE}Installing postgres client${COLOR_RESET}" echo if [[ "${1}" == "dev" ]]; then packages=("libpq-dev" "postgresql-client") elif [[ "${1}" == "prod" ]]; then packages=("postgresql-client") else echo echo "Specify either prod or dev" echo exit 1 fi common::import_trusted_gpg "7FCC7D46ACCC4CF8" "postgres" echo "deb [arch=amd64,arm64] https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" > \ /etc/apt/sources.list.d/pgdg.list apt-get update apt-get install --no-install-recommends -y "${packages[@]}" apt-get autoremove -yqq --purge apt-get clean && rm -rf /var/lib/apt/lists/* } if [[ ${INSTALL_POSTGRES_CLIENT:="true"} == "true" ]]; then install_postgres_client "${@}" fi EOF # The content below is automatically copied from scripts/docker/install_packaging_tools.sh COPY <<"EOF" /install_packaging_tools.sh #!/usr/bin/env bash . 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" common::get_colors common::get_packaging_tool common::show_packaging_tool_version_and_location common::install_packaging_tools EOF # The content below is automatically copied from scripts/docker/common.sh COPY <<"EOF" /common.sh #!/usr/bin/env bash set -euo pipefail function common::get_colors() { COLOR_BLUE=$'\e[34m' COLOR_GREEN=$'\e[32m' COLOR_RED=$'\e[31m' COLOR_RESET=$'\e[0m' COLOR_YELLOW=$'\e[33m' export COLOR_BLUE export COLOR_GREEN export COLOR_RED export COLOR_RESET export COLOR_YELLOW } function common::get_packaging_tool() { : "${AIRFLOW_USE_UV:?Should be set}" ## IMPORTANT: IF YOU MODIFY THIS FUNCTION YOU SHOULD ALSO MODIFY CORRESPONDING FUNCTION IN ## `scripts/in_container/_in_container_utils.sh` if [[ ${AIRFLOW_USE_UV} == "true" ]]; then echo echo "${COLOR_BLUE}Using 'uv' to install Airflow${COLOR_RESET}" echo export PACKAGING_TOOL="uv" export PACKAGING_TOOL_CMD="uv pip" # --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2 # (binary lxml embeds its own libxml2, while xmlsec uses system one). # See https://bugs.launchpad.net/lxml/+bug/2110068 if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." 
&& -f "./pyproject.toml" ]]; then # for uv only install dev group when we install from sources export EXTRA_INSTALL_FLAGS="--group=dev --no-binary lxml --no-binary xmlsec" else export EXTRA_INSTALL_FLAGS="--no-binary lxml --no-binary xmlsec" fi export EXTRA_UNINSTALL_FLAGS="" export UPGRADE_TO_HIGHEST_RESOLUTION="--upgrade --resolution highest" export UPGRADE_IF_NEEDED="--upgrade" UV_CONCURRENT_DOWNLOADS=$(nproc --all) export UV_CONCURRENT_DOWNLOADS if [[ ${INCLUDE_PRE_RELEASE=} == "true" ]]; then EXTRA_INSTALL_FLAGS="${EXTRA_INSTALL_FLAGS} --prerelease if-necessary" fi else echo echo "${COLOR_BLUE}Using 'pip' to install Airflow${COLOR_RESET}" echo export PACKAGING_TOOL="pip" export PACKAGING_TOOL_CMD="pip" # --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2 # (binary lxml embeds its own libxml2, while xmlsec uses system one). # See https://bugs.launchpad.net/lxml/+bug/2110068 export EXTRA_INSTALL_FLAGS="--root-user-action ignore --no-binary lxml,xmlsec" export EXTRA_UNINSTALL_FLAGS="--yes" export UPGRADE_TO_HIGHEST_RESOLUTION="--upgrade --upgrade-strategy eager" export UPGRADE_IF_NEEDED="--upgrade --upgrade-strategy only-if-needed" if [[ ${INCLUDE_PRE_RELEASE=} == "true" ]]; then EXTRA_INSTALL_FLAGS="${EXTRA_INSTALL_FLAGS} --pre" fi fi } function common::get_airflow_version_specification() { if [[ -z ${AIRFLOW_VERSION_SPECIFICATION=} && -n ${AIRFLOW_VERSION} && ${AIRFLOW_INSTALLATION_METHOD} != "." ]]; then AIRFLOW_VERSION_SPECIFICATION="==${AIRFLOW_VERSION}" fi } function common::get_constraints_location() { # When installing from sources without upgrade, generate constraints from uv.lock if [[ ${AIRFLOW_INSTALLATION_METHOD=} == "." 
&& -z "${UPGRADE_RANDOM_INDICATOR_STRING=}" ]]; then echo echo "${COLOR_BLUE}Installing from sources with uv.lock - generating constraints from uv.lock${COLOR_RESET}" echo uv export --frozen --no-hashes --no-emit-project --no-editable --no-header \ --no-annotate > "${HOME}/constraints.txt" 2>/dev/null || true return fi # auto-detect Airflow-constraint reference and location if [[ -z "${AIRFLOW_CONSTRAINTS_REFERENCE=}" ]]; then if [[ ${AIRFLOW_VERSION} =~ v?2.* || ${AIRFLOW_VERSION} =~ v?3.* ]]; then AIRFLOW_CONSTRAINTS_REFERENCE=constraints-${AIRFLOW_VERSION} else AIRFLOW_CONSTRAINTS_REFERENCE=${DEFAULT_CONSTRAINTS_BRANCH} fi fi if [[ -z ${AIRFLOW_CONSTRAINTS_LOCATION=} ]]; then local constraints_base="https://raw.githubusercontent.com/${CONSTRAINTS_GITHUB_REPOSITORY}/${AIRFLOW_CONSTRAINTS_REFERENCE}" local python_version python_version=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') AIRFLOW_CONSTRAINTS_LOCATION="${constraints_base}/${AIRFLOW_CONSTRAINTS_MODE}-${python_version}.txt" fi if [[ ${AIRFLOW_CONSTRAINTS_LOCATION} =~ http.* ]]; then echo echo "${COLOR_BLUE}Downloading constraints from ${AIRFLOW_CONSTRAINTS_LOCATION} to ${HOME}/constraints.txt ${COLOR_RESET}" echo if ! 
curl -sSf -o "${HOME}/constraints.txt" "${AIRFLOW_CONSTRAINTS_LOCATION}"; then echo echo "${COLOR_YELLOW}Constraints file not found at ${AIRFLOW_CONSTRAINTS_LOCATION} (new Python version being bootstrapped?).${COLOR_RESET}" echo "${COLOR_YELLOW}Falling back to no-constraints installation.${COLOR_RESET}" echo AIRFLOW_CONSTRAINTS_LOCATION="" # Create an empty constraints file so --constraint flag still works touch "${HOME}/constraints.txt" fi else echo echo "${COLOR_BLUE}Copying constraints from ${AIRFLOW_CONSTRAINTS_LOCATION} to ${HOME}/constraints.txt ${COLOR_RESET}" echo cp "${AIRFLOW_CONSTRAINTS_LOCATION}" "${HOME}/constraints.txt" fi } function common::show_packaging_tool_version_and_location() { echo "PATH=${PATH}" echo "Installed pip: $(pip --version): $(which pip)" if [[ ${PACKAGING_TOOL} == "pip" ]]; then echo "${COLOR_BLUE}Using 'pip' to install Airflow${COLOR_RESET}" else echo "${COLOR_BLUE}Using 'uv' to install Airflow${COLOR_RESET}" echo "Installed uv: $(uv --version 2>/dev/null || echo "Not installed yet"): $(which uv 2>/dev/null)" fi } function common::install_packaging_tools() { : "${AIRFLOW_USE_UV:?Should be set}" if [[ "${VIRTUAL_ENV=}" != "" ]]; then echo echo "${COLOR_BLUE}Checking packaging tools in venv: ${VIRTUAL_ENV}${COLOR_RESET}" echo else echo echo "${COLOR_BLUE}Checking packaging tools for system Python installation: $(which python)${COLOR_RESET}" echo fi if [[ ${AIRFLOW_PIP_VERSION=} == "" ]]; then echo echo "${COLOR_BLUE}Installing latest pip version${COLOR_RESET}" echo pip install --root-user-action ignore --disable-pip-version-check --upgrade pip elif [[ ! 
${AIRFLOW_PIP_VERSION} =~ ^[0-9].* ]]; then echo echo "${COLOR_BLUE}Installing pip version from spec ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" echo # shellcheck disable=SC2086 pip install --root-user-action ignore --disable-pip-version-check "pip @ ${AIRFLOW_PIP_VERSION}" else local installed_pip_version installed_pip_version=$(python -c 'from importlib.metadata import version; print(version("pip"))') if [[ ${installed_pip_version} != "${AIRFLOW_PIP_VERSION}" ]]; then echo echo "${COLOR_BLUE}(Re)Installing pip version: ${AIRFLOW_PIP_VERSION}${COLOR_RESET}" echo pip install --root-user-action ignore --disable-pip-version-check "pip==${AIRFLOW_PIP_VERSION}" fi fi if [[ ${AIRFLOW_UV_VERSION=} == "" ]]; then echo echo "${COLOR_BLUE}Installing latest uv version${COLOR_RESET}" echo pip install --root-user-action ignore --disable-pip-version-check --upgrade uv elif [[ ! ${AIRFLOW_UV_VERSION} =~ ^[0-9].* ]]; then echo echo "${COLOR_BLUE}Installing uv version from spec ${AIRFLOW_UV_VERSION}${COLOR_RESET}" echo # shellcheck disable=SC2086 pip install --root-user-action ignore --disable-pip-version-check "uv @ ${AIRFLOW_UV_VERSION}" else local installed_uv_version installed_uv_version=$(python -c 'from importlib.metadata import version; print(version("uv"))' 2>/dev/null || echo "Not installed yet") if [[ ${installed_uv_version} != "${AIRFLOW_UV_VERSION}" ]]; then echo echo "${COLOR_BLUE}(Re)Installing uv version: ${AIRFLOW_UV_VERSION}${COLOR_RESET}" echo # shellcheck disable=SC2086 pip install --root-user-action ignore --disable-pip-version-check "uv==${AIRFLOW_UV_VERSION}" fi fi if [[ ${AIRFLOW_PREK_VERSION=} == "" ]]; then echo echo "${COLOR_BLUE}Installing latest prek, uv${COLOR_RESET}" echo uv tool install prek --with uv # make sure that the venv/user in .local exists mkdir -p "${HOME}/.local/bin" else echo echo "${COLOR_BLUE}Installing predefined versions of prek, uv:${COLOR_RESET}" echo "${COLOR_BLUE}prek(${AIRFLOW_PREK_VERSION}) uv(${AIRFLOW_UV_VERSION})${COLOR_RESET}" 
echo uv tool install "prek==${AIRFLOW_PREK_VERSION}" --with "uv==${AIRFLOW_UV_VERSION}" # make sure that the venv/user in .local exists mkdir -p "${HOME}/.local/bin" fi } function common::import_trusted_gpg() { common::get_colors local key=${1:?${COLOR_RED}First argument expects OpenPGP Key ID${COLOR_RESET}} local name=${2:?${COLOR_RED}Second argument expected trust storage name${COLOR_RESET}} # Please note that not all servers could be used for retrieve keys # sks-keyservers.net: Unmaintained and DNS taken down due to GDPR requests. # keys.openpgp.org: User ID Mandatory, not suitable for APT repositories # keyring.debian.org: Only accept keys in Debian keyring. # pgp.mit.edu: High response time. local keyservers=( "hkps://keyserver.ubuntu.com" "hkps://pgp.surf.nl" ) GNUPGHOME="$(mktemp -d)" export GNUPGHOME set +e for keyserver in $(shuf -e "${keyservers[@]}"); do echo "${COLOR_BLUE}Try to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}" gpg --keyserver "${keyserver}" --recv-keys "${key}" 2>&1 && break echo "${COLOR_YELLOW}Unable to receive GPG public key ${key} from ${keyserver}${COLOR_RESET}" done set -e gpg --export "${key}" > "/etc/apt/trusted.gpg.d/${name}.gpg" gpgconf --kill all rm -rf "${GNUPGHOME}" unset GNUPGHOME } EOF # The content below is automatically copied from scripts/docker/install_airflow_when_building_images.sh COPY <<"EOF" /install_airflow_when_building_images.sh #!/usr/bin/env bash . 
"$( dirname "${BASH_SOURCE[0]}" )/common.sh" function install_from_sources() { local extra_sync_flags extra_sync_flags="" if [[ ${VIRTUAL_ENV=} != "" ]]; then extra_sync_flags="--active" fi if [[ "${UPGRADE_RANDOM_INDICATOR_STRING=}" != "" ]]; then if [[ ${PACKAGING_TOOL_CMD} == "pip" ]]; then set +x echo echo "${COLOR_RED}We only support uv not pip installation for upgrading dependencies!.${COLOR_RESET}" echo exit 1 fi set +x echo echo "${COLOR_BLUE}Attempting to upgrade all packages to highest versions.${COLOR_RESET}" echo # --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2 # (binary lxml embeds its own libxml2, while xmlsec uses system one). # See https://bugs.launchpad.net/lxml/+bug/2110068 set -x uv sync --all-packages --resolution highest --group dev --group docs --group docs-gen \ --group leveldb ${extra_sync_flags} --no-binary-package lxml --no-binary-package xmlsec \ --no-python-downloads --no-managed-python else set +x echo echo "${COLOR_BLUE}Installing all packages from uv.lock (frozen).${COLOR_RESET}" echo # Use uv sync --frozen to install exactly what is pinned in uv.lock without re-resolving. # --no-binary-package is needed in order to avoid libxml and xmlsec using different version of # libxml2 (binary lxml embeds its own libxml2, while xmlsec uses system one). # See https://bugs.launchpad.net/lxml/+bug/2110068 set -x if ! 
uv sync --all-packages --frozen --group dev --group docs --group docs-gen \ --group leveldb ${extra_sync_flags} --no-binary-package lxml --no-binary-package xmlsec \ --no-python-downloads --no-managed-python; then set +x if [[ ${AIRFLOW_FALLBACK_NO_CONSTRAINTS_INSTALLATION} != "true" ]]; then echo echo "${COLOR_RED}Failing because frozen uv.lock installation failed and fallback is disabled.${COLOR_RESET}" echo exit 1 fi echo echo "${COLOR_YELLOW}Likely pyproject.toml has new dependencies not reflected in uv.lock.${COLOR_RESET}" echo echo "${COLOR_BLUE}Falling back to re-resolving dependencies (uv sync without --frozen).${COLOR_RESET}" echo set -x uv sync --all-packages --group dev --group docs --group docs-gen \ --group leveldb ${extra_sync_flags} --no-binary-package lxml --no-binary-package xmlsec \ --no-python-downloads --no-managed-python set +x fi fi } function install_from_external_spec() { local installation_command_flags if [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" else echo echo "${COLOR_RED}The '${AIRFLOW_INSTALLATION_METHOD}' installation method is not supported${COLOR_RESET}" echo echo "${COLOR_YELLOW}Supported methods are ('.', 'apache-airflow')${COLOR_RESET}" echo exit 1 fi if [[ "${UPGRADE_RANDOM_INDICATOR_STRING=}" != "" ]]; then echo echo "${COLOR_BLUE}Remove airflow and all provider distributions installed before potentially${COLOR_RESET}" echo set -x ${PACKAGING_TOOL_CMD} freeze | grep apache-airflow | xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} 2>/dev/null || true set +x echo echo "${COLOR_BLUE}Installing all packages with highest resolutions. 
Installation method: ${AIRFLOW_INSTALLATION_METHOD}${COLOR_RESET}" echo set -x ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_TO_HIGHEST_RESOLUTION} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} set +x else echo echo "${COLOR_BLUE}Installing all packages with constraints. Installation method: ${AIRFLOW_INSTALLATION_METHOD}${COLOR_RESET}" echo set -x if ! ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} --constraint "${HOME}/constraints.txt"; then set +x if [[ ${AIRFLOW_FALLBACK_NO_CONSTRAINTS_INSTALLATION} != "true" ]]; then echo echo "${COLOR_RED}Failing because constraints installation failed and fallback is disabled.${COLOR_RESET}" echo exit 1 fi echo echo "${COLOR_YELLOW}Likely pyproject.toml has new dependencies conflicting with constraints.${COLOR_RESET}" echo echo "${COLOR_BLUE}Falling back to no-constraints installation.${COLOR_RESET}" echo set -x ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_IF_NEEDED} ${ADDITIONAL_PIP_INSTALL_FLAGS} ${installation_command_flags} set +x fi fi } function install_airflow_when_building_images() { # Remove mysql from extras if client is not going to be installed if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,} echo "${COLOR_YELLOW}MYSQL client installation is disabled. Extra 'mysql' installations were therefore omitted.${COLOR_RESET}" fi # Remove postgres from extras if client is not going to be installed if [[ ${INSTALL_POSTGRES_CLIENT} != "true" ]]; then AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/postgres,} echo "${COLOR_YELLOW}Postgres client installation is disabled. Extra 'postgres' installations were therefore omitted.${COLOR_RESET}" fi # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." 
]]; then install_from_sources else install_from_external_spec fi set +x common::install_packaging_tools echo echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" echo # We use pip check here to make sure that whatever `uv` installs, is also "correct" according to `pip` pip check } common::get_colors common::get_packaging_tool common::get_airflow_version_specification common::get_constraints_location common::show_packaging_tool_version_and_location install_airflow_when_building_images EOF # The content below is automatically copied from scripts/docker/install_additional_dependencies.sh COPY <<"EOF" /install_additional_dependencies.sh #!/usr/bin/env bash set -euo pipefail : "${ADDITIONAL_PYTHON_DEPS:?Should be set}" . "$( dirname "${BASH_SOURCE[0]}" )/common.sh" function install_additional_dependencies() { if [[ "${UPGRADE_RANDOM_INDICATOR_STRING=}" != "" ]]; then echo echo "${COLOR_BLUE}Installing additional dependencies while upgrading to newer dependencies${COLOR_RESET}" echo set -x ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_TO_HIGHEST_RESOLUTION} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${ADDITIONAL_PYTHON_DEPS} set +x common::install_packaging_tools echo echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" echo # We use pip check here to make sure that whatever `uv` installs, is also "correct" according to `pip` pip check else echo echo "${COLOR_BLUE}Installing additional dependencies upgrading only if needed${COLOR_RESET}" echo set -x ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} ${UPGRADE_IF_NEEDED} \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${ADDITIONAL_PYTHON_DEPS} set +x common::install_packaging_tools echo echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" echo # We use pip check here to make sure that whatever `uv` installs, is also "correct" according to `pip` pip check fi } common::get_colors common::get_packaging_tool common::get_airflow_version_specification common::get_constraints_location 
common::show_packaging_tool_version_and_location

install_additional_dependencies
EOF

# The content below is automatically copied from scripts/docker/entrypoint_ci.sh
COPY <<"EOF" /entrypoint_ci.sh
#!/usr/bin/env bash

# Enable command tracing when VERBOSE_COMMANDS=true, disable otherwise.
function set_verbose() {
    if [[ ${VERBOSE_COMMANDS:="false"} == "true" ]]; then
        set -x
    else
        set +x
    fi
}

set_verbose

. "${AIRFLOW_SOURCES:-/opt/airflow}"/scripts/in_container/_in_container_script_init.sh

LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6"
export LD_PRELOAD

chmod 1777 /tmp

AIRFLOW_SOURCES=$(cd "${IN_CONTAINER_DIR}/../.." || exit 1; pwd)

PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:=3.10}

export AIRFLOW_HOME=${AIRFLOW_HOME:=${HOME}}

mkdir "${AIRFLOW_HOME}/sqlite" -p || true

ASSET_COMPILATION_WAIT_MULTIPLIER=${ASSET_COMPILATION_WAIT_MULTIPLIER:=1}

if [[ "${CI=}" == "true" ]]; then
    export COLUMNS="202"
fi

. "${IN_CONTAINER_DIR}/check_connectivity.sh"

# Wait (bounded by ASSET_COMPILATION_WAIT_MULTIPLIER) for the background UI
# asset compilation to finish; exit with an error when it takes too long or
# when a compilation error output file was left behind.
function wait_for_asset_compilation() {
    if [[ -f "${AIRFLOW_SOURCES}/.build/ui/.asset_compile.lock" ]]; then
        echo
        echo "${COLOR_YELLOW}Waiting for asset compilation to complete in the background.${COLOR_RESET}"
        echo
        local counter=0
        while [[ -f "${AIRFLOW_SOURCES}/.build/ui/.asset_compile.lock" ]]; do
            if (( counter % 5 == 2 )); then
                echo "${COLOR_BLUE}Still waiting .....${COLOR_RESET}"
            fi
            sleep 1
            ((counter=counter+1))
            if [[ ${counter} == 30*$ASSET_COMPILATION_WAIT_MULTIPLIER ]]; then
                echo
                # Fixed: the message previously ended with ${COLOR_YELLOW} which
                # never reset the terminal colour.
                echo "${COLOR_YELLOW}The asset compilation is taking too long.${COLOR_RESET}"
                echo """
If it does not complete soon, you might want to stop it and remove file lock:
   * press Ctrl-C
   * run 'rm ${AIRFLOW_SOURCES}/.build/ui/.asset_compile.lock'
"""
            fi
            if [[ ${counter} == 60*$ASSET_COMPILATION_WAIT_MULTIPLIER ]]; then
                echo
                # Fixed: both messages previously ended with ${COLOR_RED} which
                # never reset the terminal colour.
                echo "${COLOR_RED}The asset compilation is taking too long. Exiting.${COLOR_RESET}"
                echo "${COLOR_RED}refer to dev/breeze/doc/04_troubleshooting.rst for resolution steps.${COLOR_RESET}"
                echo
                exit 1
            fi
        done
    fi
    if [ -f "${AIRFLOW_SOURCES}/.build/ui/asset_compile.out" ]; then
        echo
        echo "${COLOR_RED}The asset compilation failed. Exiting.${COLOR_RESET}"
        echo
        cat "${AIRFLOW_SOURCES}/.build/ui/asset_compile.out"
        rm "${AIRFLOW_SOURCES}/.build/ui/asset_compile.out"
        echo
        exit 1
    fi
}

# Print the basic configuration of the environment, apply backend-specific
# settings and export test-related environment variables.
function environment_initialization() {
    if [[ ${SKIP_ENVIRONMENT_INITIALIZATION=} == "true" ]]; then
        return
    fi
    echo
    echo "${COLOR_BLUE}Running Initialization. Your basic configuration is:${COLOR_RESET}"
    echo
    echo " * ${COLOR_BLUE}Airflow home:${COLOR_RESET} ${AIRFLOW_HOME}"
    echo " * ${COLOR_BLUE}Airflow sources:${COLOR_RESET} ${AIRFLOW_SOURCES}"
    echo " * ${COLOR_BLUE}Airflow core SQL connection:${COLOR_RESET} ${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN:=}"
    if [[ ${BACKEND=} == "postgres" ]]; then
        echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} Postgres: ${POSTGRES_VERSION}"
    elif [[ ${BACKEND=} == "mysql" ]]; then
        echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} MySQL: ${MYSQL_VERSION}"
    elif [[ ${BACKEND=} == "sqlite" ]]; then
        echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} Sqlite"
    elif [[ ${BACKEND=} == "custom" ]]; then
        local _conn_url="${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN:-}"
        local _masked_url
        # Mask the password portion of the connection URL before printing it.
        _masked_url=$(echo "${_conn_url}" | sed -E 's|://([^:]+):([^@]+)@|://\1:***@|')
        echo " * ${COLOR_BLUE}Airflow backend:${COLOR_RESET} Custom (${_masked_url})"
    fi
    echo
    if [[ ${STANDALONE_DAG_PROCESSOR=} == "true" ]]; then
        echo
        echo "${COLOR_BLUE}Forcing scheduler/standalone_dag_processor to True${COLOR_RESET}"
        echo
        export AIRFLOW__SCHEDULER__STANDALONE_DAG_PROCESSOR=True
    fi
    if [[ ${GO_WORKER=} == "true" ]]; then
        echo
        echo "${COLOR_BLUE}Starting go worker${COLOR_RESET}"
        echo
        export AIRFLOW__SCHEDULER__GO_WORKER=True
    fi
    RUN_TESTS=${RUN_TESTS:="false"}
    CI=${CI:="false"}

    # Added to have run-tests on path
    export PATH=${PATH}:${AIRFLOW_SOURCES}:/usr/local/go/bin/
mkdir -pv "${AIRFLOW_HOME}/logs/" # Change the default worker_concurrency for tests export AIRFLOW__CELERY__WORKER_CONCURRENCY=8 set +e # shellcheck source=scripts/in_container/configure_environment.sh . "${IN_CONTAINER_DIR}/configure_environment.sh" # shellcheck source=scripts/in_container/run_init_script.sh . "${IN_CONTAINER_DIR}/run_init_script.sh" "${IN_CONTAINER_DIR}/check_environment.sh" ENVIRONMENT_EXIT_CODE=$? set -e if [[ ${ENVIRONMENT_EXIT_CODE} != 0 ]]; then echo echo "Error: check_environment returned ${ENVIRONMENT_EXIT_CODE}. Exiting." echo exit ${ENVIRONMENT_EXIT_CODE} fi mkdir -p /usr/lib/google-cloud-sdk/bin touch /usr/lib/google-cloud-sdk/bin/gcloud ln -s -f /usr/bin/gcloud /usr/lib/google-cloud-sdk/bin/gcloud if [[ ${SKIP_SSH_SETUP="false"} == "false" ]]; then # Set up ssh keys echo 'yes' | ssh-keygen -t rsa -C your_email@youremail.com -m PEM -P '' -f ~/.ssh/id_rsa \ >"${AIRFLOW_HOME}/logs/ssh-keygen.log" 2>&1 cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys ln -s -f ~/.ssh/authorized_keys ~/.ssh/authorized_keys2 chmod 600 ~/.ssh/* # SSH Service sudo service ssh restart >/dev/null 2>&1 # Sometimes the server is not quick enough to load the keys! 
while [[ $(ssh-keyscan -H localhost 2>/dev/null | wc -l) != "3" ]] ; do echo "Not all keys yet loaded by the server" sleep 0.05 done ssh-keyscan -H localhost >> ~/.ssh/known_hosts 2>/dev/null fi if [[ ${INTEGRATION_LOCALSTACK:-"false"} == "true" ]]; then echo echo "${COLOR_BLUE}Configuring LocalStack integration${COLOR_RESET}" echo # Define LocalStack AWS configuration declare -A localstack_config=( ["AWS_ENDPOINT_URL"]="http://localstack:4566" ["AWS_ACCESS_KEY_ID"]="test" ["AWS_SECRET_ACCESS_KEY"]="test" ["AWS_DEFAULT_REGION"]="us-east-1" ) # Export each configuration variable and log it for key in "${!localstack_config[@]}"; do export "$key"="${localstack_config[$key]}" echo " * ${COLOR_BLUE}${key}:${COLOR_RESET} ${localstack_config[$key]}" done echo fi cd "${AIRFLOW_SOURCES}" # Temporarily add /opt/airflow/providers/standard/tests to PYTHONPATH in order to see example dags # in the UI when testing in Breeze. This might be solved differently in the future if [[ -d /opt/airflow/providers/standard/tests ]]; then export PYTHONPATH=${PYTHONPATH=}:/opt/airflow/providers/standard/tests fi if [[ ${START_AIRFLOW:="false"} == "true" || ${START_AIRFLOW} == "True" ]]; then if [[ ${BREEZE_DEBUG_CELERY_WORKER=} == "true" ]]; then export AIRFLOW__CELERY__POOL=${AIRFLOW__CELERY__POOL:-solo} fi export AIRFLOW__CORE__LOAD_EXAMPLES=${LOAD_EXAMPLES} if [[ ${SKIP_ASSETS_COMPILATION:="false"} == "false" ]]; then wait_for_asset_compilation fi if [[ ${TERMINAL_MULTIPLEXER:="mprocs"} == "mprocs" ]]; then # shellcheck source=scripts/in_container/bin/run_mprocs exec run_mprocs else # shellcheck source=scripts/in_container/bin/run_tmux exec run_tmux fi fi } function handle_mount_sources() { if [[ ${MOUNT_SOURCES=} == "remove" ]]; then echo echo "${COLOR_BLUE}Mounted sources are removed, cleaning up mounted dist-info files${COLOR_RESET}" echo rm -rf /usr/local/lib/python"${PYTHON_MAJOR_MINOR_VERSION}"/site-packages/apache_airflow*.dist-info/ fi } function determine_airflow_to_use() { 
USE_AIRFLOW_VERSION="${USE_AIRFLOW_VERSION:=""}" if [[ "${USE_AIRFLOW_VERSION}" == "" && "${USE_DISTRIBUTIONS_FROM_DIST=}" != "true" ]]; then export PYTHONPATH=${AIRFLOW_SOURCES} echo echo "${COLOR_BLUE}Using airflow version from current sources${COLOR_RESET}" echo # Cleanup the logs, tmp when entering the environment sudo rm -rf "${AIRFLOW_SOURCES}"/logs/* sudo rm -rf "${AIRFLOW_SOURCES}"/tmp/* mkdir -p "${AIRFLOW_SOURCES}"/logs/ mkdir -p "${AIRFLOW_SOURCES}"/tmp/ else if [[ ${CLEAN_AIRFLOW_INSTALLATION=} == "true" ]]; then echo echo "${COLOR_BLUE}Uninstalling all packages first${COLOR_RESET}" echo # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} freeze | grep -ve "^-e" | grep -ve "^#" | grep -ve "^uv" | grep -v "@" | \ xargs ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} # Now install rich ad click first to use the installation script # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} rich rich-click click \ --constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt fi echo echo "${COLOR_BLUE}Reinstalling all development dependencies${COLOR_RESET}" echo # Generate constraints from uv.lock and use them to install development dependencies # via the Python script. 
--no-cache is needed - otherwise there is possibility of # overriding temporary environments by multiple parallel processes local constraint_file="/tmp/constraints-from-lock.txt" uv export --frozen --no-hashes --no-emit-project --no-emit-workspace --no-editable --no-header \ --no-annotate > "${constraint_file}" 2>/dev/null || true uv run --no-cache /opt/airflow/scripts/in_container/install_development_dependencies.py \ --constraint "${constraint_file}" # Some packages might leave legacy typing module which causes test issues # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} typing || true echo echo "${COLOR_BLUE}Installing airflow and providers ${COLOR_RESET}" echo python "${IN_CONTAINER_DIR}/install_airflow_and_providers.py" fi if [[ "${USE_AIRFLOW_VERSION}" =~ ^2.* ]]; then # Remove auth manager setting unset AIRFLOW__CORE__AUTH_MANAGER fi if [[ "${USE_AIRFLOW_VERSION}" =~ ^2\.2\..*|^2\.1\..*|^2\.0\..* && "${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=}" != "" ]]; then # make sure old variable is used for older airflow versions export AIRFLOW__CORE__SQL_ALCHEMY_CONN="${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN}" fi } function check_boto_upgrade() { if [[ ${UPGRADE_BOTO=} != "true" ]]; then return fi echo echo "${COLOR_BLUE}Upgrading boto3, botocore to latest version to run Amazon tests with them${COLOR_RESET}" echo # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} uninstall ${EXTRA_UNINSTALL_FLAGS} aiobotocore s3fs || true # Urllib 2.6.0 breaks kubernetes client because kubernetes client uses deprecated in 2.0.0 and # removed in 2.6.0 `getheaders()` call (instead of `headers` property. # Tracked in https://github.com/kubernetes-client/python/issues/2477 # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} --upgrade "boto3<1.38.3" "botocore<1.38.3" "urllib3<2.6.0" } function check_upgrade_sqlalchemy() { # The python version constraint is a TEMPORARY WORKAROUND to exclude all FAB tests. 
Is should be removed once we # upgrade FAB to v5 (PR #50960). if [[ "${UPGRADE_SQLALCHEMY=}" != "true" || ${PYTHON_MAJOR_MINOR_VERSION} != "3.13" ]]; then return fi echo echo "${COLOR_BLUE}Upgrading sqlalchemy to the latest version to run tests with it${COLOR_RESET}" echo uv sync --all-packages --no-install-package apache-airflow-providers-fab --resolution highest \ --no-python-downloads --no-managed-python } function check_downgrade_sqlalchemy() { if [[ ${DOWNGRADE_SQLALCHEMY=} != "true" ]]; then return fi local min_sqlalchemy_version min_sqlalchemy_version=$(grep "sqlalchemy\[asyncio\]>=" airflow-core/pyproject.toml | sed "s/.*>=\([0-9\.]*\).*/\1/" | xargs) echo echo "${COLOR_BLUE}Downgrading sqlalchemy to minimum supported version: ${min_sqlalchemy_version}${COLOR_RESET}" echo # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "sqlalchemy[asyncio]==${min_sqlalchemy_version}" echo echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" echo # We use pip check here to make sure that whatever `uv` installs, is also "correct" according to `pip` pip check } function check_downgrade_pendulum() { if [[ ${DOWNGRADE_PENDULUM=} != "true" || ${PYTHON_MAJOR_MINOR_VERSION} == "3.12" ]]; then return fi local min_pendulum_version min_pendulum_version=$(grep "pendulum>=" airflow-core/pyproject.toml | head -1 | sed "s/.*>=\([0-9\.]*\).*/\1/" | xargs) echo echo "${COLOR_BLUE}Downgrading pendulum to minimum supported version: ${min_pendulum_version}${COLOR_RESET}" echo # shellcheck disable=SC2086 ${PACKAGING_TOOL_CMD} install ${EXTRA_INSTALL_FLAGS} "pendulum==${min_pendulum_version}" echo echo "${COLOR_BLUE}Running 'pip check'${COLOR_RESET}" echo # We use pip check here to make sure that whatever `uv` installs, is also "correct" according to `pip` pip check } function check_run_tests() { if [[ ${RUN_TESTS=} != "true" ]]; then return fi if [[ ${TEST_GROUP:=""} == "system" ]]; then exec "${IN_CONTAINER_DIR}/run_system_tests.sh" "${@}" else exec 
"${IN_CONTAINER_DIR}/run_ci_tests.sh" "${@}" fi } function check_force_lowest_dependencies() { if [[ ${FORCE_LOWEST_DEPENDENCIES=} != "true" ]]; then return fi if [[ ${TEST_TYPE=} =~ Providers\[.*\] ]]; then local provider_id # shellcheck disable=SC2001 provider_id=$(echo "${TEST_TYPE}" | sed 's/Providers\[\(.*\)\]/\1/') echo echo "${COLOR_BLUE}Forcing dependencies to lowest versions for provider: ${provider_id}${COLOR_RESET}" echo if ! /opt/airflow/scripts/in_container/is_provider_excluded.py "${provider_id}"; then echo echo "S${COLOR_YELLOW}Skipping ${provider_id} provider check on Python ${PYTHON_MAJOR_MINOR_VERSION}!${COLOR_RESET}" echo exit 0 fi cd "${AIRFLOW_SOURCES}/providers/${provider_id/.//}" || exit 1 # --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2 # (binary lxml embeds its own libxml2, while xmlsec uses system one). # See https://bugs.launchpad.net/lxml/+bug/2110068 uv sync --resolution lowest-direct --no-binary-package lxml --no-binary-package xmlsec --all-extras \ --no-python-downloads --no-managed-python else echo echo "${COLOR_BLUE}Forcing dependencies to lowest versions for Airflow.${COLOR_RESET}" echo cd "${AIRFLOW_SOURCES}/airflow-core" # --no-binary is needed in order to avoid libxml and xmlsec using different version of libxml2 # (binary lxml embeds its own libxml2, while xmlsec uses system one). # See https://bugs.launchpad.net/lxml/+bug/2110068 uv sync --resolution lowest-direct --no-binary-package lxml --no-binary-package xmlsec --all-extras \ --no-python-downloads --no-managed-python fi } function check_airflow_python_client_installation() { if [[ ${INSTALL_AIRFLOW_PYTHON_CLIENT=} != "true" ]]; then return fi python "${IN_CONTAINER_DIR}/install_airflow_python_client.py" } function initialize_db() { # If we are going to start the api server OR we are a system test (which may or may not start the api server, # depending on the Airflow version being used to run the tests), then migrate the DB. 
if [[ ${START_API_SERVER_WITH_EXAMPLES=} == "true" || ${TEST_GROUP:=""} == "system" ]]; then echo echo "${COLOR_BLUE}Initializing database${COLOR_RESET}" echo airflow db migrate echo echo "${COLOR_BLUE}Database initialized${COLOR_RESET}" fi } function start_api_server_with_examples(){ USE_AIRFLOW_VERSION="${USE_AIRFLOW_VERSION:=""}" # Do not start the api server if either START_API_SERVER_WITH_EXAMPLES is false or the TEST_GROUP env var is not # equal to "system". if [[ ${START_API_SERVER_WITH_EXAMPLES=} != "true" && ${TEST_GROUP:=""} != "system" ]]; then return fi # If the use Airflow version is set and it is <= 3.0.0 (which does not have the API server anyway) also return if [[ ${USE_AIRFLOW_VERSION} != "" && ${USE_AIRFLOW_VERSION} < "3.0.0" ]]; then return fi export AIRFLOW__CORE__LOAD_EXAMPLES=True export AIRFLOW__API__EXPOSE_CONFIG=True airflow dags reserialize echo "Example dags parsing finished" if airflow config get-value core auth_manager | grep -q "FabAuthManager"; then echo "Create admin user" airflow users create -u admin -p admin -f Thor -l Administrator -r Admin -e admin@email.domain || true echo "Admin user created" else echo "Skipping user creation as auth manager different from Fab is used" fi echo echo "${COLOR_BLUE}Starting airflow api server${COLOR_RESET}" echo if [[ ${START_API_SERVER_DAEMON:-"true"} == "false" ]]; then airflow api-server --port 8080 & else airflow api-server --port 8080 --daemon fi echo echo "${COLOR_BLUE}Waiting for api-server to start${COLOR_RESET}" echo check_service_connection "Airflow api-server" "run_nc localhost 8080" 100 EXIT_CODE=$? 
if [[ ${EXIT_CODE} != 0 ]]; then echo echo "${COLOR_RED}Api server did not start properly${COLOR_RESET}" echo exit ${EXIT_CODE} fi echo echo "${COLOR_BLUE}Airflow api-server started${COLOR_RESET}" } handle_mount_sources determine_airflow_to_use environment_initialization check_boto_upgrade check_upgrade_sqlalchemy check_downgrade_sqlalchemy check_downgrade_pendulum check_force_lowest_dependencies check_airflow_python_client_installation initialize_db start_api_server_with_examples check_run_tests "${@}" exec /bin/bash "${@}" EOF # The content below is automatically copied from scripts/docker/entrypoint_exec.sh COPY <<"EOF" /entrypoint_exec.sh #!/usr/bin/env bash . /opt/airflow/scripts/in_container/_in_container_script_init.sh . /opt/airflow/scripts/in_container/configure_environment.sh . /opt/airflow/scripts/in_container/run_init_script.sh exec /bin/bash "${@}" EOF FROM ${BASE_IMAGE} as main # Nolog bash flag is currently ignored - but you can replace it with other flags (for example # xtrace - to show commands executed) SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "nolog", "-c"] ARG BASE_IMAGE ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" # By increasing this number we can do force build of all dependencies. # NOTE! When you want to make sure dependencies are installed from scratch in your PR after removing # some dependencies, you also need to set "disable image cache" in your PR to make sure the image is # not built using the "main" version of those dependencies. 
ARG DEPENDENCIES_EPOCH_NUMBER="15" # Make sure noninteractive debian install is used and language variables set ENV BASE_IMAGE=${BASE_IMAGE} \ DEBIAN_FRONTEND=noninteractive LANGUAGE=C.UTF-8 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ LC_CTYPE=C.UTF-8 LC_MESSAGES=C.UTF-8 \ DEPENDENCIES_EPOCH_NUMBER=${DEPENDENCIES_EPOCH_NUMBER} \ INSTALL_MYSQL_CLIENT="true" \ INSTALL_MSSQL_CLIENT="true" \ INSTALL_POSTGRES_CLIENT="true" \ PIP_CACHE_DIR=/root/.cache/pip \ UV_CACHE_DIR=/root/.cache/uv RUN echo "Base image version: ${BASE_IMAGE}" ARG DEV_APT_COMMAND="" ARG ADDITIONAL_DEV_APT_COMMAND="" ARG ADDITIONAL_DEV_ENV_VARS="" ARG ADDITIONAL_DEV_APT_DEPS="bash-completion dumb-init git gdb graphviz krb5-user \ less libenchant-2-2 libgcc-11-dev libgeos-dev libpq-dev net-tools netcat-openbsd \ openssh-server postgresql-client software-properties-common rsync tmux unzip vim xxd" ARG ADDITIONAL_DEV_APT_ENV="" ENV DEV_APT_COMMAND=${DEV_APT_COMMAND} \ ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS} \ ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND} ARG AIRFLOW_PYTHON_VERSION="3.12.13" ENV AIRFLOW_PYTHON_VERSION=${AIRFLOW_PYTHON_VERSION} ENV GOLANG_MAJOR_MINOR_VERSION="1.26.1" ARG PYTHON_LTO COPY --from=scripts install_os_dependencies.sh /scripts/docker/ RUN PYTHON_LTO=${PYTHON_LTO} bash /scripts/docker/install_os_dependencies.sh ci # In case system python is installed, setting LD_LIBRARY_PATH prevents any case the system python # libraries will be accidentally used before the library installed from sources (which is newer and # python interpreter might break if accidentally the old system libraries are used. ENV LD_LIBRARY_PATH="/usr/python/lib" COPY --from=scripts common.sh /scripts/docker/ # Only copy mysql/mssql installation scripts for now - so that changing the other # scripts which are needed much later will not invalidate the docker layer here. 
COPY --from=scripts install_mysql.sh install_mssql.sh install_postgres.sh /scripts/docker/

ARG HOME=/root
ARG AIRFLOW_IMAGE_TYPE="ci"
ARG AIRFLOW_HOME=/root/airflow
ARG AIRFLOW_SOURCES=/opt/airflow
ARG INSTALL_MYSQL_CLIENT_TYPE="mariadb"

ENV HOME=${HOME} \
    AIRFLOW_IMAGE_TYPE=${AIRFLOW_IMAGE_TYPE} \
    AIRFLOW_HOME=${AIRFLOW_HOME} \
    AIRFLOW_SOURCES=${AIRFLOW_SOURCES} \
    INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE}

# We run scripts with bash here to make sure we can execute the scripts. Changing to +x might have an
# unexpected result - the cache for Dockerfiles might get invalidated in case the host system
# had different umask set and group x bit was not set. In Azure the bit might be not set at all.
# That also protects against AUFS Docker backend problem where changing the executable bit required sync
RUN bash /scripts/docker/install_mysql.sh prod \
    && bash /scripts/docker/install_mysql.sh dev \
    && bash /scripts/docker/install_mssql.sh dev \
    && bash /scripts/docker/install_postgres.sh dev \
    # The user is added to allow ssh debugging (you can connect with airflow/airflow over ssh)
    && adduser --gecos "First Last,RoomNumber,WorkPhone,HomePhone" --disabled-password \
       --quiet "airflow" --home "/home/airflow" \
    && echo -e "airflow\nairflow" | passwd airflow 2>&1 \
    && echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
    && chmod 0440 /etc/sudoers.d/airflow

# Install Helm
ARG HELM_VERSION="v3.19.0"

RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \
    && PLATFORM=$([ "$(uname -m)" = "aarch64" ] && echo "arm64" || echo "amd64" ) \
    && HELM_URL="https://get.helm.sh/helm-${HELM_VERSION}-${SYSTEM}-${PLATFORM}.tar.gz" \
    && curl --silent --location "${HELM_URL}" | tar -xz -O "${SYSTEM}-${PLATFORM}/helm" > /usr/local/bin/helm \
    && chmod +x /usr/local/bin/helm

# Install mprocs - a modern process manager for managing multiple Airflow components
ARG MPROCS_VERSION="0.9.2"

RUN SYSTEM=$(uname -s | tr '[:upper:]' '[:lower:]') \
    && PLATFORM="$(uname -m)" \
    && MPROCS_URL="https://github.com/pvolok/mprocs/releases/download/v${MPROCS_VERSION}/mprocs-${MPROCS_VERSION}-${SYSTEM}-${PLATFORM}-musl.tar.gz" \
    && echo "Downloading mprocs from ${MPROCS_URL}" \
    && curl --silent --location "${MPROCS_URL}" | tar -xz -C /usr/local/bin/ mprocs \
    && chmod +x /usr/local/bin/mprocs

WORKDIR ${AIRFLOW_SOURCES}

RUN mkdir -pv ${AIRFLOW_HOME} && \
    mkdir -pv ${AIRFLOW_HOME}/dags && \
    mkdir -pv ${AIRFLOW_HOME}/logs

ARG AIRFLOW_REPO=apache/airflow
ARG AIRFLOW_BRANCH=main
# Airflow Extras installed
ARG AIRFLOW_EXTRAS="all"
ARG ADDITIONAL_AIRFLOW_EXTRAS=""
# Allows to override constraints source
ARG CONSTRAINTS_GITHUB_REPOSITORY="apache/airflow"
ARG AIRFLOW_CONSTRAINTS_MODE="constraints-source-providers"
ARG AIRFLOW_CONSTRAINTS_REFERENCE=""
ARG AIRFLOW_CONSTRAINTS_LOCATION=""
ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main"
# By default fallback to installation without constraints because in CI image it should always be tried
ARG AIRFLOW_FALLBACK_NO_CONSTRAINTS_INSTALLATION="true"
# By changing the epoch we can force reinstalling Airflow and pip all dependencies
# It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable.
ARG AIRFLOW_CI_BUILD_EPOCH="10"

# Setup PIP
# By default PIP has progress bar but you can disable it.
ARG PIP_PROGRESS_BAR="on"
# Optimizing installation of Cassandra driver (in case there are no prebuilt wheels which is the
# case as of 20.04.2021 with Python 3.9
# Speeds up building the image - cassandra driver without CYTHON saves around 10 minutes
ARG CASS_DRIVER_NO_CYTHON="1"
# Build cassandra driver on multiple CPUs
ARG CASS_DRIVER_BUILD_CONCURRENCY="8"

# This value should be set by the CI image build system to the current timestamp
ARG AIRFLOW_VERSION=""
# Additional PIP flags passed to all pip install commands except reinstalling pip itself
ARG ADDITIONAL_PIP_INSTALL_FLAGS=""

ARG AIRFLOW_USE_UV="true"

# NOTE(review): AIRFLOW_PIP_VERSION and AIRFLOW_UV_VERSION are referenced here
# but their ARG declarations only appear further below - at this point they
# expand to empty values and are re-set by the later ENV. Confirm intended.
ENV AIRFLOW_REPO=${AIRFLOW_REPO}\
    AIRFLOW_BRANCH=${AIRFLOW_BRANCH} \
    AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS} \
    CONSTRAINTS_GITHUB_REPOSITORY=${CONSTRAINTS_GITHUB_REPOSITORY} \
    AIRFLOW_CONSTRAINTS_MODE=${AIRFLOW_CONSTRAINTS_MODE} \
    AIRFLOW_CONSTRAINTS_REFERENCE=${AIRFLOW_CONSTRAINTS_REFERENCE} \
    AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION} \
    AIRFLOW_FALLBACK_NO_CONSTRAINTS_INSTALLATION=${AIRFLOW_FALLBACK_NO_CONSTRAINTS_INSTALLATION} \
    DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \
    AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \
    AIRFLOW_VERSION=${AIRFLOW_VERSION} \
    AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
    AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \
    AIRFLOW_USE_UV=${AIRFLOW_USE_UV} \
    UV_SYSTEM_PYTHON="true" \
    UV_PROJECT_ENVIRONMENT="/usr/local" \
    INSTALL_MYSQL_CLIENT="true" \
    INSTALL_MYSQL_CLIENT_TYPE=${INSTALL_MYSQL_CLIENT_TYPE} \
    INSTALL_MSSQL_CLIENT="true" \
    INSTALL_POSTGRES_CLIENT="true" \
    AIRFLOW_INSTALLATION_METHOD="." \
    AIRFLOW_VERSION_SPECIFICATION="" \
    PIP_PROGRESS_BAR=${PIP_PROGRESS_BAR} \
    ADDITIONAL_PIP_INSTALL_FLAGS=${ADDITIONAL_PIP_INSTALL_FLAGS} \
    INCLUDE_PRE_RELEASE="true" \
    CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY} \
    CASS_DRIVER_NO_CYTHON=${CASS_DRIVER_NO_CYTHON}

RUN echo "Airflow version: ${AIRFLOW_VERSION}"

# Copy all scripts required for installation - changing any of those should lead to
# rebuilding from here
COPY --from=scripts common.sh install_packaging_tools.sh install_additional_dependencies.sh /scripts/docker/

# We are first creating a venv where all python packages and .so binaries needed by those are
# installed.
# Here we fix the versions so all subsequent commands will use the versions
# from the sources
# You can swap comments between those two args to test pip from the main version
# When you attempt to test if the version of `pip` from specified branch works for our builds
# Also use `force pip` label on your PR to swap all places we use `uv` to `pip`
ARG AIRFLOW_PIP_VERSION=26.0.1
# ARG AIRFLOW_PIP_VERSION="git+https://github.com/pypa/pip.git@main"
ARG AIRFLOW_UV_VERSION=0.10.12

ARG AIRFLOW_PREK_VERSION="0.3.6"

# UV_LINK_MODE=copy is needed since we are using cache mounted from the host
ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \
    AIRFLOW_UV_VERSION=${AIRFLOW_UV_VERSION} \
    UV_LINK_MODE=copy \
    AIRFLOW_PREK_VERSION=${AIRFLOW_PREK_VERSION}

# The PATH is needed for python to find installed and cargo to build the wheels
ENV PATH="/usr/python/bin:/root/.local/bin:/root/.cargo/bin:${PATH}"

# Useful for creating a cache id based on the underlying architecture, preventing the use of cached python packages from
# an incorrect architecture.
ARG TARGETARCH # Value to be able to easily change cache id and therefore use a bare new cache ARG DEPENDENCY_CACHE_EPOCH="2" # Install useful command line tools in their own virtualenv so that they do not clash with # dependencies installed in Airflow also reinstall PIP and UV to make sure they are installed # in the version specified above RUN bash /scripts/docker/install_packaging_tools.sh COPY --from=scripts install_airflow_when_building_images.sh /scripts/docker/ # We can copy everything here. The Context is filtered by dockerignore. This makes sure we are not # copying over stuff that is accidentally generated or that we do not need (such as egg-info) # if you want to add something that is missing and you expect to see it in the image you can # add it with ! in .dockerignore next to the airflow, test etc. directories there COPY . ${AIRFLOW_SOURCES}/ ARG UPGRADE_RANDOM_INDICATOR_STRING="" ARG VERSION_SUFFIX="" ENV UPGRADE_RANDOM_INDICATOR_STRING=${UPGRADE_RANDOM_INDICATOR_STRING} \ VERSION_SUFFIX=${VERSION_SUFFIX} # The goal of this line is to install the dependencies from the most current pyproject.toml from sources # This will be usually incremental small set of packages in CI optimized build, so it will be very fast # In non-CI optimized build this will install all dependencies before installing sources. # Usually we will install versions based on the dependencies in pyproject.toml and upgraded only if needed. 
# But in cron job we will install latest versions matching pyproject.toml to see if there is no breaking change # and push the constraints if everything is successful RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ bash /scripts/docker/install_airflow_when_building_images.sh COPY --from=scripts install_packaging_tools.sh install_additional_dependencies.sh /scripts/docker/ ARG ADDITIONAL_PYTHON_DEPS="" ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS} RUN --mount=type=cache,id=ci-$TARGETARCH-$DEPENDENCY_CACHE_EPOCH,target=/root/.cache/ \ bash /scripts/docker/install_packaging_tools.sh; \ if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \ bash /scripts/docker/install_additional_dependencies.sh; \ fi COPY --from=scripts entrypoint_ci.sh /entrypoint COPY --from=scripts entrypoint_exec.sh /entrypoint-exec RUN chmod a+x /entrypoint /entrypoint-exec # Install autocomplete for airflow and kubectl # hadolint ignore=SC2028 RUN if command -v airflow; then \ register-python-argcomplete airflow >> ~/.bashrc ; \ fi; \ echo "source /etc/bash_completion" >> ~/.bashrc ; \ echo 'export PS1="\[\033[1;36m\][Breeze:\$(python --version 2>&1 | cut -d\" \" -f2)]\[\033[0m\] \[\033[1;32m\]\u@\h\[\033[0m\]:\[\033[1;34m\]\w\[\033[0m\]\$ "' >> ~/.bashrc WORKDIR ${AIRFLOW_SOURCES} ARG BUILD_ID ARG COMMIT_SHA ARG AIRFLOW_IMAGE_DATE_CREATED ENV PATH="/files/bin/:/opt/airflow/scripts/in_container/bin/:${PATH}" \ GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm/" \ BUILD_ID=${BUILD_ID} \ COMMIT_SHA=${COMMIT_SHA} \ # When we enter the image, the /root/.cache is not mounted from temporary mount cache. # We do not want to share the cache from host to avoid all kinds of problems where cache # is different with different platforms / python versions. We want to have a clean cache # in the image - and in this case /root/.cache is on the same filesystem as the installed packages. # so we can go back to the default link mode being hardlink. 
UV_LINK_MODE=hardlink \ MYPY_FORCE_COLOR="true" # Link dumb-init for backwards compatibility (so that older images also work) RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init EXPOSE 8080 LABEL org.apache.airflow.distro="debian" \ org.apache.airflow.module="airflow" \ org.apache.airflow.component="airflow" \ org.apache.airflow.image="airflow-ci" \ org.apache.airflow.version="${AIRFLOW_VERSION}" \ org.apache.airflow.python.version="${AIRFLOW_PYTHON_VERSION}" \ org.apache.airflow.uid="0" \ org.apache.airflow.gid="0" \ org.apache.airflow.build-id="${BUILD_ID}" \ org.apache.airflow.commit-sha="${COMMIT_SHA}" \ org.opencontainers.image.source="${AIRFLOW_IMAGE_REPOSITORY}" \ org.opencontainers.image.created="${AIRFLOW_IMAGE_DATE_CREATED}" \ org.opencontainers.image.authors="dev@airflow.apache.org" \ org.opencontainers.image.url="https://airflow.apache.org" \ org.opencontainers.image.documentation="https://airflow.apache.org/docs/docker-stack/index.html" \ org.opencontainers.image.source="https://github.com/apache/airflow" \ org.opencontainers.image.version="${AIRFLOW_VERSION}" \ org.opencontainers.image.revision="${COMMIT_SHA}" \ org.opencontainers.image.vendor="Apache Software Foundation" \ org.opencontainers.image.licenses="Apache-2.0" \ org.opencontainers.image.ref.name="airflow-ci-image" \ org.opencontainers.image.title="Continuous Integration Airflow Image" \ org.opencontainers.image.description="Installed Apache Airflow with Continuous Integration dependencies" ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"] CMD []