# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
---
# Static-check hook configuration. Hooks run on the pre-commit and pre-push stages unless a hook
# overrides `stages`. Several hook ids (insert-license, lint-json-schema, mypy-*,
# check-base-operator-usage) are deliberately repeated with different file selections or arguments.
default_stages: [pre-commit, pre-push]
minimum_prek_version: '0.3.4'
default_language_version:
  python: python3
  node: 22.19.0
  golang: 1.24.0
exclude: ^.*/.*_vendor/
repos:
  - repo: meta
    hooks:
      - id: identity
        name: Print checked files
        description: Print input to the static check hooks for troubleshooting
      - id: check-hooks-apply
        name: Check if all hooks apply to the repository
  - repo: https://github.com/thlorenz/doctoc.git
    rev: d7815f1f950f8d5ec933fa4f70208bf316bb13f8  # frozen: v2.3.0
    hooks:
      - id: doctoc
        name: Add TOC for Markdown and RST files
        files: (?x)
          ^README\.md$|
          ^UPDATING.*\.md$|
          ^chart/UPDATING.*\.md$|
          ^dev/.*\.md$|
          ^dev/.*\.rst$|
          ^docs/README\.md$|
          ^\.github/.*\.md$|
          ^airflow-core/tests/system/README\.md$
        exclude: (?x)
          .github/PULL_REQUEST_TEMPLATE\.md$|
          .github/instructions/|
          .github/skills/
        args:
          - "--maxlevel"
          - "2"
  - repo: https://github.com/Lucas-C/pre-commit-hooks
    rev: ad1b27d73581aa16cca06fc4a0761fc563ffe8e8  # frozen: v1.5.6
    hooks:
      # One insert-license hook per file family; each passes the comment style that is valid for
      # that family ("start|line-prefix|end").
      - id: insert-license
        name: Add license for all SQL files
        files: \.sql$
        exclude: |
          (?x)
          ^\.github/|
          ^scripts/ci/license-templates/
        args:
          - --comment-style
          - "/*||*/"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all RST files
        args:
          - --comment-style
          - "||"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.rst
          - --fuzzy-match-generates-todo
        files: \.rst$
        exclude: (?x)
          ^\.github/.*$|
          newsfragments/.*\.rst$|
          ^scripts/ci/license-templates/
      - id: insert-license
        name: Add license for CSS/JS/JSX/PUML/TS/TSX
        files: \.(css|jsx?|puml|tsx?)$
        exclude: ^\.github/.*$|ui/openapi-gen/|www/openapi-gen/|.*/dist/.*
        args:
          - --comment-style
          - "/*!| *| */"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all Shell files
        exclude: ^\.github/.*$|^dev/breeze/autocomplete/.*$
        files: \.bash$|\.sh$
        args:
          - --comment-style
          - "|#|"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all toml files
        exclude: ^\.github/.*$|^dev/breeze/autocomplete/.*$
        files: \.toml$
        args:
          - --comment-style
          - "|#|"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all Python files
        exclude: ^\.github/.*$|^.*/_vendor/.*$|^airflow-ctl/.*/.*generated\.py$
        files: \.py$|\.pyi$
        args:
          - --comment-style
          - "|#|"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all XML files
        exclude: ^\.github/.*$
        files: \.xml$
        args:
          - --comment-style
          # The license must be wrapped in an XML comment; an empty comment style would insert
          # the license as bare text and produce a document that is not well-formed XML.
          - "<!--||-->"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all YAML files except Helm templates
        exclude: >
          (?x)
          ^\.github/.*$|
          ^chart/templates/.*|
          .*reproducible_build\.yaml$|
          ^.*/v2.*\.yaml$|
          ^.*/openapi/_private_ui.*\.yaml$|
          ^.*/pnpm-lock\.yaml$|
          .*-generated\.yaml$
        types: [yaml]
        files: \.ya?ml$
        args:
          - --comment-style
          - "|#|"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all Markdown files
        files: \.md$
        args:
          - --comment-style
          # HTML comment markers keep the license out of the rendered Markdown; an empty comment
          # style would insert the license as visible body text.
          - "<!--|| -->"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
        exclude: (?x)
          ^\.github/.*\.md$|
          ^\.claude/|
          ^(?:.*/)?AGENTS\.md$|
          ^(?:.*/)?CLAUDE\.md$|
          ^(?:.*/)?SKILL\.md$|
          ^scripts/ci/license-templates/
      # The files selected here are exactly the ones excluded from the full Markdown license above.
      - id: insert-license
        name: Add short license for agentic Markdown files
        args:
          - --comment-style
          - "||"
          - --license-filepath
          - scripts/ci/license-templates/SHORT_LICENSE.md
          - --fuzzy-match-generates-todo
        files: (?x)
          ^\.github/.*\.md$|
          ^\.claude/|
          ^(?:.*/)?AGENTS\.md$|
          ^(?:.*/)?CLAUDE\.md$|
          ^(?:.*/)?SKILL\.md$
        exclude: (?x)
          ^scripts/ci/license-templates/|
          ^\.github/instructions/|
          ^\.github/skills/airflow-translations/SKILL\.md$
      - id: insert-license
        name: Add license for all other files
        args:
          - --comment-style
          - "|#|"
          - --license-filepath
          - scripts/ci/license-templates/LICENSE.txt
          - --fuzzy-match-generates-todo
        files: |
          (?x)
          \.cfg$|
          \.conf$|
          \.ini$|
          \.ldif$|
          \.properties$|
          \.service$|
          \.tf$|
          Dockerfile.*$
        exclude: (?x)
          ^\.github/.*$|
          ^scripts/ci/license-templates/
  - repo: local
    hooks:
      - id: check-min-python-version
        name: Check minimum Python version
        entry: ./scripts/ci/prek/check_min_python_version.py
        language: python
        require_serial: true
      - id: check-notice-files
        name: Check NOTICE files for current year and ASF references
        entry: ./scripts/ci/prek/check_notice_files.py
        language: python
        files: ^.*NOTICE$
      - id: check-version-consistency
        name: Check version consistency
        entry: ./scripts/ci/prek/check_version_consistency.py
        language: python
        files: >
          (?x)
          ^airflow-core/src/airflow/__init__\.py$|
          ^airflow-core/pyproject\.toml$|
          ^task-sdk/src/airflow/sdk/__init__\.py$|
          ^pyproject\.toml$
        pass_filenames: false
        require_serial: true
      - id: check-distribution-gitignore
        name: Check distribution .gitignore files have *.iml
        entry: ./scripts/ci/prek/check_distribution_gitignore.py
        language: python
        files: >
          (?x)
          ^.*/\.gitignore$|
          ^\.gitignore$|
          ^.*/pyproject\.toml$|
          ^pyproject\.toml$
        pass_filenames: false
        require_serial: true
      - id: upgrade-important-versions
        name: Upgrade important versions (manual)
        entry: ./scripts/ci/prek/upgrade_important_versions.py
        stages: ['manual']
        language: python
        files: >
          (?x)
          ^\.pre-commit-config\.yaml$|
          ^\.github/\.pre-commit-config\.yaml$|
          ^scripts/ci/prek/update_installers_and_prek\.py$
        pass_filenames: false
        require_serial: true
  - repo: https://github.com/adamchainz/blacken-docs
    rev: fda77690955e9b63c6687d8806bafd56a526e45f  # frozen: 1.20.0
    hooks:
      - id: blacken-docs
        name: Run black on docs
        args:
          - --line-length=110
          - --target-version=py310
          - --target-version=py311
          - --target-version=py312
          - --target-version=py313
        alias: blacken-docs
        additional_dependencies:
          - 'black==26.1.0'
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c  # frozen: v6.0.0
    hooks:
      - id: check-merge-conflict
        name: Check that merge conflicts are not being committed
      - id: debug-statements
        name: Detect accidentally committed debug statements
      - id: check-builtin-literals
        name: Require literal syntax when initializing builtins
      - id: detect-private-key
        name: Detect if private key is added to the repository
        exclude: ^providers/ssh/docs/connections/ssh\.rst$
      - id: end-of-file-fixer
        name: Make sure that there is an empty line at the end
        exclude: >
          (?x)
          ^airflow-core/docs/img/.*\.dot|
          ^airflow-core/docs/img/.*\.sha256|
          .*/dist/.*|
          LICENSES-ui\.txt$|
          .*/openapi-gen/.*
      - id: mixed-line-ending
        name: Detect if mixed line ending is used (\r vs. \r\n)
      - id: check-executables-have-shebangs
        name: Check that executables have shebang
      - id: check-xml
        name: Check XML files with xmllint
        exclude: >
          (?x)
          ^scripts/ci/docker-compose/gremlin/.
      - id: trailing-whitespace
        name: Remove trailing whitespace at end of line
        exclude: >
          (?x)
          ^airflow-core/docs/img/.*\.dot$|
          ^dev/breeze/doc/images/output.*$|
          ^.*/openapi-gen/.*$|
          ^airflow-ctl/docs/images/.*\.svg$
  - repo: https://github.com/pre-commit/pygrep-hooks
    rev: 3a6eb0fadf60b3cccfd80bad9dbb6fae7e47b316  # frozen: v1.10.0
    hooks:
      - id: rst-backticks
        name: Check if RST files use double backticks for code
      - id: python-no-log-warn
        name: Check if there are no deprecate log warn
  - repo: https://github.com/adrienverge/yamllint
    rev: cba56bcde1fdd01c1deb3f945e69764c291a6530  # frozen: v1.38.0
    hooks:
      - id: yamllint
        name: Check YAML files with yamllint
        entry: yamllint -c yamllint-config.yml --strict
        types: [yaml]
        exclude: >
          (?x)
          ^.*airflow\.template\.yaml$|
          ^.*init_git_sync\.template\.yaml$|
          ^chart/(?:templates|files)/.*\.yaml$|
          ^helm-tests/tests/chart_utils/keda.sh_scaledobjects\.yaml$|
          .*/v1.*\.yaml$|
          ^.*openapi.*\.yaml$|
          ^\.pre-commit-config\.yaml$|
          ^.*reproducible_build\.yaml$|
          ^.*pnpm-lock\.yaml$|
          ^.*-generated\.yaml$
  - repo: https://github.com/ikamensh/flynt
    rev: 97be693bf18bc2f050667dd282d243e2824b81e2  # frozen: 1.0.6
    hooks:
      - id: flynt
        name: Run flynt string format converter for Python
        args:
          # If flynt detects too long text it ignores it. So we set a very large limit to make it easy
          # to split the text by hand. Overlong lines are left for the Python linter that runs later
          # in this file (this note used to say flake8; ruff is the only Python linter configured
          # here - TODO confirm it enforces the line-length limit), so the user is informed to take action.
          - --line-length
          - '99999'
  - repo: https://github.com/codespell-project/codespell
    rev: 2ccb47ff45ad361a21071a7eedda4c37e6ae8c5a  # frozen: v2.4.2
    hooks:
      - id: codespell
        name: Run codespell
        description: Run codespell to check for common misspellings in files
        # Prints a hint about the project word list first, then hands over to codespell itself.
        entry: bash -c 'echo "If you think that this failure is an error, consider adding the word(s)
          to the codespell dictionary at docs/spelling_wordlist.txt.
          The word(s) should be in lowercase." && exec codespell "$@"' --
        language: python
        types: [text]
        exclude: >
          (?x)
          material-icons\.css$|
          ^images/.*$|
          ^RELEASE_NOTES\.txt$|
          ^.*package-lock\.json$|
          ^.*/kinglear\.txt$|
          ^.*pnpm-lock\.yaml$|
          .*/dist/.*|
          ^airflow-core/src/airflow/ui/public/i18n/locales/(?!en/).+/|
          ^\.github/skills/airflow-translations/
        args:
          - --ignore-words=docs/spelling_wordlist.txt
          - --skip=providers/.*/src/airflow/providers/*/*.rst,providers/*/docs/changelog.rst,docs/*/commits.rst,providers/*/docs/commits.rst,providers/*/*/docs/commits.rst,docs/apache-airflow/tutorial/pipeline_example.csv,*.min.js,*.lock,INTHEWILD.md,*.svg
          - --exclude-file=.codespellignorelines
  - repo: https://github.com/woodruffw/zizmor-pre-commit
    rev: ea2eb407b4cbce87cf0d502f36578950494f5ac9  # frozen: v1.23.1
    hooks:
      - id: zizmor
        name: Run zizmor to check for github workflow syntax errors
        types: [yaml]
        files: ^\.github/workflows/.*$|^\.github/actions/.*$
        require_serial: true
        entry: zizmor
  - repo: local
    # Note that this is the 2nd "local" repo group in the .pre-commit-config.yaml file. This is because
    # we try to minimize the number of passes that must happen to apply some of the changes
    # done by prek-hooks. Some of the prek hooks not only check for errors but also fix them. This means
    # that output from an earlier prek hook becomes input to another prek hook. Splitting our local
    # scripts and adding some other non-local prek hooks in-between allows us to handle such
    # changes quickly - especially when we want the early modifications from the first local group
    # to be applied before the non-local prek hooks are run
    hooks:
      - id: check-shared-distributions-structure
        name: Check shared distributions structure
        entry: ./scripts/ci/prek/check_shared_distributions_structure.py
        language: python
        pass_filenames: false
        files: ^shared/.*$
      - id: check-shared-distributions-usage
        name: Check shared distributions usage
        entry: ./scripts/ci/prek/check_shared_distributions_usage.py
        language: python
        pass_filenames: false
        files: ^shared/.*$|^.*/pyproject.toml$|^.*/_shared/.*$
      - id: check-airflow-imports-in-shared
        name: Check for core/sdk imports in shared libraries
        entry: ./scripts/ci/prek/check_airflow_imports_in_shared.py
        language: python
        pass_filenames: true
        files: ^shared/.*/src/.*\.py$
        exclude: |
          (?x)
          ^shared/listeners/src/airflow_shared/listeners/spec/taskinstance\.py$|
          ^shared/logging/src/airflow_shared/logging/remote\.py$|
          ^shared/observability/src/airflow_shared/observability/metrics/stats\.py$|
          ^shared/secrets_backend/src/airflow_shared/secrets_backend/base\.py$
      - id: check-test-only-imports-in-src
        name: Check for test-only imports in production source
        entry: ./scripts/ci/prek/check_test_only_imports_in_src.py
        language: python
        pass_filenames: true
        files: >
          (?x)
          ^airflow-core/src/.*\.py$|
          ^airflow-ctl/src/.*\.py$|
          ^providers/.*/src/.*\.py$|
          ^task-sdk/src/.*\.py$|
          ^shared/.*/src/.*\.py$
      - id: check-secrets-search-path-sync
        name: Check sync between sdk and core
        entry: ./scripts/ci/prek/check_secrets_search_path_sync.py
        language: python
        pass_filenames: false
        files: ^airflow-core/src/airflow/secrets/base_secrets\.py$|^task-sdk/src/airflow/sdk/execution_time/secrets/__init__\.py$
      - id: check-registry-types-json-sync
        name: Check registry types.json in sync with types.py
        entry: ./scripts/ci/prek/check_registry_types_json_sync.py
        language: python
        pass_filenames: false
        files: ^dev/registry/registry_tools/types\.py$|^registry/src/_data/types\.json$
      - id: ruff
        name: Run 'ruff' for extremely fast Python linting
        description: "Run 'ruff' for extremely fast Python linting"
        entry: ruff check --force-exclude
        language: python
        types_or: [python, pyi]
        args: [--fix]
        require_serial: true
        additional_dependencies: ['ruff==0.15.7']
        exclude: ^airflow-core/tests/unit/dags/test_imports\.py$|^performance/tests/test_.*\.py$
      - id: ruff-format
        name: Run 'ruff format'
        description: "Run 'ruff format' for extremely fast Python formatting"
        entry: ./scripts/ci/prek/ruff_format.py
        language: python
        types_or: [python, pyi]
        args: []
        require_serial: true
        exclude: ^airflow-core/tests/unit/dags/test_imports\.py$
      - id: replace-bad-characters
        name: Replace bad characters
        entry: ./scripts/ci/prek/replace_bad_characters.py
        language: python
        types: [file, text]
        exclude: >
          (?x)
          ^clients/gen/go\.sh$|
          ^\.gitmodules$|
          ^airflow-core/src/airflow/ui/openapi-gen/|
          ^airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/openapi-gen/|
          ^providers/edge3/src/airflow/providers/edge3/plugins/www/openapi-gen/|
          .*/dist/.*|
          \.go$|
          /go\.(mod|sum)$
      - id: lint-dockerfile
        name: Lint Dockerfile
        language: python
        entry: ./scripts/ci/prek/lint_dockerfile.py
        files: Dockerfile.*$
        pass_filenames: true
        require_serial: true
      - id: check-airflow-providers-bug-report-template
        name: Sort airflow-bug-report provider list
        language: python
        files: ^\.github/ISSUE_TEMPLATE/3-airflow_providers_bug_report\.yml$
        require_serial: true
        entry: ./scripts/ci/prek/check_airflow_bug_report_template.py
      - id: update-local-yml-file
        name: Update mounts in the local yml file
        entry: ./scripts/ci/prek/local_yml_mounts.py
        language: python
        files: ^dev/breeze/src/airflow_breeze/utils/docker_command_utils\.py$|^scripts/ci/docker_compose/local\.yml$
        pass_filenames: false
      - id: check-extras-order
        name: Check order of extras in Dockerfile
        entry: ./scripts/ci/prek/check_order_dockerfile_extras.py
        language: python
        files: ^Dockerfile$
        pass_filenames: false
      - id: generate-airflow-diagrams
        name: Generate airflow diagrams
        entry: ./scripts/ci/prek/generate_airflow_diagrams.py
        language: python
        files: >
          (?x)
          ^airflow-core/docs/.*/diagram_[^/]*\.py$|
          ^docs/images/.*\.py$|
          ^airflow-ctl/docs/images/diagrams/.*\.py$
        pass_filenames: true
      - id: prevent-deprecated-sqlalchemy-usage
        name: Prevent deprecated sqlalchemy usage
        entry: ./scripts/ci/prek/prevent_deprecated_sqlalchemy_usage.py
        language: python
        files: >
          (?x)
          ^airflow-core/.*\.py$|
          ^airflow-ctl.*\.py$|
          ^airflow-ctl-tests.*\.py|
          ^dev/.*\.py$|
          ^devel-common/.*\.py|
          ^providers/.*\.py$|
          ^task-sdk.*\.py$|
          ^task-sdk-integration-tests.*\.py
        pass_filenames: true
      - id: update-supported-versions
        name: Updates supported versions in documentation
        entry: ./scripts/ci/prek/supported_versions.py
        language: python
        files: ^airflow-core/docs/installation/supported-versions\.rst$|^scripts/ci/prek/supported_versions\.py$|^README\.md$
        pass_filenames: false
      - id: check-revision-heads-map
        name: Check that the REVISION_HEADS_MAP is up-to-date
        language: python
        entry: ./scripts/ci/prek/check_revision_heads_map.py
        pass_filenames: false
        files: >
          (?x)
          ^scripts/ci/prek/version_heads_map\.py$|
          ^airflow-core/src/airflow/migrations/versions/.*$|
          ^airflow-core/src/airflow/migrations/versions|
          ^airflow-core/src/airflow/utils/db\.py$
      - id: update-version
        name: Update versions in docs
        entry: ./scripts/ci/prek/update_versions.py
        language: python
        files: ^docs|^airflow-core/src/airflow/__init__\.py$|.*/pyproject\.toml$
        pass_filenames: false
      - id: check-pydevd-left-in-code
        language: pygrep
        name: Check for pydevd debug statements accidentally left
        entry: "pydevd.*settrace\\("
        pass_filenames: true
        files: \.py$
      - id: check-safe-filter-usage-in-html
        language: pygrep
        name: Don't use safe in templates
        description: the Safe filter is error-prone, use Markup() in code instead
        # The escaped pipe is intentional here: it matches the literal "|" of a template filter.
        entry: "\\|\\s*safe"
        files: \.html$
        pass_filenames: true
      - id: check-urlparse-usage-in-code
        language: pygrep
        name: Don't use urlparse in code
        description: urlparse is not recommended, use urlsplit() in code instead
        # pygrep uses Python regular expressions, where alternation is a bare "|". The previous
        # grep-style "\|" matched a literal pipe character, so the hook could never flag an import.
        entry: "^\\s*from urllib\\.parse import ((|, )(urlparse|urlunparse))+$"
        pass_filenames: true
        files: \.py$
      - id: check-for-inclusive-language
        language: pygrep
        name: Check for language that we do not accept as community
        description: Please use more appropriate words for community documentation.
        entry: >
          (?ix)
          (black|white)[_-]?list|
          \bshe\b|
          \bhe\b|
          \bher\b|
          \bhis\b|
          \bmaster\b|
          \bslave\b|
          \bsanity\b|
          \bdummy\b
        pass_filenames: true
        exclude: >
          (?x)
          ^README\.md$|
          ^pyproject\.toml$|
          ^generated/PYPI_README\.md$|
          ^airflow-core/docs/.*commits\.rst$|
          ^airflow-core/newsfragments/41368\.significant\.rst$|
          ^airflow-core/newsfragments/41761.significant\.rst$|
          ^airflow-core/newsfragments/43349\.significant\.rst$|
          ^airflow-core/newsfragments/60921\.significant\.rst$|
          ^airflow-core/src/airflow/api_fastapi/auth/managers/simple/ui/pnpm-lock\.yaml$|
          ^airflow-core/src/airflow/api_fastapi/gunicorn_config\.py$|
          ^airflow-core/src/airflow/cli/commands/api_server_command\.py$|
          ^airflow-core/src/airflow/api_fastapi/gunicorn_monitor\.py$|
          ^airflow-core/src/airflow/cli/commands/local_commands/fastapi_api_command\.py$|
          ^airflow-core/src/airflow/config_templates/|
          ^airflow-core/src/airflow/models/baseoperator\.py$|
          ^airflow-core/src/airflow/operators/__init__\.py$|
          ^airflow-core/src/airflow/serialization/serialized_objects\.py$|
          ^airflow-core/src/airflow/ui/openapi-gen/|
          ^airflow-core/src/airflow/ui/pnpm-lock\.yaml$|
          ^providers/common/ai/src/airflow/providers/common/ai/plugins/www/pnpm-lock\.yaml$|
          ^airflow-core/src/airflow/ui/public/i18n/locales/de/README\.md$|
          ^airflow-core/src/airflow/ui/src/i18n/config\.ts$|
          ^airflow-core/src/airflow/utils/db\.py$|
          ^airflow-core/src/airflow/utils/trigger_rule\.py$|
          ^airflow-core/tests/|
          ^task-sdk/tests/|
          ^.*changelog\.(rst|txt)$|
          ^.*CHANGELOG\.(rst|txt)$|
          ^chart/values.schema\.json$|
          ^.*commits\.(rst|txt)$|
          ^.*/conf_constants\.py$|
          ^.*/conf\.py$|
          ^contributing-docs/03_contributors_quick_start\.rst$|
          ^dev/|
          ^devel-common/src/docs/README\.rst$|
          ^devel-common/src/sphinx_exts/removemarktransform\.py|
          ^devel-common/src/tests_common/test_utils/db\.py|
          .*/dist/.*|
          ^docs/apache-airflow-providers-amazon/secrets-backends/aws-ssm-parameter-store\.rst$|
          git|
          ^helm-tests/tests/chart_utils/helm_template_generator\.py$|
          package-lock\.json$|
          ^.*\.(png|gif|jp[e]?g|svg|tgz|lock|woff2?)$|
          ^\.pre-commit-config\.yaml$|
          ^.*/provider_conf\.py$|
          ^providers/\.pre-commit-config\.yaml$|
          ^providers/amazon/src/airflow/providers/amazon/aws/hooks/emr\.py$|
          ^providers/amazon/src/airflow/providers/amazon/aws/operators/emr\.py$|
          ^providers/.*/get_provider_info\.py$|
          ^providers/.*/provider\.yaml$|
          ^providers/apache/cassandra/src/airflow/providers/apache/cassandra/hooks/cassandra\.py$|
          ^providers/apache/hdfs/docs/connections\.rst$|
          ^providers/apache/hive/src/airflow/providers/apache/hive/operators/hive_stats\.py$|
          ^providers/apache/hive/src/airflow/providers/apache/hive/transfers/vertica_to_hive\.py$|
          ^providers/apache/kafka/docs/connections/kafka\.rst$|
          ^providers/apache/spark/docs/decorators/pyspark\.rst$|
          ^providers/apache/spark/docs/connections/spark-submit.rst$|
          ^providers/apache/spark/src/airflow/providers/apache/spark/decorators/|
          ^providers/apache/spark/src/airflow/providers/apache/spark/hooks/|
          ^providers/apache/spark/src/airflow/providers/apache/spark/operators/|
          ^providers/cncf/kubernetes/docs/operators\.rst$|
          ^providers/common/sql/tests/provider_tests/common/sql/operators/test_sql_execute\.py$|
          ^providers/edge3/src/airflow/providers/edge3/plugins/www/pnpm-lock.yaml$|
          ^providers/exasol/src/airflow/providers/exasol/hooks/exasol\.py$|
          ^providers/fab/docs/auth-manager/webserver-authentication\.rst$|
          ^providers/fab/src/airflow/providers/fab/auth_manager/security_manager/|
          ^providers/fab/src/airflow/providers/fab/www/static/|
          ^providers/fab/src/airflow/providers/fab/www/templates/|
          ^providers/google/docs/operators/cloud/kubernetes_engine\.rst$|
          ^providers/google/src/airflow/providers/google/cloud/hooks/bigquery\.py$|
          ^providers/google/src/airflow/providers/google/cloud/operators/cloud_build\.py$|
          ^providers/google/src/airflow/providers/google/cloud/operators/dataproc\.py$|
          ^providers/google/src/airflow/providers/google/cloud/operators/mlengine\.py$|
          ^providers/keycloak/src/airflow/providers/keycloak/cli/definition.py|
          ^providers/microsoft/azure/docs/connections/azure_cosmos\.rst$|
          ^providers/microsoft/azure/src/airflow/providers/microsoft/azure/hooks/cosmos\.py$|
          ^providers/microsoft/winrm/src/airflow/providers/microsoft/winrm/hooks/winrm\.py$|
          ^providers/microsoft/winrm/src/airflow/providers/microsoft/winrm/operators/winrm\.py$|
          ^providers/opsgenie/src/airflow/providers/opsgenie/hooks/opsgenie\.py$|
          ^providers/redis/src/airflow/providers/redis/provider\.yaml$|
          ^providers/.*/tests/|
          .rat-excludes|
          ^.*RELEASE_NOTES\.rst$|
          ^scripts/ci/docker-compose/integration-keycloak\.yml$|
          ^scripts/ci/docker-compose/keycloak/keycloak-entrypoint\.sh$|
          ^scripts/ci/prek/upgrade_important_versions.py$|
          ^scripts/ci/prek/download_k8s_schemas\.py$|
          ^scripts/ci/prek/vendor_k8s_json_schema\.py$
      - id: check-template-context-variable-in-sync
        name: Sync template context variable refs
        language: python
        entry: ./scripts/ci/prek/check_template_context_variable_in_sync.py
        files: (?x)
          ^airflow-core/src/airflow/models/taskinstance\.py$|
          ^task-sdk/src/airflow/sdk/definitions/context\.py$|
          ^airflow-core/docs/templates-ref\.rst$
      - id: check-base-operator-usage
        language: pygrep
        name: Check BaseOperator core imports
        description: Make sure BaseOperator is imported from airflow.models.baseoperator in core
        entry: "from airflow\\.models import.* BaseOperator\\b"
        files: \.py$
        pass_filenames: true
        exclude: >
          (?x)
          ^airflow-core/src/airflow/decorators/.*$|
          ^airflow-core/src/airflow/hooks/.*$|
          ^airflow-core/src/airflow/operators/.*$|
          ^providers/.*$
      - id: check-base-operator-usage
        language: pygrep
        name: Check BaseOperatorLink core imports
        description: Make sure BaseOperatorLink is not imported from airflow.models in core
        entry: "^\\s*from airflow\\.models\\.baseoperatorlink import BaseOperatorLink\\b"
        files: \.py$
        pass_filenames: true
        exclude: >
          (?x)
          ^airflow-core/src/airflow/decorators/.*$|
          ^airflow-core/src/airflow/hooks/.*$|
          ^airflow-core/src/airflow/operators/.*$|
          ^providers/.*/src/airflow/providers/.*$|
          ^providers/.*/src/airflow/providers/standard/sensors/.*$
      - id: check-core-deprecation-classes
        language: pygrep
        name: Verify usage of Airflow deprecation classes in core
        entry: category=DeprecationWarning|category=PendingDeprecationWarning
        files: \.py$
        exclude: >
          (?x)
          ^airflow-core/src/airflow/configuration\.py$|
          ^airflow-core/tests/.*$|
          ^providers/.*/src/airflow/providers/|
          ^scripts/in_container/verify_providers\.py$|
          ^providers/.*/tests/.*$|
          ^scripts/tests/.*$|
          ^devel-common/
        pass_filenames: true
      - id: check-provide-create-sessions-imports
        language: pygrep
        name: Check session util imports
        description: NEW_SESSION, provide_session, and create_session should be imported from airflow.utils.session to avoid import cycles.
        entry: "from airflow\\.utils\\.db import.* (NEW_SESSION|provide_session|create_session)"
        files: \.py$
        pass_filenames: true
      - id: check-incorrect-use-of-LoggingMixin
        language: pygrep
        name: Make sure LoggingMixin is not used alone
        entry: "LoggingMixin\\(\\)"
        files: \.py$
        pass_filenames: true
      - id: check-start-date-not-used-in-defaults
        language: pygrep
        name: start_date not in default_args
        entry: "default_args\\s*=\\s*{\\s*(\"|')start_date(\"|')|(\"|')start_date(\"|'):"
        files: \.*example_dags.*\.py$
        pass_filenames: true
      - id: check-apache-license-rat
        name: Check if licenses are OK for Apache
        entry: ./scripts/ci/prek/check_license.py
        language: python
        files: ^LICENSE$
        pass_filenames: false
      - id: check-metrics-synced-with-registry
        name: Check that metrics in the codebase are in sync with the metrics registry YAML file.
        entry: ./scripts/ci/prek/check_metrics_synced_with_the_registry.py
        language: python
        files: \.py$
        exclude: ^(tests/|.*/tests/)
        pass_filenames: true
        additional_dependencies: ["PyYAML>=6.0", "rich>=13.6.0"]
      - id: check-boring-cyborg-configuration
        name: Checks for Boring Cyborg configuration consistency
        language: python
        entry: ./scripts/ci/prek/boring_cyborg.py
        pass_filenames: false
        require_serial: true
      - id: update-in-the-wild-to-be-sorted
        name: Sort INTHEWILD.md alphabetically
        entry: ./scripts/ci/prek/sort_in_the_wild.py
        language: python
        files: ^\.pre-commit-config\.yaml$|^INTHEWILD\.md$
        pass_filenames: false
        require_serial: true
      - id: update-installed-providers-to-be-sorted
        name: Sort and uniquify installed_providers.txt
        entry: ./scripts/ci/prek/sort_installed_providers.py
        language: python
        files: ^\.pre-commit-config\.yaml$|^.*_installed_providers\.txt$
        pass_filenames: false
        require_serial: true
      - id: update-spelling-wordlist-to-be-sorted
        name: Sort spelling_wordlist.txt
        entry: ./scripts/ci/prek/sort_spelling_wordlist.py
        language: python
        files: ^\.pre-commit-config\.yaml$|^docs/spelling_wordlist\.txt$
        require_serial: true
        pass_filenames: false
      - id: shellcheck
        name: Check Shell scripts syntax correctness
        language: docker_image
        entry: koalaman/shellcheck:v0.8.0 -x -a
        files: \.(bash|sh)$|^hooks/build$|^hooks/push$
        exclude: ^dev/breeze/autocomplete/.*$
      - id: check-integrations-list-consistent
        name: Sync integrations list with docs
        entry: ./scripts/ci/prek/check_integrations_list.py
        language: python
        files: ^scripts/ci/docker-compose/integration-.*\.yml$|^contributing-docs/testing/integration_tests\.rst$
        require_serial: true
        pass_filenames: false
      - id: sync-translation-namespaces
        name: Sync translation namespace file list
        entry: ./scripts/ci/prek/sync_translation_namespaces.py
        language: python
        files: >
          (?x)
          ^airflow-core/src/airflow/ui/public/i18n/locales/en/.*\.json$|
          ^\.github/skills/airflow-translations/SKILL\.md$
        pass_filenames: false
      - id: update-pyproject-toml
        name: Update Airflow's meta-package pyproject.toml
        language: python
        entry: ./scripts/ci/prek/update_airflow_pyproject_toml.py
        files: >
          (?x)
          ^.*/pyproject\.toml$|
          ^scripts/ci/prek/update_airflow_pyproject_toml\.py$|
          ^providers/.*/pyproject\.toml$|
          ^providers/.*/provider\.yaml$
        pass_filenames: false
        require_serial: true
      - id: check-excluded-provider-markers
        name: Check excluded-provider python_version markers in pyproject.toml
        language: python
        entry: ./scripts/ci/prek/check_excluded_provider_markers.py
        files: >
          (?x)
          ^pyproject\.toml$|
          ^providers/.*/provider\.yaml$
        pass_filenames: false
        require_serial: true
        additional_dependencies: ['packaging>=25', 'pyyaml', 'tomli>=2.0.1', 'rich>=13.6.0']
      - id: update-reproducible-source-date-epoch
        name: Update Source Date Epoch for reproducible builds
        language: python
        entry: ./scripts/ci/prek/update_source_date_epoch.py
        files: ^RELEASE_NOTES\.rst$|^chart/RELEASE_NOTES\.rst$
        require_serial: true
      - id: check-breeze-top-dependencies-limited
        name: Check top-level breeze deps
        description: Breeze should have small number of top-level dependencies
        language: python
        entry: ./scripts/tools/check_if_limited_dependencies.py
        files: ^dev/breeze/.*$
        pass_filenames: false
        require_serial: true
      - id: check-system-tests-present
        name: Check if system tests have required segments of code
        entry: ./scripts/ci/prek/check_system_tests.py
        language: python
        files: ^.*/tests/system/.*/example_[^/]*\.py$
        pass_filenames: true
      - id: generate-pypi-readme
        name: Generate PyPI README
        entry: ./scripts/ci/prek/generate_pypi_readme.py
        language: python
        files: ^README\.md$
        pass_filenames: false
      - id: lint-markdown
        name: Run markdownlint
        description: Checks the style of Markdown files.
        entry: markdownlint
        language: node
        types: [markdown]
        files: \.(md|mdown|markdown)$
        additional_dependencies: ['markdownlint-cli@0.38.0']
      # The same lint script is registered three times, each with its own schema and file selection.
      - id: lint-json-schema
        name: Lint JSON Schema files
        entry: ./scripts/ci/prek/lint_json_schema.py
        args:
          - --spec-file
          - scripts/ci/prek/draft7_schema.json
        language: python
        pass_filenames: true
        files: .*\.schema\.json$
        require_serial: true
      - id: lint-json-schema
        name: Lint NodePort Service
        entry: ./scripts/ci/prek/lint_json_schema.py
        args:
          - --spec-url
          - https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/v1.20.2-standalone/service-v1.json
        language: python
        pass_filenames: true
        files: ^scripts/ci/kubernetes/nodeport\.yaml$
        require_serial: true
      - id: lint-json-schema
        name: Lint Docker compose files
        entry: ./scripts/ci/prek/lint_json_schema.py
        args:
          - --spec-url
          - https://raw.githubusercontent.com/compose-spec/compose-spec/master/schema/compose-spec.json
        language: python
        pass_filenames: true
        files: ^scripts/ci/docker-compose/.+\.ya?ml$|docker-compose\.ya?ml$
        exclude: >
          (?x)
          ^scripts/ci/docker-compose/grafana/.|
          ^scripts/ci/docker-compose/gremlin/.|
          ^scripts/ci/docker-compose/.+-config\.ya?ml$
        require_serial: true
      - id: check-persist-credentials-disabled-in-github-workflows
        name: Check persistent creds in workflow files
        description: Check that workflow files have persist-credentials disabled
        entry: ./scripts/ci/prek/checkout_no_credentials.py
        language: python
        pass_filenames: true
        files: ^\.github/workflows/.*\.yml$
      - id: check-docstring-param-types
        name: Check that docstrings do not specify param types
        entry: ./scripts/ci/prek/docstring_param_type.py
        language: python
        pass_filenames: true
        files: \.py$
      - id: check-zip-file-is-not-committed
        name: Check no zip files are committed
        description: Zip files are not allowed in the repository
        language: fail
        entry: |
          Zip files are not allowed in the repository as they are hard to track and have security implications.
          Please remove the zip file from the repository.
        files: \.zip$
      - id: update-inlined-dockerfile-scripts
        name: Inline Dockerfile and Dockerfile.ci scripts
        entry: ./scripts/ci/prek/inline_scripts_in_docker.py
        language: python
        pass_filenames: false
        files: ^Dockerfile$|^Dockerfile\.ci$|^scripts/docker/.*$
        require_serial: true
      - id: check-changelog-has-no-duplicates
        name: Check changelogs for duplicate entries
        language: python
        files: changelog\.(rst|txt)$
        entry: ./scripts/ci/prek/changelog_duplicates.py
        pass_filenames: true
      - id: check-changelog-format
        name: Check changelog format
        language: python
        files: changelog\.(rst|txt)$
        entry: ./scripts/ci/prek/check_changelog_format.py
        pass_filenames: true
      - id: check-newsfragments-are-valid
        name: Check newsfragments are valid
        language: python
        files: newsfragments/.*\.rst$
        entry: ./scripts/ci/prek/newsfragments.py
        pass_filenames: true
        # We sometimes won't have newsfragments in the repo, so always run it so `check-hooks-apply` passes
        # This is fast, so not too much downside
        always_run: true
      - id: update-breeze-cmd-output
        name: Update breeze docs
        description: Update output of breeze commands in Breeze documentation
        entry: ./scripts/ci/prek/breeze_cmd_line.py
        language: python
        files: >
          (?x)
          ^dev/breeze/.*$|
          ^\.pre-commit-config\.yaml$|
          ^scripts/ci/prek/breeze_cmd_line\.py$|
          ^generated/provider_dependencies\.json$
        require_serial: true
        pass_filenames: false
      - id: check-example-dags-urls
        name: Check that example dags url include provider versions
        entry: ./scripts/ci/prek/update_example_dags_paths.py
        language: python
        pass_filenames: true
        files: (?x)
          ^airflow-core/docs/.*example-dags\.rst$|
          ^airflow-core/docs/.*index\.rst$|
          ^docs/.*index\.rst$
        always_run: true
      - id: check-lazy-logging
        name: Check that all logging methods are lazy
        entry: ./scripts/ci/prek/check_lazy_logging.py
        language: python
        pass_filenames: true
        files: \.py$
      - id: bandit
        name: bandit
        description: "Bandit is a tool for finding common security issues in Python code"
        entry: bandit
        language: python
        language_version: python3
        types: [python]
        additional_dependencies: ['bandit==1.7.6']
        require_serial: true
        files: ^airflow-core/src/airflow/.*  # TODO Expand this to more than just airflow-core
        exclude: airflow/example_dags/.*
        args:
          - "--skip"
          - "B101,B301,B324,B403,B404,B603"
          - "--severity-level"
          - "high"  # TODO: remove this line when we fix all the issues
      - id: check-k8s-schemas-published
        name: Check K8s schemas are published on airflow.apache.org
        entry: ./scripts/ci/prek/check_k8s_schemas_published.py
        language: python
        pass_filenames: false
        files: ^dev/breeze/src/airflow_breeze/global_constants\.py$
        require_serial: true
        # This is a fast regular hook that runs when any pyproject.toml changes
        # It runs locally and usually will not result in modifying the lock unnecessarily
        # Unless there is a conflict and uv will determine that the lock needs to be updated to resolve it
      - id: update-uv-lock
        name: Update uv.lock
        entry: uv lock
        language: system
        files: >
          (?x)
          (^|/)pyproject\.toml$|
          ^uv\.lock$
        pass_filenames: false
        require_serial: true
        ## ADD MOST PREK HOOK ABOVE THAT LINE
        # The below prek hooks are those requiring CI image to be built
        ## ONLY ADD PREK HOOKS HERE THAT REQUIRE CI IMAGE
      - id: mypy-dev
        stages: ['pre-push']
        name: Run mypy for dev
        language: python
        entry: ./scripts/ci/prek/mypy.py
        files: ^dev/.*\.py$|^scripts/.*\.py$
        require_serial: true
      - id: mypy-dev
        stages: ['manual']
        name: Run mypy for dev (manual)
        language: python
        entry: ./scripts/ci/prek/mypy_folder.py dev scripts
        pass_filenames: false
        files: ^.*\.py$
        require_serial: true
      - id: mypy-devel-common
        stages: ['pre-push']
        name: Run mypy for devel-common
        language: python
        entry: ./scripts/ci/prek/mypy.py
        files: ^devel-common/.*\.py$
        require_serial: true
      - id: mypy-devel-common
        stages: ['manual']
        name: Run mypy for devel-common (manual)
        language: python
        entry: ./scripts/ci/prek/mypy_folder.py devel-common
        pass_filenames: false
        files: ^.*\.py$
        require_serial: true
      - id: check-template-fields-valid
        name: Check templated fields mapped in operators/sensors
        language: python
        entry: ./scripts/ci/prek/check_template_fields.py
        files: ^(providers/.*/)?airflow-core/.*/(sensors|operators)/.*\.py$
        require_serial: true
      - id: check-execution-api-versions
        name: Check execution API datamodel changes have corresponding version updates
        entry: ./scripts/ci/prek/check_execution_api_versions.py
        language: python
        pass_filenames: true
        files: ^airflow-core/src/airflow/api_fastapi/execution_api/(datamodels|versions)/.*\.py$
        require_serial: true
      - id: generate-tasksdk-datamodels
        name: Generate Datamodels for TaskSDK client
        language: python
        entry: uv run -p 3.12 --no-progress --active --group codegen --project apache-airflow-task-sdk
          --directory task-sdk -s dev/generate_task_sdk_models.py
        pass_filenames: false
        files: ^airflow-core/src/airflow/api_fastapi/execution_api/.*\.py$
        require_serial: true
      - id: generate-airflowctl-datamodels
        name: Generate Datamodels for AirflowCTL
        language: python
        entry: >
          bash -c '
          uv run -p 3.12 --no-dev --no-progress --active --group codegen --project apache-airflow-ctl
          --directory airflow-ctl/ datamodel-codegen &&
          uv run -p 3.12 --no-dev --no-progress --active --group codegen --project apache-airflow-ctl
          --directory airflow-ctl/ datamodel-codegen
          --input="../airflow-core/src/airflow/api_fastapi/auth/managers/simple/openapi/v2-simple-auth-manager-generated.yaml"
          --output="src/airflowctl/api/datamodels/auth_generated.py"'
        pass_filenames:
false files: (?x) ^airflow-core/src/airflow/api_fastapi/core_api/datamodels/.*\.py$| ^airflow-core/src/airflow/api_fastapi/auth/managers/simple/(datamodels|routes|services|openapi)/.*\.py$ require_serial: true - id: check-contextmanager-class-decorators name: Check for problematic context manager class decorators entry: ./scripts/ci/prek/check_contextmanager_class_decorators.py language: python files: .*test.*\.py$ pass_filenames: true # This is a manual hook, run by `breeze ci upgrade` - upgrading all dependencies inside the # Breeze CI image - which allows checking all dependencies for all providers. # ALWAYS keep it at the end so that it can take into account all the other hook's changes. - id: update-uv-lock stages: ['manual'] name: Update uv.lock (manual) entry: breeze run uv lock --upgrade language: system files: > (?x) (^|/)pyproject\.toml$| ^uv\.lock$ pass_filenames: false require_serial: true