#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs the C++ test suite from an existing build tree.
#
# Arguments:
#   $1     - Arrow source checkout (must contain cpp/ and testing/data)
#   $2     - build root; the C++ build tree is expected at $2/cpp
#   $3...  - extra arguments forwarded to `ctest` (or `meson test`)
#
# Environment variables read (all optional unless noted):
#   ARROW_BUILD_TYPE, ARROW_HOME, CMAKE_INSTALL_LIBDIR, CMAKE_INSTALL_PREFIX,
#   ARROW_DEBUG_MEMORY_POOL, ARROW_EMSCRIPTEN, ARROW_USE_MESON,
#   ARROW_USE_ASAN, ARROW_USE_TSAN, ARROW_CSV, ARROW_PARQUET,
#   ARROW_BUILD_EXAMPLES, ARROW_FUZZING, ARROW_CTEST_TIMEOUT, PYTHON,
#   VCPKG_ROOT, VCPKG_DEFAULT_TRIPLET, NPROC, NUMBER_OF_PROCESSORS,
#   ASAN_OPTIONS, LD_LIBRARY_PATH.

set -ex

if [[ $# -lt 2 ]]; then
  echo "Usage: $0 <arrow_dir> <build_dir> [ctest args ...]" >&2
  exit 1
fi

arrow_dir=${1}; shift
build_dir=${1}/cpp; shift
source_dir=${arrow_dir}/cpp
binary_output_dir=${build_dir}/${ARROW_BUILD_TYPE:-debug}

export ARROW_TEST_DATA=${arrow_dir}/testing/data
export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data
# Only append the previous value when it is non-empty: a trailing ":" would
# add an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# By default, aws-sdk tries to contact a non-existing local ip host
# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

# Enable memory debug checks if the env is not set already
if [ -z "${ARROW_DEBUG_MEMORY_POOL}" ]; then
  export ARROW_DEBUG_MEMORY_POOL=trap
fi

exclude_tests=()
ctest_options=()

# Skip filesystem tests whose local emulator/server binary is not installed.
if ! type azurite >/dev/null 2>&1; then
  exclude_tests+=("arrow-azurefs-test")
fi
if ! type storage-testbench >/dev/null 2>&1; then
  exclude_tests+=("arrow-gcsfs-test")
fi
if ! type minio >/dev/null 2>&1; then
  exclude_tests+=("arrow-s3fs-test")
fi

# Pick the test parallelism and the platform-specific exclusions.
case "$(uname)" in
  Linux)
    n_jobs=$(nproc)
    ;;
  Darwin)
    n_jobs=$(sysctl -n hw.ncpu)
    # TODO: https://github.com/apache/arrow/issues/40410
    exclude_tests+=("arrow-s3fs-test")
    ;;
  MINGW*)
    n_jobs=${NUMBER_OF_PROCESSORS:-1}
    # TODO: Enable these crashed tests.
    # https://issues.apache.org/jira/browse/ARROW-9072
    exclude_tests+=("gandiva-binary-test")
    exclude_tests+=("gandiva-boolean-expr-test")
    exclude_tests+=("gandiva-date-time-test")
    exclude_tests+=("gandiva-decimal-single-test")
    exclude_tests+=("gandiva-decimal-test")
    exclude_tests+=("gandiva-filter-project-test")
    exclude_tests+=("gandiva-filter-test")
    exclude_tests+=("gandiva-hash-test")
    exclude_tests+=("gandiva-if-expr-test")
    exclude_tests+=("gandiva-in-expr-test")
    exclude_tests+=("gandiva-internals-test")
    exclude_tests+=("gandiva-literal-test")
    exclude_tests+=("gandiva-null-validity-test")
    exclude_tests+=("gandiva-precompiled-test")
    exclude_tests+=("gandiva-projector-test")
    exclude_tests+=("gandiva-utf8-test")
    ;;
  *)
    n_jobs=${NPROC:-1}
    ;;
esac

if [ "${#exclude_tests[@]}" -gt 0 ]; then
  # "${array[*]}" joins the elements with the first character of IFS, which
  # yields the "a|b|c" alternation expected by --exclude-regex.
  IFS="|"
  ctest_options+=(--exclude-regex "${exclude_tests[*]}")
  unset IFS
fi

if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
  n_jobs=1 # avoid spurious fails on emscripten due to loading too many big executables
fi

pushd "${build_dir}"

# Fall back to python3 when PYTHON is not set and no `python` command exists.
if [ -z "${PYTHON}" ] && ! type python >/dev/null 2>&1; then
  export PYTHON="${PYTHON:-python3}"
fi

if [ "${ARROW_USE_MESON:-OFF}" = "ON" ]; then
  ARROW_BUILD_EXAMPLES=OFF # TODO: Remove this
  meson test \
    --max-lines=0 \
    --no-rebuild \
    --print-errorlogs \
    --suite arrow \
    --timeout-multiplier=10 \
    "$@"
else
  ctest \
    --label-regex unittest \
    --output-on-failure \
    --parallel "${n_jobs}" \
    --repeat until-pass:3 \
    --timeout "${ARROW_CTEST_TIMEOUT:-300}" \
    "${ctest_options[@]}" \
    "$@"
fi

# This is for testing find_package(Arrow).
#
# Note that this is not a perfect solution. We should improve this
# later.
#
# * This is ad-hoc
# * This doesn't test other CMake packages such as ArrowDataset
if [ "${ARROW_USE_MESON:-OFF}" = "OFF" ] && \
   [ "${ARROW_EMSCRIPTEN:-OFF}" = "OFF" ] && \
   [ "${ARROW_USE_ASAN:-OFF}" = "OFF" ] && \
   [ "${ARROW_USE_TSAN:-OFF}" = "OFF" ] && \
   [ "${ARROW_CSV:-ON}" = "ON" ]; then
  CMAKE_PREFIX_PATH="${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}}"
  case "$(uname)" in
    MINGW*)
      # <prefix>/lib/cmake/ isn't searched on Windows.
      #
      # See also:
      # https://cmake.org/cmake/help/latest/command/find_package.html#config-mode-search-procedure
      CMAKE_PREFIX_PATH+="/lib/cmake/"
      ;;
  esac
  if [ -n "${VCPKG_ROOT}" ] && [ -n "${VCPKG_DEFAULT_TRIPLET}" ]; then
    # Search vcpkg before <prefix>/lib/cmake.
    CMAKE_PREFIX_PATH="${VCPKG_ROOT}/installed/${VCPKG_DEFAULT_TRIPLET};${CMAKE_PREFIX_PATH}"
  fi
  cmake \
    -S "${source_dir}/examples/minimal_build" \
    -B "${build_dir}/examples/minimal_build" \
    -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}"
  cmake --build "${build_dir}/examples/minimal_build"
  pushd "${source_dir}/examples/minimal_build"
  # PATH= is for Windows.
  PATH="${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}}/bin:${PATH}" \
    "${build_dir}/examples/minimal_build/arrow-example"
  popd
fi

if [ "${ARROW_BUILD_EXAMPLES}" == "ON" ]; then
  # Keep `find` in a plain command substitution so that a failing `find`
  # still aborts the script under `set -e`.
  example_list=$(find "${binary_output_dir}" -executable -name "*example")
  if [ "${example_list}" == "" ]; then
    echo "=================="
    echo "No examples found!"
    echo "=================="
    exit 1
  fi
  # Split on newlines only, so paths containing spaces stay intact. The list
  # is collected up front so the examples below do not inherit the
  # here-string as their stdin.
  examples=()
  while IFS= read -r ex; do
    examples+=("${ex}")
  done <<< "${example_list}"
  for ex in "${examples[@]}"; do
    echo "=================="
    echo "Executing ${ex}"
    echo "=================="
    "${ex}"
  done
fi

if [ "${ARROW_FUZZING}" == "ON" ]; then
  # Fuzzing regression tests

  # This will display any errors generated during fuzzing. These errors are
  # usually not bugs (most fuzz files are invalid and hence generate errors
  # when trying to read them), which is why they are hidden by default when
  # fuzzing.
  export ARROW_FUZZING_VERBOSITY=1
  # Some fuzz regression files may trigger huge memory allocations,
  # let the allocator return null instead of aborting.
  export ASAN_OPTIONS="$ASAN_OPTIONS allocator_may_return_null=1"

  # 1. Generate seed corpuses
  # For IPC fuzz targets, these will include the golden IPC integration files.
  "${source_dir}/build-support/fuzzing/generate_corpuses.sh" "${binary_output_dir}"

  # 2. Run fuzz targets on seed corpus entries

  # Unpacks <binary_output_dir>/<target>_seed_corpus.zip into a scratch
  # directory, runs the fuzz target on every extracted entry, then removes
  # the scratch directory.
  # Globals:   binary_output_dir (read)
  # Arguments: $1 - basename of the fuzz target executable
  run_fuzz_target_on_seed_corpus() {
    local fuzz_target_basename=$1
    local corpus_dir=${binary_output_dir}/${fuzz_target_basename}_seed_corpus
    mkdir -p "${corpus_dir}"
    pushd "${corpus_dir}"
    unzip -q "${binary_output_dir}"/"${fuzz_target_basename}"_seed_corpus.zip -d .
    "${binary_output_dir}"/"${fuzz_target_basename}" -rss_limit_mb=4000 ./*
    popd
    rm -rf "${corpus_dir}"
  }
  run_fuzz_target_on_seed_corpus arrow-csv-fuzz
  run_fuzz_target_on_seed_corpus arrow-ipc-file-fuzz
  run_fuzz_target_on_seed_corpus arrow-ipc-stream-fuzz
  run_fuzz_target_on_seed_corpus arrow-ipc-tensor-stream-fuzz
  if [ "${ARROW_PARQUET}" == "ON" ]; then
    run_fuzz_target_on_seed_corpus parquet-arrow-fuzz
    run_fuzz_target_on_seed_corpus parquet-encoding-fuzz
  fi

  # 3. Run fuzz targets on regression files from arrow-testing
  fuzz_target_options=(-rss_limit_mb=2560) # same as on OSS-Fuzz
  pushd "${ARROW_TEST_DATA}"
  "${binary_output_dir}/arrow-ipc-stream-fuzz" "${fuzz_target_options[@]}" arrow-ipc-stream/crash-*
  "${binary_output_dir}/arrow-ipc-stream-fuzz" "${fuzz_target_options[@]}" arrow-ipc-stream/*-testcase-*
  "${binary_output_dir}/arrow-ipc-file-fuzz" "${fuzz_target_options[@]}" arrow-ipc-file/*-testcase-*
  "${binary_output_dir}/arrow-ipc-tensor-stream-fuzz" "${fuzz_target_options[@]}" arrow-ipc-tensor-stream/*-testcase-*
  if [ "${ARROW_PARQUET}" == "ON" ]; then
    "${binary_output_dir}/parquet-arrow-fuzz" "${fuzz_target_options[@]}" parquet/fuzzing/*-testcase-*
    "${binary_output_dir}/parquet-encoding-fuzz" "${fuzz_target_options[@]}" parquet/encoding-fuzzing/*-testcase-*
  fi
  "${binary_output_dir}/arrow-csv-fuzz" "${fuzz_target_options[@]}" csv/fuzzing/*-testcase-*
  popd
fi

popd