mirror of
https://github.com/abetlen/llama-cpp-python.git
synced 2026-03-26 07:21:25 +00:00
181 lines
7.5 KiB
YAML
181 lines
7.5 KiB
YAML
# Workflow that builds CUDA-enabled wheels; it only runs when started manually.
name: "Build Wheels (CUDA)"

on:
  workflow_dispatch:

# The workflow token needs write access to repository contents.
permissions:
  contents: write
|
|
|
|
jobs:
  # Publishes the build matrix as a JSON job output so that the build job
  # can expand it with fromJSON().
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-22.04
    defaults:
      run:
        shell: pwsh
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}

    steps:
      - id: set-matrix
        name: Define Job Output
        run: |
          # Every key of this hashtable becomes one dimension of the matrix.
          $buildAxes = @{
              'os'         = @('ubuntu-22.04') #, 'windows-2022')
              'pyver'      = @("3.9", "3.10", "3.11", "3.12")
              'cuda'       = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
              'releasetag' = @("basic")
          }

          # Single-line JSON is required: the output file is line oriented.
          $axesJson = $buildAxes | ConvertTo-Json -Compress
          Write-Output "matrix=$axesJson" >> $env:GITHUB_OUTPUT
|
|
|
|
# Builds one wheel for every combination produced by define_matrix.
build_wheels:
  name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
  needs: [define_matrix]
  strategy:
    matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
  runs-on: ${{ matrix.os }}
  defaults: { run: { shell: pwsh } }
  # Exposed to every step's script as $env:CUDAVER / $env:AVXVER.
  env:
    AVXVER: ${{ matrix.releasetag }}
    CUDAVER: ${{ matrix.cuda }}

  steps:
|
|
# Windows runners only: locate MSBuild from a Visual Studio install whose
# version falls in the range [16.11, 16.12).
- name: Add MSBuild to PATH
  uses: microsoft/setup-msbuild@v2
  if: runner.os == 'Windows'
  with:
    vs-version: "[16.11,16.12)"
|
|
|
|
# Fetch the repository together with all nested submodules.
- uses: actions/checkout@v4
  with:
    submodules: recursive
|
|
|
|
# Install the matrix-selected Python version and enable the action's pip cache.
- uses: actions/setup-python@v5
  with:
    cache: pip
    python-version: ${{ matrix.pyver }}
|
|
|
|
# Create and activate the "llamacpp" conda environment from the latest
# Miniforge, pinned to the matrix Python version.
- name: Setup Mamba
  uses: conda-incubator/setup-miniconda@v3.1.0
  with:
    miniforge-version: latest
    python-version: ${{ matrix.pyver }}
    activate-environment: llamacpp
    auto-activate-base: false
    add-pip-as-python-dependency: true
|
|
|
|
# Windows runners only: cache ./MSBuildExtensions per CUDA version so the
# download step can be skipped on a cache hit.
- name: VS Integration Cache
  id: vs-integration-cache
  uses: actions/cache@v4
  if: runner.os == 'Windows'
  with:
    key: cuda-${{ matrix.cuda }}-vs-integration
    path: ./MSBuildExtensions
|
|
|
|
# Windows runners only, and only on a cache miss: download the CUDA installer
# archive and extract its MSBuildExtensions files into ./MSBuildExtensions.
- name: Get Visual Studio Integration
  if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
  run: |
    # The links table is queried with 12.1.0 when the matrix asks for 12.1.1.
    if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
    # Split the downloaded TypeScript source into non-empty whitespace-separated tokens.
    $links = (Invoke-RestMethod 'https://raw.githubusercontent.com/Jimver/cuda-toolkit/master/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
    # Stop right after the SECOND "'<version>'," token; because the loop
    # increment still runs, $i ends up on the token that follows it.
    # NOTE(review): presumably that token is the installer URL - confirm
    # against the current layout of windows-links.ts.
    for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
    # Fail with a clear message instead of calling .Trim() on $null when
    # the version is missing from the table.
    if ($q -lt 2 -or $i -ge $links.count) {
      throw "CUDA version '$x' was not found in the Jimver/cuda-toolkit Windows links table"
    }
    Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
    # 'e' extracts without directory structure; only MSBuildExtensions content is taken.
    & 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
    Remove-Item 'cudainstaller.zip'
|
|
|
|
# Windows runners only: copy the extracted MSBuild extension files into each
# Visual Studio 2019 BuildCustomizations directory and export
# CUDA_PATH_V<major>_<minor> for the following steps.
- name: Install Visual Studio Integration
  if: runner.os == 'Windows'
  run: |
    $extensionGlob = (Get-Item '.\MSBuildExtensions').FullName + '\*'
    $customizationDirs = (Get-Item 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').FullName
    foreach ($targetDir in $customizationDirs) {
      Copy-Item $extensionGlob $targetDir
    }
    # Drop the last version component and turn the remaining dots into
    # underscores, e.g. 12.1.1 -> 12_1.
    $versionSuffix = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
    $cudaPathVar = 'CUDA_PATH_V' + $versionSuffix
    Write-Output "$cudaPathVar=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
|
|
|
|
# Install the CUDA toolkit from the version-specific NVIDIA conda label,
# then the Python packaging tools used by the build step.
- name: Install Dependencies
  env:
    MAMBA_DOWNLOAD_FAILFAST: "0"
    MAMBA_NO_LOW_SPEED_LIMIT: "1"
  run: |
    $cudaVersion = $env:CUDAVER
    $cudaChannel = "nvidia/label/cuda-$cudaVersion"
    # The toolkit itself is requested on every platform.
    $packageSpecs = @("$cudaChannel::cuda-toolkit=$cudaVersion")
    if ($IsLinux) {
      # Keep nvcc, cudart, and headers on the same NVIDIA label so the
      # detected toolkit version matches the published wheel tag.
      $packageSpecs += "$cudaChannel::cuda-nvcc_linux-64=$cudaVersion", "$cudaChannel::cuda-cudart", "$cudaChannel::cuda-cudart-dev"
    }
    mamba install -y --channel-priority flexible --override-channels -c $cudaChannel @packageSpecs
    # Stop here when mamba failed instead of continuing to pip.
    if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
    python -m pip install build wheel
|
|
|
|
# Configure the CUDA toolchain from the conda environment, build the wheel,
# and export CUDA_VERSION (nvcc's major+minor without the dot) for later steps.
- name: Build Wheel
  run: |
    # Prepend $Entry to the path-list environment variable $Name. When the
    # variable is unset or empty no separator is added: an empty element in
    # CPATH / LIBRARY_PATH / LD_LIBRARY_PATH means "current directory".
    function Add-PathEntry([string]$Name, [string]$Entry) {
      $current = [Environment]::GetEnvironmentVariable($Name)
      if ($current) {
        $Entry = $Entry + [IO.Path]::PathSeparator + $current
      }
      [Environment]::SetEnvironmentVariable($Name, $Entry)
    }

    # Point every common CUDA location variable at the conda environment.
    $env:CUDA_PATH = $env:CONDA_PREFIX
    $env:CUDA_HOME = $env:CONDA_PREFIX
    $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
    $cudaHostCompilerArg = ''
    $env:CMAKE_ARGS = ''
    if ($IsLinux) {
      # Use GCC 12 as host compiler when the runner provides it.
      if (Test-Path '/usr/bin/g++-12') {
        $env:CC = '/usr/bin/gcc-12'
        $env:CXX = '/usr/bin/g++-12'
        $env:CUDAHOSTCXX = '/usr/bin/g++-12'
        $cudaHostCompilerArg = " -DCMAKE_CUDA_HOST_COMPILER=$env:CUDAHOSTCXX"
      }
      if (Test-Path (Join-Path $env:CONDA_PREFIX 'include/cuda_runtime.h')) {
        # CUDA headers live in the conda prefix: tell CMake and the
        # compiler/linker search paths about it.
        $env:CUDAToolkit_ROOT = $env:CONDA_PREFIX
        $env:CMAKE_ARGS = "-DCUDAToolkit_ROOT=$env:CONDA_PREFIX -DCUDA_TOOLKIT_ROOT_DIR=$env:CONDA_PREFIX$cudaHostCompilerArg"
        Add-PathEntry 'CPATH' "$env:CONDA_PREFIX/include"
        Add-PathEntry 'CPLUS_INCLUDE_PATH' "$env:CONDA_PREFIX/include"
        Add-PathEntry 'LIBRARY_PATH' "$env:CONDA_PREFIX/lib"
        Add-PathEntry 'LD_LIBRARY_PATH' "$env:CONDA_PREFIX/lib"
      } else {
        $env:CMAKE_ARGS = $cudaHostCompilerArg.Trim()
      }
    }

    # Locate nvcc inside the conda environment (two known layouts).
    $nvccPath = Join-Path $env:CONDA_PREFIX 'bin/nvcc'
    if (-not (Test-Path $nvccPath)) {
      $nvccPath = Join-Path $env:CONDA_PREFIX 'targets/x86_64-linux/bin/nvcc'
    }
    if (-not (Test-Path $nvccPath)) {
      throw 'Failed to find nvcc in the conda environment'
    }
    $env:CUDACXX = $nvccPath
    Add-PathEntry 'PATH' (Split-Path $nvccPath)

    # Read "release <major>.<minor>" from nvcc. The match object is checked
    # before it is indexed so that a missing match reaches the explicit
    # error below instead of a "cannot index into a null array" failure.
    $releaseMatch = (& $nvccPath --version) | Select-String 'release ([0-9]+\.[0-9]+)' | Select-Object -First 1
    if (-not $releaseMatch) {
      throw 'Failed to detect the installed CUDA toolkit version'
    }
    $nvccVersion = $releaseMatch.Matches[0].Groups[1].Value
    $cudaTagVersion = $nvccVersion.Replace('.','')

    $env:VERBOSE = '1'
    # Build real cubins for the supported GPUs, including sm_70, and keep
    # one forward-compatible PTX target instead of embedding PTX for every
    # SM. This keeps the wheel under GitHub's 2 GiB release-asset limit.
    $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON -DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=70-real;75-real;80-real;86-real;89-real;90-real;90-virtual -DCMAKE_CUDA_FLAGS=--allow-unsupported-compiler $env:CMAKE_ARGS"

    # The AVXVER-specific branches are commented out, so the flags below are
    # applied to EVERY build regardless of $env:AVXVER.
    # NOTE(review): confirm this is intended; the disabled 'basic' branch
    # would additionally pass -DGGML_AVX=off.
    # if ($env:AVXVER -eq 'AVX') {
    $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
    # }
    # if ($env:AVXVER -eq 'AVX512') {
    #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX512=on'
    # }
    # if ($env:AVXVER -eq 'basic') {
    #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
    # }

    python -m build --wheel
    # Do not publish CUDA_VERSION for a build that failed.
    if ($LASTEXITCODE -ne 0) {
      exit $LASTEXITCODE
    }

    # Publish tags that reflect the actual installed toolkit version.
    Write-Output "CUDA_VERSION=$cudaTagVersion" >> $env:GITHUB_ENV
|
|
|
|
# Upload everything in dist/ to a release; runs only when the workflow ref
# is a tag.
- uses: softprops/action-gh-release@v2
  if: startsWith(github.ref, 'refs/tags/')
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  with:
    # Set tag_name to <tag>-cu<cuda_version>
    tag_name: ${{ github.ref_name }}-cu${{ env.CUDA_VERSION }}
    files: dist/*
|