mirror of
https://github.com/huggingface/tokenizers.git
synced 2026-03-27 06:01:18 +00:00
* Add benchmark for deserializing large added vocab * revert dumb stuff, isolate changes * try to only normalize once * small improvement? * some updates * nit * fmt * normalized string are a fucking waste of time when you just want to add tokens to the vocab man.... * more attempts * works * let's fucking go, parity * update * hahahhahaha * revert changes that are not actually even needed * add a python test! * use normalizer before come on * nit * update to a more concrete usecase * fix build * style * reduce sample size * --allow unmaintained * clippy happy * up * up * derive impl * revert unrelated * fmt * ignore * remove stupid file
111 lines
3.2 KiB
YAML
111 lines
3.2 KiB
YAML
# CI workflow for the Rust crate under ./tokenizers: build, formatting and
# Clippy lints, tests, dependency audit, README freshness and semver checks.
name: Rust

# Runs on pushes to main and on every pull request.
on:
  push:
    branches:
      - main
  pull_request:

jobs:
  build:
    # One job instance per entry of the OS matrix below.
    runs-on: ${{ matrix.os }}
    env:
      # Quoted so the version is kept as a string instead of a YAML float.
      MACOSX_DEPLOYMENT_TARGET: '10.12'
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macOS-latest]

    steps:
      - uses: actions/checkout@v4

      - name: Install Rust Stable
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          components: rustfmt, clippy
          override: true

      # Necessary for now for the cargo cache:
      # https://github.com/actions/cache/issues/133#issuecomment-599102035
      - if: matrix.os == 'ubuntu-latest'
        run: sudo chown -R $(whoami):$(id -ng) ~/.cargo/

      # cargo-readme is only used by the Ubuntu-only README check further down.
      - name: Install cargo-readme for Ubuntu
        if: matrix.os == 'ubuntu-latest'
        uses: actions-rs/cargo@v1
        with:
          command: install
          args: cargo-readme

      - name: Build
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --all-targets --verbose --manifest-path ./tokenizers/Cargo.toml

      - name: Lint with RustFmt
        uses: actions-rs/cargo@v1
        with:
          command: fmt
          args: --manifest-path ./tokenizers/Cargo.toml -- --check

      - name: Lint Benchmarks with RustFmt
        uses: actions-rs/cargo@v1
        with:
          command: fmt
          args: --manifest-path ./tokenizers/Cargo.toml -- ./tokenizers/benches/bpe_benchmark.rs --check

      # Clippy warnings are promoted to errors via -D warnings.
      - name: Lint with Clippy
        uses: actions-rs/cargo@v1
        with:
          command: clippy
          args: --manifest-path ./tokenizers/Cargo.toml --all-targets --all-features -- -D warnings

      - name: Run Tests
        if: matrix.os != 'windows-latest'
        shell: bash
        working-directory: ./tokenizers
        run: make test

      # Skip integration tests for now on Windows
      - name: Run lib Tests on Windows
        if: matrix.os == 'windows-latest'
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --verbose --manifest-path ./tokenizers/Cargo.toml --lib

      - name: Run doc Tests on Windows
        if: matrix.os == 'windows-latest'
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --verbose --manifest-path ./tokenizers/Cargo.toml --doc

      # cargo-audit is installed once, right before the step that uses it.
      - name: Install cargo-audit
        run: cargo install cargo-audit

      # The folded scalar below yields a single space-separated argument line.
      - name: Run Audit
        uses: actions-rs/cargo@v1
        with:
          command: audit
          args: >-
            -D warnings
            -f ./tokenizers/Cargo.lock
            --ignore RUSTSEC-2024-0436
            --ignore RUSTSEC-2025-0014
            --ignore RUSTSEC-2025-0119

      # Verify that Readme.md is up to date.
      - name: Make sure, Readme generated from lib.rs matches actual Readme
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        working-directory: ./tokenizers
        run: cargo readme > must_match_readme.md && diff must_match_readme.md README.md

      - name: Check semver
        if: matrix.os == 'ubuntu-latest'
        uses: obi1kenobi/cargo-semver-checks-action@v2
        with:
          manifest-path: ./tokenizers/Cargo.toml