text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2026-03-26 04:42:06 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

compile-grammar-in-router

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-flake-deps-and-logit-processor

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#3329

#3330

#3334

#3338

#3338

#3339

#334

#3340

#3340

#3341

#3341

#3343

#3343

#3344

#3345

#3346

#3347

#3347

#3348

#3348

#3349

#3349

#335

#3352

#3356

#3356

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

v3.3.6

v3.3.7

b4adbf2f6e docs: add AWS (EC2/SageMaker) deployment + benchmarking guide (#3352) main Fahad Alghanim 2026-03-21 04:34:22 -07:00
db931fcf42 Update CodeQL workflow for security analysis Pauline Bailly-Masson 2026-01-08 15:02:49 +01:00
dfb3fbe78e fix(num_devices): fix num_shard/num device auto compute when NVIDIA_VISIBLE_DEVICES == "all" or "void" (#3346) v3.3.7 oOraph 2025-12-18 16:58:43 +01:00
34988476cb Maintenance mode (#3345) Lysandre Debut 2025-12-11 15:29:10 +01:00
52c6dddf97 maintenance mode Julien Chaumond 2025-12-11 14:49:38 +01:00
55f7f7cb8b Maintenance mode (#3344) Lysandre Debut 2025-12-11 14:05:46 +01:00
71d5987816 fix: avoid gaudi until harware is available compile-grammar-in-router drbh 2025-12-09 22:02:55 +00:00
5a34147f53 fix: avoid flaky test for dev and update neuron deps drbh 2025-12-09 21:33:10 +00:00
d3d4f7a5f1 fix: bump test timeout drbh 2025-12-08 22:32:37 +00:00
dded0cdc92 fix: adjust typer for neuron cli drbh 2025-12-08 14:57:17 +00:00
71450d3ef0 fix: removed unsused vocab population comment drbh 2025-12-08 12:48:01 +00:00
68398dbb36 fix: adjust aiter install on amd docker drbh 2025-11-19 18:47:48 +00:00
9f4283f190 fix: adjust flaky tests and amd dockerfile tweaks drbh 2025-11-19 16:32:27 +00:00
93a821a2ed fix: version pins for amd, intel and neuron drbh 2025-11-19 02:45:26 +00:00
e42b430f47 fix: clippy cleanups drbh 2025-11-19 02:11:09 +00:00
433abf1141 fix: bump outline crate hash and remove debug log drbh 2025-11-19 01:57:33 +00:00
2e72daa279 feat: prefer latest outlines core and compile grammar in router drbh 2025-11-14 03:34:48 +00:00
24ee40d143 feat: support max_image_fetch_size to limit (#3339) drbh 2025-11-18 12:29:21 -05:00
85790a19a7 misc(gha): expose action cache url and runtime as secrets (#2964) Funtowicz Morgan 2025-11-17 10:50:10 +01:00
27bc1271d1 fix: prefer meta-llama/Llama-2-7b-hf over deprecated repo update-flake-deps-and-logit-processor drbh 2025-11-06 19:42:45 +00:00
ae1fb28434 fix: adjust leftover spaces lint drbh 2025-11-06 18:21:18 +00:00
00cef24e7f fix: bump flake and update grammar logit processor drbh 2025-11-06 00:55:48 +00:00
efb94e0d3d Patch version 3.3.6 (#3329) v3.3.6 Alvaro Moran 2025-09-17 01:15:23 +02:00
5e747f4e30 Revert "feat: bump flake including transformers and huggingface_hub versions" (#3330) drbh 2025-09-16 11:32:19 -04:00
1b90c508af Revert "Revert "feat: bump flake including transformers and huggingfa… (#3326) drbh 2025-09-09 10:44:25 -04:00
d2ad7c484e Update iframe sources for streaming demo (#3327) Eliott C. 2025-09-09 15:36:19 +02:00
c6071749db Fix mask passed to flashinfer (#3324) Daniël de Kok 2025-09-08 19:47:03 +02:00
4f067c22c3 fix: remove azure (#3325) drbh 2025-09-08 13:41:45 -04:00
9dedeb89ac Revert "feat: bump flake including transformers and huggingface_hub versions" (#3323) drbh 2025-09-08 06:17:29 -04:00
5739b5b088 Add missing backslash (#3311) Phil 2025-09-06 09:50:14 +02:00
49b414b5b8 Bump transformers in non-Nix as well to run CI transformers-ci Daniël de Kok 2025-09-05 11:45:25 +00:00
8d029d2fc3 chore: release v3.3.5 v3.3.5 git_v3.3.5 Alvaro Moran 2025-09-02 16:58:41 +02:00
356de85c29 feat: bump flake including transformers and huggingface_hub versions (#3313) drbh 2025-09-02 09:46:41 -04:00
0f79162288 chore: prepare version 3.3.5 (#3314) Alvaro Moran 2025-09-02 15:35:42 +02:00
06d9d88b95 Disable Cachix pushes (#3312) Daniël de Kok 2025-08-26 19:27:57 +02:00
8801ba12cf Optimum neuron 0.3.0 (#3308) Alvaro Moran 2025-08-26 11:07:47 +02:00
d618424d50 HuggingFaceM4/Idefics3-8B-Llama3 crash fix (#3267) Wang, Yi 2025-08-21 16:04:30 +08:00
c5e6f9a178 Fix outline import issue (#3282) Wang, Yi 2025-08-21 15:53:04 +08:00
6624fec1f9 Some gptq case could not be handled by ipex. but could be handle by triton (#3298) Wang, Yi 2025-08-19 15:37:49 +08:00
5284b5c654 Multi modality fix (#3283) Wang, Yi 2025-08-19 15:36:36 +08:00
6a2fa83540 XCCL for XPU (#3252) Wang, Yi 2025-08-19 06:37:27 +08:00
b4386b8c77 Migrate to V2 Pydantic interface (#3262) Emmanuel Ferdman 2025-08-19 00:55:21 +03:00
75ebb228f4 compressed_tensors w8a8 test fixes quantization-0.1 Daniël de Kok 2025-07-18 11:05:02 +00:00
24c2bff659 Gaudi gptq gidx support (#3297) Wang, Yi 2025-07-17 22:00:12 +08:00
bd33a23cac More grpcio shenanigans 20250708-ci-fixes Daniël de Kok 2025-07-08 14:49:23 +00:00
df53facda9 AMD grpcio? Daniël de Kok 2025-07-08 14:15:41 +00:00
a3db7edd67 Set grpcio upper bound to 1.73 (exclusive) Daniël de Kok 2025-07-08 13:55:00 +00:00
5a6e09e32e Revert "protobuf < 6.0" Daniël de Kok 2025-07-08 13:53:13 +00:00
48bb4b4f1e protobuf < 6.0 Daniël de Kok 2025-07-08 13:32:04 +00:00
bfdaf5773c Add outlines upper bound Daniël de Kok 2025-07-08 13:28:51 +00:00
47d5991b25 Update quantization kernels to 0.1.2 for fixes Daniël de Kok 2025-07-08 12:48:15 +00:00
da47e5754b fix: cleanup unit tests improve-json-schema-field drbh 2025-07-07 17:59:35 +00:00
43fd3bd7f4 fix: refactor and simplify structs and openapi drbh 2025-07-07 17:53:34 +00:00
b6540cea50 fix: lint and format fix-tool-call-def drbh 2025-07-07 16:14:00 +00:00
71fbe88a30 fix: enable defs references in tool calls drbh 2025-07-07 14:35:04 +00:00
fc2405c549 [gaudi] Fix the CI test errors (#3286) Yuan Wu 2025-07-07 17:32:07 +08:00
ebb26f0ccd [gaudi] Deepseek v2 mla and add ep to unquantized moe (#3287) Wang, Yi 2025-07-07 17:29:39 +08:00
c90ac9f65a Update snapshot Daniël de Kok 2025-07-06 17:18:28 +00:00
a76ae953fe Update quantization kernels Daniël de Kok 2025-07-07 06:12:18 +00:00
778b61c0da [gaudi] Remove unnecessary reinitialize to HeterogeneousNextTokenChooser to make sampling output correct (#3284) Wang, Yi 2025-07-03 16:03:16 +08:00
3d2e7c8fce Optimum neuron 0.2.2 (#3281) David Corvoysier 2025-07-03 07:59:25 +02:00
f6005d6813 xpu lora support (#3232) Wang, Yi 2025-07-02 23:54:25 +08:00
429dcd9c64 [gaudi] Gemma3 sliding window support (#3280) Wang, Yi 2025-07-01 16:06:01 +08:00
5f70fbdc2a feat: allow json_schema in response format and add test drbh 2025-06-25 19:43:49 +00:00
9f38d93051 Gaudi: add CI (#3160) Baptiste Colle 2025-06-24 18:51:09 +02:00
719907410b [gaudi] Refine rope memory, do not need to keep sin/cos cache per layer (#3274) Wang, Yi 2025-06-23 17:15:39 +08:00
d4bd5cac79 chore: version 3.3.4 v3.3.4 git_v3.3.4 David Corvoysier 2025-06-19 09:08:38 +00:00
238fbd4d50 Neuron backend fix and patch version 3.3.4 (#3273) David Corvoysier 2025-06-19 10:52:41 +02:00
14ee6e7804 [gaudi] gemma3 text and vlm model intial support. need to add sliding window support later (#3270) Wang, Yi 2025-06-19 15:32:34 +08:00
1754b79f10 chore: release 3.2.3 v3.3.3 git_v3.3.3 David Corvoysier 2025-06-18 12:59:29 +00:00
bd1bdebb47 doc: fix README (#3271) David Corvoysier 2025-06-18 12:35:36 +02:00
f13e28c98d [gaudi] Refine logging for Gaudi warmup (#3222) regisss 2025-06-18 04:34:00 -06:00
b4d17f18ff chore: prepare release 3.3.3 (#3269) David Corvoysier 2025-06-18 11:55:26 +02:00
0627983c17 [Gaudi] use pad_token_id to pad input id (#3268) Wang, Yi 2025-06-17 15:07:25 +08:00
3752143b39 [Gaudi] Fix the integration-test issues (#3265) Yuan Wu 2025-06-13 20:47:06 +08:00
ded4cb52ac [Gaudi] Enable Qwen3_moe model (#3244) Yuan Wu 2025-06-13 18:03:24 +08:00
a220e57f45 [gaudi] HuggingFaceM4/idefics2-8b issue fix (#3264) Wang, Yi 2025-06-13 18:00:08 +08:00
e07056ab3f [Gaudi] Remove optimum-habana (#3261) Yuan Wu 2025-06-13 04:35:36 +08:00
25fdc5f03c [gaudi] Move the _update_cos_sin_cache into get_cos_sin (#3254) Yuan Wu 2025-06-13 04:31:11 +08:00
613b8dd647 [gaudi] Vlm rebase and issue fix in benchmark test (#3263) Wang, Yi 2025-06-13 04:26:37 +08:00
839477670a [gaudi] Perf optimization (#3256) Wang, Yi 2025-06-11 21:00:21 +08:00
79183d1647 Bump neuron SDK version (#3260) David Corvoysier 2025-06-10 17:56:25 +02:00
2204f91f32 fix: adjust llava logic and bump snaps support-granite-vision drbh 2025-06-06 14:54:10 +00:00
30bdf922bd feat: improve llava next pooling for granite vision drbh 2025-06-04 13:50:39 +00:00
1ff9d185d5 Remove useless packages (#3253) Yuan Wu 2025-06-03 19:42:29 +08:00
8e41da951d Release 3.3.2 v3.3.2 git_3.3.2 Daniël de Kok 2025-05-30 14:19:18 +00:00
249189d96e Prepare for 3.3.2 (#3249) Daniël de Kok 2025-05-30 16:16:36 +02:00
6b6e30a6f6 [gaudi] Fix the Llama-4-Maverick-17B-128E crash issue (#3246) Yuan Wu 2025-05-29 17:38:44 +08:00
70217ac345 [Gaudi] Fix the OOM issue of Llama-4-Scout-17B-16E-Instruct (#3245) Yuan Wu 2025-05-29 15:58:24 +08:00
f14044009a fp8 compressed tensors w8a8 support for Gaudi backend (#3242) Wang, Yi 2025-05-28 20:54:20 +08:00
1883a62a94 Add Qwen3 for Gaudi backend (#3229) Yuan Wu 2025-05-23 14:58:35 +08:00
f58d7cf50e Nix: switch to hf-nix (#3240) Daniël de Kok 2025-05-22 17:09:15 +02:00
f08b44ade5 Upgrade to new vllm extension ops for Gaudi backend (fix issue in exponential bucketing) (#3239) Wang, Yi 2025-05-22 21:29:16 +08:00
767a65202d Release 3.3.1 v3.3.1 git_3.3.1 Daniël de Kok 2025-05-22 07:47:12 +00:00
674c514d44 Prepare for 3.3.1 (#3238) Daniël de Kok 2025-05-22 09:43:55 +02:00
9e7e546923 Move input_ids to hpu and remove disposal of adapter_meta (#3237) Wang, Yi 2025-05-22 15:21:31 +08:00
e32528792c Switch to punica-sgmv kernel from the Hub (#3236) Daniël de Kok 2025-05-21 15:44:15 +02:00
43b1b07fb9 Fix the crash in default ATTENTION path for Gaudi backend (#3235) Wang, Yi 2025-05-20 20:02:32 +08:00
000e313a92 Refine warmup and upgrade to synapse AI 1.21.0 (#3234) Wang, Yi 2025-05-20 16:22:43 +08:00
d658b5def3 Deepseek R1 for Gaudi backend (#3211) Wang, Yi 2025-05-19 22:36:39 +08:00

1 2 3 4 5 ...