text-generation-inference

huggingface/text-generation-inference

Fork 0

mirror of https://github.com/huggingface/text-generation-inference.git synced 2026-03-26 04:42:06 +00:00

Commit Graph

Select branches

Hide Pull Requests

20250708-ci-fixes

add-chat-response-format

add-google-cloud-provider

add-quickstart-script

add-rotary-embed-tests

add-small-ttft-script

add-test-for-warmup-and-kvcache

add_L4

add_api_key

add_batch_dimension

add_chunked_atn

add_chunked_attn

add_deepseekv3

add_gptq_docs

add_integration_test

add_readme_dashboard

add_tunable_prefill

add_vlm_chunking

adding_docs

adjust-mllama-test-output

adjust-where-request-max-tokens-is-defaulted

aiter_kernels

amd-ci-fx

auto_length

automodel-supports-flash-paged-attention

avoid-cuda-graph-during-warmup-if-oom

avoid-zero-seed

backends/trtllm

backends/trtllm-executor

baichuan2-13b

bnb4

bugfix/add_tools_prompt

bugfix/moe-kernels-imports

bugfix/phi-exl2

bump-client-0.6.2

bump-kernel-versions

bump-poetry-and-requirements

chunked_attn_l4

ci-amihalik-update-chat-completion-messages

ci-new-cluster

ci-patch

ci-run-openai-function-calling-compatible-support

ci-update_xpu_image

ci-xpu

ci-xpu2

ci2

ci_amd

ci_amd2

ci_amd3

ci_amd4

close_dl_thread

compat_logger

compile-grammar-in-router

cuda_ipc_allreduce

debug-gpt2

debug-request-id

debug-torch-23

debug/gemma2

debugging-timeouts

deploy/aml

dev

development-guide

dummy

enable-non-grammar-constrained-tools

enable-qwen2vl-video

enable-transformers-vlm

enable_non_divisible_embeddings

exl2

experiment/moe

explore-static-triton-kernels

explore-t4-gemma-issues

feat-backend-llamacpp

feat/add-load-test

feat/attention_sinks

feat/backend_abstraction

feat/backend_feature

feat/better_tokens

feat/cuda_12

feat/flash_decoding

feat/improve_max_tokens

feat/max_queue_size

feat/page_re_alloc

feat/parse_logs

feat/support_deepspeed

feature/machete

feature/moe-kernels

feature/no_repeat_ngram_size

feature/no_repeat_ngram_size_ci

feature/phi-3-small

feature/prefix

feature/radix-prefix-cache

feature/radix-prefix-cache-bench

feature/vlm-prefix-caching

fix-cudagraph-bug

fix-gemma-tokenization

fix-grammar-cleanup-bug

fix-grammar-fsm-batching

fix-mixtral-adapter-loading

fix-release-tests

fix-repack-for-marlin

fix-tool-call-def

fix-tp

fix-version-install

fix/allow-top-p-0

fix/avoid_record_streams

fix/op-trace-id

fix/parse-mamba-config

fix_default_arg

fix_exl2

fix_fp8_llama3.2

fix_leak

fix_mistral2

fix_neox_rotary_emb

fix_phi3

flashinfer

flashinfer-0.2.5

fp8_kvcache

fp8_rocm

gaudi_llama4_tmp

git_2.0.4

git_3.1.1

git_3.2.0

git_3.2.1

git_3.3.0

git_3.3.1

git_3.3.2

git_v2.1.0

git_v2.1.1

git_v2.2.0

git_v2.3.0

git_v2.3.1

git_v2.4.0

git_v2.4.1

git_v3.0.0

git_v3.0.1

git_v3.0.2

git_v3.1.0

git_v3.2.2

git_v3.2.3

git_v3.3.3

git_v3.3.4

git_v3.3.5

improve-docs

improve-dynamic-message-content

improve-json-schema-field

improve-tool-call-and-response-ids

improve_defaults

improve_launcher_defaults

inlcude-latest-release-on-commit-builds-tags

ipex-moe

kvrouter

kvrouter-endpoints

llama-fused-compiled-mlp

main

maintenance/docker-network

maintenance/merge-vlm-input-prep

mamba2

martinigoyanes-fix-frequency-penalty

medusa

megatron

message-more-info

mi300-temp

mllama

model_compat_log

more_logs

multi-lora

new_minor_version

nix/cargo-clippy

nix/docker2

nix/pytorch-2.5.1

nix_integration_tests

nix_test2

no_root_user

no_root_user2

op-compilation-benchmarking

origin/slind_window_fix

osanseviero-patch-1

pip-installable

pr-1869-ci-run

pr-2076-ci-run

pr-2290-ci-runner

pr-2366-ci-branch

pr-2444-ci-branch

pr-2517-ci-branch

pr-2711-ci-branch

pr-2784-ci-branch

pr-2840-ci-branch

pr-2954-ci-branch

pr-3002-ci-branch

pr-3004-ci-branch

pr-3018-ci-branch

precompile-kernels-workflow

prefix_chunk

prefix_default

proxy_sse_engine_state

quantization

quantization-0.1

refactor-lora-linear

release-3.2.4

remove_post_load_weights

response-header-metrics

revert

rocm-ci-build

rocm_6.2_fixes

router-grammar-compile

s3-cache

self-generating-docs

set-num-blocks

simpler_exllama

skip-mistral-test

speculative

streaming_conceptual

support-granite-vision

support-logit-bias-in-chat

support-phi-model

support-phi3-small

support-pre-compile-kernels

temp_work

test-batch-speedup-amount

test_docs

test_rocm

tmp_invariants

tmp_medusa

tmp_torch_compile

transformers-ci

triton_fix

trtllm-stop-words

trtllm/executor_stats

tuna

update-flake-deps-and-logit-processor

update-jsonschema

update_docs2

update_internal_version

update_peft

update_readme

upgrade-outlines

upgrade_mlp_speculator

use_g6

use_updated_kernels

vllm/setup

zstd

#1

#100

#101

#1010

#1018

#1019

#102

#1022

#1023

#1024

#103

#1033

#1034

#1042

#1044

#1045

#1048

#1049

#1052

#1054

#1058

#1059

#106

#1060

#1061

#1063

#1064

#1065

#1066

#1068

#107

#1070

#1071

#1075

#1076

#1077

#108

#1080

#1081

#1089

#109

#1090

#1091

#1092

#1094

#1096

#1097

#1099

#11

#110

#1100

#1101

#1102

#1103

#1105

#1110

#1112

#1116

#1123

#1128

#1134

#114

#1140

#1141

#115

#1153

#1155

#116

#1165

#1165

#117

#1173

#1176

#1178

#1179

#118

#1182

#1183

#1184

#1187

#119

#1198

#1202

#1211

#1214

#1219

#122

#1224

#1228

#123

#1239

#1241

#1242

#1243

#1246

#1252

#126

#1260

#1267

#1270

#1272

#1274

#1276

#1279

#128

#1285

#1287

#129

#1294

#1295

#13

#130

#1301

#1305

#1307

#1308

#1313

#132

#1326

#1328

#133

#1336

#1337

#134

#1341

#1343

#1346

#1347

#1348

#135

#1351

#1352

#1353

#1358

#136

#1361

#1364

#137

#1370

#1373

#138

#1381

#1386

#139

#1390

#1395

#14

#140

#1408

#141

#1414

#1419

#142

#1420

#1424

#1425

#1427

#1428

#143

#1436

#144

#1442

#1448

#145

#1450

#1453

#1454

#1455

#1459

#1461

#1462

#1463

#1469

#147

#1470

#1471

#1473

#1475

#1476

#1477

#1478

#148

#1480

#1484

#1486

#1488

#1489

#149

#1490

#1491

#1492

#1494

#1495

#1496

#1497

#1498

#15

#150

#1502

#1504

#1505

#1506

#151

#1511

#1512

#1514

#1515

#1516

#1517

#1518

#152

#1520

#1523

#1524

#1526

#1527

#153

#1532

#1533

#1537

#1539

#154

#1540

#1541

#1542

#1543

#155

#1550

#1552

#1555

#1556

#1557

#1560

#1563

#1564

#1567

#1568

#1569

#1570

#1571

#1576

#1577

#1578

#1579

#1580

#1583

#1584

#1585

#1586

#1587

#1588

#159

#1591

#1592

#1594

#16

#160

#1603

#1605

#1606

#1607

#1608

#1609

#161

#1610

#1614

#1617

#1618

#1619

#162

#1621

#1626

#1628

#163

#1632

#1637

#1638

#1639

#164

#1646

#1648

#1650

#1651

#1653

#1658

#1660

#1662

#1663

#1664

#1666

#1667

#1668

#167

#1676

#168

#1682

#1685

#1686

#1693

#1697

#1698

#17

#170

#1702

#1703

#1704

#1707

#1708

#1709

#1710

#1713

#1714

#1715

#1716

#1718

#1719

#1726

#1727

#1729

#173

#1730

#1731

#1734

#1735

#1736

#1737

#1739

#174

#1740

#1747

#1748

#1749

#175

#1751

#1755

#1756

#1758

#1759

#1760

#1764

#1765

#1767

#1768

#1773

#1774

#1775

#178

#1784

#1789

#179

#1790

#1791

#1797

#1798

#18

#180

#1800

#1801

#1808

#181

#1811

#1812

#1813

#1815

#1816

#1817

#1818

#1820

#1825

#1827

#1828

#1829

#183

#1830

#1832

#1833

#1835

#1836

#1839

#184

#1840

#1841

#1843

#1844

#1845

#1848

#1849

#185

#1850

#1851

#1854

#1855

#186

#1860

#1865

#1866

#1869

#187

#1882

#1884

#1886

#1888

#1889

#1890

#1892

#1894

#1895

#1898

#19

#190

#1902

#1906

#1908

#1909

#191

#1910

#1912

#1915

#1916

#1917

#1918

#1919

#1920

#1921

#1923

#1924

#1925

#1929

#193

#1931

#1932

#1934

#1935

#1936

#1937

#1938

#1939

#194

#1940

#1942

#1947

#1948

#1949

#1950

#1951

#1953

#1954

#1958

#1959

#196

#1963

#1965

#1967

#1970

#1971

#1975

#1980

#1981

#1985

#1986

#1988

#1989

#1990

#1994

#1995

#1996

#2

#20

#200

#2002

#2003

#2004

#2005

#2006

#2008

#201

#2010

#2011

#2013

#2014

#2015

#2016

#2017

#202

#2020

#2021

#2022

#2023

#2024

#2028

#203

#2031

#2032

#2033

#2034

#2038

#2039

#2044

#2045

#2046

#2047

#2049

#205

#2050

#2052

#2054

#2059

#2060

#2061

#2062

#2063

#2065

#2066

#2068

#207

#2071

#2072

#2074

#2075

#2076

#2078

#2079

#208

#2080

#2084

#2085

#2086

#2087

#2088

#2089

#2091

#2092

#2094

#2095

#2097

#2099

#210

#2101

#2102

#2103

#2104

#2105

#2109

#2110

#2111

#2114

#2115

#2116

#2117

#2118

#2119

#212

#2120

#2123

#2124

#2125

#2127

#2128

#2129

#213

#2131

#2132

#2133

#2134

#2135

#2137

#2138

#214

#2140

#2141

#2142

#2148

#2149

#215

#2151

#2152

#2153

#2155

#2156

#2158

#216

#2161

#2163

#2164

#2165

#2166

#2167

#2168

#2169

#217

#2170

#2173

#2175

#2176

#2178

#2179

#218

#2180

#2181

#2182

#2186

#2187

#2189

#219

#2190

#2191

#2193

#2194

#2196

#22

#220

#2201

#2202

#2203

#2204

#2205

#2208

#2209

#221

#2212

#2213

#2215

#2216

#2217

#222

#2220

#2221

#2224

#2225

#2226

#2228

#2230

#2231

#2232

#2233

#2237

#2242

#2243

#2244

#2245

#2248

#2249

#2250

#2251

#2254

#2255

#2256

#2257

#2258

#2259

#226

#2260

#2261

#2262

#2266

#2268

#2269

#227

#2271

#2272

#2273

#2274

#2276

#2277

#2278

#2279

#228

#2281

#2282

#2283

#2284

#2285

#2286

#2287

#2288

#2289

#2290

#2291

#2292

#2298

#2299

#23

#2300

#2303

#2304

#2306

#2307

#2308

#2309

#2311

#2311

#2313

#2314

#2315

#2317

#2320

#2323

#2325

#2326

#2327

#2328

#2329

#233

#2330

#2331

#2333

#2335

#2336

#2337

#2338

#2339

#234

#2341

#2342

#2343

#2344

#2345

#2346

#2347

#235

#2350

#2351

#2352

#2353

#2354

#2357

#2358

#2359

#2360

#2361

#2364

#2365

#2366

#2367

#2368

#237

#2370

#2371

#2372

#2374

#2377

#2378

#2379

#2381

#2382

#2384

#2385

#2386

#2387

#2389

#2390

#2391

#2392

#2394

#2395

#2396

#2397

#2398

#2399

#24

#2401

#2402

#2403

#2404

#2405

#2406

#2407

#2408

#2410

#2411

#2412

#2414

#2415

#2416

#2419

#242

#2420

#2422

#2423

#2424

#2426

#2427

#2428

#2429

#2430

#2431

#2433

#2437

#2438

#2439

#244

#2442

#2443

#2444

#2449

#2450

#2451

#2453

#2454

#2455

#2459

#246

#2462

#2463

#2468

#2469

#2470

#2471

#2472

#2473

#2477

#2478

#2479

#248

#2481

#2482

#2484

#2486

#2489

#2490

#2491

#2492

#2493

#2494

#2496

#2497

#2498

#2499

#25

#250

#2500

#2501

#2507

#251

#2510

#2511

#2512

#2513

#2514

#2515

#2516

#2517

#2518

#2519

#252

#2520

#2521

#2524

#2525

#2527

#2528

#2529

#2532

#2533

#2535

#2536

#2537

#2538

#2539

#2540

#2545

#2546

#2547

#2548

#255

#2550

#2551

#2552

#2553

#2554

#2555

#2556

#2557

#2558

#2561

#2562

#2563

#2566

#2567

#2568

#257

#2574

#2575

#2577

#2578

#2579

#258

#2580

#2582

#2585

#2586

#2587

#2588

#2589

#259

#2590

#2591

#2592

#2594

#2595

#2596

#2597

#2599

#26

#2600

#2601

#2602

#2603

#2604

#2605

#2606

#2607

#2609

#261

#2610

#2611

#2612

#2614

#2616

#2617

#2619

#262

#2620

#2622

#2623

#2625

#2627

#2628

#2629

#2630

#2631

#2632

#2633

#2634

#2637

#264

#2640

#2642

#2642

#2645

#2646

#2647

#2648

#2650

#2651

#2652

#2655

#2658

#2659

#266

#2661

#2663

#2664

#2665

#2666

#2668

#267

#2673

#2674

#2677

#2678

#2680

#2682

#2683

#2684

#2685

#2686

#2687

#2688

#2689

#269

#2690

#2691

#2692

#2693

#2694

#2695

#2697

#2698

#2699

#27

#2701

#2702

#2704

#2706

#2707

#2708

#2709

#2710

#2711

#2712

#2713

#2714

#2716

#2717

#2718

#2719

#272

#2720

#2721

#2723

#2724

#2725

#2726

#2732

#2733

#2738

#274

#2740

#2741

#2742

#2743

#2745

#2746

#275

#2750

#2751

#2753

#2754

#2755

#2756

#2756

#2758

#276

#2760

#2761

#2762

#2764

#2765

#2766

#2767

#2768

#2769

#277

#2770

#2771

#2772

#2773

#2774

#2777

#2778

#2779

#278

#2782

#2784

#2785

#2786

#2788

#2789

#2790

#2791

#2792

#2793

#2795

#2796

#2797

#2798

#2799

#28

#2802

#2803

#2806

#2807

#2808

#2809

#2810

#2811

#2812

#2814

#2815

#2816

#2817

#2818

#282

#2824

#2825

#2826

#2827

#2829

#2831

#2833

#2834

#2837

#284

#2840

#2841

#2845

#2846

#2848

#2848

#2849

#285

#2850

#2852

#2855

#2856

#2858

#2859

#286

#2861

#2862

#2863

#2866

#2866

#287

#2870

#2874

#2874

#2878

#2882

#2883

#2884

#2885

#2886

#2891

#2892

#2893

#2898

#29

#2901

#2903

#2904

#2905

#2907

#2908

#2910

#2911

#2912

#2913

#2916

#2917

#2918

#2919

#292

#2921

#2922

#2924

#2928

#2929

#2931

#2932

#2933

#2935

#2936

#2937

#2938

#294

#2940

#2941

#2942

#2943

#2945

#2947

#2948

#2949

#2950

#2951

#2953

#2954

#2957

#2958

#2960

#2961

#2962

#2963

#2964

#2965

#2965

#2966

#2967

#2968

#297

#2970

#2971

#2972

#2975

#2976

#2977

#2979

#298

#2980

#2981

#2982

#2983

#2988

#299

#2990

#2991

#2992

#2993

#2995

#2996

#2998

#2999

#30

#3000

#3001

#3002

#3003

#3004

#3006

#3007

#3008

#3010

#3012

#3012

#3013

#3015

#3016

#3017

#3018

#302

#3021

#3022

#3024

#3027

#3028

#3029

#303

#3030

#3031

#3032

#3033

#3034

#3034

#3035

#3036

#3037

#3039

#304

#3040

#3041

#3042

#3044

#3045

#3046

#3047

#3048

#3049

#305

#3051

#3053

#3054

#3055

#3056

#3057

#3060

#3061

#3063

#3063

#3064

#3064

#3065

#3065

#3067

#3068

#3069

#3072

#3073

#3074

#3075

#3076

#3077

#3078

#3079

#3079

#308

#3080

#3081

#3083

#3084

#3085

#3086

#3089

#3091

#3092

#3093

#3094

#3095

#3096

#3098

#3099

#31

#310

#3100

#3101

#3103

#3106

#3107

#3108

#3109

#3111

#3111

#3112

#3113

#3116

#3117

#3118

#3120

#3121

#3122

#3124

#3125

#3126

#3127

#3128

#3129

#313

#3131

#3132

#3134

#3136

#3141

#3142

#3144

#3145

#3147

#3148

#3150

#3151

#3152

#3154

#3154

#3156

#3157

#3160

#3161

#3162

#3162

#3164

#3164

#3166

#3166

#3167

#317

#3170

#3171

#3172

#3174

#3176

#3179

#318

#3182

#3184

#3186

#3186

#3187

#3188

#3189

#3193

#3194

#3194

#3196

#3197

#3197

#32

#3200

#3201

#3201

#3202

#3204

#3206

#3207

#3210

#3210

#3211

#3217

#3218

#3220

#3221

#3222

#3223

#3224

#3229

#3230

#3231

#3231

#3232

#3234

#3235

#3236

#3237

#3238

#3239

#3240

#3242

#3244

#3245

#3246

#3249

#325

#3252

#3253

#3254

#3255

#3255

#3256

#3258

#3260

#3261

#3262

#3263

#3264

#3265

#3266

#3266

#3267

#3268

#3269

#327

#3270

#3271

#3273

#3274

#3276

#3276

#328

#3280

#3281

#3282

#3283

#3284

#3286

#3287

#3288

#3288

#329

#3291

#3291

#3292

#3292

#3297

#3298

#33

#3300

#3300

#3302

#3302

#3308

#3310

#3310

#3311

#3312

#3313

#3314

#3315

#3315

#3319

#3322

#3323

#3324

#3325

#3326

#3327

#3329

#3330

#3334

#3338

#3338

#3339

#334

#3340

#3340

#3341

#3341

#3343

#3343

#3344

#3345

#3346

#3347

#3347

#3348

#3348

#3349

#3349

#335

#3352

#3356

#3356

#336

#34

#340

#341

#343

#344

#348

#35

#351

#352

#353

#356

#357

#358

#359

#36

#360

#362

#363

#364

#367

#368

#37

#370

#373

#379

#384

#385

#388

#39

#393

#394

#395

#396

#4

#40

#400

#404

#406

#407

#41

#411

#412

#42

#434

#438

#44

#441

#443

#45

#453

#46

#462

#465

#47

#470

#472

#475

#477

#48

#480

#483

#485

#488

#49

#498

#5

#50

#501

#502

#51

#513

#514

#516

#519

#52

#520

#521

#522

#525

#529

#53

#534

#54

#543

#544

#545

#55

#550

#553

#557

#558

#56

#561

#562

#567

#57

#575

#578

#579

#58

#580

#581

#582

#583

#585

#586

#587

#588

#59

#590

#595

#596

#6

#60

#600

#605

#608

#609

#61

#611

#616

#617

#618

#619

#62

#621

#623

#624

#626

#63

#630

#633

#634

#635

#639

#64

#642

#643

#647

#648

#659

#66

#661

#664

#665

#666

#67

#670

#671

#678

#68

#684

#689

#698

#7

#70

#704

#708

#71

#712

#713

#715

#719

#72

#721

#723

#725

#727

#73

#733

#737

#738

#740

#741

#743

#745

#746

#748

#75

#750

#76

#761

#762

#767

#768

#770

#773

#783

#785

#789

#791

#793

#794

#795

#797

#798

#799

#8

#803

#805

#806

#809

#810

#812

#82

#820

#821

#822

#823

#829

#831

#836

#838

#84

#842

#848

#85

#851

#852

#853

#854

#858

#86

#860

#862

#867

#868

#87

#872

#88

#881

#884

#886

#889

#89

#892

#893

#898

#9

#90

#900

#901

#905

#906

#91

#910

#911

#918

#921

#93

#930

#932

#935

#94

#941

#947

#95

#950

#951

#953

#954

#957

#958

#96

#963

#964

#966

#968

#97

#971

#977

#981

#986

#989

#990

#993

#994

#999

v0.2.0

v0.2.1

v0.3.0

v0.3.1

v0.3.2

v0.4.0

v0.4.1

v0.4.2

v0.4.3

v0.5.0

v0.6.0

v0.7.0

v0.8.0

v0.8.1

v0.8.2

v0.9.0

v0.9.1

v0.9.2

v0.9.3

v0.9.4

v1.0.0

v1.0.1

v1.0.2

v1.0.3

v1.1.0

v1.1.1

v1.2.0

v1.3.0

v1.3.1

v1.3.2

v1.3.3

v1.3.4

v1.4.0

v1.4.1

v1.4.2

v1.4.3

v1.4.4

v1.4.5

v2.0.0

v2.0.1

v2.0.2

v2.0.3

v2.0.4

v2.1.0

v2.1.1

v2.2.0

v2.3.0

v2.3.1

v2.4.0

v2.4.1

v3.0.0

v3.0.1

v3.0.2

v3.1.0

v3.1.1

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.3.0

v3.3.1

v3.3.2

v3.3.3

v3.3.4

v3.3.5

v3.3.6

v3.3.7

40b00275b2 Attempt to remove AWS S3 flaky cache for sccache (#2953) Funtowicz Morgan 2025-01-27 11:21:48 +01:00
bafbd06744 Update transformers_flash_causal_lm.py fix-tp Cyril Vallez 2025-01-24 15:06:50 +01:00
de83178bc3 tp monkey patch Cyril Vallez 2025-01-24 15:03:14 +01:00
6cb41a80a1 Revert "Remove AWS credentials?" Nicolas Patry 2025-01-24 14:34:17 +01:00
d2ff68e98d Remove AWS credentials? Nicolas Patry 2025-01-24 12:18:28 +01:00
b70f29d729 Bypasse perm issue. v3.0.2 git_v3.0.2 Nicolas Patry 2025-01-24 12:12:47 +01:00
e413b01eb1 Create patch release. Nicolas Patry 2025-01-24 10:50:15 +01:00
02e4b9ab32 backend(vllm): plug in the tokio server and CLI Morgan Funtowicz 2025-01-24 10:41:07 +01:00
d9dda11726 Trying to put back the archlist (to fix the oom). (#2947) Nicolas Patry 2025-01-24 09:32:17 +01:00
d937eb64da Fixing cargo lock. Nicolas Patry 2025-01-23 18:54:34 +01:00
18c4607d46 Transformers backend TP fix (#2945) Cyril Vallez 2025-01-23 18:09:57 +01:00
29a0893b67 Tmp tp transformers (#2942) Nicolas Patry 2025-01-23 18:07:30 +01:00
0a89902663 [TRTLLM] Expose finish reason (#2841) Funtowicz Morgan 2025-01-23 16:48:26 +01:00
4e172028aa Add NVIDIA A40 to known cards (#2941) Nikolai Kolodziej 2025-01-23 14:19:21 +01:00
6ab02931cf Set alias for max_completion_tokens in ChatRequest (#2932) Alvaro Bartolome 2025-01-23 14:18:47 +01:00
cc212154e0 Bump TensorRT-LLM backend dependency to v0.16.0 (#2931) Funtowicz Morgan 2025-01-23 13:54:40 +01:00
bd2ec03d53 backend(vllm): statically allocate LLMEngine Morgan Funtowicz 2025-01-22 22:15:33 +01:00
1dd346666a Clarify FP8-Marlin use on capability 8.9 (#2940) Daniël de Kok 2025-01-22 18:18:11 +01:00
1d3c9beba8 fix moe in quantization path (#2935) Wang, Yi 2025-01-22 21:36:15 +08:00
6d335ca7ce Remove modifications in Lock. new_minor_version Nicolas Patry 2025-01-22 13:37:17 +01:00
b21d3c1e73 Upgrade the version number. Nicolas Patry 2025-01-22 12:29:50 +01:00
2dfe3b3ee6 Upgrading the deps to have transformers==4.48.0 necessary (#2937) Nicolas Patry 2025-01-22 12:20:15 +01:00
cfd22726c9 backend(vllm): initial commit Morgan Funtowicz 2025-01-21 23:37:56 +01:00
64a33c1f05 Run pre-commit run --all-files to fix CI (#2933) Alvaro Bartolome 2025-01-21 17:33:33 +01:00
bdb3e488e4 Trying to avoid the random timeout. (#2929) Nicolas Patry 2025-01-21 11:06:10 +01:00
17367438f3 Give TensorRT-LLMa proper CI/CD 😍 (#2886) Funtowicz Morgan 2025-01-21 10:19:16 +01:00
b980848abf Flash Transformers modeling backend support (#2913) Cyril Vallez 2025-01-21 10:01:51 +01:00
16162602c2 Add fp8 support moe models Mohit Sharma 2025-01-20 13:55:54 +00:00
447a5b2f87 Fixing TRTLLM dockerfile. (#2922) Nicolas Patry 2025-01-20 11:13:46 +01:00
630f198624 flashinfer: switch to plan API (#2904) Daniël de Kok 2025-01-17 18:18:02 +01:00
8f6146f11a Revert "feat: improve qwen2-vl startup " (#2924) drbh 2025-01-17 12:09:05 -05:00
eecca27113 feat: improve qwen2-vl startup (#2802) drbh 2025-01-17 11:50:41 -05:00
17192c9a0e fix: remove test debug params enable-qwen2vl-video drbh 2025-01-17 16:19:02 +00:00
6e982f43a1 fix the crash of meta-llama/Llama-3.2-1B (#2918) Wang, Yi 2025-01-17 22:50:58 +08:00
b4187d6022 Add tgi_batch_current_size and tgi_batch_current_size as response header response-header-metrics Corentin REGAL 2025-01-17 15:48:02 +01:00
c20025dbf7 Add fp8 kv cache for ROCm (#2856) Mohit Sharma 2025-01-17 18:43:29 +05:30
de19e7e844 Moving to uv instead of poetry. (#2919) Nicolas Patry 2025-01-17 12:32:00 +01:00
d61f14f271 nix: update to PyTorch 2.5.1 (#2921) Daniël de Kok 2025-01-17 12:12:11 +01:00
885144166f Flash decoding kernel adding and prefill-chunking and prefix caching enabling in intel cpu/xpu (#2815) Wang, Yi 2025-01-17 19:04:57 +08:00
bde5f9ad82 nix: update to PyTorch 2.5.1 nix/pytorch-2.5.1 Daniël de Kok 2025-01-17 06:44:21 +00:00
82f6ea1b71 feat: improve star coder to support multi lora layers (#2883) drbh 2025-01-16 16:23:55 -05:00
78cd756caf fix: improve video processing and update unsupported paths drbh 2025-01-16 17:20:27 +00:00
5f78ec32a5 Do not convert weight scale to e4m3fnuz on CUDA (#2917) Daniël de Kok 2025-01-16 13:44:32 +01:00
922cc38fbc Upgrading bitsandbytes. (#2910) Nicolas Patry 2025-01-15 20:07:21 +01:00
120bd3e3bb Removing the github runner. (#2912) Nicolas Patry 2025-01-15 19:20:44 +01:00
1470aec9d9 Fix typo in TPU docs (#2911) Baptiste Colle 2025-01-15 18:32:07 +01:00
203cade244 Upgrading our rustc version. (#2908) Nicolas Patry 2025-01-15 17:04:03 +01:00
46994b34fb 📝 add guide on using TPU with TGI in the docs (#2907) Baptiste Colle 2025-01-15 16:26:11 +01:00
dc9b8e9814 Fix docker run in README.md (#2861) Alvaro Bartolome 2025-01-15 16:07:10 +01:00
3c7ae48f7f docs(conceptual/speculation): available links Train Medusa (#2863) Guspan Tanadi 2025-01-15 22:05:54 +07:00
cc8b9650bd Baichuan2-13B does not have max_position_embeddings in config (#2903) Wang, Yi 2025-01-15 22:56:52 +08:00
e07acc7f68 Enable FP8 Per-Tensor Scales and Integrate Marlin/MoE Kernels Repo for ROCm (#2825) Mohit Sharma 2025-01-15 11:38:58 +05:30
48067e4a0d fmt baichuan2-13b Wang, Yi A 2025-01-13 17:23:28 -08:00
22ed5703de Update server/text_generation_server/models/flash_causal_lm.py Wang, Yi 2025-01-14 08:58:48 +08:00
880ab9c2f3 Add Flash decoding kernel ROCm (#2855) Mohit Sharma 2025-01-13 15:42:35 +05:30
1660154ae6 fix crash in torch2.6 if TP=1 (#2885) Wang, Yi 2025-01-13 18:11:31 +08:00
2e22164d4a Update using_guidance.md (#2901) Nicholas Broad 2025-01-13 02:09:35 -08:00
5ad8c9a40b Baichuan2-13B does not have max_position_embeddings in config see https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat/blob/main/config.json Wang, Yi A 2025-01-12 22:47:23 -08:00
83624a07be Add possible variants for A100 and H100 GPUs for auto-detecting flops (#2837) lazariv 2025-01-10 16:12:02 +01:00
01067f8ba8 chore: Update jsonschema to 0.28.0 (#2870) Dmitry Dygalo 2025-01-10 06:01:54 -08:00
4f7e00f4ce Update to marlin-kernels 0.3.7 (#2882) Daniël de Kok 2025-01-10 12:43:44 +01:00
da5ab46705 Improve vlm support (add idefics3 support) (#2437) drbh 2025-01-09 10:35:32 -05:00
a9c7d2e3b6 Basic flashinfer 0.2 support (#2862) Daniël de Kok 2025-01-09 16:25:00 +01:00
c7b2e3f100 chore: Enable blocking feature for reqwest update-jsonschema Dmitry Dygalo 2025-01-09 11:07:49 +01:00
afb6c728d8 update ipex xpu to fix issue in ARC770 (#2884) Wang, Yi 2025-01-09 17:11:03 +08:00
d37a43e581 chore: fixed some typos and attribute issues in README (#2891) Ruida Zeng 2025-01-09 03:09:23 -06:00
db6a9e1232 add ats support ci-update_xpu_image Wang, Yi A 2025-01-07 16:23:16 -08:00
b51fc1cc0f update ipex xpu to fix issue in ARC770 Wang, Yi A 2025-01-06 18:28:14 -08:00
b27749eba7 fix: small refactor and cleanups drbh 2025-01-03 11:01:07 -05:00
840efc5f6c chore: Update jsonschema to 0.28.0 Dmitry Dygalo 2024-12-29 15:56:22 +01:00
dcc1194198 fix: adjust trtllm looper for video chunk enum drbh 2024-12-16 17:05:28 +00:00
4f42d0c731 fix: include the video feature in cargo chef command drbh 2024-12-13 18:11:04 +00:00
27f758de0a fix: make ffmpeg-next dep optional with feature drbh 2024-12-13 18:00:15 +00:00
b4da6ad30e fix: feature flag video and remove from non cuda dockerfiles drbh 2024-12-13 17:36:34 +00:00
5322abd9f5 fix: adjust whitespace lint drbh 2024-12-13 17:05:01 +00:00
91ed362e74 fix: update trtllm dockefile after rebase drbh 2024-12-13 16:59:19 +00:00
bb00fb33ba fix: update lints after rebase drbh 2024-12-13 16:07:52 +00:00
5c7bc91a2f fix: adjust batch_tokenized_inputs output in mllama drbh 2024-12-13 15:51:06 +00:00
2ae152a188 fix: update all vlm forward args, pass shared libraries to final layer in docker and doc bump drbh 2024-12-12 22:00:02 +00:00
1d6bf243eb fix: remove unnecessary cast drbh 2024-12-12 18:52:17 +00:00
e2b75a572f fix: resolve rebase issues and add test drbh 2024-12-12 18:31:33 +00:00
71ed75a21b fix: pre commit and clippy lints drbh 2024-12-12 12:00:08 -05:00
db97d979fb cleanup prints Miquel Farre 2024-12-11 21:09:17 +00:00
19e1c8da31 working version Miquel Farre 2024-12-11 21:08:03 +00:00
af77a0cadf fixing ssl issue Miquel Farre 2024-12-11 14:00:23 +00:00
cbf1d982ec installing ssl requirements prior to rust building stage Miquel Farre 2024-12-04 09:53:29 +00:00
75ab887dda fix: copy shared libraries from builder drbh 2024-12-02 18:21:47 -05:00
b5b2184c0a fix: include usr lib in ld path drbh 2024-11-27 20:50:42 -05:00
50b5399d9c fix: add ffmpeg to final layer of container drbh 2024-11-27 18:37:38 -05:00
2dc078ad1d fix: bump deps in other dockerfiles drbh 2024-11-27 15:49:35 -05:00
063104c217 Fix test devshell Daniël de Kok 2024-11-27 19:51:55 +00:00
05004a6cfd Make the pure build work Daniël de Kok 2024-11-27 19:09:26 +00:00
98392a7a3f Cleanup impure Nix shell Daniël de Kok 2024-11-27 18:43:57 +00:00
167c6f06ab fix: include ffmpeg deps in autodocs workflow drbh 2024-11-27 11:28:30 -05:00
96968a0da3 fix: add ffmpeg overlay and enable build drbh 2024-11-27 00:14:45 -05:00
f0c38412d1 fix: add libavdevice dep to tests workflow drbh 2024-11-26 17:29:57 -05:00
4a76e8b8b4 fix: add libavfilter dep to test drbh 2024-11-26 17:26:10 -05:00
d5cc6707e0 fix: ensure pip is installed after installing deps in test workflow drbh 2024-11-26 17:16:50 -05:00
daf83a95c5 fix: adjust pkg config in test drbh 2024-11-26 17:16:00 -05:00
137f3bb2ef fix: adjust dependencies and bump pip along with python drbh 2024-11-26 16:56:04 -05:00

... 3 4 5 6 7 ...