pytorch-image-models

huggingface/pytorch-image-models

Fork 0

mirror of https://github.com/huggingface/pytorch-image-models.git synced 2026-03-27 06:31:08 +00:00

Commit Graph

Select branches

Hide Pull Requests

0.5.x

0.6.x

adafactor_bv

adamuon

aimv2

apple_mobileclip2

attention_pool_dim_handling

attn_update

augmentation_update

beit3_remap

beit3_remap_clean

bits_and_tpu

bulk_runner_tweaks

cache_dir

calculate_dpr

caojiaolong-main

cautious_optim

check_intermediate_feat

cleanup_fp32_norm

cleanup_vit_convert

cleanup_xla_model_fixes

clip_laion2b

convnext_and_copyright

convnext_nano_r384

convnext_shortcut

convnext_xxlarge_eps

convnext_zepto

convnextv2

convnormact_aa_none

coord_attn

corrected_weight_decay

csatv2-gusdlf93

dataset_info

dd_factory_kwargs

device_amp_cleanup

dfnvit

diff_parallel

diffattn

dinov2_reg4

dinov3

dist_loss_tracking

distill_fixups

dot_nine_cleanup

dtensor_muon

dynamic_resize_vit

edgenext_csp_and_more

efficientnet_attn

efficientnet_x

efficientnet_x_w

effnet_improve_features_only

endof2025_weights

eva

eva02

eva_input_size

eva_pe_integration

fcossio-vit-maxpool

feature_base

few_more_weights

fix_asymm_set_grad_enable

fix_drop_block

fix_hrnet_head

fix_imagenet22k_ms_mapping

fix_levit_load

fix_mnv4_query_strides

fix_onnx

fix_out_indices_order

fix_parallel_scaling_bias

fix_rope_xy

fix_swin_pad

fix_tests

fix_tests_210

fix_window_trace

fixes-syncbn_pretrain_cfg_resolve

fixes_bce_regnet

florence2

focalnet_and_swin_refactor

fp32_norm

freeze_norm_revisit

friday_weights

gemma3n_enc_weights

gh-pages

grodino-dataset_trust_remote

group_size_eff

half_prec_trainval

hf_hub_revisit

hparam_docs

improve_attn_mask

inception_next

inference_tests

init_non_persistent_buffers

intern300m

kron_flatten

kron_optimizer

legacy_adadamw_update

levit_efficientformer_redux

local_dir

main

mambaout

mars_tweak

maxxvit_hub

mega_merge

mesa_ema

meshgrid_ndgrid

metaformer_baselines_for_vision

misc_fixes_2026_02

mnv4_first_weights

mnv5_bias_str_norm

mobileclip_hub_id

mobilenetv5

mobilenetv5_final_uib_pad

mobileone

model_cleanup_docstrings_anno

more_attn

more_datasets

more_mobile

more_naflex

more_opt_stuff

more_tiny_test_models

more_vit

multi-weight

multi-weight_effnet_convnext

muon

muon_fallback

mw-deit

mw-final

mw-mlp_mixer

mw-more

mw-resnet

naflex

naflex_in1k_weights

naflex_resampling_speedups

naflex_rope

naflex_siglip2_weights

no_opt_layer_decay

non_reentrant_ckpt

norm_mlp_classifier

norm_norm_norm

onnx_dynamo

onnx_export

openai_clip_resnet

openclip_weight_move

opt_mars_more

other_pool

parse_kwargs

parse_kwargs2

patch_drop_refactor

perceiver

pos_embed_type_safe

preact_resnet18

prr_pool

pyproject_pdm

refactor-imports

refactor_pre_logits

remove_clamp_min

resnetv2_18_34

rope_vit

samvit_fix_and_rope

sbb2_vit_hiera_weights

scaling_vit

scheduler_types

script_cleanup

siglip2

siglip_update

sknet_kwargs_merge

small_384_weights

small_things

so150m2

ssl_tasks

stream_device

swin_maxvit_resize

swin_v2

tiny_test_models

token_distill_task

transforms_dataset_updates

update-test-pytorch

update_clip_pretrained

update_results_2025

update_test_workflow

vibe_rsvit

vit_and_bit_test_fixes

vit_packed

vit_relpos_refactor

vit_siglip_and_reg

warmup_prefix_fix

weights_only

weights_only_default_true

wip-voidbag-accumulate-grad

xsepconv

yehuitang-Add-GhostNetV2

yuweihao-inception_next_atto

#1

#1007

#101

#1012

#1014

#1034

#1037

#105

#1055

#1056

#1056

#1058

#1061

#1063

#1063

#1069

#1083

#1091

#1094

#1105

#1112

#1113

#115

#1150

#117

#1187

#1190

#1194

#12

#1208

#1210

#1212

#1213

#1213

#122

#1222

#1229

#123

#1230

#1233

#1239

#1245

#1249

#125

#1251

#1253

#1256

#1259

#1265

#1266

#1270

#1271

#1273

#1274

#1276

#1277

#1294

#13

#1317

#132

#1322

#1327

#1330

#1336

#1340

#1345

#1349

#1351

#1354

#136

#1363

#1365

#137

#1381

#1383

#1385

#140

#141

#1414

#1415

#142

#1420

#143

#1439

#145

#146

#1467

#1471

#1473

#1476

#1479

#148

#1483

#1493

#1494

#1498

#150

#1500

#1502

#1520

#1522

#1525

#1527

#1532

#1538

#154

#1545

#1547

#155

#1552

#1553

#156

#1562

#1564

#1565

#1575

#1578

#1581

#1582

#1583

#1586

#1590

#1592

#1593

#16

#1606

#1612

#1614

#1617

#1621

#1622

#1624

#1627

#1628

#163

#1630

#1633

#1641

#1643

#1645

#1647

#1649

#1654

#1655

#1659

#1662

#1673

#1674

#1677

#1679

#1680

#1683

#1685

#1687

#1688

#1690

#1691

#1696

#1708

#1714

#1715

#1719

#1720

#1720

#1736

#1741

#1745

#1746

#1747

#1748

#175

#1756

#1760

#1761

#1766

#1778

#1784

#1785

#1789

#179

#1793

#1797

#1799

#1804

#1812

#1823

#183

#1830

#1834

#1835

#1837

#1841

#1846

#1847

#1850

#1852

#1866

#1873

#1876

#1888

#1889

#1890

#1894

#19

#1900

#1903

#191

#1911

#1913

#1914

#1916

#1918

#1919

#1920

#1923

#1925

#1926

#1930

#1937

#1945

#195

#1951

#1952

#1954

#1955

#1958

#1961

#1964

#1965

#1967

#1971

#1973

#1974

#1975

#1978

#1980

#1985

#1988

#199

#1993

#1995

#1996

#1998

#2

#2001

#2002

#2003

#2005

#2008

#2010

#2011

#2012

#2014

#2015

#2016

#2017

#2019

#2020

#2023

#2029

#2032

#2032

#2033

#2034

#2036

#2037

#2039

#2040

#2041

#2043

#2045

#2045

#2048

#2049

#2050

#2051

#2052

#2053

#2058

#2062

#2062

#2064

#2071

#2077

#2079

#208

#2082

#2089

#2090

#2091

#2092

#2096

#2098

#2102

#2105

#2108

#2109

#2111

#2113

#2117

#212

#2121

#2123

#2124

#2126

#213

#2136

#2138

#214

#2142

#2143

#2144

#2145

#2156

#216

#2162

#2164

#2165

#2167

#2167

#2168

#2169

#2174

#2175

#2176

#2176

#2178

#218

#2180

#2181

#2182

#2183

#2189

#2191

#2193

#2195

#2196

#2197

#2198

#2199

#2200

#2202

#2203

#2205

#2207

#2209

#2213

#2214

#2217

#2220

#2223

#2225

#2229

#2230

#2236

#2238

#2239

#2240

#2243

#2245

#2246

#2252

#2253

#2257

#2258

#2263

#2264

#2266

#227

#2274

#2275

#2277

#228

#2281

#2285

#2292

#2297

#2298

#2299

#23

#2304

#2305

#2308

#2311

#2312

#2314

#2316

#2317

#2319

#2319

#2320

#2321

#2322

#2325

#2326

#2328

#233

#2331

#2333

#2334

#2336

#2337

#2340

#2341

#2343

#2346

#2347

#2349

#2350

#2350

#2351

#2353

#2356

#2357

#2358

#2359

#2360

#2361

#2369

#237

#2372

#2372

#2376

#2379

#2381

#2382

#2384

#2386

#2386

#2387

#2388

#2389

#2394

#2396

#2397

#2398

#2399

#24

#240

#2400

#2402

#2403

#2404

#2405

#2406

#2408

#2409

#2412

#2413

#2415

#2416

#2417

#2418

#2418

#2421

#2423

#2426

#2426

#2427

#2429

#2431

#2433

#2439

#244

#2440

#2441

#2442

#2444

#2448

#2466

#2467

#2471

#2474

#2475

#2476

#2477

#2478

#2480

#2482

#2483

#2484

#2486

#2487

#2489

#2490

#2490

#2496

#2499

#250

#2501

#2502

#2503

#2504

#2507

#2508

#2509

#2510

#2511

#2515

#2516

#2517

#2518

#2519

#2523

#2526

#2527

#2529

#2532

#2533

#2534

#2535

#2536

#2537

#2538

#2539

#2541

#2543

#2544

#2545

#255

#2552

#2553

#2554

#2559

#256

#2560

#2561

#2563

#2564

#2568

#2569

#2571

#2574

#2574

#2576

#2578

#2579

#2585

#2589

#2591

#2592

#2595

#2596

#2598

#2599

#2602

#2603

#2605

#2606

#2610

#2614

#2617

#2620

#2621

#2624

#2625

#2626

#2627

#2629

#263

#2630

#2631

#2632

#2633

#2634

#2635

#2636

#2637

#2638

#2639

#2640

#2641

#2642

#2643

#2645

#2647

#2648

#2649

#2650

#2650

#2654

#2655

#2656

#2656

#2657

#2658

#2662

#2664

#2665

#2665

#2666

#2668

#2669

#2672

#2673

#2674

#2675

#2676

#2677

#2677

#2678

#2679

#268

#2680

#2683

#2684

#2685

#2686

#282

#284

#286

#29

#290

#297

#30

#300

#302

#31

#315

#32

#322

#323

#33

#334

#335

#35

#352

#367

#382

#389

#401

#413

#418

#419

#421

#425

#427

#429

#434

#437

#440

#447

#450

#459

#475

#489

#494

#497

#501

#502

#510

#52

#525

#528

#53

#533

#537

#542

#548

#550

#554

#555

#556

#561

#571

#580

#581

#595

#6

#603

#609

#612

#613

#62

#625

#626

#636

#637

#645

#647

#65

#651

#652

#653

#659

#66

#660

#668

#669

#679

#681

#685

#693

#7

#702

#714

#729

#731

#738

#74

#746

#747

#750

#768

#771

#772

#800

#801

#804

#804

#805

#808

#812

#813

#816

#817

#82

#821

#828

#83

#841

#842

#853

#858

#87

#874

#875

#876

#88

#880

#882

#891

#898

#910

#913

#914

#915

#933

#94

#95

#964

#967

#968

#986

#989

#99

#993

v0.1-attn-weights

v0.1-cadene

v0.1-coat-weights

v0.1-dnf-weights

v0.1-effv2-weights

v0.1-ger-weights

v0.1-hrnet

v0.1-mvit-weights

v0.1-pit-weights

v0.1-regnet

v0.1-repvgg-weights

v0.1-res2net

v0.1-resnest

v0.1-rexnet

v0.1-rs-weights

v0.1-rsb-weights

v0.1-selecsls

v0.1-tpu-weights

v0.1-tresnet

v0.1-vitjx

v0.1-vt3p-weights

v0.1-weights

v0.1-weights-maxx

v0.1-weights-morevit

v0.1-weights-swinv2

v0.1.1

v0.2.1

v0.3.3

v0.3.4

v0.4.12

v0.4.5

v0.4.9

v0.5.4

v0.6.11

v0.6.12

v0.6.13

v0.6.5

v0.6.7

v0.8.10dev0

v0.8.13dev0

v0.8.17dev0

v0.8.2dev0

v0.8.6dev0

v0.9.0

v0.9.1

v0.9.10

v0.9.11

v0.9.12

v0.9.16

v0.9.2

v0.9.5

v0.9.6

v0.9.7

v0.9.8

v0.9.9

v1.0.10

v1.0.11

v1.0.12

v1.0.13

v1.0.14

v1.0.15

v1.0.16

v1.0.17

v1.0.18

v1.0.19

v1.0.20

v1.0.21

v1.0.22

v1.0.23

v1.0.24

v1.0.25

v1.0.26

v1.0.3

v1.0.7

v1.0.8

v1.0.9

2d0ac6f567 Merge pull request #2397 from huggingface/half_prec_trainval Ross Wightman 2025-01-07 11:48:02 -08:00
1969528296 Fix dtype log when default (None) is used w/o AMP half_prec_trainval Ross Wightman 2025-01-07 11:47:22 -08:00
92f610c982 Add half-precision (bfloat16, float16) support to train & validate scripts. Should push dtype handling into model factory / pretrained load at some point... Ross Wightman 2025-01-07 10:25:14 -08:00
40c19f3939 Add wandb project name argument and allow change wandb run name Jiao-Long Cao 2025-01-07 16:43:34 +08:00
6f80214e80 Merge pull request #2394 from huggingface/non_reentrant_ckpt Ross Wightman 2025-01-06 14:44:06 -08:00
155f6e7fea Update README, few minor fixups. non_reentrant_ckpt Ross Wightman 2025-01-06 13:09:15 -08:00
2b251fb291 Wrap torch checkpoint() fn to default use_reentrant flag to False and allow env var override Ross Wightman 2025-01-06 11:28:39 -08:00
131518c15c Add comments to MLP layers re expected layouts Ross Wightman 2025-01-02 09:41:35 -08:00
d23facd697 Merge pull request #2388 from laclouis5/fix-mqa-v2 Ross Wightman 2025-01-02 07:48:35 -08:00
2d5277e858 Merge branch 'main' into fix-mqa-v2 Louis Lac 2025-01-02 00:11:22 +01:00
2d734d9058 Fixed unfused attn2d scale Louis Lac 2025-01-01 12:03:34 +01:00
6171e756d3 Fix MQA V2 scale and out shape Louis Lac 2025-01-01 15:37:28 +01:00
851e0746a9 Update README.md Ross Wightman 2024-12-31 14:12:16 -08:00
e846b2cf28 Add 384x384 in12k pretrain and finetune for convnext_nano Ross Wightman 2024-12-31 11:00:44 -08:00
2bd531e033 Add 384x384 in12k pretrain and finetune for convnext_nano convnext_nano_r384 Ross Wightman 2024-12-31 11:00:44 -08:00
dafe866047 Update README.md Ross Wightman 2024-12-31 10:19:43 -08:00
52595a9641 Update README.md Ross Wightman 2024-12-31 10:10:52 -08:00
1245b83924 fix: minor typos in UPGRADING Ruida Zeng 2024-12-31 03:01:51 -06:00
8fd2f48b65 fix: minor typos in README Ruida Zeng 2024-12-31 02:37:35 -06:00
b0068ba5d0 Switch hf hub entries for new aimv2 / dfn weights to point to timm locations. Undo forced device for SDR linspace, part of another change. Ross Wightman 2024-12-30 16:59:55 -08:00
cc7fd34015 test filter tweaks Ross Wightman 2024-12-30 16:09:31 -08:00
1bf84b35c3 Update tests for aimv2 filtering Ross Wightman 2024-12-30 15:34:03 -08:00
b33418713a Add (almost) full set of aimv2 model instances. Switch back to unpacked SwiGLU. Verify correctness. Add DFN L/14 39B weight. Ross Wightman 2024-12-30 14:23:20 -08:00
de35fd87f5 Add SimpleNorm to create_norm factory Ross Wightman 2024-12-30 14:22:42 -08:00
d5375ca769 Use torch F.rms_norm when possible, select fast vs normal paths appropriately and test with torchscript Ross Wightman 2024-12-29 14:05:07 -08:00
5f12a25114 Add bias arg to Vitamin GeGLU Ross Wightman 2024-12-29 09:01:46 -08:00
5804d92e4b Switch aimv2 to used packed SwiGLU Ross Wightman 2024-12-28 21:05:38 -08:00
15406a939e Fixing RmsNorm to fix #2380 and noticed with aimv2 when comparing outputs. Still some work to do, need to look at AMP / fast mode behaviour, dispatch to torch when possible. Add SimpleNorm for 'LayerNorm w/o centering and bias' Ross Wightman 2024-12-28 21:03:49 -08:00
a648a04834 Supporting aimv2 encoders Ross Wightman 2024-12-27 14:01:13 -08:00
eb84e4b571 Switch hf hub entries for new aimv2 / dfn weights to point to timm locations. Undo forced device for SDR linspace, part of another change. aimv2 Ross Wightman 2024-12-30 16:59:55 -08:00
874037e675 test filter tweaks Ross Wightman 2024-12-30 16:09:31 -08:00
cb294c83a8 Update tests for aimv2 filtering Ross Wightman 2024-12-30 15:34:03 -08:00
1d6ebeb102 Add (almost) full set of aimv2 model instances. Switch back to unpacked SwiGLU. Verify correctness. Add DFN L/14 39B weight. Ross Wightman 2024-12-30 14:23:20 -08:00
a4146b79d1 Add SimpleNorm to create_norm factory Ross Wightman 2024-12-30 14:22:42 -08:00
3a6661ac78 fix broken image link ariG23498 2024-12-30 12:55:38 +05:30
5809c2fe5e Use torch F.rms_norm when possible, select fast vs normal paths appropriately and test with torchscript Ross Wightman 2024-12-29 14:05:07 -08:00
e0cacbfd15 Add bias arg to Vitamin GeGLU Ross Wightman 2024-12-29 09:01:46 -08:00
0d87caefff Switch aimv2 to used packed SwiGLU Ross Wightman 2024-12-28 21:05:38 -08:00
04a484a895 Fixing RmsNorm to fix #2380 and noticed with aimv2 when comparing outputs. Still some work to do, need to look at AMP / fast mode behaviour, dispatch to torch when possible. Add SimpleNorm for 'LayerNorm w/o centering and bias' Ross Wightman 2024-12-28 21:03:49 -08:00
e752b5d07c Supporting aimv2 encoders Ross Wightman 2024-12-27 14:01:13 -08:00
790decc89b Add more pali(2) weights. Switch rest of models adapting open_clip weights to their own weight instances. Ross Wightman 2024-12-27 12:05:22 -08:00
01cf0f72af Add support for tag, license customization through push_to_hub Ross Wightman 2024-12-27 12:04:04 -08:00
b12ecbd614 Move siglip timm weights to own repos Ross Wightman 2024-12-23 17:40:21 -08:00
6fb7aaf37d Switching to timm specific weight instances for open_clip image encoders to facilitate hf-hub: use in timm and new transformers TimmWrapper Ross Wightman 2024-12-23 16:52:08 -08:00
364c567dd2 Merge pull request #2357 from huggingface/more_opt_stuff Ross Wightman 2024-12-27 12:54:02 -08:00
5cf022f228 Add more pali(2) weights. Switch rest of models adapting open_clip weights to their own weight instances. openclip_weight_move Ross Wightman 2024-12-27 12:05:22 -08:00
4f4f40baa6 Add support for tag, license customization through push_to_hub Ross Wightman 2024-12-27 12:04:04 -08:00
7533a7f0c2 Move siglip timm weights to own repos Ross Wightman 2024-12-23 17:40:21 -08:00
447147a25b Switching to timm specific weight instances for open_clip image encoders to facilitate hf-hub: use in timm and new transformers TimmWrapper Ross Wightman 2024-12-23 16:52:08 -08:00
d285526dc9 Lazy loader for TF, more LAB fiddling augmentation_update Ross Wightman 2024-12-23 13:24:11 -08:00
a02b1a8e79 Merge pull request #2369 from brianhou0208/fix_reduction Ross Wightman 2024-12-18 16:51:53 -08:00
3fbbd511e6 Testing some LAB stuff Ross Wightman 2024-12-18 16:49:17 -08:00
3b181b78d1 Updating augmentations, esp randaug to support full torch.Tensor pipeline Ross Wightman 2024-12-18 12:24:04 -08:00
ab0a70dfff fix feature_info.reduction Ryan 2024-12-18 21:12:40 +08:00
ea231079f5 Merge pull request #2361 from huggingface/grodino-dataset_trust_remote Ross Wightman 2024-12-06 12:06:56 -08:00
7573096eb8 Make sure trust_remote code only passed to HF datasets. Improve some docstrings. grodino-dataset_trust_remote Ross Wightman 2024-12-06 11:40:04 -08:00
95d903fd87 Merge branch 'main' of github.com:grodino/pytorch-image-models into grodino-dataset_trust_remote Ross Wightman 2024-12-06 11:14:26 -08:00
9eee47de52 Back to dev version Ross Wightman 2024-12-06 10:44:41 -08:00
9383f2880d Add cache_dir example Álvaro Justen (@turicas) 2024-12-05 23:15:54 -03:00
d1e9a8622a Rename inception_next_atto pretrained str Ross Wightman 2024-12-06 10:08:03 -08:00
0576175d85 Add inception_next_atto Weihao Yu 2024-12-06 14:22:29 +08:00
9cec2f17cd Merge pull request #2358 from turicas/cache_dir cache_dir Ross Wightman 2024-12-06 10:25:29 -08:00
7ab2b938e5 More tweaks to docstrings for hub/builder Ross Wightman 2024-12-06 08:58:02 -08:00
dc1bb05e8e Punch cache_dir through model factory / builder / pretrain helpers. Improve some annotations in related code. Ross Wightman 2024-12-04 22:02:40 -08:00
e90b68b603 Rename inception_next_atto pretrained str yuweihao-inception_next_atto Ross Wightman 2024-12-06 10:08:03 -08:00
b09f81c8cb More tweaks to docstrings for hub/builder Ross Wightman 2024-12-06 08:58:02 -08:00
d7a7ed7ba9 Add inception_next_atto Weihao Yu 2024-12-06 14:22:29 +08:00
a1d219c1c3 Add cache_dir example Álvaro Justen (@turicas) 2024-12-05 23:15:54 -03:00
afdf11d9ae Add caution to Adan. Add decouple decay option to LAMB. more_opt_stuff Ross Wightman 2024-12-05 13:50:30 -08:00
71849b972a Punch cache_dir through model factory / builder / pretrain helpers. Improve some annotations in related code. Ross Wightman 2024-12-04 22:02:40 -08:00
553ded5c6b Version 1.0.12 v1.0.12 Ross Wightman 2024-12-03 10:34:38 -08:00
464885e135 See if we can avoid some model / layer pickle issues with the aa attr in ConvNormAct Ross Wightman 2024-12-02 16:55:29 -08:00
ceaff7668e See if we can avoid some model / layer pickle issues with the aa attr in ConvNormAct convnormact_aa_none Ross Wightman 2024-12-02 16:55:29 -08:00
5fe5f9d488 Add a different mnv4 conv-small weight Ross Wightman 2024-12-02 16:14:37 -08:00
303f7691a1 Add cautious mars, improve test reliability by skipping grad diff for first step Ross Wightman 2024-12-02 09:38:25 -08:00
9fc8bac3d2 Add cautious mars, improve test reliability by skipping grad diff for first step mars_tweak Ross Wightman 2024-12-02 09:38:25 -08:00
82e8677690 Make LaProp weight decay match typical PyTorch 'decoupled' behaviour where it's scaled by LR Ross Wightman 2024-11-29 16:44:43 -08:00
886eb77938 Update README, missed small discrep in adafactor min dim update Ross Wightman 2024-11-29 10:57:47 -08:00
e3e434bbc4 To be technically correct, need to check the in-place _ ver of op Ross Wightman 2024-11-28 13:46:17 -08:00
7c32d3bd82 Work around _foreach_maximum issue, need scalar other support Ross Wightman 2024-11-28 13:39:44 -08:00
7cf683628f Cautious optimizer impl plus some typing cleanup. Ross Wightman 2024-11-28 12:34:51 -08:00
9b27f84876 To be technically correct, need to check the in-place _ ver of op cautious_optim Ross Wightman 2024-11-28 13:46:17 -08:00
b0a121bed0 Work around _foreach_maximum issue, need scalar other support Ross Wightman 2024-11-28 13:39:44 -08:00
3086dd03fd Cautious optimizer impl plus some typing cleanup. Ross Wightman 2024-11-28 12:34:51 -08:00
aeb1ed7a15 Keep basic optim test LR range closer to before w/ updated code Ross Wightman 2024-11-26 13:40:20 -08:00
7a165fcb62 Remove rogue import, thanks IDE :/ Ross Wightman 2024-11-26 12:20:20 -08:00
73d10ab482 Update tests, need handling for radamw with older PyTorch, need to back-off basic test LR in mars? Ross Wightman 2024-11-26 12:13:21 -08:00
09bc21774e Update optimizers.mdx Ross Wightman 2024-11-26 11:18:30 -08:00
4f64ec4e14 Add guard around 'somewhat' newer torch RAdam / NAdam imports Ross Wightman 2024-11-26 11:10:42 -08:00
0903d98162 Reduce tolerance on model inference 'owl' test, pillow output varies a lot, was failing locally Ross Wightman 2024-11-26 10:55:52 -08:00
1ab02a11a1 Update Adan with newer impl (from original source) that includes multi-tensor fn Ross Wightman 2024-11-26 10:55:20 -08:00
a024ab3170 Replace radam & nadam impl with torch.optim ver, rename legacy adamw, nadam, radam impl in timm. Update optim factory & tests. Ross Wightman 2024-11-26 10:54:17 -08:00
7b54eab807 Add MARS and LaProp impl, simplified from originals Ross Wightman 2024-11-26 10:51:53 -08:00
e5aea357b1 Update Adopt to include clipping for stability, separate wd so no param decay if update not taken on first step Ross Wightman 2024-11-26 10:42:01 -08:00
1a70036691 Keep basic optim test LR range closer to before w/ updated code opt_mars_more Ross Wightman 2024-11-26 13:40:20 -08:00
269bc084fa Remove rogue import, thanks IDE :/ Ross Wightman 2024-11-26 12:20:20 -08:00
bc7d2247bf Update tests, need handling for radamw with older PyTorch, need to back-off basic test LR in mars? Ross Wightman 2024-11-26 12:13:21 -08:00
7d3146b97b Update optimizers.mdx Ross Wightman 2024-11-26 11:18:30 -08:00
444c506ce3 Merge pull request #2346 from JohannesTheo/patch-1 Ross Wightman 2024-11-26 11:15:17 -08:00
835a1a60ab Add guard around 'somewhat' newer torch RAdam / NAdam imports Ross Wightman 2024-11-26 11:10:42 -08:00

... 5 6 7 8 9 ...