2023-08-28 16:24:14 +02:00
|
|
|
[project]
name = "tokenizers"
requires-python = ">=3.7"
authors = [
    { name = "Nicolas Patry", email = "patry.nicolas@protonmail.com" },
    { name = "Anthony Moi", email = "anthony@huggingface.co" },
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
keywords = ["NLP", "tokenizer", "BPE", "transformer", "deep learning"]
# Fields supplied by the build backend instead of being declared here.
# `version` is listed because this table sets no static `version` key, and
# the project-metadata spec requires it to be either static or dynamic.
# NOTE(review): presumably maturin fills these from Cargo.toml - confirm.
dynamic = [
    "description",
    "license",
    "readme",
    "version",
]
dependencies = ["huggingface_hub>=0.16.4,<1.0"]
|
2023-08-28 16:24:14 +02:00
|
|
|
|
|
|
|
|
# Links published with the package metadata; both point at the GitHub repo.
[project.urls]
Homepage = "https://github.com/huggingface/tokenizers"
Source = "https://github.com/huggingface/tokenizers"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Extras, installable as `tokenizers[<name>]`.
[project.optional-dependencies]
testing = [
    "pytest",
    "requests",
    "numpy",
    "datasets",
    "black==22.3",
    "ruff",
]
docs = [
    "sphinx",
    "sphinx_rtd_theme",
    "setuptools_rust",
]
# `dev` re-uses the `testing` extra through a self-reference.
dev = ["tokenizers[testing]"]
|
|
|
|
|
|
|
|
|
|
|
2019-11-27 12:21:37 -05:00
|
|
|
# PEP 517 build configuration: maturin is both the only build requirement
# and the backend; it is held to the 1.x series.
[build-system]
build-backend = "maturin"
requires = ["maturin>=1.0,<2.0"]
|
|
|
|
|
|
|
|
|
|
# maturin settings for the PyO3 extension.
[tool.maturin]
bindings = "pyo3"
features = ["pyo3/extension-module"]
# The compiled module is exposed as `tokenizers.tokenizers`; the Python
# sources are read from `py_src`.
module-name = "tokenizers.tokenizers"
python-source = "py_src"
|
2020-09-23 11:58:35 -04:00
|
|
|
|
|
|
|
|
[tool.black]
line-length = 119
# Oldest interpreter the formatter must stay compatible with; matches the
# `requires-python` floor (3.7) declared in [project]. It was previously
# `py35`, a version the package no longer supports.
# NOTE(review): raising the target may let black add trailing commas after
# `*args` / `**kwargs` when it explodes a call or signature - re-run the
# formatter once and confirm the diff is acceptable.
target-version = ["py37"]
|
2024-03-12 21:24:21 +11:00
|
|
|
|
|
|
|
|
[tool.ruff]
line-length = 119
# ruff treats this as the *minimum* Python version the code must run on, so
# it follows the `requires-python` floor (3.7) declared in [project] rather
# than the newest interpreter listed in the classifiers.
target-version = "py37"
lint.ignore = [
    # Tests compare with `a == None` on purpose instead of `a is None`.
    "E711",
    # Tests compare with `a == False` on purpose instead of `a is False`.
    "E712",
    # `try: import ... except ...` probes import a library without using it.
    "F401",
    # Raw type equality is required in asserts.
    "E721",
    # Module-level imports that are not at the top of the file.
    "E402",
    # Fixture imports that look unused and get redefined as test arguments.
    "F811",
]
|