2020-01-01 00:28:32 -05:00
# -*- coding:utf-8 -*-
# Author: hankcs
# Date: 2019-12-28 19:26
2023-05-23 18:03:24 -04:00
import sys
2020-01-01 00:28:32 -05:00
from os . path import abspath , join , dirname
from setuptools import find_packages , setup
# Resolve every path relative to this script so it works from any working directory.
this_dir = abspath(dirname(__file__))

# The README text is passed to setup() as the package's long description.
with open(join(this_dir, 'README.md'), encoding='utf-8') as file:
    long_description = file.read()

# Execute hanlp/version.py into a plain dict; the names it defines
# (notably ``__version__``) become keys of ``version``.
version = {}
with open(join(this_dir, "hanlp", "version.py"), encoding='utf-8') as fp:
    exec(fp.read(), version)
2022-09-28 17:38:18 -04:00
# Pinned fastText wheel, shared by the 'fasttext' and 'tf' extras below.
FASTTEXT = 'fasttext-wheel==0.9.2'

sys_version_info = sys.version_info

# Extra pins appended to install_requires on macOS / Windows for specific
# interpreter versions.
EXTRAS = []
if sys.platform in {'darwin', 'win32'}:
    python_version = (sys_version_info.major, sys_version_info.minor)
    if python_version == (3, 6):
        EXTRAS = ['tokenizers==0.10.3']
    elif python_version == (3, 7):
        # Failed to build safetensors
        EXTRAS = ['safetensors<0.5']

# Optional dependency groups, selectable as ``hanlp[<group>]``.
extras_require = {
    'amr': [
        'penman==1.2.1',
        'networkx>=2.5.1',
        'perin-parser>=0.0.12',
    ],
    'fasttext': [FASTTEXT],
    # TF is deprecated in Transformers and no longer maintained
    'tf': [FASTTEXT, 'tensorflow>=2.6.0,<2.14', "transformers<4.55"],
}

# 'full' is the union of every group above. It is de-duplicated and sorted so
# the generated metadata is identical from one build to the next (a bare
# ``list(set(...))`` changes order with string hash randomisation).
extras_require['full'] = sorted(
    {requirement for group in extras_require.values() for requirement in group}
)
2022-04-12 22:41:09 -04:00
2020-01-01 00:28:32 -05:00
# Package metadata and dependency declarations handed to setuptools.
setup(
    name='hanlp',
    # ``version`` is the namespace dict filled by executing hanlp/version.py.
    version=version['__version__'],
    description='HanLP: Han Language Processing',
    long_description=long_description,
    long_description_content_type="text/markdown",
    url='https://github.com/hankcs/HanLP',
    author='hankcs',
    author_email='hankcshe@gmail.com',
    license='Apache License 2.0',
    classifiers=[
        'Intended Audience :: Science/Research',
        'Intended Audience :: Developers',
        "Development Status :: 4 - Beta",
        'Operating System :: OS Independent',
        "License :: OSI Approved :: Apache Software License",
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        "Topic :: Text Processing :: Linguistic",
    ],
    keywords='corpus,machine-learning,NLU,NLP',
    packages=find_packages(exclude=['docs', 'tests*']),
    include_package_data=True,
    install_requires=[
        'termcolor',
        'pynvml',
        'toposort==1.5',
        'transformers>=4.1.1',
        'sentencepiece>=0.1.91',  # Essential for tokenization_bert_japanese
        'torch>=1.6.0',
        'hanlp-common>=0.0.23',
        'hanlp-trie>=0.0.4',
        'hanlp-downloader',
        # Platform / interpreter specific pins computed earlier in this script.
        *EXTRAS,
    ],
    extras_require=extras_require,
    python_requires='>=3.6',
)