2019-12-11 11:20:40 +08:00
import json
2026-02-22 10:02:35 +08:00
from pathlib import Path
2015-09-08 09:47:48 -04:00
2022-10-26 16:56:11 +08:00
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm
2024-01-26 04:58:48 +08:00
from xgboost . core import Integer
2025-04-15 14:28:49 +08:00
from xgboost . testing . basic_models import run_custom_objective
2026-01-23 05:03:50 +08:00
from xgboost . testing . updater import get_basescore
2022-10-26 16:56:11 +08:00
2016-04-24 16:34:46 +09:00
2020-11-03 02:27:39 -05:00
class TestModels :
2016-04-24 16:34:46 +09:00
def test_glm ( self ) :
2024-12-06 16:48:17 +08:00
param = {
" objective " : " binary:logistic " ,
" booster " : " gblinear " ,
" alpha " : 0.0001 ,
" lambda " : 1 ,
" nthread " : 1 ,
}
2023-04-28 19:45:15 +08:00
dtrain , dtest = tm . load_agaricus ( __file__ )
2024-12-06 16:48:17 +08:00
watchlist = [ ( dtest , " eval " ) , ( dtrain , " train " ) ]
2016-04-24 16:34:46 +09:00
num_round = 4
bst = xgb . train ( param , dtrain , num_round , watchlist )
assert isinstance ( bst , xgb . core . Booster )
preds = bst . predict ( dtest )
labels = dtest . get_label ( )
2024-12-06 16:48:17 +08:00
err = sum (
1 for i in range ( len ( preds ) ) if int ( preds [ i ] > 0.5 ) != labels [ i ]
) / float ( len ( preds ) )
2016-11-20 18:23:19 -06:00
assert err < 0.2
2016-04-24 16:34:46 +09:00
2026-02-22 10:02:35 +08:00
def test_dart ( self , tmp_path : Path ) - > None :
2023-04-28 19:45:15 +08:00
dtrain , dtest = tm . load_agaricus ( __file__ )
2024-12-06 16:48:17 +08:00
param = {
" max_depth " : 5 ,
" objective " : " binary:logistic " ,
" eval_metric " : " logloss " ,
" booster " : " dart " ,
" verbosity " : 1 ,
}
2016-06-09 06:04:01 +09:00
# specify validations set to watch performance
2024-12-06 16:48:17 +08:00
watchlist = [ ( dtest , " eval " ) , ( dtrain , " train " ) ]
2016-06-09 06:04:01 +09:00
num_round = 2
bst = xgb . train ( param , dtrain , num_round , watchlist )
# this is prediction
2023-03-31 19:01:55 +08:00
preds = bst . predict ( dtest , iteration_range = ( 0 , num_round ) )
2016-06-09 06:04:01 +09:00
labels = dtest . get_label ( )
2024-12-06 16:48:17 +08:00
err = sum (
1 for i in range ( len ( preds ) ) if int ( preds [ i ] > 0.5 ) != labels [ i ]
) / float ( len ( preds ) )
2016-06-09 06:04:01 +09:00
# error must be smaller than 10%
assert err < 0.1
2026-02-22 10:02:35 +08:00
dtest_path = tmp_path / " dtest.dmatrix "
model_path = tmp_path / " xgboost.model.dart.ubj "
# save dmatrix into binary buffer
dtest . save_binary ( dtest_path )
# save model
bst . save_model ( model_path )
# load model and data in
bst2 = xgb . Booster ( params = param , model_file = model_path )
dtest2 = xgb . DMatrix ( dtest_path )
2020-08-22 13:18:48 +08:00
2023-03-31 19:01:55 +08:00
preds2 = bst2 . predict ( dtest2 , iteration_range = ( 0 , num_round ) )
2020-08-22 13:18:48 +08:00
2016-06-09 06:04:01 +09:00
# assert they are the same
assert np . sum ( np . abs ( preds2 - preds ) ) == 0
2020-01-13 08:48:30 -05:00
def my_logloss ( preds , dtrain ) :
labels = dtrain . get_label ( )
2024-12-06 16:48:17 +08:00
return " logloss " , np . sum ( np . log ( np . where ( labels , preds , 1 - preds ) ) )
2020-01-13 08:48:30 -05:00
# check whether custom evaluation metrics work
2024-12-06 16:48:17 +08:00
bst = xgb . train (
param , dtrain , num_round , evals = watchlist , custom_metric = my_logloss
)
2023-03-31 19:01:55 +08:00
preds3 = bst . predict ( dtest , iteration_range = ( 0 , num_round ) )
2020-01-13 08:48:30 -05:00
assert all ( preds3 == preds )
2016-06-09 06:04:01 +09:00
# check whether sample_type and normalize_type work
num_round = 50
2024-12-06 16:48:17 +08:00
param [ " learning_rate " ] = 0.1
param [ " rate_drop " ] = 0.1
2016-06-09 06:04:01 +09:00
preds_list = [ ]
2024-12-06 16:48:17 +08:00
for p in [
[ p0 , p1 ] for p0 in [ " uniform " , " weighted " ] for p1 in [ " tree " , " forest " ]
] :
param [ " sample_type " ] = p [ 0 ]
param [ " normalize_type " ] = p [ 1 ]
bst = xgb . train ( param , dtrain , num_round , evals = watchlist )
2023-03-31 19:01:55 +08:00
preds = bst . predict ( dtest , iteration_range = ( 0 , num_round ) )
2024-12-06 16:48:17 +08:00
err = sum (
1 for i in range ( len ( preds ) ) if int ( preds [ i ] > 0.5 ) != labels [ i ]
) / float ( len ( preds ) )
2016-06-09 06:04:01 +09:00
assert err < 0.1
preds_list . append ( preds )
for ii in range ( len ( preds_list ) ) :
for jj in range ( ii + 1 , len ( preds_list ) ) :
assert np . sum ( np . abs ( preds_list [ ii ] - preds_list [ jj ] ) ) > 0
2019-12-24 09:43:41 +08:00
def test_boost_from_prediction ( self ) :
# Re-construct dtrain here to avoid modification
2023-04-28 19:45:15 +08:00
margined , _ = tm . load_agaricus ( __file__ )
2024-01-26 04:58:48 +08:00
bst = xgb . train ( { " tree_method " : " hist " } , margined , 1 )
2019-12-24 09:43:41 +08:00
predt_0 = bst . predict ( margined , output_margin = True )
margined . set_base_margin ( predt_0 )
2024-01-26 04:58:48 +08:00
bst = xgb . train ( { " tree_method " : " hist " } , margined , 1 )
2019-12-24 09:43:41 +08:00
predt_1 = bst . predict ( margined )
assert np . any ( np . abs ( predt_1 - predt_0 ) > 1e-6 )
2023-04-28 19:45:15 +08:00
dtrain , _ = tm . load_agaricus ( __file__ )
2024-01-26 04:58:48 +08:00
bst = xgb . train ( { " tree_method " : " hist " } , dtrain , 2 )
2019-12-24 09:43:41 +08:00
predt_2 = bst . predict ( dtrain )
assert np . all ( np . abs ( predt_2 - predt_1 ) < 1e-6 )
2024-01-09 09:54:39 +08:00
def test_boost_from_existing_model ( self ) - > None :
2023-04-28 19:45:15 +08:00
X , _ = tm . load_agaricus ( __file__ )
2024-01-09 09:54:39 +08:00
booster = xgb . train ( { " tree_method " : " hist " } , X , num_boost_round = 4 )
2020-12-17 19:59:19 +08:00
assert booster . num_boosted_rounds ( ) == 4
2024-01-09 09:54:39 +08:00
booster . set_param ( { " tree_method " : " approx " } )
assert booster . num_boosted_rounds ( ) == 4
booster = xgb . train (
{ " tree_method " : " hist " } , X , num_boost_round = 4 , xgb_model = booster
)
2020-12-17 19:59:19 +08:00
assert booster . num_boosted_rounds ( ) == 8
2024-01-09 09:54:39 +08:00
with pytest . warns ( UserWarning , match = " `updater` " ) :
booster = xgb . train (
{ " updater " : " prune " , " process_type " : " update " } ,
X ,
num_boost_round = 4 ,
xgb_model = booster ,
)
2020-12-17 19:59:19 +08:00
# Trees are moved for update, the rounds is reduced. This test is
# written for being compatible with current code (1.0.0). If the
# behaviour is considered sub-optimal, feel free to change.
assert booster . num_boosted_rounds ( ) == 4
2024-01-09 09:54:39 +08:00
booster = xgb . train ( { " booster " : " gblinear " } , X , num_boost_round = 4 )
assert booster . num_boosted_rounds ( ) == 4
booster . set_param ( { " updater " : " coord_descent " } )
assert booster . num_boosted_rounds ( ) == 4
booster . set_param ( { " updater " : " shotgun " } )
assert booster . num_boosted_rounds ( ) == 4
booster = xgb . train (
{ " booster " : " gblinear " } , X , num_boost_round = 4 , xgb_model = booster
)
assert booster . num_boosted_rounds ( ) == 8
2025-04-15 14:28:49 +08:00
def test_custom_objective ( self ) - > None :
2023-04-28 19:45:15 +08:00
dtrain , dtest = tm . load_agaricus ( __file__ )
2025-04-15 14:28:49 +08:00
run_custom_objective ( " hist " , " cpu " , dtrain , dtest )
2021-06-09 14:51:17 +08:00
2025-04-15 14:28:49 +08:00
def test_multi_eval_metric ( self ) - > None :
2023-04-28 19:45:15 +08:00
dtrain , dtest = tm . load_agaricus ( __file__ )
2024-12-06 16:48:17 +08:00
watchlist = [ ( dtest , " eval " ) , ( dtrain , " train " ) ]
param = {
" max_depth " : 2 ,
" eta " : 0.2 ,
" verbosity " : 1 ,
" objective " : " binary:logistic " ,
}
param [ " eval_metric " ] = [ " auc " , " logloss " , " error " ]
2016-06-05 00:17:35 -05:00
evals_result = { }
2024-12-06 16:48:17 +08:00
bst = xgb . train ( param , dtrain , 4 , evals = watchlist , evals_result = evals_result )
2016-06-05 00:17:35 -05:00
assert isinstance ( bst , xgb . core . Booster )
2024-12-06 16:48:17 +08:00
assert len ( evals_result [ " eval " ] ) == 3
assert set ( evals_result [ " eval " ] . keys ( ) ) == { " auc " , " error " , " logloss " }
2016-06-05 00:17:35 -05:00
2016-04-24 16:34:46 +09:00
def test_fpreproc ( self ) :
2024-12-06 16:48:17 +08:00
param = { " max_depth " : 2 , " eta " : 1 , " objective " : " binary:logistic " }
2016-04-24 16:34:46 +09:00
num_round = 2
def fpreproc ( dtrain , dtest , param ) :
label = dtrain . get_label ( )
ratio = float ( np . sum ( label == 0 ) ) / np . sum ( label == 1 )
2024-12-06 16:48:17 +08:00
param [ " scale_pos_weight " ] = ratio
2016-04-24 16:34:46 +09:00
return ( dtrain , dtest , param )
2023-04-28 19:45:15 +08:00
dtrain , _ = tm . load_agaricus ( __file__ )
2024-12-06 16:48:17 +08:00
xgb . cv (
param ,
dtrain ,
num_round ,
nfold = 5 ,
metrics = { " auc " } ,
seed = 0 ,
fpreproc = fpreproc ,
)
2016-04-24 16:34:46 +09:00
def test_show_stdv ( self ) :
2024-12-06 16:48:17 +08:00
param = { " max_depth " : 2 , " eta " : 1 , " objective " : " binary:logistic " }
2016-04-24 16:34:46 +09:00
num_round = 2
2023-04-28 19:45:15 +08:00
dtrain , _ = tm . load_agaricus ( __file__ )
2024-12-06 16:48:17 +08:00
xgb . cv (
param ,
dtrain ,
num_round ,
nfold = 5 ,
metrics = { " error " } ,
seed = 0 ,
show_stdv = False ,
)
2016-04-29 13:51:34 +09:00
2026-02-22 10:02:35 +08:00
def test_prediction_cache ( self , tmp_path : Path ) - > None :
2023-03-14 22:09:36 +08:00
X , y = tm . make_sparse_regression ( 512 , 4 , 0.5 , as_dense = False )
Xy = xgb . DMatrix ( X , y )
param = { " max_depth " : 8 }
booster = xgb . train ( param , Xy , num_boost_round = 1 )
2026-02-22 10:02:35 +08:00
path = tmp_path / " model.json "
booster . save_model ( path )
2023-03-14 22:09:36 +08:00
2026-02-22 10:02:35 +08:00
predt_0 = booster . predict ( Xy )
2023-03-14 22:09:36 +08:00
2026-02-22 10:02:35 +08:00
param [ " max_depth " ] = 2
2023-03-14 22:09:36 +08:00
2026-02-22 10:02:35 +08:00
booster = xgb . train ( param , Xy , num_boost_round = 1 )
predt_1 = booster . predict ( Xy )
assert not np . isclose ( predt_0 , predt_1 ) . all ( )
2023-03-14 22:09:36 +08:00
2026-02-22 10:02:35 +08:00
booster . load_model ( path )
predt_2 = booster . predict ( Xy )
np . testing . assert_allclose ( predt_0 , predt_2 )
2023-03-14 22:09:36 +08:00
2016-04-29 13:51:34 +09:00
def test_feature_names_validation ( self ) :
X = np . random . random ( ( 10 , 3 ) )
y = np . random . randint ( 2 , size = ( 10 , ) )
2021-02-25 18:54:16 +08:00
dm1 = xgb . DMatrix ( X , y , feature_names = ( " a " , " b " , " c " ) )
dm2 = xgb . DMatrix ( X , y )
2016-04-29 13:51:34 +09:00
bst = xgb . train ( [ ] , dm1 )
bst . predict ( dm1 ) # success
2020-11-03 02:27:39 -05:00
with pytest . raises ( ValueError ) :
bst . predict ( dm2 )
2016-04-29 13:51:34 +09:00
bst . predict ( dm1 ) # success
bst = xgb . train ( [ ] , dm2 )
bst . predict ( dm2 ) # success
2019-12-11 11:20:40 +08:00
2023-12-28 22:45:13 +08:00
    def test_special_model_dump_characters(self) -> None:
        """Feature names containing quotes, tabs and newlines must survive
        the json/dot/text model dumps with correct escaping."""
        params = {"objective": "reg:squarederror", "max_depth": 3}
        # Names deliberately contain characters that need escaping in every
        # dump format: double quotes, a tab, and an embedded newline.
        feature_names = ['"feature 0"', "\tfeature\n1", """feature "2"."""]
        X, y, w = tm.make_regression(n_samples=128, n_features=3, use_cupy=False)
        Xy = xgb.DMatrix(X, label=y, feature_names=feature_names)
        booster = xgb.train(params, Xy, num_boost_round=3)

        # One dump string per boosted tree.
        json_dump = booster.get_dump(dump_format="json")
        assert len(json_dump) == 3

        def validate_json(obj: dict) -> None:
            # Recursively verify that every "split" field round-tripped back
            # to one of the original (unescaped) feature names.
            for k, v in obj.items():
                if k == "split":
                    assert v in feature_names
                elif isinstance(v, dict):
                    validate_json(v)

        for j_tree in json_dump:
            loaded = json.loads(j_tree)
            validate_json(loaded)

        # dot/text dumps keep the double quotes backslash-escaped.
        dot_dump = booster.get_dump(dump_format="dot")
        for d in dot_dump:
            assert d.find(r"feature \"2\"") != -1
        text_dump = booster.get_dump(dump_format="text")
        for d in text_dump:
            assert d.find(r"feature \"2\"") != -1
2023-08-14 15:49:00 +08:00
2022-03-29 02:32:42 +08:00
    def run_slice(
        self,
        booster: xgb.Booster,
        dtrain: xgb.DMatrix,
        num_parallel_tree: int,
        num_classes: int,
        num_boost_round: int,
        use_np_type: bool,
    ) -> None:
        """Exercise Booster slicing: tree counts per slice form, rejected
        slice arguments, and prediction equivalence between sliced boosters
        and ``iteration_range`` predictions.

        When ``use_np_type`` is True the slice bound is a numpy integer to
        check that non-builtin integer types are accepted.
        """
        beg = 3
        if use_np_type:
            end: Integer = np.int32(7)
        else:
            end = 7
        sliced: xgb.Booster = booster[beg:end]
        # Slicing preserves feature metadata.
        assert sliced.feature_types == booster.feature_types
        # Each boosting round contributes num_parallel_tree * num_classes trees.
        sliced_trees = (end - beg) * num_parallel_tree * num_classes
        assert sliced_trees == len(sliced.get_dump())
        # A step of 2 keeps every other round, i.e. half the trees.
        sliced_trees = sliced_trees // 2
        sliced = booster[beg:end:2]
        assert sliced_trees == len(sliced.get_dump())

        # Open-ended slice runs to the last boosted round.
        sliced = booster[beg:]
        sliced_trees = (num_boost_round - beg) * num_parallel_tree * num_classes
        assert sliced_trees == len(sliced.get_dump())
        # NOTE(review): the next three lines repeat the open-ended slice check
        # above verbatim — possibly one of the two was meant to use a
        # different slice form; confirm against upstream history.
        sliced = booster[beg:]
        sliced_trees = (num_boost_round - beg) * num_parallel_tree * num_classes
        assert sliced_trees == len(sliced.get_dump())

        # Slice from the start up to (but excluding) `end`.
        sliced = booster[:end]
        sliced_trees = end * num_parallel_tree * num_classes
        assert sliced_trees == len(sliced.get_dump())
        # NOTE(review): duplicate of the [:end] check above — see note above.
        sliced = booster[:end]
        sliced_trees = end * num_parallel_tree * num_classes
        assert sliced_trees == len(sliced.get_dump())

        # Negative bounds are rejected.
        with pytest.raises(ValueError, match=r">= 0"):
            booster[-1:0]
        # we do not accept empty slice.
        with pytest.raises(ValueError, match="Empty slice"):
            booster[1:1]
        # stop can not be smaller than begin
        with pytest.raises(ValueError, match=r"Invalid.*"):
            booster[3:0]
        with pytest.raises(ValueError, match=r"Invalid.*"):
            booster[3:-1]
        # negative step is not supported.
        with pytest.raises(ValueError, match=r".*>= 1.*"):
            booster[0:2:-1]
        # step can not be 0.
        with pytest.raises(ValueError, match=r".*>= 1.*"):
            booster[0:2:0]

        # Iterating a booster yields one single-round booster per round.
        trees = [_ for _ in booster]
        assert len(trees) == num_boost_round

        with pytest.raises(TypeError):
            booster["wrong type"]  # type: ignore
        with pytest.raises(IndexError):
            booster[: num_boost_round + 1]
        with pytest.raises(ValueError):
            booster[1, 2]  # too many dims
        # setitem is not implemented as model is immutable during slicing.
        with pytest.raises(TypeError):
            booster[:end] = booster  # type: ignore

        # A sliced booster predicts exactly like iteration_range on the full
        # booster.
        sliced_0 = booster[1:3]
        np.testing.assert_allclose(
            booster.predict(dtrain, iteration_range=(1, 3)), sliced_0.predict(dtrain)
        )
        sliced_1 = booster[3:7]
        np.testing.assert_allclose(
            booster.predict(dtrain, iteration_range=(3, 7)), sliced_1.predict(dtrain)
        )

        predt_0 = sliced_0.predict(dtrain, output_margin=True)
        predt_1 = sliced_1.predict(dtrain, output_margin=True)
        # base score.
        # Each sliced prediction includes the intercept once, so subtract it
        # once when summing two margin predictions.
        intercept = np.broadcast_to(np.array(get_basescore(booster)), predt_0.shape)
        merged = predt_0 + predt_1 - intercept
        single = booster[1:7].predict(dtrain, output_margin=True)
        np.testing.assert_allclose(merged, single, atol=1e-6)

        # Two interleaved strided slices together cover rounds 1..6.
        sliced_0 = booster[1:7:2]  # 1,3,5
        sliced_1 = booster[2:8:2]  # 2,4,6
        predt_0 = sliced_0.predict(dtrain, output_margin=True)
        predt_1 = sliced_1.predict(dtrain, output_margin=True)
        merged = predt_0 + predt_1 - intercept
        single = booster[1:7].predict(dtrain, output_margin=True)
        np.testing.assert_allclose(merged, single, atol=1e-6)
2021-02-25 18:54:16 +08:00
2022-03-29 02:32:42 +08:00
@pytest.mark.skipif ( * * tm . no_sklearn ( ) )
2024-07-30 17:31:06 +08:00
@pytest.mark.parametrize ( " booster_name " , [ " gbtree " , " dart " ] )
def test_slice ( self , booster_name : str ) - > None :
2022-03-29 02:32:42 +08:00
from sklearn . datasets import make_classification
num_classes = 3
X , y = make_classification (
n_samples = 1000 , n_informative = 5 , n_classes = num_classes
)
dtrain = xgb . DMatrix ( data = X , label = y )
num_parallel_tree = 4
num_boost_round = 16
total_trees = num_parallel_tree * num_classes * num_boost_round
booster = xgb . train (
{
" num_parallel_tree " : num_parallel_tree ,
" subsample " : 0.5 ,
" num_class " : num_classes ,
2024-07-30 17:31:06 +08:00
" booster " : booster_name ,
2022-03-29 02:32:42 +08:00
" objective " : " multi:softprob " ,
} ,
num_boost_round = num_boost_round ,
dtrain = dtrain ,
)
booster . feature_types = [ " q " ] * X . shape [ 1 ]
assert len ( booster . get_dump ( ) ) == total_trees
2024-07-30 17:31:06 +08:00
assert booster [ . . . ] . num_boosted_rounds ( ) == num_boost_round
2024-01-26 04:58:48 +08:00
self . run_slice (
booster , dtrain , num_parallel_tree , num_classes , num_boost_round , False
)
2022-03-29 02:32:42 +08:00
bytesarray = booster . save_raw ( raw_format = " ubj " )
booster = xgb . Booster ( model_file = bytesarray )
2024-01-26 04:58:48 +08:00
self . run_slice (
booster , dtrain , num_parallel_tree , num_classes , num_boost_round , False
)
2022-03-29 02:32:42 +08:00
2021-02-25 18:54:16 +08:00
@pytest.mark.skipif ( * * tm . no_pandas ( ) )
2024-01-09 09:54:39 +08:00
@pytest.mark.parametrize ( " ext " , [ " json " , " ubj " ] )
2026-02-22 10:02:35 +08:00
def test_feature_info ( self , ext : str , tmp_path : Path ) - > None :
2021-02-25 18:54:16 +08:00
import pandas as pd
2023-12-28 22:45:13 +08:00
2024-01-09 09:54:39 +08:00
# make data
2021-02-25 18:54:16 +08:00
rows = 100
cols = 10
2025-04-15 14:28:49 +08:00
rng = np . random . RandomState ( 1994 )
2021-02-25 18:54:16 +08:00
X = rng . randn ( rows , cols )
y = rng . randn ( rows )
2024-01-09 09:54:39 +08:00
# Test with pandas, which has feature info.
2021-02-25 18:54:16 +08:00
feature_names = [ " test_feature_ " + str ( i ) for i in range ( cols ) ]
X_pd = pd . DataFrame ( X , columns = feature_names )
2023-04-11 00:17:34 +08:00
X_pd [ f " test_feature_ { 3 } " ] = X_pd . iloc [ : , 3 ] . astype ( np . int32 )
2021-02-25 18:54:16 +08:00
Xy = xgb . DMatrix ( X_pd , y )
2024-01-09 09:54:39 +08:00
assert Xy . feature_types is not None
2021-02-25 18:54:16 +08:00
assert Xy . feature_types [ 3 ] == " int "
booster = xgb . train ( { } , dtrain = Xy , num_boost_round = 1 )
assert booster . feature_names == Xy . feature_names
assert booster . feature_names == feature_names
assert booster . feature_types == Xy . feature_types
2026-02-22 10:02:35 +08:00
path = tmp_path / f " model. { ext } "
booster . save_model ( path )
booster = xgb . Booster ( )
booster . load_model ( path )
2021-02-25 18:54:16 +08:00
2026-02-22 10:02:35 +08:00
assert booster . feature_names == Xy . feature_names
assert booster . feature_types == Xy . feature_types
2024-01-09 09:54:39 +08:00
# Test with numpy, no feature info is set
Xy = xgb . DMatrix ( X , y )
assert Xy . feature_names is None
assert Xy . feature_types is None
booster = xgb . train ( { } , dtrain = Xy , num_boost_round = 1 )
assert booster . feature_names is None
assert booster . feature_types is None
# test explicitly set
fns = [ str ( i ) for i in range ( cols ) ]
booster . feature_names = fns
assert booster . feature_names == fns
2026-02-22 10:02:35 +08:00
path = tmp_path / f " model2. { ext } "
booster . save_model ( path )
2024-01-09 09:54:39 +08:00
2026-02-22 10:02:35 +08:00
booster = xgb . Booster ( model_file = path )
assert booster . feature_names == fns