#include "gtest/gtest.h"

#include "taichi/ir/statements.h"
#include "taichi/ir/transforms.h"
#include "tests/cpp/program/test_program.h"

namespace taichi::lang {

// Verifies that irpass::scalarize() with half2_optimization_enabled keeps an
// f16x2 atomic-add on an Ndarray (ExternalPtrStmt destination) vectorized
// instead of scalarizing it into two f16 atomics.
TEST(Half2Vectorization, Ndarray) {
  // Basic tests within a basic block
  TestProgram test_prog;
  test_prog.setup();

  auto block = std::make_unique<Block>();

  auto func = []() {};
  auto kernel =
      std::make_unique<Kernel>(*test_prog.prog(), func, "fake_kernel");

  // [Tensor (2) f16] — the half2 vector type under test.
  auto half2_type =
      TypeFactory::get_instance().create_tensor_type({2}, PrimitiveType::f16);

  // Destination: element 0 of an external (Ndarray) f16x2 buffer.
  auto argload_stmt = block->push_back<ArgLoadStmt>(
      std::vector<int>{0} /*arg_id*/, PrimitiveType::f16, /*is_ptr*/ true,
      /*create_load*/ false, /*arg_depth*/ 0);
  argload_stmt->ret_type = half2_type;

  auto const_0_stmt = block->push_back<ConstStmt>(TypedConstant(0));

  std::vector<Stmt *> external_ptr_indices0 = {const_0_stmt};
  auto external_ptr_stmt_0 =
      block->push_back<ExternalPtrStmt>(argload_stmt, external_ptr_indices0);
  external_ptr_stmt_0->ret_type = half2_type;
  external_ptr_stmt_0->ret_type.set_is_pointer(true);

  // Value: the vector [10, 20].
  auto val_0_stmt = block->push_back<ConstStmt>(TypedConstant(10));
  auto val_1_stmt = block->push_back<ConstStmt>(TypedConstant(20));

  std::vector<Stmt *> values = {val_0_stmt, val_1_stmt};
  auto matrix_stmt = block->push_back<MatrixInitStmt>(values);
  matrix_stmt->ret_type = half2_type;

  auto atomic_stmt = block->push_back<AtomicOpStmt>(
      AtomicOpType::add, external_ptr_stmt_0, matrix_stmt);
  atomic_stmt->ret_type = half2_type;

  /* Before:
    <[Tensor (2) f16]> $0 = argaddr[0]
    $1 = const 0
    <*[Tensor (2) f16]> $2 = external_ptr $0, [$1] layout=AOS is_grad=false
    $3 = const 10
    $4 = const 20
    <[Tensor (2) f16]> $5 = [$3, $4]
    <[Tensor (2) f16]> $6 = atomic add($2, $5)
  */
  irpass::scalarize(block.get(), true /*half2_optimization_enabled*/);
  CompileConfig config;
  irpass::full_simplify(block.get(), config, {false, false});

  /* After (unchanged — the half2 atomic must survive scalarization):
    <[Tensor (2) f16]> $0 = argaddr[0]
    $1 = const 0
    <*[Tensor (2) f16]> $2 = external_ptr $0, [$1] layout=AOS is_grad=false
    $3 = const 10
    $4 = const 20
    <[Tensor (2) f16]> $5 = [$3, $4]
    <[Tensor (2) f16]> $6 = atomic add($2, $5)
  */
  EXPECT_EQ(block->size(), 7);

  // Check for scalarized statements
  EXPECT_EQ(block->statements[5]->is<MatrixInitStmt>(), true);
  EXPECT_EQ(block->statements[2]->is<ExternalPtrStmt>(), true);
  EXPECT_EQ(block->statements[6]->is<AtomicOpStmt>(), true);
}

// Same check with a global-temporary (GlobalTemporaryStmt) destination:
// the f16x2 atomic-add stays vectorized through scalarize + full_simplify.
TEST(Half2Vectorization, GlobalTemporary) {
  // Basic tests within a basic block
  TestProgram test_prog;
  test_prog.setup();

  auto block = std::make_unique<Block>();

  auto func = []() {};
  auto kernel =
      std::make_unique<Kernel>(*test_prog.prog(), func, "fake_kernel");

  auto half2_type =
      TypeFactory::get_instance().create_tensor_type({2}, PrimitiveType::f16);

  // Value: the vector [10, 20].
  auto val_0_stmt = block->push_back<ConstStmt>(TypedConstant(10));
  auto val_1_stmt = block->push_back<ConstStmt>(TypedConstant(20));

  std::vector<Stmt *> values = {val_0_stmt, val_1_stmt};
  auto matrix_stmt = block->push_back<MatrixInitStmt>(values);
  matrix_stmt->ret_type = half2_type;

  // Destination: a half2 global temporary at offset 0.
  auto global_temp_stmt_0 = block->push_back<GlobalTemporaryStmt>(0, half2_type);

  block->push_back<AtomicOpStmt>(AtomicOpType::add, global_temp_stmt_0,
                                 matrix_stmt);

  irpass::type_check(block.get(), CompileConfig());

  /* Before:
    $0 = const 10
    $1 = cast_value $0
    $2 = const 20
    $3 = cast_value $2
    <[Tensor (2) f16]> $4 = [$1, $3]
    <*[Tensor (2) f16]> $5 = global tmp var (offset = 0 B)
    <[Tensor (2) f16]> $6 = atomic add($5, $4)
  */
  irpass::scalarize(block.get(), true /*half2_optimization_enabled*/);
  CompileConfig config;
  irpass::full_simplify(block.get(), config, {false, false});

  /* After (casts folded; the half2 atomic survives):
    $0 = const 10.0
    $1 = const 20.0
    <[Tensor (2) f16]> $2 = [$0, $1]
    <*[Tensor (2) f16]> $3 = global tmp var (offset = 0 B)
    <[Tensor (2) f16]> $4 = atomic add($3, $2)
  */
  EXPECT_EQ(block->size(), 5);

  // Check for scalarized statements
  EXPECT_EQ(block->statements[2]->is<MatrixInitStmt>(), true);
  EXPECT_EQ(block->statements[3]->is<GlobalTemporaryStmt>(), true);
  EXPECT_EQ(block->statements[4]->is<AtomicOpStmt>(), true);
}

// Same check with a field destination (GetChStmt into an SNode tree):
// the f16x2 atomic-add stays vectorized through scalarize + full_simplify.
TEST(Half2Vectorization, Field) {
  // Basic tests within a basic block
  TestProgram test_prog;
  test_prog.setup();

  auto block = std::make_unique<Block>();

  auto func = []() {};
  auto kernel =
      std::make_unique<Kernel>(*test_prog.prog(), func, "fake_kernel");

  // Destination: child 0 of a root SNode with two f16 place children,
  // retyped below as a half2 pointer (overrided_dtype).
  auto get_root = block->push_back<GetRootStmt>();
  auto linearized_empty = block->push_back<LinearizeStmt>(
      std::vector<Stmt *>(), std::vector<int>());
  SNode root(0, SNodeType::root);
  root.insert_children(SNodeType::place);
  root.insert_children(SNodeType::place);
  auto lookup = block->push_back<SNodeLookupStmt>(&root, get_root,
                                                  linearized_empty, false);
  auto get_ch_stmt_0 = block->push_back<GetChStmt>(lookup, 0);

  auto half2_type =
      TypeFactory::get_instance().create_tensor_type({2}, PrimitiveType::f16);

  get_ch_stmt_0->ret_type = half2_type;
  get_ch_stmt_0->ret_type.set_is_pointer(true);
  get_ch_stmt_0->as<GetChStmt>()->overrided_dtype = true;

  // Value: the vector [10, 20].
  auto val_0_stmt = block->push_back<ConstStmt>(TypedConstant(10));
  auto val_1_stmt = block->push_back<ConstStmt>(TypedConstant(20));

  std::vector<Stmt *> values = {val_0_stmt, val_1_stmt};
  auto matrix_stmt = block->push_back<MatrixInitStmt>(values);
  matrix_stmt->ret_type = half2_type;

  block->push_back<AtomicOpStmt>(AtomicOpType::add, get_ch_stmt_0, matrix_stmt);

  irpass::type_check(block.get(), CompileConfig());

  /* Before:
    <*gen> $0 = get root nullptr
    $1 = linearized(ind {}, stride {})
    <*gen> $2 = [S1root][root]::lookup($0, $1) activate = false
    <*[Tensor (2) f16]> $3 = get child [S1root->S2place] $2
    $4 = const 10
    $5 = cast_value $4
    $6 = const 20
    $7 = cast_value $6
    <[Tensor (2) f16]> $8 = [$5, $7]
    <[Tensor (2) f16]> $9 = atomic add($3, $8)
  */
  irpass::scalarize(block.get(), true /*half2_optimization_enabled*/);
  CompileConfig config;
  irpass::full_simplify(block.get(), config, {false, false});

  /* After (casts folded; the half2 atomic survives):
    <*gen> $0 = get root nullptr
    $1 = const 0
    <*gen> $2 = [S1root][root]::lookup($0, $1) activate = false
    <*[Tensor (2) f16]> $3 = get child [S1root->S2place] $2
    $4 = const 10.0
    $5 = const 20.0
    <[Tensor (2) f16]> $6 = [$4, $5]
    <[Tensor (2) f16]> $7 = atomic add($3, $6)
  */
  EXPECT_EQ(block->size(), 8);

  // Check for scalarized statements
  EXPECT_EQ(block->statements[6]->is<MatrixInitStmt>(), true);
  EXPECT_EQ(block->statements[3]->is<GetChStmt>(), true);
  EXPECT_EQ(block->statements[7]->is<AtomicOpStmt>(), true);
}

}  // namespace taichi::lang