import pytest

import taichi as ti
from tests import test_utils

# Number of loop iterations / field elements shared by most tests below.
n = 128


def run_atomic_add_global_case(vartype, step, valproc=lambda x: x):
    """Check the values returned by ``ti.atomic_add`` for one dtype.

    The kernel accumulates ``step`` ``n`` times into a global 0-D field and
    into a kernel-local variable, recording the value returned by every add.
    After sorting, the recorded values must be ``0, step, 2 * step, ...``.

    Args:
        vartype: Taichi dtype used for all fields.
        step: Increment applied by each atomic add.
        valproc: Wrapper applied to actual values before comparison, e.g. to
            compare floats approximately.
    """
    x = ti.field(vartype)
    y = ti.field(vartype)
    c = ti.field(vartype)

    ti.root.dense(ti.i, n).place(x, y)
    ti.root.place(c)

    # Make Taichi correctly infer the type
    # TODO: Taichi seems to treat numpy.int32 as a float type, fix that.
    init_ck = 0 if vartype == ti.i32 else 0.0

    @ti.kernel
    def func():
        ck = init_ck
        for i in range(n):
            x[i] = ti.atomic_add(c[None], step)
            y[i] = ti.atomic_add(ck, step)

    func()

    assert valproc(c[None]) == n * step
    # Iterations may complete in any order, so compare the sorted results.
    x_actual = sorted(x.to_numpy())
    y_actual = sorted(y.to_numpy())
    expect = [i * step for i in range(n)]
    for xa, ya, e in zip(x_actual, y_actual, expect):
        print(xa, ya, e)
        assert valproc(xa) == e
        assert valproc(ya) == e


@test_utils.test()
def test_atomic_add_global_i32():
    """Run the global atomic-add case with 32-bit integers."""
    run_atomic_add_global_case(ti.i32, 42)


@test_utils.test()
def test_atomic_add_global_f32():
    """Run the global atomic-add case with 32-bit floats (approximate compare)."""
    run_atomic_add_global_case(ti.f32, 4.2, valproc=lambda x: test_utils.approx(x, rel=1e-5))


@test_utils.test(arch=[ti.cpu, ti.cuda])
def test_atomic_min_max_uint():
    """Check atomic_max / atomic_min on u64 values with the top bit set."""
    x = ti.field(ti.u64, shape=100)

    @ti.kernel
    def test0():
        for I in x:
            x[I] = 0
        # 1 << 63 only compares as the maximum when treated as unsigned.
        x[1] = ti.cast(1, ti.u64) << 63
        for I in x:
            ti.atomic_max(x[0], x[I])

    test0()
    assert x[0] == 9223372036854775808

    @ti.kernel
    def test1():
        for I in x:
            x[I] = ti.cast(1, ti.u64) << 63
        x[1] = 100
        for I in x:
            ti.atomic_min(x[0], x[I])

    test1()
    assert x[0] == 100


@test_utils.test()
def test_atomic_add_expr_evaled():
    """Check that an atomic_add whose result is discarded is still executed."""
    c = ti.field(ti.i32)
    step = 42

    ti.root.place(c)

    @ti.kernel
    def func():
        for i in range(n):
            # this is an expr with side effect, make sure it's not optimized out.
            ti.atomic_add(c[None], step)

    func()

    assert c[None] == n * step


@test_utils.test()
def test_atomic_add_demoted():
    # Ensure demoted atomics do not crash the program.
    x = ti.field(ti.i32)
    y = ti.field(ti.i32)
    step = 42

    ti.root.dense(ti.i, n).place(x, y)

    @ti.kernel
    def func():
        for i in range(n):
            s = i
            # Both adds should get demoted.
            x[i] = ti.atomic_add(s, step)
            y[i] = ti.atomic_add(s, step)

    func()

    # The first add returns the initial s (= i); the second returns s after
    # it has already been incremented once.
    for i in range(n):
        assert x[i] == i
        assert y[i] == i + step


@test_utils.test()
def test_atomic_add_with_local_store_simplify1():
    # Test for the following LocalStoreStmt simplification case:
    #
    # local store [$a <- ...]
    # atomic add ($a, ...)
    # local store [$a <- ...]
    #
    # Specifically, the second store should not suppress the first one, because
    # atomic_add can return value.
    x = ti.field(ti.i32)
    y = ti.field(ti.i32)
    step = 42

    ti.root.dense(ti.i, n).place(x, y)

    @ti.kernel
    def func():
        for i in range(n):
            # do a local store
            j = i
            x[i] = ti.atomic_add(j, step)
            # do another local store, make sure the previous one is not optimized out
            j = x[i]
            y[i] = j

    func()

    for i in range(n):
        assert x[i] == i
        assert y[i] == i


@test_utils.test()
def test_atomic_add_with_local_store_simplify2():
    # Test for the following LocalStoreStmt simplification case:
    #
    # local store [$a <- ...]
    # atomic add ($a, ...)
    #
    # Specifically, the local store should not be removed, because
    # atomic_add can return its value.
    x = ti.field(ti.i32)
    step = 42

    ti.root.dense(ti.i, n).place(x)

    @ti.kernel
    def func():
        for i in range(n):
            j = i
            x[i] = ti.atomic_add(j, step)

    func()

    for i in range(n):
        assert x[i] == i


@test_utils.test()
def test_atomic_add_with_if_simplify():
    # Make sure IfStmt simplification doesn't move stmts depending on the result
    # of atomic_add()
    x = ti.field(ti.i32)
    step = 42

    ti.root.dense(ti.i, n).place(x)

    boundary = n / 2

    @ti.kernel
    def func():
        for i in range(n):
            if i > boundary:
                # A sequence of commands designed such that atomic_add() is the only
                # thing to decide whether the if branch can be simplified.
                s = i
                j = ti.atomic_add(s, s)
                k = j + s
                x[i] = k
            else:
                # If we look at the IR, this branch should be simplified, since nobody
                # is using atomic_add's result.
                ti.atomic_add(x[i], i)
                x[i] += step

    func()

    for i in range(n):
        # If-branch: j == i (old value) and s == 2 * i after the add, so k == 3 * i.
        expect = i * 3 if i > boundary else (i + step)
        assert x[i] == expect


@test_utils.test()
def test_local_atomic_with_if():
    """Check an augmented add on a local variable declared inside an ``if``."""
    ret = ti.field(dtype=ti.i32, shape=())

    @ti.kernel
    def test():
        if True:
            x = 0
            x += 1
            ret[None] = x

    test()
    assert ret[None] == 1


@test_utils.test(arch=[ti.cpu, ti.cuda])
def test_atomic_sub_with_type_promotion():
    """Check atomic subtraction between unsigned operands of different widths."""

    # Test Case 1
    @ti.kernel
    def test_u16_sub_u8() -> ti.uint16:
        x: ti.uint16 = 1000
        y: ti.uint8 = 255

        ti.atomic_sub(x, y)
        return x

    res = test_u16_sub_u8()
    assert res == 745

    # Test Case 2
    @ti.kernel
    def test_u8_sub_u16() -> ti.uint8:
        x: ti.uint8 = 255
        y: ti.uint16 = 100

        ti.atomic_sub(x, y)
        return x

    res = test_u8_sub_u16()
    assert res == 155

    # Test Case 3
    A = ti.field(ti.uint8, shape=())
    B = ti.field(ti.uint16, shape=())

    @ti.kernel
    def test_with_field():
        v: ti.uint16 = 1000
        v -= A[None]
        B[None] = v

    A[None] = 255
    test_with_field()
    assert B[None] == 745


@test_utils.test()
def test_atomic_sub_expr_evaled():
    """Check that an atomic_sub whose result is discarded is still executed."""
    c = ti.field(ti.i32)
    step = 42

    ti.root.place(c)

    @ti.kernel
    def func():
        for i in range(n):
            # this is an expr with side effect, make sure it's not optimized out.
            ti.atomic_sub(c[None], step)

    func()

    assert c[None] == -n * step


@test_utils.test()
def test_atomic_mul_expr_evaled():
    """Check that an atomic_mul whose result is discarded is still executed."""
    c = ti.field(ti.i32)
    base = 2

    ti.root.place(c)

    @ti.kernel
    def func():
        c[None] = 1
        for i in range(16):
            # this is an expr with side effect, make sure it's not optimized out.
            ti.atomic_mul(c[None], base)

    func()

    assert c[None] == base**16


@test_utils.test()
def test_atomic_max_expr_evaled():
    """Check that an atomic_max whose result is discarded is still executed."""
    c = ti.field(ti.i32)
    step = 42

    ti.root.place(c)

    @ti.kernel
    def func():
        for i in range(n):
            # this is an expr with side effect, make sure it's not optimized out.
            ti.atomic_max(c[None], i * step)

    func()

    assert c[None] == (n - 1) * step


@test_utils.test()
def test_atomic_min_expr_evaled():
    """Check that an atomic_min whose result is discarded is still executed."""
    c = ti.field(ti.i32)
    step = 42

    ti.root.place(c)

    @ti.kernel
    def func():
        c[None] = 1000
        for i in range(n):
            # this is an expr with side effect, make sure it's not optimized out.
            ti.atomic_min(c[None], i * step)

    func()

    assert c[None] == 0


@test_utils.test()
def test_atomic_and_expr_evaled():
    """Check that an atomic_and whose result is discarded is still executed."""
    c = ti.field(ti.i32)

    ti.root.place(c)

    max_int = 2147483647

    @ti.kernel
    def func():
        c[None] = 1023
        for i in range(10):
            # this is an expr with side effect, make sure it's not optimized out.
            # Each mask (max_int - 2**i) clears bit i, so all ten low bits end up 0.
            ti.atomic_and(c[None], max_int - 2**i)

    func()

    assert c[None] == 0


@test_utils.test()
def test_atomic_or_expr_evaled():
    """Check that an atomic_or whose result is discarded is still executed."""
    c = ti.field(ti.i32)

    ti.root.place(c)

    @ti.kernel
    def func():
        c[None] = 0
        for i in range(10):
            # this is an expr with side effect, make sure it's not optimized out.
            ti.atomic_or(c[None], 2**i)

    func()

    assert c[None] == 1023


@test_utils.test()
def test_atomic_xor_expr_evaled():
    """Check that an atomic_xor whose result is discarded is still executed."""
    c = ti.field(ti.i32)

    ti.root.place(c)

    @ti.kernel
    def func():
        c[None] = 1023
        for i in range(10):
            # this is an expr with side effect, make sure it's not optimized out.
            ti.atomic_xor(c[None], 2**i)

    func()

    assert c[None] == 0


# NOTE: "frist" in the name below is a typo for "first"; the name is left
# unchanged so that existing pytest node IDs keep working.
@test_utils.test()
def test_atomic_min_rvalue_as_frist_op():
    """Check that a non-writable first operand to atomic_min is rejected."""

    @ti.kernel
    def func():
        y = ti.Vector([1, 2, 3])
        z = ti.atomic_min([3, 2, 1], y)

    with pytest.raises(ti.TaichiSyntaxError) as e:
        func()

    # These checks must sit outside the ``with`` block: statements placed
    # after the raising call inside it would never run.
    assert "atomic_min" in str(e.value)
    assert "cannot use a non-writable target as the first operand of" in str(e.value)


@test_utils.test()
def test_atomic_max_f32():
    """Check atomic_max on a local f32 accumulator."""

    @ti.kernel
    def max_kernel() -> ti.f32:
        x = -1000.0
        for i in range(1, 20):
            ti.atomic_max(x, -ti.f32(i))

        return x

    assert max_kernel() == -1.0


@test_utils.test()
def test_atomic_mul_f32():
    """Check atomic_mul on a local f32 accumulator (7! == 5040)."""

    @ti.kernel
    def mul_kernel() -> ti.f32:
        x = 1.0
        for i in range(1, 8):
            ti.atomic_mul(x, ti.f32(i))

        return x

    assert mul_kernel() == 5040.0