; Kernel parameter sets. Each [Kernels_N] section defines the same five
; integer keys: m, k, bm, bk, bmm.
; NOTE(review): m/k look like problem dimensions and bm/bk/bmm like
; block/tile sizes - confirm against the code that reads this file.

[Kernels_0]
m = 1536
k = 4096
bm = 256
bk = 128
bmm = 32

[Kernels_1]
m = 1536
k = 1536
bm = 128
bk = 64
bmm = 64

[Kernels_2]
m = 4096
k = 1536
bm = 256
bk = 128
bmm = 32