SIGN IN SIGN UP
const std = @import("../std.zig");
const assert = std.debug.assert;
2019-03-02 16:46:04 -05:00
pub const int = @import("big/int.zig");
pub const Limb = usize;
const limb_info = @typeInfo(Limb).int;
2021-10-01 15:22:51 +02:00
pub const SignedLimb = std.meta.Int(.signed, limb_info.bits);
pub const DoubleLimb = std.meta.Int(.unsigned, 2 * limb_info.bits);
std/math: optimize division with divisors less than a half-limb This adds a new path which avoids using compiler_rt generated div udivmod instructions in the case that a divisor is less than half the max usize value. Two half-limb divisions are performed instead which ensures that non-emulated division instructions are actually used. This does not improve the udivmod code which should still be reviewed independently of this issue. Notably this improves the performance of the toString implementation of non-power-of-two bases considerably. Division performance is improved ~1000% based on some coarse testing. The following test code is used to provide a rough comparison between the old vs. new method. ``` const std = @import("std"); const Managed = std.math.big.int.Managed; const allocator = std.heap.c_allocator; fn fib(a: *Managed, n: usize) !void { var b = try Managed.initSet(allocator, 1); defer b.deinit(); var c = try Managed.init(allocator); defer c.deinit(); var i: usize = 0; while (i < n) : (i += 1) { try c.add(a.toConst(), b.toConst()); a.swap(&b); b.swap(&c); } } pub fn main() !void { var a = try Managed.initSet(allocator, 0); defer a.deinit(); try fib(&a, 1_000_000); // Note: Next two lines (and printed digit count) omitted on no-print version. const as = try a.toString(allocator, 10, .lower); defer allocator.free(as); std.debug.print("fib: digit count: {}, limb count: {}\n", .{ as.len, a.limbs.len }); } ``` ``` ==> time.no-print <== limb count: 10849 ________________________________________________________ Executed in 10.60 secs fish external usr time 10.44 secs 0.00 millis 10.44 secs sys time 0.02 secs 1.12 millis 0.02 secs ==> time.old <== fib: digit count: 208988, limb count: 10849 ________________________________________________________ Executed in 22.78 secs fish external usr time 22.43 secs 1.01 millis 22.43 secs sys time 0.03 secs 0.13 millis 0.03 secs ==> time.optimized <== fib: digit count: 208988, limb count: 10849 ________________________________________________________ Executed in 11.59 secs fish external usr time 11.56 secs 1.03 millis 11.56 secs sys time 0.03 secs 0.12 millis 0.03 secs ``` Perf data for non-optimized and optimized, verifying no udivmod is generated by the new code. ``` $ perf report -i perf.data.old --stdio - Total Lost Samples: 0 - - Samples: 90K of event 'cycles:u' - Event count (approx.): 71603695208 - - Overhead Command Shared Object Symbol - ........ ....... ................ ........................................... - 52.97% t t [.] compiler_rt.udivmod.udivmod 45.97% t t [.] std.math.big.int.Mutable.addCarry 0.83% t t [.] main 0.08% t libc-2.33.so [.] __memmove_avx_unaligned_erms 0.08% t t [.] __udivti3 0.03% t [unknown] [k] 0xffffffff9a0010a7 0.02% t t [.] std.math.big.int.Managed.ensureCapacity 0.01% t libc-2.33.so [.] _int_malloc 0.00% t libc-2.33.so [.] __malloc_usable_size 0.00% t libc-2.33.so [.] _int_free 0.00% t t [.] 0x0000000000004a80 0.00% t t [.] std.heap.CAllocator.resize 0.00% t libc-2.33.so [.] _mid_memalign 0.00% t libc-2.33.so [.] sysmalloc 0.00% t libc-2.33.so [.] __posix_memalign 0.00% t t [.] std.heap.CAllocator.alloc 0.00% t ld-2.33.so [.] do_lookup_x $ perf report -i perf.data.optimized --stdio - Total Lost Samples: 0 - - Samples: 46K of event 'cycles:u' - Event count (approx.): 36790112336 - - Overhead Command Shared Object Symbol - ........ ....... ................ ........................................... - 79.98% t t [.] std.math.big.int.Mutable.addCarry 15.14% t t [.] main 4.58% t t [.] std.math.big.int.Managed.ensureCapacity 0.21% t libc-2.33.so [.] __memmove_avx_unaligned_erms 0.05% t [unknown] [k] 0xffffffff9a0010a7 0.02% t libc-2.33.so [.] _int_malloc 0.01% t t [.] std.heap.CAllocator.alloc 0.01% t libc-2.33.so [.] __malloc_usable_size 0.00% t libc-2.33.so [.] systrim.constprop.0 0.00% t libc-2.33.so [.] _mid_memalign 0.00% t t [.] 0x0000000000000c7d 0.00% t libc-2.33.so [.] malloc 0.00% t ld-2.33.so [.] check_match ``` Closes #10630.
2022-02-05 20:36:48 +13:00
pub const HalfLimb = std.meta.Int(.unsigned, limb_info.bits / 2);
pub const SignedDoubleLimb = std.meta.Int(.signed, 2 * limb_info.bits);
pub const Log2Limb = std.math.Log2Int(Limb);
comptime {
assert(std.math.floorPowerOfTwo(usize, limb_info.bits) == limb_info.bits);
assert(limb_info.signedness == .unsigned);
}
test {
_ = int;
_ = Limb;
2021-10-01 15:22:51 +02:00
_ = SignedLimb;
_ = DoubleLimb;
_ = SignedDoubleLimb;
_ = Log2Limb;
2019-03-02 16:46:04 -05:00
}