From 0dbad452e656cff8365e9d03d940f3e68e4f567f Mon Sep 17 00:00:00 2001 From: Moonchild Date: Tue, 27 Jan 2026 11:43:43 +0100 Subject: [PATCH] Issue #2895 - Implement 32-bit compatible Xoroshiro128++ This puts in a reduced-register version of Xoroshiro128++ which /just/ fits in our x86 7-register space by making use of an extra mState to temporarily store the result to be passed back. Resolves #2895 --- js/src/jit/CodeGenerator.cpp | 71 +++++++++++-------- js/src/jit/arm/Lowering-arm.cpp | 4 -- js/src/jit/arm64/Lowering-arm64.cpp | 2 - js/src/jit/mips32/Lowering-mips32.cpp | 4 -- js/src/jit/mips64/Lowering-mips64.cpp | 2 - js/src/jit/shared/LIR-shared.h | 32 ++------- js/src/jit/x64/Lowering-x64.cpp | 2 - js/src/jit/x86/Lowering-x86.cpp | 4 -- .../tests/manual/random-uniformity-test.html | 12 ++-- mfbt/Xoroshiro128PlusPlusRNG.h | 18 +++-- 10 files changed, 68 insertions(+), 83 deletions(-) diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp index 1b7ca318df..3aa6fba49d 100644 --- a/js/src/jit/CodeGenerator.cpp +++ b/js/src/jit/CodeGenerator.cpp @@ -12059,26 +12059,23 @@ CodeGenerator::visitRandom(LRandom* ins) Register64 s0Reg(ToRegister(ins->temp1())); Register64 s1Reg(ToRegister(ins->temp2())); // Helper registers for intermediate and final results - Register64 imr1Reg(ToRegister(ins->temp3())); - Register64 imr2Reg(ToRegister(ins->temp4())); - Register64 resultReg(ToRegister(ins->temp5())); + Register64 imrReg(ToRegister(ins->temp3())); #else Register64 s0Reg(ToRegister(ins->temp1()), ToRegister(ins->temp2())); Register64 s1Reg(ToRegister(ins->temp3()), ToRegister(ins->temp4())); // Helper registers for intermediate and final results - Register64 imr1Reg(ToRegister(ins->temp5()), ToRegister(ins->temp6())); - Register64 imr2Reg(ToRegister(ins->temp7()), ToRegister(ins->temp8())); - Register64 resultReg(ToRegister(ins->temp9()), ToRegister(ins->temp10())); + Register64 imrReg(ToRegister(ins->temp5()), ToRegister(ins->temp6())); #endif const void* rng = gen->compartment->addressOfRandomNumberGenerator(); masm.movePtr(ImmPtr(rng), tempReg); - static_assert(sizeof(Xoroshiro128PlusPlusRNG) == 2 * sizeof(uint64_t), - "Code below assumes Xoroshiro128PlusPlusRNG contains two uint64_t values"); + static_assert(sizeof(Xoroshiro128PlusPlusRNG) == 3 * sizeof(uint64_t), + "Code below assumes Xoroshiro128PlusPlusRNG contains three uint64_t values"); Address state0Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState0()); Address state1Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState1()); + Address state2Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState2()); // const uint64_t s0 = mState[0]; masm.load64(state0Addr, s0Reg); @@ -12086,49 +12083,61 @@ CodeGenerator::visitRandom(LRandom* ins) masm.load64(state1Addr, s1Reg); // const uint64_t result = rotl(s0 + s1, 17) + s0; - masm.move64(s0Reg, imr1Reg); - masm.add64(s1Reg, imr1Reg); + masm.move64(s0Reg, imrReg); + masm.add64(s1Reg, imrReg); #ifdef JS_PUNBOX64 - masm.rotateLeft64(Imm32(17), imr1Reg, resultReg); + masm.rotateLeft64(Imm32(17), imrReg, imrReg); #else - masm.rotateLeft64(Imm32(17), imr1Reg, resultReg, tempReg); + masm.Push(tempReg); + masm.rotateLeft64(Imm32(17), imrReg, imrReg, tempReg); + masm.Pop(tempReg); #endif - masm.add64(s0Reg, resultReg); + masm.add64(s0Reg, imrReg); + + // Store the result in mState[2], freeing up the intermediate register again. + masm.store64(imrReg, state2Addr); // s1 ^= s0; masm.xor64(s0Reg, s1Reg); - // mState[0] = rotl(s0, 49) ^ s1 ^ (s1 << 21); // a, b -#ifdef JS_PUNBOX64 - masm.rotateLeft64(Imm32(49), s0Reg, imr1Reg); // imr = s0 rotl 49 -#else - masm.rotateLeft64(Imm32(49), s0Reg, imr1Reg, tempReg); // imr = s0 rotl 49 -#endif - masm.xor64(s1Reg, imr1Reg); // imr ^ s1 - masm.move64(s1Reg, imr2Reg); // imr2 = s1 - masm.lshift64(Imm32(21), imr2Reg); // imr2 << 21 - masm.xor64(imr2Reg, imr1Reg); // imr ^ imr2 - masm.store64(imr1Reg, state0Addr); - // mState[1] = rotl(s1, 28); // c #ifdef JS_PUNBOX64 - masm.rotateLeft64(Imm32(28), s1Reg, imr1Reg); + masm.rotateLeft64(Imm32(28), s1Reg, imrReg); #else - masm.rotateLeft64(Imm32(28), s1Reg, imr1Reg, tempReg); + masm.Push(tempReg); + masm.rotateLeft64(Imm32(28), s1Reg, imrReg, tempReg); + masm.Pop(tempReg); #endif - masm.store64(imr1Reg, state1Addr); + masm.store64(imrReg, state1Addr); + + // mState[0] = rotl(s0, 49) ^ s1 ^ (s1 << 21); // a, b +#ifdef JS_PUNBOX64 + masm.rotateLeft64(Imm32(49), s0Reg, s0Reg); // s0 rotl 49 +#else + masm.Push(tempReg); + masm.rotateLeft64(Imm32(49), s0Reg, s0Reg, tempReg); // s0 rotl 49 + masm.Pop(tempReg); +#endif + masm.move64(s1Reg, imrReg); // imr = s1 + masm.lshift64(Imm32(21), imrReg); // imr << 21 + masm.xor64(imrReg, s0Reg); // s0 ^= imr + masm.xor64(s1Reg, s0Reg); // s0 ^= s1 + masm.store64(s0Reg, state0Addr); + + // Recall the result from mState[2] + masm.load64(state2Addr, s1Reg); // See comment in Xoroshiro128PlusPlusRNG::nextDouble(). static const int MantissaBits = FloatingPoint::kExponentShift + 1; static const double ScaleInv = double(1) / (1ULL << MantissaBits); // Mask the result bits to mantissa size - masm.and64(Imm64((1ULL << MantissaBits) - 1), resultReg); + masm.and64(Imm64((1ULL << MantissaBits) - 1), s1Reg); if (masm.convertUInt64ToDoubleNeedsTemp()) - masm.convertUInt64ToDouble(resultReg, output, tempReg); + masm.convertUInt64ToDouble(s1Reg, output, tempReg); else - masm.convertUInt64ToDouble(resultReg, output, Register::Invalid()); + masm.convertUInt64ToDouble(s1Reg, output, Register::Invalid()); // output *= ScaleInv masm.mulDoublePtr(ImmPtr(&ScaleInv), tempReg, output); diff --git a/js/src/jit/arm/Lowering-arm.cpp b/js/src/jit/arm/Lowering-arm.cpp index 917937343e..e0e6e07d29 100644 --- a/js/src/jit/arm/Lowering-arm.cpp +++ b/js/src/jit/arm/Lowering-arm.cpp @@ -968,10 +968,6 @@ void LIRGeneratorARM::visitRandom(MRandom* ins) { LRandom *lir = new(alloc()) LRandom(temp(), - temp(), - temp(), - temp(), - temp(), temp(), temp(), temp(), diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp index 2e688c8e1f..6134ef7a6a 100644 --- a/js/src/jit/arm64/Lowering-arm64.cpp +++ b/js/src/jit/arm64/Lowering-arm64.cpp @@ -326,8 +326,6 @@ void LIRGeneratorARM64::visitRandom(MRandom* ins) { LRandom *lir = new(alloc()) LRandom(temp(), - temp(), - temp(), temp(), temp(), temp()); diff --git a/js/src/jit/mips32/Lowering-mips32.cpp b/js/src/jit/mips32/Lowering-mips32.cpp index 8d2af2ef86..f63fd43891 100644 --- a/js/src/jit/mips32/Lowering-mips32.cpp +++ b/js/src/jit/mips32/Lowering-mips32.cpp @@ -249,10 +249,6 @@ void LIRGeneratorMIPS::visitRandom(MRandom* ins) { LRandom *lir = new(alloc()) LRandom(temp(), - temp(), - temp(), - temp(), - temp(), temp(), temp(), temp(), diff --git a/js/src/jit/mips64/Lowering-mips64.cpp b/js/src/jit/mips64/Lowering-mips64.cpp index 945cb8e6a0..c4d2c16425 100644 --- a/js/src/jit/mips64/Lowering-mips64.cpp +++ b/js/src/jit/mips64/Lowering-mips64.cpp @@ -179,8 +179,6 @@ void LIRGeneratorMIPS64::visitRandom(MRandom* ins) { LRandom *lir = new(alloc()) LRandom(temp(), - temp(), - temp(), temp(), temp(), temp()); diff --git a/js/src/jit/shared/LIR-shared.h b/js/src/jit/shared/LIR-shared.h index 674590b9de..b412353997 100644 --- a/js/src/jit/shared/LIR-shared.h +++ b/js/src/jit/shared/LIR-shared.h @@ -8220,9 +8220,9 @@ class LArrowNewTarget : public LInstructionHelper // Math.random(). #ifdef JS_PUNBOX64 -# define LRANDOM_NUM_TEMPS 6 +# define LRANDOM_NUM_TEMPS 4 #else -# define LRANDOM_NUM_TEMPS 11 +# define LRANDOM_NUM_TEMPS 7 #endif class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS> @@ -8230,12 +8230,10 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS> public: LIR_HEADER(Random) LRandom(const LDefinition &temp0, const LDefinition &temp1, - const LDefinition &temp2, const LDefinition &temp3, - const LDefinition &temp4, const LDefinition &temp5 + const LDefinition &temp2, const LDefinition &temp3 #ifndef JS_PUNBOX64 - , const LDefinition &temp6, const LDefinition &temp7 - , const LDefinition &temp8, const LDefinition &temp9 - , const LDefinition &temp10 + , const LDefinition &temp4, const LDefinition &temp5 + , const LDefinition &temp6 #endif ) { @@ -8243,14 +8241,10 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS> setTemp(1, temp1); setTemp(2, temp2); setTemp(3, temp3); +#ifndef JS_PUNBOX64 setTemp(4, temp4); setTemp(5, temp5); -#ifndef JS_PUNBOX64 setTemp(6, temp6); - setTemp(7, temp7); - setTemp(8, temp8); - setTemp(9, temp9); - setTemp(10, temp10); #endif } const LDefinition* temp0() { @@ -8265,28 +8259,16 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS> const LDefinition* temp3() { return getTemp(3); } +#ifndef JS_PUNBOX64 const LDefinition* temp4() { return getTemp(4); } const LDefinition* temp5() { return getTemp(5); } -#ifndef JS_PUNBOX64 const LDefinition* temp6() { return getTemp(6); } - const LDefinition* temp7() { - return getTemp(7); - } - const LDefinition* temp8() { - return getTemp(8); - } - const LDefinition* temp9() { - return getTemp(9); - } - const LDefinition* temp10() { - return getTemp(10); - } #endif MRandom* mir() const { diff --git a/js/src/jit/x64/Lowering-x64.cpp b/js/src/jit/x64/Lowering-x64.cpp index 6ad54fa272..4a385db165 100644 --- a/js/src/jit/x64/Lowering-x64.cpp +++ b/js/src/jit/x64/Lowering-x64.cpp @@ -412,8 +412,6 @@ void LIRGeneratorX64::visitRandom(MRandom* ins) { LRandom *lir = new(alloc()) LRandom(temp(), - temp(), - temp(), temp(), temp(), temp()); diff --git a/js/src/jit/x86/Lowering-x86.cpp b/js/src/jit/x86/Lowering-x86.cpp index 4080d3d8c4..53302a4d71 100644 --- a/js/src/jit/x86/Lowering-x86.cpp +++ b/js/src/jit/x86/Lowering-x86.cpp @@ -607,10 +607,6 @@ void LIRGeneratorX86::visitRandom(MRandom* ins) { LRandom *lir = new(alloc()) LRandom(temp(), - temp(), - temp(), - temp(), - temp(), temp(), temp(), temp(), diff --git a/js/src/tests/manual/random-uniformity-test.html b/js/src/tests/manual/random-uniformity-test.html index dbee14c151..59993e2716 100644 --- a/js/src/tests/manual/random-uniformity-test.html +++ b/js/src/tests/manual/random-uniformity-test.html @@ -31,15 +31,17 @@ const original = () => { var startTime = performance.now(); const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height); const data = imageData.data; - for (let i = 0; i < data.length; i += 4) { + for (let j = 0; j < 10; j++) { // 10x for performance measurement + for (let i = 0; i < data.length; i += 4) { data[i] = Math.random()*255; // red data[i + 1] = Math.random()*255; // green data[i + 2] = Math.random()*255; // blue data[i+3] = 255; + } } ctx.putImageData(imageData, 0, 0); var endTime = performance.now(); - duration.innerHTML = "Total pixels: " + (data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms"; + duration.innerHTML = "Total pixels: " + (10 * data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms"; }; const invert = () => { @@ -60,16 +62,18 @@ const grayscale = () => { var startTime = performance.now(); const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height); const data = imageData.data; - for (let i = 0; i < data.length; i += 4) { + for (let j = 0; j < 10; j++) { // 10x for performance measurement + for (let i = 0; i < data.length; i += 4) { const avg = Math.random()*255; data[i] = avg; // red data[i + 1] = avg; // green data[i + 2] = avg; // blue data[i+3] = 255; + } } ctx.putImageData(imageData, 0, 0); var endTime = performance.now(); - duration.innerHTML = "Total pixels: " + (data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms"; + duration.innerHTML = "Total pixels: " + (10 * data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms"; }; const sepia = () => { diff --git a/mfbt/Xoroshiro128PlusPlusRNG.h b/mfbt/Xoroshiro128PlusPlusRNG.h index 04197e1a00..c6537127c9 100644 --- a/mfbt/Xoroshiro128PlusPlusRNG.h +++ b/mfbt/Xoroshiro128PlusPlusRNG.h @@ -32,12 +32,16 @@ namespace non_crypto { * the same speed and use half of the space; the same comments apply. * They are suitable only for low-scale parallel applications. * - * The stream of numbers produced by this method repeats every 2**256 - 1 calls (i.e. never, for all practical + * The stream of numbers produced by this method repeats every 2^128 - 1 calls (i.e. never, for all practical * purposes). * */ class Xoroshiro128PlusPlusRNG { - uint64_t mState[2]; + /* + * mState[0] and mState[1] are as-described in the Xoroshiro128++ paper. + * mState[2] is used for temporary storage of the result in JIT code. + */ + uint64_t mState[3]; public: /* @@ -82,15 +86,15 @@ class Xoroshiro128PlusPlusRNG { /* * Return a pseudo-random floating-point value in the range [0, 1). More - * precisely, choose an integer in the range [0, 2**53) and divide it by - * 2**53. Given the 2**256 - 1 period noted above, the produced doubles are + * precisely, choose an integer in the range [0, 2^53) and divide it by + * 2^53. Given the 2^128 - 1 period noted above, the produced doubles are * all but uniformly distributed in this range. */ double nextDouble() { /* * Because the IEEE 64-bit floating point format stores the leading '1' bit * of the mantissa implicitly, it effectively represents a mantissa in the - * range [0, 2**53) in only 52 bits. FloatingPoint::kExponentShift + * range [0, 2^53) in only 52 bits. FloatingPoint::kExponentShift * is the width of the bitfield in the in-memory format, so we must add one * to get the mantissa's range. */ @@ -109,6 +113,7 @@ class Xoroshiro128PlusPlusRNG { MOZ_ASSERT(aState0 || aState1); mState[0] = aState0; mState[1] = aState1; + mState[2] = 0; // Could be left uninitialized, but we do this just-in-case. } static size_t offsetOfState0() { @@ -117,6 +122,9 @@ class Xoroshiro128PlusPlusRNG { static size_t offsetOfState1() { return offsetof(Xoroshiro128PlusPlusRNG, mState[1]); } + static size_t offsetOfState2() { + return offsetof(Xoroshiro128PlusPlusRNG, mState[2]); + } }; } // namespace non_crypto