1
0
mirror of https://github.com/roytam1/UXP.git synced 2026-05-26 13:58:49 +00:00

Issue #2895 - Implement 32-bit compatible Xoroshiro128++

This puts in a reduced-register version of Xoroshiro128++ which /just/
fits in our x86 7-register space by making use of an extra state slot
(mState[2]) to temporarily store the result to be passed back.

Resolves #2895
This commit is contained in:
Moonchild
2026-01-27 11:43:43 +01:00
committed by roytam1
parent e5497c8425
commit 0dbad452e6
10 changed files with 68 additions and 83 deletions
+40 -31
View File
@@ -12059,26 +12059,23 @@ CodeGenerator::visitRandom(LRandom* ins)
Register64 s0Reg(ToRegister(ins->temp1()));
Register64 s1Reg(ToRegister(ins->temp2()));
// Helper registers for intermediate and final results
Register64 imr1Reg(ToRegister(ins->temp3()));
Register64 imr2Reg(ToRegister(ins->temp4()));
Register64 resultReg(ToRegister(ins->temp5()));
Register64 imrReg(ToRegister(ins->temp3()));
#else
Register64 s0Reg(ToRegister(ins->temp1()), ToRegister(ins->temp2()));
Register64 s1Reg(ToRegister(ins->temp3()), ToRegister(ins->temp4()));
// Helper registers for intermediate and final results
Register64 imr1Reg(ToRegister(ins->temp5()), ToRegister(ins->temp6()));
Register64 imr2Reg(ToRegister(ins->temp7()), ToRegister(ins->temp8()));
Register64 resultReg(ToRegister(ins->temp9()), ToRegister(ins->temp10()));
Register64 imrReg(ToRegister(ins->temp5()), ToRegister(ins->temp6()));
#endif
const void* rng = gen->compartment->addressOfRandomNumberGenerator();
masm.movePtr(ImmPtr(rng), tempReg);
static_assert(sizeof(Xoroshiro128PlusPlusRNG) == 2 * sizeof(uint64_t),
"Code below assumes Xoroshiro128PlusPlusRNG contains two uint64_t values");
static_assert(sizeof(Xoroshiro128PlusPlusRNG) == 3 * sizeof(uint64_t),
"Code below assumes Xoroshiro128PlusPlusRNG contains three uint64_t values");
Address state0Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState0());
Address state1Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState1());
Address state2Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState2());
// const uint64_t s0 = mState[0];
masm.load64(state0Addr, s0Reg);
@@ -12086,49 +12083,61 @@ CodeGenerator::visitRandom(LRandom* ins)
masm.load64(state1Addr, s1Reg);
// const uint64_t result = rotl(s0 + s1, 17) + s0;
masm.move64(s0Reg, imr1Reg);
masm.add64(s1Reg, imr1Reg);
masm.move64(s0Reg, imrReg);
masm.add64(s1Reg, imrReg);
#ifdef JS_PUNBOX64
masm.rotateLeft64(Imm32(17), imr1Reg, resultReg);
masm.rotateLeft64(Imm32(17), imrReg, imrReg);
#else
masm.rotateLeft64(Imm32(17), imr1Reg, resultReg, tempReg);
masm.Push(tempReg);
masm.rotateLeft64(Imm32(17), imrReg, imrReg, tempReg);
masm.Pop(tempReg);
#endif
masm.add64(s0Reg, resultReg);
masm.add64(s0Reg, imrReg);
// Store the result in mState[2], freeing up the intermediate register again.
masm.store64(imrReg, state2Addr);
// s1 ^= s0;
masm.xor64(s0Reg, s1Reg);
// mState[0] = rotl(s0, 49) ^ s1 ^ (s1 << 21); // a, b
#ifdef JS_PUNBOX64
masm.rotateLeft64(Imm32(49), s0Reg, imr1Reg); // imr = s0 rotl 49
#else
masm.rotateLeft64(Imm32(49), s0Reg, imr1Reg, tempReg); // imr = s0 rotl 49
#endif
masm.xor64(s1Reg, imr1Reg); // imr ^ s1
masm.move64(s1Reg, imr2Reg); // imr2 = s1
masm.lshift64(Imm32(21), imr2Reg); // imr2 << 21
masm.xor64(imr2Reg, imr1Reg); // imr ^ imr2
masm.store64(imr1Reg, state0Addr);
// mState[1] = rotl(s1, 28); // c
#ifdef JS_PUNBOX64
masm.rotateLeft64(Imm32(28), s1Reg, imr1Reg);
masm.rotateLeft64(Imm32(28), s1Reg, imrReg);
#else
masm.rotateLeft64(Imm32(28), s1Reg, imr1Reg, tempReg);
masm.Push(tempReg);
masm.rotateLeft64(Imm32(28), s1Reg, imrReg, tempReg);
masm.Pop(tempReg);
#endif
masm.store64(imr1Reg, state1Addr);
masm.store64(imrReg, state1Addr);
// mState[0] = rotl(s0, 49) ^ s1 ^ (s1 << 21); // a, b
#ifdef JS_PUNBOX64
masm.rotateLeft64(Imm32(49), s0Reg, s0Reg); // s0 rotl 49
#else
masm.Push(tempReg);
masm.rotateLeft64(Imm32(49), s0Reg, s0Reg, tempReg); // s0 rotl 49
masm.Pop(tempReg);
#endif
masm.move64(s1Reg, imrReg); // imr = s1
masm.lshift64(Imm32(21), imrReg); // imr << 21
masm.xor64(imrReg, s0Reg); // s0 ^= imr
masm.xor64(s1Reg, s0Reg); // s0 ^= s1
masm.store64(s0Reg, state0Addr);
// Recall the result from mState[2]
masm.load64(state2Addr, s1Reg);
// See comment in Xoroshiro128PlusPlusRNG::nextDouble().
static const int MantissaBits = FloatingPoint<double>::kExponentShift + 1;
static const double ScaleInv = double(1) / (1ULL << MantissaBits);
// Mask the result bits to mantissa size
masm.and64(Imm64((1ULL << MantissaBits) - 1), resultReg);
masm.and64(Imm64((1ULL << MantissaBits) - 1), s1Reg);
if (masm.convertUInt64ToDoubleNeedsTemp())
masm.convertUInt64ToDouble(resultReg, output, tempReg);
masm.convertUInt64ToDouble(s1Reg, output, tempReg);
else
masm.convertUInt64ToDouble(resultReg, output, Register::Invalid());
masm.convertUInt64ToDouble(s1Reg, output, Register::Invalid());
// output *= ScaleInv
masm.mulDoublePtr(ImmPtr(&ScaleInv), tempReg, output);
-4
View File
@@ -968,10 +968,6 @@ void
LIRGeneratorARM::visitRandom(MRandom* ins)
{
LRandom *lir = new(alloc()) LRandom(temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
-2
View File
@@ -326,8 +326,6 @@ void
LIRGeneratorARM64::visitRandom(MRandom* ins)
{
LRandom *lir = new(alloc()) LRandom(temp(),
temp(),
temp(),
temp(),
temp(),
temp());
-4
View File
@@ -249,10 +249,6 @@ void
LIRGeneratorMIPS::visitRandom(MRandom* ins)
{
LRandom *lir = new(alloc()) LRandom(temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
-2
View File
@@ -179,8 +179,6 @@ void
LIRGeneratorMIPS64::visitRandom(MRandom* ins)
{
LRandom *lir = new(alloc()) LRandom(temp(),
temp(),
temp(),
temp(),
temp(),
temp());
+7 -25
View File
@@ -8220,9 +8220,9 @@ class LArrowNewTarget : public LInstructionHelper<BOX_PIECES, 1, 0>
// Math.random().
#ifdef JS_PUNBOX64
# define LRANDOM_NUM_TEMPS 6
# define LRANDOM_NUM_TEMPS 4
#else
# define LRANDOM_NUM_TEMPS 11
# define LRANDOM_NUM_TEMPS 7
#endif
class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS>
@@ -8230,12 +8230,10 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS>
public:
LIR_HEADER(Random)
LRandom(const LDefinition &temp0, const LDefinition &temp1,
const LDefinition &temp2, const LDefinition &temp3,
const LDefinition &temp4, const LDefinition &temp5
const LDefinition &temp2, const LDefinition &temp3
#ifndef JS_PUNBOX64
, const LDefinition &temp6, const LDefinition &temp7
, const LDefinition &temp8, const LDefinition &temp9
, const LDefinition &temp10
, const LDefinition &temp4, const LDefinition &temp5
, const LDefinition &temp6
#endif
)
{
@@ -8243,14 +8241,10 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS>
setTemp(1, temp1);
setTemp(2, temp2);
setTemp(3, temp3);
#ifndef JS_PUNBOX64
setTemp(4, temp4);
setTemp(5, temp5);
#ifndef JS_PUNBOX64
setTemp(6, temp6);
setTemp(7, temp7);
setTemp(8, temp8);
setTemp(9, temp9);
setTemp(10, temp10);
#endif
}
const LDefinition* temp0() {
@@ -8265,28 +8259,16 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS>
const LDefinition* temp3() {
return getTemp(3);
}
#ifndef JS_PUNBOX64
const LDefinition* temp4() {
return getTemp(4);
}
const LDefinition* temp5() {
return getTemp(5);
}
#ifndef JS_PUNBOX64
const LDefinition* temp6() {
return getTemp(6);
}
const LDefinition* temp7() {
return getTemp(7);
}
const LDefinition* temp8() {
return getTemp(8);
}
const LDefinition* temp9() {
return getTemp(9);
}
const LDefinition* temp10() {
return getTemp(10);
}
#endif
MRandom* mir() const {
-2
View File
@@ -412,8 +412,6 @@ void
LIRGeneratorX64::visitRandom(MRandom* ins)
{
LRandom *lir = new(alloc()) LRandom(temp(),
temp(),
temp(),
temp(),
temp(),
temp());
-4
View File
@@ -607,10 +607,6 @@ void
LIRGeneratorX86::visitRandom(MRandom* ins)
{
LRandom *lir = new(alloc()) LRandom(temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
temp(),
@@ -31,15 +31,17 @@ const original = () => {
var startTime = performance.now();
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
const data = imageData.data;
for (let i = 0; i < data.length; i += 4) {
for (let j = 0; j < 10; j++) { // 10x for performance measurement
for (let i = 0; i < data.length; i += 4) {
data[i] = Math.random()*255; // red
data[i + 1] = Math.random()*255; // green
data[i + 2] = Math.random()*255; // blue
data[i+3] = 255;
}
}
ctx.putImageData(imageData, 0, 0);
var endTime = performance.now();
duration.innerHTML = "Total pixels: " + (data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms";
duration.innerHTML = "Total pixels: " + (10 * data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms";
};
const invert = () => {
@@ -60,16 +62,18 @@ const grayscale = () => {
var startTime = performance.now();
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
const data = imageData.data;
for (let i = 0; i < data.length; i += 4) {
for (let j = 0; j < 10; j++) { // 10x for performance measurement
for (let i = 0; i < data.length; i += 4) {
const avg = Math.random()*255;
data[i] = avg; // red
data[i + 1] = avg; // green
data[i + 2] = avg; // blue
data[i+3] = 255;
}
}
ctx.putImageData(imageData, 0, 0);
var endTime = performance.now();
duration.innerHTML = "Total pixels: " + (data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms";
duration.innerHTML = "Total pixels: " + (10 * data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms";
};
const sepia = () => {
+13 -5
View File
@@ -32,12 +32,16 @@ namespace non_crypto {
* the same speed and use half of the space; the same comments apply.
* They are suitable only for low-scale parallel applications.
*
* The stream of numbers produced by this method repeats every 2**256 - 1 calls (i.e. never, for all practical
* The stream of numbers produced by this method repeats every 2^128 - 1 calls (i.e. never, for all practical
* purposes).
*
*/
class Xoroshiro128PlusPlusRNG {
uint64_t mState[2];
/*
* mState[0] and mState[1] are as described in the Xoroshiro128++ paper.
* mState[2] is used for temporary storage of the result in JIT code.
*/
uint64_t mState[3];
public:
/*
@@ -82,15 +86,15 @@ class Xoroshiro128PlusPlusRNG {
/*
* Return a pseudo-random floating-point value in the range [0, 1). More
* precisely, choose an integer in the range [0, 2**53) and divide it by
* 2**53. Given the 2**256 - 1 period noted above, the produced doubles are
* precisely, choose an integer in the range [0, 2^53) and divide it by
* 2^53. Given the 2^128 - 1 period noted above, the produced doubles are
* all but uniformly distributed in this range.
*/
double nextDouble() {
/*
* Because the IEEE 64-bit floating point format stores the leading '1' bit
* of the mantissa implicitly, it effectively represents a mantissa in the
* range [0, 2**53) in only 52 bits. FloatingPoint<double>::kExponentShift
* range [0, 2^53) in only 52 bits. FloatingPoint<double>::kExponentShift
* is the width of the bitfield in the in-memory format, so we must add one
* to get the mantissa's range.
*/
@@ -109,6 +113,7 @@ class Xoroshiro128PlusPlusRNG {
MOZ_ASSERT(aState0 || aState1);
mState[0] = aState0;
mState[1] = aState1;
mState[2] = 0; // Could be left uninitialized, but we do this just-in-case.
}
static size_t offsetOfState0() {
@@ -117,6 +122,9 @@ class Xoroshiro128PlusPlusRNG {
/*
 * Byte offset of mState[1] within a Xoroshiro128PlusPlusRNG object, as
 * computed by offsetof.
 */
static size_t offsetOfState1() {
return offsetof(Xoroshiro128PlusPlusRNG, mState[1]);
}
/*
 * Byte offset of mState[2] within a Xoroshiro128PlusPlusRNG object, as
 * computed by offsetof. mState[2] is the extra slot used for temporary
 * storage of the result in JIT code.
 */
static size_t offsetOfState2() {
return offsetof(Xoroshiro128PlusPlusRNG, mState[2]);
}
};
} // namespace non_crypto