mirror of
https://github.com/roytam1/UXP.git
synced 2026-05-26 13:58:49 +00:00
Issue #2895 - Implement 32-bit compatible Xoroshiro128++
This adds a reduced-register version of Xoroshiro128++ that just barely fits within the 7 registers available on x86. It does so by using an extra state slot (mState[2]) to temporarily hold the result that is passed back. Resolves #2895.
This commit is contained in:
@@ -12059,26 +12059,23 @@ CodeGenerator::visitRandom(LRandom* ins)
|
||||
Register64 s0Reg(ToRegister(ins->temp1()));
|
||||
Register64 s1Reg(ToRegister(ins->temp2()));
|
||||
// Helper registers for intermediate and final results
|
||||
Register64 imr1Reg(ToRegister(ins->temp3()));
|
||||
Register64 imr2Reg(ToRegister(ins->temp4()));
|
||||
Register64 resultReg(ToRegister(ins->temp5()));
|
||||
Register64 imrReg(ToRegister(ins->temp3()));
|
||||
#else
|
||||
Register64 s0Reg(ToRegister(ins->temp1()), ToRegister(ins->temp2()));
|
||||
Register64 s1Reg(ToRegister(ins->temp3()), ToRegister(ins->temp4()));
|
||||
// Helper registers for intermediate and final results
|
||||
Register64 imr1Reg(ToRegister(ins->temp5()), ToRegister(ins->temp6()));
|
||||
Register64 imr2Reg(ToRegister(ins->temp7()), ToRegister(ins->temp8()));
|
||||
Register64 resultReg(ToRegister(ins->temp9()), ToRegister(ins->temp10()));
|
||||
Register64 imrReg(ToRegister(ins->temp5()), ToRegister(ins->temp6()));
|
||||
#endif
|
||||
|
||||
const void* rng = gen->compartment->addressOfRandomNumberGenerator();
|
||||
masm.movePtr(ImmPtr(rng), tempReg);
|
||||
|
||||
static_assert(sizeof(Xoroshiro128PlusPlusRNG) == 2 * sizeof(uint64_t),
|
||||
"Code below assumes Xoroshiro128PlusPlusRNG contains two uint64_t values");
|
||||
static_assert(sizeof(Xoroshiro128PlusPlusRNG) == 3 * sizeof(uint64_t),
|
||||
"Code below assumes Xoroshiro128PlusPlusRNG contains three uint64_t values");
|
||||
|
||||
Address state0Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState0());
|
||||
Address state1Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState1());
|
||||
Address state2Addr(tempReg, Xoroshiro128PlusPlusRNG::offsetOfState2());
|
||||
|
||||
// const uint64_t s0 = mState[0];
|
||||
masm.load64(state0Addr, s0Reg);
|
||||
@@ -12086,49 +12083,61 @@ CodeGenerator::visitRandom(LRandom* ins)
|
||||
masm.load64(state1Addr, s1Reg);
|
||||
|
||||
// const uint64_t result = rotl(s0 + s1, 17) + s0;
|
||||
masm.move64(s0Reg, imr1Reg);
|
||||
masm.add64(s1Reg, imr1Reg);
|
||||
masm.move64(s0Reg, imrReg);
|
||||
masm.add64(s1Reg, imrReg);
|
||||
#ifdef JS_PUNBOX64
|
||||
masm.rotateLeft64(Imm32(17), imr1Reg, resultReg);
|
||||
masm.rotateLeft64(Imm32(17), imrReg, imrReg);
|
||||
#else
|
||||
masm.rotateLeft64(Imm32(17), imr1Reg, resultReg, tempReg);
|
||||
masm.Push(tempReg);
|
||||
masm.rotateLeft64(Imm32(17), imrReg, imrReg, tempReg);
|
||||
masm.Pop(tempReg);
|
||||
#endif
|
||||
masm.add64(s0Reg, resultReg);
|
||||
masm.add64(s0Reg, imrReg);
|
||||
|
||||
// Store the result in mState[2], freeing up the intermediate register again.
|
||||
masm.store64(imrReg, state2Addr);
|
||||
|
||||
// s1 ^= s0;
|
||||
masm.xor64(s0Reg, s1Reg);
|
||||
|
||||
// mState[0] = rotl(s0, 49) ^ s1 ^ (s1 << 21); // a, b
|
||||
#ifdef JS_PUNBOX64
|
||||
masm.rotateLeft64(Imm32(49), s0Reg, imr1Reg); // imr = s0 rotl 49
|
||||
#else
|
||||
masm.rotateLeft64(Imm32(49), s0Reg, imr1Reg, tempReg); // imr = s0 rotl 49
|
||||
#endif
|
||||
masm.xor64(s1Reg, imr1Reg); // imr ^ s1
|
||||
masm.move64(s1Reg, imr2Reg); // imr2 = s1
|
||||
masm.lshift64(Imm32(21), imr2Reg); // imr2 << 21
|
||||
masm.xor64(imr2Reg, imr1Reg); // imr ^ imr2
|
||||
masm.store64(imr1Reg, state0Addr);
|
||||
|
||||
// mState[1] = rotl(s1, 28); // c
|
||||
#ifdef JS_PUNBOX64
|
||||
masm.rotateLeft64(Imm32(28), s1Reg, imr1Reg);
|
||||
masm.rotateLeft64(Imm32(28), s1Reg, imrReg);
|
||||
#else
|
||||
masm.rotateLeft64(Imm32(28), s1Reg, imr1Reg, tempReg);
|
||||
masm.Push(tempReg);
|
||||
masm.rotateLeft64(Imm32(28), s1Reg, imrReg, tempReg);
|
||||
masm.Pop(tempReg);
|
||||
#endif
|
||||
masm.store64(imr1Reg, state1Addr);
|
||||
masm.store64(imrReg, state1Addr);
|
||||
|
||||
// mState[0] = rotl(s0, 49) ^ s1 ^ (s1 << 21); // a, b
|
||||
#ifdef JS_PUNBOX64
|
||||
masm.rotateLeft64(Imm32(49), s0Reg, s0Reg); // s0 rotl 49
|
||||
#else
|
||||
masm.Push(tempReg);
|
||||
masm.rotateLeft64(Imm32(49), s0Reg, s0Reg, tempReg); // s0 rotl 49
|
||||
masm.Pop(tempReg);
|
||||
#endif
|
||||
masm.move64(s1Reg, imrReg); // imr = s1
|
||||
masm.lshift64(Imm32(21), imrReg); // imr << 21
|
||||
masm.xor64(imrReg, s0Reg); // s0 ^= imr
|
||||
masm.xor64(s1Reg, s0Reg); // s0 ^= s1
|
||||
masm.store64(s0Reg, state0Addr);
|
||||
|
||||
// Recall the result from mState[2]
|
||||
masm.load64(state2Addr, s1Reg);
|
||||
|
||||
// See comment in Xoroshiro128PlusPlusRNG::nextDouble().
|
||||
static const int MantissaBits = FloatingPoint<double>::kExponentShift + 1;
|
||||
static const double ScaleInv = double(1) / (1ULL << MantissaBits);
|
||||
|
||||
// Mask the result bits to mantissa size
|
||||
masm.and64(Imm64((1ULL << MantissaBits) - 1), resultReg);
|
||||
masm.and64(Imm64((1ULL << MantissaBits) - 1), s1Reg);
|
||||
|
||||
if (masm.convertUInt64ToDoubleNeedsTemp())
|
||||
masm.convertUInt64ToDouble(resultReg, output, tempReg);
|
||||
masm.convertUInt64ToDouble(s1Reg, output, tempReg);
|
||||
else
|
||||
masm.convertUInt64ToDouble(resultReg, output, Register::Invalid());
|
||||
masm.convertUInt64ToDouble(s1Reg, output, Register::Invalid());
|
||||
|
||||
// output *= ScaleInv
|
||||
masm.mulDoublePtr(ImmPtr(&ScaleInv), tempReg, output);
|
||||
|
||||
@@ -968,10 +968,6 @@ void
|
||||
LIRGeneratorARM::visitRandom(MRandom* ins)
|
||||
{
|
||||
LRandom *lir = new(alloc()) LRandom(temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
|
||||
@@ -326,8 +326,6 @@ void
|
||||
LIRGeneratorARM64::visitRandom(MRandom* ins)
|
||||
{
|
||||
LRandom *lir = new(alloc()) LRandom(temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp());
|
||||
|
||||
@@ -249,10 +249,6 @@ void
|
||||
LIRGeneratorMIPS::visitRandom(MRandom* ins)
|
||||
{
|
||||
LRandom *lir = new(alloc()) LRandom(temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
|
||||
@@ -179,8 +179,6 @@ void
|
||||
LIRGeneratorMIPS64::visitRandom(MRandom* ins)
|
||||
{
|
||||
LRandom *lir = new(alloc()) LRandom(temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp());
|
||||
|
||||
@@ -8220,9 +8220,9 @@ class LArrowNewTarget : public LInstructionHelper<BOX_PIECES, 1, 0>
|
||||
|
||||
// Math.random().
|
||||
#ifdef JS_PUNBOX64
|
||||
# define LRANDOM_NUM_TEMPS 6
|
||||
# define LRANDOM_NUM_TEMPS 4
|
||||
#else
|
||||
# define LRANDOM_NUM_TEMPS 11
|
||||
# define LRANDOM_NUM_TEMPS 7
|
||||
#endif
|
||||
|
||||
class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS>
|
||||
@@ -8230,12 +8230,10 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS>
|
||||
public:
|
||||
LIR_HEADER(Random)
|
||||
LRandom(const LDefinition &temp0, const LDefinition &temp1,
|
||||
const LDefinition &temp2, const LDefinition &temp3,
|
||||
const LDefinition &temp4, const LDefinition &temp5
|
||||
const LDefinition &temp2, const LDefinition &temp3
|
||||
#ifndef JS_PUNBOX64
|
||||
, const LDefinition &temp6, const LDefinition &temp7
|
||||
, const LDefinition &temp8, const LDefinition &temp9
|
||||
, const LDefinition &temp10
|
||||
, const LDefinition &temp4, const LDefinition &temp5
|
||||
, const LDefinition &temp6
|
||||
#endif
|
||||
)
|
||||
{
|
||||
@@ -8243,14 +8241,10 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS>
|
||||
setTemp(1, temp1);
|
||||
setTemp(2, temp2);
|
||||
setTemp(3, temp3);
|
||||
#ifndef JS_PUNBOX64
|
||||
setTemp(4, temp4);
|
||||
setTemp(5, temp5);
|
||||
#ifndef JS_PUNBOX64
|
||||
setTemp(6, temp6);
|
||||
setTemp(7, temp7);
|
||||
setTemp(8, temp8);
|
||||
setTemp(9, temp9);
|
||||
setTemp(10, temp10);
|
||||
#endif
|
||||
}
|
||||
const LDefinition* temp0() {
|
||||
@@ -8265,28 +8259,16 @@ class LRandom : public LInstructionHelper<1, 0, LRANDOM_NUM_TEMPS>
|
||||
const LDefinition* temp3() {
|
||||
return getTemp(3);
|
||||
}
|
||||
#ifndef JS_PUNBOX64
|
||||
const LDefinition* temp4() {
|
||||
return getTemp(4);
|
||||
}
|
||||
const LDefinition* temp5() {
|
||||
return getTemp(5);
|
||||
}
|
||||
#ifndef JS_PUNBOX64
|
||||
const LDefinition* temp6() {
|
||||
return getTemp(6);
|
||||
}
|
||||
const LDefinition* temp7() {
|
||||
return getTemp(7);
|
||||
}
|
||||
const LDefinition* temp8() {
|
||||
return getTemp(8);
|
||||
}
|
||||
const LDefinition* temp9() {
|
||||
return getTemp(9);
|
||||
}
|
||||
const LDefinition* temp10() {
|
||||
return getTemp(10);
|
||||
}
|
||||
#endif
|
||||
|
||||
MRandom* mir() const {
|
||||
|
||||
@@ -412,8 +412,6 @@ void
|
||||
LIRGeneratorX64::visitRandom(MRandom* ins)
|
||||
{
|
||||
LRandom *lir = new(alloc()) LRandom(temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp());
|
||||
|
||||
@@ -607,10 +607,6 @@ void
|
||||
LIRGeneratorX86::visitRandom(MRandom* ins)
|
||||
{
|
||||
LRandom *lir = new(alloc()) LRandom(temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
temp(),
|
||||
|
||||
@@ -31,15 +31,17 @@ const original = () => {
|
||||
var startTime = performance.now();
|
||||
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
||||
const data = imageData.data;
|
||||
for (let i = 0; i < data.length; i += 4) {
|
||||
for (let j = 0; j < 10; j++) { // 10x for performance measurement
|
||||
for (let i = 0; i < data.length; i += 4) {
|
||||
data[i] = Math.random()*255; // red
|
||||
data[i + 1] = Math.random()*255; // green
|
||||
data[i + 2] = Math.random()*255; // blue
|
||||
data[i+3] = 255;
|
||||
}
|
||||
}
|
||||
ctx.putImageData(imageData, 0, 0);
|
||||
var endTime = performance.now();
|
||||
duration.innerHTML = "Total pixels: " + (data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms";
|
||||
duration.innerHTML = "Total pixels: " + (10 * data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms";
|
||||
};
|
||||
|
||||
const invert = () => {
|
||||
@@ -60,16 +62,18 @@ const grayscale = () => {
|
||||
var startTime = performance.now();
|
||||
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
||||
const data = imageData.data;
|
||||
for (let i = 0; i < data.length; i += 4) {
|
||||
for (let j = 0; j < 10; j++) { // 10x for performance measurement
|
||||
for (let i = 0; i < data.length; i += 4) {
|
||||
const avg = Math.random()*255;
|
||||
data[i] = avg; // red
|
||||
data[i + 1] = avg; // green
|
||||
data[i + 2] = avg; // blue
|
||||
data[i+3] = 255;
|
||||
}
|
||||
}
|
||||
ctx.putImageData(imageData, 0, 0);
|
||||
var endTime = performance.now();
|
||||
duration.innerHTML = "Total pixels: " + (data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms";
|
||||
duration.innerHTML = "Total pixels: " + (10 * data.length / 4) +" -- Time taken: " + (endTime - startTime) + " ms";
|
||||
};
|
||||
|
||||
const sepia = () => {
|
||||
|
||||
@@ -32,12 +32,16 @@ namespace non_crypto {
|
||||
* the same speed and use half of the space; the same comments apply.
|
||||
* They are suitable only for low-scale parallel applications.
|
||||
*
|
||||
* The stream of numbers produced by this method repeats every 2**256 - 1 calls (i.e. never, for all practical
|
||||
* The stream of numbers produced by this method repeats every 2^128 - 1 calls (i.e. never, for all practical
|
||||
* purposes).
|
||||
*
|
||||
*/
|
||||
class Xoroshiro128PlusPlusRNG {
|
||||
uint64_t mState[2];
|
||||
/*
|
||||
* mState[0] and mState[1] are as described in the Xoroshiro128++ paper.
|
||||
* mState[2] is used for temporary storage of the result in JIT code.
|
||||
*/
|
||||
uint64_t mState[3];
|
||||
|
||||
public:
|
||||
/*
|
||||
@@ -82,15 +86,15 @@ class Xoroshiro128PlusPlusRNG {
|
||||
|
||||
/*
|
||||
* Return a pseudo-random floating-point value in the range [0, 1). More
|
||||
* precisely, choose an integer in the range [0, 2**53) and divide it by
|
||||
* 2**53. Given the 2**256 - 1 period noted above, the produced doubles are
|
||||
* precisely, choose an integer in the range [0, 2^53) and divide it by
|
||||
* 2^53. Given the 2^128 - 1 period noted above, the produced doubles are
|
||||
* all but uniformly distributed in this range.
|
||||
*/
|
||||
double nextDouble() {
|
||||
/*
|
||||
* Because the IEEE 64-bit floating point format stores the leading '1' bit
|
||||
* of the mantissa implicitly, it effectively represents a mantissa in the
|
||||
* range [0, 2**53) in only 52 bits. FloatingPoint<double>::kExponentShift
|
||||
* range [0, 2^53) in only 52 bits. FloatingPoint<double>::kExponentShift
|
||||
* is the width of the bitfield in the in-memory format, so we must add one
|
||||
* to get the mantissa's range.
|
||||
*/
|
||||
@@ -109,6 +113,7 @@ class Xoroshiro128PlusPlusRNG {
|
||||
MOZ_ASSERT(aState0 || aState1);
|
||||
mState[0] = aState0;
|
||||
mState[1] = aState1;
|
||||
mState[2] = 0; // Could be left uninitialized, but we do this just-in-case.
|
||||
}
|
||||
|
||||
static size_t offsetOfState0() {
|
||||
@@ -117,6 +122,9 @@ class Xoroshiro128PlusPlusRNG {
|
||||
static size_t offsetOfState1() {
|
||||
return offsetof(Xoroshiro128PlusPlusRNG, mState[1]);
|
||||
}
|
||||
static size_t offsetOfState2() {
|
||||
return offsetof(Xoroshiro128PlusPlusRNG, mState[2]);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace non_crypto
|
||||
|
||||
Reference in New Issue
Block a user