Issue #1677 - Part 3: Create shim definitions for V8-specific code in new regexp implementation

This commit is contained in:
Matt A. Tobin
2020-11-09 20:20:18 -05:00
parent 19499014a3
commit cb0208163f
14 changed files with 1748 additions and 0 deletions
+1
View File
@@ -7,6 +7,7 @@
#ifdef V8_INTL_SUPPORT
#include "regexp/regexp-shim.h"
namespace v8 {
namespace internal {
+1
View File
@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_AST_H_
#define V8_REGEXP_REGEXP_AST_H_
#include "regexp/regexp-shim.h"
namespace v8 {
namespace internal {
+1
View File
@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
#define V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
#include "regexp/regexp-shim.h"
namespace v8 {
namespace internal {
+1
View File
@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_BYTECODES_H_
#define V8_REGEXP_REGEXP_BYTECODES_H_
#include "regexp/regexp-shim.h"
namespace v8 {
namespace internal {
+1
View File
@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_DOTPRINTER_H_
#define V8_REGEXP_REGEXP_DOTPRINTER_H_
#include "regexp/regexp-shim.h"
namespace v8 {
namespace internal {
@@ -0,0 +1,16 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// This file implements the NativeRegExpMacroAssembler interface for
// SpiderMonkey. It provides the same interface as each of V8's
// architecture-specific implementations.
#ifndef RegexpMacroAssemblerArch_h
#define RegexpMacroAssemblerArch_h
#include "regexp/regexp-shim.h"
#endif // RegexpMacroAssemblerArch_h
+1
View File
@@ -6,6 +6,7 @@
#define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
#include "regexp/regexp-ast.h"
#include "regexp/regexp-shim.h"
#include "regexp/regexp.h"
namespace v8 {
+72
View File
@@ -0,0 +1,72 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "regexp/regexp-shim.h"
namespace v8 {
namespace internal {
void PrintF(const char* format, ...) {
va_list arguments;
va_start(arguments, format);
vprintf(format, arguments);
va_end(arguments);
}
void PrintF(FILE* out, const char* format, ...) {
va_list arguments;
va_start(arguments, format);
vfprintf(out, format, arguments);
va_end(arguments);
}
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/ostreams.cc#L120-L169
// (This is a hand-simplified version.)
// Writes the given character to the output escaping everything outside
// of printable ASCII range.
std::ostream& operator<<(std::ostream& os, const AsUC16& c) {
uint16_t v = c.value;
bool isPrint = 0x20 < v && v <= 0x7e;
char buf[10];
const char* format = isPrint ? "%c" : (v <= 0xFF) ? "\\x%02x" : "\\u%04x";
snprintf(buf, sizeof(buf), format, v);
return os << buf;
}
std::ostream& operator<<(std::ostream& os, const AsUC32& c) {
int32_t v = c.value;
if (v <= String::kMaxUtf16CodeUnit) {
return os << AsUC16(v);
}
char buf[13];
snprintf(buf, sizeof(buf), "\\u{%06x}", v);
return os << buf;
}
DisallowJavascriptExecution::DisallowJavascriptExecution(Isolate* isolate)
: nojs_(isolate->cx()) {}
// TODO: Map flags to jitoptions
bool FLAG_correctness_fuzzer_suppressions = false;
bool FLAG_enable_regexp_unaligned_accesses = false;
bool FLAG_harmony_regexp_sequence = false;
bool FLAG_regexp_interpret_all = false;
bool FLAG_regexp_mode_modifiers = false;
bool FLAG_regexp_optimization = false;
bool FLAG_regexp_peephole_optimization = false;
bool FLAG_regexp_possessive_quantifier = false;
bool FLAG_regexp_tier_up = false;
bool FLAG_trace_regexp_assembler = false;
bool FLAG_trace_regexp_bytecodes = false;
bool FLAG_trace_regexp_parser = false;
bool FLAG_trace_regexp_peephole_optimization = false;
} // namespace internal
} // namespace v8
+999
View File
@@ -0,0 +1,999 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef RegexpShim_h
#define RegexpShim_h
#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"
#include "mozilla/Types.h"
#include <algorithm>
#include "jit/Label.h"
#include "js/Value.h"
#include "regexp/util/flags.h"
#include "regexp/util/vector.h"
#include "regexp/util/zone.h"
#include "vm/NativeObject.h"
// Forward declaration of classes
namespace v8 {
namespace internal {
class Heap;
class Isolate;
class RegExpMatchInfo;
class RegExpStack;
} // namespace internal
} // namespace v8
#define V8_WARN_UNUSED_RESULT MOZ_MUST_USE
#define V8_EXPORT_PRIVATE MOZ_EXPORT
#define V8_FALLTHROUGH MOZ_FALLTHROUGH
#define FATAL(x) MOZ_CRASH(x)
#define UNREACHABLE() MOZ_CRASH("unreachable code")
#define UNIMPLEMENTED() MOZ_CRASH("unimplemented code")
#define STATIC_ASSERT(exp) static_assert(exp, #exp)
#define DCHECK MOZ_ASSERT
#define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs))
#define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs))
#define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs))
#define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs))
#define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs))
#define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs))
#define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr)
#define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr)
#define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs)
#define CHECK MOZ_RELEASE_ASSERT
template <class T>
static constexpr inline T Min(T t1, T t2) {
return t1 < t2 ? t1 : t2;
}
template <class T>
static constexpr inline T Max(T t1, T t2) {
return t1 > t2 ? t1 : t2;
}
#define MemCopy memcpy
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319
// ptrdiff_t is 't' according to the standard, but MSVC uses 'I'.
#ifdef _MSC_VER
# define V8PRIxPTRDIFF "Ix"
# define V8PRIdPTRDIFF "Id"
# define V8PRIuPTRDIFF "Iu"
#else
# define V8PRIxPTRDIFF "tx"
# define V8PRIdPTRDIFF "td"
# define V8PRIuPTRDIFF "tu"
#endif
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L27-L38
// The arraysize(arr) macro returns the # of elements in an array arr.
// The expression is a compile-time constant, and therefore can be
// used in defining new arrays, for example. If you use arraysize on
// a pointer by mistake, you will get a compile-time error.
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
// This template function declaration is used in defining arraysize.
// Note that the function doesn't need an implementation, as we only
// use its type.
template <typename T, size_t N>
char (&ArraySizeHelper(T (&array)[N]))[N];
// Explicitly declare the assignment operator as deleted.
#define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete
// Explicitly declare the copy constructor and assignment operator as deleted.
// This also deletes the implicit move constructor and implicit move assignment
// operator, but still allows to manually define them.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&) = delete; \
DISALLOW_ASSIGN(TypeName)
// Explicitly declare all implicit constructors as deleted, namely the
// default constructor, copy constructor and operator= functions.
// This is especially useful for classes containing only static methods.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
TypeName() = delete; \
DISALLOW_COPY_AND_ASSIGN(TypeName)
namespace v8 {
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367
template <typename T, typename U>
constexpr inline bool IsAligned(T value, U alignment) {
return (value & (alignment - 1)) == 0;
}
using byte = uint8_t;
using Address = uintptr_t;
static const Address kNullAddress = 0;
namespace base {
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258
// The USE(x, ...) template is used to silence C++ compiler warnings
// issued for (yet) unused variables (typically parameters).
// The arguments are guaranteed to be evaluated from left to right.
struct Use {
template <typename T>
Use(T&&) {} // NOLINT(runtime/explicit)
};
#define USE(...) \
do { \
::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \
(void)unused_tmp_array_for_use_macro; \
} while (false)
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39
// saturated_cast<> is analogous to static_cast<> for numeric types, except
// that the specified numeric conversion will saturate rather than overflow or
// underflow.
template <typename Dst, typename Src>
inline Dst saturated_cast(Src value);
// This is the only specialization that is needed for regexp code.
// Instead of pulling in dozens of lines of template goo
// to derive it, I used the implementation from uint8_clamped in
// ArrayBufferObject.h.
template <>
inline uint8_t saturated_cast<uint8_t, int>(int x) {
return (x >= 0) ? ((x < 255) ? uint8_t(x) : 255) : 0;
}
namespace bits {
inline uint64_t CountTrailingZeros(uint64_t value) {
return mozilla::CountTrailingZeroes64(value);
}
} // namespace bits
} // namespace base
namespace unibrow {
using uchar = unsigned int;
// Origin:
// https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150
class Latin1 {
public:
static const uint16_t kMaxChar = 0xff;
// Convert the character to Latin-1 case equivalent if possible.
static inline uint16_t TryConvertToLatin1(uint16_t c) {
// "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
// "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
if (c == 0x039C || c == 0x03BC) {
return 0xB5;
}
// "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER
// Y WITH DIAERESIS".
if (c == 0x0178) {
return 0xFF;
}
return c;
}
};
// Origin:
// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131
class Utf16 {
public:
static inline bool IsLeadSurrogate(int code) {
return js::unicode::IsLeadSurrogate(code);
}
static inline bool IsTrailSurrogate(int code) {
return js::unicode::IsTrailSurrogate(code);
}
static inline uint16_t LeadSurrogate(uint32_t char_code) {
return js::unicode::LeadSurrogate(char_code);
}
static inline uint16_t TrailSurrogate(uint32_t char_code) {
return js::unicode::TrailSurrogate(char_code);
}
static inline uint32_t CombineSurrogatePair(char16_t lead, char16_t trail) {
return js::unicode::UTF16Decode(lead, trail);
}
static const uchar kMaxNonSurrogateCharCode = 0xffff;
};
// A cache used in case conversion. It caches the value for characters
// that either have no mapping or map to a single character independent
// of context. Characters that map to more than one character or that
// map differently depending on context are always looked up.
// Origin:
// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88
template <class T, int size = 256>
class Mapping {
public:
inline Mapping() = default;
int get(uchar c, uchar n, uchar* result);
private:
friend class Test;
int CalculateValue(uchar c, uchar n, uchar* result);
struct CacheEntry {
inline CacheEntry() : code_point_(kNoChar), offset_(0) {}
inline CacheEntry(uchar code_point, signed offset)
: code_point_(code_point), offset_(offset) {}
uchar code_point_;
signed offset_;
static const int kNoChar = (1 << 21) - 1;
};
static const int kSize = size;
static const int kMask = kSize - 1;
CacheEntry entries_[kSize];
};
// Origin:
// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252
struct Ecma262Canonicalize {
static const int kMaxWidth = 1;
static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
struct Ecma262UnCanonicalize {
static const int kMaxWidth = 1;
static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
struct CanonicalizationRange {
static const int kMaxWidth = 1;
static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
};
} // namespace unibrow
namespace internal {
#define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y)
void PRINTF_FORMAT(1, 2) PrintF(const char* format, ...);
void PRINTF_FORMAT(2, 3) PrintF(FILE* out, const char* format, ...);
// Superclass for classes only using static method functions.
// The subclass of AllStatic cannot be instantiated at all.
class AllStatic {
#ifdef DEBUG
public:
AllStatic() = delete;
#endif
};
// Superclass for classes managed with new and delete.
// In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc)
// Compare:
// https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96
class Malloced {
public:
static void* operator new(size_t size) {
js::AutoEnterOOMUnsafeRegion oomUnsafe;
void* result = js_malloc(size);
if (!result) {
oomUnsafe.crash("Irregexp Malloced shim");
}
return result;
}
static void operator delete(void* p) { js_free(p); }
};
constexpr int32_t KB = 1024;
constexpr int32_t MB = 1024 * 1024;
#define kMaxInt JSVAL_INT_MAX
#define kMinInt JSVAL_INT_MIN
constexpr int kSystemPointerSize = sizeof(void*);
// The largest integer n such that n and n + 1 are both exactly
// representable as a Number value. ES6 section 20.1.2.6
constexpr double kMaxSafeInteger = 9007199254740991.0; // 2^53-1
// Latin1/UTF-16 constants
// Code-point values in Unicode 4.0 are 21 bits wide.
// Code units in UTF-16 are 16 bits wide.
using uc16 = uint16_t;
using uc32 = int32_t;
constexpr int kBitsPerByte = 8;
constexpr int kBitsPerByteLog2 = 3;
constexpr int kUInt32Size = sizeof(uint32_t);
constexpr int kInt64Size = sizeof(int64_t);
constexpr int kUC16Size = sizeof(uc16);
inline constexpr bool IsDecimalDigit(uc32 c) { return c >= '0' && c <= '9'; }
inline bool is_uint24(int val) { return (val & 0x00ffffff) == val; }
inline bool IsIdentifierStart(uc32 c) {
return js::unicode::IsIdentifierStart(uint32_t(c));
}
inline bool IsIdentifierPart(uc32 c) {
return js::unicode::IsIdentifierPart(uint32_t(c));
}
// Wrappers to disambiguate uint16_t and uc16.
struct AsUC16 {
explicit AsUC16(uint16_t v) : value(v) {}
uint16_t value;
};
struct AsUC32 {
explicit AsUC32(int32_t v) : value(v) {}
int32_t value;
};
class StdoutStream : public std::ostream {};
std::ostream& operator<<(std::ostream& os, const AsUC16& c);
std::ostream& operator<<(std::ostream& os, const AsUC32& c);
// Reuse existing Maybe implementation
using mozilla::Maybe;
template <typename T>
Maybe<T> Just(const T& value) {
return mozilla::Some(value);
}
template <typename T>
mozilla::Nothing Nothing() {
return mozilla::Nothing();
}
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L600-L642
// Compare 8bit/16bit chars to 8bit/16bit chars.
// Used indirectly by regexp-interpreter.cc
template <typename lchar, typename rchar>
inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs,
size_t chars) {
const lchar* limit = lhs + chars;
if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) {
// memcmp compares byte-by-byte, yielding wrong results for two-byte
// strings on little-endian systems.
return memcmp(lhs, rhs, chars);
}
while (lhs < limit) {
int r = static_cast<int>(*lhs) - static_cast<int>(*rhs);
if (r != 0) return r;
++lhs;
++rhs;
}
return 0;
}
template <typename lchar, typename rchar>
inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) {
DCHECK_LE(sizeof(lchar), 2);
DCHECK_LE(sizeof(rchar), 2);
if (sizeof(lchar) == 1) {
if (sizeof(rchar) == 1) {
return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
reinterpret_cast<const uint8_t*>(rhs), chars);
} else {
return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
reinterpret_cast<const uint16_t*>(rhs),
chars);
}
} else {
if (sizeof(rchar) == 1) {
return CompareCharsUnsigned(reinterpret_cast<const uint16_t*>(lhs),
reinterpret_cast<const uint8_t*>(rhs), chars);
} else {
return CompareCharsUnsigned(reinterpret_cast<const uint16_t*>(lhs),
reinterpret_cast<const uint16_t*>(rhs),
chars);
}
}
}
// Origin:
// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48
// Returns the value (0 .. 15) of a hexadecimal character c.
// If c is not a legal hexadecimal character, returns a value < 0.
// Used in regexp-parser.cc
inline int HexValue(uc32 c) {
c -= '0';
if (static_cast<unsigned>(c) <= 9) return c;
c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
if (static_cast<unsigned>(c) <= 5) return c + 10;
return -1;
}
// V8::Object ~= JS::Value
class Object {
public:
// The default object constructor in V8 stores a nullptr,
// which has its low bit clear and is interpreted as Smi(0).
constexpr Object() : value_(JS::Int32Value(0)) {}
// Conversions to/from SpiderMonkey types
constexpr Object(JS::Value value) : value_(value) {}
operator JS::Value() const { return value_; }
// Used in regexp-macro-assembler.cc and regexp-interpreter.cc to
// check the return value of isolate->stack_guard()->HandleInterrupts()
// In V8, this will be either an exception object or undefined.
// In SM, we store the exception in the context, so we can use our normal
// idiom: return false iff we are throwing an exception.
inline bool IsException(Isolate*) const { return !value_.toBoolean(); }
// SpiderMonkey tries to avoid leaking the internal representation of its
// objects. V8 is not so strict. These functions are used when calling /
// being called by native code: objects are converted to Addresses for the
// call, then cast back to objects on the other side.
// We might be able to upstream a patch that eliminates the need for these.
Object(Address bits);
Address ptr() const;
protected:
JS::Value value_;
};
class Smi : public Object {
public:
static Smi FromInt(int32_t value) {
Smi smi;
smi.value_ = JS::Int32Value(value);
return smi;
}
static inline int32_t ToInt(const Object object) {
return JS::Value(object).toInt32();
}
};
// V8::HeapObject ~= JSObject
class HeapObject : public Object {
public:
// Only used for bookkeeping of total code generated in regexp-compiler.
// We may be able to refactor this away.
int Size() const;
};
// A fixed-size array with Objects (aka Values) as element types
// Implemented as a wrapper around a regular native object with dense elements.
class FixedArray : public HeapObject {
public:
inline void set(uint32_t index, Object value) {
JS::Value(*this).toObject().as<js::NativeObject>().setDenseElement(index,
value);
}
};
// A fixed-size array of bytes.
// TODO: figure out the best implementation for this. Uint8Array might work,
// but it's not currently visible outside of TypedArrayObject.cpp.
class ByteArray : public HeapObject {
public:
uint8_t get(uint32_t index);
void set(uint32_t index, uint8_t val);
uint32_t length();
byte* GetDataStartAddress();
byte* GetDataEndAddress();
static ByteArray cast(Object object);
};
// Like Handles in SM, V8 handles are references to marked pointers.
// Unlike SM, where Rooted pointers are created individually on the
// stack, the target of a V8 handle lives in a HandleScope.
// HandleScopes are created on the stack and register themselves with
// the isolate (~= JSContext). Whenever a Handle is created, the
// outermost HandleScope is retrieved from the isolate, and a new root
// is created in that HandleScope. The Handle remains valid for the
// lifetime of the HandleScope.
class HandleScope {
public:
HandleScope(Isolate* isolate);
};
// Origin:
// https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171
template <typename T>
class Handle {
public:
Handle();
Handle(T object, Isolate* isolate);
// Constructor for handling automatic up casting.
template <typename S, typename = typename std::enable_if<
std::is_convertible<S*, T*>::value>::type>
/*inline*/ Handle(Handle<S> handle);
template <typename S>
/*inline*/ static const Handle<T> cast(Handle<S> that);
T* operator->() const;
T operator*() const;
bool is_null() const;
Address address();
private:
template <typename>
friend class Handle;
template <typename>
friend class MaybeHandle;
};
// A Handle can be converted into a MaybeHandle. Converting a MaybeHandle
// into a Handle requires checking that it does not point to nullptr. This
// ensures nullptr checks before use.
//
// Also note that Handles do not provide default equality comparison or hashing
// operators on purpose. Such operators would be misleading, because intended
// semantics is ambiguous between Handle location and object identity.
// Origin:
// https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78
template <typename T>
class MaybeHandle final {
public:
MaybeHandle() = default;
// Constructor for handling automatic up casting from Handle.
// Ex. Handle<JSArray> can be passed when MaybeHandle<Object> is expected.
template <typename S, typename = typename std::enable_if<
std::is_convertible<S*, T*>::value>::type>
MaybeHandle(Handle<S> handle);
/*inline*/ Handle<T> ToHandleChecked() const;
// Convert to a Handle with a type that can be upcasted to.
template <typename S>
/*inline*/ bool ToHandle(Handle<S>* out) const;
};
// From v8/src/handles/handles-inl.h
template <typename T>
inline Handle<T> handle(T object, Isolate* isolate) {
return Handle<T>(object, isolate);
}
// RAII Guard classes
class DisallowHeapAllocation {
public:
DisallowHeapAllocation() {}
operator const JS::AutoAssertNoGC&() const { return no_gc_; }
private:
const JS::AutoAssertNoGC no_gc_;
};
// This is used inside DisallowHeapAllocation regions to enable
// allocation just before throwing an exception, to allocate the
// exception object. Specifically, it only ever guards:
// - isolate->stack_guard()->HandleInterrupts()
// - isolate->StackOverflow()
// Those cases don't allocate in SpiderMonkey, so this can be a no-op.
class AllowHeapAllocation {
public:
// Empty constructor to avoid unused_variable warnings
AllowHeapAllocation() {}
};
class DisallowJavascriptExecution {
public:
DisallowJavascriptExecution(Isolate* isolate);
private:
js::AutoAssertNoContentJS nojs_;
};
// Origin:
// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474
class String : public HeapObject {
private:
JSString* str() const { return value_.toString(); }
public:
operator JSString*() const { return str(); }
// Max char codes.
static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
static const int kMaxUtf16CodeUnit = 0xffff;
static const uc32 kMaxCodePoint = 0x10ffff;
MOZ_ALWAYS_INLINE int length() const { return str()->length(); }
uint16_t Get(uint32_t index);
bool IsFlat() { return str()->isLinear(); };
// These are only used in V8 functions that I want to rewrite.
// TODO: Rewrite those functions and delete this
bool IsConsString();
bool IsExternalString();
bool IsExternalOneByteString();
bool IsExternalTwoByteString();
bool IsSeqString();
bool IsSeqOneByteString();
bool IsSeqTwoByteString();
bool IsSlicedString();
bool IsThinString();
// Origin:
// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152
class FlatContent {
public:
FlatContent(JSLinearString* string, const DisallowHeapAllocation& no_gc)
: string_(string), no_gc_(no_gc) {}
inline bool IsOneByte() const { return string_->hasLatin1Chars(); }
inline bool IsTwoByte() const { return !string_->hasLatin1Chars(); }
Vector<const uint8_t> ToOneByteVector() const {
MOZ_ASSERT(IsOneByte());
return Vector<const uint8_t>(string_->latin1Chars(no_gc_),
string_->length());
}
Vector<const uc16> ToUC16Vector() const;
// TODO: twoByteChars returns char16_t*, but uc16 is uint16_t, which is
// not compatible :( :( :(
// {
// MOZ_ASSERT(IsTwoByte());
// return Vector<const uc16>(string_->twoByteChars(no_gc_),
// string_->length());
// }
private:
const JSLinearString* string_;
const JS::AutoAssertNoGC& no_gc_;
};
FlatContent GetFlatContent(const DisallowHeapAllocation& no_gc) {
MOZ_ASSERT(IsFlat());
return FlatContent(&str()->asLinear(), no_gc);
}
static Handle<String> Flatten(Isolate* isolate, Handle<String> string);
inline static String cast(Object object) {
String s;
s.value_ = JS::StringValue(JS::Value(object).toString());
return s;
}
inline static bool IsOneByteRepresentationUnderneath(String string) {
return string.str()->hasLatin1Chars();
}
inline bool IsOneByteRepresentation() const {
return str()->hasLatin1Chars();
}
std::unique_ptr<char[]> ToCString();
template <typename Char>
Vector<const Char> GetCharVector(const DisallowHeapAllocation& no_gc);
};
// A flat string reader provides random access to the contents of a
// string independent of the character width of the string. The handle
// must be valid as long as the reader is being used.
// Origin:
// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L807-L825
class MOZ_STACK_CLASS FlatStringReader {
public:
FlatStringReader(JSAtom* string) : string_(string) {}
int length() { return string_->length(); }
inline char16_t Get(size_t index) {
return string_->latin1OrTwoByteChar(index);
}
private:
JSAtom* string_;
JS::AutoCheckCannotGC nogc;
};
//////////////////////////////////////////////////
// TODO: Refactor NativeRegExpMacroAssembler and delete all of these:
class ConsString : public String {
public:
String first();
String second();
static ConsString cast(Object object);
};
class ExternalOneByteString : public String {
public:
const uint8_t* GetChars();
static ExternalOneByteString cast(Object object);
};
class ExternalTwoByteString : public String {
public:
const uc16* GetChars();
static ExternalTwoByteString cast(Object object);
};
class SeqOneByteString : public String {
public:
uint8_t* GetChars(const DisallowHeapAllocation& no_gc);
static SeqOneByteString cast(Object object);
};
class SeqTwoByteString : public String {
public:
uc16* GetChars(const DisallowHeapAllocation& no_gc);
static SeqTwoByteString cast(Object object);
static constexpr size_t kMaxCharsSize = JSString::MAX_LENGTH * 2;
};
class SlicedString : public String {
public:
String parent();
int offset();
static SlicedString cast(Object object);
};
class ThinString : public String {
public:
String actual();
static ThinString cast(Object object);
};
class StringShape {
public:
explicit StringShape(const String s);
bool IsCons();
bool IsSliced();
bool IsThin();
};
// End of "TODO: Delete all of these"
//////////////////////////////////////////////////
class JSRegExp : public HeapObject {
public:
// ******************************************************
// Methods that are called from inside the implementation
// ******************************************************
void TierUpTick();
bool MarkedForTierUp() const;
Object Code(bool is_latin1) const;
Object Bytecode(bool is_latin1) const;
uint32_t BacktrackLimit() const;
static JSRegExp cast(Object object);
// ******************************
// Static constants
// ******************************
// Meaning of Type:
// NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
// ATOM: A simple string to match against using an indexOf operation.
// IRREGEXP: Compiled with Irregexp.
enum Type { NOT_COMPILED, ATOM, IRREGEXP };
// Maximum number of captures allowed.
static constexpr int kMaxCaptures = 1 << 16;
// **************************************************
// JSRegExp::Flags
// **************************************************
struct FlagShiftBit {
static constexpr int kGlobal = 0;
static constexpr int kIgnoreCase = 1;
static constexpr int kMultiline = 2;
static constexpr int kSticky = 3;
static constexpr int kUnicode = 4;
static constexpr int kDotAll = 5;
static constexpr int kInvalid = 6;
};
enum Flag : uint8_t {
kNone = 0,
kGlobal = 1 << FlagShiftBit::kGlobal,
kIgnoreCase = 1 << FlagShiftBit::kIgnoreCase,
kMultiline = 1 << FlagShiftBit::kMultiline,
kSticky = 1 << FlagShiftBit::kSticky,
kUnicode = 1 << FlagShiftBit::kUnicode,
kDotAll = 1 << FlagShiftBit::kDotAll,
kInvalid = 1 << FlagShiftBit::kInvalid, // Not included in FlagCount.
};
using Flags = base::Flags<Flag>;
static constexpr int kFlagCount = 6;
static constexpr int kNoBacktrackLimit = 0;
};
class Histogram {
public:
inline void AddSample(int sample) {}
};
class Counters {
public:
Histogram* regexp_backtracks() { return &regexp_backtracks_; }
private:
Histogram regexp_backtracks_;
};
#define PROFILE(isolate, call) \
do { \
} while (false);
enum class AllocationType : uint8_t {
kYoung, // Allocate in the nursery
kOld, // Allocate in the tenured heap
};
using StackGuard = Isolate;
using Factory = Isolate;
class Isolate {
public:
//********** Isolate code **********//
RegExpStack* regexp_stack() const { return regexp_stack_; }
bool has_pending_exception() { return cx()->isExceptionPending(); }
void StackOverflow() { js::ReportOverRecursed(cx()); }
unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize();
unibrow::Mapping<unibrow::Ecma262Canonicalize>*
regexp_macro_assembler_canonicalize();
unibrow::Mapping<unibrow::CanonicalizationRange>* jsregexp_canonrange();
// An empty stub for telemetry we don't support
void IncreaseTotalRegexpCodeGenerated(int size) {}
Counters* counters() { return &counters_; }
//********** Factory code **********//
inline Factory* factory() { return this; }
Handle<ByteArray> NewByteArray(
int length, AllocationType allocation = AllocationType::kYoung);
MOZ_MUST_USE MaybeHandle<String> NewStringFromOneByte(
const Vector<const uint8_t>& str,
AllocationType allocation = AllocationType::kYoung);
// Allocates a fixed array initialized with undefined values.
Handle<FixedArray> NewFixedArray(
int length, AllocationType allocation = AllocationType::kYoung);
template <typename Char>
Handle<String> InternalizeString(const Vector<const Char>& str);
//********** Stack guard code **********//
inline StackGuard* stack_guard() { return this; }
Object HandleInterrupts() {
return Object(JS::BooleanValue(cx()->handleInterrupt()));
}
JSContext* cx() const { return cx_; }
private:
JSContext* cx_;
RegExpStack* regexp_stack_;
Counters counters_;
};
// Origin:
// https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931
class StackLimitCheck {
public:
StackLimitCheck(Isolate* isolate) : cx_(isolate->cx()) {}
// Use this to check for stack-overflows in C++ code.
bool HasOverflowed() { return !CheckRecursionLimitDontReport(cx_); }
// Use this to check for interrupt request in C++ code.
bool InterruptRequested() { return cx_->hasAnyPendingInterrupt(); }
// Use this to check for stack-overflow when entering runtime from JS code.
bool JsHasOverflowed() {
return !CheckRecursionLimitConservativeDontReport(cx_);
}
private:
JSContext* cx_;
};
class Code {
public:
bool operator!=(Code& other) const;
Address raw_instruction_start();
Address raw_instruction_end();
Address address();
static Code cast(Object object);
};
// GeneratedCode provides an interface for calling into jit code.
// It will probably require additional work to hook this up to the
// arm simulator.
// Origin:
// https://github.com/v8/v8/blob/abfbe7687edb5b2dffe0b33b24e0a41bb86a8214/src/execution/simulator.h#L96-L164
template <typename Return, typename... Args>
class GeneratedCode {
public:
using Signature = Return(Args...);
static GeneratedCode FromCode(Code code); // TODO: implement
Return Call(Args... args) { return fn_ptr_(args...); }
private:
friend class GeneratedCode<Return(Args...)>;
Isolate* isolate_;
Signature* fn_ptr_;
GeneratedCode(Isolate* isolate, Signature* fn_ptr)
: isolate_(isolate), fn_ptr_(fn_ptr) {}
};
// Allow to use {GeneratedCode<ret(arg1, arg2)>} instead of
// {GeneratedCode<ret, arg1, arg2>}.
template <typename Return, typename... Args>
class GeneratedCode<Return(Args...)> : public GeneratedCode<Return, Args...> {
public:
// Automatically convert from {GeneratedCode<ret, arg1, arg2>} to
// {GeneratedCode<ret(arg1, arg2)>}.
GeneratedCode(GeneratedCode<Return, Args...> other)
: GeneratedCode<Return, Args...>(other.isolate_, other.fn_ptr_) {}
};
enum class MessageTemplate { kStackOverflow };
class MessageFormatter {
public:
static const char* TemplateString(MessageTemplate index) {
switch (index) {
case MessageTemplate::kStackOverflow:
return "too much recursion";
}
}
};
// Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h
class Label {
public:
Label() : inner_(js::jit::Label()) {}
operator js::jit::Label*() { return &inner_; }
void Unuse() { inner_.reset(); }
bool is_linked() { return inner_.used(); }
bool is_bound() { return inner_.bound(); }
bool is_unused() { return !inner_.used() && !inner_.bound(); }
int pos() { return inner_.offset(); }
void link_to(int pos) { inner_.use(pos); }
void bind_to(int pos) { inner_.bind(pos); }
private:
js::jit::Label inner_;
};
// TODO: Map flags to jitoptions
extern bool FLAG_correctness_fuzzer_suppressions;
extern bool FLAG_enable_regexp_unaligned_accesses;
extern bool FLAG_harmony_regexp_sequence;
extern bool FLAG_regexp_interpret_all;
extern bool FLAG_regexp_mode_modifiers;
extern bool FLAG_regexp_optimization;
extern bool FLAG_regexp_peephole_optimization;
extern bool FLAG_regexp_possessive_quantifier;
extern bool FLAG_regexp_tier_up;
extern bool FLAG_trace_regexp_assembler;
extern bool FLAG_trace_regexp_bytecodes;
extern bool FLAG_trace_regexp_parser;
extern bool FLAG_trace_regexp_peephole_optimization;
} // namespace internal
} // namespace v8
#endif // RegexpShim_h
+1
View File
@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_STACK_H_
#define V8_REGEXP_REGEXP_STACK_H_
#include "regexp/regexp-shim.h"
namespace v8 {
namespace internal {
+1
View File
@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_H_
#define V8_REGEXP_REGEXP_H_
#include "regexp/regexp-shim.h"
namespace v8 {
namespace internal {
+93
View File
@@ -0,0 +1,93 @@
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_UTIL_FLAGS_H_
#define V8_UTIL_FLAGS_H_
// Origin: https://github.com/v8/v8/blob/1bafcc6b999b23ea1d394f5d267a08183e3c4e19/src/base/flags.h#L15-L90
namespace v8 {
namespace base {
// The Flags class provides a type-safe way of storing OR-combinations of enum
// values. The Flags<T, S> class is a template class, where T is an enum type,
// and S is the underlying storage type (usually int).
//
// The traditional C++ approach for storing OR-combinations of enum values is to
// use an int or unsigned int variable. The inconvenience with this approach is
// that there's no type checking at all; any enum value can be OR'd with any
// other enum value and passed on to a function that takes an int or unsigned
// int.
template <typename T, typename S = int>
class Flags final {
public:
using flag_type = T;
using mask_type = S;
constexpr Flags() : mask_(0) {}
constexpr Flags(flag_type flag)
: mask_(static_cast<S>(flag)) {}
constexpr explicit Flags(mask_type mask) : mask_(static_cast<S>(mask)) {}
constexpr bool operator==(flag_type flag) const {
return mask_ == static_cast<S>(flag);
}
constexpr bool operator!=(flag_type flag) const {
return mask_ != static_cast<S>(flag);
}
Flags& operator&=(const Flags& flags) {
mask_ &= flags.mask_;
return *this;
}
Flags& operator|=(const Flags& flags) {
mask_ |= flags.mask_;
return *this;
}
Flags& operator^=(const Flags& flags) {
mask_ ^= flags.mask_;
return *this;
}
constexpr Flags operator&(const Flags& flags) const {
return Flags(mask_ & flags.mask_);
}
constexpr Flags operator|(const Flags& flags) const {
return Flags(mask_ | flags.mask_);
}
constexpr Flags operator^(const Flags& flags) const {
return Flags(mask_ ^ flags.mask_);
}
Flags& operator&=(flag_type flag) { return operator&=(Flags(flag)); }
Flags& operator|=(flag_type flag) { return operator|=(Flags(flag)); }
Flags& operator^=(flag_type flag) { return operator^=(Flags(flag)); }
constexpr Flags operator&(flag_type flag) const {
return operator&(Flags(flag));
}
constexpr Flags operator|(flag_type flag) const {
return operator|(Flags(flag));
}
constexpr Flags operator^(flag_type flag) const {
return operator^(Flags(flag));
}
constexpr Flags operator~() const { return Flags(~mask_); }
constexpr operator mask_type() const { return mask_; }
constexpr bool operator!() const { return !mask_; }
Flags without(flag_type flag) { return *this & (~Flags(flag)); }
friend size_t hash_value(const Flags& flags) { return flags.mask_; }
private:
mask_type mask_;
};
} // namespace base
} // namespace v8
#endif // V8_UTIL_FLAG_H_
+204
View File
@@ -0,0 +1,204 @@
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_UTIL_VECTOR_H_
#define V8_UTIL_VECTOR_H_
#include <algorithm>
#include <cstring>
#include <iterator>
#include <memory>
#include "js/Utility.h"
namespace v8 {
namespace internal {
//////////////////////////////////////////////////
// Adapted from: https://github.com/v8/v8/blob/5f69bbc233c2d1baf149faf869a7901603929914/src/utils/allocation.h#L36-L58
template <typename T>
T* NewArray(size_t size) {
static_assert(std::is_pod<T>::value, "");
js::AutoEnterOOMUnsafeRegion oomUnsafe;
T* result = static_cast<T*>(js_malloc(size * sizeof(T)));
if (!result) {
oomUnsafe.crash("Irregexp NewArray");
}
return result;
}
template <typename T>
void DeleteArray(T* array) {
js_free(array);
}
//////////////////////////////////////////////////
// A non-resizable vector containing a pointer and a length.
// The Vector may or may not own the pointer, depending on context.
// Origin:
// https://github.com/v8/v8/blob/5f69bbc233c2d1baf149faf869a7901603929914/src/utils/vector.h#L20-L134
template <typename T>
class Vector {
public:
constexpr Vector() : start_(nullptr), length_(0) {}
constexpr Vector(T* data, size_t length) : start_(data), length_(length) {
MOZ_ASSERT_IF(length != 0, data != nullptr);
}
static Vector<T> New(size_t length) {
return Vector<T>(NewArray<T>(length), length);
}
// Returns a vector using the same backing storage as this one,
// spanning from and including 'from', to but not including 'to'.
Vector<T> SubVector(size_t from, size_t to) const {
MOZ_ASSERT(from < to);
MOZ_ASSERT(to < length_);
return Vector<T>(begin() + from, to - from);
}
// Returns the length of the vector. Only use this if you really need an
// integer return value. Use {size()} otherwise.
int length() const {
MOZ_ASSERT(length_ <= std::numeric_limits<int>::max());
return static_cast<int>(length_);
}
// Returns the length of the vector as a size_t.
constexpr size_t size() const { return length_; }
// Returns whether or not the vector is empty.
constexpr bool empty() const { return length_ == 0; }
// Access individual vector elements - checks bounds in debug mode.
T& operator[](size_t index) const {
MOZ_ASSERT(index < length_);
return start_[index];
}
const T& at(size_t index) const { return operator[](index); }
T& first() { return start_[0]; }
T& last() {
MOZ_ASSERT(length_ > 0);
return start_[length_ - 1];
}
// Returns a pointer to the start of the data in the vector.
constexpr T* begin() const { return start_; }
// Returns a pointer past the end of the data in the vector.
constexpr T* end() const { return start_ + length_; }
// Returns a clone of this vector with a new backing store.
Vector<T> Clone() const {
T* result = NewArray<T>(length_);
for (size_t i = 0; i < length_; i++) result[i] = start_[i];
return Vector<T>(result, length_);
}
void Truncate(size_t length) {
MOZ_ASSERT(length <= length_);
length_ = length;
}
// Releases the array underlying this vector. Once disposed the
// vector is empty.
void Dispose() {
DeleteArray(start_);
start_ = nullptr;
length_ = 0;
}
Vector<T> operator+(size_t offset) {
MOZ_ASSERT(offset <= length_);
return Vector<T>(start_ + offset, length_ - offset);
}
Vector<T> operator+=(size_t offset) {
MOZ_ASSERT(offset <= length_);
start_ += offset;
length_ -= offset;
return *this;
}
// Implicit conversion from Vector<T> to Vector<const T>.
inline operator Vector<const T>() const {
return Vector<const T>::cast(*this);
}
template <typename S>
static constexpr Vector<T> cast(Vector<S> input) {
return Vector<T>(reinterpret_cast<T*>(input.begin()),
input.length() * sizeof(S) / sizeof(T));
}
bool operator==(const Vector<const T> other) const {
if (length_ != other.length_) return false;
if (start_ == other.start_) return true;
for (size_t i = 0; i < length_; ++i) {
if (start_[i] != other.start_[i]) {
return false;
}
}
return true;
}
private:
T* start_;
size_t length_;
};
// The resulting vector does not contain a null-termination byte. If you want
// the null byte, use ArrayVector("foo").
inline Vector<const char> CStrVector(const char* data) {
return Vector<const char>(data, strlen(data));
}
} // namespace internal
namespace base {
// SmallVector uses inline storage first, and reallocates when full.
// It is basically equivalent to js::Vector, and is implemented
// as a thin wrapper.
// V8's implementation: https://github.com/v8/v8/blob/master/src/base/small-vector.h
template <typename T, size_t kSize>
class SmallVector {
public:
inline bool empty() const { return inner_.empty(); }
inline const T& back() const { return inner_.back(); }
inline void pop_back() { inner_.popBack(); };
template <typename... Args>
inline void emplace_back(Args&&... args) {
js::AutoEnterOOMUnsafeRegion oomUnsafe;
if (!inner_.emplaceBack(args...)) {
oomUnsafe.crash("Irregexp SmallVector emplace_back");
}
};
inline size_t size() const { return inner_.length(); }
inline const T& at(size_t index) const { return inner_[index]; }
void resize_no_init(size_t new_size) {
js::AutoEnterOOMUnsafeRegion oomUnsafe;
if (!inner_.resizeUninitialized(new_size)) {
oomUnsafe.crash("Irregexp SmallVector resize");
}
}
private:
js::Vector<T, kSize, js::SystemAllocPolicy> inner_;
};
} // namespace base
} // namespace v8
#endif // V8_UTIL_VECTOR_H_
+356
View File
@@ -0,0 +1,356 @@
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_UTIL_ZONE_H_
#define V8_UTIL_ZONE_H_
#include <list>
#include <map>
#include <set>
#include <unordered_map>
#include <vector>
#include "ds/LifoAlloc.h"
#include "regexp/util/vector.h"
namespace v8 {
namespace internal {
// V8::Zone ~= LifoAlloc
class Zone {
public:
Zone(size_t defaultChunkSize) : lifoAlloc_(defaultChunkSize) {
lifoAlloc_.setAsInfallibleByDefault();
}
void* New(size_t size) { return lifoAlloc_.alloc(size); }
void DeleteAll() { lifoAlloc_.freeAll(); }
// Returns true if the total memory allocated exceeds a threshold.
static const size_t kExcessLimit = 256 * 1024 * 1024;
bool excess_allocation() const {
return lifoAlloc_.peakSizeOfExcludingThis() > kExcessLimit;
}
private:
js::LifoAlloc lifoAlloc_;
};
// Superclass for classes allocated in a Zone.
// Origin: https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/zone/zone.h#L138-L155
class ZoneObject {
public:
// Allocate a new ZoneObject of 'size' bytes in the Zone.
void* operator new(size_t size, Zone* zone) { return zone->New(size); }
// Ideally, the delete operator should be private instead of
// public, but unfortunately the compiler sometimes synthesizes
// (unused) destructors for classes derived from ZoneObject, which
// require the operator to be visible. MSVC requires the delete
// operator to be public.
// ZoneObjects should never be deleted individually; use
// Zone::DeleteAll() to delete all zone objects in one go.
void operator delete(void*, size_t) { MOZ_CRASH("unreachable"); }
void operator delete(void* pointer, Zone* zone) { MOZ_CRASH("unreachable"); }
};
// ZoneLists are growable lists with constant-time access to the
// elements. The list itself and all its elements are allocated in the
// Zone. ZoneLists cannot be deleted individually; you can delete all
// objects in the Zone by calling Zone::DeleteAll().
// Used throughout irregexp.
// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone.h#L173-L318
// Inlines: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-list-inl.h#L17-L155
template <typename T>
class ZoneList final {
public:
// Construct a new ZoneList with the given capacity; the length is
// always zero. The capacity must be non-negative.
ZoneList(int capacity, Zone* zone) { Initialize(capacity, zone); }
// Construct a new ZoneList from a std::initializer_list
ZoneList(std::initializer_list<T> list, Zone* zone) {
Initialize(static_cast<int>(list.size()), zone);
for (auto& i : list) Add(i, zone);
}
// Construct a new ZoneList by copying the elements of the given ZoneList.
ZoneList(const ZoneList<T>& other, Zone* zone) {
Initialize(other.length(), zone);
AddAll(other, zone);
}
void* operator new(size_t size, Zone* zone) { return zone->New(size); }
// Returns a reference to the element at index i. This reference is not safe
// to use after operations that can change the list's backing store
// (e.g. Add).
inline T& operator[](int i) const {
MOZ_ASSERT(0 < i);
MOZ_ASSERT(static_cast<unsigned>(i) < static_cast<unsigned>(length_));
return data_[i];
}
inline T& at(int i) const { return operator[](i); }
inline T& last() const { return at(length_ - 1); }
inline T& first() const { return at(0); }
using iterator = T*;
inline iterator begin() const { return &data_[0]; }
inline iterator end() const { return &data_[length_]; }
inline bool is_empty() const { return length_ == 0; }
inline int length() const { return length_; }
inline int capacity() const { return capacity_; }
Vector<T> ToVector() const { return Vector<T>(data_, length_); }
Vector<T> ToVector(int start, int length) const {
return Vector<T>(data_ + start, std::min(length_ - start, length));
}
Vector<const T> ToConstVector() const {
return Vector<const T>(data_, length_);
}
inline void Initialize(int capacity, Zone* zone) {
MOZ_ASSERT(capacity >= 0);
data_ = (capacity > 0) ? NewData(capacity, zone) : nullptr;
capacity_ = capacity;
length_ = 0;
}
// Adds a copy of the given 'element' to the end of the list,
// expanding the list if necessary.
void Add(const T& element, Zone* zone) {
if (length_ < capacity_) {
data_[length_++] = element;
} else {
ZoneList<T>::ResizeAdd(element, zone);
}
}
// Add all the elements from the argument list to this list.
void AddAll(const ZoneList<T>& other, Zone* zone) {
AddAll(other.ToVector(), zone);
}
// Add all the elements from the vector to this list.
void AddAll(const Vector<T>& other, Zone* zone) {
int result_length = length_ + other.length();
if (capacity_ < result_length) {
Resize(result_length, zone);
}
if (std::is_fundamental<T>()) {
memcpy(data_ + length_, other.begin(), sizeof(*data_) * other.length());
} else {
for (int i = 0; i < other.length(); i++) {
data_[length_ + i] = other.at(i);
}
}
length_ = result_length;
}
// Overwrites the element at the specific index.
void Set(int index, const T& element) {
MOZ_ASSERT(index >= 0 && index <= length_);
data_[index] = element;
}
// Removes the i'th element without deleting it even if T is a
// pointer type; moves all elements above i "down". Returns the
// removed element. This function's complexity is linear in the
// size of the list.
T Remove(int i) {
T element = at(i);
length_--;
while (i < length_) {
data_[i] = data_[i + 1];
i++;
}
return element;
}
// Removes the last element without deleting it even if T is a
// pointer type. Returns the removed element.
inline T RemoveLast() { return Remove(length_ - 1); }
// Clears the list by freeing the storage memory. If you want to keep the
// memory, use Rewind(0) instead. Be aware, that even if T is a
// pointer type, clearing the list doesn't delete the entries.
inline void Clear() {
data_ = nullptr;
capacity_ = 0;
length_ = 0;
}
// Drops all but the first 'pos' elements from the list.
inline void Rewind(int pos) {
MOZ_ASSERT(0 <= pos && pos <= length_);
length_ = pos;
}
inline bool Contains(const T& elm) const {
for (int i = 0; i < length_; i++) {
if (data_[i] == elm) return true;
}
return false;
}
template <typename CompareFunction>
void StableSort(CompareFunction cmp, size_t s, size_t l) {
std::stable_sort(begin() + s, begin() + s + l,
[cmp](const T& a, const T& b) { return cmp(&a, &b) < 0; });
}
void operator delete(void* pointer) { MOZ_CRASH("unreachable"); }
void operator delete(void* pointer, Zone* zone) { MOZ_CRASH("unreachable"); }
private:
T* data_;
int capacity_;
int length_;
inline T* NewData(int n, Zone* zone) {
return static_cast<T*>(zone->New(n * sizeof(T)));
}
// Increase the capacity of a full list, and add an element.
// List must be full already.
void ResizeAdd(const T& element, Zone* zone) {
MOZ_ASSERT(length_ >= capacity_);
// Grow the list capacity by 100%, but make sure to let it grow
// even when the capacity is zero (possible initial case).
int new_capacity = 1 + 2 * capacity_;
// Since the element reference could be an element of the list, copy
// it out of the old backing storage before resizing.
T temp = element;
Resize(new_capacity, zone);
data_[length_++] = temp;
}
// Resize the list.
void Resize(int new_capacity, Zone* zone) {
MOZ_ASSERT(length_ <= new_capacity);
T* new_data = NewData(new_capacity, zone);
if (length_ > 0) {
memcpy(new_data, data_, length_ * sizeof(T));
}
data_ = new_data;
capacity_ = new_capacity;
}
ZoneList& operator=(const ZoneList&) = delete;
ZoneList() = delete;
ZoneList(const ZoneList&) = delete;
};
// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-allocator.h#L14-L77
template <typename T>
class ZoneAllocator {
public:
using pointer = T*;
using const_pointer = const T*;
using reference = T&;
using const_reference = const T&;
using value_type = T;
using size_type = size_t;
using difference_type = ptrdiff_t;
template <class O>
struct rebind {
using other = ZoneAllocator<O>;
};
explicit ZoneAllocator(Zone* zone) : zone_(zone) {}
template <typename U>
ZoneAllocator(const ZoneAllocator<U>& other)
: ZoneAllocator<T>(other.zone_) {}
template <typename U>
friend class ZoneAllocator;
T* allocate(size_t n) { return static_cast<T*>(zone_->New(n * sizeof(T))); }
void deallocate(T* p, size_t) {} // noop for zones
bool operator==(ZoneAllocator const& other) const {
return zone_ == other.zone_;
}
bool operator!=(ZoneAllocator const& other) const {
return zone_ != other.zone_;
}
private:
Zone* zone_;
};
// Zone wrappers for std containers:
// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-containers.h#L25-L169
// A wrapper subclass for std::vector to make it easy to construct one
// that uses a zone allocator.
// Used throughout irregexp
template <typename T>
class ZoneVector : public std::vector<T, ZoneAllocator<T>> {
public:
ZoneVector(Zone* zone)
: std::vector<T, ZoneAllocator<T>>(ZoneAllocator<T>(zone)) {}
// Constructs a new vector and fills it with the contents of the range
// [first, last).
template <class Iter>
ZoneVector(Iter first, Iter last, Zone* zone)
: std::vector<T, ZoneAllocator<T>>(first, last, ZoneAllocator<T>(zone)) {}
};
// A wrapper subclass for std::list to make it easy to construct one
// that uses a zone allocator.
// Used in regexp-bytecode-peephole.cc
template <typename T>
class ZoneLinkedList : public std::list<T, ZoneAllocator<T>> {
public:
// Constructs an empty list.
explicit ZoneLinkedList(Zone* zone)
: std::list<T, ZoneAllocator<T>>(ZoneAllocator<T>(zone)) {}
};
// A wrapper subclass for std::set to make it easy to construct one that uses
// a zone allocator.
// Used in regexp-parser.cc
template <typename K, typename Compare = std::less<K>>
class ZoneSet : public std::set<K, Compare, ZoneAllocator<K>> {
public:
// Constructs an empty set.
explicit ZoneSet(Zone* zone)
: std::set<K, Compare, ZoneAllocator<K>>(Compare(),
ZoneAllocator<K>(zone)) {}
};
// A wrapper subclass for std::map to make it easy to construct one that uses
// a zone allocator.
// Used in regexp-bytecode-peephole.cc
template <typename K, typename V, typename Compare = std::less<K>>
class ZoneMap
: public std::map<K, V, Compare, ZoneAllocator<std::pair<const K, V>>> {
public:
// Constructs an empty map.
explicit ZoneMap(Zone* zone)
: std::map<K, V, Compare, ZoneAllocator<std::pair<const K, V>>>(
Compare(), ZoneAllocator<std::pair<const K, V>>(zone)) {}
};
// A wrapper subclass for std::unordered_map to make it easy to construct one
// that uses a zone allocator.
// Used in regexp-bytecode-peephole.cc
template <typename K, typename V, typename Hash = std::hash<K>,
typename KeyEqual = std::equal_to<K>>
class ZoneUnorderedMap
: public std::unordered_map<K, V, Hash, KeyEqual,
ZoneAllocator<std::pair<const K, V>>> {
public:
// Constructs an empty map.
explicit ZoneUnorderedMap(Zone* zone, size_t bucket_count = 100)
: std::unordered_map<K, V, Hash, KeyEqual,
ZoneAllocator<std::pair<const K, V>>>(
bucket_count, Hash(), KeyEqual(),
ZoneAllocator<std::pair<const K, V>>(zone)) {}
};
} // namespace internal
} // namespace v8
#endif // V8_UTIL_FLAG_H_