Issue #2863 - Part 17 - Update ICU source to 78.1

2026-05-26 13:58:49 +00:00 · 2025-12-20 00:43:40 -08:00
parent 3f06fa4c2a
commit a29a80493f
6291 changed files with 4259683 additions and 370834 deletions
@@ -18,20 +18,30 @@ sources = [
   '/intl/icu/source/common/dictionarydata.cpp',
   '/intl/icu/source/common/dtintrv.cpp',
   '/intl/icu/source/common/edits.cpp',
+   '/intl/icu/source/common/emojiprops.cpp',
   '/intl/icu/source/common/errorcode.cpp',
   '/intl/icu/source/common/filteredbrk.cpp',
   '/intl/icu/source/common/filterednormalizer2.cpp',
+   '/intl/icu/source/common/fixedstring.cpp',
   '/intl/icu/source/common/loadednormalizer2impl.cpp',
+   '/intl/icu/source/common/localebuilder.cpp',
+   '/intl/icu/source/common/localematcher.cpp',
+   '/intl/icu/source/common/localeprioritylist.cpp',
   '/intl/icu/source/common/locavailable.cpp',
   '/intl/icu/source/common/locbased.cpp',
   '/intl/icu/source/common/locdispnames.cpp',
+   '/intl/icu/source/common/locdistance.cpp',
   '/intl/icu/source/common/locdspnm.cpp',
   '/intl/icu/source/common/locid.cpp',
   '/intl/icu/source/common/loclikely.cpp',
+   '/intl/icu/source/common/loclikelysubtags.cpp',
   '/intl/icu/source/common/locmap.cpp',
   '/intl/icu/source/common/locresdata.cpp',
   '/intl/icu/source/common/locutil.cpp',
+   '/intl/icu/source/common/lsr.cpp',
+   '/intl/icu/source/common/lstmbe.cpp',
   '/intl/icu/source/common/messagepattern.cpp',
+   '/intl/icu/source/common/mlbe.cpp',
   '/intl/icu/source/common/normalizer2.cpp',
   '/intl/icu/source/common/normalizer2impl.cpp',
   '/intl/icu/source/common/normlzr.cpp',
@@ -51,6 +61,7 @@ sources = [
   '/intl/icu/source/common/rbbitblb.cpp',
   '/intl/icu/source/common/resbund.cpp',
   '/intl/icu/source/common/resource.cpp',
+   '/intl/icu/source/common/restrace.cpp',
   '/intl/icu/source/common/ruleiter.cpp',
   '/intl/icu/source/common/schriter.cpp',
   '/intl/icu/source/common/serv.cpp',
@@ -110,6 +121,8 @@ sources = [
   '/intl/icu/source/common/uloc.cpp',
   '/intl/icu/source/common/uloc_keytype.cpp',
   '/intl/icu/source/common/uloc_tag.cpp',
+   '/intl/icu/source/common/ulocale.cpp',
+   '/intl/icu/source/common/ulocbuilder.cpp',
   '/intl/icu/source/common/umapfile.cpp',
   '/intl/icu/source/common/umath.cpp',
   '/intl/icu/source/common/umutablecptrie.cpp',
@@ -151,6 +164,7 @@ sources = [
   '/intl/icu/source/common/utext.cpp',
   '/intl/icu/source/common/utf_impl.cpp',
   '/intl/icu/source/common/util.cpp',
+   '/intl/icu/source/common/util_props.cpp',
   '/intl/icu/source/common/utrace.cpp',
   '/intl/icu/source/common/utrie.cpp',
   '/intl/icu/source/common/utrie2.cpp',
@@ -190,7 +204,6 @@ other_sources = [
   '/intl/icu/source/common/usc_impl.cpp',
   '/intl/icu/source/common/ushape.cpp',
   '/intl/icu/source/common/ustr_wcs.cpp',
-   '/intl/icu/source/common/util_props.cpp',
 ]
 EXPORTS.unicode += [
   '/intl/icu/source/common/unicode/appendable.h',
@@ -212,6 +225,8 @@ EXPORTS.unicode += [
   '/intl/icu/source/common/unicode/icudataver.h',
   '/intl/icu/source/common/unicode/icuplug.h',
   '/intl/icu/source/common/unicode/idna.h',
+   '/intl/icu/source/common/unicode/localebuilder.h',
+   '/intl/icu/source/common/unicode/localematcher.h',
   '/intl/icu/source/common/unicode/localpointer.h',
   '/intl/icu/source/common/unicode/locdspnm.h',
   '/intl/icu/source/common/unicode/locid.h',
@@ -259,6 +274,8 @@ EXPORTS.unicode += [
   '/intl/icu/source/common/unicode/uiter.h',
   '/intl/icu/source/common/unicode/uldnames.h',
   '/intl/icu/source/common/unicode/uloc.h',
+   '/intl/icu/source/common/unicode/ulocale.h',
+   '/intl/icu/source/common/unicode/ulocbuilder.h',
   '/intl/icu/source/common/unicode/umachine.h',
   '/intl/icu/source/common/unicode/umisc.h',
   '/intl/icu/source/common/unicode/umutablecptrie.h',
@@ -286,6 +303,8 @@ EXPORTS.unicode += [
   '/intl/icu/source/common/unicode/utf32.h',
   '/intl/icu/source/common/unicode/utf8.h',
   '/intl/icu/source/common/unicode/utf_old.h',
+   '/intl/icu/source/common/unicode/utfiterator.h',
+   '/intl/icu/source/common/unicode/utfstring.h',
   '/intl/icu/source/common/unicode/utrace.h',
   '/intl/icu/source/common/unicode/utypes.h',
   '/intl/icu/source/common/unicode/uvernum.h',
@@ -42,13 +42,14 @@ sources = [
   '/intl/icu/source/i18n/decContext.cpp',
   '/intl/icu/source/i18n/decimfmt.cpp',
   '/intl/icu/source/i18n/decNumber.cpp',
+   '/intl/icu/source/i18n/displayoptions.cpp',
   '/intl/icu/source/i18n/double-conversion-bignum-dtoa.cpp',
   '/intl/icu/source/i18n/double-conversion-bignum.cpp',
   '/intl/icu/source/i18n/double-conversion-cached-powers.cpp',
-   '/intl/icu/source/i18n/double-conversion-diy-fp.cpp',
+   '/intl/icu/source/i18n/double-conversion-double-to-string.cpp',
   '/intl/icu/source/i18n/double-conversion-fast-dtoa.cpp',
+   '/intl/icu/source/i18n/double-conversion-string-to-double.cpp',
   '/intl/icu/source/i18n/double-conversion-strtod.cpp',
-   '/intl/icu/source/i18n/double-conversion.cpp',
   '/intl/icu/source/i18n/dtfmtsym.cpp',
   '/intl/icu/source/i18n/dtitvfmt.cpp',
   '/intl/icu/source/i18n/dtitvinf.cpp',
@@ -58,6 +59,10 @@ sources = [
   '/intl/icu/source/i18n/ethpccal.cpp',
   '/intl/icu/source/i18n/fmtable.cpp',
   '/intl/icu/source/i18n/format.cpp',
+   '/intl/icu/source/i18n/formatted_string_builder.cpp',
+   '/intl/icu/source/i18n/formattedval_iterimpl.cpp',
+   '/intl/icu/source/i18n/formattedval_sbimpl.cpp',
+   '/intl/icu/source/i18n/formattedvalue.cpp',
   '/intl/icu/source/i18n/fphdlimp.cpp',
   '/intl/icu/source/i18n/fpositer.cpp',
   '/intl/icu/source/i18n/gregocal.cpp',
@@ -65,16 +70,28 @@ sources = [
   '/intl/icu/source/i18n/hebrwcal.cpp',
   '/intl/icu/source/i18n/indiancal.cpp',
   '/intl/icu/source/i18n/islamcal.cpp',
+   '/intl/icu/source/i18n/iso8601cal.cpp',
   '/intl/icu/source/i18n/japancal.cpp',
   '/intl/icu/source/i18n/listformatter.cpp',
   '/intl/icu/source/i18n/measfmt.cpp',
   '/intl/icu/source/i18n/measunit.cpp',
+   '/intl/icu/source/i18n/measunit_extra.cpp',
   '/intl/icu/source/i18n/measure.cpp',
+   '/intl/icu/source/i18n/messageformat2.cpp',
+   '/intl/icu/source/i18n/messageformat2_arguments.cpp',
+   '/intl/icu/source/i18n/messageformat2_checker.cpp',
+   '/intl/icu/source/i18n/messageformat2_data_model.cpp',
+   '/intl/icu/source/i18n/messageformat2_errors.cpp',
+   '/intl/icu/source/i18n/messageformat2_evaluation.cpp',
+   '/intl/icu/source/i18n/messageformat2_formattable.cpp',
+   '/intl/icu/source/i18n/messageformat2_formatter.cpp',
+   '/intl/icu/source/i18n/messageformat2_function_registry.cpp',
+   '/intl/icu/source/i18n/messageformat2_parser.cpp',
+   '/intl/icu/source/i18n/messageformat2_serializer.cpp',
   '/intl/icu/source/i18n/msgfmt.cpp',
   '/intl/icu/source/i18n/nfrs.cpp',
   '/intl/icu/source/i18n/nfrule.cpp',
   '/intl/icu/source/i18n/nfsubs.cpp',
-   '/intl/icu/source/i18n/nounit.cpp',
   '/intl/icu/source/i18n/number_affixutils.cpp',
   '/intl/icu/source/i18n/number_asformat.cpp',
   '/intl/icu/source/i18n/number_capi.cpp',
@@ -91,13 +108,16 @@ sources = [
   '/intl/icu/source/i18n/number_modifiers.cpp',
   '/intl/icu/source/i18n/number_multiplier.cpp',
   '/intl/icu/source/i18n/number_notation.cpp',
+   '/intl/icu/source/i18n/number_output.cpp',
   '/intl/icu/source/i18n/number_padding.cpp',
   '/intl/icu/source/i18n/number_patternmodifier.cpp',
   '/intl/icu/source/i18n/number_patternstring.cpp',
   '/intl/icu/source/i18n/number_rounding.cpp',
   '/intl/icu/source/i18n/number_scientific.cpp',
+   '/intl/icu/source/i18n/number_simple.cpp',
   '/intl/icu/source/i18n/number_skeletons.cpp',
-   '/intl/icu/source/i18n/number_stringbuilder.cpp',
+   '/intl/icu/source/i18n/number_symbolswrapper.cpp',
+   '/intl/icu/source/i18n/number_usageprefs.cpp',
   '/intl/icu/source/i18n/number_utils.cpp',
   '/intl/icu/source/i18n/numfmt.cpp',
   '/intl/icu/source/i18n/numparse_affixes.cpp',
@@ -107,14 +127,15 @@ sources = [
   '/intl/icu/source/i18n/numparse_impl.cpp',
   '/intl/icu/source/i18n/numparse_parsednumber.cpp',
   '/intl/icu/source/i18n/numparse_scientific.cpp',
-   '/intl/icu/source/i18n/numparse_stringsegment.cpp',
   '/intl/icu/source/i18n/numparse_symbols.cpp',
   '/intl/icu/source/i18n/numparse_validators.cpp',
+   '/intl/icu/source/i18n/numrange_capi.cpp',
   '/intl/icu/source/i18n/numrange_fluent.cpp',
   '/intl/icu/source/i18n/numrange_impl.cpp',
   '/intl/icu/source/i18n/numsys.cpp',
   '/intl/icu/source/i18n/olsontz.cpp',
   '/intl/icu/source/i18n/persncal.cpp',
+   '/intl/icu/source/i18n/pluralranges.cpp',
   '/intl/icu/source/i18n/plurfmt.cpp',
   '/intl/icu/source/i18n/plurrule.cpp',
   '/intl/icu/source/i18n/quantityformatter.cpp',
@@ -133,6 +154,7 @@ sources = [
   '/intl/icu/source/i18n/smpdtfst.cpp',
   '/intl/icu/source/i18n/sortkey.cpp',
   '/intl/icu/source/i18n/standardplural.cpp',
+   '/intl/icu/source/i18n/string_segment.cpp',
   '/intl/icu/source/i18n/stsearch.cpp',
   '/intl/icu/source/i18n/taiwncal.cpp',
   '/intl/icu/source/i18n/timezone.cpp',
@@ -157,6 +179,10 @@ sources = [
   '/intl/icu/source/i18n/uitercollationiterator.cpp',
   '/intl/icu/source/i18n/ulistformatter.cpp',
   '/intl/icu/source/i18n/umsg.cpp',
+   '/intl/icu/source/i18n/units_complexconverter.cpp',
+   '/intl/icu/source/i18n/units_converter.cpp',
+   '/intl/icu/source/i18n/units_data.cpp',
+   '/intl/icu/source/i18n/units_router.cpp',
   '/intl/icu/source/i18n/unum.cpp',
   '/intl/icu/source/i18n/unumsys.cpp',
   '/intl/icu/source/i18n/upluralrules.cpp',
@@ -245,6 +271,7 @@ EXPORTS.unicode += [
   '/intl/icu/source/i18n/unicode/datefmt.h',
   '/intl/icu/source/i18n/unicode/dcfmtsym.h',
   '/intl/icu/source/i18n/unicode/decimfmt.h',
+   '/intl/icu/source/i18n/unicode/displayoptions.h',
   '/intl/icu/source/i18n/unicode/dtfmtsym.h',
   '/intl/icu/source/i18n/unicode/dtitvfmt.h',
   '/intl/icu/source/i18n/unicode/dtitvinf.h',
@@ -253,6 +280,8 @@ EXPORTS.unicode += [
   '/intl/icu/source/i18n/unicode/fieldpos.h',
   '/intl/icu/source/i18n/unicode/fmtable.h',
   '/intl/icu/source/i18n/unicode/format.h',
+   '/intl/icu/source/i18n/unicode/formattednumber.h',
+   '/intl/icu/source/i18n/unicode/formattedvalue.h',
   '/intl/icu/source/i18n/unicode/fpositer.h',
   '/intl/icu/source/i18n/unicode/gender.h',
   '/intl/icu/source/i18n/unicode/gregocal.h',
@@ -260,6 +289,12 @@ EXPORTS.unicode += [
   '/intl/icu/source/i18n/unicode/measfmt.h',
   '/intl/icu/source/i18n/unicode/measunit.h',
   '/intl/icu/source/i18n/unicode/measure.h',
+   '/intl/icu/source/i18n/unicode/messageformat2.h',
+   '/intl/icu/source/i18n/unicode/messageformat2_arguments.h',
+   '/intl/icu/source/i18n/unicode/messageformat2_data_model.h',
+   '/intl/icu/source/i18n/unicode/messageformat2_data_model_names.h',
+   '/intl/icu/source/i18n/unicode/messageformat2_formattable.h',
+   '/intl/icu/source/i18n/unicode/messageformat2_function_registry.h',
   '/intl/icu/source/i18n/unicode/msgfmt.h',
   '/intl/icu/source/i18n/unicode/nounit.h',
   '/intl/icu/source/i18n/unicode/numberformatter.h',
@@ -276,6 +311,7 @@ EXPORTS.unicode += [
   '/intl/icu/source/i18n/unicode/scientificnumberformatter.h',
   '/intl/icu/source/i18n/unicode/search.h',
   '/intl/icu/source/i18n/unicode/selfmt.h',
+   '/intl/icu/source/i18n/unicode/simplenumberformatter.h',
   '/intl/icu/source/i18n/unicode/simpletz.h',
   '/intl/icu/source/i18n/unicode/smpdtfmt.h',
   '/intl/icu/source/i18n/unicode/sortkey.h',
@@ -297,8 +333,11 @@ EXPORTS.unicode += [
   '/intl/icu/source/i18n/unicode/udat.h',
   '/intl/icu/source/i18n/unicode/udateintervalformat.h',
   '/intl/icu/source/i18n/unicode/udatpg.h',
+   '/intl/icu/source/i18n/unicode/udisplayoptions.h',
   '/intl/icu/source/i18n/unicode/ufieldpositer.h',
   '/intl/icu/source/i18n/unicode/uformattable.h',
+   '/intl/icu/source/i18n/unicode/uformattednumber.h',
+   '/intl/icu/source/i18n/unicode/uformattedvalue.h',
   '/intl/icu/source/i18n/unicode/ugender.h',
   '/intl/icu/source/i18n/unicode/ulistformatter.h',
   '/intl/icu/source/i18n/unicode/ulocdata.h',
@@ -306,12 +345,15 @@ EXPORTS.unicode += [
   '/intl/icu/source/i18n/unicode/unirepl.h',
   '/intl/icu/source/i18n/unicode/unum.h',
   '/intl/icu/source/i18n/unicode/unumberformatter.h',
+   '/intl/icu/source/i18n/unicode/unumberoptions.h',
+   '/intl/icu/source/i18n/unicode/unumberrangeformatter.h',
   '/intl/icu/source/i18n/unicode/unumsys.h',
   '/intl/icu/source/i18n/unicode/upluralrules.h',
   '/intl/icu/source/i18n/unicode/uregex.h',
   '/intl/icu/source/i18n/unicode/uregion.h',
   '/intl/icu/source/i18n/unicode/ureldatefmt.h',
   '/intl/icu/source/i18n/unicode/usearch.h',
+   '/intl/icu/source/i18n/unicode/usimplenumberformatter.h',
   '/intl/icu/source/i18n/unicode/uspoof.h',
   '/intl/icu/source/i18n/unicode/utmscale.h',
   '/intl/icu/source/i18n/unicode/utrans.h',
@@ -1,5 +1,5 @@
-commit 4f715ae124c418a15a3fa4d8fb14f406576a7ee5
-Author: Jeff Genovy <29107334+jefgen@users.noreply.github.com>
-Date:   Wed Apr 10 17:32:11 2019 -0700
+commit b66f1dfd8012eba718813c3938cb6373a3af6f84
+Author: Robin Leroy <egg.robin.leroy@gmail.com>
+Date:   Wed Dec 17 00:54:31 2025 +0100

-    ICU-20536 ICU 63.2 release tasks + tzdata2019a
+    ICU-23299 wstring woes
@@ -0,0 +1,11 @@
+# © 2020 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+---
+Language: Cpp
+BasedOnStyle: LLVM
+IndentWidth: 4
+ColumnLimit: 105
+AllowShortBlocksOnASingleLine: false 
+AllowShortIfStatementsOnASingleLine: true 
+...
@@ -1,4 +1,4 @@
-# Doxyfile 1.8.13
+# Doxyfile 1.9.1

 # Copyright (C) 2016 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
@@ -25,11 +25,11 @@
 # Project related configuration options
 #---------------------------------------------------------------------------

-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all text
-# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
-# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
-# for the list of possible encodings.
+# This tag specifies the encoding used for all characters in the configuration
+# file that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
 # The default value is: UTF-8.

 DOXYFILE_ENCODING      = UTF-8
@@ -101,6 +101,14 @@ ALLOW_UNICODE_NAMES    = NO

 OUTPUT_LANGUAGE        = English

+# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all generated output in the proper direction.
+# Possible values are: None, LTR, RTL and Context.
+# The default value is: None.
+
+OUTPUT_TEXT_DIRECTION  = None
+
 # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
 # descriptions after the members that are listed in the file and class
 # documentation (similar to Javadoc). Set to NO to disable this.
@@ -148,7 +156,7 @@ INLINE_INHERITED_MEMB  = NO
 # shortest path that makes the file name unique will be used
 # The default value is: YES.

-FULL_PATH_NAMES        = NO
+FULL_PATH_NAMES        = YES

 # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
 # Stripping is only done if one of the specified strings matches the left-hand
@@ -160,7 +168,7 @@ FULL_PATH_NAMES        = NO
 # will be relative from the directory where doxygen is started.
 # This tag requires that the tag FULL_PATH_NAMES is set to YES.

-STRIP_FROM_PATH        =
+STRIP_FROM_PATH        = @srcdir@

 # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
 # path mentioned in the documentation of a class, which tells the reader which
@@ -187,6 +195,16 @@ SHORT_NAMES            = NO

 JAVADOC_AUTOBRIEF      = YES

+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***************
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER         = NO
+
 # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
 # line (until the first dot) of a Qt-style comment as the brief description. If
 # set to NO, the Qt-style will behave just like regular Qt-style comments (thus
@@ -207,6 +225,14 @@ QT_AUTOBRIEF           = NO

 MULTILINE_CPP_IS_BRIEF = NO

+# By default Python docstrings are displayed as preformatted text and doxygen's
+# special commands cannot be used. By setting PYTHON_DOCSTRING to NO,
+# doxygen's special commands can be used and the contents of the docstring
+# documentation blocks are shown as doxygen documentation.
+# The default value is: YES.
+
+PYTHON_DOCSTRING       = YES
+
 # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
 # documentation from any documented member that it re-implements.
 # The default value is: YES.
@@ -234,7 +260,12 @@ TAB_SIZE               = 8
 # will allow you to put the command \sideeffect (or @sideeffect) in the
 # documentation, which will result in a user-defined paragraph with heading
 # "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines.
+# newlines (in the resulting output). You can put ^^ in the value part of an
+# alias to insert a newline as if a physical newline was in the original file.
+# When you need a literal { or } or , in the value part of an alias you have to
+# escape them by means of a backslash (\). This can lead to conflicts with the
+# commands \{ and \}; for these it is advised to use the versions @{ and @} or
+# a double escape (\\{ and \\}).

 ALIASES                = "memo=\par Note:\n" \
                         "draft=\xrefitem draft \"Draft\" \"Draft List\"  This API may be changed in the future versions and was introduced in" \
@@ -244,12 +275,6 @@ ALIASES                = "memo=\par Note:\n" \
                         "system=\xrefitem system \"System\" \"System List\" \n Do not use unless you know what you are doing." \
                         "internal=\xrefitem internal \"Internal\"  \"Internal List\"  Do not use. This API is for internal use only."

-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST              =
-
 # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
 # only. Doxygen will then generate output that is more tailored for C. For
 # instance, some of the names that are used will be different. The list of all
@@ -278,28 +303,40 @@ OPTIMIZE_FOR_FORTRAN   = NO

 OPTIMIZE_OUTPUT_VHDL   = NO

+# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
+# sources only. Doxygen will then generate output that is more tailored for that
+# language. For instance, namespaces will be presented as modules, types will be
+# separated into more groups, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_SLICE  = NO
+
 # Doxygen selects the parser to use depending on the extension of the files it
 # parses. With this tag you can assign which parser to use for a given
 # extension. Doxygen has a built-in mapping, but you can override or extend it
 # using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
-# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
-# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
-# Fortran. In the later case the parser tries to guess whether the code is fixed
-# or free formatted code, this is the default for Fortran type files), VHDL. For
-# instance to make doxygen treat .inc files as Fortran files (default is PHP),
-# and .f files as C (default is Fortran), use: inc=Fortran f=C.
+# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
+# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL,
+# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
+# tries to guess whether the code is fixed or free formatted code, this is the
+# default for Fortran type files). For instance to make doxygen treat .inc files
+# as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C.
 #
 # Note: For files without extension you can use no_extension as a placeholder.
 #
 # Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note: see also the list of default file extension mappings.

 EXTENSION_MAPPING      =

 # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
 # according to the Markdown format, which allows for more readable
-# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# documentation. See https://daringfireball.net/projects/markdown/ for details.
 # The output of markdown processing is further processed by doxygen, so you can
 # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
 # case of backward compatibilities issues.
@@ -311,7 +348,7 @@ MARKDOWN_SUPPORT       = YES
 # to that level are automatically included in the table of contents, even if
 # they do not have an id attribute.
 # Note: This feature currently applies only to Markdown headings.
-# Minimum value: 0, maximum value: 99, default value: 0.
+# Minimum value: 0, maximum value: 99, default value: 5.
 # This tag requires that the tag MARKDOWN_SUPPORT is set to YES.

 TOC_INCLUDE_HEADINGS   = 0
@@ -341,7 +378,7 @@ BUILTIN_STL_SUPPORT    = NO
 CPP_CLI_SUPPORT        = NO

 # Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
-# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
 # will parse them like normal C++ but will assume all classes use public instead
 # of private inheritance when no explicit protection keyword is present.
 # The default value is: NO.
@@ -427,6 +464,19 @@ TYPEDEF_HIDES_STRUCT   = NO

 LOOKUP_CACHE_SIZE      = 0

+# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use
+# during processing. When set to 0 doxygen will base this on the number of
+# cores available in the system. You can set it explicitly to a value larger
+# than 0 to get more control over the balance between CPU load and processing
+# speed. At this moment only the input processing can be done using multiple
+# threads. Since this is still an experimental feature the default is set to 1,
+# which effectively disables parallel processing. Please report any issues you
+# encounter. Generating dot graphs in parallel is controlled by the
+# DOT_NUM_THREADS setting.
+# Minimum value: 0, maximum value: 32, default value: 1.
+
+NUM_PROC_THREADS       = 1
+
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
@@ -447,6 +497,12 @@ EXTRACT_ALL            = NO

 EXTRACT_PRIVATE        = NO

+# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
+# methods of a class will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIV_VIRTUAL   = NO
+
 # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
 # scope will be included in the documentation.
 # The default value is: NO.
@@ -484,6 +540,13 @@ EXTRACT_LOCAL_METHODS  = NO

 EXTRACT_ANON_NSPACES   = NO

+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
 # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
 # undocumented members inside documented classes or files. If set to NO these
 # members will be included in the various overviews, but no documentation
@@ -501,8 +564,8 @@ HIDE_UNDOC_MEMBERS     = NO
 HIDE_UNDOC_CLASSES     = NO

 # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# (class|struct|union) declarations. If set to NO, these declarations will be
-# included in the documentation.
+# declarations. If set to NO, these declarations will be included in the
+# documentation.
 # The default value is: NO.

 HIDE_FRIEND_COMPOUNDS  = NO
@@ -521,11 +584,18 @@ HIDE_IN_BODY_DOCS      = NO

 INTERNAL_DOCS          = YES

-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
 # The default value is: system dependent.

 CASE_SENSE_NAMES       = YES
@@ -712,7 +782,7 @@ LAYOUT_FILE            =
 # The CITE_BIB_FILES tag can be used to specify one or more bib files containing
 # the reference definitions. This must be a list of .bib files. The .bib
 # extension is automatically appended if omitted. This requires the bibtex tool
-# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
 # For LaTeX the style of the bibliography can be controlled using
 # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
 # search path. See also \cite for info how to create references.
@@ -757,13 +827,17 @@ WARN_IF_DOC_ERROR      = YES
 # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
 # are documented, but have no documentation for their parameters or return
 # value. If set to NO, doxygen will only warn about wrong or incomplete
-# parameter documentation, but not about the absence of documentation.
+# parameter documentation, but not about the absence of documentation. If
+# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
 # The default value is: NO.

 WARN_NO_PARAMDOC       = NO

 # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
-# a warning is encountered.
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# Possible values are: NO, YES and FAIL_ON_WARNINGS.
 # The default value is: NO.

 WARN_AS_ERROR          = NO
@@ -802,8 +876,8 @@ INPUT                  = @srcdir@/common/unicode \
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
 # libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: http://www.gnu.org/software/libiconv) for the list of
-# possible encodings.
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
 # The default value is: UTF-8.

 INPUT_ENCODING         = UTF-8
@@ -816,11 +890,15 @@ INPUT_ENCODING         = UTF-8
 # need to set EXTENSION_MAPPING for the extension otherwise the files are not
 # read by doxygen.
 #
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
 # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
 # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
 # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
-# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08,
-# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf.
+# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
+# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
+# *.ucf, *.qsf and *.ice.

 FILE_PATTERNS          = *.h

@@ -979,7 +1057,7 @@ INLINE_SOURCES         = NO
 STRIP_CODE_COMMENTS    = YES

 # If the REFERENCED_BY_RELATION tag is set to YES then for each documented
-# function all documented functions referencing it will be listed.
+# entity all documented functions referencing it will be listed.
 # The default value is: NO.

 REFERENCED_BY_RELATION = YES
@@ -1011,12 +1089,12 @@ SOURCE_TOOLTIPS        = YES
 # If the USE_HTAGS tag is set to YES then the references to source code will
 # point to the HTML generated by the htags(1) tool instead of doxygen built-in
 # source browser. The htags tool is part of GNU's global source tagging system
-# (see http://www.gnu.org/software/global/global.html). You will need version
+# (see https://www.gnu.org/software/global/global.html). You will need version
 # 4.8.6 or higher.
 #
 # To use it do the following:
 # - Install the latest version of global
-# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
 # - Make sure the INPUT points to the root of the source tree
 # - Run doxygen as normal
 #
@@ -1039,16 +1117,22 @@ USE_HTAGS              = NO
 VERBATIM_HEADERS       = YES

 # If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
-# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
-# cost of reduced performance. This can be particularly helpful with template
-# rich C++ code for which doxygen's built-in parser lacks the necessary type
-# information.
+# clang parser (see:
+# http://clang.llvm.org/) for more accurate parsing at the cost of reduced
+# performance. This can be particularly helpful with template rich C++ code for
+# which doxygen's built-in parser lacks the necessary type information.
 # Note: The availability of this option depends on whether or not doxygen was
-# generated with the -Duse-libclang=ON option for CMake.
+# generated with the -Duse_libclang=ON option for CMake.
 # The default value is: NO.

 CLANG_ASSISTED_PARSING = NO

+# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to
+# YES then doxygen will add the directory of each input to the include path.
+# The default value is: YES.
+
+CLANG_ADD_INC_PATHS    = YES
+
 # If clang assisted parsing is enabled you can provide the compiler with command
 # line options that you would normally use when invoking the compiler. Note that
 # the include paths will already be set by doxygen for the files and directories
@@ -1057,6 +1141,19 @@ CLANG_ASSISTED_PARSING = NO

 CLANG_OPTIONS          =

+# If clang assisted parsing is enabled you can provide the clang parser with the
+# path to the directory containing a file called compile_commands.json. This
+# file is the compilation database (see:
+# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the
+# options used when the source files were built. This is equivalent to
+# specifying the -p option to a clang tool, such as clang-check. These options
+# will then be passed to the parser. Any options specified with CLANG_OPTIONS
+# will be added as well.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse_libclang=ON option for CMake.
+
+CLANG_DATABASE_PATH    =
+
 #---------------------------------------------------------------------------
 # Configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
@@ -1068,13 +1165,6 @@ CLANG_OPTIONS          =

 ALPHABETICAL_INDEX     = YES

-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX    = 5
-
 # In case all classes in a project start with a common prefix, all classes will
 # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
 # can be used to specify a prefix (or a list of prefixes) that should be ignored
@@ -1175,7 +1265,7 @@ HTML_EXTRA_FILES       =
 # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
 # will adjust the colors in the style sheet and background images according to
 # this color. Hue is specified as an angle on a colorwheel, see
-# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
 # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
 # purple, and 360 is red again.
 # Minimum value: 0, maximum value: 359, default value: 220.
@@ -1211,6 +1301,17 @@ HTML_COLORSTYLE_GAMMA  = 80

 HTML_TIMESTAMP         = NO

+# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
+# documentation will contain a main index with vertical navigation menus that
+# are dynamically created via JavaScript. If disabled, the navigation index will
+# consist of multiple levels of tabs that are statically embedded in every HTML
+# page. Disable this option to support browsers that do not have JavaScript,
+# like the Qt help browser.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_MENUS     = YES
+
 # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
 # documentation will contain sections that can be hidden and shown after the
 # page has loaded.
@@ -1234,13 +1335,14 @@ HTML_INDEX_NUM_ENTRIES = 100

 # If the GENERATE_DOCSET tag is set to YES, additional index files will be
 # generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: http://developer.apple.com/tools/xcode/), introduced with
-# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
-# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
-# for more information.
+# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
+# genXcode/_index.html for more information.
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.

@@ -1279,8 +1381,8 @@ DOCSET_PUBLISHER_NAME  = Publisher
 # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
 # additional HTML index files: index.hhp, index.hhc, and index.hhk. The
 # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
+# (see:
+# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
 #
 # The HTML Help Workshop contains a compiler that can convert all HTML output
 # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
@@ -1310,7 +1412,7 @@ CHM_FILE               =
 HHC_LOCATION           =

 # The GENERATE_CHI flag controls if a separate .chi index file is generated
-# (YES) or that it should be included in the master .chm file (NO).
+# (YES) or that it should be included in the main .chm file (NO).
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTMLHELP is set to YES.

@@ -1355,7 +1457,8 @@ QCH_FILE               =

 # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
 # Project output. For more information please see Qt Help Project / Namespace
-# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_QHP is set to YES.

@@ -1363,8 +1466,8 @@ QHP_NAMESPACE          = org.doxygen.Project

 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
 # Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
-# folders).
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
 # The default value is: doc.
 # This tag requires that the tag GENERATE_QHP is set to YES.

@@ -1372,30 +1475,30 @@ QHP_VIRTUAL_FOLDER     = doc

 # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
 # filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHP_CUST_FILTER_NAME   =

 # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
 # custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHP_CUST_FILTER_ATTRS  =

 # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
 # project's filter section matches. Qt Help Project / Filter Attributes (see:
-# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHP_SECT_FILTER_ATTRS  =

-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
 # This tag requires that the tag GENERATE_QHP is set to YES.

 QHG_LOCATION           =
@@ -1472,6 +1575,17 @@ TREEVIEW_WIDTH         = 250

 EXT_LINKS_IN_WINDOW    = NO

+# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg
+# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
+# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
+# the HTML output. These images will generally look nicer at scaled resolutions.
+# Possible values are: png (the default) and svg (looks nicer but requires the
+# pdf2svg or inkscape tool).
+# The default value is: png.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FORMULA_FORMAT    = png
+
 # Use this tag to change the font size of LaTeX formulas included as images in
 # the HTML documentation. When you change the font size after a successful
 # doxygen run you need to manually remove any form_*.png images from the HTML
@@ -1481,7 +1595,7 @@ EXT_LINKS_IN_WINDOW    = NO

 FORMULA_FONTSIZE       = 10

-# Use the FORMULA_TRANPARENT tag to determine whether or not the images
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
 # generated for formulas are transparent PNGs. Transparent PNGs are not
 # supported properly for IE 6.0, but are supported on all modern browsers.
 #
@@ -1492,8 +1606,14 @@ FORMULA_FONTSIZE       = 10

 FORMULA_TRANSPARENT    = YES

+# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
+# to create new LaTeX commands to be used in formulas as building blocks. See
+# the section "Including formulas" for details.
+
+FORMULA_MACROFILE      =
+
 # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
-# http://www.mathjax.org) which uses client side Javascript for the rendering
+# https://www.mathjax.org) which uses client side JavaScript for the rendering
 # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
 # installed or if you want to formulas look prettier in the HTML output. When
 # enabled you may also need to install MathJax separately and configure the path
@@ -1505,7 +1625,7 @@ USE_MATHJAX            = NO

 # When MathJax is enabled you can set the default output format to be used for
 # the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
 # Possible values are: HTML-CSS (which is slower, but has the best
 # compatibility), NativeMML (i.e. MathML) and SVG.
 # The default value is: HTML-CSS.
@@ -1520,8 +1640,8 @@ MATHJAX_FORMAT         = HTML-CSS
 # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
 # Content Delivery Network so you can quickly see the result without installing
 # MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from http://www.mathjax.org before deployment.
-# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# MathJax from https://www.mathjax.org before deployment.
+# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.
 # This tag requires that the tag USE_MATHJAX is set to YES.

 MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
@@ -1535,7 +1655,8 @@ MATHJAX_EXTENSIONS     =

 # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
 # of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
 # example see the documentation.
 # This tag requires that the tag USE_MATHJAX is set to YES.

@@ -1563,7 +1684,7 @@ MATHJAX_CODEFILE       =
 SEARCHENGINE           = NO

 # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a web server instead of a web client using Javascript. There
+# implemented using a web server instead of a web client using JavaScript. There
 # are two flavors of web server based searching depending on the EXTERNAL_SEARCH
 # setting. When disabled, doxygen will generate a PHP script for searching and
 # an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
@@ -1582,7 +1703,8 @@ SERVER_BASED_SEARCH    = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/).
+# Xapian (see:
+# https://xapian.org/).
 #
 # See the section "External Indexing and Searching" for details.
 # The default value is: NO.
@@ -1595,8 +1717,9 @@ EXTERNAL_SEARCH        = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/). See the section "External Indexing and
-# Searching" for details.
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
 # This tag requires that the tag SEARCHENGINE is set to YES.

 SEARCHENGINE_URL       =
@@ -1647,21 +1770,35 @@ LATEX_OUTPUT           = latex
 # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
 # invoked.
 #
-# Note that when enabling USE_PDFLATEX this option is only used for generating
-# bitmaps for formulas in the HTML output, but not in the Makefile that is
-# written to the output directory.
-# The default file is: latex.
+# Note that when not enabling USE_PDFLATEX the default is latex; when enabling
+# USE_PDFLATEX the default is pdflatex, and when in the latter case latex is
+# chosen this is overwritten by pdflatex. For specific output languages the
+# default may have been set differently; this depends on the implementation of
+# the output language.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

 LATEX_CMD_NAME         = latex

 # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
 # index for LaTeX.
+# Note: This tag is used in the Makefile / make.bat.
+# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
+# (.tex).
 # The default file is: makeindex.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

 MAKEINDEX_CMD_NAME     = makeindex

+# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
+# generate index for LaTeX. In case there is no backslash (\) as first character
+# it will be automatically added in the LaTeX code.
+# Note: This tag is used in the generated output file (.tex).
+# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
+# The default value is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_MAKEINDEX_CMD    = makeindex
+
 # If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
 # documents. This may be useful for small projects and may help to save some
 # trees in general.
@@ -1677,7 +1814,7 @@ COMPACT_LATEX          = NO
 # The default value is: a4.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

-PAPER_TYPE             = a4wide
+PAPER_TYPE             = a4

 # The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
 # that should be included in the LaTeX output. The package can be specified just
@@ -1746,9 +1883,11 @@ LATEX_EXTRA_FILES      =

 PDF_HYPERLINKS         = NO

-# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
-# the PDF file directly from the LaTeX files. Set this option to YES, to get a
-# higher quality PDF documentation.
+# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
+# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
+# files. Set this option to YES, to get a higher quality PDF documentation.
+#
+# See also section LATEX_CMD_NAME for selecting the engine.
 # The default value is: YES.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

@@ -1782,7 +1921,7 @@ LATEX_SOURCE_CODE      = NO

 # The LATEX_BIB_STYLE tag can be used to specify the style to use for the
 # bibliography, e.g. plainnat, or ieeetr. See
-# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
 # The default value is: plain.
 # This tag requires that the tag GENERATE_LATEX is set to YES.

@@ -1796,6 +1935,14 @@ LATEX_BIB_STYLE        = plain

 LATEX_TIMESTAMP        = NO

+# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
+# path from which the emoji images will be read. If a relative path is entered,
+# it will be relative to the LATEX_OUTPUT directory. If left blank the
+# LATEX_OUTPUT directory will be used.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EMOJI_DIRECTORY  =
+
 #---------------------------------------------------------------------------
 # Configuration options related to the RTF output
 #---------------------------------------------------------------------------
@@ -1835,9 +1982,9 @@ COMPACT_RTF            = NO

 RTF_HYPERLINKS         = NO

-# Load stylesheet definitions from file. Syntax is similar to doxygen's config
-# file, i.e. a series of assignments. You only have to provide replacements,
-# missing definitions are set to their default value.
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# configuration file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
 #
 # See also section "Doxygen usage" for information on how to generate the
 # default style sheet that doxygen normally uses.
@@ -1846,8 +1993,8 @@ RTF_HYPERLINKS         = NO
 RTF_STYLESHEET_FILE    =

 # Set optional variables used in the generation of an RTF document. Syntax is
-# similar to doxygen's config file. A template extensions file can be generated
-# using doxygen -e rtf extensionFile.
+# similar to doxygen's configuration file. A template extensions file can be
+# generated using doxygen -e rtf extensionFile.
 # This tag requires that the tag GENERATE_RTF is set to YES.

 RTF_EXTENSIONS_FILE    =
@@ -1933,6 +2080,13 @@ XML_OUTPUT             = xml

 XML_PROGRAMLISTING     = YES

+# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
+# namespace members in file scope as well, matching the HTML output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_NS_MEMB_FILE_SCOPE = NO
+
 #---------------------------------------------------------------------------
 # Configuration options related to the DOCBOOK output
 #---------------------------------------------------------------------------
@@ -1965,9 +2119,9 @@ DOCBOOK_PROGRAMLISTING = NO
 #---------------------------------------------------------------------------

 # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
-# AutoGen Definitions (see http://autogen.sf.net) file that captures the
-# structure of the code including all documentation. Note that this feature is
-# still experimental and incomplete at the moment.
+# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# the structure of the code including all documentation. Note that this feature
+# is still experimental and incomplete at the moment.
 # The default value is: NO.

 GENERATE_AUTOGEN_DEF   = NO
@@ -2079,14 +2233,13 @@ PREDEFINED             = U_EXPORT2= \
                         U_CDECL_BEGIN= \
                         U_CDECL_END= \
                         "U_NAMESPACE_BEGIN=namespace icu{" \
-                         "U_NAMESPACE_END=}" \
+                         U_NAMESPACE_END=} \
                         U_SHOW_CPLUSPLUS_API=1 \
                         U_DEFINE_LOCAL_OPEN_POINTER()= \
                         U_IN_DOXYGEN=1 \
                         U_CHAR16_IS_TYPEDEF=0 \
-                         U_CPLUSPLUS_VERSION=11 \
-                         U_WCHAR_IS_UTF16 \
-                         U_FINAL=final
+                         U_CPLUSPLUS_VERSION=17 \
+                         U_WCHAR_IS_UTF16

 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
@@ -2130,7 +2283,7 @@ TAGFILES               =
 # tag file that is based on the input files it reads. See section "Linking to
 # external documentation" for more information about the usage of tag files.

-GENERATE_TAGFILE       = "@builddir@/doc/html/icudocs.tag"
+GENERATE_TAGFILE       = @builddir@/doc/html/icudocs.tag

 # If the ALLEXTERNALS tag is set to YES, all external class will be listed in
 # the class index. If set to NO, only the inherited external classes will be
@@ -2153,12 +2306,6 @@ EXTERNAL_GROUPS        = YES

 EXTERNAL_PAGES         = YES

-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of 'which perl').
-# The default file (with absolute path) is: /usr/bin/perl.
-
-PERL_PATH              = /usr/bin/perl
-
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool
 #---------------------------------------------------------------------------
@@ -2172,15 +2319,6 @@ PERL_PATH              = /usr/bin/perl

 CLASS_DIAGRAMS         = YES

-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see:
-# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH            =
-
 # You can include diagrams made with dia in doxygen documentation. Doxygen will
 # then run dia to produce the diagram and insert it in the documentation. The
 # DIA_PATH tag allows you to specify the directory where the dia binary resides.
@@ -2278,10 +2416,32 @@ UML_LOOK               = NO
 # but if the number exceeds 15, the total amount of fields shown is limited to
 # 10.
 # Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
+# This tag requires that the tag UML_LOOK is set to YES.

 UML_LIMIT_NUM_FIELDS   = 10

+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS        = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will be wrapped across multiple lines. Some heuristics are
+# applied to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD     = 17
+
 # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
 # collaboration graphs will show the relations between templates and their
 # instances.
@@ -2473,9 +2633,11 @@ DOT_MULTI_TARGETS      = NO

 GENERATE_LEGEND        = YES

-# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
 # files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc and
+# plantuml temporary files.
 # The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.

 DOT_CLEANUP            = YES
@@ -38,7 +38,7 @@ subdir = .
@TOOLS_TRUE@@TESTS_TRUE@TEST = test
@SAMPLES_TRUE@SAMPLE = samples
@TOOLS_TRUE@TOOLS = tools
-@TOOLS_TRUE@DATASUBDIR = data
+@DATA_TRUE@DATASUBDIR = data

 ## pkgconfig setup. Always have uc and i18n. Others are optional.
 ALL_PKGCONFIG_SUFFIX=uc i18n
@@ -77,7 +77,7 @@ EXTRA_DATA =

 ## List of phony targets
 .PHONY : all all-local all-recursive install install-local install-udata install-udata-files install-udata-dlls		\
-install-recursive clean clean-local clean-recursive distclean		\
+install-recursive install-manx clean clean-local clean-recursive distclean		\
 distclean-local distclean-recursive doc dist dist-local dist-recursive	\
 check check-local check-recursive clean-recursive-with-twist install-icu \
 doc install-doc tests icu4j-data icu4j-data-install update-windows-makefiles xcheck-local xcheck-recursive xperf xcheck xperf-recursive \
@@ -88,9 +88,9 @@ check-exhaustive check-exhaustive-local check-exhaustive-recursive releaseDist

 ## List of standard targets
 all: all-local all-recursive
-install: install-recursive install-local
+install: install-recursive
 clean: clean-recursive-with-twist clean-local
-distclean : distclean-recursive distclean-local
+distclean : distclean-recursive
 dist: dist-recursive
 check: all check-recursive
 check-recursive: all
@@ -133,7 +133,7 @@ doc/html/index.html: Doxyfile $(wildcard ./common/unicode/platform.h $(srcdir)/c
 Doxyfile: $(srcdir)/Doxyfile.in
 	CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status

-$(DOCZIP): doc
+$(DOCZIP): doc-searchengine
 	-$(RMV) $(DOCZIP)
 	( cd doc/html ; zip -r ../../$(DOCZIP) * )
 endif
@@ -210,7 +210,7 @@ endif
 ifeq ($(DOXYGEN),)
 install-doc:
 else
-install-doc: doc
+install-doc: doc-searchengine
 	$(RM) -r $(DESTDIR)$(docdir)/$(docsubdir)
 	$(MKINSTALLDIRS) $(DESTDIR)$(docdir)/$(docsubdir)
 	$(INSTALL_DATA) $(docfiles) $(DESTDIR)$(docdir)/$(docsubdir)
@@ -244,7 +244,7 @@ xcheck-local: $(top_builddir)/config/icu-config $(top_builddir)/config/Makefile.
 	@echo verifying that icu-config --selfcheck can operate
 	@test "passed" = "$(shell $(top_builddir)/config/icu-config --selfcheck 2>&1)" || (echo "FAIL: icu-config could not run properly." ; exit 1)
 	@echo verifying that $(MAKE) -f Makefile.inc selfcheck can operate
-	@test "passed" = "$(shell $(MAKE) --no-print-directory -f $(top_builddir)/config/Makefile.inc SELFCHECK=1 selfcheck)" || (echo "FAIL: Makefile.inc could not run properly." ; exit 1 )
+	@test "passed" = "$(shell MAKEFLAGS= $(MAKE) --no-print-directory -f $(top_builddir)/config/Makefile.inc SELFCHECK=1 selfcheck)" || (echo "FAIL: Makefile.inc could not run properly." ; exit 1 )
 	@echo "PASS: config selfcheck OK"

 #$(srcdir)/configure : $(srcdir)/configure.ac $(top_srcdir)/aclocal.m4
@@ -279,24 +279,36 @@ config/icu-uc.pc: config/icu.pc Makefile icudefs.mk
 	@cat config/icu.pc > $@
 	@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Common and Data libraries" >> $@
 	@echo "Name: $(PACKAGE)-uc" >> $@
-	@echo "Libs:" '-L$${libdir}' "${ICULIBS_UC}" "${ICULIBS_DT}" >> $@
-	@echo "Libs.private:" '$${baselibs}' >> $@
+ifeq ($(ENABLE_SHARED),)
+	@echo "Libs:" '-L$${libdir}' "${ICULIBS_UC}" "${ICULIBS_DT}" '$${baselibs}' >> $@
+else
+	@echo "Libs:" '-L$${libdir}' "${ICULIBS_UC}" >> $@
+	@echo "Libs.private:" "${ICULIBS_DT}" '$${baselibs}' >> $@
+endif
 	@echo $@ updated.

 config/icu-i18n.pc: config/icu.pc Makefile icudefs.mk
 	@cat config/icu.pc > $@
 	@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Internationalization library" >> $@
 	@echo "Name: $(PACKAGE)-i18n" >> $@
+ifeq ($(ENABLE_SHARED),)
 	@echo "Requires: icu-uc" >> $@
-	@echo "Libs:" "${ICULIBS_I18N}" >> $@
+else
+	@echo "Requires.private: icu-uc" >> $@
+endif
+	@echo "Libs:" '-L$${libdir}' "${ICULIBS_I18N}" >> $@
 	@echo $@ updated.

 config/icu-io.pc: config/icu.pc Makefile icudefs.mk
 	@cat config/icu.pc > $@
 	@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Stream and I/O Library" >> $@
 	@echo "Name: $(PACKAGE)-io" >> $@
+ifeq ($(ENABLE_SHARED),)
 	@echo "Requires: icu-i18n" >> $@
-	@echo "Libs:" "${ICULIBS_IO}" >> $@
+else
+	@echo "Requires.private: icu-i18n" >> $@
+endif
+	@echo "Libs:" '-L$${libdir}' "${ICULIBS_IO}" >> $@
 	@echo $@ updated.

 ICULEHB_LIBS=@ICULEHB_LIBS@
@@ -310,12 +322,20 @@ config/icu-lx.pc: config/icu.pc Makefile icudefs.mk
 	@cat config/icu.pc > $@
 	@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Paragraph Layout library $(USING_HB)" >> $@
 	@echo "Name: $(PACKAGE)-lx" >> $@
+ifeq ($(ENABLE_SHARED),)
 ifneq ($(ICULEHB_LIBS),)
 	@echo "Requires: icu-le-hb icu-uc" >> $@
 else
 	@echo "Requires: icu-le" >> $@
 endif
-	@echo "Libs:" "${ICULIBS_LX}" >> $@
+else
+ifneq ($(ICULEHB_LIBS),)
+	@echo "Requires.private: icu-le-hb icu-uc" >> $@
+else
+	@echo "Requires.private: icu-le" >> $@
+endif
+endif
+	@echo "Libs:" '-L$${libdir}' "${ICULIBS_LX}" >> $@
 	@echo $@ updated.


@@ -341,7 +361,7 @@ $(top_builddir)/config/icu-config: $(top_builddir)/Makefile $(top_srcdir)/config
 	chmod u+w $@
 	@echo "# Following from icu/icu4c/source/config/Makefile.inc" >> $@
 	LC_ALL=C $(SED) -f $(top_srcdir)/config/make2sh.sed < $(top_builddir)/config/Makefile.inc | grep -v '#M#' | uniq >> $@
-	@echo "# Following from @platform_make_fragment@" >> $@
+	@echo "# Following from @platform_make_fragment_name@" >> $@
 	LC_ALL=C $(SED) -f $(top_srcdir)/config/make2sh.sed < @platform_make_fragment@ | grep -v '#M#' | uniq >> $@
 	cat $(top_srcdir)/config/icu-config-bottom >> $@
 	chmod u-w $@
@@ -361,7 +381,7 @@ config.status: $(srcdir)/configure $(srcdir)/common/unicode/uvernum.h
 install-manx: $(MANX_FILES)
 	$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
 ifneq ($(MANX_FILES),)
-	$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
+	$(INSTALL_DATA) $^ $(DESTDIR)$(mandir)/man$(SECTION)
 endif

 config/%.$(SECTION): $(srcdir)/config/%.$(SECTION).in
@@ -375,7 +395,7 @@ icu4j-data-install icu4j-data: all tests

 # For updating Windows makefiles

-WINDOWS_UPDATEFILES=$(srcdir)/data/makedata.mak $(shell find $(srcdir) -name '*.vcproj' -o -name '*.vcxproj')
+WINDOWS_UPDATEFILES=$(srcdir)/data/makedata.mak $(srcdir)/allinone/Build.Windows.IcuVersion.props

 WINDOWS_UPDATEFILES_SED=config/windows-update.sed

@@ -77,6 +77,7 @@ x86_64-*-cygwin)
 *-*ibm-openedition*|*-*-os390*)	icu_cv_host_frag=mh-os390 ;;
 *-*-os400*)	icu_cv_host_frag=mh-os400 ;;
 *-apple-rhapsody*)	icu_cv_host_frag=mh-darwin ;;
+powerpc*-apple-darwin*)	icu_cv_host_frag=mh-darwin-ppc ;;
 *-apple-darwin*)	icu_cv_host_frag=mh-darwin ;;
 *-*-beos)       icu_cv_host_frag=mh-beos ;; 
 *-*-haiku)      icu_cv_host_frag=mh-haiku ;; 
@@ -448,7 +449,7 @@ AC_DEFUN([AC_CHECK_64BIT_LIBS],
 AC_DEFUN([AC_CHECK_STRICT_COMPILE],
 [
    AC_MSG_CHECKING([whether strict compiling is on])
-    AC_ARG_ENABLE(strict,[  --enable-strict         compile with strict compiler options [default=yes]], [
+    AC_ARG_ENABLE(strict,[  --disable-strict        do not compile with strict compiler options], [
        if test "$enableval" = no
        then
            ac_use_strict_options=no
@@ -462,17 +463,6 @@ AC_DEFUN([AC_CHECK_STRICT_COMPILE],
    then
        if test "$GCC" = yes
        then
-            case "${host}" in
-            *-*-solaris*)
-                # Don't use -std=c11 on Solaris because of timezone check fails
-                ;;
-            *)
-                # Do not use -ansi. It limits us to C90, and it breaks some platforms.
-                # We use -std=c11 to disable the gnu99 defaults and its associated warnings
-                CFLAGS="$CFLAGS -std=c11"
-                ;;
-            esac
-            
            CFLAGS="$CFLAGS -Wall -pedantic -Wshadow -Wpointer-arith -Wmissing-prototypes -Wwrite-strings"

            # Suppress clang C warnings:
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.15 -*- Autoconf -*-
+# generated automatically by aclocal 1.16.5 -*- Autoconf -*-

-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2021 Free Software Foundation, Inc.

 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
@@ -12,9 +12,9 @@
 # PARTICULAR PURPOSE.

 m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
-dnl pkg.m4 - Macros to locate and utilise pkg-config.   -*- Autoconf -*-
-dnl serial 11 (pkg-config-0.29.1)
-dnl
+# pkg.m4 - Macros to locate and use pkg-config.   -*- Autoconf -*-
+# serial 12 (pkg-config-0.29.2)
+
 dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
 dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
 dnl
@@ -55,7 +55,7 @@ dnl
 dnl See the "Since" comment for each macro you use to see what version
 dnl of the macros you require.
 m4_defun([PKG_PREREQ],
-[m4_define([PKG_MACROS_VERSION], [0.29.1])
+[m4_define([PKG_MACROS_VERSION], [0.29.2])
 m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
    [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
 ])dnl PKG_PREREQ
@@ -100,7 +100,7 @@ dnl Check to see whether a particular set of modules exists. Similar to
 dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
 dnl
 dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-dnl only at the first occurence in configure.ac, so if the first place
+dnl only at the first occurrence in configure.ac, so if the first place
 dnl it's called might be skipped (such as if it is within an "if", you
 dnl have to call PKG_CHECK_EXISTS manually
 AC_DEFUN([PKG_CHECK_EXISTS],
@@ -156,7 +156,7 @@ AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
 AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl

 pkg_failed=no
-AC_MSG_CHECKING([for $1])
+AC_MSG_CHECKING([for $2])

 _PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
 _PKG_CONFIG([$1][_LIBS], [libs], [$2])
@@ -166,17 +166,17 @@ and $1[]_LIBS to avoid the need to call pkg-config.
 See the pkg-config man page for more details.])

 if test $pkg_failed = yes; then
-   	AC_MSG_RESULT([no])
+        AC_MSG_RESULT([no])
        _PKG_SHORT_ERRORS_SUPPORTED
        if test $_pkg_short_errors_supported = yes; then
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
-        else 
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
+                $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
+        else
+                $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
        fi
-	# Put the nasty error message in config.log where it belongs
-	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+        # Put the nasty error message in config.log where it belongs
+        echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD

-	m4_default([$4], [AC_MSG_ERROR(
+        m4_default([$4], [AC_MSG_ERROR(
 [Package requirements ($2) were not met:

 $$1_PKG_ERRORS
@@ -187,8 +187,8 @@ installed software in a non-standard prefix.
 _PKG_TEXT])[]dnl
        ])
 elif test $pkg_failed = untried; then
-     	AC_MSG_RESULT([no])
-	m4_default([$4], [AC_MSG_FAILURE(
+        AC_MSG_RESULT([no])
+        m4_default([$4], [AC_MSG_FAILURE(
 [The pkg-config script could not be found or is too old.  Make sure it
 is in your PATH or set the PKG_CONFIG environment variable to the full
 path to pkg-config.
@@ -198,10 +198,10 @@ _PKG_TEXT
 To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
        ])
 else
-	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
-	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
+        $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
+        $1[]_LIBS=$pkg_cv_[]$1[]_LIBS
        AC_MSG_RESULT([yes])
-	$3
+        $3
 fi[]dnl
 ])dnl PKG_CHECK_MODULES

@@ -288,5 +288,73 @@ AS_VAR_COPY([$1], [pkg_cv_][$1])
 AS_VAR_IF([$1], [""], [$5], [$4])dnl
 ])dnl PKG_CHECK_VAR

+dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES,
+dnl   [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND],
+dnl   [DESCRIPTION], [DEFAULT])
+dnl ------------------------------------------
+dnl
+dnl Prepare a "--with-" configure option using the lowercase
+dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and
+dnl PKG_CHECK_MODULES in a single macro.
+AC_DEFUN([PKG_WITH_MODULES],
+[
+m4_pushdef([with_arg], m4_tolower([$1]))
+
+m4_pushdef([description],
+           [m4_default([$5], [build with ]with_arg[ support])])
+
+m4_pushdef([def_arg], [m4_default([$6], [auto])])
+m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes])
+m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no])
+
+m4_case(def_arg,
+            [yes],[m4_pushdef([with_without], [--without-]with_arg)],
+            [m4_pushdef([with_without],[--with-]with_arg)])
+
+AC_ARG_WITH(with_arg,
+     AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),,
+    [AS_TR_SH([with_]with_arg)=def_arg])
+
+AS_CASE([$AS_TR_SH([with_]with_arg)],
+            [yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)],
+            [auto],[PKG_CHECK_MODULES([$1],[$2],
+                                        [m4_n([def_action_if_found]) $3],
+                                        [m4_n([def_action_if_not_found]) $4])])
+
+m4_popdef([with_arg])
+m4_popdef([description])
+m4_popdef([def_arg])
+
+])dnl PKG_WITH_MODULES
+
+dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES,
+dnl   [DESCRIPTION], [DEFAULT])
+dnl -----------------------------------------------
+dnl
+dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES
+dnl check. HAVE_[VARIABLE-PREFIX] is exported as make variable.
+AC_DEFUN([PKG_HAVE_WITH_MODULES],
+[
+PKG_WITH_MODULES([$1],[$2],,,[$3],[$4])
+
+AM_CONDITIONAL([HAVE_][$1],
+               [test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"])
+])dnl PKG_HAVE_WITH_MODULES
+
+dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES,
+dnl   [DESCRIPTION], [DEFAULT])
+dnl ------------------------------------------------------
+dnl
+dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after
+dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make
+dnl and preprocessor variable.
+AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES],
+[
+PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4])
+
+AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"],
+        [AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])])
+])dnl PKG_HAVE_DEFINE_WITH_MODULES
+
 m4_include([config/m4/icu-conditional.m4])
 m4_include([acinclude.m4])
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
+<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <!-- This file is used to set the ICU Major Version number, which is used as a suffix on various file names in other Visual Studio projects. -->
+  <PropertyGroup>
+    <IcuMajorVersion>78</IcuMajorVersion>
+  </PropertyGroup>
+</Project>
@@ -7,14 +7,12 @@
    <ClCompile>
      <!-- ICU does not use exceptions in library code. -->
      <PreprocessorDefinitions>
+        $(DefineConstants);
        _HAS_EXCEPTIONS=0;
        %(PreprocessorDefinitions)
      </PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
-  <PropertyGroup>
-    <!-- Disable MSBuild warning about Linker OutputFile. -->
-    <!-- Ex: MSBuild complains that the common project creates "icuuc62.dll" rather than "common.dll". However, this is intentional. -->
-    <MSBuildWarningsAsMessages>MSB8012</MSBuildWarningsAsMessages>
-  </PropertyGroup>
+  <!-- The following import will set various settings for compiler Warnings and Errors. -->
+  <Import Project="Build.Windows.Library.WarningSettings.ProjectConfiguration.props" />
 </Project>
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2018 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <!-- This file is used to set compiler Warning configurations that are common to *all* ICU library code (common, i18n, and io). -->
+  <!-- Note: These options are for *all* configurations for *all* library projects.  -->
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <!--
+        We treat the following Warnings as Errors in the ICU library code, in order to catch/prevent
+        issues like ICU-20209, ICU-20157, ICU-13816 and others.
+
+          C4251 Need to DLL-Export template instantiations.
+          C4661 No suitable definition provided for explicit template instantiation request.
+          C4715 Not all control paths return a value.
+          C4706 Assignment within conditional expression.
+          C4005 Macro redefinition.
+          C4068 Unknown pragma.
+          C4267 Conversion from size_t to type, possible loss of data.
+          C4910 __declspec(dllexport) and extern are incompatible on an explicit instantiation.
+          C4003 Not enough parameters for macro.
+      -->
+      <TreatSpecificWarningsAsErrors>4251;4661;4715;4706;4005;4068;4267;4910;4003;%(TreatSpecificWarningsAsErrors)</TreatSpecificWarningsAsErrors>
+    </ClCompile>
+  </ItemDefinitionGroup>
+</Project>
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <!-- This file is used to set configuration options for all projects. -->
+
+  <!-- Automatic PlatformToolset version selection. -->
+  <!-- If there is no DefaultPlatformToolset set, we will try to detect the version based on version of the build tools. -->
+  <PropertyGroup>
+    <BuildToolVersion>$(VisualStudioVersion)</BuildToolVersion>
+    <!-- Only use the MSBuildToolsVersion if we don't have the VisualStudioVersion and MSBuildToolsVersion is set to something other than Current. -->
+    <BuildToolVersion Condition="'$(BuildToolVersion)'=='' and '$(MSBuildToolsVersion)' != 'Current'">$(MSBuildToolsVersion)</BuildToolVersion>
+    <!-- Version Note:
+      v140 is the Visual Studio 2015 toolset. (14.0)
+      v141 is the Visual Studio 2017 toolset. (15.0)
+      v142 is the Visual Studio 2019 toolset. (16.0)
+      v143 is the Visual Studio 2022 toolset. (17.0)
+    -->
+    <AutoDetectedPlatformToolset Condition="'$(BuildToolVersion)'=='14.0'">v140</AutoDetectedPlatformToolset>
+    <AutoDetectedPlatformToolset Condition="'$(BuildToolVersion)'=='15.0'">v141</AutoDetectedPlatformToolset>
+    <AutoDetectedPlatformToolset Condition="'$(BuildToolVersion)'=='16.0'">v142</AutoDetectedPlatformToolset>
+    <AutoDetectedPlatformToolset Condition="'$(BuildToolVersion)'=='17.0'">v143</AutoDetectedPlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Label="EmptyDefaultPlatformToolset">
+    <DefaultPlatformToolset Condition=" '$(DefaultPlatformToolset)' == '' ">$(AutoDetectedPlatformToolset)</DefaultPlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Label="PlatformToolset">
+    <PlatformToolset>$(DefaultPlatformToolset)</PlatformToolset>
+  </PropertyGroup>
+
+  <!-- This is the default SDK target. -->
+  <!--
+    If not already set, use the latest installed version of the Windows 10 SDK.
+    The Windows 10 SDK is backwards compatible to Windows 7, as long as WINVER and _WIN32_WINNT are set before compiling.
+    Note:
+      - With VS2019 and VS2022, using a value of "10.0" means that the latest installed version will be used.
+      - With VS2017, we need to manually detect the latest SDK version from the registry.
+      - With VS2015, use the Windows 8.1 SDK.
+  -->
+  <PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'=='' and ('$(PlatformToolset)'=='v142' or '$(PlatformToolset)'=='v143')">
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'=='' and ('$(PlatformToolset)'=='v141' or '$(AutodetectWin10SDK)'=='true')">
+    <!-- Detect the SDK version. -->
+    <WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10>
+    <WindowsSdkInstallFolder_10 Condition="'$(WindowsSdkInstallFolder_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@InstallationFolder)</WindowsSdkInstallFolder_10>
+    <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
+    <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' == ''">$(Registry:HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0@ProductVersion)</WindowsTargetPlatformVersion_10>
+    <!-- Sometimes the version in the registry has the '.0' suffix, and sometimes it doesn't. Check and add it. -->
+    <WindowsTargetPlatformVersion_10 Condition="'$(WindowsTargetPlatformVersion_10)' != '' and !$(WindowsTargetPlatformVersion_10.EndsWith('.0'))">$(WindowsTargetPlatformVersion_10).0</WindowsTargetPlatformVersion_10>
+    <!-- Set the default. -->
+    <WindowsTargetPlatformVersion>$(WindowsTargetPlatformVersion_10)</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'=='' and '$(PlatformToolset)'=='v140'">
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <!-- Disable MSBuild warning about Linker OutputFile. -->
+  <PropertyGroup>
+    <!-- For example: MSBuild complains that the common project creates "icuuc62.dll" rather than "common.dll". However, this is intentional. -->
+    <MSBuildWarningsAsMessages>MSB8012</MSBuildWarningsAsMessages>
+  </PropertyGroup>
+  
+  <!-- This enables outputting the source code line when an error occurs (to make it easier to see what/where the issue is). -->
+  <ItemDefinitionGroup>
+    <ClCompile>
+      <DiagnosticsFormat>Caret</DiagnosticsFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+</Project>
@@ -1,7 +1,9 @@
 <?xml version="1.0" encoding="utf-8"?>
 <!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- This file is used to set default configuration options for all non-UWP Visual Studio projects. -->
+  <!-- The following import will set the ICU Major Version number. -->
+  <Import Project="Build.Windows.IcuVersion.props" />
  <!-- These are the default project configurations for building. -->
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
@@ -12,6 +14,14 @@
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM64">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
@@ -20,18 +30,45 @@
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM">
+      <Configuration>Release</Configuration>
+      <Platform>ARM</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM64">
+      <Configuration>Release</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
  </ItemGroup>
-  <PropertyGroup>
-    <!-- This is the version of the MSVC tool-set to use. -->
-    <!-- v140 is the Visual Studio 2015 toolset. -->
-    <!-- v141 is the Visual Studio 2017 toolset. -->
-    <PlatformToolset>v141</PlatformToolset>
+  <!-- The following import will set the PlatformToolset configuration. -->
+  <Import Project="Build.Windows.PlatformToolset.props" />
+  <!-- The following PropertyGroups are used to set the binary and lib output locations -->
+  <PropertyGroup Condition="'$(Platform)'=='Win32'">
+    <IcuBinOutputDir>bin</IcuBinOutputDir>
+    <IcuLibOutputDir>lib</IcuLibOutputDir>
  </PropertyGroup>
-  <PropertyGroup>
-    <!-- This is the default SDK target. -->
-    <!-- Note that the Windows 8.1 SDK is backwards compatible down-level to Windows 7, so
-         setting this to 8.1 does not actually imply targeting Windows 8.1. -->
-    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  <PropertyGroup Condition="'$(Platform)'=='x64'">
+    <IcuBinOutputDir>bin64</IcuBinOutputDir>
+    <IcuLibOutputDir>lib64</IcuLibOutputDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='ARM'">
+    <IcuBinOutputDir>binARM</IcuBinOutputDir>
+    <IcuLibOutputDir>libARM</IcuLibOutputDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='ARM64'">
+    <IcuBinOutputDir>binARM64</IcuBinOutputDir>
+    <IcuLibOutputDir>libARM64</IcuLibOutputDir>
+  </PropertyGroup>
+  <!-- Desktop ARM/ARM64 support requires a different version of the Windows SDK -->
+  <!--
+      Note: This version must match the version used below for AdditionalLibraryDirectories in the ARM and ARM64 sections.
+  -->
+  <PropertyGroup Condition="'$(Platform)'=='ARM'">
+    <WindowsTargetPlatformVersion>10.0.22621.0</WindowsTargetPlatformVersion>
+    <WindowsSDKDesktopARMSupport>true</WindowsSDKDesktopARMSupport>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='ARM64'">
+    <WindowsTargetPlatformVersion>10.0.22621.0</WindowsTargetPlatformVersion>
+    <WindowsSDKDesktopARM64Support>true</WindowsSDKDesktopARM64Support>
  </PropertyGroup>
  <PropertyGroup>
    <!-- We need to explicitly set the target version to Windows 7. -->
@@ -59,12 +96,17 @@
      <AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
      <!-- Enable parallel compilation for faster builds. -->
      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <!-- Set the C/C++ versions supported. -->
+      <LanguageStandard Condition="'$(OverrideLanguageStandard)'==''">stdcpp17</LanguageStandard>
+      <LanguageStandard Condition="'$(OverrideLanguageStandard)'!=''">$(OverrideLanguageStandard)</LanguageStandard>
+      <LanguageStandard_C>stdc11</LanguageStandard_C>
    </ClCompile>
    <ResourceCompile>
      <Culture>0x0409</Culture>
    </ResourceCompile>
    <Link>
      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <RandomizedBaseAddress>true</RandomizedBaseAddress>
    </Link>
  </ItemDefinitionGroup>
  <!-- Options that are common to all 'Release' configurations for *all* projects. -->
@@ -75,12 +117,15 @@
    <ClCompile>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <StringPooling>true</StringPooling>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
    </ClCompile>
    <ResourceCompile>
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ResourceCompile>
    <Link>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
    </Link>
  </ItemDefinitionGroup>
  <!-- Options that are common to all 'Debug' configurations for *all* projects. -->
@@ -126,4 +171,36 @@
      <TargetMachine>MachineX64</TargetMachine>
    </Link>
  </ItemDefinitionGroup>
+  <!-- Options that are common to all ARM 32-bit configurations for *all* projects. -->
+  <ItemDefinitionGroup Condition="'$(Platform)'=='ARM'">
+    <Midl>
+      <TargetEnvironment>ARM</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <PreprocessorDefinitions>ARM;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <TargetMachine>MachineARM</TargetMachine>
+      <!-- The ARM Desktop SDK doesn't include this by default -->
+      <AdditionalDependencies>kernel32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <!-- Note: This needs to match the same version as WindowsTargetPlatformVersion for ARM -->
+      <AdditionalLibraryDirectories>C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all ARM 64-bit configurations for *all* projects. -->
+  <ItemDefinitionGroup Condition="'$(Platform)'=='ARM64'">
+    <Midl>
+      <TargetEnvironment>ARM64</TargetEnvironment>
+    </Midl>
+    <ClCompile>
+      <PreprocessorDefinitions>ARM64;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+    <Link>
+      <TargetMachine>MachineARM64</TargetMachine>
+      <!-- The ARM64 Desktop SDK doesn't include this by default -->
+      <AdditionalDependencies>kernel32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <!-- Note: This needs to match the same version as WindowsTargetPlatformVersion for ARM64 -->
+      <AdditionalLibraryDirectories>C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\arm64</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
 </Project>
@@ -1,22 +1,37 @@
 <?xml version="1.0" encoding="utf-8"?>
 <!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <!-- The following import will set the ICU Major Version number. -->
+  <Import Project="Build.Windows.IcuVersion.props" />
  <!-- This file is used to set common configuration options for all *_uwp projects. -->
  <PropertyGroup>
-    <!-- If not already set, use this version of the Win10 SDK -->
-    <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
-    <!-- If not already set, set the minimum Win10 SDK version to TH1/RTM -->
-    <WindowsTargetPlatformMinVersion>10.0.10240.0</WindowsTargetPlatformMinVersion>
-    
    <MinimumVisualStudioVersion>14.0</MinimumVisualStudioVersion>
    <AppContainerApplication>true</AppContainerApplication>
    <ApplicationType>Windows Store</ApplicationType>
    <ApplicationTypeRevision>10.0</ApplicationTypeRevision>
  </PropertyGroup>
  <PropertyGroup>
-    <!-- This is the version of the MSVC tool-set to use. -->
-    <!-- v141 is the Visual Studio 2017 toolset. -->
-    <PlatformToolset>v141</PlatformToolset>
+    <!-- Set the minimum Windows 10 SDK version to TH1/RTM. -->
+    <WindowsTargetPlatformMinVersion>10.0.10240.0</WindowsTargetPlatformMinVersion>
+  </PropertyGroup>
+  <!-- The following import will set the PlatformToolset configuration. -->
+  <Import Project="..\allinone\Build.Windows.PlatformToolset.props" />
+  <!-- The following PropertyGroups are used to set the binary and lib output locations -->
+  <PropertyGroup Condition="'$(Platform)'=='Win32'">
+    <IcuBinOutputDir>bin32uwp</IcuBinOutputDir>
+    <IcuLibOutputDir>lib32uwp</IcuLibOutputDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='x64'">
+    <IcuBinOutputDir>bin64uwp</IcuBinOutputDir>
+    <IcuLibOutputDir>lib64uwp</IcuLibOutputDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='ARM'">
+    <IcuBinOutputDir>binARMuwp</IcuBinOutputDir>
+    <IcuLibOutputDir>libARMuwp</IcuLibOutputDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Platform)'=='ARM64'">
+    <IcuBinOutputDir>binARM64uwp</IcuBinOutputDir>
+    <IcuLibOutputDir>libARM64uwp</IcuLibOutputDir>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <Midl>
@@ -30,6 +45,10 @@
        %(PreprocessorDefinitions);
        U_PLATFORM_HAS_WINUWP_API=1;
      </PreprocessorDefinitions>
+      <!-- Set the C/C++ versions supported. -->
+      <LanguageStandard Condition="'$(OverrideLanguageStandard)'==''">stdcpp17</LanguageStandard>
+      <LanguageStandard Condition="'$(OverrideLanguageStandard)'!=''">$(OverrideLanguageStandard)</LanguageStandard>
+      <LanguageStandard_C>stdc11</LanguageStandard_C>
    </ClCompile>
    <ResourceCompile>
      <PreprocessorDefinitions>
@@ -2,12 +2,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio 15
 VisualStudioVersion = 15.0.27130.2036
 MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcxproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
-	ProjectSection(ProjectDependencies) = postProject
-		{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
-		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
-	EndProjectSection
-EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcxproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}"
 	ProjectSection(ProjectDependencies) = postProject
 		{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
@@ -27,12 +21,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ctestfw", "..\tools\ctestfw
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
 	EndProjectSection
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "date", "..\samples\date\date.vcxproj", "{38B5751A-C6F9-4409-950C-F4F9DA17275F}"
-	ProjectSection(ProjectDependencies) = postProject
-		{0178B127-6269-407D-B112-93877BB62776} = {0178B127-6269-407D-B112-93877BB62776}
-		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
-	EndProjectSection
-EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "derb", "..\tools\genrb\derb.vcxproj", "{D3065ADB-8820-4CC7-9B6C-9510833961A3}"
 	ProjectSection(ProjectDependencies) = postProject
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C} = {C2B04507-2521-4801-BF0D-5FD79D6D518C}
@@ -106,6 +94,12 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pkgdata", "..\tools\pkgdata
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
 	EndProjectSection
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icuexportdata", "..\tools\icuexportdata\icuexportdata.vcxproj", "{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}"
+	ProjectSection(ProjectDependencies) = postProject
+		{6B231032-3CB5-4EED-9210-810D666A23A0} = {6B231032-3CB5-4EED-9210-810D666A23A0}
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D} = {73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}
+	EndProjectSection
+EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stubdata", "..\stubdata\stubdata.vcxproj", "{203EC78A-0531-43F0-A636-285439BDE025}"
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toolutil", "..\tools\toolutil\toolutil.vcxproj", "{6B231032-3CB5-4EED-9210-810D666A23A0}"
@@ -197,335 +191,507 @@ EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|ARM = Debug|ARM
+		Debug|ARM64 = Debug|ARM64
 		Debug|Win32 = Debug|Win32
 		Debug|x64 = Debug|x64
 		Release|ARM = Release|ARM
+		Release|ARM64 = Release|ARM64
 		Release|Win32 = Release|Win32
 		Release|x64 = Release|x64
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|ARM.ActiveCfg = Debug|Win32
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.ActiveCfg = Debug|Win32
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.Build.0 = Debug|Win32
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.ActiveCfg = Debug|x64
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.Build.0 = Debug|x64
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|ARM.ActiveCfg = Release|Win32
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.ActiveCfg = Release|Win32
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.Build.0 = Release|Win32
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.ActiveCfg = Release|x64
-		{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.Build.0 = Release|x64
-		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|ARM.ActiveCfg = Debug|Win32
+		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|ARM.ActiveCfg = Debug|ARM
+		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|ARM.Build.0 = Debug|ARM
+		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|ARM64.Build.0 = Debug|ARM64
 		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|Win32.ActiveCfg = Debug|Win32
 		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|Win32.Build.0 = Debug|Win32
 		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|x64.ActiveCfg = Debug|x64
 		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|x64.Build.0 = Debug|x64
-		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|ARM.ActiveCfg = Release|Win32
+		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|ARM.ActiveCfg = Release|ARM
+		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|ARM.Build.0 = Release|ARM
+		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|ARM64.ActiveCfg = Release|ARM64
+		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|ARM64.Build.0 = Release|ARM64
 		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|Win32.ActiveCfg = Release|Win32
 		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|Win32.Build.0 = Release|Win32
 		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|x64.ActiveCfg = Release|x64
 		{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|x64.Build.0 = Release|x64
-		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|ARM.ActiveCfg = Debug|Win32
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|ARM.ActiveCfg = Debug|ARM
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|ARM.Build.0 = Debug|ARM
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|ARM64.Build.0 = Debug|ARM64
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|Win32.ActiveCfg = Debug|Win32
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|Win32.Build.0 = Debug|Win32
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|x64.ActiveCfg = Debug|x64
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|x64.Build.0 = Debug|x64
-		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|ARM.ActiveCfg = Release|Win32
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|ARM.ActiveCfg = Release|ARM
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|ARM.Build.0 = Release|ARM
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|ARM64.ActiveCfg = Release|ARM64
+		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|ARM64.Build.0 = Release|ARM64
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|Win32.ActiveCfg = Release|Win32
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|Win32.Build.0 = Release|Win32
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|x64.ActiveCfg = Release|x64
 		{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|x64.Build.0 = Release|x64
-		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|ARM.ActiveCfg = Debug|Win32
+		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|ARM.ActiveCfg = Debug|ARM
+		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|ARM.Build.0 = Debug|ARM
+		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|ARM64.Build.0 = Debug|ARM64
 		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|Win32.ActiveCfg = Debug|Win32
 		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|Win32.Build.0 = Debug|Win32
 		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|x64.ActiveCfg = Debug|x64
 		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|x64.Build.0 = Debug|x64
-		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|ARM.ActiveCfg = Release|Win32
+		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|ARM.ActiveCfg = Release|ARM
+		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|ARM.Build.0 = Release|ARM
+		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|ARM64.ActiveCfg = Release|ARM64
+		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|ARM64.Build.0 = Release|ARM64
 		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.ActiveCfg = Release|Win32
 		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.Build.0 = Release|Win32
 		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.ActiveCfg = Release|x64
 		{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.Build.0 = Release|x64
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|ARM.ActiveCfg = Debug|Win32
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.ActiveCfg = Debug|Win32
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.Build.0 = Debug|Win32
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.ActiveCfg = Debug|x64
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.Build.0 = Debug|x64
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|ARM.ActiveCfg = Release|Win32
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.ActiveCfg = Release|Win32
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.Build.0 = Release|Win32
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.ActiveCfg = Release|x64
-		{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.Build.0 = Release|x64
-		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|ARM.ActiveCfg = Debug|Win32
+		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|ARM.ActiveCfg = Debug|ARM
+		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|ARM.Build.0 = Debug|ARM
+		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|ARM64.Build.0 = Debug|ARM64
 		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|Win32.ActiveCfg = Debug|Win32
 		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|Win32.Build.0 = Debug|Win32
 		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|x64.ActiveCfg = Debug|x64
 		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|x64.Build.0 = Debug|x64
-		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|ARM.ActiveCfg = Release|Win32
+		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|ARM.ActiveCfg = Release|ARM
+		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|ARM.Build.0 = Release|ARM
+		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|ARM64.ActiveCfg = Release|ARM64
+		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|ARM64.Build.0 = Release|ARM64
 		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|Win32.ActiveCfg = Release|Win32
 		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|Win32.Build.0 = Release|Win32
 		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|x64.ActiveCfg = Release|x64
 		{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|x64.Build.0 = Release|x64
-		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|ARM.ActiveCfg = Debug|Win32
+		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|ARM.ActiveCfg = Debug|ARM
+		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|ARM.Build.0 = Debug|ARM
+		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|ARM64.Build.0 = Debug|ARM64
 		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|Win32.ActiveCfg = Debug|Win32
 		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|Win32.Build.0 = Debug|Win32
 		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|x64.ActiveCfg = Debug|x64
 		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|x64.Build.0 = Debug|x64
-		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|ARM.ActiveCfg = Release|Win32
+		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|ARM.ActiveCfg = Release|ARM
+		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|ARM.Build.0 = Release|ARM
+		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|ARM64.ActiveCfg = Release|ARM64
+		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|ARM64.Build.0 = Release|ARM64
 		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|Win32.ActiveCfg = Release|Win32
 		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|Win32.Build.0 = Release|Win32
 		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|x64.ActiveCfg = Release|x64
 		{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|x64.Build.0 = Release|x64
-		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|ARM.ActiveCfg = Debug|Win32
+		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|ARM.ActiveCfg = Debug|ARM
+		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|ARM.Build.0 = Debug|ARM
+		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|ARM64.Build.0 = Debug|ARM64
 		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|Win32.ActiveCfg = Debug|Win32
 		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|Win32.Build.0 = Debug|Win32
 		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|x64.ActiveCfg = Debug|x64
 		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|x64.Build.0 = Debug|x64
-		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|ARM.ActiveCfg = Release|Win32
+		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|ARM.ActiveCfg = Release|ARM
+		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|ARM.Build.0 = Release|ARM
+		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|ARM64.ActiveCfg = Release|ARM64
+		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|ARM64.Build.0 = Release|ARM64
 		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|Win32.ActiveCfg = Release|Win32
 		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|Win32.Build.0 = Release|Win32
 		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|x64.ActiveCfg = Release|x64
 		{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|x64.Build.0 = Release|x64
-		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|ARM.ActiveCfg = Debug|Win32
+		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|ARM.ActiveCfg = Debug|ARM
+		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|ARM.Build.0 = Debug|ARM
+		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|ARM64.Build.0 = Debug|ARM64
 		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|Win32.ActiveCfg = Debug|Win32
 		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|Win32.Build.0 = Debug|Win32
 		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|x64.ActiveCfg = Debug|x64
 		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|x64.Build.0 = Debug|x64
-		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|ARM.ActiveCfg = Release|Win32
+		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|ARM.ActiveCfg = Release|ARM
+		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|ARM.Build.0 = Release|ARM
+		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|ARM64.ActiveCfg = Release|ARM64
+		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|ARM64.Build.0 = Release|ARM64
 		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|Win32.ActiveCfg = Release|Win32
 		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|Win32.Build.0 = Release|Win32
 		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|x64.ActiveCfg = Release|x64
 		{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|x64.Build.0 = Release|x64
-		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|ARM.ActiveCfg = Debug|Win32
+		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|ARM.ActiveCfg = Debug|ARM
+		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|ARM.Build.0 = Debug|ARM
+		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|ARM64.Build.0 = Debug|ARM64
 		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|Win32.ActiveCfg = Debug|Win32
 		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|Win32.Build.0 = Debug|Win32
 		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|x64.ActiveCfg = Debug|x64
 		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|x64.Build.0 = Debug|x64
-		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|ARM.ActiveCfg = Release|Win32
+		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|ARM.ActiveCfg = Release|ARM
+		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|ARM.Build.0 = Release|ARM
+		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|ARM64.ActiveCfg = Release|ARM64
+		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|ARM64.Build.0 = Release|ARM64
 		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|Win32.ActiveCfg = Release|Win32
 		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|Win32.Build.0 = Release|Win32
 		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|x64.ActiveCfg = Release|x64
 		{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|x64.Build.0 = Release|x64
-		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|ARM.ActiveCfg = Debug|Win32
+		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|ARM.ActiveCfg = Debug|ARM
+		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|ARM.Build.0 = Debug|ARM
+		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|ARM64.Build.0 = Debug|ARM64
 		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|Win32.ActiveCfg = Debug|Win32
 		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|Win32.Build.0 = Debug|Win32
 		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|x64.ActiveCfg = Debug|x64
 		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|x64.Build.0 = Debug|x64
-		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|ARM.ActiveCfg = Release|Win32
+		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|ARM.ActiveCfg = Release|ARM
+		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|ARM.Build.0 = Release|ARM
+		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|ARM64.ActiveCfg = Release|ARM64
+		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|ARM64.Build.0 = Release|ARM64
 		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|Win32.ActiveCfg = Release|Win32
 		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|Win32.Build.0 = Release|Win32
 		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|x64.ActiveCfg = Release|x64
 		{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|x64.Build.0 = Release|x64
-		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|ARM.ActiveCfg = Debug|Win32
+		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|ARM.ActiveCfg = Debug|ARM
+		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|ARM.Build.0 = Debug|ARM
+		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|ARM64.Build.0 = Debug|ARM64
 		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|Win32.ActiveCfg = Debug|Win32
 		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|Win32.Build.0 = Debug|Win32
 		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|x64.ActiveCfg = Debug|x64
 		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|x64.Build.0 = Debug|x64
-		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|ARM.ActiveCfg = Release|Win32
+		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|ARM.ActiveCfg = Release|ARM
+		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|ARM.Build.0 = Release|ARM
+		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|ARM64.ActiveCfg = Release|ARM64
+		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|ARM64.Build.0 = Release|ARM64
 		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|Win32.ActiveCfg = Release|Win32
 		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|Win32.Build.0 = Release|Win32
 		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.ActiveCfg = Release|x64
 		{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.Build.0 = Release|x64
-		{0178B127-6269-407D-B112-93877BB62776}.Debug|ARM.ActiveCfg = Debug|Win32
+		{0178B127-6269-407D-B112-93877BB62776}.Debug|ARM.ActiveCfg = Debug|ARM
+		{0178B127-6269-407D-B112-93877BB62776}.Debug|ARM.Build.0 = Debug|ARM
+		{0178B127-6269-407D-B112-93877BB62776}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{0178B127-6269-407D-B112-93877BB62776}.Debug|ARM64.Build.0 = Debug|ARM64
 		{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.ActiveCfg = Debug|Win32
 		{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.Build.0 = Debug|Win32
 		{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|x64
 		{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.Build.0 = Debug|x64
-		{0178B127-6269-407D-B112-93877BB62776}.Release|ARM.ActiveCfg = Release|Win32
+		{0178B127-6269-407D-B112-93877BB62776}.Release|ARM.ActiveCfg = Release|ARM
+		{0178B127-6269-407D-B112-93877BB62776}.Release|ARM.Build.0 = Release|ARM
+		{0178B127-6269-407D-B112-93877BB62776}.Release|ARM64.ActiveCfg = Release|ARM64
+		{0178B127-6269-407D-B112-93877BB62776}.Release|ARM64.Build.0 = Release|ARM64
 		{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.ActiveCfg = Release|Win32
 		{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.Build.0 = Release|Win32
 		{0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|x64
 		{0178B127-6269-407D-B112-93877BB62776}.Release|x64.Build.0 = Release|x64
-		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|ARM.ActiveCfg = Debug|Win32
+		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|ARM.ActiveCfg = Debug|ARM
+		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|ARM.Build.0 = Debug|ARM
+		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|ARM64.Build.0 = Debug|ARM64
 		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.ActiveCfg = Debug|Win32
 		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.Build.0 = Debug|Win32
 		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.ActiveCfg = Debug|x64
 		{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.Build.0 = Debug|x64
-		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|ARM.ActiveCfg = Release|Win32
+		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|ARM.ActiveCfg = Release|ARM
+		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|ARM.Build.0 = Release|ARM
+		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|ARM64.ActiveCfg = Release|ARM64
+		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|ARM64.Build.0 = Release|ARM64
 		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|Win32.ActiveCfg = Release|Win32
 		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|Win32.Build.0 = Release|Win32
 		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|x64.ActiveCfg = Release|x64
 		{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|x64.Build.0 = Release|x64
-		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|ARM.ActiveCfg = Debug|Win32
+		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|ARM.ActiveCfg = Debug|ARM
+		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|ARM.Build.0 = Debug|ARM
+		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|ARM64.Build.0 = Debug|ARM64
 		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|Win32.ActiveCfg = Debug|Win32
 		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|Win32.Build.0 = Debug|Win32
 		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|x64.ActiveCfg = Debug|x64
 		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|x64.Build.0 = Debug|x64
-		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|ARM.ActiveCfg = Release|Win32
+		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|ARM.ActiveCfg = Release|ARM
+		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|ARM.Build.0 = Release|ARM
+		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|ARM64.ActiveCfg = Release|ARM64
+		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|ARM64.Build.0 = Release|ARM64
 		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|Win32.ActiveCfg = Release|Win32
 		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|Win32.Build.0 = Release|Win32
 		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|x64.ActiveCfg = Release|x64
 		{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|x64.Build.0 = Release|x64
-		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|ARM.ActiveCfg = Debug|Win32
+		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|ARM.ActiveCfg = Debug|ARM
+		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|ARM.Build.0 = Debug|ARM
+		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|ARM64.Build.0 = Debug|ARM64
 		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|Win32.ActiveCfg = Debug|Win32
 		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|Win32.Build.0 = Debug|Win32
 		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|x64.ActiveCfg = Debug|x64
 		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|x64.Build.0 = Debug|x64
-		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|ARM.ActiveCfg = Release|Win32
+		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|ARM.ActiveCfg = Release|ARM
+		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|ARM.Build.0 = Release|ARM
+		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|ARM64.ActiveCfg = Release|ARM64
+		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|ARM64.Build.0 = Release|ARM64
 		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|Win32.ActiveCfg = Release|Win32
 		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|Win32.Build.0 = Release|Win32
 		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|x64.ActiveCfg = Release|x64
 		{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|x64.Build.0 = Release|x64
-		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|ARM.ActiveCfg = Debug|Win32
+		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|ARM.ActiveCfg = Debug|ARM
+		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|ARM.Build.0 = Debug|ARM
+		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|ARM64.Build.0 = Debug|ARM64
 		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|Win32.ActiveCfg = Debug|Win32
 		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|Win32.Build.0 = Debug|Win32
 		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|x64.ActiveCfg = Debug|x64
 		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|x64.Build.0 = Debug|x64
-		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|ARM.ActiveCfg = Release|Win32
+		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|ARM.ActiveCfg = Release|ARM
+		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|ARM.Build.0 = Release|ARM
+		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|ARM64.ActiveCfg = Release|ARM64
+		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|ARM64.Build.0 = Release|ARM64
 		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.ActiveCfg = Release|Win32
 		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.Build.0 = Release|Win32
 		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.ActiveCfg = Release|x64
 		{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.Build.0 = Release|x64
-		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|ARM.ActiveCfg = Debug|Win32
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|ARM.ActiveCfg = Debug|ARM
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|ARM.Build.0 = Debug|ARM
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|ARM64.Build.0 = Debug|ARM64
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|Win32.ActiveCfg = Debug|Win32
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|Win32.Build.0 = Debug|Win32
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|x64.ActiveCfg = Debug|x64
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Debug|x64.Build.0 = Debug|x64
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|ARM.ActiveCfg = Release|ARM
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|ARM.Build.0 = Release|ARM
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|ARM64.ActiveCfg = Release|ARM64
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|ARM64.Build.0 = Release|ARM64
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|Win32.ActiveCfg = Release|Win32
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|Win32.Build.0 = Release|Win32
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|x64.ActiveCfg = Release|x64
+		{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}.Release|x64.Build.0 = Release|x64
+		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|ARM.ActiveCfg = Debug|ARM
+		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|ARM.Build.0 = Debug|ARM
+		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|ARM64.Build.0 = Debug|ARM64
 		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|Win32.ActiveCfg = Debug|Win32
 		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|Win32.Build.0 = Debug|Win32
 		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|x64.ActiveCfg = Debug|x64
 		{203EC78A-0531-43F0-A636-285439BDE025}.Debug|x64.Build.0 = Debug|x64
-		{203EC78A-0531-43F0-A636-285439BDE025}.Release|ARM.ActiveCfg = Release|Win32
+		{203EC78A-0531-43F0-A636-285439BDE025}.Release|ARM.ActiveCfg = Release|ARM
+		{203EC78A-0531-43F0-A636-285439BDE025}.Release|ARM.Build.0 = Release|ARM
+		{203EC78A-0531-43F0-A636-285439BDE025}.Release|ARM64.ActiveCfg = Release|ARM64
+		{203EC78A-0531-43F0-A636-285439BDE025}.Release|ARM64.Build.0 = Release|ARM64
 		{203EC78A-0531-43F0-A636-285439BDE025}.Release|Win32.ActiveCfg = Release|Win32
 		{203EC78A-0531-43F0-A636-285439BDE025}.Release|Win32.Build.0 = Release|Win32
 		{203EC78A-0531-43F0-A636-285439BDE025}.Release|x64.ActiveCfg = Release|x64
 		{203EC78A-0531-43F0-A636-285439BDE025}.Release|x64.Build.0 = Release|x64
-		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|ARM.ActiveCfg = Debug|Win32
+		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|ARM.ActiveCfg = Debug|ARM
+		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|ARM.Build.0 = Debug|ARM
+		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|ARM64.Build.0 = Debug|ARM64
 		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|Win32.ActiveCfg = Debug|Win32
 		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|Win32.Build.0 = Debug|Win32
 		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|x64.ActiveCfg = Debug|x64
 		{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|x64.Build.0 = Debug|x64
-		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|ARM.ActiveCfg = Release|Win32
+		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|ARM.ActiveCfg = Release|ARM
+		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|ARM.Build.0 = Release|ARM
+		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|ARM64.ActiveCfg = Release|ARM64
+		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|ARM64.Build.0 = Release|ARM64
 		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|Win32.ActiveCfg = Release|Win32
 		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|Win32.Build.0 = Release|Win32
 		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|x64.ActiveCfg = Release|x64
 		{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|x64.Build.0 = Release|x64
-		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|ARM.ActiveCfg = Debug|Win32
+		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|ARM.ActiveCfg = Debug|ARM
+		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|ARM.Build.0 = Debug|ARM
+		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|ARM64.Build.0 = Debug|ARM64
 		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|Win32.ActiveCfg = Debug|Win32
 		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|Win32.Build.0 = Debug|Win32
 		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|x64.ActiveCfg = Debug|x64
 		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|x64.Build.0 = Debug|x64
-		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|ARM.ActiveCfg = Release|Win32
+		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|ARM.ActiveCfg = Release|ARM
+		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|ARM.Build.0 = Release|ARM
+		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|ARM64.ActiveCfg = Release|ARM64
+		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|ARM64.Build.0 = Release|ARM64
 		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|Win32.ActiveCfg = Release|Win32
 		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|Win32.Build.0 = Release|Win32
 		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|x64.ActiveCfg = Release|x64
 		{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|x64.Build.0 = Release|x64
-		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|ARM.ActiveCfg = Debug|Win32
+		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|ARM.ActiveCfg = Debug|ARM
+		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|ARM.Build.0 = Debug|ARM
+		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|ARM64.Build.0 = Debug|ARM64
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|Win32.ActiveCfg = Debug|Win32
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|Win32.Build.0 = Debug|Win32
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|x64.ActiveCfg = Debug|x64
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|x64.Build.0 = Debug|x64
-		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|ARM.ActiveCfg = Release|Win32
+		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|ARM.ActiveCfg = Release|ARM
+		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|ARM.Build.0 = Release|ARM
+		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|ARM64.ActiveCfg = Release|ARM64
+		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|ARM64.Build.0 = Release|ARM64
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|Win32.ActiveCfg = Release|Win32
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|Win32.Build.0 = Release|Win32
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|x64.ActiveCfg = Release|x64
 		{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|x64.Build.0 = Release|x64
-		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|ARM.ActiveCfg = Debug|Win32
+		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|ARM.ActiveCfg = Debug|ARM
+		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|ARM.Build.0 = Debug|ARM
+		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|ARM64.Build.0 = Debug|ARM64
 		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|Win32.ActiveCfg = Debug|Win32
 		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|Win32.Build.0 = Debug|Win32
 		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|x64.ActiveCfg = Debug|x64
 		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|x64.Build.0 = Debug|x64
-		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|ARM.ActiveCfg = Release|Win32
+		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|ARM.ActiveCfg = Release|ARM
+		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|ARM.Build.0 = Release|ARM
+		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|ARM64.ActiveCfg = Release|ARM64
+		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|ARM64.Build.0 = Release|ARM64
 		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|Win32.ActiveCfg = Release|Win32
 		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|Win32.Build.0 = Release|Win32
 		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|x64.ActiveCfg = Release|x64
 		{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|x64.Build.0 = Release|x64
-		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|ARM.ActiveCfg = Debug|Win32
+		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|ARM.ActiveCfg = Debug|ARM
+		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|ARM.Build.0 = Debug|ARM
+		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|ARM64.Build.0 = Debug|ARM64
 		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|Win32.ActiveCfg = Debug|Win32
 		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|Win32.Build.0 = Debug|Win32
 		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|x64.ActiveCfg = Debug|x64
 		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|x64.Build.0 = Debug|x64
-		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|ARM.ActiveCfg = Release|Win32
+		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|ARM.ActiveCfg = Release|ARM
+		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|ARM.Build.0 = Release|ARM
+		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|ARM64.ActiveCfg = Release|ARM64
+		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|ARM64.Build.0 = Release|ARM64
 		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|Win32.ActiveCfg = Release|Win32
 		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|Win32.Build.0 = Release|Win32
 		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|x64.ActiveCfg = Release|x64
 		{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|x64.Build.0 = Release|x64
-		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|ARM.ActiveCfg = Debug|Win32
+		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|ARM.ActiveCfg = Debug|ARM
+		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|ARM.Build.0 = Debug|ARM
+		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|ARM64.Build.0 = Debug|ARM64
 		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|Win32.ActiveCfg = Debug|Win32
 		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|Win32.Build.0 = Debug|Win32
 		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|x64.ActiveCfg = Debug|x64
 		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|x64.Build.0 = Debug|x64
-		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|ARM.ActiveCfg = Release|Win32
+		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|ARM.ActiveCfg = Release|ARM
+		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|ARM.Build.0 = Release|ARM
+		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|ARM64.ActiveCfg = Release|ARM64
+		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|ARM64.Build.0 = Release|ARM64
 		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|Win32.ActiveCfg = Release|Win32
 		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|Win32.Build.0 = Release|Win32
 		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|x64.ActiveCfg = Release|x64
 		{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|x64.Build.0 = Release|x64
-		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|ARM.ActiveCfg = Debug|Win32
+		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|ARM.ActiveCfg = Debug|ARM
+		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|ARM.Build.0 = Debug|ARM
+		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|ARM64.Build.0 = Debug|ARM64
 		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|Win32.ActiveCfg = Debug|Win32
 		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|Win32.Build.0 = Debug|Win32
 		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|x64.ActiveCfg = Debug|x64
 		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|x64.Build.0 = Debug|x64
-		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|ARM.ActiveCfg = Release|Win32
+		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|ARM.ActiveCfg = Release|ARM
+		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|ARM.Build.0 = Release|ARM
+		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|ARM64.ActiveCfg = Release|ARM64
+		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|ARM64.Build.0 = Release|ARM64
 		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|Win32.ActiveCfg = Release|Win32
 		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|Win32.Build.0 = Release|Win32
 		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|x64.ActiveCfg = Release|x64
 		{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|x64.Build.0 = Release|x64
-		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|ARM.ActiveCfg = Debug|Win32
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|ARM.ActiveCfg = Debug|ARM
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|ARM.Build.0 = Debug|ARM
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|ARM64.Build.0 = Debug|ARM64
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.ActiveCfg = Debug|Win32
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.Build.0 = Debug|Win32
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.ActiveCfg = Debug|x64
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.Build.0 = Debug|x64
-		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|ARM.ActiveCfg = Release|Win32
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|ARM.ActiveCfg = Release|ARM
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|ARM.Build.0 = Release|ARM
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|ARM64.ActiveCfg = Release|ARM64
+		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|ARM64.Build.0 = Release|ARM64
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.ActiveCfg = Release|Win32
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.Build.0 = Release|Win32
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.ActiveCfg = Release|x64
 		{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.Build.0 = Release|x64
-		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|ARM.ActiveCfg = Debug|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|ARM.ActiveCfg = Debug|ARM
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|ARM.Build.0 = Debug|ARM
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|ARM64.Build.0 = Debug|ARM64
 		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|Win32.ActiveCfg = Debug|Win32
 		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|Win32.Build.0 = Debug|Win32
 		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|x64.ActiveCfg = Debug|x64
 		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|x64.Build.0 = Debug|x64
-		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|ARM.ActiveCfg = Release|Win32
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|ARM.ActiveCfg = Release|ARM
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|ARM.Build.0 = Release|ARM
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|ARM64.ActiveCfg = Release|ARM64
+		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|ARM64.Build.0 = Release|ARM64
 		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|Win32.ActiveCfg = Release|Win32
 		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|Win32.Build.0 = Release|Win32
 		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|x64.ActiveCfg = Release|x64
 		{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|x64.Build.0 = Release|x64
-		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|ARM.ActiveCfg = Debug|Win32
+		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|ARM.ActiveCfg = Debug|ARM
+		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|ARM.Build.0 = Debug|ARM
+		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|ARM64.Build.0 = Debug|ARM64
 		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|Win32.ActiveCfg = Debug|Win32
 		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|Win32.Build.0 = Debug|Win32
 		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|x64.ActiveCfg = Debug|x64
 		{E7611F49-F088-4175-9446-6111444E72C8}.Debug|x64.Build.0 = Debug|x64
-		{E7611F49-F088-4175-9446-6111444E72C8}.Release|ARM.ActiveCfg = Release|Win32
+		{E7611F49-F088-4175-9446-6111444E72C8}.Release|ARM.ActiveCfg = Release|ARM
+		{E7611F49-F088-4175-9446-6111444E72C8}.Release|ARM.Build.0 = Release|ARM
+		{E7611F49-F088-4175-9446-6111444E72C8}.Release|ARM64.ActiveCfg = Release|ARM64
+		{E7611F49-F088-4175-9446-6111444E72C8}.Release|ARM64.Build.0 = Release|ARM64
 		{E7611F49-F088-4175-9446-6111444E72C8}.Release|Win32.ActiveCfg = Release|Win32
 		{E7611F49-F088-4175-9446-6111444E72C8}.Release|Win32.Build.0 = Release|Win32
 		{E7611F49-F088-4175-9446-6111444E72C8}.Release|x64.ActiveCfg = Release|x64
 		{E7611F49-F088-4175-9446-6111444E72C8}.Release|x64.Build.0 = Release|x64
-		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|ARM.ActiveCfg = Debug|Win32
+		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|ARM.ActiveCfg = Debug|ARM
+		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|ARM.Build.0 = Debug|ARM
+		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|ARM64.Build.0 = Debug|ARM64
 		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|Win32.ActiveCfg = Debug|Win32
 		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|Win32.Build.0 = Debug|Win32
 		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|x64.ActiveCfg = Debug|x64
 		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|x64.Build.0 = Debug|x64
-		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|ARM.ActiveCfg = Release|Win32
+		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|ARM.ActiveCfg = Release|ARM
+		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|ARM.Build.0 = Release|ARM
+		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|ARM64.ActiveCfg = Release|ARM64
+		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|ARM64.Build.0 = Release|ARM64
 		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|Win32.ActiveCfg = Release|Win32
 		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|Win32.Build.0 = Release|Win32
 		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|x64.ActiveCfg = Release|x64
 		{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|x64.Build.0 = Release|x64
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Debug|ARM.ActiveCfg = Debug|ARM
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Debug|ARM.Build.0 = Debug|ARM
+		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Debug|ARM64.Build.0 = Debug|ARM64
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Debug|Win32.ActiveCfg = Debug|Win32
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Debug|Win32.Build.0 = Debug|Win32
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Debug|x64.ActiveCfg = Debug|x64
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Debug|x64.Build.0 = Debug|x64
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Release|ARM.ActiveCfg = Release|ARM
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Release|ARM.Build.0 = Release|ARM
+		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Release|ARM64.ActiveCfg = Release|ARM64
+		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Release|ARM64.Build.0 = Release|ARM64
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Release|Win32.ActiveCfg = Release|Win32
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Release|Win32.Build.0 = Release|Win32
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Release|x64.ActiveCfg = Release|x64
 		{B1D53358-37BD-48BC-B27C-68BAF1E78508}.Release|x64.Build.0 = Release|x64
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Debug|ARM.ActiveCfg = Debug|ARM
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Debug|ARM.Build.0 = Debug|ARM
+		{6786C051-383B-47E0-9E82-B8B994E06A25}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{6786C051-383B-47E0-9E82-B8B994E06A25}.Debug|ARM64.Build.0 = Debug|ARM64
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Debug|Win32.ActiveCfg = Debug|Win32
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Debug|Win32.Build.0 = Debug|Win32
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Debug|x64.ActiveCfg = Debug|x64
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Debug|x64.Build.0 = Debug|x64
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Release|ARM.ActiveCfg = Release|ARM
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Release|ARM.Build.0 = Release|ARM
+		{6786C051-383B-47E0-9E82-B8B994E06A25}.Release|ARM64.ActiveCfg = Release|ARM64
+		{6786C051-383B-47E0-9E82-B8B994E06A25}.Release|ARM64.Build.0 = Release|ARM64
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Release|Win32.ActiveCfg = Release|Win32
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Release|Win32.Build.0 = Release|Win32
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Release|x64.ActiveCfg = Release|x64
 		{6786C051-383B-47E0-9E82-B8B994E06A25}.Release|x64.Build.0 = Release|x64
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Debug|ARM.ActiveCfg = Debug|ARM
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Debug|ARM.Build.0 = Debug|ARM
+		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Debug|ARM64.ActiveCfg = Debug|ARM64
+		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Debug|ARM64.Build.0 = Debug|ARM64
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Debug|Win32.ActiveCfg = Debug|Win32
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Debug|Win32.Build.0 = Debug|Win32
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Debug|x64.ActiveCfg = Debug|x64
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Debug|x64.Build.0 = Debug|x64
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Release|ARM.ActiveCfg = Release|ARM
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Release|ARM.Build.0 = Release|ARM
+		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Release|ARM64.ActiveCfg = Release|ARM64
+		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Release|ARM64.Build.0 = Release|ARM64
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Release|Win32.ActiveCfg = Release|Win32
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Release|Win32.Build.0 = Release|Win32
 		{C10CF34B-3F79-430E-AD38-5A32DC0589C2}.Release|x64.ActiveCfg = Release|x64
@@ -11,7 +11,7 @@ set ICU_ARCH=%1
 set ICU_DBRL=%2

 if "%1" == "" (
-echo Usage: %0 "x86 or x64"  "Debug or Release"
+echo Usage: %0 "x86 or x64 or ARM or ARM64"  "Debug or Release"
 exit /b 1
 )

@@ -25,9 +25,13 @@ set ICU_OPATH=%PATH%
 set ICU_ICUDIR="%~dp0"\..\..

 if "%ICU_ARCH%" == "x64" (
-set ICU_BINDIR=%~dp0\..\..\bin64
+    set ICU_BINDIR=%~dp0\..\..\bin64
+) else if "%ICU_ARCH%" == "ARM64" (
+    set ICU_BINDIR=%~dp0\..\..\binARM64
+) else if "%ICU_ARCH%" == "ARM" (
+    set ICU_BINDIR=%~dp0\..\..\binARM
 ) else (
-set ICU_BINDIR=%~dp0\..\..\bin
+    set ICU_BINDIR=%~dp0\..\..\bin
 )

 set PATH=%ICU_BINDIR%;%PATH%
@@ -44,10 +44,10 @@ ifeq ($(ENABLE_SO_VERSION_DATA),1)
 SO_VERSION_DATA = common.res
 endif

-ifeq ($(OS390BATCH),1)
+ifeq ($(BUILD_HOST_ICU),OS390)
 BATCH_TARGET = $(BATCH_COMMON_TARGET)
 BATCH_LIBS = $(BATCH_LIBICUDT) -lm
-endif   # OS390BATCH
+endif

 endif   # ENABLE_SHARED

@@ -58,10 +58,6 @@ DYNAMICCFLAGS = $(SHAREDLIBCFLAGS)
 DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
 CFLAGS += $(LIBCFLAGS)
 CXXFLAGS += $(LIBCXXFLAGS)
-ifeq ($(OS390BATCH),1)
-CFLAGS += -WI
-CXXFLAGS += -WI
-endif

 CPPFLAGS += -I$(srcdir) $(LIBCPPFLAGS) $(CPPFLAGSICUUC)
 # we want DEFS here
@@ -79,42 +75,8 @@ endif
 # $(LIBICUDT) is either stub data or the real DLL common data.
 LIBS = $(LIBICUDT) $(DEFAULT_LIBS)

-OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o \
-uinit.o uobject.o cmemory.o charstr.o cstr.o \
-udata.o ucmndata.o udatamem.o umapfile.o udataswp.o utrie_swap.o ucol_swp.o utrace.o \
-uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o uvectr64.o \
-ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
-ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
-ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
-resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
-ucurr.o \
-messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
-bytestream.o stringpiece.o bytesinkutil.o \
-stringtriebuilder.o bytestriebuilder.o \
-bytestrie.o bytestrieiterator.o \
-ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
-dictionarydata.o \
-edits.o \
-appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
-utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
-unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
-normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \
-chariter.o schriter.o uchriter.o uiter.o \
-patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o characterproperties.o \
-ubidi.o ubidiwrt.o ubidiln.o ushape.o \
-uscript.o uscript_props.o usc_impl.o unames.o \
-utrie.o utrie2.o utrie2_builder.o ucptrie.o umutablecptrie.o \
-bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
-uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o filteredbrk.o \
-rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o rbbi_cache.o \
-serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
-uidna.o usprep.o uts46.o punycode.o \
-util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
-ulist.o uloc_tag.o icudataver.o icuplug.o \
-sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \
-ubiditransform.o \
-pluralmap.o \
-static_unicode_sets.o
+SOURCES = $(shell cat $(srcdir)/sources.txt)
+OBJECTS = $(SOURCES:.cpp=.o)

 ## Header files to install
 HEADERS = $(srcdir)/unicode/*.h
@@ -156,6 +118,11 @@ ifneq ($(ENABLE_STATIC),)
 	$(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir)
 endif
 ifneq ($(ENABLE_SHARED),)
+# For MinGW, do we want the DLL to go in the bin location?
+ifeq ($(MINGW_MOVEDLLSTOBINDIR),YES)
+	$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
+	$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(bindir)
+else
 	$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir)
 ifneq ($(FINAL_SO_TARGET),$(SO_TARGET))
 	cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET))
@@ -163,6 +130,7 @@ ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET))
 	cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET))
 endif
 endif
+endif
 ifneq ($(IMPORT_LIB_EXT),)
 	$(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir)
 ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB))
@@ -219,10 +187,10 @@ ifneq ($(wildcard $(libdir)/$(MIDDLE_SO_TARGET)),)
 endif
 endif

-ifeq ($(OS390BATCH),1)
+ifeq ($(BUILD_HOST_ICU),OS390)
 $(BATCH_TARGET):$(OBJECTS)
-	$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(BATCH_LIBS)
-endif   # OS390BATCH
+	$(SHLIB.cc) $(LD_SONAME) -Wl,-x$@.x $(OUTOPT)$@ $^ $(BATCH_LIBS)
+endif
 endif   # ENABLE_SHARED

 ifeq (,$(MAKECMDGOALS))
@@ -25,45 +25,45 @@ Appendable::~Appendable() {}
 UBool
 Appendable::appendCodePoint(UChar32 c) {
    if(c<=0xffff) {
-        return appendCodeUnit((UChar)c);
+        return appendCodeUnit(static_cast<char16_t>(c));
    } else {
        return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
    }
 }

 UBool
-Appendable::appendString(const UChar *s, int32_t length) {
+Appendable::appendString(const char16_t *s, int32_t length) {
    if(length<0) {
-        UChar c;
+        char16_t c;
        while((c=*s++)!=0) {
            if(!appendCodeUnit(c)) {
-                return FALSE;
+                return false;
            }
        }
    } else if(length>0) {
-        const UChar *limit=s+length;
+        const char16_t *limit=s+length;
        do {
            if(!appendCodeUnit(*s++)) {
-                return FALSE;
+                return false;
            }
        } while(s<limit);
    }
-    return TRUE;
+    return true;
 }

 UBool
 Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
-    return TRUE;
+    return true;
 }

-UChar *
+char16_t *
 Appendable::getAppendBuffer(int32_t minCapacity,
                            int32_t /*desiredCapacityHint*/,
-                            UChar *scratch, int32_t scratchCapacity,
+                            char16_t *scratch, int32_t scratchCapacity,
                            int32_t *resultCapacity) {
    if(minCapacity<1 || scratchCapacity<minCapacity) {
        *resultCapacity=0;
-        return NULL;
+        return nullptr;
    }
    *resultCapacity=scratchCapacity;
    return scratch;
@@ -75,7 +75,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
    int32_t trail=start&0x3f;  // Named for UTF-8 2-byte trail byte with lower 6 bits.

    // Set one bit indicating an all-one block.
-    uint32_t bits=(uint32_t)1<<lead;
+    uint32_t bits = static_cast<uint32_t>(1) << lead;
    if((start+1)==limit) {  // Single-character shortcut.
        table[trail]|=bits;
        return;
@@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
            ++lead;
        }
        if(lead<limitLead) {
-            bits=~(((unsigned)1<<lead)-1);
+            bits = ~((static_cast<unsigned>(1) << lead) - 1);
            if(limitLead<0x20) {
-                bits&=((unsigned)1<<limitLead)-1;
+                bits &= (static_cast<unsigned>(1) << limitLead) - 1;
            }
            for(trail=0; trail<64; ++trail) {
                table[trail]|=bits;
@@ -111,7 +111,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
        // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
        // In that case, bits=1<<limitLead is undefined but the bits value
        // is not used because trail<limitTrail is already false.
-        bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
+        bits = static_cast<uint32_t>(1) << ((limitLead == 0x20) ? (limitLead - 1) : limitLead);
        for(trail=0; trail<limitTrail; ++trail) {
            table[trail]|=bits;
        }
@@ -290,28 +290,28 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {

 UBool
 BMPSet::contains(UChar32 c) const {
-    if((uint32_t)c<=0xff) {
-        return (UBool)latin1Contains[c];
-    } else if((uint32_t)c<=0x7ff) {
-        return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
-    } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
+    if (static_cast<uint32_t>(c) <= 0xff) {
+        return latin1Contains[c];
+    } else if (static_cast<uint32_t>(c) <= 0x7ff) {
+        return (table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0;
+    } else if (static_cast<uint32_t>(c) < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
        int lead=c>>12;
        uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
        if(twoBits<=1) {
            // All 64 code points with the same bits 15..6
            // are either in the set or not.
-            return (UBool)twoBits;
+            return twoBits;
        } else {
            // Look up the code point in its 4k block of code points.
            return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
        }
-    } else if((uint32_t)c<=0x10ffff) {
+    } else if (static_cast<uint32_t>(c) <= 0x10ffff) {
        // surrogate or supplementary code point
        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
    } else {
-        // Out-of-range code points get FALSE, consistent with long-standing
+        // Out-of-range code points get false, consistent with long-standing
        // behavior of UnicodeSet::contains(c).
-        return FALSE;
+        return false;
    }
 }

@@ -319,9 +319,9 @@ BMPSet::contains(UChar32 c) const {
 * Check for sufficient length for trail unit for each surrogate pair.
 * Handle single surrogates as surrogate code points as usual in ICU.
 */
-const UChar *
-BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
-    UChar c, c2;
+const char16_t *
+BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCondition) const {
+    char16_t c, c2;

    if(spanCondition) {
        // span
@@ -332,7 +332,7 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
                    break;
                }
            } else if(c<=0x7ff) {
-                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
+                if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
                    break;
                }
            } else if(c<0xd800 || c>=0xe000) {
@@ -372,7 +372,7 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
                    break;
                }
            } else if(c<=0x7ff) {
-                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
+                if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
                    break;
                }
            } else if(c<0xd800 || c>=0xe000) {
@@ -408,9 +408,9 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
 }

 /* Symmetrical with span(). */
-const UChar *
-BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
-    UChar c, c2;
+const char16_t *
+BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spanCondition) const {
+    char16_t c, c2;

    if(spanCondition) {
        // span
@@ -421,7 +421,7 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
                    break;
                }
            } else if(c<=0x7ff) {
-                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
+                if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
                    break;
                }
            } else if(c<0xd800 || c>=0xe000) {
@@ -464,7 +464,7 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
                    break;
                }
            } else if(c<=0x7ff) {
-                if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
+                if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
                    break;
                }
            } else if(c<0xd800 || c>=0xe000) {
@@ -527,7 +527,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
                b=*s;
            } while(U8_IS_SINGLE(b));
        }
-        length=(int32_t)(limit-s);
+        length = static_cast<int32_t>(limit - s);
    }

    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
@@ -547,7 +547,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
     * the truncated sequence.
     */
    b=*(limit-1);
-    if((int8_t)b<0) {
+    if (static_cast<int8_t>(b) < 0) {
        // b>=0x80: lead or trail byte
        if(b<0xc0) {
            // single trail byte, check for preceding 3- or 4-byte lead byte
@@ -602,15 +602,15 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
        if(b>=0xe0) {
            if(b<0xf0) {
                if( /* handle U+0000..U+FFFF inline */
-                    (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
-                    (t2=(uint8_t)(s[1]-0x80)) <= 0x3f
+                    (t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
+                    (t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f
                ) {
                    b&=0xf;
                    uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
                    if(twoBits<=1) {
                        // All 64 code points with this lead byte and middle trail byte
                        // are either in the set or not.
-                        if(twoBits!=(uint32_t)spanCondition) {
+                        if (twoBits != static_cast<uint32_t>(spanCondition)) {
                            return s-1;
                        }
                    } else {
@@ -624,12 +624,12 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
                    continue;
                }
            } else if( /* handle U+10000..U+10FFFF inline */
-                (t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
-                (t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
-                (t3=(uint8_t)(s[2]-0x80)) <= 0x3f
+                (t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
+                (t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f &&
+                (t3 = static_cast<uint8_t>(s[2] - 0x80)) <= 0x3f
            ) {
                // Give an illegal sequence the same value as the result of contains(FFFD).
-                UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
+                UChar32 c = (static_cast<UChar32>(b - 0xf0) << 18) | (static_cast<UChar32>(t1) << 12) | (t2 << 6) | t3;
                if( (   (0x10000<=c && c<=0x10ffff) ?
                            containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
                            containsFFFD
@@ -643,9 +643,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
        } else {
            if( /* handle U+0000..U+07FF inline */
                b>=0xc0 &&
-                (t1=(uint8_t)(*s-0x80)) <= 0x3f
+                (t1 = static_cast<uint8_t>(*s - 0x80)) <= 0x3f
            ) {
-                if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
+                if (static_cast<USetSpanCondition>((table7FF[t1] & (static_cast<uint32_t>(1) << (b & 0x1f))) != 0) != spanCondition) {
                    return s-1;
                }
                ++s;
@@ -711,7 +711,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
        c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
        // c is a valid code point, not ASCII, not a surrogate
        if(c<=0x7ff) {
-            if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
+            if (static_cast<USetSpanCondition>((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) != spanCondition) {
                return prev+1;
            }
        } else if(c<=0xffff) {
@@ -720,7 +720,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
            if(twoBits<=1) {
                // All 64 code points with the same bits 15..6
                // are either in the set or not.
-                if(twoBits!=(uint32_t)spanCondition) {
+                if (twoBits != static_cast<uint32_t>(spanCondition)) {
                    return prev+1;
                }
            } else {
@@ -48,14 +48,14 @@ public:
     * It must be s<limit and spanCondition==0 or 1.
     * @return The string pointer which limits the span.
     */
-    const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
+    const char16_t *span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCondition) const;

    /*
     * Span the trailing substring for which each character c has spanCondition==contains(c).
     * It must be s<limit and spanCondition==0 or 1.
     * @return The string pointer which starts the span.
     */
-    const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
+    const char16_t *spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spanCondition) const;

    /*
     * Span the initial substring for which each character c has spanCondition==contains(c).
@@ -101,7 +101,7 @@ private:
     */
    UBool latin1Contains[0x100];

-    /* TRUE if contains(U+FFFD). */
+    /* true if contains(U+FFFD). */
    UBool containsFFFD;

    /*
@@ -156,7 +156,7 @@ private:
 };

 inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
-    return (UBool)(findCodePoint(c, lo, hi) & 1);
+    return findCodePoint(c, lo, hi) & 1;
 }

 U_NAMESPACE_END
@@ -21,10 +21,12 @@
 #include "unicode/uscript.h"
 #include "unicode/ucharstrie.h"
 #include "unicode/bytestrie.h"
+#include "unicode/rbbi.h"

 #include "brkeng.h"
 #include "cmemory.h"
 #include "dictbe.h"
+#include "lstmbe.h"
 #include "charstr.h"
 #include "dictionarydata.h"
 #include "mutex.h"
@@ -69,17 +71,22 @@ UnhandledEngine::~UnhandledEngine() {
 }

 UBool
-UnhandledEngine::handles(UChar32 c) const {
+UnhandledEngine::handles(UChar32 c, const char* locale) const {
+    (void)locale; // Unused
    return fHandled && fHandled->contains(c);
 }

 int32_t
 UnhandledEngine::findBreaks( UText *text,
-                             int32_t /* startPos */,
+                             int32_t startPos,
                             int32_t endPos,
-                             UVector32 &/*foundBreaks*/ ) const {
-    UChar32 c = utext_current32(text); 
-    while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
+                             UVector32 &/*foundBreaks*/,
+                             UBool /* isPhraseBreaking */,
+                             UErrorCode &status) const {
+    if (U_FAILURE(status)) return 0;
+    utext_setNativeIndex(text, startPos);
+    UChar32 c = utext_current32(text);
+    while (static_cast<int32_t>(utext_getNativeIndex(text)) < endPos && fHandled->contains(c)) {
        utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
        c = utext_current32(text);
    }
@@ -107,66 +114,77 @@ UnhandledEngine::handleCharacter(UChar32 c) {
 */

 ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
-    fEngines = 0;
+    fEngines = nullptr;
 }

 ICULanguageBreakFactory::~ICULanguageBreakFactory() {
-    if (fEngines != 0) {
-        delete fEngines;
+    delete fEngines;
+}
+
+void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
+    static UMutex gBreakEngineMutex;
+    Mutex m(&gBreakEngineMutex);
+    if (fEngines == nullptr) {
+        LocalPointer<UStack>  engines(new UStack(uprv_deleteUObject, nullptr, status), status);
+        if (U_SUCCESS(status)) {
+            fEngines = engines.orphan();
+        }
    }
 }

-U_NAMESPACE_END
-U_CDECL_BEGIN
-static void U_CALLCONV _deleteEngine(void *obj) {
-    delete (const icu::LanguageBreakEngine *) obj;
-}
-U_CDECL_END
-U_NAMESPACE_BEGIN
-
-static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER;
-
 const LanguageBreakEngine *
-ICULanguageBreakFactory::getEngineFor(UChar32 c) {
-    const LanguageBreakEngine *lbe = NULL;
+ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
+    const LanguageBreakEngine *lbe = nullptr;
    UErrorCode  status = U_ZERO_ERROR;
+    ensureEngines(status);
+    if (U_FAILURE(status) ) {
+        // Note: no way to return error code to caller.
+        return nullptr;
+    }

+    static UMutex gBreakEngineMutex;
    Mutex m(&gBreakEngineMutex);
-
-    if (fEngines == NULL) {
-        UStack  *engines = new UStack(_deleteEngine, NULL, status);
-        if (U_FAILURE(status) || engines == NULL) {
-            // Note: no way to return error code to caller.
-            delete engines;
-            return NULL;
-        }
-        fEngines = engines;
-    } else {
-        int32_t i = fEngines->size();
-        while (--i >= 0) {
-            lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
-            if (lbe != NULL && lbe->handles(c)) {
-                return lbe;
-            }
+    int32_t i = fEngines->size();
+    while (--i >= 0) {
+        lbe = static_cast<const LanguageBreakEngine*>(fEngines->elementAt(i));
+        if (lbe != nullptr && lbe->handles(c, locale)) {
+            return lbe;
        }
    }
-    
+
    // We didn't find an engine. Create one.
-    lbe = loadEngineFor(c);
-    if (lbe != NULL) {
+    lbe = loadEngineFor(c, locale);
+    if (lbe != nullptr) {
        fEngines->push((void *)lbe, status);
    }
-    return lbe;
+    return U_SUCCESS(status) ? lbe : nullptr;
 }

 const LanguageBreakEngine *
-ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
+ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) {
    UErrorCode status = U_ZERO_ERROR;
    UScriptCode code = uscript_getScript(c, &status);
    if (U_SUCCESS(status)) {
+        const LanguageBreakEngine *engine = nullptr;
+        // Try to use LSTM first
+        const LSTMData *data = CreateLSTMDataForScript(code, status);
+        if (U_SUCCESS(status)) {
+            if (data != nullptr) {
+                engine = CreateLSTMBreakEngine(code, data, status);
+                if (U_SUCCESS(status) && engine != nullptr) {
+                    return engine;
+                }
+                if (engine != nullptr) {
+                    delete engine;
+                    engine = nullptr;
+                } else {
+                    DeleteLSTMData(data);
+                }
+            }
+        }
+        status = U_ZERO_ERROR;  // fallback to dictionary based
        DictionaryMatcher *m = loadDictionaryMatcherFor(code);
-        if (m != NULL) {
-            const LanguageBreakEngine *engine = NULL;
+        if (m != nullptr) {
            switch(code) {
            case USCRIPT_THAI:
                engine = new ThaiBreakEngine(m, status);
@@ -211,17 +229,17 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
            default:
                break;
            }
-            if (engine == NULL) {
+            if (engine == nullptr) {
                delete m;
            }
            else if (U_FAILURE(status)) {
                delete engine;
-                engine = NULL;
+                engine = nullptr;
            }
            return engine;
        }
    }
-    return NULL;
+    return nullptr;
 }

 DictionaryMatcher *
@@ -231,53 +249,117 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
    int32_t dictnlength = 0;
-    const UChar *dictfname =
+    const char16_t *dictfname =
        ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
-        return NULL;
+        return nullptr;
    }
    CharString dictnbuf;
    CharString ext;
-    const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength);  // last dot
-    if (extStart != NULL) {
-        int32_t len = (int32_t)(extStart - dictfname);
-        ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
+    const char16_t *extStart = u_memrchr(dictfname, 0x002e, dictnlength);  // last dot
+    if (extStart != nullptr) {
+        int32_t len = static_cast<int32_t>(extStart - dictfname);
+        ext.appendInvariantChars(UnicodeString(false, extStart + 1, dictnlength - len - 1), status);
        dictnlength = len;
    }
-    dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
+    dictnbuf.appendInvariantChars(UnicodeString(false, dictfname, dictnlength), status);
    ures_close(b);

    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
    if (U_SUCCESS(status)) {
        // build trie
-        const uint8_t *data = (const uint8_t *)udata_getMemory(file);
-        const int32_t *indexes = (const int32_t *)data;
+        const uint8_t* data = static_cast<const uint8_t*>(udata_getMemory(file));
+        const int32_t* indexes = reinterpret_cast<const int32_t*>(data);
        const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
        const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
-        DictionaryMatcher *m = NULL;
+        DictionaryMatcher *m = nullptr;
        if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
            const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
-            const char *characters = (const char *)(data + offset);
+            const char* characters = reinterpret_cast<const char*>(data + offset);
            m = new BytesDictionaryMatcher(characters, transform, file);
        }
        else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
-            const UChar *characters = (const UChar *)(data + offset);
+            const char16_t* characters = reinterpret_cast<const char16_t*>(data + offset);
            m = new UCharsDictionaryMatcher(characters, file);
        }
-        if (m == NULL) {
+        if (m == nullptr) {
            // no matcher exists to take ownership - either we are an invalid 
            // type or memory allocation failed
            udata_close(file);
        }
        return m;
-    } else if (dictfname != NULL) {
+    } else if (dictfname != nullptr) {
        // we don't have a dictionary matcher.
-        // returning NULL here will cause us to fail to find a dictionary break engine, as expected
+        // returning nullptr here will cause us to fail to find a dictionary break engine, as expected
        status = U_ZERO_ERROR;
-        return NULL;
+        return nullptr;
    }
-    return NULL;
+    return nullptr;
+}
+
+
+/**
+ * Wraps an externally supplied break engine in a BreakEngineWrapper and
+ * pushes the wrapper onto fEngines.
+ *
+ * @param external The engine to add. It is placed in a LocalPointer on entry,
+ *                 so it is released on every path that returns before the
+ *                 wrapper takes it over.
+ * @param status   Error code. If it indicates failure after ensureEngines()
+ *                 or after the wrapper is created, nothing is pushed.
+ */
+void ICULanguageBreakFactory::addExternalEngine(
+        ExternalBreakEngine* external, UErrorCode& status) {
+    LocalPointer<ExternalBreakEngine> engine(external, status);
+    ensureEngines(status);
+    if (U_FAILURE(status)) {
+        // ensureEngines() only assigns fEngines on success, so fEngines may
+        // still be nullptr here and must not be dereferenced.
+        return;
+    }
+    LocalPointer<BreakEngineWrapper> wrapper(
+        new BreakEngineWrapper(engine.orphan(), status), status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // NOTE(review): this function-local static is a different object from the
+    // identically named statics declared inside ensureEngines() and
+    // getEngineFor(), so this push is not serialized against the lookup and
+    // push done in getEngineFor(). Confirm whether a single shared mutex is
+    // intended.
+    static UMutex gBreakEngineMutex;
+    Mutex m(&gBreakEngineMutex);
+    fEngines->push(wrapper.getAlias(), status);
+    wrapper.orphan();
+}
+
+// Builds a wrapper around `engine`. The pointer and `status` are forwarded to
+// the constructor of the `delegate` member (a LocalPointer<ExternalBreakEngine>).
+BreakEngineWrapper::BreakEngineWrapper(
+    ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) {
+}
+
+// No explicit cleanup; the only data member, `delegate`, is a LocalPointer.
+BreakEngineWrapper::~BreakEngineWrapper() {
+}
+
+// Answers the LanguageBreakEngine::handles() query by forwarding the character
+// and locale to the wrapped engine's isFor().
+UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const {
+    return delegate->isFor(c, locale);
+}
+
+/**
+ * Finds the run of text starting at startPos that the wrapped engine handles,
+ * asks the engine to fill in the breaks for that run, and appends them to
+ * foundBreaks.
+ *
+ * @param text        The text to scan; on success its index is left at the
+ *                    end of the handled run.
+ * @param startPos    Where the run starts.
+ * @param endPos      Upper bound for the run.
+ * @param foundBreaks Receives the breaks, appended after any existing entries.
+ * @param status      Error code; on failure the function returns 0.
+ * @return The number of breaks reported by the wrapped engine.
+ */
+int32_t BreakEngineWrapper::findBreaks(
+    UText *text,
+    int32_t startPos,
+    int32_t endPos,
+    UVector32 &foundBreaks,
+    UBool /* isPhraseBreaking */,
+    UErrorCode &status) const {
+    if (U_FAILURE(status)) return 0;
+
+    // Find the span of characters handled by the delegate.
+    //   The span begins at startPos and extends forward until endPos is
+    //   reached or a character the delegate does not handle is found.
+
+    utext_setNativeIndex(text, startPos);
+    // Read the index back rather than reusing startPos (presumably the UText
+    // may adjust the requested index -- confirm).
+    const int32_t rangeStart = static_cast<int32_t>(utext_getNativeIndex(text));
+    int32_t current;
+    UChar32 c = utext_current32(text);
+    while ((current = static_cast<int32_t>(utext_getNativeIndex(text))) < endPos && delegate->handles(c)) {
+        utext_next32(text);         // TODO:  recast loop for postincrement
+        c = utext_current32(text);
+    }
+    const int32_t rangeEnd = current;
+
+    const int32_t beforeSize = foundBreaks.size();
+    const int32_t additionalCapacity = rangeEnd - rangeStart + 1;
+    // Make room for (rangeEnd - rangeStart + 1) more items after the existing ones.
+    foundBreaks.ensureCapacity(beforeSize + additionalCapacity, status);
+    if (U_FAILURE(status)) return 0;
+    // Expose exactly the slots handed to fillBreaks(); the previous code added
+    // beforeSize twice here.
+    foundBreaks.setSize(beforeSize + additionalCapacity);
+    const int32_t result = delegate->fillBreaks(text, rangeStart, rangeEnd,
+                                                foundBreaks.getBuffer() + beforeSize,
+                                                additionalCapacity, status);
+    if (U_FAILURE(status)) {
+        // Drop the placeholder slots so the vector is left as it was found.
+        foundBreaks.setSize(beforeSize);
+        return 0;
+    }
+    // Keep only the entries the engine actually produced.
+    foundBreaks.setSize(beforeSize + result);
+    utext_setNativeIndex(text, current);
+    return result;
+}

 U_NAMESPACE_END
@@ -10,6 +10,7 @@
 #ifndef BRKENG_H
 #define BRKENG_H

+#include "unicode/umisc.h"
 #include "unicode/utypes.h"
 #include "unicode/uobject.h"
 #include "unicode/utext.h"
@@ -21,6 +22,7 @@ class UnicodeSet;
 class UStack;
 class UVector32;
 class DictionaryMatcher;
+class ExternalBreakEngine;

 /*******************************************************************
 * LanguageBreakEngine
@@ -35,7 +37,7 @@ class DictionaryMatcher;
 * <p>LanguageBreakEngines should normally be implemented so as to
 * be shared between threads without locking.</p>
 */
-class LanguageBreakEngine : public UMemory {
+class LanguageBreakEngine : public UObject {
 public:

  /**
@@ -54,10 +56,11 @@ class LanguageBreakEngine : public UMemory {
  * a particular kind of break.</p>
  *
  * @param c A character which begins a run that the engine might handle
-  * @return TRUE if this engine handles the particular character and break
+  * @param locale The locale.
+  * @return true if this engine handles the particular character and break
  * type.
  */
-  virtual UBool handles(UChar32 c) const = 0;
+  virtual UBool handles(UChar32 c, const char* locale) const = 0;

 /**
  * <p>Find any breaks within a run in the supplied text.</p>
@@ -68,15 +71,47 @@ class LanguageBreakEngine : public UMemory {
  * @param startPos The start of the run within the supplied text.
  * @param endPos The end of the run within the supplied text.
  * @param foundBreaks A Vector of int32_t to receive the breaks.
+  * @param status Information on any errors encountered.
  * @return The number of breaks found.
  */
  virtual int32_t findBreaks( UText *text,
                              int32_t startPos,
                              int32_t endPos,
-                              UVector32 &foundBreaks ) const = 0;
+                              UVector32 &foundBreaks,
+                              UBool isPhraseBreaking,
+                              UErrorCode &status) const = 0;

 };

+/*******************************************************************
+ * BreakEngineWrapper
+ */
+
+/**
+ * <p>BreakEngineWrapper implements LanguageBreakEngine as
+ * a thin wrapper that delegates the work to an ExternalBreakEngine.
+ * </p>
+ */
+class BreakEngineWrapper : public  LanguageBreakEngine {
+ public:
+
+  /**
+   * @param engine The external engine to wrap; it is stored in the
+   *               LocalPointer member `delegate`.
+   * @param status Error code, passed on to the LocalPointer constructor.
+   */
+  BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status);
+
+  virtual ~BreakEngineWrapper();
+
+  /**
+   * @param c A character which begins a run that the engine might handle
+   * @param locale The locale.
+   * @return The answer of the wrapped engine's isFor(c, locale).
+   */
+  virtual UBool handles(UChar32 c, const char* locale) const override;
+
+  /**
+   * Finds the run starting at startPos that the wrapped engine handles and
+   * appends the breaks the engine reports for it to foundBreaks.
+   *
+   * @param text The text to break.
+   * @param startPos The start of the run within the supplied text.
+   * @param endPos The end of the run within the supplied text.
+   * @param foundBreaks A Vector of int32_t to receive the breaks.
+   * @param isPhraseBreaking Not used by this implementation.
+   * @param status Information on any errors encountered.
+   * @return The number of breaks found.
+   */
+  virtual int32_t findBreaks( UText *text,
+                              int32_t startPos,
+                              int32_t endPos,
+                              UVector32 &foundBreaks,
+                              UBool isPhraseBreaking,
+                              UErrorCode &status) const override;
+
+ private:
+  // The wrapped external engine; all queries are forwarded to it.
+  LocalPointer<ExternalBreakEngine> delegate;
+};
+
 /*******************************************************************
 * LanguageBreakFactory
 */
@@ -122,9 +157,10 @@ class LanguageBreakFactory : public UMemory {
  *
  * @param c A character that begins a run for which a LanguageBreakEngine is
  * sought.
+  * @param locale The locale.
  * @return A LanguageBreakEngine with the desired characteristics, or 0.
  */
-  virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
+  virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) = 0;

 };

@@ -171,10 +207,11 @@ class UnhandledEngine : public LanguageBreakEngine {
  * a particular kind of break.</p>
  *
  * @param c A character which begins a run that the engine might handle
-  * @return TRUE if this engine handles the particular character and break
+  * @param locale The locale.
+  * @return true if this engine handles the particular character and break
  * type.
  */
-  virtual UBool handles(UChar32 c) const;
+  virtual UBool handles(UChar32 c, const char* locale) const override;

 /**
  * <p>Find any breaks within a run in the supplied text.</p>
@@ -185,12 +222,15 @@ class UnhandledEngine : public LanguageBreakEngine {
  * @param startPos The start of the run within the supplied text.
  * @param endPos The end of the run within the supplied text.
  * @param foundBreaks An allocated C array of the breaks found, if any
+  * @param status Information on any errors encountered.
  * @return The number of breaks found.
  */
  virtual int32_t findBreaks( UText *text,
                              int32_t startPos,
                              int32_t endPos,
-                              UVector32 &foundBreaks ) const;
+                              UVector32 &foundBreaks,
+                              UBool isPhraseBreaking,
+                              UErrorCode &status) const override;

 /**
  * <p>Tell the engine to handle a particular character and break type.</p>
@@ -241,9 +281,18 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
  *
  * @param c A character that begins a run for which a LanguageBreakEngine is
  * sought.
+  * @param locale The locale.
  * @return A LanguageBreakEngine with the desired characteristics, or 0.
  */
-  virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
+  virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override;
+
+  /**
+   * Adds and adopts the engine.
+   * @param engine The ExternalBreakEngine to be added and adopted. The caller
+   *     passes ownership and must not release the memory after this call.
+   * @param status the error code.
+   */
+  virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status);

 protected:
 /**
@@ -252,17 +301,21 @@ protected:
  *
  * @param c A character that begins a run for which a LanguageBreakEngine is
  * sought.
+  * @param locale The locale.
  * @return A LanguageBreakEngine with the desired characteristics, or 0.
  */
-  virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
+  virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale);

  /**
   * <p>Create a DictionaryMatcher for the specified script and break type.</p>
   * @param script An ISO 15924 script code that identifies the dictionary to be
   * created.
-   * @return A DictionaryMatcher with the desired characteristics, or NULL.
+   * @return A DictionaryMatcher with the desired characteristics, or nullptr.
   */
  virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
+
+ private:
+  void ensureEngines(UErrorCode& status);
 };

 U_NAMESPACE_END
@@ -27,9 +27,11 @@
 #include "unicode/rbbi.h"
 #include "unicode/brkiter.h"
 #include "unicode/udata.h"
+#include "unicode/uloc.h"
 #include "unicode/ures.h"
 #include "unicode/ustring.h"
 #include "unicode/filteredbrk.h"
+#include "bytesinkutil.h"
 #include "ucln_cmn.h"
 #include "cstring.h"
 #include "umutex.h"
@@ -38,6 +40,7 @@
 #include "uresimp.h"
 #include "uassert.h"
 #include "ubrkimpl.h"
+#include "utracimp.h"
 #include "charstr.h"

 // *****************************************************************************
@@ -56,17 +59,17 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
 {
    char fnbuff[256];
    char ext[4]={'\0'};
-    CharString actualLocale;
+    CharString actual;
    int32_t size;
-    const UChar* brkfname = NULL;
+    const char16_t* brkfname = nullptr;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName  = &brkNameStack;
-    RuleBasedBreakIterator *result = NULL;
+    RuleBasedBreakIterator *result = nullptr;

    if (U_FAILURE(status))
-        return NULL;
+        return nullptr;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);
@@ -82,7 +85,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
-        if ((size_t)size>=sizeof(fnbuff)) {
+        if (static_cast<size_t>(size) >= sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
@@ -91,12 +94,12 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
-            actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
+            actual.append(ures_getLocaleInternal(brkName, &status), -1, status);

-            UChar* extStart=u_strchr(brkfname, 0x002e);
+            char16_t* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
-            if(extStart!=NULL){
-                len = (int)(extStart-brkfname);
+            if (extStart != nullptr){
+                len = static_cast<int>(extStart - brkfname);
                u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
                u_UCharsToChars(brkfname, fnbuff, len);
            }
@@ -110,27 +113,27 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
-        return NULL;
+        return nullptr;
    }

    // Create a RuleBasedBreakIterator
-    result = new RuleBasedBreakIterator(file, status);
+    result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != nullptr, status);

    // If there is a result, set the valid locale and actual locale, and the kind
-    if (U_SUCCESS(status) && result != NULL) {
-        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
-        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), 
-                              actualLocale.data());
+    if (U_SUCCESS(status) && result != nullptr) {
+        result->actualLocale = Locale(actual.data());
+        result->validLocale = Locale(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status));
+        result->requestLocale = loc;
    }

    ures_close(b);

-    if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
+    if (U_FAILURE(status) && result != nullptr) {  // Sometimes redundant check, but simple
        delete result;
-        return NULL;
+        return nullptr;
    }

-    if (result == NULL) {
+    if (result == nullptr) {
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
@@ -199,19 +202,22 @@ BreakIterator::getAvailableLocales(int32_t& count)
 //-------------------------------------------

 BreakIterator::BreakIterator()
+    : actualLocale(Locale::getRoot()), validLocale(Locale::getRoot()), requestLocale(Locale::getRoot())
 {
-    *validLocale = *actualLocale = 0;
 }

-BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
-    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
-    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
+BreakIterator::BreakIterator(const BreakIterator &other)
+    : UObject(other),
+      actualLocale(other.actualLocale),
+      validLocale(other.validLocale),
+      requestLocale(other.requestLocale) {
 }

 BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
    if (this != &other) {
-        uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
-        uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
+        actualLocale = other.actualLocale;
+        validLocale = other.validLocale;
+        requestLocale = other.requestLocale;
    }
    return *this;
 }
@@ -233,7 +239,7 @@ class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
 public:
    virtual ~ICUBreakIteratorFactory();
 protected:
-    virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
+    virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const override {
        return BreakIterator::makeInstance(loc, kind, status);
    }
 };
@@ -253,19 +259,19 @@ public:

    virtual ~ICUBreakIteratorService();

-    virtual UObject* cloneInstance(UObject* instance) const {
+    virtual UObject* cloneInstance(UObject* instance) const override {
        return ((BreakIterator*)instance)->clone();
    }

-    virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
-        LocaleKey& lkey = (LocaleKey&)key;
+    virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const override {
+        LocaleKey& lkey = static_cast<LocaleKey&>(const_cast<ICUServiceKey&>(key));
        int32_t kind = lkey.kind();
        Locale loc;
        lkey.currentLocale(loc);
        return BreakIterator::makeInstance(loc, kind, status);
    }

-    virtual UBool isDefault() const {
+    virtual UBool isDefault() const override {
        return countFactories() == 1;
    }
 };
@@ -277,8 +283,8 @@ ICUBreakIteratorService::~ICUBreakIteratorService() {}
 // defined in ucln_cmn.h
 U_NAMESPACE_END

-static icu::UInitOnce gInitOnceBrkiter;
-static icu::ICULocaleService* gService = NULL;
+static icu::UInitOnce gInitOnceBrkiter {};
+static icu::ICULocaleService* gService = nullptr;



@@ -286,27 +292,27 @@ static icu::ICULocaleService* gService = NULL;
 * Release all static memory held by breakiterator.
 */
 U_CDECL_BEGIN
-static UBool U_CALLCONV breakiterator_cleanup(void) {
+static UBool U_CALLCONV breakiterator_cleanup() {
 #if !UCONFIG_NO_SERVICE
    if (gService) {
        delete gService;
-        gService = NULL;
+        gService = nullptr;
    }
    gInitOnceBrkiter.reset();
 #endif
-    return TRUE;
+    return true;
 }
 U_CDECL_END
 U_NAMESPACE_BEGIN

 static void U_CALLCONV 
-initService(void) {
+initService() {
    gService = new ICUBreakIteratorService();
    ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
 }

 static ICULocaleService*
-getService(void)
+getService()
 {
    umtx_initOnce(gInitOnceBrkiter, &initService);
    return gService;
@@ -316,9 +322,9 @@ getService(void)
 // -------------------------------------

 static inline UBool
-hasService(void)
+hasService()
 {
-    return !gInitOnceBrkiter.isReset() && getService() != NULL;
+    return !gInitOnceBrkiter.isReset() && getService() != nullptr;
 }

 // -------------------------------------
@@ -327,9 +333,9 @@ URegistryKey U_EXPORT2
 BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
 {
    ICULocaleService *service = getService();
-    if (service == NULL) {
+    if (service == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
+        return nullptr;
    }
    return service->registerInstance(toAdopt, locale, kind, status);
 }
@@ -345,17 +351,17 @@ BreakIterator::unregister(URegistryKey key, UErrorCode& status)
        }
        status = U_MEMORY_ALLOCATION_ERROR;
    }
-    return FALSE;
+    return false;
 }

 // -------------------------------------

 StringEnumeration* U_EXPORT2
-BreakIterator::getAvailableLocales(void)
+BreakIterator::getAvailableLocales()
 {
    ICULocaleService *service = getService();
-    if (service == NULL) {
-        return NULL;
+    if (service == nullptr) {
+        return nullptr;
    }
    return service->getAvailableLocales();
 }
@@ -367,7 +373,7 @@ BreakIterator*
 BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
 {
    if (U_FAILURE(status)) {
-        return NULL;
+        return nullptr;
    }

 #if !UCONFIG_NO_SERVICE
@@ -384,9 +390,9 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
        // handleDefault calls), so we don't touch it.  YES, A COMMENT
        // THIS LONG is a sign of bad code -- so the action item is to
        // revisit this in ICU 3.0 and clean it up/fix it/remove it.
-        if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
-            U_LOCALE_BASED(locBased, *result);
-            locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
+        if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) {
+            result->actualLocale = actualLoc;
+            result->validLocale = actualLoc;
        }
        return result;
    }
@@ -405,35 +411,55 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
 {

    if (U_FAILURE(status)) {
-        return NULL;
+        return nullptr;
    }
-    char lbType[kKeyValueLenMax];

-    BreakIterator *result = NULL;
+    BreakIterator *result = nullptr;
    switch (kind) {
    case UBRK_CHARACTER:
-        result = BreakIterator::buildInstance(loc, "grapheme", status);
+        {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER);
+            result = BreakIterator::buildInstance(loc, "grapheme", status);
+            UTRACE_EXIT_STATUS(status);
+        }
        break;
    case UBRK_WORD:
-        result = BreakIterator::buildInstance(loc, "word", status);
+        {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD);
+            result = BreakIterator::buildInstance(loc, "word", status);
+            UTRACE_EXIT_STATUS(status);
+        }
        break;
    case UBRK_LINE:
-        uprv_strcpy(lbType, "line");
        {
-            char lbKeyValue[kKeyValueLenMax] = {0};
+            char lb_lw[kKeyValueLenMax];
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
+            uprv_strcpy(lb_lw, "line");
            UErrorCode kvStatus = U_ZERO_ERROR;
-            int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
-            if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
-                uprv_strcat(lbType, "_");
-                uprv_strcat(lbType, lbKeyValue);
+            auto value = loc.getKeywordValue<CharString>("lb", kvStatus);
+            if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
+                uprv_strcat(lb_lw, "_");
+                uprv_strcat(lb_lw, value.data());
            }
+            // lw=phrase is only supported in Japanese and Korean
+            if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
+                value = loc.getKeywordValue<CharString>("lw", kvStatus);
+                if (U_SUCCESS(kvStatus) && value == "phrase") {
+                    uprv_strcat(lb_lw, "_");
+                    uprv_strcat(lb_lw, value.data());
+                }
+            }
+            result = BreakIterator::buildInstance(loc, lb_lw, status);
+
+            UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
+            UTRACE_EXIT_STATUS(status);
        }
-        result = BreakIterator::buildInstance(loc, lbType, status);
        break;
    case UBRK_SENTENCE:
-        result = BreakIterator::buildInstance(loc, "sentence", status);
-#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
        {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE);
+            result = BreakIterator::buildInstance(loc, "sentence", status);
+#if !UCONFIG_NO_FILTERED_BREAK_ITERATION
            char ssKeyValue[kKeyValueLenMax] = {0};
            UErrorCode kvStatus = U_ZERO_ERROR;
            int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus);
@@ -444,18 +470,24 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
                    delete fbiBuilder;
                }
            }
-        }
 #endif
+            UTRACE_EXIT_STATUS(status);
+        }
        break;
    case UBRK_TITLE:
-        result = BreakIterator::buildInstance(loc, "title", status);
+        {
+            UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE);
+            result = BreakIterator::buildInstance(loc, "title", status);
+            UTRACE_EXIT_STATUS(status);
+        }
        break;
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }

    if (U_FAILURE(status)) {
-        return NULL;
+        delete result;
+        return nullptr;
    }

    return result;
@@ -463,14 +495,24 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)

 Locale
 BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
-    U_LOCALE_BASED(locBased, *this);
-    return locBased.getLocale(type, status);
+    if (U_FAILURE(status)) {
+        return Locale::getRoot();
+    }
+    if (type == ULOC_REQUESTED_LOCALE) {
+        return requestLocale;
+    }
+    return LocaleBased::getLocale(validLocale, actualLocale, type, status);
 }

 const char *
 BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
-    U_LOCALE_BASED(locBased, *this);
-    return locBased.getLocaleID(type, status);
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+    if (type == ULOC_REQUESTED_LOCALE) {
+        return requestLocale.getName();
+    }
+    return LocaleBased::getLocaleID(validLocale, actualLocale, type, status);
 }


@@ -496,9 +538,8 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE
    return 1;
 }

-BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
-  U_LOCALE_BASED(locBased, (*this));
-  locBased.setLocaleIDs(valid, actual);
+BreakIterator::BreakIterator(const Locale& valid, const Locale& actual)
+    : actualLocale(actual), validLocale(valid), requestLocale(Locale::getRoot()) {
 }

 U_NAMESPACE_END
@@ -20,7 +20,7 @@ U_NAMESPACE_BEGIN
 UBool
 ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
-    if (U_FAILURE(errorCode)) { return FALSE; }
+    if (U_FAILURE(errorCode)) { return false; }
    char scratch[200];
    int32_t s8Length = 0;
    for (int32_t i = 0; i < s16Length;) {
@@ -44,7 +44,7 @@ ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Lengt
        }
        if (j > (INT32_MAX - s8Length)) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
-            return FALSE;
+            return false;
        }
        sink.Append(buffer, j);
        s8Length += j;
@@ -52,19 +52,19 @@ ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Lengt
    if (edits != nullptr) {
        edits->addReplace(length, s8Length);
    }
-    return TRUE;
+    return true;
 }

 UBool
 ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
                           const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
-    if (U_FAILURE(errorCode)) { return FALSE; }
+    if (U_FAILURE(errorCode)) { return false; }
    if ((limit - s) > INT32_MAX) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
-        return FALSE;
+        return false;
    }
-    return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
+    return appendChange(static_cast<int32_t>(limit - s), s16, s16Length, sink, edits, errorCode);
 }

 void
@@ -81,15 +81,15 @@ ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *
 namespace {

 // See unicode/utf8.h U8_APPEND_UNSAFE().
-inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
-inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
+inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>((c >> 6) | 0xc0); }
+inline uint8_t getTwoByteTrail(UChar32 c) { return static_cast<uint8_t>((c & 0x3f) | 0x80); }

 }  // namespace

 void
 ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
    U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
-    char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
+    char s8[2] = {static_cast<char>(getTwoByteLead(c)), static_cast<char>(getTwoByteTrail(c))};
    sink.Append(s8, 2);
 }

@@ -109,16 +109,16 @@ UBool
 ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
                              ByteSink &sink, uint32_t options, Edits *edits,
                              UErrorCode &errorCode) {
-    if (U_FAILURE(errorCode)) { return FALSE; }
+    if (U_FAILURE(errorCode)) { return false; }
    if ((limit - s) > INT32_MAX) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
-        return FALSE;
+        return false;
    }
-    int32_t length = (int32_t)(limit - s);
+    int32_t length = static_cast<int32_t>(limit - s);
    if (length > 0) {
        appendNonEmptyUnchanged(s, length, sink, options, edits);
    }
-    return TRUE;
+    return true;
 }

 CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
@@ -4,18 +4,55 @@
 // bytesinkutil.h
 // created: 2017sep14 Markus W. Scherer

+#ifndef BYTESINKUTIL_H
+#define BYTESINKUTIL_H
+
+#include <type_traits>
+
 #include "unicode/utypes.h"
 #include "unicode/bytestream.h"
 #include "unicode/edits.h"
+#include "charstr.h"
 #include "cmemory.h"
 #include "uassert.h"
+#include "ustr_imp.h"

 U_NAMESPACE_BEGIN

 class ByteSink;
-class CharString;
 class Edits;

+class U_COMMON_API CharStringByteSink : public ByteSink {
+public:
+    CharStringByteSink(CharString* dest);
+    ~CharStringByteSink() override;
+
+    CharStringByteSink() = delete;
+    CharStringByteSink(const CharStringByteSink&) = delete;
+    CharStringByteSink& operator=(const CharStringByteSink&) = delete;
+
+    void Append(const char* bytes, int32_t n) override;
+
+    char* GetAppendBuffer(int32_t min_capacity,
+                          int32_t desired_capacity_hint,
+                          char* scratch,
+                          int32_t scratch_capacity,
+                          int32_t* result_capacity) override;
+
+private:
+    CharString& dest_;
+};
+
+// CharString doesn't provide the public API that StringByteSink requires a
+// string class to have so this template specialization replaces the default
+// implementation of StringByteSink<CharString> with CharStringByteSink.
+template<>
+class StringByteSink<CharString> : public CharStringByteSink {
+ public:
+  StringByteSink(CharString* dest) : CharStringByteSink(dest) { }
+  StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { }
+};
+
 class U_COMMON_API ByteSinkUtil {
 public:
    ByteSinkUtil() = delete;  // all static
@@ -36,7 +73,7 @@ public:
    /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
    static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
                                       ByteSink &sink, Edits *edits = nullptr) {
-        appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
+        appendCodePoint(static_cast<int32_t>(nextSrc - src), c, sink, edits);
    }

    /** Append the two-byte character (U+0080..U+07FF). */
@@ -45,39 +82,75 @@ public:
    static UBool appendUnchanged(const uint8_t *s, int32_t length,
                                 ByteSink &sink, uint32_t options, Edits *edits,
                                 UErrorCode &errorCode) {
-        if (U_FAILURE(errorCode)) { return FALSE; }
+        if (U_FAILURE(errorCode)) { return false; }
        if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); }
-        return TRUE;
+        return true;
    }

    static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
                                 ByteSink &sink, uint32_t options, Edits *edits,
                                 UErrorCode &errorCode);

+    /**
+     * Calls a lambda that writes to a ByteSink with a CheckedArrayByteSink
+     * and then returns through u_terminateChars(), in order to implement
+     * the classic ICU4C C API writing to a fix sized buffer on top of a
+     * contemporary C++ API.
+     *
+     * @param buffer receiving buffer
+     * @param capacity capacity of receiving buffer
+     * @param lambda that gets called with the sink as an argument
+     * @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
+     * @return number of bytes written, or needed (in case of overflow)
+     * @internal
+     */
+    template <typename F,
+              typename = std::enable_if_t<
+                  std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
+    static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
+                                                F&& lambda,
+                                                UErrorCode& status) {
+        if (U_FAILURE(status)) { return 0; }
+        CheckedArrayByteSink sink(buffer, capacity);
+        lambda(sink, status);
+        if (U_FAILURE(status)) { return 0; }
+
+        int32_t reslen = sink.NumberOfBytesAppended();
+
+        if (sink.Overflowed()) {
+            status = U_BUFFER_OVERFLOW_ERROR;
+            return reslen;
+        }
+
+        return u_terminateChars(buffer, capacity, reslen, &status);
+    }
+
+    /**
+     * Calls a lambda that writes to a ByteSink with a CharStringByteSink and
+     * then returns a CharString, in order to implement a contemporary C++ API
+     * on top of a C/C++ compatibility ByteSink API.
+     *
+     * @param lambda that gets called with the sink as an argument
+     * @param status to check and report
+     * @return the resulting string, or an empty string (in case of error)
+     * @internal
+     */
+    template <typename F,
+              typename = std::enable_if_t<
+                  std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
+    static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
+        if (U_FAILURE(status)) { return {}; }
+        CharString result;
+        CharStringByteSink sink(&result);
+        lambda(sink, status);
+        return result;
+    }
+
 private:
    static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
                                        ByteSink &sink, uint32_t options, Edits *edits);
 };

-class CharStringByteSink : public ByteSink {
-public:
-    CharStringByteSink(CharString* dest);
-    ~CharStringByteSink() override;
-
-    CharStringByteSink() = delete;
-    CharStringByteSink(const CharStringByteSink&) = delete;
-    CharStringByteSink& operator=(const CharStringByteSink&) = delete;
-
-    void Append(const char* bytes, int32_t n) override;
-
-    char* GetAppendBuffer(int32_t min_capacity,
-                          int32_t desired_capacity_hint,
-                          char* scratch,
-                          int32_t scratch_capacity,
-                          int32_t* result_capacity) override;
-
-private:
-    CharString& dest_;
-};
-
 U_NAMESPACE_END
+
+#endif //BYTESINKUTIL_H
@@ -20,7 +20,7 @@ char* ByteSink::GetAppendBuffer(int32_t min_capacity,
                                int32_t* result_capacity) {
  if (min_capacity < 1 || scratch_capacity < min_capacity) {
    *result_capacity = 0;
-    return NULL;
+    return nullptr;
  }
  *result_capacity = scratch_capacity;
  return scratch;
@@ -30,14 +30,14 @@ void ByteSink::Flush() {}

 CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
    : outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity),
-      size_(0), appended_(0), overflowed_(FALSE) {
+      size_(0), appended_(0), overflowed_(false) {
 }

 CheckedArrayByteSink::~CheckedArrayByteSink() {}

 CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
  size_ = appended_ = 0;
-  overflowed_ = FALSE;
+  overflowed_ = false;
  return *this;
 }

@@ -48,14 +48,14 @@ void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
  if (n > (INT32_MAX - appended_)) {
    // TODO: Report as integer overflow, not merely buffer overflow.
    appended_ = INT32_MAX;
-    overflowed_ = TRUE;
+    overflowed_ = true;
    return;
  }
  appended_ += n;
  int32_t available = capacity_ - size_;
  if (n > available) {
    n = available;
-    overflowed_ = TRUE;
+    overflowed_ = true;
  }
  if (n > 0 && bytes != (outbuf_ + size_)) {
    uprv_memcpy(outbuf_ + size_, bytes, n);
@@ -70,7 +70,7 @@ char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
                                            int32_t* result_capacity) {
  if (min_capacity < 1 || scratch_capacity < min_capacity) {
    *result_capacity = 0;
-    return NULL;
+    return nullptr;
  }
  int32_t available = capacity_ - size_;
  if (available >= min_capacity) {
@@ -68,7 +68,7 @@ BytesTrie::jumpByDelta(const uint8_t *pos) {
 UStringTrieResult
 BytesTrie::current() const {
    const uint8_t *pos=pos_;
-    if(pos==NULL) {
+    if(pos==nullptr) {
        return USTRINGTRIE_NO_MATCH;
    } else {
        int32_t node;
@@ -182,7 +182,7 @@ BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
 UStringTrieResult
 BytesTrie::next(int32_t inByte) {
    const uint8_t *pos=pos_;
-    if(pos==NULL) {
+    if(pos==nullptr) {
        return USTRINGTRIE_NO_MATCH;
    }
    if(inByte<0) {
@@ -212,7 +212,7 @@ BytesTrie::next(const char *s, int32_t sLength) {
        return current();
    }
    const uint8_t *pos=pos_;
-    if(pos==NULL) {
+    if(pos==nullptr) {
        return USTRINGTRIE_NO_MATCH;
    }
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
@@ -317,8 +317,8 @@ BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
                                     UBool haveUniqueValue, int32_t &uniqueValue) {
    while(length>kMaxBranchLinearSubNodeLength) {
        ++pos;  // ignore the comparison byte
-        if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
-            return NULL;
+        if(nullptr==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
+            return nullptr;
        }
        length=length-(length>>1);
        pos=skipDelta(pos);
@@ -327,23 +327,23 @@ BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
        ++pos;  // ignore a comparison byte
        // handle its value
        int32_t node=*pos++;
-        UBool isFinal=(UBool)(node&kValueIsFinal);
+        UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
        int32_t value=readValue(pos, node>>1);
        pos=skipValue(pos, node);
        if(isFinal) {
            if(haveUniqueValue) {
                if(value!=uniqueValue) {
-                    return NULL;
+                    return nullptr;
                }
            } else {
                uniqueValue=value;
-                haveUniqueValue=TRUE;
+                haveUniqueValue=true;
            }
        } else {
            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
-                return NULL;
+                return nullptr;
            }
-            haveUniqueValue=TRUE;
+            haveUniqueValue=true;
        }
    } while(--length>1);
    return pos+1;  // ignore the last comparison byte
@@ -358,26 +358,26 @@ BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &u
                node=*pos++;
            }
            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
-            if(pos==NULL) {
-                return FALSE;
+            if(pos==nullptr) {
+                return false;
            }
-            haveUniqueValue=TRUE;
+            haveUniqueValue=true;
        } else if(node<kMinValueLead) {
            // linear-match node
            pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
        } else {
-            UBool isFinal=(UBool)(node&kValueIsFinal);
+            UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
            int32_t value=readValue(pos, node>>1);
            if(haveUniqueValue) {
                if(value!=uniqueValue) {
-                    return FALSE;
+                    return false;
                }
            } else {
                uniqueValue=value;
-                haveUniqueValue=TRUE;
+                haveUniqueValue=true;
            }
            if(isFinal) {
-                return TRUE;
+                return true;
            }
            pos=skipValue(pos, node);
        }
@@ -387,7 +387,7 @@ BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &u
 int32_t
 BytesTrie::getNextBytes(ByteSink &out) const {
    const uint8_t *pos=pos_;
-    if(pos==NULL) {
+    if(pos==nullptr) {
        return 0;
    }
    if(remainingMatchLength_>=0) {
@@ -434,7 +434,7 @@ BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out)

 void
 BytesTrie::append(ByteSink &out, int c) {
-    char ch=(char)c;
+    char ch = static_cast<char>(c);
    out.Append(&ch, 1);
 }

@@ -43,10 +43,10 @@ public:
        int32_t offset=stringOffset;
        int32_t length;
        if(offset>=0) {
-            length=(uint8_t)strings[offset++];
+            length = static_cast<uint8_t>(strings[offset++]);
        } else {
            offset=~offset;
-            length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
+            length = (static_cast<int32_t>(static_cast<uint8_t>(strings[offset])) << 8) | static_cast<uint8_t>(strings[offset + 1]);
            offset+=2;
        }
        return StringPiece(strings.data()+offset, length);
@@ -54,10 +54,10 @@ public:
    int32_t getStringLength(const CharString &strings) const {
        int32_t offset=stringOffset;
        if(offset>=0) {
-            return (uint8_t)strings[offset];
+            return static_cast<uint8_t>(strings[offset]);
        } else {
            offset=~offset;
-            return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
+            return (static_cast<int32_t>(static_cast<uint8_t>(strings[offset])) << 8) | static_cast<uint8_t>(strings[offset + 1]);
        }
    }

@@ -102,9 +102,9 @@ BytesTrieElement::setTo(StringPiece s, int32_t val,
    int32_t offset=strings.length();
    if(length>0xff) {
        offset=~offset;
-        strings.append((char)(length>>8), errorCode);
+        strings.append(static_cast<char>(length >> 8), errorCode);
    }
-    strings.append((char)length, errorCode);
+    strings.append(static_cast<char>(length), errorCode);
    stringOffset=offset;
    value=val;
    strings.append(s, errorCode);
@@ -127,13 +127,13 @@ BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharStrin
 }

 BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
-        : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
-          bytes(NULL), bytesCapacity(0), bytesLength(0) {
+        : strings(nullptr), elements(nullptr), elementsCapacity(0), elementsLength(0),
+          bytes(nullptr), bytesCapacity(0), bytesLength(0) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    strings=new CharString();
-    if(strings==NULL) {
+    if(strings==nullptr) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
 }
@@ -162,7 +162,7 @@ BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) {
            newCapacity=4*elementsCapacity;
        }
        BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
-        if(newElements==NULL) {
+        if(newElements==nullptr) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return *this; // error instead of dereferencing null
        }
@@ -192,13 +192,13 @@ U_CDECL_END
 BytesTrie *
 BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildBytes(buildOption, errorCode);
-    BytesTrie *newTrie=NULL;
+    BytesTrie *newTrie=nullptr;
    if(U_SUCCESS(errorCode)) {
        newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
-        if(newTrie==NULL) {
+        if(newTrie==nullptr) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        } else {
-            bytes=NULL;  // The new trie now owns the array.
+            bytes=nullptr;  // The new trie now owns the array.
            bytesCapacity=0;
        }
    }
@@ -220,7 +220,7 @@ BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &err
    if(U_FAILURE(errorCode)) {
        return;
    }
-    if(bytes!=NULL && bytesLength>0) {
+    if(bytes!=nullptr && bytesLength>0) {
        // Already built.
        return;
    }
@@ -229,9 +229,9 @@ BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &err
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return;
        }
-        uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
+        uprv_sortArray(elements, elementsLength, static_cast<int32_t>(sizeof(BytesTrieElement)),
                      compareElementStrings, strings,
-                      FALSE,  // need not be a stable sort
+                      false,  // need not be a stable sort
                      &errorCode);
        if(U_FAILURE(errorCode)) {
            return;
@@ -256,7 +256,7 @@ BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &err
    if(bytesCapacity<capacity) {
        uprv_free(bytes);
        bytes=static_cast<char *>(uprv_malloc(capacity));
-        if(bytes==NULL) {
+        if(bytes==nullptr) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            bytesCapacity=0;
            return;
@@ -264,7 +264,7 @@ BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &err
        bytesCapacity=capacity;
    }
    StringTrieBuilder::build(buildOption, elementsLength, errorCode);
-    if(bytes==NULL) {
+    if(bytes==nullptr) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
 }
@@ -282,9 +282,9 @@ BytesTrieBuilder::getElementStringLength(int32_t i) const {
    return elements[i].getStringLength(*strings);
 }

-UChar
+char16_t
 BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
-    return (uint8_t)elements[i].charAt(byteIndex, *strings);
+    return static_cast<uint8_t>(elements[i].charAt(byteIndex, *strings));
 }

 int32_t
@@ -329,8 +329,8 @@ BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t
 }

 int32_t
-BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
-    char b=(char)byte;
+BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const {
+    char b = static_cast<char>(byte);
    while(b==elements[i].charAt(byteIndex, *strings)) {
        ++i;
    }
@@ -343,21 +343,21 @@ BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_
        static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len)));
 }

-UBool
+bool
 BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
    if(this==&other) {
-        return TRUE;
+        return true;
    }
    if(!LinearMatchNode::operator==(other)) {
-        return FALSE;
+        return false;
    }
-    const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
+    const BTLinearMatchNode &o=static_cast<const BTLinearMatchNode &>(other);
    return 0==uprv_memcmp(s, o.s, length);
 }

 void
 BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
-    BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
+    BytesTrieBuilder &b=static_cast<BytesTrieBuilder &>(builder);
    next->write(builder);
    b.write(s, length);
    offset=b.write(b.getMinLinearMatch()+length-1);
@@ -374,8 +374,8 @@ BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t le

 UBool
 BytesTrieBuilder::ensureCapacity(int32_t length) {
-    if(bytes==NULL) {
-        return FALSE;  // previous memory allocation had failed
+    if(bytes==nullptr) {
+        return false;  // previous memory allocation had failed
    }
    if(length>bytesCapacity) {
        int32_t newCapacity=bytesCapacity;
@@ -383,12 +383,12 @@ BytesTrieBuilder::ensureCapacity(int32_t length) {
            newCapacity*=2;
        } while(newCapacity<=length);
        char *newBytes=static_cast<char *>(uprv_malloc(newCapacity));
-        if(newBytes==NULL) {
+        if(newBytes==nullptr) {
            // unable to allocate memory
            uprv_free(bytes);
-            bytes=NULL;
+            bytes=nullptr;
            bytesCapacity=0;
-            return FALSE;
+            return false;
        }
        uprv_memcpy(newBytes+(newCapacity-bytesLength),
                    bytes+(bytesCapacity-bytesLength), bytesLength);
@@ -396,7 +396,7 @@ BytesTrieBuilder::ensureCapacity(int32_t length) {
        bytes=newBytes;
        bytesCapacity=newCapacity;
    }
-    return TRUE;
+    return true;
 }

 int32_t
@@ -404,7 +404,7 @@ BytesTrieBuilder::write(int32_t byte) {
    int32_t newLength=bytesLength+1;
    if(ensureCapacity(newLength)) {
        bytesLength=newLength;
-        bytes[bytesCapacity-bytesLength]=(char)byte;
+        bytes[bytesCapacity - bytesLength] = static_cast<char>(byte);
    }
    return bytesLength;
 }
@@ -432,30 +432,30 @@ BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
    char intBytes[5];
    int32_t length=1;
    if(i<0 || i>0xffffff) {
-        intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
-        intBytes[1]=(char)((uint32_t)i>>24);
-        intBytes[2]=(char)((uint32_t)i>>16);
-        intBytes[3]=(char)((uint32_t)i>>8);
-        intBytes[4]=(char)i;
+        intBytes[0] = static_cast<char>(BytesTrie::kFiveByteValueLead);
+        intBytes[1] = static_cast<char>(static_cast<uint32_t>(i) >> 24);
+        intBytes[2] = static_cast<char>(static_cast<uint32_t>(i) >> 16);
+        intBytes[3] = static_cast<char>(static_cast<uint32_t>(i) >> 8);
+        intBytes[4] = static_cast<char>(i);
        length=5;
    // } else if(i<=BytesTrie::kMaxOneByteValue) {
    //     intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
    } else {
        if(i<=BytesTrie::kMaxTwoByteValue) {
-            intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
+            intBytes[0] = static_cast<char>(BytesTrie::kMinTwoByteValueLead + (i >> 8));
        } else {
            if(i<=BytesTrie::kMaxThreeByteValue) {
-                intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
+                intBytes[0] = static_cast<char>(BytesTrie::kMinThreeByteValueLead + (i >> 16));
            } else {
-                intBytes[0]=(char)BytesTrie::kFourByteValueLead;
-                intBytes[1]=(char)(i>>16);
+                intBytes[0] = static_cast<char>(BytesTrie::kFourByteValueLead);
+                intBytes[1] = static_cast<char>(i >> 16);
                length=2;
            }
-            intBytes[length++]=(char)(i>>8);
+            intBytes[length++] = static_cast<char>(i >> 8);
        }
-        intBytes[length++]=(char)i;
+        intBytes[length++] = static_cast<char>(i);
    }
-    intBytes[0]=(char)((intBytes[0]<<1)|isFinal);
+    intBytes[0] = static_cast<char>((intBytes[0] << 1) | isFinal);
    return write(intBytes, length);
 }

@@ -463,7 +463,7 @@ int32_t
 BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
    int32_t offset=write(node);
    if(hasValue) {
-        offset=writeValueAndFinal(value, FALSE);
+        offset=writeValueAndFinal(value, false);
    }
    return offset;
 }
@@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
    U_ASSERT(i>=0);
    if(i<=BytesTrie::kMaxOneByteDelta) {
        return write(i);
+    } else {
+        char intBytes[5];
+        return write(intBytes, internalEncodeDelta(i, intBytes));
    }
-    char intBytes[5];
-    int32_t length;
+}
+
+int32_t
+BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
+    U_ASSERT(i>=0);
+    if(i<=BytesTrie::kMaxOneByteDelta) {
+        intBytes[0] = static_cast<char>(i);
+        return 1;
+    }
+    int32_t length=1;
    if(i<=BytesTrie::kMaxTwoByteDelta) {
-        intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
-        length=1;
+        intBytes[0] = static_cast<char>(BytesTrie::kMinTwoByteDeltaLead + (i >> 8));
    } else {
        if(i<=BytesTrie::kMaxThreeByteDelta) {
-            intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
-            length=2;
+            intBytes[0] = static_cast<char>(BytesTrie::kMinThreeByteDeltaLead + (i >> 16));
        } else {
            if(i<=0xffffff) {
-                intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
-                length=3;
+                intBytes[0] = static_cast<char>(BytesTrie::kFourByteDeltaLead);
            } else {
-                intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
-                intBytes[1]=(char)(i>>24);
-                length=4;
+                intBytes[0] = static_cast<char>(BytesTrie::kFiveByteDeltaLead);
+                intBytes[1] = static_cast<char>(i >> 24);
+                length=2;
            }
-            intBytes[1]=(char)(i>>16);
+            intBytes[length++] = static_cast<char>(i >> 16);
        }
-        intBytes[1]=(char)(i>>8);
+        intBytes[length++] = static_cast<char>(i >> 8);
    }
-    intBytes[length++]=(char)i;
-    return write(intBytes, length);
+    intBytes[length++] = static_cast<char>(i);
+    return length;
 }

 U_NAMESPACE_END
@@ -27,7 +27,7 @@ BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
        : bytes_(static_cast<const uint8_t *>(trieBytes)),
          pos_(bytes_), initialPos_(bytes_),
          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
-          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
+          str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
    if(U_FAILURE(errorCode)) {
        return;
    }
@@ -39,7 +39,7 @@ BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
    // cost is minimal.
    str_=new CharString();
    stack_=new UVector32(errorCode);
-    if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
+    if(U_SUCCESS(errorCode) && (str_==nullptr || stack_==nullptr)) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
    }
 }
@@ -49,7 +49,7 @@ BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
        : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
          remainingMatchLength_(trie.remainingMatchLength_),
          initialRemainingMatchLength_(trie.remainingMatchLength_),
-          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
+          str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
    if(U_FAILURE(errorCode)) {
        return;
    }
@@ -58,7 +58,7 @@ BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
    if(U_FAILURE(errorCode)) {
        return;
    }
-    if(str_==NULL || stack_==NULL) {
+    if(str_==nullptr || stack_==nullptr) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return;
    }
@@ -96,17 +96,17 @@ BytesTrie::Iterator::reset() {
 }

 UBool
-BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
+BytesTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }

 UBool
 BytesTrie::Iterator::next(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
-        return FALSE;
+        return false;
    }
    const uint8_t *pos=pos_;
-    if(pos==NULL) {
+    if(pos==nullptr) {
        if(stack_->isEmpty()) {
-            return FALSE;
+            return false;
        }
        // Pop the state off the stack and continue with the next outbound edge of
        // the branch node.
@@ -115,14 +115,14 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
        pos=bytes_+stack_->elementAti(stackSize-2);
        stack_->setSize(stackSize-2);
        str_->truncate(length&0xffff);
-        length=(int32_t)((uint32_t)length>>16);
+        length = static_cast<int32_t>(static_cast<uint32_t>(length) >> 16);
        if(length>1) {
            pos=branchNext(pos, length, errorCode);
-            if(pos==NULL) {
-                return TRUE;  // Reached a final value.
+            if(pos==nullptr) {
+                return true;  // Reached a final value.
            }
        } else {
-            str_->append((char)*pos++, errorCode);
+            str_->append(static_cast<char>(*pos++), errorCode);
        }
    }
    if(remainingMatchLength_>=0) {
@@ -134,14 +134,14 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
        int32_t node=*pos++;
        if(node>=kMinValueLead) {
            // Deliver value for the byte sequence so far.
-            UBool isFinal=(UBool)(node&kValueIsFinal);
+            UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
            value_=readValue(pos, node>>1);
            if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
-                pos_=NULL;
+                pos_=nullptr;
            } else {
                pos_=skipValue(pos, node);
            }
-            return TRUE;
+            return true;
        }
        if(maxLength_>0 && str_->length()==maxLength_) {
            return truncateAndStop();
@@ -151,8 +151,8 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {
                node=*pos++;
            }
            pos=branchNext(pos, node+1, errorCode);
-            if(pos==NULL) {
-                return TRUE;  // Reached a final value.
+            if(pos==nullptr) {
+                return true;  // Reached a final value.
            }
        } else {
            // Linear-match node, append length bytes to str_.
@@ -170,14 +170,14 @@ BytesTrie::Iterator::next(UErrorCode &errorCode) {

 StringPiece
 BytesTrie::Iterator::getString() const {
-    return str_ == NULL ? StringPiece() : str_->toStringPiece();
+    return str_ == nullptr ? StringPiece() : str_->toStringPiece();
 }

 UBool
 BytesTrie::Iterator::truncateAndStop() {
-    pos_=NULL;
+    pos_=nullptr;
    value_=-1;  // no real value for str
-    return TRUE;
+    return true;
 }

 // Branch node, needs to take the first outbound edge and push state for the rest.
@@ -186,7 +186,7 @@ BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &
    while(length>kMaxBranchLinearSubNodeLength) {
        ++pos;  // ignore the comparison byte
        // Push state for the greater-or-equal edge.
-        stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
+        stack_->addElement(static_cast<int32_t>(skipDelta(pos) - bytes_), errorCode);
        stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
        // Follow the less-than edge.
        length>>=1;
@@ -196,16 +196,16 @@ BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &
    // Read the first (key, value) pair.
    uint8_t trieByte=*pos++;
    int32_t node=*pos++;
-    UBool isFinal=(UBool)(node&kValueIsFinal);
+    UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
    int32_t value=readValue(pos, node>>1);
    pos=skipValue(pos, node);
-    stack_->addElement((int32_t)(pos-bytes_), errorCode);
+    stack_->addElement(static_cast<int32_t>(pos - bytes_), errorCode);
    stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
-    str_->append((char)trieByte, errorCode);
+    str_->append(static_cast<char>(trieByte), errorCode);
    if(isFinal) {
-        pos_=NULL;
+        pos_=nullptr;
        value_=value;
-        return NULL;
+        return nullptr;
    } else {
        return pos+value;
    }
@@ -64,19 +64,20 @@ U_NAMESPACE_BEGIN

 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)

+
 /**
 *@param source string to get results for
 */
 CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
-    pieces(NULL),
+    pieces(nullptr),
    pieces_length(0),
-    pieces_lengths(NULL),
-    current(NULL),
+    pieces_lengths(nullptr),
+    current(nullptr),
    current_length(0),
-    nfd(*Normalizer2::getNFDInstance(status)),
-    nfcImpl(*Normalizer2Factory::getNFCImpl(status))
+    nfd(Normalizer2::getNFDInstance(status)),
+    nfcImpl(Normalizer2Factory::getNFCImpl(status))
 {
-    if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
+    if(U_SUCCESS(status) && nfcImpl->ensureCanonIterData(status)) {
      setSource(sourceStr, status);
    }
 }
@@ -87,23 +88,23 @@ CanonicalIterator::~CanonicalIterator() {

 void CanonicalIterator::cleanPieces() {
    int32_t i = 0;
-    if(pieces != NULL) {
+    if(pieces != nullptr) {
        for(i = 0; i < pieces_length; i++) {
-            if(pieces[i] != NULL) {
+            if(pieces[i] != nullptr) {
                delete[] pieces[i];
            }
        }
        uprv_free(pieces);
-        pieces = NULL;
+        pieces = nullptr;
        pieces_length = 0;
    }
-    if(pieces_lengths != NULL) {
+    if(pieces_lengths != nullptr) {
        uprv_free(pieces_lengths);
-        pieces_lengths = NULL;
+        pieces_lengths = nullptr;
    }
-    if(current != NULL) {
+    if(current != nullptr) {
        uprv_free(current);
-        current = NULL;
+        current = nullptr;
        current_length = 0;
    }
 }
@@ -119,7 +120,7 @@ UnicodeString CanonicalIterator::getSource() {
 * Resets the iterator so that one can start again from the beginning.
 */
 void CanonicalIterator::reset() {
-    done = FALSE;
+    done = false;
    for (int i = 0; i < current_length; ++i) {
        current[i] = 0;
    }
@@ -151,7 +152,7 @@ UnicodeString CanonicalIterator::next() {

    for (i = current_length - 1; ; --i) {
        if (i < 0) {
-            done = TRUE;
+            done = true;
            break;
        }
        current[i]++;
@@ -170,31 +171,31 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
    UChar32 cp = 0;
    int32_t start = 0;
    int32_t i = 0;
-    UnicodeString *list = NULL;
+    UnicodeString *list = nullptr;

-    nfd.normalize(newSource, source, status);
+    nfd->normalize(newSource, source, status);
    if(U_FAILURE(status)) {
      return;
    }
-    done = FALSE;
+    done = false;

    cleanPieces();

    // catch degenerate case
    if (newSource.length() == 0) {
-        pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
-        pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
+        pieces = static_cast<UnicodeString**>(uprv_malloc(sizeof(UnicodeString*)));
+        pieces_lengths = static_cast<int32_t*>(uprv_malloc(1 * sizeof(int32_t)));
        pieces_length = 1;
-        current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
+        current = static_cast<int32_t*>(uprv_malloc(1 * sizeof(int32_t)));
        current_length = 1;
-        if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
+        if (pieces == nullptr || pieces_lengths == nullptr || current == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CleanPartialInitialization;
        }
        current[0] = 0;
        pieces[0] = new UnicodeString[1];
        pieces_lengths[0] = 1;
-        if (pieces[0] == 0) {
+        if (pieces[0] == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CleanPartialInitialization;
        }
@@ -203,15 +204,15 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st


    list = new UnicodeString[source.length()];
-    if (list == 0) {
+    if (list == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        goto CleanPartialInitialization;
    }

-    // i should initialy be the number of code units at the 
+    // i should initially be the number of code units at the 
    // start of the string
    i = U16_LENGTH(source.char32At(0));
-    //int32_t i = 1;
+    // int32_t i = 1;
    // find the segments
    // This code iterates through the source string and 
    // extracts segments that end up on a codepoint that
@@ -219,7 +220,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
    // on the NFD form - see above).
    for (; i < source.length(); i += U16_LENGTH(cp)) {
        cp = source.char32At(i);
-        if (nfcImpl.isCanonSegmentStarter(cp)) {
+        if (nfcImpl->isCanonSegmentStarter(cp)) {
            source.extract(start, i-start, list[list_length++]); // add up to i
            start = i;
        }
@@ -228,12 +229,12 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st


    // allocate the arrays, and find the strings that are CE to each segment
-    pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
+    pieces = static_cast<UnicodeString**>(uprv_malloc(list_length * sizeof(UnicodeString*)));
    pieces_length = list_length;
-    pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
-    current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
+    pieces_lengths = static_cast<int32_t*>(uprv_malloc(list_length * sizeof(int32_t)));
+    current = static_cast<int32_t*>(uprv_malloc(list_length * sizeof(int32_t)));
    current_length = list_length;
-    if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
+    if (pieces == nullptr || pieces_lengths == nullptr || current == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        goto CleanPartialInitialization;
    }
@@ -252,9 +253,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
    return;
 // Common section to cleanup all local variables and reset object variables.
 CleanPartialInitialization:
-    if (list != NULL) {
-        delete[] list;
-    }
+    delete[] list;
    cleanPieces();
 }

@@ -264,10 +263,19 @@ CleanPartialInitialization:
 * @param source the string to find permutations for
 * @return the results in a set.
 */
-void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
+void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
    if(U_FAILURE(status)) {
        return;
    }
+    // To avoid an infinite loop caused by permute, we limit the depth of recursive
+    // calls to permute and return U_UNSUPPORTED_ERROR.
+    // We know that some unit tests need at least 4. Set to 8 just in case of
+    // unforeseen use cases.
+    constexpr int32_t kPermuteDepthLimit = 8;
+    if (depth > kPermuteDepthLimit) {
+        status = U_UNSUPPORTED_ERROR;
+        return;
+    }
    //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
    int32_t i = 0;

@@ -276,8 +284,8 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
    // we check for length < 2 to keep from counting code points all the time
    if (source.length() <= 2 && source.countChar32() <= 1) {
        UnicodeString *toPut = new UnicodeString(source);
-        /* test for NULL */
-        if (toPut == 0) {
+        /* test for nullptr */
+        if (toPut == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
@@ -295,7 +303,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros

    for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
        cp = source.char32At(i);
-        const UHashElement *ne = NULL;
+        const UHashElement *ne = nullptr;
        int32_t el = UHASH_FIRST;
        UnicodeString subPermuteString = source;

@@ -311,7 +319,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros

        // see what the permutations of the characters before and after this one are
        //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
-        permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
+        permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
        /* Test for buffer overflows */
        if(U_FAILURE(status)) {
            return;
@@ -321,11 +329,11 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros

        // prefix this character to all of them
        ne = subpermute.nextElement(el);
-        while (ne != NULL) {
-            UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
+        while (ne != nullptr) {
+            UnicodeString* permRes = static_cast<UnicodeString*>(ne->value.pointer);
            UnicodeString *chStr = new UnicodeString(cp);
-            //test for  NULL
-            if (chStr == NULL) {
+            //test for nullptr
+            if (chStr == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
@@ -346,42 +354,45 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
    Hashtable permutations(status);
    Hashtable basic(status);
    if (U_FAILURE(status)) {
-        return 0;
+        return nullptr;
    }
    result.setValueDeleter(uprv_deleteUObject);
    permutations.setValueDeleter(uprv_deleteUObject);
    basic.setValueDeleter(uprv_deleteUObject);

-    UChar USeg[256];
+    char16_t USeg[256];
    int32_t segLen = segment.extract(USeg, 256, status);
    getEquivalents2(&basic, USeg, segLen, status);
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }

    // now get all the permutations
    // add only the ones that are canonically equivalent
    // TODO: optimize by not permuting any class zero.

-    const UHashElement *ne = NULL;
+    const UHashElement *ne = nullptr;
    int32_t el = UHASH_FIRST;
    //Iterator it = basic.iterator();
    ne = basic.nextElement(el);
    //while (it.hasNext())
-    while (ne != NULL) {
+    while (ne != nullptr) {
        //String item = (String) it.next();
-        UnicodeString item = *((UnicodeString *)(ne->value.pointer));
+        UnicodeString item = *static_cast<UnicodeString*>(ne->value.pointer);

        permutations.removeAll();
        permute(item, CANITER_SKIP_ZEROES, &permutations, status);
-        const UHashElement *ne2 = NULL;
+        const UHashElement *ne2 = nullptr;
        int32_t el2 = UHASH_FIRST;
        //Iterator it2 = permutations.iterator();
        ne2 = permutations.nextElement(el2);
        //while (it2.hasNext())
-        while (ne2 != NULL) {
+        while (ne2 != nullptr) {
            //String possible = (String) it2.next();
            //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
-            UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
+            UnicodeString possible(*static_cast<UnicodeString*>(ne2->value.pointer));
            UnicodeString attempt;
-            nfd.normalize(possible, attempt, status);
+            nfd->normalize(possible, attempt, status);

            // TODO: check if operator == is semanticaly the same as attempt.equals(segment)
            if (attempt==segment) {
@@ -399,29 +410,29 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i

    /* Test for buffer overflows */
    if(U_FAILURE(status)) {
-        return 0;
+        return nullptr;
    }
    // convert into a String[] to clean up storage
    //String[] finalResult = new String[result.size()];
-    UnicodeString *finalResult = NULL;
+    UnicodeString *finalResult = nullptr;
    int32_t resultCount;
    if((resultCount = result.count()) != 0) {
        finalResult = new UnicodeString[resultCount];
-        if (finalResult == 0) {
+        if (finalResult == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
-            return NULL;
+            return nullptr;
        }
    }
    else {
        status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
+        return nullptr;
    }
    //result.toArray(finalResult);
    result_len = 0;
    el = UHASH_FIRST;
    ne = result.nextElement(el);
-    while(ne != NULL) {
-        finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
+    while(ne != nullptr) {
+        finalResult[result_len++] = *static_cast<UnicodeString*>(ne->value.pointer);
        ne = result.nextElement(el);
    }

@@ -429,10 +440,10 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
    return finalResult;
 }

-Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
+Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status) {

    if (U_FAILURE(status)) {
-        return NULL;
+        return nullptr;
    }

    //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));
@@ -448,7 +459,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
    for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
        // see if any character is at the start of some decomposition
        U16_GET(segment, 0, i, segLen, cp);
-        if (!nfcImpl.getCanonStartSet(cp, starts)) {
+        if (!nfcImpl->getCanonStartSet(cp, starts)) {
            continue;
        }
        // if so, see which decompositions match
@@ -457,7 +468,10 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
            UChar32 cp2 = iter.getCodepoint();
            Hashtable remainder(status);
            remainder.setValueDeleter(uprv_deleteUObject);
-            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
+            if (extract(&remainder, cp2, segment, segLen, i, status) == nullptr) {
+                if (U_FAILURE(status)) {
+                    return nullptr;
+                }
                continue;
            }

@@ -467,13 +481,13 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh

            int32_t el = UHASH_FIRST;
            const UHashElement *ne = remainder.nextElement(el);
-            while (ne != NULL) {
-                UnicodeString item = *((UnicodeString *)(ne->value.pointer));
+            while (ne != nullptr) {
+                UnicodeString item = *static_cast<UnicodeString*>(ne->value.pointer);
                UnicodeString *toAdd = new UnicodeString(prefix);
-                /* test for NULL */
-                if (toAdd == 0) {
+                /* test for nullptr */
+                if (toAdd == nullptr) {
                    status = U_MEMORY_ALLOCATION_ERROR;
-                    return NULL;
+                    return nullptr;
                }
                *toAdd += item;
                fillinResult->put(*toAdd, toAdd, status);
@@ -482,46 +496,53 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh

                ne = remainder.nextElement(el);
            }
+            // ICU-22642 Guards against strings that have so many permutations
+            // that they would otherwise hang the function.
+            constexpr int32_t kResultLimit = 4096;
+            if (fillinResult->count() > kResultLimit) {
+                status = U_UNSUPPORTED_ERROR;
+                return nullptr;
+            }
        }
    }

    /* Test for buffer overflows */
    if(U_FAILURE(status)) {
-        return NULL;
+        return nullptr;
    }
    return fillinResult;
 }

 /**
 * See if the decomposition of cp2 is at segment starting at segmentPos 
- * (with canonical rearrangment!)
+ * (with canonical rearrangement!)
 * If so, take the remainder, and return the equivalents 
 */
-Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
+Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
 //Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
    //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
    //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);

    if (U_FAILURE(status)) {
-        return NULL;
+        return nullptr;
    }

    UnicodeString temp(comp);
    int32_t inputLen=temp.length();
    UnicodeString decompString;
-    nfd.normalize(temp, decompString, status);
+    nfd->normalize(temp, decompString, status);
    if (U_FAILURE(status)) {
-        return NULL;
+        return nullptr;
    }
    if (decompString.isBogus()) {
        status = U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
+        return nullptr;
    }
-    const UChar *decomp=decompString.getBuffer();
+    const char16_t *decomp=decompString.getBuffer();
    int32_t decompLen=decompString.length();

    // See if it matches the start of segment (at segmentPos)
-    UBool ok = FALSE;
+    UBool ok = false;
    UChar32 cp;
    int32_t decompPos = 0;
    UChar32 decompCp;
@@ -537,7 +558,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con

            if (decompPos == decompLen) { // done, have all decomp characters!
                temp.append(segment+i, segLen-i);
-                ok = TRUE;
+                ok = true;
                break;
            }
            U16_NEXT(decomp, decompPos, decompLen, decompCp);
@@ -561,7 +582,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
        }
    }
    if (!ok)
-        return NULL; // we failed, characters left over
+        return nullptr; // we failed, characters left over

    //if (PROGRESS) printf("Matches\n");

@@ -573,9 +594,9 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
    // brute force approach
    // check to make sure result is canonically equivalent
    UnicodeString trial;
-    nfd.normalize(temp, trial, status);
+    nfd->normalize(temp, trial, status);
    if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
-        return NULL;
+        return nullptr;
    }

    return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
@@ -0,0 +1,97 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef __CAPI_HELPER_H__
+#define __CAPI_HELPER_H__
+
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * An internal helper class to help convert between C and C++ APIs.
+ */
+template<typename CType, typename CPPType, int32_t kMagic>
+class IcuCApiHelper {
+  public:
+    /**
+     * Convert from the C type to the C++ type (const version).
+     */
+    static const CPPType* validate(const CType* input, UErrorCode& status);
+
+    /**
+     * Convert from the C type to the C++ type (non-const version).
+     */
+    static CPPType* validate(CType* input, UErrorCode& status);
+
+    /**
+     * Convert from the C++ type to the C type (const version).
+     */
+    const CType* exportConstForC() const;
+
+    /**
+     * Convert from the C++ type to the C type (non-const version).
+     */
+    CType* exportForC();
+
+    /**
+     * Invalidates the object.
+     */
+    ~IcuCApiHelper();
+
+  private:
+    /**
+     * While the object is valid, fMagic equals kMagic.
+     */
+    int32_t fMagic = kMagic;
+};
+
+
+template<typename CType, typename CPPType, int32_t kMagic>
+const CPPType*
+IcuCApiHelper<CType, CPPType, kMagic>::validate(const CType* input, UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+    if (input == nullptr) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    auto* impl = reinterpret_cast<const CPPType*>(input);
+    if (static_cast<const IcuCApiHelper<CType, CPPType, kMagic>*>(impl)->fMagic != kMagic) {
+        status = U_INVALID_FORMAT_ERROR;
+        return nullptr;
+    }
+    return impl;
+}
+
+template<typename CType, typename CPPType, int32_t kMagic>
+CPPType*
+IcuCApiHelper<CType, CPPType, kMagic>::validate(CType* input, UErrorCode& status) {
+    auto* constInput = static_cast<const CType*>(input);
+    auto* validated = validate(constInput, status);
+    return const_cast<CPPType*>(validated);
+}
+
+template<typename CType, typename CPPType, int32_t kMagic>
+const CType*
+IcuCApiHelper<CType, CPPType, kMagic>::exportConstForC() const {
+    return reinterpret_cast<const CType*>(static_cast<const CPPType*>(this));
+}
+
+template<typename CType, typename CPPType, int32_t kMagic>
+CType*
+IcuCApiHelper<CType, CPPType, kMagic>::exportForC() {
+    return reinterpret_cast<CType*>(static_cast<CPPType*>(this));
+}
+
+template<typename CType, typename CPPType, int32_t kMagic>
+IcuCApiHelper<CType, CPPType, kMagic>::~IcuCApiHelper() {
+    // head off application errors by preventing use of deleted objects.
+    fMagic = 0;
+}
+
+
+U_NAMESPACE_END
+
+#endif // __CAPI_HELPER_H__
@@ -14,6 +14,7 @@
 #include "unicode/uscript.h"
 #include "unicode/uset.h"
 #include "cmemory.h"
+#include "emojiprops.h"
 #include "mutex.h"
 #include "normalizer2impl.h"
 #include "uassert.h"
@@ -24,8 +25,10 @@
 #include "uprops.h"

 using icu::LocalPointer;
+#if !UCONFIG_NO_NORMALIZATION
 using icu::Normalizer2Factory;
 using icu::Normalizer2Impl;
+#endif
 using icu::UInitOnce;
 using icu::UnicodeSet;

@@ -33,11 +36,11 @@ namespace {

 UBool U_CALLCONV characterproperties_cleanup();

-constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
+constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + (UCHAR_INT_LIMIT - UCHAR_INT_START);

 struct Inclusion {
-    UnicodeSet  *fSet;
-    UInitOnce    fInitOnce;
+    UnicodeSet  *fSet = nullptr;
+    UInitOnce    fInitOnce {};
 };
 Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()

@@ -45,7 +48,7 @@ UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};

 UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};

-UMutex cpMutex = U_MUTEX_INITIALIZER;
+icu::UMutex cpMutex;

 //----------------------------------------------------------------
 // Inclusions list
@@ -55,17 +58,17 @@ UMutex cpMutex = U_MUTEX_INITIALIZER;
 // Does not use uset.h to reduce code dependencies
 void U_CALLCONV
 _set_add(USet *set, UChar32 c) {
-    ((UnicodeSet *)set)->add(c);
+    reinterpret_cast<UnicodeSet*>(set)->add(c);
 }

 void U_CALLCONV
 _set_addRange(USet *set, UChar32 start, UChar32 end) {
-    ((UnicodeSet *)set)->add(start, end);
+    reinterpret_cast<UnicodeSet*>(set)->add(start, end);
 }

 void U_CALLCONV
-_set_addString(USet *set, const UChar *str, int32_t length) {
-    ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length));
+_set_addString(USet *set, const char16_t *str, int32_t length) {
+    reinterpret_cast<UnicodeSet*>(set)->add(icu::UnicodeString(static_cast<UBool>(length < 0), str, length));
 }

 UBool U_CALLCONV characterproperties_cleanup() {
@@ -82,7 +85,7 @@ UBool U_CALLCONV characterproperties_cleanup() {
        ucptrie_close(reinterpret_cast<UCPTrie *>(maps[i]));
        maps[i] = nullptr;
    }
-    return TRUE;
+    return true;
 }

 void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
@@ -100,7 +103,7 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
        return;
    }
    USetAdder sa = {
-        (USet *)incl.getAlias(),
+        reinterpret_cast<USet*>(incl.getAlias()),
        _set_add,
        _set_addRange,
        _set_addString,
@@ -166,7 +169,26 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
    case UPROPS_SRC_INPC:
    case UPROPS_SRC_INSC:
    case UPROPS_SRC_VO:
-        uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
+        uprops_addPropertyStarts(src, &sa, &errorCode);
+        break;
+    case UPROPS_SRC_EMOJI: {
+        const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
+        if (U_SUCCESS(errorCode)) {
+            ep->addPropertyStarts(&sa, errorCode);
+        }
+        break;
+    }
+    case UPROPS_SRC_IDSU:
+        // New in Unicode 15.1 for just two characters.
+        sa.add(sa.set, 0x2FFE);
+        sa.add(sa.set, 0x2FFF + 1);
+        break;
+    case UPROPS_SRC_ID_COMPAT_MATH:
+    case UPROPS_SRC_MCM:
+        uprops_addPropertyStarts(src, &sa, &errorCode);
+        break;
+    case UPROPS_SRC_BLOCK:
+        ublock_addPropertyStarts(&sa, errorCode);
        break;
    default:
        errorCode = U_INTERNAL_PROGRAM_ERROR;
@@ -200,7 +222,7 @@ const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorC
 void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
-    int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
+    int32_t inclIndex = UPROPS_SRC_COUNT + (prop - UCHAR_INT_START);
    U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
    UPropertySource src = uprops_getSource(prop);
    const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
@@ -245,7 +267,7 @@ const UnicodeSet *CharacterProperties::getInclusionsForProperty(
        UProperty prop, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
-        int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
+        int32_t inclIndex = UPROPS_SRC_COUNT + (prop - UCHAR_INT_START);
        Inclusion &i = gInclusions[inclIndex];
        umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
        return i.fSet;
@@ -266,6 +288,26 @@ UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
+    if (UCHAR_BASIC_EMOJI <= property && property <= UCHAR_RGI_EMOJI) {
+        // property of strings
+        const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
+        if (U_FAILURE(errorCode)) { return nullptr; }
+        USetAdder sa = {
+            reinterpret_cast<USet*>(set.getAlias()),
+            _set_add,
+            _set_addRange,
+            _set_addString,
+            nullptr, // don't need remove()
+            nullptr // don't need removeRange()
+        };
+        ep->addStrings(&sa, property, errorCode);
+        if (property != UCHAR_BASIC_EMOJI && property != UCHAR_RGI_EMOJI) {
+            // property of _only_ strings
+            set->freeze();
+            return set.orphan();
+        }
+    }
+
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }
@@ -347,22 +389,30 @@ UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {

 }  // namespace

-U_NAMESPACE_USE
+U_NAMESPACE_BEGIN

-U_CAPI const USet * U_EXPORT2
-u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
-    if (U_FAILURE(*pErrorCode)) { return nullptr; }
+const UnicodeSet *CharacterProperties::getBinaryPropertySet(UProperty property, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return nullptr; }
    if (property < 0 || UCHAR_BINARY_LIMIT <= property) {
-        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex m(&cpMutex);
    UnicodeSet *set = sets[property];
    if (set == nullptr) {
-        sets[property] = set = makeSet(property, *pErrorCode);
+        sets[property] = set = makeSet(property, errorCode);
    }
-    if (U_FAILURE(*pErrorCode)) { return nullptr; }
-    return set->toUSet();
+    return set;
+}
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+U_CAPI const USet * U_EXPORT2
+u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
+    const UnicodeSet *set = CharacterProperties::getBinaryPropertySet(property, *pErrorCode);
+    return U_SUCCESS(*pErrorCode) ? set->toUSet() : nullptr;
 }

 U_CAPI const UCPMap * U_EXPORT2
@@ -85,14 +85,14 @@ CharacterIterator::operator=(const CharacterIterator &that) {

 // implementing first[32]PostInc() directly in a subclass should be faster
 // but these implementations make subclassing a little easier
-UChar
-CharacterIterator::firstPostInc(void) {
+char16_t
+CharacterIterator::firstPostInc() {
    setToStart();
    return nextPostInc();
 }

 UChar32
-CharacterIterator::first32PostInc(void) {
+CharacterIterator::first32PostInc() {
    setToStart();
    return next32PostInc();
 }
@@ -14,27 +14,54 @@
 *   created by: Markus W. Scherer
 */

+#include <cstdlib>
+
 #include "unicode/utypes.h"
 #include "unicode/putil.h"
 #include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "uinvchar.h"
+#include "ustr_imp.h"

 U_NAMESPACE_BEGIN

-CharString::CharString(CharString&& src) U_NOEXCEPT
+CharString::CharString(CharString&& src) noexcept
        : buffer(std::move(src.buffer)), len(src.len) {
    src.len = 0;  // not strictly necessary because we make no guarantees on the source string
 }

-CharString& CharString::operator=(CharString&& src) U_NOEXCEPT {
+CharString& CharString::operator=(CharString&& src) noexcept {
    buffer = std::move(src.buffer);
    len = src.len;
    src.len = 0;  // not strictly necessary because we make no guarantees on the source string
    return *this;
 }

+char *CharString::cloneData(UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    char *p = static_cast<char *>(uprv_malloc(len + 1));
+    if (p == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    uprv_memcpy(p, buffer.getAlias(), len + 1);
+    return p;
+}
+
+int32_t CharString::extract(char *dest, int32_t capacity, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return len; }
+    if (capacity < 0 || (capacity > 0 && dest == nullptr)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return len;
+    }
+    const char *src = buffer.getAlias();
+    if (0 < len && len <= capacity && src != dest) {
+        uprv_memcpy(dest, src, len);
+    }
+    return u_terminateChars(dest, capacity, len, &errorCode);
+}
+
 CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
        len=s.len;
@@ -43,6 +70,15 @@ CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
    return *this;
 }

+CharString &CharString::copyFrom(StringPiece s, UErrorCode &errorCode) {
+    // Replaces the contents with a copy of s; does nothing if errorCode already indicates failure.
+    if (U_FAILURE(errorCode)) { return *this; }
+    // clear() resets len and the NUL at data()[0]; do not count on append() to terminate for "".
+    if (s.empty()) { return clear(); }
+    len = 0;  // only the length is reset, so the bytes stay intact in case s aliases this buffer
+    return append(s, errorCode);
+}
+
 int32_t CharString::lastIndexOf(char c) const {
    for(int32_t i=len; i>0;) {
        if(buffer[--i]==c) {
@@ -52,6 +88,18 @@ int32_t CharString::lastIndexOf(char c) const {
    return -1;
 }

+bool CharString::contains(StringPiece s) const {
+    if (s.empty()) { return false; }  // an empty piece is never reported as contained
+    const int32_t needleLength = s.length();
+    const char *const needle = s.data();
+    const char *candidate = buffer.getAlias();
+    // Only the first len - needleLength + 1 offsets leave room for a complete match.
+    for (int32_t remaining = len - needleLength; remaining >= 0; --remaining, ++candidate) {
+        if (uprv_memcmp(candidate, needle, needleLength) == 0) { return true; }
+    }
+    return false;
+}
+
 CharString &CharString::truncate(int32_t newLength) {
    if(newLength<0) {
        newLength=0;
@@ -74,7 +122,7 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
    if(U_FAILURE(errorCode)) {
        return *this;
    }
-    if(sLength<-1 || (s==NULL && sLength!=0)) {
+    if(sLength<-1 || (s==nullptr && sLength!=0)) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
@@ -104,13 +152,45 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
    return *this;
 }

+CharString &CharString::appendNumber(int64_t number, UErrorCode &status) {
+    // Appends number in decimal: '0' for zero, otherwise an optional '-' and the digits.
+    if (number == 0) {
+        this->append('0', status);
+        return *this;
+    }
+
+    if (number < 0) {
+        this->append('-', status);
+        if (U_FAILURE(status)) { return *this; }
+    }
+
+    // Digits come out least-significant first, so they are appended in reverse order here
+    // and put back into reading order by the swap loop further down.
+    int32_t digitCount = 0;
+    for (; number != 0; number /= 10) {
+        const int32_t digit = number % 10;  // negative for a negative number, hence std::abs
+        this->append(std::abs(digit) + '0', status);
+        ++digitCount;
+        if (U_FAILURE(status)) { return *this; }
+    }
+
+    // Reverse just the digits that were appended above; any sign or earlier text stays put.
+    char *front = this->data() + (this->length() - digitCount);
+    char *back = this->data() + (this->length() - 1);
+    for (; front < back; ++front, --back) {
+        std::swap(*front, *back);
+    }
+
+    return *this;
+}
+
 char *CharString::getAppendBuffer(int32_t minCapacity,
                                  int32_t desiredCapacityHint,
                                  int32_t &resultCapacity,
                                  UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        resultCapacity=0;
-        return NULL;
+        return nullptr;
    }
    int32_t appendCapacity=buffer.getCapacity()-len-1;  // -1 for NUL
    if(appendCapacity>=minCapacity) {
@@ -122,14 +202,14 @@ char *CharString::getAppendBuffer(int32_t minCapacity,
        return buffer.getAlias()+len;
    }
    resultCapacity=0;
-    return NULL;
+    return nullptr;
 }

 CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
    return appendInvariantChars(s.getBuffer(), s.length(), errorCode);
 }

-CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) {
+CharString &CharString::appendInvariantChars(const char16_t* uchars, int32_t ucharsLen, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return *this;
    }
@@ -149,20 +229,20 @@ UBool CharString::ensureCapacity(int32_t capacity,
                                 int32_t desiredCapacityHint,
                                 UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
-        return FALSE;
+        return false;
    }
    if(capacity>buffer.getCapacity()) {
        if(desiredCapacityHint==0) {
            desiredCapacityHint=capacity+buffer.getCapacity();
        }
-        if( (desiredCapacityHint<=capacity || buffer.resize(desiredCapacityHint, len+1)==NULL) &&
-            buffer.resize(capacity, len+1)==NULL
+        if( (desiredCapacityHint<=capacity || buffer.resize(desiredCapacityHint, len+1)==nullptr) &&
+            buffer.resize(capacity, len+1)==nullptr
        ) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
-            return FALSE;
+            return false;
        }
    }
-    return TRUE;
+    return true;
 }

 CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) {
@@ -174,7 +254,7 @@ CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) {
    }
    char c;
    if(len>0 && (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
-        append(U_FILE_SEP_CHAR, errorCode);
+        append(getDirSepChar(), errorCode);
    }
    append(s, errorCode);
    return *this;
@@ -184,9 +264,19 @@ CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) {
    char c;
    if(U_SUCCESS(errorCode) && len>0 &&
            (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
-        append(U_FILE_SEP_CHAR, errorCode);
+        append(getDirSepChar(), errorCode);
    }
    return *this;
 }

+char CharString::getDirSepChar() const {
+#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
+    // Cygwin/MSYS2 builds: use the alternate separator only if it occurs here and the primary does not.
+    const bool usesAltOnly = len>0 && uprv_strchr(data(), U_FILE_SEP_CHAR) == nullptr &&
+                             uprv_strchr(data(), U_FILE_ALT_SEP_CHAR) != nullptr;
+    if (usesAltOnly) { return U_FILE_ALT_SEP_CHAR; }
+#endif
+    return U_FILE_SEP_CHAR;
+}
+
 U_NAMESPACE_END
@@ -21,12 +21,6 @@

 U_NAMESPACE_BEGIN

-// Windows needs us to DLL-export the MaybeStackArray template specialization,
-// but MacOS X cannot handle it. Same as in digitlst.h.
-#if !U_PLATFORM_IS_DARWIN_BASED
-template class U_COMMON_API MaybeStackArray<char, 40>;
-#endif
-
 /**
 * ICU-internal char * string class.
 * This class does not assume or enforce any particular character encoding.
@@ -38,34 +32,34 @@ template class U_COMMON_API MaybeStackArray<char, 40>;
 * For example:
 *   cs.data()[5]='a';  // no need for setCharAt(5, 'a')
 */
-class U_COMMON_API CharString : public UMemory {
+class U_COMMON_API_CLASS CharString : public UMemory {
 public:
-    CharString() : len(0) { buffer[0]=0; }
-    CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
+    U_COMMON_API CharString() : len(0) { buffer[0]=0; }
+    U_COMMON_API CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
        buffer[0]=0;
        append(s, errorCode);
    }
-    CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
+    U_COMMON_API CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
        buffer[0]=0;
        append(s, errorCode);
    }
-    CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
+    U_COMMON_API CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
        buffer[0]=0;
        append(s, sLength, errorCode);
    }
-    ~CharString() {}
+    U_COMMON_API ~CharString() {}

    /**
     * Move constructor; might leave src in an undefined state.
     * This string will have the same contents and state that the source string had.
     */
-    CharString(CharString &&src) U_NOEXCEPT;
+    U_COMMON_API CharString(CharString &&src) noexcept;
    /**
     * Move assignment operator; might leave src in an undefined state.
     * This string will have the same contents and state that the source string had.
     * The behavior is undefined if *this and src are the same object.
     */
-    CharString &operator=(CharString &&src) U_NOEXCEPT;
+    U_COMMON_API CharString &operator=(CharString &&src) noexcept;

    /**
     * Replaces this string's contents with the other string's contents.
@@ -73,30 +67,71 @@ public:
     * the assignment operator, to make copies explicit and to
     * use a UErrorCode where memory allocations might be needed.
     */
-    CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
+    U_COMMON_API CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
+    U_COMMON_API CharString &copyFrom(StringPiece s, UErrorCode &errorCode);

-    UBool isEmpty() const { return len==0; }
-    int32_t length() const { return len; }
-    char operator[](int32_t index) const { return buffer[index]; }
-    StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
+    U_COMMON_API UBool isEmpty() const { return len==0; }
+    U_COMMON_API int32_t length() const { return len; }
+    U_COMMON_API char operator[](int32_t index) const { return buffer[index]; }
+    U_COMMON_API StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }

-    const char *data() const { return buffer.getAlias(); }
-    char *data() { return buffer.getAlias(); }
+    U_COMMON_API const char *data() const { return buffer.getAlias(); }
+    U_COMMON_API char *data() { return buffer.getAlias(); }
+    /**
+     * Allocates length()+1 chars and copies the NUL-terminated data().
+     * The caller must uprv_free() the result.
+     */
+    U_COMMON_API char *cloneData(UErrorCode &errorCode) const;
+    /**
+     * Copies the contents of the string into dest.
+     * Checks if there is enough space in dest, extracts the entire string if possible,
+     * and NUL-terminates dest if possible.
+     *
+     * If the string fits into dest but cannot be NUL-terminated (length()==capacity),
+     * then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+     * If the string itself does not fit into dest (length()>capacity),
+     * then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+     *
+     * @param dest Destination string buffer.
+     * @param capacity Size of the dest buffer (number of chars).
+     * @param errorCode ICU error code.
+     * @return length()
+     */
+    U_COMMON_API int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
+
+    U_COMMON_API bool operator==(const CharString& other) const {
+        return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
+    }
+    U_COMMON_API bool operator!=(const CharString& other) const {
+        return !operator==(other);
+    }
+
+    U_COMMON_API bool operator==(StringPiece other) const {
+        return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
+    }
+    U_COMMON_API bool operator!=(StringPiece other) const {
+        return !operator==(other);
+    }

    /** @return last index of c, or -1 if c is not in this string */
-    int32_t lastIndexOf(char c) const;
+    U_COMMON_API int32_t lastIndexOf(char c) const;

-    CharString &clear() { len=0; buffer[0]=0; return *this; }
-    CharString &truncate(int32_t newLength);
+    U_COMMON_API bool contains(StringPiece s) const;

-    CharString &append(char c, UErrorCode &errorCode);
-    CharString &append(StringPiece s, UErrorCode &errorCode) {
+    U_COMMON_API CharString &clear() { len=0; buffer[0]=0; return *this; }
+    U_COMMON_API CharString &truncate(int32_t newLength);
+
+    U_COMMON_API CharString &append(char c, UErrorCode &errorCode);
+    U_COMMON_API CharString &append(StringPiece s, UErrorCode &errorCode) {
        return append(s.data(), s.length(), errorCode);
    }
-    CharString &append(const CharString &s, UErrorCode &errorCode) {
+    U_COMMON_API CharString &append(const CharString &s, UErrorCode &errorCode) {
        return append(s.data(), s.length(), errorCode);
    }
-    CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+    U_COMMON_API CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+
+    U_COMMON_API CharString &appendNumber(int64_t number, UErrorCode &status);
+
    /**
     * Returns a writable buffer for appending and writes the buffer's capacity to
     * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
@@ -117,26 +152,28 @@ public:
     * @param errorCode in/out error code
     * @return a buffer with resultCapacity>=min_capacity
     */
-    char *getAppendBuffer(int32_t minCapacity,
-                          int32_t desiredCapacityHint,
-                          int32_t &resultCapacity,
-                          UErrorCode &errorCode);
+    U_COMMON_API char *getAppendBuffer(int32_t minCapacity,
+                                       int32_t desiredCapacityHint,
+                                       int32_t &resultCapacity,
+                                       UErrorCode &errorCode);

-    CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
-    CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode);
+    U_COMMON_API CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
+    U_COMMON_API CharString &appendInvariantChars(const char16_t* uchars,
+                                                  int32_t ucharsLen,
+                                                  UErrorCode& errorCode);

    /**
     * Appends a filename/path part, e.g., a directory name.
-     * First appends a U_FILE_SEP_CHAR if necessary.
+     * First appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if necessary.
     * Does nothing if s is empty.
     */
-    CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);
+    U_COMMON_API CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);

    /**
-     * Appends a U_FILE_SEP_CHAR if this string is not empty
+     * Appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if this string is not empty
     * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
     */
-    CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
+    U_COMMON_API CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);

 private:
    MaybeStackArray<char, 40> buffer;
@@ -144,8 +181,14 @@ private:

    UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);

-    CharString(const CharString &other); // forbid copying of this class
-    CharString &operator=(const CharString &other); // forbid copying of this class
+    CharString(const CharString &other) = delete; // forbid copying of this class
+    CharString &operator=(const CharString &other) = delete; // forbid copying of this class
+
+    /**
+     * Returns U_FILE_ALT_SEP_CHAR if found in string, and U_FILE_SEP_CHAR is not found.
+     * Otherwise returns U_FILE_SEP_CHAR.
+     */
+    char getDirSepChar() const;
 };

 U_NAMESPACE_END
@@ -0,0 +1,55 @@
+// © 2020 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// charstrmap.h
+// created: 2020sep01 Frank Yung-Fong Tang
+
+#ifndef __CHARSTRMAP_H__
+#define __CHARSTRMAP_H__
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Map of const char * keys & values. Stores pointers as is: Does not own/copy/adopt/release
+ * strings. Move-only; a default-constructed or moved-from instance holds no hash table.
+ */
+class CharStringMap final : public UMemory {
+public:
+    /** Constructs an unusable non-map. */
+    CharStringMap() : map(nullptr) {}
+    CharStringMap(int32_t size, UErrorCode &errorCode) {  // errorCode is handed to uhash_openSize()
+        map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars,
+                             size, &errorCode);
+    }
+    CharStringMap(CharStringMap &&other) noexcept : map(other.map) {  // takes over other's table
+        other.map = nullptr;
+    }
+    CharStringMap(const CharStringMap &other) = delete;
+    ~CharStringMap() {
+        uhash_close(map);
+    }
+
+    CharStringMap &operator=(CharStringMap &&other) noexcept {
+        // Close the table held so far instead of leaking it; self-assignment is left untouched.
+        if (this != &other) { uhash_close(map); map = other.map; other.map = nullptr; }
+        return *this;
+    }
+    CharStringMap &operator=(const CharStringMap &other) = delete;
+
+    const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); }
+    void put(const char *key, const char *value, UErrorCode &errorCode) {
+        uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode);
+    }
+
+private:
+    UHashtable *map;
+};
+
+U_NAMESPACE_END
+
+#endif  //  __CHARSTRMAP_H__
@@ -15,8 +15,8 @@
 *                     If you have a need to replace ICU allocation, this is the
 *                     place to do it.
 *
-*                     Note that uprv_malloc(0) returns a non-NULL pointer, and
-*                     that a subsequent free of that pointer value is a NOP.
+*                     Note that uprv_malloc(0) returns a non-null pointer, and
+*                     that a subsequent free of that pointer value is a NOP.
 *
 ******************************************************************************
 */
@@ -103,7 +103,7 @@ uprv_free(void *buffer) {

 U_CAPI void * U_EXPORT2
 uprv_calloc(size_t num, size_t size) {
-    void *mem = NULL;
+    void *mem = nullptr;
    size *= num;
    mem = uprv_malloc(size);
    if (mem) {
@@ -118,7 +118,7 @@ u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMem
    if (U_FAILURE(*status)) {
        return;
    }
-    if (a==NULL || r==NULL || f==NULL) {
+    if (a==nullptr || r==nullptr || f==nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
@@ -129,10 +129,10 @@ u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMem
 }


-U_CFUNC UBool cmemory_cleanup(void) {
-    pContext   = NULL;
-    pAlloc     = NULL;
-    pRealloc   = NULL;
-    pFree      = NULL;
-    return TRUE;
+U_CFUNC UBool cmemory_cleanup() {
+    pContext   = nullptr;
+    pAlloc     = nullptr;
+    pRealloc   = nullptr;
+    pFree      = nullptr;
+    return true;
 }
@@ -31,14 +31,63 @@
 #include <stddef.h>
 #include <string.h>
 #include "unicode/localpointer.h"
+#include "uassert.h"

 #if U_DEBUG && defined(UPRV_MALLOC_COUNT)
 #include <stdio.h>
 #endif

-
-#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
-#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
+// uprv_memcpy / uprv_memmove: memcpy / memmove preceded by U_ASSERT checks that dst and src are non-null.
+#if defined(__clang__)
+#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+    /* Suppress warnings about addresses that will never be NULL */ \
+    _Pragma("clang diagnostic push") \
+    _Pragma("clang diagnostic ignored \"-Waddress\"") \
+    U_ASSERT((dst) != NULL); /* parenthesized so any argument expression compares as a whole */ \
+    U_ASSERT((src) != NULL); \
+    _Pragma("clang diagnostic pop") \
+    U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+    /* Suppress warnings about addresses that will never be NULL */ \
+    _Pragma("clang diagnostic push") \
+    _Pragma("clang diagnostic ignored \"-Waddress\"") \
+    U_ASSERT((dst) != NULL); /* parenthesized so any argument expression compares as a whole */ \
+    U_ASSERT((src) != NULL); \
+    _Pragma("clang diagnostic pop") \
+    U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#elif defined(__GNUC__)
+#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+    /* Suppress warnings about addresses that will never be NULL */ \
+    _Pragma("GCC diagnostic push") \
+    _Pragma("GCC diagnostic ignored \"-Waddress\"") \
+    U_ASSERT((dst) != NULL); /* parenthesized so any argument expression compares as a whole */ \
+    U_ASSERT((src) != NULL); \
+    _Pragma("GCC diagnostic pop") \
+    U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+    /* Suppress warnings about addresses that will never be NULL */ \
+    _Pragma("GCC diagnostic push") \
+    _Pragma("GCC diagnostic ignored \"-Waddress\"") \
+    U_ASSERT((dst) != NULL); /* parenthesized so any argument expression compares as a whole */ \
+    U_ASSERT((src) != NULL); \
+    _Pragma("GCC diagnostic pop") \
+    U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#else
+#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+    U_ASSERT((dst) != NULL); /* parenthesized so any argument expression compares as a whole */ \
+    U_ASSERT((src) != NULL); \
+    U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+    U_ASSERT((dst) != NULL); /* parenthesized so any argument expression compares as a whole */ \
+    U_ASSERT((src) != NULL); \
+    U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#endif

 /**
 * \def UPRV_LENGTHOF
@@ -50,6 +99,7 @@
 #define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
 #define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
 #define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
+#define uprv_memchr(ptr, value, num) U_STANDARD_CPP_NAMESPACE memchr(ptr, value, num)

 U_CAPI void * U_EXPORT2
 uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
@@ -63,38 +113,37 @@ uprv_free(void *mem);
 U_CAPI void * U_EXPORT2
 uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);

-/**
- * This should align the memory properly on any machine.
- * This is very useful for the safeClone functions.
- */
-typedef union {
-    long    t1;
-    double  t2;
-    void   *t3;
-} UAlignedMemory;
-
 /**
 * Get the least significant bits of a pointer (a memory address).
 * For example, with a mask of 3, the macro gets the 2 least significant bits,
 * which will be 0 if the pointer is 32-bit (4-byte) aligned.
 *
- * ptrdiff_t is the most appropriate integer type to cast to.
- * size_t should work too, since on most (or all?) platforms it has the same
- * width as ptrdiff_t.
+ * uintptr_t is the most appropriate integer type to cast to.
 */
-#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
+#define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask))

 /**
- * Get the amount of bytes that a pointer is off by from
- * the previous UAlignedMemory-aligned pointer.
+ * Create & return an instance of "type" in statically allocated storage.
+ * e.g.
+ *    static std::mutex *myMutex = STATIC_NEW(std::mutex);
+ * To destroy an object created in this way, invoke the destructor explicitly, e.g.
+ *    myMutex->~mutex();
+ * DO NOT use delete.
+ * DO NOT use with class UMutex, which has specific support for static instances.
+ *
+ * STATIC_NEW is intended for use when
+ *   - We want a static (or global) object.
+ *   - We don't want it to ever be destructed, or to explicitly control destruction,
+ *     to avoid use-after-destruction problems.
+ *   - We want to avoid an ordinary heap allocated object,
+ *     to avoid the possibility of memory allocation failures, and
+ *     to avoid memory leak reports, from valgrind, for example.
+ * This is defined as a macro rather than a template function because each invocation
+ * must define distinct static storage for the object being returned.
 */
-#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
-
-/**
- * Get the amount of bytes to add to a pointer
- * in order to get the next UAlignedMemory-aligned address.
- */
-#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
+#define STATIC_NEW(type) [] () { \
+    alignas(type) static char storage[sizeof(type)]; \
+    return new(storage) type();} ()

 /**
  *  Heap clean up function, called from u_cleanup()
@@ -122,6 +171,9 @@ uprv_deleteUObject(void *obj);

 #ifdef __cplusplus

+#include <utility>
+#include "unicode/uobject.h"
+
 U_NAMESPACE_BEGIN

 /**
@@ -140,13 +192,13 @@ public:
     * Constructor takes ownership.
     * @param p simple pointer to an array of T items that is adopted
     */
-    explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
+    explicit LocalMemory(T *p=nullptr) : LocalPointerBase<T>(p) {}
    /**
     * Move constructor, leaves src with isNull().
     * @param src source smart pointer
     */
-    LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
-        src.ptr=NULL;
+    LocalMemory(LocalMemory<T> &&src) noexcept : LocalPointerBase<T>(src.ptr) {
+        src.ptr=nullptr;
    }
    /**
     * Destructor deletes the memory it owns.
@@ -160,28 +212,17 @@ public:
     * @param src source smart pointer
     * @return *this
     */
-    LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
-        return moveFrom(src);
-    }
-    /**
-     * Move assignment, leaves src with isNull().
-     * The behavior is undefined if *this and src are the same object.
-     *
-     * Can be called explicitly, does not need C++11 support.
-     * @param src source smart pointer
-     * @return *this
-     */
-    LocalMemory<T> &moveFrom(LocalMemory<T> &src) U_NOEXCEPT {
+    LocalMemory<T> &operator=(LocalMemory<T> &&src) noexcept {
        uprv_free(LocalPointerBase<T>::ptr);
        LocalPointerBase<T>::ptr=src.ptr;
-        src.ptr=NULL;
+        src.ptr=nullptr;
        return *this;
    }
    /**
     * Swap pointers.
     * @param other other smart pointer
     */
-    void swap(LocalMemory<T> &other) U_NOEXCEPT {
+    void swap(LocalMemory<T> &other) noexcept {
        T *temp=LocalPointerBase<T>::ptr;
        LocalPointerBase<T>::ptr=other.ptr;
        other.ptr=temp;
@@ -191,7 +232,7 @@ public:
     * @param p1 will get p2's pointer
     * @param p2 will get p1's pointer
     */
-    friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) U_NOEXCEPT {
+    friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) noexcept {
        p1.swap(p2);
    }
    /**
@@ -207,21 +248,21 @@ public:
     * Deletes the array it owns, allocates a new one and reset its bytes to 0.
     * Returns the new array pointer.
     * If the allocation fails, then the current array is unchanged and
-     * this method returns NULL.
+     * this method returns nullptr.
     * @param newCapacity must be >0
-     * @return the allocated array pointer, or NULL if the allocation failed
+     * @return the allocated array pointer, or nullptr if the allocation failed
     */
    inline T *allocateInsteadAndReset(int32_t newCapacity=1);
    /**
     * Deletes the array it owns and allocates a new one, copying length T items.
     * Returns the new array pointer.
     * If the allocation fails, then the current array is unchanged and
-     * this method returns NULL.
+     * this method returns nullptr.
     * @param newCapacity must be >0
     * @param length number of T items to be copied from the old array to the new one;
     *               must be no more than the capacity of the old array,
     *               which the caller must track because the LocalMemory does not track it
-     * @return the allocated array pointer, or NULL if the allocation failed
+     * @return the allocated array pointer, or nullptr if the allocation failed
     */
    inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);
    /**
@@ -237,14 +278,14 @@ template<typename T>
 inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
    if(newCapacity>0) {
        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
-        if(p!=NULL) {
+        if(p!=nullptr) {
            uprv_memset(p, 0, newCapacity*sizeof(T));
            uprv_free(LocalPointerBase<T>::ptr);
            LocalPointerBase<T>::ptr=p;
        }
        return p;
    } else {
-        return NULL;
+        return nullptr;
    }
 }

@@ -253,7 +294,7 @@ template<typename T>
 inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
    if(newCapacity>0) {
        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
-        if(p!=NULL) {
+        if(p!=nullptr) {
            if(length>0) {
                if(length>newCapacity) {
                    length=newCapacity;
@@ -265,7 +306,7 @@ inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t le
        }
        return p;
    } else {
-        return NULL;
+        return nullptr;
    }
 }

@@ -282,23 +323,38 @@ inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t le
 *
 * WARNING: MaybeStackArray only works with primitive (plain-old data) types.
 * It does NOT know how to call a destructor! If you work with classes with
- * destructors, consider LocalArray in localpointer.h.
+ * destructors, consider:
+ *
+ * - LocalArray in localpointer.h if you know the length ahead of time
+ * - MaybeStackVector if you know the length at runtime
 */
 template<typename T, int32_t stackCapacity>
 class MaybeStackArray {
 public:
+    // No heap allocation. Use only on the stack.
+    static void* U_EXPORT2 operator new(size_t) noexcept = delete;
+    static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
+    static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
+
    /**
     * Default constructor initializes with internal T[stackCapacity] buffer.
     */
-    MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
+    MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(false) {}
    /**
     * Automatically allocates the heap array if the argument is larger than the stack capacity.
     * Intended for use when an approximate capacity is known at compile time but the true
     * capacity is not known until runtime.
     */
-    MaybeStackArray(int32_t newCapacity) : MaybeStackArray() {
-        if (capacity < newCapacity) { resize(newCapacity); }
-    };
+    MaybeStackArray(int32_t newCapacity, UErrorCode status) : MaybeStackArray() {  // NOTE(review): status is taken by value
+        if (U_FAILURE(status)) {  // an incoming failure leaves the array on its internal stack buffer
+            return;
+        }
+        if (capacity < newCapacity) {  // grow only when the stack buffer is too small
+            if (resize(newCapacity) == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;  // NOTE(review): sets the local copy only, so the caller cannot observe this failure; confirm whether UErrorCode& was intended
+            }
+        }
+    }
    /**
     * Destructor deletes the array (if owned).
     */
@@ -306,11 +362,11 @@ public:
    /**
     * Move constructor: transfers ownership or copies the stack array.
     */
-    MaybeStackArray(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
+    MaybeStackArray(MaybeStackArray<T, stackCapacity> &&src) noexcept;
    /**
     * Move assignment: transfers ownership or copies the stack array.
     */
-    MaybeStackArray<T, stackCapacity> &operator=(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT;
+    MaybeStackArray<T, stackCapacity> &operator=(MaybeStackArray<T, stackCapacity> &&src) noexcept;
    /**
     * Returns the array capacity (number of T items).
     * @return array capacity
@@ -345,32 +401,32 @@ public:
    /**
     * Deletes the array (if owned) and aliases another one, no transfer of ownership.
     * If the arguments are illegal, then the current array is unchanged.
-     * @param otherArray must not be NULL
+     * @param otherArray must not be nullptr
     * @param otherCapacity must be >0
     */
    void aliasInstead(T *otherArray, int32_t otherCapacity) {
-        if(otherArray!=NULL && otherCapacity>0) {
+        if(otherArray!=nullptr && otherCapacity>0) {
            releaseArray();
            ptr=otherArray;
            capacity=otherCapacity;
-            needToRelease=FALSE;
+            needToRelease=false;
        }
    }
    /**
     * Deletes the array (if owned) and allocates a new one, copying length T items.
     * Returns the new array pointer.
     * If the allocation fails, then the current array is unchanged and
-     * this method returns NULL.
+     * this method returns nullptr.
     * @param newCapacity can be less than or greater than the current capacity;
     *                    must be >0
     * @param length number of T items to be copied from the old array to the new one
-     * @return the allocated array pointer, or NULL if the allocation failed
+     * @return the allocated array pointer, or nullptr if the allocation failed
     */
    inline T *resize(int32_t newCapacity, int32_t length=0);
    /**
     * Gives up ownership of the array if owned, or else clones it,
     * copying length T items; resets itself to the internal stack array.
-     * Returns NULL if the allocation failed.
+     * Returns nullptr if the allocation failed.
     * @param length number of T items to copy when cloning,
     *        and capacity of the clone when cloning
     * @param resultCapacity will be set to the returned array's capacity (output-only)
@@ -378,6 +434,20 @@ public:
     *         caller becomes responsible for deleting the array
     */
    inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
+
+protected:
+    /**
+     * Makes this array a copy of src: reallocates to src's capacity via
+     * resize(), then copies src.capacity items of T with uprv_memcpy.
+     *
+     * Does nothing if status already indicates a failure. Also does nothing
+     * if src is this same object: resize() switches to a freshly allocated
+     * buffer without copying anything into it (length 0), so a self-copy
+     * would otherwise lose the current contents.
+     *
+     * @param src    the array to copy from
+     * @param status set to U_MEMORY_ALLOCATION_ERROR if resize() returns nullptr
+     */
+    void copyFrom(const MaybeStackArray &src, UErrorCode &status) {
+        if (U_FAILURE(status) || this == &src) {
+            return;
+        }
+        if (this->resize(src.capacity, 0) == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // After a successful resize(), capacity == src.capacity.
+        uprv_memcpy(this->ptr, src.ptr, (size_t)capacity * sizeof(T));
+    }
+
 private:
    T *ptr;
    int32_t capacity;
@@ -391,33 +461,19 @@ private:
    void resetToStackArray() {
        ptr=stackArray;
        capacity=stackCapacity;
-        needToRelease=FALSE;
+        needToRelease=false;
    }
    /* No comparison operators with other MaybeStackArray's. */
-    bool operator==(const MaybeStackArray & /*other*/) {return FALSE;}
-    bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;}
+    bool operator==(const MaybeStackArray & /*other*/) = delete;
+    bool operator!=(const MaybeStackArray & /*other*/) = delete;
    /* No ownership transfer: No copy constructor, no assignment operator. */
-    MaybeStackArray(const MaybeStackArray & /*other*/) {}
-    void operator=(const MaybeStackArray & /*other*/) {}
-
-    // No heap allocation. Use only on the stack.
-    //   (Declaring these functions private triggers a cascade of problems:
-    //      MSVC insists on exporting an instantiation of MaybeStackArray, which
-    //      requires that all functions be defined.
-    //      An empty implementation of new() is rejected, it must return a value.
-    //      Returning NULL is rejected by gcc for operator new.
-    //      The expedient thing is just not to override operator new.
-    //      While relatively pointless, heap allocated instances will function.
-    // static void * U_EXPORT2 operator new(size_t size); 
-    // static void * U_EXPORT2 operator new[](size_t size);
-#if U_HAVE_PLACEMENT_NEW
-    // static void * U_EXPORT2 operator new(size_t, void *ptr);
-#endif
+    MaybeStackArray(const MaybeStackArray & /*other*/) = delete;
+    void operator=(const MaybeStackArray & /*other*/) = delete;
 };

 template<typename T, int32_t stackCapacity>
 icu::MaybeStackArray<T, stackCapacity>::MaybeStackArray(
-        MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT
+        MaybeStackArray <T, stackCapacity>&& src) noexcept
        : ptr(src.ptr), capacity(src.capacity), needToRelease(src.needToRelease) {
    if (src.ptr == src.stackArray) {
        ptr = stackArray;
@@ -429,7 +485,7 @@ icu::MaybeStackArray<T, stackCapacity>::MaybeStackArray(

 template<typename T, int32_t stackCapacity>
 inline MaybeStackArray <T, stackCapacity>&
-MaybeStackArray<T, stackCapacity>::operator=(MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT {
+MaybeStackArray<T, stackCapacity>::operator=(MaybeStackArray <T, stackCapacity>&& src) noexcept {
    releaseArray();  // in case this instance had its own memory allocated
    capacity = src.capacity;
    needToRelease = src.needToRelease;
@@ -447,10 +503,10 @@ template<typename T, int32_t stackCapacity>
 inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
    if(newCapacity>0) {
 #if U_DEBUG && defined(UPRV_MALLOC_COUNT)
-      ::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
+        ::fprintf(::stderr, "MaybeStackArray (resize) alloc %d * %lu\n", newCapacity, sizeof(T));
 #endif
        T *p=(T *)uprv_malloc(newCapacity*sizeof(T));
-        if(p!=NULL) {
+        if(p!=nullptr) {
            if(length>0) {
                if(length>capacity) {
                    length=capacity;
@@ -463,11 +519,11 @@ inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t
            releaseArray();
            ptr=p;
            capacity=newCapacity;
-            needToRelease=TRUE;
+            needToRelease=true;
        }
        return p;
    } else {
-        return NULL;
+        return nullptr;
    }
 }

@@ -477,7 +533,7 @@ inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32
    if(needToRelease) {
        p=ptr;
    } else if(length<=0) {
-        return NULL;
+        return nullptr;
    } else {
        if(length>capacity) {
            length=capacity;
@@ -486,8 +542,8 @@ inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32
 #if U_DEBUG && defined(UPRV_MALLOC_COUNT)
      ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
 #endif
-        if(p==NULL) {
-            return NULL;
+        if(p==nullptr) {
+            return nullptr;
        }
        uprv_memcpy(p, ptr, (size_t)length*sizeof(T));
    }
@@ -509,10 +565,15 @@ inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32
 template<typename H, typename T, int32_t stackCapacity>
 class MaybeStackHeaderAndArray {
 public:
+    // No heap allocation. Use only on the stack.
+    static void* U_EXPORT2 operator new(size_t) noexcept = delete;
+    static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
+    static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
+
    /**
     * Default constructor initializes with internal H+T[stackCapacity] buffer.
     */
-    MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
+    MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(false) {}
    /**
     * Destructor deletes the memory (if owned).
     */
@@ -553,15 +614,15 @@ public:
    /**
     * Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
     * If the arguments are illegal, then the current memory is unchanged.
-     * @param otherArray must not be NULL
+     * @param otherMemory must not be nullptr
     * @param otherCapacity must be >0
     */
    void aliasInstead(H *otherMemory, int32_t otherCapacity) {
-        if(otherMemory!=NULL && otherCapacity>0) {
+        if(otherMemory!=nullptr && otherCapacity>0) {
            releaseMemory();
            ptr=otherMemory;
            capacity=otherCapacity;
-            needToRelease=FALSE;
+            needToRelease=false;
        }
    }
    /**
@@ -569,17 +630,17 @@ public:
     * copying the header and length T array items.
     * Returns the new header pointer.
     * If the allocation fails, then the current memory is unchanged and
-     * this method returns NULL.
+     * this method returns nullptr.
     * @param newCapacity can be less than or greater than the current capacity;
     *                    must be >0
     * @param length number of T items to be copied from the old array to the new one
-     * @return the allocated pointer, or NULL if the allocation failed
+     * @return the allocated pointer, or nullptr if the allocation failed
     */
    inline H *resize(int32_t newCapacity, int32_t length=0);
    /**
     * Gives up ownership of the memory if owned, or else clones it,
     * copying the header and length T array items; resets itself to the internal memory.
-     * Returns NULL if the allocation failed.
+     * Returns nullptr if the allocation failed.
     * @param length number of T items to copy when cloning,
     *        and array capacity of the clone when cloning
     * @param resultCapacity will be set to the returned array's capacity (output-only)
@@ -600,20 +661,11 @@ private:
        }
    }
    /* No comparison operators with other MaybeStackHeaderAndArray's. */
-    bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return FALSE;}
-    bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return TRUE;}
+    bool operator==(const MaybeStackHeaderAndArray & /*other*/) = delete;
+    bool operator!=(const MaybeStackHeaderAndArray & /*other*/) = delete;
    /* No ownership transfer: No copy constructor, no assignment operator. */
    MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {}
    void operator=(const MaybeStackHeaderAndArray & /*other*/) {}
-
-    // No heap allocation. Use only on the stack.
-    //   (Declaring these functions private triggers a cascade of problems;
-    //    see the MaybeStackArray class for details.)
-    // static void * U_EXPORT2 operator new(size_t size); 
-    // static void * U_EXPORT2 operator new[](size_t size);
-#if U_HAVE_PLACEMENT_NEW
-    // static void * U_EXPORT2 operator new(size_t, void *ptr);
-#endif
 };

 template<typename H, typename T, int32_t stackCapacity>
@@ -624,7 +676,7 @@ inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapac
      ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %d + %d * %ul\n", sizeof(H),newCapacity,sizeof(T));
 #endif
        H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
-        if(p!=NULL) {
+        if(p!=nullptr) {
            if(length<0) {
                length=0;
            } else if(length>0) {
@@ -639,11 +691,11 @@ inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapac
            releaseMemory();
            ptr=p;
            capacity=newCapacity;
-            needToRelease=TRUE;
+            needToRelease=true;
        }
        return p;
    } else {
-        return NULL;
+        return nullptr;
    }
 }

@@ -663,18 +715,181 @@ inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t l
      ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %ul + %d * %lu\n", sizeof(H),length,sizeof(T));
 #endif
        p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
-        if(p==NULL) {
-            return NULL;
+        if(p==nullptr) {
+            return nullptr;
        }
        uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T));
    }
    resultCapacity=length;
    ptr=&stackHeader;
    capacity=stackCapacity;
-    needToRelease=FALSE;
+    needToRelease=false;
    return p;
 }

+/**
+ * A simple memory management class that creates new heap allocated objects (of
+ * any class that has a public constructor), keeps track of them and eventually
+ * deletes them all in its own destructor.
+ *
+ * A typical use-case would be code like this:
+ *
+ *     MemoryPool<MyType> pool;
+ *
+ *     MyType* o1 = pool.create();
+ *     if (o1 != nullptr) {
+ *         foo(o1);
+ *     }
+ *
+ *     MyType* o2 = pool.create(1, 2, 3);
+ *     if (o2 != nullptr) {
+ *         bar(o2);
+ *     }
+ *
+ *     // MemoryPool will take care of deleting the MyType objects.
+ *
+ * It doesn't do anything more than that, and is intentionally kept minimalist.
+ */
+template<typename T, int32_t stackCapacity = 8>
+class MemoryPool : public UMemory {
+public:
+    /** Constructs an empty pool. */
+    MemoryPool() : fCount(0), fPool() {}
+
+    /** Deletes every object that is recorded in this pool. */
+    ~MemoryPool() {
+        for (int32_t i = 0; i < fCount; ++i) {
+            delete fPool[i];
+        }
+    }
+
+    MemoryPool(const MemoryPool&) = delete;
+    MemoryPool& operator=(const MemoryPool&) = delete;
+
+    /**
+     * Move constructor: takes over the pointer array of other and resets
+     * other's count to 0, so that other's destructor deletes nothing.
+     */
+    MemoryPool(MemoryPool&& other) noexcept
+            : fCount(other.fCount), fPool(std::move(other.fPool)) {
+        other.fCount = 0;
+    }
+
+    /**
+     * Move assignment: exchanges the contents of this pool and other.
+     * @return *this
+     */
+    MemoryPool& operator=(MemoryPool&& other) noexcept {
+        // Since `this` may contain instances that need to be deleted, we can't
+        // just throw them away and replace them with `other`. The normal way of
+        // dealing with this in C++ is to swap `this` and `other`, rather than
+        // simply overwrite: the destruction of `other` can then take care of
+        // running MemoryPool::~MemoryPool() over the still-to-be-deallocated
+        // instances.
+        std::swap(fCount, other.fCount);
+        std::swap(fPool, other.fPool);
+        return *this;
+    }
+
+    /**
+     * Creates a new object of typename T, by forwarding any and all arguments
+     * to the typename T constructor.
+     *
+     * When the pointer array is full it is grown first: to 4x the capacity
+     * while it still has the initial stackCapacity, and to 2x afterwards.
+     * A failed creation leaves the pool and count() unchanged.
+     *
+     * @param args Arguments to be forwarded to the typename T constructor.
+     * @return A pointer to the newly created object, or nullptr on error.
+     */
+    template<typename... Args>
+    T* create(Args&&... args) {
+        int32_t capacity = fPool.getCapacity();
+        if (fCount == capacity &&
+            fPool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity,
+                         capacity) == nullptr) {
+            return nullptr;
+        }
+        // NOTE(review): new T can yield nullptr when T's operator new is
+        // non-throwing (presumably the case for UMemory-derived types; confirm).
+        // Only record and count objects that were actually created.
+        T* object = new T(std::forward<Args>(args)...);
+        if (object != nullptr) {
+            fPool[fCount++] = object;
+        }
+        return object;
+    }
+
+    /**
+     * Same as create(), but honors and reports errors through a UErrorCode.
+     *
+     * @param status Nothing is created if this already indicates a failure;
+     *               set to U_MEMORY_ALLOCATION_ERROR if create() returns nullptr.
+     * @param args Arguments to be forwarded to the typename T constructor.
+     * @return A pointer to the newly created object, or nullptr on error.
+     */
+    template <typename... Args>
+    T* createAndCheckErrorCode(UErrorCode &status, Args &&... args) {
+        if (U_FAILURE(status)) {
+            return nullptr;
+        }
+        // Forward (rather than pass as lvalues) so that rvalue arguments
+        // reach the T constructor as rvalues, exactly as with create().
+        T *pointer = this->create(std::forward<Args>(args)...);
+        if (U_SUCCESS(status) && pointer == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return pointer;
+    }
+
+    /**
+     * @return Number of elements that have been allocated.
+     */
+    int32_t count() const {
+        return fCount;
+    }
+
+protected:
+    /** Number of pointers stored in fPool. */
+    int32_t fCount;
+    /** Pointers to the objects created so far; entries [0, fCount) are in use. */
+    MaybeStackArray<T*, stackCapacity> fPool;
+};
+
+/**
+ * An internal Vector-like implementation based on MemoryPool.
+ *
+ * Heap-allocates each element and stores pointers.
+ *
+ * To append an item to the vector, use emplaceBack.
+ *
+ *     MaybeStackVector<MyType> vector;
+ *     MyType* element = vector.emplaceBack();
+ *     if (!element) {
+ *         status = U_MEMORY_ALLOCATION_ERROR;
+ *     }
+ *     // do stuff with element
+ *
+ * To loop over the vector, use a for loop with indices:
+ *
+ *     for (int32_t i = 0; i < vector.length(); i++) {
+ *         MyType* element = vector[i];
+ *     }
+ */
+template<typename T, int32_t stackCapacity = 8>
+class MaybeStackVector : protected MemoryPool<T, stackCapacity> {
+public:
+    /**
+     * Appends a new element, constructed from the given arguments.
+     * @param args Arguments to be forwarded to the typename T constructor.
+     * @return A pointer to the new element, or nullptr on error.
+     */
+    template<typename... Args>
+    T* emplaceBack(Args&&... args) {
+        // Forward so that rvalue arguments are not silently turned into lvalues.
+        return this->create(std::forward<Args>(args)...);
+    }
+
+    /**
+     * Appends a new element, reporting failure through a UErrorCode.
+     * @param status Error code; see MemoryPool::createAndCheckErrorCode().
+     * @param args Arguments to be forwarded to the typename T constructor.
+     * @return A pointer to the new element, or nullptr on error.
+     */
+    template <typename... Args>
+    T *emplaceBackAndCheckErrorCode(UErrorCode &status, Args &&... args) {
+        return this->createAndCheckErrorCode(status, std::forward<Args>(args)...);
+    }
+
+    /**
+     * @return Number of elements in the vector.
+     */
+    int32_t length() const {
+        return this->fCount;
+    }
+
+    /**
+     * @return The underlying array of element pointers (no transfer of ownership).
+     */
+    T** getAlias() {
+        return this->fPool.getAlias();
+    }
+
+    /**
+     * @return The underlying array of element pointers, read-only
+     *         (no transfer of ownership).
+     */
+    const T *const *getAlias() const {
+        return this->fPool.getAlias();
+    }
+
+    /**
+     * Array item access (read-only).
+     * No index bounds check.
+     * @param i array index
+     * @return pointer to the element stored at index i
+     */
+    const T* operator[](ptrdiff_t i) const {
+        return this->fPool[i];
+    }
+
+    /**
+     * Array item access (writable).
+     * No index bounds check.
+     * @param i array index
+     * @return pointer to the element stored at index i
+     */
+    T* operator[](ptrdiff_t i) {
+        return this->fPool[i];
+    }
+};
+
+
 U_NAMESPACE_END

 #endif  /* __cplusplus */
@@ -1,72 +1,65 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <!-- The following import will include the 'default' configuration options for VS projects. -->
-  <Import Project="..\allinone\Build.Windows.ProjectConfiguration.props" />
-  <!-- The following import will include the library configuration options for VS projects. -->
-  <Import Project="..\allinone\Build.Windows.Library.ProjectConfiguration.props" />
  <PropertyGroup Label="Globals">
    <ProjectGuid>{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}</ProjectGuid>
  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Label="Configuration">
    <ConfigurationType>DynamicLibrary</ConfigurationType>
    <UseOfMfc>false</UseOfMfc>
    <CharacterSet>MultiByte</CharacterSet>
  </PropertyGroup>
+    <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <!-- The following import will include the 'default' configuration options for VS projects. -->
+  <Import Project="..\allinone\Build.Windows.ProjectConfiguration.props" />
+  <!-- The following import will include the library configuration options for VS projects. -->
+  <Import Project="..\allinone\Build.Windows.Library.ProjectConfiguration.props" />
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
-  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup>
    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\..\..\lib\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\x86\Release\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\..\..\lib\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\x86\Debug\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.\x64\Release\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.\x64\Release\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.\x64\Debug\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.\x64\Debug\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir>.\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>.\$(Platform)\$(Configuration)\</IntDir>
+    <!-- The ICU projects use "Win32" to mean "x86", so we need to special case it. -->
+    <OutDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</IntDir>
+    <!-- Disable Incremental Linking for Release builds as it prevents Link-time Code Generation -->
+    <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental>
  </PropertyGroup>
  <!-- Options that are common to *all* "common" project configurations -->
  <ItemDefinitionGroup>
+    <Midl>
+      <TypeLibraryName>$(OutDir)\icuuc.tlb</TypeLibraryName>
+    </Midl>
    <ClCompile>
      <PreprocessorDefinitions>U_ATTRIBUTE_DEPRECATED=;U_COMMON_IMPLEMENTATION;U_PLATFORM_USES_ONLY_WIN32_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <DisableLanguageExtensions>false</DisableLanguageExtensions>
      <WarningLevel>Level3</WarningLevel>
+      <CompileAs>Default</CompileAs>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <PrecompiledHeaderOutputFile>$(OutDir)/icuuc.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(OutDir)/</AssemblerListingLocation>
+      <ObjectFileName>$(OutDir)/</ObjectFileName>
+      <ProgramDataBaseFileName>$(OutDir)/icuuc.pdb</ProgramDataBaseFileName>
    </ClCompile>
    <Link>
+      <!-- The icudt.lib is for U_ICUDATA_ENTRY_POINT -->
      <AdditionalDependencies>icudt.lib;%(AdditionalDependencies)</AdditionalDependencies>
-      <BaseAddress>0x4a800000</BaseAddress>
+      <AdditionalLibraryDirectories>.\..\..\$(IcuLibOutputDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
    </Link>
  </ItemDefinitionGroup>
  <!-- Options that are common to all 'Debug' project configurations -->
  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
    <ClCompile>
      <PreprocessorDefinitions>RBBI_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <BrowseInformation>true</BrowseInformation>
      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
    </ClCompile>
+    <Link>
+      <OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion)d.dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
+    </Link>
  </ItemDefinitionGroup>
  <!-- Options that are common to all 'Release' project configurations -->
  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
@@ -74,79 +67,10 @@
      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
      <FunctionLevelLinking>true</FunctionLevelLinking>
    </ClCompile>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <Midl>
-      <TypeLibraryName>.\..\..\lib\icuuc.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\x86\Release/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\x86\Release/</AssemblerListingLocation>
-      <ObjectFileName>.\x86\Release/</ObjectFileName>
-      <ProgramDataBaseFileName>.\x86\Release/</ProgramDataBaseFileName>
-    </ClCompile>
    <Link>
-      <OutputFile>..\..\bin\icuuc63.dll</OutputFile>
-      <AdditionalLibraryDirectories>.\..\..\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <ProgramDatabaseFile>.\..\..\lib\icuuc.pdb</ProgramDatabaseFile>
-      <DataExecutionPrevention>
-      </DataExecutionPrevention>
-      <ImportLibrary>..\..\lib\icuuc.lib</ImportLibrary>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <Midl>
-      <TypeLibraryName>.\..\..\lib\icuucd.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\x86\Debug/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\x86\Debug/</AssemblerListingLocation>
-      <ObjectFileName>.\x86\Debug/</ObjectFileName>
-      <ProgramDataBaseFileName>.\x86\Debug/</ProgramDataBaseFileName>
-      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
-    </ClCompile>
-    <Link>
-      <OutputFile>..\..\bin\icuuc63d.dll</OutputFile>
-      <AdditionalLibraryDirectories>.\..\..\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <ProgramDatabaseFile>.\..\..\lib\icuucd.pdb</ProgramDatabaseFile>
-      <DataExecutionPrevention>
-      </DataExecutionPrevention>
-      <ImportLibrary>..\..\lib\icuucd.lib</ImportLibrary>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Midl>
-      <TypeLibraryName>.\..\..\lib64\icuuc.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\x64\Release/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\x64\Release/</AssemblerListingLocation>
-      <ObjectFileName>.\x64\Release/</ObjectFileName>
-      <ProgramDataBaseFileName>.\x64\Release/</ProgramDataBaseFileName>
-    </ClCompile>
-    <Link>
-      <OutputFile>..\..\bin64\icuuc63.dll</OutputFile>
-      <AdditionalLibraryDirectories>.\..\..\lib64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <ProgramDatabaseFile>.\..\..\lib64\icuuc.pdb</ProgramDatabaseFile>
-      <ImportLibrary>..\..\lib64\icuuc.lib</ImportLibrary>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Midl>
-      <TypeLibraryName>.\..\..\lib64\icuucd.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\x64\Debug/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\x64\Debug/</AssemblerListingLocation>
-      <ObjectFileName>.\x64\Debug/</ObjectFileName>
-      <ProgramDataBaseFileName>.\x64\Debug/</ProgramDataBaseFileName>
-      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
-    </ClCompile>
-    <Link>
-      <OutputFile>..\..\bin64\icuuc63d.dll</OutputFile>
-      <AdditionalLibraryDirectories>.\..\..\lib64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
-      <ProgramDatabaseFile>.\..\..\lib64\icuucd.pdb</ProgramDatabaseFile>
-      <ImportLibrary>..\..\lib64\icuucd.lib</ImportLibrary>
+      <OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion).dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
@@ -161,6 +85,8 @@
    <ClCompile Include="brkeng.cpp" />
    <ClCompile Include="brkiter.cpp" />
    <ClCompile Include="dictbe.cpp" />
+    <ClCompile Include="lstmbe.cpp" />
+    <ClCompile Include="mlbe.cpp" />
    <ClCompile Include="pluralmap.cpp" />
    <ClCompile Include="rbbi.cpp" />
    <ClCompile Include="rbbidata.cpp" />
@@ -238,19 +164,27 @@
    <ClCompile Include="punycode.cpp" />
    <ClCompile Include="uidna.cpp" />
    <ClCompile Include="uts46.cpp" />
+    <ClCompile Include="localebuilder.cpp" />
+    <ClCompile Include="ulocbuilder.cpp" />
+    <ClCompile Include="localematcher.cpp" />
+    <ClCompile Include="localeprioritylist.cpp" />
    <ClCompile Include="locavailable.cpp" />
    <ClCompile Include="locbased.cpp" />
    <ClCompile Include="locdispnames.cpp" />
+    <ClCompile Include="locdistance.cpp" />
    <ClCompile Include="locdspnm.cpp" />
    <ClCompile Include="locid.cpp" />
    <ClCompile Include="loclikely.cpp" />
+    <ClCompile Include="loclikelysubtags.cpp" />
    <ClCompile Include="locresdata.cpp" />
    <ClCompile Include="locutil.cpp" />
+    <ClCompile Include="lsr.cpp" />
    <ClCompile Include="resbund.cpp" />
    <ClCompile Include="resbund_cnv.cpp" />
    <ClCompile Include="ucat.cpp" />
    <ClCompile Include="uloc.cpp" />
    <ClCompile Include="uloc_tag.cpp" />
+    <ClCompile Include="ulocale.cpp" />
    <ClCompile Include="ures_cnv.cpp" />
    <ClCompile Include="uresbund.cpp" />
    <ClCompile Include="uresdata.cpp" />
@@ -271,6 +205,7 @@
    <ClCompile Include="ucase.cpp" />
    <ClCompile Include="uchar.cpp" />
    <ClCompile Include="characterproperties.cpp" />
+    <ClCompile Include="emojiprops.cpp" />
    <ClCompile Include="unames.cpp" />
    <ClCompile Include="unifiedcache.cpp" />
    <ClCompile Include="unifilt.cpp" />
@@ -338,6 +273,8 @@
    <ClCompile Include="utext.cpp" />
    <ClCompile Include="utf_impl.cpp" />
    <ClCompile Include="static_unicode_sets.cpp" />
+    <ClCompile Include="restrace.cpp" />
+    <ClCompile Include="fixedstring.cpp" />
    <ClInclude Include="localsvc.h" />
    <ClInclude Include="msvcres.h" />
    <ClInclude Include="pluralmap.h" />
@@ -346,6 +283,8 @@
    <ClInclude Include="ubidiimp.h" />
    <ClInclude Include="brkeng.h" />
    <ClInclude Include="dictbe.h" />
+    <ClInclude Include="lstmbe.h" />
+    <ClInclude Include="mlbe.h" />
    <ClInclude Include="rbbidata.h" />
    <ClInclude Include="rbbinode.h" />
    <ClInclude Include="rbbirb.h" />
@@ -405,8 +344,14 @@
    <ClInclude Include="ustrfmt.h" />
    <ClInclude Include="util.h" />
    <ClInclude Include="punycode.h" />
+    <ClInclude Include="localeprioritylist.h" />
    <ClInclude Include="locbased.h" />
+    <ClInclude Include="locdistance.h" />
+    <ClInclude Include="charstrmap.h" />
+    <ClInclude Include="loclikelysubtags.h" />
+    <ClInclude Include="uniquecharstr.h" />
    <ClInclude Include="locutil.h" />
+    <ClInclude Include="lsr.h" />
    <ClInclude Include="sharedobject.h" />
    <ClCompile Include="sharedobject.cpp" />
    <ClInclude Include="ulocimp.h" />
@@ -424,7 +369,9 @@
    <ClInclude Include="patternprops.h" />
    <ClInclude Include="propname.h" />
    <ClInclude Include="ruleiter.h" />
+    <ClInclude Include="emojiprops.h" />
    <ClInclude Include="ucase.h" />
+    <ClInclude Include="ulayout_props.h" />
    <ClInclude Include="unisetspan.h" />
    <ClInclude Include="uprops.h" />
    <ClInclude Include="usc_impl.h" />
@@ -444,6 +391,9 @@
    <ClInclude Include="ustr_cnv.h" />
    <ClInclude Include="ustr_imp.h" />
    <ClInclude Include="static_unicode_sets.h" />
+    <ClInclude Include="capi_helper.h" />
+    <ClInclude Include="restrace.h" />
+    <ClInclude Include="fixedstring.h" />
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="common.rc" />
@@ -73,6 +73,12 @@
    <ClCompile Include="dictbe.cpp">
      <Filter>break iteration</Filter>
    </ClCompile>
+    <ClCompile Include="lstmbe.cpp">
+      <Filter>break iteration</Filter>
+    </ClCompile>
+    <ClCompile Include="mlbe.cpp">
+      <Filter>break iteration</Filter>
+    </ClCompile>
    <ClCompile Include="rbbi.cpp">
      <Filter>break iteration</Filter>
    </ClCompile>
@@ -313,6 +319,18 @@
    <ClCompile Include="uts46.cpp">
      <Filter>idna</Filter>
    </ClCompile>
+    <ClCompile Include="localebuilder.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
+    <ClCompile Include="ulocbuilder.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
+    <ClCompile Include="localematcher.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
+    <ClCompile Include="localeprioritylist.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
    <ClCompile Include="locavailable.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
@@ -322,18 +340,27 @@
    <ClCompile Include="locdispnames.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
+    <ClCompile Include="locdistance.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
    <ClCompile Include="locid.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
    <ClCompile Include="loclikely.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
+    <ClCompile Include="loclikelysubtags.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
    <ClCompile Include="locresdata.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
    <ClCompile Include="locutil.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
+    <ClCompile Include="lsr.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
    <ClCompile Include="resbund.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
@@ -349,6 +376,9 @@
    <ClCompile Include="uloc_tag.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
+    <ClCompile Include="ulocale.cpp">
+      <Filter>locales &amp; resources</Filter>
+    </ClCompile>
    <ClCompile Include="ures_cnv.cpp">
      <Filter>locales &amp; resources</Filter>
    </ClCompile>
@@ -391,6 +421,9 @@
    <ClCompile Include="characterproperties.cpp">
      <Filter>properties &amp; sets</Filter>
    </ClCompile>
+    <ClCompile Include="emojiprops.cpp">
+      <Filter>properties &amp; sets</Filter>
+    </ClCompile>
    <ClCompile Include="propname.cpp">
      <Filter>properties &amp; sets</Filter>
    </ClCompile>
@@ -616,6 +649,12 @@
    <ClCompile Include="static_unicode_sets.cpp">
      <Filter>formatting</Filter>
    </ClCompile>
+    <ClCompile Include="restrace.cpp">
+      <Filter>data &amp; memory</Filter>
+    </ClCompile>
+    <ClCompile Include="fixedstring.cpp">
+      <Filter>strings</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="ubidi_props.h">
@@ -630,6 +669,12 @@
    <ClInclude Include="dictbe.h">
      <Filter>break iteration</Filter>
    </ClInclude>
+    <ClInclude Include="lstmbe.h">
+      <Filter>break iteration</Filter>
+    </ClInclude>
+    <ClInclude Include="mlbe.h">
+      <Filter>break iteration</Filter>
+    </ClInclude>
    <ClInclude Include="rbbidata.h">
      <Filter>break iteration</Filter>
    </ClInclude>
@@ -810,12 +855,30 @@
    <ClInclude Include="punycode.h">
      <Filter>idna</Filter>
    </ClInclude>
+    <ClInclude Include="localeprioritylist.h">
+      <Filter>locales &amp; resources</Filter>
+    </ClInclude>
    <ClInclude Include="locbased.h">
      <Filter>locales &amp; resources</Filter>
    </ClInclude>
+    <ClInclude Include="locdistance.h">
+      <Filter>locales &amp; resources</Filter>
+    </ClInclude>
+    <ClInclude Include="charstrmap.h">
+      <Filter>locales &amp; resources</Filter>
+    </ClInclude>
+    <ClInclude Include="uniquecharstr.h">
+      <Filter>locales &amp; resources</Filter>
+    </ClInclude>
+    <ClInclude Include="loclikelysubtags.h">
+      <Filter>locales &amp; resources</Filter>
+    </ClInclude>
    <ClInclude Include="locutil.h">
      <Filter>locales &amp; resources</Filter>
    </ClInclude>
+    <ClInclude Include="lsr.h">
+      <Filter>locales &amp; resources</Filter>
+    </ClInclude>
    <ClInclude Include="ulocimp.h">
      <Filter>locales &amp; resources</Filter>
    </ClInclude>
@@ -849,9 +912,15 @@
    <ClInclude Include="ruleiter.h">
      <Filter>properties &amp; sets</Filter>
    </ClInclude>
+    <ClInclude Include="emojiprops.h">
+      <Filter>properties &amp; sets</Filter>
+    </ClInclude>
    <ClInclude Include="ucase.h">
      <Filter>properties &amp; sets</Filter>
    </ClInclude>
+    <ClInclude Include="ulayout_props.h">
+      <Filter>properties &amp; sets</Filter>
+    </ClInclude>
    <ClInclude Include="unisetspan.h">
      <Filter>properties &amp; sets</Filter>
    </ClInclude>
@@ -948,6 +1017,15 @@
    <ClInclude Include="static_unicode_sets.h">
      <Filter>formatting</Filter>
    </ClInclude>
+    <ClInclude Include="capi_helper.h">
+      <Filter>data &amp; memory</Filter>
+    </ClInclude>
+    <ClInclude Include="restrace.h">
+      <Filter>data &amp; memory</Filter>
+    </ClInclude>
+    <ClInclude Include="fixedstring.h">
+      <Filter>strings</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="common.rc">
@@ -1063,6 +1141,15 @@
    <CustomBuild Include="unicode\uidna.h">
      <Filter>idna</Filter>
    </CustomBuild>
+    <CustomBuild Include="unicode\localebuilder.h">
+      <Filter>locales &amp; resources</Filter>
+    </CustomBuild>
+    <CustomBuild Include="unicode\ulocbuilder.h">
+      <Filter>locales &amp; resources</Filter>
+    </CustomBuild>
+    <CustomBuild Include="unicode\localematcher.h">
+      <Filter>locales &amp; resources</Filter>
+    </CustomBuild>
    <CustomBuild Include="unicode\locid.h">
      <Filter>locales &amp; resources</Filter>
    </CustomBuild>
@@ -1075,6 +1162,9 @@
    <CustomBuild Include="unicode\uloc.h">
      <Filter>locales &amp; resources</Filter>
    </CustomBuild>
+    <CustomBuild Include="unicode\ulocale.h">
+      <Filter>locales &amp; resources</Filter>
+    </CustomBuild>
    <CustomBuild Include="unicode\ures.h">
      <Filter>locales &amp; resources</Filter>
    </CustomBuild>
@@ -1183,6 +1273,12 @@
    <CustomBuild Include="unicode\utf_old.h">
      <Filter>strings</Filter>
    </CustomBuild>
+    <CustomBuild Include="unicode\utfiterator.h">
+      <Filter>strings</Filter>
+    </CustomBuild>
+    <CustomBuild Include="unicode\utfstring.h">
+      <Filter>strings</Filter>
+    </CustomBuild>
    <CustomBuild Include="unicode\bytestrie.h">
      <Filter>collections</Filter>
    </CustomBuild>
@@ -1,9 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <!-- The following import will include the UWP configuration options for VS projects. -->
-  <Import Project="..\allinone\Build.Windows.UWP.ProjectConfiguration.props" />
-  <!-- The following import will include the library configuration options for VS projects. -->
-  <Import Project="..\allinone\Build.Windows.Library.ProjectConfiguration.props" />
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
@@ -17,6 +13,10 @@
      <Configuration>Debug</Configuration>
      <Platform>ARM</Platform>
    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM64">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
@@ -29,51 +29,50 @@
      <Configuration>Release</Configuration>
      <Platform>ARM</Platform>
    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM64">
+      <Configuration>Release</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <ProjectGuid>{C10CF34B-3F79-430E-AD38-5A32DC0589C2}</ProjectGuid>
    <Keyword>DynamicLibrary</Keyword>
    <DefaultLanguage>en-US</DefaultLanguage>
  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Label="Configuration">
    <ConfigurationType>DynamicLibrary</ConfigurationType>
    <UseOfMfc>false</UseOfMfc>
    <CharacterSet>MultiByte</CharacterSet>
  </PropertyGroup>
+<!-- The following import will include the UWP configuration options for VS projects. -->
+  <Import Project="..\allinone\Build.Windows.UWP.ProjectConfiguration.props" />
+  <!-- The following import will include the library configuration options for VS projects. -->
+  <Import Project="..\allinone\Build.Windows.Library.ProjectConfiguration.props" />
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
-  <ImportGroup Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-    <Import Project="$(VCTargetsPath)Microsoft.CPP.UpgradeFromVC71.props" />
-  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup>
    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\x86\ReleaseUWP\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\x86\ReleaseUWP\</IntDir>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\x86\DebugUWP\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\x86\DebugUWP\</IntDir>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.\x64\ReleaseUWP\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.\x64\ReleaseUWP\</IntDir>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.\x64\DebugUWP\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.\x64\DebugUWP\</IntDir>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">.\arm\ReleaseUWP\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">.\arm\ReleaseUWP\</IntDir>
-    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">.\arm\DebugUWP\</OutDir>
-    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">.\arm\DebugUWP\</IntDir>
-    <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental>
+    <OutDir>.\$(Platform)\$(Configuration)UWP\</OutDir>
+    <IntDir>.\$(Platform)\$(Configuration)UWP\</IntDir>
+    <!-- The ICU projects use "Win32" to mean "x86", so we need to special case it. -->
+    <OutDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)UWP\</OutDir>
+    <IntDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)UWP\</IntDir>
+    <!-- Disable Incremental Linking for Release builds as it prevents Link-time Code Generation -->
    <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <!-- Options that are common to *all* configurations -->
    <Midl>
      <MkTypLibCompatible>true</MkTypLibCompatible>
      <SuppressStartupBanner>true</SuppressStartupBanner>
+      <TypeLibraryName>$(OutDir)\icuuc.tlb</TypeLibraryName>
    </Midl>
    <ClCompile>
-      <AdditionalIncludeDirectories>..\..\include;..\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <!-- U_DISABLE_RENAMING -->
      <!-- U_HIDE_DRAFT_API & U_HIDE_DEPRECATED_API -->
      <PreprocessorDefinitions>U_ATTRIBUTE_DEPRECATED=;_CRT_SECURE_NO_DEPRECATE;U_COMMON_IMPLEMENTATION;U_PLATFORM_USES_ONLY_WIN32_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
@@ -81,7 +80,6 @@
      <ExceptionHandling>
      </ExceptionHandling>
      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <DisableLanguageExtensions>false</DisableLanguageExtensions>
      <TreatWChar_tAsBuiltInType>true</TreatWChar_tAsBuiltInType>
      <WarningLevel>Level3</WarningLevel>
      <SuppressStartupBanner>true</SuppressStartupBanner>
@@ -89,6 +87,11 @@
      <PrecompiledHeader>NotUsing</PrecompiledHeader>
      <CompileAsWinRT>false</CompileAsWinRT>
      <AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
+      <PrecompiledHeaderOutputFile>$(OutDir)/icuuc.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(OutDir)/</AssemblerListingLocation>
+      <ObjectFileName>$(OutDir)/</ObjectFileName>
+      <ProgramDataBaseFileName>$(OutDir)/icuuc.pdb</ProgramDataBaseFileName>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
    </ClCompile>
    <ResourceCompile>
      <Culture>0x0409</Culture>
@@ -102,6 +105,8 @@
      </DataExecutionPrevention>
      <TurnOffAssemblyGeneration>true</TurnOffAssemblyGeneration>
      <IgnoreSpecificDefaultLibraries>vccorlib.lib;msvcrt.lib</IgnoreSpecificDefaultLibraries>
+      <!-- The icudt.lib is for U_ICUDATA_ENTRY_POINT -->
+      <AdditionalDependencies>icudt.lib;onecore.lib;%(AdditionalDependencies)</AdditionalDependencies>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
@@ -118,7 +123,10 @@
    </ResourceCompile>
    <Link>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>vccorlib.lib;msvcrt.lib;vcruntime.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion).dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
@@ -132,14 +140,16 @@
      <Optimization>Disabled</Optimization>
      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
      <BufferSecurityCheck>true</BufferSecurityCheck>
-      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
    </ClCompile>
    <ResourceCompile>
      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ResourceCompile>
    <Link>
      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>vccorlibd.lib;msvcrtd.lib;vcruntimed.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <OutputFile>..\..\$(IcuBinOutputDir)\icuuc$(IcuMajorVersion)d.dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
@@ -150,6 +160,10 @@
    <ClCompile>
      <PreprocessorDefinitions>WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
+    <Link>
+      <!-- This is so that we can use the existing stubdata icudt.lib and not need a UWP version. -->
+      <AdditionalLibraryDirectories>.\..\..\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
    <!-- Options that are common to all 64-bit configurations -->
@@ -161,6 +175,8 @@
    </ClCompile>
    <Link>
      <TargetMachine>MachineX64</TargetMachine>
+      <!-- This is so that we can use the existing stubdata icudt.lib and not need a UWP version. -->
+      <AdditionalLibraryDirectories>.\..\..\lib64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Platform)'=='ARM'">
@@ -173,103 +189,22 @@
    </ClCompile>
    <Link>
      <TargetMachine>MachineARM</TargetMachine>
+      <!-- This is so that we can use the existing stubdata icudt.lib and not need a UWP version. -->
+      <AdditionalLibraryDirectories>.\..\..\libARM;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
    </Link>
  </ItemDefinitionGroup>
-  <!-- Options that are specific to a particular configuration -->
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+  <ItemDefinitionGroup Condition="'$(Platform)'=='ARM64'">
+    <!-- Options that are common to all ARM64 configurations -->
    <Midl>
-      <TypeLibraryName>.\..\..\lib32uwp\icuuc.tlb</TypeLibraryName>
+      <TargetEnvironment>ARM64</TargetEnvironment>
    </Midl>
    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\x86\ReleaseUWP/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\x86\ReleaseUWP/</AssemblerListingLocation>
-      <ObjectFileName>.\x86\ReleaseUWP/</ObjectFileName>
-      <ProgramDataBaseFileName>.\x86\ReleaseUWP/</ProgramDataBaseFileName>
+      <PreprocessorDefinitions>ARM64;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
    <Link>
-      <OutputFile>..\..\bin32uwp\icuuc63.dll</OutputFile>
-      <ProgramDatabaseFile>.\..\..\lib32uwp\icuuc.pdb</ProgramDatabaseFile>
-      <ImportLibrary>..\..\lib32uwp\icuuc.lib</ImportLibrary>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <Midl>
-      <TypeLibraryName>.\..\..\lib32uwp\icuucd.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\x86\DebugUWP/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\x86\DebugUWP/</AssemblerListingLocation>
-      <ObjectFileName>.\x86\DebugUWP/</ObjectFileName>
-      <ProgramDataBaseFileName>.\x86\DebugUWP/</ProgramDataBaseFileName>
-    </ClCompile>
-    <Link>
-      <OutputFile>..\..\bin32uwp\icuuc63d.dll</OutputFile>
-      <ProgramDatabaseFile>.\..\..\lib32uwp\icuucd.pdb</ProgramDatabaseFile>
-      <ImportLibrary>..\..\lib32uwp\icuucd.lib</ImportLibrary>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Midl>
-      <TypeLibraryName>.\..\..\lib64uwp\icuuc.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\x64\ReleaseUWP/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\x64\ReleaseUWP/</AssemblerListingLocation>
-      <ObjectFileName>.\x64\ReleaseUWP/</ObjectFileName>
-      <ProgramDataBaseFileName>.\x64\ReleaseUWP/</ProgramDataBaseFileName>
-    </ClCompile>
-    <Link>
-      <OutputFile>..\..\bin64uwp\icuuc63.dll</OutputFile>
-      <ProgramDatabaseFile>.\..\..\lib64uwp\icuuc.pdb</ProgramDatabaseFile>
-      <ImportLibrary>..\..\lib64uwp\icuuc.lib</ImportLibrary>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Midl>
-      <TypeLibraryName>.\..\..\lib64uwp\icuucd.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\x64\DebugUWP/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\x64\DebugUWP/</AssemblerListingLocation>
-      <ObjectFileName>.\x64\DebugUWP/</ObjectFileName>
-      <ProgramDataBaseFileName>.\x64\DebugUWP/</ProgramDataBaseFileName>
-    </ClCompile>
-    <Link>
-      <OutputFile>..\..\bin64uwp\icuuc63d.dll</OutputFile>
-      <ProgramDatabaseFile>.\..\..\lib64uwp\icuucd.pdb</ProgramDatabaseFile>
-      <ImportLibrary>..\..\lib64uwp\icuucd.lib</ImportLibrary>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
-    <Midl>
-      <TypeLibraryName>.\..\..\libARMuwp\icuuc.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\ARM\ReleaseUWP/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\ARM\ReleaseUWP/</AssemblerListingLocation>
-      <ObjectFileName>.\ARM\ReleaseUWP/</ObjectFileName>
-      <ProgramDataBaseFileName>.\ARM\ReleaseUWP/</ProgramDataBaseFileName>
-    </ClCompile>
-    <Link>
-      <OutputFile>..\..\binARMuwp\icuuc63.dll</OutputFile>
-      <ProgramDatabaseFile>.\..\..\libARMuwp\icuuc.pdb</ProgramDatabaseFile>
-      <ImportLibrary>..\..\libARMuwp\icuuc.lib</ImportLibrary>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
-    <Midl>
-      <TypeLibraryName>.\..\..\libARMuwp\icuucd.tlb</TypeLibraryName>
-    </Midl>
-    <ClCompile>
-      <PrecompiledHeaderOutputFile>.\ARM\DebugUWP/common.pch</PrecompiledHeaderOutputFile>
-      <AssemblerListingLocation>.\ARM\DebugUWP/</AssemblerListingLocation>
-      <ObjectFileName>.\ARM\DebugUWP/</ObjectFileName>
-      <ProgramDataBaseFileName>.\ARM\DebugUWP/</ProgramDataBaseFileName>
-    </ClCompile>
-    <Link>
-      <OutputFile>..\..\binARMuwp\icuuc63d.dll</OutputFile>
-      <ProgramDatabaseFile>.\..\..\libARMuwp\icuucd.pdb</ProgramDatabaseFile>
-      <ImportLibrary>..\..\libARMuwp\icuucd.lib</ImportLibrary>
+      <TargetMachine>MachineARM64</TargetMachine>
+      <!-- This is so that we can use the existing stubdata icudt.lib and not need a UWP version. -->
+      <AdditionalLibraryDirectories>.\..\..\libARM64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
@@ -285,6 +220,8 @@
    <ClCompile Include="brkeng.cpp" />
    <ClCompile Include="brkiter.cpp" />
    <ClCompile Include="dictbe.cpp" />
+    <ClCompile Include="lstmbe.cpp" />
+    <ClCompile Include="mlbe.cpp" />
    <ClCompile Include="pluralmap.cpp" />
    <ClCompile Include="rbbi.cpp" />
    <ClCompile Include="rbbidata.cpp" />
@@ -316,9 +253,7 @@
    <ClCompile Include="errorcode.cpp" />
    <ClCompile Include="icudataver.cpp" />
    <ClCompile Include="locmap.cpp" />
-    <ClCompile Include="putil.cpp">
-      <CompileAsWinRT>true</CompileAsWinRT>
-    </ClCompile>
+    <ClCompile Include="putil.cpp" />
    <ClCompile Include="umath.cpp" />
    <ClCompile Include="umutex.cpp" />
    <ClCompile Include="utrace.cpp" />
@@ -364,19 +299,27 @@
    <ClCompile Include="punycode.cpp" />
    <ClCompile Include="uidna.cpp" />
    <ClCompile Include="uts46.cpp" />
+    <ClCompile Include="localebuilder.cpp" />
+    <ClCompile Include="ulocbuilder.cpp" />
+    <ClCompile Include="localematcher.cpp" />
+    <ClCompile Include="localeprioritylist.cpp" />
    <ClCompile Include="locavailable.cpp" />
    <ClCompile Include="locbased.cpp" />
    <ClCompile Include="locdispnames.cpp" />
+    <ClCompile Include="locdistance.cpp" />
    <ClCompile Include="locdspnm.cpp" />
    <ClCompile Include="locid.cpp" />
    <ClCompile Include="loclikely.cpp" />
+    <ClCompile Include="loclikelysubtags.cpp" />
    <ClCompile Include="locresdata.cpp" />
    <ClCompile Include="locutil.cpp" />
+    <ClCompile Include="lsr.cpp" />
    <ClCompile Include="resbund.cpp" />
    <ClCompile Include="resbund_cnv.cpp" />
    <ClCompile Include="ucat.cpp" />
    <ClCompile Include="uloc.cpp" />
    <ClCompile Include="uloc_tag.cpp" />
+    <ClCompile Include="ulocale.cpp" />
    <ClCompile Include="ures_cnv.cpp" />
    <ClCompile Include="uresbund.cpp" />
    <ClCompile Include="uresdata.cpp" />
@@ -397,6 +340,7 @@
    <ClCompile Include="ucase.cpp" />
    <ClCompile Include="uchar.cpp" />
    <ClCompile Include="characterproperties.cpp" />
+    <ClCompile Include="emojiprops.cpp" />
    <ClCompile Include="unames.cpp" />
    <ClCompile Include="unifiedcache.cpp" />
    <ClCompile Include="unifilt.cpp" />
@@ -463,6 +407,8 @@
    <ClCompile Include="utext.cpp" />
    <ClCompile Include="utf_impl.cpp" />
    <ClCompile Include="static_unicode_sets.cpp" />
+    <ClCompile Include="restrace.cpp" />
+    <ClCompile Include="fixedstring.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="localsvc.h" />
@@ -473,6 +419,8 @@
    <ClInclude Include="ubidiimp.h" />
    <ClInclude Include="brkeng.h" />
    <ClInclude Include="dictbe.h" />
+    <ClInclude Include="lstmbe.h" />
+    <ClInclude Include="mlbe.h" />
    <ClInclude Include="rbbidata.h" />
    <ClInclude Include="rbbinode.h" />
    <ClInclude Include="rbbirb.h" />
@@ -532,8 +480,14 @@
    <ClInclude Include="ustrfmt.h" />
    <ClInclude Include="util.h" />
    <ClInclude Include="punycode.h" />
+    <ClInclude Include="localeprioritylist.h" />
    <ClInclude Include="locbased.h" />
+    <ClInclude Include="locdistance.h" />
+    <ClInclude Include="charstrmap.h" />
+    <ClInclude Include="uniquecharstr.h" />
+    <ClInclude Include="loclikelysubtags.h" />
    <ClInclude Include="locutil.h" />
+    <ClInclude Include="lsr.h" />
    <ClInclude Include="sharedobject.h" />
    <ClCompile Include="sharedobject.cpp" />
    <ClInclude Include="ulocimp.h" />
@@ -551,7 +505,9 @@
    <ClInclude Include="patternprops.h" />
    <ClInclude Include="propname.h" />
    <ClInclude Include="ruleiter.h" />
+    <ClInclude Include="emojiprops.h" />
    <ClInclude Include="ucase.h" />
+    <ClInclude Include="ulayout_props.h" />
    <ClInclude Include="unisetspan.h" />
    <ClInclude Include="uprops.h" />
    <ClInclude Include="usc_impl.h" />
@@ -570,11 +526,17 @@
    <ClInclude Include="ustr_cnv.h" />
    <ClInclude Include="ustr_imp.h" />
    <ClInclude Include="static_unicode_sets.h" />
+    <ClInclude Include="capi_helper.h" />
+    <ClInclude Include="restrace.h" />
+    <ClInclude Include="fixedstring.h" />
  </ItemGroup>
  <ItemGroup>
    <ResourceCompile Include="common.rc" />
  </ItemGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <Target Name="Build" Condition="'$(SkipUWP)'=='true'">
+    <Message Text="Skipping building the UWP project: $(MSBuildThisFile)" Importance="high" />
+  </Target>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" Condition="'$(SkipUWP)'!='true'" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
 </Project>
@@ -62,8 +62,8 @@ inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,

 static
 inline void
-uprv_arrayCopy(const UChar *src, int32_t srcStart,
-        UChar *dst, int32_t dstStart, int32_t count)
+uprv_arrayCopy(const char16_t *src, int32_t srcStart,
+        char16_t *dst, int32_t dstStart, int32_t count)
 { uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); }

 /**
@@ -21,7 +21,7 @@ U_NAMESPACE_BEGIN
 CStr::CStr(const UnicodeString &in) {
    UErrorCode status = U_ZERO_ERROR;
 #if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8
-    int32_t length = in.extract(0, in.length(), static_cast<char *>(NULL), static_cast<uint32_t>(0));
+    int32_t length = in.extract(0, in.length(), static_cast<char *>(nullptr), static_cast<uint32_t>(0));
    int32_t resultCapacity = 0;
    char *buf = s.getAppendBuffer(length, length, resultCapacity, status);
    if (U_SUCCESS(status)) {
@@ -28,7 +28,7 @@
 * default code page conversion, which will do the best job possible,
 * but may be lossy, depending on the platform.
 *
- * If no other conversion is available, use invariant conversion and substitue
+ * If no other conversion is available, use invariant conversion and substitute
 * '?' for non-invariant characters.
 *
 * Example Usage:
@@ -43,16 +43,16 @@

 U_NAMESPACE_BEGIN

-class U_COMMON_API CStr : public UMemory {
+class U_COMMON_API_CLASS CStr : public UMemory {
  public:
-    CStr(const UnicodeString &in);
-    ~CStr();
-    const char * operator ()() const;
+    U_COMMON_API CStr(const UnicodeString &in);
+    U_COMMON_API ~CStr();
+    U_COMMON_API const char * operator ()() const;

  private:
    CharString s;
-    CStr(const CStr &other);               //  Forbid copying of this class.
-    CStr &operator =(const CStr &other);   //  Forbid assignment.
+    CStr(const CStr &other) = delete;               //  Forbid copying of this class.
+    CStr &operator =(const CStr &other) = delete;   //  Forbid assignment.
 };

 U_NAMESPACE_END
@@ -126,7 +126,7 @@ T_CString_toLowerCase(char* str)

    if (str) {
        do
-            *str = (char)uprv_tolower(*str);
+            *str = uprv_tolower(*str);
        while (*(str++));
    }

@@ -140,7 +140,7 @@ T_CString_toUpperCase(char* str)

    if (str) {
        do
-            *str = (char)uprv_toupper(*str);
+            *str = uprv_toupper(*str);
        while (*(str++));
    }

@@ -189,7 +189,7 @@ T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
 /*
 * Takes a int64_t and fills in  a char* string with that number "radix"-based.
 * Writes at most 21 chars ("-9223372036854775807" plus NUL).
- * Returns the length of the string, not including the terminating NULL.
+ * Returns the length of the string, not including the terminating NUL.
 */
 U_CAPI int32_t U_EXPORT2
 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
@@ -233,16 +233,16 @@ T_CString_stringToInteger(const char *integerString, int32_t radix)

 U_CAPI int U_EXPORT2
 uprv_stricmp(const char *str1, const char *str2) {
-    if(str1==NULL) {
-        if(str2==NULL) {
+    if(str1==nullptr) {
+        if(str2==nullptr) {
            return 0;
        } else {
            return -1;
        }
-    } else if(str2==NULL) {
+    } else if(str2==nullptr) {
        return 1;
    } else {
-        /* compare non-NULL strings lexically with lowercase */
+        /* compare non-null strings lexically with lowercase */
        int rc;
        unsigned char c1, c2;

@@ -272,16 +272,16 @@ uprv_stricmp(const char *str1, const char *str2) {

 U_CAPI int U_EXPORT2
 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
-    if(str1==NULL) {
-        if(str2==NULL) {
+    if(str1==nullptr) {
+        if(str2==nullptr) {
            return 0;
        } else {
            return -1;
        }
-    } else if(str2==NULL) {
+    } else if(str2==nullptr) {
        return 1;
    } else {
+        /* compare non-null strings lexically with lowercase */
+        /* compare non-nullptr strings lexically with lowercase */
        int rc;
        unsigned char c1, c2;

@@ -52,6 +52,8 @@
 U_CAPI UBool U_EXPORT2
 uprv_isASCIILetter(char c);

+// NOTE: For u_asciiToUpper that takes a UChar, see ustr_imp.h
+
 U_CAPI char U_EXPORT2
 uprv_toupper(char c);

@@ -7,6 +7,8 @@
 *******************************************************************************
 */

+#include <utility>
+
 #include "unicode/utypes.h"

 #if !UCONFIG_NO_BREAK_ITERATION
@@ -15,7 +17,11 @@
 #include "dictbe.h"
 #include "unicode/uniset.h"
 #include "unicode/chariter.h"
+#include "unicode/resbund.h"
 #include "unicode/ubrk.h"
+#include "unicode/usetiter.h"
+#include "ubrkimpl.h"
+#include "utracimp.h"
 #include "uvectr32.h"
 #include "uvector.h"
 #include "uassert.h"
@@ -36,7 +42,7 @@ DictionaryBreakEngine::~DictionaryBreakEngine() {
 }

 UBool
-DictionaryBreakEngine::handles(UChar32 c) const {
+DictionaryBreakEngine::handles(UChar32 c, const char*) const {
    return fSet.contains(c);
 }

@@ -44,26 +50,29 @@ int32_t
 DictionaryBreakEngine::findBreaks( UText *text,
                                 int32_t startPos,
                                 int32_t endPos,
-                                 UVector32 &foundBreaks ) const {
-    (void)startPos;            // TODO: remove this param?
+                                 UVector32 &foundBreaks,
+                                 UBool isPhraseBreaking,
+                                 UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
    int32_t result = 0;

    // Find the span of characters included in the set.
    //   The span to break begins at the current position in the text, and
    //   extends towards the start or end of the text, depending on 'reverse'.

-    int32_t start = (int32_t)utext_getNativeIndex(text);
+    utext_setNativeIndex(text, startPos);
+    int32_t start = static_cast<int32_t>(utext_getNativeIndex(text));
    int32_t current;
    int32_t rangeStart;
    int32_t rangeEnd;
    UChar32 c = utext_current32(text);
-    while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
+    while ((current = static_cast<int32_t>(utext_getNativeIndex(text))) < endPos && fSet.contains(c)) {
        utext_next32(text);         // TODO:  recast loop for postincrement
        c = utext_current32(text);
    }
    rangeStart = start;
    rangeEnd = current;
-    result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
+    result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, isPhraseBreaking, status);
    utext_setNativeIndex(text, current);
    
    return result;
@@ -101,8 +110,8 @@ private:
    int32_t   cpLengths[POSSIBLE_WORD_LIST_MAX];   // Word Lengths, in code points.

 public:
-    PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {};
-    ~PossibleWord() {};
+    PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {}
+    ~PossibleWord() {}
  
    // Fill the list of candidates if needed, select the longest, and return the number found
    int32_t   candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd );
@@ -110,28 +119,28 @@ public:
    // Select the currently marked candidate, point after it in the text, and invalidate self
    int32_t   acceptMarked( UText *text );
  
-    // Back up from the current candidate to the next shorter one; return TRUE if that exists
+    // Back up from the current candidate to the next shorter one; return true if that exists
    // and point the text after it
    UBool     backUp( UText *text );
  
    // Return the longest prefix this candidate location shares with a dictionary word
    // Return value is in code points.
-    int32_t   longestPrefix() { return prefix; };
+    int32_t   longestPrefix() { return prefix; }
  
    // Mark the current candidate as the one we like
-    void      markCurrent() { mark = current; };
+    void      markCurrent() { mark = current; }
    
    // Get length in code points of the marked word.
-    int32_t   markedCPLength() { return cpLengths[mark]; };
+    int32_t   markedCPLength() { return cpLengths[mark]; }
 };


 int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) {
    // TODO: If getIndex is too slow, use offset < 0 and add discardAll()
-    int32_t start = (int32_t)utext_getNativeIndex(text);
+    int32_t start = static_cast<int32_t>(utext_getNativeIndex(text));
    if (start != offset) {
        offset = start;
-        count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix);
+        count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, nullptr, &prefix);
        // Dictionary leaves text after longest prefix, not longest word. Back up.
        if (count <= 0) {
            utext_setNativeIndex(text, start);
@@ -156,9 +165,9 @@ UBool
 PossibleWord::backUp( UText *text ) {
    if (current > 0) {
        utext_setNativeIndex(text, offset + cuLengths[--current]);
-        return TRUE;
+        return true;
    }
-    return FALSE;
+    return false;
 }

 /*
@@ -176,7 +185,7 @@ static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3;
 // dictionary word, with a preceding word
 static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3;

-// Ellision character
+// Elision character
 static const int32_t THAI_PAIYANNOI = 0x0E2F;

 // Repeat character
@@ -192,13 +201,15 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
    : DictionaryBreakEngine(),
      fDictionary(adoptDictionary)
 {
-    fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
+    UnicodeSet thaiWordSet(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]"), status);
    if (U_SUCCESS(status)) {
-        setCharacters(fThaiWordSet);
+        setCharacters(thaiWordSet);
    }
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
+    fMarkSet.applyPattern(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
    fMarkSet.add(0x0020);
-    fEndWordSet = fThaiWordSet;
+    fEndWordSet = thaiWordSet;
    fEndWordSet.remove(0x0E31);             // MAI HAN-AKAT
    fEndWordSet.remove(0x0E40, 0x0E44);     // SARA E through SARA AI MAIMALAI
    fBeginWordSet.add(0x0E01, 0x0E2E);      // KO KAI through HO NOKHUK
@@ -211,6 +222,7 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
    fEndWordSet.compact();
    fBeginWordSet.compact();
    fSuffixSet.compact();
+    UTRACE_EXIT_STATUS(status);
 }

 ThaiBreakEngine::~ThaiBreakEngine() {
@@ -221,7 +233,10 @@ int32_t
 ThaiBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
-                                                UVector32 &foundBreaks ) const {
+                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
+                                                UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
    utext_setNativeIndex(text, rangeStart);
    utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
    if (utext_getNativeIndex(text) >= rangeEnd) {
@@ -234,12 +249,11 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
    int32_t cpWordLength = 0;    // Word Length in Code Points.
    int32_t cuWordLength = 0;    // Word length in code units (UText native indexing)
    int32_t current;
-    UErrorCode status = U_ZERO_ERROR;
    PossibleWord words[THAI_LOOKAHEAD];
    
    utext_setNativeIndex(text, rangeStart);
    
-    while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+    while (U_SUCCESS(status) && (current = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd) {
        cpWordLength = 0;
        cuWordLength = 0;

@@ -255,20 +269,16 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
        // If there was more than one, see which one can take us forward the most words
        else if (candidates > 1) {
            // If we're already at the end of the range, we're done
-            if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+            if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
                goto foundBest;
            }
            do {
-                int32_t wordsMatched = 1;
                if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
-                    if (wordsMatched < 2) {
-                        // Followed by another dictionary word; mark first word as a good candidate
-                        words[wordsFound%THAI_LOOKAHEAD].markCurrent();
-                        wordsMatched = 2;
-                    }
+                    // Followed by another dictionary word; mark first word as a good candidate
+                    words[wordsFound%THAI_LOOKAHEAD].markCurrent();
                    
                    // If we're already at the end of the range, we're done
-                    if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+                    if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
                        goto foundBest;
                    }
                    
@@ -298,7 +308,7 @@ foundBest:
        // The text iterator should now be positioned at the end of the word we found.
        
        UChar32 uc = 0;
-        if ((int32_t)utext_getNativeIndex(text) < rangeEnd &&  cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
+        if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) {
            // if it is a dictionary word, do nothing. If it isn't, then if there is
            // no preceding word, or the non-word shares less than the minimum threshold
            // of characters with a dictionary word, then scan to resynchronize
@@ -310,9 +320,9 @@ foundBest:
                UChar32 pc;
                int32_t chars = 0;
                for (;;) {
-                    int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+                    int32_t pcIndex = static_cast<int32_t>(utext_getNativeIndex(text));
                    pc = utext_next32(text);
-                    int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+                    int32_t pcSize = static_cast<int32_t>(utext_getNativeIndex(text)) - pcIndex;
                    chars += pcSize;
                    remaining -= pcSize;
                    if (remaining <= 0) {
@@ -346,28 +356,28 @@ foundBest:
                utext_setNativeIndex(text, current+cuWordLength);
            }
        }
-        
+
        // Never stop before a combining mark.
        int32_t currPos;
-        while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+        while ((currPos = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
            utext_next32(text);
-            cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+            cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - currPos;
        }
-        
+
        // Look ahead for possible suffixes if a dictionary word does not follow.
        // We do this in code rather than using a rule so that the heuristic
        // resynch continues to function. For example, one of the suffix characters
        // could be a typo in the middle of a word.
-        if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cuWordLength > 0) {
+        if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cuWordLength > 0) {
            if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
                && fSuffixSet.contains(uc = utext_current32(text))) {
                if (uc == THAI_PAIYANNOI) {
                    if (!fSuffixSet.contains(utext_previous32(text))) {
                        // Skip over previous end and PAIYANNOI
                        utext_next32(text);
-                        int32_t paiyannoiIndex = (int32_t)utext_getNativeIndex(text);
+                        int32_t paiyannoiIndex = static_cast<int32_t>(utext_getNativeIndex(text));
                        utext_next32(text);
-                        cuWordLength += (int32_t)utext_getNativeIndex(text) - paiyannoiIndex;    // Add PAIYANNOI to word
+                        cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - paiyannoiIndex; // Add PAIYANNOI to word
                        uc = utext_current32(text);     // Fetch next character
                    }
                    else {
@@ -379,9 +389,9 @@ foundBest:
                    if (utext_previous32(text) != THAI_MAIYAMOK) {
                        // Skip over previous end and MAIYAMOK
                        utext_next32(text);
-                        int32_t maiyamokIndex = (int32_t)utext_getNativeIndex(text);
+                        int32_t maiyamokIndex = static_cast<int32_t>(utext_getNativeIndex(text));
                        utext_next32(text);
-                        cuWordLength += (int32_t)utext_getNativeIndex(text) - maiyamokIndex;    // Add MAIYAMOK to word
+                        cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - maiyamokIndex; // Add MAIYAMOK to word
                    }
                    else {
                        // Restore prior position
@@ -434,13 +444,15 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
    : DictionaryBreakEngine(),
      fDictionary(adoptDictionary)
 {
-    fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
+    UnicodeSet laoWordSet(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]]"), status);
    if (U_SUCCESS(status)) {
-        setCharacters(fLaoWordSet);
+        setCharacters(laoWordSet);
    }
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
+    fMarkSet.applyPattern(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
    fMarkSet.add(0x0020);
-    fEndWordSet = fLaoWordSet;
+    fEndWordSet = laoWordSet;
    fEndWordSet.remove(0x0EC0, 0x0EC4);     // prefix vowels
    fBeginWordSet.add(0x0E81, 0x0EAE);      // basic consonants (including holes for corresponding Thai characters)
    fBeginWordSet.add(0x0EDC, 0x0EDD);      // digraph consonants (no Thai equivalent)
@@ -450,6 +462,7 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
    fMarkSet.compact();
    fEndWordSet.compact();
    fBeginWordSet.compact();
+    UTRACE_EXIT_STATUS(status);
 }

 LaoBreakEngine::~LaoBreakEngine() {
@@ -460,7 +473,10 @@ int32_t
 LaoBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
-                                                UVector32 &foundBreaks ) const {
+                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
+                                                UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
    if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
        return 0;       // Not enough characters for two words
    }
@@ -469,12 +485,11 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
    int32_t cpWordLength = 0;
    int32_t cuWordLength = 0;
    int32_t current;
-    UErrorCode status = U_ZERO_ERROR;
    PossibleWord words[LAO_LOOKAHEAD];
-    
+
    utext_setNativeIndex(text, rangeStart);
-    
-    while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+
+    while (U_SUCCESS(status) && (current = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd) {
        cuWordLength = 0;
        cpWordLength = 0;

@@ -494,16 +509,12 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
                goto foundBest;
            }
            do {
-                int32_t wordsMatched = 1;
                if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
-                    if (wordsMatched < 2) {
-                        // Followed by another dictionary word; mark first word as a good candidate
-                        words[wordsFound%LAO_LOOKAHEAD].markCurrent();
-                        wordsMatched = 2;
-                    }
+                    // Followed by another dictionary word; mark first word as a good candidate
+                    words[wordsFound%LAO_LOOKAHEAD].markCurrent();
                    
                    // If we're already at the end of the range, we're done
-                    if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+                    if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
                        goto foundBest;
                    }
                    
@@ -526,11 +537,11 @@ foundBest:
        }
        
        // We come here after having either found a word or not. We look ahead to the
-        // next word. If it's not a dictionary word, we will combine it withe the word we
+        // next word. If it's not a dictionary word, we will combine it with the word we
        // just found (if there is one), but only if the preceding word does not exceed
        // the threshold.
        // The text iterator should now be positioned at the end of the word we found.
-        if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) {
+        if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) {
            // if it is a dictionary word, do nothing. If it isn't, then if there is
            // no preceding word, or the non-word shares less than the minimum threshold
            // of characters with a dictionary word, then scan to resynchronize
@@ -543,9 +554,9 @@ foundBest:
                UChar32 uc;
                int32_t chars = 0;
                for (;;) {
-                    int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+                    int32_t pcIndex = static_cast<int32_t>(utext_getNativeIndex(text));
                    pc = utext_next32(text);
-                    int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+                    int32_t pcSize = static_cast<int32_t>(utext_getNativeIndex(text)) - pcIndex;
                    chars += pcSize;
                    remaining -= pcSize;
                    if (remaining <= 0) {
@@ -579,9 +590,9 @@ foundBest:
        
        // Never stop before a combining mark.
        int32_t currPos;
-        while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+        while ((currPos = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
            utext_next32(text);
-            cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+            cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - currPos;
        }
        
        // Look ahead for possible suffixes if a dictionary word does not follow.
@@ -630,19 +641,21 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
    : DictionaryBreakEngine(),
      fDictionary(adoptDictionary)
 {
-    fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
-    if (U_SUCCESS(status)) {
-        setCharacters(fBurmeseWordSet);
-    }
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
-    fMarkSet.add(0x0020);
-    fEndWordSet = fBurmeseWordSet;
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
    fBeginWordSet.add(0x1000, 0x102A);      // basic consonants and independent vowels
+    fEndWordSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]"), status);
+    fMarkSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
+    fMarkSet.add(0x0020);
+    if (U_SUCCESS(status)) {
+        setCharacters(fEndWordSet);
+    }

    // Compact for caching.
    fMarkSet.compact();
    fEndWordSet.compact();
    fBeginWordSet.compact();
+    UTRACE_EXIT_STATUS(status);
 }

 BurmeseBreakEngine::~BurmeseBreakEngine() {
@@ -653,7 +666,10 @@ int32_t
 BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
-                                                UVector32 &foundBreaks ) const {
+                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
+                                                UErrorCode& status ) const {
+    if (U_FAILURE(status)) return 0;
    if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
        return 0;       // Not enough characters for two words
    }
@@ -662,12 +678,11 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
    int32_t cpWordLength = 0;
    int32_t cuWordLength = 0;
    int32_t current;
-    UErrorCode status = U_ZERO_ERROR;
    PossibleWord words[BURMESE_LOOKAHEAD];
-    
+
    utext_setNativeIndex(text, rangeStart);
-    
-    while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+
+    while (U_SUCCESS(status) && (current = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd) {
        cuWordLength = 0;
        cpWordLength = 0;

@@ -687,16 +702,12 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
                goto foundBest;
            }
            do {
-                int32_t wordsMatched = 1;
                if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
-                    if (wordsMatched < 2) {
-                        // Followed by another dictionary word; mark first word as a good candidate
-                        words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
-                        wordsMatched = 2;
-                    }
+                    // Followed by another dictionary word; mark first word as a good candidate
+                    words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
                    
                    // If we're already at the end of the range, we're done
-                    if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+                    if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
                        goto foundBest;
                    }
                    
@@ -719,11 +730,11 @@ foundBest:
        }
        
        // We come here after having either found a word or not. We look ahead to the
-        // next word. If it's not a dictionary word, we will combine it withe the word we
+        // next word. If it's not a dictionary word, we will combine it with the word we
        // just found (if there is one), but only if the preceding word does not exceed
        // the threshold.
        // The text iterator should now be positioned at the end of the word we found.
-        if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) {
+        if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) {
            // if it is a dictionary word, do nothing. If it isn't, then if there is
            // no preceding word, or the non-word shares less than the minimum threshold
            // of characters with a dictionary word, then scan to resynchronize
@@ -736,9 +747,9 @@ foundBest:
                UChar32 uc;
                int32_t chars = 0;
                for (;;) {
-                    int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+                    int32_t pcIndex = static_cast<int32_t>(utext_getNativeIndex(text));
                    pc = utext_next32(text);
-                    int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+                    int32_t pcSize = static_cast<int32_t>(utext_getNativeIndex(text)) - pcIndex;
                    chars += pcSize;
                    remaining -= pcSize;
                    if (remaining <= 0) {
@@ -772,9 +783,9 @@ foundBest:
        
        // Never stop before a combining mark.
        int32_t currPos;
-        while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+        while ((currPos = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
            utext_next32(text);
-            cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+            cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - currPos;
        }
        
        // Look ahead for possible suffixes if a dictionary word does not follow.
@@ -823,13 +834,15 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
    : DictionaryBreakEngine(),
      fDictionary(adoptDictionary)
 {
-    fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
+    UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]]"), status);
    if (U_SUCCESS(status)) {
-        setCharacters(fKhmerWordSet);
+        setCharacters(khmerWordSet);
    }
-    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
+    fMarkSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
    fMarkSet.add(0x0020);
-    fEndWordSet = fKhmerWordSet;
+    fEndWordSet = khmerWordSet;
    fBeginWordSet.add(0x1780, 0x17B3);
    //fBeginWordSet.add(0x17A3, 0x17A4);      // deprecated vowels
    //fEndWordSet.remove(0x17A5, 0x17A9);     // Khmer independent vowels that can't end a word
@@ -848,6 +861,7 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
    fEndWordSet.compact();
    fBeginWordSet.compact();
 //    fSuffixSet.compact();
+    UTRACE_EXIT_STATUS(status);
 }

 KhmerBreakEngine::~KhmerBreakEngine() {
@@ -858,7 +872,10 @@ int32_t
 KhmerBreakEngine::divideUpDictionaryRange( UText *text,
                                                int32_t rangeStart,
                                                int32_t rangeEnd,
-                                                UVector32 &foundBreaks ) const {
+                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
+                                                UErrorCode& status ) const {
+    if (U_FAILURE(status)) return 0;
    if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
        return 0;       // Not enough characters for two words
    }
@@ -867,12 +884,11 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
    int32_t cpWordLength = 0;
    int32_t cuWordLength = 0;
    int32_t current;
-    UErrorCode status = U_ZERO_ERROR;
    PossibleWord words[KHMER_LOOKAHEAD];

    utext_setNativeIndex(text, rangeStart);

-    while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+    while (U_SUCCESS(status) && (current = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd) {
        cuWordLength = 0;
        cpWordLength = 0;

@@ -889,20 +905,16 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
        // If there was more than one, see which one can take us forward the most words
        else if (candidates > 1) {
            // If we're already at the end of the range, we're done
-            if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+            if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
                goto foundBest;
            }
            do {
-                int32_t wordsMatched = 1;
                if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
-                    if (wordsMatched < 2) {
-                        // Followed by another dictionary word; mark first word as a good candidate
-                        words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
-                        wordsMatched = 2;
-                    }
+                    // Followed by another dictionary word; mark first word as a good candidate
+                    words[wordsFound % KHMER_LOOKAHEAD].markCurrent();

                    // If we're already at the end of the range, we're done
-                    if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+                    if (static_cast<int32_t>(utext_getNativeIndex(text)) >= rangeEnd) {
                        goto foundBest;
                    }

@@ -929,7 +941,7 @@ foundBest:
        // just found (if there is one), but only if the preceding word does not exceed
        // the threshold.
        // The text iterator should now be positioned at the end of the word we found.
-        if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
+        if (static_cast<int32_t>(utext_getNativeIndex(text)) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
            // if it is a dictionary word, do nothing. If it isn't, then if there is
            // no preceding word, or the non-word shares less than the minimum threshold
            // of characters with a dictionary word, then scan to resynchronize
@@ -942,9 +954,9 @@ foundBest:
                UChar32 uc;
                int32_t chars = 0;
                for (;;) {
-                    int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+                    int32_t pcIndex = static_cast<int32_t>(utext_getNativeIndex(text));
                    pc = utext_next32(text);
-                    int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+                    int32_t pcSize = static_cast<int32_t>(utext_getNativeIndex(text)) - pcIndex;
                    chars += pcSize;
                    remaining -= pcSize;
                    if (remaining <= 0) {
@@ -977,9 +989,9 @@ foundBest:

        // Never stop before a combining mark.
        int32_t currPos;
-        while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+        while ((currPos = static_cast<int32_t>(utext_getNativeIndex(text))) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
            utext_next32(text);
-            cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
+            cuWordLength += static_cast<int32_t>(utext_getNativeIndex(text)) - currPos;
        }

        // Look ahead for possible suffixes if a dictionary word does not follow.
@@ -1042,32 +1054,48 @@ foundBest:
 */
 static const uint32_t kuint32max = 0xFFFFFFFF;
 CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
-: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
-    // Korean dictionary only includes Hangul syllables
-    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
-    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
-    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
-    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
+: DictionaryBreakEngine(), fDictionary(adoptDictionary), isCj(false) {
+    UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+    UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
+    fMlBreakEngine = nullptr;
    nfkcNorm2 = Normalizer2::getNFKCInstance(status);
+    // Korean dictionary only includes Hangul syllables
+    fHangulWordSet.applyPattern(UnicodeString(u"[\\uac00-\\ud7a3]"), status);
+    fHangulWordSet.compact();
+    // Digits, open punctuation and Alphabetic characters.
+    fDigitOrOpenPunctuationOrAlphabetSet.applyPattern(
+        UnicodeString(u"[[:Nd:][:Pi:][:Ps:][:Alphabetic:]]"), status);
+    fDigitOrOpenPunctuationOrAlphabetSet.compact();
+    fClosePunctuationSet.applyPattern(UnicodeString(u"[[:Pc:][:Pd:][:Pe:][:Pf:][:Po:]]"), status);
+    fClosePunctuationSet.compact();

-    if (U_SUCCESS(status)) {
-        // handle Korean and Japanese/Chinese using different dictionaries
-        if (type == kKorean) {
+    // handle Korean and Japanese/Chinese using different dictionaries
+    if (type == kKorean) {
+        if (U_SUCCESS(status)) {
            setCharacters(fHangulWordSet);
-        } else { //Chinese and Japanese
-            UnicodeSet cjSet;
-            cjSet.addAll(fHanWordSet);
-            cjSet.addAll(fKatakanaWordSet);
-            cjSet.addAll(fHiraganaWordSet);
-            cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
-            cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
+        }
+    } else { // Chinese and Japanese
+        UnicodeSet cjSet(UnicodeString(u"[[:Han:][:Hiragana:][:Katakana:]\\u30fc\\uff70\\uff9e\\uff9f]"), status);
+        isCj = true;
+        if (U_SUCCESS(status)) {
            setCharacters(cjSet);
+#if UCONFIG_USE_ML_PHRASE_BREAKING
+            fMlBreakEngine = new MlBreakEngine(fDigitOrOpenPunctuationOrAlphabetSet,
+                                               fClosePunctuationSet, status);
+            if (fMlBreakEngine == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+            }
+#else
+            initJapanesePhraseParameter(status);
+#endif
        }
    }
+    UTRACE_EXIT_STATUS(status);
 }

 CjkBreakEngine::~CjkBreakEngine(){
    delete fDictionary;
+    delete fMlBreakEngine;
 }

 // The katakanaCost values below are based on the length frequencies of all
@@ -1088,14 +1116,12 @@ static inline bool isKatakana(UChar32 value) {
            (value >= 0xFF66 && value <= 0xFF9f);
 }

-
 // Function for accessing internal utext flags.
 //   Replicates an internal UText function.

 static inline int32_t utext_i32_flag(int32_t bitIndex) {
-    return (int32_t)1 << bitIndex;
+    return static_cast<int32_t>(1) << bitIndex;
 }
-
       
 /*
 * @param text A UText representing the text
@@ -1108,7 +1134,10 @@ int32_t
 CjkBreakEngine::divideUpDictionaryRange( UText *inText,
        int32_t rangeStart,
        int32_t rangeEnd,
-        UVector32 &foundBreaks ) const {
+        UVector32 &foundBreaks,
+        UBool isPhraseBreaking,
+        UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
    if (rangeStart >= rangeEnd) {
        return 0;
    }
@@ -1117,12 +1146,9 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
    UnicodeString inString;

    // inputMap[inStringIndex] = corresponding native index from UText inText.
-    // If NULL then mapping is 1:1
+    // If nullptr then mapping is 1:1
    LocalPointer<UVector32>     inputMap;

-    UErrorCode     status      = U_ZERO_ERROR;
-
-
    // if UText has the input string as one contiguous UTF-16 chunk
    if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) &&
         inText->chunkNativeStart <= rangeStart &&
@@ -1131,7 +1157,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,

        // Input UText is in one contiguous UTF-16 chunk.
        // Use Read-only aliasing UnicodeString.
-        inString.setTo(FALSE,
+        inString.setTo(false,
                       inText->chunkContents + rangeStart - inText->chunkNativeStart,
                       rangeEnd - rangeStart);
    } else {
@@ -1141,14 +1167,14 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
        int32_t limit = rangeEnd;
        U_ASSERT(limit <= utext_nativeLength(inText));
        if (limit > utext_nativeLength(inText)) {
-            limit = (int32_t)utext_nativeLength(inText);
+            limit = static_cast<int32_t>(utext_nativeLength(inText));
        }
        inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status);
        if (U_FAILURE(status)) {
            return 0;
        }
        while (utext_getNativeIndex(inText) < limit) {
-            int32_t nativePosition = (int32_t)utext_getNativeIndex(inText);
+            int32_t nativePosition = static_cast<int32_t>(utext_getNativeIndex(inText));
            UChar32 c = utext_next32(inText);
            U_ASSERT(c != U_SENTINEL);
            inString.append(c);
@@ -1204,8 +1230,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
                inputMap->elementAti(inString.length()) : inString.length()+rangeStart;
        normalizedMap->addElement(nativeEnd, status);

-        inputMap.moveFrom(normalizedMap);
-        inString.moveFrom(normalizedInput);
+        inputMap = std::move(normalizedMap);
+        inString = std::move(normalizedInput);
    }

    int32_t numCodePts = inString.countChar32();
@@ -1236,7 +1262,15 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
            }
        }
    }
-                
+
+#if UCONFIG_USE_ML_PHRASE_BREAKING
+    // PhraseBreaking is supported in ja and ko; MlBreakEngine only supports ja.
+    if (isPhraseBreaking && isCj) {
+        return fMlBreakEngine->divideUpRange(inText, rangeStart, rangeEnd, foundBreaks, inString,
+                                             inputMap, status);
+    }
+#endif
+
    // bestSnlp[i] is the snlp of the best segmentation of the first i
    // code points in the range to be matched.
    UVector32 bestSnlp(numCodePts + 1, status);
@@ -1270,16 +1304,16 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
    int32_t ix = 0;
    bool is_prev_katakana = false;
    for (int32_t i = 0;  i < numCodePts;  ++i, ix = inString.moveIndex32(ix, 1)) {
-        if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) {
+        if (static_cast<uint32_t>(bestSnlp.elementAti(i)) == kuint32max) {
            continue;
        }

        int32_t count;
        utext_setNativeIndex(&fu, ix);
        count = fDictionary->matches(&fu, maxWordSize, numCodePts,
-                             NULL, lengths.getBuffer(), values.getBuffer(), NULL);
+                             nullptr, lengths.getBuffer(), values.getBuffer(), nullptr);
                             // Note: lengths is filled with code point lengths
-                             //       The NULL parameter is the ignored code unit lengths.
+                             //       The nullptr parameter is the ignored code unit lengths.

        // if there are no single character matches found in the dictionary 
        // starting with this character, treat character as a 1-character word 
@@ -1293,9 +1327,9 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
        }

        for (int32_t j = 0; j < count; j++) {
-            uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)values.elementAti(j);
+            uint32_t newSnlp = static_cast<uint32_t>(bestSnlp.elementAti(i)) + static_cast<uint32_t>(values.elementAti(j));
            int32_t ln_j_i = lengths.elementAti(j) + i;
-            if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) {
+            if (newSnlp < static_cast<uint32_t>(bestSnlp.elementAti(ln_j_i))) {
                bestSnlp.setElementAt(newSnlp, ln_j_i);
                prev.setElementAt(i, ln_j_i);
            }
@@ -1319,7 +1353,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
            }
            if (katakanaRunLength < kMaxKatakanaGroupLength) {
                uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength);
-                if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) {
+                if (newSnlp < static_cast<uint32_t>(bestSnlp.elementAti(i + katakanaRunLength))) {
                    bestSnlp.setElementAt(newSnlp, i+katakanaRunLength);
                    prev.setElementAt(i, i+katakanaRunLength);  // prev[j] = i;
                }
@@ -1337,9 +1371,34 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,

    int32_t numBreaks = 0;
    // No segmentation found, set boundary to end of range
-    if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
+    if (static_cast<uint32_t>(bestSnlp.elementAti(numCodePts)) == kuint32max) {
        t_boundary.addElement(numCodePts, status);
        numBreaks++;
+    } else if (isPhraseBreaking) {
+        t_boundary.addElement(numCodePts, status);
+        if(U_SUCCESS(status)) {
+            numBreaks++;
+            int32_t prevIdx = numCodePts;
+
+            int32_t codeUnitIdx = -1;
+            int32_t prevCodeUnitIdx = -1;
+            int32_t length = -1;
+            for (int32_t i = prev.elementAti(numCodePts); i > 0; i = prev.elementAti(i)) {
+                codeUnitIdx = inString.moveIndex32(0, i);
+                prevCodeUnitIdx = inString.moveIndex32(0, prevIdx);
+                // Calculate the length by using the code unit.
+                length = prevCodeUnitIdx - codeUnitIdx;
+                prevIdx = i;
+                // Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana
+                // characters don't occur.
+                if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))
+                    && (!isKatakana(inString.char32At(inString.moveIndex32(codeUnitIdx, -1)))
+                           || !isKatakana(inString.char32At(codeUnitIdx)))) {
+                    t_boundary.addElement(i, status);
+                    numBreaks++;
+                }
+            }
+        }
    } else {
        for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
            t_boundary.addElement(i, status);
@@ -1360,7 +1419,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
    // while reversing t_boundary and pushing values to foundBreaks.
    int32_t prevCPPos = -1;
    int32_t prevUTextPos = -1;
-    for (int32_t i = numBreaks-1; i >= 0; i--) {
+    int32_t correctedNumBreaks = 0;
+    for (int32_t i = numBreaks - 1; i >= 0; i--) {
        int32_t cpPos = t_boundary.elementAti(i);
        U_ASSERT(cpPos > prevCPPos);
        int32_t utextPos =  inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
@@ -1368,7 +1428,15 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
        if (utextPos > prevUTextPos) {
            // Boundaries are added to foundBreaks output in ascending order.
            U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
-            foundBreaks.push(utextPos, status);
+            // In phrase breaking, there has to be a breakpoint between Cj character and close
+            // punctuation.
+            // E.g.［携帯電話］正しい選択 -> ［携帯▁電話］▁正しい▁選択 -> breakpoint between ］ and 正
+            if (utextPos != rangeStart
+                || (isPhraseBreaking && utextPos > 0
+                       && fClosePunctuationSet.contains(utext_char32At(inText, utextPos - 1)))) {
+                foundBreaks.push(utextPos, status);
+                correctedNumBreaks++;
+            }
        } else {
            // Normalization expanded the input text, the dictionary found a boundary
            // within the expansion, giving two boundaries with the same index in the
@@ -1380,9 +1448,52 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
    }
    (void)prevCPPos; // suppress compiler warnings about unused variable

+    UChar32 nextChar = utext_char32At(inText, rangeEnd);
+    if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) {
+        // In phrase breaking, there has to be a breakpoint between Cj character and
+        // the number/open punctuation.
+        // E.g. る文字「そうだ、京都」->る▁文字▁「そうだ、▁京都」-> breakpoint between 字 and「
+        // E.g. 乗車率９０％程度だろうか -> 乗車▁率▁９０％▁程度だろうか -> breakpoint between 率 and ９
+        // E.g. しかもロゴがＵｎｉｃｏｄｅ！ -> しかも▁ロゴが▁Ｕｎｉｃｏｄｅ！-> breakpoint between が and Ｕ
+        if (isPhraseBreaking) {
+            if (!fDigitOrOpenPunctuationOrAlphabetSet.contains(nextChar)) {
+                foundBreaks.popi();
+                correctedNumBreaks--;
+            }
+        } else {
+            foundBreaks.popi();
+            correctedNumBreaks--;
+        }
+    }
+
    // inString goes out of scope
    // inputMap goes out of scope
-    return numBreaks;
+    return correctedNumBreaks;
+}
+
+void CjkBreakEngine::initJapanesePhraseParameter(UErrorCode& error) {
     // Fills fSkipSet in two steps, passing the same UErrorCode to both helpers:
     //   1. loadJapaneseExtensions: strings under "extensions" in the "ja" bundle.
     //   2. loadHiragana: each code point of the [:Hiragana:] set.
     // The constructor's Chinese/Japanese branch calls this when
     // UCONFIG_USE_ML_PHRASE_BREAKING is not enabled.
+    loadJapaneseExtensions(error);
+    loadHiragana(error);
+}
+
+void CjkBreakEngine::loadJapaneseExtensions(UErrorCode& error) {
     // Inserts every string found under the "extensions" key of the "ja" bundle in
     // U_ICUDATA_BRKITR into fSkipSet. The stored value is always 1; the visible
     // lookup in divideUpDictionaryRange uses containsKey(), so the value appears to
     // be a placeholder.
+    const char* tag = "extensions";
+    ResourceBundle ja(U_ICUDATA_BRKITR, "ja", error);
+    if (U_SUCCESS(error)) {
+        ResourceBundle bundle = ja.get(tag, error);
         // The loop ends when the bundle has no more entries or error becomes a failure.
+        while (U_SUCCESS(error) && bundle.hasNext()) {
+            fSkipSet.puti(bundle.getNextString(error), 1, error);
+        }
+    }
+}
+
+void CjkBreakEngine::loadHiragana(UErrorCode& error) {
     // Inserts each code point of the [:Hiragana:] UnicodeSet into fSkipSet, keyed by
     // a UnicodeString built from that single code point, with value 1.
+    UnicodeSet hiraganaWordSet(UnicodeString(u"[:Hiragana:]"), error);
+    hiraganaWordSet.compact();
+    UnicodeSetIterator iterator(hiraganaWordSet);
     // NOTE(review): error is not tested between building the set and iterating it;
     // a failure is only passed on to puti(). Confirm that is the intended handling.
+    while (iterator.next()) {
+        fSkipSet.puti(UnicodeString(iterator.getCodepoint()), 1, error);
+    }
 }
 #endif

@@ -15,11 +15,14 @@
 #include "unicode/utext.h"

 #include "brkeng.h"
+#include "hash.h"
+#include "mlbe.h"
 #include "uvectr32.h"

 U_NAMESPACE_BEGIN

 class DictionaryMatcher;
+class MlBreakEngine;
 class Normalizer2;

 /*******************************************************************
@@ -59,26 +62,30 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
   * a particular kind of break.</p>
   *
   * @param c A character which begins a run that the engine might handle
-   * @return TRUE if this engine handles the particular character and break
+   * @param locale The locale.
+   * @return true if this engine handles the particular character and break
   * type.
   */
-  virtual UBool handles(UChar32 c) const;
+  virtual UBool handles(UChar32 c, const char* locale) const override;

  /**
   * <p>Find any breaks within a run in the supplied text.</p>
   *
   * @param text A UText representing the text. The iterator is left at
-   * the end of the run of characters which the engine is capable of handling 
+   * the end of the run of characters which the engine is capable of handling
   * that starts from the first character in the range.
   * @param startPos The start of the run within the supplied text.
   * @param endPos The end of the run within the supplied text.
   * @param foundBreaks vector of int32_t to receive the break positions
+   * @param status Information on any errors encountered.
   * @return The number of breaks found.
   */
  virtual int32_t findBreaks( UText *text,
                              int32_t startPos,
                              int32_t endPos,
-                              UVector32 &foundBreaks ) const;
+                              UVector32 &foundBreaks,
+                              UBool isPhraseBreaking,
+                              UErrorCode& status ) const override;

 protected:

@@ -96,12 +103,15 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
  * @param rangeStart The start of the range of dictionary characters
  * @param rangeEnd The end of the range of dictionary characters
  * @param foundBreaks Output of C array of int32_t break positions, or 0
+  * @param status Information on any errors encountered.
  * @return The number of breaks found
  */
  virtual int32_t divideUpDictionaryRange( UText *text,
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
-                                           UVector32 &foundBreaks ) const = 0;
+                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
+                                           UErrorCode& status) const = 0;

 };

@@ -123,7 +133,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
     * @internal
     */

-  UnicodeSet                fThaiWordSet;
  UnicodeSet                fEndWordSet;
  UnicodeSet                fBeginWordSet;
  UnicodeSet                fSuffixSet;
@@ -153,12 +162,15 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
  * @param rangeStart The start of the range of dictionary characters
  * @param rangeEnd The end of the range of dictionary characters
  * @param foundBreaks Output of C array of int32_t break positions, or 0
+  * @param status Information on any errors encountered.
  * @return The number of breaks found
  */
  virtual int32_t divideUpDictionaryRange( UText *text,
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
-                                           UVector32 &foundBreaks ) const;
+                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
+                                           UErrorCode& status) const override;

 };

@@ -180,7 +192,6 @@ class LaoBreakEngine : public DictionaryBreakEngine {
     * @internal
     */

-  UnicodeSet                fLaoWordSet;
  UnicodeSet                fEndWordSet;
  UnicodeSet                fBeginWordSet;
  UnicodeSet                fMarkSet;
@@ -209,127 +220,134 @@ class LaoBreakEngine : public DictionaryBreakEngine {
  * @param rangeStart The start of the range of dictionary characters
  * @param rangeEnd The end of the range of dictionary characters
  * @param foundBreaks Output of C array of int32_t break positions, or 0
+  * @param status Information on any errors encountered.
  * @return The number of breaks found
  */
  virtual int32_t divideUpDictionaryRange( UText *text,
                                           int32_t rangeStart,
                                           int32_t rangeEnd,
-                                           UVector32 &foundBreaks ) const;
+                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
+                                           UErrorCode& status) const override;
+
+};
+
+/*******************************************************************
+ * BurmeseBreakEngine
+ */
+
+/**
+ * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
+ *
+ * <p>After it is constructed a BurmeseBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class BurmeseBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * Character sets and dictionary used by this engine.
+     * NOTE(review): this comment originally described "the set of characters
+     * handled by this engine" (fBurmeseWordSet), which is no longer declared
+     * here; the roles of the remaining sets are not visible in this header.
+     * @internal
+     */
+
+  UnicodeSet                fEndWordSet;
+  UnicodeSet                fBeginWordSet;
+  UnicodeSet                fMarkSet;
+  DictionaryMatcher  *fDictionary;
+
+ public:
+
+  /**
+   * <p>Constructor (takes arguments, despite historically being labelled
+   * "default").</p>
+   *
+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+   * engine is deleted.
+   * @param status ICU error code; presumably reports construction
+   * failures - confirm against the implementation.
+   */
+  BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~BurmeseBreakEngine();
+
+ protected:
+ /**
+  * <p>Divide up a range of known dictionary characters.</p>
+  *
+  * @param text A UText representing the text
+  * @param rangeStart The start of the range of dictionary characters
+  * @param rangeEnd The end of the range of dictionary characters
+  * @param foundBreaks Output UVector32 that receives the break positions
+  * @param isPhraseBreaking Phrase-breaking flag supplied by the caller.
+  * NOTE(review): this header does not show whether the Burmese engine
+  * acts on it.
+  * @param status Information on any errors encountered.
+  * @return The number of breaks found
+  */
+  virtual int32_t divideUpDictionaryRange( UText *text,
+                                           int32_t rangeStart,
+                                           int32_t rangeEnd,
+                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
+                                           UErrorCode& status) const override;
+
+};
+
+/*******************************************************************
+ * KhmerBreakEngine
+ */
+
+/**
+ * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
+ *
+ * <p>After it is constructed a KhmerBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class KhmerBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * Character sets and dictionary used by this engine.
+     * NOTE(review): this comment originally described "the set of characters
+     * handled by this engine" (fKhmerWordSet), which is no longer declared
+     * here; the roles of the remaining sets are not visible in this header.
+     * @internal
+     */
+
+  UnicodeSet                fEndWordSet;
+  UnicodeSet                fBeginWordSet;
+  UnicodeSet                fMarkSet;
+  DictionaryMatcher  *fDictionary;
+
+ public:
+
+  /**
+   * <p>Constructor (takes arguments, despite historically being labelled
+   * "default").</p>
+   *
+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+   * engine is deleted.
+   * @param status ICU error code; presumably reports construction
+   * failures - confirm against the implementation.
+   */
+  KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~KhmerBreakEngine();
+
+ protected:
+ /**
+  * <p>Divide up a range of known dictionary characters.</p>
+  *
+  * @param text A UText representing the text
+  * @param rangeStart The start of the range of dictionary characters
+  * @param rangeEnd The end of the range of dictionary characters
+  * @param foundBreaks Output UVector32 that receives the break positions
+  * @param isPhraseBreaking Phrase-breaking flag supplied by the caller.
+  * NOTE(review): this header does not show whether the Khmer engine
+  * acts on it.
+  * @param status Information on any errors encountered.
+  * @return The number of breaks found
+  */
+  virtual int32_t divideUpDictionaryRange( UText *text,
+                                           int32_t rangeStart,
+                                           int32_t rangeEnd,
+                                           UVector32 &foundBreaks,
+                                           UBool isPhraseBreaking,
+                                           UErrorCode& status) const override;

 };

-/******************************************************************* 
- * BurmeseBreakEngine 
- */ 
- 
-/** 
- * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a 
- * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p> 
- * 
- * <p>After it is constructed a BurmeseBreakEngine may be shared between 
- * threads without synchronization.</p> 
- */ 
-class BurmeseBreakEngine : public DictionaryBreakEngine { 
- private: 
-    /** 
-     * The set of characters handled by this engine 
-     * @internal 
-     */ 
- 
-  UnicodeSet                fBurmeseWordSet; 
-  UnicodeSet                fEndWordSet; 
-  UnicodeSet                fBeginWordSet; 
-  UnicodeSet                fMarkSet; 
-  DictionaryMatcher  *fDictionary; 
- 
- public: 
- 
-  /** 
-   * <p>Default constructor.</p> 
-   * 
-   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the 
-   * engine is deleted. 
-   */ 
-  BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); 
- 
-  /** 
-   * <p>Virtual destructor.</p> 
-   */ 
-  virtual ~BurmeseBreakEngine(); 
- 
- protected: 
- /** 
-  * <p>Divide up a range of known dictionary characters.</p> 
-  * 
-  * @param text A UText representing the text 
-  * @param rangeStart The start of the range of dictionary characters 
-  * @param rangeEnd The end of the range of dictionary characters 
-  * @param foundBreaks Output of C array of int32_t break positions, or 0 
-  * @return The number of breaks found 
-  */ 
-  virtual int32_t divideUpDictionaryRange( UText *text, 
-                                           int32_t rangeStart, 
-                                           int32_t rangeEnd, 
-                                           UVector32 &foundBreaks ) const; 
- 
-}; 
- 
-/******************************************************************* 
- * KhmerBreakEngine 
- */ 
- 
-/** 
- * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a 
- * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> 
- * 
- * <p>After it is constructed a KhmerBreakEngine may be shared between 
- * threads without synchronization.</p> 
- */ 
-class KhmerBreakEngine : public DictionaryBreakEngine { 
- private: 
-    /** 
-     * The set of characters handled by this engine 
-     * @internal 
-     */ 
- 
-  UnicodeSet                fKhmerWordSet; 
-  UnicodeSet                fEndWordSet; 
-  UnicodeSet                fBeginWordSet; 
-  UnicodeSet                fMarkSet; 
-  DictionaryMatcher  *fDictionary; 
- 
- public: 
- 
-  /** 
-   * <p>Default constructor.</p> 
-   * 
-   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the 
-   * engine is deleted. 
-   */ 
-  KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); 
- 
-  /** 
-   * <p>Virtual destructor.</p> 
-   */ 
-  virtual ~KhmerBreakEngine(); 
- 
- protected: 
- /** 
-  * <p>Divide up a range of known dictionary characters.</p> 
-  * 
-  * @param text A UText representing the text 
-  * @param rangeStart The start of the range of dictionary characters 
-  * @param rangeEnd The end of the range of dictionary characters 
-  * @param foundBreaks Output of C array of int32_t break positions, or 0 
-  * @return The number of breaks found 
-  */ 
-  virtual int32_t divideUpDictionaryRange( UText *text, 
-                                           int32_t rangeStart, 
-                                           int32_t rangeEnd, 
-                                           UVector32 &foundBreaks ) const; 
- 
-}; 
- 
 #if !UCONFIG_NO_NORMALIZATION

 /*******************************************************************
@@ -354,12 +372,23 @@ class CjkBreakEngine : public DictionaryBreakEngine {
     * @internal
     */
  UnicodeSet                fHangulWordSet;
-  UnicodeSet                fHanWordSet;
-  UnicodeSet                fKatakanaWordSet;
-  UnicodeSet                fHiraganaWordSet;
+  UnicodeSet                fDigitOrOpenPunctuationOrAlphabetSet;
+  UnicodeSet                fClosePunctuationSet;

  DictionaryMatcher        *fDictionary;
  const Normalizer2        *nfkcNorm2;
+  MlBreakEngine            *fMlBreakEngine;
+  bool                      isCj;
+
+ private:
+  // Load Japanese extensions.
+  void loadJapaneseExtensions(UErrorCode& error);
+  // Load Japanese Hiragana.
+  void loadHiragana(UErrorCode& error);
+  // Initialize fSkipSet by loading Japanese Hiragana and extensions.
+  void initJapanesePhraseParameter(UErrorCode& error);
+
+  Hashtable fSkipSet;

 public:

@@ -385,12 +414,15 @@ class CjkBreakEngine : public DictionaryBreakEngine {
     * @param rangeStart The start of the range of dictionary characters
     * @param rangeEnd The end of the range of dictionary characters
     * @param foundBreaks Output of C array of int32_t break positions, or 0
+     * @param status Information on any errors encountered.
     * @return The number of breaks found
     */
  virtual int32_t divideUpDictionaryRange( UText *text,
          int32_t rangeStart,
          int32_t rangeEnd,
-          UVector32 &foundBreaks ) const;
+          UVector32 &foundBreaks,
+          UBool isPhraseBreaking,
+          UErrorCode& status) const override;

 };

@@ -47,23 +47,23 @@ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
                            int32_t *prefix) const {

    UCharsTrie uct(characters);
-    int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+    int32_t startingTextIndex = static_cast<int32_t>(utext_getNativeIndex(text));
    int32_t wordCount = 0;
    int32_t codePointsMatched = 0;

    for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
        UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
-        int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+        int32_t lengthMatched = static_cast<int32_t>(utext_getNativeIndex(text)) - startingTextIndex;
        codePointsMatched += 1;
        if (USTRINGTRIE_HAS_VALUE(result)) {
            if (wordCount < limit) {
-                if (values != NULL) {
+                if (values != nullptr) {
                    values[wordCount] = uct.getValue();
                }
-                if (lengths != NULL) {
+                if (lengths != nullptr) {
                    lengths[wordCount] = lengthMatched;
                }
-                if (cpLengths != NULL) {
+                if (cpLengths != nullptr) {
                    cpLengths[wordCount] = codePointsMatched;
                }
                ++wordCount;
@@ -80,7 +80,7 @@ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
        }
    }

-    if (prefix != NULL) {
+    if (prefix != nullptr) {
        *prefix = codePointsMatched;
    }
    return wordCount;
@@ -101,7 +101,7 @@ UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
        if (delta < 0 || 0xFD < delta) {
            return U_SENTINEL;
        }
-        return (UChar32)delta;
+        return static_cast<UChar32>(delta);
    }
    return c;
 }
@@ -114,23 +114,23 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
                            int32_t *prefix) const {
    BytesTrie bt(characters);
-    int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+    int32_t startingTextIndex = static_cast<int32_t>(utext_getNativeIndex(text));
    int32_t wordCount = 0;
    int32_t codePointsMatched = 0;

    for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
        UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
-        int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+        int32_t lengthMatched = static_cast<int32_t>(utext_getNativeIndex(text)) - startingTextIndex;
        codePointsMatched += 1;
        if (USTRINGTRIE_HAS_VALUE(result)) {
            if (wordCount < limit) {
-                if (values != NULL) {
+                if (values != nullptr) {
                    values[wordCount] = bt.getValue();
                }
-                if (lengths != NULL) {
+                if (lengths != nullptr) {
                    lengths[wordCount] = lengthMatched;
                }
-                if (cpLengths != NULL) {
+                if (cpLengths != nullptr) {
                    cpLengths[wordCount] = codePointsMatched;
                }
                ++wordCount;
@@ -147,7 +147,7 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
        }
    }

-    if (prefix != NULL) {
+    if (prefix != nullptr) {
        *prefix = codePointsMatched;
    }
    return wordCount;
@@ -170,7 +170,7 @@ udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
    int32_t i, offset, size;

    headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
-    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
+    if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) return 0;
    pInfo = (const UDataInfo *)((const char *)inData + 4);
    if (!(pInfo->dataFormat[0] == 0x44 && 
          pInfo->dataFormat[1] == 0x69 && 
@@ -184,7 +184,7 @@ udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
    }

    inBytes = (const uint8_t *)inData + headerSize;
-    outBytes = (uint8_t *)outData + headerSize;
+    outBytes = (outData == nullptr) ? nullptr : (uint8_t *)outData + headerSize;

    inIndexes = (const int32_t *)inBytes;
    if (length >= 0) {
@@ -68,7 +68,7 @@ public:
 */
 class U_COMMON_API DictionaryMatcher : public UMemory {
 public:
-    DictionaryMatcher() {};
+    DictionaryMatcher() {}
    virtual ~DictionaryMatcher();
    // this should emulate CompactTrieDictionary::matches()
    /*  @param text      The text in which to look for matching words. Matching begins
@@ -79,15 +79,15 @@ public:
     *                   matching words to be found.
     *  @param lengths   output array, filled with the lengths of the matches, in order,
     *                   from shortest to longest. Lengths are in native indexing units
-     *                   of the UText. May be NULL.
+     *                   of the UText. May be nullptr.
     *  @param cpLengths output array, filled with the lengths of the matches, in order,
     *                   from shortest to longest. Lengths are the number of Unicode code points.
-     *                   May be NULL.
+     *                   May be nullptr.
     *  @param values    Output array, filled with the values associated with the words found.
-     *                   May be NULL.
+     *                   May be nullptr.
     *  @param prefix    Output parameter, the code point length of the prefix match, even if that
     *                   prefix didn't lead to a complete word. Will always be >= the cpLength
-     *                   of the longest complete word matched. May be NULL.
+     *                   of the longest complete word matched. May be nullptr.
     *  @return          Number of matching words found.
     */
    virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
@@ -103,14 +103,14 @@ class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher {
 public:
    // constructs a new UCharsDictionaryMatcher.
    // The UDataMemory * will be closed on this object's destruction.
-    UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { }
+    UCharsDictionaryMatcher(const char16_t *c, UDataMemory *f) : characters(c), file(f) { }
    virtual ~UCharsDictionaryMatcher();
    virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
-                            int32_t *prefix) const;
-    virtual int32_t getType() const;
+                            int32_t *prefix) const override;
+    virtual int32_t getType() const override;
 private:
-    const UChar *characters;
+    const char16_t *characters;
    UDataMemory *file;
 };

@@ -125,8 +125,8 @@ public:
    virtual ~BytesDictionaryMatcher();
    virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
-                            int32_t *prefix) const;
-    virtual int32_t getType() const;
+                            int32_t *prefix) const override;
+    virtual int32_t getType() const override;
 private:
    UChar32 transform(UChar32 c) const;

@@ -159,7 +159,7 @@ udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *out
 * Constants are defined in the DictionaryData class.
 *
 * For the data structure of BytesTrie & UCharsTrie see
- * http://site.icu-project.org/design/struct/tries
+ * https://icu.unicode.org/design/struct/tries
 * and the bytestrie.h and ucharstrie.h header files.
 *
 * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;
@@ -53,7 +53,7 @@ DateInterval::clone() const {
 }


-UBool 
+bool
 DateInterval::operator==(const DateInterval& other) const { // Two intervals are equal only when both endpoints match.
    return ( fromDate == other.fromDate && toDate == other.toDate );
 }
@@ -35,7 +35,7 @@ const int32_t LENGTH_IN_2TRAIL = 62;

 }  // namespace

-void Edits::releaseArray() U_NOEXCEPT {
+void Edits::releaseArray() noexcept {
    if (array != stackArray) {
        uprv_free(array);
    }
@@ -47,7 +47,7 @@ Edits &Edits::copyArray(const Edits &other) {
        return *this;
    }
    if (length > capacity) {
-        uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2);
+        uint16_t* newArray = static_cast<uint16_t*>(uprv_malloc(static_cast<size_t>(length) * 2));
        if (newArray == nullptr) {
            length = delta = numChanges = 0;
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
@@ -63,7 +63,7 @@ Edits &Edits::copyArray(const Edits &other) {
    return *this;
 }

-Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
+Edits &Edits::moveArray(Edits &src) noexcept {
    if (U_FAILURE(errorCode_)) {
        length = delta = numChanges = 0;
        return *this;
@@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
 }

 Edits &Edits::operator=(const Edits &other) {
+    if (this == &other) { return *this; }  // self-assignment: no-op
    length = other.length;
    delta = other.delta;
    numChanges = other.numChanges;
@@ -93,7 +94,7 @@ Edits &Edits::operator=(const Edits &other) {
    return copyArray(other);
 }

-Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
+Edits &Edits::operator=(Edits &&src) noexcept {
    length = src.length;
    delta = src.delta;
    numChanges = src.numChanges;
@@ -105,7 +106,7 @@ Edits::~Edits() {
    releaseArray();
 }

-void Edits::reset() U_NOEXCEPT {
+void Edits::reset() noexcept {  // Zeroes length, delta and numChanges and clears errorCode_; the array buffer is not touched here.
    length = delta = numChanges = 0;
    errorCode_ = U_ZERO_ERROR;
 }
@@ -185,30 +186,30 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
            head |= oldLength << 6;
        } else if(oldLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL << 6;
-            array[limit++] = (uint16_t)(0x8000 | oldLength);
+            array[limit++] = static_cast<uint16_t>(0x8000 | oldLength);
        } else {
            head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
-            array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
-            array[limit++] = (uint16_t)(0x8000 | oldLength);
+            array[limit++] = static_cast<uint16_t>(0x8000 | (oldLength >> 15));
+            array[limit++] = static_cast<uint16_t>(0x8000 | oldLength);
        }
        if(newLength < LENGTH_IN_1TRAIL) {
            head |= newLength;
        } else if(newLength <= 0x7fff) {
            head |= LENGTH_IN_1TRAIL;
-            array[limit++] = (uint16_t)(0x8000 | newLength);
+            array[limit++] = static_cast<uint16_t>(0x8000 | newLength);
        } else {
            head |= LENGTH_IN_2TRAIL + (newLength >> 30);
-            array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
-            array[limit++] = (uint16_t)(0x8000 | newLength);
+            array[limit++] = static_cast<uint16_t>(0x8000 | (newLength >> 15));
+            array[limit++] = static_cast<uint16_t>(0x8000 | newLength);
        }
-        array[length] = (uint16_t)head;
+        array[length] = static_cast<uint16_t>(head);
        length = limit;
    }
 }

 void Edits::append(int32_t r) {  // Stores r, truncated to 16 bits, as the next array unit.
    if(length < capacity || growArray()) {  // growArray() is only called when the array is full; if it returns false nothing is stored
-        array[length++] = (uint16_t)r;
+        array[length++] = static_cast<uint16_t>(r);
    }
 }

@@ -220,7 +221,7 @@ UBool Edits::growArray() {
        // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
        // with a result-string-buffer overflow.
        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
-        return FALSE;
+        return false;
    } else if (capacity >= (INT32_MAX / 2)) {
        newCapacity = INT32_MAX;
    } else {
@@ -229,25 +230,25 @@ UBool Edits::growArray() {
    // Grow by at least 5 units so that a maximal change record will fit.
    if ((newCapacity - capacity) < 5) {
        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
-        return FALSE;
+        return false;
    }
-    uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
-    if (newArray == NULL) {
+    uint16_t* newArray = static_cast<uint16_t*>(uprv_malloc(static_cast<size_t>(newCapacity) * 2));
+    if (newArray == nullptr) {
        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
-        return FALSE;
+        return false;
    }
    uprv_memcpy(newArray, array, (size_t)length * 2);
    releaseArray();
    array = newArray;
    capacity = newCapacity;
-    return TRUE;
+    return true;
 }

-UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
-    if (U_FAILURE(outErrorCode)) { return TRUE; }
-    if (U_SUCCESS(errorCode_)) { return FALSE; }
+UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const {  // Returns true when outErrorCode holds a failure on return, false otherwise.
+    if (U_FAILURE(outErrorCode)) { return true; }  // the caller already has an error: leave it as is
+    if (U_SUCCESS(errorCode_)) { return false; }  // this object has no error to report
    outErrorCode = errorCode_;
-    return TRUE;
+    return true;
 }

 Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
@@ -256,7 +257,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
    // Parallel iteration over both Edits.
    Iterator abIter = ab.getFineIterator();
    Iterator bcIter = bc.getFineIterator();
-    UBool abHasNext = TRUE, bcHasNext = TRUE;
+    UBool abHasNext = true, bcHasNext = true;
    // Copy iterator state into local variables, so that we can modify and subdivide spans.
    // ab old & new length, bc old & new length
    int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
@@ -399,7 +400,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
 Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :  // a/len: the units to iterate; oc -> onlyChanges_, crs -> coarse
        array(a), index(0), length(len), remaining(0),
        onlyChanges_(oc), coarse(crs),
-        dir(0), changed(FALSE), oldLength_(0), newLength_(0),
+        dir(0), changed(false), oldLength_(0), newLength_(0),
        srcIndex(0), replIndex(0), destIndex(0) {}  // all position and state fields start at 0 / false

 int32_t Edits::Iterator::readLength(int32_t head) {
@@ -414,7 +415,7 @@ int32_t Edits::Iterator::readLength(int32_t head) {
        U_ASSERT(array[index] >= 0x8000);
        U_ASSERT(array[index + 1] >= 0x8000);
        int32_t len = ((head & 1) << 30) |
-                ((int32_t)(array[index] & 0x7fff) << 15) |
+                (static_cast<int32_t>(array[index] & 0x7fff) << 15) |
                (array[index + 1] & 0x7fff);
        index += 2;
        return len;
@@ -440,16 +441,16 @@ void Edits::Iterator::updatePreviousIndexes() {
 UBool Edits::Iterator::noNext() {  // Sets the current span to "unchanged, zero length" and always returns false.
    // No change before or beyond the string.
    dir = 0;
-    changed = FALSE;
+    changed = false;
    oldLength_ = newLength_ = 0;
-    return FALSE;
+    return false;
 }

 UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
    // Forward iteration: Update the string indexes to the limit of the current span,
    // and post-increment-read array units to assemble a new span.
    // Leaves the array index one after the last unit of that span.
-    if (U_FAILURE(errorCode)) { return FALSE; }
+    if (U_FAILURE(errorCode)) { return false; }
    // We have an errorCode in case we need to start guarding against integer overflows.
    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
    if (dir > 0) {
@@ -463,7 +464,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
                // Stay on the current one of a sequence of compressed changes.
                ++index;  // next() rests on the index after the sequence unit.
                dir = 1;
-                return TRUE;
+                return true;
            }
        }
        dir = 1;
@@ -472,7 +473,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
        // Fine-grained iterator: Continue a sequence of compressed changes.
        if (remaining > 1) {
            --remaining;
-            return TRUE;
+            return true;
        }
        remaining = 0;
    }
@@ -482,7 +483,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
    int32_t u = array[index++];
    if (u <= MAX_UNCHANGED) {
        // Combine adjacent unchanged ranges.
-        changed = FALSE;
+        changed = false;
        oldLength_ = u + 1;
        while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
            ++index;
@@ -497,10 +498,10 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
            // already fetched u > MAX_UNCHANGED at index
            ++index;
        } else {
-            return TRUE;
+            return true;
        }
    }
-    changed = TRUE;
+    changed = true;
    if (u <= MAX_SHORT_CHANGE) {
        int32_t oldLen = u >> 12;
        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
@@ -515,14 +516,14 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
            if (num > 1) {
                remaining = num;  // This is the first of two or more changes.
            }
-            return TRUE;
+            return true;
        }
    } else {
        U_ASSERT(u <= 0x7fff);
        oldLength_ = readLength((u >> 6) & 0x3f);
        newLength_ = readLength(u & 0x3f);
        if (!coarse) {
-            return TRUE;
+            return true;
        }
    }
    // Combine adjacent changes.
@@ -538,14 +539,14 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
            newLength_ += readLength(u & 0x3f);
        }
    }
-    return TRUE;
+    return true;
 }

 UBool Edits::Iterator::previous(UErrorCode &errorCode) {
    // Backward iteration: Pre-decrement-read array units to assemble a new span,
    // then update the string indexes to the start of that span.
    // Leaves the array index on the head unit of that span.
-    if (U_FAILURE(errorCode)) { return FALSE; }
+    if (U_FAILURE(errorCode)) { return false; }
    // We have an errorCode in case we need to start guarding against integer overflows.
    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
    if (dir >= 0) {
@@ -558,7 +559,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
                // Stay on the current one of a sequence of compressed changes.
                --index;  // previous() rests on the sequence unit.
                dir = -1;
-                return TRUE;
+                return true;
            }
            updateNextIndexes();
        }
@@ -571,7 +572,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
        if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
            ++remaining;
            updatePreviousIndexes();
-            return TRUE;
+            return true;
        }
        remaining = 0;
    }
@@ -581,7 +582,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
    int32_t u = array[--index];
    if (u <= MAX_UNCHANGED) {
        // Combine adjacent unchanged ranges.
-        changed = FALSE;
+        changed = false;
        oldLength_ = u + 1;
        while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
            --index;
@@ -590,9 +591,9 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
        newLength_ = oldLength_;
        // No need to handle onlyChanges as long as previous() is called only from findIndex().
        updatePreviousIndexes();
-        return TRUE;
+        return true;
    }
-    changed = TRUE;
+    changed = true;
    if (u <= MAX_SHORT_CHANGE) {
        int32_t oldLen = u >> 12;
        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
@@ -608,7 +609,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
                remaining = 1;  // This is the last of two or more changes.
            }
            updatePreviousIndexes();
-            return TRUE;
+            return true;
        }
    } else {
        if (u <= 0x7fff) {
@@ -628,7 +629,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
        }
        if (!coarse) {
            updatePreviousIndexes();
-            return TRUE;
+            return true;
        }
    }
    // Combine adjacent changes.
@@ -647,7 +648,7 @@ UBool Edits::Iterator::previous(UErrorCode &errorCode) {
        }
    }
    updatePreviousIndexes();
-    return TRUE;
+    return true;
 }

 int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
@@ -704,7 +705,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
        // The index is in the current span.
        return 0;
    }
-    while (next(FALSE, errorCode)) {
+    while (next(false, errorCode)) {
        if (findSource) {
            spanStart = srcIndex;
            spanLength = oldLength_;
@@ -738,7 +739,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
 }

 int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
-    int32_t where = findIndex(i, TRUE, errorCode);
+    int32_t where = findIndex(i, true, errorCode);
    if (where < 0) {
        // Error or before the string.
        return 0;
@@ -757,7 +758,7 @@ int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &
 }

 int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
-    int32_t where = findIndex(i, FALSE, errorCode);
+    int32_t where = findIndex(i, false, errorCode);
    if (where < 0) {
        // Error or before the string.
        return 0;
@@ -0,0 +1,220 @@
+// © 2021 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// emojiprops.cpp
+// created: 2021sep04 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/ucptrie.h"
+#include "unicode/udata.h"
+#include "unicode/ustringtrie.h"
+#include "unicode/utf16.h"
+#include "emojiprops.h"
+#include "ucln.h"
+#include "ucln_cmn.h"
+#include "umutex.h"
+#include "uset_imp.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+EmojiProps *singleton = nullptr;
+icu::UInitOnce emojiInitOnce {};
+
+UBool U_CALLCONV emojiprops_cleanup() {  // Cleanup callback that initSingleton() registers under UCLN_COMMON_EMOJIPROPS.
+    delete singleton;
+    singleton = nullptr;
+    emojiInitOnce.reset();  // presumably re-arms the init-once so the singleton can be created again -- confirm in umutex.h
+    return true;
+}
+
+void U_CALLCONV initSingleton(UErrorCode &errorCode) {  // Init callback that getSingleton() passes to umtx_initOnce().
+    if (U_FAILURE(errorCode)) { return; }
+    singleton = new EmojiProps(errorCode);  // the constructor calls load(errorCode)
+    if (singleton == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    } else if (U_FAILURE(errorCode)) {  // load() failed: discard the object so getSingleton() returns nullptr
+        delete singleton;
+        singleton = nullptr;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_EMOJIPROPS, emojiprops_cleanup);  // reached on the failure paths above as well
+}
+
+// TODO: turn this into a shared helper function
+// Requires the major version to match, and then requires at least the minor version.
+UBool udata_isAcceptableMajorMinor(
+        const UDataInfo &info, const char16_t *dataFormat, uint8_t major, uint8_t minor) {  // dataFormat: four code units compared with info.dataFormat[0..3]
+    return
+        info.size >= 20 &&
+        info.isBigEndian == U_IS_BIG_ENDIAN &&  // data endianness must equal this build's
+        info.charsetFamily == U_CHARSET_FAMILY &&  // data charset family must equal this build's
+        info.dataFormat[0] == dataFormat[0] &&
+        info.dataFormat[1] == dataFormat[1] &&
+        info.dataFormat[2] == dataFormat[2] &&
+        info.dataFormat[3] == dataFormat[3] &&
+        info.formatVersion[0] == major &&  // exact major version
+        info.formatVersion[1] >= minor;  // this minor version or newer
+}
+
+}  // namespace
+
+EmojiProps::~EmojiProps() {  // Closes the two handles that load() assigns: the data memory and the code point trie.
+    udata_close(memory);
+    ucptrie_close(cpTrie);
+}
+
+const EmojiProps *
+EmojiProps::getSingleton(UErrorCode &errorCode) {  // Returns the shared instance, initialized through umtx_initOnce(); nullptr if errorCode is already a failure.
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    umtx_initOnce(emojiInitOnce, &initSingleton, errorCode);
+    return singleton;  // initSingleton() leaves this nullptr when allocation or loading failed
+}
+
+UBool U_CALLCONV
+EmojiProps::isAcceptable(void * /*context*/, const char * /*type*/, const char * /*name*/,
+                         const UDataInfo *pInfo) {  // Only pInfo is inspected; it is dereferenced without a null check.
+    return udata_isAcceptableMajorMinor(*pInfo, u"Emoj", 1, 0);  // data format "Emoj", major version 1, minor version >= 0
+}
+
+void
+EmojiProps::load(UErrorCode &errorCode) {  // Opens the "uemoji" data and fills memory, cpTrie and stringTries[]; problems are reported through errorCode.
+    memory = udata_openChoice(nullptr, "icu", "uemoji", isAcceptable, this, &errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+    const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(memory));
+    const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);  // the data is read as starting with an int32_t index array
+    int32_t indexesLength = inIndexes[IX_CPTRIE_OFFSET] / 4;  // byte offset of the trie / 4; emojiprops.h notes that the trie follows the indexes
+    if (indexesLength <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) {
+        errorCode = U_INVALID_FORMAT_ERROR;  // Not enough indexes.
+        return;
+    }
+
+    int32_t i = IX_CPTRIE_OFFSET;
+    int32_t offset = inIndexes[i++];
+    int32_t nextOffset = inIndexes[i];  // the next index entry is used as the end of the trie bytes
+    cpTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8,
+                                    inBytes + offset, nextOffset - offset, nullptr, &errorCode);
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    for (i = IX_BASIC_EMOJI_TRIE_OFFSET; i <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET; ++i) {  // one pass per string-property trie slot
+        offset = inIndexes[i];
+        nextOffset = inIndexes[i + 1];  // NOTE(review): on the last pass this reads one entry past what the indexesLength check above guarantees -- confirm the data always has more indexes
+        // Set/leave nullptr if there is no UCharsTrie.
+        const char16_t* p = nextOffset > offset ? reinterpret_cast<const char16_t*>(inBytes + offset) : nullptr;
+        stringTries[getStringTrieIndex(i)] = p;
+    }
+}
+
+void
+EmojiProps::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {  // errorCode is accepted but never read or set here
+    // Add the start code point of each same-value range of the trie.
+    UChar32 start = 0, end;
+    uint32_t value;  // filled in by ucptrie_getRange() but not used
+    while ((end = ucptrie_getRange(cpTrie, start, UCPMAP_RANGE_NORMAL, 0,
+                                   nullptr, nullptr, &value)) >= 0) {  // a negative return ends the walk
+        sa->add(sa->set, start);
+        start = end + 1;  // continue right after the range just reported
+    }
+}
+
+UBool
+EmojiProps::hasBinaryProperty(UChar32 c, UProperty which) {  // Static entry point for one code point; errors are swallowed and yield false.
+    UErrorCode errorCode = U_ZERO_ERROR;
+    const EmojiProps *ep = getSingleton(errorCode);
+    return U_SUCCESS(errorCode) && ep->hasBinaryPropertyImpl(c, which);  // ep is only dereferenced when errorCode is a success
+}
+
+UBool
+EmojiProps::hasBinaryPropertyImpl(UChar32 c, UProperty which) const {  // Tests one bit of the trie value for c, selected by `which`.
+    if (which < UCHAR_EMOJI || UCHAR_RGI_EMOJI < which) {  // outside the range covered by bitFlags[] below
+        return false;
+    }
+    // Note: UCHAR_REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere.
+    static constexpr int8_t bitFlags[] = {  // indexed by which - UCHAR_EMOJI; -1 marks properties not answered here
+        BIT_EMOJI,                  // UCHAR_EMOJI=57
+        BIT_EMOJI_PRESENTATION,     // UCHAR_EMOJI_PRESENTATION=58
+        BIT_EMOJI_MODIFIER,         // UCHAR_EMOJI_MODIFIER=59
+        BIT_EMOJI_MODIFIER_BASE,    // UCHAR_EMOJI_MODIFIER_BASE=60
+        BIT_EMOJI_COMPONENT,        // UCHAR_EMOJI_COMPONENT=61
+        -1,                         // UCHAR_REGIONAL_INDICATOR=62
+        -1,                         // UCHAR_PREPENDED_CONCATENATION_MARK=63
+        BIT_EXTENDED_PICTOGRAPHIC,  // UCHAR_EXTENDED_PICTOGRAPHIC=64
+        BIT_BASIC_EMOJI,            // UCHAR_BASIC_EMOJI=65
+        -1,                         // UCHAR_EMOJI_KEYCAP_SEQUENCE=66
+        -1,                         // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67
+        -1,                         // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68
+        -1,                         // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69
+        -1,                         // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70
+        BIT_BASIC_EMOJI,            // UCHAR_RGI_EMOJI=71
+    };
+    int32_t bit = bitFlags[which - UCHAR_EMOJI];
+    if (bit < 0) {
+        return false;  // not a property that we support in this function
+    }
+    uint8_t bits = UCPTRIE_FAST_GET(cpTrie, UCPTRIE_8, c);  // 8-bit trie value for c
+    return (bits >> bit) & 1;
+}
+
+UBool
+EmojiProps::hasBinaryProperty(const char16_t *s, int32_t length, UProperty which) {  // Static entry point for a string; errors are swallowed and yield false.
+    UErrorCode errorCode = U_ZERO_ERROR;
+    const EmojiProps *ep = getSingleton(errorCode);
+    return U_SUCCESS(errorCode) && ep->hasBinaryPropertyImpl(s, length, which);  // ep is only dereferenced when errorCode is a success
+}
+
+UBool
+EmojiProps::hasBinaryPropertyImpl(const char16_t *s, int32_t length, UProperty which) const {  // Looks the string up in the trie(s) for `which`.
+    if (s == nullptr && length != 0) { return false; }  // null pointer with a non-zero length
+    if (length <= 0 && (length == 0 || *s == 0)) { return false; }  // empty string (length 0, or negative length with a NUL first unit)
+    // The caller should have delegated single code points to hasBinaryProperty(c, which).
+    if (which < UCHAR_BASIC_EMOJI || UCHAR_RGI_EMOJI < which) {
+        return false;
+    }
+    UProperty firstProp = which, lastProp = which;
+    if (which == UCHAR_RGI_EMOJI) {
+        // RGI_Emoji is the union of the other emoji properties of strings.
+        firstProp = UCHAR_BASIC_EMOJI;
+        lastProp = UCHAR_RGI_EMOJI_ZWJ_SEQUENCE;
+    }
+    for (int32_t prop = firstProp; prop <= lastProp; ++prop) {
+        const char16_t *trieUChars = stringTries[prop - UCHAR_BASIC_EMOJI];  // slot order follows the property order starting at UCHAR_BASIC_EMOJI
+        if (trieUChars != nullptr) {  // load() leaves a slot nullptr when the data has no trie for it
+            UCharsTrie trie(trieUChars);
+            UStringTrieResult result = trie.next(s, length);  // feeds the whole string to the trie in one call
+            if (USTRINGTRIE_HAS_VALUE(result)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void
+EmojiProps::addStrings(const USetAdder *sa, UProperty which, UErrorCode &errorCode) const {  // Passes every string stored for `which` to sa->addString().
+    if (U_FAILURE(errorCode)) { return; }
+    if (which < UCHAR_BASIC_EMOJI || UCHAR_RGI_EMOJI < which) {  // not a property of strings handled here: adds nothing
+        return;
+    }
+    UProperty firstProp = which, lastProp = which;
+    if (which == UCHAR_RGI_EMOJI) {
+        // RGI_Emoji is the union of the other emoji properties of strings.
+        firstProp = UCHAR_BASIC_EMOJI;
+        lastProp = UCHAR_RGI_EMOJI_ZWJ_SEQUENCE;
+    }
+    for (int32_t prop = firstProp; prop <= lastProp; ++prop) {
+        const char16_t *trieUChars = stringTries[prop - UCHAR_BASIC_EMOJI];
+        if (trieUChars != nullptr) {  // skip properties for which load() found no trie
+            UCharsTrie::Iterator iter(trieUChars, 0, errorCode);
+            while (iter.next(errorCode)) {  // one trie entry per iteration
+                const UnicodeString &s = iter.getString();
+                sa->addString(sa->set, s.getBuffer(), s.length());
+            }
+        }
+    }
+}
+
+U_NAMESPACE_END
@@ -0,0 +1,90 @@
+// © 2021 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// emojiprops.h
+// created: 2021sep03 Markus W. Scherer
+
+#ifndef __EMOJIPROPS_H__
+#define __EMOJIPROPS_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ucptrie.h"
+#include "unicode/udata.h"
+#include "unicode/uobject.h"
+#include "uset_imp.h"
+
+U_NAMESPACE_BEGIN
+
+class EmojiProps : public UMemory {  // Emoji property lookups for code points and strings, backed by the data that load() opens.
+public:
+    // @internal
+    EmojiProps(UErrorCode &errorCode) { load(errorCode); }  // loads the data immediately; problems are reported through errorCode
+    ~EmojiProps();
+
+    static const EmojiProps *getSingleton(UErrorCode &errorCode);
+    static UBool hasBinaryProperty(UChar32 c, UProperty which);  // single code point
+    static UBool hasBinaryProperty(const char16_t *s, int32_t length, UProperty which);  // string
+
+    void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
+    void addStrings(const USetAdder *sa, UProperty which, UErrorCode &errorCode) const;
+
+    enum {  // positions in the data file's indexes[] array
+        // Byte offsets from the start of the data, after the generic header,
+        // in ascending order.
+        // UCPTrie=CodePointTrie, follows the indexes
+        IX_CPTRIE_OFFSET,
+        IX_RESERVED1,
+        IX_RESERVED2,
+        IX_RESERVED3,
+
+        // UCharsTrie=CharsTrie
+        IX_BASIC_EMOJI_TRIE_OFFSET,
+        IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET,
+        IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET,
+        IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET,
+        IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET,
+        IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET,
+        IX_RESERVED10,
+        IX_RESERVED11,
+        IX_RESERVED12,
+        IX_TOTAL_SIZE,
+
+        // Not initially byte offsets.
+        IX_RESERVED14,
+        IX_RESERVED15,
+        IX_COUNT  // 16
+    };
+
+    // Properties in the code point trie.
+    enum {  // bit numbers: hasBinaryPropertyImpl(c, which) tests (trie value >> BIT_x) & 1
+        // https://www.unicode.org/reports/tr51/#Emoji_Properties
+        BIT_EMOJI,
+        BIT_EMOJI_PRESENTATION,
+        BIT_EMOJI_MODIFIER,
+        BIT_EMOJI_MODIFIER_BASE,
+        BIT_EMOJI_COMPONENT,
+        BIT_EXTENDED_PICTOGRAPHIC,
+        // https://www.unicode.org/reports/tr51/#Emoji_Sets
+        BIT_BASIC_EMOJI
+    };
+
+private:
+    static UBool U_CALLCONV
+    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
+    /** Input i: One of the IX_..._TRIE_OFFSET indexes into the data file indexes[] array. */
+    static int32_t getStringTrieIndex(int32_t i) {
+        return i - IX_BASIC_EMOJI_TRIE_OFFSET;  // IX_BASIC_EMOJI_TRIE_OFFSET maps to slot 0 of stringTries[]
+    }
+
+    void load(UErrorCode &errorCode);
+    UBool hasBinaryPropertyImpl(UChar32 c, UProperty which) const;
+    UBool hasBinaryPropertyImpl(const char16_t *s, int32_t length, UProperty which) const;
+
+    UDataMemory *memory = nullptr;  // assigned in load(), closed in the destructor
+    UCPTrie *cpTrie = nullptr;  // assigned in load(), closed in the destructor
+    const char16_t *stringTries[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };  // one slot per IX_BASIC_EMOJI..IX_RGI_EMOJI_ZWJ_SEQUENCE trie offset; nullptr = no trie
+};
+
+U_NAMESPACE_END
+
+#endif  // __EMOJIPROPS_H__
@@ -20,6 +20,7 @@
 #include "ubrkimpl.h" // U_ICUDATA_BRKITR
 #include "uvector.h"
 #include "cmemory.h"
+#include "umutex.h"

 U_NAMESPACE_BEGIN

@@ -34,7 +35,7 @@ static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d,
  if(s) {
    s->extract(0,s->length(),buf,2048);
  } else {
-    strcpy(buf,"NULL");
+    strcpy(buf,"nullptr");
  }
  fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
          f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
@@ -48,16 +49,16 @@ static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d,
 /**
 * Used with sortedInsert()
 */
-static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
-    const UnicodeString &a = *(const UnicodeString*)t1.pointer;
-    const UnicodeString &b = *(const UnicodeString*)t2.pointer;
+static int32_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {  // Both elements' pointers are dereferenced as UnicodeString without a null check.
+    const UnicodeString& a = *static_cast<const UnicodeString*>(t1.pointer);
+    const UnicodeString& b = *static_cast<const UnicodeString*>(t2.pointer);
    return a.compare(b);  // ordering is whatever UnicodeString::compare() defines
 }

 /**
 * A UVector which implements a set of strings.
 */
-class U_COMMON_API UStringSet : public UVector {
+class UStringSet : public UVector {
 public:
  UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
                                           uhash_compareUnicodeString,
@@ -75,7 +76,7 @@ class U_COMMON_API UStringSet : public UVector {
   * Return the ith UnicodeString alias
   */
  inline const UnicodeString* getStringAt(int32_t i) const {
-    return (const UnicodeString*)elementAt(i);
+    return static_cast<const UnicodeString*>(elementAt(i));
  }
  /**
   * Adopt the UnicodeString if not already contained.
@@ -89,7 +90,6 @@ class U_COMMON_API UStringSet : public UVector {
    } else {
      sortedInsert(str, compareUnicodeString, status);
      if(U_FAILURE(status)) {
-        delete str;
        return false;
      }
      return true;
@@ -102,7 +102,7 @@ class U_COMMON_API UStringSet : public UVector {
  inline UBool add(const UnicodeString& str, UErrorCode &status) {
    if(U_FAILURE(status)) return false;
    UnicodeString *t = new UnicodeString(str);
-    if(t==NULL) {
+    if(t==nullptr) {
      status = U_MEMORY_ALLOCATION_ERROR; return false;
    }
    return adopt(t, status);
@@ -130,7 +130,7 @@ static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forwar
 static const int32_t kMATCH   = (1<<1); //< exact match - skip this one.
 static const int32_t kSuppressInReverse = (1<<0);
 static const int32_t kAddToForward = (1<<1);
-static const UChar   kFULLSTOP = 0x002E; // '.'
+static const char16_t kFULLSTOP = 0x002E; // '.'

 /**
 * Shared data for SimpleFilteredSentenceBreakIterator
@@ -139,13 +139,30 @@ class SimpleFilteredSentenceBreakData : public UMemory {
 public:
  SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards ) 
      : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
-  SimpleFilteredSentenceBreakData *incr() { refcount++;  return this; }
-  SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
-  virtual ~SimpleFilteredSentenceBreakData();
+    SimpleFilteredSentenceBreakData *incr() {
+        umtx_atomic_inc(&refcount);
+        return this;
+    }
+    SimpleFilteredSentenceBreakData *decr() {
+        if(umtx_atomic_dec(&refcount) <= 0) {
+            delete this;
+        }
+        return nullptr;
+    }
+    virtual ~SimpleFilteredSentenceBreakData();

-  LocalPointer<UCharsTrie>    fForwardsPartialTrie; //  Has ".a" for "a.M."
-  LocalPointer<UCharsTrie>    fBackwardsTrie; //  i.e. ".srM" for Mrs.
-  int32_t                     refcount;
+    bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
+    bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
+
+    const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
+    const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
+
+private:
+    // These tries own their data arrays.
+    // They are shared and must therefore not be modified.
+    LocalPointer<UCharsTrie>    fForwardsPartialTrie; //  Has ".a" for "a.M."
+    LocalPointer<UCharsTrie>    fBackwardsTrie; //  i.e. ".srM" for Mrs.
+    u_atomic_int32_t            refcount;
 };

 SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
@@ -168,37 +185,37 @@ public:
  /* -- cloning and other subclass stuff -- */
  virtual BreakIterator *  createBufferClone(void * /*stackBuffer*/,
                                             int32_t &/*BufferSize*/,
-                                             UErrorCode &status) {
+                                             UErrorCode &status) override {
    // for now - always deep clone
    status = U_SAFECLONE_ALLOCATED_WARNING;
    return clone();
  }
-  virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); }
-  virtual UClassID getDynamicClassID(void) const { return NULL; }
-  virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
+  virtual SimpleFilteredSentenceBreakIterator* clone() const override { return new SimpleFilteredSentenceBreakIterator(*this); }
+  virtual UClassID getDynamicClassID() const override { return nullptr; }
+  virtual bool operator==(const BreakIterator& o) const override { if(this==&o) return true; return false; }

  /* -- text modifying -- */
-  virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
-  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
-  virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
-  virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
+  virtual void setText(UText *text, UErrorCode &status) override { fDelegate->setText(text,status); }
+  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) override { fDelegate->refreshInputText(input,status); return *this; }
+  virtual void adoptText(CharacterIterator* it) override { fDelegate->adoptText(it); }
+  virtual void setText(const UnicodeString &text) override { fDelegate->setText(text); }

  /* -- other functions that are just delegated -- */
-  virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
-  virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
+  virtual UText *getUText(UText *fillIn, UErrorCode &status) const override { return fDelegate->getUText(fillIn,status); }
+  virtual CharacterIterator& getText() const override { return fDelegate->getText(); }

  /* -- ITERATION -- */
-  virtual int32_t first(void);
-  virtual int32_t preceding(int32_t offset);
-  virtual int32_t previous(void);
-  virtual UBool isBoundary(int32_t offset);
-  virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
+  virtual int32_t first() override;
+  virtual int32_t preceding(int32_t offset) override;
+  virtual int32_t previous() override;
+  virtual UBool isBoundary(int32_t offset) override;
+  virtual int32_t current() const override { return fDelegate->current(); } // we keep the delegate current, so this should be correct.

-  virtual int32_t next(void);
+  virtual int32_t next() override;

-  virtual int32_t next(int32_t n);
-  virtual int32_t following(int32_t offset);
-  virtual int32_t last(void);
+  virtual int32_t next(int32_t n) override;
+  virtual int32_t following(int32_t offset) override;
+  virtual int32_t last() override;

 private:
    /**
@@ -244,7 +261,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
  fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
  fDelegate(adopt)
 {
-  // all set..
+    if (fData == nullptr) {
+        delete forwards;
+        delete backwards;
+        if (U_SUCCESS(status)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
 }

 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
@@ -261,68 +284,71 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
    int32_t bestValue = -1;
    // loops while 'n' points to an exception.
    utext_setNativeIndex(fText.getAlias(), n); // from n..
-    fData->fBackwardsTrie->reset();
-    UChar32 uch;

    //if(debug2) u_printf(" n@ %d\n", n);
    // Assume a space is following the '.'  (so we handle the case:  "Mr. /Brown")
-    if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) {  // TODO: skip a class of chars here??
+    if(utext_previous32(fText.getAlias())==u' ') {  // TODO: skip a class of chars here??
      // TODO only do this the 1st time?
-      //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
+      //if(debug2) u_printf("skipping prev: |%C| \n", (char16_t)uch);
    } else {
-      //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
-      uch = utext_next32(fText.getAlias());
-      //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
+      //if(debug2) u_printf("not skipping prev: |%C| \n", (char16_t)uch);
+      utext_next32(fText.getAlias());
+      //if(debug2) u_printf(" -> : |%C| \n", (char16_t)uch);
    }

-    UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
-
-    while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL  &&   // more to consume backwards and..
-          USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
-      if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
-        bestPosn = utext_getNativeIndex(fText.getAlias());
-        bestValue = fData->fBackwardsTrie->getValue();
-      }
-      //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
+    {
+        // Do not modify the shared trie!
+        UCharsTrie iter(fData->getBackwardsTrie());
+        UChar32 uch;
+        while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) {  // more to consume backwards
+            UStringTrieResult r = iter.nextForCodePoint(uch);
+            if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
+                bestPosn = utext_getNativeIndex(fText.getAlias());
+                bestValue = iter.getValue();
+            }
+            if(!USTRINGTRIE_HAS_NEXT(r)) {
+                break;
+            }
+            //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (char16_t)uch, r, utext_getNativeIndex(fText.getAlias()));
+        }
    }

-    if(USTRINGTRIE_MATCHES(r)) { // exact match?
-      //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
-      bestValue = fData->fBackwardsTrie->getValue();
-      bestPosn = utext_getNativeIndex(fText.getAlias());
-      //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
-    }
+    //if(bestValue >= 0) {
+        //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (char16_t)uch, r, bestPosn, bestValue);
+    //}

    if(bestPosn>=0) {
-      //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
+      //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (char16_t)uch, r, bestPosn, bestValue);

      //if(USTRINGTRIE_MATCHES(r)) {  // matched - so, now what?
-      //int32_t bestValue = fBackwardsTrie->getValue();
-      ////if(debug2) u_printf("rev< /%C/ matched, skip..%d  bestValue=%d\n", (UChar)uch, r, bestValue);
+      //int32_t bestValue = iter.getValue();
+      ////if(debug2) u_printf("rev< /%C/ matched, skip..%d  bestValue=%d\n", (char16_t)uch, r, bestValue);

      if(bestValue == kMATCH) { // exact match!
        //if(debug2) u_printf(" exact backward match\n");
        return kExceptionHere; // See if the next is another exception.
      } else if(bestValue == kPARTIAL
-                && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
+                && fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
        //if(debug2) u_printf(" partial backward match\n");
        // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
        // to see if it matches something going forward.
-        fData->fForwardsPartialTrie->reset();
        UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
        utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
        //if(debug2) u_printf("Retrying at %d\n", bestPosn);
+        // Do not modify the shared trie!
+        UCharsTrie iter(fData->getForwardsPartialTrie());
+        UChar32 uch;
        while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
-              USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
-          //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
+              USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
+          //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (char16_t)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
        }
        if(USTRINGTRIE_MATCHES(rfwd)) {
-          //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
+          //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (char16_t)uch);
          // only full matches here, nothing to check
          // skip the next:
            return kExceptionHere;
        } else {
-          //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
+          //if(debug2) u_printf("fwd> /%C/ no match.\n", (char16_t)uch);
          // no match (no exception) -return the 'underlying' break
          return kNoExceptionHere;
        }
@@ -330,7 +356,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
        return kNoExceptionHere; // internal error and/or no forwards trie
      }
    } else {
-      //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r);  // no best match
+      //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (char16_t)uch, r);  // no best match
      return kNoExceptionHere; // No match - so exit. Not an exception.
    }
 }
@@ -339,7 +365,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
 int32_t
 SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
  if(n == UBRK_DONE || // at end  or
-    fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
+    !fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
      return n;
  }
  // OK, do we need to break here?
@@ -369,7 +395,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
 int32_t
 SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
  if(n == 0 || n == UBRK_DONE || // at end  or
-    fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
+    !fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
      return n;
  }
  // OK, do we need to break here?
@@ -402,7 +428,7 @@ SimpleFilteredSentenceBreakIterator::next() {
 }

 int32_t
-SimpleFilteredSentenceBreakIterator::first(void) {
+SimpleFilteredSentenceBreakIterator::first() {
  // Don't suppress a break opportunity at the beginning of text.
  return fDelegate->first();
 }
@@ -413,14 +439,14 @@ SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
 }

 int32_t
-SimpleFilteredSentenceBreakIterator::previous(void) {
+SimpleFilteredSentenceBreakIterator::previous() {
  return internalPrev(fDelegate->previous());
 }

 UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
  if (!fDelegate->isBoundary(offset)) return false; // no break to suppress

-  if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
+  if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions

  UErrorCode status = U_ZERO_ERROR;
  resetState(status);
@@ -447,7 +473,7 @@ SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
 }

 int32_t
-SimpleFilteredSentenceBreakIterator::last(void) {
+SimpleFilteredSentenceBreakIterator::last() {
  // Don't suppress a break opportunity at the end of text.
  return fDelegate->last();
 }
@@ -456,14 +482,14 @@ SimpleFilteredSentenceBreakIterator::last(void) {
 /**
 * Concrete implementation of builder class.
 */
-class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
+class SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
 public:
  virtual ~SimpleFilteredBreakIteratorBuilder();
  SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
  SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
-  virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
-  virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
-  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
+  virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
+  virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
+  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) override;
 private:
  UStringSet fSet;
 };
@@ -490,7 +516,7 @@ SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Loc
 #endif
      return;  // leaves the builder empty, if you try to use it.
    }
-    LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus));
+    LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", nullptr, &subStatus));
    if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {    
      status = subStatus; // copy the failing status 
 #if FB_DEBUG
@@ -498,7 +524,7 @@ SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Loc
 #endif
      return;  // leaves the builder empty, if you try to use it.
    }
-    LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus));
+    LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", nullptr, &subStatus));

 #if FB_DEBUG
    {
@@ -564,7 +590,7 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
  LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
  LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
  if(U_FAILURE(status)) {
-    return NULL;
+    return nullptr;
  }

  int32_t revCount = 0;
@@ -588,13 +614,13 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
        i++) {
    const UnicodeString *abbr = fSet.getStringAt(i);
    if(abbr) {
-      FB_TRACE("build",abbr,TRUE,i);
+      FB_TRACE("build",abbr,true,i);
      ustrs[n] = *abbr; // copy by value
-      FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
+      FB_TRACE("ustrs[n]",&ustrs[n],true,i);
    } else {
-      FB_TRACE("build",abbr,FALSE,i);
+      FB_TRACE("build",abbr,false,i);
      status = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
+      return nullptr;
    }
    partials[n] = 0; // default: not partial
    n++;
@@ -603,37 +629,37 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
  for(int i=0;i<subCount;i++) {
    int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
    if(nn>-1 && (nn+1)!=ustrs[i].length()) {
-      FB_TRACE("partial",&ustrs[i],FALSE,i);
+      FB_TRACE("partial",&ustrs[i],false,i);
      // is partial.
      // is it unique?
      int sameAs = -1;
      for(int j=0;j<subCount;j++) {
        if(j==i) continue;
        if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
-          FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
+          FB_TRACE("prefix",&ustrs[j],false,nn+1);
          //UBool otherIsPartial = ((nn+1)!=ustrs[j].length());  // true if ustrs[j] doesn't end at nn
          if(partials[j]==0) { // hasn't been processed yet
            partials[j] = kSuppressInReverse | kAddToForward;
-            FB_TRACE("suppressing",&ustrs[j],FALSE,j);
+            FB_TRACE("suppressing",&ustrs[j],false,j);
          } else if(partials[j] & kSuppressInReverse) {
            sameAs = j; // the other entry is already in the reverse table.
          }
        }
      }
-      FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
-      FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
+      FB_TRACE("for partial same-",&ustrs[i],false,sameAs);
+      FB_TRACE(" == partial #",&ustrs[i],false,partials[i]);
      UnicodeString prefix(ustrs[i], 0, nn+1);
      if(sameAs == -1 && partials[i] == 0) {
        // first one - add the prefix to the reverse table.
        prefix.reverse();
        builder->add(prefix, kPARTIAL, status);
        revCount++;
-        FB_TRACE("Added partial",&prefix,FALSE, i);
-        FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
+        FB_TRACE("Added partial",&prefix,false, i);
+        FB_TRACE(u_errorName(status),&ustrs[i],false,i);
        partials[i] = kSuppressInReverse | kAddToForward;
      } else {
-        FB_TRACE("NOT adding partial",&prefix,FALSE, i);
-        FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
+        FB_TRACE("NOT adding partial",&prefix,false, i);
+        FB_TRACE(u_errorName(status),&ustrs[i],false,i);
      }
    }
  }
@@ -642,9 +668,9 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
      ustrs[i].reverse();
      builder->add(ustrs[i], kMATCH, status);
      revCount++;
-      FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
+      FB_TRACE(u_errorName(status), &ustrs[i], false, i);
    } else {
-      FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
+      FB_TRACE("Adding fwd",&ustrs[i], false, i);

      // an optimization would be to only add the portion after the '.'
      // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
@@ -656,21 +682,21 @@ SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
      ////if(debug2) u_printf("SUPPRESS- not Added(%d):  /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
    }
  }
-  FB_TRACE("AbbrCount",NULL,FALSE, subCount);
+  FB_TRACE("AbbrCount",nullptr,false, subCount);

  if(revCount>0) {
    backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
    if(U_FAILURE(status)) {
-      FB_TRACE(u_errorName(status),NULL,FALSE, -1);
-      return NULL;
+      FB_TRACE(u_errorName(status),nullptr,false, -1);
+      return nullptr;
    }
  }

  if(fwdCount>0) {
    forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
    if(U_FAILURE(status)) {
-      FB_TRACE(u_errorName(status),NULL,FALSE, -1);
-      return NULL;
+      FB_TRACE(u_errorName(status),nullptr,false, -1);
+      return nullptr;
    }
  }

@@ -688,9 +714,9 @@ FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {

 FilteredBreakIteratorBuilder *
 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
-  if(U_FAILURE(status)) return NULL;
+  if(U_FAILURE(status)) return nullptr;
  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
-  return (U_SUCCESS(status))? ret.orphan(): NULL;
+  return (U_SUCCESS(status))? ret.orphan(): nullptr;
 }

 FilteredBreakIteratorBuilder *
@@ -700,9 +726,9 @@ FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {

 FilteredBreakIteratorBuilder *
 FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
-  if(U_FAILURE(status)) return NULL;
+  if(U_FAILURE(status)) return nullptr;
  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
-  return (U_SUCCESS(status))? ret.orphan(): NULL;
+  return (U_SUCCESS(status))? ret.orphan(): nullptr;
 }

 U_NAMESPACE_END
@@ -137,14 +137,14 @@ UnicodeString &
 FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                              const UnicodeString &second,
                                              UErrorCode &errorCode) const {
-    return normalizeSecondAndAppend(first, second, TRUE, errorCode);
+    return normalizeSecondAndAppend(first, second, true, errorCode);
 }

 UnicodeString &
 FilteredNormalizer2::append(UnicodeString &first,
                            const UnicodeString &second,
                            UErrorCode &errorCode) const {
-    return normalizeSecondAndAppend(first, second, FALSE, errorCode);
+    return normalizeSecondAndAppend(first, second, false, errorCode);
 }

 UnicodeString &
@@ -224,7 +224,7 @@ UBool
 FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
-        return FALSE;
+        return false;
    }
    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
@@ -235,19 +235,19 @@ FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode)
            if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
                U_FAILURE(errorCode)
            ) {
-                return FALSE;
+                return false;
            }
            spanCondition=USET_SPAN_NOT_CONTAINED;
        }
        prevSpanLimit=spanLimit;
    }
-    return TRUE;
+    return true;
 }

 UBool
 FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
-        return FALSE;
+        return false;
    }
    const char *s = sp.data();
    int32_t length = sp.length();
@@ -259,14 +259,14 @@ FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) con
        } else {
            if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) ||
                    U_FAILURE(errorCode)) {
-                return FALSE;
+                return false;
            }
            spanCondition = USET_SPAN_NOT_CONTAINED;
        }
        s += spanLength;
        length -= spanLength;
    }
-    return TRUE;
+    return true;
 }

 UNormalizationCheckResult
@@ -346,15 +346,15 @@ U_NAMESPACE_USE
 U_CAPI UNormalizer2 * U_EXPORT2
 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
-        return NULL;
+        return nullptr;
    }
-    if(filterSet==NULL) {
+    if(filterSet==nullptr) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
+        return nullptr;
    }
    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
                                             *UnicodeSet::fromUSet(filterSet));
-    if(fn2==NULL) {
+    if(fn2==nullptr) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return (UNormalizer2 *)fn2;
@@ -0,0 +1,29 @@
+// © 2025 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+#include "fixedstring.h"
+
+#include "unicode/unistr.h"
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+// Copies src into dst as invariant characters (US_INV); no-op if status is a failure.
+U_EXPORT void copyInvariantChars(const UnicodeString& src, FixedString& dst, UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
+
+    if (src.isEmpty()) {
+        dst.clear();  // nothing to copy; dst ends up cleared
+        return;
+    }
+
+    const int32_t srcLength = src.length();
+    const int32_t capacity = srcLength + 1;  // one extra char beyond the text
+    if (dst.reserve(capacity)) {
+        src.extract(0, srcLength, dst.getAlias(), capacity, US_INV);
+    } else {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+U_NAMESPACE_END
@@ -0,0 +1,132 @@
+// © 2025 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+#ifndef FIXEDSTRING_H
+#define FIXEDSTRING_H
+
+#include <string_view>
+#include <utility>
+
+#include "unicode/uobject.h"
+#include "unicode/utypes.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * ICU-internal fixed-length char* string class.
+ * This is a complement to CharString to store fixed-length strings efficiently
+ * (not allocating any unnecessary storage for future additions to the string).
+ *
+ * A terminating NUL is always stored, but the length of the string isn't.
+ * An empty string is stored as nullptr, allocating no storage at all.
+ *
+ * This class wants to be convenient but is also deliberately minimalist.
+ * Please do not add methods if they only add minor convenience.
+ */
+class FixedString : public UMemory {
+public:
+    /** Creates an empty string; no storage is allocated. */
+    FixedString() = default;
+    ~FixedString() { operator delete[](ptr); }
+
+    /** Copy constructor; delegates to the string_view constructor. */
+    FixedString(const FixedString& other) : FixedString(other.data()) {}
+
+    /**
+     * Copies init and appends a terminating NUL.
+     * The string stays empty if init is empty or if reserve() fails.
+     */
+    FixedString(std::string_view init) {
+        size_t size = init.size();
+        if (size > 0 && reserve(size + 1)) {
+            uprv_memcpy(ptr, init.data(), size);
+            ptr[size] = '\0';
+        }
+    }
+
+    /** Copy assignment; assigning a string to itself leaves it unchanged. */
+    FixedString& operator=(const FixedString& other) {
+        if (this != &other) {
+            *this = other.data();
+        }
+        return *this;
+    }
+
+    /**
+     * Replaces the contents with a NUL-terminated copy of init.
+     * The string becomes empty if init is empty or if allocation fails.
+     *
+     * init may point into this string's current storage (for example
+     * s = s.data()), so the copy is made into a new buffer before the old
+     * buffer is released.
+     */
+    FixedString& operator=(std::string_view init) {
+        char* fresh = nullptr;
+        if (!init.empty()) {
+            size_t size = init.size();
+            fresh = static_cast<char*>(operator new[](size + 1));
+            if (fresh != nullptr) {
+                uprv_memcpy(fresh, init.data(), size);
+                fresh[size] = '\0';
+            }
+        }
+        operator delete[](ptr);
+        ptr = fresh;
+        return *this;
+    }
+
+    /** Move constructor; takes over other's storage and leaves other empty. */
+    FixedString(FixedString&& other) noexcept : ptr(std::exchange(other.ptr, nullptr)) {}
+
+    /** Move assignment; releases the current storage and takes over other's. */
+    FixedString& operator=(FixedString&& other) noexcept {
+        operator delete[](ptr);
+        ptr = other.ptr;
+        other.ptr = nullptr;
+        return *this;
+    }
+
+    /** Releases the storage, making the string empty. */
+    void clear() {
+        operator delete[](ptr);
+        ptr = nullptr;
+    }
+
+    /** Returns the NUL-terminated contents, or "" for an empty string; never nullptr. */
+    const char* data() const {
+        return isEmpty() ? "" : ptr;
+    }
+
+    /** Returns the raw storage pointer for writing; nullptr when empty. */
+    char* getAlias() {
+        return ptr;
+    }
+
+    /** Returns true if no storage is held. */
+    bool isEmpty() const {
+        return ptr == nullptr;
+    }
+
+    /**
+     * Allocate storage for a new string, without initializing it.
+     * Any previous contents are released first. Returns false, leaving the
+     * string empty, if the allocation yields nullptr.
+     */
+    bool reserve(size_t size) {
+        operator delete[](ptr);
+        ptr = static_cast<char*>(operator new[](size));
+        return ptr != nullptr;
+    }
+
+private:
+    char* ptr = nullptr;
+};
+
+U_COMMON_API void copyInvariantChars(const UnicodeString& src, FixedString& dst, UErrorCode& status);
+
+U_NAMESPACE_END
+
+#endif
@@ -41,7 +41,7 @@ public:
     * @param ignoreKeyCase If true, keys are case insensitive.
     * @param status Error code
    */
-    Hashtable(UBool ignoreKeyCase, UErrorCode& status);
+    inline Hashtable(UBool ignoreKeyCase, UErrorCode& status);

    /**
     * Construct a hashtable
@@ -49,7 +49,7 @@ public:
     * @param size initial size allocation
     * @param status Error code
    */
-    Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
+    inline Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);

    /**
     * Construct a hashtable
@@ -57,60 +57,66 @@ public:
     * @param valueComp Comparator for comparing the values
     * @param status Error code
    */
-    Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
+    inline Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);

    /**
     * Construct a hashtable
     * @param status Error code
    */
-    Hashtable(UErrorCode& status);
+    inline Hashtable(UErrorCode& status);

    /**
     * Construct a hashtable, _disregarding any error_.  Use this constructor
     * with caution.
     */
-    Hashtable();
+    inline Hashtable();

    /**
     * Non-virtual destructor; make this virtual if Hashtable is subclassed
     * in the future.
     */
-    ~Hashtable();
+    inline ~Hashtable();

-    UObjectDeleter *setValueDeleter(UObjectDeleter *fn);
+    inline UObjectDeleter *setValueDeleter(UObjectDeleter *fn);

-    int32_t count() const;
+    inline int32_t count() const;

-    void* put(const UnicodeString& key, void* value, UErrorCode& status);
+    inline void* put(const UnicodeString& key, void* value, UErrorCode& status);

-    int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
+    inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);

-    void* get(const UnicodeString& key) const;
+    inline int32_t putiAllowZero(const UnicodeString& key, int32_t value, UErrorCode& status);

-    int32_t geti(const UnicodeString& key) const;
+    inline void* get(const UnicodeString& key) const;

-    void* remove(const UnicodeString& key);
+    inline int32_t geti(const UnicodeString& key) const;

-    int32_t removei(const UnicodeString& key);
+    inline int32_t getiAndFound(const UnicodeString& key, UBool &found) const;

-    void removeAll(void);
+    inline void* remove(const UnicodeString& key);

-    const UHashElement* find(const UnicodeString& key) const;
+    inline int32_t removei(const UnicodeString& key);
+
+    inline void removeAll();
+
+    inline UBool containsKey(const UnicodeString& key) const;
+
+    inline const UHashElement* find(const UnicodeString& key) const;

    /**
     * @param pos - must be UHASH_FIRST on first call, and untouched afterwards.
     * @see uhash_nextElement
     */
-    const UHashElement* nextElement(int32_t& pos) const;
+    inline const UHashElement* nextElement(int32_t& pos) const;

-    UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
+    inline UKeyComparator* setKeyComparator(UKeyComparator*keyComp);

-    UValueComparator* setValueComparator(UValueComparator* valueComp);
+    inline UValueComparator* setValueComparator(UValueComparator* valueComp);

-    UBool equals(const Hashtable& that) const;
+    inline UBool equals(const Hashtable& that) const;
 private:
-    Hashtable(const Hashtable &other); // forbid copying of this class
-    Hashtable &operator=(const Hashtable &other); // forbid copying of this class
+    Hashtable(const Hashtable &other) = delete; // forbid copying of this class
+    Hashtable &operator=(const Hashtable &other) = delete; // forbid copying of this class
 };

 /*********************************************************************
@@ -142,47 +148,47 @@ inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
 }

 inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
-                 UErrorCode& status) : hash(0) {
+                 UErrorCode& status) : hash(nullptr) {
    init( uhash_hashUnicodeString, keyComp, valueComp, status);
 }

 inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
- : hash(0)
+ : hash(nullptr)
 {
    init(ignoreKeyCase ? uhash_hashCaselessUnicodeString
                        : uhash_hashUnicodeString,
            ignoreKeyCase ? uhash_compareCaselessUnicodeString
                        : uhash_compareUnicodeString,
-            NULL,
+            nullptr,
            status);
 }

 inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
- : hash(0)
+ : hash(nullptr)
 {
    initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString
                        : uhash_hashUnicodeString,
            ignoreKeyCase ? uhash_compareCaselessUnicodeString
                        : uhash_compareUnicodeString,
-            NULL, size,
+            nullptr, size,
            status);
 }

 inline Hashtable::Hashtable(UErrorCode& status)
- : hash(0)
+ : hash(nullptr)
 {
-    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
+    init(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, status);
 }

 inline Hashtable::Hashtable()
- : hash(0)
+ : hash(nullptr)
 {
    UErrorCode status = U_ZERO_ERROR;
-    init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
+    init(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, status);
 }

 inline Hashtable::~Hashtable() {
-    if (hash != NULL) {
+    if (hash != nullptr) {
        uhash_close(hash);
    }
 }
@@ -203,6 +209,11 @@ inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCo
    return uhash_puti(hash, new UnicodeString(key), value, &status);
 }

+inline int32_t Hashtable::putiAllowZero(const UnicodeString& key, int32_t value,
+                                        UErrorCode& status) {
+    return uhash_putiAllowZero(hash, new UnicodeString(key), value, &status);
+}
+
 inline void* Hashtable::get(const UnicodeString& key) const {
    return uhash_get(hash, &key);
 }
@@ -211,6 +222,10 @@ inline int32_t Hashtable::geti(const UnicodeString& key) const {
    return uhash_geti(hash, &key);
 }

+inline int32_t Hashtable::getiAndFound(const UnicodeString& key, UBool &found) const {
+    return uhash_getiAndFound(hash, &key, &found);
+}
+
 inline void* Hashtable::remove(const UnicodeString& key) {
    return uhash_remove(hash, &key);
 }
@@ -219,6 +234,10 @@ inline int32_t Hashtable::removei(const UnicodeString& key) {
    return uhash_removei(hash, &key);
 }

+inline UBool Hashtable::containsKey(const UnicodeString& key) const {
+    return uhash_containsKey(hash, &key);
+}
+
 inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
    return uhash_find(hash, &key);
 }
@@ -227,7 +246,7 @@ inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
    return uhash_nextElement(hash, &pos);
 }

-inline void Hashtable::removeAll(void) {
+inline void Hashtable::removeAll() {
    uhash_removeAll(hash);
 }

@@ -15,14 +15,14 @@
 #include "uresimp.h" /* for ures_getVersionByKey */

 U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
-    UResourceBundle *icudatares = NULL;
+    UResourceBundle *icudatares = nullptr;
    
    if (U_FAILURE(*status)) {
        return;
    }
    
-    if (dataVersionFillin != NULL) {
-        icudatares = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status);
+    if (dataVersionFillin != nullptr) {
+        icudatares = ures_openDirect(nullptr, U_ICU_VERSION_BUNDLE , status);
        if (U_SUCCESS(*status)) {
            ures_getVersionByKey(icudatares, U_ICU_DATA_KEY, dataVersionFillin, status);
        }
@@ -52,15 +52,15 @@ struct UPlugData {
  UPlugEntrypoint  *entrypoint; /**< plugin entrypoint */
  uint32_t structSize;    /**< initialized to the size of this structure */
  uint32_t token;         /**< must be U_PLUG_TOKEN */
-  void *lib;              /**< plugin library, or NULL */
+  void *lib;              /**< plugin library, or nullptr */
  char libName[UPLUG_NAME_MAX];   /**< library name */
-  char sym[UPLUG_NAME_MAX];        /**< plugin symbol, or NULL */
+  char sym[UPLUG_NAME_MAX];        /**< plugin symbol, or nullptr */
  char config[UPLUG_NAME_MAX];     /**< configuration data */
  void *context;          /**< user context data */
  char name[UPLUG_NAME_MAX];   /**< name of plugin */
  UPlugLevel  level; /**< level of plugin */
-  UBool   awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
-  UBool   dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
+  UBool   awaitingLoad; /**< true if the plugin is awaiting a load call */
+  UBool   dontUnload; /**< true if plugin must stay resident (leak plugin and lib) */
  UErrorCode pluginStatus; /**< status code of plugin */
 };

@@ -145,12 +145,12 @@ static int32_t searchForLibrary(void *lib) {
  return -1;
 }

-U_INTERNAL char * U_EXPORT2
+U_CAPI char * U_EXPORT2
 uplug_findLibrary(void *lib, UErrorCode *status) {
  int32_t libEnt;
-  char *ret = NULL;
+  char *ret = nullptr;
  if(U_FAILURE(*status)) {
-    return NULL;
+    return nullptr;
  }
  libEnt = searchForLibrary(lib);
  if(libEnt!=-1) { 
@@ -161,12 +161,12 @@ uplug_findLibrary(void *lib, UErrorCode *status) {
  return ret;
 }

-U_INTERNAL void * U_EXPORT2
+U_CAPI void * U_EXPORT2
 uplug_openLibrary(const char *libName, UErrorCode *status) {
  int32_t libEntry = -1;
-  void *lib = NULL;
+  void *lib = nullptr;
    
-  if(U_FAILURE(*status)) return NULL;
+  if(U_FAILURE(*status)) return nullptr;

  libEntry = searchForLibraryName(libName);
  if(libEntry == -1) {
@@ -177,7 +177,7 @@ uplug_openLibrary(const char *libName, UErrorCode *status) {
 #if UPLUG_TRACE
      DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax));
 #endif
-      return NULL;
+      return nullptr;
    }
    /* Some operating systems don't want 
       DL operations from multiple threads. */
@@ -186,9 +186,9 @@ uplug_openLibrary(const char *libName, UErrorCode *status) {
    DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
 #endif
        
-    if(libraryList[libEntry].lib == NULL || U_FAILURE(*status)) {
+    if(libraryList[libEntry].lib == nullptr || U_FAILURE(*status)) {
      /* cleanup. */
-      libraryList[libEntry].lib = NULL; /* failure with open */
+      libraryList[libEntry].lib = nullptr; /* failure with open */
      libraryList[libEntry].name[0] = 0;
 #if UPLUG_TRACE
      DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib));
@@ -209,7 +209,7 @@ uplug_openLibrary(const char *libName, UErrorCode *status) {
  return lib;
 }

-U_INTERNAL void U_EXPORT2
+U_CAPI void U_EXPORT2
 uplug_closeLibrary(void *lib, UErrorCode *status) {
  int32_t i;
    
@@ -252,14 +252,14 @@ static int32_t uplug_pluginNumber(UPlugData* d) {

 U_CAPI UPlugData * U_EXPORT2
 uplug_nextPlug(UPlugData *prior) {
-  if(prior==NULL) {
+  if(prior==nullptr) {
    return pluginList;
  } else {
    UPlugData *nextPlug = &prior[1];
    UPlugData *pastPlug = &pluginList[pluginCount];
    
    if(nextPlug>=pastPlug) {
-      return NULL;
+      return nullptr;
    } else {
      return nextPlug;
    }
@@ -273,7 +273,7 @@ uplug_nextPlug(UPlugData *prior) {
 */
 static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) {
  UPlugTokenReturn token;
-  if(plug==NULL||U_FAILURE(*status)) {
+  if(plug==nullptr||U_FAILURE(*status)) {
    return;
  }
  token = (*(plug->entrypoint))(plug, reason, status);
@@ -284,7 +284,7 @@ static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *stat


 static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
-  if(plug->awaitingLoad) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
+  if(plug->awaitingLoad) {  /* shouldn't happen. Plugin hasn't been loaded yet.*/
    *status = U_INTERNAL_PROGRAM_ERROR;
    return; 
  }
@@ -295,7 +295,7 @@ static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
 }

 static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
-  if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
+  if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) {  /* shouldn't happen. Plugin hasn't been loaded yet.*/
    *status = U_INTERNAL_PROGRAM_ERROR;
    return; 
  }
@@ -304,11 +304,11 @@ static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
  if(U_SUCCESS(*status)) { 
    if(plug->level == UPLUG_LEVEL_INVALID) {
      plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
-      plug->awaitingLoad = FALSE;
+      plug->awaitingLoad = false;
    }
  } else {
    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
-    plug->awaitingLoad = FALSE;
+    plug->awaitingLoad = false;
  }
 }

@@ -317,12 +317,12 @@ static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
  if(U_FAILURE(*status)) {
    return;
  }
-  if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
+  if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) {  /* shouldn't happen. Plugin hasn't been loaded yet.*/
    *status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }
  uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
-  plug->awaitingLoad = FALSE;
+  plug->awaitingLoad = false;
  if(!U_SUCCESS(*status)) {
    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
  }
@@ -330,15 +330,15 @@ static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {

 static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
 {
-  UPlugData *plug = NULL;
+  UPlugData *plug = nullptr;

  if(U_FAILURE(*status)) {
-    return NULL;
+    return nullptr;
  }

  if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) {
    *status = U_MEMORY_ALLOCATION_ERROR;
-    return NULL;
+    return nullptr;
  }

  plug = &pluginList[pluginCount++];
@@ -347,14 +347,14 @@ static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
  plug->structSize = sizeof(UPlugData);
  plug->name[0]=0;
  plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
-  plug->awaitingLoad = TRUE;
-  plug->dontUnload = FALSE;
+  plug->awaitingLoad = true;
+  plug->dontUnload = false;
  plug->pluginStatus = U_ZERO_ERROR;
  plug->libName[0] = 0;
  plug->config[0]=0;
  plug->sym[0]=0;
-  plug->lib=NULL;
-  plug->entrypoint=NULL;
+  plug->lib=nullptr;
+  plug->entrypoint=nullptr;


  return plug;
@@ -364,16 +364,16 @@ static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *co
                                     UErrorCode *status) {
  UPlugData *plug = uplug_allocateEmptyPlug(status);
  if(U_FAILURE(*status)) {
-    return NULL;
+    return nullptr;
  }

-  if(config!=NULL) {
+  if(config!=nullptr) {
    uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
  } else {
    plug->config[0] = 0;
  }
    
-  if(symName!=NULL) {
+  if(symName!=nullptr) {
    uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX);
  } else {
    plug->sym[0] = 0;
@@ -393,7 +393,7 @@ static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
    uplug_closeLibrary(plug->lib, &subStatus);
 #endif
  }
-  plug->lib = NULL;
+  plug->lib = nullptr;
  if(U_SUCCESS(*status) && U_FAILURE(subStatus)) {
    *status = subStatus;
  }
@@ -403,14 +403,14 @@ static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
    pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
  } else {
    /* not ok- leave as a message. */
-    plug->awaitingLoad=FALSE;
+    plug->awaitingLoad=false;
    plug->entrypoint=0;
-    plug->dontUnload=TRUE;
+    plug->dontUnload=true;
  }
 }

 static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {
-  if(plugToRemove != NULL) {
+  if(plugToRemove != nullptr) {
    uplug_unloadPlug(plugToRemove, status);
    uplug_deallocatePlug(plugToRemove, status);
  }
@@ -418,14 +418,14 @@ static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {

 U_CAPI void U_EXPORT2
 uplug_removePlug(UPlugData *plug, UErrorCode *status)  {
-  UPlugData *cursor = NULL;
-  UPlugData *plugToRemove = NULL;
+  UPlugData *cursor = nullptr;
+  UPlugData *plugToRemove = nullptr;
  if(U_FAILURE(*status)) return;
    
-  for(cursor=pluginList;cursor!=NULL;) {
+  for(cursor=pluginList;cursor!=nullptr;) {
    if(cursor==plug) {
      plugToRemove = plug;
-      cursor=NULL;
+      cursor=nullptr;
    } else {
      cursor = uplug_nextPlug(cursor);
    }
@@ -481,7 +481,7 @@ uplug_getLibraryName(UPlugData *data, UErrorCode *status) {
 #if U_ENABLE_DYLOAD
    return uplug_findLibrary(data->lib, status);
 #else
-    return NULL;
+    return nullptr;
 #endif
  }
 }
@@ -507,10 +507,10 @@ uplug_getConfiguration(UPlugData *data) {
  return data->config;
 }

-U_INTERNAL UPlugData* U_EXPORT2
+U_CAPI UPlugData* U_EXPORT2
 uplug_getPlugInternal(int32_t n) { 
  if(n <0 || n >= pluginCount) {
-    return NULL;
+    return nullptr;
  } else { 
    return &(pluginList[n]);
  }
@@ -526,11 +526,11 @@ uplug_getPlugLoadStatus(UPlugData *plug) {


 /**
- * Initialize a plugin fron an entrypoint and library - but don't load it.
+ * Initialize a plugin from an entrypoint and library - but don't load it.
 */
 static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
                                                         UErrorCode *status) {
-  UPlugData *plug = NULL;
+  UPlugData *plug = nullptr;

  plug = uplug_allocatePlug(entrypoint, config, lib, sym, status);

@@ -538,13 +538,13 @@ static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entryp
    return plug;
  } else {
    uplug_deallocatePlug(plug, status);
-    return NULL;
+    return nullptr;
  }
 }

 U_CAPI UPlugData* U_EXPORT2
 uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) {  /* Builds a plug entry for `entrypoint` and then issues the load call on it. */
-  UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status);
+  UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, nullptr, nullptr, status);  /* no library handle and no symbol name are passed */
   uplug_loadPlug(plug, status);  /* uplug_loadPlug() returns immediately when *status already holds a failure */
   return plug;  /* whatever the init step produced; it returns nullptr on its failure path */
 }
@@ -555,25 +555,25 @@ static UPlugData*
 uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status)
 {
  UPlugData *plug = uplug_allocateEmptyPlug(status);
-  if(U_FAILURE(*status)) return NULL;
+  if(U_FAILURE(*status)) return nullptr;

  plug->pluginStatus = loadStatus;
-  plug->awaitingLoad = FALSE; /* Won't load. */
-  plug->dontUnload = TRUE; /* cannot unload. */
+  plug->awaitingLoad = false; /* Won't load. */
+  plug->dontUnload = true; /* cannot unload. */

-  if(sym!=NULL) {
+  if(sym!=nullptr) {
    uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX);
  }

-  if(libName!=NULL) {
+  if(libName!=nullptr) {
    uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX);
  }

-  if(nameOrError!=NULL) {
+  if(nameOrError!=nullptr) {
    uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX);
  }

-  if(config!=NULL) {
+  if(config!=nullptr) {
    uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
  }

@@ -585,39 +585,39 @@ uplug_initErrorPlug(const char *libName, const char *sym, const char *config, co
 */
 static UPlugData* 
 uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {  /* Opens libName, looks up sym in it and builds a plug entry; when either step fails an error-record plug is built instead. */
-  void *lib = NULL;
-  UPlugData *plug = NULL;
-  if(U_FAILURE(*status)) { return NULL; }
+  void *lib = nullptr;
+  UPlugData *plug = nullptr;
+  if(U_FAILURE(*status)) { return nullptr; }
   lib = uplug_openLibrary(libName, status);
-  if(lib!=NULL && U_SUCCESS(*status)) {
-    UPlugEntrypoint *entrypoint = NULL;
+  if(lib!=nullptr && U_SUCCESS(*status)) {
+    UPlugEntrypoint *entrypoint = nullptr;
     entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status);

-    if(entrypoint!=NULL&&U_SUCCESS(*status)) {
+    if(entrypoint!=nullptr&&U_SUCCESS(*status)) {
       plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status);
-      if(plug!=NULL&&U_SUCCESS(*status)) {
+      if(plug!=nullptr&&U_SUCCESS(*status)) {
         plug->lib = lib; /* plug takes ownership of library */
-        lib = NULL; /* library is now owned by plugin. */
+        lib = nullptr; /* library is now owned by plugin. */
       }
     } else {
       UErrorCode subStatus = U_ZERO_ERROR;
-      plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
+      plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==nullptr)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);  /* lib is non-null in this branch (outer if), so the recorded load status is the current value of *status; the allocation itself reports into subStatus */
     }
-    if(lib!=NULL) { /* still need to close the lib */
+    if(lib!=nullptr) { /* still need to close the lib */
       UErrorCode subStatus = U_ZERO_ERROR;
       uplug_closeLibrary(lib, &subStatus); /* don't care here */
     }
   } else {
     UErrorCode subStatus = U_ZERO_ERROR;
-    plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
+    plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==nullptr)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);  /* a null lib is recorded as U_MISSING_RESOURCE_ERROR, otherwise the failing *status is recorded */
   }
   return plug;
 }

 U_CAPI UPlugData* U_EXPORT2
 uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) { 
-  UPlugData *plug = NULL;
-  if(U_FAILURE(*status)) { return NULL; }
+  UPlugData *plug = nullptr;
+  if(U_FAILURE(*status)) { return nullptr; }
  plug = uplug_initPlugFromLibrary(libName, sym, config, status);
  uplug_loadPlug(plug, status);

@@ -632,7 +632,7 @@ U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() {
  return gCurrentLevel;
 }

-static UBool U_CALLCONV uplug_cleanup(void)
+static UBool U_CALLCONV uplug_cleanup()
 {
  int32_t i;
    
@@ -646,7 +646,7 @@ static UBool U_CALLCONV uplug_cleanup(void)
  }
  /* close other held libs? */
  gCurrentLevel = UPLUG_LEVEL_LOW;
-  return TRUE;
+  return true;
 }

 #if U_ENABLE_DYLOAD
@@ -678,7 +678,7 @@ static void uplug_loadWaitingPlugs(UErrorCode *status) {
            currentLevel = newLevel;
          }
        }
-        pluginToLoad->awaitingLoad = FALSE;
+        pluginToLoad->awaitingLoad = false;
      } 
    }
  }    
@@ -694,7 +694,7 @@ static void uplug_loadWaitingPlugs(UErrorCode *status) {
      } else {
        uplug_loadPlug(pluginToLoad, &subStatus);
      }
-      pluginToLoad->awaitingLoad = FALSE;
+      pluginToLoad->awaitingLoad = false;
    }
  }
    
@@ -707,12 +707,12 @@ static void uplug_loadWaitingPlugs(UErrorCode *status) {
 static char plugin_file[2048] = "";
 #endif

-U_INTERNAL const char* U_EXPORT2
+U_CAPI const char* U_EXPORT2
 uplug_getPluginFile() {  /* Returns the static plugin_file buffer when U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO, otherwise nullptr. */
 #if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO
   return plugin_file;
 #else
-  return NULL;
+  return nullptr;
 #endif
 }

@@ -728,7 +728,7 @@ uplug_init(UErrorCode *status) {
  const char *env = getenv("ICU_PLUGINS");

  if(U_FAILURE(*status)) return;
-  if(env != NULL) {
+  if(env != nullptr) {
    plugin_dir.append(env, -1, *status);
  }
  if(U_FAILURE(*status)) return;
@@ -747,7 +747,7 @@ uplug_init(UErrorCode *status) {
    FILE *f;
        
    CharString pluginFile;
-#ifdef OS390BATCH
+#ifdef ICU_PLUGINS_DD
 /* There are potentially a lot of ways to implement a plugin directory on OS390/zOS  */
 /* Keeping in mind that unauthorized file access is logged, monitored, and enforced  */
 /* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX    */
@@ -782,8 +782,8 @@ uplug_init(UErrorCode *status) {
    /* plugin_file is not used for processing - it is only used 
       so that uplug_getPluginFile() works (i.e. icuinfo)
    */
-    uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file));
-        
+    pluginFile.extract(plugin_file, sizeof(plugin_file), *status);
+
 #if UPLUG_TRACE
    DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
 #endif
@@ -791,7 +791,7 @@ uplug_init(UErrorCode *status) {
 #ifdef __MVS__
    if (iscics()) /* 12 Nov 2011 JAM */
    {
-        f = NULL;
+        f = nullptr;
    }
    else
 #endif
@@ -799,9 +799,9 @@ uplug_init(UErrorCode *status) {
        f = fopen(pluginFile.data(), "r");
    }

-    if(f != NULL) {
+    if(f != nullptr) {
      char linebuf[1024];
-      char *p, *libName=NULL, *symName=NULL, *config=NULL;
+      char *p, *libName=nullptr, *symName=nullptr, *config=nullptr;
      int32_t line = 0;
            
            
@@ -843,7 +843,7 @@ uplug_init(UErrorCode *status) {
          }
                    
          /* chop whitespace at the end of the config */
-          if(config!=NULL&&*config!=0) {
+          if(config!=nullptr&&*config!=0) {
            p = config+strlen(config);
            while(p>config&&isspace((int)*(--p))) {
              *p=0;
@@ -36,7 +36,7 @@
 * @return the library pointer, or NULL
 * @internal internal use only
 */
-U_INTERNAL void * U_EXPORT2
+U_CAPI void * U_EXPORT2
 uplug_openLibrary(const char *libName, UErrorCode *status);

 /**
@@ -45,7 +45,7 @@ uplug_openLibrary(const char *libName, UErrorCode *status);
 * @param status error code
 * @internal internal use only
 */
-U_INTERNAL void U_EXPORT2
+U_CAPI void U_EXPORT2
 uplug_closeLibrary(void *lib, UErrorCode *status);

 /**
@@ -55,7 +55,7 @@ uplug_closeLibrary(void *lib, UErrorCode *status);
 * @return the library name, or NULL if not found.
 * @internal internal use only
 */
-U_INTERNAL  char * U_EXPORT2
+U_CAPI  char * U_EXPORT2
 uplug_findLibrary(void *lib, UErrorCode *status);

 /** @} */
@@ -69,21 +69,21 @@ uplug_findLibrary(void *lib, UErrorCode *status);
 * @param status error result
 * @internal - Internal use only.
 */
-U_INTERNAL void U_EXPORT2
+U_CAPI void U_EXPORT2
 uplug_init(UErrorCode *status);

 /**
 * Get raw plug N
 * @internal - Internal use only
 */ 
-U_INTERNAL UPlugData* U_EXPORT2
+U_CAPI UPlugData* U_EXPORT2
 uplug_getPlugInternal(int32_t n);

 /**
 * Get the name of the plugin file. 
 * @internal - Internal use only.
 */
-U_INTERNAL const char* U_EXPORT2
+U_CAPI const char* U_EXPORT2
 uplug_getPluginFile(void);

 /** @} */
@@ -33,7 +33,7 @@ U_NAMESPACE_BEGIN

 class LoadedNormalizer2Impl : public Normalizer2Impl {
 public:
-    LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
+    LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
    virtual ~LoadedNormalizer2Impl();

    void load(const char *packageName, const char *name, UErrorCode &errorCode);
@@ -63,13 +63,13 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
        pInfo->dataFormat[1]==0x72 &&
        pInfo->dataFormat[2]==0x6d &&
        pInfo->dataFormat[3]==0x32 &&
-        pInfo->formatVersion[0]==4
+        pInfo->formatVersion[0]==5
    ) {
        // Normalizer2Impl *me=(Normalizer2Impl *)context;
        // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
-        return TRUE;
+        return true;
    } else {
-        return FALSE;
+        return false;
    }
 }

@@ -82,8 +82,8 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
    if(U_FAILURE(errorCode)) {
        return;
    }
-    const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
-    const int32_t *inIndexes=(const int32_t *)inBytes;
+    const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(memory));
+    const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);
    int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
    if(indexesLength<=IX_MIN_LCCC_CP) {
        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
@@ -93,7 +93,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
    int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
    int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
    ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
-                                     inBytes+offset, nextOffset-offset, NULL,
+                                     inBytes+offset, nextOffset-offset, nullptr,
                                     &errorCode);
    if(U_FAILURE(errorCode)) {
        return;
@@ -101,7 +101,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod

    offset=nextOffset;
    nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
-    const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
+    const uint16_t* inExtraData = reinterpret_cast<const uint16_t*>(inBytes + offset);

    // smallFCD: new in formatVersion 2
    offset=nextOffset;
@@ -117,12 +117,12 @@ Norm2AllModes::createInstance(const char *packageName,
                              const char *name,
                              UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
-        return NULL;
+        return nullptr;
    }
    LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
-    if(impl==NULL) {
+    if(impl==nullptr) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
+        return nullptr;
    }
    impl->load(packageName, name, errorCode);
    return createInstance(impl, errorCode);
@@ -134,30 +134,35 @@ U_CDECL_END

 #if !NORM2_HARDCODE_NFC_DATA
 static Norm2AllModes *nfcSingleton;
-static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
+static icu::UInitOnce nfcInitOnce {};
 #endif

 static Norm2AllModes *nfkcSingleton;
-static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
+static icu::UInitOnce nfkcInitOnce {};

 static Norm2AllModes *nfkc_cfSingleton;
-static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
+static icu::UInitOnce nfkc_cfInitOnce {};

-static UHashtable    *cache=NULL;
+static Norm2AllModes *nfkc_scfSingleton;
+static icu::UInitOnce nfkc_scfInitOnce {};
+
+static UHashtable    *cache=nullptr;

 // UInitOnce singleton initialization function: creates the Norm2AllModes singleton selected by `what` ("nfc" unless NORM2_HARDCODE_NFC_DATA, "nfkc", "nfkc_cf" or "nfkc_scf") and registers the cleanup function.
 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
 #if !NORM2_HARDCODE_NFC_DATA
     if (uprv_strcmp(what, "nfc") == 0) {
-        nfcSingleton    = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
+        nfcSingleton    = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
     } else
 #endif
     if (uprv_strcmp(what, "nfkc") == 0) {
-        nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
+        nfkcSingleton    = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
-        nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
+        nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
+    } else if (uprv_strcmp(what, "nfkc_scf") == 0) {
+        nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode);
     } else {
-        U_ASSERT(FALSE);   // Unknown singleton
+        UPRV_UNREACHABLE_EXIT;   // Unknown singleton
     }
     ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);  // runs after every recognized name, whether or not createInstance succeeded
 }
@@ -171,21 +176,25 @@ static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {  // Deletes each loaded singleton, resets its UInitOnce, closes the instance cache, and always returns true.
 #if !NORM2_HARDCODE_NFC_DATA
     delete nfcSingleton;
-    nfcSingleton = NULL;
+    nfcSingleton = nullptr;
     nfcInitOnce.reset();
 #endif

     delete nfkcSingleton;
-    nfkcSingleton = NULL;
+    nfkcSingleton = nullptr;
     nfkcInitOnce.reset();

     delete nfkc_cfSingleton;
-    nfkc_cfSingleton = NULL;
+    nfkc_cfSingleton = nullptr;
     nfkc_cfInitOnce.reset();

+    delete nfkc_scfSingleton;
+    nfkc_scfSingleton = nullptr;
+    nfkc_scfInitOnce.reset();
+
     uhash_close(cache);  // the cache is the name-keyed table filled by Normalizer2::getInstance()
-    cache=NULL;
-    return TRUE;
+    cache=nullptr;
+    return true;
 }

 U_CDECL_END
@@ -193,7 +202,7 @@ U_CDECL_END
 #if !NORM2_HARDCODE_NFC_DATA
 const Norm2AllModes *
 Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) { return NULL; }
+    if(U_FAILURE(errorCode)) { return nullptr; }
    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    return nfcSingleton;
 }
@@ -201,64 +210,77 @@ Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {

 const Norm2AllModes *
 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) { return NULL; }
+    if(U_FAILURE(errorCode)) { return nullptr; }
    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    return nfkcSingleton;
 }

 const Norm2AllModes *
 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
-    if(U_FAILURE(errorCode)) { return NULL; }
+    if(U_FAILURE(errorCode)) { return nullptr; }
    umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    return nfkc_cfSingleton;
 }

+const Norm2AllModes *
+Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) {  // Returns the "nfkc_scf" singleton, initialized through umtx_initOnce(); nullptr if errorCode is already a failure.
+    if(U_FAILURE(errorCode)) { return nullptr; }
+    umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode);  // initSingletons() fills nfkc_scfSingleton for this name
+    return nfkc_scfSingleton;
+}
+
 #if !NORM2_HARDCODE_NFC_DATA
 const Normalizer2 *
 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
-    return allModes!=NULL ? &allModes->comp : NULL;
+    return allModes!=nullptr ? &allModes->comp : nullptr;
 }

 const Normalizer2 *
 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
-    return allModes!=NULL ? &allModes->decomp : NULL;
+    return allModes!=nullptr ? &allModes->decomp : nullptr;
 }

 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
-    return allModes!=NULL ? &allModes->fcd : NULL;
+    return allModes!=nullptr ? &allModes->fcd : nullptr;
 }

 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
-    return allModes!=NULL ? &allModes->fcc : NULL;
+    return allModes!=nullptr ? &allModes->fcc : nullptr;
 }

 const Normalizer2Impl *
 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
-    return allModes!=NULL ? allModes->impl : NULL;
+    return allModes!=nullptr ? allModes->impl : nullptr;
 }
 #endif

 const Normalizer2 *
 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
-    return allModes!=NULL ? &allModes->comp : NULL;
+    return allModes!=nullptr ? &allModes->comp : nullptr;
 }

 const Normalizer2 *
 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
-    return allModes!=NULL ? &allModes->decomp : NULL;
+    return allModes!=nullptr ? &allModes->decomp : nullptr;
 }

 const Normalizer2 *
 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
-    return allModes!=NULL ? &allModes->comp : NULL;
+    return allModes!=nullptr ? &allModes->comp : nullptr;
+}
+
+const Normalizer2 *
+Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) {  // Returns the `comp` member of the nfkc_scf Norm2AllModes, or nullptr when that instance is unavailable.
+    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
+    return allModes!=nullptr ? &allModes->comp : nullptr;
 }

 const Normalizer2 *
@@ -267,62 +289,64 @@ Normalizer2::getInstance(const char *packageName,
                         UNormalization2Mode mode,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
-        return NULL;
+        return nullptr;
    }
-    if(name==NULL || *name==0) {
+    if(name==nullptr || *name==0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
+        return nullptr;
    }
-    const Norm2AllModes *allModes=NULL;
-    if(packageName==NULL) {
+    const Norm2AllModes *allModes=nullptr;
+    if(packageName==nullptr) {
        if(0==uprv_strcmp(name, "nfc")) {
            allModes=Norm2AllModes::getNFCInstance(errorCode);
        } else if(0==uprv_strcmp(name, "nfkc")) {
            allModes=Norm2AllModes::getNFKCInstance(errorCode);
        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
            allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
+        } else if(0==uprv_strcmp(name, "nfkc_scf")) {
+            allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
        }
    }
-    if(allModes==NULL && U_SUCCESS(errorCode)) {
+    if(allModes==nullptr && U_SUCCESS(errorCode)) {
        {
            Mutex lock;
-            if(cache!=NULL) {
-                allModes=(Norm2AllModes *)uhash_get(cache, name);
+            if(cache!=nullptr) {
+                allModes = static_cast<Norm2AllModes*>(uhash_get(cache, name));
            }
        }
-        if(allModes==NULL) {
+        if(allModes==nullptr) {
            ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
            LocalPointer<Norm2AllModes> localAllModes(
                Norm2AllModes::createInstance(packageName, name, errorCode));
            if(U_SUCCESS(errorCode)) {
                Mutex lock;
-                if(cache==NULL) {
-                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
+                if(cache==nullptr) {
+                    cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
                    if(U_FAILURE(errorCode)) {
-                        return NULL;
+                        return nullptr;
                    }
                    uhash_setKeyDeleter(cache, uprv_free);
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
                }
                void *temp=uhash_get(cache, name);
-                if(temp==NULL) {
+                if(temp==nullptr) {
                    int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
-                    char *nameCopy=(char *)uprv_malloc(keyLength);
-                    if(nameCopy==NULL) {
+                    char* nameCopy = static_cast<char*>(uprv_malloc(keyLength));
+                    if(nameCopy==nullptr) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
-                        return NULL;
+                        return nullptr;
                    }
                    uprv_memcpy(nameCopy, name, keyLength);
                    allModes=localAllModes.getAlias();
                    uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
                } else {
                    // race condition
-                    allModes=(Norm2AllModes *)temp;
+                    allModes = static_cast<Norm2AllModes*>(temp);
                }
            }
        }
    }
-    if(allModes!=NULL && U_SUCCESS(errorCode)) {
+    if(allModes!=nullptr && U_SUCCESS(errorCode)) {
        switch(mode) {
        case UNORM2_COMPOSE:
            return &allModes->comp;
@@ -336,13 +360,13 @@ Normalizer2::getInstance(const char *packageName,
            break;  // do nothing
        }
    }
-    return NULL;
+    return nullptr;
 }

 const Normalizer2 *
 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
-        return NULL;
+        return nullptr;
    }
    switch(mode) {
    case UNORM_NFD:
@@ -363,13 +387,13 @@ Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode)
 const Normalizer2Impl *
 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
-    return allModes!=NULL ? allModes->impl : NULL;
+    return allModes!=nullptr ? allModes->impl : nullptr;
 }

 const Normalizer2Impl *
 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
-    return allModes!=NULL ? allModes->impl : NULL;
+    return allModes!=nullptr ? allModes->impl : nullptr;
 }

 U_NAMESPACE_END
@@ -393,6 +417,11 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
 }

+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) {  // C entry point: forwards to Normalizer2::getNFKCSimpleCasefoldInstance() and casts the result to the C handle type.
+    return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode);  // pErrorCode is dereferenced without a null check
+}
+
 U_CAPI const UNormalizer2 * U_EXPORT2
 unorm2_getInstance(const char *packageName,
                   const char *name,
@@ -0,0 +1,485 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <optional>
+#include <string_view>
+#include <utility>
+
+#include "bytesinkutil.h"  // StringByteSink<CharString>
+#include "charstr.h"
+#include "cstring.h"
+#include "fixedstring.h"
+#include "ulocimp.h"
+#include "unicode/localebuilder.h"
+#include "unicode/locid.h"
+
+namespace {
+
+// ASCII-only character classifiers used when validating extension keys.
+inline bool UPRV_ISDIGIT(char c) { return '0' <= c && c <= '9'; }
+inline bool UPRV_ISALPHANUM(char c) { return UPRV_ISDIGIT(c) || uprv_isASCIILetter(c); }
+
+// Legacy keyword name under which Unicode locale attributes are stored.
+constexpr const char* kAttributeKey = "attribute";
+
+// Validates the subtags `s` (length `len`) of the extension identified by the
+// singleton `key`. The key is compared case-insensitively: 'u', 't' and 'x'
+// use their dedicated validators, every other key the generic extension one.
+bool _isExtensionSubtags(char key, const char* s, int32_t len) {
+    const char singleton = uprv_tolower(key);
+    if (singleton == 'u') {
+        return ultag_isUnicodeExtensionSubtags(s, len);
+    }
+    if (singleton == 't') {
+        return ultag_isTransformedExtensionSubtags(s, len);
+    }
+    if (singleton == 'x') {
+        return ultag_isPrivateuseValueSubtags(s, len);
+    }
+    return ultag_isExtensionSubtags(s, len);
+}
+
+}  // namespace
+
+U_NAMESPACE_BEGIN
+
+LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
+    script_(), region_(), variant_(nullptr), extensions_(nullptr)
+{
+    language_[0] = 0;
+    script_[0] = 0;
+    region_[0] = 0;
+}
+
+// Releases the heap objects held by the builder (either may be nullptr).
+LocaleBuilder::~LocaleBuilder()
+{
+    delete extensions_;
+    delete variant_;
+}
+
+LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
+{
+    clear();
+    setLanguage(locale.getLanguage());
+    setScript(locale.getScript());
+    setRegion(locale.getCountry());
+    setVariant(locale.getVariant());
+    extensions_ = locale.clone();
+    if (extensions_ == nullptr) {
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+// Parses `tag` as a language tag and, when parsing succeeds, loads the
+// resulting locale into the builder.
+LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
+{
+    Locale parsed = Locale::forLanguageTag(tag, status_);
+    // setLocale() resets status_, so it must only run after a successful
+    // parse; otherwise the parse error would be lost.
+    if (U_SUCCESS(status_)) {
+        setLocale(parsed);
+    }
+    return *this;
+}
+
+namespace {
+
+// Copies `input` into the NUL-terminated buffer `dest` after validating it
+// with `test`. An empty input clears the field; input rejected by `test`
+// sets U_ILLEGAL_ARGUMENT_ERROR and leaves `dest` untouched. Does nothing if
+// `errorCode` already indicates a failure.
+// `dest` is not bounds-checked here: the validator is relied upon to reject
+// anything that does not fit the caller's buffer.
+void setField(StringPiece input, char* dest, UErrorCode& errorCode,
+              bool (*test)(const char*, int32_t)) {
+    if (U_FAILURE(errorCode)) { return; }
+    if (input.empty()) {
+        dest[0] = '\0';
+        return;
+    }
+    if (!test(input.data(), input.length())) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    const int32_t length = input.length();
+    uprv_memcpy(dest, input.data(), length);
+    dest[length] = '\0';
+}
+
+}  // namespace
+
+// Sets the language subtag (an empty string clears it). Input that is not a
+// well-formed language subtag is recorded as an error in status_.
+LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
+{
+    setField(language, language_, status_, ultag_isLanguageSubtag);
+    return *this;
+}
+
+// Sets the script subtag (an empty string clears it). Input that is not a
+// well-formed script subtag is recorded as an error in status_.
+LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
+{
+    setField(script, script_, status_, ultag_isScriptSubtag);
+    return *this;
+}
+
+// Sets the region subtag (an empty string clears it). Input that is not a
+// well-formed region subtag is recorded as an error in status_.
+LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
+{
+    setField(region, region_, status_, ultag_isRegionSubtag);
+    return *this;
+}
+
+namespace {
+
+// Normalizes `len` characters of `data` in place: every '_' becomes '-' and
+// every other character is lower-cased.
+void transform(char* data, int32_t len) {
+    for (int32_t pos = 0; pos < len; ++pos) {
+        const char c = data[pos];
+        data[pos] = (c == '_') ? '-' : uprv_tolower(c);
+    }
+}
+
+}  // namespace
+
+// Sets the variant subtag(s). An empty string removes the current variant.
+// The input is normalized ('_' -> '-', lower case) and must then be a
+// well-formed sequence of variant subtags; otherwise status_ is set to
+// U_ILLEGAL_ARGUMENT_ERROR and the previous variant is kept.
+// Does nothing if the builder already holds an error.
+LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (variant.empty()) {
+        delete variant_;
+        variant_ = nullptr;
+        return *this;
+    }
+    FixedString* new_variant = new FixedString(variant);
+    // `variant` is non-empty here, so an empty FixedString means that its
+    // internal buffer could not be allocated.
+    if (new_variant == nullptr || new_variant->isEmpty()) {
+        // Release the (possibly allocated but empty) object instead of
+        // leaking it; deleting nullptr is a no-op.
+        delete new_variant;
+        status_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    transform(new_variant->getAlias(), variant.length());
+    if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) {
+        delete new_variant;
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    // Only replace the old value once the new one is known to be valid.
+    delete variant_;
+    variant_ = new_variant;
+    return *this;
+}
+
+namespace {
+
+// Returns true if `value` (length `value_len`) is acceptable for the legacy
+// keyword `key`:
+//  - single-character keys are extension singletons and are validated with
+//    _isExtensionSubtags();
+//  - the "attribute" key must hold Unicode locale attributes;
+//  - any other key is mapped to its BCP 47 key/type, and both must be
+//    well-formed.
+bool
+_isKeywordValue(const char* key, const char* value, int32_t value_len)
+{
+    if (key[1] == '\0') {
+        // one char key
+        return UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
+               _isExtensionSubtags(key[0], value, value_len);
+    }
+    if (uprv_strcmp(key, kAttributeKey) == 0) {
+        // unicode attributes
+        return ultag_isUnicodeLocaleAttributes(value, value_len);
+    }
+    // Unicode extension value: convert the legacy key/value to the Unicode
+    // (BCP 47) key/type before validating.
+    std::optional<std::string_view> bcpKey = ulocimp_toBcpKeyWithFallback(key);
+    std::optional<std::string_view> bcpType = ulocimp_toBcpTypeWithFallback(key, value);
+    if (!bcpKey.has_value() || !bcpType.has_value()) {
+        return false;
+    }
+    return ultag_isUnicodeLocaleKey(bcpKey->data(),
+                                    static_cast<int32_t>(bcpKey->size())) &&
+           ultag_isUnicodeLocaleType(bcpType->data(),
+                                     static_cast<int32_t>(bcpType->size()));
+}
+
+// Copies every keyword of `from` into `to`.
+// `keywords` may be an enumeration over the keywords of `from`; if it is
+// nullptr one is created (and owned) here. Attribute values are normalized
+// with transform() before being copied. When `validate` is true, the first
+// value rejected by _isKeywordValue() aborts the copy with
+// U_ILLEGAL_ARGUMENT_ERROR.
+void
+_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
+                Locale& to, bool validate, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> ownedKeywords;
+    if (keywords == nullptr) {
+        ownedKeywords.adoptInstead(from.createKeywords(errorCode));
+        if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
+        keywords = ownedKeywords.getAlias();
+    }
+    for (const char* key = keywords->next(nullptr, errorCode);
+         key != nullptr;
+         key = keywords->next(nullptr, errorCode)) {
+        auto value = from.getKeywordValue<CharString>(key, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        const bool isAttribute = uprv_strcmp(key, kAttributeKey) == 0;
+        if (isAttribute) {
+            transform(value.data(), value.length());
+        }
+        if (validate && !_isKeywordValue(key, value.data(), value.length())) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        to.setKeywordValue(key, value.data(), errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+    }
+}
+
+// Removes the Unicode attributes and every Unicode keyword from `locale`.
+void
+_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    // Clear Unicode attributes
+    locale.setKeywordValue(kAttributeKey, "", errorCode);
+
+    // Clear all Unicode keyword values
+    LocalPointer<icu::StringEnumeration> unicodeKeys(locale.createUnicodeKeywords(errorCode));
+    if (U_FAILURE(errorCode) || unicodeKeys.isNull()) { return; }
+    for (const char* key = unicodeKeys->next(nullptr, errorCode);
+         key != nullptr;
+         key = unicodeKeys->next(nullptr, errorCode)) {
+        locale.setUnicodeKeywordValue(key, nullptr, errorCode);
+    }
+}
+
+// Parses `value` as the payload of a "-u-" extension (by prefixing it with
+// "und-u-") and copies the resulting keywords into `locale` without
+// validation.
+void
+_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    // Add the unicode extensions to extensions_
+    CharString tag("und-u-", errorCode);
+    tag.append(value, errorCode);
+    Locale parsed = Locale::forLanguageTag(tag.data(), errorCode);
+    _copyExtensions(parsed, nullptr, locale, false, errorCode);
+}
+
+}  // namespace
+
+// Sets the extension identified by the singleton `key` to `value`.
+// The value is normalized ('_' -> '-', lower case) and, if non-empty, must
+// be well-formed for that extension. For the Unicode extension ('u') all
+// existing attributes and Unicode keywords are dropped first and then
+// replaced by the ones parsed from `value`; other extensions are stored as a
+// single keyword. Does nothing if the builder already holds an error.
+LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (!UPRV_ISALPHANUM(key)) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    const bool needsValidation = !value_str.isEmpty();
+    if (needsValidation &&
+            !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    // Lazily create the locale that stores the extensions.
+    if (extensions_ == nullptr) {
+        Locale* root = Locale::getRoot().clone();
+        if (root == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        extensions_ = root;
+    }
+    if (uprv_tolower(key) == 'u') {
+        _clearUAttributesAndKeyType(*extensions_, status_);
+        if (U_SUCCESS(status_) && !value.empty()) {
+            _setUnicodeExtensions(*extensions_, value_str, status_);
+        }
+    } else {
+        // for t, x and others extension.
+        extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
+                                     status_);
+    }
+    return *this;
+}
+
+// Sets the Unicode locale keyword `key` to `type`.
+// `key` must be a well-formed Unicode locale key and `type`, unless empty,
+// a well-formed Unicode locale type; otherwise status_ becomes
+// U_ILLEGAL_ARGUMENT_ERROR. Does nothing if the builder already holds an
+// error.
+LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
+      StringPiece key, StringPiece type)
+{
+    if (U_FAILURE(status_)) { return *this; }
+    if (!ultag_isUnicodeLocaleKey(key.data(), key.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (!type.empty() && !ultag_isUnicodeLocaleType(type.data(), type.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    // Lazily create the locale that stores the extensions.
+    if (extensions_ == nullptr) {
+        Locale* root = Locale::getRoot().clone();
+        if (root == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        extensions_ = root;
+    }
+    extensions_->setUnicodeKeywordValue(key, type, status_);
+    return *this;
+}
+
+// Adds the Unicode locale attribute `value` to the builder.
+// The value is normalized ('_' -> '-', lower case) and must be a well-formed
+// attribute, otherwise status_ becomes U_ILLEGAL_ARGUMENT_ERROR. Adding an
+// attribute that is already present is a no-op. Otherwise the attribute is
+// inserted before the first existing attribute that compares greater, and
+// the list is stored '_'-separated under the "attribute" keyword.
+LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = Locale::getRoot().clone();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
+        return *this;
+    }
+
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
+    if (U_FAILURE(localErrorCode)) {
+        // No attributes, set the attribute.
+        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
+        return *this;
+    }
+
+    // Split the existing list in place: replace each separator ('_' or '-')
+    // with a NUL terminator and lower-case everything else, so that every
+    // attribute can be compared individually with uprv_strcmp(). Comparing
+    // the unsplit list as one string would miss existing attributes and
+    // insert duplicates.
+    char* p = attributes.data();
+    for (int32_t i = 0; i < attributes.length(); i++, p++) {
+        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
+    }
+    const char* const limit = attributes.data() + attributes.length();
+
+    // First pass: if the attribute is already present, there is nothing to do.
+    for (const char* token = attributes.data(); token < limit;
+         token += uprv_strlen(token) + 1) {
+        if (uprv_strcmp(token, value_str.data()) == 0) { return *this; }
+    }
+
+    // Second pass: rebuild the list with the new attribute inserted before
+    // the first existing attribute that compares greater.
+    CharString new_attributes;
+    bool inserted = false;
+    for (const char* token = attributes.data(); token < limit;
+         token += uprv_strlen(token) + 1) {
+        if (*token == '\0') { continue; }  // skip empty segments
+        if (!inserted && uprv_strcmp(token, value_str.data()) > 0) {
+            if (!new_attributes.isEmpty()) {
+                new_attributes.append('_', status_);
+            }
+            new_attributes.append(value_str.data(), status_);
+            inserted = true;
+        }
+        if (!new_attributes.isEmpty()) {
+            new_attributes.append('_', status_);
+        }
+        new_attributes.append(token, status_);
+    }
+    if (!inserted) {
+        // Greater than every existing attribute (or the list was empty).
+        if (!new_attributes.isEmpty()) {
+            new_attributes.append('_', status_);
+        }
+        new_attributes.append(value_str.data(), status_);
+    }
+    // Not yet in the attributes, set the attribute.
+    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
+    return *this;
+}
+
+// Removes the Unicode locale attribute `value` from the builder.
+// The value is normalized ('_' -> '-', lower case) and must be a well-formed
+// attribute, otherwise status_ becomes U_ILLEGAL_ARGUMENT_ERROR. The stored
+// attribute list is only rewritten when the attribute was actually found.
+LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
+    StringPiece value)
+{
+    CharString value_str(value, status_);
+    if (U_FAILURE(status_)) { return *this; }
+    transform(value_str.data(), value_str.length());
+    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
+        status_ = U_ILLEGAL_ARGUMENT_ERROR;
+        return *this;
+    }
+    // Nothing stored at all: nothing to remove.
+    if (extensions_ == nullptr) { return *this; }
+    UErrorCode localErrorCode = U_ZERO_ERROR;
+    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
+    // Lookup failed, or the attribute list is empty: nothing to remove.
+    if (U_FAILURE(localErrorCode) || attributes.isEmpty()) { return *this; }
+
+    // Turn each separator ('_' or '-') into a NUL terminator, lower-casing
+    // everything else, so that each attribute can be compared with
+    // uprv_strcmp().
+    char* const buffer = attributes.data();
+    const int32_t length = attributes.length();
+    for (int32_t i = 0; i < length; ++i) {
+        const char c = buffer[i];
+        buffer[i] = (c == '_' || c == '-') ? '\0' : uprv_tolower(c);
+    }
+
+    // Rebuild the list without the attribute being removed.
+    const char* const limit = buffer + length;
+    CharString remaining;
+    bool found = false;
+    for (const char* token = buffer; token < limit;
+         token += uprv_strlen(token) + 1) {
+        if (uprv_strcmp(token, value_str.data()) == 0) {
+            found = true;
+            continue;
+        }
+        if (!remaining.isEmpty()) {
+            remaining.append('_', status_);
+        }
+        remaining.append(token, status_);
+    }
+    // Found the value in attributes, set the attribute.
+    if (found) {
+        extensions_->setKeywordValue(kAttributeKey, remaining.data(), status_);
+    }
+    return *this;
+}
+
+// Resets the builder to its freshly constructed state: clears any pending
+// error, all subtags, the variant and the extensions.
+LocaleBuilder& LocaleBuilder::clear()
+{
+    status_ = U_ZERO_ERROR;
+    region_[0] = '\0';
+    script_[0] = '\0';
+    language_[0] = '\0';
+    delete variant_;
+    variant_ = nullptr;
+    return clearExtensions();
+}
+
+LocaleBuilder& LocaleBuilder::clearExtensions()
+{
+    delete extensions_;
+    extensions_ = nullptr;
+    return *this;
+}
+
+// Returns a Locale that has been marked bogus; used as the result of a
+// failed build().
+Locale makeBogusLocale() {
+  Locale result;
+  result.setToBogus();
+  return result;
+}
+
+// Copies all keywords (extensions) of `src` into this builder's extensions,
+// without validating them. Does nothing when `errorCode` already indicates a
+// failure or when `src` has no keywords.
+// Failures are reported through `errorCode`; an allocation failure is also
+// recorded in status_ so that a later build() fails as well.
+void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) { return; }
+    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
+    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
+        // Error, or no extensions to copy.
+        return;
+    }
+    if (extensions_ == nullptr) {
+        extensions_ = Locale::getRoot().clone();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            // Also tell the caller: this function reports through errorCode,
+            // and leaving it at success would hide the failure until build().
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
+}
+
+// Builds a Locale from the current state.
+// Returns a bogus locale when `errorCode` already indicates a failure, when
+// the builder holds a pending error (which is then copied into `errorCode`),
+// or when assembling or validating the result fails.
+Locale LocaleBuilder::build(UErrorCode& errorCode)
+{
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    if (U_FAILURE(status_)) {
+        errorCode = status_;
+        return makeBogusLocale();
+    }
+    // Assemble "language[-script][-region][-variant]".
+    CharString tag(language_, errorCode);
+    if (script_[0] != '\0') {
+        tag.append('-', errorCode);
+        tag.append(StringPiece(script_), errorCode);
+    }
+    if (region_[0] != '\0') {
+        tag.append('-', errorCode);
+        tag.append(StringPiece(region_), errorCode);
+    }
+    if (variant_ != nullptr) {
+        tag.append('-', errorCode);
+        tag.append(StringPiece(variant_->data()), errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    Locale product(tag.data());
+    // Extensions are validated while being copied into the result.
+    if (extensions_ != nullptr) {
+        _copyExtensions(*extensions_, nullptr, product, true, errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return makeBogusLocale();
+    }
+    return product;
+}
+
+// Copies the builder's status into `outErrorCode` unless that already holds
+// a failure. Returns true if `outErrorCode` indicates a failure afterwards.
+UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_SUCCESS(outErrorCode)) {
+        outErrorCode = status_;
+        return U_FAILURE(outErrorCode);
+    }
+    // Do not overwrite the older error code
+    return true;
+}
+
+U_NAMESPACE_END
@@ -0,0 +1,834 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// localematcher.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include <optional>
+
+#include "unicode/utypes.h"
+#include "unicode/localebuilder.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uloc.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+#include "localeprioritylist.h"
+#include "loclikelysubtags.h"
+#include "locdistance.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "uhash.h"
+#include "ustr_imp.h"
+#include "uvector.h"
+
+// Expands to a temporary LSR with language "und" and empty script and region,
+// flagged LSR::EXPLICIT_LSR.
+#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
+
+/**
+ * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
+ *
+ * @draft ICU 65
+ */
+enum ULocMatchLifetime {
+    /**
+     * Locale objects are temporary.
+     * The matcher will make a copy of a locale that will be used beyond one function call.
+     *
+     * @draft ICU 65
+     */
+    ULOCMATCH_TEMPORARY_LOCALES,
+    /**
+     * Locale objects are stored at least as long as the matcher is used.
+     * The matcher will keep only a pointer to a locale that will be used beyond one function call,
+     * avoiding a copy.
+     *
+     * @draft ICU 65
+     */
+    ULOCMATCH_STORED_LOCALES  // TODO: permanent? cached? clone?
+};
+// The typedef is hidden from Doxygen so that only the enum itself is documented.
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchLifetime ULocMatchLifetime;
+#endif
+
+U_NAMESPACE_BEGIN
+
+// Move constructor: copies all fields from `src`. When `src` owned its
+// desired locale, ownership moves to the new object and `src` is reset so
+// that it no longer refers to (or deletes) that locale.
+LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) noexcept :
+        desiredLocale(src.desiredLocale),
+        supportedLocale(src.supportedLocale),
+        desiredIndex(src.desiredIndex),
+        supportedIndex(src.supportedIndex),
+        desiredIsOwned(src.desiredIsOwned) {
+    if (!desiredIsOwned) {
+        // Borrowed pointer: both objects may keep referring to it.
+        return;
+    }
+    src.desiredIsOwned = false;
+    src.desiredIndex = -1;
+    src.desiredLocale = nullptr;
+}
+
+// Deletes the desired locale only when this Result owns it.
+LocaleMatcher::Result::~Result() {
+    if (!desiredIsOwned) {
+        return;
+    }
+    delete desiredLocale;
+}
+
+// Move assignment: releases the currently held desired locale (if owned) and
+// takes over the state of `src`. When `src` owned its desired locale,
+// ownership moves to this object and `src` is reset.
+LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) noexcept {
+    // Self-assignment must be a no-op: destroying *this first would delete
+    // the owned locale and then keep the dangling pointer.
+    if (this == &src) {
+        return *this;
+    }
+    this->~Result();
+
+    desiredLocale = src.desiredLocale;
+    supportedLocale = src.supportedLocale;
+    desiredIndex = src.desiredIndex;
+    supportedIndex = src.supportedIndex;
+    desiredIsOwned = src.desiredIsOwned;
+
+    if (desiredIsOwned) {
+        src.desiredLocale = nullptr;
+        src.desiredIndex = -1;
+        src.desiredIsOwned = false;
+    }
+    return *this;
+}
+
+// Combines the best supported locale with the region, variants and
+// extensions of the best desired locale.
+// Returns the root locale on a prior error or when there is no supported
+// locale, and the supported locale unchanged when there is no desired locale
+// or the two are equal.
+Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode) || supportedLocale == nullptr) {
+        return Locale::getRoot();
+    }
+    const Locale *desired = getDesiredLocale();
+    if (desired == nullptr) {
+        return *supportedLocale;
+    }
+    if (*supportedLocale == *desired) {
+        return *supportedLocale;
+    }
+    LocaleBuilder builder;
+    builder.setLocale(*supportedLocale);
+
+    // Copy the region from the desired locale, if there is one.
+    const char *desiredRegion = desired->getCountry();
+    if (desiredRegion[0] != 0) {
+        builder.setRegion(desiredRegion);
+    }
+
+    // Copy the variants from the desired locale, if there are any.
+    // Note that this will override any supportedLocale variants.
+    // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
+    const char *desiredVariants = desired->getVariant();
+    if (desiredVariants[0] != 0) {
+        builder.setVariant(desiredVariants);
+    }
+
+    // Copy the extensions from the desired locale, if there are any.
+    // C++ note: The following note, copied from Java, may not be true,
+    // as long as C++ copies by legacy ICU keyword, not by extension singleton.
+    // Note that this will override any supportedLocale extensions.
+    // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
+    // (replacing calendar).
+    builder.copyExtensionsFrom(*desired, errorCode);
+    return builder.build(errorCode);
+}
+
+// Move constructor: takes over all settings and owned objects of `src`,
+// leaving `src` without owned objects so that its destructor does not delete
+// what now belongs to this Builder.
+LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) noexcept :
+        errorCode_(src.errorCode_),
+        supportedLocales_(src.supportedLocales_),
+        thresholdDistance_(src.thresholdDistance_),
+        demotion_(src.demotion_),
+        defaultLocale_(src.defaultLocale_),
+        withDefault_(src.withDefault_),
+        favor_(src.favor_),
+        direction_(src.direction_) {
+    // Also transfer the locales set by setMaxDistance(); otherwise that
+    // setting would be silently lost by the move. They are assigned in the
+    // body so that the member-initializer order above stays unchanged.
+    maxDistanceDesired_ = src.maxDistanceDesired_;
+    maxDistanceSupported_ = src.maxDistanceSupported_;
+
+    src.supportedLocales_ = nullptr;
+    src.defaultLocale_ = nullptr;
+    src.maxDistanceDesired_ = nullptr;
+    src.maxDistanceSupported_ = nullptr;
+}
+
+// Releases every object owned by the builder (each may be nullptr).
+LocaleMatcher::Builder::~Builder() {
+    delete maxDistanceSupported_;
+    delete maxDistanceDesired_;
+    delete defaultLocale_;
+    delete supportedLocales_;
+}
+
+// Move assignment: releases everything this builder owns and takes over all
+// settings and owned objects of `src`, leaving `src` without owned objects.
+LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) noexcept {
+    // Self-assignment must be a no-op: destroying *this first would delete
+    // the owned objects and then keep the dangling pointers.
+    if (this == &src) {
+        return *this;
+    }
+    this->~Builder();
+
+    errorCode_ = src.errorCode_;
+    supportedLocales_ = src.supportedLocales_;
+    thresholdDistance_ = src.thresholdDistance_;
+    demotion_ = src.demotion_;
+    defaultLocale_ = src.defaultLocale_;
+    withDefault_ = src.withDefault_;
+    favor_ = src.favor_;
+    direction_ = src.direction_;
+    // The destructor call above deleted the old max-distance locales, so
+    // these pointers must be overwritten; leaving them as they were would
+    // cause a double delete when this builder is destroyed.
+    maxDistanceDesired_ = src.maxDistanceDesired_;
+    maxDistanceSupported_ = src.maxDistanceSupported_;
+
+    src.supportedLocales_ = nullptr;
+    src.defaultLocale_ = nullptr;
+    src.maxDistanceDesired_ = nullptr;
+    src.maxDistanceSupported_ = nullptr;
+    return *this;
+}
+
+// Empties the supported-locales vector, if one has been created.
+void LocaleMatcher::Builder::clearSupportedLocales() {
+    if (supportedLocales_ == nullptr) {
+        return;
+    }
+    supportedLocales_->removeAllElements();
+}
+
+// Makes sure the supported-locales vector exists, creating it on first use.
+// Returns false if the builder holds an error or the vector could not be
+// created; true otherwise.
+bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
+    if (U_FAILURE(errorCode_)) { return false; }
+    if (supportedLocales_ == nullptr) {
+        LocalPointer<UVector> created(
+            new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
+        if (U_FAILURE(errorCode_)) { return false; }
+        supportedLocales_ = created.orphan();
+    }
+    return true;
+}
+
+// Replaces the supported locales with those parsed from the priority-list
+// string `locales`. Entries for which the list yields no locale are skipped;
+// adoption stops at the first error.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString(
+        StringPiece locales) {
+    LocalePriorityList list(locales, errorCode_);
+    if (U_FAILURE(errorCode_)) { return *this; }
+    clearSupportedLocales();
+    if (!ensureSupportedLocaleVector()) { return *this; }
+    const int32_t count = list.getLengthIncludingRemoved();
+    for (int32_t i = 0; i < count && U_SUCCESS(errorCode_); ++i) {
+        Locale *locale = list.orphanLocaleAt(i);
+        if (locale != nullptr) {
+            supportedLocales_->adoptElement(locale, errorCode_);
+        }
+    }
+    return *this;
+}
+
+// Replaces the supported locales with clones of the locales produced by the
+// iterator. Iteration stops as soon as an error is recorded.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
+    if (!ensureSupportedLocaleVector()) {
+        return *this;
+    }
+    clearSupportedLocales();
+    while (locales.hasNext() && U_SUCCESS(errorCode_)) {
+        const Locale &next = locales.next();
+        LocalPointer<Locale> copy(next.clone(), errorCode_);
+        supportedLocales_->adoptElement(copy.orphan(), errorCode_);
+    }
+    return *this;
+}
+
+// Appends a clone of `locale` to the supported locales.
+LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
+    if (!ensureSupportedLocaleVector()) {
+        return *this;
+    }
+    LocalPointer<Locale> copy(locale.clone(), errorCode_);
+    supportedLocales_->adoptElement(copy.orphan(), errorCode_);
+    return *this;
+}
+
+// Discards any default locale and records that none is to be used.
+// Does nothing if the builder holds an error.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setNoDefaultLocale() {
+    if (U_SUCCESS(errorCode_)) {
+        delete defaultLocale_;
+        defaultLocale_ = nullptr;
+        withDefault_ = false;
+    }
+    return *this;
+}
+
+// Sets the default locale to a clone of `defaultLocale`. Passing nullptr
+// clears the stored default while keeping default handling enabled.
+// Does nothing if the builder holds an error.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    Locale *replacement = (defaultLocale == nullptr) ? nullptr : defaultLocale->clone();
+    if (defaultLocale != nullptr && replacement == nullptr) {
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    // Only replace the old value once the clone is known to exist.
+    delete defaultLocale_;
+    defaultLocale_ = replacement;
+    withDefault_ = true;
+    return *this;
+}
+
+LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    favor_ = subtag;
+    return *this;
+}
+
+LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    demotion_ = demotion;
+    return *this;
+}
+
+// Stores clones of `desired` and `supported` as the locale pair that defines
+// the maximum distance. The previous pair is only replaced when both clones
+// could be allocated. Does nothing if the builder holds an error.
+LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
+                                                               const Locale &supported) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    LocalPointer<Locale> desiredClone(desired.clone());
+    LocalPointer<Locale> supportedClone(supported.clone());
+    if (desiredClone.isNull() || supportedClone.isNull()) {
+        // Whichever clone did get allocated is released by its LocalPointer.
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    delete maxDistanceDesired_;
+    maxDistanceDesired_ = desiredClone.orphan();
+    delete maxDistanceSupported_;
+    maxDistanceSupported_ = supportedClone.orphan();
+    return *this;
+}
+
+#if 0
+/**
+ * <i>Internal only!</i>
+ *
+ * @param thresholdDistance the thresholdDistance to set, with -1 = default
+ * @return this Builder object
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+@Deprecated
+LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) {
+    if (U_FAILURE(errorCode_)) { return *this; }
+    if (thresholdDistance > 100) {
+        thresholdDistance = 100;
+    }
+    thresholdDistance_ = thresholdDistance;
+    return *this;
+}
+#endif
+
+UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const {
+    if (U_FAILURE(outErrorCode)) { return true; }
+    if (U_SUCCESS(errorCode_)) { return false; }
+    outErrorCode = errorCode_;
+    return true;
+}
+
+LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
+    if (U_SUCCESS(errorCode) && U_FAILURE(errorCode_)) {
+        errorCode = errorCode_;
+    }
+    return LocaleMatcher(*this, errorCode);
+}
+
+namespace {
+
+// Returns the maximized LSR for `locale`, or the "und" LSR when there is a
+// prior error, the locale is bogus, or its name is empty.
+LSR getMaximalLsrOrUnd(const LikelySubtags &likelySubtags, const Locale &locale,
+                       UErrorCode &errorCode) {
+    const bool useUnd =
+        U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */;
+    if (useUnd) {
+        return UND_LSR;
+    }
+    return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode);
+}
+
+// Hash callback for hash tables keyed by LSR pointers: returns the hash code
+// stored in the LSR.
+int32_t hashLSR(const UHashTok token) {
+    return static_cast<const LSR *>(token.pointer)->hashCode;
+}
+
+// Key comparison callback for hash tables keyed by LSR pointers.
+UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
+    const LSR &left = *static_cast<const LSR *>(t1.pointer);
+    const LSR &right = *static_cast<const LSR *>(t2.pointer);
+    return left == right;
+}
+
+}  // namespace
+
+// Registers `lsr` with supported-locale index `i` unless an equal LSR is
+// already in the map. On insertion the LSR and its index are also appended
+// to the parallel arrays at position `suppLength`.
+// Returns the new number of used array slots (unchanged if nothing was
+// added or an error occurred).
+int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
+                                   UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode) || uhash_containsKey(supportedLsrToIndex, &lsr)) {
+        return suppLength;
+    }
+    uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
+    if (U_FAILURE(errorCode)) {
+        return suppLength;
+    }
+    supportedLSRs[suppLength] = &lsr;
+    supportedIndexes[suppLength] = i;
+    return suppLength + 1;
+}
+
+LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
+        likelySubtags(*LikelySubtags::getSingleton(errorCode)),
+        localeDistance(*LocaleDistance::getSingleton(errorCode)),
+        thresholdDistance(builder.thresholdDistance_),
+        demotionPerDesiredLocale(0),
+        favorSubtag(builder.favor_),
+        direction(builder.direction_),
+        supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0),
+        supportedLsrToIndex(nullptr),
+        supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
+        ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
+    if (U_FAILURE(errorCode)) { return; }
+    const Locale *def = builder.defaultLocale_;
+    LSR builderDefaultLSR;
+    const LSR *defLSR = nullptr;
+    if (def != nullptr) {
+        ownedDefaultLocale = def->clone();
+        if (ownedDefaultLocale == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        def = ownedDefaultLocale;
+        builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        defLSR = &builderDefaultLSR;
+    }
+    supportedLocalesLength = builder.supportedLocales_ != nullptr ?
+        builder.supportedLocales_->size() : 0;
+    if (supportedLocalesLength > 0) {
+        // Store the supported locales in input order,
+        // so that when different types are used (e.g., language tag strings)
+        // we can return those by parallel index.
+        supportedLocales = static_cast<const Locale **>(
+            uprv_malloc(supportedLocalesLength * sizeof(const Locale *)));
+        // Supported LSRs in input order.
+        // In C++, we store these permanently to simplify ownership management
+        // in the hash tables. Duplicate LSRs (if any) are unused overhead.
+        lsrs = new LSR[supportedLocalesLength];
+        if (supportedLocales == nullptr || lsrs == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // If the constructor fails partway, we need null pointers for destructibility.
+        uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *));
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i));
+            supportedLocales[i] = locale.clone();
+            if (supportedLocales[i] == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            const Locale &supportedLocale = *supportedLocales[i];
+            LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode);
+            lsr.setHashCode();
+            if (U_FAILURE(errorCode)) { return; }
+        }
+
+        // We need an unordered map from LSR to first supported locale with that LSR,
+        // and an ordered list of (LSR, supported index) for
+        // the supported locales in the following order:
+        // 1. Default locale, if it is supported.
+        // 2. Priority locales (aka "paradigm locales") in builder order.
+        // 3. Remaining locales in builder order.
+        supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
+                                             supportedLocalesLength, &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        supportedLSRs = static_cast<const LSR **>(
+            uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
+        supportedIndexes = static_cast<int32_t *>(
+            uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
+        if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        int32_t suppLength = 0;
+        // Determine insertion order.
+        // Add locales immediately that are equivalent to the default.
+        MaybeStackArray<int8_t, 100> order(supportedLocalesLength, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        int32_t numParadigms = 0;
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            const Locale &locale = *supportedLocales[i];
+            const LSR &lsr = lsrs[i];
+            if (defLSR == nullptr && builder.withDefault_) {
+                // Implicit default locale = first supported locale, if not turned off.
+                U_ASSERT(i == 0);
+                def = &locale;
+                defLSR = &lsr;
+                order[i] = 1;
+                suppLength = putIfAbsent(lsr, 0, suppLength, errorCode);
+            } else if (defLSR != nullptr && lsr.isEquivalentTo(*defLSR)) {
+                order[i] = 1;
+                suppLength = putIfAbsent(lsr, i, suppLength, errorCode);
+            } else if (localeDistance.isParadigmLSR(lsr)) {
+                order[i] = 2;
+                ++numParadigms;
+            } else {
+                order[i] = 3;
+            }
+            if (U_FAILURE(errorCode)) { return; }
+        }
+        // Add supported paradigm locales.
+        int32_t paradigmLimit = suppLength + numParadigms;
+        for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) {
+            if (order[i] == 2) {
+                suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
+            }
+        }
+        // Add remaining supported locales.
+        for (int32_t i = 0; i < supportedLocalesLength; ++i) {
+            if (order[i] == 3) {
+                suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode);
+            }
+        }
+        supportedLSRsLength = suppLength;
+        // If supportedLSRsLength < supportedLocalesLength then
+        // we waste as many array slots as there are duplicate supported LSRs,
+        // but the amount of wasted space is small as long as there are few duplicates.
+    }
+
+    defaultLocale = def;
+
+    if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
+        demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
+    }
+
+    if (thresholdDistance >= 0) {
+        // already copied
+    } else if (builder.maxDistanceDesired_ != nullptr) {
+        LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
+        const LSR *pSuppLSR = &suppLSR;
+        int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
+                getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
+                &pSuppLSR, 1,
+                LocaleDistance::shiftDistance(100), favorSubtag, direction);
+        if (U_SUCCESS(errorCode)) {
+            // +1 for an exclusive threshold from an inclusive max.
+            thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
+        } else {
+            thresholdDistance = 0;
+        }
+    } else {
+        thresholdDistance = localeDistance.getDefaultScriptDistance();
+    }
+}
+
+/**
+ * Move constructor.
+ * Copies the matching settings from src and takes over every pointer src holds
+ * (supported locales, their LSRs, the LSR-to-index hash table, the ordered
+ * LSR/index arrays and the default locale), then clears those fields in src.
+ */
+LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) noexcept :
+        likelySubtags(src.likelySubtags),
+        localeDistance(src.localeDistance),
+        thresholdDistance(src.thresholdDistance),
+        demotionPerDesiredLocale(src.demotionPerDesiredLocale),
+        favorSubtag(src.favorSubtag),
+        direction(src.direction),
+        supportedLocales(src.supportedLocales), lsrs(src.lsrs),
+        supportedLocalesLength(src.supportedLocalesLength),
+        supportedLsrToIndex(src.supportedLsrToIndex),
+        supportedLSRs(src.supportedLSRs),
+        supportedIndexes(src.supportedIndexes),
+        supportedLSRsLength(src.supportedLSRsLength),
+        ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) {
+    // Reset src's pointers and lengths so that its destructor, which frees
+    // exactly these fields, has nothing left to release.
+    src.supportedLocales = nullptr;
+    src.lsrs = nullptr;
+    src.supportedLocalesLength = 0;
+    src.supportedLsrToIndex = nullptr;
+    src.supportedLSRs = nullptr;
+    src.supportedIndexes = nullptr;
+    src.supportedLSRsLength = 0;
+    src.ownedDefaultLocale = nullptr;
+    src.defaultLocale = nullptr;
+}
+
+/**
+ * Destructor: releases the supported locales, their LSR array, the
+ * LSR-to-index hash table, the ordered LSR/index arrays and the owned
+ * default locale.
+ */
+LocaleMatcher::~LocaleMatcher() {
+    // Destroy every supported Locale before freeing the array of pointers to them.
+    int32_t next = 0;
+    while (next < supportedLocalesLength) {
+        delete supportedLocales[next];
+        ++next;
+    }
+    uprv_free(supportedLocales);
+
+    delete[] lsrs;
+
+    uhash_close(supportedLsrToIndex);
+    uprv_free(supportedLSRs);
+    uprv_free(supportedIndexes);
+
+    delete ownedDefaultLocale;
+}
+
+/**
+ * Move assignment.
+ * Releases everything this matcher currently holds, then takes over the
+ * settings and pointers of src and clears those fields in src.
+ * Assigning a matcher to itself is a no-op.
+ *
+ * likelySubtags and localeDistance are not reassigned here.
+ * NOTE(review): presumably they refer to shared data that is the same for
+ * every matcher -- confirm against the class declaration.
+ *
+ * @param src the matcher to move from; it holds no supported locales afterwards
+ * @return *this
+ */
+LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) noexcept {
+    // Without this guard a self-move would run the destructor below and then
+    // copy the already-freed pointers back out of src (which is *this).
+    if (this == &src) { return *this; }
+
+    // Free the current contents by running the destructor in place.
+    this->~LocaleMatcher();
+
+    thresholdDistance = src.thresholdDistance;
+    demotionPerDesiredLocale = src.demotionPerDesiredLocale;
+    favorSubtag = src.favorSubtag;
+    direction = src.direction;
+    supportedLocales = src.supportedLocales;
+    lsrs = src.lsrs;
+    supportedLocalesLength = src.supportedLocalesLength;
+    supportedLsrToIndex = src.supportedLsrToIndex;
+    supportedLSRs = src.supportedLSRs;
+    supportedIndexes = src.supportedIndexes;
+    supportedLSRsLength = src.supportedLSRsLength;
+    ownedDefaultLocale = src.ownedDefaultLocale;
+    defaultLocale = src.defaultLocale;
+
+    // Clear src so that its destructor does not free what we now hold.
+    src.supportedLocales = nullptr;
+    src.lsrs = nullptr;
+    src.supportedLocalesLength = 0;
+    src.supportedLsrToIndex = nullptr;
+    src.supportedLSRs = nullptr;
+    src.supportedIndexes = nullptr;
+    src.supportedLSRsLength = 0;
+    src.ownedDefaultLocale = nullptr;
+    src.defaultLocale = nullptr;
+    return *this;
+}
+
+/**
+ * Wraps a Locale::Iterator of desired locales and yields the maximized LSR
+ * of each one. It can also "remember" the locale that produced the best
+ * match so far, together with its position in the desired list.
+ *
+ * With ULOCMATCH_STORED_LOCALES the remembered pointer is the iterator's own
+ * Locale; with ULOCMATCH_TEMPORARY_LOCALES the locale is cloned and this
+ * object owns the clone until orphanRemembered() hands it out.
+ */
+class LocaleLsrIterator {
+public:
+    LocaleLsrIterator(const LikelySubtags &likelySubtags, Locale::Iterator &locales,
+                      ULocMatchLifetime lifetime) :
+            likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
+
+    // Not copyable: in ULOCMATCH_TEMPORARY_LOCALES mode this object owns
+    // `remembered`, and a memberwise copy would lead to a double delete.
+    LocaleLsrIterator(const LocaleLsrIterator &) = delete;
+    LocaleLsrIterator &operator=(const LocaleLsrIterator &) = delete;
+
+    ~LocaleLsrIterator() {
+        // Only temporary-lifetime clones belong to us.
+        if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) {
+            delete remembered;
+        }
+    }
+
+    /** @return whether the underlying locale iterator has another locale */
+    bool hasNext() const {
+        return locales.hasNext();
+    }
+
+    /**
+     * Advances to the next desired locale and returns its maximized LSR.
+     * The locale itself stays available for rememberCurrent().
+     */
+    LSR next(UErrorCode &errorCode) {
+        current = &locales.next();
+        return getMaximalLsrOrUnd(likelySubtags, *current, errorCode);
+    }
+
+    /**
+     * Records the current locale as the best desired locale so far.
+     * Does nothing if errorCode already indicates a failure.
+     *
+     * @param desiredIndex position of the current locale in the desired list
+     * @param errorCode set to U_MEMORY_ALLOCATION_ERROR if cloning fails
+     */
+    void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return; }
+        bestDesiredIndex = desiredIndex;
+        if (lifetime == ULOCMATCH_STORED_LOCALES) {
+            remembered = current;
+        } else {
+            // ULOCMATCH_TEMPORARY_LOCALES
+            // Replace any earlier clone with a copy of the current locale.
+            delete remembered;
+            remembered = new Locale(*current);
+            if (remembered == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+    }
+
+    /**
+     * Hands out the remembered locale and forgets it, so the destructor
+     * will not delete it.
+     */
+    const Locale *orphanRemembered() {
+        const Locale *rem = remembered;
+        remembered = nullptr;
+        return rem;
+    }
+
+    /** @return the index passed to the last successful rememberCurrent(), or -1 */
+    int32_t getBestDesiredIndex() const {
+        return bestDesiredIndex;
+    }
+
+private:
+    const LikelySubtags &likelySubtags;
+    Locale::Iterator &locales;
+    ULocMatchLifetime lifetime;
+    const Locale *current = nullptr, *remembered = nullptr;
+    int32_t bestDesiredIndex = -1;
+};
+
+/**
+ * Finds the supported locale that best matches a single desired locale.
+ * Returns nullptr if errorCode indicates a failure on entry; otherwise the
+ * best supported locale, or the default locale when nothing matches or the
+ * lookup fails.
+ */
+const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    std::optional<int32_t> bestIndex = getBestSuppIndex(
+        getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
+        nullptr, errorCode);
+    if (U_FAILURE(errorCode) || !bestIndex.has_value()) {
+        return defaultLocale;
+    }
+    return supportedLocales[*bestIndex];
+}
+
+/**
+ * Finds the supported locale that best matches a list of desired locales.
+ * Returns nullptr if errorCode indicates a failure on entry, and the default
+ * locale when the list is empty, nothing matches, or the lookup fails.
+ */
+const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
+                                          UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    if (desiredLocales.hasNext()) {
+        LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+        std::optional<int32_t> bestIndex =
+            getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+        if (U_SUCCESS(errorCode) && bestIndex.has_value()) {
+            return supportedLocales[*bestIndex];
+        }
+    }
+    return defaultLocale;
+}
+
+/**
+ * Parses an accept-language style list string and returns the best match
+ * for the locales it names, via getBestMatch(Locale::Iterator &, ...).
+ */
+const Locale *LocaleMatcher::getBestMatchForListString(
+        StringPiece desiredLocaleList, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    LocalePriorityList priorityList(desiredLocaleList, errorCode);
+    auto desiredIter = priorityList.iterator();
+    return getBestMatch(desiredIter, errorCode);
+}
+
+/**
+ * Like getBestMatch(const Locale &, ...) but returns a Result.
+ * On success the Result refers to the caller's desiredLocale (desired index 0)
+ * and the matching supported locale; otherwise it carries only the default
+ * locale with both indexes set to -1.
+ */
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        const Locale &desiredLocale, UErrorCode &errorCode) const {
+    if (U_SUCCESS(errorCode)) {
+        std::optional<int32_t> bestIndex = getBestSuppIndex(
+            getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
+            nullptr, errorCode);
+        if (U_SUCCESS(errorCode) && bestIndex.has_value()) {
+            return Result(&desiredLocale, supportedLocales[*bestIndex], 0, *bestIndex, false);
+        }
+    }
+    // Failure or no match: fall back to the default locale.
+    return Result(nullptr, defaultLocale, -1, -1, false);
+}
+
+/**
+ * Like getBestMatch(Locale::Iterator &, ...) but returns a Result.
+ * On success the Result receives the remembered copy of the best desired
+ * locale (passed with the final flag set to true), its index in the desired
+ * list, and the matching supported locale. Otherwise it carries only the
+ * default locale with both indexes set to -1.
+ */
+LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
+        Locale::Iterator &desiredLocales, UErrorCode &errorCode) const {
+    if (U_SUCCESS(errorCode) && desiredLocales.hasNext()) {
+        LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
+        std::optional<int32_t> bestIndex =
+            getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+        if (U_SUCCESS(errorCode) && bestIndex.has_value()) {
+            return Result(lsrIter.orphanRemembered(), supportedLocales[*bestIndex],
+                          lsrIter.getBestDesiredIndex(), *bestIndex, true);
+        }
+    }
+    // Failure, empty list, or no match: fall back to the default locale.
+    return Result(nullptr, defaultLocale, -1, -1, false);
+}
+
+/**
+ * Core matching loop shared by the getBestMatch*() functions.
+ *
+ * @param desiredLSR maximized LSR of the first desired locale
+ * @param remainingIter supplies further desired locales and records the best
+ *        one; nullptr when there is only a single desired locale
+ * @param errorCode ICU error code; a failure yields std::nullopt, except that
+ *        the exact-match shortcut returns its index without re-checking
+ *        errorCode after rememberCurrent()
+ * @return the index into supportedLocales of the best match,
+ *         or std::nullopt if there is no good match or an error occurred
+ */
+std::optional<int32_t> LocaleMatcher::getBestSuppIndex(LSR desiredLSR,
+                                                       LocaleLsrIterator *remainingIter,
+                                                       UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return std::nullopt; }
+    int32_t desiredIndex = 0;
+    int32_t bestSupportedLsrIndex = -1;
+    // bestShiftedDistance starts at the threshold and only shrinks from there.
+    for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
+        // Quick check for exact maximized LSR.
+        if (supportedLsrToIndex != nullptr) {
+            desiredLSR.setHashCode();
+            UBool found = false;
+            int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
+            if (found) {
+                // The hash table value is returned directly as the result.
+                if (remainingIter != nullptr) {
+                    remainingIter->rememberCurrent(desiredIndex, errorCode);
+                }
+                return suppIndex;
+            }
+        }
+        int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
+                desiredLSR, supportedLSRs, supportedLSRsLength,
+                bestShiftedDistance, favorSubtag, direction);
+        if (bestIndexAndDistance >= 0) {
+            // A non-negative result is taken as the new best: adopt its distance and index.
+            bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
+            if (remainingIter != nullptr) {
+                remainingIter->rememberCurrent(desiredIndex, errorCode);
+                if (U_FAILURE(errorCode)) { return std::nullopt; }
+            }
+            bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
+        }
+        // Lower the distance limit by the per-desired-locale demotion before
+        // looking at the next desired locale; stop once it reaches zero.
+        if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
+            break;
+        }
+        if (remainingIter == nullptr || !remainingIter->hasNext()) {
+            break;
+        }
+        desiredLSR = remainingIter->next(errorCode);
+        if (U_FAILURE(errorCode)) { return std::nullopt; }
+        ++desiredIndex;
+    }
+    if (bestSupportedLsrIndex < 0) {
+        // no good match
+        return std::nullopt;
+    }
+    // Translate the position in supportedLSRs through the supportedIndexes table.
+    return supportedIndexes[bestSupportedLsrIndex];
+}
+
+/**
+ * Tests whether one desired locale matches one supported locale, using this
+ * matcher's threshold distance, favorSubtag and direction settings.
+ * Returns false if errorCode indicates a failure on entry or if maximizing
+ * the supported locale fails.
+ */
+UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
+                             UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return false; }
+    LSR supportedLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
+    if (U_FAILURE(errorCode)) { return false; }
+    // The distance lookup works on an array of LSR pointers; pass an array of one.
+    const LSR *supportedList[1] = { &supportedLSR };
+    int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
+            getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
+            supportedList, 1,
+            LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
+    // A negative result means nothing matched.
+    return bestIndexAndDistance >= 0;
+}
+
+/**
+ * Returns the inverse of the distance between the two locales, that is,
+ * 1-distance(desired, supported), with the distance scaled from 0..100 to 0..1.
+ * Returns 0 if errorCode indicates a failure on entry or if maximizing the
+ * supported locale fails.
+ */
+double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return 0.; }
+    LSR supportedLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
+    if (U_FAILURE(errorCode)) { return 0.; }
+    // The distance lookup works on an array of LSR pointers; pass an array of one.
+    const LSR *supportedList[1] = { &supportedLSR };
+    int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
+            getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
+            supportedList, 1,
+            LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
+    const double distance = LocaleDistance::getDistanceDouble(bestIndexAndDistance);
+    return (100.0 - distance) / 100.0;
+}
+
+U_NAMESPACE_END
+
+// uloc_acceptLanguage() --------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+namespace {
+
+/**
+ * Converter functor that turns a locale ID string into a Locale.
+ * The most recently converted Locale is stored in the converter itself,
+ * and the call operator returns a reference to that stored object.
+ */
+class LocaleFromTag {
+public:
+    LocaleFromTag() : locale(Locale::getRoot()) {}
+
+    const Locale &operator()(const char *tag) {
+        locale = Locale(tag);
+        return locale;
+    }
+
+private:
+    // Store the locale in the converter, rather than return a reference to a temporary,
+    // or a value which could go out of scope with the caller's reference to it.
+    Locale locale;
+};
+
+/**
+ * Shared implementation of uloc_acceptLanguage() and uloc_acceptLanguageFromHTTP().
+ * Builds a LocaleMatcher from the enumerated supported locales, matches the
+ * desired locales against it and writes the name of the best supported locale
+ * to dest.
+ *
+ * @param supportedLocales enumeration of supported locale IDs
+ * @param desiredLocales the desired locales, in the caller's order
+ * @param dest output buffer; may be nullptr when capacity is 0 (preflighting)
+ * @param capacity size of dest in chars
+ * @param acceptResult if not nullptr, receives ULOC_ACCEPT_VALID when the best
+ *        desired locale equals the chosen supported locale, ULOC_ACCEPT_FALLBACK
+ *        for any other match, or ULOC_ACCEPT_FAILED when nothing matches
+ * @param errorCode ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR if a
+ *        supported locale ID yields a bogus Locale
+ * @return the value of u_terminateChars() for the result length, or 0 on failure
+ */
+int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desiredLocales,
+                       char *dest, int32_t capacity, UAcceptResult *acceptResult,
+                       UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return 0; }
+    LocaleMatcher::Builder builder;
+    const char *locString;
+    while ((locString = uenum_next(&supportedLocales, nullptr, &errorCode)) != nullptr) {
+        Locale loc(locString);
+        if (loc.isBogus()) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        builder.addSupportedLocale(loc);
+    }
+    LocaleMatcher matcher = builder.build(errorCode);
+    LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocales, errorCode);
+    if (U_FAILURE(errorCode)) { return 0; }
+    if (result.getDesiredIndex() >= 0) {
+        if (acceptResult != nullptr) {
+            *acceptResult = *result.getDesiredLocale() == *result.getSupportedLocale() ?
+                ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK;
+        }
+        const char *bestStr = result.getSupportedLocale()->getName();
+        int32_t bestLength = static_cast<int32_t>(uprv_strlen(bestStr));
+        // Skip the copy for an empty name: dest may legitimately be nullptr
+        // with capacity 0, and memcpy must not be given a null pointer even
+        // for a zero length.
+        if (bestLength > 0 && bestLength <= capacity) {
+            uprv_memcpy(dest, bestStr, bestLength);
+        }
+        return u_terminateChars(dest, capacity, bestLength, &errorCode);
+    } else {
+        if (acceptResult != nullptr) {
+            *acceptResult = ULOC_ACCEPT_FAILED;
+        }
+        return u_terminateChars(dest, capacity, 0, &errorCode);
+    }
+}
+
+}  // namespace
+
+/**
+ * C API: picks the best available locale for an array of acceptable locale IDs.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when a buffer or list pointer is inconsistent
+ * with its length, or when availableLocales is nullptr.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguage(char *result, int32_t resultAvailable,
+                    UAcceptResult *outResult,
+                    const char **acceptList, int32_t acceptListCount,
+                    UEnumeration *availableLocales,
+                    UErrorCode *status) {
+    if (U_FAILURE(*status)) { return 0; }
+    // A null pointer is acceptable only together with a zero length,
+    // and a length must never be negative.
+    const bool badResultBuffer =
+        (result == nullptr) ? (resultAvailable != 0) : (resultAvailable < 0);
+    const bool badAcceptList =
+        (acceptList == nullptr) ? (acceptListCount != 0) : (acceptListCount < 0);
+    if (badResultBuffer || badAcceptList || availableLocales == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Convert each locale ID to a Locale lazily while iterating.
+    LocaleFromTag converter;
+    Locale::ConvertingIterator<const char **, LocaleFromTag> desiredLocales(
+        acceptList, acceptList + acceptListCount, converter);
+    return acceptLanguage(*availableLocales, desiredLocales,
+                          result, resultAvailable, outResult, *status);
+}
+
+/**
+ * C API: picks the best available locale for an HTTP Accept-Language string.
+ * Sets U_ILLEGAL_ARGUMENT_ERROR when the result buffer is inconsistent with
+ * its length, or when httpAcceptLanguage or availableLocales is nullptr.
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
+                            UAcceptResult *outResult,
+                            const char *httpAcceptLanguage,
+                            UEnumeration *availableLocales,
+                            UErrorCode *status) {
+    if (U_FAILURE(*status)) { return 0; }
+    // A null buffer is acceptable only with a zero capacity; the capacity is never negative.
+    const bool badResultBuffer =
+        (result == nullptr) ? (resultAvailable != 0) : (resultAvailable < 0);
+    if (badResultBuffer || httpAcceptLanguage == nullptr || availableLocales == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Parse the header value into a list of locales and iterate over it.
+    LocalePriorityList priorityList(httpAcceptLanguage, *status);
+    LocalePriorityList::Iterator desiredLocales = priorityList.iterator();
+    return acceptLanguage(*availableLocales, desiredLocales,
+                          result, resultAvailable, outResult, *status);
+}
@@ -0,0 +1,240 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// localeprioritylist.cpp
+// created: 2019jul11 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "localeprioritylist.h"
+#include "uarrsort.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+// UHashtable key hasher: the key token points to a Locale.
+int32_t hashLocale(const UHashTok token) {
+    return static_cast<const Locale*>(token.pointer)->hashCode();
+}
+
+// UHashtable key comparator: both tokens point to Locale objects,
+// which are compared with Locale::operator==.
+UBool compareLocales(const UHashTok t1, const UHashTok t2) {
+    const Locale &lhs = *static_cast<const Locale*>(t1.pointer);
+    const Locale &rhs = *static_cast<const Locale*>(t2.pointer);
+    return lhs == rhs;
+}
+
+constexpr int32_t WEIGHT_ONE = 1000;
+
+// One parsed list entry: a locale, its qvalue weight, and its original position.
+struct LocaleAndWeight {
+    Locale *locale;
+    int32_t weight;  // 0..1000 = 0.0..1.0
+    int32_t index;  // force stable sort
+
+    // Three-way comparison for sorting: higher weights first,
+    // then lower original index first so that equal weights keep their order.
+    int32_t compare(const LocaleAndWeight &other) const {
+        if (other.weight != weight) {
+            return other.weight - weight;  // descending: other-this
+        }
+        return index - other.index;
+    }
+};
+
+// Sort callback: forwards to LocaleAndWeight::compare(); the context is unused.
+int32_t U_CALLCONV
+compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) {
+    const auto &lhs = *static_cast<const LocaleAndWeight *>(left);
+    const auto &rhs = *static_cast<const LocaleAndWeight *>(right);
+    return lhs.compare(rhs);
+}
+
+// Returns the first position in [p, limit) that does not hold a space
+// character (U+0020), or limit if only spaces remain.
+const char *skipSpaces(const char *p, const char *limit) {
+    for (; p < limit; ++p) {
+        if (*p != ' ') { break; }
+    }
+    return p;
+}
+
+// Returns the number of chars from p up to, but not including, the first
+// accept-language delimiter (space, comma or semicolon) or limit.
+// Leave other validation up to the Locale constructor.
+int32_t findTagLength(const char *p, const char *limit) {
+    const char *end = p;
+    while (end < limit) {
+        const char c = *end;
+        if (c == ' ' || c == ',' || c == ';') { break; }
+        ++end;
+    }
+    return static_cast<int32_t>(end - p);
+}
+
+/**
+ * Parses a qvalue and returns its weight in thousandths (0..1000).
+ * Leading spaces are skipped. The value must start with '0' or '1',
+ * optionally followed by '.' and any number of fraction digits.
+ * The first three fraction digits are used, the fourth one rounds the
+ * result, and any further digits are ignored.
+ * Advances p to after the parsed substring.
+ * Returns a negative value if parsing fails or the value is greater than 1.0.
+ */
+int32_t parseWeight(const char *&p, const char *limit) {
+    p = skipSpaces(p, limit);
+    if (p == limit) { return -1; }
+    const char lead = *p;
+    if (lead != '0' && lead != '1') { return -1; }
+    int32_t weight = (lead - '0') * 1000;
+    ++p;
+    // No fraction part: done after the single integer digit.
+    if (p == limit || *p != '.') { return weight; }
+    ++p;  // skip the decimal point
+    int32_t fractionDigits = 0;
+    for (; p != limit; ++p) {
+        const char c = *p;
+        if (c < '0' || '9' < c) { break; }
+        const int32_t digit = c - '0';
+        switch (fractionDigits) {
+        case 0: weight += digit * 100; ++fractionDigits; break;
+        case 1: weight += digit * 10; ++fractionDigits; break;
+        case 2: weight += digit; ++fractionDigits; break;
+        case 3:
+            // round up
+            if (digit >= 5) { ++weight; }
+            ++fractionDigits;
+            break;
+        default:
+            // ignore further fraction digits
+            break;
+        }
+    }
+    if (weight > WEIGHT_ONE) { return -1; }  // bad if > 1.0
+    return weight;
+}
+
+}  // namespace
+
+/**
+ * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight.
+ *
+ * This wrapper exists (and is not in an anonymous namespace)
+ * so that we can forward-declare it in the header file and
+ * don't have to expose the MaybeStackArray specialization and
+ * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h.
+ * Also, otherwise we would have to do a platform-specific
+ * template export declaration of some kind for the MaybeStackArray specialization
+ * to be properly exported from the common DLL.
+ */
+struct LocaleAndWeightArray : public UMemory {
+    // The list entries; the template argument 20 is the array's built-in capacity.
+    MaybeStackArray<LocaleAndWeight, 20> array;
+};
+
+/**
+ * Parses an accept-language style string into a list of locales with weights,
+ * then sorts the list by descending weight.
+ *
+ * Each comma-separated field is a locale tag optionally followed by
+ * ";q=<qvalue>"; spaces are tolerated around every token and empty fields
+ * are skipped. Parsing stops at the first error.
+ *
+ * @param s the list string
+ * @param errorCode ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR for a
+ *        syntax error or a tag that yields a bogus Locale, and to
+ *        U_MEMORY_ALLOCATION_ERROR if the list cannot be allocated
+ */
+LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return; }
+    list = new LocaleAndWeightArray();
+    if (list == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    const char *p = s.data();
+    const char *limit = p + s.length();
+    while ((p = skipSpaces(p, limit)) != limit) {
+        if (*p == ',') {  // empty range field
+            ++p;
+            continue;
+        }
+        int32_t tagLength = findTagLength(p, limit);
+        if (tagLength == 0) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        // Copy the tag so that it is NUL-terminated for the Locale constructor.
+        CharString tag(p, tagLength, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        Locale locale = Locale(tag.data());
+        if (locale.isBogus()) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        int32_t weight = WEIGHT_ONE;
+        if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') {
+            // Expect 'q', '=', and a valid qvalue, each optionally preceded by spaces.
+            if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' ||
+                    (p = skipSpaces(p + 1, limit)) == limit || *p != '=' ||
+                    (++p, (weight = parseWeight(p, limit)) < 0)) {
+                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
+            p = skipSpaces(p, limit);
+        }
+        if (p != limit && *p != ',') {  // trailing junk
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        // Stop as soon as add() fails, so that its error code is not
+        // overwritten by a syntax error found later in the string.
+        if (!add(locale, weight, errorCode)) { return; }
+        if (p == limit) { break; }
+        ++p;
+    }
+    sort(errorCode);
+}
+
+// Deletes the Locale of every list entry, then the list itself,
+// and closes the lookup hash table.
+LocalePriorityList::~LocalePriorityList() {
+    if (list != nullptr) {
+        int32_t next = 0;
+        while (next < listLength) {
+            delete list->array[next].locale;
+            ++next;
+        }
+        delete list;
+    }
+    uhash_close(map);
+}
+
+// Returns the locale pointer stored in list entry i (may be nullptr for an
+// entry whose locale was removed or orphaned). No bounds or null-list check.
+const Locale *LocalePriorityList::localeAt(int32_t i) const {
+    const LocaleAndWeight &entry = list->array[i];
+    return entry.locale;
+}
+
+// Hands out the locale of list entry i and clears the entry's pointer,
+// so the destructor will not delete it. Returns nullptr if there is no list.
+Locale *LocalePriorityList::orphanLocaleAt(int32_t i) {
+    Locale *orphan = nullptr;
+    if (list != nullptr) {
+        Locale *&slot = list->array[i].locale;
+        orphan = slot;
+        slot = nullptr;
+    }
+    return orphan;
+}
+
+/**
+ * Appends a locale with its weight to the list.
+ * A locale that is already present is removed from its old slot first, so
+ * the new entry replaces it at the end. A weight of 0 or less means
+ * "not acceptable": nothing is appended, and an existing entry for the
+ * locale is dropped.
+ *
+ * @param locale the locale to add; it is cloned unless an equal one is reused
+ * @param weight the qvalue in thousandths (0..1000)
+ * @param errorCode ICU error code
+ * @return true on success (including the q=0 case), false on failure
+ */
+bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return false; }
+    if (map == nullptr) {
+        // Create the lookup table lazily, on the first entry that is kept.
+        if (weight <= 0) { return true; }  // do not add q=0
+        map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode);
+        if (U_FAILURE(errorCode)) { return false; }
+    }
+    LocalPointer<Locale> clone;
+    UBool found = false;
+    int32_t index = uhash_getiAndFound(map, &locale, &found);
+    if (found) {
+        // Duplicate: Remove the old item and append it anew.
+        // The old Locale object is taken over by `clone` for reuse below.
+        LocaleAndWeight &lw = list->array[index];
+        clone.adoptInstead(lw.locale);
+        lw.locale = nullptr;
+        lw.weight = 0;
+        ++numRemoved;
+    }
+    if (weight <= 0) {  // do not add q=0
+        if (found) {
+            // Not strictly necessary but cleaner.
+            uhash_removei(map, &locale);
+        }
+        return true;
+    }
+    if (clone.isNull()) {
+        clone.adoptInstead(locale.clone());
+        // A clone that is bogus although the original is not is reported
+        // as an allocation failure as well.
+        if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return false;
+        }
+    }
+    if (listLength == list->array.getCapacity()) {
+        // Grow the array, keeping the listLength existing entries.
+        int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength;
+        if (list->array.resize(newCapacity, listLength) == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return false;
+        }
+    }
+    // Map the locale to the slot it is about to occupy (slot 0 is a valid value).
+    uhash_putiAllowZero(map, clone.getAlias(), listLength, &errorCode);
+    if (U_FAILURE(errorCode)) { return false; }
+    LocaleAndWeight &lw = list->array[listLength];
+    lw.locale = clone.orphan();
+    lw.weight = weight;
+    lw.index = listLength++;
+    if (weight < WEIGHT_ONE) { hasWeights = true; }
+    U_ASSERT(uhash_count(map) == getLength());
+    return true;
+}
+
+// Sorts the list by descending weight if there is a mix of weights.
+// The comparator forces a stable sort via the item index.
+void LocalePriorityList::sort(UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return; }
+    // Nothing to reorder when every weight is 1.0 or there is at most one locale.
+    if (!hasWeights || getLength() <= 1) { return; }
+    uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight),
+                   compareLocaleAndWeight, nullptr, false, &errorCode);
+}
+
+U_NAMESPACE_END
@@ -0,0 +1,115 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// localeprioritylist.h
+// created: 2019jul11 Markus W. Scherer
+
+#ifndef __LOCALEPRIORITYLIST_H__
+#define __LOCALEPRIORITYLIST_H__
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LocaleAndWeightArray;
+
+/**
+ * Parses a list of locales from an accept-language string.
+ * We are a bit more lenient than the spec:
+ * We accept extra whitespace in more places, empty range fields,
+ * and any number of qvalue fraction digits.
+ *
+ * https://tools.ietf.org/html/rfc2616#section-14.4
+ * 14.4 Accept-Language
+ *
+ *        Accept-Language = "Accept-Language" ":"
+ *                          1#( language-range [ ";" "q" "=" qvalue ] )
+ *        language-range  = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
+ *
+ *    Each language-range MAY be given an associated quality value which
+ *    represents an estimate of the user's preference for the languages
+ *    specified by that range. The quality value defaults to "q=1". For
+ *    example,
+ *
+ *        Accept-Language: da, en-gb;q=0.8, en;q=0.7
+ *
+ * https://tools.ietf.org/html/rfc2616#section-3.9
+ * 3.9 Quality Values
+ *
+ *    HTTP content negotiation (section 12) uses short "floating point"
+ *    numbers to indicate the relative importance ("weight") of various
+ *    negotiable parameters.  A weight is normalized to a real number in
+ *    the range 0 through 1, where 0 is the minimum and 1 the maximum
+ *    value. If a parameter has a quality value of 0, then content with
+ *    this parameter is `not acceptable' for the client. HTTP/1.1
+ *    applications MUST NOT generate more than three digits after the
+ *    decimal point. User configuration of these values SHOULD also be
+ *    limited in this fashion.
+ *
+ *        qvalue         = ( "0" [ "." 0*3DIGIT ] )
+ *                       | ( "1" [ "." 0*3("0") ] )
+ */
+class U_COMMON_API LocalePriorityList : public UMemory {
+public:
+    /**
+     * Iterates over the locales of a LocalePriorityList,
+     * skipping list slots whose locale pointer is nullptr.
+     */
+    class Iterator : public Locale::Iterator {
+    public:
+        /** @return true while fewer locales have been returned than the list's length */
+        UBool hasNext() const override { return count < length; }
+
+        /**
+         * Returns the next non-null locale.
+         * Performs no bounds check of its own; call only while hasNext() is true.
+         */
+        const Locale &next() override {
+            for(;;) {
+                const Locale *locale = list.localeAt(index++);
+                if (locale != nullptr) {
+                    ++count;
+                    return *locale;
+                }
+            }
+        }
+
+    private:
+        friend class LocalePriorityList;
+
+        // The length is captured once, from list.getLength(), at construction time.
+        Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {}
+
+        const LocalePriorityList &list;
+        int32_t index = 0;  // next list slot to look at, including empty slots
+        int32_t count = 0;  // number of locales returned so far
+        const int32_t length;
+    };
+
+    /**
+     * Parses the list string s.
+     * Check errorCode afterwards to learn whether parsing succeeded.
+     */
+    LocalePriorityList(StringPiece s, UErrorCode &errorCode);
+
+    ~LocalePriorityList();
+
+    /** @return the number of list slots minus the number of removed entries */
+    int32_t getLength() const { return listLength - numRemoved; }
+
+    /** @return the number of list slots, including those of removed entries */
+    int32_t getLengthIncludingRemoved() const { return listLength; }
+
+    /** @return a new iterator over this list, starting at the first slot */
+    Iterator iterator() const { return Iterator(*this); }
+
+    /** @return the locale pointer stored in slot i; nullptr if the slot is empty */
+    const Locale *localeAt(int32_t i) const;
+
+    /** Returns the locale stored in slot i and clears the slot's pointer. */
+    Locale *orphanLocaleAt(int32_t i);
+
+private:
+    LocalePriorityList(const LocalePriorityList &) = delete;
+    LocalePriorityList &operator=(const LocalePriorityList &) = delete;
+
+    bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode);
+
+    void sort(UErrorCode &errorCode);
+
+    LocaleAndWeightArray *list = nullptr;
+    int32_t listLength = 0;  // number of list slots in use, including removed entries
+    int32_t numRemoved = 0;  // number of slots whose entry was removed
+    bool hasWeights = false;  // other than 1.0
+    UHashtable *map = nullptr;
+};
+
+U_NAMESPACE_END
+
+#endif  // __LOCALEPRIORITYLIST_H__
@@ -12,7 +12,7 @@

 #include "unicode/utypes.h"

-#if U_LOCAL_SERVICE_HOOK
+#if defined(U_LOCAL_SERVICE_HOOK) && U_LOCAL_SERVICE_HOOK
 /**
 * Prototype for user-supplied service hook. This function is expected to return
 * a type of factory object specific to the requested service.
@@ -19,11 +19,13 @@
 *   that then do not depend on resource bundle code and res_index bundles.
 */

+#include "unicode/errorcode.h"
 #include "unicode/utypes.h"
 #include "unicode/locid.h"
 #include "unicode/uloc.h"
 #include "unicode/ures.h"
 #include "cmemory.h"
+#include "cstring.h"
 #include "ucln_cmn.h"
 #include "uassert.h"
 #include "umutex.h"
@@ -33,31 +35,25 @@

 U_NAMESPACE_BEGIN

-static icu::Locale*  availableLocaleList = NULL;
+static icu::Locale*  availableLocaleList = nullptr;
 static int32_t  availableLocaleListCount;
-static icu::UInitOnce gInitOnceLocale = U_INITONCE_INITIALIZER;
+static icu::UInitOnce gInitOnceLocale {};

-U_NAMESPACE_END
+namespace {

-U_CDECL_BEGIN
-
-static UBool U_CALLCONV locale_available_cleanup(void)
+UBool U_CALLCONV locale_available_cleanup()
 {
-    U_NAMESPACE_USE
-
    if (availableLocaleList) {
        delete []availableLocaleList;
-        availableLocaleList = NULL;
+        availableLocaleList = nullptr;
    }
    availableLocaleListCount = 0;
    gInitOnceLocale.reset();

-    return TRUE;
+    return true;
 }

-U_CDECL_END
-
-U_NAMESPACE_BEGIN
+}  // namespace

 void U_CALLCONV locale_available_init() {
    // This function is a friend of class Locale.
@@ -69,7 +65,7 @@ void U_CALLCONV locale_available_init() {
    if(availableLocaleListCount) {
       availableLocaleList = new Locale[availableLocaleListCount];
    }
-    if (availableLocaleList == NULL) {
+    if (availableLocaleList == nullptr) {
        availableLocaleListCount= 0;
    }
    for (int32_t locCount=availableLocaleListCount-1; locCount>=0; --locCount) {
@@ -95,87 +91,175 @@ U_NAMESPACE_USE

 /* ### Constants **************************************************/

-/* These strings describe the resources we attempt to load from
- the locale ResourceBundle data file.*/
-static const char _kIndexLocaleName[] = "res_index";
-static const char _kIndexTag[]        = "InstalledLocales";
+namespace {

-static char** _installedLocales = NULL;
-static int32_t _installedLocalesCount = 0;
-static icu::UInitOnce _installedLocalesInitOnce;
+// Enough capacity for the two lists in the res_index.res file
+const char** gAvailableLocaleNames[2] = {};
+int32_t gAvailableLocaleCounts[2] = {};
+icu::UInitOnce ginstalledLocalesInitOnce {};
+
+class AvailableLocalesSink : public ResourceSink {  // fills gAvailableLocaleNames / gAvailableLocaleCounts from a resource table
+  public:
+    void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
+        if (U_FAILURE(status)) { return; }
+        ResourceTable resIndexTable = value.getTable(status);
+        if (U_FAILURE(status)) { return; }
+        for (int32_t i = 0; resIndexTable.getKeyAndValue(i, key, value); ++i) {  // key and value are overwritten for each entry
+            ULocAvailableType type;
+            if (uprv_strcmp(key, "InstalledLocales") == 0) {
+                type = ULOC_AVAILABLE_DEFAULT;
+            } else if (uprv_strcmp(key, "AliasLocales") == 0) {
+                type = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES;
+            } else {
+                // Any other entry (CLDRVersion, etc.) is not one of the two locale lists; skip it.
+                continue;
+            }
+            ResourceTable availableLocalesTable = value.getTable(status);
+            if (U_FAILURE(status)) {
+                return;
+            }
+            gAvailableLocaleCounts[type] = availableLocalesTable.getSize();  // note: recorded before the allocation below is checked
+            gAvailableLocaleNames[type] = static_cast<const char**>(
+                uprv_malloc(gAvailableLocaleCounts[type] * sizeof(const char*)));  // one pointer slot per table entry, no terminator slot
+            if (gAvailableLocaleNames[type] == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            for (int32_t j = 0; availableLocalesTable.getKeyAndValue(j, key, value); ++j) {  // reuses key/value; the outer loop re-reads them by index
+                gAvailableLocaleNames[type][j] = key;  // stores the key pointer, not a copy -- NOTE(review): confirm the key storage outlives this list
+            }
+        }
+    }
+};
+
+class AvailableLocalesStringEnumeration : public StringEnumeration {
+  public:
+    // Enumerates the name list(s) selected by `type`, reading the file-level gAvailableLocale* arrays.
+    explicit AvailableLocalesStringEnumeration(ULocAvailableType type) : fType(type) {}
+
+    const char* next(int32_t *resultLength, UErrorCode &status) override {  // returns nullptr once the selected list(s) are exhausted
+        if (U_FAILURE(status)) { return nullptr; }
+        ULocAvailableType actualType = fType;
+        int32_t actualIndex = fIndex++;  // post-increment: the cursor advances even when this call returns nullptr
+
+        // If the "combined" list was requested, resolve that now
+        if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
+            int32_t defaultLocalesCount = gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT];
+            if (actualIndex < defaultLocalesCount) {
+                actualType = ULOC_AVAILABLE_DEFAULT;
+            } else {
+                actualIndex -= defaultLocalesCount;  // rebase the index into the alias list, which follows the default list
+                actualType = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES;
+            }
+        }
+
+        // Return the requested string
+        int32_t count = gAvailableLocaleCounts[actualType];
+        const char* result;
+        if (actualIndex < count) {
+            result = gAvailableLocaleNames[actualType][actualIndex];
+            if (resultLength != nullptr) {  // the length out-parameter is optional
+                *resultLength = static_cast<int32_t>(uprv_strlen(result));
+            }
+        } else {
+            result = nullptr;  // past the end of the selected list(s)
+            if (resultLength != nullptr) {
+                *resultLength = 0;
+            }
+        }
+        return result;
+    }
+
+    void reset(UErrorCode &status) override {  // rewinds the cursor to the first element
+        if (U_FAILURE(status)) { return; }
+        fIndex = 0;
+    }
+
+    int32_t count(UErrorCode &status) const override {  // total element count, independent of the cursor position
+        if (U_FAILURE(status)) { return 0; }
+        if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
+            return gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT]
+                + gAvailableLocaleCounts[ULOC_AVAILABLE_ONLY_LEGACY_ALIASES];
+        } else {
+            return gAvailableLocaleCounts[fType];
+        }
+    }
+
+  private:
+    ULocAvailableType fType;  // which list to enumerate; WITH_LEGACY_ALIASES means both lists back to back
+    int32_t fIndex = 0;       // index of the element the next call to next() will return
+};

 /* ### Get available **************************************************/

-static UBool U_CALLCONV uloc_cleanup(void) {
-    char ** temp;
-
-    if (_installedLocales) {
-        temp = _installedLocales;
-        _installedLocales = NULL;
-
-        _installedLocalesCount = 0;
-        _installedLocalesInitOnce.reset();
-
-        uprv_free(temp);
+UBool U_CALLCONV uloc_cleanup() {
+    for (int32_t i = 0; i < UPRV_LENGTHOF(gAvailableLocaleNames); i++) {
+        uprv_free(gAvailableLocaleNames[i]);
+        gAvailableLocaleNames[i] = nullptr;
+        gAvailableLocaleCounts[i] = 0;
    }
-    return TRUE;
+    ginstalledLocalesInitOnce.reset();
+    return true;
 }

 // Load Installed Locales. This function will be called exactly once
 //   via the initOnce mechanism.

-static void U_CALLCONV loadInstalledLocales() {
-    UResourceBundle *indexLocale = NULL;
-    UResourceBundle installed;
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t i = 0;
-    int32_t localeCount;
-    
-    U_ASSERT(_installedLocales == NULL);
-    U_ASSERT(_installedLocalesCount == 0);
+void U_CALLCONV loadInstalledLocales(UErrorCode& status) {
+    ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);

-    _installedLocalesCount = 0;
-    ures_initStackObject(&installed);
-    indexLocale = ures_openDirect(NULL, _kIndexLocaleName, &status);
-    ures_getByKey(indexLocale, _kIndexTag, &installed, &status);
-    
-    if(U_SUCCESS(status)) {
-        localeCount = ures_getSize(&installed);
-        _installedLocales = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
-        if (_installedLocales != NULL) {
-            ures_resetIterator(&installed);
-            while(ures_hasNext(&installed)) {
-                ures_getNextString(&installed, NULL, (const char **)&_installedLocales[i++], &status);
-            }
-            _installedLocales[i] = NULL;
-            _installedLocalesCount = localeCount;
-            ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
-        }
-    }
-    ures_close(&installed);
-    ures_close(indexLocale);
+    icu::LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "res_index", &status));
+    AvailableLocalesSink sink;
+    ures_getAllItemsWithFallback(rb.getAlias(), "", sink, status);
 }

-static void _load_installedLocales()
-{
-    umtx_initOnce(_installedLocalesInitOnce, &loadInstalledLocales);
+void _load_installedLocales(UErrorCode& status) {
+    umtx_initOnce(ginstalledLocalesInitOnce, &loadInstalledLocales, status);
 }

+} // namespace
+
 U_CAPI const char* U_EXPORT2
-uloc_getAvailable(int32_t offset) 
-{
-    
-    _load_installedLocales();
-    
-    if (offset > _installedLocalesCount)
-        return NULL;
-    return _installedLocales[offset];
+uloc_getAvailable(int32_t offset) {  // returns entry `offset` of list 0, or nullptr on load failure / out-of-range offset
+    icu::ErrorCode status;
+    _load_installedLocales(status);
+    if (status.isFailure()) {
+        return nullptr;
+    }
+    if (offset < 0 || offset >= gAvailableLocaleCounts[0]) {
+        // Out of range: valid indices are 0..count-1 and the array has no terminator slot at [count]. No UErrorCode parameter to report through.
+        return nullptr;
+    }
+    return gAvailableLocaleNames[0][offset];
 }

 U_CAPI int32_t  U_EXPORT2
-uloc_countAvailable()
-{
-    _load_installedLocales();
-    return _installedLocalesCount;
+uloc_countAvailable() {  // number of entries in list 0; 0 when the lists could not be loaded
+    icu::ErrorCode status;
+    _load_installedLocales(status);  // runs the loader through the init-once wrapper
+    if (status.isFailure()) {
+        return 0;
+    }
+    return gAvailableLocaleCounts[0];  // NOTE(review): index 0 is presumably ULOC_AVAILABLE_DEFAULT -- confirm against the enum
 }

+U_CAPI UEnumeration* U_EXPORT2
+uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status) {  // returns nullptr and leaves/sets *status on any failure
+    if (U_FAILURE(*status)) {  // status is dereferenced without a null check
+        return nullptr;
+    }
+    if (type < 0 || type >= ULOC_AVAILABLE_COUNT) {  // reject values outside the enum's range
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    _load_installedLocales(*status);  // make sure the global name lists are populated before enumerating them
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    LocalPointer<AvailableLocalesStringEnumeration> result(
+        new AvailableLocalesStringEnumeration(type), *status);  // presumably sets *status if the allocation failed -- see LocalPointer docs
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    return uenum_openFromStringEnumeration(result.orphan(), status);  // orphan() presumably releases ownership to the wrapper -- confirm
+}
@@ -11,18 +11,14 @@
 **********************************************************************
 */
 #include "locbased.h"
-#include "cstring.h"
+#include "uresimp.h"

 U_NAMESPACE_BEGIN

-Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
-    const char* id = getLocaleID(type, status);
-    return Locale((id != 0) ? id : "");
-}
-
-const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
+const Locale& LocaleBased::getLocale(const Locale& valid, const Locale& actual,
+                                     ULocDataLocaleType type, UErrorCode& status) {
    if (U_FAILURE(status)) {
-        return NULL;
+        return Locale::getRoot();
    }

    switch(type) {
@@ -32,24 +28,19 @@ const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status
        return actual;
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
-        return NULL;
+        return Locale::getRoot();
    }
 }

-void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
-    if (validID != 0) {
-      uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
-      valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
-    }
-    if (actualID != 0) {
-      uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
-      actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
-    }
-}
+const char* LocaleBased::getLocaleID(const Locale& valid, const Locale& actual,
+                                     ULocDataLocaleType type, UErrorCode& status) {
+    const Locale& locale = getLocale(valid, actual, type, status);

-void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
-  uprv_strcpy(valid, validID.getName());
-  uprv_strcpy(actual, actualID.getName());
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+
+    return locale == Locale::getRoot() ? kRootLocaleName : locale.getName();
 }

 U_NAMESPACE_END
@@ -16,14 +16,6 @@
 #include "unicode/locid.h"
 #include "unicode/uobject.h"

-/**
- * Macro to declare a locale LocaleBased wrapper object for the given
- * object, which must have two members named `validLocale' and
- * `actualLocale' of size ULOC_FULLNAME_CAPACITY
- */
-#define U_LOCALE_BASED(varname, objname) \
-  LocaleBased varname((objname).validLocale, (objname).actualLocale);
-
 U_NAMESPACE_BEGIN

 /**
@@ -37,71 +29,36 @@ class U_COMMON_API LocaleBased : public UMemory {

 public:

-    /**
-     * Construct a LocaleBased wrapper around the two pointers.  These
-     * will be aliased for the lifetime of this object.
-     */
-    inline LocaleBased(char* validAlias, char* actualAlias);
-
-    /**
-     * Construct a LocaleBased wrapper around the two const pointers.
-     * These will be aliased for the lifetime of this object.
-     */
-    inline LocaleBased(const char* validAlias, const char* actualAlias);
-
    /**
     * Return locale meta-data for the service object wrapped by this
     * object.  Either the valid or the actual locale may be
     * retrieved.
+     * @param valid The valid locale.
+     * @param actual The actual locale.
     * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
     * @param status input-output error code
     * @return the indicated locale
     */
-    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+    static const Locale& getLocale(
+        const Locale& valid, const Locale& actual,
+        ULocDataLocaleType type, UErrorCode& status);

    /**
     * Return the locale ID for the service object wrapped by this
     * object.  Either the valid or the actual locale may be
     * retrieved.
+     * @param valid The valid locale.
+     * @param actual The actual locale.
     * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
     * @param status input-output error code
     * @return the indicated locale ID
     */
-    const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+    static const char* getLocaleID(
+        const Locale& valid, const Locale& actual,
+        ULocDataLocaleType type, UErrorCode& status);

-    /**
-     * Set the locale meta-data for the service object wrapped by this
-     * object.  If either parameter is zero, it is ignored.
-     * @param valid the ID of the valid locale
-     * @param actual the ID of the actual locale
-     */
-    void setLocaleIDs(const char* valid, const char* actual);
-
-    /**
-     * Set the locale meta-data for the service object wrapped by this
-     * object.
-     * @param valid the ID of the valid locale
-     * @param actual the ID of the actual locale
-     */
-    void setLocaleIDs(const Locale& valid, const Locale& actual);
-
- private:
-
-    char* valid;
-    
-    char* actual;
 };

-inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
-    valid(validAlias), actual(actualAlias) {
-}
-
-inline LocaleBased::LocaleBased(const char* validAlias,
-                                const char* actualAlias) :
-    // ugh: cast away const
-    valid((char*)validAlias), actual((char*)actualAlias) {
-}
-
 U_NAMESPACE_END

 #endif
@@ -19,12 +19,16 @@
 *   that then do not depend on resource bundle code and display name data.
 */

+#include <string_view>
+
 #include "unicode/utypes.h"
 #include "unicode/brkiter.h"
 #include "unicode/locid.h"
+#include "unicode/uenum.h"
 #include "unicode/uloc.h"
 #include "unicode/ures.h"
 #include "unicode/ustring.h"
+#include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "putilimp.h"
@@ -52,29 +56,29 @@ Locale::getDisplayLanguage(UnicodeString& dispLang) const
 UnicodeString&
 Locale::getDisplayLanguage(const Locale &displayLocale,
                           UnicodeString &result) const {
-    UChar *buffer;
+    char16_t *buffer;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t length;

    buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
        result.truncate(0);
        return result;
    }

-    length=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
+    length=uloc_getDisplayLanguage(getName(), displayLocale.getName(),
                                   buffer, result.getCapacity(),
                                   &errorCode);
    result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);

    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
            result.truncate(0);
            return result;
        }
        errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
+        length=uloc_getDisplayLanguage(getName(), displayLocale.getName(),
                                       buffer, result.getCapacity(),
                                       &errorCode);
        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -92,29 +96,29 @@ Locale::getDisplayScript(UnicodeString& dispScript) const
 UnicodeString&
 Locale::getDisplayScript(const Locale &displayLocale,
                          UnicodeString &result) const {
-    UChar *buffer;
+    char16_t *buffer;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t length;

    buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
        result.truncate(0);
        return result;
    }

-    length=uloc_getDisplayScript(fullName, displayLocale.fullName,
+    length=uloc_getDisplayScript(getName(), displayLocale.getName(),
                                  buffer, result.getCapacity(),
                                  &errorCode);
    result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);

    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
            result.truncate(0);
            return result;
        }
        errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayScript(fullName, displayLocale.fullName,
+        length=uloc_getDisplayScript(getName(), displayLocale.getName(),
                                      buffer, result.getCapacity(),
                                      &errorCode);
        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -132,29 +136,29 @@ Locale::getDisplayCountry(UnicodeString& dispCntry) const
 UnicodeString&
 Locale::getDisplayCountry(const Locale &displayLocale,
                          UnicodeString &result) const {
-    UChar *buffer;
+    char16_t *buffer;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t length;

    buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
        result.truncate(0);
        return result;
    }

-    length=uloc_getDisplayCountry(fullName, displayLocale.fullName,
+    length=uloc_getDisplayCountry(getName(), displayLocale.getName(),
                                  buffer, result.getCapacity(),
                                  &errorCode);
    result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);

    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
            result.truncate(0);
            return result;
        }
        errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayCountry(fullName, displayLocale.fullName,
+        length=uloc_getDisplayCountry(getName(), displayLocale.getName(),
                                      buffer, result.getCapacity(),
                                      &errorCode);
        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -172,29 +176,29 @@ Locale::getDisplayVariant(UnicodeString& dispVar) const
 UnicodeString&
 Locale::getDisplayVariant(const Locale &displayLocale,
                          UnicodeString &result) const {
-    UChar *buffer;
+    char16_t *buffer;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t length;

    buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
        result.truncate(0);
        return result;
    }

-    length=uloc_getDisplayVariant(fullName, displayLocale.fullName,
+    length=uloc_getDisplayVariant(getName(), displayLocale.getName(),
                                  buffer, result.getCapacity(),
                                  &errorCode);
    result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);

    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
            result.truncate(0);
            return result;
        }
        errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayVariant(fullName, displayLocale.fullName,
+        length=uloc_getDisplayVariant(getName(), displayLocale.getName(),
                                      buffer, result.getCapacity(),
                                      &errorCode);
        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -212,29 +216,29 @@ Locale::getDisplayName( UnicodeString& name ) const
 UnicodeString&
 Locale::getDisplayName(const Locale &displayLocale,
                       UnicodeString &result) const {
-    UChar *buffer;
+    char16_t *buffer;
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t length;

    buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
        result.truncate(0);
        return result;
    }

-    length=uloc_getDisplayName(fullName, displayLocale.fullName,
+    length=uloc_getDisplayName(getName(), displayLocale.getName(),
                               buffer, result.getCapacity(),
                               &errorCode);
    result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);

    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
            result.truncate(0);
            return result;
        }
        errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayName(fullName, displayLocale.fullName,
+        length=uloc_getDisplayName(getName(), displayLocale.getName(),
                                   buffer, result.getCapacity(),
                                   &errorCode);
        result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -243,7 +247,7 @@ Locale::getDisplayName(const Locale &displayLocale,
    return result;
 }

-#if ! UCONFIG_NO_BREAK_ITERATION
+#if !UCONFIG_NO_BREAK_ITERATION

 // -------------------------------------
 // Gets the objectLocale display name in the default locale language.
@@ -273,166 +277,190 @@ U_NAMESPACE_END

 U_NAMESPACE_USE

+namespace {
+
 /* ### Constants **************************************************/

 /* These strings describe the resources we attempt to load from
 the locale ResourceBundle data file.*/
-static const char _kLanguages[]       = "Languages";
-static const char _kScripts[]         = "Scripts";
-static const char _kScriptsStandAlone[] = "Scripts%stand-alone";
-static const char _kCountries[]       = "Countries";
-static const char _kVariants[]        = "Variants";
-static const char _kKeys[]            = "Keys";
-static const char _kTypes[]           = "Types";
-//static const char _kRootName[]        = "root";
-static const char _kCurrency[]        = "currency";
-static const char _kCurrencies[]      = "Currencies";
-static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
-static const char _kPattern[]         = "pattern";
-static const char _kSeparator[]       = "separator";
+constexpr char _kLanguages[]       = "Languages";
+constexpr char _kScripts[]         = "Scripts";
+constexpr char _kScriptsStandAlone[] = "Scripts%stand-alone";
+constexpr char _kCountries[]       = "Countries";
+constexpr char _kVariants[]        = "Variants";
+constexpr char _kKeys[]            = "Keys";
+constexpr char _kTypes[]           = "Types";
+//constexpr char _kRootName[]        = "root";
+constexpr char _kCurrency[]        = "currency";
+constexpr char _kCurrencies[]      = "Currencies";
+constexpr char _kLocaleDisplayPattern[] = "localeDisplayPattern";
+constexpr char _kPattern[]         = "pattern";
+constexpr char _kSeparator[]       = "separator";

 /* ### Display name **************************************************/

-static int32_t
+int32_t
 _getStringOrCopyKey(const char *path, const char *locale,
                    const char *tableKey, 
                    const char* subTableKey,
                    const char *itemKey,
                    const char *substitute,
-                    UChar *dest, int32_t destCapacity,
-                    UErrorCode *pErrorCode) {
-    const UChar *s = NULL;
+                    char16_t *dest, int32_t destCapacity,
+                    UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return 0; }
+    const char16_t *s = nullptr;
    int32_t length = 0;

-    if(itemKey==NULL) {
+    if(itemKey==nullptr) {
        /* top-level item: normal resource bundle access */
-        UResourceBundle *rb;
+        icu::LocalUResourceBundlePointer rb(ures_open(path, locale, &errorCode));

-        rb=ures_open(path, locale, pErrorCode);
-
-        if(U_SUCCESS(*pErrorCode)) {
-            s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
+        if(U_SUCCESS(errorCode)) {
+            s=ures_getStringByKey(rb.getAlias(), tableKey, &length, &errorCode);
            /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
-            ures_close(rb);
        }
    } else {
+        bool isLanguageCode = (uprv_strncmp(tableKey, _kLanguages, 9) == 0);
        /* Language code should not be a number. If it is, set the error code. */
-        if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) {
-            *pErrorCode = U_MISSING_RESOURCE_ERROR;
+        if (isLanguageCode && uprv_strtol(itemKey, nullptr, 10)) {
+            errorCode = U_MISSING_RESOURCE_ERROR;
        } else {
            /* second-level item, use special fallback */
            s=uloc_getTableStringWithFallback(path, locale,
-                                               tableKey, 
+                                               tableKey,
                                               subTableKey,
                                               itemKey,
                                               &length,
-                                               pErrorCode);
+                                               &errorCode);
+            if (U_FAILURE(errorCode) && isLanguageCode && itemKey != nullptr) {
+                // convert itemKey locale code to canonical form and try again, ICU-20870
+                errorCode = U_ZERO_ERROR;
+                Locale canonKey = Locale::createCanonical(itemKey);
+                s=uloc_getTableStringWithFallback(path, locale,
+                                                    tableKey,
+                                                    subTableKey,
+                                                    canonKey.getName(),
+                                                    &length,
+                                                    &errorCode);
+            }
        }
    }

-    if(U_SUCCESS(*pErrorCode)) {
+    if(U_SUCCESS(errorCode)) {
        int32_t copyLength=uprv_min(length, destCapacity);
-        if(copyLength>0 && s != NULL) {
+        if(copyLength>0 && s != nullptr) {
            u_memcpy(dest, s, copyLength);
        }
    } else {
        /* no string from a resource bundle: convert the substitute */
-        length=(int32_t)uprv_strlen(substitute);
+        length = static_cast<int32_t>(uprv_strlen(substitute));
        u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
-        *pErrorCode=U_USING_DEFAULT_WARNING;
+        errorCode = U_USING_DEFAULT_WARNING;
    }

-    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
+    return u_terminateUChars(dest, destCapacity, length, &errorCode);
 }

-typedef  int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);
+using UDisplayNameGetter = icu::CharString(std::string_view, UErrorCode&);

-static int32_t
+int32_t
 _getDisplayNameForComponent(const char *locale,
                            const char *displayLocale,
-                            UChar *dest, int32_t destCapacity,
+                            char16_t *dest, int32_t destCapacity,
                            UDisplayNameGetter *getter,
                            const char *tag,
-                            UErrorCode *pErrorCode) {
-    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
-    int32_t length;
+                            UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return 0; }
    UErrorCode localStatus;
-    const char* root = NULL;
+    const char* root = nullptr;

-    /* argument checking */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+    if(destCapacity<0 || (destCapacity>0 && dest==nullptr)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

-    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
+    if (locale == nullptr) {
+        locale = uloc_getDefault();
    }

    localStatus = U_ZERO_ERROR;
-    length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
-    if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    icu::CharString localeBuffer = (*getter)(locale, localStatus);
+    if (U_FAILURE(localStatus)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
-    if(length==0) {
-        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
+    if (localeBuffer.isEmpty()) {
+        // For the display name, we treat this as unknown language (ICU-20273).
+        if (getter == ulocimp_getLanguage) {
+            localeBuffer.append("und", errorCode);
+        } else {
+            return u_terminateUChars(dest, destCapacity, 0, &errorCode);
+        }
    }

    root = tag == _kCountries ? U_ICUDATA_REGION : U_ICUDATA_LANG;

    return _getStringOrCopyKey(root, displayLocale,
-                               tag, NULL, localeBuffer,
-                               localeBuffer,
+                               tag, nullptr, localeBuffer.data(),
+                               localeBuffer.data(),
                               dest, destCapacity,
-                               pErrorCode);
+                               errorCode);
 }

+}  // namespace
+
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayLanguage(const char *locale,
                        const char *displayLocale,
-                        UChar *dest, int32_t destCapacity,
+                        char16_t *dest, int32_t destCapacity,
                        UErrorCode *pErrorCode) {
    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                uloc_getLanguage, _kLanguages, pErrorCode);
+                ulocimp_getLanguage, _kLanguages, *pErrorCode);
 }

 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayScript(const char* locale,
                      const char* displayLocale,
-                      UChar *dest, int32_t destCapacity,
+                      char16_t *dest, int32_t destCapacity,
                      UErrorCode *pErrorCode)
 {
-	UErrorCode err = U_ZERO_ERROR;
-	int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                uloc_getScript, _kScriptsStandAlone, &err);
-	
-	if ( err == U_USING_DEFAULT_WARNING ) {
+    if (U_FAILURE(*pErrorCode)) { return 0; }
+    UErrorCode err = U_ZERO_ERROR;
+    int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                ulocimp_getScript, _kScriptsStandAlone, err);
+
+    if (destCapacity == 0 && err == U_BUFFER_OVERFLOW_ERROR) {
+        // For preflight, return the max of the value and the fallback.
+        int32_t fallback_res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
+                                                           ulocimp_getScript, _kScripts, *pErrorCode);
+        return (fallback_res > res) ? fallback_res : res;
+    }
+    if ( err == U_USING_DEFAULT_WARNING ) {
        return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                    uloc_getScript, _kScripts, pErrorCode);
-	} else {
-		*pErrorCode = err;
-		return res;
-	}
+                                           ulocimp_getScript, _kScripts, *pErrorCode);
+    } else {
+        *pErrorCode = err;
+        return res;
+    }
 }

-U_INTERNAL int32_t U_EXPORT2
+static int32_t
 uloc_getDisplayScriptInContext(const char* locale,
                      const char* displayLocale,
-                      UChar *dest, int32_t destCapacity,
+                      char16_t *dest, int32_t destCapacity,
                      UErrorCode *pErrorCode)
 {
    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                    uloc_getScript, _kScripts, pErrorCode);
+                    ulocimp_getScript, _kScripts, *pErrorCode);
 }

 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayCountry(const char *locale,
                       const char *displayLocale,
-                       UChar *dest, int32_t destCapacity,
+                       char16_t *dest, int32_t destCapacity,
                       UErrorCode *pErrorCode) {
    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                uloc_getCountry, _kCountries, pErrorCode);
+                ulocimp_getRegion, _kCountries, *pErrorCode);
 }

 /*
@@ -443,10 +471,10 @@ uloc_getDisplayCountry(const char *locale,
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayVariant(const char *locale,
                        const char *displayLocale,
-                       UChar *dest, int32_t destCapacity,
+                       char16_t *dest, int32_t destCapacity,
                        UErrorCode *pErrorCode) {
     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                uloc_getVariant, _kVariants, pErrorCode);
+                ulocimp_getVariant, _kVariants, *pErrorCode);  // variant-subtag getter with the _kVariants table key; pErrorCode is dereferenced unchecked, so it must be non-null
 }

 /* Instead of having a separate pass for 'special' patterns, reintegrate the two
@@ -464,14 +492,14 @@ uloc_getDisplayVariant(const char *locale,
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayName(const char *locale,
                    const char *displayLocale,
-                    UChar *dest, int32_t destCapacity,
+                    char16_t *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode)
 {
-    static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
-    static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
-    static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
+    static const char16_t defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */
+    static const char16_t sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
+    static const char16_t sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
    static const int32_t subLen = 3;
-    static const UChar defaultPattern[10] = {
+    static const char16_t defaultPattern[10] = {
        0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
    }; /* {0} ({1}) */
    static const int32_t defaultPatLen = 9;
@@ -480,45 +508,44 @@ uloc_getDisplayName(const char *locale,

    int32_t length; /* of formatted result */

-    const UChar *separator;
+    const char16_t *separator;
    int32_t sepLen = 0;
-    const UChar *pattern;
+    const char16_t *pattern;
    int32_t patLen = 0;
    int32_t sub0Pos, sub1Pos;
    
-    UChar formatOpenParen         = 0x0028; // (
-    UChar formatReplaceOpenParen  = 0x005B; // [
-    UChar formatCloseParen        = 0x0029; // )
-    UChar formatReplaceCloseParen = 0x005D; // ]
+    char16_t formatOpenParen         = 0x0028; // (
+    char16_t formatReplaceOpenParen  = 0x005B; // [
+    char16_t formatCloseParen        = 0x0029; // )
+    char16_t formatReplaceCloseParen = 0x005D; // ]

-    UBool haveLang = TRUE; /* assume true, set false if we find we don't have
+    UBool haveLang = true; /* assume true, set false if we find we don't have
                              a lang component in the locale */
-    UBool haveRest = TRUE; /* assume true, set false if we find we don't have
+    UBool haveRest = true; /* assume true, set false if we find we don't have
                              any other component in the locale */
-    UBool retry = FALSE; /* set true if we need to retry, see below */
+    UBool retry = false; /* set true if we need to retry, see below */

    int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */

-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
        return 0;
    }

-    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+    if(destCapacity<0 || (destCapacity>0 && dest==nullptr)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    {
        UErrorCode status = U_ZERO_ERROR;
-        UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
-        UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
-                                                             NULL, &status);

-        separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
-        pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);
+        icu::LocalUResourceBundlePointer locbundle(
+                ures_open(U_ICUDATA_LANG, displayLocale, &status));
+        icu::LocalUResourceBundlePointer dspbundle(
+                ures_getByKeyWithFallback(locbundle.getAlias(), _kLocaleDisplayPattern, nullptr, &status));

-        ures_close(dspbundle);
-        ures_close(locbundle);
+        separator=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kSeparator, &sepLen, &status);
+        pattern=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kPattern, &patLen, &status);
    }

    /* If we couldn't find any data, then use the defaults */
@@ -535,13 +562,13 @@ uloc_getDisplayName(const char *locale,
     * This is similar to how pattern is handled below.
     */
    {
-        UChar *p0=u_strstr(separator, sub0);
-        UChar *p1=u_strstr(separator, sub1);
-        if (p0==NULL || p1==NULL || p1<p0) {
+        char16_t *p0=u_strstr(separator, sub0);
+        char16_t *p1=u_strstr(separator, sub1);
+        if (p0==nullptr || p1==nullptr || p1<p0) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
-        separator = (const UChar *)p0 + subLen;
+        separator = (const char16_t *)p0 + subLen;
        sepLen = static_cast<int32_t>(p1 - separator);
    }

@@ -552,9 +579,9 @@ uloc_getDisplayName(const char *locale,
        sub1Pos=defaultSub1Pos;
        // use default formatOpenParen etc. set above
    } else { /* non-default pattern */
-        UChar *p0=u_strstr(pattern, sub0);
-        UChar *p1=u_strstr(pattern, sub1);
-        if (p0==NULL || p1==NULL) {
+        char16_t *p0=u_strstr(pattern, sub0);
+        char16_t *p1=u_strstr(pattern, sub1);
+        if (p0==nullptr || p1==nullptr) {
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
@@ -564,7 +591,7 @@ uloc_getDisplayName(const char *locale,
            int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
            langi=1;
        }
-        if (u_strchr(pattern, 0xFF08) != NULL) {
+        if (u_strchr(pattern, 0xFF08) != nullptr) {
            formatOpenParen         = 0xFF08; // fullwidth (
            formatReplaceOpenParen  = 0xFF3B; // fullwidth [
            formatCloseParen        = 0xFF09; // fullwidth )
@@ -580,13 +607,13 @@ uloc_getDisplayName(const char *locale,
     * adjust the parameters so padding is not added, and repeat.
     */
    do {
-        UChar* p=dest;
+        char16_t* p=dest;
        int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
        int32_t langLen=0; /* length of language substitution */
        int32_t langPos=0; /* position in output of language substitution */
        int32_t restLen=0; /* length of 'everything else' substitution */
        int32_t restPos=0; /* position in output of 'everything else' substitution */
-        UEnumeration* kenum = NULL; /* keyword enumeration */
+        icu::LocalUEnumerationPointer kenum; /* keyword enumeration */

        /* prefix of pattern, extremely likely to be empty */
        if(sub0Pos) {
@@ -603,7 +630,7 @@ uloc_getDisplayName(const char *locale,
        }

        for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
-            UBool subdone = FALSE; /* set true when ready to move to next substitution */
+            UBool subdone = false; /* set true when ready to move to next substitution */

            /* prep p and cap for calls to get display components, pin cap to 0 since
               they complain if cap is negative */
@@ -621,10 +648,10 @@ uloc_getDisplayName(const char *locale,
                    length+=langLen;
                    haveLang=langLen>0;
                }
-                subdone=TRUE;
+                subdone=true;
            } else { /* {1} */
                if(!haveRest) {
-                    subdone=TRUE;
+                    subdone=true;
                } else {
                    int32_t len; /* length of component (plus other stuff) we just fetched */
                    switch(resti++) {
@@ -639,14 +666,13 @@ uloc_getDisplayName(const char *locale,
                            len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
                            break;
                        case 3:
-                            kenum = uloc_openKeywords(locale, pErrorCode);
+                            kenum.adoptInstead(uloc_openKeywords(locale, pErrorCode));
                            U_FALLTHROUGH;
                        default: {
-                            const char* kw=uenum_next(kenum, &len, pErrorCode);
-                            if (kw == NULL) {
-                                uenum_close(kenum);
+                            const char* kw=uenum_next(kenum.getAlias(), &len, pErrorCode);
+                            if (kw == nullptr) {
                                len=0; /* mark that we didn't add a component */
-                                subdone=TRUE;
+                                subdone=true;
                            } else {
                                /* incorporating this behavior into the loop made it even more complex,
                                   so just special case it here */
@@ -689,9 +715,9 @@ uloc_getDisplayName(const char *locale,
                    } /* end switch */

                    if (len>0) {
-                        /* we addeed a component, so add separator and write it if there's room. */
+                        /* we added a component, so add separator and write it if there's room. */
                        if(len+sepLen<=cap) {
-                            const UChar * plimit = p + len;
+                            const char16_t * plimit = p + len;
                            for (; p < plimit; p++) {
                                if (*p == formatOpenParen) {
                                    *p = formatReplaceOpenParen;
@@ -726,7 +752,7 @@ uloc_getDisplayName(const char *locale,
                    int32_t padLen;
                    patPos+=subLen;
                    padLen=(subi==0 ? sub1Pos : patLen)-patPos;
-                    if(length+padLen < destCapacity) {
+                    if(length+padLen <= destCapacity) {
                        p=dest+length;
                        for(int32_t i=0;i<padLen;++i) {
                            *p++=pattern[patPos++];
@@ -751,7 +777,7 @@ uloc_getDisplayName(const char *locale,
                            /* would have fit, but didn't because of pattern prefix. */
                            sub0Pos=0; /* stops initial padding (and a second retry,
                                          so we won't end up here again) */
-                            retry=TRUE;
+                            retry=true;
                        }
                    }
                }
@@ -767,28 +793,28 @@ uloc_getDisplayName(const char *locale,
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayKeyword(const char* keyword,
                        const char* displayLocale,
-                       UChar* dest,
+                       char16_t* dest,
                        int32_t destCapacity,
                        UErrorCode* status){

     /* argument checking: a null or already-failed status returns 0 untouched;
        a negative capacity, or a null dest with positive capacity, sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 */
-    if(status==NULL || U_FAILURE(*status)) {
+    if(status==nullptr || U_FAILURE(*status)) {
         return 0;
     }

-    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+    if(destCapacity<0 || (destCapacity>0 && dest==nullptr)) {
         *status=U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }


-    /* pass itemKey=NULL to look for a top-level item */
+    /* pass itemKey=nullptr to look for a top-level item; keyword is passed twice, presumably as the lookup key and as the text copied when nothing is found (going by the helper's name; confirm) */
     return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
-                               _kKeys, NULL, 
+                               _kKeys, nullptr,
                                keyword, 
                                keyword,      
                                dest, destCapacity,
-                               status);
+                               *status);

 }

@@ -799,30 +825,26 @@ U_CAPI int32_t U_EXPORT2
 uloc_getDisplayKeywordValue(   const char* locale,
                               const char* keyword,
                               const char* displayLocale,
-                               UChar* dest,
+                               char16_t* dest,
                               int32_t destCapacity,
                               UErrorCode* status){


-    char keywordValue[ULOC_FULLNAME_CAPACITY*4];
-    int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
-    int32_t keywordValueLen =0;
-
    /* argument checking */
-    if(status==NULL || U_FAILURE(*status)) {
+    if(status==nullptr || U_FAILURE(*status)) {
        return 0;
    }

-    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+    if(destCapacity<0 || (destCapacity>0 && dest==nullptr)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the keyword value */
-    keywordValue[0]=0;
-    keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
-    if (*status == U_STRING_NOT_TERMINATED_WARNING)
-      *status = U_BUFFER_OVERFLOW_ERROR;
+    CharString keywordValue;
+    if (keyword != nullptr && *keyword != '\0') {
+        keywordValue = ulocimp_getKeywordValue(locale, keyword, *status);
+    }

    /* 
     * if the keyword is equal to currency .. then to get the display name 
@@ -831,19 +853,17 @@ uloc_getDisplayKeywordValue(   const char* locale,
    if(uprv_stricmp(keyword, _kCurrency)==0){

        int32_t dispNameLen = 0;
-        const UChar *dispName = NULL;
-        
-        UResourceBundle *bundle     = ures_open(U_ICUDATA_CURR, displayLocale, status);
-        UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
-        UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
-        
-        dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
-        
-        /*close the bundles */
-        ures_close(currency);
-        ures_close(currencies);
-        ures_close(bundle);
-        
+        const char16_t *dispName = nullptr;
+
+        icu::LocalUResourceBundlePointer bundle(
+                ures_open(U_ICUDATA_CURR, displayLocale, status));
+        icu::LocalUResourceBundlePointer currencies(
+                ures_getByKey(bundle.getAlias(), _kCurrencies, nullptr, status));
+        icu::LocalUResourceBundlePointer currency(
+                ures_getByKeyWithFallback(currencies.getAlias(), keywordValue.data(), nullptr, status));
+
+        dispName = ures_getStringByIndex(currency.getAlias(), UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
+
        if(U_FAILURE(*status)){
            if(*status == U_MISSING_RESOURCE_ERROR){
                /* we just want to write the value over if nothing is available */
@@ -853,8 +873,8 @@ uloc_getDisplayKeywordValue(   const char* locale,
            }
        }

-        /* now copy the dispName over if not NULL */
-        if(dispName != NULL){
+        /* now copy the dispName over if not nullptr */
+        if(dispName != nullptr){
            if(dispNameLen <= destCapacity){
                u_memcpy(dest, dispName, dispNameLen);
                return u_terminateUChars(dest, destCapacity, dispNameLen, status);
@@ -864,12 +884,12 @@ uloc_getDisplayKeywordValue(   const char* locale,
            }
        }else{
            /* we have not found the display name for the value .. just copy over */
-            if(keywordValueLen <= destCapacity){
-                u_charsToUChars(keywordValue, dest, keywordValueLen);
-                return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
+            if(keywordValue.length() <= destCapacity){
+                u_charsToUChars(keywordValue.data(), dest, keywordValue.length());
+                return u_terminateUChars(dest, destCapacity, keywordValue.length(), status);
            }else{
                 *status = U_BUFFER_OVERFLOW_ERROR;
-                return keywordValueLen;
+                return keywordValue.length();
            }
        }

@@ -878,9 +898,9 @@ uloc_getDisplayKeywordValue(   const char* locale,

        return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
                                   _kTypes, keyword, 
-                                   keywordValue,
-                                   keywordValue,
+                                   keywordValue.data(),
+                                   keywordValue.data(),
                                   dest, destCapacity,
-                                   status);
+                                   *status);
    }
 }
@@ -0,0 +1,415 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// locdistance.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "cstring.h"
+#include "locdistance.h"
+#include "loclikelysubtags.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uinvchar.h"
+#include "umutex.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+/**
+ * Bit flag used on the last character of a subtag in the trie.
+ * Must be set consistently by the builder and the lookup code.
+ */
+constexpr int32_t END_OF_SUBTAG = 0x80;
+/** Distance value bit flag, set by the builder. */
+constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80;
+/** Distance value bit flag, set by trieNext() and getDesSuppScriptDistance() when the trie result is USTRINGTRIE_FINAL_VALUE. */
+constexpr int32_t DISTANCE_IS_FINAL = 0x100;
+constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;  // mask of both flag bits; getBestIndexAndDistance() strips it from the language distance
+
+constexpr int32_t ABOVE_THRESHOLD = 100;  // distance reported (shifted) by getBestIndexAndDistance() when no supported LSR qualifies
+
+// Indexes into array of distances (LocaleDistanceData::distances, read by the LocaleDistance constructor).
+enum {
+    IX_DEF_LANG_DISTANCE,
+    IX_DEF_SCRIPT_DISTANCE,
+    IX_DEF_REGION_DISTANCE,
+    IX_MIN_REGION_DISTANCE,
+    IX_LIMIT
+};
+
+LocaleDistance *gLocaleDistance = nullptr;  // singleton instance; created by initLocaleDistance(), deleted by cleanup()
+UInitOnce gInitOnce {};  // one-time-initialization state passed to umtx_initOnce() in getSingleton()
+
+UBool U_CALLCONV cleanup() {  // registered with ucln_common_registerCleanup() by initLocaleDistance()
+    delete gLocaleDistance;
+    gLocaleDistance = nullptr;
+    gInitOnce.reset();  // clear the init-once state together with the singleton pointer
+    return true;
+}
+
+}  // namespace
+
+void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
+    // This function is invoked only via umtx_initOnce(). It builds gLocaleDistance, or sets errorCode and leaves it nullptr.
+    U_ASSERT(gLocaleDistance == nullptr);
+    const LikelySubtags *likely = LikelySubtags::getSingleton(errorCode);  // keep the pointer: it may be nullptr on failure
+    if (U_FAILURE(errorCode)) { return; }  // check before dereferencing; binding a reference to *nullptr is undefined behavior
+    const LocaleDistanceData &data = likely->getDistanceData();
+    if (data.distanceTrieBytes == nullptr ||
+            data.regionToPartitions == nullptr || data.partitions == nullptr ||
+            // ok if no paradigms
+            data.distances == nullptr) {
+        errorCode = U_MISSING_RESOURCE_ERROR;
+        return;
+    }
+    gLocaleDistance = new LocaleDistance(data, *likely);
+    if (gLocaleDistance == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup);  // so cleanup() deletes the singleton and resets gInitOnce
+}
+
+const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {  // returns the shared instance, initializing it through gInitOnce
+    if (U_FAILURE(errorCode)) { return nullptr; }  // incoming failure: do nothing
+    umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode);
+    return gLocaleDistance;  // still nullptr if initLocaleDistance() returned early with an error
+}
+
+LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely) :
+        likelySubtags(likely),
+        trie(data.distanceTrieBytes),
+        regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
+        paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength),  // initLocaleDistance() does not null-check paradigms ("ok if no paradigms")
+        defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]),
+        defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]),
+        defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]),
+        minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) {
+    // For the default demotion value, use the
+    // default region distance between unrelated Englishes.
+    // Thus, unless demotion is turned off,
+    // a mere region difference for one desired locale
+    // is as good as a perfect match for the next following desired locale.
+    // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
+    LSR en("en", "Latn", "US", LSR::EXPLICIT_LSR);
+    LSR enGB("en", "Latn", "GB", LSR::EXPLICIT_LSR);
+    const LSR *p_enGB = &enGB;  // getBestIndexAndDistance() takes an array of LSR pointers; this is a one-element array
+    int32_t indexAndDistance = getBestIndexAndDistance(en, &p_enGB, 1,
+            shiftDistance(50), ULOCMATCH_FAVOR_LANGUAGE, ULOCMATCH_DIRECTION_WITH_ONE_WAY);
+    defaultDemotionPerDesiredLocale  = getDistanceFloor(indexAndDistance);  // distance bits of the en-US vs. en-GB result, shifted back down by DISTANCE_SHIFT
+}
+
+int32_t LocaleDistance::getBestIndexAndDistance(
+        const LSR &desired,
+        const LSR **supportedLSRs, int32_t supportedLSRsLength,
+        int32_t shiftedThreshold,
+        ULocMatchFavorSubtag favorSubtag, ULocMatchDirection direction) const {
+    BytesTrie iter(trie);  // iterator over the distance trie, reused for every supported LSR
+    // Look up the desired language only once for all supported LSRs.
+    // Its "distance" is either a match point value of 0, or a non-match negative value.
+    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+    int32_t desLangDistance = trieNext(iter, desired.language, false);
+    uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;  // saved only when it will be restored for a later supported LSR
+    // Index of the supported LSR with the lowest distance.
+    int32_t bestIndex = -1;
+    // Cached lookup info from LikelySubtags.compareLikely().
+    int32_t bestLikelyInfo = -1;
+    for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
+        const LSR &supported = *supportedLSRs[slIndex];
+        bool star = false;  // set when the language pair has no trie entry and default distances apply
+        int32_t distance = desLangDistance;
+        if (distance >= 0) {
+            U_ASSERT((distance & DISTANCE_IS_FINAL) == 0);
+            if (slIndex != 0) {
+                iter.resetToState64(desLangState);
+            }
+            distance = trieNext(iter, supported.language, true);
+        }
+        // Note: The data builder verifies that there are no rules with "any" (*) language and
+        // real (non *) script or region subtags.
+        // This means that if the lookup for either language fails we can use
+        // the default distances without further lookups.
+        int32_t flags;
+        if (distance >= 0) {
+            flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+            distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
+        } else {  // <*, *>
+            if (uprv_strcmp(desired.language, supported.language) == 0) {
+                distance = 0;
+            } else {
+                distance = defaultLanguageDistance;
+            }
+            flags = 0;
+            star = true;
+        }
+        U_ASSERT(0 <= distance && distance <= 100);
+        // Round up the shifted threshold (if fraction bits are not 0)
+        // for comparison with un-shifted distances until we need fraction bits.
+        // (If we simply shifted non-zero fraction bits away, then we might ignore a language
+        // when it's really still a micro distance below the threshold.)
+        int32_t roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT;
+        // We implement "favor subtag" by reducing the language subtag distance
+        // (unscientifically reducing it to a quarter of the normal value),
+        // so that the script distance is relatively more important.
+        // For example, given a default language distance of 80, we reduce it to 20,
+        // which is below the default threshold of 50, which is the default script distance.
+        if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
+            distance >>= 2;
+        }
+        // Let distance == roundedThreshold pass until the tie-breaker logic
+        // at the end of the loop.
+        if (distance > roundedThreshold) {
+            continue;  // the language distance alone is already too large
+        }
+
+        int32_t scriptDistance;
+        if (star || flags != 0) {
+            if (uprv_strcmp(desired.script, supported.script) == 0) {
+                scriptDistance = 0;
+            } else {
+                scriptDistance = defaultScriptDistance;
+            }
+        } else {
+            scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
+                    desired.script, supported.script);
+            flags = scriptDistance & DISTANCE_IS_FINAL;
+            scriptDistance &= ~DISTANCE_IS_FINAL;
+        }
+        distance += scriptDistance;
+        if (distance > roundedThreshold) {
+            continue;  // language + script distance is already too large
+        }
+
+        if (uprv_strcmp(desired.region, supported.region) == 0) {
+            // regionDistance = 0
+        } else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
+            distance += defaultRegionDistance;
+        } else {
+            int32_t remainingThreshold = roundedThreshold - distance;
+            if (minRegionDistance > remainingThreshold) {
+                continue;  // even the smallest region distance would exceed the threshold
+            }
+
+            // From here on we know the regions are not equal.
+            // Map each region to zero or more partitions. (zero = one non-matching string)
+            // (Each array of single-character partition strings is encoded as one string.)
+            // If either side has more than one, then we find the maximum distance.
+            // This could be optimized by adding some more structure, but probably not worth it.
+            distance += getRegionPartitionsDistance(
+                    iter, iter.getState64(),
+                    partitionsForRegion(desired),
+                    partitionsForRegion(supported),
+                    remainingThreshold);
+        }
+        int32_t shiftedDistance = shiftDistance(distance);
+        if (shiftedDistance == 0) {
+            // Distinguish between equivalent but originally unequal locales via an
+            // additional micro distance.
+            shiftedDistance |= (desired.flags ^ supported.flags);
+            if (shiftedDistance < shiftedThreshold) {
+                if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
+                        // Is there also a match when we swap desired/supported?
+                        isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
+                    if (shiftedDistance == 0) {
+                        return slIndex << INDEX_SHIFT;  // distance 0 even after the flags micro distance: return this index at once
+                    }
+                    bestIndex = slIndex;
+                    shiftedThreshold = shiftedDistance;  // tighten: later candidates are compared against this best distance
+                    bestLikelyInfo = -1;  // reset the cached compareLikely() info for the new best
+                }
+            }
+        } else {
+            if (shiftedDistance < shiftedThreshold) {
+                if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
+                        // Is there also a match when we swap desired/supported?
+                        isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
+                    bestIndex = slIndex;
+                    shiftedThreshold = shiftedDistance;  // tighten: later candidates are compared against this best distance
+                    bestLikelyInfo = -1;  // reset the cached compareLikely() info for the new best
+                }
+            } else if (shiftedDistance == shiftedThreshold && bestIndex >= 0) {
+                if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY ||
+                        // Is there also a match when we swap desired/supported?
+                        isMatch(supported, desired, shiftedThreshold, favorSubtag)) {
+                    bestLikelyInfo = likelySubtags.compareLikely(
+                            supported, *supportedLSRs[bestIndex], bestLikelyInfo);
+                    if ((bestLikelyInfo & 1) != 0) {
+                        // This supported locale matches as well as the previous best match,
+                        // and neither matches perfectly,
+                        // but this one is "more likely" (has more-default subtags).
+                        bestIndex = slIndex;
+                    }
+                }
+            }
+        }
+    }
+    return bestIndex >= 0 ?  // result packs the index (shifted by INDEX_SHIFT) with the shifted distance
+            (bestIndex << INDEX_SHIFT) | shiftedThreshold :
+            INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD);  // no candidate qualified
+}
+
+int32_t LocaleDistance::getDesSuppScriptDistance(
+        BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) {
+    // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
+    int32_t distance = trieNext(iter, desired, false);
+    if (distance >= 0) {
+        distance = trieNext(iter, supported, true);  // may carry DISTANCE_IS_FINAL, set by trieNext()
+    }
+    if (distance < 0) {  // no specific <desired, supported> script rule: fall back to <*, *> from startState
+        UStringTrieResult result = iter.resetToState64(startState).next(u'*');  // <*, *>
+        U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+        if (uprv_strcmp(desired, supported) == 0) {
+            distance = 0;  // same script
+        } else {
+            distance = iter.getValue();
+            U_ASSERT(distance >= 0);
+        }
+        if (result == USTRINGTRIE_FINAL_VALUE) {
+            distance |= DISTANCE_IS_FINAL;  // the caller reads this flag, then masks it off
+        }
+    }
+    return distance;
+}
+
+int32_t LocaleDistance::getRegionPartitionsDistance(
+        BytesTrie &iter, uint64_t startState,
+        const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) {
+    char desired = *desiredPartitions++;
+    char supported = *supportedPartitions++;
+    U_ASSERT(desired != 0 && supported != 0);
+    // See if we have single desired/supported partitions, from NUL-terminated
+    // partition strings without explicit length.
+    bool suppLengthGt1 = *supportedPartitions != 0;  // gt1: more than 1 character
+    // equivalent to: if (desLength == 1 && suppLength == 1)
+    if (*desiredPartitions == 0 && !suppLengthGt1) {
+        // Fastpath for single desired/supported partitions.
+        UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+        if (USTRINGTRIE_HAS_NEXT(result)) {
+            result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+            if (USTRINGTRIE_HAS_VALUE(result)) {
+                return iter.getValue();
+            }
+        }
+        return getFallbackRegionDistance(iter, startState);
+    }
+
+    const char *supportedStart = supportedPartitions - 1;  // for restart of inner loop
+    int32_t regionDistance = 0;  // running maximum over all (desired, supported) partition pairs
+    // Fall back to * only once, not for each pair of partition strings.
+    bool star = false;
+    for (;;) {
+        // Look up each desired-partition string only once,
+        // not for each (desired, supported) pair.
+        UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+        if (USTRINGTRIE_HAS_NEXT(result)) {
+            uint64_t desState = suppLengthGt1 ? iter.getState64() : 0;  // saved only if the inner loop will iterate more than once
+            for (;;) {
+                result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+                int32_t d;
+                if (USTRINGTRIE_HAS_VALUE(result)) {
+                    d = iter.getValue();
+                } else if (star) {
+                    d = 0;  // fallback distance was already counted once; 0 cannot raise the maximum
+                } else {
+                    d = getFallbackRegionDistance(iter, startState);
+                    star = true;
+                }
+                if (d > threshold) {
+                    return d;  // already above the caller's threshold: stop early
+                } else if (regionDistance < d) {
+                    regionDistance = d;
+                }
+                if ((supported = *supportedPartitions++) != 0) {
+                    iter.resetToState64(desState);
+                } else {
+                    break;
+                }
+            }
+        } else if (!star) {
+            int32_t d = getFallbackRegionDistance(iter, startState);
+            if (d > threshold) {
+                return d;  // already above the caller's threshold: stop early
+            } else if (regionDistance < d) {
+                regionDistance = d;
+            }
+            star = true;
+        }
+        if ((desired = *desiredPartitions++) != 0) {
+            iter.resetToState64(startState);
+            supportedPartitions = supportedStart;
+            supported = *supportedPartitions++;
+        } else {
+            break;
+        }
+    }
+    return regionDistance;
+}
+
+int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) {  // value of the <*, *> entry reached from startState
+#if U_DEBUG
+    UStringTrieResult result =  // captured only in debug builds, for the U_ASSERT below
+#endif
+    iter.resetToState64(startState).next(u'*');  // <*, *>
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+    int32_t distance = iter.getValue();
+    U_ASSERT(distance >= 0);
+    return distance;
+}
+
+int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) {  // feeds subtag s into iter; returns -1 when there is no match
+    uint8_t c;
+    if ((c = *s) == 0) {
+        return -1;  // no empty subtags in the distance data
+    }
+    for (;;) {
+        c = uprv_invCharToAscii(c);
+        // EBCDIC: If *s is not an invariant character,
+        // then c is now 0 and will simply not match anything, which is harmless.
+        uint8_t next = *++s;
+        if (next != 0) {
+            if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+                return -1;
+            }
+        } else {
+            // last character of this subtag: marked with END_OF_SUBTAG before the lookup
+            UStringTrieResult result = iter.next(c | END_OF_SUBTAG);
+            if (wantValue) {
+                if (USTRINGTRIE_HAS_VALUE(result)) {
+                    int32_t value = iter.getValue();
+                    if (result == USTRINGTRIE_FINAL_VALUE) {
+                        value |= DISTANCE_IS_FINAL;  // tell the caller that the trie has no continuation here
+                    }
+                    return value;
+                }
+            } else {
+                if (USTRINGTRIE_HAS_NEXT(result)) {
+                    return 0;  // matched, and the trie can continue with the next subtag
+                }
+            }
+            return -1;
+        }
+        c = next;
+    }
+}
+
+bool LocaleDistance::isParadigmLSR(const LSR &lsr) const {  // true if lsr is equivalent to any entry of paradigmLSRs
+    // Linear search for a very short list (length 6 as of 2019),
+    // because we look for equivalence not equality, and
+    // because it's easy.
+    // If there are many paradigm LSRs we should use a hash set
+    // with custom comparator and hasher.
+    U_ASSERT(paradigmLSRsLength <= 15);  // debug-only guard that the list stays short enough for a linear scan
+    for (int32_t i = 0; i < paradigmLSRsLength; ++i) {
+        if (lsr.isEquivalentTo(paradigmLSRs[i])) { return true; }
+    }
+    return false;
+}
+
+U_NAMESPACE_END
@@ -0,0 +1,151 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// locdistance.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCDISTANCE_H__
+#define __LOCDISTANCE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "lsr.h"
+
+U_NAMESPACE_BEGIN
+
+struct LocaleDistanceData;
+
+/**
+ * Offline-built locale distance data for LocaleMatcher.
+ * Holds the distance trie, the region partition tables, and the paradigm LSRs.
+ */
+class LocaleDistance final : public UMemory {
+public:
+    static const LocaleDistance *getSingleton(UErrorCode &errorCode);
+
+    static int32_t shiftDistance(int32_t distance) {
+        return distance << DISTANCE_SHIFT;  // makes room for the fraction bits
+    }
+
+    static int32_t getShiftedDistance(int32_t indexAndDistance) {
+        return indexAndDistance & DISTANCE_MASK;  // keeps only the low (distance) bits
+    }
+
+    static double getDistanceDouble(int32_t indexAndDistance) {
+        double shiftedDistance = getShiftedDistance(indexAndDistance);
+        return shiftedDistance / (1 << DISTANCE_SHIFT);  // undoes the shift, keeping the fraction
+    }
+
+    static int32_t getDistanceFloor(int32_t indexAndDistance) {
+        return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;  // integer part only
+    }
+
+    static int32_t getIndex(int32_t indexAndDistance) {
+        // assert indexAndDistance >= 0;
+        return indexAndDistance >> INDEX_SHIFT;  // the index sits above the distance bits
+    }
+
+    /**
+     * Finds the supported LSR with the smallest distance from the desired one.
+     * Equivalent LSR subtags must be normalized into a canonical form.
+     *
+     * <p>Returns the index of the lowest-distance supported LSR in the high bits
+     * (negative if none has a distance below the threshold),
+     * and its distance (0..ABOVE_THRESHOLD) in the low bits.
+     */
+    int32_t getBestIndexAndDistance(const LSR &desired,
+                                    const LSR **supportedLSRs, int32_t supportedLSRsLength,
+                                    int32_t shiftedThreshold,
+                                    ULocMatchFavorSubtag favorSubtag,
+                                    ULocMatchDirection direction) const;
+
+    bool isParadigmLSR(const LSR &lsr) const;
+
+    int32_t getDefaultScriptDistance() const {
+        return defaultScriptDistance;
+    }
+
+    int32_t getDefaultDemotionPerDesiredLocale() const {
+        return defaultDemotionPerDesiredLocale;
+    }
+
+private:
+    // The distance is shifted left to gain some fraction bits.
+    static constexpr int32_t DISTANCE_SHIFT = 3;
+    static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
+    // 7 bits for 0..100
+    static constexpr int32_t DISTANCE_INT_SHIFT = 7;
+    static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
+    static constexpr int32_t DISTANCE_MASK = 0x3ff;  // the low INDEX_SHIFT (10) bits
+    // static constexpr int32_t MAX_INDEX = 0x1fffff;  // avoids sign bit
+    static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;  // bit pattern of -1 shifted left by INDEX_SHIFT
+
+    LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely);
+    LocaleDistance(const LocaleDistance &other) = delete;
+    LocaleDistance &operator=(const LocaleDistance &other) = delete;
+
+    static void initLocaleDistance(UErrorCode &errorCode);
+
+    bool isMatch(const LSR &desired, const LSR &supported,
+                 int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
+        const LSR *pSupp = &supported;  // one-element list of supported LSRs
+        return getBestIndexAndDistance(
+            desired, &pSupp, 1,
+            shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0;  // negative: nothing below the threshold
+    }
+
+    static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
+                                            const char *desired, const char *supported);
+
+    static int32_t getRegionPartitionsDistance(
+        BytesTrie &iter, uint64_t startState,
+        const char *desiredPartitions, const char *supportedPartitions,
+        int32_t threshold);
+
+    static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
+
+    static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
+
+    const char *partitionsForRegion(const LSR &lsr) const {
+        // ill-formed region -> one non-matching string
+        int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
+        return partitionArrays[pIndex];
+    }
+
+    int32_t getDefaultRegionDistance() const {
+        return defaultRegionDistance;
+    }
+
+    const LikelySubtags &likelySubtags;
+
+    // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
+    // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
+    // There is also a trie value for each subsequence of whole subtags.
+    // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
+    BytesTrie trie;
+
+    /**
+     * Maps each region to zero or more single-character partitions.
+     */
+    const uint8_t *regionToPartitionsIndex;
+    const char **partitionArrays;
+
+    /**
+     * Used to get the paradigm region for a cluster, if there is one.
+     */
+    const LSR *paradigmLSRs;
+    int32_t paradigmLSRsLength;
+
+    int32_t defaultLanguageDistance;
+    int32_t defaultScriptDistance;
+    int32_t defaultRegionDistance;
+    int32_t minRegionDistance;
+    int32_t defaultDemotionPerDesiredLocale;
+};
+
+U_NAMESPACE_END
+
+#endif  // __LOCDISTANCE_H__
@@ -18,45 +18,17 @@
 #include "unicode/udisplaycontext.h"
 #include "unicode/brkiter.h"
 #include "unicode/ucurr.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "mutex.h"
+#include "uassert.h"
 #include "ulocimp.h"
 #include "umutex.h"
 #include "ureslocs.h"
 #include "uresimp.h"

-#include <stdarg.h>
-
-/**
- * Concatenate a number of null-terminated strings to buffer, leaving a
- * null-terminated string.  The last argument should be the null pointer.
- * Return the length of the string in the buffer, not counting the trailing
- * null.  Return -1 if there is an error (buffer is null, or buflen < 1).
- */
-static int32_t ncat(char *buffer, uint32_t buflen, ...) {
-  va_list args;
-  char *str;
-  char *p = buffer;
-  const char* e = buffer + buflen - 1;
-
-  if (buffer == NULL || buflen < 1) {
-    return -1;
-  }
-
-  va_start(args, buflen);
-  while ((str = va_arg(args, char *)) != 0) {
-    char c;
-    while (p != e && (c = *str++) != 0) {
-      *p++ = c;
-    }
-  }
-  *p = 0;
-  va_end(args);
-
-  return static_cast<int32_t>(p - buffer);
-}
-
 U_NAMESPACE_BEGIN

 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -64,12 +36,13 @@ U_NAMESPACE_BEGIN
 // Access resource data for locale components.
 // Wrap code in uloc.c for now.
 class ICUDataTable {
-    const char* path;
+    const char* const path;
    Locale locale;

 public:
+    // Note: path should be a pointer to a statically allocated string.
    ICUDataTable(const char* path, const Locale& locale);
-    ~ICUDataTable();
+    ~ICUDataTable() = default;

    const Locale& getLocale();

@@ -86,32 +59,18 @@ public:

 inline UnicodeString &
 ICUDataTable::get(const char* tableKey, const char* itemKey, UnicodeString& result) const {
-    return get(tableKey, NULL, itemKey, result);
+    return get(tableKey, nullptr, itemKey, result);
 }

 inline UnicodeString &
 ICUDataTable::getNoFallback(const char* tableKey, const char* itemKey, UnicodeString& result) const {
-    return getNoFallback(tableKey, NULL, itemKey, result);
+    return getNoFallback(tableKey, nullptr, itemKey, result);
 }

 ICUDataTable::ICUDataTable(const char* path, const Locale& locale)
-    : path(NULL), locale(Locale::getRoot())
+    : path(path), locale(locale)
 {
-  if (path) {
-    int32_t len = static_cast<int32_t>(uprv_strlen(path));
-    this->path = (const char*) uprv_malloc(len + 1);
-    if (this->path) {
-      uprv_strcpy((char *)this->path, path);
-      this->locale = locale;
-    }
-  }
-}
-
-ICUDataTable::~ICUDataTable() {
-  if (path) {
-    uprv_free((void*) path);
-    path = NULL;
-  }
+    U_ASSERT(path != nullptr);  // the pointer is stored as-is (no copy is made any more)
 }

 const Locale&
@@ -125,7 +84,7 @@ ICUDataTable::get(const char* tableKey, const char* subTableKey, const char* ite
  UErrorCode status = U_ZERO_ERROR;
  int32_t len = 0;

-  const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(),
+  const char16_t *s = uloc_getTableStringWithFallback(path, locale.getName(),
                                                   tableKey, subTableKey, itemKey,
                                                   &len, &status);
  if (U_SUCCESS(status) && len > 0) {
@@ -140,7 +99,7 @@ ICUDataTable::getNoFallback(const char* tableKey, const char* subTableKey, const
  UErrorCode status = U_ZERO_ERROR;
  int32_t len = 0;

-  const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(),
+  const char16_t *s = uloc_getTableStringWithFallback(path, locale.getName(),
                                                   tableKey, subTableKey, itemKey,
                                                   &len, &status);
  if (U_SUCCESS(status)) {
@@ -286,12 +245,12 @@ class LocaleDisplayNamesImpl : public LocaleDisplayNames {
 #else
    UObject* capitalizationBrkIter;
 #endif
-    static UMutex  capitalizationBrkIterLock;
    UnicodeString formatOpenParen;
    UnicodeString formatReplaceOpenParen;
    UnicodeString formatCloseParen;
    UnicodeString formatReplaceCloseParen;
    UDisplayContext nameLength;
+    UDisplayContext substitute;

    // Constants for capitalization context usage types.
    enum CapContextUsage {
@@ -305,7 +264,7 @@ class LocaleDisplayNamesImpl : public LocaleDisplayNames {
    };
    // Capitalization transforms. For each usage type, indicates whether to titlecase for
    // the context specified in capitalizationContext (which we know at construction time)
-     UBool fCapitalization[kCapContextUsageCount];
+     bool fCapitalization[kCapContextUsageCount];

 public:
    // constructor
@@ -313,55 +272,54 @@ public:
    LocaleDisplayNamesImpl(const Locale& locale, UDisplayContext *contexts, int32_t length);
    virtual ~LocaleDisplayNamesImpl();

-    virtual const Locale& getLocale() const;
-    virtual UDialectHandling getDialectHandling() const;
-    virtual UDisplayContext getContext(UDisplayContextType type) const;
+    virtual const Locale& getLocale() const override;
+    virtual UDialectHandling getDialectHandling() const override;
+    virtual UDisplayContext getContext(UDisplayContextType type) const override;

    virtual UnicodeString& localeDisplayName(const Locale& locale,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
    virtual UnicodeString& localeDisplayName(const char* localeId,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
    virtual UnicodeString& languageDisplayName(const char* lang,
-                                               UnicodeString& result) const;
+                                               UnicodeString& result) const override;
    virtual UnicodeString& scriptDisplayName(const char* script,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
    virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
    virtual UnicodeString& regionDisplayName(const char* region,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
    virtual UnicodeString& variantDisplayName(const char* variant,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
    virtual UnicodeString& keyDisplayName(const char* key,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
    virtual UnicodeString& keyValueDisplayName(const char* key,
                                                const char* value,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
 private:
    UnicodeString& localeIdName(const char* localeId,
-                                UnicodeString& result) const;
+                                UnicodeString& result, bool substitute) const;
    UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const;
    UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const;
-    UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const;
-    UnicodeString& regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const;
-    UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const;
-    UnicodeString& keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const;
+    UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, bool skipAdjust) const;
+    UnicodeString& regionDisplayName(const char* region, UnicodeString& result, bool skipAdjust) const;
+    UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, bool skipAdjust) const;
+    UnicodeString& keyDisplayName(const char* key, UnicodeString& result, bool skipAdjust) const;
    UnicodeString& keyValueDisplayName(const char* key, const char* value,
-                                        UnicodeString& result, UBool skipAdjust) const;
-    void initialize(void);
+                                        UnicodeString& result, bool skipAdjust) const;
+    void initialize();

    struct CapitalizationContextSink;
 };

-UMutex LocaleDisplayNamesImpl::capitalizationBrkIterLock = U_MUTEX_INITIALIZER;
-
 LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
                                               UDialectHandling dialectHandling)
    : dialectHandling(dialectHandling)
    , langData(U_ICUDATA_LANG, locale)
    , regionData(U_ICUDATA_REGION, locale)
    , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
-    , capitalizationBrkIter(NULL)
+    , capitalizationBrkIter(nullptr)
    , nameLength(UDISPCTX_LENGTH_FULL)
+    , substitute(UDISPCTX_SUBSTITUTE)
 {
    initialize();
 }
@@ -372,15 +330,17 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
    , langData(U_ICUDATA_LANG, locale)
    , regionData(U_ICUDATA_REGION, locale)
    , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
-    , capitalizationBrkIter(NULL)
+    , capitalizationBrkIter(nullptr)
    , nameLength(UDISPCTX_LENGTH_FULL)
+    , substitute(UDISPCTX_SUBSTITUTE)
 {
    while (length-- > 0) {
        UDisplayContext value = *contexts++;
-        UDisplayContextType selector = (UDisplayContextType)((uint32_t)value >> 8);
+        UDisplayContextType selector =
+            static_cast<UDisplayContextType>(static_cast<uint32_t>(value) >> 8);
        switch (selector) {
            case UDISPCTX_TYPE_DIALECT_HANDLING:
-                dialectHandling = (UDialectHandling)value;
+                dialectHandling = static_cast<UDialectHandling>(value);
                break;
            case UDISPCTX_TYPE_CAPITALIZATION:
                capitalizationContext = value;
@@ -388,6 +348,9 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
            case UDISPCTX_TYPE_DISPLAY_LENGTH:
                nameLength = value;
                break;
+            case UDISPCTX_TYPE_SUBSTITUTE_HANDLING:
+                substitute = value;
+                break;
            default:
                break;
        }
@@ -396,15 +359,15 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
 }

 struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink {
-    UBool hasCapitalizationUsage;
+    bool hasCapitalizationUsage;
    LocaleDisplayNamesImpl& parent;

    CapitalizationContextSink(LocaleDisplayNamesImpl& _parent)
-      : hasCapitalizationUsage(FALSE), parent(_parent) {}
+      : hasCapitalizationUsage(false), parent(_parent) {}
    virtual ~CapitalizationContextSink();

    virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
-            UErrorCode &errorCode) {
+            UErrorCode &errorCode) override {
        ResourceTable contexts = value.getTable(errorCode);
        if (U_FAILURE(errorCode)) { return; }
        for (int i = 0; contexts.getKeyAndValue(i, key, value); ++i) {
@@ -434,8 +397,8 @@ struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink {
            int32_t titlecaseInt = (parent.capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU) ? intVector[0] : intVector[1];
            if (titlecaseInt == 0) { continue; }

-            parent.fCapitalization[usageEnum] = TRUE;
-            hasCapitalizationUsage = TRUE;
+            parent.fCapitalization[usageEnum] = true;
+            hasCapitalizationUsage = true;
        }
    }
 };
@@ -444,8 +407,8 @@ struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink {
 LocaleDisplayNamesImpl::CapitalizationContextSink::~CapitalizationContextSink() {}

 void
-LocaleDisplayNamesImpl::initialize(void) {
-    LocaleDisplayNamesImpl *nonConstThis = (LocaleDisplayNamesImpl *)this;
+LocaleDisplayNamesImpl::initialize() {
+    LocaleDisplayNamesImpl* nonConstThis = this;
    nonConstThis->locale = langData.getLocale() == Locale::getRoot()
        ? regionData.getLocale()
        : langData.getLocale();
@@ -464,16 +427,16 @@ LocaleDisplayNamesImpl::initialize(void) {
        pattern = UnicodeString("{0} ({1})", -1, US_INV);
    }
    format.applyPatternMinMaxArguments(pattern, 2, 2, status);
-    if (pattern.indexOf((UChar)0xFF08) >= 0) {
-        formatOpenParen.setTo((UChar)0xFF08);         // fullwidth (
-        formatReplaceOpenParen.setTo((UChar)0xFF3B);  // fullwidth [
-        formatCloseParen.setTo((UChar)0xFF09);        // fullwidth )
-        formatReplaceCloseParen.setTo((UChar)0xFF3D); // fullwidth ]
+    if (pattern.indexOf(static_cast<char16_t>(0xFF08)) >= 0) {
+        formatOpenParen.setTo(static_cast<char16_t>(0xFF08));         // fullwidth (
+        formatReplaceOpenParen.setTo(static_cast<char16_t>(0xFF3B));  // fullwidth [
+        formatCloseParen.setTo(static_cast<char16_t>(0xFF09));        // fullwidth )
+        formatReplaceCloseParen.setTo(static_cast<char16_t>(0xFF3D)); // fullwidth ]
    } else {
-        formatOpenParen.setTo((UChar)0x0028);         // (
-        formatReplaceOpenParen.setTo((UChar)0x005B);  // [
-        formatCloseParen.setTo((UChar)0x0029);        // )
-        formatReplaceCloseParen.setTo((UChar)0x005D); // ]
+        formatOpenParen.setTo(static_cast<char16_t>(0x0028));         // (
+        formatReplaceOpenParen.setTo(static_cast<char16_t>(0x005B));  // [
+        formatCloseParen.setTo(static_cast<char16_t>(0x0029));        // )
+        formatReplaceCloseParen.setTo(static_cast<char16_t>(0x005D)); // ]
    }

    UnicodeString ktPattern;
@@ -487,9 +450,9 @@ LocaleDisplayNamesImpl::initialize(void) {
 #if !UCONFIG_NO_BREAK_ITERATION
    // Only get the context data if we need it! This is a const object so we know now...
    // Also check whether we will need a break iterator (depends on the data)
-    UBool needBrkIter = FALSE;
+    bool needBrkIter = false;
    if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) {
-        LocalUResourceBundlePointer resource(ures_open(NULL, locale.getName(), &status));
+        LocalUResourceBundlePointer resource(ures_open(nullptr, locale.getName(), &status));
        if (U_FAILURE(status)) { return; }
        CapitalizationContextSink sink(*this);
        ures_getAllItemsWithFallback(resource.getAlias(), "contextTransforms", sink, status);
@@ -507,7 +470,7 @@ LocaleDisplayNamesImpl::initialize(void) {
        capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
        if (U_FAILURE(status)) {
            delete capitalizationBrkIter;
-            capitalizationBrkIter = NULL;
+            capitalizationBrkIter = nullptr;
        }
    }
 #endif
@@ -533,15 +496,17 @@ UDisplayContext
 LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const {
    switch (type) {
        case UDISPCTX_TYPE_DIALECT_HANDLING:
-            return (UDisplayContext)dialectHandling;
+            return static_cast<UDisplayContext>(dialectHandling);
        case UDISPCTX_TYPE_CAPITALIZATION:
            return capitalizationContext;
        case UDISPCTX_TYPE_DISPLAY_LENGTH:
            return nameLength;
+        case UDISPCTX_TYPE_SUBSTITUTE_HANDLING:
+            return substitute;
        default:
            break;
    }
-    return (UDisplayContext)0;
+    return static_cast<UDisplayContext>(0);
 }

 UnicodeString&
@@ -549,9 +514,10 @@ LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage,
                                                UnicodeString& result) const {
 #if !UCONFIG_NO_BREAK_ITERATION
    // check to see whether we need to titlecase result
-    if ( result.length() > 0 && u_islower(result.char32At(0)) && capitalizationBrkIter!= NULL &&
+    if ( result.length() > 0 && u_islower(result.char32At(0)) && capitalizationBrkIter!= nullptr &&
          ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || fCapitalization[usage] ) ) {
        // note fCapitalization[usage] won't be set unless capitalizationContext is UI_LIST_OR_MENU or STANDALONE
+        static UMutex capitalizationBrkIterLock;
        Mutex lock(&capitalizationBrkIterLock);
        result.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
    }
@@ -576,42 +542,61 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
  const char* country = loc.getCountry();
  const char* variant = loc.getVariant();

-  UBool hasScript = uprv_strlen(script) > 0;
-  UBool hasCountry = uprv_strlen(country) > 0;
-  UBool hasVariant = uprv_strlen(variant) > 0;
+  bool hasScript = uprv_strlen(script) > 0;
+  bool hasCountry = uprv_strlen(country) > 0;
+  bool hasVariant = uprv_strlen(variant) > 0;

  if (dialectHandling == ULDN_DIALECT_NAMES) {
-    char buffer[ULOC_FULLNAME_CAPACITY];
+    UErrorCode status = U_ZERO_ERROR;
+    CharString buffer;
    do { // loop construct is so we can break early out of search
      if (hasScript && hasCountry) {
-        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0);
-        localeIdName(buffer, resultName);
-        if (!resultName.isBogus()) {
-          hasScript = FALSE;
-          hasCountry = FALSE;
-          break;
+        buffer.append(lang, status)
+              .append('_', status)
+              .append(script, status)
+              .append('_', status)
+              .append(country, status);
+        if (U_SUCCESS(status)) {
+          localeIdName(buffer.data(), resultName, false);
+          if (!resultName.isBogus()) {
+            hasScript = false;
+            hasCountry = false;
+            break;
+          }
        }
      }
      if (hasScript) {
-        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0);
-        localeIdName(buffer, resultName);
-        if (!resultName.isBogus()) {
-          hasScript = FALSE;
-          break;
+        buffer.append(lang, status)
+              .append('_', status)
+              .append(script, status);
+        if (U_SUCCESS(status)) {
+          localeIdName(buffer.data(), resultName, false);
+          if (!resultName.isBogus()) {
+            hasScript = false;
+            break;
+          }
        }
      }
      if (hasCountry) {
-        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0);
-        localeIdName(buffer, resultName);
-        if (!resultName.isBogus()) {
-          hasCountry = FALSE;
-          break;
+        buffer.append(lang, status)
+              .append('_', status)
+              .append(country, status);
+        if (U_SUCCESS(status)) {
+          localeIdName(buffer.data(), resultName, false);
+          if (!resultName.isBogus()) {
+            hasCountry = false;
+            break;
+          }
        }
      }
-    } while (FALSE);
+    } while (false);
  }
  if (resultName.isBogus() || resultName.isEmpty()) {
-    localeIdName(lang, resultName);
+    localeIdName(lang, resultName, substitute == UDISPCTX_SUBSTITUTE);
+    if (resultName.isBogus()) {
+      result.setToBogus();
+      return result;
+    }
  }

  UnicodeString resultRemainder;
@@ -619,13 +604,28 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
  UErrorCode status = U_ZERO_ERROR;

  if (hasScript) {
-    resultRemainder.append(scriptDisplayName(script, temp, TRUE));
+    UnicodeString script_str = scriptDisplayName(script, temp, true);
+    if (script_str.isBogus()) {
+      result.setToBogus();
+      return result;
+    }
+    resultRemainder.append(script_str);
  }
  if (hasCountry) {
-    appendWithSep(resultRemainder, regionDisplayName(country, temp, TRUE));
+    UnicodeString region_str = regionDisplayName(country, temp, true);
+    if (region_str.isBogus()) {
+      result.setToBogus();
+      return result;
+    }
+    appendWithSep(resultRemainder, region_str);
  }
  if (hasVariant) {
-    appendWithSep(resultRemainder, variantDisplayName(variant, temp, TRUE));
+    UnicodeString variant_str = variantDisplayName(variant, temp, true);
+    if (variant_str.isBogus()) {
+      result.setToBogus();
+      return result;
+    }
+    appendWithSep(resultRemainder, variant_str);
  }
  resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen);
  resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen);
@@ -633,21 +633,19 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
  LocalPointer<StringEnumeration> e(loc.createKeywords(status));
  if (e.isValid() && U_SUCCESS(status)) {
    UnicodeString temp2;
-    char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
    const char* key;
-    while ((key = e->next((int32_t *)0, status)) != NULL) {
-      value[0] = 0;
-      loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
-      if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
-        return result;
+    while ((key = e->next((int32_t*)nullptr, status)) != nullptr) {
+        auto value = loc.getKeywordValue<CharString>(key, status);
+        if (U_FAILURE(status)) {
+            return result;
      }
-      keyDisplayName(key, temp, TRUE);
+      keyDisplayName(key, temp, true);
      temp.findAndReplace(formatOpenParen, formatReplaceOpenParen);
      temp.findAndReplace(formatCloseParen, formatReplaceCloseParen);
-      keyValueDisplayName(key, value, temp2, TRUE);
+      keyValueDisplayName(key, value.data(), temp2, true);
      temp2.findAndReplace(formatOpenParen, formatReplaceOpenParen);
      temp2.findAndReplace(formatCloseParen, formatReplaceCloseParen);
-      if (temp2 != UnicodeString(value, -1, US_INV)) {
+      if (temp2 != UnicodeString(value.data(), -1, US_INV)) {
        appendWithSep(resultRemainder, temp2);
      } else if (temp != UnicodeString(key, -1, US_INV)) {
        UnicodeString temp3;
@@ -655,7 +653,7 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
        appendWithSep(resultRemainder, temp3);
      } else {
        appendWithSep(resultRemainder, temp)
-          .append((UChar)0x3d /* = */)
+          .append(static_cast<char16_t>(0x3d) /* = */)
          .append(temp2);
      }
    }
@@ -677,7 +675,7 @@ LocaleDisplayNamesImpl::appendWithSep(UnicodeString& buffer, const UnicodeString
    } else {
        const UnicodeString *values[2] = { &buffer, &src };
        UErrorCode status = U_ZERO_ERROR;
-        separatorFormat.formatAndReplace(values, 2, buffer, NULL, 0, status);
+        separatorFormat.formatAndReplace(values, 2, buffer, nullptr, 0, status);
    }
    return buffer;
 }
@@ -691,122 +689,171 @@ LocaleDisplayNamesImpl::localeDisplayName(const char* localeId,
 // private
 UnicodeString&
 LocaleDisplayNamesImpl::localeIdName(const char* localeId,
-                                     UnicodeString& result) const {
+                                     UnicodeString& result, bool substitute) const {  // substitute: when no name is found, return localeId itself instead of a bogus string
    if (nameLength == UDISPCTX_LENGTH_SHORT) {
        langData.getNoFallback("Languages%short", localeId, result);
        if (!result.isBogus()) {
            return result;
        }
    }
-    return langData.getNoFallback("Languages", localeId, result);
+    langData.getNoFallback("Languages", localeId, result);  // regular-length lookup, no fallback
+    if (result.isBogus() && uprv_strchr(localeId, '_') == nullptr) {
+        // Canonicalize lang and try again, ICU-20870
+        // (only for language codes without script or region)
+        Locale canonLocale = Locale::createCanonical(localeId);
+        const char* canonLocId = canonLocale.getName();  // key used for the retry lookups below
+        if (nameLength == UDISPCTX_LENGTH_SHORT) {
+            langData.getNoFallback("Languages%short", canonLocId, result);  // short form of the canonical ID first
+            if (!result.isBogus()) {
+                return result;
+            }
+        }
+        langData.getNoFallback("Languages", canonLocId, result);  // then the regular form of the canonical ID
+    }
+    if (result.isBogus() && substitute) {
+        // use key, this is what langData.get (with fallback) falls back to.
+        result.setTo(UnicodeString(localeId, -1, US_INV)); // use the original localeId (not the canonicalized one) as the name
+    }
+    return result;  // may still be bogus when substitute is false
 }

 UnicodeString&
 LocaleDisplayNamesImpl::languageDisplayName(const char* lang,
                                            UnicodeString& result) const {
-    if (uprv_strcmp("root", lang) == 0 || uprv_strchr(lang, '_') != NULL) {
+    if (uprv_strcmp("root", lang) == 0 || uprv_strchr(lang, '_') != nullptr) {  // "root" and IDs containing '_' are returned verbatim, without any lookup
        return result = UnicodeString(lang, -1, US_INV);
    }
    if (nameLength == UDISPCTX_LENGTH_SHORT) {
-        langData.get("Languages%short", lang, result);
+        langData.getNoFallback("Languages%short", lang, result);  // short name first, without fallback
        if (!result.isBogus()) {
            return adjustForUsageAndContext(kCapContextUsageLanguage, result);
        }
    }
-    langData.get("Languages", lang, result);
+    langData.getNoFallback("Languages", lang, result);  // regular-length lookup, without fallback
+    if (result.isBogus()) {
+        // Canonicalize lang and try again, ICU-20870
+        Locale canonLocale = Locale::createCanonical(lang);
+        const char* canonLocId = canonLocale.getName();  // key used for the retry lookups below
+        if (nameLength == UDISPCTX_LENGTH_SHORT) {
+            langData.getNoFallback("Languages%short", canonLocId, result);
+            if (!result.isBogus()) {
+                return adjustForUsageAndContext(kCapContextUsageLanguage, result);
+            }
+        }
+        langData.getNoFallback("Languages", canonLocId, result);
+    }
+    if (result.isBogus() && substitute == UDISPCTX_SUBSTITUTE) {  // only substitute the code when the substitute setting asks for it
+        // use key, this is what langData.get (with fallback) falls back to.
+        result.setTo(UnicodeString(lang, -1, US_INV)); // use the original lang code (not the canonicalized one) as the name
+    }
    return adjustForUsageAndContext(kCapContextUsageLanguage, result);
 }

 UnicodeString&
 LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
                                          UnicodeString& result,
-                                          UBool skipAdjust) const {
+                                          bool skipAdjust) const {  // skipAdjust: return the looked-up name without calling adjustForUsageAndContext
    if (nameLength == UDISPCTX_LENGTH_SHORT) {
-        langData.get("Scripts%short", script, result);
+        langData.getNoFallback("Scripts%short", script, result);  // short name first; a bogus result falls through to "Scripts"
        if (!result.isBogus()) {
            return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result);
        }
    }
-    langData.get("Scripts", script, result);
+    if (substitute == UDISPCTX_SUBSTITUTE) {  // the substitute setting selects get() vs. getNoFallback()
+        langData.get("Scripts", script, result);
+    } else {
+        langData.getNoFallback("Scripts", script, result);
+    }
    return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result);
 }

 UnicodeString&
 LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
                                          UnicodeString& result) const {
-    return scriptDisplayName(script, result, FALSE);
+    return scriptDisplayName(script, result, false);  // skipAdjust=false: the three-argument overload applies adjustForUsageAndContext
 }

 UnicodeString&
 LocaleDisplayNamesImpl::scriptDisplayName(UScriptCode scriptCode,
                                          UnicodeString& result) const {
-    return scriptDisplayName(uscript_getName(scriptCode), result, FALSE);
+    return scriptDisplayName(uscript_getName(scriptCode), result, false);  // look up by the name uscript_getName() returns for the code; skipAdjust=false
 }

 UnicodeString&
 LocaleDisplayNamesImpl::regionDisplayName(const char* region,
                                          UnicodeString& result,
-                                          UBool skipAdjust) const {
+                                          bool skipAdjust) const {  // skipAdjust: return the looked-up name without calling adjustForUsageAndContext
    if (nameLength == UDISPCTX_LENGTH_SHORT) {
-        regionData.get("Countries%short", region, result);
+         regionData.getNoFallback("Countries%short", region, result);  // short name first; a bogus result falls through to "Countries"
        if (!result.isBogus()) {
            return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result);
        }
    }
-    regionData.get("Countries", region, result);
+    if (substitute == UDISPCTX_SUBSTITUTE) {  // the substitute setting selects get() vs. getNoFallback()
+        regionData.get("Countries", region, result);
+    } else {
+        regionData.getNoFallback("Countries", region, result);
+    }
    return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result);
 }

 UnicodeString&
 LocaleDisplayNamesImpl::regionDisplayName(const char* region,
                                          UnicodeString& result) const {
-    return regionDisplayName(region, result, FALSE);
+    return regionDisplayName(region, result, false);  // skipAdjust=false: the three-argument overload applies adjustForUsageAndContext
 }


 UnicodeString&
 LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
                                           UnicodeString& result,
-                                           UBool skipAdjust) const {
+                                           bool skipAdjust) const {  // skipAdjust: return the looked-up name without calling adjustForUsageAndContext
    // don't have a resource for short variant names
-    langData.get("Variants", variant, result);
+    if (substitute == UDISPCTX_SUBSTITUTE) {  // the substitute setting selects get() vs. getNoFallback()
+        langData.get("Variants", variant, result);
+    } else {
+        langData.getNoFallback("Variants", variant, result);
+    }
    return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageVariant, result);
 }

 UnicodeString&
 LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
                                           UnicodeString& result) const {
-    return variantDisplayName(variant, result, FALSE);
+    return variantDisplayName(variant, result, false);  // skipAdjust=false: the three-argument overload applies adjustForUsageAndContext
 }

 UnicodeString&
 LocaleDisplayNamesImpl::keyDisplayName(const char* key,
                                       UnicodeString& result,
-                                       UBool skipAdjust) const {
+                                       bool skipAdjust) const {  // skipAdjust: return the looked-up name without calling adjustForUsageAndContext
    // don't have a resource for short key names
-    langData.get("Keys", key, result);
+    if (substitute == UDISPCTX_SUBSTITUTE) {  // the substitute setting selects get() vs. getNoFallback()
+        langData.get("Keys", key, result);
+    } else {
+        langData.getNoFallback("Keys", key, result);
+    }
    return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKey, result);
 }

 UnicodeString&
 LocaleDisplayNamesImpl::keyDisplayName(const char* key,
                                       UnicodeString& result) const {
-    return keyDisplayName(key, result, FALSE);
+    return keyDisplayName(key, result, false);  // skipAdjust=false: the three-argument overload applies adjustForUsageAndContext
 }

 UnicodeString&
 LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
                                            const char* value,
                                            UnicodeString& result,
-                                            UBool skipAdjust) const {
+                                            bool skipAdjust) const {
    if (uprv_strcmp(key, "currency") == 0) {
        // ICU4C does not have ICU4J CurrencyDisplayInfo equivalent for now.
        UErrorCode sts = U_ZERO_ERROR;
        UnicodeString ustrValue(value, -1, US_INV);
        int32_t len;
-        UBool isChoice = FALSE;
-        const UChar *currencyName = ucurr_getName(ustrValue.getTerminatedBuffer(),
-            locale.getBaseName(), UCURR_LONG_NAME, &isChoice, &len, &sts);
+        const char16_t *currencyName = ucurr_getName(ustrValue.getTerminatedBuffer(),
+            locale.getBaseName(), UCURR_LONG_NAME, nullptr /* isChoiceFormat */, &len, &sts);
        if (U_FAILURE(sts)) {
            // Return the value as is on failure
            result = ustrValue;
@@ -817,12 +864,16 @@ LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
    }

    if (nameLength == UDISPCTX_LENGTH_SHORT) {
-        langData.get("Types%short", key, value, result);
+        langData.getNoFallback("Types%short", key, value, result);
        if (!result.isBogus()) {
            return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result);
        }
    }
-    langData.get("Types", key, value, result);
+    if (substitute == UDISPCTX_SUBSTITUTE) {
+        langData.get("Types", key, value, result);
+    } else {
+        langData.getNoFallback("Types", key, value, result);
+    }
    return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result);
 }

@@ -830,7 +881,7 @@ UnicodeString&
 LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
                                            const char* value,
                                            UnicodeString& result) const {
-    return keyValueDisplayName(key, value, result, FALSE);
+    return keyValueDisplayName(key, value, result, false);
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -844,7 +895,7 @@ LocaleDisplayNames::createInstance(const Locale& locale,
 LocaleDisplayNames*
 LocaleDisplayNames::createInstance(const Locale& locale,
                                   UDisplayContext *contexts, int32_t length) {
-    if (contexts == NULL) {
+    if (contexts == nullptr) {
        length = 0;
    }
    return new LocaleDisplayNamesImpl(locale, contexts, length);
@@ -861,9 +912,9 @@ uldn_open(const char * locale,
          UDialectHandling dialectHandling,
          UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
-    return 0;
+    return nullptr;
  }
-  if (locale == NULL) {
+  if (locale == nullptr) {
    locale = uloc_getDefault();
  }
  return (ULocaleDisplayNames *)LocaleDisplayNames::createInstance(Locale(locale), dialectHandling);
@@ -874,9 +925,9 @@ uldn_openForContext(const char * locale,
                    UDisplayContext *contexts, int32_t length,
                    UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
-    return 0;
+    return nullptr;
  }
-  if (locale == NULL) {
+  if (locale == nullptr) {
    locale = uloc_getDefault();
  }
  return (ULocaleDisplayNames *)LocaleDisplayNames::createInstance(Locale(locale), contexts, length);
@@ -893,7 +944,7 @@ uldn_getLocale(const ULocaleDisplayNames *ldn) {
  if (ldn) {
    return ((const LocaleDisplayNames *)ldn)->getLocale().getName();
  }
-  return NULL;
+  return nullptr;
 }

 U_CAPI UDialectHandling U_EXPORT2
@@ -917,13 +968,13 @@ uldn_getContext(const ULocaleDisplayNames *ldn,
 U_CAPI int32_t U_EXPORT2
 uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
                       const char *locale,
-                       UChar *result,
+                       char16_t *result,
                       int32_t maxResultSize,
                       UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
    return 0;
  }
-  if (ldn == NULL || locale == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
+  if (ldn == nullptr || locale == nullptr || (result == nullptr && maxResultSize > 0) || maxResultSize < 0) {
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }
@@ -939,13 +990,13 @@ uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
 U_CAPI int32_t U_EXPORT2
 uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
                         const char *lang,
-                         UChar *result,
+                         char16_t *result,
                         int32_t maxResultSize,
                         UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
    return 0;
  }
-  if (ldn == NULL || lang == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
+  if (ldn == nullptr || lang == nullptr || (result == nullptr && maxResultSize > 0) || maxResultSize < 0) {
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }
@@ -957,13 +1008,13 @@ uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
 U_CAPI int32_t U_EXPORT2
 uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
                       const char *script,
-                       UChar *result,
+                       char16_t *result,
                       int32_t maxResultSize,
                       UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
    return 0;
  }
-  if (ldn == NULL || script == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
+  if (ldn == nullptr || script == nullptr || (result == nullptr && maxResultSize > 0) || maxResultSize < 0) {
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }
@@ -975,7 +1026,7 @@ uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
 U_CAPI int32_t U_EXPORT2
 uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
                           UScriptCode scriptCode,
-                           UChar *result,
+                           char16_t *result,
                           int32_t maxResultSize,
                           UErrorCode *pErrorCode) {
  return uldn_scriptDisplayName(ldn, uscript_getName(scriptCode), result, maxResultSize, pErrorCode);
@@ -984,13 +1035,13 @@ uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
 U_CAPI int32_t U_EXPORT2
 uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
                       const char *region,
-                       UChar *result,
+                       char16_t *result,
                       int32_t maxResultSize,
                       UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
    return 0;
  }
-  if (ldn == NULL || region == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
+  if (ldn == nullptr || region == nullptr || (result == nullptr && maxResultSize > 0) || maxResultSize < 0) {
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }
@@ -1002,13 +1053,13 @@ uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
 U_CAPI int32_t U_EXPORT2
 uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
                        const char *variant,
-                        UChar *result,
+                        char16_t *result,
                        int32_t maxResultSize,
                        UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
    return 0;
  }
-  if (ldn == NULL || variant == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
+  if (ldn == nullptr || variant == nullptr || (result == nullptr && maxResultSize > 0) || maxResultSize < 0) {
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }
@@ -1020,13 +1071,13 @@ uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
 U_CAPI int32_t U_EXPORT2
 uldn_keyDisplayName(const ULocaleDisplayNames *ldn,
                    const char *key,
-                    UChar *result,
+                    char16_t *result,
                    int32_t maxResultSize,
                    UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
    return 0;
  }
-  if (ldn == NULL || key == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) {
+  if (ldn == nullptr || key == nullptr || (result == nullptr && maxResultSize > 0) || maxResultSize < 0) {
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
  }
@@ -1039,13 +1090,13 @@ U_CAPI int32_t U_EXPORT2
 uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn,
                         const char *key,
                         const char *value,
-                         UChar *result,
+                         char16_t *result,
                         int32_t maxResultSize,
                         UErrorCode *pErrorCode) {
  if (U_FAILURE(*pErrorCode)) {
    return 0;
  }
-  if (ldn == NULL || key == NULL || value == NULL || (result == NULL && maxResultSize > 0)
+  if (ldn == nullptr || key == nullptr || value == nullptr || (result == nullptr && maxResultSize > 0)
      || maxResultSize < 0) {
    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
@@ -0,0 +1,990 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// loclikelysubtags.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "unicode/uscript.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "loclikelysubtags.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "uinvchar.h"
+#include "umutex.h"
+#include "uniquecharstr.h"
+#include "uresdata.h"
+#include "uresimp.h"
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {  // file-local constants; their use lies outside this excerpt
+
+constexpr char PSEUDO_ACCENTS_PREFIX = '\'';  // -XA, -PSACCENT (presumably the marker for the pseudo-accents locale; confirm at the use site)
+constexpr char PSEUDO_BIDI_PREFIX = '+';  // -XB, -PSBIDI (presumably the marker for the pseudo-bidi locale; confirm at the use site)
+constexpr char PSEUDO_CRACKED_PREFIX = ',';  // -XC, -PSCRACK (presumably the marker for the pseudo-cracked locale; confirm at the use site)
+
+}  // namespace
+
+LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) :  // move constructor: takes over every pointer and length from data
+        distanceTrieBytes(data.distanceTrieBytes),
+        regionToPartitions(data.regionToPartitions),
+        partitions(data.partitions),
+        paradigms(data.paradigms), paradigmsLength(data.paradigmsLength),
+        distances(data.distances) {
+    data.partitions = nullptr;  // the destructor frees partitions, so the moved-from object must not keep it
+    data.paradigms = nullptr;  // likewise for paradigms, which the destructor delete[]s
+}
+
+LocaleDistanceData::~LocaleDistanceData() {  // releases the two arrays this object frees; the other pointer members are not freed here
+    uprv_free(partitions);  // allocated with uprv_malloc in LikelySubtagsData::load()
+    delete[] paradigms;  // allocated with new LSR[] in LikelySubtagsData::load()
+}
+
+struct LikelySubtagsData {  // data read from the "langInfo" resource bundle by load()
+    UResourceBundle *langInfoBundle = nullptr;  // opened in load(), closed in the destructor
+    UniqueCharStrings strings;  // string pool: load() adds strings, freezes the pool, then fetches them by index
+    CharStringMap languageAliases;  // filled in load() from consecutive pairs of the "languageAliases" array
+    CharStringMap regionAliases;  // filled in load() from consecutive pairs of the "regionAliases" array
+    const uint8_t *trieBytes = nullptr;  // binary value of the "trie" entry in the "likely" table
+    LSR *lsrs = nullptr;  // new[]-allocated in load(), delete[]d in the destructor
+    int32_t lsrsLength = 0;  // number of elements in lsrs
+
+    LocaleDistanceData distanceData;  // filled from the optional "match" table in load()
+
+    LikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}  // only constructs the string pool; load() does the reading
+
+    ~LikelySubtagsData() {
+        ures_close(langInfoBundle);
+        delete[] lsrs;
+    }
+
+    void load(UErrorCode &errorCode) {  // reads the "likely" (required) and "match" (optional) tables of "langInfo" into this object; failures are reported via errorCode
+        if (U_FAILURE(errorCode)) { return; }
+        langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+        StackUResourceBundle stackTempBundle;
+        ResourceDataValue value;  // scratch value reused for every lookup below
+        ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(),
+                                  value, errorCode);
+        ResourceTable likelyTable = value.getTable(errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+
+        // Read all strings in the resource bundle and convert them to invariant char *.
+        LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;  // indexes into strings, resolved after strings.freeze()
+        int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
+        ResourceArray m49Array;
+        if (likelyTable.findValue("m49", value)) {
+            m49Array = value.getArray(errorCode);
+        } else {
+            errorCode = U_MISSING_RESOURCE_ERROR;  // "m49" is mandatory
+            return;
+        }
+        if (!readStrings(likelyTable, "languageAliases", value,
+                         languageIndexes, languagesLength, errorCode) ||
+                !readStrings(likelyTable, "regionAliases", value,
+                             regionIndexes, regionsLength, errorCode) ||
+                !readLSREncodedStrings(likelyTable, "lsrnum", value, m49Array,
+                             lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
+            return;
+        }
+        if ((languagesLength & 1) != 0 ||  // aliases are consumed as pairs below, so the counts must be even
+                (regionsLength & 1) != 0 ||
+                (lsrSubtagsLength % 3) != 0) {  // LSRs are consumed as triples below
+            errorCode = U_INVALID_FORMAT_ERROR;
+            return;
+        }
+        if (lsrSubtagsLength == 0) {  // "lsrnum" absent or empty
+            errorCode = U_MISSING_RESOURCE_ERROR;
+            return;
+        }
+
+        if (!likelyTable.findValue("trie", value)) {  // "trie" is mandatory
+            errorCode = U_MISSING_RESOURCE_ERROR;
+            return;
+        }
+        int32_t length;
+        trieBytes = value.getBinary(length, errorCode);
+        if (U_FAILURE(errorCode)) { return; }
+
+        // Also read distance/matcher data if available,
+        // to open & keep only one resource bundle pointer
+        // and to use one single UniqueCharStrings.
+        UErrorCode matchErrorCode = U_ZERO_ERROR;  // separate code so that a missing "match" table does not fail the whole load
+        ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(),
+                                  value, matchErrorCode);
+        LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
+        int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
+        if (U_SUCCESS(matchErrorCode)) {
+            ResourceTable matchTable = value.getTable(errorCode);
+            if (U_FAILURE(errorCode)) { return; }
+
+            if (matchTable.findValue("trie", value)) {
+                distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
+                if (U_FAILURE(errorCode)) { return; }
+            }
+
+            if (matchTable.findValue("regionToPartitions", value)) {
+                distanceData.regionToPartitions = value.getBinary(length, errorCode);
+                if (U_FAILURE(errorCode)) { return; }
+                if (length < LSR::REGION_INDEX_LIMIT) {  // reject a table shorter than LSR::REGION_INDEX_LIMIT bytes
+                    errorCode = U_INVALID_FORMAT_ERROR;
+                    return;
+                }
+            }
+
+            if (!readStrings(matchTable, "partitions", value,
+                             partitionIndexes, partitionsLength, errorCode) ||
+                    !readLSREncodedStrings(matchTable, "paradigmnum", value, m49Array,
+                                 paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
+                return;
+            }
+            if ((paradigmSubtagsLength % 3) != 0) {  // paradigm LSRs are consumed as triples below
+                errorCode = U_INVALID_FORMAT_ERROR;
+                return;
+            }
+
+            if (matchTable.findValue("distances", value)) {
+                distanceData.distances = value.getIntVector(length, errorCode);
+                if (U_FAILURE(errorCode)) { return; }
+                if (length < 4) {  // LocaleDistance IX_LIMIT
+                    errorCode = U_INVALID_FORMAT_ERROR;
+                    return;
+                }
+            }
+        } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
+            // ok for likely subtags
+        } else {  // error other than missing resource
+            errorCode = matchErrorCode;
+            return;
+        }
+
+        // Fetch & store invariant-character versions of strings
+        // only after we have collected and de-duplicated all of them.
+        strings.freeze();
+
+        languageAliases = CharStringMap(languagesLength / 2, errorCode);
+        for (int32_t i = 0; i < languagesLength; i += 2) {  // element i is the map key, element i + 1 its value
+            languageAliases.put(strings.get(languageIndexes[i]),
+                                strings.get(languageIndexes[i + 1]), errorCode);
+        }
+
+        regionAliases = CharStringMap(regionsLength / 2, errorCode);
+        for (int32_t i = 0; i < regionsLength; i += 2) {  // element i is the map key, element i + 1 its value
+            regionAliases.put(strings.get(regionIndexes[i]),
+                              strings.get(regionIndexes[i + 1]), errorCode);
+        }
+        if (U_FAILURE(errorCode)) { return; }
+
+        lsrsLength = lsrSubtagsLength / 3;  // three subtag strings (language, script, region) per LSR
+        lsrs = new LSR[lsrsLength];
+        if (lsrs == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {  // i walks the subtag indexes, j the LSR slots
+            lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
+                          strings.get(lsrSubtagIndexes[i + 1]),
+                          strings.get(lsrSubtagIndexes[i + 2]),
+                          LSR::IMPLICIT_LSR);
+        }
+
+        if (partitionsLength > 0) {
+            distanceData.partitions = static_cast<const char **>(
+                uprv_malloc(partitionsLength * sizeof(const char *)));  // released with uprv_free in ~LocaleDistanceData
+            if (distanceData.partitions == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            for (int32_t i = 0; i < partitionsLength; ++i) {
+                distanceData.partitions[i] = strings.get(partitionIndexes[i]);
+            }
+        }
+
+        if (paradigmSubtagsLength > 0) {
+            distanceData.paradigmsLength = paradigmSubtagsLength / 3;
+            LSR *paradigms = new LSR[distanceData.paradigmsLength];  // local, mutable pointer; stored into distanceData after filling
+            if (paradigms == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
+                paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
+                                   strings.get(paradigmSubtagIndexes[i + 1]),
+                                   strings.get(paradigmSubtagIndexes[i + 2]),
+                                   LSR::DONT_CARE_FLAGS);
+            }
+            distanceData.paradigms = paradigms;  // released with delete[] in ~LocaleDistanceData
+        }
+    }
+
+private:
+    bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,  // adds every string of array table[key] to strings and records the returned indexes
+                     LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {  // returns false only on failure; a missing key returns true and leaves length untouched
+        if (U_FAILURE(errorCode)) { return false; }
+        if (table.findValue(key, value)) {
+            ResourceArray stringArray = value.getArray(errorCode);
+            if (U_FAILURE(errorCode)) { return false; }
+            length = stringArray.getSize();
+            if (length == 0) { return true; }  // nothing to allocate for an empty array
+            int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length);
+            if (rawIndexes == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return false;
+            }
+            for (int i = 0; i < length; ++i) {
+                if (stringArray.getValue(i, value)) {  // returns true because i < length
+                    int32_t strLength = 0;  // required out-parameter of getString(); not used afterwards
+                    rawIndexes[i] = strings.add(value.getString(strLength, errorCode), errorCode);
+                    if (U_FAILURE(errorCode)) { return false; }
+                }
+            }
+        }
+        return true;
+    }
+    UnicodeString toLanguage(int encoded) {  // decodes the language subtag from a packed LSR integer
+        if (encoded == 0) {
+            return UNICODE_STRING_SIMPLE("");
+        }
+        if (encoded == 1) {
+            return UNICODE_STRING_SIMPLE("skip");  // the value 1 maps to the literal "skip"
+        }
+        encoded &= 0x00ffffff;  // drop the top byte (read by toScript)
+        encoded %= 27*27*27;  // keep the three lowest base-27 digits
+        char lang[3];
+        lang[0] = 'a' + ((encoded % 27) - 1);  // digit value 1 maps to 'a'; least significant digit is the first letter
+        lang[1] = 'a' + (((encoded / 27 ) % 27) - 1);
+        if (encoded / (27 * 27) == 0) {  // third digit 0: two-letter code
+            return UnicodeString(lang, 2, US_INV);
+        }
+        lang[2] = 'a' + ((encoded / (27 * 27)) - 1);
+        return UnicodeString(lang, 3, US_INV);
+    }
+    UnicodeString toScript(int encoded) {  // decodes the script subtag from a packed LSR integer
+        if (encoded == 0) {
+            return UNICODE_STRING_SIMPLE("");
+        }
+        if (encoded == 1) {
+            return UNICODE_STRING_SIMPLE("script");  // the value 1 maps to the literal "script"
+        }
+        encoded = (encoded >> 24) & 0x000000ff;  // the top byte is used as a UScriptCode value
+        const char* script = uscript_getShortName(static_cast<UScriptCode>(encoded));
+        if (script == nullptr) {
+            return UNICODE_STRING_SIMPLE("");  // no short name available: yield an empty script
+        }
+        U_ASSERT(uprv_strlen(script) == 4);
+        return UnicodeString(script, 4, US_INV);
+    }
+    UnicodeString m49IndexToCode(const ResourceArray &m49Array, ResourceValue &value, int index, UErrorCode &errorCode) {  // returns element index of m49Array as a string; value is overwritten as scratch
+        if (U_FAILURE(errorCode)) {
+            return UNICODE_STRING_SIMPLE("");
+        }
+        if (m49Array.getValue(index, value)) {
+            return value.getUnicodeString(errorCode);
+        }
+        // "m49" does not include the index.
+        errorCode = U_MISSING_RESOURCE_ERROR;
+        return UNICODE_STRING_SIMPLE("");
+    }
+
+    UnicodeString toRegion(const ResourceArray& m49Array, ResourceValue &value, int encoded, UErrorCode &errorCode) {  // decodes the region subtag from a packed LSR integer
+        if (U_FAILURE(errorCode) || encoded == 0 || encoded == 1) {  // 0 and 1 both decode to an empty region
+            return UNICODE_STRING_SIMPLE("");
+        }
+        encoded &= 0x00ffffff;  // drop the top byte (read by toScript)
+        encoded /= 27 * 27 * 27;  // skip the three base-27 digits read by toLanguage
+        encoded %= 27 * 27;  // keep the next two base-27 digits
+        if (encoded < 27) {
+            // Selected M49 code index, find the code from "m49" resource.
+            return  m49IndexToCode(m49Array, value, encoded, errorCode);
+        }
+        char region[2];
+        region[0] = 'A' + ((encoded % 27) - 1);  // digit value 1 maps to 'A'; least significant digit is the first letter
+        region[1] = 'A' + (((encoded / 27) % 27) - 1);
+        return UnicodeString(region, 2, US_INV);
+    }
+
+    bool readLSREncodedStrings(const ResourceTable &table, const char* key, ResourceValue &value, const ResourceArray& m49Array,  // decodes each integer of int vector table[key] into language, script and region strings
+                     LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {  // on success length is the number of string indexes stored (3 per integer); a missing key returns true and leaves length untouched
+        if (U_FAILURE(errorCode)) { return false; }
+        if (table.findValue(key, value)) {
+            const int32_t* vectors = value.getIntVector(length, errorCode);  // NOTE(review): toRegion() below may overwrite value while vectors is still read; presumably vectors points into resource data rather than into value, confirm
+            if (U_FAILURE(errorCode)) { return false; }
+            if (length == 0) { return true; }
+            int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length * 3);  // three slots per packed integer
+            if (rawIndexes == nullptr) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return false;
+            }
+            for (int i = 0; i < length; ++i) {
+                rawIndexes[i*3] = strings.addByValue(toLanguage(vectors[i]), errorCode);
+                rawIndexes[i*3+1] = strings.addByValue(toScript(vectors[i]), errorCode);
+                rawIndexes[i*3+2] = strings.addByValue(
+                    toRegion(m49Array, value, vectors[i], errorCode), errorCode);
+                if (U_FAILURE(errorCode)) { return false; }  // checked once per triple, after all three adds
+            }
+            length *= 3;  // report the count of string indexes, not of packed integers
+        }
+        return true;
+    }
+};
+
+namespace {
+
+LikelySubtags *gLikelySubtags = nullptr;  // deleted and reset by cleanup()
+UVector *gMacroregions = nullptr;  // deleted and reset by cleanup()
+UInitOnce gInitOnce {};  // reset by cleanup()
+
+UBool U_CALLCONV cleanup() {  // frees both globals and resets gInitOnce; always returns true
+    delete gLikelySubtags;
+    gLikelySubtags = nullptr;
+    delete gMacroregions;
+    gMacroregions = nullptr;
+    gInitOnce.reset();
+    return true;
+}
+
+constexpr const char16_t* MACROREGION_HARDCODE[] = {  // input for getStaticMacroregions(); "X~Y" entries are expanded by processMacroregionRange(), e.g. "001~3" yields 001, 002, 003
+    u"001~3",
+    u"005",
+    u"009",
+    u"011",
+    u"013~5",
+    u"017~9",
+    u"021",
+    u"029",
+    u"030",
+    u"034~5",
+    u"039",
+    u"053~4",
+    u"057",
+    u"061",
+    u"142~3",
+    u"145",
+    u"150~1",
+    u"154~5",
+    u"202",
+    u"419",
+    u"EU",
+    u"EZ",
+    u"QO",
+    u"UN",
+};
+
+constexpr char16_t RANGE_MARKER = 0x7E; /* '~' */
+void processMacroregionRange(const UnicodeString& regionName, UVector* newMacroRegions, UErrorCode& status) {  // appends regionName, or every code of an "X~Y" range, to newMacroRegions as heap-allocated UnicodeStrings
+    if (U_FAILURE(status)) { return; }
+    int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
+    char16_t buf[6];  // destination capacity is 6 code units
+    regionName.extract(buf,6,status);
+    if ( rangeMarkerLocation > 0 ) {
+        char16_t endRange = regionName.charAt(rangeMarkerLocation+1);  // the character right after '~' is the last value of the range
+        buf[rangeMarkerLocation] = 0;  // terminate the string at the marker
+        while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {  // step the character before the marker up to and including endRange
+            LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
+            newMacroRegions->adoptElement(newRegion.orphan(),status);
+            buf[rangeMarkerLocation-1]++;
+        }
+    } else {  // no marker (or marker at index 0): add the name as given
+        LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
+        newMacroRegions->adoptElement(newRegion.orphan(),status);
+    }
+}
+
+#if U_DEBUG
+// Builds the macroregion list from the resource data at
+// supplementalData/idValidity/region/macroregion.
+// Returns a new vector (caller owns it), or nullptr with status set on failure.
+UVector* loadMacroregions(UErrorCode &status) {
+    if (U_FAILURE(status)) { return nullptr; }
+    LocalPointer<UVector> regions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
+
+    // Each lookup is a no-op once status indicates a failure, so one check after the chain suffices.
+    LocalUResourceBundlePointer supplemental(ures_openDirect(nullptr,"supplementalData",&status));
+    LocalUResourceBundlePointer validity(ures_getByKey(supplemental.getAlias(),"idValidity",nullptr,&status));
+    LocalUResourceBundlePointer regionTable(ures_getByKey(validity.getAlias(),"region",nullptr,&status));
+    LocalUResourceBundlePointer macroList(ures_getByKey(regionTable.getAlias(),"macroregion",nullptr,&status));
+    if (U_FAILURE(status)) { return nullptr; }
+
+    UResourceBundle *entries = macroList.getAlias();
+    while (ures_hasNext(entries)) {
+        UnicodeString entry = ures_getNextUnicodeString(entries,nullptr,&status);
+        processMacroregionRange(entry, regions.getAlias(), status);
+        if (U_FAILURE(status)) { return nullptr; }
+    }
+
+    return regions.orphan();
+}
+#endif // U_DEBUG
+
+// Builds the macroregion list from the compiled-in MACROREGION_HARDCODE table.
+// Returns a new vector (caller owns it), or nullptr with status set on failure.
+UVector* getStaticMacroregions(UErrorCode &status) {
+    if (U_FAILURE(status)) { return nullptr; }
+    LocalPointer<UVector> regions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
+    if (U_FAILURE(status)) { return nullptr; }
+
+    for (const char16_t *entry : MACROREGION_HARDCODE) {
+        UnicodeString code(entry);
+        // Expands "abc~d" ranges and appends the resulting code(s).
+        processMacroregionRange(code, regions.getAlias(), status);
+        if (U_FAILURE(status)) { return nullptr; }
+    }
+
+    return regions.orphan();
+}
+
+}  // namespace
+
+/**
+ * One-time initializer for gLikelySubtags and gMacroregions.
+ *
+ * On success both globals are set and cleanup() is registered.
+ * On failure both globals are left as nullptr and errorCode indicates the
+ * failure: the original error if there was one, otherwise
+ * U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param errorCode in/out error code
+ */
+void U_CALLCONV LikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
+    // This function is invoked only via umtx_initOnce().
+    U_ASSERT(gLikelySubtags == nullptr);
+    LikelySubtagsData data(errorCode);
+    data.load(errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+    gLikelySubtags = new LikelySubtags(data);
+    gMacroregions = getStaticMacroregions(errorCode);
+#if U_DEBUG
+    auto macroregionsFromData = loadMacroregions(errorCode);
+    // Either list may be nullptr after a failure; that case is handled below,
+    // so only compare when both lists exist.
+    U_ASSERT(gMacroregions == nullptr || macroregionsFromData == nullptr ||
+             (*gMacroregions) == (*macroregionsFromData));
+    delete macroregionsFromData;
+#endif
+    if (U_FAILURE(errorCode) || gLikelySubtags == nullptr || gMacroregions == nullptr) {
+        // Reset the globals so that no caller can see a deleted object.
+        delete gLikelySubtags;
+        gLikelySubtags = nullptr;
+        delete gMacroregions;
+        gMacroregions = nullptr;
+        if (U_SUCCESS(errorCode)) {
+            // No error was reported, so one of the allocations returned nullptr.
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return;
+    }
+
+    ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
+}
+
+/**
+ * Returns the shared LikelySubtags instance, initializing it on first use.
+ *
+ * @param errorCode in/out error code
+ * @return the singleton, or nullptr if errorCode indicates a failure
+ *         on entry or after initialization
+ */
+const LikelySubtags *LikelySubtags::getSingleton(UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    umtx_initOnce(gInitOnce, &LikelySubtags::initLikelySubtags, errorCode);
+    // initLikelySubtags() deletes what it built when it fails;
+    // do not hand out the global pointer in that case.
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    return gLikelySubtags;
+}
+
+/**
+ * Takes over the loaded data and precomputes frequently used trie states.
+ *
+ * The resource bundle and the lsrs array are taken from data (its pointers
+ * are set to nullptr); the strings and the alias/distance data are
+ * orphaned or moved out of data.
+ */
+LikelySubtags::LikelySubtags(LikelySubtagsData &data) :
+        langInfoBundle(data.langInfoBundle),
+        strings(data.strings.orphanCharStrings()),
+        languageAliases(std::move(data.languageAliases)),
+        regionAliases(std::move(data.regionAliases)),
+        trie(data.trieBytes),
+        lsrs(data.lsrs),
+#if U_DEBUG
+        lsrsLength(data.lsrsLength),
+#endif // U_DEBUG
+        distanceData(std::move(data.distanceData)) {
+    data.langInfoBundle = nullptr;
+    data.lsrs = nullptr;
+
+    // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
+    UStringTrieResult result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+    trieUndState = trie.getState64();
+    result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_NEXT(result));
+    trieUndZzzzState = trie.getState64();
+    result = trie.next(u'*');
+    U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+    defaultLsrIndex = trie.getValue();
+    trie.reset();
+
+    // Cache the trie state after each possible first language letter.
+    // 0 means "no cached state": maximize() and getLikelyIndex() compare the
+    // entry with 0, so every entry must be written, not only the usable ones.
+    for (char16_t c = u'a'; c <= u'z'; ++c) {
+        result = trie.next(c);
+        trieFirstLetterStates[c - u'a'] =
+            (result == USTRINGTRIE_NO_VALUE) ? trie.getState64() : 0;
+        trie.reset();
+    }
+}
+
+LikelySubtags::~LikelySubtags() {  // releases what the constructor took over from LikelySubtagsData
+    ures_close(langInfoBundle);  // bundle pointer taken from data.langInfoBundle
+    delete strings;  // obtained via data.strings.orphanCharStrings()
+    delete[] lsrs;  // array taken from data.lsrs
+}
+
+/**
+ * Maximizes the language, script and region of a Locale.
+ *
+ * @param locale               input locale; a bogus locale sets U_ILLEGAL_ARGUMENT_ERROR
+ * @param returnInputIfUnmatch if false, private-use ("@x=") names are returned
+ *                             as is; the flag is also passed on to makeMaximizedLsr()
+ * @param errorCode            in/out error code
+ * @return the maximized LSR; if the lookup yields three empty subtags,
+ *         an LSR built from the input locale's subtags
+ */
+LSR LikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
+                                         bool returnInputIfUnmatch,
+                                         UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
+    if (locale.isBogus()) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return {};
+    }
+
+    const char *name = locale.getName();
+    if (!returnInputIfUnmatch) {
+        // name.startsWith("@x=")
+        if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') {
+            // Private use language tag x-subtag-subtag... which CLDR changes to
+            // und-x-subtag-subtag...
+            return LSR(name, "", "", LSR::EXPLICIT_LSR);
+        }
+    }
+
+    LSR max = makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+                            locale.getVariant(), returnInputIfUnmatch, errorCode);
+
+    const bool noMatch = max.language[0] == 0 && max.script[0] == 0 && max.region[0] == 0;
+    if (!noMatch) {
+        return max;
+    }
+    // No match. The ICU API requires: "If the provided ULocale instance is
+    // already in the maximal form, or there is no data available for
+    // maximization, it will be returned."
+    return LSR(locale.getLanguage(), locale.getScript(), locale.getCountry(), LSR::EXPLICIT_LSR, errorCode);
+}
+
+namespace {
+
+// Looks up alias in the alias map; returns the mapped string if there is one,
+// otherwise the input pointer itself.
+const char *getCanonical(const CharStringMap &aliases, const char *alias) {
+    if (const char *replacement = aliases.get(alias)) {
+        return replacement;
+    }
+    return alias;
+}
+
+}  // namespace
+
+/**
+ * Maximizes the given subtags after handling pseudolocales and aliases.
+ *
+ * @param language, script, region, variant  NUL-terminated input subtags
+ * @param returnInputIfUnmatch if true, the pseudolocale handling is skipped;
+ *                             the flag is also passed on to maximize()
+ * @param errorCode            in/out error code
+ */
+LSR LikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
+                                     const char *variant,
+                                     bool returnInputIfUnmatch,
+                                     UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
+    // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
+    // They should match only themselves,
+    // not other locales with what looks like the same language and script subtags.
+    if (!returnInputIfUnmatch) {
+        // Exactly two characters, the first one 'X'.
+        if (region[0] == 'X' && region[1] != 0 && region[2] == 0) {
+            const char second = region[1];
+            if (second == 'A') {
+                return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
+                           LSR::EXPLICIT_LSR, errorCode);
+            }
+            if (second == 'B') {
+                return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
+                           LSR::EXPLICIT_LSR, errorCode);
+            }
+            if (second == 'C') {
+                return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
+                           LSR::EXPLICIT_LSR, errorCode);
+            }
+            // any other second letter: normal locale
+        }
+
+        if (variant[0] == 'P' && variant[1] == 'S') {
+            const bool regionMissing = (*region == 0);
+            // Without an input region, the region is not an explicit subtag.
+            const int32_t lsrFlags = regionMissing ?
+                LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR;
+            if (uprv_strcmp(variant, "PSACCENT") == 0) {
+                return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
+                           regionMissing ? "XA" : region, lsrFlags, errorCode);
+            }
+            if (uprv_strcmp(variant, "PSBIDI") == 0) {
+                return LSR(PSEUDO_BIDI_PREFIX, language, script,
+                           regionMissing ? "XB" : region, lsrFlags, errorCode);
+            }
+            if (uprv_strcmp(variant, "PSCRACK") == 0) {
+                return LSR(PSEUDO_CRACKED_PREFIX, language, script,
+                           regionMissing ? "XC" : region, lsrFlags, errorCode);
+            }
+            // any other PS... variant: normal locale
+        }
+    }
+
+    // Map language and region aliases to their canonical subtags.
+    // (We have no script mappings.)
+    const char *canonicalLanguage = getCanonical(languageAliases, language);
+    const char *canonicalRegion = getCanonical(regionAliases, region);
+    return maximize(canonicalLanguage, script, canonicalRegion, returnInputIfUnmatch, errorCode);
+}
+
+// Convenience overload: wraps the NUL-terminated subtags in StringPieces
+// and forwards to the StringPiece overload of maximize().
+LSR LikelySubtags::maximize(const char *language, const char *script, const char *region,
+                             bool returnInputIfUnmatch,
+                             UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
+    const StringPiece languagePiece(language, static_cast<int32_t>(uprv_strlen(language)));
+    const StringPiece scriptPiece(script, static_cast<int32_t>(uprv_strlen(script)));
+    const StringPiece regionPiece(region, static_cast<int32_t>(uprv_strlen(region)));
+    return maximize(languagePiece, scriptPiece, regionPiece, returnInputIfUnmatch, errorCode);
+}
+
+/**
+ * Tests whether region is one of the codes in gMacroregions.
+ *
+ * @param region    region subtag (UTF-8)
+ * @param errorCode in/out error code
+ * @return true if the region is in the list; false otherwise or on failure
+ */
+bool LikelySubtags::isMacroregion(StringPiece& region, UErrorCode& errorCode) const {
+    if (U_FAILURE(errorCode)) { return false; }
+    // In Java, we use the Region class. In C++, Region is under i18n, so the
+    // macroregion codes are kept in gMacroregions to avoid a dependency
+    // from common on i18n/region.cpp.
+    umtx_initOnce(gInitOnce, &LikelySubtags::initLikelySubtags, errorCode);
+    if (U_FAILURE(errorCode)) { return false; }
+    UnicodeString regionCode = UnicodeString::fromUTF8(region);
+    return gMacroregions->contains(static_cast<void *>(&regionCode));
+}
+
+/**
+ * Adds likely subtags to the given language/script/region.
+ *
+ * "und", "Zzzz" and "ZZ" are treated like empty subtags. The trie is walked
+ * language first, then script, then region; where a subtag does not match,
+ * the lookup falls back to a "*" entry. The resulting value is an index into
+ * lsrs; subtags flagged as retained are kept from the input, the others are
+ * taken from lsrs[value].
+ *
+ * @param language, script, region  input subtags in canonical format;
+ *                             an empty piece may have a null data pointer
+ * @param returnInputIfUnmatch if true and none of the match flags is set,
+ *                             an LSR with three empty subtags is returned
+ * @param errorCode            in/out error code
+ * @return the maximized LSR
+ */
+LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiece region,
+                             bool returnInputIfUnmatch,
+                             UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
+    if (language.compare("und") == 0) {
+        language = "";
+    }
+    if (script.compare("Zzzz") == 0) {
+        script = "";
+    }
+    if (region.compare("ZZ") == 0) {
+        region = "";
+    }
+    if (!script.empty() && !region.empty() && !language.empty()) {
+        return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);  // already maximized
+    }
+    bool retainLanguage = false;
+    bool retainScript = false;
+    bool retainRegion = false;
+
+    BytesTrie iter(trie);
+    uint64_t state;
+    int32_t value;
+    // Small optimization: Array lookup for first language letter.
+    // The length is tested first so that data() is never dereferenced
+    // for an empty language.
+    int32_t c0;
+    if (language.length() >= 2 &&
+            0 <= (c0 = uprv_lowerOrdinal(language.data()[0])) && c0 <= 25 &&
+            (state = trieFirstLetterStates[c0]) != 0) {
+        value = trieNext(iter.resetToState64(state), language, 1);
+    } else {
+        value = trieNext(iter, language, 0);
+    }
+    bool matchLanguage = (value >= 0);
+    bool matchScript = false;
+    if (value >= 0) {
+        retainLanguage = !language.empty();
+        state = iter.getState64();
+    } else {
+        retainLanguage = true;
+        iter.resetToState64(trieUndState);  // "und" ("*")
+        state = 0;
+    }
+
+    if (value >= 0 && !script.empty()) {
+        matchScript = true;
+    }
+    if (value > 0) {
+        // Intermediate or final value from just language.
+        if (value == SKIP_SCRIPT) {
+            value = 0;
+        }
+        retainScript = !script.empty();
+    } else {
+        value = trieNext(iter, script, 0);
+        if (value >= 0) {
+            retainScript = !script.empty();
+            state = iter.getState64();
+        } else {
+            retainScript = true;
+            if (state == 0) {
+                iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
+            } else {
+                iter.resetToState64(state);
+                value = trieNext(iter, "", 0);
+                U_ASSERT(value >= 0);
+                state = iter.getState64();
+            }
+        }
+    }
+
+    bool matchRegion = false;
+    if (value > 0) {
+        // Final value from just language or language+script.
+        retainRegion = !region.empty();
+    } else {
+        value = trieNext(iter, region, 0);
+        if (value >= 0) {
+            if (!region.empty() && !isMacroregion(region, errorCode)) {
+                retainRegion = true;
+                matchRegion = true;
+            }
+        } else {
+            retainRegion = true;
+            if (state == 0) {
+                value = defaultLsrIndex;
+            } else {
+                iter.resetToState64(state);
+                value = trieNext(iter, "", 0);
+                U_ASSERT(value != 0);
+                // For the case of und_Latn
+                if (value < 0) {
+                    retainLanguage = !language.empty();
+                    retainScript = !script.empty();
+                    retainRegion = !region.empty();
+                    // Fallback to und_$region =>
+                    iter.resetToState64(trieUndState);  // "und" ("*")
+                    value = trieNext(iter, "", 0);
+                    U_ASSERT(value == 0);
+                    // Same unsigned type as getState64()/resetToState64() use.
+                    uint64_t trieUndEmptyState = iter.getState64();
+                    value = trieNext(iter, region, 0);
+                    // Fallback to und =>
+                    if (value < 0) {
+                        iter.resetToState64(trieUndEmptyState);
+                        value = trieNext(iter, "", 0);
+                        U_ASSERT(value > 0);
+                    }
+                }
+            }
+        }
+    }
+    U_ASSERT(value < lsrsLength);
+    if (returnInputIfUnmatch &&
+        (!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
+      return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode);  // no matching.
+    }
+    if (language.empty()) {
+        language = StringPiece("und");
+    }
+
+    if (!(retainLanguage || retainScript || retainRegion)) {
+        U_ASSERT(value >= 0);
+        // Quickly return a copy of the lookup-result LSR
+        // without new allocation of the subtags.
+        const LSR &matched = lsrs[value];
+        return LSR(matched.language, matched.script, matched.region, matched.flags);
+    }
+    if (!retainLanguage) {
+        U_ASSERT(value >= 0);
+        language = lsrs[value].language;
+    }
+    if (!retainScript) {
+        U_ASSERT(value >= 0);
+        script = lsrs[value].script;
+    }
+    if (!retainRegion) {
+        U_ASSERT(value >= 0);
+        region = lsrs[value].region;
+    }
+    int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
+    // retainOldMask flags = LSR explicit-subtag flags
+    return LSR(language, script, region, retainMask, errorCode);
+}
+
+/**
+ * Tests whether lsr is "more likely" than other and returns an updated
+ * likelyInfo whose bit 0 is set if it is.
+ *
+ * likelyInfo, when >= 0, caches the previous lookup: bits 2 and up hold an
+ * index into lsrs; bit 1 is set if that lookup was for equal language and
+ * script, and clear if the scripts differed.
+ */
+int32_t LikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
+    if (uprv_strcmp(lsr.language, other.language) != 0) {
+        return 0xfffffffc;  // negative, lsr not better than other
+    }
+
+    const bool haveCache = likelyInfo >= 0;
+
+    if (uprv_strcmp(lsr.script, other.script) != 0) {
+        // Scripts differ: compare lsr.script with the likely script for the language.
+        int32_t index;
+        if (haveCache && (likelyInfo & 2) == 0) {
+            index = likelyInfo >> 2;
+        } else {
+            index = getLikelyIndex(lsr.language, "");
+            likelyInfo = index << 2;
+        }
+        const bool sameAsLikely = uprv_strcmp(lsr.script, lsrs[index].script) == 0;
+        return sameAsLikely ? (likelyInfo | 1) : (likelyInfo & ~1);
+    }
+
+    if (uprv_strcmp(lsr.region, other.region) != 0) {
+        // Same language and script, regions differ: compare lsr.region with the likely region.
+        int32_t index;
+        if (haveCache && (likelyInfo & 2) != 0) {
+            index = likelyInfo >> 2;
+        } else {
+            // NOTE(review): the second parameter of getLikelyIndex() is named
+            // "script" but receives lsr.region here; confirm this is intended.
+            index = getLikelyIndex(lsr.language, lsr.region);
+            likelyInfo = (index << 2) | 2;
+        }
+        const bool sameAsLikely = uprv_strcmp(lsr.region, lsrs[index].region) == 0;
+        return sameAsLikely ? (likelyInfo | 1) : (likelyInfo & ~1);
+    }
+
+    return likelyInfo & ~1;  // lsr not better than other
+}
+
+// Subset of maximize(): looks up language and script only and returns the resulting trie value (used by compareLikely() as an index into lsrs).
+int32_t LikelySubtags::getLikelyIndex(const char *language, const char *script) const {
+    if (uprv_strcmp(language, "und") == 0) {
+        language = "";  // "und" is looked up like an empty language
+    }
+    if (uprv_strcmp(script, "Zzzz") == 0) {
+        script = "";  // "Zzzz" is looked up like an empty script
+    }
+
+    BytesTrie iter(trie);  // private iterator copy; the member trie is not modified
+    uint64_t state;  // trie state after the last matched level; 0 = language did not match
+    int32_t value;
+    // Small optimization: Array lookup for first language letter.
+    int32_t c0;
+    if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
+            language[1] != 0 &&  // language.length() >= 2
+            (state = trieFirstLetterStates[c0]) != 0) {
+        value = trieNext(iter.resetToState64(state), language, 1);  // continue after the cached first letter
+    } else {
+        value = trieNext(iter, language, 0);
+    }
+    if (value >= 0) {
+        state = iter.getState64();
+    } else {
+        iter.resetToState64(trieUndState);  // "und" ("*")
+        state = 0;
+    }
+
+    if (value > 0) {
+        // Intermediate or final value from just language.
+        if (value == SKIP_SCRIPT) {
+            value = 0;  // continue with the region step below
+        }
+    } else {
+        value = trieNext(iter, script, 0);
+        if (value >= 0) {
+            state = iter.getState64();
+        } else {
+            if (state == 0) {
+                iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
+            } else {
+                iter.resetToState64(state);  // back to the matched language, then use its "*" script entry
+                value = trieNext(iter, "", 0);
+                U_ASSERT(value >= 0);
+                state = iter.getState64();
+            }
+        }
+    }
+
+    if (value > 0) {
+        // Final value from just language or language+script.
+    } else {
+        value = trieNext(iter, "", 0);  // empty region: follow the "*" entry
+        U_ASSERT(value > 0);
+    }
+    U_ASSERT(value < lsrsLength);
+    return value;
+}
+
+/**
+ * Feeds the NUL-terminated subtag s, starting at index i, into the trie iterator.
+ * An empty remainder is looked up as '*'; otherwise the last character is
+ * passed with bit 0x80 set.
+ *
+ * @return -1 for no match, 0 for a match without value, SKIP_SCRIPT for an
+ *         intermediate value, otherwise the final trie value
+ */
+int32_t LikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
+    UStringTrieResult result;
+    const char *p = s + i;
+    if (*p == 0) {
+        result = iter.next(u'*');
+    } else {
+        uint8_t c = *p;
+        // All characters except the last one must lead to a further trie node.
+        while (p[1] != 0) {
+            c = uprv_invCharToAscii(c);
+            // EBCDIC: If the character is not an invariant character,
+            // then c is now 0 and will simply not match anything, which is harmless.
+            if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+                return -1;
+            }
+            c = *++p;
+        }
+        // last character of this subtag
+        c = uprv_invCharToAscii(c);
+        result = iter.next(c | 0x80);
+    }
+
+    if (result == USTRINGTRIE_NO_VALUE) {
+        return 0;
+    }
+    if (result == USTRINGTRIE_FINAL_VALUE) {
+        return iter.getValue();
+    }
+    if (result == USTRINGTRIE_INTERMEDIATE_VALUE) {
+        U_ASSERT(iter.getValue() == SKIP_SCRIPT);
+        return SKIP_SCRIPT;
+    }
+    return -1;  // USTRINGTRIE_NO_MATCH, or any other result
+}
+/**
+ * Feeds the subtag s, starting at index i, into the trie iterator.
+ * An empty remainder is looked up as '*'; otherwise the last character is
+ * passed with bit 0x80 set.
+ *
+ * @return -1 for no match, 0 for a match without value, SKIP_SCRIPT for an
+ *         intermediate value, otherwise the final trie value
+ */
+int32_t LikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) {
+    UStringTrieResult result;
+    const int32_t length = s.length();
+    if (length == i) {
+        result = iter.next(u'*');
+    } else {
+        const char *chars = s.data();
+        uint8_t c = chars[i];
+        // All characters except the last one must lead to a further trie node.
+        while (i + 1 != length) {
+            c = uprv_invCharToAscii(c);
+            // EBCDIC: If the character is not an invariant character,
+            // then c is now 0 and will simply not match anything, which is harmless.
+            if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+                return -1;
+            }
+            c = chars[++i];
+        }
+        // last character of this subtag
+        c = uprv_invCharToAscii(c);
+        result = iter.next(c | 0x80);
+    }
+
+    if (result == USTRINGTRIE_NO_VALUE) {
+        return 0;
+    }
+    if (result == USTRINGTRIE_FINAL_VALUE) {
+        return iter.getValue();
+    }
+    if (result == USTRINGTRIE_INTERMEDIATE_VALUE) {
+        U_ASSERT(iter.getValue() == SKIP_SCRIPT);
+        return SKIP_SCRIPT;
+    }
+    return -1;  // USTRINGTRIE_NO_MATCH, or any other result
+}
+
+/**
+ * Removes the subtags that maximize() would add back.
+ *
+ * The input is maximized first; then shorter candidates are tried in order
+ * and the first one that maximizes to the same result is returned:
+ * language; language+region (before language+script unless favorScript);
+ * language+script; language+region (after language+script if favorScript).
+ *
+ * @param language, script, region  input subtags
+ * @param favorScript  if true, prefer keeping the script over keeping the region
+ * @param errorCode    in/out error code
+ * @return the minimized LSR; the input subtags if maximization found no match;
+ *         a default LSR on failure
+ */
+LSR LikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
+                                    StringPiece region,
+                                    bool favorScript,
+                                    UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
+    LSR max = maximize(language, script, region, true, errorCode);
+    if (U_FAILURE(errorCode)) { return {}; }
+
+    const bool noMatch = max.language[0] == 0 && max.script[0] == 0 && max.region[0] == 0;
+    if (noMatch) {
+        // No match. The ICU API requires:
+        // "If this Locale is already in the minimal form, or not valid, or
+        // there is no data available for minimization, the Locale will be
+        // unchanged."
+        return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
+    }
+
+    // True if max.language plus the given script/region maximizes to max.
+    // Yields false on failure; callers check errorCode right afterwards.
+    const auto maximizesToMax = [&](const char *scriptSubtag, const char *regionSubtag) {
+        LSR candidate = maximize(max.language, scriptSubtag, regionSubtag, true, errorCode);
+        return U_SUCCESS(errorCode) && candidate.isEquivalentTo(max);
+    };
+
+    // try language
+    if (maximizesToMax("", "")) {
+        return LSR(max.language, "", "", LSR::DONT_CARE_FLAGS, errorCode);
+    }
+    if (U_FAILURE(errorCode)) { return {}; }
+
+    if (!favorScript) {
+        // favor Region: try language and region
+        if (maximizesToMax("", max.region)) {
+            return LSR(max.language, "", max.region, LSR::DONT_CARE_FLAGS, errorCode);
+        }
+        if (U_FAILURE(errorCode)) { return {}; }
+    }
+
+    // try language and script
+    if (maximizesToMax(max.script, "")) {
+        return LSR(max.language, max.script, "", LSR::DONT_CARE_FLAGS, errorCode);
+    }
+    if (U_FAILURE(errorCode)) { return {}; }
+
+    if (favorScript) {
+        // try language and region
+        if (maximizesToMax("", max.region)) {
+            return LSR(max.language, "", max.region, LSR::DONT_CARE_FLAGS, errorCode);
+        }
+        if (U_FAILURE(errorCode)) { return {}; }
+    }
+
+    // Nothing shorter is equivalent: keep all three subtags.
+    return LSR(max.language, max.script, max.region, LSR::DONT_CARE_FLAGS, errorCode);
+}
+
+U_NAMESPACE_END
@@ -0,0 +1,128 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// loclikelysubtags.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCLIKELYSUBTAGS_H__
+#define __LOCLIKELYSUBTAGS_H__
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "charstrmap.h"
+#include "lsr.h"
+
+U_NAMESPACE_BEGIN
+
+struct LikelySubtagsData;
+
+struct LocaleDistanceData {  // distance/matcher data carried inside LikelySubtags (see LikelySubtags::getDistanceData())
+    LocaleDistanceData() = default;
+    LocaleDistanceData(LocaleDistanceData &&data);  // move constructor, defined out of line
+    ~LocaleDistanceData();
+
+    const uint8_t *distanceTrieBytes = nullptr;
+    const uint8_t *regionToPartitions = nullptr;
+    const char **partitions = nullptr;
+    const LSR *paradigms = nullptr;
+    int32_t paradigmsLength = 0;  // presumably the number of elements in paradigms; TODO confirm
+    const int32_t *distances = nullptr;
+
+private:
+    LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;  // copy assignment is disabled
+};
+
+class LikelySubtags final : public UMemory {  // likely-subtags lookup; the constructor is private, instances come from getSingleton()
+public:
+    ~LikelySubtags();
+
+    static constexpr int32_t SKIP_SCRIPT = 1;  // value returned by trieNext() for an intermediate trie value
+
+    // VisibleForTesting: returns the shared instance, initialized on first use via umtx_initOnce().
+    static const LikelySubtags *getSingleton(UErrorCode &errorCode);
+
+    // VisibleForTesting: maximizes the language/script/region of a Locale.
+    LSR makeMaximizedLsrFrom(const Locale &locale,
+                             bool returnInputIfUnmatch,
+                             UErrorCode &errorCode) const;
+
+    /**
+     * Tests whether lsr is "more likely" than other.
+     * For example, fr-Latn-FR is more likely than fr-Latn-CH because
+     * FR is the default region for fr-Latn.
+     *
+     * The likelyInfo caches lookup information between calls.
+     * The return value is an updated likelyInfo value,
+     * with bit 0 set if lsr is "more likely".
+     * The initial value of likelyInfo must be negative.
+     */
+    int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;
+
+    LSR minimizeSubtags(StringPiece language, StringPiece script, StringPiece region,
+                        bool favorScript,
+                        UErrorCode &errorCode) const;  // inverse of maximize(): drops subtags that maximize() would add back
+
+    // visible for LocaleDistance
+    const LocaleDistanceData &getDistanceData() const { return distanceData; }
+
+private:
+    LikelySubtags(LikelySubtagsData &data);  // takes over the bundle, strings and lsrs from data
+    LikelySubtags(const LikelySubtags &other) = delete;
+    LikelySubtags &operator=(const LikelySubtags &other) = delete;
+
+    static void initLikelySubtags(UErrorCode &errorCode);  // one-time initializer run through umtx_initOnce()
+
+    LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
+                         const char *variant,
+                         bool returnInputIfUnmatch,
+                         UErrorCode &errorCode) const;  // handles pseudolocales and aliases, then calls maximize()
+
+    /**
+     * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
+     */
+    LSR maximize(const char *language, const char *script, const char *region,
+                 bool returnInputIfUnmatch,
+                 UErrorCode &errorCode) const;
+    LSR maximize(StringPiece language, StringPiece script, StringPiece region,
+                 bool returnInputIfUnmatch,
+                 UErrorCode &errorCode) const;
+
+    int32_t getLikelyIndex(const char *language, const char *script) const;  // subset of maximize(); the result is used as an index into lsrs
+    bool isMacroregion(StringPiece& region, UErrorCode &errorCode) const;
+
+    static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);  // -1 = no match, 0 = match without value, else trie value
+    static int32_t trieNext(BytesTrie &iter, StringPiece s, int32_t i);
+
+    UResourceBundle *langInfoBundle;  // closed with ures_close() in the destructor
+    // We could store the strings by value, except that if there were few enough strings,
+    // moving the contents could copy it to a different array,
+    // invalidating the pointers stored in the maps.
+    CharString *strings;
+    CharStringMap languageAliases;
+    CharStringMap regionAliases;
+
+    // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs.
+    // There is also a trie value for each intermediate lang and lang+script.
+    // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"".
+    BytesTrie trie;
+    uint64_t trieUndState;  // trie state after "*" (language "und")
+    uint64_t trieUndZzzzState;  // trie state after "**" ("und-Zzzz")
+    int32_t defaultLsrIndex;  // trie value found after "***"
+    uint64_t trieFirstLetterStates[26];  // per first letter a-z: cached trie state after that letter; lookups treat 0 as "not cached"
+    const LSR *lsrs;  // array deleted with delete[] in the destructor
+#if U_DEBUG
+    int32_t lsrsLength;  // debug builds only; used in U_ASSERT bounds checks on trie values
+#endif
+
+    // distance/matcher data: see comment in LikelySubtagsData::load()
+    LocaleDistanceData distanceData;
+};
+
+U_NAMESPACE_END
+
+#endif  // __LOCLIKELYSUBTAGS_H__
@@ -28,22 +28,15 @@
 */

 #include "locmap.h"
+#include "charstr.h"
 #include "cstring.h"
 #include "cmemory.h"
+#include "ulocimp.h"
 #include "unicode/uloc.h"

-#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
-/*
- * TODO: It seems like we should widen this to
- * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
- * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
- * but those use gcc and won't have defined(_MSC_VER).
- * We might need to #include some Windows header and test for some version macro from there.
- * Or call some Windows function and see what it returns.
- */
-#define USE_WINDOWS_LCID_MAPPING_API
+#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
 #include <windows.h>
-#include <winnls.h>
+#include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
 #endif

 /*
@@ -55,6 +48,8 @@
 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
 */

+namespace {
+
 /*
 ////////////////////////////////////////////////
 //
@@ -93,7 +88,7 @@ typedef struct ILcidPosixMap
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
-static const ILcidPosixElement locmap_ ## languageID [] = { \
+constexpr ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID}, \
 };
@@ -103,7 +98,7 @@ static const ILcidPosixElement locmap_ ## languageID [] = { \
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
 #define ILCID_POSIX_SUBTABLE(id) \
-static const ILcidPosixElement locmap_ ## id [] =
+constexpr ILcidPosixElement locmap_ ## id [] =


 /**
@@ -802,7 +797,7 @@ ILCID_POSIX_SUBTABLE(zh) {
 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)

 /* This must be static and grouped by LCID. */
-static const ILcidPosixMap gPosixIDmap[] = {
+constexpr ILcidPosixMap gPosixIDmap[] = {
    ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
    ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
    ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
@@ -951,14 +946,14 @@ static const ILcidPosixMap gPosixIDmap[] = {
    ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
 };

-static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
+constexpr uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);

 /**
 * Do not call this function. It is called by hostID.
 * The function is not private because this struct must stay as a C struct,
 * and this is an internal class.
 */
-static int32_t
+int32_t
 idCmp(const char* id1, const char* id2)
 {
    int32_t diffIdx = 0;
@@ -973,17 +968,18 @@ idCmp(const char* id1, const char* id2)
 /**
 * Searches for a Windows LCID
 *
- * @param posixid the Posix style locale id.
+ * @param posixID the Posix style locale id.
 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
 *               no equivalent Windows LCID.
 * @return the LCID
 */
-static uint32_t
-getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
+uint32_t
+getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode& status)
 {
+    if (U_FAILURE(status)) { return locmap_root->hostID; }
    int32_t bestIdx = 0;
    int32_t bestIdxDiff = 0;
-    int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
+    int32_t posixIDlen = static_cast<int32_t>(uprv_strlen(posixID));
    uint32_t idx;

    for (idx = 0; idx < this_0->numRegions; idx++ ) {
@@ -1002,16 +998,16 @@ getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
    if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
        && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
    {
-        *status = U_USING_FALLBACK_WARNING;
+        status = U_USING_FALLBACK_WARNING;
        return this_0->regionMaps[bestIdx].hostID;
    }

    /*no match found */
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return this_0->regionMaps->hostID;
+    status = U_ILLEGAL_ARGUMENT_ERROR;
+    return locmap_root->hostID;
 }

-static const char*
+const char*
 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
 {
    uint32_t i;
@@ -1035,34 +1031,39 @@ getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
 //
 /////////////////////////////////////
 */
-#ifdef USE_WINDOWS_LCID_MAPPING_API
+#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
 /*
 * Various language tags needs to be changed:
 * quz -> qu
 * prs -> fa
 */
-#define FIX_LANGUAGE_ID_TAG(buffer, len) \
-    if (len >= 3) { \
-        if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\
-            buffer[2] = 0; \
-            uprv_strcat(buffer, buffer+3); \
-        } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\
-            buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \
-            uprv_strcat(buffer, buffer+3); \
-        } \
+void FIX_LANGUAGE_ID_TAG(char* buffer, int32_t len) {  // Rewrites a leading "quz" to "qu" or "prs" to "fa" in place; anything else is left untouched.
+    if (len >= 3) {  // Shorter input cannot start with a three-letter tag.
+        if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {
+            buffer[2] = 0;  // Terminate after "qu" so the tail can be re-appended directly behind it.
+            uprv_strcat(buffer, buffer+3);  // NOTE(review): source and destination overlap; if uprv_strcat wraps strcat this is not permitted -- confirm.
+        } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {
+            buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0;  // Replace "prs" with "fa" and terminate before re-appending the tail.
+            uprv_strcat(buffer, buffer+3);  // NOTE(review): same overlapping copy as in the "quz" branch -- confirm.
+        }
     }
-
+}
 #endif
+
+}  // namespace
+
 U_CAPI int32_t
 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
 {
    uint16_t langID;
    uint32_t localeIndex;
-    UBool bLookup = TRUE;
-    const char *pPosixID = NULL;
+    UBool bLookup = true;
+    const char *pPosixID = nullptr;

-#ifdef USE_WINDOWS_LCID_MAPPING_API
-    char locName[LOCALE_NAME_MAX_LENGTH] = {};      // ICU name can't be longer than Windows name
+#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+    static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");
+
+    char locName[LOCALE_NAME_MAX_LENGTH] = {};

    // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
    // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
@@ -1070,14 +1071,14 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
    // use the Windows API to resolve locale ID for this specific case.
    if ((hostid & 0x3FF) != 0x92) {
        int32_t tmpLen = 0;
-        UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH];  // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
+        char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};

        // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
        tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
        if (tmpLen > 1) {
            int32_t i = 0;
            // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
-            bLookup = FALSE;
+            bLookup = false;
            for (i = 0; i < UPRV_LENGTHOF(locName); i++)
            {
                locName[i] = (char)(windowsLocaleName[i]);
@@ -1091,7 +1092,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
                    // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
                    locName[i] = '\0';
                    tmpLen = i;
-                    bLookup = TRUE;
+                    bLookup = true;
                    break;
                }
                else if (windowsLocaleName[i] == L'-')
@@ -1110,10 +1111,10 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
            pPosixID = locName;
        }
    }
-#endif // USE_WINDOWS_LCID_MAPPING_API
+#endif

    if (bLookup) {
-        const char *pCandidate = NULL;
+        const char *pCandidate = nullptr;
        langID = LANGUAGE_LCID(hostid);

        for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
@@ -1126,7 +1127,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
        /* On Windows, when locale name has a variant, we still look up the hardcoded table.
           If a match in the hardcoded table is longer than the Windows locale name without
           variant, we use the one as the result */
-        if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
+        if (pCandidate && (pPosixID == nullptr || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
            pPosixID = pCandidate;
        }
    }
@@ -1150,7 +1151,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return -1;
+    return 0;
 }

 /*
@@ -1163,30 +1164,24 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
 /////////////////////////////////////
 */
 U_CAPI uint32_t
-uprv_convertToLCIDPlatform(const char* localeID)
+uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
 {
-    // The purpose of this function is to leverage native platform name->lcid
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    // The purpose of this function is to leverage the Windows platform name->lcid
    // conversion functionality when available.
-#ifdef USE_WINDOWS_LCID_MAPPING_API
-    DWORD nameLCIDFlags = 0;
-    UErrorCode myStatus = U_ZERO_ERROR;
-
-    // First check for a Windows name->LCID match, fall through to catch
-    // ICU special cases, but Windows may know it already.
-#if LOCALE_ALLOW_NEUTRAL_NAMES
-    nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
-#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */
-
+#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
    int32_t len;
-    char collVal[ULOC_KEYWORDS_CAPACITY] = {};
-    char baseName[ULOC_FULLNAME_CAPACITY] = {};
+    icu::CharString baseName;
    const char * mylocaleID = localeID;

    // Check any for keywords.
    if (uprv_strchr(localeID, '@'))
    {
-        len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
-        if (U_SUCCESS(myStatus) && len > 0)
+        icu::CharString collVal = ulocimp_getKeywordValue(localeID, "collation", *status);
+        if (U_SUCCESS(*status) && !collVal.isEmpty())
        {
            // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
            return 0;
@@ -1194,21 +1189,18 @@ uprv_convertToLCIDPlatform(const char* localeID)
        else
        {
            // If the locale ID contains keywords other than collation, just use the base name.
-            len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
-
-            if (U_SUCCESS(myStatus) && len > 0)
+            baseName = ulocimp_getBaseName(localeID, *status);
+            if (U_SUCCESS(*status) && !baseName.isEmpty())
            {
-                baseName[len] = 0;
-                mylocaleID = baseName;
+                mylocaleID = baseName.data();
            }
        }
    }

-    char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
    // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
-    (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
+    icu::CharString asciiBCP47Tag = ulocimp_toLanguageTag(mylocaleID, false, *status);

-    if (U_SUCCESS(myStatus))
+    if (U_SUCCESS(*status))
    {
        // Need it to be UTF-16, not 8-bit
        wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
@@ -1230,7 +1222,7 @@ uprv_convertToLCIDPlatform(const char* localeID)
        {
            // Ensure it's null terminated
            bcp47Tag[i] = L'\0';
-            LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
+            LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
            if (lcid > 0)
            {
                // Found LCID from windows, return that one, unless its completely ambiguous
@@ -1244,16 +1236,24 @@ uprv_convertToLCIDPlatform(const char* localeID)
        }
    }
 #else
-    (void)localeID; // Suppress unused variable warning.
-#endif /* USE_WINDOWS_LCID_MAPPING_API */
+    (void) localeID; // Suppress unused variable warning.
+#endif

-    // No found, or not implemented on platforms without native name->lcid conversion
+    // Nothing found, or not implemented.
    return 0;
 }

 U_CAPI uint32_t
 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
 {
+    if (U_FAILURE(*status) ||
+            langID == nullptr ||
+            posixID == nullptr ||
+            uprv_strlen(langID) < 2 ||
+            uprv_strlen(posixID) < 2) {
+        return locmap_root->hostID;
+    }
+
    // This function does the table lookup when native platform name->lcid conversion isn't available,
    // or for locales that don't follow patterns the platform expects.
    uint32_t   low    = 0;
@@ -1267,11 +1267,6 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
    UErrorCode myStatus;
    uint32_t   idx;

-    /* Check for incomplete id. */
-    if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
-        return 0;
-    }
-
    /*Binary search for the map entry for normal cases */

    while (high > low)  /*binary search*/{
@@ -1289,7 +1284,7 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
            low = mid;
        }
        else /*we found it*/{
-            return getHostID(&gPosixIDmap[mid], posixID, status);
+            return getHostID(&gPosixIDmap[mid], posixID, *status);
        }
        oldmid = mid;
    }
@@ -1300,7 +1295,7 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
     */
    for (idx = 0; idx < gLocaleCount; idx++ ) {
        myStatus = U_ZERO_ERROR;
-        value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
+        value = getHostID(&gPosixIDmap[idx], posixID, myStatus);
        if (myStatus == U_ZERO_ERROR) {
            return value;
        }
@@ -1316,5 +1311,5 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return 0;   /* return international (root) */
+    return locmap_root->hostID;   /* return international (root) */
 }
@@ -33,8 +33,8 @@
 U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status);

 /* Don't call these functions directly. Use uloc_getLCID instead. */
-U_CAPI uint32_t uprv_convertToLCIDPlatform(const char *localeID);   // Leverage platform conversion if possible
-U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status);
+U_CAPI uint32_t uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status); // Leverage platform conversion if possible
+U_CAPI uint32_t uprv_convertToLCID(const char* langID, const char* posixID, UErrorCode* status);

 #endif /* LOCMAP_H */

@@ -24,6 +24,7 @@
 #include "unicode/putil.h"
 #include "unicode/uloc.h"
 #include "unicode/ures.h"
+#include "charstr.h"
 #include "cstring.h"
 #include "ulocimp.h"
 #include "uresimp.h"
@@ -41,30 +42,29 @@
 * default locale because that would result in a mix of languages that is
 * unpredictable to the programmer and most likely useless.
 */
-U_CAPI const UChar * U_EXPORT2
+U_CAPI const char16_t * U_EXPORT2
 uloc_getTableStringWithFallback(const char *path, const char *locale,
                              const char *tableKey, const char *subTableKey,
                              const char *itemKey,
                              int32_t *pLength,
                              UErrorCode *pErrorCode)
 {
+    if (U_FAILURE(*pErrorCode)) { return nullptr; }
 /*    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
-    UResourceBundle *rb=NULL, table, subTable;
-    const UChar *item=NULL;
+    const char16_t *item=nullptr;
    UErrorCode errorCode;
-    char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};

    /*
     * open the bundle for the current locale
     * this falls back through the locale's chain to root
     */
    errorCode=U_ZERO_ERROR;
-    rb=ures_open(path, locale, &errorCode);
+    icu::LocalUResourceBundlePointer rb(ures_open(path, locale, &errorCode));

    if(U_FAILURE(errorCode)) {
        /* total failure, not even root could be opened */
        *pErrorCode=errorCode;
-        return NULL;
+        return nullptr;
    } else if(errorCode==U_USING_DEFAULT_WARNING ||
                (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
    ) {
@@ -73,26 +73,26 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
    }

    for(;;){
-        ures_initStackObject(&table);
-        ures_initStackObject(&subTable);
-        ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);
+        icu::StackUResourceBundle table;
+        icu::StackUResourceBundle subTable;
+        ures_getByKeyWithFallback(rb.getAlias(), tableKey, table.getAlias(), &errorCode);

-        if (subTableKey != NULL) {
+        if (subTableKey != nullptr) {
            /*
-            ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
-            item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
+            ures_getByKeyWithFallback(table.getAlias(), subTableKey, subTable.getAlias(), &errorCode);
+            item = ures_getStringByKeyWithFallback(subTable.getAlias(), itemKey, pLength, &errorCode);
            if(U_FAILURE(errorCode)){
                *pErrorCode = errorCode;
            }
            
            break;*/
            
-            ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
+            ures_getByKeyWithFallback(table.getAlias(), subTableKey, table.getAlias(), &errorCode);
        }
        if(U_SUCCESS(errorCode)){
-            item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
+            item = ures_getStringByKeyWithFallback(table.getAlias(), itemKey, pLength, &errorCode);
            if(U_FAILURE(errorCode)){
-                const char* replacement = NULL;
+                const char* replacement = nullptr;
                *pErrorCode = errorCode; /*save the errorCode*/
                errorCode = U_ZERO_ERROR;
                /* may be a deprecated code */
@@ -102,8 +102,8 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
                    replacement =  uloc_getCurrentLanguageID(itemKey);
                }
                /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
-                if(replacement!=NULL && itemKey != replacement){
-                    item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
+                if(replacement!=nullptr && itemKey != replacement){
+                    item = ures_getStringByKeyWithFallback(table.getAlias(), replacement, pLength, &errorCode);
                    if(U_SUCCESS(errorCode)){
                        *pErrorCode = errorCode;
                        break;
@@ -118,25 +118,25 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,

            /* still can't figure out ?.. try the fallback mechanism */
            int32_t len = 0;
-            const UChar* fallbackLocale =  NULL;
+            const char16_t* fallbackLocale =  nullptr;
            *pErrorCode = errorCode;
            errorCode = U_ZERO_ERROR;

-            fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
+            fallbackLocale = ures_getStringByKeyWithFallback(table.getAlias(), "Fallback", &len, &errorCode);
            if(U_FAILURE(errorCode)){
               *pErrorCode = errorCode;
                break;
            }
-            
-            u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
-            
+
+            icu::CharString explicitFallbackName;
+            explicitFallbackName.appendInvariantChars(fallbackLocale, len, errorCode);
+
            /* guard against recursive fallback */
-            if(uprv_strcmp(explicitFallbackName, locale)==0){
+            if (explicitFallbackName == locale) {
                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
                break;
            }
-            ures_close(rb);
-            rb = ures_open(path, explicitFallbackName, &errorCode);
+            rb.adoptInstead(ures_open(path, explicitFallbackName.data(), &errorCode));
            if(U_FAILURE(errorCode)){
                *pErrorCode = errorCode;
                break;
@@ -146,68 +146,72 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
            break;
        }
    }
-    /* done with the locale string - ready to close table and rb */
-    ures_close(&subTable);
-    ures_close(&table);
-    ures_close(rb);
+
    return item;
 }

-static ULayoutType
+namespace {
+
+ULayoutType  // Looks up `key` in the "layout" table of the canonicalized locale and maps its first character to a ULayoutType.
 _uloc_getOrientationHelper(const char* localeId,
                            const char* key,
-                           UErrorCode *status)
+                           UErrorCode& status)  // In/out: an incoming failure makes the function return ULOC_LAYOUT_UNKNOWN immediately.
 {
     ULayoutType result = ULOC_LAYOUT_UNKNOWN;

-    if (!U_FAILURE(*status)) {
-        int32_t length = 0;
-        char localeBuffer[ULOC_FULLNAME_CAPACITY];
+    if (U_FAILURE(status)) { return result; }

-        uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
+    if (localeId == nullptr) {  // A null locale ID is replaced by the default locale before canonicalization.
+        localeId = uloc_getDefault();
+    }
+    icu::CharString localeBuffer = ulocimp_canonicalize(localeId, status);

-        if (!U_FAILURE(*status)) {
-            const UChar* const value =
-                uloc_getTableStringWithFallback(
-                    NULL,
-                    localeBuffer,
-                    "layout",
-                    NULL,
-                    key,
-                    &length,
-                    status);
+    if (U_FAILURE(status)) { return result; }

-            if (!U_FAILURE(*status) && length != 0) {
-                switch(value[0])
-                {
-                case 0x0062: /* 'b' */
-                    result = ULOC_LAYOUT_BTT;
-                    break;
-                case 0x006C: /* 'l' */
-                    result = ULOC_LAYOUT_LTR;
-                    break;
-                case 0x0072: /* 'r' */
-                    result = ULOC_LAYOUT_RTL;
-                    break;
-                case 0x0074: /* 't' */
-                    result = ULOC_LAYOUT_TTB;
-                    break;
-                default:
-                    *status = U_INTERNAL_PROGRAM_ERROR;
-                    break;
-                }
-            }
+    int32_t length = 0;
+    const char16_t* const value =
+        uloc_getTableStringWithFallback(
+            nullptr,
+            localeBuffer.data(),
+            "layout",
+            nullptr,
+            key,
+            &length,
+            &status);
+
+    if (U_FAILURE(status)) { return result; }
+
+    if (length != 0) {  // An empty string leaves the result at ULOC_LAYOUT_UNKNOWN without setting an error.
+        switch(value[0])  // Only the first UTF-16 code unit of the resource string is inspected.
+        {
+        case 0x0062: /* 'b' */
+            result = ULOC_LAYOUT_BTT;
+            break;
+        case 0x006C: /* 'l' */
+            result = ULOC_LAYOUT_LTR;
+            break;
+        case 0x0072: /* 'r' */
+            result = ULOC_LAYOUT_RTL;
+            break;
+        case 0x0074: /* 't' */
+            result = ULOC_LAYOUT_TTB;
+            break;
+        default:
+            status = U_INTERNAL_PROGRAM_ERROR;  // Any other first character is reported as an error; result stays UNKNOWN.
+            break;
         }
     }

     return result;
 }

+}  // namespace
+
 U_CAPI ULayoutType U_EXPORT2
 uloc_getCharacterOrientation(const char* localeId,
                              UErrorCode *status)
 {
-    return _uloc_getOrientationHelper(localeId, "characters", status);
+    return _uloc_getOrientationHelper(localeId, "characters", *status);  // Delegates with the "characters" key; status is dereferenced here without a null check.
 }

 /**
@@ -221,5 +225,5 @@ U_CAPI ULayoutType U_EXPORT2
 uloc_getLineOrientation(const char* localeId,
                        UErrorCode *status)
 {
-    return _uloc_getOrientationHelper(localeId, "lines", status);
+    return _uloc_getOrientationHelper(localeId, "lines", *status);
 }
@@ -11,6 +11,7 @@
 #if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION

 #include "unicode/resbund.h"
+#include "unicode/uenum.h"
 #include "cmemory.h"
 #include "ustrfmt.h"
 #include "locutil.h"
@@ -20,12 +21,12 @@
 #include "umutex.h"

 // see LocaleUtility::getAvailableLocaleNames
-static icu::UInitOnce   LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
-static icu::Hashtable * LocaleUtility_cache = NULL;
+static icu::UInitOnce   LocaleUtilityInitOnce {};
+static icu::Hashtable * LocaleUtility_cache = nullptr;

-#define UNDERSCORE_CHAR ((UChar)0x005f)
-#define AT_SIGN_CHAR    ((UChar)64)
-#define PERIOD_CHAR     ((UChar)46)
+#define UNDERSCORE_CHAR ((char16_t)0x005f)
+#define AT_SIGN_CHAR    ((char16_t)64)
+#define PERIOD_CHAR     ((char16_t)46)

 /*
 ******************************************************************
@@ -35,26 +36,26 @@ static icu::Hashtable * LocaleUtility_cache = NULL;
 * Release all static memory held by Locale Utility.  
 */
 U_CDECL_BEGIN
-static UBool U_CALLCONV service_cleanup(void) {
+static UBool U_CALLCONV service_cleanup() {  // Cleanup callback: deletes LocaleUtility_cache if it exists and always returns true.
     if (LocaleUtility_cache) {
         delete LocaleUtility_cache;
-        LocaleUtility_cache = NULL;
+        LocaleUtility_cache = nullptr;  // Reset the pointer so the deleted table is not referenced again.
     }
-    return TRUE;
+    return true;
 }


 static void U_CALLCONV locale_utility_init(UErrorCode &status) {
    using namespace icu;
-    U_ASSERT(LocaleUtility_cache == NULL);
+    U_ASSERT(LocaleUtility_cache == nullptr);
    ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
    LocaleUtility_cache = new Hashtable(status);
    if (U_FAILURE(status)) {
        delete LocaleUtility_cache;
-        LocaleUtility_cache = NULL;
+        LocaleUtility_cache = nullptr;
        return;
    }
-    if (LocaleUtility_cache == NULL) {
+    if (LocaleUtility_cache == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
@@ -68,7 +69,7 @@ U_NAMESPACE_BEGIN
 UnicodeString&
 LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
 {
-  if (id == NULL) {
+  if (id == nullptr) {
    result.setToBogus();
  } else {
    // Fix case only (no other changes) up to the first '@' or '.' or
@@ -93,14 +94,14 @@ LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& res
      n = end;
    }
    for (; i < n; ++i) {
-      UChar c = result.charAt(i);
+      char16_t c = result.charAt(i);
      if (c >= 0x0041 && c <= 0x005a) {
        c += 0x20;
        result.setCharAt(i, c);
      }
    }
    for (n = end; i < n; ++i) {
-      UChar c = result.charAt(i);
+      char16_t c = result.charAt(i);
      if (c >= 0x0061 && c <= 0x007a) {
        c -= 0x20;
        result.setCharAt(i, c);
@@ -111,7 +112,7 @@ LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& res

 #if 0
    // This code does a proper full level 2 canonicalization of id.
-    // It's nasty to go from UChar to char to char to UChar -- but
+    // It's nasty to go from char16_t to char to char to char16_t -- but
    // that's what you have to do to use the uloc_canonicalize
    // function on UnicodeStrings.

@@ -144,9 +145,7 @@ LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& res
 Locale&
 LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
 {
-    enum { BUFLEN = 128 }; // larger than ever needed
-
-    if (id.isBogus() || id.length() >= BUFLEN) {
+    if (id.isBogus()) {
        result.setToBogus();
    } else {
        /*
@@ -167,24 +166,29 @@ LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
         *
         * There should be only at most one '@' in a locale ID.
         */
-        char buffer[BUFLEN];
+        CharString buffer;
        int32_t prev, i;
        prev = 0;
-        for(;;) {
-            i = id.indexOf((UChar)0x40, prev);
+        UErrorCode status = U_ZERO_ERROR;
+        do {
+            i = id.indexOf(static_cast<char16_t>(0x40), prev);
            if(i < 0) {
                // no @ between prev and the rest of the string
-                id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
+                buffer.appendInvariantChars(id.tempSubString(prev), status);
                break; // done
            } else {
                // normal invariant-character conversion for text between @s
-                id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
+                buffer.appendInvariantChars(id.tempSubString(prev, i - prev), status);
                // manually "convert" U+0040 at id[i] into '@' at buffer[i]
-                buffer[i] = '@';
+                buffer.append('@', status);
                prev = i + 1;
            }
+        } while (U_SUCCESS(status));
+        if (U_FAILURE(status)) {
+            result.setToBogus();
+        } else {
+            result = Locale::createFromName(buffer.data());
        }
-        result = Locale::createFromName(buffer);
    }
    return result;
 }
@@ -213,53 +217,52 @@ LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
    UErrorCode status = U_ZERO_ERROR;
    umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
    Hashtable *cache = LocaleUtility_cache;
-    if (cache == NULL) {
+    if (cache == nullptr) {
        // Catastrophic failure.
-        return NULL;
+        return nullptr;
    }

    Hashtable* htp;
-    umtx_lock(NULL);
-    htp = (Hashtable*) cache->get(bundleID);
-    umtx_unlock(NULL);
+    umtx_lock(nullptr);
+    htp = static_cast<Hashtable*>(cache->get(bundleID));
+    umtx_unlock(nullptr);

-    if (htp == NULL) {
+    if (htp == nullptr) {
        htp = new Hashtable(status);
        if (htp && U_SUCCESS(status)) {
            CharString cbundleID;
            cbundleID.appendInvariantChars(bundleID, status);
-            const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
-            UEnumeration *uenum = ures_openAvailableLocales(path, &status);
+            const char* path = cbundleID.isEmpty() ? nullptr : cbundleID.data();
+            icu::LocalUEnumerationPointer uenum(ures_openAvailableLocales(path, &status));
            for (;;) {
-                const UChar* id = uenum_unext(uenum, NULL, &status);
-                if (id == NULL) {
+                const char16_t* id = uenum_unext(uenum.getAlias(), nullptr, &status);
+                if (id == nullptr) {
                    break;
                }
                htp->put(UnicodeString(id), (void*)htp, status);
            }
-            uenum_close(uenum);
            if (U_FAILURE(status)) {
                delete htp;
-                return NULL;
+                return nullptr;
            }
-            umtx_lock(NULL);
+            umtx_lock(nullptr);
            Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID));
-            if (t != NULL) {
+            if (t != nullptr) {
                // Another thread raced through this code, creating the cache entry first.
                // Discard ours and return theirs.
-                umtx_unlock(NULL);
+                umtx_unlock(nullptr);
                delete htp;
                htp = t;
            } else {
                cache->put(bundleID, (void*)htp, status);
-                umtx_unlock(NULL);
+                umtx_unlock(nullptr);
            }
        }
    }
    return htp;
 }

-UBool
+bool
 LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
 {
    return child.indexOf(root) == 0 &&
@@ -271,5 +274,3 @@ U_NAMESPACE_END

 /* !UCONFIG_NO_SERVICE */
 #endif
-
-
@@ -28,7 +28,7 @@ public:
  static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
  static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
  static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
-  static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
+  static bool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
 };

 U_NAMESPACE_END
@@ -0,0 +1,134 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// lsr.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "lsr.h"
+#include "uinvchar.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,  // Stores prefixed copies of lang and scr in owned memory; region keeps aliasing r.
+         UErrorCode &errorCode) :
+        language(nullptr), script(nullptr), region(r),
+        regionIndex(indexForRegion(region)), flags(f) {  // regionIndex is computed from the aliased region string.
+    if (U_SUCCESS(errorCode)) {  // With an incoming failure nothing is copied and language/script stay nullptr.
+        CharString langScript;
+        langScript.append(prefix, errorCode).append(lang, errorCode).append('\0', errorCode);  // prefix + lang + NUL
+        int32_t scriptOffset = langScript.length();  // The script substring starts right after that NUL.
+        langScript.append(prefix, errorCode).append(scr, errorCode);  // prefix + scr
+        owned = langScript.cloneData(errorCode);  // presumably a heap copy of the buffer, later released by deleteOwned() -- confirm in charstr.h
+        if (U_SUCCESS(errorCode)) {  // Only publish the pointers when every append and the clone succeeded.
+            language = owned;
+            script = owned + scriptOffset;
+        }
+    }
+}
+
+LSR::LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,  // Copies all three subtags into one owned buffer, NUL-separated.
+         UErrorCode &errorCode) :
+        language(nullptr), script(nullptr), region(nullptr),
+        regionIndex(indexForRegion(r.data())), flags(f) {  // NOTE(review): indexForRegion reads until a NUL, so r.data() is assumed non-null and NUL-terminated -- confirm callers.
+    if (U_SUCCESS(errorCode)) {  // With an incoming failure all three subtag pointers stay nullptr.
+        CharString data;
+        data.append(lang, errorCode).append('\0', errorCode);  // lang + NUL
+        int32_t scriptOffset = data.length();  // Offset of the script substring inside the buffer.
+        data.append(scr, errorCode).append('\0', errorCode);  // scr + NUL
+        int32_t regionOffset = data.length();  // Offset of the region substring inside the buffer.
+        data.append(r, errorCode);
+        owned = data.cloneData(errorCode);  // presumably a heap copy of the buffer, later released by deleteOwned() -- confirm in charstr.h
+        if (U_SUCCESS(errorCode)) {  // Only publish the pointers when every append and the clone succeeded.
+            language = owned;
+            script = owned + scriptOffset;
+            region = owned + regionOffset;
+        }
+    }
+}
+
+LSR::LSR(LSR &&other) noexcept :  // Move constructor: copies every field and takes over other's owned buffer, if any.
+        language(other.language), script(other.script), region(other.region), owned(other.owned),
+        regionIndex(other.regionIndex), flags(other.flags),
+        hashCode(other.hashCode) {
+    if (owned != nullptr) {  // Only a source that owned memory needs to be reset.
+        other.language = other.script = "";  // NOTE(review): other.region is left as is and may still point into the moved buffer -- confirm this is intended.
+        other.owned = nullptr;  // Prevents other's destructor from freeing the buffer this object now owns.
+        other.hashCode = 0;
+    }
+}
+
+void LSR::deleteOwned() {  // Releases the owned buffer; the pointer itself is not reset here.
+    uprv_free(owned);
+}
+
+LSR &LSR::operator=(LSR &&other) noexcept {  // Move assignment: releases the current buffer, then takes over other's fields and buffer.
+    this->~LSR();  // NOTE(review): no self-assignment guard; with this == &other the freed buffer would be re-adopted below -- confirm callers never self-move.
+    language = other.language;
+    script = other.script;
+    region = other.region;
+    regionIndex = other.regionIndex;
+    flags = other.flags;
+    owned = other.owned;
+    hashCode = other.hashCode;
+    if (owned != nullptr) {  // Only a source that owned memory needs to be reset.
+        other.language = other.script = "";  // other.region is left unchanged, as in the move constructor.
+        other.owned = nullptr;  // Prevents other's destructor from freeing the buffer this object now owns.
+        other.hashCode = 0;
+    }
+    return *this;
+}
+
+UBool LSR::isEquivalentTo(const LSR &other) const {  // Compares language, script and region; unlike operator==, flags are not compared.
+    return
+        uprv_strcmp(language, other.language) == 0 &&
+        uprv_strcmp(script, other.script) == 0 &&
+        regionIndex == other.regionIndex &&
+        // Compare regions if both are ill-formed (and their indexes are 0).
+        (regionIndex > 0 || uprv_strcmp(region, other.region) == 0);  // With equal positive indexes the region strings are not compared.
+}
+
+bool LSR::operator==(const LSR &other) const {  // Same comparison as isEquivalentTo, plus equality of flags.
+    return
+        uprv_strcmp(language, other.language) == 0 &&
+        uprv_strcmp(script, other.script) == 0 &&
+        regionIndex == other.regionIndex &&
+        // Compare regions if both are ill-formed (and their indexes are 0).
+        (regionIndex > 0 || uprv_strcmp(region, other.region) == 0) &&  // With equal positive indexes the region strings are not compared.
+        flags == other.flags;
+}
+
+int32_t LSR::indexForRegion(const char *region) {  // Maps a region code to an index: three digits give 1..1000, two letters give 1001..1676, anything else gives 0.
+    int32_t c = region[0];  // region is dereferenced unconditionally, so it must not be null.
+    int32_t a = c - '0';
+    if (0 <= a && a <= 9) {  // digits: "419"
+        int32_t b = region[1] - '0';
+        if (b < 0 || 9 < b) { return 0; }
+        c = region[2] - '0';
+        if (c < 0 || 9 < c || region[3] != 0) { return 0; }  // Exactly three digits followed by the terminator are required.
+        return (10 * a + b) * 10 + c + 1;  // Numeric value plus 1, so "000" maps to 1 and 0 stays reserved for ill-formed input.
+    } else {  // letters: "DE"
+        a = uprv_upperOrdinal(c);  // presumably 0..25 for a letter and out of range otherwise -- confirm in uinvchar.h
+        if (a < 0 || 25 < a) { return 0; }
+        int32_t b = uprv_upperOrdinal(region[1]);
+        if (b < 0 || 25 < b || region[2] != 0) { return 0; }  // Exactly two letters followed by the terminator are required.
+        return 26 * a + b + 1001;  // Letter codes are placed after the 1..1000 range used by digit codes.
+    }
+    return 0;  // NOTE(review): unreachable, both branches above return.
+}
+
+LSR &LSR::setHashCode() {  // Computes and stores hashCode if it is still 0, then returns *this.
+    if (hashCode == 0) {  // 0 doubles as "not computed yet", so a computed value of 0 would be recomputed on the next call.
+        uint32_t h = ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language)));
+        h = h * 37 + ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)));  // Each further component is folded in with multiplier 37.
+        h = h * 37 + regionIndex;  // The region contributes through its index, not its string.
+        hashCode = h * 37 + flags;
+    }
+    return *this;
+}
+
+U_NAMESPACE_END
@@ -0,0 +1,85 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// lsr.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LSR_H__
+#define __LSR_H__
+
+#include "unicode/stringpiece.h"
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "cstring.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Language, script and region subtags, plus a region index, flags and an
+ * optional hash code.
+ *
+ * The subtag pointers either alias caller-provided strings or point into
+ * memory owned by this object (`owned`). The type is move-only: copying is
+ * deleted.
+ */
+struct LSR final : public UMemory {
+    /** One more than the largest value that indexForRegion() returns for two-letter codes. */
+    static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;
+
+    // Values for `flags`. EXPLICIT_LSR equals the sum of the three single-subtag values.
+    static constexpr int32_t EXPLICIT_LSR = 7;
+    static constexpr int32_t EXPLICIT_LANGUAGE = 4;
+    static constexpr int32_t EXPLICIT_SCRIPT = 2;
+    static constexpr int32_t EXPLICIT_REGION = 1;
+    static constexpr int32_t IMPLICIT_LSR = 0;
+    static constexpr int32_t DONT_CARE_FLAGS = 0;
+
+    // NOTE: the aliasing constructor initializes regionIndex from `region`,
+    // so `region` must stay declared before `regionIndex`.
+    const char *language;
+    const char *script;
+    const char *region;
+    /** Memory owned by this object, released by the destructor; nullptr if nothing is owned. */
+    char *owned = nullptr;
+    /** Index for region, 0 if ill-formed. @see indexForRegion */
+    int32_t regionIndex = 0;
+    int32_t flags = 0;
+    /** Only set for LSRs that will be used in a hash table. */
+    int32_t hashCode = 0;
+
+    /** Default: language "und", empty script and region, region index 0. */
+    LSR() : language("und"), script(""), region("") {}
+
+    /** Constructor which aliases all subtag pointers. */
+    LSR(const char *lang, const char *scr, const char *r, int32_t f) :
+            language(lang),  script(scr), region(r),
+            regionIndex(indexForRegion(region)), flags(f) {}
+    /**
+     * Constructor which prepends the prefix to the language and script,
+     * copies those into owned memory, and aliases the region.
+     */
+    LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
+        UErrorCode &errorCode);
+    // NOTE(review): presumably copies the three pieces into owned memory, since a
+    // StringPiece need not be NUL-terminated -- confirm against the definition.
+    LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
+        UErrorCode &errorCode);
+    LSR(LSR &&other) noexcept;
+    LSR(const LSR &other) = delete;
+    inline ~LSR() {
+        // Pure inline code for almost all instances.
+        if (owned != nullptr) {
+            deleteOwned();
+        }
+    }
+
+    LSR &operator=(LSR &&other) noexcept;
+    LSR &operator=(const LSR &other) = delete;
+
+    /**
+     * Returns a positive index (>0) for a well-formed region code.
+     * Do not rely on a particular region->index mapping; it may change.
+     * Returns 0 for ill-formed strings.
+     */
+    static int32_t indexForRegion(const char *region);
+
+    /** Compares language, script and region; ignores the flags. */
+    UBool isEquivalentTo(const LSR &other) const;
+    /** Compares language, script, region and flags. */
+    bool operator==(const LSR &other) const;
+
+    inline bool operator!=(const LSR &other) const {
+        return !operator==(other);
+    }
+
+    /** Computes and stores hashCode if it is still 0. Returns *this. */
+    LSR &setHashCode();
+
+private:
+    /** Out-of-line release of `owned`, called by the destructor only when it is non-null. */
+    void deleteOwned();
+};
+
+U_NAMESPACE_END
+
+#endif  // __LSR_H__
@@ -0,0 +1,864 @@
+// © 2021 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <complex>
+#include <utility>
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "brkeng.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "lstmbe.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "ubrkimpl.h"
+#include "uresimp.h"
+#include "uvectr32.h"
+#include "uvector.h"
+
+#include "unicode/brkiter.h"
+#include "unicode/resbund.h"
+#include "unicode/ubrk.h"
+#include "unicode/uniset.h"
+#include "unicode/ustring.h"
+#include "unicode/utf.h"
+
+U_NAMESPACE_BEGIN
+
+// Uncomment the following #define to debug.
+// #define LSTM_DEBUG 1
+// #define LSTM_VECTORIZER_DEBUG 1
+
+/**
+ * Read-only interface to a one-dimensional array of floats.
+ */
+class ReadArray1D {
+public:
+    virtual ~ReadArray1D();
+    /** Number of elements. */
+    virtual int32_t d1() const = 0;
+    /** Element at index i; implementations expect i < d1(). */
+    virtual float get(int32_t i) const = 0;
+
+#ifdef LSTM_DEBUG
+    // Debug helper: prints all elements, four per line.
+    void print() const {
+        printf("\n[");
+        for (int32_t i = 0; i < d1(); i++) {
+           printf("%0.8e ", get(i));
+           if (i % 4 == 3) printf("\n");
+        }
+        printf("]\n");
+    }
+#endif
+};
+
+// Defined out of line rather than in the class body.
+ReadArray1D::~ReadArray1D()
+{
+}
+
+/**
+ * Read-only interface to a two-dimensional array of floats.
+ */
+class ReadArray2D {
+public:
+    virtual ~ReadArray2D();
+    /** Size of the first dimension (number of rows). */
+    virtual int32_t d1() const = 0;
+    /** Size of the second dimension (number of columns). */
+    virtual int32_t d2() const = 0;
+    /** Element at row i, column j; implementations expect i < d1() and j < d2(). */
+    virtual float get(int32_t i, int32_t j) const = 0;
+};
+
+// Defined out of line rather than in the class body.
+ReadArray2D::~ReadArray2D()
+{
+}
+
+/**
+ * A read-only 1D view of a float array. The view neither owns nor copies
+ * the data, so the underlying memory must outlive the view.
+ */
+class ConstArray1D : public ReadArray1D {
+public:
+    /** Creates an empty view; call init() before reading from it. */
+    ConstArray1D() : data_(nullptr), d1_(0) {}
+
+    /** Creates a view of d1 floats starting at data. */
+    ConstArray1D(const float* data, int32_t d1) : data_(data), d1_(d1) {}
+
+    virtual ~ConstArray1D();
+
+    // Init the object, the object does not own the data nor copy.
+    // It is designed to directly use data from memory mapped resources.
+    // The int32_t values are reinterpreted in place as floats, which is why
+    // IEEE 754 floats are required.
+    void init(const int32_t* data, int32_t d1) {
+        U_ASSERT(IEEE_754 == 1);
+        data_ = reinterpret_cast<const float*>(data);
+        d1_ = d1;
+    }
+
+    // ReadArray1D methods.
+    virtual int32_t d1() const override { return d1_; }
+    // Only the upper bound is asserted, and only in debug builds.
+    virtual float get(int32_t i) const override {
+        U_ASSERT(i < d1_);
+        return data_[i];
+    }
+
+private:
+    const float* data_;  // not owned
+    int32_t d1_;
+};
+
+ConstArray1D::~ConstArray1D()
+{
+}
+
+/**
+ * A read-only 2D view of a float array stored row by row. The view neither
+ * owns nor copies the data, so the underlying memory must outlive the view.
+ */
+class ConstArray2D : public ReadArray2D {
+public:
+    /** Creates an empty view; call init() before reading from it. */
+    ConstArray2D() : data_(nullptr), d1_(0), d2_(0) {}
+
+    /** Creates a view of d1 rows by d2 columns starting at data. */
+    ConstArray2D(const float* data, int32_t d1, int32_t d2)
+        : data_(data), d1_(d1), d2_(d2) {}
+
+    virtual ~ConstArray2D();
+
+    // Init the object, the object does not own the data nor copy.
+    // It is designed to directly use data from memory mapped resources.
+    // The int32_t values are reinterpreted in place as floats, which is why
+    // IEEE 754 floats are required.
+    void init(const int32_t* data, int32_t d1, int32_t d2) {
+        U_ASSERT(IEEE_754 == 1);
+        data_ = reinterpret_cast<const float*>(data);
+        d1_ = d1;
+        d2_ = d2;
+    }
+
+    // ReadArray2D methods.
+    inline int32_t d1() const override { return d1_; }
+    inline int32_t d2() const override { return d2_; }
+    // Element (i, j) lives at offset i * d2_ + j. Only the upper bounds are
+    // asserted, and only in debug builds.
+    float get(int32_t i, int32_t j) const override {
+        U_ASSERT(i < d1_);
+        U_ASSERT(j < d2_);
+        return data_[i * d2_ + j];
+    }
+
+    // Expose the ith row as a ConstArray1D
+    // (a view into the same memory; nothing is copied).
+    inline ConstArray1D row(int32_t i) const {
+        U_ASSERT(i < d1_);
+        return ConstArray1D(data_ + i * d2_, d2_);
+    }
+
+private:
+    const float* data_;  // not owned
+    int32_t d1_;
+    int32_t d2_;
+};
+
+ConstArray2D::~ConstArray2D()
+{
+}
+
+/**
+ * A writable 1D array of floats.
+ * This is the main class implementing the vector and matrix operations.
+ *
+ * An instance either owns its storage (memory_ != nullptr, released in the
+ * destructor) or is a non-owning slice of another array (memory_ == nullptr).
+ *
+ * NOTE(review): the copy operations are the implicit ones, so copying an
+ * owning instance would free the same memory twice. The code in this file only
+ * creates non-owning slices by value.
+ */
+class Array1D : public ReadArray1D {
+public:
+    /** Creates an empty array without storage. */
+    Array1D() : memory_(nullptr), data_(nullptr), d1_(0) {}
+    /**
+     * Allocates d1 floats. If status is a success on entry, the array is
+     * zero-filled, or status is set to U_MEMORY_ALLOCATION_ERROR when the
+     * allocation failed. If status is already a failure, the contents are
+     * left uninitialized.
+     */
+    Array1D(int32_t d1, UErrorCode &status)
+        : memory_(uprv_malloc(d1 * sizeof(float))),
+          data_(static_cast<float*>(memory_)), d1_(d1) {
+        if (U_SUCCESS(status)) {
+            if (memory_ == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            clear();
+        }
+    }
+
+    virtual ~Array1D();
+
+    // A special constructor which does not own the memory but writeable
+    // as a slice of an array.
+    Array1D(float* data, int32_t d1)
+        : memory_(nullptr), data_(data), d1_(d1) {}
+
+    // ReadArray1D methods.
+    virtual int32_t d1() const override { return d1_; }
+    // Only the upper bound is asserted, and only in debug builds.
+    virtual float get(int32_t i) const override {
+        U_ASSERT(i < d1_);
+        return data_[i];
+    }
+
+    // Return the index which point to the max data in the array.
+    // The first maximum wins on ties. Reads data_[0] unconditionally, so the
+    // array must not be empty.
+    inline int32_t maxIndex() const {
+        int32_t index = 0;
+        float max = data_[0];
+        for (int32_t i = 1; i < d1_; i++) {
+            if (data_[i] > max) {
+                max = data_[i];
+                index = i;
+            }
+        }
+        return index;
+    }
+
+    // Slice part of the array to a new one.
+    // The result is a non-owning view of `size` elements starting at `from`;
+    // writes through it modify this array.
+    inline Array1D slice(int32_t from, int32_t size) const {
+        U_ASSERT(from >= 0);
+        U_ASSERT(from < d1_);
+        U_ASSERT(from + size <= d1_);
+        return Array1D(data_ + from, size);
+    }
+
+    // Add dot product of a 1D array and a 2D array into this one.
+    // For every i: this[i] += sum over j of a[j] * b[j][i].
+    inline Array1D& addDotProduct(const ReadArray1D& a, const ReadArray2D& b) {
+        U_ASSERT(a.d1() == b.d1());
+        U_ASSERT(b.d2() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            for (int32_t j = 0; j < a.d1(); j++) {
+                data_[i] += a.get(j) * b.get(j, i);
+            }
+        }
+        return *this;
+    }
+
+    // Hadamard Product the values of another array of the same size into this one.
+    // For every i: this[i] *= a[i].
+    inline Array1D& hadamardProduct(const ReadArray1D& a) {
+        U_ASSERT(a.d1() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            data_[i] *= a.get(i);
+        }
+        return *this;
+    }
+
+    // Add the Hadamard Product of two arrays of the same size into this one.
+    // For every i: this[i] += a[i] * b[i].
+    inline Array1D& addHadamardProduct(const ReadArray1D& a, const ReadArray1D& b) {
+        U_ASSERT(a.d1() == d1());
+        U_ASSERT(b.d1() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            data_[i] += a.get(i) * b.get(i);
+        }
+        return *this;
+    }
+
+    // Add the values of another array of the same size into this one.
+    inline Array1D& add(const ReadArray1D& a) {
+        U_ASSERT(a.d1() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            data_[i] += a.get(i);
+        }
+        return *this;
+    }
+
+    // Assign the values of another array of the same size into this one.
+    inline Array1D& assign(const ReadArray1D& a) {
+        U_ASSERT(a.d1() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            data_[i] = a.get(i);
+        }
+        return *this;
+    }
+
+    // Apply tanh to all the elements in the array.
+    inline Array1D& tanh() {
+        return tanh(*this);
+    }
+
+    // Apply tanh of a and store into this array.
+    // `a` may be this array itself: each element is read before it is written.
+    inline Array1D& tanh(const Array1D& a) {
+        U_ASSERT(a.d1() == d1());
+        for (int32_t i = 0; i < d1_; i++) {
+            data_[i] = std::tanh(a.get(i));
+        }
+        return *this;
+    }
+
+    // Apply sigmoid to all the elements in the array.
+    // For every i: this[i] = 1 / (1 + exp(-this[i])).
+    inline Array1D& sigmoid() {
+        for (int32_t i = 0; i < d1_; i++) {
+            data_[i] = 1.0f/(1.0f + expf(-data_[i]));
+        }
+        return *this;
+    }
+
+    // Set all elements to zero.
+    inline Array1D& clear() {
+        uprv_memset(data_, 0, d1_ * sizeof(float));
+        return *this;
+    }
+
+private:
+    void* memory_;  // owned allocation, or nullptr for a non-owning slice
+    float* data_;   // first element; equals memory_ when owning
+    int32_t d1_;
+};
+
+// Releases the owned allocation; memory_ is nullptr for slices.
+Array1D::~Array1D()
+{
+    uprv_free(memory_);
+}
+
+/**
+ * A writable 2D array of floats, stored row by row, which owns its storage.
+ *
+ * All element counts and offsets are computed in size_t: the number of rows
+ * can be the length of the input text, so a product of two int32_t dimensions
+ * could overflow int32_t.
+ */
+class Array2D : public ReadArray2D {
+public:
+    /** Creates an empty array without storage. */
+    Array2D() : memory_(nullptr), data_(nullptr), d1_(0), d2_(0) {}
+    /**
+     * Allocates d1 rows by d2 columns. If status is a success on entry, the
+     * array is zero-filled, or status is set to U_MEMORY_ALLOCATION_ERROR when
+     * the allocation failed. If status is already a failure, the contents are
+     * left uninitialized.
+     */
+    Array2D(int32_t d1, int32_t d2, UErrorCode &status)
+        : memory_(uprv_malloc(static_cast<size_t>(d1) * static_cast<size_t>(d2) * sizeof(float))),
+          data_(static_cast<float*>(memory_)), d1_(d1), d2_(d2) {
+        if (U_SUCCESS(status)) {
+            if (memory_ == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            clear();
+        }
+    }
+    virtual ~Array2D();
+
+    // ReadArray2D methods.
+    virtual int32_t d1() const override { return d1_; }
+    virtual int32_t d2() const override { return d2_; }
+    // Only the upper bounds are asserted, and only in debug builds.
+    virtual float get(int32_t i, int32_t j) const override {
+        U_ASSERT(i < d1_);
+        U_ASSERT(j < d2_);
+        return data_[static_cast<size_t>(i) * static_cast<size_t>(d2_) + static_cast<size_t>(j)];
+    }
+
+    // Expose the ith row as a writable, non-owning Array1D.
+    inline Array1D row(int32_t i) const {
+        U_ASSERT(i < d1_);
+        return Array1D(data_ + static_cast<size_t>(i) * static_cast<size_t>(d2_), d2_);
+    }
+
+    // Set all elements to zero.
+    inline Array2D& clear() {
+        uprv_memset(data_, 0,
+                    static_cast<size_t>(d1_) * static_cast<size_t>(d2_) * sizeof(float));
+        return *this;
+    }
+
+private:
+    void* memory_;  // owned allocation
+    float* data_;   // same memory, typed
+    int32_t d1_;
+    int32_t d2_;
+};
+
+// Releases the owned allocation.
+Array2D::~Array2D()
+{
+    uprv_free(memory_);
+}
+
+// Output classes of the model ("BIES"). The numeric values matter:
+// divideUpDictionaryRange() casts the index of the largest output value
+// directly to this type.
+typedef enum {
+    BEGIN,
+    INSIDE,
+    END,
+    SINGLE
+} LSTMClass;
+
+// The unit of text that is looked up in the model's dictionary, as named by
+// the "type" string of the model resource ("codepoints" or "graphclust").
+typedef enum {
+    UNKNOWN,
+    CODE_POINTS,
+    GRAPHEME_CLUSTER,
+} EmbeddingType;
+
+/**
+ * An LSTM model loaded from a resource bundle.
+ *
+ * The weight arrays are non-owning views into the bundle's integer vector, so
+ * the bundle is kept open for the lifetime of this object and closed in the
+ * destructor.
+ */
+struct LSTMData : public UMemory {
+    LSTMData(UResourceBundle* rb, UErrorCode &status);
+    ~LSTMData();
+    UHashtable* fDict;          // maps a dictionary string to its index; owned
+    EmbeddingType fType;
+    const char16_t* fName;      // the "model" string of the bundle; not owned
+    ConstArray2D fEmbedding;    // (dictionary size + 1) x embedding size
+    ConstArray2D fForwardW;     // embedding size x (4 * hunits)
+    ConstArray2D fForwardU;     // hunits x (4 * hunits)
+    ConstArray1D fForwardB;     // 4 * hunits
+    ConstArray2D fBackwardW;    // embedding size x (4 * hunits)
+    ConstArray2D fBackwardU;    // hunits x (4 * hunits)
+    ConstArray1D fBackwardB;    // 4 * hunits
+    ConstArray2D fOutputW;      // (2 * hunits) x 4
+    ConstArray1D fOutputB;      // 4
+
+private:
+    UResourceBundle* fBundle;   // closed in the destructor
+};
+
+/**
+ * Loads an LSTM model from the resource bundle `rb`.
+ *
+ * The bundle is stored in fBundle right away and is closed by the destructor,
+ * including when loading fails. The weight arrays are views into the
+ * bundle's "data" integer vector; nothing is copied.
+ *
+ * The dimensions read from the bundle are validated against the actual length
+ * of the data vector, so that a truncated or inconsistent resource is
+ * rejected instead of being read out of bounds.
+ *
+ * @param rb     Opened model bundle.
+ * @param status Set to U_UNSUPPORTED_ERROR without IEEE 754 floats, to
+ *               U_INVALID_FORMAT_ERROR for an unknown "type" or inconsistent
+ *               dimensions, or to whatever error the resource functions report.
+ */
+LSTMData::LSTMData(UResourceBundle* rb, UErrorCode &status)
+    : fDict(nullptr), fType(UNKNOWN), fName(nullptr),
+      fBundle(rb)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (IEEE_754 != 1) {
+        status = U_UNSUPPORTED_ERROR;
+        return;
+    }
+    LocalUResourceBundlePointer embeddings_res(
+        ures_getByKey(rb, "embeddings", nullptr, &status));
+    int32_t embedding_size = ures_getInt(embeddings_res.getAlias(), &status);
+    LocalUResourceBundlePointer hunits_res(
+        ures_getByKey(rb, "hunits", nullptr, &status));
+    if (U_FAILURE(status)) return;
+    int32_t hunits = ures_getInt(hunits_res.getAlias(), &status);
+    const char16_t* type = ures_getStringByKey(rb, "type", nullptr, &status);
+    if (U_FAILURE(status)) return;
+    if (u_strCompare(type, -1, u"codepoints", -1, false) == 0) {
+        fType = CODE_POINTS;
+    } else if (u_strCompare(type, -1, u"graphclust", -1, false) == 0) {
+        fType = GRAPHEME_CLUSTER;
+    } else {
+        // No vectorizer exists for any other type; report it here rather than
+        // leaving fType as UNKNOWN for createVectorizer() to abort on.
+        status = U_INVALID_FORMAT_ERROR;
+        return;
+    }
+    fName = ures_getStringByKey(rb, "model", nullptr, &status);
+    LocalUResourceBundlePointer dataRes(ures_getByKey(rb, "data", nullptr, &status));
+    if (U_FAILURE(status)) return;
+    int32_t data_len = 0;
+    const int32_t* data = ures_getIntVector(dataRes.getAlias(), &data_len, &status);
+    fDict = uhash_open(uhash_hashUChars, uhash_compareUChars, nullptr, &status);
+
+    StackUResourceBundle stackTempBundle;
+    ResourceDataValue value;
+    ures_getValueWithFallback(rb, "dict", stackTempBundle.getAlias(), value, status);
+    ResourceArray stringArray = value.getArray(status);
+    int32_t num_index = stringArray.getSize();
+    if (U_FAILURE(status)) { return; }
+
+    // Each dimension must fit into the data vector at least four times over
+    // (fForwardW alone holds embedding_size * 4 * hunits values and fForwardB
+    // holds 4 * hunits). Checking this first also keeps the 64-bit sum below
+    // from overflowing.
+    if (data == nullptr || embedding_size <= 0 || hunits <= 0 ||
+            embedding_size > data_len / 4 || hunits > data_len / 4) {
+        status = U_INVALID_FORMAT_ERROR;
+        return;
+    }
+    const int64_t gates = 4 * static_cast<int64_t>(hunits);
+    const int64_t expected_len =
+        (static_cast<int64_t>(num_index) + 1) * embedding_size +  // fEmbedding
+        2 * (embedding_size * gates + hunits * gates + gates) +   // forward and backward W, U, B
+        2 * gates +                                               // fOutputW
+        4;                                                        // fOutputB
+    if (data_len < expected_len) {
+        // The views set up below would reach past the end of the vector.
+        status = U_INVALID_FORMAT_ERROR;
+        return;
+    }
+    U_ASSERT(data_len == expected_len);
+
+    // put dict into hash
+    int32_t stringLength;
+    for (int32_t idx = 0; idx < num_index; idx++) {
+        stringArray.getValue(idx, value);
+        const char16_t* str = value.getString(stringLength, status);
+        uhash_putiAllowZero(fDict, (void*)str, idx, &status);
+        if (U_FAILURE(status)) return;
+#ifdef LSTM_VECTORIZER_DEBUG
+        printf("Assign [");
+        while (*str != 0x0000) {
+            printf("U+%04x ", *str);
+            str++;
+        }
+        printf("] map to %d\n", idx-1);
+#endif
+    }
+    // After the check above every size fits into int32_t.
+    // The embedding has one extra row, used for strings that are not in the dictionary.
+    int32_t mat1_size = (num_index + 1) * embedding_size;
+    int32_t mat2_size = embedding_size * 4 * hunits;
+    int32_t mat3_size = hunits * 4 * hunits;
+    int32_t mat4_size = 4 * hunits;
+    int32_t mat5_size = mat2_size;
+    int32_t mat6_size = mat3_size;
+    int32_t mat7_size = mat4_size;
+    int32_t mat8_size = 2 * hunits * 4;
+
+    // The matrices are stored back to back in this order.
+    fEmbedding.init(data, (num_index + 1), embedding_size);
+    data += mat1_size;
+    fForwardW.init(data, embedding_size, 4 * hunits);
+    data += mat2_size;
+    fForwardU.init(data, hunits, 4 * hunits);
+    data += mat3_size;
+    fForwardB.init(data, 4 * hunits);
+    data += mat4_size;
+    fBackwardW.init(data, embedding_size, 4 * hunits);
+    data += mat5_size;
+    fBackwardU.init(data, hunits, 4 * hunits);
+    data += mat6_size;
+    fBackwardB.init(data, 4 * hunits);
+    data += mat7_size;
+    fOutputW.init(data, 2 * hunits, 4);
+    data += mat8_size;
+    fOutputB.init(data, 4);
+}
+
+// Closes the dictionary hash table and the resource bundle that was passed
+// to the constructor. The weight views point into that bundle and must not be
+// used afterwards.
+LSTMData::~LSTMData() {
+    uhash_close(fDict);
+    ures_close(fBundle);
+}
+
+/**
+ * Base class for turning a range of text into a sequence of dictionary
+ * indices, one per text unit, together with the offset of each unit.
+ */
+class Vectorizer : public UMemory {
+public:
+    /** @param dict Maps NUL-terminated strings to indices; not owned. */
+    Vectorizer(UHashtable* dict) : fDict(dict) {}
+    virtual ~Vectorizer();
+    /**
+     * Appends to `offsets` the start offset and to `indices` the dictionary
+     * index of each unit of text between startPos and endPos.
+     */
+    virtual void vectorize(UText *text, int32_t startPos, int32_t endPos,
+                           UVector32 &offsets, UVector32 &indices,
+                           UErrorCode &status) const = 0;
+protected:
+    /**
+     * Looks up a NUL-terminated string in the dictionary. Strings that are
+     * not found map to the number of dictionary entries, i.e. one past the
+     * largest regular index.
+     */
+    int32_t stringToIndex(const char16_t* str) const {
+        UBool found = false;
+        int32_t ret = uhash_getiAndFound(fDict, (const void*)str, &found);
+        if (!found) {
+            ret = fDict->count;
+        }
+#ifdef LSTM_VECTORIZER_DEBUG
+        printf("[");
+        while (*str != 0x0000) {
+            printf("U+%04x ", *str);
+            str++;
+        }
+        printf("] map to %d\n", ret);
+#endif
+        return ret;
+    }
+
+private:
+    UHashtable* fDict;  // not owned
+};
+
+Vectorizer::~Vectorizer()
+{
+}
+
+/**
+ * Vectorizer that looks up one code point at a time.
+ */
+class CodePointsVectorizer : public Vectorizer {
+public:
+    /** @param dict Passed on to Vectorizer; not owned. */
+    CodePointsVectorizer(UHashtable* dict) : Vectorizer(dict) {}
+    virtual ~CodePointsVectorizer();
+    virtual void vectorize(UText *text, int32_t startPos, int32_t endPos,
+                           UVector32 &offsets, UVector32 &indices,
+                           UErrorCode &status) const override;
+};
+
+CodePointsVectorizer::~CodePointsVectorizer()
+{
+}
+
+/**
+ * Appends one entry per code point in [startPos, endPos): its native index
+ * to `offsets` and its dictionary index to `indices`.
+ * Does nothing if the vectors cannot be grown or status is a failure.
+ */
+void CodePointsVectorizer::vectorize(
+    UText *text, int32_t startPos, int32_t endPos,
+    UVector32 &offsets, UVector32 &indices, UErrorCode &status) const
+{
+    const int32_t span = endPos - startPos;
+    if (!offsets.ensureCapacity(span, status) ||
+            !indices.ensureCapacity(span, status)) {
+        return;
+    }
+    if (U_FAILURE(status)) return;
+    utext_setNativeIndex(text, startPos);
+    // A one-unit, NUL-terminated string for the dictionary lookup.
+    char16_t unit[2] = {0, 0};
+    while (U_SUCCESS(status)) {
+        const int32_t position = static_cast<int32_t>(utext_getNativeIndex(text));
+        if (position >= endPos) {
+            break;
+        }
+        // Since the LSTMBreakEngine currently only accepts chars in the BMP,
+        // we can ignore the possibility of hitting a supplementary code
+        // point.
+        unit[0] = static_cast<char16_t>(utext_next32(text));
+        U_ASSERT(!U_IS_SURROGATE(unit[0]));
+        offsets.addElement(position, status);
+        indices.addElement(stringToIndex(unit), status);
+    }
+}
+
+/**
+ * Vectorizer that looks up one grapheme cluster at a time.
+ */
+class GraphemeClusterVectorizer : public Vectorizer {
+public:
+    /** @param dict Passed on to Vectorizer; not owned. */
+    GraphemeClusterVectorizer(UHashtable* dict)
+        : Vectorizer(dict)
+    {
+    }
+    virtual ~GraphemeClusterVectorizer();
+    virtual void vectorize(UText *text, int32_t startPos, int32_t endPos,
+                           UVector32 &offsets, UVector32 &indices,
+                           UErrorCode &status) const override;
+};
+
+GraphemeClusterVectorizer::~GraphemeClusterVectorizer()
+{
+}
+
+// Capacity, in UTF-16 code units, handed to utext_extract() for one grapheme cluster.
+constexpr int32_t MAX_GRAPHEME_CLSTER_LENGTH = 10;
+
+/**
+ * Appends one entry per grapheme cluster in [startPos, endPos): the offset
+ * where the cluster starts to `offsets` and its dictionary index to `indices`.
+ *
+ * A cluster longer than MAX_GRAPHEME_CLSTER_LENGTH code units makes
+ * utext_extract() fail, and that failure is returned in status.
+ */
+void GraphemeClusterVectorizer::vectorize(
+    UText *text, int32_t startPos, int32_t endPos,
+    UVector32 &offsets, UVector32 &indices, UErrorCode &status) const
+{
+    if (U_FAILURE(status)) return;
+    if (!offsets.ensureCapacity(endPos - startPos, status) ||
+            !indices.ensureCapacity(endPos - startPos, status)) {
+        return;
+    }
+    if (U_FAILURE(status)) return;
+    LocalPointer<BreakIterator> graphemeIter(BreakIterator::createCharacterInstance(Locale(), status));
+    if (U_FAILURE(status)) return;
+    graphemeIter->setText(text, status);
+    if (U_FAILURE(status)) return;
+
+    if (startPos != 0) {
+        graphemeIter->preceding(startPos);
+    }
+    int32_t last = startPos;
+    int32_t current = startPos;
+    // One extra slot that always stays zero: when a cluster fills the whole
+    // capacity, utext_extract() cannot NUL-terminate it and only reports
+    // U_STRING_NOT_TERMINATED_WARNING, which is not a failure. stringToIndex()
+    // reads up to the NUL, so without this terminator it would run past the buffer.
+    char16_t str[MAX_GRAPHEME_CLSTER_LENGTH + 1];
+    str[MAX_GRAPHEME_CLSTER_LENGTH] = 0;
+    while ((current = graphemeIter->next()) != BreakIterator::DONE) {
+        if (current >= endPos) {
+            break;
+        }
+        if (current > startPos) {
+            utext_extract(text, last, current, str, MAX_GRAPHEME_CLSTER_LENGTH, &status);
+            if (U_FAILURE(status)) return;
+            if (status == U_STRING_NOT_TERMINATED_WARNING) {
+                // Handled by the extra terminator; do not leak the warning to the caller.
+                status = U_ZERO_ERROR;
+            }
+            offsets.addElement(last, status);
+            indices.addElement(stringToIndex(str), status);
+            if (U_FAILURE(status)) return;
+        }
+        last = current;
+    }
+    if (U_FAILURE(status) || last >= endPos) {
+        return;
+    }
+    // The remainder between the last boundary and endPos.
+    utext_extract(text, last, endPos, str, MAX_GRAPHEME_CLSTER_LENGTH, &status);
+    if (U_SUCCESS(status)) {
+        if (status == U_STRING_NOT_TERMINATED_WARNING) {
+            status = U_ZERO_ERROR;
+        }
+        offsets.addElement(last, status);
+        indices.addElement(stringToIndex(str), status);
+    }
+}
+
+// Performs one LSTM step as described in
+// https://en.wikipedia.org/wiki/Long_short-term_memory#LSTM_with_a_forget_gate
+//
+// x is the input vector; h and c are the hidden and cell state, both updated
+// in place. ifco is scratch space of 4 * hunits floats. It carries no
+// information in or out; passing it in lets the caller allocate it once
+// instead of on every step.
+void compute(
+    int32_t hunits,
+    const ReadArray2D& W, const ReadArray2D& U, const ReadArray1D& b,
+    const ReadArray1D& x, Array1D& h, Array1D& c,
+    Array1D& ifco)
+{
+    // Pre-activations of all four gates at once: ifco = b + x * W + h * U
+    ifco.assign(b);
+    ifco.addDotProduct(x, W);
+    ifco.addDotProduct(h, U);
+
+    // Non-owning views of the four hunits-sized quarters of ifco.
+    Array1D inputGate = ifco.slice(0*hunits, hunits);
+    Array1D forgetGate = ifco.slice(1*hunits, hunits);
+    Array1D candidate = ifco.slice(2*hunits, hunits);
+    Array1D outputGate = ifco.slice(3*hunits, hunits);
+
+    inputGate.sigmoid();   // i: sigmoid
+    forgetGate.sigmoid();  // f: sigmoid
+    candidate.tanh();      // c_: tanh
+    outputGate.sigmoid();  // o: sigmoid
+
+    // c = f * c + i * c_
+    c.hadamardProduct(forgetGate);
+    c.addHadamardProduct(inputGate, candidate);
+
+    // h = o * tanh(c)
+    h.tanh(c);
+    h.hadamardProduct(outputGate);
+}
+
+// Minimum word size
+static const int32_t MIN_WORD = 2;
+
+// Minimum number of characters for two words.
+// divideUpDictionaryRange() returns without breaks when the range is not
+// longer than this.
+static const int32_t MIN_WORD_SPAN = MIN_WORD * 2;
+
+/**
+ * Runs the bidirectional LSTM over the text in [startPos, endPos) and appends
+ * the word boundaries it predicts to foundBreaks.
+ *
+ * The text is first turned into dictionary indices by the vectorizer. The
+ * backward LSTM is run over the whole sequence and its hidden states are
+ * kept; the forward LSTM is then run and, at each position, combined with the
+ * stored backward state to classify the position as BEGIN, INSIDE, END or
+ * SINGLE. A break is recorded before every BEGIN or SINGLE position except
+ * the first.
+ *
+ * @param text        The text to analyze.
+ * @param startPos    Start of the range.
+ * @param endPos      End of the range.
+ * @param foundBreaks Receives the break offsets.
+ * @param status      Error code; nothing is done if it is already a failure.
+ * @return The number of breaks appended, or 0 on failure or when the range is
+ *         too short to hold two words.
+ */
+int32_t
+LSTMBreakEngine::divideUpDictionaryRange( UText *text,
+                                                int32_t startPos,
+                                                int32_t endPos,
+                                                UVector32 &foundBreaks,
+                                                UBool /* isPhraseBreaking */,
+                                                UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
+    int32_t beginFoundBreakSize = foundBreaks.size();
+    utext_setNativeIndex(text, startPos);
+    utext_moveIndex32(text, MIN_WORD_SPAN);
+    if (utext_getNativeIndex(text) >= endPos) {
+        return 0;       // Not enough characters for two words
+    }
+    utext_setNativeIndex(text, startPos);
+
+    UVector32 offsets(status);
+    UVector32 indices(status);
+    if (U_FAILURE(status)) return 0;
+    fVectorizer->vectorize(text, startPos, endPos, offsets, indices, status);
+    if (U_FAILURE(status)) return 0;
+    int32_t* offsetsBuf = offsets.getBuffer();
+    int32_t* indicesBuf = indices.getBuffer();
+
+    int32_t input_seq_len = indices.size();
+    int32_t hunits = fData->fForwardU.d1();
+
+    // ----- Begin of all the Array memory allocation needed for this function
+    // Allocate temp array used inside compute()
+    Array1D ifco(4 * hunits, status);
+
+    Array1D c(hunits, status);
+    Array1D logp(4, status);
+
+    // TODO: limit size of hBackward. If input_seq_len is too big, we could
+    // run out of memory.
+    // Backward LSTM
+    Array2D hBackward(input_seq_len, hunits, status);
+
+    // Allocate fbRow and slice the internal array in two.
+    Array1D fbRow(2 * hunits, status);
+
+    // ----- End of all the Array memory allocation needed for this function
+    if (U_FAILURE(status)) return 0;
+
+    // To save the needed memory usage, the following is different from the
+    // Python or ICU4X implementation. We first perform the Backward LSTM
+    // and then merge the iteration of the forward LSTM and the output layer
+    // together because we only need to remember the h[t-1] for Forward LSTM.
+    for (int32_t i = input_seq_len - 1; i >= 0; i--) {
+        Array1D hRow = hBackward.row(i);
+        // Start from the hidden state of the position to the right; the
+        // rightmost position starts from the zero-filled row.
+        if (i != input_seq_len - 1) {
+            hRow.assign(hBackward.row(i+1));
+        }
+#ifdef LSTM_DEBUG
+        printf("hRow %d\n", i);
+        hRow.print();
+        printf("indicesBuf[%d] = %d\n", i, indicesBuf[i]);
+        printf("fData->fEmbedding.row(indicesBuf[%d]):\n", i);
+        fData->fEmbedding.row(indicesBuf[i]).print();
+#endif  // LSTM_DEBUG
+        compute(hunits,
+                fData->fBackwardW, fData->fBackwardU, fData->fBackwardB,
+                fData->fEmbedding.row(indicesBuf[i]),
+                hRow, c, ifco);
+    }
+
+
+    Array1D forwardRow = fbRow.slice(0, hunits);  // point to first half of data in fbRow.
+    Array1D backwardRow = fbRow.slice(hunits, hunits);  // point to second half of data in fbRow.
+
+    // The following iteration merges the forward LSTM and the output layer
+    // together.
+    c.clear();  // reuse c since it is the same size.
+    for (int32_t i = 0; i < input_seq_len; i++) {
+#ifdef LSTM_DEBUG
+        printf("forwardRow %d\n", i);
+        forwardRow.print();
+#endif  // LSTM_DEBUG
+        // Forward LSTM
+        // Calculate the result into forwardRow, which points to the data in the first half
+        // of fbRow.
+        compute(hunits,
+                fData->fForwardW, fData->fForwardU, fData->fForwardB,
+                fData->fEmbedding.row(indicesBuf[i]),
+                forwardRow, c, ifco);
+
+        // assign the data from hBackward.row(i) to the second half of fbRow.
+        backwardRow.assign(hBackward.row(i));
+
+        // Output layer: logp = fOutputB + fbRow * fOutputW
+        logp.assign(fData->fOutputB).addDotProduct(fbRow, fData->fOutputW);
+#ifdef LSTM_DEBUG
+        printf("backwardRow %d\n", i);
+        backwardRow.print();
+        printf("logp %d\n", i);
+        logp.print();
+#endif  // LSTM_DEBUG
+
+        // current = argmax(logp)
+        LSTMClass current = static_cast<LSTMClass>(logp.maxIndex());
+        // BIES logic.
+        // A word starts at a BEGIN or SINGLE position, so there is a break
+        // before it, except at the very start of the range.
+        if (current == BEGIN || current == SINGLE) {
+            if (i != 0) {
+                foundBreaks.addElement(offsetsBuf[i], status);
+                if (U_FAILURE(status)) return 0;
+            }
+        }
+    }
+    return foundBreaks.size() - beginFoundBreakSize;
+}
+
+/**
+ * Creates the vectorizer that matches the embedding type of the model.
+ *
+ * @param data   The model; its dictionary is shared with the vectorizer, not copied.
+ * @param status Set to U_MEMORY_ALLOCATION_ERROR if the vectorizer cannot be allocated.
+ * @return A new vectorizer owned by the caller, or nullptr on failure.
+ */
+Vectorizer* createVectorizer(const LSTMData* data, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+    Vectorizer* vectorizer = nullptr;
+    switch (data->fType) {
+        case CODE_POINTS:
+            vectorizer = new CodePointsVectorizer(data->fDict);
+            break;
+        case GRAPHEME_CLUSTER:
+            vectorizer = new GraphemeClusterVectorizer(data->fDict);
+            break;
+        default:
+            UPRV_UNREACHABLE_EXIT;
+    }
+    // Report a failed allocation instead of handing back nullptr with a
+    // success status, which the engine would later dereference.
+    if (vectorizer == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return vectorizer;
+}
+
+// Creates an engine for the characters in `set`, using the model `data`.
+// On success the engine owns `data` and deletes it in the destructor.
+// On failure `data` stays with the caller.
+LSTMBreakEngine::LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status)
+    : DictionaryBreakEngine(), fData(data), fVectorizer(createVectorizer(fData, status))
+{
+    if (U_FAILURE(status)) {
+      fData = nullptr;  // If failure, we should not delete fData in destructor because the caller will do so.
+      return;
+    }
+    setCharacters(set);
+}
+
+// Deletes the model and the vectorizer. fData is nullptr if construction
+// failed, in which case the model is left to the caller.
+LSTMBreakEngine::~LSTMBreakEngine() {
+    delete fData;
+    delete fVectorizer;
+}
+
+// Returns the name of the model. The string is not owned by the caller.
+// Must only be called on a successfully constructed engine: fData is nullptr
+// after a failed construction.
+const char16_t* LSTMBreakEngine::name() const {
+    return fData->fName;
+}
+
+// Looks up the entry for `script` in the "lstm" table of the root bundle of
+// the break iterator data and returns it as a string.
+// CreateLSTMDataForScript() treats the result as a file name and cuts it at
+// the last '.'.
+// No status check is done here: a failure is passed through the resource
+// functions to the caller.
+UnicodeString defaultLSTM(UScriptCode script, UErrorCode& status) {
+    // open root from brkitr tree.
+    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
+    // b is passed as the fill-in as well, so the same object is reused for the table.
+    b = ures_getByKeyWithFallback(b, "lstm", b, &status);
+    UnicodeString result = ures_getUnicodeStringByKey(b, uscript_getShortName(script), &status);
+    ures_close(b);
+    return result;
+}
+
+// Loads the default LSTM model for `script`.
+// Returns nullptr, leaving status untouched, for scripts other than Khmer,
+// Lao, Myanmar and Thai. Returns nullptr with a failure status if the model
+// cannot be found or loaded. The caller owns the result.
+U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(UScriptCode script, UErrorCode& status)
+{
+    switch (script) {
+        case USCRIPT_KHMER:
+        case USCRIPT_LAO:
+        case USCRIPT_MYANMAR:
+        case USCRIPT_THAI:
+            break;
+        default:
+            return nullptr;
+    }
+    const UnicodeString modelFile = defaultLSTM(script, status);
+    if (U_FAILURE(status)) return nullptr;
+    // The bundle name is the model file name without its extension.
+    CharString bundleName;
+    bundleName.appendInvariantChars(modelFile, status);
+    bundleName.truncate(bundleName.lastIndexOf('.'));
+
+    LocalUResourceBundlePointer bundle(
+        ures_openDirect(U_ICUDATA_BRKITR, bundleName.data(), &status));
+    if (U_FAILURE(status)) return nullptr;
+
+    // Ownership of the bundle moves to the LSTMData.
+    return CreateLSTMData(bundle.orphan(), status);
+}
+
+// Creates an LSTMData from the model bundle `rb`.
+//
+// If status is a success on entry, this function takes ownership of rb: it is
+// closed when the returned object is deleted, and also when loading or the
+// allocation fails. If status is already a failure, rb is not touched.
+//
+// Returns the new object, owned by the caller, or nullptr with a failure status.
+U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(UResourceBundle* rb, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+    const LSTMData* result = new LSTMData(rb, status);
+    if (result == nullptr) {
+        // No LSTMData exists that could close the bundle, so do it here, and
+        // do not return nullptr with a success status.
+        ures_close(rb);
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    if (U_FAILURE(status)) {
+        // The destructor closes rb.
+        delete result;
+        return nullptr;
+    }
+    return result;
+}
+
+// Creates an LSTM break engine for `script` that uses the model `data`.
+//
+// Only Thai and Myanmar are supported. For any other script, `data` is
+// deleted and nullptr is returned without changing status.
+// On success the returned engine owns `data`. If creating the engine fails,
+// nullptr is returned with a failure status and `data` is not deleted.
+// NOTE(review): `data` is deleted for an unsupported script but not on the
+// other nullptr paths, so callers must tell the two apart -- confirm that
+// they do.
+U_CAPI const LanguageBreakEngine* U_EXPORT2
+CreateLSTMBreakEngine(UScriptCode script, const LSTMData* data, UErrorCode& status)
+{
+    UnicodeString pattern;
+    switch(script) {
+        case USCRIPT_THAI:
+            pattern = UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]");
+            break;
+        case USCRIPT_MYANMAR:
+            pattern = UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]");
+            break;
+        default:
+            delete data;
+            return nullptr;
+    }
+    UnicodeSet handledCharacters;
+    handledCharacters.applyPattern(pattern, status);
+    const LanguageBreakEngine* engine = new LSTMBreakEngine(data, handledCharacters, status);
+    if (engine == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    if (U_FAILURE(status)) {
+        // After a failed construction the engine does not own data.
+        delete engine;
+        return nullptr;
+    }
+    return engine;
+}
+
+// Deletes an LSTMData, which also closes its resource bundle.
+// Passing nullptr is allowed and does nothing.
+U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data)
+{
+    delete data;
+}
+
+// Returns the name of the model. The string is not owned by the caller.
+// `data` must not be nullptr.
+U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data)
+{
+    return data->fName;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
@@ -0,0 +1,88 @@
+// © 2021 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef LSTMBE_H
+#define LSTMBE_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/uniset.h"
+#include "unicode/ures.h"
+#include "unicode/utext.h"
+#include "unicode/utypes.h"
+
+#include "brkeng.h"
+#include "dictbe.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+class Vectorizer;
+struct LSTMData;
+
+/*******************************************************************
+ * LSTMBreakEngine
+ */
+
+/**
+ * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * LSTM to determine language-specific breaks.</p>
+ *
+ * <p>After it is constructed a LSTMBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class LSTMBreakEngine : public DictionaryBreakEngine {
+public:
+    /**
+     * <p>Constructor.</p>
+     *
+     * @param data The LSTM model. The engine takes ownership of it only if
+     *             construction succeeds; on failure it stays with the caller.
+     * @param set The characters handled by this engine.
+     * @param status Information on any errors encountered.
+     */
+    LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status);
+
+    /**
+     * <p>Virtual destructor.</p>
+     */
+    virtual ~LSTMBreakEngine();
+
+    /**
+     * @return The name of the model; the string is not owned by the caller.
+     */
+    virtual const char16_t* name() const;
+
+protected:
+    /**
+     * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+     *
+     * @param text A UText representing the text
+     * @param rangeStart The start of the range of dictionary characters
+     * @param rangeEnd The end of the range of dictionary characters
+     * @param foundBreaks Output vector to which the break positions are appended
+     * @param isPhraseBreaking Not used by this engine
+     * @param status Information on any errors encountered.
+     * @return The number of breaks found
+     */
+     virtual int32_t divideUpDictionaryRange(UText *text,
+                                             int32_t rangeStart,
+                                             int32_t rangeEnd,
+                                             UVector32 &foundBreaks,
+                                             UBool isPhraseBreaking,
+                                             UErrorCode& status) const override;
+private:
+    /** The model; owned, or nullptr after a failed construction. */
+    const LSTMData* fData;
+    /** Turns text into model input; owned. */
+    const Vectorizer* fVectorizer;
+};
+
+/**
+ * Creates an LSTM break engine for the script from the model data.
+ * On success the engine owns the data.
+ * @return The engine, or nullptr if the script is not supported or on failure.
+ */
+U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine(
+    UScriptCode script, const LSTMData* data, UErrorCode& status);
+
+/**
+ * Creates model data from an opened model resource bundle.
+ * The returned object closes the bundle when it is deleted.
+ * @return The data, owned by the caller, or nullptr on failure.
+ */
+U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(
+    UResourceBundle* rb, UErrorCode& status);
+
+/**
+ * Creates the default model data for the script.
+ * @return The data, owned by the caller, or nullptr if there is none or on failure.
+ */
+U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(
+    UScriptCode script, UErrorCode& status);
+
+/** Deletes model data that is not owned by an engine. */
+U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data);
+/** Returns the name of the model; the string is not owned by the caller. */
+U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data);
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif  /* LSTMBE_H */
@@ -33,7 +33,7 @@ U_NAMESPACE_BEGIN
 class U_COMMON_API MessageImpl {
 public:
    /**
-     * @return TRUE if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
+     * @return true if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
     */
    static UBool jdkAposMode(const MessagePattern &msgPattern) {
        return msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED;
@@ -55,7 +55,7 @@ public:
                                                            UnicodeString &result);

 private:
-    MessageImpl();  // no constructor: all static methods
+    MessageImpl() = delete;  // no constructor: all static methods
 };

 U_NAMESPACE_END
@@ -32,56 +32,56 @@ U_NAMESPACE_BEGIN

 // Unicode character/code point constants ---------------------------------- ***

-static const UChar u_pound=0x23;
-static const UChar u_apos=0x27;
-static const UChar u_plus=0x2B;
-static const UChar u_comma=0x2C;
-static const UChar u_minus=0x2D;
-static const UChar u_dot=0x2E;
-static const UChar u_colon=0x3A;
-static const UChar u_lessThan=0x3C;
-static const UChar u_equal=0x3D;
-static const UChar u_A=0x41;
-static const UChar u_C=0x43;
-static const UChar u_D=0x44;
-static const UChar u_E=0x45;
-static const UChar u_H=0x48;
-static const UChar u_I=0x49;
-static const UChar u_L=0x4C;
-static const UChar u_N=0x4E;
-static const UChar u_O=0x4F;
-static const UChar u_P=0x50;
-static const UChar u_R=0x52;
-static const UChar u_S=0x53;
-static const UChar u_T=0x54;
-static const UChar u_U=0x55;
-static const UChar u_Z=0x5A;
-static const UChar u_a=0x61;
-static const UChar u_c=0x63;
-static const UChar u_d=0x64;
-static const UChar u_e=0x65;
-static const UChar u_f=0x66;
-static const UChar u_h=0x68;
-static const UChar u_i=0x69;
-static const UChar u_l=0x6C;
-static const UChar u_n=0x6E;
-static const UChar u_o=0x6F;
-static const UChar u_p=0x70;
-static const UChar u_r=0x72;
-static const UChar u_s=0x73;
-static const UChar u_t=0x74;
-static const UChar u_u=0x75;
-static const UChar u_z=0x7A;
-static const UChar u_leftCurlyBrace=0x7B;
-static const UChar u_pipe=0x7C;
-static const UChar u_rightCurlyBrace=0x7D;
-static const UChar u_lessOrEqual=0x2264;  // U+2264 is <=
+static const char16_t u_pound=0x23;
+static const char16_t u_apos=0x27;
+static const char16_t u_plus=0x2B;
+static const char16_t u_comma=0x2C;
+static const char16_t u_minus=0x2D;
+static const char16_t u_dot=0x2E;
+static const char16_t u_colon=0x3A;
+static const char16_t u_lessThan=0x3C;
+static const char16_t u_equal=0x3D;
+static const char16_t u_A=0x41;
+static const char16_t u_C=0x43;
+static const char16_t u_D=0x44;
+static const char16_t u_E=0x45;
+static const char16_t u_H=0x48;
+static const char16_t u_I=0x49;
+static const char16_t u_L=0x4C;
+static const char16_t u_N=0x4E;
+static const char16_t u_O=0x4F;
+static const char16_t u_P=0x50;
+static const char16_t u_R=0x52;
+static const char16_t u_S=0x53;
+static const char16_t u_T=0x54;
+static const char16_t u_U=0x55;
+static const char16_t u_Z=0x5A;
+static const char16_t u_a=0x61;
+static const char16_t u_c=0x63;
+static const char16_t u_d=0x64;
+static const char16_t u_e=0x65;
+static const char16_t u_f=0x66;
+static const char16_t u_h=0x68;
+static const char16_t u_i=0x69;
+static const char16_t u_l=0x6C;
+static const char16_t u_n=0x6E;
+static const char16_t u_o=0x6F;
+static const char16_t u_p=0x70;
+static const char16_t u_r=0x72;
+static const char16_t u_s=0x73;
+static const char16_t u_t=0x74;
+static const char16_t u_u=0x75;
+static const char16_t u_z=0x7A;
+static const char16_t u_leftCurlyBrace=0x7B;
+static const char16_t u_pipe=0x7C;
+static const char16_t u_rightCurlyBrace=0x7D;
+static const char16_t u_lessOrEqual=0x2264;  // U+2264 is <=

-static const UChar kOffsetColon[]={  // "offset:"
+static const char16_t kOffsetColon[]={  // "offset:"
    u_o, u_f, u_f, u_s, u_e, u_t, u_colon
 };

-static const UChar kOther[]={  // "other"
+static const char16_t kOther[]={  // "other"
    u_o, u_t, u_h, u_e, u_r
 };

@@ -97,9 +97,9 @@ public:
    UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode);
    UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const {
        for(int32_t i=0; i<length; ++i) {
-            if(a[i]!=other.a[i]) { return FALSE; }
+            if(a[i]!=other.a[i]) { return false; }
        }
-        return TRUE;
+        return true;
    }

    MaybeStackArray<T, stackCapacity> a;
@@ -112,7 +112,7 @@ MessagePatternList<T, stackCapacity>::copyFrom(
        int32_t length,
        UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode) && length>0) {
-        if(length>a.getCapacity() && NULL==a.resize(length)) {
+        if(length>a.getCapacity() && nullptr==a.resize(length)) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return;
        }
@@ -124,13 +124,13 @@ template<typename T, int32_t stackCapacity>
 UBool
 MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
-        return FALSE;
+        return false;
    }
-    if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) {
-        return TRUE;
+    if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
+        return true;
    }
    errorCode=U_MEMORY_ALLOCATION_ERROR;
-    return FALSE;
+    return false;
 }

 // MessagePatternList specializations -------------------------------------- ***
@@ -145,25 +145,25 @@ class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part,

 MessagePattern::MessagePattern(UErrorCode &errorCode)
        : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
-          partsList(NULL), parts(NULL), partsLength(0),
-          numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
-          hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
+          partsList(nullptr), parts(nullptr), partsLength(0),
+          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
+          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
    init(errorCode);
 }

 MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
        : aposMode(mode),
-          partsList(NULL), parts(NULL), partsLength(0),
-          numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
-          hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
+          partsList(nullptr), parts(nullptr), partsLength(0),
+          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
+          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
    init(errorCode);
 }

 MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
        : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
-          partsList(NULL), parts(NULL), partsLength(0),
-          numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
-          hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) {
+          partsList(nullptr), parts(nullptr), partsLength(0),
+          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
+          hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
    if(init(errorCode)) {
        parse(pattern, parseError, errorCode);
    }
@@ -172,21 +172,21 @@ MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseE
 UBool
 MessagePattern::init(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
-        return FALSE;
+        return false;
    }
    partsList=new MessagePatternPartsList();
-    if(partsList==NULL) {
+    if(partsList==nullptr) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
-        return FALSE;
+        return false;
    }
    parts=partsList->a.getAlias();
-    return TRUE;
+    return true;
 }

 MessagePattern::MessagePattern(const MessagePattern &other)
        : UObject(other), aposMode(other.aposMode), msg(other.msg),
-          partsList(NULL), parts(NULL), partsLength(0),
-          numericValuesList(NULL), numericValues(NULL), numericValuesLength(0),
+          partsList(nullptr), parts(nullptr), partsLength(0),
+          numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
          hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers),
          needsAutoQuoting(other.needsAutoQuoting) {
    UErrorCode errorCode=U_ZERO_ERROR;
@@ -215,46 +215,46 @@ MessagePattern::operator=(const MessagePattern &other) {
 UBool
 MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
-        return FALSE;
+        return false;
    }
-    parts=NULL;
+    parts=nullptr;
    partsLength=0;
-    numericValues=NULL;
+    numericValues=nullptr;
    numericValuesLength=0;
-    if(partsList==NULL) {
+    if(partsList==nullptr) {
        partsList=new MessagePatternPartsList();
-        if(partsList==NULL) {
+        if(partsList==nullptr) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
-            return FALSE;
+            return false;
        }
        parts=partsList->a.getAlias();
    }
    if(other.partsLength>0) {
        partsList->copyFrom(*other.partsList, other.partsLength, errorCode);
        if(U_FAILURE(errorCode)) {
-            return FALSE;
+            return false;
        }
        parts=partsList->a.getAlias();
        partsLength=other.partsLength;
    }
    if(other.numericValuesLength>0) {
-        if(numericValuesList==NULL) {
+        if(numericValuesList==nullptr) {
            numericValuesList=new MessagePatternDoubleList();
-            if(numericValuesList==NULL) {
+            if(numericValuesList==nullptr) {
                errorCode=U_MEMORY_ALLOCATION_ERROR;
-                return FALSE;
+                return false;
            }
            numericValues=numericValuesList->a.getAlias();
        }
        numericValuesList->copyFrom(
            *other.numericValuesList, other.numericValuesLength, errorCode);
        if(U_FAILURE(errorCode)) {
-            return FALSE;
+            return false;
        }
        numericValues=numericValuesList->a.getAlias();
        numericValuesLength=other.numericValuesLength;
    }
-    return TRUE;
+    return true;
 }

 MessagePattern::~MessagePattern() {
@@ -303,16 +303,16 @@ void
 MessagePattern::clear() {
    // Mostly the same as preParse().
    msg.remove();
-    hasArgNames=hasArgNumbers=FALSE;
-    needsAutoQuoting=FALSE;
+    hasArgNames=hasArgNumbers=false;
+    needsAutoQuoting=false;
    partsLength=0;
    numericValuesLength=0;
 }

-UBool
+bool
 MessagePattern::operator==(const MessagePattern &other) const {
    if(this==&other) {
-        return TRUE;
+        return true;
    }
    return
        aposMode==other.aposMode &&
@@ -351,7 +351,7 @@ MessagePattern::autoQuoteApostropheDeep() const {
    for(int32_t i=count; i>0;) {
        const Part &part=getPart(--i);
        if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) {
-           modified.insert(part.index, (UChar)part.value);
+           modified.insert(part.index, static_cast<char16_t>(part.value));
        }
    }
    return modified;
@@ -387,10 +387,10 @@ MessagePattern::getPluralOffset(int32_t pluralStart) const {

 // MessagePattern::Part ---------------------------------------------------- ***

-UBool
+bool
 MessagePattern::Part::operator==(const Part &other) const {
    if(this==&other) {
-        return TRUE;
+        return true;
    }
    return
        type==other.type &&
@@ -407,25 +407,25 @@ MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError,
    if(U_FAILURE(errorCode)) {
        return;
    }
-    if(parseError!=NULL) {
+    if(parseError!=nullptr) {
        parseError->line=0;
        parseError->offset=0;
        parseError->preContext[0]=0;
        parseError->postContext[0]=0;
    }
    msg=pattern;
-    hasArgNames=hasArgNumbers=FALSE;
-    needsAutoQuoting=FALSE;
+    hasArgNames=hasArgNumbers=false;
+    needsAutoQuoting=false;
    partsLength=0;
    numericValuesLength=0;
 }

 void
 MessagePattern::postParse() {
-    if(partsList!=NULL) {
+    if(partsList!=nullptr) {
        parts=partsList->a.getAlias();
    }
-    if(numericValuesList!=NULL) {
+    if(numericValuesList!=nullptr) {
        numericValues=numericValuesList->a.getAlias();
    }
 }
@@ -437,7 +437,7 @@ MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
    if(U_FAILURE(errorCode)) {
        return 0;
    }
-    if(nestingLevel>Part::MAX_VALUE) {
+    if(nestingLevel>Part::MAX_NESTED_LEVELS) {
        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
@@ -451,14 +451,14 @@ MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
        if(index>=msg.length()) {
            break;
        }
-        UChar c=msg.charAt(index++);
+        char16_t c=msg.charAt(index++);
        if(c==u_apos) {
            if(index==msg.length()) {
                // The apostrophe is the last character in the pattern. 
                // Add a Part for auto-quoting.
                addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
                        u_apos, errorCode);  // value=char to be inserted
-                needsAutoQuoting=TRUE;
+                needsAutoQuoting=true;
            } else {
                c=msg.charAt(index);
                if(c==u_apos) {
@@ -491,7 +491,7 @@ MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
                            // Add a Part for auto-quoting.
                            addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
                                    u_apos, errorCode);  // value=char to be inserted
-                            needsAutoQuoting=TRUE;
+                            needsAutoQuoting=true;
                            break;
                        }
                    }
@@ -500,7 +500,7 @@ MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
                    // Add a Part for auto-quoting.
                    addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
                            u_apos, errorCode);  // value=char to be inserted
-                    needsAutoQuoting=TRUE;
+                    needsAutoQuoting=true;
                }
            }
        } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) {
@@ -560,7 +560,7 @@ MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingL
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
-        hasArgNumbers=TRUE;
+        hasArgNumbers=true;
        addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode);
    } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) {
        int32_t length=index-nameIndex;
@@ -569,7 +569,7 @@ MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingL
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
-        hasArgNames=TRUE;
+        hasArgNames=true;
        addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode);
    } else {  // number<-1 (ARG_NAME_NOT_VALID)
        setParseError(parseError, nameIndex);  // Bad argument syntax.
@@ -582,7 +582,7 @@ MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingL
        errorCode=U_UNMATCHED_BRACES;
        return 0;
    }
-    UChar c=msg.charAt(index);
+    char16_t c=msg.charAt(index);
    if(c==u_rightCurlyBrace) {
        // all done
    } else if(c!=u_comma) {
@@ -628,7 +628,7 @@ MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingL
            }
        }
        // change the ARG_START type from NONE to argType
-        partsList->a[argStart].value=(int16_t)argType;
+        partsList->a[argStart].value = static_cast<int16_t>(argType);
        if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
            addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
        }
@@ -663,7 +663,7 @@ MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorC
    int32_t start=index;
    int32_t nestedBraces=0;
    while(index<msg.length()) {
-        UChar c=msg.charAt(index++);
+        char16_t c=msg.charAt(index++);
        if(c==u_apos) {
            // Treat apostrophe as quoting but include it in the style part.
            // Find the end of the quoted literal text.
@@ -727,7 +727,7 @@ MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
-        parseDouble(numberIndex, index, TRUE, parseError, errorCode);  // adds ARG_INT or ARG_DOUBLE
+        parseDouble(numberIndex, index, true, parseError, errorCode);  // adds ARG_INT or ARG_DOUBLE
        if(U_FAILURE(errorCode)) {
            return 0;
        }
@@ -738,7 +738,7 @@ MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
            errorCode=U_PATTERN_SYNTAX_ERROR;
            return 0;
        }
-        UChar c=msg.charAt(index);
+        char16_t c=msg.charAt(index);
        if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) {  // U+2264 is <=
            setParseError(parseError, start);  // Expected choice separator (#<\u2264) instead of c.
            errorCode=U_PATTERN_SYNTAX_ERROR;
@@ -774,8 +774,8 @@ MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
        return 0;
    }
    int32_t start=index;
-    UBool isEmpty=TRUE;
-    UBool hasOther=FALSE;
+    UBool isEmpty=true;
+    UBool hasOther=false;
    for(;;) {
        // First, collect the selector looking for a small set of terminators.
        // It would be a little faster to consider the syntax of each possible
@@ -811,7 +811,7 @@ MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
                return 0;
            }
            addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
-            parseDouble(selectorIndex+1, index, FALSE,
+            parseDouble(selectorIndex+1, index, false,
                        parseError, errorCode);  // adds ARG_INT or ARG_DOUBLE
        } else {
            index=skipIdentifier(index);
@@ -845,12 +845,12 @@ MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }
-                parseDouble(valueIndex, index, FALSE,
+                parseDouble(valueIndex, index, false,
                            parseError, errorCode);  // adds ARG_INT or ARG_DOUBLE
                if(U_FAILURE(errorCode)) {
                    return 0;
                }
-                isEmpty=FALSE;
+                isEmpty=false;
                continue;  // no message fragment after the offset
            } else {
                // normal selector word
@@ -861,7 +861,7 @@ MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
                }
                addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
                if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) {
-                    hasOther=TRUE;
+                    hasOther=true;
                }
            }
        }
@@ -880,7 +880,7 @@ MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
        if(U_FAILURE(errorCode)) {
            return 0;
        }
-        isEmpty=FALSE;
+        isEmpty=false;
    }
 }

@@ -895,17 +895,17 @@ MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t li
    int32_t number;
    // Defer numeric errors until we know there are only digits.
    UBool badNumber;
-    UChar c=s.charAt(start++);
+    char16_t c=s.charAt(start++);
    if(c==0x30) {
        if(start==limit) {
            return 0;
        } else {
            number=0;
-            badNumber=TRUE;  // leading zero
+            badNumber=true;  // leading zero
        }
    } else if(0x31<=c && c<=0x39) {
        number=c-0x30;
-        badNumber=FALSE;
+        badNumber=false;
    } else {
        return UMSGPAT_ARG_NAME_NOT_NUMBER;
    }
@@ -913,9 +913,10 @@ MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t li
        c=s.charAt(start++);
        if(0x30<=c && c<=0x39) {
            if(number>=INT32_MAX/10) {
-                badNumber=TRUE;  // overflow
+                badNumber=true;  // overflow
+            } else {
+                number=number*10+(c-0x30);
            }
-            number=number*10+(c-0x30);
        } else {
            return UMSGPAT_ARG_NAME_NOT_NUMBER;
        }
@@ -941,7 +942,7 @@ MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
        int32_t value=0;
        int32_t isNegative=0;  // not boolean so that we can easily add it to value
        int32_t index=start;
-        UChar c=msg.charAt(index++);
+        char16_t c=msg.charAt(index++);
        if(c==u_minus) {
            isNegative=1;
            if(index==limit) {
@@ -980,13 +981,13 @@ MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
        }
        // Let Double.parseDouble() throw a NumberFormatException.
        char numberChars[128];
-        int32_t capacity=(int32_t)sizeof(numberChars);
+        int32_t capacity = static_cast<int32_t>(sizeof(numberChars));
        int32_t length=limit-start;
        if(length>=capacity) {
            break;  // number too long
        }
        msg.extract(start, length, numberChars, capacity, US_INV);
-        if((int32_t)uprv_strlen(numberChars)<length) {
+        if (static_cast<int32_t>(uprv_strlen(numberChars)) < length) {
            break;  // contains non-invariant character that was turned into NUL
        }
        char *end;
@@ -999,30 +1000,29 @@ MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
    }
    setParseError(parseError, start /*, limit*/);  // Bad syntax for numeric value.
    errorCode=U_PATTERN_SYNTAX_ERROR;
-    return;
 }

 int32_t
 MessagePattern::skipWhiteSpace(int32_t index) {
-    const UChar *s=msg.getBuffer();
+    const char16_t *s=msg.getBuffer();
    int32_t msgLength=msg.length();
-    const UChar *t=PatternProps::skipWhiteSpace(s+index, msgLength-index);
-    return (int32_t)(t-s);
+    const char16_t *t=PatternProps::skipWhiteSpace(s+index, msgLength-index);
+    return static_cast<int32_t>(t - s);
 }

 int32_t
 MessagePattern::skipIdentifier(int32_t index) {
-    const UChar *s=msg.getBuffer();
+    const char16_t *s=msg.getBuffer();
    int32_t msgLength=msg.length();
-    const UChar *t=PatternProps::skipIdentifier(s+index, msgLength-index);
-    return (int32_t)(t-s);
+    const char16_t *t=PatternProps::skipIdentifier(s+index, msgLength-index);
+    return static_cast<int32_t>(t - s);
 }

 int32_t
 MessagePattern::skipDouble(int32_t index) {
    int32_t msgLength=msg.length();
    while(index<msgLength) {
-        UChar c=msg.charAt(index);
+        char16_t c=msg.charAt(index);
        // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
        if((c<0x30 && c!=u_plus && c!=u_minus && c!=u_dot) || (c>0x39 && c!=u_e && c!=u_E && c!=0x221e)) {
            break;
@@ -1039,7 +1039,7 @@ MessagePattern::isArgTypeChar(UChar32 c) {

 UBool
 MessagePattern::isChoice(int32_t index) {
-    UChar c;
+    char16_t c;
    return
        ((c=msg.charAt(index++))==u_c || c==u_C) &&
        ((c=msg.charAt(index++))==u_h || c==u_H) &&
@@ -1051,7 +1051,7 @@ MessagePattern::isChoice(int32_t index) {

 UBool
 MessagePattern::isPlural(int32_t index) {
-    UChar c;
+    char16_t c;
    return
        ((c=msg.charAt(index++))==u_p || c==u_P) &&
        ((c=msg.charAt(index++))==u_l || c==u_L) &&
@@ -1063,7 +1063,7 @@ MessagePattern::isPlural(int32_t index) {

 UBool
 MessagePattern::isSelect(int32_t index) {
-    UChar c;
+    char16_t c;
    return
        ((c=msg.charAt(index++))==u_s || c==u_S) &&
        ((c=msg.charAt(index++))==u_e || c==u_E) &&
@@ -1075,7 +1075,7 @@ MessagePattern::isSelect(int32_t index) {

 UBool
 MessagePattern::isOrdinal(int32_t index) {
-    UChar c;
+    char16_t c;
    return
        ((c=msg.charAt(index++))==u_o || c==u_O) &&
        ((c=msg.charAt(index++))==u_r || c==u_R) &&
@@ -1106,8 +1106,8 @@ MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t len
        Part &part=partsList->a[partsLength++];
        part.type=type;
        part.index=index;
-        part.length=(uint16_t)length;
-        part.value=(int16_t)value;
+        part.length = static_cast<uint16_t>(length);
+        part.value = static_cast<int16_t>(value);
        part.limitPartIndex=0;
    }
 }
@@ -1127,9 +1127,9 @@ MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t len
        return;
    }
    int32_t numericIndex=numericValuesLength;
-    if(numericValuesList==NULL) {
+    if(numericValuesList==nullptr) {
        numericValuesList=new MessagePatternDoubleList();
-        if(numericValuesList==NULL) {
+        if(numericValuesList==nullptr) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return;
        }
@@ -1147,7 +1147,7 @@ MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t len

 void
 MessagePattern::setParseError(UParseError *parseError, int32_t index) {
-    if(parseError==NULL) {
+    if(parseError==nullptr) {
        return;
    }
    parseError->offset=index;
@@ -0,0 +1,269 @@
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "cmemory.h"
+#include "mlbe.h"
+#include "uassert.h"
+#include "ubrkimpl.h"
+#include "unicode/resbund.h"
+#include "unicode/udata.h"
+#include "unicode/utf16.h"
+#include "uresimp.h"
+#include "util.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+enum class ModelIndex { kUWStart = 0, kBWStart = 6, kTWStart = 9 };  // first fModel slot of the UW (6 tables), BW (3 tables) and TW (4 tables) groups
+
+MlBreakEngine::MlBreakEngine(const UnicodeSet &digitOrOpenPunctuationOrAlphabetSet,
+                             const UnicodeSet &closePunctuationSet, UErrorCode &status)
+    : fDigitOrOpenPunctuationOrAlphabetSet(digitOrOpenPunctuationOrAlphabetSet),
+      fClosePunctuationSet(closePunctuationSet),
+      fNegativeSum(0) {  // evaluateBreakpoint() starts every score from fNegativeSum
+    if (U_FAILURE(status)) {  // status already failed on entry: leave the model unloaded
+        return;
+    }
+    loadMLModel(status);  // opens the "jaml" break-iterator resource bundle; failures are reported through status
+}
+
+MlBreakEngine::~MlBreakEngine() {}  // empty body: no explicit cleanup is performed here
+
+int32_t MlBreakEngine::divideUpRange(UText *inText, int32_t rangeStart, int32_t rangeEnd,
+                                     UVector32 &foundBreaks, const UnicodeString &inString,
+                                     const LocalPointer<UVector32> &inputMap,
+                                     UErrorCode &status) const {  // Scores every code point boundary of inString and pushes the accepted ones onto foundBreaks; returns pushes minus the optional final pop.
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+    if (rangeStart >= rangeEnd) {  // an empty or inverted range is reported as an illegal argument
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    UVector32 boundary(inString.countChar32() + 1, status);  // break positions, counted in code points of inString
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+    int32_t numBreaks = 0;
+    int32_t codePointLength = inString.countChar32();
+    // The ML algorithm groups six characters and evaluates whether the 4th one is a breakpoint.
+    // In each iteration, it evaluates the 4th char and then moves forward one char like a sliding
+    // window. Initially, the first six values in the indexList are [-1, -1, 0, 1, 2, 3]. After
+    // moving forward, finally the last six values in the indexList are
+    // [length-4, length-3, length-2, length-1, -1, -1]. The "+4" here means four extra "-1".
+    int32_t indexSize = codePointLength + 4;
+    LocalMemory<int32_t> indexList(static_cast<int32_t*>(uprv_malloc(indexSize * sizeof(int32_t))));
+    if (indexList.isNull()) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    int32_t numCodeUnits = initIndexList(inString, indexList.getAlias(), status);  // UTF-16 offset just past the code points indexed so far
+
+    // Add a break for the start.
+    boundary.addElement(0, status);
+    numBreaks++;
+    if (U_FAILURE(status)) return 0;
+
+    for (int32_t idx = 0; idx + 1 < codePointLength && U_SUCCESS(status); idx++) {  // the candidate break sits after code point idx (position idx + 1)
+        numBreaks =
+            evaluateBreakpoint(inString, indexList.getAlias(), idx, numCodeUnits, numBreaks, boundary, status);
+        if (idx + 4 < codePointLength) {  // slide the window: slot idx + 6 holds the UTF-16 offset of code point idx + 4
+            indexList[idx + 6] = numCodeUnits;
+            numCodeUnits += U16_LENGTH(inString.char32At(indexList[idx + 6]));
+        }
+    }
+
+    if (U_FAILURE(status)) return 0;
+
+    // Add a break for the end if there is not one there already.
+    if (boundary.lastElementi() != inString.countChar32()) {
+        boundary.addElement(inString.countChar32(), status);
+        numBreaks++;
+    }
+
+    int32_t prevCPPos = -1;
+    int32_t prevUTextPos = -1;
+    int32_t correctedNumBreaks = 0;
+    for (int32_t i = 0; i < numBreaks; i++) {
+        int32_t cpPos = boundary.elementAti(i);
+        int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;  // translate the code point position into a UText index
+        U_ASSERT(cpPos > prevCPPos);
+        U_ASSERT(utextPos >= prevUTextPos);
+
+        if (utextPos > prevUTextPos) {
+            if (utextPos != rangeStart ||  // a break exactly at rangeStart is kept only when the preceding character is close punctuation
+                (utextPos > 0 &&
+                 fClosePunctuationSet.contains(utext_char32At(inText, utextPos - 1)))) {
+                foundBreaks.push(utextPos, status);
+                correctedNumBreaks++;
+            }
+        } else {
+            // Normalization expanded the input text, the dictionary found a boundary
+            // within the expansion, giving two boundaries with the same index in the
+            // original text. Ignore the second. See ticket #12918.
+            --numBreaks;  // NOTE(review): this also lowers the bound of the enclosing loop (i < numBreaks)
+        }
+        prevCPPos = cpPos;
+        prevUTextPos = utextPos;
+    }
+    (void)prevCPPos;  // suppress compiler warnings about unused variable
+
+    UChar32 nextChar = utext_char32At(inText, rangeEnd);  // the character found at index rangeEnd
+    if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) {
+        // In phrase breaking, there has to be a breakpoint between Cj character and
+        // the number/open punctuation.
+        // E.g. る文字「そうだ、京都」->る▁文字▁「そうだ、▁京都」-> breakpoint between 字 and「
+        // E.g. 乗車率９０％程度だろうか -> 乗車▁率▁９０％▁程度だろうか -> breakpoint between 率 and ９
+        // E.g. しかもロゴがＵｎｉｃｏｄｅ！ -> しかも▁ロゴが▁Ｕｎｉｃｏｄｅ！-> breakpoint between が and Ｕ
+        if (!fDigitOrOpenPunctuationOrAlphabetSet.contains(nextChar)) {
+            foundBreaks.popi();  // remove the break at rangeEnd again
+            correctedNumBreaks--;
+        }
+    }
+
+    return correctedNumBreaks;
+}
+
+int32_t MlBreakEngine::evaluateBreakpoint(const UnicodeString &inString, int32_t *indexList,
+                                          int32_t startIdx, int32_t numCodeUnits, int32_t numBreaks,
+                                          UVector32 &boundary, UErrorCode &status) const {  // Scores the boundary after code point startIdx; on a positive score appends startIdx + 1 to boundary. Returns the updated break count.
+    if (U_FAILURE(status)) {
+        return numBreaks;
+    }
+    int32_t start = 0, end = 0;
+    int32_t score = fNegativeSum;  // baseline that the feature scores below are added to
+
+    for (int i = 0; i < 6; i++) {
+        // UW1 ~ UW6: the single code point at each of the six window slots
+        start = startIdx + i;
+        if (indexList[start] != -1) {
+            end = (indexList[start + 1] != -1) ? indexList[start + 1] : numCodeUnits;  // the next slot's offset, or numCodeUnits when that slot holds -1
+            score += fModel[static_cast<int32_t>(ModelIndex::kUWStart) + i].geti(
+                inString.tempSubString(indexList[start], end - indexList[start]));
+        }
+    }
+    for (int i = 0; i < 3; i++) {
+        // BW1 ~ BW3: two adjacent code points starting at window slots 1..3
+        start = startIdx + i + 1;
+        if (indexList[start] != -1 && indexList[start + 1] != -1) {
+            end = (indexList[start + 2] != -1) ? indexList[start + 2] : numCodeUnits;
+            score += fModel[static_cast<int32_t>(ModelIndex::kBWStart) + i].geti(
+                inString.tempSubString(indexList[start], end - indexList[start]));
+        }
+    }
+    for (int i = 0; i < 4; i++) {
+        // TW1 ~ TW4: three adjacent code points starting at window slots 0..3
+        start = startIdx + i;
+        if (indexList[start] != -1 && indexList[start + 1] != -1 && indexList[start + 2] != -1) {
+            end = (indexList[start + 3] != -1) ? indexList[start + 3] : numCodeUnits;
+            score += fModel[static_cast<int32_t>(ModelIndex::kTWStart) + i].geti(
+                inString.tempSubString(indexList[start], end - indexList[start]));
+        }
+    }
+
+    if (score > 0) {  // a positive total marks a break after code point startIdx
+        boundary.addElement(startIdx + 1, status);
+        numBreaks++;
+    }
+    return numBreaks;
+}
+
+// Prepares indexList for inString: every one of its (countChar32() + 4) slots is set to -1,
+// then slots 2..5 receive the code unit offsets of the first (up to) four code points.
+// Slots 0 and 1 are left at -1.
+//
+// Returns the number of code units occupied by those first (up to) four code points, or 0 if
+// status already indicates a failure on entry.
+int32_t MlBreakEngine::initIndexList(const UnicodeString &inString, int32_t *indexList,
+                                     UErrorCode &status) const {
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+    const int32_t numCodePoints = inString.countChar32();
+    // Writing 0xff into every byte yields -1 in each slot, presuming -1 is 4 bytes of 0xff.
+    uprv_memset(indexList, 0xff, (numCodePoints + 4) * sizeof(int32_t));
+
+    const int32_t seeded = (numCodePoints < 4) ? numCodePoints : 4;
+    int32_t codeUnitOffset = 0;
+    for (int32_t slot = 0; slot < seeded; ++slot) {
+        indexList[2 + slot] = codeUnitOffset;
+        codeUnitOffset += U16_LENGTH(inString.char32At(codeUnitOffset));
+    }
+    return codeUnitOffset;
+}
+
+// Loads all thirteen feature tables of the model from the "jaml" break-iterator resource
+// into fModel and finalizes fNegativeSum. Does nothing if error already indicates a failure
+// or if the resource cannot be opened.
+void MlBreakEngine::loadMLModel(UErrorCode &error) {
+    // BudouX's model consists of thirteen categories, each of which is made up of pairs of a
+    // feature and its score. As integrated into jaml.txt, each category is stored under two
+    // resource names: "<category>Keys" for the features and "<category>Values" for the
+    // corresponding scores.
+
+    if (U_FAILURE(error)) return;
+
+    LocalUResourceBundlePointer rbp(ures_openDirect(U_ICUDATA_BRKITR, "jaml", &error));
+    UResourceBundle *rb = rbp.getAlias();
+    if (U_FAILURE(error)) return;
+
+    // {key name, value name} per category, listed in the order of the slots of fModel.
+    static const char *const kCategoryNames[][2] = {
+        {"UW1Keys", "UW1Values"},
+        {"UW2Keys", "UW2Values"},
+        {"UW3Keys", "UW3Values"},
+        {"UW4Keys", "UW4Values"},
+        {"UW5Keys", "UW5Values"},
+        {"UW6Keys", "UW6Values"},
+        {"BW1Keys", "BW1Values"},
+        {"BW2Keys", "BW2Values"},
+        {"BW3Keys", "BW3Values"},
+        {"TW1Keys", "TW1Values"},
+        {"TW2Keys", "TW2Values"},
+        {"TW3Keys", "TW3Values"},
+        {"TW4Keys", "TW4Values"},
+    };
+
+    int32_t index = 0;
+    for (const auto &names : kCategoryNames) {
+        initKeyValue(rb, names[0], names[1], fModel[index++], error);
+    }
+    // initKeyValue subtracts every loaded score from fNegativeSum; halve the accumulated total.
+    fNegativeSum /= 2;
+}
+
+// Loads one category of the model: reads the string array named keyName and the integer
+// vector named valueName from rb, stores each (key, value) pair in model, and subtracts
+// each stored value from fNegativeSum.
+//
+// On return, error is set if a resource lookup fails, if an insertion into model fails, or
+// (U_INVALID_FORMAT_ERROR) if there are more keys than values.
+void MlBreakEngine::initKeyValue(UResourceBundle *rb, const char *keyName, const char *valueName,
+                                 Hashtable &model, UErrorCode &error) {
+    int32_t keySize = 0;
+    int32_t valueSize = 0;
+    int32_t stringLength = 0;
+    UnicodeString key;
+    StackUResourceBundle stackTempBundle;
+    ResourceDataValue modelKey;
+
+    // get modelValues
+    LocalUResourceBundlePointer modelValue(ures_getByKey(rb, valueName, nullptr, &error));
+    const int32_t *value = ures_getIntVector(modelValue.getAlias(), &valueSize, &error);
+    if (U_FAILURE(error)) return;
+
+    // get modelKeys
+    ures_getValueWithFallback(rb, keyName, stackTempBundle.getAlias(), modelKey, error);
+    ResourceArray stringArray = modelKey.getArray(error);
+    keySize = stringArray.getSize();
+    if (U_FAILURE(error)) return;
+
+    // Every key must have a matching score. Check this at runtime rather than with U_ASSERT,
+    // so that value[] is never indexed past valueSize when assertions are disabled.
+    if (keySize > valueSize) {
+        error = U_INVALID_FORMAT_ERROR;
+        return;
+    }
+
+    // Stop at the first failure: once error is set no further entry can be stored.
+    for (int32_t idx = 0; idx < keySize && U_SUCCESS(error); idx++) {
+        stringArray.getValue(idx, modelKey);
+        key = UnicodeString(modelKey.getString(stringLength, error));
+        if (U_SUCCESS(error)) {
+            fNegativeSum -= value[idx];
+            model.puti(key, value[idx], error);
+        }
+    }
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
@@ -0,0 +1,116 @@
+// © 2022 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef MLBREAKENGINE_H
+#define MLBREAKENGINE_H
+
+#include "hash.h"
+#include "unicode/resbund.h"
+#include "unicode/uniset.h"
+#include "unicode/utext.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * A machine learning break engine for the phrase breaking in Japanese.
+ */
+class MlBreakEngine : public UMemory {
+   public:
+    /**
+     * Constructor.
+     *
+     * @param digitOrOpenPunctuationOrAlphabetSet A UnicodeSet with the digit, open punctuation and
+     * alphabet.
+     * @param closePunctuationSet A UnicodeSet with close punctuation.
+     * @param status Information on any errors encountered.
+     */
+    MlBreakEngine(const UnicodeSet &digitOrOpenPunctuationOrAlphabetSet,
+                  const UnicodeSet &closePunctuationSet, UErrorCode &status);
+
+    /**
+     * Virtual destructor.
+     */
+    virtual ~MlBreakEngine();
+
+   public:
+    /**
+     * Divide up a range of characters handled by this break engine.
+     *
+     * @param inText A UText representing the text
+     * @param rangeStart The start of the range of the characters
+     * @param rangeEnd The end of the range of the characters
+     * @param foundBreaks Output vector (UVector32) of break positions
+     * @param inString The normalized string of text ranging from rangeStart to rangeEnd
+     * @param inputMap The vector storing the native index of inText
+     * @param status Information on any errors encountered.
+     * @return The number of breaks found
+     */
+    int32_t divideUpRange(UText *inText, int32_t rangeStart, int32_t rangeEnd,
+                          UVector32 &foundBreaks, const UnicodeString &inString,
+                          const LocalPointer<UVector32> &inputMap, UErrorCode &status) const;
+
+   private:
+    /**
+     * Load the machine learning model file.
+     *
+     * @param error Information on any errors encountered.
+     */
+    void loadMLModel(UErrorCode &error);
+
+    /**
+     * In the machine learning model file, specify the name of the key and value to load the
+     * corresponding feature and its score.
+     *
+     * @param rb A ResourceBundle corresponding to the model file.
+     * @param keyName The key name in the model file.
+     * @param valueName The value name in the model file.
+     * @param model A hashtable to store the pairs of the feature and its score.
+     * @param error Information on any errors encountered.
+     */
+    void initKeyValue(UResourceBundle *rb, const char *keyName, const char *valueName,
+                      Hashtable &model, UErrorCode &error);
+
+    /**
+     * Initialize the index list from the input string.
+     *
+     * @param inString An input string to be segmented.
+     * @param indexList A code unit index list of inString.
+     * @param status Information on any errors encountered.
+     * @return The number of code units of the first four characters in inString.
+     */
+    int32_t initIndexList(const UnicodeString &inString, int32_t *indexList,
+                          UErrorCode &status) const;
+
+    /**
+     * Evaluate whether the index is a potential breakpoint.
+     *
+     * @param inString An input string to be segmented.
+     * @param indexList A code unit index list of the inString.
+     * @param startIdx The start index of the indexList.
+     * @param numCodeUnits  The current code unit boundary of the indexList.
+     * @param numBreaks The accumulated number of breakpoints.
+     * @param boundary A vector including the index of the breakpoint.
+     * @param status Information on any errors encountered.
+     * @return The number of breakpoints
+     */
+    int32_t evaluateBreakpoint(const UnicodeString &inString, int32_t *indexList, int32_t startIdx,
+                               int32_t numCodeUnits, int32_t numBreaks, UVector32 &boundary,
+                               UErrorCode &status) const;
+
+    void printUnicodeString(const UnicodeString &s) const;  // NOTE(review): presumably a debugging aid; its definition is not in view here - confirm.
+
+    UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet;  // Checked by divideUpRange against the character at rangeEnd.
+    UnicodeSet fClosePunctuationSet;
+    Hashtable fModel[13];  // {UW1, UW2, ... UW6, BW1, ... BW3, TW1, TW2, ... TW4} 6+3+4= 13
+    int32_t fNegativeSum;  // Starting score in evaluateBreakpoint; initKeyValue subtracts each model score from it and loadMLModel halves it.
+};
+
+#endif
+
+U_NAMESPACE_END
+
+/* MLBREAKENGINE_H */
+#endif
@@ -19,7 +19,7 @@ STLPort's broken stddef.h from being used when rc.exe parses this file.

 #include "unicode/uversion.h"

-#define ICU_WEBSITE "http://icu-project.org"
+#define ICU_WEBSITE "https://icu.unicode.org/"
 #define ICU_COMPANY "The ICU Project"
 #define ICU_PRODUCT_PREFIX "ICU"
 #define ICU_PRODUCT "International Components for Unicode"
@@ -28,50 +28,48 @@

 U_NAMESPACE_BEGIN

-//----------------------------------------------------------------------------
-// Code within that accesses shared static or global data should
-// should instantiate a Mutex object while doing so. You should make your own 
-// private mutex where possible.
-
-// For example:
-//
-// UMutex myMutex = U_MUTEX_INITIALIZER;
-//
-// void Function(int arg1, int arg2)
-// {
-//    static Object* foo;     // Shared read-write object
-//    Mutex mutex(&myMutex);  // or no args for the global lock
-//    foo->Method();
-//    // When 'mutex' goes out of scope and gets destroyed here, the lock is released
-// }
-//
-// Note:  Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
-//        returning a Mutex. This is a common mistake which silently slips through the
-//        compiler!!
-//
+/**
+  * Mutex is a helper class for convenient locking and unlocking of a UMutex.
+  *
+  * Creating a local scope Mutex will lock a UMutex, holding the lock until the Mutex
+  * goes out of scope.
+  *
+  *  If no UMutex is specified, the ICU global mutex is implied.
+  *
+  *  For example:
+  *
+  *  static UMutex myMutex;
+  *
+  *  void Function(int arg1, int arg2)
+  *  {
+  *     static Object* foo;      // Shared read-write object
+  *     Mutex mutex(&myMutex);   // or no args for the global lock
+  *     foo->Method();
+  *     // When 'mutex' goes out of scope and gets destroyed here, the lock is released
+  *  }
+  *
+  *  Note:  Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
+  *         returning a Mutex. This is a common mistake which silently slips through the
+  *         compiler!!
+  */

 class U_COMMON_API Mutex : public UMemory {
 public:
-  inline Mutex(UMutex *mutex = NULL);
-  inline ~Mutex();
+    Mutex(UMutex *mutex = nullptr) : fMutex(mutex) {
+        umtx_lock(fMutex);
+    }
+    ~Mutex() {
+        umtx_unlock(fMutex);
+    }
+
+    Mutex(const Mutex &other) = delete; // forbid copy construction of this class
+    Mutex &operator=(const Mutex &other) = delete; // forbid copy assignment of this class
+    void *operator new(size_t s) = delete;  // forbid heap allocation. Locals only.

 private:
-  UMutex   *fMutex;
-
-  Mutex(const Mutex &other); // forbid copying of this class
-  Mutex &operator=(const Mutex &other); // forbid copying of this class
+    UMutex   *fMutex;
 };

-inline Mutex::Mutex(UMutex *mutex)
-  : fMutex(mutex)
-{
-  umtx_lock(fMutex);
-}
-
-inline Mutex::~Mutex()
-{
-  umtx_unlock(fMutex);
-}

 U_NAMESPACE_END

--- a/Show More
+++ b/Show More